@@ -38,56 +38,87 @@ def load(f):
3838#from html5lib import html5parser, serializer, treewalkers, treebuilders
3939#END RELEASE
4040
def PullDOMAdapter(node):
    """Yield pulldom-style ``(event, node)`` pairs for a DOM subtree.

    Documents and document fragments produce no event of their own; only
    the events of their children are yielded. Elements are bracketed by
    START_ELEMENT / END_ELEMENT around their children's events. Comments
    yield COMMENT, and text and CDATA sections yield CHARACTERS.

    Raises NotImplementedError for DOCTYPE nodes and for any other node
    type not listed above. The error surfaces only when iteration reaches
    the offending node, because this is a generator.
    """
    from xml.dom import Node
    from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS

    kind = node.nodeType

    if kind == Node.DOCUMENT_TYPE_NODE:
        raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM")

    # Leaf nodes: a single event, nothing to descend into.
    if kind == Node.COMMENT_NODE:
        yield COMMENT, node
        return
    if kind in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        yield CHARACTERS, node
        return

    isElement = kind == Node.ELEMENT_NODE
    isContainer = kind in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE)
    if not (isElement or isContainer):
        raise NotImplementedError("Node type not supported: " + str(kind))

    # Elements and containers both walk their children; only elements
    # emit the surrounding start/end events.
    if isElement:
        yield START_ELEMENT, node
    for child in node.childNodes:
        for childEvent in PullDOMAdapter(child):
            yield childEvent
    if isElement:
        yield END_ELEMENT, node
68+
# Tree implementations exercised by the tests. Each entry is a dict holding
# the tree "builder" handed to the parser and the "walker" used to traverse
# the parsed result. An optional "adapter" is applied to the parsed document
# before it is walked.
treeTypes = {
    "simpletree": {
        "builder": treebuilders.getTreeBuilder("simpletree"),
        "walker": treewalkers.getTreeWalker("simpletree"),
    },
    "DOM": {
        "builder": treebuilders.getTreeBuilder("dom"),
        "walker": treewalkers.getTreeWalker("dom"),
    },
    "PullDOM": {
        "builder": treebuilders.getTreeBuilder("dom"),
        "adapter": PullDOMAdapter,
        "walker": treewalkers.getTreeWalker("pulldom"),
    },
}


def _etreeTreeType(implementation):
    """Return the builder/walker entry for an ElementTree-style module."""
    return {
        "builder": treebuilders.getTreeBuilder("etree", implementation),
        "walker": treewalkers.getTreeWalker("etree", implementation),
    }


# Register whichever etree implementations can be imported, from a list
# that are "supposed" to work. Each one is optional: a missing module
# just means that tree type is not registered.
try:
    import xml.etree.ElementTree as ElementTree
    treeTypes['ElementTree'] = _etreeTreeType(ElementTree)
except ImportError:
    # Fall back to the standalone elementtree package.
    try:
        import elementtree.ElementTree as ElementTree
        treeTypes['ElementTree'] = _etreeTreeType(ElementTree)
    except ImportError:
        pass

try:
    import xml.etree.cElementTree as cElementTree
    treeTypes['cElementTree'] = _etreeTreeType(cElementTree)
except ImportError:
    # Fall back to the standalone cElementTree module.
    try:
        import cElementTree
        treeTypes['cElementTree'] = _etreeTreeType(cElementTree)
    except ImportError:
        pass

try:
    import lxml.etree as lxml
    treeTypes['lxml'] = _etreeTreeType(lxml)
except ImportError:
    pass

try:
    import BeautifulSoup
    treeTypes["beautifulsoup"] = {
        "builder": treebuilders.getTreeBuilder("beautifulsoup"),
        "walker": treewalkers.getTreeWalker("beautifulsoup"),
    }
except ImportError:
    pass
93124
@@ -146,12 +177,14 @@ def sortattrs(x):
146177
147178class TestCase (unittest .TestCase ):
148179 def runTest (self , innerHTML , input , expected , errors , treeClass ):
149- p = html5parser .HTMLParser (tree = treeClass [0 ])
180+ p = html5parser .HTMLParser (tree = treeClass ["builder" ])
181+
150182 if innerHTML :
151183 document = p .parseFragment (StringIO .StringIO (input ), innerHTML )
152184 else :
153185 document = p .parse (StringIO .StringIO (input ))
154- output = convertTokens (treeClass [1 ]().walk (document ))
186+ document = treeClass .get ("adapter" , lambda x : x )(document )
187+ output = convertTokens (treeClass ["walker" ]().walk (document ))
155188 output = attrlist .sub (sortattrs , output )
156189 expected = attrlist .sub (sortattrs , expected )
157190 errorMsg = "\n " .join (["\n \n Expected:" , expected ,
0 commit comments