@@ -23,43 +23,90 @@ def getETreeModule(ElementTreeImplementation):
2323def getETreeBuilder (ElementTreeImplementation ):
2424 ElementTree = ElementTreeImplementation
2525
26- class TreeWalker (_base .TreeWalker ):
27- def walk (self , node ):
26+ class TreeWalker (_base .NonRecursiveTreeWalker ):
27+ """Given the particular ElementTree representation, this implementation,
28+ to avoid using recursion, returns "nodes" as tuples with the following
29+ content:
30+
31+ 1. An Element node serving as *context* (it cannot be called the parent
32+ node because of the particular ``tail`` text nodes).
33+
34+ 2. Either the string literals ``"text"`` or ``"tail"`` or a child index
35+
36+ 3. A list used as a stack of all ancestor *context nodes*. Each entry is
37+ a pair tuple whose first item is an Element and whose second item is a
38+ child index.
39+ """
40+
def getNodeDetails(self, node):
    """Classify ``node`` and return the detail tuple describing it.

    ``node`` is either a bare object (the tree root: an Element, or an
    object without a ``tag`` attribute that exposes ``getroot()``) or an
    ``(element, key, parents)`` tuple whose ``key`` is ``"text"``,
    ``"tail"`` or a child index into ``element``.

    Returns one of:

    * ``(_base.TEXT, text)`` for a ``"text"``/``"tail"`` pseudo-node,
    * ``(_base.DOCUMENT,)`` for a document root or fragment element,
    * ``(_base.DOCTYPE, text)`` for a doctype element,
    * ``(_base.COMMENT, text)`` for a comment element,
    * ``(_base.ELEMENT, tag, attribute items, has_content)`` otherwise,
      where ``has_content`` is ``len(node) or node.text`` (truthy when the
      element has child elements or leading text).
    """
    if isinstance(node, tuple):
        # Tuple form: resolve it to either a text value or a real Element.
        context, position, _ancestors = node
        if position in ("text", "tail"):
            return _base.TEXT, getattr(context, position)
        node = context[int(position)]

    # An object without ``tag`` is taken to be a tree wrapper; use its root.
    if not hasattr(node, "tag"):
        node = node.getroot()

    tag = node.tag
    if tag in ("<DOCUMENT_ROOT>", "<DOCUMENT_FRAGMENT>"):
        return (_base.DOCUMENT,)
    if tag == "<!DOCTYPE>":
        return _base.DOCTYPE, node.text
    # Comment elements carry a non-string tag of the same type as the
    # implementation's ``Comment`` factory.
    if type(tag) == type(ElementTree.Comment):
        return _base.COMMENT, node.text
    # Anything else is assumed to be an ordinary element.
    return _base.ELEMENT, tag, node.attrib.items(), len(node) or node.text
5264
53- if node .tail :
54- for token in self .text (node .tail ):
55- yield token
56-
57- def walkChildren (self , node ):
def getFirstChild(self, node):
    """Return the first child of ``node``, or ``None`` if it has none.

    ``node`` is either the bare root object (an Element, or an object
    without a ``tag`` attribute that exposes ``getroot()``) or an
    ``(element, key, parents)`` tuple whose ``key`` is ``"text"``,
    ``"tail"`` or a child index into ``element``.

    The result is ``(element, "text", parents)`` when the resolved element
    has leading text, otherwise ``(element, 0, parents)`` for its first
    child element.  For a tuple ``node`` the same ``parents`` list is
    reused and extended in place with ``(context element, index)``; for
    the bare root a fresh empty list is started.

    ``None`` is returned (instead of failing an ``assert``, which is
    stripped under ``-O``) for text/tail pseudo-nodes and for elements
    with neither text nor child elements.
    """
    if isinstance(node, tuple):
        context, position, ancestors = node
        if position in ("text", "tail"):
            # Text nodes have no children.
            return None
        index = int(position)
        element = context[index]
    else:
        context = index = None
        ancestors = []
        element = node
        # Same root normalisation as getNodeDetails: an object without
        # ``tag`` is taken to be a tree wrapper.
        if not hasattr(element, "tag"):
            element = element.getroot()

    if not (element.text or len(element)):
        # Nothing to descend into; leave the ancestor stack untouched.
        return None

    if context is not None:
        # Remember how to get back to ``element`` from its context.
        ancestors.append((context, index))

    if element.text:
        return (element, "text", ancestors)
    return (element, 0, ancestors)
79+
def getNextSibling(self, node):
    """Return the node following ``node`` in document order at its level.

    ``node`` is normally an ``(element, key, parents)`` tuple whose ``key``
    is ``"text"``, ``"tail"`` or a child index.  A non-tuple ``node`` (the
    bare root object) has no siblings, so ``None`` is returned for it
    rather than relying on an ``assert`` that is stripped under ``-O``.

    * After a ``"text"`` node comes the first child element of the same
      element, if any.
    * After a child element comes its ``tail`` text, if it has one; the
      ``(element, index)`` pair is pushed on ``parents`` so the position
      can be recovered afterwards.
    * After a ``"tail"`` node (or a child without tail) comes the next
      child element of the context element, if any.

    Returns a node tuple sharing the same ``parents`` list (which is
    mutated in place), or ``None`` when there is no further sibling.
    """
    if not isinstance(node, tuple):
        return None

    context, position, ancestors = node
    if position == "text":
        # Leading text precedes child 0.
        next_index = 0
    elif position == "tail":
        # Recover the element/index pair pushed when the tail was entered.
        context, index = ancestors.pop()
        next_index = index + 1
    else:
        child = context[position]
        if child.tail:
            # Visit the tail text of the child before moving on.
            ancestors.append((context, position))
            return (child, "tail", ancestors)
        next_index = position + 1

    if len(context) > next_index:
        return (context, next_index, ancestors)
    return None
101+
def getParentNode(self, node):
    """Return the parent of ``node`` as a node tuple, or ``None``.

    ``node`` is normally an ``(element, key, parents)`` tuple.  The parent
    is the most recent ``(element, index)`` pair on the ``parents`` stack;
    it is popped (the list is mutated in place) and returned as
    ``(element, index, parents)``.

    ``None`` is returned when the stack is empty, and also for a non-tuple
    ``node`` (the bare root object), instead of relying on an ``assert``
    that is stripped under ``-O``.
    """
    if not isinstance(node, tuple):
        return None

    _context, _position, ancestors = node
    if not ancestors:
        # HACK: the context element could be returned here, but None stops
        # the walking algorithm the same way.
        return None

    context, index = ancestors.pop()
    return context, index, ancestors
64111
65112 return locals ()
0 commit comments