7171 )
7272
def xpath_tokenizer(pattern, namespaces=None):
    """Yield ``(token_type, tag)`` pairs for *pattern*, expanding namespaces.

    Tokens come from ``xpath_tokenizer_re.findall(pattern)``.  A tag of the
    form ``prefix:name`` is rewritten to ``{uri}name`` using *namespaces*.
    An unprefixed tag that does not already start with ``{`` is placed in
    the default namespace (the ``''`` key of *namespaces*), unless the
    previous token was ``'@'``, i.e. the tag names an attribute.  Every
    other token is yielded unchanged.

    Raises SyntaxError if a prefix is used but is missing from *namespaces*
    (or if *namespaces* is empty/None).
    """
    default_namespace = namespaces.get('') if namespaces else None
    parsing_attribute = False
    for token in xpath_tokenizer_re.findall(pattern):
        ttype, tag = token
        if tag and tag[0] != "{":
            if ":" in tag:
                prefix, uri = tag.split(":", 1)
                # Keep only the lookup inside the try block so that a
                # KeyError thrown into the generator at the yield below is
                # not misreported as a missing prefix.
                try:
                    if not namespaces:
                        raise KeyError
                    namespace = namespaces[prefix]
                except KeyError:
                    raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
                yield ttype, "{%s}%s" % (namespace, uri)
            elif default_namespace and not parsing_attribute:
                yield ttype, "{%s}%s" % (default_namespace, tag)
            else:
                yield token
            # A tag token ends any pending '@' attribute marker.
            parsing_attribute = False
        else:
            yield token
            # Remember whether the next tag token is an attribute name.
            parsing_attribute = ttype == '@'
95+
8696
8797def get_parent_map (context ):
8898 parent_map = context .parent_map
@@ -93,13 +103,69 @@ def get_parent_map(context):
93103 parent_map [e ] = p
94104 return parent_map
95105
106+
def _is_wildcard_tag(tag):
    """Return True if *tag* has a wildcard namespace or a wildcard name.

    That is, *tag* starts with ``'{*}'`` (any namespace) or ends with
    ``'}*'`` (any local name within the given namespace).
    """
    return tag.startswith('{*}') or tag.endswith('}*')
109+
110+
def _prepare_tag(tag):
    """Build a filter for a wildcard *tag* pattern.

    Returns a generator function ``select(context, result)`` that yields
    the elements of *result* whose ``tag`` matches the pattern.  Supported
    patterns are ``'{*}*'``, ``'{}*'``, ``'{*}name'`` and ``'{ns}*'``.

    Raises RuntimeError for any other *tag*.
    """
    # Bound to locals so the nested selectors capture them as closure
    # variables.
    _isinstance, _str = isinstance, str
    if tag == '{*}*':
        # Same as '*', but no comments or processing instructions.
        # It can be a surprise that '*' includes those, but there is no
        # justification for '{*}*' doing the same.
        def select(context, result):
            for elem in result:
                if _isinstance(elem.tag, _str):
                    yield elem
    elif tag == '{}*':
        # Any tag that is not in a namespace.
        def select(context, result):
            for elem in result:
                el_tag = elem.tag
                # Slice rather than index so an empty tag string does not
                # raise IndexError.
                if _isinstance(el_tag, _str) and el_tag[:1] != '{':
                    yield elem
    elif tag[:3] == '{*}':
        # The tag in any (or no) namespace.
        suffix = tag[2:]  # '}name'
        no_ns = slice(-len(suffix), None)  # tail of el_tag, same length as suffix
        tag = tag[3:]  # bare 'name', matches the no-namespace case
        def select(context, result):
            for elem in result:
                el_tag = elem.tag
                if el_tag == tag or _isinstance(el_tag, _str) and el_tag[no_ns] == suffix:
                    yield elem
    elif tag[-2:] == '}*':
        # Any tag in the given namespace.
        ns = tag[:-1]  # '{ns}'
        ns_only = slice(None, len(ns))  # head of el_tag, same length as ns
        def select(context, result):
            for elem in result:
                el_tag = elem.tag
                if _isinstance(el_tag, _str) and el_tag[ns_only] == ns:
                    yield elem
    else:
        raise RuntimeError(f"internal parser error, got {tag}")
    return select
150+
151+
def prepare_child(next, token):
    """Return a selector yielding the direct children that match a tag.

    *token* is a ``(token_type, tag)`` pair; only the tag is used, and
    *next* is not used by this function.  The returned generator function
    ``select(context, result)`` iterates each element of *result* and
    yields the children whose tag matches.
    """
    tag = token[1]
    if _is_wildcard_tag(tag):
        # Wildcard patterns are matched by a dedicated tag filter applied
        # to the flattened stream of children.
        select_tag = _prepare_tag(tag)
        def select(context, result):
            def select_child(result):
                for elem in result:
                    yield from elem
            return select_tag(context, select_child(result))
    else:
        if tag[:2] == '{}':
            tag = tag[2:]  # '{}tag' == 'tag'
        def select(context, result):
            for elem in result:
                for e in elem:
                    if e.tag == tag:
                        yield e
    return select
104170
105171def prepare_star (next , token ):
@@ -124,11 +190,24 @@ def prepare_descendant(next, token):
124190 tag = token [1 ]
125191 else :
126192 raise SyntaxError ("invalid descendant" )
127- def select (context , result ):
128- for elem in result :
129- for e in elem .iter (tag ):
130- if e is not elem :
131- yield e
193+
194+ if _is_wildcard_tag (tag ):
195+ select_tag = _prepare_tag (tag )
196+ def select (context , result ):
197+ def select_child (result ):
198+ for elem in result :
199+ for e in elem .iter ():
200+ if e is not elem :
201+ yield e
202+ return select_tag (context , select_child (result ))
203+ else :
204+ if tag [:2 ] == '{}' :
205+ tag = tag [2 :] # '{}tag' == 'tag'
206+ def select (context , result ):
207+ for elem in result :
208+ for e in elem .iter (tag ):
209+ if e is not elem :
210+ yield e
132211 return select
133212
134213def prepare_parent (next , token ):
@@ -157,6 +236,9 @@ def prepare_predicate(next, token):
157236 return
158237 if token [0 ] == "]" :
159238 break
239+ if token == ('' , '' ):
240+ # ignore whitespace
241+ continue
160242 if token [0 ] and token [0 ][:1 ] in "'\" " :
161243 token = "'" , token [0 ][1 :- 1 ]
162244 signature .append (token [0 ] or "-" )
@@ -188,16 +270,22 @@ def select(context, result):
188270 if elem .find (tag ) is not None :
189271 yield elem
190272 return select
191- if signature == "-='" and not re .match (r"\-?\d+$" , predicate [0 ]):
192- # [tag='value']
273+ if signature == ".='" or ( signature == " -='" and not re .match (r"\-?\d+$" , predicate [0 ]) ):
274+ # [.='value'] or [ tag='value']
193275 tag = predicate [0 ]
194276 value = predicate [- 1 ]
195- def select (context , result ):
196- for elem in result :
197- for e in elem .findall (tag ):
198- if "" .join (e .itertext ()) == value :
277+ if tag :
278+ def select (context , result ):
279+ for elem in result :
280+ for e in elem .findall (tag ):
281+ if "" .join (e .itertext ()) == value :
282+ yield elem
283+ break
284+ else :
285+ def select (context , result ):
286+ for elem in result :
287+ if "" .join (elem .itertext ()) == value :
199288 yield elem
200- break
201289 return select
202290 if signature == "-" or signature == "-()" or signature == "-()-" :
203291 # [index] or [last()] or [last()-index]
@@ -255,10 +343,13 @@ def __init__(self, root):
255343
256344def iterfind (elem , path , namespaces = None ):
257345 # compile selector pattern
258- cache_key = (path , None if namespaces is None
259- else tuple (sorted (namespaces .items ())))
260346 if path [- 1 :] == "/" :
261347 path = path + "*" # implicit all (FIXME: keep this?)
348+
349+ cache_key = (path ,)
350+ if namespaces :
351+ cache_key += tuple (sorted (namespaces .items ()))
352+
262353 try :
263354 selector = _cache [cache_key ]
264355 except KeyError :
@@ -276,7 +367,7 @@ def iterfind(elem, path, namespaces=None):
276367 try :
277368 selector .append (ops [token [0 ]](next , token ))
278369 except StopIteration :
279- raise SyntaxError ("invalid path" )
370+ raise SyntaxError ("invalid path" ) from None
280371 try :
281372 token = next ()
282373 if token [0 ] == "/" :
0 commit comments