Skip to content

Commit 84a5fdb

Browse files
committed
Update xml.etree.ElementPath to CPython 3.8
1 parent 05fff92 commit 84a5fdb

File tree

1 file changed

+119
-28
lines changed

1 file changed

+119
-28
lines changed

Lib/xml/etree/ElementPath.py

Lines changed: 119 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -71,18 +71,28 @@
7171
)
7272

7373
def xpath_tokenizer(pattern, namespaces=None):
    """Yield ``(token_type, tag)`` pairs for the path expression *pattern*.

    Tokens are produced by ``xpath_tokenizer_re.findall(pattern)``.  A
    non-empty tag that does not already start with ``{`` is rewritten:

    * ``prefix:name`` becomes ``{uri}name`` where ``uri`` is
      ``namespaces[prefix]``;
    * an unprefixed name becomes ``{default}name`` when *namespaces* maps
      the empty prefix ``''`` to a truthy value, unless the name directly
      follows an ``@`` token (attribute names are left untouched).

    Every other token is yielded unchanged.

    Raises SyntaxError when a prefix is used but *namespaces* is empty/None
    or does not contain that prefix.
    """
    default_namespace = namespaces.get('') if namespaces else None
    parsing_attribute = False
    for token in xpath_tokenizer_re.findall(pattern):
        ttype, tag = token
        if tag and tag[0] != "{":
            if ":" in tag:
                prefix, local = tag.split(":", 1)
                # Only the lookup lives in the try block, so the handler
                # cannot intercept anything raised at the yield below.
                try:
                    if not namespaces:
                        raise KeyError
                    uri = namespaces[prefix]
                except KeyError:
                    raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
                yield ttype, "{%s}%s" % (uri, local)
            elif default_namespace and not parsing_attribute:
                yield ttype, "{%s}%s" % (default_namespace, tag)
            else:
                yield token
            # A plain name ends any pending '@' attribute context.
            parsing_attribute = False
        else:
            yield token
            # Remember '@' so the next name is not given the default namespace.
            parsing_attribute = ttype == '@'
95+
8696

8797
def get_parent_map(context):
8898
parent_map = context.parent_map
@@ -93,13 +103,69 @@ def get_parent_map(context):
93103
parent_map[e] = p
94104
return parent_map
95105

106+
107+
def _is_wildcard_tag(tag):
    """Return True if *tag* starts with ``{*}`` or ends with ``}*``."""
    return tag[:3] == '{*}' or tag[-2:] == '}*'
109+
110+
111+
def _prepare_tag(tag):
    """Build a selector for the wildcard tag pattern *tag*.

    Returns a generator function ``select(context, result)`` that yields the
    elements of *result* whose ``.tag`` matches the pattern:

    * ``'{*}*'``    -- any element whose tag is a string;
    * ``'{}*'``     -- any string tag that does not start with ``{``;
    * ``'{*}name'`` -- a tag equal to ``name`` or a string tag ending in
      ``}name``;
    * ``'{ns}*'``   -- any string tag starting with ``{ns}``.

    Raises RuntimeError for any other pattern.
    """
    _isinstance, _str = isinstance, str
    if tag == '{*}*':
        # Same as '*', but no comments or processing instructions.
        # It can be a surprise that '*' includes those, but there is no
        # justification for '{*}*' doing the same.
        def select(context, result):
            for elem in result:
                if _isinstance(elem.tag, _str):
                    yield elem
    elif tag == '{}*':
        # Any tag that is not in a namespace.
        def select(context, result):
            for elem in result:
                el_tag = elem.tag
                # Slice instead of indexing so an empty-string tag is
                # treated as "no namespace" rather than raising IndexError.
                if _isinstance(el_tag, _str) and el_tag[:1] != '{':
                    yield elem
    elif tag[:3] == '{*}':
        # The tag in any (or no) namespace.
        suffix = tag[2:]  # '}name'
        no_ns = slice(-len(suffix), None)
        tag = tag[3:]
        def select(context, result):
            for elem in result:
                el_tag = elem.tag
                if el_tag == tag or _isinstance(el_tag, _str) and el_tag[no_ns] == suffix:
                    yield elem
    elif tag[-2:] == '}*':
        # Any tag in the given namespace.
        ns = tag[:-1]
        ns_only = slice(None, len(ns))
        def select(context, result):
            for elem in result:
                el_tag = elem.tag
                if _isinstance(el_tag, _str) and el_tag[ns_only] == ns:
                    yield elem
    else:
        raise RuntimeError(f"internal parser error, got {tag}")
    return select
150+
151+
96152
def prepare_child(next, token):
    """Build the selector for a child step whose tag is ``token[1]``.

    Returns a generator function ``select(context, result)`` that yields the
    direct children of every element in *result* whose tag matches.  Wildcard
    tags (as recognised by ``_is_wildcard_tag``) are filtered through the
    selector built by ``_prepare_tag``; any other tag is compared for
    equality, with a leading ``{}`` stripped first.  *next* is not used here.
    """
    tag = token[1]
    if _is_wildcard_tag(tag):
        select_tag = _prepare_tag(tag)
        def select(context, result):
            # Flatten all direct children, then let the wildcard filter decide.
            def select_child(result):
                for elem in result:
                    yield from elem
            return select_tag(context, select_child(result))
    else:
        if tag[:2] == '{}':
            tag = tag[2:]  # '{}tag' == 'tag'
        def select(context, result):
            for elem in result:
                for e in elem:
                    if e.tag == tag:
                        yield e
    return select
104170

105171
def prepare_star(next, token):
@@ -124,11 +190,24 @@ def prepare_descendant(next, token):
124190
tag = token[1]
125191
else:
126192
raise SyntaxError("invalid descendant")
127-
def select(context, result):
128-
for elem in result:
129-
for e in elem.iter(tag):
130-
if e is not elem:
131-
yield e
193+
194+
if _is_wildcard_tag(tag):
195+
select_tag = _prepare_tag(tag)
196+
def select(context, result):
197+
def select_child(result):
198+
for elem in result:
199+
for e in elem.iter():
200+
if e is not elem:
201+
yield e
202+
return select_tag(context, select_child(result))
203+
else:
204+
if tag[:2] == '{}':
205+
tag = tag[2:] # '{}tag' == 'tag'
206+
def select(context, result):
207+
for elem in result:
208+
for e in elem.iter(tag):
209+
if e is not elem:
210+
yield e
132211
return select
133212

134213
def prepare_parent(next, token):
@@ -157,6 +236,9 @@ def prepare_predicate(next, token):
157236
return
158237
if token[0] == "]":
159238
break
239+
if token == ('', ''):
240+
# ignore whitespace
241+
continue
160242
if token[0] and token[0][:1] in "'\"":
161243
token = "'", token[0][1:-1]
162244
signature.append(token[0] or "-")
@@ -188,16 +270,22 @@ def select(context, result):
188270
if elem.find(tag) is not None:
189271
yield elem
190272
return select
191-
if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
192-
# [tag='value']
273+
if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
274+
# [.='value'] or [tag='value']
193275
tag = predicate[0]
194276
value = predicate[-1]
195-
def select(context, result):
196-
for elem in result:
197-
for e in elem.findall(tag):
198-
if "".join(e.itertext()) == value:
277+
if tag:
278+
def select(context, result):
279+
for elem in result:
280+
for e in elem.findall(tag):
281+
if "".join(e.itertext()) == value:
282+
yield elem
283+
break
284+
else:
285+
def select(context, result):
286+
for elem in result:
287+
if "".join(elem.itertext()) == value:
199288
yield elem
200-
break
201289
return select
202290
if signature == "-" or signature == "-()" or signature == "-()-":
203291
# [index] or [last()] or [last()-index]
@@ -255,10 +343,13 @@ def __init__(self, root):
255343

256344
def iterfind(elem, path, namespaces=None):
257345
# compile selector pattern
258-
cache_key = (path, None if namespaces is None
259-
else tuple(sorted(namespaces.items())))
260346
if path[-1:] == "/":
261347
path = path + "*" # implicit all (FIXME: keep this?)
348+
349+
cache_key = (path,)
350+
if namespaces:
351+
cache_key += tuple(sorted(namespaces.items()))
352+
262353
try:
263354
selector = _cache[cache_key]
264355
except KeyError:
@@ -276,7 +367,7 @@ def iterfind(elem, path, namespaces=None):
276367
try:
277368
selector.append(ops[token[0]](next, token))
278369
except StopIteration:
279-
raise SyntaxError("invalid path")
370+
raise SyntaxError("invalid path") from None
280371
try:
281372
token = next()
282373
if token[0] == "/":

0 commit comments

Comments
 (0)