Skip to content

Commit 85e76cd

Browse files
CPython Developers authored and youknowone committed
Update xml and test_xml_etree
1 parent 4bbbd42 commit 85e76cd

13 files changed

Lines changed: 358 additions & 168 deletions

Lib/test/test_xml_etree.py

Lines changed: 77 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
import html
1111
import io
1212
import itertools
13-
import locale
1413
import operator
1514
import os
1615
import pickle
@@ -130,6 +129,9 @@ def newtest(*args, **kwargs):
130129
return newtest
131130
return decorator
132131

132+
def convlinesep(data):
133+
return data.replace(b'\n', os.linesep.encode())
134+
133135

134136
class ModuleTest(unittest.TestCase):
135137
def test_sanity(self):
@@ -1023,17 +1025,15 @@ def test_tostring_xml_declaration(self):
10231025
@unittest.expectedFailure
10241026
def test_tostring_xml_declaration_unicode_encoding(self):
10251027
elem = ET.XML('<body><tag/></body>')
1026-
preferredencoding = locale.getpreferredencoding()
10271028
self.assertEqual(
1028-
f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>",
1029-
ET.tostring(elem, encoding='unicode', xml_declaration=True)
1029+
ET.tostring(elem, encoding='unicode', xml_declaration=True),
1030+
"<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
10301031
)
10311032

10321033
# TODO: RUSTPYTHON
10331034
@unittest.expectedFailure
10341035
def test_tostring_xml_declaration_cases(self):
10351036
elem = ET.XML('<body><tag>ø</tag></body>')
1036-
preferredencoding = locale.getpreferredencoding()
10371037
TESTCASES = [
10381038
# (expected_retval, encoding, xml_declaration)
10391039
# ... xml_declaration = None
@@ -1060,7 +1060,7 @@ def test_tostring_xml_declaration_cases(self):
10601060
b"<body><tag>&#248;</tag></body>", 'US-ASCII', True),
10611061
(b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
10621062
b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True),
1063-
(f"<?xml version='1.0' encoding='{preferredencoding}'?>\n"
1063+
("<?xml version='1.0' encoding='utf-8'?>\n"
10641064
"<body><tag>ø</tag></body>", 'unicode', True),
10651065

10661066
]
@@ -1102,11 +1102,10 @@ def test_tostringlist_xml_declaration(self):
11021102
b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>"
11031103
)
11041104

1105-
preferredencoding = locale.getpreferredencoding()
11061105
stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True)
11071106
self.assertEqual(
11081107
''.join(stringlist),
1109-
f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>"
1108+
"<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
11101109
)
11111110
self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>")
11121111
self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:])
@@ -3914,54 +3913,107 @@ def test_encoding(self):
39143913
@unittest.expectedFailure
39153914
def test_write_to_filename(self):
39163915
self.addCleanup(os_helper.unlink, TESTFN)
3917-
tree = ET.ElementTree(ET.XML('''<site />'''))
3916+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
39183917
tree.write(TESTFN)
39193918
with open(TESTFN, 'rb') as f:
3920-
self.assertEqual(f.read(), b'''<site />''')
3919+
self.assertEqual(f.read(), b'''<site>&#248;</site>''')
3920+
3921+
def test_write_to_filename_with_encoding(self):
3922+
self.addCleanup(os_helper.unlink, TESTFN)
3923+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
3924+
tree.write(TESTFN, encoding='utf-8')
3925+
with open(TESTFN, 'rb') as f:
3926+
self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')
3927+
3928+
tree.write(TESTFN, encoding='ISO-8859-1')
3929+
with open(TESTFN, 'rb') as f:
3930+
self.assertEqual(f.read(), convlinesep(
3931+
b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
3932+
b'''<site>\xf8</site>'''))
3933+
3934+
def test_write_to_filename_as_unicode(self):
3935+
self.addCleanup(os_helper.unlink, TESTFN)
3936+
with open(TESTFN, 'w') as f:
3937+
encoding = f.encoding
3938+
os_helper.unlink(TESTFN)
3939+
3940+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
3941+
tree.write(TESTFN, encoding='unicode')
3942+
with open(TESTFN, 'rb') as f:
3943+
self.assertEqual(f.read(), b"<site>\xc3\xb8</site>")
39213944

39223945
# TODO: RUSTPYTHON
39233946
@unittest.expectedFailure
39243947
def test_write_to_text_file(self):
39253948
self.addCleanup(os_helper.unlink, TESTFN)
3926-
tree = ET.ElementTree(ET.XML('''<site />'''))
3949+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
39273950
with open(TESTFN, 'w', encoding='utf-8') as f:
39283951
tree.write(f, encoding='unicode')
39293952
self.assertFalse(f.closed)
39303953
with open(TESTFN, 'rb') as f:
3931-
self.assertEqual(f.read(), b'''<site />''')
3954+
self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')
3955+
3956+
with open(TESTFN, 'w', encoding='ascii', errors='xmlcharrefreplace') as f:
3957+
tree.write(f, encoding='unicode')
3958+
self.assertFalse(f.closed)
3959+
with open(TESTFN, 'rb') as f:
3960+
self.assertEqual(f.read(), b'''<site>&#248;</site>''')
3961+
3962+
with open(TESTFN, 'w', encoding='ISO-8859-1') as f:
3963+
tree.write(f, encoding='unicode')
3964+
self.assertFalse(f.closed)
3965+
with open(TESTFN, 'rb') as f:
3966+
self.assertEqual(f.read(), b'''<site>\xf8</site>''')
39323967

39333968
# TODO: RUSTPYTHON
39343969
@unittest.expectedFailure
39353970
def test_write_to_binary_file(self):
39363971
self.addCleanup(os_helper.unlink, TESTFN)
3937-
tree = ET.ElementTree(ET.XML('''<site />'''))
3972+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
39383973
with open(TESTFN, 'wb') as f:
39393974
tree.write(f)
39403975
self.assertFalse(f.closed)
39413976
with open(TESTFN, 'rb') as f:
3942-
self.assertEqual(f.read(), b'''<site />''')
3977+
self.assertEqual(f.read(), b'''<site>&#248;</site>''')
3978+
3979+
def test_write_to_binary_file_with_encoding(self):
3980+
self.addCleanup(os_helper.unlink, TESTFN)
3981+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
3982+
with open(TESTFN, 'wb') as f:
3983+
tree.write(f, encoding='utf-8')
3984+
self.assertFalse(f.closed)
3985+
with open(TESTFN, 'rb') as f:
3986+
self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')
3987+
3988+
with open(TESTFN, 'wb') as f:
3989+
tree.write(f, encoding='ISO-8859-1')
3990+
self.assertFalse(f.closed)
3991+
with open(TESTFN, 'rb') as f:
3992+
self.assertEqual(f.read(),
3993+
b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
3994+
b'''<site>\xf8</site>''')
39433995

39443996
# TODO: RUSTPYTHON
39453997
@unittest.expectedFailure
39463998
def test_write_to_binary_file_with_bom(self):
39473999
self.addCleanup(os_helper.unlink, TESTFN)
3948-
tree = ET.ElementTree(ET.XML('''<site />'''))
4000+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
39494001
# test BOM writing to buffered file
39504002
with open(TESTFN, 'wb') as f:
39514003
tree.write(f, encoding='utf-16')
39524004
self.assertFalse(f.closed)
39534005
with open(TESTFN, 'rb') as f:
39544006
self.assertEqual(f.read(),
39554007
'''<?xml version='1.0' encoding='utf-16'?>\n'''
3956-
'''<site />'''.encode("utf-16"))
4008+
'''<site>\xf8</site>'''.encode("utf-16"))
39574009
# test BOM writing to non-buffered file
39584010
with open(TESTFN, 'wb', buffering=0) as f:
39594011
tree.write(f, encoding='utf-16')
39604012
self.assertFalse(f.closed)
39614013
with open(TESTFN, 'rb') as f:
39624014
self.assertEqual(f.read(),
39634015
'''<?xml version='1.0' encoding='utf-16'?>\n'''
3964-
'''<site />'''.encode("utf-16"))
4016+
'''<site>\xf8</site>'''.encode("utf-16"))
39654017

39664018
# TODO: RUSTPYTHON
39674019
@unittest.expectedFailure
@@ -3974,10 +4026,10 @@ def test_read_from_stringio(self):
39744026
# TODO: RUSTPYTHON
39754027
@unittest.expectedFailure
39764028
def test_write_to_stringio(self):
3977-
tree = ET.ElementTree(ET.XML('''<site />'''))
4029+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
39784030
stream = io.StringIO()
39794031
tree.write(stream, encoding='unicode')
3980-
self.assertEqual(stream.getvalue(), '''<site />''')
4032+
self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''')
39814033

39824034
# TODO: RUSTPYTHON
39834035
@unittest.expectedFailure
@@ -3990,10 +4042,10 @@ def test_read_from_bytesio(self):
39904042
# TODO: RUSTPYTHON
39914043
@unittest.expectedFailure
39924044
def test_write_to_bytesio(self):
3993-
tree = ET.ElementTree(ET.XML('''<site />'''))
4045+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
39944046
raw = io.BytesIO()
39954047
tree.write(raw)
3996-
self.assertEqual(raw.getvalue(), b'''<site />''')
4048+
self.assertEqual(raw.getvalue(), b'''<site>&#248;</site>''')
39974049

39984050
class dummy:
39994051
pass
@@ -4011,12 +4063,12 @@ def test_read_from_user_text_reader(self):
40114063
# TODO: RUSTPYTHON
40124064
@unittest.expectedFailure
40134065
def test_write_to_user_text_writer(self):
4014-
tree = ET.ElementTree(ET.XML('''<site />'''))
4066+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
40154067
stream = io.StringIO()
40164068
writer = self.dummy()
40174069
writer.write = stream.write
40184070
tree.write(writer, encoding='unicode')
4019-
self.assertEqual(stream.getvalue(), '''<site />''')
4071+
self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''')
40204072

40214073
# TODO: RUSTPYTHON
40224074
@unittest.expectedFailure
@@ -4032,12 +4084,12 @@ def test_read_from_user_binary_reader(self):
40324084
# TODO: RUSTPYTHON
40334085
@unittest.expectedFailure
40344086
def test_write_to_user_binary_writer(self):
4035-
tree = ET.ElementTree(ET.XML('''<site />'''))
4087+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
40364088
raw = io.BytesIO()
40374089
writer = self.dummy()
40384090
writer.write = raw.write
40394091
tree.write(writer)
4040-
self.assertEqual(raw.getvalue(), b'''<site />''')
4092+
self.assertEqual(raw.getvalue(), b'''<site>&#248;</site>''')
40414093

40424094
# TODO: RUSTPYTHON
40434095
@unittest.expectedFailure

Lib/xml/dom/expatbuilder.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -204,11 +204,11 @@ def parseFile(self, file):
204204
buffer = file.read(16*1024)
205205
if not buffer:
206206
break
207-
parser.Parse(buffer, 0)
207+
parser.Parse(buffer, False)
208208
if first_buffer and self.document.documentElement:
209209
self._setup_subset(buffer)
210210
first_buffer = False
211-
parser.Parse("", True)
211+
parser.Parse(b"", True)
212212
except ParseEscape:
213213
pass
214214
doc = self.document
@@ -637,7 +637,7 @@ def parseString(self, string):
637637
nsattrs = self._getNSattrs() # get ns decls from node's ancestors
638638
document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs)
639639
try:
640-
parser.Parse(document, 1)
640+
parser.Parse(document, True)
641641
except:
642642
self.reset()
643643
raise
@@ -697,7 +697,7 @@ def external_entity_ref_handler(self, context, base, systemId, publicId):
697697
self.fragment = self.document.createDocumentFragment()
698698
self.curNode = self.fragment
699699
try:
700-
parser.Parse(self._source, 1)
700+
parser.Parse(self._source, True)
701701
finally:
702702
self.curNode = old_cur_node
703703
self.document = old_document

Lib/xml/dom/minidom.py

Lines changed: 43 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -43,10 +43,11 @@ class Node(xml.dom.Node):
4343
def __bool__(self):
4444
return True
4545

46-
def toxml(self, encoding=None):
47-
return self.toprettyxml("", "", encoding)
46+
def toxml(self, encoding=None, standalone=None):
47+
return self.toprettyxml("", "", encoding, standalone)
4848

49-
def toprettyxml(self, indent="\t", newl="\n", encoding=None):
49+
def toprettyxml(self, indent="\t", newl="\n", encoding=None,
50+
standalone=None):
5051
if encoding is None:
5152
writer = io.StringIO()
5253
else:
@@ -56,7 +57,7 @@ def toprettyxml(self, indent="\t", newl="\n", encoding=None):
5657
newline='\n')
5758
if self.nodeType == Node.DOCUMENT_NODE:
5859
# Can pass encoding only to document, to put it into XML header
59-
self.writexml(writer, "", indent, newl, encoding)
60+
self.writexml(writer, "", indent, newl, encoding, standalone)
6061
else:
6162
self.writexml(writer, "", indent, newl)
6263
if encoding is None:
@@ -718,6 +719,14 @@ def unlink(self):
718719
Node.unlink(self)
719720

720721
def getAttribute(self, attname):
722+
"""Returns the value of the specified attribute.
723+
724+
Returns the value of the element's attribute named attname as
725+
a string. An empty string is returned if the element does not
726+
have such an attribute. Note that an empty string may also be
727+
returned as an explicitly given attribute value, use the
728+
hasAttribute method to distinguish these two cases.
729+
"""
721730
if self._attrs is None:
722731
return ""
723732
try:
@@ -823,10 +832,16 @@ def removeAttributeNode(self, node):
823832
# Restore this since the node is still useful and otherwise
824833
# unlinked
825834
node.ownerDocument = self.ownerDocument
835+
return node
826836

827837
removeAttributeNodeNS = removeAttributeNode
828838

829839
def hasAttribute(self, name):
840+
"""Checks whether the element has an attribute with the specified name.
841+
842+
Returns True if the element has an attribute with the specified name.
843+
Otherwise, returns False.
844+
"""
830845
if self._attrs is None:
831846
return False
832847
return name in self._attrs
@@ -837,6 +852,11 @@ def hasAttributeNS(self, namespaceURI, localName):
837852
return (namespaceURI, localName) in self._attrsNS
838853

839854
def getElementsByTagName(self, name):
855+
"""Returns all descendant elements with the given tag name.
856+
857+
Returns the list of all descendant elements (not direct children
858+
only) with the specified tag name.
859+
"""
840860
return _get_elements_by_tagName_helper(self, name, NodeList())
841861

842862
def getElementsByTagNameNS(self, namespaceURI, localName):
@@ -847,22 +867,27 @@ def __repr__(self):
847867
return "<DOM Element: %s at %#x>" % (self.tagName, id(self))
848868

849869
def writexml(self, writer, indent="", addindent="", newl=""):
870+
"""Write an XML element to a file-like object
871+
872+
Write the element to the writer object that must provide
873+
a write method (e.g. a file or StringIO object).
874+
"""
850875
# indent = current indentation
851876
# addindent = indentation to add to higher levels
852877
# newl = newline string
853878
writer.write(indent+"<" + self.tagName)
854879

855880
attrs = self._get_attributes()
856-
a_names = sorted(attrs.keys())
857881

858-
for a_name in a_names:
882+
for a_name in attrs.keys():
859883
writer.write(" %s=\"" % a_name)
860884
_write_data(writer, attrs[a_name].value)
861885
writer.write("\"")
862886
if self.childNodes:
863887
writer.write(">")
864888
if (len(self.childNodes) == 1 and
865-
self.childNodes[0].nodeType == Node.TEXT_NODE):
889+
self.childNodes[0].nodeType in (
890+
Node.TEXT_NODE, Node.CDATA_SECTION_NODE)):
866891
self.childNodes[0].writexml(writer, '', '', '')
867892
else:
868893
writer.write(newl)
@@ -1786,12 +1811,17 @@ def importNode(self, node, deep):
17861811
raise xml.dom.NotSupportedErr("cannot import document type nodes")
17871812
return _clone_node(node, deep, self)
17881813

1789-
def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
1790-
if encoding is None:
1791-
writer.write('<?xml version="1.0" ?>'+newl)
1792-
else:
1793-
writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
1794-
encoding, newl))
1814+
def writexml(self, writer, indent="", addindent="", newl="", encoding=None,
1815+
standalone=None):
1816+
declarations = []
1817+
1818+
if encoding:
1819+
declarations.append(f'encoding="{encoding}"')
1820+
if standalone is not None:
1821+
declarations.append(f'standalone="{"yes" if standalone else "no"}"')
1822+
1823+
writer.write(f'<?xml version="1.0" {" ".join(declarations)}?>{newl}')
1824+
17951825
for node in self.childNodes:
17961826
node.writexml(writer, indent, addindent, newl)
17971827

0 commit comments

Comments
 (0)