From 66c28599e08bccf0e155f5f3c118a35d36aafbf2 Mon Sep 17 00:00:00 2001 From: mefistotelis Date: Mon, 10 Feb 2020 01:00:50 +0100 Subject: [PATCH 1/2] bpo-39011: Preserve line endings within attributes Line endings within attributes were previously normalized to "\n". This patch removes that normalization, as line endings which were replaced by entity numbers should be preserved in original form. --- Lib/xml/etree/ElementTree.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index c8d898f32816dc..da2bcad0b4d629 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1057,15 +1057,15 @@ def _escape_attrib(text): text = text.replace(">", ">") if "\"" in text: text = text.replace("\"", """) - # The following business with carriage returns is to satisfy - # Section 2.11 of the XML specification, stating that - # CR or CR LN should be replaced with just LN + # Although section 2.11 of the XML specification states that CR or + # CR LN should be replaced with just LN, it applies only to EOLNs + # which take part of organizing file into lines. Within attributes, + # we are replacing these with entity numbers, so they do not count. # http://www.w3.org/TR/REC-xml/#sec-line-ends - if "\r\n" in text: - text = text.replace("\r\n", "\n") + # The current solution, contained in following six lines, was + # discussed in issue 17582 and 39011. if "\r" in text: - text = text.replace("\r", "\n") - #The following four lines are issue 17582 + text = text.replace("\r", " ") if "\n" in text: text = text.replace("\n", " ") if "\t" in text: From 4afc662605083655a009d43286773af9ea6581f9 Mon Sep 17 00:00:00 2001 From: mefistotelis Date: Tue, 11 Feb 2020 01:52:38 +0100 Subject: [PATCH 2/2] bpo-39011: White space preservation in attribs, test and news This updates tests for the changes in white space handling. Also adds an entry to NEWS and whatsnew. 
--- Doc/whatsnew/3.9.rst | 9 +++++++++ Lib/test/test_xml_etree.py | 5 +++-- .../Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst | 3 +++ 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index c0404101d7ce2e..aad9a75bc4a4e0 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -311,6 +311,15 @@ customization consistently by always using the value specified by case), and one used ``__VENV_NAME__`` instead. (Contributed by Brett Cannon in :issue:`37663`.) +xml +--- + +White space characters within attributes are now preserved when serializing +:mod:`xml.etree.ElementTree` to an XML file. Line endings are no longer normalized +to "\n". This is the result of a discussion about how to interpret +section 2.11 of the XML spec. +(Contributed by Mefistotelis in :issue:`39011`.) + Optimizations ============= diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 09c234ca6890a4..afce0ba3e5a1c6 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -430,13 +430,14 @@ def test_attrib(self): self.assertEqual(ET.tostring(elem), b'aa') + # Test preserving white space chars in attributes elem = ET.Element('test') elem.set('a', '\r') elem.set('b', '\r\n') elem.set('c', '\t\n\r ') - elem.set('d', '\n\n') + elem.set('d', '\n\n\r\r\t\t ') self.assertEqual(ET.tostring(elem), - b'') + b'') def test_makeelement(self): # Test makeelement handling. diff --git a/Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst b/Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst new file mode 100644 index 00000000000000..43962f0bf17fd2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst @@ -0,0 +1,3 @@ +Normalization of line endings in ElementTree attributes was removed, as line +endings which were replaced by entity numbers should be preserved in +original form.