From c8f1615ea03ca8d1fdb00beae7cc66e6ceaf1c80 Mon Sep 17 00:00:00 2001 From: gaweng Date: Thu, 9 Apr 2026 17:38:21 +0200 Subject: [PATCH] gh-139423: Fix plistlib to preserve carriage returns in XML plist round-trips plistlib's _escape() function was normalizing \r\n to \n and \r to \n during XML plist serialization. When the plist was loaded back, the original carriage return characters were lost because expat also normalizes newlines in XML character data. Fix by encoding \r as the &#13; XML character reference instead of converting it to \n. Character references are not subject to XML newline normalization, so expat correctly decodes &#13; back to \r, preserving the original data during round-trips. --- Lib/plistlib.py | 3 +-- Lib/test/test_plistlib.py | 17 +++++++++++++++-- ...-09-14-30-00.gh-issue-139423.UD0SN_qTKdI.rst | 4 ++++ 3 files changed, 20 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-04-09-14-30-00.gh-issue-139423.UD0SN_qTKdI.rst diff --git a/Lib/plistlib.py b/Lib/plistlib.py index 93f3ef5e38af84..8f3c7404c62f2e 100644 --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -164,11 +164,10 @@ def _escape(text): if m is not None: raise ValueError("strings can't contain control characters; " "use bytes instead") - text = text.replace("\r\n", "\n") # convert DOS line endings - text = text.replace("\r", "\n") # convert Mac line endings text = text.replace("&", "&amp;") # escape '&' text = text.replace("<", "&lt;") # escape '<' text = text.replace(">", "&gt;") # escape '>' + text = text.replace("\r", "&#13;") # preserve CR via character reference return text class _PlistParser: diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py index b9c261310bb567..e697d8fef73ff0 100644 --- a/Lib/test/test_plistlib.py +++ b/Lib/test/test_plistlib.py @@ -818,13 +818,26 @@ def test_controlcharacters(self): if i >= 32 or c in "\r\n\t": # \r, \n and \t are the only legal control chars in XML data = plistlib.dumps(testString, fmt=plistlib.FMT_XML) - if c != "\r":
- self.assertEqual(plistlib.loads(data), testString) + self.assertEqual(plistlib.loads(data), testString) else: with self.assertRaises(ValueError): plistlib.dumps(testString, fmt=plistlib.FMT_XML) plistlib.dumps(testString, fmt=plistlib.FMT_BINARY) + def test_cr_newline_roundtrip(self): + # gh-139423: Carriage returns should survive XML plist round-trip. + test_cases = [ + "hello\rworld", # standalone CR + "hello\r\nworld", # CRLF + "a\rb\nc\r\nd", # mixed newlines + "\r", # bare CR + "\r\n", # bare CRLF + ] + for s in test_cases: + with self.subTest(s=s): + data = plistlib.dumps(s, fmt=plistlib.FMT_XML) + self.assertEqual(plistlib.loads(data), s) + def test_non_bmp_characters(self): pl = {'python': '\U0001f40d'} for fmt in ALL_FORMATS: diff --git a/Misc/NEWS.d/next/Library/2026-04-09-14-30-00.gh-issue-139423.UD0SN_qTKdI.rst b/Misc/NEWS.d/next/Library/2026-04-09-14-30-00.gh-issue-139423.UD0SN_qTKdI.rst new file mode 100644 index 00000000000000..3712383fc1b4da --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-09-14-30-00.gh-issue-139423.UD0SN_qTKdI.rst @@ -0,0 +1,4 @@ +Fixed :mod:`plistlib` to preserve carriage return characters (``\r``) during +XML plist round-trips. Previously, ``\r`` and ``\r\n`` were normalized to +``\n`` during serialization, causing data corruption. Carriage returns are now +encoded as ``&#13;`` XML character references, which the XML parser preserves.