From 2d3153fdbec8771799c7154bfb7fdfa1a36d35a7 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Sun, 12 Aug 2018 00:15:27 -0600 Subject: [PATCH 1/2] bpo-22602: Raise an exception in the UTF-7 decoder for ill-formed sequences starting with "+" The UTF-7 decoder now raises UnicodeDecodeError for ill-formed sequences starting with "+" (as specified in RFC 2152). --- Lib/test/test_unicode.py | 4 ++++ .../next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst | 3 +++ Objects/unicodeobject.c | 5 +++++ 3 files changed, 12 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 3cc018c0cc2caa..fb7bb2d523fe6e 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1630,6 +1630,10 @@ def test_codecs_utf7(self): for c in set_o: self.assertEqual(c.encode('ascii').decode('utf7'), c) + with self.assertRaisesRegex(UnicodeDecodeError, + 'ill-formed sequence'): + b'+@'.decode('utf-7') + def test_codecs_utf8(self): self.assertEqual(''.encode('utf-8'), b'') self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac') diff --git a/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst new file mode 100644 index 00000000000000..5b113e3204c104 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst @@ -0,0 +1,3 @@ +The UTF-7 decoder now raises :exc:`UnicodeDecodeError` for ill-formed +sequences starting with "+" (as specified in RFC 2152). Patch by Zackery +Spytz. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 04fd6d03b464e8..0460d184932ee0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4479,6 +4479,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s, if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0) goto onError; } + else if (s < e && !IS_BASE64(*s)) { + s++; + errmsg = "ill-formed sequence"; + goto utf7Error; + } else { /* begin base64-encoded section */ inShift = 1; surrogate = 0; From 794faa916e8e95f368cc74af4721715c00f2dbff Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Sun, 12 Aug 2018 01:32:08 -0600 Subject: [PATCH 2/2] Add a test for the "replace" error handler. --- Lib/test/test_codecs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index a59a5e21358e7b..86d0dde1705763 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1020,6 +1020,7 @@ def test_errors(self): (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'), (b'a+IKw-b\xff', 'a\u20acb\ufffd'), (b'a+IKw\xffb', 'a\u20ac\ufffdb'), + (b'a+@b', 'a\ufffdb'), ] for raw, expected in tests: with self.subTest(raw=raw):