From 60dcf03752cc2ff601fcce5914236cc2b388e77f Mon Sep 17 00:00:00 2001 From: aldwinaldwin Date: Thu, 11 Jul 2019 10:35:12 +0800 Subject: [PATCH 1/2] bpo-37532: email.header.make_header() doesn't work if any `ascii` code is out of range(128) --- Lib/email/header.py | 9 ++++++++- Lib/test/test_email/test_email.py | 8 ++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Lib/email/header.py b/Lib/email/header.py index 4ab0032bc66123..f186d193577908 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -136,7 +136,14 @@ def decode_header(header): last_word = last_charset = None for word, charset in decoded_words: if isinstance(word, str): - word = bytes(word, 'raw-unicode-escape') + word_tmp = bytes(word, 'raw-unicode-escape') + input_charset = charset or 'us-ascii' + try: + # Test to avoid UnicodeDecodeError in Header.append() + _ = word_tmp.decode(input_charset, errors='strict') + word = word_tmp + except UnicodeDecodeError: + word, charset = word.encode('utf-8'), 'utf-8' if last_word is None: last_word = word last_charset = charset diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index c29cc56203b1f7..b711887c3615a6 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -2396,6 +2396,14 @@ def test_multiline_header(self): self.assertEqual(str(make_header(decode_header(s))), '"Müller T" ') + def test_unicode_decode_error(self): + s = 'Hostel,=?UTF-8?B?UGFuYW3DoSBDaXR5?=, Panamá' + self.assertEqual(decode_header(s), + [(b'Hostel,', None), + (b'Panam\xc3\xa1 City, Panam\xc3\xa1', 'utf-8')]) + self.assertEqual(str(make_header(decode_header(s))), + 'Hostel, Panamá City, Panamá') + # Test the MIMEMessage class class TestMIMEMessage(TestEmailBase): From 37e2c0c49a837d7509266a01d7581f93dde5d052 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 11 Jul 2019 02:50:19 +0000 Subject: [PATCH 2/2] 
=?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2019-07-11-02-50-17.bpo-37532.efSn_v.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2019-07-11-02-50-17.bpo-37532.efSn_v.rst diff --git a/Misc/NEWS.d/next/Library/2019-07-11-02-50-17.bpo-37532.efSn_v.rst b/Misc/NEWS.d/next/Library/2019-07-11-02-50-17.bpo-37532.efSn_v.rst new file mode 100644 index 00000000000000..3da640ef513d78 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-07-11-02-50-17.bpo-37532.efSn_v.rst @@ -0,0 +1 @@ +Avoid :exc:`UnicodeDecodeError` in :func:`email.header.make_header` when the header passed to :func:`email.header.decode_header` contains unencoded non-ASCII text such as ``Panamá``. Patch by Aldwin Pollefeyt. \ No newline at end of file