Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
gh-121284: Fix email address header folding with parsed encoded-word
Email generators using email.policy.default may convert an RFC 2047
encoded-word to unencoded form during header refolding. In a structured
header, this could allow RFC 5322 'specials' characters (such as a comma)
to appear outside a quoted-string, leading to invalid address headers and
enabling spoofing. This change ensures that a parsed encoded-word
containing specials is kept as an encoded-word while the header is
refolded. (The encoded-word may still be split across lines.)
  • Loading branch information
medmunds committed Aug 6, 2024
commit c540c9f6a5e1d7ebe14e2a4b737e8b1e76a7bb1b
11 changes: 9 additions & 2 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2829,6 +2829,13 @@ def _refold_parse_tree(parse_tree, *, policy):
_fold_mime_parameters(part, lines, maxlen, encoding)
continue

allow_refolding_subparts = True
if part.token_type == 'encoded-word':
# A parsed encoded-word containing specials must remain encoded,
# to keep specials from sneaking into a structured header unquoted.
# (The encoded-word can be split for folding.)
allow_refolding_subparts = SPECIALSNL.isdisjoint(tstr)

if want_encoding and not wrap_as_ew_blocked:
if not part.as_ew_allowed:
want_encoding = False
Expand All @@ -2848,7 +2855,7 @@ def _refold_parse_tree(parse_tree, *, policy):
# want it on a line by itself even if it fits, or it
# doesn't fit on a line by itself. Either way, fall through
# to unpacking the subparts and wrapping them.
if not hasattr(part, 'encode'):
if allow_refolding_subparts and not hasattr(part, 'encode'):
# It's not a Terminal, do each piece individually.
parts = list(part) + parts
want_encoding = False
Expand Down Expand Up @@ -2902,7 +2909,7 @@ def _refold_parse_tree(parse_tree, *, policy):
leading_whitespace = ''.join(whitespace_accumulator)
last_ew = None
continue
if not hasattr(part, 'encode'):
if allow_refolding_subparts and not hasattr(part, 'encode'):
# It's not a terminal, try folding the subparts.
newparts = list(part)
if not part.as_ew_allowed:
Expand Down
25 changes: 25 additions & 0 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3076,6 +3076,31 @@ def test_address_list_with_unicode_names_in_quotes(self):
'=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')

def test_address_list_with_specials_in_encoded_word(self):
    # Regression test for gh-121284: when an encoded-word parsed out of a
    # structured header contains specials (e.g. a comma), refolding must
    # leave it encoded rather than emitting the specials unquoted.
    narrow_policy = self.policy.clone(max_line_length=40)

    # Each key is the unfolded address-list text; each value is the exact
    # folded output expected under the 40-column policy above.
    expected_by_input = {
        # Encoded-word with an encoded comma: stays encoded, but may be
        # split into two encoded-words at the fold.
        '=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= <to@example.com>': (
            '=?utf-8?q?A_v=C3=A9ry_long_name_with?=\n'
            ' =?utf-8?q?=2C_comma?= <to@example.com>\n'
        ),
        # Encoded-word without specials: expected to come out as plain text.
        '=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= <to@example.com>': (
            'This long name does not need\n'
            ' encoded-word <to@example.com>\n'
        ),
        # Quoted-string input containing non-ASCII and a comma.
        # (This isn't the best fold point, but it's not invalid.)
        '"A véry long name with, comma" <to@example.com>': (
            'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
            ' =?utf-8?q?=2C?= comma <to@example.com>\n'
        ),
        # Longer quoted-string input that needs two folds.
        '"A véry long name containing a, comma" <to@example.com>': (
            'A =?utf-8?q?v=C3=A9ry?= long name\n'
            ' containing =?utf-8?q?a=2C?= comma\n'
            ' <to@example.com>\n'
        ),
    }

    for unfolded, expected in expected_by_input.items():
        with self.subTest(to=unfolded):
            parsed = parser.get_address_list(unfolded)
            # Only the first element of the parser's result is folded.
            self._test(parsed[0], expected, policy=narrow_policy)

def test_address_list_with_list_separator_after_fold(self):
a = 'x' * 66 + '@example.com'
to = f'{a}, "Hübsch Kaktus" <beautiful@example.com>'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix a problem where email.policy.default header refolding could incorrectly
convert an RFC 2047 encoded-word containing commas or other special
characters to unencoded, unquoted text, enabling sender or recipient
spoofing via a carefully crafted display-name.