Skip to content

Commit 833f734

Browse files
authored
Update email library v3.13.11 (#6642)
* Updated the email library + added test suite * Added Windows altzone + Fixed memory error in `bytes_inner.rs`
1 parent c934265 commit 833f734

95 files changed

Lines changed: 17238 additions & 51 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Lib/email/_header_value_parser.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,16 @@
9595
NLSET = {'\n', '\r'}
9696
SPECIALSNL = SPECIALS | NLSET
9797

98+
99+
def make_quoted_pairs(value):
100+
"""Escape dquote and backslash for use within a quoted-string."""
101+
return str(value).replace('\\', '\\\\').replace('"', '\\"')
102+
103+
98104
def quote_string(value):
99-
return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
105+
escaped = make_quoted_pairs(value)
106+
return f'"{escaped}"'
107+
100108

101109
# Match a RFC 2047 word, looks like =?utf-8?q?someword?=
102110
rfc2047_matcher = re.compile(r'''
@@ -1012,6 +1020,8 @@ def _get_ptext_to_endchars(value, endchars):
10121020
a flag that is True iff there were any quoted printables decoded.
10131021
10141022
"""
1023+
if not value:
1024+
return '', '', False
10151025
fragment, *remainder = _wsp_splitter(value, 1)
10161026
vchars = []
10171027
escape = False
@@ -1045,7 +1055,7 @@ def get_fws(value):
10451055
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
10461056
return fws, newvalue
10471057

1048-
def get_encoded_word(value):
1058+
def get_encoded_word(value, terminal_type='vtext'):
10491059
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
10501060
10511061
"""
@@ -1084,7 +1094,7 @@ def get_encoded_word(value):
10841094
ew.append(token)
10851095
continue
10861096
chars, *remainder = _wsp_splitter(text, 1)
1087-
vtext = ValueTerminal(chars, 'vtext')
1097+
vtext = ValueTerminal(chars, terminal_type)
10881098
_validate_xtext(vtext)
10891099
ew.append(vtext)
10901100
text = ''.join(remainder)
@@ -1126,7 +1136,7 @@ def get_unstructured(value):
11261136
valid_ew = True
11271137
if value.startswith('=?'):
11281138
try:
1129-
token, value = get_encoded_word(value)
1139+
token, value = get_encoded_word(value, 'utext')
11301140
except _InvalidEwError:
11311141
valid_ew = False
11321142
except errors.HeaderParseError:
@@ -1155,7 +1165,7 @@ def get_unstructured(value):
11551165
# the parser to go in an infinite loop.
11561166
if valid_ew and rfc2047_matcher.search(tok):
11571167
tok, *remainder = value.partition('=?')
1158-
vtext = ValueTerminal(tok, 'vtext')
1168+
vtext = ValueTerminal(tok, 'utext')
11591169
_validate_xtext(vtext)
11601170
unstructured.append(vtext)
11611171
value = ''.join(remainder)
@@ -1565,7 +1575,7 @@ def get_dtext(value):
15651575
def _check_for_early_dl_end(value, domain_literal):
15661576
if value:
15671577
return False
1568-
domain_literal.append(errors.InvalidHeaderDefect(
1578+
domain_literal.defects.append(errors.InvalidHeaderDefect(
15691579
"end of input inside domain-literal"))
15701580
domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
15711581
return True
@@ -1584,9 +1594,9 @@ def get_domain_literal(value):
15841594
raise errors.HeaderParseError("expected '[' at start of domain-literal "
15851595
"but found '{}'".format(value))
15861596
value = value[1:]
1597+
domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
15871598
if _check_for_early_dl_end(value, domain_literal):
15881599
return domain_literal, value
1589-
domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
15901600
if value[0] in WSP:
15911601
token, value = get_fws(value)
15921602
domain_literal.append(token)
@@ -2805,7 +2815,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28052815
continue
28062816
tstr = str(part)
28072817
if not want_encoding:
2808-
if part.token_type == 'ptext':
2818+
if part.token_type in ('ptext', 'vtext'):
28092819
# Encode if tstr contains special characters.
28102820
want_encoding = not SPECIALSNL.isdisjoint(tstr)
28112821
else:
@@ -2905,6 +2915,15 @@ def _refold_parse_tree(parse_tree, *, policy):
29052915
if not hasattr(part, 'encode'):
29062916
# It's not a terminal, try folding the subparts.
29072917
newparts = list(part)
2918+
if part.token_type == 'bare-quoted-string':
2919+
# To fold a quoted string we need to create a list of terminal
2920+
# tokens that will render the leading and trailing quotes
2921+
# and use quoted pairs in the value as appropriate.
2922+
newparts = (
2923+
[ValueTerminal('"', 'ptext')] +
2924+
[ValueTerminal(make_quoted_pairs(p), 'ptext')
2925+
for p in newparts] +
2926+
[ValueTerminal('"', 'ptext')])
29082927
if not part.as_ew_allowed:
29092928
wrap_as_ew_blocked += 1
29102929
newparts.append(end_ew_not_allowed)

Lib/email/_parseaddr.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,9 @@ def _parsedate_tz(data):
146146
return None
147147
# Check for a yy specified in two-digit format, then convert it to the
148148
# appropriate four-digit format, according to the POSIX standard. RFC 822
149-
# calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
150-
# mandates a 4-digit yy. For more information, see the documentation for
149+
# calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) already
150+
# mandated a 4-digit yy, and RFC 5322 (which obsoletes RFC 2822) continues
151+
# this requirement. For more information, see the documentation for
151152
# the time module.
152153
if yy < 100:
153154
# The year is between 1969 and 1999 (inclusive).
@@ -233,9 +234,11 @@ def __init__(self, field):
233234
self.CR = '\r\n'
234235
self.FWS = self.LWS + self.CR
235236
self.atomends = self.specials + self.LWS + self.CR
236-
# Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
237-
# is obsolete syntax. RFC 2822 requires that we recognize obsolete
238-
# syntax, so allow dots in phrases.
237+
# Note that RFC 2822 section 4.1 introduced '.' as obs-phrase to handle
238+
# existing practice (periods in display names), even though it was not
239+
# allowed in RFC 822. RFC 5322 section 4.1 (which obsoletes RFC 2822)
240+
# continues this requirement. We must recognize obsolete syntax, so
241+
# allow dots in phrases.
239242
self.phraseends = self.atomends.replace('.', '')
240243
self.field = field
241244
self.commentlist = []

Lib/email/_policybase.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ def _fold(self, name, value, sanitize):
370370
h = value
371371
if h is not None:
372372
# The Header class interprets a value of None for maxlinelen as the
373-
# default value of 78, as recommended by RFC 2822.
373+
# default value of 78, as recommended by RFC 5322 section 2.1.1.
374374
maxlinelen = 0
375375
if self.max_line_length is not None:
376376
maxlinelen = self.max_line_length

Lib/email/contentmanager.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import email.charset
33
import email.message
44
import email.errors
5+
import sys
56
from email import quoprimime
67

78
class ContentManager:
@@ -142,22 +143,23 @@ def _encode_base64(data, max_line_length):
142143

143144

144145
def _encode_text(string, charset, cte, policy):
146+
# If max_line_length is 0 or None, there is no limit.
147+
maxlen = policy.max_line_length or sys.maxsize
145148
lines = string.encode(charset).splitlines()
146149
linesep = policy.linesep.encode('ascii')
147150
def embedded_body(lines): return linesep.join(lines) + linesep
148151
def normal_body(lines): return b'\n'.join(lines) + b'\n'
149152
if cte is None:
150153
# Use heuristics to decide on the "best" encoding.
151-
if max((len(x) for x in lines), default=0) <= policy.max_line_length:
154+
if max(map(len, lines), default=0) <= maxlen:
152155
try:
153156
return '7bit', normal_body(lines).decode('ascii')
154157
except UnicodeDecodeError:
155158
pass
156159
if policy.cte_type == '8bit':
157160
return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
158161
sniff = embedded_body(lines[:10])
159-
sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
160-
policy.max_line_length)
162+
sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), maxlen)
161163
sniff_base64 = binascii.b2a_base64(sniff)
162164
# This is a little unfair to qp; it includes lineseps, base64 doesn't.
163165
if len(sniff_qp) > len(sniff_base64):
@@ -172,9 +174,9 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n'
172174
data = normal_body(lines).decode('ascii', 'surrogateescape')
173175
elif cte == 'quoted-printable':
174176
data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
175-
policy.max_line_length)
177+
maxlen)
176178
elif cte == 'base64':
177-
data = _encode_base64(embedded_body(lines), policy.max_line_length)
179+
data = _encode_base64(embedded_body(lines), maxlen)
178180
else:
179181
raise ValueError("Unknown content transfer encoding {}".format(cte))
180182
return cte, data

Lib/email/feedparser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
3333
NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
3434
NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
35-
# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
35+
# RFC 5322 section 3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
3636
# except controls, SP, and ":".
3737
headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
3838
EMPTYSTRING = ''
@@ -294,7 +294,7 @@ def _parsegen(self):
294294
return
295295
if self._cur.get_content_maintype() == 'message':
296296
# The message claims to be a message/* type, then what follows is
297-
# another RFC 2822 message.
297+
# another RFC 5322 message.
298298
for retval in self._parsegen():
299299
if retval is NeedMoreData:
300300
yield NeedMoreData

Lib/email/generator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
5050
expanded to 8 spaces) than maxheaderlen, the header will split as
5151
defined in the Header class. Set maxheaderlen to zero to disable
5252
header wrapping. The default is 78, as recommended (but not required)
53-
by RFC 2822.
53+
by RFC 5322 section 2.1.1.
5454
5555
The policy keyword specifies a policy object that controls a number of
5656
aspects of the generator's operation. If no policy is specified,

Lib/email/header.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,22 @@
5959
def decode_header(header):
6060
"""Decode a message header value without converting charset.
6161
62-
Returns a list of (string, charset) pairs containing each of the decoded
63-
parts of the header. Charset is None for non-encoded parts of the header,
64-
otherwise a lower-case string containing the name of the character set
65-
specified in the encoded string.
62+
For historical reasons, this function may return either:
63+
64+
1. A list of length 1 containing a pair (str, None).
65+
2. A list of (bytes, charset) pairs containing each of the decoded
66+
parts of the header. Charset is None for non-encoded parts of the header,
67+
otherwise a lower-case string containing the name of the character set
68+
specified in the encoded string.
6669
6770
header may be a string that may or may not contain RFC2047 encoded words,
6871
or it may be a Header object.
6972
7073
An email.errors.HeaderParseError may be raised when certain decoding errors
7174
occur (e.g. a base64 decoding exception).
75+
76+
This function exists for backwards compatibility only. For new code, we
77+
recommend using email.headerregistry.HeaderRegistry instead.
7278
"""
7379
# If it is a Header object, we can just return the encoded chunks.
7480
if hasattr(header, '_chunks'):
@@ -161,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None,
161167
This function takes one of those sequence of pairs and returns a Header
162168
instance. Optional maxlinelen, header_name, and continuation_ws are as in
163169
the Header constructor.
170+
171+
This function exists for backwards compatibility only, and is not
172+
recommended for use in new code.
164173
"""
165174
h = Header(maxlinelen=maxlinelen, header_name=header_name,
166175
continuation_ws=continuation_ws)

Lib/email/message.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -74,19 +74,25 @@ def _parseparam(s):
7474
# RDM This might be a Header, so for now stringify it.
7575
s = ';' + str(s)
7676
plist = []
77-
while s[:1] == ';':
78-
s = s[1:]
79-
end = s.find(';')
80-
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
81-
end = s.find(';', end + 1)
77+
start = 0
78+
while s.find(';', start) == start:
79+
start += 1
80+
end = s.find(';', start)
81+
ind, diff = start, 0
82+
while end > 0:
83+
diff += s.count('"', ind, end) - s.count('\\"', ind, end)
84+
if diff % 2 == 0:
85+
break
86+
end, ind = ind, s.find(';', end + 1)
8287
if end < 0:
8388
end = len(s)
84-
f = s[:end]
85-
if '=' in f:
86-
i = f.index('=')
87-
f = f[:i].strip().lower() + '=' + f[i+1:].strip()
89+
i = s.find('=', start, end)
90+
if i == -1:
91+
f = s[start:end]
92+
else:
93+
f = s[start:i].rstrip().lower() + '=' + s[i+1:end].lstrip()
8894
plist.append(f.strip())
89-
s = s[end:]
95+
start = end
9096
return plist
9197

9298

@@ -135,7 +141,7 @@ def _decode_uu(encoded):
135141
class Message:
136142
"""Basic message object.
137143
138-
A message object is defined as something that has a bunch of RFC 2822
144+
A message object is defined as something that has a bunch of RFC 5322
139145
headers and a payload. It may optionally have an envelope header
140146
(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
141147
multipart or a message/rfc822), then the payload is a list of Message
@@ -286,8 +292,12 @@ def get_payload(self, i=None, decode=False):
286292
if i is not None and not isinstance(self._payload, list):
287293
raise TypeError('Expected list, got %s' % type(self._payload))
288294
payload = self._payload
289-
# cte might be a Header, so for now stringify it.
290-
cte = str(self.get('content-transfer-encoding', '')).lower()
295+
cte = self.get('content-transfer-encoding', '')
296+
if hasattr(cte, 'cte'):
297+
cte = cte.cte
298+
else:
299+
# cte might be a Header, so for now stringify it.
300+
cte = str(cte).strip().lower()
291301
# payload may be bytes here.
292302
if not decode:
293303
if isinstance(payload, str) and utils._has_surrogates(payload):
@@ -309,6 +319,8 @@ def get_payload(self, i=None, decode=False):
309319
# If it does happen, turn the string into bytes in a way
310320
# guaranteed not to fail.
311321
bpayload = payload.encode('raw-unicode-escape')
322+
else:
323+
bpayload = payload
312324
if cte == 'quoted-printable':
313325
return quopri.decodestring(bpayload)
314326
elif cte == 'base64':
@@ -560,7 +572,7 @@ def add_header(self, _name, _value, **_params):
560572
561573
msg.add_header('content-disposition', 'attachment', filename='bud.gif')
562574
msg.add_header('content-disposition', 'attachment',
563-
filename=('utf-8', '', Fußballer.ppt'))
575+
filename=('utf-8', '', 'Fußballer.ppt'))
564576
msg.add_header('content-disposition', 'attachment',
565577
filename='Fußballer.ppt')
566578
"""

Lib/email/parser.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
33
# Contact: email-sig@python.org
44

5-
"""A parser of RFC 2822 and MIME email messages."""
5+
"""A parser of RFC 5322 and MIME email messages."""
66

77
__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
88
'FeedParser', 'BytesFeedParser']
@@ -15,14 +15,14 @@
1515

1616
class Parser:
1717
def __init__(self, _class=None, *, policy=compat32):
18-
"""Parser of RFC 2822 and MIME email messages.
18+
"""Parser of RFC 5322 and MIME email messages.
1919
2020
Creates an in-memory object tree representing the email message, which
2121
can then be manipulated and turned over to a Generator to return the
2222
textual representation of the message.
2323
24-
The string must be formatted as a block of RFC 2822 headers and header
25-
continuation lines, optionally preceded by a `Unix-from' header. The
24+
The string must be formatted as a block of RFC 5322 headers and header
25+
continuation lines, optionally preceded by a 'Unix-from' header. The
2626
header block is terminated either by the end of the string or by a
2727
blank line.
2828
@@ -75,14 +75,14 @@ def parsestr(self, text, headersonly=True):
7575
class BytesParser:
7676

7777
def __init__(self, *args, **kw):
78-
"""Parser of binary RFC 2822 and MIME email messages.
78+
"""Parser of binary RFC 5322 and MIME email messages.
7979
8080
Creates an in-memory object tree representing the email message, which
8181
can then be manipulated and turned over to a Generator to return the
8282
textual representation of the message.
8383
84-
The input must be formatted as a block of RFC 2822 headers and header
85-
continuation lines, optionally preceded by a `Unix-from' header. The
84+
The input must be formatted as a block of RFC 5322 headers and header
85+
continuation lines, optionally preceded by a 'Unix-from' header. The
8686
header block is terminated either by the end of the input or by a
8787
blank line.
8888

Lib/email/utils.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -417,8 +417,14 @@ def decode_params(params):
417417
for name, continuations in rfc2231_params.items():
418418
value = []
419419
extended = False
420-
# Sort by number
421-
continuations.sort()
420+
# Sort by number, treating None as 0 if there is no 0,
421+
# and ignore it if there is already a 0.
422+
has_zero = any(x[0] == 0 for x in continuations)
423+
if has_zero:
424+
continuations = [x for x in continuations if x[0] is not None]
425+
else:
426+
continuations = [(x[0] or 0, x[1], x[2]) for x in continuations]
427+
continuations.sort(key=lambda x: x[0])
422428
# And now append all values in numerical order, converting
423429
# %-encodings for the encoded segments. If any of the
424430
# continuation names ends in a *, then the entire string, after

0 commit comments

Comments
 (0)