RustPython
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
Lines changed: 27 additions & 8 deletions
diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py
Lines changed: 8 additions & 5 deletions
diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py
Lines changed: 1 addition & 1 deletion
diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py
Lines changed: 7 additions & 5 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
Lines changed: 2 additions & 2 deletions
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
Lines changed: 1 addition & 1 deletion
diff --git a/Lib/email/header.py b/Lib/email/header.py
Lines changed: 13 additions & 4 deletions
diff --git a/Lib/email/message.py b/Lib/email/message.py
Lines changed: 26 additions & 14 deletions
diff --git a/Lib/email/parser.py b/Lib/email/parser.py
Lines changed: 7 additions & 7 deletions
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
Lines changed: 8 additions & 2 deletions
@@ -95,8 +95,16 @@
 NLSET = {'\n', '\r'}
 SPECIALSNL = SPECIALS | NLSET
 
+
+def make_quoted_pairs(value):
+    """Return str(value) with every backslash and dquote backslash-escaped."""
+    return str(value).translate({ord('\\'): '\\\\', ord('"'): '\\"'})
+
+
 def quote_string(value):
-    return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
+    # Escape backslashes and dquotes first, then add the enclosing dquotes.
+    return '"' + make_quoted_pairs(value) + '"'
+
 
 # Match a RFC 2047 word, looks like =?utf-8?q?someword?=
 rfc2047_matcher = re.compile(r'''
@@ -1012,6 +1020,8 @@ def _get_ptext_to_endchars(value, endchars):
     a flag that is True iff there were any quoted printables decoded.
 
     """
+    if not value:
+        return '', '', False
     fragment, *remainder = _wsp_splitter(value, 1)
     vchars = []
     escape = False
@@ -1045,7 +1055,7 @@ def get_fws(value):
     fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
     return fws, newvalue
 
-def get_encoded_word(value):
+def get_encoded_word(value, terminal_type='vtext'):
     """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
 
     """
@@ -1084,7 +1094,7 @@ def get_encoded_word(value):
             ew.append(token)
             continue
         chars, *remainder = _wsp_splitter(text, 1)
-        vtext = ValueTerminal(chars, 'vtext')
+        vtext = ValueTerminal(chars, terminal_type)
         _validate_xtext(vtext)
         ew.append(vtext)
         text = ''.join(remainder)
@@ -1126,7 +1136,7 @@ def get_unstructured(value):
         valid_ew = True
         if value.startswith('=?'):
             try:
-                token, value = get_encoded_word(value)
+                token, value = get_encoded_word(value, 'utext')
             except _InvalidEwError:
                 valid_ew = False
             except errors.HeaderParseError:
@@ -1155,7 +1165,7 @@ def get_unstructured(value):
         # the parser to go in an infinite loop.
         if valid_ew and rfc2047_matcher.search(tok):
             tok, *remainder = value.partition('=?')
-        vtext = ValueTerminal(tok, 'vtext')
+        vtext = ValueTerminal(tok, 'utext')
         _validate_xtext(vtext)
         unstructured.append(vtext)
         value = ''.join(remainder)
@@ -1565,7 +1575,7 @@ def get_dtext(value):
 def _check_for_early_dl_end(value, domain_literal):
     if value:
         return False
-    domain_literal.append(errors.InvalidHeaderDefect(
+    domain_literal.defects.append(errors.InvalidHeaderDefect(
         "end of input inside domain-literal"))
     domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
     return True
@@ -1584,9 +1594,9 @@ def get_domain_literal(value):
         raise errors.HeaderParseError("expected '[' at start of domain-literal "
                 "but found '{}'".format(value))
     value = value[1:]
+    domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
     if _check_for_early_dl_end(value, domain_literal):
         return domain_literal, value
-    domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
     if value[0] in WSP:
         token, value = get_fws(value)
         domain_literal.append(token)
@@ -2805,7 +2815,7 @@ def _refold_parse_tree(parse_tree, *, policy):
             continue
         tstr = str(part)
         if not want_encoding:
-            if part.token_type == 'ptext':
+            if part.token_type in ('ptext', 'vtext'):
                 # Encode if tstr contains special characters.
                 want_encoding = not SPECIALSNL.isdisjoint(tstr)
             else:
@@ -2905,6 +2915,15 @@ def _refold_parse_tree(parse_tree, *, policy):
         if not hasattr(part, 'encode'):
             # It's not a terminal, try folding the subparts.
             newparts = list(part)
+            if part.token_type == 'bare-quoted-string':
+                # To fold a quoted string we need to create a list of terminal
+                # tokens that will render the leading and trailing quotes
+                # and use quoted pairs in the value as appropriate.
+                newparts = (
+                    [ValueTerminal('"', 'ptext')] +
+                    [ValueTerminal(make_quoted_pairs(p), 'ptext')
+                     for p in newparts] +
+                    [ValueTerminal('"', 'ptext')])
             if not part.as_ew_allowed:
                 wrap_as_ew_blocked += 1
                 newparts.append(end_ew_not_allowed)
 
@@ -146,8 +146,9 @@ def _parsedate_tz(data):
         return None
     # Check for a yy specified in two-digit format, then convert it to the
     # appropriate four-digit format, according to the POSIX standard. RFC 822
-    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
-    # mandates a 4-digit yy. For more information, see the documentation for
+    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) already
+    # mandated a 4-digit yy, and RFC 5322 (which obsoletes RFC 2822) continues
+    # this requirement. For more information, see the documentation for
     # the time module.
     if yy < 100:
         # The year is between 1969 and 1999 (inclusive).
@@ -233,9 +234,11 @@ def __init__(self, field):
         self.CR = '\r\n'
         self.FWS = self.LWS + self.CR
         self.atomends = self.specials + self.LWS + self.CR
-        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
-        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
-        # syntax, so allow dots in phrases.
+        # Note that RFC 2822 section 4.1 introduced '.' as obs-phrase to handle
+        # existing practice (periods in display names), even though it was not
+        # allowed in RFC 822. RFC 5322 section 4.1 (which obsoletes RFC 2822)
+        # continues this requirement. We must recognize obsolete syntax, so
+        # allow dots in phrases.
         self.phraseends = self.atomends.replace('.', '')
         self.field = field
         self.commentlist = []
 
@@ -370,7 +370,7 @@ def _fold(self, name, value, sanitize):
             h = value
         if h is not None:
             # The Header class interprets a value of None for maxlinelen as the
-            # default value of 78, as recommended by RFC 2822.
+            # default value of 78, as recommended by RFC 5322 section 2.1.1.
             maxlinelen = 0
             if self.max_line_length is not None:
                 maxlinelen = self.max_line_length
 
@@ -2,6 +2,7 @@
 import email.charset
 import email.message
 import email.errors
+import sys
 from email import quoprimime
 
 class ContentManager:
@@ -142,22 +143,23 @@ def _encode_base64(data, max_line_length):
 
 
 def _encode_text(string, charset, cte, policy):
+    # If max_line_length is 0 or None, there is no limit.
+    maxlen = policy.max_line_length or sys.maxsize
     lines = string.encode(charset).splitlines()
     linesep = policy.linesep.encode('ascii')
     def embedded_body(lines): return linesep.join(lines) + linesep
     def normal_body(lines): return b'\n'.join(lines) + b'\n'
     if cte is None:
         # Use heuristics to decide on the "best" encoding.
-        if max((len(x) for x in lines), default=0) <= policy.max_line_length:
+        if max(map(len, lines), default=0) <= maxlen:
             try:
                 return '7bit', normal_body(lines).decode('ascii')
             except UnicodeDecodeError:
                 pass
             if policy.cte_type == '8bit':
                 return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
         sniff = embedded_body(lines[:10])
-        sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
-                                          policy.max_line_length)
+        sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), maxlen)
         sniff_base64 = binascii.b2a_base64(sniff)
         # This is a little unfair to qp; it includes lineseps, base64 doesn't.
         if len(sniff_qp) > len(sniff_base64):
@@ -172,9 +174,9 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n'
         data = normal_body(lines).decode('ascii', 'surrogateescape')
     elif cte == 'quoted-printable':
         data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
-                                      policy.max_line_length)
+                                      maxlen)
     elif cte == 'base64':
-        data = _encode_base64(embedded_body(lines), policy.max_line_length)
+        data = _encode_base64(embedded_body(lines), maxlen)
     else:
         raise ValueError("Unknown content transfer encoding {}".format(cte))
     return cte, data
 
@@ -32,7 +32,7 @@
 NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
 NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
 NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
-# RFC 2822 $3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
+# RFC 5322 section 3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
 # except controls, SP, and ":".
 headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
 EMPTYSTRING = ''
@@ -294,7 +294,7 @@ def _parsegen(self):
             return
         if self._cur.get_content_maintype() == 'message':
             # The message claims to be a message/* type, then what follows is
-            # another RFC 2822 message.
+            # another RFC 5322 message.
             for retval in self._parsegen():
                 if retval is NeedMoreData:
                     yield NeedMoreData
 
@@ -50,7 +50,7 @@ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
         expanded to 8 spaces) than maxheaderlen, the header will split as
         defined in the Header class.  Set maxheaderlen to zero to disable
         header wrapping.  The default is 78, as recommended (but not required)
-        by RFC 2822.
+        by RFC 5322 section 2.1.1.
 
         The policy keyword specifies a policy object that controls a number of
         aspects of the generator's operation.  If no policy is specified,
 
@@ -59,16 +59,22 @@
 def decode_header(header):
     """Decode a message header value without converting charset.
 
-    Returns a list of (string, charset) pairs containing each of the decoded
-    parts of the header.  Charset is None for non-encoded parts of the header,
-    otherwise a lower-case string containing the name of the character set
-    specified in the encoded string.
+    For historical reasons, this function may return either:
+
+    1. A list of length 1 containing a pair (str, None).
+    2. A list of (bytes, charset) pairs containing each of the decoded
+       parts of the header.  Charset is None for non-encoded parts of the header,
+       otherwise a lower-case string containing the name of the character set
+       specified in the encoded string.
 
     header may be a string that may or may not contain RFC2047 encoded words,
     or it may be a Header object.
 
     An email.errors.HeaderParseError may be raised when certain decoding error
     occurs (e.g. a base64 decoding exception).
+
+    This function exists for backwards compatibility only. For new code, we
+    recommend using email.headerregistry.HeaderRegistry instead.
     """
     # If it is a Header object, we can just return the encoded chunks.
     if hasattr(header, '_chunks'):
@@ -161,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None,
     This function takes one of those sequence of pairs and returns a Header
     instance.  Optional maxlinelen, header_name, and continuation_ws are as in
     the Header constructor.
+
+    This function exists for backwards compatibility only, and is not
+    recommended for use in new code.
     """
     h = Header(maxlinelen=maxlinelen, header_name=header_name,
                continuation_ws=continuation_ws)
 
@@ -74,19 +74,25 @@ def _parseparam(s):
     # RDM This might be a Header, so for now stringify it.
     s = ';' + str(s)
     plist = []
-    while s[:1] == ';':
-        s = s[1:]
-        end = s.find(';')
-        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
-            end = s.find(';', end + 1)
+    start = 0
+    while s.find(';', start) == start:
+        start += 1
+        end = s.find(';', start)
+        ind, diff = start, 0  # ind: left edge of unscanned text; diff: net dquotes
+        while end > 0:
+            diff += s.count('"', ind, end) - s.count('\\"', ind, end)
+            if diff % 2 == 0:
+                break
+            ind, end = end, s.find(';', end + 1)  # ';' was quoted: scan on to next
         if end < 0:
             end = len(s)
-        f = s[:end]
-        if '=' in f:
-            i = f.index('=')
-            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
+        i = s.find('=', start, end)
+        if i == -1:
+            f = s[start:end]
+        else:
+            f = s[start:i].rstrip().lower() + '=' + s[i+1:end].lstrip()
         plist.append(f.strip())
-        s = s[end:]
+        start = end
     return plist
 
 
@@ -135,7 +141,7 @@ def _decode_uu(encoded):
 class Message:
     """Basic message object.
 
-    A message object is defined as something that has a bunch of RFC 2822
+    A message object is defined as something that has a bunch of RFC 5322
     headers and a payload.  It may optionally have an envelope header
     (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
     multipart or a message/rfc822), then the payload is a list of Message
@@ -286,8 +292,12 @@ def get_payload(self, i=None, decode=False):
         if i is not None and not isinstance(self._payload, list):
             raise TypeError('Expected list, got %s' % type(self._payload))
         payload = self._payload
-        # cte might be a Header, so for now stringify it.
-        cte = str(self.get('content-transfer-encoding', '')).lower()
+        cte = self.get('content-transfer-encoding', '')
+        if hasattr(cte, 'cte'):
+            cte = cte.cte
+        else:
+            # cte might be a Header, so for now stringify it.
+            cte = str(cte).strip().lower()
         # payload may be bytes here.
         if not decode:
             if isinstance(payload, str) and utils._has_surrogates(payload):
@@ -309,6 +319,8 @@ def get_payload(self, i=None, decode=False):
                 # If it does happen, turn the string into bytes in a way
                 # guaranteed not to fail.
                 bpayload = payload.encode('raw-unicode-escape')
+        else:
+            bpayload = payload
         if cte == 'quoted-printable':
             return quopri.decodestring(bpayload)
         elif cte == 'base64':
@@ -560,7 +572,7 @@ def add_header(self, _name, _value, **_params):
 
         msg.add_header('content-disposition', 'attachment', filename='bud.gif')
         msg.add_header('content-disposition', 'attachment',
-                       filename=('utf-8', '', Fußballer.ppt'))
+                       filename=('utf-8', '', 'Fußballer.ppt'))
         msg.add_header('content-disposition', 'attachment',
                        filename='Fußballer.ppt'))
         """
 
@@ -2,7 +2,7 @@
 # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
 # Contact: email-sig@python.org
 
-"""A parser of RFC 2822 and MIME email messages."""
+"""A parser of RFC 5322 and MIME email messages."""
 
 __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
            'FeedParser', 'BytesFeedParser']
@@ -15,14 +15,14 @@
 
 class Parser:
     def __init__(self, _class=None, *, policy=compat32):
-        """Parser of RFC 2822 and MIME email messages.
+        """Parser of RFC 5322 and MIME email messages.
 
         Creates an in-memory object tree representing the email message, which
         can then be manipulated and turned over to a Generator to return the
         textual representation of the message.
 
-        The string must be formatted as a block of RFC 2822 headers and header
-        continuation lines, optionally preceded by a `Unix-from' header.  The
+        The string must be formatted as a block of RFC 5322 headers and header
+        continuation lines, optionally preceded by a 'Unix-from' header.  The
         header block is terminated either by the end of the string or by a
         blank line.
 
@@ -75,14 +75,14 @@ def parsestr(self, text, headersonly=True):
 class BytesParser:
 
     def __init__(self, *args, **kw):
-        """Parser of binary RFC 2822 and MIME email messages.
+        """Parser of binary RFC 5322 and MIME email messages.
 
         Creates an in-memory object tree representing the email message, which
         can then be manipulated and turned over to a Generator to return the
         textual representation of the message.
 
-        The input must be formatted as a block of RFC 2822 headers and header
-        continuation lines, optionally preceded by a `Unix-from' header.  The
+        The input must be formatted as a block of RFC 5322 headers and header
+        continuation lines, optionally preceded by a 'Unix-from' header.  The
         header block is terminated either by the end of the input or by a
         blank line.
 
 
@@ -417,8 +417,14 @@ def decode_params(params):
         for name, continuations in rfc2231_params.items():
             value = []
             extended = False
-            # Sort by number
-            continuations.sort()
+            # Sort by number, treating None as 0 if there is no 0,
+            # and ignore it if there is already a 0.
+            has_zero = any(x[0] == 0 for x in continuations)
+            if has_zero:
+                continuations = [x for x in continuations if x[0] is not None]
+            else:
+                continuations = [(x[0] or 0, x[1], x[2]) for x in continuations]
+            continuations.sort(key=lambda x: x[0])
             # And now append all values in numerical order, converting
             # %-encodings for the encoded segments.  If any of the
             # continuation names ends in a *, then the entire string, after