From f626fff92a4b6f1d79d8f959d8c94d258886862b Mon Sep 17 00:00:00 2001
From: Pierre Quentel <pierre.quentel@orange.com>
Date: Tue, 4 Apr 2017 15:55:29 +0200
Subject: [PATCH 1/5] Rewrite parse_multipart to make it consistent with
 FieldStorage

---
 Doc/library/cgi.rst  |  27 +++++------
 Lib/cgi.py           | 107 +++++++------------------------------------
 Lib/test/test_cgi.py |   4 +-
 3 files changed, 33 insertions(+), 105 deletions(-)

diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
index 41219eeaabae08..93363d353922ae 100644
--- a/Doc/library/cgi.rst
+++ b/Doc/library/cgi.rst
@@ -294,21 +294,22 @@ algorithms implemented in this module in other circumstances.
    This function is deprecated in this module. Use :func:`urllib.parse.parse_qsl`
    instead. It is maintained here only for backward compatibility.
 
-.. function:: parse_multipart(fp, pdict)
+.. function:: parse_multipart(fp, pdict, encoding="utf-8")
 
    Parse input of type :mimetype:`multipart/form-data` (for  file uploads).
-   Arguments are *fp* for the input file and *pdict* for a dictionary containing
-   other parameters in the :mailheader:`Content-Type` header.
-
-   Returns a dictionary just like :func:`urllib.parse.parse_qs` keys are the field names, each
-   value is a list of values for that field.  This is easy to use but not much good
-   if you are expecting megabytes to be uploaded --- in that case, use the
-   :class:`FieldStorage` class instead which is much more flexible.
-
-   Note that this does not parse nested multipart parts --- use
-   :class:`FieldStorage` for that.
-
-
+   Arguments are *fp* for the input file, *pdict* for a dictionary containing
+   other parameters in the :mailheader:`Content-Type` header, and *encoding*,
+   the request encoding.
+
+   Returns a dictionary just like :func:`urllib.parse.parse_qs`: keys are the
+   field names, each value is a list of values for that field. For non-file
+   fields, the value is a list of strings.
+   
+   This is easy to use but not much good if you are expecting megabytes to be
+   uploaded --- in that case, use the :class:`FieldStorage` class instead
+   which is much more flexible.
+
+   
 .. function:: parse_header(string)
 
    Parse a MIME header (such as :mailheader:`Content-Type`) into a main value and a
diff --git a/Lib/cgi.py b/Lib/cgi.py
index 233a496e8170ab..0655336c861d99 100755
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -198,105 +198,32 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
          DeprecationWarning, 2)
     return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
 
-def parse_multipart(fp, pdict):
+def parse_multipart(fp, pdict, encoding="utf-8"):
     """Parse multipart input.
 
     Arguments:
     fp   : input file
     pdict: dictionary containing other parameters of content-type header
+    encoding: request encoding
 
     Returns a dictionary just like parse_qs(): keys are the field names, each
-    value is a list of values for that field.  This is easy to use but not
-    much good if you are expecting megabytes to be uploaded -- in that case,
-    use the FieldStorage class instead which is much more flexible.  Note
-    that content-type is the raw, unparsed contents of the content-type
-    header.
-
-    XXX This does not parse nested multipart parts -- use FieldStorage for
-    that.
-
-    XXX This should really be subsumed by FieldStorage altogether -- no
-    point in having two implementations of the same parsing algorithm.
-    Also, FieldStorage protects itself better against certain DoS attacks
-    by limiting the size of the data read in one chunk.  The API here
-    does not support that kind of protection.  This also affects parse()
-    since it can call parse_multipart().
+    value is a list of values for that field. For non-file fields, the value
+    is a list of strings.
+    
+    This is easy to use but not much good if you are expecting megabytes to be
+    uploaded -- in that case, use the FieldStorage class instead which is much
+    more flexible.  Note that content-type is the raw, unparsed contents of
+    the content-type header.
 
     """
-    import http.client
-
-    boundary = b""
-    if 'boundary' in pdict:
-        boundary = pdict['boundary']
-    if not valid_boundary(boundary):
-        raise ValueError('Invalid boundary in multipart form: %r'
-                            % (boundary,))
-
-    nextpart = b"--" + boundary
-    lastpart = b"--" + boundary + b"--"
-    partdict = {}
-    terminator = b""
-
-    while terminator != lastpart:
-        bytes = -1
-        data = None
-        if terminator:
-            # At start of next part.  Read headers first.
-            headers = http.client.parse_headers(fp)
-            clength = headers.get('content-length')
-            if clength:
-                try:
-                    bytes = int(clength)
-                except ValueError:
-                    pass
-            if bytes > 0:
-                if maxlen and bytes > maxlen:
-                    raise ValueError('Maximum content length exceeded')
-                data = fp.read(bytes)
-            else:
-                data = b""
-        # Read lines until end of part.
-        lines = []
-        while 1:
-            line = fp.readline()
-            if not line:
-                terminator = lastpart # End outer loop
-                break
-            if line.startswith(b"--"):
-                terminator = line.rstrip()
-                if terminator in (nextpart, lastpart):
-                    break
-            lines.append(line)
-        # Done with part.
-        if data is None:
-            continue
-        if bytes < 0:
-            if lines:
-                # Strip final line terminator
-                line = lines[-1]
-                if line[-2:] == b"\r\n":
-                    line = line[:-2]
-                elif line[-1:] == b"\n":
-                    line = line[:-1]
-                lines[-1] = line
-                data = b"".join(lines)
-        line = headers['content-disposition']
-        if not line:
-            continue
-        key, params = parse_header(line)
-        if key != 'form-data':
-            continue
-        if 'name' in params:
-            name = params['name']
-        else:
-            continue
-        if name in partdict:
-            partdict[name].append(data)
-        else:
-            partdict[name] = [data]
-
-    return partdict
-
+    boundary = pdict['boundary'].decode('ascii') # cf. RFC 2046
+    ctype = "multipart/form-data; boundary={}".format(boundary)
+    headers = Message()
+    headers.set_type(ctype)
+    headers['Content-Length'] = pdict['CONTENT-LENGTH']
+    fs = FieldStorage(fp, headers=headers, encoding=encoding,
+        environ={'REQUEST_METHOD': 'POST'})
+    return {k: fs.getlist(k) for k in fs}
 
 def _parseparam(s):
     while s[:1] == ';':
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
index 637322137d639f..903d0731f977fd 100644
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -126,8 +126,8 @@ def test_parse_multipart(self):
         env = {'boundary': BOUNDARY.encode('latin1'),
                'CONTENT-LENGTH': '558'}
         result = cgi.parse_multipart(fp, env)
-        expected = {'submit': [b' Add '], 'id': [b'1234'],
-                    'file': [b'Testing 123.\n'], 'title': [b'']}
+        expected = {'submit': [' Add '], 'id': ['1234'],
+                    'file': [b'Testing 123.\n'], 'title': ['']}
         self.assertEqual(result, expected)
 
     def test_fieldstorage_properties(self):

From 77585f7b488d23aeb17f2ce65ee8278540ae9ff5 Mon Sep 17 00:00:00 2001
From: Pierre Quentel <pierre.quentel@orange.com>
Date: Tue, 4 Apr 2017 16:09:53 +0200
Subject: [PATCH 2/5] Update whatsnew and NEWS

---
 Doc/whatsnew/3.7.rst | 8 ++++++++
 Misc/NEWS            | 4 ++++
 2 files changed, 12 insertions(+)

diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index 12f65ff9a59423..1219567ae81f05 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -95,6 +95,14 @@ New Modules
 Improved Modules
 ================
 
+cgi
+---
+
+:func:`~cgi.parse_multipart` returns the same results as 
+:class:`~FieldStorage` : for non-file fields, the value associated to a key
+is a list of strings, not bytes.
+(Contributed by Pierre Quentel in :issue:`29979`.)
+
 http.server
 -----------
 
diff --git a/Misc/NEWS b/Misc/NEWS
index 95092afa0b38f2..310786cb5ef784 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -303,6 +303,10 @@ Extension Modules
 Library
 -------
 
+- bpo-29979: rewrite cgi.parse_multipart, reusing the FieldStorage class and
+  making its results consistent with those of FieldStorage for
+  multipart/form-data requests. Patch by Pierre Quentel.
+  
 - bpo-29649: Improve struct.pack_into() exception messages for problems
   with the buffer size and offset.  Patch by Andrew Nester.
 

From 7b398f1c679c0de91839d9c3a905a8a70bd2333a Mon Sep 17 00:00:00 2001
From: Pierre Quentel <pierre.quentel@orange.com>
Date: Fri, 14 Apr 2017 13:52:37 +0200
Subject: [PATCH 3/5] Remove mention of FieldStorage in docstring + more
 explicit comment on boundary encoding

---
 Lib/cgi.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/Lib/cgi.py b/Lib/cgi.py
index 0655336c861d99..d3334a433bc735 100755
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -209,14 +209,10 @@ def parse_multipart(fp, pdict, encoding="utf-8"):
     Returns a dictionary just like parse_qs(): keys are the field names, each
     value is a list of values for that field. For non-file fields, the value
     is a list of strings.
-    
-    This is easy to use but not much good if you are expecting megabytes to be
-    uploaded -- in that case, use the FieldStorage class instead which is much
-    more flexible.  Note that content-type is the raw, unparsed contents of
-    the content-type header.
-
     """
-    boundary = pdict['boundary'].decode('ascii') # cf. RFC 2046
+    # RFC 2026, Section 5.1 : The "multipart" boundary delimiters are always
+    # represented as 7bit US-ASCII.
+    boundary = pdict['boundary'].decode('ascii')
     ctype = "multipart/form-data; boundary={}".format(boundary)
     headers = Message()
     headers.set_type(ctype)

From e9d4f3e6695101ffc810386519d23adce2c1291f Mon Sep 17 00:00:00 2001
From: Pierre Quentel <pierre.quentel@gmail.com>
Date: Wed, 3 May 2017 15:58:49 +0200
Subject: [PATCH 4/5] Update cgi.rst

Remove trailing whitespaces
---
 Doc/library/cgi.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
index 93363d353922ae..b60e1cc41e4be0 100644
--- a/Doc/library/cgi.rst
+++ b/Doc/library/cgi.rst
@@ -304,12 +304,12 @@ algorithms implemented in this module in other circumstances.
    Returns a dictionary just like :func:`urllib.parse.parse_qs`: keys are the
    field names, each value is a list of values for that field. For non-file
    fields, the value is a list of strings.
-   
+
    This is easy to use but not much good if you are expecting megabytes to be
    uploaded --- in that case, use the :class:`FieldStorage` class instead
    which is much more flexible.
 
-   
+
 .. function:: parse_header(string)
 
    Parse a MIME header (such as :mailheader:`Content-Type`) into a main value and a

From c6818e52d95ce0a2b089a6e2bd3f22b97c7b266a Mon Sep 17 00:00:00 2001
From: Pierre Quentel <pierre.quentel@gmail.com>
Date: Wed, 3 May 2017 15:59:44 +0200
Subject: [PATCH 5/5] Update 3.7.rst

Remove trailing whitespace
---
 Doc/whatsnew/3.7.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index 2e7304a4250e3a..b2dc995dced122 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -98,10 +98,11 @@ Improved Modules
 cgi
 ---
 
-:func:`~cgi.parse_multipart` returns the same results as 
+:func:`~cgi.parse_multipart` returns the same results as
 :class:`~FieldStorage` : for non-file fields, the value associated to a key
 is a list of strings, not bytes.
 (Contributed by Pierre Quentel in :issue:`29979`.)
+
 binascii
 --------