Skip to content

Commit 3b42c92

Browse files
committed
Fix an issue with Python3 multipart encodings.
As discovered in googleapis/google-cloud-python#1760, we were mangling bytes when encoding them as part of a multipart upload request. The fix is to switch from using `six.StringIO` to `six.BytesIO` in `transfer.py`. The patch here is closely based on googleapis/google-cloud-python#1779.
1 parent d6a3172 commit 3b42c92

2 files changed

Lines changed: 77 additions & 55 deletions

File tree

apitools/base/py/transfer.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -758,20 +758,24 @@ def __ConfigureMultipartRequest(self, http_request):
758758
# NOTE: We encode the body, but can't use
759759
# `email.message.Message.as_string` because it prepends
760760
# `> ` to `From ` lines.
761-
# NOTE: We must use six.StringIO() instead of io.StringIO() since the
762-
# `email` library uses cStringIO in Py2 and io.StringIO in Py3.
763-
fp = six.StringIO()
764-
g = email_generator.Generator(fp, mangle_from_=False)
761+
fp = six.BytesIO()
762+
if six.PY3:
763+
generator_class = email_generator.BytesGenerator
764+
else:
765+
generator_class = email_generator.Generator
766+
g = generator_class(fp, mangle_from_=False)
765767
g.flatten(msg_root, unixfrom=False)
766768
http_request.body = fp.getvalue()
767769

768770
multipart_boundary = msg_root.get_boundary()
769771
http_request.headers['content-type'] = (
770772
'multipart/related; boundary=%r' % multipart_boundary)
773+
if isinstance(multipart_boundary, six.text_type):
774+
multipart_boundary = multipart_boundary.encode('ascii')
771775

772776
body_components = http_request.body.split(multipart_boundary)
773-
headers, _, _ = body_components[-2].partition('\n\n')
774-
body_components[-2] = '\n\n'.join([headers, '<media body>\n\n--'])
777+
headers, _, _ = body_components[-2].partition(b'\n\n')
778+
body_components[-2] = b'\n\n'.join([headers, b'<media body>\n\n--'])
775779
http_request.loggable_body = multipart_boundary.join(body_components)
776780

777781
def __ConfigureResumableRequest(self, http_request):

apitools/base/py/transfer_test.py

Lines changed: 67 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
#
23
# Copyright 2015 Google Inc.
34
#
@@ -199,52 +200,69 @@ def _ReturnBytes(unused_http, http_request,
199200
self.assertEqual(string.ascii_lowercase + string.ascii_uppercase,
200201
download_stream.getvalue())
201202

202-
def testFromEncoding(self):
203-
# Test a specific corner case in multipart encoding.
204-
205-
# Python's mime module by default encodes lines that start with
206-
# "From " as ">From ", which we need to make sure we don't run afoul
207-
# of when sending content that isn't intended to be so encoded. This
208-
# test calls out that we get this right. We test for both the
209-
# multipart and non-multipart case.
210-
multipart_body = '{"body_field_one": 7}'
211-
upload_contents = 'line one\nFrom \nline two'
212-
upload_config = base_api.ApiUploadInfo(
213-
accept=['*/*'],
214-
max_size=None,
215-
resumable_multipart=True,
216-
resumable_path=u'/resumable/upload',
217-
simple_multipart=True,
218-
simple_path=u'/upload',
219-
)
220-
url_builder = base_api._UrlBuilder('http://www.uploads.com')
221-
222-
# Test multipart: having a body argument in http_request forces
223-
# multipart here.
224-
upload = transfer.Upload.FromStream(
225-
six.StringIO(upload_contents),
226-
'text/plain',
227-
total_size=len(upload_contents))
228-
http_request = http_wrapper.Request(
229-
'http://www.uploads.com',
230-
headers={'content-type': 'text/plain'},
231-
body=multipart_body)
232-
upload.ConfigureRequest(upload_config, http_request, url_builder)
233-
self.assertEqual(url_builder.query_params['uploadType'], 'multipart')
234-
rewritten_upload_contents = '\n'.join(
235-
http_request.body.split('--')[2].splitlines()[1:])
236-
self.assertTrue(rewritten_upload_contents.endswith(upload_contents))
237-
238-
# Test non-multipart (aka media): no body argument means this is
239-
# sent as media.
240-
upload = transfer.Upload.FromStream(
241-
six.StringIO(upload_contents),
242-
'text/plain',
243-
total_size=len(upload_contents))
244-
http_request = http_wrapper.Request(
245-
'http://www.uploads.com',
246-
headers={'content-type': 'text/plain'})
247-
upload.ConfigureRequest(upload_config, http_request, url_builder)
248-
self.assertEqual(url_builder.query_params['uploadType'], 'media')
249-
rewritten_upload_contents = http_request.body
250-
self.assertTrue(rewritten_upload_contents.endswith(upload_contents))
203+
def testMultipartEncoding(self):
204+
# This is really a table test for various issues we've seen in
205+
# the past; see notes below for particular histories.
206+
207+
test_cases = [
208+
# Python's mime module by default encodes lines that start
209+
# with "From " as ">From ", which we need to make sure we
210+
# don't run afoul of when sending content that isn't
211+
# intended to be so encoded. This test calls out that we
212+
# get this right. We test for both the multipart and
213+
# non-multipart case.
214+
'line one\nFrom \nline two',
215+
216+
# We had originally used a `six.StringIO` to hold the http
217+
# request body in the case of a multipart upload; for
218+
# bytes being uploaded in Python3, however, this causes
219+
# issues like this:
220+
# https://github.com/GoogleCloudPlatform/gcloud-python/issues/1760
221+
# We test below to ensure that we don't end up mangling
222+
# the body before sending.
223+
u'name,main_ingredient\nRäksmörgås,Räkor\nBaguette,Bröd',
224+
]
225+
226+
for upload_contents in test_cases:
227+
multipart_body = '{"body_field_one": 7}'
228+
upload_bytes = upload_contents.encode('ascii', 'backslashreplace')
229+
upload_config = base_api.ApiUploadInfo(
230+
accept=['*/*'],
231+
max_size=None,
232+
resumable_multipart=True,
233+
resumable_path=u'/resumable/upload',
234+
simple_multipart=True,
235+
simple_path=u'/upload',
236+
)
237+
url_builder = base_api._UrlBuilder('http://www.uploads.com')
238+
239+
# Test multipart: having a body argument in http_request forces
240+
# multipart here.
241+
upload = transfer.Upload.FromStream(
242+
six.BytesIO(upload_bytes),
243+
'text/plain',
244+
total_size=len(upload_bytes))
245+
http_request = http_wrapper.Request(
246+
'http://www.uploads.com',
247+
headers={'content-type': 'text/plain'},
248+
body=multipart_body)
249+
upload.ConfigureRequest(upload_config, http_request, url_builder)
250+
self.assertEqual(
251+
'multipart', url_builder.query_params['uploadType'])
252+
rewritten_upload_contents = b'\n'.join(
253+
http_request.body.split(b'--')[2].splitlines()[1:])
254+
self.assertTrue(rewritten_upload_contents.endswith(upload_bytes))
255+
256+
# Test non-multipart (aka media): no body argument means this is
257+
# sent as media.
258+
upload = transfer.Upload.FromStream(
259+
six.BytesIO(upload_bytes),
260+
'text/plain',
261+
total_size=len(upload_bytes))
262+
http_request = http_wrapper.Request(
263+
'http://www.uploads.com',
264+
headers={'content-type': 'text/plain'})
265+
upload.ConfigureRequest(upload_config, http_request, url_builder)
266+
self.assertEqual(url_builder.query_params['uploadType'], 'media')
267+
rewritten_upload_contents = http_request.body
268+
self.assertTrue(rewritten_upload_contents.endswith(upload_bytes))

0 commit comments

Comments
 (0)