Skip to content

Commit f451f80

Browse files
committed
Updated httplib, urllib, urllib2 to CPython 2.7.13 versions.
1 parent bdd6690 commit f451f80

5 files changed

Lines changed: 244 additions & 31 deletions

File tree

Lib/test/test_httplib.py

Lines changed: 116 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,120 @@ def test_malformed_headers_coped_with(self):
241241
self.assertEqual(resp.getheader('First'), 'val')
242242
self.assertEqual(resp.getheader('Second'), 'val')
243243

244+
def test_malformed_truncation(self):
245+
# Other malformed header lines, especially without colons, used to
246+
# cause the rest of the header section to be truncated
247+
resp = (
248+
b'HTTP/1.1 200 OK\r\n'
249+
b'Public-Key-Pins: \n'
250+
b'pin-sha256="xxx=";\n'
251+
b'report-uri="https://..."\r\n'
252+
b'Transfer-Encoding: chunked\r\n'
253+
b'\r\n'
254+
b'4\r\nbody\r\n0\r\n\r\n'
255+
)
256+
resp = httplib.HTTPResponse(FakeSocket(resp))
257+
resp.begin()
258+
self.assertIsNotNone(resp.getheader('Public-Key-Pins'))
259+
self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
260+
self.assertEqual(resp.read(), b'body')
261+
262+
def test_blank_line_forms(self):
263+
# Test that both CRLF and LF blank lines can terminate the header
264+
# section and start the body
265+
for blank in (b'\r\n', b'\n'):
266+
resp = b'HTTP/1.1 200 OK\r\n' b'Transfer-Encoding: chunked\r\n'
267+
resp += blank
268+
resp += b'4\r\nbody\r\n0\r\n\r\n'
269+
resp = httplib.HTTPResponse(FakeSocket(resp))
270+
resp.begin()
271+
self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
272+
self.assertEqual(resp.read(), b'body')
273+
274+
resp = b'HTTP/1.0 200 OK\r\n' + blank + b'body'
275+
resp = httplib.HTTPResponse(FakeSocket(resp))
276+
resp.begin()
277+
self.assertEqual(resp.read(), b'body')
278+
279+
# A blank line ending in CR is not treated as the end of the HTTP
280+
# header section, therefore header fields following it should be
281+
# parsed if possible
282+
resp = (
283+
b'HTTP/1.1 200 OK\r\n'
284+
b'\r'
285+
b'Name: value\r\n'
286+
b'Transfer-Encoding: chunked\r\n'
287+
b'\r\n'
288+
b'4\r\nbody\r\n0\r\n\r\n'
289+
)
290+
resp = httplib.HTTPResponse(FakeSocket(resp))
291+
resp.begin()
292+
self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
293+
self.assertEqual(resp.read(), b'body')
294+
295+
# Neither header fields nor a blank line
296+
resp = b'HTTP/1.0 200 OK\r\n'
297+
resp = httplib.HTTPResponse(FakeSocket(resp))
298+
resp.begin()
299+
self.assertEqual(resp.read(), b'')
300+
301+
def test_from_line(self):
302+
# The parser handles "From" lines specially, so test this does not
303+
# affect parsing the rest of the header section
304+
resp = (
305+
b'HTTP/1.1 200 OK\r\n'
306+
b'From start\r\n'
307+
b' continued\r\n'
308+
b'Name: value\r\n'
309+
b'From middle\r\n'
310+
b' continued\r\n'
311+
b'Transfer-Encoding: chunked\r\n'
312+
b'From end\r\n'
313+
b'\r\n'
314+
b'4\r\nbody\r\n0\r\n\r\n'
315+
)
316+
resp = httplib.HTTPResponse(FakeSocket(resp))
317+
resp.begin()
318+
self.assertIsNotNone(resp.getheader('Name'))
319+
self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
320+
self.assertEqual(resp.read(), b'body')
321+
322+
resp = (
323+
b'HTTP/1.0 200 OK\r\n'
324+
b'From alone\r\n'
325+
b'\r\n'
326+
b'body'
327+
)
328+
resp = httplib.HTTPResponse(FakeSocket(resp))
329+
resp.begin()
330+
self.assertEqual(resp.read(), b'body')
331+
332+
def test_parse_all_octets(self):
333+
# Ensure no valid header field octet breaks the parser
334+
body = (
335+
b'HTTP/1.1 200 OK\r\n'
336+
b"!#$%&'*+-.^_`|~: value\r\n" # Special token characters
337+
b'VCHAR: ' + bytearray(range(0x21, 0x7E + 1)) + b'\r\n'
338+
b'obs-text: ' + bytearray(range(0x80, 0xFF + 1)) + b'\r\n'
339+
b'obs-fold: text\r\n'
340+
b' folded with space\r\n'
341+
b'\tfolded with tab\r\n'
342+
b'Content-Length: 0\r\n'
343+
b'\r\n'
344+
)
345+
sock = FakeSocket(body)
346+
resp = httplib.HTTPResponse(sock)
347+
resp.begin()
348+
self.assertEqual(resp.getheader('Content-Length'), '0')
349+
self.assertEqual(resp.getheader("!#$%&'*+-.^_`|~"), 'value')
350+
vchar = ''.join(map(chr, range(0x21, 0x7E + 1)))
351+
self.assertEqual(resp.getheader('VCHAR'), vchar)
352+
self.assertIsNotNone(resp.getheader('obs-text'))
353+
folded = resp.getheader('obs-fold')
354+
self.assertTrue(folded.startswith('text'))
355+
self.assertIn(' folded with space', folded)
356+
self.assertTrue(folded.endswith('folded with tab'))
357+
244358
def test_invalid_headers(self):
245359
conn = httplib.HTTPConnection('example.com')
246360
conn.sock = FakeSocket('')
@@ -525,7 +639,7 @@ def test_filenoattr(self):
525639
self.assertTrue(hasattr(resp,'fileno'),
526640
'HTTPResponse should expose a fileno attribute')
527641

528-
# Test lines overflowing the max line size (_MAXLINE in http.client)
642+
# Test lines overflowing the max line size (_MAXLINE in httplib)
529643

530644
def test_overflowing_status_line(self):
531645
self.skipTest("disabled for HTTP 0.9 support")
@@ -624,7 +738,7 @@ def testHTTPConnectionSourceAddress(self):
624738
def testHTTPSConnectionSourceAddress(self):
625739
self.conn = httplib.HTTPSConnection(HOST, self.port,
626740
source_address=('', self.source_port))
627-
# We don't test anything here other the constructor not barfing as
741+
# We don't test anything here other than the constructor not barfing as
628742
# this code doesn't deal with setting up an active running SSL server
629743
# for an ssl_wrapped connect() to actually return from.
630744

Lib/urllib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def __init__(self, proxies=None, context=None, **x509):
142142
self.key_file = x509.get('key_file')
143143
self.cert_file = x509.get('cert_file')
144144
self.context = context
145-
self.addheaders = [('User-Agent', self.version)]
145+
self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
146146
self.__tempfiles = []
147147
self.__unlink = os.unlink # See cleanup()
148148
self.tempcache = None

lib-python/2.7/httplib.py

Lines changed: 10 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@
242242
#
243243
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
244244

245-
# the patterns for both name and value are more leniant than RFC
245+
# the patterns for both name and value are more lenient than RFC
246246
# definitions to allow for backwards compatibility
247247
_is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match
248248
_is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search
@@ -273,9 +273,8 @@ def readheaders(self):
273273
274274
Read header lines up to the entirely blank line that terminates them.
275275
The (normally blank) line that ends the headers is skipped, but not
276-
included in the returned list. If a non-header line ends the headers,
277-
(which is an error), an attempt is made to backspace over it; it is
278-
never included in the returned list.
276+
included in the returned list. If an invalid line is found in the
277+
header section, it is skipped, and further lines are processed.
279278
280279
The variable self.status is set to the empty string if all went well,
281280
otherwise it is an error message. The variable self.headers is a
@@ -302,19 +301,17 @@ def readheaders(self):
302301
self.status = ''
303302
headerseen = ""
304303
firstline = 1
305-
startofline = unread = tell = None
306-
if hasattr(self.fp, 'unread'):
307-
unread = self.fp.unread
308-
elif self.seekable:
304+
tell = None
305+
if not hasattr(self.fp, 'unread') and self.seekable:
309306
tell = self.fp.tell
310307
while True:
311308
if len(hlist) > _MAXHEADERS:
312309
raise HTTPException("got more than %d headers" % _MAXHEADERS)
313310
if tell:
314311
try:
315-
startofline = tell()
312+
tell()
316313
except IOError:
317-
startofline = tell = None
314+
tell = None
318315
self.seekable = 0
319316
line = self.fp.readline(_MAXLINE + 1)
320317
if len(line) > _MAXLINE:
@@ -345,26 +342,14 @@ def readheaders(self):
345342
# It's a legal header line, save it.
346343
hlist.append(line)
347344
self.addheader(headerseen, line[len(headerseen)+1:].strip())
348-
continue
349345
elif headerseen is not None:
350346
# An empty header name. These aren't allowed in HTTP, but it's
351347
# probably a benign mistake. Don't add the header, just keep
352348
# going.
353-
continue
349+
pass
354350
else:
355-
# It's not a header line; throw it back and stop here.
356-
if not self.dict:
357-
self.status = 'No headers'
358-
else:
359-
self.status = 'Non-header line where header expected'
360-
# Try to undo the read.
361-
if unread:
362-
unread(line)
363-
elif tell:
364-
self.fp.seek(startofline)
365-
else:
366-
self.status = self.status + '; bad seek'
367-
break
351+
# It's not a header line; skip it and try the next line.
352+
self.status = 'Non-header line where header expected'
368353

369354
class HTTPResponse:
370355

lib-python/2.7/test/test_httplib.py

Lines changed: 116 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,120 @@ def test_malformed_headers_coped_with(self):
241241
self.assertEqual(resp.getheader('First'), 'val')
242242
self.assertEqual(resp.getheader('Second'), 'val')
243243

244+
def test_malformed_truncation(self):
245+
# Other malformed header lines, especially without colons, used to
246+
# cause the rest of the header section to be truncated
247+
resp = (
248+
b'HTTP/1.1 200 OK\r\n'
249+
b'Public-Key-Pins: \n'
250+
b'pin-sha256="xxx=";\n'
251+
b'report-uri="https://..."\r\n'
252+
b'Transfer-Encoding: chunked\r\n'
253+
b'\r\n'
254+
b'4\r\nbody\r\n0\r\n\r\n'
255+
)
256+
resp = httplib.HTTPResponse(FakeSocket(resp))
257+
resp.begin()
258+
self.assertIsNotNone(resp.getheader('Public-Key-Pins'))
259+
self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
260+
self.assertEqual(resp.read(), b'body')
261+
262+
def test_blank_line_forms(self):
263+
# Test that both CRLF and LF blank lines can terminate the header
264+
# section and start the body
265+
for blank in (b'\r\n', b'\n'):
266+
resp = b'HTTP/1.1 200 OK\r\n' b'Transfer-Encoding: chunked\r\n'
267+
resp += blank
268+
resp += b'4\r\nbody\r\n0\r\n\r\n'
269+
resp = httplib.HTTPResponse(FakeSocket(resp))
270+
resp.begin()
271+
self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
272+
self.assertEqual(resp.read(), b'body')
273+
274+
resp = b'HTTP/1.0 200 OK\r\n' + blank + b'body'
275+
resp = httplib.HTTPResponse(FakeSocket(resp))
276+
resp.begin()
277+
self.assertEqual(resp.read(), b'body')
278+
279+
# A blank line ending in CR is not treated as the end of the HTTP
280+
# header section, therefore header fields following it should be
281+
# parsed if possible
282+
resp = (
283+
b'HTTP/1.1 200 OK\r\n'
284+
b'\r'
285+
b'Name: value\r\n'
286+
b'Transfer-Encoding: chunked\r\n'
287+
b'\r\n'
288+
b'4\r\nbody\r\n0\r\n\r\n'
289+
)
290+
resp = httplib.HTTPResponse(FakeSocket(resp))
291+
resp.begin()
292+
self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
293+
self.assertEqual(resp.read(), b'body')
294+
295+
# Neither header fields nor a blank line
296+
resp = b'HTTP/1.0 200 OK\r\n'
297+
resp = httplib.HTTPResponse(FakeSocket(resp))
298+
resp.begin()
299+
self.assertEqual(resp.read(), b'')
300+
301+
def test_from_line(self):
302+
# The parser handles "From" lines specially, so test this does not
303+
# affect parsing the rest of the header section
304+
resp = (
305+
b'HTTP/1.1 200 OK\r\n'
306+
b'From start\r\n'
307+
b' continued\r\n'
308+
b'Name: value\r\n'
309+
b'From middle\r\n'
310+
b' continued\r\n'
311+
b'Transfer-Encoding: chunked\r\n'
312+
b'From end\r\n'
313+
b'\r\n'
314+
b'4\r\nbody\r\n0\r\n\r\n'
315+
)
316+
resp = httplib.HTTPResponse(FakeSocket(resp))
317+
resp.begin()
318+
self.assertIsNotNone(resp.getheader('Name'))
319+
self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
320+
self.assertEqual(resp.read(), b'body')
321+
322+
resp = (
323+
b'HTTP/1.0 200 OK\r\n'
324+
b'From alone\r\n'
325+
b'\r\n'
326+
b'body'
327+
)
328+
resp = httplib.HTTPResponse(FakeSocket(resp))
329+
resp.begin()
330+
self.assertEqual(resp.read(), b'body')
331+
332+
def test_parse_all_octets(self):
333+
# Ensure no valid header field octet breaks the parser
334+
body = (
335+
b'HTTP/1.1 200 OK\r\n'
336+
b"!#$%&'*+-.^_`|~: value\r\n" # Special token characters
337+
b'VCHAR: ' + bytearray(range(0x21, 0x7E + 1)) + b'\r\n'
338+
b'obs-text: ' + bytearray(range(0x80, 0xFF + 1)) + b'\r\n'
339+
b'obs-fold: text\r\n'
340+
b' folded with space\r\n'
341+
b'\tfolded with tab\r\n'
342+
b'Content-Length: 0\r\n'
343+
b'\r\n'
344+
)
345+
sock = FakeSocket(body)
346+
resp = httplib.HTTPResponse(sock)
347+
resp.begin()
348+
self.assertEqual(resp.getheader('Content-Length'), '0')
349+
self.assertEqual(resp.getheader("!#$%&'*+-.^_`|~"), 'value')
350+
vchar = ''.join(map(chr, range(0x21, 0x7E + 1)))
351+
self.assertEqual(resp.getheader('VCHAR'), vchar)
352+
self.assertIsNotNone(resp.getheader('obs-text'))
353+
folded = resp.getheader('obs-fold')
354+
self.assertTrue(folded.startswith('text'))
355+
self.assertIn(' folded with space', folded)
356+
self.assertTrue(folded.endswith('folded with tab'))
357+
244358
def test_invalid_headers(self):
245359
conn = httplib.HTTPConnection('example.com')
246360
conn.sock = FakeSocket('')
@@ -525,7 +639,7 @@ def test_filenoattr(self):
525639
self.assertTrue(hasattr(resp,'fileno'),
526640
'HTTPResponse should expose a fileno attribute')
527641

528-
# Test lines overflowing the max line size (_MAXLINE in http.client)
642+
# Test lines overflowing the max line size (_MAXLINE in httplib)
529643

530644
def test_overflowing_status_line(self):
531645
self.skipTest("disabled for HTTP 0.9 support")
@@ -624,7 +738,7 @@ def testHTTPConnectionSourceAddress(self):
624738
def testHTTPSConnectionSourceAddress(self):
625739
self.conn = httplib.HTTPSConnection(HOST, self.port,
626740
source_address=('', self.source_port))
627-
# We don't test anything here other the constructor not barfing as
741+
# We don't test anything here other than the constructor not barfing as
628742
# this code doesn't deal with setting up an active running SSL server
629743
# for an ssl_wrapped connect() to actually return from.
630744

lib-python/2.7/urllib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def __init__(self, proxies=None, context=None, **x509):
138138
self.key_file = x509.get('key_file')
139139
self.cert_file = x509.get('cert_file')
140140
self.context = context
141-
self.addheaders = [('User-Agent', self.version)]
141+
self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
142142
self.__tempfiles = []
143143
self.__unlink = os.unlink # See cleanup()
144144
self.tempcache = None

0 commit comments

Comments
 (0)