Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion Lib/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,21 @@ def _read_until_null(fp, crc=None):
'''Read until the first encountered null byte in fp.
If crc is not None, update and return the CRC.
'''
if crc is None:
if isinstance(fp, _PaddedFile):
size = 1
while True:
s = fp.read(size)
if not s:
break
i = s.find(0) + 1
if crc is not None:
crc = zlib.crc32(s[:i] if i else s, crc)
if i:
fp.prepend(s[i:])
break
if size < 2**20:
size *= 2
elif crc is None:
while True:
s = fp.read(1)
if not s or s == b'\000':
Expand Down
42 changes: 42 additions & 0 deletions Lib/test/test_gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sys
import unittest
from subprocess import PIPE, Popen
from test import support
from test.support import catch_unraisable_exception
from test.support import force_not_colorized_test_class, import_helper
from test.support import os_helper
Expand Down Expand Up @@ -824,6 +825,47 @@ def test_corrupted_gzip_header(self):
f"Corrupted gzip header. Checksums do not "
f"match: {true_crc:04x} != {corrupted_crc:04x}")

def _test_long_header(self, flags):
    """Create a gzip file with a 1 GiB header string and start reading it.

    *flags* is stored as the FLG byte of the gzip header.  After the
    fixed 10-byte header, 1024 blocks of 1 MiB of non-null bytes are
    written, followed by a single null terminator.  When bit 0x02
    (FHCRC) is set in *flags*, the low 16 bits of the CRC-32 of
    everything written so far are appended as the header checksum.
    The file is then opened with gzip.GzipFile and one byte is
    requested, which forces the header to be parsed.
    """
    has_hcrc = bool(flags & 0x02)
    header = b'\x1f\x8b\x08' + bytes([flags]) + b'\x00\x00\x00\x00\x00\xff'
    # Bytes that follow the header (compressed stream and trailer).
    trailer = b'\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    chunk = b'ABCDEFGHIJKLMNOP' * 2**16  # 1 MiB
    with open(self.filename, 'wb') as out:
        out.write(header)
        # The running checksum is only tracked when a header CRC is wanted.
        running = zlib.crc32(header) if has_hcrc else None
        for _ in range(1024):
            out.write(chunk)
            if has_hcrc:
                running = zlib.crc32(chunk, running)
        # The null byte terminates the long header string.
        out.write(b'\x00')
        if has_hcrc:
            running = zlib.crc32(b'\x00', running)
            out.write(struct.pack("<H", running & 0xFFFF))
        out.write(trailer)
    with gzip.GzipFile(self.filename, 'rb') as reader:
        reader.read(1)

@support.requires_resource('largefile')
@support.requires_resource('cpu')
def test_long_filename(self):
    """Read a gzip file whose header has the FNAME flag and a very long name."""
    fname_flag = 0x08
    self._test_long_header(fname_flag)

@support.requires_resource('largefile')
@support.requires_resource('cpu')
def test_long_filename_with_crc(self):
    """Same as the long FNAME case, with the FHCRC header checksum enabled."""
    fname_flag = 0x08
    fhcrc_flag = 0x02
    self._test_long_header(fname_flag | fhcrc_flag)

@support.requires_resource('largefile')
@support.requires_resource('cpu')
def test_long_comment(self):
    """Read a gzip file whose header has the FCOMMENT flag and a very long comment."""
    fcomment_flag = 0x10
    self._test_long_header(fcomment_flag)

@support.requires_resource('largefile')
@support.requires_resource('cpu')
def test_long_comment_with_crc(self):
    """Same as the long FCOMMENT case, with the FHCRC header checksum enabled."""
    fcomment_flag = 0x10
    fhcrc_flag = 0x02
    self._test_long_header(fcomment_flag | fhcrc_flag)

def test_read_truncated(self):
data = data1*50
# Drop the CRC (4 bytes) and file size (4 bytes).
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Sped up reading :mod:`gzip` files with a long embedded filename or comment.
Loading