From 6531eb7203af79784cf2fd9d29907485fcd64daa Mon Sep 17 00:00:00 2001 From: Rohit Date: Mon, 15 Jun 2026 20:42:37 +0530 Subject: [PATCH] gh-151307: Bound zipfile reads for forged compressed sizes --- Lib/test/test_zipfile/test_core.py | 66 +++++++++++++++++++ Lib/zipfile/__init__.py | 13 +++- ...-06-15-20-35-00.gh-issue-151307.4Tyv93.rst | 2 + 3 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-15-20-35-00.gh-issue-151307.4Tyv93.rst diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index ffed328b171fda..631515ba3d76ee 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -2632,6 +2632,72 @@ def test_full_overlap_same_name(self): zipf.open(zipf.infolist()[0]).close() self.assertEqual(cm.filename, __file__) + def test_forged_compress_size_read_is_bounded(self): + # The ZIP file contains two central directory entries with the + # same name that point at the same local header. The first + # entry claims an oversized compressed size, but the underlying + # read request should still stay bounded. + filename = b"a.txt" + file_data = b"x" + claimed_size = 0xFFFF_FFFE + local_header = struct.pack( + "<4s2B4HL2L2H", + b"PK\x03\x04", + 20, 0, 0, 0, 0, 0, 0, + len(file_data), len(file_data), + len(filename), 0, + ) + cd_offset = len(local_header) + len(filename) + len(file_data) + cd_entry_forged = struct.pack( + "<4s4B4HL2L5H2L", + b"PK\x01\x02", + 20, 0, 20, 0, + 0, 0, 0, 0, 0, + claimed_size, claimed_size, + len(filename), + 0, 0, 0, 0, 0, + 0, + ) + cd_entry_normal = struct.pack( + "<4s4B4HL2L5H2L", + b"PK\x01\x02", + 20, 0, 20, 0, + 0, 0, 0, 0, 0, + len(file_data), len(file_data), + len(filename), + 0, 0, 0, 0, 0, + 0, + ) + central_directory = ( + cd_entry_forged + filename + + cd_entry_normal + filename + ) + end_record = struct.pack( + zipfile.structEndArchive, + b"PK\x05\x06", + 0, 0, 2, 2, + len(central_directory), cd_offset, 0, + ) + data = local_header + filename + file_data + central_directory + end_record + + class ObservedBytesIO(io.BytesIO): + def __init__(self, data): + super().__init__(data) + self.read_sizes = [] + + def read(self, n=-1): + self.read_sizes.append(n) + return super().read(n) + + fileobj = ObservedBytesIO(data) + with zipfile.ZipFile(fileobj, "r") as zipf: + forged = zipf.filelist[0] + with self.assertWarnsRegex(UserWarning, "Overlapped entries"): + with self.assertRaises(EOFError): + zipf.open(forged).read() + + self.assertLessEqual(max(fileobj.read_sizes), len(data)) + @requires_zlib() def test_quoted_overlap(self): # The ZIP file contains two files. The second local header diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 005b6f4eb84044..eec74026484abf 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -962,6 +962,10 @@ class ZipExtFile(io.BufferedIOBase): # Read from compressed files in 4k blocks. MIN_READ_SIZE = 4096 + # Max chunk size to read from the underlying file object when its + # remaining size is unknown. + MAX_READ_SIZE = 1 << 24 + # Chunk size to read during seek MAX_SEEK_READ = 1 << 24 @@ -1000,6 +1004,11 @@ def __init__(self, fileobj, mode, zipinfo, pwd=None, self._orig_file_size = zipinfo.file_size self._orig_start_crc = self._running_crc self._orig_crc = self._expected_crc + try: + fileobj.seek(0, os.SEEK_END) + self._compress_end = fileobj.tell() + finally: + fileobj.seek(self._orig_compress_start) self._seekable = True except AttributeError: pass @@ -1199,7 +1208,9 @@ def _read2(self, n): return b'' n = max(n, self.MIN_READ_SIZE) - n = min(n, self._compress_left) + n = min(n, self._compress_left, self.MAX_READ_SIZE) + if self._seekable: + n = min(n, max(0, self._compress_end - self._fileobj.tell())) data = self._fileobj.read(n) self._compress_left -= len(data) diff --git a/Misc/NEWS.d/next/Library/2026-06-15-20-35-00.gh-issue-151307.4Tyv93.rst b/Misc/NEWS.d/next/Library/2026-06-15-20-35-00.gh-issue-151307.4Tyv93.rst new file mode 100644 index 00000000000000..139ba56f9c873f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-15-20-35-00.gh-issue-151307.4Tyv93.rst @@ -0,0 +1,2 @@ +Bound :mod:`zipfile` member reads so a forged compressed size cannot make +``ZipExtFile`` request multi-gigabyte reads from the underlying file object.