Skip to content

Commit 000025e

Browse files
CPython Developers authored and youknowone committed
Update zipfile from v3.14.3
1 parent 3ec7b5c commit 000025e

File tree

3 files changed

+91
-31
lines changed

3 files changed

+91
-31
lines changed

Lib/zipfile/__init__.py

Lines changed: 88 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,15 @@
3131
except ImportError:
3232
lzma = None
3333

34+
try:
35+
from compression import zstd # We may need its compression method
36+
except ImportError:
37+
zstd = None
38+
3439
__all__ = ["BadZipFile", "BadZipfile", "error",
3540
"ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
36-
"is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
37-
"Path"]
41+
"ZIP_ZSTANDARD", "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile",
42+
"LargeZipFile", "Path"]
3843

3944
class BadZipFile(Exception):
4045
pass
@@ -58,12 +63,14 @@ class LargeZipFile(Exception):
5863
ZIP_DEFLATED = 8
5964
ZIP_BZIP2 = 12
6065
ZIP_LZMA = 14
66+
ZIP_ZSTANDARD = 93
6167
# Other ZIP compression methods not supported
6268

6369
DEFAULT_VERSION = 20
6470
ZIP64_VERSION = 45
6571
BZIP2_VERSION = 46
6672
LZMA_VERSION = 63
73+
ZSTANDARD_VERSION = 63
6774
# we recognize (but not necessarily support) all features up to that version
6875
MAX_EXTRACT_VERSION = 63
6976

@@ -227,8 +234,19 @@ def strip(cls, data, xids):
227234

228235
def _check_zipfile(fp):
229236
try:
230-
if _EndRecData(fp):
231-
return True # file has correct magic number
237+
endrec = _EndRecData(fp)
238+
if endrec:
239+
if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0:
240+
return True # Empty zipfiles are still zipfiles
241+
elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]:
242+
# Central directory is on the same disk
243+
fp.seek(sum(_handle_prepended_data(endrec)))
244+
if endrec[_ECD_SIZE] >= sizeCentralDir:
245+
data = fp.read(sizeCentralDir) # CD is where we expect it to be
246+
if len(data) == sizeCentralDir:
247+
centdir = struct.unpack(structCentralDir, data) # CD is the right size
248+
if centdir[_CD_SIGNATURE] == stringCentralDir:
249+
return True # First central directory entry has correct magic number
232250
except OSError:
233251
pass
234252
return False
@@ -241,14 +259,29 @@ def is_zipfile(filename):
241259
result = False
242260
try:
243261
if hasattr(filename, "read"):
262+
pos = filename.tell()
244263
result = _check_zipfile(fp=filename)
264+
filename.seek(pos)
245265
else:
246266
with open(filename, "rb") as fp:
247267
result = _check_zipfile(fp)
248268
except (OSError, BadZipFile):
249269
pass
250270
return result
251271

272+
def _handle_prepended_data(endrec, debug=0):
273+
size_cd = endrec[_ECD_SIZE] # bytes in central directory
274+
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
275+
276+
# "concat" is zero, unless zip was concatenated to another file
277+
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
278+
279+
if debug > 2:
280+
inferred = concat + offset_cd
281+
print("given, inferred, offset", offset_cd, inferred, concat)
282+
283+
return offset_cd, concat
284+
252285
def _EndRecData64(fpin, offset, endrec):
253286
"""
254287
Read the ZIP64 end-of-archive records and use that to update endrec
@@ -519,6 +552,8 @@ def FileHeader(self, zip64=None):
519552
min_version = max(BZIP2_VERSION, min_version)
520553
elif self.compress_type == ZIP_LZMA:
521554
min_version = max(LZMA_VERSION, min_version)
555+
elif self.compress_type == ZIP_ZSTANDARD:
556+
min_version = max(ZSTANDARD_VERSION, min_version)
522557

523558
self.extract_version = max(min_version, self.extract_version)
524559
self.create_version = max(min_version, self.create_version)
@@ -619,6 +654,28 @@ def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
619654

620655
return zinfo
621656

657+
def _for_archive(self, archive):
658+
"""Resolve suitable defaults from the archive.
659+
660+
Resolve the date_time, compression attributes, and external attributes
661+
to suitable defaults as used by :meth:`ZipFile.writestr`.
662+
663+
Return self.
664+
"""
665+
# gh-91279: Honor SOURCE_DATE_EPOCH, when set, as the timestamp
666+
epoch = os.environ.get('SOURCE_DATE_EPOCH')
667+
get_time = int(epoch) if epoch else time.time()
668+
self.date_time = time.localtime(get_time)[:6]
669+
670+
self.compress_type = archive.compression
671+
self.compress_level = archive.compresslevel
672+
if self.filename.endswith('/'): # pragma: no cover
673+
self.external_attr = 0o40775 << 16 # drwxrwxr-x
674+
self.external_attr |= 0x10 # MS-DOS directory flag
675+
else:
676+
self.external_attr = 0o600 << 16 # ?rw-------
677+
return self
678+
622679
def is_dir(self):
623680
"""Return True if this archive member is a directory."""
624681
if self.filename.endswith('/'):
@@ -758,6 +815,7 @@ def decompress(self, data):
758815
14: 'lzma',
759816
18: 'terse',
760817
19: 'lz77',
818+
93: 'zstd',
761819
97: 'wavpack',
762820
98: 'ppmd',
763821
}
@@ -777,6 +835,10 @@ def _check_compression(compression):
777835
if not lzma:
778836
raise RuntimeError(
779837
"Compression requires the (missing) lzma module")
838+
elif compression == ZIP_ZSTANDARD:
839+
if not zstd:
840+
raise RuntimeError(
841+
"Compression requires the (missing) compression.zstd module")
780842
else:
781843
raise NotImplementedError("That compression method is not supported")
782844

@@ -793,6 +855,8 @@ def _get_compressor(compress_type, compresslevel=None):
793855
# compresslevel is ignored for ZIP_LZMA
794856
elif compress_type == ZIP_LZMA:
795857
return LZMACompressor()
858+
elif compress_type == ZIP_ZSTANDARD:
859+
return zstd.ZstdCompressor(level=compresslevel)
796860
else:
797861
return None
798862

@@ -807,6 +871,8 @@ def _get_decompressor(compress_type):
807871
return bz2.BZ2Decompressor()
808872
elif compress_type == ZIP_LZMA:
809873
return LZMADecompressor()
874+
elif compress_type == ZIP_ZSTANDARD:
875+
return zstd.ZstdDecompressor()
810876
else:
811877
descr = compressor_names.get(compress_type)
812878
if descr:
@@ -1326,7 +1392,8 @@ class ZipFile:
13261392
mode: The mode can be either read 'r', write 'w', exclusive create 'x',
13271393
or append 'a'.
13281394
compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1329-
ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
1395+
ZIP_BZIP2 (requires bz2), ZIP_LZMA (requires lzma), or
1396+
ZIP_ZSTANDARD (requires compression.zstd).
13301397
allowZip64: if True ZipFile will create files with ZIP64 extensions when
13311398
needed, otherwise it will raise an exception when this would
13321399
be necessary.
@@ -1335,6 +1402,9 @@ class ZipFile:
13351402
When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
13361403
When using ZIP_DEFLATED integers 0 through 9 are accepted.
13371404
When using ZIP_BZIP2 integers 1 through 9 are accepted.
1405+
When using ZIP_ZSTANDARD integers -7 through 22 are common,
1406+
see the CompressionParameter enum in compression.zstd for
1407+
details.
13381408
13391409
"""
13401410

@@ -1468,21 +1538,17 @@ def _RealGetContents(self):
14681538
raise BadZipFile("File is not a zip file")
14691539
if self.debug > 1:
14701540
print(endrec)
1471-
size_cd = endrec[_ECD_SIZE] # bytes in central directory
1472-
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
14731541
self._comment = endrec[_ECD_COMMENT] # archive comment
14741542

1475-
# "concat" is zero, unless zip was concatenated to another file
1476-
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1543+
offset_cd, concat = _handle_prepended_data(endrec, self.debug)
14771544

1478-
if self.debug > 2:
1479-
inferred = concat + offset_cd
1480-
print("given, inferred, offset", offset_cd, inferred, concat)
14811545
# self.start_dir: Position of start of central directory
14821546
self.start_dir = offset_cd + concat
1547+
14831548
if self.start_dir < 0:
14841549
raise BadZipFile("Bad offset for central directory")
14851550
fp.seek(self.start_dir, 0)
1551+
size_cd = endrec[_ECD_SIZE]
14861552
data = fp.read(size_cd)
14871553
fp = io.BytesIO(data)
14881554
total = 0
@@ -1771,8 +1837,8 @@ def _open_to_write(self, zinfo, force_zip64=False):
17711837
def extract(self, member, path=None, pwd=None):
17721838
"""Extract a member from the archive to the current working directory,
17731839
using its full name. Its file information is extracted as accurately
1774-
as possible. `member' may be a filename or a ZipInfo object. You can
1775-
specify a different directory using `path'. You can specify the
1840+
as possible. 'member' may be a filename or a ZipInfo object. You can
1841+
specify a different directory using 'path'. You can specify the
17761842
password to decrypt the file using 'pwd'.
17771843
"""
17781844
if path is None:
@@ -1784,8 +1850,8 @@ def extract(self, member, path=None, pwd=None):
17841850

17851851
def extractall(self, path=None, members=None, pwd=None):
17861852
"""Extract all members from the archive to the current working
1787-
directory. `path' specifies a different directory to extract to.
1788-
`members' is optional and must be a subset of the list returned
1853+
directory. 'path' specifies a different directory to extract to.
1854+
'members' is optional and must be a subset of the list returned
17891855
by namelist(). You can specify the password to decrypt all files
17901856
using 'pwd'.
17911857
"""
@@ -1929,18 +1995,10 @@ def writestr(self, zinfo_or_arcname, data,
19291995
the name of the file in the archive."""
19301996
if isinstance(data, str):
19311997
data = data.encode("utf-8")
1932-
if not isinstance(zinfo_or_arcname, ZipInfo):
1933-
zinfo = ZipInfo(filename=zinfo_or_arcname,
1934-
date_time=time.localtime(time.time())[:6])
1935-
zinfo.compress_type = self.compression
1936-
zinfo.compress_level = self.compresslevel
1937-
if zinfo.filename.endswith('/'):
1938-
zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1939-
zinfo.external_attr |= 0x10 # MS-DOS directory flag
1940-
else:
1941-
zinfo.external_attr = 0o600 << 16 # ?rw-------
1942-
else:
1998+
if isinstance(zinfo_or_arcname, ZipInfo):
19431999
zinfo = zinfo_or_arcname
2000+
else:
2001+
zinfo = ZipInfo(zinfo_or_arcname)._for_archive(self)
19442002

19452003
if not self.fp:
19462004
raise ValueError(
@@ -2059,6 +2117,8 @@ def _write_end_record(self):
20592117
min_version = max(BZIP2_VERSION, min_version)
20602118
elif zinfo.compress_type == ZIP_LZMA:
20612119
min_version = max(LZMA_VERSION, min_version)
2120+
elif zinfo.compress_type == ZIP_ZSTANDARD:
2121+
min_version = max(ZSTANDARD_VERSION, min_version)
20622122

20632123
extract_version = max(min_version, zinfo.extract_version)
20642124
create_version = max(min_version, zinfo.create_version)
@@ -2301,7 +2361,7 @@ def main(args=None):
23012361
import argparse
23022362

23032363
description = 'A simple command-line interface for zipfile module.'
2304-
parser = argparse.ArgumentParser(description=description)
2364+
parser = argparse.ArgumentParser(description=description, color=True)
23052365
group = parser.add_mutually_exclusive_group(required=True)
23062366
group.add_argument('-l', '--list', metavar='<zipfile>',
23072367
help='Show listing of a zipfile')

Lib/zipfile/_path/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ class Path:
281281
>>> str(path.parent)
282282
'mem'
283283
284-
If the zipfile has no filename, such attributes are not
284+
If the zipfile has no filename, such attributes are not
285285
valid and accessing them will raise an Exception.
286286
287287
>>> zf.filename = None

Lib/zipfile/_path/glob.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ def extend(self, pattern):
3636
Apply '(?s:)' to create a non-capturing group that
3737
matches newlines (valid on Unix).
3838
39-
Append '\Z' to imply fullmatch even when match is used.
39+
Append '\z' to imply fullmatch even when match is used.
4040
"""
41-
return rf'(?s:{pattern})\Z'
41+
return rf'(?s:{pattern})\z'
4242

4343
def match_dirs(self, pattern):
4444
"""

0 commit comments

Comments
 (0)