3131except ImportError :
3232 lzma = None
3333
34+ try :
35+ from compression import zstd # We may need its compression method
36+ except ImportError :
37+ zstd = None
38+
# Public names exported by a star-import of this module.  Every entry must
# match the identifier exactly (no embedded whitespace), otherwise the
# star-import fails with AttributeError.
__all__ = [
    "BadZipFile", "BadZipfile", "error",
    "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", "ZIP_ZSTANDARD",
    "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
    "Path",
]
3843
class BadZipFile(Exception):
    """Raised when a file cannot be handled as a valid ZIP archive."""
@@ -58,12 +63,14 @@ class LargeZipFile(Exception):
5863ZIP_DEFLATED = 8
5964ZIP_BZIP2 = 12
6065ZIP_LZMA = 14
66+ ZIP_ZSTANDARD = 93
6167# Other ZIP compression methods not supported
6268
6369DEFAULT_VERSION = 20
6470ZIP64_VERSION = 45
6571BZIP2_VERSION = 46
6672LZMA_VERSION = 63
73+ ZSTANDARD_VERSION = 63
6774# we recognize (but not necessarily support) all features up to that version
6875MAX_EXTRACT_VERSION = 63
6976
@@ -227,8 +234,19 @@ def strip(cls, data, xids):
227234
def _check_zipfile(fp):
    """Return True if the open binary file *fp* looks like a ZIP archive.

    An end-of-central-directory record must be found.  The archive is then
    accepted when it is empty, or when the central directory lives on the
    same disk and its first entry starts with the central-directory
    signature.  Any OSError raised while reading is treated as "not a ZIP
    file".  The position of *fp* is changed by this call.
    """
    try:
        endrec = _EndRecData(fp)
        if not endrec:
            return False

        if (endrec[_ECD_ENTRIES_TOTAL] == 0
                and endrec[_ECD_SIZE] == 0
                and endrec[_ECD_OFFSET] == 0):
            return True     # Empty zipfiles are still zipfiles

        if endrec[_ECD_DISK_NUMBER] != endrec[_ECD_DISK_START]:
            return False    # Central directory is on another disk

        # Start of the central directory, allowing for prepended data.
        start_dir = sum(_handle_prepended_data(endrec))
        if start_dir < 0:
            # Inconsistent sizes/offsets in the end record.  Seeking to a
            # negative position raises ValueError on in-memory streams,
            # which no caller catches, so reject the file here instead.
            return False
        fp.seek(start_dir)

        if endrec[_ECD_SIZE] < sizeCentralDir:
            return False    # Too small to hold even one directory entry

        data = fp.read(sizeCentralDir)
        if len(data) != sizeCentralDir:
            return False    # Truncated before the first directory entry

        centdir = struct.unpack(structCentralDir, data)
        # The first central directory entry must carry the magic number.
        return centdir[_CD_SIGNATURE] == stringCentralDir
    except OSError:
        return False
@@ -241,14 +259,29 @@ def is_zipfile(filename):
241259 result = False
242260 try :
243261 if hasattr (filename , "read" ):
262+ pos = filename .tell ()
244263 result = _check_zipfile (fp = filename )
264+ filename .seek (pos )
245265 else :
246266 with open (filename , "rb" ) as fp :
247267 result = _check_zipfile (fp )
248268 except (OSError , BadZipFile ):
249269 pass
250270 return result
251271
def _handle_prepended_data(endrec, debug=0):
    """Return ``(offset_cd, concat)`` computed from an end-of-archive record.

    *offset_cd* is the central-directory offset stored in *endrec*.
    *concat* is the difference between where the end record was located and
    where the stored size and offset say it should be; it is zero unless
    the zip was concatenated to another file.  When *debug* is greater
    than 2 the values are printed.
    """
    cd_size = endrec[_ECD_SIZE]        # bytes in central directory
    cd_offset = endrec[_ECD_OFFSET]    # offset of central directory

    # Whatever remains after removing the directory size and its recorded
    # offset from the end-record location is the length of prepended data.
    prepended = endrec[_ECD_LOCATION] - cd_size - cd_offset

    if debug > 2:
        print("given, inferred, offset", cd_offset, prepended + cd_offset, prepended)

    return cd_offset, prepended
284+
252285def _EndRecData64 (fpin , offset , endrec ):
253286 """
254287 Read the ZIP64 end-of-archive records and use that to update endrec
@@ -519,6 +552,8 @@ def FileHeader(self, zip64=None):
519552 min_version = max (BZIP2_VERSION , min_version )
520553 elif self .compress_type == ZIP_LZMA :
521554 min_version = max (LZMA_VERSION , min_version )
555+ elif self .compress_type == ZIP_ZSTANDARD :
556+ min_version = max (ZSTANDARD_VERSION , min_version )
522557
523558 self .extract_version = max (min_version , self .extract_version )
524559 self .create_version = max (min_version , self .create_version )
@@ -619,6 +654,28 @@ def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
619654
620655 return zinfo
621656
657+ def _for_archive (self , archive ):
658+ """Resolve suitable defaults from the archive.
659+
660+ Resolve the date_time, compression attributes, and external attributes
661+ to suitable defaults as used by :method:`ZipFile.writestr`.
662+
663+ Return self.
664+ """
665+ # gh-91279: Set the SOURCE_DATE_EPOCH to a specific timestamp
666+ epoch = os .environ .get ('SOURCE_DATE_EPOCH' )
667+ get_time = int (epoch ) if epoch else time .time ()
668+ self .date_time = time .localtime (get_time )[:6 ]
669+
670+ self .compress_type = archive .compression
671+ self .compress_level = archive .compresslevel
672+ if self .filename .endswith ('/' ): # pragma: no cover
673+ self .external_attr = 0o40775 << 16 # drwxrwxr-x
674+ self .external_attr |= 0x10 # MS-DOS directory flag
675+ else :
676+ self .external_attr = 0o600 << 16 # ?rw-------
677+ return self
678+
622679 def is_dir (self ):
623680 """Return True if this archive member is a directory."""
624681 if self .filename .endswith ('/' ):
@@ -758,6 +815,7 @@ def decompress(self, data):
758815 14 : 'lzma' ,
759816 18 : 'terse' ,
760817 19 : 'lz77' ,
818+ 93 : 'zstd' ,
761819 97 : 'wavpack' ,
762820 98 : 'ppmd' ,
763821}
@@ -777,6 +835,10 @@ def _check_compression(compression):
777835 if not lzma :
778836 raise RuntimeError (
779837 "Compression requires the (missing) lzma module" )
838+ elif compression == ZIP_ZSTANDARD :
839+ if not zstd :
840+ raise RuntimeError (
841+ "Compression requires the (missing) compression.zstd module" )
780842 else :
781843 raise NotImplementedError ("That compression method is not supported" )
782844
@@ -793,6 +855,8 @@ def _get_compressor(compress_type, compresslevel=None):
793855 # compresslevel is ignored for ZIP_LZMA
794856 elif compress_type == ZIP_LZMA :
795857 return LZMACompressor ()
858+ elif compress_type == ZIP_ZSTANDARD :
859+ return zstd .ZstdCompressor (level = compresslevel )
796860 else :
797861 return None
798862
@@ -807,6 +871,8 @@ def _get_decompressor(compress_type):
807871 return bz2 .BZ2Decompressor ()
808872 elif compress_type == ZIP_LZMA :
809873 return LZMADecompressor ()
874+ elif compress_type == ZIP_ZSTANDARD :
875+ return zstd .ZstdDecompressor ()
810876 else :
811877 descr = compressor_names .get (compress_type )
812878 if descr :
@@ -1326,7 +1392,8 @@ class ZipFile:
13261392 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
13271393 or append 'a'.
13281394 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1329- ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
1395+ ZIP_BZIP2 (requires bz2), ZIP_LZMA (requires lzma), or
1396+ ZIP_ZSTANDARD (requires compression.zstd).
13301397 allowZip64: if True ZipFile will create files with ZIP64 extensions when
13311398 needed, otherwise it will raise an exception when this would
13321399 be necessary.
@@ -1335,6 +1402,9 @@ class ZipFile:
13351402 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
13361403 When using ZIP_DEFLATED integers 0 through 9 are accepted.
13371404 When using ZIP_BZIP2 integers 1 through 9 are accepted.
1405+ When using ZIP_ZSTANDARD integers -7 through 22 are common,
1406+ see the CompressionParameter enum in compression.zstd for
1407+ details.
13381408
13391409 """
13401410
@@ -1468,21 +1538,17 @@ def _RealGetContents(self):
14681538 raise BadZipFile ("File is not a zip file" )
14691539 if self .debug > 1 :
14701540 print (endrec )
1471- size_cd = endrec [_ECD_SIZE ] # bytes in central directory
1472- offset_cd = endrec [_ECD_OFFSET ] # offset of central directory
14731541 self ._comment = endrec [_ECD_COMMENT ] # archive comment
14741542
1475- # "concat" is zero, unless zip was concatenated to another file
1476- concat = endrec [_ECD_LOCATION ] - size_cd - offset_cd
1543+ offset_cd , concat = _handle_prepended_data (endrec , self .debug )
14771544
1478- if self .debug > 2 :
1479- inferred = concat + offset_cd
1480- print ("given, inferred, offset" , offset_cd , inferred , concat )
14811545 # self.start_dir: Position of start of central directory
14821546 self .start_dir = offset_cd + concat
1547+
14831548 if self .start_dir < 0 :
14841549 raise BadZipFile ("Bad offset for central directory" )
14851550 fp .seek (self .start_dir , 0 )
1551+ size_cd = endrec [_ECD_SIZE ]
14861552 data = fp .read (size_cd )
14871553 fp = io .BytesIO (data )
14881554 total = 0
@@ -1771,8 +1837,8 @@ def _open_to_write(self, zinfo, force_zip64=False):
17711837 def extract (self , member , path = None , pwd = None ):
17721838 """Extract a member from the archive to the current working directory,
17731839 using its full name. Its file information is extracted as accurately
1774- as possible. ` member' may be a filename or a ZipInfo object. You can
1775- specify a different directory using ` path'. You can specify the
1840+ as possible. ' member' may be a filename or a ZipInfo object. You can
1841+ specify a different directory using ' path'. You can specify the
17761842 password to decrypt the file using 'pwd'.
17771843 """
17781844 if path is None :
@@ -1784,8 +1850,8 @@ def extract(self, member, path=None, pwd=None):
17841850
17851851 def extractall (self , path = None , members = None , pwd = None ):
17861852 """Extract all members from the archive to the current working
1787- directory. ` path' specifies a different directory to extract to.
1788- ` members' is optional and must be a subset of the list returned
1853+ directory. ' path' specifies a different directory to extract to.
1854+ ' members' is optional and must be a subset of the list returned
17891855 by namelist(). You can specify the password to decrypt all files
17901856 using 'pwd'.
17911857 """
@@ -1929,18 +1995,10 @@ def writestr(self, zinfo_or_arcname, data,
19291995 the name of the file in the archive."""
19301996 if isinstance (data , str ):
19311997 data = data .encode ("utf-8" )
1932- if not isinstance (zinfo_or_arcname , ZipInfo ):
1933- zinfo = ZipInfo (filename = zinfo_or_arcname ,
1934- date_time = time .localtime (time .time ())[:6 ])
1935- zinfo .compress_type = self .compression
1936- zinfo .compress_level = self .compresslevel
1937- if zinfo .filename .endswith ('/' ):
1938- zinfo .external_attr = 0o40775 << 16 # drwxrwxr-x
1939- zinfo .external_attr |= 0x10 # MS-DOS directory flag
1940- else :
1941- zinfo .external_attr = 0o600 << 16 # ?rw-------
1942- else :
1998+ if isinstance (zinfo_or_arcname , ZipInfo ):
19431999 zinfo = zinfo_or_arcname
2000+ else :
2001+ zinfo = ZipInfo (zinfo_or_arcname )._for_archive (self )
19442002
19452003 if not self .fp :
19462004 raise ValueError (
@@ -2059,6 +2117,8 @@ def _write_end_record(self):
20592117 min_version = max (BZIP2_VERSION , min_version )
20602118 elif zinfo .compress_type == ZIP_LZMA :
20612119 min_version = max (LZMA_VERSION , min_version )
2120+ elif zinfo .compress_type == ZIP_ZSTANDARD :
2121+ min_version = max (ZSTANDARD_VERSION , min_version )
20622122
20632123 extract_version = max (min_version , zinfo .extract_version )
20642124 create_version = max (min_version , zinfo .create_version )
@@ -2301,7 +2361,7 @@ def main(args=None):
23012361 import argparse
23022362
23032363 description = 'A simple command-line interface for zipfile module.'
2304- parser = argparse .ArgumentParser (description = description )
2364+ parser = argparse .ArgumentParser (description = description , color = True )
23052365 group = parser .add_mutually_exclusive_group (required = True )
23062366 group .add_argument ('-l' , '--list' , metavar = '<zipfile>' ,
23072367 help = 'Show listing of a zipfile' )
0 commit comments