Skip to content

Commit e226d43

Browse files
committed
Add support for zstd compression provided by stdlib (Python 3.14+)
1 parent 3a0b5d3 commit e226d43

2 files changed

Lines changed: 135 additions & 45 deletions

File tree

tests/test_zipstream.py

Lines changed: 85 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from zipstream import ZipStream
2121

2222

23+
PY313 = sys.version_info < (3, 14)
2324
PY36 = sys.version_info < (3, 7)
2425
PY35 = sys.version_info < (3, 6)
2526

@@ -30,6 +31,14 @@
3031
("mbyte", 1024 * 1024),
3132
]
3233

34+
COMPRESS_TYPES = [
35+
zipfile.ZIP_STORED,
36+
zipfile.ZIP_LZMA,
37+
zipfile.ZIP_DEFLATED,
38+
zipfile.ZIP_BZIP2,
39+
]
40+
if not PY313:
41+
COMPRESS_TYPES.append(zipfile.ZIP_ZSTANDARD)
3342

3443
# Patch is_dir onto ZipInfo objects in 3.5 to make testing easier
3544
@pytest.fixture(autouse=PY35)
@@ -107,12 +116,7 @@ def _gen_rand():
107116
# Tests start
108117
################################
109118

110-
@pytest.mark.parametrize("ct", [
111-
zipfile.ZIP_STORED,
112-
zipfile.ZIP_LZMA,
113-
zipfile.ZIP_DEFLATED,
114-
zipfile.ZIP_BZIP2
115-
])
119+
@pytest.mark.parametrize("ct", COMPRESS_TYPES)
116120
def test_zipstream_compression(caplog, files, ct):
117121
"""Test that all types of compression properly compress and extract"""
118122
caplog.set_level(logging.WARNING)
@@ -135,12 +139,7 @@ def test_zipstream_compression(caplog, files, ct):
135139
_verify_zip_contains(zf, f)
136140

137141

138-
@pytest.mark.parametrize("ct", [
139-
zipfile.ZIP_STORED,
140-
zipfile.ZIP_LZMA,
141-
zipfile.ZIP_DEFLATED,
142-
zipfile.ZIP_BZIP2
143-
])
142+
@pytest.mark.parametrize("ct", COMPRESS_TYPES)
144143
@pytest.mark.parametrize("cl", [None, 2])
145144
def test_mixed_compression_and_getinfo(ct, cl):
146145
"""Test that files are compressed using the correct method and level and
@@ -159,11 +158,14 @@ def test_mixed_compression_and_getinfo(ct, cl):
159158
zs.add(b"3c", arcname="3c", compress_type=zipfile.ZIP_DEFLATED, compress_level=TEST_CL)
160159
zs.add(b"4", arcname="4", compress_type=zipfile.ZIP_BZIP2)
161160
zs.add(b"4c", arcname="4c", compress_type=zipfile.ZIP_BZIP2, compress_level=TEST_CL)
161+
if not PY313:
162+
zs.add(b"5", arcname="5", compress_type=zipfile.ZIP_ZSTANDARD)
163+
zs.add(b"5c", arcname="5c", compress_type=zipfile.ZIP_ZSTANDARD, compress_level=TEST_CL)
162164

163165
zf = _get_zip(zs)
164166
zinfos = zf.infolist()
165167
fullinfos = zs.info_list()
166-
assert len(zinfos) == len(fullinfos) == 9
168+
assert len(zinfos) == len(fullinfos) == 9 + (0 if PY313 else 2)
167169

168170
def assert_zinfo(idx, name, compress_type, compress_level):
169171
zi = zinfos[idx]
@@ -189,6 +191,9 @@ def assert_zinfo(idx, name, compress_type, compress_level):
189191
assert_zinfo(6, "3c", zipfile.ZIP_DEFLATED, TEST_CL)
190192
assert_zinfo(7, "4", zipfile.ZIP_BZIP2, cl)
191193
assert_zinfo(8, "4c", zipfile.ZIP_BZIP2, TEST_CL)
194+
if not PY313:
195+
assert_zinfo(9, "5", zipfile.ZIP_ZSTANDARD, cl)
196+
assert_zinfo(10, "5c", zipfile.ZIP_ZSTANDARD, TEST_CL)
192197

193198

194199
@pytest.mark.parametrize("zip64", [False, True])
@@ -368,6 +373,34 @@ def test_invalid_compression(ct):
368373
zs.add(".", arcname=".", compress_type=ct)
369374

370375

376+
@pytest.mark.skipif(PY313, reason="Tests zstd compress_level (Python 3.14+ only)")
377+
def test_invalid_zstd_compression():
378+
"""Test that zstd compress_level values outside the valid range raise an error"""
379+
ZipStream(compress_type=zipfile.ZIP_ZSTANDARD)
380+
381+
from compression.zstd import CompressionParameter
382+
lower, upper = CompressionParameter.compression_level.bounds()
383+
384+
for x in (lower, lower+1, 0, upper-1, upper):
385+
ZipStream(compress_type=zipfile.ZIP_ZSTANDARD, compress_level=x)
386+
387+
for x in (lower-1, upper+1):
388+
with pytest.raises(ValueError):
389+
ZipStream(compress_type=zipfile.ZIP_ZSTANDARD, compress_level=x)
390+
with pytest.raises(ValueError):
391+
ZipStream().add_path(".", compress_type=zipfile.ZIP_ZSTANDARD, compress_level=x)
392+
with pytest.raises(ValueError):
393+
ZipStream().add(".", arcname=".", compress_type=zipfile.ZIP_ZSTANDARD, compress_level=x)
394+
395+
zs = ZipStream(compress_type=zipfile.ZIP_ZSTANDARD)
396+
with pytest.raises(ValueError):
397+
zs.add(".", arcname=".", compress_level=x)
398+
399+
zs = ZipStream(compress_level=x)
400+
with pytest.raises(ValueError):
401+
zs.add(".", arcname=".", compress_type=zipfile.ZIP_ZSTANDARD)
402+
403+
371404
def test_multibyte_and_non_ascii_characters_in_filenames():
372405
zs = ZipStream(sized=True)
373406
zs.add(None, "☆/")
@@ -734,12 +767,7 @@ def custom_walk(path):
734767
[b"a", b"list", b"of", b"bytes"],
735768
_gen_rand()
736769
])
737-
@pytest.mark.parametrize("ct", [
738-
zipfile.ZIP_STORED,
739-
zipfile.ZIP_LZMA,
740-
zipfile.ZIP_DEFLATED,
741-
zipfile.ZIP_BZIP2
742-
])
770+
@pytest.mark.parametrize("ct", COMPRESS_TYPES)
743771
def test_adding_data(caplog, data, ct):
744772
"""Test adding non-files with different compression methods"""
745773
caplog.set_level(logging.WARNING)
@@ -1173,6 +1201,36 @@ def fakelocaltime(_=None):
11731201
assert zinfos[0].date_time == (2107, 12, 31, 23, 59, 58)
11741202

11751203

1204+
@pytest.mark.skipif(PY313, reason="Tests zstd compress_level (Python 3.14+ only)")
1205+
def test_zstd_uses_compression_level():
1206+
"""Test that the zstd compression level is applied"""
1207+
zs = ZipStream(compress_type=zipfile.ZIP_ZSTANDARD)
1208+
test = b"a"*1024
1209+
zs.add(test, "-7.txt", compress_level=-7)
1210+
zs.add(test, "default.txt")
1211+
zs.add(test, "22.txt", compress_level=22)
1212+
1213+
data = bytes(zs)
1214+
info = list(zs.info_list())
1215+
assert len(info) == zs.num_streamed() == 3
1216+
1217+
for x in info:
1218+
assert x["size"] == 1024
1219+
assert x["compress_type"] == zipfile.ZIP_ZSTANDARD
1220+
assert x["CRC"] == 2085984185
1221+
1222+
assert info[0]["name"] == "-7.txt"
1223+
assert info[1]["name"] == "default.txt"
1224+
assert info[2]["name"] == "22.txt"
1225+
1226+
# check compress level set
1227+
assert info[0]["compress_level"] == -7
1228+
assert info[1]["compress_level"] == None
1229+
assert info[2]["compress_level"] == 22
1230+
1231+
# check different compressed sizes for each level (in decreasing order as level increases)
1232+
assert info[0]["compressed_size"] > info[1]["compressed_size"] > info[2]["compressed_size"]
1233+
11761234
def test_info_list(monkeypatch):
11771235
faketime = (1980, 1, 1, 0, 0, 0)
11781236

@@ -1228,8 +1286,8 @@ def fakelocaltime(_=None):
12281286
assert len([x for x in info2 if not x["streamed"]]) == zs.num_queued() == 0
12291287
assert len([x for x in info2 if x["streamed"]]) == zs.num_streamed() == 3
12301288

1231-
# Make sure any information that ws provided up-front hasn't changed
1232-
# (except for the "streamed" key which mush got False -> True)
1289+
# Make sure any information that was provided up-front hasn't changed
1290+
# (except for the "streamed" key which must go False -> True)
12331291
for pre, post in zip(info, info2):
12341292
for k, v in pre.items():
12351293
if k == "streamed":
@@ -1525,6 +1583,9 @@ def test_sized_zipstream(monkeypatch, files, zip64):
15251583
ZipStream(sized=True, compress_type=zipfile.ZIP_LZMA)
15261584
with pytest.raises(ValueError):
15271585
ZipStream(sized=True, compress_type=zipfile.ZIP_BZIP2)
1586+
if not PY313:
1587+
with pytest.raises(ValueError):
1588+
ZipStream(sized=True, compress_type=zipfile.ZIP_ZSTANDARD)
15281589

15291590
with pytest.raises(ValueError):
15301591
ZipStream.from_path(".", sized=True, compress_type=zipfile.ZIP_DEFLATED)
@@ -1546,6 +1607,9 @@ def test_sized_zipstream(monkeypatch, files, zip64):
15461607
szs.add("invalid", "invalid", compress_type=zipfile.ZIP_LZMA)
15471608
with pytest.raises(ValueError):
15481609
szs.add("invalid", "invalid", compress_type=zipfile.ZIP_BZIP2)
1610+
if not PY313:
1611+
with pytest.raises(ValueError):
1612+
szs.add("invalid", "invalid", compress_type=zipfile.ZIP_ZSTANDARD)
15491613

15501614
assert szs.sized
15511615
calculated = len(szs)

zipstream/ng.py

Lines changed: 50 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@
3333
)
3434

3535

36+
# Constants for compatibility modes
37+
PY313_COMPAT = sys.version_info < (3, 14) # disable zstd
38+
PY36_COMPAT = sys.version_info < (3, 7) # disable compress_level
39+
PY35_COMPAT = sys.version_info < (3, 6) # backport ZipInfo functions, stringify path-like objects
40+
3641
# Size of chunks to read out of files
3742
# Note that when compressing data the compressor will operate on bigger chunks
3843
# than this - it keeps a cache as new chunks are fed to it.
@@ -51,16 +56,22 @@
5156
# (includes "/" regardless of platform as per ZIP format specification)
5257
PATH_SEPARATORS = set(x for x in (os.sep, os.altsep, "/") if x)
5358

54-
# Constants for compatibility modes
55-
PY36_COMPAT = sys.version_info < (3, 7) # disable compress_level
56-
PY35_COMPAT = sys.version_info < (3, 6) # backport ZipInfo functions, stringify path-like objects
59+
# zstd-related constants
60+
if not PY313_COMPAT:
61+
from zipfile import ZIP_ZSTANDARD, ZSTANDARD_VERSION
62+
from compression.zstd import CompressionParameter
63+
ZSTD_LEVEL_BOUNDS = CompressionParameter.compression_level.bounds()
5764

5865

5966
__all__ = [
6067
# Defined classes
6168
"ZipStream", "ZipStreamInfo",
6269
# Compression constants (imported from zipfile)
63-
"ZIP_STORED", "ZIP_DEFLATED", "BZIP2_VERSION", "ZIP_BZIP2", "LZMA_VERSION", "ZIP_LZMA",
70+
"ZIP_STORED",
71+
"ZIP_DEFLATED",
72+
"ZIP_BZIP2", "BZIP2_VERSION",
73+
"ZIP_LZMA", "LZMA_VERSION",
74+
*(["ZIP_ZSTANDARD", "ZSTANDARD_VERSION"] if not PY313_COMPAT else []),
6475
# Helper functions
6576
"walk"
6677
]
@@ -93,6 +104,24 @@ def _check_compression(compress_type, compress_level):
93104
raise ValueError(
94105
"compress_level must be between 1 and 9 when using ZIP_BZIP2"
95106
)
107+
elif not PY313_COMPAT and compress_type == ZIP_ZSTANDARD:
108+
if not ZSTD_LEVEL_BOUNDS[0] <= compress_level <= ZSTD_LEVEL_BOUNDS[1]:
109+
raise ValueError(
110+
"compress_level must be between {} and {} when using ZIP_ZSTANDARD".format(
111+
*ZSTD_LEVEL_BOUNDS
112+
)
113+
)
114+
115+
116+
def _min_version_for_compress_type(compress_type, min_version=0):
117+
"""Return min_version raised to at least the version required by compress_type"""
118+
if compress_type == ZIP_BZIP2:
119+
min_version = max(BZIP2_VERSION, min_version)
120+
elif compress_type == ZIP_LZMA:
121+
min_version = max(LZMA_VERSION, min_version)
122+
elif not PY313_COMPAT and compress_type == ZIP_ZSTANDARD:
123+
min_version = max(ZSTANDARD_VERSION, min_version)
124+
return min_version
96125

97126

98127
def _timestamp_to_dos(ts):
@@ -177,11 +206,7 @@ def FileHeader(self, zip64):
177206
file_size = 0xFFFFFFFF
178207
compress_size = 0xFFFFFFFF
179208

180-
if self.compress_type == ZIP_BZIP2:
181-
min_version = max(BZIP2_VERSION, min_version)
182-
elif self.compress_type == ZIP_LZMA:
183-
min_version = max(LZMA_VERSION, min_version)
184-
209+
min_version = _min_version_for_compress_type(self.compress_type, min_version)
185210
self.extract_version = max(min_version, self.extract_version)
186211
self.create_version = max(min_version, self.create_version)
187212
filename, flag_bits = self._encodeFilenameFlags()
@@ -315,11 +340,7 @@ def _central_directory_header_data(self):
315340
) + extra_data
316341
min_version = ZIP64_VERSION
317342

318-
if self.compress_type == ZIP_BZIP2:
319-
min_version = max(BZIP2_VERSION, min_version)
320-
elif self.compress_type == ZIP_LZMA:
321-
min_version = max(LZMA_VERSION, min_version)
322-
343+
min_version = _min_version_for_compress_type(self.compress_type, min_version)
323344
extract_version = max(min_version, self.extract_version)
324345
create_version = max(min_version, self.create_version)
325346
filename, flag_bits = self._encodeFilenameFlags()
@@ -502,19 +523,24 @@ def __init__(self, *, compress_type=ZIP_STORED, compress_level=None, sized=False
502523
503524
compress_type:
504525
The ZIP compression method to use when writing the archive, and
505-
should be ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 or ZIP_LZMA;
506-
unrecognized values will cause NotImplementedError to be raised. If
507-
ZIP_DEFLATED, ZIP_BZIP2 or ZIP_LZMA is specified but the
508-
corresponding module (zlib, bz2 or lzma) is not available,
509-
RuntimeError is raised. The default is ZIP_STORED.
526+
should be ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA, or
527+
ZIP_ZSTANDARD (Python 3.14+); unrecognized values will cause
528+
NotImplementedError to be raised.
529+
If ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA, or ZIP_ZSTANDARD is specified
530+
but the corresponding module (zlib, bz2, lzma, or compression.zstd)
531+
is not available, RuntimeError is raised. The default is ZIP_STORED.
510532
511533
compress_level:
512534
Controls the compression level to use when writing files to the
513-
archive. When using ZIP_STORED or ZIP_LZMA it has no effect. When
514-
using ZIP_DEFLATED integers 0 through 9 are accepted (see zlib for
515-
more information). When using ZIP_BZIP2 integers 1 through 9 are
516-
accepted (see bz2 for more information). Raises a ValueError if the
517-
provided value isn't valid for the `compress_type`.
535+
archive. When using ZIP_STORED or ZIP_LZMA it has no effect.
536+
When using ZIP_DEFLATED integers 0 through 9 are accepted (see zlib
537+
for more information).
538+
When using ZIP_BZIP2 integers 1 through 9 are accepted (see bz2 for
539+
more information).
540+
When using ZIP_ZSTANDARD integers -7 through 22 are common (see
541+
compression.zstd.CompressionParameter for more information).
542+
Raises a ValueError if the provided value isn't valid for the
543+
`compress_type`.
518544
519545
Only available in Python 3.7+ (raises a ValueError if used on a
520546
lower version)

0 commit comments

Comments
 (0)