Skip to content

Commit 9b5eefb

Browse files
authored
Replace _compression with compression and update related too (#6788)
1 parent 0717b53 commit 9b5eefb

File tree

17 files changed

+3623
-194
lines changed

17 files changed

+3623
-194
lines changed

Lib/bz2.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"
1111

1212
from builtins import open as _builtin_open
13+
from compression._common import _streams
1314
import io
1415
import os
15-
import _compression
1616

1717
from _bz2 import BZ2Compressor, BZ2Decompressor
1818

@@ -23,7 +23,7 @@
2323
_MODE_WRITE = 3
2424

2525

26-
class BZ2File(_compression.BaseStream):
26+
class BZ2File(_streams.BaseStream):
2727

2828
"""A file object providing transparent bzip2 (de)compression.
2929
@@ -88,7 +88,7 @@ def __init__(self, filename, mode="r", *, compresslevel=9):
8888
raise TypeError("filename must be a str, bytes, file or PathLike object")
8989

9090
if self._mode == _MODE_READ:
91-
raw = _compression.DecompressReader(self._fp,
91+
raw = _streams.DecompressReader(self._fp,
9292
BZ2Decompressor, trailing_error=OSError)
9393
self._buffer = io.BufferedReader(raw)
9494
else:
@@ -248,7 +248,7 @@ def writelines(self, seq):
248248
249249
Line separators are not added between the written byte strings.
250250
"""
251-
return _compression.BaseStream.writelines(self, seq)
251+
return _streams.BaseStream.writelines(self, seq)
252252

253253
def seek(self, offset, whence=io.SEEK_SET):
254254
"""Change the file position.

Lib/compression/__init__.py

Whitespace-only changes.

Lib/compression/_common/__init__.py

Whitespace-only changes.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Internal classes used by the gzip, lzma and bz2 modules"""
1+
"""Internal classes used by compression modules"""
22

33
import io
44
import sys

Lib/compression/bz2.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Thin alias module: expose the top-level ``bz2`` module's public API and
# docstring under ``compression.bz2`` without leaving a ``bz2`` name bound.
from bz2 import *
from bz2 import __doc__

Lib/compression/gzip.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Thin alias module: expose the top-level ``gzip`` module's public API and
# docstring under ``compression.gzip`` without leaving a ``gzip`` name bound.
from gzip import *
from gzip import __doc__

Lib/compression/lzma.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Thin alias module: expose the top-level ``lzma`` module's public API and
# docstring under ``compression.lzma`` without leaving a ``lzma`` name bound.
from lzma import *
from lzma import __doc__

Lib/compression/zlib.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Thin alias module: expose the top-level ``zlib`` module's public API and
# docstring under ``compression.zlib`` without leaving a ``zlib`` name bound.
from zlib import *
from zlib import __doc__

Lib/compression/zstd/__init__.py

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
"""Python bindings to the Zstandard (zstd) compression library (RFC-8878)."""
2+
3+
# Public API of the package, grouped by where each name comes from.
__all__ = (
    # Defined in this module (compression.zstd)
    'COMPRESSION_LEVEL_DEFAULT',
    'compress',
    'CompressionParameter',
    'decompress',
    'DecompressionParameter',
    'finalize_dict',
    'get_frame_info',
    'Strategy',
    'train_dict',

    # Imported from compression.zstd._zstdfile
    'open',
    'ZstdFile',

    # Imported from, or derived from, the _zstd extension module
    'get_frame_size',
    'zstd_version',
    'zstd_version_info',
    'ZstdCompressor',
    'ZstdDecompressor',
    'ZstdDict',
    'ZstdError',
)
28+
29+
import _zstd
30+
import enum
31+
from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError,
32+
get_frame_size, zstd_version)
33+
from compression.zstd._zstdfile import ZstdFile, open, _nbytes
34+
35+
# _zstd.zstd_version_number packs the library version into one integer as
# MAJOR * 100 * 100 + MINOR * 100 + RELEASE; unpack it into three fields.
zstd_version_info = (
    _zstd.zstd_version_number // 10000,
    (_zstd.zstd_version_number // 100) % 100,
    _zstd.zstd_version_number % 100,
)
"""Version number of the runtime zstd library as a tuple of integers."""

COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT
"""The default compression level for Zstandard, currently '3'."""
42+
43+
44+
class FrameInfo:
    """Read-only record describing a Zstandard frame.

    Instances carry two attributes, ``decompressed_size`` and
    ``dictionary_id``. Both are stored once by the constructor; any later
    attribute assignment raises AttributeError.
    """

    __slots__ = 'decompressed_size', 'dictionary_id'

    def __init__(self, decompressed_size, dictionary_id):
        # This class's own __setattr__ rejects every assignment, so the
        # initial values are stored through the parent implementation.
        store = super().__setattr__
        store('decompressed_size', decompressed_size)
        store('dictionary_id', dictionary_id)

    def __repr__(self):
        size, dict_id = self.decompressed_size, self.dictionary_id
        return f'FrameInfo(decompressed_size={size}, dictionary_id={dict_id})'

    def __setattr__(self, name, _):
        # Reject all assignments, whatever the attribute name or value.
        message = f"can't set attribute {name!r}"
        raise AttributeError(message)
59+
60+
61+
def get_frame_info(frame_buffer):
    """Return a FrameInfo describing the frame that starts *frame_buffer*.

    *frame_buffer* is a bytes-like object. It must begin at the start of a
    Zstandard frame and contain at least the whole frame header (6 to 18
    bytes).

    The returned FrameInfo object has two attributes:

    * 'decompressed_size' is the size in bytes of the frame's data once
      decompressed, or None when that size is unknown.
    * 'dictionary_id' is an int in the range (0, 2**32). The special value
      0 means that no dictionary ID was recorded in the frame header: the
      frame may or may not need a dictionary to be decoded, and the ID of
      such a dictionary is not specified.
    """
    header_fields = _zstd.get_frame_info(frame_buffer)
    return FrameInfo(*header_fields)
76+
77+
78+
def train_dict(samples, dict_size):
    """Train a Zstandard dictionary on *samples* and return it as a ZstdDict.

    *samples* is an iterable of samples, each one a bytes-like object
    representing a file.

    *dict_size* is the maximum size of the dictionary, in bytes.

    Raises TypeError if *dict_size* is not an int, and ValueError if the
    samples contain no data at all.
    """
    if not isinstance(dict_size, int):
        type_name = type(dict_size).__qualname__
        message = f'dict_size must be an int object, not {type_name!r}.'
        raise TypeError(message)

    # Materialize the iterable first: it is walked twice below, once to
    # concatenate the samples and once to record each sample's length.
    sample_seq = tuple(samples)
    joined = b''.join(sample_seq)
    sizes = tuple(map(_nbytes, sample_seq))
    if not joined:
        raise ValueError("samples contained no data; can't train dictionary.")
    return ZstdDict(_zstd.train_dict(joined, sizes, dict_size))
97+
98+
99+
def finalize_dict(zstd_dict, /, samples, dict_size, level):
    """Finalize *zstd_dict* against *samples* and return a new ZstdDict.

    Starting from custom content used as the basis of a dictionary, plus a
    set of samples, this adds the headers and statistics required by the
    Zstandard dictionary format.

    This lets you compose effective dictionary content by hand, use it as
    the basis dictionary, and finalize it with some samples. The basis
    dictionary may be a "raw content" dictionary. See *is_raw* in ZstdDict.

    *samples* is an iterable of samples, each one a bytes-like object
    representing a file.
    *dict_size* is the maximum size of the dictionary, in bytes.
    *level* is the expected compression level. The statistics differ for
    each compression level, so tuning the dictionary to the level it will
    be used with can provide improvements.

    Raises TypeError for a *zstd_dict* that is not a ZstdDict or for a
    non-int *dict_size* or *level*, and ValueError if the samples contain
    no data at all.
    """
    if not isinstance(zstd_dict, ZstdDict):
        raise TypeError('zstd_dict argument should be a ZstdDict object.')
    # dict_size is checked before level, matching the parameter order.
    for arg_name, arg in (('dict_size', dict_size), ('level', level)):
        if not isinstance(arg, int):
            raise TypeError(f'{arg_name} argument should be an int object.')

    # Materialize the iterable first: it is walked twice below, once to
    # concatenate the samples and once to record each sample's length.
    sample_seq = tuple(samples)
    joined = b''.join(sample_seq)
    sizes = tuple(map(_nbytes, sample_seq))
    if not joined:
        raise ValueError("The samples are empty content, can't finalize the "
                         "dictionary.")
    finalized = _zstd.finalize_dict(zstd_dict.dict_content, joined,
                                    sizes, dict_size, level)
    return ZstdDict(finalized)
134+
135+
136+
def compress(data, level=None, options=None, zstd_dict=None):
    """Compress *data* in one shot and return the Zstandard result as bytes.

    *level* is an int specifying the compression level to use, defaulting
    to COMPRESSION_LEVEL_DEFAULT ('3').
    *options* is a dict object that contains advanced compression
    parameters. See CompressionParameter for more on options.
    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary.
    See the function train_dict for how to train a ZstdDict on sample data.

    For incremental compression, use a ZstdCompressor instead.
    """
    compressor = ZstdCompressor(
        level=level,
        options=options,
        zstd_dict=zstd_dict,
    )
    # FLUSH_FRAME is passed so that this single call emits the whole output.
    return compressor.compress(data, mode=ZstdCompressor.FLUSH_FRAME)
150+
151+
152+
def decompress(data, zstd_dict=None, options=None):
    """Decompress one or more concatenated Zstandard frames in *data*.

    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary.
    See the function train_dict for how to train a ZstdDict on sample data.
    *options* is a dict object that contains advanced decompression
    parameters. See DecompressionParameter for more on options.

    Raises ZstdError if the data ends before the end of a frame is reached.

    For incremental decompression, use a ZstdDecompressor instead.
    """
    pieces = []
    remaining = data
    while True:
        # A fresh decompressor is created for every frame; whatever it
        # leaves in unused_data is fed to the next iteration.
        decompressor = ZstdDecompressor(options=options, zstd_dict=zstd_dict)
        pieces.append(decompressor.decompress(remaining))
        if not decompressor.eof:
            raise ZstdError('Compressed data ended before the '
                            'end-of-stream marker was reached')
        remaining = decompressor.unused_data
        if not remaining:
            return b''.join(pieces)
173+
174+
175+
class CompressionParameter(enum.IntEnum):
    """Advanced compression parameters, as IntEnum members.

    Each member's value is the matching ``ZSTD_c_*`` constant exported by
    the ``_zstd`` extension module.
    """

    # General compression tuning parameters.
    compression_level = _zstd.ZSTD_c_compressionLevel
    window_log = _zstd.ZSTD_c_windowLog
    hash_log = _zstd.ZSTD_c_hashLog
    chain_log = _zstd.ZSTD_c_chainLog
    search_log = _zstd.ZSTD_c_searchLog
    min_match = _zstd.ZSTD_c_minMatch
    target_length = _zstd.ZSTD_c_targetLength
    strategy = _zstd.ZSTD_c_strategy

    # Long-distance matching (ldm_*) parameters.
    enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching
    ldm_hash_log = _zstd.ZSTD_c_ldmHashLog
    ldm_min_match = _zstd.ZSTD_c_ldmMinMatch
    ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog
    ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog

    # Flag parameters.
    content_size_flag = _zstd.ZSTD_c_contentSizeFlag
    checksum_flag = _zstd.ZSTD_c_checksumFlag
    dict_id_flag = _zstd.ZSTD_c_dictIDFlag

    # Worker and job parameters.
    nb_workers = _zstd.ZSTD_c_nbWorkers
    job_size = _zstd.ZSTD_c_jobSize
    overlap_log = _zstd.ZSTD_c_overlapLog

    def bounds(self):
        """Return the (lower, upper) int bounds of this compression parameter.

        Both the lower and upper bounds are inclusive.
        """
        parameter = self.value
        return _zstd.get_param_bounds(parameter, is_compress=True)
207+
208+
209+
class DecompressionParameter(enum.IntEnum):
    """Advanced decompression parameters, as IntEnum members.

    Each member's value is the matching ``ZSTD_d_*`` constant exported by
    the ``_zstd`` extension module.
    """

    window_log_max = _zstd.ZSTD_d_windowLogMax

    def bounds(self):
        """Return the (lower, upper) int bounds of this decompression parameter.

        Both the lower and upper bounds are inclusive.
        """
        parameter = self.value
        return _zstd.get_param_bounds(parameter, is_compress=False)
220+
221+
222+
class Strategy(enum.IntEnum):
    """Compression strategies, ordered from fastest to strongest.

    New strategies might be added in the future. Only the ordering (from
    fast to strong) is guaranteed; the numeric values might change.
    """

    # Members are declared in fastest-to-strongest order; do not reorder.
    fast = _zstd.ZSTD_fast
    dfast = _zstd.ZSTD_dfast
    greedy = _zstd.ZSTD_greedy
    lazy = _zstd.ZSTD_lazy
    lazy2 = _zstd.ZSTD_lazy2
    btlazy2 = _zstd.ZSTD_btlazy2
    btopt = _zstd.ZSTD_btopt
    btultra = _zstd.ZSTD_btultra
    btultra2 = _zstd.ZSTD_btultra2
239+
240+
241+
# Pass both parameter enum classes to the _zstd extension module.
# NOTE(review): the original comment describes this as checking the validity
# of the CompressionParameter & DecompressionParameter types; what _zstd does
# with them is not visible here, so confirm against the extension module.
_zstd.set_parameter_types(CompressionParameter, DecompressionParameter)

0 commit comments

Comments
 (0)