Skip to content

Commit c36ef35

Browse files
[3.12] gh-150599: Prevent bz2 decompressor reuse after errors (#150600) (#151054)
(cherry picked from commit 5755d0f)
1 parent 7c999be commit c36ef35

3 files changed

Lines changed: 33 additions & 3 deletions

File tree

Lib/test/test_bz2.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -958,6 +958,21 @@ def test_failure(self):
958958
# Previously, a second call could crash due to internal inconsistency
959959
self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30)
960960

961+
def test_decompress_after_data_error(self):
962+
data = bytes.fromhex(
963+
"425a6839314159265359000000000000007fffff000000000000000000000000"
964+
"00000000000000000000000000000000000000e0370000000000000000000000"
965+
"000000000000000000000000000000000000000000000000000083f3"
966+
)
967+
bzd = BZ2Decompressor()
968+
with self.assertRaisesRegex(OSError, "Invalid data stream"):
969+
bzd.decompress(data)
970+
# Previously, a second call could crash due to internal inconsistency
971+
self.assertFalse(bzd.needs_input)
972+
self.assertFalse(bzd.eof)
973+
with self.assertRaisesRegex(ValueError, "previous error"):
974+
bzd.decompress(b'\x00' * 18)
975+
961976
@support.refcount_test
962977
def test_refleaks_in___init__(self):
963978
gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount')
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix a possible stack buffer overflow in :mod:`bz2` when a
2+
:class:`bz2.BZ2Decompressor` is reused after a decompression error.
3+
The decompressor now becomes unusable after libbz2 reports an error: further calls to :meth:`~bz2.BZ2Decompressor.decompress` raise :exc:`ValueError`.

Modules/_bz2module.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ typedef struct {
114114
typedef struct {
115115
PyObject_HEAD
116116
bz_stream bzs;
117+
int bzerror;
117118
char eof; /* T_BOOL expects a char */
118119
PyObject *unused_data;
119120
char needs_input;
@@ -453,8 +454,11 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
453454

454455
d->bzs_avail_in_real += bzs->avail_in;
455456

456-
if (catch_bz2_error(bzret))
457+
if (catch_bz2_error(bzret)) {
458+
d->bzerror = bzret;
459+
_Py_atomic_store_char_relaxed(&d->needs_input, 0);
457460
goto error;
461+
}
458462
if (bzret == BZ_STREAM_END) {
459463
d->eof = 1;
460464
break;
@@ -621,10 +625,17 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
621625
PyObject *result = NULL;
622626

623627
ACQUIRE_LOCK(self);
624-
if (self->eof)
628+
if (self->eof) {
625629
PyErr_SetString(PyExc_EOFError, "End of stream already reached");
626-
else
630+
}
631+
else if (self->bzerror) {
632+
// Re-entering BZ2_bzDecompress() after an error can write out of bounds.
633+
PyErr_SetString(PyExc_ValueError,
634+
"Decompressor is unusable after a previous error");
635+
}
636+
else {
627637
result = decompress(self, data->buf, data->len, max_length);
638+
}
628639
RELEASE_LOCK(self);
629640
return result;
630641
}
@@ -658,6 +669,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type)
658669
return NULL;
659670
}
660671

672+
self->bzerror = 0;
661673
self->needs_input = 1;
662674
self->bzs_avail_in_real = 0;
663675
self->input_buffer = NULL;

0 commit comments

Comments
 (0)