Skip to content

Commit 80fd339

Browse files
author
jhylton
committed
[ #403753 ] zlib decompress; uncontrollable memory usage
Mostly by Toby Dickenson and Titus Brown. Add an optional argument to a decompression object's decompress() method. The argument specifies the maximum length of the return value. If the uncompressed data exceeds this length, the excess data is stored as the unconsumed_tail attribute. (Not to be confused with unused_data, which is a separate issue.) Difference from SF patch: Default value for unconsumed_tail is "" rather than None. It's simpler if the attribute is always a string. git-svn-id: http://svn.python.org/projects/python/trunk@23730 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 30b34e6 commit 80fd339

4 files changed

Lines changed: 113 additions & 12 deletions

File tree

Doc/lib/libzlib.tex

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ \section{\module{zlib} ---
120120
action is to delete the object.
121121
\end{methoddesc}
122122

123-
Decompression objects support the following methods, and a single attribute:
123+
Decompression objects support the following methods, and two attributes:
124124

125125
\begin{memberdesc}{unused_data}
126126
A string which contains any unused data from the last string fed to
@@ -135,13 +135,27 @@ \section{\module{zlib} ---
135135
no longer the empty string.
136136
\end{memberdesc}
137137

138-
\begin{methoddesc}[Decompress]{decompress}{string}
138+
\begin{memberdesc}{unconsumed_tail}
139+
A string that contains any data that was not consumed by the last
140+
\method{decompress} call because it exceeded the limit for the
141+
uncompressed data buffer.
142+
\end{memberdesc}
143+
144+
\begin{methoddesc}[Decompress]{decompress}{string}{\optional{max_length}}
139145
Decompress \var{string}, returning a string containing the
140146
uncompressed data corresponding to at least part of the data in
141147
\var{string}. This data should be concatenated to the output produced
142148
by any preceding calls to the
143149
\method{decompress()} method. Some of the input data may be preserved
144150
in internal buffers for later processing.
151+
152+
If the optional parameter \var{max_length} is supplied then the return value
153+
will be no longer than \var{max_length}. This may mean that not all of the
154+
compressed input can be processed, and any unconsumed data will be stored
155+
in the attribute \member{unconsumed_tail}. This string must be passed
156+
to a subsequent call to \method{decompress()} if decompression is to
157+
continue. If \var{max_length} is not supplied then the whole input is
158+
decompressed, and \member{unconsumed_tail} is an empty string.
145159
\end{methoddesc}
146160

147161
\begin{methoddesc}[Decompress]{flush}{}

Lib/test/output/test_zlib

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,7 @@ normal compression/decompression succeeded
88
compress/decompression obj succeeded
99
decompress with init options succeeded
1010
decompressobj with init options succeeded
11+
should be '': ''
12+
max_length decompressobj succeeded
13+
unconsumed_tail should be '': ''
1114
Testing on 17K of random data

Lib/test/test_zlib.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,36 @@
7676
else:
7777
print "decompressobj with init options succeeded"
7878

79+
print "should be '':", `deco.unconsumed_tail`
80+
81+
# Check a decompression object with max_length specified
82+
deco = zlib.decompressobj(-12)
83+
cb = combuf
84+
bufs = []
85+
while cb:
86+
max_length = 1 + len(cb)/10
87+
chunk = deco.decompress(cb, max_length)
88+
if len(chunk) > max_length:
89+
print 'chunk too big (%d>%d)' % (len(chunk),max_length)
90+
bufs.append(chunk)
91+
cb = deco.unconsumed_tail
92+
bufs.append(deco.flush())
93+
decomp2 = ''.join(buf)
94+
if decomp2 != buf:
95+
print "max_length decompressobj failed"
96+
else:
97+
print "max_length decompressobj succeeded"
98+
99+
# Misc tests of max_length
100+
deco = zlib.decompressobj(-12)
101+
try:
102+
deco.decompress("", -1)
103+
except ValueError:
104+
pass
105+
else:
106+
print "failed to raise value error on bad max_length"
107+
print "unconsumed_tail should be '':", `deco.unconsumed_tail`
108+
79109
# Test flush() with the various options, using all the different levels
80110
# in order to provide more variations.
81111
sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH']

Modules/zlibmodule.c

Lines changed: 64 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ typedef struct
7878
PyObject_HEAD
7979
z_stream zst;
8080
PyObject *unused_data;
81+
PyObject *unconsumed_tail;
8182
int is_initialised;
8283
} compobject;
8384

@@ -100,6 +101,15 @@ newcompobject(PyTypeObject *type)
100101
return NULL;
101102
self->is_initialised = 0;
102103
self->unused_data = PyString_FromString("");
104+
if (self->unused_data == NULL) {
105+
Py_DECREF(self);
106+
return NULL;
107+
}
108+
self->unconsumed_tail = PyString_FromString("");
109+
if (self->unconsumed_tail == NULL) {
110+
Py_DECREF(self);
111+
return NULL;
112+
}
103113
return self;
104114
}
105115

@@ -485,6 +495,7 @@ Comp_dealloc(compobject *self)
485495
if (self->is_initialised)
486496
deflateEnd(&self->zst);
487497
Py_XDECREF(self->unused_data);
498+
Py_XDECREF(self->unconsumed_tail);
488499
PyObject_Del(self);
489500

490501
LEAVE_ZLIB
@@ -498,6 +509,7 @@ Decomp_dealloc(compobject *self)
498509
if (self->is_initialised)
499510
inflateEnd(&self->zst);
500511
Py_XDECREF(self->unused_data);
512+
Py_XDECREF(self->unconsumed_tail);
501513
PyObject_Del(self);
502514

503515
LEAVE_ZLIB
@@ -595,27 +607,41 @@ PyZlib_objcompress(compobject *self, PyObject *args)
595607
}
596608

597609
static char decomp_decompress__doc__[] =
598-
"decompress(data) -- Return a string containing the decompressed version of the data.\n\n"
610+
"decompress(data, max_length) -- Return a string containing\n"
611+
"the decompressed version of the data.\n\n"
599612
"After calling this function, some of the input data may still\n"
600613
"be stored in internal buffers for later processing.\n"
601-
"Call the flush() method to clear these buffers."
614+
"Call the flush() method to clear these buffers.\n"
615+
"If the max_length parameter is specified then the return value will be\n"
616+
"no longer than max_length. Unconsumed input data will be stored in\n"
617+
"the unconsumed_tail attribute."
602618
;
603619

604620
static PyObject *
605621
PyZlib_objdecompress(compobject *self, PyObject *args)
606622
{
607-
int err, inplen, length = DEFAULTALLOC;
623+
int err, inplen, old_length, length = DEFAULTALLOC;
624+
int max_length = 0;
608625
PyObject *RetVal;
609626
Byte *input;
610627
unsigned long start_total_out;
611628
int return_error;
612629
PyObject * inputString;
613630

614-
if (!PyArg_ParseTuple(args, "S:decompress", &inputString))
631+
if (!PyArg_ParseTuple(args, "S|i:decompress", &inputString, &max_length))
632+
return NULL;
633+
if (max_length < 0) {
634+
PyErr_SetString(PyExc_ValueError,
635+
"max_length must be greater than zero");
615636
return NULL;
637+
}
638+
616639
if (PyString_AsStringAndSize(inputString, (char**)&input, &inplen) == -1)
617640
return NULL;
618641

642+
/* limit amount of data allocated to max_length */
643+
if (max_length && length > max_length)
644+
length = max_length;
619645
if (!(RetVal = PyString_FromStringAndSize(NULL, length))) {
620646
PyErr_SetString(PyExc_MemoryError,
621647
"Can't allocate memory to compress data");
@@ -637,23 +663,46 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
637663
err = inflate(&(self->zst), Z_SYNC_FLUSH);
638664
Py_END_ALLOW_THREADS
639665

640-
/* while Z_OK and the output buffer is full, there might be more output,
641-
so extend the output buffer and try again */
666+
/* While Z_OK and the output buffer is full, there might be more output.
667+
So extend the output buffer and try again.
668+
*/
642669
while (err == Z_OK && self->zst.avail_out == 0) {
643-
if (_PyString_Resize(&RetVal, length << 1) == -1) {
670+
/* If max_length is set, don't continue decompressing if we've already
671+
reached the limit.
672+
*/
673+
if (max_length && length >= max_length)
674+
break;
675+
676+
/* otherwise, ... */
677+
old_length = length;
678+
length = length << 1;
679+
if (max_length && length > max_length)
680+
length = max_length;
681+
682+
if (_PyString_Resize(&RetVal, length) == -1) {
644683
PyErr_SetString(PyExc_MemoryError,
645684
"Can't allocate memory to compress data");
646685
return_error = 1;
647686
break;
648687
}
649-
self->zst.next_out = (unsigned char *)PyString_AsString(RetVal) + length;
650-
self->zst.avail_out = length;
651-
length = length << 1;
688+
self->zst.next_out = (unsigned char *)PyString_AsString(RetVal)+old_length;
689+
self->zst.avail_out = length - old_length;
690+
652691
Py_BEGIN_ALLOW_THREADS
653692
err = inflate(&(self->zst), Z_SYNC_FLUSH);
654693
Py_END_ALLOW_THREADS
655694
}
656695

696+
/* Not all of the compressed data could be accommodated in the output buffer
697+
of specified size. Return the unconsumed tail in an attribute.*/
698+
if(max_length) {
699+
Py_DECREF(self->unconsumed_tail);
700+
self->unconsumed_tail = PyString_FromStringAndSize(self->zst.next_in,
701+
self->zst.avail_in);
702+
if(!self->unconsumed_tail)
703+
return_error = 1;
704+
}
705+
657706
/* The end of the compressed data has been reached, so set the unused_data
658707
attribute to a string containing the remainder of the data in the string.
659708
Note that this is also a logical place to call inflateEnd, but the old
@@ -885,6 +934,11 @@ Decomp_getattr(compobject *self, char *name)
885934
Py_INCREF(self->unused_data);
886935
retval = self->unused_data;
887936
}
937+
else if (strcmp(name, "unconsumed_tail") == 0)
938+
{
939+
Py_INCREF(self->unconsumed_tail);
940+
retval = self->unconsumed_tail;
941+
}
888942
else
889943
retval = Py_FindMethod(Decomp_methods, (PyObject *)self, name);
890944

0 commit comments

Comments
 (0)