Skip to content

Commit 90493ab

Browse files
Issue python#25761: Improved error reporting for truncated pickle data in
the C implementation of the unpickler. UnpicklingError is now raised instead of AttributeError and ValueError in some cases.
1 parent df6ff7b commit 90493ab

File tree

3 files changed

+46
-41
lines changed

3 files changed

+46
-41
lines changed

Lib/test/test_pickle.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,7 @@ def get_dispatch_table(self):
139139
class CUnpicklerTests(PyUnpicklerTests):
140140
unpickler = _pickle.Unpickler
141141
bad_stack_errors = (pickle.UnpicklingError,)
142-
truncated_errors = (pickle.UnpicklingError, EOFError,
143-
AttributeError, ValueError)
142+
truncated_errors = (pickle.UnpicklingError,)
144143

145144
class CPicklerTests(PyPicklerTests):
146145
pickler = _pickle.Pickler

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@ Core and Builtins
8989
Library
9090
-------
9191

92+
- Issue #25761: Improved error reporting about truncated pickle data in
93+
C implementation of unpickler. UnpicklingError is now raised instead of
94+
AttributeError and ValueError in some cases.
95+
9296
- Issue #26798: Add BLAKE2 (blake2b and blake2s) to hashlib.
9397

9498
- Issue #25596: Optimized glob() and iglob() functions in the

Modules/_pickle.c

Lines changed: 41 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,14 @@ _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
10911091
return self->input_len;
10921092
}
10931093

1094+
static int
1095+
bad_readline(void)
1096+
{
1097+
PickleState *st = _Pickle_GetGlobalState();
1098+
PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1099+
return -1;
1100+
}
1101+
10941102
static int
10951103
_Unpickler_SkipConsumed(UnpicklerObject *self)
10961104
{
@@ -1195,17 +1203,14 @@ _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
11951203
/* This case is handled by the _Unpickler_Read() macro for efficiency */
11961204
assert(self->next_read_idx + n > self->input_len);
11971205

1198-
if (!self->read) {
1199-
PyErr_Format(PyExc_EOFError, "Ran out of input");
1200-
return -1;
1201-
}
1206+
if (!self->read)
1207+
return bad_readline();
1208+
12021209
num_read = _Unpickler_ReadFromFile(self, n);
12031210
if (num_read < 0)
12041211
return -1;
1205-
if (num_read < n) {
1206-
PyErr_Format(PyExc_EOFError, "Ran out of input");
1207-
return -1;
1208-
}
1212+
if (num_read < n)
1213+
return bad_readline();
12091214
*s = self->input_buffer;
12101215
self->next_read_idx = n;
12111216
return n;
@@ -1249,7 +1254,7 @@ _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
12491254
}
12501255

12511256
/* Read a line from the input stream/buffer. If we run off the end of the input
1252-
before hitting \n, return the data we found.
1257+
before hitting \n, raise an error.
12531258
12541259
Returns the number of chars read, or -1 on failure. */
12551260
static Py_ssize_t
@@ -1265,20 +1270,16 @@ _Unpickler_Readline(UnpicklerObject *self, char **result)
12651270
return _Unpickler_CopyLine(self, line_start, num_read, result);
12661271
}
12671272
}
1268-
if (self->read) {
1269-
num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1270-
if (num_read < 0)
1271-
return -1;
1272-
self->next_read_idx = num_read;
1273-
return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1274-
}
1273+
if (!self->read)
1274+
return bad_readline();
12751275

1276-
/* If we get here, we've run off the end of the input string. Return the
1277-
remaining string and let the caller figure it out. */
1278-
*result = self->input_buffer + self->next_read_idx;
1279-
num_read = i - self->next_read_idx;
1280-
self->next_read_idx = i;
1281-
return num_read;
1276+
num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1277+
if (num_read < 0)
1278+
return -1;
1279+
if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1280+
return bad_readline();
1281+
self->next_read_idx = num_read;
1282+
return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
12821283
}
12831284

12841285
/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
@@ -4599,14 +4600,6 @@ load_none(UnpicklerObject *self)
45994600
return 0;
46004601
}
46014602

4602-
static int
4603-
bad_readline(void)
4604-
{
4605-
PickleState *st = _Pickle_GetGlobalState();
4606-
PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
4607-
return -1;
4608-
}
4609-
46104603
static int
46114604
load_int(UnpicklerObject *self)
46124605
{
@@ -6245,8 +6238,13 @@ load(UnpicklerObject *self)
62456238
case opcode: if (load_func(self, (arg)) < 0) break; continue;
62466239

62476240
while (1) {
6248-
if (_Unpickler_Read(self, &s, 1) < 0)
6249-
break;
6241+
if (_Unpickler_Read(self, &s, 1) < 0) {
6242+
PickleState *st = _Pickle_GetGlobalState();
6243+
if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6244+
PyErr_Format(PyExc_EOFError, "Ran out of input");
6245+
}
6246+
return NULL;
6247+
}
62506248

62516249
switch ((enum opcode)s[0]) {
62526250
OP(NONE, load_none)
@@ -6318,15 +6316,19 @@ load(UnpicklerObject *self)
63186316
break;
63196317

63206318
default:
6321-
if (s[0] == '\0') {
6322-
PyErr_SetNone(PyExc_EOFError);
6323-
}
6324-
else {
6319+
{
63256320
PickleState *st = _Pickle_GetGlobalState();
6326-
PyErr_Format(st->UnpicklingError,
6327-
"invalid load key, '%c'.", s[0]);
6321+
unsigned char c = (unsigned char) *s;
6322+
if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6323+
PyErr_Format(st->UnpicklingError,
6324+
"invalid load key, '%c'.", c);
6325+
}
6326+
else {
6327+
PyErr_Format(st->UnpicklingError,
6328+
"invalid load key, '\\x%02x'.", c);
6329+
}
6330+
return NULL;
63286331
}
6329-
return NULL;
63306332
}
63316333

63326334
break; /* and we are done! */

0 commit comments

Comments
 (0)