Skip to content

Commit 579ddc2

Browse files
Issue python#16741: Fix error reporting in int().
2 parents e633bed + f6d0aee commit 579ddc2

5 files changed

Lines changed: 98 additions & 68 deletions

File tree

Include/longobject.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ PyAPI_FUNC(PyObject *) PyLong_FromString(char *, char **, int);
9797
#ifndef Py_LIMITED_API
9898
PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int);
9999
PyAPI_FUNC(PyObject *) PyLong_FromUnicodeObject(PyObject *u, int base);
100+
PyAPI_FUNC(PyObject *) _PyLong_FromBytes(const char *, Py_ssize_t, int);
100101
#endif
101102

102103
#ifndef Py_LIMITED_API

Lib/test/test_int.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,6 @@ def test_basic(self):
7373
x = -1-sys.maxsize
7474
self.assertEqual(x >> 1, x//2)
7575

76-
self.assertRaises(ValueError, int, '123\0')
77-
self.assertRaises(ValueError, int, '53', 40)
78-
79-
# SF bug 1545497: embedded NULs were not detected with
80-
# explicit base
81-
self.assertRaises(ValueError, int, '123\0', 10)
82-
self.assertRaises(ValueError, int, '123\x00 245', 20)
83-
8476
x = int('1' * 600)
8577
self.assertIsInstance(x, int)
8678

@@ -401,14 +393,37 @@ def __trunc__(self):
401393
int(TruncReturnsBadInt())
402394

403395
def test_error_message(self):
404-
testlist = ('\xbd', '123\xbd', ' 123 456 ')
405-
for s in testlist:
406-
try:
407-
int(s)
408-
except ValueError as e:
409-
self.assertIn(s.strip(), e.args[0])
410-
else:
411-
self.fail("Expected int(%r) to raise a ValueError", s)
396+
def check(s, base=None):
397+
with self.assertRaises(ValueError,
398+
msg="int(%r, %r)" % (s, base)) as cm:
399+
if base is None:
400+
int(s)
401+
else:
402+
int(s, base)
403+
self.assertEqual(cm.exception.args[0],
404+
"invalid literal for int() with base %d: %r" %
405+
(10 if base is None else base, s))
406+
407+
check('\xbd')
408+
check('123\xbd')
409+
check(' 123 456 ')
410+
411+
check('123\x00')
412+
# SF bug 1545497: embedded NULs were not detected with explicit base
413+
check('123\x00', 10)
414+
check('123\x00 245', 20)
415+
check('123\x00 245', 16)
416+
check('123\x00245', 20)
417+
check('123\x00245', 16)
418+
# byte string with embedded NUL
419+
check(b'123\x00')
420+
check(b'123\x00', 10)
421+
# non-UTF-8 byte string
422+
check(b'123\xbd')
423+
check(b'123\xbd', 10)
424+
# lone surrogate in Unicode string
425+
check('123\ud800')
426+
check('123\ud800', 10)
412427

413428
def test_main():
414429
support.run_unittest(IntTestCases)

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ What's New in Python 3.4.0 Alpha 1?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #16741: Fix error reporting in int().
14+
1315
- Issue #17899: Fix rare file descriptor leak in os.listdir().
1416

1517
- Issue #9035: ismount now recognises volumes mounted below a drive root

Objects/abstract.c

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1261,25 +1261,6 @@ convert_integral_to_int(PyObject *integral, const char *error_format)
12611261
}
12621262

12631263

1264-
/* Add a check for embedded NULL-bytes in the argument. */
1265-
static PyObject *
1266-
long_from_string(const char *s, Py_ssize_t len)
1267-
{
1268-
char *end;
1269-
PyObject *x;
1270-
1271-
x = PyLong_FromString((char*)s, &end, 10);
1272-
if (x == NULL)
1273-
return NULL;
1274-
if (end != s + len) {
1275-
PyErr_SetString(PyExc_ValueError,
1276-
"null byte in argument for int()");
1277-
Py_DECREF(x);
1278-
return NULL;
1279-
}
1280-
return x;
1281-
}
1282-
12831264
PyObject *
12841265
PyNumber_Long(PyObject *o)
12851266
{
@@ -1327,16 +1308,16 @@ PyNumber_Long(PyObject *o)
13271308

13281309
if (PyBytes_Check(o))
13291310
/* need to do extra error checking that PyLong_FromString()
1330-
* doesn't do. In particular int('9.5') must raise an
1331-
* exception, not truncate the float.
1311+
* doesn't do. In particular int('9\x005') must raise an
1312+
* exception, not truncate at the null.
13321313
*/
1333-
return long_from_string(PyBytes_AS_STRING(o),
1334-
PyBytes_GET_SIZE(o));
1314+
return _PyLong_FromBytes(PyBytes_AS_STRING(o),
1315+
PyBytes_GET_SIZE(o), 10);
13351316
if (PyUnicode_Check(o))
13361317
/* The above check is done in PyLong_FromUnicodeObject(). */
13371318
return PyLong_FromUnicodeObject(o, 10);
13381319
if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len))
1339-
return long_from_string(buffer, buffer_len);
1320+
return _PyLong_FromBytes(buffer, buffer_len, 10);
13401321

13411322
return type_error("int() argument must be a string or a "
13421323
"number, not '%.200s'", o);

Objects/longobject.c

Lines changed: 59 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,6 +2000,14 @@ long_from_binary_base(char **str, int base)
20002000
return long_normalize(z);
20012001
}
20022002

2003+
/* Parses a long from a bytestring. Leading and trailing whitespace will be
2004+
* ignored.
2005+
*
2006+
* If successful, a PyLong object will be returned and 'pend' will be pointing
2007+
* to the first unused byte unless it's NULL.
2008+
*
2009+
* If unsuccessful, NULL will be returned.
2010+
*/
20032011
PyObject *
20042012
PyLong_FromString(char *str, char **pend, int base)
20052013
{
@@ -2262,24 +2270,54 @@ digit beyond the first.
22622270
str++;
22632271
if (*str != '\0')
22642272
goto onError;
2265-
if (pend)
2266-
*pend = str;
22672273
long_normalize(z);
2268-
return (PyObject *) maybe_small_long(z);
2274+
z = maybe_small_long(z);
2275+
if (z == NULL)
2276+
return NULL;
2277+
if (pend != NULL)
2278+
*pend = str;
2279+
return (PyObject *) z;
22692280

22702281
onError:
2282+
if (pend != NULL)
2283+
*pend = str;
22712284
Py_XDECREF(z);
22722285
slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200;
22732286
strobj = PyUnicode_FromStringAndSize(orig_str, slen);
22742287
if (strobj == NULL)
22752288
return NULL;
22762289
PyErr_Format(PyExc_ValueError,
2277-
"invalid literal for int() with base %d: %R",
2290+
"invalid literal for int() with base %d: %.200R",
22782291
base, strobj);
22792292
Py_DECREF(strobj);
22802293
return NULL;
22812294
}
22822295

2296+
/* Since PyLong_FromString doesn't have a length parameter,
2297+
* check here for possible NULs in the string.
2298+
*
2299+
* Reports an invalid literal as a bytes object.
2300+
*/
2301+
PyObject *
2302+
_PyLong_FromBytes(const char *s, Py_ssize_t len, int base)
2303+
{
2304+
PyObject *result, *strobj;
2305+
char *end = NULL;
2306+
2307+
result = PyLong_FromString((char*)s, &end, base);
2308+
if (end == NULL || (result != NULL && end == s + len))
2309+
return result;
2310+
Py_XDECREF(result);
2311+
strobj = PyBytes_FromStringAndSize(s, Py_MIN(len, 200));
2312+
if (strobj != NULL) {
2313+
PyErr_Format(PyExc_ValueError,
2314+
"invalid literal for int() with base %d: %.200R",
2315+
base, strobj);
2316+
Py_DECREF(strobj);
2317+
}
2318+
return NULL;
2319+
}
2320+
22832321
PyObject *
22842322
PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
22852323
{
@@ -2294,9 +2332,8 @@ PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
22942332
PyObject *
22952333
PyLong_FromUnicodeObject(PyObject *u, int base)
22962334
{
2297-
PyObject *result;
2298-
PyObject *asciidig;
2299-
char *buffer, *end;
2335+
PyObject *result, *asciidig;
2336+
char *buffer, *end = NULL;
23002337
Py_ssize_t buflen;
23012338

23022339
asciidig = _PyUnicode_TransformDecimalAndSpaceToASCII(u);
@@ -2305,17 +2342,22 @@ PyLong_FromUnicodeObject(PyObject *u, int base)
23052342
buffer = PyUnicode_AsUTF8AndSize(asciidig, &buflen);
23062343
if (buffer == NULL) {
23072344
Py_DECREF(asciidig);
2308-
return NULL;
2345+
if (!PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2346+
return NULL;
23092347
}
2310-
result = PyLong_FromString(buffer, &end, base);
2311-
if (result != NULL && end != buffer + buflen) {
2312-
PyErr_SetString(PyExc_ValueError,
2313-
"null byte in argument for int()");
2314-
Py_DECREF(result);
2315-
result = NULL;
2348+
else {
2349+
result = PyLong_FromString(buffer, &end, base);
2350+
if (end == NULL || (result != NULL && end == buffer + buflen)) {
2351+
Py_DECREF(asciidig);
2352+
return result;
2353+
}
2354+
Py_DECREF(asciidig);
2355+
Py_XDECREF(result);
23162356
}
2317-
Py_DECREF(asciidig);
2318-
return result;
2357+
PyErr_Format(PyExc_ValueError,
2358+
"invalid literal for int() with base %d: %.200R",
2359+
base, u);
2360+
return NULL;
23192361
}
23202362

23212363
/* forward */
@@ -4319,23 +4361,12 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
43194361
if (PyUnicode_Check(x))
43204362
return PyLong_FromUnicodeObject(x, (int)base);
43214363
else if (PyByteArray_Check(x) || PyBytes_Check(x)) {
4322-
/* Since PyLong_FromString doesn't have a length parameter,
4323-
* check here for possible NULs in the string. */
43244364
char *string;
4325-
Py_ssize_t size = Py_SIZE(x);
43264365
if (PyByteArray_Check(x))
43274366
string = PyByteArray_AS_STRING(x);
43284367
else
43294368
string = PyBytes_AS_STRING(x);
4330-
if (strlen(string) != (size_t)size || !size) {
4331-
/* We only see this if there's a null byte in x or x is empty,
4332-
x is a bytes or buffer, *and* a base is given. */
4333-
PyErr_Format(PyExc_ValueError,
4334-
"invalid literal for int() with base %d: %R",
4335-
(int)base, x);
4336-
return NULL;
4337-
}
4338-
return PyLong_FromString(string, NULL, (int)base);
4369+
return _PyLong_FromBytes(string, Py_SIZE(x), (int)base);
43394370
}
43404371
else {
43414372
PyErr_SetString(PyExc_TypeError,

0 commit comments

Comments
 (0)