Skip to content

Commit f45bbb6

Browse files
committed
Issue python#19638: Raise ValueError instead of crashing when converting billion-character strings to float.
1 parent 22dc4d5 commit f45bbb6

3 files changed

Lines changed: 77 additions & 12 deletions

File tree

Lib/test/test_strtod.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,37 @@ def test_parsing(self):
248248
else:
249249
assert False, "expected ValueError"
250250

251+
@test.support.bigmemtest(size=5 * test.support._1G, memuse=1, dry_run=False)
252+
def test_oversized_digit_strings(self, maxsize):
253+
# Input string whose length doesn't fit in a C int.
254+
s = "1." + "1" * int(2.2e9)
255+
with self.assertRaises(ValueError):
256+
float(s)
257+
del s
258+
259+
s = "0." + "0" * int(2.2e9) + "1"
260+
with self.assertRaises(ValueError):
261+
float(s)
262+
del s
263+
264+
def test_large_exponents(self):
265+
# Verify that the clipping of the exponent in strtod doesn't affect the
266+
# output values.
267+
def positive_exp(n):
268+
""" Long string with value 1.0 and exponent n"""
269+
return '0.{}1e+{}'.format('0'*(n-1), n)
270+
271+
def negative_exp(n):
272+
""" Long string with value 1.0 and exponent -n"""
273+
return '1{}e-{}'.format('0'*n, n)
274+
275+
self.assertEqual(float(positive_exp(10000)), 1.0)
276+
self.assertEqual(float(positive_exp(20000)), 1.0)
277+
self.assertEqual(float(positive_exp(30000)), 1.0)
278+
self.assertEqual(float(negative_exp(10000)), 1.0)
279+
self.assertEqual(float(negative_exp(20000)), 1.0)
280+
self.assertEqual(float(negative_exp(30000)), 1.0)
281+
251282
def test_particular(self):
252283
# inputs that produced crashes or incorrectly rounded results with
253284
# previous versions of dtoa.c, for various reasons

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.3.4 release candidate 1?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #19638: Fix possible crash / undefined behaviour from huge (more than 2
14+
billion characters) input strings in _Py_dg_strtod.
15+
1316
Library
1417
-------
1518

Python/dtoa.c

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,24 @@ typedef union { double d; ULong L[2]; } U;
204204
MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP. MAX_ABS_EXP
205205
should fit into an int. */
206206
#ifndef MAX_ABS_EXP
207-
#define MAX_ABS_EXP 19999U
207+
#define MAX_ABS_EXP 1100000000U
208+
#endif
209+
/* Bound on length of pieces of input strings in _Py_dg_strtod; specifically,
210+
this is used to bound the total number of digits ignoring leading zeros and
211+
the number of digits that follow the decimal point. Ideally, MAX_DIGITS
212+
should satisfy MAX_DIGITS + 400 < MAX_ABS_EXP; that ensures that the
213+
exponent clipping in _Py_dg_strtod can't affect the value of the output. */
214+
#ifndef MAX_DIGITS
215+
#define MAX_DIGITS 1000000000U
216+
#endif
217+
218+
/* Guard against trying to use the above values on unusual platforms with ints
219+
* of width less than 32 bits. */
220+
#if MAX_ABS_EXP > INT_MAX
221+
#error "MAX_ABS_EXP should fit in an int"
222+
#endif
223+
#if MAX_DIGITS > INT_MAX
224+
#error "MAX_DIGITS should fit in an int"
208225
#endif
209226

210227
/* The following definition of Storeinc is appropriate for MIPS processors.
@@ -1538,6 +1555,7 @@ _Py_dg_strtod(const char *s00, char **se)
15381555
Long L;
15391556
BCinfo bc;
15401557
Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
1558+
size_t ndigits, fraclen;
15411559

15421560
dval(&rv) = 0.;
15431561

@@ -1560,39 +1578,52 @@ _Py_dg_strtod(const char *s00, char **se)
15601578
c = *++s;
15611579
lz = s != s1;
15621580

1563-
/* Point s0 at the first nonzero digit (if any). nd0 will be the position
1564-
of the point relative to s0. nd will be the total number of digits
1565-
ignoring leading zeros. */
1581+
/* Point s0 at the first nonzero digit (if any). fraclen will be the
1582+
number of digits between the decimal point and the end of the
1583+
digit string. ndigits will be the total number of digits ignoring
1584+
leading zeros. */
15661585
s0 = s1 = s;
15671586
while ('0' <= c && c <= '9')
15681587
c = *++s;
1569-
nd0 = nd = s - s1;
1588+
ndigits = s - s1;
1589+
fraclen = 0;
15701590

15711591
/* Parse decimal point and following digits. */
15721592
if (c == '.') {
15731593
c = *++s;
1574-
if (!nd) {
1594+
if (!ndigits) {
15751595
s1 = s;
15761596
while (c == '0')
15771597
c = *++s;
15781598
lz = lz || s != s1;
1579-
nd0 -= s - s1;
1599+
fraclen += (s - s1);
15801600
s0 = s;
15811601
}
15821602
s1 = s;
15831603
while ('0' <= c && c <= '9')
15841604
c = *++s;
1585-
nd += s - s1;
1605+
ndigits += s - s1;
1606+
fraclen += s - s1;
1607+
}
1608+
1609+
/* Now lz is true if and only if there were leading zero digits, and
1610+
ndigits gives the total number of digits ignoring leading zeros. A
1611+
valid input must have at least one digit. */
1612+
if (!ndigits && !lz) {
1613+
if (se)
1614+
*se = (char *)s00;
1615+
goto parse_error;
15861616
}
15871617

1588-
/* Now lz is true if and only if there were leading zero digits, and nd
1589-
gives the total number of digits ignoring leading zeros. A valid input
1590-
must have at least one digit. */
1591-
if (!nd && !lz) {
1618+
/* Range check ndigits and fraclen to make sure that they, and values
1619+
computed with them, can safely fit in an int. */
1620+
if (ndigits > MAX_DIGITS || fraclen > MAX_DIGITS) {
15921621
if (se)
15931622
*se = (char *)s00;
15941623
goto parse_error;
15951624
}
1625+
nd = (int)ndigits;
1626+
nd0 = (int)ndigits - (int)fraclen;
15961627

15971628
/* Parse exponent. */
15981629
e = 0;

0 commit comments

Comments
 (0)