Skip to content

Commit 6823065

Browse files
committed
Issue #19638: Raise ValueError instead of crashing when converting billion-character strings to float.
1 parent ad52a35 commit 6823065

3 files changed

Lines changed: 82 additions & 17 deletions

File tree

Lib/test/test_strtod.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,37 @@ def test_parsing(self):
249249
else:
250250
assert False, "expected ValueError"
251251

252+
@test_support.bigmemtest(minsize=5 * test_support._1G, memuse=1)
253+
def test_oversized_digit_strings(self, maxsize):
254+
# Input string whose length doesn't fit in a C int.
255+
s = "1." + "1" * int(2.2e9)
256+
with self.assertRaises(ValueError):
257+
float(s)
258+
del s
259+
260+
s = "0." + "0" * int(2.2e9) + "1"
261+
with self.assertRaises(ValueError):
262+
float(s)
263+
del s
264+
265+
def test_large_exponents(self):
266+
# Verify that the clipping of the exponent in strtod doesn't affect the
267+
# output values.
268+
def positive_exp(n):
269+
""" Long string with value 1.0 and exponent n"""
270+
return '0.{}1e+{}'.format('0'*(n-1), n)
271+
272+
def negative_exp(n):
273+
""" Long string with value 1.0 and exponent -n"""
274+
return '1{}e-{}'.format('0'*n, n)
275+
276+
self.assertEqual(float(positive_exp(10000)), 1.0)
277+
self.assertEqual(float(positive_exp(20000)), 1.0)
278+
self.assertEqual(float(positive_exp(30000)), 1.0)
279+
self.assertEqual(float(negative_exp(10000)), 1.0)
280+
self.assertEqual(float(negative_exp(20000)), 1.0)
281+
self.assertEqual(float(negative_exp(30000)), 1.0)
282+
252283
def test_particular(self):
253284
# inputs that produced crashes or incorrectly rounded results with
254285
# previous versions of dtoa.c, for various reasons

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ What's New in Python 2.7.7?
99
Core and Builtins
1010
-----------------
1111

12+
- Issue #19638: Fix possible crash / undefined behaviour from huge (more than 2
13+
billion characters) input strings in _Py_dg_strtod.
14+
1215
Library
1316
-------
1417

Python/dtoa.c

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,24 @@ typedef union { double d; ULong L[2]; } U;
204204
MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP. MAX_ABS_EXP
205205
should fit into an int. */
206206
#ifndef MAX_ABS_EXP
207-
#define MAX_ABS_EXP 19999U
207+
#define MAX_ABS_EXP 1100000000U
208+
#endif
209+
/* Bound on length of pieces of input strings in _Py_dg_strtod; specifically,
210+
this is used to bound the total number of digits ignoring leading zeros and
211+
the number of digits that follow the decimal point. Ideally, MAX_DIGITS
212+
should satisfy MAX_DIGITS + 400 < MAX_ABS_EXP; that ensures that the
213+
exponent clipping in _Py_dg_strtod can't affect the value of the output. */
214+
#ifndef MAX_DIGITS
215+
#define MAX_DIGITS 1000000000U
216+
#endif
217+
218+
/* Guard against trying to use the above values on unusual platforms with ints
219+
* of width less than 32 bits. */
220+
#if MAX_ABS_EXP > INT_MAX
221+
#error "MAX_ABS_EXP should fit in an int"
222+
#endif
223+
#if MAX_DIGITS > INT_MAX
224+
#error "MAX_DIGITS should fit in an int"
208225
#endif
209226

210227
/* The following definition of Storeinc is appropriate for MIPS processors.
@@ -1498,6 +1515,7 @@ _Py_dg_strtod(const char *s00, char **se)
14981515
Long L;
14991516
BCinfo bc;
15001517
Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
1518+
size_t ndigits, fraclen;
15011519

15021520
dval(&rv) = 0.;
15031521

@@ -1520,39 +1538,52 @@ _Py_dg_strtod(const char *s00, char **se)
15201538
c = *++s;
15211539
lz = s != s1;
15221540

1523-
/* Point s0 at the first nonzero digit (if any). nd0 will be the position
1524-
of the point relative to s0. nd will be the total number of digits
1525-
ignoring leading zeros. */
1541+
/* Point s0 at the first nonzero digit (if any). fraclen will be the
1542+
number of digits between the decimal point and the end of the
1543+
digit string. ndigits will be the total number of digits ignoring
1544+
leading zeros. */
15261545
s0 = s1 = s;
15271546
while ('0' <= c && c <= '9')
15281547
c = *++s;
1529-
nd0 = nd = s - s1;
1548+
ndigits = s - s1;
1549+
fraclen = 0;
15301550

15311551
/* Parse decimal point and following digits. */
15321552
if (c == '.') {
15331553
c = *++s;
1534-
if (!nd) {
1554+
if (!ndigits) {
15351555
s1 = s;
15361556
while (c == '0')
15371557
c = *++s;
15381558
lz = lz || s != s1;
1539-
nd0 -= s - s1;
1559+
fraclen += (s - s1);
15401560
s0 = s;
15411561
}
15421562
s1 = s;
15431563
while ('0' <= c && c <= '9')
15441564
c = *++s;
1545-
nd += s - s1;
1565+
ndigits += s - s1;
1566+
fraclen += s - s1;
1567+
}
1568+
1569+
/* Now lz is true if and only if there were leading zero digits, and
1570+
ndigits gives the total number of digits ignoring leading zeros. A
1571+
valid input must have at least one digit. */
1572+
if (!ndigits && !lz) {
1573+
if (se)
1574+
*se = (char *)s00;
1575+
goto parse_error;
15461576
}
15471577

1548-
/* Now lz is true if and only if there were leading zero digits, and nd
1549-
gives the total number of digits ignoring leading zeros. A valid input
1550-
must have at least one digit. */
1551-
if (!nd && !lz) {
1578+
/* Range check ndigits and fraclen to make sure that they, and values
1579+
computed with them, can safely fit in an int. */
1580+
if (ndigits > MAX_DIGITS || fraclen > MAX_DIGITS) {
15521581
if (se)
15531582
*se = (char *)s00;
15541583
goto parse_error;
15551584
}
1585+
nd = (int)ndigits;
1586+
nd0 = (int)ndigits - (int)fraclen;
15561587

15571588
/* Parse exponent. */
15581589
e = 0;
@@ -1886,20 +1917,20 @@ _Py_dg_strtod(const char *s00, char **se)
18861917
bd2++;
18871918

18881919
/* At this stage bd5 - bb5 == e == bd2 - bb2 + bbe, bb2 - bs2 == 1,
1889-
and bs == 1, so:
1920+
and bs == 1, so:
18901921
18911922
tdv == bd * 10**e = bd * 2**(bbe - bb2 + bd2) * 5**(bd5 - bb5)
18921923
srv == bb * 2**bbe = bb * 2**(bbe - bb2 + bb2)
1893-
0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2)
1924+
0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2)
18941925
1895-
It follows that:
1926+
It follows that:
18961927
18971928
M * tdv = bd * 2**bd2 * 5**bd5
18981929
M * srv = bb * 2**bb2 * 5**bb5
18991930
M * 0.5 ulp(srv) = bs * 2**bs2 * 5**bb5
19001931
1901-
for some constant M. (Actually, M == 2**(bb2 - bbe) * 5**bb5, but
1902-
this fact is not needed below.)
1932+
for some constant M. (Actually, M == 2**(bb2 - bbe) * 5**bb5, but
1933+
this fact is not needed below.)
19031934
*/
19041935

19051936
/* Remove factor of 2**i, where i = min(bb2, bd2, bs2). */

0 commit comments

Comments
 (0)