Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
ca77774
Backport CVE-2020-10735 to 3.7 from 3.8.
gpshead Aug 30, 2022
f145128
Add What's New entry.
gpshead Aug 30, 2022
00a5114
Hack: Force CI run
tiran Sep 1, 2022
7eaad20
revert 1dae140b610a465b4d3e6fb2109ec13da6093e6d CI hack
gpshead Sep 1, 2022
635e292
Backport ctypes test_macholib fix from b29d0a5a7811418c0a1082ca188fd4…
gpshead Sep 1, 2022
a2956f3
annotate test_bad_password @requires_zlib.
gpshead Sep 1, 2022
95645b6
disable MachOTest.test_find unless macOS 11+ support is backported.
gpshead Sep 1, 2022
2cc321e
Move the whatsnew 3.7.14 text per review.
gpshead Sep 1, 2022
bc83515
LOL at my typo
gpshead Sep 1, 2022
76c9c2b
Make the doctest actually run & fix it.
gpshead Sep 1, 2022
e7bc47e
remove a line that prevents doctest error reporting.
gpshead Sep 2, 2022
0ef7ec0
Fix the docs build.
gpshead Sep 2, 2022
ad13c50
Update the ABI dump with the new private symbols.
gpshead Sep 2, 2022
2788f3f
Merge branch '3.7' into CVE-2020-10735-3.7backport
gpshead Sep 2, 2022
ca92fd2
Rename the news file to appease the Bedevere bot.
gpshead Sep 2, 2022
67905b2
Merge branch 'CVE-2020-10735-3.7backport' of github.com:gpshead/cpyth…
gpshead Sep 2, 2022
db48ddc
hexadecimal spelling =)
gpshead Sep 2, 2022
38ec6a9
Work around Windows Yield macro vs Python-ast.h
gpshead Sep 2, 2022
feaded8
doc typo: limitation
gpshead Sep 4, 2022
c9f2c57
Misc: Fix a typo in the header comment.
gpshead Sep 4, 2022
f69b587
remove unneeded doc note on float.as_integer_ratio
gpshead Sep 4, 2022
39837b6
gh-95778: Correctly pre-check for int-to-str conversion (#96537)
mdickinson Sep 4, 2022
7f911c1
backport cherry pick fix: lookup max from the right place.
gpshead Sep 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
gh-95778: Correctly pre-check for int-to-str conversion (#96537)
Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =)

The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact.

The justification for the current check. The C code check is:
```c
max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10
```

In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is:
$$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$

From this it follows that
$$\frac{M}{3L} < \frac{s-1}{10}$$
hence that
$$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$
So
$$2^{L(s-1)} > 10^M.$$
But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check.

<!-- gh-issue-number: gh-95778 -->
* Issue: gh-95778
<!-- /gh-issue-number -->

Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org>
  • Loading branch information
mdickinson and gpshead committed Sep 4, 2022
commit 39837b68cf67c3ba7f3c790672117dad7fc52e71
82 changes: 82 additions & 0 deletions Lib/test/test_int.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
import time

import unittest
from test import support
Expand Down Expand Up @@ -571,6 +572,87 @@ def test_max_str_digits(self):
with self.assertRaises(ValueError):
str(i)

def test_denial_of_service_prevented_int_to_str(self):
"""Regression test: ensure we fail before performing O(N**2) work."""
maxdigits = sys.get_int_max_str_digits()
assert maxdigits < 50_000, maxdigits # A test prerequisite.
get_time = time.process_time
if get_time() <= 0: # some platforms like WASM lack process_time()
get_time = time.monotonic

huge_int = int(f'0x{"c"*65_000}', base=16) # 78268 decimal digits.
digits = 78_268
with support.adjust_int_max_str_digits(digits):
start = get_time()
huge_decimal = str(huge_int)
seconds_to_convert = get_time() - start
self.assertEqual(len(huge_decimal), digits)
# Ensuring that we chose a slow enough conversion to measure.
# It takes 0.1 seconds on a Zen based cloud VM in an opt build.
if seconds_to_convert < 0.005:
raise unittest.SkipTest('"slow" conversion took only '
f'{seconds_to_convert} seconds.')

# We test with the limit almost at the size needed to check performance.
# The performant limit check is slightly fuzzy, give it a some room.
with support.adjust_int_max_str_digits(int(.995 * digits)):
with self.assertRaises(ValueError) as err:
start = get_time()
str(huge_int)
seconds_to_fail_huge = get_time() - start
self.assertIn('conversion', str(err.exception))
self.assertLess(seconds_to_fail_huge, seconds_to_convert/8)

# Now we test that a conversion that would take 30x as long also fails
# in a similarly fast fashion.
extra_huge_int = int(f'0x{"c"*500_000}', base=16) # 602060 digits.
with self.assertRaises(ValueError) as err:
start = get_time()
# If not limited, 8 seconds said Zen based cloud VM.
str(extra_huge_int)
seconds_to_fail_extra_huge = get_time() - start
self.assertIn('conversion', str(err.exception))
self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8)

def test_denial_of_service_prevented_str_to_int(self):
"""Regression test: ensure we fail before performing O(N**2) work."""
maxdigits = sys.get_int_max_str_digits()
assert maxdigits < 100_000, maxdigits # A test prerequisite.
get_time = time.process_time
if get_time() <= 0: # some platforms like WASM lack process_time()
get_time = time.monotonic

digits = 133700
huge = '8'*digits
with support.adjust_int_max_str_digits(digits):
start = get_time()
int(huge)
seconds_to_convert = get_time() - start
# Ensuring that we chose a slow enough conversion to measure.
# It takes 0.1 seconds on a Zen based cloud VM in an opt build.
if seconds_to_convert < 0.005:
raise unittest.SkipTest('"slow" conversion took only '
f'{seconds_to_convert} seconds.')

with support.adjust_int_max_str_digits(digits - 1):
with self.assertRaises(ValueError) as err:
start = get_time()
int(huge)
seconds_to_fail_huge = get_time() - start
self.assertIn('conversion', str(err.exception))
self.assertLess(seconds_to_fail_huge, seconds_to_convert/8)

# Now we test that a conversion that would take 30x as long also fails
# in a similarly fast fashion.
extra_huge = '7'*1_200_000
with self.assertRaises(ValueError) as err:
start = get_time()
# If not limited, 8 seconds in the Zen based cloud VM.
int(extra_huge)
seconds_to_fail_extra_huge = get_time() - start
self.assertIn('conversion', str(err.exception))
self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8)

def test_power_of_two_bases_unlimited(self):
"""The limit does not apply to power of 2 bases."""
maxdigits = sys.get_int_max_str_digits()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ limitation <int_max_str_digits>` documentation. The default limit is 4300
digits in string form.

Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback
from Victor Stinner, Thomas Wouters, Steve Dower, and Ned Deily.
from Victor Stinner, Thomas Wouters, Steve Dower, Ned Deily, and Mark Dickinson.
26 changes: 22 additions & 4 deletions Objects/longobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS];
Py_ssize_t quick_int_allocs, quick_neg_int_allocs;
#endif

#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits"
#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits"
#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion"

static PyObject *
get_small_int(sdigit ival)
Expand Down Expand Up @@ -1606,6 +1607,23 @@ long_to_decimal_string_internal(PyObject *aa,
size_a = Py_ABS(Py_SIZE(a));
negative = Py_SIZE(a) < 0;

/* quick and dirty pre-check for overflowing the decimal digit limit,
based on the inequality 10/3 >= log2(10)

explanation in https://github.com/python/cpython/pull/96537
*/
if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD
/ (3 * PyLong_SHIFT) + 2) {
PyInterpreterState *interp = _PyInterpreterState_GET();
int max_str_digits = interp->int_max_str_digits;
if ((max_str_digits > 0) &&
(max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) {
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
max_str_digits);
return -1;
}
}

/* quick and dirty upper bound for the number of digits
required to express a in base _PyLong_DECIMAL_BASE:

Expand Down Expand Up @@ -1670,8 +1688,8 @@ long_to_decimal_string_internal(PyObject *aa,
Py_ssize_t strlen_nosign = strlen - negative;
if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) {
Py_DECREF(scratch);
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT,
max_str_digits, strlen_nosign);
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
max_str_digits);
return -1;
}
}
Expand Down Expand Up @@ -2344,7 +2362,7 @@ digit beyond the first.
if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
int max_str_digits = _PyRuntime.int_max_str_digits;
if ((max_str_digits > 0) && (digits > max_str_digits)) {
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT,
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT,
max_str_digits, digits);
return NULL;
}
Expand Down