Skip to content

Commit 9794ab7

Browse files
Enforce int_max_str_digits on int-to-str conversions (RustPython#7688)
* Enforce int_max_str_digits on int-to-str conversions The str-to-int direction already enforced sys.get_int_max_str_digits() via bytes_to_int; the int-to-str direction did not. CPython 3.14 enforces both as part of its integer string conversion length limitation (the CVE-2020-10735 mitigation). Adds check_int_to_str_digits helper in builtins::int (bit-count fast path + digit upper-bound from log10(2)), wired into the four Python-level entry points: repr, the str fast path in protocol::object, int.__format__ (decimal/n/empty spec only — power-of-two bases x/o/b are exempt per CPython), and the DecimalD/I/U branches of vm::cformat for both str % and bytes %. Unmasks 8 expectedFailure tests across test_int (max_str_digits, DoS prevention, int_from_other_bases — each mirrored in IntSubclass), test_ast (test_repr_large_input_crash) and test_reprlib (test_numbers). Boundary cases (4299/4300/4301 digits at limit=4300) match CPython 3.14.4. * Skip int-to-str DoS test on platforms without time.get_clock_info The test_denial_of_service_prevented_int_to_str regression test uses support.Stopwatch, which calls time.get_clock_info('monotonic'). In RustPython that function is gated to unix/windows targets only, so on wasm32-wasip1 it surfaces as AttributeError and breaks the wasm-wasi CI. Guard the test with skipUnless(hasattr(time, 'get_clock_info'), ...) so it runs everywhere it can and is skipped on wasm. Also narrow is_decimal_int_format to Number(Case::Lower): 'N' is rejected by format_int as UnknownFormatCode, so excluding it preserves that error path instead of intercepting it with the digit-limit check. * Add TODO: RUSTPYTHON marker to skipUnless reason scripts/update_lib uses TODO: RUSTPYTHON markers inside unittest decorator reason strings to identify and migrate custom RustPython patches across CPython library updates. * Use expectedFailureIf for wasm get_clock_info gap skipUnless silently hides the test forever; expectedFailureIf surfaces unexpected success once RustPython implements time.get_clock_info on wasm, prompting marker removal.
1 parent dc81c74 commit 9794ab7

8 files changed

Lines changed: 94 additions & 14 deletions

File tree

Lib/test/test_ast/test_ast.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1057,7 +1057,6 @@ def test_repr(self) -> None:
10571057
with self.subTest(test_input=test):
10581058
self.assertEqual(repr(ast.parse(test)), snapshot)
10591059

1060-
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: ValueError not raised
10611060
def test_repr_large_input_crash(self):
10621061
# gh-125010: Fix use-after-free in ast repr()
10631062
source = "0x0" + "e" * 10_000

Lib/test/test_int.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import sys
2+
import time
23

34
import unittest
45
# TODO: RUSTPYTHON
@@ -573,7 +574,6 @@ def check(self, i, base=None):
573574
else:
574575
self.int_class(i, base)
575576

576-
@unittest.expectedFailure # TODO: RUSTPYTHON
577577
def test_max_str_digits(self):
578578
maxdigits = sys.get_int_max_str_digits()
579579

@@ -588,7 +588,10 @@ def test_max_str_digits(self):
588588
with self.assertRaises(ValueError):
589589
str(i)
590590

591-
@unittest.expectedFailure # TODO: RUSTPYTHON
591+
@unittest.expectedFailureIf(
592+
not hasattr(time, "get_clock_info"),
593+
"TODO: RUSTPYTHON; time.get_clock_info is not available on wasm",
594+
)
592595
def test_denial_of_service_prevented_int_to_str(self):
593596
"""Regression test: ensure we fail before performing O(N**2) work."""
594597
maxdigits = sys.get_int_max_str_digits()
@@ -713,7 +716,6 @@ def _other_base_helper(self, base):
713716
with self.assertRaises(ValueError) as err:
714717
int_class(f'{s}1', base)
715718

716-
@unittest.expectedFailure # TODO: RUSTPYTHON
717719
def test_int_from_other_bases(self):
718720
base = 3
719721
with self.subTest(base=base):

Lib/test/test_reprlib.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,6 @@ def test_frozenset(self):
150150
eq(r(frozenset({1, 2, 3, 4, 5, 6})), "frozenset({1, 2, 3, 4, 5, 6})")
151151
eq(r(frozenset({1, 2, 3, 4, 5, 6, 7})), "frozenset({1, 2, 3, 4, 5, 6, ...})")
152152

153-
@unittest.expectedFailure # TODO: RUSTPYTHON
154153
def test_numbers(self):
155154
for x in [123, 1.0 / 3]:
156155
self.assertEqual(r(x), repr(x))

crates/common/src/format.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,17 @@ impl FormatSpec {
478478
matches!(self.format_type, Some(FormatType::Number(Case::Lower)))
479479
}
480480

481+
/// Returns true if this format spec produces a decimal int representation
482+
/// subject to `sys.get_int_max_str_digits()` (no spec, 'd', or 'n').
483+
/// Binary bases ('b', 'o', 'x', 'X') are exempt per CPython. 'N' is rejected
484+
/// later in `format_int` as `UnknownFormatCode`, so it is not included here.
485+
pub fn is_decimal_int_format(&self) -> bool {
486+
matches!(
487+
self.format_type,
488+
None | Some(FormatType::Decimal) | Some(FormatType::Number(Case::Lower))
489+
)
490+
}
491+
481492
/// Insert locale-aware thousands separators into an integer string.
482493
/// Follows CPython's GroupGenerator logic for variable-width grouping.
483494
fn insert_locale_grouping(int_part: &str, locale: &LocaleInfo) -> String {

crates/vm/src/builtins/int.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,9 @@ impl PyInt {
500500
}
501501
let format_spec =
502502
FormatSpec::parse(spec.as_str()).map_err(|err| err.into_pyexception(vm))?;
503+
if format_spec.is_decimal_int_format() {
504+
check_int_to_str_digits(&zelf.value, vm)?;
505+
}
503506
let result = if format_spec.has_locale_format() {
504507
let locale = crate::format::get_locale_info();
505508
format_spec.format_int_locale(&zelf.value, &locale)
@@ -655,9 +658,34 @@ impl Comparable for PyInt {
655658
}
656659
}
657660

661+
/// Pre-format check enforcing `sys.get_int_max_str_digits()` on int → str conversions.
662+
/// Mirrors CPython's PEP 644 DoS mitigation. Cheap fast-path for small values via
663+
/// bit-count upper bound on decimal digits.
664+
pub(crate) fn check_int_to_str_digits(value: &BigInt, vm: &VirtualMachine) -> PyResult<()> {
665+
let limit = vm.state.int_max_str_digits.load();
666+
if limit == 0 {
667+
return Ok(());
668+
}
669+
let bits = value.bits();
670+
// Below ~452 decimal digits: definitely under any reasonable limit.
671+
if bits < 1500 {
672+
return Ok(());
673+
}
674+
// Upper bound on decimal digit count: ⌈bits × log10(2)⌉ + 1, with log10(2) ≈ 0.30103.
675+
let digits_upper = (bits as usize * 30103 / 100000) + 1;
676+
if digits_upper > limit {
677+
return Err(vm.new_value_error(format!(
678+
"Exceeds the limit ({limit} digits) for integer string conversion; \
679+
use sys.set_int_max_str_digits() to increase the limit"
680+
)));
681+
}
682+
Ok(())
683+
}
684+
658685
impl Representable for PyInt {
659686
#[inline]
660-
fn repr_str(zelf: &Py<Self>, _vm: &VirtualMachine) -> PyResult<String> {
687+
fn repr_str(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<String> {
688+
check_int_to_str_digits(&zelf.value, vm)?;
661689
Ok(zelf.to_str_radix_10())
662690
}
663691
}

crates/vm/src/cformat.rs

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ use crate::{
99
AsObject, PyObject, PyObjectRef, PyResult, TryFromBorrowedObject, TryFromObject,
1010
VirtualMachine,
1111
builtins::{
12-
PyBaseExceptionRef, PyByteArray, PyBytes, PyFloat, PyInt, PyStr, try_f64_to_bigint, tuple,
12+
PyBaseExceptionRef, PyByteArray, PyBytes, PyFloat, PyInt, PyStr,
13+
int::check_int_to_str_digits, try_f64_to_bigint, tuple,
1314
},
1415
function::ArgIntoFloat,
1516
protocol::PyBuffer,
@@ -54,17 +55,19 @@ fn spec_format_bytes(
5455
CNumberType::DecimalD | CNumberType::DecimalI | CNumberType::DecimalU => {
5556
match_class!(match &obj {
5657
ref i @ PyInt => {
58+
check_int_to_str_digits(i.as_bigint(), vm)?;
5759
Ok(spec.format_number(i.as_bigint()).into_bytes())
5860
}
5961
ref f @ PyFloat => {
60-
Ok(spec
61-
.format_number(&try_f64_to_bigint(f.to_f64(), vm)?)
62-
.into_bytes())
62+
let bigint = try_f64_to_bigint(f.to_f64(), vm)?;
63+
check_int_to_str_digits(&bigint, vm)?;
64+
Ok(spec.format_number(&bigint).into_bytes())
6365
}
6466
obj => {
6567
if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) {
6668
let result = method?.call((), vm)?;
6769
if let Some(i) = result.downcast_ref::<PyInt>() {
70+
check_int_to_str_digits(i.as_bigint(), vm)?;
6871
return Ok(spec.format_number(i.as_bigint()).into_bytes());
6972
}
7073
}
@@ -149,17 +152,19 @@ fn spec_format_string(
149152
CNumberType::DecimalD | CNumberType::DecimalI | CNumberType::DecimalU => {
150153
match_class!(match &obj {
151154
ref i @ PyInt => {
155+
check_int_to_str_digits(i.as_bigint(), vm)?;
152156
Ok(spec.format_number(i.as_bigint()).into())
153157
}
154158
ref f @ PyFloat => {
155-
Ok(spec
156-
.format_number(&try_f64_to_bigint(f.to_f64(), vm)?)
157-
.into())
159+
let bigint = try_f64_to_bigint(f.to_f64(), vm)?;
160+
check_int_to_str_digits(&bigint, vm)?;
161+
Ok(spec.format_number(&bigint).into())
158162
}
159163
obj => {
160164
if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) {
161165
let result = method?.call((), vm)?;
162166
if let Some(i) = result.downcast_ref::<PyInt>() {
167+
check_int_to_str_digits(i.as_bigint(), vm)?;
163168
return Ok(spec.format_number(i.as_bigint()).into());
164169
}
165170
}

crates/vm/src/protocol/object.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::{
55
AsObject, Py, PyObject, PyObjectRef, PyRef, PyResult, TryFromObject, VirtualMachine,
66
builtins::{
77
PyBytes, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyTuple, PyTupleRef,
8-
PyType, PyTypeRef, PyUtf8Str, pystr::AsPyStr,
8+
PyType, PyTypeRef, PyUtf8Str, int::check_int_to_str_digits, pystr::AsPyStr,
99
},
1010
common::{hash::PyHash, str::to_ascii},
1111
convert::{ToPyObject, ToPyResult},
@@ -392,6 +392,7 @@ impl PyObject {
392392
// Fast path for exact int: skip __str__ method resolution
393393
let obj = match obj.downcast_exact::<PyInt>(vm) {
394394
Ok(int) => {
395+
check_int_to_str_digits(int.as_bigint(), vm)?;
395396
return Ok(vm.ctx.new_str(int.to_str_radix_10()));
396397
}
397398
Err(obj) => obj,

extra_tests/snippets/builtin_int.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,3 +366,38 @@ class SubInt(int):
366366
subint = int.__new__(SubInt, 11)
367367
assert subint.real is not subint
368368
assert type(subint.real) is int
369+
370+
371+
# sys.set_int_max_str_digits enforced on int → str conversions (CVE-2020-10735 mitigation).
372+
# Decimal paths (str, repr, f-string, %d, format(d/n/empty)) raise ValueError;
373+
# power-of-two bases ('x', 'o', 'b') are exempt.
374+
import sys
375+
376+
_orig_limit = sys.get_int_max_str_digits()
377+
try:
378+
sys.set_int_max_str_digits(4000)
379+
huge = 10**4001 # 4002 decimal digits, well over the limit
380+
381+
for fn in [
382+
lambda: str(huge),
383+
lambda: repr(huge),
384+
lambda: f"{huge}",
385+
lambda: "%d" % huge,
386+
lambda: b"%d" % huge,
387+
lambda: format(huge, ""),
388+
lambda: format(huge, "d"),
389+
lambda: format(huge, ",d"),
390+
]:
391+
with assert_raises(ValueError):
392+
fn()
393+
394+
# Binary bases must NOT raise.
395+
assert format(huge, "x")
396+
assert format(huge, "o")
397+
assert format(huge, "b")
398+
399+
# Limit disabled: no check.
400+
sys.set_int_max_str_digits(0)
401+
assert str(huge)
402+
finally:
403+
sys.set_int_max_str_digits(_orig_limit)

0 commit comments

Comments
 (0)