Skip to content

Commit ab7fdd3

Browse files
committed
Reject non-ASCII digits in JSON numbers
1 parent a6fee92 commit ab7fdd3

3 files changed

Lines changed: 40 additions & 23 deletions

File tree

Lib/test/test_json/test_decode.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ def test_float(self):
1818
self.assertIsInstance(rval, float)
1919
self.assertEqual(rval, 1.0)
2020

21-
@unittest.skip("TODO: RUSTPYTHON; called `Result::unwrap()` on an `Err` value: ParseFloatError { kind: Invalid }")
2221
def test_nonascii_digits_rejected(self):
2322
# JSON specifies only ascii digits, see gh-125687
2423
for num in ["1\uff10", "0.\uff10", "0e\uff10"]:

Lib/test/test_json/test_fail.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,4 @@ def test_linecol(self):
239239
(line, col, idx))
240240

241241
class TestPyFail(TestFail, PyTest): pass
242-
class TestCFail(TestFail, CTest):
243-
@unittest.expectedFailure # TODO: RUSTPYTHON
244-
def test_failures(self):
245-
return super().test_failures()
242+
class TestCFail(TestFail, CTest): pass

crates/stdlib/src/json.rs

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -189,30 +189,51 @@ mod _json {
189189

190190
fn parse_number(&self, bytes: &[u8], vm: &VirtualMachine) -> Option<(PyResult, usize)> {
191191
flame_guard!("JsonScanner::parse_number");
192-
let mut has_neg = false;
193-
let mut has_decimal = false;
194-
let mut has_exponent = false;
195-
let mut has_e_sign = false;
192+
// RFC 8259 defines JSON numbers in ASCII syntax, including digits,
193+
// '-', '.', 'e'/'E', and an optional exponent sign, so byte iteration
194+
// is equivalent to char iteration here.
196195
let mut i = 0;
197-
// JSON numbers are ASCII per RFC 8259 (digits, '-', '+', '.', 'e', 'E'),
198-
// so byte iteration is equivalent to char iteration here.
199-
for &b in bytes {
200-
match b {
201-
b'-' if i == 0 => has_neg = true,
202-
b'0'..=b'9' => {}
203-
b'.' if !has_decimal => has_decimal = true,
204-
b'e' | b'E' if !has_exponent => has_exponent = true,
205-
b'+' | b'-' if !has_e_sign => has_e_sign = true,
206-
_ => break,
207-
}
196+
if bytes.get(i) == Some(&b'-') {
208197
i += 1;
209198
}
210-
if i == 0 || (i == 1 && has_neg) {
211-
return None;
199+
match bytes.get(i) {
200+
Some(b'0') => i += 1,
201+
Some(b'1'..=b'9') => {
202+
i += 1;
203+
while matches!(bytes.get(i), Some(b'0'..=b'9')) {
204+
i += 1;
205+
}
206+
}
207+
_ => return None,
208+
}
209+
210+
let mut is_float = false;
211+
if bytes.get(i) == Some(&b'.') && matches!(bytes.get(i + 1), Some(b'0'..=b'9')) {
212+
is_float = true;
213+
i += 2;
214+
while matches!(bytes.get(i), Some(b'0'..=b'9')) {
215+
i += 1;
216+
}
217+
}
218+
219+
if matches!(bytes.get(i), Some(b'e' | b'E')) {
220+
let mut exponent_end = i + 1;
221+
if matches!(bytes.get(exponent_end), Some(b'+' | b'-')) {
222+
exponent_end += 1;
223+
}
224+
if matches!(bytes.get(exponent_end), Some(b'0'..=b'9')) {
225+
is_float = true;
226+
exponent_end += 1;
227+
while matches!(bytes.get(exponent_end), Some(b'0'..=b'9')) {
228+
exponent_end += 1;
229+
}
230+
i = exponent_end;
231+
}
212232
}
233+
213234
// SAFETY: the scanning above advances `i` only past bytes matched as ASCII ('-', digits, '.', 'e'/'E', '+'), so bytes[..i] is valid UTF-8.
214235
let buf = unsafe { core::str::from_utf8_unchecked(&bytes[..i]) };
215-
let ret = if has_decimal || has_exponent {
236+
let ret = if is_float {
216237
// float
217238
if let Some(ref parse_float) = self.parse_float {
218239
parse_float.call((buf,), vm)

0 commit comments

Comments
 (0)