Skip to content

Commit 4772f32

Browse files
committed
Correct wrong index access
1 parent 51b6bd7 commit 4772f32

File tree

1 file changed

+127
-70
lines changed

1 file changed

+127
-70
lines changed

crates/stdlib/src/json.rs

Lines changed: 127 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,22 @@ mod _json {
3737
count
3838
}
3939

40+
/// Report whether the characters yielded by `chars` begin with `pattern`.
///
/// The comparison is done one `char` at a time rather than on byte slices,
/// so it stays correct when the surrounding text contains non-ASCII
/// characters and the caller is working with character indices.
///
/// Returns `true` when every character of `pattern` is matched in order
/// (an empty `pattern` therefore always matches), and `false` as soon as the
/// iterator runs out or yields a different character.
#[inline]
fn starts_with_chars<I>(mut chars: I, pattern: &str) -> bool
where
    I: Iterator<Item = char>,
{
    // `all` short-circuits on the first mismatch, exactly like an early return.
    pattern
        .chars()
        .all(|expected| chars.next() == Some(expected))
}
55+
4056
#[pyattr(name = "make_scanner")]
4157
#[pyclass(name = "Scanner", traverse)]
4258
#[derive(Debug, PyPayload)]
@@ -202,6 +218,54 @@ mod _json {
202218
Some((ret, buf.len()))
203219
}
204220

221+
/// Parse a number from a character iterator.
222+
/// Returns (result, character_count) where character_count is the number of chars consumed.
223+
fn parse_number_from_chars<I>(
224+
&self,
225+
chars: I,
226+
vm: &VirtualMachine,
227+
) -> Option<(PyResult, usize)>
228+
where
229+
I: Iterator<Item = char>,
230+
{
231+
let mut buf = String::new();
232+
let mut has_neg = false;
233+
let mut has_decimal = false;
234+
let mut has_exponent = false;
235+
let mut has_e_sign = false;
236+
237+
for c in chars {
238+
let i = buf.len();
239+
match c {
240+
'-' if i == 0 => has_neg = true,
241+
n if n.is_ascii_digit() => {}
242+
'.' if !has_decimal => has_decimal = true,
243+
'e' | 'E' if !has_exponent => has_exponent = true,
244+
'+' | '-' if !has_e_sign => has_e_sign = true,
245+
_ => break,
246+
}
247+
buf.push(c);
248+
}
249+
250+
let len = buf.len();
251+
if len == 0 || (len == 1 && has_neg) {
252+
return None;
253+
}
254+
255+
let ret = if has_decimal || has_exponent {
256+
if let Some(ref parse_float) = self.parse_float {
257+
parse_float.call((&buf,), vm)
258+
} else {
259+
Ok(vm.ctx.new_float(f64::from_str(&buf).unwrap()).into())
260+
}
261+
} else if let Some(ref parse_int) = self.parse_int {
262+
parse_int.call((&buf,), vm)
263+
} else {
264+
Ok(vm.new_pyobj(BigInt::from_str(&buf).unwrap()))
265+
};
266+
Some((ret, len))
267+
}
268+
205269
/// Parse a JSON object starting after the opening '{'.
206270
/// Returns (parsed_object, end_character_index).
207271
fn parse_object(
@@ -458,6 +522,7 @@ mod _json {
458522
}
459523

460524
/// Call scan_once and handle the result.
525+
/// Uses character iterators to avoid byte/char index mismatch with non-ASCII strings.
461526
fn call_scan_once(
462527
&self,
463528
scan_once: &PyObjectRef,
@@ -466,100 +531,92 @@ mod _json {
466531
memo: &mut HashMap<String, PyStrRef>,
467532
vm: &VirtualMachine,
468533
) -> PyResult<(PyObjectRef, usize)> {
469-
// First try to handle common cases directly in Rust
470534
let s = pystr.as_str();
471-
let mut chars = s.chars().skip(idx).peekable();
535+
let chars = s.chars().skip(idx).peekable();
472536

473-
let remaining = &s[idx..];
537+
let first_char = match chars.clone().next() {
538+
Some(c) => c,
539+
None => return Err(self.make_decode_error("Expecting value", pystr, idx, vm)),
540+
};
474541

475-
match chars.peek() {
476-
Some('"') => {
477-
// String - parse directly in Rust
542+
match first_char {
543+
'"' => {
544+
// String
478545
let (wtf8, end) = machinery::scanstring(pystr.as_wtf8(), idx + 1, self.strict)
479546
.map_err(|e| py_decode_error(e, pystr.clone(), vm))?;
480547
let py_str = vm.ctx.new_str(wtf8.to_string());
481-
return Ok((py_str.into(), end));
548+
Ok((py_str.into(), end))
482549
}
483-
Some('{') => {
484-
// Nested object - parse recursively in Rust
485-
return self.parse_object(pystr, idx + 1, scan_once, memo, vm);
550+
'{' => {
551+
// Object
552+
self.parse_object(pystr, idx + 1, scan_once, memo, vm)
486553
}
487-
Some('[') => {
488-
// Nested array - parse recursively in Rust
489-
return self.parse_array(pystr, idx + 1, scan_once, memo, vm);
554+
'[' => {
555+
// Array
556+
self.parse_array(pystr, idx + 1, scan_once, memo, vm)
490557
}
491-
Some('n') => {
492-
// null - parse directly in Rust
493-
if remaining.starts_with("null") {
494-
return Ok((vm.ctx.none(), idx + 4));
495-
}
558+
'n' if starts_with_chars(chars.clone(), "null") => {
559+
// null
560+
Ok((vm.ctx.none(), idx + 4))
496561
}
497-
Some('t') => {
498-
// true - parse directly in Rust
499-
if remaining.starts_with("true") {
500-
return Ok((vm.ctx.new_bool(true).into(), idx + 4));
501-
}
562+
't' if starts_with_chars(chars.clone(), "true") => {
563+
// true
564+
Ok((vm.ctx.new_bool(true).into(), idx + 4))
502565
}
503-
Some('f') => {
504-
// false - parse directly in Rust
505-
if remaining.starts_with("false") {
506-
return Ok((vm.ctx.new_bool(false).into(), idx + 5));
507-
}
566+
'f' if starts_with_chars(chars.clone(), "false") => {
567+
// false
568+
Ok((vm.ctx.new_bool(false).into(), idx + 5))
508569
}
509-
Some(c) if c.is_ascii_digit() => {
510-
// Number starting with digit - parse directly in Rust
511-
if let Some((result, len)) = self.parse_number(remaining, vm) {
512-
return Ok((result?, idx + len));
513-
}
570+
'N' if starts_with_chars(chars.clone(), "NaN") => {
571+
// NaN
572+
let result = self.parse_constant.call(("NaN",), vm)?;
573+
Ok((result, idx + 3))
514574
}
515-
Some('N') => {
516-
// NaN - parse directly in Rust
517-
if remaining.starts_with("NaN") {
518-
let result = self.parse_constant.call(("NaN",), vm)?;
519-
return Ok((result, idx + 3));
520-
}
575+
'I' if starts_with_chars(chars.clone(), "Infinity") => {
576+
// Infinity
577+
let result = self.parse_constant.call(("Infinity",), vm)?;
578+
Ok((result, idx + 8))
521579
}
522-
Some('I') => {
523-
// Infinity - parse directly in Rust
524-
if remaining.starts_with("Infinity") {
525-
let result = self.parse_constant.call(("Infinity",), vm)?;
526-
return Ok((result, idx + 8));
527-
}
528-
}
529-
Some('-') => {
580+
'-' => {
530581
// -Infinity or negative number
531-
if remaining.starts_with("-Infinity") {
582+
if starts_with_chars(chars.clone(), "-Infinity") {
532583
let result = self.parse_constant.call(("-Infinity",), vm)?;
533584
return Ok((result, idx + 9));
534585
}
535-
// Try parsing as negative number
536-
if let Some((result, len)) = self.parse_number(remaining, vm) {
586+
// Negative number - collect number characters
587+
if let Some((result, len)) = self.parse_number_from_chars(chars, vm) {
537588
return Ok((result?, idx + len));
538589
}
590+
Err(self.make_decode_error("Expecting value", pystr, idx, vm))
539591
}
540-
_ => {
541-
// fall through to call scan_once
542-
}
543-
}
544-
545-
// Fall back to scan_once for other value types
546-
let result = scan_once.call((pystr.clone(), idx as isize), vm);
547-
548-
match result {
549-
Ok(tuple) => {
550-
use crate::vm::builtins::PyTupleRef;
551-
let tuple: PyTupleRef = tuple.try_into_value(vm)?;
552-
if tuple.len() != 2 {
553-
return Err(vm.new_value_error("scan_once must return 2-tuple"));
592+
c if c.is_ascii_digit() => {
593+
// Positive number
594+
if let Some((result, len)) = self.parse_number_from_chars(chars, vm) {
595+
return Ok((result?, idx + len));
554596
}
555-
let value = tuple.as_slice()[0].clone();
556-
let end_idx: isize = tuple.as_slice()[1].try_to_value(vm)?;
557-
Ok((value, end_idx as usize))
558-
}
559-
Err(err) if err.fast_isinstance(vm.ctx.exceptions.stop_iteration) => {
560597
Err(self.make_decode_error("Expecting value", pystr, idx, vm))
561598
}
562-
Err(err) => Err(err),
599+
_ => {
600+
// Fall back to scan_once for unrecognized input
601+
let result = scan_once.call((pystr.clone(), idx as isize), vm);
602+
603+
match result {
604+
Ok(tuple) => {
605+
use crate::vm::builtins::PyTupleRef;
606+
let tuple: PyTupleRef = tuple.try_into_value(vm)?;
607+
if tuple.len() != 2 {
608+
return Err(vm.new_value_error("scan_once must return 2-tuple"));
609+
}
610+
let value = tuple.as_slice()[0].clone();
611+
let end_idx: isize = tuple.as_slice()[1].try_to_value(vm)?;
612+
Ok((value, end_idx as usize))
613+
}
614+
Err(err) if err.fast_isinstance(vm.ctx.exceptions.stop_iteration) => {
615+
Err(self.make_decode_error("Expecting value", pystr, idx, vm))
616+
}
617+
Err(err) => Err(err),
618+
}
619+
}
563620
}
564621
}
565622

0 commit comments

Comments
 (0)