Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
vm: finalize unicode/subscr specialization parity and regressions
  • Loading branch information
youknowone committed Mar 8, 2026
commit f6872fa405962cf24a37a1a4025a8cd98d70e3f5
18 changes: 17 additions & 1 deletion crates/vm/src/builtins/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,16 @@ impl PyList {

fn _setitem(&self, needle: &PyObject, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
match SequenceIndex::try_from_borrowed_object(vm, needle, "list")? {
SequenceIndex::Int(index) => self.borrow_vec_mut().setitem_by_index(vm, index, value),
SequenceIndex::Int(index) => self
.borrow_vec_mut()
.setitem_by_index(vm, index, value)
.map_err(|e| {
if e.class().is(vm.ctx.exceptions.index_error) {
vm.new_index_error("list assignment index out of range".to_owned())
} else {
e
}
}),
SequenceIndex::Slice(slice) => {
let sec = extract_cloned(&value, Ok, vm)?;
self.borrow_vec_mut().setitem_by_slice(vm, slice, &sec)
Expand Down Expand Up @@ -509,6 +518,13 @@ impl AsSequence for PyList {
} else {
zelf.borrow_vec_mut().delitem_by_index(vm, i)
}
.map_err(|e| {
if e.class().is(vm.ctx.exceptions.index_error) {
vm.new_index_error("list assignment index out of range".to_owned())
} else {
e
}
})
}),
contains: atomic_func!(|seq, target, vm| {
let zelf = PyList::sequence_downcast(seq);
Expand Down
29 changes: 25 additions & 4 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1500,14 +1500,25 @@ impl PyRef<PyStr> {
}

/// Appends `other` to this string, reusing the existing object when possible.
///
/// An empty `other` is a no-op. Otherwise the concatenated contents are built in a
/// fresh `Wtf8Buf` sized for both halves. When `strong_count()` reports exactly one
/// strong reference, the new buffer replaces the payload's `data` and the `hash`
/// field is reset to `hash::SENTINEL`; in every other case `self` is rebound to a
/// newly created `PyStr` reference.
pub fn concat_in_place(&mut self, other: &Wtf8, vm: &VirtualMachine) {
    if other.is_empty() {
        return;
    }
    // Build the full result before touching the old buffer: `other` is only read
    // here, so nothing below depends on it once `s` is complete.
    let mut s = Wtf8Buf::with_capacity(self.byte_len() + other.len());
    s.push_wtf8(self.as_ref());
    s.push_wtf8(other);
    if self.as_object().strong_count() == 1 {
        // SAFETY: strong_count()==1 guarantees unique ownership of this PyStr.
        // Mutating payload in place preserves semantics while avoiding PyObject reallocation.
        // NOTE(review): the pointer is produced by casting away `const` from
        // `self.payload()`; confirm that writing `data` through it is sound (no
        // `UnsafeCell` is visible in this hunk) and that `strong_count()` covers every
        // way the old buffer could still be borrowed.
        unsafe {
            let payload = self.payload() as *const PyStr as *mut PyStr;
            (*payload).data = PyStr::from(s).data;
            // The contents changed, so any previously stored hash must not be reused.
            (*payload)
                .hash
                .store(hash::SENTINEL, atomic::Ordering::Relaxed);
        }
    } else {
        // Shared object: leave it untouched and point `self` at a new string.
        *self = PyStr::from(s).into_ref(&vm.ctx);
    }
}

pub fn try_into_utf8(self, vm: &VirtualMachine) -> PyResult<PyRef<PyUtf8Str>> {
Expand Down Expand Up @@ -1678,13 +1689,23 @@ impl ToPyObject for Wtf8Buf {

impl ToPyObject for char {
    /// Converts a `char` into a Python string object.
    ///
    /// Code points up to `u8::MAX` are answered with a clone of the matching
    /// `vm.ctx.latin1_char_cache` entry; anything larger goes through
    /// `vm.ctx.new_str`.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        let cp = self as u32;
        if cp <= u8::MAX as u32 {
            // In range by the check above, so the index cannot exceed 255.
            vm.ctx.latin1_char_cache[cp as usize].clone().into()
        } else {
            vm.ctx.new_str(self).into()
        }
    }
}

impl ToPyObject for CodePoint {
    /// Converts a `CodePoint` into a Python string object.
    ///
    /// Values up to `u8::MAX` are answered with a clone of the matching
    /// `vm.ctx.latin1_char_cache` entry; anything larger goes through
    /// `vm.ctx.new_str`.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        let cp = self.to_u32();
        if cp <= u8::MAX as u32 {
            // In range by the check above, so the index cannot exceed 255.
            vm.ctx.latin1_char_cache[cp as usize].clone().into()
        } else {
            vm.ctx.new_str(self).into()
        }
    }
}

Expand Down
31 changes: 23 additions & 8 deletions crates/vm/src/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1669,23 +1669,37 @@ impl ExecutingFrame<'_> {
self.adaptive(|s, ii, cb| s.specialize_binary_op(vm, op_val, ii, cb));
self.execute_bin_op(vm, op_val)
}
// TODO: In CPython, this does in-place unicode concatenation when
// refcount is 1. Falls back to regular iadd for now.
// Super-instruction for BINARY_OP_ADD_UNICODE + STORE_FAST targeting
// the left local, mirroring CPython's BINARY_OP_INPLACE_ADD_UNICODE shape.
Instruction::BinaryOpInplaceAddUnicode => {
let b = self.top_value();
let a = self.nth_value(1);
let instr_idx = self.lasti() as usize - 1;
let cache_base = instr_idx + 1;
let target_local = self.binary_op_inplace_unicode_target_local(cache_base, a);
if let (Some(a_str), Some(b_str), Some(target_local)) = (
if let (Some(_a_str), Some(_b_str), Some(target_local)) = (
a.downcast_ref_if_exact::<PyStr>(vm),
b.downcast_ref_if_exact::<PyStr>(vm),
target_local,
) {
let result = a_str.as_wtf8().py_add(b_str.as_wtf8());
self.pop_value();
self.pop_value();
self.localsplus.fastlocals_mut()[target_local] = Some(result.to_pyobject(vm));
let right = self.pop_value();
let left = self.pop_value();

let local_obj = self.localsplus.fastlocals_mut()[target_local]
.take()
.expect("BINARY_OP_INPLACE_ADD_UNICODE target local missing");
debug_assert!(local_obj.is(&left));
let mut local_str = local_obj
.downcast_exact::<PyStr>(vm)
.expect("BINARY_OP_INPLACE_ADD_UNICODE target local not exact str")
.into_pyref();
drop(left);
let right_str = right
.downcast_ref_if_exact::<PyStr>(vm)
.expect("BINARY_OP_INPLACE_ADD_UNICODE right operand not exact str");
local_str.concat_in_place(right_str.as_wtf8(), vm);

self.localsplus.fastlocals_mut()[target_local] = Some(local_str.into());
self.jump_relative_forward(
1,
Instruction::BinaryOpInplaceAddUnicode.cache_entries() as u32,
Expand Down Expand Up @@ -4005,9 +4019,10 @@ impl ExecutingFrame<'_> {
&& let Ok(ch) = a_str.getitem_by_index(vm, i as isize)
&& ch.is_ascii()
{
let ascii_idx = ch.to_u32() as usize;
self.pop_value();
self.pop_value();
self.push_value(PyStr::from(ch).into_pyobject(vm));
self.push_value(vm.ctx.ascii_char_cache[ascii_idx].clone().into());
return Ok(None);
}
self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr)
Expand Down
8 changes: 8 additions & 0 deletions crates/vm/src/vm/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ pub struct Context {
pub types: TypeZoo,
pub exceptions: exceptions::ExceptionZoo,
pub int_cache_pool: Vec<PyIntRef>,
pub(crate) latin1_char_cache: Vec<PyRef<PyStr>>,
pub(crate) ascii_char_cache: Vec<PyRef<PyStr>>,
// there should only be exact objects of str in here, no non-str objects and no subclasses
pub(crate) string_pool: StringPool,
pub(crate) slot_new_wrapper: PyMethodDef,
Expand Down Expand Up @@ -324,6 +326,10 @@ impl Context {
)
})
.collect();
let latin1_char_cache: Vec<PyRef<PyStr>> = (0u8..=255)
.map(|b| create_object(PyStr::from(char::from(b)), types.str_type))
.collect();
let ascii_char_cache = latin1_char_cache[..128].to_vec();

let true_value = create_object(PyBool(PyInt::from(1)), types.bool_type);
let false_value = create_object(PyBool(PyInt::from(0)), types.bool_type);
Expand Down Expand Up @@ -371,6 +377,8 @@ impl Context {
types,
exceptions,
int_cache_pool,
latin1_char_cache,
ascii_char_cache,
string_pool,
slot_new_wrapper,
names,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
class S(str):
    """str subclass whose ``+`` and ``+=`` return fixed marker strings.

    The markers let the checks below tell which special method actually ran.
    """

    def __add__(self, other):
        return "ADD"

    def __iadd__(self, other):
        return "IADD"


def add_path_fallback_uses_add():
    """Run ``x = x + y`` on plain strs, then switch to an ``S`` operand mid-loop.

    Returns the final ``x``; it starts with "ADD" once ``S.__add__`` has been used.
    """
    x = "a"
    y = "b"
    for i in range(1200):
        # Halfway through, swap the exact-str operand for a str subclass.
        # Presumably the first 600 rounds exist to let the interpreter specialize
        # the add for exact str before the type changes -- confirm against the VM.
        if i == 600:
            x = S("s")
            y = "t"
        x = x + y
    return x


def iadd_path_fallback_uses_iadd():
    """Run ``x += y`` on plain strs, then switch to an ``S`` operand mid-loop.

    Returns the final ``x``; it starts with "IADD" once ``S.__iadd__`` has been used.
    """
    x = "a"
    y = "b"
    for i in range(1200):
        # Same operand swap as above, but through the augmented-assignment form.
        if i == 600:
            x = S("s")
            y = "t"
        x += y
    return x


# The subclass overrides must win after the operand swap, for both spellings.
assert add_path_fallback_uses_add().startswith("ADD")
assert iadd_path_fallback_uses_iadd().startswith("IADD")
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
def check_ascii_subscr_singleton_after_warmup():
    """Assert that repeated ``s[0]`` on an ASCII str yields the same object.

    Only the last 500 of 4000 iterations are compared; the earlier ones are
    presumably warm-up for the specialized subscript path -- confirm against the VM.
    """
    s = "abc"
    first = None
    for i in range(4000):
        c = s[0]
        if i >= 3500:
            if first is None:
                # Remember the first late result as the reference object.
                first = c
            else:
                # Identity, not equality: every later result must be that object.
                assert c is first


check_ascii_subscr_singleton_after_warmup()
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
def check_latin1_subscr_singleton_after_warmup():
    """Assert that repeated ``s[0]`` yields the same object for each sample str.

    Covers one ASCII first character ("abc") and one non-ASCII character below
    U+0100 ("éx"). Only the last 500 of 5000 iterations per string are compared;
    the earlier ones are presumably warm-up for the specialized subscript path --
    confirm against the VM.
    """
    for s in ("abc", "éx"):
        # The reference object is tracked separately for each sample string.
        first = None
        for i in range(5000):
            c = s[0]
            if i >= 4500:
                if first is None:
                    first = c
                else:
                    # Identity, not equality: every later result must be that object.
                    assert c is first


check_latin1_subscr_singleton_after_warmup()