diff --git a/crates/common/src/refcount.rs b/crates/common/src/refcount.rs
index 9c350b5497a..18905d00f3e 100644
--- a/crates/common/src/refcount.rs
+++ b/crates/common/src/refcount.rs
@@ -92,26 +92,26 @@ impl RefCount {
/// Get current strong count
#[inline]
pub fn get(&self) -> usize {
- State::from_raw(self.state.load(Ordering::SeqCst)).strong() as usize
+ State::from_raw(self.state.load(Ordering::Relaxed)).strong() as usize
}
/// Increment strong count
#[inline]
pub fn inc(&self) {
- let val = State::from_raw(self.state.fetch_add(COUNT, Ordering::SeqCst));
+ let val = State::from_raw(self.state.fetch_add(COUNT, Ordering::Relaxed));
if val.destructed() || (val.strong() as usize) > STRONG - 1 {
refcount_overflow();
}
if val.strong() == 0 {
// The previous fetch_add created a permission to run decrement again
- self.state.fetch_add(COUNT, Ordering::SeqCst);
+ self.state.fetch_add(COUNT, Ordering::Relaxed);
}
}
#[inline]
pub fn inc_by(&self, n: usize) {
debug_assert!(n <= STRONG);
- let val = State::from_raw(self.state.fetch_add(n * COUNT, Ordering::SeqCst));
+ let val = State::from_raw(self.state.fetch_add(n * COUNT, Ordering::Relaxed));
if val.destructed() || (val.strong() as usize) > STRONG - n {
refcount_overflow();
}
@@ -121,7 +121,7 @@ impl RefCount {
#[inline]
#[must_use]
pub fn safe_inc(&self) -> bool {
- let mut old = State::from_raw(self.state.load(Ordering::SeqCst));
+ let mut old = State::from_raw(self.state.load(Ordering::Relaxed));
loop {
if old.destructed() {
return false;
@@ -130,11 +130,11 @@ impl RefCount {
refcount_overflow();
}
let new_state = old.add_strong(1);
- match self.state.compare_exchange(
+ match self.state.compare_exchange_weak(
old.as_raw(),
new_state.as_raw(),
- Ordering::SeqCst,
- Ordering::SeqCst,
+ Ordering::Relaxed,
+ Ordering::Relaxed,
) {
Ok(_) => return true,
Err(curr) => old = State::from_raw(curr),
@@ -146,27 +146,31 @@ impl RefCount {
#[inline]
#[must_use]
pub fn dec(&self) -> bool {
- let old = State::from_raw(self.state.fetch_sub(COUNT, Ordering::SeqCst));
+ let old = State::from_raw(self.state.fetch_sub(COUNT, Ordering::Release));
// LEAKED objects never reach 0
if old.leaked() {
return false;
}
- old.strong() == 1
+ if old.strong() == 1 {
+ core::sync::atomic::fence(Ordering::Acquire);
+ return true;
+ }
+ false
}
/// Mark this object as leaked (interned). It will never be deallocated.
pub fn leak(&self) {
debug_assert!(!self.is_leaked());
- let mut old = State::from_raw(self.state.load(Ordering::SeqCst));
+ let mut old = State::from_raw(self.state.load(Ordering::Relaxed));
loop {
let new_state = old.with_leaked(true);
- match self.state.compare_exchange(
+ match self.state.compare_exchange_weak(
old.as_raw(),
new_state.as_raw(),
- Ordering::SeqCst,
- Ordering::SeqCst,
+ Ordering::AcqRel,
+ Ordering::Relaxed,
) {
Ok(_) => return,
Err(curr) => old = State::from_raw(curr),
diff --git a/crates/vm/src/builtins/frame.rs b/crates/vm/src/builtins/frame.rs
index 9b057b5b806..6f906e531d5 100644
--- a/crates/vm/src/builtins/frame.rs
+++ b/crates/vm/src/builtins/frame.rs
@@ -690,8 +690,9 @@ impl Py<Frame> {
}
// Clear fastlocals
+ // SAFETY: Frame is not executing (detached or stopped).
{
- let mut fastlocals = self.fastlocals.lock();
+ let fastlocals = unsafe { self.fastlocals.borrow_mut() };
for slot in fastlocals.iter_mut() {
*slot = None;
}
diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs
index f8ff44e8a72..58f818cc7a7 100644
--- a/crates/vm/src/builtins/function.rs
+++ b/crates/vm/src/builtins/function.rs
@@ -224,7 +224,8 @@ impl PyFunction {
// See also: PyEval_EvalCodeWithName in cpython:
// https://github.com/python/cpython/blob/main/Python/ceval.c#L3681
- let mut fastlocals = frame.fastlocals.lock();
+ // SAFETY: Frame was just created and not yet executing.
+ let fastlocals = unsafe { frame.fastlocals.borrow_mut() };
let mut args_iter = func_args.args.into_iter();
diff --git a/crates/vm/src/builtins/super.rs b/crates/vm/src/builtins/super.rs
index 8286c2a5e5f..3d1ec715e48 100644
--- a/crates/vm/src/builtins/super.rs
+++ b/crates/vm/src/builtins/super.rs
@@ -85,10 +85,9 @@ impl Initializer for PySuper {
if frame.code.arg_count == 0 {
return Err(vm.new_runtime_error("super(): no arguments"));
}
- // Lock fastlocals briefly to get arg[0], then drop the guard
- // before calling get_cell_contents (which also locks fastlocals).
- let first_arg = frame.fastlocals.lock()[0].clone();
- let obj = first_arg
+ // SAFETY: Frame is current and not concurrently mutated.
+ let obj = unsafe { frame.fastlocals.borrow() }[0]
+ .clone()
.or_else(|| {
if let Some(cell2arg) = frame.code.cell2arg.as_deref() {
cell2arg[..frame.code.cellvars.len()]
diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs
index 5a5c8674c42..132ad61a65b 100644
--- a/crates/vm/src/frame.rs
+++ b/crates/vm/src/frame.rs
@@ -26,6 +26,7 @@ use crate::{
};
use alloc::fmt;
use bstr::ByteSlice;
+use core::cell::UnsafeCell;
use core::iter::zip;
use core::sync::atomic;
use core::sync::atomic::AtomicPtr;
@@ -92,12 +93,60 @@ impl FrameOwner {
}
}
+/// Lock-free storage for local variables (localsplus).
+///
+/// # Safety
+/// Access is serialized by the frame's state mutex in `with_exec()`, which
+/// prevents concurrent frame execution. Trace callbacks that access `f_locals`
+/// run sequentially on the same thread as instruction execution.
+pub struct FastLocals {
+ inner: UnsafeCell<Box<[Option<PyObjectRef>]>>,
+}
+
+// SAFETY: Frame execution is serialized by the state mutex.
+#[cfg(feature = "threading")]
+unsafe impl Send for FastLocals {}
+#[cfg(feature = "threading")]
+unsafe impl Sync for FastLocals {}
+
+impl FastLocals {
+ fn new(data: Box<[Option<PyObjectRef>]>) -> Self {
+ Self {
+ inner: UnsafeCell::new(data),
+ }
+ }
+
+ /// # Safety
+ /// Caller must ensure exclusive access (frame state locked or frame
+ /// not executing).
+ #[inline(always)]
+ pub unsafe fn borrow(&self) -> &[Option<PyObjectRef>] {
+ unsafe { &*self.inner.get() }
+ }
+
+ /// # Safety
+ /// Caller must ensure exclusive mutable access.
+ #[inline(always)]
+ #[allow(clippy::mut_from_ref)]
+ pub unsafe fn borrow_mut(&self) -> &mut [Option<PyObjectRef>] {
+ unsafe { &mut *self.inner.get() }
+ }
+}
+
+unsafe impl Traverse for FastLocals {
+ fn traverse(&self, traverse_fn: &mut TraverseFn<'_>) {
+ // SAFETY: GC runs on the same thread; no concurrent mutation.
+ let data = unsafe { &*self.inner.get() };
+ data.traverse(traverse_fn);
+ }
+}
+
#[pyclass(module = false, name = "frame", traverse = "manual")]
pub struct Frame {
pub code: PyRef<PyCode>,
pub func_obj: Option<PyObjectRef>,
- pub fastlocals: PyMutex<Box<[Option<PyObjectRef>]>>,
+ pub fastlocals: FastLocals,
pub locals: ArgMapping,
pub globals: PyDictRef,
pub builtins: PyObjectRef,
@@ -208,7 +257,7 @@ impl Frame {
};
Self {
- fastlocals: PyMutex::new(fastlocals_vec.into_boxed_slice()),
+ fastlocals: FastLocals::new(fastlocals_vec.into_boxed_slice()),
locals: scope.locals,
globals: scope.globals,
builtins,
@@ -241,7 +290,8 @@ impl Frame {
/// Releases references held by the frame, matching _PyFrame_ClearLocals.
pub(crate) fn clear_locals_and_stack(&self) {
self.clear_stack_and_cells();
- let mut fastlocals = self.fastlocals.lock();
+ // SAFETY: Frame is not executing (generator closed).
+ let fastlocals = unsafe { self.fastlocals.borrow_mut() };
for slot in fastlocals.iter_mut() {
*slot = None;
}
@@ -250,7 +300,8 @@ impl Frame {
/// Get cell contents by cell index. Reads through fastlocals (no state lock needed).
pub(crate) fn get_cell_contents(&self, cell_idx: usize) -> Option<PyObjectRef> {
let nlocals = self.code.varnames.len();
- let fastlocals = self.fastlocals.lock();
+ // SAFETY: Frame not executing; no concurrent mutation.
+ let fastlocals = unsafe { self.fastlocals.borrow() };
fastlocals
.get(nlocals + cell_idx)
.and_then(|slot| slot.as_ref())
@@ -318,7 +369,8 @@ impl Frame {
return Ok(());
}
let code = &**self.code;
- let mut fastlocals = self.fastlocals.lock();
+ // SAFETY: Called before generator resume; no concurrent access.
+ let fastlocals = unsafe { self.fastlocals.borrow_mut() };
for (i, &varname) in code.varnames.iter().enumerate() {
if i >= fastlocals.len() {
break;
@@ -339,8 +391,9 @@ impl Frame {
let map = &code.varnames;
let j = core::cmp::min(map.len(), code.varnames.len());
if !code.varnames.is_empty() {
- let fastlocals = self.fastlocals.lock();
- for (&k, v) in zip(&map[..j], &**fastlocals) {
+ // SAFETY: Trace callbacks run sequentially on the same thread.
+ let fastlocals = unsafe { self.fastlocals.borrow() };
+ for (&k, v) in zip(&map[..j], fastlocals) {
match locals.mapping().ass_subscript(k, v.clone(), vm) {
Ok(()) => {}
Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {}
@@ -461,7 +514,7 @@ impl Py<Frame> {
/// with the mutable data inside
struct ExecutingFrame<'a> {
code: &'a PyRef<PyCode>,
- fastlocals: &'a PyMutex<Box<[Option<PyObjectRef>]>>,
+ fastlocals: &'a FastLocals,
locals: &'a ArgMapping,
globals: &'a PyDictRef,
builtins: &'a PyObjectRef,
@@ -1247,7 +1300,7 @@ impl ExecutingFrame<'_> {
Ok(None)
}
Instruction::DeleteFast(idx) => {
- let mut fastlocals = self.fastlocals.lock();
+ let fastlocals = unsafe { self.fastlocals.borrow_mut() };
let idx = idx.get(arg) as usize;
if fastlocals[idx].is_none() {
return Err(vm.new_exception_msg(
@@ -1749,7 +1802,7 @@ impl ExecutingFrame<'_> {
)
}
let idx = idx.get(arg) as usize;
- let x = self.fastlocals.lock()[idx]
+ let x = unsafe { self.fastlocals.borrow() }[idx]
.clone()
.ok_or_else(|| reference_error(self.code.varnames[idx], vm))?;
self.push_value(x);
@@ -1759,7 +1812,7 @@ impl ExecutingFrame<'_> {
// Load value and clear the slot (for inlined comprehensions)
// If slot is empty, push None (not an error - variable may not exist yet)
let idx = idx.get(arg) as usize;
- let x = self.fastlocals.lock()[idx]
+ let x = unsafe { self.fastlocals.borrow_mut() }[idx]
.take()
.unwrap_or_else(|| vm.ctx.none());
self.push_value(x);
@@ -1769,16 +1822,18 @@ impl ExecutingFrame<'_> {
// Same as LoadFast but explicitly checks for unbound locals
// (LoadFast in RustPython already does this check)
let idx = idx.get(arg) as usize;
- let x = self.fastlocals.lock()[idx].clone().ok_or_else(|| {
- vm.new_exception_msg(
- vm.ctx.exceptions.unbound_local_error.to_owned(),
- format!(
- "local variable '{}' referenced before assignment",
- self.code.varnames[idx]
+ let x = unsafe { self.fastlocals.borrow() }[idx]
+ .clone()
+ .ok_or_else(|| {
+ vm.new_exception_msg(
+ vm.ctx.exceptions.unbound_local_error.to_owned(),
+ format!(
+ "local variable '{}' referenced before assignment",
+ self.code.varnames[idx]
+ )
+ .into(),
)
- .into(),
- )
- })?;
+ })?;
self.push_value(x);
Ok(None)
}
@@ -1788,7 +1843,7 @@ impl ExecutingFrame<'_> {
let oparg = packed.get(arg);
let idx1 = (oparg >> 4) as usize;
let idx2 = (oparg & 15) as usize;
- let fastlocals = self.fastlocals.lock();
+ let fastlocals = unsafe { self.fastlocals.borrow() };
let x1 = fastlocals[idx1].clone().ok_or_else(|| {
vm.new_exception_msg(
vm.ctx.exceptions.unbound_local_error.to_owned(),
@@ -1809,40 +1864,34 @@ impl ExecutingFrame<'_> {
.into(),
)
})?;
- drop(fastlocals);
self.push_value(x1);
self.push_value(x2);
Ok(None)
}
// TODO: Implement true borrow optimization (skip Arc::clone).
- // CPython's LOAD_FAST_BORROW uses PyStackRef_Borrow to avoid refcount
- // increment for values that are consumed within the same basic block.
- // Possible approaches:
- // - Store raw pointers with careful lifetime management
- // - Add a "borrowed" variant to stack slots
- // - Use arena allocation for short-lived stack values
// Currently this just clones like LoadFast.
Instruction::LoadFastBorrow(idx) => {
let idx = idx.get(arg) as usize;
- let x = self.fastlocals.lock()[idx].clone().ok_or_else(|| {
- vm.new_exception_msg(
- vm.ctx.exceptions.unbound_local_error.to_owned(),
- format!(
- "local variable '{}' referenced before assignment",
- self.code.varnames[idx]
+ let x = unsafe { self.fastlocals.borrow() }[idx]
+ .clone()
+ .ok_or_else(|| {
+ vm.new_exception_msg(
+ vm.ctx.exceptions.unbound_local_error.to_owned(),
+ format!(
+ "local variable '{}' referenced before assignment",
+ self.code.varnames[idx]
+ )
+ .into(),
)
- .into(),
- )
- })?;
+ })?;
self.push_value(x);
Ok(None)
}
- // TODO: Same as LoadFastBorrow - implement true borrow optimization.
Instruction::LoadFastBorrowLoadFastBorrow { arg: packed } => {
let oparg = packed.get(arg);
let idx1 = (oparg >> 4) as usize;
let idx2 = (oparg & 15) as usize;
- let fastlocals = self.fastlocals.lock();
+ let fastlocals = unsafe { self.fastlocals.borrow() };
let x1 = fastlocals[idx1].clone().ok_or_else(|| {
vm.new_exception_msg(
vm.ctx.exceptions.unbound_local_error.to_owned(),
@@ -1863,7 +1912,6 @@ impl ExecutingFrame<'_> {
.into(),
)
})?;
- drop(fastlocals);
self.push_value(x1);
self.push_value(x2);
Ok(None)
@@ -2347,33 +2395,28 @@ impl ExecutingFrame<'_> {
}
Instruction::StoreFast(idx) => {
let value = self.pop_value();
- self.fastlocals.lock()[idx.get(arg) as usize] = Some(value);
+ let fastlocals = unsafe { self.fastlocals.borrow_mut() };
+ fastlocals[idx.get(arg) as usize] = Some(value);
Ok(None)
}
Instruction::StoreFastLoadFast { var_nums } => {
- // Store to one slot and load from another (often the same) - for inlined comprehensions
let value = self.pop_value();
- let mut locals = self.fastlocals.lock();
+ let locals = unsafe { self.fastlocals.borrow_mut() };
let oparg = var_nums.get(arg);
locals[oparg.store_idx() as usize] = Some(value);
let load_value = locals[oparg.load_idx() as usize]
.clone()
.expect("StoreFastLoadFast: load slot should have value after store");
- drop(locals);
self.push_value(load_value);
Ok(None)
}
Instruction::StoreFastStoreFast { arg: packed } => {
- // Store two values to two local variables at once
- // STORE_FAST idx1 executes first: pops TOS -> locals[idx1]
- // STORE_FAST idx2 executes second: pops new TOS -> locals[idx2]
- // oparg encoding: (idx1 << 4) | idx2
let oparg = packed.get(arg);
let idx1 = (oparg >> 4) as usize;
let idx2 = (oparg & 15) as usize;
- let value1 = self.pop_value(); // TOS -> idx1
- let value2 = self.pop_value(); // second -> idx2
- let mut fastlocals = self.fastlocals.lock();
+ let value1 = self.pop_value();
+ let value2 = self.pop_value();
+ let fastlocals = unsafe { self.fastlocals.borrow_mut() };
fastlocals[idx1] = Some(value1);
fastlocals[idx2] = Some(value2);
Ok(None)
@@ -3389,12 +3432,9 @@ impl ExecutingFrame<'_> {
let callable = self.pop_value();
let final_args = if let Some(self_val) = self_or_null {
- let mut all_args = vec![self_val];
- all_args.extend(args.args);
- FuncArgs {
- args: all_args,
- kwargs: args.kwargs,
- }
+ let mut args = args;
+ args.prepend_arg(self_val);
+ args
} else {
args
};
@@ -3410,12 +3450,9 @@ impl ExecutingFrame<'_> {
let callable = self.pop_value();
let final_args = if let Some(self_val) = self_or_null {
- let mut all_args = vec![self_val];
- all_args.extend(args.args);
- FuncArgs {
- args: all_args,
- kwargs: args.kwargs,
- }
+ let mut args = args;
+ args.prepend_arg(self_val);
+ args
} else {
args
};
diff --git a/crates/vm/src/protocol/callable.rs b/crates/vm/src/protocol/callable.rs
index 1f01221ff0c..5ffad41a229 100644
--- a/crates/vm/src/protocol/callable.rs
+++ b/crates/vm/src/protocol/callable.rs
@@ -49,6 +49,9 @@ impl<'a> PyCallable<'a> {
pub fn invoke(&self, args: impl IntoFuncArgs, vm: &VirtualMachine) -> PyResult {
let args = args.into_args(vm);
+ if !vm.use_tracing.get() {
+ return (self.call)(self.obj, args, vm);
+ }
// Python functions get 'call'/'return' events from with_frame().
// Bound methods delegate to the inner callable, which fires its own events.
// All other callables (built-in functions, etc.) get 'c_call'/'c_return'/'c_exception'.