Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Replace fastlocals PyMutex with UnsafeCell-based FastLocals
Eliminate per-instruction mutex lock/unlock overhead for local
variable access. FastLocals uses UnsafeCell with safety guaranteed
by the frame's state mutex and sequential same-thread execution.

Affects 14+ lock() call sites in hot instruction paths (LoadFast,
StoreFast, DeleteFast, and their paired variants).
  • Loading branch information
youknowone committed Mar 1, 2026
commit 958853f51f9041ce444d9d22b1b2b655f967633a
3 changes: 2 additions & 1 deletion crates/vm/src/builtins/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -690,8 +690,9 @@ impl Py<Frame> {
}

// Clear fastlocals
// SAFETY: Frame is not executing (detached or stopped).
{
let mut fastlocals = self.fastlocals.lock();
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
for slot in fastlocals.iter_mut() {
*slot = None;
}
Expand Down
3 changes: 2 additions & 1 deletion crates/vm/src/builtins/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,8 @@ impl PyFunction {
// See also: PyEval_EvalCodeWithName in cpython:
// https://github.com/python/cpython/blob/main/Python/ceval.c#L3681

let mut fastlocals = frame.fastlocals.lock();
// SAFETY: Frame was just created and not yet executing.
let fastlocals = unsafe { frame.fastlocals.borrow_mut() };

let mut args_iter = func_args.args.into_iter();

Expand Down
7 changes: 3 additions & 4 deletions crates/vm/src/builtins/super.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,9 @@ impl Initializer for PySuper {
if frame.code.arg_count == 0 {
return Err(vm.new_runtime_error("super(): no arguments"));
}
// Lock fastlocals briefly to get arg[0], then drop the guard
// before calling get_cell_contents (which also locks fastlocals).
let first_arg = frame.fastlocals.lock()[0].clone();
let obj = first_arg
// SAFETY: Frame is current and not concurrently mutated.
let obj = unsafe { frame.fastlocals.borrow() }[0]
.clone()
.or_else(|| {
if let Some(cell2arg) = frame.code.cell2arg.as_deref() {
cell2arg[..frame.code.cellvars.len()]
Expand Down
109 changes: 74 additions & 35 deletions crates/vm/src/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ use core::sync::atomic::AtomicPtr;
use core::sync::atomic::Ordering::Relaxed;
use indexmap::IndexMap;
use itertools::Itertools;
use core::cell::UnsafeCell;
use rustpython_common::atomic::{PyAtomic, Radium};
use rustpython_common::{
boxvec::BoxVec,
Expand Down Expand Up @@ -92,12 +93,60 @@ impl FrameOwner {
}
}

/// Lock-free storage for local variables (localsplus).
///
/// # Safety
/// Access is serialized by the frame's state mutex in `with_exec()`, which
/// prevents concurrent frame execution. Trace callbacks that access `f_locals`
/// run sequentially on the same thread as instruction execution.
pub struct FastLocals {
inner: UnsafeCell<Box<[Option<PyObjectRef>]>>,
}

// SAFETY: Frame execution is serialized by the state mutex.
#[cfg(feature = "threading")]
unsafe impl Send for FastLocals {}
#[cfg(feature = "threading")]
unsafe impl Sync for FastLocals {}

impl FastLocals {
fn new(data: Box<[Option<PyObjectRef>]>) -> Self {
Self {
inner: UnsafeCell::new(data),
}
}

/// # Safety
/// Caller must ensure exclusive access (frame state locked or frame
/// not executing).
#[inline(always)]
pub unsafe fn borrow(&self) -> &[Option<PyObjectRef>] {
unsafe { &*self.inner.get() }
}

/// # Safety
/// Caller must ensure exclusive mutable access.
#[inline(always)]
#[allow(clippy::mut_from_ref)]
pub unsafe fn borrow_mut(&self) -> &mut [Option<PyObjectRef>] {
unsafe { &mut *self.inner.get() }
}
Comment on lines +96 to +133
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "1) Enumerate all fastlocals unsafe borrow call sites"
rg -n 'fastlocals\.borrow(_mut)?\(' \
  crates/vm/src/frame.rs \
  crates/vm/src/builtins/frame.rs \
  crates/vm/src/builtins/function.rs \
  crates/vm/src/builtins/super.rs

echo
echo "2) Inspect synchronization context around main externally reachable paths"
rg -n -A25 -B8 'fn locals\(|fn clear\(|fn f_locals\(|fn with_exec\(' \
  crates/vm/src/frame.rs \
  crates/vm/src/builtins/frame.rs

echo
echo "3) Inspect owner synchronization primitives (load/store vs CAS)"
rg -n 'owner\.(load|store|compare_exchange|compare_exchange_weak)' \
  crates/vm/src/frame.rs \
  crates/vm/src/builtins/frame.rs

Repository: RustPython/RustPython

Length of output: 9197


FastLocals synchronization contract is violated by externally reachable paths; f_locals() getter permits concurrent unsynchronized access.

The doc comment claims access is "serialized by the frame's state mutex in with_exec()", but the f_locals() getter (line 456 in builtins/frame.rs) directly calls locals() which accesses fastlocals without holding that guard (line 395 in frame.rs). The comment at lines 405–406 acknowledges fastlocals is intentionally accessed outside with_exec() to avoid blocking. Since f_locals() is Python-exposed and callable from any thread, and FastLocals is marked Send + Sync, concurrent calls from multiple threads will perform unsynchronized reads/writes to the same UnsafeCell, violating Rust's aliasing guarantees.

Gate fastlocals borrows behind a guard/token tied to frame execution, or document that frames must not be accessed concurrently and remove Send + Sync (or add runtime checks that panic if accessed cross-thread).

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/vm/src/frame.rs` around lines 96 - 133, The FastLocals UnsafeCell can
be accessed concurrently via the Python-exposed f_locals() path (which calls
locals()), violating the assumption that access is serialized by with_exec(); to
fix, either (A) remove the unsafe impls unsafe impl Send/Sync for FastLocals and
add runtime assertions to prevent cross-thread access, or (B) gate all
fastlocals borrows behind the frame execution guard/token (have f_locals()
acquire the same with_exec() guard or a borrow token before calling
locals()/FastLocals::borrow/borrow_mut), or (C) add a runtime thread-check that
panics on unsynchronized cross-thread access; update the FastLocals docs to
reflect the chosen model and adjust callers (f_locals, locals, and any direct
FastLocals usage) to obtain the guard/token or to compile without Send/Sync
accordingly.

}

unsafe impl Traverse for FastLocals {
fn traverse(&self, traverse_fn: &mut TraverseFn<'_>) {
// SAFETY: GC runs on the same thread; no concurrent mutation.
let data = unsafe { &*self.inner.get() };
data.traverse(traverse_fn);
}
}

#[pyclass(module = false, name = "frame", traverse = "manual")]
pub struct Frame {
pub code: PyRef<PyCode>,
pub func_obj: Option<PyObjectRef>,

pub fastlocals: PyMutex<Box<[Option<PyObjectRef>]>>,
pub fastlocals: FastLocals,
pub locals: ArgMapping,
pub globals: PyDictRef,
pub builtins: PyObjectRef,
Expand Down Expand Up @@ -208,7 +257,7 @@ impl Frame {
};

Self {
fastlocals: PyMutex::new(fastlocals_vec.into_boxed_slice()),
fastlocals: FastLocals::new(fastlocals_vec.into_boxed_slice()),
locals: scope.locals,
globals: scope.globals,
builtins,
Expand Down Expand Up @@ -241,7 +290,8 @@ impl Frame {
/// Releases references held by the frame, matching _PyFrame_ClearLocals.
pub(crate) fn clear_locals_and_stack(&self) {
self.clear_stack_and_cells();
let mut fastlocals = self.fastlocals.lock();
// SAFETY: Frame is not executing (generator closed).
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
for slot in fastlocals.iter_mut() {
*slot = None;
}
Expand All @@ -250,7 +300,8 @@ impl Frame {
/// Get cell contents by cell index. Reads through fastlocals (no state lock needed).
pub(crate) fn get_cell_contents(&self, cell_idx: usize) -> Option<PyObjectRef> {
let nlocals = self.code.varnames.len();
let fastlocals = self.fastlocals.lock();
// SAFETY: Frame not executing; no concurrent mutation.
let fastlocals = unsafe { self.fastlocals.borrow() };
fastlocals
.get(nlocals + cell_idx)
.and_then(|slot| slot.as_ref())
Expand Down Expand Up @@ -318,7 +369,8 @@ impl Frame {
return Ok(());
}
let code = &**self.code;
let mut fastlocals = self.fastlocals.lock();
// SAFETY: Called before generator resume; no concurrent access.
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
for (i, &varname) in code.varnames.iter().enumerate() {
if i >= fastlocals.len() {
break;
Expand All @@ -339,8 +391,9 @@ impl Frame {
let map = &code.varnames;
let j = core::cmp::min(map.len(), code.varnames.len());
if !code.varnames.is_empty() {
let fastlocals = self.fastlocals.lock();
for (&k, v) in zip(&map[..j], &**fastlocals) {
// SAFETY: Trace callbacks run sequentially on the same thread.
let fastlocals = unsafe { self.fastlocals.borrow() };
for (&k, v) in zip(&map[..j], fastlocals) {
match locals.mapping().ass_subscript(k, v.clone(), vm) {
Ok(()) => {}
Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {}
Expand Down Expand Up @@ -461,7 +514,7 @@ impl Py<Frame> {
/// with the mutable data inside
struct ExecutingFrame<'a> {
code: &'a PyRef<PyCode>,
fastlocals: &'a PyMutex<Box<[Option<PyObjectRef>]>>,
fastlocals: &'a FastLocals,
locals: &'a ArgMapping,
globals: &'a PyDictRef,
builtins: &'a PyObjectRef,
Expand Down Expand Up @@ -1247,7 +1300,7 @@ impl ExecutingFrame<'_> {
Ok(None)
}
Instruction::DeleteFast(idx) => {
let mut fastlocals = self.fastlocals.lock();
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
let idx = idx.get(arg) as usize;
if fastlocals[idx].is_none() {
return Err(vm.new_exception_msg(
Expand Down Expand Up @@ -1749,7 +1802,7 @@ impl ExecutingFrame<'_> {
)
}
let idx = idx.get(arg) as usize;
let x = self.fastlocals.lock()[idx]
let x = unsafe { self.fastlocals.borrow() }[idx]
.clone()
.ok_or_else(|| reference_error(self.code.varnames[idx], vm))?;
self.push_value(x);
Expand All @@ -1759,7 +1812,7 @@ impl ExecutingFrame<'_> {
// Load value and clear the slot (for inlined comprehensions)
// If slot is empty, push None (not an error - variable may not exist yet)
let idx = idx.get(arg) as usize;
let x = self.fastlocals.lock()[idx]
let x = unsafe { self.fastlocals.borrow_mut() }[idx]
.take()
.unwrap_or_else(|| vm.ctx.none());
self.push_value(x);
Expand All @@ -1769,7 +1822,7 @@ impl ExecutingFrame<'_> {
// Same as LoadFast but explicitly checks for unbound locals
// (LoadFast in RustPython already does this check)
let idx = idx.get(arg) as usize;
let x = self.fastlocals.lock()[idx].clone().ok_or_else(|| {
let x = unsafe { self.fastlocals.borrow() }[idx].clone().ok_or_else(|| {
vm.new_exception_msg(
vm.ctx.exceptions.unbound_local_error.to_owned(),
format!(
Expand All @@ -1788,7 +1841,7 @@ impl ExecutingFrame<'_> {
let oparg = packed.get(arg);
let idx1 = (oparg >> 4) as usize;
let idx2 = (oparg & 15) as usize;
let fastlocals = self.fastlocals.lock();
let fastlocals = unsafe { self.fastlocals.borrow() };
let x1 = fastlocals[idx1].clone().ok_or_else(|| {
vm.new_exception_msg(
vm.ctx.exceptions.unbound_local_error.to_owned(),
Expand All @@ -1809,22 +1862,15 @@ impl ExecutingFrame<'_> {
.into(),
)
})?;
drop(fastlocals);
self.push_value(x1);
self.push_value(x2);
Ok(None)
}
// TODO: Implement true borrow optimization (skip Arc::clone).
// CPython's LOAD_FAST_BORROW uses PyStackRef_Borrow to avoid refcount
// increment for values that are consumed within the same basic block.
// Possible approaches:
// - Store raw pointers with careful lifetime management
// - Add a "borrowed" variant to stack slots
// - Use arena allocation for short-lived stack values
// Currently this just clones like LoadFast.
Instruction::LoadFastBorrow(idx) => {
let idx = idx.get(arg) as usize;
let x = self.fastlocals.lock()[idx].clone().ok_or_else(|| {
let x = unsafe { self.fastlocals.borrow() }[idx].clone().ok_or_else(|| {
vm.new_exception_msg(
vm.ctx.exceptions.unbound_local_error.to_owned(),
format!(
Expand All @@ -1837,12 +1883,11 @@ impl ExecutingFrame<'_> {
self.push_value(x);
Ok(None)
}
// TODO: Same as LoadFastBorrow - implement true borrow optimization.
Instruction::LoadFastBorrowLoadFastBorrow { arg: packed } => {
let oparg = packed.get(arg);
let idx1 = (oparg >> 4) as usize;
let idx2 = (oparg & 15) as usize;
let fastlocals = self.fastlocals.lock();
let fastlocals = unsafe { self.fastlocals.borrow() };
let x1 = fastlocals[idx1].clone().ok_or_else(|| {
vm.new_exception_msg(
vm.ctx.exceptions.unbound_local_error.to_owned(),
Expand All @@ -1863,7 +1908,6 @@ impl ExecutingFrame<'_> {
.into(),
)
})?;
drop(fastlocals);
self.push_value(x1);
self.push_value(x2);
Ok(None)
Expand Down Expand Up @@ -2347,33 +2391,28 @@ impl ExecutingFrame<'_> {
}
Instruction::StoreFast(idx) => {
let value = self.pop_value();
self.fastlocals.lock()[idx.get(arg) as usize] = Some(value);
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
fastlocals[idx.get(arg) as usize] = Some(value);
Ok(None)
}
Instruction::StoreFastLoadFast { var_nums } => {
// Store to one slot and load from another (often the same) - for inlined comprehensions
let value = self.pop_value();
let mut locals = self.fastlocals.lock();
let locals = unsafe { self.fastlocals.borrow_mut() };
let oparg = var_nums.get(arg);
locals[oparg.store_idx() as usize] = Some(value);
let load_value = locals[oparg.load_idx() as usize]
.clone()
.expect("StoreFastLoadFast: load slot should have value after store");
drop(locals);
self.push_value(load_value);
Ok(None)
}
Instruction::StoreFastStoreFast { arg: packed } => {
// Store two values to two local variables at once
// STORE_FAST idx1 executes first: pops TOS -> locals[idx1]
// STORE_FAST idx2 executes second: pops new TOS -> locals[idx2]
// oparg encoding: (idx1 << 4) | idx2
let oparg = packed.get(arg);
let idx1 = (oparg >> 4) as usize;
let idx2 = (oparg & 15) as usize;
let value1 = self.pop_value(); // TOS -> idx1
let value2 = self.pop_value(); // second -> idx2
let mut fastlocals = self.fastlocals.lock();
let value1 = self.pop_value();
let value2 = self.pop_value();
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
fastlocals[idx1] = Some(value1);
fastlocals[idx2] = Some(value2);
Ok(None)
Expand Down
Loading