Skip to content

Commit 958853f

Browse files
committed
Replace fastlocals PyMutex with UnsafeCell-based FastLocals
Eliminate per-instruction mutex lock/unlock overhead for local variable access. FastLocals uses UnsafeCell with safety guaranteed by the frame's state mutex and sequential same-thread execution. Affects 14+ lock() call sites in hot instruction paths (LoadFast, StoreFast, DeleteFast, and their paired variants).
1 parent 6a9be9f commit 958853f

File tree

4 files changed

+81
-41
lines changed

4 files changed

+81
-41
lines changed

crates/vm/src/builtins/frame.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -690,8 +690,9 @@ impl Py<Frame> {
690690
}
691691

692692
// Clear fastlocals
693+
// SAFETY: Frame is not executing (detached or stopped).
693694
{
694-
let mut fastlocals = self.fastlocals.lock();
695+
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
695696
for slot in fastlocals.iter_mut() {
696697
*slot = None;
697698
}

crates/vm/src/builtins/function.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,8 @@ impl PyFunction {
224224
// See also: PyEval_EvalCodeWithName in cpython:
225225
// https://github.com/python/cpython/blob/main/Python/ceval.c#L3681
226226

227-
let mut fastlocals = frame.fastlocals.lock();
227+
// SAFETY: Frame was just created and not yet executing.
228+
let fastlocals = unsafe { frame.fastlocals.borrow_mut() };
228229

229230
let mut args_iter = func_args.args.into_iter();
230231

crates/vm/src/builtins/super.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,9 @@ impl Initializer for PySuper {
8585
if frame.code.arg_count == 0 {
8686
return Err(vm.new_runtime_error("super(): no arguments"));
8787
}
88-
// Lock fastlocals briefly to get arg[0], then drop the guard
89-
// before calling get_cell_contents (which also locks fastlocals).
90-
let first_arg = frame.fastlocals.lock()[0].clone();
91-
let obj = first_arg
88+
// SAFETY: Frame is current and not concurrently mutated.
89+
let obj = unsafe { frame.fastlocals.borrow() }[0]
90+
.clone()
9291
.or_else(|| {
9392
if let Some(cell2arg) = frame.code.cell2arg.as_deref() {
9493
cell2arg[..frame.code.cellvars.len()]

crates/vm/src/frame.rs

Lines changed: 74 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use core::sync::atomic::AtomicPtr;
3232
use core::sync::atomic::Ordering::Relaxed;
3333
use indexmap::IndexMap;
3434
use itertools::Itertools;
35+
use core::cell::UnsafeCell;
3536
use rustpython_common::atomic::{PyAtomic, Radium};
3637
use rustpython_common::{
3738
boxvec::BoxVec,
@@ -92,12 +93,60 @@ impl FrameOwner {
9293
}
9394
}
9495

96+
/// Lock-free storage for local variables (localsplus).
97+
///
98+
/// # Safety
99+
/// Access is serialized by the frame's state mutex in `with_exec()`, which
100+
/// prevents concurrent frame execution. Trace callbacks that access `f_locals`
101+
/// run sequentially on the same thread as instruction execution.
102+
pub struct FastLocals {
103+
inner: UnsafeCell<Box<[Option<PyObjectRef>]>>,
104+
}
105+
106+
// SAFETY: Frame execution is serialized by the state mutex.
107+
#[cfg(feature = "threading")]
108+
unsafe impl Send for FastLocals {}
109+
#[cfg(feature = "threading")]
110+
unsafe impl Sync for FastLocals {}
111+
112+
impl FastLocals {
113+
fn new(data: Box<[Option<PyObjectRef>]>) -> Self {
114+
Self {
115+
inner: UnsafeCell::new(data),
116+
}
117+
}
118+
119+
/// # Safety
120+
/// Caller must ensure exclusive access (frame state locked or frame
121+
/// not executing).
122+
#[inline(always)]
123+
pub unsafe fn borrow(&self) -> &[Option<PyObjectRef>] {
124+
unsafe { &*self.inner.get() }
125+
}
126+
127+
/// # Safety
128+
/// Caller must ensure exclusive mutable access.
129+
#[inline(always)]
130+
#[allow(clippy::mut_from_ref)]
131+
pub unsafe fn borrow_mut(&self) -> &mut [Option<PyObjectRef>] {
132+
unsafe { &mut *self.inner.get() }
133+
}
134+
}
135+
136+
unsafe impl Traverse for FastLocals {
137+
fn traverse(&self, traverse_fn: &mut TraverseFn<'_>) {
138+
// SAFETY: GC runs on the same thread; no concurrent mutation.
139+
let data = unsafe { &*self.inner.get() };
140+
data.traverse(traverse_fn);
141+
}
142+
}
143+
95144
#[pyclass(module = false, name = "frame", traverse = "manual")]
96145
pub struct Frame {
97146
pub code: PyRef<PyCode>,
98147
pub func_obj: Option<PyObjectRef>,
99148

100-
pub fastlocals: PyMutex<Box<[Option<PyObjectRef>]>>,
149+
pub fastlocals: FastLocals,
101150
pub locals: ArgMapping,
102151
pub globals: PyDictRef,
103152
pub builtins: PyObjectRef,
@@ -208,7 +257,7 @@ impl Frame {
208257
};
209258

210259
Self {
211-
fastlocals: PyMutex::new(fastlocals_vec.into_boxed_slice()),
260+
fastlocals: FastLocals::new(fastlocals_vec.into_boxed_slice()),
212261
locals: scope.locals,
213262
globals: scope.globals,
214263
builtins,
@@ -241,7 +290,8 @@ impl Frame {
241290
/// Releases references held by the frame, matching _PyFrame_ClearLocals.
242291
pub(crate) fn clear_locals_and_stack(&self) {
243292
self.clear_stack_and_cells();
244-
let mut fastlocals = self.fastlocals.lock();
293+
// SAFETY: Frame is not executing (generator closed).
294+
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
245295
for slot in fastlocals.iter_mut() {
246296
*slot = None;
247297
}
@@ -250,7 +300,8 @@ impl Frame {
250300
/// Get cell contents by cell index. Reads through fastlocals (no state lock needed).
251301
pub(crate) fn get_cell_contents(&self, cell_idx: usize) -> Option<PyObjectRef> {
252302
let nlocals = self.code.varnames.len();
253-
let fastlocals = self.fastlocals.lock();
303+
// SAFETY: Frame not executing; no concurrent mutation.
304+
let fastlocals = unsafe { self.fastlocals.borrow() };
254305
fastlocals
255306
.get(nlocals + cell_idx)
256307
.and_then(|slot| slot.as_ref())
@@ -318,7 +369,8 @@ impl Frame {
318369
return Ok(());
319370
}
320371
let code = &**self.code;
321-
let mut fastlocals = self.fastlocals.lock();
372+
// SAFETY: Called before generator resume; no concurrent access.
373+
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
322374
for (i, &varname) in code.varnames.iter().enumerate() {
323375
if i >= fastlocals.len() {
324376
break;
@@ -339,8 +391,9 @@ impl Frame {
339391
let map = &code.varnames;
340392
let j = core::cmp::min(map.len(), code.varnames.len());
341393
if !code.varnames.is_empty() {
342-
let fastlocals = self.fastlocals.lock();
343-
for (&k, v) in zip(&map[..j], &**fastlocals) {
394+
// SAFETY: Trace callbacks run sequentially on the same thread.
395+
let fastlocals = unsafe { self.fastlocals.borrow() };
396+
for (&k, v) in zip(&map[..j], fastlocals) {
344397
match locals.mapping().ass_subscript(k, v.clone(), vm) {
345398
Ok(()) => {}
346399
Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {}
@@ -461,7 +514,7 @@ impl Py<Frame> {
461514
/// with the mutable data inside
462515
struct ExecutingFrame<'a> {
463516
code: &'a PyRef<PyCode>,
464-
fastlocals: &'a PyMutex<Box<[Option<PyObjectRef>]>>,
517+
fastlocals: &'a FastLocals,
465518
locals: &'a ArgMapping,
466519
globals: &'a PyDictRef,
467520
builtins: &'a PyObjectRef,
@@ -1247,7 +1300,7 @@ impl ExecutingFrame<'_> {
12471300
Ok(None)
12481301
}
12491302
Instruction::DeleteFast(idx) => {
1250-
let mut fastlocals = self.fastlocals.lock();
1303+
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
12511304
let idx = idx.get(arg) as usize;
12521305
if fastlocals[idx].is_none() {
12531306
return Err(vm.new_exception_msg(
@@ -1749,7 +1802,7 @@ impl ExecutingFrame<'_> {
17491802
)
17501803
}
17511804
let idx = idx.get(arg) as usize;
1752-
let x = self.fastlocals.lock()[idx]
1805+
let x = unsafe { self.fastlocals.borrow() }[idx]
17531806
.clone()
17541807
.ok_or_else(|| reference_error(self.code.varnames[idx], vm))?;
17551808
self.push_value(x);
@@ -1759,7 +1812,7 @@ impl ExecutingFrame<'_> {
17591812
// Load value and clear the slot (for inlined comprehensions)
17601813
// If slot is empty, push None (not an error - variable may not exist yet)
17611814
let idx = idx.get(arg) as usize;
1762-
let x = self.fastlocals.lock()[idx]
1815+
let x = unsafe { self.fastlocals.borrow_mut() }[idx]
17631816
.take()
17641817
.unwrap_or_else(|| vm.ctx.none());
17651818
self.push_value(x);
@@ -1769,7 +1822,7 @@ impl ExecutingFrame<'_> {
17691822
// Same as LoadFast but explicitly checks for unbound locals
17701823
// (LoadFast in RustPython already does this check)
17711824
let idx = idx.get(arg) as usize;
1772-
let x = self.fastlocals.lock()[idx].clone().ok_or_else(|| {
1825+
let x = unsafe { self.fastlocals.borrow() }[idx].clone().ok_or_else(|| {
17731826
vm.new_exception_msg(
17741827
vm.ctx.exceptions.unbound_local_error.to_owned(),
17751828
format!(
@@ -1788,7 +1841,7 @@ impl ExecutingFrame<'_> {
17881841
let oparg = packed.get(arg);
17891842
let idx1 = (oparg >> 4) as usize;
17901843
let idx2 = (oparg & 15) as usize;
1791-
let fastlocals = self.fastlocals.lock();
1844+
let fastlocals = unsafe { self.fastlocals.borrow() };
17921845
let x1 = fastlocals[idx1].clone().ok_or_else(|| {
17931846
vm.new_exception_msg(
17941847
vm.ctx.exceptions.unbound_local_error.to_owned(),
@@ -1809,22 +1862,15 @@ impl ExecutingFrame<'_> {
18091862
.into(),
18101863
)
18111864
})?;
1812-
drop(fastlocals);
18131865
self.push_value(x1);
18141866
self.push_value(x2);
18151867
Ok(None)
18161868
}
18171869
// TODO: Implement true borrow optimization (skip Arc::clone).
1818-
// CPython's LOAD_FAST_BORROW uses PyStackRef_Borrow to avoid refcount
1819-
// increment for values that are consumed within the same basic block.
1820-
// Possible approaches:
1821-
// - Store raw pointers with careful lifetime management
1822-
// - Add a "borrowed" variant to stack slots
1823-
// - Use arena allocation for short-lived stack values
18241870
// Currently this just clones like LoadFast.
18251871
Instruction::LoadFastBorrow(idx) => {
18261872
let idx = idx.get(arg) as usize;
1827-
let x = self.fastlocals.lock()[idx].clone().ok_or_else(|| {
1873+
let x = unsafe { self.fastlocals.borrow() }[idx].clone().ok_or_else(|| {
18281874
vm.new_exception_msg(
18291875
vm.ctx.exceptions.unbound_local_error.to_owned(),
18301876
format!(
@@ -1837,12 +1883,11 @@ impl ExecutingFrame<'_> {
18371883
self.push_value(x);
18381884
Ok(None)
18391885
}
1840-
// TODO: Same as LoadFastBorrow - implement true borrow optimization.
18411886
Instruction::LoadFastBorrowLoadFastBorrow { arg: packed } => {
18421887
let oparg = packed.get(arg);
18431888
let idx1 = (oparg >> 4) as usize;
18441889
let idx2 = (oparg & 15) as usize;
1845-
let fastlocals = self.fastlocals.lock();
1890+
let fastlocals = unsafe { self.fastlocals.borrow() };
18461891
let x1 = fastlocals[idx1].clone().ok_or_else(|| {
18471892
vm.new_exception_msg(
18481893
vm.ctx.exceptions.unbound_local_error.to_owned(),
@@ -1863,7 +1908,6 @@ impl ExecutingFrame<'_> {
18631908
.into(),
18641909
)
18651910
})?;
1866-
drop(fastlocals);
18671911
self.push_value(x1);
18681912
self.push_value(x2);
18691913
Ok(None)
@@ -2347,33 +2391,28 @@ impl ExecutingFrame<'_> {
23472391
}
23482392
Instruction::StoreFast(idx) => {
23492393
let value = self.pop_value();
2350-
self.fastlocals.lock()[idx.get(arg) as usize] = Some(value);
2394+
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
2395+
fastlocals[idx.get(arg) as usize] = Some(value);
23512396
Ok(None)
23522397
}
23532398
Instruction::StoreFastLoadFast { var_nums } => {
2354-
// Store to one slot and load from another (often the same) - for inlined comprehensions
23552399
let value = self.pop_value();
2356-
let mut locals = self.fastlocals.lock();
2400+
let locals = unsafe { self.fastlocals.borrow_mut() };
23572401
let oparg = var_nums.get(arg);
23582402
locals[oparg.store_idx() as usize] = Some(value);
23592403
let load_value = locals[oparg.load_idx() as usize]
23602404
.clone()
23612405
.expect("StoreFastLoadFast: load slot should have value after store");
2362-
drop(locals);
23632406
self.push_value(load_value);
23642407
Ok(None)
23652408
}
23662409
Instruction::StoreFastStoreFast { arg: packed } => {
2367-
// Store two values to two local variables at once
2368-
// STORE_FAST idx1 executes first: pops TOS -> locals[idx1]
2369-
// STORE_FAST idx2 executes second: pops new TOS -> locals[idx2]
2370-
// oparg encoding: (idx1 << 4) | idx2
23712410
let oparg = packed.get(arg);
23722411
let idx1 = (oparg >> 4) as usize;
23732412
let idx2 = (oparg & 15) as usize;
2374-
let value1 = self.pop_value(); // TOS -> idx1
2375-
let value2 = self.pop_value(); // second -> idx2
2376-
let mut fastlocals = self.fastlocals.lock();
2413+
let value1 = self.pop_value();
2414+
let value2 = self.pop_value();
2415+
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
23772416
fastlocals[idx1] = Some(value1);
23782417
fastlocals[idx2] = Some(value2);
23792418
Ok(None)

0 commit comments

Comments
 (0)