Skip to content

Commit 2b08445

Browse files
Optimize fast_locals and atomic ordering (#7289)
* Relax RefCount atomic ordering from SeqCst to Arc pattern - inc/inc_by/get: SeqCst → Relaxed - safe_inc CAS: SeqCst → Relaxed + compare_exchange_weak - dec: SeqCst → Release + Acquire fence when count drops to 0 - leak CAS: SeqCst → AcqRel/Relaxed + compare_exchange_weak * Reuse existing Vec via prepend_arg in execute_call Replace vec![self_val] + extend(args.args) with FuncArgs::prepend_arg() to avoid a second heap allocation on every method call. * Skip downcast_ref checks in invoke when tracing is disabled Early return in PyCallable::invoke() when use_tracing is false, avoiding two downcast_ref type checks on every function call. * Replace fastlocals PyMutex with UnsafeCell-based FastLocals Eliminate per-instruction mutex lock/unlock overhead for local variable access. FastLocals uses UnsafeCell with safety guaranteed by the frame's state mutex and sequential same-thread execution. Affects 14+ lock() call sites in hot instruction paths (LoadFast, StoreFast, DeleteFast, and their paired variants). * Auto-format: cargo fmt --all --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 333ce69 commit 2b08445

File tree

6 files changed

+128
-83
lines changed

6 files changed

+128
-83
lines changed

crates/common/src/refcount.rs

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -92,26 +92,26 @@ impl RefCount {
9292
/// Get current strong count
9393
#[inline]
9494
pub fn get(&self) -> usize {
95-
State::from_raw(self.state.load(Ordering::SeqCst)).strong() as usize
95+
State::from_raw(self.state.load(Ordering::Relaxed)).strong() as usize
9696
}
9797

9898
/// Increment strong count
9999
#[inline]
100100
pub fn inc(&self) {
101-
let val = State::from_raw(self.state.fetch_add(COUNT, Ordering::SeqCst));
101+
let val = State::from_raw(self.state.fetch_add(COUNT, Ordering::Relaxed));
102102
if val.destructed() || (val.strong() as usize) > STRONG - 1 {
103103
refcount_overflow();
104104
}
105105
if val.strong() == 0 {
106106
// The previous fetch_add created a permission to run decrement again
107-
self.state.fetch_add(COUNT, Ordering::SeqCst);
107+
self.state.fetch_add(COUNT, Ordering::Relaxed);
108108
}
109109
}
110110

111111
#[inline]
112112
pub fn inc_by(&self, n: usize) {
113113
debug_assert!(n <= STRONG);
114-
let val = State::from_raw(self.state.fetch_add(n * COUNT, Ordering::SeqCst));
114+
let val = State::from_raw(self.state.fetch_add(n * COUNT, Ordering::Relaxed));
115115
if val.destructed() || (val.strong() as usize) > STRONG - n {
116116
refcount_overflow();
117117
}
@@ -121,7 +121,7 @@ impl RefCount {
121121
#[inline]
122122
#[must_use]
123123
pub fn safe_inc(&self) -> bool {
124-
let mut old = State::from_raw(self.state.load(Ordering::SeqCst));
124+
let mut old = State::from_raw(self.state.load(Ordering::Relaxed));
125125
loop {
126126
if old.destructed() {
127127
return false;
@@ -130,11 +130,11 @@ impl RefCount {
130130
refcount_overflow();
131131
}
132132
let new_state = old.add_strong(1);
133-
match self.state.compare_exchange(
133+
match self.state.compare_exchange_weak(
134134
old.as_raw(),
135135
new_state.as_raw(),
136-
Ordering::SeqCst,
137-
Ordering::SeqCst,
136+
Ordering::Relaxed,
137+
Ordering::Relaxed,
138138
) {
139139
Ok(_) => return true,
140140
Err(curr) => old = State::from_raw(curr),
@@ -146,27 +146,31 @@ impl RefCount {
146146
#[inline]
147147
#[must_use]
148148
pub fn dec(&self) -> bool {
149-
let old = State::from_raw(self.state.fetch_sub(COUNT, Ordering::SeqCst));
149+
let old = State::from_raw(self.state.fetch_sub(COUNT, Ordering::Release));
150150

151151
// LEAKED objects never reach 0
152152
if old.leaked() {
153153
return false;
154154
}
155155

156-
old.strong() == 1
156+
if old.strong() == 1 {
157+
core::sync::atomic::fence(Ordering::Acquire);
158+
return true;
159+
}
160+
false
157161
}
158162

159163
/// Mark this object as leaked (interned). It will never be deallocated.
160164
pub fn leak(&self) {
161165
debug_assert!(!self.is_leaked());
162-
let mut old = State::from_raw(self.state.load(Ordering::SeqCst));
166+
let mut old = State::from_raw(self.state.load(Ordering::Relaxed));
163167
loop {
164168
let new_state = old.with_leaked(true);
165-
match self.state.compare_exchange(
169+
match self.state.compare_exchange_weak(
166170
old.as_raw(),
167171
new_state.as_raw(),
168-
Ordering::SeqCst,
169-
Ordering::SeqCst,
172+
Ordering::AcqRel,
173+
Ordering::Relaxed,
170174
) {
171175
Ok(_) => return,
172176
Err(curr) => old = State::from_raw(curr),

crates/vm/src/builtins/frame.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -690,8 +690,9 @@ impl Py<Frame> {
690690
}
691691

692692
// Clear fastlocals
693+
// SAFETY: Frame is not executing (detached or stopped).
693694
{
694-
let mut fastlocals = self.fastlocals.lock();
695+
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
695696
for slot in fastlocals.iter_mut() {
696697
*slot = None;
697698
}

crates/vm/src/builtins/function.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,8 @@ impl PyFunction {
224224
// See also: PyEval_EvalCodeWithName in cpython:
225225
// https://github.com/python/cpython/blob/main/Python/ceval.c#L3681
226226

227-
let mut fastlocals = frame.fastlocals.lock();
227+
// SAFETY: Frame was just created and not yet executing.
228+
let fastlocals = unsafe { frame.fastlocals.borrow_mut() };
228229

229230
let mut args_iter = func_args.args.into_iter();
230231

crates/vm/src/builtins/super.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,9 @@ impl Initializer for PySuper {
8585
if frame.code.arg_count == 0 {
8686
return Err(vm.new_runtime_error("super(): no arguments"));
8787
}
88-
// Lock fastlocals briefly to get arg[0], then drop the guard
89-
// before calling get_cell_contents (which also locks fastlocals).
90-
let first_arg = frame.fastlocals.lock()[0].clone();
91-
let obj = first_arg
88+
// SAFETY: Frame is current and not concurrently mutated.
89+
let obj = unsafe { frame.fastlocals.borrow() }[0]
90+
.clone()
9291
.or_else(|| {
9392
if let Some(cell2arg) = frame.code.cell2arg.as_deref() {
9493
cell2arg[..frame.code.cellvars.len()]

0 commit comments

Comments
 (0)