Skip to content

Commit 936d118

Browse files
committed
Fix PyStackRef: use NonZeroUsize for niche optimization, revert borrowed refs
- Change PyStackRef.bits from usize to NonZeroUsize so Option<PyStackRef> has the same size as Option<PyObjectRef> via niche optimization
- Revert LoadFastBorrow to use clone instead of actual borrowed refs, to avoid borrowed refs remaining on the stack at yield points
- Add static size assertions for Option<PyStackRef>
- Add stackref, fastlocal to cspell dictionary
- Remove debug eprintln statements
- Fix clippy warning for unused push_borrowed
1 parent c706e52 commit 936d118

4 files changed

Lines changed: 82 additions & 65 deletions

File tree

.cspell.dict/cpython.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,8 @@ eofs
5454
evalloop
5555
excepthandler
5656
exceptiontable
57+
fastlocal
58+
fastlocals
5759
fblock
5860
fblocks
5961
fdescr
@@ -171,6 +173,7 @@ SMALLBUF
171173
SOABI
172174
SSLEOF
173175
stackdepth
176+
stackref
174177
staticbase
175178
stginfo
176179
storefast

crates/vm/src/frame.rs

Lines changed: 47 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -1209,8 +1209,8 @@ impl ExecutingFrame<'_> {
12091209
}
12101210
Instruction::CompareOp { op } => self.execute_compare(vm, op.get(arg)),
12111211
Instruction::ContainsOp(invert) => {
1212-
let b = self.pop_stackref();
1213-
let a = self.pop_stackref();
1212+
let b = self.pop_value();
1213+
let a = self.pop_value();
12141214

12151215
let value = match invert.get(arg) {
12161216
bytecode::Invert::No => self._in(vm, &a, &b)?,
@@ -1228,12 +1228,7 @@ impl ExecutingFrame<'_> {
12281228
// This is 1-indexed to match CPython
12291229
let idx = index.get(arg) as usize;
12301230
let stack_len = self.state.stack.len();
1231-
if stack_len < idx {
1232-
eprintln!("CopyItem ERROR: stack_len={}, idx={}", stack_len, idx);
1233-
eprintln!(" code: {}", self.code.obj_name);
1234-
eprintln!(" lasti: {}", self.lasti());
1235-
panic!("CopyItem: stack underflow");
1236-
}
1231+
debug_assert!(stack_len >= idx, "CopyItem: stack underflow");
12371232
let value = self.state.stack[stack_len - idx].clone();
12381233
self.push_stackref_opt(value);
12391234
Ok(None)
@@ -1816,9 +1811,10 @@ impl ExecutingFrame<'_> {
18161811
Ok(None)
18171812
}
18181813
Instruction::LoadFastBorrow(idx) => {
1814+
// TODO: re-enable true borrowed ref optimization after fixing
1815+
// stack corruption issue
18191816
let idx = idx.get(arg) as usize;
1820-
let fastlocals = self.fastlocals.lock();
1821-
let obj = fastlocals[idx].as_deref().ok_or_else(|| {
1817+
let x = self.fastlocals.lock()[idx].clone().ok_or_else(|| {
18221818
vm.new_exception_msg(
18231819
vm.ctx.exceptions.unbound_local_error.to_owned(),
18241820
format!(
@@ -1828,19 +1824,15 @@ impl ExecutingFrame<'_> {
18281824
.into(),
18291825
)
18301826
})?;
1831-
// SAFETY: the compiler guarantees this value is consumed within
1832-
// the same basic block, before any STORE_FAST/DELETE_FAST could
1833-
// overwrite the source fastlocal slot.
1834-
unsafe { self.push_borrowed(obj) };
1835-
drop(fastlocals);
1827+
self.push_value(x);
18361828
Ok(None)
18371829
}
18381830
Instruction::LoadFastBorrowLoadFastBorrow { arg: packed } => {
18391831
let oparg = packed.get(arg);
18401832
let idx1 = (oparg >> 4) as usize;
18411833
let idx2 = (oparg & 15) as usize;
18421834
let fastlocals = self.fastlocals.lock();
1843-
let obj1 = fastlocals[idx1].as_deref().ok_or_else(|| {
1835+
let x1 = fastlocals[idx1].clone().ok_or_else(|| {
18441836
vm.new_exception_msg(
18451837
vm.ctx.exceptions.unbound_local_error.to_owned(),
18461838
format!(
@@ -1850,7 +1842,7 @@ impl ExecutingFrame<'_> {
18501842
.into(),
18511843
)
18521844
})?;
1853-
let obj2 = fastlocals[idx2].as_deref().ok_or_else(|| {
1845+
let x2 = fastlocals[idx2].clone().ok_or_else(|| {
18541846
vm.new_exception_msg(
18551847
vm.ctx.exceptions.unbound_local_error.to_owned(),
18561848
format!(
@@ -1860,12 +1852,9 @@ impl ExecutingFrame<'_> {
18601852
.into(),
18611853
)
18621854
})?;
1863-
// SAFETY: see LoadFastBorrow above.
1864-
unsafe {
1865-
self.push_borrowed(obj1);
1866-
self.push_borrowed(obj2);
1867-
}
18681855
drop(fastlocals);
1856+
self.push_value(x1);
1857+
self.push_value(x2);
18691858
Ok(None)
18701859
}
18711860
Instruction::LoadGlobal(idx) => {
@@ -2460,7 +2449,11 @@ impl ExecutingFrame<'_> {
24602449
}
24612450
Instruction::YieldValue { arg: oparg } => {
24622451
debug_assert!(
2463-
self.state.stack.iter().flatten().all(|sr| !sr.is_borrowed()),
2452+
self.state
2453+
.stack
2454+
.iter()
2455+
.flatten()
2456+
.all(|sr| !sr.is_borrowed()),
24642457
"borrowed refs on stack at yield point"
24652458
);
24662459
let value = self.pop_value();
@@ -3740,37 +3733,37 @@ impl ExecutingFrame<'_> {
37403733

37413734
#[cfg_attr(feature = "flame-it", flame("Frame"))]
37423735
fn execute_bin_op(&mut self, vm: &VirtualMachine, op: bytecode::BinaryOperator) -> FrameResult {
3743-
let b_ref = self.pop_stackref();
3744-
let a_ref = self.pop_stackref();
3736+
let b_ref = &self.pop_value();
3737+
let a_ref = &self.pop_value();
37453738
let value = match op {
3746-
bytecode::BinaryOperator::Subtract => vm._sub(&a_ref, &b_ref),
3747-
bytecode::BinaryOperator::Add => vm._add(&a_ref, &b_ref),
3748-
bytecode::BinaryOperator::Multiply => vm._mul(&a_ref, &b_ref),
3749-
bytecode::BinaryOperator::MatrixMultiply => vm._matmul(&a_ref, &b_ref),
3750-
bytecode::BinaryOperator::Power => vm._pow(&a_ref, &b_ref, vm.ctx.none.as_object()),
3751-
bytecode::BinaryOperator::TrueDivide => vm._truediv(&a_ref, &b_ref),
3752-
bytecode::BinaryOperator::FloorDivide => vm._floordiv(&a_ref, &b_ref),
3753-
bytecode::BinaryOperator::Remainder => vm._mod(&a_ref, &b_ref),
3754-
bytecode::BinaryOperator::Lshift => vm._lshift(&a_ref, &b_ref),
3755-
bytecode::BinaryOperator::Rshift => vm._rshift(&a_ref, &b_ref),
3756-
bytecode::BinaryOperator::Xor => vm._xor(&a_ref, &b_ref),
3757-
bytecode::BinaryOperator::Or => vm._or(&a_ref, &b_ref),
3758-
bytecode::BinaryOperator::And => vm._and(&a_ref, &b_ref),
3759-
bytecode::BinaryOperator::InplaceSubtract => vm._isub(&a_ref, &b_ref),
3760-
bytecode::BinaryOperator::InplaceAdd => vm._iadd(&a_ref, &b_ref),
3761-
bytecode::BinaryOperator::InplaceMultiply => vm._imul(&a_ref, &b_ref),
3762-
bytecode::BinaryOperator::InplaceMatrixMultiply => vm._imatmul(&a_ref, &b_ref),
3739+
bytecode::BinaryOperator::Subtract => vm._sub(a_ref, b_ref),
3740+
bytecode::BinaryOperator::Add => vm._add(a_ref, b_ref),
3741+
bytecode::BinaryOperator::Multiply => vm._mul(a_ref, b_ref),
3742+
bytecode::BinaryOperator::MatrixMultiply => vm._matmul(a_ref, b_ref),
3743+
bytecode::BinaryOperator::Power => vm._pow(a_ref, b_ref, vm.ctx.none.as_object()),
3744+
bytecode::BinaryOperator::TrueDivide => vm._truediv(a_ref, b_ref),
3745+
bytecode::BinaryOperator::FloorDivide => vm._floordiv(a_ref, b_ref),
3746+
bytecode::BinaryOperator::Remainder => vm._mod(a_ref, b_ref),
3747+
bytecode::BinaryOperator::Lshift => vm._lshift(a_ref, b_ref),
3748+
bytecode::BinaryOperator::Rshift => vm._rshift(a_ref, b_ref),
3749+
bytecode::BinaryOperator::Xor => vm._xor(a_ref, b_ref),
3750+
bytecode::BinaryOperator::Or => vm._or(a_ref, b_ref),
3751+
bytecode::BinaryOperator::And => vm._and(a_ref, b_ref),
3752+
bytecode::BinaryOperator::InplaceSubtract => vm._isub(a_ref, b_ref),
3753+
bytecode::BinaryOperator::InplaceAdd => vm._iadd(a_ref, b_ref),
3754+
bytecode::BinaryOperator::InplaceMultiply => vm._imul(a_ref, b_ref),
3755+
bytecode::BinaryOperator::InplaceMatrixMultiply => vm._imatmul(a_ref, b_ref),
37633756
bytecode::BinaryOperator::InplacePower => {
3764-
vm._ipow(&a_ref, &b_ref, vm.ctx.none.as_object())
3765-
}
3766-
bytecode::BinaryOperator::InplaceTrueDivide => vm._itruediv(&a_ref, &b_ref),
3767-
bytecode::BinaryOperator::InplaceFloorDivide => vm._ifloordiv(&a_ref, &b_ref),
3768-
bytecode::BinaryOperator::InplaceRemainder => vm._imod(&a_ref, &b_ref),
3769-
bytecode::BinaryOperator::InplaceLshift => vm._ilshift(&a_ref, &b_ref),
3770-
bytecode::BinaryOperator::InplaceRshift => vm._irshift(&a_ref, &b_ref),
3771-
bytecode::BinaryOperator::InplaceXor => vm._ixor(&a_ref, &b_ref),
3772-
bytecode::BinaryOperator::InplaceOr => vm._ior(&a_ref, &b_ref),
3773-
bytecode::BinaryOperator::InplaceAnd => vm._iand(&a_ref, &b_ref),
3757+
vm._ipow(a_ref, b_ref, vm.ctx.none.as_object())
3758+
}
3759+
bytecode::BinaryOperator::InplaceTrueDivide => vm._itruediv(a_ref, b_ref),
3760+
bytecode::BinaryOperator::InplaceFloorDivide => vm._ifloordiv(a_ref, b_ref),
3761+
bytecode::BinaryOperator::InplaceRemainder => vm._imod(a_ref, b_ref),
3762+
bytecode::BinaryOperator::InplaceLshift => vm._ilshift(a_ref, b_ref),
3763+
bytecode::BinaryOperator::InplaceRshift => vm._irshift(a_ref, b_ref),
3764+
bytecode::BinaryOperator::InplaceXor => vm._ixor(a_ref, b_ref),
3765+
bytecode::BinaryOperator::InplaceOr => vm._ior(a_ref, b_ref),
3766+
bytecode::BinaryOperator::InplaceAnd => vm._iand(a_ref, b_ref),
37743767
bytecode::BinaryOperator::Subscr => a_ref.get_item(b_ref.as_object(), vm),
37753768
}?;
37763769

@@ -4063,6 +4056,7 @@ impl ExecutingFrame<'_> {
40634056
/// The compiler guarantees consumption within the same basic block.
40644057
#[inline]
40654058
#[track_caller]
4059+
#[allow(dead_code)]
40664060
unsafe fn push_borrowed(&mut self, obj: &PyObject) {
40674061
self.push_stackref_opt(Some(unsafe { PyStackRef::new_borrowed(obj) }));
40684062
}
@@ -4272,8 +4266,7 @@ impl ExecutingFrame<'_> {
42724266
);
42734267
}
42744268
self.state.stack.drain(stack_len - count..).map(|obj| {
4275-
expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.")
4276-
.to_pyobj()
4269+
expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.").to_pyobj()
42774270
})
42784271
}
42794272

crates/vm/src/object/core.rs

Lines changed: 31 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ use core::{
4040
cell::UnsafeCell,
4141
marker::PhantomData,
4242
mem::ManuallyDrop,
43+
num::NonZeroUsize,
4344
ops::Deref,
4445
ptr::{self, NonNull},
4546
};
@@ -1414,9 +1415,12 @@ const STACKREF_BORROW_TAG: usize = 1;
14141415
///
14151416
/// Same size as `PyObjectRef` (one pointer-width). `PyObject` is at least
14161417
/// 8-byte aligned, so the low bit is always available for tagging.
1418+
///
1419+
/// Uses `NonZeroUsize` so that `Option<PyStackRef>` has the same size as
1420+
/// `PyStackRef` via niche optimization (matching `Option<PyObjectRef>`).
14171421
#[repr(transparent)]
14181422
pub struct PyStackRef {
1419-
bits: usize,
1423+
bits: NonZeroUsize,
14201424
}
14211425

14221426
impl PyStackRef {
@@ -1426,8 +1430,14 @@ impl PyStackRef {
14261430
pub fn new_owned(obj: PyObjectRef) -> Self {
14271431
let ptr = obj.into_raw();
14281432
let bits = ptr.as_ptr() as usize;
1429-
debug_assert!(bits & STACKREF_BORROW_TAG == 0, "PyObject pointer must be aligned");
1430-
Self { bits }
1433+
debug_assert!(
1434+
bits & STACKREF_BORROW_TAG == 0,
1435+
"PyObject pointer must be aligned"
1436+
);
1437+
Self {
1438+
// SAFETY: valid PyObject pointers are never null
1439+
bits: unsafe { NonZeroUsize::new_unchecked(bits) },
1440+
}
14311441
}
14321442

14331443
/// Create a borrowed stack reference from a `&PyObject`.
@@ -1440,19 +1450,22 @@ impl PyStackRef {
14401450
#[inline(always)]
14411451
pub unsafe fn new_borrowed(obj: &PyObject) -> Self {
14421452
let bits = (obj as *const PyObject as usize) | STACKREF_BORROW_TAG;
1443-
Self { bits }
1453+
Self {
1454+
// SAFETY: valid PyObject pointers are never null, and ORing with 1 keeps it non-zero
1455+
bits: unsafe { NonZeroUsize::new_unchecked(bits) },
1456+
}
14441457
}
14451458

14461459
/// Whether this is a borrowed (non-owning) reference.
14471460
#[inline(always)]
14481461
pub fn is_borrowed(&self) -> bool {
1449-
self.bits & STACKREF_BORROW_TAG != 0
1462+
self.bits.get() & STACKREF_BORROW_TAG != 0
14501463
}
14511464

14521465
/// Get a `&PyObject` reference. Works for both owned and borrowed.
14531466
#[inline(always)]
14541467
pub fn as_object(&self) -> &PyObject {
1455-
unsafe { &*((self.bits & !STACKREF_BORROW_TAG) as *const PyObject) }
1468+
unsafe { &*((self.bits.get() & !STACKREF_BORROW_TAG) as *const PyObject) }
14561469
}
14571470

14581471
/// Convert to an owned `PyObjectRef`.
@@ -1464,7 +1477,7 @@ impl PyStackRef {
14641477
let obj = if self.is_borrowed() {
14651478
self.as_object().to_owned() // inc refcount
14661479
} else {
1467-
let ptr = unsafe { NonNull::new_unchecked(self.bits as *mut PyObject) };
1480+
let ptr = unsafe { NonNull::new_unchecked(self.bits.get() as *mut PyObject) };
14681481
unsafe { PyObjectRef::from_raw(ptr) }
14691482
};
14701483
core::mem::forget(self); // don't run Drop
@@ -1477,7 +1490,9 @@ impl PyStackRef {
14771490
pub fn promote(&mut self) {
14781491
if self.is_borrowed() {
14791492
self.as_object().0.ref_count.inc();
1480-
self.bits &= !STACKREF_BORROW_TAG;
1493+
// SAFETY: clearing the low bit of a non-null pointer keeps it non-zero
1494+
self.bits =
1495+
unsafe { NonZeroUsize::new_unchecked(self.bits.get() & !STACKREF_BORROW_TAG) };
14811496
}
14821497
}
14831498
}
@@ -1487,7 +1502,7 @@ impl Drop for PyStackRef {
14871502
fn drop(&mut self) {
14881503
if !self.is_borrowed() {
14891504
// Owned: decrement refcount (potentially deallocate).
1490-
let ptr = unsafe { NonNull::new_unchecked(self.bits as *mut PyObject) };
1505+
let ptr = unsafe { NonNull::new_unchecked(self.bits.get() as *mut PyObject) };
14911506
drop(unsafe { PyObjectRef::from_raw(ptr) });
14921507
}
14931508
// Borrowed: nothing to do.
@@ -1530,6 +1545,13 @@ cfg_if::cfg_if! {
15301545
}
15311546
}
15321547

1548+
// Ensure Option<PyStackRef> uses niche optimization and matches Option<PyObjectRef> in size
1549+
const _: () = assert!(
1550+
core::mem::size_of::<Option<PyStackRef>>() == core::mem::size_of::<Option<PyObjectRef>>()
1551+
);
1552+
const _: () =
1553+
assert!(core::mem::size_of::<Option<PyStackRef>>() == core::mem::size_of::<PyStackRef>());
1554+
15331555
#[repr(transparent)]
15341556
pub struct Py<T>(PyInner<T>);
15351557

crates/vm/src/object/traverse.rs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,7 @@ use core::ptr::NonNull;
33
use rustpython_common::lock::{PyMutex, PyRwLock};
44

55
use crate::{
6-
AsObject, PyObject, PyObjectRef, PyRef, PyStackRef, function::Either,
7-
object::PyObjectPayload,
6+
AsObject, PyObject, PyObjectRef, PyRef, PyStackRef, function::Either, object::PyObjectPayload,
87
};
98

109
pub type TraverseFn<'a> = dyn FnMut(&PyObject) + 'a;

0 commit comments

Comments (0)