Skip to content

Commit 5c25fc7

Browse files
committed
Fix PyStackRef: use NonZeroUsize for niche optimization, revert borrowed refs
- Change PyStackRef.bits from usize to NonZeroUsize so Option<PyStackRef> has the same size as Option<PyObjectRef> via niche optimization - Revert LoadFastBorrow to use clone instead of actual borrowed refs to avoid borrowed refs remaining on stack at yield points - Add static size assertions for Option<PyStackRef> - Add stackref, fastlocal to cspell dictionary - Remove debug eprintln statements - Fix clippy warning for unused push_borrowed
1 parent 73179e4 commit 5c25fc7

File tree

4 files changed

+74
-52
lines changed

4 files changed

+74
-52
lines changed

.cspell.dict/cpython.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ eofs
5454
evalloop
5555
excepthandler
5656
exceptiontable
57+
fastlocal
58+
fastlocals
5759
fblock
5860
fblocks
5961
fdescr
@@ -171,6 +173,7 @@ SMALLBUF
171173
SOABI
172174
SSLEOF
173175
stackdepth
176+
stackref
174177
staticbase
175178
stginfo
176179
storefast

crates/vm/src/frame.rs

Lines changed: 39 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,8 +1262,8 @@ impl ExecutingFrame<'_> {
12621262
}
12631263
Instruction::CompareOp { op } => self.execute_compare(vm, op.get(arg)),
12641264
Instruction::ContainsOp(invert) => {
1265-
let b = self.pop_stackref();
1266-
let a = self.pop_stackref();
1265+
let b = self.pop_value();
1266+
let a = self.pop_value();
12671267

12681268
let value = match invert.get(arg) {
12691269
bytecode::Invert::No => self._in(vm, &a, &b)?,
@@ -1281,12 +1281,7 @@ impl ExecutingFrame<'_> {
12811281
// This is 1-indexed to match CPython
12821282
let idx = index.get(arg) as usize;
12831283
let stack_len = self.state.stack.len();
1284-
if stack_len < idx {
1285-
eprintln!("CopyItem ERROR: stack_len={}, idx={}", stack_len, idx);
1286-
eprintln!(" code: {}", self.code.obj_name);
1287-
eprintln!(" lasti: {}", self.lasti());
1288-
panic!("CopyItem: stack underflow");
1289-
}
1284+
debug_assert!(stack_len >= idx, "CopyItem: stack underflow");
12901285
let value = self.state.stack[stack_len - idx].clone();
12911286
self.push_stackref_opt(value);
12921287
Ok(None)
@@ -1913,7 +1908,6 @@ impl ExecutingFrame<'_> {
19131908
.into(),
19141909
)
19151910
})?;
1916-
drop(fastlocals);
19171911
self.push_value(x1);
19181912
self.push_value(x2);
19191913
Ok(None)
@@ -2505,7 +2499,11 @@ impl ExecutingFrame<'_> {
25052499
}
25062500
Instruction::YieldValue { arg: oparg } => {
25072501
debug_assert!(
2508-
self.state.stack.iter().flatten().all(|sr| !sr.is_borrowed()),
2502+
self.state
2503+
.stack
2504+
.iter()
2505+
.flatten()
2506+
.all(|sr| !sr.is_borrowed()),
25092507
"borrowed refs on stack at yield point"
25102508
);
25112509
let value = self.pop_value();
@@ -3779,37 +3777,37 @@ impl ExecutingFrame<'_> {
37793777

37803778
#[cfg_attr(feature = "flame-it", flame("Frame"))]
37813779
fn execute_bin_op(&mut self, vm: &VirtualMachine, op: bytecode::BinaryOperator) -> FrameResult {
3782-
let b_ref = self.pop_stackref();
3783-
let a_ref = self.pop_stackref();
3780+
let b_ref = &self.pop_value();
3781+
let a_ref = &self.pop_value();
37843782
let value = match op {
3785-
bytecode::BinaryOperator::Subtract => vm._sub(&a_ref, &b_ref),
3786-
bytecode::BinaryOperator::Add => vm._add(&a_ref, &b_ref),
3787-
bytecode::BinaryOperator::Multiply => vm._mul(&a_ref, &b_ref),
3788-
bytecode::BinaryOperator::MatrixMultiply => vm._matmul(&a_ref, &b_ref),
3789-
bytecode::BinaryOperator::Power => vm._pow(&a_ref, &b_ref, vm.ctx.none.as_object()),
3790-
bytecode::BinaryOperator::TrueDivide => vm._truediv(&a_ref, &b_ref),
3791-
bytecode::BinaryOperator::FloorDivide => vm._floordiv(&a_ref, &b_ref),
3792-
bytecode::BinaryOperator::Remainder => vm._mod(&a_ref, &b_ref),
3793-
bytecode::BinaryOperator::Lshift => vm._lshift(&a_ref, &b_ref),
3794-
bytecode::BinaryOperator::Rshift => vm._rshift(&a_ref, &b_ref),
3795-
bytecode::BinaryOperator::Xor => vm._xor(&a_ref, &b_ref),
3796-
bytecode::BinaryOperator::Or => vm._or(&a_ref, &b_ref),
3797-
bytecode::BinaryOperator::And => vm._and(&a_ref, &b_ref),
3798-
bytecode::BinaryOperator::InplaceSubtract => vm._isub(&a_ref, &b_ref),
3799-
bytecode::BinaryOperator::InplaceAdd => vm._iadd(&a_ref, &b_ref),
3800-
bytecode::BinaryOperator::InplaceMultiply => vm._imul(&a_ref, &b_ref),
3801-
bytecode::BinaryOperator::InplaceMatrixMultiply => vm._imatmul(&a_ref, &b_ref),
3783+
bytecode::BinaryOperator::Subtract => vm._sub(a_ref, b_ref),
3784+
bytecode::BinaryOperator::Add => vm._add(a_ref, b_ref),
3785+
bytecode::BinaryOperator::Multiply => vm._mul(a_ref, b_ref),
3786+
bytecode::BinaryOperator::MatrixMultiply => vm._matmul(a_ref, b_ref),
3787+
bytecode::BinaryOperator::Power => vm._pow(a_ref, b_ref, vm.ctx.none.as_object()),
3788+
bytecode::BinaryOperator::TrueDivide => vm._truediv(a_ref, b_ref),
3789+
bytecode::BinaryOperator::FloorDivide => vm._floordiv(a_ref, b_ref),
3790+
bytecode::BinaryOperator::Remainder => vm._mod(a_ref, b_ref),
3791+
bytecode::BinaryOperator::Lshift => vm._lshift(a_ref, b_ref),
3792+
bytecode::BinaryOperator::Rshift => vm._rshift(a_ref, b_ref),
3793+
bytecode::BinaryOperator::Xor => vm._xor(a_ref, b_ref),
3794+
bytecode::BinaryOperator::Or => vm._or(a_ref, b_ref),
3795+
bytecode::BinaryOperator::And => vm._and(a_ref, b_ref),
3796+
bytecode::BinaryOperator::InplaceSubtract => vm._isub(a_ref, b_ref),
3797+
bytecode::BinaryOperator::InplaceAdd => vm._iadd(a_ref, b_ref),
3798+
bytecode::BinaryOperator::InplaceMultiply => vm._imul(a_ref, b_ref),
3799+
bytecode::BinaryOperator::InplaceMatrixMultiply => vm._imatmul(a_ref, b_ref),
38023800
bytecode::BinaryOperator::InplacePower => {
3803-
vm._ipow(&a_ref, &b_ref, vm.ctx.none.as_object())
3804-
}
3805-
bytecode::BinaryOperator::InplaceTrueDivide => vm._itruediv(&a_ref, &b_ref),
3806-
bytecode::BinaryOperator::InplaceFloorDivide => vm._ifloordiv(&a_ref, &b_ref),
3807-
bytecode::BinaryOperator::InplaceRemainder => vm._imod(&a_ref, &b_ref),
3808-
bytecode::BinaryOperator::InplaceLshift => vm._ilshift(&a_ref, &b_ref),
3809-
bytecode::BinaryOperator::InplaceRshift => vm._irshift(&a_ref, &b_ref),
3810-
bytecode::BinaryOperator::InplaceXor => vm._ixor(&a_ref, &b_ref),
3811-
bytecode::BinaryOperator::InplaceOr => vm._ior(&a_ref, &b_ref),
3812-
bytecode::BinaryOperator::InplaceAnd => vm._iand(&a_ref, &b_ref),
3801+
vm._ipow(a_ref, b_ref, vm.ctx.none.as_object())
3802+
}
3803+
bytecode::BinaryOperator::InplaceTrueDivide => vm._itruediv(a_ref, b_ref),
3804+
bytecode::BinaryOperator::InplaceFloorDivide => vm._ifloordiv(a_ref, b_ref),
3805+
bytecode::BinaryOperator::InplaceRemainder => vm._imod(a_ref, b_ref),
3806+
bytecode::BinaryOperator::InplaceLshift => vm._ilshift(a_ref, b_ref),
3807+
bytecode::BinaryOperator::InplaceRshift => vm._irshift(a_ref, b_ref),
3808+
bytecode::BinaryOperator::InplaceXor => vm._ixor(a_ref, b_ref),
3809+
bytecode::BinaryOperator::InplaceOr => vm._ior(a_ref, b_ref),
3810+
bytecode::BinaryOperator::InplaceAnd => vm._iand(a_ref, b_ref),
38133811
bytecode::BinaryOperator::Subscr => a_ref.get_item(b_ref.as_object(), vm),
38143812
}?;
38153813

@@ -4102,6 +4100,7 @@ impl ExecutingFrame<'_> {
41024100
/// The compiler guarantees consumption within the same basic block.
41034101
#[inline]
41044102
#[track_caller]
4103+
#[allow(dead_code)]
41054104
unsafe fn push_borrowed(&mut self, obj: &PyObject) {
41064105
self.push_stackref_opt(Some(unsafe { PyStackRef::new_borrowed(obj) }));
41074106
}
@@ -4311,8 +4310,7 @@ impl ExecutingFrame<'_> {
43114310
);
43124311
}
43134312
self.state.stack.drain(stack_len - count..).map(|obj| {
4314-
expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.")
4315-
.to_pyobj()
4313+
expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.").to_pyobj()
43164314
})
43174315
}
43184316

crates/vm/src/object/core.rs

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ use core::{
4040
cell::UnsafeCell,
4141
marker::PhantomData,
4242
mem::ManuallyDrop,
43+
num::NonZeroUsize,
4344
ops::Deref,
4445
ptr::{self, NonNull},
4546
};
@@ -1414,9 +1415,12 @@ const STACKREF_BORROW_TAG: usize = 1;
14141415
///
14151416
/// Same size as `PyObjectRef` (one pointer-width). `PyObject` is at least
14161417
/// 8-byte aligned, so the low bit is always available for tagging.
1418+
///
1419+
/// Uses `NonZeroUsize` so that `Option<PyStackRef>` has the same size as
1420+
/// `PyStackRef` via niche optimization (matching `Option<PyObjectRef>`).
14171421
#[repr(transparent)]
14181422
pub struct PyStackRef {
1419-
bits: usize,
1423+
bits: NonZeroUsize,
14201424
}
14211425

14221426
impl PyStackRef {
@@ -1426,8 +1430,14 @@ impl PyStackRef {
14261430
pub fn new_owned(obj: PyObjectRef) -> Self {
14271431
let ptr = obj.into_raw();
14281432
let bits = ptr.as_ptr() as usize;
1429-
debug_assert!(bits & STACKREF_BORROW_TAG == 0, "PyObject pointer must be aligned");
1430-
Self { bits }
1433+
debug_assert!(
1434+
bits & STACKREF_BORROW_TAG == 0,
1435+
"PyObject pointer must be aligned"
1436+
);
1437+
Self {
1438+
// SAFETY: valid PyObject pointers are never null
1439+
bits: unsafe { NonZeroUsize::new_unchecked(bits) },
1440+
}
14311441
}
14321442

14331443
/// Create a borrowed stack reference from a `&PyObject`.
@@ -1440,19 +1450,22 @@ impl PyStackRef {
14401450
#[inline(always)]
14411451
pub unsafe fn new_borrowed(obj: &PyObject) -> Self {
14421452
let bits = (obj as *const PyObject as usize) | STACKREF_BORROW_TAG;
1443-
Self { bits }
1453+
Self {
1454+
// SAFETY: valid PyObject pointers are never null, and ORing with 1 keeps it non-zero
1455+
bits: unsafe { NonZeroUsize::new_unchecked(bits) },
1456+
}
14441457
}
14451458

14461459
/// Whether this is a borrowed (non-owning) reference.
14471460
#[inline(always)]
14481461
pub fn is_borrowed(&self) -> bool {
1449-
self.bits & STACKREF_BORROW_TAG != 0
1462+
self.bits.get() & STACKREF_BORROW_TAG != 0
14501463
}
14511464

14521465
/// Get a `&PyObject` reference. Works for both owned and borrowed.
14531466
#[inline(always)]
14541467
pub fn as_object(&self) -> &PyObject {
1455-
unsafe { &*((self.bits & !STACKREF_BORROW_TAG) as *const PyObject) }
1468+
unsafe { &*((self.bits.get() & !STACKREF_BORROW_TAG) as *const PyObject) }
14561469
}
14571470

14581471
/// Convert to an owned `PyObjectRef`.
@@ -1464,7 +1477,7 @@ impl PyStackRef {
14641477
let obj = if self.is_borrowed() {
14651478
self.as_object().to_owned() // inc refcount
14661479
} else {
1467-
let ptr = unsafe { NonNull::new_unchecked(self.bits as *mut PyObject) };
1480+
let ptr = unsafe { NonNull::new_unchecked(self.bits.get() as *mut PyObject) };
14681481
unsafe { PyObjectRef::from_raw(ptr) }
14691482
};
14701483
core::mem::forget(self); // don't run Drop
@@ -1477,7 +1490,9 @@ impl PyStackRef {
14771490
pub fn promote(&mut self) {
14781491
if self.is_borrowed() {
14791492
self.as_object().0.ref_count.inc();
1480-
self.bits &= !STACKREF_BORROW_TAG;
1493+
// SAFETY: clearing the low bit of a non-null pointer keeps it non-zero
1494+
self.bits =
1495+
unsafe { NonZeroUsize::new_unchecked(self.bits.get() & !STACKREF_BORROW_TAG) };
14811496
}
14821497
}
14831498
}
@@ -1487,7 +1502,7 @@ impl Drop for PyStackRef {
14871502
fn drop(&mut self) {
14881503
if !self.is_borrowed() {
14891504
// Owned: decrement refcount (potentially deallocate).
1490-
let ptr = unsafe { NonNull::new_unchecked(self.bits as *mut PyObject) };
1505+
let ptr = unsafe { NonNull::new_unchecked(self.bits.get() as *mut PyObject) };
14911506
drop(unsafe { PyObjectRef::from_raw(ptr) });
14921507
}
14931508
// Borrowed: nothing to do.
@@ -1530,6 +1545,13 @@ cfg_if::cfg_if! {
15301545
}
15311546
}
15321547

1548+
// Ensure Option<PyStackRef> uses niche optimization and matches Option<PyObjectRef> in size
1549+
const _: () = assert!(
1550+
core::mem::size_of::<Option<PyStackRef>>() == core::mem::size_of::<Option<PyObjectRef>>()
1551+
);
1552+
const _: () =
1553+
assert!(core::mem::size_of::<Option<PyStackRef>>() == core::mem::size_of::<PyStackRef>());
1554+
15331555
#[repr(transparent)]
15341556
pub struct Py<T>(PyInner<T>);
15351557

crates/vm/src/object/traverse.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@ use core::ptr::NonNull;
33
use rustpython_common::lock::{PyMutex, PyRwLock};
44

55
use crate::{
6-
AsObject, PyObject, PyObjectRef, PyRef, PyStackRef, function::Either,
7-
object::PyObjectPayload,
6+
AsObject, PyObject, PyObjectRef, PyRef, PyStackRef, function::Either, object::PyObjectPayload,
87
};
98

109
pub type TraverseFn<'a> = dyn FnMut(&PyObject) + 'a;

0 commit comments

Comments
 (0)