Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
LoadFastBorrow
  • Loading branch information
youknowone committed Jan 29, 2026
commit 3eb467655322820abb1dcc1d6d3b62a2ea4efd1a
3 changes: 0 additions & 3 deletions Lib/test/test_peepholer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,7 +1002,6 @@ def f():
self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a73")
self.assertInBytecode(f, 'LOAD_FAST_BORROW', "a73")

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_setting_lineno_no_undefined(self):
code = textwrap.dedent("""\
def f():
Expand Down Expand Up @@ -1123,7 +1122,6 @@ def f():
self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
return f

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_modifying_local_does_not_add_check(self):
f = self.make_function_with_no_checks()
def trace(frame, event, arg):
Expand All @@ -1137,7 +1135,6 @@ def trace(frame, event, arg):
self.assertInBytecode(f, "LOAD_FAST_BORROW")
self.assertNotInBytecode(f, "LOAD_FAST_CHECK")

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_initializing_local_does_not_add_check(self):
f = self.make_function_with_no_checks()
def trace(frame, event, arg):
Expand Down
107 changes: 107 additions & 0 deletions crates/codegen/src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ impl CodeInfo {
self.peephole_optimize();
}

// Always apply LOAD_FAST_BORROW optimization
self.optimize_load_fast_borrow();

let max_stackdepth = self.max_stackdepth()?;
let cell2arg = self.cell2arg();

Expand Down Expand Up @@ -687,6 +690,110 @@ impl CodeInfo {
}
}

/// Optimize LOAD_FAST to LOAD_FAST_BORROW where safe.
///
/// A LOAD_FAST can be converted to LOAD_FAST_BORROW if its value is
/// consumed within the same basic block (not passed to another block).
/// This is a reference counting optimization in CPython; in RustPython
/// we implement it for bytecode compatibility.
///
/// The analysis simulates the value stack per block, tagging each pushed
/// entry with the index of the LOAD_FAST that produced it (or NOT_LOCAL).
/// If the simulation cannot model the block faithfully — a pseudo
/// instruction is encountered (this pass runs before pseudo lowering and
/// pseudo ops may push/pop), or the simulated stack underflows (the block
/// consumes values produced by a predecessor) — the whole block is left
/// unoptimized rather than risk an unsafe conversion.
fn optimize_load_fast_borrow(&mut self) {
    use rustpython_compiler_core::bytecode::InstructionMetadata;

    // NOT_LOCAL marker: instruction didn't come from a LOAD_FAST
    const NOT_LOCAL: usize = usize::MAX;

    for block in &mut self.blocks {
        if block.instructions.is_empty() {
            continue;
        }

        // Track which instructions' outputs are still on stack at block end.
        // unconsumed[i] == true means instruction i's pushed value survives
        // the block, so it must NOT be converted to a borrow.
        let mut unconsumed = vec![false; block.instructions.len()];

        // Simulated stack: each entry is the instruction index that pushed it
        // (or NOT_LOCAL if not from LOAD_FAST/LOAD_FAST_LOAD_FAST).
        let mut stack: Vec<usize> = Vec::new();

        // Set when the simulation is no longer trustworthy; the block is
        // then skipped entirely (conservative: miss the optimization,
        // never apply it unsafely).
        let mut bail = false;

        for (i, info) in block.instructions.iter().enumerate() {
            let Some(instr) = info.instr.real() else {
                // Pseudo ops (e.g. LoadClosure, StoreFastMaybeNull) can
                // change stack depth; treating them as no-ops would
                // desynchronize the simulation. Bail on this block.
                bail = true;
                break;
            };

            // Net stack effect; used as a fallback for instructions
            // without an explicit (pops, pushes) entry below.
            let effect = instr.stack_effect(info.arg);
            let num_popped = if effect < 0 { (-effect) as usize } else { 0 };
            let num_pushed = if effect > 0 { effect as usize } else { 0 };

            // Instructions that both pop and push need explicit counts:
            // modeling them via net effect alone would keep stale entries
            // where a fresh (NOT_LOCAL) result actually sits. The net-effect
            // fallback only ever under-pops, which errs on the safe side
            // (values are marked unconsumed, i.e. not optimized).
            let (pops, pushes) = match instr {
                Instruction::BinaryOp { .. } => (2, 1),
                Instruction::CompareOp { .. } => (2, 1),
                Instruction::ContainsOp(_) => (2, 1),
                Instruction::IsOp(_) => (2, 1),
                Instruction::UnaryInvert
                | Instruction::UnaryNegative
                | Instruction::UnaryNot
                | Instruction::ToBool => (1, 1),
                Instruction::GetIter | Instruction::GetAIter => (1, 1),
                Instruction::LoadAttr { .. } => (1, 1), // simplified
                Instruction::Call { nargs } => (nargs.get(info.arg) as usize + 2, 1),
                Instruction::CallKw { nargs } => (nargs.get(info.arg) as usize + 3, 1),
                // Use net stack effect for everything else.
                _ => (num_popped, num_pushed),
            };

            // Pop consumed values from the simulated stack.
            for _ in 0..pops {
                if stack.pop().is_none() {
                    // Underflow: this block consumes values produced by a
                    // predecessor block. Don't optimize this block at all.
                    bail = true;
                    break;
                }
            }
            if bail {
                break;
            }

            // Push produced values, tagged with their source instruction.
            let source = match instr {
                Instruction::LoadFast(_) | Instruction::LoadFastLoadFast { .. } => i,
                _ => NOT_LOCAL,
            };
            for _ in 0..pushes {
                stack.push(source);
            }
        }

        if bail {
            continue;
        }

        // Anything still on the simulated stack escapes the block; its
        // producing LOAD_FAST must keep a strong reference.
        for &src in &stack {
            if src != NOT_LOCAL {
                unconsumed[src] = true;
            }
        }

        // Convert LOAD_FAST to LOAD_FAST_BORROW where the value is fully
        // consumed within this block.
        for (i, info) in block.instructions.iter_mut().enumerate() {
            if unconsumed[i] {
                continue;
            }
            let Some(instr) = info.instr.real() else {
                continue;
            };
            match instr {
                Instruction::LoadFast(_) => {
                    // The real operand lives in info.arg; the instruction
                    // itself carries only a marker Arg.
                    info.instr = Instruction::LoadFastBorrow(Arg::marker()).into();
                }
                Instruction::LoadFastLoadFast { .. } => {
                    info.instr =
                        Instruction::LoadFastBorrowLoadFastBorrow { arg: Arg::marker() }.into();
                }
                _ => {}
            }
        }
    }
}

fn max_stackdepth(&self) -> crate::InternalResult<u32> {
let mut maxdepth = 0u32;
let mut stack = Vec::with_capacity(self.blocks.len());
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 26 additions & 2 deletions crates/compiler-core/src/bytecode/instruction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,10 @@ pub enum Instruction {
LoadDeref(Arg<NameIdx>) = 83,
LoadFast(Arg<NameIdx>) = 84,
LoadFastAndClear(Arg<NameIdx>) = 85,
LoadFastBorrow(Arg<NameIdx>) = 86, // Placeholder
LoadFastBorrow(Arg<NameIdx>) = 86,
LoadFastBorrowLoadFastBorrow {
arg: Arg<u32>,
} = 87, // Placeholder
} = 87,
LoadFastCheck(Arg<NameIdx>) = 88,
LoadFastLoadFast {
arg: Arg<u32>,
Expand Down Expand Up @@ -861,6 +861,29 @@ impl InstructionMetadata for Instruction {
Self::LoadDeref(idx) => w!(LOAD_DEREF, cell_name = idx),
Self::LoadFast(idx) => w!(LOAD_FAST, varname = idx),
Self::LoadFastAndClear(idx) => w!(LOAD_FAST_AND_CLEAR, varname = idx),
Self::LoadFastBorrow(idx) => w!(LOAD_FAST_BORROW, varname = idx),
Self::LoadFastCheck(idx) => w!(LOAD_FAST_CHECK, varname = idx),
Self::LoadFastLoadFast { arg: packed } => {
let oparg = packed.get(arg);
let idx1 = oparg >> 4;
let idx2 = oparg & 15;
let name1 = varname(idx1);
let name2 = varname(idx2);
write!(f, "{:pad$}({}, {})", "LOAD_FAST_LOAD_FAST", name1, name2)
}
Self::LoadFastBorrowLoadFastBorrow { arg: packed } => {
let oparg = packed.get(arg);
let idx1 = oparg >> 4;
let idx2 = oparg & 15;
let name1 = varname(idx1);
let name2 = varname(idx2);
write!(
f,
"{:pad$}({}, {})",
"LOAD_FAST_BORROW_LOAD_FAST_BORROW", name1, name2
)
}
Self::LoadFromDictOrGlobals(idx) => w!(LOAD_FROM_DICT_OR_GLOBALS, name = idx),
Self::LoadGlobal(idx) => w!(LOAD_GLOBAL, name = idx),
Self::LoadName(idx) => w!(LOAD_NAME, name = idx),
Self::LoadSpecial { method } => w!(LOAD_SPECIAL, method),
Expand Down Expand Up @@ -893,6 +916,7 @@ impl InstructionMetadata for Instruction {
Self::Reraise { depth } => w!(RERAISE, depth),
Self::Resume { arg } => w!(RESUME, arg),
Self::ReturnValue => w!(RETURN_VALUE),
Self::ReturnGenerator => w!(RETURN_GENERATOR),
Self::Send { target } => w!(SEND, target),
Self::SetAdd { i } => w!(SET_ADD, i),
Self::SetFunctionAttribute { attr } => w!(SET_FUNCTION_ATTRIBUTE, ?attr),
Expand Down
2 changes: 1 addition & 1 deletion crates/jit/src/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> {
self.stack.push(JitValue::Int(val));
Ok(())
}
Instruction::LoadFast(idx) => {
Instruction::LoadFast(idx) | Instruction::LoadFastBorrow(idx) => {
let local = self.variables[idx.get(arg) as usize]
.as_ref()
.ok_or(JitCompileError::BadBytecode)?;
Expand Down
51 changes: 51 additions & 0 deletions crates/vm/src/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,57 @@ impl ExecutingFrame<'_> {
self.push_value(x2);
Ok(None)
}
// TODO: Implement true borrow optimization (skip Arc::clone).
// CPython's LOAD_FAST_BORROW uses PyStackRef_Borrow to avoid refcount
// increment for values that are consumed within the same basic block.
// Possible approaches:
// - Store raw pointers with careful lifetime management
// - Add a "borrowed" variant to stack slots
// - Use arena allocation for short-lived stack values
// Currently this just clones like LoadFast.
Instruction::LoadFastBorrow(idx) => {
let idx = idx.get(arg) as usize;
let x = self.fastlocals.lock()[idx].clone().ok_or_else(|| {
vm.new_exception_msg(
vm.ctx.exceptions.unbound_local_error.to_owned(),
format!(
"local variable '{}' referenced before assignment",
self.code.varnames[idx]
),
)
})?;
self.push_value(x);
Ok(None)
}
// TODO: Same as LoadFastBorrow - implement true borrow optimization.
Instruction::LoadFastBorrowLoadFastBorrow { arg: packed } => {
let oparg = packed.get(arg);
let idx1 = (oparg >> 4) as usize;
let idx2 = (oparg & 15) as usize;
let fastlocals = self.fastlocals.lock();
let x1 = fastlocals[idx1].clone().ok_or_else(|| {
vm.new_exception_msg(
vm.ctx.exceptions.unbound_local_error.to_owned(),
format!(
"local variable '{}' referenced before assignment",
self.code.varnames[idx1]
),
)
})?;
let x2 = fastlocals[idx2].clone().ok_or_else(|| {
vm.new_exception_msg(
vm.ctx.exceptions.unbound_local_error.to_owned(),
format!(
"local variable '{}' referenced before assignment",
self.code.varnames[idx2]
),
)
})?;
drop(fastlocals);
self.push_value(x1);
self.push_value(x2);
Ok(None)
}
Instruction::LoadGlobal(idx) => {
let name = &self.code.names[idx.get(arg) as usize];
let x = self.load_global_or_builtin(name, vm)?;
Expand Down