Skip to content

Commit 34a5afb

Browse files
committed
Bytecode parity: direct loop backedges
1 parent a49ce5b commit 34a5afb

File tree

3 files changed

+139
-92
lines changed

3 files changed

+139
-92
lines changed

crates/codegen/src/compile.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11148,6 +11148,46 @@ def f(base, cls, state):
1114811148
assert_eq!(return_count, 2);
1114911149
}
1115011150

11151+
#[test]
11152+
fn test_loop_store_subscr_threads_direct_backedge() {
11153+
let code = compile_exec(
11154+
"\
11155+
def f(kwonlyargs, kwonlydefaults, arg2value):
11156+
missing = 0
11157+
for kwarg in kwonlyargs:
11158+
if kwarg not in arg2value:
11159+
if kwonlydefaults and kwarg in kwonlydefaults:
11160+
arg2value[kwarg] = kwonlydefaults[kwarg]
11161+
else:
11162+
missing += 1
11163+
return missing
11164+
",
11165+
);
11166+
let f = find_code(&code, "f").expect("missing function code");
11167+
let ops: Vec<_> = f
11168+
.instructions
11169+
.iter()
11170+
.map(|unit| unit.op)
11171+
.filter(|op| !matches!(op, Instruction::Cache))
11172+
.collect();
11173+
11174+
let store_subscr = ops
11175+
.iter()
11176+
.position(|op| matches!(op, Instruction::StoreSubscr))
11177+
.expect("missing STORE_SUBSCR");
11178+
let next_op = ops
11179+
.get(store_subscr + 1)
11180+
.expect("missing jump after STORE_SUBSCR");
11181+
let window_start = store_subscr.saturating_sub(3);
11182+
let window_end = (store_subscr + 5).min(ops.len());
11183+
let window = &ops[window_start..window_end];
11184+
11185+
assert!(
11186+
matches!(next_op, Instruction::JumpBackward { .. }),
11187+
"expected direct loop backedge after STORE_SUBSCR, got {next_op:?}; ops={window:?}"
11188+
);
11189+
}
11190+
1115111191
#[test]
1115211192
fn test_assert_without_message_raises_class_directly() {
1115311193
let code = compile_exec(

crates/codegen/src/ir.rs

Lines changed: 87 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,13 @@ pub struct ExceptHandlerInfo {
125125
pub preserve_lasti: bool,
126126
}
127127

128+
fn set_to_nop(info: &mut InstructionInfo) {
129+
info.instr = Instruction::Nop.into();
130+
info.arg = OpArg::new(0);
131+
info.target = BlockIdx::NULL;
132+
info.cache_entries = 0;
133+
}
134+
128135
// spell-checker:ignore petgraph
129136
// TODO: look into using petgraph for handling blocks and stuff? it's heavier than this, but it
130137
// might enable more analysis/optimizations
@@ -241,6 +248,12 @@ impl CodeInfo {
241248
self.dce(); // truncate after terminal in blocks that got return duplicated
242249
self.eliminate_unreachable_blocks(); // remove now-unreachable last block
243250
remove_redundant_nops_and_jumps(&mut self.blocks);
251+
// Some jump-only blocks only appear after late CFG cleanup. Thread them
252+
// once more so loop backedges stay direct instead of becoming
253+
// JUMP_FORWARD -> JUMP_BACKWARD chains.
254+
jump_threading_unconditional(&mut self.blocks);
255+
self.eliminate_unreachable_blocks();
256+
remove_redundant_nops_and_jumps(&mut self.blocks);
244257
self.add_checks_for_loads_of_uninitialized_variables();
245258
// optimize_load_fast: after normalize_jumps
246259
self.optimize_load_fast_borrow();
@@ -701,7 +714,7 @@ impl CodeInfo {
701714
block.instructions[i].arg = OpArg::new(const_idx as u32);
702715
// Replace UNARY_NEGATIVE with NOP, inheriting the LOAD_CONST
703716
// location so that remove_nops can clean it up
704-
block.instructions[i + 1].instr = Instruction::Nop.into();
717+
set_to_nop(&mut block.instructions[i + 1]);
705718
block.instructions[i + 1].location = load_location;
706719
block.instructions[i + 1].end_location = block.instructions[i].end_location;
707720
// Skip the NOP, don't re-check
@@ -761,10 +774,10 @@ impl CodeInfo {
761774
// NOP out the second and third instructions
762775
let loc = block.instructions[i].location;
763776
let end_loc = block.instructions[i].end_location;
764-
block.instructions[i + 1].instr = Instruction::Nop.into();
777+
set_to_nop(&mut block.instructions[i + 1]);
765778
block.instructions[i + 1].location = loc;
766779
block.instructions[i + 1].end_location = end_loc;
767-
block.instructions[i + 2].instr = Instruction::Nop.into();
780+
set_to_nop(&mut block.instructions[i + 2]);
768781
block.instructions[i + 2].location = loc;
769782
block.instructions[i + 2].end_location = end_loc;
770783
// Don't advance - check if the result can be folded again
@@ -1026,7 +1039,7 @@ impl CodeInfo {
10261039
// BUILD_TUPLE location so remove_nops() can eliminate them.
10271040
let folded_loc = block.instructions[i].location;
10281041
for j in start_idx..i {
1029-
block.instructions[j].instr = Instruction::Nop.into();
1042+
set_to_nop(&mut block.instructions[j]);
10301043
block.instructions[j].location = folded_loc;
10311044
}
10321045

@@ -1125,7 +1138,7 @@ impl CodeInfo {
11251138

11261139
// NOP the rest
11271140
for j in (start_idx + 2)..i {
1128-
block.instructions[j].instr = Instruction::Nop.into();
1141+
set_to_nop(&mut block.instructions[j]);
11291142
block.instructions[j].location = folded_loc;
11301143
}
11311144

@@ -1180,9 +1193,9 @@ impl CodeInfo {
11801193
if is_build && is_const && is_extend && is_iter {
11811194
// Replace: BUILD_X 0 → NOP, keep LOAD_CONST, LIST_EXTEND → NOP
11821195
let loc = block.instructions[i].location;
1183-
block.instructions[i].instr = Instruction::Nop.into();
1196+
set_to_nop(&mut block.instructions[i]);
11841197
block.instructions[i].location = loc;
1185-
block.instructions[i + 2].instr = Instruction::Nop.into();
1198+
set_to_nop(&mut block.instructions[i + 2]);
11861199
block.instructions[i + 2].location = loc;
11871200
i += 4;
11881201
} else if matches!(
@@ -1216,7 +1229,7 @@ impl CodeInfo {
12161229
let folded_loc = block.instructions[i].location;
12171230

12181231
for j in start_idx..i {
1219-
block.instructions[j].instr = Instruction::Nop.into();
1232+
set_to_nop(&mut block.instructions[j]);
12201233
block.instructions[j].location = folded_loc;
12211234
}
12221235

@@ -1323,7 +1336,7 @@ impl CodeInfo {
13231336
block.instructions[start_idx + 1].except_handler = eh;
13241337

13251338
for j in (start_idx + 2)..i {
1326-
block.instructions[j].instr = Instruction::Nop.into();
1339+
set_to_nop(&mut block.instructions[j]);
13271340
block.instructions[j].location = folded_loc;
13281341
}
13291342

@@ -1844,8 +1857,7 @@ impl CodeInfo {
18441857
.into();
18451858
block.instructions[i].arg = OpArg::new(packed);
18461859
// Replace second instruction with NOP (CPython: INSTR_SET_OP0(inst2, NOP))
1847-
block.instructions[i + 1].instr = Instruction::Nop.into();
1848-
block.instructions[i + 1].arg = OpArg::new(0);
1860+
set_to_nop(&mut block.instructions[i + 1]);
18491861
i += 2; // skip the NOP
18501862
} else {
18511863
i += 1;
@@ -2701,6 +2713,14 @@ fn split_blocks_at_jumps(blocks: &mut Vec<Block>) {
27012713
/// instruction is an unconditional jump, redirect to the final target.
27022714
/// flowgraph.c optimize_basic_block + jump_thread
27032715
fn jump_threading(blocks: &mut [Block]) {
2716+
jump_threading_impl(blocks, true);
2717+
}
2718+
2719+
fn jump_threading_unconditional(blocks: &mut [Block]) {
2720+
jump_threading_impl(blocks, false);
2721+
}
2722+
2723+
fn jump_threading_impl(blocks: &mut [Block], include_conditional: bool) {
27042724
let mut changed = true;
27052725
while changed {
27062726
changed = false;
@@ -2714,7 +2734,9 @@ fn jump_threading(blocks: &mut [Block]) {
27142734
if target == BlockIdx::NULL {
27152735
continue;
27162736
}
2717-
if !ins.instr.is_unconditional_jump() && !is_conditional_jump(&ins.instr) {
2737+
if !ins.instr.is_unconditional_jump()
2738+
&& !(include_conditional && is_conditional_jump(&ins.instr))
2739+
{
27182740
continue;
27192741
}
27202742
// Check if target block's first instruction is an unconditional jump
@@ -2794,8 +2816,7 @@ fn normalize_jumps(blocks: &mut Vec<Block>) {
27942816
&& ins.target == next
27952817
});
27962818
if is_jump_to_next && let Some(last_instr) = blocks[idx].instructions.last_mut() {
2797-
last_instr.instr = Instruction::Nop.into();
2798-
last_instr.target = BlockIdx::NULL;
2819+
set_to_nop(last_instr);
27992820
}
28002821
}
28012822

@@ -2888,51 +2909,6 @@ fn normalize_jumps(blocks: &mut Vec<Block>) {
28882909
current = blocks[current.idx()].next;
28892910
}
28902911

2891-
// Replace JUMP → value-producing-instr + RETURN_VALUE with inline return.
2892-
// This matches CPython's optimize_basic_block: "Replace JUMP to a RETURN".
2893-
for &block_idx in &visit_order {
2894-
let idx = block_idx.idx();
2895-
let mut replacements: Vec<(usize, Vec<InstructionInfo>)> = Vec::new();
2896-
for (i, ins) in blocks[idx].instructions.iter().enumerate() {
2897-
if !ins.instr.is_unconditional_jump() || ins.target == BlockIdx::NULL {
2898-
continue;
2899-
}
2900-
// Follow through empty blocks (next_nonempty_block)
2901-
let mut target_idx = ins.target.idx();
2902-
while blocks[target_idx].instructions.is_empty()
2903-
&& blocks[target_idx].next != BlockIdx::NULL
2904-
{
2905-
target_idx = blocks[target_idx].next.idx();
2906-
}
2907-
let target_block = &blocks[target_idx];
2908-
// Target must be exactly `value; RETURN_VALUE`.
2909-
if target_block.instructions.len() == 2 {
2910-
let t0 = &target_block.instructions[0];
2911-
let t1 = &target_block.instructions[1];
2912-
if matches!(t0.instr, AnyInstruction::Real(_))
2913-
&& !t0.instr.is_scope_exit()
2914-
&& !t0.instr.is_unconditional_jump()
2915-
&& matches!(t1.instr.real(), Some(Instruction::ReturnValue))
2916-
{
2917-
let mut load = *t0;
2918-
let mut ret = *t1;
2919-
// Use the jump's location for the inlined return
2920-
load.location = ins.location;
2921-
load.end_location = ins.end_location;
2922-
load.except_handler = ins.except_handler;
2923-
ret.location = ins.location;
2924-
ret.end_location = ins.end_location;
2925-
ret.except_handler = ins.except_handler;
2926-
replacements.push((i, vec![load, ret]));
2927-
}
2928-
}
2929-
}
2930-
// Apply replacements in reverse order
2931-
for (i, new_insts) in replacements.into_iter().rev() {
2932-
blocks[idx].instructions.splice(i..i + 1, new_insts);
2933-
}
2934-
}
2935-
29362912
// Resolve JUMP/JUMP_NO_INTERRUPT pseudo instructions before offset fixpoint.
29372913
let mut block_order = vec![0u32; blocks.len()];
29382914
for (pos, &block_idx) in visit_order.iter().enumerate() {
@@ -3019,9 +2995,7 @@ fn inline_small_or_no_lineno_blocks(blocks: &mut [Block]) {
30192995

30202996
if small_exit_block || no_lineno_no_fallthrough {
30212997
if let Some(last_instr) = blocks[current.idx()].instructions.last_mut() {
3022-
last_instr.instr = Instruction::Nop.into();
3023-
last_instr.arg = OpArg::new(0);
3024-
last_instr.target = BlockIdx::NULL;
2998+
set_to_nop(last_instr);
30252999
}
30263000
let appended = blocks[target.idx()].instructions.clone();
30273001
blocks[current.idx()].instructions.extend(appended);
@@ -3118,9 +3092,7 @@ fn remove_redundant_jumps_in_blocks(blocks: &mut [Block]) -> usize {
31183092
&& next_nonempty_block(blocks, target) == next
31193093
&& let Some(last_instr) = blocks[idx].instructions.last_mut()
31203094
{
3121-
last_instr.instr = Instruction::Nop.into();
3122-
last_instr.arg = OpArg::new(0);
3123-
last_instr.target = BlockIdx::NULL;
3095+
set_to_nop(last_instr);
31243096
changes += 1;
31253097
}
31263098
current = blocks[idx].next;
@@ -3199,6 +3171,13 @@ fn is_exit_without_lineno(block: &Block) -> bool {
31993171
!instruction_has_lineno(first) && last.instr.is_scope_exit()
32003172
}
32013173

3174+
fn is_jump_only_block(block: &Block) -> bool {
3175+
let [instr] = block.instructions.as_slice() else {
3176+
return false;
3177+
};
3178+
instr.instr.is_unconditional_jump() && instr.target != BlockIdx::NULL
3179+
}
3180+
32023181
fn maybe_propagate_location(
32033182
instr: &mut InstructionInfo,
32043183
location: SourceLocation,
@@ -3321,6 +3300,45 @@ fn duplicate_exits_without_lineno(blocks: &mut Vec<Block>, predecessors: &mut Ve
33213300
}
33223301
}
33233302

3303+
fn duplicate_jump_targets_without_lineno(blocks: &mut Vec<Block>, predecessors: &mut Vec<u32>) {
3304+
let mut current = BlockIdx(0);
3305+
while current != BlockIdx::NULL {
3306+
let block = &blocks[current.idx()];
3307+
let last = match block.instructions.last() {
3308+
Some(ins) if ins.instr.is_unconditional_jump() && ins.target != BlockIdx::NULL => *ins,
3309+
_ => {
3310+
current = blocks[current.idx()].next;
3311+
continue;
3312+
}
3313+
};
3314+
3315+
let target = next_nonempty_block(blocks, last.target);
3316+
if target == BlockIdx::NULL || !is_jump_only_block(&blocks[target.idx()]) {
3317+
current = blocks[current.idx()].next;
3318+
continue;
3319+
}
3320+
if predecessors[target.idx()] <= 1 {
3321+
current = blocks[current.idx()].next;
3322+
continue;
3323+
}
3324+
3325+
let new_idx = BlockIdx(blocks.len() as u32);
3326+
let mut new_block = blocks[target.idx()].clone();
3327+
propagate_locations_in_block(&mut new_block, last.location, last.end_location);
3328+
let old_next = blocks[current.idx()].next;
3329+
new_block.next = old_next;
3330+
blocks.push(new_block);
3331+
blocks[current.idx()].next = new_idx;
3332+
3333+
let last_mut = blocks[current.idx()].instructions.last_mut().unwrap();
3334+
last_mut.target = new_idx;
3335+
predecessors[target.idx()] -= 1;
3336+
predecessors.push(1);
3337+
3338+
current = old_next;
3339+
}
3340+
}
3341+
33243342
fn propagate_line_numbers(blocks: &mut [Block], predecessors: &[u32]) {
33253343
let mut current = BlockIdx(0);
33263344
while current != BlockIdx::NULL {
@@ -3371,6 +3389,7 @@ fn propagate_line_numbers(blocks: &mut [Block], predecessors: &[u32]) {
33713389
fn resolve_line_numbers(blocks: &mut Vec<Block>) {
33723390
let mut predecessors = compute_predecessors(blocks);
33733391
duplicate_exits_without_lineno(blocks, &mut predecessors);
3392+
duplicate_jump_targets_without_lineno(blocks, &mut predecessors);
33743393
propagate_line_numbers(blocks, &predecessors);
33753394
}
33763395

@@ -3516,7 +3535,7 @@ pub(crate) fn label_exception_targets(blocks: &mut [Block]) {
35163535
);
35173536
stack.pop();
35183537
// POP_BLOCK → NOP
3519-
blocks[bi].instructions[i].instr = Instruction::Nop.into();
3538+
set_to_nop(&mut blocks[bi].instructions[i]);
35203539
} else {
35213540
// Set except_handler for this instruction from except stack top
35223541
// stack_depth placeholder: filled by fixup_handler_depths
@@ -3595,12 +3614,12 @@ pub(crate) fn convert_pseudo_ops(blocks: &mut [Block], cellfixedoffsets: &[u32])
35953614
PseudoInstruction::SetupCleanup { .. }
35963615
| PseudoInstruction::SetupFinally { .. }
35973616
| PseudoInstruction::SetupWith { .. } => {
3598-
info.instr = Instruction::Nop.into();
3617+
set_to_nop(info);
35993618
}
36003619
// PopBlock in reachable blocks is converted to NOP by
36013620
// label_exception_targets. Dead blocks may still have them.
36023621
PseudoInstruction::PopBlock => {
3603-
info.instr = Instruction::Nop.into();
3622+
set_to_nop(info);
36043623
}
36053624
// LOAD_CLOSURE → LOAD_FAST (using cellfixedoffsets for merged layout)
36063625
PseudoInstruction::LoadClosure { i } => {

0 commit comments

Comments (0)