From 8b87ce4f60cf0e8dbf7c6e7a6bab9429a858d92f Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Fri, 22 May 2026 19:54:03 +0900 Subject: [PATCH] Align codegen metadata with CPython --- .cspell.dict/cpython.txt | 5 + Lib/test/test_compile.py | 2 - Lib/test/test_dis.py | 6 - Lib/test/test_exceptions.py | 1 - Lib/test/test_inspect/test_inspect.py | 1 - Lib/test/test_py_compile.py | 2 - Lib/test/test_strtod.py | 4 - Lib/test/test_sys_settrace.py | 2 - Lib/test/test_unittest/test_async_case.py | 1 - crates/codegen/src/compile.rs | 5423 ++++++++++++++--- crates/codegen/src/ir.rs | 870 ++- crates/codegen/src/symboltable.rs | 63 +- crates/compiler-core/src/bytecode.rs | 5 +- crates/literal/src/float.rs | 152 +- ...code__tests__nested_double_async_with.snap | 16 +- crates/vm/src/builtins/function.rs | 88 +- crates/vm/src/frame.rs | 13 +- crates/vm/src/stdlib/builtins.rs | 41 +- scripts/dis_dump.py | 24 +- 19 files changed, 5522 insertions(+), 1197 deletions(-) diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index 688982cd7d9..ffbed52121b 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -5,6 +5,7 @@ argtypes asdl asname atopen +atext attro augassign badcert @@ -104,6 +105,7 @@ inlinedepth inplace inpos isbytecode +ishidden ismine ISPOINTER isoctal @@ -113,6 +115,7 @@ keeped kwnames kwonlyarg kwonlyargs +kwonlydefaults lasti libffi linearise @@ -164,6 +167,7 @@ patma peepholer phcount platstdlib +ploc posonlyarg posonlyargs prec @@ -209,6 +213,7 @@ staticbase stginfo storefast stringlib +stringized structseq subkwargs subparams diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index fd1743e6701..052d2bfc041 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -1249,7 +1249,6 @@ def get_code_lines(self, code): last_line = line return res - @unittest.expectedFailure # TODO: RUSTPYTHON def test_lineno_attribute(self): def load_attr(): return ( @@ -1294,7 +1293,6 @@ def aug_store_attr(): code_lines = self.get_code_lines(func.__code__) self.assertEqual(lines, code_lines) - @unittest.expectedFailure # TODO: RUSTPYTHON; + [0] def test_line_number_genexp(self): def return_genexp(): diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index fcd6a6b8be7..cedad5a0fba 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1215,7 +1215,6 @@ def test_disassemble_fstring(self): def test_disassemble_with(self): self.do_disassembly_test(_with, dis_with) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_disassemble_asyncwith(self): self.do_disassembly_test(_asyncwith, dis_asyncwith) @@ -1991,26 +1990,22 @@ def test_first_line_set_to_None(self): actual = dis.get_instructions(simple, first_line=None) self.assertInstructionsEqual(list(actual), expected_opinfo_simple) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_outer(self): actual = dis.get_instructions(outer, first_line=expected_outer_line) self.assertInstructionsEqual(list(actual), expected_opinfo_outer) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_nested(self): with captured_stdout(): f = outer() actual = dis.get_instructions(f, first_line=expected_f_line) self.assertInstructionsEqual(list(actual), expected_opinfo_f) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_doubly_nested(self): with captured_stdout(): inner = outer()() actual = dis.get_instructions(inner, first_line=expected_inner_line) self.assertInstructionsEqual(list(actual), expected_opinfo_inner) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_jumpy(self): actual = dis.get_instructions(jumpy, first_line=expected_jumpy_line) self.assertInstructionsEqual(list(actual), expected_opinfo_jumpy) @@ -2314,7 +2309,6 @@ def test_iteration(self): via_generator = list(dis.get_instructions(obj)) self.assertInstructionsEqual(via_object, via_generator) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_explicit_first_line(self): actual = dis.Bytecode(outer, first_line=expected_outer_line) self.assertInstructionsEqual(list(actual), expected_opinfo_outer) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 10010ffa9b8..7e79732a3b9 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -2245,7 +2245,6 @@ def test_assertion_error_location(self): result = run_script(source) self.assertEqual(result[-3:], expected) - @unittest.expectedFailure # TODO: RUSTPYTHON @force_not_colorized def test_multiline_not_highlighted(self): cases = [ diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index 512adba2813..f7a7c0cc825 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -237,7 +237,6 @@ class FakePackage: self.assertFalse(inspect.ispackage(FakePackage())) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: False is not true def test_iscoroutine(self): async_gen_coro = async_generator_function_example(1) gen_coro = gen_coroutine_function_example(1) diff --git a/Lib/test/test_py_compile.py b/Lib/test/test_py_compile.py index f00f24204b4..c4788f47a06 100644 --- a/Lib/test/test_py_compile.py +++ b/Lib/test/test_py_compile.py @@ -132,7 +132,6 @@ def test_exceptions_propagate(self): finally: os.chmod(self.directory, mode.st_mode) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_bad_coding(self): bad_coding = os.path.join(os.path.dirname(__file__), 'tokenizedata', @@ -198,7 +197,6 @@ def test_invalidation_mode(self): fp.read(), 'test', {}) self.assertEqual(flags, 0b1) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_quiet(self): bad_coding = os.path.join(os.path.dirname(__file__), 'tokenizedata', diff --git a/Lib/test/test_strtod.py b/Lib/test/test_strtod.py index 03c8afa51ef..f263b7ab4f1 100644 --- a/Lib/test/test_strtod.py +++ b/Lib/test/test_strtod.py @@ -173,7 +173,6 @@ def test_halfway_cases(self): s = '{}e{}'.format(digits, exponent) self.check_strtod(s) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_boundaries(self): # boundaries expressed as triples (n, e, u), where # n*10**e is an approximation to the boundary value and @@ -194,7 +193,6 @@ def test_boundaries(self): u *= 10 e -= 1 - @unittest.expectedFailure # TODO: RUSTPYTHON def test_underflow_boundary(self): # test values close to 2**-1075, the underflow boundary; similar # to boundary_tests, except that the random error doesn't scale @@ -206,7 +204,6 @@ def test_underflow_boundary(self): s = '{}e{}'.format(digits, exponent) self.check_strtod(s) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_bigcomp(self): for ndigs in 5, 10, 14, 15, 16, 17, 18, 19, 20, 40, 41, 50: dig10 = 10**ndigs @@ -284,7 +281,6 @@ def negative_exp(n): self.assertEqual(float(negative_exp(20000)), 1.0) self.assertEqual(float(negative_exp(30000)), 1.0) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_particular(self): # inputs that produced crashes or incorrectly rounded results with # previous versions of dtoa.c, for various reasons diff --git a/Lib/test/test_sys_settrace.py b/Lib/test/test_sys_settrace.py index aa2d54ee16e..7eef1290dc2 100644 --- a/Lib/test/test_sys_settrace.py +++ b/Lib/test/test_sys_settrace.py @@ -1488,8 +1488,6 @@ def test_jump_in_nested_finally_3(output): output.append(11) output.append(12) - # TODO: RUSTPYTHON - @unittest.expectedFailure @jump_test(5, 11, [2, 4], (ValueError, 'after')) def test_no_jump_over_return_try_finally_in_finally_block(output): try: diff --git a/Lib/test/test_unittest/test_async_case.py b/Lib/test/test_unittest/test_async_case.py index 9b1678caf59..91d45283eb3 100644 --- a/Lib/test/test_unittest/test_async_case.py +++ b/Lib/test/test_unittest/test_async_case.py @@ -296,7 +296,6 @@ async def on_cleanup2(self): test.doCleanups() self.assertEqual(events, ['asyncSetUp', 'test', 'asyncTearDown', 'cleanup2', 'cleanup1']) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_deprecation_of_return_val_from_test(self): # Issue 41322 - deprecate return of value that is not None from a test class Nothing: diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index 6dc9fdd4bd7..41dbdac112b 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -246,6 +246,8 @@ enum ComprehensionLoopControl { loop_block: BlockIdx, if_cleanup_block: BlockIdx, after_block: BlockIdx, + iter_range: TextRange, + backedge_range: TextRange, is_async: bool, end_async_for_target: BlockIdx, }, @@ -637,6 +639,41 @@ impl Compiler { } } + fn mark_conditional_ifexp_orelse_entry_block(&mut self, block: BlockIdx) { + if block != BlockIdx::NULL { + self.current_code_info().blocks[block.idx()].conditional_ifexp_orelse_entry = true; + } + } + + fn instruction_count_snapshot(&mut self) -> Vec { + self.current_code_info() + .blocks + .iter() + .map(|block| block.instructions.len()) + .collect() + } + + fn mark_new_conditional_jump_locations_since( + &mut self, + snapshot: &[usize], + target: BlockIdx, + range: TextRange, + ) { + let source = self.source_file.to_source_code(); + let location = source.source_location(range.start(), PositionEncoding::Utf8); + let end_location = source.source_location(range.end(), PositionEncoding::Utf8); + for (idx, block) in self.current_code_info().blocks.iter_mut().enumerate() { + let start = snapshot.get(idx).copied().unwrap_or(0); + for instr in block.instructions.iter_mut().skip(start) { + if instr.target == target && ir::is_conditional_jump(&instr.instr) { + instr.location = location; + instr.end_location = end_location; + instr.preserve_tobool_jump_location = true; + } + } + } + } + fn new(opts: CompileOpts, source_file: SourceFile, code_name: &str) -> Self { let module_code = ir::CodeInfo { // CPython convention: top-level module / interactive / @@ -656,7 +693,7 @@ impl Compiler { metadata: ir::CodeUnitMetadata { name: code_name.to_string(), qualname: Some(code_name.to_string()), - consts: IndexSet::default(), + consts: Default::default(), names: IndexSet::default(), varnames: IndexSet::default(), cellvars: IndexSet::default(), @@ -896,6 +933,7 @@ impl Compiler { ast::Expr::ListComp(ast::ExprListComp { generators, .. }) | ast::Expr::SetComp(ast::ExprSetComp { generators, .. }) | ast::Expr::DictComp(ast::ExprDictComp { generators, .. }) + | ast::Expr::Generator(ast::ExprGenerator { generators, .. }) if generators.iter().any(|generator| generator.is_async) => { self.found = true; @@ -1629,6 +1667,7 @@ impl Compiler { fn compile_module_annotation_setup_sequence( &mut self, body: &[ast::Stmt], + loc: TextRange, ) -> CompileResult<()> { let (saved_blocks, saved_current_block) = { let code = self.current_code_info(); @@ -1638,7 +1677,7 @@ impl Compiler { ) }; - let result = self.compile_module_annotate(body); + let result = self.compile_module_annotate(body, Some(loc)); let annotations_blocks = { let code = self.current_code_info(); @@ -1658,6 +1697,7 @@ impl Compiler { if let Some(lower) = &s.lower { self.compile_expression(lower)?; } else { + self.set_source_range(s.range); self.emit_load_const(ConstantData::None); } @@ -1665,6 +1705,7 @@ impl Compiler { if let Some(upper) = &s.upper { self.compile_expression(upper)?; } else { + self.set_source_range(s.range); self.emit_load_const(ConstantData::None); } @@ -2171,8 +2212,14 @@ impl Compiler { /// Load arguments for super() optimization onto the stack /// Stack result: [global_super, class, self] - fn load_args_for_super(&mut self, super_type: &SuperCallType<'_>) -> CompileResult<()> { + fn load_args_for_super( + &mut self, + super_type: &SuperCallType<'_>, + super_name_range: TextRange, + super_call_range: TextRange, + ) -> CompileResult<()> { // 1. Load global super + self.set_source_range(super_name_range); self.compile_name("super", NameUsage::Load)?; match super_type { @@ -2187,6 +2234,7 @@ impl Compiler { SuperCallType::ZeroArg => { // 0-arg: load __class__ cell and first parameter // Load __class__ from cell/free variable + self.set_source_range(super_call_range); let scope = self.get_ref_type("__class__").map_err(|e| self.error(e))?; let idx = match scope { SymbolScope::Cell => self.get_cell_var_index("__class__"), @@ -2211,6 +2259,7 @@ impl Compiler { "super(): no arguments and no first parameter".to_owned(), )) })?; + self.set_source_range(super_call_range); self.compile_name(&first_param, NameUsage::Load)?; } } @@ -2349,7 +2398,7 @@ impl Compiler { } // Initialize u_metadata fields - let (flags, posonlyarg_count, arg_count, kwonlyarg_count) = match scope_type { + let (mut flags, posonlyarg_count, arg_count, kwonlyarg_count) = match scope_type { CompilerScope::Module => (bytecode::CodeFlags::empty(), 0, 0, 0), CompilerScope::Class => (bytecode::CodeFlags::empty(), 0, 0, 0), CompilerScope::Function | CompilerScope::AsyncFunction | CompilerScope::Lambda => ( @@ -2378,13 +2427,30 @@ impl Compiler { ), }; - // Set CO_NESTED for scopes defined inside another function/class/etc. - // (i.e., not at module level) - let flags = if self.code_stack.len() > 1 { + if ste.is_method { + flags |= bytecode::CodeFlags::METHOD; + } + + // CPython sets CO_NESTED from symtable's ste_nested, not merely + // from lexical depth: module-level class methods are CO_METHOD but + // not CO_NESTED. + let mut flags = if ste.is_nested + && matches!( + scope_type, + CompilerScope::Function + | CompilerScope::AsyncFunction + | CompilerScope::Lambda + | CompilerScope::Comprehension + | CompilerScope::Annotation + | CompilerScope::TypeParams + ) { flags | bytecode::CodeFlags::NESTED } else { flags }; + if self.future_annotations { + flags |= bytecode::CodeFlags::FUTURE_ANNOTATIONS; + } // Get private name from parent scope let private = if !self.code_stack.is_empty() { @@ -2404,7 +2470,7 @@ impl Compiler { metadata: ir::CodeUnitMetadata { name: name.to_owned(), qualname: None, // Will be set below - consts: IndexSet::default(), + consts: Default::default(), names: IndexSet::default(), varnames: varname_cache, cellvars: cellvar_cache, @@ -2470,7 +2536,9 @@ impl Compiler { scope_type == CompilerScope::AsyncFunction || self.current_symbol_table().is_generator; if is_gen { emit!(self, Instruction::ReturnGenerator); + self.mark_last_line_only_location(lineno); emit!(self, Instruction::PopTop); + self.mark_last_line_only_location(lineno); } // CPython: LOCATION(lineno, lineno, 0, 0) @@ -2510,6 +2578,8 @@ impl Compiler { match_success_jump: false, break_continue_cleanup_jump: false, for_loop_break_cleanup_jump: false, + preserve_tobool_jump_location: false, + preserve_store_fast_store_fast_jump_location: false, }); } @@ -2529,6 +2599,7 @@ impl Compiler { let i_varnum: oparg::VarNum = u32::try_from(oldindex).expect("too many cellvars").into(); emit!(self, Instruction::MakeCell { i: i_varnum }); + self.set_no_location(); } } @@ -2540,6 +2611,7 @@ impl Compiler { n: u32::try_from(nfrees).expect("too many freevars"), } ); + self.set_no_location(); } } @@ -2568,8 +2640,12 @@ impl Compiler { // enter_scope sets default values based on scope_type, but push_output // allows callers to specify exact values if let Some(info) = self.code_stack.last_mut() { - // Preserve NESTED flag set by enter_scope - info.flags = flags | (info.flags & bytecode::CodeFlags::NESTED); + // Preserve flags computed from the symbol-table context. + info.flags = flags + | (info.flags + & (bytecode::CodeFlags::NESTED + | bytecode::CodeFlags::METHOD + | bytecode::CodeFlags::FUTURE_ANNOTATIONS)); info.metadata.argcount = arg_count; info.metadata.posonlyargcount = posonlyarg_count; info.metadata.kwonlyargcount = kwonlyarg_count; @@ -2643,6 +2719,7 @@ impl Compiler { fn enter_annotation_scope( &mut self, _func_name: &str, + loc: TextRange, ) -> CompileResult> { if !self.push_annotation_symbol_table() { return Ok(None); @@ -2657,6 +2734,7 @@ impl Compiler { in_async_scope: false, }; + self.set_source_range(loc); let key = self.symbol_table_stack.len() - 1; let lineno = self.get_source_line_number().get(); self.enter_scope( @@ -2769,9 +2847,26 @@ impl Compiler { code.fblock.pop().expect("fblock stack underflow") } + fn set_unwind_source_range(&mut self, loc: Option) { + if let Some(range) = loc { + self.set_source_range(range); + } + } + + fn mark_unwind_no_location(&mut self, loc: Option) { + if loc.is_none() { + self.set_no_location(); + } + } + /// Unwind a single fblock, emitting cleanup code /// preserve_tos: if true, preserve the top of stack (e.g., return value) - fn unwind_fblock(&mut self, info: &FBlockInfo, preserve_tos: bool) -> CompileResult<()> { + fn unwind_fblock( + &mut self, + info: &FBlockInfo, + preserve_tos: bool, + loc: &mut Option, + ) -> CompileResult<()> { match info.fb_type { FBlockType::WhileLoop | FBlockType::ExceptionHandler @@ -2785,13 +2880,19 @@ impl Compiler { // When returning from a for-loop, CPython swaps the preserved // value with the iterator and uses POP_TOP for loop cleanup. if preserve_tos { + self.set_unwind_source_range(*loc); emit!(self, Instruction::Swap { i: 2 }); + self.mark_unwind_no_location(*loc); } + self.set_unwind_source_range(*loc); emit!(self, Instruction::PopTop); + self.mark_unwind_no_location(*loc); } FBlockType::TryExcept => { + self.set_unwind_source_range(*loc); emit!(self, PseudoInstruction::PopBlock); + self.mark_unwind_no_location(*loc); } FBlockType::FinallyTry => { @@ -2804,71 +2905,113 @@ impl Compiler { FBlockType::FinallyEnd => { // codegen_unwind_fblock(FINALLY_END) if preserve_tos { + self.set_unwind_source_range(*loc); emit!(self, Instruction::Swap { i: 2 }); + self.mark_unwind_no_location(*loc); } + self.set_unwind_source_range(*loc); emit!(self, Instruction::PopTop); // exc_value + self.mark_unwind_no_location(*loc); if preserve_tos { + self.set_unwind_source_range(*loc); emit!(self, Instruction::Swap { i: 2 }); + self.mark_unwind_no_location(*loc); } + self.set_unwind_source_range(*loc); emit!(self, PseudoInstruction::PopBlock); + self.mark_unwind_no_location(*loc); + self.set_unwind_source_range(*loc); emit!(self, Instruction::PopExcept); + self.mark_unwind_no_location(*loc); } FBlockType::With | FBlockType::AsyncWith => { // Stack: [..., exit_func, self_exit, return_value (if preserve_tos)] - self.set_source_range(info.fb_range); + // CPython codegen_unwind_fblock() assigns *ploc = info->fb_loc + // for WITH/ASYNC_WITH cleanup and then makes following unwind + // instructions artificial with *ploc = NO_LOCATION. + *loc = Some(info.fb_range); + self.set_unwind_source_range(*loc); emit!(self, PseudoInstruction::PopBlock); if preserve_tos { // Rotate return value below the exit pair // [exit_func, self_exit, value] → [value, exit_func, self_exit] + self.set_unwind_source_range(*loc); emit!(self, Instruction::Swap { i: 3 }); // [value, self_exit, exit_func] + self.set_unwind_source_range(*loc); emit!(self, Instruction::Swap { i: 2 }); // [value, exit_func, self_exit] } // Call exit_func(self_exit, None, None, None) + self.set_unwind_source_range(*loc); self.emit_load_const(ConstantData::None); + self.set_unwind_source_range(*loc); self.emit_load_const(ConstantData::None); + self.set_unwind_source_range(*loc); self.emit_load_const(ConstantData::None); + self.set_unwind_source_range(*loc); emit!(self, Instruction::Call { argc: 3 }); // For async with, await the result if matches!(info.fb_type, FBlockType::AsyncWith) { + self.set_unwind_source_range(*loc); emit!(self, Instruction::GetAwaitable { r#where: 2 }); + self.set_unwind_source_range(*loc); self.emit_load_const(ConstantData::None); let _ = self.compile_yield_from_sequence(true)?; } // Pop the __exit__ result + self.set_unwind_source_range(*loc); emit!(self, Instruction::PopTop); + *loc = None; } FBlockType::HandlerCleanup => { // codegen_unwind_fblock(HANDLER_CLEANUP) if let FBlockDatum::ExceptionName(_) = info.fb_datum { // Named handler: PopBlock for inner SETUP_CLEANUP + self.set_unwind_source_range(*loc); emit!(self, PseudoInstruction::PopBlock); + self.mark_unwind_no_location(*loc); } if preserve_tos { + self.set_unwind_source_range(*loc); emit!(self, Instruction::Swap { i: 2 }); + self.mark_unwind_no_location(*loc); } // PopBlock for outer SETUP_CLEANUP (ExceptionHandler) + self.set_unwind_source_range(*loc); emit!(self, PseudoInstruction::PopBlock); + self.mark_unwind_no_location(*loc); + self.set_unwind_source_range(*loc); emit!(self, Instruction::PopExcept); + self.mark_unwind_no_location(*loc); // If there's an exception name, clean it up if let FBlockDatum::ExceptionName(ref name) = info.fb_datum { + self.set_unwind_source_range(*loc); self.emit_load_const(ConstantData::None); + self.mark_unwind_no_location(*loc); + self.set_unwind_source_range(*loc); self.store_name(name)?; + self.mark_unwind_no_location(*loc); + self.set_unwind_source_range(*loc); self.compile_name(name, NameUsage::Delete)?; + self.mark_unwind_no_location(*loc); } } FBlockType::PopValue => { if preserve_tos { + self.set_unwind_source_range(*loc); emit!(self, Instruction::Swap { i: 2 }); + self.mark_unwind_no_location(*loc); } + self.set_unwind_source_range(*loc); emit!(self, Instruction::PopTop); + self.mark_unwind_no_location(*loc); } } Ok(()) @@ -2881,7 +3024,7 @@ impl Compiler { &mut self, preserve_tos: bool, stop_at_loop: bool, - ) -> CompileResult { + ) -> CompileResult> { // Collect the info we need, with indices for FinallyTry blocks #[derive(Clone)] enum UnwindInfo { @@ -2925,15 +3068,17 @@ impl Compiler { } // Process each fblock - let mut unwound_finally = false; + let mut unwind_loc = Some(self.current_source_range); for info in unwind_infos { match info { UnwindInfo::Normal(fblock_info) => { - self.unwind_fblock(&fblock_info, preserve_tos)?; + self.unwind_fblock(&fblock_info, preserve_tos, &mut unwind_loc)?; } UnwindInfo::FinallyTry { body, fblock_idx } => { // codegen_unwind_fblock(FINALLY_TRY) + self.set_unwind_source_range(unwind_loc); emit!(self, PseudoInstruction::PopBlock); + self.mark_unwind_no_location(unwind_loc); // Temporarily remove the FinallyTry fblock so nested return/break/continue // in the finally body won't see it again @@ -2950,7 +3095,7 @@ impl Compiler { } self.compile_statements(&body)?; - unwound_finally = true; + unwind_loc = None; if preserve_tos { self.pop_fblock(FBlockType::PopValue); @@ -2963,7 +3108,7 @@ impl Compiler { } } - Ok(unwound_finally) + Ok(unwind_loc) } // could take impl Into>, but everything is borrowed from ast structs; we never @@ -3120,6 +3265,11 @@ impl Compiler { let size_before = self.code_stack.len(); // Set future_annotations from symbol table (detected during symbol table scan) self.future_annotations = symbol_table.future_annotations; + if self.future_annotations { + self.current_code_info() + .flags + .insert(bytecode::CodeFlags::FUTURE_ANNOTATIONS); + } // Module-level __conditional_annotations__ cell let has_module_cond_ann = Self::scope_needs_conditional_annotations_cell(&symbol_table); @@ -3141,10 +3291,12 @@ impl Compiler { self.emit_resume_for_scope(CompilerScope::Module, 1); emit!(self, PseudoInstruction::AnnotationsPlaceholder); - let (doc, statements) = split_doc(&body.body, &self.opts); + let (doc, statements) = split_doc_with_range(&body.body, &self.opts); + let module_start_loc = self.module_start_location(&body.body); // Handle annotation bookkeeping before the docstring assignment, as // codegen_body() does after _PyCodegen_Module() inserts the prefix set. if Self::find_ann(statements) { + self.set_source_range(module_start_loc); if Self::scope_needs_conditional_annotations_cell(self.current_symbol_table()) { emit!(self, Instruction::BuildSet { count: 0 }); self.store_name("__conditional_annotations__")?; @@ -3155,19 +3307,22 @@ impl Compiler { } } - if let Some(value) = doc { + if let Some((value, range)) = doc { + let saved_range = self.current_source_range; + self.set_source_range(range); self.emit_load_const(ConstantData::Str { value: value.into(), }); let doc = self.name("__doc__"); - emit!(self, Instruction::StoreName { namei: doc }) + emit!(self, Instruction::StoreName { namei: doc }); + self.set_source_range(saved_range); } // Compile all statements self.compile_statements(statements)?; if Self::find_ann(statements) && !self.future_annotations { - self.compile_module_annotation_setup_sequence(statements)?; + self.compile_module_annotation_setup_sequence(statements, module_start_loc)?; } assert_eq!(self.code_stack.len(), size_before); @@ -3187,13 +3342,20 @@ impl Compiler { self.interactive = true; // Set future_annotations from symbol table (detected during symbol table scan) self.future_annotations = symbol_table.future_annotations; + if self.future_annotations { + self.current_code_info() + .flags + .insert(bytecode::CodeFlags::FUTURE_ANNOTATIONS); + } self.symbol_table_stack.push(symbol_table); + let module_start_loc = self.module_start_location(body); self.emit_resume_for_scope(CompilerScope::Module, 1); emit!(self, PseudoInstruction::AnnotationsPlaceholder); // Handle annotations based on future_annotations flag if Self::find_ann(body) { + self.set_source_range(module_start_loc); if self.future_annotations { // PEP 563: Initialize __annotations__ dict emit!(self, Instruction::SetupAnnotations); @@ -3246,7 +3408,7 @@ impl Compiler { }; if Self::find_ann(body) && !self.future_annotations { - self.compile_module_annotation_setup_sequence(body)?; + self.compile_module_annotation_setup_sequence(body, module_start_loc)?; } self.emit_return_value(); @@ -3901,8 +4063,9 @@ impl Compiler { body, orelse, is_async, + range, .. - }) => self.compile_for(target, iter, body, orelse, *is_async)?, + }) => self.compile_for(target, iter, body, orelse, *is_async, *range)?, ast::Stmt::Match(ast::StmtMatch { subject, cases, .. }) => { self.compile_match(subject, cases)? } @@ -3981,11 +4144,14 @@ impl Compiler { type_params.as_deref(), arguments.as_deref(), )?, - ast::Stmt::Assert(ast::StmtAssert { test, msg, .. }) => { + ast::Stmt::Assert(ast::StmtAssert { + test, msg, range, .. + }) => { // if some flag, ignore all assert statements! if self.opts.optimize == 0 { let after_block = self.new_block(); self.compile_jump_if(test, true, after_block)?; + self.set_source_range(*range); emit!( self, Instruction::LoadCommonConstant { @@ -3994,8 +4160,10 @@ impl Compiler { ); if let Some(e) = msg { self.compile_expression(e)?; + self.set_source_range(*range); emit!(self, Instruction::Call { argc: 0 }); } + self.set_source_range(test.range()); emit!( self, Instruction::RaiseVarargs { @@ -4044,10 +4212,7 @@ impl Compiler { match value { Some(v) => { if self.ctx.func == FunctionContext::AsyncFunction - && self - .current_code_info() - .flags - .contains(bytecode::CodeFlags::GENERATOR) + && self.current_symbol_table().is_generator { return Err(self.error_ranged( CodegenErrorType::AsyncReturnValue, @@ -4060,9 +4225,11 @@ impl Compiler { None }; let preserve_tos = folded_constant.is_none(); + let mut return_range = stmt_range; if preserve_tos { self.compile_expression(v)?; } else { + return_range = v.range(); self.set_source_range(v.range()); emit!(self, Instruction::Nop); } @@ -4071,16 +4238,17 @@ impl Compiler { if source.line_index(v.range().start()) != source.line_index(stmt_range.start()) { + return_range = stmt_range; self.set_source_range(stmt_range); emit!(self, Instruction::Nop); } - self.set_source_range(stmt_range); - let unwound_finally = self.unwind_fblock_stack(preserve_tos, false)?; - if !unwound_finally { - self.set_source_range(stmt_range); + self.set_source_range(return_range); + let unwind_loc = self.unwind_fblock_stack(preserve_tos, false)?; + if let Some(loc) = unwind_loc { + self.set_source_range(loc); } match folded_constant { - Some(constant) if unwound_finally => { + Some(constant) if unwind_loc.is_none() => { self.emit_return_const_no_location(constant); } Some(constant) => { @@ -4089,7 +4257,7 @@ impl Compiler { } None => { self.emit_return_value(); - if unwound_finally { + if unwind_loc.is_none() { self.set_no_location(); } } @@ -4099,12 +4267,12 @@ impl Compiler { self.set_source_range(stmt_range); emit!(self, Instruction::Nop); // Unwind fblock stack with preserve_tos=false (no value to preserve) - let unwound_finally = self.unwind_fblock_stack(false, false)?; - if unwound_finally { - self.emit_return_const_no_location(ConstantData::None); - } else { - self.set_source_range(stmt_range); + let unwind_loc = self.unwind_fblock_stack(false, false)?; + if let Some(loc) = unwind_loc { + self.set_source_range(loc); self.emit_return_const(ConstantData::None); + } else { + self.emit_return_const_no_location(ConstantData::None); } } } @@ -4112,7 +4280,12 @@ impl Compiler { let dead = self.new_block(); self.switch_to_block(dead); } - ast::Stmt::Assign(ast::StmtAssign { targets, value, .. }) => { + ast::Stmt::Assign(ast::StmtAssign { + targets, + value, + range, + .. + }) => { let folded_ifexp_assignment = matches!( value.as_ref(), ast::Expr::If(ast::ExprIf { test, .. }) @@ -4144,6 +4317,7 @@ impl Compiler { for (i, target) in targets.iter().enumerate() { if i + 1 != targets.len() { + self.set_source_range(*range); emit!(self, Instruction::Copy { i: 1 }); } self.compile_store(target)?; @@ -4157,9 +4331,16 @@ impl Compiler { annotation, value, simple, + range, .. }) => { - self.compile_annotated_assign(target, annotation, value.as_deref(), *simple)?; + self.compile_annotated_assign( + target, + annotation, + value.as_deref(), + *simple, + *range, + )?; // Bare annotations in function scope emit no code; restore // source range so subsequent instructions keep the correct line. if value.is_none() && self.ctx.in_func() { @@ -4178,6 +4359,7 @@ impl Compiler { name, type_params, value, + range, .. }) => { let Some(name) = name.as_name_expr() else { @@ -4207,7 +4389,7 @@ impl Compiler { value: name_string.clone().into(), }); self.compile_type_params(type_params)?; - self.compile_typealias_value_closure(&name_string, value)?; + self.compile_typealias_value_closure(&name_string, value, *range)?; emit!(self, Instruction::BuildTuple { count: 3 }); emit!( self, @@ -4227,7 +4409,7 @@ impl Compiler { value: name_string.clone().into(), }); self.emit_load_const(ConstantData::None); - self.compile_typealias_value_closure(&name_string, value)?; + self.compile_typealias_value_closure(&name_string, value, *range)?; emit!(self, Instruction::BuildTuple { count: 3 }); emit!( self, @@ -4245,32 +4427,43 @@ impl Compiler { } fn compile_delete(&mut self, expression: &ast::Expr) -> CompileResult<()> { - match &expression { - ast::Expr::Name(ast::ExprName { id, .. }) => { - self.compile_name(id.as_str(), NameUsage::Delete)? - } - ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => { - self.compile_expression(value)?; - let namei = self.name(attr.as_str()); - emit!(self, Instruction::DeleteAttr { namei }); - } - ast::Expr::Subscript(ast::ExprSubscript { - value, slice, ctx, .. - }) => { - self.compile_subscript(value, slice, *ctx)?; - } - ast::Expr::Tuple(ast::ExprTuple { elts, .. }) - | ast::Expr::List(ast::ExprList { elts, .. }) => { - for element in elts { - self.compile_delete(element)?; + let prev_source_range = self.current_source_range; + self.set_source_range(expression.range()); + let result = (|| -> CompileResult<()> { + match &expression { + ast::Expr::Name(ast::ExprName { id, .. }) => { + self.compile_name(id.as_str(), NameUsage::Delete)? } + ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => { + self.compile_expression(value)?; + let namei = self.name(attr.as_str()); + self.set_source_range(self.update_start_location_to_match_attr( + expression.range(), + expression.range(), + attr.as_str(), + )); + emit!(self, Instruction::DeleteAttr { namei }); + } + ast::Expr::Subscript(ast::ExprSubscript { + value, slice, ctx, .. + }) => { + self.compile_subscript(value, slice, *ctx)?; + } + ast::Expr::Tuple(ast::ExprTuple { elts, .. }) + | ast::Expr::List(ast::ExprList { elts, .. }) => { + for element in elts { + self.compile_delete(element)?; + } + } + ast::Expr::BinOp(_) | ast::Expr::UnaryOp(_) => { + return Err(self.error(CodegenErrorType::Delete("expression"))); + } + _ => return Err(self.error(CodegenErrorType::Delete(expression.python_name()))), } - ast::Expr::BinOp(_) | ast::Expr::UnaryOp(_) => { - return Err(self.error(CodegenErrorType::Delete("expression"))); - } - _ => return Err(self.error(CodegenErrorType::Delete(expression.python_name()))), - } - Ok(()) + Ok(()) + })(); + self.set_source_range(prev_source_range); + result } fn enter_function(&mut self, name: &str, parameters: &ast::Parameters) -> CompileResult<()> { @@ -4327,7 +4520,8 @@ impl Compiler { /// Apply decorators: each decorator calls the function below it. /// Stack: [dec1, dec2, func] → CALL 0 → [dec1, dec2(func)] → CALL 0 → [dec1(dec2(func))] fn apply_decorators(&mut self, decorator_list: &[ast::Decorator]) { - for _ in decorator_list { + for decorator in decorator_list.iter().rev() { + self.set_source_range(decorator.expression.range()); emit!(self, Instruction::Call { argc: 0 }); } } @@ -4339,6 +4533,8 @@ impl Compiler { name: &str, allow_starred: bool, ) -> CompileResult<()> { + let expr_range = expr.range(); + self.set_source_range(expr_range); self.emit_load_const(ConstantData::Tuple { elements: vec![ConstantData::Integer { value: 1.into() }], }); @@ -4373,6 +4569,7 @@ impl Compiler { if allow_starred && matches!(expr, ast::Expr::Starred(_)) { if let ast::Expr::Starred(starred) = expr { self.compile_expression(&starred.value)?; + self.set_source_range(expr_range); emit!(self, Instruction::UnpackSequence { count: 1 }); } } else { @@ -4380,12 +4577,14 @@ impl Compiler { } // Return value + self.set_source_range(expr_range); emit!(self, Instruction::ReturnValue); // Exit scope and create closure let code = self.exit_scope(); self.ctx = prev_ctx; + self.set_source_range(expr_range); self.make_closure( code, bytecode::MakeFunctionFlags::from([bytecode::MakeFunctionFlag::Defaults]), @@ -4398,7 +4597,9 @@ impl Compiler { &mut self, alias_name: &str, value: &ast::Expr, + alias_range: TextRange, ) -> CompileResult<()> { + self.set_source_range(alias_range); self.emit_load_const(ConstantData::Tuple { elements: vec![ConstantData::Integer { value: 1.into() }], }); @@ -4422,10 +4623,12 @@ impl Compiler { }; self.compile_expression(value)?; + self.set_source_range(alias_range); emit!(self, Instruction::ReturnValue); let code = self.exit_scope(); self.ctx = prev_ctx; + self.set_source_range(alias_range); self.make_closure( code, bytecode::MakeFunctionFlags::from([bytecode::MakeFunctionFlag::Defaults]), @@ -4444,8 +4647,10 @@ impl Compiler { name, bound, default, + range, .. }) => { + self.set_source_range(*range); self.emit_load_const(ConstantData::Str { value: name.as_str().into(), }); @@ -4453,6 +4658,7 @@ impl Compiler { if let Some(expr) = &bound { self.compile_type_param_bound_or_default(expr, name.as_str(), false)?; + self.set_source_range(*range); let intrinsic = if expr.is_tuple_expr() { bytecode::IntrinsicFunction2::TypeVarWithConstraint } else { @@ -4474,6 +4680,7 @@ impl Compiler { name.as_str(), false, )?; + self.set_source_range(*range); emit!( self, Instruction::CallIntrinsic2 { @@ -4482,10 +4689,17 @@ impl Compiler { ); } + self.set_source_range(*range); emit!(self, Instruction::Copy { i: 1 }); self.store_name(name.as_ref())?; } - ast::TypeParam::ParamSpec(ast::TypeParamParamSpec { name, default, .. }) => { + ast::TypeParam::ParamSpec(ast::TypeParamParamSpec { + name, + default, + range, + .. + }) => { + self.set_source_range(*range); self.emit_load_const(ConstantData::Str { value: name.as_str().into(), }); @@ -4502,6 +4716,7 @@ impl Compiler { name.as_str(), false, )?; + self.set_source_range(*range); emit!( self, Instruction::CallIntrinsic2 { @@ -4510,12 +4725,17 @@ impl Compiler { ); } + self.set_source_range(*range); emit!(self, Instruction::Copy { i: 1 }); self.store_name(name.as_ref())?; } ast::TypeParam::TypeVarTuple(ast::TypeParamTypeVarTuple { - name, default, .. + name, + default, + range, + .. }) => { + self.set_source_range(*range); self.emit_load_const(ConstantData::Str { value: name.as_str().into(), }); @@ -4533,6 +4753,7 @@ impl Compiler { name.as_str(), true, )?; + self.set_source_range(*range); emit!( self, Instruction::CallIntrinsic2 { @@ -4541,11 +4762,15 @@ impl Compiler { ); } + self.set_source_range(*range); emit!(self, Instruction::Copy { i: 1 }); self.store_name(name.as_ref())?; } }; } + if let Some(first) = type_params.type_params.first() { + self.set_source_range(first.range()); + } emit!( self, Instruction::BuildTuple { @@ -4736,8 +4961,10 @@ impl Compiler { // SETUP_CLEANUP before PUSH_EXC_INFO if let Some(cleanup) = finally_cleanup_block { emit!(self, PseudoInstruction::SetupCleanup { delta: cleanup }); + self.set_no_location(); } emit!(self, Instruction::PushExcInfo); + self.set_no_location(); if let Some(cleanup) = finally_cleanup_block { self.push_fblock(FBlockType::FinallyEnd, cleanup, cleanup)?; } @@ -4761,8 +4988,11 @@ impl Compiler { if let Some(cleanup) = finally_cleanup_block { self.switch_to_block(cleanup); emit!(self, Instruction::Copy { i: 3 }); + self.set_no_location(); emit!(self, Instruction::PopExcept); + self.set_no_location(); emit!(self, Instruction::Reraise { depth: 1 }); + self.set_no_location(); } if preserve_finally_exit_empty_label @@ -4867,6 +5097,7 @@ impl Compiler { if let Some(exc_type) = type_ { self.compile_expression(exc_type)?; + self.set_source_range(*handler_range); emit!(self, Instruction::CheckExcMatch); emit!( self, @@ -4920,12 +5151,17 @@ impl Compiler { ); self.switch_to_block(cleanup_end); + self.set_no_location(); if let Some(alias) = name { self.emit_load_const(ConstantData::None); + self.set_no_location(); self.store_name(alias.as_str())?; + self.set_no_location(); self.compile_name(alias.as_str(), NameUsage::Delete)?; + self.set_no_location(); } emit!(self, Instruction::Reraise { depth: 1 }); + self.set_no_location(); self.switch_to_block(handler_normal_exit); } @@ -5074,10 +5310,13 @@ impl Compiler { self.switch_to_block(cleanup); // COPY 3: copy the exception from position 3 emit!(self, Instruction::Copy { i: 3 }); + self.set_no_location(); // POP_EXCEPT: restore prev_exc as current exception emit!(self, Instruction::PopExcept); + self.set_no_location(); // RERAISE 1: reraise with lasti from stack emit!(self, Instruction::Reraise { depth: 1 }); + self.set_no_location(); } // End block - continuation point after try-finally @@ -5307,6 +5546,7 @@ impl Compiler { if let Some(exc_type) = type_ { self.compile_expression(exc_type)?; + self.set_source_range(*handler_range); emit!(self, Instruction::CheckExcMatch); emit!( self, @@ -5708,7 +5948,9 @@ impl Compiler { delta: finally_cleanup_block } ); + self.set_no_location(); emit!(self, Instruction::PushExcInfo); + self.set_no_location(); self.push_fblock( FBlockType::FinallyEnd, finally_cleanup_block, @@ -5721,8 +5963,11 @@ impl Compiler { self.switch_to_block(finally_cleanup_block); emit!(self, Instruction::Copy { i: 3 }); + self.set_no_location(); emit!(self, Instruction::PopExcept); + self.set_no_location(); emit!(self, Instruction::Reraise { depth: 1 }); + self.set_no_location(); self.switch_to_block(exit_block); if preserve_finally_exit_empty_label { @@ -6120,6 +6365,7 @@ impl Compiler { self, PseudoInstruction::JumpNoInterrupt { delta: exit_block } ); + self.set_no_location(); // Restore sub_tables for exception path compilation if let Some(cursor) = sub_table_cursor @@ -6131,9 +6377,11 @@ impl Compiler { // Exception handler path self.switch_to_block(finally_block); emit!(self, Instruction::PushExcInfo); + self.set_no_location(); if let Some(cleanup) = finally_cleanup_block { emit!(self, PseudoInstruction::SetupCleanup { delta: cleanup }); + self.set_no_location(); self.push_fblock(FBlockType::FinallyEnd, cleanup, cleanup)?; } @@ -6141,6 +6389,7 @@ impl Compiler { if finally_cleanup_block.is_some() { emit!(self, PseudoInstruction::PopBlock); + self.set_no_location(); self.pop_fblock(FBlockType::FinallyEnd); } @@ -6150,8 +6399,11 @@ impl Compiler { if let Some(cleanup) = finally_cleanup_block { self.switch_to_block(cleanup); emit!(self, Instruction::Copy { i: 3 }); + self.set_no_location(); emit!(self, Instruction::PopExcept); + self.set_no_location(); emit!(self, Instruction::Reraise { depth: 1 }); + self.set_no_location(); } } @@ -6173,6 +6425,7 @@ impl Compiler { fn compile_default_arguments( &mut self, parameters: &ast::Parameters, + loc: TextRange, ) -> CompileResult { let mut funcflags = bytecode::MakeFunctionFlags::new(); @@ -6188,6 +6441,7 @@ impl Compiler { for default in &defaults { self.compile_expression(default)?; } + self.set_source_range(loc); emit!( self, Instruction::BuildTuple { @@ -6208,11 +6462,13 @@ impl Compiler { if !kw_with_defaults.is_empty() { // Compile kwdefaults and build dict for (arg, default) in &kw_with_defaults { + self.set_source_range(loc); self.emit_load_const(ConstantData::Str { value: self.mangle(arg.name.as_str()).into_owned().into(), }); self.compile_expression(default)?; } + self.set_source_range(loc); emit!( self, Instruction::BuildMap { @@ -6234,10 +6490,8 @@ impl Compiler { body: &[ast::Stmt], is_async: bool, funcflags: bytecode::MakeFunctionFlags, + closure_range: TextRange, ) -> CompileResult<()> { - // Save source range so MAKE_FUNCTION gets the `def` line, not the body's last line - let saved_range = self.current_source_range; - // Always enter function scope self.enter_function(name, parameters)?; self.current_code_info() @@ -6272,6 +6526,11 @@ impl Compiler { } ); self.set_no_location(); + // CPython's codegen_wrap_in_stopiteration_handler() inserts + // SETUP_CLEANUP at instruction-sequence index 0, so after the + // generator prefix is inserted the protected range begins at the + // function-start RESUME. + self.move_last_instruction_before_scope_start_resume(); self.push_fblock(FBlockType::StopIteration, handler_block, handler_block)?; Some(handler_block) } else { @@ -6279,14 +6538,15 @@ impl Compiler { }; // Handle docstring - store in co_consts[0] if present - let (doc_str, body) = split_doc(body, &self.opts); + let (doc_info, body) = split_doc_with_range(body, &self.opts); + let doc_str = doc_info.as_ref().map(|(doc, _)| doc); if let Some(doc) = &doc_str { // Docstring present: store in co_consts[0] and set HAS_DOCSTRING flag self.current_code_info() .metadata .consts .insert_full(ConstantData::Str { - value: doc.to_string().into(), + value: (*doc).to_string().into(), }); self.current_code_info().flags |= bytecode::CodeFlags::HAS_DOCSTRING; } @@ -6329,7 +6589,7 @@ impl Compiler { let code = self.exit_scope(); self.ctx = prev_ctx; - self.set_source_range(saved_range); + self.set_source_range(closure_range); // Create function object with closure self.make_closure(code, funcflags)?; @@ -6348,6 +6608,7 @@ impl Compiler { func_name: &str, parameters: &ast::Parameters, returns: Option<&ast::Expr>, + func_range: TextRange, ) -> CompileResult { let has_signature_annotations = parameters .args @@ -6364,7 +6625,7 @@ impl Compiler { } // Try to enter annotation scope - returns None if no annotation_block exists - let Some(saved_ctx) = self.enter_annotation_scope(func_name)? else { + let Some(saved_ctx) = self.enter_annotation_scope(func_name, func_range)? else { return Ok(false); }; @@ -6395,6 +6656,7 @@ impl Compiler { for param in parameters_iter { if let Some(annotation) = ¶m.annotation { + self.set_source_range(func_range); self.emit_load_const(ConstantData::Str { value: self.mangle(param.name.as_str()).into_owned().into(), }); @@ -6404,6 +6666,7 @@ impl Compiler { // Handle return annotation if let Some(annotation) = returns { + self.set_source_range(func_range); self.emit_load_const(ConstantData::Str { value: "return".into(), }); @@ -6411,6 +6674,7 @@ impl Compiler { } // Build the map and return it + self.set_source_range(func_range); emit!( self, Instruction::BuildMap { @@ -6423,6 +6687,7 @@ impl Compiler { let annotate_code = self.exit_annotation_scope(saved_ctx); // Make a closure from the code object + self.set_source_range(func_range); self.make_closure(annotate_code, bytecode::MakeFunctionFlags::new())?; Ok(true) @@ -6484,9 +6749,55 @@ impl Compiler { annotations } + fn compile_annotation_for_symbol_cursor_only( + &mut self, + annotation: &ast::Expr, + ) -> CompileResult<()> { + let code_stack_len = self.code_stack.len(); + let code_info = self.current_code_info(); + let saved_blocks = code_info.blocks.clone(); + let saved_current_block = code_info.current_block; + let saved_annotations_blocks = code_info.annotations_blocks.clone(); + let saved_metadata = code_info.metadata.clone(); + let saved_static_attributes = code_info.static_attributes.clone(); + let saved_in_inlined_comp = code_info.in_inlined_comp; + let saved_fblock = code_info.fblock.clone(); + let saved_in_conditional_block = code_info.in_conditional_block; + let saved_in_final_with_cleanup_statement = code_info.in_final_with_cleanup_statement; + let saved_in_try_else_orelse = code_info.in_try_else_orelse; + let saved_next_conditional_annotation_index = code_info.next_conditional_annotation_index; + let saved_source_range = self.current_source_range; + + self.do_not_emit_bytecode += 1; + let result = self.compile_annotation(annotation); + self.do_not_emit_bytecode -= 1; + + debug_assert_eq!(self.code_stack.len(), code_stack_len); + let code_info = self.current_code_info(); + code_info.blocks = saved_blocks; + code_info.current_block = saved_current_block; + code_info.annotations_blocks = saved_annotations_blocks; + code_info.metadata = saved_metadata; + code_info.static_attributes = saved_static_attributes; + code_info.in_inlined_comp = saved_in_inlined_comp; + code_info.fblock = saved_fblock; + code_info.in_conditional_block = saved_in_conditional_block; + code_info.in_final_with_cleanup_statement = saved_in_final_with_cleanup_statement; + code_info.in_try_else_orelse = saved_in_try_else_orelse; + code_info.next_conditional_annotation_index = saved_next_conditional_annotation_index; + self.current_source_range = saved_source_range; + + result + } + /// Compile module-level __annotate__ function (PEP 649) /// Returns true if __annotate__ was created and stored - fn compile_module_annotate(&mut self, body: &[ast::Stmt]) -> CompileResult { + fn compile_module_annotate( + &mut self, + body: &[ast::Stmt], + loc: Option, + ) -> CompileResult { + let loc = loc.unwrap_or(self.current_source_range); let annotations = Self::collect_annotations(body); let simple_annotation_count = annotations .iter() @@ -6517,6 +6828,7 @@ impl Compiler { }; // Enter annotation scope for code generation + self.set_source_range(loc); let key = self.symbol_table_stack.len() - 1; let lineno = self.get_source_line_number().get(); self.enter_scope( @@ -6535,6 +6847,7 @@ impl Compiler { // Emit format validation: if format > VALUE_WITH_FAKE_GLOBALS: raise NotImplementedError self.emit_format_validation(); + self.set_source_range(loc); emit!(self, Instruction::BuildMap { count: 0 }); let mut simple_idx = 0usize; @@ -6543,6 +6856,7 @@ impl Compiler { target, annotation, simple, + range, .. } = stmt; let simple_name = if *simple { @@ -6556,10 +6870,7 @@ impl Compiler { if simple_name.is_none() { if !self.future_annotations { - self.do_not_emit_bytecode += 1; - let result = self.compile_annotation(annotation); - self.do_not_emit_bytecode -= 1; - result?; + self.compile_annotation_for_symbol_cursor_only(annotation)?; } continue; } @@ -6568,6 +6879,7 @@ impl Compiler { let name = simple_name.expect("missing simple annotation name"); if has_conditional { + self.set_source_range(*range); self.emit_load_const(ConstantData::Integer { value: simple_idx.into(), }); @@ -6593,10 +6905,12 @@ impl Compiler { } self.compile_annotation(annotation)?; + self.set_source_range(*range); emit!(self, Instruction::Copy { i: 2 }); self.emit_load_const(ConstantData::Str { value: self.mangle(name).into_owned().into(), }); + self.set_source_range(loc); emit!(self, Instruction::StoreSubscr); simple_idx += 1; @@ -6605,6 +6919,7 @@ impl Compiler { } } + self.set_source_range(loc); emit!(self, Instruction::ReturnValue); // Exit annotation scope - pop symbol table, restore to parent's annotation_block, and get code @@ -6624,6 +6939,7 @@ impl Compiler { ); // Make a closure from the code object + self.set_source_range(loc); self.make_closure(annotate_code, bytecode::MakeFunctionFlags::new())?; // Store as __annotate_func__ for classes, __annotate__ for modules @@ -6632,6 +6948,7 @@ impl Compiler { } else { "__annotate__" }; + self.set_source_range(loc); self.store_name(name)?; Ok(true) @@ -6649,14 +6966,19 @@ impl Compiler { is_async: bool, type_params: Option<&ast::TypeParams>, ) -> CompileResult<()> { - // Save the source range of the `def` line before compiling decorators/defaults, - // so that the function code object gets the correct co_firstlineno. - let def_source_range = self.current_source_range; + // CPython's FunctionDef/AsyncFunctionDef LOC(s) starts at the + // definition line even when decorators are present. + let stmt_source_range = self.current_source_range; + let def_source_range = self.decorated_definition_range( + stmt_source_range, + decorator_list, + if is_async { "async def " } else { "def " }, + ); self.prepare_decorators(decorator_list)?; // compile defaults and return funcflags - let funcflags = self.compile_default_arguments(parameters)?; + let funcflags = self.compile_default_arguments(parameters, def_source_range)?; // Restore the `def` line range so that enter_function → push_output → get_source_line_number() // records the `def` keyword's line as co_firstlineno, not the last default-argument line. @@ -6726,13 +7048,21 @@ impl Compiler { // Compile annotations as closure (PEP 649) let mut annotations_flag = bytecode::MakeFunctionFlags::new(); - if self.compile_annotations_closure(name, parameters, returns)? { + if self.compile_annotations_closure(name, parameters, returns, def_source_range)? { annotations_flag.insert(bytecode::MakeFunctionFlag::Annotate); } // Compile function body + self.set_source_range(stmt_source_range); let final_funcflags = funcflags | annotations_flag; - self.compile_function_body(name, parameters, body, is_async, final_funcflags)?; + self.compile_function_body( + name, + parameters, + body, + is_async, + final_funcflags, + def_source_range, + )?; // Handle type params if present if is_generic { @@ -6786,6 +7116,7 @@ impl Compiler { self.apply_decorators(decorator_list); // Store the function + self.set_source_range(def_source_range); self.store_name(name)?; Ok(()) @@ -7058,7 +7389,9 @@ impl Compiler { self.code_stack.last_mut().unwrap().private = Some(name.to_owned()); // 2. Set up class namespace - let (doc_str, body) = split_doc(body, &self.opts); + let (doc_str, body) = split_doc_with_range(body, &self.opts); + let class_body_prefix_range = self.source_line_start_range(firstlineno); + self.set_source_range(class_body_prefix_range); // Load __name__ and store as __module__ self.load_name("__name__")?; @@ -7104,16 +7437,19 @@ impl Compiler { } // Store __doc__ only if there's an explicit docstring. - if let Some(doc) = doc_str { + if let Some((doc, range)) = doc_str { + let saved_range = self.current_source_range; + self.set_source_range(range); self.emit_load_const(ConstantData::Str { value: doc.into() }); self.store_name("__doc__")?; + self.set_source_range(saved_range); } // 3. Compile the class body self.compile_statements(body)?; if Self::find_ann(body) && !self.future_annotations { - self.compile_module_annotate(body)?; + self.compile_module_annotate(body, Some(class_body_prefix_range))?; } // 4. Handle __classcell__ if needed @@ -7190,6 +7526,11 @@ impl Compiler { type_params: Option<&ast::TypeParams>, arguments: Option<&ast::Arguments>, ) -> CompileResult<()> { + // CPython's ClassDef LOC(s) starts at the class line even when + // decorators are present. + let stmt_source_range = self.current_source_range; + let class_source_range = + self.decorated_definition_range(stmt_source_range, decorator_list, "class "); self.prepare_decorators(decorator_list)?; let is_generic = type_params.is_some(); @@ -7233,6 +7574,7 @@ impl Compiler { // Compile type parameters and store them in the synthetic cell that // generic class bodies close over. self.compile_type_params(type_params.unwrap())?; + self.set_source_range(class_source_range); self.store_name(".type_params")?; } @@ -7246,6 +7588,7 @@ impl Compiler { }; let class_code = self.compile_class_body(name, body, type_params, firstlineno)?; self.ctx = prev_ctx; + self.set_source_range(class_source_range); // Step 3: Generate the rest of the code for the call if is_generic { @@ -7260,6 +7603,7 @@ impl Compiler { // Create .generic_base after the class function and name are on the // stack so the remaining call shape matches CPython's ordering. + self.set_source_range(class_source_range); self.load_name(".type_params")?; emit!( self, @@ -7267,6 +7611,7 @@ impl Compiler { func: bytecode::IntrinsicFunction1::SubscriptGeneric } ); + self.set_source_range(class_source_range); self.store_name(".generic_base")?; // Compile bases and call __build_class__ @@ -7303,10 +7648,13 @@ impl Compiler { } // Add .generic_base as final element + self.set_source_range(class_source_range); self.load_name(".generic_base")?; + self.set_source_range(class_source_range); emit!(self, Instruction::ListAppend { i: 1 }); // Convert list to tuple + self.set_source_range(class_source_range); emit!( self, Instruction::CallIntrinsic1 { @@ -7316,6 +7664,7 @@ impl Compiler { self.compile_call_function_ex_keywords( arguments.map_or(&[][..], |args| &args.keywords[..]), + class_source_range, )?; emit!(self, Instruction::CallFunctionEx); } else if has_double_star { @@ -7324,7 +7673,9 @@ impl Compiler { self.compile_expression(arg)?; } } + self.set_source_range(class_source_range); self.load_name(".generic_base")?; + self.set_source_range(class_source_range); emit!( self, Instruction::BuildTuple { @@ -7332,7 +7683,10 @@ impl Compiler { .map_or(0, |args| u32::try_from(args.args.len()).unwrap()) } ); - self.compile_call_function_ex_keywords(&arguments.unwrap().keywords[..])?; + self.compile_call_function_ex_keywords( + &arguments.unwrap().keywords[..], + class_source_range, + )?; emit!(self, Instruction::CallFunctionEx); } else { // Simple case: no starred bases, no **kwargs @@ -7347,6 +7701,7 @@ impl Compiler { }; // Load .generic_base as the last base + self.set_source_range(class_source_range); self.load_name(".generic_base")?; let nargs = 2 + u32::try_from(base_count).expect("too many base classes") + 1; @@ -7365,9 +7720,11 @@ impl Compiler { }); self.compile_expression(&keyword.value)?; } + self.set_source_range(class_source_range); self.emit_load_const(ConstantData::Tuple { elements: kwarg_names, }); + self.set_source_range(class_source_range); emit!( self, Instruction::CallKw { @@ -7377,11 +7734,13 @@ impl Compiler { } ); } else { + self.set_source_range(class_source_range); emit!(self, Instruction::Call { argc: nargs }); } } // Return the created class + self.set_source_range(class_source_range); self.emit_return_value(); // Exit type params scope and wrap in function @@ -7389,8 +7748,11 @@ impl Compiler { self.ctx = saved_ctx; // Execute the type params function + self.set_source_range(class_source_range); self.make_closure(type_params_code, bytecode::MakeFunctionFlags::new())?; + self.set_source_range(class_source_range); emit!(self, Instruction::PushNull); + self.set_source_range(class_source_range); emit!(self, Instruction::Call { argc: 0 }); } else { // Non-generic class: standard path @@ -7402,14 +7764,16 @@ impl Compiler { self.emit_load_const(ConstantData::Str { value: name.into() }); if let Some(arguments) = arguments { - self.codegen_call_helper(2, arguments, self.current_source_range)?; + self.codegen_call_helper(2, arguments, class_source_range, None)?; } else { + self.set_source_range(class_source_range); emit!(self, Instruction::Call { argc: 2 }); } } // Step 4: Apply decorators and store (common to both paths) self.apply_decorators(decorator_list); + self.set_source_range(class_source_range); self.store_name(name) } @@ -7879,9 +8243,14 @@ impl Compiler { // to be in the exception table for these instructions. // If we cleared fblock, exceptions here would propagate uncaught. self.switch_to_block(cleanup_block); + // CPython codegen_with_except_finish() emits POP_EXCEPT_AND_RERAISE + // with NO_LOCATION at this cleanup label. emit!(self, Instruction::Copy { i: 3 }); + self.set_no_location(); emit!(self, Instruction::PopExcept); + self.set_no_location(); emit!(self, Instruction::Reraise { depth: 1 }); + self.set_no_location(); // ===== After block ===== self.switch_to_block(after_block); @@ -7903,6 +8272,7 @@ impl Compiler { body: &[ast::Stmt], orelse: &[ast::Stmt], is_async: bool, + for_range: TextRange, ) -> CompileResult<()> { self.enter_conditional_block(); @@ -7935,9 +8305,11 @@ impl Compiler { if self.ctx.func != FunctionContext::AsyncFunction { return Err(self.error(CodegenErrorType::InvalidAsyncFor)); } + self.set_source_range(iter.range()); emit!(self, Instruction::GetAiter); self.switch_to_block(for_block); + self.set_source_range(for_range); // codegen_async_for: push fblock BEFORE SETUP_FINALLY self.push_fblock(FBlockType::ForLoop, for_block, after_block)?; @@ -8066,25 +8438,35 @@ impl Compiler { && elts.len() <= usize::try_from(STACK_USE_GUIDELINE).unwrap() && !elts.iter().any(|e| matches!(e, ast::Expr::Starred(_))) { - if let Some(folded) = self.try_fold_constant_collection(elts, CollectionType::List)? { - self.emit_load_const(folded); - } else { - for elt in elts { - self.compile_expression(elt)?; - } - emit!( - self, - Instruction::BuildTuple { - count: u32::try_from(elts.len()).expect("too many elements"), - } - ); + for elt in elts { + self.compile_expression(elt)?; } + self.set_source_range(iter.range()); + emit!( + self, + Instruction::BuildList { + count: u32::try_from(elts.len()).expect("too many elements"), + } + ); return Ok(()); } self.compile_expression(iter) } + fn compile_comprehension_iter(&mut self, generator: &ast::Comprehension) -> CompileResult<()> { + let saved_range = self.current_source_range; + self.compile_for_iterable_expression(&generator.iter, generator.is_async)?; + self.set_source_range(generator.iter.range()); + if generator.is_async { + emit!(self, Instruction::GetAiter); + } else { + emit!(self, Instruction::GetIter); + } + self.set_source_range(saved_range); + Ok(()) + } + fn singleton_comprehension_assignment_iter(iter: &ast::Expr) -> Option<&ast::Expr> { let elts = match iter { ast::Expr::List(ast::ExprList { elts, .. }) => elts, @@ -8476,6 +8858,7 @@ impl Compiler { // Compile the class expression. self.compile_expression(&match_class.cls)?; + self.set_source_range(p.range); // Create a new tuple of attribute names. let mut attr_names = vec![]; @@ -8652,8 +9035,9 @@ impl Compiler { seen.insert(key_repr); } - self.compile_expression(key)?; + self.compile_match_pattern_expr(key)?; } + self.set_source_range(p.range); } // Stack: [subject, key1, key2, ..., key_n] @@ -8781,6 +9165,7 @@ impl Compiler { pc.fail_pop.clear(); pc.on_top = 0; // Emit a COPY(1) instruction before compiling the alternative. + self.set_source_range(alt.range()); emit!(self, Instruction::Copy { i: 1 }); self.compile_pattern(alt, pc)?; @@ -8954,7 +9339,7 @@ impl Compiler { // Match CPython codegen_pattern_value(): compare, then normalize to bool // before the fail jump. Late IR folding will collapse COMPARE_OP+TO_BOOL // into COMPARE_OP bool(...) when applicable. - self.compile_expression(&p.value)?; + self.compile_match_pattern_expr(&p.value)?; emit!( self, Instruction::CompareOp { @@ -9085,6 +9470,7 @@ impl Compiler { for (i, m) in cases.iter().enumerate().take(case_count) { // Only copy the subject if not on the last case if i != case_count - 1 { + self.set_source_range(m.pattern.range()); emit!(self, Instruction::Copy { i: 1 }); } @@ -9139,6 +9525,7 @@ impl Compiler { } else { emit!(self, PseudoInstruction::Jump { delta: end }); } + self.set_no_location(); if let Some(last) = self.current_block().instructions.last_mut() { last.match_success_jump = true; } @@ -9381,6 +9768,7 @@ impl Compiler { fn compile_annotation(&mut self, annotation: &ast::Expr) -> CompileResult<()> { if self.future_annotations { + self.set_source_range(annotation.range()); self.emit_load_const(ConstantData::Str { value: UnparseExpr::new(annotation, &self.source_file) .to_string() @@ -9445,6 +9833,7 @@ impl Compiler { annotation: &ast::Expr, value: Option<&ast::Expr>, simple: bool, + loc: TextRange, ) -> CompileResult<()> { // Perform the actual assignment first if let Some(value) = value { @@ -9461,6 +9850,7 @@ impl Compiler { // PEP 563: Store stringified annotation directly to __annotations__ // Compile annotation as string self.compile_annotation(annotation)?; + self.set_source_range(loc); // Load __annotations__ let annotations_name = self.name("__annotations__"); emit!( @@ -9470,10 +9860,12 @@ impl Compiler { } ); // Load the variable name + self.set_source_range(loc); self.emit_load_const(ConstantData::Str { value: self.mangle(id.as_str()).into_owned().into(), }); // Store: __annotations__[name] = annotation + self.set_source_range(loc); emit!(self, Instruction::StoreSubscr); } else { // PEP 649: Handle conditional annotations @@ -9535,6 +9927,11 @@ impl Compiler { ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => { self.maybe_add_static_attribute_to_class(value, attr.as_str()); self.compile_expression(value)?; + self.set_source_range(self.update_start_location_to_match_attr( + target.range(), + target.range(), + attr.as_str(), + )); let namei = self.name(attr.as_str()); emit!(self, Instruction::StoreAttr { namei }); } @@ -9603,15 +10000,25 @@ impl Compiler { op: &ast::Operator, value: &ast::Expr, ) -> CompileResult<()> { + let stmt_range = self.current_source_range; + let target_range = target.range(); enum AugAssignKind<'a> { - Name { id: &'a str }, - Subscript { use_slice_opt: bool }, - Attr { idx: bytecode::NameIdx }, + Name { + id: &'a str, + }, + Subscript { + use_slice_opt: bool, + }, + Attr { + idx: bytecode::NameIdx, + attr_range: TextRange, + }, } let kind = match &target { ast::Expr::Name(ast::ExprName { id, .. }) => { let id = id.as_str(); + self.set_source_range(target_range); self.compile_name(id, NameUsage::Load)?; AugAssignKind::Name { id } } @@ -9623,6 +10030,7 @@ impl Compiler { }) => { let use_slice_opt = slice.should_use_slice_optimization(); self.compile_expression(value)?; + self.set_source_range(target_range); if use_slice_opt { let ast::Expr::Slice(slice_expr) = slice.as_ref() else { unreachable!( @@ -9630,12 +10038,14 @@ impl Compiler { ); }; self.compile_slice_two_parts(slice_expr)?; + self.set_source_range(target_range); emit!(self, Instruction::Copy { i: 3 }); emit!(self, Instruction::Copy { i: 3 }); emit!(self, Instruction::Copy { i: 3 }); emit!(self, Instruction::BinarySlice); } else { self.compile_expression(slice)?; + self.set_source_range(target_range); emit!(self, Instruction::Copy { i: 2 }); emit!(self, Instruction::Copy { i: 2 }); emit!( @@ -9650,10 +10060,14 @@ impl Compiler { ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => { let attr = attr.as_str(); self.compile_expression(value)?; + let attr_range = + self.update_start_location_to_match_attr(target_range, target_range, attr); + self.set_source_range(attr_range); emit!(self, Instruction::Copy { i: 1 }); let idx = self.name(attr); + self.set_source_range(attr_range); self.emit_load_attr(idx); - AugAssignKind::Attr { idx } + AugAssignKind::Attr { idx, attr_range } } _ => { return Err(self.error(CodegenErrorType::Assign(target.python_name()))); @@ -9661,14 +10075,17 @@ impl Compiler { }; self.compile_expression(value)?; + self.set_source_range(stmt_range); self.compile_op(op, true); match kind { AugAssignKind::Name { id } => { // stack: RESULT + self.set_source_range(target_range); self.compile_name(id, NameUsage::Store)?; } AugAssignKind::Subscript { use_slice_opt } => { + self.set_source_range(target_range); if use_slice_opt { // stack: CONTAINER START STOP RESULT emit!(self, Instruction::Swap { i: 4 }); @@ -9682,8 +10099,9 @@ impl Compiler { emit!(self, Instruction::StoreSubscr); } } - AugAssignKind::Attr { idx } => { + AugAssignKind::Attr { idx, attr_range } => { // stack: CONTAINER RESULT + self.set_source_range(attr_range); emit!(self, Instruction::Swap { i: 2 }); emit!(self, Instruction::StoreAttr { namei: idx }); } @@ -9766,6 +10184,10 @@ impl Compiler { self.compile_jump_if_inner(body, condition, target_block, source_range)?; emit!(self, PseudoInstruction::JumpNoInterrupt { delta: end }); self.set_no_location(); + // CPython emits this jump with NO_LOCATION in codegen_jump_if() + // and flowgraph.c::propagate_line_numbers() copies the previous + // body-expression location onto it. + self.copy_previous_location_to_last_instruction(); self.switch_to_block(next2); self.compile_jump_if_inner(orelse, condition, target_block, source_range)?; @@ -9793,6 +10215,12 @@ impl Compiler { && matches!(&comparators[0], ast::Expr::NoneLiteral(_)) => { self.compile_expression(left)?; + // CPython codegen first emits LOAD_CONST None; IS_OP; POP_JUMP... + // and flowgraph.c::basicblock_optimize_load_const folds it into + // POP_JUMP_IF_NONE / POP_JUMP_IF_NOT_NONE. Register None here + // to preserve CPython's co_consts ordering even though we emit + // the folded jump directly. + self.arg_constant(ConstantData::None); let source = self.source_file.to_source_code(); let comparator_line = source.line_index(comparators[0].range().start()); let left_line = source.line_index(left.range().start()); @@ -9807,6 +10235,7 @@ impl Compiler { // is not None + jump_if_false → POP_JUMP_IF_NONE // is not None + jump_if_true → POP_JUMP_IF_NOT_NONE let jump_if_none = condition != is_not; + self.set_source_range(source_range.unwrap_or_else(|| expression.range())); if jump_if_none { emit!( self, @@ -9831,6 +10260,7 @@ impl Compiler { self.disable_load_fast_borrow_for_block(target_block); } self.compile_expression(expression)?; + self.set_source_range(expression.range()); emit!(self, Instruction::ToBool); if condition { emit!( @@ -9867,39 +10297,48 @@ impl Compiler { /// Compile a boolean operation as an expression. /// This means, that the last value remains on the stack. fn compile_bool_op(&mut self, op: &ast::BoolOp, values: &[ast::Expr]) -> CompileResult<()> { + let boolop_range = self.current_source_range; fn flatten_same_boolop_values<'a>( op: &ast::BoolOp, - value: &'a ast::Expr, - out: &mut Vec<&'a ast::Expr>, + values: &'a [ast::Expr], + current_range: TextRange, + outer_pop_range: Option, + out: &mut Vec<(&'a ast::Expr, Option)>, ) { - if let ast::Expr::BoolOp(ast::ExprBoolOp { - op: inner_op, - values, - .. - }) = value - && inner_op == op - { - for value in values { - flatten_same_boolop_values(op, value, out); + for (idx, value) in values.iter().enumerate() { + let is_last = idx + 1 == values.len(); + let pop_range = if is_last { + outer_pop_range + } else { + Some(current_range) + }; + if let ast::Expr::BoolOp(ast::ExprBoolOp { + op: inner_op, + values, + .. + }) = value + && inner_op == op + { + flatten_same_boolop_values(op, values, value.range(), pop_range, out); + } else { + out.push((value, pop_range)); } - } else { - out.push(value); } } let mut flattened = Vec::with_capacity(values.len()); - for value in values { - flatten_same_boolop_values(op, value, &mut flattened); - } + flatten_same_boolop_values(op, values, boolop_range, None, &mut flattened); let after_block = self.new_block(); - let (last_value, prefix_values) = flattened.split_last().unwrap(); + let ((last_value, _), prefix_values) = flattened.split_last().unwrap(); - for value in prefix_values { + for &(value, pop_range) in prefix_values { let continue_block = self.new_block(); self.compile_expression(value)?; + self.set_source_range(boolop_range); self.emit_short_circuit_test(op, after_block); self.switch_to_block(continue_block); + self.set_source_range(pop_range.expect("prefix boolop value must have pop range")); emit!(self, Instruction::PopTop); } @@ -9908,28 +10347,6 @@ impl Compiler { Ok(()) } - fn compile_bool_op_with_head_constant( - &mut self, - op: &ast::BoolOp, - head: ConstantData, - tail: &[ast::Expr], - ) -> CompileResult<()> { - self.emit_load_const(head); - self.mark_last_instruction_folded_from_nonliteral_expr(); - if tail.is_empty() { - return Ok(()); - } - - let after_block = self.new_block(); - for value in tail { - self.emit_short_circuit_test(op, after_block); - emit!(self, Instruction::PopTop); - self.compile_expression(value)?; - } - self.switch_to_block(after_block); - Ok(()) - } - /// Emit `Copy 1` + conditional jump for short-circuit evaluation. /// For `And`, emits `PopJumpIfFalse`; for `Or`, emits `PopJumpIfTrue`. fn emit_short_circuit_test(&mut self, op: &ast::BoolOp, target: BlockIdx) { @@ -9945,7 +10362,7 @@ impl Compiler { } } - fn compile_dict(&mut self, items: &[ast::DictItem]) -> CompileResult<()> { + fn compile_dict(&mut self, items: &[ast::DictItem], range: TextRange) -> CompileResult<()> { let has_unpacking = items.iter().any(|item| item.key.is_none()); if !has_unpacking { @@ -9961,6 +10378,7 @@ impl Compiler { self.compile_expression(item.key.as_ref().unwrap())?; self.compile_expression(&item.value)?; } + self.set_source_range(range); emit!( self, Instruction::BuildMap { @@ -9987,11 +10405,13 @@ impl Compiler { (total_map_add, 0usize) }; + self.set_source_range(range); emit!(self, Instruction::BuildMap { count: 0 }); let mut idx = 0; for chunk_i in 0..big_count { if chunk_i > 0 { + self.set_source_range(range); emit!(self, Instruction::BuildMap { count: 0 }); } let chunk_size = if idx + BIG_MAP_CHUNK <= n - tail_count { @@ -10002,9 +10422,11 @@ impl Compiler { for item in &items[idx..idx + chunk_size] { self.compile_expression(item.key.as_ref().unwrap())?; self.compile_expression(&item.value)?; + self.set_source_range(range); emit!(self, Instruction::MapAdd { i: 1 }); } if chunk_i > 0 { + self.set_source_range(range); emit!(self, Instruction::DictUpdate { i: 1 }); } idx += chunk_size; @@ -10016,12 +10438,14 @@ impl Compiler { self.compile_expression(item.key.as_ref().unwrap())?; self.compile_expression(&item.value)?; } + self.set_source_range(range); emit!( self, Instruction::BuildMap { count: tail_count.to_u32(), } ); + self.set_source_range(range); emit!(self, Instruction::DictUpdate { i: 1 }); } } @@ -10040,8 +10464,10 @@ impl Compiler { () => { #[allow(unused_assignments)] if elements > 0 { + self.set_source_range(range); emit!(self, Instruction::BuildMap { count: elements }); if have_dict { + self.set_source_range(range); emit!(self, Instruction::DictUpdate { i: 1 }); } else { have_dict = true; @@ -10061,16 +10487,19 @@ impl Compiler { // ** unpacking entry flush_pending!(); if !have_dict { + self.set_source_range(range); emit!(self, Instruction::BuildMap { count: 0 }); have_dict = true; } self.compile_expression(&item.value)?; + self.set_source_range(range); emit!(self, Instruction::DictUpdate { i: 1 }); } } flush_pending!(); if !have_dict { + self.set_source_range(range); emit!(self, Instruction::BuildMap { count: 0 }); } @@ -10162,26 +10591,7 @@ impl Compiler { | ast::Expr::BooleanLiteral(_) | ast::Expr::NoneLiteral(_) | ast::Expr::EllipsisLiteral(_) - ) || matches!(expr, ast::Expr::FString(fstring) if Self::fstring_value_is_const(&fstring.value)) - } - - fn fstring_value_is_const(fstring: &ast::FStringValue) -> bool { - for part in fstring { - if !Self::fstring_part_is_const(part) { - return false; - } - } - true - } - - fn fstring_part_is_const(part: &ast::FStringPart) -> bool { - match part { - ast::FStringPart::Literal(_) => true, - ast::FStringPart::FString(fstring) => fstring - .elements - .iter() - .all(|element| matches!(element, ast::InterpolatedStringElement::Literal(_))), - } + ) } fn compile_expression(&mut self, expression: &ast::Expr) -> CompileResult<()> { @@ -10189,16 +10599,6 @@ impl Compiler { let range = expression.range(); self.set_source_range(range); - if let ast::Expr::Subscript(ast::ExprSubscript { - ctx: ast::ExprContext::Load, - .. - }) = expression - && let Some(constant) = self.try_fold_constant_expr(expression)? - { - self.emit_load_const(constant); - return Ok(()); - } - if matches!(expression, ast::Expr::BinOp(_)) && let Some(constant) = self.try_fold_constant_expr(expression)? { @@ -10211,25 +10611,31 @@ impl Compiler { { let mut simplified_prefix = 0usize; let mut last_constant = None; - let mut retained_head = None; + let mut last_constant_range = None; for value in values { let Some(constant) = self.try_fold_constant_expr(value)? else { break; }; if !Self::boolop_fast_fold_literal(value) { - retained_head = Some(constant); - simplified_prefix += 1; break; } + // CPython codegen_boolop() emits each literal with + // ADDOP_LOAD_CONST before flowgraph.c folds the constant + // branch away. Register it here so remove_unused_consts() + // preserves the same first-constant ordering. + self.arg_constant(constant.clone()); let is_truthy = Self::constant_truthiness(&constant); last_constant = Some(constant); + last_constant_range = Some(value.range()); match op { ast::BoolOp::Or if is_truthy => { + self.set_source_range(last_constant_range.expect("missing boolop range")); self.emit_load_const(last_constant.expect("missing boolop constant")); self.mark_last_instruction_folded_from_nonliteral_expr(); return Ok(()); } ast::BoolOp::And if !is_truthy => { + self.set_source_range(last_constant_range.expect("missing boolop range")); self.emit_load_const(last_constant.expect("missing boolop constant")); self.mark_last_instruction_folded_from_nonliteral_expr(); return Ok(()); @@ -10240,11 +10646,8 @@ impl Compiler { } } - if let Some(head) = retained_head { - self.compile_bool_op_with_head_constant(op, head, &values[simplified_prefix..])?; - return Ok(()); - } if simplified_prefix == values.len() { + self.set_source_range(last_constant_range.expect("missing boolop range")); self.emit_load_const(last_constant.expect("missing folded boolop constant")); self.mark_last_instruction_folded_from_nonliteral_expr(); return Ok(()); @@ -10284,10 +10687,25 @@ impl Compiler { self.compile_subscript(value, slice, *ctx)?; } ast::Expr::UnaryOp(ast::ExprUnaryOp { op, operand, .. }) => { - self.compile_expression(operand)?; - - // Restore full expression range before emitting the operation - self.set_source_range(range); + if let ( + ast::UnaryOp::Not, + ast::Expr::Compare(ast::ExprCompare { + left, + ops, + comparators, + .. + }), + ) = (op, operand.as_ref()) + && ops.len() == 1 + { + self.set_source_range(range); + self.compile_compare(left, ops, comparators)?; + } else { + self.compile_expression(operand)?; + } + + // Restore full expression range before emitting the operation + self.set_source_range(range); match op { ast::UnaryOp::UAdd => emit!( self, @@ -10308,11 +10726,14 @@ impl Compiler { if let Some(super_type) = self.can_optimize_super_call(value, attr.as_str()) { // super().attr or super(cls, self).attr optimization // Stack: [global_super, class, self] → LOAD_SUPER_ATTR → [attr] - // Set source range to super() call for arg-loading instructions - let super_range = value.range(); - self.set_source_range(super_range); - self.load_args_for_super(&super_type)?; - self.set_source_range(super_range); + let ast::Expr::Call(ast::ExprCall { + func: super_func, .. + }) = value.as_ref() + else { + unreachable!("can_optimize_super_call only accepts calls"); + }; + self.load_args_for_super(&super_type, super_func.range(), value.range())?; + self.set_source_range(range); let idx = self.name(attr.as_str()); match super_type { SuperCallType::TwoArg { .. } => { @@ -10325,6 +10746,11 @@ impl Compiler { } else { // Normal attribute access self.compile_expression(value)?; + self.set_source_range(self.update_start_location_to_match_attr( + range, + range, + attr.as_str(), + )); let idx = self.name(attr.as_str()); self.emit_load_attr(idx); } @@ -10349,29 +10775,37 @@ impl Compiler { ast::Expr::Set(ast::ExprSet { elts, .. }) => { self.starunpack_helper(elts, 0, CollectionType::Set)?; } - ast::Expr::Dict(ast::ExprDict { items, .. }) => { - self.compile_dict(items)?; + ast::Expr::Dict(ast::ExprDict { items, range, .. }) => { + self.compile_dict(items, *range)?; } ast::Expr::Slice(ast::ExprSlice { - lower, upper, step, .. + lower, + upper, + step, + range, + .. }) => { if let Some(folded_const) = self.try_fold_constant_slice( lower.as_deref(), upper.as_deref(), step.as_deref(), )? { + self.set_source_range(*range); self.emit_load_const(folded_const); return Ok(()); } - let mut compile_bound = |bound: Option<&ast::Expr>| match bound { - Some(exp) => self.compile_expression(exp), - None => { - self.emit_load_const(ConstantData::None); - Ok(()) - } - }; - compile_bound(lower.as_deref())?; - compile_bound(upper.as_deref())?; + if let Some(lower) = lower { + self.compile_expression(lower)?; + } else { + self.set_source_range(*range); + self.emit_load_const(ConstantData::None); + } + if let Some(upper) = upper { + self.compile_expression(upper)?; + } else { + self.set_source_range(*range); + self.emit_load_const(ConstantData::None); + } if let Some(step) = step { self.compile_expression(step)?; } @@ -10379,6 +10813,7 @@ impl Compiler { Some(_) => BuildSliceArgCount::Three, None => BuildSliceArgCount::Two, }; + self.set_source_range(*range); emit!(self, Instruction::BuildSlice { argc }); } ast::Expr::Yield(ast::ExprYield { value, .. }) => { @@ -10390,6 +10825,7 @@ impl Compiler { Some(expression) => self.compile_expression(expression)?, Option::None => self.emit_load_const(ConstantData::None), }; + self.set_source_range(range); if self.ctx.func == FunctionContext::AsyncFunction { emit!( self, @@ -10412,6 +10848,7 @@ impl Compiler { return Err(self.error(CodegenErrorType::InvalidAwait)); } self.compile_expression(value)?; + self.set_source_range(range); emit!(self, Instruction::GetAwaitable { r#where: 0 }); self.emit_load_const(ConstantData::None); let _ = self.compile_yield_from_sequence(true)?; @@ -10428,13 +10865,17 @@ impl Compiler { } self.mark_generator(); self.compile_expression(value)?; + self.set_source_range(range); emit!(self, Instruction::GetYieldFromIter); self.emit_load_const(ConstantData::None); let _ = self.compile_yield_from_sequence(false)?; } ast::Expr::Name(ast::ExprName { id, .. }) => self.load_name(id.as_str())?, ast::Expr::Lambda(ast::ExprLambda { - parameters, body, .. + parameters, + body, + range, + .. }) => { let default_params = ast::Parameters::default(); let params = parameters.as_deref().unwrap_or(&default_params); @@ -10456,6 +10897,7 @@ impl Compiler { for element in &defaults { self.compile_expression(element)?; } + self.set_source_range(*range); emit!(self, Instruction::BuildTuple { count: size }); } @@ -10471,11 +10913,13 @@ impl Compiler { if have_kwdefaults { let default_kw_count = kw_with_defaults.len(); for (arg, default) in &kw_with_defaults { + self.set_source_range(*range); self.emit_load_const(ConstantData::Str { value: self.mangle(arg.name.as_str()).into_owned().into(), }); self.compile_expression(default)?; } + self.set_source_range(*range); emit!( self, Instruction::BuildMap { @@ -10504,13 +10948,21 @@ impl Compiler { in_async_scope: false, }; - // Lambda cannot have docstrings, so no None is added to co_consts - self.compile_expression(body)?; + self.set_source_range(body.range()); self.emit_return_value(); + // _PyCodegen_AddReturnAtEnd() appends a no-location + // return-None epilogue even after lambda's explicit + // RETURN_VALUE. It is later removed as unreachable, but + // remove_unused_consts() keeps None when it was the first + // constant in an otherwise constant-free lambda. + if self.current_code_info().metadata.consts.is_empty() { + self.arg_constant(ConstantData::None); + } let code = self.exit_scope(); // Create lambda function with closure + self.set_source_range(*range); self.make_closure(code, func_flags)?; self.ctx = prev_ctx; @@ -10532,6 +10984,7 @@ impl Compiler { generators, &|compiler, collection_add_i| { compiler.compile_comprehension_element(elt)?; + compiler.set_source_range(elt.range()); emit!( compiler, Instruction::ListAppend { @@ -10543,6 +10996,8 @@ impl Compiler { ComprehensionType::List, Self::contains_await(elt) || Self::generators_contain_await(generators), *range, + elt.range(), + elt.range(), )?; } ast::Expr::SetComp(ast::ExprSetComp { @@ -10562,6 +11017,7 @@ impl Compiler { generators, &|compiler, collection_add_i| { compiler.compile_comprehension_element(elt)?; + compiler.set_source_range(elt.range()); emit!( compiler, Instruction::SetAdd { @@ -10573,6 +11029,8 @@ impl Compiler { ComprehensionType::Set, Self::contains_await(elt) || Self::generators_contain_await(generators), *range, + elt.range(), + elt.range(), )?; } ast::Expr::DictComp(ast::ExprDictComp { @@ -10596,6 +11054,10 @@ impl Compiler { compiler.compile_expression(key)?; compiler.compile_expression(value)?; + compiler.set_source_range(TextRange::new( + key.range().start(), + value.range().end(), + )); emit!( compiler, Instruction::MapAdd { @@ -10610,6 +11072,8 @@ impl Compiler { || Self::contains_await(value) || Self::generators_contain_await(generators), *range, + TextRange::new(key.range().start(), value.range().end()), + key.range(), )?; } ast::Expr::Generator(ast::ExprGenerator { @@ -10618,47 +11082,7 @@ impl Compiler { range, .. }) => { - // Check if element or generators contain async content - // This makes the generator expression into an async generator - let element_contains_await = - Self::contains_await(elt) || Self::generators_contain_await(generators); - self.compile_comprehension( - "", - None, - generators, - &|compiler, _collection_add_i| { - // Compile the element expression - // Note: if element is an async comprehension, compile_expression - // already handles awaiting it, so we don't need to await again here - compiler.compile_comprehension_element(elt)?; - - compiler.mark_generator(); - if compiler.ctx.func == FunctionContext::AsyncFunction { - emit!( - compiler, - Instruction::CallIntrinsic1 { - func: bytecode::IntrinsicFunction1::AsyncGenWrap - } - ); - } - // arg=0: direct yield (wrapped for async generators) - emit!(compiler, Instruction::YieldValue { arg: 0 }); - emit!( - compiler, - Instruction::Resume { - context: oparg::ResumeContext::from( - oparg::ResumeLocation::AfterYield - ) - } - ); - emit!(compiler, Instruction::PopTop); - - Ok(()) - }, - ComprehensionType::Generator, - element_contains_await, - *range, - )?; + self.compile_generator_expression(elt, generators, *range)?; } ast::Expr::Starred(ast::ExprStarred { value, .. }) => { if self.in_annotation { @@ -10682,6 +11106,9 @@ impl Compiler { .map(Self::constant_truthiness); let else_block = self.new_block(); let after_block = self.new_block(); + if self.current_code_info().in_conditional_block > 0 { + self.mark_conditional_ifexp_orelse_entry_block(else_block); + } self.compile_jump_if(test, false, else_block)?; // True case @@ -10721,7 +11148,7 @@ impl Compiler { target, value, node_index: _, - range: _, + range, }) => { // Walrus targets in inlined comps should NOT be hidden from locals() if self.current_code_info().in_inlined_comp @@ -10733,8 +11160,10 @@ impl Compiler { info.metadata.fast_hidden_final.swap_remove(name.as_ref()); } self.compile_expression(value)?; + self.set_source_range(*range); emit!(self, Instruction::Copy { i: 1 }); self.compile_store(target)?; + self.set_source_range(target.range()); } ast::Expr::FString(fstring) => { self.compile_expr_fstring(fstring)?; @@ -10812,6 +11241,7 @@ impl Compiler { &mut self, kind: BuiltinGeneratorCallKind, generator_expr: &ast::Expr, + loc: TextRange, end: BlockIdx, ) -> CompileResult<()> { let common_constant = match kind { @@ -10825,6 +11255,7 @@ impl Compiler { let cleanup = self.new_block(); // Stack: [func] — copy function for identity check + self.set_source_range(loc); emit!(self, Instruction::Copy { i: 1 }); emit!( self, @@ -10837,45 +11268,64 @@ impl Compiler { emit!(self, Instruction::PopTop); if matches!(kind, BuiltinGeneratorCallKind::Tuple) { + self.set_source_range(loc); emit!(self, Instruction::BuildList { count: 0 }); } let sub_table_cursor = self.symbol_table_stack.last().map(|t| t.next_sub_table); - self.compile_expression(generator_expr)?; + if let Some(range) = self.cpython_implicit_call_generator_range(generator_expr) { + self.compile_expression_with_generator_range(generator_expr, range)?; + } else { + self.compile_expression(generator_expr)?; + } if let Some(cursor) = sub_table_cursor && let Some(current_table) = self.symbol_table_stack.last_mut() { current_table.next_sub_table = cursor; } self.switch_to_block(loop_block); + self.set_source_range(loc); emit!(self, Instruction::ForIter { delta: cleanup }); match kind { BuiltinGeneratorCallKind::Tuple => { + self.set_source_range(loc); emit!(self, Instruction::ListAppend { i: 2 }); + self.set_source_range(loc); emit!(self, PseudoInstruction::Jump { delta: loop_block }); } BuiltinGeneratorCallKind::All => { + self.set_source_range(loc); emit!(self, Instruction::ToBool); emit!(self, Instruction::PopJumpIfTrue { delta: loop_block }); + self.set_source_range(loc); emit!(self, Instruction::PopIter); + self.set_source_range(loc); self.emit_load_const(ConstantData::Boolean { value: false }); + self.set_source_range(loc); emit!(self, PseudoInstruction::Jump { delta: end }); } BuiltinGeneratorCallKind::Any => { + self.set_source_range(loc); emit!(self, Instruction::ToBool); emit!(self, Instruction::PopJumpIfFalse { delta: loop_block }); + self.set_source_range(loc); emit!(self, Instruction::PopIter); + self.set_source_range(loc); self.emit_load_const(ConstantData::Boolean { value: true }); + self.set_source_range(loc); emit!(self, PseudoInstruction::Jump { delta: end }); } } self.switch_to_block(cleanup); + self.set_source_range(loc); emit!(self, Instruction::EndFor); + self.set_source_range(loc); emit!(self, Instruction::PopIter); match kind { BuiltinGeneratorCallKind::Tuple => { + self.set_source_range(loc); emit!( self, Instruction::CallIntrinsic1 { @@ -10884,12 +11334,15 @@ impl Compiler { ); } BuiltinGeneratorCallKind::All => { + self.set_source_range(loc); self.emit_load_const(ConstantData::Boolean { value: true }); } BuiltinGeneratorCallKind::Any => { + self.set_source_range(loc); self.emit_load_const(ConstantData::Boolean { value: false }); } } + self.set_source_range(loc); emit!(self, PseudoInstruction::Jump { delta: end }); self.switch_to_block(fallback); @@ -10910,13 +11363,27 @@ impl Compiler { // super().method() or super(cls, self).method() optimization // CALL path: [global_super, class, self] → LOAD_SUPER_METHOD → [method, self] // CALL_FUNCTION_EX path: [global_super, class, self] → LOAD_SUPER_ATTR → [attr] - // Set source range to the super() call for LOAD_GLOBAL/LOAD_DEREF/etc. - let super_range = value.range(); - self.set_source_range(super_range); - self.load_args_for_super(&super_type)?; - self.set_source_range(super_range); + let ast::Expr::Call(ast::ExprCall { + func: super_func, .. + }) = value.as_ref() + else { + unreachable!("can_optimize_super_call only accepts calls"); + }; + self.load_args_for_super(&super_type, super_func.range(), value.range())?; + let attr_access_range = self.update_start_location_to_match_attr( + func.range(), + func.range(), + attr.as_str(), + ); + let method_call_range = self.update_start_location_to_match_attr( + call_range, + func.range(), + attr.as_str(), + ); + self.set_source_range(attr_access_range); let idx = self.name(attr.as_str()); if uses_ex_call { + self.set_source_range(func.range()); match super_type { SuperCallType::TwoArg { .. } => { self.emit_load_super_attr(idx); @@ -10928,11 +11395,11 @@ impl Compiler { // CPython's Attribute_kind super path emits an attr-line // NOP after LOAD_SUPER_ATTR, even when the call later uses // CALL_FUNCTION_EX for starred arguments. - self.set_source_range(attr.range()); + self.set_source_range(attr_access_range); emit!(self, Instruction::Nop); - self.set_source_range(super_range); + self.set_source_range(func.range()); emit!(self, Instruction::PushNull); - self.codegen_call_helper(0, args, call_range)?; + self.codegen_call_helper(0, args, call_range, None)?; } else { match super_type { SuperCallType::TwoArg { .. } => { @@ -10943,14 +11410,25 @@ impl Compiler { } } // NOP for line tracking at .method( line - self.set_source_range(attr.range()); + self.set_source_range(attr_access_range); emit!(self, Instruction::Nop); // CALL at .method( line (not the full expression line) - self.codegen_call_helper(0, args, attr.range())?; + self.codegen_call_helper(0, args, method_call_range, Some(attr_access_range))?; } } else { self.compile_expression(value)?; let idx = self.name(attr.as_str()); + let attr_access_range = self.update_start_location_to_match_attr( + func.range(), + func.range(), + attr.as_str(), + ); + let method_call_range = self.update_start_location_to_match_attr( + call_range, + func.range(), + attr.as_str(), + ); + self.set_source_range(attr_access_range); // Imported names and CALL_FUNCTION_EX-style calls use plain // LOAD_ATTR + PUSH_NULL; other names use method-call mode. // Check current scope and enclosing scopes for IMPORTED flag. @@ -10962,7 +11440,11 @@ impl Compiler { } else { self.emit_load_attr_method(idx); } - self.codegen_call_helper(0, args, call_range)?; + if is_import || uses_ex_call { + self.codegen_call_helper(0, args, call_range, None)?; + } else { + self.codegen_call_helper(0, args, method_call_range, Some(attr_access_range))?; + } } } else if let Some(kind) = (!uses_ex_call) .then(|| self.detect_builtin_generator_call(func, args)) @@ -10970,17 +11452,17 @@ impl Compiler { { let end = self.new_block(); self.compile_expression(func)?; - self.optimize_builtin_generator_call(kind, &args.args[0], end)?; - self.set_source_range(call_range); + self.optimize_builtin_generator_call(kind, &args.args[0], func.range(), end)?; + self.set_source_range(func.range()); emit!(self, Instruction::PushNull); - self.codegen_call_helper(0, args, call_range)?; + self.codegen_call_helper(0, args, call_range, None)?; self.switch_to_block(end); } else { // Regular call: push func, then NULL for self_or_null slot // Stack layout: [func, NULL, args...] - same as method call [func, self, args...] self.compile_expression(func)?; emit!(self, Instruction::PushNull); - self.codegen_call_helper(0, args, call_range)?; + self.codegen_call_helper(0, args, call_range, None)?; } Ok(()) } @@ -11002,6 +11484,7 @@ impl Compiler { keywords: &[ast::Keyword], begin: usize, end: usize, + call_range: TextRange, ) -> CompileResult<()> { let n = end - begin; assert!(n > 0); @@ -11010,22 +11493,26 @@ impl Compiler { let big = n * 2 > STACK_USE_GUIDELINE as usize; if big { + self.set_source_range(call_range); emit!(self, Instruction::BuildMap { count: 0 }); } for kw in &keywords[begin..end] { // Key first, then value - this is critical! + self.set_source_range(call_range); self.emit_load_const(ConstantData::Str { value: kw.arg.as_ref().unwrap().as_str().into(), }); self.compile_expression(&kw.value)?; if big { + self.set_source_range(call_range); emit!(self, Instruction::MapAdd { i: 1 }); } } if !big { + self.set_source_range(call_range); emit!(self, Instruction::BuildMap { count: n.to_u32() }); } @@ -11040,6 +11527,7 @@ impl Compiler { additional_positional: u32, arguments: &ast::Arguments, call_range: TextRange, + kw_names_range: Option, ) -> CompileResult<()> { let nelts = arguments.args.len(); let nkwelts = arguments.keywords.len(); @@ -11058,8 +11546,18 @@ impl Compiler { if !has_starred && !has_double_star && !too_big { // Simple call path: no * or ** args + let implicit_generator_range = + if additional_positional == 0 && nelts == 1 && nkwelts == 0 { + self.cpython_implicit_call_generator_range(&arguments.args[0]) + } else { + None + }; for arg in &arguments.args { - self.compile_expression(arg)?; + if let Some(range) = implicit_generator_range { + self.compile_expression_with_generator_range(arg, range)?; + } else { + self.compile_expression(arg)?; + } } if nkwelts > 0 { @@ -11073,11 +11571,12 @@ impl Compiler { } // Restore call expression range for kwnames and CALL_KW - self.set_source_range(call_range); + self.set_source_range(kw_names_range.unwrap_or(call_range)); self.emit_load_const(ConstantData::Tuple { elements: kwarg_names, }); + self.set_source_range(call_range); let argc = additional_positional + nelts.to_u32() + nkwelts.to_u32(); emit!(self, Instruction::CallKw { argc }); } else { @@ -11104,23 +11603,21 @@ impl Compiler { } self.set_source_range(call_range); let positional_count = additional_positional + nelts.to_u32(); - if positional_count == 0 { - self.emit_load_const(ConstantData::Tuple { elements: vec![] }); - } else { - emit!( - self, - Instruction::BuildTuple { - count: positional_count - } - ); - } + emit!( + self, + Instruction::BuildTuple { + count: positional_count + } + ); } else { // Use starunpack_helper to build a list, then convert to tuple + self.set_source_range(call_range); self.starunpack_helper( &arguments.args, additional_positional, CollectionType::List, )?; + self.set_source_range(call_range); emit!( self, Instruction::CallIntrinsic1 { @@ -11129,7 +11626,7 @@ impl Compiler { ); } - self.compile_call_function_ex_keywords(&arguments.keywords)?; + self.compile_call_function_ex_keywords(&arguments.keywords, call_range)?; self.set_source_range(call_range); emit!(self, Instruction::CallFunctionEx); @@ -11141,8 +11638,10 @@ impl Compiler { fn compile_call_function_ex_keywords( &mut self, keywords: &[ast::Keyword], + call_range: TextRange, ) -> CompileResult<()> { if keywords.is_empty() { + self.set_source_range(call_range); emit!(self, Instruction::PushNull); return Ok(()); } @@ -11153,8 +11652,9 @@ impl Compiler { for (i, keyword) in keywords.iter().enumerate() { if keyword.arg.is_none() { if nseen > 0 { - self.codegen_subkwargs(keywords, i - nseen, i)?; + self.codegen_subkwargs(keywords, i - nseen, i, call_range)?; if have_dict { + self.set_source_range(call_range); emit!(self, Instruction::DictMerge { i: 1 }); } have_dict = true; @@ -11162,11 +11662,13 @@ impl Compiler { } if !have_dict { + self.set_source_range(call_range); emit!(self, Instruction::BuildMap { count: 0 }); have_dict = true; } self.compile_expression_without_const_boolop_folding(&keyword.value)?; + self.set_source_range(call_range); emit!(self, Instruction::DictMerge { i: 1 }); } else { nseen += 1; @@ -11174,8 +11676,9 @@ impl Compiler { } if nseen > 0 { - self.codegen_subkwargs(keywords, keywords.len() - nseen, keywords.len())?; + self.codegen_subkwargs(keywords, keywords.len() - nseen, keywords.len(), call_range)?; if have_dict { + self.set_source_range(call_range); emit!(self, Instruction::DictMerge { i: 1 }); } have_dict = true; @@ -11197,6 +11700,173 @@ impl Compiler { }) } + fn compile_expression_with_generator_range( + &mut self, + expression: &ast::Expr, + range: TextRange, + ) -> CompileResult<()> { + if let ast::Expr::Generator(ast::ExprGenerator { + elt, generators, .. + }) = expression + { + self.set_source_range(range); + self.compile_generator_expression(elt, generators, range) + } else { + self.compile_expression(expression) + } + } + + fn cpython_implicit_call_generator_range(&self, expression: &ast::Expr) -> Option { + if !matches!(expression, ast::Expr::Generator(_)) { + return None; + } + let range = expression.range(); + let source = self.source_file.source_text().as_bytes(); + let start = range.start().to_usize(); + let end = range.end().to_usize(); + if source.get(start) == Some(&b'(') + && !Self::starts_with_parenthesized_generator_element(source, start, end) + { + return None; + } + + let mut open = start; + while open > 0 && source[open - 1].is_ascii_whitespace() { + open -= 1; + } + if open == 0 || source[open - 1] != b'(' { + return None; + } + + let mut close = end; + while close < source.len() && source[close].is_ascii_whitespace() { + close += 1; + } + if source.get(close) != Some(&b')') { + return None; + } + + let adjusted_start = u32::try_from(open - 1).ok()?; + let adjusted_end = u32::try_from(close + 1).ok()?; + Some(TextRange::new( + TextSize::from(adjusted_start), + TextSize::from(adjusted_end), + )) + } + + fn starts_with_parenthesized_generator_element( + source: &[u8], + start: usize, + end: usize, + ) -> bool { + let mut depth = 0usize; + let mut i = start; + while i < end { + match source[i] { + b'(' | b'[' | b'{' => depth += 1, + b')' | b']' | b'}' => { + if depth == 0 { + return false; + } + depth -= 1; + if depth == 0 { + return Self::next_token_is_for(source, i + 1, end); + } + } + b'\'' | b'"' => i = Self::skip_python_string_literal(source, i), + _ => {} + } + i += 1; + } + false + } + + fn skip_python_string_literal(source: &[u8], quote: usize) -> usize { + let quote_byte = source[quote]; + let triple = source.get(quote + 1) == Some("e_byte) + && source.get(quote + 2) == Some("e_byte); + let mut i = quote + if triple { 3 } else { 1 }; + while i < source.len() { + if source[i] == b'\\' { + i += 2; + continue; + } + if triple { + if source[i] == quote_byte + && source.get(i + 1) == Some("e_byte) + && source.get(i + 2) == Some("e_byte) + { + return i + 2; + } + } else if source[i] == quote_byte { + return i; + } + i += 1; + } + source.len().saturating_sub(1) + } + + fn next_token_is_for(source: &[u8], mut i: usize, end: usize) -> bool { + while i < end && source[i].is_ascii_whitespace() { + i += 1; + } + source.get(i..i + 3) == Some(b"for") + && source + .get(i + 3) + .is_none_or(|byte| !byte.is_ascii_alphanumeric() && *byte != b'_') + } + + fn compile_generator_expression( + &mut self, + elt: &ast::Expr, + generators: &[ast::Comprehension], + range: TextRange, + ) -> CompileResult<()> { + // Check if element or generators contain async content + // This makes the generator expression into an async generator + let element_contains_await = + Self::contains_await(elt) || Self::generators_contain_await(generators); + self.compile_comprehension( + "", + None, + generators, + &|compiler, _collection_add_i| { + // Compile the element expression + // Note: if element is an async comprehension, compile_expression + // already handles awaiting it, so we don't need to await again here + compiler.compile_comprehension_element(elt)?; + + compiler.mark_generator(); + if compiler.ctx.func == FunctionContext::AsyncFunction { + compiler.set_source_range(elt.range()); + emit!( + compiler, + Instruction::CallIntrinsic1 { + func: bytecode::IntrinsicFunction1::AsyncGenWrap + } + ); + } + // arg=0: direct yield (wrapped for async generators) + compiler.set_source_range(elt.range()); + emit!(compiler, Instruction::YieldValue { arg: 0 }); + emit!( + compiler, + Instruction::Resume { + context: oparg::ResumeContext::from(oparg::ResumeLocation::AfterYield) + } + ); + emit!(compiler, Instruction::PopTop); + + Ok(()) + }, + ComprehensionType::Generator, + element_contains_await, + range, + elt.range(), + elt.range(), + ) + } + fn consume_next_sub_table(&mut self) -> CompileResult<()> { { let _ = self.push_symbol_table()?; @@ -11328,7 +11998,11 @@ impl Compiler { self.enter_scope(obj_name, scope_type, key, lineno.to_u32())?; if let Some(info) = self.code_stack.last_mut() { - info.flags = flags | (info.flags & bytecode::CodeFlags::NESTED); + info.flags = flags + | (info.flags + & (bytecode::CodeFlags::NESTED + | bytecode::CodeFlags::METHOD + | bytecode::CodeFlags::FUTURE_ANNOTATIONS)); info.metadata.argcount = arg_count; info.metadata.posonlyargcount = posonlyarg_count; info.metadata.kwonlyargcount = kwonlyarg_count; @@ -11346,6 +12020,8 @@ impl Compiler { comprehension_type: ComprehensionType, element_contains_await: bool, comprehension_range: TextRange, + element_range: TextRange, + outer_backedge_range: TextRange, ) -> CompileResult<()> { let prev_ctx = self.ctx; let has_an_async_gen = generators.iter().any(|g| g.is_async); @@ -11393,8 +12069,7 @@ impl Compiler { init_collection, generators, compile_element, - has_an_async_gen, - comprehension_range, + (comprehension_range, element_range, outer_backedge_range), ); } @@ -11424,10 +12099,11 @@ impl Compiler { // scope itself. Peek past those nested scopes so we can enter the // correct comprehension table here, then let the real outermost // iterator compile consume its nested scopes later in parent scope. - self.push_output_with_symbol_table(comp_table, flags, 1, 1, 0, name)?; + self.push_output_with_symbol_table(comp_table, flags, 0, 1, 0, name)?; // Set qualname for comprehension self.set_qualname(); + self.set_source_range(comprehension_range); let arg0 = self.varname(".0"); @@ -11444,6 +12120,11 @@ impl Compiler { } ); self.set_no_location(); + // CPython's codegen_wrap_in_stopiteration_handler() inserts + // SETUP_CLEANUP at instruction-sequence index 0, so after the + // generator prefix is inserted the protected range begins at the + // comprehension-start RESUME. + self.move_last_instruction_before_scope_start_resume(); self.push_fblock(FBlockType::StopIteration, handler_block, handler_block)?; Some(handler_block) } else { @@ -11469,7 +12150,13 @@ impl Compiler { if !generator.ifs.is_empty() { let if_cleanup_block = self.new_block(); for if_condition in &generator.ifs { + let snapshot = self.instruction_count_snapshot(); self.compile_jump_if(if_condition, false, if_cleanup_block)?; + self.mark_new_conditional_jump_locations_since( + &snapshot, + if_cleanup_block, + element_range, + ); } let body_block = self.new_block(); self.switch_to_block(body_block); @@ -11487,14 +12174,7 @@ impl Compiler { emit!(self, Instruction::LoadFast { var_num: arg0 }); } else { // Evaluate iterated item: - self.compile_for_iterable_expression(&generator.iter, generator.is_async)?; - - // Get iterator / turn item into an iterator - if generator.is_async { - emit!(self, Instruction::GetAiter); - } else { - emit!(self, Instruction::GetIter); - } + self.compile_comprehension_iter(generator)?; } self.switch_to_block(loop_block); @@ -11515,14 +12195,24 @@ impl Compiler { self.pop_fblock(FBlockType::AsyncComprehensionGenerator); self.compile_store(&generator.target)?; } else { + let saved_range = self.current_source_range; + self.set_source_range(generator.iter.range()); emit!(self, Instruction::ForIter { delta: after_block }); + self.set_source_range(saved_range); self.compile_store(&generator.target)?; } real_loop_depth += 1; + let backedge_range = if gen_index + 1 == generators.len() { + element_range + } else { + outer_backedge_range + }; loop_labels.push(ComprehensionLoopControl::Iteration { loop_block, if_cleanup_block, after_block, + iter_range: generator.iter.range(), + backedge_range, is_async: generator.is_async, end_async_for_target, }); @@ -11530,7 +12220,13 @@ impl Compiler { // CPython always lowers comprehension guards through codegen_jump_if // and leaves constant-folding to later CFG optimization passes. for if_condition in &generator.ifs { + let snapshot = self.instruction_count_snapshot(); self.compile_jump_if(if_condition, false, if_cleanup_block)?; + self.mark_new_conditional_jump_locations_since( + &snapshot, + if_cleanup_block, + element_range, + ); } if !generator.ifs.is_empty() { let body_block = self.new_block(); @@ -11546,20 +12242,26 @@ impl Compiler { loop_block, if_cleanup_block, after_block, + iter_range, + backedge_range, is_async, end_async_for_target, } => { + self.set_source_range(backedge_range); emit!(self, PseudoInstruction::Jump { delta: loop_block }); self.switch_to_block(if_cleanup_block); + self.set_source_range(backedge_range); emit!(self, PseudoInstruction::Jump { delta: loop_block }); self.switch_to_block(after_block); if is_async { + self.set_source_range(comprehension_range); // EndAsyncFor pops both the exception and the aiter // (handler depth is before GetANext, so aiter is at handler depth) self.emit_end_async_for(end_async_for_target); } else { + self.set_source_range(iter_range); // END_FOR + POP_ITER pattern (CPython 3.14) emit!(self, Instruction::EndFor); emit!(self, Instruction::PopIter); @@ -11599,24 +12301,18 @@ impl Compiler { self.ctx = prev_ctx; // Create comprehension function with closure + self.set_source_range(comprehension_range); self.make_closure(code, bytecode::MakeFunctionFlags::new())?; - // Evaluate iterated item: - self.compile_for_iterable_expression(&outermost.iter, outermost.is_async)?; + // Evaluate iterated item and get its iterator. + self.compile_comprehension_iter(outermost)?; self.symbol_table_stack .last_mut() .expect("no current symbol table") .next_sub_table += 1; - // Get iterator / turn item into an iterator - // Use is_async from the first generator, not has_an_async_gen which covers ALL generators - if outermost.is_async { - emit!(self, Instruction::GetAiter); - } else { - emit!(self, Instruction::GetIter); - }; - // Call just created function: + self.set_source_range(comprehension_range); emit!(self, Instruction::Call { argc: 0 }); if is_async_list_set_dict_comprehension { emit!(self, Instruction::GetAwaitable { r#where: 0 }); @@ -11635,9 +12331,9 @@ impl Compiler { init_collection: Option, generators: &[ast::Comprehension], compile_element: &dyn Fn(&mut Self, usize) -> CompileResult<()>, - has_async: bool, - comprehension_range: TextRange, + ranges: (TextRange, TextRange, TextRange), ) -> CompileResult<()> { + let (comprehension_range, element_range, outer_backedge_range) = ranges; fn collect_bound_names(target: &ast::Expr, out: &mut Vec) { match target { ast::Expr::Name(ast::ExprName { id, .. }) => out.push(id.to_string()), @@ -11658,10 +12354,7 @@ impl Compiler { // nested scopes (e.g. lambdas) whose sub_tables sit at the current // position in the parent's list. Those must be consumed before we // splice in the comprehension's own children. - self.compile_for_iterable_expression( - &generators[0].iter, - has_async && generators[0].is_async, - )?; + self.compile_comprehension_iter(&generators[0])?; self.symbol_table_stack .last_mut() .expect("no current symbol table") @@ -11691,12 +12384,6 @@ impl Compiler { current_table.sub_tables.insert(insert_pos + i, st.clone()); } } - if has_async && generators[0].is_async { - emit!(self, Instruction::GetAiter); - } else { - emit!(self, Instruction::GetIter); - } - let mut source_order_bound_names = Vec::new(); for generator in generators { collect_bound_names(&generator.target, &mut source_order_bound_names); @@ -11810,18 +12497,12 @@ impl Compiler { ); } - // Step 4: Create the collection (list/set/dict) - if let Some(init_collection) = init_collection { - self._emit(init_collection, OpArg::new(0), BlockIdx::NULL); - // SWAP to get iterator on top - emit!(self, Instruction::Swap { i: 2 }); - } - - // Set up exception handler for cleanup on exception - let cleanup_block = self.new_block(); - let end_block = self.new_block(); - - if !pushed_locals.is_empty() { + // CPython's codegen_push_inlined_comprehension_locals() + // installs the virtual cleanup before codegen_comprehension() + // emits BUILD_LIST/BUILD_SET/BUILD_MAP for the result object. + let cleanup_blocks = if !pushed_locals.is_empty() { + let cleanup_block = self.new_block(); + let end_block = self.new_block(); emit!( self, PseudoInstruction::SetupFinally { @@ -11829,6 +12510,16 @@ impl Compiler { } ); self.push_fblock(FBlockType::TryExcept, cleanup_block, end_block)?; + Some((cleanup_block, end_block)) + } else { + None + }; + + // Step 4: Create the collection (list/set/dict) + if let Some(init_collection) = init_collection { + self._emit(init_collection, OpArg::new(0), BlockIdx::NULL); + // SWAP to get iterator on top + emit!(self, Instruction::Swap { i: 2 }); } // Step 5: Compile the comprehension loop(s) @@ -11846,7 +12537,13 @@ impl Compiler { if !generator.ifs.is_empty() { let if_cleanup_block = self.new_block(); for if_condition in &generator.ifs { + let snapshot = self.instruction_count_snapshot(); self.compile_jump_if(if_condition, false, if_cleanup_block)?; + self.mark_new_conditional_jump_locations_since( + &snapshot, + if_cleanup_block, + element_range, + ); } let body_block = self.new_block(); self.switch_to_block(body_block); @@ -11861,12 +12558,7 @@ impl Compiler { let after_block = self.new_block(); if i > 0 { - self.compile_for_iterable_expression(&generator.iter, generator.is_async)?; - if generator.is_async { - emit!(self, Instruction::GetAiter); - } else { - emit!(self, Instruction::GetIter); - } + self.compile_comprehension_iter(generator)?; } self.switch_to_block(loop_block); @@ -11894,10 +12586,17 @@ impl Compiler { } real_loop_depth += 1; + let backedge_range = if i + 1 == generators.len() { + element_range + } else { + outer_backedge_range + }; loop_labels.push(ComprehensionLoopControl::Iteration { loop_block, if_cleanup_block, after_block, + iter_range: generator.iter.range(), + backedge_range, is_async: generator.is_async, end_async_for_target, }); @@ -11905,7 +12604,13 @@ impl Compiler { // CPython always lowers comprehension guards through codegen_jump_if // and leaves constant-folding to later CFG optimization passes. for if_condition in &generator.ifs { + let snapshot = self.instruction_count_snapshot(); self.compile_jump_if(if_condition, false, if_cleanup_block)?; + self.mark_new_conditional_jump_locations_since( + &snapshot, + if_cleanup_block, + element_range, + ); } } @@ -11919,18 +12624,24 @@ impl Compiler { loop_block, if_cleanup_block, after_block, + iter_range, + backedge_range, is_async, end_async_for_target, } => { + self.set_source_range(backedge_range); emit!(self, PseudoInstruction::Jump { delta: loop_block }); self.switch_to_block(if_cleanup_block); + self.set_source_range(backedge_range); emit!(self, PseudoInstruction::Jump { delta: loop_block }); self.switch_to_block(after_block); if is_async { + self.set_source_range(comprehension_range); self.emit_end_async_for(end_async_for_target); } else { + self.set_source_range(iter_range); emit!(self, Instruction::EndFor); emit!(self, Instruction::PopIter); } @@ -11943,8 +12654,9 @@ impl Compiler { // Step 8: Clean up - restore saved locals (and cell values) self.set_source_range(comprehension_range); - if total_stack_items > 0 { + if let Some((cleanup_block, end_block)) = cleanup_blocks { emit!(self, PseudoInstruction::PopBlock); + self.set_no_location(); self.pop_fblock(FBlockType::TryExcept); // Match CPython codegen_pop_inlined_comprehension_locals(): @@ -11955,14 +12667,18 @@ impl Compiler { self, PseudoInstruction::JumpNoInterrupt { delta: end_block } ); + self.set_no_location(); // Exception cleanup path self.switch_to_block(cleanup_block); // Stack: [saved_values..., collection, exception] emit!(self, Instruction::Swap { i: 2 }); + self.set_no_location(); emit!(self, Instruction::PopTop); // Pop incomplete collection + self.set_no_location(); // Restore locals and cell values + self.set_source_range(comprehension_range); emit!( self, Instruction::Swap { @@ -11975,9 +12691,11 @@ impl Compiler { } // Re-raise the exception emit!(self, Instruction::Reraise { depth: 0 }); + self.set_no_location(); // Normal end path self.switch_to_block(end_block); + self.set_source_range(comprehension_range); } // SWAP result to TOS (above saved values) @@ -12064,6 +12782,8 @@ impl Compiler { match_success_jump: false, break_continue_cleanup_jump: false, for_loop_break_cleanup_jump: false, + preserve_tobool_jump_location: false, + preserve_store_fast_store_fast_jump_location: false, }); } @@ -12091,6 +12811,21 @@ impl Compiler { } } + fn copy_previous_location_to_last_instruction(&mut self) { + let instructions = &mut self.current_block().instructions; + let Some(last_idx) = instructions.len().checked_sub(1) else { + return; + }; + let Some(previous_idx) = last_idx.checked_sub(1) else { + return; + }; + let previous = instructions[previous_idx]; + let last = &mut instructions[last_idx]; + last.location = previous.location; + last.end_location = previous.end_location; + last.lineno_override = previous.lineno_override; + } + fn force_remove_last_no_location_nop(&mut self) { if let Some(info) = self.current_block().instructions.last_mut() { info.remove_no_location_nop = true; @@ -12106,12 +12841,49 @@ impl Compiler { } } + fn move_last_instruction_before_scope_start_resume(&mut self) { + let instructions = &mut self.current_block().instructions; + let Some(last_idx) = instructions.len().checked_sub(1) else { + return; + }; + let Some(resume_idx) = + instructions[..last_idx] + .iter() + .rposition(|info| match info.instr.real() { + Some(Instruction::Resume { context }) => { + matches!( + context.get(info.arg).location(), + oparg::ResumeLocation::AtFuncStart + ) + } + _ => false, + }) + else { + return; + }; + + let instruction = instructions.remove(last_idx); + instructions.insert(resume_idx, instruction); + } + fn mark_last_no_location_exit(&mut self) { if let Some(last) = self.current_block().instructions.last_mut() { last.no_location_exit = true; } } + fn mark_last_line_only_location(&mut self, lineno: u32) { + if let Some(last) = self.current_block().instructions.last_mut() { + let location = SourceLocation { + line: OneIndexed::new(lineno as usize).unwrap_or(OneIndexed::MIN), + character_offset: OneIndexed::MIN, + }; + last.location = location; + last.end_location = location; + last.lineno_override = Some(ir::LINE_ONLY_LOCATION_OVERRIDE); + } + } + fn mark_last_break_continue_cleanup_jump(&mut self) { if let Some(last) = self.current_block().instructions.last_mut() { last.break_continue_cleanup_jump = true; @@ -12307,89 +13079,22 @@ impl Compiler { ) -> Option { let (left_int, left_is_bool) = Self::constant_as_fold_int(left)?; let (right_int, right_is_bool) = Self::constant_as_fold_int(right)?; - let zero = BigInt::from(0); - if !left_is_bool && !right_is_bool { + if !(left_is_bool && right_is_bool) { return None; } match op { - ast::Operator::Add => Some(ConstantData::Integer { - value: left_int + right_int, + ast::Operator::BitAnd => Some(ConstantData::Boolean { + value: !left_int.is_zero() & !right_int.is_zero(), }), - ast::Operator::Sub => Some(ConstantData::Integer { - value: left_int - right_int, + ast::Operator::BitOr => Some(ConstantData::Boolean { + value: !left_int.is_zero() | !right_int.is_zero(), }), - ast::Operator::Mult => Some(ConstantData::Integer { - value: left_int * right_int, + ast::Operator::BitXor => Some(ConstantData::Boolean { + value: !left_int.is_zero() ^ !right_int.is_zero(), }), - ast::Operator::Div => { - if right_int.is_zero() { - return None; - } - Some(ConstantData::Float { - value: left_int.to_f64()? / right_int.to_f64()?, - }) - } - ast::Operator::FloorDiv => { - if right_int.is_zero() || left_int < zero || right_int < zero { - return None; - } - Some(ConstantData::Integer { - value: left_int / right_int, - }) - } - ast::Operator::Mod => { - if right_int.is_zero() || left_int < zero || right_int < zero { - return None; - } - Some(ConstantData::Integer { - value: left_int % right_int, - }) - } - ast::Operator::Pow => { - let exponent = right_int.to_u32()?; - if exponent > 128 { - return None; - } - Some(ConstantData::Integer { - value: left_int.pow(exponent), - }) - } - ast::Operator::BitAnd => { - if left_is_bool && right_is_bool { - Some(ConstantData::Boolean { - value: !left_int.is_zero() & !right_int.is_zero(), - }) - } else { - Some(ConstantData::Integer { - value: left_int & right_int, - }) - } - } - ast::Operator::BitOr => { - if left_is_bool && right_is_bool { - Some(ConstantData::Boolean { - value: !left_int.is_zero() | !right_int.is_zero(), - }) - } else { - Some(ConstantData::Integer { - value: left_int | right_int, - }) - } - } - ast::Operator::BitXor => { - if left_is_bool && right_is_bool { - Some(ConstantData::Boolean { - value: !left_int.is_zero() ^ !right_int.is_zero(), - }) - } else { - Some(ConstantData::Integer { - value: left_int ^ right_int, - }) - } - } - ast::Operator::MatMult | ast::Operator::LShift | ast::Operator::RShift => None, + _ => None, } } @@ -12578,6 +13283,152 @@ impl Compiler { })) } + fn try_compile_ast_constant( + &mut self, + expr: &ast::Expr, + ) -> CompileResult> { + Ok(Some(match expr { + ast::Expr::NumberLiteral(num) => match &num.value { + ast::Number::Int(int) => ConstantData::Integer { + value: ruff_int_to_bigint(int).map_err(|e| self.error(e))?, + }, + ast::Number::Float(value) => ConstantData::Float { value: *value }, + ast::Number::Complex { real, imag } => ConstantData::Complex { + value: Complex::new(*real, *imag), + }, + }, + ast::Expr::StringLiteral(s) => ConstantData::Str { + value: self.compile_string_value(s), + }, + ast::Expr::BytesLiteral(b) => ConstantData::Bytes { + value: b.value.bytes().collect(), + }, + ast::Expr::BooleanLiteral(b) => ConstantData::Boolean { value: b.value }, + ast::Expr::NoneLiteral(_) => ConstantData::None, + ast::Expr::EllipsisLiteral(_) => ConstantData::Ellipsis, + _ => return Ok(None), + })) + } + + fn try_negate_match_pattern_constant(constant: ConstantData) -> Option { + match constant { + ConstantData::Integer { value } => Some(ConstantData::Integer { value: -value }), + ConstantData::Float { value } => Some(ConstantData::Float { value: -value }), + ConstantData::Complex { value } => Some(ConstantData::Complex { value: -value }), + ConstantData::Boolean { value } => Some(ConstantData::Integer { + value: -BigInt::from(u8::from(value)), + }), + _ => None, + } + } + + fn constant_as_match_pattern_complex(constant: &ConstantData) -> Option> { + match constant { + ConstantData::Integer { value } => Some(Complex::new(value.to_f64()?, 0.0)), + ConstantData::Float { value } => Some(Complex::new(*value, 0.0)), + ConstantData::Complex { value } => Some(*value), + ConstantData::Boolean { value } => Some(Complex::new(f64::from(u8::from(*value)), 0.0)), + _ => None, + } + } + + fn try_fold_match_pattern_binop( + op: ast::Operator, + left: &ConstantData, + right: &ConstantData, + ) -> Option { + if let (ConstantData::Integer { value: left }, ConstantData::Integer { value: right }) = + (left, right) + { + return match op { + ast::Operator::Add => Some(ConstantData::Integer { + value: left + right, + }), + ast::Operator::Sub => Some(ConstantData::Integer { + value: left - right, + }), + _ => None, + }; + } + + let left_is_complex = matches!(left, ConstantData::Complex { .. }); + let right_is_complex = matches!(right, ConstantData::Complex { .. }); + if left_is_complex || right_is_complex { + let left = Self::constant_as_match_pattern_complex(left)?; + let right = Self::constant_as_match_pattern_complex(right)?; + let value = match op { + ast::Operator::Add => Complex::new(left.re + right.re, left.im + right.im), + ast::Operator::Sub => { + let imag = if !left_is_complex && right_is_complex { + -right.im + } else { + left.im - right.im + }; + Complex::new(left.re - right.re, imag) + } + _ => return None, + }; + return Some(ConstantData::Complex { value }); + } + + let left = Self::constant_as_match_pattern_complex(left)?; + let right = Self::constant_as_match_pattern_complex(right)?; + match op { + ast::Operator::Add => Some(ConstantData::Float { + value: left.re + right.re, + }), + ast::Operator::Sub => Some(ConstantData::Float { + value: left.re - right.re, + }), + _ => None, + } + } + + fn try_fold_match_pattern_const_expr( + &mut self, + expr: &ast::Expr, + ) -> CompileResult> { + // CPython 3.14 ast_preprocess.c::fold_const_match_patterns() + // folds only the constant forms needed by match patterns before + // codegen_pattern_value()/codegen_pattern_mapping_key() visit them. + Ok(match expr { + ast::Expr::UnaryOp(ast::ExprUnaryOp { + op: ast::UnaryOp::USub, + operand, + .. + }) => { + let Some(constant) = self.try_compile_ast_constant(operand)? else { + return Ok(None); + }; + Self::try_negate_match_pattern_constant(constant) + } + ast::Expr::BinOp(ast::ExprBinOp { + left, op, right, .. + }) if matches!(op, ast::Operator::Add | ast::Operator::Sub) => { + let Some(left) = (match self.try_fold_match_pattern_const_expr(left)? { + Some(constant) => Some(constant), + None => self.try_compile_ast_constant(left)?, + }) else { + return Ok(None); + }; + let Some(right) = self.try_compile_ast_constant(right)? else { + return Ok(None); + }; + Self::try_fold_match_pattern_binop(*op, &left, &right) + } + _ => None, + }) + } + + fn compile_match_pattern_expr(&mut self, expr: &ast::Expr) -> CompileResult<()> { + if let Some(constant) = self.try_fold_match_pattern_const_expr(expr)? { + self.emit_load_const(constant); + } else { + self.compile_expression(expr)?; + } + Ok(()) + } + fn emit_load_const(&mut self, constant: ConstantData) { let idx = self.arg_constant(constant); self.emit_arg(idx, |consti| Instruction::LoadConst { consti }) @@ -13111,6 +13962,65 @@ impl Compiler { self.current_source_range = range; } + fn decorated_definition_range( + &self, + statement_range: TextRange, + decorator_list: &[ast::Decorator], + keyword: &str, + ) -> TextRange { + let Some(last_decorator) = decorator_list.last() else { + return statement_range; + }; + let search_start = last_decorator.expression.range().end(); + if search_start >= statement_range.end() { + return statement_range; + } + let search_range = TextRange::new(search_start, statement_range.end()); + let source = self.source_file.slice(search_range); + let Some(keyword_offset) = source.find(keyword) else { + return statement_range; + }; + let Ok(keyword_offset) = u32::try_from(keyword_offset) else { + return statement_range; + }; + TextRange::new( + search_start + TextSize::new(keyword_offset), + statement_range.end(), + ) + } + + fn update_start_location_to_match_attr( + &self, + loc_range: TextRange, + attr_range: TextRange, + attr: &str, + ) -> TextRange { + let source = self.source_file.to_source_code(); + if source.line_index(loc_range.start()) == source.line_index(attr_range.end()) { + return loc_range; + } + let Ok(attr_len) = u32::try_from(attr.len()) else { + return TextRange::new(loc_range.start(), loc_range.end()); + }; + let attr_len = TextSize::new(attr_len); + if attr_len > attr_range.len() { + return TextRange::new(loc_range.start(), loc_range.end()); + } + TextRange::new(attr_range.end() - attr_len, loc_range.end()) + } + + fn source_line_start_range(&self, lineno: u32) -> TextRange { + let source = self.source_file.to_source_code(); + let line = OneIndexed::new(lineno as usize).unwrap_or(OneIndexed::MIN); + let start = source.line_start(line); + TextRange::new(start, start) + } + + fn module_start_location(&self, body: &[ast::Stmt]) -> TextRange { + body.first() + .map_or_else(|| self.source_line_start_range(1), Ranged::range) + } + fn get_source_line_number(&mut self) -> OneIndexed { self.source_file .to_source_code() @@ -13118,7 +14028,14 @@ impl Compiler { } fn mark_generator(&mut self) { - self.current_code_info().flags |= bytecode::CodeFlags::GENERATOR + let is_async = self.ctx.func == FunctionContext::AsyncFunction; + let flags = &mut self.current_code_info().flags; + if is_async { + flags.remove(bytecode::CodeFlags::COROUTINE); + flags.insert(bytecode::CodeFlags::ASYNC_GENERATOR); + } else { + flags.insert(bytecode::CodeFlags::GENERATOR); + } } /// Whether the expression contains an await expression and @@ -13190,18 +14107,25 @@ impl Compiler { let mut element_count = 0; let mut pending_literal = None; + let mut pending_literal_range = None; let mut pending_literal_no_location = false; for part in fstring { self.compile_fstring_part_into( part, &mut pending_literal, + &mut pending_literal_range, &mut pending_literal_no_location, &mut element_count, false, )?; } - self.set_source_range(fstring_range); - self.finish_fstring(pending_literal, pending_literal_no_location, element_count); + self.finish_fstring( + pending_literal, + pending_literal_range, + pending_literal_no_location, + element_count, + Some(fstring_range), + ); Ok(()) } @@ -13215,17 +14139,24 @@ impl Compiler { let mut element_count = 0; let mut pending_literal = None; + let mut pending_literal_range = None; let mut pending_literal_no_location = false; for part in fstring { self.compile_fstring_part_into( part, &mut pending_literal, + &mut pending_literal_range, &mut pending_literal_no_location, &mut element_count, true, )?; } - self.finish_fstring_join(pending_literal, pending_literal_no_location, element_count); + self.finish_fstring_join( + pending_literal, + pending_literal_range, + pending_literal_no_location, + element_count, + ); Ok(()) } @@ -13233,6 +14164,7 @@ impl Compiler { &mut self, part: &ast::FStringPart, pending_literal: &mut Option, + pending_literal_range: &mut Option, pending_literal_no_location: &mut bool, element_count: &mut u32, append_to_join_list: bool, @@ -13241,10 +14173,11 @@ impl Compiler { ast::FStringPart::Literal(string) => { let value = self.compile_fstring_part_literal_value(string); if pending_literal.is_none() { - self.set_source_range(string.range); + *pending_literal_range = Some(string.range); *pending_literal_no_location = string.range == TextRange::default(); *pending_literal = Some(value); } else if let Some(pending) = pending_literal.as_mut() { + Self::extend_pending_literal_range(pending_literal_range, string.range); *pending_literal_no_location &= string.range == TextRange::default(); pending.push_wtf8(value.as_ref()); } @@ -13254,7 +14187,7 @@ impl Compiler { fstring.flags, &fstring.elements, pending_literal, - pending_literal_no_location, + (pending_literal_range, pending_literal_no_location), element_count, append_to_join_list, ), @@ -13264,12 +14197,15 @@ impl Compiler { fn finish_fstring( &mut self, mut pending_literal: Option, + mut pending_literal_range: Option, mut pending_literal_no_location: bool, mut element_count: u32, + fstring_range: Option, ) { let keep_empty = element_count == 0; self.emit_pending_fstring_literal( &mut pending_literal, + &mut pending_literal_range, &mut pending_literal_no_location, &mut element_count, keep_empty, @@ -13277,10 +14213,16 @@ impl Compiler { ); if element_count == 0 { + if let Some(fstring_range) = fstring_range { + self.set_source_range(fstring_range); + } self.emit_load_const(ConstantData::Str { value: Wtf8Buf::new(), }); } else if element_count > 1 { + if let Some(fstring_range) = fstring_range { + self.set_source_range(fstring_range); + } emit!( self, Instruction::BuildString { @@ -13293,12 +14235,14 @@ impl Compiler { fn finish_fstring_join( &mut self, mut pending_literal: Option, + mut pending_literal_range: Option, mut pending_literal_no_location: bool, mut element_count: u32, ) { let keep_empty = element_count == 0; self.emit_pending_fstring_literal( &mut pending_literal, + &mut pending_literal_range, &mut pending_literal_no_location, &mut element_count, keep_empty, @@ -13310,6 +14254,7 @@ impl Compiler { fn emit_pending_fstring_literal( &mut self, pending_literal: &mut Option, + pending_literal_range: &mut Option, pending_literal_no_location: &mut bool, element_count: &mut u32, keep_empty: bool, @@ -13318,6 +14263,7 @@ impl Compiler { let Some(value) = pending_literal.take() else { return; }; + let range = pending_literal_range.take(); let no_location = *pending_literal_no_location; *pending_literal_no_location = false; @@ -13328,6 +14274,9 @@ impl Compiler { return; } + if let Some(range) = range { + self.set_source_range(range); + } self.emit_load_const(ConstantData::Str { value }); if no_location { self.set_no_location(); @@ -13338,6 +14287,18 @@ impl Compiler { } } + fn extend_pending_literal_range(pending: &mut Option, range: TextRange) { + let Some(existing) = pending else { + *pending = Some(range); + return; + }; + if *existing == TextRange::default() { + *existing = range; + } else if range != TextRange::default() { + *existing = TextRange::new(existing.start(), range.end()); + } + } + fn count_fstring_parts(&self, fstring: &[ast::FStringPart]) -> u32 { let mut element_count = 0; let mut pending_literal = None; @@ -13393,6 +14354,7 @@ impl Compiler { &mut self, flags: ast::FStringFlags, fstring_elements: &ast::InterpolatedStringElements, + fstring_range: Option, ) -> CompileResult<()> { if self.count_fstring_elements(flags, fstring_elements) > STACK_USE_GUIDELINE { return self.compile_fstring_elements_joined(flags, fstring_elements); @@ -13400,16 +14362,23 @@ impl Compiler { let mut element_count = 0; let mut pending_literal: Option = None; + let mut pending_literal_range: Option = None; let mut pending_literal_no_location = false; self.compile_fstring_elements_into( flags, fstring_elements, &mut pending_literal, - &mut pending_literal_no_location, + (&mut pending_literal_range, &mut pending_literal_no_location), &mut element_count, false, )?; - self.finish_fstring(pending_literal, pending_literal_no_location, element_count); + self.finish_fstring( + pending_literal, + pending_literal_range, + pending_literal_no_location, + element_count, + fstring_range, + ); Ok(()) } @@ -13427,37 +14396,58 @@ impl Compiler { let mut element_count = 0; let mut pending_literal: Option = None; + let mut pending_literal_range: Option = None; let mut pending_literal_no_location = false; self.compile_fstring_elements_into( flags, fstring_elements, &mut pending_literal, - &mut pending_literal_no_location, + (&mut pending_literal_range, &mut pending_literal_no_location), &mut element_count, true, )?; - self.finish_fstring_join(pending_literal, pending_literal_no_location, element_count); + self.finish_fstring_join( + pending_literal, + pending_literal_range, + pending_literal_no_location, + element_count, + ); Ok(()) } + fn cpython_format_spec_range(&self, range: TextRange) -> TextRange { + let start = range.start().to_usize(); + if start == 0 { + return range; + } + let source = self.source_file.source_text().as_bytes(); + if source.get(start - 1) == Some(&b':') { + TextRange::new(range.start() - TextSize::new(1), range.end()) + } else { + range + } + } + fn compile_fstring_elements_into( &mut self, flags: ast::FStringFlags, fstring_elements: &ast::InterpolatedStringElements, pending_literal: &mut Option, - pending_literal_no_location: &mut bool, + pending_literal_meta: (&mut Option, &mut bool), element_count: &mut u32, append_to_join_list: bool, ) -> CompileResult<()> { + let (pending_literal_range, pending_literal_no_location) = pending_literal_meta; for element in fstring_elements { match element { ast::InterpolatedStringElement::Literal(string) => { let value = self.compile_fstring_literal_value(string, flags); if pending_literal.is_none() { - self.set_source_range(string.range); + *pending_literal_range = Some(string.range); *pending_literal_no_location = string.range == TextRange::default(); *pending_literal = Some(value); } else if let Some(pending) = pending_literal.as_mut() { + Self::extend_pending_literal_range(pending_literal_range, string.range); *pending_literal_no_location &= string.range == TextRange::default(); pending.push_wtf8(value.as_ref()); } @@ -13472,18 +14462,33 @@ impl Compiler { if let Some(ast::DebugText { leading, trailing }) = &fstring_expr.debug_text { let range = fstring_expr.expression.range(); + let leading = strip_fstring_debug_comments(leading); + let trailing = strip_fstring_debug_comments(trailing); let source = self.source_file.slice(range); - let text = [ - strip_fstring_debug_comments(leading).as_str(), - source, - strip_fstring_debug_comments(trailing).as_str(), - ] - .concat(); + let text = [leading.as_str(), source, trailing.as_str()].concat(); + let debug_text_range = TextRange::new( + range.start() + - TextSize::new( + u32::try_from(leading.len()) + .expect("debug f-string leading text too long"), + ), + range.end() + + TextSize::new( + u32::try_from(trailing.len()) + .expect("debug f-string trailing text too long"), + ), + ); let text: Wtf8Buf = text.into(); if pending_literal.is_none() { + *pending_literal_range = Some(debug_text_range); *pending_literal_no_location = false; *pending_literal = Some(Wtf8Buf::new()); + } else { + Self::extend_pending_literal_range( + pending_literal_range, + debug_text_range, + ); } pending_literal.as_mut().unwrap().push_wtf8(text.as_ref()); @@ -13499,6 +14504,7 @@ impl Compiler { self.emit_pending_fstring_literal( pending_literal, + pending_literal_range, pending_literal_no_location, element_count, false, @@ -13507,22 +14513,32 @@ impl Compiler { self.compile_expression(&fstring_expr.expression)?; + let formatted_value_range = fstring_expr.range; match conversion { ConvertValueOparg::None => {} ConvertValueOparg::Str | ConvertValueOparg::Repr | ConvertValueOparg::Ascii => { + self.set_source_range(formatted_value_range); emit!(self, Instruction::ConvertValue { oparg: conversion }) } } match &fstring_expr.format_spec { Some(format_spec) => { - self.compile_fstring_elements(flags, &format_spec.elements)?; - + let format_spec_range = + self.cpython_format_spec_range(format_spec.range); + self.compile_fstring_elements( + flags, + &format_spec.elements, + Some(format_spec_range), + )?; + + self.set_source_range(formatted_value_range); emit!(self, Instruction::FormatWithSpec); } None => { + self.set_source_range(formatted_value_range); emit!(self, Instruction::FormatSimple); } } @@ -13714,7 +14730,11 @@ impl Compiler { let has_format_spec = interp.format_spec.is_some(); if let Some(format_spec) = &interp.format_spec { - self.compile_fstring_elements(ast::FStringFlags::empty(), &format_spec.elements)?; + self.compile_fstring_elements( + ast::FStringFlags::empty(), + &format_spec.elements, + Some(format_spec.range), + )?; } // CPython keeps bit 1 set in BUILD_INTERPOLATION's oparg and uses @@ -13820,17 +14840,20 @@ fn expandtabs(input: &str, tab_size: usize) -> String { expanded_str } -fn split_doc<'a>(body: &'a [ast::Stmt], opts: &CompileOpts) -> (Option, &'a [ast::Stmt]) { +fn split_doc_with_range<'a>( + body: &'a [ast::Stmt], + opts: &CompileOpts, +) -> (Option<(String, TextRange)>, &'a [ast::Stmt]) { if let Some((ast::Stmt::Expr(expr), body_rest)) = body.split_first() { let doc_comment = match &*expr.value { - ast::Expr::StringLiteral(value) => Some(&value.value), + ast::Expr::StringLiteral(value) => Some((&value.value, expr.value.range())), // f-strings are not allowed in Python doc comments. ast::Expr::FString(_) => None, _ => None, }; - if let Some(doc) = doc_comment { + if let Some((doc, range)) = doc_comment { return if opts.optimize < 2 { - (Some(clean_doc(doc.to_str())), body_rest) + (Some((clean_doc(doc.to_str()), range)), body_rest) } else { (None, body_rest) }; @@ -13839,6 +14862,12 @@ fn split_doc<'a>(body: &'a [ast::Stmt], opts: &CompileOpts) -> (Option, (None, body) } +#[cfg(test)] +fn split_doc<'a>(body: &'a [ast::Stmt], opts: &CompileOpts) -> (Option, &'a [ast::Stmt]) { + let (doc, body) = split_doc_with_range(body, opts); + (doc.map(|(doc, _)| doc), body) +} + pub fn ruff_int_to_bigint(int: &ast::Int) -> Result { if let Some(small) = int.as_u64() { Ok(BigInt::from(small)) @@ -14113,6 +15142,46 @@ mod tests { compiler.exit_scope() } + #[test] + fn test_empty_module_implicit_return_inherits_resume_location_like_cpython() { + let code = compile_exec(""); + // CPython 3.14 codegen emits the implicit LOAD_CONST/RETURN_VALUE with + // NO_LOCATION, then flowgraph.c::propagate_line_numbers() propagates + // the module RESUME location, whose line is 0. + assert_eq!(code.linetable.as_ref(), &[0xf2, 0x03, 0x01, 0x01, 0x01]); + } + + #[test] + fn test_redundant_nop_location_copies_full_location_like_cpython() { + let code = compile_exec( + "\ +def f(x, y, z): + while x: + if y: + pass + elif z: + if y < 0: + return y + if z: + y = y + 1 + elif y: + return 1 + return -1 +", + ); + let f = find_code(&code, "f").expect("missing function code"); + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xdf, 0x0a, 0x0b, 0xdf, 0x0b, 0x0c, 0xd9, 0x0c, 0x10, 0xdf, 0x0d, 0x0e, + 0xd8, 0x0f, 0x10, 0x90, 0x31, 0x8c, 0x75, 0xd8, 0x17, 0x18, 0x90, 0x08, 0xdf, 0x0f, + 0x10, 0xd8, 0x14, 0x15, 0x98, 0x01, 0x95, 0x45, 0x92, 0x01, 0xf1, 0x03, 0x00, 0x10, + 0x11, 0xe7, 0x0d, 0x0e, 0x89, 0x51, 0xd9, 0x13, 0x14, 0xd8, 0x0b, 0x0d, 0x80, 0x49, + ], + "CPython basicblock_remove_redundant_nops() copies the full NOP location into a following no-location jump" + ); + } + fn scan_program_symbol_table(source: &str) -> SymbolTable { let source_file = SourceFileBuilder::new("source_path", source).finish(); let parsed = ruff_python_parser::parse( @@ -14224,6 +15293,7 @@ mod tests { } ); compiler.set_no_location(); + compiler.move_last_instruction_before_scope_start_resume(); compiler .push_fblock(FBlockType::StopIteration, handler_block, handler_block) .unwrap(); @@ -16056,542 +17126,2118 @@ def f(buffer, pos, last_char): }) } - fn non_cache_instructions(code: &CodeObject) -> impl Iterator { - code.instructions - .iter() - .filter(|unit| !matches!(unit.op, Instruction::Cache)) - } - - fn varname_index(code: &CodeObject, name: &str) -> usize { - code.varnames - .iter() - .position(|varname| varname.as_str() == name) - .unwrap_or_else(|| panic!("missing {name} local")) + fn find_direct_child_code<'a>(code: &'a CodeObject, name: &str) -> Option<&'a CodeObject> { + code.constants.iter().find_map(|constant| { + if let ConstantData::Code { code } = constant { + (code.obj_name == name).then_some(code.as_ref()) + } else { + None + } + }) } - fn load_fast_ops_for_var(code: &CodeObject, name: &str) -> Vec { - let var_idx = varname_index(code, name); - non_cache_instructions(code) - .filter_map(|unit| match unit.op { - Instruction::LoadFast { var_num } | Instruction::LoadFastBorrow { var_num } => { - let var_num = var_num.get(OpArg::new(u32::from(u8::from(unit.arg)))); - (usize::from(var_num) == var_idx).then_some(unit.op) - } - _ => None, - }) - .collect() + #[test] + fn test_annotated_multiline_function_body_keeps_def_firstlineno_like_cpython() { + let code = compile_exec( + r#" +a = 1 +def f( + x: a, +): ... +"#, + ); + let f = find_code(&code, "f").expect("missing f code"); + // CPython 3.14 codegen_function() computes firstlineno from the + // FunctionDef before compiling annotations, then passes it to + // codegen_function_body(). + assert_eq!(f.linetable.as_ref(), &[0x80, 0x00, 0xe1, 0x03, 0x06]); } - fn load_fast_pair_ops_for_vars( - code: &CodeObject, - left_name: &str, - right_name: &str, - ) -> Vec { - let left_idx = varname_index(code, left_name); - let right_idx = varname_index(code, right_name); - non_cache_instructions(code) - .filter_map(|unit| { - let var_nums = match unit.op { - Instruction::LoadFastLoadFast { var_nums } - | Instruction::LoadFastBorrowLoadFastBorrow { var_nums } => var_nums, - _ => return None, - }; - let (left, right) = var_nums - .get(OpArg::new(u32::from(u8::from(unit.arg)))) - .indexes(); - (usize::from(left) == left_idx && usize::from(right) == right_idx) - .then_some(unit.op) - }) - .collect() + #[test] + fn test_annotation_scope_return_uses_function_location_like_cpython() { + let code = compile_exec( + r#" +def g(): + def f(x: not (int is int), /): ... +"#, + ); + let g = find_code(&code, "g").expect("missing g code"); + let annotate = find_code(g, "__annotate__").expect("missing annotation code"); + // CPython 3.14 codegen_function_annotations() receives LOC(function) + // and uses it for the annotation closure's BUILD_MAP/RETURN_VALUE and + // for the parent MAKE_FUNCTION annotate sequence. + assert_eq!(g.linetable.as_ref(), &[0x80, 0x00, 0xdf, 0x04, 0x26]); + assert_eq!( + annotate.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd7, 0x04, 0x26, 0xd1, 0x04, 0x26, 0x94, 0x23, 0x9c, 0x13, 0xd0, 0x0d, + 0x1d, 0xd1, 0x04, 0x26, + ], + ); } - fn count_strong_loads_for_vars(code: &CodeObject, names: &[&str]) -> usize { - let var_indices = names + #[test] + fn test_module_deferred_annotations_use_start_location_like_cpython() { + let code = compile_exec( + "\ +import os +X: int +Y: str +", + ); + let annotate = find_code(&code, "__annotate__").expect("missing __annotate__ code"); + + // CPython 3.14 compile.c::start_location() passes the first module + // statement location into _PyCodegen_Module(), and + // codegen_process_deferred_annotations() uses that loc for annotation + // scope setup, BUILD_MAP, STORE_SUBSCR, and RETURN_VALUE. + assert_eq!( + annotate.linetable.as_ref(), + &[ + 0x80, 0x00, 0x87, 0x09, 0x81, 0x09, 0xdf, 0x00, 0x06, 0x82, 0x06, 0x84, 0x33, 0x81, + 0x06, 0xf1, 0x03, 0x00, 0x01, 0x0a, 0xe7, 0x00, 0x06, 0x82, 0x06, 0x84, 0x33, 0x81, + 0x06, 0xf2, 0x05, 0x00, 0x01, 0x0a, + ] + ); + } + + #[test] + fn test_super_method_call_kw_names_use_attribute_location_like_cpython() { + let code = compile_exec( + "\ +class C: + def f(self, x, y): + super().__init__( + x=x, + y=y) +", + ); + let f = find_code(&code, "f").expect("missing f code"); + let call_kw_index = f + .instructions .iter() - .map(|name| varname_index(code, name)) - .collect::>(); - non_cache_instructions(code) - .filter(|unit| match unit.op { - Instruction::LoadFast { var_num } => { - let var_num = var_num.get(OpArg::new(u32::from(u8::from(unit.arg)))); - var_indices.contains(&usize::from(var_num)) - } - _ => false, - }) - .count() + .position(|unit| matches!(unit.op, Instruction::CallKw { .. })) + .expect("missing CALL_KW"); + let (kw_names, (location, end_location)) = f + .instructions + .iter() + .zip(&f.locations) + .take(call_kw_index) + .rev() + .find(|(unit, _)| matches!(unit.op, Instruction::LoadConst { .. })) + .expect("missing CALL_KW names tuple"); + + assert!( + matches!(kw_names.op, Instruction::LoadConst { .. }), + "expected keyword names tuple before CALL_KW" + ); + assert_eq!( + (location.line.get(), end_location.line.get()), + (3, 3), + "CPython maybe_optimize_method_call() passes the updated method-attribute loc into codegen_call_simple_kw_helper()" + ); } - fn count_strong_loads(code: &CodeObject) -> usize { - non_cache_instructions(code) - .filter(|unit| matches!(unit.op, Instruction::LoadFast { .. })) - .count() + #[test] + fn test_lambda_return_uses_body_location_like_cpython() { + let code = compile_exec( + "\ +def outer(): + return lambda x: x if x else 1 +", + ); + let lambda = find_code(&code, "").expect("missing lambda code"); + let return_positions: Vec<_> = lambda + .instructions + .iter() + .zip(&lambda.locations) + .filter_map(|(unit, (location, end_location))| { + matches!(unit.op, Instruction::ReturnValue).then_some(( + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .collect(); + + assert_eq!( + return_positions, + vec![(2, 22, 2, 35), (2, 22, 2, 35)], + "CPython codegen_lambda() emits RETURN_VALUE at LOC(lambda body)" + ); } #[test] - fn test_match_or_default_block_keeps_load_fast_strong() { + fn test_not_compare_uses_unary_location_like_cpython() { let code = compile_exec( - r#" -def f(format, other): - match format: - case 1 | 2: - return other - case _: - raise NotImplementedError(other) -"#, + "\ +def f(self, other): + return not self == other +", ); - let function = find_code(&code, "f").expect("missing function code"); - let loads = load_fast_ops_for_var(function, "other"); - assert!( - matches!( - loads.as_slice(), - [ - Instruction::LoadFastBorrow { .. }, - Instruction::LoadFastBorrow { .. }, - Instruction::LoadFast { .. }, - ] - ), - "CPython optimize_load_fast() keeps trailing OR-pattern default loads strong, got {loads:?}", + let f = find_code(&code, "f").expect("missing f code"); + + // CPython 3.14 parses the Compare inside UnaryOp(Not) with the + // UnaryOp start location, so codegen_compare() emits COMPARE_OP at + // the full "not self == other" range before flowgraph folds TO_BOOL. + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x0f, 0x13, 0xd2, 0x0b, 0x1c, 0xd0, 0x04, 0x1c, + ] ); } #[test] - fn test_match_nested_or_default_block_keeps_load_fast_strong() { + fn test_not_chained_compare_keeps_compare_location_like_cpython() { let code = compile_exec( - r#" -def f(format, other): - match format: - case [1 | 2, value]: - return other - case _: - raise NotImplementedError(other) -"#, + "\ +def f(c): + return not (b\" \" <= c <= b\"~\") +", ); - let function = find_code(&code, "f").expect("missing function code"); - let loads = load_fast_ops_for_var(function, "other"); - assert!( - loads - .iter() - .any(|op| matches!(op, Instruction::LoadFast { .. })), - "CPython optimize_load_fast() keeps trailing nested OR-pattern default loads strong, got {loads:?}", + let f = find_code(&code, "f").expect("missing f code"); + + // CPython's single Compare under UnaryOp(Not) includes "not" in the + // Compare range, but chained comparisons keep their inner range for + // compare scaffolding and only use the UnaryOp range for TO_BOOL and + // UNARY_NOT. + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x10, 0x14, 0x98, 0x01, 0xd7, 0x10, 0x21, 0xd4, 0x10, 0x21, 0x98, + 0x54, 0xd1, 0x10, 0x21, 0xd4, 0x0b, 0x22, 0xd0, 0x04, 0x22, 0xd1, 0x10, 0x21, 0xd4, + 0x0b, 0x22, 0xd0, 0x04, 0x22, + ] ); } #[test] - fn test_match_success_next_location_preserves_pass_nop() { + fn test_type_param_scopes_use_cpython_locations() { + let code = compile_exec("type BoundGenericAlias[X: int] = set[X]\n"); + let type_params = find_code(&code, "") + .expect("missing generic parameters code"); + let bound = find_direct_child_code(type_params, "X").expect("missing X bound code"); + let alias = + find_direct_child_code(type_params, "BoundGenericAlias").expect("missing alias code"); + + // CPython 3.14 codegen_type_params() emits type-parameter ops at + // LOC(typeparam), bound/default evaluator ops at LOC(e), and type alias + // body plumbing at LOC(s). + assert_eq!( + type_params.linetable.as_ref(), + &[ + 0xf8, 0x80, 0x00, 0xd0, 0x00, 0x27, 0x90, 0x76, 0x9b, 0x23, 0x93, 0x76, 0xd7, 0x00, + 0x27, 0xd1, 0x00, 0x27, + ], + ); + assert_eq!( + bound.linetable.as_ref(), + &[0x80, 0x00, 0x9f, 0x23, 0x9e, 0x23] + ); + assert_eq!( + alias.linetable.as_ref(), + &[ + 0xf8, 0x80, 0x00, 0xd7, 0x00, 0x27, 0xd0, 0x00, 0x27, 0xa4, 0x13, 0xa0, 0x51, 0xa5, + 0x16, 0xd0, 0x00, 0x27, + ], + ); + } + + #[test] + fn test_generic_function_annotation_scope_uses_function_location_like_cpython() { + let code = compile_exec("def f[T](x: int): ...\n"); + let type_params = + find_code(&code, "").expect("missing type params code"); + let annotate = + find_direct_child_code(type_params, "__annotate__").expect("missing annotation code"); + + // CPython 3.14 passes LOC(function) into codegen_function_annotations(), + // even when the annotation closure is emitted inside the generic + // parameters scope after codegen_type_params(). + assert_eq!( + annotate.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd7, 0x00, 0x15, 0xd1, 0x00, 0x15, 0x8c, 0x43, 0xd1, 0x00, 0x15, + ], + ); + } + + #[test] + fn test_generic_class_type_params_store_uses_class_location_like_cpython() { let code = compile_exec( - r#" -def f(command): - match command: - case "": - pass - case _ as unknown: - sink(unknown) - return False -"#, + "\ +def outer(): + class X[T]: ... +", ); - let function = find_code(&code, "f").expect("missing function code"); - let ops = non_cache_instructions(function) - .map(|unit| unit.op) - .collect::>(); - assert!( - ops.windows(3).any(|window| matches!( - window, - [ - Instruction::PopTop, - Instruction::Nop, - Instruction::LoadConst { .. }, - ] - )), - "CPython NEXT_LOCATION keeps the pass NOP after match subject POP_TOP, got {ops:?}", + let type_params = + find_code(&code, "").expect("missing type params code"); + + // CPython 3.14 codegen_class() calls codegen_type_params(), then stores + // the resulting .type_params cell with codegen_nameop(c, LOC(class), ...). + assert_eq!( + type_params.linetable.as_ref(), + &[ + 0xf8, 0x80, 0x00, 0x8c, 0x41, 0x87, 0x4f, 0x87, 0x4f, 0x80, 0x4f, + ] ); } #[test] - fn test_while_try_body_layout_keeps_false_jump_to_anchor() { + fn test_generic_class_wrapper_ops_use_class_location_like_cpython() { let code = compile_exec( - r#" -def f(stack, itstack, node_to_stack_index): - while True: - while stack: - try: - node = itstack[-1]() - break - except StopIteration: - del node_to_stack_index[stack.pop()] - itstack.pop() - else: - break -"#, + "\ +def f(): + class X[T](tuple): + pass +", ); - let function = find_code(&code, "f").expect("missing function code"); - let ops = non_cache_instructions(function) - .map(|unit| unit.op) - .collect::>(); - let stack_test = ops - .windows(5) - .find(|window| { - matches!( - window, - [ - Instruction::LoadFastBorrow { .. } | Instruction::LoadFast { .. }, - Instruction::ToBool, - Instruction::PopJumpIfFalse { .. }, - Instruction::NotTaken, - Instruction::Nop, - ] + let f = find_code(&code, "f").expect("missing function code"); + let wrapper_positions: Vec<_> = f + .instructions + .iter() + .filter(|unit| !matches!(unit.op, Instruction::Resume { .. })) + .take(4) + .zip(f.locations.iter().filter(|_| true).skip(1)) + .map(|(unit, (location, end_location))| { + ( + unit.op, + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), ) }) - .unwrap_or_else(|| { - panic!("expected CPython-style while/try false jump to anchor, got {ops:?}") - }); - assert!(matches!(stack_test[2], Instruction::PopJumpIfFalse { .. })); + .collect(); + assert_eq!( + wrapper_positions + .iter() + .map(|(_, line, col, end_line, end_col)| (*line, *col, *end_line, *end_col)) + .collect::>(), + vec![(2, 5, 3, 13); 4], + "CPython codegen_class() emits type-params wrapper closure, PUSH_NULL, and CALL at LOC(class)" + ); + + let type_params = + find_code(f, "").expect("missing generic parameters code"); + let generic_base_position = type_params + .instructions + .iter() + .zip(&type_params.locations) + .find_map(|(unit, (location, end_location))| { + let Instruction::LoadFastBorrow { var_num } = unit.op else { + return None; + }; + let idx = var_num.get(OpArg::new(u32::from(u8::from(unit.arg)))); + let localsplus = type_params + .varnames + .iter() + .chain(type_params.cellvars.iter()) + .chain(type_params.freevars.iter()) + .collect::>(); + localsplus + .get(usize::from(idx)) + .is_some_and(|name| name.as_str() == ".generic_base") + .then_some(( + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .expect("missing .generic_base load"); + assert_eq!( + generic_base_position, + (2, 5, 3, 13), + "CPython codegen_class() injects .generic_base with LOC(class)" + ); } #[test] - fn test_while_if_not_break_keeps_body_call() { + fn test_class_deferred_annotations_use_class_body_location_like_cpython() { let code = compile_exec( r#" -def f(waiters): - while waiters: - waiter = waiters.popleft() - if not waiter.done(): - waiter.set_result(None) - break +class C: + "doc" + x: int "#, ); - let function = find_code(&code, "f").expect("missing function code"); - let ops = non_cache_instructions(function) - .map(|unit| unit.op) - .collect::>(); - assert!( - ops.windows(4).any(|window| matches!( - window, - [ - Instruction::LoadFastBorrow { .. } | Instruction::LoadFast { .. }, - Instruction::LoadAttr { .. }, - Instruction::LoadConst { .. }, - Instruction::Call { .. }, - ] - )), - "CPython keeps waiter.set_result(None) before the break, got {ops:?}", + let class_code = find_code(&code, "C").expect("missing class code"); + + // CPython 3.14 calls codegen_body(c, loc, ...) from codegen_class_body() + // with LOCATION(firstlineno, firstlineno, 0, 0). Deferred annotation + // closure setup and following artificial class tail inherit that class + // body location, not the annotation expression location. + assert_eq!( + class_code.linetable.as_ref(), + &[ + 0xf8, 0x87, 0x00, 0x80, 0x00, 0xd9, 0x04, 0x09, 0xf7, 0x03, 0x00, 0x01, 0x01, 0x83, + 0x00, + ], + ); + } + + #[test] + fn test_future_annotation_string_uses_annotation_location_like_cpython() { + let code = compile_exec("from __future__ import annotations\nclass Bar:\n foo: Foo\n"); + let class_code = find_code(&code, "Bar").expect("missing class code"); + + // CPython 3.14 codegen_annassign() calls codegen_visit_annexpr(), + // which emits the stringized annotation at LOC(annotation), then emits + // the __annotations__ store sequence at LOC(AnnAssign). + assert_eq!( + class_code.linetable.as_ref(), + &[0x87, 0x00, 0xd8, 0x09, 0x0c, 0x87, 0x48] + ); + } + + #[test] + fn test_lambda_dict_literal_ops_use_dict_location_like_cpython() { + let code = compile_exec( + "\ +f = lambda data: {'x': data} +g = lambda i: {**i} +", + ); + let f = find_code(&code, "").expect("missing f lambda code"); + let g = code + .constants + .iter() + .filter_map(|constant| { + if let ConstantData::Code { code } = constant { + (code.obj_name == "").then_some(code.as_ref()) + } else { + None + } + }) + .nth(1) + .expect("missing g lambda code"); + + // CPython 3.14 codegen_dict()/codegen_subdict() uses LOC(dict) for + // BUILD_MAP, MAP_ADD, and DICT_UPDATE, so the lambda RETURN_VALUE + // inherits the full dict literal location after compiling its body. + assert_eq!( + f.linetable.as_ref(), + &[0x80, 0x00, 0x90, 0x23, 0x90, 0x74, 0x91, 0x1b] + ); + assert_eq!( + g.linetable.as_ref(), + &[0x80, 0x00, 0x88, 0x65, 0x90, 0x11, 0x89, 0x65] + ); + } + + #[test] + fn test_class_function_like_scopes_set_method_flag_like_cpython() { + let code = compile_exec_with_options( + r#" +class C: + def m(self): + pass + + async def am(self): + pass + + f = lambda self: self + y = (i for i in ()) + +def f(): + pass +"#, + CompileOpts::default(), + ); + let class_code = find_code(&code, "C").expect("missing class code"); + let method = find_code(class_code, "m").expect("missing method code"); + let async_method = find_code(class_code, "am").expect("missing async method code"); + let lambda = find_code(class_code, "").expect("missing lambda code"); + let genexpr = find_code(class_code, "").expect("missing genexpr code"); + let module_function = find_code(&code, "f").expect("missing module function code"); + + for code in [method, async_method, lambda, genexpr] { + assert!( + code.flags.contains(bytecode::CodeFlags::METHOD), + "class-scope function-like code should carry CO_METHOD like CPython 3.14, got {:?}", + code.flags + ); + } + assert!( + !module_function.flags.contains(bytecode::CodeFlags::METHOD), + "module-scope function must not carry CO_METHOD" + ); + } + + #[test] + fn test_inlined_comprehension_lambda_in_class_is_not_method_like_cpython() { + let code = compile_exec( + "\ +class C: + def method(self): + super() + return __class__ + items = [(lambda: i) for i in range(5)] +", + ); + let class_code = find_code(&code, "C").expect("missing class code"); + let lambda = find_code(class_code, "").expect("missing lambda code"); + assert!( + lambda.flags.contains(bytecode::CodeFlags::NESTED), + "lambda under inlined class comprehension should stay nested" + ); + assert!( + !lambda.flags.contains(bytecode::CodeFlags::METHOD), + "CPython creates this lambda while the current symtable block is the comprehension, not the class" + ); + } + + #[test] + fn test_genexpr_implicit_iterator_is_not_posonly_like_cpython() { + let code = compile_exec("x = (i for i in ())"); + let genexpr = find_code(&code, "").expect("missing genexpr code"); + + assert_eq!(genexpr.arg_count, 1); + assert_eq!( + genexpr.posonlyarg_count, 0, + "CPython codegen_comprehension() sets u_argcount=1 and leaves u_posonlyargcount=0" + ); + } + + #[test] + fn test_async_generator_uses_cpython_async_generator_flag() { + let code = compile_exec_with_options( + r#" +def g(): + yield 1 + +async def c(): + return 1 + +async def ag(): + yield 1 +"#, + CompileOpts::default(), + ); + let generator = find_code(&code, "g").expect("missing generator code"); + let coroutine = find_code(&code, "c").expect("missing coroutine code"); + let async_generator = find_code(&code, "ag").expect("missing async generator code"); + + assert!(generator.flags.contains(bytecode::CodeFlags::GENERATOR)); + assert!(!generator.flags.contains(bytecode::CodeFlags::COROUTINE)); + assert!( + !generator + .flags + .contains(bytecode::CodeFlags::ASYNC_GENERATOR) + ); + + assert!(coroutine.flags.contains(bytecode::CodeFlags::COROUTINE)); + assert!(!coroutine.flags.contains(bytecode::CodeFlags::GENERATOR)); + assert!( + !coroutine + .flags + .contains(bytecode::CodeFlags::ASYNC_GENERATOR) + ); + + assert!( + async_generator + .flags + .contains(bytecode::CodeFlags::ASYNC_GENERATOR) + ); + assert!( + !async_generator + .flags + .contains(bytecode::CodeFlags::GENERATOR) + ); + assert!( + !async_generator + .flags + .contains(bytecode::CodeFlags::COROUTINE) + ); + } + + #[test] + fn test_is_none_jump_preserves_cpython_const_order() { + let code = compile_exec_with_options( + r#" +def f(self, payload): + "doc" + if self.x is None: + self.x = [payload] + else: + raise TypeError("bad") +"#, + CompileOpts::default(), + ); + let function = find_code(&code, "f").expect("missing function code"); + assert!( + matches!( + function.constants.as_ref(), + [ + ConstantData::Str { value: doc }, + ConstantData::None, + ConstantData::Str { value: message }, + ] if doc.as_ref() == "doc" && message.as_ref() == "bad" + ), + "CPython registers None from the pre-folded `is None` comparison before the else-body string" + ); + } + + #[test] + fn test_stop_iteration_handler_starts_at_scope_start_resume_like_cpython() { + let code = compile_exec_with_options( + r#" +def g(): + yield 1 + +async def c(): + return 1 + +x = (i for i in ()) +"#, + CompileOpts::default(), + ); + + fn assert_stop_iteration_table_starts_at_resume(code: &CodeObject) { + let resume_idx = u32::try_from( + code.instructions + .iter() + .position(|unit| { + matches!( + unit.op, + Instruction::Resume { context } + if matches!( + context + .get(OpArg::new(u32::from(u8::from(unit.arg)))) + .location(), + oparg::ResumeLocation::AtFuncStart + ) + ) + }) + .expect("missing function-start RESUME"), + ) + .unwrap(); + let entries = bytecode::decode_exception_table(&code.exceptiontable); + assert!( + entries.iter().any(|entry| entry.start == resume_idx), + "CPython codegen_wrap_in_stopiteration_handler() inserts SETUP_CLEANUP before RESUME so the StopIteration table starts at RESUME; resume_idx={resume_idx}, entries={entries:?}, instructions={:?}", + code.instructions + ); + } + + assert_stop_iteration_table_starts_at_resume(find_code(&code, "g").expect("missing g")); + assert_stop_iteration_table_starts_at_resume(find_code(&code, "c").expect("missing c")); + assert_stop_iteration_table_starts_at_resume( + find_code(&code, "").expect("missing genexpr"), + ); + } + + #[test] + fn test_inlined_comprehension_cleanup_starts_at_result_build_like_cpython() { + let code = compile_exec_with_options( + r#" +def f(self): + return [k for k, v in self._headers] +"#, + CompileOpts::default(), + ); + let f = find_code(&code, "f").expect("missing f"); + let build_list_idx = u32::try_from( + f.instructions + .iter() + .position(|unit| matches!(unit.op, Instruction::BuildList { .. })) + .expect("missing BUILD_LIST"), + ) + .unwrap(); + let entries = bytecode::decode_exception_table(&f.exceptiontable); + assert!( + entries.iter().any(|entry| { + entry.start == build_list_idx && entry.depth == 3 && !entry.push_lasti + }), + "CPython codegen_push_inlined_comprehension_locals() emits SETUP_FINALLY before BUILD_LIST, so the virtual cleanup table starts at BUILD_LIST with saved locals depth; build_list_idx={build_list_idx}, entries={entries:?}, instructions={:?}", + f.instructions + ); + } + + #[test] + fn test_or_return_not_taken_before_jump_target_splits_exception_table_like_cpython() { + let code = compile_exec_with_options( + r#" +def f(self, maintype): + if maintype != "multipart" or not self.is_multipart(): + return + yield 1 +"#, + CompileOpts::default(), + ); + let f = find_code(&code, "f").expect("missing f"); + let not_taken_before_return = u32::try_from( + f.instructions + .windows(3) + .position(|window| { + matches!( + window, + [ + CodeUnit { + op: Instruction::NotTaken, + .. + }, + CodeUnit { + op: Instruction::LoadConst { .. }, + .. + }, + CodeUnit { + op: Instruction::ReturnValue, + .. + }, + ] + ) + }) + .expect("missing NOT_TAKEN before return"), + ) + .unwrap(); + let return_load = not_taken_before_return + 1; + let entries = bytecode::decode_exception_table(&f.exceptiontable); + + assert!( + entries.iter().all(|entry| { + not_taken_before_return < entry.start || not_taken_before_return >= entry.end + }), + "CPython normalize_jumps() can leave a NOT_TAKEN before a separately labelled jump target outside the generator StopIteration range; entries={entries:?}, instructions={:?}", + f.instructions + ); + assert!( + entries + .iter() + .any(|entry| entry.start <= return_load && return_load < entry.end), + "the return block after that NOT_TAKEN is still protected by the StopIteration handler; entries={entries:?}, instructions={:?}", + f.instructions + ); + } + + #[test] + fn test_loop_break_condition_splits_exception_table_like_cpython() { + let code = compile_exec_with_options( + r#" +def f(start, items): + if start: + for x in items: + if x == start: + break + yield 1 +"#, + CompileOpts::default(), + ); + let f = find_code(&code, "f").expect("missing f"); + let break_jump = u32::try_from( + f.instructions + .windows(3) + .position(|window| { + matches!( + window, + [ + CodeUnit { + op: Instruction::PopJumpIfTrue { .. }, + .. + }, + CodeUnit { + op: Instruction::Cache, + .. + }, + CodeUnit { + op: Instruction::NotTaken, + .. + }, + ] + ) || matches!( + window, + [ + CodeUnit { + op: Instruction::PopJumpIfTrue { .. }, + .. + }, + CodeUnit { + op: Instruction::NotTaken, + .. + }, + CodeUnit { + op: Instruction::JumpBackward { .. }, + .. + }, + ] + ) + }) + .expect("missing loop break conditional jump"), + ) + .unwrap(); + let entries = bytecode::decode_exception_table(&f.exceptiontable); + + assert!( + entries + .iter() + .all(|entry| break_jump < entry.start || break_jump >= entry.end), + "CPython normalize_jumps() leaves the loop-break conditional before the synthetic NOT_TAKEN/JUMP_BACKWARD block outside the StopIteration table; break_jump={break_jump}, entries={entries:?}, instructions={:?}", + f.instructions + ); + } + + #[test] + fn test_nested_ifexp_not_taken_splits_exception_table_like_cpython() { + let code = compile_exec_with_options( + r#" +def f(flag, subparts): + if flag: + candidate = subparts[0] if subparts else None + yield 1 +"#, + CompileOpts::default(), + ); + let f = find_code(&code, "f").expect("missing f"); + let conditional_expr_not_taken = u32::try_from( + f.instructions + .iter() + .enumerate() + .find_map(|(idx, unit)| { + if !matches!(unit.op, Instruction::NotTaken) { + return None; + } + let prev = f.instructions[..idx] + .iter() + .rev() + .find(|unit| !matches!(unit.op, Instruction::Cache))?; + let mut following = f.instructions[idx + 1..] + .iter() + .filter(|unit| !matches!(unit.op, Instruction::Cache)); + let next = following.next()?; + let after_next = following.next()?; + (matches!(prev.op, Instruction::PopJumpIfFalse { .. }) + && matches!(next.op, Instruction::LoadFastBorrow { .. }) + && matches!(after_next.op, Instruction::LoadSmallInt { .. })) + .then_some(idx) + }) + .expect("missing conditional expression NOT_TAKEN"), + ) + .unwrap(); + let body_start = conditional_expr_not_taken + 1; + let entries = bytecode::decode_exception_table(&f.exceptiontable); + + assert!( + entries.iter().all(|entry| { + conditional_expr_not_taken < entry.start || conditional_expr_not_taken >= entry.end + }), + "CPython codegen_ifexp() uses a separate orelse label inside conditional statements, leaving the normalize_jumps NOT_TAKEN outside the StopIteration table; not_taken={conditional_expr_not_taken}, entries={entries:?}, instructions={:?}", + f.instructions + ); + assert!( + entries + .iter() + .any(|entry| entry.start <= body_start && body_start < entry.end), + "the conditional-expression body after that NOT_TAKEN remains protected; body_start={body_start}, entries={entries:?}, instructions={:?}", + f.instructions + ); + } + + #[test] + fn test_bool_not_taken_after_conditional_yield_splits_like_cpython() { + let code = compile_exec_with_options( + r#" +def f(a, b, c): + if a: + yield 1 + if b: + x = 2 + if c: + x = 3 + yield 4 +"#, + CompileOpts::default(), + ); + let f = find_code(&code, "f").expect("missing f"); + let split_not_taken = f + .instructions + .iter() + .enumerate() + .filter_map(|(idx, unit)| { + if !matches!(unit.op, Instruction::NotTaken) { + return None; + } + let prev = f.instructions[..idx] + .iter() + .rev() + .find(|unit| !matches!(unit.op, Instruction::Cache))?; + matches!( + prev.op, + Instruction::PopJumpIfFalse { .. } | Instruction::PopJumpIfTrue { .. } + ) + .then(|| u32::try_from(idx).unwrap()) + }) + .nth(1) + .expect("missing second bool conditional NOT_TAKEN"); + let entries = bytecode::decode_exception_table(&f.exceptiontable); + + assert!( + entries + .iter() + .all(|entry| split_not_taken < entry.start || split_not_taken >= entry.end), + "CPython labels exception targets before normalize_jumps(), so the general bool-jump NOT_TAKEN after a conditional yield is outside the StopIteration table; not_taken={split_not_taken}, entries={entries:?}, instructions={:?}", + f.instructions + ); + } + + fn non_cache_instructions(code: &CodeObject) -> impl Iterator { + code.instructions + .iter() + .filter(|unit| !matches!(unit.op, Instruction::Cache)) + } + + fn varname_index(code: &CodeObject, name: &str) -> usize { + code.varnames + .iter() + .position(|varname| varname.as_str() == name) + .unwrap_or_else(|| panic!("missing {name} local")) + } + + fn load_fast_ops_for_var(code: &CodeObject, name: &str) -> Vec { + let var_idx = varname_index(code, name); + non_cache_instructions(code) + .filter_map(|unit| match unit.op { + Instruction::LoadFast { var_num } | Instruction::LoadFastBorrow { var_num } => { + let var_num = var_num.get(OpArg::new(u32::from(u8::from(unit.arg)))); + (usize::from(var_num) == var_idx).then_some(unit.op) + } + _ => None, + }) + .collect() + } + + fn load_fast_pair_ops_for_vars( + code: &CodeObject, + left_name: &str, + right_name: &str, + ) -> Vec { + let left_idx = varname_index(code, left_name); + let right_idx = varname_index(code, right_name); + non_cache_instructions(code) + .filter_map(|unit| { + let var_nums = match unit.op { + Instruction::LoadFastLoadFast { var_nums } + | Instruction::LoadFastBorrowLoadFastBorrow { var_nums } => var_nums, + _ => return None, + }; + let (left, right) = var_nums + .get(OpArg::new(u32::from(u8::from(unit.arg)))) + .indexes(); + (usize::from(left) == left_idx && usize::from(right) == right_idx) + .then_some(unit.op) + }) + .collect() + } + + fn count_strong_loads_for_vars(code: &CodeObject, names: &[&str]) -> usize { + let var_indices = names + .iter() + .map(|name| varname_index(code, name)) + .collect::>(); + non_cache_instructions(code) + .filter(|unit| match unit.op { + Instruction::LoadFast { var_num } => { + let var_num = var_num.get(OpArg::new(u32::from(u8::from(unit.arg)))); + var_indices.contains(&usize::from(var_num)) + } + _ => false, + }) + .count() + } + + fn count_strong_loads(code: &CodeObject) -> usize { + non_cache_instructions(code) + .filter(|unit| matches!(unit.op, Instruction::LoadFast { .. })) + .count() + } + + #[test] + fn test_match_or_default_block_keeps_load_fast_strong() { + let code = compile_exec( + r#" +def f(format, other): + match format: + case 1 | 2: + return other + case _: + raise NotImplementedError(other) +"#, + ); + let function = find_code(&code, "f").expect("missing function code"); + let loads = load_fast_ops_for_var(function, "other"); + assert!( + matches!( + loads.as_slice(), + [ + Instruction::LoadFastBorrow { .. }, + Instruction::LoadFastBorrow { .. }, + Instruction::LoadFast { .. }, + ] + ), + "CPython optimize_load_fast() keeps trailing OR-pattern default loads strong, got {loads:?}", + ); + } + + #[test] + fn test_match_nested_or_default_block_keeps_load_fast_strong() { + let code = compile_exec( + r#" +def f(format, other): + match format: + case [1 | 2, value]: + return other + case _: + raise NotImplementedError(other) +"#, + ); + let function = find_code(&code, "f").expect("missing function code"); + let loads = load_fast_ops_for_var(function, "other"); + assert!( + loads + .iter() + .any(|op| matches!(op, Instruction::LoadFast { .. })), + "CPython optimize_load_fast() keeps trailing nested OR-pattern default loads strong, got {loads:?}", + ); + } + + #[test] + fn test_match_success_next_location_preserves_pass_nop() { + let code = compile_exec( + r#" +def f(command): + match command: + case "": + pass + case _ as unknown: + sink(unknown) + return False +"#, + ); + let function = find_code(&code, "f").expect("missing function code"); + let ops = non_cache_instructions(function) + .map(|unit| unit.op) + .collect::>(); + assert!( + ops.windows(3).any(|window| matches!( + window, + [ + Instruction::PopTop, + Instruction::Nop, + Instruction::LoadConst { .. }, + ] + )), + "CPython NEXT_LOCATION keeps the pass NOP after match subject POP_TOP, got {ops:?}", + ); + } + + #[test] + fn test_match_subject_copy_uses_case_pattern_location_like_cpython() { + let code = compile_exec( + "\ +def f(x): + match x: + case 1: + return True + case 2: + return False +", + ); + let f = find_code(&code, "f").expect("missing f code"); + let copy_line = f + .instructions + .iter() + .zip(&f.locations) + .find_map(|(unit, (location, _))| { + let Instruction::Copy { i } = unit.op else { + return None; + }; + let arg = OpArg::new(u32::from(u8::from(unit.arg))); + (i.get(arg) == 1).then_some(location.line.get()) + }) + .expect("missing match subject COPY"); + assert_eq!( + copy_line, 3, + "CPython codegen_match_inner() emits ADDOP_I(c, LOC(m->pattern), COPY, 1)" + ); + } + + #[test] + fn test_match_or_alternative_copies_use_alternative_locations_like_cpython() { + let code = compile_exec( + "\ +def f(): + x = False + match 0: + case 0 | 1 | 2 | 3: + x = True + return x +", + ); + let f = find_code(&code, "f").expect("missing f code"); + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x08, 0x0d, 0x80, 0x41, 0xd8, 0x0a, 0x0b, 0xdf, 0x0d, 0x0e, 0x97, + 0x11, 0x97, 0x51, 0x9f, 0x11, 0x88, 0x5d, 0xe0, 0x0b, 0x0c, 0x80, 0x48, 0xf0, 0x05, + 0x00, 0x0e, 0x1b, 0xd8, 0x10, 0x14, 0x88, 0x41, 0xd8, 0x0b, 0x0c, 0x80, 0x48, + ], + "CPython codegen_pattern_or() emits each alternative COPY with LOC(alt)" + ); + } + + #[test] + fn test_match_success_jump_uses_no_location_like_cpython() { + let code = compile_exec( + "\ +def f(self): + match 0: + case 0: + x = True + case 0: + x = False + self.assertIs(x, True) +", + ); + let f = find_code(&code, "f").expect("missing f code"); + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x0a, 0x0b, 0xde, 0x0d, 0x0e, 0xd9, 0x10, 0x14, 0x89, 0x41, 0xdd, + 0x0d, 0x0e, 0xd8, 0x10, 0x15, 0x88, 0x41, 0xd8, 0x04, 0x08, 0x87, 0x4d, 0x81, 0x4d, + 0x90, 0x21, 0x90, 0x54, 0xd6, 0x04, 0x1a, + ], + "CPython codegen_match_inner() emits the success jump with NO_LOCATION" + ); + } + + #[test] + fn test_match_mapping_keys_scaffolding_uses_mapping_location_like_cpython() { + let code = compile_exec( + "\ +def f(self): + x = {} + y = None + match x: + case {0: 0}: + y = 0 + self.assertIs(y, None) +", + ); + let f = find_code(&code, "f").expect("missing f code"); + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x08, 0x0a, 0x80, 0x41, 0xd8, 0x08, 0x0c, 0x80, 0x41, 0xd8, 0x0a, + 0x0b, 0xdf, 0x0d, 0x13, 0x8f, 0x56, 0x8a, 0x56, 0x95, 0x11, 0x89, 0x56, 0xd8, 0x10, + 0x11, 0x89, 0x41, 0xf2, 0x03, 0x00, 0x0e, 0x14, 0xe0, 0x04, 0x08, 0x87, 0x4d, 0x81, + 0x4d, 0x90, 0x21, 0x90, 0x54, 0xd6, 0x04, 0x1a, + ], + "CPython codegen_pattern_mapping() returns to LOC(p) for BUILD_TUPLE/MATCH_KEYS scaffolding" + ); + } + + #[test] + fn test_match_class_scaffolding_uses_class_pattern_location_like_cpython() { + let code = compile_exec( + "\ +def f(x): + match x: + case bool(z): + y = 0 + return y, z +", + ); + let f = find_code(&code, "f").expect("missing f code"); + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x0a, 0x0b, 0xdc, 0x0d, 0x11, 0x8f, 0x57, 0x88, 0x57, 0xd8, 0x10, + 0x11, 0x88, 0x41, 0xd8, 0x0b, 0x0c, 0x88, 0x34, 0x80, 0x4b, 0xf0, 0x05, 0x00, 0x0e, + 0x15, 0xe0, 0x0b, 0x0c, 0x88, 0x61, 0x88, 0x34, 0x80, 0x4b, + ], + "CPython codegen_pattern_class() returns to LOC(p) after VISIT(cls)" + ); + } + + #[test] + fn test_while_try_body_layout_keeps_false_jump_to_anchor() { + let code = compile_exec( + r#" +def f(stack, itstack, node_to_stack_index): + while True: + while stack: + try: + node = itstack[-1]() + break + except StopIteration: + del node_to_stack_index[stack.pop()] + itstack.pop() + else: + break +"#, + ); + let function = find_code(&code, "f").expect("missing function code"); + let ops = non_cache_instructions(function) + .map(|unit| unit.op) + .collect::>(); + let stack_test = ops + .windows(5) + .find(|window| { + matches!( + window, + [ + Instruction::LoadFastBorrow { .. } | Instruction::LoadFast { .. }, + Instruction::ToBool, + Instruction::PopJumpIfFalse { .. }, + Instruction::NotTaken, + Instruction::Nop, + ] + ) + }) + .unwrap_or_else(|| { + panic!("expected CPython-style while/try false jump to anchor, got {ops:?}") + }); + assert!(matches!(stack_test[2], Instruction::PopJumpIfFalse { .. })); + } + + #[test] + fn test_while_if_not_break_keeps_body_call() { + let code = compile_exec( + r#" +def f(waiters): + while waiters: + waiter = waiters.popleft() + if not waiter.done(): + waiter.set_result(None) + break +"#, + ); + let function = find_code(&code, "f").expect("missing function code"); + let ops = non_cache_instructions(function) + .map(|unit| unit.op) + .collect::>(); + assert!( + ops.windows(4).any(|window| matches!( + window, + [ + Instruction::LoadFastBorrow { .. } | Instruction::LoadFast { .. }, + Instruction::LoadAttr { .. }, + Instruction::LoadConst { .. }, + Instruction::Call { .. }, + ] + )), + "CPython keeps waiter.set_result(None) before the break, got {ops:?}", + ); + } + + fn localsplus_name(code: &CodeObject, idx: usize) -> Option<&str> { + if idx < code.varnames.len() { + return Some(code.varnames[idx].as_str()); + } + + let mut extra_idx = idx - code.varnames.len(); + for cellvar in &code.cellvars { + if !code.varnames.iter().any(|varname| varname == cellvar) { + if extra_idx == 0 { + return Some(cellvar.as_str()); + } + extra_idx -= 1; + } + } + code.freevars.get(extra_idx).map(|name| name.as_str()) + } + + fn has_common_constant(code: &CodeObject, expected: bytecode::CommonConstant) -> bool { + code.instructions.iter().any(|unit| match unit.op { + Instruction::LoadCommonConstant { idx } => { + idx.get(OpArg::new(u32::from(u8::from(unit.arg)))) == expected + } + _ => false, + }) + } + + fn has_intrinsic_1(code: &CodeObject, expected: IntrinsicFunction1) -> bool { + code.instructions.iter().any(|unit| match unit.op { + Instruction::CallIntrinsic1 { func } => { + func.get(OpArg::new(u32::from(u8::from(unit.arg)))) == expected + } + _ => false, + }) + } + + #[test] + fn test_trace_assert_true_try_pair() { + let trace = compile_exec_late_cfg_trace( + "\ +try: + assert True +except AssertionError as e: + fail() +try: + assert True, 'msg' +except AssertionError as e: + fail() +", + ); + for (stage, dump) in trace { + eprintln!("=== {stage} ===\n{dump}"); + } + } + + #[test] + fn test_trace_for_unpack_list_literal() { + let trace = compile_exec_late_cfg_trace( + "\ +result = [] +for x, in [(1,), (2,), (3,)]: + result.append(x) +", + ); + for (stage, dump) in trace { + eprintln!("=== {stage} ===\n{dump}"); + } + } + + #[test] + fn test_trace_break_in_finally_function() { + let trace = compile_single_function_late_cfg_trace( + "\ +def f(self): + count = 0 + while count < 2: + count += 1 + try: + pass + finally: + break + self.assertEqual(count, 1) +", + "f", + ); + for (stage, dump) in trace { + eprintln!("=== {stage} ===\n{dump}"); + } + } + + #[test] + fn test_import_originated_name_disables_method_call_optimization_even_with_local_import() { + let code = compile_exec( + "\ +import warnings + +def f(ch): + import warnings + warnings.warn( + '\"\\\\%c\" is an invalid escape sequence' % ch + if 0x20 <= ch < 0x7F + else '\"\\\\x%02x\" is an invalid escape sequence' % ch, + DeprecationWarning, + stacklevel=2, + ) +", + ); + let f = find_code(&code, "f").expect("missing f code"); + let ops: Vec<_> = f.instructions.iter().map(|unit| unit.op).collect(); + let warn_attr = ops + .iter() + .position(|op| matches!(op, Instruction::LoadAttr { .. })) + .expect("missing LOAD_ATTR for warnings.warn"); + let push_null = ops[warn_attr + 10..] + .iter() + .position(|op| matches!(op, Instruction::PushNull)) + .map(|idx| warn_attr + 10 + idx) + .expect("expected PUSH_NULL after plain LOAD_ATTR"); + + let load_attr = match f.instructions[warn_attr].op { + Instruction::LoadAttr { namei } => namei.get(OpArg::new(u32::from(u8::from( + f.instructions[warn_attr].arg, + )))), + _ => unreachable!(), + }; + assert!( + !load_attr.is_method(), + "import-originated names should use plain LOAD_ATTR" + ); + assert!( + matches!(ops[push_null + 1], Instruction::LoadSmallInt { .. }), + "expected warning message expression to start after PUSH_NULL, got ops={ops:?}" + ); + } + + #[test] + fn test_trace_constant_false_elif_chain() { + let trace = compile_exec_late_cfg_trace( + "\ +if 0: pass +elif 0: pass +elif 0: pass +elif 0: pass +else: pass +", + ); + for (stage, dump) in trace { + eprintln!("=== {stage} ===\n{dump}"); + } + } + + #[test] + fn test_trace_multi_pass_suite() { + let trace = compile_exec_late_cfg_trace( + "\ +if 1: + # + # + # + pass + pass + # + pass + # +", + ); + for (stage, dump) in trace { + eprintln!("=== {stage} ===\n{dump}"); + } + } + + #[test] + fn test_trace_single_compare_if() { + let trace = compile_exec_late_cfg_trace( + "\ +if 1 == 1: + pass +", + ); + for (stage, dump) in trace { + eprintln!("=== {stage} ===\n{dump}"); + } + } + + #[test] + fn test_trace_comparison_suite() { + let trace = compile_exec_late_cfg_trace( + "\ +if 1: pass +x = (1 == 1) +if 1 == 1: pass +if 1 != 1: pass +if 1 < 1: pass +if 1 > 1: pass +if 1 <= 1: pass +if 1 >= 1: pass +if x is x: pass +if x is not x: pass +if 1 in (): pass +if 1 not in (): pass +", + ); + for (stage, dump) in trace { + eprintln!("=== {stage} ===\n{dump}"); + } + } + + #[test] + fn test_trace_if_for_except_layout() { + let trace = compile_exec_late_cfg_trace( + "\ +from sys import maxsize +if maxsize == 2147483647: + for s in ('2147483648', '0o40000000000', '0x100000000', '0b10000000000000000000000000000000'): + try: + x = eval(s) + except OverflowError: + fail(\"OverflowError on huge integer literal %r\" % s) +elif maxsize == 9223372036854775807: + pass +", + ); + for (stage, dump) in trace { + eprintln!("=== {stage} ===\n{dump}"); + } + } + + #[test] + fn test_break_in_finally_tail_loads_borrow_through_empty_fallthrough_block() { + let code = compile_exec( + "\ +def f(self): + count = 0 + while count < 2: + count += 1 + try: + pass + finally: + break + self.assertEqual(count, 1) +", + ); + let code = find_code(&code, "f").unwrap(); + let ops: Vec<_> = code + .instructions + .iter() + .map(|unit| unit.op) + .filter(|op| !matches!(op, Instruction::Cache)) + .collect(); + assert!( + ops.windows(5).any(|window| { + matches!( + window, + [ + Instruction::LoadFastBorrow { .. }, + Instruction::LoadAttr { .. }, + Instruction::LoadFastBorrow { .. }, + Instruction::LoadSmallInt { .. }, + Instruction::Call { .. } + ] + ) + }), + "{:?}", + code.instructions + .iter() + .map(|unit| unit.op) + .collect::>() + ); + } + + #[test] + fn test_plain_constant_bool_op_folds_to_selected_operand() { + let code = compile_exec( + "\ +x = 1 or 2 or 3 +", + ); + let ops: Vec<_> = code + .instructions + .iter() + .map(|unit| unit.op) + .filter(|op| !matches!(op, Instruction::Cache)) + .collect(); + let folded_small_int = code.instructions.iter().any(|unit| { + matches!( + unit.op, + Instruction::LoadSmallInt { i } + if i.get(OpArg::new(u32::from(u8::from(unit.arg)))) == 1 + ) + }); + let folded_const_one = code + .instructions + .iter() + .find_map(|unit| match unit.op { + Instruction::LoadConst { .. } => code.constants.get(usize::from(u8::from(unit.arg))), + _ => None, + }) + .is_some_and(|constant| { + matches!(constant, ConstantData::Integer { value } if *value == BigInt::from(1)) + }); + + assert!( + folded_small_int || folded_const_one, + "expected folded constant 1, got ops={ops:?}" + ); + assert!( + !ops.iter().any(|op| { + matches!( + op, + Instruction::Copy { .. } + | Instruction::ToBool + | Instruction::PopJumpIfTrue { .. } + | Instruction::PopJumpIfFalse { .. } + ) + }), + "plain constant BoolOp should not leave short-circuit scaffolding, got ops={ops:?}" + ); + } + + #[test] + fn test_taken_constant_boolop_load_const_uses_literal_location_like_cpython() { + let code = compile_exec( + "\ +def and_false(x): + return False and x + +def or_true(x): + return True or x +", + ); + let and_false = find_code(&code, "and_false").expect("missing and_false code"); + let or_true = find_code(&code, "or_true").expect("missing or_true code"); + + // CPython 3.14 codegen_boolop() VISITs the selected literal before the + // short-circuit jump is optimized away, so the surviving LOAD_CONST + // keeps the literal range rather than the whole BoolOp range. + assert_eq!( + and_false.linetable.as_ref(), + &[0x80, 0x00, 0xd8, 0x0b, 0x10, 0xd0, 0x04, 0x16] + ); + assert_eq!( + or_true.linetable.as_ref(), + &[0x80, 0x00, 0xd8, 0x0b, 0x0f, 0xd0, 0x04, 0x14] + ); + } + + #[test] + fn test_assert_false_message_call_uses_assert_location_like_cpython() { + let code = compile_exec( + "\ +def f(): + assert False, \"x\" +", + ); + let f = find_code(&code, "f").expect("missing f code"); + + // CPython 3.14 codegen_assert() emits LOAD_COMMON_CONSTANT and CALL + // at LOC(assert statement), then RAISE_VARARGS at LOC(test). + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x04, 0x15, 0x90, 0x23, 0xd3, 0x04, 0x15, 0x88, 0x35, + ] ); } - fn localsplus_name(code: &CodeObject, idx: usize) -> Option<&str> { - if idx < code.varnames.len() { - return Some(code.varnames[idx].as_str()); - } + #[test] + fn test_static_swap_implicit_return_keeps_preswap_store_location_like_cpython() { + let code = compile_exec( + "\ +def f(a, b): + a, b = a, b + b, a = a, b +", + ); + let f = find_code(&code, "f").expect("missing f code"); - let mut extra_idx = idx - code.varnames.len(); - for cellvar in &code.cellvars { - if !code.varnames.iter().any(|varname| varname == cellvar) { - if extra_idx == 0 { - return Some(cellvar.as_str()); - } - extra_idx -= 1; - } - } - code.freevars.get(extra_idx).map(|name| name.as_str()) + // CPython 3.14 flowgraph.c resolves line numbers before + // optimize_basic_block() turns BUILD_TUPLE/UNPACK_SEQUENCE into SWAP + // and apply_static_swaps() reorders the STORE_FAST pair. The + // synthetic return epilogue therefore keeps the pre-swap final store + // location. + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x0b, 0x0c, 0x80, 0x71, 0xd8, 0x0b, 0x0c, 0x82, 0x71, + ] + ); } - fn has_common_constant(code: &CodeObject, expected: bytecode::CommonConstant) -> bool { - code.instructions.iter().any(|unit| match unit.op { - Instruction::LoadCommonConstant { idx } => { - idx.get(OpArg::new(u32::from(u8::from(unit.arg)))) == expected - } - _ => false, - }) - } + #[test] + fn test_unpack_store_pair_jump_uses_second_target_location_like_cpython() { + let code = compile_exec( + "\ +def f(value): + if value.startswith('=?'): + try: + token, value = get_encoded_word(value) + except E: + token, value = get_atext(value) + else: + token, value = get_atext(value) + atom.append(token) +", + ); + let f = find_code(&code, "f").expect("missing f code"); + let jump_position = f + .instructions + .iter() + .zip(&f.locations) + .find_map(|(unit, (location, end_location))| { + matches!(unit.op, Instruction::JumpForward { .. }).then_some(( + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .expect("missing post-try JUMP_FORWARD"); - fn has_intrinsic_1(code: &CodeObject, expected: IntrinsicFunction1) -> bool { - code.instructions.iter().any(|unit| match unit.op { - Instruction::CallIntrinsic1 { func } => { - func.get(OpArg::new(u32::from(u8::from(unit.arg)))) == expected - } - _ => false, - }) + // CPython 3.14 flowgraph.c turns the second STORE_FAST into a NOP + // during STORE_FAST_STORE_FAST fusion, then NOP removal copies that + // second target location onto the following no-location jump. + assert_eq!(jump_position, (4, 20, 4, 25)); } #[test] - fn test_trace_assert_true_try_pair() { - let trace = compile_exec_late_cfg_trace( + fn test_chained_store_pair_jump_keeps_copy_target_location_like_cpython() { + let code = compile_exec( "\ -try: - assert True -except AssertionError as e: - fail() -try: - assert True, 'msg' -except AssertionError as e: - fail() +def f(flag): + if flag: + a = b = True + else: + a = False + b = False + g(a, b) + return a ", ); - for (stage, dump) in trace { - eprintln!("=== {stage} ===\n{dump}"); - } + let f = find_code(&code, "f").expect("missing f code"); + let jump_position = f + .instructions + .windows(2) + .zip(f.locations.windows(2)) + .find_map(|(units, locations)| { + matches!(units[0].op, Instruction::StoreFastStoreFast { .. }) + .then(|| { + matches!(units[1].op, Instruction::JumpForward { .. }).then_some(( + locations[1].0.line.get(), + locations[1].0.character_offset.get(), + locations[1].1.line.get(), + locations[1].1.character_offset.get(), + )) + }) + .flatten() + }) + .expect("missing jump after chained STORE_FAST_STORE_FAST"); + + // CPython 3.14 flowgraph.c preserves the second chained-assignment + // target location on the jump that skips the else body. + assert_eq!(jump_position, (3, 13, 3, 14)); } #[test] - fn test_trace_for_unpack_list_literal() { - let trace = compile_exec_late_cfg_trace( + fn test_tuple_store_pair_jump_keeps_fused_store_location_like_cpython() { + let code = compile_exec( "\ -result = [] -for x, in [(1,), (2,), (3,)]: - result.append(x) +def f(flag, n, exp): + if flag: + n, d = n * 10**exp, 1 + else: + d = -exp + g(n, d) + return n ", ); - for (stage, dump) in trace { - eprintln!("=== {stage} ===\n{dump}"); - } + let f = find_code(&code, "f").expect("missing f code"); + let jump_position = f + .instructions + .windows(2) + .zip(f.locations.windows(2)) + .find_map(|(units, locations)| { + matches!(units[0].op, Instruction::StoreFastStoreFast { .. }) + .then(|| { + matches!(units[1].op, Instruction::JumpForward { .. }).then_some(( + locations[1].0.line.get(), + locations[1].0.character_offset.get(), + locations[1].1.line.get(), + locations[1].1.character_offset.get(), + )) + }) + .flatten() + }) + .expect("missing jump after tuple STORE_FAST_STORE_FAST"); + + // Without COPY before the fused stores, CPython keeps the fused + // STORE_FAST_STORE_FAST location on the following jump. + assert_eq!(jump_position, (3, 12, 3, 13)); } #[test] - fn test_trace_break_in_finally_function() { - let trace = compile_single_function_late_cfg_trace( + fn test_genexpr_make_closure_and_call_use_genexpr_location_like_cpython() { + let code = compile_exec( "\ -def f(self): - count = 0 - while count < 2: - count += 1 - try: - pass - finally: - break - self.assertEqual(count, 1) +def f(parameters): + return ((p, type(p)) for p in parameters) ", - "f", ); - for (stage, dump) in trace { - eprintln!("=== {stage} ===\n{dump}"); - } + let f = find_code(&code, "f").expect("missing f code"); + let genexpr = find_code(f, "").expect("missing genexpr code"); + + // CPython 3.14 codegen_comprehension() uses LOC(e) for + // codegen_make_closure(), the outer CALL, and the implicit .0 load + // in codegen_sync_comprehension_generator(). + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd9, 0x0b, 0x2d, 0xa1, 0x2a, 0xd3, 0x0b, 0x2d, 0xd0, 0x04, 0x2d, + ] + ); + assert_eq!( + genexpr.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xd0, 0x0b, 0x2d, 0xa1, 0x2a, 0x98, 0x51, 0x94, 0x04, 0x90, + 0x51, 0x93, 0x07, 0x8d, 0x4c, 0xa3, 0x2a, 0xf9, + ] + ); } #[test] - fn test_import_originated_name_disables_method_call_optimization_even_with_local_import() { + fn test_implicit_call_genexpr_range_includes_call_parens_like_cpython() { let code = compile_exec( "\ -import warnings +def implicit(): + return list(x for x in range(10)) -def f(ch): - import warnings - warnings.warn( - '\"\\\\%c\" is an invalid escape sequence' % ch - if 0x20 <= ch < 0x7F - else '\"\\\\x%02x\" is an invalid escape sequence' % ch, - DeprecationWarning, - stacklevel=2, - ) +def explicit(): + return list((x for x in range(10))) ", ); - let f = find_code(&code, "f").expect("missing f code"); - let ops: Vec<_> = f.instructions.iter().map(|unit| unit.op).collect(); - let warn_attr = ops - .iter() - .position(|op| matches!(op, Instruction::LoadAttr { .. })) - .expect("missing LOAD_ATTR for warnings.warn"); - let push_null = ops[warn_attr + 10..] - .iter() - .position(|op| matches!(op, Instruction::PushNull)) - .map(|idx| warn_attr + 10 + idx) - .expect("expected PUSH_NULL after plain LOAD_ATTR"); + let implicit = find_code(&code, "implicit").expect("missing implicit code"); + let implicit_gen = find_code(implicit, "").expect("missing implicit genexpr code"); + let explicit = find_code(&code, "explicit").expect("missing explicit code"); + let explicit_gen = find_code(explicit, "").expect("missing explicit genexpr code"); - let load_attr = match f.instructions[warn_attr].op { - Instruction::LoadAttr { namei } => namei.get(OpArg::new(u32::from(u8::from( - f.instructions[warn_attr].arg, - )))), - _ => unreachable!(), - }; - assert!( - !load_attr.is_method(), - "import-originated names should use plain LOAD_ATTR" + // CPython's parser gives an unparenthesized sole GeneratorExp call + // argument the call-parenthesized range, and codegen_comprehension() + // uses LOC(e) for MAKE_FUNCTION, the outer CALL, and the implicit .0 + // LOAD_FAST. Explicitly parenthesized genexprs already carry their own + // parentheses and must not be widened again. + assert_eq!( + implicit.linetable.as_ref(), + &[ + 0x80, 0x00, 0xdc, 0x0b, 0x0f, 0xd1, 0x0f, 0x25, 0x9c, 0x35, 0xa0, 0x12, 0x9c, 0x39, + 0xd3, 0x0f, 0x25, 0xd3, 0x0b, 0x25, 0xd0, 0x04, 0x25, + ] ); - assert!( - matches!(ops[push_null + 1], Instruction::LoadSmallInt { .. }), - "expected warning message expression to start after PUSH_NULL, got ops={ops:?}" + assert_eq!( + implicit_gen.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xd0, 0x0f, 0x25, 0x99, 0x39, 0x90, 0x61, 0x94, 0x01, 0x9b, + 0x39, 0xf9, + ] + ); + assert_eq!( + explicit_gen.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xd0, 0x10, 0x26, 0x99, 0x49, 0x90, 0x71, 0x94, 0x11, 0x9b, + 0x49, 0xf9, + ] ); } #[test] - fn test_trace_constant_false_elif_chain() { - let trace = compile_exec_late_cfg_trace( + fn test_implicit_call_genexpr_parenthesized_element_range_like_cpython() { + let code = compile_exec( "\ -if 0: pass -elif 0: pass -elif 0: pass -elif 0: pass -else: pass +def bytes_binop(): + return bytes((x ^ 0x5C) for x in range(256)) + +def dict_tuple(d): + return dict((v, k) for (k, v) in d.items()) + +def plain_tuple_elt(xs): + return list((x, y) for x, y in xs) + +def explicit_gen(xs): + return list(((x, y) for x, y in xs)) ", ); - for (stage, dump) in trace { - eprintln!("=== {stage} ===\n{dump}"); - } + let bytes_binop = find_code(&code, "bytes_binop").expect("missing bytes_binop code"); + let bytes_gen = find_code(bytes_binop, "").expect("missing bytes genexpr code"); + let dict_tuple = find_code(&code, "dict_tuple").expect("missing dict_tuple code"); + let dict_gen = find_code(dict_tuple, "").expect("missing dict genexpr code"); + let plain_tuple_elt = + find_code(&code, "plain_tuple_elt").expect("missing plain_tuple_elt code"); + let plain_gen = + find_code(plain_tuple_elt, "").expect("missing plain genexpr code"); + let explicit_gen = find_code(&code, "explicit_gen").expect("missing explicit_gen code"); + let explicit_inner = + find_code(explicit_gen, "").expect("missing explicit genexpr code"); + + // CPython 3.14's parser includes the call argument parentheses in + // LOC(GeneratorExp) for implicit sole-argument generator expressions, + // even when the element expression itself starts with parentheses. + assert_eq!( + bytes_binop.linetable.as_ref(), + &[ + 0x80, 0x00, 0xdc, 0x0b, 0x10, 0xd1, 0x10, 0x30, 0xa4, 0x55, 0xa8, 0x33, 0xa4, 0x5a, + 0xd3, 0x10, 0x30, 0xd3, 0x0b, 0x30, 0xd0, 0x04, 0x30, + ] + ); + assert_eq!( + bytes_gen.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xd0, 0x10, 0x30, 0xa1, 0x5a, 0xa0, 0x01, 0x90, 0x64, 0x97, + 0x28, 0x92, 0x28, 0xa3, 0x5a, 0xf9, + ] + ); + assert_eq!( + dict_tuple.linetable.as_ref(), + &[ + 0x80, 0x00, 0xdc, 0x0b, 0x0f, 0xd1, 0x0f, 0x2f, 0xa0, 0x51, 0xa7, 0x57, 0xa1, 0x57, + 0xa4, 0x59, 0xd3, 0x0f, 0x2f, 0xd3, 0x0b, 0x2f, 0xd0, 0x04, 0x2f, + ] + ); + assert_eq!( + dict_gen.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xd0, 0x0f, 0x2f, 0xa1, 0x59, 0x99, 0x36, 0x98, 0x41, 0x90, + 0x11, 0x95, 0x06, 0xa3, 0x59, 0xf9, + ] + ); + assert_eq!( + plain_tuple_elt.linetable.as_ref(), + &[ + 0x80, 0x00, 0xdc, 0x0b, 0x0f, 0xd1, 0x0f, 0x26, 0xa1, 0x32, 0xd3, 0x0f, 0x26, 0xd3, + 0x0b, 0x26, 0xd0, 0x04, 0x26, + ] + ); + assert_eq!( + plain_gen.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xd0, 0x0f, 0x26, 0xa1, 0x32, 0x99, 0x34, 0x98, 0x31, 0x90, + 0x11, 0x95, 0x06, 0xa3, 0x32, 0xf9, + ] + ); + assert_eq!( + explicit_gen.linetable.as_ref(), + &[ + 0x80, 0x00, 0xdc, 0x0b, 0x0f, 0xd1, 0x10, 0x27, 0xa1, 0x42, 0xd3, 0x10, 0x27, 0xd3, + 0x0b, 0x28, 0xd0, 0x04, 0x28, + ] + ); + assert_eq!( + explicit_inner.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xd0, 0x10, 0x27, 0xa1, 0x42, 0x99, 0x44, 0x98, 0x41, 0x90, + 0x21, 0x95, 0x16, 0xa3, 0x42, 0xf9, + ] + ); } #[test] - fn test_trace_multi_pass_suite() { - let trace = compile_exec_late_cfg_trace( + fn test_genexpr_filter_cleanup_jumps_use_element_location_like_cpython() { + let code = compile_exec( "\ -if 1: - # - # - # - pass - pass - # - pass - # +def simple(names): + return (x for x in names if not _ishidden(x)) + +def boolop(fields): + return (f for f in fields if f.init and not f.kw_only) ", ); - for (stage, dump) in trace { - eprintln!("=== {stage} ===\n{dump}"); - } + let simple = find_code(&code, "simple").expect("missing simple code"); + let simple_gen = find_code(simple, "").expect("missing simple genexpr code"); + let boolop = find_code(&code, "boolop").expect("missing boolop code"); + let boolop_gen = find_code(boolop, "").expect("missing boolop genexpr code"); + + // CPython 3.14 codegen_sync_comprehension_generator() emits the + // comprehension guard jump to if_cleanup, then emits the if_cleanup + // backedge with elt_loc. flowgraph.c::jump_thread() copies that target + // jump location to the threaded POP_JUMP/NOT_TAKEN cleanup path. + assert_eq!( + simple_gen.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xd0, 0x0b, 0x31, 0x91, 0x75, 0x90, 0x21, 0xa4, 0x49, 0xa8, + 0x61, 0xa7, 0x4c, 0x8f, 0x41, 0x8a, 0x41, 0x93, 0x75, 0xf9, + ] + ); + assert_eq!( + boolop_gen.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xd0, 0x0b, 0x3a, 0x91, 0x76, 0x90, 0x21, 0xa7, 0x16, 0xa5, + 0x16, 0x8c, 0x41, 0xb0, 0x01, 0xb7, 0x09, 0xb5, 0x09, 0x8f, 0x41, 0x8a, 0x41, 0x93, + 0x76, 0xf9, + ] + ); } #[test] - fn test_trace_single_compare_if() { - let trace = compile_exec_late_cfg_trace( + fn test_try_finally_exception_scaffolding_uses_no_location_like_cpython() { + let code = compile_exec( "\ -if 1 == 1: - pass +def f(self, node): + self.flag = True + try: + self.body(node) + finally: + self.flag = False ", ); - for (stage, dump) in trace { - eprintln!("=== {stage} ===\n{dump}"); - } + let f = find_code(&code, "f").expect("missing f code"); + + // CPython 3.14 codegen_try_finally() emits the exception path + // SETUP_CLEANUP/PUSH_EXC_INFO and POP_EXCEPT_AND_RERAISE with + // NO_LOCATION; flowgraph line propagation then gives only the + // finalbody's direct RERAISE the finalbody location. + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x10, 0x14, 0x80, 0x44, 0x84, 0x49, 0xf0, 0x02, 0x03, 0x05, 0x1a, + 0xd8, 0x08, 0x0c, 0x8f, 0x09, 0x89, 0x09, 0x90, 0x24, 0x8c, 0x0f, 0xe0, 0x14, 0x19, + 0x88, 0x04, 0x8e, 0x09, 0xf8, 0x90, 0x45, 0x88, 0x04, 0x8d, 0x09, 0xfa, + ] + ); } #[test] - fn test_trace_comparison_suite() { - let trace = compile_exec_late_cfg_trace( + fn test_adjacent_no_location_entries_merge_like_cpython() { + let code = compile_exec( "\ -if 1: pass -x = (1 == 1) -if 1 == 1: pass -if 1 != 1: pass -if 1 < 1: pass -if 1 > 1: pass -if 1 <= 1: pass -if 1 >= 1: pass -if x is x: pass -if x is not x: pass -if 1 in (): pass -if 1 not in (): pass +def f(file): + if sys.platform == \"win32\": + try: + import nt + if not nt._supports_virtual_terminal(): + return False + except (ImportError, AttributeError): + return False + try: + return os.isatty(file.fileno()) + except OSError: + return hasattr(file, \"isatty\") and file.isatty() +", + ); + let f = find_code(&code, "f").expect("missing f code"); + + // CPython's NO_LOCATION is {-1, -1, -1, -1}, and + // assemble.c::assemble_location_info() merges adjacent instructions + // with the same NO_LOCATION into one linetable entry. + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xdc, 0x07, 0x0a, 0x87, 0x7c, 0x81, 0x7c, 0x90, 0x77, 0xd4, 0x07, 0x1e, + 0xf0, 0x02, 0x05, 0x09, 0x19, 0xdb, 0x0c, 0x15, 0xd8, 0x13, 0x15, 0xd7, 0x13, 0x30, + 0xd1, 0x13, 0x30, 0xd7, 0x13, 0x32, 0xd2, 0x13, 0x32, 0xd9, 0x17, 0x1c, 0xf0, 0x03, + 0x00, 0x14, 0x33, 0xf0, 0x08, 0x03, 0x05, 0x39, 0xdc, 0x0f, 0x11, 0x8f, 0x79, 0x89, + 0x79, 0x98, 0x14, 0x9f, 0x1b, 0x99, 0x1b, 0x9b, 0x1d, 0xd3, 0x0f, 0x27, 0xd0, 0x08, + 0x27, 0xf8, 0xf4, 0x07, 0x00, 0x11, 0x1c, 0x9c, 0x5e, 0xd0, 0x0f, 0x2c, 0xf4, 0x00, + 0x01, 0x09, 0x19, 0xda, 0x13, 0x18, 0xf0, 0x03, 0x01, 0x09, 0x19, 0xfb, 0xf4, 0x08, + 0x00, 0x0c, 0x13, 0xf4, 0x00, 0x01, 0x05, 0x39, 0xdc, 0x0f, 0x16, 0x90, 0x74, 0x98, + 0x58, 0xd3, 0x0f, 0x26, 0xd7, 0x0f, 0x38, 0xd0, 0x0f, 0x38, 0xa8, 0x34, 0xaf, 0x3b, + 0xa9, 0x3b, 0xab, 0x3d, 0xd2, 0x08, 0x38, 0xf0, 0x03, 0x01, 0x05, 0x39, 0xfa, + ] + ); + } + + #[test] + fn test_fstring_format_ops_use_formatted_value_location_like_cpython() { + let code = compile_exec( + "\ +def simple(self): + return f'{self.value}' + +def spec(x): + return f'{x!r:>3}' ", ); - for (stage, dump) in trace { - eprintln!("=== {stage} ===\n{dump}"); - } + let simple = find_code(&code, "simple").expect("missing simple code"); + let spec = find_code(&code, "spec").expect("missing spec code"); + + // CPython 3.14 codegen_formatted_value() VISITs the inner expression + // first, then emits CONVERT_VALUE / FORMAT_* at LOC(FormattedValue). + assert_eq!( + simple.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x0e, 0x12, 0x8f, 0x6a, 0x89, 0x6a, 0x88, 0x5c, 0xd0, 0x04, 0x1a, + ] + ); + assert_eq!( + spec.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x0e, 0x0f, 0x88, 0x58, 0x90, 0x22, 0x88, 0x58, 0xd0, 0x04, 0x16, + ] + ); } #[test] - fn test_trace_if_for_except_layout() { - let trace = compile_exec_late_cfg_trace( + fn test_debug_fstring_literal_location_like_cpython() { + fn string_load_position(code: &CodeObject, expected: &str) -> (usize, usize, usize, usize) { + code.instructions + .iter() + .zip(&code.locations) + .find_map(|(unit, (location, end_location))| { + let Instruction::LoadConst { consti } = unit.op else { + return None; + }; + let constant = + &code.constants[consti.get(OpArg::new(u32::from(u8::from(unit.arg))))]; + matches!(constant, ConstantData::Str { value } if value.to_string() == expected) + .then_some(( + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .expect("missing debug f-string literal") + } + + let code = compile_exec( "\ -from sys import maxsize -if maxsize == 2147483647: - for s in ('2147483648', '0o40000000000', '0x100000000', '0b10000000000000000000000000000000'): - try: - x = eval(s) - except OverflowError: - fail(\"OverflowError on huge integer literal %r\" % s) -elif maxsize == 9223372036854775807: - pass +def simple(x): + return f'{x=}' + +def prefixed(x): + return f'a {x=} b' ", ); - for (stage, dump) in trace { - eprintln!("=== {stage} ===\n{dump}"); - } + let simple = find_code(&code, "simple").expect("missing simple code"); + let prefixed = find_code(&code, "prefixed").expect("missing prefixed code"); + + assert_eq!( + string_load_position(simple, "x="), + (2, 15, 2, 17), + "CPython represents f'{{x=}}' debug text as a literal at the expression/debug-text location" + ); + assert_eq!( + string_load_position(prefixed, "a x="), + (5, 14, 5, 19), + "CPython extends a pending f-string literal through the debug text range" + ); } #[test] - fn test_break_in_finally_tail_loads_borrow_through_empty_fallthrough_block() { + fn test_fstring_format_spec_build_string_location_like_cpython() { let code = compile_exec( "\ -def f(self): - count = 0 - while count < 2: - count += 1 - try: - pass - finally: - break - self.assertEqual(count, 1) +def simple(lbl, label_width): + return f'{lbl:>{label_width}}' + +def padded(digits, int_len): + return f'{digits:0>{int_len + 1}d}' ", ); - let code = find_code(&code, "f").unwrap(); - let ops: Vec<_> = code - .instructions - .iter() - .map(|unit| unit.op) - .filter(|op| !matches!(op, Instruction::Cache)) - .collect(); - assert!( - ops.windows(5).any(|window| { - matches!( - window, - [ - Instruction::LoadFastBorrow { .. }, - Instruction::LoadAttr { .. }, - Instruction::LoadFastBorrow { .. }, - Instruction::LoadSmallInt { .. }, - Instruction::Call { .. } - ] - ) - }), - "{:?}", + let simple = find_code(&code, "simple").expect("missing simple code"); + let padded = find_code(&code, "padded").expect("missing padded code"); + + let build_string_position = |code: &CodeObject| { code.instructions .iter() - .map(|unit| unit.op) - .collect::>() + .zip(&code.locations) + .find_map(|(unit, (location, end_location))| { + matches!(unit.op, Instruction::BuildString { .. }).then_some(( + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .expect("missing format-spec BUILD_STRING") + }; + + assert_eq!( + build_string_position(simple), + (2, 18, 2, 33), + "CPython uses the format-spec JoinedStr location, including the ':' prefix, for BUILD_STRING" + ); + assert_eq!( + build_string_position(padded), + (5, 21, 5, 38), + "CPython format-spec JoinedStr location spans from ':' through the final literal" ); } #[test] - fn test_plain_constant_bool_op_folds_to_selected_operand() { + fn test_joined_string_literals_extend_pending_literal_location_like_cpython() { let code = compile_exec( "\ -x = 1 or 2 or 3 +def f(a): + return ( + 'x' + f'y{a}z' + 'w' + ) ", ); - let ops: Vec<_> = code - .instructions - .iter() - .map(|unit| unit.op) - .filter(|op| !matches!(op, Instruction::Cache)) - .collect(); - let folded_small_int = code.instructions.iter().any(|unit| { - matches!( - unit.op, - Instruction::LoadSmallInt { i } - if i.get(OpArg::new(u32::from(u8::from(unit.arg)))) == 1 - ) - }); - let folded_const_one = code - .instructions - .iter() - .find_map(|unit| match unit.op { - Instruction::LoadConst { .. } => code.constants.get(usize::from(u8::from(unit.arg))), - _ => None, - }) - .is_some_and(|constant| { - matches!(constant, ConstantData::Integer { value } if *value == BigInt::from(1)) - }); - - assert!( - folded_small_int || folded_const_one, - "expected folded constant 1, got ops={ops:?}" - ); - assert!( - !ops.iter().any(|op| { - matches!( - op, - Instruction::Copy { .. } - | Instruction::ToBool - | Instruction::PopJumpIfTrue { .. } - | Instruction::PopJumpIfFalse { .. } - ) - }), - "plain constant BoolOp should not leave short-circuit scaffolding, got ops={ops:?}" + let f = find_code(&code, "f").expect("missing f code"); + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xf0, 0x04, 0x01, 0x09, 0x0c, 0xd8, 0x0c, 0x0d, 0x88, 0x33, 0xf0, 0x00, + 0x01, 0x0f, 0x0c, 0xf0, 0x03, 0x02, 0x09, 0x0c, 0xf0, 0x03, 0x04, 0x05, 0x06, + ], + "CPython parser/codegen represents adjacent f-string literal fragments as Constant ranges spanning the merged fragments" ); } @@ -16667,6 +19313,60 @@ def outer(null): ); } + #[test] + fn test_decorated_definitions_use_cpython_locations() { + let code = compile_exec( + "\ +def dec(f): return f + +class C: + @dec + def f(self): + yield + +@dec +class D: + pass + +class E: + @dec + def g(self, flags: int, /) -> memoryview: + raise NotImplementedError +", + ); + let c = find_code(&code, "C").expect("missing C code"); + let d = find_code(&code, "D").expect("missing D code"); + let e = find_code(&code, "E").expect("missing E code"); + let annotate = find_code(e, "__annotate__").expect("missing annotation code"); + + // CPython 3.14 codegen_function()/codegen_class() evaluate + // decorators first, then use LOC(s) for codegen_make_closure() and + // codegen_nameop(); codegen_apply_decorators() emits CALL at each + // decorator expression's location. + assert_eq!( + c.linetable.as_ref(), + &[ + 0xf8, 0x87, 0x00, 0x80, 0x00, 0xd8, 0x05, 0x08, 0xf1, 0x02, 0x01, 0x05, 0x0e, 0xf3, + 0x03, 0x00, 0x06, 0x09, 0xf6, 0x02, 0x01, 0x05, 0x0e, + ] + ); + assert_eq!(d.linetable.as_ref(), &[0x86, 0x00, 0xe3, 0x04, 0x08]); + assert_eq!( + e.linetable.as_ref(), + &[ + 0xf8, 0x87, 0x00, 0x80, 0x00, 0xd8, 0x05, 0x08, 0xf7, 0x02, 0x01, 0x05, 0x22, 0xf3, + 0x03, 0x00, 0x06, 0x09, 0xf6, 0x02, 0x01, 0x05, 0x22, + ] + ); + assert_eq!( + annotate.linetable.as_ref(), + &[ + 0xf8, 0x80, 0x00, 0xf7, 0x00, 0x01, 0x05, 0x22, 0xf1, 0x00, 0x01, 0x05, 0x22, 0x91, + 0x73, 0xf0, 0x00, 0x01, 0x05, 0x22, 0xa1, 0x2a, 0xf1, 0x00, 0x01, 0x05, 0x22, + ] + ); + } + #[test] fn test_taken_constant_boolop_jump_disables_following_borrows() { for source in [ @@ -17337,6 +20037,25 @@ def f(xs): 1, "fallback call path should remain for shadowed any()" ); + let genexpr_const_count = f + .constants + .iter() + .filter(|constant| { + matches!(constant, ConstantData::Code { code } if code.obj_name == "") + }) + .count(); + assert_eq!( + genexpr_const_count, 1, + "optimized and fallback any(genexpr) paths should share the same CPython-range code const" + ); + assert_eq!( + f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xdf, 0x0b, 0x0e, 0x8b, 0x33, 0x89, 0x6f, 0x99, 0x22, 0x8b, 0x6f, 0x8f, + 0x33, 0x8c, 0x33, 0xd0, 0x04, 0x1d, 0x8a, 0x33, 0xd0, 0x04, 0x1d, 0x88, 0x33, 0x89, + 0x6f, 0x99, 0x22, 0x8b, 0x6f, 0xd3, 0x0b, 0x1d, 0xd0, 0x04, 0x1d, + ] + ); } #[test] @@ -17369,6 +20088,14 @@ def set_f(xs): }) .expect("tuple(genexpr) fast path should emit LIST_APPEND"); assert_eq!(tuple_list_append, 2); + assert_eq!( + tuple_f.linetable.as_ref(), + &[ + 0x80, 0x00, 0xdf, 0x0b, 0x10, 0x8c, 0x35, 0x91, 0x0f, 0x99, 0x42, 0x93, 0x0f, 0x8f, + 0x35, 0xd0, 0x04, 0x1f, 0x88, 0x35, 0x91, 0x0f, 0x99, 0x42, 0x93, 0x0f, 0xd3, 0x0b, + 0x1f, 0xd0, 0x04, 0x1f, + ] + ); let list_f = find_code(&code, "list_f").expect("missing list_f code"); assert!( @@ -17787,6 +20514,27 @@ def aug(x, a, b, y): ); } + #[test] + fn test_augassign_constant_slice_copy_uses_subscript_location_like_cpython() { + let code = compile_exec( + "\ +def aug_const(x, y): + x[1:2] += y +", + ); + let aug_const = find_code(&code, "aug_const").expect("missing aug_const code"); + + // CPython 3.14 codegen_augassign() visits a constant slice, then emits + // COPY/COPY/BINARY_OP NB_SUBSCR at LOC(target), not at LOC(slice). + assert_eq!( + aug_const.linetable.as_ref(), + &[ + 0x80, 0x00, 0xd8, 0x04, 0x05, 0x80, 0x63, 0x87, 0x46, 0x88, 0x61, 0x85, 0x4b, 0x85, + 0x46, + ] + ); + } + #[test] fn test_loop_return_reorders_backedge_before_exit_cleanup() { let code = compile_exec( @@ -19618,6 +22366,38 @@ def f(x): ); } + #[test] + fn test_match_negative_value_const_precedes_implicit_none_like_cpython() { + let code = compile_exec( + "\ +def f(x): + match x: + case -0.0: + y = 0 +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let negative_zero_index = f + .constants + .iter() + .position(|constant| { + matches!( + constant, + ConstantData::Float { value } if *value == 0.0 && value.is_sign_negative() + ) + }) + .expect("missing folded -0.0 match value"); + let none_index = f + .constants + .iter() + .position(|constant| matches!(constant, ConstantData::None)) + .expect("missing implicit None"); + assert!( + negative_zero_index < none_index, + "CPython ast_preprocess.c folds MatchValue constants before codegen registers the implicit None" + ); + } + #[test] fn test_match_or_uses_shared_success_block() { let code = compile_exec( @@ -24046,6 +26826,92 @@ class C: ); } + #[test] + fn test_future_annotations_flag_is_inherited_like_cpython() { + let code = compile_exec( + "\ +from __future__ import annotations + +def f(): + class C: + pass + return C +", + ); + assert!(code.flags.contains(bytecode::CodeFlags::FUTURE_ANNOTATIONS)); + let f = find_code(&code, "f").expect("missing f code"); + assert!(f.flags.contains(bytecode::CodeFlags::FUTURE_ANNOTATIONS)); + let class_code = find_code(f, "C").expect("missing C code"); + assert!( + class_code + .flags + .contains(bytecode::CodeFlags::FUTURE_ANNOTATIONS) + ); + } + + #[test] + fn test_annotation_scope_nested_flag_matches_cpython() { + let code = compile_exec( + "\ +class C: + x: int + +def outer(): + class D: + y: int +", + ); + let class_code = find_code(&code, "C").expect("missing C code"); + let class_annotate = + find_code(class_code, "__annotate__").expect("missing class annotation code"); + assert!( + !class_annotate.flags.contains(bytecode::CodeFlags::NESTED), + "module-level class annotation scope should not be nested" + ); + + let outer = find_code(&code, "outer").expect("missing outer code"); + let nested_class = find_code(outer, "D").expect("missing nested class code"); + let nested_annotate = + find_code(nested_class, "__annotate__").expect("missing nested annotation code"); + assert!( + nested_annotate.flags.contains(bytecode::CodeFlags::NESTED), + "annotation scope under a nested class should be nested" + ); + } + + #[test] + fn test_function_like_parent_marks_child_nested_like_cpython() { + let code = compile_exec( + "\ +x = lambda: (lambda: None) +type A[T] = T +", + ); + let outer_lambda = find_code(&code, "").expect("missing outer lambda code"); + assert!( + !outer_lambda.flags.contains(bytecode::CodeFlags::NESTED), + "module-level lambda should not be nested" + ); + let inner_lambda = + find_direct_child_code(outer_lambda, "").expect("missing inner lambda code"); + assert!( + inner_lambda.flags.contains(bytecode::CodeFlags::NESTED), + "lambda inside lambda should be nested" + ); + + let type_params = + find_code(&code, "").expect("missing type params code"); + assert!( + !type_params.flags.contains(bytecode::CodeFlags::NESTED), + "module-level type-parameter scope should not be nested" + ); + let type_alias = find_direct_child_code(type_params, "A").expect("missing type alias code"); + assert!( + type_alias.flags.contains(bytecode::CodeFlags::NESTED), + "type alias body inside type-parameter scope should be nested" + ); + } + #[test] fn test_plain_super_call_keeps_class_freevar() { let code = compile_exec( @@ -25657,6 +28523,46 @@ def f(obj): ); } + #[test] + fn test_slice_none_bounds_and_build_slice_use_slice_location_like_cpython() { + let code = compile_exec( + "\ +def f(obj, step): + return obj[::step] +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let slice_positions: Vec<_> = f + .instructions + .iter() + .zip(&f.locations) + .filter_map(|(unit, (location, end_location))| { + let op = match unit.op { + Instruction::LoadConst { .. } => "LOAD_CONST", + Instruction::BuildSlice { .. } => "BUILD_SLICE", + _ => return None, + }; + Some(( + op, + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .collect(); + + assert_eq!( + slice_positions, + vec![ + ("LOAD_CONST", 2, 16, 2, 22), + ("LOAD_CONST", 2, 16, 2, 22), + ("BUILD_SLICE", 2, 16, 2, 22), + ], + "CPython codegen_slice() emits missing bounds and BUILD_SLICE at LOC(slice)" + ); + } + #[test] fn test_bool_int_binop_constants_fold() { let code = compile_exec( @@ -26629,6 +29535,68 @@ def f(a, b, path): ); } + #[test] + fn test_with_return_value_uses_context_expr_location_like_cpython() { + let code = compile_exec( + "\ +def f(cm, func, args, kwds): + with cm: + return func(*args, **kwds) +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let return_positions: Vec<_> = f + .instructions + .iter() + .zip(&f.locations) + .filter_map(|(unit, (location, end_location))| { + matches!(unit.op, Instruction::ReturnValue).then_some(( + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .collect(); + + assert_eq!( + return_positions, + vec![(2, 10, 2, 12), (2, 10, 2, 12)], + "CPython codegen_unwind_fblock(WITH) leaves RETURN_VALUE inheriting the context expression location" + ); + } + + #[test] + fn test_async_with_return_value_uses_context_expr_location_like_cpython() { + let code = compile_exec( + "\ +async def f(cm, func, args, kwds): + async with cm: + return await func(*args, **kwds) +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let return_positions: Vec<_> = f + .instructions + .iter() + .zip(&f.locations) + .filter_map(|(unit, (location, end_location))| { + matches!(unit.op, Instruction::ReturnValue).then_some(( + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .collect(); + + assert_eq!( + return_positions, + vec![(2, 16, 2, 18), (2, 16, 2, 18)], + "CPython codegen_unwind_fblock(ASYNC_WITH) leaves RETURN_VALUE inheriting the context expression location" + ); + } + #[test] fn test_try_finally_conditional_return_duplicates_finally_exit_return() { let code = compile_exec( @@ -26812,7 +29780,7 @@ def f(cls, proto): } #[test] - fn test_literal_only_fstring_statement_is_optimized_away() { + fn test_literal_only_fstring_statement_keeps_const_like_cpython() { let code = compile_exec( "\ def f(): @@ -26822,17 +29790,11 @@ def f(): let f = find_code(&code, "f").expect("missing function code"); assert!( - !f.instructions - .iter() - .any(|unit| matches!(unit.op, Instruction::PopTop)), - "literal-only f-string statement should be removed" - ); - assert!( - !f.constants.iter().any(|constant| matches!( + f.constants.iter().any(|constant| matches!( constant, ConstantData::Str { value } if value.to_string() == "Not a docstring" )), - "literal-only f-string should not survive in constants" + "constant f-string statement should survive in co_consts like CPython" ); } @@ -27122,6 +30084,29 @@ values = [item for item in [r\"\\\\'a\\\\'\", r\"\\t3\", r\"\\\\\"[0]]]\n", } } + #[test] + fn test_constant_subscript_registers_source_const_before_result_like_cpython() { + let code = compile_exec("value = 'string'[3]\n"); + let source_index = code + .constants + .iter() + .position(|constant| { + matches!(constant, ConstantData::Str { value } if value.to_string() == "string") + }) + .expect("missing source string constant"); + let result_index = code + .constants + .iter() + .position(|constant| { + matches!(constant, ConstantData::Str { value } if value.to_string() == "i") + }) + .expect("missing folded subscript result"); + assert!( + source_index < result_index, + "CPython codegen_subscript emits the source constant before flowgraph.c folds NB_SUBSCR" + ); + } + #[test] fn test_constant_slice_subscript_folds_in_load_context() { let code = compile_exec( @@ -27285,6 +30270,29 @@ zero = 0j ** 2 ))); } + #[test] + fn test_folded_nan_constants_are_not_deduplicated_like_cpython() { + let code = compile_exec( + "\ +def f(): + repr(1e300 * 1e300 * 0) + repr(-1e300 * 1e300 * 0) + str(1e300 * 1e300 * 0) + str(-1e300 * 1e300 * 0) +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let nan_count = f + .constants + .iter() + .filter(|constant| matches!(constant, ConstantData::Float { value } if value.is_nan())) + .count(); + assert_eq!( + nan_count, 4, + "CPython _PyCode_ConstantKey keeps folded NaN constants distinct" + ); + } + #[test] fn test_zero_complex_power_exception_constants_do_not_fold() { let code = compile_exec("value = 0j ** (3 - 2j)\n"); @@ -27370,6 +30378,53 @@ class C: assert_eq!(varnames, vec!["format"]); } + #[test] + fn test_non_simple_class_annotation_is_not_deferred_like_cpython() { + let code = compile_exec( + "\ +class C: + x.y: list = [] + z: int +", + ); + let annotate = find_code(&code, "__annotate__").expect("missing __annotate__ code"); + let names = annotate + .names + .iter() + .map(|name| name.as_str()) + .collect::>(); + assert_eq!(names, vec!["int"]); + } + + #[test] + fn test_non_simple_annotation_only_consumes_symbol_table_cursor() { + let code = compile_exec( + "\ +class C: + x.y: (lambda: str) = [] + z: (lambda: int) +", + ); + let annotate = find_code(&code, "__annotate__").expect("missing __annotate__ code"); + let lambdas = annotate + .constants + .iter() + .filter_map(|constant| match constant { + ConstantData::Code { code } if code.obj_name == "" => Some(code.as_ref()), + _ => None, + }) + .collect::>(); + assert_eq!(lambdas.len(), 1); + assert_eq!( + lambdas[0] + .names + .iter() + .map(|name| name.as_str()) + .collect::>(), + vec!["int"] + ); + } + #[test] fn test_type_param_evaluator_uses_dot_format_varname() { let code = compile_exec( @@ -27473,6 +30528,27 @@ def func[T](a: T = 'a', *, b: T = 'b'): ); } + #[test] + fn test_generic_function_type_params_varnames_include_defaults_like_cpython() { + let code = compile_exec( + "\ +def func[T](): + pass +", + ); + let type_params = + find_code(&code, "").expect("missing type params code"); + assert_eq!(type_params.arg_count, 0); + assert_eq!( + type_params + .varnames + .iter() + .map(String::as_str) + .collect::>(), + vec![".defaults", "T"] + ); + } + #[test] fn test_class_type_param_bound_prefers_classdict_over_outer_function_local() { let code = compile_exec( @@ -27773,96 +30849,318 @@ def f(): } #[test] - fn test_tuple_not_keeps_to_bool_unary_not_like_cpython() { + fn test_tuple_not_keeps_to_bool_unary_not_like_cpython() { + let code = compile_exec( + "\ +def f(): + return not () +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let ops = f + .instructions + .iter() + .filter(|unit| !matches!(unit.op, Instruction::Cache)) + .collect::>(); + + assert!( + ops.windows(3).any(|window| { + matches!(window[0].op, Instruction::LoadConst { consti } + if matches!( + &f.constants[consti.get(OpArg::new(u32::from(u8::from(window[0].arg))))], + ConstantData::Tuple { elements } if elements.is_empty() + )) && matches!(window[1].op, Instruction::ToBool) + && matches!(window[2].op, Instruction::UnaryNot) + }), + "CPython codegen emits TO_BOOL; UNARY_NOT for UnaryOp(Not), while flowgraph.c folds tuple literals only after the LOAD_CONST+TO_BOOL pass, got instructions={:?}", + f.instructions + ); + } + + #[test] + fn test_tuple_if_test_keeps_to_bool_jump_like_cpython() { + let code = compile_exec( + "\ +def f(): + if (): + return 1 + return 2 +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let ops = f + .instructions + .iter() + .filter(|unit| !matches!(unit.op, Instruction::Cache)) + .collect::>(); + + assert!( + ops.windows(3).any(|window| { + matches!(window[0].op, Instruction::LoadConst { consti } + if matches!( + &f.constants[consti.get(OpArg::new(u32::from(u8::from(window[0].arg))))], + ConstantData::Tuple { elements } if elements.is_empty() + )) && matches!(window[1].op, Instruction::ToBool) + && matches!(window[2].op, Instruction::PopJumpIfFalse { .. }) + }), + "CPython leaves tuple literal truth tests as LOAD_CONST tuple; TO_BOOL; POP_JUMP_IF_FALSE because tuple folding happens after constant jump folding, got instructions={:?}", + f.instructions + ); + } + + #[test] + fn test_constant_list_iterable_uses_tuple() { + let code = compile_exec( + "\ +def f(): + return {x: y for x, y in [(1, 2), ]} +", + ); + let f = find_code(&code, "f").expect("missing function code"); + + assert!( + !f.instructions + .iter() + .any(|unit| matches!(unit.op, Instruction::BuildList { .. })), + "constant list iterable should avoid BUILD_LIST before GET_ITER" + ); + assert!(f.constants.iter().any(|constant| matches!( + constant, + ConstantData::Tuple { elements } + if matches!( + elements.as_slice(), + [ConstantData::Tuple { elements: inner }] + if matches!( + inner.as_slice(), + [ + ConstantData::Integer { .. }, + ConstantData::Integer { .. } + ] + ) + ) + ))); + } + + #[test] + fn test_constant_list_iterable_preserves_cpython_const_order() { + let code = compile_exec( + "\ +def f(): + for x in ['a', 'b', 'c']: + pass +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let constants = f.constants.iter().collect::>(); + + assert!( + matches!(constants[0], ConstantData::Str { value } if value.to_string() == "a"), + "CPython emits list elements as LOAD_CONST before flowgraph folds GET_ITER lists" + ); + assert!(matches!(constants[1], ConstantData::None)); + assert!(matches!( + constants[2], + ConstantData::Tuple { elements } + if matches!( + elements.as_slice(), + [ + ConstantData::Str { value: first }, + ConstantData::Str { value: second }, + ConstantData::Str { value: third }, + ] if first.to_string() == "a" + && second.to_string() == "b" + && third.to_string() == "c" + ) + )); + } + + #[test] + fn test_try_except_folded_tuple_consts_follow_cpython_block_order() { + let code = compile_exec( + "\ +def f(macrelease): + try: + g() + except ValueError: + macrelease = (10, 3) + if macrelease >= (10, 4): + pass +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let constants = f.constants.iter().collect::>(); + + assert!( + constants.windows(2).any(|window| { + matches!( + window, + [ + ConstantData::Tuple { elements: first }, + ConstantData::Tuple { elements: second }, + ] if matches!( + (first.as_slice(), second.as_slice()), + ( + [ + ConstantData::Integer { value: a }, + ConstantData::Integer { value: b }, + ], + [ + ConstantData::Integer { value: c }, + ConstantData::Integer { value: d }, + ], + ) if a == &BigInt::from(10) + && b == &BigInt::from(3) + && c == &BigInt::from(10) + && d == &BigInt::from(4) + ) + ) + }), + "CPython flowgraph.c walks b_next order, so the except-body tuple is folded before the following if-test tuple; got {constants:?}" + ); + } + + #[test] + fn test_small_set_membership_folds_before_later_unary_const_like_cpython() { let code = compile_exec( - "\ -def f(): - return not () -", + r#" +def f(method, n): + if method not in {"linear", "ranked"}: + pass + if method == "ranked": + start = (n - 1) / -2 +"#, ); let f = find_code(&code, "f").expect("missing function code"); - let ops = f - .instructions + let constants = f.constants.iter().collect::>(); + let frozenset_index = constants .iter() - .filter(|unit| !matches!(unit.op, Instruction::Cache)) - .collect::>(); + .position(|constant| matches!(constant, ConstantData::Frozenset { .. })) + .expect("missing folded membership frozenset"); + let negative_two_index = constants + .iter() + .position(|constant| { + matches!( + constant, + ConstantData::Integer { value } if value == &BigInt::from(-2) + ) + }) + .expect("missing folded -2 constant"); assert!( - ops.windows(3).any(|window| { - matches!(window[0].op, Instruction::LoadConst { consti } - if matches!( - &f.constants[consti.get(OpArg::new(u32::from(u8::from(window[0].arg))))], - ConstantData::Tuple { elements } if elements.is_empty() - )) && matches!(window[1].op, Instruction::ToBool) - && matches!(window[2].op, Instruction::UnaryNot) - }), - "CPython codegen emits TO_BOOL; UNARY_NOT for UnaryOp(Not), while flowgraph.c folds tuple literals only after the LOAD_CONST+TO_BOOL pass, got instructions={:?}", - f.instructions + frozenset_index < negative_two_index, + "CPython flowgraph.c optimizes BUILD_SET+CONTAINS_OP inline before folding the later unary -2; got {constants:?}" ); } #[test] - fn test_tuple_if_test_keeps_to_bool_jump_like_cpython() { + fn test_boolop_const_order_keeps_cpython_codegen_constants() { let code = compile_exec( "\ -def f(): - if (): - return 1 - return 2 +def or_false(x): + return False or x + +def zero_or_tuple(): + return 0 or (1, -1) + +def tuple_or_tuple(): + return (1, -1) or (-1, 1) ", ); - let f = find_code(&code, "f").expect("missing function code"); - let ops = f - .instructions - .iter() - .filter(|unit| !matches!(unit.op, Instruction::Cache)) - .collect::>(); + let or_false = find_code(&code, "or_false").expect("missing or_false code"); + let constants = or_false.constants.iter().collect::>(); + assert_eq!(constants.len(), 1); assert!( - ops.windows(3).any(|window| { - matches!(window[0].op, Instruction::LoadConst { consti } - if matches!( - &f.constants[consti.get(OpArg::new(u32::from(u8::from(window[0].arg))))], - ConstantData::Tuple { elements } if elements.is_empty() - )) && matches!(window[1].op, Instruction::ToBool) - && matches!(window[2].op, Instruction::PopJumpIfFalse { .. }) - }), - "CPython leaves tuple literal truth tests as LOAD_CONST tuple; TO_BOOL; POP_JUMP_IF_FALSE because tuple folding happens after constant jump folding, got instructions={:?}", - f.instructions + matches!(constants[0], ConstantData::Boolean { value: false }), + "CPython registers the skipped boolop literal before flowgraph removes the branch" + ); + + let zero_or_tuple = find_code(&code, "zero_or_tuple").expect("missing zero_or_tuple code"); + let constants = zero_or_tuple.constants.iter().collect::>(); + assert_eq!(constants.len(), 2); + assert!( + matches!( + constants[0], + ConstantData::Integer { value } if value == &BigInt::from(0) + ) && matches!( + constants[1], + ConstantData::Tuple { elements } + if matches!( + elements.as_slice(), + [ + ConstantData::Integer { value: one }, + ConstantData::Integer { value: minus_one }, + ] if one == &BigInt::from(1) && minus_one == &BigInt::from(-1) + ) + ), + "CPython keeps the skipped scalar literal before the folded tuple constant" + ); + + let tuple_or_tuple = + find_code(&code, "tuple_or_tuple").expect("missing tuple_or_tuple code"); + let constants = tuple_or_tuple.constants.iter().collect::>(); + assert_eq!(constants.len(), 3); + assert!( + matches!( + constants[0], + ConstantData::Integer { value } if value == &BigInt::from(1) + ) && matches!( + constants[1], + ConstantData::Tuple { elements } + if matches!( + elements.as_slice(), + [ + ConstantData::Integer { value: one }, + ConstantData::Integer { value: minus_one }, + ] if one == &BigInt::from(1) && minus_one == &BigInt::from(-1) + ) + ) && matches!( + constants[2], + ConstantData::Tuple { elements } + if matches!( + elements.as_slice(), + [ + ConstantData::Integer { value: minus_one }, + ConstantData::Integer { value: one }, + ] if minus_one == &BigInt::from(-1) && one == &BigInt::from(1) + ) + ), + "CPython compiles boolop tuple heads before flowgraph folds them" ); } #[test] - fn test_constant_list_iterable_uses_tuple() { + fn test_lambda_without_body_constants_keeps_none_like_cpython() { + let code = compile_exec("f = lambda x: x"); + let lambda = find_code(&code, "").expect("missing lambda code"); + let constants = lambda.constants.iter().collect::>(); + assert_eq!(constants.len(), 1); + + assert!( + matches!(constants[0], ConstantData::None), + "CPython AddReturnAtEnd registers None for constant-free lambdas" + ); + } + + #[test] + fn test_call_function_ex_empty_args_tuple_is_folded_late_like_cpython() { let code = compile_exec( "\ -def f(): - return {x: y for x, y in [(1, 2), ]} +def f(g, kwargs, ns): + g(**kwargs) + ns['T'] ", ); let f = find_code(&code, "f").expect("missing function code"); + let constants = f.constants.iter().collect::>(); + assert_eq!(constants.len(), 3); assert!( - !f.instructions - .iter() - .any(|unit| matches!(unit.op, Instruction::BuildList { .. })), - "constant list iterable should avoid BUILD_LIST before GET_ITER" + matches!(constants[0], ConstantData::Str { value } if value.to_string() == "T") + && matches!(constants[1], ConstantData::None) + && matches!(constants[2], ConstantData::Tuple { elements } if elements.is_empty()), + "CPython emits BUILD_TUPLE 0 for CALL_FUNCTION_EX args and folds it after earlier constants" ); - assert!(f.constants.iter().any(|constant| matches!( - constant, - ConstantData::Tuple { elements } - if matches!( - elements.as_slice(), - [ConstantData::Tuple { elements: inner }] - if matches!( - inner.as_slice(), - [ - ConstantData::Integer { .. }, - ConstantData::Integer { .. } - ] - ) - ) - ))); } #[test] @@ -28086,6 +31384,220 @@ def g(): ); } + #[test] + fn test_comprehension_list_iterable_build_uses_iter_location_like_cpython() { + let code = compile_exec( + "\ +async def f(i): + return i + +async def run_list(): + return [await c for c in [f(1), f(41)]] +", + ); + let run_list = find_code(&code, "run_list").expect("missing run_list code"); + assert_eq!( + run_list.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xdc, 0x1e, 0x1f, 0xa0, 0x01, 0x9b, 0x64, 0xa4, 0x41, 0xa0, + 0x62, 0xa3, 0x45, 0x99, 0x5d, 0xd3, 0x0b, 0x2b, 0x99, 0x5d, 0x98, 0x01, 0x8f, 0x47, + 0x8a, 0x47, 0x99, 0x5d, 0xd1, 0x0b, 0x2b, 0xd0, 0x04, 0x2b, 0x89, 0x47, 0xf9, 0xd2, + 0x0b, 0x2b, 0xf9, + ], + "CPython codegen_comprehension_iter() emits GET_ITER at LOC(comp->iter)" + ); + } + + #[test] + fn test_comprehension_boolop_iter_get_iter_uses_iter_location_like_cpython() { + let code = compile_exec( + "\ +def f(self): + return any(not w.cancelled() for w in (self._waiters or ())) +", + ); + let f = find_code(&code, "f").expect("missing f code"); + let get_iter_positions: Vec<_> = f + .instructions + .iter() + .zip(&f.locations) + .filter_map(|(unit, (location, end_location))| { + matches!(unit.op, Instruction::GetIter).then_some(( + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .collect(); + + assert!( + get_iter_positions.contains(&(2, 44, 2, 63)), + "CPython codegen_comprehension_iter() emits GET_ITER at LOC(comp->iter), got {get_iter_positions:?}" + ); + } + + #[test] + fn test_inlined_comprehension_backedges_use_element_location_like_cpython() { + let code = compile_exec( + "\ +async def f(i): + return i + +async def run_list(): + return [s for c in [f(''), f('abc')] for s in await c] +", + ); + let run_list = find_code(&code, "run_list").expect("missing run_list code"); + assert_eq!( + run_list.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xdc, 0x18, 0x19, 0x98, 0x22, 0x9b, 0x05, 0x9c, 0x71, 0xa0, + 0x15, 0x9b, 0x78, 0xd1, 0x17, 0x28, 0xd4, 0x0b, 0x3a, 0xd1, 0x17, 0x28, 0x90, 0x21, + 0xb7, 0x27, 0xb2, 0x27, 0xa8, 0x51, 0x8a, 0x41, 0xb1, 0x27, 0x89, 0x41, 0xd1, 0x17, + 0x28, 0xd2, 0x0b, 0x3a, 0xd0, 0x04, 0x3a, 0xb1, 0x27, 0xf9, 0xd3, 0x0b, 0x3a, 0xf9, + ], + "CPython codegen_sync_comprehension_generator() emits comprehension backedges at elt_loc" + ); + } + + #[test] + fn test_nested_dict_comprehension_outer_backedge_uses_key_location_like_cpython() { + let code = compile_exec( + "\ +def f(items): + return {op: i for i, ops in items for op in ops} +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let backedge_positions: Vec<_> = f + .instructions + .iter() + .zip(&f.locations) + .filter_map(|(unit, (location, end_location))| { + matches!(unit.op, Instruction::JumpBackward { .. }).then_some(( + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .collect(); + + assert!( + backedge_positions.contains(&(2, 13, 2, 18)), + "CPython extends only the terminal dict-comprehension MAP_ADD/backedge location from key through value, got {backedge_positions:?}" + ); + assert!( + backedge_positions.contains(&(2, 13, 2, 15)), + "CPython keeps outer dict-comprehension generator backedges at LOC(key), got {backedge_positions:?}" + ); + } + + #[test] + fn test_inlined_comprehension_filter_jump_uses_element_location_like_cpython() { + let code = compile_exec( + "\ +def f(self): + return [action for action in self._actions if action.option_strings] +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let filter_jump_position = f + .instructions + .iter() + .zip(&f.locations) + .find_map(|(unit, (location, end_location))| { + matches!(unit.op, Instruction::PopJumpIfTrue { .. }).then_some(( + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .expect("missing optimized filter jump"); + assert_eq!( + filter_jump_position, + (2, 13, 2, 19), + "CPython inlined comprehension filter jump inherits the element/backedge location after CFG cleanup" + ); + } + + #[test] + fn test_inlined_comprehension_ifexp_guard_jump_uses_body_location_like_cpython() { + let code = compile_exec( + "\ +def f(fields): + return [f for f in fields if (f.compare if f.hash is None else f.hash)] +", + ); + let f = find_code(&code, "f").expect("missing function code"); + let jump_forward_position = f + .instructions + .iter() + .zip(&f.locations) + .find_map(|(unit, (location, end_location))| { + matches!(unit.op, Instruction::JumpForward { .. }).then_some(( + location.line.get(), + location.character_offset.get(), + end_location.line.get(), + end_location.character_offset.get(), + )) + }) + .expect("missing if-expression body jump"); + assert_eq!( + jump_forward_position, + (2, 35, 2, 44), + "CPython flowgraph.c::propagate_line_numbers() copies the if-expression body location onto the NO_LOCATION jump" + ); + } + + #[test] + fn test_inlined_async_comprehension_end_async_for_uses_comprehension_location_like_cpython() { + let code = compile_exec( + "\ +async def f(it): + for i in it: + yield i + +async def run_list(): + return [i + 1 async for i in f([10, 20])] +", + ); + let run_list = find_code(&code, "run_list").expect("missing run_list code"); + assert_eq!( + run_list.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xdc, 0x21, 0x22, 0xa0, 0x42, 0xa8, 0x02, 0xa0, 0x38, 0xa4, + 0x1b, 0xd7, 0x0b, 0x2d, 0xd3, 0x0b, 0x2d, 0x98, 0x41, 0x90, 0x01, 0x8f, 0x45, 0x88, + 0x45, 0xd4, 0x0b, 0x2d, 0xd0, 0x04, 0x2d, 0xf9, 0xd2, 0x0b, 0x2d, 0xf9, + ], + "CPython codegen_async_comprehension_generator() emits END_ASYNC_FOR at comprehension loc" + ); + } + + #[test] + fn test_async_for_anext_sequence_uses_statement_location_like_cpython() { + let code = compile_exec( + "\ +async def f(source, buffer): + async for i1, i2 in source(): + buffer.append(i1 + i2) +", + ); + let f = find_code(&code, "f").expect("missing f code"); + assert_eq!( + f.linetable.as_ref(), + &[ + 0xe9, 0x00, 0x80, 0x00, 0xd9, 0x18, 0x1e, 0x9c, 0x08, 0xf7, 0x00, 0x01, 0x05, 0x1f, + 0xf0, 0x00, 0x01, 0x05, 0x1f, 0x89, 0x66, 0x88, 0x62, 0xd8, 0x08, 0x0e, 0x8f, 0x0d, + 0x89, 0x0d, 0x90, 0x62, 0x95, 0x67, 0xd6, 0x08, 0x1e, 0xf1, 0x03, 0x01, 0x05, 0x1f, + 0x9a, 0x08, 0xf9, + ], + "CPython codegen_async_for() emits GET_ANEXT/yield-from scaffolding at LOC(s)" + ); + } + #[test] fn test_nested_comprehension_list_iterable_uses_tuple() { let code = compile_exec( @@ -28684,6 +32196,25 @@ def f(seq, emit): ); } + #[test] + fn test_inlined_comprehension_namedexpr_varnames_match_cpython_order() { + let code = compile_exec( + "\ +def f(): + def spam(a): + return a + input_data = [1, 2, 3] + res = [(x, y, x / y) for x in input_data if (y := spam(x)) > 0] + return res +", + ); + let f = find_code(&code, "f").expect("missing f code"); + assert_eq!( + f.varnames.iter().map(String::as_str).collect::>(), + vec!["spam", "input_data", "x", "y", "res"] + ); + } + #[test] fn test_global_namedexpr_in_inlined_comprehension_saves_fast_slot() { let code = compile_exec( @@ -28712,6 +32243,27 @@ def f(seq, value): })); } + #[test] + fn test_namedexpr_copy_uses_namedexpr_location_like_cpython() { + let code = compile_exec( + "\ +def outer(): + a = 10 + def spam(): + nonlocal a + (a := 20) +", + ); + let spam = find_code(&code, "spam").expect("missing spam code"); + + // CPython 3.14 NamedExpr_kind emits COPY at LOC(named expression), + // between visiting the value and visiting the target. + assert_eq!( + spam.linetable.as_ref(), + &[0xf8, 0x80, 0x00, 0xe0, 0x0e, 0x10, 0x88, 0x17, 0x8b, 0x11,] + ); + } + #[test] fn test_genexpr_namedexpr_target_is_cell_not_fast_local() { let code = compile_exec( @@ -28730,6 +32282,31 @@ def f(seq): ); } + #[test] + fn test_public_cellvars_follow_cpython_localsplus_order() { + let code = compile_exec( + "\ +def f(): + x = 10 + t = False + g = ((i, j) for i in range(x) if t for j in range(x)) + [x for x in range(3)] + return g +", + ); + let f = find_code(&code, "f").expect("missing f code"); + + assert_eq!( + f.varnames.iter().map(String::as_str).collect::>(), + ["g", "x"] + ); + assert_eq!( + f.cellvars.iter().map(String::as_str).collect::>(), + ["x", "t"], + "CPython assemble.c exposes co_cellvars in localsplus order: merged local cells before non-local cells" + ); + } + #[test] fn test_inlined_comprehension_restore_does_not_form_store_fast_load_fast() { let code = compile_exec( diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs index 47be101f0af..db1119873b2 100644 --- a/crates/codegen/src/ir.rs +++ b/crates/codegen/src/ir.rs @@ -28,6 +28,8 @@ struct LineTableLocation { end_col: i32, } +pub(crate) const LINE_ONLY_LOCATION_OVERRIDE: i32 = -4; + const MAX_INT_SIZE_BITS: u64 = 128; const MAX_COLLECTION_SIZE: usize = 256; const MAX_TOTAL_ITEMS: isize = 1024; @@ -35,13 +37,92 @@ const MAX_STR_SIZE: usize = 4096; const MIN_CONST_SEQUENCE_SIZE: usize = 3; const STACK_USE_GUIDELINE: usize = 30; +#[derive(Clone, Debug, Default)] +pub struct ConstantPool { + constants: Vec, +} + +impl ConstantPool { + fn constant_contains_nan(constant: &ConstantData) -> bool { + match constant { + ConstantData::Float { value } => value.is_nan(), + ConstantData::Complex { value } => value.re.is_nan() || value.im.is_nan(), + ConstantData::Tuple { elements } | ConstantData::Frozenset { elements } => { + elements.iter().any(Self::constant_contains_nan) + } + ConstantData::Slice { elements } => elements.iter().any(Self::constant_contains_nan), + _ => false, + } + } + + pub fn insert_full(&mut self, constant: ConstantData) -> (usize, bool) { + // CPython's _PyCode_ConstantKey() keeps NaN-bearing constants distinct + // because Python-level NaN keys do not compare equal. + if !Self::constant_contains_nan(&constant) + && let Some(idx) = self + .constants + .iter() + .position(|existing| existing == &constant) + { + return (idx, false); + } + let idx = self.constants.len(); + self.constants.push(constant); + (idx, true) + } + + pub fn insert(&mut self, constant: ConstantData) -> bool { + self.insert_full(constant).1 + } + + #[must_use] + pub fn get_index(&self, idx: usize) -> Option<&ConstantData> { + self.constants.get(idx) + } + + pub fn iter(&self) -> core::slice::Iter<'_, ConstantData> { + self.constants.iter() + } + + #[must_use] + pub fn len(&self) -> usize { + self.constants.len() + } + + #[must_use] + pub fn is_empty(&self) -> bool { + self.constants.is_empty() + } + + pub fn clear(&mut self) { + self.constants.clear(); + } +} + +impl ops::Index for ConstantPool { + type Output = ConstantData; + + fn index(&self, idx: usize) -> &Self::Output { + &self.constants[idx] + } +} + +impl IntoIterator for ConstantPool { + type Item = ConstantData; + type IntoIter = alloc::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.constants.into_iter() + } +} + /// Metadata for a code unit // = _PyCompile_CodeUnitMetadata #[derive(Clone, Debug)] pub struct CodeUnitMetadata { pub name: String, // u_name (obj_name) pub qualname: Option, // u_qualname - pub consts: IndexSet, // u_consts + pub consts: ConstantPool, // u_consts pub names: IndexSet, // u_names pub varnames: IndexSet, // u_varnames pub cellvars: IndexSet, // u_cellvars @@ -140,6 +221,12 @@ pub struct InstructionInfo { /// This is the final jump emitted by codegen_break() after unwinding the /// iterator for a for-loop break. pub for_loop_break_cleanup_jump: bool, + /// Keep this conditional jump's own location when the preceding TO_BOOL + /// normally propagates its condition location into the jump. + pub preserve_tobool_jump_location: bool, + /// Keep the jump location copied from the second STORE_FAST NOP created + /// by STORE_FAST_STORE_FAST fusion. + pub preserve_store_fast_store_fast_jump_location: bool, } /// Exception handler information for an instruction. @@ -167,6 +254,8 @@ fn set_to_nop(info: &mut InstructionInfo) { info.match_success_jump = false; info.break_continue_cleanup_jump = false; info.for_loop_break_cleanup_jump = false; + info.preserve_tobool_jump_location = false; + info.preserve_store_fast_store_fast_jump_location = false; } fn nop_out_no_location(info: &mut InstructionInfo) { @@ -204,6 +293,8 @@ pub struct Block { pub load_fast_passthrough: bool, /// Continuation label that CPython attaches to a preceding empty block. pub load_fast_label_reuse_passthrough: bool, + /// If-expression orelse label emitted inside another conditional statement. + pub conditional_ifexp_orelse_entry: bool, } impl Default for Block { @@ -221,6 +312,7 @@ impl Default for Block { load_fast_barrier: false, load_fast_passthrough: false, load_fast_label_reuse_passthrough: false, + conditional_ifexp_orelse_entry: false, } } } @@ -295,17 +387,9 @@ impl CodeInfo { self.fold_set_constants(); self.optimize_lists_and_sets(); self.convert_to_load_small_int(); - self.remove_unused_consts(); // DCE always runs (removes dead code after terminal instructions) self.dce(); - // BUILD_TUPLE n + UNPACK_SEQUENCE n → NOP + SWAP (n=2,3) or NOP+NOP (n=1) - self.optimize_build_tuple_unpack(); - // Dead store elimination for duplicate STORE_FAST targets - // (apply_static_swaps in CPython's flowgraph.c) - self.eliminate_dead_stores(); - // apply_static_swaps: reorder stores to eliminate SWAPs - self.apply_static_swaps(); // Peephole optimizer handles constant and compare folding. self.peephole_optimize(); // Per-block walker first to preserve CPython-style instruction-order @@ -318,7 +402,6 @@ impl CodeInfo { self.fold_set_constants(); self.optimize_lists_and_sets(); self.convert_to_load_small_int(); - self.remove_unused_consts(); // CPython's CFG builder starts a new basic block after a terminator. // Peephole constant-jump folding can create new terminators, so split // before DCE clears unreachable successor instructions; otherwise the @@ -346,6 +429,14 @@ impl CodeInfo { // superinstruction insertion, so fusion decisions see propagated // source locations. resolve_line_numbers(&mut self.blocks); + // CPython flowgraph.c::optimize_cfg() runs optimize_basic_block() + // after the first resolve_line_numbers(). Keep tuple-unpack SWAP + // creation, duplicate STORE_FAST cleanup, and apply_static_swaps() + // here so synthetic no-location exits inherit the same pre-swap + // source locations as CPython. + self.optimize_build_tuple_unpack(); + self.eliminate_dead_stores(); + self.apply_static_swaps(); self.remove_nops(); self.add_checks_for_loads_of_uninitialized_variables(); // CPython inserts superinstructions in _PyCfg_OptimizeCodeUnit, before @@ -526,7 +617,7 @@ impl CodeInfo { if next_lineno == lineno { remove = true; } else if next_lineno < 0 { - src_instructions[src + 1].lineno_override = Some(lineno); + copy_instruction_location(instr, &mut src_instructions[src + 1]); remove = true; } } @@ -578,6 +669,8 @@ impl CodeInfo { } resolve_next_location_overrides(&mut blocks); + propagate_store_fast_store_fast_jump_locations(&mut blocks); + propagate_tobool_conditional_jump_locations(&mut blocks); // Pre-compute cache_entries for real (non-pseudo) instructions for block in &mut blocks { @@ -747,13 +840,31 @@ impl CodeInfo { info.arg.instr_size() + cache_count, )); // Collect linetable locations with lineno_override support - let lt_loc = LineTableLocation { - line: info - .lineno_override - .unwrap_or_else(|| info.location.line.get() as i32), - end_line: info.end_location.line.get() as i32, - col: info.location.character_offset.to_zero_indexed() as i32, - end_col: info.end_location.character_offset.to_zero_indexed() as i32, + let lt_loc = match info.lineno_override { + Some(-1) => LineTableLocation { + line: -1, + end_line: -1, + col: -1, + end_col: -1, + }, + Some(LINE_ONLY_LOCATION_OVERRIDE) => LineTableLocation { + line: info.location.line.get() as i32, + end_line: info.end_location.line.get() as i32, + col: -1, + end_col: -1, + }, + Some(lineno) => LineTableLocation { + line: lineno, + end_line: info.end_location.line.get() as i32, + col: info.location.character_offset.to_zero_indexed() as i32, + end_col: info.end_location.character_offset.to_zero_indexed() as i32, + }, + None => LineTableLocation { + line: info.location.line.get() as i32, + end_line: info.end_location.line.get() as i32, + col: info.location.character_offset.to_zero_indexed() as i32, + end_col: info.end_location.character_offset.to_zero_indexed() as i32, + }, }; linetable_locations.extend(core::iter::repeat_n(lt_loc, info.arg.instr_size())); // CACHE entries inherit parent instruction's location @@ -801,6 +912,20 @@ impl CodeInfo { // Generate exception table before moving source_path let exceptiontable = generate_exception_table(&blocks, &block_to_index); + // CPython builds u_cellvars in dictbytype() order, but the public + // co_cellvars tuple follows localsplus order from assemble.c: + // cell locals already present in varnames first, then remaining cells. + let final_cellvars = varname_cache + .iter() + .filter(|name| cellvar_cache.contains(name.as_str())) + .chain( + cellvar_cache + .iter() + .filter(|name| !varname_cache.contains(name.as_str())), + ) + .cloned() + .collect::>(); + // Build localspluskinds with cell-local merging let nlocals = varname_cache.len(); let ncells = cellvar_cache.len(); @@ -854,7 +979,7 @@ impl CodeInfo { constants: constants.into_iter().collect(), names: name_cache.into_iter().collect(), varnames: varname_cache.into_iter().collect(), - cellvars: cellvar_cache.into_iter().collect(), + cellvars: final_cellvars.into_boxed_slice(), freevars: freevar_cache.into_iter().collect(), localspluskinds: localspluskinds.into_boxed_slice(), linetable, @@ -1060,6 +1185,27 @@ impl CodeInfo { } } + fn instr_make_load_const( + metadata: &mut CodeUnitMetadata, + instr: &mut InstructionInfo, + constant: ConstantData, + ) { + if let ConstantData::Integer { value } = &constant + && let Some(small) = value.to_i32().filter(|v| (0..=255).contains(v)) + { + instr.instr = Opcode::LoadSmallInt.into(); + instr.arg = OpArg::new(small as u32); + return; + } + + let (const_idx, _) = metadata.consts.insert_full(constant); + instr.instr = Instruction::LoadConst { + consti: Arg::marker(), + } + .into(); + instr.arg = OpArg::new(const_idx as u32); + } + /// Try to fold a single unary instruction at position `i` in `block`. /// Returns true if folded. Mirrors CPython fold_const_unaryop(). fn fold_unary_constant_at( @@ -1100,7 +1246,6 @@ impl CodeInfo { let Some(folded_const) = Self::eval_unary_constant(&operand, op, intrinsic) else { return false; }; - let (const_idx, _) = metadata.consts.insert_full(folded_const); nop_out_no_location(&mut block.instructions[operand_index]); let mut prev = operand_index; while let Some(idx) = prev.checked_sub(1) { @@ -1111,18 +1256,15 @@ impl CodeInfo { block.instructions[idx].end_location = block.instructions[i].end_location; prev = idx; } - block.instructions[i].instr = Instruction::LoadConst { - consti: Arg::marker(), - } - .into(); - block.instructions[i].arg = OpArg::new(const_idx as u32); + Self::instr_make_load_const(metadata, &mut block.instructions[i], folded_const); block.instructions[i].folded_from_nonliteral_expr = false; true } /// Fold constant unary operations following CPython fold_const_unaryop(). fn fold_unary_constants(&mut self) { - for block in &mut self.blocks { + for block_idx in self.block_next_order() { + let block = &mut self.blocks[block_idx]; let mut i = 0; while i < block.instructions.len() { if Self::fold_unary_constant_at(&mut self.metadata, block, i) { @@ -1194,6 +1336,16 @@ impl CodeInfo { None } + fn block_next_order(&self) -> Vec { + let mut order = Vec::new(); + let mut current = BlockIdx(0); + while current != BlockIdx::NULL { + order.push(current); + current = self.blocks[current.idx()].next; + } + order + } + /// Try to fold a single BINARY_OP instruction at position `i` in `block`. /// Returns true if folded. Mirrors CPython fold_const_binop(). fn fold_binop_constant_at( @@ -1224,18 +1376,13 @@ impl CodeInfo { let Some(result_const) = Self::eval_binop(&left_val, &right_val, op) else { return false; }; - let (const_idx, _) = metadata.consts.insert_full(result_const); let folded_from_nonliteral_expr = operand_indices .iter() .any(|&idx| block.instructions[idx].folded_from_nonliteral_expr); for &idx in &operand_indices { nop_out_no_location(&mut block.instructions[idx]); } - block.instructions[i].instr = Instruction::LoadConst { - consti: Arg::marker(), - } - .into(); - block.instructions[i].arg = OpArg::new(const_idx as u32); + Self::instr_make_load_const(metadata, &mut block.instructions[i], result_const); block.instructions[i].folded_from_nonliteral_expr = folded_from_nonliteral_expr; true } @@ -1244,7 +1391,8 @@ impl CodeInfo { /// into a single LOAD_CONST when the result is computable at compile time. /// = fold_binops_on_constants in CPython flowgraph.c fn fold_binop_constants(&mut self) { - for block in &mut self.blocks { + for block_idx in self.block_next_order() { + let block = &mut self.blocks[block_idx]; let mut i = 0; while i < block.instructions.len() { if Self::fold_binop_constant_at(&mut self.metadata, block, i) { @@ -1260,27 +1408,26 @@ impl CodeInfo { /// unary, and binop constant folding. Mirrors optimize_basic_block() in /// flowgraph.c so constants are registered in co_consts in instruction /// order rather than in the order separate global passes would discover - /// them. Iterates per block to a fixed point so an inner fold can enable - /// a surrounding outer fold within the same block. + /// them. CPython runs optimize_basic_block() once per basic block, with a + /// single forward scan, so this deliberately does not iterate to a fixed + /// point. fn fold_constants_per_block(&mut self) { - for block in &mut self.blocks { - loop { - let mut changed = false; - let mut i = 0; - while i < block.instructions.len() { - let folded = Self::fold_tuple_constant_at(&mut self.metadata, block, i) - || Self::fold_list_constant_at(&mut self.metadata, block, i) - || Self::fold_set_constant_at(&mut self.metadata, block, i) - || Self::fold_unary_constant_at(&mut self.metadata, block, i) - || Self::fold_binop_constant_at(&mut self.metadata, block, i); - if folded { - changed = true; - } - i += 1; - } - if !changed { - break; - } + for block_idx in self.block_next_order() { + let block = &mut self.blocks[block_idx]; + let mut i = 0; + while i < block.instructions.len() { + let _ = Self::fold_tuple_constant_at(&mut self.metadata, block, i) + || Self::optimize_iterable_or_contains_collection_at( + &mut self.metadata, + block, + i, + ) + || Self::fold_list_constant_at(&mut self.metadata, block, i) + || Self::fold_set_constant_at(&mut self.metadata, block, i) + || Self::fold_constant_intrinsic_list_to_tuple_at(&mut self.metadata, block, i) + || Self::fold_unary_constant_at(&mut self.metadata, block, i) + || Self::fold_binop_constant_at(&mut self.metadata, block, i); + i += 1; } } } @@ -1588,6 +1735,46 @@ impl CodeInfo { return eval_const_subscript(left, right); } + fn constant_as_int(value: &ConstantData) -> Option<(BigInt, bool)> { + match value { + ConstantData::Boolean { value } => Some((BigInt::from(u8::from(*value)), true)), + ConstantData::Integer { value } => Some((value.clone(), false)), + _ => None, + } + } + + if let (Some((left_int, left_is_bool)), Some((right_int, right_is_bool))) = + (constant_as_int(left), constant_as_int(right)) + && (left_is_bool || right_is_bool) + { + if left_is_bool && right_is_bool { + match op { + BinOp::And => { + return Some(ConstantData::Boolean { + value: !left_int.is_zero() & !right_int.is_zero(), + }); + } + BinOp::Or => { + return Some(ConstantData::Boolean { + value: !left_int.is_zero() | !right_int.is_zero(), + }); + } + BinOp::Xor => { + return Some(ConstantData::Boolean { + value: !left_int.is_zero() ^ !right_int.is_zero(), + }); + } + _ => {} + } + } + + return Self::eval_binop( + &ConstantData::Integer { value: left_int }, + &ConstantData::Integer { value: right_int }, + op, + ); + } + match (left, right) { (ConstantData::Integer { value: l }, ConstantData::Integer { value: r }) => { let result = match op { @@ -1902,6 +2089,14 @@ impl CodeInfo { if func.get(block.instructions[i].arg) != IntrinsicFunction1::ListToTuple { return false; } + if block + .instructions + .get(i + 1) + .and_then(|instr| instr.instr.real()) + .is_some_and(|instr| matches!(instr, Instruction::GetIter)) + { + return false; + } let mut consts_found = 0usize; let mut expect_append = true; @@ -2036,11 +2231,80 @@ impl CodeInfo { true } + /// CPython's optimize_basic_block() calls optimize_lists_and_sets() in + /// place for BUILD_LIST/BUILD_SET. This handles the GET_ITER/CONTAINS_OP + /// subset there so small membership collections are folded before later + /// unary/binop constants in the same block. + fn optimize_iterable_or_contains_collection_at( + metadata: &mut CodeUnitMetadata, + block: &mut Block, + i: usize, + ) -> bool { + let Some(instr) = block.instructions[i].instr.real() else { + return false; + }; + let is_list = matches!(instr, Instruction::BuildList { .. }); + let is_set = matches!(instr, Instruction::BuildSet { .. }); + if !is_list && !is_set { + return false; + } + + let next_is_iter_or_contains = block + .instructions + .get(i + 1) + .and_then(|next| next.instr.real()) + .is_some_and(|next| { + matches!(next, Instruction::GetIter | Instruction::ContainsOp { .. }) + }); + if !next_is_iter_or_contains { + return false; + } + + let seq_size = u32::from(block.instructions[i].arg) as usize; + if seq_size > STACK_USE_GUIDELINE { + return false; + } + + let Some((operand_indices, elements)) = + Self::get_const_sequence(metadata, block, i, seq_size) + else { + if is_list { + block.instructions[i].instr = Opcode::BuildTuple.into(); + return true; + } + return false; + }; + + let const_data = if is_set { + ConstantData::Frozenset { elements } + } else { + ConstantData::Tuple { elements } + }; + let (const_idx, _) = metadata.consts.insert_full(const_data); + let folded_loc = block.instructions[i].location; + let end_loc = block.instructions[i].end_location; + let eh = block.instructions[i].except_handler; + + for &j in &operand_indices { + set_to_nop(&mut block.instructions[j]); + block.instructions[j].location = folded_loc; + block.instructions[j].end_location = end_loc; + } + + block.instructions[i].instr = Opcode::LoadConst.into(); + block.instructions[i].arg = OpArg::new(const_idx as u32); + block.instructions[i].location = folded_loc; + block.instructions[i].end_location = end_loc; + block.instructions[i].except_handler = eh; + true + } + /// Constant folding: fold LOAD_CONST/LOAD_SMALL_INT + BUILD_TUPLE into LOAD_CONST tuple /// fold_tuple_of_constants. This also folds constant list/set literals /// in block order to match CPython's optimize_basic_block() const-table order. fn fold_tuple_constants(&mut self) { - for block in &mut self.blocks { + for block_idx in self.block_next_order() { + let block = &mut self.blocks[block_idx]; let mut i = 0; while i < block.instructions.len() { if Self::fold_tuple_constant_at(&mut self.metadata, block, i) @@ -2058,7 +2322,8 @@ impl CodeInfo { /// Fold constant list literals: LOAD_CONST* + BUILD_LIST N → /// BUILD_LIST 0 + LOAD_CONST (tuple) + LIST_EXTEND 1 fn fold_list_constants(&mut self) { - for block in &mut self.blocks { + for block_idx in self.block_next_order() { + let block = &mut self.blocks[block_idx]; let mut i = 0; while i < block.instructions.len() { let instr = &block.instructions[i]; @@ -2135,7 +2400,8 @@ impl CodeInfo { /// - Previously folded BUILD_LIST 0 + LOAD_CONST + LIST_EXTEND and /// BUILD_SET 0 + LOAD_CONST + SET_UPDATE collapse back to LOAD_CONST. fn optimize_lists_and_sets(&mut self) { - for block in &mut self.blocks { + for block_idx in self.block_next_order() { + let block = &mut self.blocks[block_idx]; let mut i = 0; while i + 1 < block.instructions.len() { if matches!( @@ -2359,7 +2625,8 @@ impl CodeInfo { /// Fold constant set literals: LOAD_CONST* + BUILD_SET N → /// BUILD_SET 0 + LOAD_CONST (frozenset-as-tuple) + SET_UPDATE 1 fn fold_set_constants(&mut self) { - for block in &mut self.blocks { + for block_idx in self.block_next_order() { + let block = &mut self.blocks[block_idx]; let mut i = 0; while i < block.instructions.len() { let instr = &block.instructions[i]; @@ -3103,7 +3370,8 @@ impl CodeInfo { /// Convert LOAD_CONST for small integers to LOAD_SMALL_INT /// maybe_instr_make_load_smallint fn convert_to_load_small_int(&mut self) { - for block in &mut self.blocks { + for block_idx in self.block_next_order() { + let block = &mut self.blocks[block_idx]; for instr in &mut block.instructions { // Check if it's a LOAD_CONST instruction let Some(Instruction::LoadConst { .. }) = instr.instr.real() else { @@ -3145,7 +3413,8 @@ impl CodeInfo { let mut used = vec![false; nconsts]; used[0] = true; - for block in &self.blocks { + for block_idx in self.block_next_order() { + let block = &self.blocks[block_idx]; for instr in &block.instructions { if let Some(Instruction::LoadConst { .. }) = instr.instr.real() { let idx = u32::from(instr.arg) as usize; @@ -3182,7 +3451,8 @@ impl CodeInfo { } // Update LOAD_CONST instruction arguments - for block in &mut self.blocks { + for block_idx in self.block_next_order() { + let block = &mut self.blocks[block_idx]; for instr in &mut block.instructions { if let Some(Instruction::LoadConst { .. }) = instr.instr.real() { let old_idx = u32::from(instr.arg) as usize; @@ -3338,6 +3608,28 @@ impl CodeInfo { i += 1; continue; } + let first_store_location = curr.location; + let second_store_location = next.location; + let second_store_end_location = next.end_location; + let second_store_lineno_override = next.lineno_override; + let mut after_idx = i + 2; + while after_idx < block.instructions.len() { + let after = &mut block.instructions[after_idx]; + if after.instr.is_unconditional_jump() { + if instruction_lineno(after) < 0 + || after.location == first_store_location + { + after.location = second_store_location; + after.end_location = second_store_end_location; + after.lineno_override = second_store_lineno_override; + } + break; + } + if instruction_lineno(after) >= 0 { + break; + } + after_idx += 1; + } let packed = (idx1 << 4) | idx2; block.instructions[i].instr = Instruction::StoreFastStoreFast { var_nums: Arg::marker(), @@ -3345,6 +3637,8 @@ impl CodeInfo { .into(); block.instructions[i].arg = OpArg::new(packed); set_to_nop(&mut block.instructions[i + 1]); + block.instructions[i + 1].preserve_store_fast_store_fast_jump_location = + true; i += 1; } _ => i += 1, @@ -4335,6 +4629,7 @@ impl CodeInfo { self.debug_block_dump(), )); } + self.fold_constants_per_block(); self.fold_binop_constants(); self.fold_unary_constants(); self.fold_binop_constants(); @@ -4345,23 +4640,19 @@ impl CodeInfo { self.fold_set_constants(); self.optimize_lists_and_sets(); self.convert_to_load_small_int(); - self.remove_unused_consts(); self.dce(); - self.optimize_build_tuple_unpack(); - self.eliminate_dead_stores(); - self.apply_static_swaps(); self.peephole_optimize(); trace.push(( "after_peephole_optimize".to_owned(), self.debug_block_dump(), )); + self.fold_constants_per_block(); self.fold_tuple_constants(); self.fold_binop_constants(); self.fold_list_constants(); self.fold_set_constants(); self.optimize_lists_and_sets(); self.convert_to_load_small_int(); - self.remove_unused_consts(); split_blocks_at_jumps(&mut self.blocks); trace.push(( "after_split_blocks_at_jumps".to_owned(), @@ -4384,6 +4675,9 @@ impl CodeInfo { trace.push(("after_jump_threading".to_owned(), self.debug_block_dump())); self.eliminate_unreachable_blocks(); resolve_line_numbers(&mut self.blocks); + self.optimize_build_tuple_unpack(); + self.eliminate_dead_stores(); + self.apply_static_swaps(); trace.push(( "after_first_resolve_line_numbers".to_owned(), self.debug_block_dump(), @@ -4753,6 +5047,18 @@ fn generate_linetable( // Get column information (only when debug_ranges is enabled) let col = loc.col; let end_col = loc.end_col; + if (col < 0 || end_col < 0) && end_line == line { + linetable.push( + 0x80 | ((PyCodeLocationInfoKind::NoColumns as u8) << 3) + | ((entry_length - 1) as u8), + ); + write_signed_varint(&mut linetable, line_delta); + + prev_line = line; + length -= entry_length; + i += entry_length; + continue; + } // Choose the appropriate encoding based on line delta and column info if line_delta == 0 && end_line_delta == 0 { @@ -4807,8 +5113,11 @@ fn generate_linetable( ); write_signed_varint(&mut linetable, line_delta); write_varint(&mut linetable, end_line_delta as u32); - write_varint(&mut linetable, (col as u32) + 1); - write_varint(&mut linetable, (end_col as u32) + 1); + write_varint(&mut linetable, if col < 0 { 0 } else { (col as u32) + 1 }); + write_varint( + &mut linetable, + if end_col < 0 { 0 } else { (end_col as u32) + 1 }, + ); } prev_line = line; @@ -4825,34 +5134,100 @@ fn generate_exception_table(blocks: &[Block], block_to_index: &[u32]) -> Box<[u8 let mut entries: Vec = Vec::new(); let mut current_entry: Option<(ExceptHandlerInfo, u32)> = None; // (handler_info, start_index) let mut instr_index = 0u32; - let instructions: Vec<&InstructionInfo> = iter_blocks(blocks) - .flat_map(|(_, block)| block.instructions.iter()) + let instructions: Vec<(BlockIdx, usize, &InstructionInfo)> = iter_blocks(blocks) + .flat_map(|(idx, block)| { + block + .instructions + .iter() + .enumerate() + .map(move |(instr_idx, instr)| (idx, instr_idx, instr)) + }) .collect(); + let mut jump_targets = vec![false; blocks.len()]; + for (_, block) in iter_blocks(blocks) { + for instr in &block.instructions { + if instr.target != BlockIdx::NULL { + jump_targets[instr.target.idx()] = true; + } + } + } let same_handler = |left: ExceptHandlerInfo, right: ExceptHandlerInfo| { block_to_index[left.handler_block.idx()] == block_to_index[right.handler_block.idx()] && left.stack_depth == right.stack_depth && left.preserve_lasti == right.preserve_lasti }; + let mut conditional_jumps_since_exit = 0usize; // Iterate through all instructions in block order // instr_index is the index into the final instructions array (including EXTENDED_ARG) // This matches how frame.rs uses lasti - for (pos, instr) in instructions.iter().enumerate() { + for (pos, &(block_idx, instr_idx, instr)) in instructions.iter().enumerate() { // CPython's final exception table is keyed by bytecode offsets after // empty cleanup labels have been resolved. RustPython can still have // distinct block ids for those labels here, so compare handler offsets. - let effective_except_handler = if instr.except_handler.is_none() - && matches!(instr.instr.real(), Some(Instruction::NotTaken)) - && let Some((current_handler, _)) = current_entry - && let Some(next) = instructions.get(pos + 1) - && let Some(next_handler) = next.except_handler - && same_handler(current_handler, next_handler) - && !next.instr.is_scope_exit() - { - Some(current_handler) - } else { - instr.except_handler - }; + let next = instructions.get(pos + 1).copied(); + let next_is_jump_target_block = next.is_some_and(|(next_block, _, _)| { + next_block != block_idx + && instr_idx + 1 == blocks[block_idx.idx()].instructions.len() + && jump_targets[next_block.idx()] + }); + let next_is_normalized_backward_jump = next.is_some_and(|(next_block, _, _)| { + next_block != block_idx + && instr_idx + 1 == blocks[block_idx.idx()].instructions.len() + && matches!( + blocks[next_block.idx()].instructions.as_slice(), + [not_taken, jump] + if matches!(not_taken.instr.real(), Some(Instruction::NotTaken)) + && jump.instr.is_unconditional_jump() + && jump.target != BlockIdx::NULL + && comes_before(blocks, jump.target, next_block) + ) + }); + let previous_is_conditional_ifexp_jump = pos.checked_sub(1).is_some_and(|prev_pos| { + let (_, _, previous) = instructions[prev_pos]; + previous.target != BlockIdx::NULL + && is_conditional_jump(&previous.instr) + && blocks[previous.target.idx()].conditional_ifexp_orelse_entry + }); + let previous_is_general_bool_conditional_jump = + pos.checked_sub(1).is_some_and(|prev_pos| { + let (_, _, previous) = instructions[prev_pos]; + matches!( + previous.instr.real(), + Some(Instruction::PopJumpIfFalse { .. } | Instruction::PopJumpIfTrue { .. }) + ) + }); + let previous_jump_uses_to_bool = pos.checked_sub(1).is_some_and(|prev_pos| { + let (_, _, previous) = instructions[prev_pos]; + matches!( + previous.instr.real(), + Some(Instruction::PopJumpIfFalse { .. } | Instruction::PopJumpIfTrue { .. }) + ) && instructions[..prev_pos] + .iter() + .rev() + .find(|(_, _, info)| !matches!(info.instr.real(), Some(Instruction::Cache))) + .is_some_and(|(_, _, info)| matches!(info.instr.real(), Some(Instruction::ToBool))) + }); + let effective_except_handler = + if is_conditional_jump(&instr.instr) && next_is_normalized_backward_jump { + None + } else if instr.except_handler.is_none() + && matches!(instr.instr.real(), Some(Instruction::NotTaken)) + && let Some((current_handler, _)) = current_entry + && let Some((_, _, next)) = next + && let Some(next_handler) = next.except_handler + && same_handler(current_handler, next_handler) + && !next.instr.is_scope_exit() + && !(next_is_jump_target_block && previous_jump_uses_to_bool) + && !previous_is_conditional_ifexp_jump + && !(conditional_jumps_since_exit > 1 + && previous_jump_uses_to_bool + && previous_is_general_bool_conditional_jump) + { + Some(current_handler) + } else { + instr.except_handler + }; // instr_size includes EXTENDED_ARG and CACHE entries let instr_size = instr.arg.instr_size() as u32 + instr.cache_entries; @@ -4864,6 +5239,7 @@ fn generate_exception_table(blocks: &[Block], block_to_index: &[u32]) -> Box<[u8 // No current entry, handler starts - begin new entry (None, Some(handler)) => { current_entry = Some((handler, instr_index)); + conditional_jumps_since_exit = 0; } // Current entry exists, same handler - continue @@ -4880,6 +5256,7 @@ fn generate_exception_table(blocks: &[Block], block_to_index: &[u32]) -> Box<[u8 curr_handler.preserve_lasti, )); current_entry = Some((handler, instr_index)); + conditional_jumps_since_exit = 0; } // Current entry exists, no handler - finish current entry @@ -4896,6 +5273,13 @@ fn generate_exception_table(blocks: &[Block], block_to_index: &[u32]) -> Box<[u8 } } + if effective_except_handler.is_some() && is_conditional_jump(&instr.instr) { + conditional_jumps_since_exit += 1; + } + if instr.instr.is_scope_exit() { + conditional_jumps_since_exit = 0; + } + instr_index += instr_size; // Account for EXTENDED_ARG instructions } @@ -5069,6 +5453,8 @@ fn push_cold_blocks_to_end(blocks: &mut Vec) { match_success_jump: false, break_continue_cleanup_jump: false, for_loop_break_cleanup_jump: false, + preserve_tobool_jump_location: false, + preserve_store_fast_store_fast_jump_location: false, }); jump_block.next = blocks[cold_idx.idx()].next; blocks[cold_idx.idx()].next = jump_block_idx; @@ -5207,7 +5593,7 @@ fn retarget_assert_conditional_jumps_to_empty_predecessor(blocks: &mut [Block]) let assertion_lines: Vec> = blocks.iter().map(assertion_failure_start_line).collect(); - for block in blocks { + for block in &mut *blocks { for instr in &mut block.instructions { if instr.target == BlockIdx::NULL || !is_conditional_jump(&instr.instr) { continue; @@ -6146,6 +6532,7 @@ fn jump_threading_impl(blocks: &mut [Block], include_conditional: bool) { && target_ins.target != BlockIdx::NULL && target_ins.target != target { + let conditional = is_conditional_jump(&ins.instr); if !include_conditional && blocks[target.idx()] .instructions @@ -6190,7 +6577,6 @@ fn jump_threading_impl(blocks: &mut [Block], include_conditional: bool) { .get(final_target.idx()) .copied() .unwrap_or(u32::MAX); - let conditional = is_conditional_jump(&ins.instr); if !include_conditional && source_pos < target_pos && final_target_pos < target_pos @@ -6254,6 +6640,7 @@ fn jump_threading_impl(blocks: &mut [Block], include_conditional: bool) { threaded.target = final_target; threaded.location = target_ins.location; threaded.end_location = target_ins.end_location; + threaded.lineno_override = target_ins.lineno_override; threaded.cache_entries = 0; blocks[bi].instructions.push(threaded); changed = true; @@ -6334,7 +6721,7 @@ fn normalize_jumps(blocks: &mut Vec) { // has no i_except edge. except_handler: None, folded_from_nonliteral_expr: false, - lineno_override: None, + lineno_override: last_ins.lineno_override, cache_entries: 0, preserve_redundant_jump_as_nop: false, remove_no_location_nop: false, @@ -6344,6 +6731,8 @@ fn normalize_jumps(blocks: &mut Vec) { match_success_jump: false, break_continue_cleanup_jump: false, for_loop_break_cleanup_jump: false, + preserve_tobool_jump_location: false, + preserve_store_fast_store_fast_jump_location: false, }; blocks[idx].instructions.push(not_taken); } else { @@ -6376,7 +6765,7 @@ fn normalize_jumps(blocks: &mut Vec) { // after exception targets were labelled. except_handler: None, folded_from_nonliteral_expr: false, - lineno_override: None, + lineno_override: last_ins.lineno_override, cache_entries: 0, preserve_redundant_jump_as_nop: false, remove_no_location_nop: false, @@ -6386,6 +6775,8 @@ fn normalize_jumps(blocks: &mut Vec) { match_success_jump: false, break_continue_cleanup_jump: false, for_loop_break_cleanup_jump: false, + preserve_tobool_jump_location: false, + preserve_store_fast_store_fast_jump_location: false, }); new_block.instructions.push(InstructionInfo { instr: PseudoOpcode::Jump.into(), @@ -6397,7 +6788,7 @@ fn normalize_jumps(blocks: &mut Vec) { // an exception-table range. except_handler: None, folded_from_nonliteral_expr: false, - lineno_override: None, + lineno_override: last_ins.lineno_override, cache_entries: 0, preserve_redundant_jump_as_nop: false, remove_no_location_nop: false, @@ -6407,6 +6798,8 @@ fn normalize_jumps(blocks: &mut Vec) { match_success_jump: false, break_continue_cleanup_jump: false, for_loop_break_cleanup_jump: false, + preserve_tobool_jump_location: false, + preserve_store_fast_store_fast_jump_location: false, }); new_block.next = old_next; @@ -6768,8 +7161,10 @@ fn remove_redundant_nops_in_blocks(blocks: &mut [Block]) -> usize { && src_instructions[src + 1].target != block_idx { let next_lineno = instruction_lineno(&src_instructions[src + 1]); - if next_lineno == lineno || next_lineno < 0 { - src_instructions[src + 1].lineno_override = Some(lineno); + if next_lineno < 0 { + copy_instruction_location(instr, &mut src_instructions[src + 1]); + remove = true; + } else if next_lineno == lineno { remove = true; } } else if src_instructions[src + 1].folded_from_nonliteral_expr { @@ -6779,7 +7174,7 @@ fn remove_redundant_nops_in_blocks(blocks: &mut [Block]) -> usize { if next_lineno == lineno { remove = true; } else if next_lineno < 0 { - src_instructions[src + 1].lineno_override = Some(lineno); + copy_instruction_location(instr, &mut src_instructions[src + 1]); remove = true; } } @@ -7464,7 +7859,12 @@ fn materialize_empty_conditional_exit_targets(blocks: &mut [Block]) { continue; } if let Some(first) = blocks[target.idx()].instructions.first_mut() { - overwrite_location(first, source.location, source.end_location); + overwrite_location( + first, + source.location, + source.end_location, + source.lineno_override, + ); } } @@ -7476,7 +7876,12 @@ fn materialize_empty_conditional_exit_targets(blocks: &mut [Block]) { continue; }; let mut cloned = blocks[next.idx()].instructions[0]; - overwrite_location(&mut cloned, last.location, last.end_location); + overwrite_location( + &mut cloned, + last.location, + last.end_location, + last.lineno_override, + ); blocks[target.idx()].instructions.push(cloned); } @@ -7492,7 +7897,7 @@ fn materialize_empty_conditional_exit_targets(blocks: &mut [Block]) { end_location: source.end_location, except_handler: None, folded_from_nonliteral_expr: false, - lineno_override: None, + lineno_override: source.lineno_override, cache_entries: 0, preserve_redundant_jump_as_nop: false, remove_no_location_nop: false, @@ -7502,6 +7907,8 @@ fn materialize_empty_conditional_exit_targets(blocks: &mut [Block]) { match_success_jump: false, break_continue_cleanup_jump: false, for_loop_break_cleanup_jump: false, + preserve_tobool_jump_location: false, + preserve_store_fast_store_fast_jump_location: false, }); } @@ -7527,7 +7934,7 @@ fn materialize_empty_conditional_exit_targets(blocks: &mut [Block]) { end_location: source.end_location, except_handler: None, folded_from_nonliteral_expr: false, - lineno_override: None, + lineno_override: source.lineno_override, cache_entries: 0, preserve_redundant_jump_as_nop: false, remove_no_location_nop: false, @@ -7537,6 +7944,8 @@ fn materialize_empty_conditional_exit_targets(blocks: &mut [Block]) { match_success_jump: false, break_continue_cleanup_jump: false, for_loop_break_cleanup_jump: false, + preserve_tobool_jump_location: false, + preserve_store_fast_store_fast_jump_location: false, }, ); } @@ -7607,17 +8016,32 @@ fn block_tail_starts_with_async_with_normal_exit(instructions: &[InstructionInfo } fn instruction_lineno(instr: &InstructionInfo) -> i32 { - instr - .lineno_override - .unwrap_or_else(|| instr.location.line.get() as i32) + match instr.lineno_override { + Some(LINE_ONLY_LOCATION_OVERRIDE) | None => instr.location.line.get() as i32, + Some(lineno) => lineno, + } } fn instruction_has_lineno(instr: &InstructionInfo) -> bool { - instruction_lineno(instr) > 0 + instruction_lineno(instr) >= 0 +} + +fn copy_instruction_location(source: InstructionInfo, target: &mut InstructionInfo) { + target.location = source.location; + target.end_location = source.end_location; + target.lineno_override = source.lineno_override; + target.preserve_store_fast_store_fast_jump_location = + source.preserve_store_fast_store_fast_jump_location; } -fn propagation_location(instr: &InstructionInfo) -> Option<(SourceLocation, SourceLocation)> { - instruction_has_lineno(instr).then_some((instr.location, instr.end_location)) +fn propagation_location( + instr: &InstructionInfo, +) -> Option<(SourceLocation, SourceLocation, Option)> { + instruction_has_lineno(instr).then_some(( + instr.location, + instr.end_location, + instr.lineno_override, + )) } fn block_has_fallthrough(block: &Block) -> bool { @@ -7631,6 +8055,21 @@ fn is_jump_instruction(instr: &InstructionInfo) -> bool { instr.instr.is_unconditional_jump() || is_conditional_jump(&instr.instr) } +fn last_jump_for_line_propagation(block: &Block) -> Option { + let last = block.instructions.last().copied()?; + if matches!(last.instr.real(), Some(Instruction::NotTaken)) { + block + .instructions + .iter() + .rev() + .copied() + .find(|instr| !matches!(instr.instr.real(), Some(Instruction::NotTaken))) + .filter(is_jump_instruction) + } else { + is_jump_instruction(&last).then_some(last) + } +} + fn is_exit_without_lineno(blocks: &[Block], block_idx: BlockIdx) -> bool { let block = &blocks[block_idx.idx()]; let Some(first) = block.instructions.first() else { @@ -8795,7 +9234,7 @@ fn reorder_conditional_chain_and_jump_back_blocks(blocks: &mut Vec) { .is_some_and(|info| matches!(info.lineno_override, Some(line) if line < 0)); if is_generic_false_path_reorder && jump_targets_for_iter(blocks, jump_block) - && is_for_break_cleanup_block(blocks, chain_start) + && is_for_break_cleanup_block(blocks, next_nonempty_block(blocks, chain_start)) { current = next; continue; @@ -8922,7 +9361,12 @@ fn reorder_conditional_scope_exit_and_jump_back_blocks( if jump_block == BlockIdx::NULL { return false; } - let Some(info) = blocks[jump_block.idx()].instructions.first() else { + let Some(info) = blocks[jump_block.idx()].instructions.iter().find(|info| { + !matches!( + info.instr.real(), + Some(Instruction::Nop | Instruction::NotTaken) + ) + }) else { return false; }; matches!( @@ -8940,7 +9384,12 @@ fn reorder_conditional_scope_exit_and_jump_back_blocks( if !is_explicit_continue_to_for_iter(blocks, jump_block) { return false; } - let Some(info) = blocks[jump_block.idx()].instructions.first() else { + let Some(info) = blocks[jump_block.idx()].instructions.iter().find(|info| { + !matches!( + info.instr.real(), + Some(Instruction::Nop | Instruction::NotTaken) + ) + }) else { return false; }; instruction_lineno(info) > instruction_lineno(&cond) @@ -9059,9 +9508,10 @@ fn reorder_conditional_scope_exit_and_jump_back_blocks( // a fallthrough backward jump. This Rust layout pass must not // undo that normalized shape. || (is_jump_back_only_block(blocks, jump_block) - && next_nonempty_block(blocks, blocks[jump_block.idx()].next) == exit_block) - || (jump_targets_for_iter(blocks, jump_block) - && !is_explicit_continue_after_conditional(blocks, jump_block, cond)) + && next_nonempty_block(blocks, blocks[jump_block.idx()].next) == exit_block + && !(jump_targets_for_iter(blocks, jump_block) + && !is_explicit_continue_after_conditional(blocks, jump_block, cond) + && !block_is_protected(&blocks[idx]))) || next_nonempty_block(blocks, blocks[jump_block.idx()].next) != exit_block || !comes_before( blocks, @@ -10179,11 +10629,12 @@ fn maybe_propagate_location( instr: &mut InstructionInfo, location: SourceLocation, end_location: SourceLocation, + lineno_override: Option, ) { if instr.lineno_override != Some(-2) && !instruction_has_lineno(instr) { instr.location = location; instr.end_location = end_location; - instr.lineno_override = None; + instr.lineno_override = lineno_override; } } @@ -10191,10 +10642,11 @@ fn overwrite_location( instr: &mut InstructionInfo, location: SourceLocation, end_location: SourceLocation, + lineno_override: Option, ) { instr.location = location; instr.end_location = end_location; - instr.lineno_override = None; + instr.lineno_override = lineno_override; } fn compute_reachable_blocks(blocks: &[Block]) -> Vec { @@ -10344,9 +10796,9 @@ fn duplicate_exits_without_lineno(blocks: &mut Vec, predecessors: &mut Ve let new_idx = BlockIdx(blocks.len() as u32); let mut new_block = blocks[target.idx()].clone(); if let Some(first) = new_block.instructions.first_mut() - && let Some((location, end_location)) = propagation_location(last) + && let Some((location, end_location, lineno_override)) = propagation_location(last) { - overwrite_location(first, location, end_location); + overwrite_location(first, location, end_location, lineno_override); } let old_next = blocks[target.idx()].next; new_block.next = old_next; @@ -10381,10 +10833,10 @@ fn duplicate_exits_without_lineno(blocks: &mut Vec, predecessors: &mut Ve )) && (is_exit_without_lineno(blocks, target) || is_eval_break_without_lineno(blocks, target)) - && let Some((location, end_location)) = propagation_location(last) + && let Some((location, end_location, lineno_override)) = propagation_location(last) && let Some(first) = blocks[target.idx()].instructions.first_mut() { - maybe_propagate_location(first, location, end_location); + maybe_propagate_location(first, location, end_location, lineno_override); } } current = blocks[current.idx()].next; @@ -10406,14 +10858,14 @@ fn propagate_line_numbers(blocks: &mut [Block], predecessors: &[u32]) { let block = &mut blocks[current.idx()]; let mut prev_location = None; for instr in &mut block.instructions { - if let Some((location, end_location)) = prev_location { - maybe_propagate_location(instr, location, end_location); + if let Some((location, end_location, lineno_override)) = prev_location { + maybe_propagate_location(instr, location, end_location, lineno_override); } prev_location = propagation_location(instr); } prev_location }; - let last = blocks[current.idx()].instructions.last().copied().unwrap(); + let last_jump = last_jump_for_line_propagation(&blocks[current.idx()]); if has_fallthrough { let target = next_nonempty_block(blocks, next_block); @@ -10426,15 +10878,15 @@ fn propagate_line_numbers(blocks: &mut [Block], predecessors: &[u32]) { current, target, )) - && let Some((location, end_location)) = prev_location + && let Some((location, end_location, lineno_override)) = prev_location && let Some(first) = blocks[target.idx()].instructions.first_mut() { - maybe_propagate_location(first, location, end_location); + maybe_propagate_location(first, location, end_location, lineno_override); } } - if is_jump_instruction(&last) { - let mut target = next_nonempty_block(blocks, last.target); + if let Some(last_jump) = last_jump { + let mut target = next_nonempty_block(blocks, last_jump.target); while target != BlockIdx::NULL && blocks[target.idx()].instructions.is_empty() && predecessors[target.idx()] == 1 @@ -10450,10 +10902,10 @@ fn propagate_line_numbers(blocks: &mut [Block], predecessors: &[u32]) { current, target, )) - && let Some((location, end_location)) = prev_location + && let Some((location, end_location, lineno_override)) = prev_location && let Some(first) = blocks[target.idx()].instructions.first_mut() { - maybe_propagate_location(first, location, end_location); + maybe_propagate_location(first, location, end_location, lineno_override); } } } @@ -10504,6 +10956,125 @@ fn resolve_next_location_overrides(blocks: &mut [Block]) { } } +fn propagate_store_fast_store_fast_jump_locations(blocks: &mut [Block]) { + for block in blocks.iter_mut() { + for i in 1..block.instructions.len() { + let previous = block.instructions[i - 1]; + let follows_copy = i >= 2 + && matches!( + block.instructions[i - 2].instr.real(), + Some(Instruction::Copy { .. }) + ); + if !matches!( + previous.instr.real(), + Some(Instruction::StoreFastStoreFast { .. }) + ) || !block.instructions[i].instr.is_unconditional_jump() + || block.instructions[i].preserve_store_fast_store_fast_jump_location + || (follows_copy + && instruction_lineno(&block.instructions[i]) == instruction_lineno(&previous) + && block.instructions[i].location != previous.location) + { + continue; + } + let follows_unpack = i >= 2 + && matches!( + block.instructions[i - 2].instr.real(), + Some(Instruction::UnpackSequence { .. } | Instruction::UnpackEx { .. }) + ); + if follows_unpack && instruction_lineno(&block.instructions[i]) >= 0 { + continue; + } + block.instructions[i].location = previous.location; + block.instructions[i].end_location = previous.end_location; + block.instructions[i].lineno_override = previous.lineno_override; + } + } +} + +fn propagate_tobool_conditional_jump_locations(blocks: &mut [Block]) { + for block in blocks.iter_mut() { + let mut i = 1; + while i < block.instructions.len() { + if !matches!( + block.instructions[i - 1].instr.real(), + Some(Instruction::ToBool) + ) || !is_conditional_jump(&block.instructions[i].instr) + { + i += 1; + continue; + } + + let (location, end_location, lineno_override) = + if block.instructions[i].preserve_tobool_jump_location { + ( + block.instructions[i].location, + block.instructions[i].end_location, + block.instructions[i].lineno_override, + ) + } else { + ( + block.instructions[i - 1].location, + block.instructions[i - 1].end_location, + block.instructions[i - 1].lineno_override, + ) + }; + block.instructions[i].location = location; + block.instructions[i].end_location = end_location; + block.instructions[i].lineno_override = lineno_override; + + let mut j = i + 1; + if j < block.instructions.len() + && matches!( + block.instructions[j].instr.real(), + Some(Instruction::NotTaken) + ) + { + block.instructions[j].location = location; + block.instructions[j].end_location = end_location; + block.instructions[j].lineno_override = lineno_override; + j += 1; + } + if j < block.instructions.len() && block.instructions[j].instr.is_unconditional_jump() { + block.instructions[j].location = location; + block.instructions[j].end_location = end_location; + block.instructions[j].lineno_override = lineno_override; + } + + i = j; + } + } + + for idx in 0..blocks.len() { + let Some(last) = blocks[idx].instructions.last().copied() else { + continue; + }; + if !is_conditional_jump(&last.instr) { + continue; + } + let next = blocks[idx].next; + if next == BlockIdx::NULL { + continue; + } + let next_block = &mut blocks[next.idx()]; + if !next_block + .instructions + .first() + .is_some_and(|instr| matches!(instr.instr.real(), Some(Instruction::NotTaken))) + { + continue; + } + for instr in next_block.instructions.iter_mut().take(2) { + if matches!(instr.instr.real(), Some(Instruction::NotTaken)) + || instr.instr.is_unconditional_jump() + { + instr.location = last.location; + instr.end_location = last.end_location; + instr.lineno_override = last.lineno_override; + } + } + } +} + fn find_layout_predecessor(blocks: &[Block], target: BlockIdx) -> BlockIdx { if target == BlockIdx::NULL { return BlockIdx::NULL; @@ -10799,7 +11370,12 @@ fn duplicate_shared_jump_back_targets(blocks: &mut Vec) { let mut cloned = blocks[target.idx()].clone(); if let Some(first) = cloned.instructions.first_mut() { - overwrite_location(first, jump.location, jump.end_location); + overwrite_location( + first, + jump.location, + jump.end_location, + jump.lineno_override, + ); } let new_idx = BlockIdx(blocks.len() as u32); cloned.next = target; @@ -10812,7 +11388,12 @@ fn duplicate_shared_jump_back_targets(blocks: &mut Vec) { let jump = blocks[block_idx.idx()].instructions[instr_idx]; let mut cloned = blocks[target.idx()].clone(); if let Some(first) = cloned.instructions.first_mut() { - overwrite_location(first, jump.location, jump.end_location); + overwrite_location( + first, + jump.location, + jump.end_location, + jump.lineno_override, + ); } let new_idx = BlockIdx(blocks.len() as u32); @@ -10995,7 +11576,12 @@ fn duplicate_fallthrough_jump_back_targets(blocks: &mut Vec) { let new_idx = BlockIdx(blocks.len() as u32); let mut cloned = blocks[target.idx()].clone(); if let Some(first) = cloned.instructions.first_mut() { - overwrite_location(first, last.location, last.end_location); + overwrite_location( + first, + last.location, + last.end_location, + last.lineno_override, + ); } cloned.next = blocks[layout_pred.idx()].next; blocks.push(cloned); @@ -11200,13 +11786,13 @@ fn duplicate_end_returns(blocks: &mut Vec, metadata: &CodeUnitMetadata) { let propagated_location = blocks[block_idx.idx()] .instructions .last() - .map(|instr| (instr.location, instr.end_location)); + .map(|instr| (instr.location, instr.end_location, instr.lineno_override)); let mut cloned_return = return_insts.clone(); if !instruction_has_lineno(&cloned_return[0]) - && let Some((location, end_location)) = propagated_location + && let Some((location, end_location, lineno_override)) = propagated_location { for instr in &mut cloned_return { - overwrite_location(instr, location, end_location); + overwrite_location(instr, location, end_location, lineno_override); } } blocks[block_idx.idx()].instructions.extend(cloned_return); @@ -11218,7 +11804,12 @@ fn duplicate_end_returns(blocks: &mut Vec, metadata: &CodeUnitMetadata) { let jump = blocks[block_idx.idx()].instructions[instr_idx]; let mut cloned_return = return_insts.clone(); if let Some(first) = cloned_return.first_mut() { - overwrite_location(first, jump.location, jump.end_location); + overwrite_location( + first, + jump.location, + jump.end_location, + jump.lineno_override, + ); } let new_idx = BlockIdx(blocks.len() as u32); let is_conditional = is_conditional_jump(&jump.instr); @@ -11327,7 +11918,12 @@ fn inline_with_suppress_return_blocks(blocks: &mut [Block]) { let mut cloned_return = blocks[target.idx()].instructions.clone(); for instr in &mut cloned_return { - overwrite_location(instr, jump.location, jump.end_location); + overwrite_location( + instr, + jump.location, + jump.end_location, + jump.lineno_override, + ); } blocks[block_idx].instructions.pop(); blocks[block_idx].instructions.extend(cloned_return); @@ -11416,7 +12012,12 @@ fn duplicate_named_except_cleanup_returns(blocks: &mut Vec, metadata: &Co let jump = blocks[block_idx.idx()].instructions[instr_idx]; let mut cloned = blocks[target.idx()].instructions.clone(); if let Some(first) = cloned.first_mut() { - overwrite_location(first, jump.location, jump.end_location); + overwrite_location( + first, + jump.location, + jump.end_location, + jump.lineno_override, + ); } let new_idx = BlockIdx(blocks.len() as u32); @@ -11480,7 +12081,12 @@ fn inline_pop_except_return_blocks(blocks: &mut [Block]) { let mut cloned_return = blocks[target.idx()].instructions.clone(); for instr in &mut cloned_return { - overwrite_location(instr, jump.location, jump.end_location); + overwrite_location( + instr, + jump.location, + jump.end_location, + jump.lineno_override, + ); } blocks[block_idx].instructions.pop(); blocks[block_idx].instructions.extend(cloned_return); @@ -11890,6 +12496,8 @@ mod tests { match_success_jump: false, break_continue_cleanup_jump: false, for_loop_break_cleanup_jump: false, + preserve_tobool_jump_location: false, + preserve_store_fast_store_fast_jump_location: false, } } diff --git a/crates/codegen/src/symboltable.rs b/crates/codegen/src/symboltable.rs index 06eeaf520aa..28133b1008b 100644 --- a/crates/codegen/src/symboltable.rs +++ b/crates/codegen/src/symboltable.rs @@ -32,6 +32,9 @@ pub struct SymbolTable { // Return True if the block is a nested class or function pub is_nested: bool, + /// Whether this function-like scope was created directly in a class block. + pub is_method: bool, + /// A set of symbols present on this scope level. pub symbols: IndexMap, @@ -90,6 +93,7 @@ impl SymbolTable { typ, line_number, is_nested, + is_method: false, symbols: IndexMap::default(), sub_tables: vec![], next_sub_table: 0, @@ -1103,6 +1107,7 @@ impl SymbolTableBuilder { | CompilerScope::Lambda | CompilerScope::Comprehension | CompilerScope::Annotation + | CompilerScope::TypeParams ) } @@ -1118,11 +1123,17 @@ impl SymbolTableBuilder { } fn enter_scope(&mut self, name: &str, typ: CompilerScope, line_number: u32) { - let is_nested = self.tables.last().is_some_and(|table| { - table.is_nested - || matches!( - table.typ, - CompilerScope::Function | CompilerScope::AsyncFunction + let parent = self.tables.last(); + let is_nested = + parent.is_some_and(|table| table.is_nested || Self::is_function_like_scope(table.typ)); + let is_method = parent.is_some_and(|table| { + table.typ == CompilerScope::Class + && matches!( + typ, + CompilerScope::Function + | CompilerScope::AsyncFunction + | CompilerScope::Lambda + | CompilerScope::Comprehension ) }); // Inherit mangled_names from parent for non-class scopes @@ -1132,6 +1143,7 @@ impl SymbolTableBuilder { .and_then(|t| t.mangled_names.clone()) .filter(|_| typ != CompilerScope::Class); let mut table = SymbolTable::new(name.to_owned(), typ, line_number, is_nested); + table.is_method = is_method; table.future_annotations = self.future_annotations; table.mangled_names = inherited_mangled_names; self.tables.push(table); @@ -1145,6 +1157,8 @@ impl SymbolTableBuilder { name: &str, line_number: u32, for_class: bool, + has_defaults: bool, + has_kwdefaults: bool, ) -> SymbolTableResult { // Check if we're in a class scope let in_class = self @@ -1174,6 +1188,12 @@ impl SymbolTableBuilder { if for_class { self.register_name(".generic_base", SymbolUsage::Assigned, TextRange::default())?; } + if has_defaults { + self.register_name(".defaults", SymbolUsage::Parameter, TextRange::default())?; + } + if has_kwdefaults { + self.register_name(".kwdefaults", SymbolUsage::Parameter, TextRange::default())?; + } Ok(()) } @@ -1195,6 +1215,7 @@ impl SymbolTableBuilder { let can_see_class_scope = current.typ == CompilerScope::Class || current.can_see_class_scope; let has_conditional = current.has_conditional_annotations; + let is_nested = current.is_nested || Self::is_function_like_scope(current.typ); // Create annotation block if not exists if current.annotation_block.is_none() { @@ -1202,7 +1223,7 @@ impl SymbolTableBuilder { "__annotate__".to_owned(), CompilerScope::Annotation, line_number, - true, // is_nested + is_nested, ); // Annotation scope in class can see class scope annotation_table.can_see_class_scope = can_see_class_scope; @@ -1488,6 +1509,8 @@ impl SymbolTableBuilder { &format!("", name.as_str()), self.line_index_start(type_params.range), false, + true, + Self::has_kwonlydefaults(parameters), )?; self.scan_type_params(type_params)?; } @@ -1536,6 +1559,8 @@ impl SymbolTableBuilder { &format!("", name.as_str()), self.line_index_start(type_params.range), true, // for_class: enable selective mangling + false, + false, )?; // Set class_name for mangling in type param scope self.class_name = Some(name.to_string()); @@ -1847,6 +1872,8 @@ impl SymbolTableBuilder { &format!(""), self.line_index_start(type_params.range), false, + false, + false, )?; self.scan_type_params(type_params)?; } @@ -2583,6 +2610,13 @@ impl SymbolTableBuilder { Ok(()) } + fn has_kwonlydefaults(parameters: &ast::Parameters) -> bool { + parameters + .kwonlyargs + .iter() + .any(|arg| arg.default.is_some()) + } + fn enter_scope_with_parameters( &mut self, name: &str, @@ -2704,17 +2738,6 @@ impl SymbolTableBuilder { Ok(()) } - fn add_varname_to_scope(&mut self, table_idx: usize, name: &str) { - let varnames = if table_idx + 1 == self.tables.len() { - &mut self.current_varnames - } else { - &mut self.varnames_stack[table_idx + 1] - }; - if !varnames.iter().any(|existing| existing == name) { - varnames.push(name.to_owned()); - } - } - // Mirrors CPython symtable_extend_namedexpr_scope(): assignment expressions // inside comprehensions bind in the nearest function/module-like scope, not // in the synthetic comprehension scope itself. @@ -2752,9 +2775,6 @@ impl SymbolTableBuilder { match table_type { CompilerScope::Function | CompilerScope::AsyncFunction | CompilerScope::Lambda => { - let current_comp_inlined = self.tables.last().is_some_and(|table| { - table.typ == CompilerScope::Comprehension && table.comp_inlined - }); let parent_is_global = self.tables[table_idx] .symbols .get(mangled.as_str()) @@ -2777,9 +2797,6 @@ impl SymbolTableBuilder { .entry(mangled.clone()) .or_insert_with(|| Symbol::new(mangled.as_str())); symbol.flags.insert(SymbolFlags::ASSIGNED); - if !parent_is_global && current_comp_inlined { - self.add_varname_to_scope(table_idx, mangled.as_str()); - } return Ok(()); } CompilerScope::Module => { diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index 86723f40022..6f931bbd114 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -460,9 +460,12 @@ bitflags! { const GENERATOR = 0x0020; const COROUTINE = 0x0080; const ITERABLE_COROUTINE = 0x0100; + const ASYNC_GENERATOR = 0x0200; + const FUTURE_ANNOTATIONS = 0x1000000; /// If a code object represents a function and has a docstring, /// this bit is set and the first item in co_consts is the docstring. const HAS_DOCSTRING = 0x4000000; + const METHOD = 0x8000000; } } @@ -906,8 +909,6 @@ impl PartialEq for ConstantData { match (self, other) { (Integer { value: a }, Integer { value: b }) => a == b, - // we want to compare floats *by actual value* - if we have the *exact same* float - // already in a constant cache, we want to use that (Float { value: a }, Float { value: b }) => a.to_bits() == b.to_bits(), (Complex { value: a }, Complex { value: b }) => { a.re.to_bits() == b.re.to_bits() && a.im.to_bits() == b.im.to_bits() diff --git a/crates/literal/src/float.rs b/crates/literal/src/float.rs index 0856f646b22..79caca0592c 100644 --- a/crates/literal/src/float.rs +++ b/crates/literal/src/float.rs @@ -3,7 +3,7 @@ use alloc::borrow::ToOwned; use alloc::format; use alloc::string::{String, ToString}; use core::f64; -use num_traits::{Float, Zero}; +use num_traits::Zero; pub fn parse_str(literal: &str) -> Option { parse_inner(literal.trim().as_bytes()) @@ -209,6 +209,111 @@ pub fn format_general( } } +fn prefer_cpython_tie_repr(s: String, value: f64) -> String { + let Some(exponent_pos) = s.find('e') else { + return s; + }; + let Some(digit_pos) = s[..exponent_pos].bytes().rposition(|b| b.is_ascii_digit()) else { + return s; + }; + + let digit = s.as_bytes()[digit_pos]; + if digit == b'0' { + return s; + } + let decremented = digit - 1; + if !(decremented - b'0').is_multiple_of(2) { + return s; + } + + let mut candidate = s.clone(); + candidate.replace_range( + digit_pos..digit_pos + 1, + core::str::from_utf8(&[decremented]).unwrap(), + ); + if parse_str(&candidate).is_none_or(|parsed| parsed.to_bits() != value.to_bits()) { + return s; + } + + let Some(current_distance) = decimal_distance_to_f64(&s, value) else { + return s; + }; + let Some(candidate_distance) = decimal_distance_to_f64(&candidate, value) else { + return s; + }; + + if candidate_distance <= current_distance { + candidate + } else { + s + } +} + +fn checked_pow_u128(base: u128, exp: u32) -> Option { + let mut result = 1u128; + for _ in 0..exp { + result = result.checked_mul(base)?; + } + Some(result) +} + +fn parse_decimal_rational(s: &str) -> Option<(u128, u32)> { + let exponent_pos = s.find('e')?; + let exponent = s[exponent_pos + 1..].parse::().ok()?; + let significand = s[..exponent_pos] + .strip_prefix('-') + .unwrap_or(&s[..exponent_pos]); + let dot_pos = significand.find('.'); + let frac_digits = dot_pos + .map(|pos| significand.len().saturating_sub(pos + 1)) + .unwrap_or(0); + let mut digits = String::with_capacity(significand.len()); + for ch in significand.chars() { + if ch != '.' { + digits.push(ch); + } + } + let mut int = digits.parse::().ok()?; + let mut scale = i32::try_from(frac_digits).ok()? - exponent; + if scale < 0 { + int = int.checked_mul(checked_pow_u128(10, (-scale) as u32)?)?; + scale = 0; + } + Some((int, scale as u32)) +} + +fn f64_mantissa_exponent(value: f64) -> Option<(u128, i32)> { + let bits = value.abs().to_bits(); + let exponent = ((bits >> 52) & 0x7ff) as i32; + let fraction = bits & ((1u64 << 52) - 1); + if exponent == 0 { + Some((u128::from(fraction), 1 - 1023 - 52)) + } else if exponent < 0x7ff { + Some((u128::from((1u64 << 52) | fraction), exponent - 1023 - 52)) + } else { + None + } +} + +fn decimal_distance_to_f64(s: &str, value: f64) -> Option { + let (decimal_int, decimal_scale) = parse_decimal_rational(s)?; + let (mantissa, binary_exponent) = f64_mantissa_exponent(value)?; + if binary_exponent >= 0 || decimal_scale > 38 { + return None; + } + + let binary_scale = u32::try_from(-binary_exponent).ok()?; + let common_twos = decimal_scale.max(binary_scale); + let decimal_scaled = + decimal_int.checked_mul(checked_pow_u128(2, common_twos - decimal_scale)?)?; + let five_power = checked_pow_u128(5, decimal_scale)?; + let binary_scaled = mantissa + .checked_mul(checked_pow_u128(2, common_twos - binary_scale)?)? + .checked_mul(five_power)?; + + Some(decimal_scaled.abs_diff(binary_scaled)) +} + // TODO: rewrite using format_general pub fn to_string(value: f64) -> String { let lit = format!("{value:e}"); @@ -223,7 +328,7 @@ pub fn to_string(value: f64) -> String { value.to_string() } } else { - format!("{significand}e{exponent:+#03}") + prefer_cpython_tie_repr(format!("{significand}e{exponent:+#03}"), value) } } else { let mut s = value.to_string(); @@ -232,6 +337,22 @@ pub fn to_string(value: f64) -> String { } } +#[cfg(test)] +mod tests { + use super::to_string; + + #[test] + fn repr_uses_cpython_tie_digit_for_power_of_two() { + assert_eq!(to_string(2.0f64.powi(-25)), "2.9802322387695312e-08"); + assert_eq!(to_string((-2.0f64).powi(-25)), "-2.9802322387695312e-08"); + assert_eq!(to_string(2.0f64.powi(-26)), "1.4901161193847656e-08"); + assert_eq!( + to_string(2.0f64.powi(-14) - 2.0f64.powi(-25)), + "6.1005353927612305e-05" + ); + } +} + pub fn from_hex(s: &str) -> Option { if let Ok(f) = hexf_parse::parse_hexf64(s, false) { return Some(f); @@ -281,22 +402,23 @@ pub fn from_hex(s: &str) -> Option { } pub fn to_hex(value: f64) -> String { - let (mantissa, exponent, sign) = value.integer_decode(); - let sign_fmt = if sign < 0 { "-" } else { "" }; + let bits = value.to_bits(); + let sign_fmt = if bits >> 63 != 0 { "-" } else { "" }; match value { value if value.is_zero() => format!("{sign_fmt}0x0.0p+0"), value if value.is_infinite() => format!("{sign_fmt}inf"), value if value.is_nan() => "nan".to_owned(), _ => { - const BITS: i16 = 52; - const FRACT_MASK: u64 = 0xf_ffff_ffff_ffff; - format!( - "{}{:#x}.{:013x}p{:+}", - sign_fmt, - mantissa >> BITS, - mantissa & FRACT_MASK, - exponent + BITS - ) + const FRACT_MASK: u64 = (1u64 << 52) - 1; + const EXP_MASK: u64 = 0x7ff; + let exponent = (bits >> 52) & EXP_MASK; + let fraction = bits & FRACT_MASK; + if exponent == 0 { + format!("{sign_fmt}0x0.{fraction:013x}p-1022") + } else { + let exponent = i32::try_from(exponent).unwrap() - 1023; + format!("{sign_fmt}0x1.{fraction:013x}p{exponent:+}") + } } } } @@ -304,6 +426,10 @@ pub fn to_hex(value: f64) -> String { #[test] fn test_to_hex() { use rand::Rng; + assert_eq!(to_hex(f64::from_bits(1)), "0x0.0000000000001p-1022"); + assert_eq!(to_hex(f64::from_bits(2)), "0x0.0000000000002p-1022"); + assert_eq!(to_hex(-f64::from_bits(1)), "-0x0.0000000000001p-1022"); + assert_eq!(to_hex(f64::MIN_POSITIVE), "0x1.0000000000000p-1022"); for _ in 0..20000 { let bytes = rand::rng().random::(); let f = f64::from_bits(bytes); diff --git a/crates/stdlib/src/snapshots/rustpython_stdlib___opcode__tests__nested_double_async_with.snap b/crates/stdlib/src/snapshots/rustpython_stdlib___opcode__tests__nested_double_async_with.snap index a30fa6a78ca..34b5ce7e5c9 100644 --- a/crates/stdlib/src/snapshots/rustpython_stdlib___opcode__tests__nested_double_async_with.snap +++ b/crates/stdlib/src/snapshots/rustpython_stdlib___opcode__tests__nested_double_async_with.snap @@ -13,9 +13,9 @@ expression: "dis(r#\"\nasync def test():\n for stop_exc in (StopIteration('sp Disassembly of ", line 1>: 1 RETURN_GENERATOR POP_TOP - RESUME 0 + L1: RESUME 0 - 2 L1: LOAD_GLOBAL 1 (StopIteration + NULL) + 2 LOAD_GLOBAL 1 (StopIteration + NULL) LOAD_CONST 0 ('spam') CALL 1 LOAD_GLOBAL 3 (StopAsyncIteration + NULL) @@ -90,10 +90,12 @@ Disassembly of ", line 1>: POP_TOP POP_TOP JUMP_FORWARD 3 (to L25) - L24: COPY 3 + + -- L24: COPY 3 POP_EXCEPT RERAISE 1 - L25: NOP + + 5 L25: NOP 10 L26: LOAD_GLOBAL 4 (self) LOAD_ATTR 13 (fail + NULL|self) @@ -153,11 +155,11 @@ Disassembly of ", line 1>: POP_TOP POP_TOP JUMP_BACKWARD 205 (to L2) - L39: COPY 3 + + -- L39: COPY 3 POP_EXCEPT RERAISE 1 - - -- L40: CALL_INTRINSIC_1 3 (INTRINSIC_STOPITERATION_ERROR) + L40: CALL_INTRINSIC_1 3 (INTRINSIC_STOPITERATION_ERROR) RERAISE 1 ExceptionTable: L1 to L3 -> L40 [0] lasti diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index bea84281109..6052e4fe256 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -564,7 +564,8 @@ impl Py { let is_gen = code.flags.contains(bytecode::CodeFlags::GENERATOR); let is_coro = code.flags.contains(bytecode::CodeFlags::COROUTINE); - let use_datastack = !(is_gen || is_coro); + let is_async_gen = code.flags.contains(bytecode::CodeFlags::ASYNC_GENERATOR); + let use_datastack = !(is_gen || is_coro || is_async_gen); // Construct frame: let frame = Frame::new( @@ -579,35 +580,30 @@ impl Py { .into_ref(&vm.ctx); self.fill_locals_from_args(&frame, func_args, vm)?; - match (is_gen, is_coro) { - (true, false) => { - let obj = PyGenerator::new(frame.clone(), self.__name__(), self.__qualname__()) - .into_pyobject(vm); - frame.set_generator(&obj); - Ok(obj) - } - (false, true) => { - let obj = PyCoroutine::new(frame.clone(), self.__name__(), self.__qualname__()) - .into_pyobject(vm); - frame.set_generator(&obj); - Ok(obj) - } - (true, true) => { - let obj = PyAsyncGen::new(frame.clone(), self.__name__(), self.__qualname__()) - .into_pyobject(vm); - frame.set_generator(&obj); - Ok(obj) - } - (false, false) => { - let result = vm.run_frame(frame.clone()); - // Release data stack memory after frame execution completes. - unsafe { - if let Some(base) = frame.materialize_localsplus() { - vm.datastack_pop(base); - } + if is_async_gen { + let obj = PyAsyncGen::new(frame.clone(), self.__name__(), self.__qualname__()) + .into_pyobject(vm); + frame.set_generator(&obj); + Ok(obj) + } else if is_gen { + let obj = PyGenerator::new(frame.clone(), self.__name__(), self.__qualname__()) + .into_pyobject(vm); + frame.set_generator(&obj); + Ok(obj) + } else if is_coro { + let obj = PyCoroutine::new(frame.clone(), self.__name__(), self.__qualname__()) + .into_pyobject(vm); + frame.set_generator(&obj); + Ok(obj) + } else { + let result = vm.run_frame(frame.clone()); + // Release data stack memory after frame execution completes. + unsafe { + if let Some(base) = frame.materialize_localsplus() { + vm.datastack_pop(base); } - result } + result } } @@ -689,11 +685,11 @@ impl Py { .intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS) ); debug_assert_eq!(code.kwonlyarg_count, 0); - debug_assert!( - !code - .flags - .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE) - ); + debug_assert!(!code.flags.intersects( + bytecode::CodeFlags::GENERATOR + | bytecode::CodeFlags::COROUTINE + | bytecode::CodeFlags::ASYNC_GENERATOR, + )); let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) { None @@ -741,10 +737,11 @@ impl Py { // Generator/coroutine code objects are SIMPLE_FUNCTION in call // specialization classification, but their call path must still // go through invoke() to produce generator/coroutine objects. - if code - .flags - .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE) - { + if code.flags.intersects( + bytecode::CodeFlags::GENERATOR + | bytecode::CodeFlags::COROUTINE + | bytecode::CodeFlags::ASYNC_GENERATOR, + ) { return self.invoke(FuncArgs::from(args), vm); } let frame = self.prepare_exact_args_frame(args, vm); @@ -760,10 +757,11 @@ impl Py { } pub(crate) fn datastack_frame_size_bytes_for_code(code: &Py) -> Option { - if code - .flags - .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE) - { + if code.flags.intersects( + bytecode::CodeFlags::GENERATOR + | bytecode::CodeFlags::COROUTINE + | bytecode::CodeFlags::ASYNC_GENERATOR, + ) { return None; } let nlocalsplus = code.localspluskinds.len(); @@ -1468,9 +1466,11 @@ pub(crate) fn vectorcall_function( && !code.flags.contains(bytecode::CodeFlags::VARARGS) && !code.flags.contains(bytecode::CodeFlags::VARKEYWORDS) && code.kwonlyarg_count == 0 - && !code - .flags - .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE); + && !code.flags.intersects( + bytecode::CodeFlags::GENERATOR + | bytecode::CodeFlags::COROUTINE + | bytecode::CodeFlags::ASYNC_GENERATOR, + ); if is_simple && nargs == code.arg_count as usize { // FAST PATH: simple positional-only call, exact arg count. diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 1bafe7f26a4..f1ed31d7189 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -710,10 +710,11 @@ impl Frame { // For generators/coroutines, initialize prev_line to the def line // so that preamble instructions (RETURN_GENERATOR, POP_TOP) don't // fire spurious LINE events. - let prev_line = if code - .flags - .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE) - { + let prev_line = if code.flags.intersects( + bytecode::CodeFlags::GENERATOR + | bytecode::CodeFlags::COROUTINE + | bytecode::CodeFlags::ASYNC_GENERATOR, + ) { code.first_line_number.map_or(0, |line| line.get() as u32) } else { 0 @@ -9523,9 +9524,7 @@ impl ExecutingFrame<'_> { // Returns the exception object; RERAISE will re-raise it if arg.fast_isinstance(vm.ctx.exceptions.stop_iteration) { let flags = &self.code.flags; - let msg = if flags - .contains(bytecode::CodeFlags::COROUTINE | bytecode::CodeFlags::GENERATOR) - { + let msg = if flags.contains(bytecode::CodeFlags::ASYNC_GENERATOR) { "async generator raised StopIteration" } else if flags.contains(bytecode::CodeFlags::COROUTINE) { "coroutine raised StopIteration" diff --git a/crates/vm/src/stdlib/builtins.rs b/crates/vm/src/stdlib/builtins.rs index 8358d41b2b4..f2d4dcdb648 100644 --- a/crates/vm/src/stdlib/builtins.rs +++ b/crates/vm/src/stdlib/builtins.rs @@ -160,7 +160,11 @@ mod builtins { .map(|&b| b as char) .collect(); - if name.is_empty() { None } else { Some(name) } + if name.is_empty() { + None + } else { + Some(normalize_source_encoding(&name)) + } } // Split into lines (first two only) @@ -186,15 +190,39 @@ mod builtins { lines.next().and_then(find_encoding_in_line) } + /// Match CPython's Parser/tokenizer/helpers.c:get_normal_name(). + #[cfg(feature = "parser")] + fn normalize_source_encoding(name: &str) -> String { + let mut normalized = String::with_capacity(name.len().min(12)); + for ch in name.chars().take(12) { + if ch == '_' { + normalized.push('-'); + } else { + normalized.push(ch.to_ascii_lowercase()); + } + } + + if normalized == "utf-8" || normalized.starts_with("utf-8-") { + "utf-8".to_owned() + } else if normalized == "latin-1" + || normalized == "iso-8859-1" + || normalized == "iso-latin-1" + || normalized.starts_with("latin-1-") + || normalized.starts_with("iso-8859-1-") + || normalized.starts_with("iso-latin-1-") + { + "iso-8859-1".to_owned() + } else { + name.to_owned() + } + } + /// Decode source bytes to a string, handling PEP 263 encoding declarations /// and BOM. Raises SyntaxError for invalid UTF-8 without an encoding /// declaration. - /// Check if an encoding name is a UTF-8 variant after normalization. - /// Matches: utf-8, utf_8, utf8, UTF-8, etc. #[cfg(feature = "parser")] fn is_utf8_encoding(name: &str) -> bool { - let normalized: String = name.chars().filter(|&c| c != '-' && c != '_').collect(); - normalized.eq_ignore_ascii_case("utf8") + name == "utf-8" } #[cfg(feature = "parser")] @@ -206,9 +234,10 @@ mod builtins { // Validate BOM + encoding combination if has_bom && !is_utf8 { + let enc = encoding.as_deref().unwrap_or("utf-8"); return Err(vm.new_exception_msg( vm.ctx.exceptions.syntax_error.to_owned(), - format!("encoding problem for '{filename}': utf-8").into(), + format!("encoding problem: {enc} with BOM").into(), )); } diff --git a/scripts/dis_dump.py b/scripts/dis_dump.py index d888cd23df3..813de22e658 100755 --- a/scripts/dis_dump.py +++ b/scripts/dis_dump.py @@ -18,7 +18,6 @@ import json import os import re -import struct import sys import types @@ -109,22 +108,6 @@ def _unescape(m): return argrepr -def _normalize_const_repr(value): - """Return a cross-interpreter representation for LOAD_CONST values.""" - if isinstance(value, float): - return f"float:{struct.pack('>d', value).hex()}" - if isinstance(value, tuple): - if not value: - return "()" - parts = [_normalize_const_repr(item) for item in value] - trailing = "," if len(parts) == 1 else "" - return f"({', '.join(parts)}{trailing})" - if isinstance(value, frozenset): - parts = sorted(_normalize_const_repr(item) for item in value) - return f"frozenset({{{', '.join(parts)}}})" - return _normalize_argrepr(repr(value)) - - _IS_RUSTPYTHON = ( hasattr(sys, "implementation") and sys.implementation.name == "rustpython" ) @@ -168,7 +151,7 @@ def _resolve_arg_fallback(code, opname, arg): return _resolve_localsplus_name(code, arg) elif opname == "LOAD_CONST": if 0 <= arg < len(code.co_consts): - return _normalize_const_repr(code.co_consts[arg]) + return _normalize_argrepr(repr(code.co_consts[arg])) elif opname in ( "LOAD_DEREF", "STORE_DEREF", @@ -311,10 +294,7 @@ def _metadata_cache_slot_offsets(inst): elif inst.arg is not None and inst.argrepr: # If argrepr is just a number, try to resolve it via fallback # (RustPython may return raw index instead of variable name) - if opname == "LOAD_CONST" and 0 <= inst.arg < len(code.co_consts): - argrepr = _normalize_const_repr(code.co_consts[inst.arg]) - else: - argrepr = inst.argrepr + argrepr = inst.argrepr if argrepr.isdigit() or (argrepr.startswith("-") and argrepr[1:].isdigit()): resolved = _resolve_arg_fallback(code, opname, inst.arg) if isinstance(resolved, str) and not resolved.isdigit():