Skip to content

Commit c79baa3

Browse files
committed
Fix for-loop target NOP and t-string stack order
- Remove unnecessary NOP between FOR_ITER and unpack/store by compiling loop target directly on target range - Fix t-string compilation to match stack order: build strings tuple first, then evaluate interpolations - Split compile_tstring_into into collect_tstring_strings and compile_tstring_interpolations - Handle debug text literals and default repr conversion for debug specifier in t-strings - Always set bit 1 in BUILD_INTERPOLATION oparg encoding
1 parent f0bf810 commit c79baa3

1 file changed

Lines changed: 202 additions & 75 deletions

File tree

crates/codegen/src/compile.rs

Lines changed: 202 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -5931,15 +5931,13 @@ impl Compiler {
59315931

59325932
emit!(self, Instruction::ForIter { delta: else_block });
59335933

5934-
// Match CPython codegen_for(): keep a line anchor on the target line
5935-
// so multiline/single-line `for ...: pass` bodies preserve tracing layout.
5934+
// Match CPython's line attribution by compiling the loop target on
5935+
// the target range directly instead of leaving a synthetic anchor
5936+
// NOP between FOR_ITER and the unpack/store sequence.
59365937
let saved_range = self.current_source_range;
59375938
self.set_source_range(target.range());
5938-
emit!(self, Instruction::Nop);
5939-
self.set_source_range(saved_range);
5940-
5941-
// Start of loop iteration, set targets:
59425939
self.compile_store(target)?;
5940+
self.set_source_range(saved_range);
59435941
};
59445942

59455943
let was_in_loop = self.ctx.loop_data.replace((for_block, after_block));
@@ -10734,128 +10732,128 @@ impl Compiler {
1073410732
}
1073510733

1073610734
fn compile_expr_tstring(&mut self, expr_tstring: &ast::ExprTString) -> CompileResult<()> {
10737-
// ast::TStringValue can contain multiple ast::TString parts (implicit concatenation)
10738-
// Each ast::TString part should be compiled and the results merged into a single Template
10735+
// ast::TStringValue can contain multiple ast::TString parts (implicit
10736+
// concatenation). Match CPython's stack order by materializing the
10737+
// strings tuple first, then evaluating interpolations left-to-right.
1073910738
let tstring_value = &expr_tstring.value;
1074010739

10741-
// Collect all strings and compile all interpolations
1074210740
let mut all_strings: Vec<Wtf8Buf> = Vec::new();
1074310741
let mut current_string = Wtf8Buf::new();
1074410742
let mut interp_count: u32 = 0;
1074510743

1074610744
for tstring in tstring_value.iter() {
10747-
self.compile_tstring_into(
10745+
self.collect_tstring_strings(
1074810746
tstring,
1074910747
&mut all_strings,
1075010748
&mut current_string,
1075110749
&mut interp_count,
10752-
)?;
10750+
);
1075310751
}
1075410752

10755-
// Add trailing string
1075610753
all_strings.push(core::mem::take(&mut current_string));
1075710754

10758-
// Now build the Template:
10759-
// Stack currently has all interpolations from compile_tstring_into calls
10760-
10761-
// 1. Build interpolations tuple from the interpolations on the stack
10762-
emit!(
10763-
self,
10764-
Instruction::BuildTuple {
10765-
count: interp_count
10766-
}
10767-
);
10768-
10769-
// 2. Load all string parts
1077010755
let string_count: u32 = all_strings
1077110756
.len()
1077210757
.try_into()
1077310758
.expect("t-string string count overflowed");
1077410759
for s in &all_strings {
1077510760
self.emit_load_const(ConstantData::Str { value: s.clone() });
1077610761
}
10777-
10778-
// 3. Build strings tuple
1077910762
emit!(
1078010763
self,
1078110764
Instruction::BuildTuple {
1078210765
count: string_count
1078310766
}
1078410767
);
1078510768

10786-
// 4. Swap so strings is below interpolations: [interps, strings] -> [strings, interps]
10787-
emit!(self, Instruction::Swap { i: 2 });
10769+
for tstring in tstring_value.iter() {
10770+
self.compile_tstring_interpolations(tstring)?;
10771+
}
1078810772

10789-
// 5. Build the Template
10773+
emit!(
10774+
self,
10775+
Instruction::BuildTuple {
10776+
count: interp_count
10777+
}
10778+
);
1079010779
emit!(self, Instruction::BuildTemplate);
1079110780

1079210781
Ok(())
1079310782
}
1079410783

10795-
fn compile_tstring_into(
10796-
&mut self,
10784+
fn collect_tstring_strings(
10785+
&self,
1079710786
tstring: &ast::TString,
1079810787
strings: &mut Vec<Wtf8Buf>,
1079910788
current_string: &mut Wtf8Buf,
1080010789
interp_count: &mut u32,
10801-
) -> CompileResult<()> {
10790+
) {
1080210791
for element in &tstring.elements {
1080310792
match element {
1080410793
ast::InterpolatedStringElement::Literal(lit) => {
10805-
// Accumulate literal parts into current_string
1080610794
current_string.push_str(&lit.value);
1080710795
}
1080810796
ast::InterpolatedStringElement::Interpolation(interp) => {
10809-
// Finish current string segment
10797+
if let Some(ast::DebugText { leading, trailing }) = &interp.debug_text {
10798+
let range = interp.expression.range();
10799+
let source = self.source_file.slice(range);
10800+
let text = [
10801+
strip_fstring_debug_comments(leading).as_str(),
10802+
source,
10803+
strip_fstring_debug_comments(trailing).as_str(),
10804+
]
10805+
.concat();
10806+
current_string.push_str(&text);
10807+
}
1081010808
strings.push(core::mem::take(current_string));
10809+
*interp_count += 1;
10810+
}
10811+
}
10812+
}
10813+
}
1081110814

10812-
// Compile the interpolation value
10813-
self.compile_expression(&interp.expression)?;
10815+
fn compile_tstring_interpolations(&mut self, tstring: &ast::TString) -> CompileResult<()> {
10816+
for element in &tstring.elements {
10817+
let ast::InterpolatedStringElement::Interpolation(interp) = element else {
10818+
continue;
10819+
};
1081410820

10815-
// Load the expression source string, including any
10816-
// whitespace between '{' and the expression start
10817-
let expr_range = interp.expression.range();
10818-
let expr_source = if interp.range.start() < expr_range.start()
10819-
&& interp.range.end() >= expr_range.end()
10820-
{
10821-
let after_brace = interp.range.start() + TextSize::new(1);
10822-
self.source_file
10823-
.slice(TextRange::new(after_brace, expr_range.end()))
10824-
} else {
10825-
// Fallback for programmatically constructed ASTs with dummy ranges
10826-
self.source_file.slice(expr_range)
10827-
};
10828-
self.emit_load_const(ConstantData::Str {
10829-
value: expr_source.to_string().into(),
10830-
});
10821+
self.compile_expression(&interp.expression)?;
1083110822

10832-
// Determine conversion code
10833-
let conversion: u32 = match interp.conversion {
10834-
ast::ConversionFlag::None => 0,
10835-
ast::ConversionFlag::Str => 1,
10836-
ast::ConversionFlag::Repr => 2,
10837-
ast::ConversionFlag::Ascii => 3,
10838-
};
10823+
let expr_range = interp.expression.range();
10824+
let expr_source = if interp.range.start() < expr_range.start()
10825+
&& interp.range.end() >= expr_range.end()
10826+
{
10827+
let after_brace = interp.range.start() + TextSize::new(1);
10828+
self.source_file
10829+
.slice(TextRange::new(after_brace, expr_range.end()))
10830+
} else {
10831+
self.source_file.slice(expr_range)
10832+
};
10833+
self.emit_load_const(ConstantData::Str {
10834+
value: expr_source.to_string().into(),
10835+
});
1083910836

10840-
// Handle format_spec
10841-
let has_format_spec = interp.format_spec.is_some();
10842-
if let Some(format_spec) = &interp.format_spec {
10843-
// Compile format_spec as a string using fstring element compilation
10844-
// Use default ast::FStringFlags since format_spec syntax is independent of t-string flags
10845-
self.compile_fstring_elements(
10846-
ast::FStringFlags::empty(),
10847-
&format_spec.elements,
10848-
)?;
10849-
}
10837+
let mut conversion: u32 = match interp.conversion {
10838+
ast::ConversionFlag::None => 0,
10839+
ast::ConversionFlag::Str => 1,
10840+
ast::ConversionFlag::Repr => 2,
10841+
ast::ConversionFlag::Ascii => 3,
10842+
};
1085010843

10851-
// Emit BUILD_INTERPOLATION
10852-
// oparg encoding: (conversion << 2) | has_format_spec
10853-
let format = (conversion << 2) | u32::from(has_format_spec);
10854-
emit!(self, Instruction::BuildInterpolation { format });
10844+
if interp.debug_text.is_some() && conversion == 0 && interp.format_spec.is_none() {
10845+
conversion = 2;
10846+
}
1085510847

10856-
*interp_count += 1;
10857-
}
10848+
let has_format_spec = interp.format_spec.is_some();
10849+
if let Some(format_spec) = &interp.format_spec {
10850+
self.compile_fstring_elements(ast::FStringFlags::empty(), &format_spec.elements)?;
1085810851
}
10852+
10853+
// CPython keeps bit 1 set in BUILD_INTERPOLATION's oparg and uses
10854+
// bit 0 for the optional format spec.
10855+
let format = 2 | (conversion << 2) | u32::from(has_format_spec);
10856+
emit!(self, Instruction::BuildInterpolation { format });
1085910857
}
1086010858

1086110859
Ok(())
@@ -12298,6 +12296,135 @@ elif maxsize == 9223372036854775807:
1229812296
);
1229912297
}
1230012298

12299+
#[test]
12300+
fn test_for_tuple_target_does_not_leave_loop_header_nop() {
12301+
let code = compile_exec(
12302+
"\
12303+
def f(pairs):
12304+
for left, right in pairs:
12305+
pass
12306+
",
12307+
);
12308+
let f = find_code(&code, "f").expect("missing function code");
12309+
let ops: Vec<_> = f
12310+
.instructions
12311+
.iter()
12312+
.map(|unit| unit.op)
12313+
.filter(|op| !matches!(op, Instruction::Cache))
12314+
.collect();
12315+
12316+
assert!(
12317+
ops.windows(2).any(|window| {
12318+
matches!(
12319+
window,
12320+
[
12321+
Instruction::ForIter { .. },
12322+
Instruction::UnpackSequence { .. }
12323+
]
12324+
)
12325+
}),
12326+
"expected FOR_ITER to flow directly into UNPACK_SEQUENCE, got ops={ops:?}"
12327+
);
12328+
assert!(
12329+
!ops.windows(3).any(|window| {
12330+
matches!(
12331+
window,
12332+
[
12333+
Instruction::ForIter { .. },
12334+
Instruction::Nop,
12335+
Instruction::UnpackSequence { .. },
12336+
]
12337+
)
12338+
}),
12339+
"unexpected loop-header NOP before tuple unpack, got ops={ops:?}"
12340+
);
12341+
}
12342+
12343+
#[test]
12344+
fn test_tstring_build_template_matches_cpython_stack_order() {
12345+
let code = compile_exec("t = t\"{0}\"");
12346+
let units: Vec<_> = code
12347+
.instructions
12348+
.iter()
12349+
.copied()
12350+
.filter(|unit| !matches!(unit.op, Instruction::Cache))
12351+
.collect();
12352+
12353+
assert!(
12354+
units.windows(6).any(|window| {
12355+
matches!(
12356+
window,
12357+
[
12358+
a,
12359+
b,
12360+
c,
12361+
d,
12362+
e,
12363+
f,
12364+
]
12365+
if matches!(a.op, Instruction::LoadConst { .. })
12366+
&& matches!(b.op, Instruction::LoadSmallInt { .. })
12367+
&& matches!(c.op, Instruction::LoadConst { .. })
12368+
&& matches!(d.op, Instruction::BuildInterpolation { .. })
12369+
&& u8::from(d.arg) == 2
12370+
&& matches!(e.op, Instruction::BuildTuple { .. })
12371+
&& u8::from(e.arg) == 1
12372+
&& matches!(f.op, Instruction::BuildTemplate)
12373+
)
12374+
}),
12375+
"expected CPython-style t-string lowering, got units={units:?}"
12376+
);
12377+
assert!(
12378+
!units
12379+
.iter()
12380+
.any(|unit| matches!(unit.op, Instruction::Swap { .. })),
12381+
"unexpected SWAP in t-string lowering, got units={units:?}"
12382+
);
12383+
}
12384+
12385+
#[test]
12386+
fn test_tstring_debug_specifier_uses_debug_literal_and_repr_default() {
12387+
let code = compile_exec(
12388+
"\
12389+
value = 42
12390+
t = t\"Value: {value=}\"
12391+
",
12392+
);
12393+
12394+
let string_consts = code
12395+
.instructions
12396+
.iter()
12397+
.filter_map(|unit| match unit.op {
12398+
Instruction::LoadConst { consti } => {
12399+
Some(&code.constants[consti.get(OpArg::new(u32::from(u8::from(unit.arg))))])
12400+
}
12401+
_ => None,
12402+
})
12403+
.collect::<Vec<_>>();
12404+
12405+
assert!(
12406+
string_consts.iter().any(|constant| matches!(
12407+
constant,
12408+
ConstantData::Tuple { elements }
12409+
if matches!(
12410+
&elements[..],
12411+
[
12412+
ConstantData::Str { value: first },
12413+
ConstantData::Str { value: second },
12414+
] if first.to_string() == "Value: value=" && second.is_empty()
12415+
)
12416+
)),
12417+
"expected debug literal prefix in t-string constants, got {string_consts:?}"
12418+
);
12419+
assert!(
12420+
code.instructions.iter().any(|unit| matches!(
12421+
unit.op,
12422+
Instruction::BuildInterpolation { .. }
12423+
) && u8::from(unit.arg) == 10),
12424+
"expected default repr conversion for debug t-string"
12425+
);
12426+
}
12427+
1230112428
#[test]
1230212429
fn test_break_in_finally_after_return_keeps_load_fast_check_for_loop_locals() {
1230312430
let code = compile_exec(

0 commit comments

Comments
 (0)