diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs index 5728ddfbef..548b57d85a 100644 --- a/crates/codegen/src/ir.rs +++ b/crates/codegen/src/ir.rs @@ -1330,7 +1330,7 @@ impl Blocks { fn copy_basicblock(&mut self, block_idx: BlockIdx) -> crate::InternalResult { debug_assert!(bb_no_fallthrough(&self[block_idx])); - let result = blocks_new_block(self)?; + let result = self.blocks_new_block()?; self.basicblock_append_block_instructions(result, block_idx)?; Ok(result) } @@ -1464,19 +1464,19 @@ impl Blocks { AnyInstruction::Real( Instruction::PopJumpIfNotNone { .. } | Instruction::PopJumpIfNone { .. }, ) if matches!(target.instr.into(), AnyOpcode::Pseudo(PseudoOpcode::Jump)) - && jump_thread(self, block_idx, i, &target, inst.instr)? => + && self.jump_thread(block_idx, i, &target, inst.instr)? => { continue; } AnyInstruction::Real(Instruction::PopJumpIfFalse { .. }) if matches!(target.instr.into(), AnyOpcode::Pseudo(PseudoOpcode::Jump)) - && jump_thread(self, block_idx, i, &target, inst.instr)? => + && self.jump_thread(block_idx, i, &target, inst.instr)? => { continue; } AnyInstruction::Real(Instruction::PopJumpIfTrue { .. }) if matches!(target.instr.into(), AnyOpcode::Pseudo(PseudoOpcode::Jump)) - && jump_thread(self, block_idx, i, &target, inst.instr)? => + && self.jump_thread(block_idx, i, &target, inst.instr)? => { continue; } @@ -1487,7 +1487,7 @@ impl Blocks { let opcode = pseudo.into(); match target.instr.pseudo().map(Into::into) { Some(PseudoOpcode::Jump) - if jump_thread(self, block_idx, i, &target, opcode)? => + if self.jump_thread(block_idx, i, &target, opcode)? => { continue; } @@ -1495,7 +1495,7 @@ impl Blocks { if matches!( opcode, AnyInstruction::Pseudo(PseudoInstruction::JumpIfFalse { .. }) - ) && jump_thread(self, block_idx, i, &target, opcode)? => + ) && self.jump_thread(block_idx, i, &target, opcode)? => { continue; } @@ -1503,12 +1503,12 @@ impl Blocks { if matches!( opcode, AnyInstruction::Pseudo(PseudoInstruction::JumpIfTrue { .. }) - ) && jump_thread(self, block_idx, i, &target, opcode)? => + ) && self.jump_thread(block_idx, i, &target, opcode)? => { continue; } Some(PseudoOpcode::JumpIfFalse | PseudoOpcode::JumpIfTrue) => { - let next = self[inst.target.idx()].next; + let next = self[inst.target].next; debug_assert!(next != BlockIdx::NULL); debug_assert!(next != inst.target); self[block_idx].instructions[i].target = next; @@ -1521,12 +1521,17 @@ impl Blocks { PseudoInstruction::Jump { .. } | PseudoInstruction::JumpNoInterrupt { .. }, ) => match target.instr.into() { AnyOpcode::Pseudo(PseudoOpcode::Jump) - if jump_thread(self, block_idx, i, &target, PseudoOpcode::Jump.into())? => + if self.jump_thread( + block_idx, + i, + &target, + PseudoOpcode::Jump.into(), + )? => { continue; } AnyOpcode::Pseudo(PseudoOpcode::JumpNoInterrupt) - if jump_thread(self, block_idx, i, &target, inst.instr)? => + if self.jump_thread(block_idx, i, &target, inst.instr)? => { continue; } @@ -2007,7 +2012,7 @@ impl Blocks { let mut block_idx = BlockIdx::new(0); while block_idx != BlockIdx::NULL { - basicblock_remove_redundant_nops(self, block_idx)?; + self.basicblock_remove_redundant_nops(block_idx)?; if is_label(self[block_idx].cpython_label) { instr = None; } @@ -2140,7 +2145,7 @@ impl Blocks { let mut current = BlockIdx(0); while current != BlockIdx::NULL { self[current].visited = true; - normalize_jumps_in_block(self, current)?; + self.normalize_jumps_in_block(current)?; current = self[current].next; } @@ -2293,3722 +2298,3719 @@ impl Blocks { block_idx = next_block; } - let res = remove_redundant_nops(self)?; + let res = self.remove_redundant_nops()?; #[cfg(debug_assertions)] - assert!(no_redundant_nops(self)); + assert!(self.no_redundant_nops()); Ok(res) } -} -impl From> for Blocks { - fn from(value: Vec) -> Self { - Self(value) - } -} + /// Mark exception handler target blocks. + /// flowgraph.c mark_except_handlers + #[allow(clippy::unnecessary_wraps)] + pub(crate) fn mark_except_handlers(&mut self) -> crate::InternalResult<()> { + #[cfg(debug_assertions)] + { + let mut block_idx = BlockIdx(0); + while block_idx != BlockIdx::NULL { + assert!(!self[block_idx].except_handler); + block_idx = self[block_idx].next; + } + } -impl From> for Blocks { - fn from(value: Box<[Block]>) -> Self { - Self(value.into()) + let mut block_idx = BlockIdx(0); + while block_idx != BlockIdx::NULL { + let next = self[block_idx].next; + let instr_count = self[block_idx].instruction_used; + for i in 0..instr_count { + let instr = self[block_idx].instructions[i]; + if is_block_push(&instr) { + debug_assert!(instr.target != BlockIdx::NULL); + self[instr.target].except_handler = true; + } + } + block_idx = next; + } + Ok(()) } -} -impl From<&[Block]> for Blocks { - fn from(value: &[Block]) -> Self { - Self(value.to_vec()) - } -} + /// flowgraph.c mark_cold (two-pass to match CPython). + /// + /// Phase 1 (mark_warm): propagate "warm" from entry via fall-through and + /// jump targets. CPython asserts while visiting warm blocks that they are not + /// exception handlers. + /// + /// Phase 2 (mark_cold): propagate "cold" from except_handler blocks via + /// forward edges. Blocks reached only via runtime exception dispatch are + /// marked cold and pushed to the end by push_cold_blocks_to_end. + /// + /// Blocks reached by neither phase remain `cold=false`. They are typically + /// empty unreachable placeholders left by remove_unreachable; they stay in + /// their original chain position (e.g. between entry and the post-try + /// continuation for a nested try/except whose inner_end was emptied by + /// optimize_cfg). This matches CPython's behavior and is necessary for + /// optimize_load_fast to terminate fall-through at those placeholders. + /// flowgraph.c mark_warm + fn mark_warm(&mut self) -> crate::InternalResult<()> { + let mut stack = self.make_cfg_traversal_stack()?; + stack.push(BlockIdx(0)); + self[0].visited = true; + while let Some(block_idx) = stack.pop() { + debug_assert!(!self[block_idx].except_handler); + self[block_idx].warm = true; -impl From<&mut [Block]> for Blocks { - fn from(value: &mut [Block]) -> Self { - Self(value.to_vec()) - } -} + let next = self[block_idx].next; + if next != BlockIdx::NULL && bb_has_fallthrough(&self[block_idx]) && !self[next].visited + { + stack.push(next); + self[next].visited = true; + } -impl From<[Block; N]> for Blocks { - fn from(value: [Block; N]) -> Self { - Self(value.into()) + let instr_count = self[block_idx].instruction_used; + for i in 0..instr_count { + let instr = self[block_idx].instructions[i]; + if is_jump(&instr) { + let target = instr.target; + debug_assert!(target != BlockIdx::NULL); + if !self[target].visited { + stack.push(target); + self[target].visited = true; + } + } + } + } + Ok(()) } -} -impl From<&[Block; N]> for Blocks { - fn from(value: &[Block; N]) -> Self { - Self(value.to_vec()) - } -} + fn mark_cold(&mut self) -> crate::InternalResult<()> { + let mut block_idx = BlockIdx(0); + while block_idx != BlockIdx::NULL { + let block = &mut self[block_idx]; + debug_assert!(!block.cold); + debug_assert!(!block.warm); + block_idx = block.next; + } -impl Deref for Blocks { - type Target = [Block]; + self.mark_warm()?; - fn deref(&self) -> &Self::Target { - &self.0 - } -} + let mut cold_stack = self.make_cfg_traversal_stack()?; + block_idx = BlockIdx(0); + while block_idx != BlockIdx::NULL { + let next = self[block_idx].next; + let block = &self[block_idx]; + if block.except_handler { + debug_assert!(!block.warm); + cold_stack.push(block_idx); + self[block_idx].visited = true; + } + block_idx = next; + } -impl DerefMut for Blocks { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 + while let Some(block_idx) = cold_stack.pop() { + self[block_idx].cold = true; + let next = self[block_idx].next; + if next != BlockIdx::NULL + && bb_has_fallthrough(&self[block_idx]) + && !self[next].warm + && !self[next].visited + { + cold_stack.push(next); + self[next].visited = true; + } + + let instr_count = self[block_idx].instruction_used; + for i in 0..instr_count { + let instr = self[block_idx].instructions[i]; + if is_jump(&instr) { + debug_assert_eq!(i, instr_count - 1); + let target = instr.target; + debug_assert!(target != BlockIdx::NULL); + if !self[target].warm && !self[target].visited { + cold_stack.push(target); + self[target].visited = true; + } + } + } + } + Ok(()) } -} -impl Index for Blocks { - type Output = Block; + /// flowgraph.c push_cold_blocks_to_end + fn push_cold_blocks_to_end(&mut self) -> crate::InternalResult<()> { + if self[0].next == BlockIdx::NULL { + return Ok(()); + } - fn index(&self, idx: usize) -> &Self::Output { - &self.0[idx] - } -} + self.mark_cold()?; + let mut next_label = get_max_label(self) + 1; -impl IndexMut for Blocks { - fn index_mut(&mut self, idx: usize) -> &mut Self::Output { - &mut self.0[idx] - } -} + // If a cold block falls through to a warm block, add an explicit jump + let mut block_idx = BlockIdx(0); + while block_idx != BlockIdx::NULL { + let next = self[block_idx].next; + if self[block_idx].cold + && bb_has_fallthrough(&self[block_idx]) + && next != BlockIdx::NULL + && self[next].warm + { + let explicit_jump = self.blocks_new_block()?; + if !is_label(self[next].cpython_label) { + self[next].cpython_label = InstructionSequenceLabel::from_index(next_label); + next_label += 1; + } + let jump_label = self[next].cpython_label; + debug_assert!(is_label(jump_label)); + basicblock_addop( + &mut self[explicit_jump], + InstructionInfo { + instr: PseudoOpcode::JumpNoInterrupt.into(), + arg: instruction_sequence_label_oparg(jump_label), + target: BlockIdx::NULL, + location: SourceLocation::default(), + end_location: SourceLocation::default(), + except_handler: None, + lineno_override: Some(NO_LOCATION_OVERRIDE), + }, + )?; + self[explicit_jump].cold = true; + self[explicit_jump].next = next; + self[explicit_jump].predecessors = 1; + self[block_idx].next = explicit_jump; + let target = self[explicit_jump].next; + let last = basicblock_last_instr_mut(&mut self[explicit_jump]) + .expect("missing explicit jump"); + last.target = target; + } + block_idx = self[block_idx].next; + } -impl Index for Blocks { - type Output = Block; + assert!(!self[0].cold); + let mut cold_blocks: BlockIdx = BlockIdx::NULL; + let mut cold_blocks_tail: BlockIdx = BlockIdx::NULL; + let mut block_idx = BlockIdx(0); - fn index(&self, block_idx: BlockIdx) -> &Self::Output { - &self.0[block_idx.as_usize()] - } -} + while self[block_idx].next != BlockIdx::NULL { + debug_assert!(!self[block_idx].cold); + while self[block_idx].next != BlockIdx::NULL && !self[self[block_idx].next].cold { + block_idx = self[block_idx].next; + } -impl IndexMut for Blocks { - fn index_mut(&mut self, block_idx: BlockIdx) -> &mut Self::Output { - &mut self.0[block_idx.as_usize()] - } -} + if self[block_idx].next == BlockIdx::NULL { + break; + } -pub(crate) const START_DEPTH_UNSET: i32 = i32::MIN; -const CO_MAXBLOCKS: usize = 20; + debug_assert!(!self[block_idx].cold); + debug_assert!(self[self[block_idx].next].cold); -/// flowgraph.c struct _PyCfgExceptStack -#[derive(Clone, Debug)] -struct CfgExceptStack { - handlers: [BlockIdx; CO_MAXBLOCKS + 2], - depth: usize, -} + let mut block_end = self[block_idx].next; + while self[block_end].next != BlockIdx::NULL && self[self[block_end].next].cold { + block_end = self[block_end].next; + } -/// flowgraph.c `basicblock **stack` -#[derive(Clone, Debug)] -struct CfgTraversalStack { - stack: Vec, - sp: usize, -} + debug_assert!(self[block_end].cold); + debug_assert!( + self[block_end].next == BlockIdx::NULL || !self[self[block_end].next].cold + ); -impl CfgTraversalStack { - fn push(&mut self, block: BlockIdx) { - debug_assert!(self.sp < self.stack.len()); - self.stack[self.sp] = block; - self.sp += 1; - } + if cold_blocks == BlockIdx::NULL { + cold_blocks = self[block_idx].next; + } else { + self[cold_blocks_tail].next = self[block_idx].next; + } - fn pop(&mut self) -> Option { - if self.sp == 0 { - return None; + cold_blocks_tail = block_end; + self[block_idx].next = self[block_end].next; + self[block_end].next = BlockIdx::NULL; } - self.sp -= 1; - Some(self.stack[self.sp]) - } - fn capacity(&self) -> usize { - self.stack.len() - } -} - -#[derive(Clone, Debug)] -pub(crate) struct InstructionSequenceLabelMap { - block_labels: Vec, - /// Codegen-side shadow of CPython's instruction-sequence label map. - /// - /// `_PyInstructionSequence_UseLabel()` can map multiple labels to the same - /// instruction offset before `_PyCfg_FromInstructionSequence()` materializes - /// CFG blocks. The codegen CFG path keeps the same aliasing by resolving - /// those labels to the block that owns the shared offset. - cpython_block_by_label: Vec, -} + debug_assert!(self[block_idx].next == BlockIdx::NULL); + self[block_idx].next = cold_blocks; -fn instruction_sequence_label_map_register_label( - map: &mut InstructionSequenceLabelMap, - label: InstructionSequenceLabel, -) -> crate::InternalResult<()> { - debug_assert!(is_label(label)); - let old_size = map.cpython_block_by_label.len(); - let new_allocation = c_array_ensure_capacity::( - old_size, - label.idx(), - INITIAL_INSTR_SEQUENCE_LABELS_MAP_SIZE, - )?; - if new_allocation > old_size { - if new_allocation > map.cpython_block_by_label.capacity() { - map.cpython_block_by_label - .try_reserve_exact(new_allocation - map.cpython_block_by_label.capacity()) - .map_err(|_| InternalError::MalformedControlFlowGraph)?; + if cold_blocks != BlockIdx::NULL { + self.remove_redundant_nops_and_jumps()?; } - map.cpython_block_by_label - .resize(new_allocation, BlockIdx::NULL); - for i in old_size..map.cpython_block_by_label.len() { - map.cpython_block_by_label[i] = BlockIdx::NULL; + Ok(()) + } + + /// flowgraph.c check_cfg + fn check_cfg(&self) -> crate::InternalResult<()> { + let mut block_idx = BlockIdx(0); + while block_idx != BlockIdx::NULL { + let block = &self[block_idx]; + for i in 0..block.instruction_used { + let opcode = block.instructions[i].instr; + debug_assert!(!opcode.is_assembler()); + if opcode.is_terminator() && i != block.instruction_used - 1 { + return Err(InternalError::MalformedControlFlowGraph); + } + } + block_idx = block.next; } + Ok(()) } - debug_assert!(map.cpython_block_by_label.len() > label.idx()); - Ok(()) -} -fn instruction_sequence_label_map_ensure_label_for_block( - map: &mut InstructionSequenceLabelMap, - seq: &mut InstructionSequence, - block: BlockIdx, -) -> crate::InternalResult { - debug_assert_ne!(block, BlockIdx::NULL); - let block_label = map.block_labels[block.idx()]; - if is_label(block_label) { - return Ok(block_label); + /// flowgraph.c jump_thread + fn jump_thread( + &mut self, + block_idx: BlockIdx, + instr_idx: usize, + target: &InstructionInfo, + opcode: AnyInstruction, + ) -> crate::InternalResult { + debug_assert!(is_jump(&self[block_idx].instructions[instr_idx])); + debug_assert!(is_jump(target)); + debug_assert_eq!(instr_idx + 1, self[block_idx].instruction_used); + debug_assert!(target.target != BlockIdx::NULL); + if self[block_idx].instructions[instr_idx].target != target.target { + set_to_nop(&mut self[block_idx].instructions[instr_idx]); + self.basicblock_add_jump(block_idx, opcode, target.target, target)?; + return Ok(true); + } + Ok(false) } - let label = instruction_sequence_new_label(seq); - debug_assert_eq!(label.0, seq.next_free_label); - instruction_sequence_label_map_register_label(map, label)?; - map.cpython_block_by_label[label.idx()] = block; - map.block_labels[block.idx()] = label; - Ok(label) -} -fn instruction_sequence_label_map_label_for_block( - map: &InstructionSequenceLabelMap, - block: BlockIdx, -) -> InstructionSequenceLabel { - debug_assert_ne!(block, BlockIdx::NULL); - map.block_labels - .get(block.idx()) - .copied() - .unwrap_or(InstructionSequenceLabel::NO_LABEL) -} - -fn instruction_sequence_label_map_block_for_label( - map: &InstructionSequenceLabelMap, - label: InstructionSequenceLabel, -) -> Option { - if !is_label(label) { - return None; - } - map.cpython_block_by_label - .get(label.idx()) - .copied() - .filter(|&block| block != BlockIdx::NULL) -} - -fn instruction_sequence_label_map_resolve_label( - map: &InstructionSequenceLabelMap, - block: BlockIdx, -) -> BlockIdx { - if block == BlockIdx::NULL { - return BlockIdx::NULL; - } - let label = instruction_sequence_label_map_label_for_block(map, block); - if !is_label(label) { - return block; - } - instruction_sequence_label_map_block_for_label(map, label).unwrap_or_else(|| { - debug_assert!( - false, - "CPython instruction-sequence label must map to a codegen CFG block" - ); - BlockIdx::NULL - }) -} - -fn instruction_sequence_label_map_resolve_label_to_block( - map: &InstructionSequenceLabelMap, - label: InstructionSequenceLabel, -) -> BlockIdx { - if !is_label(label) { - return BlockIdx::NULL; - } - instruction_sequence_label_map_block_for_label(map, label).unwrap_or_else(|| { - debug_assert!( - false, - "CPython instruction-sequence label must map to a codegen CFG block" - ); - BlockIdx::NULL - }) -} - -fn instruction_sequence_label_oparg(label: InstructionSequenceLabel) -> OpArg { - debug_assert!(is_label(label)); - OpArg::new(label.idx() as u32) -} - -fn instruction_sequence_label_map_use_label_at_block( - map: &mut InstructionSequenceLabelMap, - seq: &mut InstructionSequence, - from: BlockIdx, - to: BlockIdx, -) -> crate::InternalResult<()> { - if from == BlockIdx::NULL || from == to { - return Ok(()); - } - let from_label = instruction_sequence_label_map_ensure_label_for_block(map, seq, from)?; - debug_assert!(map.cpython_block_by_label.len() > from_label.idx()); - let to_block = instruction_sequence_label_map_resolve_label(map, to); - if to_block == BlockIdx::NULL { - debug_assert!( - false, - "CPython label target must map to a codegen CFG block" - ); - return Ok(()); + /// flowgraph.c basicblock_add_jump + fn basicblock_add_jump( + &mut self, + block_idx: BlockIdx, + instr: AnyInstruction, + target: BlockIdx, + loc_source: &InstructionInfo, + ) -> crate::InternalResult<()> { + let last = basicblock_last_instr(&self[block_idx]); + if last.is_some_and(is_jump) { + return Err(InternalError::MalformedControlFlowGraph); + } + debug_assert!(target != BlockIdx::NULL); + let label = self[target].cpython_label; + debug_assert!(is_label(label)); + let arg = instruction_sequence_label_oparg(label); + let block = &mut self[block_idx]; + basicblock_addop( + block, + InstructionInfo { + instr, + arg, + target: BlockIdx::NULL, + location: loc_source.location, + end_location: loc_source.end_location, + except_handler: None, + lineno_override: loc_source.lineno_override, + }, + )?; + let last = basicblock_last_instr_mut(block).expect("missing jump"); + debug_assert!(match (last.instr, instr) { + (AnyInstruction::Real(last), AnyInstruction::Real(opcode)) => + last.as_opcode() == opcode.as_opcode(), + (AnyInstruction::Pseudo(last), AnyInstruction::Pseudo(opcode)) => + last.as_opcode() == opcode.as_opcode(), + _ => false, + }); + last.target = target; + Ok(()) } - map.cpython_block_by_label[from_label.idx()] = to_block; - Ok(()) -} - -fn instruction_sequence_label_map_push_unlabeled_block( - map: &mut InstructionSequenceLabelMap, -) -> crate::InternalResult<()> { - map.block_labels - .try_reserve(1) - .map_err(|_| InternalError::MalformedControlFlowGraph)?; - map.block_labels.push(InstructionSequenceLabel::NO_LABEL); - Ok(()) -} -fn instruction_sequence_label_map_push_unmapped_label( - map: &mut InstructionSequenceLabelMap, - seq: &mut InstructionSequence, -) -> crate::InternalResult<()> { - let label = instruction_sequence_new_label(seq); - debug_assert_eq!(label.0, seq.next_free_label); - instruction_sequence_label_map_register_label(map, label)?; - let block = BlockIdx( - map.block_labels - .len() - .to_u32() - .ok_or(InternalError::MalformedControlFlowGraph)?, - ); - map.cpython_block_by_label[label.idx()] = block; - map.block_labels - .try_reserve(1) - .map_err(|_| InternalError::MalformedControlFlowGraph)?; - map.block_labels.push(label); - Ok(()) -} + /// flowgraph.c convert_pseudo_conditional_jumps + fn convert_pseudo_conditional_jumps(&mut self) -> crate::InternalResult<()> { + let mut block_idx = BlockIdx(0); + while block_idx != BlockIdx::NULL { + let next = self[block_idx].next; + let block = &mut self[block_idx]; + let mut i = 0; + while i < block.instruction_used { + let instr = block.instructions[i]; + let opcode = instr.instr; + if matches!( + opcode.pseudo_opcode(), + Some(PseudoOpcode::JumpIfFalse | PseudoOpcode::JumpIfTrue) + ) { + debug_assert_eq!(i, block.instruction_used - 1); + block.instructions[i].instr = + if matches!(opcode.pseudo_opcode(), Some(PseudoOpcode::JumpIfFalse)) { + Opcode::PopJumpIfFalse + } else { + Opcode::PopJumpIfTrue + } + .into(); + + let location = instr.location; + let end_location = instr.end_location; + let except_handler = instr.except_handler; + let lineno_override = instr.lineno_override; + let copy = InstructionInfo { + instr: Opcode::Copy.into(), + arg: OpArg::new(1), + target: BlockIdx::NULL, + location, + end_location, + except_handler, + lineno_override, + }; + basicblock_insert_instruction(block, i, copy)?; + i += 1; -impl InstructionSequenceLabelMap { - pub(crate) fn new() -> Self { - Self { - block_labels: vec![InstructionSequenceLabel::NO_LABEL], - cpython_block_by_label: Vec::new(), + let to_bool = InstructionInfo { + instr: Opcode::ToBool.into(), + arg: OpArg::new(0), + target: BlockIdx::NULL, + location, + end_location, + except_handler, + lineno_override, + }; + basicblock_insert_instruction(block, i, to_bool)?; + i += 1; + } + i += 1; + } + block_idx = next; } + Ok(()) } -} - -pub struct CodeInfo { - pub flags: CodeFlags, - pub source_path: String, - pub private: Option, // For private name mangling, mostly for class - pub blocks: Blocks, - pub current_block: BlockIdx, - pub(crate) instr_sequence: InstructionSequence, - pub(crate) instr_sequence_label_map: InstructionSequenceLabelMap, - pub(crate) annotations_instr_sequence: Option, + /// flowgraph.c normalize_jumps_in_block + fn normalize_jumps_in_block(&mut self, block_idx: BlockIdx) -> crate::InternalResult<()> { + let Some(last_ins) = basicblock_last_instr(&self[block_idx]).copied() else { + return Ok(()); + }; + if !is_conditional_jump_opcode(last_ins.instr) { + return Ok(()); + } + debug_assert!(!last_ins.instr.is_assembler()); - pub metadata: CodeUnitMetadata, + debug_assert!(last_ins.target != BlockIdx::NULL); + let is_forward = !self[last_ins.target].visited; - // For class scopes: attributes accessed via self.X - pub static_attributes: Option>, + if is_forward { + // Insert NOT_TAKEN after forward conditional jump. + let not_taken = InstructionInfo { + instr: Opcode::NotTaken.into(), + arg: OpArg::new(0), + target: BlockIdx::NULL, + location: last_ins.location, + end_location: last_ins.end_location, + except_handler: None, + lineno_override: last_ins.lineno_override, + }; + basicblock_addop(&mut self[block_idx], not_taken)?; + return Ok(()); + } - // True if compiling an inlined comprehension - pub in_inlined_comp: bool, + let reversed_opcode = match last_ins.instr.real_opcode() { + Some(Opcode::PopJumpIfNotNone) => Opcode::PopJumpIfNone.into(), + Some(Opcode::PopJumpIfNone) => Opcode::PopJumpIfNotNone.into(), + Some(Opcode::PopJumpIfFalse) => Opcode::PopJumpIfTrue.into(), + Some(Opcode::PopJumpIfTrue) => Opcode::PopJumpIfFalse.into(), + _ => unreachable!("conditional jump has reverse opcode"), + }; - // Block stack for tracking nested control structures - pub fblock: Vec, + // Transform 'conditional jump T' to 'reversed_jump b_next' followed by + // 'jump_backwards T'. + let loc = last_ins.location; + let end_loc = last_ins.end_location; - // Reference to the symbol table for this scope - pub symbol_table_index: usize, - // CPython compile.c uses PyList_GET_SIZE(u->u_ste->ste_varnames) - // when calling flowgraph.c _PyCfg_OptimizeCodeUnit(). - pub nparams: usize, + let target = last_ins.target; + let backwards_jump_idx = self.blocks_new_block()?; + basicblock_addop( + &mut self[backwards_jump_idx], + InstructionInfo { + instr: Opcode::NotTaken.into(), + arg: OpArg::new(0), + target: BlockIdx::NULL, + location: loc, + end_location: end_loc, + except_handler: None, + lineno_override: last_ins.lineno_override, + }, + )?; + self.basicblock_add_jump( + backwards_jump_idx, + PseudoOpcode::Jump.into(), + target, + &last_ins, + )?; + self[backwards_jump_idx].start_depth = self[target].start_depth; - // PEP 649: Track nesting depth inside conditional blocks (if/for/while/etc.) - // u_in_conditional_block - pub in_conditional_block: u32, + let old_next = self[block_idx].next; + debug_assert!(old_next != BlockIdx::NULL); - // PEP 649: Next index for conditional annotation tracking - // u_next_conditional_annotation_index - pub next_conditional_annotation_index: u32, -} + let last_mut = basicblock_last_instr_mut(&mut self[block_idx]).unwrap(); + last_mut.instr = reversed_opcode; + last_mut.target = old_next; -impl CodeInfo { - pub(crate) fn addop_to_instr_sequence( - &mut self, - mut info: InstructionInfo, - ) -> crate::InternalResult<()> { - if info.instr.has_target() && info.target != BlockIdx::NULL { - let label = instruction_sequence_label_map_ensure_label_for_block( - &mut self.instr_sequence_label_map, - &mut self.instr_sequence, - info.target, - )?; - info.arg = instruction_sequence_label_oparg(label); - info.target = BlockIdx::NULL; - } - instruction_sequence_addop(&mut self.instr_sequence, info)?; + self[backwards_jump_idx].cold = self[block_idx].cold; + self[backwards_jump_idx].next = old_next; + self[block_idx].next = backwards_jump_idx; Ok(()) } - pub(crate) fn addop_to_instr_sequence_with_target_label( + /// flowgraph.c basicblock_inline_small_or_no_lineno_blocks + fn basicblock_inline_small_or_no_lineno_blocks( &mut self, - mut info: InstructionInfo, - target_label: InstructionSequenceLabel, - ) -> crate::InternalResult<()> { - if !info.instr.has_target() { - return Err(InternalError::MalformedControlFlowGraph); + block_idx: BlockIdx, + ) -> crate::InternalResult { + let Some(last) = basicblock_last_instr(&self[block_idx]).copied() else { + return Ok(false); + }; + + if !last.instr.is_unconditional_jump() { + return Ok(false); } - info.arg = instruction_sequence_label_oparg(target_label); - info.target = BlockIdx::NULL; - instruction_sequence_addop(&mut self.instr_sequence, info)?; - Ok(()) - } - pub(crate) fn addop_to_current_block( - &mut self, - info: InstructionInfo, - ) -> crate::InternalResult<()> { - basicblock_addop(&mut self.blocks[self.current_block.idx()], info) + let target = last.target; + debug_assert!(target != BlockIdx::NULL); + let small_exit_block = + basicblock_exits_scope(&self[target]) && self[target].instruction_used <= MAX_COPY_SIZE; + let no_lineno_no_fallthrough = + basicblock_has_no_lineno(&self[target]) && !bb_has_fallthrough(&self[target]); + if small_exit_block || no_lineno_no_fallthrough { + debug_assert!(is_jump(&last)); + let removed_jump_opcode = last.instr; + let last = basicblock_last_instr_mut(&mut self[block_idx]) + .expect("non-empty block has last instruction"); + set_to_nop(last); + self.basicblock_append_block_instructions(block_idx, target)?; + if no_lineno_no_fallthrough { + let last = basicblock_last_instr_mut(&mut self[block_idx]).unwrap(); + if last.instr.is_unconditional_jump() + && matches!( + removed_jump_opcode.into(), + AnyOpcode::Pseudo(PseudoOpcode::Jump) + ) + { + last.instr = PseudoOpcode::Jump.into(); + } + } + self[target].predecessors -= 1; + return Ok(true); + } + Ok(false) } - pub(crate) fn last_current_block_instr_mut(&mut self) -> Option<&mut InstructionInfo> { - basicblock_last_instr_mut(&mut self.blocks[self.current_block.idx()]) - } + /// flowgraph.c inline_small_or_no_lineno_blocks + fn inline_small_or_no_lineno_blocks(&mut self) -> crate::InternalResult { + loop { + let mut changes = false; + let mut current = BlockIdx(0); + while current != BlockIdx::NULL { + let next = self[current].next; + let res = self.basicblock_inline_small_or_no_lineno_blocks(current)?; + if res { + changes = true; + } - pub(crate) fn set_last_instr_sequence_lineno_override(&mut self, lineno_override: i32) { - if let Some(last) = instruction_sequence_last_info_mut(&mut self.instr_sequence) { - last.lineno_override = Some(lineno_override); + current = next; + } + if !changes { + return Ok(changes); + } } } - pub(crate) fn use_instr_sequence_label( + /// flowgraph.c basicblock_remove_redundant_nops + #[allow(clippy::unnecessary_wraps)] + fn basicblock_remove_redundant_nops( &mut self, - block: BlockIdx, - ) -> crate::InternalResult<()> { - let label = instruction_sequence_label_map_ensure_label_for_block( - &mut self.instr_sequence_label_map, - &mut self.instr_sequence, - block, - )?; - instruction_sequence_use_label(&mut self.instr_sequence, label) - } + block_idx: BlockIdx, + ) -> crate::InternalResult { + let mut dest = 0; + let mut prev_lineno = -1i32; + let instr_count = self[block_idx].instruction_used; - pub(crate) fn new_instr_sequence_label(&mut self) -> InstructionSequenceLabel { - instruction_sequence_new_label(&mut self.instr_sequence) - } + for src in 0..instr_count { + let instr = self[block_idx].instructions[src]; + let lineno = instruction_lineno(&instr); - pub(crate) fn use_raw_instr_sequence_label( - &mut self, - label: InstructionSequenceLabel, - ) -> crate::InternalResult<()> { - instruction_sequence_use_label(&mut self.instr_sequence, label) + if matches!(instr.instr.real(), Some(Instruction::Nop)) { + if lineno < 0 { + continue; + } + if prev_lineno == lineno { + continue; + } + if src < instr_count - 1 { + let next_lineno = instruction_lineno(&self[block_idx].instructions[src + 1]); + if next_lineno == lineno { + continue; + } + if next_lineno < 0 { + instr_set_loc( + &mut self[block_idx].instructions[src + 1], + instr.location, + instr.end_location, + instr.lineno_override, + ); + continue; + } + } else { + let next = next_nonempty_block(self, self[block_idx].next); + if next != BlockIdx::NULL { + let mut next_loc = no_linetable_location(); + let mut next_i = 0; + while next_i < self[next].instruction_used { + let instr = self[next].instructions[next_i]; + if matches!(instr.instr.real(), Some(Instruction::Nop)) + && instruction_lineno(&instr) < 0 + { + next_i += 1; + continue; + } + next_loc = instruction_linetable_location(&instr); + break; + } + if lineno == next_loc.line { + continue; + } + } + } + } + + if dest != src { + self[block_idx].instructions[dest] = self[block_idx].instructions[src]; + } + dest += 1; + prev_lineno = lineno; + } + + debug_assert!(dest <= instr_count); + let num_removed = instr_count - dest; + self[block_idx].instruction_used = dest; + Ok(num_removed) } - pub(crate) fn mark_cpython_cfg_label(&mut self, block: BlockIdx) -> crate::InternalResult<()> { - let label = instruction_sequence_label_map_ensure_label_for_block( - &mut self.instr_sequence_label_map, - &mut self.instr_sequence, - block, - )?; - self.blocks[block.idx()].cpython_label = label; - Ok(()) + /// flowgraph.c remove_redundant_nops + #[allow(clippy::unnecessary_wraps)] + fn remove_redundant_nops(&mut self) -> crate::InternalResult { + let mut changes = 0; + let mut current = BlockIdx(0); + while current != BlockIdx::NULL { + let next = self[current].next; + let change = self.basicblock_remove_redundant_nops(current)?; + changes += change; + current = next; + } + Ok(changes) } - pub(crate) fn resolve_instr_sequence_label(&self, block: BlockIdx) -> BlockIdx { - instruction_sequence_label_map_resolve_label(&self.instr_sequence_label_map, block) + /// flowgraph.c no_redundant_nops + #[cfg(debug_assertions)] + fn no_redundant_nops(&mut self) -> bool { + matches!(self.remove_redundant_nops(), Ok(0)) } - pub(crate) fn block_for_instr_sequence_label( - &self, - label: InstructionSequenceLabel, - ) -> BlockIdx { - instruction_sequence_label_map_resolve_label_to_block(&self.instr_sequence_label_map, label) + /// flowgraph.c remove_redundant_jumps + fn remove_redundant_jumps(&mut self) -> crate::InternalResult { + let mut changes = 0; + let mut current = BlockIdx(0); + while current != BlockIdx::NULL { + let Some(last) = basicblock_last_instr(&self[current]).copied() else { + current = self[current].next; + continue; + }; + + debug_assert!(!last.instr.is_assembler()); + if last.instr.is_unconditional_jump() { + let jump_target = next_nonempty_block(self, last.target); + if jump_target == BlockIdx::NULL { + return Err(InternalError::MalformedControlFlowGraph); + } + let next = next_nonempty_block(self, self[current].next); + if jump_target == next { + changes += 1; + let last = basicblock_last_instr_mut(&mut self[current]).unwrap(); + set_to_nop(last); + } + } + current = self[current].next; + } + Ok(changes) } - pub(crate) fn use_instr_sequence_label_at_block( - &mut self, - from: BlockIdx, - to: BlockIdx, - ) -> crate::InternalResult<()> { - instruction_sequence_label_map_use_label_at_block( - &mut self.instr_sequence_label_map, - &mut self.instr_sequence, - from, - to, - ) + /// flowgraph.c no_redundant_jumps + #[cfg(debug_assertions)] + fn no_redundant_jumps(&self) -> bool { + let mut current = BlockIdx(0); + while current != BlockIdx::NULL { + let block = &self[current]; + if let Some(last) = basicblock_last_instr(block) + && last.instr.is_unconditional_jump() + { + let next = next_nonempty_block(self, block.next); + let jump_target = next_nonempty_block(self, last.target); + if jump_target == next { + assert!(next != BlockIdx::NULL); + if instruction_lineno(last) == instruction_lineno(&self[next].instructions[0]) { + assert_ne!( + instruction_lineno(last), + instruction_lineno(&self[next].instructions[0]), + "redundant jump has same line as fallthrough target" + ); + return false; + } + } + } + current = block.next; + } + true } - pub(crate) fn instr_sequence_label_for_block( - &mut self, - block: BlockIdx, - ) -> crate::InternalResult { - if block == BlockIdx::NULL { - Ok(InstructionSequenceLabel::NO_LABEL) - } else { - instruction_sequence_label_map_ensure_label_for_block( - &mut self.instr_sequence_label_map, - &mut self.instr_sequence, - block, - ) + fn remove_redundant_nops_and_jumps(&mut self) -> crate::InternalResult<()> { + loop { + // Convergence is guaranteed because the number of redundant jumps and + // nops only decreases. + let removed_nops = self.remove_redundant_nops()?; + let removed_jumps = self.remove_redundant_jumps()?; + if removed_nops + removed_jumps == 0 { + break; + } } + Ok(()) } - pub(crate) fn insert_start_setup_cleanup( - &mut self, - handler_block: BlockIdx, - ) -> crate::InternalResult<()> { - let handler_label = instruction_sequence_label_map_ensure_label_for_block( - &mut self.instr_sequence_label_map, - &mut self.instr_sequence, - handler_block, - )?; - instruction_sequence_insert_instruction( - &mut self.instr_sequence, - 0, - InstructionInfo { - instr: PseudoOpcode::SetupCleanup.into(), - arg: instruction_sequence_label_oparg(handler_label), - target: BlockIdx::NULL, - location: SourceLocation::default(), - end_location: SourceLocation::default(), - except_handler: None, - lineno_override: Some(NO_LOCATION_OVERRIDE), - }, - ) + fn blocks_new_block(&mut self) -> crate::InternalResult { + self.try_reserve(1) + .map_err(|_| InternalError::MalformedControlFlowGraph)?; + let block_idx = BlockIdx( + self.len() + .to_u32() + .ok_or(InternalError::MalformedControlFlowGraph)?, + ); + self.push(Block::default()); + Ok(block_idx) } +} - pub(crate) fn push_unmapped_instr_sequence_label(&mut self) -> crate::InternalResult<()> { - instruction_sequence_label_map_push_unmapped_label( - &mut self.instr_sequence_label_map, - &mut self.instr_sequence, - ) +impl From> for Blocks { + fn from(value: Vec) -> Self { + Self(value) } +} - pub(crate) fn push_unlabeled_instr_sequence_block(&mut self) -> crate::InternalResult<()> { - instruction_sequence_label_map_push_unlabeled_block(&mut self.instr_sequence_label_map) +impl From> for Blocks { + fn from(value: Box<[Block]>) -> Self { + Self(value.into()) } +} - fn take_recorded_instr_sequence(&mut self) -> crate::InternalResult { - let mut instr_sequence = - core::mem::replace(&mut self.instr_sequence, instruction_sequence_new()); - if let Some(mut annotations_instr_sequence) = self.annotations_instr_sequence.take() { - instruction_sequence_apply_label_map(&mut annotations_instr_sequence)?; - instruction_sequence_set_annotations_code( - &mut instr_sequence, - Some(Box::new(annotations_instr_sequence)), - ); - } - Ok(instr_sequence) +impl From<&[Block]> for Blocks { + fn from(value: &[Block]) -> Self { + Self(value.to_vec()) } +} - fn prepare_cfg_from_codegen(&mut self) -> crate::InternalResult { - // CPython compile.c optimize_and_assemble_code_unit passes - // u_instr_sequence directly into flowgraph.c _PyCfg_FromInstructionSequence(). - self.take_recorded_instr_sequence() +impl From<&mut [Block]> for Blocks { + fn from(value: &mut [Block]) -> Self { + Self(value.to_vec()) } } -fn optimize_code_unit( - metadata: &mut CodeUnitMetadata, - blocks: &mut Blocks, - instr_sequence: InstructionSequence, - nlocals: usize, - nparams: usize, -) -> crate::InternalResult<()> { - // Phase 1: _PyCfg_OptimizeCodeUnit (flowgraph.c) - *blocks = cfg_from_instruction_sequence(instr_sequence)?; - translate_jump_labels_to_targets(blocks)?; - mark_except_handlers(blocks)?; - label_exception_targets(blocks)?; - optimize_cfg(metadata, blocks, metadata.firstlineno)?; - blocks.remove_unused_consts(&mut metadata.consts)?; - add_checks_for_loads_of_uninitialized_variables(blocks, nlocals, nparams)?; - // CPython inserts superinstructions in _PyCfg_OptimizeCodeUnit, before - // later jump normalization / block reordering can create adjacencies - // that never exist at this stage in flowgraph.c. - blocks.insert_superinstructions()?; - push_cold_blocks_to_end(blocks)?; - // CPython resolves line numbers again after cold-block extraction. - blocks.resolve_line_numbers(metadata.firstlineno)?; - Ok(()) +impl From<[Block; N]> for Blocks { + fn from(value: [Block; N]) -> Self { + Self(value.into()) + } } -fn optimize_cfg( - metadata: &mut CodeUnitMetadata, - blocks: &mut Blocks, - firstlineno: OneIndexed, -) -> crate::InternalResult<()> { - // flowgraph.c optimize_cfg - // CPython optimize_cfg() starts with check_cfg() and raises - // SystemError if a jump or scope exit is not the last instruction in - // its block. - check_cfg(blocks)?; - inline_small_or_no_lineno_blocks(blocks)?; - // CPython does not re-run instruction-sequence label-map/CFG conversion - // after this point. Unreferenced label blocks left by jump inlining - // remain block boundaries and can preserve line-marker NOPs. - blocks.remove_unreachable()?; - // CPython optimize_cfg resolves line numbers before local checks and - // superinstruction insertion, so fusion decisions see propagated - // source locations. - blocks.resolve_line_numbers(firstlineno)?; - // CPython optimize_cfg() runs optimize_load_const() and then - // optimize_basic_block() after line numbers are resolved. - optimize_load_const(metadata, blocks)?; - let mut block_idx = BlockIdx(0); - while block_idx != BlockIdx::NULL { - let next_block = blocks[block_idx].next; - blocks.optimize_basic_block(metadata, block_idx)?; - block_idx = next_block; +impl From<&[Block; N]> for Blocks { + fn from(value: &[Block; N]) -> Self { + Self(value.to_vec()) } - blocks.remove_redundant_nops_and_pairs()?; - // CPython optimize_cfg() removes newly-unreachable blocks and - // redundant NOP/jump chains before _PyCfg_OptimizeCodeUnit() prunes - // unused constants. - blocks.remove_unreachable()?; - remove_redundant_nops_and_jumps(blocks)?; - #[cfg(debug_assertions)] - assert!(no_redundant_jumps(blocks)); - Ok(()) } -fn optimized_cfg_to_instruction_sequence( - metadata: &CodeUnitMetadata, - flags: CodeFlags, - blocks: &mut Blocks, -) -> crate::InternalResult<(u32, usize, InstructionSequence)> { - // Phase 2: _PyCfg_OptimizedCfgToInstructionSequence (flowgraph.c) - convert_pseudo_conditional_jumps(blocks)?; - let max_stackdepth = blocks.calculate_stackdepth()?; - debug_assert!(!is_generator(flags) || max_stackdepth != 0); - let nlocalsplus = prepare_localsplus(metadata, blocks, flags)?; - // Match CPython order: pseudo ops are lowered after stackdepth and - // localsplus preparation, before normalize_jumps. - convert_pseudo_ops(blocks)?; - blocks.normalize_jumps()?; - #[cfg(debug_assertions)] - assert!(no_redundant_jumps(blocks)); - // optimize_load_fast: after normalize_jumps - blocks.optimize_load_fast()?; +impl Deref for Blocks { + type Target = [Block]; - let mut instr_sequence = instruction_sequence_new(); - blocks.cfg_to_instruction_sequence(&mut instr_sequence)?; - Ok((max_stackdepth, nlocalsplus, instr_sequence)) + fn deref(&self) -> &Self::Target { + &self.0 + } } -impl CodeInfo { - pub fn finalize_code( - mut self, - opts: &crate::compile::CompileOpts, - ) -> crate::InternalResult { - let instr_sequence = self.prepare_cfg_from_codegen()?; - let nlocals = self.metadata.varnames.len(); - let nparams = self.nparams; - optimize_code_unit( - &mut self.metadata, - &mut self.blocks, - instr_sequence, - nlocals, - nparams, - )?; - let (max_stackdepth, nlocalsplus, mut instr_sequence) = - optimized_cfg_to_instruction_sequence(&self.metadata, self.flags, &mut self.blocks)?; - let localsplusinfo = compute_localsplus_info(&self.metadata, nlocalsplus, self.flags)?; +impl DerefMut for Blocks { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} - let Self { - flags, - source_path, - private: _, // private is only used during compilation +impl Index for Blocks { + type Output = Block; - blocks: _, - current_block: _, - instr_sequence: _, - instr_sequence_label_map: _, - annotations_instr_sequence: _, - metadata, - static_attributes: _, - in_inlined_comp: _, - fblock: _, - symbol_table_index: _, - nparams: _, - in_conditional_block: _, - next_conditional_annotation_index: _, - } = self; + fn index(&self, idx: usize) -> &Self::Output { + &self.0[idx] + } +} - let CodeUnitMetadata { - name: obj_name, - qualname, - consts: constants, - names: name_cache, - varnames: varname_cache, - cellvars: _, - freevars: freevar_cache, - fast_hidden: _, - fast_hidden_final: _, - argcount: arg_count, - posonlyargcount: posonlyarg_count, - kwonlyargcount: kwonlyarg_count, - firstlineno: first_line_number, - } = metadata; +impl IndexMut for Blocks { + fn index_mut(&mut self, idx: usize) -> &mut Self::Output { + &mut self.0[idx] + } +} - resolve_unconditional_jumps(&mut instr_sequence)?; - resolve_jump_offsets(&mut instr_sequence)?; - let assembled = assemble_emit( - &mut instr_sequence, - first_line_number.get() as i32, - opts.debug_ranges, - )?; - let locations = rustpython_compiler_core::marshal::linetable_to_locations( - &assembled.linetable, - first_line_number.get() as i32, - assembled.instructions.len(), - ); +impl Index for Blocks { + type Output = Block; - Ok(CodeObject { - flags, - posonlyarg_count, - arg_count, - kwonlyarg_count, - source_path, - first_line_number: Some(first_line_number), - obj_name: obj_name.clone(), - qualname: qualname.unwrap_or(obj_name), + fn index(&self, block_idx: BlockIdx) -> &Self::Output { + &self.0[block_idx.as_usize()] + } +} - max_stackdepth, - instructions: CodeUnits::from(assembled.instructions), - locations, - constants: constants.into_iter().collect(), - names: name_cache.into_iter().collect(), - varnames: varname_cache.into_iter().collect(), - cellvars: localsplusinfo.cellvars, - freevars: freevar_cache.into_iter().collect(), - localspluskinds: localsplusinfo.kinds, - linetable: assembled.linetable, - exceptiontable: assembled.exceptiontable, - }) +impl IndexMut for Blocks { + fn index_mut(&mut self, block_idx: BlockIdx) -> &mut Self::Output { + &mut self.0[block_idx.as_usize()] } } -/// flowgraph.c IS_GENERATOR -fn is_generator(flags: CodeFlags) -> bool { - flags.intersects(CodeFlags::GENERATOR | CodeFlags::COROUTINE | CodeFlags::ASYNC_GENERATOR) +pub(crate) const START_DEPTH_UNSET: i32 = i32::MIN; +const CO_MAXBLOCKS: usize = 20; + +/// flowgraph.c struct _PyCfgExceptStack +#[derive(Clone, Debug)] +struct CfgExceptStack { + handlers: [BlockIdx; CO_MAXBLOCKS + 2], + depth: usize, } -/// flowgraph.c insert_prefix_instructions -fn insert_prefix_instructions( - metadata: &CodeUnitMetadata, - blocks: &mut Blocks, - cellfixedoffsets: &[i32], - nfreevars: usize, - flags: CodeFlags, -) -> crate::InternalResult<()> { - debug_assert!(!blocks.is_empty()); - let entry = &mut blocks[0]; - let ncellvars = metadata.cellvars.len(); - let firstlineno = metadata.firstlineno; - debug_assert!(firstlineno.get() > 0); +/// flowgraph.c `basicblock **stack` +#[derive(Clone, Debug)] +struct CfgTraversalStack { + stack: Vec, + sp: usize, +} - if is_generator(flags) { - let location = SourceLocation { - line: firstlineno, - character_offset: OneIndexed::MIN, - }; - basicblock_insert_instruction( - entry, - 0, - InstructionInfo { - instr: Instruction::ReturnGenerator.into(), - arg: OpArg::new(0), - target: BlockIdx::NULL, - location, - end_location: location, - except_handler: None, - lineno_override: Some(LINE_ONLY_LOCATION_OVERRIDE), - }, - )?; - basicblock_insert_instruction( - entry, - 1, - InstructionInfo { - instr: Instruction::PopTop.into(), - arg: OpArg::new(0), - target: BlockIdx::NULL, - location, - end_location: location, - except_handler: None, - lineno_override: Some(LINE_ONLY_LOCATION_OVERRIDE), - }, - )?; +impl CfgTraversalStack { + fn push(&mut self, block: BlockIdx) { + debug_assert!(self.sp < self.stack.len()); + self.stack[self.sp] = block; + self.sp += 1; } - if ncellvars > 0 { - let nvars = metadata.varnames.len() + ncellvars; - let mut sorted = Vec::new(); - vec_try_reserve_exact(&mut sorted, nvars)?; - sorted.resize(nvars, 0i32); - for i in 0..ncellvars { - sorted[cellfixedoffsets[i] as usize] = i as i32 + 1; - } - let mut ncellsused = 0; - let mut i = 0; - while ncellsused < ncellvars { - let oldindex = sorted[i] - 1; - i += 1; - if oldindex == -1 { - continue; - } - basicblock_insert_instruction( - entry, - ncellsused, - InstructionInfo { - instr: Opcode::MakeCell.into(), - arg: OpArg::new(oldindex as u32), - target: BlockIdx::NULL, - location: SourceLocation::default(), - end_location: SourceLocation::default(), - except_handler: None, - lineno_override: Some(NO_LOCATION_OVERRIDE), - }, - )?; - ncellsused += 1; + fn pop(&mut self) -> Option { + if self.sp == 0 { + return None; } + self.sp -= 1; + Some(self.stack[self.sp]) } - if nfreevars > 0 { - basicblock_insert_instruction( - entry, - 0, - InstructionInfo { - instr: Opcode::CopyFreeVars.into(), - arg: OpArg::new(nfreevars as u32), - target: BlockIdx::NULL, - location: SourceLocation::default(), - end_location: SourceLocation::default(), - except_handler: None, - lineno_override: Some(NO_LOCATION_OVERRIDE), - }, - )?; + fn capacity(&self) -> usize { + self.stack.len() } - Ok(()) } -/// flowgraph.c prepare_localsplus -fn prepare_localsplus( - metadata: &CodeUnitMetadata, - blocks: &mut Blocks, - flags: CodeFlags, -) -> crate::InternalResult { - let nlocals = metadata.varnames.len(); - let ncellvars = metadata.cellvars.len(); - let nfreevars = metadata.freevars.len(); - let int_max = i32::MAX as usize; - debug_assert!(nlocals < int_max); - debug_assert!(ncellvars < int_max); - debug_assert!(nfreevars < int_max); - debug_assert!(int_max - nlocals - ncellvars > 0); - debug_assert!(int_max - nlocals - ncellvars - nfreevars > 0); - let mut nlocalsplus = nlocals + ncellvars + nfreevars; - let mut cellfixedoffsets = build_cellfixedoffsets(metadata)?; - - // This must be called before fix_cell_offsets(). - insert_prefix_instructions(metadata, blocks, &cellfixedoffsets, nfreevars, flags)?; - - let numdropped = fix_cell_offsets(metadata, blocks, &mut cellfixedoffsets); - nlocalsplus -= numdropped; - Ok(nlocalsplus) +#[derive(Clone, Debug)] +pub(crate) struct InstructionSequenceLabelMap { + block_labels: Vec, + /// Codegen-side shadow of CPython's instruction-sequence label map. + /// + /// `_PyInstructionSequence_UseLabel()` can map multiple labels to the same + /// instruction offset before `_PyCfg_FromInstructionSequence()` materializes + /// CFG blocks. The codegen CFG path keeps the same aliasing by resolving + /// those labels to the block that owns the shared offset. + cpython_block_by_label: Vec, } -/// flowgraph.c eval_const_unaryop -fn eval_const_unaryop( - operand: &ConstantData, - op: Instruction, - intrinsic: Option, -) -> Option { - match (operand, op, intrinsic) { - (ConstantData::Integer { value }, Instruction::UnaryNegative, None) => { - Some(ConstantData::Integer { value: -value }) - } - (ConstantData::Float { value }, Instruction::UnaryNegative, None) => { - Some(ConstantData::Float { value: -value }) - } - (ConstantData::Complex { value }, Instruction::UnaryNegative, None) => { - Some(ConstantData::Complex { value: -value }) - } - (ConstantData::Boolean { value }, Instruction::UnaryNegative, None) => { - Some(ConstantData::Integer { - value: BigInt::from(-i32::from(*value)), - }) +fn instruction_sequence_label_map_register_label( + map: &mut InstructionSequenceLabelMap, + label: InstructionSequenceLabel, +) -> crate::InternalResult<()> { + debug_assert!(is_label(label)); + let old_size = map.cpython_block_by_label.len(); + let new_allocation = c_array_ensure_capacity::( + old_size, + label.idx(), + INITIAL_INSTR_SEQUENCE_LABELS_MAP_SIZE, + )?; + if new_allocation > old_size { + if new_allocation > map.cpython_block_by_label.capacity() { + map.cpython_block_by_label + .try_reserve_exact(new_allocation - map.cpython_block_by_label.capacity()) + .map_err(|_| InternalError::MalformedControlFlowGraph)?; } - (ConstantData::Integer { value }, Instruction::UnaryInvert, None) => { - Some(ConstantData::Integer { value: !value }) + map.cpython_block_by_label + .resize(new_allocation, BlockIdx::NULL); + for i in old_size..map.cpython_block_by_label.len() { + map.cpython_block_by_label[i] = BlockIdx::NULL; } - (ConstantData::Boolean { .. }, Instruction::UnaryInvert, None) => None, - (_, Instruction::UnaryNot, None) => Some(ConstantData::Boolean { - value: !operand.truthiness(), - }), - ( - ConstantData::Integer { value }, - Instruction::CallIntrinsic1 { .. }, - Some(oparg::IntrinsicFunction1::UnaryPositive), - ) => Some(ConstantData::Integer { - value: value.clone(), - }), - ( - ConstantData::Float { value }, - Instruction::CallIntrinsic1 { .. }, - Some(oparg::IntrinsicFunction1::UnaryPositive), - ) => Some(ConstantData::Float { value: *value }), - ( - ConstantData::Boolean { value }, - Instruction::CallIntrinsic1 { .. }, - Some(oparg::IntrinsicFunction1::UnaryPositive), - ) => Some(ConstantData::Integer { - value: BigInt::from(i32::from(*value)), - }), - ( - ConstantData::Complex { value }, - Instruction::CallIntrinsic1 { .. }, - Some(oparg::IntrinsicFunction1::UnaryPositive), - ) => Some(ConstantData::Complex { value: *value }), - _ => None, } + debug_assert!(map.cpython_block_by_label.len() > label.idx()); + Ok(()) } -fn load_const_truthiness( - instr: Instruction, - arg: OpArg, - metadata: &CodeUnitMetadata, -) -> Option { - match instr { - Instruction::LoadConst { consti } => { - let constant = &metadata.consts[consti.get(arg).as_usize()]; - Some(constant.truthiness()) - } - Instruction::LoadSmallInt { i } => Some(i.get(arg) != 0), - _ => None, +fn instruction_sequence_label_map_ensure_label_for_block( + map: &mut InstructionSequenceLabelMap, + seq: &mut InstructionSequence, + block: BlockIdx, +) -> crate::InternalResult { + debug_assert_ne!(block, BlockIdx::NULL); + let block_label = map.block_labels[block.idx()]; + if is_label(block_label) { + return Ok(block_label); } + let label = instruction_sequence_new_label(seq); + debug_assert_eq!(label.0, seq.next_free_label); + instruction_sequence_label_map_register_label(map, label)?; + map.cpython_block_by_label[label.idx()] = block; + map.block_labels[block.idx()] = label; + Ok(label) } -/// flowgraph.c add_const -fn add_const( - metadata: &mut CodeUnitMetadata, - constant: ConstantData, -) -> crate::InternalResult { - Ok(metadata.consts.try_insert_full(constant)?.0) +fn instruction_sequence_label_map_label_for_block( + map: &InstructionSequenceLabelMap, + block: BlockIdx, +) -> InstructionSequenceLabel { + debug_assert_ne!(block, BlockIdx::NULL); + map.block_labels + .get(block.idx()) + .copied() + .unwrap_or(InstructionSequenceLabel::NO_LABEL) } -fn instr_make_load_const( - metadata: &mut CodeUnitMetadata, - instr: &mut InstructionInfo, - constant: ConstantData, -) -> crate::InternalResult<()> { - if maybe_instr_make_load_smallint(instr, &constant) { - return Ok(()); +fn instruction_sequence_label_map_block_for_label( + map: &InstructionSequenceLabelMap, + label: InstructionSequenceLabel, +) -> Option { + if !is_label(label) { + return None; } - - let const_idx = add_const(metadata, constant)?; - instr_set_op1( - instr, - Opcode::LoadConst.into(), - OpArg::new(const_idx as u32), - ); - Ok(()) + map.cpython_block_by_label + .get(label.idx()) + .copied() + .filter(|&block| block != BlockIdx::NULL) } -/// flowgraph.c fold_const_unaryop -fn fold_const_unaryop( - metadata: &mut CodeUnitMetadata, - block: &mut Block, - i: usize, -) -> crate::InternalResult { - let instr = &block.instructions[i]; - let (op, intrinsic) = match instr.instr.real() { - Some(Instruction::UnaryNegative) => (Instruction::UnaryNegative, None), - Some(Instruction::UnaryInvert) => (Instruction::UnaryInvert, None), - Some(Instruction::UnaryNot) => (Instruction::UnaryNot, None), - Some(Instruction::CallIntrinsic1 { func }) - if matches!( - func.get(instr.arg), - oparg::IntrinsicFunction1::UnaryPositive - ) => - { - (Opcode::CallIntrinsic1.into(), Some(func.get(instr.arg))) - } - _ => return Ok(false), - }; - let Some(operand_index) = (if let Some(start) = i.checked_sub(1) { - get_const_loading_instrs(block, start, 1)? - } else { - None +fn instruction_sequence_label_map_resolve_label( + map: &InstructionSequenceLabelMap, + block: BlockIdx, +) -> BlockIdx { + if block == BlockIdx::NULL { + return BlockIdx::NULL; + } + let label = instruction_sequence_label_map_label_for_block(map, block); + if !is_label(label) { + return block; + } + instruction_sequence_label_map_block_for_label(map, label).unwrap_or_else(|| { + debug_assert!( + false, + "CPython instruction-sequence label must map to a codegen CFG block" + ); + BlockIdx::NULL }) - .and_then(|indices| indices.into_iter().next()) else { - return Ok(false); - }; - let operand = get_const_value(metadata, &block.instructions[operand_index]); - let Some(operand) = operand else { - return Ok(false); - }; - let Some(folded_const) = eval_const_unaryop(&operand, op, intrinsic) else { - return Ok(false); - }; - nop_out(block, &[operand_index]); - instr_make_load_const(metadata, &mut block.instructions[i], folded_const)?; - Ok(true) } -/// flowgraph.c get_const_loading_instrs -fn get_const_loading_instrs( - block: &Block, - mut start: usize, - size: usize, -) -> crate::InternalResult>> { - let mut indices = Vec::new(); - indices - .try_reserve_exact(size) - .map_err(|_| InternalError::MalformedControlFlowGraph)?; - loop { - if start >= block.instruction_used { - return Ok(None); - } - let instr = &block.instructions[start]; - if !matches!(instr.instr.real(), Some(Instruction::Nop)) { - if !loads_const(instr) { - return Ok(None); - } - indices.push(start); - if indices.len() == size { - break; - } - } - let Some(prev) = start.checked_sub(1) else { - return Ok(None); - }; - start = prev; +fn instruction_sequence_label_map_resolve_label_to_block( + map: &InstructionSequenceLabelMap, + label: InstructionSequenceLabel, +) -> BlockIdx { + if !is_label(label) { + return BlockIdx::NULL; } - indices.reverse(); - Ok(Some(indices)) + instruction_sequence_label_map_block_for_label(map, label).unwrap_or_else(|| { + debug_assert!( + false, + "CPython instruction-sequence label must map to a codegen CFG block" + ); + BlockIdx::NULL + }) } -/// flowgraph.c nop_out -fn nop_out(block: &mut Block, instrs: &[usize]) { - for &i in instrs { - nop_out_no_location(&mut block.instructions[i]); - } +fn instruction_sequence_label_oparg(label: InstructionSequenceLabel) -> OpArg { + debug_assert!(is_label(label)); + OpArg::new(label.idx() as u32) } -/// flowgraph.c fold_const_binop -fn fold_const_binop( - metadata: &mut CodeUnitMetadata, - block: &mut Block, - i: usize, -) -> crate::InternalResult { - use oparg::BinaryOperator as BinOp; - - let Some(Opcode::BinaryOp) = block.instructions[i].instr.real_opcode() else { - return Ok(false); - }; - - let Some(operand_indices) = (if let Some(start) = i.checked_sub(1) { - get_const_loading_instrs(block, start, 2)? - } else { - None - }) else { - return Ok(false); - }; - - let op_raw = u32::from(block.instructions[i].arg); - let Ok(op) = BinOp::try_from(op_raw) else { - return Ok(false); - }; - - let left = get_const_value(metadata, &block.instructions[operand_indices[0]]); - let right = get_const_value(metadata, &block.instructions[operand_indices[1]]); - let (Some(left_val), Some(right_val)) = (left, right) else { - return Ok(false); - }; - - let Some(result_const) = eval_const_binop(&left_val, &right_val, op) else { - return Ok(false); - }; +fn instruction_sequence_label_map_use_label_at_block( + map: &mut InstructionSequenceLabelMap, + seq: &mut InstructionSequence, + from: BlockIdx, + to: BlockIdx, +) -> crate::InternalResult<()> { + if from == BlockIdx::NULL || from == to { + return Ok(()); + } + let from_label = instruction_sequence_label_map_ensure_label_for_block(map, seq, from)?; + debug_assert!(map.cpython_block_by_label.len() > from_label.idx()); + let to_block = instruction_sequence_label_map_resolve_label(map, to); + if to_block == BlockIdx::NULL { + debug_assert!( + false, + "CPython label target must map to a codegen CFG block" + ); + return Ok(()); + } + map.cpython_block_by_label[from_label.idx()] = to_block; + Ok(()) +} - nop_out(block, &operand_indices); - instr_make_load_const(metadata, &mut block.instructions[i], result_const)?; - Ok(true) +fn instruction_sequence_label_map_push_unlabeled_block( + map: &mut InstructionSequenceLabelMap, +) -> crate::InternalResult<()> { + map.block_labels + .try_reserve(1) + .map_err(|_| InternalError::MalformedControlFlowGraph)?; + map.block_labels.push(InstructionSequenceLabel::NO_LABEL); + Ok(()) } -/// flowgraph.c loads_const -fn loads_const(info: &InstructionInfo) -> bool { - info.instr.has_const() || matches!(info.instr.real_opcode(), Some(Opcode::LoadSmallInt)) +fn instruction_sequence_label_map_push_unmapped_label( + map: &mut InstructionSequenceLabelMap, + seq: &mut InstructionSequence, +) -> crate::InternalResult<()> { + let label = instruction_sequence_new_label(seq); + debug_assert_eq!(label.0, seq.next_free_label); + instruction_sequence_label_map_register_label(map, label)?; + let block = BlockIdx( + map.block_labels + .len() + .to_u32() + .ok_or(InternalError::MalformedControlFlowGraph)?, + ); + map.cpython_block_by_label[label.idx()] = block; + map.block_labels + .try_reserve(1) + .map_err(|_| InternalError::MalformedControlFlowGraph)?; + map.block_labels.push(label); + Ok(()) } -/// flowgraph.c get_const_value -fn get_const_value(metadata: &CodeUnitMetadata, info: &InstructionInfo) -> Option { - match info.instr.real_opcode() { - Some(Opcode::LoadSmallInt) => { - let v = u32::from(info.arg) as i32; - Some(ConstantData::Integer { - value: BigInt::from(v), - }) - } - _ if info.instr.has_const() => { - let idx = u32::from(info.arg) as usize; - metadata.consts.get_index(idx).cloned() +impl InstructionSequenceLabelMap { + pub(crate) fn new() -> Self { + Self { + block_labels: vec![InstructionSequenceLabel::NO_LABEL], + cpython_block_by_label: Vec::new(), } - _ => None, } } -/// flowgraph.c const_folding_check_complexity -fn const_folding_check_complexity(obj: &ConstantData, mut limit: isize) -> Option { - if let ConstantData::Tuple { elements } = obj { - limit -= isize::try_from(elements.len()).ok()?; - if limit < 0 { - return None; +pub struct CodeInfo { + pub flags: CodeFlags, + pub source_path: String, + pub private: Option, // For private name mangling, mostly for class + + pub blocks: Blocks, + pub current_block: BlockIdx, + pub(crate) instr_sequence: InstructionSequence, + pub(crate) instr_sequence_label_map: InstructionSequenceLabelMap, + pub(crate) annotations_instr_sequence: Option, + + pub metadata: CodeUnitMetadata, + + // For class scopes: attributes accessed via self.X + pub static_attributes: Option>, + + // True if compiling an inlined comprehension + pub in_inlined_comp: bool, + + // Block stack for tracking nested control structures + pub fblock: Vec, + + // Reference to the symbol table for this scope + pub symbol_table_index: usize, + // CPython compile.c uses PyList_GET_SIZE(u->u_ste->ste_varnames) + // when calling flowgraph.c _PyCfg_OptimizeCodeUnit(). + pub nparams: usize, + + // PEP 649: Track nesting depth inside conditional blocks (if/for/while/etc.) + // u_in_conditional_block + pub in_conditional_block: u32, + + // PEP 649: Next index for conditional annotation tracking + // u_next_conditional_annotation_index + pub next_conditional_annotation_index: u32, +} + +impl CodeInfo { + pub(crate) fn addop_to_instr_sequence( + &mut self, + mut info: InstructionInfo, + ) -> crate::InternalResult<()> { + if info.instr.has_target() && info.target != BlockIdx::NULL { + let label = instruction_sequence_label_map_ensure_label_for_block( + &mut self.instr_sequence_label_map, + &mut self.instr_sequence, + info.target, + )?; + info.arg = instruction_sequence_label_oparg(label); + info.target = BlockIdx::NULL; } - for element in elements { - limit = const_folding_check_complexity(element, limit)?; + instruction_sequence_addop(&mut self.instr_sequence, info)?; + Ok(()) + } + + pub(crate) fn addop_to_instr_sequence_with_target_label( + &mut self, + mut info: InstructionInfo, + target_label: InstructionSequenceLabel, + ) -> crate::InternalResult<()> { + if !info.instr.has_target() { + return Err(InternalError::MalformedControlFlowGraph); } + info.arg = instruction_sequence_label_oparg(target_label); + info.target = BlockIdx::NULL; + instruction_sequence_addop(&mut self.instr_sequence, info)?; + Ok(()) } - Some(limit) -} -fn repeat_wtf8(value: &Wtf8Buf, n: usize) -> Option { - let mut result = Wtf8Buf::new(); - result.try_reserve_exact(value.len().checked_mul(n)?).ok()?; - for _ in 0..n { - result.push_wtf8(value); + pub(crate) fn addop_to_current_block( + &mut self, + info: InstructionInfo, + ) -> crate::InternalResult<()> { + basicblock_addop(&mut self.blocks[self.current_block.idx()], info) } - Some(result) -} -fn checked_repeat_count(n: &BigInt, item_size: usize) -> Option { - let n = n.to_isize()?; - if item_size != 0 && (n < 0 || n as usize > MAX_STR_SIZE / item_size) { - return None; + pub(crate) fn last_current_block_instr_mut(&mut self) -> Option<&mut InstructionInfo> { + basicblock_last_instr_mut(&mut self.blocks[self.current_block.idx()]) } - Some(n.max(0) as usize) -} -/// flowgraph.c const_folding_safe_multiply -fn const_folding_safe_multiply(left: &ConstantData, right: &ConstantData) -> Option { - match (left, right) { - (ConstantData::Integer { value: l }, ConstantData::Integer { value: r }) => { - if !l.is_zero() && !r.is_zero() && l.bits() + r.bits() > MAX_INT_SIZE { - return None; - } - Some(ConstantData::Integer { value: l * r }) - } - (ConstantData::Float { value: l }, ConstantData::Float { value: r }) => { - Some(ConstantData::Float { value: l * r }) - } - (ConstantData::Str { value: s }, ConstantData::Integer { value: n }) => { - let n = checked_repeat_count(n, s.code_points().count())?; - Some(ConstantData::Str { - value: repeat_wtf8(s, n)?, - }) - } - (ConstantData::Integer { .. }, ConstantData::Str { .. }) => { - const_folding_safe_multiply(right, left) - } - (ConstantData::Bytes { value: b }, ConstantData::Integer { value: n }) => { - let n = checked_repeat_count(n, b.len())?; - let mut value = Vec::new(); - value.try_reserve_exact(b.len().checked_mul(n)?).ok()?; - for _ in 0..n { - value.extend_from_slice(b); - } - Some(ConstantData::Bytes { value }) - } - (ConstantData::Integer { .. }, ConstantData::Bytes { .. }) => { - const_folding_safe_multiply(right, left) - } - (ConstantData::Tuple { elements }, ConstantData::Integer { value: n }) => { - let n = n.to_usize()?; - if n != 0 && !elements.is_empty() { - if n > MAX_COLLECTION_SIZE / elements.len() { - return None; - } - const_folding_check_complexity( - &ConstantData::Tuple { - elements: elements.clone(), - }, - MAX_TOTAL_ITEMS / isize::try_from(n).ok()?, - )?; - } - let mut result = Vec::new(); - result - .try_reserve_exact(elements.len().checked_mul(n)?) - .ok()?; - for _ in 0..n { - result.extend(elements.iter().cloned()); - } - Some(ConstantData::Tuple { elements: result }) - } - (ConstantData::Integer { .. }, ConstantData::Tuple { .. }) => { - const_folding_safe_multiply(right, left) + pub(crate) fn set_last_instr_sequence_lineno_override(&mut self, lineno_override: i32) { + if let Some(last) = instruction_sequence_last_info_mut(&mut self.instr_sequence) { + last.lineno_override = Some(lineno_override); } - _ => None, } -} -/// flowgraph.c const_folding_safe_power -fn const_folding_safe_power(left: &ConstantData, right: &ConstantData) -> Option { - match (left, right) { - (ConstantData::Integer { value: l }, ConstantData::Integer { value: r }) => { - if r < &BigInt::from(0) { - if l.is_zero() { - return None; - } - let base = l.to_f64()?; - if !base.is_finite() { - return None; - } - let result = if let Some(exp) = r.to_i32() { - base.powi(exp) - } else { - base.powf(r.to_f64()?) - }; - if !result.is_finite() { - return None; - } - return Some(ConstantData::Float { value: result }); - } - let exp: u64 = r.try_into().ok()?; - let exp_usize = usize::try_from(exp).ok()?; - if !l.is_zero() && exp > 0 && l.bits() > MAX_INT_SIZE / exp { - return None; - } - Some(ConstantData::Integer { - value: num_traits::pow::pow(l.clone(), exp_usize), - }) - } - (ConstantData::Float { value: l }, ConstantData::Float { value: r }) => { - let result = l.powf(*r); - result - .is_finite() - .then_some(ConstantData::Float { value: result }) - } - _ => None, + pub(crate) fn use_instr_sequence_label( + &mut self, + block: BlockIdx, + ) -> crate::InternalResult<()> { + let label = instruction_sequence_label_map_ensure_label_for_block( + &mut self.instr_sequence_label_map, + &mut self.instr_sequence, + block, + )?; + instruction_sequence_use_label(&mut self.instr_sequence, label) } -} -/// flowgraph.c const_folding_safe_lshift -fn const_folding_safe_lshift(left: &ConstantData, right: &ConstantData) -> Option { - let (ConstantData::Integer { value: l }, ConstantData::Integer { value: r }) = (left, right) - else { - return None; - }; - let shift: u64 = r.try_into().ok()?; - let shift_usize = usize::try_from(shift).ok()?; - if shift > MAX_INT_SIZE || (!l.is_zero() && l.bits() > MAX_INT_SIZE - shift) { - return None; + pub(crate) fn new_instr_sequence_label(&mut self) -> InstructionSequenceLabel { + instruction_sequence_new_label(&mut self.instr_sequence) } - Some(ConstantData::Integer { - value: l << shift_usize, - }) -} -/// flowgraph.c const_folding_safe_mod -fn const_folding_safe_mod(left: &ConstantData, right: &ConstantData) -> Option { - if matches!(left, ConstantData::Str { .. } | ConstantData::Bytes { .. }) { - return None; + pub(crate) fn use_raw_instr_sequence_label( + &mut self, + label: InstructionSequenceLabel, + ) -> crate::InternalResult<()> { + instruction_sequence_use_label(&mut self.instr_sequence, label) } - match (left, right) { - (ConstantData::Integer { value: l }, ConstantData::Integer { value: r }) => { - if r.is_zero() { - return None; - } - let rem = l.clone() % r.clone(); - let value = if !rem.is_zero() && (rem < BigInt::from(0)) != (*r < BigInt::from(0)) { - rem + r - } else { - rem - }; - Some(ConstantData::Integer { value }) - } - (ConstantData::Float { value: l }, ConstantData::Float { value: r }) => { - let (_, modulo) = float_div_mod(*l, *r)?; - Some(ConstantData::Float { value: modulo }) - } - _ => None, + pub(crate) fn mark_cpython_cfg_label(&mut self, block: BlockIdx) -> crate::InternalResult<()> { + let label = instruction_sequence_label_map_ensure_label_for_block( + &mut self.instr_sequence_label_map, + &mut self.instr_sequence, + block, + )?; + self.blocks[block.idx()].cpython_label = label; + Ok(()) } -} -fn float_div_mod(left: f64, right: f64) -> Option<(f64, f64)> { - if right == 0.0 { - return None; + pub(crate) fn resolve_instr_sequence_label(&self, block: BlockIdx) -> BlockIdx { + instruction_sequence_label_map_resolve_label(&self.instr_sequence_label_map, block) } - let mut modulo = left % right; - let div = (left - modulo) / right; - let floordiv = if modulo != 0.0 { - let div = if (right < 0.0) != (modulo < 0.0) { - modulo += right; - div - 1.0 + pub(crate) fn block_for_instr_sequence_label( + &self, + label: InstructionSequenceLabel, + ) -> BlockIdx { + instruction_sequence_label_map_resolve_label_to_block(&self.instr_sequence_label_map, label) + } + + pub(crate) fn use_instr_sequence_label_at_block( + &mut self, + from: BlockIdx, + to: BlockIdx, + ) -> crate::InternalResult<()> { + instruction_sequence_label_map_use_label_at_block( + &mut self.instr_sequence_label_map, + &mut self.instr_sequence, + from, + to, + ) + } + + pub(crate) fn instr_sequence_label_for_block( + &mut self, + block: BlockIdx, + ) -> crate::InternalResult { + if block == BlockIdx::NULL { + Ok(InstructionSequenceLabel::NO_LABEL) } else { - div - }; - let mut floordiv = div.floor(); - if div - floordiv > 0.5 { - floordiv += 1.0; + instruction_sequence_label_map_ensure_label_for_block( + &mut self.instr_sequence_label_map, + &mut self.instr_sequence, + block, + ) } - floordiv - } else { - modulo = 0.0f64.copysign(right); - 0.0f64.copysign(left / right) - }; + } - Some((floordiv, modulo)) -} + pub(crate) fn insert_start_setup_cleanup( + &mut self, + handler_block: BlockIdx, + ) -> crate::InternalResult<()> { + let handler_label = instruction_sequence_label_map_ensure_label_for_block( + &mut self.instr_sequence_label_map, + &mut self.instr_sequence, + handler_block, + )?; + instruction_sequence_insert_instruction( + &mut self.instr_sequence, + 0, + InstructionInfo { + instr: PseudoOpcode::SetupCleanup.into(), + arg: instruction_sequence_label_oparg(handler_label), + target: BlockIdx::NULL, + location: SourceLocation::default(), + end_location: SourceLocation::default(), + except_handler: None, + lineno_override: Some(NO_LOCATION_OVERRIDE), + }, + ) + } -/// flowgraph.c eval_const_binop complex result construction -fn eval_const_complex_const(value: Complex) -> Option { - (value.re.is_finite() && value.im.is_finite()).then_some(ConstantData::Complex { value }) -} + pub(crate) fn push_unmapped_instr_sequence_label(&mut self) -> crate::InternalResult<()> { + instruction_sequence_label_map_push_unmapped_label( + &mut self.instr_sequence_label_map, + &mut self.instr_sequence, + ) + } -/// flowgraph.c eval_const_binop complex operations -fn eval_const_complex_binop( - left: Complex, - right: Complex, - op: oparg::BinaryOperator, -) -> Option { - use oparg::BinaryOperator as BinOp; + pub(crate) fn push_unlabeled_instr_sequence_block(&mut self) -> crate::InternalResult<()> { + instruction_sequence_label_map_push_unlabeled_block(&mut self.instr_sequence_label_map) + } - let value = match op { - BinOp::Add => left + right, - BinOp::Subtract => { - let re = left.re - right.re; - // Preserve CPython's signed-zero behavior for real-zero - // minus zero-complex expressions such as `0 - 0j`. - let im = if left.re == 0.0 - && left.im == 0.0 - && right.re == 0.0 - && right.im == 0.0 - && !right.im.is_sign_negative() - { - -0.0 - } else { - left.im - right.im - }; - Complex::new(re, im) - } - BinOp::Multiply => left * right, - BinOp::TrueDivide => { - if right == Complex::new(0.0, 0.0) { - return None; - } - left / right + fn take_recorded_instr_sequence(&mut self) -> crate::InternalResult { + let mut instr_sequence = + core::mem::replace(&mut self.instr_sequence, instruction_sequence_new()); + if let Some(mut annotations_instr_sequence) = self.annotations_instr_sequence.take() { + instruction_sequence_apply_label_map(&mut annotations_instr_sequence)?; + instruction_sequence_set_annotations_code( + &mut instr_sequence, + Some(Box::new(annotations_instr_sequence)), + ); } - BinOp::Power => { - if left == Complex::new(0.0, 0.0) { - if right.im != 0.0 || right.re < 0.0 { - return None; - } + Ok(instr_sequence) + } - return eval_const_complex_const(if right.re == 0.0 { - Complex::new(1.0, 0.0) - } else { - Complex::new(0.0, 0.0) - }); - } + fn prepare_cfg_from_codegen(&mut self) -> crate::InternalResult { + // CPython compile.c optimize_and_assemble_code_unit passes + // u_instr_sequence directly into flowgraph.c _PyCfg_FromInstructionSequence(). + self.take_recorded_instr_sequence() + } +} - if right.im == 0.0 - && right.re.fract() == 0.0 - && right.re >= f64::from(i32::MIN) - && right.re <= f64::from(i32::MAX) - { - left.powi(right.re as i32) - } else { - left.powc(right) - } - } - _ => return None, - }; - eval_const_complex_const(value) +fn optimize_code_unit( + metadata: &mut CodeUnitMetadata, + blocks: &mut Blocks, + instr_sequence: InstructionSequence, + nlocals: usize, + nparams: usize, +) -> crate::InternalResult<()> { + // Phase 1: _PyCfg_OptimizeCodeUnit (flowgraph.c) + *blocks = cfg_from_instruction_sequence(instr_sequence)?; + translate_jump_labels_to_targets(blocks)?; + blocks.mark_except_handlers()?; + label_exception_targets(blocks)?; + optimize_cfg(metadata, blocks, metadata.firstlineno)?; + blocks.remove_unused_consts(&mut metadata.consts)?; + add_checks_for_loads_of_uninitialized_variables(blocks, nlocals, nparams)?; + // CPython inserts superinstructions in _PyCfg_OptimizeCodeUnit, before + // later jump normalization / block reordering can create adjacencies + // that never exist at this stage in flowgraph.c. + blocks.insert_superinstructions()?; + blocks.push_cold_blocks_to_end()?; + // CPython resolves line numbers again after cold-block extraction. + blocks.resolve_line_numbers(metadata.firstlineno)?; + Ok(()) } -/// flowgraph.c eval_const_binop subscript index conversion -fn constant_as_index(value: &ConstantData) -> Option { - match value { - ConstantData::Integer { value } => value.to_i64().or_else(|| { - if value < &BigInt::from(0) { - Some(i64::MIN) - } else { - Some(i64::MAX) - } - }), - ConstantData::Boolean { value } => Some(i64::from(*value)), - _ => None, +fn optimize_cfg( + metadata: &mut CodeUnitMetadata, + blocks: &mut Blocks, + firstlineno: OneIndexed, +) -> crate::InternalResult<()> { + // flowgraph.c optimize_cfg + // CPython optimize_cfg() starts with check_cfg() and raises + // SystemError if a jump or scope exit is not the last instruction in + // its block. + blocks.check_cfg()?; + blocks.inline_small_or_no_lineno_blocks()?; + // CPython does not re-run instruction-sequence label-map/CFG conversion + // after this point. Unreferenced label blocks left by jump inlining + // remain block boundaries and can preserve line-marker NOPs. + blocks.remove_unreachable()?; + // CPython optimize_cfg resolves line numbers before local checks and + // superinstruction insertion, so fusion decisions see propagated + // source locations. + blocks.resolve_line_numbers(firstlineno)?; + // CPython optimize_cfg() runs optimize_load_const() and then + // optimize_basic_block() after line numbers are resolved. + optimize_load_const(metadata, blocks)?; + let mut block_idx = BlockIdx(0); + while block_idx != BlockIdx::NULL { + let next_block = blocks[block_idx].next; + blocks.optimize_basic_block(metadata, block_idx)?; + block_idx = next_block; } + blocks.remove_redundant_nops_and_pairs()?; + // CPython optimize_cfg() removes newly-unreachable blocks and + // redundant NOP/jump chains before _PyCfg_OptimizeCodeUnit() prunes + // unused constants. + blocks.remove_unreachable()?; + blocks.remove_redundant_nops_and_jumps()?; + #[cfg(debug_assertions)] + assert!(blocks.no_redundant_jumps()); + Ok(()) } -/// flowgraph.c eval_const_binop subscript slice bound conversion -fn slice_bound(value: &ConstantData) -> Option> { - match value { - ConstantData::None => Some(None), - _ => constant_as_index(value).map(Some), - } +fn optimized_cfg_to_instruction_sequence( + metadata: &CodeUnitMetadata, + flags: CodeFlags, + blocks: &mut Blocks, +) -> crate::InternalResult<(u32, usize, InstructionSequence)> { + // Phase 2: _PyCfg_OptimizedCfgToInstructionSequence (flowgraph.c) + blocks.convert_pseudo_conditional_jumps()?; + let max_stackdepth = blocks.calculate_stackdepth()?; + debug_assert!(!is_generator(flags) || max_stackdepth != 0); + let nlocalsplus = prepare_localsplus(metadata, blocks, flags)?; + // Match CPython order: pseudo ops are lowered after stackdepth and + // localsplus preparation, before normalize_jumps. + convert_pseudo_ops(blocks)?; + blocks.normalize_jumps()?; + #[cfg(debug_assertions)] + assert!(blocks.no_redundant_jumps()); + // optimize_load_fast: after normalize_jumps + blocks.optimize_load_fast()?; + + let mut instr_sequence = instruction_sequence_new(); + blocks.cfg_to_instruction_sequence(&mut instr_sequence)?; + Ok((max_stackdepth, nlocalsplus, instr_sequence)) } -/// flowgraph.c eval_const_binop subscript slice index adjustment -fn adjusted_slice_indices(len: usize, slice: &[ConstantData; 3]) -> Option> { - let len = i64::try_from(len).ok()?; - let start = slice_bound(&slice[0])?; - let stop = slice_bound(&slice[1])?; - let step = slice_bound(&slice[2])?.unwrap_or(1); - if step == 0 || step == i64::MIN { - return None; - } +impl CodeInfo { + pub fn finalize_code( + mut self, + opts: &crate::compile::CompileOpts, + ) -> crate::InternalResult { + let instr_sequence = self.prepare_cfg_from_codegen()?; + let nlocals = self.metadata.varnames.len(); + let nparams = self.nparams; + optimize_code_unit( + &mut self.metadata, + &mut self.blocks, + instr_sequence, + nlocals, + nparams, + )?; + let (max_stackdepth, nlocalsplus, mut instr_sequence) = + optimized_cfg_to_instruction_sequence(&self.metadata, self.flags, &mut self.blocks)?; + let localsplusinfo = compute_localsplus_info(&self.metadata, nlocalsplus, self.flags)?; - let step_is_negative = step < 0; - let lower = if step_is_negative { -1 } else { 0 }; - let upper = if step_is_negative { len - 1 } else { len }; - let adjust = |value: Option, default: i64| { - let mut value = value.unwrap_or(default); - if value < 0 { - value = value.saturating_add(len); - if value < 0 { - value = lower; - } - } else if value >= len { - value = upper; - } - value - }; - let start = adjust(start, if step_is_negative { upper } else { lower }); - let stop = adjust(stop, if step_is_negative { lower } else { upper }); + let Self { + flags, + source_path, + private: _, // private is only used during compilation - let mut index = i128::from(start); - let stop = i128::from(stop); - let step = i128::from(step); - let slice_len = if step > 0 { - if index < stop { - usize::try_from((stop - index - 1) / step + 1).ok()? - } else { - 0 - } - } else if index > stop { - usize::try_from((index - stop - 1) / -step + 1).ok()? - } else { - 0 - }; - let mut indices = Vec::new(); - indices.try_reserve_exact(slice_len).ok()?; - if step > 0 { - while index < stop { - indices.push(usize::try_from(index).ok()?); - index += step; - } - } else { - while index > stop { - indices.push(usize::try_from(index).ok()?); - index += step; - } + blocks: _, + current_block: _, + instr_sequence: _, + instr_sequence_label_map: _, + annotations_instr_sequence: _, + metadata, + static_attributes: _, + in_inlined_comp: _, + fblock: _, + symbol_table_index: _, + nparams: _, + in_conditional_block: _, + next_conditional_annotation_index: _, + } = self; + + let CodeUnitMetadata { + name: obj_name, + qualname, + consts: constants, + names: name_cache, + varnames: varname_cache, + cellvars: _, + freevars: freevar_cache, + fast_hidden: _, + fast_hidden_final: _, + argcount: arg_count, + posonlyargcount: posonlyarg_count, + kwonlyargcount: kwonlyarg_count, + firstlineno: first_line_number, + } = metadata; + + resolve_unconditional_jumps(&mut instr_sequence)?; + resolve_jump_offsets(&mut instr_sequence)?; + let assembled = assemble_emit( + &mut instr_sequence, + first_line_number.get() as i32, + opts.debug_ranges, + )?; + let locations = rustpython_compiler_core::marshal::linetable_to_locations( + &assembled.linetable, + first_line_number.get() as i32, + assembled.instructions.len(), + ); + + Ok(CodeObject { + flags, + posonlyarg_count, + arg_count, + kwonlyarg_count, + source_path, + first_line_number: Some(first_line_number), + obj_name: obj_name.clone(), + qualname: qualname.unwrap_or(obj_name), + + max_stackdepth, + instructions: CodeUnits::from(assembled.instructions), + locations, + constants: constants.into_iter().collect(), + names: name_cache.into_iter().collect(), + varnames: varname_cache.into_iter().collect(), + cellvars: localsplusinfo.cellvars, + freevars: freevar_cache.into_iter().collect(), + localspluskinds: localsplusinfo.kinds, + linetable: assembled.linetable, + exceptiontable: assembled.exceptiontable, + }) } - Some(indices) } -/// flowgraph.c eval_const_binop subscript index adjustment -fn adjusted_const_index(len: usize, index: &ConstantData) -> Option { - let len = i64::try_from(len).ok()?; - let index = constant_as_index(index)?; - let index = if index < 0 { - index.saturating_add(len) - } else { - index - }; - if index < 0 || index >= len { - return None; - } - usize::try_from(index).ok() +/// flowgraph.c IS_GENERATOR +fn is_generator(flags: CodeFlags) -> bool { + flags.intersects(CodeFlags::GENERATOR | CodeFlags::COROUTINE | CodeFlags::ASYNC_GENERATOR) } -/// flowgraph.c eval_const_binop NB_SUBSCR -fn eval_const_subscript(container: &ConstantData, index: &ConstantData) -> Option { - match (container, index) { - ( - ConstantData::Str { value }, - ConstantData::Integer { .. } | ConstantData::Boolean { .. }, - ) => { - let string = value.to_string(); - if string.contains(char::REPLACEMENT_CHARACTER) { - return None; - } - let mut chars = Vec::new(); - chars.try_reserve_exact(string.chars().count()).ok()?; - chars.extend(string.chars()); - let index = adjusted_const_index(chars.len(), index)?; - Some(ConstantData::Str { - value: chars[index].to_string().into(), - }) +/// flowgraph.c insert_prefix_instructions +fn insert_prefix_instructions( + metadata: &CodeUnitMetadata, + blocks: &mut Blocks, + cellfixedoffsets: &[i32], + nfreevars: usize, + flags: CodeFlags, +) -> crate::InternalResult<()> { + debug_assert!(!blocks.is_empty()); + let entry = &mut blocks[0]; + let ncellvars = metadata.cellvars.len(); + let firstlineno = metadata.firstlineno; + debug_assert!(firstlineno.get() > 0); + + if is_generator(flags) { + let location = SourceLocation { + line: firstlineno, + character_offset: OneIndexed::MIN, + }; + basicblock_insert_instruction( + entry, + 0, + InstructionInfo { + instr: Instruction::ReturnGenerator.into(), + arg: OpArg::new(0), + target: BlockIdx::NULL, + location, + end_location: location, + except_handler: None, + lineno_override: Some(LINE_ONLY_LOCATION_OVERRIDE), + }, + )?; + basicblock_insert_instruction( + entry, + 1, + InstructionInfo { + instr: Instruction::PopTop.into(), + arg: OpArg::new(0), + target: BlockIdx::NULL, + location, + end_location: location, + except_handler: None, + lineno_override: Some(LINE_ONLY_LOCATION_OVERRIDE), + }, + )?; + } + + if ncellvars > 0 { + let nvars = metadata.varnames.len() + ncellvars; + let mut sorted = Vec::new(); + vec_try_reserve_exact(&mut sorted, nvars)?; + sorted.resize(nvars, 0i32); + for i in 0..ncellvars { + sorted[cellfixedoffsets[i] as usize] = i as i32 + 1; } - (ConstantData::Str { value }, ConstantData::Slice { elements }) => { - let string = value.to_string(); - if string.contains(char::REPLACEMENT_CHARACTER) { - return None; - } - let mut chars = Vec::new(); - chars.try_reserve_exact(string.chars().count()).ok()?; - chars.extend(string.chars()); - let indices = adjusted_slice_indices(chars.len(), elements)?; - let capacity = indices.iter().try_fold(0usize, |capacity, &index| { - capacity.checked_add(chars[index].len_utf8()) - })?; - let mut result = String::new(); - result.try_reserve_exact(capacity).ok()?; - for index in indices { - result.push(chars[index]); + let mut ncellsused = 0; + let mut i = 0; + while ncellsused < ncellvars { + let oldindex = sorted[i] - 1; + i += 1; + if oldindex == -1 { + continue; } - Some(ConstantData::Str { - value: result.into(), - }) + basicblock_insert_instruction( + entry, + ncellsused, + InstructionInfo { + instr: Opcode::MakeCell.into(), + arg: OpArg::new(oldindex as u32), + target: BlockIdx::NULL, + location: SourceLocation::default(), + end_location: SourceLocation::default(), + except_handler: None, + lineno_override: Some(NO_LOCATION_OVERRIDE), + }, + )?; + ncellsused += 1; } - ( - ConstantData::Bytes { value }, - ConstantData::Integer { .. } | ConstantData::Boolean { .. }, - ) => { - let index = adjusted_const_index(value.len(), index)?; + } + + if nfreevars > 0 { + basicblock_insert_instruction( + entry, + 0, + InstructionInfo { + instr: Opcode::CopyFreeVars.into(), + arg: OpArg::new(nfreevars as u32), + target: BlockIdx::NULL, + location: SourceLocation::default(), + end_location: SourceLocation::default(), + except_handler: None, + lineno_override: Some(NO_LOCATION_OVERRIDE), + }, + )?; + } + Ok(()) +} + +/// flowgraph.c prepare_localsplus +fn prepare_localsplus( + metadata: &CodeUnitMetadata, + blocks: &mut Blocks, + flags: CodeFlags, +) -> crate::InternalResult { + let nlocals = metadata.varnames.len(); + let ncellvars = metadata.cellvars.len(); + let nfreevars = metadata.freevars.len(); + let int_max = i32::MAX as usize; + debug_assert!(nlocals < int_max); + debug_assert!(ncellvars < int_max); + debug_assert!(nfreevars < int_max); + debug_assert!(int_max - nlocals - ncellvars > 0); + debug_assert!(int_max - nlocals - ncellvars - nfreevars > 0); + let mut nlocalsplus = nlocals + ncellvars + nfreevars; + let mut cellfixedoffsets = build_cellfixedoffsets(metadata)?; + + // This must be called before fix_cell_offsets(). + insert_prefix_instructions(metadata, blocks, &cellfixedoffsets, nfreevars, flags)?; + + let numdropped = fix_cell_offsets(metadata, blocks, &mut cellfixedoffsets); + nlocalsplus -= numdropped; + Ok(nlocalsplus) +} + +/// flowgraph.c eval_const_unaryop +fn eval_const_unaryop( + operand: &ConstantData, + op: Instruction, + intrinsic: Option, +) -> Option { + match (operand, op, intrinsic) { + (ConstantData::Integer { value }, Instruction::UnaryNegative, None) => { + Some(ConstantData::Integer { value: -value }) + } + (ConstantData::Float { value }, Instruction::UnaryNegative, None) => { + Some(ConstantData::Float { value: -value }) + } + (ConstantData::Complex { value }, Instruction::UnaryNegative, None) => { + Some(ConstantData::Complex { value: -value }) + } + (ConstantData::Boolean { value }, Instruction::UnaryNegative, None) => { Some(ConstantData::Integer { - value: BigInt::from(value[index]), + value: BigInt::from(-i32::from(*value)), }) } - (ConstantData::Bytes { value }, ConstantData::Slice { elements }) => { - let indices = adjusted_slice_indices(value.len(), elements)?; - let mut result = Vec::new(); - result.try_reserve_exact(indices.len()).ok()?; - for index in indices { - result.push(value[index]); - } - Some(ConstantData::Bytes { value: result }) + (ConstantData::Integer { value }, Instruction::UnaryInvert, None) => { + Some(ConstantData::Integer { value: !value }) } + (ConstantData::Boolean { .. }, Instruction::UnaryInvert, None) => None, + (_, Instruction::UnaryNot, None) => Some(ConstantData::Boolean { + value: !operand.truthiness(), + }), ( - ConstantData::Tuple { elements }, - ConstantData::Integer { .. } | ConstantData::Boolean { .. }, - ) => { - let index = adjusted_const_index(elements.len(), index)?; - Some(elements[index].clone()) - } - (ConstantData::Tuple { elements }, ConstantData::Slice { elements: slice }) => { - let indices = adjusted_slice_indices(elements.len(), slice)?; - let mut result = Vec::new(); - result.try_reserve_exact(indices.len()).ok()?; - for index in indices { - result.push(elements[index].clone()); - } - Some(ConstantData::Tuple { elements: result }) - } + ConstantData::Integer { value }, + Instruction::CallIntrinsic1 { .. }, + Some(oparg::IntrinsicFunction1::UnaryPositive), + ) => Some(ConstantData::Integer { + value: value.clone(), + }), + ( + ConstantData::Float { value }, + Instruction::CallIntrinsic1 { .. }, + Some(oparg::IntrinsicFunction1::UnaryPositive), + ) => Some(ConstantData::Float { value: *value }), + ( + ConstantData::Boolean { value }, + Instruction::CallIntrinsic1 { .. }, + Some(oparg::IntrinsicFunction1::UnaryPositive), + ) => Some(ConstantData::Integer { + value: BigInt::from(i32::from(*value)), + }), + ( + ConstantData::Complex { value }, + Instruction::CallIntrinsic1 { .. }, + Some(oparg::IntrinsicFunction1::UnaryPositive), + ) => Some(ConstantData::Complex { value: *value }), _ => None, } } -/// flowgraph.c eval_const_binop bool/int coercion -fn constant_as_int(value: &ConstantData) -> Option<(BigInt, bool)> { - match value { - ConstantData::Boolean { value } => Some((BigInt::from(u8::from(*value)), true)), - ConstantData::Integer { value } => Some((value.clone(), false)), +fn load_const_truthiness( + instr: Instruction, + arg: OpArg, + metadata: &CodeUnitMetadata, +) -> Option { + match instr { + Instruction::LoadConst { consti } => { + let constant = &metadata.consts[consti.get(arg).as_usize()]; + Some(constant.truthiness()) + } + Instruction::LoadSmallInt { i } => Some(i.get(arg) != 0), _ => None, } } -/// flowgraph.c eval_const_binop -fn eval_const_binop( - left: &ConstantData, - right: &ConstantData, - op: oparg::BinaryOperator, -) -> Option { - use oparg::BinaryOperator as BinOp; - - if matches!(op, BinOp::Subscr) { - return eval_const_subscript(left, right); - } - - if let (Some((left_int, left_is_bool)), Some((right_int, right_is_bool))) = - (constant_as_int(left), constant_as_int(right)) - && (left_is_bool || right_is_bool) - { - if left_is_bool && right_is_bool { - match op { - BinOp::And => { - return Some(ConstantData::Boolean { - value: !left_int.is_zero() & !right_int.is_zero(), - }); - } - BinOp::Or => { - return Some(ConstantData::Boolean { - value: !left_int.is_zero() | !right_int.is_zero(), - }); - } - BinOp::Xor => { - return Some(ConstantData::Boolean { - value: !left_int.is_zero() ^ !right_int.is_zero(), - }); - } - _ => {} - } - } +/// flowgraph.c add_const +fn add_const( + metadata: &mut CodeUnitMetadata, + constant: ConstantData, +) -> crate::InternalResult { + Ok(metadata.consts.try_insert_full(constant)?.0) +} - return eval_const_binop( - &ConstantData::Integer { value: left_int }, - &ConstantData::Integer { value: right_int }, - op, - ); +fn instr_make_load_const( + metadata: &mut CodeUnitMetadata, + instr: &mut InstructionInfo, + constant: ConstantData, +) -> crate::InternalResult<()> { + if maybe_instr_make_load_smallint(instr, &constant) { + return Ok(()); } - match (left, right) { - (ConstantData::Integer { value: l }, ConstantData::Integer { value: r }) => { - let result = match op { - BinOp::Add => l + r, - BinOp::Subtract => l - r, - BinOp::Multiply => { - return const_folding_safe_multiply(left, right); - } - BinOp::TrueDivide => { - if r.is_zero() { - return None; - } - let l_f = l.to_f64()?; - let r_f = r.to_f64()?; - let result = l_f / r_f; - if !result.is_finite() { - return None; - } - return Some(ConstantData::Float { value: result }); - } - BinOp::FloorDivide => { - if r.is_zero() { - return None; - } - // Python floor division: round towards negative infinity - let (q, rem) = (l.clone() / r.clone(), l.clone() % r.clone()); - if !rem.is_zero() && (rem < BigInt::from(0)) != (*r < BigInt::from(0)) { - q - 1 - } else { - q - } - } - BinOp::Remainder => return const_folding_safe_mod(left, right), - BinOp::Power => return const_folding_safe_power(left, right), - BinOp::Lshift => return const_folding_safe_lshift(left, right), - BinOp::Rshift => { - let shift: u32 = r.try_into().ok()?; - l >> (shift as usize) - } - BinOp::And => l & r, - BinOp::Or => l | r, - BinOp::Xor => l ^ r, - _ => return None, - }; - Some(ConstantData::Integer { value: result }) - } - (ConstantData::Float { value: l }, ConstantData::Float { value: r }) => { - let result = match op { - BinOp::Add => l + r, - BinOp::Subtract => l - r, - BinOp::Multiply => return const_folding_safe_multiply(left, right), - BinOp::TrueDivide => { - if *r == 0.0 { - return None; - } - l / r - } - BinOp::FloorDivide => { - let (floordiv, _) = float_div_mod(*l, *r)?; - floordiv - } - BinOp::Remainder => return const_folding_safe_mod(left, right), - BinOp::Power => return const_folding_safe_power(left, right), - _ => return None, - }; - if matches!(op, BinOp::Power) && !result.is_finite() { - return None; - } - Some(ConstantData::Float { value: result }) - } - // Int op Float or Float op Int → Float - (ConstantData::Integer { value: l }, ConstantData::Float { value: r }) => { - let l_f = l.to_f64()?; - eval_const_binop( - &ConstantData::Float { value: l_f }, - &ConstantData::Float { value: *r }, - op, - ) - } - (ConstantData::Float { value: l }, ConstantData::Integer { value: r }) => { - let r_f = r.to_f64()?; - eval_const_binop( - &ConstantData::Float { value: *l }, - &ConstantData::Float { value: r_f }, - op, - ) - } - (ConstantData::Integer { value: l }, ConstantData::Complex { value: r }) => { - eval_const_complex_binop(Complex::new(l.to_f64()?, 0.0), *r, op) - } - (ConstantData::Complex { value: l }, ConstantData::Integer { value: r }) => { - eval_const_complex_binop(*l, Complex::new(r.to_f64()?, 0.0), op) - } - (ConstantData::Float { value: l }, ConstantData::Complex { value: r }) => { - eval_const_complex_binop(Complex::new(*l, 0.0), *r, op) - } - (ConstantData::Complex { value: l }, ConstantData::Float { value: r }) => { - eval_const_complex_binop(*l, Complex::new(*r, 0.0), op) - } - (ConstantData::Complex { value: l }, ConstantData::Complex { value: r }) => { - eval_const_complex_binop(*l, *r, op) - } - // String concatenation and repetition - (ConstantData::Str { value: l }, ConstantData::Str { value: r }) - if matches!(op, BinOp::Add) => - { - let mut result = Wtf8Buf::new(); - result - .try_reserve_exact(l.len().checked_add(r.len())?) - .ok()?; - result.push_wtf8(l); - result.push_wtf8(r); - Some(ConstantData::Str { value: result }) - } - (ConstantData::Str { .. }, ConstantData::Integer { .. }) - if matches!(op, BinOp::Multiply) => - { - const_folding_safe_multiply(left, right) - } - (ConstantData::Tuple { elements: l }, ConstantData::Tuple { elements: r }) - if matches!(op, BinOp::Add) => - { - let mut result = Vec::new(); - result - .try_reserve_exact(l.len().checked_add(r.len())?) - .ok()?; - result.extend(l.iter().cloned()); - result.extend(r.iter().cloned()); - Some(ConstantData::Tuple { elements: result }) - } - (ConstantData::Tuple { .. }, ConstantData::Integer { .. }) - if matches!(op, BinOp::Multiply) => - { - const_folding_safe_multiply(left, right) - } - (ConstantData::Integer { .. }, ConstantData::Tuple { .. }) - if matches!(op, BinOp::Multiply) => - { - const_folding_safe_multiply(left, right) - } - (ConstantData::Integer { .. }, ConstantData::Str { .. }) - if matches!(op, BinOp::Multiply) => - { - const_folding_safe_multiply(left, right) - } - (ConstantData::Bytes { value: l }, ConstantData::Bytes { value: r }) - if matches!(op, BinOp::Add) => - { - let mut result = Vec::new(); - result - .try_reserve_exact(l.len().checked_add(r.len())?) - .ok()?; - result.extend_from_slice(l); - result.extend_from_slice(r); - Some(ConstantData::Bytes { value: result }) - } - (ConstantData::Bytes { .. }, ConstantData::Integer { .. }) - if matches!(op, BinOp::Multiply) => - { - const_folding_safe_multiply(left, right) - } - (ConstantData::Integer { .. }, ConstantData::Bytes { .. }) - if matches!(op, BinOp::Multiply) => - { - const_folding_safe_multiply(left, right) - } - _ => None, - } + let const_idx = add_const(metadata, constant)?; + instr_set_op1( + instr, + Opcode::LoadConst.into(), + OpArg::new(const_idx as u32), + ); + Ok(()) } -/// flowgraph.c fold_tuple_of_constants -fn fold_tuple_of_constants( +/// flowgraph.c fold_const_unaryop +fn fold_const_unaryop( metadata: &mut CodeUnitMetadata, block: &mut Block, i: usize, ) -> crate::InternalResult { - let Some(Opcode::BuildTuple) = block.instructions[i].instr.real_opcode() else { - return Ok(false); + let instr = &block.instructions[i]; + let (op, intrinsic) = match instr.instr.real() { + Some(Instruction::UnaryNegative) => (Instruction::UnaryNegative, None), + Some(Instruction::UnaryInvert) => (Instruction::UnaryInvert, None), + Some(Instruction::UnaryNot) => (Instruction::UnaryNot, None), + Some(Instruction::CallIntrinsic1 { func }) + if matches!( + func.get(instr.arg), + oparg::IntrinsicFunction1::UnaryPositive + ) => + { + (Opcode::CallIntrinsic1.into(), Some(func.get(instr.arg))) + } + _ => return Ok(false), }; - - let tuple_size = u32::from(block.instructions[i].arg) as usize; - if tuple_size > STACK_USE_GUIDELINE { - return Ok(false); - } - - let Some(operand_indices) = (if tuple_size == 0 { - Some(Vec::new()) - } else if let Some(start) = i.checked_sub(1) { - get_const_loading_instrs(block, start, tuple_size)? + let Some(operand_index) = (if let Some(start) = i.checked_sub(1) { + get_const_loading_instrs(block, start, 1)? } else { None - }) else { + }) + .and_then(|indices| indices.into_iter().next()) else { return Ok(false); }; - - let mut elements = Vec::new(); - elements - .try_reserve_exact(tuple_size) - .map_err(|_| InternalError::MalformedControlFlowGraph)?; - for &j in &operand_indices { - let Some(element) = get_const_value(metadata, &block.instructions[j]) else { - return Ok(false); - }; - elements.push(element); - } - - nop_out(block, &operand_indices); - instr_make_load_const( - metadata, - &mut block.instructions[i], - ConstantData::Tuple { elements }, - )?; - Ok(true) -} - -fn fold_constant_intrinsic_list_to_tuple( - metadata: &mut CodeUnitMetadata, - block: &mut Block, - i: usize, -) -> crate::InternalResult { - let Some(Instruction::CallIntrinsic1 { func }) = block.instructions[i].instr.real() else { + let operand = get_const_value(metadata, &block.instructions[operand_index]); + let Some(operand) = operand else { return Ok(false); }; - if func.get(block.instructions[i].arg) != IntrinsicFunction1::ListToTuple { + let Some(folded_const) = eval_const_unaryop(&operand, op, intrinsic) else { return Ok(false); - } - - let mut consts_found = 0usize; - let mut expect_append = true; - let mut pos = i; - while let Some(prev) = pos.checked_sub(1) { - pos = prev; - let instr = &block.instructions[pos]; - if matches!(instr.instr.real(), Some(Instruction::Nop)) { - continue; - } - - if matches!(instr.instr.real(), Some(Instruction::BuildList { .. })) - && u32::from(instr.arg) == 0 - { - if !expect_append { - return Ok(false); - } + }; + nop_out(block, &[operand_index]); + instr_make_load_const(metadata, &mut block.instructions[i], folded_const)?; + Ok(true) +} - let mut elements = Vec::new(); - elements - .try_reserve_exact(consts_found) - .map_err(|_| InternalError::MalformedControlFlowGraph)?; - for idx in (pos..i).rev() { - if matches!(block.instructions[idx].instr.real(), Some(Instruction::Nop)) { - continue; - } - if loads_const(&block.instructions[idx]) { - let Some(value) = get_const_value(metadata, &block.instructions[idx]) else { - return Ok(false); - }; - elements.push(value); - } - nop_out_no_location(&mut block.instructions[idx]); - } - debug_assert_eq!(elements.len(), consts_found); - elements.reverse(); - instr_make_load_const( - metadata, - &mut block.instructions[i], - ConstantData::Tuple { elements }, - )?; - return Ok(true); +/// flowgraph.c get_const_loading_instrs +fn get_const_loading_instrs( + block: &Block, + mut start: usize, + size: usize, +) -> crate::InternalResult>> { + let mut indices = Vec::new(); + indices + .try_reserve_exact(size) + .map_err(|_| InternalError::MalformedControlFlowGraph)?; + loop { + if start >= block.instruction_used { + return Ok(None); } - - if expect_append { - if !matches!(instr.instr.real(), Some(Instruction::ListAppend { .. })) - || u32::from(instr.arg) != 1 - { - return Ok(false); - } - } else { + let instr = &block.instructions[start]; + if !matches!(instr.instr.real(), Some(Instruction::Nop)) { if !loads_const(instr) { - return Ok(false); + return Ok(None); + } + indices.push(start); + if indices.len() == size { + break; } - consts_found += 1; } - expect_append = !expect_append; + let Some(prev) = start.checked_sub(1) else { + return Ok(None); + }; + start = prev; } + indices.reverse(); + Ok(Some(indices)) +} - Ok(false) +/// flowgraph.c nop_out +fn nop_out(block: &mut Block, instrs: &[usize]) { + for &i in instrs { + nop_out_no_location(&mut block.instructions[i]); + } } -/// Port of CPython's flowgraph.c optimize_lists_and_sets(). -fn optimize_lists_and_sets( +/// flowgraph.c fold_const_binop +fn fold_const_binop( metadata: &mut CodeUnitMetadata, block: &mut Block, i: usize, - nextop: Option, ) -> crate::InternalResult { - let Some(instr) = block.instructions[i].instr.real() else { - return Ok(false); - }; - let is_list = matches!(instr, Instruction::BuildList { .. }); - let is_set = matches!(instr, Instruction::BuildSet { .. }); - if !is_list && !is_set { - return Ok(false); - } + use oparg::BinaryOperator as BinOp; - let contains_or_iter = matches!( - nextop, - Some(Instruction::GetIter | Instruction::ContainsOp { .. }) - ); - let seq_size = u32::from(block.instructions[i].arg) as usize; - if seq_size > STACK_USE_GUIDELINE || (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter) { + let Some(Opcode::BinaryOp) = block.instructions[i].instr.real_opcode() else { return Ok(false); - } + }; - let Some(operand_indices) = (if seq_size == 0 { - Some(Vec::new()) - } else if let Some(start) = i.checked_sub(1) { - get_const_loading_instrs(block, start, seq_size)? + let Some(operand_indices) = (if let Some(start) = i.checked_sub(1) { + get_const_loading_instrs(block, start, 2)? } else { None }) else { - if contains_or_iter && is_list { - let arg = block.instructions[i].arg; - instr_set_op1(&mut block.instructions[i], Opcode::BuildTuple.into(), arg); - return Ok(true); - } return Ok(false); }; - let mut elements = Vec::new(); - elements - .try_reserve_exact(seq_size) - .map_err(|_| InternalError::MalformedControlFlowGraph)?; - for &j in &operand_indices { - let Some(element) = get_const_value(metadata, &block.instructions[j]) else { - return Ok(false); - }; - elements.push(element); - } - - let const_data = if is_list { - ConstantData::Tuple { elements } - } else { - ConstantData::Frozenset { elements } + let op_raw = u32::from(block.instructions[i].arg); + let Ok(op) = BinOp::try_from(op_raw) else { + return Ok(false); }; - let const_idx = add_const(metadata, const_data)?; - - if !contains_or_iter { - debug_assert!(i >= 2); - let folded_loc = block.instructions[i].location; - let end_loc = block.instructions[i].end_location; - - nop_out(block, &operand_indices); - - let build_instr = if is_list { - Opcode::BuildList - } else { - Opcode::BuildSet - } - .into(); - instr_set_op1(&mut block.instructions[i - 2], build_instr, OpArg::new(0)); - block.instructions[i - 2].location = folded_loc; - block.instructions[i - 2].end_location = end_loc; - block.instructions[i - 2].lineno_override = None; - instr_set_op1( - &mut block.instructions[i - 1], - Opcode::LoadConst.into(), - OpArg::new(const_idx as u32), - ); + let left = get_const_value(metadata, &block.instructions[operand_indices[0]]); + let right = get_const_value(metadata, &block.instructions[operand_indices[1]]); + let (Some(left_val), Some(right_val)) = (left, right) else { + return Ok(false); + }; - let extend_instr = if is_list { - Opcode::ListExtend - } else { - Opcode::SetUpdate - }; - instr_set_op1( - &mut block.instructions[i], - extend_instr.into(), - OpArg::new(1), - ); - return Ok(true); - } + let Some(result_const) = eval_const_binop(&left_val, &right_val, op) else { + return Ok(false); + }; nop_out(block, &operand_indices); - - instr_set_op1( - &mut block.instructions[i], - Opcode::LoadConst.into(), - OpArg::new(const_idx as u32), - ); + instr_make_load_const(metadata, &mut block.instructions[i], result_const)?; Ok(true) } -/// flowgraph.c VISITED -const VISITED: i32 = -1; - -/// flowgraph.c SWAPPABLE -fn is_swappable(instr: AnyInstruction) -> bool { - matches!( - instr.into(), - AnyOpcode::Real(Opcode::StoreFast | Opcode::PopTop) - | AnyOpcode::Pseudo(PseudoOpcode::StoreFastMaybeNull) - ) +/// flowgraph.c loads_const +fn loads_const(info: &InstructionInfo) -> bool { + info.instr.has_const() || matches!(info.instr.real_opcode(), Some(Opcode::LoadSmallInt)) } -/// flowgraph.c STORES_TO -fn stores_to(info: &InstructionInfo) -> i32 { - match info.instr.into() { - AnyOpcode::Real(Opcode::StoreFast) - | AnyOpcode::Pseudo(PseudoOpcode::StoreFastMaybeNull) => u32::from(info.arg) as i32, - _ => -1, +/// flowgraph.c get_const_value +fn get_const_value(metadata: &CodeUnitMetadata, info: &InstructionInfo) -> Option { + match info.instr.real_opcode() { + Some(Opcode::LoadSmallInt) => { + let v = u32::from(info.arg) as i32; + Some(ConstantData::Integer { + value: BigInt::from(v), + }) + } + _ if info.instr.has_const() => { + let idx = u32::from(info.arg) as usize; + metadata.consts.get_index(idx).cloned() + } + _ => None, } } -/// flowgraph.c next_swappable_instruction -fn next_swappable_instruction(block: &Block, mut i: usize, lineno: i32) -> Option { - loop { - i += 1; - if i >= block.instruction_used { - return None; - } - - let info = &block.instructions[i]; - let info_lineno = instruction_lineno(info); - - if lineno >= 0 && info_lineno != lineno { +/// flowgraph.c const_folding_check_complexity +fn const_folding_check_complexity(obj: &ConstantData, mut limit: isize) -> Option { + if let ConstantData::Tuple { elements } = obj { + limit -= isize::try_from(elements.len()).ok()?; + if limit < 0 { return None; } - - if matches!(info.instr, AnyInstruction::Real(Instruction::Nop)) { - continue; + for element in elements { + limit = const_folding_check_complexity(element, limit)?; } + } + Some(limit) +} - if is_swappable(info.instr) { - return Some(i); - } +fn repeat_wtf8(value: &Wtf8Buf, n: usize) -> Option { + let mut result = Wtf8Buf::new(); + result.try_reserve_exact(value.len().checked_mul(n)?).ok()?; + for _ in 0..n { + result.push_wtf8(value); + } + Some(result) +} +fn checked_repeat_count(n: &BigInt, item_size: usize) -> Option { + let n = n.to_isize()?; + if item_size != 0 && (n < 0 || n as usize > MAX_STR_SIZE / item_size) { return None; } + Some(n.max(0) as usize) } -/// flowgraph.c swaptimize -fn swaptimize(block: &mut Block, ix: &mut usize) -> crate::InternalResult<()> { - debug_assert!(matches!( - block.instructions[*ix].instr.real_opcode(), - Some(Opcode::Swap) - )); - let mut depth = u32::from(block.instructions[*ix].arg) as usize; - let mut len = 1usize; - let mut more = false; - let limit = block.instruction_used - *ix; - while len < limit { - match block.instructions[*ix + len].instr.real_opcode() { - Some(Opcode::Swap) => { - depth = depth.max(u32::from(block.instructions[*ix + len].arg) as usize); - more = true; - len += 1; - } - Some(Opcode::Nop) => { - len += 1; +/// flowgraph.c const_folding_safe_multiply +fn const_folding_safe_multiply(left: &ConstantData, right: &ConstantData) -> Option { + match (left, right) { + (ConstantData::Integer { value: l }, ConstantData::Integer { value: r }) => { + if !l.is_zero() && !r.is_zero() && l.bits() + r.bits() > MAX_INT_SIZE { + return None; } - _ => break, + Some(ConstantData::Integer { value: l * r }) } - } - - if !more { - return Ok(()); - } - - let mut stack = Vec::new(); - stack - .try_reserve_exact(depth) - .map_err(|_| InternalError::MalformedControlFlowGraph)?; - stack.resize(depth, 0); - let mut i = 0; - while i < depth { - stack[i] = i as i32; - i += 1; - } - - i = 0; - while i < len { - let info = &block.instructions[*ix + i]; - if matches!(info.instr.real_opcode(), Some(Opcode::Swap)) { - let oparg = u32::from(info.arg) as usize; - stack.swap(0, oparg - 1); + (ConstantData::Float { value: l }, ConstantData::Float { value: r }) => { + Some(ConstantData::Float { value: l * r }) } - i += 1; - } - - let mut current = len as isize - 1; - for i in 0..depth { - if stack[i] == VISITED || stack[i] == i as i32 { - continue; + (ConstantData::Str { value: s }, ConstantData::Integer { value: n }) => { + let n = checked_repeat_count(n, s.code_points().count())?; + Some(ConstantData::Str { + value: repeat_wtf8(s, n)?, + }) } - let mut j = i; - loop { - if j != 0 { - debug_assert!(current >= 0); - let out = &mut block.instructions[*ix + current as usize]; - out.instr = Opcode::Swap.into(); - out.arg = OpArg::new((j + 1) as u32); - current -= 1; + (ConstantData::Integer { .. }, ConstantData::Str { .. }) => { + const_folding_safe_multiply(right, left) + } + (ConstantData::Bytes { value: b }, ConstantData::Integer { value: n }) => { + let n = checked_repeat_count(n, b.len())?; + let mut value = Vec::new(); + value.try_reserve_exact(b.len().checked_mul(n)?).ok()?; + for _ in 0..n { + value.extend_from_slice(b); } - if stack[j] == VISITED { - debug_assert_eq!(j, i); - break; + Some(ConstantData::Bytes { value }) + } + (ConstantData::Integer { .. }, ConstantData::Bytes { .. }) => { + const_folding_safe_multiply(right, left) + } + (ConstantData::Tuple { elements }, ConstantData::Integer { value: n }) => { + let n = n.to_usize()?; + if n != 0 && !elements.is_empty() { + if n > MAX_COLLECTION_SIZE / elements.len() { + return None; + } + const_folding_check_complexity( + &ConstantData::Tuple { + elements: elements.clone(), + }, + MAX_TOTAL_ITEMS / isize::try_from(n).ok()?, + )?; } - let next_j = stack[j] as usize; - stack[j] = VISITED; - j = next_j; + let mut result = Vec::new(); + result + .try_reserve_exact(elements.len().checked_mul(n)?) + .ok()?; + for _ in 0..n { + result.extend(elements.iter().cloned()); + } + Some(ConstantData::Tuple { elements: result }) } + (ConstantData::Integer { .. }, ConstantData::Tuple { .. }) => { + const_folding_safe_multiply(right, left) + } + _ => None, } - - while current >= 0 { - set_to_nop(&mut block.instructions[*ix + current as usize]); - current -= 1; - } - *ix += len - 1; - Ok(()) } -/// flowgraph.c apply_static_swaps -fn apply_static_swaps(block: &mut Block, mut i: isize) { - while i >= 0 { - let idx = i as usize; - debug_assert!(idx < block.instruction_used); - let swap_arg = match block.instructions[idx].instr.real_opcode() { - Some(Opcode::Swap) => u32::from(block.instructions[idx].arg), - Some(Opcode::Nop | Opcode::PopTop | Opcode::StoreFast) => { - i -= 1; - continue; +/// flowgraph.c const_folding_safe_power +fn const_folding_safe_power(left: &ConstantData, right: &ConstantData) -> Option { + match (left, right) { + (ConstantData::Integer { value: l }, ConstantData::Integer { value: r }) => { + if r < &BigInt::from(0) { + if l.is_zero() { + return None; + } + let base = l.to_f64()?; + if !base.is_finite() { + return None; + } + let result = if let Some(exp) = r.to_i32() { + base.powi(exp) + } else { + base.powf(r.to_f64()?) + }; + if !result.is_finite() { + return None; + } + return Some(ConstantData::Float { value: result }); } - _ if matches!( - block.instructions[idx].instr.pseudo_opcode(), - Some(PseudoOpcode::StoreFastMaybeNull) - ) => - { - i -= 1; - continue; + let exp: u64 = r.try_into().ok()?; + let exp_usize = usize::try_from(exp).ok()?; + if !l.is_zero() && exp > 0 && l.bits() > MAX_INT_SIZE / exp { + return None; } - _ => return, - }; - - let Some(j) = next_swappable_instruction(block, idx, -1) else { - return; - }; - let lineno = instruction_lineno(&block.instructions[j]); - let mut k = j; - for _ in 1..swap_arg { - let Some(next) = next_swappable_instruction(block, k, lineno) else { - return; - }; - k = next; + Some(ConstantData::Integer { + value: num_traits::pow::pow(l.clone(), exp_usize), + }) } - - let store_j = stores_to(&block.instructions[j]); - let store_k = stores_to(&block.instructions[k]); - if store_j >= 0 || store_k >= 0 { - if store_j == store_k { - return; - } - let mut idx = j + 1; - while idx < k { - let store_idx = stores_to(&block.instructions[idx]); - if store_idx >= 0 && (store_idx == store_j || store_idx == store_k) { - return; - } - idx += 1; - } + (ConstantData::Float { value: l }, ConstantData::Float { value: r }) => { + let result = l.powf(*r); + result + .is_finite() + .then_some(ConstantData::Float { value: result }) } - - set_to_nop(&mut block.instructions[idx]); - block.instructions.swap(j, k); - i -= 1; + _ => None, } } -/// flowgraph.c optimize_basic_block swap pass -fn apply_static_swaps_block(block: &mut Block) -> crate::InternalResult<()> { - let mut i = 0; - while i < block.instruction_used { - if matches!( - block.instructions[i].instr.real_opcode(), - Some(Opcode::Swap) - ) { - swaptimize(block, &mut i)?; - apply_static_swaps(block, i as isize); - } - i += 1; +/// flowgraph.c const_folding_safe_lshift +fn const_folding_safe_lshift(left: &ConstantData, right: &ConstantData) -> Option { + let (ConstantData::Integer { value: l }, ConstantData::Integer { value: r }) = (left, right) + else { + return None; + }; + let shift: u64 = r.try_into().ok()?; + let shift_usize = usize::try_from(shift).ok()?; + if shift > MAX_INT_SIZE || (!l.is_zero() && l.bits() > MAX_INT_SIZE - shift) { + return None; } - Ok(()) + Some(ConstantData::Integer { + value: l << shift_usize, + }) } -/// flowgraph.c maybe_instr_make_load_smallint -fn maybe_instr_make_load_smallint(instr: &mut InstructionInfo, constant: &ConstantData) -> bool { - if let ConstantData::Integer { value } = constant - && let Some(small) = value.to_i32().filter(|v| (0..=255).contains(v)) - { - instr_set_op1(instr, Opcode::LoadSmallInt.into(), OpArg::new(small as u32)); - return true; +/// flowgraph.c const_folding_safe_mod +fn const_folding_safe_mod(left: &ConstantData, right: &ConstantData) -> Option { + if matches!(left, ConstantData::Str { .. } | ConstantData::Bytes { .. }) { + return None; } - false -} -/// flowgraph.c basicblock_optimize_load_const -fn basicblock_optimize_load_const( - metadata: &mut CodeUnitMetadata, - block: &mut Block, -) -> crate::InternalResult<()> { - let mut i = 0; - let mut effective_opcode = None; - let mut effective_oparg = OpArg::new(0); - while i < block.instruction_used { - if matches!( - block.instructions[i].instr.real(), - Some(Instruction::LoadConst { .. }) - ) && let Some(constant) = get_const_value(metadata, &block.instructions[i]) - { - maybe_instr_make_load_smallint(&mut block.instructions[i], &constant); + match (left, right) { + (ConstantData::Integer { value: l }, ConstantData::Integer { value: r }) => { + if r.is_zero() { + return None; + } + let rem = l.clone() % r.clone(); + let value = if !rem.is_zero() && (rem < BigInt::from(0)) != (*r < BigInt::from(0)) { + rem + r + } else { + rem + }; + Some(ConstantData::Integer { value }) + } + (ConstantData::Float { value: l }, ConstantData::Float { value: r }) => { + let (_, modulo) = float_div_mod(*l, *r)?; + Some(ConstantData::Float { value: modulo }) } + _ => None, + } +} - let curr = block.instructions[i]; - let curr_arg = curr.arg; +fn float_div_mod(left: f64, right: f64) -> Option<(f64, f64)> { + if right == 0.0 { + return None; + } - // Only combine if the source is a real instruction. - let Some(curr_instr) = curr.instr.real() else { - i += 1; - continue; + let mut modulo = left % right; + let div = (left - modulo) / right; + let floordiv = if modulo != 0.0 { + let div = if (right < 0.0) != (modulo < 0.0) { + modulo += right; + div - 1.0 + } else { + div }; - - let is_copy_of_load_const = matches!( - (effective_opcode, curr_instr), - (Some(Instruction::LoadConst { .. }), Instruction::Copy { i }) if i.get(curr_arg) == 1 - ); - if !is_copy_of_load_const { - effective_opcode = Some(curr_instr); - effective_oparg = curr_arg; + let mut floordiv = div.floor(); + if div - floordiv > 0.5 { + floordiv += 1.0; } - let Some(const_instr) = effective_opcode else { - i += 1; - continue; - }; - let const_arg = effective_oparg; + floordiv + } else { + modulo = 0.0f64.copysign(right); + 0.0f64.copysign(left / right) + }; - if i + 1 >= block.instruction_used { - i += 1; - continue; - } + Some((floordiv, modulo)) +} - let next = block.instructions[i + 1]; - let next_arg = next.arg; +/// flowgraph.c eval_const_binop complex result construction +fn eval_const_complex_const(value: Complex) -> Option { + (value.re.is_finite() && value.im.is_finite()).then_some(ConstantData::Complex { value }) +} - if let Some(is_true) = load_const_truthiness(const_instr, const_arg, metadata) { - let const_jump = match (next.instr.real_opcode(), next.instr.pseudo_opcode()) { - (_, Some(PseudoOpcode::JumpIfTrue)) => Some((true, false)), - (_, Some(PseudoOpcode::JumpIfFalse)) => Some((false, false)), - (Some(Opcode::PopJumpIfTrue), _) => Some((true, true)), - (Some(Opcode::PopJumpIfFalse), _) => Some((false, true)), - _ => None, +/// flowgraph.c eval_const_binop complex operations +fn eval_const_complex_binop( + left: Complex, + right: Complex, + op: oparg::BinaryOperator, +) -> Option { + use oparg::BinaryOperator as BinOp; + + let value = match op { + BinOp::Add => left + right, + BinOp::Subtract => { + let re = left.re - right.re; + // Preserve CPython's signed-zero behavior for real-zero + // minus zero-complex expressions such as `0 - 0j`. + let im = if left.re == 0.0 + && left.im == 0.0 + && right.re == 0.0 + && right.im == 0.0 + && !right.im.is_sign_negative() + { + -0.0 + } else { + left.im - right.im }; - if let Some((jump_if_true, pops_condition)) = const_jump { - if pops_condition { - set_to_nop(&mut block.instructions[i]); - } - if is_true == jump_if_true { - block.instructions[i + 1].instr = PseudoOpcode::Jump.into(); - } else { - set_to_nop(&mut block.instructions[i + 1]); - } - i += 1; - continue; + Complex::new(re, im) + } + BinOp::Multiply => left * right, + BinOp::TrueDivide => { + if right == Complex::new(0.0, 0.0) { + return None; } + left / right } - - // The remaining combinations require both instructions to be real. - let Some(next_instr) = next.instr.real() else { - i += 1; - continue; - }; - - if let Instruction::LoadConst { consti } = const_instr { - let constant = &metadata.consts[consti.get(const_arg).as_usize()]; - if matches!(constant, ConstantData::None) - && let Instruction::IsOp { invert } = next_instr - { - let mut jump_idx = i + 2; - if jump_idx >= block.instruction_used { - i += 1; - continue; - } - - if matches!( - block.instructions[jump_idx].instr.real(), - Some(Instruction::ToBool) - ) { - set_to_nop(&mut block.instructions[jump_idx]); - jump_idx += 1; - if jump_idx >= block.instruction_used { - i += 1; - continue; - } + BinOp::Power => { + if left == Complex::new(0.0, 0.0) { + if right.im != 0.0 || right.re < 0.0 { + return None; } - let Some(jump_instr) = block.instructions[jump_idx].instr.real() else { - i += 1; - continue; - }; - - let mut invert = matches!( - invert.get(next_arg), - rustpython_compiler_core::bytecode::Invert::Yes - ); - match jump_instr { - Instruction::PopJumpIfFalse { .. } => { - invert = !invert; - } - Instruction::PopJumpIfTrue { .. } => {} - _ => { - i += 1; - continue; - } - }; - - set_to_nop(&mut block.instructions[i]); - set_to_nop(&mut block.instructions[i + 1]); - block.instructions[jump_idx].instr = if invert { - Opcode::PopJumpIfNotNone + return eval_const_complex_const(if right.re == 0.0 { + Complex::new(1.0, 0.0) } else { - Opcode::PopJumpIfNone - } - .into(); - i = jump_idx; - continue; + Complex::new(0.0, 0.0) + }); } - } - if matches!( - const_instr, - Instruction::LoadConst { .. } | Instruction::LoadSmallInt { .. } - ) && matches!(next_instr, Instruction::ToBool) - && let Some(value) = load_const_truthiness(const_instr, const_arg, metadata) - { - let const_idx = add_const(metadata, ConstantData::Boolean { value })?; - set_to_nop(&mut block.instructions[i]); - instr_set_op1( - &mut block.instructions[i + 1], - Opcode::LoadConst.into(), - OpArg::new(const_idx as u32), - ); - i += 1; - continue; + if right.im == 0.0 + && right.re.fract() == 0.0 + && right.re >= f64::from(i32::MIN) + && right.re <= f64::from(i32::MAX) + { + left.powi(right.re as i32) + } else { + left.powc(right) + } } + _ => return None, + }; + eval_const_complex_const(value) +} - i += 1; +/// flowgraph.c eval_const_binop subscript index conversion +fn constant_as_index(value: &ConstantData) -> Option { + match value { + ConstantData::Integer { value } => value.to_i64().or_else(|| { + if value < &BigInt::from(0) { + Some(i64::MIN) + } else { + Some(i64::MAX) + } + }), + ConstantData::Boolean { value } => Some(i64::from(*value)), + _ => None, } - Ok(()) } -/// flowgraph.c optimize_load_const -fn optimize_load_const( - metadata: &mut CodeUnitMetadata, - blocks: &mut Blocks, -) -> crate::InternalResult<()> { - let mut block_idx = BlockIdx(0); - while block_idx != BlockIdx::NULL { - let next_block = blocks[block_idx.idx()].next; - let block = &mut blocks[block_idx]; - basicblock_optimize_load_const(metadata, block)?; - block_idx = next_block; +/// flowgraph.c eval_const_binop subscript slice bound conversion +fn slice_bound(value: &ConstantData) -> Option> { + match value { + ConstantData::None => Some(None), + _ => constant_as_index(value).map(Some), } - Ok(()) } -#[cfg(test)] -impl CodeInfo { - fn debug_block_dump(&self) -> String { - let mut out = String::new(); - let mut block_idx = BlockIdx(0); - while block_idx != BlockIdx::NULL { - use core::fmt::Write; - let block = &self.blocks[block_idx.idx()]; - let block_return = if basicblock_returns(block) { - " return" - } else { - "" - }; - let _ = writeln!( - out, - "block {} next={} cold={} except={} preserve_lasti={} start_depth={}{}", - u32::from(block_idx), - if block.next == BlockIdx::NULL { - String::from("NULL") - } else { - u32::from(block.next).to_string() - }, - block.cold, - block.except_handler, - block.preserve_lasti, - if block.start_depth < 0 { - String::from("None") - } else { - block.start_depth.to_string() - }, - block_return, - ); - for info in &block.instructions[..block.instruction_used] { - let lineno = instruction_lineno(info); - let _ = writeln!( - out, - " [disp={}:{} raw={}:{}-{}:{} override={:?}] {:?} arg={} target={}", - lineno, - info.location.character_offset.get(), - info.location.line.get(), - info.location.character_offset.get(), - info.end_location.line.get(), - info.end_location.character_offset.get(), - info.lineno_override, - info.instr, - u32::from(info.arg), - if info.target == BlockIdx::NULL { - String::from("NULL") - } else { - u32::from(info.target).to_string() - } - ); - } - block_idx = block.next; - } - out +/// flowgraph.c eval_const_binop subscript slice index adjustment +fn adjusted_slice_indices(len: usize, slice: &[ConstantData; 3]) -> Option> { + let len = i64::try_from(len).ok()?; + let start = slice_bound(&slice[0])?; + let stop = slice_bound(&slice[1])?; + let step = slice_bound(&slice[2])?.unwrap_or(1); + if step == 0 || step == i64::MIN { + return None; } - pub(crate) fn debug_late_cfg_trace(mut self) -> crate::InternalResult> { - let mut trace = Vec::new(); - trace.push(("initial".to_owned(), self.debug_block_dump())); - - let instr_sequence = self.prepare_cfg_from_codegen()?; - self.blocks = cfg_from_instruction_sequence(instr_sequence)?; - trace.push(( - "after_cfg_from_instruction_sequence".to_owned(), - self.debug_block_dump(), - )); - translate_jump_labels_to_targets(&mut self.blocks)?; - mark_except_handlers(&mut self.blocks)?; - label_exception_targets(&mut self.blocks)?; - check_cfg(&self.blocks)?; - inline_small_or_no_lineno_blocks(&mut self.blocks)?; - trace.push(( - "after_inline_small_or_no_lineno_blocks".to_owned(), - self.debug_block_dump(), - )); - self.blocks.remove_unreachable()?; - self.blocks - .resolve_line_numbers(self.metadata.firstlineno)?; - optimize_load_const(&mut self.metadata, &mut self.blocks)?; - trace.push(( - "after_optimize_load_const".to_owned(), - self.debug_block_dump(), - )); - let mut block_idx = BlockIdx(0); - while block_idx != BlockIdx::NULL { - let next_block = self.blocks[block_idx].next; - self.blocks - .optimize_basic_block(&mut self.metadata, block_idx)?; - block_idx = next_block; + let step_is_negative = step < 0; + let lower = if step_is_negative { -1 } else { 0 }; + let upper = if step_is_negative { len - 1 } else { len }; + let adjust = |value: Option, default: i64| { + let mut value = value.unwrap_or(default); + if value < 0 { + value = value.saturating_add(len); + if value < 0 { + value = lower; + } + } else if value >= len { + value = upper; } - trace.push(( - "after_optimize_basic_block".to_owned(), - self.debug_block_dump(), - )); - self.blocks.remove_redundant_nops_and_pairs()?; - self.blocks.remove_unreachable()?; - remove_redundant_nops_and_jumps(&mut self.blocks)?; - #[cfg(debug_assertions)] - assert!(no_redundant_jumps(&self.blocks)); - self.blocks - .remove_unused_consts(&mut self.metadata.consts)?; - trace.push(( - "after_optimize_cfg_cleanup".to_owned(), - self.debug_block_dump(), - )); - let nlocals = self.metadata.varnames.len(); - let nparams = self.nparams; - add_checks_for_loads_of_uninitialized_variables(&mut self.blocks, nlocals, nparams)?; - self.blocks.insert_superinstructions()?; - push_cold_blocks_to_end(&mut self.blocks)?; - trace.push(( - "after_push_cold_before_chain_reorder".to_owned(), - self.debug_block_dump(), - )); - self.blocks - .resolve_line_numbers(self.metadata.firstlineno)?; - trace.push(( - "after_push_cold_resolve_line_numbers".to_owned(), - self.debug_block_dump(), - )); - - trace.push(( - "after_push_cold_blocks_to_end".to_owned(), - self.debug_block_dump(), - )); + value + }; + let start = adjust(start, if step_is_negative { upper } else { lower }); + let stop = adjust(stop, if step_is_negative { lower } else { upper }); - convert_pseudo_conditional_jumps(&mut self.blocks)?; - trace.push(( - "after_convert_pseudo_conditional_jumps".to_owned(), - self.debug_block_dump(), - )); + let mut index = i128::from(start); + let stop = i128::from(stop); + let step = i128::from(step); + let slice_len = if step > 0 { + if index < stop { + usize::try_from((stop - index - 1) / step + 1).ok()? + } else { + 0 + } + } else if index > stop { + usize::try_from((index - stop - 1) / -step + 1).ok()? + } else { + 0 + }; + let mut indices = Vec::new(); + indices.try_reserve_exact(slice_len).ok()?; + if step > 0 { + while index < stop { + indices.push(usize::try_from(index).ok()?); + index += step; + } + } else { + while index > stop { + indices.push(usize::try_from(index).ok()?); + index += step; + } + } + Some(indices) +} - let _max_stackdepth = self.blocks.calculate_stackdepth()?; - let _nlocalsplus = prepare_localsplus(&self.metadata, &mut self.blocks, self.flags)?; - convert_pseudo_ops(&mut self.blocks)?; - trace.push(( - "after_convert_pseudo_ops".to_owned(), - self.debug_block_dump(), - )); +/// flowgraph.c eval_const_binop subscript index adjustment +fn adjusted_const_index(len: usize, index: &ConstantData) -> Option { + let len = i64::try_from(len).ok()?; + let index = constant_as_index(index)?; + let index = if index < 0 { + index.saturating_add(len) + } else { + index + }; + if index < 0 || index >= len { + return None; + } + usize::try_from(index).ok() +} - self.blocks.normalize_jumps()?; - #[cfg(debug_assertions)] - assert!(no_redundant_jumps(&self.blocks)); - trace.push(("after_normalize_jumps".to_owned(), self.debug_block_dump())); - self.blocks.optimize_load_fast()?; - trace.push(( - "after_optimize_load_fast".to_owned(), - self.debug_block_dump(), - )); +/// flowgraph.c eval_const_binop NB_SUBSCR +fn eval_const_subscript(container: &ConstantData, index: &ConstantData) -> Option { + match (container, index) { + ( + ConstantData::Str { value }, + ConstantData::Integer { .. } | ConstantData::Boolean { .. }, + ) => { + let string = value.to_string(); + if string.contains(char::REPLACEMENT_CHARACTER) { + return None; + } + let mut chars = Vec::new(); + chars.try_reserve_exact(string.chars().count()).ok()?; + chars.extend(string.chars()); + let index = adjusted_const_index(chars.len(), index)?; + Some(ConstantData::Str { + value: chars[index].to_string().into(), + }) + } + (ConstantData::Str { value }, ConstantData::Slice { elements }) => { + let string = value.to_string(); + if string.contains(char::REPLACEMENT_CHARACTER) { + return None; + } + let mut chars = Vec::new(); + chars.try_reserve_exact(string.chars().count()).ok()?; + chars.extend(string.chars()); + let indices = adjusted_slice_indices(chars.len(), elements)?; + let capacity = indices.iter().try_fold(0usize, |capacity, &index| { + capacity.checked_add(chars[index].len_utf8()) + })?; + let mut result = String::new(); + result.try_reserve_exact(capacity).ok()?; + for index in indices { + result.push(chars[index]); + } + Some(ConstantData::Str { + value: result.into(), + }) + } + ( + ConstantData::Bytes { value }, + ConstantData::Integer { .. } | ConstantData::Boolean { .. }, + ) => { + let index = adjusted_const_index(value.len(), index)?; + Some(ConstantData::Integer { + value: BigInt::from(value[index]), + }) + } + (ConstantData::Bytes { value }, ConstantData::Slice { elements }) => { + let indices = adjusted_slice_indices(value.len(), elements)?; + let mut result = Vec::new(); + result.try_reserve_exact(indices.len()).ok()?; + for index in indices { + result.push(value[index]); + } + Some(ConstantData::Bytes { value: result }) + } + ( + ConstantData::Tuple { elements }, + ConstantData::Integer { .. } | ConstantData::Boolean { .. }, + ) => { + let index = adjusted_const_index(elements.len(), index)?; + Some(elements[index].clone()) + } + (ConstantData::Tuple { elements }, ConstantData::Slice { elements: slice }) => { + let indices = adjusted_slice_indices(elements.len(), slice)?; + let mut result = Vec::new(); + result.try_reserve_exact(indices.len()).ok()?; + for index in indices { + result.push(elements[index].clone()); + } + Some(ConstantData::Tuple { elements: result }) + } + _ => None, + } +} + +/// flowgraph.c eval_const_binop bool/int coercion +fn constant_as_int(value: &ConstantData) -> Option<(BigInt, bool)> { + match value { + ConstantData::Boolean { value } => Some((BigInt::from(u8::from(*value)), true)), + ConstantData::Integer { value } => Some((value.clone(), false)), + _ => None, + } +} + +/// flowgraph.c eval_const_binop +fn eval_const_binop( + left: &ConstantData, + right: &ConstantData, + op: oparg::BinaryOperator, +) -> Option { + use oparg::BinaryOperator as BinOp; + + if matches!(op, BinOp::Subscr) { + return eval_const_subscript(left, right); + } + + if let (Some((left_int, left_is_bool)), Some((right_int, right_is_bool))) = + (constant_as_int(left), constant_as_int(right)) + && (left_is_bool || right_is_bool) + { + if left_is_bool && right_is_bool { + match op { + BinOp::And => { + return Some(ConstantData::Boolean { + value: !left_int.is_zero() & !right_int.is_zero(), + }); + } + BinOp::Or => { + return Some(ConstantData::Boolean { + value: !left_int.is_zero() | !right_int.is_zero(), + }); + } + BinOp::Xor => { + return Some(ConstantData::Boolean { + value: !left_int.is_zero() ^ !right_int.is_zero(), + }); + } + _ => {} + } + } + + return eval_const_binop( + &ConstantData::Integer { value: left_int }, + &ConstantData::Integer { value: right_int }, + op, + ); + } + + match (left, right) { + (ConstantData::Integer { value: l }, ConstantData::Integer { value: r }) => { + let result = match op { + BinOp::Add => l + r, + BinOp::Subtract => l - r, + BinOp::Multiply => { + return const_folding_safe_multiply(left, right); + } + BinOp::TrueDivide => { + if r.is_zero() { + return None; + } + let l_f = l.to_f64()?; + let r_f = r.to_f64()?; + let result = l_f / r_f; + if !result.is_finite() { + return None; + } + return Some(ConstantData::Float { value: result }); + } + BinOp::FloorDivide => { + if r.is_zero() { + return None; + } + // Python floor division: round towards negative infinity + let (q, rem) = (l.clone() / r.clone(), l.clone() % r.clone()); + if !rem.is_zero() && (rem < BigInt::from(0)) != (*r < BigInt::from(0)) { + q - 1 + } else { + q + } + } + BinOp::Remainder => return const_folding_safe_mod(left, right), + BinOp::Power => return const_folding_safe_power(left, right), + BinOp::Lshift => return const_folding_safe_lshift(left, right), + BinOp::Rshift => { + let shift: u32 = r.try_into().ok()?; + l >> (shift as usize) + } + BinOp::And => l & r, + BinOp::Or => l | r, + BinOp::Xor => l ^ r, + _ => return None, + }; + Some(ConstantData::Integer { value: result }) + } + (ConstantData::Float { value: l }, ConstantData::Float { value: r }) => { + let result = match op { + BinOp::Add => l + r, + BinOp::Subtract => l - r, + BinOp::Multiply => return const_folding_safe_multiply(left, right), + BinOp::TrueDivide => { + if *r == 0.0 { + return None; + } + l / r + } + BinOp::FloorDivide => { + let (floordiv, _) = float_div_mod(*l, *r)?; + floordiv + } + BinOp::Remainder => return const_folding_safe_mod(left, right), + BinOp::Power => return const_folding_safe_power(left, right), + _ => return None, + }; + if matches!(op, BinOp::Power) && !result.is_finite() { + return None; + } + Some(ConstantData::Float { value: result }) + } + // Int op Float or Float op Int → Float + (ConstantData::Integer { value: l }, ConstantData::Float { value: r }) => { + let l_f = l.to_f64()?; + eval_const_binop( + &ConstantData::Float { value: l_f }, + &ConstantData::Float { value: *r }, + op, + ) + } + (ConstantData::Float { value: l }, ConstantData::Integer { value: r }) => { + let r_f = r.to_f64()?; + eval_const_binop( + &ConstantData::Float { value: *l }, + &ConstantData::Float { value: r_f }, + op, + ) + } + (ConstantData::Integer { value: l }, ConstantData::Complex { value: r }) => { + eval_const_complex_binop(Complex::new(l.to_f64()?, 0.0), *r, op) + } + (ConstantData::Complex { value: l }, ConstantData::Integer { value: r }) => { + eval_const_complex_binop(*l, Complex::new(r.to_f64()?, 0.0), op) + } + (ConstantData::Float { value: l }, ConstantData::Complex { value: r }) => { + eval_const_complex_binop(Complex::new(*l, 0.0), *r, op) + } + (ConstantData::Complex { value: l }, ConstantData::Float { value: r }) => { + eval_const_complex_binop(*l, Complex::new(*r, 0.0), op) + } + (ConstantData::Complex { value: l }, ConstantData::Complex { value: r }) => { + eval_const_complex_binop(*l, *r, op) + } + // String concatenation and repetition + (ConstantData::Str { value: l }, ConstantData::Str { value: r }) + if matches!(op, BinOp::Add) => + { + let mut result = Wtf8Buf::new(); + result + .try_reserve_exact(l.len().checked_add(r.len())?) + .ok()?; + result.push_wtf8(l); + result.push_wtf8(r); + Some(ConstantData::Str { value: result }) + } + (ConstantData::Str { .. }, ConstantData::Integer { .. }) + if matches!(op, BinOp::Multiply) => + { + const_folding_safe_multiply(left, right) + } + (ConstantData::Tuple { elements: l }, ConstantData::Tuple { elements: r }) + if matches!(op, BinOp::Add) => + { + let mut result = Vec::new(); + result + .try_reserve_exact(l.len().checked_add(r.len())?) + .ok()?; + result.extend(l.iter().cloned()); + result.extend(r.iter().cloned()); + Some(ConstantData::Tuple { elements: result }) + } + (ConstantData::Tuple { .. }, ConstantData::Integer { .. }) + if matches!(op, BinOp::Multiply) => + { + const_folding_safe_multiply(left, right) + } + (ConstantData::Integer { .. }, ConstantData::Tuple { .. }) + if matches!(op, BinOp::Multiply) => + { + const_folding_safe_multiply(left, right) + } + (ConstantData::Integer { .. }, ConstantData::Str { .. }) + if matches!(op, BinOp::Multiply) => + { + const_folding_safe_multiply(left, right) + } + (ConstantData::Bytes { value: l }, ConstantData::Bytes { value: r }) + if matches!(op, BinOp::Add) => + { + let mut result = Vec::new(); + result + .try_reserve_exact(l.len().checked_add(r.len())?) + .ok()?; + result.extend_from_slice(l); + result.extend_from_slice(r); + Some(ConstantData::Bytes { value: result }) + } + (ConstantData::Bytes { .. }, ConstantData::Integer { .. }) + if matches!(op, BinOp::Multiply) => + { + const_folding_safe_multiply(left, right) + } + (ConstantData::Integer { .. }, ConstantData::Bytes { .. }) + if matches!(op, BinOp::Multiply) => + { + const_folding_safe_multiply(left, right) + } + _ => None, + } +} + +/// flowgraph.c fold_tuple_of_constants +fn fold_tuple_of_constants( + metadata: &mut CodeUnitMetadata, + block: &mut Block, + i: usize, +) -> crate::InternalResult { + let Some(Opcode::BuildTuple) = block.instructions[i].instr.real_opcode() else { + return Ok(false); + }; - Ok(trace) + let tuple_size = u32::from(block.instructions[i].arg) as usize; + if tuple_size > STACK_USE_GUIDELINE { + return Ok(false); } -} -impl InstrDisplayContext for CodeInfo { - type Constant = ConstantData; + let Some(operand_indices) = (if tuple_size == 0 { + Some(Vec::new()) + } else if let Some(start) = i.checked_sub(1) { + get_const_loading_instrs(block, start, tuple_size)? + } else { + None + }) else { + return Ok(false); + }; - fn get_constant(&self, consti: oparg::ConstIdx) -> &ConstantData { - &self.metadata.consts[consti.as_usize()] + let mut elements = Vec::new(); + elements + .try_reserve_exact(tuple_size) + .map_err(|_| InternalError::MalformedControlFlowGraph)?; + for &j in &operand_indices { + let Some(element) = get_const_value(metadata, &block.instructions[j]) else { + return Ok(false); + }; + elements.push(element); } - fn get_name(&self, i: usize) -> &str { - self.metadata.names[i].as_ref() - } + nop_out(block, &operand_indices); + instr_make_load_const( + metadata, + &mut block.instructions[i], + ConstantData::Tuple { elements }, + )?; + Ok(true) +} - fn get_varname(&self, var_num: oparg::VarNum) -> &str { - self.metadata.varnames[var_num.as_usize()].as_ref() +fn fold_constant_intrinsic_list_to_tuple( + metadata: &mut CodeUnitMetadata, + block: &mut Block, + i: usize, +) -> crate::InternalResult { + let Some(Instruction::CallIntrinsic1 { func }) = block.instructions[i].instr.real() else { + return Ok(false); + }; + if func.get(block.instructions[i].arg) != IntrinsicFunction1::ListToTuple { + return Ok(false); } - fn get_localsplus_name(&self, var_num: oparg::VarNum) -> &str { - let idx = var_num.as_usize(); - let nlocals = self.metadata.varnames.len(); - if idx < nlocals { - self.metadata.varnames[idx].as_ref() + let mut consts_found = 0usize; + let mut expect_append = true; + let mut pos = i; + while let Some(prev) = pos.checked_sub(1) { + pos = prev; + let instr = &block.instructions[pos]; + if matches!(instr.instr.real(), Some(Instruction::Nop)) { + continue; + } + + if matches!(instr.instr.real(), Some(Instruction::BuildList { .. })) + && u32::from(instr.arg) == 0 + { + if !expect_append { + return Ok(false); + } + + let mut elements = Vec::new(); + elements + .try_reserve_exact(consts_found) + .map_err(|_| InternalError::MalformedControlFlowGraph)?; + for idx in (pos..i).rev() { + if matches!(block.instructions[idx].instr.real(), Some(Instruction::Nop)) { + continue; + } + if loads_const(&block.instructions[idx]) { + let Some(value) = get_const_value(metadata, &block.instructions[idx]) else { + return Ok(false); + }; + elements.push(value); + } + nop_out_no_location(&mut block.instructions[idx]); + } + debug_assert_eq!(elements.len(), consts_found); + elements.reverse(); + instr_make_load_const( + metadata, + &mut block.instructions[i], + ConstantData::Tuple { elements }, + )?; + return Ok(true); + } + + if expect_append { + if !matches!(instr.instr.real(), Some(Instruction::ListAppend { .. })) + || u32::from(instr.arg) != 1 + { + return Ok(false); + } } else { - let cell_idx = idx - nlocals; - self.metadata - .cellvars - .get_index(cell_idx) - .unwrap_or_else(|| &self.metadata.freevars[cell_idx - self.metadata.cellvars.len()]) - .as_ref() + if !loads_const(instr) { + return Ok(false); + } + consts_found += 1; } + expect_append = !expect_append; } + + Ok(false) } -const NOT_LOCAL: isize = -1; -const DUMMY_INSTR: isize = -1; +/// Port of CPython's flowgraph.c optimize_lists_and_sets(). +fn optimize_lists_and_sets( + metadata: &mut CodeUnitMetadata, + block: &mut Block, + i: usize, + nextop: Option, +) -> crate::InternalResult { + let Some(instr) = block.instructions[i].instr.real() else { + return Ok(false); + }; + let is_list = matches!(instr, Instruction::BuildList { .. }); + let is_set = matches!(instr, Instruction::BuildSet { .. }); + if !is_list && !is_set { + return Ok(false); + } -/// flowgraph.c make_super_instruction -fn make_super_instruction( - inst1: &mut InstructionInfo, - inst2: &mut InstructionInfo, - super_op: AnyInstruction, -) { - let line1 = instruction_lineno(inst1); - let line2 = instruction_lineno(inst2); - if line1 >= 0 && line2 >= 0 && line1 != line2 { - return; + let contains_or_iter = matches!( + nextop, + Some(Instruction::GetIter | Instruction::ContainsOp { .. }) + ); + let seq_size = u32::from(block.instructions[i].arg) as usize; + if seq_size > STACK_USE_GUIDELINE || (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter) { + return Ok(false); } - let arg1 = u32::from(inst1.arg); - let arg2 = u32::from(inst2.arg); - if arg1 >= 16 || arg2 >= 16 { - return; + + let Some(operand_indices) = (if seq_size == 0 { + Some(Vec::new()) + } else if let Some(start) = i.checked_sub(1) { + get_const_loading_instrs(block, start, seq_size)? + } else { + None + }) else { + if contains_or_iter && is_list { + let arg = block.instructions[i].arg; + instr_set_op1(&mut block.instructions[i], Opcode::BuildTuple.into(), arg); + return Ok(true); + } + return Ok(false); + }; + + let mut elements = Vec::new(); + elements + .try_reserve_exact(seq_size) + .map_err(|_| InternalError::MalformedControlFlowGraph)?; + for &j in &operand_indices { + let Some(element) = get_const_value(metadata, &block.instructions[j]) else { + return Ok(false); + }; + elements.push(element); } - instr_set_op1(inst1, super_op, OpArg::new((arg1 << 4) | arg2)); - set_to_nop(inst2); -} -/// flowgraph.c LoadFastInstrFlag -#[derive(Clone, Copy, Eq, PartialEq)] -#[repr(u8)] -enum LoadFastInstrFlag { - SupportKilled = 1, - StoredAsLocal = 2, - RefUnconsumed = 4, -} + let const_data = if is_list { + ConstantData::Tuple { elements } + } else { + ConstantData::Frozenset { elements } + }; + let const_idx = add_const(metadata, const_data)?; -/// flowgraph.c ref -#[derive(Clone, Copy)] -struct Ref { - instr: isize, - local: isize, -} + if !contains_or_iter { + debug_assert!(i >= 2); + let folded_loc = block.instructions[i].location; + let end_loc = block.instructions[i].end_location; -/// flowgraph.c ref_stack -struct RefStack { - refs: Vec, - size: usize, - capacity: usize, -} + nop_out(block, &operand_indices); -/// flowgraph.c ref_stack_push -fn ref_stack_push(stack: &mut RefStack, r: Ref) -> crate::InternalResult<()> { - debug_assert_eq!(stack.refs.len(), stack.capacity); - if stack.size == stack.capacity { - let doubled = stack.capacity * 2; - let new_cap = 32.max(doubled); - stack - .refs - .try_reserve_exact(new_cap - stack.capacity) - .map_err(|_| InternalError::MalformedControlFlowGraph)?; - stack.refs.resize(new_cap, Ref { instr: 0, local: 0 }); - stack.capacity = new_cap; + let build_instr = if is_list { + Opcode::BuildList + } else { + Opcode::BuildSet + } + .into(); + instr_set_op1(&mut block.instructions[i - 2], build_instr, OpArg::new(0)); + block.instructions[i - 2].location = folded_loc; + block.instructions[i - 2].end_location = end_loc; + block.instructions[i - 2].lineno_override = None; + + instr_set_op1( + &mut block.instructions[i - 1], + Opcode::LoadConst.into(), + OpArg::new(const_idx as u32), + ); + + let extend_instr = if is_list { + Opcode::ListExtend + } else { + Opcode::SetUpdate + }; + instr_set_op1( + &mut block.instructions[i], + extend_instr.into(), + OpArg::new(1), + ); + return Ok(true); } - stack.refs[stack.size] = r; - stack.size += 1; - Ok(()) -} -/// flowgraph.c ref_stack_pop -fn ref_stack_pop(stack: &mut RefStack) -> Ref { - assert!(stack.size > 0); - stack.size -= 1; - stack.refs[stack.size] -} + nop_out(block, &operand_indices); -/// flowgraph.c ref_stack_swap_top -fn ref_stack_swap_top(stack: &mut RefStack, off: usize) { - assert!(off >= 2 && stack.size >= off); - let top = stack.size - 1; - let other = stack.size - off; - stack.refs.swap(top, other); + instr_set_op1( + &mut block.instructions[i], + Opcode::LoadConst.into(), + OpArg::new(const_idx as u32), + ); + Ok(true) } -/// flowgraph.c ref_stack_at -fn ref_stack_at(stack: &RefStack, idx: usize) -> Ref { - assert!(idx < stack.size); - stack.refs[idx] -} +/// flowgraph.c VISITED +const VISITED: i32 = -1; -/// flowgraph.c ref_stack_clear -fn ref_stack_clear(stack: &mut RefStack) { - stack.size = 0; +/// flowgraph.c SWAPPABLE +fn is_swappable(instr: AnyInstruction) -> bool { + matches!( + instr.into(), + AnyOpcode::Real(Opcode::StoreFast | Opcode::PopTop) + | AnyOpcode::Pseudo(PseudoOpcode::StoreFastMaybeNull) + ) } -/// flowgraph.c optimize_load_fast PUSH_REF -fn push_ref(stack: &mut RefStack, instr: isize, local: isize) -> crate::InternalResult<()> { - ref_stack_push(stack, Ref { instr, local }) +/// flowgraph.c STORES_TO +fn stores_to(info: &InstructionInfo) -> i32 { + match info.instr.into() { + AnyOpcode::Real(Opcode::StoreFast) + | AnyOpcode::Pseudo(PseudoOpcode::StoreFastMaybeNull) => u32::from(info.arg) as i32, + _ => -1, + } } -/// flowgraph.c kill_local -fn kill_local(instr_flags: &mut [u8], refs: &RefStack, local: isize) { - for i in 0..refs.size { - let r = ref_stack_at(refs, i); - if r.local != local { +/// flowgraph.c next_swappable_instruction +fn next_swappable_instruction(block: &Block, mut i: usize, lineno: i32) -> Option { + loop { + i += 1; + if i >= block.instruction_used { + return None; + } + + let info = &block.instructions[i]; + let info_lineno = instruction_lineno(info); + + if lineno >= 0 && info_lineno != lineno { + return None; + } + + if matches!(info.instr, AnyInstruction::Real(Instruction::Nop)) { continue; } - debug_assert!(r.instr >= 0); - instr_flags[r.instr as usize] |= LoadFastInstrFlag::SupportKilled as u8; + + if is_swappable(info.instr) { + return Some(i); + } + + return None; } } -/// flowgraph.c store_local -fn store_local(instr_flags: &mut [u8], refs: &RefStack, local: isize, r: Ref) { - kill_local(instr_flags, refs, local); - if r.instr != DUMMY_INSTR { - instr_flags[r.instr as usize] |= LoadFastInstrFlag::StoredAsLocal as u8; +/// flowgraph.c swaptimize +fn swaptimize(block: &mut Block, ix: &mut usize) -> crate::InternalResult<()> { + debug_assert!(matches!( + block.instructions[*ix].instr.real_opcode(), + Some(Opcode::Swap) + )); + let mut depth = u32::from(block.instructions[*ix].arg) as usize; + let mut len = 1usize; + let mut more = false; + let limit = block.instruction_used - *ix; + while len < limit { + match block.instructions[*ix + len].instr.real_opcode() { + Some(Opcode::Swap) => { + depth = depth.max(u32::from(block.instructions[*ix + len].arg) as usize); + more = true; + len += 1; + } + Some(Opcode::Nop) => { + len += 1; + } + _ => break, + } } -} -fn local_as_ref_local(local: usize) -> isize { - local as isize -} + if !more { + return Ok(()); + } -/// flowgraph.c load_fast_push_block -fn load_fast_push_block( - worklist: &mut CfgTraversalStack, - blocks: &mut Blocks, - target: BlockIdx, - start_depth: usize, -) { - debug_assert!(target != BlockIdx::NULL); - debug_assert!(blocks[target].start_depth >= 0); - debug_assert_eq!(blocks[target].start_depth as usize, start_depth,); - if !blocks[target].visited { - blocks[target].visited = true; - worklist.push(target); + let mut stack = Vec::new(); + stack + .try_reserve_exact(depth) + .map_err(|_| InternalError::MalformedControlFlowGraph)?; + stack.resize(depth, 0); + let mut i = 0; + while i < depth { + stack[i] = i as i32; + i += 1; } -} -fn stackdepth_push( - stack: &mut CfgTraversalStack, - blocks: &mut Blocks, - target: BlockIdx, - depth: i32, -) -> crate::InternalResult<()> { - let idx = target.idx(); - let block_depth = &mut blocks[idx].start_depth; - if !(*block_depth < 0 || *block_depth == depth) { - return Err(InternalError::InconsistentStackDepth); + i = 0; + while i < len { + let info = &block.instructions[*ix + i]; + if matches!(info.instr.real_opcode(), Some(Opcode::Swap)) { + let oparg = u32::from(info.arg) as usize; + stack.swap(0, oparg - 1); + } + i += 1; } - if *block_depth < depth && *block_depth < 100 { - debug_assert!(*block_depth < 0); - *block_depth = depth; - stack.push(target); + + let mut current = len as isize - 1; + for i in 0..depth { + if stack[i] == VISITED || stack[i] == i as i32 { + continue; + } + let mut j = i; + loop { + if j != 0 { + debug_assert!(current >= 0); + let out = &mut block.instructions[*ix + current as usize]; + out.instr = Opcode::Swap.into(); + out.arg = OpArg::new((j + 1) as u32); + current -= 1; + } + if stack[j] == VISITED { + debug_assert_eq!(j, i); + break; + } + let next_j = stack[j] as usize; + stack[j] = VISITED; + j = next_j; + } + } + + while current >= 0 { + set_to_nop(&mut block.instructions[*ix + current as usize]); + current -= 1; } + *ix += len - 1; Ok(()) } -/// flowgraph.c stack_effects -#[derive(Clone, Copy, Eq, PartialEq)] -struct StackEffects { - net: i32, -} +/// flowgraph.c apply_static_swaps +fn apply_static_swaps(block: &mut Block, mut i: isize) { + while i >= 0 { + let idx = i as usize; + debug_assert!(idx < block.instruction_used); + let swap_arg = match block.instructions[idx].instr.real_opcode() { + Some(Opcode::Swap) => u32::from(block.instructions[idx].arg), + Some(Opcode::Nop | Opcode::PopTop | Opcode::StoreFast) => { + i -= 1; + continue; + } + _ if matches!( + block.instructions[idx].instr.pseudo_opcode(), + Some(PseudoOpcode::StoreFastMaybeNull) + ) => + { + i -= 1; + continue; + } + _ => return, + }; -/// flowgraph.c get_stack_effects -#[allow(clippy::unnecessary_wraps)] -fn get_stack_effects( - instr: AnyInstruction, - oparg: OpArg, - jump: i32, -) -> crate::InternalResult { - if instr - .real() - .is_some_and(|op| op.as_opcode().deopt().is_some()) - { - return Err(InternalError::InvalidStackEffect); - } - let oparg = u32::from(oparg); - let net = if instr.is_block_push() && jump == 0 { - 0 - } else if jump != 0 { - instr.stack_effect_jump(oparg) - } else { - instr.stack_effect(oparg) - }; - Ok(StackEffects { net }) -} + let Some(j) = next_swappable_instruction(block, idx, -1) else { + return; + }; + let lineno = instruction_lineno(&block.instructions[j]); + let mut k = j; + for _ in 1..swap_arg { + let Some(next) = next_swappable_instruction(block, k, lineno) else { + return; + }; + k = next; + } + + let store_j = stores_to(&block.instructions[j]); + let store_k = stores_to(&block.instructions[k]); + if store_j >= 0 || store_k >= 0 { + if store_j == store_k { + return; + } + let mut idx = j + 1; + while idx < k { + let store_idx = stores_to(&block.instructions[idx]); + if store_idx >= 0 && (store_idx == store_j || store_idx == store_k) { + return; + } + idx += 1; + } + } -fn vec_try_reserve_exact(vec: &mut Vec, additional: usize) -> crate::InternalResult<()> { - vec.try_reserve_exact(additional) - .map_err(|_| InternalError::MalformedControlFlowGraph) + set_to_nop(&mut block.instructions[idx]); + block.instructions.swap(j, k); + i -= 1; + } } -fn vec_try_resize_to_double_capacity(vec: &mut Vec) -> crate::InternalResult<()> { - let capacity = vec.capacity(); - debug_assert!(capacity > 0); - let len = capacity - .checked_mul(core::mem::size_of::()) - .ok_or(InternalError::MalformedControlFlowGraph)?; - if capacity == 0 || len > usize::MAX / 2 { - return Err(InternalError::MalformedControlFlowGraph); +/// flowgraph.c optimize_basic_block swap pass +fn apply_static_swaps_block(block: &mut Block) -> crate::InternalResult<()> { + let mut i = 0; + while i < block.instruction_used { + if matches!( + block.instructions[i].instr.real_opcode(), + Some(Opcode::Swap) + ) { + swaptimize(block, &mut i)?; + apply_static_swaps(block, i as isize); + } + i += 1; } - let new_capacity = capacity * 2; - let additional = new_capacity - .checked_sub(vec.len()) - .ok_or(InternalError::MalformedControlFlowGraph)?; - vec_try_reserve_exact(vec, additional) + Ok(()) } -/// assemble.c write_location_first_byte -fn write_location_first_byte(linetable: &mut Vec, code: u8, length: usize) { - linetable.extend(write_location_entry_start(code, length)); +/// flowgraph.c maybe_instr_make_load_smallint +fn maybe_instr_make_load_smallint(instr: &mut InstructionInfo, constant: &ConstantData) -> bool { + if let ConstantData::Integer { value } = constant + && let Some(small) = value.to_i32().filter(|v| (0..=255).contains(v)) + { + instr_set_op1(instr, Opcode::LoadSmallInt.into(), OpArg::new(small as u32)); + return true; + } + false } -/// pycore_code.h write_location_entry_start -fn write_location_entry_start(code: u8, length: usize) -> [u8; 1] { - debug_assert!(length > 0 && length <= 8); - debug_assert_eq!(code & 15, code); - [0x80 | (code << 3) | ((length - 1) as u8)] -} +/// flowgraph.c basicblock_optimize_load_const +fn basicblock_optimize_load_const( + metadata: &mut CodeUnitMetadata, + block: &mut Block, +) -> crate::InternalResult<()> { + let mut i = 0; + let mut effective_opcode = None; + let mut effective_oparg = OpArg::new(0); + while i < block.instruction_used { + if matches!( + block.instructions[i].instr.real(), + Some(Instruction::LoadConst { .. }) + ) && let Some(constant) = get_const_value(metadata, &block.instructions[i]) + { + maybe_instr_make_load_smallint(&mut block.instructions[i], &constant); + } -/// assemble.c write_location_byte -fn write_location_byte(linetable: &mut Vec, value: u8) { - linetable.push(value); -} + let curr = block.instructions[i]; + let curr_arg = curr.arg; -/// assemble.c write_location_varint -fn write_location_varint(linetable: &mut Vec, value: u32) { - write_varint(linetable, value); -} + // Only combine if the source is a real instruction. + let Some(curr_instr) = curr.instr.real() else { + i += 1; + continue; + }; -/// assemble.c write_location_signed_varint -fn write_location_signed_varint(linetable: &mut Vec, value: i32) { - write_signed_varint(linetable, value); -} + let is_copy_of_load_const = matches!( + (effective_opcode, curr_instr), + (Some(Instruction::LoadConst { .. }), Instruction::Copy { i }) if i.get(curr_arg) == 1 + ); + if !is_copy_of_load_const { + effective_opcode = Some(curr_instr); + effective_oparg = curr_arg; + } + let Some(const_instr) = effective_opcode else { + i += 1; + continue; + }; + let const_arg = effective_oparg; -/// assemble.c write_location_info_short_form -fn write_location_info_short_form( - linetable: &mut Vec, - length: usize, - column: i32, - end_column: i32, -) { - debug_assert!(length > 0 && length <= 8); - debug_assert!(column < 80); - debug_assert!(end_column >= column); - debug_assert!(end_column - column < 16); - let column_low_bits = column & 7; - let column_group = column >> 3; - let code = PyCodeLocationInfoKind::Short0 as u8 + column_group as u8; - write_location_first_byte(linetable, code, length); - write_location_byte( - linetable, - ((column_low_bits as u8) << 4) | ((end_column - column) as u8), - ); -} + if i + 1 >= block.instruction_used { + i += 1; + continue; + } -/// assemble.c write_location_info_oneline_form -fn write_location_info_oneline_form( - linetable: &mut Vec, - length: usize, - line_delta: i32, - column: i32, - end_column: i32, -) { - debug_assert!(length > 0 && length <= 8); - debug_assert!((0..3).contains(&line_delta)); - debug_assert!(column < 128); - debug_assert!(end_column < 128); - let code = PyCodeLocationInfoKind::OneLine0 as u8 + line_delta as u8; - write_location_first_byte(linetable, code, length); - write_location_byte(linetable, column as u8); - write_location_byte(linetable, end_column as u8); -} + let next = block.instructions[i + 1]; + let next_arg = next.arg; -/// assemble.c write_location_info_long_form -fn write_location_info_long_form( - linetable: &mut Vec, - loc: LineTableLocation, - length: usize, - line_delta: i32, -) { - debug_assert!(length > 0 && length <= 8); - write_location_first_byte(linetable, PyCodeLocationInfoKind::Long as u8, length); - write_location_signed_varint(linetable, line_delta); - debug_assert!(loc.end_line >= loc.line); - write_location_varint(linetable, (loc.end_line - loc.line) as u32); - write_location_varint( - linetable, - if loc.col < 0 { 0 } else { (loc.col as u32) + 1 }, - ); - write_location_varint( - linetable, - if loc.end_col < 0 { - 0 - } else { - (loc.end_col as u32) + 1 - }, - ); -} + if let Some(is_true) = load_const_truthiness(const_instr, const_arg, metadata) { + let const_jump = match (next.instr.real_opcode(), next.instr.pseudo_opcode()) { + (_, Some(PseudoOpcode::JumpIfTrue)) => Some((true, false)), + (_, Some(PseudoOpcode::JumpIfFalse)) => Some((false, false)), + (Some(Opcode::PopJumpIfTrue), _) => Some((true, true)), + (Some(Opcode::PopJumpIfFalse), _) => Some((false, true)), + _ => None, + }; + if let Some((jump_if_true, pops_condition)) = const_jump { + if pops_condition { + set_to_nop(&mut block.instructions[i]); + } + if is_true == jump_if_true { + block.instructions[i + 1].instr = PseudoOpcode::Jump.into(); + } else { + set_to_nop(&mut block.instructions[i + 1]); + } + i += 1; + continue; + } + } -/// assemble.c write_location_info_none -fn write_location_info_none(linetable: &mut Vec, length: usize) { - write_location_first_byte(linetable, PyCodeLocationInfoKind::None as u8, length); -} + // The remaining combinations require both instructions to be real. + let Some(next_instr) = next.instr.real() else { + i += 1; + continue; + }; -/// assemble.c write_location_info_no_column -fn write_location_info_no_column(linetable: &mut Vec, length: usize, line_delta: i32) { - write_location_first_byte(linetable, PyCodeLocationInfoKind::NoColumns as u8, length); - write_location_signed_varint(linetable, line_delta); -} + if let Instruction::LoadConst { consti } = const_instr { + let constant = &metadata.consts[consti.get(const_arg).as_usize()]; + if matches!(constant, ConstantData::None) + && let Instruction::IsOp { invert } = next_instr + { + let mut jump_idx = i + 2; + if jump_idx >= block.instruction_used { + i += 1; + continue; + } -/// assemble.c write_location_info_entry -fn write_location_info_entry( - linetable: &mut Vec, - loc: LineTableLocation, - length: usize, - prev_line: &mut i32, - debug_ranges: bool, -) -> crate::InternalResult<()> { - const THEORETICAL_MAX_ENTRY_SIZE: usize = 25; - if linetable - .len() - .checked_add(THEORETICAL_MAX_ENTRY_SIZE) - .ok_or(InternalError::MalformedControlFlowGraph)? - >= linetable.capacity() - { - debug_assert!(linetable.capacity() > THEORETICAL_MAX_ENTRY_SIZE); - vec_try_resize_to_double_capacity(linetable)?; - } - if loc.line == NO_LOCATION_OVERRIDE { - write_location_info_none(linetable, length); - return Ok(()); - } + if matches!( + block.instructions[jump_idx].instr.real(), + Some(Instruction::ToBool) + ) { + set_to_nop(&mut block.instructions[jump_idx]); + jump_idx += 1; + if jump_idx >= block.instruction_used { + i += 1; + continue; + } + } - let line_delta = loc.line - *prev_line; - let column = loc.col; - let end_column = loc.end_col; - if !debug_ranges - || ((column < 0 || end_column < 0) && (loc.end_line == loc.line || loc.end_line < 0)) - { - write_location_info_no_column(linetable, length, line_delta); - *prev_line = loc.line; - return Ok(()); - } + let Some(jump_instr) = block.instructions[jump_idx].instr.real() else { + i += 1; + continue; + }; - if loc.end_line == loc.line { - if line_delta == 0 && column < 80 && end_column - column < 16 && end_column >= column { - write_location_info_short_form(linetable, length, column, end_column); - return Ok(()); + let mut invert = matches!( + invert.get(next_arg), + rustpython_compiler_core::bytecode::Invert::Yes + ); + match jump_instr { + Instruction::PopJumpIfFalse { .. } => { + invert = !invert; + } + Instruction::PopJumpIfTrue { .. } => {} + _ => { + i += 1; + continue; + } + }; + + set_to_nop(&mut block.instructions[i]); + set_to_nop(&mut block.instructions[i + 1]); + block.instructions[jump_idx].instr = if invert { + Opcode::PopJumpIfNotNone + } else { + Opcode::PopJumpIfNone + } + .into(); + i = jump_idx; + continue; + } } - if (0..3).contains(&line_delta) && column < 128 && end_column < 128 { - write_location_info_oneline_form(linetable, length, line_delta, column, end_column); - *prev_line = loc.line; - return Ok(()); + + if matches!( + const_instr, + Instruction::LoadConst { .. } | Instruction::LoadSmallInt { .. } + ) && matches!(next_instr, Instruction::ToBool) + && let Some(value) = load_const_truthiness(const_instr, const_arg, metadata) + { + let const_idx = add_const(metadata, ConstantData::Boolean { value })?; + set_to_nop(&mut block.instructions[i]); + instr_set_op1( + &mut block.instructions[i + 1], + Opcode::LoadConst.into(), + OpArg::new(const_idx as u32), + ); + i += 1; + continue; } - } - write_location_info_long_form(linetable, loc, length, line_delta); - *prev_line = loc.line; + i += 1; + } Ok(()) } -/// assemble.c assemble_emit_location -fn assemble_emit_location( - linetable: &mut Vec, - loc: LineTableLocation, - mut size: usize, - prev_line: &mut i32, - debug_ranges: bool, +/// flowgraph.c optimize_load_const +fn optimize_load_const( + metadata: &mut CodeUnitMetadata, + blocks: &mut Blocks, ) -> crate::InternalResult<()> { - if size == 0 { - return Ok(()); - } - while size > 8 { - write_location_info_entry(linetable, loc, 8, prev_line, debug_ranges)?; - size -= 8; + let mut block_idx = BlockIdx(0); + while block_idx != BlockIdx::NULL { + let next_block = blocks[block_idx.idx()].next; + let block = &mut blocks[block_idx]; + basicblock_optimize_load_const(metadata, block)?; + block_idx = next_block; } - write_location_info_entry(linetable, loc, size, prev_line, debug_ranges) + Ok(()) } -fn no_linetable_location() -> LineTableLocation { - LineTableLocation { - line: NO_LOCATION_OVERRIDE, - end_line: NO_LOCATION_OVERRIDE, - col: NO_LOCATION_OVERRIDE, - end_col: NO_LOCATION_OVERRIDE, +#[cfg(test)] +impl CodeInfo { + fn debug_block_dump(&self) -> String { + let mut out = String::new(); + let mut block_idx = BlockIdx(0); + while block_idx != BlockIdx::NULL { + use core::fmt::Write; + let block = &self.blocks[block_idx.idx()]; + let block_return = if basicblock_returns(block) { + " return" + } else { + "" + }; + let _ = writeln!( + out, + "block {} next={} cold={} except={} preserve_lasti={} start_depth={}{}", + u32::from(block_idx), + if block.next == BlockIdx::NULL { + String::from("NULL") + } else { + u32::from(block.next).to_string() + }, + block.cold, + block.except_handler, + block.preserve_lasti, + if block.start_depth < 0 { + String::from("None") + } else { + block.start_depth.to_string() + }, + block_return, + ); + for info in &block.instructions[..block.instruction_used] { + let lineno = instruction_lineno(info); + let _ = writeln!( + out, + " [disp={}:{} raw={}:{}-{}:{} override={:?}] {:?} arg={} target={}", + lineno, + info.location.character_offset.get(), + info.location.line.get(), + info.location.character_offset.get(), + info.end_location.line.get(), + info.end_location.character_offset.get(), + info.lineno_override, + info.instr, + u32::from(info.arg), + if info.target == BlockIdx::NULL { + String::from("NULL") + } else { + u32::from(info.target).to_string() + } + ); + } + block_idx = block.next; + } + out } -} -fn next_linetable_location() -> LineTableLocation { - LineTableLocation { - line: NEXT_LOCATION_OVERRIDE, - end_line: NEXT_LOCATION_OVERRIDE, - col: NEXT_LOCATION_OVERRIDE, - end_col: NEXT_LOCATION_OVERRIDE, + pub(crate) fn debug_late_cfg_trace(mut self) -> crate::InternalResult> { + let mut trace = Vec::new(); + trace.push(("initial".to_owned(), self.debug_block_dump())); + + let instr_sequence = self.prepare_cfg_from_codegen()?; + self.blocks = cfg_from_instruction_sequence(instr_sequence)?; + trace.push(( + "after_cfg_from_instruction_sequence".to_owned(), + self.debug_block_dump(), + )); + translate_jump_labels_to_targets(&mut self.blocks)?; + self.blocks.mark_except_handlers()?; + label_exception_targets(&mut self.blocks)?; + self.blocks.check_cfg()?; + self.blocks.inline_small_or_no_lineno_blocks()?; + trace.push(( + "after_inline_small_or_no_lineno_blocks".to_owned(), + self.debug_block_dump(), + )); + self.blocks.remove_unreachable()?; + self.blocks + .resolve_line_numbers(self.metadata.firstlineno)?; + optimize_load_const(&mut self.metadata, &mut self.blocks)?; + trace.push(( + "after_optimize_load_const".to_owned(), + self.debug_block_dump(), + )); + let mut block_idx = BlockIdx(0); + while block_idx != BlockIdx::NULL { + let next_block = self.blocks[block_idx].next; + self.blocks + .optimize_basic_block(&mut self.metadata, block_idx)?; + block_idx = next_block; + } + trace.push(( + "after_optimize_basic_block".to_owned(), + self.debug_block_dump(), + )); + self.blocks.remove_redundant_nops_and_pairs()?; + self.blocks.remove_unreachable()?; + self.blocks.remove_redundant_nops_and_jumps()?; + + #[cfg(debug_assertions)] + assert!(self.blocks.no_redundant_jumps()); + + self.blocks + .remove_unused_consts(&mut self.metadata.consts)?; + trace.push(( + "after_optimize_cfg_cleanup".to_owned(), + self.debug_block_dump(), + )); + let nlocals = self.metadata.varnames.len(); + let nparams = self.nparams; + add_checks_for_loads_of_uninitialized_variables(&mut self.blocks, nlocals, nparams)?; + self.blocks.insert_superinstructions()?; + self.blocks.push_cold_blocks_to_end()?; + trace.push(( + "after_push_cold_before_chain_reorder".to_owned(), + self.debug_block_dump(), + )); + self.blocks + .resolve_line_numbers(self.metadata.firstlineno)?; + trace.push(( + "after_push_cold_resolve_line_numbers".to_owned(), + self.debug_block_dump(), + )); + + trace.push(( + "after_push_cold_blocks_to_end".to_owned(), + self.debug_block_dump(), + )); + + self.blocks.convert_pseudo_conditional_jumps()?; + trace.push(( + "after_convert_pseudo_conditional_jumps".to_owned(), + self.debug_block_dump(), + )); + + let _max_stackdepth = self.blocks.calculate_stackdepth()?; + let _nlocalsplus = prepare_localsplus(&self.metadata, &mut self.blocks, self.flags)?; + convert_pseudo_ops(&mut self.blocks)?; + trace.push(( + "after_convert_pseudo_ops".to_owned(), + self.debug_block_dump(), + )); + + self.blocks.normalize_jumps()?; + + #[cfg(debug_assertions)] + assert!(self.blocks.no_redundant_jumps()); + + trace.push(("after_normalize_jumps".to_owned(), self.debug_block_dump())); + self.blocks.optimize_load_fast()?; + trace.push(( + "after_optimize_load_fast".to_owned(), + self.debug_block_dump(), + )); + + Ok(trace) } } -/// assemble.c assemble_emit_exception_table_item -fn assemble_emit_exception_table_item(table: &mut Vec, value: i32, mut msb: u8) { - debug_assert!((msb | 128) == 128); - debug_assert!((0..(1 << 30)).contains(&value)); - let value = value as u32; - const CONTINUATION_BIT: u8 = 64; - if value >= 1 << 24 { - table.push(((value >> 24) as u8) | CONTINUATION_BIT | msb); - msb = 0; +impl InstrDisplayContext for CodeInfo { + type Constant = ConstantData; + + fn get_constant(&self, consti: oparg::ConstIdx) -> &ConstantData { + &self.metadata.consts[consti.as_usize()] } - if value >= 1 << 18 { - table.push((((value >> 18) & 0x3f) as u8) | CONTINUATION_BIT | msb); - msb = 0; + + fn get_name(&self, i: usize) -> &str { + self.metadata.names[i].as_ref() } - if value >= 1 << 12 { - table.push((((value >> 12) & 0x3f) as u8) | CONTINUATION_BIT | msb); - msb = 0; + + fn get_varname(&self, var_num: oparg::VarNum) -> &str { + self.metadata.varnames[var_num.as_usize()].as_ref() } - if value >= 1 << 6 { - table.push((((value >> 6) & 0x3f) as u8) | CONTINUATION_BIT | msb); - msb = 0; + + fn get_localsplus_name(&self, var_num: oparg::VarNum) -> &str { + let idx = var_num.as_usize(); + let nlocals = self.metadata.varnames.len(); + if idx < nlocals { + self.metadata.varnames[idx].as_ref() + } else { + let cell_idx = idx - nlocals; + self.metadata + .cellvars + .get_index(cell_idx) + .unwrap_or_else(|| &self.metadata.freevars[cell_idx - self.metadata.cellvars.len()]) + .as_ref() + } } - table.push(((value & 0x3f) as u8) | msb); } -/// assemble.c assemble_emit_exception_table_entry -fn assemble_emit_exception_table_entry( - table: &mut Vec, - start: i32, - end: i32, - handler_offset: i32, - handler: InstructionSequenceExceptHandlerInfo, -) -> crate::InternalResult<()> { - const MAX_SIZE_OF_ENTRY: usize = 20; - if table - .len() - .checked_add(MAX_SIZE_OF_ENTRY) - .ok_or(InternalError::MalformedControlFlowGraph)? - >= table.capacity() - { - vec_try_resize_to_double_capacity(table)?; +const NOT_LOCAL: isize = -1; +const DUMMY_INSTR: isize = -1; + +/// flowgraph.c make_super_instruction +fn make_super_instruction( + inst1: &mut InstructionInfo, + inst2: &mut InstructionInfo, + super_op: AnyInstruction, +) { + let line1 = instruction_lineno(inst1); + let line2 = instruction_lineno(inst2); + if line1 >= 0 && line2 >= 0 && line1 != line2 { + return; } - let size = end - start; - debug_assert!(end > start); - let target = handler_offset; - let mut depth = handler.start_depth - 1; - if handler.preserve_lasti > 0 { - depth -= 1; + let arg1 = u32::from(inst1.arg); + let arg2 = u32::from(inst2.arg); + if arg1 >= 16 || arg2 >= 16 { + return; } - debug_assert!(depth >= 0); - let depth_lasti = (depth << 1) | handler.preserve_lasti; - assemble_emit_exception_table_item(table, start, 1 << 7); - assemble_emit_exception_table_item(table, size, 0); - assemble_emit_exception_table_item(table, target, 0); - assemble_emit_exception_table_item(table, depth_lasti, 0); - Ok(()) + instr_set_op1(inst1, super_op, OpArg::new((arg1 << 4) | arg2)); + set_to_nop(inst2); } -/// assemble.c assemble_exception_table -fn assemble_exception_table( - instrs: &[InstructionSequenceEntry], -) -> crate::InternalResult> { - let mut table = Vec::new(); - vec_try_reserve_exact(&mut table, DEFAULT_LNOTAB_SIZE)?; - let mut handler = InstructionSequenceExceptHandlerInfo { - h_label: NO_EXCEPTION_HANDLER_LABEL, - start_depth: -1, - preserve_lasti: -1, - }; - let mut start = -1; - let mut ioffset = 0i32; +/// flowgraph.c LoadFastInstrFlag +#[derive(Clone, Copy, Eq, PartialEq)] +#[repr(u8)] +enum LoadFastInstrFlag { + SupportKilled = 1, + StoredAsLocal = 2, + RefUnconsumed = 4, +} - for i in 0..instrs.len() { - let instr = &instrs[i]; - if instr.except_handler.h_label != handler.h_label { - if handler.h_label >= 0 { - let handler_offset = instrs[handler.h_label as usize].i_offset; - assemble_emit_exception_table_entry( - &mut table, - start, - ioffset, - handler_offset, - handler, - )?; - } - start = ioffset; - handler = instr.except_handler; - } - ioffset += instr_size(&instr.info) as i32; - } +/// flowgraph.c ref +#[derive(Clone, Copy)] +struct Ref { + instr: isize, + local: isize, +} - if handler.h_label >= 0 { - let handler_offset = instrs[handler.h_label as usize].i_offset; - assemble_emit_exception_table_entry(&mut table, start, ioffset, handler_offset, handler)?; +/// flowgraph.c ref_stack +struct RefStack { + refs: Vec, + size: usize, + capacity: usize, +} + +/// flowgraph.c ref_stack_push +fn ref_stack_push(stack: &mut RefStack, r: Ref) -> crate::InternalResult<()> { + debug_assert_eq!(stack.refs.len(), stack.capacity); + if stack.size == stack.capacity { + let doubled = stack.capacity * 2; + let new_cap = 32.max(doubled); + stack + .refs + .try_reserve_exact(new_cap - stack.capacity) + .map_err(|_| InternalError::MalformedControlFlowGraph)?; + stack.refs.resize(new_cap, Ref { instr: 0, local: 0 }); + stack.capacity = new_cap; } + stack.refs[stack.size] = r; + stack.size += 1; + Ok(()) +} - Ok(table.into_boxed_slice()) +/// flowgraph.c ref_stack_pop +fn ref_stack_pop(stack: &mut RefStack) -> Ref { + assert!(stack.size > 0); + stack.size -= 1; + stack.refs[stack.size] } -/// Mark exception handler target blocks. -/// flowgraph.c mark_except_handlers -#[allow(clippy::unnecessary_wraps)] -pub(crate) fn mark_except_handlers(blocks: &mut Blocks) -> crate::InternalResult<()> { - #[cfg(debug_assertions)] - { - let mut block_idx = BlockIdx(0); - while block_idx != BlockIdx::NULL { - assert!(!blocks[block_idx].except_handler); - block_idx = blocks[block_idx].next; - } - } +/// flowgraph.c ref_stack_swap_top +fn ref_stack_swap_top(stack: &mut RefStack, off: usize) { + assert!(off >= 2 && stack.size >= off); + let top = stack.size - 1; + let other = stack.size - off; + stack.refs.swap(top, other); +} - let mut block_idx = BlockIdx(0); - while block_idx != BlockIdx::NULL { - let next = blocks[block_idx].next; - let instr_count = blocks[block_idx].instruction_used; - for i in 0..instr_count { - let instr = blocks[block_idx].instructions[i]; - if is_block_push(&instr) { - debug_assert!(instr.target != BlockIdx::NULL); - blocks[instr.target].except_handler = true; - } - } - block_idx = next; - } - Ok(()) +/// flowgraph.c ref_stack_at +fn ref_stack_at(stack: &RefStack, idx: usize) -> Ref { + assert!(idx < stack.size); + stack.refs[idx] } -/// flowgraph.c mark_cold (two-pass to match CPython). -/// -/// Phase 1 (mark_warm): propagate "warm" from entry via fall-through and -/// jump targets. CPython asserts while visiting warm blocks that they are not -/// exception handlers. -/// -/// Phase 2 (mark_cold): propagate "cold" from except_handler blocks via -/// forward edges. Blocks reached only via runtime exception dispatch are -/// marked cold and pushed to the end by push_cold_blocks_to_end. -/// -/// Blocks reached by neither phase remain `cold=false`. They are typically -/// empty unreachable placeholders left by remove_unreachable; they stay in -/// their original chain position (e.g. between entry and the post-try -/// continuation for a nested try/except whose inner_end was emptied by -/// optimize_cfg). This matches CPython's behavior and is necessary for -/// optimize_load_fast to terminate fall-through at those placeholders. -/// flowgraph.c mark_warm -fn mark_warm(blocks: &mut Blocks) -> crate::InternalResult<()> { - let mut stack = blocks.make_cfg_traversal_stack()?; - stack.push(BlockIdx(0)); - blocks[0].visited = true; - while let Some(block_idx) = stack.pop() { - let idx = block_idx.idx(); - debug_assert!(!blocks[idx].except_handler); - blocks[idx].warm = true; +/// flowgraph.c ref_stack_clear +fn ref_stack_clear(stack: &mut RefStack) { + stack.size = 0; +} - let next = blocks[idx].next; - if next != BlockIdx::NULL && bb_has_fallthrough(&blocks[idx]) && !blocks[next].visited { - stack.push(next); - blocks[next.idx()].visited = true; - } +/// flowgraph.c optimize_load_fast PUSH_REF +fn push_ref(stack: &mut RefStack, instr: isize, local: isize) -> crate::InternalResult<()> { + ref_stack_push(stack, Ref { instr, local }) +} - let instr_count = blocks[idx].instruction_used; - for i in 0..instr_count { - let instr = blocks[idx].instructions[i]; - if is_jump(&instr) { - let target = instr.target; - debug_assert!(target != BlockIdx::NULL); - if !blocks[target.idx()].visited { - stack.push(target); - blocks[target.idx()].visited = true; - } - } +/// flowgraph.c kill_local +fn kill_local(instr_flags: &mut [u8], refs: &RefStack, local: isize) { + for i in 0..refs.size { + let r = ref_stack_at(refs, i); + if r.local != local { + continue; } + debug_assert!(r.instr >= 0); + instr_flags[r.instr as usize] |= LoadFastInstrFlag::SupportKilled as u8; } - Ok(()) } -fn mark_cold(blocks: &mut Blocks) -> crate::InternalResult<()> { - let mut block_idx = BlockIdx(0); - while block_idx != BlockIdx::NULL { - let block = &mut blocks[block_idx]; - debug_assert!(!block.cold); - debug_assert!(!block.warm); - block_idx = block.next; +/// flowgraph.c store_local +fn store_local(instr_flags: &mut [u8], refs: &RefStack, local: isize, r: Ref) { + kill_local(instr_flags, refs, local); + if r.instr != DUMMY_INSTR { + instr_flags[r.instr as usize] |= LoadFastInstrFlag::StoredAsLocal as u8; } +} - mark_warm(blocks)?; - - let mut cold_stack = blocks.make_cfg_traversal_stack()?; - block_idx = BlockIdx(0); - while block_idx != BlockIdx::NULL { - let i = block_idx.idx(); - let next = blocks[i].next; - let block = &blocks[i]; - if block.except_handler { - debug_assert!(!block.warm); - cold_stack.push(block_idx); - blocks[i].visited = true; - } - block_idx = next; - } - while let Some(block_idx) = cold_stack.pop() { - let idx = block_idx.idx(); - blocks[idx].cold = true; - let next = blocks[idx].next; - if next != BlockIdx::NULL && bb_has_fallthrough(&blocks[idx]) { - let next_idx = next.idx(); - if !blocks[next_idx].warm && !blocks[next_idx].visited { - cold_stack.push(next); - blocks[next_idx].visited = true; - } - } +fn local_as_ref_local(local: usize) -> isize { + local as isize +} - let instr_count = blocks[idx].instruction_used; - for i in 0..instr_count { - let instr = blocks[idx].instructions[i]; - if is_jump(&instr) { - debug_assert_eq!(i, instr_count - 1); - let target = instr.target; - debug_assert!(target != BlockIdx::NULL); - if !blocks[target.idx()].warm && !blocks[target.idx()].visited { - cold_stack.push(target); - blocks[target.idx()].visited = true; - } - } - } +/// flowgraph.c load_fast_push_block +fn load_fast_push_block( + worklist: &mut CfgTraversalStack, + blocks: &mut Blocks, + target: BlockIdx, + start_depth: usize, +) { + debug_assert!(target != BlockIdx::NULL); + debug_assert!(blocks[target].start_depth >= 0); + debug_assert_eq!(blocks[target].start_depth as usize, start_depth,); + if !blocks[target].visited { + blocks[target].visited = true; + worklist.push(target); } - Ok(()) } -/// flowgraph.c push_cold_blocks_to_end -fn push_cold_blocks_to_end(blocks: &mut Blocks) -> crate::InternalResult<()> { - if blocks[0].next == BlockIdx::NULL { - return Ok(()); +fn stackdepth_push( + stack: &mut CfgTraversalStack, + blocks: &mut Blocks, + target: BlockIdx, + depth: i32, +) -> crate::InternalResult<()> { + let idx = target.idx(); + let block_depth = &mut blocks[idx].start_depth; + if !(*block_depth < 0 || *block_depth == depth) { + return Err(InternalError::InconsistentStackDepth); } - - mark_cold(blocks)?; - let mut next_label = get_max_label(blocks) + 1; - - // If a cold block falls through to a warm block, add an explicit jump - let mut block_idx = BlockIdx(0); - while block_idx != BlockIdx::NULL { - let next = blocks[block_idx].next; - if blocks[block_idx].cold - && bb_has_fallthrough(&blocks[block_idx]) - && next != BlockIdx::NULL - && blocks[next].warm - { - let explicit_jump = blocks_new_block(blocks)?; - if !is_label(blocks[next].cpython_label) { - blocks[next].cpython_label = InstructionSequenceLabel::from_index(next_label); - next_label += 1; - } - let jump_label = blocks[next].cpython_label; - debug_assert!(is_label(jump_label)); - basicblock_addop( - &mut blocks[explicit_jump], - InstructionInfo { - instr: PseudoOpcode::JumpNoInterrupt.into(), - arg: instruction_sequence_label_oparg(jump_label), - target: BlockIdx::NULL, - location: SourceLocation::default(), - end_location: SourceLocation::default(), - except_handler: None, - lineno_override: Some(NO_LOCATION_OVERRIDE), - }, - )?; - blocks[explicit_jump].cold = true; - blocks[explicit_jump].next = next; - blocks[explicit_jump].predecessors = 1; - blocks[block_idx].next = explicit_jump; - let target = blocks[explicit_jump].next; - let last = basicblock_last_instr_mut(&mut blocks[explicit_jump]) - .expect("missing explicit jump"); - last.target = target; - } - block_idx = blocks[block_idx].next; + if *block_depth < depth && *block_depth < 100 { + debug_assert!(*block_depth < 0); + *block_depth = depth; + stack.push(target); } + Ok(()) +} - assert!(!blocks[0].cold); - let mut cold_blocks: BlockIdx = BlockIdx::NULL; - let mut cold_blocks_tail: BlockIdx = BlockIdx::NULL; - let mut block_idx = BlockIdx(0); +/// flowgraph.c stack_effects +#[derive(Clone, Copy, Eq, PartialEq)] +struct StackEffects { + net: i32, +} - while blocks[block_idx].next != BlockIdx::NULL { - debug_assert!(!blocks[block_idx].cold); - while blocks[block_idx].next != BlockIdx::NULL && !blocks[blocks[block_idx].next].cold { - block_idx = blocks[block_idx].next; - } - if blocks[block_idx].next == BlockIdx::NULL { - break; - } +/// flowgraph.c get_stack_effects +#[allow(clippy::unnecessary_wraps)] +fn get_stack_effects( + instr: AnyInstruction, + oparg: OpArg, + jump: i32, +) -> crate::InternalResult { + if instr + .real() + .is_some_and(|op| op.as_opcode().deopt().is_some()) + { + return Err(InternalError::InvalidStackEffect); + } + let oparg = u32::from(oparg); + let net = if instr.is_block_push() && jump == 0 { + 0 + } else if jump != 0 { + instr.stack_effect_jump(oparg) + } else { + instr.stack_effect(oparg) + }; + Ok(StackEffects { net }) +} - debug_assert!(!blocks[block_idx].cold); - debug_assert!(blocks[blocks[block_idx].next].cold); +fn vec_try_reserve_exact(vec: &mut Vec, additional: usize) -> crate::InternalResult<()> { + vec.try_reserve_exact(additional) + .map_err(|_| InternalError::MalformedControlFlowGraph) +} - let mut block_end = blocks[block_idx].next; - while blocks[block_end].next != BlockIdx::NULL && blocks[blocks[block_end].next].cold { - block_end = blocks[block_end].next; - } +fn vec_try_resize_to_double_capacity(vec: &mut Vec) -> crate::InternalResult<()> { + let capacity = vec.capacity(); + debug_assert!(capacity > 0); + let len = capacity + .checked_mul(core::mem::size_of::()) + .ok_or(InternalError::MalformedControlFlowGraph)?; + if capacity == 0 || len > usize::MAX / 2 { + return Err(InternalError::MalformedControlFlowGraph); + } + let new_capacity = capacity * 2; + let additional = new_capacity + .checked_sub(vec.len()) + .ok_or(InternalError::MalformedControlFlowGraph)?; + vec_try_reserve_exact(vec, additional) +} - debug_assert!(blocks[block_end].cold); - debug_assert!( - blocks[block_end].next == BlockIdx::NULL || !blocks[blocks[block_end].next].cold - ); +/// assemble.c write_location_first_byte +fn write_location_first_byte(linetable: &mut Vec, code: u8, length: usize) { + linetable.extend(write_location_entry_start(code, length)); +} - if cold_blocks == BlockIdx::NULL { - cold_blocks = blocks[block_idx].next; - } else { - blocks[cold_blocks_tail].next = blocks[block_idx].next; - } +/// pycore_code.h write_location_entry_start +fn write_location_entry_start(code: u8, length: usize) -> [u8; 1] { + debug_assert!(length > 0 && length <= 8); + debug_assert_eq!(code & 15, code); + [0x80 | (code << 3) | ((length - 1) as u8)] +} - cold_blocks_tail = block_end; - blocks[block_idx].next = blocks[block_end].next; - blocks[block_end].next = BlockIdx::NULL; - } +/// assemble.c write_location_byte +fn write_location_byte(linetable: &mut Vec, value: u8) { + linetable.push(value); +} - debug_assert!(blocks[block_idx].next == BlockIdx::NULL); - blocks[block_idx].next = cold_blocks; +/// assemble.c write_location_varint +fn write_location_varint(linetable: &mut Vec, value: u32) { + write_varint(linetable, value); +} - if cold_blocks != BlockIdx::NULL { - remove_redundant_nops_and_jumps(blocks)?; - } - Ok(()) +/// assemble.c write_location_signed_varint +fn write_location_signed_varint(linetable: &mut Vec, value: i32) { + write_signed_varint(linetable, value); } -/// flowgraph.c check_cfg -fn check_cfg(blocks: &Blocks) -> crate::InternalResult<()> { - let mut block_idx = BlockIdx(0); - while block_idx != BlockIdx::NULL { - let block = &blocks[block_idx]; - for i in 0..block.instruction_used { - let opcode = block.instructions[i].instr; - debug_assert!(!opcode.is_assembler()); - if opcode.is_terminator() && i != block.instruction_used - 1 { - return Err(InternalError::MalformedControlFlowGraph); - } - } - block_idx = block.next; - } - Ok(()) +/// assemble.c write_location_info_short_form +fn write_location_info_short_form( + linetable: &mut Vec, + length: usize, + column: i32, + end_column: i32, +) { + debug_assert!(length > 0 && length <= 8); + debug_assert!(column < 80); + debug_assert!(end_column >= column); + debug_assert!(end_column - column < 16); + let column_low_bits = column & 7; + let column_group = column >> 3; + let code = PyCodeLocationInfoKind::Short0 as u8 + column_group as u8; + write_location_first_byte(linetable, code, length); + write_location_byte( + linetable, + ((column_low_bits as u8) << 4) | ((end_column - column) as u8), + ); } -/// flowgraph.c jump_thread -fn jump_thread( - blocks: &mut Blocks, - block_idx: BlockIdx, - instr_idx: usize, - target: &InstructionInfo, - opcode: AnyInstruction, -) -> crate::InternalResult { - let bi = block_idx.idx(); - debug_assert!(is_jump(&blocks[bi].instructions[instr_idx])); - debug_assert!(is_jump(target)); - debug_assert_eq!(instr_idx + 1, blocks[bi].instruction_used); - debug_assert!(target.target != BlockIdx::NULL); - if blocks[bi].instructions[instr_idx].target != target.target { - set_to_nop(&mut blocks[bi].instructions[instr_idx]); - basicblock_add_jump(blocks, block_idx, opcode, target.target, target)?; - return Ok(true); - } - Ok(false) +/// assemble.c write_location_info_oneline_form +fn write_location_info_oneline_form( + linetable: &mut Vec, + length: usize, + line_delta: i32, + column: i32, + end_column: i32, +) { + debug_assert!(length > 0 && length <= 8); + debug_assert!((0..3).contains(&line_delta)); + debug_assert!(column < 128); + debug_assert!(end_column < 128); + let code = PyCodeLocationInfoKind::OneLine0 as u8 + line_delta as u8; + write_location_first_byte(linetable, code, length); + write_location_byte(linetable, column as u8); + write_location_byte(linetable, end_column as u8); } -/// flowgraph.c basicblock_add_jump -fn basicblock_add_jump( - blocks: &mut Blocks, - block_idx: BlockIdx, - instr: AnyInstruction, - target: BlockIdx, - loc_source: &InstructionInfo, -) -> crate::InternalResult<()> { - let bi = block_idx.idx(); - let last = basicblock_last_instr(&blocks[bi]); - if last.is_some_and(is_jump) { - return Err(InternalError::MalformedControlFlowGraph); - } - debug_assert!(target != BlockIdx::NULL); - let label = blocks[target.idx()].cpython_label; - debug_assert!(is_label(label)); - let arg = instruction_sequence_label_oparg(label); - let block = &mut blocks[bi]; - basicblock_addop( - block, - InstructionInfo { - instr, - arg, - target: BlockIdx::NULL, - location: loc_source.location, - end_location: loc_source.end_location, - except_handler: None, - lineno_override: loc_source.lineno_override, +/// assemble.c write_location_info_long_form +fn write_location_info_long_form( + linetable: &mut Vec, + loc: LineTableLocation, + length: usize, + line_delta: i32, +) { + debug_assert!(length > 0 && length <= 8); + write_location_first_byte(linetable, PyCodeLocationInfoKind::Long as u8, length); + write_location_signed_varint(linetable, line_delta); + debug_assert!(loc.end_line >= loc.line); + write_location_varint(linetable, (loc.end_line - loc.line) as u32); + write_location_varint( + linetable, + if loc.col < 0 { 0 } else { (loc.col as u32) + 1 }, + ); + write_location_varint( + linetable, + if loc.end_col < 0 { + 0 + } else { + (loc.end_col as u32) + 1 }, - )?; - let last = basicblock_last_instr_mut(block).expect("missing jump"); - debug_assert!(match (last.instr, instr) { - (AnyInstruction::Real(last), AnyInstruction::Real(opcode)) => - last.as_opcode() == opcode.as_opcode(), - (AnyInstruction::Pseudo(last), AnyInstruction::Pseudo(opcode)) => - last.as_opcode() == opcode.as_opcode(), - _ => false, - }); - last.target = target; - Ok(()) + ); } -/// pycore_opcode_utils.h IS_CONDITIONAL_JUMP_OPCODE -fn is_conditional_jump_opcode(instr: AnyInstruction) -> bool { - matches!( - instr.real().map(Into::into), - Some( - Opcode::PopJumpIfFalse - | Opcode::PopJumpIfTrue - | Opcode::PopJumpIfNone - | Opcode::PopJumpIfNotNone - ) - ) +/// assemble.c write_location_info_none +fn write_location_info_none(linetable: &mut Vec, length: usize) { + write_location_first_byte(linetable, PyCodeLocationInfoKind::None as u8, length); } -/// flowgraph.c convert_pseudo_conditional_jumps -fn convert_pseudo_conditional_jumps(blocks: &mut Blocks) -> crate::InternalResult<()> { - let mut block_idx = BlockIdx(0); - while block_idx != BlockIdx::NULL { - let next = blocks[block_idx.idx()].next; - let block = &mut blocks[block_idx.idx()]; - let mut i = 0; - while i < block.instruction_used { - let instr = block.instructions[i]; - let opcode = instr.instr; - if matches!( - opcode.pseudo_opcode(), - Some(PseudoOpcode::JumpIfFalse | PseudoOpcode::JumpIfTrue) - ) { - debug_assert_eq!(i, block.instruction_used - 1); - block.instructions[i].instr = - if matches!(opcode.pseudo_opcode(), Some(PseudoOpcode::JumpIfFalse)) { - Opcode::PopJumpIfFalse - } else { - Opcode::PopJumpIfTrue - } - .into(); - - let location = instr.location; - let end_location = instr.end_location; - let except_handler = instr.except_handler; - let lineno_override = instr.lineno_override; - let copy = InstructionInfo { - instr: Opcode::Copy.into(), - arg: OpArg::new(1), - target: BlockIdx::NULL, - location, - end_location, - except_handler, - lineno_override, - }; - basicblock_insert_instruction(block, i, copy)?; - i += 1; +/// assemble.c write_location_info_no_column +fn write_location_info_no_column(linetable: &mut Vec, length: usize, line_delta: i32) { + write_location_first_byte(linetable, PyCodeLocationInfoKind::NoColumns as u8, length); + write_location_signed_varint(linetable, line_delta); +} - let to_bool = InstructionInfo { - instr: Opcode::ToBool.into(), - arg: OpArg::new(0), - target: BlockIdx::NULL, - location, - end_location, - except_handler, - lineno_override, - }; - basicblock_insert_instruction(block, i, to_bool)?; - i += 1; - } - i += 1; - } - block_idx = next; +/// assemble.c write_location_info_entry +fn write_location_info_entry( + linetable: &mut Vec, + loc: LineTableLocation, + length: usize, + prev_line: &mut i32, + debug_ranges: bool, +) -> crate::InternalResult<()> { + const THEORETICAL_MAX_ENTRY_SIZE: usize = 25; + if linetable + .len() + .checked_add(THEORETICAL_MAX_ENTRY_SIZE) + .ok_or(InternalError::MalformedControlFlowGraph)? + >= linetable.capacity() + { + debug_assert!(linetable.capacity() > THEORETICAL_MAX_ENTRY_SIZE); + vec_try_resize_to_double_capacity(linetable)?; } - Ok(()) -} - -/// flowgraph.c normalize_jumps_in_block -fn normalize_jumps_in_block(blocks: &mut Blocks, block_idx: BlockIdx) -> crate::InternalResult<()> { - let idx = block_idx.idx(); - let Some(last_ins) = basicblock_last_instr(&blocks[idx]).copied() else { - return Ok(()); - }; - if !is_conditional_jump_opcode(last_ins.instr) { + if loc.line == NO_LOCATION_OVERRIDE { + write_location_info_none(linetable, length); return Ok(()); } - debug_assert!(!last_ins.instr.is_assembler()); - - debug_assert!(last_ins.target != BlockIdx::NULL); - let is_forward = !blocks[last_ins.target.idx()].visited; - if is_forward { - // Insert NOT_TAKEN after forward conditional jump. - let not_taken = InstructionInfo { - instr: Opcode::NotTaken.into(), - arg: OpArg::new(0), - target: BlockIdx::NULL, - location: last_ins.location, - end_location: last_ins.end_location, - except_handler: None, - lineno_override: last_ins.lineno_override, - }; - basicblock_addop(&mut blocks[idx], not_taken)?; + let line_delta = loc.line - *prev_line; + let column = loc.col; + let end_column = loc.end_col; + if !debug_ranges + || ((column < 0 || end_column < 0) && (loc.end_line == loc.line || loc.end_line < 0)) + { + write_location_info_no_column(linetable, length, line_delta); + *prev_line = loc.line; return Ok(()); } - let reversed_opcode = match last_ins.instr.real_opcode() { - Some(Opcode::PopJumpIfNotNone) => Opcode::PopJumpIfNone.into(), - Some(Opcode::PopJumpIfNone) => Opcode::PopJumpIfNotNone.into(), - Some(Opcode::PopJumpIfFalse) => Opcode::PopJumpIfTrue.into(), - Some(Opcode::PopJumpIfTrue) => Opcode::PopJumpIfFalse.into(), - _ => unreachable!("conditional jump has reverse opcode"), - }; - - // Transform 'conditional jump T' to 'reversed_jump b_next' followed by - // 'jump_backwards T'. - let loc = last_ins.location; - let end_loc = last_ins.end_location; - - let target = last_ins.target; - let backwards_jump_idx = blocks_new_block(blocks)?; - basicblock_addop( - &mut blocks[backwards_jump_idx.idx()], - InstructionInfo { - instr: Opcode::NotTaken.into(), - arg: OpArg::new(0), - target: BlockIdx::NULL, - location: loc, - end_location: end_loc, - except_handler: None, - lineno_override: last_ins.lineno_override, - }, - )?; - basicblock_add_jump( - blocks, - backwards_jump_idx, - PseudoOpcode::Jump.into(), - target, - &last_ins, - )?; - blocks[backwards_jump_idx.idx()].start_depth = blocks[target.idx()].start_depth; - - let old_next = blocks[idx].next; - debug_assert!(old_next != BlockIdx::NULL); - - let last_mut = basicblock_last_instr_mut(&mut blocks[idx]).unwrap(); - last_mut.instr = reversed_opcode; - last_mut.target = old_next; + if loc.end_line == loc.line { + if line_delta == 0 && column < 80 && end_column - column < 16 && end_column >= column { + write_location_info_short_form(linetable, length, column, end_column); + return Ok(()); + } + if (0..3).contains(&line_delta) && column < 128 && end_column < 128 { + write_location_info_oneline_form(linetable, length, line_delta, column, end_column); + *prev_line = loc.line; + return Ok(()); + } + } - blocks[backwards_jump_idx.idx()].cold = blocks[idx].cold; - blocks[backwards_jump_idx.idx()].next = old_next; - blocks[idx].next = backwards_jump_idx; + write_location_info_long_form(linetable, loc, length, line_delta); + *prev_line = loc.line; Ok(()) } -/// flowgraph.c basicblock_inline_small_or_no_lineno_blocks -fn basicblock_inline_small_or_no_lineno_blocks( - blocks: &mut Blocks, - block_idx: BlockIdx, -) -> crate::InternalResult { - let Some(last) = basicblock_last_instr(&blocks[block_idx]).copied() else { - return Ok(false); - }; - if !last.instr.is_unconditional_jump() { - return Ok(false); +/// assemble.c assemble_emit_location +fn assemble_emit_location( + linetable: &mut Vec, + loc: LineTableLocation, + mut size: usize, + prev_line: &mut i32, + debug_ranges: bool, +) -> crate::InternalResult<()> { + if size == 0 { + return Ok(()); } - - let target = last.target; - debug_assert!(target != BlockIdx::NULL); - let small_exit_block = - basicblock_exits_scope(&blocks[target]) && blocks[target].instruction_used <= MAX_COPY_SIZE; - let no_lineno_no_fallthrough = - basicblock_has_no_lineno(&blocks[target]) && !bb_has_fallthrough(&blocks[target]); - if small_exit_block || no_lineno_no_fallthrough { - debug_assert!(is_jump(&last)); - let removed_jump_opcode = last.instr; - let last = basicblock_last_instr_mut(&mut blocks[block_idx]) - .expect("non-empty block has last instruction"); - set_to_nop(last); - blocks.basicblock_append_block_instructions(block_idx, target)?; - if no_lineno_no_fallthrough { - let last = basicblock_last_instr_mut(&mut blocks[block_idx]).unwrap(); - if last.instr.is_unconditional_jump() - && matches!( - removed_jump_opcode.into(), - AnyOpcode::Pseudo(PseudoOpcode::Jump) - ) - { - last.instr = PseudoOpcode::Jump.into(); - } - } - blocks[target].predecessors -= 1; - return Ok(true); + while size > 8 { + write_location_info_entry(linetable, loc, 8, prev_line, debug_ranges)?; + size -= 8; } - Ok(false) + write_location_info_entry(linetable, loc, size, prev_line, debug_ranges) } -/// flowgraph.c inline_small_or_no_lineno_blocks -fn inline_small_or_no_lineno_blocks(blocks: &mut Blocks) -> crate::InternalResult { - loop { - let mut changes = false; - let mut current = BlockIdx(0); - while current != BlockIdx::NULL { - let next = blocks[current.idx()].next; - let res = basicblock_inline_small_or_no_lineno_blocks(blocks, current)?; - if res { - changes = true; - } - - current = next; - } - if !changes { - return Ok(changes); - } +fn no_linetable_location() -> LineTableLocation { + LineTableLocation { + line: NO_LOCATION_OVERRIDE, + end_line: NO_LOCATION_OVERRIDE, + col: NO_LOCATION_OVERRIDE, + end_col: NO_LOCATION_OVERRIDE, } } -/// flowgraph.c basicblock_remove_redundant_nops -#[allow(clippy::unnecessary_wraps)] -fn basicblock_remove_redundant_nops( - blocks: &mut Blocks, - block_idx: BlockIdx, -) -> crate::InternalResult { - let bi = block_idx.idx(); - let mut dest = 0; - let mut prev_lineno = -1i32; - let instr_count = blocks[bi].instruction_used; - - for src in 0..instr_count { - let instr = blocks[bi].instructions[src]; - let lineno = instruction_lineno(&instr); - - if matches!(instr.instr.real(), Some(Instruction::Nop)) { - if lineno < 0 { - continue; - } - if prev_lineno == lineno { - continue; - } - if src < instr_count - 1 { - let next_lineno = instruction_lineno(&blocks[bi].instructions[src + 1]); - if next_lineno == lineno { - continue; - } - if next_lineno < 0 { - instr_set_loc( - &mut blocks[bi].instructions[src + 1], - instr.location, - instr.end_location, - instr.lineno_override, - ); - continue; - } - } else { - let next = next_nonempty_block(blocks, blocks[bi].next); - if next != BlockIdx::NULL { - let mut next_loc = no_linetable_location(); - let mut next_i = 0; - while next_i < blocks[next.idx()].instruction_used { - let instr = blocks[next.idx()].instructions[next_i]; - if matches!(instr.instr.real(), Some(Instruction::Nop)) - && instruction_lineno(&instr) < 0 - { - next_i += 1; - continue; - } - next_loc = instruction_linetable_location(&instr); - break; - } - if lineno == next_loc.line { - continue; - } - } - } - } - - if dest != src { - blocks[bi].instructions[dest] = blocks[bi].instructions[src]; - } - dest += 1; - prev_lineno = lineno; +fn next_linetable_location() -> LineTableLocation { + LineTableLocation { + line: NEXT_LOCATION_OVERRIDE, + end_line: NEXT_LOCATION_OVERRIDE, + col: NEXT_LOCATION_OVERRIDE, + end_col: NEXT_LOCATION_OVERRIDE, } - - debug_assert!(dest <= instr_count); - let num_removed = instr_count - dest; - blocks[bi].instruction_used = dest; - Ok(num_removed) } -/// flowgraph.c remove_redundant_nops -#[allow(clippy::unnecessary_wraps)] -fn remove_redundant_nops(blocks: &mut Blocks) -> crate::InternalResult { - let mut changes = 0; - let mut current = BlockIdx(0); - while current != BlockIdx::NULL { - let next = blocks[current.idx()].next; - let change = basicblock_remove_redundant_nops(blocks, current)?; - changes += change; - current = next; +/// assemble.c assemble_emit_exception_table_item +fn assemble_emit_exception_table_item(table: &mut Vec, value: i32, mut msb: u8) { + debug_assert!((msb | 128) == 128); + debug_assert!((0..(1 << 30)).contains(&value)); + let value = value as u32; + const CONTINUATION_BIT: u8 = 64; + if value >= 1 << 24 { + table.push(((value >> 24) as u8) | CONTINUATION_BIT | msb); + msb = 0; } - Ok(changes) -} - -/// flowgraph.c no_redundant_nops -#[cfg(debug_assertions)] -fn no_redundant_nops(blocks: &mut Blocks) -> bool { - matches!(remove_redundant_nops(blocks), Ok(0)) + if value >= 1 << 18 { + table.push((((value >> 18) & 0x3f) as u8) | CONTINUATION_BIT | msb); + msb = 0; + } + if value >= 1 << 12 { + table.push((((value >> 12) & 0x3f) as u8) | CONTINUATION_BIT | msb); + msb = 0; + } + if value >= 1 << 6 { + table.push((((value >> 6) & 0x3f) as u8) | CONTINUATION_BIT | msb); + msb = 0; + } + table.push(((value & 0x3f) as u8) | msb); } -/// flowgraph.c remove_redundant_jumps -fn remove_redundant_jumps(blocks: &mut Blocks) -> crate::InternalResult { - let mut changes = 0; - let mut current = BlockIdx(0); - while current != BlockIdx::NULL { - let block_idx = current.idx(); - let Some(last) = basicblock_last_instr(&blocks[block_idx]).copied() else { - current = blocks[block_idx].next; - continue; - }; - debug_assert!(!last.instr.is_assembler()); - if last.instr.is_unconditional_jump() { - let jump_target = next_nonempty_block(blocks, last.target); - if jump_target == BlockIdx::NULL { - return Err(InternalError::MalformedControlFlowGraph); - } - let next = next_nonempty_block(blocks, blocks[block_idx].next); - if jump_target == next { - changes += 1; - let last = basicblock_last_instr_mut(&mut blocks[block_idx]).unwrap(); - set_to_nop(last); - } - } - current = blocks[block_idx].next; +/// assemble.c assemble_emit_exception_table_entry +fn assemble_emit_exception_table_entry( + table: &mut Vec, + start: i32, + end: i32, + handler_offset: i32, + handler: InstructionSequenceExceptHandlerInfo, +) -> crate::InternalResult<()> { + const MAX_SIZE_OF_ENTRY: usize = 20; + if table + .len() + .checked_add(MAX_SIZE_OF_ENTRY) + .ok_or(InternalError::MalformedControlFlowGraph)? + >= table.capacity() + { + vec_try_resize_to_double_capacity(table)?; + } + let size = end - start; + debug_assert!(end > start); + let target = handler_offset; + let mut depth = handler.start_depth - 1; + if handler.preserve_lasti > 0 { + depth -= 1; } - Ok(changes) + debug_assert!(depth >= 0); + let depth_lasti = (depth << 1) | handler.preserve_lasti; + assemble_emit_exception_table_item(table, start, 1 << 7); + assemble_emit_exception_table_item(table, size, 0); + assemble_emit_exception_table_item(table, target, 0); + assemble_emit_exception_table_item(table, depth_lasti, 0); + Ok(()) } -/// flowgraph.c no_redundant_jumps -#[cfg(debug_assertions)] -fn no_redundant_jumps(blocks: &Blocks) -> bool { - let mut current = BlockIdx(0); - while current != BlockIdx::NULL { - let block = &blocks[current.idx()]; - if let Some(last) = basicblock_last_instr(block) - && last.instr.is_unconditional_jump() - { - let next = next_nonempty_block(blocks, block.next); - let jump_target = next_nonempty_block(blocks, last.target); - if jump_target == next { - assert!(next != BlockIdx::NULL); - if instruction_lineno(last) - == instruction_lineno(&blocks[next.idx()].instructions[0]) - { - assert_ne!( - instruction_lineno(last), - instruction_lineno(&blocks[next.idx()].instructions[0]), - "redundant jump has same line as fallthrough target" - ); - return false; - } +/// assemble.c assemble_exception_table +fn assemble_exception_table( + instrs: &[InstructionSequenceEntry], +) -> crate::InternalResult> { + let mut table = Vec::new(); + vec_try_reserve_exact(&mut table, DEFAULT_LNOTAB_SIZE)?; + let mut handler = InstructionSequenceExceptHandlerInfo { + h_label: NO_EXCEPTION_HANDLER_LABEL, + start_depth: -1, + preserve_lasti: -1, + }; + let mut start = -1; + let mut ioffset = 0i32; + + for i in 0..instrs.len() { + let instr = &instrs[i]; + if instr.except_handler.h_label != handler.h_label { + if handler.h_label >= 0 { + let handler_offset = instrs[handler.h_label as usize].i_offset; + assemble_emit_exception_table_entry( + &mut table, + start, + ioffset, + handler_offset, + handler, + )?; } + start = ioffset; + handler = instr.except_handler; } - current = block.next; + ioffset += instr_size(&instr.info) as i32; } - true -} -fn remove_redundant_nops_and_jumps(blocks: &mut Blocks) -> crate::InternalResult<()> { - loop { - // Convergence is guaranteed because the number of redundant jumps and - // nops only decreases. - let removed_nops = remove_redundant_nops(blocks)?; - let removed_jumps = remove_redundant_jumps(blocks)?; - if removed_nops + removed_jumps == 0 { - break; - } + if handler.h_label >= 0 { + let handler_offset = instrs[handler.h_label as usize].i_offset; + assemble_emit_exception_table_entry(&mut table, start, ioffset, handler_offset, handler)?; } - Ok(()) + + Ok(table.into_boxed_slice()) } -fn blocks_new_block(blocks: &mut Blocks) -> crate::InternalResult { - blocks - .try_reserve(1) - .map_err(|_| InternalError::MalformedControlFlowGraph)?; - let block_idx = BlockIdx( - blocks - .len() - .to_u32() - .ok_or(InternalError::MalformedControlFlowGraph)?, - ); - blocks.push(Block::default()); - Ok(block_idx) +/// pycore_opcode_utils.h IS_CONDITIONAL_JUMP_OPCODE +fn is_conditional_jump_opcode(instr: AnyInstruction) -> bool { + matches!( + instr.real().map(Into::into), + Some( + Opcode::PopJumpIfFalse + | Opcode::PopJumpIfTrue + | Opcode::PopJumpIfNone + | Opcode::PopJumpIfNotNone + ) + ) } /// flowgraph.c struct _PyCfgBuilder @@ -6022,7 +6024,7 @@ struct CfgBuilder { /// flowgraph.c cfg_builder_new_block fn cfg_builder_new_block(g: &mut CfgBuilder) -> crate::InternalResult { - let block = blocks_new_block(&mut g.blocks)?; + let block = g.blocks.blocks_new_block()?; g.blocks[block.idx()].allocation_next = g.block_list; g.blocks[block.idx()].cpython_label = InstructionSequenceLabel::NO_LABEL; g.block_list = block; @@ -6760,7 +6762,7 @@ pub(crate) fn convert_pseudo_ops(blocks: &mut Blocks) -> crate::InternalResult<( } // CPython flowgraph.c::convert_pseudo_ops() finishes by calling // remove_redundant_nops_and_jumps(). - remove_redundant_nops_and_jumps(blocks) + blocks.remove_redundant_nops_and_jumps() } /// flowgraph.c build_cellfixedoffsets