Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Implement side-table instrumented opcode execution
Add CoMonitoringData with line_opcodes and per_instruction_opcodes
side-tables. INSTRUMENTED_LINE and INSTRUMENTED_INSTRUCTION read
original opcodes from side-tables and re-dispatch after firing events.

- Add decode_exception_table() and CodeUnits::replace_op()
- Add Instruction::to_instrumented/to_base/is_instrumented mappings
- Three-phase instrument_code: de-instrument, re-instrument regular,
  then layer INSTRUCTION and LINE with side-table storage
- Mark exception handler targets as line starts in LINE placement
- InstrumentedLine resolves side-table chain atomically when wrapping
  InstrumentedInstruction
- InstrumentedForIter fires both BRANCH_LEFT and BRANCH_RIGHT
- Remove callback on DISABLE return for non-local events
  • Loading branch information
youknowone committed Feb 27, 2026
commit 9c329bd62f734269c1f564cf503ad626731f2300
45 changes: 45 additions & 0 deletions crates/compiler-core/src/bytecode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,34 @@ pub fn find_exception_handler(table: &[u8], offset: u32) -> Option<ExceptionTabl
None
}

/// Decode every entry of an exception table.
///
/// Each entry is read as four consecutive varints: the start offset (via
/// `read_varint_with_start`), the size, the handler target, and a combined
/// depth/lasti word. Decoding stops at the end of `table` or at the first
/// field that cannot be read; a partially read trailing entry is dropped.
///
/// Returns the entries in the order they appear in `table`.
pub fn decode_exception_table(table: &[u8]) -> Vec<ExceptionTableEntry> {
    let mut pos = 0;
    core::iter::from_fn(|| {
        if pos >= table.len() {
            return None;
        }
        let start = read_varint_with_start(table, &mut pos)?;
        let size = read_varint(table, &mut pos)?;
        let target = read_varint(table, &mut pos)?;
        let depth_lasti = read_varint(table, &mut pos)?;
        Some(ExceptionTableEntry {
            start,
            end: start + size,
            target,
            // The low bit is the push-lasti flag; the remaining bits are the depth.
            depth: (depth_lasti >> 1) as u16,
            push_lasti: (depth_lasti & 1) != 0,
        })
    })
    .collect()
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/// CPython 3.11+ linetable location info codes
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(u8)]
Expand Down Expand Up @@ -370,6 +398,23 @@ impl Deref for CodeUnits {
}
}

impl CodeUnits {
/// Overwrite the opcode of the code unit at `index`, leaving its arg byte
/// untouched.
///
/// # Safety
/// - `index` must be within the bounds of the underlying code-unit storage.
/// - `new_op` must give the existing arg byte the same meaning as the
///   opcode it replaces.
pub unsafe fn replace_op(&self, index: usize, new_op: Instruction) {
    let opcode: u8 = new_op.into();
    // SAFETY: the caller guarantees `index` is in bounds, so the offset
    // pointer stays inside the storage. The opcode is the first byte of a
    // `CodeUnit` (it is `#[repr(C)]`), so a one-byte store leaves the arg
    // byte alone.
    // NOTE(review): the store goes through a pointer derived from `&self`;
    // confirm the backing storage is allowed to be mutated this way.
    unsafe {
        let unit = (self.0.as_ptr() as *mut CodeUnit).add(index);
        core::ptr::write_volatile(unit.cast::<u8>(), opcode);
    }
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/// A Constant (which usually encapsulates data within it)
///
/// # Examples
Expand Down
94 changes: 94 additions & 0 deletions crates/compiler-core/src/bytecode/instruction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,100 @@ impl TryFrom<u8> for Instruction {
}
}

impl Instruction {
/// Report whether this opcode is instrumented in any form: a regular
/// INSTRUMENTED_* opcode (one that `to_base` can map back),
/// INSTRUMENTED_LINE, or INSTRUMENTED_INSTRUCTION.
pub fn is_instrumented(self) -> bool {
    match self {
        // These two have no base counterpart, so `to_base` cannot detect them.
        Self::InstrumentedLine | Self::InstrumentedInstruction => true,
        regular => regular.to_base().is_some(),
    }
}

/// Map a base opcode to its regular INSTRUMENTED_* variant.
///
/// Returns `None` when the opcode has no instrumented counterpart. Any
/// argument fields of the base opcode are ignored; the result is chosen by
/// variant only. This is the inverse of `to_base`.
///
/// # Panics (debug)
/// In debug builds, panics when `self` is one of the INSTRUMENTED_* opcodes
/// that `to_base` maps back to a base opcode. `InstrumentedLine` and
/// `InstrumentedInstruction` are not caught by this check (`to_base` yields
/// `None` for them); they fall through to the catch-all arm and return `None`.
pub fn to_instrumented(self) -> Option<Self> {
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to_instrumented and to_based transform into each others in the same type.

// Debug-only guard: instrumenting an opcode twice indicates a caller bug.
debug_assert!(
self.to_base().is_none(),
"to_instrumented called on already-instrumented opcode {self:?}"
);
Some(match self {
Self::Resume { .. } => Self::InstrumentedResume,
Self::ReturnValue => Self::InstrumentedReturnValue,
Self::YieldValue { .. } => Self::InstrumentedYieldValue,
Self::Call { .. } => Self::InstrumentedCall,
Self::CallKw { .. } => Self::InstrumentedCallKw,
Self::CallFunctionEx => Self::InstrumentedCallFunctionEx,
Self::LoadSuperAttr { .. } => Self::InstrumentedLoadSuperAttr,
Self::JumpForward { .. } => Self::InstrumentedJumpForward,
Self::JumpBackward { .. } => Self::InstrumentedJumpBackward,
Self::ForIter { .. } => Self::InstrumentedForIter,
Self::EndFor => Self::InstrumentedEndFor,
Self::EndSend => Self::InstrumentedEndSend,
Self::PopJumpIfTrue { .. } => Self::InstrumentedPopJumpIfTrue,
Self::PopJumpIfFalse { .. } => Self::InstrumentedPopJumpIfFalse,
Self::PopJumpIfNone { .. } => Self::InstrumentedPopJumpIfNone,
Self::PopJumpIfNotNone { .. } => Self::InstrumentedPopJumpIfNotNone,
Self::NotTaken => Self::InstrumentedNotTaken,
Self::PopIter => Self::InstrumentedPopIter,
Self::EndAsyncFor => Self::InstrumentedEndAsyncFor,
// Every other opcode has no instrumented counterpart.
_ => return None,
})
}

/// Map a regular INSTRUMENTED_* opcode back to its base variant.
///
/// Returns `None` for every other opcode. That includes base opcodes as
/// well as `InstrumentedLine` and `InstrumentedInstruction`, which have no
/// arm here. `is_instrumented` and the debug check in `to_instrumented`
/// both rely on this `None`-for-everything-else behaviour.
///
/// The returned base opcode uses `Arg::marker()` for typed fields —
/// only the opcode byte matters since `replace_op` preserves the arg byte.
///
/// There is no debug assertion in this function: calling it on a base
/// opcode simply yields `None`.
pub fn to_base(self) -> Option<Self> {
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Some(match self {
Self::InstrumentedResume => Self::Resume { arg: Arg::marker() },
Self::InstrumentedReturnValue => Self::ReturnValue,
Self::InstrumentedYieldValue => Self::YieldValue { arg: Arg::marker() },
Self::InstrumentedCall => Self::Call {
nargs: Arg::marker(),
},
Self::InstrumentedCallKw => Self::CallKw {
nargs: Arg::marker(),
},
Self::InstrumentedCallFunctionEx => Self::CallFunctionEx,
Self::InstrumentedLoadSuperAttr => Self::LoadSuperAttr { arg: Arg::marker() },
Self::InstrumentedJumpForward => Self::JumpForward {
target: Arg::marker(),
},
Self::InstrumentedJumpBackward => Self::JumpBackward {
target: Arg::marker(),
},
Self::InstrumentedForIter => Self::ForIter {
target: Arg::marker(),
},
Self::InstrumentedEndFor => Self::EndFor,
Self::InstrumentedEndSend => Self::EndSend,
Self::InstrumentedPopJumpIfTrue => Self::PopJumpIfTrue {
target: Arg::marker(),
},
Self::InstrumentedPopJumpIfFalse => Self::PopJumpIfFalse {
target: Arg::marker(),
},
Self::InstrumentedPopJumpIfNone => Self::PopJumpIfNone {
target: Arg::marker(),
},
Self::InstrumentedPopJumpIfNotNone => Self::PopJumpIfNotNone {
target: Arg::marker(),
},
Self::InstrumentedNotTaken => Self::NotTaken,
Self::InstrumentedPopIter => Self::PopIter,
Self::InstrumentedEndAsyncFor => Self::EndAsyncFor,
// Not a regular INSTRUMENTED_* opcode.
_ => return None,
})
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

impl InstructionMetadata for Instruction {
#[inline]
fn label_arg(&self) -> Option<Arg<Label>> {
Expand Down
22 changes: 21 additions & 1 deletion crates/vm/src/builtins/code.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Infamous code object. The python class `code`

use super::{PyBytesRef, PyStrRef, PyTupleRef, PyType};
use crate::common::lock::PyMutex;
use crate::{
AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine,
builtins::PyStrInterned,
Expand All @@ -15,7 +16,7 @@ use alloc::fmt;
use core::{
borrow::Borrow,
ops::Deref,
sync::atomic::{AtomicPtr, Ordering},
sync::atomic::{AtomicPtr, AtomicU64, Ordering},
};
use malachite_bigint::BigInt;
use num_traits::Zero;
Expand Down Expand Up @@ -324,10 +325,27 @@ impl<B: AsRef<[u8]>> IntoCodeObject for frozen::FrozenCodeObject<B> {
}
}

/// Monitoring side-table attached to a single code object
/// (the counterpart of `_PyCoMonitoringData`).
///
/// It keeps the original opcodes that were displaced when an instruction
/// was overwritten with INSTRUMENTED_LINE or INSTRUMENTED_INSTRUCTION.
pub struct CoMonitoringData {
    /// Opcodes displaced by INSTRUMENTED_LINE, one slot per instruction
    /// index. A slot holding 0 means that instruction is not instrumented
    /// for LINE.
    pub line_opcodes: Vec<u8>,

    /// Opcodes displaced by INSTRUMENTED_INSTRUCTION, one slot per
    /// instruction index. A slot holding 0 means that instruction is not
    /// instrumented for INSTRUCTION.
    pub per_instruction_opcodes: Vec<u8>,
}

// Payload type registered through `pyclass` under the Python-visible name
// `code`; it wraps a `CodeObject` together with per-object runtime state.
#[pyclass(module = false, name = "code")]
pub struct PyCode {
/// The wrapped code object.
pub code: CodeObject,
/// Atomic pointer to an interned string; initialised from the constructor's
/// `sp` value. NOTE(review): presumably the source path — confirm against
/// the accessor that reads it.
source_path: AtomicPtr<PyStrInterned>,
/// Version counter for lazy re-instrumentation.
/// Compared against `PyGlobalState::instrumentation_version` at RESUME.
/// Starts at 0 for a freshly constructed object.
pub instrumentation_version: AtomicU64,
/// Side-table for INSTRUMENTED_LINE / INSTRUMENTED_INSTRUCTION.
/// Starts as `None`; guarded by a `PyMutex`.
pub monitoring_data: PyMutex<Option<CoMonitoringData>>,
}

impl Deref for PyCode {
Expand All @@ -343,6 +361,8 @@ impl PyCode {
Self {
code,
source_path: AtomicPtr::new(sp),
instrumentation_version: AtomicU64::new(0),
monitoring_data: PyMutex::new(None),
}
}

Expand Down
Loading