Commit 3bc025f

Add Instruction::deoptimize() and CodeUnits::original_bytes()
- deoptimize() maps specialized opcodes back to their base adaptive variant
- original_bytes() produces deoptimized bytecode with zeroed CACHE entries
- co_code now returns deoptimized bytes, _co_code_adaptive returns current bytes
- Marshal serialization uses original_bytes() instead of raw transmute
1 parent b81cd34 commit 3bc025f

5 files changed

Lines changed: 146 additions & 15 deletions

crates/compiler-core/src/bytecode.rs

Lines changed: 24 additions & 0 deletions
@@ -525,6 +525,30 @@ impl CodeUnits {
         units[index].arg = OpArgByte::from(value);
     }
 
+    /// Produce a clean copy of the bytecode suitable for serialization
+    /// (marshal) and `co_code`. Specialized opcodes are mapped back to their
+    /// base variants via `deoptimize()` and all CACHE entries are zeroed.
+    pub fn original_bytes(&self) -> Vec<u8> {
+        let units = unsafe { &*self.0.get() };
+        let mut out = Vec::with_capacity(units.len() * 2);
+        let len = units.len();
+        let mut i = 0;
+        while i < len {
+            let op = units[i].op.deoptimize();
+            let caches = op.cache_entries();
+            out.push(u8::from(op));
+            out.push(u8::from(units[i].arg));
+            // Zero-fill all CACHE entries (counter + cached data)
+            for _ in 0..caches {
+                i += 1;
+                out.push(0); // op = Cache = 0
+                out.push(0); // arg = 0
+            }
+            i += 1;
+        }
+        out
+    }
+
     /// Initialize adaptive warmup counters for all cacheable instructions.
     /// Called lazily at RESUME (first execution of a code object).
     /// Uses the `arg` byte of the first CACHE entry, preserving `op = Instruction::Cache`.
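
For orientation, the walk above can be modeled in isolation. The following is a minimal standalone sketch, not the crate's CodeUnits type: `code` is a plain slice of (op, arg) byte pairs, and the `deopt` and `cache_width` closures are hypothetical stand-ins for Instruction::deoptimize() and Instruction::cache_entries(); the opcode numbers are invented for the example.

// Standalone model of the original_bytes() walk over (op, arg) byte pairs.
// `deopt` and `cache_width` are hypothetical stand-ins for the real
// Instruction::deoptimize() and Instruction::cache_entries().
fn original_bytes_model(
    code: &[(u8, u8)],
    deopt: impl Fn(u8) -> u8,
    cache_width: impl Fn(u8) -> usize,
) -> Vec<u8> {
    let mut out = Vec::with_capacity(code.len() * 2);
    let mut i = 0;
    while i < code.len() {
        let op = deopt(code[i].0);
        out.push(op);
        out.push(code[i].1); // the real arg byte comes from the code unit, not from deopt
        // Skip the CACHE units that follow, emitting zeroed pairs instead.
        for _ in 0..cache_width(op) {
            i += 1;
            out.push(0);
            out.push(0);
        }
        i += 1;
    }
    out
}

fn main() {
    // Invented opcodes: 7 is a "specialized" form of 5, and 5 owns one CACHE unit.
    let quickened = [(7u8, 3u8), (42, 99), (1, 0)];
    let bytes = original_bytes_model(
        &quickened,
        |op| if op == 7 { 5 } else { op },
        |op| if op == 5 { 1 } else { 0 },
    );
    assert_eq!(bytes, vec![5, 3, 0, 0, 1, 0]);
}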

crates/compiler-core/src/bytecode/instruction.rs

Lines changed: 113 additions & 0 deletions
@@ -512,6 +512,119 @@ impl Instruction {
         })
     }
 
+    /// Map a specialized opcode back to its adaptive (base) variant.
+    /// `_PyOpcode_Deopt`
+    pub fn deoptimize(self) -> Self {
+        match self {
+            // LOAD_ATTR specializations
+            Self::LoadAttrClass
+            | Self::LoadAttrClassWithMetaclassCheck
+            | Self::LoadAttrGetattributeOverridden
+            | Self::LoadAttrInstanceValue
+            | Self::LoadAttrMethodLazyDict
+            | Self::LoadAttrMethodNoDict
+            | Self::LoadAttrMethodWithValues
+            | Self::LoadAttrModule
+            | Self::LoadAttrNondescriptorNoDict
+            | Self::LoadAttrNondescriptorWithValues
+            | Self::LoadAttrProperty
+            | Self::LoadAttrSlot
+            | Self::LoadAttrWithHint => Self::LoadAttr { idx: Arg::marker() },
+            // BINARY_OP specializations
+            Self::BinaryOpAddFloat
+            | Self::BinaryOpAddInt
+            | Self::BinaryOpAddUnicode
+            | Self::BinaryOpExtend
+            | Self::BinaryOpInplaceAddUnicode
+            | Self::BinaryOpMultiplyFloat
+            | Self::BinaryOpMultiplyInt
+            | Self::BinaryOpSubscrDict
+            | Self::BinaryOpSubscrGetitem
+            | Self::BinaryOpSubscrListInt
+            | Self::BinaryOpSubscrListSlice
+            | Self::BinaryOpSubscrStrInt
+            | Self::BinaryOpSubscrTupleInt
+            | Self::BinaryOpSubtractFloat
+            | Self::BinaryOpSubtractInt => Self::BinaryOp { op: Arg::marker() },
+            // CALL specializations
+            Self::CallAllocAndEnterInit
+            | Self::CallBoundMethodExactArgs
+            | Self::CallBoundMethodGeneral
+            | Self::CallBuiltinClass
+            | Self::CallBuiltinFast
+            | Self::CallBuiltinFastWithKeywords
+            | Self::CallBuiltinO
+            | Self::CallIsinstance
+            | Self::CallLen
+            | Self::CallListAppend
+            | Self::CallMethodDescriptorFast
+            | Self::CallMethodDescriptorFastWithKeywords
+            | Self::CallMethodDescriptorNoargs
+            | Self::CallMethodDescriptorO
+            | Self::CallNonPyGeneral
+            | Self::CallPyExactArgs
+            | Self::CallPyGeneral
+            | Self::CallStr1
+            | Self::CallTuple1
+            | Self::CallType1 => Self::Call {
+                nargs: Arg::marker(),
+            },
+            // CALL_KW specializations
+            Self::CallKwBoundMethod | Self::CallKwNonPy | Self::CallKwPy => Self::CallKw {
+                nargs: Arg::marker(),
+            },
+            // TO_BOOL specializations
+            Self::ToBoolAlwaysTrue
+            | Self::ToBoolBool
+            | Self::ToBoolInt
+            | Self::ToBoolList
+            | Self::ToBoolNone
+            | Self::ToBoolStr => Self::ToBool,
+            // COMPARE_OP specializations
+            Self::CompareOpFloat | Self::CompareOpInt | Self::CompareOpStr => {
+                Self::CompareOp { op: Arg::marker() }
+            }
+            // CONTAINS_OP specializations
+            Self::ContainsOpDict | Self::ContainsOpSet => Self::ContainsOp(Arg::marker()),
+            // FOR_ITER specializations
+            Self::ForIterGen | Self::ForIterList | Self::ForIterRange | Self::ForIterTuple => {
+                Self::ForIter {
+                    target: Arg::marker(),
+                }
+            }
+            // LOAD_GLOBAL specializations
+            Self::LoadGlobalBuiltin | Self::LoadGlobalModule => Self::LoadGlobal(Arg::marker()),
+            // STORE_ATTR specializations
+            Self::StoreAttrInstanceValue | Self::StoreAttrSlot | Self::StoreAttrWithHint => {
+                Self::StoreAttr { idx: Arg::marker() }
+            }
+            // LOAD_SUPER_ATTR specializations
+            Self::LoadSuperAttrAttr | Self::LoadSuperAttrMethod => {
+                Self::LoadSuperAttr { arg: Arg::marker() }
+            }
+            // STORE_SUBSCR specializations
+            Self::StoreSubscrDict | Self::StoreSubscrListInt => Self::StoreSubscr,
+            // UNPACK_SEQUENCE specializations
+            Self::UnpackSequenceList | Self::UnpackSequenceTuple | Self::UnpackSequenceTwoTuple => {
+                Self::UnpackSequence {
+                    size: Arg::marker(),
+                }
+            }
+            // SEND specializations
+            Self::SendGen => Self::Send {
+                target: Arg::marker(),
+            },
+            // LOAD_CONST specializations
+            Self::LoadConstImmortal | Self::LoadConstMortal => {
+                Self::LoadConst { idx: Arg::marker() }
+            }
+            // RESUME specializations
+            Self::ResumeCheck => Self::Resume { arg: Arg::marker() },
+            // Everything else maps to itself
+            _ => self,
+        }
+    }
+
     /// Number of CACHE code units that follow this instruction.
     /// _PyOpcode_Caches
     pub fn cache_entries(self) -> usize {
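
As a side illustration of the shape of this mapping, here is a self-contained toy, not RustPython's Instruction enum: each specialized variant folds back to its adaptive base with a placeholder argument (mirroring `Arg::marker()` above), and every other opcode maps to itself.

// Toy model of the deoptimize() mapping with an invented opcode enum.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Op {
    LoadAttr { idx: u8 },
    LoadAttrSlot { idx: u8 },
    LoadAttrModule { idx: u8 },
    Nop,
}

impl Op {
    fn deoptimize(self) -> Self {
        match self {
            // Specialized LOAD_ATTR variants fold back to the base opcode; the
            // argument becomes a placeholder, so callers that need the real arg
            // read it from the code unit itself (as original_bytes() does).
            Op::LoadAttrSlot { .. } | Op::LoadAttrModule { .. } => Op::LoadAttr { idx: 0 },
            // Everything else maps to itself.
            other => other,
        }
    }
}

fn main() {
    assert_eq!(Op::LoadAttrSlot { idx: 3 }.deoptimize(), Op::LoadAttr { idx: 0 });
    assert_eq!(Op::LoadAttrModule { idx: 7 }.deoptimize(), Op::LoadAttr { idx: 0 });
    assert_eq!(Op::Nop.deoptimize(), Op::Nop);
    // Already-base opcodes are left untouched.
    assert_eq!(Op::LoadAttr { idx: 3 }.deoptimize(), Op::LoadAttr { idx: 3 });
}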

crates/compiler-core/src/marshal.rs

Lines changed: 2 additions & 3 deletions
@@ -662,9 +662,8 @@ pub fn serialize_value<W: Write, D: Dumpable>(
 
 pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>) {
     write_len(buf, code.instructions.len());
-    // SAFETY: it's ok to transmute CodeUnit to [u8; 2]
-    let (_, instructions_bytes, _) = unsafe { code.instructions.align_to() };
-    buf.write_slice(instructions_bytes);
+    let original = code.instructions.original_bytes();
+    buf.write_slice(&original);
 
     write_len(buf, code.locations.len());
     for (start, end) in &*code.locations {

crates/vm/src/builtins/code.rs

Lines changed: 6 additions & 7 deletions
@@ -684,7 +684,12 @@ impl PyCode {
 
     #[pygetset]
     pub fn co_code(&self, vm: &VirtualMachine) -> crate::builtins::PyBytesRef {
-        // SAFETY: CodeUnit is #[repr(C)] with size 2, so we can safely transmute to bytes
+        vm.ctx.new_bytes(self.code.instructions.original_bytes())
+    }
+
+    #[pygetset]
+    pub fn _co_code_adaptive(&self, vm: &VirtualMachine) -> crate::builtins::PyBytesRef {
+        // Return current (possibly quickened/specialized) bytecode
         let bytes = unsafe {
             core::slice::from_raw_parts(
                 self.code.instructions.as_ptr() as *const u8,
@@ -694,12 +699,6 @@ impl PyCode {
         vm.ctx.new_bytes(bytes.to_vec())
     }
 
-    #[pygetset]
-    pub fn _co_code_adaptive(&self, vm: &VirtualMachine) -> crate::builtins::PyBytesRef {
-        // RustPython doesn't have adaptive/specialized bytecode, so return regular co_code
-        self.co_code(vm)
-    }
-
     #[pygetset]
     pub fn co_freevars(&self, vm: &VirtualMachine) -> PyTupleRef {
         let names = self

crates/vm/src/frame.rs

Lines changed: 1 addition & 5 deletions
@@ -2293,11 +2293,7 @@ impl ExecutingFrame<'_> {
             Instruction::RaiseVarargs { kind } => self.execute_raise(vm, kind.get(arg)),
             Instruction::Resume { .. } => {
                 // Lazy quickening: initialize adaptive counters on first execution
-                if !self
-                    .code
-                    .quickened
-                    .swap(true, atomic::Ordering::Relaxed)
-                {
+                if !self.code.quickened.swap(true, atomic::Ordering::Relaxed) {
                     self.code.instructions.quicken();
                 }
                 // Check if bytecode needs re-instrumentation
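
The guard above relies on AtomicBool::swap returning the previous value, so the quickening work runs exactly once per code object. Below is a self-contained sketch of that run-once pattern; the `Code` struct and `on_resume` method are hypothetical stand-ins, not the crate's real frame or code types.

use std::sync::atomic::{AtomicBool, Ordering};

// Hypothetical stand-in for the code object; only the flag matters here.
struct Code {
    quickened: AtomicBool,
}

impl Code {
    // swap(true) returns the previous value, so the body runs only on the
    // first call: later calls observe `true` and skip the initialization.
    fn on_resume(&self) {
        if !self.quickened.swap(true, Ordering::Relaxed) {
            println!("quickening bytecode (first execution only)");
        }
    }
}

fn main() {
    let code = Code { quickened: AtomicBool::new(false) };
    code.on_resume(); // prints
    code.on_resume(); // no-op
}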
