Skip to content

Commit 1762d98

Browse files
committed
Implement LOAD_ATTR inline caching with adaptive specialization
Add a type version counter (tp_version_tag) to PyType, with an invalidation cascade to subclasses. Add cache read/write methods (u16/u32/u64) to CodeUnits. Implement adaptive specialization in load_attr that replaces the opcode with one of the following specialized variants on first execution:
- LoadAttrMethodNoDict: cached method lookup for slotted types
- LoadAttrMethodWithValues: cached method with dict shadow check
- LoadAttrInstanceValue: direct dict lookup skipping descriptors

Specialized opcodes guard on type_version_tag and, on a cache miss, deoptimize back to the generic LOAD_ATTR with a backoff counter.
1 parent ae8c0b3 commit 1762d98

File tree

5 files changed

+346
-11
lines changed

5 files changed

+346
-11
lines changed

crates/codegen/src/ir.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -457,11 +457,13 @@ impl CodeInfo {
457457
.map(|byte| CodeUnit::new(Instruction::ExtendedArg, byte))
458458
.chain([CodeUnit { op, arg: lo_arg }]),
459459
);
460-
// Emit CACHE code units after the instruction
461-
instructions.extend(core::iter::repeat_n(
462-
CodeUnit::new(Instruction::Cache, 0.into()),
463-
cache_count,
464-
));
460+
// Emit CACHE code units after the instruction (all zeroed)
461+
if cache_count > 0 {
462+
instructions.extend(core::iter::repeat_n(
463+
CodeUnit::new(Instruction::Cache, 0.into()),
464+
cache_count,
465+
));
466+
}
465467
current_offset = offset_after;
466468
}
467469
next_block = block.next;

crates/compiler-core/src/bytecode.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,11 @@ pub struct CodeUnit {
343343

344344
const _: () = assert!(mem::size_of::<CodeUnit>() == 2);
345345

346+
/// Adaptive specialization: number of executions before attempting specialization.
347+
pub const ADAPTIVE_WARMUP_VALUE: u16 = 50;
348+
/// Adaptive specialization: backoff counter after deoptimization.
349+
pub const ADAPTIVE_BACKOFF_VALUE: u16 = 250;
350+
346351
impl CodeUnit {
347352
pub const fn new(op: Instruction, arg: OpArgByte) -> Self {
348353
Self { op, arg }
@@ -441,6 +446,63 @@ impl CodeUnits {
441446
core::ptr::write(op_ptr, new_op.into());
442447
}
443448
}
449+
450+
/// Write a u16 value into a CACHE code unit at `index`.
451+
/// Each CodeUnit is 2 bytes (#[repr(C)]: op u8 + arg u8), so one u16 fits exactly.
452+
///
453+
/// # Safety
454+
/// - `index` must be in bounds and point to a CACHE entry.
455+
/// - The caller must ensure no concurrent reads/writes to the same slot.
456+
pub unsafe fn write_cache_u16(&self, index: usize, value: u16) {
457+
unsafe {
458+
let units = &mut *self.0.get();
459+
let ptr = units.as_mut_ptr().add(index) as *mut u8;
460+
core::ptr::write_unaligned(ptr as *mut u16, value);
461+
}
462+
}
463+
464+
/// Read a u16 value from a CACHE code unit at `index`.
465+
pub fn read_cache_u16(&self, index: usize) -> u16 {
466+
let units = unsafe { &*self.0.get() };
467+
let ptr = units.as_ptr().wrapping_add(index) as *const u8;
468+
unsafe { core::ptr::read_unaligned(ptr as *const u16) }
469+
}
470+
471+
/// Write a u32 value across two consecutive CACHE code units starting at `index`.
472+
///
473+
/// # Safety
474+
/// Same requirements as `write_cache_u16`.
475+
pub unsafe fn write_cache_u32(&self, index: usize, value: u32) {
476+
unsafe {
477+
self.write_cache_u16(index, value as u16);
478+
self.write_cache_u16(index + 1, (value >> 16) as u16);
479+
}
480+
}
481+
482+
/// Read a u32 value from two consecutive CACHE code units starting at `index`.
483+
pub fn read_cache_u32(&self, index: usize) -> u32 {
484+
let lo = self.read_cache_u16(index) as u32;
485+
let hi = self.read_cache_u16(index + 1) as u32;
486+
lo | (hi << 16)
487+
}
488+
489+
/// Write a u64 value across four consecutive CACHE code units starting at `index`.
490+
///
491+
/// # Safety
492+
/// Same requirements as `write_cache_u16`.
493+
pub unsafe fn write_cache_u64(&self, index: usize, value: u64) {
494+
unsafe {
495+
self.write_cache_u32(index, value as u32);
496+
self.write_cache_u32(index + 2, (value >> 32) as u32);
497+
}
498+
}
499+
500+
/// Read a u64 value from four consecutive CACHE code units starting at `index`.
501+
pub fn read_cache_u64(&self, index: usize) -> u64 {
502+
let lo = self.read_cache_u32(index) as u64;
503+
let hi = self.read_cache_u32(index + 2) as u64;
504+
lo | (hi << 32)
505+
}
444506
}
445507

446508
/// A Constant (which usually encapsulates data within it)

crates/vm/src/builtins/type.rs

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,14 @@ use crate::{
2828
Representable, SLOT_DEFS, SetAttr, TypeDataRef, TypeDataRefMut, TypeDataSlot,
2929
},
3030
};
31-
use core::{any::Any, borrow::Borrow, ops::Deref, pin::Pin, ptr::NonNull};
31+
use core::{
32+
any::Any,
33+
borrow::Borrow,
34+
ops::Deref,
35+
pin::Pin,
36+
ptr::NonNull,
37+
sync::atomic::{AtomicU32, Ordering},
38+
};
3239
use indexmap::{IndexMap, map::Entry};
3340
use itertools::Itertools;
3441
use num_traits::ToPrimitive;
@@ -44,8 +51,12 @@ pub struct PyType {
4451
pub attributes: PyRwLock<PyAttributes>,
4552
pub slots: PyTypeSlots,
4653
pub heaptype_ext: Option<Pin<Box<HeapTypeExt>>>,
54+
/// Type version tag for inline caching. 0 means unassigned/invalidated.
55+
pub tp_version_tag: AtomicU32,
4756
}
4857

58+
/// Process-wide counter from which `PyType::assign_version_tag` draws version
/// tags. It starts at 1 because a tag of 0 marks an unassigned or invalidated
/// type.
static NEXT_TYPE_VERSION: AtomicU32 = AtomicU32::new(1);
59+
4960
unsafe impl crate::object::Traverse for PyType {
5061
fn traverse(&self, tracer_fn: &mut crate::object::TraverseFn<'_>) {
5162
self.base.traverse(tracer_fn);
@@ -188,6 +199,27 @@ fn is_subtype_with_mro(a_mro: &[PyTypeRef], a: &Py<PyType>, b: &Py<PyType>) -> b
188199
}
189200

190201
impl PyType {
202+
/// Assign a fresh version tag. Returns 0 on overflow (all caches invalidated).
203+
pub fn assign_version_tag(&self) -> u32 {
204+
let v = NEXT_TYPE_VERSION.fetch_add(1, Ordering::Relaxed);
205+
if v == 0 {
206+
return 0;
207+
}
208+
self.tp_version_tag.store(v, Ordering::Release);
209+
v
210+
}
211+
212+
/// Invalidate this type's version tag and cascade to all subclasses.
213+
pub fn modified(&self) {
214+
self.tp_version_tag.store(0, Ordering::Release);
215+
let subclasses = self.subclasses.read();
216+
for weak_ref in subclasses.iter() {
217+
if let Some(sub) = weak_ref.upgrade() {
218+
sub.downcast_ref::<PyType>().unwrap().modified();
219+
}
220+
}
221+
}
222+
191223
pub fn new_simple_heap(
192224
name: &str,
193225
base: &Py<PyType>,
@@ -365,6 +397,7 @@ impl PyType {
365397
attributes: PyRwLock::new(attrs),
366398
slots,
367399
heaptype_ext: Some(Pin::new(Box::new(heaptype_ext))),
400+
tp_version_tag: AtomicU32::new(0),
368401
},
369402
metaclass,
370403
None,
@@ -418,6 +451,7 @@ impl PyType {
418451
attributes: PyRwLock::new(attrs),
419452
slots,
420453
heaptype_ext: None,
454+
tp_version_tag: AtomicU32::new(0),
421455
},
422456
metaclass,
423457
None,
@@ -799,6 +833,9 @@ impl PyType {
799833
}
800834
update_mro_recursively(zelf, vm)?;
801835

836+
// Invalidate inline caches
837+
zelf.modified();
838+
802839
// TODO: do any old slots need to be cleaned up first?
803840
zelf.init_slots(&vm.ctx);
804841

@@ -1903,6 +1940,9 @@ impl SetAttr for PyType {
19031940
)));
19041941
}
19051942
}
1943+
// Invalidate inline caches that depend on this type's attributes
1944+
zelf.modified();
1945+
19061946
if attr_name.as_wtf8().starts_with("__") && attr_name.as_wtf8().ends_with("__") {
19071947
if assign {
19081948
zelf.update_slot::<true>(attr_name, &vm.ctx);

0 commit comments

Comments
 (0)