Skip to content

Commit a98c646

Browse files
authored
freelist optimization (#7337)
* Add object freelist infrastructure and PyFloat freelist Add freelist_push/freelist_pop hooks to PyPayload trait with default no-op implementations. Modify default_dealloc to offer dead objects to the freelist before freeing memory. Modify PyRef::new_ref to try popping from the freelist before allocating. Implement thread-local freelist for PyFloat (max 100 entries). Float objects are ideal candidates: fixed-size payload (f64), no GC tracking, no instance dict, trivial Drop. Benchmark improvement on float-heavy workloads: float_arith: ~12% faster float_list: ~17% faster * Add PyList freelist and GC-safe freelist infrastructure Add HAS_FREELIST const to PyPayload trait. For freelist types, GC untracking is done immediately (not deferred) during dealloc to prevent race conditions when the object is reused. Restructure new_ref so freelist-popped objects also get GC tracked, enabling freelist support for GC-tracked types like PyList. Add drop_in_place for old payload before writing new one, required for types with non-trivial Drop (e.g. PyList's RwLock<Vec>). Implement thread-local freelist for PyList (max 80 entries). * Add PyDict freelist and exact-type guard for freelist push Add thread-local freelist for PyDict (max 80 entries). Add heaptype check in default_dealloc to prevent subclass instances from entering the freelist. Subclass objects have a different Python type than the base type, so reusing them would return objects with the wrong __class__. Skip PyTuple freelist: structseq types (stat_result, struct_time) are static subtypes sharing the same Rust payload, making type-safe reuse impractical with heaptype check alone. * Add PySlice freelist (max 1, matching PySlice_MAXFREELIST) * Add FreeList<T> wrapper to drain cached objects on thread teardown Replace Cell<Vec<*mut PyObject>> with Cell<FreeList<T>> which implements Drop to properly deallocate PyInner<T> when threads exit. Also fix freelist_pop safety doc to match actual contract. * Add manual Traverse impl for PySlice with clear() and fix comment * Deduplicate allocation fallback in PyRef::new_ref
1 parent 798f9e8 commit a98c646

File tree

8 files changed

+289
-12
lines changed

8 files changed

+289
-12
lines changed

.cspell.dict/cpython.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ finalizers
7070
firsttraceable
7171
flowgraph
7272
formatfloat
73+
freelist
7374
freevar
7475
freevars
7576
fromlist

crates/vm/src/builtins/dict.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ use crate::{
2626
vm::VirtualMachine,
2727
};
2828
use alloc::fmt;
29+
use core::cell::Cell;
30+
use core::ptr::NonNull;
2931
use rustpython_common::lock::PyMutex;
3032
use rustpython_common::wtf8::Wtf8Buf;
3133

@@ -60,11 +62,43 @@ impl fmt::Debug for PyDict {
6062
}
6163
}
6264

65+
thread_local! {
66+
static DICT_FREELIST: Cell<crate::object::FreeList<PyDict>> = const { Cell::new(crate::object::FreeList::new()) };
67+
}
68+
6369
impl PyPayload for PyDict {
70+
const MAX_FREELIST: usize = 80;
71+
const HAS_FREELIST: bool = true;
72+
6473
#[inline]
6574
fn class(ctx: &Context) -> &'static Py<PyType> {
6675
ctx.types.dict_type
6776
}
77+
78+
#[inline]
79+
unsafe fn freelist_push(obj: *mut PyObject) -> bool {
80+
DICT_FREELIST.with(|fl| {
81+
let mut list = fl.take();
82+
let stored = if list.len() < Self::MAX_FREELIST {
83+
list.push(obj);
84+
true
85+
} else {
86+
false
87+
};
88+
fl.set(list);
89+
stored
90+
})
91+
}
92+
93+
#[inline]
94+
unsafe fn freelist_pop() -> Option<NonNull<PyObject>> {
95+
DICT_FREELIST.with(|fl| {
96+
let mut list = fl.take();
97+
let result = list.pop().map(|p| unsafe { NonNull::new_unchecked(p) });
98+
fl.set(list);
99+
result
100+
})
101+
}
68102
}
69103

70104
impl PyDict {

crates/vm/src/builtins/float.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ use crate::{
1515
protocol::PyNumberMethods,
1616
types::{AsNumber, Callable, Comparable, Constructor, Hashable, PyComparisonOp, Representable},
1717
};
18+
use core::cell::Cell;
19+
use core::ptr::NonNull;
1820
use malachite_bigint::{BigInt, ToBigInt};
1921
use num_complex::Complex64;
2022
use num_traits::{Signed, ToPrimitive, Zero};
@@ -32,11 +34,43 @@ impl PyFloat {
3234
}
3335
}
3436

37+
thread_local! {
38+
static FLOAT_FREELIST: Cell<crate::object::FreeList<PyFloat>> = const { Cell::new(crate::object::FreeList::new()) };
39+
}
40+
3541
impl PyPayload for PyFloat {
42+
const MAX_FREELIST: usize = 100;
43+
const HAS_FREELIST: bool = true;
44+
3645
#[inline]
3746
fn class(ctx: &Context) -> &'static Py<PyType> {
3847
ctx.types.float_type
3948
}
49+
50+
#[inline]
51+
unsafe fn freelist_push(obj: *mut PyObject) -> bool {
52+
FLOAT_FREELIST.with(|fl| {
53+
let mut list = fl.take();
54+
let stored = if list.len() < Self::MAX_FREELIST {
55+
list.push(obj);
56+
true
57+
} else {
58+
false
59+
};
60+
fl.set(list);
61+
stored
62+
})
63+
}
64+
65+
#[inline]
66+
unsafe fn freelist_pop() -> Option<NonNull<PyObject>> {
67+
FLOAT_FREELIST.with(|fl| {
68+
let mut list = fl.take();
69+
let result = list.pop().map(|p| unsafe { NonNull::new_unchecked(p) });
70+
fl.set(list);
71+
result
72+
})
73+
}
4074
}
4175

4276
impl ToPyObject for f64 {

crates/vm/src/builtins/list.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ use crate::{
2727
use rustpython_common::wtf8::Wtf8Buf;
2828

2929
use alloc::fmt;
30+
use core::cell::Cell;
3031
use core::ops::DerefMut;
32+
use core::ptr::NonNull;
3133

3234
#[pyclass(module = false, name = "list", unhashable = true, traverse = "manual")]
3335
#[derive(Default)]
@@ -72,11 +74,43 @@ unsafe impl Traverse for PyList {
7274
}
7375
}
7476

77+
thread_local! {
78+
static LIST_FREELIST: Cell<crate::object::FreeList<PyList>> = const { Cell::new(crate::object::FreeList::new()) };
79+
}
80+
7581
impl PyPayload for PyList {
82+
const MAX_FREELIST: usize = 80;
83+
const HAS_FREELIST: bool = true;
84+
7685
#[inline]
7786
fn class(ctx: &Context) -> &'static Py<PyType> {
7887
ctx.types.list_type
7988
}
89+
90+
#[inline]
91+
unsafe fn freelist_push(obj: *mut PyObject) -> bool {
92+
LIST_FREELIST.with(|fl| {
93+
let mut list = fl.take();
94+
let stored = if list.len() < Self::MAX_FREELIST {
95+
list.push(obj);
96+
true
97+
} else {
98+
false
99+
};
100+
fl.set(list);
101+
stored
102+
})
103+
}
104+
105+
#[inline]
106+
unsafe fn freelist_pop() -> Option<NonNull<PyObject>> {
107+
LIST_FREELIST.with(|fl| {
108+
let mut list = fl.take();
109+
let result = list.pop().map(|p| unsafe { NonNull::new_unchecked(p) });
110+
fl.set(list);
111+
result
112+
})
113+
}
80114
}
81115

82116
impl ToPyObject for Vec<PyObjectRef> {

crates/vm/src/builtins/slice.rs

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// sliceobject.{h,c} in CPython
22
// spell-checker:ignore sliceobject
3+
use core::cell::Cell;
4+
use core::ptr::NonNull;
35
use rustpython_common::wtf8::{Wtf8Buf, wtf8_concat};
46

57
use super::{PyGenericAlias, PyStrRef, PyTupleRef, PyType, PyTypeRef};
@@ -15,19 +17,71 @@ use crate::{
1517
use malachite_bigint::{BigInt, ToBigInt};
1618
use num_traits::{One, Signed, Zero};
1719

18-
#[pyclass(module = false, name = "slice", unhashable = true, traverse)]
20+
#[pyclass(module = false, name = "slice", unhashable = true, traverse = "manual")]
1921
#[derive(Debug)]
2022
pub struct PySlice {
2123
pub start: Option<PyObjectRef>,
2224
pub stop: PyObjectRef,
2325
pub step: Option<PyObjectRef>,
2426
}
2527

28+
// SAFETY: Traverse properly visits all owned PyObjectRefs
29+
unsafe impl crate::object::Traverse for PySlice {
30+
fn traverse(&self, traverse_fn: &mut crate::object::TraverseFn<'_>) {
31+
self.start.traverse(traverse_fn);
32+
self.stop.traverse(traverse_fn);
33+
self.step.traverse(traverse_fn);
34+
}
35+
36+
fn clear(&mut self, out: &mut Vec<PyObjectRef>) {
37+
if let Some(start) = self.start.take() {
38+
out.push(start);
39+
}
40+
// stop is not Option, so it will be freed when payload is dropped
41+
// (via drop_in_place on freelist pop, or Box::from_raw on dealloc)
42+
if let Some(step) = self.step.take() {
43+
out.push(step);
44+
}
45+
}
46+
}
47+
48+
thread_local! {
49+
static SLICE_FREELIST: Cell<crate::object::FreeList<PySlice>> = const { Cell::new(crate::object::FreeList::new()) };
50+
}
51+
2652
impl PyPayload for PySlice {
53+
const MAX_FREELIST: usize = 1;
54+
const HAS_FREELIST: bool = true;
55+
2756
#[inline]
2857
fn class(ctx: &Context) -> &'static Py<PyType> {
2958
ctx.types.slice_type
3059
}
60+
61+
#[inline]
62+
unsafe fn freelist_push(obj: *mut PyObject) -> bool {
63+
SLICE_FREELIST.with(|fl| {
64+
let mut list = fl.take();
65+
let stored = if list.len() < Self::MAX_FREELIST {
66+
list.push(obj);
67+
true
68+
} else {
69+
false
70+
};
71+
fl.set(list);
72+
stored
73+
})
74+
}
75+
76+
#[inline]
77+
unsafe fn freelist_pop() -> Option<NonNull<PyObject>> {
78+
SLICE_FREELIST.with(|fl| {
79+
let mut list = fl.take();
80+
let result = list.pop().map(|p| unsafe { NonNull::new_unchecked(p) });
81+
fl.set(list);
82+
result
83+
})
84+
}
3185
}
3286

3387
#[pyclass(with(Comparable, Representable, Hashable))]

crates/vm/src/builtins/tuple.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ unsafe impl Traverse for PyTuple {
5353
}
5454
}
5555

56+
// No freelist for PyTuple: structseq types (stat_result, struct_time, etc.)
57+
// are static subtypes sharing the same Rust payload, making type-safe reuse
58+
// impractical without a type-pointer comparison at push time.
5659
impl PyPayload for PyTuple {
5760
#[inline]
5861
fn class(ctx: &Context) -> &'static Py<PyType> {

crates/vm/src/object/core.rs

Lines changed: 95 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -171,13 +171,19 @@ pub(super) unsafe fn default_dealloc<T: PyPayload>(obj: *mut PyObject) {
171171
// Untrack from GC BEFORE deallocation.
172172
if obj_ref.is_gc_tracked() {
173173
let ptr = unsafe { NonNull::new_unchecked(obj) };
174-
rustpython_common::refcount::try_defer_drop(move || {
175-
// untrack_object only removes the pointer address from a HashSet.
176-
// It does NOT dereference the pointer, so it's safe even after deallocation.
177-
unsafe {
178-
crate::gc_state::gc_state().untrack_object(ptr);
179-
}
180-
});
174+
if T::HAS_FREELIST {
175+
// Freelist types must untrack immediately to avoid race conditions:
176+
// a deferred untrack could remove a re-tracked entry after reuse.
177+
unsafe { crate::gc_state::gc_state().untrack_object(ptr) };
178+
} else {
179+
rustpython_common::refcount::try_defer_drop(move || {
180+
// untrack_object only removes the pointer address from a HashSet.
181+
// It does NOT dereference the pointer, so it's safe even after deallocation.
182+
unsafe {
183+
crate::gc_state::gc_state().untrack_object(ptr);
184+
}
185+
});
186+
}
181187
}
182188

183189
// Extract child references before deallocation to break circular refs (tp_clear)
@@ -186,8 +192,17 @@ pub(super) unsafe fn default_dealloc<T: PyPayload>(obj: *mut PyObject) {
186192
unsafe { clear_fn(obj, &mut edges) };
187193
}
188194

189-
// Deallocate the object memory
190-
drop(unsafe { Box::from_raw(obj as *mut PyInner<T>) });
195+
// Try to store in freelist for reuse; otherwise deallocate.
196+
// Only exact types (not heaptype subclasses) go into the freelist,
197+
// because the pop site assumes the cached typ matches the base type.
198+
let pushed = if T::HAS_FREELIST && obj_ref.class().heaptype_ext.is_none() {
199+
unsafe { T::freelist_push(obj) }
200+
} else {
201+
false
202+
};
203+
if !pushed {
204+
drop(unsafe { Box::from_raw(obj as *mut PyInner<T>) });
205+
}
191206

192207
// Drop child references - may trigger recursive destruction.
193208
// The object is already deallocated, so circular refs are broken.
@@ -807,6 +822,52 @@ impl<T: PyPayload + core::fmt::Debug> PyInner<T> {
807822
}
808823
}
809824

825+
/// Thread-local freelist storage that properly deallocates cached objects
826+
/// on thread teardown.
827+
///
828+
/// Wraps a `Vec<*mut PyObject>` and implements `Drop` to convert each
829+
/// raw pointer back into `Box<PyInner<T>>` for proper deallocation.
830+
pub(crate) struct FreeList<T: PyPayload> {
831+
items: Vec<*mut PyObject>,
832+
_marker: core::marker::PhantomData<T>,
833+
}
834+
835+
impl<T: PyPayload> FreeList<T> {
836+
pub(crate) const fn new() -> Self {
837+
Self {
838+
items: Vec::new(),
839+
_marker: core::marker::PhantomData,
840+
}
841+
}
842+
}
843+
844+
impl<T: PyPayload> Default for FreeList<T> {
845+
fn default() -> Self {
846+
Self::new()
847+
}
848+
}
849+
850+
impl<T: PyPayload> Drop for FreeList<T> {
851+
fn drop(&mut self) {
852+
for ptr in self.items.drain(..) {
853+
drop(unsafe { Box::from_raw(ptr as *mut PyInner<T>) });
854+
}
855+
}
856+
}
857+
858+
impl<T: PyPayload> core::ops::Deref for FreeList<T> {
859+
type Target = Vec<*mut PyObject>;
860+
fn deref(&self) -> &Self::Target {
861+
&self.items
862+
}
863+
}
864+
865+
impl<T: PyPayload> core::ops::DerefMut for FreeList<T> {
866+
fn deref_mut(&mut self) -> &mut Self::Target {
867+
&mut self.items
868+
}
869+
}
870+
810871
/// The `PyObjectRef` is one of the most used types. It is a reference to a
811872
/// python object. A single python object can have multiple references, and
812873
/// this reference counting is accounted for by this type. Use the `.clone()`
@@ -1720,8 +1781,31 @@ impl<T: PyPayload + crate::object::MaybeTraverse + core::fmt::Debug> PyRef<T> {
17201781
pub fn new_ref(payload: T, typ: crate::builtins::PyTypeRef, dict: Option<PyDictRef>) -> Self {
17211782
let has_dict = dict.is_some();
17221783
let is_heaptype = typ.heaptype_ext.is_some();
1723-
let inner = Box::into_raw(PyInner::new(payload, typ, dict));
1724-
let ptr = unsafe { NonNull::new_unchecked(inner.cast::<Py<T>>()) };
1784+
1785+
// Try to reuse from freelist (exact type only, no dict, no heaptype)
1786+
let cached = if !has_dict && !is_heaptype {
1787+
unsafe { T::freelist_pop() }
1788+
} else {
1789+
None
1790+
};
1791+
1792+
let ptr = if let Some(cached) = cached {
1793+
let inner = cached.as_ptr() as *mut PyInner<T>;
1794+
unsafe {
1795+
core::ptr::write(&mut (*inner).ref_count, RefCount::new());
1796+
(*inner).gc_bits.store(0, Ordering::Relaxed);
1797+
core::ptr::drop_in_place(&mut (*inner).payload);
1798+
core::ptr::write(&mut (*inner).payload, payload);
1799+
// typ, vtable, slots are preserved; dict is None, weak_list was
1800+
// cleared by drop_slow_inner before freelist push
1801+
}
1802+
// Drop the caller's typ since the cached object already holds one
1803+
drop(typ);
1804+
unsafe { NonNull::new_unchecked(inner.cast::<Py<T>>()) }
1805+
} else {
1806+
let inner = Box::into_raw(PyInner::new(payload, typ, dict));
1807+
unsafe { NonNull::new_unchecked(inner.cast::<Py<T>>()) }
1808+
};
17251809

17261810
// Track object if:
17271811
// - HAS_TRAVERSE is true (Rust payload implements Traverse), OR

0 commit comments

Comments
 (0)