Skip to content

Commit e2c097e

Browse files
committed
Fix descriptor cache race in LOAD_ATTR method specialization
Add per-cache-base descriptor seqlock in CodeUnits and use it in try_read/write_cached_descriptor paths to prevent mixed type_version/descriptor reads under concurrent specialization. This fixes lock/RLock method mixups seen in threading/compileall stress runs. Also add cspell entries for stoptheworld and pystate.
1 parent 189c2f8 commit e2c097e

File tree

4 files changed

+116
-19
lines changed

4 files changed

+116
-19
lines changed

.cspell.dict/cpython.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ NEWLOCALS
127127
newsemlockobject
128128
nfrees
129129
nkwargs
130-
nlocalsplus
131130
nkwelts
131+
nlocalsplus
132132
Nondescriptor
133133
noninteger
134134
nops
@@ -160,6 +160,7 @@ pylifecycle
160160
pymain
161161
pyrepl
162162
PYTHONTRACEMALLOC
163+
PYTHONUTF
163164
pythonw
164165
PYTHREAD_NAME
165166
releasebuffer
@@ -171,9 +172,11 @@ saveall
171172
scls
172173
setdict
173174
setfunc
175+
setprofileallthreads
174176
SETREF
175177
setresult
176178
setslice
179+
settraceallthreads
177180
SLOTDEFINED
178181
SMALLBUF
179182
SOABI
@@ -190,6 +193,7 @@ subparams
190193
subscr
191194
sval
192195
swappedbytes
196+
sysdict
193197
templatelib
194198
testconsole
195199
ticketer

.cspell.json

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,6 @@
152152
"IFEXEC",
153153
// "stat"
154154
"FIRMLINK",
155-
// CPython internal names
156-
"PYTHONUTF",
157-
"sysdict",
158-
"settraceallthreads",
159-
"setprofileallthreads"
160155
],
161156
// flagWords - list of words to be always considered incorrect
162157
"flagWords": [

crates/compiler-core/src/bytecode.rs

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use core::{
1212
cell::UnsafeCell,
1313
hash, mem,
1414
ops::Deref,
15-
sync::atomic::{AtomicU8, AtomicU16, AtomicUsize, Ordering},
15+
sync::atomic::{AtomicU8, AtomicU16, AtomicU32, AtomicUsize, Ordering},
1616
};
1717
use itertools::Itertools;
1818
use malachite_bigint::BigInt;
@@ -415,6 +415,9 @@ pub struct CodeUnits {
415415
/// Single atomic load/store prevents torn reads when multiple threads
416416
/// specialize the same instruction concurrently.
417417
pointer_cache: Box<[AtomicUsize]>,
418+
/// SeqLock counter per instruction cache base for descriptor payload writes.
419+
/// odd = write in progress, even = quiescent.
420+
descriptor_sequences: Box<[AtomicU32]>,
418421
}
419422

420423
// SAFETY: All cache operations use atomic read/write instructions.
@@ -441,10 +444,16 @@ impl Clone for CodeUnits {
441444
.iter()
442445
.map(|c| AtomicUsize::new(c.load(Ordering::Relaxed)))
443446
.collect();
447+
let descriptor_sequences = self
448+
.descriptor_sequences
449+
.iter()
450+
.map(|c| AtomicU32::new(c.load(Ordering::Relaxed)))
451+
.collect();
444452
Self {
445453
units: UnsafeCell::new(units),
446454
adaptive_counters,
447455
pointer_cache,
456+
descriptor_sequences,
448457
}
449458
}
450459
}
@@ -491,10 +500,15 @@ impl From<Vec<CodeUnit>> for CodeUnits {
491500
.map(|_| AtomicUsize::new(0))
492501
.collect::<Vec<_>>()
493502
.into_boxed_slice();
503+
let descriptor_sequences = (0..len)
504+
.map(|_| AtomicU32::new(0))
505+
.collect::<Vec<_>>()
506+
.into_boxed_slice();
494507
Self {
495508
units: UnsafeCell::new(units),
496509
adaptive_counters,
497510
pointer_cache,
511+
descriptor_sequences,
498512
}
499513
}
500514
}
@@ -641,6 +655,54 @@ impl CodeUnits {
641655
self.pointer_cache[index].load(Ordering::Relaxed)
642656
}
643657

658+
#[inline]
659+
pub fn begin_descriptor_read(&self, index: usize) -> u32 {
660+
let mut sequence = self.descriptor_sequences[index].load(Ordering::Acquire);
661+
while (sequence & 1) != 0 {
662+
core::hint::spin_loop();
663+
sequence = self.descriptor_sequences[index].load(Ordering::Acquire);
664+
}
665+
sequence
666+
}
667+
668+
#[inline]
669+
pub fn end_descriptor_read(&self, index: usize, previous: u32) -> bool {
670+
core::sync::atomic::fence(Ordering::Acquire);
671+
self.descriptor_sequences[index].load(Ordering::Relaxed) == previous
672+
}
673+
674+
#[inline]
675+
pub fn begin_descriptor_write(&self, index: usize) {
676+
let sequence = &self.descriptor_sequences[index];
677+
let mut seq = sequence.load(Ordering::Acquire);
678+
loop {
679+
while (seq & 1) != 0 {
680+
core::hint::spin_loop();
681+
seq = sequence.load(Ordering::Acquire);
682+
}
683+
match sequence.compare_exchange_weak(
684+
seq,
685+
seq.wrapping_add(1),
686+
Ordering::AcqRel,
687+
Ordering::Acquire,
688+
) {
689+
Ok(_) => {
690+
core::sync::atomic::fence(Ordering::Release);
691+
break;
692+
}
693+
Err(observed) => {
694+
core::hint::spin_loop();
695+
seq = observed;
696+
}
697+
}
698+
}
699+
}
700+
701+
#[inline]
702+
pub fn end_descriptor_write(&self, index: usize) {
703+
self.descriptor_sequences[index].fetch_add(1, Ordering::Release);
704+
}
705+
644706
/// Read adaptive counter bits for instruction at `index`.
645707
/// Uses Relaxed atomic load.
646708
pub fn read_adaptive_counter(&self, index: usize) -> u16 {

crates/vm/src/frame.rs

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7030,18 +7030,50 @@ impl ExecutingFrame<'_> {
70307030
cache_base: usize,
70317031
expected_type_version: u32,
70327032
) -> Option<PyObjectRef> {
7033-
let descr_ptr = self.code.instructions.read_cache_ptr(cache_base + 5);
7034-
if descr_ptr == 0 {
7035-
return None;
7036-
}
7037-
let cloned = unsafe { PyObject::try_to_owned_from_ptr(descr_ptr as *mut PyObject) }?;
7038-
if self.code.instructions.read_cache_u32(cache_base + 1) == expected_type_version
7039-
&& self.code.instructions.read_cache_ptr(cache_base + 5) == descr_ptr
7040-
{
7041-
Some(cloned)
7042-
} else {
7043-
drop(cloned);
7044-
None
7033+
loop {
7034+
let sequence = self.code.instructions.begin_descriptor_read(cache_base);
7035+
let version_before = self.code.instructions.read_cache_u32(cache_base + 1);
7036+
if version_before != expected_type_version || version_before == 0 {
7037+
if self
7038+
.code
7039+
.instructions
7040+
.end_descriptor_read(cache_base, sequence)
7041+
{
7042+
return None;
7043+
}
7044+
continue;
7045+
}
7046+
7047+
let descr_ptr = self.code.instructions.read_cache_ptr(cache_base + 5);
7048+
if descr_ptr == 0 {
7049+
if self
7050+
.code
7051+
.instructions
7052+
.end_descriptor_read(cache_base, sequence)
7053+
{
7054+
return None;
7055+
}
7056+
continue;
7057+
}
7058+
7059+
let cloned = unsafe { PyObject::try_to_owned_from_ptr(descr_ptr as *mut PyObject) };
7060+
let version_after = self.code.instructions.read_cache_u32(cache_base + 1);
7061+
let ptr_after = self.code.instructions.read_cache_ptr(cache_base + 5);
7062+
let stable = self
7063+
.code
7064+
.instructions
7065+
.end_descriptor_read(cache_base, sequence);
7066+
7067+
if let Some(cloned) = cloned
7068+
&& stable
7069+
&& version_after == expected_type_version
7070+
&& ptr_after == descr_ptr
7071+
{
7072+
return Some(cloned);
7073+
}
7074+
if stable {
7075+
return None;
7076+
}
70457077
}
70467078
}
70477079

@@ -7054,6 +7086,7 @@ impl ExecutingFrame<'_> {
70547086
) {
70557087
// Publish descriptor cache atomically as a tuple:
70567088
// invalidate version first, then write payload, then publish version.
7089+
self.code.instructions.begin_descriptor_write(cache_base);
70577090
unsafe {
70587091
self.code.instructions.write_cache_u32(cache_base + 1, 0);
70597092
self.code
@@ -7063,6 +7096,7 @@ impl ExecutingFrame<'_> {
70637096
.instructions
70647097
.write_cache_u32(cache_base + 1, type_version);
70657098
}
7099+
self.code.instructions.end_descriptor_write(cache_base);
70667100
}
70677101

70687102
#[inline]
@@ -7074,6 +7108,7 @@ impl ExecutingFrame<'_> {
70747108
descr_ptr: usize,
70757109
) {
70767110
// Same publish protocol as write_cached_descriptor(), plus metaclass guard.
7111+
self.code.instructions.begin_descriptor_write(cache_base);
70777112
unsafe {
70787113
self.code.instructions.write_cache_u32(cache_base + 1, 0);
70797114
self.code
@@ -7086,6 +7121,7 @@ impl ExecutingFrame<'_> {
70867121
.instructions
70877122
.write_cache_u32(cache_base + 1, type_version);
70887123
}
7124+
self.code.instructions.end_descriptor_write(cache_base);
70897125
}
70907126

70917127
fn load_attr(&mut self, vm: &VirtualMachine, oparg: LoadAttr) -> FrameResult {

0 commit comments

Comments
 (0)