From 6e2eb31c014f6d040ee5bd730a8f62f3ca40243b Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Tue, 17 Feb 2026 10:07:58 +0900 Subject: [PATCH] Optimize coroutine exception handling and fix gen_throw traceback - Replace ExceptionStack linked list with Vec for O(1) push/pop - Introduce FramePtr (NonNull>) to avoid Arc clone in frame stack - Use FramePtr in ThreadSlot for lock-protected cross-thread frame access - Add resume_gen_frame for lightweight generator/coroutine resume - Replace Coro.exception PyMutex with PyAtomicRef for lock-free swap - Add with_frame_exc to support initial exception state for generators - Use scopeguard for panic-safe cleanup in with_frame_exc/resume_gen_frame - Add traceback entries in gen_throw delegate/close error paths - Treat EndAsyncFor and CleanupThrow as reraise in handle_exception - Update current_frame()/current_globals() to return owned values - Add ThreadSlot with atomic exception field for sys._current_exceptions() - Push/pop thread frames in resume_gen_frame for sys._current_frames() --- crates/stdlib/src/faulthandler.rs | 31 +++-- crates/vm/src/builtins/frame.rs | 45 ++++-- crates/vm/src/coroutine.rs | 41 ++---- crates/vm/src/frame.rs | 43 +++++- crates/vm/src/protocol/callable.rs | 9 +- crates/vm/src/stdlib/builtins.rs | 4 +- crates/vm/src/stdlib/sys.rs | 30 ++-- crates/vm/src/stdlib/thread.rs | 9 +- crates/vm/src/vm/mod.rs | 214 +++++++++++++++++++++-------- crates/vm/src/vm/thread.rs | 105 ++++++++++---- crates/vm/src/warn.rs | 2 +- 11 files changed, 368 insertions(+), 165 deletions(-) diff --git a/crates/stdlib/src/faulthandler.rs b/crates/stdlib/src/faulthandler.rs index 265bcf9ca6d..f618f8f6731 100644 --- a/crates/stdlib/src/faulthandler.rs +++ b/crates/stdlib/src/faulthandler.rs @@ -96,9 +96,8 @@ mod decl { all_threads: AtomicBool::new(true), }; - /// Arc>> - shared frame slot for a thread #[cfg(feature = "threading")] - type ThreadFrameSlot = Arc>>; + type ThreadFrameSlot = Arc; // Watchdog thread state for dump_traceback_later struct WatchdogState { @@ -326,7 +325,7 @@ mod decl { /// Write a frame's info to an fd using signal-safe I/O. #[cfg(any(unix, windows))] - fn dump_frame_from_ref(fd: i32, frame: &crate::vm::PyRef) { + fn dump_frame_from_ref(fd: i32, frame: &crate::vm::Py) { let funcname = frame.code.obj_name.as_str(); let filename = frame.code.source_path().as_str(); let lineno = if frame.lasti() == 0 { @@ -345,20 +344,23 @@ mod decl { } /// Dump traceback for a thread given its frame stack (for cross-thread dumping). + /// # Safety + /// Each `FramePtr` must point to a live frame (caller holds the Mutex). #[cfg(all(any(unix, windows), feature = "threading"))] fn dump_traceback_thread_frames( fd: i32, thread_id: u64, is_current: bool, - frames: &[crate::vm::frame::FrameRef], + frames: &[rustpython_vm::vm::FramePtr], ) { write_thread_id(fd, thread_id, is_current); if frames.is_empty() { puts(fd, " \n"); } else { - for frame in frames.iter().rev() { - dump_frame_from_ref(fd, frame); + for fp in frames.iter().rev() { + // SAFETY: caller holds the Mutex, so the owning thread can't pop. + dump_frame_from_ref(fd, unsafe { fp.as_ref() }); } } } @@ -382,8 +384,9 @@ mod decl { } else { puts(fd, "Stack (most recent call first):\n"); let frames = vm.frames.borrow(); - for frame in frames.iter().rev() { - dump_frame_from_ref(fd, frame); + for fp in frames.iter().rev() { + // SAFETY: the frame is alive while it's in the Vec + dump_frame_from_ref(fd, unsafe { fp.as_ref() }); } } } @@ -410,7 +413,7 @@ mod decl { if tid == current_tid { continue; } - let frames_guard = slot.lock(); + let frames_guard = slot.frames.lock(); dump_traceback_thread_frames(fd, tid, false, &frames_guard); puts(fd, "\n"); } @@ -421,8 +424,8 @@ mod decl { if frames.is_empty() { puts(fd, " \n"); } else { - for frame in frames.iter().rev() { - dump_frame_from_ref(fd, frame); + for fp in frames.iter().rev() { + dump_frame_from_ref(fd, unsafe { fp.as_ref() }); } } } @@ -431,8 +434,8 @@ mod decl { { write_thread_id(fd, current_thread_id(), true); let frames = vm.frames.borrow(); - for frame in frames.iter().rev() { - dump_frame_from_ref(fd, frame); + for fp in frames.iter().rev() { + dump_frame_from_ref(fd, unsafe { fp.as_ref() }); } } } @@ -870,7 +873,7 @@ mod decl { #[cfg(feature = "threading")] { for (tid, slot) in &thread_frame_slots { - let frames = slot.lock(); + let frames = slot.frames.lock(); dump_traceback_thread_frames(fd, *tid, false, &frames); } } diff --git a/crates/vm/src/builtins/frame.rs b/crates/vm/src/builtins/frame.rs index 5d7510d8ff8..ed2e1e672fd 100644 --- a/crates/vm/src/builtins/frame.rs +++ b/crates/vm/src/builtins/frame.rs @@ -4,7 +4,7 @@ use super::{PyCode, PyDictRef, PyIntRef, PyStrRef}; use crate::{ - AsObject, Context, Py, PyObjectRef, PyRef, PyResult, VirtualMachine, + Context, Py, PyObjectRef, PyRef, PyResult, VirtualMachine, class::PyClassImpl, frame::{Frame, FrameOwner, FrameRef}, function::PySetterValue, @@ -195,16 +195,43 @@ impl Py { #[pygetset] pub fn f_back(&self, vm: &VirtualMachine) -> Option> { - // TODO: actually store f_back inside Frame struct + let previous = self.previous_frame(); + if previous.is_null() { + return None; + } - // get the frame in the frame stack that appears before this one. - // won't work if this frame isn't in the frame stack, hence the todo above - vm.frames + if let Some(frame) = vm + .frames .borrow() .iter() - .rev() - .skip_while(|p| !p.is(self.as_object())) - .nth(1) - .cloned() + .find(|fp| { + // SAFETY: the caller keeps the FrameRef alive while it's in the Vec + let py: &crate::Py = unsafe { fp.as_ref() }; + let ptr: *const Frame = &**py; + core::ptr::eq(ptr, previous) + }) + .map(|fp| unsafe { fp.as_ref() }.to_owned()) + { + return Some(frame); + } + + #[cfg(feature = "threading")] + { + let registry = vm.state.thread_frames.lock(); + for slot in registry.values() { + let frames = slot.frames.lock(); + // SAFETY: the owning thread can't pop while we hold the Mutex, + // so FramePtr is valid for the duration of the lock. + if let Some(frame) = frames.iter().find_map(|fp| { + let f = unsafe { fp.as_ref() }; + let ptr: *const Frame = &**f; + core::ptr::eq(ptr, previous).then(|| f.to_owned()) + }) { + return Some(frame); + } + } + } + + None } } diff --git a/crates/vm/src/coroutine.rs b/crates/vm/src/coroutine.rs index a066c9944fe..ac44e33f799 100644 --- a/crates/vm/src/coroutine.rs +++ b/crates/vm/src/coroutine.rs @@ -1,11 +1,11 @@ use crate::{ AsObject, Py, PyObject, PyObjectRef, PyResult, TryFromObject, VirtualMachine, - builtins::{PyBaseExceptionRef, PyStrRef}, + builtins::PyStrRef, common::lock::PyMutex, exceptions::types::PyBaseException, - frame::{ExecutionResult, FrameOwner, FrameRef}, + frame::{ExecutionResult, Frame, FrameOwner, FrameRef}, function::OptionalArg, - object::{Traverse, TraverseFn}, + object::{PyAtomicRef, Traverse, TraverseFn}, protocol::PyIterReturn, }; use crossbeam_utils::atomic::AtomicCell; @@ -36,7 +36,7 @@ pub struct Coro { // _weakreflist name: PyMutex, qualname: PyMutex, - exception: PyMutex>, // exc_state + exception: PyAtomicRef>, // exc_state } unsafe impl Traverse for Coro { @@ -44,7 +44,9 @@ unsafe impl Traverse for Coro { self.frame.traverse(tracer_fn); self.name.traverse(tracer_fn); self.qualname.traverse(tracer_fn); - self.exception.traverse(tracer_fn); + if let Some(exc) = self.exception.deref() { + exc.traverse(tracer_fn); + } } } @@ -65,7 +67,7 @@ impl Coro { frame, closed: AtomicCell::new(false), running: AtomicCell::new(false), - exception: PyMutex::default(), + exception: PyAtomicRef::from(None), name: PyMutex::new(name), qualname: PyMutex::new(qualname), } @@ -92,33 +94,20 @@ impl Coro { func: F, ) -> PyResult where - F: FnOnce(FrameRef) -> PyResult, + F: FnOnce(&Py) -> PyResult, { if self.running.compare_exchange(false, true).is_err() { return Err(vm.new_value_error(format!("{} already executing", gen_name(jen, vm)))); } - // swap exception state - // Get generator's saved exception state from last yield - let gen_exc = self.exception.lock().take(); - - // Use a slot to capture generator's exception state before with_frame pops - let exception_slot = &self.exception; + // SAFETY: running.compare_exchange guarantees exclusive access + let gen_exc = unsafe { self.exception.swap(None) }; + let exception_ptr = &self.exception as *const PyAtomicRef>; - // Run the generator frame - // with_frame does push_exception(None) which creates a new exception context - // The caller's exception remains in the chain via prev, so topmost_exception() - // will find it if generator's exception is None - let result = vm.with_frame(self.frame.clone(), |f| { - // with_frame pushed None, creating: { exc: None, prev: caller's exc_info } - // Pop None and push generator's exception instead - // This maintains the chain: { exc: gen_exc, prev: caller's exc_info } - vm.pop_exception(); - vm.push_exception(gen_exc); + let result = vm.resume_gen_frame(&self.frame, gen_exc, |f| { let result = func(f); - // Save generator's exception state BEFORE with_frame pops - // This is the generator's current exception context - *exception_slot.lock() = vm.current_exception(); + // SAFETY: exclusive access guaranteed by running flag + let _old = unsafe { (*exception_ptr).swap(vm.current_exception()) }; result }); diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index f24b25c610a..b498dc3e726 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -463,7 +463,7 @@ impl ExecutingFrame<'_> { // Execute until return or exception: let instructions = &self.code.instructions; let mut arg_state = bytecode::OpArgState::default(); - let mut prev_line: usize = 0; + let mut prev_line: u32 = 0; loop { let idx = self.lasti() as usize; // Fire 'line' trace event when line number changes. @@ -472,9 +472,9 @@ impl ExecutingFrame<'_> { if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) && let Some((loc, _)) = self.code.locations.get(idx) - && loc.line.get() != prev_line + && loc.line.get() as u32 != prev_line { - prev_line = loc.line.get(); + prev_line = loc.line.get() as u32; vm.trace_event(crate::protocol::TraceEvent::Line, None)?; } self.update_lasti(|i| *i += 1); @@ -543,13 +543,16 @@ impl ExecutingFrame<'_> { // Check if this is a RERAISE instruction // Both AnyInstruction::Raise { kind: Reraise/ReraiseFromStack } and // AnyInstruction::Reraise are reraise operations that should not add - // new traceback entries + // new traceback entries. + // EndAsyncFor and CleanupThrow also re-raise non-matching exceptions. let is_reraise = match op { Instruction::RaiseVarargs { kind } => matches!( kind.get(arg), bytecode::RaiseKind::BareRaise | bytecode::RaiseKind::ReraiseFromStack ), - Instruction::Reraise { .. } => true, + Instruction::Reraise { .. } + | Instruction::EndAsyncFor + | Instruction::CleanupThrow => true, _ => false, }; @@ -653,6 +656,19 @@ impl ExecutingFrame<'_> { Ok(()) }; if let Err(err) = close_result { + let idx = self.lasti().saturating_sub(1) as usize; + if idx < self.code.locations.len() { + let (loc, _end_loc) = self.code.locations[idx]; + let next = err.__traceback__(); + let new_traceback = PyTraceback::new( + next, + self.object.to_owned(), + idx as u32 * 2, + loc.line, + ); + err.set_traceback_typed(Some(new_traceback.into_ref(&vm.ctx))); + } + self.push_value(vm.ctx.none()); vm.contextualize_exception(&err); return match self.unwind_blocks(vm, UnwindReason::Raising { exception: err }) { @@ -678,6 +694,23 @@ impl ExecutingFrame<'_> { Either::B(meth) => meth.call((exc_type, exc_val, exc_tb), vm), }; return ret.map(ExecutionResult::Yield).or_else(|err| { + // Add traceback entry for the yield-from/await point. + // gen_send_ex2 resumes the frame with a pending exception, + // which goes through error: → PyTraceBack_Here. We add the + // entry here before calling unwind_blocks. + let idx = self.lasti().saturating_sub(1) as usize; + if idx < self.code.locations.len() { + let (loc, _end_loc) = self.code.locations[idx]; + let next = err.__traceback__(); + let new_traceback = PyTraceback::new( + next, + self.object.to_owned(), + idx as u32 * 2, + loc.line, + ); + err.set_traceback_typed(Some(new_traceback.into_ref(&vm.ctx))); + } + self.push_value(vm.ctx.none()); vm.contextualize_exception(&err); match self.unwind_blocks(vm, UnwindReason::Raising { exception: err }) { diff --git a/crates/vm/src/protocol/callable.rs b/crates/vm/src/protocol/callable.rs index 9308ec8ffe2..9a621dee4f8 100644 --- a/crates/vm/src/protocol/callable.rs +++ b/crates/vm/src/protocol/callable.rs @@ -2,7 +2,7 @@ use crate::{ builtins::{PyBoundMethod, PyFunction}, function::{FuncArgs, IntoFuncArgs}, types::GenericMethod, - {AsObject, PyObject, PyObjectRef, PyResult, VirtualMachine}, + {PyObject, PyObjectRef, PyResult, VirtualMachine}, }; impl PyObject { @@ -111,12 +111,11 @@ impl VirtualMachine { return Ok(()); } - let frame_ref = self.current_frame(); - if frame_ref.is_none() { + let Some(frame_ref) = self.current_frame() else { return Ok(()); - } + }; - let frame = frame_ref.unwrap().as_object().to_owned(); + let frame: PyObjectRef = frame_ref.into(); let event = self.ctx.new_str(event.to_string()).into(); let args = vec![frame, event, arg.unwrap_or_else(|| self.ctx.none())]; diff --git a/crates/vm/src/stdlib/builtins.rs b/crates/vm/src/stdlib/builtins.rs index 7b24d72d9b5..1b54a26e732 100644 --- a/crates/vm/src/stdlib/builtins.rs +++ b/crates/vm/src/stdlib/builtins.rs @@ -384,7 +384,7 @@ mod builtins { ) } None => ( - vm.current_globals().clone(), + vm.current_globals(), if let Some(locals) = self.locals { locals } else { @@ -503,7 +503,7 @@ mod builtins { #[pyfunction] fn globals(vm: &VirtualMachine) -> PyDictRef { - vm.current_globals().clone() + vm.current_globals() } #[pyfunction] diff --git a/crates/vm/src/stdlib/sys.rs b/crates/vm/src/stdlib/sys.rs index 4c672af110b..0a6f26d642c 100644 --- a/crates/vm/src/stdlib/sys.rs +++ b/crates/vm/src/stdlib/sys.rs @@ -41,7 +41,7 @@ mod sys { hash::{PyHash, PyUHash}, }, convert::ToPyObject, - frame::FrameRef, + frame::{Frame, FrameRef}, function::{FuncArgs, KwArgs, OptionalArg, PosArgs}, stdlib::{builtins, warnings::warn}, types::PyStructSequence, @@ -971,12 +971,14 @@ mod sys { #[pyfunction] fn _getframe(offset: OptionalArg, vm: &VirtualMachine) -> PyResult { let offset = offset.into_option().unwrap_or(0); - if offset > vm.frames.borrow().len() - 1 { + let frames = vm.frames.borrow(); + if offset >= frames.len() { return Err(vm.new_value_error("call stack is not deep enough")); } - let idx = vm.frames.borrow().len() - offset - 1; - let frame = &vm.frames.borrow()[idx]; - Ok(frame.clone()) + let idx = frames.len() - offset - 1; + // SAFETY: the FrameRef is alive on the call stack while it's in the Vec + let py: &crate::Py = unsafe { frames[idx].as_ref() }; + Ok(py.to_owned()) } #[pyfunction] @@ -984,15 +986,19 @@ mod sys { let depth = depth.into_option().unwrap_or(0); // Get the frame at the specified depth - if depth > vm.frames.borrow().len() - 1 { - return Ok(vm.ctx.none()); - } - - let idx = vm.frames.borrow().len() - depth - 1; - let frame = &vm.frames.borrow()[idx]; + let func_obj = { + let frames = vm.frames.borrow(); + if depth >= frames.len() { + return Ok(vm.ctx.none()); + } + let idx = frames.len() - depth - 1; + // SAFETY: the FrameRef is alive on the call stack while it's in the Vec + let frame: &crate::Py = unsafe { frames[idx].as_ref() }; + frame.func_obj.clone() + }; // If the frame has a function object, return its __module__ attribute - if let Some(func_obj) = &frame.func_obj { + if let Some(func_obj) = func_obj { match func_obj.get_attr(identifier!(vm, __module__), vm) { Ok(module) => Ok(module), Err(_) => { diff --git a/crates/vm/src/stdlib/thread.rs b/crates/vm/src/stdlib/thread.rs index 12a741f62f0..bf495ecc382 100644 --- a/crates/vm/src/stdlib/thread.rs +++ b/crates/vm/src/stdlib/thread.rs @@ -891,7 +891,14 @@ pub(crate) mod _thread { let registry = vm.state.thread_frames.lock(); registry .iter() - .filter_map(|(id, slot)| slot.lock().last().cloned().map(|f| (*id, f))) + .filter_map(|(id, slot)| { + let frames = slot.frames.lock(); + // SAFETY: the owning thread can't pop while we hold the Mutex, + // so the FramePtr is valid for the duration of the lock. + frames + .last() + .map(|fp| (*id, unsafe { fp.as_ref() }.to_owned())) + }) .collect() } diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index 10409d943b3..07395b80460 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -41,7 +41,8 @@ use crate::{ }; use alloc::{borrow::Cow, collections::BTreeMap}; use core::{ - cell::{Cell, OnceCell, Ref, RefCell}, + cell::{Cell, OnceCell, RefCell}, + ptr::NonNull, sync::atomic::{AtomicBool, Ordering}, }; use crossbeam_utils::atomic::AtomicCell; @@ -72,7 +73,7 @@ pub struct VirtualMachine { pub builtins: PyRef, pub sys_module: PyRef, pub ctx: PyRc, - pub frames: RefCell>, + pub frames: RefCell>, pub wasm_id: Option, exceptions: RefCell, pub import_func: PyObjectRef, @@ -99,10 +100,26 @@ pub struct VirtualMachine { pub asyncio_running_task: RefCell>, } +/// Non-owning frame pointer for the frames stack. +/// The pointed-to frame is kept alive by the caller of with_frame_exc/resume_gen_frame. +#[derive(Copy, Clone)] +pub struct FramePtr(NonNull>); + +impl FramePtr { + /// # Safety + /// The pointed-to frame must still be alive. + pub unsafe fn as_ref(&self) -> &Py { + unsafe { self.0.as_ref() } + } +} + +// SAFETY: FramePtr is only stored in the VM's frames Vec while the corresponding +// FrameRef is alive on the call stack. The Vec is always empty when the VM moves between threads. +unsafe impl Send for FramePtr {} + #[derive(Debug, Default)] struct ExceptionStack { - exc: Option, - prev: Option>, + stack: Vec>, } pub struct PyGlobalState { @@ -129,7 +146,7 @@ pub struct PyGlobalState { /// Main thread identifier (pthread_self on Unix) #[cfg(feature = "threading")] pub main_thread_ident: AtomicCell, - /// Registry of all threads' current frames for sys._current_frames() + /// Registry of all threads' slots for sys._current_frames() and sys._current_exceptions() #[cfg(feature = "threading")] pub thread_frames: parking_lot::Mutex>, /// Registry of all ThreadHandles for fork cleanup @@ -997,31 +1014,55 @@ impl VirtualMachine { &self, frame: FrameRef, f: F, + ) -> PyResult { + self.with_frame_exc(frame, None, f) + } + + /// Like `with_frame` but allows specifying the initial exception state. + pub fn with_frame_exc PyResult>( + &self, + frame: FrameRef, + exc: Option, + f: F, ) -> PyResult { self.with_recursion("", || { - self.frames.borrow_mut().push(frame.clone()); + // SAFETY: `frame` (FrameRef) stays alive for the entire closure scope, + // keeping the FramePtr valid. We pass a clone to `f` so that `f` + // consuming its FrameRef doesn't invalidate our pointer. + let fp = FramePtr(NonNull::from(&*frame)); + self.frames.borrow_mut().push(fp); // Update the shared frame stack for sys._current_frames() and faulthandler #[cfg(feature = "threading")] - crate::vm::thread::push_thread_frame(frame.clone()); + crate::vm::thread::push_thread_frame(fp); // Link frame into the signal-safe frame chain (previous pointer) - let frame_ptr: *const Frame = &**frame; - let old_frame = crate::vm::thread::set_current_frame(frame_ptr); + let old_frame = crate::vm::thread::set_current_frame((&**frame) as *const Frame); frame.previous.store( old_frame as *mut Frame, core::sync::atomic::Ordering::Relaxed, ); - // Push a new exception context for frame isolation - // Each frame starts with no active exception (None) - // This prevents exceptions from leaking between function calls - self.push_exception(None); + // Push exception context for frame isolation. + // For normal calls: None (clean slate). + // For generators: the saved exception from last yield. + self.push_exception(exc); let old_owner = frame.owner.swap( crate::frame::FrameOwner::Thread as i8, core::sync::atomic::Ordering::AcqRel, ); + + // Ensure cleanup on panic: restore owner, pop exception, frame chain, and frames Vec. + scopeguard::defer! { + frame.owner.store(old_owner, core::sync::atomic::Ordering::Release); + self.pop_exception(); + crate::vm::thread::set_current_frame(old_frame); + self.frames.borrow_mut().pop(); + #[cfg(feature = "threading")] + crate::vm::thread::pop_thread_frame(); + } + use crate::protocol::TraceEvent; // Fire 'call' trace event after pushing frame // (current_frame() now returns the callee's frame) - let result = match self.trace_event(TraceEvent::Call, None) { + match self.trace_event(TraceEvent::Call, None) { Ok(()) => { // Set per-frame trace function so line events fire for this frame. // Frames entered before sys.settrace() keep trace=None and skip line events. @@ -1031,7 +1072,7 @@ impl VirtualMachine { *frame.trace.lock() = trace_func; } } - let result = f(frame); + let result = f(frame.clone()); // Fire 'return' trace event on success if result.is_ok() { let _ = self.trace_event(TraceEvent::Return, None); @@ -1039,23 +1080,67 @@ impl VirtualMachine { result } Err(e) => Err(e), - }; - // SAFETY: frame_ptr is valid because self.frames holds a clone - // of the frame, keeping the underlying allocation alive. - unsafe { &*frame_ptr } - .owner - .store(old_owner, core::sync::atomic::Ordering::Release); - // Pop the exception context - restores caller's exception state - self.pop_exception(); - // Restore previous frame as current (unlink from chain) + } + }) + } + + /// Lightweight frame execution for generator/coroutine resume. + /// Pushes to the thread frame stack and fires trace/profile events, + /// but skips the thread exception update for performance. + pub fn resume_gen_frame) -> PyResult>( + &self, + frame: &FrameRef, + exc: Option, + f: F, + ) -> PyResult { + self.check_recursive_call("")?; + if self.check_c_stack_overflow() { + return Err(self.new_recursion_error(String::new())); + } + self.recursion_depth.update(|d| d + 1); + + // SAFETY: frame (&FrameRef) stays alive for the duration, so NonNull is valid until pop. + let fp = FramePtr(NonNull::from(&**frame)); + self.frames.borrow_mut().push(fp); + #[cfg(feature = "threading")] + crate::vm::thread::push_thread_frame(fp); + let old_frame = crate::vm::thread::set_current_frame((&***frame) as *const Frame); + frame.previous.store( + old_frame as *mut Frame, + core::sync::atomic::Ordering::Relaxed, + ); + // Inline exception push without thread exception update + self.exceptions.borrow_mut().stack.push(exc); + let old_owner = frame.owner.swap( + crate::frame::FrameOwner::Thread as i8, + core::sync::atomic::Ordering::AcqRel, + ); + + // Ensure cleanup on panic: restore owner, pop exception, frame chain, frames Vec, + // and recursion depth. + scopeguard::defer! { + frame.owner.store(old_owner, core::sync::atomic::Ordering::Release); + self.exceptions.borrow_mut().stack + .pop() + .expect("pop_exception() without nested exc stack"); crate::vm::thread::set_current_frame(old_frame); - // defer dec frame - let _popped = self.frames.borrow_mut().pop(); - // Pop from shared frame stack + self.frames.borrow_mut().pop(); #[cfg(feature = "threading")] crate::vm::thread::pop_thread_frame(); - result - }) + self.recursion_depth.update(|d| d - 1); + } + + use crate::protocol::TraceEvent; + match self.trace_event(TraceEvent::Call, None) { + Ok(()) => { + let result = f(frame); + if result.is_ok() { + let _ = self.trace_event(TraceEvent::Return, None); + } + result + } + Err(e) => Err(e), + } } /// Returns a basic CompileOpts instance with options accurate to the vm. Used @@ -1077,15 +1162,11 @@ impl VirtualMachine { } } - pub fn current_frame(&self) -> Option> { - let frames = self.frames.borrow(); - if frames.is_empty() { - None - } else { - Some(Ref::map(self.frames.borrow(), |frames| { - frames.last().unwrap() - })) - } + pub fn current_frame(&self) -> Option { + self.frames.borrow().last().map(|fp| { + // SAFETY: the caller keeps the FrameRef alive while it's in the Vec + unsafe { fp.as_ref() }.to_owned() + }) } pub fn current_locals(&self) -> PyResult { @@ -1094,11 +1175,11 @@ impl VirtualMachine { .locals(self) } - pub fn current_globals(&self) -> Ref<'_, PyDictRef> { - let frame = self - .current_frame() - .expect("called current_globals but no frames on the stack"); - Ref::map(frame, |f| &f.globals) + pub fn current_globals(&self) -> PyDictRef { + self.current_frame() + .expect("called current_globals but no frames on the stack") + .globals + .clone() } pub fn try_class(&self, module: &'static str, class: &'static str) -> PyResult { @@ -1351,27 +1432,44 @@ impl VirtualMachine { } pub(crate) fn push_exception(&self, exc: Option) { - let mut excs = self.exceptions.borrow_mut(); - let prev = core::mem::take(&mut *excs); - excs.prev = Some(Box::new(prev)); - excs.exc = exc + self.exceptions.borrow_mut().stack.push(exc); + #[cfg(feature = "threading")] + thread::update_thread_exception(self.topmost_exception()); } pub(crate) fn pop_exception(&self) -> Option { - let mut excs = self.exceptions.borrow_mut(); - let cur = core::mem::take(&mut *excs); - *excs = *cur.prev.expect("pop_exception() without nested exc stack"); - cur.exc + let exc = self + .exceptions + .borrow_mut() + .stack + .pop() + .expect("pop_exception() without nested exc stack"); + #[cfg(feature = "threading")] + thread::update_thread_exception(self.topmost_exception()); + exc } pub(crate) fn current_exception(&self) -> Option { - self.exceptions.borrow().exc.clone() + self.exceptions.borrow().stack.last().cloned().flatten() } pub(crate) fn set_exception(&self, exc: Option) { // don't be holding the RefCell guard while __del__ is called - let prev = core::mem::replace(&mut self.exceptions.borrow_mut().exc, exc); - drop(prev); + let mut excs = self.exceptions.borrow_mut(); + debug_assert!( + !excs.stack.is_empty(), + "set_exception called with empty exception stack" + ); + if let Some(top) = excs.stack.last_mut() { + let prev = core::mem::replace(top, exc); + drop(excs); + drop(prev); + } else { + excs.stack.push(exc); + drop(excs); + } + #[cfg(feature = "threading")] + thread::update_thread_exception(self.topmost_exception()); } pub(crate) fn contextualize_exception(&self, exception: &Py) { @@ -1404,13 +1502,7 @@ impl VirtualMachine { pub(crate) fn topmost_exception(&self) -> Option { let excs = self.exceptions.borrow(); - let mut cur = &*excs; - loop { - if let Some(exc) = &cur.exc { - return Some(exc.clone()); - } - cur = cur.prev.as_deref()?; - } + excs.stack.iter().rev().find_map(|e| e.clone()) } pub fn handle_exit_exception(&self, exc: PyBaseExceptionRef) -> u32 { diff --git a/crates/vm/src/vm/thread.rs b/crates/vm/src/vm/thread.rs index af69fa8d8e5..575910f7900 100644 --- a/crates/vm/src/vm/thread.rs +++ b/crates/vm/src/vm/thread.rs @@ -1,6 +1,8 @@ -use crate::frame::Frame; #[cfg(feature = "threading")] -use crate::frame::FrameRef; +use super::FramePtr; +#[cfg(feature = "threading")] +use crate::builtins::PyBaseExceptionRef; +use crate::frame::Frame; use crate::{AsObject, PyObject, VirtualMachine}; #[cfg(feature = "threading")] use alloc::sync::Arc; @@ -12,20 +14,27 @@ use core::{ use itertools::Itertools; use std::thread_local; -/// Type for current frame slot - shared between threads for sys._current_frames() -/// Stores the full frame stack so faulthandler can dump complete tracebacks -/// for all threads. +/// Per-thread shared state for sys._current_frames() and sys._current_exceptions(). +/// The exception field uses atomic operations for lock-free cross-thread reads. +#[cfg(feature = "threading")] +pub struct ThreadSlot { + /// Raw frame pointers, valid while the owning thread's call stack is active. + /// Readers must hold the Mutex and convert to FrameRef inside the lock. + pub frames: parking_lot::Mutex>, + pub exception: crate::PyAtomicRef>, +} + #[cfg(feature = "threading")] -pub type CurrentFrameSlot = Arc>>; +pub type CurrentFrameSlot = Arc; thread_local! { pub(super) static VM_STACK: RefCell>> = Vec::with_capacity(1).into(); pub(crate) static COROUTINE_ORIGIN_TRACKING_DEPTH: Cell = const { Cell::new(0) }; - /// Current thread's frame slot for sys._current_frames() + /// Current thread's slot for sys._current_frames() and sys._current_exceptions() #[cfg(feature = "threading")] - static CURRENT_FRAME_SLOT: RefCell> = const { RefCell::new(None) }; + static CURRENT_THREAD_SLOT: RefCell> = const { RefCell::new(None) }; /// Current top frame for signal-safe traceback walking. /// Mirrors `PyThreadState.current_frame`. Read by faulthandler's signal @@ -49,23 +58,26 @@ pub fn enter_vm(vm: &VirtualMachine, f: impl FnOnce() -> R) -> R { VM_STACK.with(|vms| { vms.borrow_mut().push(vm.into()); - // Initialize frame slot for this thread if not already done + // Initialize thread slot for this thread if not already done #[cfg(feature = "threading")] - init_frame_slot_if_needed(vm); + init_thread_slot_if_needed(vm); scopeguard::defer! { vms.borrow_mut().pop(); } VM_CURRENT.set(vm, f) }) } -/// Initialize frame slot for current thread if not already initialized. +/// Initialize thread slot for current thread if not already initialized. /// Called automatically by enter_vm(). #[cfg(feature = "threading")] -fn init_frame_slot_if_needed(vm: &VirtualMachine) { - CURRENT_FRAME_SLOT.with(|slot| { +fn init_thread_slot_if_needed(vm: &VirtualMachine) { + CURRENT_THREAD_SLOT.with(|slot| { if slot.borrow().is_none() { let thread_id = crate::stdlib::thread::get_ident(); - let new_slot = Arc::new(parking_lot::Mutex::new(Vec::new())); + let new_slot = Arc::new(ThreadSlot { + frames: parking_lot::Mutex::new(Vec::new()), + exception: crate::PyAtomicRef::from(None::), + }); vm.state .thread_frames .lock() @@ -75,13 +87,18 @@ fn init_frame_slot_if_needed(vm: &VirtualMachine) { }); } -/// Push a frame onto the current thread's shared frame stack. -/// Called when a new frame is entered. +/// Push a frame pointer onto the current thread's shared frame stack. +/// The pointed-to frame must remain alive until the matching pop. #[cfg(feature = "threading")] -pub fn push_thread_frame(frame: FrameRef) { - CURRENT_FRAME_SLOT.with(|slot| { +pub fn push_thread_frame(fp: FramePtr) { + CURRENT_THREAD_SLOT.with(|slot| { if let Some(s) = slot.borrow().as_ref() { - s.lock().push(frame); + s.frames.lock().push(fp); + } else { + debug_assert!( + false, + "push_thread_frame called without initialized thread slot" + ); } }); } @@ -90,9 +107,14 @@ pub fn push_thread_frame(frame: FrameRef) { /// Called when a frame is exited. #[cfg(feature = "threading")] pub fn pop_thread_frame() { - CURRENT_FRAME_SLOT.with(|slot| { + CURRENT_THREAD_SLOT.with(|slot| { if let Some(s) = slot.borrow().as_ref() { - s.lock().pop(); + s.frames.lock().pop(); + } else { + debug_assert!( + false, + "pop_thread_frame called without initialized thread slot" + ); } }); } @@ -109,25 +131,51 @@ pub fn get_current_frame() -> *const Frame { CURRENT_FRAME.with(|c| c.load(Ordering::Relaxed) as *const Frame) } -/// Cleanup frame tracking for the current thread. Called at thread exit. +/// Update the current thread's exception slot atomically (no locks). +/// Called from push_exception/pop_exception/set_exception. +#[cfg(feature = "threading")] +pub fn update_thread_exception(exc: Option) { + CURRENT_THREAD_SLOT.with(|slot| { + if let Some(s) = slot.borrow().as_ref() { + // SAFETY: Called only from the owning thread. The old ref is dropped + // here on the owning thread, which is safe. + let _old = unsafe { s.exception.swap(exc) }; + } + }); +} + +/// Collect all threads' current exceptions for sys._current_exceptions(). +/// Acquires the global registry lock briefly, then reads each slot's exception atomically. +#[cfg(feature = "threading")] +pub fn get_all_current_exceptions(vm: &VirtualMachine) -> Vec<(u64, Option)> { + let registry = vm.state.thread_frames.lock(); + registry + .iter() + .map(|(id, slot)| (*id, slot.exception.to_owned())) + .collect() +} + +/// Cleanup thread slot for the current thread. Called at thread exit. #[cfg(feature = "threading")] pub fn cleanup_current_thread_frames(vm: &VirtualMachine) { let thread_id = crate::stdlib::thread::get_ident(); vm.state.thread_frames.lock().remove(&thread_id); - CURRENT_FRAME_SLOT.with(|s| { + CURRENT_THREAD_SLOT.with(|s| { *s.borrow_mut() = None; }); } -/// Reinitialize frame slot after fork. Called in child process. +/// Reinitialize thread slot after fork. Called in child process. /// Creates a fresh slot and registers it for the current thread, /// preserving the current thread's frames from `vm.frames`. #[cfg(feature = "threading")] pub fn reinit_frame_slot_after_fork(vm: &VirtualMachine) { let current_ident = crate::stdlib::thread::get_ident(); - // Preserve the current thread's frames across fork - let current_frames: Vec = vm.frames.borrow().clone(); - let new_slot = Arc::new(parking_lot::Mutex::new(current_frames)); + let current_frames: Vec = vm.frames.borrow().clone(); + let new_slot = Arc::new(ThreadSlot { + frames: parking_lot::Mutex::new(current_frames), + exception: crate::PyAtomicRef::from(vm.topmost_exception()), + }); // After fork, only the current thread exists. If the lock was held by // another thread during fork, force unlock it. @@ -144,8 +192,7 @@ pub fn reinit_frame_slot_after_fork(vm: &VirtualMachine) { registry.insert(current_ident, new_slot.clone()); drop(registry); - // Update thread-local to point to the new slot - CURRENT_FRAME_SLOT.with(|s| { + CURRENT_THREAD_SLOT.with(|s| { *s.borrow_mut() = Some(new_slot); }); } diff --git a/crates/vm/src/warn.rs b/crates/vm/src/warn.rs index b4406ff5246..684630e6af0 100644 --- a/crates/vm/src/warn.rs +++ b/crates/vm/src/warn.rs @@ -551,7 +551,7 @@ fn setup_context( skip_file_prefixes: Option<&PyTupleRef>, vm: &VirtualMachine, ) -> PyResult<(PyStrRef, usize, Option, PyObjectRef)> { - let mut f = vm.current_frame().as_deref().cloned(); + let mut f = vm.current_frame(); // Stack level comparisons to Python code is off by one as there is no // warnings-related stack level to avoid.