-
Notifications
You must be signed in to change notification settings - Fork 1.4k
More fork safety #7380
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
More fork safety #7380
Changes from 1 commit
6217ad6
6b6138b
396fec9
9fe70f2
e37b15f
cfd7f01
1b97419
1ba2821
ca41577
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -872,57 +872,129 @@ pub mod module { | |
| run_at_forkers(after_forkers_parent, false, vm); | ||
| } | ||
|
|
||
| /// Warn if forking from a multi-threaded process | ||
| fn warn_if_multi_threaded(name: &str, vm: &VirtualMachine) { | ||
| // Only check threading if it was already imported | ||
| // Avoid vm.import() which can execute arbitrary Python code in the fork path | ||
| let threading = match vm | ||
| .sys_module | ||
| .get_attr("modules", vm) | ||
| .and_then(|m| m.get_item("threading", vm)) | ||
| /// Best-effort number of OS threads in this process. | ||
| /// Returns <= 0 when unavailable, mirroring CPython fallback behavior. | ||
| fn get_number_of_os_threads() -> isize { | ||
| #[cfg(target_os = "macos")] | ||
| { | ||
| Ok(m) => m, | ||
| Err(_) => return, | ||
| }; | ||
| let active = threading.get_attr("_active", vm).ok(); | ||
| let limbo = threading.get_attr("_limbo", vm).ok(); | ||
| type MachPortT = libc::c_uint; | ||
| type KernReturnT = libc::c_int; | ||
| type MachMsgTypeNumberT = libc::c_uint; | ||
| type ThreadActArrayT = *mut MachPortT; | ||
| const KERN_SUCCESS: KernReturnT = 0; | ||
| unsafe extern "C" { | ||
| fn mach_task_self() -> MachPortT; | ||
| fn task_for_pid( | ||
| task: MachPortT, | ||
| pid: libc::c_int, | ||
| target_task: *mut MachPortT, | ||
| ) -> KernReturnT; | ||
| fn task_threads( | ||
| target_task: MachPortT, | ||
| act_list: *mut ThreadActArrayT, | ||
| act_list_cnt: *mut MachMsgTypeNumberT, | ||
| ) -> KernReturnT; | ||
| fn vm_deallocate( | ||
| target_task: MachPortT, | ||
| address: libc::uintptr_t, | ||
| size: libc::uintptr_t, | ||
| ) -> KernReturnT; | ||
| } | ||
|
|
||
| let count_dict = |obj: Option<crate::PyObjectRef>| -> usize { | ||
| obj.and_then(|o| o.length_opt(vm)) | ||
| .and_then(|r| r.ok()) | ||
| .unwrap_or(0) | ||
| }; | ||
| let self_task = unsafe { mach_task_self() }; | ||
| let mut proc_task: MachPortT = 0; | ||
| if unsafe { task_for_pid(self_task, libc::getpid(), &mut proc_task) } == KERN_SUCCESS { | ||
| let mut threads: ThreadActArrayT = core::ptr::null_mut(); | ||
| let mut n_threads: MachMsgTypeNumberT = 0; | ||
| if unsafe { task_threads(proc_task, &mut threads, &mut n_threads) } == KERN_SUCCESS | ||
| { | ||
| if !threads.is_null() { | ||
| let _ = unsafe { | ||
| vm_deallocate( | ||
| self_task, | ||
| threads as libc::uintptr_t, | ||
| (n_threads as usize * core::mem::size_of::<MachPortT>()) | ||
| as libc::uintptr_t, | ||
| ) | ||
| }; | ||
| } | ||
| return n_threads as isize; | ||
| } | ||
| } | ||
| 0 | ||
| } | ||
| #[cfg(target_os = "linux")] | ||
| { | ||
| use std::io::Read as _; | ||
| let mut file = match std::fs::File::open("/proc/self/stat") { | ||
| Ok(f) => f, | ||
| Err(_) => return 0, | ||
| }; | ||
| let mut buf = [0u8; 160]; | ||
| let n = match file.read(&mut buf) { | ||
| Ok(n) => n, | ||
| Err(_) => return 0, | ||
| }; | ||
| let line = match std::str::from_utf8(&buf[..n]) { | ||
| Ok(s) => s, | ||
| Err(_) => return 0, | ||
| }; | ||
| if let Some(field) = line.split_whitespace().nth(19) { | ||
| return field.parse::<isize>().unwrap_or(0); | ||
| } | ||
| return 0; | ||
| } | ||
| #[cfg(not(any(target_os = "macos", target_os = "linux")))] | ||
| { | ||
| 0 | ||
| } | ||
| } | ||
|
|
||
| let num_threads = count_dict(active) + count_dict(limbo); | ||
| if num_threads > 1 { | ||
| // Use Python warnings module to ensure filters are applied correctly | ||
| let Ok(warnings) = vm.import("warnings", 0) else { | ||
| return; | ||
| /// Warn if forking from a multi-threaded process. | ||
| /// `num_os_threads` should be captured before parent after-fork hooks run. | ||
| fn warn_if_multi_threaded(name: &str, num_os_threads: isize, vm: &VirtualMachine) { | ||
| let num_threads = if num_os_threads > 0 { | ||
| num_os_threads as usize | ||
| } else { | ||
| // CPython fallback: if OS-level count isn't available, use the | ||
| // threading module's active+limbo view. | ||
| // Only check threading if it was already imported. Avoid vm.import() | ||
| // which can execute arbitrary Python code in the fork path. | ||
| let threading = match vm | ||
| .sys_module | ||
| .get_attr("modules", vm) | ||
| .and_then(|m| m.get_item("threading", vm)) | ||
| { | ||
| Ok(m) => m, | ||
| Err(_) => return, | ||
| }; | ||
| let Ok(warn_fn) = warnings.get_attr("warn", vm) else { | ||
| return; | ||
| let active = threading.get_attr("_active", vm).ok(); | ||
| let limbo = threading.get_attr("_limbo", vm).ok(); | ||
|
|
||
| // Match threading module internals and avoid sequence overcounting: | ||
| // count only dict-backed _active/_limbo containers. | ||
| let count_dict = |obj: Option<crate::PyObjectRef>| -> usize { | ||
| obj.and_then(|o| { | ||
| o.downcast_ref::<crate::builtins::PyDict>() | ||
| .map(|d| d.__len__()) | ||
| }) | ||
| .unwrap_or(0) | ||
| }; | ||
|
|
||
| count_dict(active) + count_dict(limbo) | ||
| }; | ||
|
|
||
| if num_threads > 1 { | ||
| let pid = unsafe { libc::getpid() }; | ||
| let msg = format!( | ||
| "This process (pid={}) is multi-threaded, use of {}() may lead to deadlocks in the child.", | ||
| pid, name | ||
| ); | ||
|
|
||
| // Call warnings.warn(message, DeprecationWarning, stacklevel=2) | ||
| // stacklevel=2 to point to the caller of fork() | ||
| let args = crate::function::FuncArgs::new( | ||
| vec![ | ||
| vm.ctx.new_str(msg).into(), | ||
| vm.ctx.exceptions.deprecation_warning.as_object().to_owned(), | ||
| ], | ||
| crate::function::KwArgs::new( | ||
| [("stacklevel".to_owned(), vm.ctx.new_int(2).into())] | ||
| .into_iter() | ||
| .collect(), | ||
| ), | ||
| ); | ||
| let _ = warn_fn.call(args, vm); | ||
| // Match PyErr_WarnFormat(..., stacklevel=1) in CPython. | ||
| // Best effort: ignore failures like CPython does in this path. | ||
| let _ = | ||
| crate::stdlib::warnings::warn(vm.ctx.exceptions.deprecation_warning, msg, 1, vm); | ||
|
Comment on lines
+994
to
+997
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, let's find and read the relevant section of posix.rs
wc -l crates/vm/src/stdlib/posix.rsRepository: RustPython/RustPython Length of output: 99 🏁 Script executed: # Read the specific lines mentioned in the review
sed -n '990,1010p' crates/vm/src/stdlib/posix.rsRepository: RustPython/RustPython Length of output: 866 🏁 Script executed: # Find the warn_if_multi_threaded function definition to see its current signature
rg -n "fn warn_if_multi_threaded" crates/vm/src/stdlib/posix.rs -A 10Repository: RustPython/RustPython Length of output: 689 🏁 Script executed: # Find the warnings::warn function to verify its return type
rg -n "fn warn" crates/vm/src/stdlib/warnings.rs -A 5Repository: RustPython/RustPython Length of output: 820 🏁 Script executed: # Check where warn_if_multi_threaded is called to understand the context
rg -n "warn_if_multi_threaded" crates/vm/src/stdlib/posix.rsRepository: RustPython/RustPython Length of output: 225 🏁 Script executed: # Search for any related discussions or TODOs about fork warning handling
rg -i "cpython|fork" crates/vm/src/stdlib/posix.rs -B 2 -A 2 | grep -A 5 -B 5 "warn\|error"Repository: RustPython/RustPython Length of output: 1656 🏁 Script executed: # Check if there are any tests for the fork warning behavior
fd -e rs | xargs rg "test.*fork\|fork.*test" -l | head -5Repository: RustPython/RustPython Length of output: 47 🏁 Script executed: # Let's check the broader context of the fork function to understand the error handling strategy
sed -n '1025,1060p' crates/vm/src/stdlib/posix.rsRepository: RustPython/RustPython Length of output: 1241 🏁 Script executed: # Check if there are any other similar patterns where PyResult is dropped in critical functions
rg "let _ = .*warn" crates/vm/src/stdlib/posix.rs -B 3 -A 3Repository: RustPython/RustPython Length of output: 47 🌐 Web query:
💡 Result: In CPython, the “
The “warning vs error” handling difference (why it sometimes doesn’t fail under
|
||
| } | ||
| } | ||
|
|
||
|
|
@@ -953,9 +1025,12 @@ pub mod module { | |
| if pid == 0 { | ||
| py_os_after_fork_child(vm); | ||
| } else { | ||
| // Match CPython timing: capture this before parent after-fork hooks | ||
| // in case those hooks start threads. | ||
| let num_os_threads = get_number_of_os_threads(); | ||
| py_os_after_fork_parent(vm); | ||
| // Match CPython timing: warn only after parent callback path resumes world. | ||
| warn_if_multi_threaded("fork", vm); | ||
| warn_if_multi_threaded("fork", num_os_threads, vm); | ||
| } | ||
| if pid == -1 { | ||
| Err(nix::Error::from_raw(saved_errno).into_pyexception(vm)) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Linux:
/proc/self/statparsing is fragile when comm contains spaces.The
commfield (field 2 in/proc/self/stat) is enclosed in parentheses and can contain spaces, parentheses, or newlines. Usingsplit_whitespace().nth(19)will yield incorrect results if the process name contains spaces.For example, a process named "my app" produces:
split_whitespace()splits this into["123", "(my", "app)", "S", ...], makingnth(19)point to the wrong field.CPython handles this by finding the last
)to locate the end of the comm field, then counting fields from there.🛡️ Suggested fix to handle comm field correctly
#[cfg(target_os = "linux")] { use std::io::Read as _; let mut file = match std::fs::File::open("/proc/self/stat") { Ok(f) => f, Err(_) => return 0, }; let mut buf = [0u8; 160]; let n = match file.read(&mut buf) { Ok(n) => n, Err(_) => return 0, }; let line = match core::str::from_utf8(&buf[..n]) { Ok(s) => s, Err(_) => return 0, }; - if let Some(field) = line.split_whitespace().nth(19) { - return field.parse::<isize>().unwrap_or(0); + // Find end of comm field (last ')') to handle spaces in process name + let after_comm = match line.rfind(')') { + Some(idx) => &line[idx + 1..], + None => return 0, + }; + // Field 20 (num_threads) is the 17th field after comm (fields 3-19 are 17 fields) + if let Some(field) = after_comm.split_whitespace().nth(17) { + return field.parse::<isize>().unwrap_or(0); } 0 }🤖 Prompt for AI Agents