Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
multiphase
  • Loading branch information
youknowone committed Jan 21, 2026
commit fee1a4b097a64aceb050180cbfc3474c403a5c69
16 changes: 15 additions & 1 deletion crates/stdlib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,23 @@ mod tkinter;
use rustpython_common as common;
use rustpython_vm as vm;

use crate::vm::{builtins, stdlib::StdlibInitFunc};
use crate::vm::{
builtins,
stdlib::{StdlibDefFunc, StdlibInitFunc},
};
use alloc::borrow::Cow;

/// Returns module definitions for multi-phase init modules.
/// These modules are added to sys.modules BEFORE their exec function runs,
/// allowing safe circular imports.
pub fn get_module_defs() -> impl Iterator<Item = (Cow<'static, str>, StdlibDefFunc)> {
[(
Cow::from("_asyncio"),
_asyncio::__module_def as StdlibDefFunc,
)]
.into_iter()
}

pub fn get_module_inits() -> impl Iterator<Item = (Cow<'static, str>, StdlibInitFunc)> {
macro_rules! modules {
{
Expand Down
2 changes: 1 addition & 1 deletion crates/vm/src/builtins/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pub use mappingproxy::PyMappingProxy;
pub(crate) mod memory;
pub use memory::PyMemoryView;
pub(crate) mod module;
pub use module::{PyModule, PyModuleDef};
pub use module::{PyModule, PyModuleDef, PyModuleSlots};
pub(crate) mod namespace;
pub use namespace::PyNamespace;
pub(crate) mod object;
Expand Down
38 changes: 37 additions & 1 deletion crates/vm/src/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,50 @@ pub fn import_frozen(vm: &VirtualMachine, module_name: &str) -> PyResult {
}

pub fn import_builtin(vm: &VirtualMachine, module_name: &str) -> PyResult {
use crate::PyPayload;
use crate::builtins::PyModule;

let sys_modules = vm.sys_module.get_attr("modules", vm)?;

// Check if already in sys.modules (handles recursive imports)
if let Ok(module) = sys_modules.get_item(module_name, vm) {
return Ok(module);
}

// Try multi-phase init first (preferred for modules that import other modules)
if let Some(def_func) = vm.state.module_defs.get(module_name) {
let def = def_func(&vm.ctx);

// Phase 1: Create module from definition
let module = PyModule::from_def(def).into_ref(&vm.ctx);

// Initialize module dict
let dict = vm.ctx.new_dict();
dict.set_item("__name__", vm.ctx.new_str(def.name.as_str()).into(), vm)?;
if let Some(doc) = def.doc {
dict.set_item("__doc__", vm.ctx.new_str(doc.as_str()).into(), vm)?;
}
module.set_attr("__dict__", dict, vm)?;

// Add to sys.modules BEFORE exec (critical for circular import handling)
sys_modules.set_item(module_name, module.clone().into(), vm)?;

// Phase 2: Call exec slot (can safely import other modules now)
if let Some(exec) = def.slots.exec {
exec(vm, &module)?;
}

return Ok(module.into());
}

// Fall back to legacy single-phase init
let make_module_func = vm.state.module_inits.get(module_name).ok_or_else(|| {
vm.new_import_error(
format!("Cannot import builtin module {module_name}"),
vm.ctx.new_str(module_name),
)
})?;
let module = make_module_func(vm);
let sys_modules = vm.sys_module.get_attr("modules", vm)?;
sys_modules.set_item(module_name, module.as_object().to_owned(), vm)?;
Ok(module.into())
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Expand Down
53 changes: 44 additions & 9 deletions crates/vm/src/stdlib/imp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ mod _imp {
#[pyfunction]
fn is_builtin(name: PyStrRef, vm: &VirtualMachine) -> bool {
vm.state.module_inits.contains_key(name.as_str())
|| vm.state.module_defs.contains_key(name.as_str())
}

#[pyfunction]
Expand All @@ -116,22 +117,56 @@ mod _imp {

#[pyfunction]
fn create_builtin(spec: PyObjectRef, vm: &VirtualMachine) -> PyResult {
use crate::PyPayload;
use crate::builtins::PyModule;

let sys_modules = vm.sys_module.get_attr("modules", vm).unwrap();
let name: PyStrRef = spec.get_attr("name", vm)?.try_into_value(vm)?;

let module = if let Ok(module) = sys_modules.get_item(&*name, vm) {
module
} else if let Some(make_module_func) = vm.state.module_inits.get(name.as_str()) {
make_module_func(vm).into()
} else {
vm.ctx.none()
};
Ok(module)
// Check sys.modules first
if let Ok(module) = sys_modules.get_item(&*name, vm) {
return Ok(module);
}

// Try multi-phase init modules first (they need special handling)
if let Some(def_func) = vm.state.module_defs.get(name.as_str()) {
let def = def_func(&vm.ctx);

// Phase 1: Create module from definition
let module = PyModule::from_def(def).into_ref(&vm.ctx);

// Initialize module dict
let dict = vm.ctx.new_dict();
dict.set_item("__name__", vm.ctx.new_str(def.name.as_str()).into(), vm)?;
if let Some(doc) = def.doc {
dict.set_item("__doc__", vm.ctx.new_str(doc.as_str()).into(), vm)?;
}
module.set_attr("__dict__", dict, vm)?;

// Add to sys.modules BEFORE exec (critical for circular import handling)
sys_modules.set_item(&*name, module.clone().into(), vm)?;

// Phase 2: Call exec slot (can safely import other modules now)
if let Some(exec) = def.slots.exec {
exec(vm, &module)?;
}
Comment on lines +122 to +149
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Clean up sys.modules when the exec slot fails.

Line 143 inserts the module into sys.modules before exec. If Line 147–149 errors, a partially initialized module remains cached and may be reused by later imports. Consider removing it on failure to match CPython’s behavior.

🛠️ Proposed fix
-            if let Some(exec) = def.slots.exec {
-                exec(vm, &module)?;
-            }
+            if let Some(exec) = def.slots.exec {
+                if let Err(err) = exec(vm, &module) {
+                    let _ = sys_modules.del_item(&*name, vm);
+                    return Err(err);
+                }
+            }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// Check sys.modules first
if let Ok(module) = sys_modules.get_item(&*name, vm) {
return Ok(module);
}
// Try multi-phase init modules first (they need special handling)
if let Some(&def) = vm.state.module_defs.get(name.as_str()) {
// Phase 1: Create module (use create slot if provided, else default creation)
let module = if let Some(create) = def.slots.create {
// Custom module creation
create(vm, &spec, def)?
} else {
// Default module creation
PyModule::from_def(def).into_ref(&vm.ctx)
};
// Initialize module dict and methods
// Corresponds to PyModule_FromDefAndSpec: md_def, _add_methods_to_object, PyModule_SetDocString
PyModule::__init_dict_from_def(vm, &module);
module.__init_methods(vm)?;
// Add to sys.modules BEFORE exec (critical for circular import handling)
sys_modules.set_item(&*name, module.clone().into(), vm)?;
// Phase 2: Call exec slot (can safely import other modules now)
if let Some(exec) = def.slots.exec {
exec(vm, &module)?;
}
// Check sys.modules first
if let Ok(module) = sys_modules.get_item(&*name, vm) {
return Ok(module);
}
// Try multi-phase init modules first (they need special handling)
if let Some(&def) = vm.state.module_defs.get(name.as_str()) {
// Phase 1: Create module (use create slot if provided, else default creation)
let module = if let Some(create) = def.slots.create {
// Custom module creation
create(vm, &spec, def)?
} else {
// Default module creation
PyModule::from_def(def).into_ref(&vm.ctx)
};
// Initialize module dict and methods
// Corresponds to PyModule_FromDefAndSpec: md_def, _add_methods_to_object, PyModule_SetDocString
PyModule::__init_dict_from_def(vm, &module);
module.__init_methods(vm)?;
// Add to sys.modules BEFORE exec (critical for circular import handling)
sys_modules.set_item(&*name, module.clone().into(), vm)?;
// Phase 2: Call exec slot (can safely import other modules now)
if let Some(exec) = def.slots.exec {
if let Err(err) = exec(vm, &module) {
let _ = sys_modules.del_item(&*name, vm);
return Err(err);
}
}
🤖 Prompt for AI Agents
In `@crates/vm/src/stdlib/imp.rs` around lines 122 - 149, When inserting the
module into sys.modules before calling the def.slots.exec slot (module variable,
sys_modules.set_item and def.slots.exec), ensure you remove that entry if exec
returns an Err so a partially-initialized module is not left cached; change the
exec call to match/if-let on its Result, and on Err call
sys_modules.del_item(&*name, vm) (or the equivalent removal API) before
propagating the error so behavior matches CPython’s cleanup on exec failure.


return Ok(module.into());
}

// Fall back to legacy single-phase init
if let Some(make_module_func) = vm.state.module_inits.get(name.as_str()) {
let module = make_module_func(vm);
sys_modules.set_item(&*name, module.clone().into(), vm)?;
return Ok(module.into());
}

Ok(vm.ctx.none())
}

#[pyfunction]
fn exec_builtin(_mod: PyRef<PyModule>) -> i32 {
// TODO: Should we do something here?
// For multi-phase init modules, exec is already called in create_builtin
0
}

Expand Down
21 changes: 20 additions & 1 deletion crates/vm/src/stdlib/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,22 @@ mod winapi;
#[cfg(windows)]
mod winreg;

use crate::{PyRef, VirtualMachine, builtins::PyModule};
use crate::{Context, PyRef, VirtualMachine, builtins::PyModule, builtins::PyModuleDef};
use alloc::borrow::Cow;
use std::collections::HashMap;

/// Legacy single-phase init: function that creates and populates a module in one step
pub type StdlibInitFunc = Box<py_dyn_fn!(dyn Fn(&VirtualMachine) -> PyRef<PyModule>)>;

/// Multi-phase init: function that returns a module definition
/// The import machinery will:
/// 1. Create module from def
/// 2. Add to sys.modules
/// 3. Call exec slot (which can safely import other modules)
pub type StdlibDefFunc = fn(&Context) -> &'static PyModuleDef;

pub type StdlibMap = HashMap<Cow<'static, str>, StdlibInitFunc, ahash::RandomState>;
pub type StdlibDefMap = HashMap<Cow<'static, str>, StdlibDefFunc, ahash::RandomState>;

pub fn get_module_inits() -> StdlibMap {
macro_rules! modules {
Expand Down Expand Up @@ -154,3 +164,12 @@ pub fn get_module_inits() -> StdlibMap {
}
}
}

/// Returns module definitions for multi-phase init modules.
/// These modules use CPython's two-phase initialization pattern:
/// 1. Create module from def and add to sys.modules
/// 2. Call exec slot (can safely import other modules without circular import issues)
pub fn get_module_defs() -> StdlibDefMap {
// Currently empty - modules will be migrated to multi-phase init as needed
HashMap::default()
}
2 changes: 2 additions & 0 deletions crates/vm/src/stdlib/sys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ mod sys {
#[pyattr]
fn builtin_module_names(vm: &VirtualMachine) -> PyTupleRef {
let mut module_names: Vec<_> = vm.state.module_inits.keys().cloned().collect();
// Also include multi-phase init modules
module_names.extend(vm.state.module_defs.keys().cloned());
module_names.push("sys".into());
module_names.push("builtins".into());
module_names.sort();
Comment on lines 168 to 174
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Deduplicate sys.builtin_module_names entries.

module_defs may already contain sys/builtins, so pushing them unconditionally can introduce duplicates in the exported tuple. Consider deduping after sort to keep the public surface clean.

🩹 Proposed fix
         module_names.sort();
+        module_names.dedup();
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
fn builtin_module_names(vm: &VirtualMachine) -> PyTupleRef {
let mut module_names: Vec<_> = vm.state.module_inits.keys().cloned().collect();
module_names.push("sys".into());
module_names.push("builtins".into());
let mut module_names: Vec<String> =
vm.state.module_defs.keys().map(|&s| s.to_owned()).collect();
module_names.push("sys".to_owned());
module_names.push("builtins".to_owned());
module_names.sort();
fn builtin_module_names(vm: &VirtualMachine) -> PyTupleRef {
let mut module_names: Vec<String> =
vm.state.module_defs.keys().map(|&s| s.to_owned()).collect();
module_names.push("sys".to_owned());
module_names.push("builtins".to_owned());
module_names.sort();
module_names.dedup();
🤖 Prompt for AI Agents
In `@crates/vm/src/stdlib/sys.rs` around lines 168 - 174, The returned tuple from
builtin_module_names may contain duplicate "sys" or "builtins" entries; in the
builtin_module_names function, after collecting and pushing module names
(module_names) and sorting, remove duplicates (e.g., dedupe the Vec<String>
in-place) before converting to a PyTupleRef so the exported tuple contains
unique module names only.

Expand Down
20 changes: 20 additions & 0 deletions crates/vm/src/vm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ struct ExceptionStack {
pub struct PyGlobalState {
pub config: PyConfig,
pub module_inits: stdlib::StdlibMap,
pub module_defs: stdlib::StdlibDefMap,
pub frozen: HashMap<&'static str, FrozenModule, ahash::RandomState>,
pub stacksize: AtomicCell<usize>,
pub thread_count: AtomicCell<usize>,
Expand Down Expand Up @@ -170,6 +171,7 @@ impl VirtualMachine {
));

let module_inits = stdlib::get_module_inits();
let module_defs = stdlib::get_module_defs();

let seed = match config.settings.hash_seed {
Some(seed) => seed,
Expand Down Expand Up @@ -203,6 +205,7 @@ impl VirtualMachine {
state: PyRc::new(PyGlobalState {
config,
module_inits,
module_defs,
frozen: HashMap::default(),
stacksize: AtomicCell::new(0),
thread_count: AtomicCell::new(0),
Expand Down Expand Up @@ -490,6 +493,23 @@ impl VirtualMachine {
self.state_mut().module_inits.extend(iter);
}

/// Add a module definition for multi-phase initialization.
/// These modules are added to sys.modules BEFORE their exec function runs,
/// allowing safe circular imports.
pub fn add_native_module_def<S>(&mut self, name: S, def_func: stdlib::StdlibDefFunc)
where
S: Into<Cow<'static, str>>,
{
self.state_mut().module_defs.insert(name.into(), def_func);
}

pub fn add_native_module_defs<I>(&mut self, iter: I)
where
I: IntoIterator<Item = (Cow<'static, str>, stdlib::StdlibDefFunc)>,
{
self.state_mut().module_defs.extend(iter);
}

/// Can only be used in the initialization closure passed to [`Interpreter::with_init`]
pub fn add_frozen<I>(&mut self, frozen: I)
where
Expand Down
2 changes: 2 additions & 0 deletions src/interpreter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ impl InterpreterConfig {
/// Initializes all standard library modules for the given VM.
#[cfg(feature = "stdlib")]
pub fn init_stdlib(vm: &mut VirtualMachine) {
// Add multi-phase init modules first (they're checked first in import_builtin)
vm.add_native_module_defs(rustpython_stdlib::get_module_defs());
vm.add_native_modules(rustpython_stdlib::get_module_inits());

#[cfg(feature = "freeze-stdlib")]
Expand Down