Skip to content

Commit e1b22f1

Browse files
authored
vm.run_pyc_bytes (#6645)
* rustpython_vm::import::check_pyc_magic_number_bytes
* vm.new_scope_with_main
* PyCode::from_pyc
* vm.run_pyc_bytes
* add boundary check
1 parent ec564ac commit e1b22f1

File tree

6 files changed

+160
-69
lines changed

6 files changed

+160
-69
lines changed

crates/vm/src/builtins/code.rs

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
33
use super::{PyBytesRef, PyStrRef, PyTupleRef, PyType};
44
use crate::{
5-
AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyResult, VirtualMachine,
5+
AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine,
66
builtins::PyStrInterned,
77
bytecode::{self, AsBag, BorrowedConstant, CodeFlags, Constant, ConstantBag},
88
class::{PyClassImpl, StaticType},
9-
convert::ToPyObject,
9+
convert::{ToPyException, ToPyObject},
1010
frozen,
1111
function::OptionalArg,
1212
types::{Constructor, Representable},
@@ -336,6 +336,44 @@ impl PyCode {
336336
/// Wrap an already-built `CodeObject` in a `PyCode` payload.
pub const fn new(code: CodeObject) -> Self {
337337
Self { code }
338338
}
339+
pub fn from_pyc_path(path: &std::path::Path, vm: &VirtualMachine) -> PyResult<PyRef<Self>> {
340+
let name = match path.file_stem() {
341+
Some(stem) => stem.display().to_string(),
342+
None => "".to_owned(),
343+
};
344+
let content = std::fs::read(path).map_err(|e| e.to_pyexception(vm))?;
345+
Self::from_pyc(
346+
&content,
347+
Some(&name),
348+
Some(&path.display().to_string()),
349+
Some("<source>"),
350+
vm,
351+
)
352+
}
353+
pub fn from_pyc(
354+
pyc_bytes: &[u8],
355+
name: Option<&str>,
356+
bytecode_path: Option<&str>,
357+
source_path: Option<&str>,
358+
vm: &VirtualMachine,
359+
) -> PyResult<PyRef<Self>> {
360+
if !crate::import::check_pyc_magic_number_bytes(pyc_bytes) {
361+
return Err(vm.new_value_error("pyc bytes has wrong MAGIC"));
362+
}
363+
let bootstrap_external = vm.import("_frozen_importlib_external", 0)?;
364+
let compile_bytecode = bootstrap_external.get_attr("_compile_bytecode", vm)?;
365+
// 16 is the pyc header length
366+
let Some((_, code_bytes)) = pyc_bytes.split_at_checked(16) else {
367+
return Err(vm.new_value_error(format!(
368+
"pyc_bytes header is broken. 16 bytes expected but {} bytes given.",
369+
pyc_bytes.len()
370+
)));
371+
};
372+
let code_bytes_obj = vm.ctx.new_bytes(code_bytes.to_vec());
373+
let compiled =
374+
compile_bytecode.call((code_bytes_obj, name, bytecode_path, source_path), vm)?;
375+
compiled.try_downcast(vm)
376+
}
339377
}
340378

341379
impl fmt::Debug for PyCode {

crates/vm/src/import.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ use crate::{
88
vm::{VirtualMachine, resolve_frozen_alias, thread},
99
};
1010

11+
pub(crate) fn check_pyc_magic_number_bytes(buf: &[u8]) -> bool {
12+
buf.starts_with(&crate::version::PYC_MAGIC_NUMBER_BYTES[..2])
13+
}
14+
1115
pub(crate) fn init_importlib_base(vm: &mut VirtualMachine) -> PyResult<PyObjectRef> {
1216
flame_guard!("init importlib");
1317

crates/vm/src/vm/mod.rs

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ mod vm_ops;
1919
use crate::{
2020
AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult,
2121
builtins::{
22-
PyBaseExceptionRef, PyDictRef, PyInt, PyList, PyModule, PyStr, PyStrInterned, PyStrRef,
23-
PyTypeRef, code::PyCode, pystr::AsPyStr, tuple::PyTuple,
22+
PyBaseExceptionRef, PyDict, PyDictRef, PyInt, PyList, PyModule, PyStr, PyStrInterned,
23+
PyStrRef, PyTypeRef, code::PyCode, pystr::AsPyStr, tuple::PyTuple,
2424
},
2525
codecs::CodecsRegistry,
2626
common::{hash::HashSecret, lock::PyMutex, rc::PyRc},
@@ -460,6 +460,42 @@ impl VirtualMachine {
460460
self.signal_rx = Some(signal_rx);
461461
}
462462

463+
/// Execute Python bytecode (`.pyc`) from an in-memory buffer.
464+
///
465+
/// When the RustPython CLI is available, `.pyc` files are normally executed by
466+
/// invoking `rustpython <input>.pyc`. This method provides an alternative for
467+
/// environments where the binary is unavailable or file I/O is restricted
468+
/// (e.g. WASM).
469+
///
470+
/// ## Preparing a `.pyc` file
471+
///
472+
/// First, compile a Python source file into bytecode:
473+
///
474+
/// ```sh
475+
/// # Generate a .pyc file
476+
/// $ rustpython -m py_compile <input>.py
477+
/// ```
478+
///
479+
/// ## Running the bytecode
480+
///
481+
/// Load the resulting `.pyc` file into memory and execute it using the VM:
482+
///
483+
/// ```no_run
484+
/// use rustpython_vm::Interpreter;
485+
/// Interpreter::without_stdlib(Default::default()).enter(|vm| {
486+
/// let bytes = std::fs::read("__pycache__/<input>.rustpython-313.pyc").unwrap();
487+
/// let main_scope = vm.new_scope_with_main().unwrap();
488+
/// vm.run_pyc_bytes(&bytes, main_scope);
489+
/// });
490+
/// ```
491+
pub fn run_pyc_bytes(&self, pyc_bytes: &[u8], scope: Scope) -> PyResult<()> {
492+
let code = PyCode::from_pyc(pyc_bytes, Some("<pyc_bytes>"), None, None, self)?;
493+
self.with_simple_run("<source>", |_module_dict| {
494+
self.run_code_obj(code, scope)?;
495+
Ok(())
496+
})
497+
}
498+
463499
pub fn run_code_obj(&self, code: PyRef<PyCode>, scope: Scope) -> PyResult {
464500
use crate::builtins::PyFunction;
465501

@@ -500,6 +536,52 @@ impl VirtualMachine {
500536
}
501537
}
502538

539+
/// Run `run` with main scope.
540+
fn with_simple_run(
541+
&self,
542+
path: &str,
543+
run: impl FnOnce(&Py<PyDict>) -> PyResult<()>,
544+
) -> PyResult<()> {
545+
let sys_modules = self.sys_module.get_attr(identifier!(self, modules), self)?;
546+
let main_module = sys_modules.get_item(identifier!(self, __main__), self)?;
547+
let module_dict = main_module.dict().expect("main module must have __dict__");
548+
549+
// Track whether we set __file__ (for cleanup)
550+
let set_file_name = !module_dict.contains_key(identifier!(self, __file__), self);
551+
if set_file_name {
552+
module_dict.set_item(
553+
identifier!(self, __file__),
554+
self.ctx.new_str(path).into(),
555+
self,
556+
)?;
557+
module_dict.set_item(identifier!(self, __cached__), self.ctx.none(), self)?;
558+
}
559+
560+
let result = run(&module_dict);
561+
562+
self.flush_io();
563+
564+
// Cleanup __file__ and __cached__ after execution
565+
if set_file_name {
566+
let _ = module_dict.del_item(identifier!(self, __file__), self);
567+
let _ = module_dict.del_item(identifier!(self, __cached__), self);
568+
}
569+
570+
result
571+
}
572+
573+
/// flush_io
574+
///
575+
/// Flush stdout and stderr. Errors are silently ignored.
576+
fn flush_io(&self) {
577+
if let Ok(stdout) = self.sys_module.get_attr("stdout", self) {
578+
let _ = self.call_method(&stdout, identifier!(self, flush).as_str(), ());
579+
}
580+
if let Ok(stderr) = self.sys_module.get_attr("stderr", self) {
581+
let _ = self.call_method(&stderr, identifier!(self, flush).as_str(), ());
582+
}
583+
}
584+
503585
pub fn current_recursion_depth(&self) -> usize {
504586
self.recursion_depth.get()
505587
}

crates/vm/src/vm/python_run.rs

Lines changed: 11 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
//! Python code execution functions.
22
33
use crate::{
4-
PyResult, VirtualMachine,
5-
builtins::{PyCode, PyDictRef},
4+
Py, PyResult, VirtualMachine,
5+
builtins::{PyCode, PyDict},
66
compiler::{self},
77
scope::Scope,
88
};
@@ -25,37 +25,14 @@ impl VirtualMachine {
2525
/// Execute a Python file with __main__ module setup.
2626
/// Sets __file__ and __cached__ before execution, removes them after.
2727
fn run_simple_file(&self, scope: Scope, path: &str) -> PyResult<()> {
28-
let sys_modules = self.sys_module.get_attr(identifier!(self, modules), self)?;
29-
let main_module = sys_modules.get_item(identifier!(self, __main__), self)?;
30-
let module_dict = main_module.dict().expect("main module must have __dict__");
31-
32-
// Track whether we set __file__ (for cleanup)
33-
let set_file_name = !module_dict.contains_key(identifier!(self, __file__), self);
34-
if set_file_name {
35-
module_dict.set_item(
36-
identifier!(self, __file__),
37-
self.ctx.new_str(path).into(),
38-
self,
39-
)?;
40-
module_dict.set_item(identifier!(self, __cached__), self.ctx.none(), self)?;
41-
}
42-
43-
let result = self.run_simple_file_inner(&module_dict, scope, path);
44-
45-
self.flush_io();
46-
47-
// Cleanup __file__ and __cached__ after execution
48-
if set_file_name {
49-
let _ = module_dict.del_item(identifier!(self, __file__), self);
50-
let _ = module_dict.del_item(identifier!(self, __cached__), self);
51-
}
52-
53-
result
28+
self.with_simple_run(path, |module_dict| {
29+
self.run_simple_file_inner(module_dict, scope, path)
30+
})
5431
}
5532

5633
fn run_simple_file_inner(
5734
&self,
58-
module_dict: &PyDictRef,
35+
module_dict: &Py<PyDict>,
5936
scope: Scope,
6037
path: &str,
6138
) -> PyResult<()> {
@@ -123,22 +100,10 @@ impl VirtualMachine {
123100
.map_err(|err| self.new_syntax_error(&err, Some(source)))?;
124101
self.run_code_obj(code_obj, scope)
125102
}
126-
127-
/// flush_io
128-
///
129-
/// Flush stdout and stderr. Errors are silently ignored.
130-
fn flush_io(&self) {
131-
if let Ok(stdout) = self.sys_module.get_attr("stdout", self) {
132-
let _ = self.call_method(&stdout, identifier!(self, flush).as_str(), ());
133-
}
134-
if let Ok(stderr) = self.sys_module.get_attr("stderr", self) {
135-
let _ = self.call_method(&stderr, identifier!(self, flush).as_str(), ());
136-
}
137-
}
138103
}
139104

140105
fn set_main_loader(
141-
module_dict: &PyDictRef,
106+
module_dict: &Py<PyDict>,
142107
filename: &str,
143108
loader_name: &str,
144109
vm: &VirtualMachine,
@@ -162,10 +127,10 @@ fn maybe_pyc_file(path: &str) -> bool {
162127
if path.ends_with(".pyc") {
163128
return true;
164129
}
165-
maybe_pyc_file_with_magic(path, &crate::version::PYC_MAGIC_NUMBER_BYTES).unwrap_or(false)
130+
maybe_pyc_file_with_magic(path).unwrap_or(false)
166131
}
167132

168-
fn maybe_pyc_file_with_magic(path: &str, magic_number: &[u8]) -> std::io::Result<bool> {
133+
fn maybe_pyc_file_with_magic(path: &str) -> std::io::Result<bool> {
169134
let path_obj = std::path::Path::new(path);
170135
if !path_obj.is_file() {
171136
return Ok(false);
@@ -175,12 +140,12 @@ fn maybe_pyc_file_with_magic(path: &str, magic_number: &[u8]) -> std::io::Result
175140
let mut buf = [0u8; 2];
176141

177142
use std::io::Read;
178-
if file.read(&mut buf)? != 2 || magic_number.len() < 2 {
143+
if file.read(&mut buf)? != 2 {
179144
return Ok(false);
180145
}
181146

182147
// Read only two bytes of the magic. If the file was opened in
183148
// text mode, the bytes 3 and 4 of the magic (\r\n) might not
184149
// be read as they are on disk.
185-
Ok(buf == magic_number[..2])
150+
Ok(crate::import::check_pyc_magic_number_bytes(&buf))
186151
}

crates/vm/src/vm/vm_new.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::{
2-
AsObject, Py, PyObject, PyObjectRef, PyRef,
2+
AsObject, Py, PyObject, PyObjectRef, PyRef, PyResult,
33
builtins::{
44
PyBaseException, PyBaseExceptionRef, PyBytesRef, PyDictRef, PyModule, PyOSError, PyStrRef,
55
PyType, PyTypeRef,
@@ -62,6 +62,23 @@ impl VirtualMachine {
6262
Scope::with_builtins(None, self.ctx.new_dict(), self)
6363
}
6464

65+
/// Create a new scope with builtins and register it as the `__main__` module.
///
/// A module named `__main__` is created over the scope's globals, given an
/// empty `__annotations__` dict, and stored in `sys.modules["__main__"]`.
///
/// # Errors
///
/// Returns the Python exception raised if `__annotations__` cannot be set,
/// if `sys.modules` cannot be looked up, or if the module cannot be stored
/// in it.
pub fn new_scope_with_main(&self) -> PyResult<Scope> {
    let scope = self.new_scope_with_builtins();
    let main_module = self.new_module("__main__", scope.globals.clone(), None);
    // This function already returns `PyResult`, so report a failure to the
    // caller instead of panicking.
    main_module
        .dict()
        .set_item("__annotations__", self.ctx.new_dict().into(), self)?;

    let sys_modules = self.sys_module.get_attr("modules", self)?;
    sys_modules.set_item("__main__", main_module.into(), self)?;

    Ok(scope)
}
81+
6582
pub fn new_function<F, FKind>(&self, name: &'static str, f: F) -> PyRef<PyNativeFunction>
6683
where
6784
F: IntoPyNativeFn<FKind>,

src/lib.rs

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -114,21 +114,6 @@ pub fn run(init: impl FnOnce(&mut VirtualMachine) + 'static) -> ExitCode {
114114
rustpython_vm::common::os::exit_code(exitcode)
115115
}
116116

117-
fn setup_main_module(vm: &VirtualMachine) -> PyResult<Scope> {
118-
let scope = vm.new_scope_with_builtins();
119-
let main_module = vm.new_module("__main__", scope.globals.clone(), None);
120-
main_module
121-
.dict()
122-
.set_item("__annotations__", vm.ctx.new_dict().into(), vm)
123-
.expect("Failed to initialize __main__.__annotations__");
124-
125-
vm.sys_module
126-
.get_attr("modules", vm)?
127-
.set_item("__main__", main_module.into(), vm)?;
128-
129-
Ok(scope)
130-
}
131-
132117
fn get_pip(scope: Scope, vm: &VirtualMachine) -> PyResult<()> {
133118
let get_getpip = rustpython_vm::py_compile!(
134119
source = r#"\
@@ -221,7 +206,7 @@ fn run_rustpython(vm: &VirtualMachine, run_mode: RunMode) -> PyResult<()> {
221206
#[cfg(feature = "flame-it")]
222207
let main_guard = flame::start_guard("RustPython main");
223208

224-
let scope = setup_main_module(vm)?;
209+
let scope = vm.new_scope_with_main()?;
225210

226211
// Import site first, before setting sys.path[0]
227212
// This matches CPython's behavior where site.removeduppaths() runs
@@ -366,11 +351,11 @@ mod tests {
366351
fn test_run_script() {
367352
interpreter().enter(|vm| {
368353
vm.unwrap_pyresult((|| {
369-
let scope = setup_main_module(vm)?;
354+
let scope = vm.new_scope_with_main()?;
370355
// test file run
371356
vm.run_any_file(scope, "extra_tests/snippets/dir_main/__main__.py")?;
372357

373-
let scope = setup_main_module(vm)?;
358+
let scope = vm.new_scope_with_main()?;
374359
// test module run (directory with __main__.py)
375360
run_file(vm, scope, "extra_tests/snippets/dir_main")?;
376361

0 commit comments

Comments
 (0)