Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 130 additions & 1 deletion bytecode/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1075,7 +1075,7 @@ impl<C: Constant> fmt::Debug for CodeObject<C> {
}
}

#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Debug)]
pub struct FrozenModule<C: Constant = ConstantData> {
#[serde(bound(
deserialize = "C: serde::Deserialize<'de>, C::Name: serde::Deserialize<'de>",
Expand All @@ -1084,3 +1084,132 @@ pub struct FrozenModule<C: Constant = ConstantData> {
pub code: CodeObject<C>,
pub package: bool,
}

pub mod frozen_lib {
use super::*;
use bincode::{options, Options};
use std::convert::TryInto;
use std::io;

/// Decodes a frozen library blob such as the one produced by [`encode_lib`].
///
/// `bytes` is decompressed with `lz4_flex::decompress_size_prepended`, and the result is read
/// as a bincode stream that starts with a `u64` element count. Only that count is read here;
/// the entries themselves are deserialized one at a time as the returned iterator is advanced.
///
/// # Panics
///
/// Panics if `bytes` cannot be decompressed, if the element count cannot be deserialized, or
/// if the count does not fit in a `usize`.
pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter {
    let data = lz4_flex::decompress_size_prepended(bytes)
        .expect("frozen lib: data is not a valid size-prepended lz4 block");
    let r = VecReader { data, pos: 0 };
    let mut de = bincode::Deserializer::with_bincode_read(r, options());
    // The sequence length comes first in the stream; see `FrozenModulesIter::next`.
    let count = u64::deserialize(&mut de).expect("frozen lib: missing or malformed module count");
    let len: usize = count
        .try_into()
        .expect("frozen lib: module count does not fit in usize");
    FrozenModulesIter { len, de }
}

/// Iterator over the `(name, module)` entries of a frozen library, as returned by
/// [`decode_lib`]. Each entry is deserialized lazily, on the call to `next` that yields it.
pub struct FrozenModulesIter {
    /// Number of entries that have not been deserialized yet.
    len: usize,
    // ideally this could be a SeqAccess, but I think that would require existential types
    de: bincode::Deserializer<VecReader, bincode::DefaultOptions>,
}

impl Iterator for FrozenModulesIter {
    type Item = (String, FrozenModule);

    /// Deserializes and returns the next entry, or `None` once all entries were consumed.
    ///
    /// # Panics
    ///
    /// Panics if the next entry cannot be deserialized from the underlying data.
    fn next(&mut self) -> Option<Self::Item> {
        // manually mimic bincode's seq encoding, which is <len:u64> <element*len>
        // This probably won't change (bincode doesn't require padding or anything), but
        // it's not guaranteed by semver as far as I can tell
        if self.len == 0 {
            return None;
        }
        let entry = Deserialize::deserialize(&mut self.de)
            .expect("frozen lib: malformed or truncated module entry");
        // Only count the entry as consumed once it has been decoded successfully.
        self.len -= 1;
        Some(entry)
    }

    /// The remaining count is known exactly, so both bounds are `self.len`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.len, Some(self.len))
    }
}

impl ExactSizeIterator for FrozenModulesIter {}

/// Serializes a collection of named frozen modules into a single compressed blob.
///
/// The entries are written as one bincode sequence (via [`SerializeLib`]); the serialized
/// bytes are then passed through `lz4_flex::compress_prepend_size`.
///
/// # Panics
///
/// Panics if bincode serialization fails.
pub fn encode_lib<'a, I>(lib: I) -> Vec<u8>
where
    I: IntoIterator<Item = (&'a str, &'a FrozenModule)>,
    I::IntoIter: ExactSizeIterator + Clone,
{
    let modules = SerializeLib {
        iter: lib.into_iter(),
    };
    let serialized = options().serialize(&modules).unwrap();
    lz4_flex::compress_prepend_size(&serialized)
}

struct SerializeLib<I> {
iter: I,
}

impl<'a, I> Serialize for SerializeLib<I>
where
I: ExactSizeIterator<Item = (&'a str, &'a FrozenModule)> + Clone,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.collect_seq(self.iter.clone())
}
}

/// Owned version of bincode::de::read::SliceReader<'a>: a byte buffer plus a read cursor.
struct VecReader {
    /// The bytes being read.
    data: Vec<u8>,
    /// Offset into `data` of the next unread byte.
    pos: usize,
}

impl VecReader {
    /// Returns the next `length` unread bytes and advances the cursor past them.
    ///
    /// If fewer than `length` bytes remain, an `UnexpectedEof` error is returned and the
    /// cursor is left where it was.
    #[inline(always)]
    fn get_byte_slice(&mut self, length: usize) -> io::Result<&[u8]> {
        let remaining = &self.data[self.pos..];
        if length > remaining.len() {
            return Err(io::ErrorKind::UnexpectedEof.into());
        }
        self.pos += length;
        Ok(&remaining[..length])
    }
}

impl io::Read for VecReader {
    /// Copies as many unread bytes as fit into `buf` and reports how many were copied.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let remaining = &self.data[self.pos..];
        let count = remaining.len().min(buf.len());
        buf[..count].copy_from_slice(&remaining[..count]);
        self.pos += count;
        Ok(count)
    }

    /// Fills `buf` completely, or fails with `UnexpectedEof` without touching `buf`.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        let src = self.get_byte_slice(buf.len())?;
        buf.copy_from_slice(src);
        Ok(())
    }
}

/// Lets bincode pull length-prefixed strings and byte runs straight out of a `VecReader`.
impl<'storage> bincode::BincodeRead<'storage> for VecReader {
    /// Reads `length` bytes, validates them as UTF-8, and hands the string to `visitor`.
    fn forward_read_str<V>(&mut self, length: usize, visitor: V) -> bincode::Result<V::Value>
    where
        V: serde::de::Visitor<'storage>,
    {
        let raw = self.get_byte_slice(length)?;
        let text = ::std::str::from_utf8(raw)
            .map_err(|e| Box::new(bincode::ErrorKind::InvalidUtf8Encoding(e)))?;
        visitor.visit_str(text)
    }

    /// Reads `length` bytes and returns them as a freshly allocated `Vec`.
    fn get_byte_buffer(&mut self, length: usize) -> bincode::Result<Vec<u8>> {
        let raw = self.get_byte_slice(length)?;
        Ok(raw.to_vec())
    }

    /// Reads `length` bytes and hands them to `visitor` as a borrowed slice.
    fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> bincode::Result<V::Value>
    where
        V: serde::de::Visitor<'storage>,
    {
        let raw = self.get_byte_slice(length)?;
        visitor.visit_bytes(raw)
    }
}
}
25 changes: 3 additions & 22 deletions derive/src/compile_bytecode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,30 +330,11 @@ pub fn impl_py_freeze(input: TokenStream2) -> Result<TokenStream2, Diagnostic> {
let crate_name = args.crate_name;
let code_map = args.source.compile(args.mode, args.module_name)?;

let modules_len = code_map.len();

let modules = code_map
.into_iter()
.map(|(module_name, FrozenModule { code, package })| {
let module_name = LitStr::new(&module_name, Span::call_site());
let bytes = code.to_bytes();
let bytes = LitByteStr::new(&bytes, Span::call_site());
quote! {
m.insert(#module_name.into(), #crate_name::FrozenModule {
code: #crate_name::CodeObject::from_bytes(
#bytes
).expect("Deserializing CodeObject failed"),
package: #package,
});
}
});
let data = rustpython_bytecode::frozen_lib::encode_lib(code_map.iter().map(|(k, v)| (&**k, v)));
let bytes = LitByteStr::new(&data, Span::call_site());

let output = quote! {
{
let mut m = ::std::collections::HashMap::with_capacity(#modules_len);
#(#modules)*
m
}
#crate_name::frozen_lib::decode_lib(#bytes)
};

Ok(output)
Expand Down
10 changes: 2 additions & 8 deletions examples/freeze/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::collections::HashMap;

use rustpython_vm as vm;

fn main() -> vm::pyobject::PyResult<()> {
Expand All @@ -11,13 +9,9 @@ fn run(vm: &vm::VirtualMachine) -> vm::pyobject::PyResult<()> {

// the file parameter is relative to the directory where the crate's Cargo.toml is located, see $CARGO_MANIFEST_DIR:
// https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-crates
let modules: HashMap<String, vm::bytecode::FrozenModule> =
vm::py_freeze!(file = "examples/freeze/freeze.py");
let module = vm::py_compile!(file = "examples/freeze/freeze.py");

let res = vm.run_code_obj(
vm.new_code_object(modules.get("frozen").unwrap().code.clone()),
scope,
);
let res = vm.run_code_obj(vm.new_code_object(module), scope);

if let Err(err) = res {
vm::exceptions::print_exception(&vm, err);
Expand Down
4 changes: 2 additions & 2 deletions vm/pylib-crate/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
pub const LIB_PATH: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/Lib");

#[cfg(feature = "compiled-bytecode")]
use {rustpython_bytecode::FrozenModule, std::collections::HashMap};
use rustpython_bytecode::FrozenModule;
#[cfg(feature = "compiled-bytecode")]
pub fn frozen_stdlib() -> HashMap<String, FrozenModule> {
pub fn frozen_stdlib() -> impl Iterator<Item = (String, FrozenModule)> {
rustpython_derive::py_freeze!(dir = "Lib", crate_name = "rustpython_bytecode")
}
32 changes: 14 additions & 18 deletions vm/src/frozen.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::builtins::code;
use crate::bytecode;
use crate::VirtualMachine;
use std::collections::HashMap;

pub fn map_frozen<'a>(
vm: &'a VirtualMachine,
Expand All @@ -19,18 +18,19 @@ pub fn map_frozen<'a>(
})
}

pub fn get_module_inits(
vm: &VirtualMachine,
) -> HashMap<String, code::FrozenModule, ahash::RandomState> {
let mut modules = HashMap::default();

pub fn get_module_inits() -> impl Iterator<Item = (String, bytecode::FrozenModule)> {
let iter = std::iter::empty();
macro_rules! ext_modules {
($($t:tt)*) => {
modules.extend(map_frozen(vm, py_freeze!($($t)*)));
($iter:ident, ($modules:expr)) => {
let $iter = $iter.chain($modules);
};
($iter:ident, $($t:tt)*) => {
ext_modules!($iter, (py_freeze!($($t)*)))
};
}

ext_modules!(
iter,
source = "initialized = True; print(\"Hello world!\")\n",
module_name = "__hello__",
);
Expand All @@ -39,19 +39,15 @@ pub fn get_module_inits(
// in theory be implemented in Rust, but are easiest to do in Python for one reason or another.
// Includes _importlib_bootstrap and _importlib_bootstrap_external
// For Windows: did you forget to run `powershell scripts\symlinks-to-hardlinks.ps1`?
ext_modules!(dir = "Lib/python_builtins/");
ext_modules!(iter, dir = "Lib/python_builtins/");

#[cfg(not(feature = "freeze-stdlib"))]
{
// core stdlib Python modules that the vm calls into, but are still used in Python
// application code, e.g. copyreg
ext_modules!(dir = "Lib/core_modules/");
}
// core stdlib Python modules that the vm calls into, but are still used in Python
// application code, e.g. copyreg
ext_modules!(iter, dir = "Lib/core_modules/");
// if we're on freeze-stdlib, the core stdlib modules will be included anyway
#[cfg(feature = "freeze-stdlib")]
{
modules.extend(map_frozen(vm, rustpython_pylib::frozen_stdlib()));
}
ext_modules!(iter, (rustpython_pylib::frozen_stdlib()));

modules
iter
}
2 changes: 1 addition & 1 deletion vm/src/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ impl VirtualMachine {
initialized: false,
};

let frozen = frozen::get_module_inits(&vm);
let frozen = frozen::map_frozen(&vm, frozen::get_module_inits()).collect();
PyRc::get_mut(&mut vm.state).unwrap().frozen = frozen;

module::init_module_dict(
Expand Down