diff --git a/crates/stdlib/src/pystruct.rs b/crates/stdlib/src/pystruct.rs index 8cf1023c8ca..c525942e35e 100644 --- a/crates/stdlib/src/pystruct.rs +++ b/crates/stdlib/src/pystruct.rs @@ -51,9 +51,8 @@ pub(crate) mod _struct { s } b @ PyBytes => { - let ascii_str = ascii::AsciiStr::from_ascii(&b).map_err(|_| { - new_struct_error(vm, "bad char in struct format".to_owned()) - })?; + let ascii_str = ascii::AsciiStr::from_ascii(&b) + .map_err(|_| new_struct_error(vm, "bad char in struct format"))?; vm.ctx.new_str(ascii_str) } other => @@ -192,7 +191,7 @@ pub(crate) mod _struct { if format_spec.size == 0 { Err(new_struct_error( vm, - "cannot iteratively unpack with a struct of length 0".to_owned(), + "cannot iteratively unpack with a struct of length 0", )) } else if !buffer.len().is_multiple_of(format_spec.size) { Err(new_struct_error( diff --git a/crates/vm/src/anystr.rs b/crates/vm/src/anystr.rs index f1c35ecd65f..4afea3ce7b8 100644 --- a/crates/vm/src/anystr.rs +++ b/crates/vm/src/anystr.rs @@ -1,15 +1,16 @@ +use core::ops::Range; + +use icu_properties::props::{ + BinaryProperty, EnumeratedProperty, GeneralCategory, GeneralCategoryGroup, +}; +use num_traits::{cast::ToPrimitive, sign::Signed}; + use crate::{ Py, PyObject, PyObjectRef, PyResult, TryFromObject, VirtualMachine, builtins::{PyIntRef, PyTuple}, convert::TryFromBorrowedObject, function::OptionalOption, }; -use icu_properties::props::{ - BinaryProperty, EnumeratedProperty, GeneralCategory, GeneralCategoryGroup, -}; -use num_traits::{cast::ToPrimitive, sign::Signed}; - -use core::ops::Range; #[derive(FromArgs)] pub struct SplitArgs { @@ -410,36 +411,26 @@ pub(crate) trait AnyStr { // _Py_bytes_islower fn py_islower(&self) -> bool { - let mut lower = false; - for byte in self + let mut cased = self .as_bytes() .iter() .copied() .filter(u8::is_ascii_alphabetic) - { - if byte.is_ascii_uppercase() { - return false; - } - lower = true; - } - lower + .peekable(); + // `all` is vacuously true on an empty iterator, so require at least one cased byte. + cased.peek().is_some() && cased.all(|byte| byte.is_ascii_lowercase()) } // Py_bytes_isupper fn 
py_isupper(&self) -> bool { - let mut upper = false; - for byte in self + let mut cased = self .as_bytes() .iter() .copied() .filter(u8::is_ascii_alphabetic) - { - if byte.is_ascii_lowercase() { - return false; - } - upper = true; - } - upper + .peekable(); + // `all` is vacuously true on an empty iterator, so require at least one cased byte. + cased.peek().is_some() && cased.all(|byte| byte.is_ascii_uppercase()) } // Unified form of CPython functions: @@ -484,18 +475,17 @@ where F: Fn(T) -> PyResult, M: Fn(&PyObject) -> String, { - match obj.try_to_value::(vm) { - Ok(single) => (predicate)(single), - Err(_) => { - let tuple: &Py = obj - .try_to_value(vm) - .map_err(|_| vm.new_type_error((message)(obj)))?; - for obj in tuple { - if single_or_tuple_any(obj, predicate, message, vm)? { - return Ok(true); - } + if let Ok(single) = obj.try_to_value::(vm) { + (predicate)(single) + } else { + let tuple: &Py = obj + .try_to_value(vm) + .map_err(|_| vm.new_type_error((message)(obj)))?; + for obj in tuple { + if single_or_tuple_any(obj, predicate, message, vm)? { + return Ok(true); } - Ok(false) } + Ok(false) } } diff --git a/crates/vm/src/buffer.rs b/crates/vm/src/buffer.rs index c3ad10e89a7..446f81116c6 100644 --- a/crates/vm/src/buffer.rs +++ b/crates/vm/src/buffer.rs @@ -5,8 +5,10 @@ use crate::{ convert::ToPyObject, function::{ArgBytesLike, ArgIntoBool, ArgIntoFloat}, }; -use alloc::fmt; -use core::{iter::Peekable, mem}; + +use rustpython_common::wtf8::Wtf8Buf; + +use core::{fmt, iter::Peekable, mem}; use half::f16; use itertools::Itertools; use malachite_bigint::BigInt; @@ -737,9 +739,8 @@ pub fn struct_error_type(vm: &VirtualMachine) -> &'static PyTypeRef { INSTANCE.get_or_init(|| vm.ctx.new_exception_type("struct", "error", None)) } -pub fn new_struct_error(vm: &VirtualMachine, msg: impl Into) -> PyBaseExceptionRef { +pub fn new_struct_error>(vm: &VirtualMachine, msg: T) -> PyBaseExceptionRef { // can't just STRUCT_ERROR.get().unwrap() cause this could be called before from buffer // machinery, independent of whether _struct was ever imported - let msg: String = msg.into(); 
vm.new_exception_msg(struct_error_type(vm).clone(), msg.into()) } diff --git a/crates/vm/src/builtins/float.rs b/crates/vm/src/builtins/float.rs index f36f9de79d4..e1a0bd00342 100644 --- a/crates/vm/src/builtins/float.rs +++ b/crates/vm/src/builtins/float.rs @@ -84,6 +84,7 @@ impl ToPyObject for f64 { vm.ctx.new_float(self).into() } } + impl ToPyObject for f32 { fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef { vm.ctx.new_float(f64::from(self)).into() @@ -156,8 +157,7 @@ fn inner_divmod(v1: f64, v2: f64, vm: &VirtualMachine) -> PyResult<(f64, f64)> { pub(crate) fn float_pow(v1: f64, v2: f64, vm: &VirtualMachine) -> PyResult { if v1.is_zero() && v2.is_sign_negative() { - let msg = "zero to a negative power"; - Err(vm.new_zero_division_error(msg.to_owned())) + Err(vm.new_zero_division_error("zero to a negative power")) } else if v1.is_sign_negative() && (v2.floor() - v2).abs() > f64::EPSILON { let v1 = Complex64::new(v1, 0.); let v2 = Complex64::new(v2, 0.); diff --git a/crates/vm/src/builtins/staticmethod.rs b/crates/vm/src/builtins/staticmethod.rs index 3a8d451b3dc..1ab697a0a1d 100644 --- a/crates/vm/src/builtins/staticmethod.rs +++ b/crates/vm/src/builtins/staticmethod.rs @@ -68,6 +68,7 @@ impl PyStaticMethod { callable: PyMutex::new(callable), } } + #[deprecated(note = "use PyStaticMethod::new(...).into_ref() instead")] pub fn new_ref(callable: PyObjectRef, ctx: &Context) -> PyRef { Self::new(callable).into_ref(ctx) diff --git a/crates/vm/src/byte.rs b/crates/vm/src/byte.rs index d9e927cbfa5..933ddead4b9 100644 --- a/crates/vm/src/byte.rs +++ b/crates/vm/src/byte.rs @@ -1,8 +1,9 @@ //! 
byte operation APIs -use crate::object::AsObject; -use crate::{PyObject, PyResult, VirtualMachine}; + use num_traits::ToPrimitive; +use crate::{AsObject, PyObject, PyResult, VirtualMachine}; + pub fn bytes_from_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult> { if let Ok(elements) = obj.try_bytes_like(vm, |bytes| bytes.to_vec()) { return Ok(elements); diff --git a/crates/vm/src/cformat.rs b/crates/vm/src/cformat.rs index 6bf6062c84f..3bba0e5f8e7 100644 --- a/crates/vm/src/cformat.rs +++ b/crates/vm/src/cformat.rs @@ -3,8 +3,9 @@ //! Implementation of Printf-Style string formatting //! as per the [Python Docs](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). -use crate::common::cformat::*; -use crate::common::wtf8::{CodePoint, Wtf8, Wtf8Buf}; +use itertools::Itertools; +use num_traits::cast::ToPrimitive; + use crate::{ AsObject, PyObject, PyObjectRef, PyResult, TryFromBorrowedObject, TryFromObject, VirtualMachine, @@ -12,12 +13,18 @@ use crate::{ PyBaseExceptionRef, PyByteArray, PyBytes, PyFloat, PyInt, PyStr, int::check_int_to_str_digits, try_f64_to_bigint, tuple, }, + common::{ + cformat::{ + CCharacterType, CConversionFlags, CFormatBytes, CFormatConversion, CFormatPart, + CFormatPrecision, CFormatQuantity, CFormatSpec, CFormatSpecKeyed, CFormatType, + CFormatWtf8, CNumberType, + }, + wtf8::{CodePoint, Wtf8, Wtf8Buf}, + }, function::ArgIntoFloat, protocol::PyBuffer, stdlib::builtins, }; -use itertools::Itertools; -use num_traits::cast::ToPrimitive; fn spec_format_bytes( vm: &VirtualMachine, @@ -39,11 +46,12 @@ fn spec_format_bytes( let bytes = vm .get_special_method(&obj, identifier!(vm, __bytes__))? .ok_or_else(|| { - vm.new_type_error(format!( + let msg = format!( "%b requires a bytes-like object, or an object that \ implements __bytes__, not '{}'", obj.class().name() - )) + ); + vm.new_type_error(msg) })? 
.invoke((), vm)?; let bytes = PyBytes::try_from_borrowed_object(vm, &bytes)?; @@ -71,6 +79,7 @@ fn spec_format_bytes( check_int_to_str_digits(i.as_bigint(), vm)?; return Ok(spec.format_number(i.as_bigint()).into_bytes()); } + if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) { let result = method?.call((), vm)?; if let Some(i) = result.downcast_ref::() { @@ -78,6 +87,7 @@ fn spec_format_bytes( return Ok(spec.format_number(i.as_bigint()).into_bytes()); } } + Err(vm.new_type_error(format!( "%{} format: a real number is required, not {}", spec.format_type.to_char(), @@ -301,6 +311,7 @@ fn try_update_quantity_from_tuple<'a, I: Iterator>( let Some(CFormatQuantity::FromValuesTuple) = q else { return Ok(()); }; + let element = elements.next(); f.insert(try_conversion_flag_from_tuple( vm, @@ -319,6 +330,7 @@ fn try_update_precision_from_tuple<'a, I: Iterator>( let Some(CFormatPrecision::Quantity(CFormatQuantity::FromValuesTuple)) = p else { return Ok(()); }; + let quantity = try_update_quantity_from_element(vm, elements.next().map(|v| v.as_ref()))?; *p = Some(CFormatPrecision::Quantity(quantity)); Ok(()) @@ -347,42 +359,45 @@ pub(crate) fn cformat_bytes( && !values_obj.fast_isinstance(vm.ctx.types.bytearray_type); if num_specifiers == 0 { - // literal only - return if is_mapping - || values_obj + if !is_mapping + && values_obj .downcast_ref::() - .is_some_and(|e| e.is_empty()) + .is_none_or(|e| !e.is_empty()) { - for (_, part) in format.iter_mut() { - match part { - CFormatPart::Literal(literal) => result.append(literal), - CFormatPart::Spec(_) => unreachable!(), - } + return Err(vm.new_type_error("not all arguments converted during bytes formatting")); + } + + // literal only + for (_, part) in format.iter_mut() { + if let CFormatPart::Literal(literal) = part { + result.append(literal) + } else { + unreachable!() } - Ok(result) - } else { - Err(vm.new_type_error("not all arguments converted during bytes formatting")) - }; + } + + return 
Ok(result); } if mapping_required { + if !is_mapping { + return Err(vm.new_type_error("format requires a mapping")); + } + // dict - return if is_mapping { - for (_, part) in format { - match part { - CFormatPart::Literal(literal) => result.extend(literal), - CFormatPart::Spec(CFormatSpecKeyed { mapping_key, spec }) => { - let key = mapping_key.unwrap(); - let value = values_obj.get_item(&key, vm)?; - let part_result = spec_format_bytes(vm, &spec, value)?; - result.extend(part_result); - } + for (_, part) in format { + match part { + CFormatPart::Literal(literal) => result.extend(literal), + CFormatPart::Spec(CFormatSpecKeyed { mapping_key, spec }) => { + let key = mapping_key.unwrap(); + let value = values_obj.get_item(&key, vm)?; + let part_result = spec_format_bytes(vm, &spec, value)?; + result.extend(part_result); } } - Ok(result) - } else { - Err(vm.new_type_error("format requires a mapping")) - }; + } + + return Ok(result); } // tuple @@ -405,18 +420,18 @@ pub(crate) fn cformat_bytes( )?; try_update_precision_from_tuple(vm, &mut value_iter, &mut spec.precision)?; - let value = match value_iter.next() { - Some(obj) => Ok(obj.clone()), - None => Err(vm.new_type_error("not enough arguments for format string")), - }?; - let part_result = spec_format_bytes(vm, &spec, value)?; + let Some(value) = value_iter.next() else { + return Err(vm.new_type_error("not enough arguments for format string")); + }; + + let part_result = spec_format_bytes(vm, &spec, value.clone())?; result.extend(part_result); } } } // check that all arguments were converted - if value_iter.next().is_some() && !is_mapping { + if !is_mapping && value_iter.next().is_some() { Err(vm.new_type_error("not all arguments converted during bytes formatting")) } else { Ok(result) @@ -441,41 +456,44 @@ pub(crate) fn cformat_string( && !values_obj.fast_isinstance(vm.ctx.types.str_type); if num_specifiers == 0 { - // literal only - return if is_mapping - || values_obj + if !is_mapping + && values_obj 
.downcast_ref::() - .is_some_and(|e| e.is_empty()) + .is_none_or(|e| !e.is_empty()) { - for (_, part) in format.iter() { - match part { - CFormatPart::Literal(literal) => result.push_wtf8(literal), - CFormatPart::Spec(_) => unreachable!(), - } + return Err(vm.new_type_error("not all arguments converted during string formatting")); + } + + // literal only + for (_, part) in format.iter() { + if let CFormatPart::Literal(literal) = part { + result.push_wtf8(literal) + } else { + unreachable!() } - Ok(result) - } else { - Err(vm.new_type_error("not all arguments converted during string formatting")) - }; + } + + return Ok(result); } if mapping_required { + if !is_mapping { + return Err(vm.new_type_error("format requires a mapping")); + } + // dict - return if is_mapping { - for (idx, part) in format { - match part { - CFormatPart::Literal(literal) => result.push_wtf8(&literal), - CFormatPart::Spec(CFormatSpecKeyed { mapping_key, spec }) => { - let value = values_obj.get_item(&mapping_key.unwrap(), vm)?; - let part_result = spec_format_string(vm, &spec, value, idx)?; - result.push_wtf8(&part_result); - } + for (idx, part) in format { + match part { + CFormatPart::Literal(literal) => result.push_wtf8(&literal), + CFormatPart::Spec(CFormatSpecKeyed { mapping_key, spec }) => { + let value = values_obj.get_item(&mapping_key.unwrap(), vm)?; + let part_result = spec_format_string(vm, &spec, value, idx)?; + result.push_wtf8(&part_result); } } - Ok(result) - } else { - Err(vm.new_type_error("format requires a mapping")) - }; + } + + return Ok(result); } // tuple @@ -484,6 +502,7 @@ pub(crate) fn cformat_string( } else { core::slice::from_ref(&values_obj) }; + let mut value_iter = values.iter(); for (idx, part) in format { @@ -498,18 +517,18 @@ pub(crate) fn cformat_string( )?; try_update_precision_from_tuple(vm, &mut value_iter, &mut spec.precision)?; - let value = match value_iter.next() { - Some(obj) => Ok(obj.clone()), - None => Err(vm.new_type_error("not enough arguments 
for format string")), - }?; - let part_result = spec_format_string(vm, &spec, value, idx)?; + let Some(value) = value_iter.next() else { + return Err(vm.new_type_error("not enough arguments for format string")); + }; + + let part_result = spec_format_string(vm, &spec, value.clone(), idx)?; result.push_wtf8(&part_result); } } } // check that all arguments were converted - if value_iter.next().is_some() && !is_mapping { + if !is_mapping && value_iter.next().is_some() { Err(vm.new_type_error("not all arguments converted during string formatting")) } else { Ok(result) diff --git a/crates/vm/src/class.rs b/crates/vm/src/class.rs index 2e8af54f974..364ae721159 100644 --- a/crates/vm/src/class.rs +++ b/crates/vm/src/class.rs @@ -66,16 +66,19 @@ pub fn add_operators(class: &'static Py, ctx: &Context) { pub trait StaticType { // Ideally, saving PyType is better than PyTypeRef fn static_cell() -> &'static static_cell::StaticCell; + #[inline] #[must_use] fn static_metaclass() -> &'static Py { PyType::static_type() } + #[inline] #[must_use] fn static_baseclass() -> &'static Py { PyBaseObject::static_type() } + #[inline] #[must_use] fn static_type() -> &'static Py { @@ -87,6 +90,7 @@ pub trait StaticType { } Self::static_cell().get().unwrap_or_else(|| fail()) } + #[must_use] fn init_manually(typ: PyTypeRef) -> &'static Py { let cell = Self::static_cell(); @@ -94,6 +98,7 @@ pub trait StaticType { .unwrap_or_else(|_| panic!("double initialization from init_manually")); cell.get().unwrap() } + #[must_use] fn init_builtin_type() -> &'static Py where @@ -105,6 +110,7 @@ pub trait StaticType { .unwrap_or_else(|_| panic!("double initialization of {}", Self::NAME)); cell.get().unwrap() } + #[must_use] fn create_static_type() -> PyTypeRef where @@ -137,14 +143,19 @@ pub trait PyClassDef { pub trait PyClassImpl: PyClassDef { const TP_FLAGS: PyTypeFlags = PyTypeFlags::DEFAULT; + const METHOD_DEFS: &'static [PyMethodDef]; + + fn impl_extend_class(ctx: &'static Context, class: &'static Py); 
+ + fn extend_slots(slots: &mut PyTypeSlots); + fn extend_class(ctx: &'static Context, class: &'static Py) where Self: Sized, { + // NOTE: `is_created_with_flags` is only available when debug_assertions is true #[cfg(debug_assertions)] - { - assert!(class.slots.flags.is_created_with_flags()); - } + debug_assert!(class.slots.flags.is_created_with_flags()); let _ = ctx.intern_str(Self::NAME); // intern type name @@ -161,7 +172,9 @@ pub trait PyClassImpl: PyClassDef { .into(), ); } + Self::impl_extend_class(ctx, class); + if let Some(doc) = Self::DOC { // Only set __doc__ if it doesn't already exist (e.g., as a member descriptor) // This matches CPython's behavior in type_dict_set_doc @@ -170,6 +183,7 @@ pub trait PyClassImpl: PyClassDef { class.set_attr(doc_attr_name, ctx.new_str(doc).into()); } } + if let Some(module_name) = Self::MODULE_NAME { let module_key = identifier!(ctx, __module__); // Don't overwrite a getset descriptor for __module__ (e.g. TypeAliasType @@ -230,10 +244,6 @@ pub trait PyClassImpl: PyClassDef { .to_owned() } - fn impl_extend_class(ctx: &'static Context, class: &'static Py); - const METHOD_DEFS: &'static [PyMethodDef]; - fn extend_slots(slots: &mut PyTypeSlots); - fn make_slots() -> PyTypeSlots { let mut slots = PyTypeSlots { flags: Self::TP_FLAGS, diff --git a/crates/vm/src/codecs.rs b/crates/vm/src/codecs.rs index 03340d423ad..e2e3e3e234c 100644 --- a/crates/vm/src/codecs.rs +++ b/crates/vm/src/codecs.rs @@ -1,14 +1,19 @@ +use alloc::borrow::Cow; +use core::ops::{Deref, Range}; +use std::collections::HashMap; + use rustpython_common::{ + ascii, borrow::BorrowedValue, encodings::{ CodecContext, DecodeContext, DecodeErrorHandler, EncodeContext, EncodeErrorHandler, EncodeReplace, StrBuffer, StrSize, errors, }, + lock::{OnceCell, PyRwLock}, str::StrKind, wtf8::{CodePoint, Wtf8, Wtf8Buf}, }; -use crate::common::lock::OnceCell; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyResult, TryFromBorrowedObject, TryFromObject, 
VirtualMachine, @@ -16,13 +21,9 @@ use crate::{ PyBaseExceptionRef, PyBytes, PyBytesRef, PyStr, PyStrRef, PyTuple, PyTupleRef, PyUtf8Str, PyUtf8StrRef, }, - common::{ascii, lock::PyRwLock}, convert::ToPyObject, function::{ArgBytesLike, PyMethodDef}, }; -use alloc::borrow::Cow; -use core::ops::{self, Range}; -use std::collections::HashMap; pub struct CodecsRegistry { inner: PyRwLock, @@ -39,6 +40,7 @@ pub(crate) const DEFAULT_ENCODING: &str = "utf-8"; #[derive(Clone)] #[repr(transparent)] pub struct PyCodec(PyTupleRef); + impl PyCodec { #[inline] pub fn from_tuple(tuple: PyTupleRef) -> Result { @@ -48,10 +50,12 @@ impl PyCodec { Err(tuple) } } + #[inline] pub fn into_tuple(self) -> PyTupleRef { self.0 } + #[inline] pub fn as_tuple(&self) -> &Py { &self.0 @@ -61,6 +65,7 @@ impl PyCodec { pub fn get_encode_func(&self) -> &PyObject { &self.0[0] } + #[inline] pub fn get_decode_func(&self) -> &PyObject { &self.0[1] @@ -116,10 +121,7 @@ impl PyCodec { errors: Option, vm: &VirtualMachine, ) -> PyResult { - let args = match errors { - Some(e) => vec![e.into()], - None => vec![], - }; + let args = errors.map_or_else(Vec::new, |e| vec![e.into()]); vm.call_method(self.0.as_object(), "incrementalencoder", args) } @@ -128,10 +130,7 @@ impl PyCodec { errors: Option, vm: &VirtualMachine, ) -> PyResult { - let args = match errors { - Some(e) => vec![e.into()], - None => vec![], - }; + let args = errors.map_or_else(Vec::new, |e| vec![e.into()]); vm.call_method(self.0.as_object(), "incrementaldecoder", args) } } @@ -191,16 +190,17 @@ impl CodecsRegistry { ("namereplace", methods[5].build_function(ctx)), ("surrogatepass", methods[6].build_function(ctx)), ("surrogateescape", methods[7].build_function(ctx)), - ]; - let errors = errors - .into_iter() - .map(|(name, f)| (name.to_owned(), f.into())) - .collect(); + ] + .into_iter() + .map(|(name, f)| (name.to_owned(), f.into())) + .collect(); + let inner = RegistryInner { search_path: Vec::new(), search_cache: HashMap::new(), errors, }; + 
Self { inner: PyRwLock::new(inner), } @@ -210,6 +210,7 @@ impl CodecsRegistry { if !search_function.is_callable() { return Err(vm.new_type_error("argument must be callable")); } + self.inner.write().search_path.push(search_function); Ok(()) } @@ -250,6 +251,7 @@ impl CodecsRegistry { } inner.search_path.clone() }; + let encoding: PyUtf8StrRef = vm.ctx.new_utf8_str(encoding.as_ref()); for func in search_path { let res = func.call((encoding.clone(),), vm)?; @@ -264,6 +266,7 @@ impl CodecsRegistry { return Ok(codec.clone()); } } + Err(vm.new_lookup_error(format!("unknown encoding: {encoding}"))) } @@ -274,6 +277,7 @@ impl CodecsRegistry { vm: &VirtualMachine, ) -> PyResult { let codec = self.lookup(encoding, vm)?; + if codec.is_text_codec(vm)? { Ok(codec) } else { @@ -430,7 +434,7 @@ fn normalize_encoding_name(encoding: &str) -> Cow<'_, str> { out.into() } -#[derive(Eq, PartialEq)] +#[derive(Clone, Copy, Eq, PartialEq)] enum StandardEncoding { Utf8, Utf16Be, @@ -455,12 +459,14 @@ impl StandardEncoding { let encoding = encoding .strip_prefix(|c| ['-', '_'].contains(&c)) .unwrap_or(encoding); + if encoding == "8" { Some(Self::Utf8) } else if let Some(encoding) = encoding.strip_prefix("16") { if encoding.is_empty() { return Some(Self::UTF_16_NE); } + let encoding = encoding.strip_prefix(['-', '_']).unwrap_or(encoding); match encoding { "be" => Some(Self::Utf16Be), @@ -471,6 +477,7 @@ impl StandardEncoding { if encoding.is_empty() { return Some(Self::UTF_32_NE); } + let encoding = encoding.strip_prefix(['-', '_']).unwrap_or(encoding); match encoding { "be" => Some(Self::Utf32Be), @@ -504,10 +511,12 @@ impl<'a> EncodeErrorHandler> for SurrogatePass { let mut out: Vec = Vec::with_capacity(num_chars * 4); for ch in err_str.code_points() { let c = ch.to_u32(); - let 0xd800..=0xdfff = c else { + + if !(0xd800..=0xdfff).contains(&c) { // Not a surrogate, fail with original exception return Err(ctx.error_encoding(range, reason)); - }; + } + match standard_encoding { 
StandardEncoding::Utf8 => out.extend(ch.encode_wtf8(&mut [0; 4]).as_bytes()), StandardEncoding::Utf16Le => out.extend((c as u16).to_le_bytes()), @@ -601,7 +610,9 @@ impl<'a> PyEncodeContext<'a> { impl CodecContext for PyEncodeContext<'_> { type Error = PyBaseExceptionRef; + type StrBuf = PyStrRef; + type BytesBuf = PyBytesRef; fn string(&self, s: Wtf8Buf) -> Self::StrBuf { @@ -612,6 +623,7 @@ impl CodecContext for PyEncodeContext<'_> { self.vm.ctx.new_bytes(b) } } + impl EncodeContext for PyEncodeContext<'_> { fn full_data(&self) -> &Wtf8 { self.data.as_wtf8() @@ -690,12 +702,15 @@ pub(crate) struct PyDecodeContext<'a> { pos: usize, exception: OnceCell, } + enum PyDecodeData<'a> { Original(BorrowedValue<'a, [u8]>), Modified(PyBytesRef), } -impl ops::Deref for PyDecodeData<'_> { + +impl Deref for PyDecodeData<'_> { type Target = [u8]; + fn deref(&self) -> &Self::Target { match self { PyDecodeData::Original(data) => data, @@ -719,7 +734,9 @@ impl<'a> PyDecodeContext<'a> { impl CodecContext for PyDecodeContext<'_> { type Error = PyBaseExceptionRef; + type StrBuf = PyStrRef; + type BytesBuf = PyBytesRef; fn string(&self, s: Wtf8Buf) -> Self::StrBuf { @@ -730,6 +747,7 @@ impl CodecContext for PyDecodeContext<'_> { self.vm.ctx.new_bytes(b) } } + impl DecodeContext for PyDecodeContext<'_> { fn full_data(&self) -> &[u8] { &self.data @@ -858,6 +876,7 @@ pub(crate) struct ErrorsHandler<'a> { errors: &'a Py, resolved: OnceCell, } + enum ResolvedError { Standard(StandardError), Handler(PyObjectRef), @@ -866,17 +885,19 @@ enum ResolvedError { impl<'a> ErrorsHandler<'a> { #[inline] pub(crate) fn new(errors: Option<&'a Py>, vm: &VirtualMachine) -> Self { - match errors { - Some(errors) => Self { + if let Some(errors) = errors { + Self { errors, resolved: OnceCell::new(), - }, - None => Self { + } + } else { + Self { errors: identifier_utf8!(vm, strict), resolved: OnceCell::from(ResolvedError::Standard(StandardError::Strict)), - }, + } } } + #[inline] fn resolve(&self, vm: 
&VirtualMachine) -> PyResult<&ResolvedError> { if let Some(val) = self.resolved.get() { @@ -895,11 +916,13 @@ impl<'a> ErrorsHandler<'a> { Ok(self.resolved.get().unwrap()) } } + impl StrBuffer for PyStrRef { fn is_compatible_with(&self, kind: StrKind) -> bool { self.kind() <= kind } } + impl<'a> EncodeErrorHandler> for ErrorsHandler<'_> { fn handle_encode_error( &self, @@ -950,6 +973,7 @@ impl<'a> EncodeErrorHandler> for ErrorsHandler<'_> { Ok((replace, restart)) } } + impl<'a> DecodeErrorHandler> for ErrorsHandler<'_> { fn handle_decode_error( &self, @@ -1104,8 +1128,10 @@ where fn extract_unicode_error_range(err: &PyObject, vm: &VirtualMachine) -> PyResult> { let start = err.get_attr("start", vm)?; let start = start.try_into_value(vm)?; + let end = err.get_attr("end", vm)?; let end = end.try_into_value(vm)?; + Ok(Range { start, end }) } @@ -1128,10 +1154,12 @@ fn update_unicode_error_attrs( fn is_encode_err(err: &PyObject, vm: &VirtualMachine) -> bool { err.fast_isinstance(vm.ctx.exceptions.unicode_encode_error) } + #[inline] fn is_decode_err(err: &PyObject, vm: &VirtualMachine) -> bool { err.fast_isinstance(vm.ctx.exceptions.unicode_decode_error) } + #[inline] fn is_translate_err(err: &PyObject, vm: &VirtualMachine) -> bool { err.fast_isinstance(vm.ctx.exceptions.unicode_translate_error) @@ -1145,10 +1173,9 @@ fn bad_err_type(err: PyObjectRef, vm: &VirtualMachine) -> PyBaseExceptionRef { } fn strict_errors(err: PyObjectRef, vm: &VirtualMachine) -> PyResult { - let err = err + Err(err .downcast() - .unwrap_or_else(|_| vm.new_type_error("codec must pass exception instance")); - Err(err) + .unwrap_or_else(|_| vm.new_type_error("codec must pass exception instance"))) } fn ignore_errors(err: PyObjectRef, vm: &VirtualMachine) -> PyResult<(PyObjectRef, usize)> { diff --git a/crates/vm/src/compiler.rs b/crates/vm/src/compiler.rs index 25fa33302a5..9842bc0d1c7 100644 --- a/crates/vm/src/compiler.rs +++ b/crates/vm/src/compiler.rs @@ -1,45 +1,42 @@ +#[cfg(all(not(feature 
= "compiler"), feature = "parser", feature = "codegen",))] +compile_error!("Use --features=compiler to enable both parser and codegen"); + #[cfg(feature = "codegen")] pub use rustpython_codegen::CompileOpts; -#[cfg(feature = "compiler")] -pub use rustpython_compiler::*; - -#[cfg(not(feature = "compiler"))] -pub use rustpython_compiler_core::Mode; +cfg_select! { + feature = "compiler" => { + pub use rustpython_compiler::*; + } + _ => { + pub use ruff_python_parser as parser; -#[cfg(not(feature = "compiler"))] -pub use rustpython_compiler_core as core; + pub use rustpython_compiler_core::Mode; + pub use rustpython_compiler_core as core; + } +} #[cfg(not(feature = "compiler"))] -pub use ruff_python_parser as parser; +#[derive(Debug, thiserror::Error)] +pub enum CompileErrorType { + #[cfg(feature = "codegen")] + #[error(transparent)] + Codegen(#[from] rustpython_codegen::error::CodegenErrorType), + #[cfg(feature = "parser")] + #[error(transparent)] + Parse(#[from] parser::ParseErrorType), +} #[cfg(not(feature = "compiler"))] -mod error { - #[cfg(all(feature = "parser", feature = "codegen"))] - panic!("Use --features=compiler to enable both parser and codegen"); - - #[derive(Debug, thiserror::Error)] - pub enum CompileErrorType { - #[cfg(feature = "codegen")] - #[error(transparent)] - Codegen(#[from] super::codegen::error::CodegenErrorType), - #[cfg(feature = "parser")] - #[error(transparent)] - Parse(#[from] super::parser::ParseErrorType), - } - - #[derive(Debug, thiserror::Error)] - pub enum CompileError { - #[cfg(feature = "codegen")] - #[error(transparent)] - Codegen(#[from] super::codegen::error::CodegenError), - #[cfg(feature = "parser")] - #[error(transparent)] - Parse(#[from] super::parser::ParseError), - } +#[derive(Debug, thiserror::Error)] +pub enum CompileError { + #[cfg(feature = "codegen")] + #[error(transparent)] + Codegen(#[from] rustpython_codegen::error::CodegenError), + #[cfg(feature = "parser")] + #[error(transparent)] + Parse(#[from] 
parser::ParseError), } -#[cfg(not(feature = "compiler"))] -pub use error::{CompileError, CompileErrorType}; #[cfg(any(feature = "parser", feature = "codegen"))] impl crate::convert::ToPyException for (CompileError, Option<&str>) { diff --git a/crates/vm/src/convert/try_from.rs b/crates/vm/src/convert/try_from.rs index 85d6f5e20e3..10b1449d7eb 100644 --- a/crates/vm/src/convert/try_from.rs +++ b/crates/vm/src/convert/try_from.rs @@ -1,10 +1,11 @@ +use malachite_bigint::Sign; +use num_traits::ToPrimitive; + use crate::{ Py, VirtualMachine, builtins::PyFloat, object::{AsObject, PyObject, PyObjectRef, PyPayload, PyRef, PyResult}, }; -use malachite_bigint::Sign; -use num_traits::ToPrimitive; /// Implemented by any type that can be created from a Python object. /// @@ -62,7 +63,7 @@ impl PyObject { } } -/// Lower-cost variation of `TryFromObject` +/// Lower-cost variation of [`TryFromObject`]. pub trait TryFromBorrowedObject<'a>: Sized where Self: 'a, @@ -126,12 +127,15 @@ impl TryFromObject for core::time::Duration { fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { if let Some(float) = obj.downcast_ref::() { let f = float.to_f64(); + if f.is_nan() { return Err(vm.new_value_error("Invalid value NaN (not a number)")); } + if f < 0.0 { return Err(vm.new_value_error("negative duration")); } + if !f.is_finite() || f > u64::MAX as f64 { return Err(vm.new_overflow_error("timestamp too large to convert to C PyTime_t")); } diff --git a/crates/vm/src/gc_state.rs b/crates/vm/src/gc_state.rs index ecb34fcc869..d4571be46c8 100644 --- a/crates/vm/src/gc_state.rs +++ b/crates/vm/src/gc_state.rs @@ -15,11 +15,6 @@ fn elapsed_secs(start: &std::time::Instant) -> f64 { start.elapsed().as_secs_f64() } -#[cfg(target_arch = "wasm32")] -fn elapsed_secs(_start: &()) -> f64 { - 0.0 -} - bitflags::bitflags! { /// GC debug flags (see Include/internal/pycore_gc.h) #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] @@ -38,7 +33,7 @@ bitflags::bitflags! 
{ } /// Result from a single collection run -#[derive(Debug, Default)] +#[derive(Clone, Copy, Debug, Default)] pub struct CollectResult { pub collected: usize, pub uncollectable: usize, @@ -47,7 +42,7 @@ pub struct CollectResult { } /// Statistics for a single generation (gc_generation_stats) -#[derive(Debug, Default)] +#[derive(Clone, Copy, Debug, Default)] pub struct GcStats { pub collections: usize, pub collected: usize, @@ -176,7 +171,7 @@ impl Default for GcState { impl GcState { #[must_use] - pub fn new() -> Self { + pub const fn new() -> Self { Self { generations: [ GcGeneration::new(2000), // young @@ -395,8 +390,6 @@ impl GcState { #[cfg(not(target_arch = "wasm32"))] let start_time = std::time::Instant::now(); - #[cfg(target_arch = "wasm32")] - let start_time = (); // Memory barrier to ensure visibility of all reference count updates // from other threads before we start analyzing the object graph. @@ -431,7 +424,12 @@ impl GcState { for i in 0..reset_end { self.generations[i].count.store(0, Ordering::SeqCst); } - let duration = elapsed_secs(&start_time); + + let duration = cfg_select! { + target_arch = "wasm32" => 0.0, + _ => elapsed_secs(&start_time), + }; + self.generations[generation].update_stats(0, 0, 0, duration); return CollectResult { collected: 0, @@ -556,7 +554,11 @@ impl GcState { for i in 0..reset_end { self.generations[i].count.store(0, Ordering::SeqCst); } - let duration = elapsed_secs(&start_time); + + let duration = cfg_select! { + target_arch = "wasm32" => 0.0, + _ => elapsed_secs(&start_time), + }; self.generations[generation].update_stats(0, 0, candidates, duration); return CollectResult { collected: 0, @@ -577,7 +579,10 @@ impl GcState { for i in 0..reset_end { self.generations[i].count.store(0, Ordering::SeqCst); } - let duration = elapsed_secs(&start_time); + let duration = cfg_select! 
{ + target_arch = "wasm32" => 0.0, + _ => elapsed_secs(&start_time), + }; self.generations[generation].update_stats(0, 0, candidates, duration); return CollectResult { collected: 0, @@ -725,7 +730,10 @@ impl GcState { self.generations[i].count.store(0, Ordering::SeqCst); } - let duration = elapsed_secs(&start_time); + let duration = cfg_select! { + target_arch = "wasm32" => 0.0, + _ => elapsed_secs(&start_time), + }; self.generations[generation].update_stats(collected, 0, candidates, duration); CollectResult { diff --git a/crates/vm/src/recursion.rs b/crates/vm/src/recursion.rs index 7392cca4ded..dea7898c8a7 100644 --- a/crates/vm/src/recursion.rs +++ b/crates/vm/src/recursion.rs @@ -1,14 +1,15 @@ use crate::{AsObject, PyObject, VirtualMachine}; +/// A guard to protect repr methods from recursion into itself. pub struct ReprGuard<'vm> { vm: &'vm VirtualMachine, id: usize, } -/// A guard to protect repr methods from recursion into itself, impl<'vm> ReprGuard<'vm> { - /// Returns None if the guard against 'obj' is still held otherwise returns the guard. The guard - /// which is released if dropped. + /// Returns `None` if a guard for `obj` is already held; otherwise returns a new guard. + /// + /// The guard is released when it is dropped. pub fn enter(vm: &'vm VirtualMachine, obj: &PyObject) -> Option { let mut guards = vm.repr_guards.borrow_mut(); @@ -18,8 +19,9 @@ impl<'vm> ReprGuard<'vm> { if guards.contains(&id) { return None; } + guards.insert(id); - Some(ReprGuard { vm, id }) + Some(Self { vm, id }) } } diff --git a/crates/vm/src/suggestion.rs b/crates/vm/src/suggestion.rs index b48b78af755..69ce8f5f6b9 100644 --- a/crates/vm/src/suggestion.rs +++ b/crates/vm/src/suggestion.rs @@ -1,13 +1,14 @@ //! This module provides functionality to suggest similar names for attributes or variables. //! This is used during tracebacks. 
+use core::iter::ExactSizeIterator; + use crate::{ AsObject, Py, PyObject, PyObjectRef, VirtualMachine, builtins::{PyStr, PyStrRef}, exceptions::types::PyBaseException, sliceable::SliceableSequenceOp, }; -use core::iter::ExactSizeIterator; use rustpython_common::str::levenshtein::{MOVE_COST, levenshtein_distance}; const MAX_CANDIDATE_ITEMS: usize = 750;