Skip to content

Commit a477835

Browse files
authored
py_new separation for set/tuple/bytes/str
1 parent 5f496c9 commit a477835

File tree

6 files changed

+137
-112
lines changed

6 files changed

+137
-112
lines changed

crates/vm/src/builtins/bytes.rs

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,64 @@ pub(crate) fn init(context: &Context) {
9292
}
9393

9494
impl Constructor for PyBytes {
95-
type Args = ByteInnerNewOptions;
95+
type Args = Vec<u8>;
9696

9797
fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult {
98-
let options: Self::Args = args.bind(vm)?;
99-
options.get_bytes(cls, vm).to_pyresult(vm)
98+
let options: ByteInnerNewOptions = args.bind(vm)?;
99+
100+
// Optimizations for exact bytes type
101+
if cls.is(vm.ctx.types.bytes_type) {
102+
// Return empty bytes singleton
103+
if options.source.is_missing()
104+
&& options.encoding.is_missing()
105+
&& options.errors.is_missing()
106+
{
107+
return Ok(vm.ctx.empty_bytes.clone().into());
108+
}
109+
110+
// Return exact bytes as-is
111+
if let OptionalArg::Present(ref obj) = options.source
112+
&& options.encoding.is_missing()
113+
&& options.errors.is_missing()
114+
&& let Ok(b) = obj.clone().downcast_exact::<PyBytes>(vm)
115+
{
116+
return Ok(b.into_pyref().into());
117+
}
118+
}
119+
120+
// Handle __bytes__ method - may return PyBytes directly
121+
if let OptionalArg::Present(ref obj) = options.source
122+
&& options.encoding.is_missing()
123+
&& options.errors.is_missing()
124+
&& let Some(bytes_method) = vm.get_method(obj.clone(), identifier!(vm, __bytes__))
125+
{
126+
let bytes = bytes_method?.call((), vm)?;
127+
// If exact bytes type and __bytes__ returns bytes, use it directly
128+
if cls.is(vm.ctx.types.bytes_type)
129+
&& let Ok(b) = bytes.clone().downcast::<PyBytes>()
130+
{
131+
return Ok(b.into());
132+
}
133+
// Otherwise convert to Vec<u8>
134+
let inner = PyBytesInner::try_from_borrowed_object(vm, &bytes)?;
135+
let payload = Self::py_new(&cls, inner.elements, vm)?;
136+
return payload.into_ref_with_type(vm, cls).map(Into::into);
137+
}
138+
139+
// Fallback to get_bytearray_inner
140+
let elements = options.get_bytearray_inner(vm)?.elements;
141+
142+
// Return empty bytes singleton for exact bytes types
143+
if elements.is_empty() && cls.is(vm.ctx.types.bytes_type) {
144+
return Ok(vm.ctx.empty_bytes.clone().into());
145+
}
146+
147+
let payload = Self::py_new(&cls, elements, vm)?;
148+
payload.into_ref_with_type(vm, cls).map(Into::into)
100149
}
101150

102-
fn py_new(_cls: &Py<PyType>, _args: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
103-
unreachable!("use slot_new")
151+
fn py_new(_cls: &Py<PyType>, elements: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
152+
Ok(Self::from(elements))
104153
}
105154
}
106155

crates/vm/src/builtins/set.rs

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -918,35 +918,43 @@ impl Representable for PySet {
918918
}
919919

920920
impl Constructor for PyFrozenSet {
921-
type Args = OptionalArg<PyObjectRef>;
921+
type Args = Vec<PyObjectRef>;
922922

923923
fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult {
924-
let iterable: Self::Args = args.bind(vm)?;
925-
let elements = if let OptionalArg::Present(iterable) = iterable {
926-
let iterable = if cls.is(vm.ctx.types.frozenset_type) {
927-
match iterable.downcast_exact::<Self>(vm) {
928-
Ok(fs) => return Ok(fs.into_pyref().into()),
929-
Err(iterable) => iterable,
930-
}
931-
} else {
932-
iterable
933-
};
924+
let iterable: OptionalArg<PyObjectRef> = args.bind(vm)?;
925+
926+
// Optimizations for exact frozenset type
927+
if cls.is(vm.ctx.types.frozenset_type) {
928+
// Return exact frozenset as-is
929+
if let OptionalArg::Present(ref input) = iterable
930+
&& let Ok(fs) = input.clone().downcast_exact::<PyFrozenSet>(vm)
931+
{
932+
return Ok(fs.into_pyref().into());
933+
}
934+
935+
// Return empty frozenset singleton
936+
if iterable.is_missing() {
937+
return Ok(vm.ctx.empty_frozenset.clone().into());
938+
}
939+
}
940+
941+
let elements: Vec<PyObjectRef> = if let OptionalArg::Present(iterable) = iterable {
934942
iterable.try_to_value(vm)?
935943
} else {
936944
vec![]
937945
};
938946

939-
// Return empty fs if iterable passed is empty and only for exact fs types.
947+
// Return empty frozenset singleton for exact frozenset types (when iterable was empty)
940948
if elements.is_empty() && cls.is(vm.ctx.types.frozenset_type) {
941-
Ok(vm.ctx.empty_frozenset.clone().into())
942-
} else {
943-
Self::from_iter(vm, elements)
944-
.and_then(|o| o.into_ref_with_type(vm, cls).map(Into::into))
949+
return Ok(vm.ctx.empty_frozenset.clone().into());
945950
}
951+
952+
let payload = Self::py_new(&cls, elements, vm)?;
953+
payload.into_ref_with_type(vm, cls).map(Into::into)
946954
}
947955

948-
fn py_new(_cls: &Py<PyType>, _args: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
949-
unreachable!("use slot_new")
956+
fn py_new(_cls: &Py<PyType>, elements: Self::Args, vm: &VirtualMachine) -> PyResult<Self> {
957+
Self::from_iter(vm, elements)
950958
}
951959
}
952960

crates/vm/src/builtins/str.rs

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -351,36 +351,39 @@ impl Constructor for PyStr {
351351
type Args = StrArgs;
352352

353353
fn slot_new(cls: PyTypeRef, func_args: FuncArgs, vm: &VirtualMachine) -> PyResult {
354+
// Optimization: return exact str as-is (only when no encoding/errors provided)
355+
if cls.is(vm.ctx.types.str_type)
356+
&& func_args.args.len() == 1
357+
&& func_args.kwargs.is_empty()
358+
&& func_args.args[0].class().is(vm.ctx.types.str_type)
359+
{
360+
return Ok(func_args.args[0].clone());
361+
}
362+
354363
let args: Self::Args = func_args.bind(vm)?;
355-
let string: PyRef<PyStr> = match args.object {
364+
let payload = Self::py_new(&cls, args, vm)?;
365+
payload.into_ref_with_type(vm, cls).map(Into::into)
366+
}
367+
368+
fn py_new(_cls: &Py<PyType>, args: Self::Args, vm: &VirtualMachine) -> PyResult<Self> {
369+
match args.object {
356370
OptionalArg::Present(input) => {
357371
if let OptionalArg::Present(enc) = args.encoding {
358-
vm.state.codec_registry.decode_text(
372+
let s = vm.state.codec_registry.decode_text(
359373
input,
360374
enc.as_str(),
361375
args.errors.into_option(),
362376
vm,
363-
)?
377+
)?;
378+
Ok(Self::from(s.as_wtf8().to_owned()))
364379
} else {
365-
input.str(vm)?
380+
let s = input.str(vm)?;
381+
Ok(Self::from(s.as_wtf8().to_owned()))
366382
}
367383
}
368-
OptionalArg::Missing => {
369-
Self::from(String::new()).into_ref_with_type(vm, cls.clone())?
370-
}
371-
};
372-
if string.class().is(&cls) {
373-
Ok(string.into())
374-
} else {
375-
Self::from(string.as_wtf8())
376-
.into_ref_with_type(vm, cls)
377-
.map(Into::into)
384+
OptionalArg::Missing => Ok(Self::from(String::new())),
378385
}
379386
}
380-
381-
fn py_new(_cls: &Py<PyType>, _args: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
382-
unreachable!("use slot_new")
383-
}
384387
}
385388

386389
impl PyStr {

crates/vm/src/builtins/tuple.rs

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -110,37 +110,45 @@ impl_from_into_pytuple!(A, B, C, D, E, F, G);
110110
pub type PyTupleRef = PyRef<PyTuple>;
111111

112112
impl Constructor for PyTuple {
113-
type Args = OptionalArg<PyObjectRef>;
113+
type Args = Vec<PyObjectRef>;
114114

115115
fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult {
116-
let iterable: Self::Args = args.bind(vm)?;
116+
let iterable: OptionalArg<PyObjectRef> = args.bind(vm)?;
117+
118+
// Optimizations for exact tuple type
119+
if cls.is(vm.ctx.types.tuple_type) {
120+
// Return exact tuple as-is
121+
if let OptionalArg::Present(ref input) = iterable
122+
&& let Ok(tuple) = input.clone().downcast_exact::<PyTuple>(vm)
123+
{
124+
return Ok(tuple.into_pyref().into());
125+
}
126+
127+
// Return empty tuple singleton
128+
if iterable.is_missing() {
129+
return Ok(vm.ctx.empty_tuple.clone().into());
130+
}
131+
}
132+
117133
let elements = if let OptionalArg::Present(iterable) = iterable {
118-
let iterable = if cls.is(vm.ctx.types.tuple_type) {
119-
match iterable.downcast_exact::<Self>(vm) {
120-
Ok(tuple) => return Ok(tuple.into_pyref().into()),
121-
Err(iterable) => iterable,
122-
}
123-
} else {
124-
iterable
125-
};
126134
iterable.try_to_value(vm)?
127135
} else {
128136
vec![]
129137
};
130-
// Return empty tuple only for exact tuple types if the iterable is empty.
138+
139+
// Return empty tuple singleton for exact tuple types (when iterable was empty)
131140
if elements.is_empty() && cls.is(vm.ctx.types.tuple_type) {
132-
Ok(vm.ctx.empty_tuple.clone().into())
133-
} else {
134-
Self {
135-
elements: elements.into_boxed_slice(),
136-
}
137-
.into_ref_with_type(vm, cls)
138-
.map(Into::into)
141+
return Ok(vm.ctx.empty_tuple.clone().into());
139142
}
143+
144+
let payload = Self::py_new(&cls, elements, vm)?;
145+
payload.into_ref_with_type(vm, cls).map(Into::into)
140146
}
141147

142-
fn py_new(_cls: &Py<PyType>, _args: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
143-
unreachable!("use slot_new")
148+
fn py_new(_cls: &Py<PyType>, elements: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
149+
Ok(Self {
150+
elements: elements.into_boxed_slice(),
151+
})
144152
}
145153
}
146154

crates/vm/src/bytes_inner.rs

Lines changed: 2 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
// spell-checker:ignore unchunked
22
use crate::{
3-
AsObject, PyObject, PyObjectRef, PyPayload, PyResult, TryFromBorrowedObject, VirtualMachine,
3+
AsObject, PyObject, PyObjectRef, PyResult, TryFromBorrowedObject, VirtualMachine,
44
anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper},
55
builtins::{
66
PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyInt, PyIntRef, PyStr, PyStrRef,
7-
PyTypeRef, pystr,
7+
pystr,
88
},
99
byte::bytes_from_object,
1010
cformat::cformat_bytes,
1111
common::hash,
1212
function::{ArgIterable, Either, OptionalArg, OptionalOption, PyComparisonValue},
13-
identifier,
1413
literal::escape::Escape,
1514
protocol::PyBuffer,
1615
sequence::{SequenceExt, SequenceMutExt},
@@ -91,43 +90,6 @@ impl ByteInnerNewOptions {
9190
})
9291
}
9392

94-
pub fn get_bytes(self, cls: PyTypeRef, vm: &VirtualMachine) -> PyResult<PyBytesRef> {
95-
let inner = match (&self.source, &self.encoding, &self.errors) {
96-
(OptionalArg::Present(obj), OptionalArg::Missing, OptionalArg::Missing) => {
97-
let obj = obj.clone();
98-
// construct an exact bytes from an exact bytes do not clone
99-
let obj = if cls.is(vm.ctx.types.bytes_type) {
100-
match obj.downcast_exact::<PyBytes>(vm) {
101-
Ok(b) => return Ok(b.into_pyref()),
102-
Err(obj) => obj,
103-
}
104-
} else {
105-
obj
106-
};
107-
108-
if let Some(bytes_method) = vm.get_method(obj, identifier!(vm, __bytes__)) {
109-
// construct an exact bytes from __bytes__ slot.
110-
// if __bytes__ return a bytes, use the bytes object except we are the subclass of the bytes
111-
let bytes = bytes_method?.call((), vm)?;
112-
let bytes = if cls.is(vm.ctx.types.bytes_type) {
113-
match bytes.downcast::<PyBytes>() {
114-
Ok(b) => return Ok(b),
115-
Err(bytes) => bytes,
116-
}
117-
} else {
118-
bytes
119-
};
120-
Some(PyBytesInner::try_from_borrowed_object(vm, &bytes))
121-
} else {
122-
None
123-
}
124-
}
125-
_ => None,
126-
}
127-
.unwrap_or_else(|| self.get_bytearray_inner(vm))?;
128-
PyBytes::from(inner).into_ref_with_type(vm, cls)
129-
}
130-
13193
pub fn get_bytearray_inner(self, vm: &VirtualMachine) -> PyResult<PyBytesInner> {
13294
match (self.source, self.encoding, self.errors) {
13395
(OptionalArg::Present(obj), OptionalArg::Missing, OptionalArg::Missing) => {

crates/vm/src/protocol/object.rs

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,16 @@
44
use crate::{
55
AsObject, Py, PyObject, PyObjectRef, PyRef, PyResult, TryFromObject, VirtualMachine,
66
builtins::{
7-
PyAsyncGen, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyTuple, PyTupleRef,
8-
PyType, PyTypeRef, PyUtf8Str, pystr::AsPyStr,
7+
PyAsyncGen, PyBytes, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyTuple,
8+
PyTupleRef, PyType, PyTypeRef, PyUtf8Str, pystr::AsPyStr,
99
},
10-
bytes_inner::ByteInnerNewOptions,
1110
common::{hash::PyHash, str::to_ascii},
1211
convert::{ToPyObject, ToPyResult},
1312
dict_inner::DictKey,
14-
function::{Either, OptionalArg, PyArithmeticValue, PySetterValue},
13+
function::{Either, FuncArgs, PyArithmeticValue, PySetterValue},
1514
object::PyPayload,
1615
protocol::{PyIter, PyMapping, PySequence},
17-
types::PyComparisonOp,
16+
types::{Constructor, PyComparisonOp},
1817
};
1918

2019
// RustPython doesn't need these items
@@ -37,12 +36,8 @@ impl PyObjectRef {
3736
match self.downcast_exact::<PyInt>(vm) {
3837
Ok(int) => Err(vm.new_downcast_type_error(bytes_type, &int)),
3938
Err(obj) => {
40-
let options = ByteInnerNewOptions {
41-
source: OptionalArg::Present(obj),
42-
encoding: OptionalArg::Missing,
43-
errors: OptionalArg::Missing,
44-
};
45-
options.get_bytes(bytes_type.to_owned(), vm).map(Into::into)
39+
let args = FuncArgs::from(vec![obj]);
40+
<PyBytes as Constructor>::slot_new(bytes_type.to_owned(), args, vm)
4641
}
4742
}
4843
}

0 commit comments

Comments
 (0)