Skip to content

Commit 3504993

Browse files
authored
Apply vectorcall for more places (#7336)
* Add PyType vectorcall and use vectorcall in all specialized call fallbacks
* Add vectorcall slot for PyMethodDescriptor and PyWrapper
* Add FuncArgs::from_vectorcall_owned and simplify vectorcall fallback paths

Remove has_vectorcall checks from execute_call_vectorcall and execute_call_kw_vectorcall. The invoke_vectorcall fallback now uses from_vectorcall_owned to move args instead of cloning.
1 parent 42628a5 commit 3504993

File tree

6 files changed

+155
-103
lines changed

6 files changed

+155
-103
lines changed

crates/vm/src/builtins/descriptor.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,10 +426,63 @@ impl GetDescriptor for PyMemberDescriptor {
426426
}
427427
}
428428

429+
/// Vectorcall for method_descriptor: calls native method directly
///
/// Takes ownership of `args`, packs it together with `nargs` and `kwnames`
/// into a `FuncArgs` via `FuncArgs::from_vectorcall_owned`, and invokes the
/// descriptor's stored `method.func` with the VM and those arguments. The
/// result of that call is returned unchanged.
///
/// Panics if `zelf_obj` is not a `PyMethodDescriptor`, because the downcast
/// result is unwrapped.
// NOTE(review): presumably `args` holds `nargs` positional values followed by
// one value per entry in `kwnames` — confirm against `from_vectorcall_owned`.
430+
fn vectorcall_method_descriptor(
431+
zelf_obj: &PyObject,
432+
args: Vec<PyObjectRef>,
433+
nargs: usize,
434+
kwnames: Option<&[PyObjectRef]>,
435+
vm: &VirtualMachine,
436+
) -> PyResult {
437+
let zelf: &Py<PyMethodDescriptor> = zelf_obj.downcast_ref().unwrap();
438+
// Owned conversion: the argument vector is moved into FuncArgs.
let func_args = FuncArgs::from_vectorcall_owned(args, nargs, kwnames);
439+
(zelf.method.func)(vm, func_args)
440+
}
441+
442+
/// Vectorcall for wrapper_descriptor: calls wrapped slot function
///
/// The first positional argument is treated as the receiver. It is removed
/// from `args`, checked with `fast_isinstance` against the descriptor's
/// `typ`, and passed to `zelf.wrapped.call` together with the remaining
/// arguments (packed into a `FuncArgs` with a positional count of
/// `nargs - 1`).
///
/// Returns a `TypeError` when:
/// - `nargs == 0` (no receiver was supplied), or
/// - the receiver is not an instance of the descriptor's `typ`.
///
/// Panics if `zelf_obj` is not a `PyWrapper`, because the downcast result is
/// unwrapped.
443+
fn vectorcall_wrapper(
444+
zelf_obj: &PyObject,
445+
mut args: Vec<PyObjectRef>,
446+
nargs: usize,
447+
kwnames: Option<&[PyObjectRef]>,
448+
vm: &VirtualMachine,
449+
) -> PyResult {
450+
let zelf: &Py<PyWrapper> = zelf_obj.downcast_ref().unwrap();
451+
// First positional arg is self
452+
if nargs == 0 {
453+
return Err(vm.new_type_error(format!(
454+
"descriptor '{}' of '{}' object needs an argument",
455+
zelf.name.as_str(),
456+
zelf.typ.name()
457+
)));
458+
}
459+
// Take the receiver out of the vector; the remaining elements shift down
// by one, so what is left is exactly the arguments for the wrapped call.
let obj = args.remove(0);
460+
if !obj.fast_isinstance(zelf.typ) {
461+
return Err(vm.new_type_error(format!(
462+
"descriptor '{}' requires a '{}' object but received a '{}'",
463+
zelf.name.as_str(),
464+
zelf.typ.name(),
465+
obj.class().name()
466+
)));
467+
}
468+
// `nargs - 1` cannot underflow here: the `nargs == 0` case returned above.
let rest = FuncArgs::from_vectorcall_owned(args, nargs - 1, kwnames);
469+
zelf.wrapped.call(obj, rest, vm)
470+
}
471+
429472
/// Sets up the descriptor builtin types on the given context.
///
/// Calls `extend_class` for the member descriptor, method descriptor,
/// wrapper descriptor and method wrapper types, and stores the vectorcall
/// entry points in the `slots.vectorcall` field of the method descriptor and
/// wrapper descriptor types.
pub fn init(ctx: &'static Context) {
430473
PyMemberDescriptor::extend_class(ctx, ctx.types.member_descriptor_type);
431474
PyMethodDescriptor::extend_class(ctx, ctx.types.method_descriptor_type);
475+
// Register the vectorcall entry point for method_descriptor objects.
ctx.types
476+
.method_descriptor_type
477+
.slots
478+
.vectorcall
479+
.store(Some(vectorcall_method_descriptor));
432480
PyWrapper::extend_class(ctx, ctx.types.wrapper_descriptor_type);
481+
// Register the vectorcall entry point for wrapper_descriptor objects.
ctx.types
482+
.wrapper_descriptor_type
483+
.slots
484+
.vectorcall
485+
.store(Some(vectorcall_wrapper));
433486
PyMethodWrapper::extend_class(ctx, ctx.types.method_wrapper_type);
434487
}
435488

crates/vm/src/builtins/function.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,14 +1280,14 @@ pub(crate) fn vectorcall_function(
12801280
// FAST PATH: simple positional-only call, exact arg count.
12811281
// Move owned args directly into fastlocals — no clone needed.
12821282
let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) {
1283-
ArgMapping::from_dict_exact(vm.ctx.new_dict())
1283+
None // lazy allocation — most frames never access locals dict
12841284
} else {
1285-
ArgMapping::from_dict_exact(zelf.globals.clone())
1285+
Some(ArgMapping::from_dict_exact(zelf.globals.clone()))
12861286
};
12871287

12881288
let frame = Frame::new(
12891289
code.to_owned(),
1290-
Scope::new(Some(locals), zelf.globals.clone()),
1290+
Scope::new(locals, zelf.globals.clone()),
12911291
zelf.builtins.clone(),
12921292
zelf.closure.as_ref().map_or(&[], |c| c.as_slice()),
12931293
Some(zelf.to_owned().into()),

crates/vm/src/builtins/type.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2339,8 +2339,37 @@ fn subtype_set_dict(obj: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) -
23392339
* The magical type type
23402340
*/
23412341

2342+
/// Vectorcall for PyType (PEP 590).
2343+
/// Fast path: type(x) returns x.__class__ without constructing FuncArgs.
///
/// The fast path is taken only when the callee is the `type` object itself
/// (identity comparison against `vm.ctx.types.type_type`), exactly one
/// positional argument was passed, and there are no keyword names (`kwnames`
/// is `None` or an empty slice). Every other call is converted to a
/// `FuncArgs` with `FuncArgs::from_vectorcall_owned` and forwarded to
/// `PyType::call`.
///
/// Panics if `zelf_obj` is not a `PyType`, because the downcast result is
/// unwrapped.
2344+
fn vectorcall_type(
2345+
zelf_obj: &PyObject,
2346+
args: Vec<PyObjectRef>,
2347+
nargs: usize,
2348+
kwnames: Option<&[PyObjectRef]>,
2349+
vm: &VirtualMachine,
2350+
) -> PyResult {
2351+
let zelf: &Py<PyType> = zelf_obj.downcast_ref().unwrap();
2352+
2353+
// type(x) fast path: single positional arg, no kwargs
2354+
if zelf.is(vm.ctx.types.type_type) {
2355+
// An empty keyword-name slice counts the same as no keyword names.
let no_kwargs = kwnames.is_none_or(|kw| kw.is_empty());
2356+
if nargs == 1 && no_kwargs {
2357+
// NOTE(review): indexing assumes `args.len() >= nargs` — confirm callers uphold this.
return Ok(args[0].obj_type());
2358+
}
2359+
}
2360+
2361+
// Fallback: construct FuncArgs and use standard call
2362+
let func_args = FuncArgs::from_vectorcall_owned(args, nargs, kwnames);
2363+
PyType::call(zelf, func_args, vm)
2364+
}
2365+
23422366
/// Sets up the `type` builtin on the given context.
///
/// Calls `PyType::extend_class` for `type_type`, then stores
/// `vectorcall_type` in that type's `slots.vectorcall` field.
pub(crate) fn init(ctx: &'static Context) {
23432367
PyType::extend_class(ctx, ctx.types.type_type);
2368+
// Register the vectorcall entry point for the `type` object.
ctx.types
2369+
.type_type
2370+
.slots
2371+
.vectorcall
2372+
.store(Some(vectorcall_type));
23442373
}
23452374

23462375
pub(crate) fn call_slot_new(

crates/vm/src/frame.rs

Lines changed: 36 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -3847,8 +3847,7 @@ impl ExecutingFrame<'_> {
38473847
self.deoptimize(Instruction::Call {
38483848
argc: Arg::marker(),
38493849
});
3850-
let args = self.collect_positional_args(nargs);
3851-
self.execute_call(args, vm)
3850+
self.execute_call_vectorcall(nargs, vm)
38523851
}
38533852
Instruction::CallMethodDescriptorO => {
38543853
let instr_idx = self.lasti() as usize - 1;
@@ -3885,8 +3884,7 @@ impl ExecutingFrame<'_> {
38853884
self.deoptimize(Instruction::Call {
38863885
argc: Arg::marker(),
38873886
});
3888-
let args = self.collect_positional_args(nargs);
3889-
self.execute_call(args, vm)
3887+
self.execute_call_vectorcall(nargs, vm)
38903888
}
38913889
Instruction::CallMethodDescriptorFast => {
38923890
let instr_idx = self.lasti() as usize - 1;
@@ -3924,8 +3922,7 @@ impl ExecutingFrame<'_> {
39243922
self.deoptimize(Instruction::Call {
39253923
argc: Arg::marker(),
39263924
});
3927-
let args = self.collect_positional_args(nargs);
3928-
self.execute_call(args, vm)
3925+
self.execute_call_vectorcall(nargs, vm)
39293926
}
39303927
Instruction::CallBuiltinClass => {
39313928
let instr_idx = self.lasti() as usize - 1;
@@ -3934,26 +3931,12 @@ impl ExecutingFrame<'_> {
39343931
let nargs: u32 = arg.into();
39353932
let callable = self.nth_value(nargs + 1);
39363933
let callable_tag = callable as *const PyObject as u32;
3937-
if cached_tag == callable_tag && callable.downcast_ref::<PyType>().is_some() {
3938-
let args = self.collect_positional_args(nargs);
3939-
let self_or_null = self.pop_value_opt();
3940-
let callable = self.pop_value();
3941-
let final_args = if let Some(self_val) = self_or_null {
3942-
let mut args = args;
3943-
args.prepend_arg(self_val);
3944-
args
3945-
} else {
3946-
args
3947-
};
3948-
let result = callable.call(final_args, vm)?;
3949-
self.push_value(result);
3950-
return Ok(None);
3934+
if !(cached_tag == callable_tag && callable.downcast_ref::<PyType>().is_some()) {
3935+
self.deoptimize(Instruction::Call {
3936+
argc: Arg::marker(),
3937+
});
39513938
}
3952-
self.deoptimize(Instruction::Call {
3953-
argc: Arg::marker(),
3954-
});
3955-
let args = self.collect_positional_args(nargs);
3956-
self.execute_call(args, vm)
3939+
self.execute_call_vectorcall(nargs, vm)
39573940
}
39583941
Instruction::CallAllocAndEnterInit => {
39593942
let instr_idx = self.lasti() as usize - 1;
@@ -4013,8 +3996,7 @@ impl ExecutingFrame<'_> {
40133996
self.deoptimize(Instruction::Call {
40143997
argc: Arg::marker(),
40153998
});
4016-
let args = self.collect_positional_args(nargs);
4017-
self.execute_call(args, vm)
3999+
self.execute_call_vectorcall(nargs, vm)
40184000
}
40194001
Instruction::CallMethodDescriptorFastWithKeywords => {
40204002
// Native function interface is uniform regardless of keyword support
@@ -4053,8 +4035,7 @@ impl ExecutingFrame<'_> {
40534035
self.deoptimize(Instruction::Call {
40544036
argc: Arg::marker(),
40554037
});
4056-
let args = self.collect_positional_args(nargs);
4057-
self.execute_call(args, vm)
4038+
self.execute_call_vectorcall(nargs, vm)
40584039
}
40594040
Instruction::CallBuiltinFastWithKeywords => {
40604041
// Native function interface is uniform regardless of keyword support
@@ -4087,8 +4068,7 @@ impl ExecutingFrame<'_> {
40874068
self.deoptimize(Instruction::Call {
40884069
argc: Arg::marker(),
40894070
});
4090-
let args = self.collect_positional_args(nargs);
4091-
self.execute_call(args, vm)
4071+
self.execute_call_vectorcall(nargs, vm)
40924072
}
40934073
Instruction::CallNonPyGeneral => {
40944074
let instr_idx = self.lasti() as usize - 1;
@@ -4097,15 +4077,12 @@ impl ExecutingFrame<'_> {
40974077
let nargs: u32 = arg.into();
40984078
let callable = self.nth_value(nargs + 1);
40994079
let callable_tag = callable as *const PyObject as u32;
4100-
if cached_tag == callable_tag {
4101-
let args = self.collect_positional_args(nargs);
4102-
return self.execute_call(args, vm);
4080+
if cached_tag != callable_tag {
4081+
self.deoptimize(Instruction::Call {
4082+
argc: Arg::marker(),
4083+
});
41034084
}
4104-
self.deoptimize(Instruction::Call {
4105-
argc: Arg::marker(),
4106-
});
4107-
let args = self.collect_positional_args(nargs);
4108-
self.execute_call(args, vm)
4085+
self.execute_call_vectorcall(nargs, vm)
41094086
}
41104087
Instruction::CallKwPy => {
41114088
let instr_idx = self.lasti() as usize - 1;
@@ -4196,15 +4173,12 @@ impl ExecutingFrame<'_> {
41964173
let nargs: u32 = arg.into();
41974174
let callable = self.nth_value(nargs + 2);
41984175
let callable_tag = callable as *const PyObject as u32;
4199-
if cached_tag == callable_tag {
4200-
let args = self.collect_keyword_args(nargs);
4201-
return self.execute_call(args, vm);
4176+
if cached_tag != callable_tag {
4177+
self.deoptimize(Instruction::CallKw {
4178+
argc: Arg::marker(),
4179+
});
42024180
}
4203-
self.deoptimize(Instruction::CallKw {
4204-
argc: Arg::marker(),
4205-
});
4206-
let args = self.collect_keyword_args(nargs);
4207-
self.execute_call(args, vm)
4181+
self.execute_call_kw_vectorcall(nargs, vm)
42084182
}
42094183
Instruction::LoadSuperAttrAttr => {
42104184
let oparg = u32::from(arg);
@@ -5626,22 +5600,15 @@ impl ExecutingFrame<'_> {
56265600
fn execute_call_vectorcall(&mut self, nargs: u32, vm: &VirtualMachine) -> FrameResult {
56275601
let nargs_usize = nargs as usize;
56285602
let stack_len = self.state.stack.len();
5603+
debug_assert!(
5604+
stack_len >= nargs_usize + 2,
5605+
"CALL stack underflow: need callable + self_or_null + {nargs_usize} args, have {stack_len}"
5606+
);
56295607
let callable_idx = stack_len - nargs_usize - 2;
56305608
let self_or_null_idx = stack_len - nargs_usize - 1;
56315609
let args_start = stack_len - nargs_usize;
56325610

5633-
// Check if callable has vectorcall slot
5634-
let has_vectorcall = self.state.stack[callable_idx]
5635-
.as_ref()
5636-
.is_some_and(|sr| sr.as_object().class().slots.vectorcall.load().is_some());
5637-
5638-
if !has_vectorcall {
5639-
// Fallback to existing FuncArgs path
5640-
let args = self.collect_positional_args(nargs);
5641-
return self.execute_call(args, vm);
5642-
}
5643-
5644-
// Build args slice: [self_or_null?, arg1, ..., argN]
5611+
// Build args: [self?, arg1, ..., argN]
56455612
let self_or_null = self.state.stack[self_or_null_idx]
56465613
.take()
56475614
.map(|sr| sr.to_pyobj());
@@ -5664,6 +5631,7 @@ impl ExecutingFrame<'_> {
56645631
let callable_obj = self.state.stack[callable_idx].take().unwrap().to_pyobj();
56655632
self.state.stack.truncate(callable_idx);
56665633

5634+
// invoke_vectorcall falls back to FuncArgs if no vectorcall slot
56675635
let result = callable_obj.vectorcall(args_vec, effective_nargs, None, vm)?;
56685636
self.push_value(result);
56695637
Ok(None)
@@ -5680,50 +5648,26 @@ impl ExecutingFrame<'_> {
56805648
.downcast_ref::<PyTuple>()
56815649
.expect("kwarg names should be tuple");
56825650
let kw_count = kwarg_names_tuple.len();
5651+
debug_assert!(kw_count <= nargs_usize, "CALL_KW kw_count exceeds nargs");
56835652

56845653
let stack_len = self.state.stack.len();
5654+
debug_assert!(
5655+
stack_len >= nargs_usize + 2,
5656+
"CALL_KW stack underflow: need callable + self_or_null + {nargs_usize} args, have {stack_len}"
5657+
);
56855658
let callable_idx = stack_len - nargs_usize - 2;
56865659
let self_or_null_idx = stack_len - nargs_usize - 1;
56875660
let args_start = stack_len - nargs_usize;
56885661

5689-
// Check if callable has vectorcall slot
5690-
let has_vectorcall = self.state.stack[callable_idx]
5691-
.as_ref()
5692-
.is_some_and(|sr| sr.as_object().class().slots.vectorcall.load().is_some());
5693-
5694-
if !has_vectorcall {
5695-
// Fallback: reconstruct kwarg_names iterator and use existing path
5696-
let kwarg_names_iter = kwarg_names_tuple.as_slice().iter().map(|pyobj| {
5697-
pyobj
5698-
.downcast_ref::<PyUtf8Str>()
5699-
.unwrap()
5700-
.as_str()
5701-
.to_owned()
5702-
});
5703-
let args = self.pop_multiple(nargs_usize);
5704-
let func_args = FuncArgs::with_kwargs_names(args, kwarg_names_iter);
5705-
// pop self_or_null and callable
5706-
let self_or_null = self.pop_value_opt();
5707-
let callable = self.pop_value();
5708-
let final_args = if let Some(self_val) = self_or_null {
5709-
let mut args = func_args;
5710-
args.prepend_arg(self_val);
5711-
args
5712-
} else {
5713-
func_args
5714-
};
5715-
let value = callable.call(final_args, vm)?;
5716-
self.push_value(value);
5717-
return Ok(None);
5718-
}
5719-
57205662
// Build args: [self?, pos_arg1, ..., pos_argM, kw_val1, ..., kw_valK]
57215663
let self_or_null = self.state.stack[self_or_null_idx]
57225664
.take()
57235665
.map(|sr| sr.to_pyobj());
57245666
let has_self = self_or_null.is_some();
57255667

5726-
let pos_count = nargs_usize - kw_count;
5668+
let pos_count = nargs_usize
5669+
.checked_sub(kw_count)
5670+
.expect("CALL_KW: kw_count exceeds nargs");
57275671
let effective_nargs = if has_self { pos_count + 1 } else { pos_count };
57285672

57295673
// Build the full args slice: positional (including self) + kwarg values
@@ -5740,6 +5684,7 @@ impl ExecutingFrame<'_> {
57405684
let callable_obj = self.state.stack[callable_idx].take().unwrap().to_pyobj();
57415685
self.state.stack.truncate(callable_idx);
57425686

5687+
// invoke_vectorcall falls back to FuncArgs if no vectorcall slot
57435688
let kwnames = kwarg_names_tuple.as_slice();
57445689
let result = callable_obj.vectorcall(args_vec, effective_nargs, Some(kwnames), vm)?;
57455690
self.push_value(result);

0 commit comments

Comments
 (0)