Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Optimize vectorcall: move args instead of clone, use vectorcall in specialized paths

- invoke_exact_args takes Vec by value and uses drain() to move args
  into fastlocals instead of cloning (eliminates refcount overhead)
- CallPyGeneral and CallBoundMethodGeneral now call vectorcall_function
  directly instead of going through FuncArgs + prepend_arg + invoke
- CallKwPy and CallKwBoundMethod use vectorcall_function with kwnames
- vectorcall_bound_method uses insert(0) on existing Vec instead of
  allocating a second Vec
  • Loading branch information
youknowone committed Mar 3, 2026
commit 2226ae80c0c828e956aa1d8c69b32e584cf19b55
25 changes: 10 additions & 15 deletions crates/vm/src/builtins/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ impl Py<PyFunction> {
/// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args.
/// Only valid when: no VARARGS, no VARKEYWORDS, no kwonlyargs, not generator/coroutine,
/// and nargs == co_argcount.
pub fn invoke_exact_args(&self, args: &[PyObjectRef], vm: &VirtualMachine) -> PyResult {
pub fn invoke_exact_args(&self, mut args: Vec<PyObjectRef>, vm: &VirtualMachine) -> PyResult {
let code: PyRef<PyCode> = (*self.code).to_owned();

debug_assert_eq!(args.len(), code.arg_count as usize);
Expand All @@ -671,11 +671,11 @@ impl Py<PyFunction> {
)
.into_ref(&vm.ctx);

// Copy args directly into fastlocals
// Move args directly into fastlocals (no clone/refcount needed)
{
let fastlocals = unsafe { frame.fastlocals.borrow_mut() };
for (i, arg) in args.iter().enumerate() {
fastlocals[i] = Some(arg.clone());
for (slot, arg) in fastlocals.iter_mut().zip(args.drain(..)) {
*slot = Some(arg);
}
}

Expand Down Expand Up @@ -1255,7 +1255,7 @@ impl PyCell {

/// Vectorcall implementation for PyFunction (PEP 590).
/// Takes owned args to avoid cloning when filling fastlocals.
fn vectorcall_function(
pub(crate) fn vectorcall_function(
zelf_obj: &PyObject,
mut args: Vec<PyObjectRef>,
nargs: usize,
Expand Down Expand Up @@ -1324,23 +1324,18 @@ fn vectorcall_function(
/// Vectorcall implementation for PyBoundMethod (PEP 590).
fn vectorcall_bound_method(
zelf_obj: &PyObject,
args: Vec<PyObjectRef>,
mut args: Vec<PyObjectRef>,
nargs: usize,
kwnames: Option<&[PyObjectRef]>,
vm: &VirtualMachine,
) -> PyResult {
let zelf: &Py<PyBoundMethod> = zelf_obj.downcast_ref().unwrap();

// Build args with self prepended: [self.object, arg1, ..., argN, kw_val1, ..., kw_valK]
let kw_count = kwnames.map_or(0, |kw| kw.len());
let total = nargs + 1 + kw_count;
let mut full_args = Vec::with_capacity(total);
full_args.push(zelf.object.clone());
full_args.extend(args.into_iter().take(nargs + kw_count));

// Delegate to inner function's vectorcall if available
// Insert self at front of existing Vec (avoids 2nd allocation).
// O(n) memmove is cheaper than a 2nd heap alloc+dealloc for typical arg counts.
args.insert(0, zelf.object.clone());
let new_nargs = nargs + 1;
zelf.function.vectorcall(full_args, new_nargs, kwnames, vm)
zelf.function.vectorcall(args, new_nargs, kwnames, vm)
}

pub fn init(context: &'static Context) {
Expand Down
102 changes: 71 additions & 31 deletions crates/vm/src/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::{
builtin_func::PyNativeFunction,
descriptor::{MemberGetter, PyMemberDescriptor, PyMethodDescriptor},
frame::stack_analysis,
function::{PyCell, PyCellRef, PyFunction},
function::{PyCell, PyCellRef, PyFunction, vectorcall_function},
list::PyListIterator,
range::PyRangeIterator,
tuple::{PyTuple, PyTupleIterator, PyTupleRef},
Expand Down Expand Up @@ -3678,7 +3678,7 @@ impl ExecutingFrame<'_> {
let _null = self.pop_value_opt(); // self_or_null (NULL)
let callable = self.pop_value();
let func = callable.downcast_ref::<PyFunction>().unwrap();
let result = func.invoke_exact_args(&args, vm)?;
let result = func.invoke_exact_args(args, vm)?;
self.push_value(result);
Ok(None)
} else {
Expand Down Expand Up @@ -3716,7 +3716,7 @@ impl ExecutingFrame<'_> {
let mut all_args = Vec::with_capacity(pos_args.len() + 1);
all_args.push(self_val);
all_args.extend(pos_args);
let result = func.invoke_exact_args(&all_args, vm)?;
let result = func.invoke_exact_args(all_args, vm)?;
self.push_value(result);
Ok(None)
} else {
Expand Down Expand Up @@ -3934,18 +3934,21 @@ impl ExecutingFrame<'_> {
&& func.func_version() == cached_version
&& cached_version != 0
{
let args = self.collect_positional_args(nargs);
let nargs_usize = nargs as usize;
let pos_args: Vec<PyObjectRef> =
self.pop_multiple(nargs_usize).collect();
let self_or_null = self.pop_value_opt();
let callable = self.pop_value();
let func = callable.downcast_ref::<PyFunction>().unwrap();
let final_args = if let Some(self_val) = self_or_null {
let mut args = args;
args.prepend_arg(self_val);
args
let (args_vec, effective_nargs) = if let Some(self_val) = self_or_null {
let mut v = Vec::with_capacity(nargs_usize + 1);
v.push(self_val);
v.extend(pos_args);
(v, nargs_usize + 1)
} else {
args
(pos_args, nargs_usize)
};
let result = func.invoke(final_args, vm)?;
let result =
vectorcall_function(&callable, args_vec, effective_nargs, None, vm)?;
self.push_value(result);
Ok(None)
} else {
Expand All @@ -3964,13 +3967,21 @@ impl ExecutingFrame<'_> {
&& func.func_version() == cached_version
&& cached_version != 0
{
let args = self.collect_positional_args(nargs);
let nargs_usize = nargs as usize;
let pos_args: Vec<PyObjectRef> =
self.pop_multiple(nargs_usize).collect();
let self_val = self.pop_value();
let callable = self.pop_value();
let func = callable.downcast_ref::<PyFunction>().unwrap();
let mut final_args = args;
final_args.prepend_arg(self_val);
let result = func.invoke(final_args, vm)?;
let mut args_vec = Vec::with_capacity(nargs_usize + 1);
args_vec.push(self_val);
args_vec.extend(pos_args);
let result = vectorcall_function(
&callable,
args_vec,
nargs_usize + 1,
None,
vm,
)?;
self.push_value(result);
Ok(None)
} else {
Expand Down Expand Up @@ -4226,24 +4237,38 @@ impl ExecutingFrame<'_> {
let cached_version = self.code.instructions.read_cache_u32(cache_base + 1);
let nargs: u32 = arg.into();
// Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names]
// callable is at position nargs + 2 from top (nargs args + kwarg_names + self_or_null)
let callable = self.nth_value(nargs + 2);
if let Some(func) = callable.downcast_ref::<PyFunction>()
&& func.func_version() == cached_version
&& cached_version != 0
{
let args = self.collect_keyword_args(nargs);
let nargs_usize = nargs as usize;
let kwarg_names_obj = self.pop_value();
let kwarg_names_tuple = kwarg_names_obj
.downcast_ref::<PyTuple>()
.expect("kwarg names should be tuple");
let kw_count = kwarg_names_tuple.len();
let all_args: Vec<PyObjectRef> =
self.pop_multiple(nargs_usize).collect();
let self_or_null = self.pop_value_opt();
let callable = self.pop_value();
let func = callable.downcast_ref::<PyFunction>().unwrap();
let final_args = if let Some(self_val) = self_or_null {
let mut args = args;
args.prepend_arg(self_val);
args
let pos_count = nargs_usize - kw_count;
let (args_vec, effective_nargs) = if let Some(self_val) = self_or_null {
let mut v = Vec::with_capacity(nargs_usize + 1);
v.push(self_val);
v.extend(all_args);
(v, pos_count + 1)
} else {
args
(all_args, pos_count)
};
let result = func.invoke(final_args, vm)?;
let kwnames = kwarg_names_tuple.as_slice();
let result = vectorcall_function(
&callable,
args_vec,
effective_nargs,
Some(kwnames),
vm,
)?;
self.push_value(result);
return Ok(None);
}
Expand All @@ -4262,13 +4287,28 @@ impl ExecutingFrame<'_> {
&& func.func_version() == cached_version
&& cached_version != 0
{
let args = self.collect_keyword_args(nargs);
let self_val = self.pop_value(); // self_or_null is always Some here
let nargs_usize = nargs as usize;
let kwarg_names_obj = self.pop_value();
let kwarg_names_tuple = kwarg_names_obj
.downcast_ref::<PyTuple>()
.expect("kwarg names should be tuple");
let kw_count = kwarg_names_tuple.len();
let all_args: Vec<PyObjectRef> =
self.pop_multiple(nargs_usize).collect();
let self_val = self.pop_value();
let callable = self.pop_value();
let func = callable.downcast_ref::<PyFunction>().unwrap();
let mut final_args = args;
final_args.prepend_arg(self_val);
let result = func.invoke(final_args, vm)?;
let pos_count = nargs_usize - kw_count;
let mut args_vec = Vec::with_capacity(nargs_usize + 1);
args_vec.push(self_val);
args_vec.extend(all_args);
let kwnames = kwarg_names_tuple.as_slice();
let result = vectorcall_function(
&callable,
args_vec,
pos_count + 1,
Some(kwnames),
vm,
)?;
self.push_value(result);
return Ok(None);
}
Expand Down
Loading