Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Remove the usage of the cstack in Python to Python calls
Ths commit inlines calls to Python functions in the eval loop
and steals all the arguments in the call from the caller for
performance.
  • Loading branch information
pablogsal committed Sep 21, 2021
commit 8a88fd3173edd69db3cb4c4c68b0e9aa2cb4dcc4
1 change: 1 addition & 0 deletions Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ typedef struct _cframe {
*/
int use_tracing;
struct _cframe *previous;
int depth;
} CFrame;

typedef struct _err_stackitem {
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ typedef struct _interpreter_frame {
int f_lasti; /* Last instruction if called */
int stacktop; /* Offset of TOS from localsplus */
PyFrameState f_state; /* What state the frame is in */
int depth; /* Depth of the frame in a ceval loop */
Comment thread
pablogsal marked this conversation as resolved.
Outdated
Comment thread
pablogsal marked this conversation as resolved.
Outdated
PyObject *localsplus[1];
} InterpreterFrame;

Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_tuple.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ extern "C" {
#define _PyTuple_ITEMS(op) (_PyTuple_CAST(op)->ob_item)

extern PyObject *_PyTuple_FromArray(PyObject *const *, Py_ssize_t);
extern PyObject *_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t);

#ifdef __cplusplus
}
Expand Down
20 changes: 20 additions & 0 deletions Objects/tupleobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,26 @@ _PyTuple_FromArray(PyObject *const *src, Py_ssize_t n)
return (PyObject *)tuple;
}

PyObject *
_PyTuple_FromArraySteal(PyObject *const *src, Py_ssize_t n)
{
if (n == 0) {
return tuple_get_empty();
}

PyTupleObject *tuple = tuple_alloc(n);
if (tuple == NULL) {
return NULL;
}
PyObject **dst = tuple->ob_item;
for (Py_ssize_t i = 0; i < n; i++) {
PyObject *item = src[i];
dst[i] = item;
}
_PyObject_GC_TRACK(tuple);
return (PyObject *)tuple;
}

static PyObject *
tupleslice(PyTupleObject *a, Py_ssize_t ilow,
Py_ssize_t ihigh)
Expand Down
160 changes: 133 additions & 27 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ static int check_args_iterable(PyThreadState *, PyObject *func, PyObject *vararg
static void format_kwargs_error(PyThreadState *, PyObject *func, PyObject *kwargs);
static void format_awaitable_error(PyThreadState *, PyTypeObject *, int, int);
static int get_exception_handler(PyCodeObject *, int, int*, int*, int*);
static InterpreterFrame *
_PyEvalFramePushAndInit(PyThreadState *tstate, PyFrameConstructor *con,
PyObject *locals, PyObject* const* args,
size_t argcount, PyObject *kwnames, int steal_args);
static int
_PyEvalFrameClearAndPop(PyThreadState *tstate, InterpreterFrame * frame);

#define NAME_ERROR_MSG \
"name '%.200s' is not defined"
Expand Down Expand Up @@ -1534,10 +1540,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
PyObject *retval = NULL; /* Return value */
_Py_atomic_int * const eval_breaker = &tstate->interp->ceval.eval_breaker;

if (_Py_EnterRecursiveCall(tstate, "")) {
return NULL;
}

CFrame cframe;

/* WARNING: Because the CFrame lives on the C stack,
Expand All @@ -1547,11 +1549,20 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
CFrame *prev_cframe = tstate->cframe;
cframe.use_tracing = prev_cframe->use_tracing;
cframe.previous = prev_cframe;
cframe.depth = 0;
tstate->cframe = &cframe;

/* push frame */
tstate->frame = frame;

start_frame:
if (_Py_EnterRecursiveCall(tstate, "")) {
tstate->recursion_depth++;
goto exit_eval_frame;
}

assert(frame == tstate->frame);

if (cframe.use_tracing) {
if (trace_function_entry(tstate, frame)) {
goto exit_eval_frame;
Expand All @@ -1573,7 +1584,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
}
}


resume_frame:
frame->depth = cframe.depth;
co = frame->f_code;
PyObject *names = co->co_names;
PyObject *consts = co->co_consts;
_Py_CODEUNIT *first_instr = co->co_firstinstr;
Expand Down Expand Up @@ -1616,6 +1629,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
#endif

if (throwflag) { /* support for generator.throw() */
throwflag = 0;
goto error;
}

Expand Down Expand Up @@ -4603,16 +4617,75 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr

TARGET(CALL_FUNCTION): {
PREDICTED(CALL_FUNCTION);
PyObject **sp, *res;
sp = stack_pointer;
res = call_function(tstate, &sp, oparg, NULL, cframe.use_tracing);
stack_pointer = sp;
PUSH(res);
if (res == NULL) {
PyObject *res;

// ------------ Check if call cal be optimized -------------- //
PyObject *function = *(stack_pointer - oparg - 1);
Comment thread
pablogsal marked this conversation as resolved.
Outdated

int optimize_call = 0;
if (Py_TYPE(function) == &PyFunction_Type) {
PyCodeObject *co = (PyCodeObject*)((PyFunctionObject*)(function))->func_code;
int is_coro = co->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR);
optimize_call = (is_coro || cframe.use_tracing) ? 0 : 1;
}
// ----------------------------------------------------------- //

if (!optimize_call) {
PyObject **sp = stack_pointer;
res = call_function(tstate, &sp, oparg, NULL, cframe.use_tracing);
Comment thread
pablogsal marked this conversation as resolved.
stack_pointer = sp;
PUSH(res);
if (res == NULL) {
goto error;
}
CHECK_EVAL_BREAKER();
DISPATCH();
}

assert(PyFunction_Check(function));
size_t nargsf = oparg | PY_VECTORCALL_ARGUMENTS_OFFSET;
PyObject *const *args = stack_pointer - oparg;
assert(args != NULL || PyVectorcall_NARGS(nargsf) == 0);
assert(PyVectorcall_Function(function) == _PyFunction_Vectorcall);
PyFrameConstructor *con = PyFunction_AS_FRAME_CONSTRUCTOR(function);
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
assert(nargs >= 0);
assert(nargs == 0 || args != NULL);
PyObject *locals = (((PyCodeObject *)con->fc_code)->co_flags & CO_OPTIMIZED) ? NULL : con->fc_globals;
PyCodeObject *code = (PyCodeObject *)con->fc_code;

assert(!(code->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) || cframe.use_tracing);

InterpreterFrame *new_frame = _PyEvalFramePushAndInit(tstate, con, locals, args, nargs, NULL, 1);
if (new_frame == NULL) {
// When we exit here, we own all variables in the stack (the frame creation has not stolen
// any variable) so we need to clean the whole stack (done in the "error" label).
goto error;
}
CHECK_EVAL_BREAKER();
DISPATCH();
assert(tstate->interp->eval_frame != NULL);

Py_DECREF(function);
STACK_SHRINK(oparg + 1);
// TODO: Transform the following into a goto
_PyFrame_SetStackPointer(frame, stack_pointer);
tstate->frame = frame = new_frame;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to self, no need to pop frame here because it's done on eval frame exit at exit_eval_frame.

cframe.depth++;
goto start_frame;
// res = _PyEval_EvalFrame(tstate, new_frame, 0);
// assert(_PyFrame_GetStackPointer(new_frame) == _PyFrame_Stackbase(new_frame));
// if (_PyEvalFrameClearAndPop(tstate, new_frame)) {
// Py_XDECREF(res);
// goto error;
// }
//
// assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
//
// PUSH(res);
// if (res == NULL) {
// goto error;
// }
// CHECK_EVAL_BREAKER();
// DISPATCH();
}

TARGET(CALL_FUNCTION_KW): {
Expand Down Expand Up @@ -4991,14 +5064,29 @@ MISS_WITH_OPARG_COUNTER(BINARY_ADD)

/* pop frame */
exit_eval_frame:
/* Restore previous cframe */
tstate->cframe = cframe.previous;
tstate->cframe->use_tracing = cframe.use_tracing;

if (PyDTrace_FUNCTION_RETURN_ENABLED())
dtrace_function_return(frame);
_Py_LeaveRecursiveCall(tstate);

if (cframe.depth) {
_PyFrame_StackPush(frame->previous, retval);
if (_PyEvalFrameClearAndPop(tstate, frame)) {
retval = NULL;
}
frame = tstate->frame;
cframe.depth--;
if (retval == NULL) {
assert(_PyErr_Occurred(tstate));
throwflag = 1;
}
retval = NULL;
goto resume_frame;
}
tstate->frame = frame->previous;

/* Restore previous cframe */
tstate->cframe = cframe.previous;
tstate->cframe->use_tracing = cframe.use_tracing;
return _Py_CheckFunctionResult(tstate, NULL, retval, __func__);
}

Expand Down Expand Up @@ -5309,7 +5397,7 @@ get_exception_handler(PyCodeObject *code, int index, int *level, int *handler, i
static int
initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
PyObject **localsplus, PyObject *const *args,
Py_ssize_t argcount, PyObject *kwnames)
Py_ssize_t argcount, PyObject *kwnames, int steal_args)
Comment thread
pablogsal marked this conversation as resolved.
Outdated
{
PyCodeObject *co = (PyCodeObject*)con->fc_code;
const Py_ssize_t total_args = co->co_argcount + co->co_kwonlyargcount;
Expand All @@ -5319,8 +5407,9 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
Py_ssize_t i;
if (co->co_flags & CO_VARKEYWORDS) {
kwdict = PyDict_New();
Comment thread
pablogsal marked this conversation as resolved.
if (kwdict == NULL)
if (kwdict == NULL) {
goto fail;
}
i = total_args;
if (co->co_flags & CO_VARARGS) {
i++;
Expand All @@ -5342,14 +5431,21 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
}
for (j = 0; j < n; j++) {
PyObject *x = args[j];
Py_INCREF(x);
if (!steal_args) {
Py_INCREF(x);
}
assert(localsplus[j] == NULL);
localsplus[j] = x;
}

/* Pack other positional arguments into the *args argument */
if (co->co_flags & CO_VARARGS) {
PyObject *u = _PyTuple_FromArray(args + n, argcount - n);
PyObject *u = NULL;
if (steal_args) {
u = _PyTuple_FromArraySteal(args + n, argcount - n);
} else {
u = _PyTuple_FromArray(args + n, argcount - n);
}
if (u == NULL) {
goto fail;
}
Expand Down Expand Up @@ -5415,6 +5511,9 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
if (PyDict_SetItem(kwdict, keyword, value) == -1) {
goto fail;
}
if (steal_args) {
Py_DECREF(value);
}
continue;

kw_found:
Expand All @@ -5424,7 +5523,9 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
con->fc_qualname, keyword);
goto fail;
}
Py_INCREF(value);
if (!steal_args) {
Py_INCREF(value);
}
localsplus[j] = value;
}
}
Expand Down Expand Up @@ -5528,7 +5629,7 @@ make_coro_frame(PyThreadState *tstate,
}
_PyFrame_InitializeSpecials(frame, con, locals, code->co_nlocalsplus);
assert(frame->frame_obj == NULL);
if (initialize_locals(tstate, con, frame->localsplus, args, argcount, kwnames)) {
if (initialize_locals(tstate, con, frame->localsplus, args, argcount, kwnames, 0)) {
_PyFrame_Clear(frame, 1);
return NULL;
}
Expand Down Expand Up @@ -5557,14 +5658,19 @@ make_coro(PyThreadState *tstate, PyFrameConstructor *con,
static InterpreterFrame *
_PyEvalFramePushAndInit(PyThreadState *tstate, PyFrameConstructor *con,
PyObject *locals, PyObject* const* args,
size_t argcount, PyObject *kwnames)
size_t argcount, PyObject *kwnames, int steal_args)
{
InterpreterFrame * frame = _PyThreadState_PushFrame(tstate, con, locals);
if (frame == NULL) {
return NULL;
}
PyObject **localsarray = _PyFrame_GetLocalsArray(frame);
if (initialize_locals(tstate, con, localsarray, args, argcount, kwnames)) {
if (initialize_locals(tstate, con, localsarray, args, argcount, kwnames, steal_args)) {
if (steal_args) {
for (int i = 0; i < frame->stacktop; i++) {
Py_XINCREF(frame->localsplus[i]);
Comment thread
pablogsal marked this conversation as resolved.
Outdated
}
}
_PyFrame_Clear(frame, 0);
return NULL;
}
Expand All @@ -5579,9 +5685,9 @@ _PyEvalFrameClearAndPop(PyThreadState *tstate, InterpreterFrame * frame)
++tstate->recursion_depth;
assert(frame->frame_obj == NULL || frame->frame_obj->f_own_locals_memory == 0);
if (_PyFrame_Clear(frame, 0)) {
--tstate->recursion_depth;
return -1;
}
assert(frame->frame_obj == NULL);
--tstate->recursion_depth;
tstate->frame = frame->previous;
_PyThreadState_PopFrame(tstate, frame);
Expand All @@ -5601,7 +5707,7 @@ _PyEval_Vector(PyThreadState *tstate, PyFrameConstructor *con,
return make_coro(tstate, con, locals, args, argcount, kwnames);
}
InterpreterFrame *frame = _PyEvalFramePushAndInit(
tstate, con, locals, args, argcount, kwnames);
tstate, con, locals, args, argcount, kwnames, 0);
if (frame == NULL) {
return NULL;
}
Expand Down
Loading