Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Use inline caching for CALL
  • Loading branch information
brandtbucher committed Mar 4, 2022
commit 2820d50c64645211c0d4212946d15fe216961422
18 changes: 14 additions & 4 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ typedef struct {
uint32_t func_version;
uint16_t min_args;
uint16_t defaults_len;
} _PyCallCache;
} _PyPrecallCache;


/* Add specialized versions of entries to this union.
Expand All @@ -46,7 +46,7 @@ typedef union {
_PyEntryZero zero;
_PyAdaptiveEntry adaptive;
_PyObjectCache obj;
_PyCallCache call;
_PyPrecallCache call;
} SpecializedCacheEntry;

#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
Expand Down Expand Up @@ -112,6 +112,16 @@ typedef struct {

#define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache)

// XXX: These members can definitely shrink:
// Inline cache for the CALL instruction, laid out as consecutive
// _Py_CODEUNIT slots directly in the bytecode stream (the interpreter
// skips over them with INLINE_CACHE_ENTRIES_CALL).
typedef struct {
_Py_CODEUNIT counter;        // adaptive backoff counter; specialize when it hits 0
_Py_CODEUNIT func_version[2];// 32-bit func_version split across two code units (reassembled with read_u32)
_Py_CODEUNIT min_args;       // minimum positional args accepted (argcount minus defaults)
_Py_CODEUNIT defaults_len;   // length of the function's defaults tuple
} _PyCallCache;

#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)

/* Maximum size of code to quicken, in code units. */
#define MAX_SIZE_TO_QUICKEN 5000

Expand Down Expand Up @@ -348,8 +358,8 @@ extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr,
PyObject *name);
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
PyObject *kwnames, SpecializedCacheEntry *cache);
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr,
int nargs, PyObject *kwnames);
extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
PyObject *kwnames, SpecializedCacheEntry *cache, PyObject *builtins);
extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
Expand Down
1 change: 1 addition & 0 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def jabs_op(name, op, entries=0):
def_op('DICT_UPDATE', 165)
def_op('PRECALL', 166)

def_op('CALL', 171)
def_op('CALL', 171, 5)
def_op('KW_NAMES', 172)
hasconst.append(172)

Expand Down
284 changes: 142 additions & 142 deletions Lib/test/test_dis.py

Large diffs are not rendered by default.

75 changes: 40 additions & 35 deletions Programs/test_frozenmain.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

73 changes: 35 additions & 38 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -4654,6 +4654,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
goto error;
}
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->f_lasti += INLINE_CACHE_ENTRIES_CALL;
new_frame->previous = frame;
cframe.current_frame = frame = new_frame;
CALL_STAT_INC(inlined_py_calls);
Expand Down Expand Up @@ -4685,6 +4686,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
if (res == NULL) {
goto error;
}
JUMPBY(INLINE_CACHE_ENTRIES_CALL);
CHECK_EVAL_BREAKER();
DISPATCH();
}
Expand Down Expand Up @@ -4714,40 +4716,35 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}

TARGET(CALL_ADAPTIVE) {
SpecializedCacheEntry *cache = GET_CACHE();
int original_oparg = cache->adaptive.original_oparg;
if (cache->adaptive.counter == 0) {
_PyCallCache *cache = (_PyCallCache *)next_instr;
if (cache->counter == 0) {
next_instr--;
int is_meth = is_method(stack_pointer, original_oparg);
int nargs = original_oparg + is_meth;
int is_meth = is_method(stack_pointer, oparg);
int nargs = oparg + is_meth;
PyObject *callable = PEEK(nargs + 1);
int err = _Py_Specialize_Call(
callable, next_instr, nargs,
call_shape.kwnames, cache);
int err = _Py_Specialize_Call(callable, next_instr, nargs,
call_shape.kwnames);
if (err < 0) {
goto error;
}
DISPATCH();
}
else {
STAT_INC(CALL, deferred);
cache->adaptive.counter--;
oparg = original_oparg;
cache->counter--;
goto call_function;
}
}

TARGET(CALL_PY_EXACT_ARGS) {
assert(call_shape.kwnames == NULL);
SpecializedCacheEntry *caches = GET_CACHE();
int original_oparg = caches->adaptive.original_oparg;
int is_meth = is_method(stack_pointer, original_oparg);
int argcount = original_oparg + is_meth;
_PyCallCache *cache = (_PyCallCache *)next_instr;
int is_meth = is_method(stack_pointer, oparg);
int argcount = oparg + is_meth;
PyObject *callable = PEEK(argcount + 1);
DEOPT_IF(!PyFunction_Check(callable), CALL);
_PyCallCache *cache1 = &caches[-1].call;
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != cache1->func_version, CALL);
DEOPT_IF(func->func_version != read_u32(cache->func_version), CALL);
PyCodeObject *code = (PyCodeObject *)func->func_code;
DEOPT_IF(code->co_argcount != argcount, CALL);
STAT_INC(CALL, hit);
Expand All @@ -4765,25 +4762,24 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
STACK_SHRINK(2-is_meth);
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->f_lasti += INLINE_CACHE_ENTRIES_CALL;
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
goto start_frame;
}

TARGET(CALL_PY_WITH_DEFAULTS) {
assert(call_shape.kwnames == NULL);
SpecializedCacheEntry *caches = GET_CACHE();
int original_oparg = caches->adaptive.original_oparg;
int is_meth = is_method(stack_pointer, original_oparg);
int argcount = original_oparg + is_meth;
_PyCallCache *cache = (_PyCallCache *)next_instr;
int is_meth = is_method(stack_pointer, oparg);
int argcount = oparg + is_meth;
PyObject *callable = PEEK(argcount + 1);
DEOPT_IF(!PyFunction_Check(callable), CALL);
_PyCallCache *cache1 = &caches[-1].call;
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != cache1->func_version, CALL);
DEOPT_IF(func->func_version != read_u32(cache->func_version), CALL);
PyCodeObject *code = (PyCodeObject *)func->func_code;
DEOPT_IF(argcount > code->co_argcount, CALL);
int minargs = cache1->min_args;
int minargs = cache->min_args;
DEOPT_IF(argcount < minargs, CALL);
STAT_INC(CALL, hit);
_PyInterpreterFrame *new_frame = _PyFrame_Push(tstate, func);
Expand All @@ -4795,7 +4791,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = stack_pointer[i];
}
int def_offset = cache1->defaults_len - code->co_argcount;
int def_offset = cache->defaults_len - code->co_argcount;
for (int i = argcount; i < code->co_argcount; i++) {
PyObject *def = PyTuple_GET_ITEM(func->func_defaults, i + def_offset);
Py_INCREF(def);
Expand All @@ -4806,6 +4802,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
STACK_SHRINK(2-is_meth);
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->f_lasti += INLINE_CACHE_ENTRIES_CALL;
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
goto start_frame;
Expand All @@ -4819,7 +4816,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
PyObject *obj = TOP();
PyObject *callable = SECOND();
DEOPT_IF(callable != (PyObject *)&PyType_Type, PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
PyObject *res = Py_NewRef(Py_TYPE(obj));
Py_DECREF(callable);
Expand All @@ -4836,7 +4833,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DEOPT_IF(is_method(stack_pointer, 1), PRECALL);
PyObject *callable = PEEK(2);
DEOPT_IF(callable != (PyObject *)&PyUnicode_Type, PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
PyObject *arg = TOP();
PyObject *res = PyObject_Str(arg);
Expand All @@ -4857,7 +4854,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DEOPT_IF(is_method(stack_pointer, 1), PRECALL);
PyObject *callable = PEEK(2);
DEOPT_IF(callable != (PyObject *)&PyTuple_Type, PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
PyObject *arg = TOP();
PyObject *res = PySequence_Tuple(arg);
Expand All @@ -4881,7 +4878,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DEOPT_IF(!PyType_Check(callable), PRECALL);
PyTypeObject *tp = (PyTypeObject *)callable;
DEOPT_IF(tp->tp_vectorcall == NULL, PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
STACK_SHRINK(total_args);
PyObject *res = tp->tp_vectorcall((PyObject *)tp, stack_pointer,
Expand Down Expand Up @@ -4913,7 +4910,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
PyObject *callable = PEEK(total_args + 1);
DEOPT_IF(!PyCFunction_CheckExact(callable), PRECALL);
DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O, PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable);
// This is slower but CPython promises to check all non-vectorcall
Expand Down Expand Up @@ -4949,7 +4946,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DEOPT_IF(!PyCFunction_CheckExact(callable), PRECALL);
DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL,
PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable);
STACK_SHRINK(total_args);
Expand Down Expand Up @@ -4990,7 +4987,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DEOPT_IF(!PyCFunction_CheckExact(callable), PRECALL);
DEOPT_IF(PyCFunction_GET_FLAGS(callable) !=
(METH_FASTCALL | METH_KEYWORDS), PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
STACK_SHRINK(total_args);
/* res = func(self, args, nargs, kwnames) */
Expand Down Expand Up @@ -5032,7 +5029,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
_PyObjectCache *cache1 = &caches[-1].obj;
PyObject *callable = PEEK(total_args + 1);
DEOPT_IF(callable != cache1->obj, PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
PyObject *arg = TOP();
Py_ssize_t len_i = PyObject_Length(arg);
Expand Down Expand Up @@ -5065,7 +5062,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
_PyObjectCache *cache1 = &caches[-1].obj;

DEOPT_IF(callable != cache1->obj, PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
PyObject *cls = POP();
PyObject *inst = TOP();
Expand Down Expand Up @@ -5100,7 +5097,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
PyObject *list = SECOND();
DEOPT_IF(!PyList_Check(list), PRECALL);
STAT_INC(PRECALL, hit);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
PyObject *arg = TOP();
int err = PyList_Append(list, arg);
if (err) {
Expand All @@ -5125,7 +5122,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DEOPT_IF(!Py_IS_TYPE(callable, &PyMethodDescr_Type), PRECALL);
PyMethodDef *meth = ((PyMethodDescrObject *)callable)->d_method;
DEOPT_IF(meth->ml_flags != METH_O, PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
PyCFunction cfunc = meth->ml_meth;
// This is slower but CPython promises to check all non-vectorcall
Expand Down Expand Up @@ -5161,7 +5158,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DEOPT_IF(!Py_IS_TYPE(callable, &PyMethodDescr_Type), PRECALL);
PyMethodDef *meth = ((PyMethodDescrObject *)callable)->d_method;
DEOPT_IF(meth->ml_flags != METH_NOARGS, PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
PyCFunction cfunc = meth->ml_meth;
// This is slower but CPython promises to check all non-vectorcall
Expand Down Expand Up @@ -5194,7 +5191,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
DEOPT_IF(!Py_IS_TYPE(callable, &PyMethodDescr_Type), PRECALL);
PyMethodDef *meth = ((PyMethodDescrObject *)callable)->d_method;
DEOPT_IF(meth->ml_flags != METH_FASTCALL, PRECALL);
next_instr++; // Skip following call
next_instr += INLINE_CACHE_ENTRIES_CALL + 1; // Skip following call
STAT_INC(PRECALL, hit);
_PyCFunctionFast cfunc = (_PyCFunctionFast)(void(*)(void))meth->ml_meth;
int nargs = total_args-1;
Expand Down Expand Up @@ -5594,7 +5591,7 @@ MISS_WITH_INLINE_CACHE(STORE_ATTR)
MISS_WITH_INLINE_CACHE(LOAD_GLOBAL)
MISS_WITH_INLINE_CACHE(LOAD_METHOD)
MISS_WITH_CACHE(PRECALL)
MISS_WITH_CACHE(CALL)
MISS_WITH_INLINE_CACHE(CALL)
MISS_WITH_INLINE_CACHE(BINARY_OP)
MISS_WITH_INLINE_CACHE(COMPARE_OP)
MISS_WITH_INLINE_CACHE(BINARY_SUBSCR)
Expand Down
Loading