Skip to content
Next Next commit
Use inline cache for BINARY_SUBSCR. Work in progress.
  • Loading branch information
markshannon committed Feb 28, 2022
commit 465f0c01f26dc76c82501b89d08f687878d46b0a
2 changes: 2 additions & 0 deletions Include/cpython/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ struct PyCodeObject {
_Py_CODEUNIT *co_firstinstr; /* Pointer to first instruction, used for quickening.
Unlike the other "hot" fields, this one is
actually derived from co_code. */
PyObject **_co_obj_cache; /* Array of borrowed references to objects, for specialized code. */
PyObject *co_exceptiontable; /* Byte string encoding exception handling table */
int co_flags; /* CO_..., see below */
int co_warmup; /* Warmup counter for quickening */
Expand Down Expand Up @@ -90,6 +91,7 @@ struct PyCodeObject {
int co_nplaincellvars; /* number of non-arg cell variables */
int co_ncellvars; /* total number of cell variables */
int co_nfreevars; /* number of free variables */
int _co_obj_cache_len; /* number of entries in _co_obj_cache */
Comment thread
markshannon marked this conversation as resolved.
Outdated
// lazily-computed values
PyObject *co_varnames; /* tuple of strings (local variable names) */
PyObject *co_cellvars; /* tuple of strings (cell variable names) */
Expand Down
31 changes: 30 additions & 1 deletion Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,32 @@ typedef struct {
_Py_CODEUNIT counter;
} _PyUnpackSequenceCache;










typedef struct {
_Py_CODEUNIT counter;
_Py_CODEUNIT object;
_Py_CODEUNIT type_version;
_Py_CODEUNIT _t1;
_Py_CODEUNIT func_version;
} _PyBinarySubscrCache;

#define INLINE_CACHE_ENTRIES_BINARY_OP \
(sizeof(_PyBinaryOpCache) / sizeof(_Py_CODEUNIT))

#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
(sizeof(_PyUnpackSequenceCache) / sizeof(_Py_CODEUNIT))

#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR \
(sizeof(_PyBinarySubscrCache) / sizeof(_Py_CODEUNIT))

/* Maximum size of code to quicken, in code units. */
#define MAX_SIZE_TO_QUICKEN 5000

Expand All @@ -98,6 +118,15 @@ _GetSpecializedCacheEntry(const _Py_CODEUNIT *first_instr, Py_ssize_t n)
return &last_cache_plus_one[-1-n].entry;
}

/* Returns a borrowed reference */
static inline PyObject*
_PyQuickenedGetObject(const _Py_CODEUNIT *first_instr, uint16_t index)
{
SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr;
assert(&last_cache_plus_one->code[0] == first_instr);
return last_cache_plus_one[-1-index].entry.obj.obj;
}

/* Following two functions form a pair.
*
* oparg_from_offset_and_index() is used to compute the oparg
Expand Down Expand Up @@ -309,7 +338,7 @@ extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObjec
extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, PyCodeObject *code);
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
PyObject *kwnames, SpecializedCacheEntry *cache);
Expand Down
1 change: 1 addition & 0 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def jabs_op(name, op, entries=0):

def_op('UNARY_INVERT', 15)

def_op('BINARY_SUBSCR', 25)
def_op('BINARY_SUBSCR', 25, 5)

def_op('GET_LEN', 30)
def_op('MATCH_MAPPING', 31)
Expand Down
28 changes: 16 additions & 12 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2102,25 +2102,24 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
SET_TOP(res);
if (res == NULL)
goto error;
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
DISPATCH();
}

TARGET(BINARY_SUBSCR_ADAPTIVE) {
SpecializedCacheEntry *cache = GET_CACHE();
if (cache->adaptive.counter == 0) {
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
if (cache->counter == 0) {
PyObject *sub = TOP();
PyObject *container = SECOND();
next_instr--;
if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) {
if (_Py_Specialize_BinarySubscr(container, sub, next_instr, frame->f_code) < 0) {
goto error;
}
DISPATCH();
}
else {
STAT_INC(BINARY_SUBSCR, deferred);
cache->adaptive.counter--;
assert(cache->adaptive.original_oparg == 0);
/* No need to set oparg here; it isn't used by BINARY_SUBSCR */
cache->counter--;
JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
}
}
Expand All @@ -2146,6 +2145,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
Py_DECREF(sub);
SET_TOP(res);
Py_DECREF(list);
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
NOTRACE_DISPATCH();
}

Expand All @@ -2170,6 +2170,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
Py_DECREF(sub);
SET_TOP(res);
Py_DECREF(tuple);
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
NOTRACE_DISPATCH();
}

Expand All @@ -2188,18 +2189,20 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
Py_DECREF(sub);
SET_TOP(res);
Py_DECREF(dict);
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
DISPATCH();
}

TARGET(BINARY_SUBSCR_GETITEM) {
PyObject *sub = TOP();
PyObject *container = SECOND();
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
_PyObjectCache *cache1 = &caches[-1].obj;
PyFunctionObject *getitem = (PyFunctionObject *)cache1->obj;
DEOPT_IF(Py_TYPE(container)->tp_version_tag != cache0->version, BINARY_SUBSCR);
DEOPT_IF(getitem->func_version != cache0->index, BINARY_SUBSCR);
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
PyObject *cached = _PyQuickenedGetObject(first_instr, cache->object);
assert(PyFunction_Check(cached));
PyFunctionObject *getitem = (PyFunctionObject *)cached;
uint32_t type_version = read32(&cache->type_version);
DEOPT_IF(Py_TYPE(container)->tp_version_tag != type_version, BINARY_SUBSCR);
DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR);
PyCodeObject *code = (PyCodeObject *)getitem->func_code;
size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE;
assert(code->co_argcount == 2);
Expand All @@ -2221,6 +2224,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
CALL_STAT_INC(inlined_py_calls);
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
goto start_frame;
}

Expand Down
46 changes: 34 additions & 12 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,18 @@ static uint8_t cache_requirements[256] = {
[LOAD_ATTR] = 1, // _PyAdaptiveEntry
[LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
[LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
[BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
[STORE_SUBSCR] = 0,
[CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
[PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
[STORE_ATTR] = 1, // _PyAdaptiveEntry
[COMPARE_OP] = 1, /* _PyAdaptiveEntry */
};

/* The number of object cache entries required for a "family" of instructions. */
static const uint8_t object_cache_requirements[256] = {
[BINARY_SUBSCR] = 5,
};

Py_ssize_t _Py_QuickenedCount = 0;
#ifdef Py_STATS
PyStats _py_stats = { 0 };
Expand Down Expand Up @@ -287,6 +291,14 @@ _Py_PrintSpecializationStats(int to_file)
#define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
#endif

static void
_PyQuickenedSetObject(const _Py_CODEUNIT *first_instr, uint16_t index, PyObject *obj)
{
SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr;
assert(&last_cache_plus_one->code[0] == first_instr);
last_cache_plus_one[-1-index].entry.obj.obj = obj;
}

static SpecializedCacheOrInstruction *
allocate(int cache_count, int instruction_count)
{
Expand Down Expand Up @@ -353,7 +365,10 @@ entries_needed(const _Py_CODEUNIT *code, int len)
int previous_opcode = -1;
for (int i = 0; i < len; i++) {
uint8_t opcode = _Py_OPCODE(code[i]);
if (previous_opcode != EXTENDED_ARG) {
if (object_cache_requirements[opcode]) {
cache_offset += object_cache_requirements[opcode];
}
else if (previous_opcode != EXTENDED_ARG) {
oparg_from_instruction_and_update_offset(i, opcode, 0, &cache_offset);
}
previous_opcode = opcode;
Expand Down Expand Up @@ -387,6 +402,11 @@ optimize(SpecializedCacheOrInstruction *quickened, int len)
if (adaptive_opcode) {
if (_PyOpcode_InlineCacheEntries[opcode]) {
instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, oparg);
if (object_cache_requirements[opcode]) {
assert(_PyOpcode_InlineCacheEntries[opcode] >= 2);
instructions[i+2] = cache_offset;
cache_offset += object_cache_requirements[opcode];
}
}
else if (previous_opcode != EXTENDED_ARG) {
int new_oparg = oparg_from_instruction_and_update_offset(
Expand Down Expand Up @@ -1332,9 +1352,11 @@ function_kind(PyCodeObject *code) {

int
_Py_Specialize_BinarySubscr(
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, PyCodeObject *code)
{
_PyAdaptiveEntry *cache0 = &cache->adaptive;
assert(_PyOpcode_InlineCacheEntries[BINARY_SUBSCR] ==
INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1);
PyTypeObject *container_type = Py_TYPE(container);
if (container_type == &PyList_Type) {
if (PyLong_CheckExact(sub)) {
Expand Down Expand Up @@ -1362,25 +1384,25 @@ _Py_Specialize_BinarySubscr(
PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__));
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
PyFunctionObject *func = (PyFunctionObject *)descriptor;
PyCodeObject *code = (PyCodeObject *)func->func_code;
int kind = function_kind(code);
PyCodeObject *fcode = (PyCodeObject *)func->func_code;
int kind = function_kind(fcode);
if (kind != SIMPLE_FUNCTION) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, kind);
goto fail;
}
if (code->co_argcount != 2) {
if (fcode->co_argcount != 2) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
goto fail;
}
assert(cls->tp_version_tag != 0);
cache0->version = cls->tp_version_tag;
cache->type_version = cls->tp_version_tag;
int version = _PyFunction_GetVersionForCurrentState(func);
if (version == 0 || version != (uint16_t)version) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
goto fail;
}
cache0->index = version;
cache[-1].obj.obj = descriptor;
cache->func_version = version;
_PyQuickenedSetObject(code->co_firstinstr, cache->object, descriptor);
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr));
goto success;
}
Expand All @@ -1389,12 +1411,12 @@ _Py_Specialize_BinarySubscr(
fail:
STAT_INC(BINARY_SUBSCR, failure);
assert(!PyErr_Occurred());
cache_backoff(cache0);
cache->counter = ADAPTIVE_CACHE_BACKOFF;
return 0;
success:
STAT_INC(BINARY_SUBSCR, success);
assert(!PyErr_Occurred());
cache0->counter = initial_counter_value();
cache->counter = initial_counter_value();
return 0;
}

Expand Down