Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Handle default arguments in CALL_FUNCTION_PY_SIMPLE.
  • Loading branch information
markshannon committed Oct 18, 2021
commit 5dbedd8a8c640999ce6e6a0b816cb4cc0f74093f
8 changes: 7 additions & 1 deletion Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ typedef struct {
uint8_t original_oparg;
uint8_t counter;
uint16_t index;
uint32_t version;
} _PyAdaptiveEntry;


Expand All @@ -36,6 +35,12 @@ typedef struct {
PyObject *obj;
} _PyObjectCache;

typedef struct {
uint32_t func_version;
uint16_t defaults_start;
uint16_t defaults_len;
} _PyCallCache;

/* Add specialized versions of entries to this union.
*
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
Expand All @@ -52,6 +57,7 @@ typedef union {
_PyAttrCache attr;
_PyLoadGlobalCache load_global;
_PyObjectCache obj;
_PyCallCache call;
} SpecializedCacheEntry;

#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
Expand Down
9 changes: 8 additions & 1 deletion Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -4682,10 +4682,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
int argcount = cache0->original_oparg;
_PyCallCache *cache1 = &caches[-1].call;
PyObject *callable = PEEK(argcount+1);
DEOPT_IF(!PyFunction_Check(callable), CALL_FUNCTION);
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != cache0->version, CALL_FUNCTION);
DEOPT_IF(func->func_version != cache1->func_version, CALL_FUNCTION);
/* PEP 523 */
DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_FUNCTION);
STAT_INC(CALL_FUNCTION, hit);
Expand All @@ -4700,6 +4701,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
new_frame->localsplus[i] = stack_pointer[i];
}
STACK_SHRINK(1);
int deflen = cache1->defaults_len;
for (int i = 0; i < deflen; i++) {
PyObject *def = PyTuple_GET_ITEM(func->func_defaults, cache1->defaults_start+i);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if the compiler is smart enough to realize that func->func_defaults and cache1->defaults_start doesn't change between iterations? I'd say probably not. Perhaps we should micro-optimize and move this out of the loop?

Copy link
Copy Markdown
Member Author

@markshannon markshannon Oct 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The common case is that deflen == 0. In which case moving it out of loop could make it worse.

Py_INCREF(def);
new_frame->localsplus[argcount+i] = def;
}
Py_DECREF(func);
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = tstate->frame;
Expand Down
31 changes: 23 additions & 8 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ static uint8_t cache_requirements[256] = {
[BINARY_ADD] = 0,
[BINARY_SUBSCR] = 0,
[STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
[CALL_FUNCTION] = 1 /* _PyAdaptiveEntry */
[CALL_FUNCTION] = 2 /* _PyAdaptiveEntry and _PyCallCache */
};

/* Return the oparg for the cache_offset and instruction index.
Expand Down Expand Up @@ -1225,6 +1225,7 @@ specialize_py_call(
PyFunctionObject *func, _Py_CODEUNIT *instr,
int nargs, SpecializedCacheEntry *cache)
{
_PyCallCache *cache1 = &cache[-1].call;
/* Exclude generator or coroutines for now */
PyCodeObject *code = (PyCodeObject *)func->func_code;
int flags = code->co_flags;
Expand All @@ -1236,10 +1237,6 @@ specialize_py_call(
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_COMPLEX_PARAMETERS);
return -1;
}
if (code->co_argcount != nargs) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
return -1;
}
if ((flags & CO_OPTIMIZED) == 0) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CO_NOT_OPTIMIZED);
return -1;
Expand All @@ -1248,13 +1245,31 @@ specialize_py_call(
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_FREE_VARS);
return -1;
}
_PyAdaptiveEntry *cache0 = &cache->adaptive;
int argcount = code->co_argcount;
int defcount = func->func_defaults == NULL ? 0 : (int)PyTuple_GET_SIZE(func->func_defaults);
assert(defcount <= argcount);
int min_args = argcount-defcount;
if (nargs > argcount || nargs < min_args) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
return -1;
}
assert(nargs <= argcount && nargs >= min_args);
int defstart = nargs - min_args;
int deflen = argcount - nargs;
assert(defstart >= 0 && deflen >= 0);
assert(deflen == 0 || func->func_defaults != NULL);
if (defstart > 0xffff || deflen > 0xffff) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_RANGE);
return -1;
}
int version = _PyFunction_GetVersionForCurrentState(func);
if (version == 0) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_VERSIONS);
return -1;
}
cache0->version = version;
cache1->func_version = version;
cache1->defaults_start = defstart;
cache1->defaults_len = deflen;
*instr = _Py_MAKECODEUNIT(CALL_FUNCTION_PY_SIMPLE, _Py_OPARG(*instr));
return 0;
}
Expand All @@ -1265,7 +1280,6 @@ _Py_Specialize_CallFunction(
PyObject *callable, _Py_CODEUNIT *instr,
int nargs, SpecializedCacheEntry *cache)
{
_PyAdaptiveEntry *cache0 = &cache->adaptive;
int fail;
if (PyCFunction_CheckExact(callable)) {
fail = specialize_c_call(callable, instr, nargs, cache);
Expand All @@ -1280,6 +1294,7 @@ _Py_Specialize_CallFunction(
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OTHER);
fail = -1;
Copy link
Copy Markdown
Member

@Fidget-Spinner Fidget-Spinner Oct 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I set specialize_c_call to return 1 to indicate failure. You may have to change the return codes in specialize_c_call for consistency. Sorry!

Copy link
Copy Markdown
Member Author

@markshannon markshannon Oct 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably add an enum {SUCCESS, FAILURE} for clarity.

I wouldn't worry too much about it for now, we do much worse elsewhere.
compile.c uses zero for success and non-zero for failure in some places, and the other way around in other places 😞

}
_PyAdaptiveEntry *cache0 = &cache->adaptive;
if (fail) {
STAT_INC(CALL_FUNCTION, specialization_failure);
assert(!PyErr_Occurred());
Expand Down