Skip to content

Commit 5a428c6

Browse files
author
jhylton
committed
Small function call optimization and special build option for call stats.
-DCALL_PROFILE: Count the number of function calls executed. When this symbol is defined, the ceval mainloop and helper functions count the number of function calls made. It keeps detailed statistics about what kind of object was called and whether the call hit any of the special fast paths in the code. Optimization: When we take the fast_function() path, which seems to be taken for most function calls, and there is minimal frame setup to do, avoid calling PyEval_EvalCodeEx(). The eval code ex function does a lot of work to handle keyword args and star args, free variables, generators, etc. The inlined version simply allocates the frame and copies the argument values into the frame. The optimization gets a little help from compile.c which adds a CO_NOFREE flag to code objects that don't have free variables or cell variables. This change allows fast_function() to get into the fast path with fewer tests. I measure a couple of percent speedup in pystone with this change, but there's surely more that can be done. git-svn-id: http://svn.python.org/projects/python/trunk@31241 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 5ac14e3 commit 5a428c6

6 files changed

Lines changed: 189 additions & 14 deletions

File tree

Include/ceval.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ PyAPI_FUNC(int) Py_GetRecursionLimit(void);
4848
PyAPI_FUNC(char *) PyEval_GetFuncName(PyObject *);
4949
PyAPI_FUNC(char *) PyEval_GetFuncDesc(PyObject *);
5050

51+
PyAPI_FUNC(PyObject *) PyEval_GetCallStats(PyObject *);
52+
5153
/* this used to be handled on a per-thread basis - now just two globals */
5254
PyAPI_DATA(volatile int) _Py_Ticker;
5355
PyAPI_DATA(int) _Py_CheckInterval;

Include/compile.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ typedef struct {
3434
#define CO_VARKEYWORDS 0x0008
3535
#define CO_NESTED 0x0010
3636
#define CO_GENERATOR 0x0020
37+
/* The CO_NOFREE flag is set if there are no free or cell variables.
38+
This information is redundant, but it allows a single flag test
39+
to determine whether there is any extra work to be done when the
40+
call frame is set up.
41+
*/
42+
#define CO_NOFREE 0x0040
3743
/* XXX Temporary hack. Until generators are a permanent part of the
3844
language, we need a way for a code object to record that generators
3945
were *possible* when it was compiled. This is so code dynamically

Misc/SpecialBuilds.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,13 @@ sprayed to stdout, such as every opcode and opcode argument and values
199199
pushed onto and popped off the value stack.
200200

201201
Not useful very often, but very useful when needed.
202+
203+
---------------------------------------------------------------------------
204+
CALL_PROFILE introduced for Python 2.3
205+
206+
Count the number of function calls executed.
207+
208+
When this symbol is defined, the ceval mainloop and helper functions
209+
count the number of function calls made. It keeps detailed statistics
210+
about what kind of object was called and whether the call hit any of
211+
the special fast paths in the code.

Python/ceval.c

Lines changed: 142 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,62 @@ static long dxp[256];
8787
#endif
8888
#endif
8989

90+
/* Function call profile */
91+
#ifdef CALL_PROFILE
92+
#define PCALL_NUM 11
93+
static int pcall[PCALL_NUM];
94+
95+
#define PCALL_ALL 0
96+
#define PCALL_FUNCTION 1
97+
#define PCALL_FAST_FUNCTION 2
98+
#define PCALL_FASTER_FUNCTION 3
99+
#define PCALL_METHOD 4
100+
#define PCALL_BOUND_METHOD 5
101+
#define PCALL_CFUNCTION 6
102+
#define PCALL_TYPE 7
103+
#define PCALL_GENERATOR 8
104+
#define PCALL_OTHER 9
105+
#define PCALL_POP 10
106+
107+
/* Notes about the statistics
108+
109+
PCALL_FAST stats
110+
111+
FAST_FUNCTION means no argument tuple needs to be created.
112+
FASTER_FUNCTION means that the fast-path frame setup code is used.
113+
114+
If there is a method call where the call can be optimized by changing
115+
the argument tuple and calling the function directly, it gets recorded
116+
twice.
117+
118+
As a result, the relationship among the statistics appears to be
119+
PCALL_ALL == PCALL_FUNCTION + PCALL_METHOD - PCALL_BOUND_METHOD +
120+
PCALL_CFUNCTION + PCALL_TYPE + PCALL_GENERATOR + PCALL_OTHER
121+
PCALL_FUNCTION > PCALL_FAST_FUNCTION > PCALL_FASTER_FUNCTION
122+
PCALL_METHOD > PCALL_BOUND_METHOD
123+
*/
124+
125+
#define PCALL(POS) pcall[POS]++
126+
127+
/* Return the function-call counters collected in pcall[] as a tuple
   of ints.

   One entry is produced per slot of pcall[], in PCALL_* index order
   (PCALL_ALL first, PCALL_POP last), so the tuple has PCALL_NUM (11)
   items.  The self argument is not used.
*/
PyObject *
PyEval_GetCallStats(PyObject *self)
{
    /* The format string needs exactly one 'i' per counter; keep it
       in step with PCALL_NUM.  pcall[10] is the PCALL_POP slot. */
    return Py_BuildValue("iiiiiiiiiii",
                         pcall[0], pcall[1], pcall[2], pcall[3],
                         pcall[4], pcall[5], pcall[6], pcall[7],
                         pcall[8], pcall[9], pcall[10]);
}
135+
#else
136+
#define PCALL(O)
137+
138+
/* Built without CALL_PROFILE: no counters exist, so hand back a new
   reference to None.  The self argument is not used. */
PyObject *
PyEval_GetCallStats(PyObject *self)
{
    PyObject *none = Py_None;

    Py_INCREF(none);
    return none;
}
144+
#endif
145+
90146
static PyTypeObject gentype;
91147

92148
typedef struct {
@@ -475,6 +531,7 @@ volatile int _Py_Ticker = 100;
475531
PyObject *
476532
PyEval_EvalCode(PyCodeObject *co, PyObject *globals, PyObject *locals)
477533
{
534+
/* XXX raise SystemError if globals is NULL */
478535
return PyEval_EvalCodeEx(co,
479536
globals, locals,
480537
(PyObject **)NULL, 0,
@@ -1980,6 +2037,7 @@ eval_frame(PyFrameObject *f)
19802037
continue;
19812038

19822039
case CALL_FUNCTION:
2040+
PCALL(PCALL_ALL);
19832041
x = call_function(&stack_pointer, oparg);
19842042
PUSH(x);
19852043
if (x != NULL)
@@ -1995,6 +2053,7 @@ eval_frame(PyFrameObject *f)
19952053
int flags = (opcode - CALL_FUNCTION) & 3;
19962054
int n = na + 2 * nk;
19972055
PyObject **pfunc, *func;
2056+
PCALL(PCALL_ALL);
19982057
if (flags & CALL_FLAG_VAR)
19992058
n++;
20002059
if (flags & CALL_FLAG_KW)
@@ -2317,9 +2376,8 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
23172376
return NULL;
23182377
}
23192378

2320-
f = PyFrame_New(tstate, /*back*/
2321-
co, /*code*/
2322-
globals, locals);
2379+
assert(globals != NULL);
2380+
f = PyFrame_New(tstate, co, globals, locals);
23232381
if (f == NULL)
23242382
return NULL;
23252383

@@ -2520,6 +2578,8 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
25202578
Py_XDECREF(f->f_back);
25212579
f->f_back = NULL;
25222580

2581+
PCALL(PCALL_GENERATOR);
2582+
25232583
/* Create a new generator that owns the ready to run frame
25242584
* and return that as the value. */
25252585
return gen_new(f);
@@ -3198,12 +3258,12 @@ call_function(PyObject ***pp_stack, int oparg)
31983258
PyObject *func = *pfunc;
31993259
PyObject *x, *w;
32003260

3201-
/* Always dispatch PyCFunction first, because
3202-
these are presumed to be the most frequent
3203-
callable object.
3261+
/* Always dispatch PyCFunction first, because these are
3262+
presumed to be the most frequent callable object.
32043263
*/
32053264
if (PyCFunction_Check(func) && nk == 0) {
32063265
int flags = PyCFunction_GET_FLAGS(func);
3266+
PCALL(PCALL_CFUNCTION);
32073267
if (flags & (METH_NOARGS | METH_O)) {
32083268
PyCFunction meth = PyCFunction_GET_FUNCTION(func);
32093269
PyObject *self = PyCFunction_GET_SELF(func);
@@ -3229,6 +3289,8 @@ call_function(PyObject ***pp_stack, int oparg)
32293289
if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) {
32303290
/* optimize access to bound methods */
32313291
PyObject *self = PyMethod_GET_SELF(func);
3292+
PCALL(PCALL_METHOD);
3293+
PCALL(PCALL_BOUND_METHOD);
32323294
Py_INCREF(self);
32333295
func = PyMethod_GET_FUNCTION(func);
32343296
Py_INCREF(func);
@@ -3245,35 +3307,75 @@ call_function(PyObject ***pp_stack, int oparg)
32453307
Py_DECREF(func);
32463308
}
32473309

3310+
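/* Pop and DECREF whatever remains on the value stack until the stack pointer is back at pfunc -- presumably the callable and any arguments that were not consumed; PCALL_POP counts each pop. */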
32483311
while ((*pp_stack) > pfunc) {
32493312
w = EXT_POP(*pp_stack);
32503313
Py_DECREF(w);
3314+
PCALL(PCALL_POP);
32513315
}
32523316
return x;
32533317
}
32543318

32553319
/* The fast_function() function optimize calls for which no argument
32563320
tuple is necessary; the objects are passed directly from the stack.
3321+
For the simplest case -- a function that takes only positional
3322+
arguments and is called with only positional arguments -- it
3323+
inlines the most primitive frame setup code from
3324+
PyEval_EvalCodeEx(), which vastly reduces the checks that must be
3325+
done before evaluating the frame.
32573326
*/
32583327

32593328
static PyObject *
32603329
fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk)
32613330
{
3262-
PyObject *co = PyFunction_GET_CODE(func);
3331+
PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
32633332
PyObject *globals = PyFunction_GET_GLOBALS(func);
32643333
PyObject *argdefs = PyFunction_GET_DEFAULTS(func);
3265-
PyObject *closure = PyFunction_GET_CLOSURE(func);
32663334
PyObject **d = NULL;
32673335
int nd = 0;
32683336

3337+
PCALL(PCALL_FUNCTION);
3338+
PCALL(PCALL_FAST_FUNCTION);
3339+
if (argdefs == NULL && co->co_argcount == n &&
3340+
co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) {
3341+
PyFrameObject *f;
3342+
PyObject *retval = NULL;
3343+
PyThreadState *tstate = PyThreadState_GET();
3344+
PyObject **fastlocals, **stack;
3345+
int i;
3346+
3347+
PCALL(PCALL_FASTER_FUNCTION);
3348+
assert(globals != NULL);
3349+
/* XXX Perhaps we should create a specialized
3350+
PyFrame_New() that doesn't take locals, but does
3351+
take builtins without sanity checking them.
3352+
*/
3353+
f = PyFrame_New(tstate, co, globals, NULL);
3354+
if (f == NULL)
3355+
return NULL;
3356+
3357+
fastlocals = f->f_localsplus;
3358+
stack = (*pp_stack) - n;
3359+
3360+
for (i = 0; i < n; i++) {
3361+
Py_INCREF(*stack);
3362+
fastlocals[i] = *stack++;
3363+
}
3364+
retval = eval_frame(f);
3365+
assert(tstate != NULL);
3366+
++tstate->recursion_depth;
3367+
Py_DECREF(f);
3368+
--tstate->recursion_depth;
3369+
return retval;
3370+
}
32693371
if (argdefs != NULL) {
32703372
d = &PyTuple_GET_ITEM(argdefs, 0);
32713373
nd = ((PyTupleObject *)argdefs)->ob_size;
32723374
}
3273-
return PyEval_EvalCodeEx((PyCodeObject *)co, globals,
3274-
(PyObject *)NULL, (*pp_stack)-n, na,
3275-
(*pp_stack)-2*nk, nk, d, nd,
3276-
closure);
3375+
return PyEval_EvalCodeEx(co, globals,
3376+
(PyObject *)NULL, (*pp_stack)-n, na,
3377+
(*pp_stack)-2*nk, nk, d, nd,
3378+
PyFunction_GET_CLOSURE(func));
32773379
}
32783380

32793381
static PyObject *
@@ -3371,6 +3473,20 @@ do_call(PyObject *func, PyObject ***pp_stack, int na, int nk)
33713473
callargs = load_args(pp_stack, na);
33723474
if (callargs == NULL)
33733475
goto call_fail;
3476+
#ifdef CALL_PROFILE
3477+
/* At this point, we have to look at the type of func to
3478+
update the call stats properly. Do it here so as to avoid
3479+
exposing the call stats machinery outside ceval.c
3480+
*/
3481+
if (PyFunction_Check(func))
3482+
PCALL(PCALL_FUNCTION);
3483+
else if (PyMethod_Check(func))
3484+
PCALL(PCALL_METHOD);
3485+
else if (PyType_Check(func))
3486+
PCALL(PCALL_TYPE);
3487+
else
3488+
PCALL(PCALL_OTHER);
3489+
#endif
33743490
result = PyObject_Call(func, callargs, kwdict);
33753491
call_fail:
33763492
Py_XDECREF(callargs);
@@ -3426,6 +3542,20 @@ ext_do_call(PyObject *func, PyObject ***pp_stack, int flags, int na, int nk)
34263542
callargs = update_star_args(na, nstar, stararg, pp_stack);
34273543
if (callargs == NULL)
34283544
goto ext_call_fail;
3545+
#ifdef CALL_PROFILE
3546+
/* At this point, we have to look at the type of func to
3547+
update the call stats properly. Do it here so as to avoid
3548+
exposing the call stats machinery outside ceval.c
3549+
*/
3550+
if (PyFunction_Check(func))
3551+
PCALL(PCALL_FUNCTION);
3552+
else if (PyMethod_Check(func))
3553+
PCALL(PCALL_METHOD);
3554+
else if (PyType_Check(func))
3555+
PCALL(PCALL_TYPE);
3556+
else
3557+
PCALL(PCALL_OTHER);
3558+
#endif
34293559
result = PyObject_Call(func, callargs, kwdict);
34303560
ext_call_fail:
34313561
Py_XDECREF(callargs);

Python/compile.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,9 @@ PyCode_New(int argcount, int nlocals, int stacksize, int flags,
385385
co->co_firstlineno = firstlineno;
386386
Py_INCREF(lnotab);
387387
co->co_lnotab = lnotab;
388+
if (PyTuple_GET_SIZE(freevars) == 0 &&
389+
PyTuple_GET_SIZE(cellvars) == 0)
390+
co->co_flags |= CO_NOFREE;
388391
}
389392
return co;
390393
}

Python/sysmodule.c

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,28 @@ sys_getframe(PyObject *self, PyObject *args)
562562
return (PyObject*)f;
563563
}
564564

565+
PyDoc_STRVAR(callstats_doc,
566+
"callstats() -> tuple of integers\n\
567+
\n\
568+
Return a tuple of function call statistics, if CALL_PROFILE was defined\n\
569+
when Python was built. Otherwise, return None.\n\
570+
\n\
571+
When enabled, this function returns detailed, implementation-specific\n\
572+
details about the number of function calls executed. The return value is\n\
573+
a 11-tuple where the entries in the tuple are counts of:\n\
574+
0. all function calls\n\
575+
1. calls to PyFunction_Type objects\n\
576+
2. PyFunction calls that do not create an argument tuple\n\
577+
3. PyFunction calls that do not create an argument tuple\n\
578+
and bypass PyEval_EvalCodeEx()\n\
579+
4. PyMethod calls\n\
580+
5. PyMethod calls on bound methods\n\
581+
6. PyType calls\n\
582+
7. PyCFunction calls\n\
583+
8. generator calls\n\
584+
9. All other calls\n\
585+
10. Number of stack pops performed by call_function()"
586+
);
565587

566588
#ifdef Py_TRACE_REFS
567589
/* Defined in objects.c because it uses static globals if that file */
@@ -575,13 +597,15 @@ extern PyObject *_Py_GetDXProfile(PyObject *, PyObject *);
575597

576598
static PyMethodDef sys_methods[] = {
577599
/* Might as well keep this in alphabetic order */
600+
{"callstats", (PyCFunction)PyEval_GetCallStats, METH_NOARGS,
601+
callstats_doc},
578602
{"displayhook", sys_displayhook, METH_O, displayhook_doc},
579603
{"exc_info", (PyCFunction)sys_exc_info, METH_NOARGS, exc_info_doc},
580604
{"excepthook", sys_excepthook, METH_VARARGS, excepthook_doc},
581605
{"exit", sys_exit, METH_VARARGS, exit_doc},
582606
#ifdef Py_USING_UNICODE
583-
{"getdefaultencoding", (PyCFunction)sys_getdefaultencoding, METH_NOARGS,
584-
getdefaultencoding_doc},
607+
{"getdefaultencoding", (PyCFunction)sys_getdefaultencoding,
608+
METH_NOARGS, getdefaultencoding_doc},
585609
#endif
586610
#ifdef HAVE_DLOPEN
587611
{"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS,

0 commit comments

Comments
 (0)