@@ -87,6 +87,62 @@ static long dxp[256];
8787#endif
8888#endif
8989
90+ /* Function call profile */
91+ #ifdef CALL_PROFILE
/* Number of call-profile counters kept in pcall[]. */
#define PCALL_NUM 11

/* One counter per statistic; static storage, so every slot starts at 0. */
static int pcall[PCALL_NUM];

/* Indices into pcall[]. */
#define PCALL_ALL 0              /* every CALL_FUNCTION* opcode executed */
#define PCALL_FUNCTION 1         /* calls to Python function objects */
#define PCALL_FAST_FUNCTION 2    /* function calls made via fast_function() */
#define PCALL_FASTER_FUNCTION 3  /* fast_function() calls taking its inlined
                                    frame-setup path */
#define PCALL_METHOD 4           /* calls to method objects */
#define PCALL_BOUND_METHOD 5     /* bound methods unpacked in call_function() */
#define PCALL_CFUNCTION 6        /* PyCFunction calls made in call_function() */
#define PCALL_TYPE 7             /* calls to type objects */
#define PCALL_GENERATOR 8        /* generators created by PyEval_EvalCodeEx() */
#define PCALL_OTHER 9            /* calls to any other kind of callable */
#define PCALL_POP 10             /* stack entries popped after a call in
                                    call_function() */

/* Notes about the statistics

   PCALL_FAST stats

   FAST_FUNCTION means no argument tuple needs to be created.
   FASTER_FUNCTION means that the fast-path frame setup code is used.

   If there is a method call where the call can be optimized by changing
   the argument tuple and calling the function directly, it gets recorded
   twice.

   As a result, the relationship among the statistics appears to be
   PCALL_ALL == PCALL_FUNCTION + PCALL_METHOD - PCALL_BOUND_METHOD +
                PCALL_CFUNCTION + PCALL_TYPE + PCALL_GENERATOR + PCALL_OTHER
   PCALL_FUNCTION > PCALL_FAST_FUNCTION > PCALL_FASTER_FUNCTION
   PCALL_METHOD > PCALL_BOUND_METHOD
*/

/* Bump the counter selected by POS (one of the PCALL_* indices above).
   Both the argument and the whole expansion are parenthesised so the
   macro behaves as a single expression wherever it is used. */
#define PCALL(POS) (pcall[(POS)]++)
126+
127+ PyObject *
128+ PyEval_GetCallStats (PyObject * self )
129+ {
130+ return Py_BuildValue ("iiiiiiiiii" ,
131+ pcall [0 ], pcall [1 ], pcall [2 ], pcall [3 ],
132+ pcall [4 ], pcall [5 ], pcall [6 ], pcall [7 ],
133+ pcall [8 ], pcall [9 ]);
134+ }
135+ #else
136+ #define PCALL (O )
137+
138+ PyObject *
139+ PyEval_GetCallStats (PyObject * self )
140+ {
141+ Py_INCREF (Py_None );
142+ return Py_None ;
143+ }
144+ #endif
145+
90146static PyTypeObject gentype ;
91147
92148typedef struct {
@@ -475,6 +531,7 @@ volatile int _Py_Ticker = 100;
475531PyObject *
476532PyEval_EvalCode (PyCodeObject * co , PyObject * globals , PyObject * locals )
477533{
534+ /* XXX raise SystemError if globals is NULL */
478535 return PyEval_EvalCodeEx (co ,
479536 globals , locals ,
480537 (PyObject * * )NULL , 0 ,
@@ -1980,6 +2037,7 @@ eval_frame(PyFrameObject *f)
19802037 continue ;
19812038
19822039 case CALL_FUNCTION :
2040+ PCALL (PCALL_ALL );
19832041 x = call_function (& stack_pointer , oparg );
19842042 PUSH (x );
19852043 if (x != NULL )
@@ -1995,6 +2053,7 @@ eval_frame(PyFrameObject *f)
19952053 int flags = (opcode - CALL_FUNCTION ) & 3 ;
19962054 int n = na + 2 * nk ;
19972055 PyObject * * pfunc , * func ;
2056+ PCALL (PCALL_ALL );
19982057 if (flags & CALL_FLAG_VAR )
19992058 n ++ ;
20002059 if (flags & CALL_FLAG_KW )
@@ -2317,9 +2376,8 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
23172376 return NULL ;
23182377 }
23192378
2320- f = PyFrame_New (tstate , /*back*/
2321- co , /*code*/
2322- globals , locals );
2379+ assert (globals != NULL );
2380+ f = PyFrame_New (tstate , co , globals , locals );
23232381 if (f == NULL )
23242382 return NULL ;
23252383
@@ -2520,6 +2578,8 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
25202578 Py_XDECREF (f -> f_back );
25212579 f -> f_back = NULL ;
25222580
2581+ PCALL (PCALL_GENERATOR );
2582+
25232583 /* Create a new generator that owns the ready to run frame
25242584 * and return that as the value. */
25252585 return gen_new (f );
@@ -3198,12 +3258,12 @@ call_function(PyObject ***pp_stack, int oparg)
31983258 PyObject * func = * pfunc ;
31993259 PyObject * x , * w ;
32003260
3201- /* Always dispatch PyCFunction first, because
3202- these are presumed to be the most frequent
3203- callable object.
3261+ /* Always dispatch PyCFunction first, because these are
3262+ presumed to be the most frequent callable object.
32043263 */
32053264 if (PyCFunction_Check (func ) && nk == 0 ) {
32063265 int flags = PyCFunction_GET_FLAGS (func );
3266+ PCALL (PCALL_CFUNCTION );
32073267 if (flags & (METH_NOARGS | METH_O )) {
32083268 PyCFunction meth = PyCFunction_GET_FUNCTION (func );
32093269 PyObject * self = PyCFunction_GET_SELF (func );
@@ -3229,6 +3289,8 @@ call_function(PyObject ***pp_stack, int oparg)
32293289 if (PyMethod_Check (func ) && PyMethod_GET_SELF (func ) != NULL ) {
32303290 /* optimize access to bound methods */
32313291 PyObject * self = PyMethod_GET_SELF (func );
3292+ PCALL (PCALL_METHOD );
3293+ PCALL (PCALL_BOUND_METHOD );
32323294 Py_INCREF (self );
32333295 func = PyMethod_GET_FUNCTION (func );
32343296 Py_INCREF (func );
@@ -3245,35 +3307,75 @@ call_function(PyObject ***pp_stack, int oparg)
32453307 Py_DECREF (func );
32463308 }
32473309
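3310+ /* Pop everything from the top of the stack down to and including the function object at pfunc, releasing each reference. */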
32483311 while ((* pp_stack ) > pfunc ) {
32493312 w = EXT_POP (* pp_stack );
32503313 Py_DECREF (w );
3314+ PCALL (PCALL_POP );
32513315 }
32523316 return x ;
32533317}
32543318
32553319/* The fast_function() function optimize calls for which no argument
32563320 tuple is necessary; the objects are passed directly from the stack.
3321+ For the simplest case -- a function that takes only positional
3322+ arguments and is called with only positional arguments -- it
3323+ inlines the most primitive frame setup code from
3324+ PyEval_EvalCodeEx(), which vastly reduces the checks that must be
3325+ done before evaluating the frame.
32573326*/
32583327
32593328static PyObject *
32603329fast_function (PyObject * func , PyObject * * * pp_stack , int n , int na , int nk )
32613330{
3262- PyObject * co = PyFunction_GET_CODE (func );
3331+ PyCodeObject * co = ( PyCodeObject * ) PyFunction_GET_CODE (func );
32633332 PyObject * globals = PyFunction_GET_GLOBALS (func );
32643333 PyObject * argdefs = PyFunction_GET_DEFAULTS (func );
3265- PyObject * closure = PyFunction_GET_CLOSURE (func );
32663334 PyObject * * d = NULL ;
32673335 int nd = 0 ;
32683336
3337+ PCALL (PCALL_FUNCTION );
3338+ PCALL (PCALL_FAST_FUNCTION );
3339+ if (argdefs == NULL && co -> co_argcount == n &&
3340+ co -> co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE )) {
3341+ PyFrameObject * f ;
3342+ PyObject * retval = NULL ;
3343+ PyThreadState * tstate = PyThreadState_GET ();
3344+ PyObject * * fastlocals , * * stack ;
3345+ int i ;
3346+
3347+ PCALL (PCALL_FASTER_FUNCTION );
3348+ assert (globals != NULL );
3349+ /* XXX Perhaps we should create a specialized
3350+ PyFrame_New() that doesn't take locals, but does
3351+ take builtins without sanity checking them.
3352+ */
3353+ f = PyFrame_New (tstate , co , globals , NULL );
3354+ if (f == NULL )
3355+ return NULL ;
3356+
3357+ fastlocals = f -> f_localsplus ;
3358+ stack = (* pp_stack ) - n ;
3359+
3360+ for (i = 0 ; i < n ; i ++ ) {
3361+ Py_INCREF (* stack );
3362+ fastlocals [i ] = * stack ++ ;
3363+ }
3364+ retval = eval_frame (f );
3365+ assert (tstate != NULL );
3366+ ++ tstate -> recursion_depth ;
3367+ Py_DECREF (f );
3368+ -- tstate -> recursion_depth ;
3369+ return retval ;
3370+ }
32693371 if (argdefs != NULL ) {
32703372 d = & PyTuple_GET_ITEM (argdefs , 0 );
32713373 nd = ((PyTupleObject * )argdefs )-> ob_size ;
32723374 }
3273- return PyEval_EvalCodeEx (( PyCodeObject * ) co , globals ,
3274- (PyObject * )NULL , (* pp_stack )- n , na ,
3275- (* pp_stack )- 2 * nk , nk , d , nd ,
3276- closure );
3375+ return PyEval_EvalCodeEx (co , globals ,
3376+ (PyObject * )NULL , (* pp_stack )- n , na ,
3377+ (* pp_stack )- 2 * nk , nk , d , nd ,
3378+ PyFunction_GET_CLOSURE ( func ) );
32773379}
32783380
32793381static PyObject *
@@ -3371,6 +3473,20 @@ do_call(PyObject *func, PyObject ***pp_stack, int na, int nk)
33713473 callargs = load_args (pp_stack , na );
33723474 if (callargs == NULL )
33733475 goto call_fail ;
3476+ #ifdef CALL_PROFILE
3477+ /* At this point, we have to look at the type of func to
3478+ update the call stats properly. Do it here so as to avoid
3479+ exposing the call stats machinery outside ceval.c
3480+ */
3481+ if (PyFunction_Check (func ))
3482+ PCALL (PCALL_FUNCTION );
3483+ else if (PyMethod_Check (func ))
3484+ PCALL (PCALL_METHOD );
3485+ else if (PyType_Check (func ))
3486+ PCALL (PCALL_TYPE );
3487+ else
3488+ PCALL (PCALL_OTHER );
3489+ #endif
33743490 result = PyObject_Call (func , callargs , kwdict );
33753491 call_fail :
33763492 Py_XDECREF (callargs );
@@ -3426,6 +3542,20 @@ ext_do_call(PyObject *func, PyObject ***pp_stack, int flags, int na, int nk)
34263542 callargs = update_star_args (na , nstar , stararg , pp_stack );
34273543 if (callargs == NULL )
34283544 goto ext_call_fail ;
3545+ #ifdef CALL_PROFILE
3546+ /* At this point, we have to look at the type of func to
3547+ update the call stats properly. Do it here so as to avoid
3548+ exposing the call stats machinery outside ceval.c
3549+ */
3550+ if (PyFunction_Check (func ))
3551+ PCALL (PCALL_FUNCTION );
3552+ else if (PyMethod_Check (func ))
3553+ PCALL (PCALL_METHOD );
3554+ else if (PyType_Check (func ))
3555+ PCALL (PCALL_TYPE );
3556+ else
3557+ PCALL (PCALL_OTHER );
3558+ #endif
34293559 result = PyObject_Call (func , callargs , kwdict );
34303560 ext_call_fail :
34313561 Py_XDECREF (callargs );
0 commit comments