Skip to content

Commit a2cd2a6

Browse files
author
loewis
committed
Patch #510695: Add TSC profiling for the VM.
git-svn-id: http://svn.python.org/projects/python/trunk@36122 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 277616e commit a2cd2a6

10 files changed

Lines changed: 243 additions & 3 deletions

File tree

Doc/lib/libsys.tex

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,16 @@ \section{\module{sys} ---
475475
debugged.
476476
\end{funcdesc}
477477

478+
\begin{funcdesc}{settscdump}{on_flag}
479+
Activate dumping of VM measurements using the Pentium timestamp
480+
counter, if \var{on_flag} is true. Deactivate these dumps if
481+
\var{on_flag} is off. The function is available only if Python
482+
was compiled with \code{--with-tsc}. To understand the output
483+
of this dump, read \code{Python/ceval.c}.
484+
485+
\versionadded{2.4}
486+
\end{funcdesc}
487+
478488
\begin{datadesc}{stdin}
479489
\dataline{stdout}
480490
\dataline{stderr}

Include/pystate.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ typedef struct _is {
2929
#ifdef HAVE_DLOPEN
3030
int dlopenflags;
3131
#endif
32+
#ifdef WITH_TSC
33+
int tscdump;
34+
#endif
3235

3336
} PyInterpreterState;
3437

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ Tools/Demos
499499
Build
500500
-----
501501

502+
- Profiling the VM using the Pentium TSC is now possible if Python
503+
is configured --with-tsc.
504+
502505
- In order to find libraries, setup.py now also looks in /lib64, for use
503506
on AMD64.
504507

Python/ceval.c

Lines changed: 150 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,26 @@
1717

1818
#include <ctype.h>
1919

20+
#ifdef WITH_TSC
21+
#include <asm/msr.h>
22+
23+
typedef unsigned long long uint64;
24+
25+
void dump_tsc(int opcode, int ticked, uint64 inst0, uint64 inst1,
26+
uint64 loop0, uint64 loop1, uint64 intr0, uint64 intr1)
27+
{
28+
uint64 intr, inst, loop;
29+
PyThreadState *tstate = PyThreadState_Get();
30+
if (!tstate->interp->tscdump)
31+
return;
32+
intr = intr1 - intr0;
33+
inst = inst1 - inst0 - intr;
34+
loop = loop1 - loop0 - intr;
35+
fprintf(stderr, "opcode=%03d t=%d inst=%06lld loop=%06lld\n",
36+
opcode, ticked, inst, loop);
37+
}
38+
#endif
39+
2040
/* Turn this on if your compiler chokes on the big switch: */
2141
/* #define CASE_TOO_BIG 1 */
2242

@@ -30,7 +50,11 @@ typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *);
3050

3151
/* Forward declarations */
3252
static PyObject *eval_frame(PyFrameObject *);
53+
#ifdef WITH_TSC
54+
static PyObject *call_function(PyObject ***, int, uint64*, uint64*);
55+
#else
3356
static PyObject *call_function(PyObject ***, int);
57+
#endif
3458
static PyObject *fast_function(PyObject *, PyObject ***, int, int, int);
3559
static PyObject *do_call(PyObject *, PyObject ***, int, int);
3660
static PyObject *ext_do_call(PyObject *, PyObject ***, int, int, int);
@@ -485,6 +509,44 @@ eval_frame(PyFrameObject *f)
485509
#define GETITEM(v, i) PyTuple_GetItem((v), (i))
486510
#endif
487511

512+
#ifdef WITH_TSC
513+
/* Use Pentium timestamp counter to mark certain events:
514+
inst0 -- beginning of switch statement for opcode dispatch
515+
inst1 -- end of switch statement (may be skipped)
516+
loop0 -- the top of the mainloop
517+
loop1 -- place where control returns again to top of mainloop
518+
(may be skipped)
519+
intr1 -- beginning of long interruption
520+
intr2 -- end of long interruption
521+
522+
Many opcodes call out to helper C functions. In some cases, the
523+
time in those functions should be counted towards the time for the
524+
opcode, but not in all cases. For example, a CALL_FUNCTION opcode
525+
calls another Python function; there's no point in charge all the
526+
bytecode executed by the called function to the caller.
527+
528+
It's hard to make a useful judgement statically. In the presence
529+
of operator overloading, it's impossible to tell if a call will
530+
execute new Python code or not.
531+
532+
It's a case-by-case judgement. I'll use intr1 for the following
533+
cases:
534+
535+
EXEC_STMT
536+
IMPORT_STAR
537+
IMPORT_FROM
538+
CALL_FUNCTION (and friends)
539+
540+
*/
541+
uint64 inst0, inst1, loop0, loop1, intr0 = 0, intr1 = 0;
542+
int ticked = 0;
543+
544+
rdtscll(inst0);
545+
rdtscll(inst1);
546+
rdtscll(loop0);
547+
rdtscll(loop1);
548+
#endif
549+
488550
/* Code access macros */
489551

490552
#define INSTR_OFFSET() (next_instr - first_instr)
@@ -643,6 +705,23 @@ eval_frame(PyFrameObject *f)
643705
w = NULL;
644706

645707
for (;;) {
708+
#ifdef WITH_TSC
709+
if (inst1 == 0) {
710+
/* Almost surely, the opcode executed a break
711+
or a continue, preventing inst1 from being set
712+
on the way out of the loop.
713+
*/
714+
rdtscll(inst1);
715+
loop1 = inst1;
716+
}
717+
dump_tsc(opcode, ticked, inst0, inst1, loop0, loop1,
718+
intr0, intr1);
719+
ticked = 0;
720+
inst1 = 0;
721+
intr0 = 0;
722+
intr1 = 0;
723+
rdtscll(loop0);
724+
#endif
646725
assert(stack_pointer >= f->f_valuestack); /* else underflow */
647726
assert(STACK_LEVEL() <= f->f_stacksize); /* else overflow */
648727

@@ -662,6 +741,9 @@ eval_frame(PyFrameObject *f)
662741
}
663742
_Py_Ticker = _Py_CheckInterval;
664743
tstate->tick_counter++;
744+
#ifdef WITH_TSC
745+
ticked = 1;
746+
#endif
665747
if (things_to_do) {
666748
if (Py_MakePendingCalls() < 0) {
667749
why = WHY_EXCEPTION;
@@ -752,6 +834,9 @@ eval_frame(PyFrameObject *f)
752834
#endif
753835

754836
/* Main switch on opcode */
837+
#ifdef WITH_TSC
838+
rdtscll(inst0);
839+
#endif
755840

756841
switch (opcode) {
757842

@@ -1493,7 +1578,13 @@ eval_frame(PyFrameObject *f)
14931578
v = SECOND();
14941579
u = THIRD();
14951580
STACKADJ(-3);
1581+
#ifdef WITH_TSC
1582+
rdtscll(intr0);
1583+
#endif
14961584
err = exec_statement(f, u, v, w);
1585+
#ifdef WITH_TSC
1586+
rdtscll(intr1);
1587+
#endif
14971588
Py_DECREF(u);
14981589
Py_DECREF(v);
14991590
Py_DECREF(w);
@@ -1855,7 +1946,13 @@ eval_frame(PyFrameObject *f)
18551946
x = NULL;
18561947
break;
18571948
}
1949+
#ifdef WITH_TSC
1950+
rdtscll(intr0);
1951+
#endif
18581952
x = PyEval_CallObject(x, w);
1953+
#ifdef WITH_TSC
1954+
rdtscll(intr1);
1955+
#endif
18591956
Py_DECREF(w);
18601957
SET_TOP(x);
18611958
if (x != NULL) continue;
@@ -1869,7 +1966,13 @@ eval_frame(PyFrameObject *f)
18691966
"no locals found during 'import *'");
18701967
break;
18711968
}
1969+
#ifdef WITH_TSC
1970+
rdtscll(intr0);
1971+
#endif
18721972
err = import_all_from(x, v);
1973+
#ifdef WITH_TSC
1974+
rdtscll(intr1);
1975+
#endif
18731976
PyFrame_LocalsToFast(f, 0);
18741977
Py_DECREF(v);
18751978
if (err == 0) continue;
@@ -1878,7 +1981,13 @@ eval_frame(PyFrameObject *f)
18781981
case IMPORT_FROM:
18791982
w = GETITEM(names, oparg);
18801983
v = TOP();
1984+
#ifdef WITH_TSC
1985+
rdtscll(intr0);
1986+
#endif
18811987
x = import_from(v, w);
1988+
#ifdef WITH_TSC
1989+
rdtscll(intr1);
1990+
#endif
18821991
PUSH(x);
18831992
if (x != NULL) continue;
18841993
break;
@@ -1987,7 +2096,11 @@ eval_frame(PyFrameObject *f)
19872096

19882097
case CALL_FUNCTION:
19892098
PCALL(PCALL_ALL);
2099+
#ifdef WITH_TSC
2100+
x = call_function(&stack_pointer, oparg, &intr0, &intr1);
2101+
#else
19902102
x = call_function(&stack_pointer, oparg);
2103+
#endif
19912104
PUSH(x);
19922105
if (x != NULL)
19932106
continue;
@@ -2022,7 +2135,13 @@ eval_frame(PyFrameObject *f)
20222135
n++;
20232136
} else
20242137
Py_INCREF(func);
2138+
#ifdef WITH_TSC
2139+
rdtscll(intr0);
2140+
#endif
20252141
x = ext_do_call(func, &stack_pointer, flags, na, nk);
2142+
#ifdef WITH_TSC
2143+
rdtscll(intr1);
2144+
#endif
20262145
Py_DECREF(func);
20272146

20282147
while (stack_pointer > pfunc) {
@@ -2134,6 +2253,10 @@ eval_frame(PyFrameObject *f)
21342253

21352254
on_error:
21362255

2256+
#ifdef WITH_TSC
2257+
rdtscll(inst1);
2258+
#endif
2259+
21372260
/* Quickly continue if no error occurred */
21382261

21392262
if (why == WHY_NOT) {
@@ -2143,9 +2266,15 @@ eval_frame(PyFrameObject *f)
21432266
if (PyErr_Occurred())
21442267
fprintf(stderr,
21452268
"XXX undetected error\n");
2146-
else
2269+
else {
2270+
#endif
2271+
#ifdef WITH_TSC
2272+
rdtscll(loop1);
21472273
#endif
21482274
continue; /* Normal, fast path */
2275+
#ifdef CHECKEXC
2276+
}
2277+
#endif
21492278
}
21502279
why = WHY_EXCEPTION;
21512280
x = Py_None;
@@ -2260,6 +2389,9 @@ eval_frame(PyFrameObject *f)
22602389

22612390
if (why != WHY_NOT)
22622391
break;
2392+
#ifdef WITH_TSC
2393+
rdtscll(loop1);
2394+
#endif
22632395

22642396
} /* main loop */
22652397

@@ -3331,7 +3463,11 @@ if (tstate->use_tracing) { \
33313463

33323464

33333465
static PyObject *
3334-
call_function(PyObject ***pp_stack, int oparg)
3466+
call_function(PyObject ***pp_stack, int oparg
3467+
#ifdef WITH_TSC
3468+
, uint64* pintr0, uint64* pintr1
3469+
#endif
3470+
)
33353471
{
33363472
int na = oparg & 0xff;
33373473
int nk = (oparg>>8) & 0xff;
@@ -3374,7 +3510,13 @@ call_function(PyObject ***pp_stack, int oparg)
33743510
PyObject *callargs;
33753511
callargs = load_args(pp_stack, na);
33763512
BEGIN_C_TRACE
3513+
#ifdef WITH_TSC
3514+
rdtscll(*pintr0);
3515+
#endif
33773516
x = PyCFunction_Call(func, callargs, NULL);
3517+
#ifdef WITH_TSC
3518+
rdtscll(*pintr1);
3519+
#endif
33783520
END_C_TRACE
33793521
Py_XDECREF(callargs);
33803522
}
@@ -3393,10 +3535,16 @@ call_function(PyObject ***pp_stack, int oparg)
33933535
n++;
33943536
} else
33953537
Py_INCREF(func);
3538+
#ifdef WITH_TSC
3539+
rdtscll(*pintr0);
3540+
#endif
33963541
if (PyFunction_Check(func))
33973542
x = fast_function(func, pp_stack, n, na, nk);
33983543
else
33993544
x = do_call(func, pp_stack, na, nk);
3545+
#ifdef WITH_TSC
3546+
rdtscll(*pintr1);
3547+
#endif
34003548
Py_DECREF(func);
34013549
}
34023550

Python/pystate.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ PyInterpreterState_New(void)
5858
#else
5959
interp->dlopenflags = RTLD_LAZY;
6060
#endif
61+
#endif
62+
#ifdef WITH_TSC
63+
interp->tscdump = 0;
6164
#endif
6265

6366
HEAD_LOCK();

Python/sysmodule.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,33 @@ PyDoc_STRVAR(getcheckinterval_doc,
442442
"getcheckinterval() -> current check interval; see setcheckinterval()."
443443
);
444444

445+
#ifdef WITH_TSC
446+
static PyObject *
447+
sys_settscdump(PyObject *self, PyObject *args)
448+
{
449+
int bool;
450+
PyThreadState *tstate = PyThreadState_Get();
451+
452+
if (!PyArg_ParseTuple(args, "i:settscdump", &bool))
453+
return NULL;
454+
if (bool)
455+
tstate->interp->tscdump = 1;
456+
else
457+
tstate->interp->tscdump = 0;
458+
Py_INCREF(Py_None);
459+
return Py_None;
460+
461+
}
462+
463+
PyDoc_STRVAR(settscdump_doc,
464+
"settscdump(bool)\n\
465+
\n\
466+
If true, tell the Python interpreter to dump VM measurements to\n\
467+
stderr. If false, turn off dump. The measurements are based on the\n\
468+
Pentium time-stamp counter."
469+
);
470+
#endif TSC
471+
445472
static PyObject *
446473
sys_setrecursionlimit(PyObject *self, PyObject *args)
447474
{
@@ -743,6 +770,9 @@ static PyMethodDef sys_methods[] = {
743770
{"setprofile", sys_setprofile, METH_O, setprofile_doc},
744771
{"setrecursionlimit", sys_setrecursionlimit, METH_VARARGS,
745772
setrecursionlimit_doc},
773+
#ifdef WITH_TSC
774+
{"settscdump", sys_settscdump, METH_VARARGS, settscdump_doc},
775+
#endif
746776
{"settrace", sys_settrace, METH_O, settrace_doc},
747777
{"call_tracing", sys_call_tracing, METH_VARARGS, call_tracing_doc},
748778
{NULL, NULL} /* sentinel */

README

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,7 @@ Modules/getpath.o.
10261026
in files. Python code can open a file with open(file, 'U') to
10271027
read it in universal newline mode. THIS OPTION IS UNSUPPORTED.
10281028

1029+
--with-tsc: Profile using the Pentium timestamping counter (TSC).
10291030

10301031
Building for multiple architectures (using the VPATH feature)
10311032
-------------------------------------------------------------

0 commit comments

Comments
 (0)