Skip to content

Commit b6380da

Browse files
author
richard.jones
committed
Merge from rjones-funccall branch.
Applied patch zombie-frames-2.diff from SourceForge patch #876206, updated for Python 2.5 and modified to retain the free_list, which avoids a 67% slow-down in the pybench recursion test. Gives a 5% speed-up in the pybench function call test. git-svn-id: http://svn.python.org/projects/python/trunk@46096 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent c2ac3aa commit b6380da

4 files changed

Lines changed: 104 additions & 62 deletions

File tree

Include/code.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ typedef struct {
2424
PyObject *co_name; /* string (name, for reference) */
2525
int co_firstlineno; /* first source line number */
2626
PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) */
27+
void *co_zombieframe; /* for optimization only (see frameobject.c) */
2728
} PyCodeObject;
2829

2930
/* Masks for co_flags above */

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ What's New in Python 2.5 alpha 3?
1212
Core and builtins
1313
-----------------
1414

15+
- Patch #876206: function call speedup by retaining allocated frame
16+
objects.
17+
1518
- Bug #1462152: file() now checks more thoroughly for invalid mode
1619
strings and removes a possible "U" before passing the mode to the
1720
C library function.

Objects/codeobject.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ PyCode_New(int argcount, int nlocals, int stacksize, int flags,
102102
co->co_firstlineno = firstlineno;
103103
Py_INCREF(lnotab);
104104
co->co_lnotab = lnotab;
105+
co->co_zombieframe = NULL;
105106
}
106107
return co;
107108
}
@@ -265,6 +266,8 @@ code_dealloc(PyCodeObject *co)
265266
Py_XDECREF(co->co_filename);
266267
Py_XDECREF(co->co_name);
267268
Py_XDECREF(co->co_lnotab);
269+
if (co->co_zombieframe != NULL)
270+
PyObject_GC_Del(co->co_zombieframe);
268271
PyObject_DEL(co);
269272
}
270273

Objects/frameobject.c

Lines changed: 97 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -350,10 +350,31 @@ static PyGetSetDef frame_getsetlist[] = {
350350
};
351351

352352
/* Stack frames are allocated and deallocated at a considerable rate.
353-
In an attempt to improve the speed of function calls, we maintain a
354-
separate free list of stack frames (just like integers are
355-
allocated in a special way -- see intobject.c). When a stack frame
356-
is on the free list, only the following members have a meaning:
353+
In an attempt to improve the speed of function calls, we:
354+
355+
1. Hold a single "zombie" frame on each code object. This retains
356+
the allocated and initialised frame object from an invocation of
357+
the code object. The zombie is reanimated the next time we need a
358+
frame object for that code object. Doing this saves the malloc/
359+
realloc required when using a free_list frame that isn't the
360+
correct size. It also saves some field initialisation.
361+
362+
In zombie mode, no field of PyFrameObject holds a reference, but
363+
the following fields are still valid:
364+
365+
* ob_type, ob_size, f_code, f_valuestack,
366+
f_nlocals, f_ncells, f_nfreevars, f_stacksize;
367+
368+
* f_locals, f_trace,
369+
f_exc_type, f_exc_value, f_exc_traceback are NULL;
370+
371+
* f_localsplus does not require re-allocation and
372+
the local variables in f_localsplus are NULL.
373+
374+
2. We also maintain a separate free list of stack frames (just like
375+
integers are allocated in a special way -- see intobject.c). When
376+
a stack frame is on the free list, only the following members have
377+
a meaning:
357378
ob_type == &Frametype
358379
f_back next item on free list, or NULL
359380
f_nlocals number of locals
@@ -380,41 +401,43 @@ static int numfree = 0; /* number of frames currently in free_list */
380401
static void
381402
frame_dealloc(PyFrameObject *f)
382403
{
383-
int i, slots;
384-
PyObject **fastlocals;
385-
PyObject **p;
404+
PyObject **p, **valuestack;
405+
PyCodeObject *co;
386406

387407
PyObject_GC_UnTrack(f);
388408
Py_TRASHCAN_SAFE_BEGIN(f)
389409
/* Kill all local variables */
390-
slots = f->f_nlocals + f->f_ncells + f->f_nfreevars;
391-
fastlocals = f->f_localsplus;
392-
for (i = slots; --i >= 0; ++fastlocals) {
393-
Py_XDECREF(*fastlocals);
394-
}
410+
valuestack = f->f_valuestack;
411+
for (p = f->f_localsplus; p < valuestack; p++)
412+
Py_CLEAR(*p);
395413

396414
/* Free stack */
397415
if (f->f_stacktop != NULL) {
398-
for (p = f->f_valuestack; p < f->f_stacktop; p++)
416+
for (p = valuestack; p < f->f_stacktop; p++)
399417
Py_XDECREF(*p);
400418
}
401419

402420
Py_XDECREF(f->f_back);
403-
Py_DECREF(f->f_code);
404421
Py_DECREF(f->f_builtins);
405422
Py_DECREF(f->f_globals);
406-
Py_XDECREF(f->f_locals);
407-
Py_XDECREF(f->f_trace);
408-
Py_XDECREF(f->f_exc_type);
409-
Py_XDECREF(f->f_exc_value);
410-
Py_XDECREF(f->f_exc_traceback);
411-
if (numfree < MAXFREELIST) {
423+
Py_CLEAR(f->f_locals);
424+
Py_CLEAR(f->f_trace);
425+
Py_CLEAR(f->f_exc_type);
426+
Py_CLEAR(f->f_exc_value);
427+
Py_CLEAR(f->f_exc_traceback);
428+
429+
co = f->f_code;
430+
if (co != NULL && co->co_zombieframe == NULL)
431+
co->co_zombieframe = f;
432+
else if (numfree < MAXFREELIST) {
412433
++numfree;
413434
f->f_back = free_list;
414435
free_list = f;
415-
}
416-
else
436+
}
437+
else
417438
PyObject_GC_Del(f);
439+
440+
Py_XDECREF(co);
418441
Py_TRASHCAN_SAFE_END(f)
419442
}
420443

@@ -532,7 +555,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
532555
PyFrameObject *back = tstate->frame;
533556
PyFrameObject *f;
534557
PyObject *builtins;
535-
Py_ssize_t extras, ncells, nfrees, i;
558+
Py_ssize_t i;
536559

537560
#ifdef Py_DEBUG
538561
if (code == NULL || globals == NULL || !PyDict_Check(globals) ||
@@ -541,9 +564,6 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
541564
return NULL;
542565
}
543566
#endif
544-
ncells = PyTuple_GET_SIZE(code->co_cellvars);
545-
nfrees = PyTuple_GET_SIZE(code->co_freevars);
546-
extras = code->co_stacksize + code->co_nlocals + ncells + nfrees;
547567
if (back == NULL || back->f_globals != globals) {
548568
builtins = PyDict_GetItem(globals, builtin_object);
549569
if (builtins) {
@@ -574,71 +594,86 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
574594
assert(builtins != NULL && PyDict_Check(builtins));
575595
Py_INCREF(builtins);
576596
}
577-
if (free_list == NULL) {
578-
f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, extras);
579-
if (f == NULL) {
580-
Py_DECREF(builtins);
581-
return NULL;
582-
}
597+
if (code->co_zombieframe != NULL) {
598+
f = code->co_zombieframe;
599+
code->co_zombieframe = NULL;
600+
_Py_NewReference((PyObject *)f);
601+
assert(f->f_code == code);
583602
}
584-
else {
585-
assert(numfree > 0);
586-
--numfree;
587-
f = free_list;
588-
free_list = free_list->f_back;
589-
if (f->ob_size < extras) {
590-
f = PyObject_GC_Resize(PyFrameObject, f, extras);
591-
if (f == NULL) {
592-
Py_DECREF(builtins);
593-
return NULL;
594-
}
595-
}
596-
_Py_NewReference((PyObject *)f);
603+
else {
604+
Py_ssize_t extras, ncells, nfrees;
605+
ncells = PyTuple_GET_SIZE(code->co_cellvars);
606+
nfrees = PyTuple_GET_SIZE(code->co_freevars);
607+
extras = code->co_stacksize + code->co_nlocals + ncells +
608+
nfrees;
609+
if (free_list == NULL) {
610+
f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type,
611+
extras);
612+
if (f == NULL) {
613+
Py_DECREF(builtins);
614+
return NULL;
615+
}
616+
}
617+
else {
618+
assert(numfree > 0);
619+
--numfree;
620+
f = free_list;
621+
free_list = free_list->f_back;
622+
if (f->ob_size < extras) {
623+
f = PyObject_GC_Resize(PyFrameObject, f, extras);
624+
if (f == NULL) {
625+
Py_DECREF(builtins);
626+
return NULL;
627+
}
628+
}
629+
_Py_NewReference((PyObject *)f);
630+
}
631+
632+
f->f_code = code;
633+
f->f_nlocals = code->co_nlocals;
634+
f->f_stacksize = code->co_stacksize;
635+
f->f_ncells = ncells;
636+
f->f_nfreevars = nfrees;
637+
extras = f->f_nlocals + ncells + nfrees;
638+
f->f_valuestack = f->f_localsplus + extras;
639+
for (i=0; i<extras; i++)
640+
f->f_localsplus[i] = NULL;
641+
f->f_locals = NULL;
642+
f->f_trace = NULL;
643+
f->f_exc_type = f->f_exc_value = f->f_exc_traceback = NULL;
597644
}
598645
f->f_builtins = builtins;
599646
Py_XINCREF(back);
600647
f->f_back = back;
601648
Py_INCREF(code);
602-
f->f_code = code;
603649
Py_INCREF(globals);
604650
f->f_globals = globals;
605651
/* Most functions have CO_NEWLOCALS and CO_OPTIMIZED set. */
606652
if ((code->co_flags & (CO_NEWLOCALS | CO_OPTIMIZED)) ==
607653
(CO_NEWLOCALS | CO_OPTIMIZED))
608-
locals = NULL; /* PyFrame_FastToLocals() will set. */
654+
; /* f_locals = NULL; will be set by PyFrame_FastToLocals() */
609655
else if (code->co_flags & CO_NEWLOCALS) {
610656
locals = PyDict_New();
611657
if (locals == NULL) {
612658
Py_DECREF(f);
613659
return NULL;
614660
}
661+
f->f_locals = locals;
615662
}
616663
else {
617664
if (locals == NULL)
618665
locals = globals;
619666
Py_INCREF(locals);
667+
f->f_locals = locals;
620668
}
621-
f->f_locals = locals;
622-
f->f_trace = NULL;
623-
f->f_exc_type = f->f_exc_value = f->f_exc_traceback = NULL;
624669
f->f_tstate = tstate;
625670

626671
f->f_lasti = -1;
627672
f->f_lineno = code->co_firstlineno;
628673
f->f_restricted = (builtins != tstate->interp->builtins);
629674
f->f_iblock = 0;
630-
f->f_nlocals = code->co_nlocals;
631-
f->f_stacksize = code->co_stacksize;
632-
f->f_ncells = ncells;
633-
f->f_nfreevars = nfrees;
634-
635-
extras = f->f_nlocals + ncells + nfrees;
636-
/* Tim said it's ok to replace memset */
637-
for (i=0; i<extras; i++)
638-
f->f_localsplus[i] = NULL;
639-
640-
f->f_valuestack = f->f_localsplus + extras;
641-
f->f_stacktop = f->f_valuestack;
675+
676+
f->f_stacktop = f->f_valuestack;
642677
_PyObject_GC_TRACK(f);
643678
return f;
644679
}

0 commit comments

Comments
 (0)