Skip to content

Commit 3a652b1

Browse files
committed
Merged revisions 70546 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r70546 | antoine.pitrou | 2009-03-23 19:41:45 +0100 (lun., 23 mars 2009) | 9 lines Issue #4688: Add a heuristic so that tuples and dicts containing only untrackable objects are not tracked by the garbage collector. This can reduce the size of collections and therefore the garbage collection overhead on long-running programs, depending on their particular use of datatypes. (trivia: this makes the "binary_trees" benchmark from the Computer Language Shootout 40% faster) ........
1 parent 17e4fdd commit 3a652b1

File tree

11 files changed

+397
-2
lines changed

11 files changed

+397
-2
lines changed

Doc/library/gc.rst

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,31 @@ The :mod:`gc` module provides the following functions:
129129
from an argument, that integer object may or may not appear in the result list.
130130

131131

132+
.. function:: is_tracked(obj)
133+
134+
Returns True if the object is currently tracked by the garbage collector,
135+
False otherwise. As a general rule, instances of atomic types aren't
136+
tracked and instances of non-atomic types (containers, user-defined
137+
objects...) are. However, some type-specific optimizations can be present
138+
in order to suppress the garbage collector footprint of simple instances
139+
(e.g. dicts containing only atomic keys and values)::
140+
141+
>>> gc.is_tracked(0)
142+
False
143+
>>> gc.is_tracked("a")
144+
False
145+
>>> gc.is_tracked([])
146+
True
147+
>>> gc.is_tracked({})
148+
False
149+
>>> gc.is_tracked({"a": 1})
150+
False
151+
>>> gc.is_tracked({"a": []})
152+
True
153+
154+
.. versionadded:: 2.7
155+
156+
132157
The following variable is provided for read-only access (you can mutate its
133158
value but should not rebind it):
134159

Include/dictobject.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ PyAPI_FUNC(PyObject *) PyDict_Copy(PyObject *mp);
125125
PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key);
126126
PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, long hash);
127127
PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);
128+
PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp);
128129

129130
/* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */
130131
PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other);

Include/objimpl.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,17 @@ extern PyGC_Head *_PyGC_generation0;
282282
g->gc.gc_next = NULL; \
283283
} while (0);
284284

285+
/* True if the object is currently tracked by the GC. */
286+
#define _PyObject_GC_IS_TRACKED(o) \
287+
((_Py_AS_GC(o))->gc.gc_refs != _PyGC_REFS_UNTRACKED)
288+
289+
/* True if the object may be tracked by the GC in the future, or already is.
290+
This can be useful to implement some optimizations. */
291+
#define _PyObject_GC_MAY_BE_TRACKED(obj) \
292+
(PyObject_IS_GC(obj) && \
293+
(!PyTuple_CheckExact(obj) || _PyObject_GC_IS_TRACKED(obj)))
294+
295+
285296
PyAPI_FUNC(PyObject *) _PyObject_GC_Malloc(size_t);
286297
PyAPI_FUNC(PyObject *) _PyObject_GC_New(PyTypeObject *);
287298
PyAPI_FUNC(PyVarObject *) _PyObject_GC_NewVar(PyTypeObject *, Py_ssize_t);

Include/tupleobject.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ PyAPI_FUNC(int) PyTuple_SetItem(PyObject *, Py_ssize_t, PyObject *);
4545
PyAPI_FUNC(PyObject *) PyTuple_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t);
4646
PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t);
4747
PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...);
48+
PyAPI_FUNC(void) _PyTuple_MaybeUntrack(PyObject *);
4849

4950
/* Macro, trading safety for speed */
5051
#define PyTuple_GET_ITEM(op, i) (((PyTupleObject *)(op))->ob_item[i])

Lib/test/test_dict.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,104 @@ class C(object):
665665
gc.collect()
666666
self.assert_(ref() is None, "Cycle was not collected")
667667

668+
def _not_tracked(self, t):
669+
# Nested containers can take several collections to untrack
670+
gc.collect()
671+
gc.collect()
672+
self.assertFalse(gc.is_tracked(t), t)
673+
674+
def _tracked(self, t):
675+
self.assertTrue(gc.is_tracked(t), t)
676+
gc.collect()
677+
gc.collect()
678+
self.assertTrue(gc.is_tracked(t), t)
679+
680+
def test_track_literals(self):
681+
# Test GC-optimization of dict literals
682+
x, y, z, w = 1.5, "a", (1, None), []
683+
684+
self._not_tracked({})
685+
self._not_tracked({x:(), y:x, z:1})
686+
self._not_tracked({1: "a", "b": 2})
687+
self._not_tracked({1: 2, (None, True, False, ()): int})
688+
self._not_tracked({1: object()})
689+
690+
# Dicts with mutable elements are always tracked, even if those
691+
# elements are not tracked right now.
692+
self._tracked({1: []})
693+
self._tracked({1: ([],)})
694+
self._tracked({1: {}})
695+
self._tracked({1: set()})
696+
697+
def test_track_dynamic(self):
698+
# Test GC-optimization of dynamically-created dicts
699+
class MyObject(object):
700+
pass
701+
x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject()
702+
703+
d = dict()
704+
self._not_tracked(d)
705+
d[1] = "a"
706+
self._not_tracked(d)
707+
d[y] = 2
708+
self._not_tracked(d)
709+
d[z] = 3
710+
self._not_tracked(d)
711+
self._not_tracked(d.copy())
712+
d[4] = w
713+
self._tracked(d)
714+
self._tracked(d.copy())
715+
d[4] = None
716+
self._not_tracked(d)
717+
self._not_tracked(d.copy())
718+
719+
# dd isn't tracked right now, but it may mutate and therefore d
720+
# which contains it must be tracked.
721+
d = dict()
722+
dd = dict()
723+
d[1] = dd
724+
self._not_tracked(dd)
725+
self._tracked(d)
726+
dd[1] = d
727+
self._tracked(dd)
728+
729+
d = dict.fromkeys([x, y, z])
730+
self._not_tracked(d)
731+
dd = dict()
732+
dd.update(d)
733+
self._not_tracked(dd)
734+
d = dict.fromkeys([x, y, z, o])
735+
self._tracked(d)
736+
dd = dict()
737+
dd.update(d)
738+
self._tracked(dd)
739+
740+
d = dict(x=x, y=y, z=z)
741+
self._not_tracked(d)
742+
d = dict(x=x, y=y, z=z, w=w)
743+
self._tracked(d)
744+
d = dict()
745+
d.update(x=x, y=y, z=z)
746+
self._not_tracked(d)
747+
d.update(w=w)
748+
self._tracked(d)
749+
750+
d = dict([(x, y), (z, 1)])
751+
self._not_tracked(d)
752+
d = dict([(x, y), (z, w)])
753+
self._tracked(d)
754+
d = dict()
755+
d.update([(x, y), (z, 1)])
756+
self._not_tracked(d)
757+
d.update([(x, y), (z, w)])
758+
self._tracked(d)
759+
760+
def test_track_subtypes(self):
761+
# Dict subtypes are always tracked
762+
class MyDict(dict):
763+
pass
764+
self._tracked(MyDict())
765+
668766

669767
from test import mapping_tests
670768

Lib/test/test_gc.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,33 @@ def test_get_referents(self):
415415

416416
self.assertEqual(gc.get_referents(1, 'a', 4j), [])
417417

418+
def test_is_tracked(self):
419+
# Atomic built-in types are not tracked, user-defined objects and
420+
# mutable containers are.
421+
# NOTE: types with special optimizations (e.g. tuple) have tests
422+
# in their own test files instead.
423+
self.assertFalse(gc.is_tracked(None))
424+
self.assertFalse(gc.is_tracked(1))
425+
self.assertFalse(gc.is_tracked(1.0))
426+
self.assertFalse(gc.is_tracked(1.0 + 5.0j))
427+
self.assertFalse(gc.is_tracked(True))
428+
self.assertFalse(gc.is_tracked(False))
429+
self.assertFalse(gc.is_tracked(b"a"))
430+
self.assertFalse(gc.is_tracked("a"))
431+
self.assertFalse(gc.is_tracked(bytearray(b"a")))
432+
self.assertFalse(gc.is_tracked(type))
433+
self.assertFalse(gc.is_tracked(int))
434+
self.assertFalse(gc.is_tracked(object))
435+
self.assertFalse(gc.is_tracked(object()))
436+
437+
class UserClass:
438+
pass
439+
self.assertTrue(gc.is_tracked(gc))
440+
self.assertTrue(gc.is_tracked(UserClass))
441+
self.assertTrue(gc.is_tracked(UserClass()))
442+
self.assertTrue(gc.is_tracked([]))
443+
self.assertTrue(gc.is_tracked(set()))
444+
418445
def test_bug1055820b(self):
419446
# Corresponds to temp2b.py in the bug report.
420447

Lib/test/test_tuple.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from test import support, seq_tests
22

3+
import gc
4+
35
class TupleTest(seq_tests.CommonTest):
46
type2test = tuple
57

@@ -82,6 +84,69 @@ def test_repr(self):
8284
self.assertEqual(repr(a0), "()")
8385
self.assertEqual(repr(a2), "(0, 1, 2)")
8486

87+
def _not_tracked(self, t):
88+
# Nested tuples can take several collections to untrack
89+
gc.collect()
90+
gc.collect()
91+
self.assertFalse(gc.is_tracked(t), t)
92+
93+
def _tracked(self, t):
94+
self.assertTrue(gc.is_tracked(t), t)
95+
gc.collect()
96+
gc.collect()
97+
self.assertTrue(gc.is_tracked(t), t)
98+
99+
def test_track_literals(self):
100+
# Test GC-optimization of tuple literals
101+
x, y, z = 1.5, "a", []
102+
103+
self._not_tracked(())
104+
self._not_tracked((1,))
105+
self._not_tracked((1, 2))
106+
self._not_tracked((1, 2, "a"))
107+
self._not_tracked((1, 2, (None, True, False, ()), int))
108+
self._not_tracked((object(),))
109+
self._not_tracked(((1, x), y, (2, 3)))
110+
111+
# Tuples with mutable elements are always tracked, even if those
112+
# elements are not tracked right now.
113+
self._tracked(([],))
114+
self._tracked(([1],))
115+
self._tracked(({},))
116+
self._tracked((set(),))
117+
self._tracked((x, y, z))
118+
119+
def check_track_dynamic(self, tp, always_track):
120+
x, y, z = 1.5, "a", []
121+
122+
check = self._tracked if always_track else self._not_tracked
123+
check(tp())
124+
check(tp([]))
125+
check(tp(set()))
126+
check(tp([1, x, y]))
127+
check(tp(obj for obj in [1, x, y]))
128+
check(tp(set([1, x, y])))
129+
check(tp(tuple([obj]) for obj in [1, x, y]))
130+
check(tuple(tp([obj]) for obj in [1, x, y]))
131+
132+
self._tracked(tp([z]))
133+
self._tracked(tp([[x, y]]))
134+
self._tracked(tp([{x: y}]))
135+
self._tracked(tp(obj for obj in [x, y, z]))
136+
self._tracked(tp(tuple([obj]) for obj in [x, y, z]))
137+
self._tracked(tuple(tp([obj]) for obj in [x, y, z]))
138+
139+
def test_track_dynamic(self):
140+
# Test GC-optimization of dynamically constructed tuples.
141+
self.check_track_dynamic(tuple, False)
142+
143+
def test_track_subtypes(self):
144+
# Tuple subtypes must always be tracked
145+
class MyTuple(tuple):
146+
pass
147+
self.check_track_dynamic(MyTuple, True)
148+
149+
85150
def test_main():
86151
support.run_unittest(TupleTest)
87152

Misc/NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ What's New in Python 3.1 alpha 2?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #4688: Add a heuristic so that tuples and dicts containing only
16+
untrackable objects are not tracked by the garbage collector. This can
17+
reduce the size of collections and therefore the garbage collection overhead
18+
on long-running programs, depending on their particular use of datatypes.
19+
1520
- Issue #5512: Rewrite PyLong long division algorithm (x_divrem) to
1621
improve its performance. Long divisions and remainder operations
1722
are now between 50% and 150% faster.

Modules/gcmodule.c

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,13 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
433433
(void) traverse(op,
434434
(visitproc)visit_reachable,
435435
(void *)young);
436-
next = gc->gc.gc_next;
436+
next = gc->gc.gc_next;
437+
if (PyTuple_CheckExact(op)) {
438+
_PyTuple_MaybeUntrack(op);
439+
}
440+
else if (PyDict_CheckExact(op)) {
441+
_PyDict_MaybeUntrack(op);
442+
}
437443
}
438444
else {
439445
/* This *may* be unreachable. To make progress,
@@ -1229,6 +1235,26 @@ gc_get_objects(PyObject *self, PyObject *noargs)
12291235
return result;
12301236
}
12311237

1238+
PyDoc_STRVAR(gc_is_tracked__doc__,
1239+
"is_tracked(obj) -> bool\n"
1240+
"\n"
1241+
"Returns true if the object is tracked by the garbage collector.\n"
1242+
"Simple atomic objects will return false.\n"
1243+
);
1244+
1245+
static PyObject *
1246+
gc_is_tracked(PyObject *self, PyObject *obj)
1247+
{
1248+
PyObject *result;
1249+
1250+
if (PyObject_IS_GC(obj) && IS_TRACKED(obj))
1251+
result = Py_True;
1252+
else
1253+
result = Py_False;
1254+
Py_INCREF(result);
1255+
return result;
1256+
}
1257+
12321258

12331259
PyDoc_STRVAR(gc__doc__,
12341260
"This module provides access to the garbage collector for reference cycles.\n"
@@ -1243,6 +1269,7 @@ PyDoc_STRVAR(gc__doc__,
12431269
"set_threshold() -- Set the collection thresholds.\n"
12441270
"get_threshold() -- Return the current the collection thresholds.\n"
12451271
"get_objects() -- Return a list of all objects tracked by the collector.\n"
1272+
"is_tracked() -- Returns true if a given object is tracked.\n"
12461273
"get_referrers() -- Return the list of objects that refer to an object.\n"
12471274
"get_referents() -- Return the list of objects that an object refers to.\n");
12481275

@@ -1258,6 +1285,7 @@ static PyMethodDef GcMethods[] = {
12581285
{"collect", (PyCFunction)gc_collect,
12591286
METH_VARARGS | METH_KEYWORDS, gc_collect__doc__},
12601287
{"get_objects", gc_get_objects,METH_NOARGS, gc_get_objects__doc__},
1288+
{"is_tracked", gc_is_tracked, METH_O, gc_is_tracked__doc__},
12611289
{"get_referrers", gc_get_referrers, METH_VARARGS,
12621290
gc_get_referrers__doc__},
12631291
{"get_referents", gc_get_referents, METH_VARARGS,

0 commit comments

Comments
 (0)