Skip to content

Commit 0417949

Browse files
Issue python#28199: Microoptimized dict resizing. Based on patch by Naoki Inada.
2 parents 43a5c1c + d76d8bf commit 0417949

1 file changed

Lines changed: 63 additions & 60 deletions

File tree

Objects/dictobject.c

Lines changed: 63 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,41 +1196,21 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
11961196
}
11971197

11981198
/*
1199-
Internal routine used by dictresize() to insert an item which is
1200-
known to be absent from the dict. This routine also assumes that
1201-
the dict contains no deleted entries. Besides the performance benefit,
1202-
using insertdict() in dictresize() is dangerous (SF bug #1456209).
1203-
Note that no refcounts are changed by this routine; if needed, the caller
1204-
is responsible for incref'ing `key` and `value`.
1205-
Neither mp->ma_used nor k->dk_usable are modified by this routine; the caller
1206-
must set them correctly
1199+
Internal routine used by dictresize() to buid a hashtable of entries.
12071200
*/
12081201
static void
1209-
insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
1210-
PyObject *value)
1202+
build_indices(PyDictKeysObject *keys, PyDictKeyEntry *ep, Py_ssize_t n)
12111203
{
1212-
size_t i;
1213-
PyDictKeysObject *k = mp->ma_keys;
1214-
size_t mask = (size_t)DK_SIZE(k)-1;
1215-
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
1216-
PyDictKeyEntry *ep;
1217-
1218-
assert(k->dk_lookup != NULL);
1219-
assert(value != NULL);
1220-
assert(key != NULL);
1221-
assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
1222-
i = hash & mask;
1223-
for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;) {
1224-
perturb >>= PERTURB_SHIFT;
1225-
i = mask & ((i << 2) + i + perturb + 1);
1204+
size_t mask = (size_t)DK_SIZE(keys) - 1;
1205+
for (Py_ssize_t ix = 0; ix != n; ix++, ep++) {
1206+
Py_hash_t hash = ep->me_hash;
1207+
size_t i = hash & mask;
1208+
for (size_t perturb = hash; dk_get_index(keys, i) != DKIX_EMPTY;) {
1209+
perturb >>= PERTURB_SHIFT;
1210+
i = mask & ((i << 2) + i + perturb + 1);
1211+
}
1212+
dk_set_index(keys, i, ix);
12261213
}
1227-
ep = &ep0[k->dk_nentries];
1228-
assert(ep->me_value == NULL);
1229-
dk_set_index(k, i, k->dk_nentries);
1230-
k->dk_nentries++;
1231-
ep->me_key = key;
1232-
ep->me_hash = hash;
1233-
ep->me_value = value;
12341214
}
12351215

12361216
/*
@@ -1246,10 +1226,10 @@ but can be resplit by make_keys_shared().
12461226
static int
12471227
dictresize(PyDictObject *mp, Py_ssize_t minused)
12481228
{
1249-
Py_ssize_t i, newsize;
1229+
Py_ssize_t newsize, numentries;
12501230
PyDictKeysObject *oldkeys;
12511231
PyObject **oldvalues;
1252-
PyDictKeyEntry *ep0;
1232+
PyDictKeyEntry *oldentries, *newentries;
12531233

12541234
/* Find the smallest table size > minused. */
12551235
for (newsize = PyDict_MINSIZE;
@@ -1260,8 +1240,14 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
12601240
PyErr_NoMemory();
12611241
return -1;
12621242
}
1243+
12631244
oldkeys = mp->ma_keys;
1264-
oldvalues = mp->ma_values;
1245+
1246+
/* NOTE: Current odict checks mp->ma_keys to detect resize happen.
1247+
* So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
1248+
* TODO: Try reusing oldkeys when reimplement odict.
1249+
*/
1250+
12651251
/* Allocate a new table. */
12661252
mp->ma_keys = new_keys_object(newsize);
12671253
if (mp->ma_keys == NULL) {
@@ -1270,42 +1256,59 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
12701256
}
12711257
if (oldkeys->dk_lookup == lookdict)
12721258
mp->ma_keys->dk_lookup = lookdict;
1273-
mp->ma_values = NULL;
1274-
ep0 = DK_ENTRIES(oldkeys);
1275-
/* Main loop below assumes we can transfer refcount to new keys
1276-
* and that value is stored in me_value.
1277-
* Increment ref-counts and copy values here to compensate
1278-
* This (resizing a split table) should be relatively rare */
1259+
1260+
numentries = mp->ma_used;
1261+
oldentries = DK_ENTRIES(oldkeys);
1262+
newentries = DK_ENTRIES(mp->ma_keys);
1263+
oldvalues = mp->ma_values;
12791264
if (oldvalues != NULL) {
1280-
for (i = 0; i < oldkeys->dk_nentries; i++) {
1281-
if (oldvalues[i] != NULL) {
1282-
Py_INCREF(ep0[i].me_key);
1283-
ep0[i].me_value = oldvalues[i];
1284-
}
1285-
}
1286-
}
1287-
/* Main loop */
1288-
for (i = 0; i < oldkeys->dk_nentries; i++) {
1289-
PyDictKeyEntry *ep = &ep0[i];
1290-
if (ep->me_value != NULL) {
1291-
insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
1265+
/* Convert split table into new combined table.
1266+
* We must incref keys; we can transfer values.
1267+
* Note that values of split table is always dense.
1268+
*/
1269+
for (Py_ssize_t i = 0; i < numentries; i++) {
1270+
assert(oldvalues[i] != NULL);
1271+
PyDictKeyEntry *ep = &oldentries[i];
1272+
PyObject *key = ep->me_key;
1273+
Py_INCREF(key);
1274+
newentries[i].me_key = key;
1275+
newentries[i].me_hash = ep->me_hash;
1276+
newentries[i].me_value = oldvalues[i];
12921277
}
1293-
}
1294-
mp->ma_keys->dk_usable -= mp->ma_used;
1295-
if (oldvalues != NULL) {
1296-
/* NULL out me_value slot in oldkeys, in case it was shared */
1297-
for (i = 0; i < oldkeys->dk_nentries; i++)
1298-
ep0[i].me_value = NULL;
1278+
12991279
DK_DECREF(oldkeys);
1280+
mp->ma_values = NULL;
13001281
if (oldvalues != empty_values) {
13011282
free_values(oldvalues);
13021283
}
13031284
}
1304-
else {
1285+
else { // combined table.
1286+
if (oldkeys->dk_nentries == numentries) {
1287+
memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry));
1288+
}
1289+
else {
1290+
PyDictKeyEntry *ep = oldentries;
1291+
for (Py_ssize_t i = 0; i < numentries; i++) {
1292+
while (ep->me_value == NULL)
1293+
ep++;
1294+
newentries[i] = *ep++;
1295+
}
1296+
}
1297+
13051298
assert(oldkeys->dk_lookup != lookdict_split);
13061299
assert(oldkeys->dk_refcnt == 1);
1307-
DK_DEBUG_DECREF PyObject_FREE(oldkeys);
1300+
if (oldkeys->dk_size == PyDict_MINSIZE &&
1301+
numfreekeys < PyDict_MAXFREELIST) {
1302+
DK_DEBUG_DECREF keys_free_list[numfreekeys++] = oldkeys;
1303+
}
1304+
else {
1305+
DK_DEBUG_DECREF PyObject_FREE(oldkeys);
1306+
}
13081307
}
1308+
1309+
build_indices(mp->ma_keys, newentries, numentries);
1310+
mp->ma_keys->dk_usable -= numentries;
1311+
mp->ma_keys->dk_nentries = numentries;
13091312
return 0;
13101313
}
13111314

0 commit comments

Comments
 (0)