Skip to content

Commit 95004e5

Browse files
committed
py: Fix delete operation on map/dict and set objects.
Hash table can now be completely full (ie now NULL entry) before a resize is triggered. Use sentinel value to indicate delete entry in the table.
1 parent e20b6b4 commit 95004e5

6 files changed

Lines changed: 190 additions & 80 deletions

File tree

py/map.c

Lines changed: 124 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ STATIC void mp_map_rehash(mp_map_t *map) {
8383
map->all_keys_are_qstrs = 1;
8484
map->table = m_new0(mp_map_elem_t, map->alloc);
8585
for (int i = 0; i < old_alloc; i++) {
86-
if (old_table[i].key != NULL) {
86+
if (old_table[i].key != MP_OBJ_NULL && old_table[i].key != MP_OBJ_SENTINEL) {
8787
mp_map_lookup(map, old_table[i].key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = old_table[i].value;
8888
}
8989
}
@@ -106,8 +106,6 @@ mp_map_elem_t* mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
106106

107107
// map is a hash table (not a fixed array), so do a hash lookup
108108

109-
machine_uint_t hash;
110-
hash = mp_obj_hash(index);
111109
if (map->alloc == 0) {
112110
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
113111
mp_map_rehash(map);
@@ -116,54 +114,79 @@ mp_map_elem_t* mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
116114
}
117115
}
118116

117+
machine_uint_t hash = mp_obj_hash(index);
119118
uint pos = hash % map->alloc;
119+
uint start_pos = pos;
120+
mp_map_elem_t *avail_slot = NULL;
120121
for (;;) {
121-
mp_map_elem_t *elem = &map->table[pos];
122-
if (elem->key == NULL) {
123-
// not in table
122+
mp_map_elem_t *slot = &map->table[pos];
123+
if (slot->key == MP_OBJ_NULL) {
124+
// found NULL slot, so index is not in table
124125
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
125-
if (map->used + 1 >= map->alloc) {
126-
// not enough room in table, rehash it
127-
mp_map_rehash(map);
128-
// restart the search for the new element
129-
pos = hash % map->alloc;
130-
continue;
131-
} else {
132-
map->used += 1;
133-
elem->key = index;
134-
elem->value = NULL;
135-
if (!MP_OBJ_IS_QSTR(index)) {
136-
map->all_keys_are_qstrs = 0;
137-
}
138-
return elem;
126+
map->used += 1;
127+
if (avail_slot == NULL) {
128+
avail_slot = slot;
129+
}
130+
slot->key = index;
131+
slot->value = MP_OBJ_NULL;
132+
if (!MP_OBJ_IS_QSTR(index)) {
133+
map->all_keys_are_qstrs = 0;
139134
}
140-
} else if (elem->value == NULL) {
141-
return NULL;
135+
return slot;
136+
} else {
137+
return MP_OBJ_NULL;
142138
}
143-
// Otherwise it's just entry marked as deleted, so continue with next one
144-
} else if (elem->key == index || (!map->all_keys_are_qstrs && mp_obj_equal(elem->key, index))) {
145-
// found it
146-
/* it seems CPython does not replace the index; try x={True:'true'};x[1]='one';x
147-
if (add_if_not_found) {
148-
elem->key = index;
139+
} else if (slot->key == MP_OBJ_SENTINEL) {
140+
// found deleted slot, remember for later
141+
if (avail_slot == NULL) {
142+
avail_slot = slot;
149143
}
150-
*/
144+
} else if (slot->key == index || (!map->all_keys_are_qstrs && mp_obj_equal(slot->key, index))) {
145+
// found index
146+
// Note: CPython does not replace the index; try x={True:'true'};x[1]='one';x
151147
if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) {
152-
map->used--;
153148
// this leaks this memory (but see dict_get_helper)
154149
mp_map_elem_t *retval = m_new(mp_map_elem_t, 1);
155-
retval->key = elem->key;
156-
retval->value = elem->value;
157-
elem->key = NULL;
158-
// elem->key = NULL && elem->value != NULL means "marked deleted"
159-
// assume value indeed never NULL
150+
retval->key = slot->key;
151+
retval->value = slot->value;
152+
// delete element in this slot
153+
map->used--;
154+
if (map->table[(pos + 1) % map->alloc].key == MP_OBJ_NULL) {
155+
// optimisation if next slot is empty
156+
slot->key = MP_OBJ_NULL;
157+
} else {
158+
slot->key = MP_OBJ_SENTINEL;
159+
}
160160
return retval;
161161
}
162-
return elem;
162+
return slot;
163163
}
164164

165165
// not yet found, keep searching in this table
166166
pos = (pos + 1) % map->alloc;
167+
168+
if (pos == start_pos) {
169+
// search got back to starting position, so index is not in table
170+
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
171+
if (avail_slot != NULL) {
172+
// there was an available slot, so use that
173+
map->used++;
174+
avail_slot->key = index;
175+
avail_slot->value = MP_OBJ_NULL;
176+
if (!MP_OBJ_IS_QSTR(index)) {
177+
map->all_keys_are_qstrs = 0;
178+
}
179+
return avail_slot;
180+
} else {
181+
// not enough room in table, rehash it
182+
mp_map_rehash(map);
183+
// restart the search for the new element
184+
start_pos = pos = hash % map->alloc;
185+
}
186+
} else {
187+
return MP_OBJ_NULL;
188+
}
189+
}
167190
}
168191
}
169192

@@ -183,62 +206,99 @@ STATIC void mp_set_rehash(mp_set_t *set) {
183206
set->used = 0;
184207
set->table = m_new0(mp_obj_t, set->alloc);
185208
for (int i = 0; i < old_alloc; i++) {
186-
if (old_table[i] != NULL) {
187-
mp_set_lookup(set, old_table[i], true);
209+
if (old_table[i] != MP_OBJ_NULL && old_table[i] != MP_OBJ_SENTINEL) {
210+
mp_set_lookup(set, old_table[i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
188211
}
189212
}
190213
m_del(mp_obj_t, old_table, old_alloc);
191214
}
192215

193216
mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind) {
194-
int hash;
195-
int pos;
196217
if (set->alloc == 0) {
197218
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
198219
mp_set_rehash(set);
199220
} else {
200221
return NULL;
201222
}
202223
}
203-
if (lookup_kind & MP_MAP_LOOKUP_FIRST) {
204-
hash = 0;
205-
pos = 0;
206-
} else {
207-
hash = mp_obj_hash(index);;
208-
pos = hash % set->alloc;
209-
}
224+
machine_uint_t hash = mp_obj_hash(index);
225+
uint pos = hash % set->alloc;
226+
uint start_pos = pos;
227+
mp_obj_t *avail_slot = NULL;
210228
for (;;) {
211229
mp_obj_t elem = set->table[pos];
212230
if (elem == MP_OBJ_NULL) {
213-
// not in table
231+
// found NULL slot, so index is not in table
214232
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
215-
if (set->used + 1 >= set->alloc) {
233+
if (avail_slot == NULL) {
234+
avail_slot = &set->table[pos];
235+
}
236+
set->used++;
237+
*avail_slot = index;
238+
return index;
239+
} else {
240+
return MP_OBJ_NULL;
241+
}
242+
} else if (elem == MP_OBJ_SENTINEL) {
243+
// found deleted slot, remember for later
244+
if (avail_slot == NULL) {
245+
avail_slot = &set->table[pos];
246+
}
247+
} else if (mp_obj_equal(elem, index)) {
248+
// found index
249+
if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) {
250+
// delete element
251+
set->used--;
252+
if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) {
253+
// optimisation if next slot is empty
254+
set->table[pos] = MP_OBJ_NULL;
255+
} else {
256+
set->table[pos] = MP_OBJ_SENTINEL;
257+
}
258+
}
259+
return elem;
260+
}
261+
262+
// not yet found, keep searching in this table
263+
pos = (pos + 1) % set->alloc;
264+
265+
if (pos == start_pos) {
266+
// search got back to starting position, so index is not in table
267+
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
268+
if (avail_slot != NULL) {
269+
// there was an available slot, so use that
270+
set->used++;
271+
*avail_slot = index;
272+
return index;
273+
} else {
216274
// not enough room in table, rehash it
217275
mp_set_rehash(set);
218276
// restart the search for the new element
219-
pos = hash % set->alloc;
220-
} else {
221-
set->used += 1;
222-
set->table[pos] = index;
223-
return index;
277+
start_pos = pos = hash % set->alloc;
224278
}
225-
} else if (lookup_kind & MP_MAP_LOOKUP_FIRST) {
226-
pos++;
227279
} else {
228280
return MP_OBJ_NULL;
229281
}
230-
} else if ((lookup_kind & MP_MAP_LOOKUP_FIRST) || mp_obj_equal(elem, index)) {
231-
// found it
232-
if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) {
233-
set->used--;
234-
set->table[pos] = NULL;
282+
}
283+
}
284+
}
285+
286+
mp_obj_t mp_set_remove_first(mp_set_t *set) {
287+
for (uint pos = 0; pos < set->alloc; pos++) {
288+
if (set->table[pos] != MP_OBJ_NULL && set->table[pos] != MP_OBJ_SENTINEL) {
289+
mp_obj_t elem = set->table[pos];
290+
// delete element
291+
set->used--;
292+
if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) {
293+
// optimisation if next slot is empty
294+
set->table[pos] = MP_OBJ_NULL;
295+
} else {
296+
set->table[pos] = MP_OBJ_SENTINEL;
235297
}
236298
return elem;
237-
} else {
238-
// not yet found, keep searching in this table
239-
pos = (pos + 1) % set->alloc;
240299
}
241300
}
301+
return MP_OBJ_NULL;
242302
}
243303

244304
void mp_set_clear(mp_set_t *set) {

py/obj.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ typedef struct _mp_obj_base_t mp_obj_base_t;
2323

2424
#define MP_OBJ_NULL ((mp_obj_t)NULL)
2525

26+
// The SENTINEL object is used for various internal purposes where one needs
27+
// an object which is unique from all other objects, including MP_OBJ_NULL.
28+
29+
#define MP_OBJ_SENTINEL ((mp_obj_t)8)
30+
2631
// These macros check for small int, qstr or object, and access small int and qstr values
2732
// - xxxx...xxx1: a small int, bits 1 and above are the value
2833
// - xxxx...xx10: a qstr, bits 2 and above are the value
@@ -103,11 +108,11 @@ typedef struct _mp_map_t {
103108
mp_map_elem_t *table;
104109
} mp_map_t;
105110

111+
// These can be or'd together
106112
typedef enum _mp_map_lookup_kind_t {
107113
MP_MAP_LOOKUP, // 0
108114
MP_MAP_LOOKUP_ADD_IF_NOT_FOUND, // 1
109115
MP_MAP_LOOKUP_REMOVE_IF_FOUND, // 2
110-
MP_MAP_LOOKUP_FIRST = 4,
111116
} mp_map_lookup_kind_t;
112117

113118
void mp_map_init(mp_map_t *map, int n);
@@ -129,6 +134,7 @@ typedef struct _mp_set_t {
129134

130135
void mp_set_init(mp_set_t *set, int n);
131136
mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind);
137+
mp_obj_t mp_set_remove_first(mp_set_t *set);
132138
void mp_set_clear(mp_set_t *set);
133139

134140
// Type definitions for methods

py/objdict.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ STATIC mp_map_elem_t *dict_it_iternext_elem(mp_obj_t self_in) {
103103
mp_map_elem_t *table = self->dict->map.table;
104104

105105
for (int i = self->cur; i < max; i++) {
106-
if (table[i].key != NULL) {
106+
if (table[i].key != MP_OBJ_NULL && table[i].key != MP_OBJ_SENTINEL) {
107107
self->cur = i + 1;
108108
return &(table[i]);
109109
}

py/objset.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ STATIC void set_print(void (*print)(void *env, const char *fmt, ...), void *env,
3232
bool first = true;
3333
print(env, "{");
3434
for (int i = 0; i < self->set.alloc; i++) {
35-
if (self->set.table[i] != MP_OBJ_NULL) {
35+
if (self->set.table[i] != MP_OBJ_NULL && self->set.table[i] != MP_OBJ_SENTINEL) {
3636
if (!first) {
3737
print(env, ", ");
3838
}
@@ -83,7 +83,7 @@ STATIC mp_obj_t set_it_iternext(mp_obj_t self_in) {
8383
mp_obj_t *table = self->set->set.table;
8484

8585
for (machine_uint_t i = self->cur; i < max; i++) {
86-
if (table[i] != NULL) {
86+
if (table[i] != MP_OBJ_NULL && table[i] != MP_OBJ_SENTINEL) {
8787
self->cur = i + 1;
8888
return table[i];
8989
}
@@ -307,12 +307,10 @@ STATIC mp_obj_t set_equal(mp_obj_t self_in, mp_obj_t other_in) {
307307
STATIC mp_obj_t set_pop(mp_obj_t self_in) {
308308
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_set));
309309
mp_obj_set_t *self = self_in;
310-
311-
if (self->set.used == 0) {
310+
mp_obj_t obj = mp_set_remove_first(&self->set);
311+
if (obj == MP_OBJ_NULL) {
312312
nlr_jump(mp_obj_new_exception_msg(&mp_type_KeyError, "pop from an empty set"));
313313
}
314-
mp_obj_t obj = mp_set_lookup(&self->set, NULL,
315-
MP_MAP_LOOKUP_REMOVE_IF_FOUND | MP_MAP_LOOKUP_FIRST);
316314
return obj;
317315
}
318316
STATIC MP_DEFINE_CONST_FUN_OBJ_1(set_pop_obj, set_pop);
@@ -375,6 +373,14 @@ STATIC mp_obj_t set_union(mp_obj_t self_in, mp_obj_t other_in) {
375373
}
376374
STATIC MP_DEFINE_CONST_FUN_OBJ_2(set_union_obj, set_union);
377375

376+
STATIC mp_obj_t set_unary_op(int op, mp_obj_t self_in) {
377+
mp_obj_set_t *self = self_in;
378+
switch (op) {
379+
case MP_UNARY_OP_BOOL: return MP_BOOL(self->set.used != 0);
380+
case MP_UNARY_OP_LEN: return MP_OBJ_NEW_SMALL_INT((machine_int_t)self->set.used);
381+
default: return MP_OBJ_NULL; // op not supported for None
382+
}
383+
}
378384

379385
STATIC mp_obj_t set_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) {
380386
mp_obj_t args[] = {lhs, rhs};
@@ -450,6 +456,7 @@ const mp_obj_type_t mp_type_set = {
450456
.name = MP_QSTR_set,
451457
.print = set_print,
452458
.make_new = set_make_new,
459+
.unary_op = set_unary_op,
453460
.binary_op = set_binary_op,
454461
.getiter = set_getiter,
455462
.locals_dict = (mp_obj_t)&set_locals_dict,

tests/basics/dict-del.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,21 @@
1-
for i in range(100):
2-
d = dict()
3-
for j in range(100):
4-
d[j] = j
5-
del d[i]
6-
for j in range(100):
7-
if j not in d:
8-
print(j, 'not in d')
1+
for n in range(20):
2+
print('testing dict with {} items'.format(n))
3+
for i in range(n):
4+
# create dict
5+
d = dict()
6+
for j in range(n):
7+
d[str(j)] = j
8+
print(len(d))
9+
10+
# delete an item
11+
del d[str(i)]
12+
print(len(d))
13+
14+
# check items
15+
for j in range(n):
16+
if str(j) in d:
17+
if j == i:
18+
print(j, 'in d, but it should not be')
19+
else:
20+
if j != i:
21+
print(j, 'not in d, but it should be')

0 commit comments

Comments
 (0)