Skip to content

Commit d741ed4

Browse files
committed
Issue #28427: old keys should not remove new values from
WeakValueDictionary when collecting from another thread.
2 parents 34d0ac8 + e10ca3a commit d741ed4

File tree

7 files changed

+193
-22
lines changed

7 files changed

+193
-22
lines changed

Include/dictobject.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ PyAPI_FUNC(int) PyDict_DelItem(PyObject *mp, PyObject *key);
8888
#ifndef Py_LIMITED_API
8989
PyAPI_FUNC(int) _PyDict_DelItem_KnownHash(PyObject *mp, PyObject *key,
9090
Py_hash_t hash);
91+
PyAPI_FUNC(int) _PyDict_DelItemIf(PyObject *mp, PyObject *key,
92+
int (*predicate)(PyObject *value));
9193
#endif
9294
PyAPI_FUNC(void) PyDict_Clear(PyObject *mp);
9395
PyAPI_FUNC(int) PyDict_Next(

Lib/test/test_weakref.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,6 +1676,18 @@ def test_threaded_weak_valued_pop(self):
16761676
x = d.pop(10, 10)
16771677
self.assertIsNot(x, None) # we never put None in there!
16781678

1679+
def test_threaded_weak_valued_consistency(self):
1680+
# Issue #28427: old keys should not remove new values from
1681+
# WeakValueDictionary when collecting from another thread.
1682+
d = weakref.WeakValueDictionary()
1683+
with collect_in_thread():
1684+
for i in range(200000):
1685+
o = RefCycle()
1686+
d[10] = o
1687+
# o is still alive, so the dict can't be empty
1688+
self.assertEqual(len(d), 1)
1689+
o = None # lose ref
1690+
16791691

16801692
from test import mapping_tests
16811693

Lib/weakref.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
proxy,
1717
CallableProxyType,
1818
ProxyType,
19-
ReferenceType)
19+
ReferenceType,
20+
_remove_dead_weakref)
2021

2122
from _weakrefset import WeakSet, _IterationGuard
2223

@@ -111,7 +112,9 @@ def remove(wr, selfref=ref(self)):
111112
if self._iterating:
112113
self._pending_removals.append(wr.key)
113114
else:
114-
del self.data[wr.key]
115+
# Atomic removal is necessary since this function
116+
# can be called asynchronously by the GC
117+
_remove_dead_weakref(d, wr.key)
115118
self._remove = remove
116119
# A list of keys to be removed
117120
self._pending_removals = []
@@ -125,9 +128,12 @@ def _commit_removals(self):
125128
# We shouldn't encounter any KeyError, because this method should
126129
# always be called *before* mutating the dict.
127130
while l:
128-
del d[l.pop()]
131+
key = l.pop()
132+
_remove_dead_weakref(d, key)
129133

130134
def __getitem__(self, key):
135+
if self._pending_removals:
136+
self._commit_removals()
131137
o = self.data[key]()
132138
if o is None:
133139
raise KeyError(key)
@@ -140,9 +146,13 @@ def __delitem__(self, key):
140146
del self.data[key]
141147

142148
def __len__(self):
143-
return len(self.data) - len(self._pending_removals)
149+
if self._pending_removals:
150+
self._commit_removals()
151+
return len(self.data)
144152

145153
def __contains__(self, key):
154+
if self._pending_removals:
155+
self._commit_removals()
146156
try:
147157
o = self.data[key]()
148158
except KeyError:
@@ -158,6 +168,8 @@ def __setitem__(self, key, value):
158168
self.data[key] = KeyedRef(value, self._remove, key)
159169

160170
def copy(self):
171+
if self._pending_removals:
172+
self._commit_removals()
161173
new = WeakValueDictionary()
162174
for key, wr in self.data.items():
163175
o = wr()
@@ -169,6 +181,8 @@ def copy(self):
169181

170182
def __deepcopy__(self, memo):
171183
from copy import deepcopy
184+
if self._pending_removals:
185+
self._commit_removals()
172186
new = self.__class__()
173187
for key, wr in self.data.items():
174188
o = wr()
@@ -177,6 +191,8 @@ def __deepcopy__(self, memo):
177191
return new
178192

179193
def get(self, key, default=None):
194+
if self._pending_removals:
195+
self._commit_removals()
180196
try:
181197
wr = self.data[key]
182198
except KeyError:
@@ -190,13 +206,17 @@ def get(self, key, default=None):
190206
return o
191207

192208
def items(self):
209+
if self._pending_removals:
210+
self._commit_removals()
193211
with _IterationGuard(self):
194212
for k, wr in self.data.items():
195213
v = wr()
196214
if v is not None:
197215
yield k, v
198216

199217
def keys(self):
218+
if self._pending_removals:
219+
self._commit_removals()
200220
with _IterationGuard(self):
201221
for k, wr in self.data.items():
202222
if wr() is not None:
@@ -214,10 +234,14 @@ def itervaluerefs(self):
214234
keep the values around longer than needed.
215235
216236
"""
237+
if self._pending_removals:
238+
self._commit_removals()
217239
with _IterationGuard(self):
218240
yield from self.data.values()
219241

220242
def values(self):
243+
if self._pending_removals:
244+
self._commit_removals()
221245
with _IterationGuard(self):
222246
for wr in self.data.values():
223247
obj = wr()
@@ -290,6 +314,8 @@ def valuerefs(self):
290314
keep the values around longer than needed.
291315
292316
"""
317+
if self._pending_removals:
318+
self._commit_removals()
293319
return list(self.data.values())
294320

295321

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ Core and Builtins
4040
Library
4141
-------
4242

43+
- Issue #28427: old keys should not remove new values from
44+
WeakValueDictionary when collecting from another thread.
45+
4346
- Issue 28923: Remove editor artifacts from Tix.py.
4447

4548
- Issue #29055: Neaten-up empty population error on random.choice()

Modules/_weakref.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,46 @@ _weakref_getweakrefcount_impl(PyObject *module, PyObject *object)
3535
}
3636

3737

38+
static int
39+
is_dead_weakref(PyObject *value)
40+
{
41+
if (!PyWeakref_Check(value)) {
42+
PyErr_SetString(PyExc_TypeError, "not a weakref");
43+
return -1;
44+
}
45+
return PyWeakref_GET_OBJECT(value) == Py_None;
46+
}
47+
48+
/*[clinic input]
49+
50+
_weakref._remove_dead_weakref -> object
51+
52+
dct: object(subclass_of='&PyDict_Type')
53+
key: object
54+
/
55+
56+
Atomically remove key from dict if it points to a dead weakref.
57+
[clinic start generated code]*/
58+
59+
static PyObject *
60+
_weakref__remove_dead_weakref_impl(PyObject *module, PyObject *dct,
61+
PyObject *key)
62+
/*[clinic end generated code: output=d9ff53061fcb875c input=19fc91f257f96a1d]*/
63+
{
64+
if (_PyDict_DelItemIf(dct, key, is_dead_weakref) < 0) {
65+
if (PyErr_ExceptionMatches(PyExc_KeyError))
66+
/* This function is meant to allow safe weak-value dicts
67+
with GC in another thread (see issue #28427), so it's
68+
ok if the key doesn't exist anymore.
69+
*/
70+
PyErr_Clear();
71+
else
72+
return NULL;
73+
}
74+
Py_RETURN_NONE;
75+
}
76+
77+
3878
PyDoc_STRVAR(weakref_getweakrefs__doc__,
3979
"getweakrefs(object) -- return a list of all weak reference objects\n"
4080
"that point to 'object'.");
@@ -88,6 +128,7 @@ weakref_proxy(PyObject *self, PyObject *args)
88128
static PyMethodDef
89129
weakref_functions[] = {
90130
_WEAKREF_GETWEAKREFCOUNT_METHODDEF
131+
_WEAKREF__REMOVE_DEAD_WEAKREF_METHODDEF
91132
{"getweakrefs", weakref_getweakrefs, METH_O,
92133
weakref_getweakrefs__doc__},
93134
{"proxy", weakref_proxy, METH_VARARGS,

Modules/clinic/_weakref.c.h

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,34 @@ _weakref_getweakrefcount(PyObject *module, PyObject *object)
2929
exit:
3030
return return_value;
3131
}
32-
/*[clinic end generated code: output=e1ad587147323e19 input=a9049054013a1b77]*/
32+
33+
PyDoc_STRVAR(_weakref__remove_dead_weakref__doc__,
34+
"_remove_dead_weakref($module, dct, key, /)\n"
35+
"--\n"
36+
"\n"
37+
"Atomically remove key from dict if it points to a dead weakref.");
38+
39+
#define _WEAKREF__REMOVE_DEAD_WEAKREF_METHODDEF \
40+
{"_remove_dead_weakref", (PyCFunction)_weakref__remove_dead_weakref, METH_VARARGS, _weakref__remove_dead_weakref__doc__},
41+
42+
static PyObject *
43+
_weakref__remove_dead_weakref_impl(PyObject *module, PyObject *dct,
44+
PyObject *key);
45+
46+
static PyObject *
47+
_weakref__remove_dead_weakref(PyObject *module, PyObject *args)
48+
{
49+
PyObject *return_value = NULL;
50+
PyObject *dct;
51+
PyObject *key;
52+
53+
if (!PyArg_ParseTuple(args, "O!O:_remove_dead_weakref",
54+
&PyDict_Type, &dct, &key)) {
55+
goto exit;
56+
}
57+
return_value = _weakref__remove_dead_weakref_impl(module, dct, key);
58+
59+
exit:
60+
return return_value;
61+
}
62+
/*[clinic end generated code: output=e860dd818a44bc9b input=a9049054013a1b77]*/

Objects/dictobject.c

Lines changed: 74 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,11 +1591,34 @@ _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value,
15911591
return insertdict(mp, key, hash, value);
15921592
}
15931593

1594+
static int
1595+
delitem_common(PyDictObject *mp, Py_ssize_t hashpos, Py_ssize_t ix,
1596+
PyObject **value_addr)
1597+
{
1598+
PyObject *old_key, *old_value;
1599+
PyDictKeyEntry *ep;
1600+
1601+
old_value = *value_addr;
1602+
assert(old_value != NULL);
1603+
*value_addr = NULL;
1604+
mp->ma_used--;
1605+
mp->ma_version_tag = DICT_NEXT_VERSION();
1606+
ep = &DK_ENTRIES(mp->ma_keys)[ix];
1607+
dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
1608+
ENSURE_ALLOWS_DELETIONS(mp);
1609+
old_key = ep->me_key;
1610+
ep->me_key = NULL;
1611+
Py_DECREF(old_key);
1612+
Py_DECREF(old_value);
1613+
1614+
assert(_PyDict_CheckConsistency(mp));
1615+
return 0;
1616+
}
1617+
15941618
int
15951619
PyDict_DelItem(PyObject *op, PyObject *key)
15961620
{
15971621
Py_hash_t hash;
1598-
15991622
assert(key);
16001623
if (!PyUnicode_CheckExact(key) ||
16011624
(hash = ((PyASCIIObject *) key)->hash) == -1) {
@@ -1612,8 +1635,6 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
16121635
{
16131636
Py_ssize_t hashpos, ix;
16141637
PyDictObject *mp;
1615-
PyDictKeyEntry *ep;
1616-
PyObject *old_key, *old_value;
16171638
PyObject **value_addr;
16181639

16191640
if (!PyDict_Check(op)) {
@@ -1640,24 +1661,60 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
16401661
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
16411662
assert(ix >= 0);
16421663
}
1664+
return delitem_common(mp, hashpos, ix, value_addr);
1665+
}
16431666

1644-
old_value = *value_addr;
1645-
assert(old_value != NULL);
1646-
*value_addr = NULL;
1647-
mp->ma_used--;
1648-
mp->ma_version_tag = DICT_NEXT_VERSION();
1649-
ep = &DK_ENTRIES(mp->ma_keys)[ix];
1650-
dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
1651-
ENSURE_ALLOWS_DELETIONS(mp);
1652-
old_key = ep->me_key;
1653-
ep->me_key = NULL;
1654-
Py_DECREF(old_key);
1655-
Py_DECREF(old_value);
1667+
/* This function promises that the predicate -> deletion sequence is atomic
1668+
* (i.e. protected by the GIL), assuming the predicate itself doesn't
1669+
* release the GIL.
1670+
*/
1671+
int
1672+
_PyDict_DelItemIf(PyObject *op, PyObject *key,
1673+
int (*predicate)(PyObject *value))
1674+
{
1675+
Py_ssize_t hashpos, ix;
1676+
PyDictObject *mp;
1677+
Py_hash_t hash;
1678+
PyObject **value_addr;
1679+
int res;
16561680

1657-
assert(_PyDict_CheckConsistency(mp));
1658-
return 0;
1681+
if (!PyDict_Check(op)) {
1682+
PyErr_BadInternalCall();
1683+
return -1;
1684+
}
1685+
assert(key);
1686+
hash = PyObject_Hash(key);
1687+
if (hash == -1)
1688+
return -1;
1689+
mp = (PyDictObject *)op;
1690+
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
1691+
if (ix == DKIX_ERROR)
1692+
return -1;
1693+
if (ix == DKIX_EMPTY || *value_addr == NULL) {
1694+
_PyErr_SetKeyError(key);
1695+
return -1;
1696+
}
1697+
assert(dk_get_index(mp->ma_keys, hashpos) == ix);
1698+
1699+
// Split table doesn't allow deletion. Combine it.
1700+
if (_PyDict_HasSplitTable(mp)) {
1701+
if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
1702+
return -1;
1703+
}
1704+
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
1705+
assert(ix >= 0);
1706+
}
1707+
1708+
res = predicate(*value_addr);
1709+
if (res == -1)
1710+
return -1;
1711+
if (res > 0)
1712+
return delitem_common(mp, hashpos, ix, value_addr);
1713+
else
1714+
return 0;
16591715
}
16601716

1717+
16611718
void
16621719
PyDict_Clear(PyObject *op)
16631720
{

0 commit comments

Comments
 (0)