Skip to content

Commit d6cddb9

Browse files
committed
fix: drain _seen_logs drift on hit path and pin FIFO by key identity
Two Copilot review follow-ups on #1717: - ``_mark_seen`` returned early on cache hits, so a drift-overshoot left behind by concurrent inserts (FT or sync-multi-instance callers racing the ``len < cap`` check) only drained on the next miss. A workload that's mostly hits after a contention burst could keep the dict permanently above the cap. Add a drift-drain ``while`` inside the hit branch, gated on ``len > _MAX_SEEN_LOGS`` so steady-state-at-cap hits remain a single ``len`` + compare past the membership check. The shared pop+race-tolerant idiom factors out into a small ``_evict_oldest`` helper invoked only when the outer ``while`` predicate is true, so the hot-path cost is unchanged. Pin the behaviour with ``test_mark_seen_drains_drift_on_hit_path``. - ``tests/test_protocol.py::test_seen_logs_is_bounded`` asserted FIFO eviction via substring matches on the exception text (``"'1.2.3.4', 0)" in k``). That tied the test to the current ``IncomingDecodeError`` message format, so a future normalization of dedup-key shape (as discussed on #1714) would break the test without changing the bounded/eviction behaviour. Snapshot the actual key the parser inserted per port via ``next(reversed(_seen_logs))`` and assert FIFO by key identity. Also pin ``len(set(keys_per_port)) == _MAX_SEEN_LOGS + overflow`` so a regression that dropped ``self.source`` from the exception text (collapsing all calls to one dedup key) still fails the test rather than silently passing — preserving the per-port- unique-key check from the prior Kōan review.
1 parent d3598be commit d6cddb9

3 files changed

Lines changed: 71 additions & 27 deletions

File tree

src/zeroconf/_logger.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,23 @@ def set_logger_level_if_unset() -> None:
4343
_seen_logs: dict[str, None] = {}
4444

4545

46+
def _evict_oldest(seen: dict[str, None]) -> bool:
47+
"""Pop the oldest entry from ``seen``; return False if it raced.
48+
49+
Individual dict ops (``pop`` with a default, ``next``) are atomic
50+
on the free-threaded build, but the compound ``iter`` → ``next``
51+
used to pick the FIFO victim can raise ``RuntimeError`` if
52+
another thread mutates the dict between the two ops. The caller
53+
breaks its drain loop on False so concurrent mutation can't make
54+
it spin.
55+
"""
56+
try:
57+
seen.pop(next(iter(seen)), None)
58+
except (RuntimeError, StopIteration):
59+
return False
60+
return True
61+
62+
4663
def _mark_seen(seen: dict[str, None], key: str) -> bool:
4764
"""Record ``key`` in ``seen`` and return True if it was newly added.
4865
@@ -53,26 +70,22 @@ def _mark_seen(seen: dict[str, None], key: str) -> bool:
5370
5471
The dict is shared across all ``Zeroconf`` instances in the
5572
process; on the free-threaded build (3.14t) and under multi-
56-
instance sync use, callers can race. Individual dict operations
57-
(``in``, ``__setitem__``, ``pop``, ``len``) are atomic in CPython
58-
3.13+ FT and don't crash, but the compound ``iter`` → ``next``
59-
used to find the FIFO victim can raise ``RuntimeError`` if
60-
another thread mutates the dict between the two ops. The
61-
eviction loop drains until ``len(seen) < _MAX_SEEN_LOGS`` so that
62-
any drift accumulated by prior concurrent inserts is corrected by
63-
the next caller, and bails on ``RuntimeError`` (another thread is
64-
already shrinking the set) so we don't spin.
73+
instance sync use, callers can race the ``len < cap`` check and
74+
both insert, leaving the dict transiently above the cap. The
75+
drain loop runs on every call (steady-state-at-cap hits are a
76+
single ``len`` + compare past the membership check because the
77+
helper short-circuits) so a contention burst is corrected by the
78+
next caller regardless of whether it's a hit or a miss.
6579
"""
66-
if key in seen:
67-
return False
68-
while len(seen) >= _MAX_SEEN_LOGS:
69-
try:
70-
oldest = next(iter(seen))
71-
except (RuntimeError, StopIteration):
72-
break
73-
seen.pop(oldest, None)
74-
seen[key] = None
75-
return True
80+
inserting = key not in seen
81+
# Hit (``inserting`` is False): drain only if drifted above cap.
82+
# Miss (``inserting`` is True): drain to ``cap - 1`` to make room
83+
# for the new key. Bool subtracts as 0/1 to pick the right limit.
84+
while len(seen) > _MAX_SEEN_LOGS - inserting and _evict_oldest(seen):
85+
pass
86+
if inserting:
87+
seen[key] = None
88+
return inserting
7689

7790

7891
class QuietLogger:

tests/test_logger.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,27 @@ def test_mark_seen_drains_drift_above_cap() -> None:
126126
assert f"k-{i}" not in seen
127127

128128

129+
def test_mark_seen_drains_drift_on_hit_path() -> None:
130+
"""``_mark_seen`` drains drift even when ``key`` is already cached.
131+
132+
A hit-heavy workload after a contention burst (e.g. the same
133+
exception text deduplicated repeatedly) must still correct the
134+
overshoot — otherwise the dict can sit permanently above the cap
135+
until a miss happens to come along.
136+
"""
137+
seen: dict[str, None] = {}
138+
drift = 10
139+
for i in range(_MAX_SEEN_LOGS + drift):
140+
seen[f"k-{i}"] = None
141+
# Hit on a non-oldest key — survives the drift drain.
142+
hit_key = f"k-{_MAX_SEEN_LOGS}"
143+
assert _mark_seen(seen, hit_key) is False
144+
assert len(seen) == _MAX_SEEN_LOGS
145+
assert hit_key in seen
146+
for i in range(drift):
147+
assert f"k-{i}" not in seen
148+
149+
129150
def test_seen_logs_is_bounded() -> None:
130151
"""``_seen_logs`` stays at the cap and evicts oldest-first (FIFO)."""
131152
_logger._seen_logs.clear()

tests/test_protocol.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -973,17 +973,27 @@ def test_seen_logs_is_bounded():
973973
)
974974
overflow = 5
975975
_incoming_module._seen_logs.clear()
976+
# Snapshot the actual key the parser inserted per port. This is whatever
977+
# ``str(exc_info()[1])`` produces today — the test stays agnostic to the
978+
# exception text format so a future normalization of the message (see
979+
# the discussion on #1714) doesn't break the assertions, while still
980+
# pinning that the parser exception path actually entered the dict.
981+
keys_per_port: list[str] = []
976982
for port in range(_MAX_SEEN_LOGS + overflow):
977983
r.DNSIncoming(packet, ("1.2.3.4", port))
978-
# Bound is hit exactly — confirms the parser exception path actually
979-
# entered the dict with a per-port-unique key; a future change that
980-
# dropped self.source from the exception text would collapse to a
981-
# single dedup key and fail this assertion.
984+
keys_per_port.append(next(reversed(_incoming_module._seen_logs)))
985+
# Bound is hit exactly.
982986
assert len(_incoming_module._seen_logs) == _MAX_SEEN_LOGS
983-
# FIFO eviction: the earliest port's exception string is gone, the
984-
# latest port's is still present.
985-
assert not any("'1.2.3.4', 0)" in k for k in _incoming_module._seen_logs)
986-
assert any(f"'1.2.3.4', {_MAX_SEEN_LOGS + overflow - 1})" in k for k in _incoming_module._seen_logs)
987+
# Each port produced a distinct dedup key — a regression that dropped
988+
# the per-packet-varying component (e.g. self.source) from the exception
989+
# text would collapse all 517 calls to one key and fail this.
990+
assert len(set(keys_per_port)) == _MAX_SEEN_LOGS + overflow
991+
# FIFO eviction by key identity (no substring matching on the message
992+
# format): the earliest ports' keys are gone, the latest ports' remain.
993+
for port in range(overflow):
994+
assert keys_per_port[port] not in _incoming_module._seen_logs
995+
for port in range(_MAX_SEEN_LOGS, _MAX_SEEN_LOGS + overflow):
996+
assert keys_per_port[port] in _incoming_module._seen_logs
987997

988998

989999
def test_label_length_attack():

0 commit comments

Comments
 (0)