From 42220db28d731a1295dbe9c11076587c73925691 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 17 May 2026 18:56:57 -0700 Subject: [PATCH 1/8] fix: bound _seen_logs and stop retaining exc_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #1714. The module-level `_seen_logs` dict in `_protocol/incoming.py` and the class-level `QuietLogger._seen_logs` in `_logger.py` were unbounded caches keyed by `str(sys.exc_info()[1])`, with `sys.exc_info()` itself stored as the value. The exception strings emitted by `_read_name` / `_decode_labels_at_offset` embed peer-controlled fields (`self.source` carries the peer's IP and ephemeral source port) and byte offsets, so each malformed packet variant generated a fresh key. The stored traceback in turn pinned `self.data` (the raw packet bytes), giving a LAN-local attacker ~9 KB of per-key retention with no upper bound. The stored `exc_info` triple was dead state. Its only reader was removed in 4218d75 (Nov 2018) when `logger('Exception occurred:', exc_info=exc_info)` was rewritten to `exc_info=True`; since then the dict has functioned as a `set` with junk values. The integer counters in `log_warning_once` / `log_exception_once` were written but never read either. Switch both `_seen_logs` instances to a plain `set[str]` and clear them once they reach `_MAX_SEEN_LOGS = 256` entries. Logging behaviour is unchanged — first-occurrence warnings/tracebacks still fire, the suppression on subsequent occurrences still works — but the worst-case memory footprint under malformed input is now bounded to a few hundred short exception strings instead of millions of (str, exc_info-triple) pairs. --- src/zeroconf/_logger.py | 56 +++++++++++++----------------- src/zeroconf/_protocol/incoming.py | 14 +++++--- tests/test_logger.py | 19 +++++++--- tests/test_protocol.py | 14 ++++++++ 4 files changed, 61 insertions(+), 42 deletions(-) diff --git a/src/zeroconf/_logger.py b/src/zeroconf/_logger.py index 0d734dfd..bb8a4397 100644 --- a/src/zeroconf/_logger.py +++ b/src/zeroconf/_logger.py @@ -25,7 +25,7 @@ import logging import sys -from typing import Any, ClassVar, cast +from typing import Any, ClassVar log = logging.getLogger(__name__.split(".", maxsplit=1)[0]) log.addHandler(logging.NullHandler()) @@ -39,50 +39,42 @@ def set_logger_level_if_unset() -> None: set_logger_level_if_unset() +_MAX_SEEN_LOGS = 256 + + class QuietLogger: - _seen_logs: ClassVar[dict[str, int | tuple]] = {} + _seen_logs: ClassVar[set[str]] = set() + + @classmethod + def _mark_seen(cls, key: str) -> bool: + """Record ``key`` and return True if it was newly added.""" + if key in cls._seen_logs: + return False + # Keys can carry caller-supplied fields (peer addresses, packet + # offsets); clear when full so a malicious peer can't grow the + # set without bound. + if len(cls._seen_logs) >= _MAX_SEEN_LOGS: + cls._seen_logs.clear() + cls._seen_logs.add(key) + return True @classmethod def log_exception_warning(cls, *logger_data: Any) -> None: - exc_info = sys.exc_info() - exc_str = str(exc_info[1]) - if exc_str not in cls._seen_logs: - # log at warning level the first time this is seen - cls._seen_logs[exc_str] = exc_info - logger = log.warning - else: - logger = log.debug + first_time = cls._mark_seen(str(sys.exc_info()[1])) + logger = log.warning if first_time else log.debug logger(*(logger_data or ["Exception occurred"]), exc_info=True) @classmethod def log_exception_debug(cls, *logger_data: Any) -> None: - log_exc_info = False - exc_info = sys.exc_info() - exc_str = str(exc_info[1]) - if exc_str not in cls._seen_logs: - # log the trace only on the first time - cls._seen_logs[exc_str] = exc_info - log_exc_info = True - log.debug(*(logger_data or ["Exception occurred"]), exc_info=log_exc_info) + first_time = cls._mark_seen(str(sys.exc_info()[1])) + log.debug(*(logger_data or ["Exception occurred"]), exc_info=first_time) @classmethod def log_warning_once(cls, *args: Any) -> None: - msg_str = args[0] - if msg_str not in cls._seen_logs: - cls._seen_logs[msg_str] = 0 - logger = log.warning - else: - logger = log.debug - cls._seen_logs[msg_str] = cast(int, cls._seen_logs[msg_str]) + 1 + logger = log.warning if cls._mark_seen(args[0]) else log.debug logger(*args) @classmethod def log_exception_once(cls, exc: Exception, *args: Any) -> None: - msg_str = args[0] - if msg_str not in cls._seen_logs: - cls._seen_logs[msg_str] = 0 - logger = log.warning - else: - logger = log.debug - cls._seen_logs[msg_str] = cast(int, cls._seen_logs[msg_str]) + 1 + logger = log.warning if cls._mark_seen(args[0]) else log.debug logger(*args, exc_info=exc) diff --git a/src/zeroconf/_protocol/incoming.py b/src/zeroconf/_protocol/incoming.py index 2d977b64..763d8f90 100644 --- a/src/zeroconf/_protocol/incoming.py +++ b/src/zeroconf/_protocol/incoming.py @@ -63,7 +63,8 @@ DECODE_EXCEPTIONS = (IndexError, struct.error, IncomingDecodeError) -_seen_logs: dict[str, int | tuple] = {} +_MAX_SEEN_LOGS = 256 +_seen_logs: set[str] = set() _str = str _int = int @@ -183,11 +184,14 @@ def _initial_parse(self) -> None: @classmethod def _log_exception_debug(cls, *logger_data: Any) -> None: log_exc_info = False - exc_info = sys.exc_info() - exc_str = str(exc_info[1]) + exc_str = str(sys.exc_info()[1]) if exc_str not in _seen_logs: - # log the trace only on the first time - _seen_logs[exc_str] = exc_info + # The dedup key embeds attacker-controlled fields (peer IP/port, + # byte offsets); clear when full so a malicious peer can't grow + # the set without bound. + if len(_seen_logs) >= _MAX_SEEN_LOGS: + _seen_logs.clear() + _seen_logs.add(exc_str) log_exc_info = True log.debug(*(logger_data or ["Exception occurred"]), exc_info=log_exc_info) diff --git a/tests/test_logger.py b/tests/test_logger.py index 4e09aa3b..39c73c62 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -5,7 +5,7 @@ import logging from unittest.mock import call, patch -from zeroconf._logger import QuietLogger, set_logger_level_if_unset +from zeroconf._logger import _MAX_SEEN_LOGS, QuietLogger, set_logger_level_if_unset def test_loading_logger(): @@ -25,7 +25,7 @@ def test_loading_logger(): def test_log_warning_once(): """Test we only log with warning level once.""" - QuietLogger._seen_logs = {} + QuietLogger._seen_logs = set() quiet_logger = QuietLogger() with ( patch("zeroconf._logger.log.warning") as mock_log_warning, @@ -48,7 +48,7 @@ def test_log_warning_once(): def test_log_exception_warning(): """Test we only log with warning level once.""" - QuietLogger._seen_logs = {} + QuietLogger._seen_logs = set() quiet_logger = QuietLogger() with ( patch("zeroconf._logger.log.warning") as mock_log_warning, @@ -71,7 +71,7 @@ def test_log_exception_warning(): def test_llog_exception_debug(): """Test we only log with a trace once.""" - QuietLogger._seen_logs = {} + QuietLogger._seen_logs = set() quiet_logger = QuietLogger() with patch("zeroconf._logger.log.debug") as mock_log_debug: quiet_logger.log_exception_debug("the exception") @@ -84,9 +84,18 @@ def test_llog_exception_debug(): assert mock_log_debug.mock_calls == [call("the exception", exc_info=False)] +def test_seen_logs_is_bounded(): + """Distinct keys must not grow ``_seen_logs`` without bound.""" + QuietLogger._seen_logs = set() + with patch("zeroconf._logger.log.warning"), patch("zeroconf._logger.log.debug"): + for i in range(_MAX_SEEN_LOGS + 5): + QuietLogger.log_warning_once(f"warning-{i}") + assert len(QuietLogger._seen_logs) <= _MAX_SEEN_LOGS + + def test_log_exception_once(): """Test we only log with warning level once.""" - QuietLogger._seen_logs = {} + QuietLogger._seen_logs = set() quiet_logger = QuietLogger() exc = Exception() with ( diff --git a/tests/test_protocol.py b/tests/test_protocol.py index edd87c2e..b00e7dd9 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -14,6 +14,7 @@ import zeroconf as r from zeroconf import DNSHinfo, DNSIncoming, DNSText, const, current_time_millis +from zeroconf._protocol import incoming as _incoming_module from . import has_working_ipv6 @@ -962,6 +963,19 @@ def test_dns_compression_generic_failure(caplog): assert "Received invalid packet from ('1.2.3.4', 5353)" in caplog.text +def test_seen_logs_is_bounded(): + """Corrupt packets from varying peers must not grow _seen_logs without bound.""" + packet = ( + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x06domain\x05local\x00\x00\x01" + b"\x80\x01\x00\x00\x00\x01\x00\x04\xc0\xa8\xd0\x05-\x0c\x00\x01\x80\x01\x00\x00" + b"\x00\x01\x00\x04\xc0\xa8\xd0\x06" + ) + _incoming_module._seen_logs.clear() + for port in range(_incoming_module._MAX_SEEN_LOGS + 5): + r.DNSIncoming(packet, ("1.2.3.4", port)) + assert len(_incoming_module._seen_logs) <= _incoming_module._MAX_SEEN_LOGS + + def test_label_length_attack(): """Test our wire parser does not loop forever when the name exceeds 253 chars.""" packet = ( From 85ac2ae21d5adc33cc690dc45d5d97729ece67ab Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 17 May 2026 19:11:40 -0700 Subject: [PATCH 2/8] refactor: share _mark_seen helper between _logger and _protocol.incoming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lift the bound-check + eviction logic out of the four QuietLogger classmethods and the parser-side `_log_exception_debug` and into a single `_mark_seen(seen, key)` in `_logger.py`. Both call sites still keep their own `_seen_logs` set (no behavior change vs the previous commit — there are still two independent dedup pools, each capped at `_MAX_SEEN_LOGS`); only the eviction policy is now defined in one place. `_MAX_SEEN_LOGS` likewise moves to `_logger.py` only. --- src/zeroconf/_logger.py | 38 +++++++++++++++--------------- src/zeroconf/_protocol/incoming.py | 14 ++--------- tests/test_logger.py | 13 +++++----- tests/test_protocol.py | 5 ++-- 4 files changed, 31 insertions(+), 39 deletions(-) diff --git a/src/zeroconf/_logger.py b/src/zeroconf/_logger.py index bb8a4397..8a6c49ab 100644 --- a/src/zeroconf/_logger.py +++ b/src/zeroconf/_logger.py @@ -25,7 +25,7 @@ import logging import sys -from typing import Any, ClassVar +from typing import Any log = logging.getLogger(__name__.split(".", maxsplit=1)[0]) log.addHandler(logging.NullHandler()) @@ -40,41 +40,41 @@ def set_logger_level_if_unset() -> None: _MAX_SEEN_LOGS = 256 +_seen_logs: set[str] = set() -class QuietLogger: - _seen_logs: ClassVar[set[str]] = set() +def _mark_seen(seen: set[str], key: str) -> bool: + """Record ``key`` in ``seen`` and return True if it was newly added. - @classmethod - def _mark_seen(cls, key: str) -> bool: - """Record ``key`` and return True if it was newly added.""" - if key in cls._seen_logs: - return False - # Keys can carry caller-supplied fields (peer addresses, packet - # offsets); clear when full so a malicious peer can't grow the - # set without bound. - if len(cls._seen_logs) >= _MAX_SEEN_LOGS: - cls._seen_logs.clear() - cls._seen_logs.add(key) - return True + Bounds the set so callers passing attacker-influenced keys (peer + addresses, packet offsets) cannot grow it without bound. + """ + if key in seen: + return False + if len(seen) >= _MAX_SEEN_LOGS: + seen.clear() + seen.add(key) + return True + +class QuietLogger: @classmethod def log_exception_warning(cls, *logger_data: Any) -> None: - first_time = cls._mark_seen(str(sys.exc_info()[1])) + first_time = _mark_seen(_seen_logs, str(sys.exc_info()[1])) logger = log.warning if first_time else log.debug logger(*(logger_data or ["Exception occurred"]), exc_info=True) @classmethod def log_exception_debug(cls, *logger_data: Any) -> None: - first_time = cls._mark_seen(str(sys.exc_info()[1])) + first_time = _mark_seen(_seen_logs, str(sys.exc_info()[1])) log.debug(*(logger_data or ["Exception occurred"]), exc_info=first_time) @classmethod def log_warning_once(cls, *args: Any) -> None: - logger = log.warning if cls._mark_seen(args[0]) else log.debug + logger = log.warning if _mark_seen(_seen_logs, args[0]) else log.debug logger(*args) @classmethod def log_exception_once(cls, exc: Exception, *args: Any) -> None: - logger = log.warning if cls._mark_seen(args[0]) else log.debug + logger = log.warning if _mark_seen(_seen_logs, args[0]) else log.debug logger(*args, exc_info=exc) diff --git a/src/zeroconf/_protocol/incoming.py b/src/zeroconf/_protocol/incoming.py index 763d8f90..b9eab964 100644 --- a/src/zeroconf/_protocol/incoming.py +++ b/src/zeroconf/_protocol/incoming.py @@ -37,7 +37,7 @@ DNSText, ) from .._exceptions import IncomingDecodeError -from .._logger import log +from .._logger import _mark_seen, log from .._utils.time import current_time_millis from ..const import ( _FLAGS_QR_MASK, @@ -63,7 +63,6 @@ DECODE_EXCEPTIONS = (IndexError, struct.error, IncomingDecodeError) -_MAX_SEEN_LOGS = 256 _seen_logs: set[str] = set() _str = str _int = int @@ -183,16 +182,7 @@ def _initial_parse(self) -> None: @classmethod def _log_exception_debug(cls, *logger_data: Any) -> None: - log_exc_info = False - exc_str = str(sys.exc_info()[1]) - if exc_str not in _seen_logs: - # The dedup key embeds attacker-controlled fields (peer IP/port, - # byte offsets); clear when full so a malicious peer can't grow - # the set without bound. - if len(_seen_logs) >= _MAX_SEEN_LOGS: - _seen_logs.clear() - _seen_logs.add(exc_str) - log_exc_info = True + log_exc_info = _mark_seen(_seen_logs, str(sys.exc_info()[1])) log.debug(*(logger_data or ["Exception occurred"]), exc_info=log_exc_info) def answers(self) -> list[DNSRecord]: diff --git a/tests/test_logger.py b/tests/test_logger.py index 39c73c62..404ef761 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -5,6 +5,7 @@ import logging from unittest.mock import call, patch +from zeroconf import _logger from zeroconf._logger import _MAX_SEEN_LOGS, QuietLogger, set_logger_level_if_unset @@ -25,7 +26,7 @@ def test_loading_logger(): def test_log_warning_once(): """Test we only log with warning level once.""" - QuietLogger._seen_logs = set() + _logger._seen_logs.clear() quiet_logger = QuietLogger() with ( patch("zeroconf._logger.log.warning") as mock_log_warning, @@ -48,7 +49,7 @@ def test_log_warning_once(): def test_log_exception_warning(): """Test we only log with warning level once.""" - QuietLogger._seen_logs = set() + _logger._seen_logs.clear() quiet_logger = QuietLogger() with ( patch("zeroconf._logger.log.warning") as mock_log_warning, @@ -71,7 +72,7 @@ def test_log_exception_warning(): def test_llog_exception_debug(): """Test we only log with a trace once.""" - QuietLogger._seen_logs = set() + _logger._seen_logs.clear() quiet_logger = QuietLogger() with patch("zeroconf._logger.log.debug") as mock_log_debug: quiet_logger.log_exception_debug("the exception") @@ -86,16 +87,16 @@ def test_llog_exception_debug(): def test_seen_logs_is_bounded(): """Distinct keys must not grow ``_seen_logs`` without bound.""" - QuietLogger._seen_logs = set() + _logger._seen_logs.clear() with patch("zeroconf._logger.log.warning"), patch("zeroconf._logger.log.debug"): for i in range(_MAX_SEEN_LOGS + 5): QuietLogger.log_warning_once(f"warning-{i}") - assert len(QuietLogger._seen_logs) <= _MAX_SEEN_LOGS + assert len(_logger._seen_logs) <= _MAX_SEEN_LOGS def test_log_exception_once(): """Test we only log with warning level once.""" - QuietLogger._seen_logs = set() + _logger._seen_logs.clear() quiet_logger = QuietLogger() exc = Exception() with ( diff --git a/tests/test_protocol.py b/tests/test_protocol.py index b00e7dd9..0216a6d0 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -14,6 +14,7 @@ import zeroconf as r from zeroconf import DNSHinfo, DNSIncoming, DNSText, const, current_time_millis +from zeroconf._logger import _MAX_SEEN_LOGS from zeroconf._protocol import incoming as _incoming_module from . import has_working_ipv6 @@ -971,9 +972,9 @@ def test_seen_logs_is_bounded(): b"\x00\x01\x00\x04\xc0\xa8\xd0\x06" ) _incoming_module._seen_logs.clear() - for port in range(_incoming_module._MAX_SEEN_LOGS + 5): + for port in range(_MAX_SEEN_LOGS + 5): r.DNSIncoming(packet, ("1.2.3.4", port)) - assert len(_incoming_module._seen_logs) <= _incoming_module._MAX_SEEN_LOGS + assert len(_incoming_module._seen_logs) <= _MAX_SEEN_LOGS def test_label_length_attack(): From ba5de3a22f3c1935d422b0231203c81e288441a1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 17 May 2026 19:14:39 -0700 Subject: [PATCH 3/8] refactor: switch _mark_seen eviction from clear() to pop() and raise cap to 512 Smooth out the overflow path: instead of dropping the entire set at once when it hits the bound (which then re-emits warning-level logs for every previously-deduped key on its next occurrence), evict one arbitrary entry per insert past the bound. The cap effectively becomes a recency window of size `_MAX_SEEN_LOGS`. Also raises the bound from 256 to 512. Memory cost stays trivial (~40 KB of short exception strings), but under sustained malformed- packet pressure ops sees a steady trickle of new warnings rather than a burst of re-emissions every few hundred packets. --- src/zeroconf/_logger.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/zeroconf/_logger.py b/src/zeroconf/_logger.py index 8a6c49ab..b9cd39f0 100644 --- a/src/zeroconf/_logger.py +++ b/src/zeroconf/_logger.py @@ -39,7 +39,7 @@ def set_logger_level_if_unset() -> None: set_logger_level_if_unset() -_MAX_SEEN_LOGS = 256 +_MAX_SEEN_LOGS = 512 _seen_logs: set[str] = set() @@ -47,12 +47,14 @@ def _mark_seen(seen: set[str], key: str) -> bool: """Record ``key`` in ``seen`` and return True if it was newly added. Bounds the set so callers passing attacker-influenced keys (peer - addresses, packet offsets) cannot grow it without bound. + addresses, packet offsets) cannot grow it without bound. Evicts + one arbitrary entry per overflow so warning-level re-emissions + stay smooth rather than arriving in bursts. """ if key in seen: return False if len(seen) >= _MAX_SEEN_LOGS: - seen.clear() + seen.pop() seen.add(key) return True From 4cb3ecfc4734692f8743e7021652f88db6a34510 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 17 May 2026 19:17:23 -0700 Subject: [PATCH 4/8] refactor: switch _seen_logs to dict[str, None] for FIFO eviction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `set.pop()` removes an arbitrary element (hash-bucket order), not the oldest one — the "recency window" framing in ba5de3a was misleading. Switch the dedup containers to insertion-ordered `dict[str, None]` and evict via `del seen[next(iter(seen))]` so eviction is truly FIFO: the oldest entry is dropped per overflow, and `_MAX_SEEN_LOGS` becomes a genuine recency window. The eviction-path delta vs set+pop (~165 vs ~77 ns/call in a microbench) is dwarfed by the `str(sys.exc_info()[1])` interpolation on the calling path; this only runs when a packet has already failed to parse. --- src/zeroconf/_logger.py | 14 +++++++------- src/zeroconf/_protocol/incoming.py | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/zeroconf/_logger.py b/src/zeroconf/_logger.py index b9cd39f0..0936ebdb 100644 --- a/src/zeroconf/_logger.py +++ b/src/zeroconf/_logger.py @@ -40,22 +40,22 @@ def set_logger_level_if_unset() -> None: _MAX_SEEN_LOGS = 512 -_seen_logs: set[str] = set() +_seen_logs: dict[str, None] = {} -def _mark_seen(seen: set[str], key: str) -> bool: +def _mark_seen(seen: dict[str, None], key: str) -> bool: """Record ``key`` in ``seen`` and return True if it was newly added. - Bounds the set so callers passing attacker-influenced keys (peer + Bounds the dict so callers passing attacker-influenced keys (peer addresses, packet offsets) cannot grow it without bound. Evicts - one arbitrary entry per overflow so warning-level re-emissions - stay smooth rather than arriving in bursts. + the oldest entry per overflow (dict preserves insertion order on + Python 3.7+), so ``_MAX_SEEN_LOGS`` is a recency window. """ if key in seen: return False if len(seen) >= _MAX_SEEN_LOGS: - seen.pop() - seen.add(key) + del seen[next(iter(seen))] + seen[key] = None return True diff --git a/src/zeroconf/_protocol/incoming.py b/src/zeroconf/_protocol/incoming.py index b9eab964..20e319f3 100644 --- a/src/zeroconf/_protocol/incoming.py +++ b/src/zeroconf/_protocol/incoming.py @@ -63,7 +63,7 @@ DECODE_EXCEPTIONS = (IndexError, struct.error, IncomingDecodeError) -_seen_logs: set[str] = set() +_seen_logs: dict[str, None] = {} _str = str _int = int From 9a643536a61dc7af44a81c2ee956ae2da6d70f6f Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 17 May 2026 19:20:24 -0700 Subject: [PATCH 5/8] test: pin _mark_seen FIFO eviction and add CodSpeed bench MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tighten ``test_seen_logs_is_bounded`` to assert the cap is held exactly and that the oldest 5 keys are evicted first while the newest overflow keys remain — without this, the test would have passed for ``set.pop()`` (random eviction) too, hiding any future regression away from dict-backed FIFO. Also add ``tests/benchmarks/test_mark_seen.py`` so CodSpeed tracks the three branches (hit / fill / churn) of the helper. --- tests/benchmarks/test_mark_seen.py | 39 ++++++++++++++++++++++++++++++ tests/test_logger.py | 13 +++++++--- 2 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 tests/benchmarks/test_mark_seen.py diff --git a/tests/benchmarks/test_mark_seen.py b/tests/benchmarks/test_mark_seen.py new file mode 100644 index 00000000..4f82da8c --- /dev/null +++ b/tests/benchmarks/test_mark_seen.py @@ -0,0 +1,39 @@ +"""Benchmark for _logger._mark_seen.""" + +from __future__ import annotations + +from pytest_codspeed import BenchmarkFixture + +from zeroconf._logger import _MAX_SEEN_LOGS, _mark_seen + + +def test_mark_seen_hit(benchmark: BenchmarkFixture) -> None: + """Benchmark the cache-hit path (same key repeated).""" + seen: dict[str, None] = {"warm": None} + + @benchmark + def _hit() -> None: + for _ in range(1000): + _mark_seen(seen, "warm") + + +def test_mark_seen_fill(benchmark: BenchmarkFixture) -> None: + """Benchmark filling from empty up to the cap (no evictions).""" + keys = [f"key-{i}" for i in range(_MAX_SEEN_LOGS)] + + @benchmark + def _fill() -> None: + seen: dict[str, None] = {} + for k in keys: + _mark_seen(seen, k) + + +def test_mark_seen_churn(benchmark: BenchmarkFixture) -> None: + """Benchmark sustained eviction (every call past the cap drops oldest).""" + keys = [f"churn-{i}" for i in range(_MAX_SEEN_LOGS * 4)] + + @benchmark + def _churn() -> None: + seen: dict[str, None] = {} + for k in keys: + _mark_seen(seen, k) diff --git a/tests/test_logger.py b/tests/test_logger.py index 404ef761..9246c18c 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -85,13 +85,18 @@ def test_llog_exception_debug(): assert mock_log_debug.mock_calls == [call("the exception", exc_info=False)] -def test_seen_logs_is_bounded(): - """Distinct keys must not grow ``_seen_logs`` without bound.""" +def test_seen_logs_is_bounded() -> None: + """``_seen_logs`` stays at the cap and evicts oldest-first (FIFO).""" _logger._seen_logs.clear() + overflow = 5 with patch("zeroconf._logger.log.warning"), patch("zeroconf._logger.log.debug"): - for i in range(_MAX_SEEN_LOGS + 5): + for i in range(_MAX_SEEN_LOGS + overflow): QuietLogger.log_warning_once(f"warning-{i}") - assert len(_logger._seen_logs) <= _MAX_SEEN_LOGS + assert len(_logger._seen_logs) == _MAX_SEEN_LOGS + for i in range(overflow): + assert f"warning-{i}" not in _logger._seen_logs + for i in range(_MAX_SEEN_LOGS, _MAX_SEEN_LOGS + overflow): + assert f"warning-{i}" in _logger._seen_logs def test_log_exception_once(): From ebe1ab21862ef9be1700fc1ffa8ceca4dbd1c367 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 17 May 2026 19:50:41 -0700 Subject: [PATCH 6/8] fix: make _mark_seen safe under free-threaded contention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review feedback on #1717. The previous eviction body — ``del seen[next(iter(seen))]`` — relied on the GIL to serialize the compound iter/next/del. Under the free-threaded build (3.14t) and under multi-instance sync use where multiple ``Zeroconf`` instances share the module-level ``_seen_logs``, callers can race: - Two threads pop the same victim — ``del`` raises ``KeyError`` on the loser - One thread mutates the dict between another's ``iter()`` and ``next()`` — the iterator's mutation-count check raises ``RuntimeError`` ("dictionary changed size during iteration") Switch to ``seen.pop(next(iter(seen), None), None)`` so the pop is idempotent (no ``KeyError``) and the iter start handles the empty- dict edge case (no ``StopIteration``), and wrap the iter/next in a ``try/except RuntimeError`` so concurrent mutation during eviction is absorbed. Worst case under contention is a transient overshoot of the cap by one entry per racing thread, which clears as soon as the contention does. Add ``test_mark_seen_absorbs_runtime_error_during_eviction`` which substitutes a dict subclass whose ``__iter__`` always raises, proving the new ``except`` branch lets the insert still complete. Also tighten ``tests/test_protocol.py::test_seen_logs_is_bounded`` per Kōan's review comment: previously asserted ``<= _MAX_SEEN_LOGS``, which would still pass if a future refactor collapsed the per-port keys to a single dedup string. Now asserts ``== _MAX_SEEN_LOGS`` and verifies port 0's exception text is gone while the highest port's is still present — pins both the bound and that the parser exception path actually enters the dict with per-source-unique keys. --- src/zeroconf/_logger.py | 18 +++++++++++++++++- tests/test_logger.py | 22 +++++++++++++++++++++- tests/test_protocol.py | 15 ++++++++++++--- 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/zeroconf/_logger.py b/src/zeroconf/_logger.py index 0936ebdb..2f3cd731 100644 --- a/src/zeroconf/_logger.py +++ b/src/zeroconf/_logger.py @@ -50,11 +50,27 @@ def _mark_seen(seen: dict[str, None], key: str) -> bool: addresses, packet offsets) cannot grow it without bound. Evicts the oldest entry per overflow (dict preserves insertion order on Python 3.7+), so ``_MAX_SEEN_LOGS`` is a recency window. + + The dict is shared across all ``Zeroconf`` instances in the + process; on the free-threaded build (3.14t) and under multi- + instance sync use, callers can race. Individual dict operations + (``in``, ``__setitem__``, ``pop``, ``len``) are atomic in CPython + 3.13+ FT and don't crash, but the compound ``iter`` → ``next`` + used to find the FIFO victim can raise ``RuntimeError`` if + another thread mutates the dict between the two ops. Catch and + skip — the other thread is already shrinking the set, so missing + one eviction here just lets the cap drift up by one entry per + racing thread until contention clears. """ if key in seen: return False if len(seen) >= _MAX_SEEN_LOGS: - del seen[next(iter(seen))] + try: + oldest = next(iter(seen), None) + except RuntimeError: + oldest = None + if oldest is not None: + seen.pop(oldest, None) seen[key] = None return True diff --git a/tests/test_logger.py b/tests/test_logger.py index 9246c18c..1f165e08 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -6,7 +6,7 @@ from unittest.mock import call, patch from zeroconf import _logger -from zeroconf._logger import _MAX_SEEN_LOGS, QuietLogger, set_logger_level_if_unset +from zeroconf._logger import _MAX_SEEN_LOGS, QuietLogger, _mark_seen, set_logger_level_if_unset def test_loading_logger(): @@ -85,6 +85,26 @@ def test_llog_exception_debug(): assert mock_log_debug.mock_calls == [call("the exception", exc_info=False)] +def test_mark_seen_absorbs_runtime_error_during_eviction() -> None: + """Concurrent mutation can make ``iter(seen)`` raise ``RuntimeError``. + + Free-threaded (3.14t) and multi-instance sync callers share + ``_seen_logs``; if another thread mutates it between ``iter()`` + and ``next()`` the iterator raises ``RuntimeError``. + ``_mark_seen`` must absorb that and still insert the new key. + """ + + class RacyDict(dict[str, None]): + def __iter__(self): # type: ignore[override] + raise RuntimeError("dictionary changed size during iteration") + + seen: dict[str, None] = RacyDict() + for i in range(_MAX_SEEN_LOGS): + seen[f"k-{i}"] = None + assert _mark_seen(seen, "new-key") is True + assert "new-key" in seen + + def test_seen_logs_is_bounded() -> None: """``_seen_logs`` stays at the cap and evicts oldest-first (FIFO).""" _logger._seen_logs.clear() diff --git a/tests/test_protocol.py b/tests/test_protocol.py index 0216a6d0..42547775 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -965,16 +965,25 @@ def test_dns_compression_generic_failure(caplog): def test_seen_logs_is_bounded(): - """Corrupt packets from varying peers must not grow _seen_logs without bound.""" + """Corrupt packets from varying peers fill ``_seen_logs`` exactly to the cap.""" packet = ( b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x06domain\x05local\x00\x00\x01" b"\x80\x01\x00\x00\x00\x01\x00\x04\xc0\xa8\xd0\x05-\x0c\x00\x01\x80\x01\x00\x00" b"\x00\x01\x00\x04\xc0\xa8\xd0\x06" ) + overflow = 5 _incoming_module._seen_logs.clear() - for port in range(_MAX_SEEN_LOGS + 5): + for port in range(_MAX_SEEN_LOGS + overflow): r.DNSIncoming(packet, ("1.2.3.4", port)) - assert len(_incoming_module._seen_logs) <= _MAX_SEEN_LOGS + # Bound is hit exactly — confirms the parser exception path actually + # entered the dict with a per-port-unique key; a future change that + # dropped self.source from the exception text would collapse to a + # single dedup key and fail this assertion. + assert len(_incoming_module._seen_logs) == _MAX_SEEN_LOGS + # FIFO eviction: the earliest port's exception string is gone, the + # latest port's is still present. + assert not any("'1.2.3.4', 0)" in k for k in _incoming_module._seen_logs) + assert any(f"'1.2.3.4', {_MAX_SEEN_LOGS + overflow - 1})" in k for k in _incoming_module._seen_logs) def test_label_length_attack(): From 0e25328e31d68e568976c749ab9de256b620e846 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 17 May 2026 20:00:32 -0700 Subject: [PATCH 7/8] fix: drain _seen_logs in a while loop so drift can't inflate the cap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Under free-threaded (3.14t) contention or sustained multi-instance sync use, two callers can both pass the ``len(seen) < _MAX_SEEN_LOGS`` check and both insert before either evicts — leaving the dict transiently above the cap. The previous ``if``-based eviction removed one entry on the next call, but the new insert added one back, so the overshoot never drained and the dict silently grew with thread count. Switch the eviction to ``while len(seen) >= _MAX_SEEN_LOGS:`` so a single non-racing caller drains the accumulated drift back to the cap. Bail on ``RuntimeError`` and ``StopIteration`` so a concurrent mutation can't make the loop spin. Add ``test_mark_seen_drains_drift_above_cap`` to pin the drain behaviour — a regression to single-eviction would leave the dict at ``cap + drift - 1`` and fail the assertion. --- src/zeroconf/_logger.py | 22 +++++++++++----------- tests/test_logger.py | 21 +++++++++++++++++++++ 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/zeroconf/_logger.py b/src/zeroconf/_logger.py index 2f3cd731..379b33f6 100644 --- a/src/zeroconf/_logger.py +++ b/src/zeroconf/_logger.py @@ -48,7 +48,7 @@ def _mark_seen(seen: dict[str, None], key: str) -> bool: Bounds the dict so callers passing attacker-influenced keys (peer addresses, packet offsets) cannot grow it without bound. Evicts - the oldest entry per overflow (dict preserves insertion order on + the oldest entries on overflow (dict preserves insertion order on Python 3.7+), so ``_MAX_SEEN_LOGS`` is a recency window. The dict is shared across all ``Zeroconf`` instances in the @@ -57,20 +57,20 @@ def _mark_seen(seen: dict[str, None], key: str) -> bool: (``in``, ``__setitem__``, ``pop``, ``len``) are atomic in CPython 3.13+ FT and don't crash, but the compound ``iter`` → ``next`` used to find the FIFO victim can raise ``RuntimeError`` if - another thread mutates the dict between the two ops. Catch and - skip — the other thread is already shrinking the set, so missing - one eviction here just lets the cap drift up by one entry per - racing thread until contention clears. + another thread mutates the dict between the two ops. The + eviction loop drains until ``len(seen) < _MAX_SEEN_LOGS`` so that + any drift accumulated by prior concurrent inserts is corrected by + the next caller, and bails on ``RuntimeError`` (another thread is + already shrinking the set) so we don't spin. """ if key in seen: return False - if len(seen) >= _MAX_SEEN_LOGS: + while len(seen) >= _MAX_SEEN_LOGS: try: - oldest = next(iter(seen), None) - except RuntimeError: - oldest = None - if oldest is not None: - seen.pop(oldest, None) + oldest = next(iter(seen)) + except (RuntimeError, StopIteration): + break + seen.pop(oldest, None) seen[key] = None return True diff --git a/tests/test_logger.py b/tests/test_logger.py index 1f165e08..54bba8a0 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -105,6 +105,27 @@ def __iter__(self): # type: ignore[override] assert "new-key" in seen +def test_mark_seen_drains_drift_above_cap() -> None: + """``_mark_seen`` drains a drifted-over-cap dict back to the cap. + + Concurrent inserts on the free-threaded build can leave the dict + transiently above ``_MAX_SEEN_LOGS`` (e.g. two threads both passed + the ``len < cap`` check and both inserted). The next non-racing + call must drain the accumulated overshoot, not just evict one + entry — otherwise the cap silently inflates with thread count. + """ + seen: dict[str, None] = {} + drift = 10 + for i in range(_MAX_SEEN_LOGS + drift): + seen[f"k-{i}"] = None + assert len(seen) == _MAX_SEEN_LOGS + drift + assert _mark_seen(seen, "new-key") is True + assert len(seen) == _MAX_SEEN_LOGS + assert "new-key" in seen + for i in range(drift + 1): + assert f"k-{i}" not in seen + + def test_seen_logs_is_bounded() -> None: """``_seen_logs`` stays at the cap and evicts oldest-first (FIFO).""" _logger._seen_logs.clear() From d6cddb9e559b2613db32fda05a0d0c07005b495e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 17 May 2026 20:27:22 -0700 Subject: [PATCH 8/8] fix: drain _seen_logs drift on hit path and pin FIFO by key identity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Copilot review follow-ups on #1717: - ``_mark_seen`` returned early on cache hits, so a drift-overshoot left behind by concurrent inserts (FT or sync-multi-instance callers racing the ``len < cap`` check) only drained on the next miss. A workload that's mostly hits after a contention burst could keep the dict permanently above the cap. Add a drift-drain ``while`` inside the hit branch, gated on ``len > _MAX_SEEN_LOGS`` so steady-state-at-cap hits remain a single ``len`` + compare past the membership check. The shared pop+race-tolerant idiom factors out into a small ``_evict_oldest`` helper invoked only when the outer ``while`` predicate is true, so the hot-path cost is unchanged. Pin the behaviour with ``test_mark_seen_drains_drift_on_hit_path``. - ``tests/test_protocol.py::test_seen_logs_is_bounded`` asserted FIFO eviction via substring matches on the exception text (``"'1.2.3.4', 0)" in k``). That tied the test to the current ``IncomingDecodeError`` message format, so a future normalization of dedup-key shape (as discussed on #1714) would break the test without changing the bounded/eviction behaviour. Snapshot the actual key the parser inserted per port via ``next(reversed(_seen_logs))`` and assert FIFO by key identity. Also pin ``len(set(keys_per_port)) == _MAX_SEEN_LOGS + overflow`` so a regression that dropped ``self.source`` from the exception text (collapsing all calls to one dedup key) still fails the test rather than silently passing — preserving the per-port- unique-key check from the prior Kōan review. --- src/zeroconf/_logger.py | 51 ++++++++++++++++++++++++++--------------- tests/test_logger.py | 21 +++++++++++++++++ tests/test_protocol.py | 26 ++++++++++++++------- 3 files changed, 71 insertions(+), 27 deletions(-) diff --git a/src/zeroconf/_logger.py b/src/zeroconf/_logger.py index 379b33f6..99990cf6 100644 --- a/src/zeroconf/_logger.py +++ b/src/zeroconf/_logger.py @@ -43,6 +43,23 @@ def set_logger_level_if_unset() -> None: _seen_logs: dict[str, None] = {} +def _evict_oldest(seen: dict[str, None]) -> bool: + """Pop the oldest entry from ``seen``; return False if it raced. + + Individual dict ops (``pop`` with a default, ``next``) are atomic + on the free-threaded build, but the compound ``iter`` → ``next`` + used to pick the FIFO victim can raise ``RuntimeError`` if + another thread mutates the dict between the two ops. The caller + breaks its drain loop on False so concurrent mutation can't make + it spin. + """ + try: + seen.pop(next(iter(seen)), None) + except (RuntimeError, StopIteration): + return False + return True + + def _mark_seen(seen: dict[str, None], key: str) -> bool: """Record ``key`` in ``seen`` and return True if it was newly added. @@ -53,26 +70,22 @@ def _mark_seen(seen: dict[str, None], key: str) -> bool: The dict is shared across all ``Zeroconf`` instances in the process; on the free-threaded build (3.14t) and under multi- - instance sync use, callers can race. Individual dict operations - (``in``, ``__setitem__``, ``pop``, ``len``) are atomic in CPython - 3.13+ FT and don't crash, but the compound ``iter`` → ``next`` - used to find the FIFO victim can raise ``RuntimeError`` if - another thread mutates the dict between the two ops. The - eviction loop drains until ``len(seen) < _MAX_SEEN_LOGS`` so that - any drift accumulated by prior concurrent inserts is corrected by - the next caller, and bails on ``RuntimeError`` (another thread is - already shrinking the set) so we don't spin. + instance sync use, callers can race the ``len < cap`` check and + both insert, leaving the dict transiently above the cap. The + drain loop runs on every call (steady-state-at-cap hits are a + single ``len`` + compare past the membership check because the + helper short-circuits) so a contention burst is corrected by the + next caller regardless of whether it's a hit or a miss. """ - if key in seen: - return False - while len(seen) >= _MAX_SEEN_LOGS: - try: - oldest = next(iter(seen)) - except (RuntimeError, StopIteration): - break - seen.pop(oldest, None) - seen[key] = None - return True + inserting = key not in seen + # Hit (``inserting`` is False): drain only if drifted above cap. + # Miss (``inserting`` is True): drain to ``cap - 1`` to make room + # for the new key. Bool subtracts as 0/1 to pick the right limit. + while len(seen) > _MAX_SEEN_LOGS - inserting and _evict_oldest(seen): + pass + if inserting: + seen[key] = None + return inserting class QuietLogger: diff --git a/tests/test_logger.py b/tests/test_logger.py index 54bba8a0..8042e49c 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -126,6 +126,27 @@ def test_mark_seen_drains_drift_above_cap() -> None: assert f"k-{i}" not in seen +def test_mark_seen_drains_drift_on_hit_path() -> None: + """``_mark_seen`` drains drift even when ``key`` is already cached. + + A hit-heavy workload after a contention burst (e.g. the same + exception text deduplicated repeatedly) must still correct the + overshoot — otherwise the dict can sit permanently above the cap + until a miss happens to come along. + """ + seen: dict[str, None] = {} + drift = 10 + for i in range(_MAX_SEEN_LOGS + drift): + seen[f"k-{i}"] = None + # Hit on a non-oldest key — survives the drift drain. + hit_key = f"k-{_MAX_SEEN_LOGS}" + assert _mark_seen(seen, hit_key) is False + assert len(seen) == _MAX_SEEN_LOGS + assert hit_key in seen + for i in range(drift): + assert f"k-{i}" not in seen + + def test_seen_logs_is_bounded() -> None: """``_seen_logs`` stays at the cap and evicts oldest-first (FIFO).""" _logger._seen_logs.clear() diff --git a/tests/test_protocol.py b/tests/test_protocol.py index 3a3d2ab3..782b77aa 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -973,17 +973,27 @@ def test_seen_logs_is_bounded(): ) overflow = 5 _incoming_module._seen_logs.clear() + # Snapshot the actual key the parser inserted per port. This is whatever + # ``str(exc_info()[1])`` produces today — the test stays agnostic to the + # exception text format so a future normalization of the message (see + # the discussion on #1714) doesn't break the assertions, while still + # pinning that the parser exception path actually entered the dict. + keys_per_port: list[str] = [] for port in range(_MAX_SEEN_LOGS + overflow): r.DNSIncoming(packet, ("1.2.3.4", port)) - # Bound is hit exactly — confirms the parser exception path actually - # entered the dict with a per-port-unique key; a future change that - # dropped self.source from the exception text would collapse to a - # single dedup key and fail this assertion. + keys_per_port.append(next(reversed(_incoming_module._seen_logs))) + # Bound is hit exactly. assert len(_incoming_module._seen_logs) == _MAX_SEEN_LOGS - # FIFO eviction: the earliest port's exception string is gone, the - # latest port's is still present. - assert not any("'1.2.3.4', 0)" in k for k in _incoming_module._seen_logs) - assert any(f"'1.2.3.4', {_MAX_SEEN_LOGS + overflow - 1})" in k for k in _incoming_module._seen_logs) + # Each port produced a distinct dedup key — a regression that dropped + # the per-packet-varying component (e.g. self.source) from the exception + # text would collapse all 517 calls to one key and fail this. + assert len(set(keys_per_port)) == _MAX_SEEN_LOGS + overflow + # FIFO eviction by key identity (no substring matching on the message + # format): the earliest ports' keys are gone, the latest ports' remain. + for port in range(overflow): + assert keys_per_port[port] not in _incoming_module._seen_logs + for port in range(_MAX_SEEN_LOGS, _MAX_SEEN_LOGS + overflow): + assert keys_per_port[port] in _incoming_module._seen_logs def test_label_length_attack():