From c24fa39d860ef169dfac51bf63cded742334b0a8 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@home-assistant.io>
Date: Sun, 17 May 2026 19:36:12 -0700
Subject: [PATCH 1/9] fix: bound DNSCache record count to prevent unbounded
 LAN-driven growth
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #1715.

A LAN-local peer multicasting unique-name mDNS responses could grow
``DNSCache.cache`` / ``_expirations`` / ``service_cache`` /
``_expire_heap`` without bound. ``_DNS_PTR_MIN_TTL = 1125`` (RFC 6762
recommendation for ServiceBrowser refresh-floor) actually extends
attacker-injected PTR records to ~19 min before they expire naturally,
and ``async_expire`` only runs every ``_CACHE_CLEANUP_INTERVAL = 10`` s
— too slow to keep up with a sustained flood. On memory-constrained
deployments (Home Assistant on a Raspberry Pi is the canonical victim)
this trivially OOMs the process.

Add a hard cap (``_MAX_CACHE_RECORDS = 10_000`` in ``const.py``) on the
total number of records the cache will hold. ``DNSCache`` now tracks
``_total_records`` (incremented on genuinely-new inserts in
``_async_add``, decremented in ``_async_remove``). When the cap is
hit, ``_async_evict_oldest`` heappops the closest-to-expiration record
and removes it before inserting the new one. Reuses the existing
``_expire_heap`` for victim selection — O(log n) — and skips stale
heap entries via the same expiration-equality check
``async_expire`` already uses.

All four touched lines compile to direct C int ops in the Cython
build (score-0 in ``cython -a`` annotation); the eviction call is a
C-level vtable dispatch fired only on overflow. Wall-clock check:
below-cap add stays at ~160 ns/record; over-cap add with constant
eviction is ~195 ns/record (+36 ns for the heappop + cache delete).

Per-source-IP record quotas — the reporter's "Better" suggestion —
are deferred to a follow-up PR. This commit closes the unbounded-
growth bug class on its own; the quota work changes the fairness
properties but not the memory bound.

CWE-400 (Uncontrolled Resource Consumption). LAN-local attack
surface only.
---
 src/zeroconf/_cache.pxd              |  8 +++-
 src/zeroconf/_cache.py               | 31 ++++++++++++++-
 src/zeroconf/const.py                |  6 +++
 tests/benchmarks/test_cache_bound.py | 44 +++++++++++++++++++++
 tests/test_cache.py                  | 57 ++++++++++++++++++++++++++++
 5 files changed, 143 insertions(+), 3 deletions(-)
 create mode 100644 tests/benchmarks/test_cache_bound.py

diff --git a/src/zeroconf/_cache.pxd b/src/zeroconf/_cache.pxd
index 05a40c0f..fa1942c5 100644
--- a/src/zeroconf/_cache.pxd
+++ b/src/zeroconf/_cache.pxd
@@ -19,6 +19,7 @@ cdef object _UNIQUE_RECORD_TYPES
 cdef unsigned int _TYPE_PTR
 cdef cython.uint _ONE_SECOND
 cdef unsigned int _MIN_SCHEDULED_RECORD_EXPIRATION
+cdef unsigned int _MAX_CACHE_RECORDS
 
 
 @cython.locals(record_cache=dict)
@@ -31,6 +32,7 @@ cdef class DNSCache:
     cdef public cython.dict service_cache
     cdef public list _expire_heap
     cdef public dict _expirations
+    cdef public unsigned int _total_records
 
     cpdef bint async_add_records(self, object entries)
 
@@ -60,10 +62,14 @@ cdef class DNSCache:
         service_store=cython.dict,
         service_record=DNSService,
         when=object,
-        new=bint
+        new=bint,
+        is_new=bint
     )
     cdef bint _async_add(self, DNSRecord record)
 
+    @cython.locals(record=DNSRecord, when_record=tuple)
+    cdef void _async_evict_oldest(self)
+
     @cython.locals(service_record=DNSService)
     cdef void _async_remove(self, DNSRecord record)
 
diff --git a/src/zeroconf/_cache.py b/src/zeroconf/_cache.py
index 94af3169..c523becc 100644
--- a/src/zeroconf/_cache.py
+++ b/src/zeroconf/_cache.py
@@ -37,7 +37,7 @@
     DNSText,
 )
 from ._utils.time import current_time_millis
-from .const import _ONE_SECOND, _TYPE_PTR
+from .const import _MAX_CACHE_RECORDS, _ONE_SECOND, _TYPE_PTR
 
 _UNIQUE_RECORD_TYPES = (DNSAddress, DNSHinfo, DNSPointer, DNSText, DNSService)
 _UniqueRecordsType = DNSAddress | DNSHinfo | DNSPointer | DNSText | DNSService
@@ -72,6 +72,7 @@ def __init__(self) -> None:
         self._expire_heap: list[tuple[float, DNSRecord]] = []
         self._expirations: dict[DNSRecord, float] = {}
         self.service_cache: _DNSRecordCacheType = {}
+        self._total_records: int = 0
 
     # Functions prefixed with async_ are NOT threadsafe and must
     # be run in the event loop.
@@ -91,7 +92,16 @@ def _async_add(self, record: _DNSRecord) -> bool:
         # direction would return the old incorrect entry.
         if (store := self.cache.get(record.key)) is None:
             store = self.cache[record.key] = {}
-        new = record not in store and not isinstance(record, DNSNsec)
+        is_new = record not in store
+        # Bound total cache size; evict closest-to-expiration entry to
+        # make room before inserting a new record. Prevents a LAN-local
+        # flood of unique-name records from growing the cache without
+        # bound (RFC 6762 §10 advisory caching, defense-in-depth).
+        if is_new and self._total_records >= _MAX_CACHE_RECORDS:
+            self._async_evict_oldest()
+        new = is_new and not isinstance(record, DNSNsec)
+        if is_new:
+            self._total_records += 1
         store[record] = record
         when = record.created + (record.ttl * 1000)
         if self._expirations.get(record) != when:
@@ -106,6 +116,22 @@ def _async_add(self, record: _DNSRecord) -> bool:
             service_store[service_record] = service_record
         return new
 
+    def _async_evict_oldest(self) -> None:
+        """Drop the closest-to-expiration record to make room for a new one.
+
+        Skips stale heap entries (records re-added with a different TTL),
+        which mirrors the staleness check in ``async_expire``.
+
+        This function must be run in from event loop.
+        """
+        while self._expire_heap:
+            when_record = heappop(self._expire_heap)
+            record = when_record[1]
+            if self._expirations.get(record) != when_record[0]:
+                continue
+            self._async_remove(record)
+            return
+
     def async_add_records(self, entries: Iterable[DNSRecord]) -> bool:
         """Add multiple records.
 
@@ -129,6 +155,7 @@ def _async_remove(self, record: _DNSRecord) -> None:
             _remove_key(self.service_cache, service_record.server_key, service_record)
         _remove_key(self.cache, record.key, record)
         self._expirations.pop(record, None)
+        self._total_records -= 1
 
     def async_remove_records(self, entries: Iterable[DNSRecord]) -> None:
         """Remove multiple records.
diff --git a/src/zeroconf/const.py b/src/zeroconf/const.py
index 1db39a46..a17e4685 100644
--- a/src/zeroconf/const.py
+++ b/src/zeroconf/const.py
@@ -59,6 +59,12 @@
 # level of rate limit and safe guards so we use 1/4 of the recommended value
 _DNS_PTR_MIN_TTL = 1125
 
+# Upper bound on the number of records the DNSCache will hold before it
+# starts evicting the closest-to-expiration entry to make room for new
+# arrivals. Bounds the memory a malicious LAN peer can force the cache
+# to retain by multicasting many unique-name records.
+_MAX_CACHE_RECORDS = 10000
+
 _DNS_PACKET_HEADER_LEN = 12
 
 _MAX_MSG_TYPICAL = 1460  # unused
diff --git a/tests/benchmarks/test_cache_bound.py b/tests/benchmarks/test_cache_bound.py
new file mode 100644
index 00000000..0ccb6bdd
--- /dev/null
+++ b/tests/benchmarks/test_cache_bound.py
@@ -0,0 +1,44 @@
+"""Benchmark for the DNSCache record-count bound + overflow eviction."""
+
+from __future__ import annotations
+
+from pytest_codspeed import BenchmarkFixture
+
+from zeroconf import DNSAddress, DNSCache, current_time_millis
+from zeroconf.const import _CLASS_IN, _MAX_CACHE_RECORDS, _TYPE_A
+
+
+def _make_records(count: int, now: float) -> list[DNSAddress]:
+    return [
+        DNSAddress(
+            f"bench-{i}.local.",
+            _TYPE_A,
+            _CLASS_IN,
+            120,
+            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
+            created=now + i,
+        )
+        for i in range(count)
+    ]
+
+
+def test_cache_add_below_cap(benchmark: BenchmarkFixture) -> None:
+    """Adding records while the cache is well below the cap (no eviction)."""
+    now = current_time_millis()
+    records = _make_records(1000, now)
+
+    @benchmark
+    def _add() -> None:
+        cache = DNSCache()
+        cache.async_add_records(records)
+
+
+def test_cache_add_at_cap_evicts(benchmark: BenchmarkFixture) -> None:
+    """Steady-state add at the cap: every new record forces one eviction."""
+    now = current_time_millis()
+    overflow_records = _make_records(_MAX_CACHE_RECORDS + 1000, now)
+
+    @benchmark
+    def _flood() -> None:
+        cache = DNSCache()
+        cache.async_add_records(overflow_records)
diff --git a/tests/test_cache.py b/tests/test_cache.py
index 9d55435d..a2ef9ee2 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -482,3 +482,60 @@ async def test_cache_heap_pops_order() -> None:
         ts, _ = heappop(cache._expire_heap)
         assert ts >= start_ts
         start_ts = ts
+
+
+def test_cache_size_is_bounded() -> None:
+    """A flood of unique-name records is capped at ``_MAX_CACHE_RECORDS``."""
+    cache = r.DNSCache()
+    now = r.current_time_millis()
+    overflow = 1000
+    flood_size = const._MAX_CACHE_RECORDS + overflow
+
+    cache.async_add_records(
+        r.DNSAddress(
+            f"flood-{i}.local.",
+            const._TYPE_A,
+            const._CLASS_IN,
+            120,
+            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
+            created=now + i,
+        )
+        for i in range(flood_size)
+    )
+
+    total = sum(len(store) for store in cache.cache.values())
+    assert total == const._MAX_CACHE_RECORDS
+    assert cache._total_records == const._MAX_CACHE_RECORDS
+    # FIFO-ish: the earliest-created records (closest to expiration) get
+    # evicted first, so the names that remain are from the tail.
+    for i in range(overflow):
+        assert f"flood-{i}.local." not in cache.cache
+    for i in range(flood_size - overflow, flood_size):
+        assert f"flood-{i}.local." in cache.cache
+
+
+def test_cache_eviction_decrements_total_records() -> None:
+    """Natural removal (goodbyes, expirations) must keep ``_total_records`` in sync."""
+    cache = r.DNSCache()
+    now = r.current_time_millis()
+    records = [
+        r.DNSAddress(
+            f"sync-{i}.local.",
+            const._TYPE_A,
+            const._CLASS_IN,
+            120,
+            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
+            created=now,
+        )
+        for i in range(50)
+    ]
+    cache.async_add_records(records)
+    assert cache._total_records == 50
+
+    cache.async_remove_records(records[:20])
+    assert cache._total_records == 30
+
+    # async_expire on a future time should drop the rest
+    cache.async_expire(now + (200 * 1000))
+    assert cache._total_records == 0
+    assert not cache.cache

From 51fbc279a0f6e434debbba196321c608c781a805 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@home-assistant.io>
Date: Sun, 17 May 2026 19:42:17 -0700
Subject: [PATCH 2/9] test: cover stale-heap-entry branch in
 _async_evict_oldest

Adds ``test_cache_eviction_skips_stale_heap_entries`` which fills the
cache to ``_MAX_CACHE_RECORDS``, re-adds the closest-to-expiration
record with a longer TTL (which leaves a stale ``(when, record)``
tuple at the head of ``_expire_heap`` and updates ``_expirations``
with the new ``when``), then forces a single eviction and asserts the
refreshed record survives while the next-oldest legitimate record is
the one that gets evicted. Without this test the ``continue`` branch
inside ``_async_evict_oldest`` would be untouched by the suite.
---
 tests/test_cache.py | 53 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/tests/test_cache.py b/tests/test_cache.py
index a2ef9ee2..ecb59599 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -514,6 +514,59 @@ def test_cache_size_is_bounded() -> None:
         assert f"flood-{i}.local." in cache.cache
 
 
+def test_cache_eviction_skips_stale_heap_entries() -> None:
+    """Eviction must skip stale ``_expire_heap`` entries left by TTL re-adds."""
+    cache = r.DNSCache()
+    now = r.current_time_millis()
+    # Fill to one shy of the cap so a single add triggers exactly one eviction
+    cache.async_add_records(
+        r.DNSAddress(
+            f"stale-{i}.local.",
+            const._TYPE_A,
+            const._CLASS_IN,
+            120,
+            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
+            created=now + i,
+        )
+        for i in range(const._MAX_CACHE_RECORDS)
+    )
+    assert cache._total_records == const._MAX_CACHE_RECORDS
+
+    # Re-add the closest-to-expiration record with a longer TTL. The original
+    # (when, record) tuple stays at the head of _expire_heap as a stale entry;
+    # _expirations[record] now points to the new (later) expiration. Eviction
+    # must pop and skip that stale tuple before settling on a real victim.
+    victim_name = "stale-0.local."
+    refreshed = r.DNSAddress(
+        victim_name,
+        const._TYPE_A,
+        const._CLASS_IN,
+        7200,
+        bytes((0, 0, 0, 1)),
+        created=now + 0,
+    )
+    cache.async_add_records([refreshed])
+    assert cache._total_records == const._MAX_CACHE_RECORDS
+
+    # Force one eviction. The next-oldest legitimate record must go; the
+    # refreshed record stays alive.
+    cache.async_add_records(
+        [
+            r.DNSAddress(
+                "trigger.local.",
+                const._TYPE_A,
+                const._CLASS_IN,
+                120,
+                b"\xff\xff\xff\xff",
+                created=now + const._MAX_CACHE_RECORDS,
+            )
+        ]
+    )
+    assert cache._total_records == const._MAX_CACHE_RECORDS
+    assert victim_name in cache.cache
+    assert "stale-1.local." not in cache.cache
+
+
 def test_cache_eviction_decrements_total_records() -> None:
     """Natural removal (goodbyes, expirations) must keep ``_total_records`` in sync."""
     cache = r.DNSCache()

From 931358ab6c6fcfb369d9a5bef0303dfab3786228 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@home-assistant.io>
Date: Sun, 17 May 2026 20:05:05 -0700
Subject: [PATCH 3/9] fix: re-resolve store after eviction and rebuild
 stale-prefix heap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address review feedback on #1718.

**Orphaned-store bug (blocking).** ``_async_add`` was capturing
``store = self.cache.get(record.key)`` before calling
``_async_evict_oldest``. If the closest-to-expiration victim was the
last record under ``record.key`` (e.g. the incoming record shares its
key with the victim — A+AAAA pairs, PTR fan-outs, TXT/SRV pairs all
hit this in real workloads), ``_remove_key`` would ``del
self.cache[record.key]`` during eviction, leaving the captured
``store`` pointing at an orphaned dict. The subsequent ``store[record]
= record`` then wrote into a dict no longer reachable through the
cache; the new record was effectively lost, and a later
``async_expire`` of that record would ``KeyError`` from
``_remove_key``.

Defer the bucket creation: look up ``store`` first (without creating),
do the eviction-if-needed pass, then ``cache.get`` again to pick up
any deletion eviction caused. The bucket is only created with
``self.cache[record.key] = {}`` after we know eviction has settled.

**Stale-prefix scan (suggestion ＃1).** Under sustained TTL re-adds —
e.g. ``async_mark_unique_records_older_than_1s_to_expire`` rewriting
TTLs to 1 — ``_expire_heap`` accumulates stale ``(when, record)``
tuples that no longer match ``_expirations[record]``. The previous
eviction body would pop those one at a time, doing
O(stale_prefix * log n) work per ``_async_add``. Reuse the same
``len(heap) > _MIN_SCHEDULED_RECORD_EXPIRATION and len(heap) >
len(expirations) * 2`` heuristic that ``async_expire`` already runs:
when the heap is mostly stale, rebuild it once up front, then run
the pop loop against only live entries.

**Accounting invariant test.** ``_total_records`` is ``cdef public
unsigned int``; an off-by-one in ``_async_add`` / ``_async_remove``
would silently underflow in the Cython wheel and pin the cap-check
True forever (eviction storm on every add). Rather than defensively
zero-clamp the decrement — which would hide the bug — the new
``test_cache_total_records_invariant_under_mixed_ops`` walks the
counter through every code path that touches it (fresh inserts,
re-adds, DNSService, DNSNsec, shared-key inserts that empty their
bucket on removal, full-cap eviction loop, ``async_expire``,
``async_remove_records``) and asserts ``cache._total_records ==
sum(len(s) for s in cache.cache.values())`` after every step. Any
future change that misses an increment or doubles a decrement fails
loudly.

Regression tests:
- ``test_cache_eviction_victim_shares_key_with_new_record`` —
  reproduces the orphan; would have failed against c24fa39.
- ``test_cache_eviction_rebuilds_heap_when_mostly_stale`` — fills to
  cap then re-adds with two distinct TTLs to push the heap to ~3x
  MAX, then triggers eviction and asserts the rebuild ran (heap and
  expirations match after the pass).
- ``test_cache_total_records_invariant_under_mixed_ops`` — counter
  invariant across all touched code paths.

The ``.pxd`` adds ``expire_heap_len`` as ``unsigned int`` to
``_async_evict_oldest``'s locals so the new threshold comparison
stays a direct C int op in the Cython build.
---
 src/zeroconf/_cache.pxd |   2 +-
 src/zeroconf/_cache.py  |  26 ++++-
 tests/test_cache.py     | 214 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 237 insertions(+), 5 deletions(-)

diff --git a/src/zeroconf/_cache.pxd b/src/zeroconf/_cache.pxd
index fa1942c5..b94131c3 100644
--- a/src/zeroconf/_cache.pxd
+++ b/src/zeroconf/_cache.pxd
@@ -67,7 +67,7 @@ cdef class DNSCache:
     )
     cdef bint _async_add(self, DNSRecord record)
 
-    @cython.locals(record=DNSRecord, when_record=tuple)
+    @cython.locals(record=DNSRecord, when_record=tuple, expire_heap_len="unsigned int")
     cdef void _async_evict_oldest(self)
 
     @cython.locals(service_record=DNSService)
diff --git a/src/zeroconf/_cache.py b/src/zeroconf/_cache.py
index c523becc..353117f3 100644
--- a/src/zeroconf/_cache.py
+++ b/src/zeroconf/_cache.py
@@ -90,15 +90,20 @@ def _async_add(self, record: _DNSRecord) -> bool:
         # replaces any existing records that are __eq__ to each other which
         # removes the risk that accessing the cache from the wrong
         # direction would return the old incorrect entry.
-        if (store := self.cache.get(record.key)) is None:
-            store = self.cache[record.key] = {}
-        is_new = record not in store
+        store = self.cache.get(record.key)
+        is_new = store is None or record not in store
         # Bound total cache size; evict closest-to-expiration entry to
         # make room before inserting a new record. Prevents a LAN-local
         # flood of unique-name records from growing the cache without
         # bound (RFC 6762 §10 advisory caching, defense-in-depth).
         if is_new and self._total_records >= _MAX_CACHE_RECORDS:
             self._async_evict_oldest()
+            # The victim may have been the last record under
+            # ``record.key``, in which case ``_remove_key`` deleted
+            # the bucket. Re-fetch before creating below.
+            store = self.cache.get(record.key)
+        if store is None:
+            store = self.cache[record.key] = {}
         new = is_new and not isinstance(record, DNSNsec)
         if is_new:
             self._total_records += 1
@@ -120,10 +125,23 @@ def _async_evict_oldest(self) -> None:
         """Drop the closest-to-expiration record to make room for a new one.
 
         Skips stale heap entries (records re-added with a different TTL),
-        which mirrors the staleness check in ``async_expire``.
+        which mirrors the staleness check in ``async_expire``. If the
+        heap is mostly stale (long stale prefix from sustained TTL
+        re-adds), rebuild it once up front so the pop loop below
+        doesn't do O(stale_prefix * log n) work on a single add. Same
+        heuristic / threshold as ``async_expire``.
 
         This function must be run in from event loop.
         """
+        expire_heap_len = len(self._expire_heap)
+        if (
+            expire_heap_len > _MIN_SCHEDULED_RECORD_EXPIRATION
+            and expire_heap_len > len(self._expirations) * 2
+        ):
+            self._expire_heap = [
+                entry for entry in self._expire_heap if self._expirations.get(entry[1]) == entry[0]
+            ]
+            heapify(self._expire_heap)
         while self._expire_heap:
             when_record = heappop(self._expire_heap)
             record = when_record[1]
diff --git a/tests/test_cache.py b/tests/test_cache.py
index ecb59599..80607ffd 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -567,6 +567,128 @@ def test_cache_eviction_skips_stale_heap_entries() -> None:
     assert "stale-1.local." not in cache.cache
 
 
+def test_cache_eviction_victim_shares_key_with_new_record() -> None:
+    """Eviction must not orphan the new record when it collides on ``record.key``.
+
+    If the closest-to-expiration record is the *last* one stored under
+    ``record.key`` and the incoming record uses the same key,
+    ``_remove_key`` deletes ``self.cache[record.key]`` during eviction.
+    A previous capture of ``store = self.cache.get(record.key)`` would
+    then write the new record into an orphaned dict not reachable via
+    the cache. Pin that the new record is reachable.
+    """
+    cache = r.DNSCache()
+    now = r.current_time_millis()
+    # Fill the cache to one shy of the cap with unique-name records, each
+    # with a later created time than the shared-key victim below.
+    cache.async_add_records(
+        r.DNSAddress(
+            f"filler-{i}.local.",
+            const._TYPE_A,
+            const._CLASS_IN,
+            120,
+            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
+            created=now + 1000 + i,
+        )
+        for i in range(const._MAX_CACHE_RECORDS - 1)
+    )
+
+    # Insert a record at "shared.local." with the earliest expiration —
+    # this is the one closest_to_expiration eviction will pick.
+    shared_key = "shared.local."
+    old_shared = r.DNSAddress(
+        shared_key,
+        const._TYPE_A,
+        const._CLASS_IN,
+        120,
+        b"\x01\x02\x03\x04",
+        created=now,
+    )
+    cache.async_add_records([old_shared])
+    assert cache._total_records == const._MAX_CACHE_RECORDS
+    assert shared_key in cache.cache
+
+    # Add a new record with the SAME key. Eviction will remove old_shared,
+    # which empties cache[shared_key], so _remove_key deletes that bucket.
+    # If _async_add captured the store before eviction it will now be
+    # writing into an orphaned dict.
+    new_shared = r.DNSAddress(
+        shared_key,
+        const._TYPE_A,
+        const._CLASS_IN,
+        120,
+        b"\x05\x06\x07\x08",
+        created=now + 999,
+    )
+    cache.async_add_records([new_shared])
+
+    # The new record must be reachable via the cache.
+    assert shared_key in cache.cache, "new record orphaned: cache bucket missing"
+    assert new_shared in cache.cache[shared_key]
+    assert cache.async_get_unique(new_shared) == new_shared
+    # And the counter accounting must still match observable state.
+    total = sum(len(store) for store in cache.cache.values())
+    assert total == cache._total_records
+
+
+def test_cache_eviction_rebuilds_heap_when_mostly_stale() -> None:
+    """Eviction rebuilds ``_expire_heap`` up front when stale entries dominate.
+
+    Without the rebuild, a single ``_async_add`` at cap could pop a long
+    stale prefix one entry at a time — O(stale_prefix * log n). The
+    rebuild is the same heuristic ``async_expire`` already uses: when
+    ``len(heap) > 2 * len(expirations)``, the heap is reconstructed
+    from only the valid entries in one O(n) pass before the pop loop.
+    """
+    cache = r.DNSCache()
+    now = r.current_time_millis()
+    base_kwargs = {"type_": const._TYPE_A, "class_": const._CLASS_IN}
+
+    cache.async_add_records(
+        r.DNSAddress(
+            f"r-{i}.local.",
+            ttl=120,
+            address=bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
+            created=now + i,
+            **base_kwargs,
+        )
+        for i in range(const._MAX_CACHE_RECORDS)
+    )
+    # Two re-adds with distinct TTLs leave two stale entries per record
+    # in ``_expire_heap`` while ``_expirations`` stays at MAX. Heap reaches
+    # ~3x MAX, which trips the ``len(heap) > 2 * len(expirations)``
+    # threshold on the next eviction.
+    for ttl in (7200, 14400):
+        cache.async_add_records(
+            r.DNSAddress(
+                f"r-{i}.local.",
+                ttl=ttl,
+                address=bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
+                created=now + i,
+                **base_kwargs,
+            )
+            for i in range(const._MAX_CACHE_RECORDS)
+        )
+    assert len(cache._expire_heap) > 2 * len(cache._expirations)
+
+    # One more insert pushes us over the cap; eviction fires and must
+    # rebuild before scanning.
+    cache.async_add_records(
+        [
+            r.DNSAddress(
+                "trigger.local.",
+                ttl=120,
+                address=b"\xff\xff\xff\xff",
+                created=now + const._MAX_CACHE_RECORDS,
+                **base_kwargs,
+            )
+        ]
+    )
+    # Rebuild dropped the stale entries; heap and expirations now match.
+    assert len(cache._expire_heap) == len(cache._expirations) == const._MAX_CACHE_RECORDS
+    assert cache._total_records == const._MAX_CACHE_RECORDS
+
+
 def test_cache_eviction_decrements_total_records() -> None:
     """Natural removal (goodbyes, expirations) must keep ``_total_records`` in sync."""
     cache = r.DNSCache()
@@ -592,3 +714,95 @@ def test_cache_eviction_decrements_total_records() -> None:
     cache.async_expire(now + (200 * 1000))
     assert cache._total_records == 0
     assert not cache.cache
+
+
+def test_cache_total_records_invariant_under_mixed_ops() -> None:
+    """``_total_records`` must equal ``sum(len(store) for store in cache.values())``.
+
+    Walks the counter through every code path that touches it — new inserts,
+    re-adds of an existing record (no increment), DNSService (extra
+    service_cache write), DNSNsec (stored but not counted as "new" by the
+    flag), shared-key inserts that empty their bucket on removal, full-cap
+    eviction, async_expire, manual async_remove_records — and asserts the
+    invariant after every step. Any future change that misses an increment
+    or doubles a decrement will trip this immediately; ``_total_records`` is
+    ``cdef unsigned int`` in the Cython build, so silent underflow would
+    otherwise propagate as a permanent eviction-on-every-add storm.
+    """
+    cache = r.DNSCache()
+    now = r.current_time_millis()
+
+    def actual() -> int:
+        return sum(len(store) for store in cache.cache.values())
+
+    # Fresh inserts: counter follows insert count exactly.
+    addrs = [
+        r.DNSAddress(
+            f"mix-{i}.local.", const._TYPE_A, const._CLASS_IN, 120, bytes((i, 0, 0, 1)), created=now + i
+        )
+        for i in range(20)
+    ]
+    cache.async_add_records(addrs)
+    assert cache._total_records == actual() == 20
+
+    # Re-add of an identical record: no increment (already present).
+    cache.async_add_records([addrs[0]])
+    assert cache._total_records == actual() == 20
+
+    # DNSService writes service_cache too — counter must still match cache size.
+    svc = r.DNSService("svc.local.", const._TYPE_SRV, const._CLASS_IN, 120, 0, 0, 80, "host.local.")
+    cache.async_add_records([svc])
+    assert cache._total_records == actual() == 21
+    cache.async_remove_records([svc])
+    assert cache._total_records == actual() == 20
+
+    # DNSNsec is stored but excluded from the "new" return value; the counter
+    # still tracks it because it occupies a bucket slot.
+    nsec = r.DNSNsec(
+        "nsec.local.",
+        const._TYPE_NSEC,
+        const._CLASS_IN,
+        120,
+        "nsec.local.",
+        [const._TYPE_A],
+    )
+    cache.async_add_records([nsec])
+    assert cache._total_records == actual() == 21
+    cache.async_remove_records([nsec])
+    assert cache._total_records == actual() == 20
+
+    # Shared-key insert/remove: emptying a bucket removes the cache key, but
+    # the counter must still decrement only by the number of records gone.
+    shared_a = r.DNSAddress(
+        "shared.local.", const._TYPE_A, const._CLASS_IN, 120, b"\x01\x01\x01\x01", created=now
+    )
+    shared_b = r.DNSAddress(
+        "shared.local.", const._TYPE_A, const._CLASS_IN, 120, b"\x02\x02\x02\x02", created=now
+    )
+    cache.async_add_records([shared_a, shared_b])
+    assert cache._total_records == actual() == 22
+    cache.async_remove_records([shared_a, shared_b])
+    assert cache._total_records == actual() == 20
+    assert "shared.local." not in cache.cache
+
+    # async_expire path: counter must follow the records dropped.
+    cache.async_expire(now + (200 * 1000))
+    assert cache._total_records == actual() == 0
+    assert not cache.cache
+
+    # Full-cap eviction loop: counter never grows past the cap, never drifts.
+    cap_records = [
+        r.DNSAddress(
+            f"cap-{i}.local.",
+            const._TYPE_A,
+            const._CLASS_IN,
+            120,
+            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
+            created=now + i,
+        )
+        for i in range(const._MAX_CACHE_RECORDS + 50)
+    ]
+    for rec in cap_records:
+        cache.async_add_records([rec])
+        assert cache._total_records == actual()
+    assert cache._total_records == const._MAX_CACHE_RECORDS

From 7c6be63a9fc8134bddbdf2e1e2821e5107cb36e6 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@home-assistant.io>
Date: Sun, 17 May 2026 20:48:14 -0700
Subject: [PATCH 4/9] fix: bound _expire_heap growth from re-adds and DRY test
 helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address remaining review feedback on #1718.

**Re-add heap growth (Copilot inline at L100, real bug separate from
the orphan).** ``_async_add``'s cap check is gated on ``is_new``, but
a peer that just replays cached records with a different ``created``/
TTL hits the ``is_new == False`` path: no cap check, no
``_total_records`` change — yet the changed ``when`` makes
``_expirations.get(record) != when`` true, so a fresh ``(when,
record)`` tuple is heappushed while the prior tuple stays behind as
stale. ``_expire_heap`` grows unbounded between cleanups even though
``cache`` and ``_total_records`` stay flat. At ~10k pps for 10 s
that's ~1 M stale heap entries × ~100 B ≈ 100 MB transient.

Factor the rebuild check (``len(heap) > _MIN_SCHEDULED_RECORD_EXPIRATION
and len(heap) > 2 * len(expirations)`` → filter-and-heapify) into a
new ``_maybe_rebuild_heap`` cdef helper. Call it from three sites:

- ``_async_add`` immediately after the conditional heappush, so a
  re-add flood is bounded at every step (amortized O(1) per push,
  O(N) rebuild fires at most once per N stale pushes)
- ``async_expire`` after the pop loop, replacing the existing inline
  rebuild — same behavior, slightly more aggressive (uses post-pop
  heap length instead of pre-pop), no test regression
- (removed from ``_async_evict_oldest`` — the eager rebuild in
  ``_async_add`` keeps the heap bounded before eviction is ever
  triggered, so the eviction-side call was dead code)

Cython codegen: the rebuild call from ``_async_add`` only fires when
the conditional heappush already fired, so the hot path on duplicate
re-adds (same ``when``) is unchanged. The check inside
``_maybe_rebuild_heap`` is two ``len()`` calls + an integer compare —
score-0 in ``cython -a``.

**Test DRY-up.** Introduce a module-level ``_addr(name, idx, ...)``
helper that builds the boilerplate ``DNSAddress(name, _TYPE_A,
_CLASS_IN, ttl, bytes((idx & 0xFF, (idx >> 8) & 0xFF, 0, 1)), created
=...)`` so each new test reads top-down instead of being dominated by
constructor noise. Test docstrings collapsed to one sentence per
``CLAUDE.md``.

**New regression test:** ``test_cache_re_add_flood_does_not_grow_heap_unbounded``
adds 200 records below the cap, then replays them through 10 cycles
of varying TTLs (2000 stale pushes total) and asserts the heap stays
near the rebuild threshold. The previous code would have grown it to
~11x the record count; the helper keeps it within ``2 * expirations``.

**Removed test:** ``test_cache_eviction_rebuilds_heap_when_mostly_stale``
— its premise (build up stale entries, then trigger eviction-side
rebuild) is no longer reachable now that ``_async_add`` rebuilds
proactively. The same code path is covered by the re-add flood test.

**Existing tests updated:** ``test_cache_heap_multi_name_cleanup`` and
``test_cache_heap_pops_order`` previously asserted pre-cleanup heap
size (``min_records_to_cleanup + 5``). With proactive rebuild firing
inside ``_async_add``, the heap is cleaned up earlier and the post-
cleanup invariant (``1 + 5`` after ``async_expire``) still holds, but
the intermediate snapshot is no longer accurate. Replaced the stale
intermediate assertion with a comment.
---
 src/zeroconf/_cache.pxd |   5 +-
 src/zeroconf/_cache.py  |  72 +++++------
 tests/test_cache.py     | 277 ++++++++++------------------------------
 3 files changed, 99 insertions(+), 255 deletions(-)

diff --git a/src/zeroconf/_cache.pxd b/src/zeroconf/_cache.pxd
index b94131c3..023304bc 100644
--- a/src/zeroconf/_cache.pxd
+++ b/src/zeroconf/_cache.pxd
@@ -67,9 +67,12 @@ cdef class DNSCache:
     )
     cdef bint _async_add(self, DNSRecord record)
 
-    @cython.locals(record=DNSRecord, when_record=tuple, expire_heap_len="unsigned int")
+    @cython.locals(record=DNSRecord, when_record=tuple)
     cdef void _async_evict_oldest(self)
 
+    @cython.locals(expire_heap_len="unsigned int")
+    cdef void _maybe_rebuild_heap(self)
+
     @cython.locals(service_record=DNSService)
     cdef void _async_remove(self, DNSRecord record)
 
diff --git a/src/zeroconf/_cache.py b/src/zeroconf/_cache.py
index 353117f3..3c45b733 100644
--- a/src/zeroconf/_cache.py
+++ b/src/zeroconf/_cache.py
@@ -110,9 +110,14 @@ def _async_add(self, record: _DNSRecord) -> bool:
         store[record] = record
         when = record.created + (record.ttl * 1000)
         if self._expirations.get(record) != when:
-            # Avoid adding duplicates to the heap
             heappush(self._expire_heap, (when, record))
             self._expirations[record] = when
+            # Re-adds of an existing record with a new TTL push a fresh
+            # entry but leave the prior tuple behind as stale, so a peer
+            # that just replays cached records can grow ``_expire_heap``
+            # without ever tripping the cap. Rebuild when stale entries
+            # dominate.
+            self._maybe_rebuild_heap()
 
         if isinstance(record, DNSService):
             service_record = record
@@ -122,16 +127,28 @@ def _async_add(self, record: _DNSRecord) -> bool:
         return new
 
     def _async_evict_oldest(self) -> None:
-        """Drop the closest-to-expiration record to make room for a new one.
-
-        Skips stale heap entries (records re-added with a different TTL),
-        which mirrors the staleness check in ``async_expire``. If the
-        heap is mostly stale (long stale prefix from sustained TTL
-        re-adds), rebuild it once up front so the pop loop below
-        doesn't do O(stale_prefix * log n) work on a single add. Same
-        heuristic / threshold as ``async_expire``.
+        """Drop the closest-to-expiration record to make room for a new one."""
+        while self._expire_heap:
+            when_record = heappop(self._expire_heap)
+            record = when_record[1]
+            if self._expirations.get(record) != when_record[0]:
+                continue
+            self._async_remove(record)
+            return
 
-        This function must be run in from event loop.
+    def _maybe_rebuild_heap(self) -> None:
+        """Rebuild ``_expire_heap`` if stale entries dominate.
+
+        Re-adds of an existing record with a new TTL append a fresh
+        ``(when, record)`` and leave the prior tuple behind as stale;
+        eviction's stale-skip loop and ``async_expire`` already absorb
+        these, but unchecked accumulation lets a peer that just replays
+        cached records grow the heap arbitrarily between cleanups.
+        Same threshold as the long-standing rebuild in ``async_expire``:
+        only fire when stale entries outweigh live ones (heap > 2 x
+        expirations), and only above a minimum floor so a small cache
+        isn't rebuilt for nothing. Amortized cost is O(1) per push;
+        the O(N) rebuild fires at most once per N stale pushes.
         """
         expire_heap_len = len(self._expire_heap)
         if (
@@ -142,13 +159,6 @@ def _async_evict_oldest(self) -> None:
                 entry for entry in self._expire_heap if self._expirations.get(entry[1]) == entry[0]
             ]
             heapify(self._expire_heap)
-        while self._expire_heap:
-            when_record = heappop(self._expire_heap)
-            record = when_record[1]
-            if self._expirations.get(record) != when_record[0]:
-                continue
-            self._async_remove(record)
-            return
 
     def async_add_records(self, entries: Iterable[DNSRecord]) -> bool:
         """Add multiple records.
@@ -190,43 +200,23 @@ def async_expire(self, now: _float) -> list[DNSRecord]:
 
         :param now: The current time in milliseconds.
         """
-        if not (expire_heap_len := len(self._expire_heap)):
+        if not self._expire_heap:
             return []
 
         expired: list[DNSRecord] = []
-        # Find any expired records and add them to the to-delete list
         while self._expire_heap:
             when_record = self._expire_heap[0]
             when = when_record[0]
             if when > now:
                 break
             heappop(self._expire_heap)
-            # Check if the record hasn't been re-added to the heap
-            # with a different expiration time as it will be removed
-            # later when it reaches the top of the heap and its
-            # expiration time is met.
+            # Skip entries left behind by a TTL re-add; the live tuple is
+            # later in the heap and will be removed when it reaches the top.
             record = when_record[1]
             if self._expirations.get(record) == when:
                 expired.append(record)
 
-        # If the expiration heap grows larger than the number expirations
-        # times two, we clean it up to avoid keeping expired entries in
-        # the heap and consuming memory. We guard this with a minimum
-        # threshold to avoid cleaning up the heap too often when there are
-        # only a few scheduled expirations.
-        if (
-            expire_heap_len > _MIN_SCHEDULED_RECORD_EXPIRATION
-            and expire_heap_len > len(self._expirations) * 2
-        ):
-            # Remove any expired entries from the expiration heap
-            # that do not match the expiration time in the expirations
-            # as it means the record has been re-added to the heap
-            # with a different expiration time.
-            self._expire_heap = [
-                entry for entry in self._expire_heap if self._expirations.get(entry[1]) == entry[0]
-            ]
-            heapify(self._expire_heap)
-
+        self._maybe_rebuild_heap()
         self.async_remove_records(expired)
         return expired
 
diff --git a/tests/test_cache.py b/tests/test_cache.py
index 80607ffd..a802d851 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -439,7 +439,9 @@ async def test_cache_heap_multi_name_cleanup() -> None:
         )
         cache.async_add_records([record])
 
-    assert len(cache._expire_heap) == min_records_to_cleanup + 5
+    # ``_async_add`` rebuilds ``_expire_heap`` proactively when stale entries
+    # dominate (heap > 2x expirations), so the heap is already capped at
+    # ~one entry per unique record long before ``async_expire`` is called.
     assert len(cache.async_entries_with_name(name)) == 1
     assert len(cache.async_entries_with_name(name2)) == 5
 
@@ -473,7 +475,8 @@ async def test_cache_heap_pops_order() -> None:
         )
         cache.async_add_records([record])
 
-    assert len(cache._expire_heap) == min_records_to_cleanup + 5
+    # ``_async_add`` proactively rebuilds the heap when stale entries dominate,
+    # so the heap holds only one entry per unique record by this point.
     assert len(cache.async_entries_with_name(name)) == 1
     assert len(cache.async_entries_with_name(name2)) == 5
 
@@ -484,6 +487,18 @@ async def test_cache_heap_pops_order() -> None:
         start_ts = ts
 
 
+def _addr(name: str, idx: int, *, ttl: int = 120, created: float | None = None) -> r.DNSAddress:
+    """Build a DNSAddress with idx-derived payload for the bound/eviction tests."""
+    return r.DNSAddress(
+        name,
+        const._TYPE_A,
+        const._CLASS_IN,
+        ttl,
+        bytes((idx & 0xFF, (idx >> 8) & 0xFF, 0, 1)),
+        created=r.current_time_millis() if created is None else created,
+    )
+
+
 def test_cache_size_is_bounded() -> None:
     """A flood of unique-name records is capped at ``_MAX_CACHE_RECORDS``."""
     cache = r.DNSCache()
@@ -491,17 +506,7 @@ def test_cache_size_is_bounded() -> None:
     overflow = 1000
     flood_size = const._MAX_CACHE_RECORDS + overflow
 
-    cache.async_add_records(
-        r.DNSAddress(
-            f"flood-{i}.local.",
-            const._TYPE_A,
-            const._CLASS_IN,
-            120,
-            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
-            created=now + i,
-        )
-        for i in range(flood_size)
-    )
+    cache.async_add_records(_addr(f"flood-{i}.local.", i, created=now + i) for i in range(flood_size))
 
     total = sum(len(store) for store in cache.cache.values())
     assert total == const._MAX_CACHE_RECORDS
@@ -515,293 +520,139 @@ def test_cache_size_is_bounded() -> None:
 
 
 def test_cache_eviction_skips_stale_heap_entries() -> None:
-    """Eviction must skip stale ``_expire_heap`` entries left by TTL re-adds."""
+    """Eviction skips stale heap entries left by TTL re-adds."""
     cache = r.DNSCache()
     now = r.current_time_millis()
-    # Fill to one shy of the cap so a single add triggers exactly one eviction
     cache.async_add_records(
-        r.DNSAddress(
-            f"stale-{i}.local.",
-            const._TYPE_A,
-            const._CLASS_IN,
-            120,
-            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
-            created=now + i,
-        )
-        for i in range(const._MAX_CACHE_RECORDS)
+        _addr(f"stale-{i}.local.", i, created=now + i) for i in range(const._MAX_CACHE_RECORDS)
     )
     assert cache._total_records == const._MAX_CACHE_RECORDS
 
-    # Re-add the closest-to-expiration record with a longer TTL. The original
-    # (when, record) tuple stays at the head of _expire_heap as a stale entry;
-    # _expirations[record] now points to the new (later) expiration. Eviction
-    # must pop and skip that stale tuple before settling on a real victim.
+    # Re-add the closest-to-expiration record with a longer TTL; the prior
+    # ``(when, record)`` tuple stays as stale, eviction must skip it.
     victim_name = "stale-0.local."
-    refreshed = r.DNSAddress(
-        victim_name,
-        const._TYPE_A,
-        const._CLASS_IN,
-        7200,
-        bytes((0, 0, 0, 1)),
-        created=now + 0,
-    )
-    cache.async_add_records([refreshed])
+    cache.async_add_records([_addr(victim_name, 0, ttl=7200, created=now)])
     assert cache._total_records == const._MAX_CACHE_RECORDS
 
-    # Force one eviction. The next-oldest legitimate record must go; the
-    # refreshed record stays alive.
-    cache.async_add_records(
-        [
-            r.DNSAddress(
-                "trigger.local.",
-                const._TYPE_A,
-                const._CLASS_IN,
-                120,
-                b"\xff\xff\xff\xff",
-                created=now + const._MAX_CACHE_RECORDS,
-            )
-        ]
-    )
+    cache.async_add_records([_addr("trigger.local.", 0xFFFF, created=now + const._MAX_CACHE_RECORDS)])
     assert cache._total_records == const._MAX_CACHE_RECORDS
     assert victim_name in cache.cache
     assert "stale-1.local." not in cache.cache
 
 
 def test_cache_eviction_victim_shares_key_with_new_record() -> None:
-    """Eviction must not orphan the new record when it collides on ``record.key``.
-
-    If the closest-to-expiration record is the *last* one stored under
-    ``record.key`` and the incoming record uses the same key,
-    ``_remove_key`` deletes ``self.cache[record.key]`` during eviction.
-    A previous capture of ``store = self.cache.get(record.key)`` would
-    then write the new record into an orphaned dict not reachable via
-    the cache. Pin that the new record is reachable.
-    """
+    """Inserting a record whose key collides with the eviction victim keeps it reachable."""
     cache = r.DNSCache()
     now = r.current_time_millis()
-    # Fill the cache to one shy of the cap with unique-name records, each
-    # with a later created time than the shared-key victim below.
     cache.async_add_records(
-        r.DNSAddress(
-            f"filler-{i}.local.",
-            const._TYPE_A,
-            const._CLASS_IN,
-            120,
-            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
-            created=now + 1000 + i,
-        )
-        for i in range(const._MAX_CACHE_RECORDS - 1)
+        _addr(f"filler-{i}.local.", i, created=now + 1000 + i) for i in range(const._MAX_CACHE_RECORDS - 1)
     )
 
-    # Insert a record at "shared.local." with the earliest expiration —
-    # this is the one closest_to_expiration eviction will pick.
+    # Insert at "shared.local." with the earliest expiration so eviction
+    # picks it. ``_remove_key`` then deletes ``cache["shared.local."]``.
     shared_key = "shared.local."
-    old_shared = r.DNSAddress(
-        shared_key,
-        const._TYPE_A,
-        const._CLASS_IN,
-        120,
-        b"\x01\x02\x03\x04",
-        created=now,
-    )
-    cache.async_add_records([old_shared])
+    cache.async_add_records([_addr(shared_key, 0x0102, created=now)])
     assert cache._total_records == const._MAX_CACHE_RECORDS
-    assert shared_key in cache.cache
-
-    # Add a new record with the SAME key. Eviction will remove old_shared,
-    # which empties cache[shared_key], so _remove_key deletes that bucket.
-    # If _async_add captured the store before eviction it will now be
-    # writing into an orphaned dict.
-    new_shared = r.DNSAddress(
-        shared_key,
-        const._TYPE_A,
-        const._CLASS_IN,
-        120,
-        b"\x05\x06\x07\x08",
-        created=now + 999,
-    )
+
+    # Adding a new record under the SAME key: a pre-eviction-captured
+    # ``store`` would write into an orphaned dict; the fix re-resolves.
+    new_shared = _addr(shared_key, 0x0506, created=now + 999)
     cache.async_add_records([new_shared])
 
-    # The new record must be reachable via the cache.
     assert shared_key in cache.cache, "new record orphaned: cache bucket missing"
     assert new_shared in cache.cache[shared_key]
     assert cache.async_get_unique(new_shared) == new_shared
-    # And the counter accounting must still match observable state.
     total = sum(len(store) for store in cache.cache.values())
     assert total == cache._total_records
 
 
-def test_cache_eviction_rebuilds_heap_when_mostly_stale() -> None:
-    """Eviction rebuilds ``_expire_heap`` up front when stale entries dominate.
-
-    Without the rebuild, a single ``_async_add`` at cap could pop a long
-    stale prefix one entry at a time — O(stale_prefix * log n). The
-    rebuild is the same heuristic ``async_expire`` already uses: when
-    ``len(heap) > 2 * len(expirations)``, the heap is reconstructed
-    from only the valid entries in one O(n) pass before the pop loop.
-    """
+def test_cache_re_add_flood_does_not_grow_heap_unbounded() -> None:
+    """Replaying cached records with shifting TTLs cannot grow ``_expire_heap`` unbounded."""
     cache = r.DNSCache()
     now = r.current_time_millis()
-    base_kwargs = {"type_": const._TYPE_A, "class_": const._CLASS_IN}
-
-    cache.async_add_records(
-        r.DNSAddress(
-            f"r-{i}.local.",
-            ttl=120,
-            address=bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
-            created=now + i,
-            **base_kwargs,
-        )
-        for i in range(const._MAX_CACHE_RECORDS)
-    )
-    # Two re-adds with distinct TTLs leave two stale entries per record
-    # in ``_expire_heap`` while ``_expirations`` stays at MAX. Heap reaches
-    # ~3x MAX, which trips the ``len(heap) > 2 * len(expirations)``
-    # threshold on the next eviction.
-    for ttl in (7200, 14400):
+    # Stay below the cache cap so eviction never fires; the attack here is
+    # heap growth via re-add, not cap saturation. Clear the
+    # ``_MIN_SCHEDULED_RECORD_EXPIRATION`` floor so the rebuild engages.
+    record_count = 200
+    cache.async_add_records(_addr(f"flood-{i}.local.", i, created=now) for i in range(record_count))
+    assert cache._total_records == record_count
+
+    # 10 cycles x ``record_count`` stale pushes each. Without
+    # ``_maybe_rebuild_heap`` firing inside ``_async_add``, the heap would
+    # grow to ~11 x record_count.
+    for cycle in range(10):
         cache.async_add_records(
-            r.DNSAddress(
-                f"r-{i}.local.",
-                ttl=ttl,
-                address=bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
-                created=now + i,
-                **base_kwargs,
-            )
-            for i in range(const._MAX_CACHE_RECORDS)
+            _addr(f"flood-{i}.local.", i, ttl=7200 + cycle, created=now) for i in range(record_count)
         )
-    assert len(cache._expire_heap) > 2 * len(cache._expirations)
 
-    # One more insert pushes us over the cap; eviction fires and must
-    # rebuild before scanning.
-    cache.async_add_records(
-        [
-            r.DNSAddress(
-                "trigger.local.",
-                ttl=120,
-                address=b"\xff\xff\xff\xff",
-                created=now + const._MAX_CACHE_RECORDS,
-                **base_kwargs,
-            )
-        ]
-    )
-    # Rebuild dropped the stale entries; heap and expirations now match.
-    assert len(cache._expire_heap) == len(cache._expirations) == const._MAX_CACHE_RECORDS
-    assert cache._total_records == const._MAX_CACHE_RECORDS
+    # Heap is bounded near the rebuild threshold; ``+ record_count`` of slack
+    # to stay resilient to where in a re-add cycle the rebuild last fired.
+    assert len(cache._expire_heap) <= 2 * len(cache._expirations) + record_count
+    assert cache._total_records == record_count
 
 
 def test_cache_eviction_decrements_total_records() -> None:
-    """Natural removal (goodbyes, expirations) must keep ``_total_records`` in sync."""
+    """Natural removal (goodbyes, expirations) keeps ``_total_records`` in sync."""
     cache = r.DNSCache()
     now = r.current_time_millis()
-    records = [
-        r.DNSAddress(
-            f"sync-{i}.local.",
-            const._TYPE_A,
-            const._CLASS_IN,
-            120,
-            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
-            created=now,
-        )
-        for i in range(50)
-    ]
+    records = [_addr(f"sync-{i}.local.", i, created=now) for i in range(50)]
     cache.async_add_records(records)
     assert cache._total_records == 50
 
     cache.async_remove_records(records[:20])
     assert cache._total_records == 30
 
-    # async_expire on a future time should drop the rest
     cache.async_expire(now + (200 * 1000))
     assert cache._total_records == 0
     assert not cache.cache
 
 
 def test_cache_total_records_invariant_under_mixed_ops() -> None:
-    """``_total_records`` must equal ``sum(len(store) for store in cache.values())``.
-
-    Walks the counter through every code path that touches it — new inserts,
-    re-adds of an existing record (no increment), DNSService (extra
-    service_cache write), DNSNsec (stored but not counted as "new" by the
-    flag), shared-key inserts that empty their bucket on removal, full-cap
-    eviction, async_expire, manual async_remove_records — and asserts the
-    invariant after every step. Any future change that misses an increment
-    or doubles a decrement will trip this immediately; ``_total_records`` is
-    ``cdef unsigned int`` in the Cython build, so silent underflow would
-    otherwise propagate as a permanent eviction-on-every-add storm.
-    """
+    """``_total_records`` stays equal to the sum of bucket sizes across all touched paths."""
     cache = r.DNSCache()
     now = r.current_time_millis()
 
     def actual() -> int:
         return sum(len(store) for store in cache.cache.values())
 
-    # Fresh inserts: counter follows insert count exactly.
-    addrs = [
-        r.DNSAddress(
-            f"mix-{i}.local.", const._TYPE_A, const._CLASS_IN, 120, bytes((i, 0, 0, 1)), created=now + i
-        )
-        for i in range(20)
-    ]
+    addrs = [_addr(f"mix-{i}.local.", i, created=now + i) for i in range(20)]
     cache.async_add_records(addrs)
     assert cache._total_records == actual() == 20
 
-    # Re-add of an identical record: no increment (already present).
+    # Re-add of an identical record: no increment.
     cache.async_add_records([addrs[0]])
     assert cache._total_records == actual() == 20
 
-    # DNSService writes service_cache too — counter must still match cache size.
+    # DNSService writes service_cache too — counter still matches cache size.
     svc = r.DNSService("svc.local.", const._TYPE_SRV, const._CLASS_IN, 120, 0, 0, 80, "host.local.")
     cache.async_add_records([svc])
     assert cache._total_records == actual() == 21
     cache.async_remove_records([svc])
     assert cache._total_records == actual() == 20
 
-    # DNSNsec is stored but excluded from the "new" return value; the counter
-    # still tracks it because it occupies a bucket slot.
-    nsec = r.DNSNsec(
-        "nsec.local.",
-        const._TYPE_NSEC,
-        const._CLASS_IN,
-        120,
-        "nsec.local.",
-        [const._TYPE_A],
-    )
+    # DNSNsec is stored but excluded from the "new" return; counter tracks it anyway.
+    nsec = r.DNSNsec("nsec.local.", const._TYPE_NSEC, const._CLASS_IN, 120, "nsec.local.", [const._TYPE_A])
     cache.async_add_records([nsec])
     assert cache._total_records == actual() == 21
     cache.async_remove_records([nsec])
     assert cache._total_records == actual() == 20
 
-    # Shared-key insert/remove: emptying a bucket removes the cache key, but
-    # the counter must still decrement only by the number of records gone.
-    shared_a = r.DNSAddress(
-        "shared.local.", const._TYPE_A, const._CLASS_IN, 120, b"\x01\x01\x01\x01", created=now
-    )
-    shared_b = r.DNSAddress(
-        "shared.local.", const._TYPE_A, const._CLASS_IN, 120, b"\x02\x02\x02\x02", created=now
-    )
+    # Shared-key insert/remove: emptying the bucket drops the cache key but
+    # counter decrements only by the records that left.
+    shared_a = _addr("shared.local.", 0x0101, created=now)
+    shared_b = _addr("shared.local.", 0x0202, created=now)
     cache.async_add_records([shared_a, shared_b])
     assert cache._total_records == actual() == 22
     cache.async_remove_records([shared_a, shared_b])
     assert cache._total_records == actual() == 20
     assert "shared.local." not in cache.cache
 
-    # async_expire path: counter must follow the records dropped.
     cache.async_expire(now + (200 * 1000))
     assert cache._total_records == actual() == 0
     assert not cache.cache
 
     # Full-cap eviction loop: counter never grows past the cap, never drifts.
-    cap_records = [
-        r.DNSAddress(
-            f"cap-{i}.local.",
-            const._TYPE_A,
-            const._CLASS_IN,
-            120,
-            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
-            created=now + i,
-        )
-        for i in range(const._MAX_CACHE_RECORDS + 50)
-    ]
+    cap_records = [_addr(f"cap-{i}.local.", i, created=now + i) for i in range(const._MAX_CACHE_RECORDS + 50)]
     for rec in cap_records:
         cache.async_add_records([rec])
         assert cache._total_records == actual()

From 052b1e2be2ec60e9697adec6640b221fae4331f3 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@home-assistant.io>
Date: Sun, 17 May 2026 20:56:41 -0700
Subject: [PATCH 5/9] test: cover empty-heap exit branch in _async_evict_oldest
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

``_async_evict_oldest`` falls through to a silent return if
``_expire_heap`` is empty. By the cache invariant every record
counted in ``_total_records`` has a heap entry, so this branch is
unreachable in normal operation — but it's a defense-in-depth path
worth pinning. Force the broken state directly
(``_total_records = MAX, _expire_heap = []``) and verify eviction
returns gracefully while the subsequent insert still completes.
---
 tests/test_cache.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/test_cache.py b/tests/test_cache.py
index a802d851..7c095911 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -519,6 +519,19 @@ def test_cache_size_is_bounded() -> None:
         assert f"flood-{i}.local." in cache.cache
 
 
+def test_cache_eviction_empty_heap_returns_without_evicting() -> None:
+    """Eviction tolerates an empty ``_expire_heap`` (invariant-violation safety net)."""
+    cache = r.DNSCache()
+    # By the cache invariant every record in ``_total_records`` has a heap
+    # entry, so eviction should never see an empty heap. Force the broken
+    # state directly to pin the defensive behaviour: ``_async_evict_oldest``
+    # returns without raising and the subsequent insert still lands.
+    cache._total_records = const._MAX_CACHE_RECORDS
+    cache._expire_heap = []
+    cache.async_add_records([_addr("post-empty.local.", 0)])
+    assert "post-empty.local." in cache.cache
+
+
 def test_cache_eviction_skips_stale_heap_entries() -> None:
     """Eviction skips stale heap entries left by TTL re-adds."""
     cache = r.DNSCache()

From a2a2d34c62acb32c16630e7966ec42a28caf3413 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@home-assistant.io>
Date: Sun, 17 May 2026 21:09:47 -0700
Subject: [PATCH 6/9] test: pin counter drift, restructure at-cap bench, trim
 docstring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three review follow-ups on #1718:

**Bench actually measures eviction (Copilot inline at L43).**
``test_cache_add_at_cap_evicts`` previously created an empty cache
and added ``_MAX_CACHE_RECORDS + 1000`` records, so ~91% of the
measured time was the below-cap fill path and only the final 1000
inserts evicted — the CodSpeed number was not representative of the
eviction performance the name implies. Restructure to pre-fill the
cache to the cap *outside* the timed body and consume one fresh
unique record per benchmark iteration. Each iteration measures
exactly one ``_async_add`` + ``_async_evict_oldest`` cycle, with the
cache permanently at the cap. ``_make_records`` gains a ``prefix=``
parameter so the fill records and the overflow pool don't collide on
``name``.

**Defensive test pins counter state (Kōan suggestion ＃3).**
``test_cache_eviction_empty_heap_returns_without_evicting`` already
asserts the new record lands; add the corresponding
``_total_records == _MAX_CACHE_RECORDS + 1`` assertion so a future
change to the broken-state recovery (refuse the add, clamp the
counter) fails this test explicitly rather than silently shifting
behaviour.

**One-line docstring (Kōan suggestion ＃2).**
``_maybe_rebuild_heap`` had an 11-line docstring restating the body
and including amortized-cost analysis. Per CLAUDE.md the why lives
at the call site comment in ``_async_add`` (already does); the
function's contract is a single line.

**No log on the empty-heap fall-through (Kōan suggestion ＃1).**
Kōan suggested an assertion or debug log; the assertion would crash
production on invariant drift, and even a guarded ``log.debug(...)``
adds Python attribute-lookup overhead in the function the cap
overflow path runs through. Drift is already caught loudly by
``test_cache_total_records_invariant_under_mixed_ops``. Comment
explains the deliberate silence.
---
 src/zeroconf/_cache.py               | 17 ++++-------------
 tests/benchmarks/test_cache_bound.py | 25 +++++++++++++++++--------
 tests/test_cache.py                  |  6 +++++-
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/src/zeroconf/_cache.py b/src/zeroconf/_cache.py
index 3c45b733..c9249946 100644
--- a/src/zeroconf/_cache.py
+++ b/src/zeroconf/_cache.py
@@ -135,21 +135,12 @@ def _async_evict_oldest(self) -> None:
                 continue
             self._async_remove(record)
             return
+        # Heap-empty fall-through is unreachable by the cache invariant
+        # (every counted record has a heap entry); accounting drift would
+        # surface in tests via test_cache_total_records_invariant_under_mixed_ops.
 
     def _maybe_rebuild_heap(self) -> None:
-        """Rebuild ``_expire_heap`` if stale entries dominate.
-
-        Re-adds of an existing record with a new TTL append a fresh
-        ``(when, record)`` and leave the prior tuple behind as stale;
-        eviction's stale-skip loop and ``async_expire`` already absorb
-        these, but unchecked accumulation lets a peer that just replays
-        cached records grow the heap arbitrarily between cleanups.
-        Same threshold as the long-standing rebuild in ``async_expire``:
-        only fire when stale entries outweigh live ones (heap > 2 x
-        expirations), and only above a minimum floor so a small cache
-        isn't rebuilt for nothing. Amortized cost is O(1) per push;
-        the O(N) rebuild fires at most once per N stale pushes.
-        """
+        """Rebuild ``_expire_heap`` when stale entries dominate live ones."""
         expire_heap_len = len(self._expire_heap)
         if (
             expire_heap_len > _MIN_SCHEDULED_RECORD_EXPIRATION
diff --git a/tests/benchmarks/test_cache_bound.py b/tests/benchmarks/test_cache_bound.py
index 0ccb6bdd..07fbf107 100644
--- a/tests/benchmarks/test_cache_bound.py
+++ b/tests/benchmarks/test_cache_bound.py
@@ -8,14 +8,14 @@
 from zeroconf.const import _CLASS_IN, _MAX_CACHE_RECORDS, _TYPE_A
 
 
-def _make_records(count: int, now: float) -> list[DNSAddress]:
+def _make_records(count: int, now: float, prefix: str = "bench") -> list[DNSAddress]:
     return [
         DNSAddress(
-            f"bench-{i}.local.",
+            f"{prefix}-{i}.local.",
             _TYPE_A,
             _CLASS_IN,
             120,
-            bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
+            bytes(((i >> 24) & 0xFF, (i >> 16) & 0xFF, (i >> 8) & 0xFF, i & 0xFF)),
             created=now + i,
         )
         for i in range(count)
@@ -34,11 +34,20 @@ def _add() -> None:
 
 
 def test_cache_add_at_cap_evicts(benchmark: BenchmarkFixture) -> None:
-    """Steady-state add at the cap: every new record forces one eviction."""
+    """Steady-state add at the cap: every measured insert forces one eviction.
+
+    Pre-fills the cache to ``_MAX_CACHE_RECORDS`` outside the timed body so
+    only the eviction-path adds are measured. Each benchmark iteration
+    consumes one fresh unique record from a pre-built pool, keeping the
+    cache permanently at the cap and the work per iteration to a single
+    ``_async_add`` + ``_async_evict_oldest`` cycle.
+    """
     now = current_time_millis()
-    overflow_records = _make_records(_MAX_CACHE_RECORDS + 1000, now)
+    cache = DNSCache()
+    cache.async_add_records(_make_records(_MAX_CACHE_RECORDS, now, prefix="fill"))
+    # Large pool so the iterator outlives any reasonable codspeed run count.
+    pool = iter(_make_records(100_000, now + _MAX_CACHE_RECORDS, prefix="evict"))
 
     @benchmark
-    def _flood() -> None:
-        cache = DNSCache()
-        cache.async_add_records(overflow_records)
+    def _evict_one() -> None:
+        cache.async_add_records([next(pool)])
diff --git a/tests/test_cache.py b/tests/test_cache.py
index 7c095911..8c4fc47b 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -525,11 +525,15 @@ def test_cache_eviction_empty_heap_returns_without_evicting() -> None:
     # By the cache invariant every record in ``_total_records`` has a heap
     # entry, so eviction should never see an empty heap. Force the broken
     # state directly to pin the defensive behaviour: ``_async_evict_oldest``
-    # returns without raising and the subsequent insert still lands.
+    # returns without raising and the subsequent insert still lands. Since
+    # eviction can't free space, the counter is allowed to drift past the
+    # cap by exactly one — pinned so a future change to the recovery
+    # semantics (e.g., refusing the add or clamping) fails this test.
     cache._total_records = const._MAX_CACHE_RECORDS
     cache._expire_heap = []
     cache.async_add_records([_addr("post-empty.local.", 0)])
     assert "post-empty.local." in cache.cache
+    assert cache._total_records == const._MAX_CACHE_RECORDS + 1
 
 
 def test_cache_eviction_skips_stale_heap_entries() -> None:

From 38b151779db3bf4351c5902366fed16f9b14c1c2 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@home-assistant.io>
Date: Sun, 17 May 2026 21:13:02 -0700
Subject: [PATCH 7/9] test: pin DNSNsec flood is bounded by _MAX_CACHE_RECORDS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ``new = is_new and not isinstance(record, DNSNsec)`` line in
``_async_add`` only gates the listener-notify return value — the
counter increment, the cache store, and the cap check are all
type-agnostic. A peer can't flood NSec records past the cap today.
But the existing ``new`` flag is exactly the shape a future refactor
might extend to ``_total_records`` ("DNSNsec isn't real data, don't
count it"), which would silently open a bypass. Pin
``_total_records == _MAX_CACHE_RECORDS`` after pushing
``_MAX_CACHE_RECORDS + 100`` unique-name DNSNsec records so any such
regression fails this test loudly.
---
 tests/test_cache.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tests/test_cache.py b/tests/test_cache.py
index 8c4fc47b..31065076 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -583,6 +583,26 @@ def test_cache_eviction_victim_shares_key_with_new_record() -> None:
     assert total == cache._total_records
 
 
+def test_cache_dnsnsec_flood_is_bounded() -> None:
+    """DNSNsec records honour ``_MAX_CACHE_RECORDS`` (no bypass via the ``new`` flag)."""
+    cache = r.DNSCache()
+    overflow = 100
+    cache.async_add_records(
+        r.DNSNsec(
+            f"nsec-{i}.local.",
+            const._TYPE_NSEC,
+            const._CLASS_IN,
+            120,
+            f"nsec-{i}.local.",
+            [const._TYPE_A],
+        )
+        for i in range(const._MAX_CACHE_RECORDS + overflow)
+    )
+    assert cache._total_records == const._MAX_CACHE_RECORDS
+    total = sum(len(store) for store in cache.cache.values())
+    assert total == const._MAX_CACHE_RECORDS
+
+
 def test_cache_re_add_flood_does_not_grow_heap_unbounded() -> None:
     """Replaying cached records with shifting TTLs cannot grow ``_expire_heap`` unbounded."""
     cache = r.DNSCache()

From e5b509a93107ede3467c9d98cbcc789613230679 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@home-assistant.io>
Date: Sun, 17 May 2026 21:21:07 -0700
Subject: [PATCH 8/9] test: DNSNsec at-cap eviction + unbounded benchmark pool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

**Cross-type NSec spot-test (Kōan).** Add
``test_cache_dnsnsec_at_cap_evicts_prior_record``: fill the cache to
``_MAX_CACHE_RECORDS`` with ``DNSAddress`` records, add a single
``DNSNsec``, assert the counter stays at cap (NSEC routed through
eviction), the NSEC is reachable via ``cache.cache[nsec.key]``
(orphan fix covers NSEC's key-collision path), and the earliest
fill record was the eviction victim. Complements the existing flood
test — together they pin NSEC against both ``new``-flag drift and
cross-type ``_async_add`` regressions. Uses direct cache lookup
instead of ``async_get_unique`` because the latter's type stub
excludes ``DNSNsec``.

**Unbounded benchmark generator (Copilot inline at
test_cache_bound.py:53).** The previous bench used
``iter(_make_records(100_000, ...))`` which raises ``StopIteration``
once exhausted — fast operations under pytest-codspeed can exceed
that count. Replace with ``_unbounded_records`` (``itertools.count``
+ lazy ``DNSAddress`` construction) so the iterator survives any
iteration count CodSpeed chooses. The construction work moves into
the timed body but is a constant per-iteration overhead and doesn't
distort relative measurements across runs.
---
 tests/benchmarks/test_cache_bound.py | 29 +++++++++++++++++++++-------
 tests/test_cache.py                  | 25 ++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/tests/benchmarks/test_cache_bound.py b/tests/benchmarks/test_cache_bound.py
index 07fbf107..774129e3 100644
--- a/tests/benchmarks/test_cache_bound.py
+++ b/tests/benchmarks/test_cache_bound.py
@@ -2,13 +2,16 @@
 
 from __future__ import annotations
 
+from collections.abc import Iterator
+from itertools import count
+
 from pytest_codspeed import BenchmarkFixture
 
 from zeroconf import DNSAddress, DNSCache, current_time_millis
 from zeroconf.const import _CLASS_IN, _MAX_CACHE_RECORDS, _TYPE_A
 
 
-def _make_records(count: int, now: float, prefix: str = "bench") -> list[DNSAddress]:
+def _make_records(count_: int, now: float, prefix: str = "bench") -> list[DNSAddress]:
     return [
         DNSAddress(
             f"{prefix}-{i}.local.",
@@ -18,10 +21,23 @@ def _make_records(count: int, now: float, prefix: str = "bench") -> list[DNSAddr
             bytes(((i >> 24) & 0xFF, (i >> 16) & 0xFF, (i >> 8) & 0xFF, i & 0xFF)),
             created=now + i,
         )
-        for i in range(count)
+        for i in range(count_)
     ]
 
 
+def _unbounded_records(now: float, prefix: str = "evict") -> Iterator[DNSAddress]:
+    """Unbounded generator of unique-name DNSAddress records."""
+    for i in count():
+        yield DNSAddress(
+            f"{prefix}-{i}.local.",
+            _TYPE_A,
+            _CLASS_IN,
+            120,
+            bytes(((i >> 24) & 0xFF, (i >> 16) & 0xFF, (i >> 8) & 0xFF, i & 0xFF)),
+            created=now + i,
+        )
+
+
 def test_cache_add_below_cap(benchmark: BenchmarkFixture) -> None:
     """Adding records while the cache is well below the cap (no eviction)."""
     now = current_time_millis()
@@ -38,15 +54,14 @@ def test_cache_add_at_cap_evicts(benchmark: BenchmarkFixture) -> None:
 
     Pre-fills the cache to ``_MAX_CACHE_RECORDS`` outside the timed body so
     only the eviction-path adds are measured. Each benchmark iteration
-    consumes one fresh unique record from a pre-built pool, keeping the
-    cache permanently at the cap and the work per iteration to a single
-    ``_async_add`` + ``_async_evict_oldest`` cycle.
+    pulls one fresh unique record from an unbounded generator, keeping the
+    cache permanently at the cap. The generator avoids the iteration-count
+    cap that a pre-built pool would impose for very fast operations.
     """
     now = current_time_millis()
     cache = DNSCache()
     cache.async_add_records(_make_records(_MAX_CACHE_RECORDS, now, prefix="fill"))
-    # Large pool so the iterator outlives any reasonable codspeed run count.
-    pool = iter(_make_records(100_000, now + _MAX_CACHE_RECORDS, prefix="evict"))
+    pool = _unbounded_records(now + _MAX_CACHE_RECORDS)
 
     @benchmark
     def _evict_one() -> None:
diff --git a/tests/test_cache.py b/tests/test_cache.py
index 31065076..aeb3a2ab 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -583,6 +583,31 @@ def test_cache_eviction_victim_shares_key_with_new_record() -> None:
     assert total == cache._total_records
 
 
+def test_cache_dnsnsec_at_cap_evicts_prior_record() -> None:
+    """A single DNSNsec arriving at the cap evicts one prior record and stays reachable."""
+    cache = r.DNSCache()
+    now = r.current_time_millis()
+    cache.async_add_records(
+        _addr(f"fill-{i}.local.", i, created=now + i) for i in range(const._MAX_CACHE_RECORDS)
+    )
+    assert cache._total_records == const._MAX_CACHE_RECORDS
+
+    nsec = r.DNSNsec(
+        "nsec-arrival.local.",
+        const._TYPE_NSEC,
+        const._CLASS_IN,
+        120,
+        "nsec-arrival.local.",
+        [const._TYPE_A],
+    )
+    cache.async_add_records([nsec])
+
+    assert cache._total_records == const._MAX_CACHE_RECORDS
+    assert nsec in cache.cache[nsec.key]
+    # The earliest-created fill record is gone (FIFO-ish eviction).
+    assert "fill-0.local." not in cache.cache
+
+
 def test_cache_dnsnsec_flood_is_bounded() -> None:
     """DNSNsec records honour ``_MAX_CACHE_RECORDS`` (no bypass via the ``new`` flag)."""
     cache = r.DNSCache()

From 39ab7550d565fcb985b012275044e3b82fb0e58c Mon Sep 17 00:00:00 2001
From: "J. Nick Koston" <nick@home-assistant.io>
Date: Sun, 17 May 2026 21:23:26 -0700
Subject: [PATCH 9/9] chore: drop stale comment from _async_evict_oldest
 fall-through
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The comment said drift would surface via
``test_cache_total_records_invariant_under_mixed_ops``, but that
test only exercises well-formed paths and never produces a
heap-empty-with-counter-nonzero state. The test that actually pins
the broken-invariant recovery is
``test_cache_eviction_empty_heap_returns_without_evicting``. Per
``CLAUDE.md`` ("default to no comments"; cross-references rot when
referents are renamed) the comment shouldn't exist at all — the
function name and docstring already describe the contract, and the
while-loop fall-through is self-explanatory.
---
 src/zeroconf/_cache.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/zeroconf/_cache.py b/src/zeroconf/_cache.py
index c9249946..df60982b 100644
--- a/src/zeroconf/_cache.py
+++ b/src/zeroconf/_cache.py
@@ -135,9 +135,6 @@ def _async_evict_oldest(self) -> None:
                 continue
             self._async_remove(record)
             return
-        # Heap-empty fall-through is unreachable by the cache invariant
-        # (every counted record has a heap entry); accounting drift would
-        # surface in tests via test_cache_total_records_invariant_under_mixed_ops.
 
     def _maybe_rebuild_heap(self) -> None:
         """Rebuild ``_expire_heap`` when stale entries dominate live ones."""