Skip to content

Commit c24fa39

Browse files
committed
fix: bound DNSCache record count to prevent unbounded LAN-driven growth
Closes #1715. A LAN-local peer multicasting unique-name mDNS responses could grow ``DNSCache.cache`` / ``_expirations`` / ``service_cache`` / ``_expire_heap`` without bound. ``_DNS_PTR_MIN_TTL = 1125`` (RFC 6762 recommendation for ServiceBrowser refresh-floor) actually extends attacker-injected PTR records to ~19 min before they expire naturally, and ``async_expire`` only runs every ``_CACHE_CLEANUP_INTERVAL = 10`` s — too slow to keep up with a sustained flood. On memory-constrained deployments (Home Assistant on a Raspberry Pi is the canonical victim) this trivially OOMs the process. Add a hard cap (``_MAX_CACHE_RECORDS = 10_000`` in ``const.py``) on the total number of records the cache will hold. ``DNSCache`` now tracks ``_total_records`` (incremented on genuinely-new inserts in ``_async_add``, decremented in ``_async_remove``). When the cap is hit, ``_async_evict_oldest`` heappops the closest-to-expiration record and removes it before inserting the new one. Reuses the existing ``_expire_heap`` for victim selection — O(log n) — and skips stale heap entries via the same expiration-equality check ``async_expire`` already uses. All four touched lines compile to direct C int ops in the Cython build (score-0 in ``cython -a`` annotation); the eviction call is a C-level vtable dispatch fired only on overflow. Wall-clock check: below-cap add stays at ~160 ns/record; over-cap add with constant eviction is ~195 ns/record (+36 ns for the heappop + cache delete). Per-source-IP record quotas — the reporter's "Better" suggestion — are deferred to a follow-up PR. This commit closes the unbounded- growth bug class on its own; the quota work changes the fairness properties but not the memory bound. CWE-400 (Uncontrolled Resource Consumption). LAN-local attack surface only.
1 parent 64d143d commit c24fa39

5 files changed

Lines changed: 143 additions & 3 deletions

File tree

src/zeroconf/_cache.pxd

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ cdef object _UNIQUE_RECORD_TYPES
1919
cdef unsigned int _TYPE_PTR
2020
cdef cython.uint _ONE_SECOND
2121
cdef unsigned int _MIN_SCHEDULED_RECORD_EXPIRATION
22+
cdef unsigned int _MAX_CACHE_RECORDS
2223

2324

2425
@cython.locals(record_cache=dict)
@@ -31,6 +32,7 @@ cdef class DNSCache:
3132
cdef public cython.dict service_cache
3233
cdef public list _expire_heap
3334
cdef public dict _expirations
35+
cdef public unsigned int _total_records
3436

3537
cpdef bint async_add_records(self, object entries)
3638

@@ -60,10 +62,14 @@ cdef class DNSCache:
6062
service_store=cython.dict,
6163
service_record=DNSService,
6264
when=object,
63-
new=bint
65+
new=bint,
66+
is_new=bint
6467
)
6568
cdef bint _async_add(self, DNSRecord record)
6669

70+
@cython.locals(record=DNSRecord, when_record=tuple)
71+
cdef void _async_evict_oldest(self)
72+
6773
@cython.locals(service_record=DNSService)
6874
cdef void _async_remove(self, DNSRecord record)
6975

src/zeroconf/_cache.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
DNSText,
3838
)
3939
from ._utils.time import current_time_millis
40-
from .const import _ONE_SECOND, _TYPE_PTR
40+
from .const import _MAX_CACHE_RECORDS, _ONE_SECOND, _TYPE_PTR
4141

4242
_UNIQUE_RECORD_TYPES = (DNSAddress, DNSHinfo, DNSPointer, DNSText, DNSService)
4343
_UniqueRecordsType = DNSAddress | DNSHinfo | DNSPointer | DNSText | DNSService
@@ -72,6 +72,7 @@ def __init__(self) -> None:
7272
self._expire_heap: list[tuple[float, DNSRecord]] = []
7373
self._expirations: dict[DNSRecord, float] = {}
7474
self.service_cache: _DNSRecordCacheType = {}
75+
self._total_records: int = 0
7576

7677
# Functions prefixed with async_ are NOT threadsafe and must
7778
# be run in the event loop.
@@ -91,7 +92,16 @@ def _async_add(self, record: _DNSRecord) -> bool:
9192
# direction would return the old incorrect entry.
9293
if (store := self.cache.get(record.key)) is None:
9394
store = self.cache[record.key] = {}
94-
new = record not in store and not isinstance(record, DNSNsec)
95+
is_new = record not in store
96+
# Bound total cache size; evict closest-to-expiration entry to
97+
# make room before inserting a new record. Prevents a LAN-local
98+
# flood of unique-name records from growing the cache without
99+
# bound (RFC 6762 §10 advisory caching, defense-in-depth).
100+
if is_new and self._total_records >= _MAX_CACHE_RECORDS:
101+
self._async_evict_oldest()
102+
new = is_new and not isinstance(record, DNSNsec)
103+
if is_new:
104+
self._total_records += 1
95105
store[record] = record
96106
when = record.created + (record.ttl * 1000)
97107
if self._expirations.get(record) != when:
@@ -106,6 +116,22 @@ def _async_add(self, record: _DNSRecord) -> bool:
106116
service_store[service_record] = service_record
107117
return new
108118

119+
def _async_evict_oldest(self) -> None:
120+
"""Drop the closest-to-expiration record to make room for a new one.
121+
122+
Skips stale heap entries (records re-added with a different TTL),
123+
which mirrors the staleness check in ``async_expire``.
124+
125+
This function must be run in from event loop.
126+
"""
127+
while self._expire_heap:
128+
when_record = heappop(self._expire_heap)
129+
record = when_record[1]
130+
if self._expirations.get(record) != when_record[0]:
131+
continue
132+
self._async_remove(record)
133+
return
134+
109135
def async_add_records(self, entries: Iterable[DNSRecord]) -> bool:
110136
"""Add multiple records.
111137
@@ -129,6 +155,7 @@ def _async_remove(self, record: _DNSRecord) -> None:
129155
_remove_key(self.service_cache, service_record.server_key, service_record)
130156
_remove_key(self.cache, record.key, record)
131157
self._expirations.pop(record, None)
158+
self._total_records -= 1
132159

133160
def async_remove_records(self, entries: Iterable[DNSRecord]) -> None:
134161
"""Remove multiple records.

src/zeroconf/const.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@
5959
# level of rate limit and safe guards so we use 1/4 of the recommended value
6060
_DNS_PTR_MIN_TTL = 1125
6161

62+
# Upper bound on the number of records the DNSCache will hold before it
63+
# starts evicting the closest-to-expiration entry to make room for new
64+
# arrivals. Bounds the memory a malicious LAN peer can force the cache
65+
# to retain by multicasting many unique-name records.
66+
_MAX_CACHE_RECORDS = 10000
67+
6268
_DNS_PACKET_HEADER_LEN = 12
6369

6470
_MAX_MSG_TYPICAL = 1460 # unused
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""Benchmark for the DNSCache record-count bound + overflow eviction."""
2+
3+
from __future__ import annotations
4+
5+
from pytest_codspeed import BenchmarkFixture
6+
7+
from zeroconf import DNSAddress, DNSCache, current_time_millis
8+
from zeroconf.const import _CLASS_IN, _MAX_CACHE_RECORDS, _TYPE_A
9+
10+
11+
def _make_records(count: int, now: float) -> list[DNSAddress]:
12+
return [
13+
DNSAddress(
14+
f"bench-{i}.local.",
15+
_TYPE_A,
16+
_CLASS_IN,
17+
120,
18+
bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
19+
created=now + i,
20+
)
21+
for i in range(count)
22+
]
23+
24+
25+
def test_cache_add_below_cap(benchmark: BenchmarkFixture) -> None:
26+
"""Adding records while the cache is well below the cap (no eviction)."""
27+
now = current_time_millis()
28+
records = _make_records(1000, now)
29+
30+
@benchmark
31+
def _add() -> None:
32+
cache = DNSCache()
33+
cache.async_add_records(records)
34+
35+
36+
def test_cache_add_at_cap_evicts(benchmark: BenchmarkFixture) -> None:
37+
"""Steady-state add at the cap: every new record forces one eviction."""
38+
now = current_time_millis()
39+
overflow_records = _make_records(_MAX_CACHE_RECORDS + 1000, now)
40+
41+
@benchmark
42+
def _flood() -> None:
43+
cache = DNSCache()
44+
cache.async_add_records(overflow_records)

tests/test_cache.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,3 +482,60 @@ async def test_cache_heap_pops_order() -> None:
482482
ts, _ = heappop(cache._expire_heap)
483483
assert ts >= start_ts
484484
start_ts = ts
485+
486+
487+
def test_cache_size_is_bounded() -> None:
488+
"""A flood of unique-name records is capped at ``_MAX_CACHE_RECORDS``."""
489+
cache = r.DNSCache()
490+
now = r.current_time_millis()
491+
overflow = 1000
492+
flood_size = const._MAX_CACHE_RECORDS + overflow
493+
494+
cache.async_add_records(
495+
r.DNSAddress(
496+
f"flood-{i}.local.",
497+
const._TYPE_A,
498+
const._CLASS_IN,
499+
120,
500+
bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
501+
created=now + i,
502+
)
503+
for i in range(flood_size)
504+
)
505+
506+
total = sum(len(store) for store in cache.cache.values())
507+
assert total == const._MAX_CACHE_RECORDS
508+
assert cache._total_records == const._MAX_CACHE_RECORDS
509+
# FIFO-ish: the earliest-created records (closest to expiration) get
510+
# evicted first, so the names that remain are from the tail.
511+
for i in range(overflow):
512+
assert f"flood-{i}.local." not in cache.cache
513+
for i in range(flood_size - overflow, flood_size):
514+
assert f"flood-{i}.local." in cache.cache
515+
516+
517+
def test_cache_eviction_decrements_total_records() -> None:
518+
"""Natural removal (goodbyes, expirations) must keep ``_total_records`` in sync."""
519+
cache = r.DNSCache()
520+
now = r.current_time_millis()
521+
records = [
522+
r.DNSAddress(
523+
f"sync-{i}.local.",
524+
const._TYPE_A,
525+
const._CLASS_IN,
526+
120,
527+
bytes((i & 0xFF, (i >> 8) & 0xFF, 0, 1)),
528+
created=now,
529+
)
530+
for i in range(50)
531+
]
532+
cache.async_add_records(records)
533+
assert cache._total_records == 50
534+
535+
cache.async_remove_records(records[:20])
536+
assert cache._total_records == 30
537+
538+
# async_expire on a future time should drop the rest
539+
cache.async_expire(now + (200 * 1000))
540+
assert cache._total_records == 0
541+
assert not cache.cache

0 commit comments

Comments
 (0)