diff --git a/Cargo.lock b/Cargo.lock index 179949b262e..ba3a3204287 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3822,6 +3822,7 @@ dependencies = [ "reqwest 0.12.24", "rkyv", "rmp-serde", + "roaring", "seq-macro", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 10d170e2164..96f6919ca05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -217,6 +217,7 @@ reqwest-websocket = "0.5.0" rkyv = { version = "0.7.45", default-features = false } rmp-serde = "1.3.0" rmpv = "1.3.0" +roaring = "0.11.3" rstest = "0.15" # Make sure this is the same rustls version used by the `tonic` crate. # See the `ensure_default_crypto_provider` function. diff --git a/crates/dbsp/Cargo.toml b/crates/dbsp/Cargo.toml index c6da2493f09..636e755efb0 100644 --- a/crates/dbsp/Cargo.toml +++ b/crates/dbsp/Cargo.toml @@ -83,6 +83,7 @@ tracing = { workspace = true } snap = { workspace = true } enum-map = { workspace = true } fastbloom = { workspace = true } +roaring = { workspace = true } core_affinity = { workspace = true } indexmap = { workspace = true } feldera-storage = { workspace = true } @@ -165,6 +166,14 @@ harness = false name = "window_min" harness = false +[[bench]] +name = "filter_bitmap" +harness = false + +[[bench]] +name = "filter_predictor" +harness = false + [[example]] name = "orgchart" diff --git a/crates/dbsp/benches/filter_bitmap.rs b/crates/dbsp/benches/filter_bitmap.rs new file mode 100644 index 00000000000..3c6d6762c2f --- /dev/null +++ b/crates/dbsp/benches/filter_bitmap.rs @@ -0,0 +1,1431 @@ +//! Membership benchmark for `fastbloom` vs `roaring`. +//! +//! Examples: +//! `cargo bench -p dbsp --bench filter_bitmap -- --csv-output filter_bitmap.csv` +//! 
`cargo bench -p dbsp --bench filter_bitmap -- --key-types u32,u64 --key-spaces consecutive,full_range` + +use clap::{Parser, ValueEnum}; +use csv::Writer; +use dbsp::storage::file::BLOOM_FILTER_FALSE_POSITIVE_RATE; +use fastbloom::BloomFilter; +use rand::{RngCore, SeedableRng}; +use rand_chacha::ChaCha8Rng; +use rand_distr::{Distribution, Normal}; +use roaring::{RoaringBitmap, RoaringTreemap}; +use serde::Serialize; +use std::{ + fmt::{Display, Formatter}, + fs::File, + mem::size_of_val, + path::PathBuf, + time::Instant, +}; + +const DEFAULT_BLOOM_SEED: u128 = 42; +const MIN_BLOOM_EXPECTED_ITEMS: u64 = 64; +const U32_KEY_SPACE_SIZE: u64 = u32::MAX as u64 + 1; +const DEFAULT_LOOKUP_LIMIT: u64 = 50_000_000; +const DEFAULT_KEY_EPS_VALUES: [f64; 6] = [1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 5e-1]; + +// Mirror the spine_async size bands and include the near-full u32 domain case. +const DEFAULT_SPINE_LEVEL_SIZES: [u64; 6] = + [14_999, 99_999, 999_999, 9_999_999, 99_999_999, 999_999_999]; + +fn main() { + let args = Args::parse(); + let key_types = args.key_types(); + let key_spaces = args.key_spaces(); + let num_elements_list = args.num_elements(); + args.validate(&key_types, &key_spaces, &num_elements_list); + + let csv_file = File::create(&args.csv_output) + .unwrap_or_else(|error| panic!("failed to create {}: {error}", args.csv_output.display())); + let mut csv_writer = Writer::from_writer(csv_file); + + println!("benchmark=filter_bitmap"); + println!( + "num_elements={}", + num_elements_list + .iter() + .map(u64::to_string) + .collect::>() + .join(",") + ); + println!("repetitions={}", args.repetitions); + println!("insert_order={}", args.insert_order); + println!("lookup_order={}", args.lookup_order); + println!("insert_seed={}", args.insert_seed); + println!("lookup_seed={}", args.lookup_seed); + println!("key_space_seed={}", args.key_space_seed); + println!( + "key_types={}", + key_types + .iter() + .map(ToString::to_string) + .collect::>() + .join(",") + ); + println!( + 
"key_spaces={}", + key_spaces + .iter() + .map(ToString::to_string) + .collect::>() + .join(",") + ); + println!( + "key_eps={}", + args.key_eps() + .iter() + .map(ToString::to_string) + .collect::>() + .join(",") + ); + println!( + "structures={}", + args.structures + .iter() + .map(ToString::to_string) + .collect::>() + .join(",") + ); + println!( + "bloom_false_positive_rate={}", + args.bloom_false_positive_rate + ); + println!("bloom_seed={}", args.bloom_seed); + println!("csv_output={}", args.csv_output.display()); + println!(); + + for &key_type in &key_types { + for &key_space in &key_spaces { + for key_eps in args.key_eps_for(key_space) { + let config = BenchmarkConfig { + key_type, + key_space, + key_eps, + }; + + for &num_elements in &num_elements_list { + let lookup_count = args.lookup_count_for(num_elements); + let false_positive_lookup_count = + args.false_positive_lookup_count_for(config, num_elements, lookup_count); + let bloom_expected_items = args + .bloom_expected_items + .unwrap_or(num_elements) + .max(MIN_BLOOM_EXPECTED_ITEMS); + + for structure in &args.structures { + let result = match structure { + Structure::Bloom => benchmark_bloom( + &args, + config, + num_elements, + lookup_count, + false_positive_lookup_count, + bloom_expected_items, + ), + Structure::Roaring => { + benchmark_roaring(&args, config, num_elements, lookup_count) + } + }; + + print_report( + *structure, + config, + &result, + num_elements, + lookup_count, + false_positive_lookup_count, + ); + + csv_writer + .serialize(CsvRow::from_result( + CsvRowContext { + structure: *structure, + config, + args: &args, + num_elements, + lookup_count, + false_positive_lookup_count, + bloom_expected_items, + }, + &result, + )) + .expect("failed to write CSV row"); + csv_writer.flush().expect("failed to flush CSV writer"); + } + } + } + } + } +} + +#[derive(Parser, Debug, Clone)] +#[command(name = "filter_bitmap")] +#[command(about = "Benchmark fastbloom against roaring bitmap or treemap 
membership queries")] +struct Args { + /// Comma-separated input sizes. Underscores and `u32::MAX` are accepted. + #[arg(long, value_name = "CSV")] + num_elements: Option, + + /// Number of successful lookups to benchmark for each input size. + /// Defaults to min(num_elements, 50_000_000). + #[arg(long)] + lookup_count: Option, + + /// Number of negative lookups used to measure bloom false positives for each input size. + #[arg(long)] + false_positive_lookup_count: Option, + + /// Number of repeated benchmark runs used to compute min/avg/max/std. + #[arg(long, default_value_t = 3)] + repetitions: usize, + + /// Structures to benchmark. + #[arg(long, value_delimiter = ',', default_value = "bloom,roaring")] + structures: Vec, + + /// Key types to benchmark. + #[arg(long, value_delimiter = ',', default_value = "u32")] + key_types: Vec, + + /// Key-space models to benchmark. + /// + /// `consecutive` inserts keys from `0..n`. + /// `full_range` samples `n` distinct keys from the full type domain. + /// `half_normal` spreads `n` unique keys across `0..u32::MAX` with + /// a half-normal offset distribution controlled by `--key-eps`. + #[arg(long, value_delimiter = ',', default_value = "consecutive")] + key_spaces: Vec, + + /// Seed used by the full-range sampler and half-normal quantile phase. + #[arg(long, default_value_t = 2)] + key_space_seed: u64, + + /// Comma-separated epsilon values used by `--key-spaces half-normal`. + #[arg(long, value_name = "CSV")] + key_eps: Option, + + /// Insert order over the chosen keyset. + #[arg(long, default_value_t = Order::Sequential)] + insert_order: Order, + + /// Lookup order over the chosen keyset or sampled subset. + #[arg(long, default_value_t = Order::Random)] + lookup_order: Order, + + /// Seed used when `insert-order=random`. + #[arg(long, default_value_t = 0)] + insert_seed: u64, + + /// Seed used when `lookup-order=random`. + #[arg(long, default_value_t = 1)] + lookup_seed: u64, + + /// Bloom filter false-positive rate. 
Defaults to DBSP storage default.
+    #[arg(long, default_value_t = BLOOM_FILTER_FALSE_POSITIVE_RATE)]
+    bloom_false_positive_rate: f64,
+
+    /// Bloom filter seed. Defaults to DBSP storage seed.
+    #[arg(long, default_value_t = DEFAULT_BLOOM_SEED)]
+    bloom_seed: u128,
+
+    /// Backward-compatible alias for `--key-types u64`.
+    #[doc(hidden)]
+    #[arg(long, hide = true, default_value_t = false)]
+    u64_keys: bool,
+
+    /// Expected number of items passed to the bloom filter builder for each input size.
+    #[arg(long)]
+    bloom_expected_items: Option<u64>,
+
+    /// Output CSV path.
+    #[arg(long, default_value = "filter_bitmap.csv")]
+    csv_output: PathBuf,
+
+    // When running with `cargo bench` the binary gets the `--bench` flag, so we
+    // have to parse and ignore it so clap doesn't reject it.
+    #[doc(hidden)]
+    #[arg(long = "bench", hide = true)]
+    __bench: bool,
+}
+
+impl Args {
+    fn key_types(&self) -> Vec<KeyType> {
+        let raw = if self.u64_keys {
+            vec![KeyType::U64]
+        } else {
+            self.key_types.clone()
+        };
+        dedup(raw)
+    }
+
+    fn key_spaces(&self) -> Vec<KeySpace> {
+        dedup(self.key_spaces.clone())
+    }
+
+    fn key_eps(&self) -> Vec<f64> {
+        match &self.key_eps {
+            Some(csv) => parse_f64_csv(csv, "--key-eps"),
+            None => DEFAULT_KEY_EPS_VALUES.to_vec(),
+        }
+    }
+
+    fn key_eps_for(&self, key_space: KeySpace) -> Vec<Option<f64>> {
+        match key_space {
+            KeySpace::HalfNormal => self.key_eps().into_iter().map(Some).collect(),
+            _ => vec![None],
+        }
+    }
+
+    fn num_elements(&self) -> Vec<u64> {
+        match &self.num_elements {
+            Some(csv) => parse_u64_csv(csv),
+            None => DEFAULT_SPINE_LEVEL_SIZES.to_vec(),
+        }
+    }
+
+    fn lookup_count_for(&self, num_elements: u64) -> u64 {
+        self.lookup_count
+            .map(|lookup_count| lookup_count.min(num_elements))
+            .unwrap_or(num_elements.min(DEFAULT_LOOKUP_LIMIT))
+    }
+
+    fn false_positive_lookup_count_for(
+        &self,
+        config: BenchmarkConfig,
+        num_elements: u64,
+        _lookup_count: u64,
+    ) -> u64 {
+        self.false_positive_lookup_count
+            .map(|count| {
+                let max_false_positive_lookup_count =
config.max_false_positive_lookup_count(num_elements);
+                count.min(max_false_positive_lookup_count)
+            })
+            .unwrap_or(0)
+    }
+
+    fn validate(&self, key_types: &[KeyType], key_spaces: &[KeySpace], num_elements_list: &[u64]) {
+        let key_eps = self.key_eps();
+
+        assert!(
+            !num_elements_list.is_empty(),
+            "--num-elements must select at least one size"
+        );
+        assert!(
+            self.repetitions > 0,
+            "--repetitions must be greater than zero"
+        );
+        assert!(
+            !self.structures.is_empty(),
+            "--structures must select at least one structure"
+        );
+        assert!(
+            !key_types.is_empty(),
+            "--key-types must select at least one key type"
+        );
+        assert!(
+            !key_spaces.is_empty(),
+            "--key-spaces must select at least one key-space mode"
+        );
+        if key_spaces.contains(&KeySpace::HalfNormal) {
+            assert!(
+                !key_eps.is_empty(),
+                "--key-eps must select at least one epsilon for key-space half_normal"
+            );
+            for eps in &key_eps {
+                assert!(
+                    eps.is_finite() && *eps > 0.0,
+                    "--key-eps values must be finite and greater than zero"
+                );
+            }
+        }
+        assert!(
+            self.bloom_false_positive_rate > 0.0 && self.bloom_false_positive_rate < 1.0,
+            "--bloom-false-positive-rate must be between 0 and 1"
+        );
+
+        for &num_elements in num_elements_list {
+            assert!(
+                num_elements > 0,
+                "--num-elements values must be greater than zero"
+            );
+
+            for &key_type in key_types {
+                for &key_space in key_spaces {
+                    let config = BenchmarkConfig {
+                        key_type,
+                        key_space,
+                        key_eps: None,
+                    };
+                    config.validate_num_elements(num_elements);
+                }
+            }
+        }
+    }
+}
+
+fn dedup<T>(values: Vec<T>) -> Vec<T>
+where
+    T: PartialEq,
+{
+    let mut out = Vec::with_capacity(values.len());
+    for value in values {
+        if !out.contains(&value) {
+            out.push(value);
+        }
+    }
+    out
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)]
+enum Structure {
+    #[value(name = "bloom")]
+    Bloom,
+    #[value(name = "roaring")]
+    Roaring,
+}
+
+impl Display for Structure {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
Self::Bloom => f.write_str("bloom"), + Self::Roaring => f.write_str("roaring"), + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] +enum KeyType { + #[value(name = "u32")] + U32, + #[value(name = "u64")] + U64, +} + +impl Display for KeyType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::U32 => f.write_str("u32"), + Self::U64 => f.write_str("u64"), + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] +enum KeySpace { + #[value(name = "consecutive")] + Consecutive, + #[value(name = "full_range", alias = "full-range")] + FullRange, + #[value(name = "half_normal", alias = "half-normal")] + HalfNormal, +} + +impl Display for KeySpace { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Consecutive => f.write_str("consecutive"), + Self::FullRange => f.write_str("full_range"), + Self::HalfNormal => f.write_str("half_normal"), + } + } +} + +#[derive(Clone, Copy, Debug)] +struct BenchmarkConfig { + key_type: KeyType, + key_space: KeySpace, + key_eps: Option, +} + +impl BenchmarkConfig { + fn validate_num_elements(self, num_elements: u64) { + match (self.key_type, self.key_space) { + (KeyType::U32, _) | (_, KeySpace::HalfNormal) => assert!( + num_elements <= U32_KEY_SPACE_SIZE, + "--num-elements values must be <= {} for this key type/key space", + U32_KEY_SPACE_SIZE + ), + (KeyType::U64, _) => {} + } + } + + fn max_false_positive_lookup_count(self, num_elements: u64) -> u64 { + match (self.key_type, self.key_space) { + (_, KeySpace::HalfNormal) => U32_KEY_SPACE_SIZE - num_elements, + (KeyType::U32, _) => U32_KEY_SPACE_SIZE - num_elements, + (KeyType::U64, _) => u64::MAX - num_elements + 1, + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] +enum Order { + #[value(name = "sequential")] + Sequential, + #[value(name = "random")] + Random, +} + +impl Display for Order { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + 
Self::Sequential => f.write_str("sequential"), + Self::Random => f.write_str("random"), + } + } +} + +#[derive(Clone, Copy, Debug)] +struct AffinePermutation { + len: u64, + multiplier: u64, + offset: u64, +} + +impl AffinePermutation { + fn sequential(len: u64) -> Self { + Self { + len, + multiplier: 1, + offset: 0, + } + } + + fn random(len: u64, seed: u64) -> Self { + if len <= 1 { + return Self::sequential(len); + } + let mut rng = ChaCha8Rng::seed_from_u64(seed); + let mut multiplier = (rng.next_u64() % len) | 1; + while gcd(multiplier, len) != 1 { + multiplier = (multiplier + 2) % len; + if multiplier == 0 { + multiplier = 1; + } + } + let offset = rng.next_u64() % len; + Self { + len, + multiplier, + offset, + } + } + + fn for_order(len: u64, order: Order, seed: u64) -> Self { + match order { + Order::Sequential => Self::sequential(len), + Order::Random => Self::random(len, seed), + } + } + + fn index_at(&self, position: u64) -> u64 { + debug_assert!(position < self.len); + (self + .multiplier + .wrapping_mul(position) + .wrapping_add(self.offset)) + % self.len + } +} + +#[derive(Clone, Copy, Debug)] +struct WrappingPermutation64 { + multiplier: u64, + offset: u64, +} + +impl WrappingPermutation64 { + fn sequential() -> Self { + Self { + multiplier: 1, + offset: 0, + } + } + + fn random(seed: u64) -> Self { + let mut rng = ChaCha8Rng::seed_from_u64(seed); + Self { + multiplier: rng.next_u64() | 1, + offset: rng.next_u64(), + } + } + + fn for_order(order: Order, seed: u64) -> Self { + match order { + Order::Sequential => Self::sequential(), + Order::Random => Self::random(seed), + } + } + + fn index_at(&self, position: u64) -> u64 { + position + .wrapping_mul(self.multiplier) + .wrapping_add(self.offset) + } +} + +#[derive(Clone, Copy, Debug)] +struct HalfNormalKeySampler { + eps: f64, + seed: u64, +} + +impl HalfNormalKeySampler { + fn new(eps: f64, seed: u64) -> Self { + Self { eps, seed } + } + + fn present_keys_u32(&self, num_elements: u64) -> Vec { + let 
len = usize::try_from(num_elements).expect("num_elements must fit in usize"); + let mut rng = ChaCha8Rng::seed_from_u64(self.seed); + let sigma = self.eps * u32::MAX as f64; + let distribution = Normal::new(0.0, sigma) + .expect("half-normal epsilon must produce a positive standard deviation"); + let mut keys = Vec::with_capacity(len); + + for _ in 0..num_elements { + let sampled = distribution.sample(&mut rng).abs().round(); + keys.push(sampled.clamp(0.0, u32::MAX as f64) as u32); + } + + keys.sort_unstable(); + project_sorted_unique_u32_domain(&mut keys); + keys + } + + fn present_keys_u64(&self, num_elements: u64) -> Vec { + self.present_keys_u32(num_elements) + .into_iter() + .map(u64::from) + .collect() + } +} + +#[derive(Clone, Copy, Debug)] +enum U32KeySampler { + Consecutive, + FullRange(AffinePermutation), + HalfNormal(HalfNormalKeySampler), +} + +impl U32KeySampler { + fn new(key_space: KeySpace, _num_elements: u64, key_eps: Option, seed: u64) -> Self { + match key_space { + KeySpace::Consecutive => Self::Consecutive, + KeySpace::FullRange => Self::FullRange(AffinePermutation::for_order( + U32_KEY_SPACE_SIZE, + Order::Random, + seed, + )), + KeySpace::HalfNormal => Self::HalfNormal(HalfNormalKeySampler::new( + key_eps.expect("half_normal key space requires key_eps"), + seed, + )), + } + } + + fn present_key(&self, set_index: u64) -> u32 { + match self { + Self::Consecutive => set_index as u32, + Self::FullRange(permutation) => permutation.index_at(set_index) as u32, + Self::HalfNormal(_) => { + panic!("half_normal key space requires pre-generated keys") + } + } + } + + fn absent_key(&self, num_elements: u64, absent_index: u64) -> u32 { + let domain_index = num_elements + .checked_add(absent_index) + .expect("u32 absent-key generation overflowed"); + match self { + Self::Consecutive => domain_index as u32, + Self::FullRange(permutation) => permutation.index_at(domain_index) as u32, + Self::HalfNormal(_) => { + panic!("half_normal key space requires 
prepared absent keys") + } + } + } +} + +#[derive(Clone, Copy, Debug)] +enum U64KeySampler { + Consecutive, + FullRange(WrappingPermutation64), + HalfNormal(HalfNormalKeySampler), +} + +impl U64KeySampler { + fn new(key_space: KeySpace, _num_elements: u64, key_eps: Option, seed: u64) -> Self { + match key_space { + KeySpace::Consecutive => Self::Consecutive, + KeySpace::FullRange => { + Self::FullRange(WrappingPermutation64::for_order(Order::Random, seed)) + } + KeySpace::HalfNormal => Self::HalfNormal(HalfNormalKeySampler::new( + key_eps.expect("half_normal key space requires key_eps"), + seed, + )), + } + } + + fn present_key(&self, set_index: u64) -> u64 { + match self { + Self::Consecutive => set_index, + Self::FullRange(permutation) => permutation.index_at(set_index), + Self::HalfNormal(_) => { + panic!("half_normal key space requires pre-generated keys") + } + } + } + + fn absent_key(&self, num_elements: u64, absent_index: u64) -> u64 { + let domain_index = num_elements + .checked_add(absent_index) + .expect("u64 absent-key generation overflowed"); + match self { + Self::Consecutive => domain_index, + Self::FullRange(permutation) => permutation.index_at(domain_index), + Self::HalfNormal(_) => { + panic!("half_normal key space requires prepared absent keys") + } + } + } +} + +fn gcd(mut lhs: u64, mut rhs: u64) -> u64 { + while rhs != 0 { + let next = lhs % rhs; + lhs = rhs; + rhs = next; + } + lhs +} + +fn parse_u64_csv(csv: &str) -> Vec { + let mut out: Vec = csv + .split(',') + .filter(|entry| !entry.trim().is_empty()) + .map(|entry| { + parse_u64_token(entry.trim()) + .unwrap_or_else(|error| panic!("invalid u64 in --num-elements: {entry} ({error})")) + }) + .collect(); + out.sort_unstable(); + out.dedup(); + out +} + +fn parse_f64_csv(csv: &str, flag_name: &str) -> Vec { + let mut out: Vec = csv + .split(',') + .filter(|entry| !entry.trim().is_empty()) + .map(|entry| { + entry + .trim() + .parse::() + .unwrap_or_else(|error| panic!("invalid f64 in 
{flag_name}: {entry} ({error})")) + }) + .collect(); + out.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).expect("NaN was already rejected")); + out.dedup(); + out +} + +fn parse_u64_token(token: &str) -> Result { + match token { + "u32::MAX" | "u32_max" | "max_u32" => Ok(u32::MAX as u64), + _ => token + .replace('_', "") + .parse::() + .map_err(|error| error.to_string()), + } +} + +fn project_sorted_unique_u32_domain(keys: &mut [u32]) { + if keys.is_empty() { + return; + } + + for (index, key) in keys.iter_mut().enumerate() { + let min_key = u32::try_from(index).expect("key count exceeded u32 domain"); + if *key < min_key { + *key = min_key; + } + } + + for index in (0..keys.len()).rev() { + let tail = keys.len() - 1 - index; + let max_key = u32::MAX + .checked_sub(u32::try_from(tail).expect("key count exceeded u32 domain")) + .expect("tail adjustment underflowed"); + if keys[index] > max_key { + keys[index] = max_key; + } + if index + 1 < keys.len() && keys[index] >= keys[index + 1] { + keys[index] = keys[index + 1] - 1; + } + } + + debug_assert!(keys.windows(2).all(|window| window[0] < window[1])); +} + +fn absent_keys_from_sorted_present_u32(present_keys: &[u32], count: u64) -> Vec { + let target_len = usize::try_from(count).expect("false-positive lookup count must fit in usize"); + let mut absent_keys = Vec::with_capacity(target_len); + let mut candidate = 0u64; + + for &present_key in present_keys { + let present_key = present_key as u64; + while candidate < present_key && absent_keys.len() < target_len { + absent_keys.push(candidate as u32); + candidate += 1; + } + if absent_keys.len() == target_len { + return absent_keys; + } + candidate = present_key + .checked_add(1) + .expect("half-normal absent-key generation overflowed"); + } + + while absent_keys.len() < target_len { + absent_keys.push(candidate as u32); + candidate = candidate + .checked_add(1) + .expect("half-normal absent-key generation overflowed"); + } + + absent_keys +} + +fn 
absent_keys_from_sorted_present_u64(present_keys: &[u64], count: u64) -> Vec { + let target_len = usize::try_from(count).expect("false-positive lookup count must fit in usize"); + let mut absent_keys = Vec::with_capacity(target_len); + let mut candidate = 0u64; + + for &present_key in present_keys { + while candidate < present_key && absent_keys.len() < target_len { + absent_keys.push(candidate); + candidate += 1; + } + if absent_keys.len() == target_len { + return absent_keys; + } + candidate = present_key + .checked_add(1) + .expect("half-normal absent-key generation overflowed"); + } + + while absent_keys.len() < target_len { + absent_keys.push(candidate); + candidate = candidate + .checked_add(1) + .expect("half-normal absent-key generation overflowed"); + } + + absent_keys +} + +#[derive(Debug, Clone, Copy)] +struct SummaryStats { + min: f64, + avg: f64, + max: f64, + stddev: f64, +} + +impl SummaryStats { + fn from_samples(samples: &[f64]) -> Self { + let min = samples.iter().copied().fold(f64::INFINITY, f64::min); + let max = samples.iter().copied().fold(f64::NEG_INFINITY, f64::max); + let avg = samples.iter().sum::() / samples.len() as f64; + let variance = samples + .iter() + .map(|sample| { + let delta = *sample - avg; + delta * delta + }) + .sum::() + / samples.len() as f64; + Self { + min, + avg, + max, + stddev: variance.sqrt(), + } + } +} + +#[derive(Debug, Clone, Copy)] +struct BenchmarkResult { + insert_ns_per_element: SummaryStats, + lookup_ns_per_element: SummaryStats, + bytes_used: usize, + false_positive_rate_percent: Option, +} + +#[derive(Debug, Serialize)] +struct CsvRow { + structure: &'static str, + key_type: &'static str, + key_space: &'static str, + key_eps: Option, + num_elements: u64, + lookup_count: u64, + false_positive_lookup_count: u64, + repetitions: usize, + insert_order: &'static str, + lookup_order: &'static str, + insert_seed: u64, + lookup_seed: u64, + key_space_seed: u64, + bloom_false_positive_rate_target_percent: f64, + 
bloom_seed: u128, + bloom_expected_items: u64, + bytes_used: usize, + bytes_per_element: f64, + bits_per_element: Option, + insert_ns_per_element_min: f64, + insert_ns_per_element_avg: f64, + insert_ns_per_element_max: f64, + insert_ns_per_element_stddev: f64, + lookup_ns_per_element_min: f64, + lookup_ns_per_element_avg: f64, + lookup_ns_per_element_max: f64, + lookup_ns_per_element_stddev: f64, + false_positive_rate_percent_min: Option, + false_positive_rate_percent_avg: Option, + false_positive_rate_percent_max: Option, + false_positive_rate_percent_stddev: Option, +} + +#[derive(Clone, Copy)] +struct CsvRowContext<'a> { + structure: Structure, + config: BenchmarkConfig, + args: &'a Args, + num_elements: u64, + lookup_count: u64, + false_positive_lookup_count: u64, + bloom_expected_items: u64, +} + +impl CsvRow { + fn from_result(context: CsvRowContext<'_>, result: &BenchmarkResult) -> Self { + let bits_per_element = (context.structure == Structure::Bloom) + .then_some((result.bytes_used as f64 * 8.0) / context.num_elements as f64); + let false_positive_stats = result.false_positive_rate_percent; + + Self { + structure: context.structure.as_str(), + key_type: context.config.key_type.as_str(), + key_space: context.config.key_space.as_str(), + key_eps: context.config.key_eps, + num_elements: context.num_elements, + lookup_count: context.lookup_count, + false_positive_lookup_count: context.false_positive_lookup_count, + repetitions: context.args.repetitions, + insert_order: context.args.insert_order.as_str(), + lookup_order: context.args.lookup_order.as_str(), + insert_seed: context.args.insert_seed, + lookup_seed: context.args.lookup_seed, + key_space_seed: context.args.key_space_seed, + bloom_false_positive_rate_target_percent: context.args.bloom_false_positive_rate + * 100.0, + bloom_seed: context.args.bloom_seed, + bloom_expected_items: context.bloom_expected_items, + bytes_used: result.bytes_used, + bytes_per_element: result.bytes_used as f64 / 
context.num_elements as f64, + bits_per_element, + insert_ns_per_element_min: result.insert_ns_per_element.min, + insert_ns_per_element_avg: result.insert_ns_per_element.avg, + insert_ns_per_element_max: result.insert_ns_per_element.max, + insert_ns_per_element_stddev: result.insert_ns_per_element.stddev, + lookup_ns_per_element_min: result.lookup_ns_per_element.min, + lookup_ns_per_element_avg: result.lookup_ns_per_element.avg, + lookup_ns_per_element_max: result.lookup_ns_per_element.max, + lookup_ns_per_element_stddev: result.lookup_ns_per_element.stddev, + false_positive_rate_percent_min: false_positive_stats.map(|stats| stats.min), + false_positive_rate_percent_avg: false_positive_stats.map(|stats| stats.avg), + false_positive_rate_percent_max: false_positive_stats.map(|stats| stats.max), + false_positive_rate_percent_stddev: false_positive_stats.map(|stats| stats.stddev), + } + } +} + +impl Structure { + fn as_str(self) -> &'static str { + match self { + Self::Bloom => "bloom", + Self::Roaring => "roaring", + } + } +} + +impl KeyType { + fn as_str(self) -> &'static str { + match self { + Self::U32 => "u32", + Self::U64 => "u64", + } + } +} + +impl KeySpace { + fn as_str(self) -> &'static str { + match self { + Self::Consecutive => "consecutive", + Self::FullRange => "full_range", + Self::HalfNormal => "half_normal", + } + } +} + +impl Order { + fn as_str(self) -> &'static str { + match self { + Self::Sequential => "sequential", + Self::Random => "random", + } + } +} + +fn benchmark_bloom( + args: &Args, + config: BenchmarkConfig, + num_elements: u64, + lookup_count: u64, + false_positive_lookup_count: u64, + bloom_expected_items: u64, +) -> BenchmarkResult { + match config.key_type { + KeyType::U32 => benchmark_bloom_u32( + args, + config, + num_elements, + lookup_count, + false_positive_lookup_count, + bloom_expected_items, + ), + KeyType::U64 => benchmark_bloom_u64( + args, + config, + num_elements, + lookup_count, + false_positive_lookup_count, + 
bloom_expected_items, + ), + } +} + +fn benchmark_bloom_u32( + args: &Args, + config: BenchmarkConfig, + num_elements: u64, + lookup_count: u64, + false_positive_lookup_count: u64, + bloom_expected_items: u64, +) -> BenchmarkResult { + let mut insert_samples = Vec::with_capacity(args.repetitions); + let mut lookup_samples = Vec::with_capacity(args.repetitions); + let mut false_positive_rate_percent_samples = Vec::with_capacity(args.repetitions); + let mut bytes_used = 0; + let expected_items = + usize::try_from(bloom_expected_items).expect("bloom expected items must fit in usize"); + + for repetition in 0..args.repetitions { + let sampler = U32KeySampler::new( + config.key_space, + num_elements, + config.key_eps, + args.key_space_seed.wrapping_add(repetition as u64), + ); + let present_keys = sorted_present_keys_u32(sampler, num_elements); + let insert_permutation = AffinePermutation::for_order( + num_elements, + args.insert_order, + args.insert_seed.wrapping_add(repetition as u64), + ); + let lookup_permutation = AffinePermutation::for_order( + num_elements, + args.lookup_order, + args.lookup_seed.wrapping_add(repetition as u64), + ); + let false_positive_permutation = (false_positive_lookup_count > 0).then(|| { + AffinePermutation::for_order( + false_positive_lookup_count, + args.lookup_order, + args.lookup_seed.wrapping_add(repetition as u64), + ) + }); + let absent_keys = matches!(sampler, U32KeySampler::HalfNormal(_)).then(|| { + absent_keys_from_sorted_present_u32(&present_keys, false_positive_lookup_count) + }); + + let mut bloom = BloomFilter::with_false_pos(args.bloom_false_positive_rate) + .seed(&args.bloom_seed) + .expected_items(expected_items.max(MIN_BLOOM_EXPECTED_ITEMS as usize)); + + let insert_started = Instant::now(); + for index in 0..num_elements { + let key = present_keys[insert_permutation.index_at(index) as usize]; + bloom.insert(&key); + } + let insert_elapsed = insert_started.elapsed(); + + let lookup_started = Instant::now(); + let mut 
hits = 0u64; + for index in 0..lookup_count { + let key = present_keys[lookup_permutation.index_at(index) as usize]; + hits += u64::from(bloom.contains(&key)); + } + let lookup_elapsed = lookup_started.elapsed(); + + assert_eq!(hits, lookup_count, "expected all lookup keys to be present"); + + if let Some(false_positive_permutation) = false_positive_permutation { + let mut false_positives = 0u64; + for index in 0..false_positive_lookup_count { + let absent_index = false_positive_permutation.index_at(index); + let key = absent_keys.as_ref().map_or_else( + || sampler.absent_key(num_elements, absent_index), + |keys| keys[absent_index as usize], + ); + false_positives += u64::from(bloom.contains(&key)); + } + false_positive_rate_percent_samples + .push((false_positives as f64 / false_positive_lookup_count as f64) * 100.0); + } + + bytes_used = size_of_val(bloom.as_slice()); + insert_samples.push(insert_elapsed.as_nanos() as f64 / num_elements as f64); + lookup_samples.push(lookup_elapsed.as_nanos() as f64 / lookup_count as f64); + } + + BenchmarkResult { + insert_ns_per_element: SummaryStats::from_samples(&insert_samples), + lookup_ns_per_element: SummaryStats::from_samples(&lookup_samples), + bytes_used, + false_positive_rate_percent: (!false_positive_rate_percent_samples.is_empty()) + .then(|| SummaryStats::from_samples(&false_positive_rate_percent_samples)), + } +} + +fn benchmark_bloom_u64( + args: &Args, + config: BenchmarkConfig, + num_elements: u64, + lookup_count: u64, + false_positive_lookup_count: u64, + bloom_expected_items: u64, +) -> BenchmarkResult { + let mut insert_samples = Vec::with_capacity(args.repetitions); + let mut lookup_samples = Vec::with_capacity(args.repetitions); + let mut false_positive_rate_percent_samples = Vec::with_capacity(args.repetitions); + let mut bytes_used = 0; + let expected_items = + usize::try_from(bloom_expected_items).expect("bloom expected items must fit in usize"); + + for repetition in 0..args.repetitions { + let sampler 
= U64KeySampler::new( + config.key_space, + num_elements, + config.key_eps, + args.key_space_seed.wrapping_add(repetition as u64), + ); + let present_keys = sorted_present_keys_u64(sampler, num_elements); + let insert_permutation = AffinePermutation::for_order( + num_elements, + args.insert_order, + args.insert_seed.wrapping_add(repetition as u64), + ); + let lookup_permutation = AffinePermutation::for_order( + num_elements, + args.lookup_order, + args.lookup_seed.wrapping_add(repetition as u64), + ); + let false_positive_permutation = (false_positive_lookup_count > 0).then(|| { + AffinePermutation::for_order( + false_positive_lookup_count, + args.lookup_order, + args.lookup_seed.wrapping_add(repetition as u64), + ) + }); + let absent_keys = matches!(sampler, U64KeySampler::HalfNormal(_)).then(|| { + absent_keys_from_sorted_present_u64(&present_keys, false_positive_lookup_count) + }); + + let mut bloom = BloomFilter::with_false_pos(args.bloom_false_positive_rate) + .seed(&args.bloom_seed) + .expected_items(expected_items.max(MIN_BLOOM_EXPECTED_ITEMS as usize)); + + let insert_started = Instant::now(); + for index in 0..num_elements { + let key = present_keys[insert_permutation.index_at(index) as usize]; + bloom.insert(&key); + } + let insert_elapsed = insert_started.elapsed(); + + let lookup_started = Instant::now(); + let mut hits = 0u64; + for index in 0..lookup_count { + let key = present_keys[lookup_permutation.index_at(index) as usize]; + hits += u64::from(bloom.contains(&key)); + } + let lookup_elapsed = lookup_started.elapsed(); + + assert_eq!(hits, lookup_count, "expected all lookup keys to be present"); + + if let Some(false_positive_permutation) = false_positive_permutation { + let mut false_positives = 0u64; + for index in 0..false_positive_lookup_count { + let absent_index = false_positive_permutation.index_at(index); + let key = absent_keys.as_ref().map_or_else( + || sampler.absent_key(num_elements, absent_index), + |keys| keys[absent_index as usize], 
+ ); + false_positives += u64::from(bloom.contains(&key)); + } + false_positive_rate_percent_samples + .push((false_positives as f64 / false_positive_lookup_count as f64) * 100.0); + } + + bytes_used = size_of_val(bloom.as_slice()); + insert_samples.push(insert_elapsed.as_nanos() as f64 / num_elements as f64); + lookup_samples.push(lookup_elapsed.as_nanos() as f64 / lookup_count as f64); + } + + BenchmarkResult { + insert_ns_per_element: SummaryStats::from_samples(&insert_samples), + lookup_ns_per_element: SummaryStats::from_samples(&lookup_samples), + bytes_used, + false_positive_rate_percent: (!false_positive_rate_percent_samples.is_empty()) + .then(|| SummaryStats::from_samples(&false_positive_rate_percent_samples)), + } +} + +fn benchmark_roaring( + args: &Args, + config: BenchmarkConfig, + num_elements: u64, + lookup_count: u64, +) -> BenchmarkResult { + match config.key_type { + KeyType::U32 => benchmark_roaring_u32(args, config, num_elements, lookup_count), + KeyType::U64 => benchmark_roaring_u64(args, config, num_elements, lookup_count), + } +} + +fn benchmark_roaring_u32( + args: &Args, + config: BenchmarkConfig, + num_elements: u64, + lookup_count: u64, +) -> BenchmarkResult { + let mut insert_samples = Vec::with_capacity(args.repetitions); + let mut lookup_samples = Vec::with_capacity(args.repetitions); + let mut bytes_used = 0; + + for repetition in 0..args.repetitions { + let sampler = U32KeySampler::new( + config.key_space, + num_elements, + config.key_eps, + args.key_space_seed.wrapping_add(repetition as u64), + ); + let present_keys = sorted_present_keys_u32(sampler, num_elements); + let insert_permutation = AffinePermutation::for_order( + num_elements, + args.insert_order, + args.insert_seed.wrapping_add(repetition as u64), + ); + let lookup_permutation = AffinePermutation::for_order( + num_elements, + args.lookup_order, + args.lookup_seed.wrapping_add(repetition as u64), + ); + + let insert_started = Instant::now(); + let mut bitmap = 
RoaringBitmap::new(); + for index in 0..num_elements { + bitmap.insert(present_keys[insert_permutation.index_at(index) as usize]); + } + let insert_elapsed = insert_started.elapsed(); + let _ = bitmap.optimize(); + + let lookup_started = Instant::now(); + let mut hits = 0u64; + for index in 0..lookup_count { + let key = present_keys[lookup_permutation.index_at(index) as usize]; + hits += u64::from(bitmap.contains(key)); + } + let lookup_elapsed = lookup_started.elapsed(); + + assert_eq!(hits, lookup_count, "expected all lookup keys to be present"); + bytes_used = bitmap.serialized_size(); + insert_samples.push(insert_elapsed.as_nanos() as f64 / num_elements as f64); + lookup_samples.push(lookup_elapsed.as_nanos() as f64 / lookup_count as f64); + } + + BenchmarkResult { + insert_ns_per_element: SummaryStats::from_samples(&insert_samples), + lookup_ns_per_element: SummaryStats::from_samples(&lookup_samples), + bytes_used, + false_positive_rate_percent: None, + } +} + +fn benchmark_roaring_u64( + args: &Args, + config: BenchmarkConfig, + num_elements: u64, + lookup_count: u64, +) -> BenchmarkResult { + let mut insert_samples = Vec::with_capacity(args.repetitions); + let mut lookup_samples = Vec::with_capacity(args.repetitions); + let mut bytes_used = 0; + + for repetition in 0..args.repetitions { + let sampler = U64KeySampler::new( + config.key_space, + num_elements, + config.key_eps, + args.key_space_seed.wrapping_add(repetition as u64), + ); + let present_keys = sorted_present_keys_u64(sampler, num_elements); + let insert_permutation = AffinePermutation::for_order( + num_elements, + args.insert_order, + args.insert_seed.wrapping_add(repetition as u64), + ); + let lookup_permutation = AffinePermutation::for_order( + num_elements, + args.lookup_order, + args.lookup_seed.wrapping_add(repetition as u64), + ); + + let insert_started = Instant::now(); + let mut treemap = RoaringTreemap::new(); + for index in 0..num_elements { + 
treemap.insert(present_keys[insert_permutation.index_at(index) as usize]); + } + let insert_elapsed = insert_started.elapsed(); + + let lookup_started = Instant::now(); + let mut hits = 0u64; + for index in 0..lookup_count { + let key = present_keys[lookup_permutation.index_at(index) as usize]; + hits += u64::from(treemap.contains(key)); + } + let lookup_elapsed = lookup_started.elapsed(); + + assert_eq!(hits, lookup_count, "expected all lookup keys to be present"); + bytes_used = treemap.serialized_size(); + insert_samples.push(insert_elapsed.as_nanos() as f64 / num_elements as f64); + lookup_samples.push(lookup_elapsed.as_nanos() as f64 / lookup_count as f64); + } + + BenchmarkResult { + insert_ns_per_element: SummaryStats::from_samples(&insert_samples), + lookup_ns_per_element: SummaryStats::from_samples(&lookup_samples), + bytes_used, + false_positive_rate_percent: None, + } +} + +fn sorted_present_keys_u32(sampler: U32KeySampler, num_elements: u64) -> Vec { + match sampler { + U32KeySampler::HalfNormal(sampler) => sampler.present_keys_u32(num_elements), + _ => { + let mut present_keys = Vec::with_capacity( + usize::try_from(num_elements).expect("num_elements must fit in usize"), + ); + for set_index in 0..num_elements { + present_keys.push(sampler.present_key(set_index)); + } + present_keys.sort_unstable(); + present_keys + } + } +} + +fn sorted_present_keys_u64(sampler: U64KeySampler, num_elements: u64) -> Vec { + match sampler { + U64KeySampler::HalfNormal(sampler) => sampler.present_keys_u64(num_elements), + _ => { + let mut present_keys = Vec::with_capacity( + usize::try_from(num_elements).expect("num_elements must fit in usize"), + ); + for set_index in 0..num_elements { + present_keys.push(sampler.present_key(set_index)); + } + present_keys.sort_unstable(); + present_keys + } + } +} + +fn print_report( + structure: Structure, + config: BenchmarkConfig, + result: &BenchmarkResult, + num_elements: u64, + lookup_count: u64, + false_positive_lookup_count: 
u64, +) { + println!("structure={structure}"); + println!("key_type={}", config.key_type.as_str()); + println!("key_space={}", config.key_space.as_str()); + if let Some(key_eps) = config.key_eps { + println!("key_eps={key_eps}"); + } + println!("num_elements={num_elements}"); + println!("bytes_used={}", result.bytes_used); + println!( + "bytes_used_human={}", + format_bytes(result.bytes_used as f64) + ); + println!( + "bytes_per_element={}", + format_bytes(result.bytes_used as f64 / num_elements as f64) + ); + if structure == Structure::Bloom { + println!( + "bits_per_element={:.6}", + (result.bytes_used as f64 * 8.0) / num_elements as f64 + ); + } + print_stats("insert_ns_per_element", result.insert_ns_per_element); + print_stats("lookup_ns_per_element", result.lookup_ns_per_element); + println!("lookup_count={lookup_count}"); + if let Some(stats) = result.false_positive_rate_percent { + println!("false_positive_lookup_count={false_positive_lookup_count}"); + print_stats("false_positive_rate_percent", stats); + } + println!(); +} + +fn print_stats(label: &str, stats: SummaryStats) { + println!("{label}.min={:.6}", stats.min); + println!("{label}.avg={:.6}", stats.avg); + println!("{label}.max={:.6}", stats.max); + println!("{label}.stddev={:.6}", stats.stddev); +} + +fn format_bytes(bytes: f64) -> String { + const UNITS: [&str; 5] = ["B", "KiB", "MiB", "GiB", "TiB"]; + + let mut value = bytes; + let mut unit_index = 0; + while value >= 1024.0 && unit_index + 1 < UNITS.len() { + value /= 1024.0; + unit_index += 1; + } + + format!("{value:.6} {}", UNITS[unit_index]) +} diff --git a/crates/dbsp/benches/filter_predictor.rs b/crates/dbsp/benches/filter_predictor.rs new file mode 100644 index 00000000000..040202d09b3 --- /dev/null +++ b/crates/dbsp/benches/filter_predictor.rs @@ -0,0 +1,1932 @@ +//! Predictor benchmark for deciding between `fastbloom` and `roaring` on u32 keys. +//! +//! Examples: +//! 
`cargo bench -p dbsp --bench filter_predictor -- --csv-output filter_predictor.csv` +//! `cargo bench -p dbsp --bench filter_predictor -- --num-keys 99_999,999_999 --distributions gaussian,bimodal,exponential --gaussian-means 0.1,0.5,0.9 --gaussian-stddevs 1e-6,1e-4,1e-2` + +use clap::{Parser, ValueEnum}; +use csv::Writer; +use dbsp::storage::file::BLOOM_FILTER_FALSE_POSITIVE_RATE; +use fastbloom::BloomFilter; +use rand::{RngCore, SeedableRng, seq::index::sample}; +use rand_chacha::ChaCha8Rng; +use rand_distr::{Distribution, Exp, Normal}; +use roaring::RoaringBitmap; +use serde::Serialize; +use std::{ + collections::HashMap, + fmt::{Display, Formatter}, + fs::File, + mem::size_of_val, + path::PathBuf, + sync::{ + atomic::{AtomicUsize, Ordering}, + mpsc, + }, + thread, + time::Instant, +}; + +const DEFAULT_BLOOM_SEED: u128 = 42; +const DEFAULT_GAUSSIAN_MEAN_FRACTIONS: [f64; 1] = [0.5]; +const DEFAULT_GAUSSIAN_STDDEV_FRACTIONS: [f64; 10] = + [1e-6, 1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 1e-2, 5e-2, 1e-1, 5e-1]; +const DEFAULT_LOOKUP_LIMIT: u64 = 5_000_000; +const DEFAULT_SAMPLE_PERCENT: f64 = 0.1; +const DEFAULT_MIN_SAMPLE_SIZE: usize = 1_024; +const BIMODAL_LEFT_PEAK_FRAC: f64 = 0.25; +const BIMODAL_RIGHT_PEAK_FRAC: f64 = 0.75; +const MIN_BLOOM_EXPECTED_ITEMS: u64 = 64; +const U32_KEY_SPACE_SIZE: u64 = u32::MAX as u64 + 1; +const DEFAULT_NUM_KEYS: [u64; 10] = [ + 14_999, + 49_999, + 99_999, + 499_999, + 999_999, + 4_999_999, + 9_999_999, + 49_999_999, + 99_999_999, + 999_999_999, +]; + +// Build and memory mostly care about how much work or storage Roaring pays per +// touched 16-bit container, so these predictors stay intentionally simple and +// depend primarily on estimated keys per touched window. +const BUILD_ROARING_ESTIMATED_KEYS_PER_WINDOW_THRESHOLD: f64 = 4.0; +const MEMORY_ROARING_ESTIMATED_KEYS_PER_WINDOW_THRESHOLD: f64 = 32.0; + +// roaring-rs switches array containers to bitmap containers around 4096 keys. 
+// That transition materially changes lookup behavior, so the lookup predictor +// treats it as a first-class boundary. +const ROARING_BITMAP_CONTAINER_THRESHOLD: f64 = 4_096.0; + +// Lookup prediction is framed as a coarse cost proxy. If the estimated cost of +// reaching and searching a Roaring container stays below this budget, predict +// Roaring; otherwise predict Bloom. +const LOOKUP_ROARING_WINDOW_PROBABILITY_THRESHOLD: f64 = 0.1; +const LOOKUP_ROARING_BITMAP_WINDOW_PROBABILITY_PENALTY: f64 = 0.1; +const LOOKUP_ROARING_ARRAY_WINDOW_PROBABILITY_PENALTY_BASE: f64 = 0.25; +const LOOKUP_ROARING_ARRAY_WINDOW_PROBABILITY_PENALTY_PER_LOG2_KEY: f64 = 0.15; + +// Raw Chao1 fixes a real failure mode in sparse, very wide distributions, where +// the old uniform estimator badly under-counted touched windows and therefore +// over-predicted Roaring for random u32 lookups. Damping keeps that correction +// from overreacting on samples with only a small amount of singleton noise. +const TOUCHED_WINDOWS_CHAO1_DAMPING: f64 = 0.25; +const U32_WINDOW_COUNT: usize = 1 << 16; + +fn main() { + let args = Args::parse(); + let distributions = args.distributions(); + let num_keys_list = args.num_keys(); + let gaussian_means = args.gaussian_means(); + let gaussian_stddevs = args.gaussian_stddevs(); + args.validate( + &distributions, + &num_keys_list, + &gaussian_means, + &gaussian_stddevs, + ); + let run_configs = build_run_configs( + &args, + &distributions, + &num_keys_list, + &gaussian_means, + &gaussian_stddevs, + ); + let worker_threads = args.worker_threads(run_configs.len()); + + println!("benchmark=filter_predictor"); + println!( + "distributions={}", + distributions + .iter() + .map(|distribution| distribution.as_str()) + .collect::>() + .join(",") + ); + println!( + "num_keys={}", + num_keys_list + .iter() + .map(u64::to_string) + .collect::>() + .join(",") + ); + println!( + "gaussian_means={}", + gaussian_means + .iter() + .map(ToString::to_string) + .collect::>() + 
.join(",") + ); + println!( + "gaussian_stddevs={}", + gaussian_stddevs + .iter() + .map(ToString::to_string) + .collect::>() + .join(",") + ); + println!("repetitions={}", args.repetitions); + println!("distribution_seed={}", args.distribution_seed); + println!("sample_seed={}", args.sample_seed); + println!("lookup_seed={}", args.lookup_seed); + println!("threads={}", worker_threads); + println!("lookup_space={}", args.lookup_space.as_str()); + println!( + "sample_size_override_percent={}", + option_f64(args.sample_size) + ); + println!("lookup_count_override={}", option_u64(args.lookup_count)); + println!( + "bloom_false_positive_rate={}", + args.bloom_false_positive_rate + ); + println!("bloom_seed={}", args.bloom_seed); + println!( + "bloom_expected_items_override={}", + option_u64(args.bloom_expected_items) + ); + println!("csv_output={}", args.csv_output.display()); + println!(); + + let rows = execute_runs(&args, &run_configs, worker_threads); + + let csv_file = File::create(&args.csv_output) + .unwrap_or_else(|error| panic!("failed to create {}: {error}", args.csv_output.display())); + let mut csv_writer = Writer::from_writer(csv_file); + for row in &rows { + print_run_report(row); + csv_writer + .serialize(row) + .expect("failed to write filter predictor CSV row"); + } + csv_writer + .flush() + .expect("failed to flush filter predictor CSV"); + + let accuracy = summarize_accuracy(&rows); + print_summary(&rows, &accuracy); +} + +#[derive(Parser, Debug, Clone)] +#[command(name = "filter_predictor")] +#[command(about = "Benchmark a simple roaring-vs-bloom predictor on gaussian u32 keysets")] +struct Args { + /// Comma-separated key counts. Underscores and `u32::MAX` are accepted. + #[arg(long, value_name = "CSV")] + num_keys: Option, + + /// Comma-separated distribution families to run. + /// Supported values: `gaussian`, `consecutive`, `round_robin_window`, + /// `bimodal`, `exponential`. 
+ #[arg(long, value_name = "CSV")] + distributions: Option, + + /// Gaussian mean values expressed as fractions of `u32::MAX`. + /// Only used by the `gaussian` distribution family. + #[arg(long, value_name = "CSV")] + gaussian_means: Option, + + /// Spread parameters expressed as fractions of `u32::MAX`. + /// Used as: + /// - gaussian standard deviation for `gaussian` + /// - per-peak standard deviation for `bimodal` + /// - exponential scale for `exponential` + #[arg(long, value_name = "CSV")] + gaussian_stddevs: Option, + + /// Number of repeated runs per `(num_keys, mean, stddev)` configuration. + #[arg(long, default_value_t = 3)] + repetitions: usize, + + /// Number of benchmark configurations to run concurrently. + /// `1` keeps runs sequential. + #[arg(long, default_value_t = 1)] + threads: usize, + + /// Lookup workload. + /// `present` samples only keys from the batch. + /// `full_u32` samples random u32 keys from the full domain. + #[arg(long, value_enum, default_value_t = LookupSpace::FullU32)] + lookup_space: LookupSpace, + + /// Number of lookups to benchmark per run. + /// Defaults to `min(num_keys, 5_000_000)` for `present` and `5_000_000` + /// for `full_u32`. + #[arg(long)] + lookup_count: Option, + + /// Predictor sample size as a percentage of the batch. + /// For example, `0.1` samples 0.1% of the keys. + #[arg(long)] + sample_size: Option, + + /// Seed for gaussian key generation. + #[arg(long, default_value_t = 0)] + distribution_seed: u64, + + /// Seed for the predictor's internal sampling pass. + #[arg(long, default_value_t = 1)] + sample_seed: u64, + + /// Seed for randomized successful lookups. + #[arg(long, default_value_t = 2)] + lookup_seed: u64, + + /// Bloom filter false-positive rate. + #[arg(long, default_value_t = BLOOM_FILTER_FALSE_POSITIVE_RATE)] + bloom_false_positive_rate: f64, + + /// Bloom filter seed. 
+ #[arg(long, default_value_t = DEFAULT_BLOOM_SEED)] + bloom_seed: u128, + + /// Expected items passed to the bloom filter builder. + #[arg(long)] + bloom_expected_items: Option, + + /// Output CSV path. + #[arg(long, default_value = "filter_predictor.csv")] + csv_output: PathBuf, + + #[doc(hidden)] + #[arg(long = "bench", hide = true)] + __bench: bool, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, ValueEnum)] +enum LookupSpace { + Present, + FullU32, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, ValueEnum)] +enum DistributionKind { + Gaussian, + Consecutive, + RoundRobinWindow, + Bimodal, + Exponential, +} + +impl DistributionKind { + fn as_str(self) -> &'static str { + match self { + Self::Gaussian => "gaussian", + Self::Consecutive => "consecutive", + Self::RoundRobinWindow => "round_robin_window", + Self::Bimodal => "bimodal", + Self::Exponential => "exponential", + } + } + + fn uses_gaussian_mean(self) -> bool { + matches!(self, Self::Gaussian) + } + + fn uses_spread_param(self) -> bool { + matches!(self, Self::Gaussian | Self::Bimodal | Self::Exponential) + } +} + +const DEFAULT_DISTRIBUTIONS: [DistributionKind; 5] = [ + DistributionKind::Gaussian, + DistributionKind::Consecutive, + DistributionKind::RoundRobinWindow, + DistributionKind::Bimodal, + DistributionKind::Exponential, +]; + +impl LookupSpace { + fn as_str(self) -> &'static str { + match self { + Self::Present => "present", + Self::FullU32 => "full_u32", + } + } +} + +impl Args { + fn distributions(&self) -> Vec { + match &self.distributions { + Some(csv) => parse_distribution_csv(csv), + None => DEFAULT_DISTRIBUTIONS.to_vec(), + } + } + + fn num_keys(&self) -> Vec { + match &self.num_keys { + Some(csv) => parse_u64_csv(csv), + None => DEFAULT_NUM_KEYS.to_vec(), + } + } + + fn gaussian_means(&self) -> Vec { + match &self.gaussian_means { + Some(csv) => parse_f64_csv(csv, "--gaussian-means"), + None => DEFAULT_GAUSSIAN_MEAN_FRACTIONS.to_vec(), + } + } + + fn gaussian_stddevs(&self) -> Vec { + 
match &self.gaussian_stddevs { + Some(csv) => parse_f64_csv(csv, "--gaussian-stddevs"), + None => DEFAULT_GAUSSIAN_STDDEV_FRACTIONS.to_vec(), + } + } + + fn lookup_count_for(&self, num_keys: u64) -> u64 { + self.lookup_count + .map(|lookup_count| match self.lookup_space { + LookupSpace::Present => lookup_count.min(num_keys), + LookupSpace::FullU32 => lookup_count, + }) + .unwrap_or(match self.lookup_space { + LookupSpace::Present => num_keys.min(DEFAULT_LOOKUP_LIMIT), + LookupSpace::FullU32 => DEFAULT_LOOKUP_LIMIT, + }) + } + + fn sample_size_for(&self, num_keys: u64) -> usize { + match self.sample_size { + Some(sample_percent) => sample_count_from_percent(num_keys, sample_percent, 1), + None => default_sample_size(num_keys), + } + } + + fn worker_threads(&self, run_count: usize) -> usize { + self.threads.max(1).min(run_count.max(1)) + } + + fn validate( + &self, + distributions: &[DistributionKind], + num_keys_list: &[u64], + gaussian_means: &[f64], + gaussian_stddevs: &[f64], + ) { + assert!( + !distributions.is_empty(), + "--distributions must select at least one family" + ); + assert!( + !num_keys_list.is_empty(), + "--num-keys must select at least one size" + ); + if distributions + .iter() + .copied() + .any(DistributionKind::uses_gaussian_mean) + { + assert!( + !gaussian_means.is_empty(), + "--gaussian-means must select at least one value when gaussian is enabled" + ); + } + if distributions + .iter() + .copied() + .any(DistributionKind::uses_spread_param) + { + assert!( + !gaussian_stddevs.is_empty(), + "--gaussian-stddevs must select at least one value when gaussian, bimodal, or exponential is enabled" + ); + } + assert!( + self.repetitions > 0, + "--repetitions must be greater than zero" + ); + assert!(self.threads > 0, "--threads must be greater than zero"); + assert!( + self.bloom_false_positive_rate > 0.0 && self.bloom_false_positive_rate < 1.0, + "--bloom-false-positive-rate must be between 0 and 1" + ); + + for &num_keys in num_keys_list { + 
assert!(num_keys > 0, "--num-keys values must be greater than zero"); + assert!( + num_keys <= U32_KEY_SPACE_SIZE, + "--num-keys values must be <= {}", + U32_KEY_SPACE_SIZE + ); + } + for &gaussian_mean in gaussian_means { + assert!( + gaussian_mean.is_finite() && (0.0..=1.0).contains(&gaussian_mean), + "--gaussian-means values must be finite fractions in [0, 1]" + ); + } + for &gaussian_stddev in gaussian_stddevs { + assert!( + gaussian_stddev.is_finite() && gaussian_stddev > 0.0, + "--gaussian-stddevs values must be finite and greater than zero" + ); + } + if let Some(sample_percent) = self.sample_size { + assert!( + sample_percent.is_finite() && sample_percent > 0.0 && sample_percent <= 100.0, + "--sample-size must be a finite percentage in (0, 100]" + ); + } + if let Some(lookup_count) = self.lookup_count { + assert!(lookup_count > 0, "--lookup-count must be greater than zero"); + } + if let Some(bloom_expected_items) = self.bloom_expected_items { + assert!( + bloom_expected_items > 0, + "--bloom-expected-items must be greater than zero" + ); + } + } +} + +#[derive(Debug, Clone, Copy)] +struct GaussianDistribution { + mean_frac: f64, + stddev_frac: f64, +} + +impl GaussianDistribution { + fn mean_value(self) -> f64 { + self.mean_frac * u32::MAX as f64 + } + + fn stddev_value(self) -> f64 { + self.stddev_frac * u32::MAX as f64 + } +} + +#[derive(Debug, Clone, Copy)] +enum DistributionSpec { + Gaussian(GaussianDistribution), + Consecutive, + RoundRobinWindow, + Bimodal { stddev_frac: f64 }, + Exponential { scale_frac: f64 }, +} + +impl DistributionSpec { + fn as_str(self) -> &'static str { + match self { + Self::Gaussian(_) => "gaussian", + Self::Consecutive => "consecutive", + Self::RoundRobinWindow => "round_robin_window", + Self::Bimodal { .. } => "bimodal", + Self::Exponential { .. } => "exponential", + } + } + + fn parameter_name(self) -> &'static str { + match self { + Self::Gaussian(_) => "stddev_frac", + Self::Bimodal { .. 
} => "stddev_frac", + Self::Exponential { .. } => "scale_frac", + Self::Consecutive | Self::RoundRobinWindow => "none", + } + } + + fn parameter_frac(self) -> Option { + match self { + Self::Gaussian(distribution) => Some(distribution.stddev_frac), + Self::Bimodal { stddev_frac } => Some(stddev_frac), + Self::Exponential { scale_frac } => Some(scale_frac), + Self::Consecutive | Self::RoundRobinWindow => None, + } + } + + fn parameter_value(self) -> Option { + match self { + Self::Gaussian(distribution) => Some(distribution.stddev_value()), + Self::Bimodal { stddev_frac } => Some(stddev_frac * u32::MAX as f64), + Self::Exponential { scale_frac } => Some(scale_frac * u32::MAX as f64), + Self::Consecutive | Self::RoundRobinWindow => None, + } + } + + fn gaussian_mean_frac(self) -> Option { + match self { + Self::Gaussian(distribution) => Some(distribution.mean_frac), + Self::Consecutive + | Self::RoundRobinWindow + | Self::Bimodal { .. } + | Self::Exponential { .. } => None, + } + } + + fn gaussian_mean_value(self) -> Option { + match self { + Self::Gaussian(distribution) => Some(distribution.mean_value()), + Self::Consecutive + | Self::RoundRobinWindow + | Self::Bimodal { .. } + | Self::Exponential { .. } => None, + } + } + + fn gaussian_stddev_frac(self) -> Option { + match self { + Self::Gaussian(distribution) => Some(distribution.stddev_frac), + Self::Consecutive + | Self::RoundRobinWindow + | Self::Bimodal { .. } + | Self::Exponential { .. } => None, + } + } + + fn gaussian_stddev_value(self) -> Option { + match self { + Self::Gaussian(distribution) => Some(distribution.stddev_value()), + Self::Consecutive + | Self::RoundRobinWindow + | Self::Bimodal { .. } + | Self::Exponential { .. 
} => None, + } + } +} + +#[derive(Debug, Clone, Copy)] +struct RunConfig { + run_index: usize, + num_keys: u64, + distribution: DistributionSpec, + repetition: usize, + distribution_seed: u64, + sample_seed: u64, + lookup_seed: u64, +} + +fn build_run_configs( + args: &Args, + distributions: &[DistributionKind], + num_keys_list: &[u64], + gaussian_means: &[f64], + gaussian_stddevs: &[f64], +) -> Vec { + let mut run_configs = Vec::new(); + + for &num_keys in num_keys_list { + for &distribution_kind in distributions { + match distribution_kind { + DistributionKind::Gaussian => { + for &gaussian_mean_frac in gaussian_means { + for &gaussian_stddev_frac in gaussian_stddevs { + let distribution = DistributionSpec::Gaussian(GaussianDistribution { + mean_frac: gaussian_mean_frac, + stddev_frac: gaussian_stddev_frac, + }); + push_run_configs(&mut run_configs, args, num_keys, distribution); + } + } + } + DistributionKind::Consecutive => { + push_run_configs( + &mut run_configs, + args, + num_keys, + DistributionSpec::Consecutive, + ); + } + DistributionKind::RoundRobinWindow => { + push_run_configs( + &mut run_configs, + args, + num_keys, + DistributionSpec::RoundRobinWindow, + ); + } + DistributionKind::Bimodal => { + for &stddev_frac in gaussian_stddevs { + push_run_configs( + &mut run_configs, + args, + num_keys, + DistributionSpec::Bimodal { stddev_frac }, + ); + } + } + DistributionKind::Exponential => { + for &scale_frac in gaussian_stddevs { + push_run_configs( + &mut run_configs, + args, + num_keys, + DistributionSpec::Exponential { scale_frac }, + ); + } + } + } + } + } + + run_configs +} + +fn push_run_configs( + run_configs: &mut Vec, + args: &Args, + num_keys: u64, + distribution: DistributionSpec, +) { + for repetition in 0..args.repetitions { + run_configs.push(RunConfig { + run_index: run_configs.len(), + num_keys, + distribution, + repetition, + distribution_seed: args.distribution_seed.wrapping_add(repetition as u64), + sample_seed: 
args.sample_seed.wrapping_add(repetition as u64), + lookup_seed: args.lookup_seed.wrapping_add(repetition as u64), + }); + } +} + +fn execute_runs(args: &Args, run_configs: &[RunConfig], worker_threads: usize) -> Vec { + if worker_threads <= 1 { + return run_configs + .iter() + .copied() + .map(|run_config| run_single_config(args, run_config)) + .collect(); + } + + let next_index = AtomicUsize::new(0); + let (tx, rx) = mpsc::channel::<(usize, CsvRow)>(); + + thread::scope(|scope| { + for _ in 0..worker_threads { + let tx = tx.clone(); + let next_index = &next_index; + let run_configs = run_configs; + let args = args; + scope.spawn(move || { + loop { + let task_index = next_index.fetch_add(1, Ordering::Relaxed); + if task_index >= run_configs.len() { + break; + } + + let run_config = run_configs[task_index]; + let row = run_single_config(args, run_config); + tx.send((run_config.run_index, row)) + .expect("result receiver dropped unexpectedly"); + } + }); + } + + drop(tx); + + let mut rows_by_index: Vec> = std::iter::repeat_with(|| None) + .take(run_configs.len()) + .collect(); + for (run_index, row) in rx { + rows_by_index[run_index] = Some(row); + } + + rows_by_index + .into_iter() + .map(|row| row.expect("missing benchmark row")) + .collect() + }) +} + +fn run_single_config(args: &Args, run_config: RunConfig) -> CsvRow { + let generated_keys = generate_keys( + run_config.num_keys, + run_config.distribution, + run_config.distribution_seed, + ); + let batch = GeneratedBatch::new(generated_keys, run_config.sample_seed); + let lookup_count = args.lookup_count_for(run_config.num_keys); + let sample_size = args.sample_size_for(run_config.num_keys); + let sample_percent_of_batch = sample_size as f64 / run_config.num_keys as f64 * 100.0; + let bloom_expected_items = args + .bloom_expected_items + .unwrap_or(run_config.num_keys) + .max(MIN_BLOOM_EXPECTED_ITEMS); + + let predictor_stats = estimate_roaring_sample_stats(&batch, sample_size) + .expect("predictor sample should 
not be empty"); + let prediction = predict_filter_winner(&predictor_stats); + + let bloom = benchmark_bloom( + batch.keys(), + lookup_count, + run_config.lookup_seed, + args.lookup_space, + bloom_expected_items, + args.bloom_false_positive_rate, + args.bloom_seed, + ); + let roaring = benchmark_roaring( + batch.keys(), + lookup_count, + run_config.lookup_seed, + args.lookup_space, + ); + + let build_actual = actual_winner(bloom.build_ns_per_element, roaring.build_ns_per_element); + let lookup_actual = actual_winner(bloom.lookup_ns_per_element, roaring.lookup_ns_per_element); + let memory_actual = actual_winner(bloom.bytes_used as f64, roaring.bytes_used as f64); + + let build_prediction_correct = prediction.build_winner == build_actual; + let lookup_prediction_correct = prediction.lookup_winner == lookup_actual; + let memory_prediction_correct = prediction.memory_winner == memory_actual; + + CsvRow { + num_keys: run_config.num_keys, + distribution: run_config.distribution.as_str(), + distribution_param_name: run_config.distribution.parameter_name(), + distribution_param_frac: run_config.distribution.parameter_frac(), + distribution_param_value: run_config.distribution.parameter_value(), + gaussian_mean_frac: run_config.distribution.gaussian_mean_frac(), + gaussian_mean: run_config.distribution.gaussian_mean_value(), + gaussian_stddev_frac: run_config.distribution.gaussian_stddev_frac(), + gaussian_stddev: run_config.distribution.gaussian_stddev_value(), + repetition: run_config.repetition, + distribution_seed: run_config.distribution_seed, + sample_seed: run_config.sample_seed, + lookup_seed: run_config.lookup_seed, + lookup_space: args.lookup_space.as_str(), + lookup_count, + sample_size, + sample_percent_of_batch, + sample_fraction: sample_size as f64 / run_config.num_keys as f64, + bloom_false_positive_rate_target_percent: args.bloom_false_positive_rate * 100.0, + bloom_seed: args.bloom_seed, + bloom_expected_items, + predictor_sampled_keys: 
predictor_stats.sampled_keys, + predictor_distinct_windows: predictor_stats.distinct_windows, + predictor_avg_sample_keys_per_window: predictor_stats.avg_sample_keys_per_window, + predictor_same_window_rate: predictor_stats.same_window_rate, + predictor_estimated_keys_per_window: predictor_stats.estimated_keys_per_window, + predictor_estimated_touched_windows: predictor_stats.estimated_touched_windows, + predictor_estimated_window_fill_ratio: predictor_stats.estimated_window_fill_ratio, + predictor_density_score: prediction.density_score, + predictor_build_score: prediction.build_score, + predictor_lookup_score: prediction.lookup_score, + predictor_memory_score: prediction.memory_score, + predicted_build_winner: prediction.build_winner.as_str(), + predicted_lookup_winner: prediction.lookup_winner.as_str(), + predicted_memory_winner: prediction.memory_winner.as_str(), + bloom_build_ns_per_element: bloom.build_ns_per_element, + roaring_build_ns_per_element: roaring.build_ns_per_element, + build_ratio_bloom_over_roaring: bloom.build_ns_per_element / roaring.build_ns_per_element, + actual_build_winner: build_actual.as_str(), + build_prediction_correct, + bloom_lookup_ns_per_element: bloom.lookup_ns_per_element, + bloom_lookup_hits: bloom.lookup_hits, + bloom_lookup_hit_rate_percent: bloom.lookup_hits as f64 / lookup_count as f64 * 100.0, + roaring_lookup_ns_per_element: roaring.lookup_ns_per_element, + roaring_lookup_hits: roaring.lookup_hits, + roaring_lookup_hit_rate_percent: roaring.lookup_hits as f64 / lookup_count as f64 * 100.0, + lookup_ratio_bloom_over_roaring: bloom.lookup_ns_per_element + / roaring.lookup_ns_per_element, + actual_lookup_winner: lookup_actual.as_str(), + lookup_prediction_correct, + bloom_bytes_used: bloom.bytes_used, + roaring_bytes_used: roaring.bytes_used, + memory_ratio_bloom_over_roaring: bloom.bytes_used as f64 / roaring.bytes_used as f64, + actual_memory_winner: memory_actual.as_str(), + memory_prediction_correct, + } +} + 
+#[derive(Debug, Clone)] +struct GeneratedBatch { + keys: Vec, + sample_seed: u64, +} + +impl GeneratedBatch { + fn new(keys: Vec, sample_seed: u64) -> Self { + Self { keys, sample_seed } + } + + fn keys(&self) -> &[u32] { + &self.keys + } +} + +/// Minimal trait matching the predictor sketch. +pub trait SampleKeys { + fn sample_keys(&self, n: usize) -> Vec; + fn key_count(&self) -> usize; +} + +impl SampleKeys for GeneratedBatch { + fn sample_keys(&self, n: usize) -> Vec { + if self.keys.is_empty() { + return Vec::new(); + } + if n >= self.keys.len() { + return self.keys.clone(); + } + + let mut rng = ChaCha8Rng::seed_from_u64(self.sample_seed); + let mut indexes = sample(&mut rng, self.keys.len(), n).into_vec(); + indexes.sort_unstable(); + indexes.into_iter().map(|index| self.keys[index]).collect() + } + + fn key_count(&self) -> usize { + self.keys.len() + } +} + +#[derive(Debug, Clone)] +pub struct RoaringSampleStats { + /// Number of keys in the batch. + pub batch_keys: usize, + + /// Number of sampled keys actually returned. + pub sampled_keys: usize, + + /// Fraction of the batch included in the sample. + pub sample_fraction: f64, + + /// Number of distinct 16-bit windows (containers) touched by the sample. + pub distinct_windows: usize, + + /// Average number of sampled keys per touched window. + pub avg_sample_keys_per_window: f64, + + /// Fraction of adjacent sampled keys that stay in the same 2^16 window. + pub same_window_rate: f64, + + /// Estimated number of real keys per touched 16-bit window after + /// rescaling by the sample fraction. + pub estimated_keys_per_window: f64, + + /// Estimated number of distinct 16-bit windows touched by the full batch. + pub estimated_touched_windows: f64, + + /// Estimated occupancy of a touched window, normalized by 2^16. + pub estimated_window_fill_ratio: f64, +} + +/// Estimate Roaring-friendly batch structure from a small sample of keys. +/// +/// The estimator deliberately works in two layers: +/// 1. 
Sample `n` keys from the batch. +/// 2. Sort and dedup them so adjacency and per-window counts are stable. +/// 3. Bucket sampled keys by their high 16 bits, which matches Roaring's +/// top-level `u32` container layout. +/// 4. Compute sample-level statistics such as: +/// - sampled keys +/// - distinct touched windows +/// - average sampled keys per touched window +/// - adjacent-key same-window rate +/// 5. Rescale the sampled keys/window estimate by the sample fraction so large +/// dense batches do not look artificially sparse just because only a small +/// fraction of the batch was sampled. +/// 6. Estimate the full-batch touched-window count by combining: +/// - a uniform occupancy estimate, which works well when keys are spread +/// fairly evenly across windows +/// - a damped Chao1 correction, which helps when the sample is dominated by +/// singleton windows and the uniform estimate would under-count unseen +/// windows in sparse, wide distributions +/// 7. Derive the normalized window fill ratio from the estimated keys/window. +/// +/// Example: +/// - Suppose the batch contains `10_000` keys and we sample `1_000`. +/// - After sorting and deduping we still have `1_000` sampled keys, so the +/// sample fraction is `0.1`. +/// - If those sampled keys touch `50` distinct 16-bit windows, then the sample +/// average is `1_000 / 50 = 20` sampled keys per touched window. +/// - Rescaling by the sample fraction gives an estimated +/// `20 / 0.1 = 200` real keys per touched window. +/// - If most sampled windows are singletons, the Chao1-style correction will +/// push the touched-window estimate above the uniform estimate because the +/// sample is likely missing many windows entirely. +/// - If the sample instead shows repeated hits in the same windows, the uniform +/// estimate tends to dominate and the batch looks more Roaring-friendly. 
+pub fn estimate_roaring_sample_stats( + batch: &B, + n: usize, +) -> Option { + if n == 0 { + return None; + } + + let batch_keys = batch.key_count(); + if batch_keys == 0 { + return None; + } + + let mut keys = batch.sample_keys(n); + if keys.is_empty() { + return None; + } + + // Make adjacent-key and per-window statistics deterministic even if the + // caller samples in arbitrary order. + keys.sort_unstable(); + keys.dedup(); + + let sampled_keys = keys.len(); + if sampled_keys == 0 { + return None; + } + + let mut per_window: HashMap = HashMap::new(); + for &key in &keys { + let window = (key >> 16) as u16; + *per_window.entry(window).or_insert(0) += 1; + } + + let distinct_windows = per_window.len(); + let sample_fraction = sampled_keys as f64 / batch_keys as f64; + let avg_sample_keys_per_window = sampled_keys as f64 / distinct_windows as f64; + let same_window_rate = if sampled_keys > 1 { + (sampled_keys - distinct_windows) as f64 / (sampled_keys - 1) as f64 + } else { + 0.0 + }; + // The sampled average keys/window shrinks as batches get larger unless we + // scale it back up by the sample fraction. Without this rescaling, large + // but dense batches look artificially sparse and the predictor incorrectly + // drifts toward Bloom. + let estimated_keys_per_window = if sample_fraction > 0.0 { + (avg_sample_keys_per_window / sample_fraction).min(65_536.0) + } else { + 0.0 + }; + // Sparse, wide samples often show up as many singleton windows and very few + // doubletons. Those counts are exactly what the Chao1-style correction uses + // to estimate how many touched windows the sample likely missed entirely. 
+ let sample_singleton_windows = per_window.values().filter(|&&count| count == 1).count(); + let sample_doubleton_windows = per_window.values().filter(|&&count| count == 2).count(); + let estimated_touched_windows = estimate_touched_windows( + batch_keys, + sampled_keys, + distinct_windows, + sample_singleton_windows, + sample_doubleton_windows, + ); + let estimated_window_fill_ratio = estimated_keys_per_window / 65_536.0; + + Some(RoaringSampleStats { + batch_keys, + sampled_keys, + sample_fraction, + distinct_windows, + avg_sample_keys_per_window, + same_window_rate, + estimated_keys_per_window, + estimated_touched_windows, + estimated_window_fill_ratio, + }) +} + +/// Estimate how many distinct 16-bit Roaring windows the full batch touches. +/// +/// This function combines two signals: +/// 1. A uniform occupancy estimate that works well when touched windows are +/// fairly evenly populated. +/// 2. A Chao1-style unseen-window estimate that reacts when the sample is full +/// of singleton windows and therefore likely missing many windows entirely. +/// +/// Example: +/// - Suppose a batch of `10_000` keys is sampled down to `1_000` keys. +/// - The sample touches `50` distinct windows. +/// - If many of those `50` windows only appear once in the sample, that is a +/// hint that the sample is only seeing the tip of a much wider distribution. +/// - The uniform estimate might still say "roughly 70 windows total", while +/// Chao1 might say "closer to 200 windows total". +/// - We blend the two so sparse wide batches move upward, but not so far that +/// a little singleton noise completely dominates the estimate. +/// +/// This blend exists because the original uniform-only estimator was the main +/// reason the predictor failed on wide Gaussians: it under-counted touched +/// windows, which made random full-u32 Roaring lookups appear cheaper than +/// they really were. 
+fn estimate_touched_windows( + batch_keys: usize, + sampled_keys: usize, + distinct_windows: usize, + sample_singleton_windows: usize, + sample_doubleton_windows: usize, +) -> f64 { + if batch_keys == 0 || sampled_keys == 0 || distinct_windows == 0 { + return 0.0; + } + if sampled_keys >= batch_keys { + return distinct_windows as f64; + } + + let uniform_estimate = + estimate_uniform_touched_windows(batch_keys, sampled_keys, distinct_windows); + let chao1_estimate = estimate_chao1_touched_windows( + distinct_windows, + sample_singleton_windows, + sample_doubleton_windows, + ); + + // The original uniform estimate works well when occupancy is reasonably + // even, but it collapses badly on sparse wide Gaussians: it can turn a + // singleton-heavy sample into only ~1k touched windows, which then makes + // random full-u32 lookups look far more Roaring-friendly than they are. + // Blend in a damped Chao1 correction so unseen windows move the estimate in + // the right direction without letting Chao1 dominate every noisy sample. + arithmetic_blend( + uniform_estimate, + chao1_estimate, + TOUCHED_WINDOWS_CHAO1_DAMPING, + ) +} + +/// Estimate touched windows under a "roughly uniform occupancy" assumption. +/// +/// Intuition: +/// - Assume the full batch touches `W` windows and spreads keys across them +/// fairly evenly. +/// - Given the sample fraction, solve for the `W` that would yield the observed +/// sampled distinct-window count. +/// +/// Example: +/// - If a `10_000`-key batch is sampled at `10%`, and the sample sees `50` +/// distinct windows, this function asks: +/// "For what total window count would a 10% sample be expected to see about +/// 50 windows?" +/// - It binary-searches that answer between the sampled distinct count and the +/// theoretical maximum number of windows. +/// +/// This is the baseline estimator because it behaves sensibly on compact or +/// moderately regular distributions. 
It falls apart on sparse wide batches, +/// where many windows are touched so rarely that the sample never sees them. +fn estimate_uniform_touched_windows( + batch_keys: usize, + sampled_keys: usize, + distinct_windows: usize, +) -> f64 { + if batch_keys == 0 || sampled_keys == 0 || distinct_windows == 0 { + return 0.0; + } + if sampled_keys >= batch_keys { + return distinct_windows as f64; + } + + // This model assumes touched windows are roughly uniform and solves for the + // total window count that would yield the observed sampled distinct window + // count. It is a good baseline, but it systematically underestimates very + // sparse wide batches because those batches have many unseen windows. + let sample_fraction = sampled_keys as f64 / batch_keys as f64; + let mut low = distinct_windows as f64; + let mut high = batch_keys.min(U32_WINDOW_COUNT) as f64; + + if low >= high { + return low; + } + + let log_unseen = (-sample_fraction).ln_1p(); + for _ in 0..100 { + let mid = (low + high) * 0.5; + let avg_keys_per_window = batch_keys as f64 / mid; + let observed_windows = mid * (1.0 - (avg_keys_per_window * log_unseen).exp()); + + if observed_windows < distinct_windows as f64 { + low = mid; + } else { + high = mid; + } + } + + high +} + +/// Estimate touched windows with a Chao1-style unseen-species correction. +/// +/// Here the "species" are touched 16-bit windows: +/// - `distinct_windows` is how many windows the sample observed +/// - `sample_singleton_windows` counts windows seen exactly once +/// - `sample_doubleton_windows` counts windows seen exactly twice +/// +/// Example: +/// - If a sample touches `50` windows, with `35` singletons and `2` +/// doubletons, that pattern is strong evidence that many windows were missed +/// entirely. +/// - Chao1 turns that singleton-heavy shape into a larger touched-window +/// estimate than the uniform model would produce. 
+/// +/// Raw Chao1 is intentionally not used directly in the final predictor because +/// it can overreact when `f2` is tiny. We still compute it here because it is +/// the right directional correction for the sparse-wide failure mode. +fn estimate_chao1_touched_windows( + distinct_windows: usize, + sample_singleton_windows: usize, + sample_doubleton_windows: usize, +) -> f64 { + // Chao1 is a classic unseen-species estimator. Here the "species" are + // touched 16-bit windows, and singleton-heavy samples are evidence that the + // sample missed many windows entirely. + let chao1_estimate = if sample_doubleton_windows > 0 { + distinct_windows as f64 + + (sample_singleton_windows * sample_singleton_windows) as f64 + / (2.0 * sample_doubleton_windows as f64) + } else { + distinct_windows as f64 + + (sample_singleton_windows.saturating_mul(sample_singleton_windows.saturating_sub(1)) + / 2) as f64 + }; + + chao1_estimate + .max(distinct_windows as f64) + .min(U32_WINDOW_COUNT as f64) +} + +fn arithmetic_blend(current: f64, chao1: f64, alpha: f64) -> f64 { + // Raw Chao1 reacts strongly to singleton-heavy samples, which is useful for + // sparse wide batches but too aggressive to use directly. Blend it toward + // the previous uniform estimate so the predictor only partially trusts the + // unseen-window correction. 
+ current + alpha * (chao1 - current) +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +enum Winner { + Bloom, + Roaring, +} + +impl Winner { + fn as_str(self) -> &'static str { + match self { + Self::Bloom => "bloom", + Self::Roaring => "roaring", + } + } +} + +impl Display for Winner { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + +#[derive(Debug, Clone, Copy)] +struct PredictorOutput { + density_score: f64, + build_score: f64, + lookup_score: f64, + memory_score: f64, + build_winner: Winner, + lookup_winner: Winner, + memory_winner: Winner, +} + +/// Convert sampled structural estimates into coarse Bloom-vs-Roaring winners. +/// +/// The predictor intentionally uses different signals for different metrics: +/// - build: mostly "how many keys end up in each touched window?" +/// - memory: same question, but with a higher density threshold +/// - lookup: "how often does a random probe reach a touched window, and how +/// expensive is that container likely to be once it does?" +/// +/// Example: +/// - Suppose a batch looks dense after sampling, with many keys per touched +/// window and only a small touched-window fraction. That usually pushes all +/// three metrics toward Roaring. +/// - Suppose instead the batch is spread across a large fraction of the 16-bit +/// windows and each window only has a modest number of keys. That is the +/// "many sparse array containers" regime where lookup can flip toward Bloom. +/// +/// The lookup path is where most of the iterations happened: +/// - using only keys/window missed sparse-wide cases +/// - using touched windows without normalizing them was the wrong shape +/// - using a flat array penalty missed that `ArrayStore::contains()` gets +/// slower as array containers grow +/// +/// The current formula keeps the model simple while preserving those learned +/// corrections from the benchmark runs. 
/// Turns the sampled structural estimates into per-metric scores and
/// Bloom-vs-Roaring winners. A score of `>= 1.0` predicts Roaring for that
/// metric (see [`predicted_winner`]).
fn predict_filter_winner(stats: &RoaringSampleStats) -> PredictorOutput {
    let density_score = stats.estimated_window_fill_ratio;
    // Build and memory stay as simple density rules: if touched windows are
    // dense, Roaring tends to compress and build well; if they are sparse,
    // Bloom tends to be cheaper.
    let build_score =
        stats.estimated_keys_per_window / BUILD_ROARING_ESTIMATED_KEYS_PER_WINDOW_THRESHOLD;
    // For lookups we need more than density. Random u32 probes only pay inner
    // container cost when they land in a touched 16-bit window, so the touched
    // window estimate is normalized into a hit probability. If we omit this
    // term, the predictor cannot distinguish dense-in-a-few-windows batches
    // from equally dense batches spread across a large fraction of the domain.
    let lookup_window_probability =
        (stats.estimated_touched_windows / U32_WINDOW_COUNT as f64).clamp(0.0, 1.0);
    // roaring-rs switches between array and bitmap containers around 4096
    // elements. Bitmap containers are close to a constant-time bit test, but
    // array containers use binary search and get meaningfully slower as they
    // grow. Without this size-dependent array penalty, medium-N wide Gaussians
    // with many sparse array containers were still over-predicted as Roaring.
    let lookup_container_penalty =
        if stats.estimated_keys_per_window >= ROARING_BITMAP_CONTAINER_THRESHOLD {
            LOOKUP_ROARING_BITMAP_WINDOW_PROBABILITY_PENALTY
        } else {
            LOOKUP_ROARING_ARRAY_WINDOW_PROBABILITY_PENALTY_BASE
                + LOOKUP_ROARING_ARRAY_WINDOW_PROBABILITY_PENALTY_PER_LOG2_KEY
                    * (stats.estimated_keys_per_window + 1.0).log2()
        };
    // lookup_score >= 1.0 means the estimated Roaring lookup cost stays under
    // the current budget and we predict Roaring. The exact threshold is tuned
    // empirically from benchmark output; the important part is the shape above.
    let lookup_cost_proxy = lookup_window_probability * lookup_container_penalty;
    let lookup_score =
        LOOKUP_ROARING_WINDOW_PROBABILITY_THRESHOLD / lookup_cost_proxy.max(f64::MIN_POSITIVE);
    let memory_score =
        stats.estimated_keys_per_window / MEMORY_ROARING_ESTIMATED_KEYS_PER_WINDOW_THRESHOLD;

    PredictorOutput {
        density_score,
        build_score,
        lookup_score,
        memory_score,
        build_winner: predicted_winner(build_score),
        lookup_winner: predicted_winner(lookup_score),
        memory_winner: predicted_winner(memory_score),
    }
}

/// Maps a score to a winner: `>= 1.0` favors Roaring, otherwise Bloom.
fn predicted_winner(score: f64) -> Winner {
    if score >= 1.0 {
        Winner::Roaring
    } else {
        Winner::Bloom
    }
}

/// Timing and size results for one filter implementation in one run.
#[derive(Debug, Clone, Copy)]
struct Measurement {
    build_ns_per_element: f64,
    lookup_ns_per_element: f64,
    lookup_hits: u64,
    bytes_used: usize,
}

/// Builds a Bloom filter over `keys` and measures build time, lookup time,
/// hit count, and the filter's bit-storage footprint.
fn benchmark_bloom(
    keys: &[u32],
    lookup_count: u64,
    lookup_seed: u64,
    lookup_space: LookupSpace,
    bloom_expected_items: u64,
    bloom_false_positive_rate: f64,
    bloom_seed: u128,
) -> Measurement {
    let expected_items =
        usize::try_from(bloom_expected_items).expect("bloom expected items must fit in usize");
    // Clamp the sizing to a minimum so tiny batches still get a usable filter.
    let mut bloom = BloomFilter::with_false_pos(bloom_false_positive_rate)
        .seed(&bloom_seed)
        .expected_items(expected_items.max(MIN_BLOOM_EXPECTED_ITEMS as usize));

    let build_started = Instant::now();
    for &key in keys {
        bloom.insert(&key);
    }
    let build_elapsed = build_started.elapsed();

    let (lookup_elapsed, hits) =
        benchmark_lookup(keys, lookup_count, lookup_seed, lookup_space, |key| {
            bloom.contains(&key)
        });

    Measurement {
        build_ns_per_element: build_elapsed.as_nanos() as f64 / keys.len() as f64,
        lookup_ns_per_element: lookup_elapsed.as_nanos() as f64 / lookup_count as f64,
        lookup_hits: hits,
        // Only the bit array is counted, comparable to the serialized size
        // reported for Roaring below.
        bytes_used: size_of_val(bloom.as_slice()),
    }
}

/// Builds a Roaring bitmap over `keys` and measures build time, lookup time,
/// hit count, and serialized size.
fn benchmark_roaring(
    keys: &[u32],
    lookup_count: u64,
    lookup_seed: u64,
    lookup_space: LookupSpace,
) -> Measurement {
    let build_started = Instant::now();
    // Every generator in this benchmark produces strictly increasing keys
    // (consecutive, round-robin, and the projected random distributions), so
    // the fast sorted constructor applies.
    let mut bitmap = RoaringBitmap::from_sorted_iter(keys.iter().copied())
        .expect("sorted unique keys should build a roaring bitmap");
    let build_elapsed = build_started.elapsed();
    // Container optimization runs after the build clock stops: lookups and
    // serialized size benefit from it while the build metric does not pay for
    // it. The returned "did anything change" flag is irrelevant here.
    // NOTE(review): confirm excluding optimize() from build time is intended.
    let _ = bitmap.optimize();

    let (lookup_elapsed, hits) =
        benchmark_lookup(keys, lookup_count, lookup_seed, lookup_space, |key| {
            bitmap.contains(key)
        });

    Measurement {
        build_ns_per_element: build_elapsed.as_nanos() as f64 / keys.len() as f64,
        lookup_ns_per_element: lookup_elapsed.as_nanos() as f64 / lookup_count as f64,
        lookup_hits: hits,
        bytes_used: bitmap.serialized_size(),
    }
}

/// Runs `lookup_count` probes against `contains` and returns the elapsed
/// time together with the number of hits.
///
/// `Present` mode probes only keys known to be in the batch, visited in a
/// cheap pseudo-random order; `FullU32` mode probes uniform random `u32`s.
fn benchmark_lookup<F>(
    keys: &[u32],
    lookup_count: u64,
    lookup_seed: u64,
    lookup_space: LookupSpace,
    mut contains: F,
) -> (std::time::Duration, u64)
where
    F: FnMut(u32) -> bool,
{
    let lookup_started = Instant::now();
    let hits = match lookup_space {
        LookupSpace::Present => {
            // NOTE(review): if `lookup_count > keys.len()`, positions passed
            // to `index_at` exceed the permutation length; the modulo inside
            // wraps correctly in release builds but its
            // `debug_assert!(position < len)` would fire in debug builds —
            // confirm callers cap `lookup_count`.
            let lookup_permutation = AffinePermutation::random(keys.len() as u64, lookup_seed);
            let mut hits = 0u64;
            for index in 0..lookup_count {
                let key = keys[lookup_permutation.index_at(index) as usize];
                hits += u64::from(contains(key));
            }
            // Present-key probes must never miss; a miss would mean the
            // filter under test dropped a real key.
            assert_eq!(
                hits, lookup_count,
                "expected all present lookup keys to be present"
            );
            hits
        }
        LookupSpace::FullU32 => {
            let mut rng = ChaCha8Rng::seed_from_u64(lookup_seed);
            let mut hits = 0u64;
            for _ in 0..lookup_count {
                hits += u64::from(contains(rng.next_u32()));
            }
            hits
        }
    };
    (lookup_started.elapsed(), hits)
}

/// Lower measured value wins; exact ties go to Bloom.
fn actual_winner(bloom_value: f64, roaring_value: f64) -> Winner {
    if roaring_value < bloom_value {
        Winner::Roaring
    } else {
        Winner::Bloom
    }
}

/// One CSV output row per benchmark run. Field order defines the CSV column
/// order emitted by `csv::Writer`.
#[derive(Debug, Serialize)]
struct CsvRow {
    num_keys: u64,
    distribution: &'static str,
    distribution_param_name: &'static str,
    distribution_param_frac: Option<f64>,
    distribution_param_value: Option<f64>,
    gaussian_mean_frac: Option<f64>,
    gaussian_mean: Option<f64>,
    gaussian_stddev_frac: Option<f64>,
    gaussian_stddev: Option<f64>,
    repetition: usize,
    distribution_seed: u64,
+ sample_seed: u64, + lookup_seed: u64, + lookup_space: &'static str, + lookup_count: u64, + sample_size: usize, + sample_percent_of_batch: f64, + sample_fraction: f64, + bloom_false_positive_rate_target_percent: f64, + bloom_seed: u128, + bloom_expected_items: u64, + predictor_sampled_keys: usize, + predictor_distinct_windows: usize, + predictor_avg_sample_keys_per_window: f64, + predictor_same_window_rate: f64, + predictor_estimated_keys_per_window: f64, + predictor_estimated_touched_windows: f64, + predictor_estimated_window_fill_ratio: f64, + predictor_density_score: f64, + predictor_build_score: f64, + predictor_lookup_score: f64, + predictor_memory_score: f64, + predicted_build_winner: &'static str, + predicted_lookup_winner: &'static str, + predicted_memory_winner: &'static str, + bloom_build_ns_per_element: f64, + roaring_build_ns_per_element: f64, + build_ratio_bloom_over_roaring: f64, + actual_build_winner: &'static str, + build_prediction_correct: bool, + bloom_lookup_ns_per_element: f64, + bloom_lookup_hits: u64, + bloom_lookup_hit_rate_percent: f64, + roaring_lookup_ns_per_element: f64, + roaring_lookup_hits: u64, + roaring_lookup_hit_rate_percent: f64, + lookup_ratio_bloom_over_roaring: f64, + actual_lookup_winner: &'static str, + lookup_prediction_correct: bool, + bloom_bytes_used: usize, + roaring_bytes_used: usize, + memory_ratio_bloom_over_roaring: f64, + actual_memory_winner: &'static str, + memory_prediction_correct: bool, +} + +#[derive(Debug, Default)] +struct AccuracySummary { + runs: usize, + build_correct: usize, + lookup_correct: usize, + memory_correct: usize, +} + +fn summarize_accuracy(rows: &[CsvRow]) -> AccuracySummary { + let mut accuracy = AccuracySummary::default(); + + for row in rows { + accuracy.runs += 1; + accuracy.build_correct += usize::from(row.build_prediction_correct); + accuracy.lookup_correct += usize::from(row.lookup_prediction_correct); + accuracy.memory_correct += usize::from(row.memory_prediction_correct); + } + + 
accuracy +} + +fn print_summary(rows: &[CsvRow], accuracy: &AccuracySummary) { + let wrong_rows: Vec<&CsvRow> = rows + .iter() + .filter(|row| { + !row.build_prediction_correct + || !row.lookup_prediction_correct + || !row.memory_prediction_correct + }) + .collect(); + let wrong_metric_predictions = wrong_rows + .iter() + .map(|row| { + usize::from(!row.build_prediction_correct) + + usize::from(!row.lookup_prediction_correct) + + usize::from(!row.memory_prediction_correct) + }) + .sum::(); + + println!("summary.runs={}", accuracy.runs); + println!( + "accuracy.build={}/{}", + accuracy.build_correct, accuracy.runs + ); + println!( + "accuracy.lookup={}/{}", + accuracy.lookup_correct, accuracy.runs + ); + println!( + "accuracy.memory={}/{}", + accuracy.memory_correct, accuracy.runs + ); + println!("wrong_predictions.run_count={}", wrong_rows.len()); + println!( + "wrong_predictions.metric_count={}", + wrong_metric_predictions + ); + + for row in wrong_rows { + println!( + "wrong_prediction {} num_keys={} repetition={} sample_size={} sample_percent_of_batch={:.6}", + distribution_summary_fields(row), + row.num_keys, + row.repetition, + row.sample_size, + row.sample_percent_of_batch + ); + println!( + "wrong_prediction.predictor avg_sample_keys_per_window={:.6} same_window_rate={:.6} estimated_keys_per_window={:.6} estimated_touched_windows={:.6} estimated_window_fill_ratio={:.6}", + row.predictor_avg_sample_keys_per_window, + row.predictor_same_window_rate, + row.predictor_estimated_keys_per_window, + row.predictor_estimated_touched_windows, + row.predictor_estimated_window_fill_ratio + ); + + if !row.build_prediction_correct { + println!( + "wrong_prediction.build predicted={} actual={} score={:.6} bloom_over_roaring={:.6}", + row.predicted_build_winner, + row.actual_build_winner, + row.predictor_build_score, + row.build_ratio_bloom_over_roaring + ); + } + if !row.lookup_prediction_correct { + println!( + "wrong_prediction.lookup predicted={} actual={} score={:.6} 
bloom_over_roaring={:.6}", + row.predicted_lookup_winner, + row.actual_lookup_winner, + row.predictor_lookup_score, + row.lookup_ratio_bloom_over_roaring + ); + } + if !row.memory_prediction_correct { + println!( + "wrong_prediction.memory predicted={} actual={} score={:.6} bloom_over_roaring={:.6}", + row.predicted_memory_winner, + row.actual_memory_winner, + row.predictor_memory_score, + row.memory_ratio_bloom_over_roaring + ); + } + } +} + +fn print_run_report(row: &CsvRow) { + println!("distribution={}", row.distribution); + println!("distribution_param_name={}", row.distribution_param_name); + println!( + "distribution_param_frac={}", + option_f64(row.distribution_param_frac) + ); + println!( + "distribution_param_value={}", + option_f64(row.distribution_param_value) + ); + println!("num_keys={}", row.num_keys); + println!("gaussian_mean_frac={}", option_f64(row.gaussian_mean_frac)); + println!("gaussian_mean={}", option_f64(row.gaussian_mean)); + println!( + "gaussian_stddev_frac={}", + option_f64(row.gaussian_stddev_frac) + ); + println!("gaussian_stddev={}", option_f64(row.gaussian_stddev)); + println!("repetition={}", row.repetition); + println!("lookup_space={}", row.lookup_space); + println!("sample_size={}", row.sample_size); + println!("sample_percent_of_batch={:.6}", row.sample_percent_of_batch); + println!("lookup_count={}", row.lookup_count); + println!("predictor.sampled_keys={}", row.predictor_sampled_keys); + println!( + "predictor.distinct_windows={}", + row.predictor_distinct_windows + ); + println!( + "predictor.avg_sample_keys_per_window={:.6}", + row.predictor_avg_sample_keys_per_window + ); + println!( + "predictor.same_window_rate={:.6}", + row.predictor_same_window_rate + ); + println!( + "predictor.estimated_keys_per_window={:.6}", + row.predictor_estimated_keys_per_window + ); + println!( + "predictor.estimated_touched_windows={:.6}", + row.predictor_estimated_touched_windows + ); + println!( + 
"predictor.estimated_window_fill_ratio={:.6}", + row.predictor_estimated_window_fill_ratio + ); + println!("predictor.build_score={:.6}", row.predictor_build_score); + println!("predictor.lookup_score={:.6}", row.predictor_lookup_score); + println!("predictor.memory_score={:.6}", row.predictor_memory_score); + println!("predicted.build_winner={}", row.predicted_build_winner); + println!("predicted.lookup_winner={}", row.predicted_lookup_winner); + println!("predicted.memory_winner={}", row.predicted_memory_winner); + println!( + "bloom.build_ns_per_element={:.6}", + row.bloom_build_ns_per_element + ); + println!( + "roaring.build_ns_per_element={:.6}", + row.roaring_build_ns_per_element + ); + println!( + "build_ratio_bloom_over_roaring={:.6}", + row.build_ratio_bloom_over_roaring + ); + println!("actual.build_winner={}", row.actual_build_winner); + println!("build_prediction_correct={}", row.build_prediction_correct); + println!( + "bloom.lookup_ns_per_element={:.6}", + row.bloom_lookup_ns_per_element + ); + println!("bloom.lookup_hits={}", row.bloom_lookup_hits); + println!( + "bloom.lookup_hit_rate_percent={:.6}", + row.bloom_lookup_hit_rate_percent + ); + println!( + "roaring.lookup_ns_per_element={:.6}", + row.roaring_lookup_ns_per_element + ); + println!("roaring.lookup_hits={}", row.roaring_lookup_hits); + println!( + "roaring.lookup_hit_rate_percent={:.6}", + row.roaring_lookup_hit_rate_percent + ); + println!( + "lookup_ratio_bloom_over_roaring={:.6}", + row.lookup_ratio_bloom_over_roaring + ); + println!("actual.lookup_winner={}", row.actual_lookup_winner); + println!( + "lookup_prediction_correct={}", + row.lookup_prediction_correct + ); + println!("bloom.bytes_used={}", row.bloom_bytes_used); + println!("roaring.bytes_used={}", row.roaring_bytes_used); + println!( + "memory_ratio_bloom_over_roaring={:.6}", + row.memory_ratio_bloom_over_roaring + ); + println!("actual.memory_winner={}", row.actual_memory_winner); + println!( + 
"memory_prediction_correct={}", + row.memory_prediction_correct + ); + println!(); +} + +#[derive(Clone, Copy, Debug)] +struct AffinePermutation { + len: u64, + multiplier: u64, + offset: u64, +} + +impl AffinePermutation { + fn sequential(len: u64) -> Self { + Self { + len, + multiplier: 1, + offset: 0, + } + } + + fn random(len: u64, seed: u64) -> Self { + if len <= 1 { + return Self::sequential(len); + } + let mut rng = ChaCha8Rng::seed_from_u64(seed); + let mut multiplier = (rng.next_u64() % len) | 1; + while gcd(multiplier, len) != 1 { + multiplier = (multiplier + 2) % len; + if multiplier == 0 { + multiplier = 1; + } + } + let offset = rng.next_u64() % len; + Self { + len, + multiplier, + offset, + } + } + + fn index_at(&self, position: u64) -> u64 { + debug_assert!(position < self.len); + (self + .multiplier + .wrapping_mul(position) + .wrapping_add(self.offset)) + % self.len + } +} + +fn gcd(mut lhs: u64, mut rhs: u64) -> u64 { + while rhs != 0 { + let next = lhs % rhs; + lhs = rhs; + rhs = next; + } + lhs +} + +fn generate_keys(num_keys: u64, distribution: DistributionSpec, seed: u64) -> Vec { + match distribution { + DistributionSpec::Gaussian(distribution) => { + generate_gaussian_keys(num_keys, distribution, seed) + } + DistributionSpec::Consecutive => generate_consecutive_keys(num_keys), + DistributionSpec::RoundRobinWindow => generate_round_robin_window_keys(num_keys), + DistributionSpec::Bimodal { stddev_frac } => { + generate_bimodal_keys(num_keys, stddev_frac, seed) + } + DistributionSpec::Exponential { scale_frac } => { + generate_exponential_keys(num_keys, scale_frac, seed) + } + } +} + +fn generate_gaussian_keys( + num_keys: u64, + distribution: GaussianDistribution, + seed: u64, +) -> Vec { + let len = usize::try_from(num_keys).expect("num_keys must fit in usize"); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + let normal = Normal::new(distribution.mean_value(), distribution.stddev_value()) + .expect("gaussian distribution should have a 
positive standard deviation"); + let mut keys = Vec::with_capacity(len); + + for _ in 0..num_keys { + let sampled = normal.sample(&mut rng).round(); + keys.push(sampled.clamp(0.0, u32::MAX as f64) as u32); + } + + keys.sort_unstable(); + project_sorted_unique_u32_domain(&mut keys); + keys +} + +fn generate_consecutive_keys(num_keys: u64) -> Vec { + let len = usize::try_from(num_keys).expect("num_keys must fit in usize"); + (0..len) + .map(|index| u32::try_from(index).expect("consecutive key exceeded u32 domain")) + .collect() +} + +fn generate_round_robin_window_keys(num_keys: u64) -> Vec { + let len = usize::try_from(num_keys).expect("num_keys must fit in usize"); + let mut keys = Vec::with_capacity(len); + let full_layers = num_keys / U32_WINDOW_COUNT as u64; + let partial_windows = num_keys % U32_WINDOW_COUNT as u64; + + for window in 0..U32_WINDOW_COUNT as u64 { + let keys_in_window = full_layers + u64::from(window < partial_windows); + let window_base = window << 16; + for low in 0..keys_in_window { + keys.push( + u32::try_from(window_base + low).expect("round-robin key exceeded u32 domain"), + ); + } + } + + debug_assert_eq!(keys.len(), len); + keys +} + +fn generate_bimodal_keys(num_keys: u64, stddev_frac: f64, seed: u64) -> Vec { + let len = usize::try_from(num_keys).expect("num_keys must fit in usize"); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + let left = Normal::new( + BIMODAL_LEFT_PEAK_FRAC * u32::MAX as f64, + stddev_frac * u32::MAX as f64, + ) + .expect("bimodal distribution should have a positive standard deviation"); + let right = Normal::new( + BIMODAL_RIGHT_PEAK_FRAC * u32::MAX as f64, + stddev_frac * u32::MAX as f64, + ) + .expect("bimodal distribution should have a positive standard deviation"); + let mut keys = Vec::with_capacity(len); + + for _ in 0..num_keys { + let sampled = if rng.next_u32() & 1 == 0 { + left.sample(&mut rng) + } else { + right.sample(&mut rng) + } + .round(); + keys.push(sampled.clamp(0.0, u32::MAX as f64) as u32); 
+ } + + keys.sort_unstable(); + project_sorted_unique_u32_domain(&mut keys); + keys +} + +fn generate_exponential_keys(num_keys: u64, scale_frac: f64, seed: u64) -> Vec { + let len = usize::try_from(num_keys).expect("num_keys must fit in usize"); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + let scale = (scale_frac * u32::MAX as f64).max(f64::MIN_POSITIVE); + let distribution = + Exp::new(1.0 / scale).expect("exponential distribution should have a positive scale"); + let mut keys = Vec::with_capacity(len); + + for _ in 0..num_keys { + let sampled = distribution.sample(&mut rng).round(); + keys.push(sampled.clamp(0.0, u32::MAX as f64) as u32); + } + + keys.sort_unstable(); + project_sorted_unique_u32_domain(&mut keys); + keys +} + +fn default_sample_size(num_keys: u64) -> usize { + sample_count_from_percent(num_keys, DEFAULT_SAMPLE_PERCENT, DEFAULT_MIN_SAMPLE_SIZE) +} + +fn parse_u64_csv(csv: &str) -> Vec { + let mut out: Vec = csv + .split(',') + .filter(|entry| !entry.trim().is_empty()) + .map(|entry| { + parse_u64_token(entry.trim()) + .unwrap_or_else(|error| panic!("invalid u64 in CSV: {entry} ({error})")) + }) + .collect(); + out.sort_unstable(); + out.dedup(); + out +} + +fn parse_f64_csv(csv: &str, flag_name: &str) -> Vec { + let mut out: Vec = csv + .split(',') + .filter(|entry| !entry.trim().is_empty()) + .map(|entry| { + entry + .trim() + .parse::() + .unwrap_or_else(|error| panic!("invalid f64 in {flag_name}: {entry} ({error})")) + }) + .collect(); + out.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).expect("NaN was already rejected")); + out.dedup(); + out +} + +fn parse_distribution_csv(csv: &str) -> Vec { + let mut out = Vec::new(); + + for token in csv.split(',').filter(|entry| !entry.trim().is_empty()) { + let normalized = token.trim().replace('_', "-"); + let distribution = DistributionKind::from_str(&normalized, true).unwrap_or_else(|error| { + panic!("invalid distribution in --distributions: {token} ({error})") + }); + if 
!out.contains(&distribution) { + out.push(distribution); + } + } + + out +} + +fn parse_u64_token(token: &str) -> Result { + match token { + "u32::MAX" | "u32_max" | "max_u32" => Ok(u32::MAX as u64), + _ => token + .replace('_', "") + .parse::() + .map_err(|error| error.to_string()), + } +} + +fn project_sorted_unique_u32_domain(keys: &mut [u32]) { + if keys.is_empty() { + return; + } + + for (index, key) in keys.iter_mut().enumerate() { + let min_key = u32::try_from(index).expect("key count exceeded u32 domain"); + if *key < min_key { + *key = min_key; + } + } + + for index in (0..keys.len()).rev() { + let tail = keys.len() - 1 - index; + let max_key = u32::MAX + .checked_sub(u32::try_from(tail).expect("key count exceeded u32 domain")) + .expect("tail adjustment underflowed"); + if keys[index] > max_key { + keys[index] = max_key; + } + if index + 1 < keys.len() && keys[index] >= keys[index + 1] { + keys[index] = keys[index + 1] - 1; + } + } + + debug_assert!(keys.windows(2).all(|window| window[0] < window[1])); +} + +fn option_u64(value: Option) -> String { + value + .map(|value| value.to_string()) + .unwrap_or_else(|| "auto".to_string()) +} + +fn option_f64(value: Option) -> String { + value + .map(|value| value.to_string()) + .unwrap_or_else(|| "auto".to_string()) +} + +fn distribution_summary_fields(row: &CsvRow) -> String { + let mut fields = format!("distribution={}", row.distribution); + if let Some(gaussian_mean_frac) = row.gaussian_mean_frac { + fields.push_str(&format!(" gaussian_mean_frac={gaussian_mean_frac}")); + } + if let Some(distribution_param_frac) = row.distribution_param_frac { + fields.push_str(&format!( + " {}={distribution_param_frac}", + row.distribution_param_name + )); + } + fields +} + +fn sample_count_from_percent(num_keys: u64, sample_percent: f64, min_sample_size: usize) -> usize { + let scaled = ((num_keys as f64) * (sample_percent / 100.0)).ceil() as u64; + let sample_size = scaled.max(min_sample_size as u64).min(num_keys); + 
usize::try_from(sample_size).expect("sample size must fit in usize") +} diff --git a/crates/dbsp/src/circuit/metadata.rs b/crates/dbsp/src/circuit/metadata.rs index 5522bd42e96..d0c3001b97b 100644 --- a/crates/dbsp/src/circuit/metadata.rs +++ b/crates/dbsp/src/circuit/metadata.rs @@ -136,6 +136,14 @@ pub const BLOOM_FILTER_MISSES_COUNT: MetricId = pub const BLOOM_FILTER_HIT_RATE_PERCENT: MetricId = MetricId(Cow::Borrowed("bloom_filter_hit_rate_percent")); pub const BLOOM_FILTER_SIZE_BYTES: MetricId = MetricId(Cow::Borrowed("bloom_filter_size_bytes")); +pub const ROARING_FILTER_HITS_COUNT: MetricId = + MetricId(Cow::Borrowed("roaring_filter_hits_count")); +pub const ROARING_FILTER_MISSES_COUNT: MetricId = + MetricId(Cow::Borrowed("roaring_filter_misses_count")); +pub const ROARING_FILTER_HIT_RATE_PERCENT: MetricId = + MetricId(Cow::Borrowed("roaring_filter_hit_rate_percent")); +pub const ROARING_FILTER_SIZE_BYTES: MetricId = + MetricId(Cow::Borrowed("roaring_filter_size_bytes")); pub const RANGE_FILTER_HITS_COUNT: MetricId = MetricId(Cow::Borrowed("range_filter_hits_count")); pub const RANGE_FILTER_MISSES_COUNT: MetricId = MetricId(Cow::Borrowed("range_filter_misses_count")); @@ -167,7 +175,7 @@ pub const PREFIX_BATCHES_STATS: MetricId = MetricId(Cow::Borrowed("prefix_batche pub const INPUT_INTEGRAL_RECORDS_COUNT: MetricId = MetricId(Cow::Borrowed("input_integral_records_count")); -pub const CIRCUIT_METRICS: [CircuitMetric; 70] = [ +pub const CIRCUIT_METRICS: [CircuitMetric; 74] = [ // State CircuitMetric { name: USED_MEMORY_BYTES, @@ -269,7 +277,7 @@ pub const CIRCUIT_METRICS: [CircuitMetric; 70] = [ name: BLOOM_FILTER_BITS_PER_KEY, category: CircuitMetricCategory::State, advanced: false, - description: "Average number of bits per key in the Bloom filter.", + description: "Average number of bits per key across batches that use a Bloom filter.", }, CircuitMetric { name: BLOOM_FILTER_SIZE_BYTES, @@ -295,6 +303,30 @@ pub const CIRCUIT_METRICS: [CircuitMetric; 70] = [ 
advanced: false, description: "Hit rate of the Bloom filter.", }, + CircuitMetric { + name: ROARING_FILTER_SIZE_BYTES, + category: CircuitMetricCategory::State, + advanced: false, + description: "Size of the bitmap filter in bytes.", + }, + CircuitMetric { + name: ROARING_FILTER_HITS_COUNT, + category: CircuitMetricCategory::State, + advanced: false, + description: "The number of hits across all bitmap filters. The hits are summed across the bitmap filters for all batches in the spine.", + }, + CircuitMetric { + name: ROARING_FILTER_MISSES_COUNT, + category: CircuitMetricCategory::State, + advanced: false, + description: "The number of misses across all bitmap filters. The misses are summed across the bitmap filters for all batches in the spine.", + }, + CircuitMetric { + name: ROARING_FILTER_HIT_RATE_PERCENT, + category: CircuitMetricCategory::State, + advanced: false, + description: "Hit rate of the bitmap filter.", + }, CircuitMetric { name: RANGE_FILTER_SIZE_BYTES, category: CircuitMetricCategory::State, diff --git a/crates/dbsp/src/dynamic/data.rs b/crates/dbsp/src/dynamic/data.rs index ce1645a1c27..f328ad90106 100644 --- a/crates/dbsp/src/dynamic/data.rs +++ b/crates/dbsp/src/dynamic/data.rs @@ -12,6 +12,7 @@ use crate::{ rkyv::SerializeDyn, }, hash::default_hash, + utils::SupportsRoaring, }; /// Defines the minimal set of operations that must be supported by @@ -19,7 +20,16 @@ use crate::{ /// /// This trait is object safe and can be invoked via dynamic dispatch. pub trait Data: - Comparable + Clonable + SerializeDyn + DeserializableDyn + Send + Sync + Debug + AsAny + SizeOf + Comparable + + Clonable + + SerializeDyn + + DeserializableDyn + + Send + + Sync + + Debug + + AsAny + + SizeOf + + SupportsRoaring { /// Compute a hash of the object using default hasher and seed. 
fn default_hash(&self) -> u64; diff --git a/crates/dbsp/src/storage.rs b/crates/dbsp/src/storage.rs index 25c5557567c..cb2fa45d11a 100644 --- a/crates/dbsp/src/storage.rs +++ b/crates/dbsp/src/storage.rs @@ -7,7 +7,6 @@ pub mod backend; pub mod buffer_cache; pub mod dirlock; pub mod file; -pub mod filter_stats; pub mod tracking_bloom_filter; use fdlimit::{Outcome::LimitRaised, raise_fd_limit}; diff --git a/crates/dbsp/src/storage/file.rs b/crates/dbsp/src/storage/file.rs index ec32a6b3729..e3abc88bfae 100644 --- a/crates/dbsp/src/storage/file.rs +++ b/crates/dbsp/src/storage/file.rs @@ -36,8 +36,10 @@ //! value and for sequential reads. It should be possible to disable indexing //! by data value for workloads that don't require it. //! -//! Layer files support approximate set membership query in `~O(1)` time using -//! [a filter block](format::FilterBlock). +//! Layer files support cheap key-membership tests using a per-batch filter +//! block. The default filter is Bloom-based; key types whose per-batch span +//! fits in `u32` can alternatively use an exact roaring bitmap filter by +//! storing keys relative to the batch minimum. //! //! Layer files should support 1 TB data size. //! @@ -98,6 +100,7 @@ use std::{ use std::{any::Any, sync::Arc}; use std::{fmt::Debug, ptr::NonNull}; +mod filter; pub mod format; mod item; pub mod reader; @@ -108,6 +111,10 @@ use crate::{ dynamic::{DataTrait, Erase, Factory, WithFactory}, storage::file::item::RefTup2Factory, }; +pub use filter::BatchKeyFilter; +pub use filter::FilterPlan; +pub use filter::TrackingRoaringBitmap; +pub use filter::{FilterKind, FilterStats, TrackingFilterStats}; pub use item::{ArchivedItem, Item, ItemFactory, WithItemFactory}; const BLOOM_FILTER_SEED: u128 = 42; @@ -577,9 +584,8 @@ impl Deserializer { pub fn new(version: u32) -> Self { // Proper error is returned in reader.rs, this is a sanity check. 
assert!( - version >= format::VERSION_NUMBER, - "Unable to read old (pre-v{}) checkpoint data on this feldera version, pipeline needs to backfilled to start.", - format::VERSION_NUMBER + version >= format::MIN_SUPPORTED_VERSION, + "Unable to read checkpoint data with unsupported old storage format version {version} on this feldera version.", ); Self { version, diff --git a/crates/dbsp/src/storage/file/filter.rs b/crates/dbsp/src/storage/file/filter.rs new file mode 100644 index 00000000000..2e3d71866e4 --- /dev/null +++ b/crates/dbsp/src/storage/file/filter.rs @@ -0,0 +1,266 @@ +mod bloom; +mod roaring; +mod stats; + +use crate::{ + Runtime, + dynamic::{DataTrait, DynVec}, + storage::tracking_bloom_filter::TrackingBloomFilter, + trace::{BatchReader, BatchReaderFactories, sample_keys_from_batches}, +}; +use dyn_clone::clone_box; +use rand::thread_rng; +use std::io; + +pub use roaring::TrackingRoaringBitmap; +pub(crate) use roaring::{ + FILTER_PLAN_MIN_SAMPLE_SIZE, FILTER_PLAN_SAMPLE_PERCENT, RoaringLookupSampleStats, +}; +pub use stats::{FilterKind, FilterStats, TrackingFilterStats}; + +/// In-memory representation of the per-batch key filter. +#[derive(Debug)] +pub enum BatchKeyFilter { + /// Probabilistic Bloom filter over key hashes. + Bloom(TrackingBloomFilter), + + /// Exact roaring bitmap for key types whose batch's range fits in `u32`. 
+ RoaringU32(TrackingRoaringBitmap), +} + +impl BatchKeyFilter { + pub(crate) fn new_bloom(estimated_keys: usize, bloom_false_positive_rate: f64) -> Self { + Self::Bloom(bloom::new_bloom_filter( + estimated_keys, + bloom_false_positive_rate, + )) + } + + pub(crate) fn new_roaring_u32(min: &K) -> Self + where + K: DataTrait + ?Sized, + { + Self::RoaringU32(TrackingRoaringBitmap::with_min(min)) + } + + pub(crate) fn deserialize_bloom(num_hashes: u32, data: Vec) -> Self { + Self::Bloom(bloom::deserialize_bloom_filter(num_hashes, data)) + } + + pub(crate) fn deserialize_roaring_u32(data: &[u8], min: &K) -> io::Result + where + K: DataTrait + ?Sized, + { + TrackingRoaringBitmap::deserialize_from(data, min).map(Self::RoaringU32) + } + + pub(crate) fn insert_key(&mut self, key: &K) + where + K: DataTrait + ?Sized, + { + match self { + Self::Bloom(filter) => { + filter.insert_hash(key.default_hash()); + } + Self::RoaringU32(filter) => { + filter.insert_key(key); + } + } + } + pub(crate) fn finalize(&mut self) { + match self { + Self::Bloom(_) => {} + Self::RoaringU32(filter) => filter.finalize(), + } + } +} + +/// Merge-time input used to choose the batch membership filter before writing. +/// +/// The writer must know upfront whether it is building Bloom or bitmap state, +/// because it cannot switch filters after the first key is written. The plan +/// therefore bundles: +/// - the merged batch bounds, which tell us whether min-offset roaring fits; +/// - a sampled subset of input keys, which lets us predict lookup behavior +/// when Bloom and roaring are both enabled. 
+pub struct FilterPlan +where + K: DataTrait + ?Sized, +{ + min: Box, + max: Box, + sampled_keys: Option>>, +} + +impl FilterPlan +where + K: DataTrait + ?Sized, +{ + fn sample_count_for_filter_plan(num_keys: usize) -> usize { + let scaled = ((num_keys as f64) * (FILTER_PLAN_SAMPLE_PERCENT / 100.0)).ceil() as usize; + scaled.max(FILTER_PLAN_MIN_SAMPLE_SIZE).min(num_keys) + } + + /// Builds a filter plan from the known minimum and maximum batch keys. + pub fn from_bounds(min: &K, max: &K) -> Self { + Self { + min: clone_box(min), + max: clone_box(max), + sampled_keys: None, + } + } + + #[cfg(test)] + pub(crate) fn with_sampled_keys(mut self, sampled_keys: Box>) -> Self { + self.sampled_keys = Some(sampled_keys); + self + } + + pub(crate) fn from_batches<'a, B, I>(batches: I) -> Option + where + B: BatchReader, + I: IntoIterator, + { + let batches: Vec<&'a B> = batches.into_iter().collect(); + let mut bounds: Option<(Box, Box)> = None; + for batch in &batches { + let (batch_min, batch_max) = batch.key_bounds()?; + match bounds.as_mut() { + Some((min, max)) => { + if batch_min < min.as_ref() { + *min = clone_box(batch_min); + } + if batch_max > max.as_ref() { + *max = clone_box(batch_max); + } + } + None => bounds = Some((clone_box(batch_min), clone_box(batch_max))), + } + } + + bounds.map(|(min, max)| { + let mut plan = Self { + min, + max, + sampled_keys: None, + }; + if plan.roaring_range_fits() { + plan.sampled_keys = Self::collect_sampled_keys_from_batches(&batches); + } + plan + }) + } + + fn collect_sampled_keys_from_batches(batches: &[&B]) -> Option>> + where + B: BatchReader, + { + let first_batch = batches.first()?; + let mut sampled_keys = first_batch.factories().keys_factory().default_box(); + let total_sample_size = batches + .iter() + .map(|batch| Self::sample_count_for_filter_plan(batch.key_count())) + .sum::(); + sampled_keys.reserve(total_sample_size); + + let mut rng = thread_rng(); + sample_keys_from_batches( + &first_batch.factories(), + batches, + 
&mut rng, + |batch| Self::sample_count_for_filter_plan(batch.key_count()), + sampled_keys.as_mut(), + ); + + (!sampled_keys.is_empty()).then_some(sampled_keys) + } + + fn roaring_range_fits(&self) -> bool { + self.min.supports_roaring32() && self.max.into_roaring_u32(self.min.as_data()).is_some() + } + + fn can_use_roaring(&self, enable_roaring: bool) -> bool { + enable_roaring && self.roaring_range_fits() + } + + fn predict_lookup_prefers_roaring(&self, estimated_keys: usize) -> bool { + let sampled_keys = match self.sampled_keys.as_ref() { + Some(sampled_keys) => sampled_keys, + None => return false, + }; + + let mut roaring_keys = Vec::with_capacity(sampled_keys.len()); + for index in 0..sampled_keys.len() { + let roaring_key = match sampled_keys + .index(index) + .into_roaring_u32(self.min.as_data()) + { + Some(roaring_key) => roaring_key, + None => return false, + }; + roaring_keys.push(roaring_key); + } + roaring_keys.sort_unstable(); + roaring_keys.dedup(); + + RoaringLookupSampleStats::from_sample(estimated_keys, &roaring_keys) + .map(|stats| stats.lookup_prefers_roaring()) + .unwrap_or(false) + } + + fn preferred_filter( + &self, + estimated_keys: usize, + enable_roaring: bool, + bloom_false_positive_rate: f64, + ) -> BatchKeyFilter { + if self.can_use_roaring(enable_roaring) + && self.predict_lookup_prefers_roaring(estimated_keys) + { + BatchKeyFilter::new_roaring_u32(self.min.as_ref()) + } else { + BatchKeyFilter::new_bloom(estimated_keys, bloom_false_positive_rate) + } + } + + /// Chooses the membership filter to build for a batch with `estimated_keys` + /// rows, using the enabled Bloom/roaring settings and an optional batch + /// bounds plan. 
+ pub fn decide_filter( + filter_plan: Option<&Self>, + estimated_keys: usize, + ) -> Option { + // Choose between Bloom, roaring, or no membership filter using the + // following rules: + // + // - If Bloom and roaring are both enabled, prefer roaring when the + // plan proves the batch range fits in `u32` and the sampled-key + // lookup predictor says roaring should beat Bloom. If sampling is + // unavailable or the predictor cannot run, fall back to Bloom. + // - If only Bloom is enabled, always build Bloom. + // - If only roaring is enabled, build roaring only when the plan + // proves the batch range fits in `u32`; otherwise build no + // membership filter. + // - If both are disabled, build no membership filter. + // + // The "no plan => no roaring" rule is intentional: without known + // batch bounds we cannot safely decide that min-offset roaring + // encoding will fit, and we do not allow switching filters after + // writing has started. + let enable_roaring = Runtime::with_dev_tweaks(|dev_tweaks| dev_tweaks.enable_roaring()); + let bloom_false_positive_rate = Runtime::with_dev_tweaks(|dev_tweaks| { + let rate = dev_tweaks.bloom_false_positive_rate(); + (rate > 0.0 && rate < 1.0).then_some(rate) + }); + match (bloom_false_positive_rate, filter_plan) { + (Some(rate), Some(filter_plan)) => { + Some(filter_plan.preferred_filter(estimated_keys, enable_roaring, rate)) + } + (Some(rate), None) => Some(BatchKeyFilter::new_bloom(estimated_keys, rate)), + (None, Some(filter_plan)) if filter_plan.can_use_roaring(enable_roaring) => { + Some(BatchKeyFilter::new_roaring_u32(filter_plan.min.as_ref())) + } + (None, _) => None, + } + } +} diff --git a/crates/dbsp/src/storage/file/filter/bloom.rs b/crates/dbsp/src/storage/file/filter/bloom.rs new file mode 100644 index 00000000000..698500a8784 --- /dev/null +++ b/crates/dbsp/src/storage/file/filter/bloom.rs @@ -0,0 +1,28 @@ +use crate::storage::tracking_bloom_filter::TrackingBloomFilter; +use fastbloom::BloomFilter; + +use 
super::super::BLOOM_FILTER_SEED; + +pub(super) fn new_bloom_filter( + estimated_keys: usize, + bloom_false_positive_rate: f64, +) -> TrackingBloomFilter { + TrackingBloomFilter::new( + BloomFilter::with_false_pos(bloom_false_positive_rate) + .seed(&BLOOM_FILTER_SEED) + .expected_items({ + // `.max(64)` works around a fastbloom bug that hangs when the + // expected number of items is zero (see + // ). + estimated_keys.max(64) + }), + ) +} + +pub(super) fn deserialize_bloom_filter(num_hashes: u32, data: Vec) -> TrackingBloomFilter { + TrackingBloomFilter::new( + BloomFilter::from_vec(data) + .seed(&BLOOM_FILTER_SEED) + .hashes(num_hashes), + ) +} diff --git a/crates/dbsp/src/storage/file/filter/roaring.rs b/crates/dbsp/src/storage/file/filter/roaring.rs new file mode 100644 index 00000000000..2c8ed0f65f3 --- /dev/null +++ b/crates/dbsp/src/storage/file/filter/roaring.rs @@ -0,0 +1,387 @@ +use crate::{ + dynamic::{DataTrait, DynData}, + storage::file::{FilterStats, TrackingFilterStats}, +}; +use dyn_clone::clone_box; +use roaring::RoaringBitmap; +use size_of::SizeOf; +use std::{collections::HashMap, io, mem::size_of_val}; + +/// Sample 0.1% of keys per batch when building a merge-time filter plan. +pub(crate) const FILTER_PLAN_SAMPLE_PERCENT: f64 = 0.1; +/// Never sample fewer than this many keys from a batch for the filter plan. +pub(crate) const FILTER_PLAN_MIN_SAMPLE_SIZE: usize = 1_024; + +/// Roaring bitmap wrapper that tracks hit/miss counts during membership probes. 
+#[derive(Debug)] +pub struct TrackingRoaringBitmap { + bitmap: RoaringBitmap, + min: Box, + tracking: TrackingFilterStats, +} + +impl TrackingRoaringBitmap { + pub(crate) fn new(bitmap: RoaringBitmap, min: &K) -> Self + where + K: DataTrait + ?Sized, + { + let mut filter = Self { + bitmap, + min: clone_box(min.as_data()), + tracking: TrackingFilterStats::new(0), + }; + filter.refresh_stats_size(); + filter + } + + pub(crate) fn with_min(min: &K) -> Self + where + K: DataTrait + ?Sized, + { + Self::new(RoaringBitmap::new(), min) + } + + pub(crate) fn insert(&mut self, value: u32) { + self.bitmap.insert(value); + } + + pub(crate) fn insert_key(&mut self, key: &K) + where + K: DataTrait + ?Sized, + { + self.insert(self.roaring_u32(key)); + } + + pub(crate) fn finalize(&mut self) { + self.bitmap.optimize(); + self.refresh_stats_size(); + } + + // Bloom filters allocate their backing bitset up front, so their tracked + // size is stable after construction. Roaring bitmaps grow as keys are + // inserted and can shrink again after `optimize()`, so refresh the tracked + // size once the batch is finalized instead of trying to maintain it on + // every insert. 
+ fn refresh_stats_size(&mut self) { + let min_size = self.min.size_of().total_bytes(); + self.tracking + .set_size_byte(size_of_val(&self.bitmap) + self.bitmap.serialized_size() + min_size); + } + + pub(crate) fn contains(&self, value: u32) -> bool { + let is_hit = self.bitmap.contains(value); + self.tracking.record(is_hit); + is_hit + } + + fn roaring_u32(&self, key: &K) -> u32 + where + K: DataTrait + ?Sized, + { + key.into_roaring_u32_checked(self.min.as_ref()) + } + + pub(crate) fn maybe_contains_key(&self, key: &K) -> bool + where + K: DataTrait + ?Sized, + { + self.contains(self.roaring_u32(key)) + } + + pub(crate) fn stats(&self) -> FilterStats { + self.tracking.stats() + } + + pub(crate) fn serialized_size(&self) -> usize { + self.bitmap.serialized_size() + } + + pub(crate) fn serialize_into(&self, writer: W) -> io::Result<()> { + self.bitmap.serialize_into(writer) + } + + pub(crate) fn deserialize_from(reader: R, min: &K) -> io::Result + where + R: io::Read, + K: DataTrait + ?Sized, + { + Ok(Self::new(RoaringBitmap::deserialize_from(reader)?, min)) + } +} + +/// Sample-derived summary of how a batch's key distribution maps onto +/// Roaring's container layout for lookup prediction. +/// +/// This exists because Roaring is not uniformly "better than Bloom": +/// - keys are first partitioned by their high 16 bits, so a `u32` domain is +/// split into `2^16` containers; +/// - within each touched container, roaring-rs keeps values in an array until +/// the container reaches about 4096 entries, then upgrades it to a bitmap; +/// - sparse batches therefore tend to pay binary-search costs in many small +/// array containers, while dense batches benefit from cheap bitmap probes. 
+/// +/// The predictor estimates those two things from a sample: +/// - how many 16-bit containers the batch likely touches +/// - how many keys each touched container likely holds +#[derive(Debug, Clone, Copy)] +pub(crate) struct RoaringLookupSampleStats { + // Estimated number of real keys per touched 16-bit window after rescaling + // the sampled keys/window by the sample fraction. + estimated_keys_per_window: f64, + // Estimated number of distinct 16-bit windows touched by the full batch. + estimated_touched_windows: f64, +} + +impl RoaringLookupSampleStats { + const ROARING_WINDOW_CAPACITY: f64 = 65_536.0; + const ROARING_BITMAP_CONTAINER_THRESHOLD: f64 = 4_096.0; + const LOOKUP_ROARING_WINDOW_PROBABILITY_THRESHOLD: f64 = 0.1; + const LOOKUP_ROARING_BITMAP_WINDOW_PROBABILITY_PENALTY: f64 = 0.1; + const LOOKUP_ROARING_ARRAY_WINDOW_PROBABILITY_PENALTY_BASE: f64 = 0.25; + const LOOKUP_ROARING_ARRAY_WINDOW_PROBABILITY_PENALTY_PER_LOG2_KEY: f64 = 0.15; + const TOUCHED_WINDOWS_CHAO1_DAMPING: f64 = 0.25; + const U32_WINDOW_COUNT: usize = 1 << 16; + + /// Estimate roaring friendliness for lookups from a small sample of keys. + /// + /// The estimator is based on `crates/dbsp/benches/filter_predictor.rs`: + /// 1. Bucket sampled keys by their high 16 bits, which matches Roaring's + /// top-level `u32` container layout. + /// 2. Rescale sampled keys/window by the sample fraction so large & dense + /// batches do not look artificially sparse. + /// 3. Estimate the full-batch touched-window count by combining a uniform + /// occupancy model with a Chao1 unseen-window correction. + /// + /// Example: + /// - If the batch has `1_000_000` keys and the sample contains `1_000`, + /// the sample fraction is `0.001` (`0.1%`). + /// - If those `1_000` sampled keys touch `50` windows, then the sampled + /// average is `20` keys/window and the rescaled estimate is + /// `20 / 0.001 = 20_000` real keys/window. 
+ /// - If many sampled windows are singletons, the Chao1 correction pushes + /// the touched-window estimate upward because the sample likely missed + /// many windows entirely. + pub(crate) fn from_sample(batch_keys: usize, sampled_keys: &[u32]) -> Option { + if batch_keys == 0 || sampled_keys.is_empty() { + return None; + } + + let sampled_key_count = sampled_keys.len(); + let mut per_window: HashMap = HashMap::new(); + for &key in sampled_keys { + let window = (key >> 16) as u16; + *per_window.entry(window).or_insert(0) += 1; + } + + let distinct_windows = per_window.len(); + if distinct_windows == 0 { + return None; + } + + let sample_fraction = sampled_key_count as f64 / batch_keys as f64; + if sample_fraction <= 0.0 { + return None; + } + + let avg_sample_keys_per_window = sampled_key_count as f64 / distinct_windows as f64; + // Without this rescaling, large but dense batches look artificially + // sparse and the predictor drifts toward Bloom. + let estimated_keys_per_window = + (avg_sample_keys_per_window / sample_fraction).min(Self::ROARING_WINDOW_CAPACITY); + // Sparse, wide samples often show up as many singleton windows and very + // few doubletons. Those are exactly the signals the Chao1 correction + // uses to estimate how many windows the sample likely missed entirely. + let sample_singleton_windows = per_window.values().filter(|&&count| count == 1).count(); + let sample_doubleton_windows = per_window.values().filter(|&&count| count == 2).count(); + let estimated_touched_windows = estimate_touched_windows( + batch_keys, + sampled_key_count, + distinct_windows, + sample_singleton_windows, + sample_doubleton_windows, + ); + + Some(Self { + estimated_keys_per_window, + estimated_touched_windows, + }) + } + + /// Predict whether lookup-heavy workloads should prefer Roaring. + /// + /// Random probes only pay container cost when they land in a touched + /// 16-bit window, so touched-window count is normalized into a + /// probability. 
Array containers get a size-dependent penalty because + /// `ArrayStore::contains()` gets slower as they grow, while bitmap + /// containers are treated as near-constant-time once the estimated + /// keys/window crosses Roaring's array-to-bitmap threshold. + pub(crate) fn lookup_prefers_roaring(&self) -> bool { + let lookup_window_probability = + (self.estimated_touched_windows / Self::U32_WINDOW_COUNT as f64).clamp(0.0, 1.0); + // roaring-rs switches between array and bitmap containers around 4096 + // elements. Bitmap containers are close to a constant-time bit test, + // but array containers use binary search and get meaningfully slower + // as they grow. + let lookup_container_penalty = + if self.estimated_keys_per_window >= Self::ROARING_BITMAP_CONTAINER_THRESHOLD { + Self::LOOKUP_ROARING_BITMAP_WINDOW_PROBABILITY_PENALTY + } else { + Self::LOOKUP_ROARING_ARRAY_WINDOW_PROBABILITY_PENALTY_BASE + + Self::LOOKUP_ROARING_ARRAY_WINDOW_PROBABILITY_PENALTY_PER_LOG2_KEY + * (self.estimated_keys_per_window + 1.0).log2() + }; + let lookup_cost_proxy = lookup_window_probability * lookup_container_penalty; + let lookup_score = Self::LOOKUP_ROARING_WINDOW_PROBABILITY_THRESHOLD + / lookup_cost_proxy.max(f64::MIN_POSITIVE); + lookup_score >= 1.0 + } +} + +/// Estimate how many distinct 16-bit Roaring windows the full batch touches. +/// +/// This combines: +/// 1. A uniform occupancy estimate that works well when windows are populated +/// fairly evenly. +/// 2. A Chao1-style unseen-window estimate that reacts when the sample is full +/// of singleton windows and is therefore likely missing many windows. +/// +/// The blend exists because just doing a uniform-only estimate under-counts +/// touched windows on sparse, wide distributions and makes random Roaring +/// lookups look cheaper than they are. 
+fn estimate_touched_windows( + batch_keys: usize, + sampled_keys: usize, + distinct_windows: usize, + sample_singleton_windows: usize, + sample_doubleton_windows: usize, +) -> f64 { + if batch_keys == 0 || sampled_keys == 0 || distinct_windows == 0 { + return 0.0; + } + if sampled_keys >= batch_keys { + return distinct_windows as f64; + } + + let uniform_estimate = + estimate_uniform_touched_windows(batch_keys, sampled_keys, distinct_windows); + let chao1_estimate = estimate_chao1_touched_windows( + distinct_windows, + sample_singleton_windows, + sample_doubleton_windows, + ); + blend_touched_window_estimates( + uniform_estimate, + chao1_estimate, + RoaringLookupSampleStats::TOUCHED_WINDOWS_CHAO1_DAMPING, + ) +} + +/// Estimate touched windows under a "roughly uniform occupancy" assumption. +/// +/// Intuition: +/// - assume the full batch touches `W` windows and spreads keys across them +/// fairly evenly; +/// - given the sample fraction, solve for the `W` that would yield the +/// observed sampled distinct-window count. +/// +/// This is the baseline estimate because it behaves sensibly on compact or +/// moderately regular distributions. It falls apart on sparse wide batches, +/// where many windows are touched so rarely that the sample never sees them. 
+fn estimate_uniform_touched_windows( + batch_keys: usize, + sampled_keys: usize, + distinct_windows: usize, +) -> f64 { + if batch_keys == 0 || sampled_keys == 0 || distinct_windows == 0 { + return 0.0; + } + if sampled_keys >= batch_keys { + return distinct_windows as f64; + } + + let sample_fraction = sampled_keys as f64 / batch_keys as f64; + let mut low = distinct_windows as f64; + let mut high = batch_keys.min(RoaringLookupSampleStats::U32_WINDOW_COUNT) as f64; + + if low >= high { + return low; + } + + let log_unseen = (-sample_fraction).ln_1p(); + for _ in 0..100 { + let mid = (low + high) * 0.5; + let avg_keys_per_window = batch_keys as f64 / mid; + let observed_windows = mid * (1.0 - (avg_keys_per_window * log_unseen).exp()); + + if observed_windows < distinct_windows as f64 { + low = mid; + } else { + high = mid; + } + } + + high +} + +/// Estimate touched windows with a Chao1-style unseen-species correction. +/// +/// Here the "species" are touched 16-bit windows: +/// - `distinct_windows` is how many windows the sample observed +/// - `sample_singleton_windows` counts windows seen exactly once +/// - `sample_doubleton_windows` counts windows seen exactly twice +/// +/// Raw Chao1 is intentionally not used directly in the final predictor because +/// it can overreact when `f2` is tiny. We still compute it because it pushes +/// the estimate upward in the cases we care about here: batches that touch +/// many 16-bit windows, but only a few sampled keys land in each window. 
+fn estimate_chao1_touched_windows( + distinct_windows: usize, + sample_singleton_windows: usize, + sample_doubleton_windows: usize, +) -> f64 { + let chao1_estimate = if sample_doubleton_windows > 0 { + distinct_windows as f64 + + (sample_singleton_windows * sample_singleton_windows) as f64 + / (2.0 * sample_doubleton_windows as f64) + } else { + distinct_windows as f64 + + (sample_singleton_windows.saturating_mul(sample_singleton_windows.saturating_sub(1)) + / 2) as f64 + }; + + chao1_estimate + .max(distinct_windows as f64) + .min(RoaringLookupSampleStats::U32_WINDOW_COUNT as f64) +} + +fn blend_touched_window_estimates(uniform_estimate: f64, chao1_estimate: f64, alpha: f64) -> f64 { + // Raw Chao1 reacts strongly to singleton-heavy samples, which is useful + // for sparse wide batches but too aggressive to use directly. Blend it + // toward the uniform estimate so the unseen-window correction only nudges + // the final estimate in the right direction. + uniform_estimate + alpha * (chao1_estimate - uniform_estimate) +} + +#[cfg(test)] +mod tests { + use super::TrackingRoaringBitmap; + use crate::storage::file::FilterStats; + + #[test] + fn tracking_roaring_bitmap_stats() { + let mut filter = TrackingRoaringBitmap::with_min((&0u32) as &crate::dynamic::DynData); + filter.insert(1); + filter.insert(3); + + assert!(filter.contains(1)); + assert!(!filter.contains(2)); + assert_eq!( + filter.stats(), + FilterStats { + size_byte: filter.stats().size_byte, + hits: 1, + misses: 1, + } + ); + } +} diff --git a/crates/dbsp/src/storage/filter_stats.rs b/crates/dbsp/src/storage/file/filter/stats.rs similarity index 87% rename from crates/dbsp/src/storage/filter_stats.rs rename to crates/dbsp/src/storage/file/filter/stats.rs index 54167363333..fbad894ee4c 100644 --- a/crates/dbsp/src/storage/filter_stats.rs +++ b/crates/dbsp/src/storage/file/filter/stats.rs @@ -1,6 +1,15 @@ use crossbeam::utils::CachePadded; use std::sync::atomic::{AtomicUsize, Ordering}; +#[derive(Clone, 
Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd)] +pub enum FilterKind { + #[default] + None, + Bloom, + Roaring, + Range, +} + /// Statistics about an in-memory key filter. /// /// The statistics implement addition such that they can be summed across @@ -59,6 +68,10 @@ impl TrackingFilterStats { } } + pub(crate) fn set_size_byte(&mut self, size_byte: usize) { + self.size_byte = size_byte; + } + /// Records the result of one filter probe. pub fn record(&self, is_hit: bool) { if is_hit { diff --git a/crates/dbsp/src/storage/file/format.rs b/crates/dbsp/src/storage/file/format.rs index b0b9cc66657..da7994c5542 100644 --- a/crates/dbsp/src/storage/file/format.rs +++ b/crates/dbsp/src/storage/file/format.rs @@ -75,12 +75,10 @@ //! //! Decompressing a compressed block yields the regular index or data block //! format starting with a [`BlockHeader`]. -use crate::storage::tracking_bloom_filter::TrackingBloomFilter; -use crate::storage::{buffer_cache::FBuf, file::BLOOM_FILTER_SEED}; +use crate::storage::buffer_cache::FBuf; use binrw::{BinRead, BinResult, BinWrite, Error as BinError, binrw, binwrite}; #[cfg(doc)] use crc32c; -use fastbloom::BloomFilter; use num_derive::FromPrimitive; use num_traits::FromPrimitive; use size_of::SizeOf; @@ -92,11 +90,15 @@ use size_of::SizeOf; /// - v3: Bloom filter format change. /// - v4: Tup None optimizations. /// - v5: Change in representation for Timestamp, ShortInterval +/// - v6: Roaring bitmap filter blocks. /// /// When a new version is created, make sure to generate new golden /// files for it in crate `storage-test-compat` to check for /// backwards compatibility. -pub const VERSION_NUMBER: u32 = 5; +pub const VERSION_NUMBER: u32 = 6; + +/// Oldest layer file format version this binary can read. +pub const MIN_SUPPORTED_VERSION: u32 = 5; /// Magic number for data blocks. 
pub const DATA_BLOCK_MAGIC: [u8; 4] = *b"LFDB"; @@ -107,8 +109,11 @@ pub const INDEX_BLOCK_MAGIC: [u8; 4] = *b"LFIB"; /// Magic number for the file trailer block. pub const FILE_TRAILER_BLOCK_MAGIC: [u8; 4] = *b"LFFT"; -/// Magic number for filter blocks. -pub const FILTER_BLOCK_MAGIC: [u8; 4] = *b"LFFB"; +/// Magic number for Bloom filter blocks. +pub const BLOOM_FILTER_BLOCK_MAGIC: [u8; 4] = *b"LFFB"; + +/// Magic number for roaring bitmap filter blocks. +pub const ROARING_BITMAP_FILTER_BLOCK_MAGIC: [u8; 4] = *b"LFFR"; /// 8-byte header at the beginning of each block. /// @@ -171,13 +176,13 @@ pub struct FileTrailer { #[br(count = n_columns)] pub columns: Vec, - /// File offset in bytes of the [FilterBlock]. + /// File offset in bytes of the filter block. /// /// This is 0 if there is no filter block, or if the filter block size is /// bigger than `i32::MAX`. pub filter_offset: u64, - /// Size in bytes of the [FilterBlock]. + /// Size in bytes of the filter block. /// /// This is 0 if there is no filter block, or if the filter block size is /// bigger than `i32::MAX`. @@ -205,7 +210,7 @@ pub struct FileTrailer { /// future expansion. pub incompatible_features: u64, - /// File offset in bytes of the [FilterBlock]. + /// File offset in bytes of the filter block. /// /// This is 0 if there is no filter block, or if the filter block size is /// less than `i32::MAX`. If this is nonzero, then @@ -213,7 +218,7 @@ pub struct FileTrailer { /// [FileTrailer::compatible_features]. pub filter_offset64: u64, - /// Size in bytes of the [FilterBlock]. + /// Size in bytes of the filter block. /// /// This is 0 if there is no filter block, or if the filter block size is /// less than `i32::MAX`. If this is nonzero, then @@ -243,6 +248,17 @@ impl FileTrailer { (self.compatible_features & feature) != 0 } + /// Returns the unknown incompatible features, if any. 
+ pub fn unknown_incompatible_features(&self) -> Option { + let unknown_incompatible_features = + self.incompatible_features & !INCOMPATIBLE_FEATURE_ROARING_FILTERS; + if unknown_incompatible_features != 0 { + Some(unknown_incompatible_features) + } else { + None + } + } + /// Returns true if this file trailer has a 64-bit filter. pub fn has_filter64(&self) -> bool { self.has_compatible_feature(COMPATIBLE_FEATURE_FILTER64) @@ -259,6 +275,10 @@ pub const COMPATIBLE_FEATURE_FILTER64: u64 = 1 << 0; /// deserialized as if its value is 0. Conversely, old readers will simply ignore the field. pub const COMPATIBLE_FEATURE_NEGATIVE_WEIGHT_COUNT: u64 = 1 << 1; +/// Bit set to 1 in [FileTrailer::incompatible_features] if the file contains +/// roaring bitmap membership filter blocks. +pub const INCOMPATIBLE_FEATURE_ROARING_FILTERS: u64 = 1 << 0; + /// Information about a column. /// /// Embedded inside the [`FileTrailer`] block. @@ -555,12 +575,15 @@ impl Compression { /// /// The Bloom filter contains a member for each key in column 0. #[binrw] -pub struct FilterBlock { +pub struct BloomFilterBlock { /// Block header with "LFFB" magic. - #[brw(assert(header.magic == FILTER_BLOCK_MAGIC, "filter block has bad magic"))] + #[brw(assert( + header.magic == BLOOM_FILTER_BLOCK_MAGIC, + "bloom filter block has bad magic" + ))] pub header: BlockHeader, - /// [BloomFilter::num_hashes]. + /// Number of hashes used by the Bloom filter. pub num_hashes: u32, /// Number of elements in `data`. @@ -572,24 +595,17 @@ pub struct FilterBlock { pub data: Vec, } -impl From for TrackingBloomFilter { - fn from(block: FilterBlock) -> Self { - TrackingBloomFilter::new( - BloomFilter::from_vec(block.data) - .seed(&BLOOM_FILTER_SEED) - .hashes(block.num_hashes), - ) - } -} - /// A block representing a Bloom filter (with data by reference). #[binwrite] -pub struct FilterBlockRef<'a> { +pub struct BloomFilterBlockRef<'a> { /// Block header with "LFFB" magic. 
- #[bw(assert(header.magic == FILTER_BLOCK_MAGIC, "filter block has bad magic"))] + #[bw(assert( + header.magic == BLOOM_FILTER_BLOCK_MAGIC, + "bloom filter block has bad magic" + ))] pub header: BlockHeader, - /// [BloomFilter::num_hashes]. + /// Number of hashes used by the Bloom filter. pub num_hashes: u32, /// Number of elements in `data`. @@ -600,12 +616,39 @@ pub struct FilterBlockRef<'a> { pub data: &'a [u64], } -impl<'a> From<&'a TrackingBloomFilter> for FilterBlockRef<'a> { - fn from(value: &'a TrackingBloomFilter) -> Self { - FilterBlockRef { - header: BlockHeader::new(&FILTER_BLOCK_MAGIC), - num_hashes: value.num_hashes(), - data: value.as_slice(), - } - } +/// A block representing a roaring bitmap filter. +#[binrw] +pub struct RoaringBitmapFilterBlock { + /// Block header with "LFFR" magic. + #[brw(assert( + header.magic == ROARING_BITMAP_FILTER_BLOCK_MAGIC, + "roaring filter block has bad magic" + ))] + pub header: BlockHeader, + + /// Number of bytes in `data`. + #[bw(try_calc(u64::try_from(data.len())))] + pub len: u64, + + /// Serialized roaring bitmap contents. + #[br(count = len)] + pub data: Vec, +} + +/// A block representing a roaring bitmap filter (with data by reference). +#[binwrite] +pub struct RoaringBitmapFilterBlockRef<'a> { + /// Block header with "LFFR" magic. + #[bw(assert( + header.magic == ROARING_BITMAP_FILTER_BLOCK_MAGIC, + "roaring filter block has bad magic" + ))] + pub header: BlockHeader, + + /// Number of bytes in `data`. + #[bw(try_calc(u64::try_from(data.len())))] + pub len: u64, + + /// Serialized roaring bitmap contents. + pub data: &'a [u8], } diff --git a/crates/dbsp/src/storage/file/reader.rs b/crates/dbsp/src/storage/file/reader.rs index edaba67ee38..65746c1ded7 100644 --- a/crates/dbsp/src/storage/file/reader.rs +++ b/crates/dbsp/src/storage/file/reader.rs @@ -2,22 +2,22 @@ //! //! [`Reader`] is the top-level interface for reading layer files. 
-use super::format::{Compression, FileTrailer}; -use super::{AnyFactories, Deserializer, Factories}; +use super::format::{BloomFilterBlock, Compression, FileTrailer, RoaringBitmapFilterBlock}; +use super::{AnyFactories, BatchKeyFilter, Deserializer, Factories}; use crate::dynamic::{DynVec, WeightTrait}; use crate::storage::buffer_cache::CacheAccess; -use crate::storage::file::format::{BatchMetadata, FilterBlock}; -use crate::storage::tracking_bloom_filter::TrackingBloomFilter; use crate::storage::{ backend::StorageError, buffer_cache::{BufferCache, FBuf}, file::format::{ - DataBlockHeader, FileTrailerColumn, IndexBlockHeader, NodeType, VERSION_NUMBER, Varint, + BLOOM_FILTER_BLOCK_MAGIC, BatchMetadata, DataBlockHeader, FileTrailerColumn, + IndexBlockHeader, MIN_SUPPORTED_VERSION, NodeType, ROARING_BITMAP_FILTER_BLOCK_MAGIC, + Varint, }, file::item::ArchivedItem, }; use crate::{ - dynamic::{DataTrait, DeserializeDyn, Factory}, + dynamic::{DataTrait, DeserializeDyn, DynData, Factory}, storage::{ backend::{BlockLocation, FileReader, InvalidBlockLocation, StorageBackend}, buffer_cache::{AtomicCacheStats, CacheStats}, @@ -112,12 +112,14 @@ pub enum CorruptionError { }, /// Invalid version number in file trailer. - #[error("File has invalid version {version} (expected {expected_version})")] + #[error( + "File has unsupported storage format version {version}; supported versions are {min_supported_version} and newer" + )] InvalidVersion { /// Version in file. version: u32, - /// Expected version ([`VERSION_NUMBER`]). - expected_version: u32, + /// Minimum supported version. + min_supported_version: u32, }, /// Invalid version number in file trailer. @@ -327,6 +329,35 @@ pub enum CorruptionError { /// Invalid filter block location. #[error("Invalid file block location ({0}).")] InvalidFilterLocation(InvalidBlockLocation), + + /// Filter block payload could not be decoded. 
+ #[error("Invalid {kind} filter encoding in block ({location}): {inner}")] + InvalidFilterEncoding { + /// Block location. + location: BlockLocation, + /// Filter kind. + kind: &'static str, + /// Underlying parse error. + inner: String, + }, + + /// Roaring bitmap filter block payload could not be decoded. + #[error("Invalid roaring bitmap filter encoding in block ({location}): {inner}")] + InvalidRoaringBitmapFilterEncoding { + /// Block location. + location: BlockLocation, + /// Underlying parse error. + inner: String, + }, + + /// Filter block magic is unknown. + #[error("Unknown filter block magic {magic:?} in block ({location}).")] + UnknownFilterBlockMagic { + /// Block location. + location: BlockLocation, + /// Unknown magic. + magic: [u8; 4], + }, } /// Reader for an array of [Varint]s in a storage file. @@ -1347,19 +1378,6 @@ struct Column { n_rows: u64, } -impl FilterBlock { - fn new(file_handle: &dyn FileReader, location: BlockLocation) -> Result { - let block = file_handle.read_block(location)?; - Self::read_le(&mut io::Cursor::new(block.as_slice())).map_err(|e| { - Error::Corruption(CorruptionError::Binrw { - location, - block_type: "filter", - inner: e.to_string(), - }) - }) - } -} - impl Column { fn new(factories: &AnyFactories, info: &FileTrailerColumn) -> Result { let FileTrailerColumn { @@ -1509,6 +1527,67 @@ fn decompress( Ok(raw) } +fn parse_filter_block BinRead = ()>>( + block: &FBuf, + location: BlockLocation, + block_type: &'static str, +) -> Result { + T::read_le(&mut io::Cursor::new(block.as_slice())).map_err(|e| { + Error::Corruption(CorruptionError::Binrw { + location, + block_type, + inner: e.to_string(), + }) + }) +} + +fn read_filter_block( + file_handle: &dyn FileReader, + location: BlockLocation, + roaring_min: Option<&DynData>, +) -> Result { + let block = file_handle.read_block(location)?; + if block.len() < 8 { + return Err(Error::Corruption(CorruptionError::InvalidFilterEncoding { + location, + kind: "unknown", + inner: 
format!("block too short: {} bytes", block.len()), + })); + } + + let mut magic = [0u8; 4]; + magic.copy_from_slice(&block[4..8]); + + match magic { + BLOOM_FILTER_BLOCK_MAGIC => { + let block: BloomFilterBlock = parse_filter_block(&block, location, "bloom filter")?; + Ok(BatchKeyFilter::deserialize_bloom( + block.num_hashes, + block.data, + )) + } + ROARING_BITMAP_FILTER_BLOCK_MAGIC => { + let block: RoaringBitmapFilterBlock = + parse_filter_block(&block, location, "roaring bitmap filter")?; + let roaring_min = roaring_min.ok_or_else(|| { + Error::Corruption(CorruptionError::InvalidRoaringBitmapFilterEncoding { + location, + inner: "roaring bitmap filter requires the batch minimum".to_string(), + }) + })?; + BatchKeyFilter::deserialize_roaring_u32(&block.data, roaring_min).map_err(|e| { + Error::Corruption(CorruptionError::InvalidRoaringBitmapFilterEncoding { + location, + inner: e.to_string(), + }) + }) + } + magic => Err(Error::Corruption( + CorruptionError::UnknownFilterBlockMagic { location, magic }, + )), + } +} + /// Layer file column specification. /// /// A column specification must take the form `K0, A0, N0`, where `(K0, A0)` is @@ -1554,6 +1633,7 @@ where pub struct Reader { file: ImmutableFileRef, columns: Vec, + membership_filter_location: Option, /// Additional metadata added to the file by the writer. pub(crate) metadata: BatchMetadata, @@ -1591,8 +1671,8 @@ where factories: &[&AnyFactories], cache: fn() -> Option>, file: Arc, - membership_filter: Option, - ) -> Result<(Self, Option), Error> { + membership_filter: Option, + ) -> Result<(Self, Option), Error> { let file_size = file.get_size()?; if file_size < 512 || (file_size % 512) != 0 { return Err(CorruptionError::InvalidFileSize(file_size).into()); @@ -1606,12 +1686,10 @@ where &stats, )?; - // v4/v5 isn't backwards compatible. do not attempt to support - // older formats. 
- if file_trailer.version < VERSION_NUMBER { + if file_trailer.version < MIN_SUPPORTED_VERSION { return Err(CorruptionError::InvalidVersion { version: file_trailer.version, - expected_version: VERSION_NUMBER, + min_supported_version: MIN_SUPPORTED_VERSION, } .into()); } @@ -1623,11 +1701,8 @@ where ); } - if file_trailer.incompatible_features != 0 { - return Err(CorruptionError::UnsupportedIncompatibleFeatures( - file_trailer.incompatible_features, - ) - .into()); + if let Some(features) = file_trailer.unknown_incompatible_features() { + return Err(CorruptionError::UnsupportedIncompatibleFeatures(features).into()); } assert_eq!(factories.len(), file_trailer.columns.len()); @@ -1659,34 +1734,26 @@ where .into()); } } - - fn read_filter_block( - file_handle: &dyn FileReader, - offset: u64, - size: usize, - ) -> Result { - Ok(FilterBlock::new( - file_handle, - BlockLocation::new(offset, size).map_err(|error: InvalidBlockLocation| { + let membership_filter_location = if file_trailer.has_filter64() { + Some( + BlockLocation::new( + file_trailer.filter_offset64, + file_trailer.filter_size64 as usize, + ) + .map_err(|error: InvalidBlockLocation| { Error::Corruption(CorruptionError::InvalidFilterLocation(error)) })?, - )? - .into()) - } - let membership_filter = if let Some(membership_filter) = membership_filter { - Some(membership_filter) - } else if file_trailer.has_filter64() { - Some(read_filter_block( - &*file, - file_trailer.filter_offset64, - file_trailer.filter_size64 as usize, - )?) + ) } else if file_trailer.filter_offset != 0 { - Some(read_filter_block( - &*file, - file_trailer.filter_offset, - file_trailer.filter_size as usize, - )?) 
+ Some( + BlockLocation::new( + file_trailer.filter_offset, + file_trailer.filter_size as usize, + ) + .map_err(|error: InvalidBlockLocation| { + Error::Corruption(CorruptionError::InvalidFilterLocation(error)) + })?, + ) } else { None }; @@ -1701,6 +1768,7 @@ where file_trailer.version, ), columns, + membership_filter_location, metadata: file_trailer.metadata.clone(), _phantom: PhantomData, }, @@ -1723,15 +1791,6 @@ where Self::new(factories, cache, storage_backend.open(path)?) } - pub(crate) fn open_with_filter( - factories: &[&AnyFactories], - cache: fn() -> Option>, - storage_backend: &dyn StorageBackend, - path: &StoragePath, - ) -> Result<(Self, Option), Error> { - Self::new_with_filter(factories, cache, storage_backend.open(path)?, None) - } - /// The number of columns in the layer file. /// /// This is a fixed value for any given `Reader`. @@ -1780,6 +1839,15 @@ where pub fn metadata(&self) -> &BatchMetadata { &self.metadata } + + fn read_membership_filter( + &self, + roaring_min: Option<&DynData>, + ) -> Result, Error> { + self.membership_filter_location + .map(|location| read_filter_block(&*self.file.file_handle, location, roaring_min)) + .transpose() + } } impl Reader<(&'static K, &'static A, N)> @@ -1788,6 +1856,19 @@ where A: DataTrait + ?Sized, (&'static K, &'static A, N): ColumnSpec, { + pub(crate) fn open_with_filter( + factories: &[&AnyFactories], + cache: fn() -> Option>, + storage_backend: &dyn StorageBackend, + path: &StoragePath, + ) -> Result<(Self, Option), Error> { + let reader = Self::open(factories, cache, storage_backend, path)?; + let key_range = reader.key_range()?; + let roaring_min = key_range.as_ref().map(|(min, _)| min.as_ref().as_data()); + let membership_filter = reader.read_membership_filter(roaring_min)?; + Ok((reader, membership_filter)) + } + /// Returns the min and max keys stored in column 0. 
/// /// The bounds are loaded from the root node when first requested and can diff --git a/crates/dbsp/src/storage/file/test.rs b/crates/dbsp/src/storage/file/test.rs index ba77ef7ded9..062c5cd3ead 100644 --- a/crates/dbsp/src/storage/file/test.rs +++ b/crates/dbsp/src/storage/file/test.rs @@ -1,28 +1,29 @@ -use std::{marker::PhantomData, sync::Arc}; +use std::{io::Cursor, marker::PhantomData, sync::Arc}; use crate::{ DBWeight, dynamic::{DataTrait, DowncastTrait, DynWeight, Factory, LeanVec, Vector, WithFactory}, storage::{ - backend::StorageBackend, + backend::{BlockLocation, StorageBackend}, buffer_cache::BufferCache, file::{ - format::{BatchMetadata, Compression}, + format::{ + BLOOM_FILTER_BLOCK_MAGIC, BatchMetadata, Compression, FileTrailer, + ROARING_BITMAP_FILTER_BLOCK_MAGIC, + }, reader::{BulkRows, FilteredKeys, Reader}, }, }, trace::{ BatchReaderFactories, Builder, VecIndexedWSetFactories, VecWSetFactories, - ord::{ - batch_filter::BatchFilters, - vec::{indexed_wset_batch::VecIndexedWSetBuilder, wset_batch::VecWSetBuilder}, - }, + filter::BatchFilters, + ord::vec::{indexed_wset_batch::VecIndexedWSetBuilder, wset_batch::VecWSetBuilder}, }, - utils::test::init_test_logger, + utils::{Tup1, test::init_test_logger}, }; use super::{ - Factories, + Factories, FilterPlan, reader::{ColumnSpec, RowGroup}, writer::{Parameters, Writer1, Writer2}, }; @@ -31,6 +32,7 @@ use crate::{ DBData, dynamic::{DynData, Erase}, }; +use binrw::BinRead; use feldera_types::config::{StorageConfig, StorageOptions}; use rand::{Rng, seq::SliceRandom, thread_rng}; use tempfile::tempdir; @@ -712,6 +714,73 @@ fn test_key_range( assert_eq!(max.downcast_checked::(), &expected_max); } +fn filter_block_magic(reader: &Reader<(&'static K, &'static A, N)>) -> Option<[u8; 4]> +where + K: DataTrait + ?Sized, + A: DataTrait + ?Sized, + (&'static K, &'static A, N): ColumnSpec, +{ + let file_size = reader.byte_size().unwrap() as usize; + let trailer_block = reader + .file_handle() + 
.read_block(BlockLocation::new((file_size - 512) as u64, 512).unwrap()) + .unwrap(); + let trailer = FileTrailer::read_le(&mut Cursor::new(trailer_block.as_slice())).unwrap(); + let offset = if trailer.has_filter64() { + trailer.filter_offset64 + } else { + trailer.filter_offset + }; + let size = if trailer.has_filter64() { + trailer.filter_size64 as usize + } else { + trailer.filter_size as usize + }; + if offset == 0 { + return None; + } + + let filter_block = reader + .file_handle() + .read_block(BlockLocation::new(offset, size).unwrap()) + .unwrap(); + let mut magic = [0u8; 4]; + magic.copy_from_slice(&filter_block[4..8]); + Some(magic) +} + +fn incompatible_features(reader: &Reader<(&'static K, &'static A, N)>) -> u64 +where + K: DataTrait + ?Sized, + A: DataTrait + ?Sized, + (&'static K, &'static A, N): ColumnSpec, +{ + let file_size = reader.byte_size().unwrap() as usize; + let trailer_block = reader + .file_handle() + .read_block(BlockLocation::new((file_size - 512) as u64, 512).unwrap()) + .unwrap(); + let trailer = FileTrailer::read_le(&mut Cursor::new(trailer_block.as_slice())).unwrap(); + trailer.incompatible_features +} + +fn sampled_filter_plan( + factories: &Factories, + keys: &[K], +) -> FilterPlan +where + K: DBData + Erase, +{ + let mut sampled_keys = factories.keys_factory.default_box(); + sampled_keys.reserve(keys.len()); + for key in keys { + sampled_keys.push_ref(key.erase()); + } + + FilterPlan::from_bounds(keys.first().unwrap().erase(), keys.last().unwrap().erase()) + .with_sampled_keys(sampled_keys) +} + fn test_two_columns(parameters: Parameters) where T: TwoColumns, @@ -734,7 +803,7 @@ where test_buffer_cache, &*storage_backend, parameters, - T::n0(), + FilterPlan::::decide_filter(None, T::n0()), ) .unwrap(); let n0 = T::n0(); @@ -800,7 +869,7 @@ where test_buffer_cache, &*storage_backend, parameters, - T::n0(), + FilterPlan::::decide_filter(None, T::n0()), ) .unwrap(); let n0 = T::n0(); @@ -945,7 +1014,7 @@ where test_buffer_cache, 
&*storage_backend, parameters.clone(), - n, + FilterPlan::::decide_filter(None, n), ) .unwrap(); for row in 0..n { @@ -956,7 +1025,7 @@ where let (reader, filters) = if reopen { println!("closing writer and reopening as reader"); let path = writer.path().clone(); - let (_file_handle, _bloom_filter, _key_bounds) = + let (_file_handle, _key_filter, _key_bounds) = writer.close(BatchMetadata::default()).unwrap(); let (reader, membership_filter) = Reader::open_with_filter( &[&factories.any_factories()], @@ -1006,7 +1075,7 @@ fn test_one_column_zset( test_buffer_cache, &*storage_backend, parameters.clone(), - n, + FilterPlan::::decide_filter(None, n), ) .unwrap(); for row in 0..n { @@ -1017,7 +1086,7 @@ fn test_one_column_zset( let reader = if reopen { println!("closing writer and reopening as reader"); let path = writer.path().clone(); - let (_file_handle, _bloom_filter, _key_bounds) = + let (_file_handle, _key_filter, _key_bounds) = writer.close(BatchMetadata::default()).unwrap(); Reader::open( &[&factories.any_factories()], @@ -1063,7 +1132,7 @@ fn one_column_key_range() { test_buffer_cache, &*storage_backend, Parameters::default(), - keys.len(), + FilterPlan::::decide_filter(None, keys.len()), ) .unwrap(); for key in keys { @@ -1072,7 +1141,7 @@ fn one_column_key_range() { let reader = if reopen { let path = writer.path().clone(); - let (_file_handle, _bloom_filter, _key_bounds) = + let (_file_handle, _key_filter, _key_bounds) = writer.close(BatchMetadata::default()).unwrap(); Reader::open( &[&factories.any_factories()], @@ -1099,6 +1168,390 @@ fn one_column_key_range() { } } +#[test] +fn test_bloom_filter_roundtrip_and_block_kind() { + init_test_logger(); + + for reopen in [false, true] { + let factories = Factories::::new::(); + let tempdir = tempdir().unwrap(); + let storage_backend = ::new( + &StorageConfig { + path: tempdir.path().to_string_lossy().to_string(), + cache: Default::default(), + }, + &StorageOptions::default(), + ) + .unwrap(); + + let mut writer = 
Writer1::new( + &factories, + test_buffer_cache, + &*storage_backend, + Parameters::default(), + FilterPlan::::decide_filter(None, 3), + ) + .unwrap(); + for key in [1i64, 3, 7] { + writer.write0((&key, &())).unwrap(); + } + + let (reader, filters) = if reopen { + let path = writer.path().clone(); + let (_file_handle, _key_filter, _key_bounds) = + writer.close(BatchMetadata::default()).unwrap(); + let (reader, membership_filter) = Reader::open_with_filter( + &[&factories.any_factories()], + test_buffer_cache, + &*storage_backend, + &path, + ) + .unwrap(); + let key_range = reader.key_range().unwrap().map(Into::into); + let filters = BatchFilters::from_file(key_range, membership_filter); + (reader, filters) + } else { + writer.into_reader(BatchMetadata::default()).unwrap() + }; + + for key in [1i64, 3, 7] { + assert!(filters.maybe_contains_key(key.erase(), None)); + } + assert_eq!(filter_block_magic(&reader), Some(BLOOM_FILTER_BLOCK_MAGIC)); + assert_eq!(incompatible_features(&reader), 0); + } +} + +#[test] +fn test_roaring_u32_filter_roundtrip_exact_and_block_kind() { + init_test_logger(); + + for reopen in [false, true] { + let factories = Factories::::new::(); + let tempdir = tempdir().unwrap(); + let storage_backend = ::new( + &StorageConfig { + path: tempdir.path().to_string_lossy().to_string(), + cache: Default::default(), + }, + &StorageOptions::default(), + ) + .unwrap(); + + let filter_plan = sampled_filter_plan(&factories, &[1u32, 3, 7]); + let mut writer = Writer1::new( + &factories, + test_buffer_cache, + &*storage_backend, + Parameters::default(), + FilterPlan::decide_filter(Some(&filter_plan), 3), + ) + .unwrap(); + for key in [1u32, 3, 7] { + writer.write0((&key, &())).unwrap(); + } + + let (reader, filters) = if reopen { + let path = writer.path().clone(); + let (_file_handle, _key_filter, _key_bounds) = + writer.close(BatchMetadata::default()).unwrap(); + let (reader, membership_filter) = Reader::open_with_filter( + &[&factories.any_factories()], + 
test_buffer_cache, + &*storage_backend, + &path, + ) + .unwrap(); + let key_range = reader.key_range().unwrap().map(Into::into); + let filters = BatchFilters::from_file(key_range, membership_filter); + (reader, filters) + } else { + writer.into_reader(BatchMetadata::default()).unwrap() + }; + + for key in [1u32, 3, 7] { + assert!(filters.maybe_contains_key(key.erase(), None)); + } + for key in [0u32, 2, 9] { + assert!(!filters.maybe_contains_key(key.erase(), None)); + } + assert_eq!( + filter_block_magic(&reader), + Some(ROARING_BITMAP_FILTER_BLOCK_MAGIC) + ); + assert_ne!(incompatible_features(&reader), 0); + } +} + +#[test] +fn test_roaring_tup1_i32_filter_roundtrip_exact_and_block_kind() { + init_test_logger(); + + for reopen in [false, true] { + let factories = Factories::::new::, ()>(); + let tempdir = tempdir().unwrap(); + let storage_backend = ::new( + &StorageConfig { + path: tempdir.path().to_string_lossy().to_string(), + cache: Default::default(), + }, + &StorageOptions::default(), + ) + .unwrap(); + + let filter_plan = sampled_filter_plan(&factories, &[Tup1(-7i32), Tup1(1), Tup1(3)]); + let mut writer = Writer1::new( + &factories, + test_buffer_cache, + &*storage_backend, + Parameters::default(), + FilterPlan::decide_filter(Some(&filter_plan), 3), + ) + .unwrap(); + for key in [Tup1(-7i32), Tup1(1), Tup1(3)] { + writer.write0((&key, &())).unwrap(); + } + + let (reader, filters) = if reopen { + let path = writer.path().clone(); + let (_file_handle, _key_filter, _key_bounds) = + writer.close(BatchMetadata::default()).unwrap(); + let (reader, membership_filter) = Reader::open_with_filter( + &[&factories.any_factories()], + test_buffer_cache, + &*storage_backend, + &path, + ) + .unwrap(); + let key_range = reader.key_range().unwrap().map(Into::into); + let filters = BatchFilters::from_file(key_range, membership_filter); + (reader, filters) + } else { + writer.into_reader(BatchMetadata::default()).unwrap() + }; + + for key in [Tup1(-7i32), Tup1(1), Tup1(3)] { 
+ assert!(filters.maybe_contains_key(key.erase(), None)); + } + for key in [Tup1(-8i32), Tup1(0), Tup1(9)] { + assert!(!filters.maybe_contains_key(key.erase(), None)); + } + assert_eq!( + filter_block_magic(&reader), + Some(ROARING_BITMAP_FILTER_BLOCK_MAGIC) + ); + assert_ne!(incompatible_features(&reader), 0); + } +} + +#[test] +fn test_writer_without_filter_plan_uses_bloom_filter() { + init_test_logger(); + + let factories = Factories::::new::(); + let tempdir = tempdir().unwrap(); + let storage_backend = ::new( + &StorageConfig { + path: tempdir.path().to_string_lossy().to_string(), + cache: Default::default(), + }, + &StorageOptions::default(), + ) + .unwrap(); + + let mut writer = Writer1::new( + &factories, + test_buffer_cache, + &*storage_backend, + Parameters::default(), + FilterPlan::::decide_filter(None, 2), + ) + .unwrap(); + for key in [5u32, 8] { + writer.write0((&key, &())).unwrap(); + } + + let (reader, _filters) = writer.into_reader(BatchMetadata::default()).unwrap(); + assert_eq!(filter_block_magic(&reader), Some(BLOOM_FILTER_BLOCK_MAGIC)); +} + +#[test] +fn test_filter_plan_without_sample_falls_back_to_bloom() { + init_test_logger(); + + let filter_plan = FilterPlan::from_bounds((&1u32) as &DynData, (&7u32) as &DynData); + assert!(matches!( + FilterPlan::decide_filter(Some(&filter_plan), 3), + Some(super::BatchKeyFilter::Bloom(_)) + )); +} + +#[test] +fn test_filter_plan_predictor_prefers_roaring_for_dense_sample() { + init_test_logger(); + + let factories = Factories::::new::(); + let keys: Vec = (0..50_000).collect(); + let filter_plan = sampled_filter_plan(&factories, keys.as_slice()); + + assert!(matches!( + FilterPlan::decide_filter(Some(&filter_plan), keys.len()), + Some(super::BatchKeyFilter::RoaringU32(_)) + )); +} + +#[test] +fn test_filter_plan_predictor_prefers_bloom_for_sparse_wide_sample() { + init_test_logger(); + + let factories = Factories::::new::(); + let keys: Vec = (0..50_000).map(|index| index << 16).collect(); + let 
filter_plan = sampled_filter_plan(&factories, keys.as_slice()); + + assert!(matches!( + FilterPlan::decide_filter(Some(&filter_plan), keys.len()), + Some(super::BatchKeyFilter::Bloom(_)) + )); +} + +#[test] +fn test_roaring_i64_filter_roundtrip_uses_batch_min_offset() { + init_test_logger(); + + for reopen in [false, true] { + let factories = Factories::::new::(); + let tempdir = tempdir().unwrap(); + let storage_backend = ::new( + &StorageConfig { + path: tempdir.path().to_string_lossy().to_string(), + cache: Default::default(), + }, + &StorageOptions::default(), + ) + .unwrap(); + + let min = (i64::from(u32::MAX) * 4) + 10; + let keys = [min, min + 3, min + 7]; + let filter_plan = sampled_filter_plan(&factories, &keys); + let mut writer = Writer1::new( + &factories, + test_buffer_cache, + &*storage_backend, + Parameters::default(), + FilterPlan::decide_filter(Some(&filter_plan), keys.len()), + ) + .unwrap(); + for key in keys { + writer.write0((&key, &())).unwrap(); + } + + let (reader, filters) = if reopen { + let path = writer.path().clone(); + let (_file_handle, _key_filter, _key_bounds) = + writer.close(BatchMetadata::default()).unwrap(); + let (reader, membership_filter) = Reader::open_with_filter( + &[&factories.any_factories()], + test_buffer_cache, + &*storage_backend, + &path, + ) + .unwrap(); + let key_range = reader.key_range().unwrap().map(Into::into); + let filters = BatchFilters::from_file(key_range, membership_filter); + (reader, filters) + } else { + writer.into_reader(BatchMetadata::default()).unwrap() + }; + + for key in keys { + assert!(filters.maybe_contains_key((&key) as &DynData, None)); + } + for key in [min - 1, min + 4, min + 9] { + assert!(!filters.maybe_contains_key((&key) as &DynData, None)); + } + assert_eq!( + filter_block_magic(&reader), + Some(ROARING_BITMAP_FILTER_BLOCK_MAGIC) + ); + } +} + +#[test] +fn test_roaring_u64_filter_roundtrip_uses_batch_min_offset() { + init_test_logger(); + + let factories = Factories::::new::(); + let 
tempdir = tempdir().unwrap(); + let storage_backend = ::new( + &StorageConfig { + path: tempdir.path().to_string_lossy().to_string(), + cache: Default::default(), + }, + &StorageOptions::default(), + ) + .unwrap(); + + let base = (u64::from(u32::MAX) << 8) + 11; + let keys = [base, base + 2, base + 9]; + let filter_plan = sampled_filter_plan(&factories, &keys); + let mut writer = Writer1::new( + &factories, + test_buffer_cache, + &*storage_backend, + Parameters::default(), + FilterPlan::decide_filter(Some(&filter_plan), keys.len()), + ) + .unwrap(); + for key in keys { + writer.write0((&key, &())).unwrap(); + } + + let (reader, filters) = writer.into_reader(BatchMetadata::default()).unwrap(); + for key in keys { + assert!(filters.maybe_contains_key((&key) as &DynData, None)); + } + for key in [base - 1, base + 3, base + 20] { + assert!(!filters.maybe_contains_key((&key) as &DynData, None)); + } + assert_eq!( + filter_block_magic(&reader), + Some(ROARING_BITMAP_FILTER_BLOCK_MAGIC) + ); +} + +#[test] +fn test_i64_keys_fallback_to_bloom_when_span_exceeds_u32() { + init_test_logger(); + + let factories = Factories::::new::(); + let tempdir = tempdir().unwrap(); + let storage_backend = ::new( + &StorageConfig { + path: tempdir.path().to_string_lossy().to_string(), + cache: Default::default(), + }, + &StorageOptions::default(), + ) + .unwrap(); + + let max = i64::from(u32::MAX) + 1; + let filter_plan = FilterPlan::from_bounds((&0i64) as &DynData, (&max) as &DynData); + let mut writer = Writer1::new( + &factories, + test_buffer_cache, + &*storage_backend, + Parameters::default(), + FilterPlan::decide_filter(Some(&filter_plan), 2), + ) + .unwrap(); + for key in [0i64, max] { + writer.write0((&key, &())).unwrap(); + } + + let (reader, _filters) = writer.into_reader(BatchMetadata::default()).unwrap(); + assert_eq!(filter_block_magic(&reader), Some(BLOOM_FILTER_BLOCK_MAGIC)); +} + fn test_i64_helper(parameters: Parameters) { init_test_logger(); test_one_column( diff --git 
a/crates/dbsp/src/storage/file/writer.rs b/crates/dbsp/src/storage/file/writer.rs index 189603f5a57..816c727a077 100644 --- a/crates/dbsp/src/storage/file/writer.rs +++ b/crates/dbsp/src/storage/file/writer.rs @@ -4,51 +4,44 @@ //! 2-column layer file. To write more columns, either add another `Writer` //! struct, which is easily done, or mark the currently private `Writer` as //! `pub`. +use super::format::Compression; +use super::{AnyFactories, BatchKeyFilter, Factories, reader::Reader}; use crate::storage::{ backend::{BlockLocation, FileReader, FileWriter, StorageBackend, StorageError}, buffer_cache::{BufferCache, FBuf, FBufSerializer, LimitExceeded}, file::{ - BLOOM_FILTER_SEED, SerializerInner, + SerializerInner, format::{ - BatchMetadata, BlockHeader, COMPATIBLE_FEATURE_FILTER64, + BatchMetadata, BlockHeader, BloomFilterBlockRef, COMPATIBLE_FEATURE_FILTER64, COMPATIBLE_FEATURE_NEGATIVE_WEIGHT_COUNT, DATA_BLOCK_MAGIC, DataBlockHeader, - FILE_TRAILER_BLOCK_MAGIC, FileTrailer, FileTrailerColumn, FilterBlockRef, FixedLen, - INDEX_BLOCK_MAGIC, IndexBlockHeader, NodeType, VERSION_NUMBER, Varint, + FILE_TRAILER_BLOCK_MAGIC, FileTrailer, FileTrailerColumn, FixedLen, + INCOMPATIBLE_FEATURE_ROARING_FILTERS, INDEX_BLOCK_MAGIC, IndexBlockHeader, NodeType, + ROARING_BITMAP_FILTER_BLOCK_MAGIC, RoaringBitmapFilterBlockRef, VERSION_NUMBER, Varint, }, reader::TreeNode, }, }; +use crate::{ + Runtime, + dynamic::{DataTrait, DeserializeDyn, SerializeDyn}, + storage::file::ItemFactory, + trace::filter::{BatchFilters, key_range::KeyRange}, +}; use binrw::{ BinWrite, io::{Cursor, NoSeek}, }; use crc32c::crc32c; -#[cfg(debug_assertions)] use dyn_clone::clone_box; -use fastbloom::BloomFilter; use feldera_buffer_cache::CacheEntry; use feldera_storage::StoragePath; use snap::raw::{Encoder, max_compress_len}; -use std::{ - cell::RefCell, - sync::{Arc, Once}, -}; +use std::{cell::RefCell, sync::Arc}; use std::{ marker::PhantomData, mem::{replace, take}, ops::Range, }; -use tracing::info; 
- -use super::format::Compression; -use super::{AnyFactories, Factories, reader::Reader}; -use crate::storage::tracking_bloom_filter::TrackingBloomFilter; -use crate::{ - Runtime, - dynamic::{DataTrait, DeserializeDyn, SerializeDyn}, - storage::file::ItemFactory, - trace::ord::{BatchFilters, key_range::KeyRange}, -}; struct VarintWriter { varint: Varint, @@ -1140,50 +1133,23 @@ impl BlockWriter { struct Writer { cache: fn() -> Option>, writer: BlockWriter, - bloom_filter: Option, + key_filter: Option, cws: Vec, finished_columns: Vec, serializer: SerializerInner, } impl Writer { - fn bloom_false_positive_rate() -> Option { - let rate = Runtime::with_dev_tweaks(|dev_tweaks| dev_tweaks.bloom_false_positive_rate()); - let rate = (rate > 0.0 && rate < 1.0).then_some(rate); - - static ONCE: Once = Once::new(); - ONCE.call_once(|| { - if let Some(rate) = rate { - info!("Using Bloom filter false positive rate {rate}"); - } else { - info!("Bloom filters disabled"); - } - }); - rate - } - pub fn new( factories: &[&AnyFactories], cache: fn() -> Option>, storage_backend: &dyn StorageBackend, parameters: Parameters, n_columns: usize, - estimated_keys: usize, + key_filter: Option, ) -> Result { assert_eq!(factories.len(), n_columns); - let bloom_filter = Self::bloom_false_positive_rate().map(|bloom_false_positive_rate| { - TrackingBloomFilter::new( - BloomFilter::with_false_pos(bloom_false_positive_rate) - .seed(&BLOOM_FILTER_SEED) - .expected_items({ - // `.max(64)` works around a fastbloom bug that hangs when the - // expected number of items is zero (see - // https://github.com/tomtomwombat/fastbloom/issues/17). 
- estimated_keys.max(64) - }), - ) - }); let parameters = Arc::new(parameters); let cws = factories .iter() @@ -1197,7 +1163,7 @@ impl Writer { cache().expect("Should have a buffer cache"), storage_backend.create_with_prefix(&worker.into())?, ), - bloom_filter, + key_filter, cws, finished_columns, serializer: SerializerInner::new(), @@ -1218,11 +1184,10 @@ impl Writer { None }; - if column == 0 { - // Add `key` to bloom filter. - if let Some(bloom_filter) = &mut self.bloom_filter { - bloom_filter.insert_hash(item.0.default_hash()); - } + if column == 0 + && let Some(key_filter) = &mut self.key_filter + { + key_filter.insert_key(item.0); } // Add `value` to row group for column. @@ -1252,22 +1217,50 @@ impl Writer { pub fn close( mut self, metadata: BatchMetadata, - ) -> Result<(Arc, Option), StorageError> { + ) -> Result<(Arc, Option), StorageError> { debug_assert_eq!(self.cws.len(), self.finished_columns.len()); - // Write the Bloom filter. - let filter_location = if let Some(bloom_filter) = &self.bloom_filter { - let filter_block = FilterBlockRef::from(bloom_filter); - // std::mem::size_of::() should be an - // upper bound: in-memory struct size + bloom payload bytes. - let estimated_block_size = (std::mem::size_of::() - + std::mem::size_of_val(filter_block.data)) - // our binrw min block size is 512 so we round it up to avoid another - // reallocation - .next_multiple_of(512); - self.writer - .write_block(filter_block.into_block(estimated_block_size), None)? - .1 + if let Some(key_filter) = &mut self.key_filter { + key_filter.finalize(); + } + + // Write the batch key filter. 
+ let mut incompatible_features = 0; + let filter_location = if let Some(key_filter) = &self.key_filter { + match key_filter { + BatchKeyFilter::Bloom(filter) => { + let filter_block = BloomFilterBlockRef { + header: BlockHeader::new( + &crate::storage::file::format::BLOOM_FILTER_BLOCK_MAGIC, + ), + num_hashes: filter.num_hashes(), + data: filter.as_slice(), + }; + let estimated_block_size = (std::mem::size_of::() + + std::mem::size_of_val(filter_block.data)) + .next_multiple_of(512); + self.writer + .write_block(filter_block.into_block(estimated_block_size), None)? + .1 + } + BatchKeyFilter::RoaringU32(filter) => { + incompatible_features |= INCOMPATIBLE_FEATURE_ROARING_FILTERS; + let mut data = Vec::with_capacity(filter.serialized_size()); + filter + .serialize_into(&mut data) + .map_err(|_| StorageError::RoaringBitmapFilter)?; + let filter_block = RoaringBitmapFilterBlockRef { + header: BlockHeader::new(&ROARING_BITMAP_FILTER_BLOCK_MAGIC), + data: &data, + }; + let estimated_block_size = (std::mem::size_of::() + + data.len()) + .next_multiple_of(512); + self.writer + .write_block(filter_block.into_block(estimated_block_size), None)? 
+ .1 + } + } } else { BlockLocation { offset: 0, size: 0 } }; @@ -1282,7 +1275,7 @@ impl Writer { filter_offset: 0, filter_size: 0, compatible_features: COMPATIBLE_FEATURE_NEGATIVE_WEIGHT_COUNT, - incompatible_features: 0, + incompatible_features, filter_offset64: 0, filter_size64: 0, metadata, @@ -1305,7 +1298,7 @@ impl Writer { self.writer .insert_cache_entry(location, Arc::new(file_trailer)); - Ok((self.writer.complete()?, self.bloom_filter)) + Ok((self.writer.complete()?, self.key_filter)) } pub fn n_columns(&self) -> usize { @@ -1354,7 +1347,7 @@ impl Writer { /// }, &StorageOptions::default()).unwrap(); /// let parameters = Parameters::default(); /// let mut file = -/// Writer1::new(&factories, || Some(Arc::new(BufferCache::new(1024 * 1024))), &*storage_backend, parameters, 1_000_000).unwrap(); +/// Writer1::new(&factories, || Some(Arc::new(BufferCache::new(1024 * 1024))), &*storage_backend, parameters, None).unwrap(); /// for i in 0..1000_u32 { /// file.write0((i.erase(), ().erase())).unwrap(); /// } @@ -1383,7 +1376,7 @@ where cache: fn() -> Option>, storage_backend: &dyn StorageBackend, parameters: Parameters, - estimated_keys: usize, + key_filter: Option, ) -> Result { Ok(Self { factories: factories.clone(), @@ -1393,7 +1386,7 @@ where storage_backend, parameters, 1, - estimated_keys, + key_filter, )?, _phantom: PhantomData, #[cfg(debug_assertions)] @@ -1434,7 +1427,7 @@ where ) -> Result< ( Arc, - Option, + Option, Option<(Box, Box)>, ), StorageError, @@ -1462,12 +1455,12 @@ where let any_factories = self.factories.any_factories(); let cache = self.inner.cache; - let (file_handle, bloom_filter, key_bounds) = self.close(metadata)?; + let (file_handle, key_filter, key_bounds) = self.close(metadata)?; let key_range = key_bounds .as_ref() .map(|(min, max)| KeyRange::from_refs(min.as_ref(), max.as_ref())); let (reader, membership_filter) = - Reader::new_with_filter(&[&any_factories], cache, file_handle, bloom_filter)?; + 
Reader::new_with_filter(&[&any_factories], cache, file_handle, key_filter)?; let filters = BatchFilters::from_file(key_range, membership_filter); Ok((reader, filters)) } @@ -1518,7 +1511,7 @@ where /// }, &StorageOptions::default()).unwrap(); /// let parameters = Parameters::default(); /// let mut file = -/// Writer2::new(&factories, &factories, || Some(Arc::new(BufferCache::new(1024 * 1024))), &*storage_backend, parameters, 1_000_000).unwrap(); +/// Writer2::new(&factories, &factories, || Some(Arc::new(BufferCache::new(1024 * 1024))), &*storage_backend, parameters, None).unwrap(); /// for i in 0..1000_u32 { /// for j in 0..10_u32 { /// file.write1((&j, &())).unwrap(); @@ -1558,7 +1551,7 @@ where cache: fn() -> Option>, storage_backend: &dyn StorageBackend, parameters: Parameters, - estimated_keys: usize, + key_filter: Option, ) -> Result { Ok(Self { factories0: factories0.clone(), @@ -1569,7 +1562,7 @@ where storage_backend, parameters, 2, - estimated_keys, + key_filter, )?, #[cfg(debug_assertions)] prev0: None, @@ -1640,7 +1633,7 @@ where ) -> Result< ( Arc, - Option, + Option, Option<(Box, Box)>, ), StorageError, @@ -1674,7 +1667,7 @@ where let any_factories0 = self.factories0.any_factories(); let any_factories1 = self.factories1.any_factories(); let cache = self.inner.cache; - let (file_handle, bloom_filter, key_bounds) = self.close(metadata)?; + let (file_handle, key_filter, key_bounds) = self.close(metadata)?; let key_range = key_bounds .as_ref() .map(|(min, max)| KeyRange::from_refs(min.as_ref(), max.as_ref())); @@ -1682,7 +1675,7 @@ where &[&any_factories0, &any_factories1], cache, file_handle, - bloom_filter, + key_filter, )?; let filters = BatchFilters::from_file(key_range, membership_filter); Ok((reader, filters)) diff --git a/crates/dbsp/src/storage/tracking_bloom_filter.rs b/crates/dbsp/src/storage/tracking_bloom_filter.rs index 7f226789112..a50cd65e4ad 100644 --- a/crates/dbsp/src/storage/tracking_bloom_filter.rs +++ 
b/crates/dbsp/src/storage/tracking_bloom_filter.rs @@ -1,4 +1,4 @@ -use crate::storage::filter_stats::{FilterStats, TrackingFilterStats}; +use crate::storage::file::{FilterStats, TrackingFilterStats}; use fastbloom::BloomFilter; /// Bloom filter which tracks the number of hits and misses when lookups are performed. @@ -53,7 +53,7 @@ impl TrackingBloomFilter { #[cfg(test)] mod tests { use super::TrackingBloomFilter; - use crate::storage::filter_stats::FilterStats; + use crate::storage::file::FilterStats; use fastbloom::BloomFilter; #[test] @@ -67,7 +67,7 @@ mod tests { FilterStats { size_byte: 96 + 8192 / 8, hits: 0, - misses: 0, + misses: 0 } ); filter.insert_hash(123); @@ -79,7 +79,7 @@ mod tests { FilterStats { size_byte: 96 + 8192 / 8, hits: 1, - misses: 2, + misses: 2 } ); } @@ -91,7 +91,7 @@ mod tests { FilterStats { size_byte: 0, hits: 0, - misses: 0, + misses: 0 } ); } diff --git a/crates/dbsp/src/trace.rs b/crates/dbsp/src/trace.rs index 573331b9aec..aad64648e9c 100644 --- a/crates/dbsp/src/trace.rs +++ b/crates/dbsp/src/trace.rs @@ -31,11 +31,12 @@ use crate::dynamic::{ClonableTrait, DynDataTyped, DynUnit, Weight}; use crate::storage::buffer_cache::CacheStats; use crate::storage::file::SerializerInner; pub use crate::storage::file::{DbspSerializer, Deserializable, Deserializer, Rkyv}; +use crate::storage::file::{FilterKind, FilterStats}; use crate::trace::cursor::{ DefaultPushCursor, FilteredMergeCursor, FilteredMergeCursorWithSnapshot, PushCursor, UnfilteredMergeCursor, }; -use crate::utils::IsNone; +use crate::utils::{IsNone, SupportsRoaring}; use crate::{dynamic::ArchivedDBData, storage::buffer_cache::FBuf}; use cursor::CursorFactory; use enum_map::Enum; @@ -52,7 +53,9 @@ pub mod cursor; pub mod filter; pub mod layers; pub mod ord; +mod sampling; pub mod spine_async; +pub(crate) use sampling::sample_keys_from_batches; pub use spine_async::{BatchReaderWithSnapshot, ListMerger, Spine, SpineSnapshot, WithSnapshot}; #[cfg(test)] @@ -77,7 +80,6 @@ use 
crate::{ algebra::MonoidValue, dynamic::{DataTrait, DynPair, DynVec, DynWeightedPairs, Erase, Factory, WeightTrait}, storage::file::reader::Error as ReaderError, - storage::filter_stats::FilterStats, }; pub use cursor::{Cursor, MergeCursor}; pub use filter::{Filter, GroupFilter}; @@ -102,6 +104,7 @@ pub trait DBData: + Debug + ArchivedDBData + IsNone + + SupportsRoaring + 'static { } @@ -119,6 +122,7 @@ impl DBData for T where + Debug + ArchivedDBData + IsNone + + SupportsRoaring + 'static { } @@ -473,17 +477,36 @@ where /// [Cursor::seek_key_exact] after the range filter. /// /// Today this is usually a Bloom filter. Batches without such a filter - /// should return `FilterStats::default()`. - fn membership_filter_stats(&self) -> FilterStats; + /// should return zero/default stats. + fn membership_filter_stats(&self) -> FilterStats { + FilterStats::default() + } + + /// Filter kind for the secondary membership filter used by + /// [Cursor::seek_key_exact]. + fn membership_filter_kind(&self) -> FilterKind { + FilterKind::None + } /// Statistics of the in-memory range filter used by /// [Cursor::seek_key_exact]. /// - /// Batches without a range filter should return `FilterStats::default()`. + /// Returns range-filter stats. Batches without a range filter should + /// return zeroed range stats. fn range_filter_stats(&self) -> FilterStats { FilterStats::default() } + /// Cached minimum and maximum keys for this batch, when available. + /// + /// File-backed batches materialize these bounds at write time. In-memory + /// batches can compute them from their ordered key storage. Merge builders + /// use these bounds to decide upfront whether a batch span can be encoded + /// into a roaring bitmap. + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + None + } + /// Where the batch's data is stored. fn location(&self) -> BatchLocation { BatchLocation::Memory @@ -534,7 +557,6 @@ where /// * The output sample contains keys sorted in ascending order. 
fn sample_keys(&self, rng: &mut RG, sample_size: usize, sample: &mut DynVec) where - Self::Time: PartialEq<()>, RG: Rng; /// Returns num_partitions-1 keys from the batch that partition the batch into num_partitions @@ -660,9 +682,15 @@ where fn membership_filter_stats(&self) -> FilterStats { (**self).membership_filter_stats() } + fn membership_filter_kind(&self) -> FilterKind { + (**self).membership_filter_kind() + } fn range_filter_stats(&self) -> FilterStats { (**self).range_filter_stats() } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + (**self).key_bounds() + } fn location(&self) -> BatchLocation { (**self).location() } @@ -674,7 +702,6 @@ where } fn sample_keys(&self, rng: &mut RG, sample_size: usize, sample: &mut DynVec) where - Self::Time: PartialEq<()>, RG: Rng, { (**self).sample_keys(rng, sample_size, sample) @@ -998,7 +1025,7 @@ where location: Option, ) -> Self where - B: BatchReader, + B: BatchReader, I: IntoIterator + Clone, { let _ = location; diff --git a/crates/dbsp/src/trace/filter.rs b/crates/dbsp/src/trace/filter.rs index 3c1ed2da1eb..5a6f3ae5e41 100644 --- a/crates/dbsp/src/trace/filter.rs +++ b/crates/dbsp/src/trace/filter.rs @@ -3,10 +3,15 @@ //! Filters are used by the garbage collector to discard unused records. //! We support different several types of filters for keys and values. 
+pub(crate) mod batch; +pub(crate) mod key_range; + use dyn_clone::DynClone; use crate::{circuit::metadata::MetaItem, dynamic::Factory}; +pub(crate) use batch::BatchFilters; + pub trait FilterFunc: Fn(&V) -> bool + DynClone + Send + Sync {} impl FilterFunc for F where F: Fn(&V) -> bool + Clone + Send + Sync + 'static {} diff --git a/crates/dbsp/src/trace/ord/batch_filter.rs b/crates/dbsp/src/trace/filter/batch.rs similarity index 80% rename from crates/dbsp/src/trace/ord/batch_filter.rs rename to crates/dbsp/src/trace/filter/batch.rs index 6a47f708398..91745d23783 100644 --- a/crates/dbsp/src/trace/ord/batch_filter.rs +++ b/crates/dbsp/src/trace/filter/batch.rs @@ -6,14 +6,15 @@ use crate::{ dynamic::{DataTrait, DynVec}, storage::{ - file::reader::FilteredKeys, - filter_stats::{FilterStats, TrackingFilterStats}, + file::{ + BatchKeyFilter, FilterKind, FilterStats, TrackingFilterStats, TrackingRoaringBitmap, + reader::FilteredKeys, + }, tracking_bloom_filter::TrackingBloomFilter, }, - trace::ord::key_range::KeyRange, + trace::filter::key_range::KeyRange, }; use size_of::SizeOf; -use smallvec::SmallVec; use std::sync::Arc; /// A cheap, in-memory precheck used by `seek_key_exact`. @@ -31,6 +32,9 @@ where /// filters pays that cost at most once. fn maybe_contains_key(&self, key: &K, hash: &mut Option) -> bool; + /// Filter kind for observability. + fn kind(&self) -> FilterKind; + /// Statistics for this filter. fn stats(&self) -> FilterStats; } @@ -109,14 +113,10 @@ where /// pay the hash or bloom lookup cost. pub(crate) fn from_file( key_range: Option>, - membership_filter: Option, + membership_filter: Option, ) -> Self { - Self::new( - key_range, - membership_filter - .map(Arc::new) - .map(|filter| filter as Arc>), - ) + let membership_filter = membership_filter.map(Arc::>::from); + Self::new(key_range, membership_filter) } /// Returns cumulative statistics for the range and membership filters. 
@@ -131,6 +131,13 @@ where } } + pub fn membership_filter_kind(&self) -> FilterKind { + self.membership_filter + .as_ref() + .map(|filter| filter.kind()) + .unwrap_or(FilterKind::None) + } + /// Returns the cached key bounds, when available. pub fn key_bounds(&self) -> Option<(&K, &K)> { self.range_filter.range.as_ref().map(|range| range.bounds()) @@ -141,9 +148,7 @@ where pub(crate) fn filtered_keys<'a>(&self, keys: &'a DynVec) -> FilteredKeys<'a, K> { debug_assert!(keys.is_sorted_by(&|a, b| a.cmp(b))); - // Preserve the old `FilteredKeys` heuristic: if too many keys pass, - // avoid allocating the index vector and just keep the original slice. - let mut filter_pass_keys = SmallVec::<[_; 50]>::new(); + let mut filter_pass_keys = Vec::with_capacity(keys.len().min(50)); for (index, key) in keys.dyn_iter().enumerate() { if self.maybe_contains_key(key, None) { filter_pass_keys.push(index); @@ -153,7 +158,7 @@ where } } - FilteredKeys::with_filter_pass_keys(keys, Some(filter_pass_keys.into_vec())) + FilteredKeys::with_filter_pass_keys(keys, Some(filter_pass_keys)) } /// Returns `false` only when `key` is definitely not present. 
@@ -208,6 +213,10 @@ where is_hit } + fn kind(&self) -> FilterKind { + FilterKind::Range + } + fn stats(&self) -> FilterStats { self.as_ref().stats() } @@ -222,17 +231,48 @@ where self.contains_hash(*hash) } + fn kind(&self) -> FilterKind { + FilterKind::Bloom + } + fn stats(&self) -> FilterStats { TrackingBloomFilter::stats(self) } } +impl BatchFilter for TrackingRoaringBitmap +where + K: DataTrait + ?Sized, +{ + fn maybe_contains_key(&self, key: &K, _hash: &mut Option) -> bool { + self.maybe_contains_key(key) + } + + fn kind(&self) -> FilterKind { + FilterKind::Roaring + } + + fn stats(&self) -> FilterStats { + TrackingRoaringBitmap::stats(self) + } +} + +impl From for Arc> +where + K: DataTrait + ?Sized, +{ + fn from(filter: BatchKeyFilter) -> Self { + match filter { + BatchKeyFilter::Bloom(filter) => Arc::new(filter), + BatchKeyFilter::RoaringU32(filter) => Arc::new(filter), + } + } +} + #[cfg(test)] mod tests { use super::{BatchFilter, TrackedRangeFilter}; - use crate::{ - dynamic::DynData, storage::filter_stats::FilterStats, trace::ord::key_range::KeyRange, - }; + use crate::{dynamic::DynData, storage::file::FilterStats, trace::filter::key_range::KeyRange}; use std::sync::Arc; #[test] diff --git a/crates/dbsp/src/trace/ord/key_range.rs b/crates/dbsp/src/trace/filter/key_range.rs similarity index 100% rename from crates/dbsp/src/trace/ord/key_range.rs rename to crates/dbsp/src/trace/filter/key_range.rs diff --git a/crates/dbsp/src/trace/ord.rs b/crates/dbsp/src/trace/ord.rs index e0bf2b0c689..1be1646b897 100644 --- a/crates/dbsp/src/trace/ord.rs +++ b/crates/dbsp/src/trace/ord.rs @@ -1,11 +1,7 @@ -pub(crate) mod batch_filter; pub mod fallback; pub mod file; -pub(crate) mod key_range; pub mod merge_batcher; pub mod vec; - -pub use batch_filter::{BatchFilterStats, BatchFilters}; pub use fallback::{ indexed_wset::{ FallbackIndexedWSet, FallbackIndexedWSet as OrdIndexedWSet, FallbackIndexedWSetBuilder, diff --git 
a/crates/dbsp/src/trace/ord/fallback/indexed_wset.rs b/crates/dbsp/src/trace/ord/fallback/indexed_wset.rs index 54dbbc0b4ac..872de33b6e8 100644 --- a/crates/dbsp/src/trace/ord/fallback/indexed_wset.rs +++ b/crates/dbsp/src/trace/ord/fallback/indexed_wset.rs @@ -1,6 +1,6 @@ use super::utils::{copy_to_builder, pick_merge_destination}; use crate::storage::file::SerializerInner; -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::{FilterKind, FilterStats}; use crate::{ DBWeight, Error, NumEntries, algebra::{AddAssignByRef, AddByRef, NegByRef, ZRingValue}, @@ -283,6 +283,13 @@ where } } + fn membership_filter_kind(&self) -> FilterKind { + match &self.inner { + Inner::File(file) => file.membership_filter_kind(), + Inner::Vec(vec) => vec.membership_filter_kind(), + } + } + fn range_filter_stats(&self) -> FilterStats { match &self.inner { Inner::File(file) => file.range_filter_stats(), @@ -290,6 +297,13 @@ where } } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + match &self.inner { + Inner::File(file) => file.key_bounds(), + Inner::Vec(vec) => vec.key_bounds(), + } + } + #[inline] fn location(&self) -> BatchLocation { match &self.inner { @@ -514,17 +528,23 @@ where location: Option, ) -> Self where - B: BatchReader, + B: BatchReader, I: IntoIterator + Clone, { + let key_capacity = batches.clone().into_iter().map(|b| b.key_count()).sum(); + let value_capacity = batches.clone().into_iter().map(|b| b.len()).sum(); Self { factories: factories.clone(), - inner: BuilderInner::new( - factories, - batches.clone().into_iter().map(|b| b.key_count()).sum(), - batches.clone().into_iter().map(|b| b.len()).sum(), - pick_merge_destination(batches, location).into(), - ), + inner: match pick_merge_destination(batches.clone(), location) { + BatchLocation::Memory => BuilderInner::Vec(VecIndexedWSetBuilder::with_capacity( + &factories.vec_indexed_wset_factory, + key_capacity, + value_capacity, + )), + BatchLocation::Storage => 
BuilderInner::File(FileIndexedWSetBuilder::for_merge( + factories, batches, location, + )), + }, } } diff --git a/crates/dbsp/src/trace/ord/fallback/key_batch.rs b/crates/dbsp/src/trace/ord/fallback/key_batch.rs index e6725e7dcd2..d76d34fee3f 100644 --- a/crates/dbsp/src/trace/ord/fallback/key_batch.rs +++ b/crates/dbsp/src/trace/ord/fallback/key_batch.rs @@ -1,5 +1,5 @@ use super::utils::{copy_to_builder, pick_merge_destination}; -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::{FilterKind, FilterStats}; use crate::{ DBData, DBWeight, NumEntries, Timestamp, dynamic::{ @@ -274,6 +274,14 @@ where } } + #[inline] + fn membership_filter_kind(&self) -> FilterKind { + match &self.inner { + Inner::File(file) => file.membership_filter_kind(), + Inner::Vec(vec) => vec.membership_filter_kind(), + } + } + #[inline] fn range_filter_stats(&self) -> FilterStats { match &self.inner { @@ -282,6 +290,13 @@ where } } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + match &self.inner { + Inner::File(file) => file.key_bounds(), + Inner::Vec(vec) => vec.key_bounds(), + } + } + #[inline] fn location(&self) -> BatchLocation { match &self.inner { @@ -299,7 +314,6 @@ where fn sample_keys(&self, rng: &mut RG, sample_size: usize, output: &mut DynVec) where - Self::Time: PartialEq<()>, RG: Rng, { match &self.inner { @@ -408,23 +422,23 @@ where location: Option, ) -> Self where - B: BatchReader, + B: BatchReader, I: IntoIterator + Clone, { let key_capacity = batches.clone().into_iter().map(|b| b.key_count()).sum(); let value_capacity = batches.clone().into_iter().map(|b| b.len()).sum(); Self { factories: factories.clone(), - inner: match pick_merge_destination(batches, location) { + inner: match pick_merge_destination(batches.clone(), location) { BatchLocation::Memory => BuilderInner::Vec(VecKeyBuilder::with_capacity( &factories.vec, key_capacity, value_capacity, )), - BatchLocation::Storage => BuilderInner::File(FileKeyBuilder::with_capacity( + 
BatchLocation::Storage => BuilderInner::File(FileKeyBuilder::for_merge( &factories.file, - key_capacity, - value_capacity, + batches, + location, )), }, } diff --git a/crates/dbsp/src/trace/ord/fallback/val_batch.rs b/crates/dbsp/src/trace/ord/fallback/val_batch.rs index f376a626681..96e94c38919 100644 --- a/crates/dbsp/src/trace/ord/fallback/val_batch.rs +++ b/crates/dbsp/src/trace/ord/fallback/val_batch.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use super::utils::{copy_to_builder, pick_merge_destination}; use crate::storage::buffer_cache::CacheStats; -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::{FilterKind, FilterStats}; use crate::trace::cursor::{DelegatingCursor, PushCursor}; use crate::trace::ord::file::val_batch::FileValBuilder; use crate::trace::ord::vec::val_batch::VecValBuilder; @@ -281,6 +281,14 @@ where } } + #[inline] + fn membership_filter_kind(&self) -> FilterKind { + match &self.inner { + Inner::File(file) => file.membership_filter_kind(), + Inner::Vec(vec) => vec.membership_filter_kind(), + } + } + #[inline] fn range_filter_stats(&self) -> FilterStats { match &self.inner { @@ -289,6 +297,13 @@ where } } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + match &self.inner { + Inner::File(file) => file.key_bounds(), + Inner::Vec(vec) => vec.key_bounds(), + } + } + #[inline] fn location(&self) -> BatchLocation { match &self.inner { @@ -307,7 +322,6 @@ where fn sample_keys(&self, rng: &mut RG, sample_size: usize, output: &mut DynVec) where RG: Rng, - T: PartialEq<()>, { match &self.inner { Inner::Vec(vec) => vec.sample_keys(rng, sample_size, output), @@ -425,23 +439,23 @@ where location: Option, ) -> Self where - B: BatchReader, + B: BatchReader, I: IntoIterator + Clone, { let key_capacity = batches.clone().into_iter().map(|b| b.key_count()).sum(); let value_capacity = batches.clone().into_iter().map(|b| b.len()).sum(); Self { factories: factories.clone(), - inner: match pick_merge_destination(batches, location) { + 
inner: match pick_merge_destination(batches.clone(), location) { BatchLocation::Memory => BuilderInner::Vec(VecValBuilder::with_capacity( &factories.vec, key_capacity, value_capacity, )), - BatchLocation::Storage => BuilderInner::File(FileValBuilder::with_capacity( + BatchLocation::Storage => BuilderInner::File(FileValBuilder::for_merge( &factories.file, - key_capacity, - value_capacity, + batches, + location, )), }, } diff --git a/crates/dbsp/src/trace/ord/fallback/wset.rs b/crates/dbsp/src/trace/ord/fallback/wset.rs index c1de66b50e4..3757d52e7bd 100644 --- a/crates/dbsp/src/trace/ord/fallback/wset.rs +++ b/crates/dbsp/src/trace/ord/fallback/wset.rs @@ -1,5 +1,5 @@ use super::utils::{copy_to_builder, pick_merge_destination}; -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::{FilterKind, FilterStats}; use crate::{ DBWeight, NumEntries, algebra::{AddAssignByRef, AddByRef, NegByRef, ZRingValue}, @@ -281,6 +281,13 @@ where } } + fn membership_filter_kind(&self) -> FilterKind { + match &self.inner { + Inner::File(file) => file.membership_filter_kind(), + Inner::Vec(vec) => vec.membership_filter_kind(), + } + } + fn range_filter_stats(&self) -> FilterStats { match &self.inner { Inner::File(file) => file.range_filter_stats(), @@ -288,6 +295,13 @@ where } } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + match &self.inner { + Inner::File(file) => file.key_bounds(), + Inner::Vec(vec) => vec.key_bounds(), + } + } + #[inline] fn location(&self) -> BatchLocation { match &self.inner { @@ -495,16 +509,22 @@ where location: Option, ) -> Self where - B: BatchReader, + B: BatchReader, I: IntoIterator + Clone, { + let key_capacity = batches.clone().into_iter().map(|b| b.key_count()).sum(); Self { factories: factories.clone(), - inner: BuilderInner::new( - factories, - batches.clone().into_iter().map(|b| b.key_count()).sum(), - pick_merge_destination(batches, location).into(), - ), + inner: match pick_merge_destination(batches.clone(), 
location) { + BatchLocation::Memory => BuilderInner::Vec(VecWSetBuilder::with_capacity( + &factories.vec_wset_factory, + key_capacity, + key_capacity, + )), + BatchLocation::Storage => { + BuilderInner::File(FileWSetBuilder::for_merge(factories, batches, location)) + } + }, } } diff --git a/crates/dbsp/src/trace/ord/file/indexed_wset_batch.rs b/crates/dbsp/src/trace/ord/file/indexed_wset_batch.rs index 4148e0e134c..f4d0819cfb7 100644 --- a/crates/dbsp/src/trace/ord/file/indexed_wset_batch.rs +++ b/crates/dbsp/src/trace/ord/file/indexed_wset_batch.rs @@ -1,5 +1,5 @@ use crate::storage::file::format::BatchMetadata; -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::{FilterKind, FilterStats}; use crate::{ DBData, DBWeight, NumEntries, Runtime, algebra::{AddAssignByRef, AddByRef, NegByRef}, @@ -10,7 +10,7 @@ use crate::{ storage::{ buffer_cache::CacheStats, file::{ - Factories as FileFactories, + Factories as FileFactories, FilterPlan, reader::{BulkRows, Cursor as FileCursor, Error as ReaderError, Reader}, writer::Writer2, }, @@ -19,8 +19,9 @@ use crate::{ Batch, BatchFactories, BatchLocation, BatchReader, BatchReaderFactories, Builder, Cursor, FileValBatch, VecIndexedWSetFactories, WeightedItem, cursor::{CursorFactory, CursorFactoryWrapper, Pending, Position, PushCursor}, + filter::BatchFilters, merge_batches_by_reference, - ord::{batch_filter::BatchFilters, file::UnwrapStorage, merge_batcher::MergeBatcher}, + ord::{file::UnwrapStorage, merge_batcher::MergeBatcher}, }, }; use crate::{DynZWeight, ZWeight}; @@ -284,7 +285,7 @@ where Runtime::buffer_cache, &*Runtime::storage_backend().unwrap_storage(), Runtime::file_writer_parameters(), - self.key_count(), + FilterPlan::::decide_filter(None, self.key_count()), ) .unwrap_storage(); @@ -398,10 +399,18 @@ where self.filters.stats().membership_filter } + fn membership_filter_kind(&self) -> FilterKind { + self.filters.membership_filter_kind() + } + fn range_filter_stats(&self) -> FilterStats { 
self.filters.stats().range_filter } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + self.filters.key_bounds() + } + #[inline] fn location(&self) -> BatchLocation { BatchLocation::Storage @@ -913,7 +922,39 @@ where Runtime::buffer_cache, &*Runtime::storage_backend().unwrap_storage(), Runtime::file_writer_parameters(), - key_capacity, + FilterPlan::::decide_filter(None, key_capacity), + ) + .unwrap_storage(), + weight: factories.weight_factory().default_box(), + num_tuples: 0, + stats: BatchMetadata::default(), + } + } + + fn for_merge<'a, B, I>( + factories: &FileIndexedWSetFactories, + batches: I, + _location: Option, + ) -> Self + where + B: BatchReader, + I: IntoIterator + Clone, + { + let key_capacity = batches.clone().into_iter().map(|b| b.key_count()).sum(); + let filter_plan = FilterPlan::from_batches(batches.clone()); + let key_filter = filter_plan.map_or_else( + || FilterPlan::::decide_filter(None, key_capacity), + |filter_plan| FilterPlan::decide_filter(Some(&filter_plan), key_capacity), + ); + Self { + factories: factories.clone(), + writer: Writer2::new( + &factories.factories0, + &factories.factories1, + Runtime::buffer_cache, + &*Runtime::storage_backend().unwrap_storage(), + Runtime::file_writer_parameters(), + key_filter, ) .unwrap_storage(), weight: factories.weight_factory().default_box(), diff --git a/crates/dbsp/src/trace/ord/file/key_batch.rs b/crates/dbsp/src/trace/ord/file/key_batch.rs index 4c02b61b065..6ebbe942c77 100644 --- a/crates/dbsp/src/trace/ord/file/key_batch.rs +++ b/crates/dbsp/src/trace/ord/file/key_batch.rs @@ -1,5 +1,5 @@ use crate::storage::file::format::BatchMetadata; -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::{FilterKind, FilterStats}; use crate::trace::cursor::Position; use crate::{ DBData, DBWeight, NumEntries, Runtime, Timestamp, @@ -10,7 +10,7 @@ use crate::{ storage::{ buffer_cache::CacheStats, file::{ - Factories as FileFactories, + Factories as FileFactories, FilterPlan, 
reader::{Cursor as FileCursor, Error as ReaderError, Reader}, writer::Writer2, }, @@ -18,7 +18,8 @@ use crate::{ trace::{ Batch, BatchFactories, BatchLocation, BatchReader, BatchReaderFactories, Builder, Cursor, WeightedItem, - ord::{batch_filter::BatchFilters, file::UnwrapStorage, merge_batcher::MergeBatcher}, + filter::BatchFilters, + ord::{file::UnwrapStorage, merge_batcher::MergeBatcher}, }, utils::Tup2, }; @@ -303,10 +304,18 @@ where self.filters.stats().membership_filter } + fn membership_filter_kind(&self) -> FilterKind { + self.filters.membership_filter_kind() + } + fn range_filter_stats(&self) -> FilterStats { self.filters.stats().range_filter } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + self.filters.key_bounds() + } + #[inline] fn location(&self) -> BatchLocation { BatchLocation::Storage @@ -673,7 +682,39 @@ where Runtime::buffer_cache, &*Runtime::storage_backend().unwrap_storage(), Runtime::file_writer_parameters(), - key_capacity, + FilterPlan::::decide_filter(None, key_capacity), + ) + .unwrap_storage(), + key: factories.opt_key_factory.default_box(), + num_tuples: 0, + stats: BatchMetadata::default(), + } + } + + fn for_merge<'a, B, I>( + factories: &FileKeyBatchFactories, + batches: I, + _location: Option, + ) -> Self + where + B: BatchReader, + I: IntoIterator + Clone, + { + let key_capacity = batches.clone().into_iter().map(|b| b.key_count()).sum(); + let filter_plan = FilterPlan::from_batches(batches.clone()); + let key_filter = filter_plan.map_or_else( + || FilterPlan::::decide_filter(None, key_capacity), + |filter_plan| FilterPlan::decide_filter(Some(&filter_plan), key_capacity), + ); + Self { + factories: factories.clone(), + writer: Writer2::new( + &factories.factories0, + &factories.factories1, + Runtime::buffer_cache, + &*Runtime::storage_backend().unwrap_storage(), + Runtime::file_writer_parameters(), + key_filter, ) .unwrap_storage(), key: factories.opt_key_factory.default_box(), diff --git 
a/crates/dbsp/src/trace/ord/file/val_batch.rs b/crates/dbsp/src/trace/ord/file/val_batch.rs index 6c3824a302f..34b1e8cedb4 100644 --- a/crates/dbsp/src/trace/ord/file/val_batch.rs +++ b/crates/dbsp/src/trace/ord/file/val_batch.rs @@ -1,5 +1,5 @@ use crate::storage::buffer_cache::CacheStats; -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::{FilterKind, FilterStats}; use crate::trace::BatchLocation; use crate::trace::cursor::Position; use crate::trace::ord::file::UnwrapStorage; @@ -10,14 +10,14 @@ use crate::{ Factory, LeanVec, WeightTrait, WithFactory, }, storage::file::{ - Factories as FileFactories, + Factories as FileFactories, FilterPlan, format::BatchMetadata, reader::{Cursor as FileCursor, Error as ReaderError, Reader}, writer::Writer2, }, trace::{ Batch, BatchFactories, BatchReader, BatchReaderFactories, Builder, Cursor, WeightedItem, - ord::{batch_filter::BatchFilters, merge_batcher::MergeBatcher}, + filter::BatchFilters, ord::merge_batcher::MergeBatcher, }, utils::Tup2, }; @@ -325,10 +325,18 @@ where self.filters.stats().membership_filter } + fn membership_filter_kind(&self) -> FilterKind { + self.filters.membership_filter_kind() + } + fn range_filter_stats(&self) -> FilterStats { self.filters.stats().range_filter } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + self.filters.key_bounds() + } + #[inline] fn location(&self) -> BatchLocation { BatchLocation::Storage @@ -716,7 +724,39 @@ where Runtime::buffer_cache, &*Runtime::storage_backend().unwrap_storage(), Runtime::file_writer_parameters(), - key_capacity, + FilterPlan::::decide_filter(None, key_capacity), + ) + .unwrap_storage(), + time_diffs: factories.timediff_factory.default_box(), + num_tuples: 0, + stats: BatchMetadata::default(), + } + } + + fn for_merge<'a, B, I>( + factories: &FileValBatchFactories, + batches: I, + _location: Option, + ) -> Self + where + B: BatchReader, + I: IntoIterator + Clone, + { + let key_capacity = 
batches.clone().into_iter().map(|b| b.key_count()).sum(); + let filter_plan = FilterPlan::from_batches(batches.clone()); + let key_filter = filter_plan.map_or_else( + || FilterPlan::::decide_filter(None, key_capacity), + |filter_plan| FilterPlan::decide_filter(Some(&filter_plan), key_capacity), + ); + Self { + factories: factories.clone(), + writer: Writer2::new( + &factories.factories0, + &factories.factories1, + Runtime::buffer_cache, + &*Runtime::storage_backend().unwrap_storage(), + Runtime::file_writer_parameters(), + key_filter, ) .unwrap_storage(), time_diffs: factories.timediff_factory.default_box(), diff --git a/crates/dbsp/src/trace/ord/file/wset_batch.rs b/crates/dbsp/src/trace/ord/file/wset_batch.rs index 33a005f98d6..10bf4bd7698 100644 --- a/crates/dbsp/src/trace/ord/file/wset_batch.rs +++ b/crates/dbsp/src/trace/ord/file/wset_batch.rs @@ -1,5 +1,5 @@ use crate::storage::file::format::BatchMetadata; -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::{FilterKind, FilterStats}; use crate::{ DBData, DBWeight, NumEntries, Runtime, algebra::{AddAssignByRef, AddByRef, NegByRef}, @@ -10,7 +10,7 @@ use crate::{ storage::{ buffer_cache::CacheStats, file::{ - Factories as FileFactories, + Factories as FileFactories, FilterPlan, reader::{BulkRows, Cursor as FileCursor, Error as ReaderError, Reader}, writer::Writer1, }, @@ -19,8 +19,9 @@ use crate::{ Batch, BatchFactories, BatchLocation, BatchReader, BatchReaderFactories, Builder, Cursor, DbspSerializer, Deserializer, FileKeyBatch, VecWSetFactories, WeightedItem, cursor::{CursorFactoryWrapper, Pending, Position, PushCursor}, + filter::BatchFilters, merge_batches_by_reference, - ord::{batch_filter::BatchFilters, file::UnwrapStorage, merge_batcher::MergeBatcher}, + ord::{file::UnwrapStorage, merge_batcher::MergeBatcher}, }, }; use crate::{DynZWeight, ZWeight}; @@ -258,7 +259,7 @@ where Runtime::buffer_cache, &*Runtime::storage_backend().unwrap(), Runtime::file_writer_parameters(), - 
self.key_count(), + FilterPlan::::decide_filter(None, self.key_count()), ) .unwrap_storage(); @@ -387,10 +388,18 @@ where self.filters.stats().membership_filter } + fn membership_filter_kind(&self) -> FilterKind { + self.filters.membership_filter_kind() + } + fn range_filter_stats(&self) -> FilterStats { self.filters.stats().range_filter } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + self.filters.key_bounds() + } + #[inline] fn location(&self) -> BatchLocation { BatchLocation::Storage @@ -826,7 +835,38 @@ where Runtime::buffer_cache, &*Runtime::storage_backend().unwrap_storage(), Runtime::file_writer_parameters(), - key_capacity, + FilterPlan::::decide_filter(None, key_capacity), + ) + .unwrap_storage(), + weight: factories.weight_factory().default_box(), + num_tuples: 0, + stats: BatchMetadata::default(), + } + } + + fn for_merge<'a, B, I>( + factories: & as BatchReader>::Factories, + batches: I, + _location: Option, + ) -> Self + where + B: BatchReader, + I: IntoIterator + Clone, + { + let key_capacity = batches.clone().into_iter().map(|b| b.key_count()).sum(); + let filter_plan = FilterPlan::from_batches(batches.clone()); + let key_filter = filter_plan.map_or_else( + || FilterPlan::::decide_filter(None, key_capacity), + |filter_plan| FilterPlan::decide_filter(Some(&filter_plan), key_capacity), + ); + Self { + factories: factories.clone(), + writer: Writer1::new( + &factories.file_factories, + Runtime::buffer_cache, + &*Runtime::storage_backend().unwrap_storage(), + Runtime::file_writer_parameters(), + key_filter, ) .unwrap_storage(), weight: factories.weight_factory().default_box(), diff --git a/crates/dbsp/src/trace/ord/vec/indexed_wset_batch.rs b/crates/dbsp/src/trace/ord/vec/indexed_wset_batch.rs index 092e87d6717..5d47c5e129f 100644 --- a/crates/dbsp/src/trace/ord/vec/indexed_wset_batch.rs +++ b/crates/dbsp/src/trace/ord/vec/indexed_wset_batch.rs @@ -1,5 +1,5 @@ +use crate::storage::file::FilterStats; use 
crate::storage::file::SerializerInner; -use crate::storage::filter_stats::FilterStats; use crate::trace::ord::merge_batcher::MergeBatcher; use crate::{ DBData, DBWeight, Error, NumEntries, @@ -461,9 +461,12 @@ where FilterStats::default() } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + Some((self.layer.keys.first()?, self.layer.keys.last()?)) + } + fn sample_keys(&self, rng: &mut RG, sample_size: usize, sample: &mut DynVec) where - Self::Time: PartialEq<()>, RG: Rng, { self.layer.sample_keys(rng, sample_size, sample); diff --git a/crates/dbsp/src/trace/ord/vec/key_batch.rs b/crates/dbsp/src/trace/ord/vec/key_batch.rs index a1598db2d41..c7c1f24a8ca 100644 --- a/crates/dbsp/src/trace/ord/vec/key_batch.rs +++ b/crates/dbsp/src/trace/ord/vec/key_batch.rs @@ -1,4 +1,4 @@ -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::FilterStats; use crate::trace::ord::merge_batcher::MergeBatcher; use crate::{ DBData, DBWeight, NumEntries, Timestamp, @@ -320,9 +320,12 @@ where FilterStats::default() } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + Some((self.layer.keys.first()?, self.layer.keys.last()?)) + } + fn sample_keys(&self, rng: &mut RG, sample_size: usize, sample: &mut DynVec) where - Self::Time: PartialEq<()>, RG: Rng, { self.layer.sample_keys(rng, sample_size, sample); diff --git a/crates/dbsp/src/trace/ord/vec/val_batch.rs b/crates/dbsp/src/trace/ord/vec/val_batch.rs index e19217ba530..b58383a63c7 100644 --- a/crates/dbsp/src/trace/ord/vec/val_batch.rs +++ b/crates/dbsp/src/trace/ord/vec/val_batch.rs @@ -1,5 +1,5 @@ use crate::ZWeight; -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::FilterStats; use crate::trace::cursor::Position; use crate::trace::ord::merge_batcher::MergeBatcher; use crate::{ @@ -381,9 +381,12 @@ where FilterStats::default() } + fn key_bounds(&self) -> Option<(&Self::Key, &Self::Key)> { + Some((self.layer.keys.first()?, self.layer.keys.last()?)) + } + fn 
sample_keys(&self, rng: &mut RG, sample_size: usize, sample: &mut DynVec) where - Self::Time: PartialEq<()>, RG: Rng, { self.layer.sample_keys(rng, sample_size, sample); diff --git a/crates/dbsp/src/trace/ord/vec/wset_batch.rs b/crates/dbsp/src/trace/ord/vec/wset_batch.rs index 2e262d21ede..5c21bce1b9f 100644 --- a/crates/dbsp/src/trace/ord/vec/wset_batch.rs +++ b/crates/dbsp/src/trace/ord/vec/wset_batch.rs @@ -1,4 +1,4 @@ -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::FilterStats; use crate::{ DBData, DBWeight, NumEntries, algebra::{NegByRef, ZRingValue}, @@ -363,6 +363,10 @@ impl BatchReader for VecWSet Option<(&Self::Key, &Self::Key)> { + Some((self.layer.keys.first()?, self.layer.keys.last()?)) + } + fn sample_keys(&self, rng: &mut RG, sample_size: usize, sample: &mut DynVec) where RG: Rng, diff --git a/crates/dbsp/src/trace/sampling.rs b/crates/dbsp/src/trace/sampling.rs new file mode 100644 index 00000000000..1798c4bfc8d --- /dev/null +++ b/crates/dbsp/src/trace/sampling.rs @@ -0,0 +1,61 @@ +use crate::{ + dynamic::DynVec, + trace::{BatchReader, BatchReaderFactories, Cursor, cursor::CursorList}, +}; +use rand::Rng; + +/// Samples keys from a set of batches by invoking each batch's +/// [`BatchReader::sample_keys`] implementation and merging the results. +/// +/// `sample_size_for` decides how many keys to request from each batch. The +/// helper deduplicates keys across batches before appending them to `sample`, +/// which keeps it usable for overlapping inputs such as merge planning. 
+pub(crate) fn sample_keys_from_batches( + factories: &B::Factories, + batches: &[&B], + rng: &mut RG, + mut sample_size_for: F, + sample: &mut DynVec, +) where + B: BatchReader, + RG: Rng, + F: FnMut(&B) -> usize, +{ + if batches.is_empty() { + return; + } + + let total_sample_size = batches + .iter() + .map(|batch| sample_size_for(*batch)) + .sum::(); + if total_sample_size == 0 { + return; + } + + let mut intermediate = factories.keys_factory().default_box(); + let mut merged_cursor = CursorList::new( + factories.weight_factory(), + batches.iter().map(|batch| batch.cursor()).collect(), + ); + intermediate.reserve(total_sample_size); + + for batch in batches { + let sample_size = sample_size_for(*batch); + if sample_size == 0 { + continue; + } + batch.sample_keys(rng, sample_size, intermediate.as_mut()); + } + + intermediate.as_mut().sort_unstable(); + intermediate.dedup(); + for key in intermediate.dyn_iter_mut() { + merged_cursor.seek_key(key); + if let Some(current_key) = merged_cursor.get_key() + && current_key == key + { + sample.push_ref(key); + } + } +} diff --git a/crates/dbsp/src/trace/spine_async.rs b/crates/dbsp/src/trace/spine_async.rs index 11defddadf1..ff3f14eafe6 100644 --- a/crates/dbsp/src/trace/spine_async.rs +++ b/crates/dbsp/src/trace/spine_async.rs @@ -18,17 +18,19 @@ use crate::{ MERGING_MEMORY_RECORDS_COUNT, MERGING_SIZE_BYTES, MERGING_STORAGE_RECORDS_COUNT, MetaItem, MetricId, MetricReading, NEGATIVE_WEIGHT_COUNT, OperatorMeta, RANGE_FILTER_HIT_RATE_PERCENT, RANGE_FILTER_HITS_COUNT, RANGE_FILTER_MISSES_COUNT, - RANGE_FILTER_SIZE_BYTES, SPINE_BATCHES_COUNT, SPINE_STORAGE_SIZE_BYTES, + RANGE_FILTER_SIZE_BYTES, ROARING_FILTER_HIT_RATE_PERCENT, ROARING_FILTER_HITS_COUNT, + ROARING_FILTER_MISSES_COUNT, ROARING_FILTER_SIZE_BYTES, SPINE_BATCHES_COUNT, + SPINE_STORAGE_SIZE_BYTES, }, metrics::COMPACTION_STALL_TIME_NANOSECONDS, negative_weight_multiplier, runtime::{TOKIO_BUFFER_CACHE, TOKIO_WORKER_INDEX}, }, - dynamic::{DynVec, Factory, Weight}, + 
dynamic::{DynVec, Factory}, samply::SamplySpan, storage::{ buffer_cache::{BufferCache, CacheStats}, - filter_stats::FilterStats, + file::{FilterKind, FilterStats}, }, time::Timestamp, trace::{ @@ -36,6 +38,7 @@ use crate::{ cursor::{CursorList, Position}, merge_batches, ord::fallback::pick_insert_destination, + sample_keys_from_batches, spine_async::{ list_merger::ArcListMerger, push_merger::ArcPushMerger, snapshot::FetchList, }, @@ -68,7 +71,6 @@ use std::{ use std::{collections::VecDeque, sync::atomic::Ordering}; use std::{ fmt::{self, Debug, Display, Formatter}, - ops::DerefMut, sync::Condvar, }; use std::{ops::RangeInclusive, sync::Mutex}; @@ -780,19 +782,25 @@ where let mut cache_stats = spine_stats.cache_stats; let mut storage_size = 0; let mut merging_size = 0; - let mut membership_filter_stats = FilterStats::default(); + let mut membership_filter_stats = BTreeMap::::new(); let mut range_filter_stats = FilterStats::default(); - let mut storage_records = 0; + let mut bloom_filter_records = 0; for (batch, merging) in batches { cache_stats += batch.cache_stats(); - membership_filter_stats += batch.membership_filter_stats(); + let kind = batch.membership_filter_kind(); + if kind != FilterKind::None { + *membership_filter_stats.entry(kind).or_default() += + batch.membership_filter_stats(); + } + if kind == FilterKind::Bloom { + bloom_filter_records += batch.key_count(); + } range_filter_stats += batch.range_filter_stats(); let on_storage = batch.location() == BatchLocation::Storage; if on_storage || merging { let size = batch.approximate_byte_size(); if on_storage { storage_size += size; - storage_records += batch.key_count(); } if merging { merging_size += size; @@ -800,9 +808,16 @@ where } } - if storage_records > 0 { + let bloom_filter_stats = membership_filter_stats + .remove(&FilterKind::Bloom) + .unwrap_or_default(); + let roaring_filter_stats = membership_filter_stats + .remove(&FilterKind::Roaring) + .unwrap_or_default(); + + if bloom_filter_records > 0 { 
let bits_per_key = - membership_filter_stats.size_byte as f64 * 8.0 / storage_records as f64; + bloom_filter_stats.size_byte as f64 * 8.0 / bloom_filter_records as f64; let bits_per_key = bits_per_key as usize; meta.extend(metadata! { BLOOM_FILTER_BITS_PER_KEY => MetaItem::Int(bits_per_key) @@ -839,25 +854,48 @@ where MetricReading::new( BLOOM_FILTER_SIZE_BYTES, Vec::new(), - MetaItem::bytes(membership_filter_stats.size_byte), + MetaItem::bytes(bloom_filter_stats.size_byte), ), MetricReading::new( BLOOM_FILTER_HITS_COUNT, Vec::new(), - MetaItem::Count(membership_filter_stats.hits), + MetaItem::Count(bloom_filter_stats.hits), ), MetricReading::new( BLOOM_FILTER_MISSES_COUNT, Vec::new(), - MetaItem::Count(membership_filter_stats.misses), + MetaItem::Count(bloom_filter_stats.misses), ), MetricReading::new( BLOOM_FILTER_HIT_RATE_PERCENT, Vec::new(), MetaItem::Percent { - numerator: membership_filter_stats.hits as u64, - denominator: membership_filter_stats.hits as u64 - + membership_filter_stats.misses as u64, + numerator: bloom_filter_stats.hits as u64, + denominator: bloom_filter_stats.hits as u64 + bloom_filter_stats.misses as u64, + }, + ), + MetricReading::new( + ROARING_FILTER_SIZE_BYTES, + Vec::new(), + MetaItem::bytes(roaring_filter_stats.size_byte), + ), + MetricReading::new( + ROARING_FILTER_HITS_COUNT, + Vec::new(), + MetaItem::Count(roaring_filter_stats.hits), + ), + MetricReading::new( + ROARING_FILTER_MISSES_COUNT, + Vec::new(), + MetaItem::Count(roaring_filter_stats.misses), + ), + MetricReading::new( + ROARING_FILTER_HIT_RATE_PERCENT, + Vec::new(), + MetaItem::Percent { + numerator: roaring_filter_stats.hits as u64, + denominator: roaring_filter_stats.hits as u64 + + roaring_filter_stats.misses as u64, }, ), MetricReading::new( @@ -1291,57 +1329,6 @@ where } } -/// Samples `sample_size` keys from a set of batches. -/// -/// See [`BatchReader::sample_keys`](`crate::trace::BatchReader::sample_keys`) for more details. 
-pub(crate) fn sample_keys_from_batches( - factories: &B::Factories, - batches: &[Arc], - rng: &mut RG, - sample_size: usize, - sample: &mut DynVec, -) where - B: Batch, - B::Time: PartialEq<()>, - RG: Rng, -{ - let total_keys = batches.iter().map(|batch| batch.key_count()).sum::(); - - if sample_size == 0 || total_keys == 0 { - // Avoid division by zero. - return; - } - - // Sample each batch, picking the number of keys proportional to - // batch size. - let mut intermediate = factories.keys_factory().default_box(); - intermediate.reserve(sample_size); - - for batch in batches { - batch.sample_keys( - rng, - ((batch.key_count() as u128) * (sample_size as u128) / (total_keys as u128)) as usize, - intermediate.as_mut(), - ); - } - - // Drop duplicate keys and keys that appear with 0 weight, i.e., - // get canceled out across multiple batches. - intermediate.deref_mut().sort_unstable(); - intermediate.dedup(); - - let mut cursor = SpineCursor::new_cursor(factories, batches.to_vec()); - for key in intermediate.dyn_iter_mut() { - cursor.seek_key(key); - if let Some(current_key) = cursor.get_key() - && current_key == key - { - debug_assert!(cursor.val_valid() && !cursor.weight().is_zero()); - sample.push_ref(key); - } - } -} - impl BatchReader for Spine where B: Batch, @@ -1382,14 +1369,6 @@ where .sum() } - fn membership_filter_stats(&self) -> FilterStats { - self.merger - .get_batches() - .iter() - .map(|batch| batch.membership_filter_stats()) - .sum() - } - fn range_filter_stats(&self) -> FilterStats { self.merger .get_batches() @@ -1404,14 +1383,23 @@ where fn sample_keys(&self, rng: &mut RG, sample_size: usize, sample: &mut DynVec) where - Self::Time: PartialEq<()>, RG: Rng, { + let batches = self.merger.get_batches(); + let total_keys = batches.iter().map(|batch| batch.key_count()).sum::(); + let batch_refs: Vec<_> = batches.iter().map(Arc::as_ref).collect(); sample_keys_from_batches( &self.factories, - &self.merger.get_batches(), + &batch_refs, rng, - 
sample_size, + |batch| { + if sample_size == 0 || total_keys == 0 { + 0 + } else { + ((batch.key_count() as u128) * (sample_size as u128) / (total_keys as u128)) + as usize + } + }, sample, ); } diff --git a/crates/dbsp/src/trace/spine_async/snapshot.rs b/crates/dbsp/src/trace/spine_async/snapshot.rs index 8f7e080779d..92c0ea12a52 100644 --- a/crates/dbsp/src/trace/spine_async/snapshot.rs +++ b/crates/dbsp/src/trace/spine_async/snapshot.rs @@ -13,10 +13,12 @@ use size_of::SizeOf; use super::SpineCursor; use crate::NumEntries; use crate::dynamic::{DynVec, Factory}; -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::FilterStats; use crate::trace::cursor::{CursorFactory, CursorList}; -use crate::trace::spine_async::sample_keys_from_batches; -use crate::trace::{Batch, BatchReader, BatchReaderFactories, Cursor, Spine, merge_batches}; +use crate::trace::{ + Batch, BatchReader, BatchReaderFactories, Cursor, Spine, merge_batches, + sample_keys_from_batches, +}; pub trait WithSnapshot: Sized { type Batch: Batch; @@ -228,27 +230,32 @@ where .fold(0, |acc, batch| acc + batch.approximate_byte_size()) } - fn membership_filter_stats(&self) -> FilterStats { - self.batches - .iter() - .map(|b| b.membership_filter_stats()) - .sum() - } - fn range_filter_stats(&self) -> FilterStats { self.batches.iter().map(|b| b.range_filter_stats()).sum() } fn sample_keys(&self, rng: &mut RG, sample_size: usize, sample: &mut DynVec) where - Self::Time: PartialEq<()>, RG: Rng, { + let total_keys = self + .batches + .iter() + .map(|batch| batch.key_count()) + .sum::(); + let batch_refs: Vec<_> = self.batches.iter().map(Arc::as_ref).collect(); sample_keys_from_batches( &self.factories, - self.batches.as_slice(), + &batch_refs, rng, - sample_size, + |batch| { + if sample_size == 0 || total_keys == 0 { + 0 + } else { + ((batch.key_count() as u128) * (sample_size as u128) / (total_keys as u128)) + as usize + } + }, sample, ); } diff --git a/crates/dbsp/src/trace/test.rs 
b/crates/dbsp/src/trace/test.rs index b1849209bd9..94dc4a07068 100644 --- a/crates/dbsp/src/trace/test.rs +++ b/crates/dbsp/src/trace/test.rs @@ -12,14 +12,15 @@ use size_of::SizeOf; use crate::{ DynZWeight, Runtime, ZWeight, algebra::{ - IndexedZSet, NegByRef, OrdIndexedZSet, OrdIndexedZSetFactories, OrdZSet, OrdZSetFactories, - ZBatch, ZSet, + AddByRef, IndexedZSet, NegByRef, OrdIndexedZSet, OrdIndexedZSetFactories, OrdZSet, + OrdZSetFactories, ZBatch, ZSet, }, circuit::{CircuitConfig, mkconfig}, dynamic::{DowncastTrait, DynData, DynUnit, DynWeightedPairs, Erase, LeanVec, pair::DynPair}, + storage::{buffer_cache::CacheStats, file::FilterKind}, trace::{ - Batch, BatchReader, BatchReaderFactories, Builder, FileIndexedWSetFactories, - FileWSetFactories, GroupFilter, Spine, Trace, + Batch, BatchLocation, BatchReader, BatchReaderFactories, Builder, FileIndexedWSetFactories, + FileWSetFactories, GroupFilter, ListMerger, Spine, Trace, cursor::{Cursor, CursorPair}, ord::{ FileIndexedWSet, FileKeyBatch, FileKeyBatchFactories, FileValBatch, @@ -31,7 +32,7 @@ use crate::{ assert_trace_eq, test_batch_sampling, test_trace_sampling, }, }, - utils::{Tup2, Tup3, Tup4}, + utils::{Tup1, Tup2, Tup3, Tup4}, }; use super::Filter; @@ -828,6 +829,13 @@ where F: FnOnce() + Clone + Send + 'static, { let (_temp_dir, config) = mkconfig(); + run_in_circuit_with_storage_config(config, f); +} + +fn run_in_circuit_with_storage_config(config: CircuitConfig, f: F) +where + F: FnOnce() + Clone + Send + 'static, +{ let count = Arc::new(AtomicUsize::new(0)); Runtime::init_circuit(config, { let count = count.clone(); @@ -843,6 +851,145 @@ where assert_eq!(count.load(Ordering::Relaxed), 1); } +fn total_cache_accesses(stats: CacheStats) -> u64 { + stats + .0 + .iter() + .map(|(_, accesses)| accesses.iter().map(|(_, counts)| counts.count).sum::()) + .sum() +} + +fn build_file_wset_u32(keys: &[u32]) -> FileWSet { + let factories = >::new::(); + let mut builder = + as 
Batch>::Builder::with_capacity(&factories, keys.len(), 0); + + for key in keys { + let weight: ZWeight = 1; + builder.push_time_diff(&(), weight.erase()); + builder.push_key(key.erase()); + } + + builder.done() +} + +fn build_file_wset_tup1_i32(keys: &[i32]) -> FileWSet { + let factories = >::new::, (), ZWeight>(); + let mut builder = + as Batch>::Builder::with_capacity(&factories, keys.len(), 0); + + for key in keys { + let weight: ZWeight = 1; + builder.push_time_diff(&(), weight.erase()); + builder.push_key(Tup1(*key).erase()); + } + + builder.done() +} + +fn build_fallback_wset_i32(keys: &[i32]) -> crate::trace::FallbackWSet { + let factories = + >::new::(); + let mut erased_tuples = zset_tuples(keys.iter().copied().map(|key| Tup2(key, 1)).collect()); + crate::trace::FallbackWSet::::dyn_from_tuples( + &factories, + (), + &mut erased_tuples, + ) +} + +#[test] +fn test_file_wset_roaring_u32_seek_key_exact_skips_absent_reads() { + let (_temp_dir, mut config) = mkconfig(); + config.dev_tweaks.enable_roaring = Some(true); + + run_in_circuit_with_storage_config(config, move || { + let batch = build_file_wset_u32(&[1, 3, 7]); + let mut cursor = batch.cursor(); + let before = total_cache_accesses(batch.cache_stats()); + + let missing = 2u32; + assert!(!cursor.seek_key_exact(missing.erase(), None)); + assert_eq!(total_cache_accesses(batch.cache_stats()), before); + + let present = 3u32; + assert!(cursor.seek_key_exact(present.erase(), None)); + }); +} + +#[test] +fn test_file_wset_tup1_i32_roaring_seek_key_exact_skips_absent_reads() { + let (_temp_dir, mut config) = mkconfig(); + config.dev_tweaks.enable_roaring = Some(true); + + run_in_circuit_with_storage_config(config, move || { + let batch = build_file_wset_tup1_i32(&[-7, 1, 3]); + let mut cursor = batch.cursor(); + let before = total_cache_accesses(batch.cache_stats()); + + let missing = Tup1(2i32); + assert!(!cursor.seek_key_exact(missing.erase(), None)); + assert_eq!(total_cache_accesses(batch.cache_stats()), 
before); + + let present = Tup1(3i32); + assert!(cursor.seek_key_exact(present.erase(), None)); + }); +} + +#[test] +fn test_file_wset_roaring_filter_rebuilt_after_merge() { + let (_temp_dir, mut config) = mkconfig(); + config.dev_tweaks.enable_roaring = Some(true); + + run_in_circuit_with_storage_config(config, move || { + let lhs = build_file_wset_u32(&[1, 5]); + let rhs = build_file_wset_u32(&[3, 7]); + let merged = lhs.add_by_ref(&rhs); + + let mut cursor = merged.cursor(); + let before = total_cache_accesses(merged.cache_stats()); + + let missing = 4u32; + assert!(!cursor.seek_key_exact(missing.erase(), None)); + assert_eq!(total_cache_accesses(merged.cache_stats()), before); + + let present = 7u32; + assert!(cursor.seek_key_exact(present.erase(), None)); + }); +} + +#[test] +fn test_fallback_wset_roaring_filter_rebuilt_after_storage_merge() { + let (_temp_dir, mut config) = mkconfig(); + config.dev_tweaks.enable_roaring = Some(true); + config.storage.as_mut().unwrap().options.min_storage_bytes = Some(0); + + run_in_circuit_with_storage_config(config, move || { + let lhs = build_fallback_wset_i32(&[1, 5]); + let rhs = build_fallback_wset_i32(&[3, 7]); + let factories = + >::new::(); + let merged: crate::trace::FallbackWSet = ListMerger::merge( + &factories, + as Batch>::Builder::for_merge( + &factories, + [&lhs, &rhs], + Some(BatchLocation::Storage), + ), + vec![lhs.merge_cursor(None, None), rhs.merge_cursor(None, None)], + ); + + assert_eq!(merged.membership_filter_kind(), FilterKind::Roaring); + + let mut cursor = merged.cursor(); + let before = total_cache_accesses(merged.cache_stats()); + + let missing = 4i32; + assert!(!cursor.seek_key_exact(missing.erase(), None)); + assert_eq!(total_cache_accesses(merged.cache_stats()), before); + }); +} + proptest! 
{ #![proptest_config(ProptestConfig::with_cases(1000))] diff --git a/crates/dbsp/src/trace/test/test_batch.rs b/crates/dbsp/src/trace/test/test_batch.rs index 99d64417552..34bc1728ca8 100644 --- a/crates/dbsp/src/trace/test/test_batch.rs +++ b/crates/dbsp/src/trace/test/test_batch.rs @@ -3,7 +3,7 @@ //! So far, only methods/traits used in tests have been implemented. #![allow(clippy::type_complexity)] -use crate::storage::filter_stats::FilterStats; +use crate::storage::file::FilterStats; use crate::{ DBData, DBWeight, NumEntries, Timestamp, dynamic::{ diff --git a/crates/dbsp/src/utils.rs b/crates/dbsp/src/utils.rs index 6849a75f3b2..a8528842407 100644 --- a/crates/dbsp/src/utils.rs +++ b/crates/dbsp/src/utils.rs @@ -6,6 +6,7 @@ mod consolidation; mod graph; mod is_none; mod sort; +mod supports_roaring; pub mod tuple; #[cfg(test)] @@ -31,6 +32,7 @@ pub use consolidation::{ pub use graph::components; pub use is_none::IsNone; +pub use supports_roaring::SupportsRoaring; #[allow(unused_imports)] pub use dot::{DotEdgeAttributes, DotNodeAttributes}; diff --git a/crates/dbsp/src/utils/supports_roaring.rs b/crates/dbsp/src/utils/supports_roaring.rs new file mode 100644 index 00000000000..0fa1388b807 --- /dev/null +++ b/crates/dbsp/src/utils/supports_roaring.rs @@ -0,0 +1,262 @@ +//! Trait for key types that can be mapped into a roaring bitmap domain. 
+ +use crate::dynamic::{BSet, DowncastTrait, DynData, LeanVec}; +use crate::time::UnitTimestamp; +use std::collections::BTreeMap; +use std::rc::Rc; +use std::sync::Arc; +use uuid::Uuid; + +pub trait SupportsRoaring { + #[inline] + fn supports_roaring32(&self) -> bool { + false + } + + #[inline] + fn roaring_u32_offset(&self, _min: &Self) -> Option + where + Self: Sized, + { + None + } + + #[inline] + fn into_roaring_u32(&self, _min: &DynData) -> Option { + None + } + + #[inline] + fn into_roaring_u32_checked(&self, min: &DynData) -> u32 { + self.into_roaring_u32(min) + .expect("roaring-u32 filter was selected for a key outside the planned batch range") + } +} + +#[macro_export] +macro_rules! never_roaring_filter { + ($($ty:ty),* $(,)?) => { + $( + impl $crate::utils::SupportsRoaring for $ty {} + )* + }; +} + +never_roaring_filter!( + (), + bool, + char, + i8, + i16, + i128, + u8, + u16, + u128, + f32, + f64, + usize, + isize, + String, + UnitTimestamp, + Uuid +); + +impl SupportsRoaring for u32 { + #[inline] + fn supports_roaring32(&self) -> bool { + true + } + + #[inline] + fn roaring_u32_offset(&self, min: &Self) -> Option { + self.checked_sub(*min) + } + + #[inline] + fn into_roaring_u32(&self, min: &DynData) -> Option { + self.roaring_u32_offset(min.downcast_checked::()) + } +} + +impl SupportsRoaring for i32 { + #[inline] + fn supports_roaring32(&self) -> bool { + true + } + + #[inline] + fn roaring_u32_offset(&self, min: &Self) -> Option { + let diff = i64::from(*self) - i64::from(*min); + (0..=i64::from(u32::MAX)) + .contains(&diff) + .then_some(diff as u32) + } + + #[inline] + fn into_roaring_u32(&self, min: &DynData) -> Option { + self.roaring_u32_offset(min.downcast_checked::()) + } +} + +impl SupportsRoaring for u64 { + #[inline] + fn supports_roaring32(&self) -> bool { + true + } + + #[inline] + fn roaring_u32_offset(&self, min: &Self) -> Option { + self.checked_sub(*min) + .filter(|diff| *diff <= u64::from(u32::MAX)) + .map(|diff| diff as u32) + } + + 
#[inline] + fn into_roaring_u32(&self, min: &DynData) -> Option { + self.roaring_u32_offset(min.downcast_checked::()) + } +} + +impl SupportsRoaring for i64 { + #[inline] + fn supports_roaring32(&self) -> bool { + true + } + + #[inline] + fn roaring_u32_offset(&self, min: &Self) -> Option { + let diff = i128::from(*self) - i128::from(*min); + (0..=i128::from(u32::MAX)) + .contains(&diff) + .then_some(diff as u32) + } + + #[inline] + fn into_roaring_u32(&self, min: &DynData) -> Option { + self.roaring_u32_offset(min.downcast_checked::()) + } +} + +impl SupportsRoaring for Option {} + +#[macro_export] +macro_rules! never_roaring_filter_1 { + ($($wrapper:ident),* $(,)?) => { + $( + impl $crate::utils::SupportsRoaring for $wrapper {} + )* + }; +} + +never_roaring_filter_1!(Vec, LeanVec, BSet); + +#[macro_export] +macro_rules! delegate_supports_roaring { + ($($wrapper:ident),* $(,)?) => { + $( + impl $crate::utils::SupportsRoaring for $wrapper { + #[inline] + fn supports_roaring32(&self) -> bool { + self.as_ref().supports_roaring32() + } + + #[inline] + fn roaring_u32_offset(&self, min: &Self) -> Option { + self.as_ref().roaring_u32_offset(min.as_ref()) + } + + #[inline] + fn into_roaring_u32(&self, min: &$crate::dynamic::DynData) -> Option { + self.as_ref().into_roaring_u32(min) + } + + #[inline] + fn into_roaring_u32_checked(&self, min: &$crate::dynamic::DynData) -> u32 { + self.as_ref().into_roaring_u32_checked(min) + } + } + )* + }; +} + +delegate_supports_roaring!(Box, Rc, Arc); + +#[macro_export] +macro_rules! 
never_roaring_filter_tuples { + ($($name:ident),+) => { + impl<$($name),+> SupportsRoaring for ($($name,)+) {} + }; +} + +never_roaring_filter_tuples!(A); +never_roaring_filter_tuples!(A, B); +never_roaring_filter_tuples!(A, B, C); +never_roaring_filter_tuples!(A, B, C, D); +never_roaring_filter_tuples!(A, B, C, D, E); +never_roaring_filter_tuples!(A, B, C, D, E, F); + +impl SupportsRoaring for BTreeMap {} + +impl SupportsRoaring for crate::utils::Tup1 { + #[inline] + fn supports_roaring32(&self) -> bool { + self.0.supports_roaring32() + } + + #[inline] + fn roaring_u32_offset(&self, min: &Self) -> Option { + self.0.roaring_u32_offset(&min.0) + } + + #[inline] + fn into_roaring_u32(&self, min: &DynData) -> Option { + self.roaring_u32_offset(min.downcast_checked::()) + } + + #[inline] + fn into_roaring_u32_checked(&self, min: &DynData) -> u32 { + self.roaring_u32_offset(min.downcast_checked::()) + .expect("roaring-u32 filter was selected for a key outside the planned batch range") + } +} + +#[cfg(test)] +mod test { + use super::SupportsRoaring; + use crate::{dynamic::DynData, utils::Tup1}; + + #[test] + fn supported_roaring_keys() { + assert!(7u32.supports_roaring32()); + assert_eq!(7u32.into_roaring_u32((&0u32) as &DynData), Some(7)); + + assert!((-7i32).supports_roaring32()); + assert_eq!((-7i32).into_roaring_u32((&-10i32) as &DynData), Some(3)); + + assert!(Tup1(-7i32).supports_roaring32()); + assert_eq!( + Tup1(-7i32).into_roaring_u32((&Tup1(-10i32)) as &DynData), + Some(3) + ); + + assert!(11u64.supports_roaring32()); + assert_eq!(11u64.into_roaring_u32((&9u64) as &DynData), Some(2)); + + assert!((-2i64).supports_roaring32()); + assert_eq!((-2i64).into_roaring_u32((&-5i64) as &DynData), Some(3)); + } + + #[test] + fn unsupported_roaring_keys() { + assert!(!"feldera".to_string().supports_roaring32()); + assert_eq!( + "feldera" + .to_string() + .into_roaring_u32((&String::new()) as &DynData), + None + ); + + assert_eq!(11u64.into_roaring_u32((&(u64::MAX - 1)) as 
&DynData), None); + assert_eq!(5i64.into_roaring_u32((&10i64) as &DynData), None); + } +} diff --git a/crates/feldera-macros/src/lib.rs b/crates/feldera-macros/src/lib.rs index 0461482f405..c4b3d9086bd 100644 --- a/crates/feldera-macros/src/lib.rs +++ b/crates/feldera-macros/src/lib.rs @@ -1,4 +1,4 @@ -//! Procedural macros for Feldera tuple types and `IsNone`. +//! Procedural macros for Feldera tuple types and utility traits. //! //! The `declare_tuple!` macro decides which layout to use based on tuple size //! and the active storage format rules. @@ -51,6 +51,8 @@ pub fn derive_not_none(item: TokenStream) -> TokenStream { inner } } + + impl #impl_generics ::dbsp::utils::SupportsRoaring for #ident #ty_generics #where_clause {} }; TokenStream::from(expanded) diff --git a/crates/feldera-macros/src/tuples.rs b/crates/feldera-macros/src/tuples.rs index 6de257791cb..256f7b5e7a9 100644 --- a/crates/feldera-macros/src/tuples.rs +++ b/crates/feldera-macros/src/tuples.rs @@ -247,6 +247,14 @@ pub(super) fn declare_tuple_impl(tuple: TupleDef) -> TokenStream2 { } }; + let roaring_u32_key_impl = if num_elements == 1 { + quote! {} + } else { + quote! { + impl<#(#generics),*> ::dbsp::utils::SupportsRoaring for #name<#(#generics),*> {} + } + }; + let sparse_get_methods = fields .iter() .enumerate() @@ -969,6 +977,7 @@ pub(super) fn declare_tuple_impl(tuple: TupleDef) -> TokenStream2 { #copy_impl #checkpoint_impl #not_an_option + #roaring_u32_key_impl }); expanded diff --git a/crates/feldera-types/src/config/dev_tweaks.rs b/crates/feldera-types/src/config/dev_tweaks.rs index 057daa4c3cb..91679b15cb2 100644 --- a/crates/feldera-types/src/config/dev_tweaks.rs +++ b/crates/feldera-types/src/config/dev_tweaks.rs @@ -170,6 +170,11 @@ pub struct DevTweaks { #[serde(skip_serializing_if = "Option::is_none")] pub bloom_false_positive_rate: Option, + /// Whether file-backed batches may use roaring membership filters when the + /// key type supports them. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub enable_roaring: Option, + /// Maximum batch size in records for level 0 merges. #[serde(skip_serializing_if = "Option::is_none")] pub max_level0_batch_size_records: Option, @@ -240,6 +245,9 @@ impl DevTweaks { pub fn bloom_false_positive_rate(&self) -> f64 { self.bloom_false_positive_rate.unwrap_or(0.0001) } + pub fn enable_roaring(&self) -> bool { + self.enable_roaring.unwrap_or(true) + } pub fn negative_weight_multiplier(&self) -> u16 { self.negative_weight_multiplier.unwrap_or(0) } diff --git a/crates/fxp/src/dbsp_impl.rs b/crates/fxp/src/dbsp_impl.rs index 37f8ec9160c..d7e999435c6 100644 --- a/crates/fxp/src/dbsp_impl.rs +++ b/crates/fxp/src/dbsp_impl.rs @@ -1,6 +1,6 @@ use dbsp::NumEntries; use dbsp::algebra::{HasOne, HasZero, MulByRef, OptionWeightType}; -use dbsp::utils::IsNone; +use dbsp::utils::{IsNone, SupportsRoaring}; use feldera_types::serde_with_context::{ DeserializeWithContext, SerializeWithContext, SqlSerdeConfig, serde_config::DecimalFormat, }; @@ -38,6 +38,8 @@ impl IsNone for Fixed { } } +impl SupportsRoaring for Fixed {} + impl OptionWeightType for Fixed {} impl OptionWeightType for &Fixed {} diff --git a/crates/nexmark/src/queries/q9.rs b/crates/nexmark/src/queries/q9.rs index 9aa8b12abfc..2e3040c1d22 100644 --- a/crates/nexmark/src/queries/q9.rs +++ b/crates/nexmark/src/queries/q9.rs @@ -43,6 +43,7 @@ pub struct Q9Output( ); dbsp::never_none!(Q9Output); +dbsp::never_roaring_filter!(Q9Output); type Q9Stream = Stream>; diff --git a/crates/sqllib/tests/tuple_proptest.rs b/crates/sqllib/tests/tuple_proptest.rs index a42fa01dc2e..cb206c97d95 100644 --- a/crates/sqllib/tests/tuple_proptest.rs +++ b/crates/sqllib/tests/tuple_proptest.rs @@ -8,6 +8,7 @@ use dbsp::storage::backend::memory_impl::MemoryBackend; use dbsp::storage::buffer_cache::BufferCache; use dbsp::storage::file::Factories; use dbsp::storage::file::{ + FilterPlan, format::BatchMetadata, writer::{Parameters, Writer1}, }; @@ 
-308,8 +309,14 @@ where let backend = MemoryBackend::new(); let factories = Factories::::new::(); let parameters = Parameters::default(); - let mut writer = Writer1::new(&factories, buffer_cache, &backend, parameters, values.len()) - .map_err(|err| TestCaseError::fail(format!("writer init failed: {err:?}")))?; + let mut writer = Writer1::new( + &factories, + buffer_cache, + &backend, + parameters, + FilterPlan::::decide_filter(None, values.len()), + ) + .map_err(|err| TestCaseError::fail(format!("writer init failed: {err:?}")))?; let aux = (); for value in &values { diff --git a/crates/storage-test-compat/golden-files/golden-batch-v6-large.feldera b/crates/storage-test-compat/golden-files/golden-batch-v6-large.feldera new file mode 100644 index 00000000000..c97031e2785 Binary files /dev/null and b/crates/storage-test-compat/golden-files/golden-batch-v6-large.feldera differ diff --git a/crates/storage-test-compat/golden-files/golden-batch-v6-small.feldera b/crates/storage-test-compat/golden-files/golden-batch-v6-small.feldera new file mode 100644 index 00000000000..849285b51a3 Binary files /dev/null and b/crates/storage-test-compat/golden-files/golden-batch-v6-small.feldera differ diff --git a/crates/storage-test-compat/golden-files/golden-batch-v6-snappy-large.feldera b/crates/storage-test-compat/golden-files/golden-batch-v6-snappy-large.feldera new file mode 100644 index 00000000000..62a8c83bfef Binary files /dev/null and b/crates/storage-test-compat/golden-files/golden-batch-v6-snappy-large.feldera differ diff --git a/crates/storage-test-compat/golden-files/golden-batch-v6-snappy-small.feldera b/crates/storage-test-compat/golden-files/golden-batch-v6-snappy-small.feldera new file mode 100644 index 00000000000..5e2a8d1349a Binary files /dev/null and b/crates/storage-test-compat/golden-files/golden-batch-v6-snappy-small.feldera differ diff --git a/crates/storage-test-compat/src/bin/golden-writer.rs b/crates/storage-test-compat/src/bin/golden-writer.rs index 
ba8f2a2a8d5..41f6f2cc81e 100644 --- a/crates/storage-test-compat/src/bin/golden-writer.rs +++ b/crates/storage-test-compat/src/bin/golden-writer.rs @@ -13,7 +13,7 @@ use dbsp::storage::file::format::BatchMetadata; use dbsp::storage::file::format::Compression; use dbsp::storage::file::format::VERSION_NUMBER; use dbsp::storage::file::writer::{Parameters, Writer1}; -use dbsp::storage::file::Factories; +use dbsp::storage::file::{Factories, FilterPlan}; use feldera_types::config::{StorageConfig, StorageOptions}; use storage_test_compat::{ @@ -102,7 +102,7 @@ where buffer_cache, &*storage_backend, parameters, - rows, + FilterPlan::::decide_filter(None, rows), )?; for row in 0..rows { @@ -112,7 +112,7 @@ where } let tmp_path = writer.path().clone(); - let (_file_handle, _bloom_filter, _key_bounds) = writer.close(BatchMetadata::default())?; + let (_file_handle, _key_filter, _key_bounds) = writer.close(BatchMetadata::default())?; let content = storage_backend.read(&tmp_path)?; storage_backend.write(&output_storage_path, (*content).clone())?; storage_backend.delete(&tmp_path)?; diff --git a/crates/storage/src/error.rs b/crates/storage/src/error.rs index f45874795fe..b4e3dab6070 100644 --- a/crates/storage/src/error.rs +++ b/crates/storage/src/error.rs @@ -37,10 +37,14 @@ pub enum StorageError { /// Cannot perform operation because storage is not enabled. #[error("Cannot perform operation because storage is not enabled.")] StorageDisabled, - /// Error while creating a bloom filter. - #[error("Failed to serialize/deserialize bloom filter.")] + /// Error while creating a batch key filter. + #[error("Failed to serialize/deserialize batch key filter.")] BloomFilter, + /// Error while serializing a roaring bitmap batch key filter. + #[error("Failed to serialize roaring bitmap batch key filter.")] + RoaringBitmapFilter, + /// Path is not valid in storage. 
/// /// Storage paths may not be absolute, may not start with a drive letter (on @@ -147,7 +151,7 @@ impl StorageError { StorageError::NoPersistentId(_) => ErrorKind::Other, StorageError::CheckpointNotFound(_) => ErrorKind::NotFound, StorageError::StorageDisabled => ErrorKind::Other, - StorageError::BloomFilter => ErrorKind::Other, + StorageError::BloomFilter | StorageError::RoaringBitmapFilter => ErrorKind::Other, StorageError::InvalidPath(_) => ErrorKind::Other, StorageError::InvalidURL(_) => ErrorKind::Other, StorageError::ObjectStore { kind, .. } => *kind, diff --git a/scripts/plot_filter_bitmap.py b/scripts/plot_filter_bitmap.py new file mode 100644 index 00000000000..684dc31198d --- /dev/null +++ b/scripts/plot_filter_bitmap.py @@ -0,0 +1,855 @@ +#!/usr/bin/env -S uv run --script +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "pandas>=2.2", +# "plotly>=5.24", +# "kaleido>=0.2.1", +# ] +# /// + +from __future__ import annotations + +import argparse +import math +from pathlib import Path + +import pandas as pd +import plotly.graph_objects as go +import plotly.io as pio +from plotly.subplots import make_subplots + + +KEY_TYPE_ORDER = ["u32", "u64"] +KEY_SPACE_ORDER = ["consecutive", "full_range", "half_normal"] +STRUCTURE_ORDER = ["bloom", "roaring"] +METRICS = [ + ( + "insert_ns_per_element_avg", + "insert_ns_per_element_min", + "insert_ns_per_element_max", + "Insert Time", + "Insert Time (ns/element)", + "ns", + ), + ( + "lookup_ns_per_element_avg", + "lookup_ns_per_element_min", + "lookup_ns_per_element_max", + "Lookup Time", + "Lookup Time (ns/element)", + "ns", + ), + ("bytes_used", None, None, "Memory Usage", "Memory Usage (bytes)", "bytes"), +] + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Plot filter_bitmap.csv comparisons for bloom vs roaring." 
+ ) + parser.add_argument( + "--input", + type=Path, + default=Path("crates/dbsp/filter_bitmap.csv"), + help="Input CSV produced by crates/dbsp/benches/filter_bitmap.rs", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=Path("filter_bitmap_plots"), + help="Directory to write plots into", + ) + parser.add_argument( + "--write-png", + action="store_true", + help="Also export PNG images with Kaleido. Requires a working non-snap Chrome/Chromium.", + ) + return parser.parse_args() + + +def format_structure(name: str) -> str: + return { + "bloom": "fastbloom", + "roaring": "roaring", + }.get(name, name) + + +def format_key_type(name: str) -> str: + return { + "u32": "u32 Keys", + "u64": "u64 Keys", + }.get(name, name) + + +def format_key_space(name: str, key_type: str) -> str: + if name == "consecutive": + return "K={0..N}" + if name == "full_range": + max_label = "2^32" if key_type == "u32" else "2^64" + return f"K={{0..{max_label}}}" + if name == "half_normal": + return "Half-normal K={0..2^32}" + return name + + +def format_distribution_key_space(name: str) -> str: + if name == "half_normal": + return "Half-normal K={0..2^32}" + return name.replace("_", " ").title() + + +def format_num_elements(value: int) -> str: + return f"{value:,}" + + +def format_key_eps(value: float) -> str: + return f"{value:g}" + + +def format_bytes(value: float) -> str: + units = ["B", "KiB", "MiB", "GiB", "TiB"] + unit_index = 0 + while value >= 1024.0 and unit_index + 1 < len(units): + value /= 1024.0 + unit_index += 1 + return f"{value:.2f} {units[unit_index]}" + + +def format_ns_per_element(value: float) -> str: + return f"{value:.2f} ns" + + +def format_ratio(value: float) -> str: + return f"{value:.2f}x" + + +def metric_formatter(kind: str): + if kind == "bytes": + return format_bytes + return format_ns_per_element + + +def ordered_values(values: pd.Series, preferred_order: list[str]) -> list[str]: + present = {str(value) for value in values.dropna().unique()} + 
ordered = [value for value in preferred_order if value in present] + extras = sorted(present - set(preferred_order)) + return ordered + extras + + +def prepare_frame(frame: pd.DataFrame) -> pd.DataFrame: + frame = frame.copy() + + if "key_type" not in frame.columns: + frame["key_type"] = "u32" + if "key_space" not in frame.columns: + frame["key_space"] = "consecutive" + if "key_eps" not in frame.columns: + frame["key_eps"] = pd.NA + + numeric_columns = [ + "key_eps", + "num_elements", + "lookup_count", + "false_positive_lookup_count", + "repetitions", + "insert_seed", + "lookup_seed", + "key_space_seed", + "bloom_false_positive_rate_target_percent", + "bloom_seed", + "bloom_expected_items", + "bytes_used", + "bytes_per_element", + "bits_per_element", + "insert_ns_per_element_min", + "insert_ns_per_element_avg", + "insert_ns_per_element_max", + "insert_ns_per_element_stddev", + "lookup_ns_per_element_min", + "lookup_ns_per_element_avg", + "lookup_ns_per_element_max", + "lookup_ns_per_element_stddev", + "false_positive_rate_percent_min", + "false_positive_rate_percent_avg", + "false_positive_rate_percent_max", + "false_positive_rate_percent_stddev", + ] + for column in numeric_columns: + if column in frame.columns: + frame[column] = pd.to_numeric(frame[column], errors="coerce") + + group_columns = ["structure", "key_type", "key_space", "key_eps", "num_elements"] + agg_spec: dict[str, str] = {} + for column in frame.columns: + if column in group_columns: + continue + if not pd.api.types.is_numeric_dtype(frame[column]): + agg_spec[column] = "first" + elif column.endswith("_min"): + agg_spec[column] = "min" + elif column.endswith("_max"): + agg_spec[column] = "max" + elif column.endswith("_avg") or column.endswith("_stddev"): + agg_spec[column] = "mean" + elif column in { + "bytes_used", + "bytes_per_element", + "bits_per_element", + "bloom_false_positive_rate_target_percent", + }: + agg_spec[column] = "mean" + else: + agg_spec[column] = "first" + + frame = 
frame.groupby(group_columns, as_index=False, dropna=False).agg(agg_spec) + return frame.sort_values(group_columns) + + +def build_category_order( + frame: pd.DataFrame, + key_spaces: list[str], +) -> list[tuple[int, str]]: + ordered_sizes = sorted(int(value) for value in frame["num_elements"].unique()) + categories: list[tuple[int, str]] = [] + for size in ordered_sizes: + for key_space in key_spaces: + if ((frame["num_elements"] == size) & (frame["key_space"] == key_space)).any(): + categories.append((size, key_space)) + return categories + + +def category_axis(categories: list[tuple[int, str]], key_type: str) -> list[list[str]]: + return [ + [format_num_elements(size) for size, _ in categories], + [format_key_space(key_space, key_type) for _, key_space in categories], + ] + + +def build_metric_figure( + frame: pd.DataFrame, + y_column: str, + y_min_column: str | None, + y_max_column: str | None, + y_label: str, + title: str, + formatter, +) -> go.Figure: + key_types = ordered_values(frame["key_type"], KEY_TYPE_ORDER) + key_spaces = ordered_values(frame["key_space"], KEY_SPACE_ORDER) + colors = { + "bloom": "#0f766e", + "roaring": "#c2410c", + } + + fig = make_subplots( + rows=max(1, len(key_types)), + cols=1, + shared_xaxes=False, + vertical_spacing=0.18, + row_titles=[format_key_type(key_type) for key_type in key_types], + ) + + for row_index, key_type in enumerate(key_types, start=1): + row_frame = frame[frame["key_type"] == key_type] + categories = build_category_order(row_frame, key_spaces) + x_axis = category_axis(categories, key_type) + + for structure in STRUCTURE_ORDER: + structure_frame = ( + row_frame[row_frame["structure"] == structure] + .set_index(["num_elements", "key_space"]) + .sort_index() + ) + if structure_frame.empty: + continue + + y_values = [] + text_values = [] + error_plus = [] + error_minus = [] + for category in categories: + if category in structure_frame.index: + value = float(structure_frame.loc[category, y_column]) + 
y_values.append(value) + text_values.append(formatter(value)) + if y_min_column is not None and y_max_column is not None: + min_value = float(structure_frame.loc[category, y_min_column]) + max_value = float(structure_frame.loc[category, y_max_column]) + error_minus.append(max(0.0, value - min_value)) + error_plus.append(max(0.0, max_value - value)) + else: + error_minus.append(None) + error_plus.append(None) + else: + y_values.append(None) + text_values.append("") + error_minus.append(None) + error_plus.append(None) + + fig.add_trace( + go.Bar( + name=format_structure(structure), + x=x_axis, + y=y_values, + text=text_values, + textposition="outside", + cliponaxis=False, + marker_color=colors[structure], + showlegend=row_index == 1, + offsetgroup=structure, + legendgroup=structure, + error_y=( + dict( + type="data", + symmetric=False, + array=error_plus, + arrayminus=error_minus, + thickness=1.2, + width=3, + color="#334155", + ) + if y_min_column is not None and y_max_column is not None + else None + ), + ), + row=row_index, + col=1, + ) + + fig.update_xaxes(title_text="Input Size / Key Space", row=row_index, col=1) + fig.update_yaxes(title_text=y_label, type="log", row=row_index, col=1) + + fig.update_layout( + title=title, + barmode="group", + template="plotly_white", + width=max(1100, 160 * max(1, len(build_category_order(frame, key_spaces)))), + height=500 * max(1, len(key_types)), + legend_title_text="Structure", + margin=dict(t=90, r=30, b=80, l=80), + ) + return fig + + +def relative_frame(frame: pd.DataFrame, metric: str) -> pd.DataFrame: + if frame.empty: + return pd.DataFrame() + + pivot = ( + frame.pivot_table( + index=["key_type", "key_space", "key_eps", "num_elements"], + columns="structure", + values=metric, + aggfunc="first", + ) + .rename(columns={"bloom": "bloom_value", "roaring": "roaring_value"}) + .reset_index() + ) + if pivot.empty or {"bloom_value", "roaring_value"} - set(pivot.columns): + return pd.DataFrame() + + pivot = 
pivot.dropna(subset=["bloom_value", "roaring_value"]).copy() + if pivot.empty: + return pivot + + pivot["relative_factor"] = pivot["bloom_value"] / pivot["roaring_value"] + pivot["log2_relative_factor"] = pivot["relative_factor"].map(math.log2) + return pivot + + +def heatmap_tick_values(z_bound: float) -> list[float]: + step = 0.5 if z_bound <= 2.0 else 1.0 + tick_count = int(round((2 * z_bound) / step)) + return [(-z_bound + step * index) for index in range(tick_count + 1)] + + +def build_relative_heatmap_figure( + frame: pd.DataFrame, + metric: str, + title: str, + value_formatter, + colorbar_title: str, +) -> go.Figure | None: + ratio_frame = relative_frame(frame, metric) + if ratio_frame.empty: + return None + + key_types = ordered_values(ratio_frame["key_type"], KEY_TYPE_ORDER) + key_spaces = ordered_values(ratio_frame["key_space"], KEY_SPACE_ORDER) + max_abs_log2 = ratio_frame["log2_relative_factor"].abs().max() + z_bound = max(0.5, math.ceil(float(max_abs_log2) * 2.0) / 2.0) + tick_values = heatmap_tick_values(z_bound) + tick_text = [format_ratio(2**value) for value in tick_values] + + fig = make_subplots( + rows=max(1, len(key_types)), + cols=max(1, len(key_spaces)), + row_titles=[format_key_type(key_type) for key_type in key_types], + column_titles=[format_distribution_key_space(key_space) for key_space in key_spaces], + horizontal_spacing=0.08, + vertical_spacing=0.16, + ) + + max_num_values = 1 + max_eps_values = 1 + + for row_index, key_type in enumerate(key_types, start=1): + for col_index, key_space in enumerate(key_spaces, start=1): + subplot_frame = ratio_frame[ + (ratio_frame["key_type"] == key_type) + & (ratio_frame["key_space"] == key_space) + ] + if subplot_frame.empty: + continue + + eps_values = sorted(float(value) for value in subplot_frame["key_eps"].dropna().unique()) + num_values = sorted(int(value) for value in subplot_frame["num_elements"].unique()) + max_num_values = max(max_num_values, len(num_values)) + max_eps_values = 
max(max_eps_values, len(eps_values)) + + log2_table = ( + subplot_frame.pivot(index="key_eps", columns="num_elements", values="log2_relative_factor") + .reindex(index=eps_values, columns=num_values) + ) + ratio_table = ( + subplot_frame.pivot(index="key_eps", columns="num_elements", values="relative_factor") + .reindex(index=eps_values, columns=num_values) + ) + bloom_table = ( + subplot_frame.pivot(index="key_eps", columns="num_elements", values="bloom_value") + .reindex(index=eps_values, columns=num_values) + ) + roaring_table = ( + subplot_frame.pivot(index="key_eps", columns="num_elements", values="roaring_value") + .reindex(index=eps_values, columns=num_values) + ) + + text = [ + [ + format_ratio(value) if pd.notna(value) else "" + for value in row_values + ] + for row_values in ratio_table.values + ] + customdata = [ + [ + [ + value_formatter(bloom_value) if pd.notna(bloom_value) else "", + value_formatter(roaring_value) if pd.notna(roaring_value) else "", + ] + for bloom_value, roaring_value in zip(bloom_row, roaring_row) + ] + for bloom_row, roaring_row in zip(bloom_table.values, roaring_table.values) + ] + + fig.add_trace( + go.Heatmap( + x=[format_num_elements(value) for value in num_values], + y=[format_key_eps(value) for value in eps_values], + z=log2_table.values, + text=text, + customdata=customdata, + texttemplate="%{text}", + hoverongaps=False, + xgap=1, + ygap=1, + coloraxis="coloraxis", + hovertemplate=( + "num_elements=%{x}
" + "key_eps=%{y}
" + f"{colorbar_title}=%{{text}}
" + "fastbloom=%{customdata[0]}
" + "roaring=%{customdata[1]}" + "" + ), + ), + row=row_index, + col=col_index, + ) + + fig.update_xaxes(title_text="num_elements", row=row_index, col=col_index) + fig.update_yaxes(title_text="key_eps", row=row_index, col=col_index) + + fig.update_layout( + title=title, + template="plotly_white", + width=max(950, 280 * len(key_spaces) + 110 * max_num_values * len(key_spaces)), + height=max(480, 220 * len(key_types) + 70 * max_eps_values * len(key_types)), + margin=dict(t=110, r=40, b=80, l=90), + coloraxis=dict( + colorscale=[ + (0.0, "#b91c1c"), + (0.5, "#f8fafc"), + (1.0, "#15803d"), + ], + cmin=-z_bound, + cmax=z_bound, + colorbar=dict( + title=colorbar_title, + tickvals=tick_values, + ticktext=tick_text, + ), + ), + ) + return fig + + +def build_summary_figure(frame: pd.DataFrame) -> go.Figure: + key_types = ordered_values(frame["key_type"], KEY_TYPE_ORDER) + key_spaces = ordered_values(frame["key_space"], KEY_SPACE_ORDER) + colors = { + "bloom": "#0f766e", + "roaring": "#c2410c", + } + + fig = make_subplots( + rows=max(1, len(key_types)), + cols=3, + subplot_titles=[ + metric_title + for _ in key_types + for _, _, _, metric_title, _, _ in METRICS + ], + row_titles=[format_key_type(key_type) for key_type in key_types], + horizontal_spacing=0.06, + vertical_spacing=0.18, + ) + + for row_index, key_type in enumerate(key_types, start=1): + row_frame = frame[frame["key_type"] == key_type] + categories = build_category_order(row_frame, key_spaces) + x_axis = category_axis(categories, key_type) + + for col_index, ( + metric, + metric_min, + metric_max, + _metric_title, + y_label, + kind, + ) in enumerate(METRICS, start=1): + formatter = metric_formatter(kind) + for structure in STRUCTURE_ORDER: + structure_frame = ( + row_frame[row_frame["structure"] == structure] + .set_index(["num_elements", "key_space"]) + .sort_index() + ) + if structure_frame.empty: + continue + + y_values = [] + text_values = [] + error_plus = [] + error_minus = [] + for category in categories: 
+ if category in structure_frame.index: + value = float(structure_frame.loc[category, metric]) + y_values.append(value) + text_values.append(formatter(value)) + if metric_min is not None and metric_max is not None: + min_value = float(structure_frame.loc[category, metric_min]) + max_value = float(structure_frame.loc[category, metric_max]) + error_minus.append(max(0.0, value - min_value)) + error_plus.append(max(0.0, max_value - value)) + else: + error_minus.append(None) + error_plus.append(None) + else: + y_values.append(None) + text_values.append("") + error_minus.append(None) + error_plus.append(None) + + fig.add_trace( + go.Bar( + name=format_structure(structure), + x=x_axis, + y=y_values, + text=text_values, + textposition="outside", + cliponaxis=False, + marker_color=colors[structure], + showlegend=row_index == 1 and col_index == 1, + offsetgroup=structure, + legendgroup=structure, + error_y=( + dict( + type="data", + symmetric=False, + array=error_plus, + arrayminus=error_minus, + thickness=1.2, + width=3, + color="#334155", + ) + if metric_min is not None and metric_max is not None + else None + ), + ), + row=row_index, + col=col_index, + ) + + fig.update_yaxes(title_text=y_label, type="log", row=row_index, col=col_index) + fig.update_xaxes( + title_text="Input Size / Key Space", + row=row_index, + col=col_index, + ) + + fig.update_layout( + title="filter_bitmap Summary", + barmode="group", + template="plotly_white", + width=max(1900, 260 * max(1, len(build_category_order(frame, key_spaces)))), + height=640 * max(1, len(key_types)), + legend_title_text="Structure", + margin=dict(t=110, r=30, b=90, l=70), + ) + return fig + + +def write_figure(fig: go.Figure, base_path: Path, write_png: bool) -> None: + fig.write_html(base_path.with_suffix(".html")) + if write_png: + try: + fig.write_image(base_path.with_suffix(".png"), scale=2) + except Exception as exc: # pragma: no cover - depends on local browser setup. 
+ print(f"warning: failed to write {base_path.with_suffix('.png')}: {exc}") + + +def write_summary_dashboard( + sections: list[tuple[str, go.Figure]], + output_path: Path, +) -> None: + if not sections: + return + + grouped_summary = next( + ((title, figure) for title, figure in sections if title == "Grouped Summary"), + None, + ) + heatmap_sections = [ + (title, figure) for title, figure in sections if title != "Grouped Summary" + ] + + html_parts = [ + "", + "", + "", + " ", + " ", + " filter_bitmap Summary", + " ", + "", + "", + "
", + "

filter_bitmap Summary

", + ] + + next_plotlyjs_mode = "cdn" + + if grouped_summary is not None: + title, figure = grouped_summary + figure_html = pio.to_html( + figure, + full_html=False, + include_plotlyjs=next_plotlyjs_mode, + ) + next_plotlyjs_mode = False + html_parts.extend( + [ + "
", + f"

{title}

", + figure_html, + "
", + ] + ) + + if heatmap_sections: + html_parts.extend( + [ + "
", + "

Roaring Advantage Heatmaps

", + "
", + ] + ) + for title, figure in heatmap_sections: + figure_html = pio.to_html( + figure, + full_html=False, + include_plotlyjs=next_plotlyjs_mode, + ) + next_plotlyjs_mode = False + html_parts.extend( + [ + "
", + f"

{title}

", + figure_html, + "
", + ] + ) + html_parts.extend( + [ + "
", + "
", + ] + ) + + html_parts.extend(["
", "", ""]) + output_path.write_text("\n".join(html_parts), encoding="utf-8") + + +def main() -> None: + args = parse_args() + if not args.input.exists(): + raise SystemExit(f"input CSV not found: {args.input}") + + frame = pd.read_csv(args.input) + if frame.empty: + raise SystemExit(f"input CSV is empty: {args.input}") + + required_columns = { + "structure", + "num_elements", + "insert_ns_per_element_avg", + "lookup_ns_per_element_avg", + "bytes_used", + } + missing_columns = sorted(required_columns - set(frame.columns)) + if missing_columns: + raise SystemExit( + f"input CSV is missing required columns: {', '.join(missing_columns)}" + ) + + frame = prepare_frame(frame) + args.output_dir.mkdir(parents=True, exist_ok=True) + + standard_frame = frame[frame["key_eps"].isna()].copy() + distribution_frame = frame[frame["key_eps"].notna()].copy() + summary_sections: list[tuple[str, go.Figure]] = [] + + if not standard_frame.empty: + insert_figure = build_metric_figure( + standard_frame, + "insert_ns_per_element_avg", + "insert_ns_per_element_min", + "insert_ns_per_element_max", + "Insert Time (ns/element)", + "filter_bitmap: Insert Time", + format_ns_per_element, + ) + write_figure( + insert_figure, + args.output_dir / "filter_bitmap_insert_ns_per_element", + args.write_png, + ) + + lookup_figure = build_metric_figure( + standard_frame, + "lookup_ns_per_element_avg", + "lookup_ns_per_element_min", + "lookup_ns_per_element_max", + "Lookup Time (ns/element)", + "filter_bitmap: Lookup Time", + format_ns_per_element, + ) + write_figure( + lookup_figure, + args.output_dir / "filter_bitmap_lookup_ns_per_element", + args.write_png, + ) + + memory_figure = build_metric_figure( + standard_frame, + "bytes_used", + None, + None, + "Memory Usage (bytes)", + "filter_bitmap: Memory Usage", + format_bytes, + ) + write_figure( + memory_figure, + args.output_dir / "filter_bitmap_memory_bytes", + args.write_png, + ) + + summary_figure = build_summary_figure(standard_frame) + 
write_figure( + summary_figure, + args.output_dir / "filter_bitmap_summary", + args.write_png, + ) + summary_sections.append(("Grouped Summary", summary_figure)) + + if not distribution_frame.empty: + insert_heatmap = build_relative_heatmap_figure( + distribution_frame, + "insert_ns_per_element_avg", + "filter_bitmap: Roaring Insert Advantage Heatmap", + format_ns_per_element, + "fastbloom / roaring", + ) + if insert_heatmap is not None: + write_figure( + insert_heatmap, + args.output_dir / "filter_bitmap_insert_advantage_heatmap", + args.write_png, + ) + summary_sections.append(("Roaring Insert Advantage Heatmap", insert_heatmap)) + + lookup_heatmap = build_relative_heatmap_figure( + distribution_frame, + "lookup_ns_per_element_avg", + "filter_bitmap: Roaring Lookup Advantage Heatmap", + format_ns_per_element, + "fastbloom / roaring", + ) + if lookup_heatmap is not None: + write_figure( + lookup_heatmap, + args.output_dir / "filter_bitmap_lookup_advantage_heatmap", + args.write_png, + ) + summary_sections.append(("Roaring Lookup Advantage Heatmap", lookup_heatmap)) + + memory_heatmap = build_relative_heatmap_figure( + distribution_frame, + "bytes_used", + "filter_bitmap: Roaring Memory Advantage Heatmap", + format_bytes, + "fastbloom / roaring", + ) + if memory_heatmap is not None: + write_figure( + memory_heatmap, + args.output_dir / "filter_bitmap_memory_advantage_heatmap", + args.write_png, + ) + summary_sections.append(("Roaring Memory Advantage Heatmap", memory_heatmap)) + + write_summary_dashboard( + summary_sections, + args.output_dir / "filter_bitmap_summary.html", + ) + + print(f"wrote plots to {args.output_dir}") + + +if __name__ == "__main__": + main()