Skip to content

Commit 84819c1

Browse files
committed
storage: gate roaring format changes by feature flag
1 parent fefc4a0 commit 84819c1

15 files changed

Lines changed: 121 additions & 20 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/dbsp/src/circuit/metadata.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,14 @@ pub const BLOOM_FILTER_MISSES_COUNT: MetricId =
136136
pub const BLOOM_FILTER_HIT_RATE_PERCENT: MetricId =
137137
MetricId(Cow::Borrowed("bloom_filter_hit_rate_percent"));
138138
pub const BLOOM_FILTER_SIZE_BYTES: MetricId = MetricId(Cow::Borrowed("bloom_filter_size_bytes"));
139+
pub const ROARING_FILTER_HITS_COUNT: MetricId =
140+
MetricId(Cow::Borrowed("roaring_filter_hits_count"));
141+
pub const ROARING_FILTER_MISSES_COUNT: MetricId =
142+
MetricId(Cow::Borrowed("roaring_filter_misses_count"));
143+
pub const ROARING_FILTER_HIT_RATE_PERCENT: MetricId =
144+
MetricId(Cow::Borrowed("roaring_filter_hit_rate_percent"));
145+
pub const ROARING_FILTER_SIZE_BYTES: MetricId =
146+
MetricId(Cow::Borrowed("roaring_filter_size_bytes"));
139147
pub const RANGE_FILTER_HITS_COUNT: MetricId = MetricId(Cow::Borrowed("range_filter_hits_count"));
140148
pub const RANGE_FILTER_MISSES_COUNT: MetricId =
141149
MetricId(Cow::Borrowed("range_filter_misses_count"));

crates/dbsp/src/storage/file.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -584,9 +584,8 @@ impl Deserializer {
584584
pub fn new(version: u32) -> Self {
585585
// Proper error is returned in reader.rs, this is a sanity check.
586586
assert!(
587-
version >= format::VERSION_NUMBER,
588-
"Unable to read old (pre-v{}) checkpoint data on this feldera version, pipeline needs to backfilled to start.",
589-
format::VERSION_NUMBER
587+
version >= format::MIN_SUPPORTED_VERSION,
588+
"Unable to read checkpoint data with unsupported old storage format version {version} on this feldera version.",
590589
);
591590
Self {
592591
version,

crates/dbsp/src/storage/file/format.rs

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ use num_derive::FromPrimitive;
8383
use num_traits::FromPrimitive;
8484
use size_of::SizeOf;
8585

86-
/// Increment this on each incompatible change.
86+
/// Increment this when the base file format itself changes incompatibly.
8787
///
8888
/// Increment this on each incompatible change.
8989
///
@@ -93,11 +93,17 @@ use size_of::SizeOf;
9393
/// - v4: Tup None optimizations.
9494
/// - v5: Change in representation for Timestamp, ShortInterval
9595
///
96-
/// When a new version is created, make sure to generate new golden
97-
/// files for it in crate `storage-test-compat` to check for
98-
/// backwards compatibility.
96+
/// Roaring bitmap filters are gated by
97+
/// [`INCOMPATIBLE_FEATURE_ROARING_FILTERS`] instead of a version bump, so
98+
/// Bloom-only files remain readable by older binaries that support v5.
99+
///
100+
/// When a new version is created, make sure to generate new golden files for
101+
/// it in crate `storage-test-compat` to check for backwards compatibility.
99102
pub const VERSION_NUMBER: u32 = 5;
100103

104+
/// Oldest layer file format version this binary can read.
105+
pub const MIN_SUPPORTED_VERSION: u32 = 5;
106+
101107
/// Magic number for data blocks.
102108
pub const DATA_BLOCK_MAGIC: [u8; 4] = *b"LFDB";
103109

@@ -204,8 +210,8 @@ pub struct FileTrailer {
204210
///
205211
/// If any of these bits are set, the version number must be at least 3.
206212
///
207-
/// No incompatible features are currently defined. This bitmap is for
208-
/// future expansion.
213+
/// One incompatible feature is set:
214+
/// [`INCOMPATIBLE_FEATURE_ROARING_FILTERS`].
209215
pub incompatible_features: u64,
210216

211217
/// File offset in bytes of the filter block.
@@ -246,6 +252,17 @@ impl FileTrailer {
246252
(self.compatible_features & feature) != 0
247253
}
248254

255+
/// Returns the unknown incompatible features, if any.
256+
pub fn unknown_incompatible_features(&self) -> Option<u64> {
257+
let unknown_incompatible_features =
258+
self.incompatible_features & !INCOMPATIBLE_FEATURE_ROARING_FILTERS;
259+
if unknown_incompatible_features != 0 {
260+
Some(unknown_incompatible_features)
261+
} else {
262+
None
263+
}
264+
}
265+
249266
/// Returns true if this file trailer has a 64-bit filter.
250267
pub fn has_filter64(&self) -> bool {
251268
self.has_compatible_feature(COMPATIBLE_FEATURE_FILTER64)
@@ -262,6 +279,10 @@ pub const COMPATIBLE_FEATURE_FILTER64: u64 = 1 << 0;
262279
/// deserialized as if its value is 0. Conversely, old readers will simply ignore the field.
263280
pub const COMPATIBLE_FEATURE_NEGATIVE_WEIGHT_COUNT: u64 = 1 << 1;
264281

282+
/// Bit set to 1 in [FileTrailer::incompatible_features] if the file contains
283+
/// roaring bitmap membership filter blocks.
284+
pub const INCOMPATIBLE_FEATURE_ROARING_FILTERS: u64 = 1 << 0;
285+
265286
/// Information about a column.
266287
///
267288
/// Embedded inside the [`FileTrailer`] block.

crates/dbsp/src/trace.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ use crate::{
8080
algebra::MonoidValue,
8181
dynamic::{DataTrait, DynPair, DynVec, DynWeightedPairs, Erase, Factory, WeightTrait},
8282
storage::file::reader::Error as ReaderError,
83-
storage::filter_stats::FilterStats,
8483
};
8584
pub use cursor::{Cursor, MergeCursor};
8685
pub use filter::{Filter, GroupFilter};
@@ -478,13 +477,22 @@ where
478477
/// [Cursor::seek_key_exact] after the range filter.
479478
///
480479
/// Today this is usually a Bloom filter. Batches without such a filter
481-
/// should return `FilterStats::default()`.
482-
fn membership_filter_stats(&self) -> FilterStats;
480+
/// should return zero/default stats.
481+
fn membership_filter_stats(&self) -> FilterStats {
482+
FilterStats::default()
483+
}
484+
485+
/// Filter kind for the secondary membership filter used by
486+
/// [Cursor::seek_key_exact].
487+
fn membership_filter_kind(&self) -> FilterKind {
488+
FilterKind::None
489+
}
483490

484491
/// Statistics of the in-memory range filter used by
485492
/// [Cursor::seek_key_exact].
486493
///
487-
/// Batches without a range filter should return `FilterStats::default()`.
494+
/// Returns range-filter stats. Batches without a range filter should
495+
/// return zeroed range stats.
488496
fn range_filter_stats(&self) -> FilterStats {
489497
FilterStats::default()
490498
}
@@ -674,6 +682,9 @@ where
674682
fn membership_filter_stats(&self) -> FilterStats {
675683
(**self).membership_filter_stats()
676684
}
685+
fn membership_filter_kind(&self) -> FilterKind {
686+
(**self).membership_filter_kind()
687+
}
677688
fn range_filter_stats(&self) -> FilterStats {
678689
(**self).range_filter_stats()
679690
}

crates/dbsp/src/trace/filter/batch.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ where
3232
/// filters pays that cost at most once.
3333
fn maybe_contains_key(&self, key: &K, hash: &mut Option<u64>) -> bool;
3434

35+
/// Filter kind for observability.
36+
fn kind(&self) -> FilterKind;
37+
3538
/// Statistics for this filter.
3639
fn stats(&self) -> FilterStats;
3740
}
@@ -128,6 +131,13 @@ where
128131
}
129132
}
130133

134+
pub fn membership_filter_kind(&self) -> FilterKind {
135+
self.membership_filter
136+
.as_ref()
137+
.map(|filter| filter.kind())
138+
.unwrap_or(FilterKind::None)
139+
}
140+
131141
/// Returns the cached key bounds, when available.
132142
pub fn key_bounds(&self) -> Option<(&K, &K)> {
133143
self.range_filter.range.as_ref().map(|range| range.bounds())
@@ -203,6 +213,10 @@ where
203213
is_hit
204214
}
205215

216+
fn kind(&self) -> FilterKind {
217+
FilterKind::Range
218+
}
219+
206220
fn stats(&self) -> FilterStats {
207221
self.as_ref().stats()
208222
}
@@ -217,6 +231,10 @@ where
217231
self.contains_hash(*hash)
218232
}
219233

234+
fn kind(&self) -> FilterKind {
235+
FilterKind::Bloom
236+
}
237+
220238
fn stats(&self) -> FilterStats {
221239
TrackingBloomFilter::stats(self)
222240
}
@@ -230,6 +248,10 @@ where
230248
self.maybe_contains_key(key)
231249
}
232250

251+
fn kind(&self) -> FilterKind {
252+
FilterKind::Roaring
253+
}
254+
233255
fn stats(&self) -> FilterStats {
234256
TrackingRoaringBitmap::stats(self)
235257
}

crates/dbsp/src/trace/ord/fallback/indexed_wset.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,13 @@ where
283283
}
284284
}
285285

286+
fn membership_filter_kind(&self) -> FilterKind {
287+
match &self.inner {
288+
Inner::File(file) => file.membership_filter_kind(),
289+
Inner::Vec(vec) => vec.membership_filter_kind(),
290+
}
291+
}
292+
286293
fn range_filter_stats(&self) -> FilterStats {
287294
match &self.inner {
288295
Inner::File(file) => file.range_filter_stats(),

crates/dbsp/src/trace/ord/fallback/key_batch.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,14 @@ where
274274
}
275275
}
276276

277+
#[inline]
278+
fn membership_filter_kind(&self) -> FilterKind {
279+
match &self.inner {
280+
Inner::File(file) => file.membership_filter_kind(),
281+
Inner::Vec(vec) => vec.membership_filter_kind(),
282+
}
283+
}
284+
277285
#[inline]
278286
fn range_filter_stats(&self) -> FilterStats {
279287
match &self.inner {

crates/dbsp/src/trace/ord/fallback/val_batch.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,14 @@ where
281281
}
282282
}
283283

284+
#[inline]
285+
fn membership_filter_kind(&self) -> FilterKind {
286+
match &self.inner {
287+
Inner::File(file) => file.membership_filter_kind(),
288+
Inner::Vec(vec) => vec.membership_filter_kind(),
289+
}
290+
}
291+
284292
#[inline]
285293
fn range_filter_stats(&self) -> FilterStats {
286294
match &self.inner {

crates/dbsp/src/trace/ord/fallback/wset.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,13 @@ where
281281
}
282282
}
283283

284+
fn membership_filter_kind(&self) -> FilterKind {
285+
match &self.inner {
286+
Inner::File(file) => file.membership_filter_kind(),
287+
Inner::Vec(vec) => vec.membership_filter_kind(),
288+
}
289+
}
290+
284291
fn range_filter_stats(&self) -> FilterStats {
285292
match &self.inner {
286293
Inner::File(file) => file.range_filter_stats(),

0 commit comments

Comments
 (0)