Skip to content

Commit f9ae764

Browse files
committed
storage: bump layer file format to v6
1 parent 677a609 commit f9ae764

File tree

20 files changed

+123
-15
lines changed

20 files changed

+123
-15
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/dbsp/src/circuit/metadata.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,14 @@ pub const BLOOM_FILTER_MISSES_COUNT: MetricId =
136136
pub const BLOOM_FILTER_HIT_RATE_PERCENT: MetricId =
137137
MetricId(Cow::Borrowed("bloom_filter_hit_rate_percent"));
138138
pub const BLOOM_FILTER_SIZE_BYTES: MetricId = MetricId(Cow::Borrowed("bloom_filter_size_bytes"));
139+
pub const ROARING_FILTER_HITS_COUNT: MetricId =
140+
MetricId(Cow::Borrowed("roaring_filter_hits_count"));
141+
pub const ROARING_FILTER_MISSES_COUNT: MetricId =
142+
MetricId(Cow::Borrowed("roaring_filter_misses_count"));
143+
pub const ROARING_FILTER_HIT_RATE_PERCENT: MetricId =
144+
MetricId(Cow::Borrowed("roaring_filter_hit_rate_percent"));
145+
pub const ROARING_FILTER_SIZE_BYTES: MetricId =
146+
MetricId(Cow::Borrowed("roaring_filter_size_bytes"));
139147
pub const RANGE_FILTER_HITS_COUNT: MetricId = MetricId(Cow::Borrowed("range_filter_hits_count"));
140148
pub const RANGE_FILTER_MISSES_COUNT: MetricId =
141149
MetricId(Cow::Borrowed("range_filter_misses_count"));

crates/dbsp/src/storage/file.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -584,9 +584,8 @@ impl Deserializer {
584584
pub fn new(version: u32) -> Self {
585585
// Proper error is returned in reader.rs, this is a sanity check.
586586
assert!(
587-
version >= format::VERSION_NUMBER,
588-
"Unable to read old (pre-v{}) checkpoint data on this feldera version, pipeline needs to backfilled to start.",
589-
format::VERSION_NUMBER
587+
version >= format::MIN_SUPPORTED_VERSION,
588+
"Unable to read checkpoint data with unsupported old storage format version {version} on this feldera version.",
590589
);
591590
Self {
592591
version,

crates/dbsp/src/storage/file/filter/stats.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
use crossbeam::utils::CachePadded;
22
use std::sync::atomic::{AtomicUsize, Ordering};
33

4+
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
5+
pub enum FilterKind {
6+
#[default]
7+
None,
8+
Bloom,
9+
Roaring,
10+
Range,
11+
}
12+
413
/// Statistics about an in-memory key filter.
514
///
615
/// The statistics implement addition such that they can be summed across

crates/dbsp/src/storage/file/format.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,15 @@ use size_of::SizeOf;
9090
/// - v3: Bloom filter format change.
9191
/// - v4: Tup None optimizations.
9292
/// - v5: Change in representation for Timestamp, ShortInterval
93+
/// - v6: Roaring bitmap filter blocks.
9394
///
9495
/// When a new version is created, make sure to generate new golden
9596
/// files for it in crate `storage-test-compat` to check for
9697
/// backwards compatibility.
97-
pub const VERSION_NUMBER: u32 = 5;
98+
pub const VERSION_NUMBER: u32 = 6;
99+
100+
/// Oldest layer file format version this binary can read.
101+
pub const MIN_SUPPORTED_VERSION: u32 = 5;
98102

99103
/// Magic number for data blocks.
100104
pub const DATA_BLOCK_MAGIC: [u8; 4] = *b"LFDB";
@@ -244,6 +248,17 @@ impl FileTrailer {
244248
(self.compatible_features & feature) != 0
245249
}
246250

251+
/// Returns the unknown incompatible features, if any.
252+
pub fn unknown_incompatible_features(&self) -> Option<u64> {
253+
let unknown_incompatible_features =
254+
self.incompatible_features & !INCOMPATIBLE_FEATURE_ROARING_FILTERS;
255+
if unknown_incompatible_features != 0 {
256+
Some(unknown_incompatible_features)
257+
} else {
258+
None
259+
}
260+
}
261+
247262
/// Returns true if this file trailer has a 64-bit filter.
248263
pub fn has_filter64(&self) -> bool {
249264
self.has_compatible_feature(COMPATIBLE_FEATURE_FILTER64)
@@ -260,6 +275,10 @@ pub const COMPATIBLE_FEATURE_FILTER64: u64 = 1 << 0;
260275
/// deserialized as if its value is 0. Conversely, old readers will simply ignore the field.
261276
pub const COMPATIBLE_FEATURE_NEGATIVE_WEIGHT_COUNT: u64 = 1 << 1;
262277

278+
/// Bit set to 1 in [FileTrailer::incompatible_features] if the file contains
279+
/// roaring bitmap membership filter blocks.
280+
pub const INCOMPATIBLE_FEATURE_ROARING_FILTERS: u64 = 1 << 0;
281+
263282
/// Information about a column.
264283
///
265284
/// Embedded inside the [`FileTrailer`] block.

crates/dbsp/src/trace.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ use crate::{
8080
algebra::MonoidValue,
8181
dynamic::{DataTrait, DynPair, DynVec, DynWeightedPairs, Erase, Factory, WeightTrait},
8282
storage::file::reader::Error as ReaderError,
83-
storage::filter_stats::FilterStats,
8483
};
8584
pub use cursor::{Cursor, MergeCursor};
8685
pub use filter::{Filter, GroupFilter};
@@ -478,13 +477,22 @@ where
478477
/// [Cursor::seek_key_exact] after the range filter.
479478
///
480479
/// Today this is usually a Bloom filter. Batches without such a filter
481-
/// should return `FilterStats::default()`.
482-
fn membership_filter_stats(&self) -> FilterStats;
480+
/// should return zero/default stats.
481+
fn membership_filter_stats(&self) -> FilterStats {
482+
FilterStats::default()
483+
}
484+
485+
/// Filter kind for the secondary membership filter used by
486+
/// [Cursor::seek_key_exact].
487+
fn membership_filter_kind(&self) -> FilterKind {
488+
FilterKind::None
489+
}
483490

484491
/// Statistics of the in-memory range filter used by
485492
/// [Cursor::seek_key_exact].
486493
///
487-
/// Batches without a range filter should return `FilterStats::default()`.
494+
/// Returns range-filter stats. Batches without a range filter should
495+
/// return zeroed range stats.
488496
fn range_filter_stats(&self) -> FilterStats {
489497
FilterStats::default()
490498
}
@@ -674,6 +682,9 @@ where
674682
fn membership_filter_stats(&self) -> FilterStats {
675683
(**self).membership_filter_stats()
676684
}
685+
fn membership_filter_kind(&self) -> FilterKind {
686+
(**self).membership_filter_kind()
687+
}
677688
fn range_filter_stats(&self) -> FilterStats {
678689
(**self).range_filter_stats()
679690
}

crates/dbsp/src/trace/filter/batch.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ where
3232
/// filters pays that cost at most once.
3333
fn maybe_contains_key(&self, key: &K, hash: &mut Option<u64>) -> bool;
3434

35+
/// Filter kind for observability.
36+
fn kind(&self) -> FilterKind;
37+
3538
/// Statistics for this filter.
3639
fn stats(&self) -> FilterStats;
3740
}
@@ -128,6 +131,13 @@ where
128131
}
129132
}
130133

134+
pub fn membership_filter_kind(&self) -> FilterKind {
135+
self.membership_filter
136+
.as_ref()
137+
.map(|filter| filter.kind())
138+
.unwrap_or(FilterKind::None)
139+
}
140+
131141
/// Returns the cached key bounds, when available.
132142
pub fn key_bounds(&self) -> Option<(&K, &K)> {
133143
self.range_filter.range.as_ref().map(|range| range.bounds())
@@ -203,6 +213,10 @@ where
203213
is_hit
204214
}
205215

216+
fn kind(&self) -> FilterKind {
217+
FilterKind::Range
218+
}
219+
206220
fn stats(&self) -> FilterStats {
207221
self.as_ref().stats()
208222
}
@@ -217,6 +231,10 @@ where
217231
self.contains_hash(*hash)
218232
}
219233

234+
fn kind(&self) -> FilterKind {
235+
FilterKind::Bloom
236+
}
237+
220238
fn stats(&self) -> FilterStats {
221239
TrackingBloomFilter::stats(self)
222240
}
@@ -230,6 +248,10 @@ where
230248
self.maybe_contains_key(key)
231249
}
232250

251+
fn kind(&self) -> FilterKind {
252+
FilterKind::Roaring
253+
}
254+
233255
fn stats(&self) -> FilterStats {
234256
TrackingRoaringBitmap::stats(self)
235257
}

crates/dbsp/src/trace/ord/fallback/indexed_wset.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,13 @@ where
283283
}
284284
}
285285

286+
fn membership_filter_kind(&self) -> FilterKind {
287+
match &self.inner {
288+
Inner::File(file) => file.membership_filter_kind(),
289+
Inner::Vec(vec) => vec.membership_filter_kind(),
290+
}
291+
}
292+
286293
fn range_filter_stats(&self) -> FilterStats {
287294
match &self.inner {
288295
Inner::File(file) => file.range_filter_stats(),

crates/dbsp/src/trace/ord/fallback/key_batch.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,14 @@ where
274274
}
275275
}
276276

277+
#[inline]
278+
fn membership_filter_kind(&self) -> FilterKind {
279+
match &self.inner {
280+
Inner::File(file) => file.membership_filter_kind(),
281+
Inner::Vec(vec) => vec.membership_filter_kind(),
282+
}
283+
}
284+
277285
#[inline]
278286
fn range_filter_stats(&self) -> FilterStats {
279287
match &self.inner {

crates/dbsp/src/trace/ord/fallback/val_batch.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,14 @@ where
281281
}
282282
}
283283

284+
#[inline]
285+
fn membership_filter_kind(&self) -> FilterKind {
286+
match &self.inner {
287+
Inner::File(file) => file.membership_filter_kind(),
288+
Inner::Vec(vec) => vec.membership_filter_kind(),
289+
}
290+
}
291+
284292
#[inline]
285293
fn range_filter_stats(&self) -> FilterStats {
286294
match &self.inner {

0 commit comments

Comments
 (0)