1+ use crate :: storage:: filter_stats:: { FilterStats , TrackingFilterStats } ;
12use fastbloom:: BloomFilter ;
2- use std:: iter:: Sum ;
3- use std:: ops:: { Add , AddAssign } ;
4- use std:: sync:: atomic:: { AtomicUsize , Ordering } ;
53
64/// Bloom filter which tracks the number of hits and misses when lookups are performed.
75/// It implements the subset of [`BloomFilter`] functions that are used by Feldera storage.
86#[ derive( Debug ) ]
97pub struct TrackingBloomFilter {
108 /// Underlying Bloom filter.
119 bloom_filter : BloomFilter ,
12- /// Number of hits.
13- hits : AtomicUsize ,
14- /// Number of misses.
15- misses : AtomicUsize ,
16- }
17-
18- /// Statistics about the Bloom filter.
19- ///
20- /// The statistics implement addition such that they can be summed (e.g., when you have a spine
21- /// consisting out of multiple batches, each with their own Bloom filter). However, their addition
22- /// will lose information about individual sizes, hits, misses and by extension, hit rates.
23- #[ derive( Clone , Copy , Debug , Default , PartialEq ) ]
24- pub struct BloomFilterStats {
25- /// Bloom filter size in bytes.
26- pub size_byte : usize ,
27- /// Number of hits.
28- pub hits : usize ,
29- /// Number of misses.
30- pub misses : usize ,
31- }
32-
33- impl Add for BloomFilterStats {
34- type Output = Self ;
35-
36- fn add ( mut self , rhs : Self ) -> Self :: Output {
37- self . add_assign ( rhs) ;
38- self
39- }
40- }
41-
42- impl AddAssign for BloomFilterStats {
43- fn add_assign ( & mut self , rhs : Self ) {
44- self . size_byte += rhs. size_byte ;
45- self . hits += rhs. hits ;
46- self . misses += rhs. misses ;
47- }
48- }
49-
50- impl Sum for BloomFilterStats {
51- fn sum < I : Iterator < Item = Self > > ( iter : I ) -> Self {
52- iter. fold ( Self :: default ( ) , Add :: add)
53- }
10+ tracking : TrackingFilterStats ,
5411}
5512
5613impl TrackingBloomFilter {
5714 /// Constructs a tracking Bloom filter which wraps a regular Bloom filter instance.
5815 /// It is assumed the underlying Bloom filter has not yet been used.
5916 pub fn new ( bloom_filter : BloomFilter ) -> Self {
17+ let size_byte = size_of_val ( & bloom_filter) + bloom_filter. num_bits ( ) / 8 ;
6018 Self {
19+ tracking : TrackingFilterStats :: new ( size_byte) ,
6120 bloom_filter,
62- hits : AtomicUsize :: new ( 0 ) ,
63- misses : AtomicUsize :: new ( 0 ) ,
6421 }
6522 }
6623
6724 /// Retrieves statistics.
68- pub fn stats ( & self ) -> BloomFilterStats {
69- BloomFilterStats {
70- size_byte : size_of_val ( & self . bloom_filter ) + self . bloom_filter . num_bits ( ) / 8 ,
71- hits : self . hits . load ( Ordering :: Acquire ) ,
72- misses : self . misses . load ( Ordering :: Acquire ) ,
73- }
25+ pub fn stats ( & self ) -> FilterStats {
26+ self . tracking . stats ( )
7427 }
7528
7629 /// See [`BloomFilter::num_hashes`].
@@ -92,18 +45,15 @@ impl TrackingBloomFilter {
9245 /// It additionally counts the hits or misses, before returning.
9346 pub fn contains_hash ( & self , hash : u64 ) -> bool {
9447 let is_hit = self . bloom_filter . contains_hash ( hash) ;
95- if is_hit {
96- self . hits . fetch_add ( 1 , Ordering :: Release ) ;
97- } else {
98- self . misses . fetch_add ( 1 , Ordering :: Release ) ;
99- }
48+ self . tracking . record ( is_hit) ;
10049 is_hit
10150 }
10251}
10352
10453#[ cfg( test) ]
10554mod tests {
106- use super :: { BloomFilterStats , TrackingBloomFilter } ;
55+ use super :: TrackingBloomFilter ;
56+ use crate :: storage:: filter_stats:: FilterStats ;
10757 use fastbloom:: BloomFilter ;
10858
10959 #[ test]
@@ -114,7 +64,7 @@ mod tests {
11464 assert ! ( filter. num_hashes( ) >= 1 ) ;
11565 assert_eq ! (
11666 filter. stats( ) ,
117- BloomFilterStats {
67+ FilterStats {
11868 size_byte: 96 + 8192 / 8 ,
11969 hits: 0 ,
12070 misses: 0 ,
@@ -126,7 +76,7 @@ mod tests {
12676 assert ! ( !filter. contains_hash( 789 ) ) ;
12777 assert_eq ! (
12878 filter. stats( ) ,
129- BloomFilterStats {
79+ FilterStats {
13080 size_byte: 96 + 8192 / 8 ,
13181 hits: 1 ,
13282 misses: 2 ,
@@ -137,8 +87,8 @@ mod tests {
13787 #[ test]
13888 fn tracking_bloom_filter_stats_default ( ) {
13989 assert_eq ! (
140- BloomFilterStats :: default ( ) ,
141- BloomFilterStats {
90+ FilterStats :: default ( ) ,
91+ FilterStats {
14292 size_byte: 0 ,
14393 hits: 0 ,
14494 misses: 0 ,
@@ -148,24 +98,24 @@ mod tests {
14898
14999 #[ test]
150100 fn tracking_bloom_filter_stats_addition ( ) {
151- let stats1 = BloomFilterStats {
101+ let stats1 = FilterStats {
152102 size_byte : 123 ,
153103 hits : 456 ,
154104 misses : 789 ,
155105 } ;
156- let stats2 = BloomFilterStats {
106+ let stats2 = FilterStats {
157107 size_byte : 100 ,
158108 hits : 200 ,
159109 misses : 300 ,
160110 } ;
161- let stats3 = BloomFilterStats {
111+ let stats3 = FilterStats {
162112 size_byte : 223 ,
163113 hits : 656 ,
164114 misses : 1089 ,
165115 } ;
166116 assert_eq ! ( stats1 + stats2, stats3) ;
167117 assert_eq ! (
168- vec![ stats1, stats2] . into_iter( ) . sum:: <BloomFilterStats >( ) ,
118+ vec![ stats1, stats2] . into_iter( ) . sum:: <FilterStats >( ) ,
169119 stats3
170120 ) ;
171121 }
0 commit comments