3838public class BloomFilter <E > implements Serializable {
3939 private BitSet bitset ;
4040 private int bitSetSize ;
41+ private double bitsPerElement ;
4142 private int expectedNumberOfFilterElements ; // expected (maximum) number of elements to be added
4243 private int numberOfAddedElements ; // number of elements actually added to the Bloom filter
43- private int k ;
44- static Charset charset = Charset .forName ("UTF-8" ); // encoding used for storing hash values as strings
44+ private int k ; // number of hash functions
4545
46- static String hashName = "MD5" ; // MD5 gives good enough accuracy in most circumstances. Change to SHA1 if it's needed
46+ static final Charset charset = Charset .forName ("UTF-8" ); // encoding used for storing hash values as strings
47+
48+ static final String hashName = "MD5" ; // MD5 gives good enough accuracy in most circumstances. Change to SHA1 if it's needed
4749 static final MessageDigest digestFunction ;
4850 static { // The digest method is reused between instances
4951 MessageDigest tmp ;
@@ -56,18 +58,47 @@ public class BloomFilter<E> implements Serializable {
5658 }
5759
5860 /**
59- * Constructs an empty Bloom filter.
61+ * Constructs an empty Bloom filter. The total length of the Bloom filter will be
62+ * c*n.
63+ *
64+ * @param c is the number of bits used per element.
65+ * @param n is the expected number of elements the filter will contain.
66+ * @param k is the number of hash functions used.
67+ */
68+ public BloomFilter (double c , int n , int k ) {
69+ this .expectedNumberOfFilterElements = n ;
70+ this .k = k ;
71+ this .bitsPerElement = c ;
72+ this .bitSetSize = (int )Math .ceil (c * n );
73+ numberOfAddedElements = 0 ;
74+ this .bitset = new BitSet (bitSetSize );
75+ }
76+
77+ /**
78+ * Constructs an empty Bloom filter. The optimal number of hash functions (k) is estimated from the total size of the Bloom
79+ * and the number of expected elements.
6080 *
61- * @param bitSetSize defines how many bits should be used for the filter.
62- * @param expectedNumberOfFilterElements defines the maximum number of elements the filter is expected to contain.
81+ * @param bitSetSize defines how many bits should be used in total for the filter.
82+ * @param expectedNumberOElements defines the maximum number of elements the filter is expected to contain.
6383 */
64- public BloomFilter (int bitSetSize , int expectedNumberOfFilterElements ) {
65- this .expectedNumberOfFilterElements = expectedNumberOfFilterElements ;
66- this .k = (int ) Math .round ((bitSetSize / expectedNumberOfFilterElements ) *
67- Math .log (2.0 ));
68- bitset = new BitSet (bitSetSize );
69- this .bitSetSize = bitSetSize ;
70- numberOfAddedElements = 0 ;
84+ public BloomFilter (int bitSetSize , int expectedNumberOElements ) {
85+ this (bitSetSize / (double )expectedNumberOElements ,
86+ expectedNumberOElements ,
87+ (int ) Math .round ((bitSetSize / (double )expectedNumberOElements ) * Math .log (2.0 )));
88+ }
89+
90+ /**
91+ * Constructs an empty Bloom filter with a given false positive probability. The number of bits per
92+ * element and the number of hash functions is estimated
93+ * to match the false positive probability.
94+ *
95+ * @param falsePositiveProbability is the desired false positive probability.
96+ * @param expectedNumberOfElements is the expected number of elements in the Bloom filter.
97+ */
98+ public BloomFilter (double falsePositiveProbability , int expectedNumberOfElements ) {
99+ this (Math .ceil (-(Math .log (falsePositiveProbability ) / Math .log (2 ))) / Math .log (2 ), // c = k / ln(2)
100+ expectedNumberOfElements ,
101+ (int )Math .ceil (-(Math .log (falsePositiveProbability ) / Math .log (2 )))); // k = ceil(-log_2(false prob.))
71102 }
72103
73104 /**
@@ -346,4 +377,24 @@ public int count() {
346377 public int getExpectedNumberOfElements () {
347378 return expectedNumberOfFilterElements ;
348379 }
380+
381+ /**
382+ * Get expected number of bits per element when the Bloom filter is full. This value is set by the constructor
383+ * when the Bloom filter is created. See also getBitsPerElement().
384+ *
385+ * @return expected number of bits per element.
386+ */
387+ public double getExpectedBitsPerElement () {
388+ return this .bitsPerElement ;
389+ }
390+
391+ /**
392+ * Get actual number of bits per element based on the number of elements that have currently been inserted and the length
393+ * of the Bloom filter. See also getExpectedBitsPerElement().
394+ *
395+ * @return number of bits per element.
396+ */
397+ public double getBitsPerElement () {
398+ return this .bitSetSize / (double )numberOfAddedElements ;
399+ }
349400}
0 commit comments