Skip to content

Commit 6518030

Browse files
committed
Added new constructor for setting n, c and k manually
Added new constructor for estimating bitSetSize from a given false probability Added methods for getting bits per element
1 parent 697906c commit 6518030

1 file changed

Lines changed: 64 additions & 13 deletions

File tree

src/com/skjegstad/utils/BloomFilter.java

Lines changed: 64 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,14 @@
3838
public class BloomFilter<E> implements Serializable {
3939
private BitSet bitset;
4040
private int bitSetSize;
41+
private double bitsPerElement;
4142
private int expectedNumberOfFilterElements; // expected (maximum) number of elements to be added
4243
private int numberOfAddedElements; // number of elements actually added to the Bloom filter
43-
private int k;
44-
static Charset charset = Charset.forName("UTF-8"); // encoding used for storing hash values as strings
44+
private int k; // number of hash functions
4545

46-
static String hashName = "MD5"; // MD5 gives good enough accuracy in most circumstances. Change to SHA1 if it's needed
46+
static final Charset charset = Charset.forName("UTF-8"); // encoding used for storing hash values as strings
47+
48+
static final String hashName = "MD5"; // MD5 gives good enough accuracy in most circumstances. Change to SHA1 if it's needed
4749
static final MessageDigest digestFunction;
4850
static { // The digest method is reused between instances
4951
MessageDigest tmp;
@@ -56,18 +58,47 @@ public class BloomFilter<E> implements Serializable {
5658
}
5759

5860
/**
59-
* Constructs an empty Bloom filter.
61+
* Constructs an empty Bloom filter. The total length of the Bloom filter will be
62+
* c*n.
63+
*
64+
* @param c is the number of bits used per element.
65+
* @param n is the expected number of elements the filter will contain.
66+
* @param k is the number of hash functions used.
67+
*/
68+
public BloomFilter(double c, int n, int k) {
69+
this.expectedNumberOfFilterElements = n;
70+
this.k = k;
71+
this.bitsPerElement = c;
72+
this.bitSetSize = (int)Math.ceil(c * n);
73+
numberOfAddedElements = 0;
74+
this.bitset = new BitSet(bitSetSize);
75+
}
76+
77+
/**
78+
* Constructs an empty Bloom filter. The optimal number of hash functions (k) is estimated from the total size of the Bloom
79+
* and the number of expected elements.
6080
*
61-
* @param bitSetSize defines how many bits should be used for the filter.
62-
* @param expectedNumberOfFilterElements defines the maximum number of elements the filter is expected to contain.
81+
* @param bitSetSize defines how many bits should be used in total for the filter.
82+
* @param expectedNumberOElements defines the maximum number of elements the filter is expected to contain.
6383
*/
64-
public BloomFilter(int bitSetSize, int expectedNumberOfFilterElements) {
65-
this.expectedNumberOfFilterElements = expectedNumberOfFilterElements;
66-
this.k = (int) Math.round((bitSetSize / expectedNumberOfFilterElements) *
67-
Math.log(2.0));
68-
bitset = new BitSet(bitSetSize);
69-
this.bitSetSize = bitSetSize;
70-
numberOfAddedElements = 0;
84+
public BloomFilter(int bitSetSize, int expectedNumberOElements) {
85+
this(bitSetSize / (double)expectedNumberOElements,
86+
expectedNumberOElements,
87+
(int) Math.round((bitSetSize / (double)expectedNumberOElements) * Math.log(2.0)));
88+
}
89+
90+
/**
91+
* Constructs an empty Bloom filter with a given false positive probability. The number of bits per
92+
* element and the number of hash functions is estimated
93+
* to match the false positive probability.
94+
*
95+
* @param falsePositiveProbability is the desired false positive probability.
96+
* @param expectedNumberOfElements is the expected number of elements in the Bloom filter.
97+
*/
98+
public BloomFilter(double falsePositiveProbability, int expectedNumberOfElements) {
99+
this(Math.ceil(-(Math.log(falsePositiveProbability) / Math.log(2))) / Math.log(2), // c = k / ln(2)
100+
expectedNumberOfElements,
101+
(int)Math.ceil(-(Math.log(falsePositiveProbability) / Math.log(2)))); // k = ceil(-log_2(false prob.))
71102
}
72103

73104
/**
@@ -346,4 +377,24 @@ public int count() {
346377
public int getExpectedNumberOfElements() {
347378
return expectedNumberOfFilterElements;
348379
}
380+
381+
/**
382+
* Get expected number of bits per element when the Bloom filter is full. This value is set by the constructor
383+
* when the Bloom filter is created. See also getBitsPerElement().
384+
*
385+
* @return expected number of bits per element.
386+
*/
387+
public double getExpectedBitsPerElement() {
388+
return this.bitsPerElement;
389+
}
390+
391+
/**
392+
* Get actual number of bits per element based on the number of elements that have currently been inserted and the length
393+
* of the Bloom filter. See also getExpectedBitsPerElement().
394+
*
395+
* @return number of bits per element.
396+
*/
397+
public double getBitsPerElement() {
398+
return this.bitSetSize / (double)numberOfAddedElements;
399+
}
349400
}

0 commit comments

Comments
 (0)