diff --git a/src/com/skjegstad/utils/BloomFilter.java b/src/com/skjegstad/utils/BloomFilter.java
index 03cbd76..389e8fe 100644
--- a/src/com/skjegstad/utils/BloomFilter.java
+++ b/src/com/skjegstad/utils/BloomFilter.java
@@ -183,6 +183,35 @@ public static int[] createHashes(byte[] data, int hashes) {
         return result;
     }
 
+    /**
+     * Compares the properties of two instances to see if they are compatible.
+     *
+     * If they are compatible, the set operations (union, intersection size) work on them.
+     *
+     * @param obj is the object to compare to.
+     * @return True if obj is a BloomFilter with the same expected element count, k and bit set size.
+     */
+    public boolean isCompatible(Object obj) {
+        if (obj == null) {
+            return false;
+        }
+        if (getClass() != obj.getClass()) {
+            return false;
+        }
+        final BloomFilter other = (BloomFilter) obj;
+        if (this.expectedNumberOfFilterElements != other.expectedNumberOfFilterElements) {
+            return false;
+        }
+        if (this.k != other.k) {
+            return false;
+        }
+        if (this.bitSetSize != other.bitSetSize) {
+            return false;
+        }
+        return true;
+    }
+
+
     /**
      * Compares the contents of two instances to see if they are equal.
      *
@@ -318,7 +347,7 @@ public void addAll(Collection c) {
         for (E element : c)
             add(element);
     }
-
+
     /**
      * Returns true if the element could have been inserted into the Bloom filter.
      * Use getFalsePositiveProbability() to calculate the probability of this
@@ -438,4 +467,62 @@ public double getExpectedBitsPerElement() {
     public double getBitsPerElement() {
         return this.bitSetSize / (double)numberOfAddedElements;
     }
-}
\ No newline at end of file
+
+    /**
+     * Approximates the number of distinct elements added to this Bloom filter.
+     *
+     * The estimate is derived from the number of set bits X:
+     * n = -(N / k) * ln(1 - X / N), where N is the nominal filter size in bits
+     * (expected number of elements times bits per element).
+     *
+     * @return the approximate number of elements in this Bloom filter
+     */
+    public int approxCount() {
+        double N = this.expectedNumberOfFilterElements * this.bitsPerElement;
+        return (int) (-N * Math.log(1 - ((double)this.bitset.cardinality()/N))/k);
+    }
+
+    /**
+     * Performs the union of two Bloom filters, storing the result in this filter.
+     *
+     * The number of added elements becomes the sum of both counters, so elements
+     * present in both filters are counted twice.
+     *
+     * @param bf A compatible Bloom filter (see isCompatible).
+     */
+    public void union(BloomFilter bf) {
+        this.bitset.or(bf.bitset);
+        this.numberOfAddedElements += bf.numberOfAddedElements;
+    }
+
+    /**
+     * Approximates the size of the intersection of two Bloom filters.
+     *
+     * Uses inclusion-exclusion: size(A intersect B) = size(A) + size(B) - size(A union B).
+     *
+     * @param bf A compatible Bloom filter.
+     * @return The approximate cardinality of the intersection of the two Bloom filters
+     */
+    public int approxInterSize(BloomFilter bf) {
+        return (int) (this.approxCount() + bf.approxCount() - approxUnionSize(bf));
+    }
+
+    /**
+     * Approximates the size of the union of this Bloom filter with several others.
+     *
+     * Neither this filter nor the arguments are modified.
+     *
+     * @param blooms Compatible Bloom filters to combine with this one.
+     * @return The approximate cardinality of the union of those Bloom filters
+     */
+    public int approxUnionSize(BloomFilter ... blooms) {
+        BitSet newBitset = (BitSet) this.bitset.clone();
+        double N = this.expectedNumberOfFilterElements * this.bitsPerElement;
+
+        for (BloomFilter bf: blooms) {
+            newBitset.or(bf.bitset);
+        }
+
+        return (int) (- N * Math.log(1 - ((double)newBitset.cardinality()/N))/k);
+    }
+}
diff --git a/test/com/skjegstad/utils/BloomFilterTest.java b/test/com/skjegstad/utils/BloomFilterTest.java
index 6359ac8..fdfdb0b 100644
--- a/test/com/skjegstad/utils/BloomFilterTest.java
+++ b/test/com/skjegstad/utils/BloomFilterTest.java
@@ -483,5 +483,93 @@ public void testCount() {
         assertEquals(expResult, result);
     }
 
+    /**
+     * Test of approxCount method, of class BloomFilter.
+     */
+    @Test
+    public void testApproxCount() {
+        System.out.println("approxCount");
+        int expResult = 100;
+
+        BloomFilter instance = new BloomFilter(0.01, expResult);
+        for (int i = 0; i < expResult; i++) {
+            instance.add(i);
+        }
+        int result = instance.approxCount();
+        assertEquals(expResult, result, expResult/100);
+
+        expResult = 1000;
+
+        instance = new BloomFilter(0.001, expResult);
+        for (int i = 0; i < expResult; i++) {
+            instance.add(i);
+        }
+        result = instance.approxCount();
+        assertEquals(expResult, result, expResult/100);
+
+        expResult = 10000;
+
+        instance = new BloomFilter(0.0001, expResult);
+        for (int i = 0; i < expResult; i++) {
+            instance.add(i);
+        }
+        result = instance.approxCount();
+        assertEquals(expResult, result, expResult/100);
+    }
+
+    /**
+     * Test of isCompatible method, of class BloomFilter.
+     */
+    @Test
+    public void testIsCompatible() {
+        System.out.println("isCompatible");
+
+        int expResult = 100;
+
+        BloomFilter instance1 = new BloomFilter(0.01, expResult);
+        BloomFilter instance2 = new BloomFilter(0.01, expResult);
+
+        assertEquals(instance1.isCompatible(instance2), true);
+
+        expResult = 10000;
+
+        instance1 = new BloomFilter(0.0001, expResult);
+        instance2 = new BloomFilter(0.0001, expResult);
+
+        assertEquals(instance1.isCompatible(instance2), true);
+
+        expResult = 1000;
+
+        instance1 = new BloomFilter(0.0001, expResult);
+        instance2 = new BloomFilter(0.0003, expResult);
+
+        assertEquals(instance1.isCompatible(instance2), false);
+
+        expResult = 10000;
+
+        instance1 = new BloomFilter(0.001, expResult);
+        instance2 = new BloomFilter(0.001, 10);
+
+        assertEquals(instance1.isCompatible(instance2), false);
+    }
+
+    /**
+     * Test of approxInterSize method, of class BloomFilter.
+     */
+    @Test
+    public void testApproxInterSize() {
+        System.out.println("approxInterSize");
+        int elements = 100;
+
+        BloomFilter instance1 = new BloomFilter(0.01, elements);
+        BloomFilter instance2 = new BloomFilter(0.01, elements);
+        for (int i = 0; i < elements; i++) {
+            instance1.add(i);
+            instance2.add(i);
+        }
+
+        // Identical filters share the same bits, so count + count - union == count exactly.
+        assertEquals(instance1.approxCount(), instance1.approxInterSize(instance2));
+    }
 
-}
\ No newline at end of file
+}