Skip to content

Commit 3857dc7

Browse files
author
vk
committed
add python bloom filter
1 parent 3d731cc commit 3857dc7

4 files changed

Lines changed: 45 additions & 0 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
env/
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""
2+
Bloomfilters are a memory efficient way of checking if an element is part of a set or not
3+
4+
here is a pretty decent tutorial http://billmill.org/bloomfilter-tutorial/
5+
"""
6+
7+
from bitarray import bitarray
8+
import mmh3
9+
10+
class BloomFilter:
11+
12+
def __init__(self):
13+
self.HASH_SIZE = 1000
14+
15+
self.bits = bitarray(self.HASH_SIZE)
16+
self.bits.setall(0)
17+
18+
def add(self, word):
19+
for seed in range(self.HASH_SIZE):
20+
bit_value = mmh3.hash(word, seed) % self.HASH_SIZE
21+
self.bits[bit_value] = 1
22+
23+
def check(self, word):
24+
for seed in range(self.HASH_SIZE):
25+
bit_value = mmh3.hash(word, seed) % self.HASH_SIZE
26+
if self.bits[bit_value] == 0:
27+
return False
28+
return True
29+
30+
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from BloomFilter import BloomFilter
2+
3+
bf = BloomFilter()
4+
5+
bf.add("python")
6+
bf.add("vk")
7+
8+
test_ok = 0
9+
10+
assert(bf.check("python") == True)
11+
assert(bf.check("vk") == True)
12+
assert(bf.check("kaboom") == False)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
bitarray==0.8.1
2+
mmh3==2.3

0 commit comments

Comments
 (0)