-
Notifications
You must be signed in to change notification settings - Fork 0
/
bloom-filter.py
81 lines (68 loc) · 3.19 KB
/
bloom-filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import array
from utils import HashFunctions
import os
class BloomFilter():
def __init__(self):
self.bit_array = array.array("B", [0]*4796875)
# check whether the words in the list are present or not
def check_presence(self,word_list):
hash_func = HashFunctions()
for word in word_list:
if len(word) > 0:
if (
self.bit_array[hash_func.md5_hash(word) % len(self.bit_array)] and
self.bit_array[hash_func.sha1_hash(word) % len(self.bit_array)] and
self.bit_array[hash_func.sha256_hash(word) % len(self.bit_array)] and
self.bit_array[hash_func.sha512_hash(word) % len(self.bit_array)] and
self.bit_array[hash_func.crc32_hash(word) % len(self.bit_array)] and
self.bit_array[hash_func.murmur_hash(word) % len(self.bit_array)] and
self.bit_array[hash_func.fnv_hash(word) % len(self.bit_array)]
):
print(word,": Found")
else:
print(word,": Not found")
'''
creates the bloom filter from .txt file
For example i am considering
no. of words = 300
probability of false positive = 1% i.e 0.01
Hence, size of bit array (m) = 3837
no. of hash functions(k) = ~5
'''
def fill_bloom_filter(self):
with open("words2.txt", "r") as f:
hash_func = HashFunctions()
try:
print("Creating Bin File...")
for line in f:
word = line.strip()
self.bit_array[hash_func.md5_hash(word)% len(self.bit_array)] |= 1
self.bit_array[hash_func.sha1_hash(word)% len(self.bit_array)] |= 1
self.bit_array[hash_func.sha256_hash(word)% len(self.bit_array)] |= 1
self.bit_array[hash_func.sha512_hash(word)% len(self.bit_array)] |= 1
self.bit_array[hash_func.crc32_hash(word)% len(self.bit_array)] |= 1
self.bit_array[hash_func.murmur_hash(word)% len(self.bit_array)] |= 1
self.bit_array[hash_func.fnv_hash(word)% len(self.bit_array)] |= 1
# write to bin file
script_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(script_dir, "bloom_filter.bin")
with open(file_path, 'wb') as f:
self.bit_array.tofile(f)
print("Created")
words_file_size = os.path.getsize("words2.txt")
bin_file_size = os.path.getsize("bloom_filter.bin")
print('''Some stats for nerds
Disk Usage
txt File: {}
bin File: {}
'''.format(words_file_size, bin_file_size)),
except Exception as e:
print("Some error occurred ",e)
if __name__ == "__main__":
bf = BloomFilter()
bf.fill_bloom_filter()
print('''Commands :
1. check word1 word2''')
user_input_words = input("Enter command: ")
user_input_words = user_input_words.replace("check","").split(" ")
bf.check_presence(user_input_words[1:])