Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using Raw bytes for BloomFilter Bit Array #6

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 27 additions & 25 deletions BloomFilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@


class BloomFilter:

# To create bit array of size n
'''
size - size of the bit array, calculated based on the formula
Expand All @@ -13,56 +12,59 @@ class BloomFilter:

def __init__(self, n):
# Sizes the filter from n and allocates its storage.
# NOTE(review): this is a flattened diff hunk; the compact size/k/bit_array/
# validate_array lines appear to be the removed version and the spaced-out
# lines after them the added one.
# p feeds the sizing formulas below; presumably the target false-positive
# rate of the filter -- confirm.
self.p = 0.05
self.size = math.ceil(-n*math.log(self.p)/(math.log(2)**2))
self.k = math.ceil(self.size/n*math.log(2))
# One list element per bit.
self.bit_array = [0] * self.size
self.validate_array = []
# size = ceil(-n * ln(p) / ln(2)^2)
self.size = math.ceil(-n * math.log(self.p) / (math.log(2) ** 2))
# k = ceil(size / n * ln(2)); n == 0 raises ZeroDivisionError here.
self.k = math.ceil(self.size / n * math.log(2))
# Packed storage: ceil(size / 8) zero-initialised bytes.
self.bit_array = bytearray(math.ceil(self.size / 8))

def __set_bit__(self, index):
    """Set the bit at position ``index`` of ``self.bit_array`` to 1.

    Bits are stored most-significant-bit first within each byte: bit 0 is
    the 0x80 bit of byte 0, bit 7 is the 0x01 bit of byte 0, and bit 8 is
    the 0x80 bit of byte 1.

    Args:
        index: Non-negative, zero-based bit position.

    Raises:
        IndexError: If the byte holding ``index`` lies outside a
            ``bytearray`` stored in ``self.bit_array``.
    """
    # divmod keeps the arithmetic in exact integers; int(index / 8) went
    # through a float and picks the wrong byte for very large indexes.
    byte_index, bit_offset = divmod(index, 8)
    self.bit_array[byte_index] |= 1 << (7 - bit_offset)

def __get_bit__(self, index):
    """Test the bit at position ``index`` of ``self.bit_array``.

    Uses the same most-significant-bit-first layout as ``__set_bit__``:
    bit 0 is the 0x80 bit of byte 0.

    Args:
        index: Non-negative, zero-based bit position.

    Returns:
        The bit's mask value (a non-zero int, hence truthy) when the bit is
        set, otherwise 0.

    Raises:
        IndexError: If the byte holding ``index`` lies outside a
            ``bytearray`` stored in ``self.bit_array``.
    """
    # divmod keeps the arithmetic in exact integers; int(index / 8) went
    # through a float and picks the wrong byte for very large indexes.
    byte_index, bit_offset = divmod(index, 8)
    return self.bit_array[byte_index] & (1 << (7 - bit_offset))

# Func to insert values into BF
# NOTE(review): flattened diff hunk; each compact line appears to be the
# removed version of the spaced-out line that follows it.
def insert(self, value, freq=1):
# value is hashed once with freq as the second mmh3.hash argument
# (presumably the seed -- confirm against mmh3 docs) and stringified.
line_hash = str(mmh3.hash(value,freq))
line_hash = str(mmh3.hash(value, freq))
# The string is then re-hashed with i = 0..k-1 and reduced modulo size to
# get the k positions that are marked.
for i in range(self.k):
index = mmh3.hash(line_hash,i) % self.size
# One list element per bit.
self.bit_array[index] = 1
index = mmh3.hash(line_hash, i) % self.size
# Bit is set through the __set_bit__ helper instead.
self.__set_bit__(index)

# To check if the value is present in BF or not
# Returns False as soon as one of the k probed positions is unset, True when
# all of them are set.
# NOTE(review): flattened diff hunk; each compact line appears to be the
# removed version of the spaced-out line that follows it.
def validate(self, value, freq=1):
# Same derivation of positions as insert(): hash value with freq, stringify,
# then re-hash with i = 0..k-1 modulo size.
line_hash = str(mmh3.hash(value,freq))
line_hash = str(mmh3.hash(value, freq))
for i in range(self.k):
check_at_index = mmh3.hash(line_hash,i) % self.size
# Probes validate_array, a list that the read* methods fill.
if self.validate_array[check_at_index] == 1:
check_at_index = mmh3.hash(line_hash, i) % self.size
# Probes bit_array through the __get_bit__ helper instead.
if self.__get_bit__(check_at_index):
continue
else:
return False
return True

def readBloomFilterFromFile(self,filename):
def readBloomFilterFromFile(self, filename):
# Loads a serialized filter from filename, read in binary mode.
# NOTE(review): flattened diff hunk; the validate_array lines appear to be
# the removed version and the bit_array lines the added one.
# NOTE(review): the handle is not managed by a with-block, so it stays open
# if read() raises before f.close() is reached.
f = open(filename, "rb")
# Every byte has 48 (ord('0')) subtracted, turning ASCII digits into ints.
self.validate_array = list(f.read())
for i in range(0, len(self.validate_array)):
self.validate_array[i] -= 48
print(self.validate_array)
# File content is taken as-is as the byte storage; self.size and self.k are
# not updated here.
self.bit_array = bytearray(f.read())
# Prints the whole array to stdout.
print(self.bit_array)
f.close()

def readBloomFilterFromBytes(self,bf_as_bytes):
# NOTE(review): flattened diff hunk; this first definition appears to be the
# removed version and the one after the blank line the added one.
# Every byte has 48 (ord('0')) subtracted, turning ASCII digits into ints.
self.validate_array = list(bf_as_bytes)
for i in range(0, len(self.validate_array)):
self.validate_array[i] -= 48

def readBloomFilterFromBytes(self, bf_as_bytes):
# Stores a mutable bytearray copy of bf_as_bytes as the filter storage;
# self.size and self.k are not updated here.
self.bit_array = bytearray(bf_as_bytes)

def getBloomFilter(self):
    """Return the filter's bit storage, ``self.bit_array``.

    The stored object itself is returned, not a copy, so changes made by
    the caller are visible to the filter.
    """
    return self.bit_array

# Returns the size of the bit arry
# Returns the size of the bit array
def getSize(self):
    """Return ``self.size``, the size of the filter's bit array."""
    return self.size

def getNFromSize(self,size):
# NOTE(review): flattened diff hunk; this definition appears to be the
# removed version and the next one its reformatted replacement. Both compute
# the same expression.
return(math.floor(size*-1*(math.log(2)**2)/math.log(self.p)))
def getNFromSize(self, size):
# Inverts the sizing formula used in __init__:
# n = floor(-size * ln(2)^2 / ln(p)).
return math.floor(size * -1 * (math.log(2) ** 2) / math.log(self.p))

def getNumberOfHashFunctions(self):
    """Return ``self.k``, the number of hash functions h1(k), h2(k), ..."""
    return self.k

def getAsBytes(self):
# NOTE(review): flattened diff hunk; the first return appears to be the
# removed version and the second the added one.
# Encodes every element of bit_array as its decimal text, one after another.
return str.encode(''.join([str(i) for i in self.bit_array]))
# Returns the storage object itself, not a copy.
return self.bit_array
15 changes: 5 additions & 10 deletions P2P/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@



class Request:

# Integer codes identifying the kind of a Request; handlers compare
# get_type() against these constants.
# NOTE(review): the first constant appears twice because this is a flattened
# diff hunk (presumably a whitespace-only change).
REQUEST_TYPE_BLOOMFILTER = 2
REQUEST_TYPE_BLOOMFILTER = 2
REQUEST_TYPE_REPLY_SLAVE_BLOOMFILTER = 3
REQUEST_SEND_ACTUAL_LINES = 4
REQUEST_SEND_ENTIRE_FILE_HASH = 5
Expand All @@ -14,7 +13,7 @@ def __init__(self, request_type, message):
self.message = message
if isinstance(message, str):
self.byte_message = bytes(message, 'utf-8')
elif isinstance(message, bytes) :
elif isinstance(message, bytes):
self.byte_message = message
else:
self.byte_message = bytes(message)
Expand All @@ -26,17 +25,13 @@ def get_type(self):
return self.type

def get_message_size(self):
# NOTE(review): flattened diff hunk; the first return and the whole
# actual_message method appear to be removed lines, and the final return is
# the added body of get_message_size.
# Length of the message after decoding it as UTF-8 text.
return len(self.actual_message())

def actual_message(self):
# Decodes the payload as UTF-8; raises UnicodeDecodeError for payloads that
# are not valid UTF-8.
return self.byte_message.decode('utf-8')
# Length of the raw payload in bytes, with no decoding involved.
return len(self.byte_message)

def get_message_bytes(self):
    """Return the request payload as stored in ``self.byte_message``."""
    return self.byte_message

def __str__(self):
# NOTE(review): flattened diff hunk; the first return appears to be the
# removed version and the second the added one.
# Embeds the payload decoded as text.
return "<Request type: " + str(self.type) + ", message: " + self.actual_message() + "...>"

# Embeds str() of the payload; for a bytes object that is its b'...' form.
return "<Request type: " + str(self.type) + ", message: " + str(self.byte_message) + "...>"


def parse_received_data(data):
Expand All @@ -50,4 +45,4 @@ def parse_received_data(data):
str_message = bloom_filter
type_int = int(bytes.hex(type_specifying_byte), 16)
req = Request(type_int, str_message)
return req
return req
21 changes: 10 additions & 11 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def handle_request(self, request):
# We send it now
print("\n\nThe other user has modified his file, syncing...")
print("Received the bloom filter")
my_missing_content = getMissingContent(getNFromSize(
my_missing_content = getMissingContent(getNFromByteSize(
request.get_message_size()), request.get_message_bytes())
print("Acknowleding and transmitting the bloom filter...")
bf = computeBloomFilter()
Expand All @@ -57,7 +57,7 @@ def handle_request(self, request):
elif(request.get_type() == utils.Request.REQUEST_TYPE_REPLY_SLAVE_BLOOMFILTER):
print(
"Request was acknowledged by the other peer and has given the other bloom filter")
my_missing_content = getMissingContent(getNFromSize(
my_missing_content = getMissingContent(getNFromByteSize(
request.get_message_size()), request.get_message_bytes())

# Send the missing contents computed to the other user
Expand All @@ -68,7 +68,7 @@ def handle_request(self, request):

elif(request.get_type() == utils.Request.REQUEST_SEND_ACTUAL_LINES):
print("Received the actual missing lines...")
missing_dict = eval(request.actual_message())
missing_dict = eval(request.get_message_bytes())
should_trigger_modified = False
print("Syncing the file...")
Synchronizer.syncFile(
Expand Down Expand Up @@ -97,10 +97,10 @@ def handle_request(self, request):
p2p.send_request(req)
print("Done.")
elif(request.get_type() == utils.Request.REQUEST_SEND_ENTIRE_FILE):
file_content_from_other_user = request.actual_message()
file_content_from_other_user = request.get_message_bytes()
should_trigger_modified = False

with open(input_path, 'w') as f:
with open(input_path, 'wb') as f:
f.write(file_content_from_other_user)
time.sleep(1)
should_trigger_modified = True
Expand Down Expand Up @@ -138,12 +138,11 @@ def on_modified(self, event):
self.last_modified = time.time()
return super().on_modified(event)

# Use this func to find n required for BloomFilter
# Size is the len of bloomfilter bit array


def getNFromSize(size):
    """Return the n to use for a BloomFilter whose bit array has ``size`` entries.

    Computes ``floor(-size * ln(2)^2 / ln(0.05))``.

    Args:
        size: Length of the bloom filter bit array.

    Returns:
        The item count as an int.
    """
    return floor(size * -1 * (log(2) ** 2) / log(0.05))
def getNFromByteSize(byte_size, p=0.05):
    """Return the n to use for a BloomFilter whose packed bit array is ``byte_size`` bytes.

    Computes ``floor(-(byte_size * 8) * ln(2)^2 / ln(p))``.

    NOTE(review): the byte length rounds the bit count up to a multiple of
    8, so the result can exceed the n the filter was built with. For
    example 4 bytes yields 5, while BloomFilter(4) is 25 bits, which also
    packs into 4 bytes. Confirm that callers rebuilding a filter from this
    n end up with the same bit size as the sender.

    Args:
        byte_size: Length of the bloom filter bit array in bytes.
        p: Value used in the sizing formula, with 0 < p < 1. Defaults to
            0.05, the value BloomFilter hard-codes as ``self.p``.

    Returns:
        The item count as an int.
    """
    bit_count = byte_size * 8
    return floor(-bit_count * (log(2) ** 2) / log(p))


def main():
Expand Down Expand Up @@ -251,7 +250,7 @@ def initiateSync():


def read_entire_file():
# Returns the full content of the file at input_path (a name defined
# elsewhere in this file).
# NOTE(review): flattened diff hunk; the first with-line appears to be the
# removed version (text mode, returns str) and the second the added one
# (binary mode, returns bytes).
with open(input_path) as f:
with open(input_path, "rb") as f:
content = f.read()
return content

Expand Down