Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added_multiprocessing #26

Merged
95 changes: 75 additions & 20 deletions Scrapper-Service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@
import traceback
import os
from utility import str2bool , getLogger , printStart




#from multiprocessing import Process
import multiprocessing
##
#
# Test whether the initialization is working or not
Expand Down Expand Up @@ -57,7 +55,48 @@ def createDatabase(configuration):
module = importlib.import_module(path , ".")
Database = module.__getattribute__(classname)
return lambda logger: Database(logger , **db_configuration)

#----------------

class attribute_info:
    """Bundle the three values a scrapper worker needs into one object.

    Stores the plugin descriptor (``attr``), the log folder and the
    application configuration exactly as given; nothing is copied or
    validated.
    """

    def __init__(self, attr, log_folder, configuration):
        """Remember the plugin descriptor, log folder and configuration."""
        self.attr, self.log_folder, self.configuration = (
            attr,
            log_folder,
            configuration,
        )

    def getInfo(self):
        """Return the stored values as ``(attr, log_folder, configuration)``."""
        bundle = (self.attr, self.log_folder, self.configuration)
        return bundle

#----------------

class create_process(multiprocessing.Process):
    """Process subclass that calls a function with a pre-packed argument triple.

    ``function_name`` is the callable to execute (despite the attribute
    name, it holds the function object itself). ``info`` is any object
    exposing ``getInfo()``; its result is captured once at construction time
    and its first three items are passed to the callable when the process
    runs.
    """

    def __init__(self, function_name, info):
        """Initialise the underlying Process and capture the call arguments."""
        super().__init__()
        self.function_name = function_name
        # Snapshot the tuple now; the ``info`` object itself is not retained.
        self.info = info.getInfo()

    def run(self):
        """Invoke the stored callable with items 0, 1 and 2 of ``self.info``."""
        details = self.info
        worker = self.function_name
        worker(details[0], details[1], details[2])

#----------------

def parallize_scrapper(attr,log_folder,configuration):
    """Load a single scrapper plugin and execute it, logging any failure.

    Parameters:
        attr: mapping with the keys ``"filename"`` (module path handed to
            ``importlib.import_module``) and ``"class"`` (name of the
            scrapper class inside that module).
        log_folder: folder in which ``<class>.log`` is created for the plugin.
        configuration: application configuration, forwarded to
            ``createDatabase``.

    Any exception raised while importing, constructing or running the
    plugin is caught, reported through ``logger`` and printed as a
    traceback, so the function itself returns normally.

    NOTE(review): relies on the file-level names ``istest``, ``log_level``,
    ``logger``, ``log_streamOption`` and ``createDatabase``. When this runs
    in a child process they must exist at import time under the "spawn"
    start method -- confirm they are not defined only inside the
    ``__main__`` guard.
    """
    class_name = attr["class"]
    try:
        # Import and lookup are inside the try so a wrong module path or
        # class name is reported like any other plugin failure instead of
        # killing the worker without the "scrapper failed" log line.
        plugin_module = importlib.import_module(attr["filename"] , ".")
        scrapper = getattr(plugin_module, class_name)
        log_stream = log_streamOption("{}/{}.log".format(log_folder , class_name))
        instance = scrapper(
            log_level = log_level,
            log_stream = log_stream,
            getDatabaseObject = createDatabase(configuration),
        )
        # In test mode the scrapper is only constructed, never run.
        if not istest:
            instance.run()
    except Exception as e:
        logger.error("{} scrapper failed".format(class_name))
        traceback.print_exception(type(e), e, e.__traceback__)

##-----------------
if __name__ == '__main__':

with open(CONFIG) as file:
Expand All @@ -74,19 +113,35 @@ def createDatabase(configuration):
logger.info("Application started , Extracting all the plugins")

import_list = configuration["plugins"]

process_list=[]
for attr in import_list:

path = attr["filename"]
class_name = attr["class"]
plugin_module = importlib.import_module(path , ".")
scrapper = plugin_module.__getattribute__(class_name)
try:
log_stream = log_streamOption("{}/{}.log".format(log_folder , class_name))
if istest:
scrapper( log_level = log_level, log_stream = log_stream , getDatabaseObject = createDatabase(configuration) )
else:
scrapper( log_level = log_level, log_stream = log_stream , getDatabaseObject = createDatabase(configuration) ).run()
except Exception as e:
logger.error("{} scrapper failed".format(class_name))
traceback.print_exception(type(e), e, e.__traceback__)
logger.info("Scrapping done from all Scrapper plugins")
process = create_process(parallize_scrapper,attribute_info(attr,log_folder,configuration))
process_list.append(process)
process.start()

#finish the job of each instance in list
for pro in process_list:
pro.join()
#terminate all the process to clean memory
process_list[-1].terminate()

logger.info("Scrapping done from all Scrapper plugins")


















Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.