From 408cad38da6dea973432a5a44b0fcc904421b05c Mon Sep 17 00:00:00 2001
From: "Mahadik, Mukul Chandrakant"
Date: Wed, 8 Nov 2023 17:14:38 -0700
Subject: [PATCH] Scalability fixes - Load model once per user

Implement code changes to improve scalability, as per the requirements in
issue #950 in e-mission-docs.

The approach caches objects that have already been loaded, so that they
are only loaded once:

- emission/core/get_database.py keeps the models collection handle in a
  module-level variable, so the collection lookup and the index creation
  only happen on the first call to get_model_db().
- BuiltinModelStorage keeps the entries it has read in an instance
  variable, keyed by model key, so get_current_model() only queries the
  database the first time a key is requested on a given instance.
---
Reviewer notes (this section is ignored by `git am`):

- _set_model_db() now declares `global _model_db`. Without it the
  assignment only bound a local name, the module-level cache stayed None
  and the indexes were re-created on every get_model_db() call.
- _get_model() / _set_model() now take `self` and are called through
  `self`. As bare names they raised NameError inside get_current_model().
- The cache stores the materialized query result instead of the pymongo
  cursor. A cursor can only be consumed once, so a second call on the same
  instance would have seen an empty result and returned None.
- The cache is keyed by model key, so a lookup for a second key no longer
  returns the entry that was cached for the first one.
- Cached entries are handed out as shallow copies, in case the rest of
  get_current_model() or its callers modify the returned entry.
- `== None` comparisons were replaced with `is None`.
- Known limitation: upsert_model() is not touched by this patch, so it
  does not invalidate the per-instance cache.
- The blob hashes on the `index` lines were not regenerated after these
  fixes; regenerate the patch with `git format-patch` before sending.

 emission/core/get_database.py                 | 25 +++++++++++++---
 .../modifiable/builtin_model_storage.py       | 30 +++++++++++++++----
 2 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/emission/core/get_database.py b/emission/core/get_database.py
index 0939b41d9..047598ad7 100644
--- a/emission/core/get_database.py
+++ b/emission/core/get_database.py
@@ -37,9 +37,19 @@
 _current_db = MongoClient(url, uuidRepresentation='pythonLegacy')[db_name]
 #config_file.close()
 
+# Cache the models collection handle globally so that it is only set up once per process
+_model_db = None
+
 def _get_current_db():
     return _current_db
 
+def _get_model_db():
+    return _model_db
+
+def _set_model_db(model_db):
+    global _model_db
+    _model_db = model_db
+
 def get_token_db():
     Tokens= _get_current_db().Stage_Tokens
     return Tokens
@@ -100,7 +110,6 @@ def update_routeDistanceMatrix_db(user_id, method, updatedMatrix):
     f.write(json.dumps(updatedMatrix))
     f.close()
 
-
 def get_client_db():
     # current_db=MongoClient().Stage_database
     Clients = _get_current_db().Stage_clients
@@ -231,10 +240,16 @@ def get_model_db():
     " will eventually delete them. This means that the elements are essentially
     " getting updated, only over time and as a log-structured filesystem.
     """
-    ModelDB = _get_current_db().Stage_updateable_models
-    ModelDB.create_index([("user_id", pymongo.ASCENDING)])
-    ModelDB.create_index([("metadata.key", pymongo.ASCENDING)])
-    ModelDB.create_index([("metadata.write_ts", pymongo.DESCENDING)])
+    ModelDB = _get_model_db()
+    if ModelDB is None:
+        logging.debug("Started model load in edb.get_model_db()...")
+        ModelDB = _get_current_db().Stage_updateable_models
+        ModelDB.create_index([("user_id", pymongo.ASCENDING)])
+        ModelDB.create_index([("metadata.key", pymongo.ASCENDING)])
+        ModelDB.create_index([("metadata.write_ts", pymongo.DESCENDING)])
+        _set_model_db(ModelDB)
+        logging.debug("Finished model load in edb.get_model_db()...")
+    logging.debug("Fetched model in edb.get_model_db()")
     return ModelDB
 
 def _create_analysis_result_indices(tscoll):
diff --git a/emission/storage/modifiable/builtin_model_storage.py b/emission/storage/modifiable/builtin_model_storage.py
index 35f0f601f..255fc6d18 100644
--- a/emission/storage/modifiable/builtin_model_storage.py
+++ b/emission/storage/modifiable/builtin_model_storage.py
@@ -15,6 +15,13 @@ def __init__(self, user_id):
         super(BuiltinModelStorage, self).__init__(user_id)
         self.key_query = lambda key: {"metadata.key": key}
         self.user_query = {"user_id": self.user_id} # UUID is mandatory for this version
+        self.current_model = {} # entries already read from the database, by model key
+
+    def _get_model(self, key):
+        return self.current_model.get(key)
+
+    def _set_model(self, key, model):
+        self.current_model[key] = model
 
     def upsert_model(self, key:str, model: ecwb.WrapperBase):
         """
@@ -34,12 +41,21 @@ def get_current_model(self, key:str) -> Optional[Dict]:
         :return: the most recent database entry for this key
         """
         find_query = {"user_id": self.user_id, "metadata.key": key}
-        result_it = edb.get_model_db().find(find_query).sort("metadata.write_ts", -1).limit(1)
-        # this differs from the timeseries `get_first_entry` only in the find query
-        # and the fact that the sort key and sort order are hardcoded
-        # everything below this point is identical
-        # but it is also fairly trivial, so I am not sure it is worth pulling
-        # out into common code at this point
-        result_list = list(result_it)
+        cached_entries = self._get_model(key)
+        if cached_entries is None:
+            logging.debug("Started model load in builtin_model_storage.get_current_model()...")
+            result_it = edb.get_model_db().find(find_query).sort("metadata.write_ts", -1).limit(1)
+            # this differs from the timeseries `get_first_entry` only in the find query
+            # and the fact that the sort key and sort order are hardcoded
+            # everything below this point is identical
+            # but it is also fairly trivial, so I am not sure it is worth pulling
+            # out into common code at this point
+            # a cursor can only be consumed once, so cache the materialized entries
+            cached_entries = list(result_it)
+            self._set_model(key, cached_entries)
+            logging.debug("Finished model load in builtin_model_storage.get_current_model()...")
+        logging.debug("Fetched model in builtin_model_storage.get_current_model()...")
+        # hand out shallow copies so that changes to a returned entry do not alter the cache
+        result_list = [dict(entry) for entry in cached_entries]
         if len(result_list) == 0:
             return None