From 1c16800450656ea940ed6d252d219f42dae55403 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Fri, 10 Jan 2025 12:33:02 -0500 Subject: [PATCH 1/3] add _common.py with run_on_all_deployments for migration scripts `run_on_all_deployments` can be used by other migration scripts, causing them to be run on all production deployments. It does this by scanning the `nrel-openpath-deploy-configs` repo for a list of deployments and modifying the DB_HOST accordingly in between each time the function is called --- bin/historical/migrations/_common.py | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 bin/historical/migrations/_common.py diff --git a/bin/historical/migrations/_common.py b/bin/historical/migrations/_common.py new file mode 100644 index 000000000..a9308d6eb --- /dev/null +++ b/bin/historical/migrations/_common.py @@ -0,0 +1,41 @@ +import os +import subprocess +import importlib +import logging + +import emission.core.get_database as edb + +DB_HOST_TEMPLATE = "mongodb://localhost:27017/openpath_prod_REPLACEME" + +proc = subprocess.run( + 'rm -rf nrel-openpath-deploy-configs && ' + + 'git clone --no-checkout https://github.com/e-mission/nrel-openpath-deploy-configs.git && ' + + 'cd nrel-openpath-deploy-configs && ' + + 'git ls-tree -r --name-only HEAD | grep configs/', + shell=True, + capture_output=True, + text=True) +filenames = proc.stdout.replace("configs/", "").split("\n") + +PROD_LIST = [ + fname.split(".")[0] + for fname in filenames + if fname and 'dev-' not in fname and 'stage-' not in fname +] +print(f"PROD_LIST: {PROD_LIST}") + + +def run_on_all_deployments(fn_to_run): + """ + Run the given function on the database for each deployment by setting the + DB_HOST environment variable in between each function call. + The list of deployments (PROD_LIST) is retrieved from the + nrel-openpath-deploy-configs repo upon initialization of this module. 
+ """ + for prod in PROD_LIST: + prod_db_name = prod.replace("-", "_") + print(f"Running {fn_to_run.__name__} for {prod} on DB {prod_db_name}") + os.environ['DB_HOST'] = DB_HOST_TEMPLATE.replace( + "REPLACEME", prod_db_name) + importlib.reload(edb) + fn_to_run() From d6f71089b503bbf34c49924e49b3b9fff1dd6df4 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Fri, 10 Jan 2025 12:33:32 -0500 Subject: [PATCH 2/3] add trim_fluff_from_composite_trips.py """ Trim unnecessary fields from composite trips in the analysis_timeseries_db. The shape of the remaining fields is unchanged. """ --- .../trim_fluff_from_composite_trips.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 bin/historical/migrations/trim_fluff_from_composite_trips.py diff --git a/bin/historical/migrations/trim_fluff_from_composite_trips.py b/bin/historical/migrations/trim_fluff_from_composite_trips.py new file mode 100644 index 000000000..86f6d790a --- /dev/null +++ b/bin/historical/migrations/trim_fluff_from_composite_trips.py @@ -0,0 +1,52 @@ +import emission.core.get_database as edb + +from _common import run_on_all_deployments + + +def trim_fluff_from_composite_trips(): + """ + Trim unnecessary fields from composite trips in the analysis_timeseries_db. + The shape of the remaining fields is unchanged. 
+ """ + print("Trimming fluff from composite trips") + analysis_ts = edb.get_analysis_timeseries_db() + for ct in analysis_ts.find({'metadata.key': 'analysis/composite_trip'}): + # print(f"Trimming {ct['_id']}, {ct['data'].get('start_ts')} - {ct['data'].get('end_ts')}") + for l in ct['data'].get('locations', []): + trim_entry(l, { + 'metadata': [], + 'data': ['loc', 'ts'], + }) + + for s in ct['data'].get('sections', []): + trim_entry(s, { + 'metadata': [], + 'data': ['start_ts', 'end_ts', 'sensed_mode', 'sensed_mode_str', + 'ble_sensed_mode', 'distance', 'duration'], + }) + + for key in ['start_confirmed_place', 'end_confirmed_place']: + trim_entry(ct['data'].get(key), { + '_id': True, + 'metadata': ['key'], + 'data': ['enter_ts', 'exit_ts', 'location', 'duration', + 'user_input', 'additions'], + }) + + analysis_ts.update_one( + {'_id': ct['_id']}, + {'$set': {'data': ct['data']}} + ) + + +def trim_entry(entry, fields_to_keep): + if entry is None: + return + for key in list(entry): + if key not in fields_to_keep: + del entry[key] + elif isinstance(entry[key], dict) and isinstance(fields_to_keep, dict): + trim_entry(entry[key], fields_to_keep[key]) + + +run_on_all_deployments(trim_fluff_from_composite_trips) From 947492d1b7ec509e7afc0f7b6af0a9eca52ac93a Mon Sep 17 00:00:00 2001 From: Shankari Date: Wed, 15 Jan 2025 22:57:26 -0800 Subject: [PATCH 3/3] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20=20support=20subset=20?= =?UTF-8?q?+=20simplify=20retrieval=20+=20template?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit builds on 1c16800450656ea940ed6d252d219f42dae55403 and: - simplifies the retrieval (via git) to dynamically identify the full list of deployments. 
Notably it no longer relies on munging command output - supports providing a subset of the deployments via an environment variable It also adds a new function that is lightweight and just prints out the configured URL as a simple template for testing and adapting Testing done: - ran the template function above ``` ./e-mission-py.bash bin/historical/migrations/all_deployments_template.py Connecting to database URLmongodb://localhost:27017/openpath_prod_nc_transit_equity_study Running print_connect_url for ebikegj on DB ebikegj Config file not found, returning a copy of the environment variables instead... Retrieved config: {'DB_HOST': 'mongodb://localhost:27017/openpath_prod_ebikegj', 'DB_RESULT_LIMIT': None} Connecting to database URL mongodb://localhost:27017/openpath_prod_ebikegj Connecting to database URLmongodb://localhost:27017/openpath_prod_ebikegj Running print_connect_url for cortezebikes on DB cortezebikes Config file not found, returning a copy of the environment variables instead... Retrieved config: {'DB_HOST': 'mongodb://localhost:27017/openpath_prod_cortezebikes', 'DB_RESULT_LIMIT': None} Connecting to database URL mongodb://localhost:27017/openpath_prod_cortezebikes Connecting to database URLmongodb://localhost:27017/openpath_prod_cortezebikes Running print_connect_url for dcebike on DB dcebike Config file not found, returning a copy of the environment variables instead... Retrieved config: {'DB_HOST': 'mongodb://localhost:27017/openpath_prod_dcebike', 'DB_RESULT_LIMIT': None} Connecting to database URL mongodb://localhost:27017/openpath_prod_dcebike Connecting to database URLmongodb://localhost:27017/openpath_prod_dcebike Running print_connect_url for washingtoncommons on DB washingtoncommons Config file not found, returning a copy of the environment variables instead... 
Retrieved config: {'DB_HOST': 'mongodb://localhost:27017/openpath_prod_washingtoncommons', 'DB_RESULT_LIMIT': None} ``` - ran the template function above with an overridden `PROD_LIST`. Note that the first two entries are from final values set to the DB_HOST variable before the program terminated previously. We may want to unset the variable before the program ends ``` $ export PROD_LIST='cortezebikes,fortmorgan' $ ./e-mission-py.bash bin/historical/migrations/all_deployments_template.py Config file not found, returning a copy of the environment variables instead... Retrieved config: {'DB_HOST': 'mongodb://localhost/openpath_prod_nrel_commute', 'DB_RESULT_LIMIT': None} Connecting to database URL mongodb://localhost/openpath_prod_nrel_commute PROD_LIST: ['cortezebikes', 'fortmorgan'] Running print_connect_url for cortezebikes on DB cortezebikes Config file not found, returning a copy of the environment variables instead... Retrieved config: {'DB_HOST': 'mongodb://localhost:27017/openpath_prod_cortezebikes', 'DB_RESULT_LIMIT': None} Connecting to database URL mongodb://localhost:27017/openpath_prod_cortezebikes Connecting to database URLmongodb://localhost:27017/openpath_prod_cortezebikes Running print_connect_url for fortmorgan on DB fortmorgan Config file not found, returning a copy of the environment variables instead... 
Retrieved config: {'DB_HOST': 'mongodb://localhost:27017/openpath_prod_fortmorgan', 'DB_RESULT_LIMIT': None} Connecting to database URL mongodb://localhost:27017/openpath_prod_fortmorgan Connecting to database URLmongodb://localhost:27017/openpath_prod_fortmorgan ``` --- bin/historical/migrations/_common.py | 33 ++++++++++--------- .../migrations/all_deployments_template.py | 8 +++++ 2 files changed, 25 insertions(+), 16 deletions(-) create mode 100644 bin/historical/migrations/all_deployments_template.py diff --git a/bin/historical/migrations/_common.py b/bin/historical/migrations/_common.py index a9308d6eb..a562c0ee1 100644 --- a/bin/historical/migrations/_common.py +++ b/bin/historical/migrations/_common.py @@ -2,29 +2,30 @@ import subprocess import importlib import logging +import tempfile +import time import emission.core.get_database as edb -DB_HOST_TEMPLATE = "mongodb://localhost:27017/openpath_prod_REPLACEME" +DB_HOST_TEMPLATE = os.environ.get('DB_HOST_TEMPLATE', "mongodb://localhost:27017/openpath_prod_REPLACEME") -proc = subprocess.run( - 'rm -rf nrel-openpath-deploy-configs && ' + - 'git clone --no-checkout https://github.com/e-mission/nrel-openpath-deploy-configs.git && ' + - 'cd nrel-openpath-deploy-configs && ' + - 'git ls-tree -r --name-only HEAD | grep configs/', - shell=True, - capture_output=True, - text=True) -filenames = proc.stdout.replace("configs/", "").split("\n") +if 'PROD_LIST' in os.environ: + PROD_LIST=os.environ['PROD_LIST'].split(",") +else: + with tempfile.TemporaryDirectory() as tmpdirname: + print(f"created {tmpdirname=} to find list of configs") + os.chdir(tmpdirname) + proc = subprocess.run( + f"git clone https://github.com/e-mission/nrel-openpath-deploy-configs.git", shell=True) + filenames = os.listdir(f"nrel-openpath-deploy-configs/configs/") -PROD_LIST = [ - fname.split(".")[0] - for fname in filenames - if fname and 'dev-' not in fname and 'stage-' not in fname -] + PROD_LIST = [ + fname.split(".")[0] + for fname in filenames 
+ if fname and 'dev-' not in fname and 'stage-' not in fname + ] print(f"PROD_LIST: {PROD_LIST}") - def run_on_all_deployments(fn_to_run): """ Run the given function on the database for each deployment by setting the diff --git a/bin/historical/migrations/all_deployments_template.py b/bin/historical/migrations/all_deployments_template.py new file mode 100644 index 000000000..202e26f6e --- /dev/null +++ b/bin/historical/migrations/all_deployments_template.py @@ -0,0 +1,8 @@ +import emission.core.get_database as edb + +from _common import run_on_all_deployments + +def print_connect_url(): + print("Connecting to database URL"+edb.url) + +run_on_all_deployments(print_connect_url)