Skip to content

Commit

Permalink
Merge pull request #1009 from JGreenlee/add_compositetrip_migration_s…
Browse files Browse the repository at this point in the history
…cripts

Add `composite_trip` Migration Scripts
  • Loading branch information
shankari authored Jan 16, 2025
2 parents b14c8c4 + 947492d commit 33eda70
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 0 deletions.
42 changes: 42 additions & 0 deletions bin/historical/migrations/_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
import subprocess
import importlib
import logging
import tempfile
import time

import emission.core.get_database as edb

DB_HOST_TEMPLATE = os.environ.get('DB_HOST_TEMPLATE', "mongodb://localhost:27017/openpath_prod_REPLACEME")

if 'PROD_LIST' in os.environ:
PROD_LIST=os.environ['PROD_LIST'].split(",")
else:
with tempfile.TemporaryDirectory() as tmpdirname:
print(f"created {tmpdirname=} to find list of configs")
os.chdir(tmpdirname)
proc = subprocess.run(
f"git clone https://github.com/e-mission/nrel-openpath-deploy-configs.git", shell=True)
filenames = os.listdir(f"nrel-openpath-deploy-configs/configs/")

PROD_LIST = [
fname.split(".")[0]
for fname in filenames
if fname and 'dev-' not in fname and 'stage-' not in fname
]
print(f"PROD_LIST: {PROD_LIST}")

def run_on_all_deployments(fn_to_run):
"""
Run the given function on the database for each deployment by setting the
DB_HOST environment variable in between each function call.
The list of deployments (PROD_LIST) is retrieved from the
nrel-openpath-deploy-configs repo upon initialization of this module.
"""
for prod in PROD_LIST:
prod_db_name = prod.replace("-", "_")
print(f"Running {fn_to_run.__name__} for {prod} on DB {prod_db_name}")
os.environ['DB_HOST'] = DB_HOST_TEMPLATE.replace(
"REPLACEME", prod_db_name)
importlib.reload(edb)
fn_to_run()
8 changes: 8 additions & 0 deletions bin/historical/migrations/all_deployments_template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import emission.core.get_database as edb

from _common import run_on_all_deployments

def print_connect_url():
print("Connecting to database URL"+edb.url)

run_on_all_deployments(print_connect_url)
52 changes: 52 additions & 0 deletions bin/historical/migrations/trim_fluff_from_composite_trips.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import emission.core.get_database as edb

from _common import run_on_all_deployments


def trim_fluff_from_composite_trips():
"""
Trim unnecessary fields from composite trips in the analysis_timeseries_db.
The shape of the remaining fields is unchanged.
"""
print("Trimming fluff from composite trips")
analysis_ts = edb.get_analysis_timeseries_db()
for ct in analysis_ts.find({'metadata.key': 'analysis/composite_trip'}):
# print(f"Trimming {ct['_id']}, {ct['data'].get('start_ts')} - {ct['data'].get('end_ts')}")
for l in ct['data'].get('locations', []):
trim_entry(l, {
'metadata': [],
'data': ['loc', 'ts'],
})

for s in ct['data'].get('sections', []):
trim_entry(s, {
'metadata': [],
'data': ['start_ts', 'end_ts', 'sensed_mode', 'sensed_mode_str',
'ble_sensed_mode', 'distance', 'duration'],
})

for key in ['start_confirmed_place', 'end_confirmed_place']:
trim_entry(ct['data'].get(key), {
'_id': True,
'metadata': ['key'],
'data': ['enter_ts', 'exit_ts', 'location', 'duration',
'user_input', 'additions'],
})

analysis_ts.update_one(
{'_id': ct['_id']},
{'$set': {'data': ct['data']}}
)


def trim_entry(entry, fields_to_keep):
if entry is None:
return
for key in list(entry):
if key not in fields_to_keep:
del entry[key]
elif isinstance(entry[key], dict) and isinstance(fields_to_keep, dict):
trim_entry(entry[key], fields_to_keep[key])


run_on_all_deployments(trim_fluff_from_composite_trips)

0 comments on commit 33eda70

Please sign in to comment.