diff --git a/backend/db/refresh.py b/backend/db/refresh.py index b604585ec..874ffd249 100755 --- a/backend/db/refresh.py +++ b/backend/db/refresh.py @@ -13,7 +13,8 @@ sys.path.insert(0, str(PROJECT_ROOT)) from backend.db.analysis import counts_update,counts_docs from backend.db.config import CONFIG -from backend.db.utils import current_datetime, get_db_connection, last_refresh_timestamp, update_db_status_var, check_db_status_var, delete_db_status_var +from backend.db.utils import current_datetime, get_db_connection, is_refresh_active, last_refresh_timestamp, \ + update_db_status_var, check_db_status_var, delete_db_status_var from enclave_wrangler.objects_api import csets_and_members_enclave_to_db DESC = 'Refresh TermHub database w/ newest updates from the Enclave using the objects API.' @@ -46,7 +47,8 @@ def refresh_db( local = use_local_db print('INFO: Starting database refresh.', flush=True) # flush: for gh action t0, t0_str = datetime.now(), current_datetime() - if check_db_status_var('refresh_status') == 'active': + + if is_refresh_active(): print('INFO: Refresh already in progress. When that process completes, it will restart again. Exiting.') update_db_status_var('new_request_while_refreshing', t0_str, local) return @@ -67,14 +69,16 @@ def refresh_db( new_data: bool = csets_and_members_enclave_to_db(con, since, schema=schema) except Exception as err: update_db_status_var('last_refresh_result', 'error', local) - update_db_status_var('refresh_status', 'inactive', local) print(f"Database refresh incomplete; exception occurred. 
def is_refresh_active(local=False, max_hours: float = 6) -> bool:
    """Check whether a database refresh is currently running.

    The `manage` table still has a 'refresh_status' variable (active/inactive), but it is ignored here: when the
    process exits abnormally (e.g. while debugging) it never gets set back to 'inactive'. Instead, the timestamps
    stored under 'last_refresh_request' (latest start) and 'last_refresh_exited' (latest exit) are compared.

    A refresh is considered active when the latest start is more recent than the latest exit AND that start happened
    less than `max_hours` ago. A start older than that is treated as a run that died without recording its exit.
    The default of 6 hours is the default maximum runtime of a GitHub action job, and is well over the time a
    normal refresh takes.

    :param local: Passed through to check_db_status_var() to select the local database.
    :param max_hours: Age (in hours) after which an unfinished refresh is assumed to have exited abnormally.
    :return: True if a refresh appears to be in progress, otherwise False.
    """
    start_val = check_db_status_var('last_refresh_request', local)
    # What check_db_status_var() returns for an absent key is not visible here; any empty value is treated as
    # "never recorded" rather than being handed to the date parser.
    if not start_val:
        return False  # no refresh has ever been requested
    last_start = dp.parse(start_val)

    # How long the most recent run has been going. "Now" comes from current_datetime(), the same helper that
    # produces the stored 'last_refresh_exited' value, so it parses to a datetime comparable with the stored ones.
    hours_running: float = (dp.parse(current_datetime()) - last_start).total_seconds() / 60 / 60
    if hours_running >= max_hours:
        return False  # stale: the run must have exited without recording it

    end_val = check_db_status_var('last_refresh_exited', local)
    if not end_val:
        return True  # started recently and no exit has ever been recorded
    # Strictly greater: a start equal to the last exit is not considered active.
    return last_start > dp.parse(end_val)