Skip to content

Commit

Permalink
Merge pull request #596 from jhu-bids/bugfix-perpetual-refresh
Browse files Browse the repository at this point in the history
Bug: Perpetual "refresh in progress" can occur #574
  • Loading branch information
joeflack4 authored Oct 29, 2023
2 parents ae73b87 + 7149564 commit 67c9c5a
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 6 deletions.
14 changes: 9 additions & 5 deletions backend/db/refresh.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
sys.path.insert(0, str(PROJECT_ROOT))
from backend.db.analysis import counts_update,counts_docs
from backend.db.config import CONFIG
from backend.db.utils import current_datetime, get_db_connection, last_refresh_timestamp, update_db_status_var, check_db_status_var, delete_db_status_var
from backend.db.utils import current_datetime, get_db_connection, is_refresh_active, last_refresh_timestamp, \
update_db_status_var, check_db_status_var, delete_db_status_var
from enclave_wrangler.objects_api import csets_and_members_enclave_to_db

DESC = 'Refresh TermHub database w/ newest updates from the Enclave using the objects API.'
Expand Down Expand Up @@ -46,7 +47,8 @@ def refresh_db(
local = use_local_db
print('INFO: Starting database refresh.', flush=True) # flush: for gh action
t0, t0_str = datetime.now(), current_datetime()
if check_db_status_var('refresh_status') == 'active':

if is_refresh_active():
print('INFO: Refresh already in progress. When that process completes, it will restart again. Exiting.')
update_db_status_var('new_request_while_refreshing', t0_str, local)
return
Expand All @@ -67,14 +69,16 @@ def refresh_db(
new_data: bool = csets_and_members_enclave_to_db(con, since, schema=schema)
except Exception as err:
update_db_status_var('last_refresh_result', 'error', local)
update_db_status_var('refresh_status', 'inactive', local)
print(f"Database refresh incomplete; exception occurred. Tallying counts and exiting.", file=sys.stderr)
counts_update('DB refresh error.', schema, local, filter_temp_refresh_tables=True)
counts_docs()
raise err
finally:
refresh_complete_dt = current_datetime()
update_db_status_var('last_refresh_exited', refresh_complete_dt, local)
update_db_status_var('refresh_status', 'inactive', local)

update_db_status_var('refresh_status', 'inactive', local)
update_db_status_var('last_refresh_success', current_datetime(), local)
update_db_status_var('last_refresh_success', refresh_complete_dt, local)
update_db_status_var('last_refresh_result', 'success', local)
if new_data:
counts_update('DB refresh.', schema, local)
Expand Down
16 changes: 15 additions & 1 deletion backend/db/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,21 @@ def is_table_up_to_date(table_name: str, skip_if_updated_within_hours: int = Non
last_updated_key = f'last_updated_{table_name}'
return check_if_updated(last_updated_key, skip_if_updated_within_hours)

def is_refresh_active(local=False) -> bool:
    """Check whether a database refresh currently appears to be running.

    The 'refresh_status' variable (active/inactive) is deliberately ignored: when the process exits abnormally
    (e.g. while debugging), it may never be set back to 'inactive'. Instead, the 'last_refresh_request' and
    'last_refresh_exited' timestamps are compared. A refresh counts as active only if it was requested after the most
    recent recorded exit AND that request is less than 6 hours old. 6 hours was chosen because it is the default
    maximum amount of time that a GitHub action can run, and it is well over the normal duration of a refresh; an
    older request without a later exit is treated as a run that ended without recording its exit.

    :param local: Forwarded unchanged to check_db_status_var().
    :return: True if a refresh appears to be in progress, otherwise False."""
    from datetime import datetime  # function-scope import so this block does not rely on module-level imports

    max_hours = 6
    last_start_str = check_db_status_var('last_refresh_request', local)
    last_end_str = check_db_status_var('last_refresh_exited', local)

    # NOTE(review): presumably a missing key yields a falsy value (e.g. None) -- confirm against
    # check_db_status_var(). dp.parse() cannot handle such a value, so treat "never requested" as "not active".
    if not last_start_str:
        return False
    last_start = dp.parse(last_start_str)

    # An exit recorded at or after the latest request means that run has already finished.
    if last_end_str and dp.parse(last_end_str) >= last_start:
        return False

    # Measure how long the un-exited run has been going, relative to the current time. Previously the gap between
    # the previous exit and the latest request was measured, which never grows and so never expired a dead run.
    # NOTE(review): if the stored timestamp is timezone-naive, this assumes it was written in local time -- confirm.
    now = datetime.now(tz=last_start.tzinfo)
    hours_running: float = (now - last_start).total_seconds() / 60 / 60
    return hours_running < max_hours

# todo: Can update update_db_status_var() so that it can accept optional param 'con' to improve performance.
def update_db_status_var(key: str, val: str, local=False):
"""Update the `manage` table with information for a given variable, e.g. when a table was last updated"""
Expand All @@ -269,7 +284,6 @@ def update_db_status_var(key: str, val: str, local=False):
sql_str = f"INSERT INTO public.manage (key, value) VALUES (:key, :val);"
run_sql(con, sql_str, {'key': key, 'val': val})


def check_db_status_var(key: str, local=False):
"""Check the value of a given variable the `manage`table """
with get_db_connection(schema='', local=local) as con:
Expand Down

0 comments on commit 67c9c5a

Please sign in to comment.