Commit 1c4f6e2

Merge branch 'develop' of github.com:jhu-bids/TermHub into develop
Sigfried committed Mar 11, 2024
2 parents: d73d676 + fd6fc01
Showing 9 changed files with 231 additions and 145 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/kill_idle_cons.yml
@@ -0,0 +1,40 @@
# Kill idle connections older than 10 minutes
name: Kill idle connections

on:
  schedule:
    - cron: '0 */3 * * *'  # every 3 hours
  workflow_dispatch:
jobs:
  kill-idle-cons:
    runs-on: ubuntu-latest
    steps:
      # Set up
      - name: Checkout repository and submodules
        uses: actions/checkout@v2
      - name: Set up Python version
        uses: actions/setup-python@v2
        with:
          # Consider '3.10' or 'v3.10.0': https://github.com/actions/setup-python/issues/160
          python-version: '3.9'  # Not sure why 3.9.7 here and 3.9 elsewhere; the .7 works on Mac, but not on Ubuntu

      - name: 'Create env file'
        run: |
          mkdir env
          echo "${{ secrets.ENV_FILE }}" > env/.env
      - name: Create and start virtual environment
        run: |
          python3 -m venv venv
          source venv/bin/activate
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install --upgrade wheel
          pip install --upgrade setuptools
          pip install -r requirements.txt
      # Run the action
      - name: Kill idle connections
        run: make kill-idle-cons
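The `make kill-idle-cons` target invoked in the final step is not part of this diff. Judging from the `-k`/`--kill-idle-cons` flag this same commit adds to backend/db/utils.py below, it presumably reduces to something like the following sketch (the Makefile wiring is an assumption, not shown here):

    # Hypothetical equivalent of `make kill-idle-cons`: invoke the new CLI flag
    # added to backend/db/utils.py later in this commit.
    import subprocess

    subprocess.run(['python', 'backend/db/utils.py', '--kill-idle-cons'], check=True)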
9 changes: 1 addition & 8 deletions backend/db/ddl-1-column_types.jinja.sql
@@ -9,11 +9,4 @@ ALTER TABLE IF EXISTS {{schema}}test_concept_set_container ALTER COLUMN project_
 ALTER TABLE IF EXISTS {{schema}}test_concept_set_container ALTER COLUMN assigned_informatician TYPE text;
 ALTER TABLE IF EXISTS {{schema}}test_concept_set_container ALTER COLUMN assigned_sme TYPE text;
 ALTER TABLE IF EXISTS {{schema}}test_concept_set_container ALTER COLUMN intention TYPE text;
-ALTER TABLE IF EXISTS {{schema}}test_concept_set_container ALTER COLUMN n3c_reviewer TYPE text;
-
--- throwing this in here for now
-CREATE OR REPLACE FUNCTION array_sort (ANYARRAY)
-RETURNS ANYARRAY LANGUAGE SQL
-AS $$
-SELECT ARRAY(SELECT unnest($1) ORDER BY 1)
-$$;
+ALTER TABLE IF EXISTS {{schema}}test_concept_set_container ALTER COLUMN n3c_reviewer TYPE text;
6 changes: 6 additions & 0 deletions backend/db/ddl-1-functions.sql
@@ -0,0 +1,6 @@
-- Functions -----------------------------------------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION array_sort (ANYARRAY)
RETURNS ANYARRAY LANGUAGE SQL
AS $$
SELECT ARRAY(SELECT unnest($1) ORDER BY 1)
$$;
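The `array_sort` function (relocated here from ddl-1-column_types.jinja.sql, per the previous file) lets arrays be compared regardless of element order. A minimal usage sketch, assuming a database where the function is installed and reusing the `get_db_connection`/`run_sql` helpers that appear in the backend/db/utils.py diff below:

    # Sketch: order-insensitive array comparison via the new array_sort().
    # run_sql(con, qry) is used here only to execute; result handling is omitted.
    from backend.db.utils import get_db_connection, run_sql

    with get_db_connection(schema='') as con:
        # ARRAY[3,1,2] and ARRAY[2,3,1] sort to the same array, so this yields true
        run_sql(con, 'SELECT array_sort(ARRAY[3, 1, 2]) = array_sort(ARRAY[2, 3, 1]);')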
13 changes: 9 additions & 4 deletions backend/db/resolve_fetch_failures_0_members.py
@@ -16,7 +16,8 @@
 sys.path.insert(0, str(PROJECT_ROOT))
 from enclave_wrangler.objects_api import concept_set_members__from_csets_and_members_to_db, \
     fetch_cset_and_member_objects
-from backend.db.utils import SCHEMA, fetch_status_set_success, get_db_connection, select_failed_fetches, \
+from backend.db.utils import SCHEMA, fetch_status_set_success, get_db_connection, reset_temp_refresh_tables, \
+    select_failed_fetches, \
     refresh_derived_tables
 
 DESC = "Resolve any failures resulting from fetching data from the Enclave's objects API."
@@ -76,9 +77,13 @@ def resolve_fetch_failures_0_members(
 
     # Update DB
     if success_cases:
-        with get_db_connection(schema=schema, local=use_local_db) as con:
-            concept_set_members__from_csets_and_members_to_db(con, csets_and_members)
-            refresh_derived_tables(con)
+        try:
+            with get_db_connection(schema=schema, local=use_local_db) as con:
+                concept_set_members__from_csets_and_members_to_db(con, csets_and_members)
+                refresh_derived_tables(con)
+        except Exception as err:
+            reset_temp_refresh_tables(schema)
+            raise err
 
     # Report success
     if success_cases:
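This hunk and the matching one in resolve_fetch_failures_excess_items.py below add the same guard: if anything fails during the inserts or the derived-table refresh, reset the temporary refresh tables, then re-raise. Factored out, the shared pattern looks roughly like this (a sketch; `do_db_work` is a hypothetical stand-in for each script's insert calls):

    # The recovery pattern both resolve_fetch_failures_* scripts now share.
    from backend.db.utils import get_db_connection, refresh_derived_tables, \
        reset_temp_refresh_tables

    def with_refresh_guard(schema, do_db_work, use_local_db=False):
        try:
            with get_db_connection(schema=schema, local=use_local_db) as con:
                do_db_work(con)  # script-specific inserts/updates
                refresh_derived_tables(con)
        except Exception:
            # presumably clears half-built temp tables so the next run starts clean
            reset_temp_refresh_tables(schema)
            raise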
34 changes: 19 additions & 15 deletions backend/db/resolve_fetch_failures_excess_items.py
@@ -12,13 +12,13 @@
 sys.path.insert(0, str(PROJECT_ROOT))
 from backend.db.utils import SCHEMA, fetch_status_set_success, get_db_connection, insert_from_dicts, \
     refresh_derived_tables, \
-    select_failed_fetches
+    reset_temp_refresh_tables, select_failed_fetches
 from enclave_wrangler.datasets import CSV_TRANSFORM_DIR, download_datasets
 from enclave_wrangler.utils import was_file_modified_within_threshold
 
 DESC = "Resolve failures due to too many expression items or members when fetching data from the Enclave's objects API."
 
-def resolve_fetch_failures_excess_items(use_local_db=False, cached_dataset_threshold_hours=0):
+def resolve_fetch_failures_excess_items(schema=SCHEMA, use_local_db=False, cached_dataset_threshold_hours=0):
     """Resolve failures due to too many expression items or members when fetching data from the Enclave's objects API.
     cached_dataset_threshold_hours: Threshold, in hours, until cached datasets are considered invalid and need to be
         re-downloaded.
@@ -62,19 +62,23 @@ def resolve_fetch_failures_excess_items(use_local_db=False, cached_dataset_thres
 
     # Update DB
     solved_failures = []
-    with get_db_connection(local=use_local_db) as con:
-        for dataset, failures in failures_by_dataset.items():
-            print(f'Inserting data into core table: {dataset}')
-            print(f'- This will address the following failures:\n{failures}')
-            df = pd.read_csv(dataset_path_map[dataset])
-            df['codeset_id'] = df['codeset_id'].apply(lambda x: str(x).split('.')[0] if x else '')  # couldn't int cuz nan's
-            for failure in failures:
-                rows = df[df['codeset_id'] == failure['primary_key']].to_dict('records')
-                # todo: if not rows, update comment that tried to fix but couldn't find any data?
-                if rows:
-                    insert_from_dicts(con, dataset, rows)
-                    solved_failures.append(failure)
-        refresh_derived_tables(con)
+    try:
+        with get_db_connection(schema=schema, local=use_local_db) as con:
+            for dataset, failures in failures_by_dataset.items():
+                print(f'Inserting data into core table: {dataset}')
+                print(f'- This will address the following failures:\n{failures}')
+                df = pd.read_csv(dataset_path_map[dataset])
+                df['codeset_id'] = df['codeset_id'].apply(lambda x: str(x).split('.')[0] if x else '')  # couldn't int cuz nan's
+                for failure in failures:
+                    rows = df[df['codeset_id'] == failure['primary_key']].to_dict('records')
+                    # todo: if not rows, update comment that tried to fix but couldn't find any data?
+                    if rows:
+                        insert_from_dicts(con, dataset, rows)
+                        solved_failures.append(failure)
+            refresh_derived_tables(con)
+    except Exception as err:
+        reset_temp_refresh_tables(schema)
+        raise err
 
     # Update fetch_audit status
     fetch_status_set_success(solved_failures)
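The `codeset_id` lambda above works around a pandas quirk: an integer CSV column containing empty cells is read as float64 (NaN forces the promotion), so ids stringify as '12345.0'. A standalone illustration of why the string split is used instead of int():

    # NaN promotes an integer column to float, hence the '.0' suffix to strip.
    import io
    import pandas as pd

    df = pd.read_csv(io.StringIO('codeset_id,name\n12345,foo\n,bar\n'))
    print(df['codeset_id'].dtype)                  # float64, due to the empty cell
    print(str(df['codeset_id'][0]))                # '12345.0'
    print(str(df['codeset_id'][0]).split('.')[0])  # '12345'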
18 changes: 18 additions & 0 deletions backend/db/utils.py
@@ -809,13 +809,31 @@ def get_idle_connections(interval: str = '1 week'):
     return result
 
 
+def kill_idle_cons(threshold_minutes=10):
+    """Kill idle connections older than `threshold_minutes` minutes.
+    Kills only those connections having to do with TermHub."""
+    qry = f"""SELECT pg_terminate_backend(pid) FROM pg_stat_activity
+        WHERE state = 'idle' and datname = 'termhub' and usename = 'thadmin'
+        and state_change < NOW() - INTERVAL '{threshold_minutes} minutes';
+    """
+    with get_db_connection(schema='') as con:
+        run_sql(con, qry)
+
+
 def cli():
     """Command line interface"""
     parser = ArgumentParser(prog='termhub-db-utils', description='Database utilities.')
     parser.add_argument(
         '-f', '--reset-refresh-state', required=False, default=False, action='store_true',
         help='Resets both temporary tables and status variables')
+    parser.add_argument(
+        '-k', '--kill-idle-cons', required=False, default=False, action='store_true',
+        help='Kills any idle connections older than 10 minutes')
     d: Dict = vars(parser.parse_args())
+    if d['kill_idle_cons']:
+        print('Killing idle connections older than 10 minutes')
+        kill_idle_cons()
     if d['reset_refresh_state']:
         print('Resetting temporary tables and status variables')
         reset_refresh_state()
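For reference, the new helper is reachable from the scheduled workflow above (via `make kill-idle-cons`, assumed to call `python backend/db/utils.py -k`) or directly in Python; the threshold is a parameter:

    # Direct use of the new helper; 10 minutes is the default per the signature.
    from backend.db.utils import kill_idle_cons

    kill_idle_cons()                      # terminate TermHub connections idle > 10 min
    kill_idle_cons(threshold_minutes=30)  # or use a more lenient cutoff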
(Diffs for the remaining 3 of the 9 changed files are not shown here.)
