Commit

- API call logging is now capturing city/state for easier reading of
  what calls are coming in
- Moved load_dotenv(ENV_FILE) out of db/config and
  enclave_wrangler/config into backend.config. If you import that, it
  will get loaded (see the sketch below)
- Working on make_new_versions_of_csets for N3C Recommended updates
Sigfried committed Oct 31, 2023
1 parent a963812 commit 29584ac
Showing 8 changed files with 407 additions and 67 deletions.
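
A minimal sketch of the config change noted above, assuming the module layout shown in this diff: importing backend.config runs load_dotenv(ENV_FILE) once at import time, so a consumer module can drop its own load_dotenv call and just read os.getenv.

import os

from backend.config import PROJECT_ROOT  # side effect: env/.env is loaded via load_dotenv

# Hypothetical consumer module along the lines of backend/db/config.py below
DOCS_DIR = os.path.join(PROJECT_ROOT, 'docs')
DB_SERVER = os.getenv('TERMHUB_DB_SERVER')  # populated from env/.env (or the real environment)
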
77 changes: 67 additions & 10 deletions backend/app.py
@@ -4,23 +4,24 @@
- https://github.com/tiangolo/fastapi
"""
import os
from pathlib import Path
from typing import List, Optional
import re

import uvicorn
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
import httpx
# from starlette.requests import Request
import time
import datetime
from socket import gethostname

import backend.config
from backend.routes import cset_crud, db, graph
from backend.db.config import override_schema, get_schema_name
from backend.db.utils import insert_from_dict, get_db_connection

PROJECT_DIR = Path(os.path.dirname(__file__)).parent
# users on the same server
APP = FastAPI()
APP.include_router(cset_crud.router)
@@ -35,6 +36,69 @@
)
APP.add_middleware(GZipMiddleware, minimum_size=1000)


async def client_location(request: Request) -> str:
# rpt['client'] = request.client.host -- this gives a local (169.154) IP on azure
# chatgpt recommends:
forwarded_for: Optional[str] = request.headers.get('X-Forwarded-For')
if forwarded_for:
# The header can contain multiple IP addresses, so take the first one
ip = forwarded_for.split(',')[0]
else:
ip = request.client.host

ip = re.sub(':.*', '', ip)

ipstack_key = os.getenv('API_STACK_KEY', None)

if ip != '127.0.0.1' and ipstack_key:
"""
http://api.ipstack.com/134.201.250.155?access_key=7a6f9d6d72d68a1452b643eb58cd8ee7&format=1
{
"ip": "134.201.250.155",
"type": "ipv4",
"continent_code": "NA",
"continent_name": "North America",
"country_code": "US",
"country_name": "United States",
"region_code": "CA",
"region_name": "California",
"city": "San Fernando",
"zip": "91344",
"latitude": 34.293949127197266,
"longitude": -118.50763702392578,
"location": {
"geoname_id": 5391945,
"capital": "Washington D.C.",
"languages": [
{
"code": "en",
"name": "English",
"native": "English"
}
],
"country_flag": "https://assets.ipstack.com/flags/us.svg",
"country_flag_emoji": "🇺🇸",
"country_flag_emoji_unicode": "U+1F1FA U+1F1F8",
"calling_code": "1",
"is_eu": false
}
}
"""

loc_url = f"http://api.ipstack.com/{ip}?access_key={ipstack_key}"

async with httpx.AsyncClient() as client:
response = await client.get(loc_url)
if response and response.json:
loc_obj = response.json()
location = f"{ip}: {loc_obj['city']}, {loc_obj['region_name']}"
return location

return ip



@APP.middleware("http")
async def set_schema_globally_and_log_calls(request: Request, call_next):
"""
@@ -61,14 +125,7 @@ async def set_schema_globally_and_log_calls(request: Request, call_next):

rpt['host'] = os.getenv('HOSTENV', gethostname())

# rpt['client'] = request.client.host -- this gives a local (169.154) IP on azure
# chatgpt recommends:
forwarded_for: Optional[str] = request.headers.get('X-Forwarded-For')
if forwarded_for:
# The header can contain multiple IP addresses, so take the first one
rpt['client'] = forwarded_for.split(',')[0]
else:
rpt['client'] = request.client.host
rpt['client'] = await client_location(request)

schema = query_params.get("schema")
if schema:
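
For reference, a small sketch (not part of the commit) of the client string this middleware ends up logging when the ipstack lookup succeeds, reusing the sample payload quoted in the client_location docstring above and the same format string:

ip = '134.201.250.155'
loc_obj = {'city': 'San Fernando', 'region_name': 'California'}
location = f"{ip}: {loc_obj['city']}, {loc_obj['region_name']}"
print(location)  # -> 134.201.250.155: San Fernando, California
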
9 changes: 9 additions & 0 deletions backend/config.py
@@ -0,0 +1,9 @@
import os
from dotenv import load_dotenv

APP_ROOT = os.path.dirname(os.path.realpath(__file__))
PROJECT_ROOT = os.path.realpath(os.path.join(APP_ROOT, '..'))
ENV_DIR = os.path.join(PROJECT_ROOT, 'env')
OUTPUT_DIR = os.path.join(PROJECT_ROOT, 'output')
ENV_FILE = os.path.join(ENV_DIR, '.env')
load_dotenv(ENV_FILE)
7 changes: 2 additions & 5 deletions backend/db/config.py
@@ -2,19 +2,16 @@
import os
from typing import Dict, List

from dotenv import load_dotenv
from backend.config import PROJECT_ROOT

DB_DIR = os.path.dirname(os.path.realpath(__file__))
BACKEND_DIR = os.path.join(DB_DIR, '..')
PROJECT_ROOT = os.path.join(BACKEND_DIR, '..')
DOCS_DIR = os.path.join(PROJECT_ROOT, 'docs')
ENV_DIR = os.path.join(PROJECT_ROOT, 'env')
ENV_FILE = os.path.join(ENV_DIR, '.env')
TERMHUB_CSETS_PATH = os.path.join(PROJECT_ROOT, 'termhub-csets')
DATASETS_PATH = os.path.join(TERMHUB_CSETS_PATH, 'datasets', 'prepped_files')
OBJECTS_PATH = os.path.join(TERMHUB_CSETS_PATH, 'objects')
DDL_JINJA_PATH_PATTERN = os.path.join(DB_DIR, 'ddl-*.jinja.sql')
load_dotenv(ENV_FILE)

CONFIG = {
'server': os.getenv('TERMHUB_DB_SERVER'),
'driver': os.getenv('TERMHUB_DB_DRIVER'),
10 changes: 2 additions & 8 deletions enclave_wrangler/config.py
@@ -1,16 +1,11 @@
"""Config"""
import os
from collections import OrderedDict

from dotenv import load_dotenv
from backend.config import PROJECT_ROOT

TERMHUB_VERSION = "0.0.1"

APP_ROOT = os.path.dirname(os.path.realpath(__file__))
PROJECT_ROOT = os.path.realpath(os.path.join(APP_ROOT, '..'))
ENV_DIR = os.path.join(PROJECT_ROOT, 'env')
OUTPUT_DIR = os.path.join(PROJECT_ROOT, 'output')
ENV_FILE = os.path.join(ENV_DIR, '.env')

TERMHUB_CSETS_DIR = os.path.join(PROJECT_ROOT, 'termhub-csets')
UPLOADS_DIR = os.path.join(TERMHUB_CSETS_DIR, 'datasets', 'uploads')
OUTDIR_OBJECTS = os.path.join(TERMHUB_CSETS_DIR, 'objects')
@@ -32,7 +27,6 @@
RESEARCHER_COLS = ['container_created_by', 'codeset_created_by', 'assigned_sme', 'reviewed_by', 'n3c_reviewer',
'assigned_informatician']

load_dotenv(ENV_FILE)
config = {
'PALANTIR_ENCLAVE_AUTHENTICATION_BEARER_TOKEN': os.getenv('PALANTIR_ENCLAVE_AUTHENTICATION_BEARER_TOKEN', '').replace('\r', ''),
'OTHER_TOKEN': os.getenv('OTHER_TOKEN', '').replace('\r', ''),
100 changes: 56 additions & 44 deletions enclave_wrangler/dataset_upload.py
@@ -185,47 +185,6 @@ def upload_new_cset_container_with_concepts_from_csv(
return responses


def upload_cset_as_new_version_of_itself(
codeset_id: int,
add_to_field: Dict = {'intention': f'Version for comparison to N3C-Rec on {datetime.date.today().isoformat()}'}
) -> Dict:
v = fetch_cset_version(codeset_id, False)

vi = [i['properties'] for i in get_concept_set_version_expression_items(codeset_id, 'full')]
concepts = []
for item in vi:
c = {'concept_id': item['conceptId']}
for p in ['includeDescendants', 'isExcluded', 'includeMapped']:
c[p] = item[p]
concepts.append(c)

upload_args = {
# 'on_behalf_of': v['createdBy'],
'on_behalf_of': config['SERVICE_USER_ID'],
'concept_set_name': v['conceptSetNameOMOP'],
'provenance': v['provenance'],
'limitations': v['limitations'],
'intention': v['intention'],
'parent_version_codeset_id': v['codesetId'],
'current_max_version': v['version'], # probably
# codeset_id': None, will be assigned
'validate_first': True,
'omop_concepts': concepts,
'finalize': True,
# annotation,
# intended_research_project,
}

for key, value in add_to_field.items():
val = '. ' + v[key] if v[key] else ''
val = value + val
upload_args[key] = val

# upload_new_cset_version_with_concepts( concept_set_name, omop_concepts, provenance, limitations, intention, annotation, parent_version_codeset_id, current_max_version, intended_research_project, on_behalf_of, codeset_id, validate_first, finalize )
# pass_on_args = ['conceptSetNameOMOP'] not sure what this was for
return upload_new_cset_version_with_concepts(**upload_args)
# returns {'responses': [...], 'codeset_id': 123}

# TODO: What if this fails halfway through? Can we teardown any of the steps? (need to store random `codeset_id` too)
# TODO: Need to do proper codeset_id assignment: (i) look up registry and get next available ID, (ii) assign it here,
# (iii) persist new ID / set to registry, (iv) persist new ID to any files passed through CLI, (v), return the new ID
@@ -1064,7 +1023,60 @@ def cli():
upload_dataset(**kwargs_dict)


def upload_cset_as_new_version_of_itself(
codeset_id: int,
add_to_field: Dict = {'intention': f'Version for comparison to N3C-Rec on {date.today().isoformat()}'}
) -> Dict:
ov = fetch_cset_version(codeset_id, False)

vi = [i['properties'] for i in get_concept_set_version_expression_items(codeset_id, 'full')]
concepts = []
for item in vi:
c = {'concept_id': item['conceptId']}
for p in ['includeDescendants', 'isExcluded', 'includeMapped']:
c[p] = item[p]
concepts.append(c)

upload_args = {
# 'on_behalf_of': ov['createdBy'],
'on_behalf_of': config['SERVICE_USER_ID'],
'concept_set_name': ov['conceptSetNameOMOP'],
'provenance': ov['provenance'],
'limitations': ov['limitations'],
'intention': ov['intention'],
'parent_version_id': ov['codesetId'],
'current_max_version': ov['version'], # probably
# codeset_id': None, will be assigned
'validate_first': True,
'omop_concepts': concepts,
'finalize': True,
# annotation,
# intended_research_project,
}

for key, value in add_to_field.items():
val = '. ' + ov[key] if ov[key] else ''
val = value + val
upload_args[key] = val

# upload_new_cset_version_with_concepts( concept_set_name, omop_concepts, provenance, limitations, intention, annotation, parent_version_codeset_id, current_max_version, intended_research_project, on_behalf_of, codeset_id, validate_first, finalize )
# pass_on_args = ['conceptSetNameOMOP'] not sure what this was for
d = upload_new_cset_version_with_concepts(**upload_args) # {'responses': [...], 'codeset_id': 123}
return d['codeset_id']


def make_new_versions_of_csets(codeset_ids: List[int]):
new_codeset_ids = []
for codeset_id in codeset_ids:
print(f'Making new version of {codeset_id}')
new_version_codeset_id = upload_cset_as_new_version_of_itself(codeset_id)
print(f'{codeset_id}, {new_version_codeset_id}')
new_codeset_ids.append(new_version_codeset_id)

pass

if __name__ == '__main__':
# u = upload_cset_as_new_version_of_itself(834391873)
# exit(0)
cli()
# test_new_version_compare_codeset_ids = [27371375, 523378440, 490947789]
# make_new_versions_of_csets(codeset_ids=test_new_version_compare_codeset_ids)
# pass
cli()
1 change: 1 addition & 0 deletions requirements-unlocked.txt
@@ -1,6 +1,7 @@
# jq commented out because doesn't work on Windows and are also not needed.
# dependencies
fastapi
httpx
jinja2
mezmorize
pandas