Commit

- API call logging is now capturing city/state for easier reading of
  what calls are coming in
- Moved load_dotenv(ENV_FILE) out of db/config and
  enclave_wrangler/config into backend.config. If you import that, it
  will get loaded (see the sketch below)
- Working on make_new_versions_of_csets for N3C Recommended updates
Sigfried committed Oct 31, 2023
1 parent a963812 commit 29584ac
Showing 8 changed files with 407 additions and 67 deletions.
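
A minimal sketch of the config change noted above, assuming the module layout shown in this diff: importing backend.config runs load_dotenv(ENV_FILE) once at import time, so a consumer module can drop its own load_dotenv call and just read os.getenv.

import os

from backend.config import PROJECT_ROOT  # side effect: env/.env is loaded via load_dotenv

# Hypothetical consumer module along the lines of backend/db/config.py below
DOCS_DIR = os.path.join(PROJECT_ROOT, 'docs')
DB_SERVER = os.getenv('TERMHUB_DB_SERVER')  # populated from env/.env (or the real environment)
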
77 changes: 67 additions & 10 deletions backend/app.py
@@ -4,23 +4,24 @@
- https://github.com/tiangolo/fastapi
"""
import os
from pathlib import Path
from typing import List, Optional
import re

import uvicorn
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
import httpx
# from starlette.requests import Request
import time
import datetime
from socket import gethostname

import backend.config
from backend.routes import cset_crud, db, graph
from backend.db.config import override_schema, get_schema_name
from backend.db.utils import insert_from_dict, get_db_connection

PROJECT_DIR = Path(os.path.dirname(__file__)).parent
# users on the same server
APP = FastAPI()
APP.include_router(cset_crud.router)
@@ -35,6 +36,69 @@
)
APP.add_middleware(GZipMiddleware, minimum_size=1000)


async def client_location(request: Request) -> str:
# rpt['client'] = request.client.host -- this gives a local (169.154) IP on azure
# chatgpt recommends:
forwarded_for: Optional[str] = request.headers.get('X-Forwarded-For')
if forwarded_for:
# The header can contain multiple IP addresses, so take the first one
ip = forwarded_for.split(',')[0]
else:
ip = request.client.host

ip = re.sub(':.*', '', ip)

ipstack_key = os.getenv('API_STACK_KEY', None)

if ip != '127.0.0.1' and ipstack_key:
"""
http://api.ipstack.com/134.201.250.155?access_key=7a6f9d6d72d68a1452b643eb58cd8ee7&format=1
{
"ip": "134.201.250.155",
"type": "ipv4",
"continent_code": "NA",
"continent_name": "North America",
"country_code": "US",
"country_name": "United States",
"region_code": "CA",
"region_name": "California",
"city": "San Fernando",
"zip": "91344",
"latitude": 34.293949127197266,
"longitude": -118.50763702392578,
"location": {
"geoname_id": 5391945,
"capital": "Washington D.C.",
"languages": [
{
"code": "en",
"name": "English",
"native": "English"
}
],
"country_flag": "https://assets.ipstack.com/flags/us.svg",
"country_flag_emoji": "🇺🇸",
"country_flag_emoji_unicode": "U+1F1FA U+1F1F8",
"calling_code": "1",
"is_eu": false
}
}
"""

loc_url = f"http://api.ipstack.com/{ip}?access_key={ipstack_key}"

async with httpx.AsyncClient() as client:
response = await client.get(loc_url)
if response and response.json:
loc_obj = response.json()
location = f"{ip}: {loc_obj['city']}, {loc_obj['region_name']}"
return location

return ip



@APP.middleware("http")
async def set_schema_globally_and_log_calls(request: Request, call_next):
"""
@@ -61,14 +125,7 @@ async def set_schema_globally_and_log_calls(request: Request, call_next):

rpt['host'] = os.getenv('HOSTENV', gethostname())

# rpt['client'] = request.client.host -- this gives a local (169.154) IP on azure
# chatgpt recommends:
forwarded_for: Optional[str] = request.headers.get('X-Forwarded-For')
if forwarded_for:
# The header can contain multiple IP addresses, so take the first one
rpt['client'] = forwarded_for.split(',')[0]
else:
rpt['client'] = request.client.host
rpt['client'] = await client_location(request)

schema = query_params.get("schema")
if schema:
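
For reference, a small sketch (not part of the commit) of the client string this middleware ends up logging when the ipstack lookup succeeds, reusing the sample payload quoted in the client_location docstring above and the same format string:

ip = '134.201.250.155'
loc_obj = {'city': 'San Fernando', 'region_name': 'California'}
location = f"{ip}: {loc_obj['city']}, {loc_obj['region_name']}"
print(location)  # -> 134.201.250.155: San Fernando, California
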
9 changes: 9 additions & 0 deletions backend/config.py
@@ -0,0 +1,9 @@
import os
from dotenv import load_dotenv

APP_ROOT = os.path.dirname(os.path.realpath(__file__))
PROJECT_ROOT = os.path.realpath(os.path.join(APP_ROOT, '..'))
ENV_DIR = os.path.join(PROJECT_ROOT, 'env')
OUTPUT_DIR = os.path.join(PROJECT_ROOT, 'output')
ENV_FILE = os.path.join(ENV_DIR, '.env')
load_dotenv(ENV_FILE)
7 changes: 2 additions & 5 deletions backend/db/config.py
@@ -2,19 +2,16 @@
import os
from typing import Dict, List

from dotenv import load_dotenv
from backend.config import PROJECT_ROOT

DB_DIR = os.path.dirname(os.path.realpath(__file__))
BACKEND_DIR = os.path.join(DB_DIR, '..')
PROJECT_ROOT = os.path.join(BACKEND_DIR, '..')
DOCS_DIR = os.path.join(PROJECT_ROOT, 'docs')
ENV_DIR = os.path.join(PROJECT_ROOT, 'env')
ENV_FILE = os.path.join(ENV_DIR, '.env')
TERMHUB_CSETS_PATH = os.path.join(PROJECT_ROOT, 'termhub-csets')
DATASETS_PATH = os.path.join(TERMHUB_CSETS_PATH, 'datasets', 'prepped_files')
OBJECTS_PATH = os.path.join(TERMHUB_CSETS_PATH, 'objects')
DDL_JINJA_PATH_PATTERN = os.path.join(DB_DIR, 'ddl-*.jinja.sql')
load_dotenv(ENV_FILE)

CONFIG = {
'server': os.getenv('TERMHUB_DB_SERVER'),
'driver': os.getenv('TERMHUB_DB_DRIVER'),
10 changes: 2 additions & 8 deletions enclave_wrangler/config.py
@@ -1,16 +1,11 @@
"""Config"""
import os
from collections import OrderedDict

from dotenv import load_dotenv
from backend.config import PROJECT_ROOT

TERMHUB_VERSION = "0.0.1"

APP_ROOT = os.path.dirname(os.path.realpath(__file__))
PROJECT_ROOT = os.path.realpath(os.path.join(APP_ROOT, '..'))
ENV_DIR = os.path.join(PROJECT_ROOT, 'env')
OUTPUT_DIR = os.path.join(PROJECT_ROOT, 'output')
ENV_FILE = os.path.join(ENV_DIR, '.env')

TERMHUB_CSETS_DIR = os.path.join(PROJECT_ROOT, 'termhub-csets')
UPLOADS_DIR = os.path.join(TERMHUB_CSETS_DIR, 'datasets', 'uploads')
OUTDIR_OBJECTS = os.path.join(TERMHUB_CSETS_DIR, 'objects')
@@ -32,7 +27,6 @@
RESEARCHER_COLS = ['container_created_by', 'codeset_created_by', 'assigned_sme', 'reviewed_by', 'n3c_reviewer',
'assigned_informatician']

load_dotenv(ENV_FILE)
config = {
'PALANTIR_ENCLAVE_AUTHENTICATION_BEARER_TOKEN': os.getenv('PALANTIR_ENCLAVE_AUTHENTICATION_BEARER_TOKEN', '').replace('\r', ''),
'OTHER_TOKEN': os.getenv('OTHER_TOKEN', '').replace('\r', ''),
100 changes: 56 additions & 44 deletions enclave_wrangler/dataset_upload.py
@@ -185,47 +185,6 @@ def upload_new_cset_container_with_concepts_from_csv(
return responses


def upload_cset_as_new_version_of_itself(
codeset_id: int,
add_to_field: Dict = {'intention': f'Version for comparison to N3C-Rec on {datetime.date.today().isoformat()}'}
) -> Dict:
v = fetch_cset_version(codeset_id, False)

vi = [i['properties'] for i in get_concept_set_version_expression_items(codeset_id, 'full')]
concepts = []
for item in vi:
c = {'concept_id': item['conceptId']}
for p in ['includeDescendants', 'isExcluded', 'includeMapped']:
c[p] = item[p]
concepts.append(c)

upload_args = {
# 'on_behalf_of': v['createdBy'],
'on_behalf_of': config['SERVICE_USER_ID'],
'concept_set_name': v['conceptSetNameOMOP'],
'provenance': v['provenance'],
'limitations': v['limitations'],
'intention': v['intention'],
'parent_version_codeset_id': v['codesetId'],
'current_max_version': v['version'], # probably
# codeset_id': None, will be assigned
'validate_first': True,
'omop_concepts': concepts,
'finalize': True,
# annotation,
# intended_research_project,
}

for key, value in add_to_field.items():
val = '. ' + v[key] if v[key] else ''
val = value + val
upload_args[key] = val

# upload_new_cset_version_with_concepts( concept_set_name, omop_concepts, provenance, limitations, intention, annotation, parent_version_codeset_id, current_max_version, intended_research_project, on_behalf_of, codeset_id, validate_first, finalize )
# pass_on_args = ['conceptSetNameOMOP'] not sure what this was for
return upload_new_cset_version_with_concepts(**upload_args)
# returns {'responses': [...], 'codeset_id': 123}

# TODO: What if this fails halfway through? Can we teardown any of the steps? (need to store random `codeset_id` too)
# TODO: Need to do proper codeset_id assignment: (i) look up registry and get next available ID, (ii) assign it here,
# (iii) persist new ID / set to registry, (iv) persist new ID to any files passed through CLI, (v), return the new ID
@@ -1064,7 +1023,60 @@ def cli():
upload_dataset(**kwargs_dict)


def upload_cset_as_new_version_of_itself(
codeset_id: int,
add_to_field: Dict = {'intention': f'Version for comparison to N3C-Rec on {date.today().isoformat()}'}
) -> Dict:
ov = fetch_cset_version(codeset_id, False)

vi = [i['properties'] for i in get_concept_set_version_expression_items(codeset_id, 'full')]
concepts = []
for item in vi:
c = {'concept_id': item['conceptId']}
for p in ['includeDescendants', 'isExcluded', 'includeMapped']:
c[p] = item[p]
concepts.append(c)

upload_args = {
# 'on_behalf_of': ov['createdBy'],
'on_behalf_of': config['SERVICE_USER_ID'],
'concept_set_name': ov['conceptSetNameOMOP'],
'provenance': ov['provenance'],
'limitations': ov['limitations'],
'intention': ov['intention'],
'parent_version_id': ov['codesetId'],
'current_max_version': ov['version'], # probably
# codeset_id': None, will be assigned
'validate_first': True,
'omop_concepts': concepts,
'finalize': True,
# annotation,
# intended_research_project,
}

for key, value in add_to_field.items():
val = '. ' + ov[key] if ov[key] else ''
val = value + val
upload_args[key] = val

# upload_new_cset_version_with_concepts( concept_set_name, omop_concepts, provenance, limitations, intention, annotation, parent_version_codeset_id, current_max_version, intended_research_project, on_behalf_of, codeset_id, validate_first, finalize )
# pass_on_args = ['conceptSetNameOMOP'] not sure what this was for
d = upload_new_cset_version_with_concepts(**upload_args) # {'responses': [...], 'codeset_id': 123}
return d['codeset_id']


def make_new_versions_of_csets(codeset_ids: List[int]):
new_codeset_ids = []
for codeset_id in codeset_ids:
print(f'Making new version of {codeset_id}')
new_version_codeset_id = upload_cset_as_new_version_of_itself(codeset_id)
print(f'{codeset_id}, {new_version_codeset_id}')
new_codeset_ids.append(new_version_codeset_id)

pass

if __name__ == '__main__':
# u = upload_cset_as_new_version_of_itself(834391873)
# exit(0)
cli()
# test_new_version_compare_codeset_ids = [27371375, 523378440, 490947789]
# make_new_versions_of_csets(codeset_ids=test_new_version_compare_codeset_ids)
# pass
cli()
1 change: 1 addition & 0 deletions requirements-unlocked.txt
@@ -1,6 +1,7 @@
# jq commented out because doesn't work on Windows and are also not needed.
# dependencies
fastapi
httpx
jinja2
mezmorize
pandas