-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Now produces FHIR outputs in addition to OWL and SemanticSQL. General - Update: Simplified CLI by importing & customizing OMOP2OWL-vocab CLI. - Add: requirements.txt for versioned requirements. - Update: Moved CLI to __main__ so that it is what you get when you run the package as a module. - Update: Customized so that n3c_ingest.py now has prebaked configs
- Loading branch information
Showing
6 changed files
with
147 additions
and
102 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1 @@ | ||
"""N3C OMOP to OWL""" | ||
from n3c_ingest.n3c_ingest import cli |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,12 @@ | ||
"""N3C OMOP to OWL""" | ||
from datetime import datetime | ||
|
||
from n3c_ingest.n3c_ingest import cli | ||
# noinspection PyProtectedMember | ||
from omop2owl_vocab.omop2owl_vocab import cli | ||
|
||
|
||
if __name__ == '__main__': | ||
t1 = datetime.now() | ||
cli() | ||
cli('n3c-ingest', 'Convert N3C OMOP vocab tables to OWL, SemanticSQL, and FHIR.') | ||
t2 = datetime.now() | ||
print(f'Finished in {(t2 - t1).seconds} seconds') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,121 +1,42 @@ | ||
"""N3C ingest for TIMS.""" | ||
import os | ||
from argparse import ArgumentParser | ||
from datetime import datetime | ||
from pathlib import Path | ||
|
||
# noinspection PyProtectedMember | ||
from omop2owl_vocab.omop2owl_vocab import _convert_semsql, _get_merged_file_outpath, _run_command, run | ||
from omop2owl_vocab.omop2owl_vocab import omop2owl | ||
|
||
SRC_DIR = Path(os.path.dirname(os.path.abspath(__file__))) | ||
PROJECT_DIR = SRC_DIR.parent | ||
IO_DIR = PROJECT_DIR / 'io' | ||
RELEASE_DIR = PROJECT_DIR / IO_DIR / 'release' | ||
INPUT_DIR = PROJECT_DIR / IO_DIR / 'input' | ||
TERMHUB_CSETS_DIR = INPUT_DIR / 'termhub-csets' | ||
# DATASETS_DIR = TERMHUB_CSETS_DIR / 'datasets' / 'prepped_files' | ||
DATASETS_DIR = INPUT_DIR | ||
CONCEPT_CSV = DATASETS_DIR / 'concept.csv' | ||
CONCEPT_RELATIONSHIP_CSV = DATASETS_DIR / 'concept_relationship.csv' | ||
PROG = 'n3c-ingest' | ||
DESC = 'Convert N3C OMOP vocab tables to OWL, SemanticSQL, and FHIR.' | ||
CONCEPT_CSV = INPUT_DIR / 'concept.csv' | ||
CONCEPT_RELATIONSHIP_CSV = INPUT_DIR / 'concept_relationship.csv' | ||
|
||
for d in [RELEASE_DIR, INPUT_DIR]: | ||
os.makedirs(d, exist_ok=True) | ||
|
||
|
||
# TODO: keep this in sync with omop2owl-vocab | ||
# - how to: copy/paste it, and then replace the concept and concept rel parts to default to the paths above (assuming makefile doesn't do this) | ||
def cli(): | ||
"""Command line interface.""" | ||
parser = ArgumentParser(prog=PROG, description=DESC) | ||
# Required | ||
parser.add_argument( | ||
'-c', '--concept-csv-path', required=False, help='Path to CSV of OMOP concept table.') | ||
parser.add_argument( | ||
'-r', '--concept-relationship-csv-path', required=False, | ||
help='Path to CSV of OMOP concept_relationship table.') | ||
# Optional | ||
parser.add_argument( | ||
'-O', '--outdir', required=False, default=os.getcwd(), help='Output directory.') | ||
# todo: would be good to allow them to pass their own pURL | ||
parser.add_argument( | ||
'-I', '--ontology-id', required=False, default='OMOP', # add str(randint(100000, 999999))? | ||
help='Identifier for ontology. Used to generate a pURL and file name.') | ||
parser.add_argument( | ||
'-o', '--output-type', required=False, default='merged-post-split', | ||
choices=['merged', 'split', 'merged-post-split', 'rxnorm'], | ||
help='What output to generate? If "merged" will create an ONTOLOGY_ID.db file with all concepts of all vocabs ' | ||
'merged into one. If "split" will create an ONTOLOGY_ID-*.db file for each vocab. "merged-post-split" ' | ||
'output will be as if running both "split" and "merged", but the merging implementation is different. ' | ||
'Use this option if running out of memory. If using "rxnorm", will create a specifically customized ' | ||
'ONTOLOGY_ID-RxNorm.db.') | ||
parser.add_argument( | ||
'-v', '--vocabs', required=False, nargs='+', | ||
help='Used with `--output-type specific-vocabs-merged`. Which vocabularies to include in the output? Usage: ' | ||
'--vocabs "Procedure Type" "Device Type"') | ||
parser.add_argument( | ||
'-R', '--relationships', required=False, nargs='+', default=['Is a'], | ||
help='Which relationship types from the concept_relationship table\'s relationship_id field to include? ' | ||
'Default is "Is a" only. Passing "ALL" includes everything. Ignored for --output-type options that are ' | ||
'specific to a pre-set vocabulary (e.g. rxnorm). Usage: --realationships "Is a" "Maps to"') | ||
parser.add_argument( | ||
'-S', '--skip-semsql', required=False, action='store_true', | ||
help='In addition to .owl, also convert to a SemanticSQL .db? This is always True except when --output-type is ' | ||
'all-merged-post-split and it is creating initial .owl files to be merged.') | ||
parser.add_argument( | ||
'-e', '--exclude-singletons', required=False, action='store_true', | ||
help='Exclude terms that do not have any relationships. This only applies to --method robot.') | ||
parser.add_argument( | ||
'-s', '--semsql-only', required=False, action='store_true', | ||
help='Use this if the .owl already exists and you just want to create a SemanticSQL .db.') | ||
parser.add_argument( | ||
'-C', '--use-cache', required=False, action='store_true', | ||
help='Of outputs or intermediates already exist, use them.') | ||
parser.add_argument( | ||
'-M', '--memory', required=False, default=100, help='The amount of Java memory (GB) to allocate.') | ||
parser.add_argument('-i', '--install', action='store_true', help='Installs necessary docker images.') | ||
# TODO: Provide customizations for running with preset relationships (I think we want all vocabs) | ||
# - got to look to our slack discussion | ||
# - I won't be able to run the preset relationships. will run out of memory. | ||
def skip_semsql_and_use_cache( | ||
concept_csv_path: str = CONCEPT_CSV, concept_relationship_csv_path: str = CONCEPT_RELATIONSHIP_CSV, | ||
outdir: str = RELEASE_DIR, use_cache: bool = True, skip_semsql: bool = True | ||
): | ||
"""Run ingest | ||
# TODO: Need to switch to **kwargs for most of below | ||
d = vars(parser.parse_args()) | ||
if d['install']: | ||
_run_command('docker pull obolibrary/odkfull:dev') | ||
print('Installation complete. Exiting.') | ||
return | ||
if not d['concept_csv_path'] or not d['concept_relationship_csv_path']: | ||
raise RuntimeError('Must pass --concept-csv-path and --concept-relationship-csv-path') | ||
if d['semsql_only']: | ||
outpath: str = _get_merged_file_outpath(d['outdir'], d['ontology_id'], d['vocabs']) | ||
_convert_semsql(outpath, memory=d['memory']) | ||
elif d['output_type'] == 'split': | ||
run( | ||
concept_csv_path=d['concept_csv_path'], concept_relationship_csv_path=d['concept_relationship_csv_path'], | ||
split_by_vocab=True, use_cache=d['use_cache'], skip_semsql=d['skip_semsql'], | ||
exclude_singletons=d['exclude_singletons'], relationships=d['relationships'], vocabs=d['vocabs'], | ||
memory=d['memory'], outdir=d['outdir']) | ||
elif d['output_type'] == 'merged-post-split': # Default | ||
run( | ||
concept_csv_path=d['concept_csv_path'], concept_relationship_csv_path=d['concept_relationship_csv_path'], | ||
split_by_vocab=True, split_by_vocab_merge_after=True, use_cache=d['use_cache'], | ||
skip_semsql=d['skip_semsql'], exclude_singletons=d['exclude_singletons'], relationships=d['relationships'], | ||
vocabs=d['vocabs'], memory=d['memory'], outdir=d['outdir']) | ||
elif d['output_type'] == 'merged': | ||
run( | ||
concept_csv_path=d['concept_csv_path'], concept_relationship_csv_path=d['concept_relationship_csv_path'], | ||
split_by_vocab=False, use_cache=d['use_cache'], skip_semsql=d['skip_semsql'], memory=d['memory'], | ||
exclude_singletons=d['exclude_singletons'], relationships=d['relationships'], vocabs=d['vocabs'], | ||
outdir=d['outdir']) | ||
elif d['output_type'] == 'rxnorm': | ||
# rxnorm_ingest(concept_csv_path=d['concept_csv_path'], concept_relationship_csv_path=d['concept_relationship_csv_path']) | ||
run( | ||
concept_csv_path=d['concept_csv_path'], concept_relationship_csv_path=d['concept_relationship_csv_path'], | ||
split_by_vocab=True, vocabs=['RxNorm', 'ATC'], use_cache=d['use_cache'], | ||
relationships=['Is a', 'Maps to', 'RxNorm inverse is a'], skip_semsql=d['skip_semsql'], | ||
exclude_singletons=d['exclude_singletons'], memory=d['memory'], outdir=d['outdir']) | ||
Running `python n3c_ingest.n3c_ingest` uses default params, whereas `python -m n3c_ingest` provides a CLI.""" | ||
omop2owl( | ||
concept_csv_path, concept_relationship_csv_path, use_cache=use_cache, skip_semsql=skip_semsql, outdir=outdir, | ||
# todo: may change omop2owl for these to be true by default | ||
split_by_vocab=True, split_by_vocab_merge_after=True, retain_robot_templates=False | ||
) | ||
|
||
|
||
# todo: might want to make a wrapper CLI for this where I have different pre-baked configs to run | ||
if __name__ == '__main__': | ||
t1 = datetime.now() | ||
cli() | ||
skip_semsql_and_use_cache() | ||
t2 = datetime.now() | ||
print(f'Finished in {(t2 - t1).seconds} seconds') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
airium==0.2.6 | ||
annotated-types==0.5.0 | ||
antlr4-python3-runtime==4.9.3 | ||
appdirs==1.4.4 | ||
attrs==23.1.0 | ||
Babel==2.12.1 | ||
bcp47==0.0.4 | ||
beautifulsoup4==4.12.2 | ||
cattrs==23.1.2 | ||
certifi==2023.7.22 | ||
chardet==5.2.0 | ||
charset-normalizer==3.3.0 | ||
class-resolver==0.4.2 | ||
click==8.1.7 | ||
colorama==0.4.6 | ||
curies==0.6.4 | ||
Deprecated==1.2.14 | ||
deprecation==2.1.0 | ||
distlib==0.3.7 | ||
EditorConfig==0.12.3 | ||
eutils==0.6.0 | ||
exceptiongroup==1.1.3 | ||
fastobo==0.12.2 | ||
filelock==3.12.4 | ||
funowl==0.2.3 | ||
ghp-import==2.1.0 | ||
greenlet==2.0.1 | ||
hbreader==0.9.1 | ||
idna==3.4 | ||
ijson==3.2.3 | ||
importlib-metadata==6.8.0 | ||
iniconfig==2.0.0 | ||
isodate==0.6.1 | ||
Jinja2==3.1.2 | ||
jsbeautifier==1.14.9 | ||
json-flattener==0.1.9 | ||
jsonasobj==1.3.1 | ||
jsonasobj2==1.0.4 | ||
jsonschema==4.19.1 | ||
jsonschema-specifications==2023.7.1 | ||
kgcl-rdflib==0.5.0 | ||
kgcl-schema==0.6.0 | ||
lark==1.1.7 | ||
linkml-renderer==0.3.0 | ||
linkml-runtime==1.6.0 | ||
lxml==4.9.3 | ||
Markdown==3.4.4 | ||
MarkupSafe==2.1.3 | ||
mergedeep==1.3.4 | ||
mkdocs==1.5.3 | ||
mkdocs-material==9.4.2 | ||
mkdocs-material-extensions==1.2 | ||
mkdocs-mermaid2-plugin==0.6.0 | ||
more-click==0.1.2 | ||
ndex2==3.5.1 | ||
networkx==3.1 | ||
numpy==1.26.0 | ||
oaklib==0.5.20 | ||
ols-client==0.1.4 | ||
omop2owl-vocab==1.1.0 | ||
ontoportal-client==0.0.4 | ||
packaging==23.2 | ||
paginate==0.5.6 | ||
pandas==2.1.1 | ||
pansql==0.0.1 | ||
pathspec==0.11.2 | ||
pbr==5.11.1 | ||
platformdirs==3.10.0 | ||
pluggy==1.3.0 | ||
prefixcommons==0.1.12 | ||
prefixmaps==0.1.5 | ||
pronto==2.5.5 | ||
pydantic==2.4.2 | ||
pydantic_core==2.10.1 | ||
Pygments==2.16.1 | ||
PyJSG==0.11.10 | ||
pymdown-extensions==10.3 | ||
pyparsing==3.1.1 | ||
pysolr==3.9.0 | ||
pystow==0.5.0 | ||
pytest==7.4.2 | ||
pytest-logging==2015.11.4 | ||
python-dateutil==2.8.2 | ||
PyTrie==0.4.0 | ||
pytz==2023.3.post1 | ||
PyYAML==6.0.1 | ||
pyyaml_env_tag==0.1 | ||
ratelimit==2.2.1 | ||
rdflib==7.0.0 | ||
rdflib-jsonld==0.6.1 | ||
rdflib-shim==1.0.3 | ||
referencing==0.30.2 | ||
regex==2023.8.8 | ||
requests==2.31.0 | ||
requests-cache==1.1.0 | ||
requests-toolbelt==1.0.0 | ||
rfc3987==1.3.8 | ||
rpds-py==0.10.3 | ||
scipy==1.11.3 | ||
semsimian==0.2.1 | ||
semsql==0.3.2 | ||
six==1.16.0 | ||
sortedcontainers==2.4.0 | ||
soupsieve==2.5 | ||
SPARQLWrapper==2.0.0 | ||
SQLAlchemy==2.0.21 | ||
SQLAlchemy-Utils==0.38.3 | ||
sssom==0.3.41 | ||
sssom-schema==0.15.0 | ||
stevedore==5.1.0 | ||
tomli==2.0.1 | ||
tqdm==4.66.1 | ||
typing_extensions==4.8.0 | ||
tzdata==2023.3 | ||
url-normalize==1.4.3 | ||
urllib3==2.0.5 | ||
validators==0.22.0 | ||
virtualenv==20.24.5 | ||
virtualenv-clone==0.5.7 | ||
virtualenvwrapper==4.8.4 | ||
watchdog==3.0.0 | ||
wrapt==1.15.0 | ||
zipp==3.17.0 |