Skip to content

Commit

Permalink
temp
Browse files Browse the repository at this point in the history
  • Loading branch information
joeflack4 committed May 19, 2024
1 parent 995968c commit 392c4cb
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 18 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/buid_and_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@ jobs:
title: "${{ steps.current-time.outputs.formattedTime }}"
prerelease: false
# todo: add back `release/medgen-disease-extract.owl`, pending https://github.com/monarch-initiative/medgen/issues/11
# todo: add `medgen.sssom.tsv`, pending https://github.com/monarch-initiative/medgen/issues/6
# output/release/medgen-disease-extract.owl
# output/release/medgen.sssom.tsv
files: |
output/release/medgen.obo
output/release/medgen-disease-extract.obo
Expand Down
27 changes: 14 additions & 13 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ minimal: build-lite stage-lite clean
# stage-lite: These commented out files are produced by `all` but not by `minimal`. Just left here for reference. See: https://github.com/monarch-initiative/medgen/issues/11
stage-lite: | output/release/
# mv medgen-disease-extract.owl output/release/
# mv medgen.sssom.tsv output/release/
mv *.obo output/release/
mv *.robot.template.tsv output/release/
mv *.sssom.tsv output/release/
Expand Down Expand Up @@ -75,21 +74,21 @@ medgen-disease-extract.obo: x-Disease-or-Syndrome.obo x-Neoplastic-Process.obo
medgen-disease-extract.json: medgen-disease-extract.obo
owltools $< -o -f json $@

medgen-disease-extract.owl: medgen-disease-extract.obo
output/medgen-disease-extract.owl: medgen-disease-extract.obo | output/
owltools $< -o $@

# SSSOM ----------------------------------
# todo: commented-out old pipeline: remove
#medgen.obographs.json:
# robot convert -i medgen-disease-extract.owl -o $@
#
#medgen.sssom.tsv: medgen.obographs.json
# sssom parse medgen.obographs.json -I obographs-json -m config/medgen.sssom-metadata.yml -o $@
sssom: umls-hpo.sssom.tsv
sssom validate umls-hpo.sssom.tsv
sssom validate hpo-mesh.sssom.tsv

umls-hpo.sssom.tsv hpo-mesh.sssom.tsv: ftp.ncbi.nlm.nih.gov/pub/medgen/MedGenIDMappings.txt
# Convert the disease-extract OWL into OBO Graphs JSON.
# Uses $< (the first prerequisite) so the input path tracks the prerequisite, which lives in output/ rather than the
# working directory. `| output/` is an order-only prerequisite that only ensures the directory exists.
output/medgen.obographs.json: output/medgen-disease-extract.owl | output/
	robot convert -i $< -o $@

# Parse the OBO Graphs JSON into an SSSOM mapping set, using the project's SSSOM metadata config.
# Uses $< (the first prerequisite) so the input path tracks the prerequisite, which lives in output/ rather than the
# working directory. `| output/` is an order-only prerequisite that only ensures the directory exists.
output/medgen.sssom.tsv: output/medgen.obographs.json | output/
	sssom parse $< -I obographs-json -m config/medgen.sssom-metadata.yml -o $@

umls-hpo.sssom.tsv hpo-mesh.sssom.tsv output/hpo-mesh_non-matches-included.sssom.tsv: ftp.ncbi.nlm.nih.gov/pub/medgen/MedGenIDMappings.txt
python src/create_sssom.py --input-mappings $< --input-sssom-config config/medgen.sssom-metadata.yml

# ----------------------------------------
Expand All @@ -113,16 +112,18 @@ tmp/input/mondo.sssom.tsv: | tmp/input/
wget http://purl.obolibrary.org/obo/mondo/mappings/mondo.sssom.tsv -O $@

# creates more than just this file; that goal creates multiple files
output/medgen_terms_mapping_status.tsv output/obsoleted_medgen_terms_in_mondo.txt: tmp/input/mondo.sssom.tsv | output/
output/medgen_terms_mapping_status.tsv output/obsoleted_medgen_terms_in_mondo.txt: tmp/input/mondo.sssom.tsv output/medgen.sssom.tsv | output/
python src/mondo_mapping_status.py

# ----------------------------------------
# Robot templates
# ----------------------------------------
# todo: Ideally I wanted this done at the end of the ingest, permuting from medgen.sssom.tsv, but there were some
# problems with that file. Eventually changing to that feels like it makes more sense. Will have already been
# pre-curated by disease. And some of the logic in this Python script is duplicative.
medgen-xrefs.robot.template.tsv medgen-xrefs-mesh.robot.template.tsv: ftp.ncbi.nlm.nih.gov/pub/medgen/MedGenIDMappings.txt
# problems with that file. Eventually changing to that feels like it makes more sense. Will have already been
# pre-curated by disease. And some of the logic in this Python script is duplicative. Update 2024/05/19: I did include
# medgen.sssom.tsv as a prerequisite; the alternative build, 'make all', was failing without it. It has a different role
# than the 'permutation' I mention above.
medgen-xrefs.robot.template.tsv medgen-xrefs-mesh.robot.template.tsv: ftp.ncbi.nlm.nih.gov/pub/medgen/MedGenIDMappings.txt output/medgen.sssom.tsv
python src/mondo_robot_template.py -i $< \
--outpath-general medgen-xrefs.robot.template.tsv \
--outpath-mesh medgen-xrefs-mesh.robot.template.tsv
5 changes: 3 additions & 2 deletions src/create_sssom.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@
PROJECT_DIR = SRC_DIR.parent
FTP_DIR = PROJECT_DIR / "ftp.ncbi.nlm.nih.gov" / "pub" / "medgen"
CONFIG_DIR = PROJECT_DIR / "config"
OUTDIR = PROJECT_DIR / "output"
INPUT_MAPPINGS = str(FTP_DIR / "MedGenIDMappings.txt")
INPUT_CONFIG = str(CONFIG_DIR / "medgen.sssom-metadata.yml")
OUTPUT_FILE_HPO_UMLS = str(PROJECT_DIR / "umls-hpo.sssom.tsv")
OUTPUT_FILE_HPO_MESH = str(PROJECT_DIR / "hpo-mesh.sssom.tsv")
OUTPUT_FILE_HPO_MESH_WITH_NON_MATCHES = str(OUTDIR / "hpo-mesh_non-matches-included.sssom.tsv")


def _filter_and_format_cols(df: pd.DataFrame, source: str) -> pd.DataFrame:
Expand Down Expand Up @@ -54,8 +56,7 @@ def run(input_mappings: str = INPUT_MAPPINGS, input_sssom_config: str = INPUT_CO
# move the col removals below (umls) to above
# - add mapping_justification
df_hpo_mesh['mapping_justification'] = 'semapv:ManualMappingCuration'
write_sssom(df_hpo_mesh, input_sssom_config,
OUTPUT_FILE_HPO_MESH.replace('.sssom.tsv', '-non-matches-included.sssom.tsv'))
write_sssom(df_hpo_mesh, input_sssom_config, OUTPUT_FILE_HPO_MESH_WITH_NON_MATCHES)
# -- filter non-matches & drop unneeded cols
df_hpo_mesh = df_hpo_mesh[df_hpo_mesh['subject_id'].notna()][[
x for x in df_hpo_mesh.columns if not x.startswith('umls')]]
Expand Down
2 changes: 1 addition & 1 deletion src/mondo_mapping_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
RELEASE_OUTDIR = OUTDIR / 'release'
INPUT_DIR = PROJECT_DIR / 'tmp' / 'input'
MONDO_SSSOM_TSV = INPUT_DIR / 'mondo.sssom.tsv'
MEDGEN_SSSOM_TSV = RELEASE_OUTDIR / 'medgen.sssom.tsv'
MEDGEN_SSSOM_TSV = OUTDIR / 'medgen.sssom.tsv'
# MEDGEN_PREFIXES: Some of these are old, some are new, some may not be used.
# todo: If I couldn't convert SSSOM properly with MedGen_CUI, shouldn't UMLS_CUI have a problem? Though I think it's just coming from previous work in Mondo, maybe. It's not being used in this ingest.
MEDGEN_PREFIXES = [
Expand Down

0 comments on commit 392c4cb

Please sign in to comment.