Skip to content

Commit

Permalink
Merge pull request #12 from monarch-initiative/ordo-mappings2
Browse files Browse the repository at this point in the history
ICD11 --> ORDO mappings
  • Loading branch information
joeflack4 authored Apr 18, 2024
2 parents f1e37f4 + 20350d7 commit f6a4c19
Show file tree
Hide file tree
Showing 10 changed files with 100,391 additions and 40 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,6 @@ dmypy.json

.idea/
tmp/

# Custom
_archive/
25 changes: 14 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# ICD11 Foundation Ingest

## Pre-reqs
- Docker
- Docker images
One or both of the following, depending on if you want to run the stable build `latest` or `dev`:
- a. `docker pull obolibrary/odkfull:latest`
- b. `docker pull obolibrary/odkfull:dev`

## Running
`sh run.sh make all`
# ICD11 Foundation Ingest

## Pre-reqs
- Docker
- Docker images
One or both of the following, depending on whether you want to run the stable build `latest` or `dev`:
- a. `docker pull obolibrary/odkfull:latest`
- b. `docker pull obolibrary/odkfull:dev`

## Local development setup
`pip install -r requirements.txt`

## Running
`sh run.sh make all`
15 changes: 15 additions & 0 deletions config/icd11.sssom-metadata.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
creator_id: orcid:0000-0002-2906-7319
curie_map:
icd11.foundation: http://id.who.int/icd/entity/
MONDO: http://purl.obolibrary.org/obo/MONDO_
oboInOwl: http://www.geneontology.org/formats/oboInOwl#
orcid: https://orcid.org/
owl: http://www.w3.org/2002/07/owl#
Orphanet: http://www.orpha.net/ORDO/Orphanet_
rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
rdfs: http://www.w3.org/2000/01/rdf-schema#
semapv: https://w3id.org/semapv/
skos: http://www.w3.org/2004/02/skos/core#
sssom: https://w3id.org/sssom/
license: http://w3id.org/sssom/license/unspecified
mapping_provider: https://www.orpha.net/
100,016 changes: 100,016 additions & 0 deletions icd11foundation_labels.tsv

Large diffs are not rendered by default.

44 changes: 42 additions & 2 deletions makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
.DEFAULT_GOAL := all
.PHONY: all release clean
.PHONY: all release clean ontology mappings mappings-validate help
# Date stamp in YYYY-MM-DD form, taken from `date`. `?=` only assigns when TODAY is not
# already set, so it can be supplied by the caller, e.g. `make release TODAY=2024-04-18`.
TODAY ?=$(shell date +%Y-%m-%d)
# Version string: "v" followed by TODAY. The release recipe passes it to
# `gh release create` as both the tag and the title.
VERSION=v$(TODAY)
# URL of the gzipped OWL source; the tmp/input/source.gz rule fetches it with wget.
SOURCE_URL=https://icd11files.blob.core.windows.net/tmp/whofic-2023-04-08.owl.gz


# MAIN COMMANDS / GOALS ------------------------------------------------------------------------------------------------
all: tmp/output/release/icd11foundation.owl
# Default goal: build the ontology release file and the validated mapping sets.
all: ontology mappings

# Convenience alias for the released OWL file.
ontology: tmp/output/release/icd11foundation.owl

# The mappings goal is reached by validating the generated mapping files.
mappings: mappings-validate

# Run `sssom validate` on both released mapping sets. Only the MONDO file is listed as a
# prerequisite; the ORDO file is a target of the same rule that generates the MONDO file
# and is looked up in the same directory.
mappings-validate: tmp/output/release/mondo_exactmatch_icd11foundation.sssom.tsv
	sssom validate $<
	sssom validate $(<D)/ordo_exactmatch_icd11foundation.sssom.tsv

# Delete the tmp/ working tree, which holds the downloaded inputs (tmp/input/) and the
# generated outputs (tmp/output/).
clean:
	rm -r -f tmp/
Expand All @@ -26,6 +34,7 @@ tmp/input/:

# Decompress the downloaded source ontology.
#
# The archive is declared intermediate instead of being deleted with `rm $<` in the recipe:
# make still removes it once source.owl has been built, but a missing intermediate file is
# not a reason to rebuild, so later runs do not download and decompress everything again.
# The output is written under a temporary name and moved into place so that a failed or
# interrupted gunzip never leaves a truncated source.owl that looks up to date.
.INTERMEDIATE: tmp/input/source.gz
tmp/input/source.owl: tmp/input/source.gz
	gunzip -c $< > $@.tmp
	mv -f $@.tmp $@

# Download the gzipped source ontology from SOURCE_URL.
#
# `wget -O FILE` creates FILE even when the transfer fails, which would leave an empty or
# partial archive that make considers up to date. Download to a temporary name and only
# move it onto the target after wget has succeeded.
tmp/input/source.gz: | tmp/input/
	wget ${SOURCE_URL} -O $@.tmp
	mv -f $@.tmp $@
Expand All @@ -35,6 +44,37 @@ release: | tmp/output/release/
@test $(VERSION)
gh release create $(VERSION) --notes "New release." --title "$(VERSION)" tmp/output/release/*

# Mappings
# USE_PREBUILT selects where tmp/input/mondo.sssom.tsv comes from:
#   true (default): download the file published on the master branch of the Mondo repository.
#   anything else:  clone Mondo and build the mappings locally, e.g. `make mappings USE_PREBUILT=false`.
USE_PREBUILT=true

# Shallow clone of the Mondo repository. Only required for a local mappings build.
# tmp/input/ is an order-only prerequisite because the recipe changes into it.
tmp/input/mondo/: | tmp/input/
	rm -rf $@ &&\
	cd tmp/input/ &&\
	git clone --depth 1 https://github.com/monarch-initiative/mondo

ifeq ($(strip $(USE_PREBUILT)),true)
# Prebuilt mode needs no clone. The download goes to a temporary name first because
# `wget -O` creates its output file even when the transfer fails.
tmp/input/mondo.sssom.tsv: | tmp/input/
	wget https://raw.githubusercontent.com/monarch-initiative/mondo/master/src/ontology/mappings/mondo.sssom.tsv -O $@.tmp
	mv -f $@.tmp $@
else
# Local build. The clone is order-only so that changes to the directory's timestamp do not
# trigger rebuilds. The build and the copy are separate recipe lines on purpose: each line
# runs in its own shell, so `cp` executes from the project root, where both its source path
# and $@ are valid. Running `cp` after the `cd` in the same shell fails with
# "cannot stat 'tmp/input/mondo/src/ontology/mappings/mondo.sssom.tsv'".
tmp/input/mondo.sssom.tsv: | tmp/input/mondo/
	cd tmp/input/mondo/src/ontology && $(MAKE) mondo.owl mappings -B MIR=false IMP=false
	cp tmp/input/mondo/src/ontology/mappings/mondo.sssom.tsv $@
endif

# todo: Stable URI/filename issue: https://github.com/monarch-initiative/icd11/pull/12#discussion_r1542187711
# Download the Orphanet nomenclature pack and unpack it; the target is the ICD11 mapping
# XML expected inside the archive.
#   - `unzip -o` overwrites files left over from a previous extraction instead of prompting.
#   - The final check fails the build with a clear message when the archive does not
#     contain a file with the expected name (see the todo above).
tmp/input/Orphanet_Nomenclature_Pack_EN/ORPHA_ICD11_mapping_en_newversion_2023.xml: | tmp/input/
	wget https://www.orphadata.com/data/nomenclature/packs/Orphanet_Nomenclature_Pack_EN.zip -O tmp/input/Orphanet_Nomenclature_Pack_EN.zip
	unzip -o tmp/input/Orphanet_Nomenclature_Pack_EN.zip -d tmp/input/Orphanet_Nomenclature_Pack_EN
	@test -f $@ || { echo "ERROR: $@ is missing from the downloaded nomenclature pack" >&2; exit 1; }

# todo: rename icd11.sssom-metadata.yml --> icd11foundation.sssom-metadata.yml
# Generate both released mapping files with one call to src/mappings.py.
# Inputs: the Orphanet ICD11 mapping XML (first prerequisite), the Mondo SSSOM file
# (second prerequisite) and the SSSOM metadata config. Both output paths are passed on
# every invocation, whichever of the two targets make is building.
tmp/output/release/mondo_exactmatch_icd11foundation.sssom.tsv \
tmp/output/release/ordo_exactmatch_icd11foundation.sssom.tsv: \
    tmp/input/Orphanet_Nomenclature_Pack_EN/ORPHA_ICD11_mapping_en_newversion_2023.xml \
    tmp/input/mondo.sssom.tsv \
    | tmp/output/release/
	python3 src/mappings.py \
	--input-nomenclature-xml $< \
	--input-sssom-config config/icd11.sssom-metadata.yml \
	--input-mondo-sssom $(word 2,$^) \
	--outpath-ordo-mappings $(@D)/ordo_exactmatch_icd11foundation.sssom.tsv \
	--outpath-mondo-mappings $(@D)/mondo_exactmatch_icd11foundation.sssom.tsv

# HELP -----------------------------------------------------------------------------------------------------------------
help:
@echo "-----------------------------------"
Expand Down
3 changes: 3 additions & 0 deletions requirements-unlocked.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pandas
pyyaml
sssom
60 changes: 60 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
annotated-types==0.6.0
attrs==23.2.0
certifi==2024.2.2
charset-normalizer==3.3.2
click==8.1.7
curies==0.7.9
Deprecated==1.2.14
deprecation==2.1.0
distlib==0.3.8
exceptiongroup==1.2.0
filelock==3.13.1
hbreader==0.9.1
idna==3.6
importlib_resources==6.4.0
iniconfig==2.0.0
isodate==0.6.1
json-flattener==0.1.9
jsonasobj2==1.0.4
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
linkml-runtime==1.7.5
networkx==3.3
numpy==1.26.4
packaging==24.0
pandas==2.2.1
pansql==0.0.1
pbr==6.0.0
platformdirs==4.2.0
pluggy==1.4.0
prefixcommons==0.1.12
prefixmaps==0.2.3
pydantic==2.6.4
pydantic_core==2.16.3
pyparsing==3.1.2
pytest==8.1.1
pytest-logging==2015.11.4
python-dateutil==2.9.0.post0
PyTrie==0.4.0
pytz==2024.1
PyYAML==6.0.1
rdflib==7.0.0
referencing==0.34.0
requests==2.31.0
rpds-py==0.18.0
scipy==1.13.0
six==1.16.0
sortedcontainers==2.4.0
SPARQLWrapper==2.0.0
SQLAlchemy==2.0.29
sssom==0.4.6
sssom-schema==0.15.2
stevedore==5.1.0
tomli==2.0.1
typing_extensions==4.11.0
tzdata==2024.1
urllib3==2.2.1
validators==0.28.0
virtualenv==20.25.0
virtualenv-clone==0.5.7
wrapt==1.16.0
32 changes: 5 additions & 27 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
#
# See README-editors.md for more details.

set -e

if [ -f run.sh.conf ]; then
. ./run.sh.conf
fi
Expand All @@ -31,22 +29,6 @@ elif [ -f "$HOME/Library/Application Support/ontology-development-kit/github/tok
GH_TOKEN=$(cat "$HOME/Library/Application Support/ontology-development-kit/github/token")
fi

# SSH agent socket
# On macOS, we cannot use $SSH_AUTH_SOCK directly,
# we need to use a "magic" socket instead.
case "$(uname)" in
Darwin)
ODK_SSH_AUTH_SOCKET=/run/host-services/ssh-auth.sock
;;
*)
ODK_SSH_AUTH_SOCKET=$SSH_AUTH_SOCK
;;
esac
ODK_SSH_BIND=
if [ -n "$ODK_SSH_AUTH_SOCKET" ]; then
ODK_SSH_BIND=",$ODK_SSH_AUTH_SOCKET:/run/host-services/ssh-auth.sock"
fi

ODK_IMAGE=${ODK_IMAGE:-odkfull}
TAG_IN_IMAGE=$(echo $ODK_IMAGE | awk -F':' '{ print $2 }')
if [ -n "$TAG_IN_IMAGE" ]; then
Expand All @@ -58,9 +40,6 @@ ODK_TAG=${ODK_TAG:-latest}
ODK_JAVA_OPTS=${ODK_JAVA_OPTS:--Xmx20G}
ODK_DEBUG=${ODK_DEBUG:-no}

ODK_USER_ID=${ODK_USER_ID:-$(id -u)}
ODK_GROUP_ID=${ODK_GROUP_ID:-$(id -g)}

# Convert OWLAPI_* environment variables to the OWLAPI as Java options
# See http://owlcs.github.io/owlapi/apidocs_4/org/semanticweb/owlapi/model/parameters/ConfigurationOptions.html
# for a list of allowed options
Expand All @@ -77,26 +56,25 @@ if [ x$ODK_DEBUG = xyes ]; then
echo "Running ${IMAGE} with ${ODK_JAVA_OPTS} of memory for ROBOT and Java-based pipeline steps."
TIMECMD="/usr/bin/time -f ### DEBUG STATS ###\nElapsed time: %E\nPeak memory: %M kb"
fi
rm -f tmp/debug.log

VOLUME_BIND=$PWD/../../:/work$ODK_SSH_BIND
WORK_DIR=/work/src/ontology
VOLUME_BIND=$PWD:/work
WORK_DIR=/work

if [ -n "$ODK_BINDS" ]; then
VOLUME_BIND="$VOLUME_BIND,$ODK_BINDS"
fi

if [ -n "$USE_SINGULARITY" ]; then

singularity exec --cleanenv $ODK_SINGULARITY_OPTIONS \
--env "ROBOT_JAVA_ARGS=$ODK_JAVA_OPTS,JAVA_OPTS=$ODK_JAVA_OPTS,SSH_AUTH_SOCK=/run/host-services/ssh-auth.sock,ODK_USER_ID=$ODK_USER_ID,ODK_GROUP_ID=$ODK_GROUP_ID,ODK_DEBUG=$ODK_DEBUG" \
--env "ROBOT_JAVA_ARGS=$ODK_JAVA_OPTS,JAVA_OPTS=$ODK_JAVA_OPTS" \
--bind $VOLUME_BIND \
-W $WORK_DIR \
docker://obolibrary/$ODK_IMAGE:$ODK_TAG $TIMECMD "$@"
else
# Turn the comma-separated bind list into repeated `-v` options for docker.
# The `g` flag converts every comma; without it only the first one is replaced, so a
# VOLUME_BIND extended with several ODK_BINDS entries yields a malformed `-v` argument.
BIND_OPTIONS="-v $(echo $VOLUME_BIND | sed 's/,/ -v /g')"
docker run $ODK_DOCKER_OPTIONS $BIND_OPTIONS -w $WORK_DIR \
-e ROBOT_JAVA_ARGS="$ODK_JAVA_OPTS" -e JAVA_OPTS="$ODK_JAVA_OPTS" -e SSH_AUTH_SOCK=/run/host-services/ssh-auth.sock -e ODK_USER_ID=$ODK_USER_ID -e ODK_GROUP_ID=$ODK_GROUP_ID -e ODK_DEBUG=$ODK_DEBUG \
-e ROBOT_JAVA_ARGS="$ODK_JAVA_OPTS" -e JAVA_OPTS="$ODK_JAVA_OPTS" \
--rm -ti obolibrary/$ODK_IMAGE:$ODK_TAG $TIMECMD "$@"
fi

Expand Down
Loading

0 comments on commit f6a4c19

Please sign in to comment.