Skip to content

Commit

Permalink
ORDO Mappings
Browse files Browse the repository at this point in the history
- Delete: Unnecessary files and goals: the temporary analysis script, and the goals for downloading ordo.owl and querying it via SPARQL.
- Add: SSSOM validation
- Update: Replaced ad hoc SSSOM utilities with the ones from sssom-py
  • Loading branch information
joeflack4 committed Apr 7, 2024
1 parent 12dc3d1 commit c4aa5c0
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 77 deletions.
Empty file added a.txt
Empty file.
4 changes: 3 additions & 1 deletion config/icd11.sssom-metadata.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
creator_id: 0000-0002-2906-7319
creator_id: orcid:0000-0002-2906-7319
curie_map:
Orphanet: http://www.orpha.net/ORDO/Orphanet_
icd11.foundation: http://id.who.int/icd/entity/
oboInOwl: http://www.geneontology.org/formats/oboInOwl#
orcid: https://orcid.org/
owl: http://www.w3.org/2002/07/owl#
rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
rdfs: http://www.w3.org/2000/01/rdf-schema#
semapv: https://w3id.org/semapv/
skos: http://www.w3.org/2004/02/skos/core#
sssom: https://w3id.org/sssom/
license: http://w3id.org/sssom/license/unspecified
mapping_provider: https://www.orpha.net/
19 changes: 10 additions & 9 deletions makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
.DEFAULT_GOAL := all
.PHONY: all release clean
.PHONY: all release clean ontology mappings mappings-validate help
TODAY ?=$(shell date +%Y-%m-%d)
VERSION=v$(TODAY)
SOURCE_URL=https://icd11files.blob.core.windows.net/tmp/whofic-2023-04-08.owl.gz


# MAIN COMMANDS / GOALS ------------------------------------------------------------------------------------------------
all: tmp/output/release/icd11foundation.owl tmp/output/release/ordo-icd11.sssom.tsv
all: ontology mappings

ontology: tmp/output/release/icd11foundation.owl

mappings: mappings-validate

mappings-validate: tmp/output/release/ordo-icd11.sssom.tsv
sssom validate $<

clean:
rm -rf tmp/
Expand Down Expand Up @@ -42,13 +49,7 @@ tmp/input/Orphanet_Nomenclature_Pack_EN/ORPHA_ICD11_mapping_en_newversion_2023.x
wget https://www.orphadata.com/data/nomenclature/packs/Orphanet_Nomenclature_Pack_EN.zip -O tmp/input/Orphanet_Nomenclature_Pack_EN.zip
unzip tmp/input/Orphanet_Nomenclature_Pack_EN.zip -d tmp/input/Orphanet_Nomenclature_Pack_EN

tmp/input/ordo.owl: | tmp/input/
wget http://www.orphadata.org/data/ORDO/ordo_orphanet.owl -O $@

tmp/output/icd11mms-exact-matches.tsv: tmp/input/ordo.owl
robot query -i $< --query src/icd11mms-exact-matches.sparql $@

tmp/output/release/ordo-icd11.sssom.tsv: tmp/input/Orphanet_Nomenclature_Pack_EN/ORPHA_ICD11_mapping_en_newversion_2023.xml tmp/output/icd11mms-exact-matches.tsv | tmp/output/release/
tmp/output/release/ordo-icd11.sssom.tsv: tmp/input/Orphanet_Nomenclature_Pack_EN/ORPHA_ICD11_mapping_en_newversion_2023.xml | tmp/output/release/
python3 src/mappings.py \
--input-nomenclature-xml tmp/input/Orphanet_Nomenclature_Pack_EN/ORPHA_ICD11_mapping_en_newversion_2023.xml \
--input-sssom-config config/icd11.sssom-metadata.yml \
Expand Down
1 change: 1 addition & 0 deletions requirements-unlocked.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pandas
pyyaml
sssom
68 changes: 57 additions & 11 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,60 @@
distlib==0.3.6
filelock==3.9.0
numpy==1.25.1
pandas==2.0.3
pbr==5.11.1
platformdirs==3.1.0
python-dateutil==2.8.2
pytz==2023.3
annotated-types==0.6.0
attrs==23.2.0
certifi==2024.2.2
charset-normalizer==3.3.2
click==8.1.7
curies==0.7.9
Deprecated==1.2.14
deprecation==2.1.0
distlib==0.3.8
exceptiongroup==1.2.0
filelock==3.13.1
hbreader==0.9.1
idna==3.6
importlib_resources==6.4.0
iniconfig==2.0.0
isodate==0.6.1
json-flattener==0.1.9
jsonasobj2==1.0.4
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
linkml-runtime==1.7.5
networkx==3.3
numpy==1.26.4
packaging==24.0
pandas==2.2.1
pansql==0.0.1
pbr==6.0.0
platformdirs==4.2.0
pluggy==1.4.0
prefixcommons==0.1.12
prefixmaps==0.2.3
pydantic==2.6.4
pydantic_core==2.16.3
pyparsing==3.1.2
pytest==8.1.1
pytest-logging==2015.11.4
python-dateutil==2.9.0.post0
PyTrie==0.4.0
pytz==2024.1
PyYAML==6.0.1
rdflib==7.0.0
referencing==0.34.0
requests==2.31.0
rpds-py==0.18.0
scipy==1.13.0
six==1.16.0
stevedore==5.0.0
tzdata==2023.3
virtualenv==20.20.0
sortedcontainers==2.4.0
SPARQLWrapper==2.0.0
SQLAlchemy==2.0.29
sssom==0.4.6
sssom-schema==0.15.2
stevedore==5.1.0
tomli==2.0.1
typing_extensions==4.11.0
tzdata==2024.1
urllib3==2.2.1
validators==0.28.0
virtualenv==20.25.0
virtualenv-clone==0.5.7
wrapt==1.16.0
22 changes: 0 additions & 22 deletions src/icd11mms-exact-matches.sparql

This file was deleted.

23 changes: 19 additions & 4 deletions src/mappings.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
"""Extract mappings"""
"""Extract mappings
TODOs (minor):
1. icd11.sssom-metadata.yml: Generalize it by removing the 'mapping_provider' field, which links to Orphanet, and
instead set that field dynamically, only for these ORDO mappings.
"""
from argparse import ArgumentParser
from pathlib import Path
import xml.etree.ElementTree as eleTree
from typing import Dict

import curies
import pandas as pd

from utils import write_sssom
import yaml
from sssom import MappingSetDataFrame
from sssom.writers import write_table

SRC_DIR = Path(__file__).parent
PROJECT_DIR = SRC_DIR.parent
Expand Down Expand Up @@ -131,7 +139,14 @@ def run(

# Get only columns we care about
df = df[['subject_id', 'subject_label', 'predicate_id', 'object_id']]
write_sssom(df, input_sssom_config, outpath)

# Write file
with open(input_sssom_config, 'r') as yaml_file:
metadata: Dict = yaml.load(yaml_file, Loader=yaml.FullLoader)
converter = curies.Converter.from_prefix_map(metadata['curie_map'])
msdf: MappingSetDataFrame = MappingSetDataFrame(converter=converter, df=df, metadata=metadata)
with open(outpath, 'w') as f:
write_table(msdf, f)


def cli():
Expand Down
30 changes: 0 additions & 30 deletions src/temp_compare_matches_owl_and_nomenclature.py

This file was deleted.

0 comments on commit c4aa5c0

Please sign in to comment.