-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Delete: Unnecessary files and goals (temporary analysis; downloading ordo.owl and querying it via SPARQL). - Add: SSSOM validation. - Update: Replaced ad hoc SSSOM utilities with the ones from sssom-py. - Add: mondo-icd11 mappings.
- Loading branch information
Showing
8 changed files
with
167 additions
and
136 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,15 @@ | ||
creator_id: 0000-0002-2906-7319 | ||
creator_id: orcid:0000-0002-2906-7319 | ||
curie_map: | ||
Orphanet: http://www.orpha.net/ORDO/Orphanet_ | ||
icd11.foundation: http://id.who.int/icd/entity/ | ||
MONDO: http://purl.obolibrary.org/obo/MONDO_ | ||
oboInOwl: http://www.geneontology.org/formats/oboInOwl# | ||
orcid: https://orcid.org/ | ||
owl: http://www.w3.org/2002/07/owl# | ||
Orphanet: http://www.orpha.net/ORDO/Orphanet_ | ||
rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# | ||
rdfs: http://www.w3.org/2000/01/rdf-schema# | ||
semapv: https://w3id.org/semapv/ | ||
skos: http://www.w3.org/2004/02/skos/core# | ||
sssom: https://w3id.org/sssom/ | ||
license: http://w3id.org/sssom/license/unspecified | ||
mapping_provider: https://www.orpha.net/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
pandas | ||
pyyaml | ||
sssom |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,60 @@ | ||
distlib==0.3.6 | ||
filelock==3.9.0 | ||
numpy==1.25.1 | ||
pandas==2.0.3 | ||
pbr==5.11.1 | ||
platformdirs==3.1.0 | ||
python-dateutil==2.8.2 | ||
pytz==2023.3 | ||
annotated-types==0.6.0 | ||
attrs==23.2.0 | ||
certifi==2024.2.2 | ||
charset-normalizer==3.3.2 | ||
click==8.1.7 | ||
curies==0.7.9 | ||
Deprecated==1.2.14 | ||
deprecation==2.1.0 | ||
distlib==0.3.8 | ||
exceptiongroup==1.2.0 | ||
filelock==3.13.1 | ||
hbreader==0.9.1 | ||
idna==3.6 | ||
importlib_resources==6.4.0 | ||
iniconfig==2.0.0 | ||
isodate==0.6.1 | ||
json-flattener==0.1.9 | ||
jsonasobj2==1.0.4 | ||
jsonschema==4.21.1 | ||
jsonschema-specifications==2023.12.1 | ||
linkml-runtime==1.7.5 | ||
networkx==3.3 | ||
numpy==1.26.4 | ||
packaging==24.0 | ||
pandas==2.2.1 | ||
pansql==0.0.1 | ||
pbr==6.0.0 | ||
platformdirs==4.2.0 | ||
pluggy==1.4.0 | ||
prefixcommons==0.1.12 | ||
prefixmaps==0.2.3 | ||
pydantic==2.6.4 | ||
pydantic_core==2.16.3 | ||
pyparsing==3.1.2 | ||
pytest==8.1.1 | ||
pytest-logging==2015.11.4 | ||
python-dateutil==2.9.0.post0 | ||
PyTrie==0.4.0 | ||
pytz==2024.1 | ||
PyYAML==6.0.1 | ||
rdflib==7.0.0 | ||
referencing==0.34.0 | ||
requests==2.31.0 | ||
rpds-py==0.18.0 | ||
scipy==1.13.0 | ||
six==1.16.0 | ||
stevedore==5.0.0 | ||
tzdata==2023.3 | ||
virtualenv==20.20.0 | ||
sortedcontainers==2.4.0 | ||
SPARQLWrapper==2.0.0 | ||
SQLAlchemy==2.0.29 | ||
sssom==0.4.6 | ||
sssom-schema==0.15.2 | ||
stevedore==5.1.0 | ||
tomli==2.0.1 | ||
typing_extensions==4.11.0 | ||
tzdata==2024.1 | ||
urllib3==2.2.1 | ||
validators==0.28.0 | ||
virtualenv==20.25.0 | ||
virtualenv-clone==0.5.7 | ||
wrapt==1.16.0 |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,56 +1,20 @@ | ||
"""Utilities""" | ||
import os | ||
from pathlib import Path | ||
from typing import Set, Union | ||
from typing import Dict, Union | ||
|
||
import curies | ||
import pandas as pd | ||
import yaml | ||
from sssom import MappingSetDataFrame | ||
from sssom.writers import write_table | ||
|
||
|
||
def find_prefixes_in_mapping_set(source_df: pd.DataFrame) -> Set[str]: | ||
"""Find prefixes in mapping set""" | ||
df = source_df.copy() | ||
cols_with_prefixes = ['subject_id', 'object_id', 'predicate_id'] | ||
prefixes = set() | ||
for col in cols_with_prefixes: | ||
col2 = col.replace('id', 'prefix') | ||
df[col2] = df[col].apply(lambda x: x.split(':')[0] | ||
if isinstance(x, str) else x) # handles nan | ||
prefixes.update(set(df[col2].to_list())) | ||
return prefixes | ||
|
||
|
||
# todo: Add to sssom-py. Shared between, at the least, ICD11 and MedGen repos | ||
def write_sssom(df: pd.DataFrame, config_path: Union[Path, str], outpath: Union[Path, str]): | ||
"""Writes a SSSOM file with commented metadata at the top of the file. | ||
Filters only prefixes in curie_map that exist in the mapping set.""" | ||
temp_filtered_config_path = str(config_path) + '.tmp' | ||
# Load config | ||
config = yaml.safe_load(open(config_path, 'r')) | ||
# Filter curie_map | ||
prefixes: Set[str] = find_prefixes_in_mapping_set(df) | ||
config['curie_map'] = {k: v for k, v in config['curie_map'].items() if k in prefixes} | ||
# Write | ||
with open(temp_filtered_config_path, 'w') as f: | ||
yaml.dump(config, f) | ||
write_tsv_with_comments(df, temp_filtered_config_path, outpath) | ||
os.remove(temp_filtered_config_path) | ||
|
||
|
||
def write_tsv_with_comments(df: pd.DataFrame, comments_file: Union[Path, str], outpath: Union[Path, str]): | ||
"""Write a TSV with comments at the top""" | ||
# write metadata | ||
f = open(comments_file, "r") | ||
lines = f.readlines() | ||
f.close() | ||
output_lines = [] | ||
for line in lines: | ||
output_lines.append("# " + line) | ||
metadata_str = ''.join(output_lines) | ||
if os.path.exists(outpath): | ||
os.remove(outpath) | ||
f = open(outpath, 'a') | ||
f.write(metadata_str) | ||
f.close() | ||
# write data | ||
df.to_csv(outpath, index=False, sep='\t', mode='a') | ||
"""Writes a SSSOM file""" | ||
with open(config_path, 'r') as yaml_file: | ||
metadata: Dict = yaml.load(yaml_file, Loader=yaml.FullLoader) | ||
converter = curies.Converter.from_prefix_map(metadata['curie_map']) | ||
msdf: MappingSetDataFrame = MappingSetDataFrame(converter=converter, df=df, metadata=metadata) | ||
with open(outpath, 'w') as f: | ||
write_table(msdf, f) |