Skip to content

Commit

Permalink
Merge pull request #128 from monarch-initiative/abbrevs
Browse files: browse the repository at this point in the history
The symbol value from "Preferred Title; symbol" should have a synonym type of abbreviation
  • Loading branch information
joeflack4 authored Sep 10, 2024
2 parents db4ddd2 + 4d18859 commit 0f30e3a
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 25 deletions.
65 changes: 40 additions & 25 deletions omim2obo/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,53 +164,68 @@ def omim2obo(use_cache: bool = False):
continue

# - Non-deprecated
omim_type, pref_label, alt_labels, inc_labels = omim_type_and_titles[omim_id]
label = pref_label
# Parse titles
omim_type, pref_labels_str, alt_labels, inc_labels = omim_type_and_titles[omim_id]
other_labels = []
cleaned_inc_labels = []
label_endswith_included_alt = False
label_endswith_included_inc = False
pref_labels: List[str] = [x.strip() for x in pref_labels_str.split(';')]
pref_title: str = pref_labels[0]
pref_symbols: List[str] = pref_labels[1:]
if alt_labels:
cleaned_alt_labels, label_endswith_included_alt = get_alt_labels(alt_labels)
other_labels += cleaned_alt_labels
if inc_labels:
cleaned_inc_labels, label_endswith_included_inc = get_alt_labels(inc_labels)
# other_labels += cleaned_inc_labels # deactivated 7/2024 in favor of alternative for tagging 'included'

included_detected_comment = "This term has one or more labels that end with ', INCLUDED'."
if label_endswith_included_alt or label_endswith_included_inc:
graph.add((omim_uri, RDFS['comment'], Literal(included_detected_comment)))

use_abbrev_over_label = False
abbrev = label.split(';')[1].strip() if ';' in label else None
# Special cases depending on OMIM term type
is_gene = omim_type == OmimType.GENE or omim_type == OmimType.HAS_AFFECTED_FEATURE
if omim_type == OmimType.HERITABLE_PHENOTYPIC_MARKER: # %
graph.add((omim_uri, BIOLINK['category'], BIOLINK['Disease']))
elif omim_type == OmimType.GENE or omim_type == OmimType.HAS_AFFECTED_FEATURE: # * or +
use_abbrev_over_label = True
elif is_gene: # * or +
graph.add((omim_uri, RDFS.subClassOf, SO['0000704'])) # gene
graph.add((omim_uri, MONDO.exclusionReason, MONDO.nonDisease))
graph.add((omim_uri, BIOLINK['category'], BIOLINK['Gene']))
elif omim_type == OmimType.PHENOTYPE:
graph.add((omim_uri, BIOLINK['category'], BIOLINK['Disease'])) # phenotype ~= disease
elif omim_type == OmimType.SUSPECTED:
graph.add((omim_uri, MONDO.exclusionReason, MONDO.excludeTrait))
else:
pass

if use_abbrev_over_label and abbrev:
graph.add((omim_uri, RDFS.label, Literal(abbrev)))
# Alternative rdfs:label for genes
if is_gene and pref_symbols:
gene_label_err = 'Warning: Only 1 symbol picked for label for gene term, but there were 2 to choose' \
f'from. Unsure which is best. Picking the first.\nhttps://omim.org/entry/{omim_id} - {pref_symbols}'
if len(pref_symbols) > 1:
LOG.warning(gene_label_err) # todo: decide the best way to handle these situations
graph.add((omim_uri, RDFS.label, Literal(pref_symbols[0])))
else:
graph.add((omim_uri, RDFS.label, Literal(label_cleaner.clean(label))))

exact_labels = [s.strip() for s in label.split(';')]
# the last string is an abbreviation. Add OWL reification. See issue #2
if len(exact_labels) > 1:
abbr = exact_labels.pop()
graph.add((omim_uri, oboInOwl.hasExactSynonym, Literal(abbr)))
for exact_label in exact_labels:
graph.add((omim_uri, oboInOwl.hasExactSynonym, Literal(label_cleaner.clean(exact_label, abbrev))))
for label in other_labels:
graph.add((omim_uri, oboInOwl.hasExactSynonym, Literal(label_cleaner.clean(label, abbrev))))
graph.add((omim_uri, RDFS.label, Literal(label_cleaner.clean(pref_title))))

# todo: The 2nd param of .clean()/.cleanup_label(), `explicit_abbrev`, should be List[str] instead of str. The calls
# below should then pass all symbols/abbrevs from the preferred, alt, and included titles each time they are made.
# If a term has no symbols, pass an empty list. See: https://github.com/monarch-initiative/omim/issues/129
abbrev: Union[str, None] = None if not pref_symbols else pref_symbols[0]

# Add synonyms
graph.add((omim_uri, oboInOwl.hasExactSynonym, Literal(label_cleaner.clean(pref_title, abbrev))))
for alt_label in other_labels:
graph.add((omim_uri, oboInOwl.hasExactSynonym, Literal(label_cleaner.clean(alt_label, abbrev))))
for abbreviation in pref_symbols:
graph.add((omim_uri, oboInOwl.hasExactSynonym, Literal(abbreviation)))
# Reify on abbreviations. See: https://github.com/monarch-initiative/omim/issues/2
axiom = BNode()
graph.add((axiom, RDF.type, OWL.Axiom))
graph.add((axiom, OWL.annotatedSource, omim_uri))
graph.add((axiom, OWL.annotatedProperty, oboInOwl.hasExactSynonym))
graph.add((axiom, OWL.annotatedTarget, Literal(abbreviation)))
graph.add((axiom, OBOINOWL.hasSynonymType, MONDONS.abbreviation))

# Add 'included' entry properties
included_detected_comment = "This term has one or more labels that end with ', INCLUDED'."
if label_endswith_included_alt or label_endswith_included_inc:
graph.add((omim_uri, RDFS['comment'], Literal(included_detected_comment)))
for included_label in cleaned_inc_labels:
graph.add((omim_uri, URIRef(INCLUDED_URI), Literal(label_cleaner.clean(included_label, abbrev))))

Expand Down
1 change: 1 addition & 0 deletions omim2obo/parsers/omim_entry_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ def _detect_abbreviations(
return replacements


# todo: explicit_abbrev: Change to List[str]. See: https://github.com/monarch-initiative/omim/issues/129
def cleanup_label(
label: str,
explicit_abbrev: str = None,
Expand Down

0 comments on commit 0f30e3a

Please sign in to comment.