From af76b58246f87c439d04c28713589f68e2bcd0d4 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 5 Jul 2023 12:46:59 +0200 Subject: [PATCH] Additional updates --- mira/dkg/construct.py | 17 +++++++++++++---- mira/dkg/construct_registry.py | 1 - mira/dkg/resources/probonto.py | 10 ++++++++-- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index 631e25415..e1a62dd0f 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -464,7 +464,7 @@ def construct( property_values="", xref_types="", # TODO synonym_types=";".join( - synonym.type or "skos:exactMatch" for synonym in term.synonyms or [] + synonym.type.curie for synonym in term.synonyms or [] ), ) for parent in term.parents: @@ -781,6 +781,16 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: edge.pred for edge in graph.edges if edge.pred.startswith("http") ) + clean_edges = ( + edge + for edge in graph.edges + if ( + edge.subject is not None + and edge.predicate is not None + and edge.object is not None + and edge.object.curie not in OBSOLETE + ) + ) edges.extend( ( edge.sub, @@ -792,9 +802,8 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: version or "", ) for edge in tqdm( - sorted(graph.edges, key=methodcaller("as_tuple")), unit="edge", unit_scale=True + sorted(clean_edges, key=methodcaller("as_tuple")), unit="edge", unit_scale=True ) - if edge.obj not in OBSOLETE ) for sub, obj, pred_label, pred, *_ in edges: @@ -970,7 +979,7 @@ def get_node_info(term: pyobo.Term, type: EntityType = "class"): property_values="", xref_types="", synonym_types=";".join( - synonym.type or "skos:exactMatch" for synonym in term.synonyms or [] + synonym.type.curie for synonym in term.synonyms or [] ), ) diff --git a/mira/dkg/construct_registry.py b/mira/dkg/construct_registry.py index 1bfe368da..bde7c12bd 100644 --- a/mira/dkg/construct_registry.py +++ b/mira/dkg/construct_registry.py @@ -9,7 +9,6 @@ from typing import Optional, Set import bioregistry -import bioregistry.app.impl import click from bioregistry import Manager from tqdm import tqdm diff --git a/mira/dkg/resources/probonto.py b/mira/dkg/resources/probonto.py index 187075340..3bc71dafb 100644 --- a/mira/dkg/resources/probonto.py +++ b/mira/dkg/resources/probonto.py @@ -1,9 +1,12 @@ import json from collections import defaultdict +from pathlib import Path import bioontologies import rdflib +HERE = Path(__file__).parent.resolve() +PROBONTO_PATH = HERE.joinpath("probonto.json") def get_data_properties(rdf_graph: rdflib.Graph, identifier): return { @@ -24,7 +27,10 @@ def get_instances(obo_graph, probonto_identifier: str): ] -def get_probonto_terms(): +def get_probonto_terms(*, refresh: bool = False): + if PROBONTO_PATH.is_file() and not refresh: + return json.loads(PROBONTO_PATH.read_text()) + obo_graph = ( bioontologies.get_obograph_by_prefix("probonto") .guess("probonto") @@ -96,7 +102,7 @@ def get_probonto_terms(): def main(): results = get_probonto_terms() - with open("probonto.json", "w") as file: + with open(PROBONTO_PATH, "w") as file: json.dump(results, file, indent=2, ensure_ascii=False)