diff --git a/docker/Dockerfile b/docker/Dockerfile index 2707a2896..6e4ecdcf6 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -25,7 +25,7 @@ RUN wget -O /sw/nodes.tsv.gz https://askem-mira.s3.amazonaws.com/dkg/$domain/bui neo4j-admin import --delimiter='TAB' --skip-duplicate-nodes=true --skip-bad-relationships=true --nodes /sw/nodes.tsv.gz --relationships /sw/edges.tsv.gz # Python packages -RUN python -m pip install git+https://github.com/indralab/mira.git@main#egg=mira[web,uvicorn] && \ +RUN python -m pip install git+https://github.com/indralab/mira.git@main#egg=mira[web,uvicorn,dkg-client] && \ python -m pip uninstall -y flask_bootstrap && \ python -m pip uninstall -y bootstrap_flask && \ python -m pip install bootstrap_flask diff --git a/docker/Dockerfile.local b/docker/Dockerfile.local index 4fd0673c0..abeb5b6ba 100644 --- a/docker/Dockerfile.local +++ b/docker/Dockerfile.local @@ -24,7 +24,7 @@ RUN sed -i 's/#dbms.default_listen_address/dbms.default_listen_address/' /etc/ne neo4j-admin import --delimiter='TAB' --skip-duplicate-nodes=true --skip-bad-relationships=true --nodes /sw/nodes.tsv.gz --relationships /sw/edges.tsv.gz # Python packages -RUN python -m pip install git+https://github.com/indralab/mira.git@$branch#egg=mira[web,uvicorn] && \ +RUN python -m pip install git+https://github.com/indralab/mira.git@$branch#egg=mira[web,uvicorn,dkg-client] && \ python -m pip uninstall -y flask_bootstrap && \ python -m pip uninstall -y bootstrap_flask && \ python -m pip install bootstrap_flask diff --git a/mira/dkg/client.py b/mira/dkg/client.py index ba4a23b8d..745962c56 100644 --- a/mira/dkg/client.py +++ b/mira/dkg/client.py @@ -382,13 +382,15 @@ def get_grounder(self, prefix: Union[str, List[str]]) -> "gilda.grounder.Grounde if isinstance(prefix, str): prefix = [prefix] - terms = list( - itt.chain.from_iterable( + terms = [ + term + for term in itt.chain.from_iterable( self.get_grounder_terms(p) for p in tqdm( prefix, desc="Caching grounding terms" ) ) - ) + if term.norm_text + ] return Grounder(terms) def get_node_counter(self) -> Counter: @@ -598,25 +600,29 @@ def get_terms( from gilda.process import normalize from gilda.term import Term - yield Term( - norm_text=normalize(name), - text=name, - db=prefix, - id=identifier, - entry_name=name, - status="name", - source=prefix, - ) - for synonym in synonyms or []: + norm_text = normalize(name) + if norm_text: yield Term( - norm_text=normalize(synonym), - text=synonym, + norm_text=norm_text, + text=name, db=prefix, id=identifier, entry_name=name, - status="synonym", + status="name", source=prefix, ) + for synonym in synonyms or []: + norm_text = normalize(synonym) + if norm_text: + yield Term( + norm_text=norm_text, + text=synonym, + db=prefix, + id=identifier, + entry_name=name, + status="synonym", + source=prefix, + ) def build_match_clause( diff --git a/mira/dkg/constants.py b/mira/dkg/constants.py index 57527304a..6088b6cf9 100644 --- a/mira/dkg/constants.py +++ b/mira/dkg/constants.py @@ -12,6 +12,11 @@ "xrefs:string[]", "alts:string[]", "version:string", + "property_predicates:string[]", + "property_values:string[]", + "xref_types:string[]", + "synonym_types:string[]", + "source:string", ) #: The used for the edges files in the neo4j bulk import diff --git a/setup.cfg b/setup.cfg index d394e597a..82da1fd6e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -65,6 +65,7 @@ web = gilda click neo4j + networkx pystow tabulate pygraphviz