From d3338027975d55a3dc82ebada4e706f13fd6d624 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 9 Oct 2023 13:19:43 +0200 Subject: [PATCH] Directly unpack metadata tuple (#453) This PR unpacks the Metadata tuple directly into its converter and metadata fields at each call site. Additionally, since nothing needs the "prefix_map" property on Metadata anymore, that property is removed. --- src/sssom/io.py | 12 ++++++------ src/sssom/parsers.py | 20 ++++++++++---------- src/sssom/typehints.py | 5 ----- tests/test_parsers.py | 32 +++++++++++++++++--------------- 4 files changed, 33 insertions(+), 36 deletions(-) diff --git a/src/sssom/io.py b/src/sssom/io.py index 42777221..d90fe2d1 100644 --- a/src/sssom/io.py +++ b/src/sssom/io.py @@ -80,27 +80,27 @@ def parse_file( :param mapping_predicate_filter: Optional list of mapping predicates or filepath containing the same. """ raise_for_bad_path(input_path) - metadata = get_metadata_and_prefix_map( + converter, meta = get_metadata_and_prefix_map( metadata_path=metadata_path, prefix_map_mode=prefix_map_mode ) parse_func = get_parsing_function(input_format, input_path) mapping_predicates = None # Get list of predicates of interest. if mapping_predicate_filter: - mapping_predicates = get_list_of_predicate_iri(mapping_predicate_filter, metadata.converter) + mapping_predicates = get_list_of_predicate_iri(mapping_predicate_filter, converter) # if mapping_predicates: doc = parse_func( input_path, - prefix_map=metadata.prefix_map, - meta=metadata.metadata, + prefix_map=converter, + meta=meta, mapping_predicates=mapping_predicates, ) # else: # doc = parse_func( # input_path, - # prefix_map=metadata.prefix_map, - # meta=metadata.metadata, + # prefix_map=converter, + # meta=meta, # ) if clean_prefixes: # We do this because we got a lot of prefixes from the default SSSOM prefixes!
diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py index 8b58ec00..88414f26 100644 --- a/src/sssom/parsers.py +++ b/src/sssom/parsers.py @@ -233,11 +233,11 @@ def parse_sssom_rdf( ) -> MappingSetDataFrame: """Parse a TSV to a :class:`MappingSetDocument` to a :class:`MappingSetDataFrame`.""" raise_for_bad_path(file_path) - metadata = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta) + converter, meta = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta) g = Graph() g.parse(file_path, format=serialisation) - msdf = from_sssom_rdf(g, prefix_map=metadata.prefix_map, meta=metadata.metadata) + msdf = from_sssom_rdf(g, prefix_map=converter, meta=meta) # df: pd.DataFrame = msdf.df # if mapping_predicates and not df.empty(): # msdf.df = df[df["predicate_id"].isin(mapping_predicates)] @@ -253,11 +253,11 @@ def parse_sssom_json( ) -> MappingSetDataFrame: """Parse a TSV to a :class:`MappingSetDocument` to a :class`MappingSetDataFrame`.""" raise_for_bad_path(file_path) - metadata = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta) + converter, meta = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta) with open(file_path) as json_file: jsondoc = json.load(json_file) - msdf = from_sssom_json(jsondoc=jsondoc, prefix_map=metadata.prefix_map, meta=metadata.metadata) + msdf = from_sssom_json(jsondoc=jsondoc, prefix_map=converter, meta=meta) # df: pd.DataFrame = msdf.df # if mapping_predicates and not df.empty(): # msdf.df = df[df["predicate_id"].isin(mapping_predicates)] @@ -283,15 +283,15 @@ def parse_obographs_json( """ raise_for_bad_path(file_path) - _xmetadata = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta) + converter, meta = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta) with open(file_path) as json_file: jsondoc = json.load(json_file) return from_obographs( jsondoc, - prefix_map=_xmetadata.prefix_map, - meta=_xmetadata.metadata, + prefix_map=converter, + meta=meta, 
mapping_predicates=mapping_predicates, ) @@ -354,13 +354,13 @@ def parse_alignment_xml( """Parse a TSV -> MappingSetDocument -> MappingSetDataFrame.""" raise_for_bad_path(file_path) - metadata = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta) + converter, meta = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta) logging.info("Loading from alignment API") xmldoc = minidom.parse(file_path) msdf = from_alignment_minidom( xmldoc, - prefix_map=metadata.prefix_map, - meta=metadata.metadata, + prefix_map=converter, + meta=meta, mapping_predicates=mapping_predicates, ) return msdf diff --git a/src/sssom/typehints.py b/src/sssom/typehints.py index 18538a44..4283f773 100644 --- a/src/sssom/typehints.py +++ b/src/sssom/typehints.py @@ -27,11 +27,6 @@ class Metadata(NamedTuple): converter: Converter metadata: MetadataType - @property - def prefix_map(self): - """Get the bimap.""" - return self.converter.bimap - @classmethod def default(cls): """Get default metadata.""" diff --git a/tests/test_parsers.py b/tests/test_parsers.py index ab245416..7adb90a7 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -13,10 +13,11 @@ import numpy as np import pandas as pd import yaml +from curies import Converter from rdflib import Graph from sssom.constants import CURIE_MAP, DEFAULT_LICENSE, SSSOM_URI_PREFIX -from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter +from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter, get_converter from sssom.io import parse_file from sssom.parsers import ( _open_input, @@ -28,7 +29,7 @@ from_sssom_rdf, parse_sssom_table, ) -from sssom.typehints import Metadata +from sssom.typehints import get_default_metadata from sssom.util import MappingSetDataFrame, sort_df_rows_columns from sssom.writers import write_table from tests.test_data import data_dir as test_data_dir @@ -65,12 +66,13 @@ def setUp(self) -> None: with open(f"{test_data_dir}/basic-meta-external.yml") as file: df_meta = 
yaml.safe_load(file) - self.df_prefix_map = df_meta.pop(CURIE_MAP) + self.df_converter = Converter.from_prefix_map(df_meta.pop(CURIE_MAP)) self.df_meta = df_meta self.alignmentxml_file = f"{test_data_dir}/oaei-ordo-hp.rdf" self.alignmentxml = minidom.parse(self.alignmentxml_file) - self.metadata = Metadata.default() + self.metadata = get_default_metadata() + self.converter = get_converter() def test_parse_sssom_dataframe_from_file(self): """Test parsing a TSV.""" @@ -117,21 +119,23 @@ def test_parse_obographs(self): """Test parsing OBO Graph JSON.""" msdf = from_obographs( jsondoc=self.obographs, - prefix_map=self.metadata.prefix_map, - meta=self.metadata.metadata, + prefix_map=self.converter, + meta=self.metadata, ) path = os.path.join(test_out_dir, "test_parse_obographs.tsv") with open(path, "w") as file: write_table(msdf, file) self.assertEqual( + # this number went up from 8099 when the curies.Converter was introduced + # since it was able to handle CURIE prefix and URI prefix synonyms + 8488, len(msdf.df), - 8099, f"{self.obographs_file} has the wrong number of mappings.", ) def test_parse_tsv(self): """Test parsing TSV.""" - msdf = from_sssom_dataframe(df=self.df, prefix_map=self.df_prefix_map, meta=self.df_meta) + msdf = from_sssom_dataframe(df=self.df, prefix_map=self.df_converter, meta=self.df_meta) path = os.path.join(test_out_dir, "test_parse_tsv.tsv") with open(path, "w") as file: write_table(msdf, file) @@ -145,8 +149,8 @@ def test_parse_alignment_minidom(self): """Test parsing an alignment XML.""" msdf = from_alignment_minidom( dom=self.alignmentxml, - prefix_map=self.metadata.prefix_map, - meta=self.metadata.metadata, + prefix_map=self.converter, + meta=self.metadata, ) path = os.path.join(test_out_dir, "test_parse_alignment_minidom.tsv") with open(path, "w") as file: @@ -244,9 +248,7 @@ def test_parse_alignment_xml(self): def test_parse_sssom_rdf(self): """Test parsing RDF.""" - msdf = from_sssom_rdf( - g=self.rdf_graph, 
prefix_map=self.df_prefix_map, meta=self.metadata.metadata - ) + msdf = from_sssom_rdf(g=self.rdf_graph, prefix_map=self.df_converter, meta=self.metadata) path = os.path.join(test_out_dir, "test_parse_sssom_rdf.tsv") with open(path, "w") as file: write_table(msdf, file) @@ -260,8 +262,8 @@ def test_parse_sssom_json(self): """Test parsing JSON.""" msdf = from_sssom_json( jsondoc=self.json, - prefix_map=self.df_prefix_map, - meta=self.metadata.metadata, + prefix_map=self.df_converter, + meta=self.metadata, ) path = os.path.join(test_out_dir, "test_parse_sssom_json.tsv") with open(path, "w") as file: