Skip to content

Commit

Permalink
Clean up extract_iri functionality
Browse files: browse the repository at this point in the history
  • Loading branch information
cthoyt committed Nov 6, 2023
1 parent ad70a83 commit df230e3
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 27 deletions.
38 changes: 14 additions & 24 deletions src/sssom/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def parse_file(
mapping_predicates = None
# Get list of predicates of interest.
if mapping_predicate_filter:
mapping_predicates = get_list_of_predicate_iri(mapping_predicate_filter, converter)
mapping_predicates = extract_iris(mapping_predicate_filter, converter)

# if mapping_predicates:
doc = parse_func(
Expand Down Expand Up @@ -161,39 +161,29 @@ def _merge_converter(converter: Converter, prefix_map_mode: str = None) -> Conve
raise ValueError(f"Invalid prefix map mode: {prefix_map_mode}")


def get_list_of_predicate_iri(predicate_filter: Iterable[str], converter: Converter) -> list:
"""Return a list of IRIs for predicate CURIEs passed.
:param predicate_filter: CURIE OR list of CURIEs OR file path containing the same.
:param converter: Prefix map of mapping set (possibly) containing custom prefix:IRI combination.
:return: A list of IRIs.
"""
return sorted(set(chain.from_iterable(extract_iri(p, converter) for p in predicate_filter)))


def extract_iri(input: str, converter: Converter) -> List[str]:
def extract_iris(
input: Union[str, Path, Iterable[Union[str, Path]]], converter: Converter
) -> List[str]:
"""
Recursively extract a list of IRIs from a CURIE, a URI, a file path, or a list of these.
:param input: CURIE OR list of CURIEs OR file path containing the same.
:param converter: Prefix map of mapping set (possibly) containing custom prefix:IRI combination.
:return: A list of IRIs.
:rtype: list
"""
if isinstance(input, (str, Path)) and os.path.isfile(input):
pred_list = Path(input).read_text().splitlines()
return sorted(set(chain.from_iterable(extract_iris(p, converter) for p in pred_list)))
if isinstance(input, list):
return sorted(set(chain.from_iterable(extract_iris(p, converter) for p in input)))
if converter.is_uri(input):
return [converter.standardize_uri(input, strict=True)]
elif converter.is_curie(input):
if converter.is_curie(input):
return [converter.expand(input, strict=True)]

elif os.path.isfile(input):
pred_list = Path(input).read_text().splitlines()
return sorted(set(chain.from_iterable(extract_iri(p, converter) for p in pred_list)))

else:
logging.warning(
f"{input} is neither a local file path nor a valid CURIE or URI w.r.t. the given converter. "
f"skipped from processing."
)
logging.warning(
f"{input} is neither a local file path nor a valid CURIE or URI w.r.t. the given converter. "
f"skipped from processing."
)
return []


Expand Down
6 changes: 3 additions & 3 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from sssom.constants import OBJECT_ID, SUBJECT_ID
from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter
from sssom.io import get_list_of_predicate_iri
from sssom.io import extract_iris
from sssom.parsers import parse_sssom_table
from sssom.util import (
MappingSetDataFrame,
Expand All @@ -33,10 +33,10 @@ def setUp(self) -> None:

def test_broken_predicate_list(self):
"""Test extracting IRIs from a predicate filter that mixes a CURIE and a predicate list file."""
predicate_filter = ["skos:relatedMatch", f"{data_dir}/predicate_list3.txt"]
predicate_filter = ["skos:relatedMatch", [f"{data_dir}/predicate_list3.txt"]]
prefix_map = {"skos": "http://www.w3.org/2004/02/skos/core#"}
converter = Converter.from_prefix_map(prefix_map)
iri_list = get_list_of_predicate_iri(converter=converter, predicate_filter=predicate_filter)
iri_list = extract_iris(predicate_filter, converter=converter)
self.assertEqual(
[
"http://www.w3.org/2004/02/skos/core#narrowMatch",
Expand Down

0 comments on commit df230e3

Please sign in to comment.