Skip to content

Commit

Permalink
Adding support for "extension_slots" during parsing
Browse files Browse the repository at this point in the history
Previously, parsing in strict mode failed whenever a non-standard metadata element was encountered at the mapping-set level. This commit adds support for the case where an extension slot has been validly declared according to the SSSOM specification (https://w3id.org/sssom/spec).
  • Loading branch information
matentzn committed Dec 7, 2024
1 parent ca01b56 commit 9847b67
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 17 deletions.
32 changes: 15 additions & 17 deletions src/sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,26 +182,23 @@ def _get_seperator_symbol_from_file_path(file):
return None


def _is_check_valid_extension_slot(slot_name):
logging.warning(
f"'{slot_name}' could be a valid extension slot "
f"(https://mapping-commons.github.io/sssom/spec-model/#non-standard-slots), "
f"but the validator does not check that yet."
)
return False
def _is_check_valid_extension_slot(slot_name, meta):
extension_definitions = meta.get("extension_definitions", [])
return any(entry.get("slot_name") == slot_name for entry in extension_definitions)


def _is_irregular_metadata(metadata_list: List[Dict]) -> bool:
    """Check metadata dictionaries for keys that are not SSSOM mapping set slots.

    A key is irregular when it is neither listed in the ``mapping_set_slots`` of
    the SSSOM schema object nor accepted by ``_is_check_valid_extension_slot``
    for the dictionary it appears in. A warning is logged for every irregular key.

    :param metadata_list: Metadata dictionaries to check.
    :return: ``True`` if at least one irregular key was found, else ``False``.
    """
    # Loop-invariant lookup, fetched once rather than once per key.
    mapping_set_slots = _get_sssom_schema_object().mapping_set_slots
    fail_metadata = False
    for m in metadata_list:
        for key in m:
            if key in mapping_set_slots:
                continue
            # Extension slots are looked up in the same dictionary as the key.
            if _is_check_valid_extension_slot(key, m):
                continue
            logging.warning(
                f"Metadata key '{key}' is not a standard SSSOM mapping set metadata field. See "
                f"https://mapping-commons.github.io/sssom/spec-model/#non-standard-slots on how to "
                f"specify additional, non-standard fields in a SSSOM file."
            )
            fail_metadata = True
    return fail_metadata


Expand All @@ -226,6 +223,7 @@ def _check_redefined_builtin_prefixes(sssom_metadata, meta, prefix_map):
f"to the required URI expansion: {builtin_uri}. The prefix will be ignored."
)
is_valid_prefixes = False
# NOTE during refactor replace the following line by https://github.com/biopragmatics/curies/pull/136
reverse_bimap = {value: key for key, value in builtin_converter.bimap.items()}
if builtin_uri in reverse_bimap:
if builtin_prefix != reverse_bimap[builtin_uri]:
Expand Down Expand Up @@ -289,7 +287,7 @@ def parse_sssom_table(
meta = {}

is_valid_built_in_prefixes = _check_redefined_builtin_prefixes(sssom_metadata, meta, prefix_map)
is_valid_metadata = _check_irregular_metadata(sssom_metadata, meta)
is_valid_metadata = _is_irregular_metadata([sssom_metadata, meta])

if kwargs.get("strict"):
_fail_in_strict_parsing_mode(is_valid_built_in_prefixes, is_valid_metadata)
Expand Down
23 changes: 23 additions & 0 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,3 +461,26 @@ def test_strict_parsing(self):
# Make sure it parses in non-strict mode
msdf = parse_sssom_table(stream)
self.assertEqual(len(msdf.df), 2)

def test_check_irregular_metadata(self):
    """Verify irregular-metadata detection as described by https://w3id.org/sssom/spec."""
    from sssom.parsers import _is_check_valid_extension_slot, _is_irregular_metadata

    # "licenses" is not a mapping set slot and "ext_test" is used here
    # without a matching entry in "extension_definitions".
    undeclared_meta = {
        "licenses": "http://licen.se",
        "mapping_set_id": "http://mapping.set/id1",
        "ext_test": "value",
    }
    # Same extension key, but this time declared via "extension_definitions".
    declared_meta = {
        "license": "http://licen.se",
        "mapping_set_id": "http://mapping.set/id1",
        "ext_test": "value",
        "extension_definitions": [{"slot_name": "ext_test"}],
    }

    undeclared_is_irregular = _is_irregular_metadata([undeclared_meta])
    extension_is_declared = _is_check_valid_extension_slot("ext_test", declared_meta)
    declared_is_irregular = _is_irregular_metadata([declared_meta])

    self.assertTrue(undeclared_is_irregular)
    self.assertTrue(extension_is_declared)
    self.assertFalse(declared_is_irregular)

0 comments on commit 9847b67

Please sign in to comment.