Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update type hints for prefix map merging #454

Merged
merged 6 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/sssom/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import sys
from operator import itemgetter
from pathlib import Path
from typing import Any, Callable, List, Optional, TextIO, Tuple
from typing import Any, Callable, List, Optional, TextIO, Tuple, get_args

import click
import curies
Expand All @@ -28,7 +28,7 @@

from sssom.constants import (
DEFAULT_VALIDATION_TYPES,
PREFIX_MAP_MODES,
MergeMode,
SchemaValidationType,
_get_sssom_schema_object,
)
Expand Down Expand Up @@ -172,9 +172,9 @@ def convert(input: str, output: TextIO, output_format: str):
default="metadata_only",
show_default=True,
required=True,
type=click.Choice(PREFIX_MAP_MODES, case_sensitive=False),
type=click.Choice(get_args(MergeMode), case_sensitive=False),
help="Defines whether the prefix map in the metadata should be extended or replaced with "
"the SSSOM default prefix map. Must be one of metadata_only, sssom_default_only, merged",
"the SSSOM default prefix map.",
)
@click.option(
"-p",
Expand Down Expand Up @@ -205,7 +205,7 @@ def parse(
input: str,
input_format: str,
metadata: str,
prefix_map_mode: str,
prefix_map_mode: MergeMode,
clean_prefixes: bool,
strict_clean_prefixes: bool,
output: TextIO,
Expand All @@ -218,7 +218,7 @@ def parse(
output=output,
input_format=input_format,
metadata_path=metadata,
prefix_map_mode=prefix_map_mode,
merge_mode=prefix_map_mode,
clean_prefixes=clean_prefixes,
strict_clean_prefixes=strict_clean_prefixes,
embedded_mode=embedded_mode,
Expand Down
14 changes: 5 additions & 9 deletions src/sssom/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pathlib
from enum import Enum
from functools import lru_cache
from typing import List
from typing import List, Literal

import pkg_resources
import yaml
Expand Down Expand Up @@ -37,14 +37,10 @@
]

UNKNOWN_IRI = "http://w3id.org/sssom/unknown_prefix/"
PREFIX_MAP_MODE_METADATA_ONLY = "metadata_only"
PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY = "sssom_default_only"
PREFIX_MAP_MODE_MERGED = "merged"
PREFIX_MAP_MODES = [
PREFIX_MAP_MODE_METADATA_ONLY,
PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY,
PREFIX_MAP_MODE_MERGED,
]
MergeMode = Literal["metadata_only", "sssom_default_only", "merged"]
PREFIX_MAP_MODE_METADATA_ONLY: MergeMode = "metadata_only"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never seen this syntax. What does this do, when considering line 40? Looks redundant at first glance (twice the same assignment)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes checking that one of the constants is used properly is checked by type checking. It further builds the string enumeration into the automatic documentation generation

PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY: MergeMode = "sssom_default_only"
PREFIX_MAP_MODE_MERGED: MergeMode = "merged"
ENTITY_REFERENCE = "EntityReference"

# Slot Constants
Expand Down
29 changes: 15 additions & 14 deletions src/sssom/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
PREFIX_MAP_MODE_MERGED,
PREFIX_MAP_MODE_METADATA_ONLY,
PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY,
MergeMode,
SchemaValidationType,
)
from .context import get_converter
Expand Down Expand Up @@ -59,7 +60,7 @@ def parse_file(
output: TextIO,
input_format: Optional[str] = None,
metadata_path: Optional[str] = None,
prefix_map_mode: Optional[str] = None,
merge_mode: Optional[MergeMode] = None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All borders have fallen now, but renaming parameters to central methods like this is really annoying for users, but the change makes sense, so lets just break and mend it all now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a feeling this one is unused - it is only part of a method that should itself be private. In fact, I would rather remove this option completely and have it automatically chain on all instances.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I reverted this and removed the renames

clean_prefixes: bool = True,
strict_clean_prefixes: bool = True,
embedded_mode: bool = True,
Expand All @@ -72,17 +73,15 @@ def parse_file(
:param input_format: The string denoting the input format.
:param metadata_path: The path to a file containing the sssom metadata (including prefix_map)
to be used during parse.
:param prefix_map_mode: Defines whether the prefix map in the metadata should be extended or replaced with
the SSSOM default prefix map. Must be one of metadata_only, sssom_default_only, merged
:param merge_mode: Defines whether the prefix map in the metadata should be extended or replaced with
the SSSOM default prefix map derived from the :mod:`bioregistry`.
:param clean_prefixes: If True (default), records with unknown prefixes are removed from the SSSOM file.
:param strict_clean_prefixes: If True (default), clean_prefixes() will be in strict mode.
:param embedded_mode:If True (default), the dataframe and metadata are exported in one file (tsv), else two separate files (tsv and yaml).
:param mapping_predicate_filter: Optional list of mapping predicates or filepath containing the same.
"""
raise_for_bad_path(input_path)
metadata = get_metadata_and_prefix_map(
metadata_path=metadata_path, prefix_map_mode=prefix_map_mode
)
metadata = get_metadata_and_prefix_map(metadata_path=metadata_path, merge_mode=merge_mode)
parse_func = get_parsing_function(input_format, input_path)
mapping_predicates = None
# Get list of predicates of interest.
Expand Down Expand Up @@ -134,13 +133,15 @@ def split_file(input_path: str, output_directory: Union[str, Path]) -> None:


def get_metadata_and_prefix_map(
metadata_path: Union[None, str, Path] = None, prefix_map_mode: Optional[str] = None
metadata_path: Union[None, str, Path] = None,
*,
merge_mode: Optional[MergeMode] = None,
) -> Metadata:
"""
Load SSSOM metadata from a file, and then augments it with default prefixes.

:param metadata_path: The metadata file in YAML format
:param prefix_map_mode: one of metadata_only, sssom_default_only, merged
:param merge_mode: one of metadata_only, sssom_default_only, merged
:return: a prefix map dictionary and a metadata object dictionary
"""
if metadata_path is None:
Expand All @@ -159,20 +160,20 @@ def get_metadata_and_prefix_map(
else:
prefix_map = {}
converter = Converter.from_prefix_map(prefix_map)
converter = _merge_converter(converter, prefix_map_mode=prefix_map_mode)
converter = _merge_converter(converter, mode=merge_mode)
cthoyt marked this conversation as resolved.
Show resolved Hide resolved

return Metadata(converter=converter, metadata=metadata)


def _merge_converter(converter: Converter, prefix_map_mode: str = None) -> Converter:
def _merge_converter(converter: Converter, mode: Optional[MergeMode] = None) -> Converter:
"""Merge the metadata's converter with the default converter."""
if prefix_map_mode is None or prefix_map_mode == PREFIX_MAP_MODE_METADATA_ONLY:
if mode is None or mode == PREFIX_MAP_MODE_METADATA_ONLY:
return converter
if prefix_map_mode == PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY:
if mode == PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY:
return get_converter()
if prefix_map_mode == PREFIX_MAP_MODE_MERGED:
if mode == PREFIX_MAP_MODE_MERGED:
return curies.chain([converter, get_converter()])
raise ValueError(f"Invalid prefix map mode: {prefix_map_mode}")
raise ValueError(f"Invalid prefix map mode: {mode}")


def get_list_of_predicate_iri(predicate_filter: tuple, converter: Converter) -> list:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def test_parse_obographs_merged(self):
with open(outfile, "w") as f:
parse_file(
input_path=hp_json,
prefix_map_mode="merged",
merge_mode="merged",
clean_prefixes=True,
input_format="obographs-json",
metadata_path=hp_meta,
Expand Down
Loading