diff --git a/src/sssom/__init__.py b/src/sssom/__init__.py index 80b1c1a2..e8d6ad16 100644 --- a/src/sssom/__init__.py +++ b/src/sssom/__init__.py @@ -9,6 +9,7 @@ from sssom_schema import Mapping, MappingSet, slots # noqa:401 +from sssom.io import get_metadata_and_prefix_map # noqa:401 from sssom.sssom_document import MappingSetDocument # noqa:401 from sssom.util import ( # noqa:401 MappingSetDataFrame, @@ -20,3 +21,5 @@ parse, reconcile_prefix_and_data, ) + +from .constants import generate_mapping_set_id, get_default_metadata # noqa:401 diff --git a/src/sssom/constants.py b/src/sssom/constants.py index 05723f89..7b4f8e42 100644 --- a/src/sssom/constants.py +++ b/src/sssom/constants.py @@ -1,9 +1,10 @@ """Constants.""" import pathlib +import uuid from enum import Enum from functools import lru_cache -from typing import List +from typing import Any, Dict, List import pkg_resources import yaml @@ -271,3 +272,41 @@ def _get_sssom_schema_object() -> SSSOMSchemaView: SSSOM_URI_PREFIX = "https://w3id.org/sssom/" DEFAULT_LICENSE = f"{SSSOM_URI_PREFIX}license/unspecified" + +#: The type for metadata that gets passed around in many places +MetadataType = Dict[str, Any] + + +def generate_mapping_set_id() -> str: + """Generate a mapping set ID.""" + return f"{SSSOM_URI_PREFIX}mappings/{uuid.uuid4()}" + + +def get_default_metadata() -> MetadataType: + """Get default metadata. + + :returns: A metadata dictionary containing a default + license with value :data:`DEFAULT_LICENSE` and an + auto-generated mapping set ID + + If you want to combine some metadata you loaded + but ensure that there is also default metadata, + the best tool is :class:`collections.ChainMap`. + You can do: + + .. code-block:: python + + my_metadata: dict | None = ... + + from collections import ChainMap + from sssom import get_default_metadata + + metadata = dict(ChainMap( + my_metadata or {}, + get_default_metadata() + )) + """ + return { + "mapping_set_id": generate_mapping_set_id(), + "license": DEFAULT_LICENSE, + } diff --git a/src/sssom/io.py b/src/sssom/io.py index d90fe2d1..648e131c 100644 --- a/src/sssom/io.py +++ b/src/sssom/io.py @@ -3,8 +3,9 @@ import logging import os import re +from collections import ChainMap from pathlib import Path -from typing import List, Optional, TextIO, Union +from typing import List, Optional, TextIO, Tuple, Union import curies import pandas as pd @@ -16,15 +17,15 @@ from .constants import ( CURIE_MAP, - DEFAULT_LICENSE, PREFIX_MAP_MODE_MERGED, PREFIX_MAP_MODE_METADATA_ONLY, PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY, + MetadataType, SchemaValidationType, + get_default_metadata, ) from .context import get_converter from .parsers import get_parsing_function, parse_sssom_table, split_dataframe -from .typehints import Metadata, generate_mapping_set_id from .util import ( MappingSetDataFrame, are_params_slots, @@ -134,34 +135,25 @@ def split_file(input_path: str, output_directory: Union[str, Path]) -> None: def get_metadata_and_prefix_map( - metadata_path: Union[None, str, Path] = None, prefix_map_mode: Optional[str] = None -) -> Metadata: + metadata_path: Union[None, str, Path] = None, *, prefix_map_mode: Optional[str] = None +) -> Tuple[Converter, MetadataType]: """ - Load SSSOM metadata from a file, and then augments it with default prefixes. + Load SSSOM metadata from a YAML file, and then augment it with default prefixes. :param metadata_path: The metadata file in YAML format :param prefix_map_mode: one of metadata_only, sssom_default_only, merged - :return: a prefix map dictionary and a metadata object dictionary + :return: A converter and remaining metadata from the YAML file """ if metadata_path is None: - return Metadata.default() + return get_converter(), get_default_metadata() with Path(metadata_path).resolve().open() as file: metadata = yaml.safe_load(file) - if not metadata.get("mapping_set_id"): - metadata["mapping_set_id"] = generate_mapping_set_id() - if not metadata.get("license"): - metadata["license"] = DEFAULT_LICENSE - logging.warning(f"No License provided, using {DEFAULT_LICENSE}") - - if CURIE_MAP in metadata: - prefix_map = metadata.pop(CURIE_MAP) - else: - prefix_map = {} - converter = Converter.from_prefix_map(prefix_map) - converter = _merge_converter(converter, prefix_map_mode=prefix_map_mode) - return Metadata(converter=converter, metadata=metadata) + metadata = dict(ChainMap(metadata, get_default_metadata())) + converter = Converter.from_prefix_map(metadata.pop(CURIE_MAP, {})) + converter = _merge_converter(converter, prefix_map_mode=prefix_map_mode) + return converter, metadata def _merge_converter(converter: Converter, prefix_map_mode: str = None) -> Converter: diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py index 88414f26..cda463ce 100644 --- a/src/sssom/parsers.py +++ b/src/sssom/parsers.py @@ -8,7 +8,7 @@ import typing from collections import ChainMap, Counter from pathlib import Path -from typing import Any, Callable, Dict, Iterable, List, Optional, TextIO, Union, cast +from typing import Any, Callable, Dict, Iterable, List, Optional, TextIO, Tuple, Union, cast from xml.dom import Node, minidom from xml.dom.minidom import Document @@ -48,12 +48,13 @@ SUBJECT_LABEL, SUBJECT_SOURCE, SUBJECT_SOURCE_ID, + MetadataType, _get_sssom_schema_object, + get_default_metadata, ) from .context import ConverterHint, _get_built_in_prefix_map, ensure_converter from .sssom_document import MappingSetDocument -from .typehints import Metadata, MetadataType, get_default_metadata from .util import ( SSSOM_DEFAULT_RDF_SERIALISATION, URI_SSSOM_MAPPINGS, @@ -298,17 +299,17 @@ def parse_obographs_json( def _get_prefix_map_and_metadata( prefix_map: ConverterHint = None, meta: Optional[MetadataType] = None -) -> Metadata: - if prefix_map and meta and CURIE_MAP in meta: - logging.info( - "Prefix map provided as parameter, but SSSOM file provides its own prefix map. " - "Prefix map provided externally is disregarded in favour of the prefix map in the SSSOM file." - ) - prefix_map = meta[CURIE_MAP] - converter = ensure_converter(prefix_map) +) -> Tuple[Converter, MetadataType]: if meta is None: - meta = Metadata.default().metadata - return Metadata(converter=converter, metadata=meta) + meta = get_default_metadata() + converter = curies.chain( + [ + _get_built_in_prefix_map(), + Converter.from_prefix_map(meta.pop(CURIE_MAP, {})), + ensure_converter(prefix_map, use_defaults=False), + ] + ) + return converter, meta def _address_multivalued_slot(k: str, v: Any) -> Union[str, List[str]]: diff --git a/src/sssom/sssom_document.py b/src/sssom/sssom_document.py index e5013b36..db0b8633 100644 --- a/src/sssom/sssom_document.py +++ b/src/sssom/sssom_document.py @@ -1,12 +1,11 @@ """Additional SSSOM object models.""" from dataclasses import dataclass +from typing import Dict from curies import Converter from sssom_schema import MappingSet -from .typehints import PrefixMap - __all__ = [ "MappingSetDocument", ] @@ -28,6 +27,6 @@ class MappingSetDocument: converter: Converter @property - def prefix_map(self) -> PrefixMap: + def prefix_map(self) -> Dict[str, str]: """Get a prefix map.""" return dict(self.converter.bimap) diff --git a/src/sssom/typehints.py b/src/sssom/typehints.py deleted file mode 100644 index 4283f773..00000000 --- a/src/sssom/typehints.py +++ /dev/null @@ -1,51 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Type hints for SSSOM.""" - -import uuid -from typing import Any, Dict, NamedTuple - -from curies import Converter - -from sssom.constants import DEFAULT_LICENSE, SSSOM_URI_PREFIX - -__all__ = [ - "PrefixMap", - "MetadataType", - "Metadata", -] - -PrefixMap = Dict[str, str] - -#: TODO replace this with something more specific -MetadataType = Dict[str, Any] - - -class Metadata(NamedTuple): - """A pair of a prefix map and associated metadata.""" - - converter: Converter - metadata: MetadataType - - @classmethod - def default(cls): - """Get default metadata.""" - from .context import get_converter - - return cls( - converter=get_converter(), - metadata=get_default_metadata(), - ) - - -def generate_mapping_set_id() -> str: - """Generate a mapping set ID.""" - return f"{SSSOM_URI_PREFIX}mappings/{uuid.uuid4()}" - - -def get_default_metadata() -> MetadataType: - """Get default metadata.""" - return { - "mapping_set_id": generate_mapping_set_id(), - "license": DEFAULT_LICENSE, - } diff --git a/src/sssom/util.py b/src/sssom/util.py index bd457c25..ce72f628 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -57,7 +57,9 @@ SUBJECT_LABEL, SUBJECT_SOURCE, UNKNOWN_IRI, + MetadataType, _get_sssom_schema_object, + get_default_metadata, ) from .context import ( SSSOM_BUILT_IN_PREFIXES, @@ -67,7 +69,6 @@ get_converter, ) from .sssom_document import MappingSetDocument -from .typehints import MetadataType, get_default_metadata logging = _logging.getLogger(__name__) diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 7adb90a7..335ae68b 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -16,7 +16,7 @@ from curies import Converter from rdflib import Graph -from sssom.constants import CURIE_MAP, DEFAULT_LICENSE, SSSOM_URI_PREFIX +from sssom.constants import CURIE_MAP, DEFAULT_LICENSE, SSSOM_URI_PREFIX, get_default_metadata from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter, get_converter from sssom.io import parse_file from sssom.parsers import ( @@ -29,7 +29,6 @@ from_sssom_rdf, parse_sssom_table, ) -from sssom.typehints import get_default_metadata from sssom.util import MappingSetDataFrame, sort_df_rows_columns from sssom.writers import write_table from tests.test_data import data_dir as test_data_dir