Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup Metadata usages #447

Merged
merged 29 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/sssom/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from sssom_schema import Mapping, MappingSet, slots # noqa:401

from sssom.io import get_metadata_and_prefix_map # noqa:401
from sssom.sssom_document import MappingSetDocument # noqa:401
from sssom.util import ( # noqa:401
MappingSetDataFrame,
Expand All @@ -20,3 +21,5 @@
parse,
reconcile_prefix_and_data,
)

from .constants import generate_mapping_set_id, get_default_metadata # noqa:401
41 changes: 40 additions & 1 deletion src/sssom/constants.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"""Constants."""

import pathlib
import uuid
from enum import Enum
from functools import lru_cache
from typing import List
from typing import Any, Dict, List

import pkg_resources
import yaml
Expand Down Expand Up @@ -271,3 +272,41 @@ def _get_sssom_schema_object() -> SSSOMSchemaView:

#: Base URI from which the default license URI and generated mapping set IDs are built
SSSOM_URI_PREFIX = "https://w3id.org/sssom/"
#: License URI that is placed in the default metadata
DEFAULT_LICENSE = f"{SSSOM_URI_PREFIX}license/unspecified"

#: The type for metadata that gets passed around in many places
MetadataType = Dict[str, Any]


def generate_mapping_set_id() -> str:
    """Create a new mapping set ID.

    :returns: A URI made of :data:`SSSOM_URI_PREFIX`, the path segment
        ``mappings/``, and a freshly generated random (version 4) UUID
    """
    unique_part = uuid.uuid4()
    return f"{SSSOM_URI_PREFIX}mappings/{unique_part}"


def get_default_metadata() -> MetadataType:
    """Build a fresh dictionary of default mapping set metadata.

    Each call produces a newly generated mapping set ID.

    To make sure metadata you loaded yourself is backed by these
    defaults, layer it on top of them with
    :class:`collections.ChainMap`, in which earlier mappings take
    precedence over later ones:

    .. code-block:: python

        from collections import ChainMap
        from sssom import get_default_metadata

        my_metadata: dict | None = ...

        metadata = dict(ChainMap(
            my_metadata or {},
            get_default_metadata()
        ))

    :returns: A metadata dictionary with two entries:
        ``mapping_set_id``, holding an auto-generated mapping set ID,
        and ``license``, holding :data:`DEFAULT_LICENSE`
    """
    defaults: MetadataType = dict(
        mapping_set_id=generate_mapping_set_id(),
        license=DEFAULT_LICENSE,
    )
    return defaults
34 changes: 13 additions & 21 deletions src/sssom/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import logging
import os
import re
from collections import ChainMap
from pathlib import Path
from typing import List, Optional, TextIO, Union
from typing import List, Optional, TextIO, Tuple, Union

import curies
import pandas as pd
Expand All @@ -16,15 +17,15 @@

from .constants import (
CURIE_MAP,
DEFAULT_LICENSE,
PREFIX_MAP_MODE_MERGED,
PREFIX_MAP_MODE_METADATA_ONLY,
PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY,
MetadataType,
SchemaValidationType,
get_default_metadata,
)
from .context import get_converter
from .parsers import get_parsing_function, parse_sssom_table, split_dataframe
from .typehints import Metadata, generate_mapping_set_id
from .util import (
MappingSetDataFrame,
are_params_slots,
Expand Down Expand Up @@ -134,34 +135,25 @@ def split_file(input_path: str, output_directory: Union[str, Path]) -> None:


def get_metadata_and_prefix_map(
metadata_path: Union[None, str, Path] = None, prefix_map_mode: Optional[str] = None
) -> Metadata:
metadata_path: Union[None, str, Path] = None, *, prefix_map_mode: Optional[str] = None
) -> Tuple[Converter, MetadataType]:
"""
Load SSSOM metadata from a file, and then augments it with default prefixes.
Load SSSOM metadata from a YAML file, and then augment it with default prefixes.

:param metadata_path: The metadata file in YAML format
:param prefix_map_mode: one of metadata_only, sssom_default_only, merged
:return: a prefix map dictionary and a metadata object dictionary
:return: A converter and remaining metadata from the YAML file
"""
if metadata_path is None:
return Metadata.default()
return get_converter(), get_default_metadata()

with Path(metadata_path).resolve().open() as file:
metadata = yaml.safe_load(file)
if not metadata.get("mapping_set_id"):
metadata["mapping_set_id"] = generate_mapping_set_id()
if not metadata.get("license"):
metadata["license"] = DEFAULT_LICENSE
logging.warning(f"No License provided, using {DEFAULT_LICENSE}")
matentzn marked this conversation as resolved.
Show resolved Hide resolved

if CURIE_MAP in metadata:
prefix_map = metadata.pop(CURIE_MAP)
else:
prefix_map = {}
converter = Converter.from_prefix_map(prefix_map)
converter = _merge_converter(converter, prefix_map_mode=prefix_map_mode)

return Metadata(converter=converter, metadata=metadata)
metadata = dict(ChainMap(metadata, get_default_metadata()))
converter = Converter.from_prefix_map(metadata.pop(CURIE_MAP, {}))
converter = _merge_converter(converter, prefix_map_mode=prefix_map_mode)
return converter, metadata


def _merge_converter(converter: Converter, prefix_map_mode: str = None) -> Converter:
Expand Down
25 changes: 13 additions & 12 deletions src/sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import typing
from collections import ChainMap, Counter
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, TextIO, Union, cast
from typing import Any, Callable, Dict, Iterable, List, Optional, TextIO, Tuple, Union, cast
from xml.dom import Node, minidom
from xml.dom.minidom import Document

Expand Down Expand Up @@ -48,12 +48,13 @@
SUBJECT_LABEL,
SUBJECT_SOURCE,
SUBJECT_SOURCE_ID,
MetadataType,
_get_sssom_schema_object,
get_default_metadata,
)

from .context import ConverterHint, _get_built_in_prefix_map, ensure_converter
from .sssom_document import MappingSetDocument
from .typehints import Metadata, MetadataType, get_default_metadata
from .util import (
SSSOM_DEFAULT_RDF_SERIALISATION,
URI_SSSOM_MAPPINGS,
Expand Down Expand Up @@ -298,17 +299,17 @@ def parse_obographs_json(

def _get_prefix_map_and_metadata(
prefix_map: ConverterHint = None, meta: Optional[MetadataType] = None
) -> Metadata:
if prefix_map and meta and CURIE_MAP in meta:
logging.info(
"Prefix map provided as parameter, but SSSOM file provides its own prefix map. "
"Prefix map provided externally is disregarded in favour of the prefix map in the SSSOM file."
)
prefix_map = meta[CURIE_MAP]
converter = ensure_converter(prefix_map)
) -> Tuple[Converter, MetadataType]:
if meta is None:
meta = Metadata.default().metadata
return Metadata(converter=converter, metadata=meta)
meta = get_default_metadata()
converter = curies.chain(
[
_get_built_in_prefix_map(),
Converter.from_prefix_map(meta.pop(CURIE_MAP, {})),
ensure_converter(prefix_map, use_defaults=False),
]
)
return converter, meta


def _address_multivalued_slot(k: str, v: Any) -> Union[str, List[str]]:
Expand Down
5 changes: 2 additions & 3 deletions src/sssom/sssom_document.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
"""Additional SSSOM object models."""

from dataclasses import dataclass
from typing import Dict

from curies import Converter
from sssom_schema import MappingSet

from .typehints import PrefixMap

__all__ = [
"MappingSetDocument",
]
Expand All @@ -28,6 +27,6 @@ class MappingSetDocument:
converter: Converter

@property
def prefix_map(self) -> PrefixMap:
def prefix_map(self) -> Dict[str, str]:
"""Get a prefix map."""
return dict(self.converter.bimap)
51 changes: 0 additions & 51 deletions src/sssom/typehints.py

This file was deleted.

3 changes: 2 additions & 1 deletion src/sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@
SUBJECT_LABEL,
SUBJECT_SOURCE,
UNKNOWN_IRI,
MetadataType,
_get_sssom_schema_object,
get_default_metadata,
)
from .context import (
SSSOM_BUILT_IN_PREFIXES,
Expand All @@ -67,7 +69,6 @@
get_converter,
)
from .sssom_document import MappingSetDocument
from .typehints import MetadataType, get_default_metadata

logging = _logging.getLogger(__name__)

Expand Down
3 changes: 1 addition & 2 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from curies import Converter
from rdflib import Graph

from sssom.constants import CURIE_MAP, DEFAULT_LICENSE, SSSOM_URI_PREFIX
from sssom.constants import CURIE_MAP, DEFAULT_LICENSE, SSSOM_URI_PREFIX, get_default_metadata
from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter, get_converter
from sssom.io import parse_file
from sssom.parsers import (
Expand All @@ -29,7 +29,6 @@
from_sssom_rdf,
parse_sssom_table,
)
from sssom.typehints import get_default_metadata
from sssom.util import MappingSetDataFrame, sort_df_rows_columns
from sssom.writers import write_table
from tests.test_data import data_dir as test_data_dir
Expand Down
Loading