diff --git a/python/lsst/daf/butler/registry/_caching_context.py b/python/lsst/daf/butler/registry/_caching_context.py
index 81362df826..631f3ab278 100644
--- a/python/lsst/daf/butler/registry/_caching_context.py
+++ b/python/lsst/daf/butler/registry/_caching_context.py
@@ -27,19 +27,14 @@
 
 from __future__ import annotations
 
-__all__ = ["CachingContext", "GenericCachingContext"]
-
-from typing import Generic, TypeAlias, TypeVar
+__all__ = ["CachingContext"]
 
 from ._collection_record_cache import CollectionRecordCache
 from ._collection_summary_cache import CollectionSummaryCache
 from ._dataset_type_cache import DatasetTypeCache
 
-_T = TypeVar("_T")
-_U = TypeVar("_U")
-
 
-class GenericCachingContext(Generic[_T, _U]):
+class CachingContext:
     """Collection of caches for various types of records retrieved from
     database.
 
@@ -54,16 +49,10 @@ class is passed to the relevant managers that can use it to query or
 
     Dataset type cache is always enabled for now, this avoids the need for
     explicitly enabling caching in pipetask executors.
-
-    `GenericCachingContext` is generic over two kinds of opaque dataset type
-    data, with the expectation that most code will use the ``CachingContext``
-    type alias (which resolves to `GenericCachingContext[object, object]`);
-    the `DatasetRecordStorageManager` can then cast this to a
-    `GenericCachingContext` with the actual opaque data types it uses.
     """
 
     def __init__(self) -> None:
-        self._dataset_types: DatasetTypeCache[_T, _U] = DatasetTypeCache()
+        self._dataset_types: DatasetTypeCache = DatasetTypeCache()
         self._collection_records: CollectionRecordCache | None = None
         self._collection_summaries: CollectionSummaryCache | None = None
         self._depth = 0
@@ -109,9 +98,6 @@ def collection_summaries(self) -> CollectionSummaryCache | None:
         return self._collection_summaries
 
     @property
-    def dataset_types(self) -> DatasetTypeCache[_T, _U]:
+    def dataset_types(self) -> DatasetTypeCache:
         """Cache for dataset types, never disabled (`DatasetTypeCache`)."""
         return self._dataset_types
-
-
-CachingContext: TypeAlias = GenericCachingContext[object, object]
diff --git a/python/lsst/daf/butler/registry/_dataset_type_cache.py b/python/lsst/daf/butler/registry/_dataset_type_cache.py
index ab43c80ca5..17569b4058 100644
--- a/python/lsst/daf/butler/registry/_dataset_type_cache.py
+++ b/python/lsst/daf/butler/registry/_dataset_type_cache.py
@@ -30,29 +30,24 @@
 __all__ = ("DatasetTypeCache",)
 
 from collections.abc import Iterable, Iterator
-from typing import Generic, TypeVar
+from typing import TYPE_CHECKING
 
 from .._dataset_type import DatasetType
 from ..dimensions import DimensionGroup
 
-_T = TypeVar("_T")
-_U = TypeVar("_U")
+if TYPE_CHECKING:
+    from .datasets.byDimensions.tables import DynamicTables
 
 
-class DatasetTypeCache(Generic[_T, _U]):
+class DatasetTypeCache:
     """Cache for dataset types.
 
     Notes
    -----
     This cache is a pair of mappings with different kinds of keys:
 
-    - the `DatasetType` itself is cached by name, as is some opaque data used
-      only by a `DatasetRecordStorageManager` implementation;
-    - additional opaque data (also used only by `DatasetRecordStorageManager`
-      implementations can be cached by the dimensions dataset types (i.e. a
-      `DimensionGroup`).
-
-    `DatasetTypeCache` is generic over these two opaque data types.
+    - Dataset type name -> (`DatasetType`, database integer primary key)
+    - `DimensionGroup` -> database table information
 
     In some contexts (e.g. ``resolve_wildcard``) a full list of dataset types
     is needed.
     To signify that cache content can be used in such contexts,
@@ -62,8 +57,8 @@ class DatasetTypeCache(Generic[_T, _U]):
     """
 
     def __init__(self) -> None:
-        self._by_name_cache: dict[str, tuple[DatasetType, _T]] = {}
-        self._by_dimensions_cache: dict[DimensionGroup, _U] = {}
+        self._by_name_cache: dict[str, tuple[DatasetType, int]] = {}
+        self._by_dimensions_cache: dict[DimensionGroup, DynamicTables] = {}
         self._full = False
         self._dimensions_full = False
 
@@ -77,7 +72,7 @@ def dimensions_full(self) -> bool:
         """`True` if cache holds all known dataset type dimensions (`bool`)."""
         return self._dimensions_full
 
-    def add(self, dataset_type: DatasetType, extra: _T) -> None:
+    def add(self, dataset_type: DatasetType, id: int) -> None:
         """Add one record to the cache.
 
         Parameters
@@ -85,17 +80,18 @@ def add(self, dataset_type: DatasetType, extra: _T) -> None:
         dataset_type : `DatasetType`
             Dataset type, replaces any existing dataset type with the same
            name.
-        extra : `Any`
+        id : `int`
+            The dataset type primary key.
             Additional opaque object stored with this dataset type.
         """
-        self._by_name_cache[dataset_type.name] = (dataset_type, extra)
+        self._by_name_cache[dataset_type.name] = (dataset_type, id)
 
     def set(
         self,
-        data: Iterable[tuple[DatasetType, _T]],
+        data: Iterable[tuple[DatasetType, int]],
         *,
         full: bool = False,
-        dimensions_data: Iterable[tuple[DimensionGroup, _U]] | None = None,
+        dimensions_data: Iterable[tuple[DimensionGroup, DynamicTables]] | None = None,
         dimensions_full: bool = False,
     ) -> None:
         """Replace cache contents with the new set of dataset types.
@@ -136,7 +132,7 @@ def discard(self, name: str) -> None:
         """
         self._by_name_cache.pop(name, None)
 
-    def get(self, name: str) -> tuple[DatasetType | None, _T | None]:
+    def get(self, name: str) -> tuple[DatasetType | None, int | None]:
         """Return cached info given dataset type name.
 
         Parameters
@@ -177,7 +173,7 @@ def get_dataset_type(self, name: str) -> DatasetType | None:
             return None
         return item[0]
 
-    def items(self) -> Iterator[tuple[DatasetType, _T]]:
+    def items(self) -> Iterator[tuple[DatasetType, int]]:
         """Return iterator for the set of items in the cache, can only be
         used if `full` is true.
 
@@ -195,19 +191,19 @@ def items(self) -> Iterator[tuple[DatasetType, _T]]:
             raise RuntimeError("cannot call items() if cache is not full")
         return iter(self._by_name_cache.values())
 
-    def add_by_dimensions(self, dimensions: DimensionGroup, extra: _U) -> None:
+    def add_by_dimensions(self, dimensions: DimensionGroup, tables: DynamicTables) -> None:
         """Add information about a set of dataset type dimensions to the
         cache.
 
         Parameters
         ----------
         dimensions : `DimensionGroup`
             Dimensions of one or more dataset types.
-        extra : `Any`
+        tables : `DynamicTables`
             Additional opaque object stored with these dimensions.
         """
-        self._by_dimensions_cache[dimensions] = extra
+        self._by_dimensions_cache[dimensions] = tables
 
-    def get_by_dimensions(self, dimensions: DimensionGroup) -> _U | None:
+    def get_by_dimensions(self, dimensions: DimensionGroup) -> DynamicTables | None:
         """Get information about a set of dataset type dimensions.
 
         Parameters
@@ -217,13 +213,13 @@ def get_by_dimensions(self, dimensions: DimensionGroup) -> _U | None:
 
         Returns
         -------
-        extra : `Any` or `None`
+        tables : `DynamicTables` or `None`
             Additional opaque object stored with these dimensions, or `None`
             if these dimensions are not present in the cache.
""" return self._by_dimensions_cache.get(dimensions) - def by_dimensions_items(self) -> Iterator[tuple[DimensionGroup, _U]]: + def by_dimensions_items(self) -> Iterator[tuple[DimensionGroup, DynamicTables]]: """Return iterator for all dimensions-keyed data in the cache. This can only be called if `dimensions_full` is `True`. diff --git a/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py b/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py index 590e4b7702..e8b9ff7222 100644 --- a/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py +++ b/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py @@ -6,7 +6,7 @@ import datetime import logging from collections.abc import Iterable, Mapping, Sequence, Set -from typing import TYPE_CHECKING, Any, ClassVar, cast +from typing import TYPE_CHECKING, Any, ClassVar import astropy.time import sqlalchemy @@ -24,7 +24,7 @@ from ....dimensions import DataCoordinate, DimensionGroup, DimensionUniverse from ....direct_query_driver import SqlJoinsBuilder, SqlSelectBuilder # new query system, server+direct only from ....queries import tree as qt # new query system, both clients + server -from ..._caching_context import CachingContext, GenericCachingContext +from ..._caching_context import CachingContext from ..._collection_summary import CollectionSummary from ..._exceptions import ConflictingDefinitionError, DatasetTypeExpressionError, OrphanedRecordError from ...interfaces import DatasetRecordStorageManager, RunRecord, VersionTuple @@ -155,7 +155,7 @@ def __init__( self._dimensions = dimensions self._static = static self._summaries = summaries - self._caching_context = cast(GenericCachingContext[int, DynamicTables], caching_context) + self._caching_context = caching_context self._use_astropy_ingest_date = self.ingest_date_dtype() is ddl.AstropyTimeNsecTai self._run_key_column = collections.getRunForeignKeyName()