From 98141ff88538079038451a2eba9d1fa45702b9ed Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Wed, 1 Nov 2023 10:55:58 -0400 Subject: [PATCH 01/16] Use typing.overload to clean up DimensionUniverse.sorted annotations. --- .../lsst/daf/butler/dimensions/_universe.py | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/python/lsst/daf/butler/dimensions/_universe.py b/python/lsst/daf/butler/dimensions/_universe.py index a284605e8d..3e5f4f1d98 100644 --- a/python/lsst/daf/butler/dimensions/_universe.py +++ b/python/lsst/daf/butler/dimensions/_universe.py @@ -33,8 +33,8 @@ import math import pickle from collections import defaultdict -from collections.abc import Iterable, Mapping -from typing import TYPE_CHECKING, Any, ClassVar, TypeVar +from collections.abc import Iterable, Mapping, Sequence +from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, overload from deprecated.sphinx import deprecated from lsst.utils.classes import cached_getter, immutable @@ -432,7 +432,17 @@ def extract(self, iterable: Iterable[Dimension | str]) -> DimensionGraph: names.add(item) return DimensionGraph(universe=self, names=names) - def sorted(self, elements: Iterable[E | str], *, reverse: bool = False) -> list[E]: + @overload + def sorted(self, elements: Iterable[Dimension], *, reverse: bool = False) -> Sequence[Dimension]: + ... + + @overload + def sorted( + self, elements: Iterable[DimensionElement | str], *, reverse: bool = False + ) -> Sequence[DimensionElement]: + ... + + def sorted(self, elements: Iterable[Any], *, reverse: bool = False) -> list[Any]: """Return a sorted version of the given iterable of dimension elements. The universe's sort order is topological (an element's dependencies @@ -448,16 +458,15 @@ def sorted(self, elements: Iterable[E | str], *, reverse: bool = False) -> list[ Returns ------- - sorted : `list` of `DimensionElement` - A sorted list containing the same elements that were given. + sorted : `~collections.abc.Sequence` [ `Dimension` or \ + `DimensionElement` ] + A sorted sequence containing the same elements that were given. """ s = set(elements) result = [element for element in self._elements if element in s or element.name in s] if reverse: result.reverse() - # mypy thinks this can return DimensionElements even if all the user - # passed it was Dimensions; we know better. - return result # type: ignore + return result # TODO: Remove this method on DM-38687. @deprecated( From b39a323cc2426982f8ad64d8427f65eecefa899a Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Wed, 1 Nov 2023 11:34:31 -0400 Subject: [PATCH 02/16] Use typing.overload to clean up Named container annotations. 
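With these overloads a type checker can infer a precise return type for
get(): V | None when no default is given, and V | D when a default of type D
is supplied, instead of falling back to Any. A minimal, self-contained sketch
of the same pattern (illustrative only, not daf_butler code):

    from collections.abc import Iterator, Mapping
    from typing import Any, TypeVar, overload

    _V = TypeVar("_V")


    class ExampleMapping(Mapping[str, int]):
        """Toy stand-in for the Named containers."""

        def __init__(self, data: dict[str, int]):
            self._data = data

        def __getitem__(self, key: str) -> int:
            return self._data[key]

        def __iter__(self) -> Iterator[str]:
            return iter(self._data)

        def __len__(self) -> int:
            return len(self._data)

        @overload
        def get(self, key: object) -> int | None:
            ...

        @overload
        def get(self, key: object, default: _V) -> int | _V:
            ...

        def get(self, key: Any, default: Any = None) -> Any:
            # Mapping.get already accepts arbitrary keys at runtime; the
            # overloads only refine what type checkers report.
            return super().get(key, default)


    m = ExampleMapping({"visit": 42})
    a = m.get("visit")      # inferred as int | None
    b = m.get("visit", 0)   # inferred as int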
--- python/lsst/daf/butler/_named.py | 26 ++++++++++++++----- .../daf/butler/datastore/file_templates.py | 2 +- .../daf/butler/registry/obscore/_records.py | 2 +- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/python/lsst/daf/butler/_named.py b/python/lsst/daf/butler/_named.py index e3ff851b72..db26f7a188 100644 --- a/python/lsst/daf/butler/_named.py +++ b/python/lsst/daf/butler/_named.py @@ -50,7 +50,7 @@ ValuesView, ) from types import MappingProxyType -from typing import Any, Protocol, TypeVar +from typing import Any, Protocol, TypeVar, overload class Named(Protocol): @@ -120,10 +120,16 @@ def keys(self) -> NamedValueAbstractSet[K]: # type: ignore def __getitem__(self, key: str | K) -> V_co: raise NotImplementedError() - def get(self, key: str | K, default: Any = None) -> Any: - # Delegating to super is not allowed by typing, because it doesn't - # accept str, but we know it just delegates to __getitem__, which does. - return super().get(key, default) # type: ignore + @overload + def get(self, key: object) -> V_co | None: + ... + + @overload + def get(self, key: object, default: V) -> V_co | V: + ... + + def get(self, key: Any, default: Any = None) -> Any: + return super().get(key, default) NameLookupMapping = NamedKeyMapping[K, V_co] | Mapping[str, V_co] @@ -305,7 +311,15 @@ def asMapping(self) -> Mapping[str, K_co]: def __getitem__(self, key: str | K_co) -> K_co: raise NotImplementedError() - def get(self, key: str | K_co, default: Any = None) -> Any: + @overload + def get(self, key: object) -> K_co | None: + ... + + @overload + def get(self, key: object, default: V) -> K_co | V: + ... + + def get(self, key: Any, default: Any = None) -> Any: """Return the element with the given name. Returns ``default`` if no such element is present. diff --git a/python/lsst/daf/butler/datastore/file_templates.py b/python/lsst/daf/butler/datastore/file_templates.py index 17c3ca80ca..ec9e2e271a 100644 --- a/python/lsst/daf/butler/datastore/file_templates.py +++ b/python/lsst/daf/butler/datastore/file_templates.py @@ -448,7 +448,7 @@ def format(self, ref: DatasetRef) -> str: # the case where only required dimensions are present (which in this # context should only happen in unit tests; in general we need all # dimensions to fill out templates). - fields = { + fields: dict[str, object] = { k: ref.dataId.get(k) for k in ref.datasetType.dimensions.names if ref.dataId.get(k) is not None } # Extra information that can be included using . syntax diff --git a/python/lsst/daf/butler/registry/obscore/_records.py b/python/lsst/daf/butler/registry/obscore/_records.py index 615cd34681..d0be555aad 100644 --- a/python/lsst/daf/butler/registry/obscore/_records.py +++ b/python/lsst/daf/butler/registry/obscore/_records.py @@ -211,7 +211,7 @@ def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Record | None: if self.band in dataId: em_range = None if (label := dataId.get(self.physical_filter)) is not None: - em_range = self.config.spectral_ranges.get(label) + em_range = self.config.spectral_ranges.get(cast(str, label)) if not em_range: band_name = dataId[self.band] assert isinstance(band_name, str), "Band name must be string" From 2988d2d7b03209d763fe9173cf09fab601b8c888 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Wed, 1 Nov 2023 10:58:11 -0400 Subject: [PATCH 03/16] Add DimensionGroup and deprecate DimensionGraph. This adds a lot of warnings; DimensionGraph usage will be migrated in later commits. 
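As a rough sketch of the intended migration (assuming the default dimension
configuration, in which visit is one of the standard dimensions; the exact
set contents depend on that configuration):

    from lsst.daf.butler import DimensionUniverse

    universe = DimensionUniverse()        # default dimension configuration
    group = universe.conform(["visit"])   # DimensionGroup; dependencies are expanded
    print(group.required.names)           # set-like view of str names, not Dimension instances

    # The old spellings keep working for now, but emit FutureWarning:
    graph = universe.extract(["visit"])   # deprecated in favor of conform()
    print(graph.dimensions.names)         # deprecated in favor of graph.names

Both deprecated spellings above are among the warnings this commit adds.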
Due to those warnings, one test in test_obscore.py has been temporarily disabled, because it counts warnings, and I couldn't figure out how to make it only count the kind of warning it's actually looking for. --- doc/lsst.daf.butler/dimensions.rst | 18 +- python/lsst/daf/butler/dimensions/__init__.py | 1 + .../lsst/daf/butler/dimensions/_coordinate.py | 25 +- python/lsst/daf/butler/dimensions/_graph.py | 435 ++++++++++-------- python/lsst/daf/butler/dimensions/_group.py | 431 +++++++++++++++++ .../lsst/daf/butler/dimensions/_universe.py | 71 ++- .../daf/butler/registry/queries/_readers.py | 4 +- tests/test_obscore.py | 1 + 8 files changed, 756 insertions(+), 230 deletions(-) create mode 100644 python/lsst/daf/butler/dimensions/_group.py diff --git a/doc/lsst.daf.butler/dimensions.rst b/doc/lsst.daf.butler/dimensions.rst index e9c77e215e..8f7114e31c 100644 --- a/doc/lsst.daf.butler/dimensions.rst +++ b/doc/lsst.daf.butler/dimensions.rst @@ -31,15 +31,11 @@ There are two kinds of relationships: For example, the visit dimension has an implied dependency on the physical filter dimension, because a visit is observed through exactly one filter and hence each visit ID determines a filter name. When both dimensions are associated with database tables, an implied dependency involves having a foreign key field in the dependent table that is not part of a primary key in the dependent table. -A `DimensionGraph` is an immutable, set-like container of dimensions that is guaranteed to (recursively) include all dependencies of any dimension in the graph. -It also categorizes those dimensions into `~DimensionGraph.required` and `~DimensionGraph.implied` subsets, which have roughly the same meaning for a set of dimensions as they do for a single dimension: once the primary key values of all of the required dimensions are known, the primary key values of all implied dimensions are known as well. -`DimensionGraph` also guarantees a deterministic and topological sort order for its elements. +A `DimensionGroup` is an immutable, set-like container of dimensions that is guaranteed to (recursively) include all dependencies of any dimension in the set. +It also categorizes those dimensions into `~DimensionGroup.required` and `~DimensionGroup.implied` subsets, which have roughly the same meaning for a set of dimensions as they do for a single dimension: once the primary key values of all of the required dimensions are known, the primary key values of all implied dimensions are known as well. +`DimensionGroup` also guarantees a deterministic and topological sort order for its elements. -Because `Dimension` instances have a `~Dimension.name` attribute, we typically -use `~lsst.daf.butler.NamedValueSet` and `~lsst.daf.butler.NamedKeyDict` as containers when immutability is needed or the guarantees of `DimensionGraph`. -This allows the string names of dimensions to be used as well in most places where `Dimension` instances are expected. - -The complete set of all compatible dimensions is held by a special subclass of `DimensionGraph`, `DimensionUniverse`. +The complete set of all recognized dimensions is managed by a `DimensionUniverse`. A dimension universe is constructed from configuration, and is responsible for constructing all `Dimension` and `DimensionElement` instances; within a universe, there is exactly one `Dimension` instance that is always used to represent a particular dimension. 
`DimensionUniverse` instances themselves are held in a global map keyed by the version number in the configuration used for construction, so they behave somewhat like singletons. @@ -50,15 +46,15 @@ Data IDs -------- The most common way butler users encounter dimensions is as the keys in a *data ID*, a dictionary that maps dimensions to their primary key values. -Different datasets with the same `DatasetType` are always identified by the same set of dimensions (i.e. the same set of data ID keys), and hence a `DatasetType` instance holds a `DimensionGraph` that contains exactly those keys. +Different datasets with the same `DatasetType` are always identified by the same set of dimensions (i.e. the same set of data ID keys), and hence a `DatasetType` instance holds a `DimensionGroup` that contains exactly those keys. Many data IDs are simply Python dictionaries that use the string names of dimensions or actual `Dimension` instances as keys. Most `Butler` and `Registry` APIs that accept data IDs as input accept both dictionaries and keyword arguments that are added to these dictionaries automatically. The data IDs returned by the `Butler` or `Registry` (and most of those used internally) are usually instances of the `DataCoordinate` class. `DataCoordinate` instances can have different states of knowledge about the dimensions they identify. -They always contain at least the key-value pairs that correspond to its `DimensionGraph`\ 's `~DimensionGraph.required` subset -- that is, the minimal set of keys needed to fully identify all other dimensions in the graph. -They can also contain key-value pairs for the `~DimensionGraph.implied` subset (a state indicated by `DataCoordinate.hasFull()` returning `True`). +They always contain at least the key-value pairs that correspond to its `DimensionGroup`\ 's `~DimensionGroup.required` subset -- that is, the minimal set of keys needed to fully identify all other dimensions in the set. +They can also contain key-value pairs for the `~DimensionGroup.implied` subset (a state indicated by `DataCoordinate.hasFull()` returning `True`). And if `DataCoordinate.hasRecords` returns `True`, the data ID also holds all of the metadata records associated with its dimensions, both as a mapping in the `~DataCoordinate.records` attribute and via dynamic attribute access, e.g. ``data_id.exposure.day_obs``. diff --git a/python/lsst/daf/butler/dimensions/__init__.py b/python/lsst/daf/butler/dimensions/__init__.py index 75ab7b283d..0f53208258 100644 --- a/python/lsst/daf/butler/dimensions/__init__.py +++ b/python/lsst/daf/butler/dimensions/__init__.py @@ -41,6 +41,7 @@ from ._elements import * from ._governor import * from ._graph import * +from ._group import * from ._packer import * from ._records import * from ._schema import * diff --git a/python/lsst/daf/butler/dimensions/_coordinate.py b/python/lsst/daf/butler/dimensions/_coordinate.py index 474365e865..a577280dfa 100644 --- a/python/lsst/daf/butler/dimensions/_coordinate.py +++ b/python/lsst/daf/butler/dimensions/_coordinate.py @@ -254,7 +254,7 @@ def standardize( for k, v in defaults.items(): d.setdefault(k.name, v) if d.keys() >= graph.dimensions.names: - values = tuple(d[name] for name in graph._dataCoordinateIndices) + values = tuple(d[name] for name in graph._group._data_coordinate_indices) else: try: values = tuple(d[name] for name in graph.required.names) @@ -880,9 +880,9 @@ class _BasicTupleDataCoordinate(DataCoordinate): graph : `DimensionGraph` The dimensions to be identified. 
values : `tuple` [ `int` or `str` ] - Data ID values, ordered to match ``graph._dataCoordinateIndices``. May - include values for just required dimensions (which always come first) - or all dimensions. + Data ID values, ordered like the concatenation of ``graph.required`` + and ``graph.implied``. May include values for just required dimensions + (which is why these always come first) or all dimensions. """ def __init__(self, graph: DimensionGraph, values: tuple[DataIdValue, ...]): @@ -900,7 +900,7 @@ def __getitem__(self, key: DataIdKey) -> DataIdValue: # Docstring inherited from DataCoordinate. if isinstance(key, Dimension): key = key.name - index = self._graph._dataCoordinateIndices[key] + index = self._graph._group._data_coordinate_indices[key] try: return self._values[index] except IndexError: @@ -922,7 +922,7 @@ def subset(self, graph: DimensionGraph) -> DataCoordinate: elif self.hasFull() or self._graph.required >= graph.dimensions: return _BasicTupleDataCoordinate( graph, - tuple(self[k] for k in graph._dataCoordinateIndices), + tuple(self[k] for k in graph._group._data_coordinate_indices), ) else: return _BasicTupleDataCoordinate(graph, tuple(self[k] for k in graph.required.names)) @@ -977,7 +977,7 @@ def expanded( def hasFull(self) -> bool: # Docstring inherited from DataCoordinate. - return len(self._values) == len(self._graph._dataCoordinateIndices) + return len(self._values) == len(self._graph._group._data_coordinate_indices) def hasRecords(self) -> bool: # Docstring inherited from DataCoordinate. @@ -1014,16 +1014,15 @@ class _ExpandedTupleDataCoordinate(_BasicTupleDataCoordinate): graph : `DimensionGraph` The dimensions to be identified. values : `tuple` [ `int` or `str` ] - Data ID values, ordered to match ``graph._dataCoordinateIndices``. - May include values for just required dimensions (which always come - first) or all dimensions. + Data ID values, ordered like the concatenation of ``graph.required`` + and ``graph.implied``. records : `~collections.abc.Mapping` [ `str`, `DimensionRecord` or `None` ] A `NamedKeyMapping` with `DimensionElement` keys or a regular `~collections.abc.Mapping` with `str` (`DimensionElement` name) keys and `DimensionRecord` values. Keys must cover all elements in ``self.graph.elements``. Values may be `None`, but only to reflect - actual NULL values in the database, not just records that have not - been fetched. + actual NULL values in the database, not just records that have not been + fetched. 
""" def __init__( @@ -1043,7 +1042,7 @@ def subset(self, graph: DimensionGraph) -> DataCoordinate: if self._graph == graph: return self return _ExpandedTupleDataCoordinate( - graph, tuple(self[k] for k in graph._dataCoordinateIndices), records=self._records + graph, tuple(self[k] for k in graph._group._data_coordinate_indices), records=self._records ) def expanded( diff --git a/python/lsst/daf/butler/dimensions/_graph.py b/python/lsst/daf/butler/dimensions/_graph.py index 8026b5306e..276f1d0d97 100644 --- a/python/lsst/daf/butler/dimensions/_graph.py +++ b/python/lsst/daf/butler/dimensions/_graph.py @@ -29,17 +29,19 @@ __all__ = ["DimensionGraph", "SerializedDimensionGraph"] -import itertools +import warnings from collections.abc import Iterable, Iterator, Mapping, Set -from types import MappingProxyType -from typing import TYPE_CHECKING, Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, cast +from deprecated.sphinx import deprecated from lsst.daf.butler._compat import _BaseModelCompat from lsst.utils.classes import cached_getter, immutable +from lsst.utils.introspection import find_outside_stacklevel -from .._named import NamedValueAbstractSet, NamedValueSet +from .._named import NamedValueAbstractSet, NameMappingSetView from .._topology import TopologicalFamily, TopologicalSpace from ..json import from_json_pydantic, to_json_pydantic +from ._group import DimensionGroup if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. from ..registry import Registry @@ -66,33 +68,138 @@ def direct(cls, *, names: list[str]) -> SerializedDimensionGraph: return cls.model_construct(names=names) +_T = TypeVar("_T", bound="DimensionElement", covariant=True) + + +# TODO: Remove on DM-41326. +_NVAS_DEPRECATION_MSG = """DimensionGraph is deprecated in favor of +DimensionGroup, which uses sets of str names instead of NamedValueAbstractSets +of Dimension or DimensionElement instances. Support for the +NamedValueAbstractSet interfaces on this object will be dropped after v27. +""" + + +class _DimensionGraphNamedValueSet(NameMappingSetView[_T]): + def __init__(self, keys: Set[str], universe: DimensionUniverse): + super().__init__({k: cast(_T, universe[k]) for k in keys}) + + # TODO: Remove on DM-41326. + @deprecated( + _NVAS_DEPRECATION_MSG + + "Use a dict comprehension and DimensionUniverse indexing to construct a mapping when needed.", + version="v27", + category=FutureWarning, + ) + def asMapping(self) -> Mapping[str, _T]: + return super().asMapping() + + # TODO: Remove on DM-41326. + @deprecated( + _NVAS_DEPRECATION_MSG + "Use DimensionUniverse for DimensionElement lookups.", + version="v27", + category=FutureWarning, + ) + def __getitem__(self, key: str | _T) -> _T: + return super().__getitem__(key) + + def __contains__(self, key: Any) -> bool: + from ._elements import DimensionElement + + if isinstance(key, DimensionElement): + warnings.warn( + _NVAS_DEPRECATION_MSG + "'in' expressions must use str keys.", + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.daf.butler."), + ) + return super().__contains__(key) + + def __iter__(self) -> Iterator[_T]: + # TODO: Remove on DM-41326. + warnings.warn( + _NVAS_DEPRECATION_MSG + + ( + "In the future, iteration will yield str names; for now, use .names " + "to do the same without triggering this warning." + ), + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.daf.butler."), + ) + return super().__iter__() + + def __eq__(self, other: Any) -> bool: + # TODO: Remove on DM-41326. 
+ warnings.warn( + _NVAS_DEPRECATION_MSG + + ( + "In the future, set-equality will assume str keys; for now, use .names " + "to do the same without triggering this warning." + ), + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.daf.butler."), + ) + return super().__eq__(other) + + def __le__(self, other: Set[Any]) -> bool: + # TODO: Remove on DM-41326. + warnings.warn( + _NVAS_DEPRECATION_MSG + + ( + "In the future, subset tests will assume str keys; for now, use .names " + "to do the same without triggering this warning." + ), + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.daf.butler."), + ) + return super().__le__(other) + + def __ge__(self, other: Set[Any]) -> bool: + # TODO: Remove on DM-41326. + warnings.warn( + _NVAS_DEPRECATION_MSG + + ( + "In the future, superset tests will assume str keys; for now, use .names " + "to do the same without triggering this warning." + ), + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.daf.butler."), + ) + return super().__ge__(other) + + +# TODO: Remove on DM-41326. +@deprecated( + "DimensionGraph is deprecated in favor of DimensionGroup and will be removed after v27.", + category=FutureWarning, + version="v27", +) @immutable class DimensionGraph: """An immutable, dependency-complete collection of dimensions. - `DimensionGraph` behaves in many respects like a set of `Dimension` - instances that maintains several special subsets and supersets of - related `DimensionElement` instances. It does not fully implement the - `collections.abc.Set` interface, as its automatic expansion of dependencies - would make set difference and XOR operations behave surprisingly. - - It also provides dict-like lookup of `DimensionElement` instances from - their names. + `DimensionGraph` is deprecated in favor of `DimensionGroup` and will be + removed after v27. The two types have very similar interfaces, but + `DimensionGroup` does not support direct iteration and its set-like + attributes are of dimension element names, not `DimensionElement` + instances. `DimensionGraph` objects are still returned by certain + non-deprecated methods and properties (most prominently + `DatasetType.dimensions`), and to handle these cases deprecation warnings + are only emitted for operations on `DimensionGraph` that are not + supported by `DimensionGroup` as well. Parameters ---------- universe : `DimensionUniverse` - The special graph of all known dimensions of which this graph will be - a subset. + The special graph of all known dimensions of which this graph will be a + subset. dimensions : iterable of `Dimension`, optional An iterable of `Dimension` instances that must be included in the - graph. All (recursive) dependencies of these dimensions will also - be included. At most one of ``dimensions`` and ``names`` must be + graph. All (recursive) dependencies of these dimensions will also be + included. At most one of ``dimensions`` and ``names`` must be provided. names : iterable of `str`, optional An iterable of the names of dimensions that must be included in the - graph. All (recursive) dependencies of these dimensions will also - be included. At most one of ``dimensions`` and ``names`` must be + graph. All (recursive) dependencies of these dimensions will also be + included. At most one of ``dimensions`` and ``names`` must be provided. conform : `bool`, optional If `True` (default), expand to include dependencies. 
`False` should @@ -118,86 +225,71 @@ def __new__( names: Iterable[str] | None = None, conform: bool = True, ) -> DimensionGraph: - conformedNames: set[str] if names is None: if dimensions is None: - conformedNames = set() + group = DimensionGroup(universe) else: - try: - # Optimize for NamedValueSet/NamedKeyDict, though that's - # not required. - conformedNames = set(dimensions.names) # type: ignore - except AttributeError: - conformedNames = {d.name for d in dimensions} + group = DimensionGroup(universe, {d.name for d in dimensions}, _conform=conform) else: if dimensions is not None: raise TypeError("Only one of 'dimensions' and 'names' may be provided.") - conformedNames = set(names) - if conform: - universe.expandDimensionNameSet(conformedNames) - # Look in the cache of existing graphs, with the expanded set of names. - cacheKey = frozenset(conformedNames) - self = universe._cache.get(cacheKey, None) - if self is not None: - return self - # This is apparently a new graph. Create it, and add it to the cache. - self = super().__new__(cls) - universe._cache[cacheKey] = self - self.universe = universe - # Reorder dimensions by iterating over the universe (which is - # ordered already) and extracting the ones in the set. - self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze() - # Make a set that includes both the dimensions and any - # DimensionElements whose dependencies are in self.dimensions. - self.elements = NamedValueSet( - e for e in universe.getStaticElements() if e.required.names <= self.dimensions.names - ).freeze() - self._finish() - return self + group = DimensionGroup(universe, names, _conform=conform) + return group._as_graph() - def _finish(self) -> None: - # Make a set containing just the governor dimensions in this graph. - # Need local import to avoid cycle. - from ._governor import GovernorDimension - - self.governors = NamedValueSet( - d for d in self.dimensions if isinstance(d, GovernorDimension) - ).freeze() - # Split dependencies up into "required" and "implied" subsets. - # Note that a dimension may be required in one graph and implied in - # another. - required: NamedValueSet[Dimension] = NamedValueSet() - implied: NamedValueSet[Dimension] = NamedValueSet() - for dim1 in self.dimensions: - for dim2 in self.dimensions: - if dim1.name in dim2.implied.names: - implied.add(dim1) - break - else: - # If no other dimension implies dim1, it's required. - required.add(dim1) - self.required = required.freeze() - self.implied = implied.freeze() - - self.topology = MappingProxyType( - { - space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze() - for space in TopologicalSpace.__members__.values() - } - ) + @property + def universe(self) -> DimensionUniverse: + """Object that manages all known dimensions.""" + return self._group.universe + + @property + @deprecated( + _NVAS_DEPRECATION_MSG + "Use '.names' instead of '.dimensions' or '.dimensions.names'.", + version="v27", + category=FutureWarning, + ) + @cached_getter + def dimensions(self) -> NamedValueAbstractSet[Dimension]: + """A true `~collections.abc.Set` of all true `Dimension` instances in + the graph. + """ + return _DimensionGraphNamedValueSet(self._group.names, self._group.universe) + + @property + @cached_getter + def elements(self) -> NamedValueAbstractSet[DimensionElement]: + """A true `~collections.abc.Set` of all `DimensionElement` instances in + the graph; a superset of `dimensions` (`NamedValueAbstractSet` of + `DimensionElement`). 
+ """ + return _DimensionGraphNamedValueSet(self._group.elements, self._group.universe) + + @property + @cached_getter + def governors(self) -> NamedValueAbstractSet[GovernorDimension]: + """A true `~collections.abc.Set` of all `GovernorDimension` instances + in the graph. + """ + return _DimensionGraphNamedValueSet(self._group.governors, self._group.universe) + + @property + @cached_getter + def required(self) -> NamedValueAbstractSet[Dimension]: + """The subset of `dimensions` whose elements must be directly + identified via their primary keys in a data ID in order to identify the + rest of the elements in the graph. + """ + return _DimensionGraphNamedValueSet(self._group.required, self._group.universe) - # Build mappings from dimension to index; this is really for - # DataCoordinate, but we put it in DimensionGraph because many - # (many!) DataCoordinates will share the same DimensionGraph, and - # we want them to be lightweight. The order here is what's convenient - # for DataCoordinate: all required dimensions before all implied - # dimensions. - self._dataCoordinateIndices: dict[str, int] = { - name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names)) - } + @property + @cached_getter + def implied(self) -> NamedValueAbstractSet[Dimension]: + """The subset of `dimensions` whose elements need not be directly + identified via their primary keys in a data ID. + """ + return _DimensionGraphNamedValueSet(self._group.implied, self._group.universe) def __getnewargs__(self) -> tuple: - return (self.universe, None, tuple(self.dimensions.names), False) + return (self.universe, None, tuple(self._group.names), False) def __deepcopy__(self, memo: dict) -> DimensionGraph: # DimensionGraph is recursively immutable; see note in @immutable @@ -206,8 +298,8 @@ def __deepcopy__(self, memo: dict) -> DimensionGraph: @property def names(self) -> Set[str]: - """Set of the names of all dimensions in the graph (`KeysView`).""" - return self.dimensions.names + """Set of the names of all dimensions in the graph.""" + return self._group.names def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph: """Convert this class to a simple python type. @@ -280,7 +372,7 @@ def __len__(self) -> int: (and true `Dimension` instances only). """ - return len(self.dimensions) + return len(self._group) def __contains__(self, element: str | DimensionElement) -> bool: """Return `True` if the given element or element name is in the graph. @@ -307,59 +399,68 @@ def get(self, name: str, default: Any = None) -> DimensionElement: return self.elements.get(name, default) def __str__(self) -> str: - return str(self.dimensions) + return str(self.as_group()) def __repr__(self) -> str: return f"DimensionGraph({str(self)})" - def isdisjoint(self, other: DimensionGraph) -> bool: + def as_group(self) -> DimensionGroup: + """Return a `DimensionGroup` that represents the same set of + dimensions. + """ + return self._group + + def isdisjoint(self, other: DimensionGroup | DimensionGraph) -> bool: """Test whether the intersection of two graphs is empty. Returns `True` if either operand is the empty. """ - return self.dimensions.isdisjoint(other.dimensions) + return self._group.isdisjoint(other.as_group()) - def issubset(self, other: DimensionGraph) -> bool: + def issubset(self, other: DimensionGroup | DimensionGraph) -> bool: """Test whether all dimensions in ``self`` are also in ``other``. Returns `True` if ``self`` is empty. 
""" - return self.dimensions <= other.dimensions + return self._group <= other.as_group() - def issuperset(self, other: DimensionGraph) -> bool: + def issuperset(self, other: DimensionGroup | DimensionGraph) -> bool: """Test whether all dimensions in ``other`` are also in ``self``. Returns `True` if ``other`` is empty. """ - return self.dimensions >= other.dimensions + return self._group >= other.as_group() def __eq__(self, other: Any) -> bool: """Test the arguments have exactly the same dimensions & elements.""" + match other: + case DimensionGraph(): + return self.as_group == other.as_group() if isinstance(other, DimensionGraph): - return self.dimensions == other.dimensions + return self._group == other._group else: return False def __hash__(self) -> int: - return hash(tuple(self.dimensions.names)) + return hash(self.as_group()) - def __le__(self, other: DimensionGraph) -> bool: + def __le__(self, other: DimensionGroup | DimensionGraph) -> bool: """Test whether ``self`` is a subset of ``other``.""" - return self.dimensions <= other.dimensions + return self._group <= other.as_group() - def __ge__(self, other: DimensionGraph) -> bool: + def __ge__(self, other: DimensionGroup | DimensionGraph) -> bool: """Test whether ``self`` is a superset of ``other``.""" - return self.dimensions >= other.dimensions + return self._group >= other.as_group() - def __lt__(self, other: DimensionGraph) -> bool: + def __lt__(self, other: DimensionGroup | DimensionGraph) -> bool: """Test whether ``self`` is a strict subset of ``other``.""" - return self.dimensions < other.dimensions + return self._group < other.as_group() - def __gt__(self, other: DimensionGraph) -> bool: + def __gt__(self, other: DimensionGroup | DimensionGraph) -> bool: """Test whether ``self`` is a strict superset of ``other``.""" - return self.dimensions > other.dimensions + return self._group > other.as_group() - def union(self, *others: DimensionGraph) -> DimensionGraph: + def union(self, *others: DimensionGroup | DimensionGraph) -> DimensionGraph: """Construct a new graph with all dimensions in any of the operands. The elements of the returned graph may exceed the naive union of @@ -368,119 +469,73 @@ def union(self, *others: DimensionGraph) -> DimensionGraph: dependency dimensions could have been provided by different operands. """ names = set(self.names).union(*[other.names for other in others]) - return DimensionGraph(self.universe, names=names) + return self.universe.conform(names)._as_graph() - def intersection(self, *others: DimensionGraph) -> DimensionGraph: + def intersection(self, *others: DimensionGroup | DimensionGraph) -> DimensionGraph: """Construct a new graph with only dimensions in all of the operands. See also `union`. """ names = set(self.names).intersection(*[other.names for other in others]) - return DimensionGraph(self.universe, names=names) + return self.universe.conform(names)._as_graph() - def __or__(self, other: DimensionGraph) -> DimensionGraph: + def __or__(self, other: DimensionGroup | DimensionGraph) -> DimensionGraph: """Construct a new graph with all dimensions in any of the operands. See `union`. """ return self.union(other) - def __and__(self, other: DimensionGraph) -> DimensionGraph: + def __and__(self, other: DimensionGroup | DimensionGraph) -> DimensionGraph: """Construct a new graph with only dimensions in all of the operands. See `intersection`. """ return self.intersection(other) + # TODO: Remove on DM-41326. 
@property - @cached_getter + @deprecated( + "DimensionGraph is deprecated in favor of DimensionGroup, which does not have this attribute; " + "use .lookup_order. DimensionGraph will be removed after v27.", + category=FutureWarning, + version="v27", + ) def primaryKeyTraversalOrder(self) -> tuple[DimensionElement, ...]: - """Return a tuple of all elements in specific order. + """A tuple of all elements in specific order. - The order allows records to be - found given their primary keys, starting from only the primary keys of - required dimensions (`tuple` [ `DimensionRecord` ]). + The order allows records to be found given their primary keys, starting + from only the primary keys of required dimensions (`tuple` [ + `DimensionRecord` ]). Unlike the table definition/topological order (which is what - DimensionUniverse.sorted gives you), when dimension A implies - dimension B, dimension A appears first. + DimensionUniverse.sorted gives you), when dimension A implies dimension + B, dimension A appears first. """ - done: set[str] = set() - order = [] - - def addToOrder(element: DimensionElement) -> None: - if element.name in done: - return - predecessors = set(element.required.names) - predecessors.discard(element.name) - if not done.issuperset(predecessors): - return - order.append(element) - done.add(element.name) - for other in element.implied: - addToOrder(other) - - while not done.issuperset(self.required): - for dimension in self.required: - addToOrder(dimension) - - order.extend(element for element in self.elements if element.name not in done) - return tuple(order) + return tuple(self.universe[element_name] for element_name in self._group.lookup_order) @property def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]: """Families represented by the spatial elements in this graph.""" - return self.topology[TopologicalSpace.SPATIAL] + return self._group.spatial @property def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]: """Families represented by the temporal elements in this graph.""" - return self.topology[TopologicalSpace.TEMPORAL] - - # Class attributes below are shadowed by instance attributes, and are - # present just to hold the docstrings for those instance attributes. - - universe: DimensionUniverse - """The set of all known dimensions, of which this graph is a subset - (`DimensionUniverse`). - """ - - dimensions: NamedValueAbstractSet[Dimension] - """A true `~collections.abc.Set` of all true `Dimension` instances in the - graph (`NamedValueAbstractSet` of `Dimension`). + return self._group.temporal - This is the set used for iteration, ``len()``, and most set-like operations - on `DimensionGraph` itself. - """ - - elements: NamedValueAbstractSet[DimensionElement] - """A true `~collections.abc.Set` of all `DimensionElement` instances in the - graph; a superset of `dimensions` (`NamedValueAbstractSet` of - `DimensionElement`). - - This is the set used for dict-like lookups, including the ``in`` operator, - on `DimensionGraph` itself. - """ - - governors: NamedValueAbstractSet[GovernorDimension] - """A true `~collections.abc.Set` of all true `GovernorDimension` instances - in the graph (`NamedValueAbstractSet` of `GovernorDimension`). - """ - - required: NamedValueAbstractSet[Dimension] - """The subset of `dimensions` whose elements must be directly identified - via their primary keys in a data ID in order to identify the rest of the - elements in the graph (`NamedValueAbstractSet` of `Dimension`). 
- """ - - implied: NamedValueAbstractSet[Dimension] - """The subset of `dimensions` whose elements need not be directly - identified via their primary keys in a data ID (`NamedValueAbstractSet` of - `Dimension`). - """ + # TODO: Remove on DM-41326. + @property + @deprecated( + "DimensionGraph is deprecated in favor of DimensionGroup, which does not have this attribute; " + "use .spatial or .temporal. DimensionGraph will be removed after v27.", + category=FutureWarning, + version="v27", + ) + def topology(self) -> Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]: + """Families of elements in this graph that can participate in + topological relationships. + """ + return self._group._space_families - topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]] - """Families of elements in this graph that can participate in topological - relationships (`~collections.abc.Mapping` from `TopologicalSpace` to - `NamedValueAbstractSet` of `TopologicalFamily`). - """ + _group: DimensionGroup diff --git a/python/lsst/daf/butler/dimensions/_group.py b/python/lsst/daf/butler/dimensions/_group.py new file mode 100644 index 0000000000..b4e6a299b7 --- /dev/null +++ b/python/lsst/daf/butler/dimensions/_group.py @@ -0,0 +1,431 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +__all__ = ["DimensionGroup"] + +import itertools +from collections.abc import Iterable, Iterator, Mapping, Set +from types import MappingProxyType +from typing import TYPE_CHECKING, Any + +from lsst.utils.classes import cached_getter, immutable + +from .._named import NamedValueAbstractSet, NamedValueSet +from .._topology import TopologicalFamily, TopologicalSpace + +if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. + from ._elements import DimensionElement + from ._graph import DimensionGraph + from ._universe import DimensionUniverse + + +class SortedSequenceSet(Set[str]): + """A set-like interface wrapper around a tuple. + + This delegates directly to ``tuple.__contains__``, so there is an implicit + assumption that `len` is small and hence O(N) lookups are not a problem, as + is the case for sets of dimension names. 
+ """ + + def __init__(self, seq: tuple[str, ...]): + self._seq = seq + + __slots__ = ("_seq",) + + def __contains__(self, x: object) -> bool: + return x in self._seq + + def __iter__(self) -> Iterator[str]: + return iter(self._seq) + + def __len__(self) -> int: + return len(self._seq) + + def __hash__(self) -> int: + return hash(self._seq) + + def __eq__(self, other: object) -> bool: + if seq := getattr(other, "_seq", None): + return seq == self._seq + return super().__eq__(other) + + @classmethod + def _from_iterable(cls, iterable: Iterable[str]) -> set[str]: + # This is used by collections.abc.Set mixin methods when they need + # to return a new object (e.g. in `__and__`). + return set(iterable) + + def __repr__(self) -> str: + return f"{{{', '.join(str(k) for k in self._seq)}}}" + + def as_tuple(self) -> tuple[str, ...]: + """Return the underlying tuple.""" + return self._seq + + @property + def names(self) -> Set[str]: + """An alias to ``self``. + + This is a backwards-compatibility API that allows `DimensionGroup` + to mimic the `DimensionGraph` object it is intended to replace, by + permitting expressions like ``x.required.names`` when ``x`` can be + an object of either type. + """ + return self + + +@immutable +class DimensionGroup: + """An immutable, dependency-complete collection of dimensions. + + `DimensionGroup` behaves in many respects like a set of `str` dimension + names that maintains several special subsets and supersets of related + dimension elements. It does not fully implement the `collections.abc.Set` + interface, because it defines a few different iteration orders and does not + privilege any one of them by implementing ``__iter__``. + + Parameters + ---------- + universe : `DimensionUniverse` + Object that manages all known dimensions. + names : iterable of `str`, optional + An iterable of the names of dimensions that must be included in the + group. All (recursive) dependencies of these dimensions will also be + included. At most one of ``dimensions`` and ``names`` must be + provided. + _conform : `bool`, optional + If `True` (default), expand to include dependencies. `False` should + only be used for callers that can guarantee that other arguments are + already correctly expanded, and is for internal use only. + + Notes + ----- + `DimensionGroup` should be used instead of other collections in most + contexts where a collection of dimensions is required and a + `DimensionUniverse` is available. Exceptions include cases where order + matters (and is different from the consistent ordering defined by the + `DimensionUniverse`), or complete `~collection.abc.Set` semantics are + required. + """ + + def __new__( + cls, + universe: DimensionUniverse, + names: Iterable[str] | DimensionGroup = frozenset(), + _conform: bool = True, + ) -> DimensionGroup: + if isinstance(names, DimensionGroup): + if names.universe is universe: + return names + else: + names = names.names + if _conform: + # Expand dimension names to include all required and implied + # dependencies. + to_expand = set(names) + names = set() + while to_expand: + dimension = universe[to_expand.pop()] + names.add(dimension.name) + to_expand.update(dimension.required.names) + to_expand.update(dimension.implied.names) + to_expand.difference_update(names) + else: + names = frozenset(names) + # Look in the cache of existing groups, with the expanded set of names. 
+ cache_key = frozenset(names) + self = universe._cached_groups.get(cache_key, None) + if self is not None: + return self + # This is apparently a new group. Create it, and add it to the cache. + self = super().__new__(cls) + universe._cached_groups[cache_key] = self + self.universe = universe + # Reorder dimensions by iterating over the universe (which is + # ordered already) and extracting the ones in the set. + self.names = SortedSequenceSet(tuple(d.name for d in universe.sorted(names))) + # Make a set that includes both the dimensions and any + # DimensionElements whose dependencies are in self.dimensions. + self.elements = SortedSequenceSet( + tuple(e.name for e in universe.getStaticElements() if e.required.names <= self.names) + ) + self.governors = SortedSequenceSet( + tuple(d for d in self.names if d in universe.getGovernorDimensions().names) + ) + # Split dependencies up into "required" and "implied" subsets. + # Note that a dimension may be required in one group and implied in + # another. + required: list[str] = [] + implied: list[str] = [] + for dim1 in self.names: + for dim2 in self.names: + if dim1 in universe[dim2].implied.names: + implied.append(dim1) + break + else: + # If no other dimension implies dim1, it's required. + required.append(dim1) + self.required = SortedSequenceSet(tuple(required)) + self.implied = SortedSequenceSet(tuple(implied)) + + self._space_families = MappingProxyType( + { + space: NamedValueSet( + universe[e].topology[space] for e in self.elements if space in universe[e].topology + ).freeze() + for space in TopologicalSpace.__members__.values() + } + ) + + # Build mappings from dimension to index; this is really for + # DataCoordinate, but we put it in DimensionGroup because many (many!) + # DataCoordinates will share the same DimensionGroup, and we want them + # to be lightweight. The order here is what's convenient for + # DataCoordinate: all required dimensions before all implied + # dimensions. + self._data_coordinate_indices = { + name: i for i, name in enumerate(itertools.chain(self.required, self.implied)) + } + return self + + def __getnewargs__(self) -> tuple: + return (self.universe, self.names._seq, False) + + def __deepcopy__(self, memo: dict) -> DimensionGroup: + # DimensionGroup is recursively immutable; see note in @immutable + # decorator. + return self + + def __len__(self) -> int: + return len(self.names) + + def __contains__(self, element: str) -> bool: + if element in self.elements: + return True + else: + from ._elements import DimensionElement + + if isinstance(element, DimensionElement): # type: ignore[unreachable] + raise TypeError( + "DimensionGroup does not support membership tests using DimensionElement " + "instances; use their names instead." + ) + return False + + def __str__(self) -> str: + return str(self.names) + + def __repr__(self) -> str: + return f"DimensionGroup({self.names})" + + def as_group(self) -> DimensionGroup: + """Return ``self``. + + This is a backwards-compatibility API that allows both `DimensionGraph` + and `DimensionGroup` to be coerced to the latter. + """ + return self + + @cached_getter + def _as_graph(self) -> DimensionGraph: + """Return a view of ``self`` as a `DimensionGraph`. + + This is provided as a convenience for methods and properties that must + return a `DimensionGraph` for backwards compatibility (until v27). It + is the only way of making a `DimensionGraph` that does not produce + a warning. 
+ """ + from ._graph import DimensionGraph + + result = object.__new__(DimensionGraph) + result._group = self + return result + + def isdisjoint(self, other: DimensionGroup) -> bool: + """Test whether the intersection of two groups is empty. + + Returns `True` if either operand is the empty. + """ + return self.names.isdisjoint(other.names) + + def issubset(self, other: DimensionGroup) -> bool: + """Test whether all dimensions in ``self`` are also in ``other``. + + Returns `True` if ``self`` is empty. + """ + return self.names <= other.names + + def issuperset(self, other: DimensionGroup) -> bool: + """Test whether all dimensions in ``other`` are also in ``self``. + + Returns `True` if ``other`` is empty. + """ + return self.names >= other.names + + def __eq__(self, other: Any) -> bool: + from ._graph import DimensionGraph + + # TODO: Drop DimensionGraph support here on DM-41326. + if isinstance(other, (DimensionGroup, DimensionGraph)): + return self.names == other.names + else: + return False + + def __hash__(self) -> int: + return hash(self.required._seq) + + def __le__(self, other: DimensionGroup) -> bool: + return self.names <= other.names + + def __ge__(self, other: DimensionGroup) -> bool: + return self.names >= other.names + + def __lt__(self, other: DimensionGroup) -> bool: + return self.names < other.names + + def __gt__(self, other: DimensionGroup) -> bool: + return self.names > other.names + + def union(self, *others: DimensionGroup) -> DimensionGroup: + """Construct a new group with all dimensions in any of the operands. + + The elements of the returned group may exceed the naive union of their + elements, as some dimension elements are included in groups whenever + multiple dimensions are present, and those dependency dimensions could + have been provided by different operands. + """ + names = set(self.names).union(*[other.names for other in others]) + return DimensionGroup(self.universe, names) + + def intersection(self, *others: DimensionGroup) -> DimensionGroup: + """Construct a new group with only dimensions in all of the operands. + + See also `union`. + """ + names = set(self.names).intersection(*[other.names for other in others]) + return DimensionGroup(self.universe, names=names) + + def __or__(self, other: DimensionGroup) -> DimensionGroup: + return self.union(other) + + def __and__(self, other: DimensionGroup) -> DimensionGroup: + return self.intersection(other) + + @property + def data_coordinate_keys(self) -> Set[str]: + """A set of dimensions ordered like `DataCoordinate.mapping`. + + This order is defined as all required dimensions followed by all + implied dimensions. + """ + return self._data_coordinate_indices.keys() + + @property + @cached_getter + def lookup_order(self) -> tuple[str, ...]: + """A tuple of all elements in the order needed to find their records. + + Unlike the table definition/topological order (which is what + `DimensionUniverse.sorted` gives you), when dimension A implies + dimension B, dimension A appears first. 
+ """ + done: set[str] = set() + order: list[str] = [] + + def add_to_order(element: DimensionElement) -> None: + if element.name in done: + return + predecessors = set(element.required.names) + predecessors.discard(element.name) + if not done.issuperset(predecessors): + return + order.append(element.name) + done.add(element.name) + for other in element.implied: + add_to_order(other) + + while not done.issuperset(self.required): + for dimension in self.required: + add_to_order(self.universe[dimension]) + + order.extend(element for element in self.elements if element not in done) + return tuple(order) + + @property + def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]: + """Families represented by the spatial elements in this graph.""" + return self._space_families[TopologicalSpace.SPATIAL] + + @property + def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]: + """Families represented by the temporal elements in this graph.""" + return self._space_families[TopologicalSpace.TEMPORAL] + + # Class attributes below are shadowed by instance attributes, and are + # present just to hold the docstrings for those instance attributes. + + universe: DimensionUniverse + """The set of all known dimensions, of which this group is a subset + (`DimensionUniverse`). + """ + + names: SortedSequenceSet + """A true `~collections.abc.Set` of the dimension names. + + Iteration order is consist with `DimensionUniverse.sorted`: each dimension + is preceded by its required and implied dependencies. + """ + + elements: SortedSequenceSet + """A true `~collections.abc.Set` of all dimension element names in the + group; a superset of `dimensions`. + """ + + governors: SortedSequenceSet + """A true `~collections.abc.Set` of all true governor dimension names in + the group. + """ + + required: SortedSequenceSet + """The dimensions that must be directly identified via their primary keys + in a data ID in order to identify the rest of the elements in the group. + """ + + implied: SortedSequenceSet + """The dimensions that need not be directly identified via their primary + keys in a data ID. + """ + + _space_families: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]] + """Families of elements in this graph that exist in topological spaces + relationships (`~collections.abc.Mapping` from `TopologicalSpace` to + `NamedValueAbstractSet` of `TopologicalFamily`). + """ + + _data_coordinate_indices: dict[str, int] diff --git a/python/lsst/daf/butler/dimensions/_universe.py b/python/lsst/daf/butler/dimensions/_universe.py index 3e5f4f1d98..fb7f1eea8f 100644 --- a/python/lsst/daf/butler/dimensions/_universe.py +++ b/python/lsst/daf/butler/dimensions/_universe.py @@ -34,7 +34,7 @@ import pickle from collections import defaultdict from collections.abc import Iterable, Mapping, Sequence -from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, overload +from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, cast, overload from deprecated.sphinx import deprecated from lsst.utils.classes import cached_getter, immutable @@ -47,6 +47,7 @@ from ._elements import Dimension, DimensionElement from ._governor import GovernorDimension from ._graph import DimensionGraph +from ._group import DimensionGroup from ._skypix import SkyPixDimension, SkyPixSystem if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. @@ -143,7 +144,7 @@ def __new__( # copying from builder. 
self = object.__new__(cls) assert self is not None - self._cache = {} + self._cached_groups = {} self._dimensions = builder.dimensions self._elements = builder.elements self._topology = builder.topology @@ -158,7 +159,7 @@ def __new__( element.universe = self # Add attribute for special subsets of the graph. - self.empty = DimensionGraph(self, (), conform=False) + self._empty = DimensionGroup(self, (), _conform=False) # Use the version number and namespace from the config as a key in # the singleton dict containing all instances; that will let us @@ -403,6 +404,13 @@ def expandDimensionNameSet(self, names: set[str]) -> None: else: oldSize = len(names) + # TODO: remove on DM-41326. + @deprecated( + "DimensionUniverse.extract and DimensionGraph are deprecated in favor of DimensionUniverse.conform " + "and DimensionGroup, and will be removed after v27.", + version="v27", + category=FutureWarning, + ) def extract(self, iterable: Iterable[Dimension | str]) -> DimensionGraph: """Construct graph from iterable. @@ -424,13 +432,40 @@ def extract(self, iterable: Iterable[Dimension | str]) -> DimensionGraph: graph : `DimensionGraph` A `DimensionGraph` instance containing all given dimensions. """ - names = set() - for item in iterable: - try: - names.add(item.name) # type: ignore - except AttributeError: - names.add(item) - return DimensionGraph(universe=self, names=names) + return self.conform(iterable)._as_graph() + + def conform( + self, + dimensions: Iterable[str | Dimension] | DimensionGroup | DimensionGraph, + /, + ) -> DimensionGroup: + """Construct a dimension group from an iterable of dimension names. + + Parameters + ---------- + dimensions : `~collections.abc.Iterable` [ `str` or `Dimension` ], \ + `DimensionGroup`, or `DimensionGraph` + Dimensions that must be included in the returned group; their + dependencies will be as well. Support for `Dimension`, + `DimensionGraph` objects is deprecated and will be removed after + v27. Passing `DimensionGraph` objects will not yield a deprecation + warning to allow non-deprecated methods and properties that return + `DimensionGraph` objects to be passed though, since these will be + changed to return `DimensionGroup` in the future. + + Returns + ------- + group : `DimensionGroup` + A `DimensionGroup` instance containing all given dimensions. + """ + match dimensions: + case DimensionGroup(): + return dimensions + case DimensionGraph(): + return dimensions.as_group() + case iterable: + names: set[str] = {getattr(d, "name", cast(str, d)) for d in iterable} + return DimensionGroup(self, names) @overload def sorted(self, elements: Iterable[Dimension], *, reverse: bool = False) -> Sequence[Dimension]: @@ -510,6 +545,14 @@ def get_elements_populated_by(self, dimension: Dimension) -> NamedValueAbstractS """ return self._populates[dimension.name] + @property + def empty(self) -> DimensionGraph: + """The `DimensionGraph` that contains no dimensions. + + After v27 this will be a `DimensionGroup`. + """ + return self._empty._as_graph() + @classmethod def _unpickle(cls, version: int, namespace: str | None = None) -> DimensionUniverse: """Return an unpickled dimension universe. @@ -543,10 +586,6 @@ def __deepcopy__(self, memo: dict) -> DimensionUniverse: # Class attributes below are shadowed by instance attributes, and are # present just to hold the docstrings for those instance attributes. - empty: DimensionGraph - """The `DimensionGraph` that contains no dimensions (`DimensionGraph`). 
- """ - commonSkyPix: SkyPixDimension """The special skypix dimension that is used to relate all other spatial dimensions in the `Registry` database (`SkyPixDimension`). @@ -555,12 +594,14 @@ def __deepcopy__(self, memo: dict) -> DimensionUniverse: dimensionConfig: DimensionConfig """The configuration used to create this Universe (`DimensionConfig`).""" - _cache: dict[frozenset[str], DimensionGraph] + _cached_groups: dict[frozenset[str], DimensionGroup] _dimensions: NamedValueAbstractSet[Dimension] _elements: NamedValueAbstractSet[DimensionElement] + _empty: DimensionGroup + _topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]] _dimensionIndices: dict[str, int] diff --git a/python/lsst/daf/butler/registry/queries/_readers.py b/python/lsst/daf/butler/registry/queries/_readers.py index 203e8f5f7c..f35c0cbf0e 100644 --- a/python/lsst/daf/butler/registry/queries/_readers.py +++ b/python/lsst/daf/butler/registry/queries/_readers.py @@ -163,7 +163,9 @@ class _FullDataCoordinateReader(DataCoordinateReader): def __init__(self, dimensions: DimensionGraph): self._dimensions = dimensions - self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions._dataCoordinateIndices) + self._tags = tuple( + DimensionKeyColumnTag(name) for name in self._dimensions._group._data_coordinate_indices + ) __slots__ = ("_dimensions", "_tags") diff --git a/tests/test_obscore.py b/tests/test_obscore.py index 8465377315..6f2374ed9c 100644 --- a/tests/test_obscore.py +++ b/tests/test_obscore.py @@ -442,6 +442,7 @@ def test_associate(self): rows = list(result) self.assertEqual(len(rows), 0) + @unittest.skip("Temporary, while deprecation warnings are present.") def test_region_type_warning(self) -> None: """Test that non-polygon region generates one or more warnings.""" collections = None From 7cc94a8a4cfc17b0a8633f197c6003f7b34f6aa9 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Wed, 1 Nov 2023 12:37:16 -0400 Subject: [PATCH 04/16] Add, use Dimension[Universe,Graph,Group] subset attributes. The confusingly-named DimensionUniverse.getStatic* methods are no longer needed after this, but while I think the middleware team has informally agreed we should drop them and go back to attributes, I don't think that's been RFC'd, so they stay for now. The new skypix attributes help with the other goal here: using set-membership tests rather than isinstance checks in places where we care about different types of dimensions. This will work better when DimensionElement objects appear more rarely, in favor of str names. 
--- .../lsst/daf/butler/_column_categorization.py | 4 +- python/lsst/daf/butler/_dataset_type.py | 2 +- .../daf/butler/datastore/file_templates.py | 7 ++- python/lsst/daf/butler/dimensions/_graph.py | 19 ++++--- python/lsst/daf/butler/dimensions/_group.py | 14 ++++-- python/lsst/daf/butler/dimensions/_skypix.py | 8 ++- .../lsst/daf/butler/dimensions/_universe.py | 49 +++++++++++++++++-- python/lsst/daf/butler/direct_butler.py | 10 ++-- python/lsst/daf/butler/registry/_defaults.py | 2 +- .../datasets/byDimensions/summaries.py | 2 +- .../daf/butler/registry/dimensions/static.py | 15 ++---- .../registry/queries/_sql_query_backend.py | 10 ++-- .../daf/butler/registry/queries/_structs.py | 40 ++++++++------- .../queries/expressions/_predicate.py | 6 +-- .../registry/queries/expressions/check.py | 2 +- .../lsst/daf/butler/registry/sql_registry.py | 3 +- .../daf/butler/registry/tests/_registry.py | 2 +- python/lsst/daf/butler/script/ingest_files.py | 2 +- python/lsst/daf/butler/transfers/_context.py | 2 +- tests/test_dimensions.py | 10 ++-- 20 files changed, 132 insertions(+), 77 deletions(-) diff --git a/python/lsst/daf/butler/_column_categorization.py b/python/lsst/daf/butler/_column_categorization.py index a2669b5030..747e1b6e04 100644 --- a/python/lsst/daf/butler/_column_categorization.py +++ b/python/lsst/daf/butler/_column_categorization.py @@ -63,9 +63,7 @@ def from_iterable(cls, iterable: Iterable[Any]) -> ColumnCategorization: def filter_skypix(self, universe: DimensionUniverse) -> Iterator[SkyPixDimension]: return ( - dimension - for name in self.dimension_keys - if isinstance(dimension := universe[name], SkyPixDimension) + dimension for name in self.dimension_keys if (dimension := universe.skypix_dimensions.get(name)) ) def filter_governors(self, universe: DimensionUniverse) -> Iterator[GovernorDimension]: diff --git a/python/lsst/daf/butler/_dataset_type.py b/python/lsst/daf/butler/_dataset_type.py index 6b578036b3..1b38821bc0 100644 --- a/python/lsst/daf/butler/_dataset_type.py +++ b/python/lsst/daf/butler/_dataset_type.py @@ -201,7 +201,7 @@ def __init__( ) dimensions = universe.extract(dimensions) self._dimensions = dimensions - if name in self._dimensions.universe.getGovernorDimensions().names: + if name in self._dimensions.universe.governor_dimensions.names: raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.") if not isinstance(storageClass, StorageClass | str): raise ValueError(f"StorageClass argument must be StorageClass or str. Got {storageClass}") diff --git a/python/lsst/daf/butler/datastore/file_templates.py b/python/lsst/daf/butler/datastore/file_templates.py index ec9e2e271a..c33d67d2a5 100644 --- a/python/lsst/daf/butler/datastore/file_templates.py +++ b/python/lsst/daf/butler/datastore/file_templates.py @@ -43,7 +43,7 @@ from .._dataset_ref import DatasetRef from .._exceptions import ValidationError from .._storage_class import StorageClass -from ..dimensions import DataCoordinate, SkyPixDimension +from ..dimensions import DataCoordinate if TYPE_CHECKING: from .._dataset_type import DatasetType @@ -735,7 +735,6 @@ def _determine_skypix_alias(self, entity: DatasetRef | DatasetType) -> str | Non # not be true in some test code, but that test code is a pain to # update to be more like the real world while still providing our # only tests of important behavior. 
- skypix = [dimension for dimension in entity.dimensions if isinstance(dimension, SkyPixDimension)] - if len(skypix) == 1: - alias = skypix[0].name + if len(entity.dimensions.skypix) == 1: + (alias,) = entity.dimensions.skypix.names return alias diff --git a/python/lsst/daf/butler/dimensions/_graph.py b/python/lsst/daf/butler/dimensions/_graph.py index 276f1d0d97..0322f4f96d 100644 --- a/python/lsst/daf/butler/dimensions/_graph.py +++ b/python/lsst/daf/butler/dimensions/_graph.py @@ -47,6 +47,7 @@ from ..registry import Registry from ._elements import Dimension, DimensionElement from ._governor import GovernorDimension + from ._skypix import SkyPixDimension from ._universe import DimensionUniverse @@ -271,6 +272,14 @@ def governors(self) -> NamedValueAbstractSet[GovernorDimension]: """ return _DimensionGraphNamedValueSet(self._group.governors, self._group.universe) + @property + @cached_getter + def skypix(self) -> NamedValueAbstractSet[SkyPixDimension]: + """A true `~collections.abc.Set` of all `SkyPixDimension` instances + in the graph. + """ + return _DimensionGraphNamedValueSet(self._group.skypix, self._group.universe) + @property @cached_getter def required(self) -> NamedValueAbstractSet[Dimension]: @@ -433,13 +442,9 @@ def issuperset(self, other: DimensionGroup | DimensionGraph) -> bool: def __eq__(self, other: Any) -> bool: """Test the arguments have exactly the same dimensions & elements.""" - match other: - case DimensionGraph(): - return self.as_group == other.as_group() - if isinstance(other, DimensionGraph): - return self._group == other._group - else: - return False + if isinstance(other, (DimensionGraph, DimensionGroup)): + return self._group == other.as_group() + return False def __hash__(self) -> int: return hash(self.as_group()) diff --git a/python/lsst/daf/butler/dimensions/_group.py b/python/lsst/daf/butler/dimensions/_group.py index b4e6a299b7..1d718c015e 100644 --- a/python/lsst/daf/butler/dimensions/_group.py +++ b/python/lsst/daf/butler/dimensions/_group.py @@ -173,11 +173,12 @@ def __new__( # Make a set that includes both the dimensions and any # DimensionElements whose dependencies are in self.dimensions. self.elements = SortedSequenceSet( - tuple(e.name for e in universe.getStaticElements() if e.required.names <= self.names) + tuple(e.name for e in universe.elements if e.required.names <= self.names) ) self.governors = SortedSequenceSet( - tuple(d for d in self.names if d in universe.getGovernorDimensions().names) + tuple(d for d in self.names if d in universe.governor_dimensions.names) ) + self.skypix = SortedSequenceSet(tuple(d for d in self.names if d in universe.skypix_dimensions.names)) # Split dependencies up into "required" and "implied" subsets. # Note that a dimension may be required in one group and implied in # another. @@ -408,8 +409,13 @@ def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]: """ governors: SortedSequenceSet - """A true `~collections.abc.Set` of all true governor dimension names in - the group. + """A true `~collections.abc.Set` of all governor dimension names in the + group. + """ + + skypix: SortedSequenceSet + """A true `~collections.abc.Set` of all skypix dimension names in the " + group. 
""" required: SortedSequenceSet diff --git a/python/lsst/daf/butler/dimensions/_skypix.py b/python/lsst/daf/butler/dimensions/_skypix.py index 02e7dedaa3..d4de089823 100644 --- a/python/lsst/daf/butler/dimensions/_skypix.py +++ b/python/lsst/daf/butler/dimensions/_skypix.py @@ -32,7 +32,7 @@ "SkyPixSystem", ) -from collections.abc import Mapping, Set +from collections.abc import Iterator, Mapping, Set from types import MappingProxyType from typing import TYPE_CHECKING @@ -97,6 +97,12 @@ def choose(self, endpoints: NamedValueAbstractSet[TopologicalRelationshipEndpoin def __getitem__(self, level: int) -> SkyPixDimension: return self._members[level] + def __iter__(self) -> Iterator[SkyPixDimension]: + return iter(self._members.values()) + + def __len__(self) -> int: + return len(self._members) + class SkyPixDimension(Dimension): """Special dimension for sky pixelizations. diff --git a/python/lsst/daf/butler/dimensions/_universe.py b/python/lsst/daf/butler/dimensions/_universe.py index fb7f1eea8f..05bf4bc2f2 100644 --- a/python/lsst/daf/butler/dimensions/_universe.py +++ b/python/lsst/daf/butler/dimensions/_universe.py @@ -293,7 +293,6 @@ def getStaticDimensions(self) -> NamedValueAbstractSet[Dimension]: """ return self._dimensions - @cached_getter def getGovernorDimensions(self) -> NamedValueAbstractSet[GovernorDimension]: """Return a set of all `GovernorDimension` instances in this universe. @@ -302,9 +301,8 @@ def getGovernorDimensions(self) -> NamedValueAbstractSet[GovernorDimension]: governors : `NamedValueAbstractSet` [ `GovernorDimension` ] A frozen set of `GovernorDimension` instances. """ - return NamedValueSet(d for d in self._dimensions if isinstance(d, GovernorDimension)).freeze() + return self.governor_dimensions - @cached_getter def getDatabaseElements(self) -> NamedValueAbstractSet[DatabaseDimensionElement]: """Return set of all `DatabaseDimensionElement` instances in universe. @@ -316,6 +314,51 @@ def getDatabaseElements(self) -> NamedValueAbstractSet[DatabaseDimensionElement] elements : `NamedValueAbstractSet` [ `DatabaseDimensionElement` ] A frozen set of `DatabaseDimensionElement` instances. """ + return self.database_elements + + @property + def elements(self) -> NamedValueAbstractSet[DimensionElement]: + """All dimension elements defined in this universe.""" + return self._elements + + @property + def dimensions(self) -> NamedValueAbstractSet[Dimension]: + """All dimensions defined in this universe.""" + return self._dimensions + + @property + @cached_getter + def governor_dimensions(self) -> NamedValueAbstractSet[GovernorDimension]: + """All governor dimensions defined in this universe. + + Governor dimensions serve as special required dependencies of other + dimensions, with special handling in dimension query expressions and + collection summaries. Governor dimension records are stored in the + database but the set of such values is expected to be small enough + for all values to be cached by all clients. + """ + return NamedValueSet(d for d in self._dimensions if isinstance(d, GovernorDimension)).freeze() + + @property + @cached_getter + def skypix_dimensions(self) -> NamedValueAbstractSet[SkyPixDimension]: + """All skypix dimensions defined in this universe. + + Skypix dimension records are always generated on-the-fly rather than + stored in the database, and they always represent a tiling of the sky + with no overlaps. 
+ """ + result = NamedValueSet[SkyPixDimension]() + for system in self.skypix: + result.update(system) + return result.freeze() + + @property + @cached_getter + def database_elements(self) -> NamedValueAbstractSet[DatabaseDimensionElement]: + """All dimension elements whose records are stored in the database, + except governor dimensions. + """ return NamedValueSet(d for d in self._elements if isinstance(d, DatabaseDimensionElement)).freeze() @property diff --git a/python/lsst/daf/butler/direct_butler.py b/python/lsst/daf/butler/direct_butler.py index 80bcf5d1ae..2c7be95ce1 100644 --- a/python/lsst/daf/butler/direct_butler.py +++ b/python/lsst/daf/butler/direct_butler.py @@ -483,7 +483,7 @@ def _rewrite_data_id( for dimensionName in list(dataIdDict): value = dataIdDict[dimensionName] try: - dimension = self.dimensions.getStaticDimensions()[dimensionName] + dimension = self.dimensions.dimensions[dimensionName] except KeyError: # This is not a real dimension not_dimensions[dimensionName] = value @@ -555,7 +555,7 @@ def _rewrite_data_id( # fail but they are going to fail anyway because of the # ambiguousness of the dataId... if datasetType.isCalibration(): - for dim in self.dimensions.getStaticDimensions(): + for dim in self.dimensions.dimensions: if dim.temporal: candidateDimensions.add(str(dim)) @@ -571,7 +571,7 @@ def _rewrite_data_id( # given names with records within those dimensions matched_dims = set() for dimensionName in candidateDimensions: - dimension = self.dimensions.getStaticDimensions()[dimensionName] + dimension = self.dimensions.dimensions[dimensionName] fields = dimension.metadata.names | dimension.uniqueKeys.names for field in not_dimensions: if field in fields: @@ -750,7 +750,7 @@ def _rewrite_data_id( ) # Get the primary key from the real dimension object - dimension = self.dimensions.getStaticDimensions()[dimensionName] + dimension = self.dimensions.dimensions[dimensionName] if not isinstance(dimension, Dimension): raise RuntimeError( f"{dimension.name} is not a true dimension, and cannot be used in data IDs." @@ -1959,7 +1959,7 @@ def transfer_from( # come from this butler's universe. elements = frozenset( element - for element in self.dimensions.getStaticElements() + for element in self.dimensions.elements if element.hasTable() and element.viewOf is None ) dataIds = {ref.dataId for ref in source_refs} diff --git a/python/lsst/daf/butler/registry/_defaults.py b/python/lsst/daf/butler/registry/_defaults.py index 89c1a08ab3..27055000c2 100644 --- a/python/lsst/daf/butler/registry/_defaults.py +++ b/python/lsst/daf/butler/registry/_defaults.py @@ -120,7 +120,7 @@ def finish(self, registry: SqlRegistry) -> None: Raised if a non-governor dimension was included in ``**kwargs`` at construction. """ - allGovernorDimensions = registry.dimensions.getGovernorDimensions() + allGovernorDimensions = registry.dimensions.governor_dimensions if not self._kwargs.keys() <= allGovernorDimensions.names: raise TypeError( "Only governor dimensions may be identified by a default data " diff --git a/python/lsst/daf/butler/registry/datasets/byDimensions/summaries.py b/python/lsst/daf/butler/registry/datasets/byDimensions/summaries.py index d051b4b38d..0443d3dc01 100644 --- a/python/lsst/daf/butler/registry/datasets/byDimensions/summaries.py +++ b/python/lsst/daf/butler/registry/datasets/byDimensions/summaries.py @@ -109,7 +109,7 @@ def makeTableSpecs( ) # Specs for collection_summary_. 
dimensionTableSpecs = NamedKeyDict[GovernorDimension, ddl.TableSpec]() - for dimension in dimensions.universe.getGovernorDimensions(): + for dimension in dimensions.universe.governor_dimensions: tableSpec = ddl.TableSpec(fields=[]) collections.addCollectionForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE") addDimensionForeignKey(tableSpec, dimension, primaryKey=True) diff --git a/python/lsst/daf/butler/registry/dimensions/static.py b/python/lsst/daf/butler/registry/dimensions/static.py index d83becd298..777af98631 100644 --- a/python/lsst/daf/butler/registry/dimensions/static.py +++ b/python/lsst/daf/butler/registry/dimensions/static.py @@ -44,7 +44,6 @@ DimensionGraph, DimensionUniverse, GovernorDimension, - SkyPixDimension, ) from .._exceptions import MissingSpatialOverlapError from ..interfaces import ( @@ -124,7 +123,7 @@ def initialize( # can pass in when initializing storage for DatabaseDimensionElements. governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]() records = NamedKeyDict[DimensionElement, DimensionRecordStorage]() - for dimension in universe.getGovernorDimensions(): + for dimension in universe.governor_dimensions: governorStorage = dimension.makeStorage(db, context=context) governors[dimension] = governorStorage records[dimension] = governorStorage @@ -133,14 +132,12 @@ def initialize( # to gather a mapping from the names of those targets back to their # views. view_targets = { - element.viewOf: element - for element in universe.getDatabaseElements() - if element.viewOf is not None + element.viewOf: element for element in universe.database_elements if element.viewOf is not None } # We remember the spatial ones (grouped by family) so we can go back # and initialize overlap storage for them later. spatial = NamedKeyDict[DatabaseTopologicalFamily, list[DatabaseDimensionRecordStorage]]() - for element in universe.getDatabaseElements(): + for element in universe.database_elements: if element.viewOf is not None: # We'll initialize this storage when the view's target is # initialized. @@ -200,10 +197,8 @@ def get(self, element: DimensionElement | str) -> DimensionRecordStorage | None: # Docstring inherited from DimensionRecordStorageManager. 
r = self._records.get(element) if r is None: - if isinstance(element, str): - element = self.universe[element] - if isinstance(element, SkyPixDimension): - return self.universe.skypix[element.system][element.level].makeStorage() + if (dimension := self.universe.skypix_dimensions.get(element)) is not None: + return dimension.makeStorage() return r def register(self, element: DimensionElement) -> DimensionRecordStorage: diff --git a/python/lsst/daf/butler/registry/queries/_sql_query_backend.py b/python/lsst/daf/butler/registry/queries/_sql_query_backend.py index fc5866e8ba..d00a66b6b3 100644 --- a/python/lsst/daf/butler/registry/queries/_sql_query_backend.py +++ b/python/lsst/daf/butler/registry/queries/_sql_query_backend.py @@ -36,7 +36,7 @@ from ..._column_categorization import ColumnCategorization from ..._column_tags import DimensionKeyColumnTag, DimensionRecordColumnTag from ..._dataset_type import DatasetType -from ...dimensions import DataCoordinate, DimensionGraph, DimensionRecord, DimensionUniverse, SkyPixDimension +from ...dimensions import DataCoordinate, DimensionGraph, DimensionRecord, DimensionUniverse from .._collection_type import CollectionType from .._exceptions import DataIdValueError from ..interfaces import CollectionRecord, Database @@ -236,12 +236,10 @@ def make_dimension_relation( # spatial join, since we need all dimension key columns present in the # SQL engine and skypix regions are added by postprocessing in the # native iteration engine. - for dimension in dimensions: - if DimensionKeyColumnTag(dimension.name) not in relation.columns and isinstance( - dimension, SkyPixDimension - ): + for skypix_dimension in dimensions.skypix: + if DimensionKeyColumnTag(skypix_dimension.name) not in relation.columns: raise NotImplementedError( - f"Cannot construct query involving skypix dimension {dimension.name} unless " + f"Cannot construct query involving skypix dimension {skypix_dimension.name} unless " "it is part of a dataset subquery, spatial join, or other initial relation." ) diff --git a/python/lsst/daf/butler/registry/queries/_structs.py b/python/lsst/daf/butler/registry/queries/_structs.py index 072e71bfdc..7b56575742 100644 --- a/python/lsst/daf/butler/registry/queries/_structs.py +++ b/python/lsst/daf/butler/registry/queries/_structs.py @@ -464,28 +464,34 @@ def _compute_columns_required( if self.order_by is not None: tags.update(self.order_by.columns_required) region = self.where.region - for dimension in self.where.data_id.graph: - dimension_tag = DimensionKeyColumnTag(dimension.name) + for dimension_name in self.where.data_id.graph.names: + dimension_tag = DimensionKeyColumnTag(dimension_name) if dimension_tag in tags: continue - if dimension == self.universe.commonSkyPix or not isinstance(dimension, SkyPixDimension): + if skypix_dimension := self.universe.skypix_dimensions.get(dimension_name): + if skypix_dimension == self.universe.commonSkyPix: + # Common skypix dimension is should be available from + # spatial join tables. + tags.add(dimension_tag) + else: + # This is a SkyPixDimension other than the common one. If + # it's not already present in the query (e.g. from a + # dataset join), this is a pure spatial constraint, which + # we can only apply by modifying the 'region' for the + # query. That will also require that we join in the common + # skypix dimension. 
+ pixel = skypix_dimension.pixelization.pixel(self.where.data_id[dimension_name]) + if region is None: + region = pixel + else: + region = IntersectionRegion(region, pixel) + else: # If a dimension in the data ID is available from dimension # tables or dimension spatial-join tables in the database, - # include it in the set of dimensions whose tables should be - # joined. This makes these data ID constraints work just like - # simple 'where' constraints, which is good. + # include it in the set of dimensions whose tables should + # be joined. This makes these data ID constraints work + # just like simple 'where' constraints, which is good. tags.add(dimension_tag) - else: - # This is a SkyPixDimension other than the common one. If it's - # not already present in the query (e.g. from a dataset join), - # this is a pure spatial constraint, which we can only apply by - # modifying the 'region' for the query. That will also require - # that we join in the common skypix dimension. - pixel = dimension.pixelization.pixel(self.where.data_id[dimension]) - if region is None: - region = pixel - else: - region = IntersectionRegion(region, pixel) # Make sure the dimension keys are expanded self-consistently in what # we return by passing them through DimensionGraph. dimensions = DimensionGraph( diff --git a/python/lsst/daf/butler/registry/queries/expressions/_predicate.py b/python/lsst/daf/butler/registry/queries/expressions/_predicate.py index d0d70558e5..8874fec89b 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/_predicate.py +++ b/python/lsst/daf/butler/registry/queries/expressions/_predicate.py @@ -140,10 +140,10 @@ def make_string_expression_predicate( bind = {} if bind: for identifier in bind: - if identifier in dimensions.universe.getStaticElements().names: + if identifier in dimensions.universe.elements.names: raise RuntimeError(f"Bind parameter key {identifier!r} conflicts with a dimension element.") table, _, column = identifier.partition(".") - if column and table in dimensions.universe.getStaticElements().names: + if column and table in dimensions.universe.elements.names: raise RuntimeError(f"Bind parameter key {identifier!r} looks like a dimension column.") if defaults is None: defaults = DataCoordinate.makeEmpty(dimensions.universe) @@ -168,7 +168,7 @@ def make_string_expression_predicate( msg = f'Error in query expression "{exprOriginal}" (normalized to "{exprNormal}"): {err}' raise UserExpressionError(msg) from None for dimension_name, values in summary.dimension_constraints.items(): - if dimension_name in dimensions.universe.getGovernorDimensions().names: + if dimension_name in dimensions.universe.governor_dimensions.names: governor_constraints[dimension_name] = cast(Set[str], values) converter = PredicateConversionVisitor(bind, dataset_type_name, dimensions.universe, column_types) predicate = tree.visit(converter) diff --git a/python/lsst/daf/butler/registry/queries/expressions/check.py b/python/lsst/daf/butler/registry/queries/expressions/check.py index de2c762d3b..b42a9a24b2 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/check.py +++ b/python/lsst/daf/butler/registry/queries/expressions/check.py @@ -465,7 +465,7 @@ def visitOuter(self, branches: Sequence[InnerSummary], form: NormalForm) -> Oute # pulled from defaults in _all_ branches. This is the set we will # be able to bound overall; any dimensions not referenced by even # one branch could be unbounded. 
- dimensions_in_all_branches = set(self.graph.universe.getStaticDimensions().names) + dimensions_in_all_branches = set(self.graph.universe.dimensions.names) for branch in branches: summary.update(branch) summary.defaultsNeeded.update(branch.defaultsNeeded) diff --git a/python/lsst/daf/butler/registry/sql_registry.py b/python/lsst/daf/butler/registry/sql_registry.py index 63c22a97d7..fa87ffd1a5 100644 --- a/python/lsst/daf/butler/registry/sql_registry.py +++ b/python/lsst/daf/butler/registry/sql_registry.py @@ -2352,8 +2352,7 @@ def queryDimensionRecords( element = self.dimensions[element] except KeyError as e: raise DimensionNameError( - f"No such dimension '{element}', available dimensions: " - + str(self.dimensions.getStaticElements()) + f"No such dimension '{element}', available dimensions: " + str(self.dimensions.elements) ) from e doomed_by: list[str] = [] data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py index 90b2c52e25..f9016a6fd1 100644 --- a/python/lsst/daf/butler/registry/tests/_registry.py +++ b/python/lsst/daf/butler/registry/tests/_registry.py @@ -1244,7 +1244,7 @@ def testSpatialJoin(self): families = defaultdict(set) # Dictionary of {element.name: {dataId: region}}. regions = {} - for element in registry.dimensions.getDatabaseElements(): + for element in registry.dimensions.database_elements: if element.spatial is not None: families[element.spatial.name].add(element) regions[element.name] = { diff --git a/python/lsst/daf/butler/script/ingest_files.py b/python/lsst/daf/butler/script/ingest_files.py index aa3f2b1aac..0b3de53046 100644 --- a/python/lsst/daf/butler/script/ingest_files.py +++ b/python/lsst/daf/butler/script/ingest_files.py @@ -217,7 +217,7 @@ def parse_data_id_tuple(data_ids: tuple[str, ...], universe: DimensionUniverse) dimension_str, value = id_str.split("=") try: - dimension = universe.getStaticDimensions()[dimension_str] + dimension = universe.dimensions[dimension_str] except KeyError: raise ValueError(f"DataID dimension '{dimension_str}' is not known to this universe.") from None diff --git a/python/lsst/daf/butler/transfers/_context.py b/python/lsst/daf/butler/transfers/_context.py index a810f99293..29a45627b6 100644 --- a/python/lsst/daf/butler/transfers/_context.py +++ b/python/lsst/daf/butler/transfers/_context.py @@ -171,7 +171,7 @@ def saveDataIds( if elements is None: standardized_elements = frozenset( element - for element in self._registry.dimensions.getStaticElements() + for element in self._registry.dimensions.elements if element.hasTable() and element.viewOf is None ) else: diff --git a/tests/test_dimensions.py b/tests/test_dimensions.py index 46ea12a8c7..9cd1d1069c 100644 --- a/tests/test_dimensions.py +++ b/tests/test_dimensions.py @@ -229,7 +229,7 @@ def testVersion(self): def testConfigRead(self): self.assertEqual( - set(self.universe.getStaticDimensions().names), + set(self.universe.dimensions.names), { "instrument", "visit", @@ -249,7 +249,7 @@ def testConfigRead(self): def testGraphs(self): self.checkGraphInvariants(self.universe.empty) - for element in self.universe.getStaticElements(): + for element in self.universe.elements: self.checkGraphInvariants(element.graph) def testInstrumentDimensions(self): @@ -329,7 +329,7 @@ def testSubsetCalculation(self): def testSchemaGeneration(self): tableSpecs = NamedKeyDict({}) - for element in self.universe.getStaticElements(): + for element in 
self.universe.elements: if element.hasTable and element.viewOf is None: tableSpecs[element] = element.RecordClass.fields.makeTableSpec( TimespanReprClass=TimespanDatabaseRepresentation.Compound, @@ -389,7 +389,7 @@ def testPickling(self): self.assertIs(universe1, universe2) self.assertIs(universe1, universe3) self.assertIs(universe1, universe4) - for element1 in universe1.getStaticElements(): + for element1 in universe1.elements: element2 = pickle.loads(pickle.dumps(element1)) self.assertIs(element1, element2) graph1 = element1.graph @@ -674,7 +674,7 @@ def testStandardize(self): # dimensions if hasFull is False (see # `DataCoordinate.subset` docs). newDimensions = self.randomDimensionSubset(n=1, graph=dataId.graph) - if dataId.hasFull() or dataId.graph.required.issuperset(newDimensions.required): + if dataId.hasFull() or dataId.graph.required >= newDimensions.required: newDataIds = [ dataId.subset(newDimensions), DataCoordinate.standardize(dataId, graph=newDimensions), From 699fd1108d830e5972bafea17252b8978a11bf02 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Wed, 1 Nov 2023 13:35:31 -0400 Subject: [PATCH 05/16] Modify TopologicalFamily.choose to avoid sets of dimensions. --- python/lsst/daf/butler/_topology.py | 15 ++++++++------- python/lsst/daf/butler/dimensions/_database.py | 7 ++++--- python/lsst/daf/butler/dimensions/_skypix.py | 8 +++++--- .../lsst/daf/butler/registry/queries/_builder.py | 4 ++-- python/lsst/daf/butler/registry/queries/_query.py | 10 +++++++--- .../lsst/daf/butler/registry/queries/_structs.py | 2 +- 6 files changed, 27 insertions(+), 19 deletions(-) diff --git a/python/lsst/daf/butler/_topology.py b/python/lsst/daf/butler/_topology.py index 6ca43982a1..c1af048c9c 100644 --- a/python/lsst/daf/butler/_topology.py +++ b/python/lsst/daf/butler/_topology.py @@ -35,12 +35,13 @@ import enum from abc import ABC, abstractmethod -from collections.abc import Mapping -from typing import Any +from collections.abc import Mapping, Set +from typing import TYPE_CHECKING, Any from lsst.utils.classes import immutable -from ._named import NamedValueAbstractSet +if TYPE_CHECKING: + from .dimensions import DimensionUniverse @enum.unique @@ -106,9 +107,7 @@ def __contains__(self, other: TopologicalRelationshipEndpoint) -> bool: return other.topology.get(self.space) == self @abstractmethod - def choose( - self, endpoints: NamedValueAbstractSet[TopologicalRelationshipEndpoint] - ) -> TopologicalRelationshipEndpoint: + def choose(self, endpoints: Set[str], universe: DimensionUniverse) -> TopologicalRelationshipEndpoint: """Select the best member of this family to use. These are to be used in a query join or data ID when more than one @@ -118,9 +117,11 @@ def choose( Parameters ---------- - endpoints : `NamedValueAbstractSet` [`TopologicalRelationshipEndpoint`] + endpoints : `~collections.abc.Set` [`str`] Endpoints to choose from. May include endpoints that are not members of this family (which should be ignored). + universe : `DimensionUniverse` + Object that manages all known dimensions. Returns ------- diff --git a/python/lsst/daf/butler/dimensions/_database.py b/python/lsst/daf/butler/dimensions/_database.py index a6212fe7a9..4331ae1ca3 100644 --- a/python/lsst/daf/butler/dimensions/_database.py +++ b/python/lsst/daf/butler/dimensions/_database.py @@ -43,7 +43,7 @@ from .. 
import ddl from .._named import NamedKeyMapping, NamedValueAbstractSet, NamedValueSet -from .._topology import TopologicalFamily, TopologicalRelationshipEndpoint, TopologicalSpace +from .._topology import TopologicalFamily, TopologicalSpace from ._elements import Dimension, DimensionCombination, DimensionElement from .construction import DimensionConstructionBuilder, DimensionConstructionVisitor @@ -55,6 +55,7 @@ StaticTablesContext, ) from ._governor import GovernorDimension + from ._universe import DimensionUniverse class DatabaseTopologicalFamily(TopologicalFamily): @@ -85,10 +86,10 @@ def __init__( super().__init__(name, space) self.members = members - def choose(self, endpoints: NamedValueAbstractSet[TopologicalRelationshipEndpoint]) -> DimensionElement: + def choose(self, endpoints: Set[str], universe: DimensionUniverse) -> DimensionElement: # Docstring inherited from TopologicalFamily. for member in self.members: - if member in endpoints: + if member.name in endpoints: return member raise RuntimeError(f"No recognized endpoints for {self.name} in {endpoints}.") diff --git a/python/lsst/daf/butler/dimensions/_skypix.py b/python/lsst/daf/butler/dimensions/_skypix.py index d4de089823..7f8bdddcf0 100644 --- a/python/lsst/daf/butler/dimensions/_skypix.py +++ b/python/lsst/daf/butler/dimensions/_skypix.py @@ -42,12 +42,13 @@ from .. import ddl from .._named import NamedValueAbstractSet, NamedValueSet -from .._topology import TopologicalFamily, TopologicalRelationshipEndpoint, TopologicalSpace +from .._topology import TopologicalFamily, TopologicalSpace from ._elements import Dimension from .construction import DimensionConstructionBuilder, DimensionConstructionVisitor if TYPE_CHECKING: from ..registry.interfaces import SkyPixDimensionRecordStorage + from ._universe import DimensionUniverse class SkyPixSystem(TopologicalFamily): @@ -81,10 +82,11 @@ def __init__( for level in range(maxLevel + 1): self._members[level] = SkyPixDimension(self, level) - def choose(self, endpoints: NamedValueAbstractSet[TopologicalRelationshipEndpoint]) -> SkyPixDimension: + def choose(self, endpoints: Set[str], universe: DimensionUniverse) -> SkyPixDimension: # Docstring inherited from TopologicalFamily. 
best: SkyPixDimension | None = None - for endpoint in endpoints: + for endpoint_name in endpoints: + endpoint = universe[endpoint_name] if endpoint not in self: continue assert isinstance(endpoint, SkyPixDimension) diff --git a/python/lsst/daf/butler/registry/queries/_builder.py b/python/lsst/daf/butler/registry/queries/_builder.py index c8517e04a2..dc66c274f4 100644 --- a/python/lsst/daf/butler/registry/queries/_builder.py +++ b/python/lsst/daf/butler/registry/queries/_builder.py @@ -227,8 +227,8 @@ def finish(self, joinMissing: bool = True) -> Query: for family1, family2 in itertools.combinations(self.summary.dimensions.spatial, 2): spatial_joins.append( ( - family1.choose(self.summary.dimensions.elements).name, - family2.choose(self.summary.dimensions.elements).name, + family1.choose(self.summary.dimensions.elements.names, self.summary.universe).name, + family2.choose(self.summary.dimensions.elements.names, self.summary.universe).name, ) ) self.relation = self._backend.make_dimension_relation( diff --git a/python/lsst/daf/butler/registry/queries/_query.py b/python/lsst/daf/butler/registry/queries/_query.py index 29322aaa05..bde17e328b 100644 --- a/python/lsst/daf/butler/registry/queries/_query.py +++ b/python/lsst/daf/butler/registry/queries/_query.py @@ -734,8 +734,12 @@ def find_datasets( # present in each family (e.g. patch beats tract). spatial_joins.append( ( - lhs_spatial_family.choose(full_dimensions.elements).name, - rhs_spatial_family.choose(full_dimensions.elements).name, + lhs_spatial_family.choose( + full_dimensions.elements.names, self.dimensions.universe + ).name, + rhs_spatial_family.choose( + full_dimensions.elements.names, self.dimensions.universe + ).name, ) ) # Set up any temporal join between the query dimensions and CALIBRATION @@ -743,7 +747,7 @@ def find_datasets( temporal_join_on: set[ColumnTag] = set() if any(r.type is CollectionType.CALIBRATION for r in collection_records): for family in self._dimensions.temporal: - endpoint = family.choose(self._dimensions.elements) + endpoint = family.choose(self._dimensions.elements.names, self.dimensions.universe) temporal_join_on.add(DimensionRecordColumnTag(endpoint.name, "timespan")) base_columns_required.update(temporal_join_on) # Note which of the many kinds of potentially-missing columns we have diff --git a/python/lsst/daf/butler/registry/queries/_structs.py b/python/lsst/daf/butler/registry/queries/_structs.py index 7b56575742..dc31d7c3c5 100644 --- a/python/lsst/daf/butler/registry/queries/_structs.py +++ b/python/lsst/daf/butler/registry/queries/_structs.py @@ -502,7 +502,7 @@ def _compute_columns_required( missing_common_skypix = False if region is not None: for family in dimensions.spatial: - element = family.choose(dimensions.elements) + element = family.choose(dimensions.elements.names, self.universe) tags.add(DimensionRecordColumnTag(element.name, "region")) if not isinstance(element, SkyPixDimension) and self.universe.commonSkyPix not in dimensions: missing_common_skypix = True From 33efabcce2a6dc2bd57d9b58ade576ab02f744e0 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Wed, 1 Nov 2023 13:38:07 -0400 Subject: [PATCH 06/16] Update DataCoordinate interface per RFC-834. This includes replacing '.graph' with '.dimensions' (to go along with replacing DimensionGraph with DimensionGroup), adding '.mapping' and '.required' to replace '.full', and deprecating Dimension-instance lookup and iteration (which effectively deprecates the Mapping interface). 
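A rough migration sketch for callers, assuming the default dimension configuration; the
instrument and detector values are placeholders, and the new attribute names
(.dimensions, .mapping, .required_values) are those introduced in the diff below:

    from lsst.daf.butler import DataCoordinate, DimensionUniverse

    universe = DimensionUniverse()
    data_id = DataCoordinate.standardize(
        {"instrument": "HSC", "detector": 10}, universe=universe
    )

    names = data_id.dimensions.required    # replaces data_id.graph.required.names
    plain = dict(data_id.mapping)          # replaces data_id.full.byName()
    values = data_id.required_values       # replaces data_id.values_tuple()
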
--- .../lsst/daf/butler/dimensions/_coordinate.py | 883 ++++++++++++------ 1 file changed, 611 insertions(+), 272 deletions(-) diff --git a/python/lsst/daf/butler/dimensions/_coordinate.py b/python/lsst/daf/butler/dimensions/_coordinate.py index a577280dfa..f45a0bebf7 100644 --- a/python/lsst/daf/butler/dimensions/_coordinate.py +++ b/python/lsst/daf/butler/dimensions/_coordinate.py @@ -35,20 +35,23 @@ __all__ = ("DataCoordinate", "DataId", "DataIdKey", "DataIdValue", "SerializedDataCoordinate") import numbers +import warnings from abc import abstractmethod -from collections.abc import Iterator, Mapping, Set -from typing import TYPE_CHECKING, Any, ClassVar, Literal, overload +from collections.abc import Iterable, Iterator, Mapping, Set +from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast, overload from deprecated.sphinx import deprecated from lsst.daf.butler._compat import _BaseModelCompat from lsst.sphgeom import IntersectionRegion, Region +from lsst.utils.introspection import find_outside_stacklevel -from .._named import NamedKeyDict, NamedKeyMapping, NamedValueAbstractSet, NameLookupMapping +from .._named import NamedKeyMapping, NamedValueAbstractSet, NameLookupMapping from .._timespan import Timespan from ..json import from_json_pydantic, to_json_pydantic from ..persistence_context import PersistenceContextVars from ._elements import Dimension, DimensionElement from ._graph import DimensionGraph +from ._group import DimensionGroup from ._records import DimensionRecord, SerializedDimensionRecord if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. @@ -119,45 +122,25 @@ def _intersectRegions(*args: Region) -> Region | None: class DataCoordinate(NamedKeyMapping[Dimension, DataIdValue]): - """Data ID dictionary. + """A validated data ID. - An immutable data ID dictionary that guarantees that its key-value pairs - identify at least all required dimensions in a `DimensionGraph`. + DataCoordinate guarantees that its key-value pairs identify at least all + required dimensions in a `DimensionGroup`. - `DataCoordinate` itself is an ABC, but provides `staticmethod` factory + Notes + ----- + `DataCoordinate` is an ABC, but it provides `staticmethod` factory functions for private concrete implementations that should be sufficient for most purposes. `standardize` is the most flexible and safe of these; - the others (`makeEmpty`, `fromRequiredValues`, and `fromFullValues`) are - more specialized and perform little or no checking of inputs. + the others (`makeEmpty`, `from_required_values`, and `from_full_values`) + are more specialized and perform little or no checking of inputs. - Notes - ----- - Like any data ID class, `DataCoordinate` behaves like a dictionary, but - with some subtleties: - - - Both `Dimension` instances and `str` names thereof may be used as keys - in lookup operations, but iteration (and `keys`) will yield `Dimension` - instances. The `names` property can be used to obtain the corresponding - `str` names. - - - Lookups for implied dimensions (those in ``self.graph.implied``) are - supported if and only if `hasFull` returns `True`, and are never - included in iteration or `keys`. The `full` property may be used to - obtain a mapping whose keys do include implied dimensions. - - - Equality comparison with other mappings is supported, but it always - considers only required dimensions (as well as requiring both operands - to identify the same dimensions). 
This is not quite consistent with the - way mappings usually work - normally differing keys imply unequal - mappings - but it makes sense in this context because data IDs with the - same values for required dimensions but different values for implied - dimensions represent a serious problem with the data that - `DataCoordinate` cannot generally recognize on its own, and a data ID - that knows implied dimension values should still be able to compare as - equal to one that does not. This is of course not the way comparisons - between simple `dict` data IDs work, and hence using a `DataCoordinate` - instance for at least one operand in any data ID comparison is strongly - recommended. + Lookups for implied dimensions (those in ``self.dimensions.implied``) are + supported if and only if `has_full_values` is `True`. This also sets the + keys of the `mapping` attribute. This means that `DataCoordinate` equality + is not the same as testing for equality on the `mapping` attribute + (instead, it is the same as testing for equality on the `required` + attribute). See Also -------- @@ -172,6 +155,7 @@ class DataCoordinate(NamedKeyMapping[Dimension, DataIdValue]): def standardize( mapping: NameLookupMapping[Dimension, DataIdValue] | None = None, *, + dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, graph: DimensionGraph | None = None, universe: DimensionUniverse | None = None, defaults: DataCoordinate | None = None, @@ -187,14 +171,19 @@ def standardize( mapping : `~collections.abc.Mapping`, optional An informal data ID that maps dimensions or dimension names to their primary key values (may also be a true `DataCoordinate`). - graph : `DimensionGraph` - The dimensions to be identified by the new `DataCoordinate`. - If not provided, will be inferred from the keys of ``mapping`` and + dimensions : `~collections.abc.Iterable` [ `str` ], `DimensionGroup` \ + or `DimensionGraph`, optional + The dimensions to be identified by the new `DataCoordinate`. If not + provided, will be inferred from the keys of ``mapping`` and ``**kwargs``, and ``universe`` must be provided unless ``mapping`` is already a `DataCoordinate`. + graph : `DimensionGraph`, optional + Like ``dimensions``, but requires a ``DimensionGraph`` instance. + Ignored if ``dimensions`` is provided. Deprecated and will be + removed after v27. universe : `DimensionUniverse` - All known dimensions and their relationships; used to expand - and validate dependencies when ``graph`` is not provided. + All known dimensions and their relationships; used to expand and + validate dependencies when ``graph`` is not provided. defaults : `DataCoordinate`, optional Default dimension key-value pairs to use when needed. These are never used to infer ``graph``, and are ignored if a different value @@ -215,57 +204,119 @@ def standardize( KeyError Raised if a key-value pair for a required dimension is missing. """ - d: dict[str, DataIdValue] = {} + universe = ( + universe + or getattr(dimensions, "universe", None) + or getattr(graph, "universe", None) + or getattr(mapping, "universe", None) + ) + if universe is None: + raise TypeError( + "universe must be provided, either directly or via dimensions, mapping, or graph." + ) + if graph is not None: + # TODO: remove argument on DM-41326. 
+ warnings.warn( + "The 'graph' argument to DataCoordinate.standardize is deprecated in favor of the " + "'dimensions' argument, and will be removed after v27.", + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.daf.butler"), + ) + dimensions = graph.names + if dimensions is not None: + dimensions = universe.conform(dimensions) + del graph # make sure we don't actualy use this below + new_mapping: dict[str, DataIdValue] = {} if isinstance(mapping, DataCoordinate): - if graph is None: + if dimensions is None: if not kwargs: # Already standardized to exactly what we want. return mapping - elif kwargs.keys().isdisjoint(graph.dimensions.names): + elif kwargs.keys().isdisjoint(dimensions.names): # User provided kwargs, but told us not to use them by # passing in dimensions that are disjoint from those kwargs. # This is not necessarily user error - it's a useful pattern # to pass in all of the key-value pairs you have and let the # code here pull out only what it needs. - return mapping.subset(graph) - assert universe is None or universe == mapping.universe - universe = mapping.universe - d.update((name, mapping[name]) for name in mapping.graph.required.names) + return mapping.subset(dimensions.names) + new_mapping.update((name, mapping[name]) for name in mapping.dimensions.required) if mapping.hasFull(): - d.update((name, mapping[name]) for name in mapping.graph.implied.names) + new_mapping.update((name, mapping[name]) for name in mapping.dimensions.implied) elif isinstance(mapping, NamedKeyMapping): - d.update(mapping.byName()) + warnings.warn( + "Passing a NamedKeyMapping to DataCoordinate.standardize is deprecated, and will be " + "removed after v27.", + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.daf.butler"), + ) + new_mapping.update(mapping.byName()) elif mapping is not None: - d.update(mapping) - d.update(kwargs) - if graph is None: + new_mapping.update(mapping) + new_mapping.update(kwargs) + if dimensions is None: if defaults is not None: universe = defaults.universe elif universe is None: raise TypeError("universe must be provided if graph is not.") - graph = DimensionGraph(universe, names=d.keys()) - if not graph.dimensions: - return DataCoordinate.makeEmpty(graph.universe) + dimensions = DimensionGroup(universe, new_mapping.keys()) + if not dimensions: + return DataCoordinate.makeEmpty(universe) + # Some backends cannot handle numpy.int64 type which is a subclass of + # numbers.Integral; convert that to int. + for k, v in new_mapping.items(): + if isinstance(v, numbers.Integral): + new_mapping[k] = int(v) # type: ignore if defaults is not None: - if defaults.hasFull(): - for k, v in defaults.full.items(): - d.setdefault(k.name, v) - else: - for k, v in defaults.items(): - d.setdefault(k.name, v) - if d.keys() >= graph.dimensions.names: - values = tuple(d[name] for name in graph._group._data_coordinate_indices) + for k, v in defaults.mapping.items(): + new_mapping.setdefault(k, v) + if new_mapping.keys() >= dimensions.names: + return DataCoordinate.from_full_values( + dimensions, tuple(new_mapping[name] for name in dimensions.data_coordinate_keys) + ) else: try: - values = tuple(d[name] for name in graph.required.names) + values = tuple(new_mapping[name] for name in dimensions.required) except KeyError as err: raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err - # Some backends cannot handle numpy.int64 type which is a subclass of - # numbers.Integral; convert that to int. 
- values = tuple( - int(val) if isinstance(val, numbers.Integral) else val for val in values # type: ignore - ) - return _BasicTupleDataCoordinate(graph, values) + return DataCoordinate.from_required_values(dimensions, values) + + @property + @abstractmethod + def mapping(self) -> Mapping[str, DataIdValue]: + """A mapping view of the data ID with keys for all dimensions it has + values for. + """ + raise NotImplementedError() + + @property + @abstractmethod + def required(self) -> Mapping[str, DataIdValue]: + """A mapping view of the data ID with keys for just its required + dimensions. + """ + raise NotImplementedError() + + @property + @abstractmethod + def required_values(self) -> tuple[DataIdValue, ...]: + """The required values (only) of this data ID as a tuple. + + Element order is consistent with `required`. + + In contexts where all data IDs have the same dimensions, comparing and + hashing these tuples can be much faster than comparing the original + `DataCoordinate` instances. + """ + raise NotImplementedError() + + @property + def full_values(self) -> tuple[DataIdValue, ...]: + """The full values (only) of this data ID as a tuple. + + Element order is consistent with `DimensionGroup.data_coordinate_keys`, + i.e. all required dimensions followed by all implied dimensions. + """ + raise ValueError(f"DataCoordinate {self} has only required values.") @staticmethod def makeEmpty(universe: DimensionUniverse) -> DataCoordinate: @@ -285,12 +336,22 @@ def makeEmpty(universe: DimensionUniverse) -> DataCoordinate: `hasRecords` are guaranteed to return `True`, because both `full` and `records` are just empty mappings. """ - return _ExpandedTupleDataCoordinate(universe.empty, (), {}) + return _ExpandedTupleDataCoordinate(universe.empty.as_group(), (), {}) + # TODO: remove on DM-41326. @staticmethod + @deprecated( + "fromRequiredValues is deprecated in favor of from_required_values, " + "which takes a DimensionGroup instead of a DimensionGraph. It will be " + "removed after v27.", + version="v27", + category=FutureWarning, + ) def fromRequiredValues(graph: DimensionGraph, values: tuple[DataIdValue, ...]) -> DataCoordinate: """Construct a `DataCoordinate` from required dimension values. + This method is deprecated in favor of `from_required_values`. + This is a low-level interface with at most assertion-level checking of inputs. Most callers should use `standardize` instead. @@ -306,19 +367,58 @@ def fromRequiredValues(graph: DimensionGraph, values: tuple[DataIdValue, ...]) - ------- dataId : `DataCoordinate` A data ID object that identifies the given dimensions. - ``dataId.hasFull()`` will return `True` if and only if - ``graph.implied`` is empty, and ``dataId.hasRecords()`` will never - return `True`. + ``dataId.hasFull()`` will return `True` only if ``graph.implied`` + is empty. ``dataId.hasRecords()`` will return `True` + if and only if ``graph`` is empty. """ - assert len(graph.required) == len( - values - ), f"Inconsistency between dimensions {graph.required} and required values {values}." - return _BasicTupleDataCoordinate(graph, values) + return DataCoordinate.from_required_values(graph._group, values) + + @staticmethod + def from_required_values(dimensions: DimensionGroup, values: tuple[DataIdValue, ...]) -> DataCoordinate: + """Construct a `DataCoordinate` from required dimension values. + + This is a low-level interface with at most assertion-level checking of + inputs. Most callers should use `standardize` instead. 
+ + Parameters + ---------- + dimensions : `DimensionGroup` + Dimensions this data ID will identify. + values : `tuple` [ `int` or `str` ] + Tuple of primary key values corresponding to ``graph.required``, in + that order. + Returns + ------- + dataId : `DataCoordinate` + A data ID object that identifies the given dimensions. + ``dataId.hasFull()`` will return `True` only if + ``dimensions.implied`` is empty. ``dataId.hasRecords()`` will + return `True` if and only if ``graph`` is empty. + """ + assert len(dimensions.required) == len( + values + ), f"Inconsistency between dimensions {dimensions.required} and required values {values}." + if not dimensions: + return DataCoordinate.makeEmpty(dimensions.universe) + if not dimensions.implied: + return _FullTupleDataCoordinate(dimensions, values) + return _RequiredTupleDataCoordinate(dimensions, values) + + # TODO: remove on DM-41326. @staticmethod + @deprecated( + "fromFullValues is deprecated in favor of from_full_values, " + "which takes a DimensionGroup instead of a DimensionGraph. It will be " + "removed after v27.", + version="v27", + category=FutureWarning, + ) def fromFullValues(graph: DimensionGraph, values: tuple[DataIdValue, ...]) -> DataCoordinate: """Construct a `DataCoordinate` from all dimension values. + This method is deprecated in favor of `from_full_values`. + This is a low-level interface with at most assertion-level checking of inputs. Most callers should use `standardize` instead. @@ -336,56 +436,105 @@ def fromFullValues(graph: DimensionGraph, values: tuple[DataIdValue, ...]) -> Da ------- dataId : `DataCoordinate` A data ID object that identifies the given dimensions. - ``dataId.hasFull()`` will return `True` if and only if - ``graph.implied`` is empty, and ``dataId.hasRecords()`` will never - return `True`. + ``dataId.hasFull()`` will always return `True`. + ``dataId.hasRecords()`` will only return `True` if ``graph`` is + empty. + """ + return DataCoordinate.from_full_values(graph._group, values) + + @staticmethod + def from_full_values(dimensions: DimensionGroup, values: tuple[DataIdValue, ...]) -> DataCoordinate: + """Construct a `DataCoordinate` from all dimension values. + + This is a low-level interface with at most assertion-level checking of + inputs. Most callers should use `standardize` instead. + + Parameters + ---------- + dimensions : `DimensionGroup` + Dimensions this data ID will identify. + values : `tuple` [ `int` or `str` ] + Tuple of primary key values corresponding to + ``itertools.chain(graph.required, graph.implied)``, in that order. + Note that this is _not_ the same order as ``graph.dimensions``, + though these contain the same elements. + + Returns + ------- + dataId : `DataCoordinate` + A data ID object that identifies the given dimensions. + ``dataId.hasFull()`` will always return `True`. + ``dataId.hasRecords()`` will only return `True` if ``dimensions`` + is empty. """ - assert len(graph.dimensions) == len( + assert len(dimensions) == len( values - ), f"Inconsistency between dimensions {graph.dimensions} and full values {values}." - return _BasicTupleDataCoordinate(graph, values) + ), f"Inconsistency between dimensions {dimensions.data_coordinate_keys} and full values {values}." 
+ if not dimensions: + return DataCoordinate.makeEmpty(dimensions.universe) + return _FullTupleDataCoordinate(dimensions, values) + + def __bool__(self) -> bool: + return bool(self.dimensions) def __hash__(self) -> int: - return hash((self.graph,) + self.values_tuple()) + return hash((self.dimensions,) + self.required_values) def __eq__(self, other: Any) -> bool: if not isinstance(other, DataCoordinate): other = DataCoordinate.standardize(other, universe=self.universe) - return self.graph == other.graph and self.values_tuple() == other.values_tuple() + return self.dimensions == other.dimensions and self.required_values == other.required_values def __repr__(self) -> str: # We can't make repr yield something that could be exec'd here without - # printing out the whole DimensionUniverse the graph is derived from. - # So we print something that mostly looks like a dict, but doesn't - # quote its keys: that's both more compact and something that can't - # be mistaken for an actual dict or something that could be exec'd. - terms = [f"{d}: {self[d]!r}" for d in self.graph.required.names] - if self.hasFull() and self.graph.required != self.graph.dimensions: - terms.append("...") - return "{{{}}}".format(", ".join(terms)) + # printing out the whole DimensionUniverse. + return str(self.mapping) def __lt__(self, other: Any) -> bool: - # Allow DataCoordinate to be sorted - if not isinstance(other, type(self)): + if not isinstance(other, DataCoordinate): return NotImplemented - # Form tuple of tuples for each DataCoordinate: - # Unlike repr() we only use required keys here to ensure that - # __eq__ can not be true simultaneously with __lt__ being true. - self_kv = tuple(self.items()) - other_kv = tuple(other.items()) - - return self_kv < other_kv + # Unlike repr() we only use required keys here to ensure that __eq__ + # can not be true simultaneously with __lt__ being true. + return self.required_values < other.required_values + # TODO: remove on DM-41326. + @deprecated( + "Using DataCoordinate as a Mapping is deprecated in favor of the " + ".mapping and .required attributes, and will be dropped after v27.", + version="v27", + category=FutureWarning, + ) def __iter__(self) -> Iterator[Dimension]: return iter(self.keys()) + # TODO: remove on DM-41326. + @deprecated( + "Using DataCoordinate as a Mapping is deprecated in favor of the " + ".mapping and .required attributes, and will be dropped after v27.", + version="v27", + category=FutureWarning, + ) def __len__(self) -> int: return len(self.keys()) + # TODO: remove on DM-41326. + @deprecated( + "Using DataCoordinate as a Mapping is deprecated in favor of the " + ".mapping and .required attributes, and will be dropped after v27.", + version="v27", + category=FutureWarning, + ) def keys(self) -> NamedValueAbstractSet[Dimension]: # type: ignore return self.graph.required + # TODO: remove on DM-41326. @property + @deprecated( + "DataCoordinate.names is deprecated in favor of the .dimensions " + "attribute, and will be dropped after v27.", + version="v27", + category=FutureWarning, + ) def names(self) -> Set[str]: """Names of the required dimensions identified by this data ID. @@ -395,13 +544,16 @@ def names(self) -> Set[str]: return self.keys().names @abstractmethod - def subset(self, graph: DimensionGraph) -> DataCoordinate: + def subset(self, dimensions: DimensionGraph | DimensionGroup | Iterable[str]) -> DataCoordinate: """Return a `DataCoordinate` whose graph is a subset of ``self.graph``. 
Parameters ---------- - graph : `DimensionGraph` + dimensions : `DimensionGraph`, `DimensionGroup`, or \ + `~collections.abc.Iterable` [ `str` ] The dimensions identified by the returned `DataCoordinate`. + Passing a `DimensionGraph` is deprecated and support will be + dropped after v27. Returns ------- @@ -413,15 +565,13 @@ def subset(self, graph: DimensionGraph) -> DataCoordinate: ------ KeyError Raised if the primary key value for one or more required dimensions - is unknown. This may happen if ``graph.issubset(self.graph)`` is - `False`, or even if ``graph.issubset(self.graph)`` is `True`, if - ``self.hasFull()`` is `False` and - ``graph.required.issubset(self.graph.required)`` is `False`. As - an example of the latter case, consider trying to go from a data ID - with dimensions {instrument, physical_filter, band} to - just {instrument, band}; band is implied by - physical_filter and hence would have no value in the original data - ID if ``self.hasFull()`` is `False`. + is unknown. This may happen even if the required subset of the new + dimensions are not a subset of the dimensions actually known by + this data ID.. As an example, consider trying to go from a data ID + with dimensions {instrument, physical_filter, band} to just + {instrument, band}; band is implied by physical_filter and hence + would have no value in the original data ID if ``self.hasFull()`` + is `False`. Notes ----- @@ -429,6 +579,7 @@ def subset(self, graph: DimensionGraph) -> DataCoordinate: return `True` (respectively) on the returned `DataCoordinate` as well. The converse does not hold. """ + # TODO: update docs r.e. deprecation on DM-41326. raise NotImplementedError() @abstractmethod @@ -447,8 +598,8 @@ def union(self, other: DataCoordinate) -> DataCoordinate: ------- unioned : `DataCoordinate` A `DataCoordinate` instance that satisfies - ``unioned.graph == self.graph.union(other.graph)``. Will preserve - ``hasFull`` and ``hasRecords`` whenever possible. + ``unioned.dimensions == self.dimensions.union(other.dimensions)``. + Will preserve ``hasFull`` and ``hasRecords`` whenever possible. Notes ----- @@ -478,21 +629,39 @@ def expanded( keys and `DimensionRecord` values. Keys must cover all elements in ``self.graph.elements``. Values may be `None`, but only to reflect actual NULL values in the database, not just records that have not - been fetched. + been fetched. Passing a `NamedKeyMapping` is deprecated and will + not be supported after v27. """ + # TODO: update docs r.e. deprecation on DM-41326. raise NotImplementedError() @property def universe(self) -> DimensionUniverse: """Universe that defines all known compatible dimensions. - The univers will be compatible with this coordinate + The universe will be compatible with this coordinate (`DimensionUniverse`). """ - return self.graph.universe + return self.dimensions.universe @property @abstractmethod + def dimensions(self) -> DimensionGroup: + """Dimensions identified by this data ID (`DimensionGroup`). + + Note that values are only required to be present for dimensions in + ``self.dimensions.required``; all others may be retrieved (from a + `Registry`) given these. + """ + raise NotImplementedError() + + # TODO: remove on DM-41326. + @property + @deprecated( + "DataCoordinate.graph is deprecated in favor of .dimensions, and will be dropped after v27.", + version="v27", + category=FutureWarning, + ) def graph(self) -> DimensionGraph: """Dimensions identified by this data ID (`DimensionGraph`). 
@@ -500,7 +669,7 @@ def graph(self) -> DimensionGraph: ``self.graph.required``; all others may be retrieved (from a `Registry`) given these. """ - raise NotImplementedError() + return self.dimensions._as_graph() @abstractmethod def hasFull(self) -> bool: @@ -518,13 +687,19 @@ def hasFull(self) -> bool: """ raise NotImplementedError() + # TODO: remove on DM-41326. @property + @deprecated( + "DataCoordinate.full is deprecated in favor of .mapping, and will be dropped after v27.", + version="v27", + category=FutureWarning, + ) @abstractmethod def full(self) -> NamedKeyMapping[Dimension, DataIdValue]: - """Return mapping for all dimensions in ``self.graph``. + """Return mapping for all dimensions in ``self.dimensions``. The mapping includes key-value pairs for all dimensions in - ``self.graph``, including implied (`NamedKeyMapping`). + ``self.dimensions``, including implied. Accessing this attribute if `hasFull` returns `False` is a logic error that may raise an exception of unspecified type either immediately or @@ -533,7 +708,13 @@ def full(self) -> NamedKeyMapping[Dimension, DataIdValue]: """ raise NotImplementedError() - @abstractmethod + # TODO: remove on DM-41326. + @deprecated( + "DataCoordinate.values_tuple() is deprecated in favor of .required_values, and will be dropped " + "after v27.", + version="v27", + category=FutureWarning, + ) def values_tuple(self) -> tuple[DataIdValue, ...]: """Return the required values (only) of this data ID as a tuple. @@ -541,7 +722,7 @@ def values_tuple(self) -> tuple[DataIdValue, ...]: hashing these tuples can be *much* faster than comparing the original `DataCoordinate` instances. """ - raise NotImplementedError() + return self.required_values @abstractmethod def hasRecords(self) -> bool: @@ -565,11 +746,14 @@ def hasRecords(self) -> bool: @property def records(self) -> NamedKeyMapping[DimensionElement, DimensionRecord | None]: - """Return the records. + """A mapping that contains `DimensionRecord` objects for all + elements identified by this data ID. - Returns a mapping that contains `DimensionRecord` objects for all - elements identified by this data ID (`NamedKeyMapping`). + This mapping will become a regular `~collections.abc.Mapping` with + `str` keys after v27. + Notes + ----- The values of this mapping may be `None` if and only if there is no record for that element with these dimensions in the database (which means some foreign key field must have a NULL value). @@ -590,7 +774,7 @@ def _record(self, name: str) -> DimensionRecord | None: ---------- name : `str` The name of a `DimensionElement`, guaranteed to be in - ``self.graph.elements.names``. + ``self.dimensions.elements``. Returns ------- @@ -606,7 +790,7 @@ def region(self) -> Region | None: (`lsst.sphgeom.Region` or `None`). - This is `None` if and only if ``self.graph.spatial`` is empty. + This is `None` if and only if ``self.dimensions.spatial`` is empty. Accessing this attribute if `hasRecords` returns `False` is a logic error that may or may not raise an exception, depending on the @@ -614,8 +798,8 @@ def region(self) -> Region | None: """ assert self.hasRecords(), "region may only be accessed if hasRecords() returns True." 
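
The deprecations above replace the `NamedKeyMapping`-style accessors with plain
`str`-keyed ones.  A minimal before-and-after sketch, assuming the default
dimension configuration and hypothetical instrument/detector values:

    from lsst.daf.butler import DataCoordinate, DimensionUniverse

    universe = DimensionUniverse()  # default dimension configuration
    data_id = DataCoordinate.standardize(
        {"instrument": "HSC", "detector": 50}, universe=universe
    )
    # Deprecated after v27          ->  replacement
    # data_id.graph                 ->  data_id.dimensions
    # data_id.full.byName()         ->  dict(data_id.mapping)
    # data_id.byName()              ->  dict(data_id.required)
    # data_id.values_tuple()        ->  data_id.required_values
    print(data_id.dimensions.required, data_id.required_values)
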
regions = [] - for family in self.graph.spatial: - element = family.choose(self.graph.elements) + for family in self.dimensions.spatial: + element = family.choose(self.dimensions.elements, self.universe) record = self._record(element.name) if record is None or record.region is None: return None @@ -629,7 +813,7 @@ def timespan(self) -> Timespan | None: (`Timespan` or `None`). - This is `None` if and only if ``self.graph.timespan`` is empty. + This is `None` if and only if ``self.dimensions.temporal`` is empty. Accessing this attribute if `hasRecords` returns `False` is a logic error that may or may not raise an exception, depending on the @@ -637,8 +821,8 @@ def timespan(self) -> Timespan | None: """ assert self.hasRecords(), "timespan may only be accessed if hasRecords() returns True." timespans = [] - for family in self.graph.temporal: - element = family.choose(self.graph.elements) + for family in self.dimensions.temporal: + element = family.choose(self.dimensions.elements, self.universe) record = self._record(element.name) # DimensionRecord subclasses for temporal elements always have # .timespan, but they're dynamic so this can't be type-checked. @@ -713,17 +897,15 @@ def to_simple(self, minimal: bool = False) -> SerializedDataCoordinate: The object converted to simple form. """ # Convert to a dict form - if self.hasFull(): - dataId = self.full.byName() - else: - dataId = self.byName() records: dict[str, SerializedDimensionRecord] | None if not minimal and self.hasRecords(): - records = {k: v.to_simple() for k, v in self.records.byName().items() if v is not None} + records = { + k: v.to_simple() for k in self.dimensions.elements if (v := self.records[k]) is not None + } else: records = None - return SerializedDataCoordinate(dataId=dataId, records=records) + return SerializedDataCoordinate(dataId=dict(self.mapping), records=records) @classmethod def from_simple( @@ -742,7 +924,7 @@ def from_simple( simple : `dict` of [`str`, `Any`] The `dict` returned by `to_simple()`. universe : `DimensionUniverse` - The special graph of all known dimensions. + Object that manages all known dimensions. registry : `lsst.daf.butler.Registry`, optional Registry from which a universe can be extracted. Can be `None` if universe is provided explicitly. @@ -783,6 +965,8 @@ def from_simple( """ +# Deprecated by having its only public access (DataCoordinate.full) deprecated. +# TODO: remove on DM-41326. class _DataCoordinateFullView(NamedKeyMapping[Dimension, DataIdValue]): """View class for `DataCoordinate.full`. @@ -801,8 +985,7 @@ def __init__(self, target: _BasicTupleDataCoordinate): __slots__ = ("_target",) def __repr__(self) -> str: - terms = [f"{d}: {self[d]!r}" for d in self._target.graph.dimensions.names] - return "{{{}}}".format(", ".join(terms)) + return repr(self._target) def __getitem__(self, key: DataIdKey) -> DataIdValue: return self._target[key] @@ -822,6 +1005,7 @@ def names(self) -> Set[str]: return self.keys().names +# TODO: Make a Mapping[str, DimensionRecord | None] on DM-41326. class _DataCoordinateRecordsView(NamedKeyMapping[DimensionElement, DimensionRecord | None]): """View class for `DataCoordinate.records`. 
@@ -848,28 +1032,47 @@ def __str__(self) -> str: def __getitem__(self, key: DimensionElement | str) -> DimensionRecord | None: if isinstance(key, DimensionElement): + warnings.warn( + "Using Dimension keys in DataCoordinate is deprecated and will not be supported after v27.", + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.daf.butler"), + ) key = key.name return self._target._record(key) + # TODO: fix on DM-41326. + @deprecated( + "Iteration over DataCoordinate.records is deprecated as the key type will change to 'str' after " + "v27. Use DataCoordinate.dimensions.elements to get the names of all dimension elements instead.", + version="v27", + category=FutureWarning, + ) def __iter__(self) -> Iterator[DimensionElement]: return iter(self.keys()) def __len__(self) -> int: return len(self.keys()) + # TODO: remove on DM-41326. + # Deprecation warning will come from using .graph. def keys(self) -> NamedValueAbstractSet[DimensionElement]: # type: ignore return self._target.graph.elements @property + @deprecated( + "DataCoordinate.records.names is deprecated in favor of DataCoordinate.dimensions.elements and " + "will be removed after v27.", + version="v27", + category=FutureWarning, + ) def names(self) -> Set[str]: # Docstring inherited from `NamedKeyMapping`. return self.keys().names class _BasicTupleDataCoordinate(DataCoordinate): - """Standard implementation of `DataCoordinate`. - - Backed by a tuple of values. + """Intermediate base class for the standard implementation of + `DataCoordinate`. This class should only be accessed outside this module via the `DataCoordinate` interface, and should only be constructed via the static @@ -877,30 +1080,42 @@ class _BasicTupleDataCoordinate(DataCoordinate): Parameters ---------- - graph : `DimensionGraph` + dimensions : `DimensionGroup` The dimensions to be identified. values : `tuple` [ `int` or `str` ] - Data ID values, ordered like the concatenation of ``graph.required`` - and ``graph.implied``. May include values for just required dimensions - (which is why these always come first) or all dimensions. + Data ID values, ordered to match + ``dimensions.data_coordinate_keys``. May include values for just + required dimensions (which always come first) or all dimensions + (concrete subclasses implementations will care which). """ - def __init__(self, graph: DimensionGraph, values: tuple[DataIdValue, ...]): - self._graph = graph + def __init__(self, dimensions: DimensionGroup, values: tuple[DataIdValue, ...]): + self._dimensions = dimensions self._values = values - __slots__ = ("_graph", "_values") + __slots__ = ("_dimensions", "_values") @property - def graph(self) -> DimensionGraph: + def dimensions(self) -> DimensionGroup: # Docstring inherited from DataCoordinate. - return self._graph + return self._dimensions + + @property + def required(self) -> Mapping[str, DataIdValue]: + # Docstring inherited from DataCoordinate. + return _DataCoordinateRequiredMappingView(self) def __getitem__(self, key: DataIdKey) -> DataIdValue: # Docstring inherited from DataCoordinate. + # TODO: remove on DM-41326. 
if isinstance(key, Dimension): + warnings.warn( + "Using Dimension keys in DataCoordinate is deprecated and will not be supported after v27.", + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.daf.butler"), + ) key = key.name - index = self._graph._group._data_coordinate_indices[key] + index = self._dimensions._data_coordinate_indices[key] try: return self._values[index] except IndexError: @@ -908,102 +1123,252 @@ def __getitem__(self, key: DataIdKey) -> DataIdValue: # values for the required ones. raise KeyError(key) from None + # TODO: remove on DM-41326. + @deprecated( + "Using DataCoordinate as a NamedKeyMapping is deprecated in favor of the " + ".mapping and .required attributes, and will be dropped after v27. " + "Use `dict(data_id.required)` as an exact replacement for `data_id.byName()`.", + version="v27", + category=FutureWarning, + ) def byName(self) -> dict[str, DataIdValue]: # Docstring inheritance. - # Reimplementation is for optimization; `values_tuple()` is much faster - # to iterate over than values() because it doesn't go through + # Reimplementation is for optimization; `required_values` is much + # faster to iterate over than values() because it doesn't go through # `__getitem__`. - return dict(zip(self.names, self.values_tuple(), strict=True)) + return dict(zip(self.names, self.required_values, strict=True)) + + def hasRecords(self) -> bool: + # Docstring inherited from DataCoordinate. + return False - def subset(self, graph: DimensionGraph) -> DataCoordinate: + def _record(self, name: str) -> DimensionRecord | None: # Docstring inherited from DataCoordinate. - if self._graph == graph: + raise AssertionError() + + def __getattr__(self, name: str) -> Any: + if name in self.dimensions.elements: + raise AttributeError( + f"Dimension record attribute {name!r} is only available on expanded DataCoordinates." + ) + raise AttributeError(name) + + +class _DataCoordinateRequiredMappingView(Mapping[str, DataIdValue]): + """A DataCoordinate Mapping view class whose keys are just the required + dimensions. + """ + + def __init__(self, target: DataCoordinate): + self._target = target + + __slots__ = ("_target",) + + def __getitem__(self, key: str) -> DataIdValue: + if key not in self._target.dimensions.required: + raise KeyError(key) + return self._target[key] + + def __len__(self) -> int: + return len(self._target.dimensions.required) + + def __iter__(self) -> Iterator[str]: + return iter(self._target.dimensions.required) + + def __repr__(self) -> str: + return f"{{{', '.join(f'{k}: {v!r}' for k, v in self.items())}}}" + + +class _DataCoordinateFullMappingView(Mapping[str, DataIdValue]): + """A DataCoordinate Mapping view class whose keys are all dimensions.""" + + def __init__(self, target: DataCoordinate): + self._target = target + + __slots__ = ("_target",) + + def __getitem__(self, key: str) -> DataIdValue: + return self._target[key] + + def __len__(self) -> int: + return len(self._target.dimensions) + + def __iter__(self) -> Iterator[str]: + return iter(self._target.dimensions.data_coordinate_keys) + + def __repr__(self) -> str: + return f"{{{', '.join(f'{k}: {v!r}' for k, v in self.items())}}}" + + +class _RequiredTupleDataCoordinate(_BasicTupleDataCoordinate): + """A `DataCoordinate` implementation that has values for required + dimensions only, when implied dimensions already exist. + + Note that `_FullTupleDataCoordinate` should be used if there are no + implied dimensions. 
+
+    This class should only be accessed outside this module via the
+    `DataCoordinate` interface, and should only be constructed via calls to
+    `DataCoordinate.from_required_values`.
+    """
+
+    __slots__ = ()
+
+    @property
+    def mapping(self) -> Mapping[str, DataIdValue]:
+        # Docstring inherited from DataCoordinate.
+        return _DataCoordinateRequiredMappingView(self)
+
+    @property
+    def required_values(self) -> tuple[DataIdValue, ...]:
+        # Docstring inherited from DataCoordinate.
+        return self._values
+
+    def subset(self, dimensions: DimensionGraph | DimensionGroup | Iterable[str]) -> DataCoordinate:
+        # Docstring inherited from DataCoordinate.
+        dimensions = self.universe.conform(dimensions)
+        if self._dimensions == dimensions:
             return self
-        elif self.hasFull() or self._graph.required >= graph.dimensions:
-            return _BasicTupleDataCoordinate(
-                graph,
-                tuple(self[k] for k in graph._group._data_coordinate_indices),
+        elif self._dimensions.required >= dimensions.names:
+            return DataCoordinate.from_full_values(
+                dimensions,
+                tuple(self[k] for k in dimensions.data_coordinate_keys),
             )
         else:
-            return _BasicTupleDataCoordinate(graph, tuple(self[k] for k in graph.required.names))
+            return DataCoordinate.from_required_values(
+                dimensions, tuple(self[k] for k in dimensions.required)
+            )

     def union(self, other: DataCoordinate) -> DataCoordinate:
         # Docstring inherited from DataCoordinate.
-        graph = self.graph.union(other.graph)
-        # See if one or both input data IDs is already what we want to return;
-        # if so, return the most complete one we have.
-        if other.graph == graph:
-            if self.graph == graph:
-                # Input data IDs have the same graph (which is also the result
-                # graph), but may not have the same content.
-                # other might have records; self does not, so try other first.
-                # If it at least has full values, it's no worse than self.
-                if other.hasFull():
-                    return other
-                else:
-                    return self
-            elif other.hasFull():
-                return other
-            # There's some chance that neither self nor other has full values,
-            # but together provide enough to the union to.  Let the general
-            # case below handle that.
-        elif self.graph == graph and self.hasFull():
-            # No chance at returning records.  If self has full values, it's
-            # the best we can do.
-            return self
+        dimensions = self.dimensions.union(other.dimensions)
+        # See if the other one is already what we want to return.  We don't
+        # shortcut-return 'self' because `other` might have full values or
+        # even records, and we want to return the more complete data ID.
+        if other.dimensions == dimensions:
+            return other
         # General case with actual merging of dictionaries.
-        values = self.full.byName() if self.hasFull() else self.byName()
-        values.update(other.full.byName() if other.hasFull() else other.byName())
-        return DataCoordinate.standardize(values, graph=graph)
+        values = dict(self.mapping)
+        values.update(other.mapping)
+        return DataCoordinate.standardize(values, dimensions=dimensions)

+    # TODO: remove on DM-41326.
     @property
     def full(self) -> NamedKeyMapping[Dimension, DataIdValue]:
         # Docstring inherited.
-        assert self.hasFull(), "full may only be accessed if hasFull() returns True."
-        return _DataCoordinateFullView(self)
+        raise AssertionError("full may only be accessed if hasFull() returns True.")

     def expanded(
         self, records: NameLookupMapping[DimensionElement, DimensionRecord | None]
     ) -> DataCoordinate:
         # Docstring inherited from DataCoordinate
-        values = self._values
-        if not self.hasFull():
-            # Extract a complete values tuple from the attributes of the given
-            # records.
It's possible for these to be inconsistent with - # self._values (which is a serious problem, of course), but we've - # documented this as a no-checking API. - values += tuple(getattr(records[d.name], d.primaryKey.name) for d in self._graph.implied) - return _ExpandedTupleDataCoordinate(self._graph, values, records) + # Extract a complete values tuple from the attributes of the given + # records. It's possible for these to be inconsistent with + # self._values (which is a serious problem, of course), but we've + # documented this as a no-checking API. + values = self._values + tuple( + getattr(records[d], cast(Dimension, self.universe[d]).primaryKey.name) + for d in self._dimensions.implied + ) + if isinstance(records, NamedKeyMapping): + warnings.warn( + "NamedKeyMappings will not be accepted after v27; pass a Mapping with str keys instead.", + stacklevel=find_outside_stacklevel("lsst.daf.butler"), + category=FutureWarning, + ) + return _ExpandedTupleDataCoordinate(self._dimensions, values, records) def hasFull(self) -> bool: # Docstring inherited from DataCoordinate. - return len(self._values) == len(self._graph._group._data_coordinate_indices) + return False - def hasRecords(self) -> bool: + def __reduce__(self) -> tuple[Any, ...]: + return (_RequiredTupleDataCoordinate, (self._dimensions, self._values)) + + +class _FullTupleDataCoordinate(_BasicTupleDataCoordinate): + """A `DataCoordinate` implementation that has values for all dimensions. + + This class should only be accessed outside this module via the + `DataCoordinate` interface, and should only be constructed via calls to + `DataCoordinate.from_full_values`. + """ + + __slots__ = () + + @property + def mapping(self) -> Mapping[str, DataIdValue]: # Docstring inherited from DataCoordinate. - return False + return _DataCoordinateFullMappingView(self) - def values_tuple(self) -> tuple[DataIdValue, ...]: + @property + def required_values(self) -> tuple[DataIdValue, ...]: # Docstring inherited from DataCoordinate. - return self._values[: len(self._graph.required)] + return self._values[: len(self._dimensions.required)] - def _record(self, name: str) -> DimensionRecord | None: + @property + def full_values(self) -> tuple[DataIdValue, ...]: # Docstring inherited from DataCoordinate. - raise AssertionError() + return self._values - def __reduce__(self) -> tuple[Any, ...]: - return (_BasicTupleDataCoordinate, (self._graph, self._values)) + def subset(self, dimensions: DimensionGraph | DimensionGroup | Iterable[str]) -> DataCoordinate: + # Docstring inherited from DataCoordinate. + dimensions = self.universe.conform(dimensions) + if self._dimensions == dimensions: + return self + return DataCoordinate.from_full_values( + dimensions, + tuple(self[k] for k in dimensions.data_coordinate_keys), + ) - def __getattr__(self, name: str) -> Any: - if name in self.graph.elements.names: - raise AttributeError( - f"Dimension record attribute {name!r} is only available on expanded DataCoordinates." + def union(self, other: DataCoordinate) -> DataCoordinate: + # Docstring inherited from DataCoordinate. + dimensions = self.dimensions.union(other.dimensions) + # See if one or both input data IDs is already what we want to return; + # if so, return the most complete one we have. + if other.dimensions == dimensions and other.hasRecords(): + return other + elif self.dimensions == dimensions and not other.hasRecords(): + return self + # General case with actual merging of dictionaries. 
+ values = dict(self.mapping) + values.update(other.mapping) + return DataCoordinate.standardize(values, dimensions=dimensions) + + # TODO: remove on DM-41326. + @property + @deprecated( + "DataCoordinate.full is deprecated in favor of .mapping, and will be dropped after v27.", + version="v27", + category=FutureWarning, + ) + def full(self) -> NamedKeyMapping[Dimension, DataIdValue]: + # Docstring inherited. + return _DataCoordinateFullView(self) + + def expanded( + self, records: NameLookupMapping[DimensionElement, DimensionRecord | None] + ) -> DataCoordinate: + # Docstring inherited from DataCoordinate + if isinstance(records, NamedKeyMapping): + warnings.warn( + "NamedKeyMappings will not be accepted after v27; pass a Mapping with str keys instead.", + stacklevel=find_outside_stacklevel("lsst.daf.butler"), + category=FutureWarning, ) - raise AttributeError(name) + return _ExpandedTupleDataCoordinate(self._dimensions, self._values, records) + + def hasFull(self) -> bool: + # Docstring inherited from DataCoordinate. + return True + def __reduce__(self) -> tuple[Any, ...]: + return (_FullTupleDataCoordinate, (self._dimensions, self._values)) -class _ExpandedTupleDataCoordinate(_BasicTupleDataCoordinate): - """A `DataCoordinate` implementation that can hold `DimensionRecord`. + +class _ExpandedTupleDataCoordinate(_FullTupleDataCoordinate): + """A `DataCoordinate` implementation that directly holds `DimensionRecord` + objects relevant to it. This class should only be accessed outside this module via the `DataCoordinate` interface, and should only be constructed via calls to @@ -1011,85 +1376,59 @@ class _ExpandedTupleDataCoordinate(_BasicTupleDataCoordinate): Parameters ---------- - graph : `DimensionGraph` + dimensions : `DimensionGroup` The dimensions to be identified. values : `tuple` [ `int` or `str` ] - Data ID values, ordered like the concatenation of ``graph.required`` - and ``graph.implied``. + Data ID values, ordered to match + ``dimensions._data_coordinate_indices``. Just include values for all + dimensions. records : `~collections.abc.Mapping` [ `str`, `DimensionRecord` or `None` ] A `NamedKeyMapping` with `DimensionElement` keys or a regular `~collections.abc.Mapping` with `str` (`DimensionElement` name) keys and `DimensionRecord` values. Keys must cover all elements in - ``self.graph.elements``. Values may be `None`, but only to reflect - actual NULL values in the database, not just records that have not been - fetched. + ``self.dimensions.elements``. Values may be `None`, but only to + reflect actual NULL values in the database, not just records that have + not been fetched. """ def __init__( self, - graph: DimensionGraph, + dimensions: DimensionGroup, values: tuple[DataIdValue, ...], records: NameLookupMapping[DimensionElement, DimensionRecord | None], ): - super().__init__(graph, values) + super().__init__(dimensions, values) assert super().hasFull(), "This implementation requires full dimension records." self._records = records __slots__ = ("_records",) - def subset(self, graph: DimensionGraph) -> DataCoordinate: + def subset(self, dimensions: DimensionGraph | DimensionGroup | Iterable[str]) -> DataCoordinate: # Docstring inherited from DataCoordinate. 
- if self._graph == graph: - return self - return _ExpandedTupleDataCoordinate( - graph, tuple(self[k] for k in graph._group._data_coordinate_indices), records=self._records - ) + return super().subset(dimensions).expanded(self._records) def expanded( self, records: NameLookupMapping[DimensionElement, DimensionRecord | None] ) -> DataCoordinate: # Docstring inherited from DataCoordinate. + if isinstance(records, NamedKeyMapping): + warnings.warn( + "NamedKeyMappings will not be accepted after v27; pass a Mapping with str keys instead.", + stacklevel=find_outside_stacklevel("lsst.daf.butler"), + category=FutureWarning, + ) return self def union(self, other: DataCoordinate) -> DataCoordinate: # Docstring inherited from DataCoordinate. - graph = self.graph.union(other.graph) - # See if one or both input data IDs is already what we want to return; - # if so, return the most complete one we have. - if self.graph == graph: - # self has records, so even if other is also a valid result, it's - # no better. - return self - if other.graph == graph and other.hasFull(): - # If other has full values, and self does not identify some of - # those, it's the base we can do. It may have records, too. - return other - # If other does not have full values, there's a chance self may - # provide the values needed to complete it. For example, self - # could be {band} while other could be - # {instrument, physical_filter, band}, with band unknown. - # General case with actual merging of dictionaries. - values = self.full.byName() - values.update(other.full.byName() if other.hasFull() else other.byName()) - basic = DataCoordinate.standardize(values, graph=graph) - # See if we can add records. - if self.hasRecords() and other.hasRecords(): - # Sometimes the elements of a union of graphs can contain elements - # that weren't in either input graph (because graph unions are only - # on dimensions). e.g. {visit} | {detector} brings along - # visit_detector_region. - elements = set(graph.elements.names) - elements -= self.graph.elements.names - elements -= other.graph.elements.names - if not elements: - records = NamedKeyDict[DimensionElement, DimensionRecord | None](self.records) - records.update(other.records) - return basic.expanded(records.freeze()) - return basic - - def hasFull(self) -> bool: - # Docstring inherited from DataCoordinate. - return True + result = super().union(other) + if not result.hasRecords() and other.hasRecords(): + records = {e: self._record(e) for e in self.dimensions.elements} | { + e: other._record(e) for e in other.dimensions.elements + } + if records.keys() >= result.dimensions.elements: + return result.expanded(records) + return result def hasRecords(self) -> bool: # Docstring inherited from DataCoordinate. @@ -1100,7 +1439,7 @@ def _record(self, name: str) -> DimensionRecord | None: return self._records[name] def __reduce__(self) -> tuple[Any, ...]: - return (_ExpandedTupleDataCoordinate, (self._graph, self._values, self._records)) + return (_ExpandedTupleDataCoordinate, (self._dimensions, self._values, self._records)) def __getattr__(self, name: str) -> Any: try: @@ -1110,5 +1449,5 @@ def __getattr__(self, name: str) -> Any: def __dir__(self) -> list[str]: result = list(super().__dir__()) - result.extend(self.graph.elements.names) + result.extend(self.dimensions.elements) return result From 71e786a1fa6f49c1f32275e9727be860fcd5e640 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Thu, 2 Nov 2023 11:47:22 -0400 Subject: [PATCH 07/16] Add DimensionElement.minimal_group to replace .graph. 
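
For illustration, `minimal_group` is the `DimensionGroup` counterpart of the
deprecated `DimensionElement.graph`.  A minimal sketch, assuming the default
dimension configuration (the exact group contents depend on the configured
universe):

    from lsst.daf.butler import DimensionUniverse

    universe = DimensionUniverse()
    visit = universe["visit"]
    group = visit.minimal_group   # replaces the deprecated visit.graph
    print(group.required)         # dimension names needed to identify a visit
    print(group.implied)          # additional dimensions identified along the way
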
--- .../lsst/daf/butler/dimensions/_elements.py | 31 ++++++++++++++----- .../lsst/daf/butler/dimensions/_universe.py | 20 +++++++----- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/python/lsst/daf/butler/dimensions/_elements.py b/python/lsst/daf/butler/dimensions/_elements.py index 163cb98bd5..feeb6527ff 100644 --- a/python/lsst/daf/butler/dimensions/_elements.py +++ b/python/lsst/daf/butler/dimensions/_elements.py @@ -34,7 +34,7 @@ ) from abc import abstractmethod -from typing import TYPE_CHECKING, Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar, cast from lsst.utils.classes import cached_getter @@ -47,6 +47,7 @@ from ..registry import Registry from ._governor import GovernorDimension from ._graph import DimensionGraph + from ._group import DimensionGroup from ._records import DimensionRecord from ._universe import DimensionUniverse @@ -212,12 +213,12 @@ def governor(self) -> GovernorDimension | None: element, or `None` if there is no such dimension (`GovernorDimension` or `None`). """ - if len(self.graph.governors) == 1: - (result,) = self.graph.governors - return result - elif len(self.graph.governors) > 1: + if len(self.minimal_group.governors) == 1: + (result,) = self.minimal_group.governors + return cast("GovernorDimension", self.universe[result]) + elif len(self.minimal_group.governors) > 1: raise RuntimeError( - f"Dimension element {self.name} has multiple governors: {self.graph.governors}." + f"Dimension element {self.name} has multiple governors: {self.minimal_group.governors}." ) else: return None @@ -280,8 +281,9 @@ def dimensions(self) -> NamedValueAbstractSet[Dimension]: """ return NamedValueSet(list(self.required) + list(self.implied)).freeze() + # Deprecated via a warning from its implementation. + # TODO: remove on DM-41326. @property - @cached_getter def graph(self) -> DimensionGraph: """Return minimal graph that includes this element (`DimensionGraph`). @@ -291,7 +293,20 @@ def graph(self) -> DimensionGraph: ``self.graph.implied`` includes all dimensions also identified (possibly recursively) by this set. """ - return self.universe.extract(self.dimensions.names) + return self.minimal_group._as_graph() + + @property + @cached_getter + def minimal_group(self) -> DimensionGroup: + """Return minimal dimension group that includes this element. + + ``self.minimal_group.required`` includes all dimensions whose primary + key values are sufficient (often necessary) to uniquely identify + ``self`` (including ``self`` if ``isinstance(self, Dimension)``. + ``self.minimal_group.implied`` includes all dimensions also identified + (possibly recursively) by this set. + """ + return self.universe.conform(self.dimensions.names) @property @cached_getter diff --git a/python/lsst/daf/butler/dimensions/_universe.py b/python/lsst/daf/butler/dimensions/_universe.py index 05bf4bc2f2..0d98a2936a 100644 --- a/python/lsst/daf/butler/dimensions/_universe.py +++ b/python/lsst/daf/butler/dimensions/_universe.py @@ -479,7 +479,7 @@ def extract(self, iterable: Iterable[Dimension | str]) -> DimensionGraph: def conform( self, - dimensions: Iterable[str | Dimension] | DimensionGroup | DimensionGraph, + dimensions: Iterable[str | Dimension] | str | DimensionElement | DimensionGroup | DimensionGraph, /, ) -> DimensionGroup: """Construct a dimension group from an iterable of dimension names. 
@@ -487,14 +487,16 @@ def conform( Parameters ---------- dimensions : `~collections.abc.Iterable` [ `str` or `Dimension` ], \ - `DimensionGroup`, or `DimensionGraph` + `str`, `DimensionElement`, `DimensionGroup`, or \ + `DimensionGraph` Dimensions that must be included in the returned group; their dependencies will be as well. Support for `Dimension`, - `DimensionGraph` objects is deprecated and will be removed after - v27. Passing `DimensionGraph` objects will not yield a deprecation - warning to allow non-deprecated methods and properties that return - `DimensionGraph` objects to be passed though, since these will be - changed to return `DimensionGroup` in the future. + `DimensionElement` and `DimensionGraph` objects is deprecated and + will be removed after v27. Passing `DimensionGraph` objects will + not yield a deprecation warning to allow non-deprecated methods and + properties that return `DimensionGraph` objects to be passed + though, since these will be changed to return `DimensionGroup` in + the future. Returns ------- @@ -506,6 +508,10 @@ def conform( return dimensions case DimensionGraph(): return dimensions.as_group() + case DimensionElement() as d: + return d.minimal_group + case str() as name: + return self[name].minimal_group case iterable: names: set[str] = {getattr(d, "name", cast(str, d)) for d in iterable} return DimensionGroup(self, names) From ed29a9b6c1ac6f0fa8123a9f3c59137d2d0346b7 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Thu, 2 Nov 2023 11:48:11 -0400 Subject: [PATCH 08/16] Use DimensionGroup in DataCoordinateIterables and query system. --- .../dimensions/_data_coordinate_iterable.py | 347 +++++++++++++----- .../daf/butler/registry/queries/_builder.py | 2 +- .../daf/butler/registry/queries/_query.py | 77 ++-- .../butler/registry/queries/_query_backend.py | 16 +- .../butler/registry/queries/_query_context.py | 11 +- .../daf/butler/registry/queries/_readers.py | 27 +- .../daf/butler/registry/queries/_results.py | 60 +-- .../registry/queries/_sql_query_backend.py | 41 ++- .../daf/butler/registry/queries/_structs.py | 39 +- .../queries/expressions/_predicate.py | 6 +- .../queries/expressions/categorize.py | 42 ++- .../registry/queries/expressions/check.py | 58 ++- .../lsst/daf/butler/registry/sql_registry.py | 13 +- tests/test_expressions.py | 30 +- 14 files changed, 473 insertions(+), 296 deletions(-) diff --git a/python/lsst/daf/butler/dimensions/_data_coordinate_iterable.py b/python/lsst/daf/butler/dimensions/_data_coordinate_iterable.py index 4a44d9a34f..72d8c19d62 100644 --- a/python/lsst/daf/butler/dimensions/_data_coordinate_iterable.py +++ b/python/lsst/daf/butler/dimensions/_data_coordinate_iterable.py @@ -33,12 +33,17 @@ "DataCoordinateSequence", ) +import warnings from abc import abstractmethod from collections.abc import Collection, Iterable, Iterator, Sequence, Set from typing import Any, overload +from deprecated.sphinx import deprecated +from lsst.utils.introspection import find_outside_stacklevel + from ._coordinate import DataCoordinate from ._graph import DimensionGraph +from ._group import DimensionGroup from ._universe import DimensionUniverse @@ -73,10 +78,21 @@ def fromScalar(dataId: DataCoordinate) -> _ScalarDataCoordinateIterable: """ return _ScalarDataCoordinateIterable(dataId) + # TODO: remove on DM-41326. 
@property - @abstractmethod + @deprecated( + "Deprecated in favor of .dimensions; will be removed after v26.", + category=FutureWarning, + version="v27", + ) def graph(self) -> DimensionGraph: """Dimensions identified by these data IDs (`DimensionGraph`).""" + return self.dimensions._as_graph() + + @property + @abstractmethod + def dimensions(self) -> DimensionGroup: + """Dimensions identified by these data IDs (`DimensionGroup`).""" raise NotImplementedError() @property @@ -85,7 +101,7 @@ def universe(self) -> DimensionUniverse: (`DimensionUniverse`). """ - return self.graph.universe + return self.dimensions.universe @abstractmethod def hasFull(self) -> bool: @@ -125,7 +141,7 @@ def toSet(self) -> DataCoordinateSet: """ return DataCoordinateSet( frozenset(self), - graph=self.graph, + dimensions=self.dimensions, hasFull=self.hasFull(), hasRecords=self.hasRecords(), check=False, @@ -142,11 +158,15 @@ def toSequence(self) -> DataCoordinateSequence: `DataCoordinateSequence`. """ return DataCoordinateSequence( - tuple(self), graph=self.graph, hasFull=self.hasFull(), hasRecords=self.hasRecords(), check=False + tuple(self), + dimensions=self.dimensions, + hasFull=self.hasFull(), + hasRecords=self.hasRecords(), + check=False, ) @abstractmethod - def subset(self, graph: DimensionGraph) -> DataCoordinateIterable: + def subset(self, dimensions: DimensionGraph | DimensionGroup | Iterable[str]) -> DataCoordinateIterable: """Return a subset iterable. This subset iterable returns data IDs that identify a subset of the @@ -154,15 +174,17 @@ def subset(self, graph: DimensionGraph) -> DataCoordinateIterable: Parameters ---------- - graph : `DimensionGraph` + dimensions : `DimensionGraph`, `DimensionGroup`, or \ + `~collections.abc.Iterable` [ `str` ] Dimensions to be identified by the data IDs in the returned - iterable. Must be a subset of ``self.graph``. + iterable. Must be a subset of ``self.dimensions``. Returns ------- iterable : `DataCoordinateIterable` - A `DataCoordinateIterable` with ``iterable.graph == graph``. - May be ``self`` if ``graph == self.graph``. Elements are + A `DataCoordinateIterable` with + ``iterable.dimensions == dimensions``. + May be ``self`` if ``dimensions == self.dimensions``. Elements are equivalent to those that would be created by calling `DataCoordinate.subset` on all elements in ``self``, possibly with deduplication and/or reordering (depending on the subclass, @@ -205,9 +227,9 @@ def __contains__(self, key: Any) -> bool: return False @property - def graph(self) -> DimensionGraph: + def dimensions(self) -> DimensionGroup: # Docstring inherited from DataCoordinateIterable. - return self._dataId.graph + return self._dataId.dimensions def hasFull(self) -> bool: # Docstring inherited from DataCoordinateIterable. @@ -217,9 +239,12 @@ def hasRecords(self) -> bool: # Docstring inherited from DataCoordinateIterable. return self._dataId.hasRecords() - def subset(self, graph: DimensionGraph) -> _ScalarDataCoordinateIterable: + def subset( + self, dimensions: DimensionGraph | DimensionGroup | Iterable[str] + ) -> _ScalarDataCoordinateIterable: # Docstring inherited from DataCoordinateIterable. 
- return _ScalarDataCoordinateIterable(self._dataId.subset(graph)) + dimensions = self.universe.conform(dimensions) + return _ScalarDataCoordinateIterable(self._dataId.subset(dimensions)) class _DataCoordinateCollectionBase(DataCoordinateIterable): @@ -236,9 +261,14 @@ class _DataCoordinateCollectionBase(DataCoordinateIterable): ---------- dataIds : `collections.abc.Collection` [ `DataCoordinate` ] A collection of `DataCoordinate` instances, with dimensions equal to - ``graph``. - graph : `DimensionGraph` - Dimensions identified by all data IDs in the set. + ``dimensions``. + graph : `DimensionGraph`, optional + Dimensions identified by all data IDs in the collection. Ignored if + ``dimensions`` is provided, and deprecated with removal after v27. + dimensions : `~collections.abc.Iterable` [ `str` ], `DimensionGroup`, \ + or `DimensionGraph`, optional + Dimensions identified by all data IDs in the collection. Must be + provided unless ``graph`` is. hasFull : `bool`, optional If `True`, the caller guarantees that `DataCoordinate.hasFull` returns `True` for all given data IDs. If `False`, no such guarantee is made, @@ -256,27 +286,56 @@ class _DataCoordinateCollectionBase(DataCoordinateIterable): If `True` (default) check that all data IDs are consistent with the given ``graph`` and state flags at construction. If `False`, no checking will occur. + universe : `DimensionUniverse` + Object that manages all dimension definitions. """ def __init__( self, dataIds: Collection[DataCoordinate], - graph: DimensionGraph, + graph: DimensionGraph | None = None, *, + dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, hasFull: bool | None = None, hasRecords: bool | None = None, check: bool = True, + universe: DimensionUniverse | None = None, ): + universe = ( + universe + or getattr(dimensions, "universe", None) + or getattr(graph, "universe", None) + or getattr(dataIds, "universe", None) + ) + if universe is None: + raise TypeError( + "universe must be provided, either directly or via dimensions, dataIds, or graph." + ) + if graph is not None: + warnings.warn( + "The 'graph' argument to DataCoordinateIterable constructors is deprecated in favor of " + " passing an iterable of dimension names as the 'dimensions' argument, and wil be removed " + "after v27.", + stacklevel=find_outside_stacklevel("lsst.daf.butler"), + category=FutureWarning, + ) + if dimensions is not None: + dimensions = universe.conform(dimensions) + elif graph is not None: + dimensions = graph.as_group() + del graph # Avoid accidental use later. 
+ if dimensions is None: + raise TypeError("Exactly one of 'graph' or (preferably) 'dimensions' must be provided.") self._dataIds = dataIds - self._graph = graph + self._dimensions = dimensions if check: for dataId in self._dataIds: if hasFull and not dataId.hasFull(): raise ValueError(f"{dataId} is not complete, but is required to be.") if hasRecords and not dataId.hasRecords(): raise ValueError(f"{dataId} has no records, but is required to.") - if dataId.graph != self._graph: - raise ValueError(f"Bad dimensions {dataId.graph}; expected {self._graph}.") + if dataId.dimensions != self._dimensions: + raise ValueError(f"Bad dimensions {dataId.dimensions}; expected {self._dimensions}.") if hasFull is None: hasFull = all(dataId.hasFull() for dataId in self._dataIds) if hasRecords is None: @@ -284,12 +343,12 @@ def __init__( self._hasFull = hasFull self._hasRecords = hasRecords - __slots__ = ("_graph", "_dataIds", "_hasFull", "_hasRecords") + __slots__ = ("_dimensions", "_dataIds", "_hasFull", "_hasRecords") @property - def graph(self) -> DimensionGraph: + def dimensions(self) -> DimensionGroup: # Docstring inherited from DataCoordinateIterable. - return self._graph + return self._dimensions def hasFull(self) -> bool: # Docstring inherited from DataCoordinateIterable. @@ -310,7 +369,7 @@ def toSet(self) -> DataCoordinateSet: # and hence defer checking if that's what the user originally wanted. return DataCoordinateSet( frozenset(self._dataIds), - graph=self._graph, + dimensions=self._dimensions, hasFull=self._hasFull, hasRecords=self._hasRecords, check=False, @@ -323,7 +382,7 @@ def toSequence(self) -> DataCoordinateSequence: # and hence defer checking if that's what the user originally wanted. return DataCoordinateSequence( tuple(self._dataIds), - graph=self._graph, + dimensions=self._dimensions, hasFull=self._hasFull, hasRecords=self._hasRecords, check=False, @@ -339,12 +398,12 @@ def __contains__(self, key: Any) -> bool: key = DataCoordinate.standardize(key, universe=self.universe) return key in self._dataIds - def _subsetKwargs(self, graph: DimensionGraph) -> dict[str, Any]: + def _subsetKwargs(self, dimensions: DimensionGroup) -> dict[str, Any]: """Return constructor kwargs useful for subclasses implementing subset. Parameters ---------- - graph : `DimensionGraph` + dimensions : `DimensionGroup` Dimensions passed to `subset`. Returns @@ -355,7 +414,7 @@ def _subsetKwargs(self, graph: DimensionGraph) -> dict[str, Any]: dimensions. """ hasFull: bool | None - if graph.dimensions <= self.graph.required: + if dimensions.names <= self.dimensions.required: hasFull = True else: hasFull = self._hasFull @@ -374,8 +433,13 @@ class DataCoordinateSet(_DataCoordinateCollectionBase): A set of `DataCoordinate` instances, with dimensions equal to ``graph``. If this is a mutable object, the caller must be able to guarantee that it will not be modified by any other holders. - graph : `DimensionGraph` - Dimensions identified by all data IDs in the set. + graph : `DimensionGraph`, optional + Dimensions identified by all data IDs in the collection. Ignored if + ``dimensions`` is provided, and deprecated with removal after v27. + dimensions : `~collections.abc.Iterable` [ `str` ], `DimensionGroup`, \ + or `DimensionGraph`, optional + Dimensions identified by all data IDs in the collection. Must be + provided unless ``graph`` is. hasFull : `bool`, optional If `True`, the caller guarantees that `DataCoordinate.hasFull` returns `True` for all given data IDs. 
If `False`, no such guarantee is made, @@ -394,6 +458,8 @@ class DataCoordinateSet(_DataCoordinateCollectionBase): If `True` (default) check that all data IDs are consistent with the given ``graph`` and state flags at construction. If `False`, no checking will occur. + universe : `DimensionUniverse` + Object that manages all dimension definitions. Notes ----- @@ -411,7 +477,7 @@ class DataCoordinateSet(_DataCoordinateCollectionBase): - subset/superset comparison _operators_ (``<``, ``>``, ``<=``, ``>=``) require both operands to be `DataCoordinateSet` instances that have the - same dimensions (i.e. ``graph`` attribute); + same dimensions (i.e. `dimensions` attribute); - `issubset`, `issuperset`, and `isdisjoint` require the other argument to be a `DataCoordinateIterable` with the same dimensions; @@ -435,13 +501,23 @@ class DataCoordinateSet(_DataCoordinateCollectionBase): def __init__( self, dataIds: Set[DataCoordinate], - graph: DimensionGraph, + graph: DimensionGraph | None = None, *, + dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, hasFull: bool | None = None, hasRecords: bool | None = None, check: bool = True, + universe: DimensionUniverse | None = None, ): - super().__init__(dataIds, graph, hasFull=hasFull, hasRecords=hasRecords, check=check) + super().__init__( + dataIds, + graph, + dimensions=dimensions, + hasFull=hasFull, + hasRecords=hasRecords, + check=check, + universe=universe, + ) _dataIds: Set[DataCoordinate] @@ -452,33 +528,41 @@ def __str__(self) -> str: def __repr__(self) -> str: return ( - f"DataCoordinateSet({set(self._dataIds)}, {self._graph!r}, " + f"DataCoordinateSet({set(self._dataIds)}, {self._dimensions!r}, " f"hasFull={self._hasFull}, hasRecords={self._hasRecords})" ) def __eq__(self, other: Any) -> bool: if isinstance(other, DataCoordinateSet): - return self._graph == other._graph and self._dataIds == other._dataIds + return self._dimensions == other._dimensions and self._dataIds == other._dataIds return False def __le__(self, other: DataCoordinateSet) -> bool: - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set comparision: {self.graph} != {other.graph}.") + if self.dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set comparision: {self.dimensions} != {other.dimensions}." + ) return self._dataIds <= other._dataIds def __ge__(self, other: DataCoordinateSet) -> bool: - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set comparision: {self.graph} != {other.graph}.") + if self.dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set comparision: {self.dimensions} != {other.dimensions}." + ) return self._dataIds >= other._dataIds def __lt__(self, other: DataCoordinateSet) -> bool: - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set comparision: {self.graph} != {other.graph}.") + if self.dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set comparision: {self.dimensions} != {other.dimensions}." + ) return self._dataIds < other._dataIds def __gt__(self, other: DataCoordinateSet) -> bool: - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set comparision: {self.graph} != {other.graph}.") + if self.dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set comparision: {self.dimensions} != {other.dimensions}." 
+ ) return self._dataIds > other._dataIds def issubset(self, other: DataCoordinateIterable) -> bool: @@ -495,8 +579,10 @@ def issubset(self, other: DataCoordinateIterable) -> bool: `True` if all data IDs in ``self`` are also in ``other``, and `False` otherwise. """ - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set comparision: {self.graph} != {other.graph}.") + if self.dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set comparision: {self.dimensions} != {other.dimensions}." + ) return self._dataIds <= other.toSet()._dataIds def issuperset(self, other: DataCoordinateIterable) -> bool: @@ -505,7 +591,8 @@ def issuperset(self, other: DataCoordinateIterable) -> bool: Parameters ---------- other : `DataCoordinateIterable` - An iterable of data IDs with ``other.graph == self.graph``. + An iterable of data IDs with + ``other.dimensions == self.dimensions``. Returns ------- @@ -513,8 +600,10 @@ def issuperset(self, other: DataCoordinateIterable) -> bool: `True` if all data IDs in ``other`` are also in ``self``, and `False` otherwise. """ - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set comparision: {self.graph} != {other.graph}.") + if self.dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set comparision: {self.dimensions} != {other.dimensions}." + ) return self._dataIds >= other.toSet()._dataIds def isdisjoint(self, other: DataCoordinateIterable) -> bool: @@ -523,7 +612,8 @@ def isdisjoint(self, other: DataCoordinateIterable) -> bool: Parameters ---------- other : `DataCoordinateIterable` - An iterable of data IDs with ``other.graph == self.graph``. + An iterable of data IDs with + ``other._dimensions == self._dimensions``. Returns ------- @@ -531,29 +621,39 @@ def isdisjoint(self, other: DataCoordinateIterable) -> bool: `True` if there are no data IDs in both ``self`` and ``other``, and `False` otherwise. """ - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set comparision: {self.graph} != {other.graph}.") + if self._dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set comparision: {self._dimensions} != {other.dimensions}." + ) return self._dataIds.isdisjoint(other.toSet()._dataIds) def __and__(self, other: DataCoordinateSet) -> DataCoordinateSet: - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set operation: {self.graph} != {other.graph}.") - return DataCoordinateSet(self._dataIds & other._dataIds, self.graph, check=False) + if self._dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set operation: {self._dimensions} != {other.dimensions}." + ) + return DataCoordinateSet(self._dataIds & other._dataIds, dimensions=self._dimensions, check=False) def __or__(self, other: DataCoordinateSet) -> DataCoordinateSet: - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set operation: {self.graph} != {other.graph}.") - return DataCoordinateSet(self._dataIds | other._dataIds, self.graph, check=False) + if self._dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set operation: {self._dimensions} != {other.dimensions}." 
+ ) + return DataCoordinateSet(self._dataIds | other._dataIds, dimensions=self._dimensions, check=False) def __xor__(self, other: DataCoordinateSet) -> DataCoordinateSet: - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set operation: {self.graph} != {other.graph}.") - return DataCoordinateSet(self._dataIds ^ other._dataIds, self.graph, check=False) + if self._dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set operation: {self._dimensions} != {other.dimensions}." + ) + return DataCoordinateSet(self._dataIds ^ other._dataIds, dimensions=self._dimensions, check=False) def __sub__(self, other: DataCoordinateSet) -> DataCoordinateSet: - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set operation: {self.graph} != {other.graph}.") - return DataCoordinateSet(self._dataIds - other._dataIds, self.graph, check=False) + if self._dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set operation: {self._dimensions} != {other.dimensions}." + ) + return DataCoordinateSet(self._dataIds - other._dataIds, dimensions=self._dimensions, check=False) def intersection(self, other: DataCoordinateIterable) -> DataCoordinateSet: """Return a new set that contains all data IDs from parameters. @@ -561,16 +661,21 @@ def intersection(self, other: DataCoordinateIterable) -> DataCoordinateSet: Parameters ---------- other : `DataCoordinateIterable` - An iterable of data IDs with ``other.graph == self.graph``. + An iterable of data IDs with + ``other.dimensions == self.dimensions``. Returns ------- intersection : `DataCoordinateSet` A new `DataCoordinateSet` instance. """ - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set operation: {self.graph} != {other.graph}.") - return DataCoordinateSet(self._dataIds & other.toSet()._dataIds, self.graph, check=False) + if self.dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set operation: {self.dimensions} != {other.dimensions}." + ) + return DataCoordinateSet( + self._dataIds & other.toSet()._dataIds, dimensions=self.dimensions, check=False + ) def union(self, other: DataCoordinateIterable) -> DataCoordinateSet: """Return a new set that contains all data IDs in either parameters. @@ -578,16 +683,21 @@ def union(self, other: DataCoordinateIterable) -> DataCoordinateSet: Parameters ---------- other : `DataCoordinateIterable` - An iterable of data IDs with ``other.graph == self.graph``. + An iterable of data IDs with + ``other.dimensions == self.dimensions``. Returns ------- intersection : `DataCoordinateSet` A new `DataCoordinateSet` instance. """ - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set operation: {self.graph} != {other.graph}.") - return DataCoordinateSet(self._dataIds | other.toSet()._dataIds, self.graph, check=False) + if self.dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set operation: {self.dimensions} != {other.dimensions}." + ) + return DataCoordinateSet( + self._dataIds | other.toSet()._dataIds, dimensions=self.dimensions, check=False + ) def symmetric_difference(self, other: DataCoordinateIterable) -> DataCoordinateSet: """Return a new set with all data IDs in either parameters, not both. 
@@ -595,16 +705,21 @@ def symmetric_difference(self, other: DataCoordinateIterable) -> DataCoordinateS Parameters ---------- other : `DataCoordinateIterable` - An iterable of data IDs with ``other.graph == self.graph``. + An iterable of data IDs with + ``other.dimensions == self.dimensions``. Returns ------- intersection : `DataCoordinateSet` A new `DataCoordinateSet` instance. """ - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set operation: {self.graph} != {other.graph}.") - return DataCoordinateSet(self._dataIds ^ other.toSet()._dataIds, self.graph, check=False) + if self.dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set operation: {self.dimensions} != {other.dimensions}." + ) + return DataCoordinateSet( + self._dataIds ^ other.toSet()._dataIds, dimensions=self.dimensions, check=False + ) def difference(self, other: DataCoordinateIterable) -> DataCoordinateSet: """Return a new set with all data IDs in this that are not in other. @@ -612,43 +727,52 @@ def difference(self, other: DataCoordinateIterable) -> DataCoordinateSet: Parameters ---------- other : `DataCoordinateIterable` - An iterable of data IDs with ``other.graph == self.graph``. + An iterable of data IDs with + ``other.dimensions == self.dimensions``. Returns ------- intersection : `DataCoordinateSet` A new `DataCoordinateSet` instance. """ - if self.graph != other.graph: - raise ValueError(f"Inconsistent dimensions in set operation: {self.graph} != {other.graph}.") - return DataCoordinateSet(self._dataIds - other.toSet()._dataIds, self.graph, check=False) + if self.dimensions != other.dimensions: + raise ValueError( + f"Inconsistent dimensions in set operation: {self.dimensions} != {other.dimensions}." + ) + return DataCoordinateSet( + self._dataIds - other.toSet()._dataIds, dimensions=self.dimensions, check=False + ) def toSet(self) -> DataCoordinateSet: # Docstring inherited from DataCoordinateIterable. return self - def subset(self, graph: DimensionGraph) -> DataCoordinateSet: + def subset(self, dimensions: DimensionGraph | DimensionGroup | Iterable[str]) -> DataCoordinateSet: """Return a set whose data IDs identify a subset. Parameters ---------- - graph : `DimensionGraph` + dimensions : `DimensionGraph`, `DimensionGroup`, or \ + `~collections.abc.Iterable` [ `str` ] Dimensions to be identified by the data IDs in the returned - iterable. Must be a subset of ``self.graph``. + iterable. Must be a subset of ``self.dimensions``. Returns ------- set : `DataCoordinateSet` - A `DataCoordinateSet` with ``set.graph == graph``. - Will be ``self`` if ``graph == self.graph``. Elements are + A `DataCoordinateSet` with ``set.dimensions == dimensions``. Will + be ``self`` if ``dimensions == self.dimensions``. Elements are equivalent to those that would be created by calling `DataCoordinate.subset` on all elements in ``self``, with - deduplication but and in arbitrary order. + deduplication and in arbitrary order. 
""" - if graph == self.graph: + dimensions = self.universe.conform(dimensions) + if dimensions == self.dimensions: return self return DataCoordinateSet( - {dataId.subset(graph) for dataId in self._dataIds}, graph, **self._subsetKwargs(graph) + {dataId.subset(dimensions) for dataId in self._dataIds}, + dimensions=dimensions, + **self._subsetKwargs(dimensions), ) @@ -663,8 +787,13 @@ class DataCoordinateSequence(_DataCoordinateCollectionBase, Sequence[DataCoordin dataIds : `collections.abc.Sequence` [ `DataCoordinate` ] A sequence of `DataCoordinate` instances, with dimensions equal to ``graph``. - graph : `DimensionGraph` - Dimensions identified by all data IDs in the set. + graph : `DimensionGraph`, optional + Dimensions identified by all data IDs in the collection. Ignored if + ``dimensions`` is provided, and deprecated with removal after v27. + dimensions : `~collections.abc.Iterable` [ `str` ], `DimensionGroup`, \ + `DimensionGraph`, optional + Dimensions identified by all data IDs in the collection. Must be + provided unless ``graph`` is. hasFull : `bool`, optional If `True`, the caller guarantees that `DataCoordinate.hasFull` returns `True` for all given data IDs. If `False`, no such guarantee is made, @@ -683,18 +812,30 @@ class DataCoordinateSequence(_DataCoordinateCollectionBase, Sequence[DataCoordin If `True` (default) check that all data IDs are consistent with the given ``graph`` and state flags at construction. If `False`, no checking will occur. + universe : `DimensionUniverse` + Object that manages all dimension definitions. """ def __init__( self, dataIds: Sequence[DataCoordinate], - graph: DimensionGraph, + graph: DimensionGraph | None = None, *, + dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, hasFull: bool | None = None, hasRecords: bool | None = None, check: bool = True, + universe: DimensionUniverse | None = None, ): - super().__init__(tuple(dataIds), graph, hasFull=hasFull, hasRecords=hasRecords, check=check) + super().__init__( + tuple(dataIds), + graph, + dimensions=dimensions, + hasFull=hasFull, + hasRecords=hasRecords, + check=check, + universe=universe, + ) _dataIds: Sequence[DataCoordinate] @@ -705,13 +846,13 @@ def __str__(self) -> str: def __repr__(self) -> str: return ( - f"DataCoordinateSequence({tuple(self._dataIds)}, {self._graph!r}, " + f"DataCoordinateSequence({tuple(self._dataIds)}, {self._dimensions!r}, " f"hasFull={self._hasFull}, hasRecords={self._hasRecords})" ) def __eq__(self, other: Any) -> bool: if isinstance(other, DataCoordinateSequence): - return self._graph == other._graph and self._dataIds == other._dataIds + return self._dimensions == other._dimensions and self._dataIds == other._dataIds return False @overload @@ -726,7 +867,11 @@ def __getitem__(self, index: Any) -> Any: r = self._dataIds[index] if isinstance(index, slice): return DataCoordinateSequence( - r, self._graph, hasFull=self._hasFull, hasRecords=self._hasRecords, check=False + r, + dimensions=self._dimensions, + hasFull=self._hasFull, + hasRecords=self._hasRecords, + check=False, ) return r @@ -734,14 +879,15 @@ def toSequence(self) -> DataCoordinateSequence: # Docstring inherited from DataCoordinateIterable. return self - def subset(self, graph: DimensionGraph) -> DataCoordinateSequence: + def subset(self, dimensions: DimensionGraph | DimensionGroup | Iterable[str]) -> DataCoordinateSequence: """Return a sequence whose data IDs identify a subset. 
Parameters ---------- - graph : `DimensionGraph` + dimensions : `DimensionGraph`, `DimensionGroup`, \ + or `~collections.abc.Iterable` [ `str` ] Dimensions to be identified by the data IDs in the returned - iterable. Must be a subset of ``self.graph``. + iterable. Must be a subset of ``self.dimensions``. Returns ------- @@ -752,8 +898,11 @@ def subset(self, graph: DimensionGraph) -> DataCoordinateSequence: `DataCoordinate.subset` on all elements in ``self``, in the same order and with no deduplication. """ - if graph == self.graph: + dimensions = self.universe.conform(dimensions) + if dimensions == self.dimensions: return self return DataCoordinateSequence( - tuple(dataId.subset(graph) for dataId in self._dataIds), graph, **self._subsetKwargs(graph) + tuple(dataId.subset(dimensions) for dataId in self._dataIds), + dimensions=dimensions, + **self._subsetKwargs(dimensions), ) diff --git a/python/lsst/daf/butler/registry/queries/_builder.py b/python/lsst/daf/butler/registry/queries/_builder.py index dc66c274f4..7326e2ac05 100644 --- a/python/lsst/daf/butler/registry/queries/_builder.py +++ b/python/lsst/daf/butler/registry/queries/_builder.py @@ -176,7 +176,7 @@ def _addWhereClause(self, categorized_columns: ColumnCategorization) -> None: require_preferred_engine=True, ) if self.summary.where.data_id: - known_dimensions = self.summary.where.data_id.graph.intersection(self.summary.dimensions) + known_dimensions = self.summary.where.data_id.dimensions.intersection(self.summary.dimensions) known_data_id = self.summary.where.data_id.subset(known_dimensions) self.relation = self.relation.with_rows_satisfying( self._context.make_data_coordinate_predicate(known_data_id), diff --git a/python/lsst/daf/butler/registry/queries/_query.py b/python/lsst/daf/butler/registry/queries/_query.py index bde17e328b..1589406c63 100644 --- a/python/lsst/daf/butler/registry/queries/_query.py +++ b/python/lsst/daf/butler/registry/queries/_query.py @@ -38,7 +38,7 @@ from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag, DimensionRecordColumnTag from ..._dataset_ref import DatasetRef from ..._dataset_type import DatasetType -from ...dimensions import DataCoordinate, Dimension, DimensionElement, DimensionGraph, DimensionRecord +from ...dimensions import DataCoordinate, DimensionElement, DimensionGroup, DimensionRecord from .._collection_type import CollectionType from ..wildcards import CollectionWildcard from ._query_backend import QueryBackend @@ -52,7 +52,7 @@ class Query: Parameters ---------- - dimensions : `DimensionGraph` + dimensions : `DimensionGroup` The dimensions that span the query and are used to join its relations together. backend : `QueryBackend` @@ -107,7 +107,7 @@ class Query: def __init__( self, - dimensions: DimensionGraph, + dimensions: DimensionGroup, backend: QueryBackend[QueryContext], context: QueryContext, relation: Relation, @@ -126,9 +126,9 @@ def __init__( self._record_caches = record_caches if record_caches is not None else {} @property - def dimensions(self) -> DimensionGraph: + def dimensions(self) -> DimensionGroup: """The dimensions that span the query and are used to join its - relations together (`DimensionGraph`). + relations together (`DimensionGroup`). 
""" return self._dimensions @@ -175,12 +175,12 @@ def __str__(self) -> str: def __iter__(self) -> Iterator[Mapping[ColumnTag, Any]]: return iter(self._context.fetch_iterable(self._relation)) - def iter_data_ids(self, dimensions: DimensionGraph | None = None) -> Iterator[DataCoordinate]: + def iter_data_ids(self, dimensions: DimensionGroup | None = None) -> Iterator[DataCoordinate]: """Return an iterator that converts result rows to data IDs. Parameters ---------- - dimensions : `DimensionGraph`, optional + dimensions : `DimensionGroup`, optional Dimensions of the data IDs to return. If not provided, ``self.dimensions`` is used. @@ -239,7 +239,7 @@ def iter_dataset_refs( yield parent_ref.makeComponentRef(component) def iter_data_ids_and_dataset_refs( - self, dataset_type: DatasetType, dimensions: DimensionGraph | None = None + self, dataset_type: DatasetType, dimensions: DimensionGroup | None = None ) -> Iterator[tuple[DataCoordinate, DatasetRef]]: """Iterate over pairs of data IDs and dataset refs. @@ -250,7 +250,7 @@ def iter_data_ids_and_dataset_refs( ---------- dataset_type : `DatasetType` The parent dataset type to yield references for. - dimensions : `DimensionGraph`, optional + dimensions : `DimensionGroup`, optional Dimensions of the data IDs to return. If not provided, ``self.dimensions`` is used. @@ -305,7 +305,7 @@ def iter_dimension_records(self, element: DimensionElement | None = None) -> Ite case only_element_with_records: element = only_element_with_records if (cache := self._record_caches.get(element)) is not None: - return (cache[data_id] for data_id in self.iter_data_ids(element.graph)) + return (cache[data_id] for data_id in self.iter_data_ids(element.minimal_group)) else: reader = DimensionRecordReader(element) if not (reader.columns_required <= self.relation.columns): @@ -421,7 +421,7 @@ def materialized(self, defer_postprocessing: bool = True) -> Query: def projected( self, - dimensions: Iterable[Dimension | str] | None = None, + dimensions: DimensionGroup | Iterable[str] | None = None, unique: bool = True, columns: Iterable[ColumnTag] | None = None, defer: bool | None = None, @@ -432,7 +432,7 @@ def projected( Parameters ---------- - dimensions : `~collections.abc.Iterable` [ `Dimension` or `str` ], + dimensions : `~collections.abc.Iterable` [ `str` ], optional Dimensions to include in the new query. Will be expanded to include all required and implied dependencies. Must be a subset of @@ -475,13 +475,16 @@ def projected( Raised if the columns to include in the new query are not all present in the current query. 
""" - if dimensions is None: - dimensions = set(self._dimensions) - else: - dimensions = set(dimensions) + match dimensions: + case None: + dimensions = set(self._dimensions.names) + case DimensionGroup(): + dimensions = set(dimensions.names) + case iterable: + dimensions = set(iterable) if columns is not None: dimensions.update(tag.dimension for tag in DimensionKeyColumnTag.filter_from(columns)) - dimensions = self._dimensions.universe.extract(dimensions) + dimensions = self._dimensions.universe.conform(dimensions) if columns is None: columns = set() else: @@ -489,9 +492,9 @@ def projected( columns.update(DimensionKeyColumnTag.generate(dimensions.names)) if keep_record_columns: if self._has_record_columns is True: - for element in dimensions.elements: - if element not in self._record_caches: - columns.update(element.RecordClass.fields.columns) + for element_name in dimensions.elements: + if element_name not in self._record_caches: + columns.update(self.dimensions.universe[element_name].RecordClass.fields.columns) elif self._has_record_columns in dimensions.elements: element = cast(DimensionElement, self._has_record_columns) columns.update(element.RecordClass.fields.columns) @@ -507,17 +510,15 @@ def projected( relation = relation.without_duplicates(preferred_engine=self._context.preferred_engine) return self._chain(relation, defer, dimensions=dimensions) - def with_record_columns( - self, dimension_element: DimensionElement | None = None, defer: bool | None = None - ) -> Query: + def with_record_columns(self, dimension_element: str | None = None, defer: bool | None = None) -> Query: """Return a modified `Query` with additional dimension record columns and/or caches. Parameters ---------- - dimension_element : `DimensionElement`, optional - Single element to add record columns for, or `None` default to add - them for all elements in `dimensions`. + dimension_element : `str`, optional + Name of a single dimension element to add record columns for, or + `None` default to add them for all elements in `dimensions`. defer : `bool`, optional If `False`, run the new query immediately. If `True`, do not. If `None` (default), the ``defer`` option passed when making ``self`` @@ -543,10 +544,11 @@ def with_record_columns( return self record_caches = dict(self._record_caches) columns_required: set[ColumnTag] = set() - for element in self.dimensions.elements if dimension_element is None else [dimension_element]: - if element in record_caches: + for element_name in self.dimensions.elements if dimension_element is None else [dimension_element]: + element = self.dimensions.universe[element_name] + if element_name in record_caches: continue - if (cache := self._backend.get_dimension_record_cache(element.name, self._context)) is not None: + if (cache := self._backend.get_dimension_record_cache(element_name, self._context)) is not None: record_caches[element] = cache else: columns_required.update(element.RecordClass.fields.columns.keys()) @@ -571,7 +573,9 @@ def with_record_columns( return self._chain( relation, defer=defer, - has_record_columns=True if dimension_element is None else dimension_element, + has_record_columns=( + True if dimension_element is None else self.dimensions.universe[dimension_element] + ), record_caches=record_caches, ) @@ -693,7 +697,7 @@ def find_datasets( # If the dataset type has dimensions not in the current query, or we # need a temporal join for a calibration collection, either restore # those columns or join them in. 
- full_dimensions = dataset_type.dimensions.union(self._dimensions) + full_dimensions = dataset_type.dimensions.as_group().union(self._dimensions) relation = self._relation record_caches = self._record_caches base_columns_required: set[ColumnTag] = { @@ -706,11 +710,12 @@ def find_datasets( # dimensions to the query we need to be able to get records for # the new dimensions. record_caches = dict(self._record_caches) - for element in full_dimensions.elements: + for element_name in full_dimensions.elements: + element = full_dimensions.universe[element_name] if element in record_caches: continue if ( - cache := self._backend.get_dimension_record_cache(element.name, self._context) + cache := self._backend.get_dimension_record_cache(element_name, self._context) ) is not None: record_caches[element] = cache else: @@ -995,7 +1000,7 @@ def _copy( self, relation: Relation, is_deferred: bool, - dimensions: DimensionGraph | None = None, + dimensions: DimensionGroup | None = None, governor_constraints: Mapping[str, Set[str]] | None = None, has_record_columns: bool | DimensionElement | None = None, record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None, @@ -1022,7 +1027,7 @@ def _chain( self, relation: Relation, defer: bool | None, - dimensions: DimensionGraph | None = None, + dimensions: DimensionGroup | None = None, governor_constraints: Mapping[str, Set[str]] | None = None, has_record_columns: bool | DimensionElement | None = None, record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None, @@ -1038,7 +1043,7 @@ def _chain( If `False`, run the new query immediately. If `True`, do not. If `None` , the ``defer`` option passed when making ``self`` is used (this option is "sticky"). - dimensions : `DimensionGraph`, optional + dimensions : `DimensionGroup`, optional See class docs. governor_constraints : `~collections.abc.Mapping` [ `str`, \ `~collections.abc.Set` [ `str` ] ], optional diff --git a/python/lsst/daf/butler/registry/queries/_query_backend.py b/python/lsst/daf/butler/registry/queries/_query_backend.py index 52da8d43ef..8444de92cb 100644 --- a/python/lsst/daf/butler/registry/queries/_query_backend.py +++ b/python/lsst/daf/butler/registry/queries/_query_backend.py @@ -47,7 +47,7 @@ from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag from ..._dataset_type import DatasetType -from ...dimensions import DataCoordinate, DimensionGraph, DimensionRecord, DimensionUniverse +from ...dimensions import DataCoordinate, DimensionGroup, DimensionRecord, DimensionUniverse from .._collection_type import CollectionType from .._exceptions import DatasetTypeError, MissingDatasetTypeError from ..wildcards import CollectionWildcard @@ -576,15 +576,15 @@ def make_dataset_search_relation( if len(collections) <= 1: return base # We filter the dimension keys in the given relation through - # DimensionGraph.required.names to minimize the set we partition on + # DimensionGroup.required.names to minimize the set we partition on # and order it in a more index-friendly way. More precisely, any # index we define on dimensions will be consistent with this order, but # any particular index may not have the same dimension columns. 
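# Illustrative sketch (not part of the patch itself): what conform() produces
# for the partitioning step below.  Assumes the default dimension universe; the
# exact ordering is whatever the universe defines, the point being that it is
# stable and that implied dimensions (e.g. band) are excluded from ``required``.
from lsst.daf.butler import DimensionUniverse

universe = DimensionUniverse()
group = universe.conform(["detector", "visit", "band"])
print(list(group.required))  # e.g. ['instrument', 'visit', 'detector']
print(list(group.names))     # required plus implied, e.g. adds 'band', 'physical_filter'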
- dimensions = self.universe.extract( + dimensions = self.universe.conform( [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)] ) find_first = FindFirstDataset( - dimensions=DimensionKeyColumnTag.generate(dimensions.required.names), + dimensions=DimensionKeyColumnTag.generate(dimensions.required), rank=DatasetColumnTag(dataset_type.name, "rank"), ) return find_first.apply( @@ -627,7 +627,7 @@ def make_doomed_dataset_relation( @abstractmethod def make_dimension_relation( self, - dimensions: DimensionGraph, + dimensions: DimensionGroup, columns: Set[ColumnTag], context: _C, *, @@ -642,7 +642,7 @@ def make_dimension_relation( Parameters ---------- - dimensions : `DimensionGraph` + dimensions : `DimensionGroup` Dimensions to include. The key columns for all dimensions (both required and implied) will be included in the returned relation. columns : `~collections.abc.Set` [ `ColumnTag` ] @@ -691,14 +691,14 @@ def make_dimension_relation( @abstractmethod def resolve_governor_constraints( - self, dimensions: DimensionGraph, constraints: Mapping[str, Set[str]], context: _C + self, dimensions: DimensionGroup, constraints: Mapping[str, Set[str]], context: _C ) -> Mapping[str, Set[str]]: """Resolve governor dimension constraints provided by user input to a query against the content in the `Registry`. Parameters ---------- - dimensions : `DimensionGraph` + dimensions : `DimensionGroup` Dimensions that bound the governor dimensions to consider (via ``dimensions.governors``, more specifically). constraints : `~collections.abc.Mapping` [ `str`, \ diff --git a/python/lsst/daf/butler/registry/queries/_query_context.py b/python/lsst/daf/butler/registry/queries/_query_context.py index d9cdacadea..c5f73ae680 100644 --- a/python/lsst/daf/butler/registry/queries/_query_context.py +++ b/python/lsst/daf/butler/registry/queries/_query_context.py @@ -328,13 +328,16 @@ def make_data_coordinate_predicate( """ if full is None: full = data_coordinate.hasFull() - dimensions = data_coordinate.graph.required if not full else data_coordinate.graph.dimensions + dimension_names = ( + data_coordinate.required if not full else data_coordinate.dimensions.data_coordinate_keys + ) terms: list[Predicate] = [] - for dimension in dimensions: + for dimension_name in dimension_names: + dimension = data_coordinate.universe.dimensions[dimension_name] dtype = dimension.primaryKey.getPythonType() terms.append( - ColumnExpression.reference(DimensionKeyColumnTag(dimension.name), dtype=dtype).eq( - ColumnExpression.literal(data_coordinate[dimension.name], dtype=dtype) + ColumnExpression.reference(DimensionKeyColumnTag(dimension_name), dtype=dtype).eq( + ColumnExpression.literal(data_coordinate[dimension_name], dtype=dtype) ) ) return Predicate.logical_and(*terms) diff --git a/python/lsst/daf/butler/registry/queries/_readers.py b/python/lsst/daf/butler/registry/queries/_readers.py index f35c0cbf0e..c90314b0bd 100644 --- a/python/lsst/daf/butler/registry/queries/_readers.py +++ b/python/lsst/daf/butler/registry/queries/_readers.py @@ -42,7 +42,7 @@ from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag from ..._dataset_ref import DatasetRef from ..._dataset_type import DatasetType -from ...dimensions import DataCoordinate, DimensionElement, DimensionGraph, DimensionRecord +from ...dimensions import DataCoordinate, DimensionElement, DimensionGroup, DimensionRecord if TYPE_CHECKING: from lsst.daf.relation import ColumnTag @@ -55,7 +55,7 @@ class DataCoordinateReader(ABC): @staticmethod def make( - dimensions: 
DimensionGraph, + dimensions: DimensionGroup, full: bool = True, records: bool = False, record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None, @@ -64,7 +64,7 @@ def make( Parameters ---------- - dimensions : `DimensionGraph` + dimensions : `DimensionGroup` Dimensions of the `DataCoordinate` instances the new reader will read. full : `bool`, optional @@ -92,8 +92,9 @@ def make( e: cache for e, cache in record_caches.items() if e in dimensions.elements } record_readers = {} - for element in dimensions.elements: - if element not in record_caches: + for element_name in dimensions.elements: + element = dimensions.universe[element_name] + if element_name not in record_caches: record_readers[element] = DimensionRecordReader(element) return _ExpandedDataCoordinateReader(full_reader, record_caches, record_readers) return full_reader @@ -130,11 +131,11 @@ class _BasicDataCoordinateReader(DataCoordinateReader): Parameters ---------- - dimensions : `DimensionGraph` + dimensions : `DimensionGroup` Dimensions of the `DataCoordinate` instances read. """ - def __init__(self, dimensions: DimensionGraph): + def __init__(self, dimensions: DimensionGroup): self._dimensions = dimensions self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions.required.names) @@ -142,7 +143,7 @@ def __init__(self, dimensions: DimensionGraph): def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate: # Docstring inherited. - return DataCoordinate.fromRequiredValues( + return DataCoordinate.from_required_values( self._dimensions, tuple(row[tag] for tag in self._tags), ) @@ -157,21 +158,21 @@ class _FullDataCoordinateReader(DataCoordinateReader): Parameters ---------- - dimensions : `DimensionGraph` + dimensions : `DimensionGroup` Dimensions of the `DataCoordinate` instances read. """ - def __init__(self, dimensions: DimensionGraph): + def __init__(self, dimensions: DimensionGroup): self._dimensions = dimensions self._tags = tuple( - DimensionKeyColumnTag(name) for name in self._dimensions._group._data_coordinate_indices + DimensionKeyColumnTag(name) for name in self._dimensions.as_group().data_coordinate_keys ) __slots__ = ("_dimensions", "_tags") def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate: # Docstring inherited. 
- return DataCoordinate.fromFullValues( + return DataCoordinate.from_full_values( self._dimensions, tuple(row[tag] for tag in self._tags), ) @@ -260,7 +261,7 @@ def __init__( record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None, ): self._data_coordinate_reader = DataCoordinateReader.make( - dataset_type.dimensions, full=full, records=records, record_caches=record_caches + dataset_type.dimensions.as_group(), full=full, records=records, record_caches=record_caches ) self._dataset_type = dataset_type self._translate_collection = translate_collection diff --git a/python/lsst/daf/butler/registry/queries/_results.py b/python/lsst/daf/butler/registry/queries/_results.py index e26287b4ac..6316423edf 100644 --- a/python/lsst/daf/butler/registry/queries/_results.py +++ b/python/lsst/daf/butler/registry/queries/_results.py @@ -41,6 +41,8 @@ from contextlib import AbstractContextManager, ExitStack, contextmanager from typing import Any +from deprecated.sphinx import deprecated + from ..._dataset_ref import DatasetRef from ..._dataset_type import DatasetType from ...dimensions import ( @@ -48,6 +50,7 @@ DataCoordinateIterable, DimensionElement, DimensionGraph, + DimensionGroup, DimensionRecord, ) from ._query import Query @@ -83,8 +86,18 @@ def __repr__(self) -> str: return f"" @property + @deprecated( + "Deprecated in favor of .dimensions. Will be removed after v27.", + version="v27", + category=FutureWarning, + ) def graph(self) -> DimensionGraph: # Docstring inherited from DataCoordinateIterable. + return self._query.dimensions._as_graph() + + @property + def dimensions(self) -> DimensionGroup: + """The dimensions of the data IDs returned by this query.""" return self._query.dimensions def hasFull(self) -> bool: @@ -93,7 +106,7 @@ def hasFull(self) -> bool: def hasRecords(self) -> bool: # Docstring inherited from DataCoordinateIterable. - return self._query.has_record_columns is True or not self.graph + return self._query.has_record_columns is True or not self.dimensions @contextmanager def materialize(self) -> Iterator[DataCoordinateQueryResults]: @@ -151,7 +164,10 @@ def expanded(self) -> DataCoordinateQueryResults: return DataCoordinateQueryResults(self._query.with_record_columns(defer=True)) def subset( - self, graph: DimensionGraph | None = None, *, unique: bool = False + self, + dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None, + *, + unique: bool = False, ) -> DataCoordinateQueryResults: """Return a results object containing a subset of the dimensions of this one, and/or a unique near-subset of its rows. @@ -161,9 +177,10 @@ def subset( Parameters ---------- - graph : `DimensionGraph`, optional + dimensions : `DimensionGroup`, `DimensionGraph`, or \ + `~collections.abc.Iterable` [ `str`], optional Dimensions to include in the new results object. If `None`, - ``self.graph`` is used. + ``self.dimensions`` is used. unique : `bool`, optional If `True` (`False` is default), the query should only return unique data IDs. This is implemented in the database; to obtain unique @@ -180,7 +197,7 @@ def subset( Raises ------ ValueError - Raised when ``graph`` is not a subset of the dimension graph in + Raised when ``dimensions`` is not a subset of the dimensions in this result. Notes @@ -193,23 +210,21 @@ def subset( it may be much more efficient to call `materialize` first. For example:: - dimensions1 = DimensionGraph(...) - dimensions2 = DimensionGraph(...) + dimensions1 = DimensionGroup(...) + dimensions2 = DimensionGroup(...) 
with registry.queryDataIds(...).materialize() as tempDataIds: - for dataId1 in tempDataIds.subset( - graph=dimensions1, - unique=True): + for dataId1 in tempDataIds.subset(dimensions1, unique=True): ... - for dataId2 in tempDataIds.subset( - graph=dimensions2, - unique=True): + for dataId2 in tempDataIds.subset(dimensions2, unique=True): ... """ - if graph is None: - graph = self.graph - if not graph.issubset(self.graph): - raise ValueError(f"{graph} is not a subset of {self.graph}") - query = self._query.projected(graph, unique=unique, defer=True, drop_postprocessing=True) + if dimensions is None: + dimensions = self.dimensions + else: + dimensions = self.dimensions.universe.conform(dimensions) + if not dimensions.issubset(self.dimensions): + raise ValueError(f"{dimensions} is not a subset of {self.dimensions}") + query = self._query.projected(dimensions.names, unique=unique, defer=True, drop_postprocessing=True) return DataCoordinateQueryResults(query) def findDatasets( @@ -278,7 +293,7 @@ def findRelatedDatasets( collections: Any, *, findFirst: bool = True, - dimensions: DimensionGraph | None = None, + dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None, ) -> Iterable[tuple[DataCoordinate, DatasetRef]]: """Find datasets using the data IDs identified by this query, and return them along with the original data IDs. @@ -306,7 +321,8 @@ def findRelatedDatasets( expressions and may not be ``...``. Note that this is not the same as yielding one `DatasetRef` for each yielded data ID if ``dimensions`` is not `None`. - dimensions : `DimensionGraph`, optional + dimensions : `DimensionGroup`, `DimensionGraph`, or \ + `~collections.abc.Iterable` [ `str` ], optional The dimensions of the data IDs returned. Must be a subset of ``self.dimensions``. @@ -322,7 +338,9 @@ def findRelatedDatasets( Raised if the given dataset type is not registered. """ if dimensions is None: - dimensions = self.graph + dimensions = self.dimensions + else: + dimensions = self.universe.conform(dimensions) parent_dataset_type, _ = self._query.backend.resolve_single_dataset_type_wildcard( datasetType, components=False, explicit_only=True ) diff --git a/python/lsst/daf/butler/registry/queries/_sql_query_backend.py b/python/lsst/daf/butler/registry/queries/_sql_query_backend.py index d00a66b6b3..103b257115 100644 --- a/python/lsst/daf/butler/registry/queries/_sql_query_backend.py +++ b/python/lsst/daf/butler/registry/queries/_sql_query_backend.py @@ -36,7 +36,7 @@ from ..._column_categorization import ColumnCategorization from ..._column_tags import DimensionKeyColumnTag, DimensionRecordColumnTag from ..._dataset_type import DatasetType -from ...dimensions import DataCoordinate, DimensionGraph, DimensionRecord, DimensionUniverse +from ...dimensions import DataCoordinate, DimensionGroup, DimensionRecord, DimensionUniverse from .._collection_type import CollectionType from .._exceptions import DataIdValueError from ..interfaces import CollectionRecord, Database @@ -174,7 +174,7 @@ def _make_dataset_query_relation_impl( def make_dimension_relation( self, - dimensions: DimensionGraph, + dimensions: DimensionGroup, columns: Set[ColumnTag], context: SqlQueryContext, *, @@ -236,10 +236,10 @@ def make_dimension_relation( # spatial join, since we need all dimension key columns present in the # SQL engine and skypix regions are added by postprocessing in the # native iteration engine. 
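# Illustrative sketch (not part of the patch itself): DimensionGroup.skypix
# holds dimension *names*, which is why the loop below builds
# DimensionKeyColumnTag directly from each name.  Assumes the default dimension
# universe, in which the common skypix dimension is htm7.
from lsst.daf.butler import DimensionUniverse

universe = DimensionUniverse()
group = universe.conform(["htm7", "visit"])
print(group.skypix)                # contains 'htm7'
print(universe.commonSkyPix.name)  # 'htm7' with the default configuration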
- for skypix_dimension in dimensions.skypix: - if DimensionKeyColumnTag(skypix_dimension.name) not in relation.columns: + for skypix_dimension_name in dimensions.skypix: + if DimensionKeyColumnTag(skypix_dimension_name) not in relation.columns: raise NotImplementedError( - f"Cannot construct query involving skypix dimension {skypix_dimension.name} unless " + f"Cannot construct query involving skypix dimension {skypix_dimension_name} unless " "it is part of a dataset subquery, spatial join, or other initial relation." ) @@ -272,8 +272,9 @@ def make_dimension_relation( # Iterate over all dimension elements whose relations definitely have # to be joined in. The order doesn't matter as long as we can assume # the database query optimizer is going to try to reorder them anyway. - for element in dimensions.elements: - columns_still_needed = missing_columns.dimension_records[element.name] + for element_name in dimensions.elements: + columns_still_needed = missing_columns.dimension_records[element_name] + element = self.universe[element_name] # Two separate conditions in play here: # - if we need a record column (not just key columns) from this # element, we have to join in its relation; @@ -286,19 +287,19 @@ def make_dimension_relation( (element.alwaysJoin or element.implied) and frozenset(element.dimensions.names) not in relationships ): - storage = self._managers.dimensions[element] + storage = self._managers.dimensions[element_name] relation = storage.join(relation, default_join, context) # At this point we've joined in all of the element relations that # definitely need to be included, but we may not have all of the # dimension key columns in the query that we want. To fill out that - # set, we iterate over just the given DimensionGraph's dimensions (not + # set, we iterate over just the given DimensionGroup's dimensions (not # all dimension *elements*) in reverse topological order. That order # should reduce the total number of tables we bring in, since each # dimension will bring in keys for its required dependencies before we # get to those required dependencies. - for dimension in self.universe.sorted(dimensions, reverse=True): - if DimensionKeyColumnTag(dimension.name) not in relation.columns: - storage = self._managers.dimensions[dimension] + for dimension_name in reversed(dimensions.names.as_tuple()): + if DimensionKeyColumnTag(dimension_name) not in relation.columns: + storage = self._managers.dimensions[dimension_name] relation = storage.join(relation, default_join, context) # Add the predicates we constructed earlier, with a transfer to native @@ -320,24 +321,24 @@ def make_dimension_relation( return relation def resolve_governor_constraints( - self, dimensions: DimensionGraph, constraints: Mapping[str, Set[str]], context: SqlQueryContext + self, dimensions: DimensionGroup, constraints: Mapping[str, Set[str]], context: SqlQueryContext ) -> Mapping[str, Set[str]]: # Docstring inherited. result: dict[str, Set[str]] = {} - for dimension in dimensions.governors: - storage = self._managers.dimensions[dimension] + for dimension_name in dimensions.governors: + storage = self._managers.dimensions[dimension_name] records = storage.get_record_cache(context) assert records is not None, "Governor dimensions are always cached." 
- all_values = {cast(str, data_id[dimension.name]) for data_id in records} - if (constraint_values := constraints.get(dimension.name)) is not None: + all_values = {cast(str, data_id[dimension_name]) for data_id in records} + if (constraint_values := constraints.get(dimension_name)) is not None: if not (constraint_values <= all_values): raise DataIdValueError( - f"Unknown values specified for governor dimension {dimension.name}: " + f"Unknown values specified for governor dimension {dimension_name}: " f"{constraint_values - all_values}." ) - result[dimension.name] = constraint_values + result[dimension_name] = constraint_values else: - result[dimension.name] = all_values + result[dimension_name] = all_values return result def get_dimension_record_cache( diff --git a/python/lsst/daf/butler/registry/queries/_structs.py b/python/lsst/daf/butler/registry/queries/_structs.py index dc31d7c3c5..6d6039536c 100644 --- a/python/lsst/daf/butler/registry/queries/_structs.py +++ b/python/lsst/daf/butler/registry/queries/_structs.py @@ -41,7 +41,7 @@ from ..._column_type_info import ColumnTypeInfo from ..._dataset_type import DatasetType from ..._named import NamedValueAbstractSet, NamedValueSet -from ...dimensions import DataCoordinate, DimensionElement, DimensionGraph, DimensionUniverse, SkyPixDimension +from ...dimensions import DataCoordinate, DimensionElement, DimensionGroup, DimensionUniverse, SkyPixDimension # We're not trying to add typing to the lex/yacc parser code, so MyPy # doesn't know about some of these imports. @@ -61,7 +61,7 @@ class QueryWhereClause: @classmethod def combine( cls, - dimensions: DimensionGraph, + dimensions: DimensionGroup, expression: str = "", *, column_types: ColumnTypeInfo, @@ -76,7 +76,7 @@ def combine( Parameters ---------- - dimensions : `DimensionGraph` + dimensions : `DimensionGroup` The dimensions that would be included in the query in the absence of the WHERE clause. expression : `str`, optional @@ -171,7 +171,7 @@ class OrderByClause: """Class for information about columns in ORDER BY clause.""" @classmethod - def parse_general(cls, order_by: Iterable[str], graph: DimensionGraph) -> OrderByClause: + def parse_general(cls, order_by: Iterable[str], dimensions: DimensionGroup) -> OrderByClause: """Parse an iterable of strings in the context of a multi-dimension query. @@ -179,7 +179,7 @@ def parse_general(cls, order_by: Iterable[str], graph: DimensionGraph) -> OrderB ---------- order_by : `~collections.abc.Iterable` [ `str` ] Sequence of names to use for ordering with optional "-" prefix. - graph : `DimensionGraph` + dimensions : `DimensionGroup` Dimensions used by a query. Returns @@ -195,7 +195,7 @@ def parse_general(cls, order_by: Iterable[str], graph: DimensionGraph) -> OrderB if name[0] == "-": ascending = False name = name[1:] - element, column = categorizeOrderByName(graph, name) + element, column = categorizeOrderByName(dimensions, name) term = cls._make_term(element, column, ascending) terms.append(term) return cls(terms) @@ -324,7 +324,7 @@ class QuerySummary: Parameters ---------- - requested : `DimensionGraph` + requested : `DimensionGroup` The dimensions whose primary keys should be included in the result rows of the query. 
column_types : `ColumnTypeInfo` @@ -362,7 +362,7 @@ class QuerySummary: def __init__( self, - requested: DimensionGraph, + requested: DimensionGroup, *, column_types: ColumnTypeInfo, data_id: DataCoordinate | None = None, @@ -396,9 +396,9 @@ def __init__( self.limit = limit self.columns_required, self.dimensions, self.region = self._compute_columns_required() - requested: DimensionGraph + requested: DimensionGroup """Dimensions whose primary keys should be included in the result rows of - the query (`DimensionGraph`). + the query (`DimensionGroup`). """ where: QueryWhereClause @@ -421,8 +421,8 @@ def __init__( postprocessing filters), respectively. """ - dimensions: DimensionGraph - """All dimensions in the query in any form (`DimensionGraph`). + dimensions: DimensionGroup + """All dimensions in the query in any form (`DimensionGroup`). """ region: Region | None @@ -448,7 +448,7 @@ def universe(self) -> DimensionUniverse: def _compute_columns_required( self, - ) -> tuple[set[ColumnTag], DimensionGraph, Region | None]: + ) -> tuple[set[ColumnTag], DimensionGroup, Region | None]: """Compute the columns that must be provided by the relations joined into this query in order to obtain the right *set* of result rows in the right order. @@ -464,7 +464,7 @@ def _compute_columns_required( if self.order_by is not None: tags.update(self.order_by.columns_required) region = self.where.region - for dimension_name in self.where.data_id.graph.names: + for dimension_name in self.where.data_id.dimensions.names: dimension_tag = DimensionKeyColumnTag(dimension_name) if dimension_tag in tags: continue @@ -493,8 +493,8 @@ def _compute_columns_required( # just like simple 'where' constraints, which is good. tags.add(dimension_tag) # Make sure the dimension keys are expanded self-consistently in what - # we return by passing them through DimensionGraph. - dimensions = DimensionGraph( + # we return by passing them through DimensionGroup. + dimensions = DimensionGroup( self.universe, names={tag.dimension for tag in DimensionKeyColumnTag.filter_from(tags)} ) # If we have a region constraint, ensure region columns and the common @@ -504,9 +504,12 @@ def _compute_columns_required( for family in dimensions.spatial: element = family.choose(dimensions.elements.names, self.universe) tags.add(DimensionRecordColumnTag(element.name, "region")) - if not isinstance(element, SkyPixDimension) and self.universe.commonSkyPix not in dimensions: + if ( + not isinstance(element, SkyPixDimension) + and self.universe.commonSkyPix.name not in dimensions + ): missing_common_skypix = True if missing_common_skypix: - dimensions = dimensions.union(self.universe.commonSkyPix.graph) + dimensions = dimensions.union(self.universe.commonSkyPix.minimal_group) tags.update(DimensionKeyColumnTag.generate(dimensions.names)) return (tags, dimensions, region) diff --git a/python/lsst/daf/butler/registry/queries/expressions/_predicate.py b/python/lsst/daf/butler/registry/queries/expressions/_predicate.py index 8874fec89b..827cb0f333 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/_predicate.py +++ b/python/lsst/daf/butler/registry/queries/expressions/_predicate.py @@ -53,7 +53,7 @@ from .... 
import _timespan from ...._column_tags import DatasetColumnTag, DimensionKeyColumnTag, DimensionRecordColumnTag from ...._column_type_info import ColumnTypeInfo -from ....dimensions import DataCoordinate, Dimension, DimensionGraph, DimensionUniverse +from ....dimensions import DataCoordinate, Dimension, DimensionGroup, DimensionUniverse from ..._exceptions import UserExpressionError, UserExpressionSyntaxError from .categorize import ExpressionConstant, categorizeConstant, categorizeElementId from .check import CheckVisitor @@ -76,7 +76,7 @@ class ExpressionTypeError(TypeError): def make_string_expression_predicate( string: str, - dimensions: DimensionGraph, + dimensions: DimensionGroup, *, column_types: ColumnTypeInfo, bind: Mapping[str, Any] | None = None, @@ -91,7 +91,7 @@ def make_string_expression_predicate( ---------- string : `str` String to parse. - dimensions : `DimensionGraph` + dimensions : `DimensionGroup` The dimensions the query would include in the absence of this WHERE expression. column_types : `ColumnTypeInfo` diff --git a/python/lsst/daf/butler/registry/queries/expressions/categorize.py b/python/lsst/daf/butler/registry/queries/expressions/categorize.py index dfed5c3a76..817eff3094 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/categorize.py +++ b/python/lsst/daf/butler/registry/queries/expressions/categorize.py @@ -31,7 +31,7 @@ import enum from typing import cast -from ....dimensions import Dimension, DimensionElement, DimensionGraph, DimensionUniverse +from ....dimensions import Dimension, DimensionElement, DimensionGroup, DimensionUniverse class ExpressionConstant(enum.Enum): @@ -109,7 +109,7 @@ def categorizeElementId(universe: DimensionUniverse, name: str) -> tuple[Dimensi # Allow e.g. "visit.id = x" instead of just "visit = x"; this # can be clearer. return element, None - elif column in element.graph.names: + elif column in element.dimensions.names: # User said something like "patch.tract = x" or # "tract.tract = x" instead of just "tract = x" or # "tract.id = x", which is at least needlessly confusing and @@ -132,12 +132,12 @@ def categorizeElementId(universe: DimensionUniverse, name: str) -> tuple[Dimensi return dimension, None -def categorizeOrderByName(graph: DimensionGraph, name: str) -> tuple[DimensionElement, str | None]: +def categorizeOrderByName(dimensions: DimensionGroup, name: str) -> tuple[DimensionElement, str | None]: """Categorize an identifier in an ORDER BY clause. Parameters ---------- - graph : `DimensionGraph` + dimensions : `DimensionGroup` All known dimensions. name : `str` Identifier to categorize. @@ -154,8 +154,8 @@ def categorizeOrderByName(graph: DimensionGraph, name: str) -> tuple[DimensionEl Raises ------ ValueError - Raised if element name is not found in a graph, metadata name is not - recognized, or if there is more than one element has specified + Raised if element name is not found in a dimensions, metadata name is + not recognized, or if there is more than one element has specified metadata. Notes @@ -168,16 +168,20 @@ def categorizeOrderByName(graph: DimensionGraph, name: str) -> tuple[DimensionEl by dot, e.g. ``detector.full_name``. - Name can be a metadata name without element name prefix, e.g. ``day_obs``; in that case metadata (or key) is searched in all elements - present in a graph. Exception is raised if name appears in more than one - element. + present in a dimensions. Exception is raised if name appears in more than + one element. 
- Two special identifiers ``timespan.begin`` and ``timespan.end`` can be used with temporal elements, if element name is not given then a temporal - element from a graph is used. + element from a dimensions is used. """ element: DimensionElement field_name: str | None = None if name in ("timespan.begin", "timespan.end"): - matches = [element for element in graph.elements if element.temporal] + matches = [ + element + for element_name in dimensions.elements + if (element := dimensions.universe[element_name]).temporal + ] if len(matches) == 1: element = matches[0] field_name = name @@ -192,14 +196,20 @@ def categorizeOrderByName(graph: DimensionGraph, name: str) -> tuple[DimensionEl elif "." not in name: # No dot, can be either a dimension name or a field name (in any of # the known elements) - if name in graph.elements.names: - element = graph.elements[name] + if name in dimensions.elements: + element = dimensions.universe[name] else: # Can be a metadata name or any of unique keys match_pairs: list[tuple[DimensionElement, bool]] = [ - (elem, False) for elem in graph.elements if name in elem.metadata.names + (element, False) + for element_name in dimensions.elements + if name in (element := dimensions.universe[element_name]).metadata.names + ] + match_pairs += [ + (dimension, True) + for dimension_name in dimensions.names + if name in (dimension := dimensions.universe.dimensions[dimension_name]).uniqueKeys.names ] - match_pairs += [(dim, True) for dim in graph if name in dim.uniqueKeys.names] if len(match_pairs) == 1: element, is_dimension_key = match_pairs[0] if is_dimension_key and name == cast(Dimension, element).primaryKey.name: @@ -218,13 +228,13 @@ def categorizeOrderByName(graph: DimensionGraph, name: str) -> tuple[DimensionEl else: # qualified name, must be a dimension element and a field elem_name, _, field_name = name.partition(".") - if elem_name not in graph.elements.names: + if elem_name not in dimensions.elements: if field_name == "begin" or field_name == "end": raise ValueError( f"Unknown dimension element {elem_name!r}; perhaps you meant 'timespan.{field_name}'?" 
) raise ValueError(f"Unknown dimension element {elem_name!r}.") - element = graph.elements[elem_name] + element = dimensions.universe[elem_name] if field_name in ("timespan.begin", "timespan.end"): if not element.temporal: raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.") diff --git a/python/lsst/daf/butler/registry/queries/expressions/check.py b/python/lsst/daf/butler/registry/queries/expressions/check.py index b42a9a24b2..2f41b6253c 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/check.py +++ b/python/lsst/daf/butler/registry/queries/expressions/check.py @@ -37,15 +37,7 @@ from typing import TYPE_CHECKING, Any from ...._column_tags import DatasetColumnTag, DimensionKeyColumnTag, DimensionRecordColumnTag -from ...._named import NamedKeyDict, NamedValueSet -from ....dimensions import ( - DataCoordinate, - DataIdValue, - Dimension, - DimensionElement, - DimensionGraph, - DimensionUniverse, -) +from ....dimensions import DataCoordinate, DataIdValue, Dimension, DimensionGroup, DimensionUniverse from ..._exceptions import UserExpressionError from .categorize import ExpressionConstant, categorizeConstant, categorizeElementId from .normalForm import NormalForm, NormalFormVisitor @@ -75,14 +67,14 @@ def update(self, other: InspectionSummary) -> None: self.columns.setdefault(element, set()).update(columns) self.hasIngestDate = self.hasIngestDate or other.hasIngestDate - dimensions: NamedValueSet[Dimension] = dataclasses.field(default_factory=NamedValueSet) - """Dimensions whose primary keys or dependencies were referenced anywhere - in this branch (`NamedValueSet` [ `Dimension` ]). + dimensions: set[str] = dataclasses.field(default_factory=set) + """Names of dimensions whose primary keys or dependencies were referenced + anywhere in this branch (`set` [ `str` ]). """ - columns: NamedKeyDict[DimensionElement, set[str]] = dataclasses.field(default_factory=NamedKeyDict) - """Dimension element tables whose columns were referenced anywhere in this - branch (`NamedKeyDict` [ `DimensionElement`, `set` [ `str` ] ]). + columns: dict[str, set[str]] = dataclasses.field(default_factory=dict) + """Names of dimension element tables whose columns were referenced anywhere + in this branch (`dict` [ `str`, `set` [ `str` ] ]). """ hasIngestDate: bool = False @@ -112,9 +104,9 @@ def make_column_tag_set(self, dataset_type_name: str | None) -> set[ColumnTag]: "Expression requires an ingest date, which requires exactly one dataset type." 
) result.add(DatasetColumnTag(dataset_type_name, "ingest_date")) - result.update(DimensionKeyColumnTag.generate(self.dimensions.names)) + result.update(DimensionKeyColumnTag.generate(self.dimensions)) for dimension_element, columns in self.columns.items(): - result.update(DimensionRecordColumnTag.generate(dimension_element.name, columns)) + result.update(DimensionRecordColumnTag.generate(dimension_element, columns)) return result @@ -239,13 +231,11 @@ def visitIdentifier(self, name: str, node: Node) -> TreeSummary: if column is None: assert isinstance(element, Dimension) return TreeSummary( - dimensions=NamedValueSet(element.graph.dimensions), + dimensions=set(element.minimal_group.names), dataIdKey=element, ) else: - return TreeSummary( - dimensions=NamedValueSet(element.graph.dimensions), columns=NamedKeyDict({element: {column}}) - ) + return TreeSummary(dimensions=set(element.minimal_group.names), columns={element.name: {column}}) def visitUnaryOp(self, operator: str, operand: TreeSummary, node: Node) -> TreeSummary: # Docstring inherited from TreeVisitor.visitUnaryOp @@ -334,7 +324,7 @@ class CheckVisitor(NormalFormVisitor[TreeSummary, InnerSummary, OuterSummary]): ---------- dataId : `DataCoordinate` Dimension values that are fully known in advance. - graph : `DimensionGraph` + dimensions : `DimensionGroup` The dimensions the query would include in the absence of this expression. bind : `~collections.abc.Mapping` [ `str`, `object` ] @@ -352,17 +342,22 @@ class CheckVisitor(NormalFormVisitor[TreeSummary, InnerSummary, OuterSummary]): def __init__( self, dataId: DataCoordinate, - graph: DimensionGraph, + dimensions: DimensionGroup, bind: Mapping[str, Any], defaults: DataCoordinate, allow_orphans: bool = False, ): self.dataId = dataId - self.graph = graph + self.dimensions = dimensions self.defaults = defaults self._branchVisitor = InspectionVisitor(dataId.universe, bind) self._allow_orphans = allow_orphans + @property + def universe(self) -> DimensionUniverse: + """Object that defines all dimensions.""" + return self.dimensions.universe + def visitBranch(self, node: Node) -> TreeSummary: # Docstring inherited from NormalFormVisitor. return node.visit(self._branchVisitor) @@ -406,7 +401,7 @@ def visitInner(self, branches: Sequence[TreeSummary], form: NormalForm) -> Inner # Expression says something like "instrument='HSC' AND # instrument='DECam'", or data ID has one and expression # has the other. - if branch.dataIdKey in self.dataId: + if branch.dataIdKey.name in self.dataId: raise UserExpressionError( f"Conflict between expression containing {branch.dataIdKey.name}={new_value!r} " f"and data ID with {branch.dataIdKey.name}={value!r}." @@ -437,7 +432,7 @@ def visitInner(self, branches: Sequence[TreeSummary], form: NormalForm) -> Inner # after all). governorsNeededInBranch: set[str] = set() for dimension in summary.dimensions: - governorsNeededInBranch.update(dimension.graph.governors.names) + governorsNeededInBranch.update(self.universe.dimensions[dimension].minimal_group.governors) if not governorsNeededInBranch.issubset(summary.dimension_values.keys()): missing = governorsNeededInBranch - summary.dimension_values.keys() if missing <= self.defaults.names: @@ -465,7 +460,7 @@ def visitOuter(self, branches: Sequence[InnerSummary], form: NormalForm) -> Oute # pulled from defaults in _all_ branches. This is the set we will # be able to bound overall; any dimensions not referenced by even # one branch could be unbounded. 
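# Illustrative sketch (not part of the patch itself): a plain-set analogue of
# the bounding rule described in the comment above: an OR expression can only
# constrain a dimension if every branch constrains it.
branch_dimensions = [{"instrument", "visit"}, {"instrument", "exposure"}]
bounded = set.intersection(*branch_dimensions)
assert bounded == {"instrument"}  # 'visit' and 'exposure' stay unbounded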
- dimensions_in_all_branches = set(self.graph.universe.dimensions.names) + dimensions_in_all_branches = set(self.universe.dimensions.names) for branch in branches: summary.update(branch) summary.defaultsNeeded.update(branch.defaultsNeeded) @@ -482,11 +477,8 @@ def visitOuter(self, branches: Sequence[InnerSummary], form: NormalForm) -> Oute # lets a user say "tract=X" on the command line (well, "skymap=Y AND # tract=X" - logic in visitInner checks for that) when running a task # like ISR that has nothing to do with skymaps. - if not summary.dimensions.issubset(self.graph.dimensions): - self.graph = DimensionGraph( - self.graph.universe, - dimensions=(summary.dimensions | self.graph.dimensions), - ) + if not summary.dimensions.issubset(self.dimensions.names): + self.dimensions = self.universe.conform(summary.dimensions | self.dimensions.names) for dimension, values in summary.dimension_constraints.items(): if dimension in summary.defaultsNeeded: # One branch contained an explicit value for this dimension @@ -503,7 +495,7 @@ def visitOuter(self, branches: Sequence[InnerSummary], form: NormalForm) -> Oute # If any default data ID values were needed, update self.dataId with # them, and then update the governor restriction with them. if summary.defaultsNeeded: - defaultsNeededGraph = DimensionGraph(self.graph.universe, names=summary.defaultsNeeded) + defaultsNeededGraph = self.universe.conform(summary.defaultsNeeded) self.dataId = self.dataId.union(self.defaults.subset(defaultsNeededGraph)) for dimension in summary.defaultsNeeded: summary.dimension_constraints[dimension] = {self.defaults[dimension]} diff --git a/python/lsst/daf/butler/registry/sql_registry.py b/python/lsst/daf/butler/registry/sql_registry.py index fa87ffd1a5..4338e0a5d1 100644 --- a/python/lsst/daf/butler/registry/sql_registry.py +++ b/python/lsst/daf/butler/registry/sql_registry.py @@ -2092,11 +2092,11 @@ def queryDatasets( # if any. dimension_names = set(parent_dataset_type.dimensions.names) if dimensions is not None: - dimension_names.update(self.dimensions.extract(dimensions).names) + dimension_names.update(self.dimensions.conform(dimensions).names) # Construct the summary structure needed to construct a # QueryBuilder. summary = queries.QuerySummary( - requested=DimensionGraph(self.dimensions, names=dimension_names), + requested=self.dimensions.conform(dimension_names), column_types=self._managers.column_types, data_id=data_id, expression=where, @@ -2237,8 +2237,7 @@ def queryDataIds( lsst.daf.butler.registry.UserExpressionError Raised when ``where`` expression is invalid. 
""" - dimensions = ensure_iterable(dimensions) - requestedDimensions = self.dimensions.extract(dimensions) + requested_dimensions = self.dimensions.conform(dimensions) doomed_by: list[str] = [] data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) dataset_composition, collection_wildcard = self._standardize_query_dataset_args( @@ -2247,7 +2246,7 @@ def queryDataIds( if collection_wildcard is not None and collection_wildcard.empty(): doomed_by.append("No data coordinates can be found because collection list is empty.") summary = queries.QuerySummary( - requested=requestedDimensions, + requested=requested_dimensions, column_types=self._managers.column_types, data_id=data_id, expression=where, @@ -2362,7 +2361,7 @@ def queryDimensionRecords( if collection_wildcard is not None and collection_wildcard.empty(): doomed_by.append("No dimension records can be found because collection list is empty.") summary = queries.QuerySummary( - requested=element.graph, + requested=element.minimal_group, column_types=self._managers.column_types, data_id=data_id, expression=where, @@ -2374,7 +2373,7 @@ def queryDimensionRecords( builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) for datasetType in dataset_composition: builder.joinDataset(datasetType, collection_wildcard, isResult=False) - query = builder.finish().with_record_columns(element) + query = builder.finish().with_record_columns(element.name) return queries.DatabaseDimensionRecordQueryResults(query, element) def queryDatasetAssociations( diff --git a/tests/test_expressions.py b/tests/test_expressions.py index 66175b606f..d86cb75f84 100644 --- a/tests/test_expressions.py +++ b/tests/test_expressions.py @@ -243,7 +243,7 @@ def test_simple(self): tree = parser.parse("instrument = 'LSST'") bind = {} summary = tree.visit(InspectionVisitor(universe, bind)) - self.assertEqual(summary.dimensions.names, {"instrument"}) + self.assertEqual(summary.dimensions, {"instrument"}) self.assertFalse(summary.columns) self.assertFalse(summary.hasIngestDate) self.assertEqual(summary.dataIdKey, universe["instrument"]) @@ -251,23 +251,21 @@ def test_simple(self): tree = parser.parse("instrument != 'LSST'") summary = tree.visit(InspectionVisitor(universe, bind)) - self.assertEqual(summary.dimensions.names, {"instrument"}) + self.assertEqual(summary.dimensions, {"instrument"}) self.assertFalse(summary.columns) self.assertIsNone(summary.dataIdKey) self.assertIsNone(summary.dataIdValue) tree = parser.parse("instrument = 'LSST' AND visit = 1") summary = tree.visit(InspectionVisitor(universe, bind)) - self.assertEqual(summary.dimensions.names, {"instrument", "visit", "band", "physical_filter"}) + self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter"}) self.assertFalse(summary.columns) self.assertIsNone(summary.dataIdKey) self.assertIsNone(summary.dataIdValue) tree = parser.parse("instrument = 'LSST' AND visit = 1 AND skymap = 'x'") summary = tree.visit(InspectionVisitor(universe, bind)) - self.assertEqual( - summary.dimensions.names, {"instrument", "visit", "band", "physical_filter", "skymap"} - ) + self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter", "skymap"}) self.assertFalse(summary.columns) self.assertIsNone(summary.dataIdKey) self.assertIsNone(summary.dataIdValue) @@ -280,13 +278,13 @@ def test_bind(self): tree = parser.parse("instrument = instr") bind = {"instr": "LSST"} summary = tree.visit(InspectionVisitor(universe, bind)) - 
self.assertEqual(summary.dimensions.names, {"instrument"}) + self.assertEqual(summary.dimensions, {"instrument"}) self.assertFalse(summary.hasIngestDate) self.assertEqual(summary.dataIdKey, universe["instrument"]) self.assertEqual(summary.dataIdValue, "LSST") tree = parser.parse("instrument != instr") - self.assertEqual(summary.dimensions.names, {"instrument"}) + self.assertEqual(summary.dimensions, {"instrument"}) summary = tree.visit(InspectionVisitor(universe, bind)) self.assertIsNone(summary.dataIdKey) self.assertIsNone(summary.dataIdValue) @@ -294,16 +292,14 @@ def test_bind(self): tree = parser.parse("instrument = instr AND visit = visit_id") bind = {"instr": "LSST", "visit_id": 1} summary = tree.visit(InspectionVisitor(universe, bind)) - self.assertEqual(summary.dimensions.names, {"instrument", "visit", "band", "physical_filter"}) + self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter"}) self.assertIsNone(summary.dataIdKey) self.assertIsNone(summary.dataIdValue) tree = parser.parse("instrument = 'LSST' AND visit = 1 AND skymap = skymap_name") bind = {"instr": "LSST", "visit_id": 1, "skymap_name": "x"} summary = tree.visit(InspectionVisitor(universe, bind)) - self.assertEqual( - summary.dimensions.names, {"instrument", "visit", "band", "physical_filter", "skymap"} - ) + self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter", "skymap"}) self.assertIsNone(summary.dataIdKey) self.assertIsNone(summary.dataIdValue) @@ -315,7 +311,7 @@ def test_in(self): tree = parser.parse("instrument IN ('LSST')") bind = {} summary = tree.visit(InspectionVisitor(universe, bind)) - self.assertEqual(summary.dimensions.names, {"instrument"}) + self.assertEqual(summary.dimensions, {"instrument"}) self.assertFalse(summary.hasIngestDate) # we do not handle IN with a single item as `=` self.assertIsNone(summary.dataIdKey) @@ -324,28 +320,28 @@ def test_in(self): tree = parser.parse("instrument IN (instr)") bind = {"instr": "LSST"} summary = tree.visit(InspectionVisitor(universe, bind)) - self.assertEqual(summary.dimensions.names, {"instrument"}) + self.assertEqual(summary.dimensions, {"instrument"}) self.assertIsNone(summary.dataIdKey) self.assertIsNone(summary.dataIdValue) tree = parser.parse("visit IN (1,2,3)") bind = {} summary = tree.visit(InspectionVisitor(universe, bind)) - self.assertEqual(summary.dimensions.names, {"instrument", "visit", "band", "physical_filter"}) + self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter"}) self.assertIsNone(summary.dataIdKey) self.assertIsNone(summary.dataIdValue) tree = parser.parse("visit IN (visit1, visit2, visit3)") bind = {"visit1": 1, "visit2": 2, "visit3": 3} summary = tree.visit(InspectionVisitor(universe, bind)) - self.assertEqual(summary.dimensions.names, {"instrument", "visit", "band", "physical_filter"}) + self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter"}) self.assertIsNone(summary.dataIdKey) self.assertIsNone(summary.dataIdValue) tree = parser.parse("visit IN (visits)") bind = {"visits": (1, 2, 3)} summary = tree.visit(InspectionVisitor(universe, bind)) - self.assertEqual(summary.dimensions.names, {"instrument", "visit", "band", "physical_filter"}) + self.assertEqual(summary.dimensions, {"instrument", "visit", "band", "physical_filter"}) self.assertIsNone(summary.dataIdKey) self.assertIsNone(summary.dataIdValue) From f4b7fa52dca5e4e224f1572901b52f58165598b8 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Thu, 2 Nov 2023 
14:48:02 -0400 Subject: [PATCH 09/16] Switch to DimensionGroup in DimensionPackers. --- python/lsst/daf/butler/dimensions/_packer.py | 50 +++++++++++--------- python/lsst/daf/butler/instrument.py | 8 ++-- tests/test_dimensions.py | 5 +- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/python/lsst/daf/butler/dimensions/_packer.py b/python/lsst/daf/butler/dimensions/_packer.py index 0ec609edcd..d6522640da 100644 --- a/python/lsst/daf/butler/dimensions/_packer.py +++ b/python/lsst/daf/butler/dimensions/_packer.py @@ -38,7 +38,7 @@ from lsst.utils import doImportType from ._coordinate import DataCoordinate, DataId -from ._graph import DimensionGraph +from ._graph import DimensionGraph, DimensionGroup from .construction import DimensionConstructionBuilder, DimensionConstructionVisitor if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. @@ -58,19 +58,29 @@ class DimensionPacker(metaclass=ABCMeta): (to these values) in all calls to `pack`, and are used in the results of calls to `unpack`. Subclasses are permitted to require that ``fixed.hasRecords()`` return `True`. - dimensions : `DimensionGraph` - The dimensions of data IDs packed by this instance. + dimensions : `DimensionGroup` or `DimensionGraph` + The dimensions of data IDs packed by this instance. Only + `DimensionGroup` will be supported after v27. """ - def __init__(self, fixed: DataCoordinate, dimensions: DimensionGraph): + def __init__(self, fixed: DataCoordinate, dimensions: DimensionGroup | DimensionGraph): self.fixed = fixed - self.dimensions = dimensions + self._dimensions = self.fixed.universe.conform(dimensions) @property def universe(self) -> DimensionUniverse: """Graph containing all known dimensions (`DimensionUniverse`).""" return self.fixed.universe + @property + def dimensions(self) -> DimensionGraph: + """The dimensions of data IDs packed by this instance + (`DimensionGraph`). + + After v27 this will be a `DimensionGroup`. + """ + return self._dimensions._as_graph() + @property @abstractmethod def maxBits(self) -> int: @@ -137,7 +147,7 @@ def pack( dataId = DataCoordinate.standardize( dataId, **kwargs, universe=self.fixed.universe, defaults=self.fixed ) - if dataId.subset(self.fixed.graph) != self.fixed: + if dataId.subset(self.fixed.dimensions) != self.fixed: raise ValueError(f"Data ID packer expected a data ID consistent with {self.fixed}, got {dataId}.") packed = self._pack(dataId) if returnMaxBits: @@ -176,10 +186,6 @@ def unpack(self, packedId: int) -> DataCoordinate: dimensions held fixed. """ - dimensions: DimensionGraph - """The dimensions of data IDs packed by this instance (`DimensionGraph`). - """ - # TODO: Remove this class on DM-38687. @deprecated( @@ -200,11 +206,11 @@ class DimensionPackerFactory: Fully-qualified name of the packer class this factory constructs. fixed : `~collections.abc.Set` [ `str` ] Names of dimensions whose values must be provided to the packer when it - is constructed. This will be expanded lazily into a `DimensionGraph` + is constructed. This will be expanded lazily into a `DimensionGroup` prior to `DimensionPacker` construction. dimensions : `~collections.abc.Set` [ `str` ] Names of dimensions whose values are passed to `DimensionPacker.pack`. - This will be expanded lazily into a `DimensionGraph` prior to + This will be expanded lazily into a `DimensionGroup` prior to `DimensionPacker` construction. 
""" @@ -214,11 +220,11 @@ def __init__( fixed: Set[str], dimensions: Set[str], ): - # We defer turning these into DimensionGraph objects until first use + # We defer turning these into DimensionGroup objects until first use # because __init__ is called before a DimensionUniverse exists, and - # DimensionGraph instances can only be constructed afterwards. - self._fixed: Set[str] | DimensionGraph = fixed - self._dimensions: Set[str] | DimensionGraph = dimensions + # DimensionGroup instances can only be constructed afterwards. + self._fixed: Set[str] | DimensionGroup = fixed + self._dimensions: Set[str] | DimensionGroup = dimensions self._clsName = clsName self._cls: type[DimensionPacker] | None = None @@ -232,12 +238,10 @@ def __call__(self, universe: DimensionUniverse, fixed: DataCoordinate) -> Dimens packer. Must be expanded with all metadata known to the `Registry`. ``fixed.hasRecords()`` must return `True`. """ - # Construct DimensionGraph instances if necessary on first use. + # Construct DimensionGroup instances if necessary on first use. # See related comment in __init__. - if not isinstance(self._fixed, DimensionGraph): - self._fixed = universe.extract(self._fixed) - if not isinstance(self._dimensions, DimensionGraph): - self._dimensions = universe.extract(self._dimensions) + self._fixed = universe.conform(self._fixed) + self._dimensions = universe.conform(self._dimensions) assert fixed.graph.issuperset(self._fixed) if self._cls is None: packer_class = doImportType(self._clsName) @@ -270,11 +274,11 @@ class DimensionPackerConstructionVisitor(DimensionConstructionVisitor): Fully-qualified name of a `DimensionPacker` subclass. fixed : `~collections.abc.Iterable` [ `str` ] Names of dimensions whose values must be provided to the packer when it - is constructed. This will be expanded lazily into a `DimensionGraph` + is constructed. This will be expanded lazily into a `DimensionGroup` prior to `DimensionPacker` construction. dimensions : `~collections.abc.Iterable` [ `str` ] Names of dimensions whose values are passed to `DimensionPacker.pack`. - This will be expanded lazily into a `DimensionGraph` prior to + This will be expanded lazily into a `DimensionGroup` prior to `DimensionPacker` construction. """ diff --git a/python/lsst/daf/butler/instrument.py b/python/lsst/daf/butler/instrument.py index f1dcb689f2..7d9ed3a757 100644 --- a/python/lsst/daf/butler/instrument.py +++ b/python/lsst/daf/butler/instrument.py @@ -30,7 +30,7 @@ # TODO: Remove this entire module on DM-38687. from deprecated.sphinx import deprecated -from lsst.daf.butler import DataCoordinate, DimensionGraph, DimensionPacker +from lsst.daf.butler import DataCoordinate, DimensionGraph, DimensionGroup, DimensionPacker # TODO: remove on DM-38687. @@ -44,12 +44,12 @@ class ObservationDimensionPacker(DimensionPacker): instrument. 
""" - def __init__(self, fixed: DataCoordinate, dimensions: DimensionGraph): + def __init__(self, fixed: DataCoordinate, dimensions: DimensionGraph | DimensionGroup): super().__init__(fixed, dimensions) self._instrumentName = fixed["instrument"] record = fixed.records["instrument"] assert record is not None - if self.dimensions.required.names == {"instrument", "visit", "detector"}: + if self._dimensions.required.names == {"instrument", "visit", "detector"}: self._observationName = "visit" obsMax = record.visit_max elif dimensions.required.names == {"instrument", "exposure", "detector"}: @@ -78,5 +78,5 @@ def unpack(self, packedId: int) -> DataCoordinate: "detector": detector, self._observationName: observation, }, - graph=self.dimensions, + dimensions=self._dimensions, ) diff --git a/tests/test_dimensions.py b/tests/test_dimensions.py index 9cd1d1069c..afb7296345 100644 --- a/tests/test_dimensions.py +++ b/tests/test_dimensions.py @@ -44,6 +44,7 @@ Dimension, DimensionConfig, DimensionGraph, + DimensionGroup, DimensionPacker, DimensionUniverse, NamedKeyDict, @@ -85,7 +86,7 @@ class ConcreteTestDimensionPacker(DimensionPacker): This class just returns the detector ID as-is. """ - def __init__(self, fixed: DataCoordinate, dimensions: DimensionGraph): + def __init__(self, fixed: DataCoordinate, dimensions: DimensionGroup): super().__init__(fixed, dimensions) self._n_detectors = fixed.records["instrument"].detector_max self._max_bits = (self._n_detectors - 1).bit_length() @@ -106,7 +107,7 @@ def unpack(self, packedId: int) -> DataCoordinate: "instrument": self.fixed["instrument"], "detector": packedId, }, - graph=self.dimensions, + dimensions=self._dimensions, ) From 2d7feb00caf537013593def26cb03911ee7b2231 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Thu, 2 Nov 2023 14:48:59 -0400 Subject: [PATCH 10/16] Switch to DimensionGroup in DatasetType and LookupKey. DatasetType.dimensions needs to continue to return DimensionGraph during the deprecation period, since that's the name we want to use long-term (unlike e.g. DataCoordinate.graph). That means we rely on the fact that we've also deprecated all of the things DimensionGraph can do that DimensionGroup can't do (like iteration), and we rely on those warnings instead of making DatasetType.dimensions itself warn. That should let a lot of usage just blindly pass DatasetType.dimensions to something else and not care about what type it is throughout the deprecation period. --- python/lsst/daf/butler/_config_support.py | 32 ++++----- python/lsst/daf/butler/_dataset_ref.py | 4 +- python/lsst/daf/butler/_dataset_type.py | 86 +++++++++++++---------- 3 files changed, 62 insertions(+), 60 deletions(-) diff --git a/python/lsst/daf/butler/_config_support.py b/python/lsst/daf/butler/_config_support.py index fe01404a1f..c96253136b 100644 --- a/python/lsst/daf/butler/_config_support.py +++ b/python/lsst/daf/butler/_config_support.py @@ -36,11 +36,11 @@ from collections.abc import Iterable, Mapping from typing import TYPE_CHECKING, Any -from .dimensions import DimensionGraph +from .dimensions import DimensionGroup if TYPE_CHECKING: from ._config import Config - from .dimensions import Dimension, DimensionUniverse + from .dimensions import DimensionUniverse log = logging.getLogger(__name__) @@ -58,9 +58,9 @@ class LookupKey: name : `str`, optional Primary index string for lookup. 
If this string looks like it represents dimensions (via ``dim1+dim2+dim3`` syntax) the name - is converted to a `DimensionGraph` and stored in ``dimensions`` + is converted to a `DimensionGroup` and stored in ``dimensions`` property. - dimensions : `DimensionGraph`, optional + dimensions : `DimensionGroup`, optional Dimensions that are relevant for lookup. Should not be specified if ``name`` is also specified. dataId : `dict`, optional @@ -68,13 +68,13 @@ class LookupKey: universe : `DimensionUniverse`, optional Set of all known dimensions, used to expand and validate ``name`` or ``dimensions``. Required if the key represents dimensions and a - full `DimensionGraph` is not provided. + full `DimensionGroup` is not provided. """ def __init__( self, name: str | None = None, - dimensions: Iterable[str | Dimension] | None = None, + dimensions: DimensionGroup | None = None, dataId: dict[str, Any] | None = None, *, universe: DimensionUniverse | None = None, @@ -100,7 +100,7 @@ def __init__( # indicate this but have to filter out the empty value dimension_names = [n for n in name.split("+") if n] try: - self._dimensions = universe.extract(dimension_names) + self._dimensions = universe.conform(dimension_names) except KeyError: # One or more of the dimensions is not known to the # universe. This could be a typo or it could be that @@ -122,15 +122,7 @@ def __init__( self._name = name elif dimensions is not None: - if not isinstance(dimensions, DimensionGraph): - if universe is None: - raise ValueError( - f"Cannot construct LookupKey for dimensions={dimensions} without universe." - ) - else: - self._dimensions = universe.extract(dimensions) - else: - self._dimensions = dimensions + self._dimensions = dimensions else: # mypy cannot work this out on its own raise ValueError("Name was None but dimensions is also None") @@ -181,8 +173,8 @@ def name(self) -> str | None: return self._name @property - def dimensions(self) -> DimensionGraph | None: - """Dimensions associated with lookup (`DimensionGraph`).""" + def dimensions(self) -> DimensionGroup | None: + """Dimensions associated with lookup (`DimensionGroup`).""" return self._dimensions @property @@ -203,7 +195,7 @@ def __hash__(self) -> int: def clone( self, name: str | None = None, - dimensions: DimensionGraph | None = None, + dimensions: DimensionGroup | None = None, dataId: dict[str, Any] | None = None, ) -> LookupKey: """Clone the object, overriding some options. @@ -216,7 +208,7 @@ def clone( name : `str`, optional Primary index string for lookup. Will override ``dimensions`` if ``dimensions`` are set. - dimensions : `DimensionGraph`, optional + dimensions : `DimensionGroup`, optional Dimensions that are relevant for lookup. Will override ``name`` if ``name`` is already set. 
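With this change the ``dimensions`` argument must already be a `DimensionGroup`; callers that previously passed an iterable of dimension names are expected to conform it first. A short sketch, assuming ``universe`` is a `DimensionUniverse`:

    key = LookupKey(dimensions=universe.conform(["instrument", "physical_filter"]))
    print(key.dimensions)  # a DimensionGroup rather than a DimensionGraph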
dataId : `dict`, optional diff --git a/python/lsst/daf/butler/_dataset_ref.py b/python/lsst/daf/butler/_dataset_ref.py index 428c91e035..ee02c965dd 100644 --- a/python/lsst/daf/butler/_dataset_ref.py +++ b/python/lsst/daf/butler/_dataset_ref.py @@ -173,7 +173,7 @@ def makeDatasetId( else: raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}") - for name, value in sorted(dataId.byName().items()): + for name, value in sorted(dataId.required.items()): items.append((name, str(value))) data = ",".join(f"{key}={value}" for key, value in items) return uuid.uuid5(self.NS_UUID, data) @@ -328,7 +328,7 @@ def __init__( ): self.datasetType = datasetType if conform: - self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions) + self.dataId = DataCoordinate.standardize(dataId, dimensions=datasetType.dimensions) else: self.dataId = dataId self.run = run diff --git a/python/lsst/daf/butler/_dataset_type.py b/python/lsst/daf/butler/_dataset_type.py index 1b38821bc0..e17d412a11 100644 --- a/python/lsst/daf/butler/_dataset_type.py +++ b/python/lsst/daf/butler/_dataset_type.py @@ -40,7 +40,7 @@ from ._config_support import LookupKey from ._storage_class import StorageClass, StorageClassFactory -from .dimensions import DimensionGraph, SerializedDimensionGraph +from .dimensions import DimensionGraph, DimensionGroup, SerializedDimensionGraph from .json import from_json_pydantic, to_json_pydantic from .persistence_context import PersistenceContextVars @@ -60,7 +60,7 @@ class SerializedDatasetType(_BaseModelCompat): name: StrictStr storageClass: StrictStr | None = None - dimensions: SerializedDimensionGraph | None = None + dimensions: SerializedDimensionGraph | list[StrictStr] | None = None parentStorageClass: StrictStr | None = None isCalibration: StrictBool = False @@ -70,15 +70,16 @@ def direct( *, name: str, storageClass: str | None = None, - dimensions: dict | None = None, + dimensions: list | dict | None = None, parentStorageClass: str | None = None, isCalibration: bool = False, ) -> SerializedDatasetType: """Construct a `SerializedDatasetType` directly without validators. - This differs from PyDantics construct method in that the arguments are - explicitly what the model requires, and it will recurse through - members, constructing them from their corresponding `direct` methods. + This differs from Pydantic's model_construct method in that the + arguments are explicitly what the model requires, and it will recurse + through members, constructing them from their corresponding `direct` + methods. This method should only be called when the inputs are trusted. """ @@ -87,9 +88,14 @@ def direct( if cache is not None and (type_ := cache.get(key, None)) is not None: return type_ - serialized_dimensions = ( - SerializedDimensionGraph.direct(**dimensions) if dimensions is not None else None - ) + serialized_dimensions: list[str] | None + match dimensions: + case list(): + serialized_dimensions = dimensions + case dict(): + serialized_dimensions = SerializedDimensionGraph.direct(**dimensions).names + case None: + serialized_dimensions = None node = cls.model_construct( name=name, @@ -125,9 +131,11 @@ class DatasetType: and underscores. Component dataset types should contain a single period separating the base dataset type name from the component name (and may be recursive). 
- dimensions : `DimensionGraph` or iterable of `Dimension` or `str` + dimensions : `DimensionGroup`, `DimensionGraph`, or \ + `~collections.abc.Iterable` [ `Dimension` or `str` ] Dimensions used to label and relate instances of this `DatasetType`. - If not a `DimensionGraph`, ``universe`` must be provided as well. + If not a `DimensionGraph` or `DimensionGroup`, ``universe`` must be + provided as well. storageClass : `StorageClass` or `str` Instance of a `StorageClass` or name of `StorageClass` that defines how this `DatasetType` is persisted. @@ -184,7 +192,7 @@ def nameWithComponent(datasetTypeName: str, componentName: str) -> str: def __init__( self, name: str, - dimensions: DimensionGraph | Iterable[Dimension | str], + dimensions: DimensionGroup | DimensionGraph | Iterable[Dimension | str], storageClass: StorageClass | str, parentStorageClass: StorageClass | str | None = None, *, @@ -194,14 +202,13 @@ def __init__( if self.VALID_NAME_REGEX.match(name) is None: raise ValueError(f"DatasetType name '{name}' is invalid.") self._name = name - if not isinstance(dimensions, DimensionGraph): - if universe is None: - raise ValueError( - "If dimensions is not a normalized DimensionGraph, a universe must be provided." - ) - dimensions = universe.extract(dimensions) - self._dimensions = dimensions - if name in self._dimensions.universe.governor_dimensions.names: + universe = universe or getattr(dimensions, "universe", None) + if universe is None: + raise ValueError( + "If dimensions is not a DimensionGroup or DimensionGraph, a universe must be provided." + ) + self._dimensions = universe.conform(dimensions) + if name in self._dimensions.universe.governor_dimensions: raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.") if not isinstance(storageClass, StorageClass | str): raise ValueError(f"StorageClass argument must be StorageClass or str. Got {storageClass}") @@ -250,7 +257,7 @@ def __repr__(self) -> str: extra = f", parentStorageClass={self._parentStorageClassName}" if self._isCalibration: extra += ", isCalibration=True" - return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})" + return f"DatasetType({self.name!r}, {self._dimensions}, {self._storageClassName}{extra})" def _equal_ignoring_storage_class(self, other: Any) -> bool: """Check everything is equal except the storage class. @@ -350,12 +357,11 @@ def name(self) -> str: @property def dimensions(self) -> DimensionGraph: - r"""Return the `Dimension`\ s fir this dataset type. + """Return the dimensions of this dataset type (`DimensionGraph`). - The dimensions label and relate instances of this - `DatasetType` (`DimensionGraph`). + The dimensions of a define the keys of its datasets' data IDs.. 
""" - return self._dimensions + return self._dimensions._as_graph() @property def storageClass(self) -> StorageClass: @@ -501,7 +507,7 @@ def makeCompositeDatasetType(self) -> DatasetType: ) return DatasetType( composite_name, - dimensions=self.dimensions, + dimensions=self._dimensions, storageClass=self.parentStorageClass, isCalibration=self.isCalibration(), ) @@ -524,7 +530,7 @@ def makeComponentDatasetType(self, component: str) -> DatasetType: # The component could be a read/write or read component return DatasetType( self.componentTypeName(component), - dimensions=self.dimensions, + dimensions=self._dimensions, storageClass=self.storageClass.allComponents()[component], parentStorageClass=self.storageClass, isCalibration=self.isCalibration(), @@ -570,7 +576,7 @@ class is the current one. parent = self._parentStorageClass if self._parentStorageClass else self._parentStorageClassName new = DatasetType( self.name, - dimensions=self.dimensions, + dimensions=self._dimensions, storageClass=storageClass, parentStorageClass=parent, isCalibration=self.isCalibration(), @@ -625,9 +631,9 @@ def _lookupNames(self) -> tuple[LookupKey, ...]: if componentName is not None: lookups = lookups + (LookupKey(name=rootName),) - if self.dimensions: + if self._dimensions: # Dimensions are a lower priority than dataset type name - lookups = lookups + (LookupKey(dimensions=self.dimensions),) + lookups = lookups + (LookupKey(dimensions=self._dimensions),) storageClasses = self.storageClass._lookupNames() if componentName is not None and self.parentStorageClass is not None: @@ -661,7 +667,7 @@ def to_simple(self, minimal: bool = False) -> SerializedDatasetType: "name": self.name, "storageClass": self._storageClassName, "isCalibration": self._isCalibration, - "dimensions": self.dimensions.to_simple(), + "dimensions": list(self._dimensions.names), } if self._parentStorageClassName is not None: @@ -722,13 +728,17 @@ def from_simple( # this is for mypy raise ValueError("Unable to determine a usable universe") - if simple.dimensions is None: - # mypy hint - raise ValueError(f"Dimensions must be specified in {simple}") + match simple.dimensions: + case list(): + dimensions = universe.conform(simple.dimensions) + case SerializedDimensionGraph(): + dimensions = universe.conform(simple.dimensions.names) + case None: + raise ValueError(f"Dimensions must be specified in {simple}") newType = cls( name=simple.name, - dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe), + dimensions=dimensions, storageClass=simple.storageClass, isCalibration=simple.isCalibration, parentStorageClass=simple.parentStorageClass, @@ -744,7 +754,7 @@ def from_simple( def __reduce__( self, ) -> tuple[ - Callable, tuple[type[DatasetType], tuple[str, DimensionGraph, str, str | None], dict[str, bool]] + Callable, tuple[type[DatasetType], tuple[str, DimensionGroup, str, str | None], dict[str, bool]] ]: """Support pickling. 
@@ -753,7 +763,7 @@ def __reduce__( """ return _unpickle_via_factory, ( self.__class__, - (self.name, self.dimensions, self._storageClassName, self._parentStorageClassName), + (self.name, self._dimensions, self._storageClassName, self._parentStorageClassName), {"isCalibration": self._isCalibration}, ) @@ -768,7 +778,7 @@ def __deepcopy__(self, memo: Any) -> DatasetType: """ return DatasetType( name=deepcopy(self.name, memo), - dimensions=deepcopy(self.dimensions, memo), + dimensions=deepcopy(self._dimensions, memo), storageClass=deepcopy(self._storageClass or self._storageClassName, memo), parentStorageClass=deepcopy(self._parentStorageClass or self._parentStorageClassName, memo), isCalibration=deepcopy(self._isCalibration, memo), From 371110a487a028cc842fc7cb80d257cbe9c7c085 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Fri, 3 Nov 2023 16:07:22 -0400 Subject: [PATCH 11/16] Fix usage of now-deprecated interfaces. --- python/lsst/daf/butler/_quantum.py | 9 +- python/lsst/daf/butler/_registry_shim.py | 4 +- .../daf/butler/datastore/file_templates.py | 2 +- .../butler/datastores/inMemoryDatastore.py | 2 +- python/lsst/daf/butler/dimensions/_records.py | 8 +- python/lsst/daf/butler/direct_butler.py | 24 +- .../butler/registry/_collection_summary.py | 4 +- python/lsst/daf/butler/registry/_registry.py | 16 +- .../datasets/byDimensions/_manager.py | 4 +- .../datasets/byDimensions/_storage.py | 23 +- .../registry/datasets/byDimensions/tables.py | 6 +- .../butler/registry/dimensions/overlaps.py | 2 +- .../daf/butler/registry/dimensions/query.py | 4 +- .../daf/butler/registry/dimensions/static.py | 64 +-- .../daf/butler/registry/dimensions/table.py | 4 +- .../butler/registry/interfaces/_dimensions.py | 20 +- .../daf/butler/registry/obscore/_manager.py | 6 +- .../daf/butler/registry/obscore/_records.py | 14 +- .../queries/expressions/_predicate.py | 4 +- .../registry/queries/expressions/check.py | 7 +- .../lsst/daf/butler/registry/sql_registry.py | 67 +-- .../daf/butler/registry/tests/_registry.py | 99 +++-- python/lsst/daf/butler/script/queryDataIds.py | 28 +- .../lsst/daf/butler/script/queryDatasets.py | 9 +- .../butler/script/register_dataset_type.py | 8 +- .../lsst/daf/butler/tests/_datasetsHelper.py | 8 +- python/lsst/daf/butler/tests/_testRepo.py | 35 +- python/lsst/daf/butler/transfers/_context.py | 3 +- python/lsst/daf/butler/transfers/_yaml.py | 4 +- tests/test_butler.py | 30 +- tests/test_cliCmdQueryDatasets.py | 122 +++--- tests/test_constraints.py | 4 +- tests/test_datasets.py | 54 +-- tests/test_datastore.py | 44 +- tests/test_dimensions.py | 409 +++++++++--------- tests/test_expressions.py | 8 +- tests/test_formatter.py | 5 +- tests/test_obscore.py | 8 +- tests/test_quantum.py | 6 +- tests/test_quantumBackedButler.py | 2 +- tests/test_query_relations.py | 36 +- tests/test_simpleButler.py | 2 +- tests/test_templates.py | 5 +- tests/test_testRepo.py | 10 +- 44 files changed, 633 insertions(+), 600 deletions(-) diff --git a/python/lsst/daf/butler/_quantum.py b/python/lsst/daf/butler/_quantum.py index 3f410c8fd3..45db47ebb1 100644 --- a/python/lsst/daf/butler/_quantum.py +++ b/python/lsst/daf/butler/_quantum.py @@ -270,7 +270,8 @@ def to_simple(self, accumulator: DimensionRecordsAccumulator | None = None) -> S if simple.dataId is not None and simple.dataId.records is not None: # for each dimension record get a id by adding it to the # record accumulator. 
- for rec in value.dataId.records.values(): + for element_name in value.dataId.dimensions.elements: + rec = value.dataId.records[element_name] if rec is not None: recordId = accumulator.addRecord(rec) recIds.append(recordId) @@ -299,7 +300,8 @@ def to_simple(self, accumulator: DimensionRecordsAccumulator | None = None) -> S # store them over and over again. recIds = [] if simp.dataId is not None and simp.dataId.records is not None: - for rec in e.dataId.records.values(): + for element_name in e.dataId.dimensions.elements: + rec = e.dataId.records[element_name] # for each dimension record get a id by adding it to # the record accumulator. if rec is not None: @@ -334,7 +336,8 @@ def to_simple(self, accumulator: DimensionRecordsAccumulator | None = None) -> S # store them over and over again. recIds = [] if simp.dataId is not None and simp.dataId.records is not None: - for rec in e.dataId.records.values(): + for element_name in e.dataId.dimensions.elements: + rec = e.dataId.records[element_name] # for each dimension record get a id by adding it to # the record accumulator. if rec is not None: diff --git a/python/lsst/daf/butler/_registry_shim.py b/python/lsst/daf/butler/_registry_shim.py index 4d2653abe0..17f7e93bcb 100644 --- a/python/lsst/daf/butler/_registry_shim.py +++ b/python/lsst/daf/butler/_registry_shim.py @@ -44,6 +44,7 @@ Dimension, DimensionElement, DimensionGraph, + DimensionGroup, DimensionRecord, DimensionUniverse, ) @@ -242,6 +243,7 @@ def expandDataId( self, dataId: DataId | None = None, *, + dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, graph: DimensionGraph | None = None, records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, withDefaults: bool = True, @@ -249,7 +251,7 @@ def expandDataId( ) -> DataCoordinate: # Docstring inherited from a base class. return self._registry.expandDataId( - dataId, graph=graph, records=records, withDefaults=withDefaults, **kwargs + dataId, dimensions=dimensions, graph=graph, records=records, withDefaults=withDefaults, **kwargs ) def insertDimensionData( diff --git a/python/lsst/daf/butler/datastore/file_templates.py b/python/lsst/daf/butler/datastore/file_templates.py index c33d67d2a5..7e6e09d650 100644 --- a/python/lsst/daf/butler/datastore/file_templates.py +++ b/python/lsst/daf/butler/datastore/file_templates.py @@ -455,7 +455,7 @@ def format(self, ref: DatasetRef) -> str: extras = {} if isinstance(ref.dataId, DataCoordinate): if ref.dataId.hasRecords(): - extras = ref.dataId.records.byName() + extras = {k: ref.dataId.records[k] for k in ref.dataId.dimensions.elements} skypix_alias = self._determine_skypix_alias(ref) if skypix_alias is not None: fields["skypix"] = fields[skypix_alias] diff --git a/python/lsst/daf/butler/datastores/inMemoryDatastore.py b/python/lsst/daf/butler/datastores/inMemoryDatastore.py index 6aa9474e0d..33b65d71a0 100644 --- a/python/lsst/daf/butler/datastores/inMemoryDatastore.py +++ b/python/lsst/daf/butler/datastores/inMemoryDatastore.py @@ -435,7 +435,7 @@ def getURIs(self, ref: DatasetRef, predict: bool = False) -> DatasetRefURIs: provide an indication of the associated dataset. 
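``DataCoordinate.required``, used in the datastore and registry changes below, is a plain mapping from required dimension names to values, so it can be fed to ``urlencode`` or expanded into row dictionaries directly. A sketch with illustrative values, assuming ``ref`` is an existing `DatasetRef`:

    from urllib.parse import urlencode

    query = urlencode(ref.dataId.required)  # e.g. "instrument=DummyCam&detector=3"
    row = dict(dataset_id=ref.id, **ref.dataId.required)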
""" # Include the dataID as a URI query - query = urlencode(ref.dataId) + query = urlencode(ref.dataId.required) # if this has never been written then we have to guess if not self.exists(ref): diff --git a/python/lsst/daf/butler/dimensions/_records.py b/python/lsst/daf/butler/dimensions/_records.py index fd4614c0f1..77811ffcad 100644 --- a/python/lsst/daf/butler/dimensions/_records.py +++ b/python/lsst/daf/butler/dimensions/_records.py @@ -45,8 +45,8 @@ if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. from ..registry import Registry from ._coordinate import DataCoordinate - from ._graph import DimensionUniverse from ._schema import DimensionElementFields + from ._universe import DimensionUniverse def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord: @@ -283,8 +283,8 @@ def __init__(self, **kwargs: Any): object.__setattr__( self, "dataId", - DataCoordinate.fromRequiredValues( - self.definition.graph, + DataCoordinate.from_required_values( + self.definition.minimal_group, tuple(kwargs[dimension] for dimension in self.definition.required.names), ), ) @@ -317,7 +317,7 @@ def __eq__(self, other: Any) -> bool: return self.dataId == other.dataId def __hash__(self) -> int: - return hash(self.dataId) + return hash(self.dataId.required_values) def __str__(self) -> str: lines = [f"{self.definition.name}:"] diff --git a/python/lsst/daf/butler/direct_butler.py b/python/lsst/daf/butler/direct_butler.py index 2c7be95ce1..4839feec7c 100644 --- a/python/lsst/daf/butler/direct_butler.py +++ b/python/lsst/daf/butler/direct_butler.py @@ -285,7 +285,7 @@ def __reduce__(self) -> tuple: self._config, self.collections, self.run, - self._registry.defaults.dataId.byName(), + dict(self._registry.defaults.dataId.required), self._registry.isWriteable(), ), ) @@ -838,7 +838,7 @@ def _findDatasetRef( dataId = DataCoordinate.standardize( dataId, universe=self.dimensions, defaults=self._registry.defaults.dataId, **kwargs ) - if dataId.graph.temporal: + if dataId.dimensions.temporal: dataId = self._registry.expandDataId(dataId) timespan = dataId.timespan else: @@ -846,7 +846,10 @@ def _findDatasetRef( # type instead of letting registry.findDataset do it, so we get the # result even if no dataset is found. dataId = DataCoordinate.standardize( - dataId, graph=datasetType.dimensions, defaults=self._registry.defaults.dataId, **kwargs + dataId, + dimensions=datasetType.dimensions, + defaults=self._registry.defaults.dataId, + **kwargs, ) # Always lookup the DatasetRef, even if one is given, to ensure it is # present in the current collection. @@ -973,7 +976,7 @@ def put( dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs) # Add Registry Dataset entry. 
- dataId = self._registry.expandDataId(dataId, graph=datasetType.dimensions, **kwargs) + dataId = self._registry.expandDataId(dataId, dimensions=datasetType.dimensions, **kwargs) (ref,) = self._registry.insertDatasets(datasetType, run=run, dataIds=[dataId]) self._datastore.put(obj, ref) @@ -1335,7 +1338,9 @@ def get_dataset( ref = self._registry.getDataset(id) if ref is not None: if dimension_records: - ref = ref.expanded(self._registry.expandDataId(ref.dataId, graph=ref.datasetType.dimensions)) + ref = ref.expanded( + self._registry.expandDataId(ref.dataId, dimensions=ref.datasetType.dimensions) + ) if storage_class: ref = ref.overrideStorageClass(storage_class) if datastore_records: @@ -1371,7 +1376,7 @@ def find_dataset( **kwargs, ) if ref is not None and dimension_records: - ref = ref.expanded(self._registry.expandDataId(ref.dataId, graph=ref.datasetType.dimensions)) + ref = ref.expanded(self._registry.expandDataId(ref.dataId, dimensions=ref.datasetType.dimensions)) if ref is not None and storage_class is not None: ref = ref.overrideStorageClass(storage_class) return ref @@ -1974,7 +1979,8 @@ def transfer_from( raise TypeError("Input butler needs to be a full butler to expand DataId.") # If this butler doesn't know about a dimension in the source # butler things will break later. - for record in dataId.records.values(): + for element_name in dataId.dimensions.elements: + record = dataId.records[element_name] if record is not None and record.definition in elements: dimension_records[record.definition].setdefault(record.dataId, record) @@ -2079,13 +2085,13 @@ def validateConfiguration( # In order to create a conforming dataset ref, create # fake DataCoordinate values for the non-instrument # dimensions. The type of the value does not matter here. 
- dataId = {dim.name: 1 for dim in datasetType.dimensions if dim.name != "instrument"} + dataId = {dim: 1 for dim in datasetType.dimensions.names if dim != "instrument"} for instrument in instruments: datasetRef = DatasetRef( datasetType, DataCoordinate.standardize( - dataId, instrument=instrument, graph=datasetType.dimensions + dataId, instrument=instrument, dimensions=datasetType.dimensions ), run="validate", ) diff --git a/python/lsst/daf/butler/registry/_collection_summary.py b/python/lsst/daf/butler/registry/_collection_summary.py index ec25789548..84b36ad1c2 100644 --- a/python/lsst/daf/butler/registry/_collection_summary.py +++ b/python/lsst/daf/butler/registry/_collection_summary.py @@ -80,7 +80,7 @@ def add_datasets_generator(self, refs: Iterable[DatasetRef]) -> Generator[Datase """ for ref in refs: self.dataset_types.add(ref.datasetType) - for gov in ref.dataId.graph.governors.names: + for gov in ref.dataId.dimensions.governors: self.governors.setdefault(gov, set()).add(cast(str, ref.dataId[gov])) yield ref @@ -123,7 +123,7 @@ def add_data_ids_generator( """ self.dataset_types.add(dataset_type) for data_id in data_ids: - for gov in data_id.graph.governors.names: + for gov in data_id.dimensions.governors: self.governors.setdefault(gov, set()).add(cast(str, data_id[gov])) yield data_id diff --git a/python/lsst/daf/butler/registry/_registry.py b/python/lsst/daf/butler/registry/_registry.py index 21e651314c..f275cedaab 100644 --- a/python/lsst/daf/butler/registry/_registry.py +++ b/python/lsst/daf/butler/registry/_registry.py @@ -49,6 +49,7 @@ Dimension, DimensionElement, DimensionGraph, + DimensionGroup, DimensionRecord, DimensionUniverse, ) @@ -859,6 +860,7 @@ def expandDataId( self, dataId: DataId | None = None, *, + dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, graph: DimensionGraph | None = None, records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, withDefaults: bool = True, @@ -870,12 +872,16 @@ def expandDataId( ---------- dataId : `DataCoordinate` or `dict`, optional Data ID to be expanded; augmented and overridden by ``kwargs``. + dimensions : `~collections.abc.Iterable` [ `str` ], \ + `DimensionGroup`, or `DimensionGraph`, optional + The dimensions to be identified by the new `DataCoordinate`. + If not provided, will be inferred from the keys of ``mapping`` and + ``**kwargs``, and ``universe`` must be provided unless ``mapping`` + is already a `DataCoordinate`. graph : `DimensionGraph`, optional - Set of dimensions for the expanded ID. If `None`, the dimensions - will be inferred from the keys of ``dataId`` and ``kwargs``. - Dimensions that are in ``dataId`` or ``kwargs`` but not in - ``graph`` are silently ignored, providing a way to extract and - ``graph`` expand a subset of a data ID. + Like ``dimensions``, but as a ``DimensionGraph`` instance. Ignored + if ``dimensions`` is provided. Deprecated and will be removed + after v27. 
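The new ``dimensions`` argument documented above accepts an iterable of dimension names as well as a `DimensionGroup` or `DimensionGraph`, and supersedes ``graph=``. A hedged sketch, assuming the corresponding dimension records already exist in the registry (the instrument and visit values are illustrative):

    expanded = registry.expandDataId(
        {"instrument": "DummyCam", "visit": 42},
        dimensions=["instrument", "visit"],  # replaces the deprecated graph=... argument
    )
    record = expanded.records["visit"]  # attached dimension records, keyed by element name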
records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \ optional Dimension record data to use before querying the database for that diff --git a/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py b/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py index 692d8585a3..72214a8588 100644 --- a/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py +++ b/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py @@ -323,7 +323,7 @@ def register(self, datasetType: DatasetType) -> bool: ) record = self._fetch_dataset_type_record(datasetType.name) if record is None: - dimensionsKey = self._dimensions.saveDimensionGraph(datasetType.dimensions) + dimensionsKey = self._dimensions.save_dimension_group(datasetType.dimensions.as_group()) tagTableName = makeTagTableName(datasetType, dimensionsKey) self._db.ensureTableExists( tagTableName, @@ -541,7 +541,7 @@ def _fetch_dataset_type_record(self, name: str) -> _DatasetTypeRecord | None: def _record_from_row(self, row: Mapping) -> _DatasetTypeRecord: name = row["name"] - dimensions = self._dimensions.loadDimensionGraph(row["dimensions_key"]) + dimensions = self._dimensions.load_dimension_group(row["dimensions_key"]) calibTableName = row["calibration_association_table"] datasetType = DatasetType( name, dimensions, row["storage_class"], isCalibration=(calibTableName is not None) diff --git a/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py b/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py index b88b80a3c5..2a015b502c 100644 --- a/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py +++ b/python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py @@ -132,10 +132,7 @@ def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef] rows = [] summary = CollectionSummary() for dataset in summary.add_datasets_generator(datasets): - row = dict(protoRow, dataset_id=dataset.id) - for dimension, value in dataset.dataId.items(): - row[dimension.name] = value - rows.append(row) + rows.append(dict(protoRow, dataset_id=dataset.id, **dataset.dataId.required)) # Update the summary tables for this collection in case this is the # first time this dataset type or these governor values will be # inserted there. @@ -210,9 +207,7 @@ def certify( ) summary = CollectionSummary() for dataset in summary.add_datasets_generator(datasets): - row = dict(protoRow, dataset_id=dataset.id) - for dimension, value in dataset.dataId.items(): - row[dimension.name] = value + row = dict(protoRow, dataset_id=dataset.id, **dataset.dataId.required) TimespanReprClass.update(timespan, result=row) rows.append(row) if dataIds is not None: @@ -565,9 +560,9 @@ def getDataId(self, id: DatasetId) -> DataCoordinate: with self._db.query(sql) as sql_result: row = sql_result.mappings().fetchone() assert row is not None, "Should be guaranteed by caller and foreign key constraints." - return DataCoordinate.standardize( - {dimension.name: row[dimension.name] for dimension in self.datasetType.dimensions.required}, - graph=self.datasetType.dimensions, + return DataCoordinate.from_required_values( + self.datasetType.dimensions.as_group(), + tuple(row[dimension] for dimension in self.datasetType.dimensions.required.names), ) @@ -601,7 +596,7 @@ def insert( # Iterate over data IDs, transforming a possibly-single-pass iterable # into a list. 
- dataIdList = [] + dataIdList: list[DataCoordinate] = [] rows = [] summary = CollectionSummary() for dataId in summary.add_data_ids_generator(self.datasetType, dataIds): @@ -629,7 +624,7 @@ def insert( self._collections.getCollectionForeignKeyName(): run.key, } tagsRows = [ - dict(protoTagsRow, dataset_id=row["id"], **dataId.byName()) + dict(protoTagsRow, dataset_id=row["id"], **dataId.required) for dataId, row in zip(dataIdList, rows, strict=True) ] # Insert those rows into the tags table. @@ -660,7 +655,7 @@ def import_( # Iterate over data IDs, transforming a possibly-single-pass iterable # into a list. - dataIds = {} + dataIds: dict[DatasetId, DataCoordinate] = {} summary = CollectionSummary() for dataset in summary.add_datasets_generator(datasets): dataIds[dataset.id] = dataset.dataId @@ -673,7 +668,7 @@ def import_( collFkName: run.key, } tmpRows = [ - dict(protoTagsRow, dataset_id=dataset_id, **dataId.byName()) + dict(protoTagsRow, dataset_id=dataset_id, **dataId.required) for dataset_id, dataId in dataIds.items() ] with self._db.transaction(for_temp_tables=True), self._db.temporary_table(tableSpec) as tmp_tags: diff --git a/python/lsst/daf/butler/registry/datasets/byDimensions/tables.py b/python/lsst/daf/butler/registry/datasets/byDimensions/tables.py index c5a18ad835..3fdacea11e 100644 --- a/python/lsst/daf/butler/registry/datasets/byDimensions/tables.py +++ b/python/lsst/daf/butler/registry/datasets/byDimensions/tables.py @@ -351,7 +351,8 @@ def makeTagTableSpec( target=(collectionFieldSpec.name, "dataset_type_id"), ) ) - for dimension in datasetType.dimensions.required: + for dimension_name in datasetType.dimensions.required.names: + dimension = datasetType.dimensions.universe.dimensions[dimension_name] fieldSpec = addDimensionForeignKey( tableSpec, dimension=dimension, nullable=False, primaryKey=False, constraint=constraints ) @@ -434,7 +435,8 @@ def makeCalibTableSpec( ) ) # Add dimension fields (part of the temporal lookup index.constraint). - for dimension in datasetType.dimensions.required: + for dimension_name in datasetType.dimensions.required.names: + dimension = datasetType.dimensions.universe.dimensions[dimension_name] fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False) index.append(fieldSpec.name) # If this is a governor dimension, add a foreign key constraint to the diff --git a/python/lsst/daf/butler/registry/dimensions/overlaps.py b/python/lsst/daf/butler/registry/dimensions/overlaps.py index 0dac7ab55e..ef733bcd23 100644 --- a/python/lsst/daf/butler/registry/dimensions/overlaps.py +++ b/python/lsst/daf/butler/registry/dimensions/overlaps.py @@ -180,7 +180,7 @@ def _makeOverlapTableSpec( tableSpec : `ddl.TableSpec` Table specification. 
""" - assert elements[0].graph.required.isdisjoint(elements[1].graph.required) + assert elements[0].minimal_group.required.isdisjoint(elements[1].minimal_group.required) tableSpec = ddl.TableSpec(fields=[]) # Add governor dimensions first, so they appear first in the primary # key; we may often (in the future, perhaps always) know these at diff --git a/python/lsst/daf/butler/registry/dimensions/query.py b/python/lsst/daf/butler/registry/dimensions/query.py index 835a93b70a..ef39bff6ea 100644 --- a/python/lsst/daf/butler/registry/dimensions/query.py +++ b/python/lsst/daf/butler/registry/dimensions/query.py @@ -82,7 +82,7 @@ def __init__( self._db = db self._element = element self._target = view_target - if element not in self._target.element.graph.dimensions: + if element.name not in self._target.element.minimal_group: raise NotImplementedError("Query-backed dimension must be a dependency of its target.") if element.metadata: raise NotImplementedError("Cannot use query to back dimension with metadata.") @@ -148,7 +148,7 @@ def fetch_one(self, data_id: DataCoordinate, context: queries.SqlQueryContext) - # Docstring inherited from DimensionRecordStorage. # Given the restrictions imposed at construction, we know there's # nothing to actually fetch: everything we need is in the data ID. - return self.element.RecordClass(**data_id.byName()) + return self.element.RecordClass(**data_id.required) def digestTables(self) -> list[sqlalchemy.schema.Table]: # Docstring inherited from DimensionRecordStorage.digestTables. diff --git a/python/lsst/daf/butler/registry/dimensions/static.py b/python/lsst/daf/butler/registry/dimensions/static.py index 777af98631..6046527fea 100644 --- a/python/lsst/daf/butler/registry/dimensions/static.py +++ b/python/lsst/daf/butler/registry/dimensions/static.py @@ -41,7 +41,7 @@ DatabaseDimensionElement, DatabaseTopologicalFamily, DimensionElement, - DimensionGraph, + DimensionGroup, DimensionUniverse, GovernorDimension, ) @@ -84,8 +84,8 @@ class StaticDimensionRecordStorageManager(DimensionRecordStorageManager): overlaps : `list` [ `DatabaseDimensionOverlapStorage` ] Objects that manage materialized overlaps between database-backed dimensions. - dimensionGraphStorage : `_DimensionGraphStorage` - Object that manages saved `DimensionGraph` definitions. + dimension_group_storage : `_DimensionGroupStorage` + Object that manages saved `DimensionGroup` definitions. universe : `DimensionUniverse` All known dimensions. """ @@ -98,7 +98,7 @@ def __init__( overlaps: dict[ tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage ], - dimensionGraphStorage: _DimensionGraphStorage, + dimension_group_storage: _DimensionGroupStorage, universe: DimensionUniverse, registry_schema_version: VersionTuple | None = None, ): @@ -106,7 +106,7 @@ def __init__( self._db = db self._records = records self._overlaps = overlaps - self._dimensionGraphStorage = dimensionGraphStorage + self._dimension_group_storage = dimension_group_storage @classmethod def initialize( @@ -183,13 +183,13 @@ def initialize( elementStoragePair[1].connect(overlapStorage) overlaps[overlapStorage.elements] = overlapStorage # Create table that stores DimensionGraph definitions. 
- dimensionGraphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe) + dimension_group_storage = _DimensionGroupStorage.initialize(db, context, universe=universe) return cls( db=db, records=records, universe=universe, overlaps=overlaps, - dimensionGraphStorage=dimensionGraphStorage, + dimension_group_storage=dimension_group_storage, registry_schema_version=registry_schema_version, ) @@ -207,13 +207,13 @@ def register(self, element: DimensionElement) -> DimensionRecordStorage: assert result, "All records instances should be created in initialize()." return result - def saveDimensionGraph(self, graph: DimensionGraph) -> int: + def save_dimension_group(self, graph: DimensionGroup) -> int: # Docstring inherited from DimensionRecordStorageManager. - return self._dimensionGraphStorage.save(graph) + return self._dimension_group_storage.save(graph) - def loadDimensionGraph(self, key: int) -> DimensionGraph: + def load_dimension_group(self, key: int) -> DimensionGroup: # Docstring inherited from DimensionRecordStorageManager. - return self._dimensionGraphStorage.load(key) + return self._dimension_group_storage.load(key) def clearCaches(self) -> None: # Docstring inherited from DimensionRecordStorageManager. @@ -322,8 +322,8 @@ def currentVersions(cls) -> list[VersionTuple]: return [_VERSION] -class _DimensionGraphStorage: - """Helper object that manages saved DimensionGraph definitions. +class _DimensionGroupStorage: + """Helper object that manages saved DimensionGroup definitions. Should generally be constructed by calling `initialize` instead of invoking the constructor directly. @@ -352,8 +352,8 @@ def __init__( self._idTable = idTable self._definitionTable = definitionTable self._universe = universe - self._keysByGraph: dict[DimensionGraph, int] = {universe.empty: 0} - self._graphsByKey: dict[int, DimensionGraph] = {0: universe.empty} + self._keysByGroup: dict[DimensionGroup, int] = {universe.empty.as_group(): 0} + self._groupsByKey: dict[int, DimensionGroup] = {0: universe.empty.as_group()} @classmethod def initialize( @@ -362,7 +362,7 @@ def initialize( context: StaticTablesContext, *, universe: DimensionUniverse, - ) -> _DimensionGraphStorage: + ) -> _DimensionGroupStorage: """Construct a new instance, including creating tables if necessary. Parameters @@ -377,7 +377,7 @@ def initialize( Returns ------- - storage : `_DimensionGraphStorage` + storage : `_DimensionGroupStorage` New instance of this class. 
""" # We need two tables just so we have one where the autoincrement key is @@ -427,22 +427,22 @@ def refresh(self) -> None: for row in sql_rows: key = row[self._definitionTable.columns.dimension_graph_id] dimensionNamesByKey[key].add(row[self._definitionTable.columns.dimension_name]) - keysByGraph: dict[DimensionGraph, int] = {self._universe.empty: 0} - graphsByKey: dict[int, DimensionGraph] = {0: self._universe.empty} + keysByGraph: dict[DimensionGroup, int] = {self._universe.empty.as_group(): 0} + graphsByKey: dict[int, DimensionGroup] = {0: self._universe.empty.as_group()} for key, dimensionNames in dimensionNamesByKey.items(): - graph = DimensionGraph(self._universe, names=dimensionNames) + graph = DimensionGroup(self._universe, names=dimensionNames) keysByGraph[graph] = key graphsByKey[key] = graph - self._graphsByKey = graphsByKey - self._keysByGraph = keysByGraph + self._groupsByKey = graphsByKey + self._keysByGroup = keysByGraph - def save(self, graph: DimensionGraph) -> int: + def save(self, group: DimensionGroup) -> int: """Save a `DimensionGraph` definition to the database, allowing it to be retrieved later via the returned key. Parameters ---------- - graph : `DimensionGraph` + group : `DimensionGroup` Set of dimensions to save. Returns @@ -451,7 +451,7 @@ def save(self, graph: DimensionGraph) -> int: Integer used as the unique key for this `DimensionGraph` in the database. """ - key = self._keysByGraph.get(graph) + key = self._keysByGroup.get(group) if key is not None: return key # Lock tables and then refresh to guard against races where some other @@ -461,18 +461,18 @@ def save(self, graph: DimensionGraph) -> int: # work in long-lived data repositories. with self._db.transaction(lock=[self._idTable, self._definitionTable]): self.refresh() - key = self._keysByGraph.get(graph) + key = self._keysByGroup.get(group) if key is None: (key,) = self._db.insert(self._idTable, {}, returnIds=True) # type: ignore self._db.insert( self._definitionTable, - *[{"dimension_graph_id": key, "dimension_name": name} for name in graph.required.names], + *[{"dimension_graph_id": key, "dimension_name": name} for name in group.required], ) - self._keysByGraph[graph] = key - self._graphsByKey[key] = graph + self._keysByGroup[group] = key + self._groupsByKey[key] = group return key - def load(self, key: int) -> DimensionGraph: + def load(self, key: int) -> DimensionGroup: """Retrieve a `DimensionGraph` that was previously saved in the database. @@ -487,8 +487,8 @@ def load(self, key: int) -> DimensionGraph: graph : `DimensionGraph` Retrieved graph. """ - graph = self._graphsByKey.get(key) + graph = self._groupsByKey.get(key) if graph is None: self.refresh() - graph = self._graphsByKey[key] + graph = self._groupsByKey[key] return graph diff --git a/python/lsst/daf/butler/registry/dimensions/table.py b/python/lsst/daf/butler/registry/dimensions/table.py index ef1f3c61be..75a79d96fc 100644 --- a/python/lsst/daf/butler/registry/dimensions/table.py +++ b/python/lsst/daf/butler/registry/dimensions/table.py @@ -321,7 +321,7 @@ def _insert_skypix_overlaps( # them will be way faster (and we don't want to add a new index # just for this operation). 
to_delete: list[dict[str, Any]] = [ - {"skypix_system": skypix.system.name, "skypix_level": skypix.level, **record.dataId.byName()} + {"skypix_system": skypix.system.name, "skypix_level": skypix.level, **record.dataId.required} for record in records ] _LOG.debug("Deleting old common skypix overlaps for %s.", self.element.name) @@ -335,7 +335,7 @@ def _insert_skypix_overlaps( for record in records: if record.region is None: continue - base_overlap_record = record.dataId.byName() + base_overlap_record = dict(record.dataId.required) base_overlap_record["skypix_system"] = skypix.system.name base_overlap_record["skypix_level"] = skypix.level for begin, end in skypix.pixelization.envelope(record.region): diff --git a/python/lsst/daf/butler/registry/interfaces/_dimensions.py b/python/lsst/daf/butler/registry/interfaces/_dimensions.py index a5c7516c2f..4598602207 100644 --- a/python/lsst/daf/butler/registry/interfaces/_dimensions.py +++ b/python/lsst/daf/butler/registry/interfaces/_dimensions.py @@ -48,7 +48,7 @@ DatabaseDimensionElement, DataCoordinate, DimensionElement, - DimensionGraph, + DimensionGroup, DimensionRecord, DimensionUniverse, GovernorDimension, @@ -758,19 +758,19 @@ def register(self, element: DimensionElement) -> DimensionRecordStorage: raise NotImplementedError() @abstractmethod - def saveDimensionGraph(self, graph: DimensionGraph) -> int: - """Save a `DimensionGraph` definition to the database, allowing it to + def save_dimension_group(self, graph: DimensionGroup) -> int: + """Save a `DimensionGroup` definition to the database, allowing it to be retrieved later via the returned key. Parameters ---------- - graph : `DimensionGraph` + dimensions : `DimensionGroup` Set of dimensions to save. Returns ------- key : `int` - Integer used as the unique key for this `DimensionGraph` in the + Integer used as the unique key for this `DimensionGroup` in the database. Raises @@ -782,20 +782,20 @@ def saveDimensionGraph(self, graph: DimensionGraph) -> int: raise NotImplementedError() @abstractmethod - def loadDimensionGraph(self, key: int) -> DimensionGraph: - """Retrieve a `DimensionGraph` that was previously saved in the + def load_dimension_group(self, key: int) -> DimensionGroup: + """Retrieve a `DimensionGroup` that was previously saved in the database. Parameters ---------- key : `int` - Integer used as the unique key for this `DimensionGraph` in the + Integer used as the unique key for this `DimensionGroup` in the database. Returns ------- - graph : `DimensionGraph` - Retrieved graph. + dimensions : `DimensionGroup` + Retrieved dimensions. Raises ------ diff --git a/python/lsst/daf/butler/registry/obscore/_manager.py b/python/lsst/daf/butler/registry/obscore/_manager.py index 361024738f..fca2230e8b 100644 --- a/python/lsst/daf/butler/registry/obscore/_manager.py +++ b/python/lsst/daf/butler/registry/obscore/_manager.py @@ -68,8 +68,8 @@ class _ExposureRegionFactory(ExposureRegionFactory): def __init__(self, dimensions: DimensionRecordStorageManager): self.dimensions = dimensions self.universe = dimensions.universe - self.exposure_dimensions = self.universe["exposure"].graph - self.exposure_detector_dimensions = self.universe.extract(["exposure", "detector"]) + self.exposure_dimensions = self.universe["exposure"].minimal_group + self.exposure_detector_dimensions = self.universe.conform(["exposure", "detector"]) def exposure_region(self, dataId: DataCoordinate, context: SqlQueryContext) -> Region | None: # Docstring is inherited from a base class. 
@@ -81,7 +81,7 @@ def exposure_region(self, dataId: DataCoordinate, context: SqlQueryContext) -> R return None relation = visit_definition_storage.join(relation, Join(), context) # Join in a table with either visit+detector regions or visit regions. - if "detector" in dataId.names: + if "detector" in dataId.dimensions: visit_detector_region_storage = self.dimensions.get(self.universe["visit_detector_region"]) if visit_detector_region_storage is None: return None diff --git a/python/lsst/daf/butler/registry/obscore/_records.py b/python/lsst/daf/butler/registry/obscore/_records.py index d0be555aad..583e82ce68 100644 --- a/python/lsst/daf/butler/registry/obscore/_records.py +++ b/python/lsst/daf/butler/registry/obscore/_records.py @@ -188,12 +188,12 @@ def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Record | None: record["t_max"] = t_max.mjd region = dataId.region - if self.exposure in dataId: - if (dimension_record := dataId.records[self.exposure]) is not None: + if self.exposure.name in dataId: + if (dimension_record := dataId.records[self.exposure.name]) is not None: self._exposure_records(dimension_record, record) if self.exposure_region_factory is not None: region = self.exposure_region_factory.exposure_region(dataId, context) - elif self.visit in dataId and (dimension_record := dataId.records[self.visit]) is not None: + elif self.visit.name in dataId and (dimension_record := dataId.records[self.visit.name]) is not None: self._visit_records(dimension_record, record) # ask each plugin for its values to add to a record. @@ -208,12 +208,12 @@ def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Record | None: else: record.update(plugin_records) - if self.band in dataId: + if self.band.name in dataId: em_range = None - if (label := dataId.get(self.physical_filter)) is not None: + if (label := dataId.get(self.physical_filter.name)) is not None: em_range = self.config.spectral_ranges.get(cast(str, label)) if not em_range: - band_name = dataId[self.band] + band_name = dataId[self.band.name] assert isinstance(band_name, str), "Band name must be string" em_range = self.config.spectral_ranges.get(band_name) if em_range: @@ -225,7 +225,7 @@ def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Record | None: # Dictionary to use for substitutions when formatting various # strings. 
fmt_kws: dict[str, Any] = dict(records=dataId.records) - fmt_kws.update(dataId.full.byName()) + fmt_kws.update(dataId.mapping) fmt_kws.update(id=ref.id) fmt_kws.update(run=ref.run) fmt_kws.update(dataset_type=dataset_type_name) diff --git a/python/lsst/daf/butler/registry/queries/expressions/_predicate.py b/python/lsst/daf/butler/registry/queries/expressions/_predicate.py index 827cb0f333..637fc4316d 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/_predicate.py +++ b/python/lsst/daf/butler/registry/queries/expressions/_predicate.py @@ -128,8 +128,8 @@ def make_string_expression_predicate( if data_id is None: data_id = DataCoordinate.makeEmpty(dimensions.universe) if not string: - for dimension in data_id.graph.governors: - governor_constraints[dimension.name] = {cast(str, data_id[dimension])} + for dimension in data_id.dimensions.governors: + governor_constraints[dimension] = {cast(str, data_id[dimension])} return None, governor_constraints try: parser = ParserYacc() diff --git a/python/lsst/daf/butler/registry/queries/expressions/check.py b/python/lsst/daf/butler/registry/queries/expressions/check.py index 2f41b6253c..7e5dda1fba 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/check.py +++ b/python/lsst/daf/butler/registry/queries/expressions/check.py @@ -379,10 +379,7 @@ def visitInner(self, branches: Sequence[TreeSummary], form: NormalForm) -> Inner # branches. To take care of that, we add any governor values it # contains to the summary in advance. summary = InnerSummary() - summary.dimension_values.update( - (k, self.dataId[k]) - for k in (self.dataId.graph.names if self.dataId.hasFull() else self.dataId.graph.required.names) - ) + summary.dimension_values.update(self.dataId.mapping) # Finally, we loop over those branches. 
for branch in branches: # Update the sets of dimensions and columns we've seen anywhere in @@ -435,7 +432,7 @@ def visitInner(self, branches: Sequence[TreeSummary], form: NormalForm) -> Inner governorsNeededInBranch.update(self.universe.dimensions[dimension].minimal_group.governors) if not governorsNeededInBranch.issubset(summary.dimension_values.keys()): missing = governorsNeededInBranch - summary.dimension_values.keys() - if missing <= self.defaults.names: + if missing <= self.defaults.dimensions.required: summary.defaultsNeeded.update(missing) elif not self._allow_orphans: still_missing = missing - self.defaults.names diff --git a/python/lsst/daf/butler/registry/sql_registry.py b/python/lsst/daf/butler/registry/sql_registry.py index 4338e0a5d1..fd5f58d97e 100644 --- a/python/lsst/daf/butler/registry/sql_registry.py +++ b/python/lsst/daf/butler/registry/sql_registry.py @@ -58,6 +58,7 @@ DimensionConfig, DimensionElement, DimensionGraph, + DimensionGroup, DimensionRecord, DimensionUniverse, ) @@ -890,12 +891,12 @@ def findDataset( component = components[0] dataId = DataCoordinate.standardize( dataId, - graph=parent_dataset_type.dimensions, + dimensions=parent_dataset_type.dimensions, universe=self.dimensions, defaults=self.defaults.dataId, **kwargs, ) - governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names} + governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors} (filtered_collections,) = backend.filter_dataset_collections( [parent_dataset_type], matched_collections, @@ -1039,7 +1040,7 @@ def insertDatasets( progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) if expand: expandedDataIds = [ - self.expandDataId(dataId, graph=storage.datasetType.dimensions) + self.expandDataId(dataId, dimensions=storage.datasetType.dimensions) for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") ] else: @@ -1152,7 +1153,7 @@ def _importDatasets( progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) if expand: expandedDatasets = [ - dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) + dataset.expanded(self.expandDataId(dataset.dataId, dimensions=storage.datasetType.dimensions)) for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") ] else: @@ -1411,7 +1412,7 @@ def decertify( standardizedDataIds = None if dataIds is not None: standardizedDataIds = [ - DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds + DataCoordinate.standardize(d, dimensions=storage.datasetType.dimensions) for d in dataIds ] storage.decertify( collectionRecord, @@ -1457,6 +1458,7 @@ def expandDataId( self, dataId: DataId | None = None, *, + dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, graph: DimensionGraph | None = None, records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, withDefaults: bool = True, @@ -1468,12 +1470,16 @@ def expandDataId( ---------- dataId : `DataCoordinate` or `dict`, optional Data ID to be expanded; augmented and overridden by ``kwargs``. + dimensions : `~collections.abc.Iterable` [ `str` ], \ + `DimensionGroup`, or `DimensionGraph`, optional + The dimensions to be identified by the new `DataCoordinate`. + If not provided, will be inferred from the keys of ``mapping`` and + ``**kwargs``, and ``universe`` must be provided unless ``mapping`` + is already a `DataCoordinate`. 
graph : `DimensionGraph`, optional - Set of dimensions for the expanded ID. If `None`, the dimensions - will be inferred from the keys of ``dataId`` and ``kwargs``. - Dimensions that are in ``dataId`` or ``kwargs`` but not in - ``graph`` are silently ignored, providing a way to extract and - ``graph`` expand a subset of a data ID. + Like ``dimensions``, but as a ``DimensionGraph`` instance. Ignored + if ``dimensions`` is provided. Deprecated and will be removed + after v27. records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \ optional Dimension record data to use before querying the database for that @@ -1515,7 +1521,12 @@ def expandDataId( defaults = self.defaults.dataId try: standardized = DataCoordinate.standardize( - dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs + dataId, + graph=graph, + dimensions=dimensions, + universe=self.dimensions, + defaults=defaults, + **kwargs, ) except KeyError as exc: # This means either kwargs have some odd name or required @@ -1530,46 +1541,50 @@ def expandDataId( else: records = dict(records) if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): - records.update(dataId.records.byName()) - keys = standardized.byName() + for element_name in dataId.dimensions.elements: + records[element_name] = dataId.records[element_name] + keys = dict(standardized.mapping) context = queries.SqlQueryContext(self._db, self._managers.column_types) - for element in standardized.graph.primaryKeyTraversalOrder: - record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL + for element_name in standardized.dimensions.lookup_order: + element = self.dimensions[element_name] + record = records.get(element_name, ...) # Use ... to mean not found; None might mean NULL if record is ...: - if isinstance(element, Dimension) and keys.get(element.name) is None: - if element in standardized.graph.required: + if element_name in self.dimensions.dimensions.names and keys.get(element_name) is None: + if element_name in standardized.dimensions.required: raise DimensionNameError( - f"No value or null value for required dimension {element.name}." + f"No value or null value for required dimension {element_name}." ) - keys[element.name] = None + keys[element_name] = None record = None else: - storage = self._managers.dimensions[element] - record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context) - records[element.name] = record + storage = self._managers.dimensions[element_name] + record = storage.fetch_one( + DataCoordinate.standardize(keys, dimensions=element.minimal_group), context + ) + records[element_name] = record if record is not None: for d in element.implied: value = getattr(record, d.name) if keys.setdefault(d.name, value) != value: raise InconsistentDataIdError( f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " - f"but {element.name} implies {d.name}={value!r}." + f"but {element_name} implies {d.name}={value!r}." ) else: - if element in standardized.graph.required: + if element_name in standardized.dimensions.required: raise DataIdValueError( f"Could not fetch record for required dimension {element.name} via keys {keys}." 
) if element.alwaysJoin: raise InconsistentDataIdError( - f"Could not fetch record for element {element.name} via keys {keys}, ", + f"Could not fetch record for element {element_name} via keys {keys}, ", "but it is marked alwaysJoin=True; this means one or more dimensions are not " "related.", ) for d in element.implied: keys.setdefault(d.name, None) records.setdefault(d.name, None) - return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) + return DataCoordinate.standardize(keys, dimensions=standardized.dimensions).expanded(records=records) def insertDimensionData( self, diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py index f9016a6fd1..ba8012f20b 100644 --- a/python/lsst/daf/butler/registry/tests/_registry.py +++ b/python/lsst/daf/butler/registry/tests/_registry.py @@ -59,7 +59,7 @@ from ..._named import NamedValueSet from ..._storage_class import StorageClass from ..._timespan import Timespan -from ...dimensions import DataCoordinate, DataCoordinateSet, DimensionGraph, SkyPixDimension +from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension from .._collection_summary import CollectionSummary from .._collection_type import CollectionType from .._config import RegistryConfig @@ -229,8 +229,8 @@ def testDatasetType(self): datasetTypeName = "test" storageClass = StorageClass("testDatasetType") registry.storageClasses.registerStorageClass(storageClass) - dimensions = registry.dimensions.extract(("instrument", "visit")) - differentDimensions = registry.dimensions.extract(("instrument", "patch")) + dimensions = registry.dimensions.conform(("instrument", "visit")) + differentDimensions = registry.dimensions.conform(("instrument", "patch")) inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass) # Inserting for the first time should return True self.assertTrue(registry.registerDatasetType(inDatasetType)) @@ -248,7 +248,7 @@ def testDatasetType(self): datasetTypeName = "testNoneTemplate" storageClass = StorageClass("testDatasetType2") registry.storageClasses.registerStorageClass(storageClass) - dimensions = registry.dimensions.extract(("instrument", "visit")) + dimensions = registry.dimensions.conform(("instrument", "visit")) inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass) registry.registerDatasetType(inDatasetType) outDatasetType2 = registry.getDatasetType(datasetTypeName) @@ -278,14 +278,14 @@ def testDimensions(self): registry.insertDimensionData(dimensionName, dimensionValue) # expandDataId should retrieve the record we just inserted self.assertEqual( - registry.expandDataId(instrument="DummyCam", graph=dimension.graph) + registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group) .records[dimensionName] .toDict(), dimensionValue, ) # expandDataId should raise if there is no record with the given ID. with self.assertRaises(DataIdValueError): - registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph) + registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group) # band doesn't have a table; insert should fail. with self.assertRaises(TypeError): registry.insertDimensionData("band", {"band": "i"}) @@ -300,7 +300,9 @@ def testDimensions(self): registry.insertDimensionData(dimensionName2, dimensionValue2) # expandDataId should retrieve the record we just inserted. 
self.assertEqual( - registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph) + registry.expandDataId( + instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group + ) .records[dimensionName2] .toDict(), dimensionValue2, @@ -721,10 +723,12 @@ def testComponentLookups(self): dataIds, DataCoordinateSet( { - DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions) + DataCoordinate.standardize( + instrument="Cam1", detector=d, dimensions=parentType.dimensions + ) for d in (1, 2, 3) }, - parentType.dimensions, + dimensions=parentType.dimensions, ), ) # Search for multiple datasets of a single type with queryDatasets. @@ -959,9 +963,9 @@ def testNestedTransaction(self): # block. registry.insertDimensionData(dimension, dataId1) self.assertTrue(checkpointReached) - self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph)) + self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group)) with self.assertRaises(DataIdValueError): - registry.expandDataId(dataId2, graph=dimension.graph) + registry.expandDataId(dataId2, dimensions=dimension.minimal_group) def testInstrumentDimensions(self): """Test queries involving only instrument dimensions, with no joins to @@ -1023,13 +1027,13 @@ def testInstrumentDimensions(self): registry.storageClasses.registerStorageClass(storageClass) rawType = DatasetType( name="RAW", - dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")), + dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")), storageClass=storageClass, ) registry.registerDatasetType(rawType) calexpType = DatasetType( name="CALEXP", - dimensions=registry.dimensions.extract(("instrument", "visit", "detector")), + dimensions=registry.dimensions.conform(("instrument", "visit", "detector")), storageClass=storageClass, ) registry.registerDatasetType(calexpType) @@ -1055,8 +1059,8 @@ def testInstrumentDimensions(self): (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2) registry.associate(tagged2, [ref]) - dimensions = DimensionGraph( - registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required) + dimensions = registry.dimensions.conform( + rawType.dimensions.required.names | calexpType.dimensions.required.names ) # Test that single dim string works as well as list of str rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet() @@ -1073,7 +1077,7 @@ def testInstrumentDimensions(self): rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet() self.assertEqual(len(rows), 4 * 3) # 4 exposures times 3 detectors for dataId in rows: - self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit")) + self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit")) self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201)) self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20)) self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5)) @@ -1082,7 +1086,7 @@ def testInstrumentDimensions(self): rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet() self.assertEqual(len(set(rows)), 6 * 3) # 6 exposures times 3 detectors; set needed to de-dupe for dataId in rows: - self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit")) + 
self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit")) self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201)) self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20)) self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5)) @@ -1163,28 +1167,27 @@ def testSkyMapDimensions(self): registry.storageClasses.registerStorageClass(storageClass) calexpType = DatasetType( name="deepCoadd_calexp", - dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")), + dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")), storageClass=storageClass, ) registry.registerDatasetType(calexpType) mergeType = DatasetType( name="deepCoadd_mergeDet", - dimensions=registry.dimensions.extract(("skymap", "tract", "patch")), + dimensions=registry.dimensions.conform(("skymap", "tract", "patch")), storageClass=storageClass, ) registry.registerDatasetType(mergeType) measType = DatasetType( name="deepCoadd_meas", - dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")), + dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")), storageClass=storageClass, ) registry.registerDatasetType(measType) - dimensions = DimensionGraph( - registry.dimensions, - dimensions=( - calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required - ), + dimensions = registry.dimensions.conform( + calexpType.dimensions.required.names + | mergeType.dimensions.required.names + | measType.dimensions.required.names ) # add pre-existing datasets @@ -1200,7 +1203,7 @@ def testSkyMapDimensions(self): rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet() self.assertEqual(len(rows), 3 * 4 * 2) # 4 tracts x 4 patches x 2 filters for dataId in rows: - self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band")) + self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band")) self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) @@ -1260,33 +1263,37 @@ def testSpatialJoin(self): # Overlap DatabaseDimensionElements with each other. for family1, family2 in itertools.combinations(families, 2): for element1, element2 in itertools.product(families[family1], families[family2]): - graph = DimensionGraph.union(element1.graph, element2.graph) + dimensions = element1.minimal_group | element2.minimal_group # Construct expected set of overlapping data IDs via a # brute-force comparison of the regions we've already fetched. expected = { - DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) + DataCoordinate.standardize( + {**dataId1.required, **dataId2.required}, dimensions=dimensions + ) for (dataId1, region1), (dataId2, region2) in itertools.product( regions[element1.name].items(), regions[element2.name].items() ) if not region1.isDisjointFrom(region2) } self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") - queried = set(registry.queryDataIds(graph)) + queried = set(registry.queryDataIds(dimensions)) self.assertEqual(expected, queried) # Overlap each DatabaseDimensionElement with the commonSkyPix system. 
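The rewritten spatial-join test builds its dimension sets with the union operator on DimensionGroup rather than DimensionGraph.union. A small sketch of that operation against the default universe (the exact membership printed depends on the configured dimensions):

    from lsst.daf.butler import DimensionUniverse

    universe = DimensionUniverse()
    group = universe.conform(["visit"]) | universe.conform(["tract"])
    print(group.names)     # the union plus all required and implied dependencies
    print(group.required)  # minimal identifying keys, e.g. instrument, skymap, tract, visit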
commonSkyPix = registry.dimensions.commonSkyPix for elementName, these_regions in regions.items(): - graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) + dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group expected = set() for dataId, region in these_regions.items(): for begin, end in commonSkyPix.pixelization.envelope(region): expected.update( - DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) + DataCoordinate.standardize( + {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions + ) for index in range(begin, end) ) self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") - queried = set(registry.queryDataIds(graph)) + queried = set(registry.queryDataIds(dimensions)) self.assertEqual(expected, queried) def testAbstractQuery(self): @@ -1413,9 +1420,9 @@ def testQueryResults(self): # Obtain expected results from methods other than those we're testing # here. That includes: # - the dimensions of the data IDs we want to query: - expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) + expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"]) # - the dimensions of some other data IDs we'll extract from that: - expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) + expected_subset_dimensions = registry.dimensions.conform(["detector"]) # - the data IDs we expect to obtain from the first queries: expectedDataIds = DataCoordinateSet( { @@ -1424,7 +1431,7 @@ def testQueryResults(self): ) for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) }, - graph=expectedGraph, + dimensions=expected_dimensions, hasFull=False, hasRecords=False, ) @@ -1442,7 +1449,7 @@ def testQueryResults(self): ), ] # - the data IDs we expect to extract from that: - expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) + expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions) # - the bias datasets we expect to find from those data IDs, after we # subset-out the physical_filter dimension, both with duplicates: expectedAllBiases = [ @@ -1466,7 +1473,7 @@ def testQueryResults(self): where="detector.purpose = 'SCIENCE'", # this rejects detector=4 instrument="Cam1", ) - self.assertEqual(dataIds.graph, expectedGraph) + self.assertEqual(dataIds.dimensions, expected_dimensions) self.assertEqual(dataIds.toSet(), expectedDataIds) self.assertCountEqual( list( @@ -1477,8 +1484,8 @@ def testQueryResults(self): ), expectedFlats, ) - subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) - self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) + subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) + self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) self.assertCountEqual( list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), @@ -1538,7 +1545,7 @@ def testQueryResults(self): self.assertCountEqual(list(biases), expectedDeduplicatedBiases) # Materialize the data ID subset query, but not the dataset queries. 
with subsetDataIds.materialize() as subsetDataIds: - self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) + self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) self.assertCountEqual( list( @@ -1565,7 +1572,7 @@ def testQueryResults(self): self.assertCountEqual(list(biases), expectedDeduplicatedBiases) # Materialize the original query, but none of the follow-up queries. with dataIds.materialize() as dataIds: - self.assertEqual(dataIds.graph, expectedGraph) + self.assertEqual(dataIds.dimensions, expected_dimensions) self.assertEqual(dataIds.toSet(), expectedDataIds) self.assertCountEqual( list( @@ -1576,8 +1583,8 @@ def testQueryResults(self): ), expectedFlats, ) - subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) - self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) + subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) + self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) self.assertCountEqual( list( @@ -1605,7 +1612,7 @@ def testQueryResults(self): # Materialize the subset data ID query, but not the dataset # queries. with subsetDataIds.materialize() as subsetDataIds: - self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) + self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) self.assertCountEqual( list( @@ -3244,7 +3251,9 @@ def testDatasetIdFactory(self): storageClass="int", ) run = "run" - data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) + data_id = DataCoordinate.standardize( + instrument="Cam1", detector=1, dimensions=dataset_type.dimensions + ) datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) self.assertIsInstance(datasetId, uuid.UUID) diff --git a/python/lsst/daf/butler/script/queryDataIds.py b/python/lsst/daf/butler/script/queryDataIds.py index 415d2652d8..5f407ffb58 100644 --- a/python/lsst/daf/butler/script/queryDataIds.py +++ b/python/lsst/daf/butler/script/queryDataIds.py @@ -39,7 +39,7 @@ from ..dimensions import DataCoordinate if TYPE_CHECKING: - from lsst.daf.butler import DimensionGraph + from lsst.daf.butler import DimensionGroup _LOG = logging.getLogger(__name__) @@ -78,16 +78,16 @@ def getAstropyTable(self, order: bool) -> AstropyTable: raise RuntimeError("No DataIds were provided.") dataId = next(iter(self.dataIds)) - dimensions = list(dataId.full.keys()) + dimensions = [dataId.universe.dimensions[k] for k in dataId.dimensions.data_coordinate_keys] columnNames = [str(item) for item in dimensions] # Need to hint the column types for numbers since the per-row # constructor of Table does not work this out on its own and sorting # will not work properly without. typeMap = {float: np.float64, int: np.int64} - columnTypes = [typeMap.get(type(value)) for value in dataId.full.values()] + columnTypes = [typeMap.get(type(value)) for value in dataId.full_values] - rows = [list(dataId.full.values()) for dataId in self.dataIds] + rows = [dataId.full_values for dataId in self.dataIds] table = AstropyTable(np.array(rows), names=columnNames, dtype=columnTypes) if order: @@ -116,27 +116,29 @@ def queryDataIds( # Determine the dimensions relevant to all given dataset types. # Since we are going to AND together all dimensions, we can not # seed the result with an empty set. 
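The change just below accumulates dataset-type dimensions with DimensionGroup.intersection instead of intersecting DimensionGraph instances. A sketch of the group operation on its own, using the default universe (printed contents depend on the dimension configuration):

    from lsst.daf.butler import DimensionUniverse

    universe = DimensionUniverse()
    a = universe.conform(["visit", "detector"])
    b = universe.conform(["visit", "tract"])
    print(a.intersection(b).names)  # dimensions common to both groups, e.g. visit and its dependencies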
- graph: DimensionGraph | None = None + dataset_type_dimensions: DimensionGroup | None = None dataset_types = list(butler.registry.queryDatasetTypes(datasets)) for dataset_type in dataset_types: - if graph is None: + if dataset_type_dimensions is None: # Seed with dimensions of first dataset type. - graph = dataset_type.dimensions + dataset_type_dimensions = dataset_type.dimensions.as_group() else: # Only retain dimensions that are in the current # set AND the set from this dataset type. - graph = graph.intersection(dataset_type.dimensions) - _LOG.debug("Dimensions now %s from %s", set(graph.names), dataset_type.name) + dataset_type_dimensions = dataset_type_dimensions.intersection( + dataset_type.dimensions.as_group() + ) + _LOG.debug("Dimensions now %s from %s", set(dataset_type_dimensions.names), dataset_type.name) # Break out of the loop early. No additional dimensions # can be added to an empty set when using AND. - if not graph: + if not dataset_type_dimensions: break - if not graph: + if not dataset_type_dimensions: names = [d.name for d in dataset_types] return None, f"No dimensions in common for specified dataset types ({names})" - dimensions = set(graph.names) + dimensions = set(dataset_type_dimensions.names) _LOG.info("Determined dimensions %s from datasets option %s", dimensions, datasets) query_collections: Iterable[str] | EllipsisType | None = None @@ -153,7 +155,7 @@ def queryDataIds( results = results.limit(limit, new_offset) if results.any(exact=False): - if results.graph: + if results.dimensions: table = _Table(results) if not table.dataIds: return None, "Post-query region filtering removed all rows, since nothing overlapped." diff --git a/python/lsst/daf/butler/script/queryDatasets.py b/python/lsst/daf/butler/script/queryDatasets.py index 4a7cac38f3..ebfd29516e 100644 --- a/python/lsst/daf/butler/script/queryDatasets.py +++ b/python/lsst/daf/butler/script/queryDatasets.py @@ -95,7 +95,10 @@ def getAstropyTable(self, datasetTypeName: str) -> AstropyTable: raise RuntimeError(f"No DatasetRefs were provided for dataset type {datasetTypeName}") refInfo = next(iter(self.datasetRefs)) - dimensions = list(refInfo.datasetRef.dataId.full.keys()) + dimensions = [ + refInfo.datasetRef.dataId.universe.dimensions[k] + for k in refInfo.datasetRef.dataId.dimensions.data_coordinate_keys + ] columnNames = ["type", "run", "id", *[str(item) for item in dimensions]] # Need to hint the column types for numbers since the per-row @@ -106,7 +109,7 @@ def getAstropyTable(self, datasetTypeName: str) -> AstropyTable: None, None, str, - *[typeMap.get(type(value)) for value in refInfo.datasetRef.dataId.full.values()], + *[typeMap.get(type(value)) for value in refInfo.datasetRef.dataId.full_values], ] if refInfo.uri: columnNames.append("URI") @@ -118,7 +121,7 @@ def getAstropyTable(self, datasetTypeName: str) -> AstropyTable: datasetTypeName, refInfo.datasetRef.run, str(refInfo.datasetRef.id), - *list(refInfo.datasetRef.dataId.full.values()), + *refInfo.datasetRef.dataId.full_values, ] if refInfo.uri: row.append(refInfo.uri) diff --git a/python/lsst/daf/butler/script/register_dataset_type.py b/python/lsst/daf/butler/script/register_dataset_type.py index f46fda8817..41ab12d4fb 100644 --- a/python/lsst/daf/butler/script/register_dataset_type.py +++ b/python/lsst/daf/butler/script/register_dataset_type.py @@ -71,17 +71,13 @@ def register_dataset_type( """ butler = Butler.from_config(repo, writeable=True, without_datastore=True) - composite, component = DatasetType.splitDatasetTypeName(dataset_type) + _, 
component = DatasetType.splitDatasetTypeName(dataset_type) if component: raise ValueError("Component dataset types are created automatically when the composite is created.") - # mypy does not think that Tuple[str, ...] is allowed for DatasetType - # constructor so we have to do the conversion here. - graph = butler.dimensions.extract(dimensions) - datasetType = DatasetType( dataset_type, - graph, + butler.dimensions.conform(dimensions), storage_class, parentStorageClass=None, isCalibration=is_calibration, diff --git a/python/lsst/daf/butler/tests/_datasetsHelper.py b/python/lsst/daf/butler/tests/_datasetsHelper.py index 28b00ec91d..ca42cef53f 100644 --- a/python/lsst/daf/butler/tests/_datasetsHelper.py +++ b/python/lsst/daf/butler/tests/_datasetsHelper.py @@ -39,7 +39,7 @@ from collections.abc import Iterable, Mapping from typing import TYPE_CHECKING, Any -from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, StorageClass +from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, DimensionGroup, StorageClass from lsst.daf.butler.formatters.yaml import YamlFormatter if TYPE_CHECKING: @@ -52,7 +52,7 @@ class DatasetTestHelper: def makeDatasetRef( self, datasetTypeName: str, - dimensions: DimensionGraph | Iterable[str | Dimension], + dimensions: DimensionGroup | DimensionGraph | Iterable[str | Dimension], storageClass: StorageClass | str, dataId: DataCoordinate | Mapping[str, Any], *, @@ -74,7 +74,7 @@ def makeDatasetRef( def _makeDatasetRef( self, datasetTypeName: str, - dimensions: DimensionGraph | Iterable[str | Dimension], + dimensions: DimensionGroup | DimensionGraph | Iterable[str | Dimension], storageClass: StorageClass | str, dataId: DataCoordinate | Mapping, *, @@ -95,7 +95,7 @@ def _makeDatasetRef( if run is None: run = "dummy" if not isinstance(dataId, DataCoordinate): - dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions) + dataId = DataCoordinate.standardize(dataId, dimensions=datasetType.dimensions) return DatasetRef(datasetType, dataId, id=id, run=run, conform=conform) diff --git a/python/lsst/daf/butler/tests/_testRepo.py b/python/lsst/daf/butler/tests/_testRepo.py index eba08df974..59cc4e6a1f 100644 --- a/python/lsst/daf/butler/tests/_testRepo.py +++ b/python/lsst/daf/butler/tests/_testRepo.py @@ -180,11 +180,13 @@ def _makeRecords(dataIds: Mapping[str, Iterable], universe: DimensionUniverse) - # Create values for all dimensions that are (recursive) required or implied # dependencies of the given ones. 
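The loop rewritten below walks dimension names from universe.conform(...) and looks the corresponding Dimension objects up by name, rather than iterating Dimension instances directly. A standalone sketch of that name-based lookup (the names printed depend on the dimension configuration; per the existing comments in this module, the iteration follows dependency order):

    from lsst.daf.butler import DimensionUniverse

    universe = DimensionUniverse()
    group = universe.conform(["visit"])
    for name in group.names:
        dimension = universe.dimensions[name]  # name-based lookup, as used in _makeRecords
        print(name, type(dimension).__name__)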
complete_data_id_values = {} - for dimension in universe.extract(dataIds.keys()): - if dimension.name in dataIds: - complete_data_id_values[dimension.name] = list(dataIds[dimension.name]) - if dimension.name not in complete_data_id_values: - complete_data_id_values[dimension.name] = [_makeRandomDataIdValue(dimension)] + for dimension_name in universe.conform(dataIds.keys()).names: + if dimension_name in dataIds: + complete_data_id_values[dimension_name] = list(dataIds[dimension_name]) + if dimension_name not in complete_data_id_values: + complete_data_id_values[dimension_name] = [ + _makeRandomDataIdValue(universe.dimensions[dimension_name]) + ] # Start populating dicts that will become DimensionRecords by providing # alternate keys like detector names @@ -316,7 +318,7 @@ def expandUniqueId(butler: Butler, partialId: Mapping[str, Any]) -> DataCoordina """ # The example is *not* a doctest because it requires dangerous I/O registry = butler.registry - dimensions = registry.dimensions.extract(partialId.keys()).required + dimensions = registry.dimensions.conform(partialId.keys()).required query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items()) @@ -440,14 +442,14 @@ def addDataIdValue(butler: Butler, dimension: str, value: str | int, **related: # Example is not doctest, because it's probably unsafe to create even an # in-memory butler in that environment. try: - fullDimension = butler.dimensions[dimension] + full_dimension = butler.dimensions[dimension] except KeyError as e: raise ValueError from e # Bad keys ignored by registry code - extraKeys = related.keys() - fullDimension.graph.dimensions.names - if extraKeys: + extra_keys = related.keys() - full_dimension.minimal_group.names + if extra_keys: raise ValueError( - f"Unexpected keywords {extraKeys} not found in {fullDimension.graph.dimensions.names}" + f"Unexpected keywords {extra_keys} not found in {full_dimension.minimal_group.names}" ) # Assemble a dictionary data ID holding the given primary dimension value @@ -456,25 +458,26 @@ def addDataIdValue(butler: Butler, dimension: str, value: str | int, **related: data_id.update(related) # Compute the set of all dimensions that these recursively depend on. - all_dimensions = butler.dimensions.extract(data_id.keys()) + all_dimensions = butler.dimensions.conform(data_id.keys()) # Create dicts that will become DimensionRecords for all of these data IDs. # This iteration is guaranteed to be in topological order, so we can count # on new data ID values being invented before they are needed. record_dicts_by_dimension: dict[Dimension, dict[str, Any]] = {} - for dimension_obj in all_dimensions: - dimension_value = data_id.get(dimension_obj.name) + for dimension_name in all_dimensions.names: + dimension_obj = butler.dimensions.dimensions[dimension_name] + dimension_value = data_id.get(dimension_name) if dimension_value is None: - data_id[dimension_obj.name], invented = _findOrInventDataIdValue(butler, data_id, dimension_obj) + data_id[dimension_name], invented = _findOrInventDataIdValue(butler, data_id, dimension_obj) if not invented: # No need to make a new record; one already exists. continue - if dimension_obj.name in related: + if dimension_name in related: # Caller passed in a value of this dimension explicitly, but it # isn't the primary dimension they asked to have a record created # for. That means they expect this record to already exist. 
continue - if dimension_obj != fullDimension and dimension_obj in all_dimensions.required: + if dimension_name != dimension and dimension_name in all_dimensions.required: # We also don't want to automatically create new dimension records # for required dimensions (except for the main dimension the caller # asked for); those are also asserted by the caller to already diff --git a/python/lsst/daf/butler/transfers/_context.py b/python/lsst/daf/butler/transfers/_context.py index 29a45627b6..5eec3dd38b 100644 --- a/python/lsst/daf/butler/transfers/_context.py +++ b/python/lsst/daf/butler/transfers/_context.py @@ -188,7 +188,8 @@ def saveDataIds( # least start to add / unblock) query functionality that should # let us speed this up internally as well. dataId = self._registry.expandDataId(dataId) - for record in dataId.records.values(): + for element_name in dataId.dimensions.elements: + record = dataId.records[element_name] if record is not None and record.definition in standardized_elements: self._records[record.definition].setdefault(record.dataId, record) diff --git a/python/lsst/daf/butler/transfers/_yaml.py b/python/lsst/daf/butler/transfers/_yaml.py index 2a558c6d3f..ed88b7a835 100644 --- a/python/lsst/daf/butler/transfers/_yaml.py +++ b/python/lsst/daf/butler/transfers/_yaml.py @@ -151,7 +151,7 @@ def saveDatasets(self, datasetType: DatasetType, run: str, *datasets: FileDatase { "type": "dataset_type", "name": datasetType.name, - "dimensions": [d.name for d in datasetType.dimensions], + "dimensions": list(datasetType.dimensions.names), "storage_class": datasetType.storageClass_name, "is_calibration": datasetType.isCalibration(), } @@ -164,7 +164,7 @@ def saveDatasets(self, datasetType: DatasetType, run: str, *datasets: FileDatase "records": [ { "dataset_id": [ref.id for ref in sorted(dataset.refs)], - "data_id": [ref.dataId.byName() for ref in sorted(dataset.refs)], + "data_id": [dict(ref.dataId.required) for ref in sorted(dataset.refs)], "path": dataset.path, "formatter": dataset.formatter, # TODO: look up and save other collections diff --git a/tests/test_butler.py b/tests/test_butler.py index 6eed6880f5..70a0da4559 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -107,7 +107,7 @@ def mock_s3(*args: Any, **kwargs: Any) -> Any: # type: ignore[no-untyped-def] if TYPE_CHECKING: import types - from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass + from lsst.daf.butler import Datastore, DimensionGroup, Registry, StorageClass TESTDIR = os.path.abspath(os.path.dirname(__file__)) @@ -175,7 +175,7 @@ class ButlerPutGetTests(TestCaseMixin): @staticmethod def addDatasetType( - datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry + datasetTypeName: str, dimensions: DimensionGroup, storageClass: StorageClass | str, registry: Registry ) -> DatasetType: """Create a DatasetType and register it""" datasetType = DatasetType(datasetTypeName, dimensions, storageClass) @@ -219,7 +219,7 @@ def create_butler( self.assertEqual(collections, {run}) # Create and register a DatasetType - dimensions = butler.dimensions.extract(["instrument", "visit"]) + dimensions = butler.dimensions.conform(["instrument", "visit"]) datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) @@ -514,7 +514,7 @@ def testDeferredCollectionPassing(self) -> None: # Construct a butler with no run or collection, but make it writeable. 
butler = Butler.from_config(self.tmpConfigFile, writeable=True) # Create and register a DatasetType - dimensions = butler.dimensions.extract(["instrument", "visit"]) + dimensions = butler.dimensions.conform(["instrument", "visit"]) datasetType = self.addDatasetType( "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry ) @@ -845,7 +845,7 @@ def testIngest(self) -> None: butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) # Create and register a DatasetType - dimensions = butler.dimensions.extract(["instrument", "visit", "detector"]) + dimensions = butler.dimensions.conform(["instrument", "visit", "detector"]) storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml") datasetTypeName = "metric" @@ -935,7 +935,7 @@ def testIngest(self) -> None: refs = [] for ref in dataset.refs: # Create a dict from the dataId to drop the records. - new_data_id = {str(k): v for k, v in ref.dataId.items()} + new_data_id = dict(ref.dataId.required) new_ref = butler.find_dataset(ref.datasetType, new_data_id, collections=ref.run) assert new_ref is not None self.assertFalse(new_ref.dataId.hasRecords()) @@ -1004,7 +1004,7 @@ def testPickle(self) -> None: def testGetDatasetTypes(self) -> None: butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) - dimensions = butler.dimensions.extract(["instrument", "visit", "physical_filter"]) + dimensions = butler.dimensions.conform(["instrument", "visit", "physical_filter"]) dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [ ( "instrument", @@ -1079,7 +1079,7 @@ def testGetDatasetTypes(self) -> None: def testTransaction(self) -> None: butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) datasetTypeName = "test_metric" - dimensions = butler.dimensions.extract(["instrument", "visit"]) + dimensions = butler.dimensions.conform(["instrument", "visit"]) dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = ( ("instrument", {"instrument": "DummyCam"}), ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}), @@ -1195,7 +1195,7 @@ def testButlerRewriteDataId(self) -> None: "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} ) - dimensions = butler.dimensions.extract(["instrument", "exposure"]) + dimensions = butler.dimensions.conform(["instrument", "exposure"]) datasetType = DatasetType(datasetTypeName, dimensions, storageClass) butler.registry.registerDatasetType(datasetType) @@ -1266,7 +1266,7 @@ def testPutTemplates(self) -> None: # Create two almost-identical DatasetTypes (both will use default # template) - dimensions = butler.dimensions.extract(["instrument", "visit"]) + dimensions = butler.dimensions.conform(["instrument", "visit"]) butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass)) butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass)) butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass)) @@ -1407,7 +1407,7 @@ def testRemoveRuns(self) -> None: butler.registry.registerRun(run2) # put a dataset in each metric = makeExampleMetrics() - dimensions = butler.dimensions.extract(["instrument", "physical_filter"]) + dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) datasetType = self.addDatasetType( "prune_collections_test_dataset", dimensions, storageClass, butler.registry ) @@ -1516,7 +1516,7 @@ def testPruneDatasets(self) -> None: # put some datasets. 
ref1 and ref2 have the same data ID, and are in # different runs. ref3 has a different data ID. metric = makeExampleMetrics() - dimensions = butler.dimensions.extract(["instrument", "physical_filter"]) + dimensions = butler.dimensions.conform(["instrument", "physical_filter"]) datasetType = self.addDatasetType( "prune_collections_test_dataset", dimensions, storageClass, butler.registry ) @@ -2132,7 +2132,7 @@ def _absolute_transfer(self, transfer: str) -> None: run = "run1" self.source_butler.registry.registerCollection(run, CollectionType.RUN) - dimensions = self.source_butler.dimensions.extract(()) + dimensions = self.source_butler.dimensions.conform(()) datasetType = DatasetType(datasetTypeName, dimensions, storageClass) self.source_butler.registry.registerDatasetType(datasetType) @@ -2188,7 +2188,7 @@ def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "St ) # Create dataset types in the source butler. - dimensions = self.source_butler.dimensions.extract(["instrument", "exposure"]) + dimensions = self.source_butler.dimensions.conform(["instrument", "exposure"]) for datasetTypeName in datasetTypeNames: datasetType = DatasetType(datasetTypeName, dimensions, storageClass) self.source_butler.registry.registerDatasetType(datasetType) @@ -2399,7 +2399,7 @@ def test_fallback(self) -> None: self.assertIn("MYRUN", set(collections)) # Create a ref. - dimensions = butler.dimensions.extract([]) + dimensions = butler.dimensions.conform([]) storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") datasetTypeName = "metric" datasetType = DatasetType(datasetTypeName, dimensions, storageClass) diff --git a/tests/test_cliCmdQueryDatasets.py b/tests/test_cliCmdQueryDatasets.py index 94fa6f4842..c09b32432d 100644 --- a/tests/test_cliCmdQueryDatasets.py +++ b/tests/test_cliCmdQueryDatasets.py @@ -50,10 +50,10 @@ def expectedFilesystemDatastoreTables(root: ResourcePath): ( "test_metric_comp.data", "ingest/run", - "R", "DummyCamComp", - "d-r", "423", + "R", + "d-r", root.join( "ingest/run/test_metric_comp.data/" "test_metric_comp_v00000423_fDummyCamComp_data.yaml" @@ -62,10 +62,10 @@ def expectedFilesystemDatastoreTables(root: ResourcePath): ( "test_metric_comp.data", "ingest/run", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", root.join( "ingest/run/test_metric_comp.data/" "test_metric_comp_v00000424_fDummyCamComp_data.yaml" @@ -73,7 +73,7 @@ def expectedFilesystemDatastoreTables(root: ResourcePath): ), ) ), - names=("type", "run", "band", "instrument", "physical_filter", "visit", "URI"), + names=("type", "run", "instrument", "visit", "band", "physical_filter", "URI"), ), AstropyTable( array( @@ -81,10 +81,10 @@ def expectedFilesystemDatastoreTables(root: ResourcePath): ( "test_metric_comp.output", "ingest/run", - "R", "DummyCamComp", - "d-r", "423", + "R", + "d-r", root.join( "ingest/run/test_metric_comp.output/" "test_metric_comp_v00000423_fDummyCamComp_output.yaml" @@ -93,10 +93,10 @@ def expectedFilesystemDatastoreTables(root: ResourcePath): ( "test_metric_comp.output", "ingest/run", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", root.join( "ingest/run/test_metric_comp.output/" "test_metric_comp_v00000424_fDummyCamComp_output.yaml" @@ -104,7 +104,7 @@ def expectedFilesystemDatastoreTables(root: ResourcePath): ), ) ), - names=("type", "run", "band", "instrument", "physical_filter", "visit", "URI"), + names=("type", "run", "instrument", "visit", "band", "physical_filter", "URI"), ), AstropyTable( array( @@ -112,10 +112,10 @@ def 
expectedFilesystemDatastoreTables(root: ResourcePath): ( "test_metric_comp.summary", "ingest/run", - "R", "DummyCamComp", - "d-r", "423", + "R", + "d-r", root.join( "ingest/run/test_metric_comp.summary/" "test_metric_comp_v00000423_fDummyCamComp_summary.yaml" @@ -124,10 +124,10 @@ def expectedFilesystemDatastoreTables(root: ResourcePath): ( "test_metric_comp.summary", "ingest/run", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", root.join( "ingest/run/test_metric_comp.summary/" "test_metric_comp_v00000424_fDummyCamComp_summary.yaml" @@ -135,7 +135,7 @@ def expectedFilesystemDatastoreTables(root: ResourcePath): ), ) ), - names=("type", "run", "band", "instrument", "physical_filter", "visit", "URI"), + names=("type", "run", "instrument", "visit", "band", "physical_filter", "URI"), ), ) @@ -197,11 +197,11 @@ def testNoShowURI(self): AstropyTable( array( ( - ("test_metric_comp", "ingest/run", "R", "DummyCamComp", "d-r", "423"), - ("test_metric_comp", "ingest/run", "R", "DummyCamComp", "d-r", "424"), + ("test_metric_comp", "ingest/run", "DummyCamComp", "423", "R", "d-r"), + ("test_metric_comp", "ingest/run", "DummyCamComp", "424", "R", "d-r"), ) ), - names=("type", "run", "band", "instrument", "physical_filter", "visit"), + names=("type", "run", "instrument", "visit", "band", "physical_filter"), ), ) @@ -217,8 +217,8 @@ def testWhere(self): expectedTables = ( AstropyTable( - array(("test_metric_comp", "ingest/run", "R", "DummyCamComp", "d-r", "423")), - names=("type", "run", "band", "instrument", "physical_filter", "visit"), + array(("test_metric_comp", "ingest/run", "DummyCamComp", "423", "R", "d-r")), + names=("type", "run", "instrument", "visit", "band", "physical_filter"), ), ) @@ -247,15 +247,15 @@ def testGlobDatasetType(self): AstropyTable( array( ( - ("test_metric_comp", "ingest/run", "R", "DummyCamComp", "d-r", "423"), - ("test_metric_comp", "ingest/run", "R", "DummyCamComp", "d-r", "424"), + ("test_metric_comp", "ingest/run", "DummyCamComp", "423", "R", "d-r"), + ("test_metric_comp", "ingest/run", "DummyCamComp", "424", "R", "d-r"), ) ), - names=("type", "run", "band", "instrument", "physical_filter", "visit"), + names=("type", "run", "instrument", "visit", "band", "physical_filter"), ), AstropyTable( - array(("alt_test_metric_comp", "ingest/run", "R", "DummyCamComp", "d-r", "425")), - names=("type", "run", "band", "instrument", "physical_filter", "visit"), + array(("alt_test_metric_comp", "ingest/run", "DummyCamComp", "425", "R", "d-r")), + names=("type", "run", "instrument", "visit", "band", "physical_filter"), ), ) @@ -285,10 +285,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.data", "foo", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", datastore_root.join( "foo/test_metric_comp.data/test_metric_comp_v00000424_fDummyCamComp_data.yaml" ), @@ -296,10 +296,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.data", "ingest/run", - "R", "DummyCamComp", - "d-r", "423", + "R", + "d-r", datastore_root.join( "ingest/run/test_metric_comp.data/" "test_metric_comp_v00000423_fDummyCamComp_data.yaml" @@ -308,10 +308,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.data", "ingest/run", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", datastore_root.join( "ingest/run/test_metric_comp.data/" "test_metric_comp_v00000424_fDummyCamComp_data.yaml" @@ -319,7 +319,7 @@ def testFindFirstAndCollections(self): ), ) ), - names=("type", "run", "band", "instrument", "physical_filter", "visit", "URI"), + names=("type", "run", "instrument", 
"visit", "band", "physical_filter", "URI"), ), AstropyTable( array( @@ -327,10 +327,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.output", "foo", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", datastore_root.join( "foo/test_metric_comp.output/" "test_metric_comp_v00000424_fDummyCamComp_output.yaml" @@ -339,10 +339,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.output", "ingest/run", - "R", "DummyCamComp", - "d-r", "423", + "R", + "d-r", datastore_root.join( "ingest/run/test_metric_comp.output/" "test_metric_comp_v00000423_fDummyCamComp_output.yaml" @@ -351,10 +351,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.output", "ingest/run", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", datastore_root.join( "ingest/run/test_metric_comp.output/" "test_metric_comp_v00000424_fDummyCamComp_output.yaml" @@ -362,7 +362,7 @@ def testFindFirstAndCollections(self): ), ) ), - names=("type", "run", "band", "instrument", "physical_filter", "visit", "URI"), + names=("type", "run", "instrument", "visit", "band", "physical_filter", "URI"), ), AstropyTable( array( @@ -370,10 +370,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.summary", "foo", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", datastore_root.join( "foo/test_metric_comp.summary/" "test_metric_comp_v00000424_fDummyCamComp_summary.yaml" @@ -382,10 +382,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.summary", "ingest/run", - "R", "DummyCamComp", - "d-r", "423", + "R", + "d-r", datastore_root.join( "ingest/run/test_metric_comp.summary/" "test_metric_comp_v00000423_fDummyCamComp_summary.yaml" @@ -394,10 +394,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.summary", "ingest/run", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", datastore_root.join( "ingest/run/test_metric_comp.summary/" "test_metric_comp_v00000424_fDummyCamComp_summary.yaml" @@ -405,7 +405,7 @@ def testFindFirstAndCollections(self): ), ) ), - names=("type", "run", "band", "instrument", "physical_filter", "visit", "URI"), + names=("type", "run", "instrument", "visit", "band", "physical_filter", "URI"), ), ) @@ -424,10 +424,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.data", "foo", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", datastore_root.join( "foo/test_metric_comp.data/test_metric_comp_v00000424_fDummyCamComp_data.yaml" ), @@ -435,10 +435,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.data", "ingest/run", - "R", "DummyCamComp", - "d-r", "423", + "R", + "d-r", datastore_root.join( "ingest/run/test_metric_comp.data/" "test_metric_comp_v00000423_fDummyCamComp_data.yaml" @@ -446,7 +446,7 @@ def testFindFirstAndCollections(self): ), ) ), - names=("type", "run", "band", "instrument", "physical_filter", "visit", "URI"), + names=("type", "run", "instrument", "visit", "band", "physical_filter", "URI"), ), AstropyTable( array( @@ -454,10 +454,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.output", "foo", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", datastore_root.join( "foo/test_metric_comp.output/" "test_metric_comp_v00000424_fDummyCamComp_output.yaml" @@ -466,10 +466,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.output", "ingest/run", - "R", "DummyCamComp", - "d-r", "423", + "R", + "d-r", datastore_root.join( "ingest/run/test_metric_comp.output/" "test_metric_comp_v00000423_fDummyCamComp_output.yaml" @@ -477,7 +477,7 @@ def testFindFirstAndCollections(self): ), ) ), - 
names=("type", "run", "band", "instrument", "physical_filter", "visit", "URI"), + names=("type", "run", "instrument", "visit", "band", "physical_filter", "URI"), ), AstropyTable( array( @@ -485,10 +485,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.summary", "foo", - "R", "DummyCamComp", - "d-r", "424", + "R", + "d-r", datastore_root.join( "foo/test_metric_comp.summary/" "test_metric_comp_v00000424_fDummyCamComp_summary.yaml" @@ -497,10 +497,10 @@ def testFindFirstAndCollections(self): ( "test_metric_comp.summary", "ingest/run", - "R", "DummyCamComp", - "d-r", "423", + "R", + "d-r", datastore_root.join( "ingest/run/test_metric_comp.summary/" "test_metric_comp_v00000423_fDummyCamComp_summary.yaml" @@ -508,7 +508,7 @@ def testFindFirstAndCollections(self): ), ) ), - names=("type", "run", "band", "instrument", "physical_filter", "visit", "URI"), + names=("type", "run", "instrument", "visit", "band", "physical_filter", "URI"), ), ) diff --git a/tests/test_constraints.py b/tests/test_constraints.py index 59d079345a..29810f5d97 100644 --- a/tests/test_constraints.py +++ b/tests/test_constraints.py @@ -42,7 +42,7 @@ def setUp(self): # Create DatasetRefs to test against constraints model self.universe = DimensionUniverse() - dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) + dimensions = self.universe.conform(("visit", "physical_filter", "instrument")) sc = StorageClass("DummySC", dict, None) self.calexpA = self.makeDatasetRef( "calexp", @@ -51,7 +51,7 @@ def setUp(self): {"instrument": "A", "physical_filter": "u", "visit": 3}, ) - dimensions = self.universe.extract(("visit", "detector", "instrument")) + dimensions = self.universe.conform(("visit", "detector", "instrument")) self.pviA = self.makeDatasetRef( "pvi", dimensions, diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 4abd4d60a4..7ec8cb0947 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -59,7 +59,7 @@ def testConstructor(self) -> None: """ datasetTypeName = "test" storageClass = StorageClass("test_StructuredData") - dimensions = self.universe.extract(("visit", "instrument")) + dimensions = self.universe.conform(("visit", "instrument")) datasetType = DatasetType(datasetTypeName, dimensions, storageClass) self.assertEqual(datasetType.name, datasetTypeName) self.assertEqual(datasetType.storageClass, storageClass) @@ -75,7 +75,7 @@ def testConstructor2(self) -> None: datasetTypeName = "test" storageClass = StorageClass("test_constructor2") StorageClassFactory().registerStorageClass(storageClass) - dimensions = self.universe.extract(("instrument", "visit")) + dimensions = self.universe.conform(("instrument", "visit")) datasetType = DatasetType(datasetTypeName, dimensions, "test_constructor2") self.assertEqual(datasetType.name, datasetTypeName) self.assertEqual(datasetType.storageClass, storageClass) @@ -85,7 +85,7 @@ def testNameValidation(self) -> None: """Test that dataset type names only contain certain characters in certain positions. 
""" - dimensions = self.universe.extract(("instrument", "visit")) + dimensions = self.universe.conform(("instrument", "visit")) goodNames = ("a", "A", "z1", "Z1", "a_1B", "A_1b", "_a") badNames = ("1", "a%b", "B+Z", "T[0]") @@ -118,8 +118,8 @@ def testEquality(self) -> None: storageA = StorageClass("test_a") storageB = StorageClass("test_b") parent = StorageClass("test") - dimensionsA = self.universe.extract(["instrument"]) - dimensionsB = self.universe.extract(["skymap"]) + dimensionsA = self.universe.conform(["instrument"]) + dimensionsB = self.universe.conform(["skymap"]) self.assertEqual( DatasetType( "a", @@ -266,7 +266,7 @@ def testCompatibility(self) -> None: storageB = StorageClass("test_b", pytype=list) storageC = StorageClass("test_c", pytype=dict) self.assertTrue(storageA.can_convert(storageB)) - dimensionsA = self.universe.extract(["instrument"]) + dimensionsA = self.universe.conform(["instrument"]) dA = DatasetType("a", dimensionsA, storageA) dA2 = DatasetType("a", dimensionsA, storageB) @@ -281,7 +281,7 @@ def testCompatibility(self) -> None: def testOverrideStorageClass(self) -> None: storageA = StorageClass("test_a", pytype=list, converters={"dict": "builtins.list"}) storageB = StorageClass("test_b", pytype=dict) - dimensions = self.universe.extract(["instrument"]) + dimensions = self.universe.conform(["instrument"]) dA = DatasetType("a", dimensions, storageA) dB = dA.overrideStorageClass(storageB) @@ -302,7 +302,7 @@ def testOverrideStorageClass(self) -> None: def testJson(self) -> None: storageA = StorageClass("test_a") - dimensionsA = self.universe.extract(["instrument"]) + dimensionsA = self.universe.conform(["instrument"]) self.assertEqual( DatasetType( "a", @@ -329,7 +329,7 @@ def testJson(self) -> None: def testSorting(self) -> None: """Can we sort a DatasetType""" storage = StorageClass("test_a") - dimensions = self.universe.extract(["instrument"]) + dimensions = self.universe.conform(["instrument"]) d_a = DatasetType("a", dimensions, storage) d_f = DatasetType("f", dimensions, storage) @@ -360,15 +360,15 @@ def testHashability(self) -> None: for name in ["a", "b"]: for storageClass in [storageC, storageD]: for dims in [("instrument",), ("skymap",)]: - datasetType = DatasetType(name, self.universe.extract(dims), storageClass) - datasetTypeCopy = DatasetType(name, self.universe.extract(dims), storageClass) + datasetType = DatasetType(name, self.universe.conform(dims), storageClass) + datasetTypeCopy = DatasetType(name, self.universe.conform(dims), storageClass) types.extend((datasetType, datasetTypeCopy)) unique += 1 # datasetType should always equal its copy self.assertEqual(len(set(types)), unique) # all other combinations are unique # also check that hashes of instances constructed with StorageClass # name matches hashes of instances constructed with instances - dimensions = self.universe.extract(["instrument"]) + dimensions = self.universe.conform(["instrument"]) self.assertEqual( hash(DatasetType("a", dimensions, storageC)), hash(DatasetType("a", dimensions, "test_c")) ) @@ -389,7 +389,7 @@ def testDeepCopy(self) -> None: """Test that we can copy a dataset type.""" storageClass = StorageClass("test_copy") datasetTypeName = "test" - dimensions = self.universe.extract(("instrument", "visit")) + dimensions = self.universe.conform(("instrument", "visit")) datasetType = DatasetType(datasetTypeName, dimensions, storageClass) dcopy = copy.deepcopy(datasetType) self.assertEqual(dcopy, datasetType) @@ -415,14 +415,14 @@ def testPickle(self) -> None: """Test pickle 
support.""" storageClass = StorageClass("test_pickle") datasetTypeName = "test" - dimensions = self.universe.extract(("instrument", "visit")) + dimensions = self.universe.conform(("instrument", "visit")) # Un-pickling requires that storage class is registered with factory. StorageClassFactory().registerStorageClass(storageClass) datasetType = DatasetType(datasetTypeName, dimensions, storageClass) datasetTypeOut = pickle.loads(pickle.dumps(datasetType)) self.assertIsInstance(datasetTypeOut, DatasetType) self.assertEqual(datasetType.name, datasetTypeOut.name) - self.assertEqual(datasetType.dimensions.names, datasetTypeOut.dimensions.names) + self.assertEqual(datasetType.dimensions, datasetTypeOut.dimensions) self.assertEqual(datasetType.storageClass, datasetTypeOut.storageClass) self.assertIsNone(datasetTypeOut.parentStorageClass) self.assertIs(datasetType.isCalibration(), datasetTypeOut.isCalibration()) @@ -487,7 +487,7 @@ def test_composites(self) -> None: self.assertFalse(storageClassA.isComposite()) self.assertFalse(storageClassB.isComposite()) - dimensions = self.universe.extract(("instrument", "visit")) + dimensions = self.universe.conform(("instrument", "visit")) datasetTypeComposite = DatasetType("composite", dimensions, storageClass) datasetTypeComponentA = datasetTypeComposite.makeComponentDatasetType("compA") @@ -527,7 +527,7 @@ def setUp(self) -> None: sc_factory.registerStorageClass(self.componentStorageClass1) sc_factory.registerStorageClass(self.componentStorageClass2) sc_factory.registerStorageClass(self.parentStorageClass) - dimensions = self.universe.extract(("instrument", "visit")) + dimensions = self.universe.conform(("instrument", "visit")) self.dataId = DataCoordinate.standardize( dict(instrument="DummyCam", visit=42), universe=self.universe ) @@ -570,8 +570,8 @@ def testConstructor(self) -> None: # Passing a data ID that is missing dimensions should fail. # Create a full DataCoordinate to ensure that we are testing the # right thing. - dimensions = self.universe.extract(("instrument",)) - dataId = DataCoordinate.standardize(instrument="DummyCam", graph=dimensions) + dimensions = self.universe.conform(("instrument",)) + dataId = DataCoordinate.standardize(instrument="DummyCam", dimensions=dimensions) with self.assertRaises(KeyError): DatasetRef(self.datasetType, dataId, run="run") # Constructing a resolved ref should preserve run as well as everything @@ -592,20 +592,20 @@ def testConstructor(self) -> None: def testSorting(self) -> None: """Can we sort a DatasetRef""" # All refs have the same run. - dimensions = self.universe.extract(("instrument", "visit")) + dimensions = self.universe.conform(("instrument", "visit")) ref1 = DatasetRef( self.datasetType, - DataCoordinate.standardize(instrument="DummyCam", visit=1, graph=dimensions), + DataCoordinate.standardize(instrument="DummyCam", visit=1, dimensions=dimensions), run="run", ) ref2 = DatasetRef( self.datasetType, - DataCoordinate.standardize(instrument="DummyCam", visit=10, graph=dimensions), + DataCoordinate.standardize(instrument="DummyCam", visit=10, dimensions=dimensions), run="run", ) ref3 = DatasetRef( self.datasetType, - DataCoordinate.standardize(instrument="DummyCam", visit=22, graph=dimensions), + DataCoordinate.standardize(instrument="DummyCam", visit=22, dimensions=dimensions), run="run", ) @@ -619,23 +619,23 @@ def testSorting(self) -> None: # Now include different runs. 
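These testSorting updates pass explicit keyword values together with dimensions= where graph= was used before. The same pattern in isolation, with illustrative values matching the surrounding tests:

    from lsst.daf.butler import DataCoordinate, DimensionUniverse

    universe = DimensionUniverse()
    dims = universe.conform(["instrument", "visit"])
    # dimensions= replaces the deprecated graph= argument; "DummyCam" and 42
    # are placeholder values, as in the tests above and below.
    data_id = DataCoordinate.standardize(instrument="DummyCam", visit=42, dimensions=dims)
    print(data_id)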
ref1 = DatasetRef( self.datasetType, - DataCoordinate.standardize(instrument="DummyCam", visit=43, graph=dimensions), + DataCoordinate.standardize(instrument="DummyCam", visit=43, dimensions=dimensions), run="b", ) self.assertEqual(ref1.run, "b") ref4 = DatasetRef( self.datasetType, - DataCoordinate.standardize(instrument="DummyCam", visit=10, graph=dimensions), + DataCoordinate.standardize(instrument="DummyCam", visit=10, dimensions=dimensions), run="b", ) ref2 = DatasetRef( self.datasetType, - DataCoordinate.standardize(instrument="DummyCam", visit=4, graph=dimensions), + DataCoordinate.standardize(instrument="DummyCam", visit=4, dimensions=dimensions), run="a", ) ref3 = DatasetRef( self.datasetType, - DataCoordinate.standardize(instrument="DummyCam", visit=104, graph=dimensions), + DataCoordinate.standardize(instrument="DummyCam", visit=104, dimensions=dimensions), run="c", ) diff --git a/tests/test_datastore.py b/tests/test_datastore.py index d18996cfdf..6c1bd7b1a3 100644 --- a/tests/test_datastore.py +++ b/tests/test_datastore.py @@ -161,7 +161,7 @@ def testConfigurationValidation(self) -> None: with self.assertRaises(DatastoreValidationError): datastore.validateConfiguration([sc2], logFailures=True) - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} ref = self.makeDatasetRef("metric", dimensions, sc, dataId) datastore.validateConfiguration([ref]) @@ -169,7 +169,7 @@ def testConfigurationValidation(self) -> None: def testParameterValidation(self) -> None: """Check that parameters are validated""" sc = self.storageClassFactory.getStorageClass("ThingOne") - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} ref = self.makeDatasetRef("metric", dimensions, sc, dataId) datastore = self.makeDatastore() @@ -190,7 +190,7 @@ def testBasicPutGet(self) -> None: for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle") ] - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"} @@ -309,7 +309,7 @@ def testTrustGetRequest(self) -> None: # Create multiple storage classes for testing with or without # disassembly sc = self.storageClassFactory.getStorageClass(sc_name) - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"} @@ -457,7 +457,7 @@ def testDisassembly(self) -> None: datastore = self.makeDatastore() # Dummy dataId - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"} for i, sc in enumerate(storageClasses): @@ -509,7 +509,7 @@ def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore, tuple[DatasetRef, metrics = makeExampleMetrics() datastore = self.makeDatastore() # Put - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", 
"physical_filter")) sc = self.storageClassFactory.getStorageClass("StructuredData") refs = [] for i in range(n_refs): @@ -573,7 +573,7 @@ def testForget(self) -> None: def testTransfer(self) -> None: metrics = makeExampleMetrics() - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"} sc = self.storageClassFactory.getStorageClass("StructuredData") @@ -591,7 +591,7 @@ def testTransfer(self) -> None: def testBasicTransaction(self) -> None: datastore = self.makeDatastore() storageClass = self.storageClassFactory.getStorageClass("StructuredData") - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) nDatasets = 6 dataIds = [ {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"} for i in range(nDatasets) @@ -637,7 +637,7 @@ def testBasicTransaction(self) -> None: def testNestedTransaction(self) -> None: datastore = self.makeDatastore() storageClass = self.storageClassFactory.getStorageClass("StructuredData") - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) metrics = makeExampleMetrics() dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"} @@ -669,7 +669,7 @@ def testNestedTransaction(self) -> None: def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]: storageClass = self.storageClassFactory.getStorageClass("StructuredData") - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) metrics = makeExampleMetrics() dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"} ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) @@ -865,7 +865,7 @@ def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[Dataset raise unittest.SkipTest("in-memory datastore does not support record export/import") metrics = makeExampleMetrics() - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) sc = self.storageClassFactory.getStorageClass("StructuredData") refs = [] @@ -930,7 +930,7 @@ def testExport(self) -> None: # Create a new ref that is not known to the datastore and try to # export it. 
sc = self.storageClassFactory.getStorageClass("ThingOne") - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} ref = self.makeDatasetRef("metric", dimensions, sc, dataId) with self.assertRaises(FileNotFoundError): @@ -1019,7 +1019,7 @@ def testAtomicWrite(self) -> None: """Test that we write to a temporary and then rename""" datastore = self.makeDatastore() storageClass = self.storageClassFactory.getStorageClass("StructuredData") - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) metrics = makeExampleMetrics() dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"} @@ -1048,7 +1048,7 @@ def testCanNotDeterminePutFormatterLocation(self) -> None: "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp} ) - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) @@ -1088,7 +1088,7 @@ def testChecksum(self) -> None: """Ensure that checksums have not been calculated.""" datastore = self.makeDatastore() storageClass = self.storageClassFactory.getStorageClass("StructuredData") - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) metrics = makeExampleMetrics() dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"} @@ -1200,7 +1200,7 @@ def testCleanup(self) -> None: storageClass = self.storageClassFactory.getStorageClass("StructuredData") - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) @@ -1281,7 +1281,7 @@ def testConstraints(self) -> None: sc1 = self.storageClassFactory.getStorageClass("StructuredData") sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") - dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) + dimensions = self.universe.conform(("visit", "physical_filter", "instrument")) dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"} # Write empty file suitable for ingest check (JSON and YAML variants) @@ -1379,7 +1379,7 @@ def testConstraints(self) -> None: sc1 = self.storageClassFactory.getStorageClass("StructuredData") sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") - dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) + dimensions = self.universe.conform(("visit", "physical_filter", "instrument")) dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"} dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"} @@ -1466,7 +1466,7 @@ def setUp(self) -> None: # Create some test dataset refs and associated test files sc = self.storageClassFactory.getStorageClass("StructuredDataDict") - dimensions = self.universe.extract(("visit", "physical_filter")) + dimensions = self.universe.conform(("visit", "physical_filter")) dataId = {"instrument": "dummy", "visit": 52, 
"physical_filter": "V", "band": "v"} # Create list of refs and list of temporary files @@ -1829,7 +1829,7 @@ class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase): def test_basics(self) -> None: storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") - ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) + ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) null = NullDatastore(None, None) @@ -1921,7 +1921,7 @@ class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase): def test_StoredFileInfo(self) -> None: storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") - ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) + ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) record = dict( storage_class="StructuredDataDict", @@ -1935,7 +1935,7 @@ def test_StoredFileInfo(self) -> None: self.assertEqual(info.to_record(), record) - ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) + ref2 = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) rebased = info.rebase(ref2) self.assertEqual(rebased.rebase(ref), info) diff --git a/tests/test_dimensions.py b/tests/test_dimensions.py index afb7296345..4cd61de192 100644 --- a/tests/test_dimensions.py +++ b/tests/test_dimensions.py @@ -43,7 +43,7 @@ DataCoordinateSet, Dimension, DimensionConfig, - DimensionGraph, + DimensionElement, DimensionGroup, DimensionPacker, DimensionUniverse, @@ -51,6 +51,7 @@ NamedValueSet, TimespanDatabaseRepresentation, YamlRepoImportBackend, + ddl, ) from lsst.daf.butler.registry import RegistryConfig, _RegistryFactory @@ -76,7 +77,7 @@ def loadDimensionData() -> DataCoordinateSequence: backend = YamlRepoImportBackend(stream, registry) backend.register() backend.load(datastore=None) - dimensions = DimensionGraph(registry.dimensions, names=["visit", "detector", "tract", "patch"]) + dimensions = registry.dimensions.conform(["visit", "detector", "tract", "patch"]) return registry.queryDataIds(dimensions).expanded().toSequence() @@ -122,47 +123,51 @@ class DimensionTestCase(unittest.TestCase): def setUp(self): self.universe = DimensionUniverse() - def checkGraphInvariants(self, graph): - elements = list(graph.elements) - for n, element in enumerate(elements): + def checkGroupInvariants(self, group: DimensionGroup): + elements = list(group.elements) + for n, element_name in enumerate(elements): + element = self.universe[element_name] # Ordered comparisons on graphs behave like sets. - self.assertLessEqual(element.graph, graph) + self.assertLessEqual(element.minimal_group, group) # Ordered comparisons on elements correspond to the ordering within # a DimensionUniverse (topological, with deterministic # tiebreakers). 
- for other in elements[:n]: + for other_name in elements[:n]: + other = self.universe[other_name] self.assertLess(other, element) self.assertLessEqual(other, element) - for other in elements[n + 1 :]: + for other_name in elements[n + 1 :]: + other = self.universe[other_name] self.assertGreater(other, element) self.assertGreaterEqual(other, element) if isinstance(element, Dimension): - self.assertEqual(element.graph.required, element.required) - self.assertEqual(DimensionGraph(self.universe, graph.required), graph) + self.assertEqual(element.minimal_group.required, element.required) + self.assertEqual(self.universe.conform(group.required), group) self.assertCountEqual( - graph.required, + group.required, [ - dimension - for dimension in graph.dimensions - if not any(dimension in other.graph.implied for other in graph.elements) + dimension_name + for dimension_name in group.names + if not any( + dimension_name in self.universe[other_name].minimal_group.implied + for other_name in group.elements + ) ], ) - self.assertCountEqual(graph.implied, graph.dimensions - graph.required) - self.assertCountEqual( - graph.dimensions, [element for element in graph.elements if isinstance(element, Dimension)] - ) - self.assertCountEqual(graph.dimensions, itertools.chain(graph.required, graph.implied)) + self.assertCountEqual(group.implied, group.names - group.required) + self.assertCountEqual(group.names, itertools.chain(group.required, group.implied)) # Check primary key traversal order: each element should follow any it # requires, and element that is implied by any other in the graph # follow at least one of those. - seen = NamedValueSet() - for element in graph.primaryKeyTraversalOrder: - with self.subTest(required=graph.required, implied=graph.implied, element=element): - seen.add(element) - self.assertLessEqual(element.graph.required, seen) - if element in graph.implied: - self.assertTrue(any(element in s.implied for s in seen)) - self.assertCountEqual(seen, graph.elements) + seen: set[str] = set() + for element_name in group.lookup_order: + element = self.universe[element_name] + with self.subTest(required=group.required, implied=group.implied, element=element): + seen.add(element_name) + self.assertLessEqual(element.minimal_group.required, seen) + if element_name in group.implied: + self.assertTrue(any(element_name in self.universe[s].implied for s in seen)) + self.assertCountEqual(seen, group.elements) def testConfigPresent(self): config = self.universe.dimensionConfig @@ -230,7 +235,7 @@ def testVersion(self): def testConfigRead(self): self.assertEqual( - set(self.universe.dimensions.names), + set(self.universe.getStaticDimensions().names), { "instrument", "visit", @@ -249,49 +254,45 @@ def testConfigRead(self): ) def testGraphs(self): - self.checkGraphInvariants(self.universe.empty) - for element in self.universe.elements: - self.checkGraphInvariants(element.graph) + self.checkGroupInvariants(self.universe.empty.as_group()) + for element in self.universe.getStaticElements(): + self.checkGroupInvariants(element.minimal_group) def testInstrumentDimensions(self): - graph = DimensionGraph(self.universe, names=("exposure", "detector", "visit")) + group = self.universe.conform(["exposure", "detector", "visit"]) self.assertCountEqual( - graph.dimensions.names, + group.names, ("instrument", "exposure", "detector", "visit", "physical_filter", "band"), ) - self.assertCountEqual(graph.required.names, ("instrument", "exposure", "detector", "visit")) - self.assertCountEqual(graph.implied.names, 
("physical_filter", "band")) - self.assertCountEqual( - graph.elements.names - graph.dimensions.names, ("visit_detector_region", "visit_definition") - ) - self.assertCountEqual(graph.governors.names, {"instrument"}) + self.assertCountEqual(group.required, ("instrument", "exposure", "detector", "visit")) + self.assertCountEqual(group.implied, ("physical_filter", "band")) + self.assertCountEqual(group.elements - group.names, ("visit_detector_region", "visit_definition")) + self.assertCountEqual(group.governors, {"instrument"}) def testCalibrationDimensions(self): - graph = DimensionGraph(self.universe, names=("physical_filter", "detector")) - self.assertCountEqual(graph.dimensions.names, ("instrument", "detector", "physical_filter", "band")) - self.assertCountEqual(graph.required.names, ("instrument", "detector", "physical_filter")) - self.assertCountEqual(graph.implied.names, ("band",)) - self.assertCountEqual(graph.elements.names, graph.dimensions.names) - self.assertCountEqual(graph.governors.names, {"instrument"}) + group = self.universe.conform(["physical_filter", "detector"]) + self.assertCountEqual(group.names, ("instrument", "detector", "physical_filter", "band")) + self.assertCountEqual(group.required, ("instrument", "detector", "physical_filter")) + self.assertCountEqual(group.implied, ("band",)) + self.assertCountEqual(group.elements, group.names) + self.assertCountEqual(group.governors, {"instrument"}) def testObservationDimensions(self): - graph = DimensionGraph(self.universe, names=("exposure", "detector", "visit")) + group = self.universe.conform(["exposure", "detector", "visit"]) self.assertCountEqual( - graph.dimensions.names, + group.names, ("instrument", "detector", "visit", "exposure", "physical_filter", "band"), ) - self.assertCountEqual(graph.required.names, ("instrument", "detector", "exposure", "visit")) - self.assertCountEqual(graph.implied.names, ("physical_filter", "band")) - self.assertCountEqual( - graph.elements.names - graph.dimensions.names, ("visit_detector_region", "visit_definition") - ) - self.assertCountEqual(graph.spatial.names, ("observation_regions",)) - self.assertCountEqual(graph.temporal.names, ("observation_timespans",)) - self.assertCountEqual(graph.governors.names, {"instrument"}) - self.assertEqual(graph.spatial.names, {"observation_regions"}) - self.assertEqual(graph.temporal.names, {"observation_timespans"}) - self.assertEqual(next(iter(graph.spatial)).governor, self.universe["instrument"]) - self.assertEqual(next(iter(graph.temporal)).governor, self.universe["instrument"]) + self.assertCountEqual(group.required, ("instrument", "detector", "exposure", "visit")) + self.assertCountEqual(group.implied, ("physical_filter", "band")) + self.assertCountEqual(group.elements - group.names, ("visit_detector_region", "visit_definition")) + self.assertCountEqual(group.spatial.names, ("observation_regions",)) + self.assertCountEqual(group.temporal.names, ("observation_timespans",)) + self.assertCountEqual(group.governors, {"instrument"}) + self.assertEqual(group.spatial.names, {"observation_regions"}) + self.assertEqual(group.temporal.names, {"observation_timespans"}) + self.assertEqual(next(iter(group.spatial)).governor, self.universe["instrument"]) + self.assertEqual(next(iter(group.temporal)).governor, self.universe["instrument"]) self.assertEqual(self.universe["visit_definition"].populated_by, self.universe["visit"]) self.assertEqual(self.universe["visit_system_membership"].populated_by, self.universe["visit"]) 
self.assertEqual(self.universe["visit_detector_region"].populated_by, self.universe["visit"]) @@ -308,29 +309,26 @@ def testObservationDimensions(self): ) def testSkyMapDimensions(self): - graph = DimensionGraph(self.universe, names=("patch",)) - self.assertCountEqual(graph.dimensions.names, ("skymap", "tract", "patch")) - self.assertCountEqual(graph.required.names, ("skymap", "tract", "patch")) - self.assertCountEqual(graph.implied.names, ()) - self.assertCountEqual(graph.elements.names, graph.dimensions.names) - self.assertCountEqual(graph.spatial.names, ("skymap_regions",)) - self.assertCountEqual(graph.governors.names, {"skymap"}) - self.assertEqual(graph.spatial.names, {"skymap_regions"}) - self.assertEqual(next(iter(graph.spatial)).governor, self.universe["skymap"]) + group = self.universe.conform(["patch"]) + self.assertEqual(group.names, {"skymap", "tract", "patch"}) + self.assertEqual(group.required, {"skymap", "tract", "patch"}) + self.assertEqual(group.implied, set()) + self.assertEqual(group.elements, group.names) + self.assertEqual(group.governors, {"skymap"}) + self.assertEqual(group.spatial.names, {"skymap_regions"}) + self.assertEqual(next(iter(group.spatial)).governor, self.universe["skymap"]) def testSubsetCalculation(self): """Test that independent spatial and temporal options are computed correctly. """ - graph = DimensionGraph( - self.universe, names=("visit", "detector", "tract", "patch", "htm7", "exposure") - ) - self.assertCountEqual(graph.spatial.names, ("observation_regions", "skymap_regions", "htm")) - self.assertCountEqual(graph.temporal.names, ("observation_timespans",)) + group = self.universe.conform(["visit", "detector", "tract", "patch", "htm7", "exposure"]) + self.assertCountEqual(group.spatial.names, ("observation_regions", "skymap_regions", "htm")) + self.assertCountEqual(group.temporal.names, ("observation_timespans",)) def testSchemaGeneration(self): - tableSpecs = NamedKeyDict({}) - for element in self.universe.elements: + tableSpecs: NamedKeyDict[DimensionElement, ddl.TableSpec] = NamedKeyDict({}) + for element in self.universe.getStaticElements(): if element.hasTable and element.viewOf is None: tableSpecs[element] = element.RecordClass.fields.makeTableSpec( TimespanReprClass=TimespanDatabaseRepresentation.Compound, @@ -365,7 +363,7 @@ def testSchemaGeneration(self): self.assertFalse(tableSpec.fields[dep.name].primaryKey) for foreignKey in tableSpec.foreignKeys: self.assertIn(foreignKey.table, tableSpecs) - self.assertIn(foreignKey.table, element.graph.dimensions.names) + self.assertIn(foreignKey.table, element.dimensions) self.assertEqual(len(foreignKey.source), len(foreignKey.target)) for source, target in zip(foreignKey.source, foreignKey.target, strict=True): self.assertIn(source, tableSpec.fields.names) @@ -390,12 +388,12 @@ def testPickling(self): self.assertIs(universe1, universe2) self.assertIs(universe1, universe3) self.assertIs(universe1, universe4) - for element1 in universe1.elements: + for element1 in universe1.getStaticElements(): element2 = pickle.loads(pickle.dumps(element1)) self.assertIs(element1, element2) - graph1 = element1.graph - graph2 = pickle.loads(pickle.dumps(graph1)) - self.assertIs(graph1, graph2) + group1 = element1.minimal_group + group2 = pickle.loads(pickle.dumps(group1)) + self.assertIs(group1, group2) @dataclass @@ -408,7 +406,7 @@ class SplitByStateFlags: """Data IDs that only contain values for required dimensions. 
`DataCoordinateSequence.hasFull()` will return `True` for this if and only - if ``minimal.graph.implied`` has no elements. + if ``minimal.dimensions.implied`` has no elements. `DataCoordinate.hasRecords()` will always return `False`. """ @@ -426,7 +424,7 @@ class SplitByStateFlags: always return `True` for this attribute. """ - def chain(self, n: int | None = None) -> Iterator: + def chain(self, n: int | None = None) -> Iterator[DataCoordinate]: """Iterate over the data IDs of different types. Parameters @@ -483,35 +481,34 @@ def randomDataIds(self, n: int, dataIds: DataCoordinateSequence | None = None): dataIds = self.allDataIds return DataCoordinateSequence( self.rng.sample(dataIds, n), - graph=dataIds.graph, + dimensions=dataIds.dimensions, hasFull=dataIds.hasFull(), hasRecords=dataIds.hasRecords(), check=False, ) - def randomDimensionSubset(self, n: int = 3, graph: DimensionGraph | None = None) -> DimensionGraph: - """Generate a random `DimensionGraph` that has a subset of the + def randomDimensionSubset(self, n: int = 3, group: DimensionGroup | None = None) -> DimensionGroup: + """Generate a random `DimensionGroup` that has a subset of the dimensions in a given one. Parameters ---------- n : `int` Number of dimensions to select, before automatic expansion by - `DimensionGraph`. - dataIds : `DimensionGraph`, optional - Dimensions to select from. Defaults to ``self.allDataIds.graph``. + `DimensionGroup`. + group : `DimensionGroup`, optional + Dimensions to select from. Defaults to + ``self.allDataIds.dimensions``. Returns ------- - selected : `DimensionGraph` - ``n`` or more dimensions randomly selected from ``graph`` with + selected : `DimensionGroup` + ``n`` or more dimensions randomly selected from ``group`` with replacement. """ - if graph is None: - graph = self.allDataIds.graph - return DimensionGraph( - graph.universe, names=self.rng.sample(list(graph.dimensions.names), max(n, len(graph.dimensions))) - ) + if group is None: + group = self.allDataIds.dimensions + return group.universe.conform(self.rng.sample(list(group.names), max(n, len(group)))) def splitByStateFlags( self, @@ -552,26 +549,32 @@ def splitByStateFlags( result = SplitByStateFlags(expanded=dataIds) if complete: result.complete = DataCoordinateSequence( - [DataCoordinate.standardize(e.full.byName(), graph=dataIds.graph) for e in result.expanded], - graph=dataIds.graph, + [ + DataCoordinate.standardize(e.mapping, dimensions=dataIds.dimensions) + for e in result.expanded + ], + dimensions=dataIds.dimensions, ) self.assertTrue(result.complete.hasFull()) self.assertFalse(result.complete.hasRecords()) if minimal: result.minimal = DataCoordinateSequence( - [DataCoordinate.standardize(e.byName(), graph=dataIds.graph) for e in result.expanded], - graph=dataIds.graph, + [ + DataCoordinate.standardize(e.required, dimensions=dataIds.dimensions) + for e in result.expanded + ], + dimensions=dataIds.dimensions, ) - self.assertEqual(result.minimal.hasFull(), not dataIds.graph.implied) + self.assertEqual(result.minimal.hasFull(), not dataIds.dimensions.implied) self.assertFalse(result.minimal.hasRecords()) if not expanded: result.expanded = None return result - def testMappingInterface(self): - """Test that the mapping interface in `DataCoordinate` and (when - applicable) its ``full`` property are self-consistent and consistent - with the ``graph`` property. 
+ def testMappingViews(self): + """Test that the ``mapping`` and ``required`` attributes in + `DataCoordinate` are self-consistent and consistent with the + ``dimensions`` property. """ for _ in range(5): dimensions = self.randomDimensionSubset() @@ -579,14 +582,21 @@ def testMappingInterface(self): split = self.splitByStateFlags(dataIds) for dataId in split.chain(): with self.subTest(dataId=dataId): - self.assertEqual(list(dataId.values()), [dataId[d] for d in dataId]) - self.assertEqual(list(dataId.values()), [dataId[d.name] for d in dataId]) - self.assertEqual(dataId.keys(), dataId.graph.required) + self.assertEqual(dataId.required.keys(), dataId.dimensions.required) + self.assertEqual( + list(dataId.required.values()), [dataId[d] for d in dataId.dimensions.required] + ) + self.assertEqual( + list(dataId.required_values), [dataId[d] for d in dataId.dimensions.required] + ) + self.assertEqual(dataId.required.keys(), dataId.dimensions.required) for dataId in itertools.chain(split.complete, split.expanded): with self.subTest(dataId=dataId): self.assertTrue(dataId.hasFull()) - self.assertEqual(dataId.graph.dimensions, dataId.full.keys()) - self.assertEqual(list(dataId.full.values()), [dataId[k] for k in dataId.graph.dimensions]) + self.assertEqual(dataId.dimensions.names, dataId.mapping.keys()) + self.assertEqual( + list(dataId.mapping.values()), [dataId[k] for k in dataId.mapping.keys()] + ) def test_pickle(self): for _ in range(5): @@ -595,14 +605,17 @@ def test_pickle(self): split = self.splitByStateFlags(dataIds) for data_id in split.chain(): s = pickle.dumps(data_id) - read_data_id = pickle.loads(s) + read_data_id: DataCoordinate = pickle.loads(s) self.assertEqual(data_id, read_data_id) self.assertEqual(data_id.hasFull(), read_data_id.hasFull()) self.assertEqual(data_id.hasRecords(), read_data_id.hasRecords()) if data_id.hasFull(): - self.assertEqual(data_id.full, read_data_id.full) + self.assertEqual(data_id.mapping, read_data_id.mapping) if data_id.hasRecords(): - self.assertEqual(data_id.records, read_data_id.records) + for element_name in data_id.dimensions.elements: + self.assertEqual( + data_id.records[element_name], read_data_id.records[element_name] + ) def test_record_attributes(self): """Test that dimension records are available as attributes on expanded @@ -613,15 +626,15 @@ def test_record_attributes(self): dataIds = self.randomDataIds(n=1).subset(dimensions) split = self.splitByStateFlags(dataIds) for data_id in split.expanded: - for element in data_id.graph.elements: - self.assertIs(getattr(data_id, element.name), data_id.records[element.name]) - self.assertIn(element.name, dir(data_id)) + for element_name in data_id.dimensions.elements: + self.assertIs(getattr(data_id, element_name), data_id.records[element_name]) + self.assertIn(element_name, dir(data_id)) with self.assertRaisesRegex(AttributeError, "^not_a_dimension_name$"): data_id.not_a_dimension_name for data_id in itertools.chain(split.minimal, split.complete): - for element in data_id.graph.elements: + for element_name in data_id.dimensions.elements: with self.assertRaisesRegex(AttributeError, "only available on expanded DataCoordinates"): - getattr(data_id, element.name) + getattr(data_id, element_name) with self.assertRaisesRegex(AttributeError, "^not_a_dimension_name$"): data_id.not_a_dimension_name @@ -635,22 +648,14 @@ def testEquality(self): # with the same underlying data ID values. 
for a0, b0 in itertools.combinations(split.chain(0), 2): self.assertEqual(a0, b0) - self.assertEqual(a0, b0.byName()) - self.assertEqual(a0.byName(), b0) # Same thing, for a different data ID value. for a1, b1 in itertools.combinations(split.chain(1), 2): self.assertEqual(a1, b1) - self.assertEqual(a1, b1.byName()) - self.assertEqual(a1.byName(), b1) # Iterate over all combinations of different states of DataCoordinate, # with different underlying data ID values. for a0, b1 in itertools.product(split.chain(0), split.chain(1)): self.assertNotEqual(a0, b1) self.assertNotEqual(a1, b0) - self.assertNotEqual(a0, b1.byName()) - self.assertNotEqual(a0.byName(), b1) - self.assertNotEqual(a1, b0.byName()) - self.assertNotEqual(a1.byName(), b0) def testStandardize(self): """Test constructing a DataCoordinate from many different kinds of @@ -665,25 +670,27 @@ def testStandardize(self): # that object. self.assertIs(dataId, DataCoordinate.standardize(dataId)) # Same if we also explicitly pass the dimensions we want. - self.assertIs(dataId, DataCoordinate.standardize(dataId, graph=dataId.graph)) + self.assertIs(dataId, DataCoordinate.standardize(dataId, dimensions=dataId.dimensions)) # Same if we pass the dimensions and some irrelevant # kwargs. - self.assertIs(dataId, DataCoordinate.standardize(dataId, graph=dataId.graph, htm7=12)) + self.assertIs( + dataId, DataCoordinate.standardize(dataId, dimensions=dataId.dimensions, htm7=12) + ) # Test constructing a new data ID from this one with a # subset of the dimensions. # This is not possible for some combinations of # dimensions if hasFull is False (see # `DataCoordinate.subset` docs). - newDimensions = self.randomDimensionSubset(n=1, graph=dataId.graph) - if dataId.hasFull() or dataId.graph.required >= newDimensions.required: + newDimensions = self.randomDimensionSubset(n=1, group=dataId.dimensions) + if dataId.hasFull() or dataId.dimensions.required >= newDimensions.required: newDataIds = [ dataId.subset(newDimensions), - DataCoordinate.standardize(dataId, graph=newDimensions), - DataCoordinate.standardize(dataId, graph=newDimensions, htm7=12), + DataCoordinate.standardize(dataId, dimensions=newDimensions), + DataCoordinate.standardize(dataId, dimensions=newDimensions, htm7=12), ] for newDataId in newDataIds: with self.subTest(newDataId=newDataId, type=type(dataId)): - commonKeys = dataId.keys() & newDataId.keys() + commonKeys = dataId.dimensions.required & newDataId.dimensions.required self.assertTrue(commonKeys) self.assertEqual( [newDataId[k] for k in commonKeys], @@ -700,30 +707,30 @@ def testStandardize(self): for dataId in split.complete: # Split the keys (dimension names) into two random subsets, so # we can pass some as kwargs below. 
- keys1 = set( - self.rng.sample(list(dataId.graph.dimensions.names), len(dataId.graph.dimensions) // 2) - ) - keys2 = dataId.graph.dimensions.names - keys1 + keys1 = set(self.rng.sample(list(dataId.dimensions.names), len(dataId.dimensions) // 2)) + keys2 = dataId.dimensions.names - keys1 newCompleteDataIds = [ - DataCoordinate.standardize(dataId.full.byName(), universe=dataId.universe), - DataCoordinate.standardize(dataId.full.byName(), graph=dataId.graph), + DataCoordinate.standardize(dataId.mapping, universe=dataId.universe), + DataCoordinate.standardize(dataId.mapping, dimensions=dataId.dimensions), DataCoordinate.standardize( - DataCoordinate.makeEmpty(dataId.graph.universe), **dataId.full.byName() + DataCoordinate.makeEmpty(dataId.dimensions.universe), **dataId.mapping ), DataCoordinate.standardize( - DataCoordinate.makeEmpty(dataId.graph.universe), - graph=dataId.graph, - **dataId.full.byName(), + DataCoordinate.makeEmpty(dataId.dimensions.universe), + dimensions=dataId.dimensions, + **dataId.mapping, ), - DataCoordinate.standardize(**dataId.full.byName(), universe=dataId.universe), - DataCoordinate.standardize(graph=dataId.graph, **dataId.full.byName()), + DataCoordinate.standardize(**dataId.mapping, universe=dataId.universe), + DataCoordinate.standardize(dimensions=dataId.dimensions, **dataId.mapping), DataCoordinate.standardize( {k: dataId[k] for k in keys1}, universe=dataId.universe, **{k: dataId[k] for k in keys2}, ), DataCoordinate.standardize( - {k: dataId[k] for k in keys1}, graph=dataId.graph, **{k: dataId[k] for k in keys2} + {k: dataId[k] for k in keys1}, + dimensions=dataId.dimensions, + **{k: dataId[k] for k in keys2}, ), ] for newDataId in newCompleteDataIds: @@ -733,43 +740,43 @@ def testStandardize(self): def testUnion(self): """Test `DataCoordinate.union`.""" - # Make test graphs to combine; mostly random, but with a few explicit + # Make test groups to combine; mostly random, but with a few explicit # cases to make sure certain edge cases are covered. - graphs = [self.randomDimensionSubset(n=2) for i in range(2)] - graphs.append(self.allDataIds.universe["visit"].graph) - graphs.append(self.allDataIds.universe["detector"].graph) - graphs.append(self.allDataIds.universe["physical_filter"].graph) - graphs.append(self.allDataIds.universe["band"].graph) + groups = [self.randomDimensionSubset(n=2) for i in range(2)] + groups.append(self.allDataIds.universe["visit"].minimal_group) + groups.append(self.allDataIds.universe["detector"].minimal_group) + groups.append(self.allDataIds.universe["physical_filter"].minimal_group) + groups.append(self.allDataIds.universe["band"].minimal_group) # Iterate over all combinations, including the same graph with itself. 
- for graph1, graph2 in itertools.product(graphs, repeat=2): + for group1, group2 in itertools.product(groups, repeat=2): parentDataIds = self.randomDataIds(n=1) - split1 = self.splitByStateFlags(parentDataIds.subset(graph1)) - split2 = self.splitByStateFlags(parentDataIds.subset(graph2)) + split1 = self.splitByStateFlags(parentDataIds.subset(group1)) + split2 = self.splitByStateFlags(parentDataIds.subset(group2)) (parentDataId,) = parentDataIds for lhs, rhs in itertools.product(split1.chain(), split2.chain()): unioned = lhs.union(rhs) with self.subTest(lhs=lhs, rhs=rhs, unioned=unioned): - self.assertEqual(unioned.graph, graph1.union(graph2)) - self.assertEqual(unioned, parentDataId.subset(unioned.graph)) + self.assertEqual(unioned.dimensions, group1.union(group2)) + self.assertEqual(unioned, parentDataId.subset(unioned.dimensions)) if unioned.hasFull(): - self.assertEqual(unioned.subset(lhs.graph), lhs) - self.assertEqual(unioned.subset(rhs.graph), rhs) + self.assertEqual(unioned.subset(lhs.dimensions), lhs) + self.assertEqual(unioned.subset(rhs.dimensions), rhs) if lhs.hasFull() and rhs.hasFull(): self.assertTrue(unioned.hasFull()) - if lhs.graph >= unioned.graph and lhs.hasFull(): + if lhs.dimensions >= unioned.dimensions and lhs.hasFull(): self.assertTrue(unioned.hasFull()) if lhs.hasRecords(): self.assertTrue(unioned.hasRecords()) - if rhs.graph >= unioned.graph and rhs.hasFull(): + if rhs.dimensions >= unioned.dimensions and rhs.hasFull(): self.assertTrue(unioned.hasFull()) if rhs.hasRecords(): self.assertTrue(unioned.hasRecords()) - if lhs.graph.required | rhs.graph.required >= unioned.graph.dimensions: + if lhs.dimensions.required | rhs.dimensions.required >= unioned.dimensions.names: self.assertTrue(unioned.hasFull()) if ( lhs.hasRecords() and rhs.hasRecords() - and lhs.graph.elements | rhs.graph.elements >= unioned.graph.elements + and lhs.dimensions.elements | rhs.dimensions.elements >= unioned.dimensions.elements ): self.assertTrue(unioned.hasRecords()) @@ -777,33 +784,23 @@ def testRegions(self): """Test that data IDs for a few known dimensions have the expected regions. 
""" - for dataId in self.randomDataIds(n=4).subset( - DimensionGraph(self.allDataIds.universe, names=["visit"]) - ): + for dataId in self.randomDataIds(n=4).subset(self.allDataIds.universe.conform(["visit"])): self.assertIsNotNone(dataId.region) - self.assertEqual(dataId.graph.spatial.names, {"observation_regions"}) + self.assertEqual(dataId.dimensions.spatial.names, {"observation_regions"}) self.assertEqual(dataId.region, dataId.records["visit"].region) - for dataId in self.randomDataIds(n=4).subset( - DimensionGraph(self.allDataIds.universe, names=["visit", "detector"]) - ): + for dataId in self.randomDataIds(n=4).subset(self.allDataIds.universe.conform(["visit", "detector"])): self.assertIsNotNone(dataId.region) - self.assertEqual(dataId.graph.spatial.names, {"observation_regions"}) + self.assertEqual(dataId.dimensions.spatial.names, {"observation_regions"}) self.assertEqual(dataId.region, dataId.records["visit_detector_region"].region) - for dataId in self.randomDataIds(n=4).subset( - DimensionGraph(self.allDataIds.universe, names=["tract"]) - ): + for dataId in self.randomDataIds(n=4).subset(self.allDataIds.universe.conform(["tract"])): self.assertIsNotNone(dataId.region) - self.assertEqual(dataId.graph.spatial.names, {"skymap_regions"}) + self.assertEqual(dataId.dimensions.spatial.names, {"skymap_regions"}) self.assertEqual(dataId.region, dataId.records["tract"].region) - for dataId in self.randomDataIds(n=4).subset( - DimensionGraph(self.allDataIds.universe, names=["patch"]) - ): + for dataId in self.randomDataIds(n=4).subset(self.allDataIds.universe.conform(["patch"])): self.assertIsNotNone(dataId.region) - self.assertEqual(dataId.graph.spatial.names, {"skymap_regions"}) + self.assertEqual(dataId.dimensions.spatial.names, {"skymap_regions"}) self.assertEqual(dataId.region, dataId.records["patch"].region) - for data_id in self.randomDataIds(n=1).subset( - DimensionGraph(self.allDataIds.universe, names=["visit", "tract"]) - ): + for data_id in self.randomDataIds(n=1).subset(self.allDataIds.universe.conform(["visit", "tract"])): self.assertEqual(data_id.region.relate(data_id.records["visit"].region), lsst.sphgeom.WITHIN) self.assertEqual(data_id.region.relate(data_id.records["tract"].region), lsst.sphgeom.WITHIN) @@ -811,17 +808,13 @@ def testTimespans(self): """Test that data IDs for a few known dimensions have the expected timespans. """ - for dataId in self.randomDataIds(n=4).subset( - DimensionGraph(self.allDataIds.universe, names=["visit"]) - ): + for dataId in self.randomDataIds(n=4).subset(self.allDataIds.universe.conform(["visit"])): self.assertIsNotNone(dataId.timespan) - self.assertEqual(dataId.graph.temporal.names, {"observation_timespans"}) + self.assertEqual(dataId.dimensions.temporal.names, {"observation_timespans"}) self.assertEqual(dataId.timespan, dataId.records["visit"].timespan) self.assertEqual(dataId.timespan, dataId.visit.timespan) # Also test the case for non-temporal DataIds. 
- for dataId in self.randomDataIds(n=4).subset( - DimensionGraph(self.allDataIds.universe, names=["patch"]) - ): + for dataId in self.randomDataIds(n=4).subset(self.allDataIds.universe.conform(["patch"])): self.assertIsNone(dataId.timespan) def testIterableStatusFlags(self): @@ -831,29 +824,31 @@ def testIterableStatusFlags(self): dataIds = self.randomDataIds(n=10) split = self.splitByStateFlags(dataIds) for cls in (DataCoordinateSet, DataCoordinateSequence): - self.assertTrue(cls(split.expanded, graph=dataIds.graph, check=True).hasFull()) - self.assertTrue(cls(split.expanded, graph=dataIds.graph, check=False).hasFull()) - self.assertTrue(cls(split.expanded, graph=dataIds.graph, check=True).hasRecords()) - self.assertTrue(cls(split.expanded, graph=dataIds.graph, check=False).hasRecords()) - self.assertTrue(cls(split.complete, graph=dataIds.graph, check=True).hasFull()) - self.assertTrue(cls(split.complete, graph=dataIds.graph, check=False).hasFull()) - self.assertFalse(cls(split.complete, graph=dataIds.graph, check=True).hasRecords()) - self.assertFalse(cls(split.complete, graph=dataIds.graph, check=False).hasRecords()) + self.assertTrue(cls(split.expanded, dimensions=dataIds.dimensions, check=True).hasFull()) + self.assertTrue(cls(split.expanded, dimensions=dataIds.dimensions, check=False).hasFull()) + self.assertTrue(cls(split.expanded, dimensions=dataIds.dimensions, check=True).hasRecords()) + self.assertTrue(cls(split.expanded, dimensions=dataIds.dimensions, check=False).hasRecords()) + self.assertTrue(cls(split.complete, dimensions=dataIds.dimensions, check=True).hasFull()) + self.assertTrue(cls(split.complete, dimensions=dataIds.dimensions, check=False).hasFull()) + self.assertFalse(cls(split.complete, dimensions=dataIds.dimensions, check=True).hasRecords()) + self.assertFalse(cls(split.complete, dimensions=dataIds.dimensions, check=False).hasRecords()) with self.assertRaises(ValueError): - cls(split.complete, graph=dataIds.graph, hasRecords=True, check=True) + cls(split.complete, dimensions=dataIds.dimensions, hasRecords=True, check=True) self.assertEqual( - cls(split.minimal, graph=dataIds.graph, check=True).hasFull(), not dataIds.graph.implied + cls(split.minimal, dimensions=dataIds.dimensions, check=True).hasFull(), + not dataIds.dimensions.implied, ) self.assertEqual( - cls(split.minimal, graph=dataIds.graph, check=False).hasFull(), not dataIds.graph.implied + cls(split.minimal, dimensions=dataIds.dimensions, check=False).hasFull(), + not dataIds.dimensions.implied, ) - self.assertFalse(cls(split.minimal, graph=dataIds.graph, check=True).hasRecords()) - self.assertFalse(cls(split.minimal, graph=dataIds.graph, check=False).hasRecords()) + self.assertFalse(cls(split.minimal, dimensions=dataIds.dimensions, check=True).hasRecords()) + self.assertFalse(cls(split.minimal, dimensions=dataIds.dimensions, check=False).hasRecords()) with self.assertRaises(ValueError): - cls(split.minimal, graph=dataIds.graph, hasRecords=True, check=True) - if dataIds.graph.implied: + cls(split.minimal, dimensions=dataIds.dimensions, hasRecords=True, check=True) + if dataIds.dimensions.implied: with self.assertRaises(ValueError): - cls(split.minimal, graph=dataIds.graph, hasFull=True, check=True) + cls(split.minimal, dimensions=dataIds.dimensions, hasFull=True, check=True) def testSetOperations(self): """Test for self-consistency across DataCoordinateSet's operations.""" @@ -887,10 +882,10 @@ def testSetOperations(self): def testPackers(self): (instrument_data_id,) = self.allDataIds.subset( - 
self.allDataIds.universe.extract(["instrument"]) + self.allDataIds.universe.conform(["instrument"]) ).toSet() - (detector_data_id,) = self.randomDataIds(n=1).subset(self.allDataIds.universe.extract(["detector"])) - packer = ConcreteTestDimensionPacker(instrument_data_id, detector_data_id.graph) + (detector_data_id,) = self.randomDataIds(n=1).subset(self.allDataIds.universe.conform(["detector"])) + packer = ConcreteTestDimensionPacker(instrument_data_id, detector_data_id.dimensions) packed_id, max_bits = packer.pack(detector_data_id, returnMaxBits=True) self.assertEqual(packed_id, detector_data_id["detector"]) self.assertEqual(max_bits, packer.maxBits) diff --git a/tests/test_expressions.py b/tests/test_expressions.py index d86cb75f84..4b5334265f 100644 --- a/tests/test_expressions.py +++ b/tests/test_expressions.py @@ -354,7 +354,7 @@ def test_governor(self): parser = ParserYacc() universe = DimensionUniverse() - graph = universe.extract(("instrument", "visit")) + dimensions = universe.conform(("instrument", "visit")) dataId = DataCoordinate.makeEmpty(universe) defaults = DataCoordinate.makeEmpty(universe) @@ -362,20 +362,20 @@ def test_governor(self): tree = parser.parse("instrument = 'LSST'") expr = NormalFormExpression.fromTree(tree, NormalForm.DISJUNCTIVE) binds = {} - visitor = CheckVisitor(dataId, graph, binds, defaults) + visitor = CheckVisitor(dataId, dimensions, binds, defaults) expr.visit(visitor) tree = parser.parse("'LSST' = instrument") expr = NormalFormExpression.fromTree(tree, NormalForm.DISJUNCTIVE) binds = {} - visitor = CheckVisitor(dataId, graph, binds, defaults) + visitor = CheckVisitor(dataId, dimensions, binds, defaults) expr.visit(visitor) # use bind for governor tree = parser.parse("instrument = instr") expr = NormalFormExpression.fromTree(tree, NormalForm.DISJUNCTIVE) binds = {"instr": "LSST"} - visitor = CheckVisitor(dataId, graph, binds, defaults) + visitor = CheckVisitor(dataId, dimensions, binds, defaults) expr.visit(visitor) diff --git a/tests/test_formatter.py b/tests/test_formatter.py index 373a1941fa..ba5d6d21df 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -36,7 +36,6 @@ Config, DataCoordinate, DatasetType, - DimensionGraph, DimensionUniverse, FileDescriptor, Formatter, @@ -195,7 +194,7 @@ def testRegistryConfig(self): # Create a DatasetRef with and without instrument matching the # one in the config file. 
- dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) + dimensions = self.universe.conform(("visit", "physical_filter", "instrument")) constant_dataId = {"physical_filter": "v", "visit": 1} sc = StorageClass("DummySC", dict, None) refPviHsc = self.makeDatasetRef( @@ -230,7 +229,7 @@ def testRegistryConfig(self): self.assertIn("PickleFormatter", refPvixNotHscFmt.name()) # Create a DatasetRef that should fall back to using StorageClass - dimensionsNoV = DimensionGraph(self.universe, names=("physical_filter", "instrument")) + dimensionsNoV = self.universe.conform(("physical_filter", "instrument")) refPvixNotHscDims = self.makeDatasetRef( "pvix", dimensionsNoV, diff --git a/tests/test_obscore.py b/tests/test_obscore.py index 6f2374ed9c..a020f1c46a 100644 --- a/tests/test_obscore.py +++ b/tests/test_obscore.py @@ -183,16 +183,16 @@ def initialize_registry(self, registry: SqlRegistry) -> None: self.dataset_types: dict[str, DatasetType] = {} - dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "exposure"]) + dimensions = registry.dimensions.conform(["instrument", "physical_filter", "detector", "exposure"]) self.dataset_types["raw"] = DatasetType("raw", dimensions, storage_class) - dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"]) + dimensions = registry.dimensions.conform(["instrument", "physical_filter", "detector", "visit"]) self.dataset_types["calexp"] = DatasetType("calexp", dimensions, storage_class) - dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"]) + dimensions = registry.dimensions.conform(["instrument", "physical_filter", "detector", "visit"]) self.dataset_types["no_obscore"] = DatasetType("no_obscore", dimensions, storage_class) - dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector"]) + dimensions = registry.dimensions.conform(["instrument", "physical_filter", "detector"]) self.dataset_types["calib"] = DatasetType("calib", dimensions, storage_class, isCalibration=True) for dataset_type in self.dataset_types.values(): diff --git a/tests/test_quantum.py b/tests/test_quantum.py index 86003a53bd..dbb196c953 100644 --- a/tests/test_quantum.py +++ b/tests/test_quantum.py @@ -121,13 +121,13 @@ def _buildFullQuantum(self, taskName, addRecords=False) -> tuple[Quantum, Iterab dataId43 = dataId43.expanded(records43) # type: ignore datasetTypeInit = DatasetType( - datasetTypeNameInit, universe.extract(("instrument", "visit")), storageClass + datasetTypeNameInit, universe.conform(("instrument", "visit")), storageClass ) datasetTypeInput = DatasetType( - datasetTypeNameInput, universe.extract(("instrument", "visit")), storageClass + datasetTypeNameInput, universe.conform(("instrument", "visit")), storageClass ) datasetTypeOutput = DatasetType( - datasetTypeNameOutput, universe.extract(("instrument", "visit")), storageClass + datasetTypeNameOutput, universe.conform(("instrument", "visit")), storageClass ) predictedInputs = { datasetTypeInput: [ diff --git a/tests/test_quantumBackedButler.py b/tests/test_quantumBackedButler.py index 423ee7e083..bd43e97494 100644 --- a/tests/test_quantumBackedButler.py +++ b/tests/test_quantumBackedButler.py @@ -69,7 +69,7 @@ def setUp(self) -> None: self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) # make all dataset types - graph = self.universe.extract(("instrument", "detector")) + graph = self.universe.conform(("instrument", "detector")) 
storageClass = StorageClass("StructuredDataDict") self.datasetTypeInit = DatasetType("test_ds_init", graph, storageClass) self.datasetTypeInput = DatasetType("test_ds_input", graph, storageClass) diff --git a/tests/test_query_relations.py b/tests/test_query_relations.py index 106aa4c526..a3c0552495 100644 --- a/tests/test_query_relations.py +++ b/tests/test_query_relations.py @@ -148,11 +148,11 @@ def test_spatial_constraints(self) -> None: select( Π[band, patch, patch.region, skymap, tract, visit_detector_region.region]( σ[ - band={self.band!r} - and instrument={self.instrument!r} + instrument={self.instrument!r} and detector={self.detector!r} - and physical_filter={self.physical_filter!r} and visit={self.visit!r} + and band={self.band!r} + and physical_filter={self.physical_filter!r} ]( patch_htm7_overlap ⋈ visit_detector_region_htm7_overlap @@ -173,11 +173,11 @@ def test_spatial_constraints(self) -> None: self.registry.queryDataIds( ["patch", "band"], where=( - f"band={self.band!r} " - f"and instrument={self.instrument!r} " + f"instrument={self.instrument!r} " f"and detector={self.detector!r} " - f"and physical_filter={self.physical_filter!r} " f"and visit={self.visit!r}" + f"and band={self.band!r} " + f"and physical_filter={self.physical_filter!r} " ), ), ) @@ -194,11 +194,11 @@ def test_spatial_constraints(self) -> None: select( Π[htm7]( σ[ - band={self.band!r} - and instrument={self.instrument!r} + instrument={self.instrument!r} and detector={self.detector!r} - and physical_filter={self.physical_filter!r} and visit={self.visit!r} + and band={self.band!r} + and physical_filter={self.physical_filter!r} ]( visit_detector_region_htm7_overlap ⋈ physical_filter @@ -217,11 +217,11 @@ def test_spatial_constraints(self) -> None: self.registry.queryDataIds( ["htm7"], where=( - f"band={self.band!r} " - f"and instrument={self.instrument!r} " + f"instrument={self.instrument!r} " f"and detector={self.detector!r} " - f"and physical_filter={self.physical_filter!r} " f"and visit={self.visit!r}" + f"and band={self.band!r} " + f"and physical_filter={self.physical_filter!r} " ), ), ) @@ -299,10 +299,10 @@ def test_spatial_constraints(self) -> None: select( Π[detector, instrument, visit_detector_region.region]( σ[ - band={self.band!r} - and instrument={self.instrument!r} - and physical_filter={self.physical_filter!r} + instrument={self.instrument!r} and visit={self.visit!r} + and band={self.band!r} + and physical_filter={self.physical_filter!r} and htm7={self.htm7!r} ]( visit_detector_region_htm7_overlap @@ -330,11 +330,11 @@ def test_spatial_constraints(self) -> None: select( Π[detector, instrument]( σ[ - band={self.band!r} - and htm7={self.htm7!r} + htm7={self.htm7!r} and instrument={self.instrument!r} - and physical_filter={self.physical_filter!r} and visit={self.visit!r} + and band={self.band!r} + and physical_filter={self.physical_filter!r} ]( visit_detector_region_htm7_overlap ⋈ physical_filter diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py index e14370fc0a..0845e78a33 100644 --- a/tests/test_simpleButler.py +++ b/tests/test_simpleButler.py @@ -558,7 +558,7 @@ def testRegistryDefaults(self): butler3 = Butler.from_config(butler=butler, collections=["imported_g"], instrument="Cam2") self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"]) self.assertIsNone(butler3.registry.defaults.run, None) - self.assertEqual(butler3.registry.defaults.dataId.byName(), {"instrument": "Cam2"}) + self.assertEqual(butler3.registry.defaults.dataId.required, {"instrument": 
"Cam2"}) # Check that repr() does not fail. defaults = RegistryDefaults(collections=["imported_g"], run="test") diff --git a/tests/test_templates.py b/tests/test_templates.py index 27e14923bf..d224fc989e 100644 --- a/tests/test_templates.py +++ b/tests/test_templates.py @@ -36,7 +36,6 @@ DatasetId, DatasetRef, DatasetType, - DimensionGraph, DimensionUniverse, StorageClass, ) @@ -65,8 +64,8 @@ def makeDatasetRef( dataId = self.dataId if "physical_filter" in dataId and "band" not in dataId: dataId["band"] = "b" # Add fake band. - dimensions = DimensionGraph(self.universe, names=dataId.keys()) - dataId = DataCoordinate.standardize(dataId, graph=dimensions) + dimensions = self.universe.conform(dataId.keys()) + dataId = DataCoordinate.standardize(dataId, dimensions=dimensions) # Pretend we have a parent if this looks like a composite compositeName, componentName = DatasetType.splitDatasetTypeName(datasetTypeName) diff --git a/tests/test_testRepo.py b/tests/test_testRepo.py index 50c42283b7..6d0dfd7231 100644 --- a/tests/test_testRepo.py +++ b/tests/test_testRepo.py @@ -118,7 +118,7 @@ def testButlerKwargs(self): def _checkButlerDimension(self, dimensions, query, expected): result = list(self.butler.registry.queryDataIds(dimensions, where=query, check=False)) self.assertEqual(len(result), 1) - self.assertIn(result[0].byName(), expected) + self.assertIn(result[0].required, expected) def testButlerDimensions(self): self._checkButlerDimension( @@ -237,18 +237,18 @@ def testUniqueButler(self): def testExpandUniqueId(self): self.assertEqual( - dict(expandUniqueId(self.butler, {"instrument": "notACam"})), {"instrument": "notACam"} + expandUniqueId(self.butler, {"instrument": "notACam"}).required, {"instrument": "notACam"} ) self.assertIn( - dict(expandUniqueId(self.butler, {"visit": 101})), + expandUniqueId(self.butler, {"visit": 101}).required, [{"instrument": "notACam", "visit": 101}, {"instrument": "dummyCam", "visit": 101}], ) self.assertIn( - dict(expandUniqueId(self.butler, {"detector": 5})), + expandUniqueId(self.butler, {"detector": 5}).required, [{"instrument": "notACam", "detector": 5}, {"instrument": "dummyCam", "detector": 5}], ) self.assertIn( - dict(expandUniqueId(self.butler, {"physical_filter": "k2020"})), + expandUniqueId(self.butler, {"physical_filter": "k2020"}).required, [ {"instrument": "notACam", "physical_filter": "k2020"}, {"instrument": "notACam", "physical_filter": "k2020"}, From 0d9f226f9be38947e81a20e5557eb2da97589060 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Fri, 3 Nov 2023 20:48:49 -0400 Subject: [PATCH 12/16] Accept DimensionGroup in Registry.queryDataIds. --- python/lsst/daf/butler/_registry_shim.py | 3 ++- python/lsst/daf/butler/registry/_registry.py | 9 ++++++--- python/lsst/daf/butler/registry/sql_registry.py | 9 ++++++--- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/python/lsst/daf/butler/_registry_shim.py b/python/lsst/daf/butler/_registry_shim.py index 17f7e93bcb..2a49c161e9 100644 --- a/python/lsst/daf/butler/_registry_shim.py +++ b/python/lsst/daf/butler/_registry_shim.py @@ -330,7 +330,8 @@ def queryDatasets( def queryDataIds( self, - dimensions: Iterable[Dimension | str] | Dimension | str, + # TODO: Drop Dimension support on DM-41326. 
+ dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str, *, dataId: DataId | None = None, datasets: Any = None, diff --git a/python/lsst/daf/butler/registry/_registry.py b/python/lsst/daf/butler/registry/_registry.py index f275cedaab..ece7765043 100644 --- a/python/lsst/daf/butler/registry/_registry.py +++ b/python/lsst/daf/butler/registry/_registry.py @@ -1224,7 +1224,8 @@ def queryDatasets( @abstractmethod def queryDataIds( self, - dimensions: Iterable[Dimension | str] | Dimension | str, + # TODO: Drop `Dimension` objects on DM-41326. + dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str, *, dataId: DataId | None = None, datasets: Any = None, @@ -1239,10 +1240,12 @@ def queryDataIds( Parameters ---------- - dimensions : `Dimension` or `str`, or iterable thereof + dimensions : `DimensionGroup`, `Dimension`, or `str`, or \ + `~collections.abc.Iterable` [ `Dimension` or `str` ] The dimensions of the data IDs to yield, as either `Dimension` instances or `str`. Will be automatically expanded to a complete - `DimensionGraph`. + `DimensionGroup`. Support for `Dimension` instances is deprecated + and will not be supported after v27. dataId : `dict` or `DataCoordinate`, optional A data ID whose key-value pairs are used as equality constraints in the query. diff --git a/python/lsst/daf/butler/registry/sql_registry.py b/python/lsst/daf/butler/registry/sql_registry.py index fd5f58d97e..617ce86619 100644 --- a/python/lsst/daf/butler/registry/sql_registry.py +++ b/python/lsst/daf/butler/registry/sql_registry.py @@ -2147,7 +2147,8 @@ def queryDatasets( def queryDataIds( self, - dimensions: Iterable[Dimension | str] | Dimension | str, + # TODO: Drop Dimension support on DM-41326. + dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str, *, dataId: DataId | None = None, datasets: Any = None, @@ -2162,10 +2163,12 @@ def queryDataIds( Parameters ---------- - dimensions : `Dimension` or `str`, or iterable thereof + dimensions : `DimensionGroup`, `Dimension`, or `str`, or \ + `~collections.abc.Iterable` [ `Dimension` or `str` ] The dimensions of the data IDs to yield, as either `Dimension` instances or `str`. Will be automatically expanded to a complete - `DimensionGraph`. + `DimensionGroup`. Support for `Dimension` instances is deprecated + and will not be supported after v27. dataId : `dict` or `DataCoordinate`, optional A data ID whose key-value pairs are used as equality constraints in the query. From a81634de45182202dacd117960d12986a6081c36 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Fri, 3 Nov 2023 20:52:06 -0400 Subject: [PATCH 13/16] Rewrap some docstring text. --- python/lsst/daf/butler/dimensions/_skypix.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/python/lsst/daf/butler/dimensions/_skypix.py b/python/lsst/daf/butler/dimensions/_skypix.py index 7f8bdddcf0..dcea39985f 100644 --- a/python/lsst/daf/butler/dimensions/_skypix.py +++ b/python/lsst/daf/butler/dimensions/_skypix.py @@ -215,18 +215,17 @@ class SkyPixConstructionVisitor(DimensionConstructionVisitor): Fully-qualified name of the class whose instances represent a particular level of this pixelization. maxLevel : `int`, optional - Maximum level (inclusive) of the hierarchy. If not provided, - an attempt will be made to obtain it from a ``MAX_LEVEL`` attribute - of the pixelization class. + Maximum level (inclusive) of the hierarchy. If not provided, an + attempt will be made to obtain it from a ``MAX_LEVEL`` attribute of the + pixelization class. 
Notes ----- At present, this class adds both a new `SkyPixSystem` instance all possible `SkyPixDimension` to the builder that invokes it. In the future, it may - add only the `SkyPixSystem`, with dimension instances created on-the-fly - by the `DimensionUniverse`; this depends on `DimensionGraph.encode` going - away or otherwise eliminating assumptions about the set of dimensions in a - universe being static. + add only the `SkyPixSystem`, with dimension instances created on-the-fly by + the `DimensionUniverse`; this depends on eliminating assumptions about the + set of dimensions in a universe being static. """ def __init__(self, name: str, pixelizationClassName: str, maxLevel: int | None = None): From f4833e1ffc0e26a56c126670c09320312b7b1b6b Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Fri, 3 Nov 2023 20:52:16 -0400 Subject: [PATCH 14/16] Deprecate DimensionUniverse.expandDimensionNameSet. Only usage was in DimensionGraph. --- python/lsst/daf/butler/dimensions/_universe.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/lsst/daf/butler/dimensions/_universe.py b/python/lsst/daf/butler/dimensions/_universe.py index 0d98a2936a..db7fdc9c32 100644 --- a/python/lsst/daf/butler/dimensions/_universe.py +++ b/python/lsst/daf/butler/dimensions/_universe.py @@ -418,6 +418,12 @@ def getDimensionIndex(self, name: str) -> int: """ return self._dimensionIndices[name] + # TODO: remove on DM-41326. + @deprecated( + "Deprecated in favor of DimensionUniverse.conform, and will be removed after v27.", + version="v27", + category=FutureWarning, + ) def expandDimensionNameSet(self, names: set[str]) -> None: """Expand a set of dimension names in-place. From 6519a1eda8e4de5c4e8c66ca6168979faaa4fb66 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Thu, 16 Nov 2023 10:52:40 -0500 Subject: [PATCH 15/16] Add extensive changelog entry. --- doc/changes/DM-34340.api.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 doc/changes/DM-34340.api.md diff --git a/doc/changes/DM-34340.api.md b/doc/changes/DM-34340.api.md new file mode 100644 index 0000000000..b6f61c19b1 --- /dev/null +++ b/doc/changes/DM-34340.api.md @@ -0,0 +1,19 @@ +Deprecate most public APIs that use Dimension or DimensionElement objects. + +This implements [RFC-834](https://jira.lsstcorp.org/browse/RFC-834), deprecating the `DimensionGraph` class (in favor of the new, similar `DimensionGroup`) and a large number of `DataCoordinate` methods and attributes, including its `collections.abc.Mapping` interface. + +This includes: + +- use `DataCoordinate.dimensions` instead of `DataCoordinate.graph` (likewise for arguments to `DataCoordinate.standardize`); +- use `dict(DataCoordinate.required)` as a drop-in replacement for `DataCoordinate.byName()`, but consider whether you want `DataCoordinate.required` (a `Mapping` view rather than a `dict`) or `DataCoordinate.mapping` (a `Mapping` with all *available* key-value pairs, not just the required ones); +- also use `DataCoordinate.mapping` or `DataCoordinate.required` instead of treating `DataCoordinate` itself as a `Mapping`, *except* square-bracket indexing, which is still very much supported; +- use `DataCoordinate.dimensions.required.names` or `DataCoordinate.required.keys()` as a drop-in replacement for `DataCoordinate.keys().names` or `DataCoordinate.names`, but consider whether you actually want `DataCoordinate.dimensions.names` or `DataCoordinate.mapping.keys` instead. 
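+
+A minimal sketch of the migration for a `DataCoordinate` called `data_id` (the variable name and the `"instrument"` key are only illustrative):
+
+```python
+data_id.graph          # deprecated -> data_id.dimensions
+data_id.byName()       # deprecated -> dict(data_id.required) (or dict(data_id.mapping))
+data_id.names          # deprecated -> data_id.dimensions.required.names
+dict(data_id)          # deprecated -> dict(data_id.mapping)
+data_id["instrument"]  # unchanged: square-bracket indexing is still supported
+```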
+
+`DimensionGroup` is almost identical to `DimensionGraph`, but it and its subset attributes are not directly iterable (since those iterate over `Dimension` and `DimensionElement` objects); use the `.names` attribute to iterate over names instead (just as names could be iterated over in `DimensionGraph`).
+
+`DimensionGraph` is still used in some `lsst.daf.butler` APIs (most prominently `DatasetType.dimensions`) that may be accessed without deprecation warnings being emitted, but iterating over that object or its subset attributes *will* yield deprecation warnings.
+And `DimensionGraph` is still accepted along with `DimensionGroup` without warning in most public APIs.
+When `DimensionGraph` is removed, methods and properties that return `DimensionGraph` will start returning `DimensionGroup` instead.
+
+Rare code (mostly in downstream middleware packages) that does need access to `Dimension` or `DimensionElement` objects should obtain them directly from the `DimensionUniverse`.
+For the pattern of checking whether a dimension is a skypix level, test whether its name is in `DimensionUniverse.skypix_dimensions` or `DimensionGroup.skypix` instead of obtaining a `Dimension` instance and calling `isinstance(dimension, SkyPixDimension)`.

From 4d005f7fecaae1d911da4e74f4d10568acd06a50 Mon Sep 17 00:00:00 2001
From: Jim Bosch
Date: Tue, 21 Nov 2023 13:54:07 -0500
Subject: [PATCH 16/16] Add, use DataCoordinate.make_empty.

The old DataCoordinate.makeEmpty is not being deprecated because that
wasn't included on RFC-834 and we don't have a good reason to get rid of
it, but we've now got a consistent suite of DataCoordinate factory
methods with from_full_values and from_required_values (whose camelCase
forms _did_ have a good reason to be retired, as this aided with the
transition from DimensionGraph to DimensionGroup).
---
 .../lsst/daf/butler/dimensions/_coordinate.py | 32 +++++++++++++++----
 .../daf/butler/registry/queries/_structs.py   |  4 +--
 .../queries/expressions/_predicate.py         |  4 +--
 .../daf/butler/registry/tests/_registry.py    |  6 ++--
 tests/test_butler.py                          |  2 +-
 tests/test_datastore.py                       |  4 +--
 tests/test_dimensions.py                      |  4 +--
 tests/test_expressions.py                     |  4 +--
 tests/test_formatter.py                       |  2 +-
 9 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/python/lsst/daf/butler/dimensions/_coordinate.py b/python/lsst/daf/butler/dimensions/_coordinate.py
index f45a0bebf7..664ea08959 100644
--- a/python/lsst/daf/butler/dimensions/_coordinate.py
+++ b/python/lsst/daf/butler/dimensions/_coordinate.py
@@ -132,7 +132,7 @@ class DataCoordinate(NamedKeyMapping[Dimension, DataIdValue]):
     `DataCoordinate` is an ABC, but it provides `staticmethod` factory
     functions for private concrete implementations that should be sufficient
     for most purposes. `standardize` is the most flexible and safe of these;
-    the others (`makeEmpty`, `from_required_values`, and `from_full_values`)
+    the others (`make_empty`, `from_required_values`, and `from_full_values`)
     are more specialized and perform little or no checking of inputs.

     Lookups for implied dimensions (those in ``self.dimensions.implied``) are
@@ -260,7 +260,7 @@ def standardize(
             raise TypeError("universe must be provided if graph is not.")
         dimensions = DimensionGroup(universe, new_mapping.keys())
         if not dimensions:
-            return DataCoordinate.makeEmpty(universe)
+            return DataCoordinate.make_empty(universe)
         # Some backends cannot handle numpy.int64 type which is a subclass of
         # numbers.Integral; convert that to int.
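
# A short, self-contained sketch of the snake_case factory method introduced in
# this commit; the variable names below are illustrative:
from lsst.daf.butler import DataCoordinate, DimensionUniverse

universe = DimensionUniverse()
empty_data_id = DataCoordinate.make_empty(universe)
# Per the make_empty docstring, hasFull() and hasRecords() are both True for
# the empty data ID, because its mappings are empty.
assert empty_data_id.hasFull() and empty_data_id.hasRecords()
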
for k, v in new_mapping.items(): @@ -336,6 +336,26 @@ def makeEmpty(universe: DimensionUniverse) -> DataCoordinate: `hasRecords` are guaranteed to return `True`, because both `full` and `records` are just empty mappings. """ + return DataCoordinate.make_empty(universe) + + @staticmethod + def make_empty(universe: DimensionUniverse) -> DataCoordinate: + """Return an empty `DataCoordinate`. + + It identifies the null set of dimensions. + + Parameters + ---------- + universe : `DimensionUniverse` + Universe to which this null dimension set belongs. + + Returns + ------- + data_id : `DataCoordinate` + A data ID object that identifies no dimensions. `hasFull` and + `hasRecords` are guaranteed to return `True`, because both `full` + and `records` are just empty mappings. + """ return _ExpandedTupleDataCoordinate(universe.empty.as_group(), (), {}) # TODO: remove on DM-41326. @@ -390,7 +410,7 @@ def from_required_values(dimensions: DimensionGroup, values: tuple[DataIdValue, Returns ------- - dataId : `DataCoordinate` + data_id : `DataCoordinate` A data ID object that identifies the given dimensions. ``dataId.hasFull()`` will return `True` only if ``dimensions.implied`` is empty. ``dataId.hasRecords()`` will @@ -400,7 +420,7 @@ def from_required_values(dimensions: DimensionGroup, values: tuple[DataIdValue, values ), f"Inconsistency between dimensions {dimensions.required} and required values {values}." if not dimensions: - return DataCoordinate.makeEmpty(dimensions.universe) + return DataCoordinate.make_empty(dimensions.universe) if not dimensions.implied: return _FullTupleDataCoordinate(dimensions, values) return _RequiredTupleDataCoordinate(dimensions, values) @@ -461,7 +481,7 @@ def from_full_values(dimensions: DimensionGroup, values: tuple[DataIdValue, ...] Returns ------- - dataId : `DataCoordinate` + data_id : `DataCoordinate` A data ID object that identifies the given dimensions. ``dataId.hasFull()`` will always return `True`. ``dataId.hasRecords()`` will only return `True` if ``dimensions`` @@ -471,7 +491,7 @@ def from_full_values(dimensions: DimensionGroup, values: tuple[DataIdValue, ...] values ), f"Inconsistency between dimensions {dimensions.data_coordinate_keys} and full values {values}." if not dimensions: - return DataCoordinate.makeEmpty(dimensions.universe) + return DataCoordinate.make_empty(dimensions.universe) return _FullTupleDataCoordinate(dimensions, values) def __bool__(self) -> bool: diff --git a/python/lsst/daf/butler/registry/queries/_structs.py b/python/lsst/daf/butler/registry/queries/_structs.py index 6d6039536c..e3e1de9c32 100644 --- a/python/lsst/daf/butler/registry/queries/_structs.py +++ b/python/lsst/daf/butler/registry/queries/_structs.py @@ -108,9 +108,9 @@ def combine( An object representing the WHERE clause for a query. 
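
# A sketch of the companion factory methods, using the same dimension names as
# the tests below; the "HSC" and 42 values are illustrative:
from lsst.daf.butler import DataCoordinate, DimensionUniverse

universe = DimensionUniverse()
group = universe.conform(("instrument", "visit"))
data_id = DataCoordinate.from_required_values(group, ("HSC", 42))
# hasFull() is True only when the group has no implied dimensions; to identify
# implied dimensions too, pass a value for every dimension in the group to
# from_full_values instead.
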
""" if data_id is None: - data_id = DataCoordinate.makeEmpty(dimensions.universe) + data_id = DataCoordinate.make_empty(dimensions.universe) if defaults is None: - defaults = DataCoordinate.makeEmpty(dimensions.universe) + defaults = DataCoordinate.make_empty(dimensions.universe) expression_predicate, governor_constraints = make_string_expression_predicate( expression, dimensions, diff --git a/python/lsst/daf/butler/registry/queries/expressions/_predicate.py b/python/lsst/daf/butler/registry/queries/expressions/_predicate.py index 637fc4316d..32d1b8160d 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/_predicate.py +++ b/python/lsst/daf/butler/registry/queries/expressions/_predicate.py @@ -126,7 +126,7 @@ def make_string_expression_predicate( """ governor_constraints: dict[str, Set[str]] = {} if data_id is None: - data_id = DataCoordinate.makeEmpty(dimensions.universe) + data_id = DataCoordinate.make_empty(dimensions.universe) if not string: for dimension in data_id.dimensions.governors: governor_constraints[dimension] = {cast(str, data_id[dimension])} @@ -146,7 +146,7 @@ def make_string_expression_predicate( if column and table in dimensions.universe.elements.names: raise RuntimeError(f"Bind parameter key {identifier!r} looks like a dimension column.") if defaults is None: - defaults = DataCoordinate.makeEmpty(dimensions.universe) + defaults = DataCoordinate.make_empty(dimensions.universe) # Convert the expression to disjunctive normal form (ORs of ANDs). # That's potentially super expensive in the general case (where there's # a ton of nesting of ANDs and ORs). That won't be the case for the diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py index ba8012f20b..fbe074c872 100644 --- a/python/lsst/daf/butler/registry/tests/_registry.py +++ b/python/lsst/daf/butler/registry/tests/_registry.py @@ -1685,7 +1685,7 @@ def testEmptyDimensionsQueries(self): self.loadData(registry, "base.yaml") schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") registry.registerDatasetType(schema) - dataId = DataCoordinate.makeEmpty(registry.dimensions) + dataId = DataCoordinate.make_empty(registry.dimensions) run1 = "run1" run2 = "run2" registry.registerRun(run1) @@ -3339,8 +3339,8 @@ def test_long_query_names(self) -> None: registry.registerRun(run1) run2 = "run2" registry.registerRun(run2) - (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1) - registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2) + (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1) + registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2) self.assertEqual( set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), {ref1}, diff --git a/tests/test_butler.py b/tests/test_butler.py index 70a0da4559..fcf9f85fca 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -2138,7 +2138,7 @@ def _absolute_transfer(self, transfer: str) -> None: metrics = makeExampleMetrics() with ResourcePath.temporary_uri(suffix=".json") as temp: - dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions) + dataId = DataCoordinate.make_empty(self.source_butler.dimensions) source_refs = [DatasetRef(datasetType, dataId, run=run)] temp.write(json.dumps(metrics.exportAsDict()).encode()) dataset = FileDataset(path=temp, refs=source_refs) diff --git a/tests/test_datastore.py 
b/tests/test_datastore.py index 6c1bd7b1a3..d32c028781 100644 --- a/tests/test_datastore.py +++ b/tests/test_datastore.py @@ -945,7 +945,7 @@ def test_pydantic_dict_storage_class_conversions(self) -> None: "store_as_model", dimensions=self.universe.empty, storageClass="DictConvertibleModel", - dataId=DataCoordinate.makeEmpty(self.universe), + dataId=DataCoordinate.make_empty(self.universe), ) content = {"a": "one", "b": "two"} model = DictConvertibleModel.from_dict(content, extra="original content") @@ -988,7 +988,7 @@ def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, d f"stora_as_{x}", dimensions=self.universe.empty, storageClass=f"{storageClass_root}{x}", - dataId=DataCoordinate.makeEmpty(self.universe), + dataId=DataCoordinate.make_empty(self.universe), ) for x in ["A", "B"] } diff --git a/tests/test_dimensions.py b/tests/test_dimensions.py index 4cd61de192..c43e8e7ea9 100644 --- a/tests/test_dimensions.py +++ b/tests/test_dimensions.py @@ -713,10 +713,10 @@ def testStandardize(self): DataCoordinate.standardize(dataId.mapping, universe=dataId.universe), DataCoordinate.standardize(dataId.mapping, dimensions=dataId.dimensions), DataCoordinate.standardize( - DataCoordinate.makeEmpty(dataId.dimensions.universe), **dataId.mapping + DataCoordinate.make_empty(dataId.dimensions.universe), **dataId.mapping ), DataCoordinate.standardize( - DataCoordinate.makeEmpty(dataId.dimensions.universe), + DataCoordinate.make_empty(dataId.dimensions.universe), dimensions=dataId.dimensions, **dataId.mapping, ), diff --git a/tests/test_expressions.py b/tests/test_expressions.py index 4b5334265f..59791ff371 100644 --- a/tests/test_expressions.py +++ b/tests/test_expressions.py @@ -355,8 +355,8 @@ def test_governor(self): universe = DimensionUniverse() dimensions = universe.conform(("instrument", "visit")) - dataId = DataCoordinate.makeEmpty(universe) - defaults = DataCoordinate.makeEmpty(universe) + dataId = DataCoordinate.make_empty(universe) + defaults = DataCoordinate.make_empty(universe) # governor-only constraint tree = parser.parse("instrument = 'LSST'") diff --git a/tests/test_formatter.py b/tests/test_formatter.py index ba5d6d21df..b59c25a72c 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -60,7 +60,7 @@ def setUp(self): self.id = 0 self.factory = FormatterFactory() self.universe = DimensionUniverse() - self.dataId = DataCoordinate.makeEmpty(self.universe) + self.dataId = DataCoordinate.make_empty(self.universe) # Dummy FileDescriptor for testing getFormatter self.fileDescriptor = FileDescriptor(