Sketch out new interfaces for querying multiple dataset types.
TallJimbo committed Aug 21, 2024
1 parent 201ee96 commit 783f3ee
Showing 7 changed files with 655 additions and 7 deletions.
1 change: 1 addition & 0 deletions python/lsst/daf/butler/__init__.py
@@ -38,6 +38,7 @@
from ._butler import *
from ._butler_collections import *
from ._butler_config import *
from ._butler_dataset_types import *
from ._butler_repo_index import *
from ._collection_type import CollectionType
from ._column_categorization import *
103 changes: 98 additions & 5 deletions python/lsst/daf/butler/_butler.py
Expand Up @@ -41,6 +41,7 @@

from ._butler_collections import ButlerCollections
from ._butler_config import ButlerConfig, ButlerType
from ._butler_dataset_types import ButlerDatasetTypes
from ._butler_instance_options import ButlerInstanceOptions
from ._butler_repo_index import ButlerRepoIndex
from ._config import Config, ConfigSubset
@@ -840,6 +841,7 @@ def getURI(
)
return primary

# TODO: RFC deprecating this in favor of butler.dataset_types.get.
@abstractmethod
def get_dataset_type(self, name: str) -> DatasetType:
"""Get the `DatasetType`.
@@ -1448,6 +1450,16 @@ def run(self) -> str | None:
"""
raise NotImplementedError()

# TODO: make this abstract and implement in derived classes.
@property
def dataset_types(self) -> ButlerDatasetTypes:
"""Object with methods for modifying and querying dataset types
(`~lsst.daf.butler.ButlerDatasetTypes`).

Use of this object is preferred over `registry` wherever possible.
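
Examples
--------
A hypothetical sketch, assuming the ``get`` method referenced in the
deprecation TODO on `get_dataset_type` above; the repository path and
dataset type name are illustrative only::

    from lsst.daf.butler import Butler

    butler = Butler("some/repo")
    # Look up a single dataset type by name, replacing
    # butler.get_dataset_type("flat").
    flat = butler.dataset_types.get("flat")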
"""
raise NotImplementedError()

@property
@abstractmethod
def registry(self) -> Registry:
@@ -1572,22 +1584,20 @@ def _query_datasets(
explain: bool = True,
**kwargs: Any,
) -> list[DatasetRef]:
"""Query for dataset references matching user-provided criteria.
"""Query for dataset references of a single dataset type.
Parameters
----------
dataset_type : `str` or `DatasetType`
Dataset type object or name to search for.
collections : collection expression, optional
A collection name or iterable of collection names to search. If not
provided, the default collections are used.
find_first : `bool`, optional
If `True` (default), for each result data ID, only yield one
`DatasetRef` of each `DatasetType`, from the first collection in
which a dataset of that dataset type appears (according to the
order of ``collections`` passed in).
data_id : `dict` or `DataCoordinate`, optional
A data ID whose key-value pairs are used as equality constraints in
the query.
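
A minimal usage sketch of this single-type query, assuming a butler with
default collections configured; the repository path, dataset type, and
data ID values below are illustrative only:

    from lsst.daf.butler import Butler

    butler = Butler("some/repo", collections=["HSC/defaults"])
    # Find-first search for one dataset type.  Additional keyword
    # arguments would be folded into the data ID constraint via
    # DataCoordinate.standardize.
    refs = butler._query_datasets(
        "calexp",
        where="instrument = 'HSC' AND visit = 12345",
    )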
@@ -1739,6 +1749,89 @@ def _query_dimension_records(
raise EmptyQueryResultError(list(result.explain_no_results()))
return dimension_records

def _query_all_datasets(
self,
collections: str | Iterable[str] | None = None,
*,
name: str | Iterable[str] = "*",
at_least_dimensions: Iterable[str] | DimensionGroup | None = None,
exact_dimensions: Iterable[str] | DimensionGroup | None = None,
storage_class: str | Iterable[str] | StorageClass | Iterable[StorageClass] | None = None,
is_calibration: bool | None = None,
find_first: bool = True,
data_id: DataId | None = None,
where: str = "",
bind: Mapping[str, Any] | None = None,
explain: bool = True,
**kwargs: Any,
) -> list[DatasetRef]:
"""Query for datasets of potentially multiple types.
Parameters
----------
collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
The collection or collections to search, in order. If not provided
or `None`, the default collection search path for this butler is
used.
name : `str` or `~collections.abc.Iterable` [ `str` ], optional
Names or name patterns (glob-style) that returned dataset type
names must match. If an iterable, items are OR'd together. The
default is to include all dataset types in the given collections.
at_least_dimensions : `Iterable` [ `str` ] or `DimensionGroup`,\
optional
Dimensions that returned dataset types must have as a subset.
exact_dimensions : `Iterable` [ `str` ] or `DimensionGroup`,\
optional
Dimensions that returned dataset types must have exactly.
storage_class : `str` or `~collections.abc.Iterable` [ `str` ],\
or `StorageClass` or \
`~collections.abc.Iterable` [ `StorageClass` ], optional
Storage classes or storage class names that returned dataset types
must have. If an iterable, items are OR'd together.
is_calibration : `bool` or `None`, optional
If not `None`, constrain returned dataset types to be or not be
calibrations.
find_first : `bool`, optional
If `True` (default), for each result data ID, only yield one
`DatasetRef` of each `DatasetType`, from the first collection in
which a dataset of that dataset type appears (according to the
order of ``collections`` passed in).
data_id : `dict` or `DataCoordinate`, optional
A data ID whose key-value pairs are used as equality constraints in
the query.
where : `str`, optional
A string expression similar to a SQL WHERE clause. May involve any
column of a dimension table or (as a shortcut for the primary key
column of a dimension table) dimension name. See
:ref:`daf_butler_dimension_expressions` for more information.
bind : `~collections.abc.Mapping`, optional
Mapping containing literal values that should be injected into the
``where`` expression, keyed by the identifiers they replace. Values
of collection type can be expanded in some cases; see
:ref:`daf_butler_dimension_expressions_identifiers` for more
information.
explain : `bool`, optional
If `True` (default), raise `EmptyQueryResultError` when the resulting
list is empty; the exception contains a non-empty list of strings
explaining possible causes for the empty result.
**kwargs
Additional keyword arguments are forwarded to
`DataCoordinate.standardize` when processing the ``data_id``
argument (and may be used to provide a constraining data ID even
when the ``data_id`` argument is `None`).

Returns
-------
refs : `list` [ `DatasetRef` ]
Dataset references matching the given query criteria. Nested data
IDs are guaranteed to include values for all implied dimensions
(i.e. `DataCoordinate.hasFull` will return `True`), but will not
include dimension records (`DataCoordinate.hasRecords` will be
`False`).
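
Examples
--------
A hypothetical sketch of the intended call pattern; the collection,
name pattern, and data ID values are illustrative only::

    refs = butler._query_all_datasets(
        "HSC/runs/RC2",
        name="*_metadata",
        where="instrument = 'HSC' AND visit = my_visit",
        bind={"my_visit": 12345},
    )
    # Unlike _query_datasets, the result may mix multiple dataset
    # types matching the name pattern.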
"""
raise NotImplementedError()

@abstractmethod
def _clone(
self,