diff --git a/python/lsst/daf/butler/__init__.py b/python/lsst/daf/butler/__init__.py
index acbfd4e929..5bb08187ae 100644
--- a/python/lsst/daf/butler/__init__.py
+++ b/python/lsst/daf/butler/__init__.py
@@ -38,6 +38,7 @@
from ._butler import *
from ._butler_collections import *
from ._butler_config import *
+from ._butler_dataset_types import *
from ._butler_repo_index import *
from ._collection_type import CollectionType
from ._column_categorization import *
diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index fbfedd6bb7..2cb5540368 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -42,6 +42,7 @@
from ._butler_collections import ButlerCollections
from ._butler_config import ButlerConfig, ButlerType
+from ._butler_dataset_types import ButlerDatasetTypes
from ._butler_instance_options import ButlerInstanceOptions
from ._butler_repo_index import ButlerRepoIndex
from ._config import Config, ConfigSubset
@@ -841,6 +842,7 @@ def getURI(
)
return primary
+ # TODO: RFC deprecating this in favor of butler.dataset_types.get.
@abstractmethod
def get_dataset_type(self, name: str) -> DatasetType:
"""Get the `DatasetType`.
@@ -1505,6 +1507,16 @@ def run(self) -> str | None:
"""
raise NotImplementedError()
+ # TODO: make this abstract and implement in derived classes.
+ @property
+ def dataset_types(self) -> ButlerDatasetTypes:
+ """Object with methods for modifying and querying dataset types
+ (`~lsst.daf.butler.ButlerDatasetTypes`).
+
+ Use of this object is preferred over `registry` wherever possible.
+ """
+ raise NotImplementedError()
+
@property
@abstractmethod
def registry(self) -> Registry:
@@ -1648,7 +1660,7 @@ def query_datasets(
explain: bool = True,
**kwargs: Any,
) -> list[DatasetRef]:
- """Query for dataset references matching user-provided criteria.
+ """Query for dataset references of a single dataset type.
Parameters
----------
@@ -1659,7 +1671,6 @@ def query_datasets(
provided, the default collections are used. Can be a wildcard if
``find_first`` is `False` (if find first is requested the order
of collections matters and wildcards make the order indeterminate).
- See :ref:`daf_butler_collection_expressions` for more information.
find_first : `bool`, optional
If `True` (default), for each result data ID, only yield one
`DatasetRef` of each `DatasetType`, from the first collection in
@@ -1707,7 +1718,7 @@ def query_datasets(
Returns
-------
- refs : `.queries.DatasetRefQueryResults`
+ refs : `list` [`DatasetRef`]
Dataset references matching the given query criteria. Nested data
IDs are guaranteed to include values for all implied dimensions
(i.e. `DataCoordinate.hasFull` will return `True`).
@@ -1729,13 +1740,6 @@ def query_datasets(
collection wildcard is passed when ``find_first`` is `True`, or
when ``collections`` is `None` and default butler collections are
not defined.
-
- Notes
- -----
- When multiple dataset types are queried in a single call, the results
- of this operation are equivalent to querying for each dataset type
- separately in turn, and no information about the relationships between
- datasets of different types is included.
"""
if data_id is None:
data_id = DataCoordinate.make_empty(self.dimensions)
@@ -1878,6 +1882,89 @@ def query_dimension_records(
raise EmptyQueryResultError(list(result.explain_no_results()))
return dimension_records
+ def _query_all_datasets(
+ self,
+ collections: str | Iterable[str] | None = None,
+ *,
+ name: str | Iterable[str] = "*",
+ at_least_dimensions: Iterable[str] | DimensionGroup | None = None,
+ exact_dimensions: Iterable[str] | DimensionGroup | None = None,
+ storage_class: str | Iterable[str] | StorageClass | Iterable[StorageClass] | None = None,
+ is_calibration: bool | None = None,
+ find_first: bool = True,
+ data_id: DataId | None = None,
+ where: str = "",
+ bind: Mapping[str, Any] | None = None,
+ explain: bool = True,
+ **kwargs: Any,
+ ) -> list[DatasetRef]:
+ """Query for datasets of potentially multiple types.
+
+ Parameters
+ ----------
+ collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
+ The collection or collections to search, in order. If not provided
+ or `None`, the default collection search path for this butler is
+ used.
+ name : `str` or `~collections.abc.Iterable` [ `str` ], optional
+ Names or name patterns (glob-style) that returned dataset type
+ names must match. If an iterable, items are OR'd together. The
+ default is to include all dataset types in the given collections.
+ at_least_dimensions : `Iterable` [ `str` ] or `DimensionGroup`,\
+ optional
+ Dimensions that returned dataset types must have as a subset.
+ exact_dimensions : `Iterable` [ `str` ] or `DimensionGroup`, optional
+ Dimensions that returned dataset types must have exactly.
+ storage_class : `str` or `~collections.abc.Iterable` [ `str` ],\
+ or `StorageClass` or \
+ `~collections.abc.Iterable` [ `StorageClass` ], optional
+ Storage classes or storage class names that returned dataset types
+ must have. If an iterable, items are OR'd together.
+ is_calibration : `bool` or `None`, optional
+ If not `None`, constrain returned dataset types to be (`True`) or
+ not be (`False`) calibrations.
+ find_first : `bool`, optional
+ If `True` (default), for each result data ID, only yield one
+ `DatasetRef` of each `DatasetType`, from the first collection in
+ which a dataset of that dataset type appears (according to the
+ order of ``collections`` passed in).
+ data_id : `dict` or `DataCoordinate`, optional
+ A data ID whose key-value pairs are used as equality constraints in
+ the query.
+ where : `str`, optional
+ A string expression similar to a SQL WHERE clause. May involve any
+ column of a dimension table or (as a shortcut for the primary key
+ column of a dimension table) dimension name. See
+ :ref:`daf_butler_dimension_expressions` for more information.
+ bind : `~collections.abc.Mapping`, optional
+ Mapping containing literal values that should be injected into the
+ ``where`` expression, keyed by the identifiers they replace. Values
+ of collection type can be expanded in some cases; see
+ :ref:`daf_butler_dimension_expressions_identifiers` for more
+ information.
+ explain : `bool`, optional
+ If `True` (default) then `EmptyQueryResultError` exception is
+ raised when resulting list is empty. The exception contains
+ non-empty list of strings explaining possible causes for empty
+ result.
+ **kwargs
+ Additional keyword arguments are forwarded to
+ `DataCoordinate.standardize` when processing the ``data_id``
+ argument (and may be used to provide a constraining data ID even
+ when the ``data_id`` argument is `None`).
+
+ Returns
+ -------
+ refs : `list` [ `DatasetRef` ]
+ Dataset references matching the given query criteria. Nested data
+ IDs are guaranteed to include values for all implied dimensions
+ (i.e. `DataCoordinate.hasFull` will return `True`), but will not
+ include dimension records (`DataCoordinate.hasRecords` will be
+ `False`).
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
def clone(
self,
*,
diff --git a/python/lsst/daf/butler/_butler_dataset_types.py b/python/lsst/daf/butler/_butler_dataset_types.py
new file mode 100644
index 0000000000..399fe27ce4
--- /dev/null
+++ b/python/lsst/daf/butler/_butler_dataset_types.py
@@ -0,0 +1,244 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("ButlerDatasetTypes",)
+
+from abc import ABC, abstractmethod
+from collections.abc import Iterable, Sequence
+
+from ._dataset_type import DatasetType
+from ._storage_class import StorageClass
+from .dimensions import DimensionGroup
+
+
+class ButlerDatasetTypes(ABC, Sequence):
+ """Methods for working with the dataset types known to the Butler."""
+
+ @abstractmethod
+ def get(self, name: str) -> DatasetType:
+ """Return the dataset type with the given name.
+
+ Parameters
+ ----------
+ name : `str`
+ Name of the dataset type.
+
+ Returns
+ -------
+ dataset_type : `DatasetType`
+ Dataset type object with the given name.
+
+ Raises
+ ------
+ MissingDatasetTypeError
+ Raised if there is no dataset type with the given name.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def query(
+ self,
+ name: str | Iterable[str],
+ *,
+ at_least_dimensions: Iterable[str] | DimensionGroup | None = None,
+ exact_dimensions: Iterable[str] | DimensionGroup | None = None,
+ storage_class: str | Iterable[str] | StorageClass | Iterable[StorageClass] | None = None,
+ is_calibration: bool | None = None,
+ ) -> Iterable[DatasetType]:
+ """Query for dataset types matching the given criteria.
+
+ Parameters
+ ----------
+ name : `str` or `~collections.abc.Iterable` [ `str` ]
+ Names or name patterns (glob-style) that returned dataset type
+ names must match. If an iterable, items are OR'd together.
+ at_least_dimensions : `Iterable` [ `str` ] or `DimensionGroup`,\
+ optional
+ Dimensions that returned dataset types must have as a subset.
+ exact_dimensions : `Iterable` [ `str` ] or `DimensionGroup`,\
+ optional
+ Dimensions that returned dataset types must have exactly.
+ storage_class : `str` or `~collections.abc.Iterable` [ `str` ],\
+ or `StorageClass` or \
+ `~collections.abc.Iterable` [ `StorageClass` ], optional
+ Storage classes or storage class names that returned dataset types
+ must have. If an iterable, items are OR'd together.
+ is_calibration : `bool` or `None`, optional
+ If not `None`, constrain returned dataset types to be (`True`) or
+ not be (`False`) calibrations.
+
+ Returns
+ -------
+ dataset_types : `~collections.abc.Iterable` [ `DatasetType` ]
+ An iterable of dataset types. This is guaranteed to be a regular
+ Python in-memory container, not a lazy single-pass iterator, but
+ the type of container is currently left unspecified in order to
+ leave room for future convenience behavior.
+
+ Notes
+ -----
+ This method queries all registered dataset types in registry. To query
+ for the types of datasets that are in a collection, instead use::
+
+ info = butler.collections.query_info(
+ collections,
+ include_summaries=True,
+ )
+
+ for a simple summary of the dataset types in each collection (see
+ `lsst.daf.butler.ButlerCollections.query_info`). Or, for
+ more complex but powerful queries (including constraints on data IDs or
+ dataset counts), use::
+
+ with butler.query() as q:
+ dataset_types = q.dataset_types(collections)
+
+ See `lsst.daf.butler.queries.Query.dataset_types` for details.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def query_names(
+ self,
+ name: str | Iterable[str],
+ *,
+ at_least_dimensions: Iterable[str] | DimensionGroup | None = None,
+ exact_dimensions: Iterable[str] | DimensionGroup | None = None,
+ storage_class: str | Iterable[str] | StorageClass | Iterable[StorageClass] | None = None,
+ is_calibration: bool | None = None,
+ ) -> Iterable[str]:
+ """Query for the names of dataset types matching the given criteria.
+
+ Parameters
+ ----------
+ name : `str` or `~collections.abc.Iterable` [ `str` ]
+ Names or name patterns (glob-style) that returned dataset type
+ names must match. If an iterable, items are OR'd together.
+ at_least_dimensions : `Iterable` [ `str` ] or `DimensionGroup`,\
+ optional
+ Dimensions that returned dataset types must have as a subset.
+ exact_dimensions : `Iterable` [ `str` ] or `DimensionGroup`,\
+ optional
+ Dimensions that returned dataset types must have exactly.
+ storage_class : `str` or `~collections.abc.Iterable` [ `str` ],\
+ or `StorageClass` or \
+ `~collections.abc.Iterable` [ `StorageClass` ], optional
+ Storage classes or storage class names that returned dataset types
+ must have. If an iterable, items are OR'd together.
+ is_calibration : `bool` or `None`, optional
+ If not `None`, constrain returned dataset types to be (`True`) or
+ not be (`False`) calibrations.
+
+ Returns
+ -------
+ names : `~collections.abc.Iterable` [ `str` ]
+ An iterable of dataset type names.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def register(
+ self,
+ name_or_type: str | DatasetType,
+ /,
+ dimensions: Iterable[str] | DimensionGroup | None = None,
+ storage_class: str | StorageClass | None = None,
+ is_calibration: bool | None = None,
+ ) -> bool:
+ """Register a dataset type.
+
+ It is not an error to register the same `DatasetType` twice.
+
+ Parameters
+ ----------
+ name_or_type : `str` or `DatasetType`
+ The name of the dataset type to be added, or a complete
+ `DatasetType` object to add.
+ dimensions : `~collections.abc.Iterable` [ `str` ] or `DimensionGroup`,\
+ optional
+ Dimensions for the dataset type. Required if the first argument
+ is just a `str`, and overrides the dimensions if the first argument
+ is a `DatasetType`.
+ storage_class : `str` or `StorageClass`, optional
+ Storage class for the dataset type. Required if the first argument
+ is just a `str`, and overrides the storage class if the first
+ argument is a `DatasetType`.
+ is_calibration : `bool` or `None`, optional
+ Whether the dataset type is a calibration. If the first argument
+ is a `str`, defaults to `False`. If the first argument is a
+ `DatasetType` and this argument is not `None`, it overrides the
+ value on the `DatasetType`.
+
+ Returns
+ -------
+ inserted : `bool`
+ `True` if a new dataset type was inserted, `False` if an identical
+ existing dataset type was found. Note that in either case the
+ dataset type is guaranteed to be defined in the repository
+ consistently with the given definition.
+
+ Raises
+ ------
+ ValueError
+ Raised if the dimensions or storage class are invalid.
+ lsst.daf.butler.registry.ConflictingDefinitionError
+ Raised if this dataset type is already registered with a different
+ definition.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def remove(self, name: str) -> None:
+ """Remove the dataset type with the given name.
+
+ .. warning::
+
+ Butler implementations can cache the dataset type definitions.
+ This means that deleting the dataset type definition may result in
+ unexpected behavior from other butler processes that are active
+ that have not seen the deletion.
+
+ Parameters
+ ----------
+ name : `str` or `tuple` [`str`]
+ Name of the type to be removed or tuple containing a list of type
+ names to be removed. Wildcards are allowed.
+
+ Raises
+ ------
+ lsst.daf.butler.registry.OrphanedRecordError
+ Raised if an attempt is made to remove the dataset type definition
+ when there are still datasets associated with it.
+
+ Notes
+ -----
+ If the dataset type is not registered the method will return without
+ action.
+ """
+ raise NotImplementedError()
diff --git a/python/lsst/daf/butler/queries/__init__.py b/python/lsst/daf/butler/queries/__init__.py
index 720e4ca6d1..ef695eea46 100644
--- a/python/lsst/daf/butler/queries/__init__.py
+++ b/python/lsst/daf/butler/queries/__init__.py
@@ -28,6 +28,8 @@
from ._base import *
from ._data_coordinate_query_results import *
from ._dataset_query_results import *
+from ._dataset_type_results import *
from ._dimension_record_query_results import *
from ._general_query_results import *
+from ._heterogeneous_dataset_results import *
from ._query import *
diff --git a/python/lsst/daf/butler/queries/_dataset_type_results.py b/python/lsst/daf/butler/queries/_dataset_type_results.py
new file mode 100644
index 0000000000..5e45e80d93
--- /dev/null
+++ b/python/lsst/daf/butler/queries/_dataset_type_results.py
@@ -0,0 +1,124 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("DatasetTypeQueryResults",)
+
+from collections.abc import Iterable, Iterator
+
+from .._dataset_type import DatasetType
+
+
+class DatasetTypeQueryResults:
+ """A query result object that summarizes a query for datasets by doing the
+ equivalent of a SQL GROUP BY on the dataset type.
+ """
+
+ def __iter__(self) -> Iterator[DatasetType]:
+ raise NotImplementedError()
+
+ def names(self) -> Iterable[str]:
+ """Iterate over the names of the matched dataset types."""
+ raise NotImplementedError()
+
+ def by_collection(
+ self,
+ *,
+ flatten_chains: bool = False,
+ include_chains: bool | None = None,
+ ) -> Iterable[tuple[str, Iterable[DatasetType]]]:
+ """Iterate over results while grouping by collection as well as dataset
+ type.
+
+ Parameters
+ ----------
+ flatten_chains : `bool`, optional
+ If `True` (`False` is default), expand the child collections of
+ matching `~CollectionType.CHAINED` collections in the results.
+ include_chains : `bool` or `None`, optional
+ If `True`, yield records for matching `~CollectionType.CHAINED`
+ collections. Default is the opposite of ``flatten_chains``:
+ include either CHAINED collections or their children, but not both.
+
+ Returns
+ -------
+ rows : `~collections.abc.Iterable` [ `tuple` ]
+ An iterable of ``(collection, dataset_types)`` pairs. The
+ ``dataset_types`` values are guaranteed to be regular in-memory
+ iterables, not lazy single-pass iterators, but the exact type
+ of iterable is left unspecified to leave room for future
+ improvements.
+ """
+ raise NotImplementedError()
+
+ def with_counts(self, find_first: bool = True) -> Iterable[tuple[DatasetType, int]]:
+ """Iterate over results with counts for the number of datasets of each
+ type.
+
+ Parameters
+ ----------
+ find_first : `bool`, optional
+ If `True` (default), only count unique dataset type + data ID
+ combinations, not shadowed datasets.
+
+ Returns
+ -------
+ rows : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, `int` ] ]
+ An iterable of ``(dataset_type, count)`` pairs.
+ """
+ raise NotImplementedError()
+
+ def by_collection_with_counts(
+ self,
+ *,
+ flatten_chains: bool = False,
+ include_chains: bool | None = None,
+ ) -> Iterable[tuple[str, Iterable[tuple[DatasetType, int]]]]:
+ """Iterate over results while grouping by collection as well as dataset
+ type, and counting the number of datasets in each combination.
+
+ Parameters
+ ----------
+ flatten_chains : `bool`, optional
+ If `True` (`False` is default), expand the child collections of
+ matching `~CollectionType.CHAINED` collections in the results.
+ include_chains : `bool` or `None`, optional
+ If `True`, yield records for matching `~CollectionType.CHAINED`
+ collections. Default is the opposite of ``flatten_chains``:
+ include either CHAINED collections or their children, but not both.
+
+ Returns
+ -------
+ rows : `~collections.abc.Iterable` [ `tuple` ]
+ An iterable of ``(collection, dataset_types_with_counts)`` pairs,
+ with the latter an iterable of ``(DatasetType, int)``.
+ These inner iterables are guaranteed to be regular in-memory
+ iterables, not lazy single-pass iterators, but the exact type of
+ iterable is left unspecified to leave room for future improvements.
+ """
+ raise NotImplementedError()
diff --git a/python/lsst/daf/butler/queries/_heterogeneous_dataset_results.py b/python/lsst/daf/butler/queries/_heterogeneous_dataset_results.py
new file mode 100644
index 0000000000..fa5362f0bd
--- /dev/null
+++ b/python/lsst/daf/butler/queries/_heterogeneous_dataset_results.py
@@ -0,0 +1,101 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("HeterogeneousDatasetRefQueryResults",)
+
+from collections.abc import Iterable, Iterator
+
+from .._dataset_ref import DatasetId, DatasetRef
+from ._base import QueryBase
+
+
+class HeterogeneousDatasetRefQueryResults(QueryBase):
+ """A query result object for datasets with multiple dataset types."""
+
+ def __iter__(self) -> Iterator[DatasetRef]:
+ raise NotImplementedError()
+
+ def ids(self) -> Iterable[DatasetId]:
+ """Iterate over just the dataset IDs.
+
+ This may return a lazy single-pass iterator or a regular in-memory
+ iterable, in order to allow for the possibility that it may be
+ upgraded into a query results object in the future.
+ """
+ # In some cases - depending on the WHERE clause and other things joined
+ # in - this could result in a single query, rather than a Python-side
+ # aggregation of per-dimension-group queries.
+ raise NotImplementedError()
+
+ def any(self, *, execute: bool = True, exact: bool = True) -> bool:
+ # Docstring inherited.
+ raise NotImplementedError("Base class implementation is not correct for this derived class.")
+
+ def explain_no_results(self, execute: bool = True) -> Iterable[str]:
+ # Docstring inherited.
+ raise NotImplementedError("Base class implementation is not correct for this derived class.")
+
+ def count(self, *, exact: bool = True, discard: bool = False) -> int:
+ """Count the number of rows this query would return.
+
+ Parameters
+ ----------
+ exact : `bool`, optional
+ If `True`, run the full query and perform post-query filtering if
+ needed to account for that filtering in the count. If `False`, the
+ result may be an upper bound.
+ discard : `bool`, optional
+ If `True`, compute the exact count even if it would require running
+ the full query and then throwing away the result rows after
+ counting them. If `False`, this is an error, as the user would
+ usually be better off executing the query first to fetch its rows
+ into a new query (or passing ``exact=False``). Ignored if
+ ``exact=False``.
+
+ Returns
+ -------
+ count : `int`
+ The number of rows the query would return, or an upper bound if
+ ``exact=False``.
+ """
+ raise NotImplementedError()
+
+ # This class intentionally lacks some attributes that are defined on other
+ # QueryResults objects:
+ #
+ # - 'dimensions' isn't well-defined in general.
+ #
+ # - 'order_by' and 'limit' are hard to implement in the common case where
+ # we have to run one query for each dimension group.
+ #
+ # - 'where' exists on other result objects because the way they are
+ # constructed adds context (a dataset search join, some dimensions) that
+ # can help interpret arguments to 'where'. That's not generally true
+ # here, so calling `Query.where(...).all_datasets()` can do anything that
+ # `Query.all_datasets().where(...)` might be able to do.
diff --git a/python/lsst/daf/butler/queries/_query.py b/python/lsst/daf/butler/queries/_query.py
index 75f8d8bfe7..dcc0d0436e 100644
--- a/python/lsst/daf/butler/queries/_query.py
+++ b/python/lsst/daf/butler/queries/_query.py
@@ -27,6 +27,9 @@
from __future__ import annotations
+from lsst.daf.butler.queries._dataset_type_results import DatasetTypeQueryResults
+from lsst.daf.butler.queries._heterogeneous_dataset_results import HeterogeneousDatasetRefQueryResults
+
__all__ = ("Query",)
from collections.abc import Iterable, Mapping, Set
@@ -37,7 +40,7 @@
from .._dataset_type import DatasetType
from .._exceptions import DimensionNameError, InvalidQueryError
-from .._storage_class import StorageClassFactory
+from .._storage_class import StorageClass, StorageClassFactory
from ..dimensions import DataCoordinate, DataId, DataIdValue, DimensionGroup
from ..registry import DatasetTypeError
from ._base import QueryBase
@@ -228,7 +231,8 @@ def datasets(
*,
find_first: bool = True,
) -> DatasetRefQueryResults:
- """Return a result object that is a `DatasetRef` iterable.
+ """Return a result object that is a `DatasetRef` iterable with a single
+ dataset type.
Parameters
----------
@@ -242,8 +246,7 @@ def datasets(
If `True` (default), for each result data ID, only yield one
`DatasetRef` of each `DatasetType`, from the first collection in
which a dataset of that dataset type appears (according to the
- order of ``collections`` passed in). If `True`, ``collections``
- must not be ``...``.
+ order of ``collections`` passed in).
Returns
-------
@@ -448,6 +451,84 @@ def general(
)
return GeneralQueryResults(self._driver, tree=tree, spec=result_spec)
+ def all_datasets(
+ self,
+ collections: str | Iterable[str] | None = None,
+ *,
+ name: str | Iterable[str] = "*",
+ at_least_dimensions: Iterable[str] | DimensionGroup | None = None,
+ exact_dimensions: Iterable[str] | DimensionGroup | None = None,
+ storage_class: str | Iterable[str] | StorageClass | Iterable[StorageClass] | None = None,
+ is_calibration: bool | None = None,
+ find_first: bool = True,
+ ) -> HeterogeneousDatasetRefQueryResults:
+ """Return a result object that is a `DatasetRef` iterable whose entries
+ may have different dataset types.
+
+ Parameters
+ ----------
+ collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
+ The collection or collections to search, in order. If not provided
+ or `None`, the default collection search path for this butler is
+ used.
+ name : `str` or `~collections.abc.Iterable` [ `str` ], optional
+ Names or name patterns (glob-style) that returned dataset type
+ names must match. If an iterable, items are OR'd together. The
+ default is to include all dataset types in the given collections.
+ at_least_dimensions : `Iterable` [ `str` ] or `DimensionGroup`,\
+ optional
+ Dimensions that returned dataset types must have as a subset.
+ exact_dimensions : `Iterable` [ `str` ] or `DimensionGroup`, optional
+ Dimensions that returned dataset types must have exactly.
+ storage_class : `str` or `~collections.abc.Iterable` [ `str` ],\
+ or `StorageClass` or \
+ `~collections.abc.Iterable` [ `StorageClass` ], optional
+ Storage classes or storage class names that returned dataset types
+ must have. If an iterable, items are OR'd together.
+ is_calibration : `bool` or `None`, optional
+ If not `None`, constrain returned dataset types to be (`True`) or
+ not be (`False`) calibrations.
+ find_first : `bool`, optional
+ If `True` (default), for each result data ID, only yield one
+ `DatasetRef` of each `DatasetType`, from the first collection in
+ which a dataset of that dataset type appears (according to the
+ order of ``collections`` passed in).
+
+ Returns
+ -------
+ refs : `.queries.HeterogeneousDatasetRefQueryResults`
+ Dataset references matching the given query criteria. Nested data
+ IDs are guaranteed to include values for all implied dimensions
+ (i.e. `DataCoordinate.hasFull` will return `True`), but will not
+ include dimension records (`DataCoordinate.hasRecords` will be
+ `False`).
+ """
+ raise NotImplementedError()
+
+ def dataset_types( # numpydoc ignore=PR01
+ self,
+ collections: str | Iterable[str] | None = None,
+ *,
+ name: str | Iterable[str] = "*",
+ at_least_dimensions: Iterable[str] | DimensionGroup | None = None,
+ exact_dimensions: Iterable[str] | DimensionGroup | None = None,
+ storage_class: str | Iterable[str] | StorageClass | Iterable[StorageClass] | None = None,
+ is_calibration: bool | None = None,
+ ) -> DatasetTypeQueryResults:
+ """Return a result object that groups dataset queries by their dataset
+ type.
+
+ See `all_datasets` for parameter descriptions.
+
+ Returns
+ -------
+ types : `DatasetTypeQueryResults`
+ A result object that iterates over `DatasetType` objects and
+ provides methods for grouping further by collection and/or counting
+ the number of datasets of each type.
+ """
+ raise NotImplementedError()
+
def materialize(
self,
*,