diff --git a/python/lsst/daf/butler/queries/_query.py b/python/lsst/daf/butler/queries/_query.py index 3618de338e..934f813dee 100644 --- a/python/lsst/daf/butler/queries/_query.py +++ b/python/lsst/daf/butler/queries/_query.py @@ -35,6 +35,7 @@ from .._query import Query from ..dimensions import DataCoordinate, DataId, DataIdValue, DimensionGroup from .data_coordinate_results import DataCoordinateResultSpec, RelationDataCoordinateQueryResults +from .dimension_record_results import DimensionRecordResultSpec, RelationDimensionRecordQueryResults from .driver import QueryDriver from .expression_factory import ExpressionFactory, ExpressionProxy from .relation_tree import ( @@ -45,6 +46,8 @@ OrderExpression, Predicate, RootRelation, + convert_order_by_args, + convert_where_args, make_dimension_relation, make_unit_relation, ) @@ -63,7 +66,7 @@ class RelationQuery(Query): ---------- driver : `QueryDriver` Implementation object that knows how to actually execute queries. - tree : `Relation` + tree : `RootRelation` Description of the query as a tree of relation operations. The instance returned directly by the `Butler._query` entry point should be constructed via `make_unit_relation`. @@ -82,6 +85,16 @@ def __init__(self, driver: QueryDriver, tree: RootRelation, include_dimension_re self._tree = tree self._include_dimension_records = include_dimension_records + @property + def dimensions(self) -> DimensionGroup: + """The dimensions joined into the query.""" + return self._tree.dimensions + + @property + def dataset_types(self) -> frozenset[str]: + """The names of dataset types joined into the query.""" + return self._tree.available_dataset_types + @property def expression_factory(self) -> ExpressionFactory: """A factory for column expressions using overloaded operators. @@ -138,6 +151,8 @@ def data_ids( self, dimensions: DimensionGroup | Iterable[str] | str, *, + # TODO: Arguments below are redundant with chaining methods; which ones + # are so convenient we have to keep them? data_id: DataId | None = None, where: str | Predicate = "", bind: Mapping[str, Any] | None = None, @@ -150,11 +165,11 @@ def data_ids( if not dimensions >= self._tree.dimensions: tree = tree.join(make_dimension_relation(dimensions)) if data_id or where: - tree = tree.where(*self._convert_predicate_args(where, data_id, bind=bind, **kwargs)) + tree = tree.where(*convert_where_args(self._tree, where, data_id, bind=bind, **kwargs)) result_spec = DataCoordinateResultSpec( dimensions=dimensions, include_dimension_records=self._include_dimension_records ) - return RelationDataCoordinateQueryResults(tree, self._driver, result_spec) + return RelationDataCoordinateQueryResults(self._driver, tree, result_spec) def datasets( self, @@ -174,13 +189,22 @@ def dimension_records( self, element: str, *, + # TODO: Arguments below are redundant with chaining methods; which ones + # are so convenient we have to keep them? data_id: DataId | None = None, where: str = "", bind: Mapping[str, Any] | None = None, **kwargs: Any, ) -> DimensionRecordQueryResults: # Docstring inherited. - raise NotImplementedError("TODO") + data_id = DataCoordinate.standardize(data_id, universe=self._driver.universe, **kwargs) + tree = self._tree + if element not in tree.dimensions.elements: + tree = tree.join(make_dimension_relation(self._driver.universe[element].minimal_group)) + if data_id or where: + tree = tree.where(*convert_where_args(self._tree, where, data_id, bind=bind, **kwargs)) + result_spec = DimensionRecordResultSpec(element=self._driver.universe[element]) + return RelationDimensionRecordQueryResults(self._driver, tree, result_spec) # TODO: add general, dict-row results method and QueryResults. @@ -299,7 +323,7 @@ def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> RelationQu their fields in expressions. """ return RelationQuery( - tree=self._tree.order_by(*self._convert_order_by_args(*args)), + tree=self._tree.order_by(*convert_order_by_args(self._tree, *args)), driver=self._driver, include_dimension_records=self._include_dimension_records, ) @@ -531,7 +555,7 @@ def where( their fields in expressions. """ return RelationQuery( - tree=self._tree.where(*self._convert_predicate_args(*args, bind=bind, **kwargs)), + tree=self._tree.where(*convert_where_args(self._tree, *args, bind=bind, **kwargs)), driver=self._driver, include_dimension_records=self._include_dimension_records, ) @@ -587,13 +611,3 @@ def find_first( driver=self._driver, include_dimension_records=self._include_dimension_records, ) - - def _convert_order_by_args(self, *args: str | OrderExpression | ExpressionProxy) -> list[OrderExpression]: - """Convert ``order_by`` arguments to a list of column expressions.""" - raise NotImplementedError("TODO: Parse string expression.") - - def _convert_predicate_args( - self, *args: str | Predicate | DataId, bind: Mapping[str, Any] | None = None - ) -> list[Predicate]: - """Convert ``where`` arguments to a list of column expressions.""" - raise NotImplementedError("TODO: Parse string expression.") diff --git a/python/lsst/daf/butler/queries/data_coordinate_results.py b/python/lsst/daf/butler/queries/data_coordinate_results.py index 6b84e56391..5f85fa55de 100644 --- a/python/lsst/daf/butler/queries/data_coordinate_results.py +++ b/python/lsst/daf/butler/queries/data_coordinate_results.py @@ -34,7 +34,7 @@ ) from collections.abc import Iterable, Iterator -from contextlib import AbstractContextManager +from contextlib import contextmanager from typing import TYPE_CHECKING, Any, Literal import pydantic @@ -44,7 +44,13 @@ from .._query_results import DataCoordinateQueryResults, DatasetQueryResults from ..dimensions import DataCoordinate, DimensionGroup from .driver import QueryDriver -from .relation_tree import RootRelation +from .relation_tree import ( + InvalidRelationError, + Materialization, + RootRelation, + convert_order_by_args, + make_unit_relation, +) if TYPE_CHECKING: from .driver import PageKey @@ -74,7 +80,16 @@ class RelationDataCoordinateQueryResults(DataCoordinateQueryResults): """Implementation of DataCoordinateQueryResults for the relation-based query system. - + Parameters + ---------- + driver : `QueryDriver` + Implementation object that knows how to actually execute queries. + tree : `RootRelation` + Description of the query as a tree of relation operations. The + instance returned directly by the `Butler._query` entry point should + be constructed via `make_unit_relation`. + spec : `DataCoordinateResultSpec` + Specification for the details of the data IDs to return. Notes ----- @@ -82,13 +97,14 @@ class RelationDataCoordinateQueryResults(DataCoordinateQueryResults): we won't need an ABC if this is the only implementation. """ - def __init__(self, tree: RootRelation, driver: QueryDriver, spec: DataCoordinateResultSpec): - self._tree = tree + def __init__(self, driver: QueryDriver, tree: RootRelation, spec: DataCoordinateResultSpec): self._driver = driver + self._tree = tree self._spec = spec @property def dimensions(self) -> DimensionGroup: + # Docstring inherited. return self._spec.dimensions def __iter__(self) -> Iterator[DataCoordinate]: @@ -99,20 +115,36 @@ def __iter__(self) -> Iterator[DataCoordinate]: yield from page.rows def has_full(self) -> bool: # TODO: since this is always true, we may not need it. + # Docstring inherited. return True def has_records(self) -> bool: # TODO: should this be a property now? + # Docstring inherited. return self._spec.include_dimension_records - def materialize(self) -> AbstractContextManager[DataCoordinateQueryResults]: - raise NotImplementedError() + @contextmanager + def materialize(self) -> Iterator[DataCoordinateQueryResults]: + # Docstring inherited. + key = self._driver.materialize(self._tree, frozenset()) + yield RelationDataCoordinateQueryResults( + self._driver, + tree=make_unit_relation(self._driver.universe).join( + Materialization.model_construct(key=key, operand=self._tree, dataset_types=frozenset()) + ), + spec=self._spec, + ) + # TODO: Right now we just rely on the QueryDriver context instead of + # using this one. If we want this to remain a context manager, we + # should make it do something, e.g. by adding QueryDriver method to + # drop a materialization. def expanded(self) -> DataCoordinateQueryResults: + # Docstring inherited. if self.has_records(): return self return RelationDataCoordinateQueryResults( + self._driver, tree=self._tree, - driver=self._driver, spec=DataCoordinateResultSpec(dimensions=self._spec.dimensions, include_dimension_records=True), ) @@ -122,13 +154,31 @@ def subset( *, unique: bool = False, ) -> DataCoordinateQueryResults: - raise NotImplementedError( - "TODO: Copy with a new result spec and/or DimensionProjection pushed onto tree." + # Docstring inherited. + if dimensions is None: + dimensions = self.dimensions + else: + dimensions = self._driver.universe.conform(dimensions) + if not dimensions <= self.dimensions: + raise InvalidRelationError( + f"New dimensions {dimensions} are not a subset of the current " + f"dimensions {self.dimensions}." + ) + # TODO: right now I'm assuming we'll deduplicate all query results (per + # page), even if we have to do that in Python, so the 'unique' argument + # doesn't do anything. + return RelationDataCoordinateQueryResults( + self._driver, + tree=self._tree, + spec=DataCoordinateResultSpec( + dimensions=dimensions, include_dimension_records=self._spec.include_dimension_records + ), ) def find_datasets( self, dataset_type: DatasetType | str, collections: Any, *, find_first: bool = True ) -> DatasetQueryResults: + # Docstring inherited. raise NotImplementedError("TODO: Copy with a new result spec and maybe a new DatasetSearch in tree.") def find_related_datasets( @@ -139,19 +189,33 @@ def find_related_datasets( find_first: bool = True, dimensions: DimensionGroup | Iterable[str] | None = None, ) -> Iterable[tuple[DataCoordinate, DatasetRef]]: + # Docstring inherited. raise NotImplementedError("TODO: drop this in favor of GeneralQueryResults") def count(self, *, exact: bool = True, discard: bool = False) -> int: + # Docstring inherited. return self._driver.count(self._tree, exact=exact, discard=discard) def any(self, *, execute: bool = True, exact: bool = True) -> bool: + # Docstring inherited. return self._driver.any(self._tree, execute=execute, exact=exact) def explain_no_results(self, execute: bool = True) -> Iterable[str]: + # Docstring inherited. return self._driver.explain_no_results(self._tree, execute=execute) def order_by(self, *args: str) -> DataCoordinateQueryResults: - raise NotImplementedError("TODO: Copy with a OrderedSlice pushed onto tree.") + # Docstring inherited. + return RelationDataCoordinateQueryResults( + driver=self._driver, + tree=self._tree.order_by(*convert_order_by_args(self._tree, *args)), + spec=self._spec, + ) - def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults: - raise NotImplementedError("TODO: Copy with a OrderedSlice pushed onto tree.") + def limit(self, limit: int | None = None, offset: int = 0) -> DataCoordinateQueryResults: + # Docstring inherited. + return RelationDataCoordinateQueryResults( + driver=self._driver, + tree=self._tree.order_by(limit=limit, offset=offset), + spec=self._spec, + ) diff --git a/python/lsst/daf/butler/queries/dimension_record_results.py b/python/lsst/daf/butler/queries/dimension_record_results.py index cc60d9f870..dfb32a8ef7 100644 --- a/python/lsst/daf/butler/queries/dimension_record_results.py +++ b/python/lsst/daf/butler/queries/dimension_record_results.py @@ -32,12 +32,15 @@ "DimensionRecordResultPage", ) +from collections.abc import Iterable, Iterator from typing import Literal import pydantic +from .._query_results import DimensionRecordQueryResults from ..dimensions import DimensionElement, DimensionRecord -from .driver import PageKey +from .driver import PageKey, QueryDriver +from .relation_tree import RootRelation, convert_order_by_args class DimensionRecordResultSpec(pydantic.BaseModel): @@ -58,3 +61,78 @@ class DimensionRecordResultPage(pydantic.BaseModel): # rather than `set` under the hood). Right now this model isn't actually # serializable. rows: list[DimensionRecord] + + +class RelationDimensionRecordQueryResults(DimensionRecordQueryResults): + """Implementation of DimensionRecordQueryResults for the relation-based + query system. + + Parameters + ---------- + driver : `QueryDriver` + Implementation object that knows how to actually execute queries. + tree : `RootRelation` + Description of the query as a tree of relation operations. The + instance returned directly by the `Butler._query` entry point should + be constructed via `make_unit_relation`. + spec : `DimensionRecordResultSpec` + Specification for the details of the records to return. + + Notes + ----- + Ideally this will eventually just be "DimensionRecordQueryResults", because + we won't need an ABC if this is the only implementation. + """ + + def __init__(self, driver: QueryDriver, tree: RootRelation, spec: DimensionRecordResultSpec): + self._driver = driver + self._tree = tree + self._spec = spec + + def __iter__(self) -> Iterator[DimensionRecord]: + page = self._driver.execute(self._tree, self._spec) + yield from page.rows + while page.next_key is not None: + page = self._driver.fetch_next_page(self._spec, page.next_key) + yield from page.rows + + # TODO: might want to make this `str`, in keeping with RFC-834's philosophy + # of keeping DimensionElement out of public APIs (this old-query-system + # version of this one was accidentally omitted from that RFC, but we don't + # need an RFC to change it in the new system). + @property + def element(self) -> DimensionElement: + # Docstring inherited. + return self._spec.element + + def run(self) -> DimensionRecordQueryResults: + # Docstring inherited. + raise NotImplementedError("TODO: remove this from the base class.") + + def count(self, *, exact: bool = True, discard: bool = False) -> int: + # Docstring inherited. + return self._driver.count(self._tree, exact=exact, discard=discard) + + def any(self, *, execute: bool = True, exact: bool = True) -> bool: + # Docstring inherited. + return self._driver.any(self._tree, execute=execute, exact=exact) + + def order_by(self, *args: str) -> DimensionRecordQueryResults: + # Docstring inherited. + return RelationDimensionRecordQueryResults( + driver=self._driver, + tree=self._tree.order_by(*convert_order_by_args(self._tree, *args)), + spec=self._spec, + ) + + def limit(self, limit: int | None = None, offset: int = 0) -> DimensionRecordQueryResults: + # Docstring inherited. + return RelationDimensionRecordQueryResults( + driver=self._driver, + tree=self._tree.order_by(limit=limit, offset=offset), + spec=self._spec, + ) + + def explain_no_results(self, execute: bool = True) -> Iterable[str]: + # Docstring inherited. + return self._driver.explain_no_results(self._tree, execute=execute) diff --git a/python/lsst/daf/butler/queries/relation_tree/_ordered_slice.py b/python/lsst/daf/butler/queries/relation_tree/_ordered_slice.py index 339a5be01c..c0c7e73165 100644 --- a/python/lsst/daf/butler/queries/relation_tree/_ordered_slice.py +++ b/python/lsst/daf/butler/queries/relation_tree/_ordered_slice.py @@ -27,7 +27,7 @@ from __future__ import annotations -__all__ = ("OrderedSlice",) +__all__ = ("OrderedSlice", "convert_order_by_args") from typing import TYPE_CHECKING, Literal, final @@ -44,8 +44,9 @@ ) if TYPE_CHECKING: + from ..expression_factory import ExpressionProxy from ._predicate import Predicate - from ._relation import OrderedSliceOperand, Relation + from ._relation import OrderedSliceOperand, Relation, RootRelation from .joins import JoinArg @@ -173,3 +174,10 @@ def _validate_nontrivial(self) -> OrderedSlice: if not self.order_terms and self.limit is None and not self.offset: raise InvalidRelationError("Operation does not do anything.") return self + + +def convert_order_by_args( + tree: RootRelation, *args: str | OrderExpression | ExpressionProxy +) -> list[OrderExpression]: + """Convert ``order_by`` arguments to a list of column expressions.""" + raise NotImplementedError("TODO") diff --git a/python/lsst/daf/butler/queries/relation_tree/_select.py b/python/lsst/daf/butler/queries/relation_tree/_select.py index e6e1794474..87c2b4a0eb 100644 --- a/python/lsst/daf/butler/queries/relation_tree/_select.py +++ b/python/lsst/daf/butler/queries/relation_tree/_select.py @@ -27,15 +27,16 @@ from __future__ import annotations -__all__ = ("Select", "make_unit_relation", "make_dimension_relation") +__all__ = ("Select", "make_unit_relation", "make_dimension_relation", "convert_where_args") import itertools +from collections.abc import Mapping from functools import cached_property -from typing import TYPE_CHECKING, Literal, final +from typing import TYPE_CHECKING, Any, Literal, final import pydantic -from ...dimensions import DimensionGroup, DimensionUniverse +from ...dimensions import DataId, DimensionGroup, DimensionUniverse from ._base import InvalidRelationError, RelationBase from ._column_reference import DatasetFieldReference, DimensionFieldReference, DimensionKeyReference from ._predicate import Predicate @@ -45,7 +46,7 @@ from ._column_expression import OrderExpression from ._find_first import FindFirst from ._ordered_slice import OrderedSlice - from ._relation import JoinOperand, Relation + from ._relation import JoinOperand, Relation, RootRelation def make_unit_relation(universe: DimensionUniverse) -> Select: @@ -278,3 +279,10 @@ def _validate_required_columns(self) -> Select: if dataset_type not in self.available_dataset_types: raise InvalidRelationError(f"Dataset search for column {column} is not present.") return self + + +def convert_where_args( + tree: RootRelation, *args: str | Predicate | DataId, bind: Mapping[str, Any] | None = None +) -> list[Predicate]: + """Convert ``where`` arguments to a list of column expressions.""" + raise NotImplementedError("TODO: Parse string expression.")