Commit

Continue work on query results classes in new system.
TallJimbo committed Dec 11, 2023
1 parent 01df32d commit 25d2fb4
Showing 5 changed files with 208 additions and 36 deletions.
46 changes: 30 additions & 16 deletions python/lsst/daf/butler/queries/_query.py
@@ -35,6 +35,7 @@
from .._query import Query
from ..dimensions import DataCoordinate, DataId, DataIdValue, DimensionGroup
from .data_coordinate_results import DataCoordinateResultSpec, RelationDataCoordinateQueryResults
from .dimension_record_results import DimensionRecordResultSpec, RelationDimensionRecordQueryResults
from .driver import QueryDriver
from .expression_factory import ExpressionFactory, ExpressionProxy
from .relation_tree import (
@@ -45,6 +46,8 @@
OrderExpression,
Predicate,
RootRelation,
convert_order_by_args,
convert_where_args,
make_dimension_relation,
make_unit_relation,
)
@@ -63,7 +66,7 @@ class RelationQuery(Query):
----------
driver : `QueryDriver`
Implementation object that knows how to actually execute queries.
tree : `Relation`
tree : `RootRelation`
Description of the query as a tree of relation operations. The
instance returned directly by the `Butler._query` entry point should
be constructed via `make_unit_relation`.
@@ -82,6 +85,16 @@ def __init__(self, driver: QueryDriver, tree: RootRelation, include_dimension_re
self._tree = tree
self._include_dimension_records = include_dimension_records

@property
def dimensions(self) -> DimensionGroup:
"""The dimensions joined into the query."""
return self._tree.dimensions

@property
def dataset_types(self) -> frozenset[str]:
"""The names of dataset types joined into the query."""
return self._tree.available_dataset_types

@property
def expression_factory(self) -> ExpressionFactory:
"""A factory for column expressions using overloaded operators.
@@ -138,6 +151,8 @@ def data_ids(
self,
dimensions: DimensionGroup | Iterable[str] | str,
*,
# TODO: Arguments below are redundant with chaining methods; which ones
# are so convenient we have to keep them?
data_id: DataId | None = None,
where: str | Predicate = "",
bind: Mapping[str, Any] | None = None,
@@ -150,11 +165,11 @@
if not dimensions >= self._tree.dimensions:
tree = tree.join(make_dimension_relation(dimensions))
if data_id or where:
tree = tree.where(*self._convert_predicate_args(where, data_id, bind=bind, **kwargs))
tree = tree.where(*convert_where_args(self._tree, where, data_id, bind=bind, **kwargs))
result_spec = DataCoordinateResultSpec(
dimensions=dimensions, include_dimension_records=self._include_dimension_records
)
return RelationDataCoordinateQueryResults(tree, self._driver, result_spec)
return RelationDataCoordinateQueryResults(self._driver, tree, result_spec)

def datasets(
self,
@@ -174,13 +189,22 @@ def dimension_records(
self,
element: str,
*,
# TODO: Arguments below are redundant with chaining methods; which ones
# are so convenient we have to keep them?
data_id: DataId | None = None,
where: str = "",
bind: Mapping[str, Any] | None = None,
**kwargs: Any,
) -> DimensionRecordQueryResults:
# Docstring inherited.
raise NotImplementedError("TODO")
data_id = DataCoordinate.standardize(data_id, universe=self._driver.universe, **kwargs)
tree = self._tree
if element not in tree.dimensions.elements:
tree = tree.join(make_dimension_relation(self._driver.universe[element].minimal_group))
if data_id or where:
tree = tree.where(*convert_where_args(self._tree, where, data_id, bind=bind, **kwargs))
result_spec = DimensionRecordResultSpec(element=self._driver.universe[element])
return RelationDimensionRecordQueryResults(self._driver, tree, result_spec)

# TODO: add general, dict-row results method and QueryResults.

@@ -299,7 +323,7 @@ def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> RelationQu
their fields in expressions.
"""
return RelationQuery(
tree=self._tree.order_by(*self._convert_order_by_args(*args)),
tree=self._tree.order_by(*convert_order_by_args(self._tree, *args)),
driver=self._driver,
include_dimension_records=self._include_dimension_records,
)
@@ -531,7 +555,7 @@ def where(
their fields in expressions.
"""
return RelationQuery(
tree=self._tree.where(*self._convert_predicate_args(*args, bind=bind, **kwargs)),
tree=self._tree.where(*convert_where_args(self._tree, *args, bind=bind, **kwargs)),
driver=self._driver,
include_dimension_records=self._include_dimension_records,
)
@@ -587,13 +611,3 @@ def find_first(
driver=self._driver,
include_dimension_records=self._include_dimension_records,
)

def _convert_order_by_args(self, *args: str | OrderExpression | ExpressionProxy) -> list[OrderExpression]:
"""Convert ``order_by`` arguments to a list of column expressions."""
raise NotImplementedError("TODO: Parse string expression.")

def _convert_predicate_args(
self, *args: str | Predicate | DataId, bind: Mapping[str, Any] | None = None
) -> list[Predicate]:
"""Convert ``where`` arguments to a list of column expressions."""
raise NotImplementedError("TODO: Parse string expression.")
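Note (illustration only, not part of this commit): a usage sketch of the chaining API these methods plug into. The `butler._query()` entry point and the instrument/visit/detector names below are assumptions for the example.

# Sketch only: assumes Butler._query yields a RelationQuery; dimension and
# column names are examples.
with butler._query() as query:
    refined = query.where("instrument = 'HSC' AND visit > 100").order_by("visit")
    for data_id in refined.data_ids(["visit", "detector"]):
        ...  # DataCoordinate rows, fetched page by page through the QueryDriver
    for record in refined.dimension_records("detector"):
        ...  # DimensionRecord rows via the newly implemented method above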
90 changes: 77 additions & 13 deletions python/lsst/daf/butler/queries/data_coordinate_results.py
@@ -34,7 +34,7 @@
)

from collections.abc import Iterable, Iterator
from contextlib import AbstractContextManager
from contextlib import contextmanager
from typing import TYPE_CHECKING, Any, Literal

import pydantic
@@ -44,7 +44,13 @@
from .._query_results import DataCoordinateQueryResults, DatasetQueryResults
from ..dimensions import DataCoordinate, DimensionGroup
from .driver import QueryDriver
from .relation_tree import RootRelation
from .relation_tree import (
InvalidRelationError,
Materialization,
RootRelation,
convert_order_by_args,
make_unit_relation,
)

if TYPE_CHECKING:
from .driver import PageKey
@@ -74,21 +80,31 @@ class RelationDataCoordinateQueryResults(DataCoordinateQueryResults):
"""Implementation of DataCoordinateQueryResults for the relation-based
query system.
Parameters
----------
driver : `QueryDriver`
Implementation object that knows how to actually execute queries.
tree : `RootRelation`
Description of the query as a tree of relation operations. The
instance returned directly by the `Butler._query` entry point should
be constructed via `make_unit_relation`.
spec : `DataCoordinateResultSpec`
Specification for the details of the data IDs to return.
Notes
-----
Ideally this will eventually just be "DataCoordinateQueryResults", because
we won't need an ABC if this is the only implementation.
"""

def __init__(self, tree: RootRelation, driver: QueryDriver, spec: DataCoordinateResultSpec):
self._tree = tree
def __init__(self, driver: QueryDriver, tree: RootRelation, spec: DataCoordinateResultSpec):
self._driver = driver
self._tree = tree
self._spec = spec

@property
def dimensions(self) -> DimensionGroup:
# Docstring inherited.
return self._spec.dimensions

def __iter__(self) -> Iterator[DataCoordinate]:
@@ -99,20 +115,36 @@ def __iter__(self) -> Iterator[DataCoordinate]:
yield from page.rows

def has_full(self) -> bool: # TODO: since this is always true, we may not need it.
# Docstring inherited.
return True

def has_records(self) -> bool: # TODO: should this be a property now?
# Docstring inherited.
return self._spec.include_dimension_records

def materialize(self) -> AbstractContextManager[DataCoordinateQueryResults]:
raise NotImplementedError()
@contextmanager
def materialize(self) -> Iterator[DataCoordinateQueryResults]:
# Docstring inherited.
key = self._driver.materialize(self._tree, frozenset())
yield RelationDataCoordinateQueryResults(
self._driver,
tree=make_unit_relation(self._driver.universe).join(
Materialization.model_construct(key=key, operand=self._tree, dataset_types=frozenset())
),
spec=self._spec,
)
# TODO: Right now we just rely on the QueryDriver context instead of
# using this one. If we want this to remain a context manager, we
# should make it do something, e.g. by adding QueryDriver method to
# drop a materialization.

def expanded(self) -> DataCoordinateQueryResults:
# Docstring inherited.
if self.has_records():
return self
return RelationDataCoordinateQueryResults(
self._driver,
tree=self._tree,
driver=self._driver,
spec=DataCoordinateResultSpec(dimensions=self._spec.dimensions, include_dimension_records=True),
)

@@ -122,13 +154,31 @@ def subset(
*,
unique: bool = False,
) -> DataCoordinateQueryResults:
raise NotImplementedError(
"TODO: Copy with a new result spec and/or DimensionProjection pushed onto tree."
# Docstring inherited.
if dimensions is None:
dimensions = self.dimensions
else:
dimensions = self._driver.universe.conform(dimensions)
if not dimensions <= self.dimensions:
raise InvalidRelationError(
f"New dimensions {dimensions} are not a subset of the current "
f"dimensions {self.dimensions}."
)
# TODO: right now I'm assuming we'll deduplicate all query results (per
# page), even if we have to do that in Python, so the 'unique' argument
# doesn't do anything.
return RelationDataCoordinateQueryResults(
self._driver,
tree=self._tree,
spec=DataCoordinateResultSpec(
dimensions=dimensions, include_dimension_records=self._spec.include_dimension_records
),
)

def find_datasets(
self, dataset_type: DatasetType | str, collections: Any, *, find_first: bool = True
) -> DatasetQueryResults:
# Docstring inherited.
raise NotImplementedError("TODO: Copy with a new result spec and maybe a new DatasetSearch in tree.")

def find_related_datasets(
@@ -139,19 +189,33 @@ def find_related_datasets(
find_first: bool = True,
dimensions: DimensionGroup | Iterable[str] | None = None,
) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
# Docstring inherited.
raise NotImplementedError("TODO: drop this in favor of GeneralQueryResults")

def count(self, *, exact: bool = True, discard: bool = False) -> int:
# Docstring inherited.
return self._driver.count(self._tree, exact=exact, discard=discard)

def any(self, *, execute: bool = True, exact: bool = True) -> bool:
# Docstring inherited.
return self._driver.any(self._tree, execute=execute, exact=exact)

def explain_no_results(self, execute: bool = True) -> Iterable[str]:
# Docstring inherited.
return self._driver.explain_no_results(self._tree, execute=execute)

def order_by(self, *args: str) -> DataCoordinateQueryResults:
raise NotImplementedError("TODO: Copy with a OrderedSlice pushed onto tree.")
# Docstring inherited.
return RelationDataCoordinateQueryResults(
driver=self._driver,
tree=self._tree.order_by(*convert_order_by_args(self._tree, *args)),
spec=self._spec,
)

def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
raise NotImplementedError("TODO: Copy with a OrderedSlice pushed onto tree.")
def limit(self, limit: int | None = None, offset: int = 0) -> DataCoordinateQueryResults:
# Docstring inherited.
return RelationDataCoordinateQueryResults(
driver=self._driver,
tree=self._tree.order_by(limit=limit, offset=offset),
spec=self._spec,
)
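Note (illustration only, not part of this commit): a sketch of how the newly implemented result-class methods compose. The `results` object and the dimension names are assumptions for the example.

# Sketch only: `results` is assumed to be a RelationDataCoordinateQueryResults
# returned by Query.data_ids(["visit", "detector"]).
with results.materialize() as materialized:
    # The yielded results wrap a new tree joined against the materialization
    # key; cleanup currently rides on the QueryDriver context (see the TODO
    # in materialize above).
    visits = materialized.subset(["visit"])  # must be a subset of the current dimensions
    for data_id in visits.order_by("visit").limit(10):
        ...
    print(visits.count(exact=False))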
80 changes: 79 additions & 1 deletion python/lsst/daf/butler/queries/dimension_record_results.py
@@ -32,12 +32,15 @@
"DimensionRecordResultPage",
)

from collections.abc import Iterable, Iterator
from typing import Literal

import pydantic

from .._query_results import DimensionRecordQueryResults
from ..dimensions import DimensionElement, DimensionRecord
from .driver import PageKey
from .driver import PageKey, QueryDriver
from .relation_tree import RootRelation, convert_order_by_args


class DimensionRecordResultSpec(pydantic.BaseModel):
@@ -58,3 +61,78 @@ class DimensionRecordResultPage(pydantic.BaseModel):
# rather than `set` under the hood). Right now this model isn't actually
# serializable.
rows: list[DimensionRecord]


class RelationDimensionRecordQueryResults(DimensionRecordQueryResults):
"""Implementation of DimensionRecordQueryResults for the relation-based
query system.
Parameters
----------
driver : `QueryDriver`
Implementation object that knows how to actually execute queries.
tree : `RootRelation`
Description of the query as a tree of relation operations. The
instance returned directly by the `Butler._query` entry point should
be constructed via `make_unit_relation`.
spec : `DimensionRecordResultSpec`
Specification for the details of the records to return.
Notes
-----
Ideally this will eventually just be "DimensionRecordQueryResults", because
we won't need an ABC if this is the only implementation.
"""

def __init__(self, driver: QueryDriver, tree: RootRelation, spec: DimensionRecordResultSpec):
self._driver = driver
self._tree = tree
self._spec = spec

def __iter__(self) -> Iterator[DimensionRecord]:
page = self._driver.execute(self._tree, self._spec)
yield from page.rows
while page.next_key is not None:
page = self._driver.fetch_next_page(self._spec, page.next_key)
yield from page.rows

# TODO: might want to make this `str`, in keeping with RFC-834's philosophy
# of keeping DimensionElement out of public APIs (this old-query-system
# version of this one was accidentally omitted from that RFC, but we don't
# need an RFC to change it in the new system).
@property
def element(self) -> DimensionElement:
# Docstring inherited.
return self._spec.element

def run(self) -> DimensionRecordQueryResults:
# Docstring inherited.
raise NotImplementedError("TODO: remove this from the base class.")

def count(self, *, exact: bool = True, discard: bool = False) -> int:
# Docstring inherited.
return self._driver.count(self._tree, exact=exact, discard=discard)

def any(self, *, execute: bool = True, exact: bool = True) -> bool:
# Docstring inherited.
return self._driver.any(self._tree, execute=execute, exact=exact)

def order_by(self, *args: str) -> DimensionRecordQueryResults:
# Docstring inherited.
return RelationDimensionRecordQueryResults(
driver=self._driver,
tree=self._tree.order_by(*convert_order_by_args(self._tree, *args)),
spec=self._spec,
)

def limit(self, limit: int | None = None, offset: int = 0) -> DimensionRecordQueryResults:
# Docstring inherited.
return RelationDimensionRecordQueryResults(
driver=self._driver,
tree=self._tree.order_by(limit=limit, offset=offset),
spec=self._spec,
)

def explain_no_results(self, execute: bool = True) -> Iterable[str]:
# Docstring inherited.
return self._driver.explain_no_results(self._tree, execute=execute)
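Note (illustration only, not part of this commit): iteration over both results classes follows the same paging protocol, where the driver's `execute` returns a first page and `fetch_next_page` is called until `next_key` is `None`. A consumer-side sketch, with `records` assumed to come from Query.dimension_records("detector"):

# Sketch only: `records` is assumed to be a RelationDimensionRecordQueryResults.
for record in records.order_by("detector").limit(5):
    ...  # DimensionRecord instances for the element named in the result spec
if not records.any(exact=False):
    # Ask the driver why the relation tree produced no rows.
    print(list(records.explain_no_results()))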