Commit

Continue work on query results classes in new system.
TallJimbo committed Dec 11, 2023
1 parent 01df32d commit 25d2fb4
Showing 5 changed files with 208 additions and 36 deletions.
46 changes: 30 additions & 16 deletions python/lsst/daf/butler/queries/_query.py
@@ -35,6 +35,7 @@
from .._query import Query
from ..dimensions import DataCoordinate, DataId, DataIdValue, DimensionGroup
from .data_coordinate_results import DataCoordinateResultSpec, RelationDataCoordinateQueryResults
from .dimension_record_results import DimensionRecordResultSpec, RelationDimensionRecordQueryResults
from .driver import QueryDriver
from .expression_factory import ExpressionFactory, ExpressionProxy
from .relation_tree import (
@@ -45,6 +46,8 @@
OrderExpression,
Predicate,
RootRelation,
convert_order_by_args,
convert_where_args,
make_dimension_relation,
make_unit_relation,
)
@@ -63,7 +66,7 @@ class RelationQuery(Query):
----------
driver : `QueryDriver`
Implementation object that knows how to actually execute queries.
tree : `Relation`
tree : `RootRelation`
Description of the query as a tree of relation operations. The
instance returned directly by the `Butler._query` entry point should
be constructed via `make_unit_relation`.
@@ -82,6 +85,16 @@ def __init__(self, driver: QueryDriver, tree: RootRelation, include_dimension_re
self._tree = tree
self._include_dimension_records = include_dimension_records

@property
def dimensions(self) -> DimensionGroup:
"""The dimensions joined into the query."""
return self._tree.dimensions

@property
def dataset_types(self) -> frozenset[str]:
"""The names of dataset types joined into the query."""
return self._tree.available_dataset_types

@property
def expression_factory(self) -> ExpressionFactory:
"""A factory for column expressions using overloaded operators.
@@ -138,6 +151,8 @@ def data_ids(
self,
dimensions: DimensionGroup | Iterable[str] | str,
*,
# TODO: Arguments below are redundant with chaining methods; which ones
# are so convenient we have to keep them?
data_id: DataId | None = None,
where: str | Predicate = "",
bind: Mapping[str, Any] | None = None,
@@ -150,11 +165,11 @@
if not dimensions >= self._tree.dimensions:
tree = tree.join(make_dimension_relation(dimensions))
if data_id or where:
tree = tree.where(*self._convert_predicate_args(where, data_id, bind=bind, **kwargs))
tree = tree.where(*convert_where_args(self._tree, where, data_id, bind=bind, **kwargs))
result_spec = DataCoordinateResultSpec(
dimensions=dimensions, include_dimension_records=self._include_dimension_records
)
return RelationDataCoordinateQueryResults(tree, self._driver, result_spec)
return RelationDataCoordinateQueryResults(self._driver, tree, result_spec)

def datasets(
self,
@@ -174,13 +189,22 @@ def dimension_records(
self,
element: str,
*,
# TODO: Arguments below are redundant with chaining methods; which ones
# are so convenient we have to keep them?
data_id: DataId | None = None,
where: str = "",
bind: Mapping[str, Any] | None = None,
**kwargs: Any,
) -> DimensionRecordQueryResults:
# Docstring inherited.
raise NotImplementedError("TODO")
data_id = DataCoordinate.standardize(data_id, universe=self._driver.universe, **kwargs)
tree = self._tree
if element not in tree.dimensions.elements:
tree = tree.join(make_dimension_relation(self._driver.universe[element].minimal_group))
if data_id or where:
tree = tree.where(*convert_where_args(self._tree, where, data_id, bind=bind, **kwargs))
result_spec = DimensionRecordResultSpec(element=self._driver.universe[element])
return RelationDimensionRecordQueryResults(self._driver, tree, result_spec)

# TODO: add general, dict-row results method and QueryResults.

@@ -299,7 +323,7 @@ def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> RelationQu
their fields in expressions.
"""
return RelationQuery(
tree=self._tree.order_by(*self._convert_order_by_args(*args)),
tree=self._tree.order_by(*convert_order_by_args(self._tree, *args)),
driver=self._driver,
include_dimension_records=self._include_dimension_records,
)
@@ -531,7 +555,7 @@ def where(
their fields in expressions.
"""
return RelationQuery(
tree=self._tree.where(*self._convert_predicate_args(*args, bind=bind, **kwargs)),
tree=self._tree.where(*convert_where_args(self._tree, *args, bind=bind, **kwargs)),
driver=self._driver,
include_dimension_records=self._include_dimension_records,
)
@@ -587,13 +611,3 @@ def find_first(
driver=self._driver,
include_dimension_records=self._include_dimension_records,
)

def _convert_order_by_args(self, *args: str | OrderExpression | ExpressionProxy) -> list[OrderExpression]:
"""Convert ``order_by`` arguments to a list of column expressions."""
raise NotImplementedError("TODO: Parse string expression.")

def _convert_predicate_args(
self, *args: str | Predicate | DataId, bind: Mapping[str, Any] | None = None
) -> list[Predicate]:
"""Convert ``where`` arguments to a list of column expressions."""
raise NotImplementedError("TODO: Parse string expression.")
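Note (illustration only, not part of this commit): a usage sketch of the chaining API these methods plug into. The `butler._query()` entry point and the instrument/visit/detector names below are assumptions for the example.

# Sketch only: assumes Butler._query yields a RelationQuery; dimension and
# column names are examples.
with butler._query() as query:
    refined = query.where("instrument = 'HSC' AND visit > 100").order_by("visit")
    for data_id in refined.data_ids(["visit", "detector"]):
        ...  # DataCoordinate rows, fetched page by page through the QueryDriver
    for record in refined.dimension_records("detector"):
        ...  # DimensionRecord rows via the newly implemented method above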
90 changes: 77 additions & 13 deletions python/lsst/daf/butler/queries/data_coordinate_results.py
@@ -34,7 +34,7 @@
)

from collections.abc import Iterable, Iterator
from contextlib import AbstractContextManager
from contextlib import contextmanager
from typing import TYPE_CHECKING, Any, Literal

import pydantic
@@ -44,7 +44,13 @@
from .._query_results import DataCoordinateQueryResults, DatasetQueryResults
from ..dimensions import DataCoordinate, DimensionGroup
from .driver import QueryDriver
from .relation_tree import RootRelation
from .relation_tree import (
InvalidRelationError,
Materialization,
RootRelation,
convert_order_by_args,
make_unit_relation,
)

if TYPE_CHECKING:
from .driver import PageKey
@@ -74,21 +80,31 @@ class RelationDataCoordinateQueryResults(DataCoordinateQueryResults):
"""Implementation of DataCoordinateQueryResults for the relation-based
query system.
Parameters
----------
driver : `QueryDriver`
Implementation object that knows how to actually execute queries.
tree : `RootRelation`
Description of the query as a tree of relation operations. The
instance returned directly by the `Butler._query` entry point should
be constructed via `make_unit_relation`.
spec : `DataCoordinateResultSpec`
Specification for the details of the data IDs to return.
Notes
-----
Ideally this will eventually just be "DataCoordinateQueryResults", because
we won't need an ABC if this is the only implementation.
"""

def __init__(self, tree: RootRelation, driver: QueryDriver, spec: DataCoordinateResultSpec):
self._tree = tree
def __init__(self, driver: QueryDriver, tree: RootRelation, spec: DataCoordinateResultSpec):
self._driver = driver
self._tree = tree
self._spec = spec

@property
def dimensions(self) -> DimensionGroup:
# Docstring inherited.
return self._spec.dimensions

def __iter__(self) -> Iterator[DataCoordinate]:
@@ -99,20 +115,36 @@ def __iter__(self) -> Iterator[DataCoordinate]:
yield from page.rows

def has_full(self) -> bool: # TODO: since this is always true, we may not need it.
# Docstring inherited.
return True

def has_records(self) -> bool: # TODO: should this be a property now?
# Docstring inherited.
return self._spec.include_dimension_records

def materialize(self) -> AbstractContextManager[DataCoordinateQueryResults]:
raise NotImplementedError()
@contextmanager
def materialize(self) -> Iterator[DataCoordinateQueryResults]:
# Docstring inherited.
key = self._driver.materialize(self._tree, frozenset())
yield RelationDataCoordinateQueryResults(
self._driver,
tree=make_unit_relation(self._driver.universe).join(
Materialization.model_construct(key=key, operand=self._tree, dataset_types=frozenset())
),
spec=self._spec,
)
# TODO: Right now we just rely on the QueryDriver context instead of
# using this one. If we want this to remain a context manager, we
# should make it do something, e.g. by adding QueryDriver method to
# drop a materialization.

def expanded(self) -> DataCoordinateQueryResults:
# Docstring inherited.
if self.has_records():
return self
return RelationDataCoordinateQueryResults(
self._driver,
tree=self._tree,
driver=self._driver,
spec=DataCoordinateResultSpec(dimensions=self._spec.dimensions, include_dimension_records=True),
)

@@ -122,13 +154,31 @@ def subset(
*,
unique: bool = False,
) -> DataCoordinateQueryResults:
raise NotImplementedError(
"TODO: Copy with a new result spec and/or DimensionProjection pushed onto tree."
# Docstring inherited.
if dimensions is None:
dimensions = self.dimensions
else:
dimensions = self._driver.universe.conform(dimensions)
if not dimensions <= self.dimensions:
raise InvalidRelationError(
f"New dimensions {dimensions} are not a subset of the current "
f"dimensions {self.dimensions}."
)
# TODO: right now I'm assuming we'll deduplicate all query results (per
# page), even if we have to do that in Python, so the 'unique' argument
# doesn't do anything.
return RelationDataCoordinateQueryResults(
self._driver,
tree=self._tree,
spec=DataCoordinateResultSpec(
dimensions=dimensions, include_dimension_records=self._spec.include_dimension_records
),
)

def find_datasets(
self, dataset_type: DatasetType | str, collections: Any, *, find_first: bool = True
) -> DatasetQueryResults:
# Docstring inherited.
raise NotImplementedError("TODO: Copy with a new result spec and maybe a new DatasetSearch in tree.")

def find_related_datasets(
@@ -139,19 +189,33 @@ def find_related_datasets(
find_first: bool = True,
dimensions: DimensionGroup | Iterable[str] | None = None,
) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
# Docstring inherited.
raise NotImplementedError("TODO: drop this in favor of GeneralQueryResults")

def count(self, *, exact: bool = True, discard: bool = False) -> int:
# Docstring inherited.
return self._driver.count(self._tree, exact=exact, discard=discard)

def any(self, *, execute: bool = True, exact: bool = True) -> bool:
# Docstring inherited.
return self._driver.any(self._tree, execute=execute, exact=exact)

def explain_no_results(self, execute: bool = True) -> Iterable[str]:
# Docstring inherited.
return self._driver.explain_no_results(self._tree, execute=execute)

def order_by(self, *args: str) -> DataCoordinateQueryResults:
raise NotImplementedError("TODO: Copy with a OrderedSlice pushed onto tree.")
# Docstring inherited.
return RelationDataCoordinateQueryResults(
driver=self._driver,
tree=self._tree.order_by(*convert_order_by_args(self._tree, *args)),
spec=self._spec,
)

def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
raise NotImplementedError("TODO: Copy with a OrderedSlice pushed onto tree.")
def limit(self, limit: int | None = None, offset: int = 0) -> DataCoordinateQueryResults:
# Docstring inherited.
return RelationDataCoordinateQueryResults(
driver=self._driver,
tree=self._tree.order_by(limit=limit, offset=offset),
spec=self._spec,
)
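Note (illustration only, not part of this commit): a sketch of how the newly implemented result-class methods compose. The `results` object and the dimension names are assumptions for the example.

# Sketch only: `results` is assumed to be a RelationDataCoordinateQueryResults
# returned by Query.data_ids(["visit", "detector"]).
with results.materialize() as materialized:
    # The yielded results wrap a new tree joined against the materialization
    # key; cleanup currently rides on the QueryDriver context (see the TODO
    # in materialize above).
    visits = materialized.subset(["visit"])  # must be a subset of the current dimensions
    for data_id in visits.order_by("visit").limit(10):
        ...
    print(visits.count(exact=False))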
80 changes: 79 additions & 1 deletion python/lsst/daf/butler/queries/dimension_record_results.py
@@ -32,12 +32,15 @@
"DimensionRecordResultPage",
)

from collections.abc import Iterable, Iterator
from typing import Literal

import pydantic

from .._query_results import DimensionRecordQueryResults
from ..dimensions import DimensionElement, DimensionRecord
from .driver import PageKey
from .driver import PageKey, QueryDriver
from .relation_tree import RootRelation, convert_order_by_args


class DimensionRecordResultSpec(pydantic.BaseModel):
@@ -58,3 +61,78 @@ class DimensionRecordResultPage(pydantic.BaseModel):
# rather than `set` under the hood). Right now this model isn't actually
# serializable.
rows: list[DimensionRecord]


class RelationDimensionRecordQueryResults(DimensionRecordQueryResults):
"""Implementation of DimensionRecordQueryResults for the relation-based
query system.
Parameters
----------
driver : `QueryDriver`
Implementation object that knows how to actually execute queries.
tree : `RootRelation`
Description of the query as a tree of relation operations. The
instance returned directly by the `Butler._query` entry point should
be constructed via `make_unit_relation`.
spec : `DimensionRecordResultSpec`
Specification for the details of the records to return.
Notes
-----
Ideally this will eventually just be "DimensionRecordQueryResults", because
we won't need an ABC if this is the only implementation.
"""

def __init__(self, driver: QueryDriver, tree: RootRelation, spec: DimensionRecordResultSpec):
self._driver = driver
self._tree = tree
self._spec = spec

def __iter__(self) -> Iterator[DimensionRecord]:
page = self._driver.execute(self._tree, self._spec)
yield from page.rows
while page.next_key is not None:
page = self._driver.fetch_next_page(self._spec, page.next_key)
yield from page.rows

# TODO: might want to make this `str`, in keeping with RFC-834's philosophy
# of keeping DimensionElement out of public APIs (this old-query-system
# version of this one was accidentally omitted from that RFC, but we don't
# need an RFC to change it in the new system).
@property
def element(self) -> DimensionElement:
# Docstring inherited.
return self._spec.element

def run(self) -> DimensionRecordQueryResults:
# Docstring inherited.
raise NotImplementedError("TODO: remove this from the base class.")

def count(self, *, exact: bool = True, discard: bool = False) -> int:
# Docstring inherited.
return self._driver.count(self._tree, exact=exact, discard=discard)

def any(self, *, execute: bool = True, exact: bool = True) -> bool:
# Docstring inherited.
return self._driver.any(self._tree, execute=execute, exact=exact)

def order_by(self, *args: str) -> DimensionRecordQueryResults:
# Docstring inherited.
return RelationDimensionRecordQueryResults(
driver=self._driver,
tree=self._tree.order_by(*convert_order_by_args(self._tree, *args)),
spec=self._spec,
)

def limit(self, limit: int | None = None, offset: int = 0) -> DimensionRecordQueryResults:
# Docstring inherited.
return RelationDimensionRecordQueryResults(
driver=self._driver,
tree=self._tree.order_by(limit=limit, offset=offset),
spec=self._spec,
)

def explain_no_results(self, execute: bool = True) -> Iterable[str]:
# Docstring inherited.
return self._driver.explain_no_results(self._tree, execute=execute)
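Note (illustration only, not part of this commit): iteration over both results classes follows the same paging protocol, where the driver's `execute` returns a first page and `fetch_next_page` is called until `next_key` is `None`. A consumer-side sketch, with `records` assumed to come from Query.dimension_records("detector"):

# Sketch only: `records` is assumed to be a RelationDimensionRecordQueryResults.
for record in records.order_by("detector").limit(5):
    ...  # DimensionRecord instances for the element named in the result spec
if not records.any(exact=False):
    # Ask the driver why the relation tree produced no rows.
    print(list(records.explain_no_results()))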