Docs, fixes, and more implementations for DataCoordinate results.
TallJimbo committed Dec 10, 2023
1 parent 01df32d commit 67bdb5a
Showing 4 changed files with 117 additions and 33 deletions.
32 changes: 18 additions & 14 deletions python/lsst/daf/butler/queries/_query.py
@@ -45,6 +45,8 @@
OrderExpression,
Predicate,
RootRelation,
convert_order_by_args,
convert_where_args,
make_dimension_relation,
make_unit_relation,
)
@@ -82,6 +84,16 @@ def __init__(self, driver: QueryDriver, tree: RootRelation, include_dimension_re
self._tree = tree
self._include_dimension_records = include_dimension_records

@property
def dimensions(self) -> DimensionGroup:
"""The dimensions joined into the query."""
return self._tree.dimensions

@property
def dataset_types(self) -> frozenset[str]:
"""The names of dataset types joined into the query."""
return self._tree.available_dataset_types

@property
def expression_factory(self) -> ExpressionFactory:
"""A factory for column expressions using overloaded operators.
@@ -138,6 +150,8 @@ def data_ids(
self,
dimensions: DimensionGroup | Iterable[str] | str,
*,
# TODO: Arguments below are redundant with chaining methods; which ones
# are so convenient we have to keep them?
data_id: DataId | None = None,
where: str | Predicate = "",
bind: Mapping[str, Any] | None = None,
@@ -150,11 +164,11 @@
if not dimensions >= self._tree.dimensions:
tree = tree.join(make_dimension_relation(dimensions))
if data_id or where:
tree = tree.where(*self._convert_predicate_args(where, data_id, bind=bind, **kwargs))
tree = tree.where(*convert_where_args(self._tree, where, data_id, bind=bind, **kwargs))
result_spec = DataCoordinateResultSpec(
dimensions=dimensions, include_dimension_records=self._include_dimension_records
)
return RelationDataCoordinateQueryResults(tree, self._driver, result_spec)
return RelationDataCoordinateQueryResults(self._driver, tree, result_spec)

def datasets(
self,
@@ -299,7 +313,7 @@ def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> RelationQu
their fields in expressions.
"""
return RelationQuery(
tree=self._tree.order_by(*self._convert_order_by_args(*args)),
tree=self._tree.order_by(*convert_order_by_args(self._tree, *args)),
driver=self._driver,
include_dimension_records=self._include_dimension_records,
)
@@ -531,7 +545,7 @@ def where(
their fields in expressions.
"""
return RelationQuery(
tree=self._tree.where(*self._convert_predicate_args(*args, bind=bind, **kwargs)),
tree=self._tree.where(*convert_where_args(self._tree, *args, bind=bind, **kwargs)),
driver=self._driver,
include_dimension_records=self._include_dimension_records,
)
@@ -587,13 +601,3 @@ def find_first(
driver=self._driver,
include_dimension_records=self._include_dimension_records,
)

def _convert_order_by_args(self, *args: str | OrderExpression | ExpressionProxy) -> list[OrderExpression]:
"""Convert ``order_by`` arguments to a list of column expressions."""
raise NotImplementedError("TODO: Parse string expression.")

def _convert_predicate_args(
self, *args: str | Predicate | DataId, bind: Mapping[str, Any] | None = None
) -> list[Predicate]:
"""Convert ``where`` arguments to a list of column expressions."""
raise NotImplementedError("TODO: Parse string expression.")
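
The net effect of this file's changes: the private _convert_order_by_args / _convert_predicate_args methods are dropped in favor of the module-level convert_order_by_args / convert_where_args helpers (which take the relation tree explicitly, so the result classes below can reuse them), new dimensions and dataset_types properties report what has been joined into the query, and the arguments passed to RelationDataCoordinateQueryResults are reordered to match its constructor. A minimal sketch of the new pieces in use; the driver object, the keyword spellings, and the import paths are assumptions for illustration, not part of the commit, and string expressions still raise NotImplementedError here because the parser is a stub:

# Hedged sketch; `driver` stands in for a concrete QueryDriver implementation
# and the import paths are illustrative.
from lsst.daf.butler.queries._query import RelationQuery
from lsst.daf.butler.queries.relation_tree import make_unit_relation

tree = make_unit_relation(driver.universe)
query = RelationQuery(driver=driver, tree=tree, include_dimension_records=False)

# The new read-only properties just expose what the relation tree contains.
assert query.dimensions == tree.dimensions   # empty for the unit relation
assert query.dataset_types == frozenset()    # no dataset searches joined yet

# data_ids()/where()/order_by() now funnel their arguments through the shared
# convert_where_args/convert_order_by_args helpers instead of private methods;
# passing a string still raises NotImplementedError in this commit.

Making the helpers free functions whose first argument is the tree is what lets RelationDataCoordinateQueryResults.order_by in the next file call them without holding a RelationQuery.
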
90 changes: 77 additions & 13 deletions python/lsst/daf/butler/queries/data_coordinate_results.py
@@ -34,7 +34,7 @@
)

from collections.abc import Iterable, Iterator
from contextlib import AbstractContextManager
from contextlib import contextmanager
from typing import TYPE_CHECKING, Any, Literal

import pydantic
@@ -44,7 +44,13 @@
from .._query_results import DataCoordinateQueryResults, DatasetQueryResults
from ..dimensions import DataCoordinate, DimensionGroup
from .driver import QueryDriver
from .relation_tree import RootRelation
from .relation_tree import (
InvalidRelationError,
Materialization,
RootRelation,
convert_order_by_args,
make_unit_relation,
)

if TYPE_CHECKING:
from .driver import PageKey
@@ -74,21 +80,31 @@ class RelationDataCoordinateQueryResults(DataCoordinateQueryResults):
"""Implementation of DataCoordinateQueryResults for the relation-based
query system.
Parameters
----------
driver : `QueryDriver`
Implementation object that knows how to actually execute queries.
tree : `Relation`
Description of the query as a tree of relation operations. The
instance returned directly by the `Butler._query` entry point should
be constructed via `make_unit_relation`.
spec : `DataCoordinateResultSpec`
Specification for the details of the data IDs to return.
Notes
-----
Ideally this will eventually just be "DataCoordinateQueryResults", because
we won't need an ABC if this is the only implementation.
"""

def __init__(self, tree: RootRelation, driver: QueryDriver, spec: DataCoordinateResultSpec):
self._tree = tree
def __init__(self, driver: QueryDriver, tree: RootRelation, spec: DataCoordinateResultSpec):
self._driver = driver
self._tree = tree
self._spec = spec

@property
def dimensions(self) -> DimensionGroup:
# Docstring inherited.
return self._spec.dimensions

def __iter__(self) -> Iterator[DataCoordinate]:
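
The new Parameters section above documents the intended construction path: the instance returned by the Butler._query entry point should be built from a unit relation. A minimal sketch under that description, assuming this module's own imports and a placeholder driver object:

# Hedged sketch; `driver` is a placeholder QueryDriver, and the other names
# are the ones this module already imports.
tree = make_unit_relation(driver.universe)
spec = DataCoordinateResultSpec(
    dimensions=tree.dimensions, include_dimension_records=False
)
results = RelationDataCoordinateQueryResults(driver, tree, spec)
assert results.dimensions == spec.dimensions  # the property reads the spec
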
@@ -99,20 +115,36 @@ def __iter__(self) -> Iterator[DataCoordinate]:
yield from page.rows

def has_full(self) -> bool: # TODO: since this is always true, we may not need it.
# Docstring inherited.
return True

def has_records(self) -> bool: # TODO: should this be a property now?
# Docstring inherited.
return self._spec.include_dimension_records

def materialize(self) -> AbstractContextManager[DataCoordinateQueryResults]:
raise NotImplementedError()
@contextmanager
def materialize(self) -> Iterator[DataCoordinateQueryResults]:
# Docstring inherited.
key = self._driver.materialize(self._tree, frozenset())
yield RelationDataCoordinateQueryResults(
self._driver,
tree=make_unit_relation(self._driver.universe).join(
Materialization.model_construct(key=key, operand=self._tree, dataset_types=frozenset())
),
spec=self._spec,
)
# TODO: Right now we just rely on the QueryDriver context instead of
# using this one. If we want this to remain a context manager, we
# should make it do something, e.g. by adding QueryDriver method to
# drop a materialization.

def expanded(self) -> DataCoordinateQueryResults:
# Docstring inherited.
if self.has_records():
return self
return RelationDataCoordinateQueryResults(
self._driver,
tree=self._tree,
driver=self._driver,
spec=DataCoordinateResultSpec(dimensions=self._spec.dimensions, include_dimension_records=True),
)

Expand All @@ -122,13 +154,31 @@ def subset(
*,
unique: bool = False,
) -> DataCoordinateQueryResults:
raise NotImplementedError(
"TODO: Copy with a new result spec and/or DimensionProjection pushed onto tree."
# Docstring inherited.
if dimensions is None:
dimensions = self.dimensions
else:
dimensions = self._driver.universe.conform(dimensions)
if not dimensions <= self.dimensions:
raise InvalidRelationError(
f"New dimensions {dimensions} are not a subset of the current "
f"dimensions {self.dimensions}."
)
# TODO: right now I'm assuming we'll deduplicate all query results (per
# page), even if we have to do that in Python, so the 'unique' argument
# doesn't do anything.
return RelationDataCoordinateQueryResults(
self._driver,
tree=self._tree,
spec=DataCoordinateResultSpec(
dimensions=dimensions, include_dimension_records=self._spec.include_dimension_records
),
)

def find_datasets(
self, dataset_type: DatasetType | str, collections: Any, *, find_first: bool = True
) -> DatasetQueryResults:
# Docstring inherited.
raise NotImplementedError("TODO: Copy with a new result spec and maybe a new DatasetSearch in tree.")

def find_related_datasets(
@@ -139,19 +189,33 @@
find_first: bool = True,
dimensions: DimensionGroup | Iterable[str] | None = None,
) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
# Docstring inherited.
raise NotImplementedError("TODO: drop this in favor of GeneralQueryResults")

def count(self, *, exact: bool = True, discard: bool = False) -> int:
# Docstring inherited.
return self._driver.count(self._tree, exact=exact, discard=discard)

def any(self, *, execute: bool = True, exact: bool = True) -> bool:
# Docstring inherited.
return self._driver.any(self._tree, execute=execute, exact=exact)

def explain_no_results(self, execute: bool = True) -> Iterable[str]:
# Docstring inherited.
return self._driver.explain_no_results(self._tree, execute=execute)

def order_by(self, *args: str) -> DataCoordinateQueryResults:
raise NotImplementedError("TODO: Copy with a OrderedSlice pushed onto tree.")
# Docstring inherited.
return RelationDataCoordinateQueryResults(
driver=self._driver,
tree=self._tree.order_by(*convert_order_by_args(self._tree, *args)),
spec=self._spec,
)

def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
raise NotImplementedError("TODO: Copy with a OrderedSlice pushed onto tree.")
def limit(self, limit: int | None = None, offset: int = 0) -> DataCoordinateQueryResults:
# Docstring inherited.
return RelationDataCoordinateQueryResults(
driver=self._driver,
tree=self._tree.order_by(limit=limit, offset=offset),
spec=self._spec,
)
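
Taken together, the new method bodies in this file make the data-ID results chainable: materialize runs the query into a driver-side temporary and joins the resulting Materialization node into a fresh unit relation, subset validates and narrows the dimensions, and order_by/limit push ordering and slicing onto the tree. A hedged usage sketch follows; `results` stands for a RelationDataCoordinateQueryResults obtained elsewhere (for example from RelationQuery.data_ids), the surrounding driver session is not shown, and order_by with string arguments still depends on the unimplemented expression parser:

# Hedged usage sketch; `results` is assumed to exist already.
with results.materialize() as materialized:
    # `materialized` is backed by a Materialization node joined into a fresh
    # unit relation; per the TODO above, nothing is dropped when the context exits.
    narrowed = materialized.subset(["visit"])   # must be a subset of materialized.dimensions
    first_ten = narrowed.limit(10, offset=0)    # pushes limit/offset onto the tree
    print(first_ten.count(exact=True))
    for data_id in first_ten:
        print(data_id)
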
12 changes: 10 additions & 2 deletions python/lsst/daf/butler/queries/relation_tree/_ordered_slice.py
@@ -27,7 +27,7 @@

from __future__ import annotations

__all__ = ("OrderedSlice",)
__all__ = ("OrderedSlice", "convert_order_by_args")

from typing import TYPE_CHECKING, Literal, final

@@ -44,8 +44,9 @@
)

if TYPE_CHECKING:
from ..expression_factory import ExpressionProxy
from ._predicate import Predicate
from ._relation import OrderedSliceOperand, Relation
from ._relation import OrderedSliceOperand, Relation, RootRelation
from .joins import JoinArg


@@ -173,3 +174,10 @@ def _validate_nontrivial(self) -> OrderedSlice:
if not self.order_terms and self.limit is None and not self.offset:
raise InvalidRelationError("Operation does not do anything.")
return self


def convert_order_by_args(
tree: RootRelation, *args: str | OrderExpression | ExpressionProxy
) -> list[OrderExpression]:
"""Convert ``order_by`` arguments to a list of column expressions."""
raise NotImplementedError("TODO")
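
convert_order_by_args is introduced here as a shared module-level entry point (and added to __all__), but its body is still the same TODO that previously lived on RelationQuery. Purely as an illustration of the contract it has to satisfy, and not the commit's implementation, the dispatch could look like the sketch below; it assumes ExpressionProxy is imported at runtime rather than only under TYPE_CHECKING, and the unwrap() call is a hypothetical accessor for the expression the proxy wraps:

# Illustrative sketch only; the function in this commit raises NotImplementedError.
def convert_order_by_args(
    tree: RootRelation, *args: str | OrderExpression | ExpressionProxy
) -> list[OrderExpression]:
    """Convert ``order_by`` arguments to a list of column expressions."""
    result: list[OrderExpression] = []
    for arg in args:
        if isinstance(arg, str):
            # The real work: parse the string against tree.dimensions and the
            # joined dataset types.  Left as TODO in this commit.
            raise NotImplementedError("TODO: Parse string expression.")
        elif isinstance(arg, ExpressionProxy):
            result.append(arg.unwrap())  # hypothetical accessor, not a real API
        else:
            result.append(arg)  # already an OrderExpression
    return result
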
16 changes: 12 additions & 4 deletions python/lsst/daf/butler/queries/relation_tree/_select.py
Expand Up @@ -27,15 +27,16 @@

from __future__ import annotations

__all__ = ("Select", "make_unit_relation", "make_dimension_relation")
__all__ = ("Select", "make_unit_relation", "make_dimension_relation", "convert_where_args")

import itertools
from collections.abc import Mapping
from functools import cached_property
from typing import TYPE_CHECKING, Literal, final
from typing import TYPE_CHECKING, Any, Literal, final

import pydantic

from ...dimensions import DimensionGroup, DimensionUniverse
from ...dimensions import DataId, DimensionGroup, DimensionUniverse
from ._base import InvalidRelationError, RelationBase
from ._column_reference import DatasetFieldReference, DimensionFieldReference, DimensionKeyReference
from ._predicate import Predicate
@@ -45,7 +45,7 @@
from ._column_expression import OrderExpression
from ._find_first import FindFirst
from ._ordered_slice import OrderedSlice
from ._relation import JoinOperand, Relation
from ._relation import JoinOperand, Relation, RootRelation


def make_unit_relation(universe: DimensionUniverse) -> Select:
@@ -278,3 +279,10 @@ def _validate_required_columns(self) -> Select:
if dataset_type not in self.available_dataset_types:
raise InvalidRelationError(f"Dataset search for column {column} is not present.")
return self


def convert_where_args(
tree: RootRelation, *args: str | Predicate | DataId, bind: Mapping[str, Any] | None = None
) -> list[Predicate]:
"""Convert ``where`` arguments to a list of column expressions."""
raise NotImplementedError("TODO: Parse string expression.")
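
convert_where_args gets the same treatment: a shared module-level stub whose body is still the original TODO. Its contract is broader than the order_by case, because a bare data ID has to become an equality predicate and bind values have to be substituted into parsed string expressions. The sketch below illustrates that dispatch only; it is not the commit's code, it assumes a runtime import of DataCoordinate alongside this module's existing imports, and predicate_from_data_id is a hypothetical helper:

# Illustrative sketch only; the function in this commit raises NotImplementedError.
def convert_where_args(
    tree: RootRelation, *args: str | Predicate | DataId, bind: Mapping[str, Any] | None = None
) -> list[Predicate]:
    """Convert ``where`` arguments to a list of column expressions."""
    result: list[Predicate] = []
    for arg in args:
        if arg is None or arg == "":
            continue  # callers pass their `where=""` / `data_id=None` defaults straight through
        if isinstance(arg, str):
            # Parse against tree.dimensions, substituting `bind` values.  Left as TODO.
            raise NotImplementedError("TODO: Parse string expression.")
        elif isinstance(arg, (DataCoordinate, Mapping)):
            # A data ID: turn each key/value pair into an equality constraint.
            result.append(predicate_from_data_id(tree, arg))  # hypothetical helper
        else:
            result.append(arg)  # already a Predicate
    return result
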
