Skip to content

Commit

Permalink
Add dimension and datastore record retrieval for get_dataset/find_dat…
Browse files Browse the repository at this point in the history
…aset

Datastore records do not work in remote butler because they
were never added to SerializedDatasetRef.
  • Loading branch information
timj committed Nov 2, 2023
1 parent 694614b commit 1e3d68c
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 10 deletions.
17 changes: 16 additions & 1 deletion python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,7 +800,13 @@ def get_dataset_type(self, name: str) -> DatasetType:
raise NotImplementedError()

@abstractmethod
def get_dataset(self, id: DatasetId, storage_class: str | StorageClass | None) -> DatasetRef | None:
def get_dataset(
self,
id: DatasetId,
storage_class: str | StorageClass | None,
dimension_records: bool = False,
datastore_records: bool = False,
) -> DatasetRef | None:
"""Retrieve a Dataset entry.
Parameters
Expand All @@ -810,6 +816,10 @@ def get_dataset(self, id: DatasetId, storage_class: str | StorageClass | None) -
storage_class : `str` or `StorageClass` or `None`
A storage class to use when creating the returned entry. If given
it must be compatible with the default storage class.
dimension_records: `bool`, optional
If `True` the ref will be expanded and contain dimension records.
datastore_records: `bool`, optional.
If `True` the ref will contain associated datastore records.
Returns
-------
Expand All @@ -828,6 +838,7 @@ def find_dataset(
collections: str | Sequence[str] | None = None,
timespan: Timespan | None = None,
storage_class: str | StorageClass | None = None,
dimension_records: bool = False,
datastore_records: bool = False,
**kwargs: Any,
) -> DatasetRef | None:
Expand Down Expand Up @@ -861,6 +872,10 @@ def find_dataset(
storage_class : `str` or `StorageClass` or `None`
A storage class to use when creating the returned entry. If given
it must be compatible with the default storage class.
dimension_records: `bool`, optional
If `True` the ref will be expanded and contain dimension records.
datastore_records: `bool`, optional.
If `True` the ref will contain associated datastore records.
**kwargs
Additional keyword arguments passed to
`DataCoordinate.standardize` to convert ``dataId`` to a true
Expand Down
20 changes: 16 additions & 4 deletions python/lsst/daf/butler/direct_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1322,11 +1322,20 @@ def get_dataset_type(self, name: str) -> DatasetType:
return self._registry.getDatasetType(name)

def get_dataset(
self, id: DatasetId, storage_class: str | StorageClass | None = None
self,
id: DatasetId,
storage_class: str | StorageClass | None = None,
dimension_records: bool = False,
datastore_records: bool = False,
) -> DatasetRef | None:
ref = self._registry.getDataset(id)
if ref is not None and storage_class:
ref = ref.overrideStorageClass(storage_class)
if ref is not None:
if dimension_records:
ref = ref.expanded(self._registry.expandDataId(ref.dataId, graph=ref.datasetType.dimensions))
if storage_class:
ref = ref.overrideStorageClass(storage_class)
if datastore_records:
ref = self._registry.get_datastore_records(ref)
return ref

def find_dataset(
Expand All @@ -1337,6 +1346,7 @@ def find_dataset(
collections: str | Sequence[str] | None = None,
timespan: Timespan | None = None,
storage_class: str | StorageClass | None = None,
dimension_records: bool = False,
datastore_records: bool = False,
**kwargs: Any,
) -> DatasetRef | None:
Expand All @@ -1353,9 +1363,11 @@ def find_dataset(
data_id,
collections=collections,
timespan=timespan,
dataset_records=datastore_records,
datastore_records=datastore_records,
**kwargs,
)
if ref is not None and dimension_records:
ref = ref.expanded(self._registry.expandDataId(ref.dataId, graph=ref.datasetType.dimensions))
if ref is not None and storage_class is not None:
ref = ref.overrideStorageClass(storage_class)
return ref
Expand Down
21 changes: 19 additions & 2 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,23 @@ def get_dataset_type(self, name: str) -> DatasetType:
return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions)

def get_dataset(
self, id: DatasetId, storage_class: str | StorageClass | None = None
self,
id: DatasetId,
storage_class: str | StorageClass | None = None,
dimension_records: bool = False,
datastore_records: bool = False,
) -> DatasetRef | None:
path = f"dataset/{id}"
if isinstance(storage_class, StorageClass):
storage_class_name = storage_class.name
elif storage_class:
storage_class_name = storage_class
params: dict[str, str] = {}
params: dict[str, str | bool] = {
"dimension_records": dimension_records,
"datastore_records": datastore_records,
}
if datastore_records:
raise ValueError("Datastore records can not yet be returned in client/server butler.")
if storage_class:
params["storage_class"] = storage_class_name
response = self._client.get(self._get_url(path), params=params)
Expand All @@ -244,6 +253,7 @@ def find_dataset(
collections: str | Sequence[str] | None = None,
timespan: Timespan | None = None,
storage_class: str | StorageClass | None = None,
dimension_records: bool = False,
datastore_records: bool = False,
**kwargs: Any,
) -> DatasetRef | None:
Expand All @@ -258,6 +268,11 @@ def find_dataset(
# cache to generate list of collection names.
wildcards = CollectionWildcard.from_expression(collections)

if datastore_records:
raise ValueError("Datastore records can not yet be returned in client/server butler.")
if timespan:
raise ValueError("Timespan can not yet be used in butler client/server.")

if isinstance(dataset_type, DatasetType):
dataset_type = dataset_type.name

Expand All @@ -268,6 +283,8 @@ def find_dataset(
data_id=self._simplify_dataId(data_id, **kwargs),
collections=wildcards.strings,
storage_class=storage_class,
dimension_records=dimension_records,
datastore_records=datastore_records,
)

path = f"find_dataset/{dataset_type}"
Expand Down
22 changes: 19 additions & 3 deletions python/lsst/daf/butler/remote_butler/server/_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,20 @@ def get_dataset_type(
response_model_exclude_none=True,
)
def get_dataset(
id: uuid.UUID, storage_class: str | None = None, factory: Factory = Depends(factory_dependency)
id: uuid.UUID,
storage_class: str | None = None,
dimension_records: bool = False,
datastore_records: bool = False,
factory: Factory = Depends(factory_dependency),
) -> SerializedDatasetRef | None:
"""Return a single dataset reference."""
butler = factory.create_butler()
ref = butler.get_dataset(id, storage_class=storage_class)
ref = butler.get_dataset(
id,
storage_class=storage_class,
dimension_records=dimension_records,
datastore_records=datastore_records,
)
if ref is not None:
return ref.to_simple()
# This could raise a 404 since id is not found. The standard implementation
Expand Down Expand Up @@ -153,6 +162,13 @@ def find_dataset(

butler = factory.create_butler()
ref = butler.find_dataset(
dataset_type, None, collections=collection_query, storage_class=query.storage_class, **data_id
dataset_type,
None,
collections=collection_query,
storage_class=query.storage_class,
timespan=None,
dimension_records=query.dimension_records,
datastore_records=query.datastore_records,
**data_id,
)
return ref.to_simple() if ref else None
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/remote_butler/server/_server_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,5 @@ class FindDatasetModel(_BaseModelCompat):
data_id: SerializedDataCoordinate
collections: list[str]
storage_class: str | None
dimension_records: bool = False
datastore_records: bool = False
8 changes: 8 additions & 0 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,18 @@ def test_find_dataset(self):
ref = self.butler.find_dataset("bias", collections="imported_g", detector=1, instrument="Cam1")
self.assertIsInstance(ref, DatasetRef)
self.assertEqual(ref.id, uuid.UUID("e15ab039-bc8b-4135-87c5-90902a7c0b22"))
self.assertFalse(ref.dataId.hasRecords())

# Try again with variation of parameters.
ref_new = self.butler.find_dataset(
"bias",
{"detector": 1},
collections="imported_g",
instrument="Cam1",
dimension_records=True,
)
self.assertEqual(ref_new, ref)
self.assertTrue(ref_new.dataId.hasRecords())

ref_new = self.butler.find_dataset(
ref.datasetType,
Expand All @@ -143,6 +146,11 @@ def test_find_dataset(self):
)
self.assertEqual(ref2, ref3)

# Try expanded refs.
self.assertFalse(ref.dataId.hasRecords())
expanded = self.butler.get_dataset(ref.id, dimension_records=True)
self.assertTrue(expanded.dataId.hasRecords())

# The test datasets are all Exposure so storage class conversion
# can not be tested until we fix that. For now at least test the
# code paths.
Expand Down

0 comments on commit 1e3d68c

Please sign in to comment.