From 64702865143a17f2b7f310d7460890d419790cd1 Mon Sep 17 00:00:00 2001
From: Andy Salnikov
Date: Wed, 4 Oct 2023 11:46:31 -0700
Subject: [PATCH] Make datastore records a private attribute of DatasetRef.

Jim suggested that we use a new `OpaqueRecordSet` class to keep datastore
records, but after discussion we figured it would require lots of additional
structure to support it. For now there is no clear need for that class and
we can continue using `StoredDatastoreItemInfo` in DatasetRefs, but we want
to make the records private to give us the freedom to change this later.
---
 python/lsst/daf/butler/_butler.py             |  4 ++--
 python/lsst/daf/butler/core/_dataset_ref.py   | 18 +++++++++---------
 .../daf/butler/datastores/fileDatastore.py    |  8 ++++----
 python/lsst/daf/butler/registries/sql.py      |  4 ++--
 .../daf/butler/registry/interfaces/_bridge.py |  2 +-
 tests/test_datasets.py                        |  4 ++--
 6 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index 5211c05454..c4c45206ea 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -1066,7 +1066,7 @@ def _findDatasetRef(
             if collections is not None:
                 warnings.warn("Collections should not be specified with DatasetRef", stacklevel=3)
             # May need to retrieve datastore records if requested.
-            if datastore_records and datasetRefOrType.datastore_records is None:
+            if datastore_records and datasetRefOrType._datastore_records is None:
                 datasetRefOrType = self._registry.get_datastore_records(datasetRefOrType)
             return datasetRefOrType
         timespan: Timespan | None = None
@@ -1122,7 +1122,7 @@ def _findDatasetRef(
             # using the user definition such that the expected type is
             # returned.
             ref = DatasetRef(
-                datasetType, ref.dataId, run=ref.run, id=ref.id, datastore_records=ref.datastore_records
+                datasetType, ref.dataId, run=ref.run, id=ref.id, datastore_records=ref._datastore_records
             )
         return ref
diff --git a/python/lsst/daf/butler/core/_dataset_ref.py b/python/lsst/daf/butler/core/_dataset_ref.py
index 831cd86ab1..1c4a0c363d 100644
--- a/python/lsst/daf/butler/core/_dataset_ref.py
+++ b/python/lsst/daf/butler/core/_dataset_ref.py
@@ -320,7 +320,7 @@ class DatasetRef:
         "datasetType",
         "dataId",
         "run",
-        "datastore_records",
+        "_datastore_records",
     )

     def __init__(
@@ -348,7 +348,7 @@ def __init__(
                 .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
                 .int
             )
-        self.datastore_records = datastore_records
+        self._datastore_records = datastore_records

     @property
     def id(self) -> DatasetId:
@@ -431,9 +431,9 @@ def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
             return SerializedDatasetRef(**simple)

         datastore_records: Mapping[str, _DatastoreRecords] | None = None
-        if self.datastore_records is not None:
+        if self._datastore_records is not None:
             datastore_records = {}
-            for opaque_name, records in self.datastore_records.items():
+            for opaque_name, records in self._datastore_records.items():
                 class_name, record_dicts = StoredDatastoreItemInfo.to_records(records)
                 datastore_records[opaque_name] = class_name, list(record_dicts)
@@ -578,7 +578,7 @@ def _unpickle(
     def __reduce__(self) -> tuple:
         return (
             self._unpickle,
-            (self.datasetType, self.dataId, self.id, self.run, self.datastore_records),
+            (self.datasetType, self.dataId, self.id, self.run, self._datastore_records),
         )

     def __deepcopy__(self, memo: dict) -> DatasetRef:
@@ -607,7 +607,7 @@ def expanded(self, dataId: DataCoordinate) -> DatasetRef:
             id=self.id,
             run=self.run,
             conform=False,
-            datastore_records=self.datastore_records,
+            datastore_records=self._datastore_records,
         )

     def isComponent(self) -> bool:
@@ -722,7 +722,7 @@ def makeCompositeRef(self) -> DatasetRef:
             id=self.id,
             run=self.run,
             conform=False,
-            datastore_records=self.datastore_records,
+            datastore_records=self._datastore_records,
         )

     def makeComponentRef(self, name: str) -> DatasetRef:
@@ -748,7 +748,7 @@ def makeComponentRef(self, name: str) -> DatasetRef:
             id=self.id,
             run=self.run,
             conform=False,
-            datastore_records=self.datastore_records,
+            datastore_records=self._datastore_records,
         )

     def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
@@ -799,7 +799,7 @@ def replace(
             A new dataset reference with updated attributes.
         """
         if datastore_records is None:
-            datastore_records = self.datastore_records
+            datastore_records = self._datastore_records
         if storage_class is None:
             datasetType = self.datasetType
         else:
diff --git a/python/lsst/daf/butler/datastores/fileDatastore.py b/python/lsst/daf/butler/datastores/fileDatastore.py
index 7fbf9ec43f..e4cef2a57c 100644
--- a/python/lsst/daf/butler/datastores/fileDatastore.py
+++ b/python/lsst/daf/butler/datastores/fileDatastore.py
@@ -422,8 +422,8 @@ def getStoredItemsInfo(self, ref: DatasetIdRef) -> list[StoredFileInfo]:
             list if no matching datasets can be found.
         """
         # Try to get them from the ref first.
-        if ref.datastore_records is not None:
-            if (ref_records := ref.datastore_records.get(self._table.name)) is not None:
+        if ref._datastore_records is not None:
+            if (ref_records := ref._datastore_records.get(self._table.name)) is not None:
                 # Need to make sure they have correct type.
                 for record in ref_records:
                     if not isinstance(record, StoredFileInfo):
@@ -493,10 +493,10 @@ def _get_stored_records_associated_with_refs(
         records_by_ref: defaultdict[DatasetId, list[StoredFileInfo]] = defaultdict(list)
         refs_with_no_records = []
         for ref in refs:
-            if ref.datastore_records is None:
+            if ref._datastore_records is None:
                 refs_with_no_records.append(ref)
             else:
-                if (ref_records := ref.datastore_records.get(self._table.name)) is not None:
+                if (ref_records := ref._datastore_records.get(self._table.name)) is not None:
                     # Need to make sure they have correct type.
                    for ref_record in ref_records:
                        if not isinstance(ref_record, StoredFileInfo):
diff --git a/python/lsst/daf/butler/registries/sql.py b/python/lsst/daf/butler/registries/sql.py
index 1cec58ba20..98ccf7d240 100644
--- a/python/lsst/daf/butler/registries/sql.py
+++ b/python/lsst/daf/butler/registries/sql.py
@@ -1368,8 +1368,8 @@ def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None:
                 bridge.insert([ref])

             # store records in opaque tables
-            assert ref.datastore_records is not None, "Dataset ref must have datastore records"
-            for table_name, records in ref.datastore_records.items():
+            assert ref._datastore_records is not None, "Dataset ref must have datastore records"
+            for table_name, records in ref._datastore_records.items():
                 opaque_table = self._managers.opaque.get(table_name)
                 assert opaque_table is not None, f"Unexpected opaque table name {table_name}"
                 opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records))
diff --git a/python/lsst/daf/butler/registry/interfaces/_bridge.py b/python/lsst/daf/butler/registry/interfaces/_bridge.py
index 653f03584f..62bc4b2465 100644
--- a/python/lsst/daf/butler/registry/interfaces/_bridge.py
+++ b/python/lsst/daf/butler/registry/interfaces/_bridge.py
@@ -90,7 +90,7 @@ def datasetType(self) -> DatasetType:
         raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

     @property
-    def datastore_records(self) -> DatasetDatastoreRecords | None:
+    def _datastore_records(self) -> DatasetDatastoreRecords | None:
         raise AttributeError("A FakeDatasetRef can not be associated with datastore records")
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index d090cd4b5a..ae37641c2f 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -708,8 +708,8 @@ def testJson(self) -> None:
         s = ref.to_json()
         ref2 = DatasetRef.from_json(s, universe=self.universe)
         self.assertEqual(ref2, ref)
-        self.assertIsNotNone(ref2.datastore_records)
-        self.assertEqual(ref2.datastore_records, ref.datastore_records)
+        self.assertIsNotNone(ref2._datastore_records)
+        self.assertEqual(ref2._datastore_records, ref._datastore_records)

     def testFileDataset(self) -> None:
         ref = DatasetRef(self.datasetType, self.dataId, run="somerun")
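
Note: the sketch below illustrates the pattern this patch applies, assuming
nothing beyond what the diff shows: the records mapping is still accepted as
a construction keyword argument but is stored in a private slot, and
replace() falls back to the existing private value when no override is
given. ToyRef and its field values are hypothetical stand-ins for
illustration only, not the real daf_butler classes.

    from __future__ import annotations

    from typing import Any, Mapping


    class ToyRef:
        """Toy stand-in for DatasetRef: records live in a private slot."""

        __slots__ = ("run", "_datastore_records")

        def __init__(self, run: str, datastore_records: Mapping[str, Any] | None = None):
            self.run = run
            # Private attribute: callers should not depend on its layout,
            # which leaves us free to change the representation later.
            self._datastore_records = datastore_records

        def replace(self, *, datastore_records: Mapping[str, Any] | None = None) -> ToyRef:
            # Mirrors the patched DatasetRef.replace(): keep the existing
            # private records unless the caller supplies new ones.
            if datastore_records is None:
                datastore_records = self._datastore_records
            return ToyRef(self.run, datastore_records=datastore_records)


    ref = ToyRef("somerun", datastore_records={"file_datastore_records": ["info"]})
    copy = ref.replace()
    assert copy._datastore_records == ref._datastore_records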