DM-41878: Implement RemoteButler.get() backed by a single FileDatastore #912

Merged (9 commits) on Dec 8, 2023
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/_butler.py
@@ -814,7 +814,8 @@ def get_dataset_type(self, name: str) -> DatasetType:
def get_dataset(
self,
id: DatasetId,
storage_class: str | StorageClass | None,
*,
storage_class: str | StorageClass | None = None,
Review comment (Member): Maybe we mark this method as requiring keyword args after the id?

dimension_records: bool = False,
datastore_records: bool = False,
) -> DatasetRef | None:
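
For illustration, a call under the new keyword-only signature might look like the sketch below; the repository path, dataset id, and storage class name are placeholders, not values taken from this PR.

```python
import uuid

from lsst.daf.butler import Butler

# Placeholder repository and dataset id, for illustration only.
butler = Butler("/path/to/repo")
dataset_id = uuid.UUID("00000000-0000-0000-0000-000000000000")

# After this change, storage_class and the flags that follow it must be
# passed by keyword; passing them positionally raises a TypeError.
ref = butler.get_dataset(dataset_id, storage_class="ArrowAstropy", dimension_records=True)
```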
17 changes: 17 additions & 0 deletions python/lsst/daf/butler/datastore/_datastore.py
@@ -540,6 +540,23 @@ def get(
"""
raise NotImplementedError("Must be implemented by subclass")

def prepare_get_for_external_client(self, ref: DatasetRef) -> object:
"""Retrieve serializable data that can be used to execute a get()

Parameters
----------
ref : `DatasetRef`
Reference to the required dataset.

Returns
-------
payload : `object`
Serializable payload containing the information needed to perform a
get() operation. This payload may be sent over the wire to another
system to perform the get().
"""
raise NotImplementedError()
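
The hook above only promises a serializable payload of unspecified type (its return annotation is `object`); the concrete type is left to the datastore. As a hedged illustration of what such a payload could look like for a file-backed datastore (the model below is invented for this example, is not part of the diff, and assumes pydantic v2), a small pydantic model is enough to ship over the wire to a RemoteButler client:

```python
from pydantic import BaseModel


class HypotheticalGetPayload(BaseModel):
    """Invented example of what a file-backed datastore might return."""

    url: str            # direct or signed URL of the file artifact
    formatter: str      # fully-qualified name of the Formatter to read it with
    storage_class: str  # name of the StorageClass to return the dataset as


payload = HypotheticalGetPayload(
    url="https://example.org/bucket/run/dataset.json",
    formatter="lsst.daf.butler.formatters.json.JsonFormatter",
    storage_class="StructuredDataDict",
)
# This JSON string is what a Butler server could hand to the client that
# performs the actual get().
print(payload.model_dump_json())
```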

@abstractmethod
def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
"""Write a `InMemoryDataset` with a given `DatasetRef` to the store.
6 changes: 5 additions & 1 deletion python/lsst/daf/butler/datastore/cache_manager.py
@@ -1015,7 +1015,11 @@ class DatastoreDisabledCacheManager(AbstractDatastoreCacheManager):
in lookup keys.
"""

def __init__(self, config: str | DatastoreCacheManagerConfig, universe: DimensionUniverse):
def __init__(
self,
config: str | DatastoreCacheManagerConfig | None = None,
universe: DimensionUniverse | None = None,
):
return

def should_be_cached(self, entity: DatasetRef | DatasetType | StorageClass) -> bool:
98 changes: 49 additions & 49 deletions python/lsst/daf/butler/datastore/generic_base.py
@@ -29,7 +29,7 @@

from __future__ import annotations

__all__ = ("GenericBaseDatastore",)
__all__ = ("GenericBaseDatastore", "post_process_get")

import logging
from collections.abc import Mapping
@@ -54,54 +54,6 @@ class GenericBaseDatastore(Datastore, Generic[_InfoType]):
Should always be sub-classed since key abstract methods are missing.
"""

def _post_process_get(
self,
inMemoryDataset: object,
readStorageClass: StorageClass,
assemblerParams: Mapping[str, Any] | None = None,
isComponent: bool = False,
) -> object:
"""Given the Python object read from the datastore, manipulate
it based on the supplied parameters and ensure the Python
type is correct.

Parameters
----------
inMemoryDataset : `object`
Dataset to check.
readStorageClass: `StorageClass`
The `StorageClass` used to obtain the assembler and to
check the python type.
assemblerParams : `dict`, optional
Parameters to pass to the assembler. Can be `None`.
isComponent : `bool`, optional
If this is a component, allow the inMemoryDataset to be `None`.

Returns
-------
dataset : `object`
In-memory dataset, potentially converted to expected type.
"""
# Process any left over parameters
if assemblerParams:
inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)

# Validate the returned data type matches the expected data type
pytype = readStorageClass.pytype

allowedTypes = []
if pytype:
allowedTypes.append(pytype)

# Special case components to allow them to be None
if isComponent:
allowedTypes.append(type(None))

if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)

return inMemoryDataset

def _validate_put_parameters(self, inMemoryDataset: object, ref: DatasetRef) -> None:
"""Validate the supplied arguments for put.

@@ -173,3 +125,51 @@ def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
assert inputDatastore is not self # unless we want it for renames?
inMemoryDataset = inputDatastore.get(ref)
return self.put(inMemoryDataset, ref)


def post_process_get(
inMemoryDataset: object,
readStorageClass: StorageClass,
assemblerParams: Mapping[str, Any] | None = None,
isComponent: bool = False,
) -> object:
"""Given the Python object read from the datastore, manipulate
it based on the supplied parameters and ensure the Python
type is correct.

Parameters
----------
inMemoryDataset : `object`
Dataset to check.
readStorageClass: `StorageClass`
The `StorageClass` used to obtain the assembler and to
check the python type.
assemblerParams : `dict`, optional
Parameters to pass to the assembler. Can be `None`.
isComponent : `bool`, optional
If this is a component, allow the inMemoryDataset to be `None`.

Returns
-------
dataset : `object`
In-memory dataset, potentially converted to expected type.
"""
# Process any left over parameters
if assemblerParams:
inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)

# Validate the returned data type matches the expected data type
pytype = readStorageClass.pytype

allowedTypes = []
if pytype:
allowedTypes.append(pytype)

# Special case components to allow them to be None
if isComponent:
allowedTypes.append(type(None))

if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)

return inMemoryDataset
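
Because _post_process_get is now the module-level post_process_get (and exported via __all__), it can be reused outside GenericBaseDatastore, for example by a client-side get(). A minimal, self-contained sketch of calling it, using an ad hoc storage class rather than one from a repository configuration:

```python
from lsst.daf.butler import StorageClass
from lsst.daf.butler.datastore.generic_base import post_process_get

# Ad hoc storage class for illustration; a real caller would take it from
# the DatasetRef's dataset type.
storage_class = StorageClass("DemoDict", pytype=dict)

data = {"a": 1, "b": 2}
# The object already matches the storage class pytype, so it is returned
# unchanged; a type mismatch would trigger storage_class.coerce_type().
result = post_process_get(data, storage_class, assemblerParams=None, isComponent=False)
assert result == data
```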
39 changes: 37 additions & 2 deletions python/lsst/daf/butler/datastore/stored_file_info.py
@@ -27,13 +27,14 @@

from __future__ import annotations

__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo")
__all__ = ("StoredDatastoreItemInfo", "StoredFileInfo", "SerializedStoredFileInfo")

import inspect
from collections.abc import Iterable, Mapping
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

from lsst.daf.butler._compat import _BaseModelCompat
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name
@@ -214,7 +215,7 @@ def __init__(
"""StorageClass associated with Dataset."""

component: str | None
"""Component associated with this file. Can be None if the file does
"""Component associated with this file. Can be `None` if the file does
not refer to a component of a composite."""

checksum: str | None
@@ -260,6 +261,13 @@ def to_record(self, **kwargs: Any) -> dict[str, Any]:
**kwargs,
)

def to_simple(self) -> SerializedStoredFileInfo:
record = self.to_record()
# We allow None on the model but the record contains a "null string"
# instead
record["component"] = self.component
return SerializedStoredFileInfo.model_validate(record)

def file_location(self, factory: LocationFactory) -> Location:
"""Return the location of artifact.

@@ -307,6 +315,10 @@ def from_record(cls: type[StoredFileInfo], record: Mapping[str, Any]) -> StoredFileInfo:
)
return info

@classmethod
def from_simple(cls: type[StoredFileInfo], model: SerializedStoredFileInfo) -> StoredFileInfo:
return cls.from_record(dict(model))

def update(self, **kwargs: Any) -> StoredFileInfo:
new_args = {}
for k in self.__slots__:
@@ -320,3 +332,26 @@ def update(self, **kwargs: Any) -> StoredFileInfo:

def __reduce__(self) -> str | tuple[Any, ...]:
return (self.from_record, (self.to_record(),))


class SerializedStoredFileInfo(_BaseModelCompat):
Review comment (Member): I still think it's a bit odd that we now have a dataclass defining the datastore record content, a pydantic model defining the datastore record content, and a method in FileDatastore for creating the database table that must match the other two but uses neither to define it. If makeTableSpec built up its definition from StoredFileInfo (if we are saying that it is required that StoredFileInfo and makeTableSpec agree but the serialized form is allowed to be different), that would at least limit the duplication.

Reply (Contributor Author): StoredFileInfo isn't actually a dataclass; it's a subclass of the abstract base class StoredDatastoreItemInfo, with a property that can't be instantiated without injecting configuration and several business-logic methods. And the representation of some of the fields varies between the various cases (e.g. component is str | None on the public API, but in the database it's str | Literal['__NULL_STRING__']).

I had considered making StoredFileInfo have-a SerializedStoredFileInfo and forwarding the properties to it, but it didn't seem to buy much because you still have to have the duplicate properties to do the forwarding. I also think some of the representations might need to vary here (e.g. I kinda think SerializedStoredFileInfo shouldn't have a path at all, or if it does it should always be relative. Whereas the one in the StoredFileInfo is sometimes absolute.)

I also considered making StoredFileInfo inherit from SerializedStoredFileInfo or an additional shared pydantic model for the duplicated fields, but inheritance seems harder to understand than duplicating 5 string properties. And if we add more properties, they will likely be non-nullable on StoredFileInfo but will have to be nullable on the serialized one to handle backwards compatibility.

I do think that the methods on StoredFileInfo could be moved to FileDatastore and StoredFileInfo could become an actual dataclass. Or, if we were using the SQLAlchemy ORM layer, StoredFileInfo could just be the ORM object. But I know you had plans for exposing these in more places, which is why you added the abstract base class, so to my mind that kind of change is way outside the scope of this ticket.

"""Serialized representation of `StoredFileInfo` properties"""

formatter: str
"""Fully-qualified name of Formatter."""

path: str
"""Path to dataset within Datastore."""

storage_class: str
"""Name of the StorageClass associated with Dataset."""

component: str | None
"""Component associated with this file. Can be `None` if the file does
not refer to a component of a composite."""

checksum: str | None
"""Checksum of the serialized dataset."""

file_size: int
"""Size of the serialized dataset in bytes."""