Skip to content

Commit

Permalink
Reduce need for schema reflection for registry tables.
Browse files Browse the repository at this point in the history
Static tables do not really need schema verification because we rely
on version numbers in butler_attributes. Verification and reflection
may still be useful for dynamic tables (tags/calibs) but we now delay
it until the tables are actually used.
  • Loading branch information
andy-slac committed Nov 6, 2023
1 parent 502efab commit f2eeed0
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 37 deletions.
74 changes: 46 additions & 28 deletions python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,36 @@ class MissingDatabaseTableError(RuntimeError):
"""Exception raised when a table is not found in a database."""


class _ExistingTableFactory:
"""Factory for `sqlalchemy.schema.Table` instances that returns already
existing table instance.
"""

def __init__(self, table: sqlalchemy.schema.Table):
self._table = table

def __call__(self) -> sqlalchemy.schema.Table:
return self._table


class _SpecTableFactory:
"""Factory for `sqlalchemy.schema.Table` instances that builds table
instances using provided `ddl.TableSpec` definition and verifies that
table exists in the database.
"""

def __init__(self, db: Database, name: str, spec: ddl.TableSpec):
self._db = db
self._name = name
self._spec = spec

def __call__(self) -> sqlalchemy.schema.Table:
table = self._db.getExistingTable(self._name, self._spec)
if table is None:
raise MissingDatabaseTableError(f"Table {self._name} is missing from database schema.")
return table


class ByDimensionsDatasetRecordStorageManagerBase(DatasetRecordStorageManager):
"""A manager class for datasets that uses one dataset-collection table for
each group of dataset types that share the same dimensions.
Expand Down Expand Up @@ -218,37 +248,24 @@ def refresh(self) -> None:
datasetType = DatasetType(
name, dimensions, row[c.storage_class], isCalibration=(calibTableName is not None)
)
tags = self._db.getExistingTable(
row[c.tag_association_table],
makeTagTableSpec(datasetType, type(self._collections), self.getIdColumnType()),
)
if tags is None:
raise MissingDatabaseTableError(
f"Table {row[c.tag_association_table]} is missing from database schema."
)
tags_spec = makeTagTableSpec(datasetType, type(self._collections), self.getIdColumnType())
tags_table_factory = _SpecTableFactory(self._db, row[c.tag_association_table], tags_spec)
calibs_table_factory = None
if calibTableName is not None:
calibs = self._db.getExistingTable(
row[c.calibration_association_table],
makeCalibTableSpec(
datasetType,
type(self._collections),
self._db.getTimespanRepresentation(),
self.getIdColumnType(),
),
calibs_spec = makeCalibTableSpec(
datasetType,
type(self._collections),
self._db.getTimespanRepresentation(),
self.getIdColumnType(),
)
if calibs is None:
raise MissingDatabaseTableError(
f"Table {row[c.calibration_association_table]} is missing from database schema."
)
else:
calibs = None
calibs_table_factory = _SpecTableFactory(self._db, calibTableName, calibs_spec)
storage = self._recordStorageType(
db=self._db,
datasetType=datasetType,
static=self._static,
summaries=self._summaries,
tags=tags,
calibs=calibs,
tags_table_factory=tags_table_factory,
calibs_table_factory=calibs_table_factory,
dataset_type_id=row["id"],
collections=self._collections,
use_astropy_ingest_date=self.ingest_date_dtype() is ddl.AstropyTimeNsecTai,
Expand Down Expand Up @@ -302,6 +319,8 @@ def register(self, datasetType: DatasetType) -> tuple[DatasetRecordStorage, bool
tagTableName,
makeTagTableSpec(datasetType, type(self._collections), self.getIdColumnType()),
)
tags_table_factory = _ExistingTableFactory(tags)
calibs_table_factory = None
if calibTableName is not None:
calibs = self._db.ensureTableExists(
calibTableName,
Expand All @@ -312,8 +331,7 @@ def register(self, datasetType: DatasetType) -> tuple[DatasetRecordStorage, bool
self.getIdColumnType(),
),
)
else:
calibs = None
calibs_table_factory = _ExistingTableFactory(calibs)
row, inserted = self._db.sync(
self._static.dataset_type,
keys={"name": datasetType.name},
Expand All @@ -335,8 +353,8 @@ def register(self, datasetType: DatasetType) -> tuple[DatasetRecordStorage, bool
datasetType=datasetType,
static=self._static,
summaries=self._summaries,
tags=tags,
calibs=calibs,
tags_table_factory=tags_table_factory,
calibs_table_factory=calibs_table_factory,
dataset_type_id=row["id"],
collections=self._collections,
use_astropy_ingest_date=self.ingest_date_dtype() is ddl.AstropyTimeNsecTai,
Expand Down
26 changes: 21 additions & 5 deletions python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

__all__ = ("ByDimensionsDatasetRecordStorage",)

from collections.abc import Iterable, Iterator, Sequence, Set
from collections.abc import Callable, Iterable, Iterator, Sequence, Set
from datetime import datetime
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -77,20 +77,36 @@ def __init__(
collections: CollectionManager,
static: StaticDatasetTablesTuple,
summaries: CollectionSummaryManager,
tags: sqlalchemy.schema.Table,
tags_table_factory: Callable[[], sqlalchemy.schema.Table],
use_astropy_ingest_date: bool,
calibs: sqlalchemy.schema.Table | None,
calibs_table_factory: Callable[[], sqlalchemy.schema.Table] | None,
):
super().__init__(datasetType=datasetType)
self._dataset_type_id = dataset_type_id
self._db = db
self._collections = collections
self._static = static
self._summaries = summaries
self._tags = tags
self._calibs = calibs
self._tags_table_factory = tags_table_factory
self._calibs_table_factory = calibs_table_factory
self._runKeyColumn = collections.getRunForeignKeyName()
self._use_astropy = use_astropy_ingest_date
self._tags_table: sqlalchemy.schema.Table | None = None
self._calibs_table: sqlalchemy.schema.Table | None = None

@property
def _tags(self) -> sqlalchemy.schema.Table:
    """Tag-association table for this dataset type
    (`sqlalchemy.schema.Table`).

    The table is constructed lazily on first access via the factory
    supplied at construction time and cached afterwards, so any work the
    factory does (e.g. schema lookup) is deferred until the table is
    actually used.
    """
    if self._tags_table is None:
        self._tags_table = self._tags_table_factory()
    return self._tags_table

@property
def _calibs(self) -> sqlalchemy.schema.Table | None:
    """Calibration-association table for this dataset type, or `None`
    if this dataset type has no calibrations
    (`sqlalchemy.schema.Table` or `None`).

    Constructed lazily on first access via the factory supplied at
    construction time and cached afterwards.  When no factory was
    provided (non-calibration dataset types) the property always
    returns `None` without caching anything.
    """
    if self._calibs_table is None:
        if self._calibs_table_factory is None:
            return None
        self._calibs_table = self._calibs_table_factory()
    return self._calibs_table

def delete(self, datasets: Iterable[DatasetRef]) -> None:
# Docstring inherited from DatasetRecordStorage.
Expand Down
4 changes: 0 additions & 4 deletions python/lsst/daf/butler/registry/interfaces/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,6 @@ def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
relationships.
"""
name = self._db._mangleTableName(name)
if name in self._tableNames:
_checkExistingTableDefinition(
name, spec, self._inspector.get_columns(name, schema=self._db.namespace)
)
metadata = self._db._metadata
assert metadata is not None, "Guaranteed by context manager that returns this object."
table = self._db._convertTableSpec(name, spec, metadata)
Expand Down

0 comments on commit f2eeed0

Please sign in to comment.