From bd1e7063f34b574c92f40a00bf902226d52c13e0 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Wed, 1 Nov 2023 12:36:03 -0700 Subject: [PATCH 1/5] Add kubernetes health check endpoint to server --- pyproject.toml | 4 ++++ .../butler/remote_butler/server/_server.py | 21 +++++++++++++++++++ requirements.txt | 1 + tests/test_server.py | 5 +++++ 4 files changed, 31 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index f65b7decb2..3a1e9417b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,10 @@ dynamic = ["version"] [project.optional-dependencies] postgres = ["psycopg2"] +server = [ + "fastapi", + "safir >= 3.4.0" +] test = [ "pytest >= 3.2", "pytest-openfiles >= 0.5.0", diff --git a/python/lsst/daf/butler/remote_butler/server/_server.py b/python/lsst/daf/butler/remote_butler/server/_server.py index 981130d9a8..9cb564c500 100644 --- a/python/lsst/daf/butler/remote_butler/server/_server.py +++ b/python/lsst/daf/butler/remote_butler/server/_server.py @@ -45,6 +45,7 @@ SerializedDatasetRef, SerializedDatasetType, ) +from safir.metadata import Metadata, get_metadata from ._config import get_config_from_env from ._factory import Factory @@ -97,6 +98,26 @@ def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> D return DataCoordinate.from_simple(data_id, registry=butler.registry) +@app.get( + "/", + description=( + "Return metadata about the running application. Can also be used as" + " a health check. This route is not exposed outside the cluster and" + " therefore cannot be used by external clients." + ), + include_in_schema=False, + response_model=Metadata, + response_model_exclude_none=True, + summary="Application metadata", +) +async def get_index() -> Metadata: + """GET ``/`` (the app's internal root). + + By convention, this endpoint returns only the application's metadata. 
+ """ + return get_metadata(package_name="lsst.daf.butler", application_name="butler") + + @app.get("/butler/v1/universe", response_model=dict[str, Any]) def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]: """Allow remote client to get dimensions definition.""" diff --git a/requirements.txt b/requirements.txt index 011a295ad6..acc5f03b28 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,3 +21,4 @@ pyarrow >= 0.16 responses >= 0.12.0 urllib3 >= 1.25.10 fastapi +safir >= 3.4.0 diff --git a/tests/test_server.py b/tests/test_server.py index a7661023fd..34476d2706 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -93,6 +93,11 @@ def tearDownClass(cls): del app.dependency_overrides[factory_dependency] removeTestTempDir(cls.root) + def test_health_check(self): + response = self.client.get("/") + self.assertEqual(response.status_code, 200) + self.assertEqual(response.json()["name"], "butler") + def test_simple(self): response = self.client.get("/butler/v1/universe") self.assertEqual(response.status_code, 200) From f50b632147b97f52408faf0064aef435e0748af7 Mon Sep 17 00:00:00 2001 From: "David H. 
Irving" Date: Thu, 2 Nov 2023 12:02:28 -0700 Subject: [PATCH 2/5] Provide a Butler configuration from the server --- .../butler/remote_butler/_remote_butler.py | 11 ++++++++ .../butler/remote_butler/server/_server.py | 24 +++++++++++++++++ tests/test_server.py | 26 +++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 20cff12322..8351f429f4 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -33,6 +33,7 @@ import httpx from lsst.daf.butler import __version__ +from lsst.daf.butler.repo_relocation import replaceRoot from lsst.resources import ResourcePath, ResourcePathExpression from lsst.utils.introspection import get_full_type_name @@ -72,6 +73,16 @@ def __init__( **kwargs: Any, ): butler_config = ButlerConfig(config, searchPaths, without_datastore=True) + # There is a convention in Butler config files where <butlerRoot> in a + # configuration option refers to the directory containing the + # configuration file. 
We allow this for the remote butler's URL so + # that the server doesn't have to know which hostname it is being + # accessed from + server_url_key = ("remote_butler", "url") + if server_url_key in butler_config: + butler_config[server_url_key] = replaceRoot( + butler_config[server_url_key], butler_config.configDir + ) self._config = RemoteButlerConfigModel.model_validate(butler_config) self._dimensions: DimensionUniverse | None = None # TODO: RegistryDefaults should have finish() called on it, but this diff --git a/python/lsst/daf/butler/remote_butler/server/_server.py b/python/lsst/daf/butler/remote_butler/server/_server.py index 9cb564c500..7ddb358292 100644 --- a/python/lsst/daf/butler/remote_butler/server/_server.py +++ b/python/lsst/daf/butler/remote_butler/server/_server.py @@ -118,6 +118,30 @@ async def get_index() -> Metadata: return get_metadata(package_name="lsst.daf.butler", application_name="butler") +@app.get( + "/butler/butler.yaml", + description=( + "Returns a Butler YAML configuration file that can be used to instantiate a Butler client" + " pointing at this server" + ), + summary="Client configuration file", + response_model=dict[str, Any], +) +@app.get( + "/butler/butler.json", + description=( + "Returns a Butler JSON configuration file that can be used to instantiate a Butler client" + " pointing at this server" + ), + summary="Client configuration file", + response_model=dict[str, Any], +) +async def get_client_config() -> dict[str, Any]: + # We can return JSON data for both the YAML and JSON case because all JSON + # files are parseable as YAML. 
+ return {"cls": "lsst.daf.butler.remote_butler.RemoteButler", "remote_butler": {"url": ""}} + + @app.get("/butler/v1/universe", response_model=dict[str, Any]) def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]: """Allow remote client to get dimensions definition.""" diff --git a/tests/test_server.py b/tests/test_server.py index 34476d2706..2b0fe69350 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -38,9 +38,12 @@ TestClient = None app = None +from unittest.mock import patch + from lsst.daf.butler import Butler, DataCoordinate, DatasetRef, MissingDatasetTypeError, StorageClassFactory from lsst.daf.butler.tests import DatastoreMock from lsst.daf.butler.tests.utils import MetricTestRepo, makeTestTempDir, removeTestTempDir +from lsst.resources.http import HttpResourcePath TESTDIR = os.path.abspath(os.path.dirname(__file__)) @@ -169,6 +172,29 @@ def test_find_dataset(self): self.assertIsNone(self.butler.get_dataset(uuid.uuid4())) self.assertIsNone(self.butler.get_dataset(uuid.uuid4(), storage_class="NumpyArray")) + def test_instantiate_via_butler_http_search(self): + """Ensure that the primary Butler constructor's automatic search logic + correctly locates and reads the configuration file and ends up with a + RemoteButler pointing to the correct URL + """ + + # This is kind of a fragile test. Butler's search logic does a lot of + # manipulations involving creating new ResourcePaths, and ResourcePath + # doesn't use httpx so we can't easily inject the TestClient in there. + # We don't have an actual valid HTTP URL to give to the constructor + # because the test instance of the server is accessed via ASGI. + # + # Instead we just monkeypatch the HTTPResourcePath 'read' method and + # hope that all ResourcePath HTTP reads during construction are going + # to the server under test. 
+ def override_read(http_resource_path): + return self.client.get(http_resource_path.geturl()).content + + with patch.object(HttpResourcePath, "read", override_read): + butler = Butler("https://test.example/butler") + assert isinstance(butler, RemoteButler) + assert str(butler._config.remote_butler.url) == "https://test.example/butler/" + if __name__ == "__main__": unittest.main() From 7716c35a78a9514a7ad7dd14d915366d6505b4ab Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Thu, 2 Nov 2023 12:05:42 -0700 Subject: [PATCH 3/5] Add gafaelfawr authentication to the client --- .../butler/remote_butler/_authentication.py | 98 +++++++++++++++++++ .../butler/remote_butler/_remote_butler.py | 13 ++- tests/test_authentication.py | 57 +++++++++++ 3 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 python/lsst/daf/butler/remote_butler/_authentication.py create mode 100644 tests/test_authentication.py diff --git a/python/lsst/daf/butler/remote_butler/_authentication.py b/python/lsst/daf/butler/remote_butler/_authentication.py new file mode 100644 index 0000000000..874519636b --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/_authentication.py @@ -0,0 +1,98 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. 
If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +__all__ = () + +import os +from fnmatch import fnmatchcase +from urllib.parse import urlparse + +_SERVER_WHITELIST = ["*.lsst.cloud"] +_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY = "BUTLER_RUBIN_ACCESS_TOKEN" +_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY = "ACCESS_TOKEN" + + +def get_authentication_token_from_environment(server_url: str) -> str | None: + """Search the environment for a Rubin Science Platform access token. + + The token may come from the following sources in this order: + + 1. The ``BUTLER_RUBIN_ACCESS_TOKEN`` environment variable. + This environment variable is meant primarily for development use, + running outside the Rubin Science Platform. This token will be sent + to EVERY server that we connect to, so be careful when connecting to + untrusted servers. + 2. The ``ACCESS_TOKEN`` environment variable. + This environment variable is provided by the Rubin Science Platform + Jupyter notebooks. It will only be returned if the given ``server_url`` + is in a whitelist of servers known to belong to the Rubin Science + Platform. Because this is a long-lived token that can be used to + impersonate the user with their full access rights, it should not be + sent to untrusted servers. 
+ + Parameters + ---------- + server_url : `str` + URL of the Butler server that the caller intends to connect to. + + Returns + ------- + access_token: `str` or `None` + A Rubin Science Platform access token, or `None` if no token was + configured in the environment. + """ + explicit_butler_token = os.getenv(_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY) + if explicit_butler_token: + return explicit_butler_token + + hostname = urlparse(server_url.lower()).hostname + hostname_in_whitelist = any( + (hostname and fnmatchcase(hostname, pattern) for pattern in _SERVER_WHITELIST) + ) + notebook_token = os.getenv(_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY) + if hostname_in_whitelist and notebook_token: + return notebook_token + + return None + + +def get_authentication_headers(access_token: str) -> dict[str, str]: + """Return HTTP headers required for authenticating the user via Rubin + Science Platform's Gafaelfawr service. + + Parameters + ---------- + access_token : `str` + Rubin Science Platform access token. + + Returns + ------- + header_map : `dict` [`str`, `str`] + HTTP header names and values as a mapping from name to value. + """ + # Access tokens are opaque bearer tokens. 
See https://sqr-069.lsst.io/ + return {"Authorization": f"Bearer {access_token}"} diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 8351f429f4..ccb79768ad 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -53,6 +53,7 @@ from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, Registry, RegistryDefaults from ..registry.wildcards import CollectionWildcard from ..transfers import RepoExportContext +from ._authentication import get_authentication_headers, get_authentication_token_from_environment from ._config import RemoteButlerConfigModel from .server import FindDatasetModel @@ -70,6 +71,7 @@ def __init__( inferDefaults: bool = True, # Parameters unique to RemoteButler http_client: httpx.Client | None = None, + access_token: str | None = None, **kwargs: Any, ): butler_config = ButlerConfig(config, searchPaths, without_datastore=True) @@ -84,6 +86,7 @@ def __init__( butler_config[server_url_key], butler_config.configDir ) self._config = RemoteButlerConfigModel.model_validate(butler_config) + self._dimensions: DimensionUniverse | None = None # TODO: RegistryDefaults should have finish() called on it, but this # requires getCollectionSummary() which is not yet implemented @@ -94,8 +97,16 @@ def __init__( # This is generally done for testing. 
self._client = http_client else: + server_url = str(self._config.remote_butler.url) + auth_headers = {} + if access_token is None: + access_token = get_authentication_token_from_environment(server_url) + if access_token is not None: + auth_headers = get_authentication_headers(access_token) + headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"} - self._client = httpx.Client(headers=headers, base_url=str(self._config.remote_butler.url)) + headers.update(auth_headers) + self._client = httpx.Client(headers=headers, base_url=server_url) def isWriteable(self) -> bool: # Docstring inherited. diff --git a/tests/test_authentication.py b/tests/test_authentication.py new file mode 100644 index 0000000000..77e5cc33a9 --- /dev/null +++ b/tests/test_authentication.py @@ -0,0 +1,57 @@ +import os +import unittest +from contextlib import contextmanager +from unittest.mock import patch + +try: + from lsst.daf.butler.remote_butler import RemoteButler + from lsst.daf.butler.remote_butler._authentication import ( + _EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY, + _RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY, + get_authentication_headers, + get_authentication_token_from_environment, + ) +except ImportError: + RemoteButler = None + + +@contextmanager +def _mock_env(new_environment): + with patch.dict(os.environ, new_environment, clear=True): + yield + + +@unittest.skipIf( + RemoteButler is None, "RemoteButler could not be imported, optional dependencies may not be installed" +) +class TestButlerClientAuthentication(unittest.TestCase): + """Test access-token logic""" + + def test_explicit_butler_token(self): + with _mock_env( + { + _EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token1", + _RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "not-this-token", + } + ): + token = get_authentication_token_from_environment("https://untrustedserver.com") + self.assertEqual(token, "token1") + + def test_jupyter_token_with_safe_server(self): + with 
_mock_env({_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token2"}): + token = get_authentication_token_from_environment("https://data.LSST.cloud/butler") + self.assertEqual(token, "token2") + + def test_jupyter_token_with_unsafe_server(self): + with _mock_env({_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token2"}): + token = get_authentication_token_from_environment("https://untrustedserver.com/butler") + self.assertIsNone(token) + + def test_missing_token(self): + with _mock_env({}): + token = get_authentication_token_from_environment("https://data.lsst.cloud/butler") + self.assertIsNone(token) + + def test_header_generation(self): + headers = get_authentication_headers("tokendata") + self.assertEqual(headers, {"Authorization": "Bearer tokendata"}) From 0fda5f6cc177c507ecd3df2be255fa0ae5b868ed Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Thu, 2 Nov 2023 12:11:22 -0700 Subject: [PATCH 4/5] Make "/butler" URL prefix variable in client For phalanx deployments, all butler servers will have the same hostname but live under different paths. --- .../daf/butler/remote_butler/_remote_butler.py | 14 ++++++-------- tests/test_server.py | 1 + 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index ccb79768ad..841735a28b 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -77,9 +77,9 @@ def __init__( butler_config = ButlerConfig(config, searchPaths, without_datastore=True) # There is a convention in Butler config files where <butlerRoot> in a # configuration option refers to the directory containing the - # configuration file. We allow this for the remote butler's URL so + # configuration file. We allow this for the remote butler's URL so # that the server doesn't have to know which hostname it is being - # accessed from + # accessed from. 
server_url_key = ("remote_butler", "url") if server_url_key in butler_config: butler_config[server_url_key] = replaceRoot( @@ -442,20 +442,18 @@ def pruneDatasets( raise NotImplementedError() def _get_url(self, path: str, version: str = "v1") -> str: - """Form the complete path to an endpoint on the server + """Form the complete path to an endpoint on the server. Parameters ---------- path : `str` - The relative path to the server endpoint. Should not include the - "/butler" prefix. + The relative path to the server endpoint. version : `str`, optional Version string to prepend to path. Defaults to "v1". Returns ------- path : `str` - The full path to the endpoint + The full path to the endpoint. """ - prefix = "butler" - return f"{prefix}/{version}/{path}" + return f"{version}/{path}" diff --git a/tests/test_server.py b/tests/test_server.py index 2b0fe69350..2adc161aec 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -85,6 +85,7 @@ def create_factory_dependency(): # Set up the RemoteButler that will connect to the server cls.client = TestClient(app) + cls.client.base_url = "http://text.example/butler/" cls.butler = _make_remote_butler(cls.client) # Populate the test server. From c50bf28a10b626bd0d9e4ffa2372bf20fce02e8c Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Fri, 3 Nov 2023 14:47:37 -0700 Subject: [PATCH 5/5] Change path prefix for butler server For the purposes of the RSP, Butler is considered part of the "API Aspect", so the path to it needs to start with /api/. Handlers were re-organized to be grouped under an APIRouter, since this prefix will need to be configurable in the future. 
--- .../remote_butler/server/_dependencies.py | 43 +++++ .../butler/remote_butler/server/_server.py | 147 +---------------- .../server/handlers/_external.py | 154 ++++++++++++++++++ tests/test_server.py | 11 +- 4 files changed, 210 insertions(+), 145 deletions(-) create mode 100644 python/lsst/daf/butler/remote_butler/server/_dependencies.py create mode 100644 python/lsst/daf/butler/remote_butler/server/handlers/_external.py diff --git a/python/lsst/daf/butler/remote_butler/server/_dependencies.py b/python/lsst/daf/butler/remote_butler/server/_dependencies.py new file mode 100644 index 0000000000..320654f9e7 --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/server/_dependencies.py @@ -0,0 +1,43 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. + +from functools import cache + +from lsst.daf.butler import Butler + +from ._config import get_config_from_env +from ._factory import Factory + + +@cache +def _make_global_butler() -> Butler: + config = get_config_from_env() + return Butler.from_config(config.config_uri) + + +def factory_dependency() -> Factory: + return Factory(butler=_make_global_butler()) diff --git a/python/lsst/daf/butler/remote_butler/server/_server.py b/python/lsst/daf/butler/remote_butler/server/_server.py index 7ddb358292..d2231d0d52 100644 --- a/python/lsst/daf/butler/remote_butler/server/_server.py +++ b/python/lsst/daf/butler/remote_butler/server/_server.py @@ -27,34 +27,25 @@ from __future__ import annotations -__all__ = ("app", "factory_dependency") +__all__ = ("app",) import logging -import uuid -from functools import cache -from typing import Any -from fastapi import Depends, FastAPI, Request +from fastapi import FastAPI, Request from fastapi.middleware.gzip import GZipMiddleware from fastapi.responses import JSONResponse -from lsst.daf.butler import ( - Butler, - DataCoordinate, - MissingDatasetTypeError, - SerializedDataCoordinate, - SerializedDatasetRef, - SerializedDatasetType, -) +from lsst.daf.butler import Butler, DataCoordinate, MissingDatasetTypeError, SerializedDataCoordinate from safir.metadata import Metadata, get_metadata -from ._config import get_config_from_env -from ._factory import Factory -from ._server_models import FindDatasetModel +from .handlers._external import external_router + +_DEFAULT_API_PATH = "/api/butler" log = logging.getLogger(__name__) app = FastAPI() app.add_middleware(GZipMiddleware, minimum_size=1000) +app.include_router(external_router, prefix=_DEFAULT_API_PATH) @app.exception_handler(MissingDatasetTypeError) @@ -68,16 +59,6 @@ def missing_dataset_type_exception_handler(request: Request, exc: MissingDataset ) -@cache -def _make_global_butler() -> Butler: - config = get_config_from_env() - return 
Butler.from_config(config.config_uri) - - -def factory_dependency() -> Factory: - return Factory(butler=_make_global_butler()) - - def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None: """Convert the serialized dataId back to full DataCoordinate. @@ -116,117 +97,3 @@ async def get_index() -> Metadata: By convention, this endpoint returns only the application's metadata. """ return get_metadata(package_name="lsst.daf.butler", application_name="butler") - - -@app.get( - "/butler/butler.yaml", - description=( - "Returns a Butler YAML configuration file that can be used to instantiate a Butler client" - " pointing at this server" - ), - summary="Client configuration file", - response_model=dict[str, Any], -) -@app.get( - "/butler/butler.json", - description=( - "Returns a Butler JSON configuration file that can be used to instantiate a Butler client" - " pointing at this server" - ), - summary="Client configuration file", - response_model=dict[str, Any], -) -async def get_client_config() -> dict[str, Any]: - # We can return JSON data for both the YAML and JSON case because all JSON - # files are parseable as YAML. 
- return {"cls": "lsst.daf.butler.remote_butler.RemoteButler", "remote_butler": {"url": ""}} - - -@app.get("/butler/v1/universe", response_model=dict[str, Any]) -def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]: - """Allow remote client to get dimensions definition.""" - butler = factory.create_butler() - return butler.dimensions.dimensionConfig.toDict() - - -@app.get( - "/butler/v1/dataset_type/{dataset_type_name}", - summary="Retrieve this dataset type definition.", - response_model=SerializedDatasetType, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def get_dataset_type( - dataset_type_name: str, factory: Factory = Depends(factory_dependency) -) -> SerializedDatasetType: - """Return the dataset type.""" - butler = factory.create_butler() - datasetType = butler.get_dataset_type(dataset_type_name) - return datasetType.to_simple() - - -@app.get( - "/butler/v1/dataset/{id}", - summary="Retrieve this dataset definition.", - response_model=SerializedDatasetRef | None, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def get_dataset( - id: uuid.UUID, - storage_class: str | None = None, - dimension_records: bool = False, - datastore_records: bool = False, - factory: Factory = Depends(factory_dependency), -) -> SerializedDatasetRef | None: - """Return a single dataset reference.""" - butler = factory.create_butler() - ref = butler.get_dataset( - id, - storage_class=storage_class, - dimension_records=dimension_records, - datastore_records=datastore_records, - ) - if ref is not None: - return ref.to_simple() - # This could raise a 404 since id is not found. The standard implementation - # get_dataset method returns without error so follow that example here. - return ref - - -# Not yet supported: TimeSpan is not yet a pydantic model. 
-# collections parameter assumes client-side has resolved regexes. -@app.post( - "/butler/v1/find_dataset/{dataset_type}", - summary="Retrieve this dataset definition from collection, dataset type, and dataId", - response_model=SerializedDatasetRef, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def find_dataset( - dataset_type: str, - query: FindDatasetModel, - factory: Factory = Depends(factory_dependency), -) -> SerializedDatasetRef | None: - collection_query = query.collections if query.collections else None - - # Get the simple dict from the SerializedDataCoordinate. We do not know - # if it is a well-defined DataCoordinate or needs some massaging first. - # find_dataset will use dimension record queries if necessary. - data_id = query.data_id.dataId - - butler = factory.create_butler() - ref = butler.find_dataset( - dataset_type, - None, - collections=collection_query, - storage_class=query.storage_class, - timespan=None, - dimension_records=query.dimension_records, - datastore_records=query.datastore_records, - **data_id, - ) - return ref.to_simple() if ref else None diff --git a/python/lsst/daf/butler/remote_butler/server/handlers/_external.py b/python/lsst/daf/butler/remote_butler/server/handlers/_external.py new file mode 100644 index 0000000000..9f70564b4d --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/server/handlers/_external.py @@ -0,0 +1,154 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. 
If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +__all__ = () + +import uuid +from typing import Any + +from fastapi import APIRouter, Depends +from lsst.daf.butler import SerializedDatasetRef, SerializedDatasetType + +from .._dependencies import factory_dependency +from .._factory import Factory +from .._server_models import FindDatasetModel + +external_router = APIRouter() + + +@external_router.get( + "/butler.yaml", + description=( + "Returns a Butler YAML configuration file that can be used to instantiate a Butler client" + " pointing at this server" + ), + summary="Client configuration file", + response_model=dict[str, Any], +) +@external_router.get( + "/butler.json", + description=( + "Returns a Butler JSON configuration file that can be used to instantiate a Butler client" + " pointing at this server" + ), + summary="Client configuration file", + response_model=dict[str, Any], +) +async def get_client_config() -> dict[str, Any]: + # We can return JSON data for both the YAML and JSON case because all JSON + # files are parseable as YAML. 
+ return {"cls": "lsst.daf.butler.remote_butler.RemoteButler", "remote_butler": {"url": ""}} + + +@external_router.get("/v1/universe", response_model=dict[str, Any]) +def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]: + """Allow remote client to get dimensions definition.""" + butler = factory.create_butler() + return butler.dimensions.dimensionConfig.toDict() + + +@external_router.get( + "/v1/dataset_type/{dataset_type_name}", + summary="Retrieve this dataset type definition.", + response_model=SerializedDatasetType, + response_model_exclude_unset=True, + response_model_exclude_defaults=True, + response_model_exclude_none=True, +) +def get_dataset_type( + dataset_type_name: str, factory: Factory = Depends(factory_dependency) +) -> SerializedDatasetType: + """Return the dataset type.""" + butler = factory.create_butler() + datasetType = butler.get_dataset_type(dataset_type_name) + return datasetType.to_simple() + + +@external_router.get( + "/v1/dataset/{id}", + summary="Retrieve this dataset definition.", + response_model=SerializedDatasetRef | None, + response_model_exclude_unset=True, + response_model_exclude_defaults=True, + response_model_exclude_none=True, +) +def get_dataset( + id: uuid.UUID, + storage_class: str | None = None, + dimension_records: bool = False, + datastore_records: bool = False, + factory: Factory = Depends(factory_dependency), +) -> SerializedDatasetRef | None: + """Return a single dataset reference.""" + butler = factory.create_butler() + ref = butler.get_dataset( + id, + storage_class=storage_class, + dimension_records=dimension_records, + datastore_records=datastore_records, + ) + if ref is not None: + return ref.to_simple() + # This could raise a 404 since id is not found. The standard implementation + # get_dataset method returns without error so follow that example here. + return ref + + +# Not yet supported: TimeSpan is not yet a pydantic model. 
+# collections parameter assumes client-side has resolved regexes. +@external_router.post( + "/v1/find_dataset/{dataset_type}", + summary="Retrieve this dataset definition from collection, dataset type, and dataId", + response_model=SerializedDatasetRef, + response_model_exclude_unset=True, + response_model_exclude_defaults=True, + response_model_exclude_none=True, +) +def find_dataset( + dataset_type: str, + query: FindDatasetModel, + factory: Factory = Depends(factory_dependency), +) -> SerializedDatasetRef | None: + collection_query = query.collections if query.collections else None + + # Get the simple dict from the SerializedDataCoordinate. We do not know + # if it is a well-defined DataCoordinate or needs some massaging first. + # find_dataset will use dimension record queries if necessary. + data_id = query.data_id.dataId + + butler = factory.create_butler() + ref = butler.find_dataset( + dataset_type, + None, + collections=collection_query, + storage_class=query.storage_class, + timespan=None, + dimension_records=query.dimension_records, + datastore_records=query.datastore_records, + **data_id, + ) + return ref.to_simple() if ref else None diff --git a/tests/test_server.py b/tests/test_server.py index 2adc161aec..d686227b3c 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -33,7 +33,8 @@ # Failing to import any of these should disable the tests. 
from fastapi.testclient import TestClient from lsst.daf.butler.remote_butler import RemoteButler - from lsst.daf.butler.remote_butler.server import Factory, app, factory_dependency + from lsst.daf.butler.remote_butler.server import Factory, app + from lsst.daf.butler.remote_butler.server._dependencies import factory_dependency except ImportError: TestClient = None app = None @@ -85,7 +86,7 @@ def create_factory_dependency(): # Set up the RemoteButler that will connect to the server cls.client = TestClient(app) - cls.client.base_url = "http://text.example/butler/" + cls.client.base_url = "http://test.example/api/butler/" cls.butler = _make_remote_butler(cls.client) # Populate the test server. @@ -103,7 +104,7 @@ def test_health_check(self): self.assertEqual(response.json()["name"], "butler") def test_simple(self): - response = self.client.get("/butler/v1/universe") + response = self.client.get("/api/butler/v1/universe") self.assertEqual(response.status_code, 200) self.assertIn("namespace", response.json()) @@ -192,9 +193,9 @@ def override_read(http_resource_path): return self.client.get(http_resource_path.geturl()).content with patch.object(HttpResourcePath, "read", override_read): - butler = Butler("https://test.example/butler") + butler = Butler("https://test.example/api/butler") assert isinstance(butler, RemoteButler) - assert str(butler._config.remote_butler.url) == "https://test.example/butler/" + assert str(butler._config.remote_butler.url) == "https://test.example/api/butler/" if __name__ == "__main__":