Skip to content

Commit

Permalink
Add gafaelfawr authentication to the client
Browse files Browse the repository at this point in the history
  • Loading branch information
dhirving committed Nov 3, 2023
1 parent f50b632 commit 1ac5b1d
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 2 deletions.
98 changes: 98 additions & 0 deletions python/lsst/daf/butler/remote_butler/_authentication.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ()

import os
from fnmatch import fnmatchcase
from urllib.parse import urlparse

# Shell-style (fnmatch) hostname patterns for servers that are allowed to
# receive the notebook token. Patterns are compared case-sensitively against
# the lower-cased hostname, so they must be written in lower case.
_SERVER_WHITELIST = ["*.lsst.cloud"]
# Name of the environment variable holding a token the user set explicitly
# for Butler; it is returned regardless of which server is being contacted.
_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY = "BUTLER_RUBIN_ACCESS_TOKEN"
# Name of the environment variable holding the token provided by Rubin
# Science Platform Jupyter notebooks; only returned for whitelisted servers.
_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY = "ACCESS_TOKEN"


def get_authentication_token_from_environment(server_url: str) -> str | None:
    """Search the environment for a Rubin Science Platform access token.

    The token may come from the following sources in this order:

    1. The ``BUTLER_RUBIN_ACCESS_TOKEN`` environment variable.
       This environment variable is meant primarily for development use,
       running outside the Rubin Science Platform. This token will be sent
       to EVERY server that we connect to, so be careful when connecting to
       untrusted servers.
    2. The ``ACCESS_TOKEN`` environment variable.
       This environment variable is provided by the Rubin Science Platform
       Jupyter notebooks. It will only be returned if the given
       ``server_url`` is in a whitelist of servers known to belong to the
       Rubin Science Platform. Because this is a long-lived token that can
       be used to impersonate the user with their full access rights, it
       should not be sent to untrusted servers.

    Parameters
    ----------
    server_url : `str`
        URL of the Butler server that the caller intends to connect to.

    Returns
    -------
    access_token : `str` or `None`
        A Rubin Science Platform access token, or `None` if no usable token
        was configured in the environment. An environment variable that is
        set to an empty string is treated the same as an unset one.
    """
    # The explicit token wins unconditionally: no whitelist check is applied.
    explicit_butler_token = os.getenv(_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY)
    if explicit_butler_token:
        return explicit_butler_token

    # Hostnames are case-insensitive, but fnmatchcase() is case-sensitive, so
    # the URL is lower-cased before the hostname is extracted and matched.
    hostname = urlparse(server_url.lower()).hostname
    # A URL without a hostname can never be considered trusted.
    hostname_in_whitelist = hostname is not None and any(
        fnmatchcase(hostname, pattern) for pattern in _SERVER_WHITELIST
    )
    notebook_token = os.getenv(_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY)
    if hostname_in_whitelist and notebook_token:
        return notebook_token

    return None


def get_authentication_headers(access_token: str) -> dict[str, str]:
    """Return HTTP headers required for authenticating the user via Rubin
    Science Platform's Gafaelfawr service.

    Parameters
    ----------
    access_token : `str`
        Rubin Science Platform access token.

    Returns
    -------
    header_map : `dict` [`str`, `str`]
        HTTP header names and values as a mapping from name to value.
    """
    # Access tokens are opaque bearer tokens. See https://sqr-069.lsst.io/
    return {"Authorization": f"Bearer {access_token}"}
14 changes: 12 additions & 2 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, Registry, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ..transfers import RepoExportContext
from ._authentication import get_authentication_headers, get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel
from .server import FindDatasetModel

Expand All @@ -70,6 +71,7 @@ def __init__(
inferDefaults: bool = True,
# Parameters unique to RemoteButler
http_client: httpx.Client | None = None,
access_token: str | None = None,
**kwargs: Any,
):
butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
Expand All @@ -84,6 +86,7 @@ def __init__(
butler_config[server_url_key], butler_config.configDir
)
self._config = RemoteButlerConfigModel.model_validate(butler_config)

self._dimensions: DimensionUniverse | None = None
# TODO: RegistryDefaults should have finish() called on it, but this
# requires getCollectionSummary() which is not yet implemented
Expand All @@ -94,8 +97,15 @@ def __init__(
# This is generally done for testing.
self._client = http_client
else:
headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
self._client = httpx.Client(headers=headers, base_url=str(self._config.remote_butler.url))
server_url = str(self._config.remote_butler.url)
auth_headers = {}
if access_token is None:
access_token = get_authentication_token_from_environment(server_url)
if access_token is not None:
auth_headers = get_authentication_headers(access_token)

headers = auth_headers | {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
self._client = httpx.Client(headers=headers, base_url=server_url)

def isWriteable(self) -> bool:
# Docstring inherited.
Expand Down
57 changes: 57 additions & 0 deletions tests/test_authentication.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import os
import unittest
from contextlib import contextmanager
from unittest.mock import patch

try:
from lsst.daf.butler.remote_butler import RemoteButler
from lsst.daf.butler.remote_butler._authentication import (
_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY,
_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY,
get_authentication_headers,
get_authentication_token_from_environment,
)
except ImportError:
RemoteButler = None


@contextmanager
def _mock_env(new_environment: dict[str, str]):
    """Temporarily replace the whole process environment.

    While the context is active ``os.environ`` contains exactly the entries
    of ``new_environment``: ``clear=True`` removes every pre-existing
    variable, so tests cannot be influenced by tokens set in the caller's
    shell. The previous environment is restored when the context exits.

    Parameters
    ----------
    new_environment : `dict` [`str`, `str`]
        Environment variable names and values to expose inside the context.
    """
    with patch.dict(os.environ, new_environment, clear=True):
        yield


@unittest.skipIf(
    RemoteButler is None, "RemoteButler could not be imported, optional dependencies may not be installed"
)
class TestButlerClientAuthentication(unittest.TestCase):
    """Test access-token logic."""

    def test_explicit_butler_token(self):
        """The explicit Butler token is returned even for an untrusted
        server, and takes priority over the notebook token.
        """
        with _mock_env(
            {
                _EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token1",
                _RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "not-this-token",
            }
        ):
            token = get_authentication_token_from_environment("https://untrustedserver.com")
            self.assertEqual(token, "token1")

    def test_jupyter_token_with_safe_server(self):
        """The notebook token is returned for a whitelisted host; the
        mixed-case hostname checks that matching is case-insensitive.
        """
        with _mock_env({_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token2"}):
            token = get_authentication_token_from_environment("https://data.LSST.cloud/butler")
            self.assertEqual(token, "token2")

    def test_jupyter_token_with_unsafe_server(self):
        """The notebook token is withheld from a host outside the whitelist."""
        with _mock_env({_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token2"}):
            token = get_authentication_token_from_environment("https://untrustedserver.com/butler")
            self.assertIsNone(token)

    def test_missing_token(self):
        """`None` is returned when no token variable is set at all."""
        with _mock_env({}):
            token = get_authentication_token_from_environment("https://data.lsst.cloud/butler")
            self.assertIsNone(token)

    def test_header_generation(self):
        """The token is wrapped in a bearer ``Authorization`` header."""
        headers = get_authentication_headers("tokendata")
        # Use the TestCase assertion rather than a bare ``assert`` so the
        # check survives ``python -O`` and reports a useful diff on failure.
        self.assertEqual(headers, {"Authorization": "Bearer tokendata"})

0 comments on commit 1ac5b1d

Please sign in to comment.