diff --git a/.gitignore b/.gitignore index 8882f041..24d54721 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ MANIFEST package-lock.json .vscode/ *.tgz +.idea diff --git a/README.md b/README.md index 05c4f017..7f7585a9 100644 --- a/README.md +++ b/README.md @@ -237,3 +237,88 @@ c.JupyterHub.services = [ ``` The nbviewer instance will automatically read the [various `JUPYTERHUB_*` environment variables](http://jupyterhub.readthedocs.io/en/latest/reference/services.html#launching-a-hub-managed-service) and configure itself accordingly. You can also run the nbviewer instance as an [externally managed JupyterHub service](http://jupyterhub.readthedocs.io/en/latest/reference/services.html#externally-managed-services), but must set the requisite environment variables yourself. + +--- + +# nbviewer Integration with JupyterHub via OAuth2 + +This guide explains how to configure **nbviewer** as a JupyterHub service using OAuth2 token-based authentication. + +## Requirements +- **JupyterHub** (version 2.x or higher) +- **nbviewer** service +- Access to the environment variables for both **JupyterHub** and **nbviewer**. 
+ + ## JupyterHub Configuration + +In JupyterHub’s `jupyterhub_config.py`, add the following configuration to integrate nbviewer as a service: + +```python +c.JupyterHub.services.append( + { + 'name': 'nbviewer', + 'url': 'http://nbviewer:8080', + 'api_token': os.environ['JUPYTERHUB_API_TOKEN'], + 'oauth_no_confirm': True, + 'oauth_client_id': 'service-nbviewer', + 'oauth_redirect_uri': 'https://jupyterhub.yourcompany.com/services/nbviewer/oauth_callback', + } +) + +c.JupyterHub.load_roles = [ + { + 'name': 'nbviewer', + 'services': ['nbviewer', 'jupyterhub-idle-culler'], + 'scopes': [ + "read:users:activity", + "list:users", + "users:activity", + "servers", # For starting and stopping servers + 'admin:users' # Needed if idle users are culled + ] + }, + { + "name": "user", + "scopes": ["self", "access:services"], + } +] + +c.JupyterHub.service_tokens = { + os.environ['JUPYTERHUB_API_TOKEN'] : 'nbviewer' +} +``` + +### Explanation of Key Settings: +- `oauth_client_id`: The unique ID for the nbviewer service. It must match `JUPYTERHUB_CLIENT_ID` in the nbviewer environment. +- `oauth_redirect_uri`: The URL that nbviewer uses to handle OAuth2 callbacks from JupyterHub. +- `service_tokens`: Registers the API token that nbviewer uses to authenticate with JupyterHub. + +## nbviewer Configuration +In the deployment of nbviewer, configure the following environment variables, in addition to `JUPYTERHUB_API_TOKEN`, which must hold the same token as the service's `api_token` above (nbviewer uses it as the OAuth client secret) and is best supplied from a secret: +```yaml +extraEnv: + JUPYTERHUB_SERVICE_NAME: 'nbviewer' + JUPYTERHUB_API_URL: 'http://hub:8081/hub/api' + JUPYTERHUB_BASE_URL: '/' + JUPYTERHUB_SERVICE_PREFIX: '/services/nbviewer/' + JUPYTERHUB_URL: 'https://jupyterhub.yourcompany.com' + JUPYTERHUB_CLIENT_ID: 'service-nbviewer' +``` +### Explanation of Environment Variables: +- `JUPYTERHUB_API_URL`: The internal URL where nbviewer can access JupyterHub’s API. +- `JUPYTERHUB_URL`: The base URL of your JupyterHub installation (public-facing). +- `JUPYTERHUB_CLIENT_ID`: Should match the `oauth_client_id` in the JupyterHub configuration. +- `JUPYTERHUB_SERVICE_PREFIX`: Specifies the service's routing prefix.
+ + ### OAuth2 Flow +When a user accesses nbviewer, they will be authenticated via the OAuth2 token from JupyterHub. The `oauth_callback` URL specified in the configuration will be used to handle the token exchange. + +For this flow to work, the callback URL must match the `oauth_redirect_uri` registered in the JupyterHub configuration, and nbviewer must be able to request the necessary scopes from JupyterHub. + +### Troubleshooting +If you encounter issues with token authentication or authorization, ensure that: +- The correct API token is set both in JupyterHub and nbviewer. +- The service roles and scopes are correctly configured to allow nbviewer access to JupyterHub's user data. + +--- diff --git a/nbviewer/app.py b/nbviewer/app.py index 5e8b05a4..b5761cff 100644 --- a/nbviewer/app.py +++ b/nbviewer/app.py @@ -205,6 +205,10 @@ class NBViewer(Application): default_value="nbviewer.providers.gist.handlers.UserGistsHandler", help="The Tornado handler to use for viewing directory containing all of a user's Gists", ).tag(config=True) + jupyterhub_login_handler = Unicode( + default_value="nbviewer.handlers.JupyterHubLoginHandler", + help="The Tornado handler to use for OAuth login with JupyterHub.", + ).tag(config=True) answer_yes = Bool( default_value=False, @@ -634,6 +638,7 @@ def init_tornado_application(self): local_handler=self.local_handler, url_handler=self.url_handler, user_gists_handler=self.user_gists_handler, + jupyterhub_login_handler=self.jupyterhub_login_handler, ) handler_kwargs = { "handler_names": handler_names, @@ -656,6 +661,17 @@ def init_tornado_application(self): if os.environ.get("DEBUG"): self.log.setLevel(logging.DEBUG) + hub_api = "/hub/api" + if os.getenv("JUPYTERHUB_URL"): + hub_api = os.getenv("JUPYTERHUB_URL").rstrip("/") + hub_api + redirect_url = ( + os.environ["JUPYTERHUB_URL"].rstrip("/") + + os.getenv("JUPYTERHUB_SERVICE_PREFIX", "/").rstrip("/") + + "/oauth_callback" + ) + else: + redirect_url = None + # input traitlets to settings settings = dict( # Allow 
FileFindHandler to load static directories from e.g. a Docker container
@@ -676,7 +692,11 @@ def init_tornado_application(self):
             gzip=True,
             hub_api_token=os.getenv("JUPYTERHUB_API_TOKEN"),
             hub_api_url=os.getenv("JUPYTERHUB_API_URL"),
+            # hub_base_url must stay unset (None) outside JupyterHub:
+            # BaseHandler.prepare enforces Hub auth as soon as any hub_* setting is
+            # truthy, so defaulting it to "/" would lock down standalone nbviewer.
             hub_base_url=os.getenv("JUPYTERHUB_BASE_URL"),
+            hub_cookie_name="jupyterhub-services",
             index=self.index,
             ipywidgets_base_url=self.ipywidgets_base_url,
             jinja2_env=self.env,
@@ -701,6 +721,15 @@ def init_tornado_application(self):
             statsd_host=self.statsd_host,
             statsd_port=self.statsd_port,
             statsd_prefix=self.statsd_prefix,
+            login_url="/oauth_callback",
+            # Random per-process secret: OAuth login cookies are invalidated on
+            # restart and cannot be shared between nbviewer replicas.
+            cookie_secret=os.urandom(32),
+            client_id=os.getenv("JUPYTERHUB_CLIENT_ID"),
+            redirect_uri=redirect_url,
+            authorize_url=hub_api + "/oauth2/authorize",
+            token_url=hub_api + "/oauth2/token",
+            user_url=hub_api + "/user",
         )
 
         if self.localfiles:
diff --git a/nbviewer/handlers.py b/nbviewer/handlers.py
index e3800441..57a03776 100644
--- a/nbviewer/handlers.py
+++ b/nbviewer/handlers.py
@@ -4,7 +4,14 @@
 # Distributed under the terms of the BSD License. The full license is in
 # the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
+import json
+from urllib.parse import urlencode
+
 from tornado import web
+from tornado.httpclient import AsyncHTTPClient
+from tornado.httpclient import HTTPClientError
+from tornado.httpclient import HTTPRequest
+from tornado.httputil import url_concat
 
 from .providers import _load_handler_from_location
 from .providers import provider_handlers
@@ -38,11 +45,112 @@ def render_index_template(self, **namespace):
             text=self.frontpage_setup.get("text", None),
             show_input=self.frontpage_setup.get("show_input", True),
             sections=self.frontpage_setup.get("sections", []),
-            **namespace
+            **namespace,
         )
 
-    def get(self):
-        self.finish(self.render_index_template())
+    def get_current_user(self):
+        """Return the JupyterHub API token the login handler stored in a cookie
+
+        Tornado calls this method to populate `current_user`.
+        Returns None when the cookie is missing or its signature is invalid,
+        otherwise the token as a str.
+        """
+        token = self.get_secure_cookie(self.settings["hub_cookie_name"])
+        if token:
+            # secure cookies are bytes, decode to str
+            return token.decode("ascii", "replace")
+
+    async def user_for_token(self, token):
+        """Retrieve the user for a given token, via /hub/api/user"""
+        req = HTTPRequest(
+            self.settings["user_url"], headers={"Authorization": f"token {token}"}
+        )
+        response = await AsyncHTTPClient().fetch(req)
+        return json.loads(response.body.decode("utf8", "replace"))
+
+    async def get(self):
+        """Render the front page, validating the Hub token when OAuth is enabled."""
+        # OAuth is only configured when JUPYTERHUB_URL is set (redirect_uri is
+        # not None). Standalone and legacy cookie-auth deployments have no OAuth
+        # cookie, so they must skip this check instead of being redirected.
+        if self.settings.get("redirect_uri"):
+            user_token = self.get_current_user()
+            if not user_token:
+                self.redirect_to_authorize()
+                return
+            try:
+                await self.user_for_token(user_token)
+            except HTTPClientError as e:
+                # Fail closed: only a rejected token is handled here; any other
+                # Hub error must surface instead of rendering the page anyway.
+                if e.code not in (401, 403):
+                    raise
+                # The token is no longer valid (e.g. the user logged out of
+                # JupyterHub): clear the cookie and send them to the login page.
+                self.log.info("clearing the cookie and redirecting to the login page")
+                self.clear_cookie(self.settings["hub_cookie_name"])
+                self.redirect_to_login()
+                return
+
+        await self.finish(self.render_index_template())
+
+
+class JupyterHubLoginHandler(web.RequestHandler):
+    """Login Handler
+
+    this handler both begins and ends the OAuth process
+    """
+
+    async def token_for_code(self, code):
+        """Complete OAuth by requesting an access token for an oauth code"""
+        params = dict(
+            client_id=self.settings["client_id"],
+            client_secret=self.settings["hub_api_token"],
+            grant_type="authorization_code",
+            code=code,
+            redirect_uri=self.settings["redirect_uri"],
+        )
+
+        req = HTTPRequest(
+            self.settings["token_url"],
+            method="POST",
+            body=urlencode(params).encode("utf8"),
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+        )
+        response = await AsyncHTTPClient().fetch(req)
+        data = json.loads(response.body.decode("utf8", "replace"))
+        return data["access_token"]
+
+    async def get(self):
+        code = self.get_argument("code", None)
+
+        if code:
+            # code is set, we are the oauth callback
+            # complete oauth
+            token = await self.token_for_code(code)
+            # login successful, set cookie and redirect back to home.
+            # httponly keeps the Hub token away from scripts in rendered notebooks.
+            self.set_secure_cookie(
+                self.settings["hub_cookie_name"], token, httponly=True
+            )
+            # Redirect to nbviewer's own root rather than the domain root, so the
+            # user stays inside the service prefix when running behind JupyterHub.
+            # NOTE(review): assumes settings carries "base_url"; falls back to "/".
+            self.redirect(self.settings.get("base_url") or "/")
+        else:
+            # we are the login handler,
+            # begin oauth process which will come back later with an
+            # authorization_code
+            self.redirect(
+                url_concat(
+                    self.settings["authorize_url"],
+                    dict(
+                        redirect_uri=self.settings["redirect_uri"],
+                        client_id=self.settings["client_id"],
+                        response_type="code",
+                    ),
+                )
+            )
 
 
 class FAQHandler(BaseHandler):
@@ -120,11 +228,15 @@ def init_handlers(formats, providers, base_url, localfiles, **handler_kwargs):
     custom404_handler = _load_handler_from_location(handler_names["custom404_handler"])
     faq_handler = _load_handler_from_location(handler_names["faq_handler"])
     index_handler = 
_load_handler_from_location(handler_names["index_handler"]) + jupyterhub_login_handler = _load_handler_from_location( + handler_names["jupyterhub_login_handler"] + ) # If requested endpoint matches multiple routes, it only gets handled by handler # corresponding to the first matching route. So order of URLSpecs in this list matters. pre_providers = [ ("/?", index_handler, {}), + ("/oauth_callback/?", jupyterhub_login_handler, {}), ("/index.html", index_handler, {}), (r"/faq/?", faq_handler, {}), (r"/create/?", create_handler, {}), diff --git a/nbviewer/providers/base.py b/nbviewer/providers/base.py index 032fb456..399eb79f 100644 --- a/nbviewer/providers/base.py +++ b/nbviewer/providers/base.py @@ -85,6 +85,27 @@ def redirect(self, url, *args, **kwargs): return super().redirect(eurl, *args, **kwargs) + def redirect_to_login(self): + self.redirect( + url_path_join(self.hub_base_url, "/hub/login") + + "?" + + urlencode({"next": self.request.path}) + ) + + def redirect_to_authorize(self): + + self.redirect( + url_path_join(self.hub_base_url, "/hub/api/oauth2/authorize") + + "?" + + urlencode( + { + "client_id": self.client_id, + "response_type": "code", + "next": self.request.path, + } + ) + ) + def set_default_headers(self): self.add_header("Content-Security-Policy", self.content_security_policy) @@ -97,37 +118,39 @@ async def prepare(self): """ # if any of these are set, assume we want to do auth, even if # we're misconfigured (better safe than sorry!) - if self.hub_api_url or self.hub_api_token or self.hub_base_url: - - def redirect_to_login(): - self.redirect( - url_path_join(self.hub_base_url, "/hub/login") - + "?" 
- + urlencode({"next": self.request.path}) - ) encrypted_cookie = self.get_cookie(self.hub_cookie_name) - if not encrypted_cookie: - # no cookie == not authenticated - return redirect_to_login() + if self.hub_api_url or self.hub_api_token or self.hub_base_url: - try: - # if the hub returns a success code, the user is known - await self.http_client.fetch( - url_path_join( - self.hub_api_url, - "authorizations/cookie", - self.hub_cookie_name, - quote(encrypted_cookie, safe=""), - ), - headers={"Authorization": "token " + self.hub_api_token}, - ) - except httpclient.HTTPError as ex: - if ex.response.code == 404: - # hub does not recognize the cookie == not authenticated - return redirect_to_login() - # let all other errors surface: they're unexpected - raise ex + # if we have a redirect URL, we're running OAuth2 authentication + if self.redirect_url: + # the OAuth login handler stores the Hub token in this signed cookie + token = self.get_secure_cookie(self.hub_cookie_name) + if not token: + return self.redirect_to_authorize() + else: + # support old authentication method via authorizations/cookie + encrypted_cookie = self.get_cookie(self.hub_cookie_name) + if not encrypted_cookie: + # no cookie == not authenticated + return self.redirect_to_login() + + try: + # if the hub returns a success code, the user is known + await self.http_client.fetch( + url_path_join( + self.hub_api_url, + "authorizations/cookie", + self.hub_cookie_name, + quote(encrypted_cookie, safe=""), + ), + headers={"Authorization": "token " + self.hub_api_token}, + ) + except httpclient.HTTPError as ex: + if ex.code == 404: + # hub does not recognize the cookie == not authenticated + return self.redirect_to_login() + # let all other errors surface: they're unexpected + raise ex # Properties @@ -139,6 +162,10 @@ def base_url(self): def binder_base_url(self): return self.settings["binder_base_url"] + @property + def redirect_url(self): + return self.settings.get("redirect_uri") + @property def cache(self): return self.settings["cache"] 
@@ -189,7 +216,7 @@ def hub_base_url(self): @property def hub_cookie_name(self): - return "jupyterhub-services" + return self.settings["hub_cookie_name"] @property def index(self): @@ -255,6 +282,14 @@ def statsd(self): self._statsd = EmptyClass() return self._statsd + @property + def authorize_url(self): + return self.settings["authorize_url"] + + @property + def client_id(self): + return self.settings["client_id"] + # --------------------------------------------------------------- # template rendering # --------------------------------------------------------------- @@ -677,7 +712,7 @@ def render_notebook_template( self.base_url, "/" ), date=datetime.utcnow().strftime(self.date_fmt), - **namespace + **namespace, ) async def finish_notebook( @@ -746,7 +781,7 @@ async def finish_notebook( nb=nb, download_url=download_url, json_notebook=json_notebook, - **namespace + **namespace, ) html_time.stop() diff --git a/requirements-dev.txt b/requirements-dev.txt index 9f087d16..ad075a9b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,3 +4,4 @@ mock>=1.3.0 # python34 and older versions of mock do not play well together. pre-commit pytest requests +jupyterhub