From 179dc0377345938426ac8884ebc49fb247d78a2a Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Thu, 6 Sep 2018 12:28:30 +0300 Subject: [PATCH 1/4] Rework req_keepvars handling: - Rework req_keepvars. Old meaning of req_keepvars didn't make sense, because it overrode mandatory variables that JupyterHub sets. Change to these two variables: - req_keepvars_default: Automatically set by JupyterHub defaults, should not be overridden. Old req_keepvars becomes this. Should only be changed if you know what you are doing. - req_keepvars: Adds to the defaults in req_keepvars_default. This is what cluster admins usually want to set. --- CHANGELOG.md | 2 +- batchspawner/batchspawner.py | 34 +++++++++++++++++++++++------ batchspawner/tests/test_spawners.py | 28 +++++++++++++++++++++--- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 243aacfb..4e85d183 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,6 @@ Added (user) * Add Jinja2 templating as an option for all scripts and commands. If '{{' or `{%` is used anywhere in the string, it is used as a jinja2 template. * Add new option exec_prefix, which defaults to `sudo -E -u {username}`. This replaces explicit `sudo` in every batch command - changes in local commands may be needed. -* New option: `req_keepvars_extra`, which allows keeping extra variables in addition to what is defined by JupyterHub itself (addition of variables to keep instead of replacement). #99 * Add `req_prologue` and `req_epilogue` options to scripts which are inserted before/after the main jupyterhub-singleuser command, which allow for generic setup/cleanup without overriding the entire script. #96 * SlurmSpawner: add the `req_reservation` option. #91 * Add basic support for JupyterHub progress updates, but this is not used much yet. #86 @@ -25,6 +24,7 @@ Changed - Add a new option `batchspawner_singleuser_cmd` which is used as a wrapper in the single-user servers, which conveys the remote port back to JupyterHub. This is now an integral part of the spawn process. - If you have installed with `pip install -e`, you will have to re-install so that the new script `batchspawner-singleuser` is added to `$PATH`. * Update minimum requirements to JupyterHub 0.9 and Python 3.5. #143 ++* Change meaning of `req_keepvars`. Previously, setting this would override everything that JupyterHub requires to be set (so you'd have to make sure you set it back). Now, `req_keepvars` only *adds* to the JupyterHub defaults, and `req_keepvars_default` gets set to the JupyterHub upstream and can be used to completly override the JupyterHub defaults. #99, #?? * Update Slurm batch script. Now, the single-user notebook is run in a job step, with a wrapper of `srun`. This may need to be removed using `req_srun=''` if you don't want environment variables limited. * Pass the environment dictionary to the queue and cancel commands as well. This is mostly user environment, but may be useful to these commands as well in some cases. #108, #111 If these environment variables were used for authentication as an admin, be aware that there are pre-existing security issues because they may be passed to the user via the batch submit command, see #82. diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index 3cb51ac0..ca7359cf 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -143,15 +143,30 @@ def _req_username_default(self): def _req_homedir_default(self): return pwd.getpwnam(self.user.name).pw_dir - req_keepvars = Unicode() - @default('req_keepvars') - def _req_keepvars_default(self): + req_keepvars_default = Unicode( + help="All environment variables to pass to the spawner. This is set " + "to a default by JupyterHub, and if you change this list " + "the previous ones are _not_ included and your submissions will " + "break unless you re-add necessary variables. Consider " + "req_keepvars for most use cases." +) + @default('req_keepvars_all') + def _req_keepvars_all_default(self): return ','.join(self.get_env().keys()) - req_keepvars_extra = Unicode( + req_keepvars = Unicode( help="Extra environment variables which should be configured, " "added to the defaults in keepvars, " - "comma separated list.") + "comma separated list.").tag(config=True) + admin_environment = Unicode( + help="Comma-separated list of environment variables to be passed to " + "the batch submit/cancel commands, but _not_ to the batch script " + "via --export. This could be used, for example, to authenticate " + "the submit command as an admin user so that it can submit jobs " + "as another user. These are _not_ included in an " + "--export={keepvars} type line in the batch script, but you " + "should check that your batch system actually does the right " + "thing here.").tag(config=True) batch_script = Unicode('', help="Template for job submission script. Traits on this class named like req_xyz " @@ -177,8 +192,13 @@ def get_req_subvars(self): subvars = {} for t in reqlist: subvars[t[4:]] = getattr(self, t) - if subvars.get('keepvars_extra'): - subvars['keepvars'] += ',' + subvars['keepvars_extra'] + # 'keepvars' is special: 'keepvars' goes through as the + # variable, but 'keepvars_default' is prepended to it. + # 'keepvars_default' is JH-required stuff so you have to try + # extra hard to override it. + if subvars.get('keepvars'): + subvars['keepvars_default'] += ',' + subvars['keepvars'] + subvars['keepvars'] = subvars['keepvars_default'] return subvars batch_submit_cmd = Unicode('', diff --git a/batchspawner/tests/test_spawners.py b/batchspawner/tests/test_spawners.py index f249e6fd..a92d00b6 100644 --- a/batchspawner/tests/test_spawners.py +++ b/batchspawner/tests/test_spawners.py @@ -491,7 +491,7 @@ def test_lfs(db, io_loop): def test_keepvars(db, io_loop): # req_keepvars spawner_kwargs = { - 'req_keepvars': 'ABCDE', + 'req_keepvars_default': 'ABCDE', } batch_script_re_list = [ re.compile(r'--export=ABCDE', re.X|re.M), @@ -500,10 +500,32 @@ def test_keepvars(db, io_loop): spawner_kwargs=spawner_kwargs, batch_script_re_list=batch_script_re_list) - # req_keepvars AND req_keepvars together + # req_keepvars spawner_kwargs = { 'req_keepvars': 'ABCDE', - 'req_keepvars_extra': 'XYZ', + } + batch_script_re_list = [ + re.compile(r'--export=.*ABCDE', re.X|re.M), + ] + run_typical_slurm_spawner(db, io_loop, + spawner_kwargs=spawner_kwargs, + batch_script_re_list=batch_script_re_list) + + # req_keepvars + spawner_kwargs = { + 'admin_environment': 'ABCDE', + } + batch_script_re_list = [ + re.compile(r'^((?!ABCDE).)*$', re.X|re.S), + ] + run_typical_slurm_spawner(db, io_loop, + spawner_kwargs=spawner_kwargs, + batch_script_re_list=batch_script_re_list) + + # req_keepvars AND req_keepvars together + spawner_kwargs = { + 'req_keepvars_default': 'ABCDE', + 'req_keepvars': 'XYZ', } batch_script_re_list = [ re.compile(r'--export=ABCDE,XYZ', re.X|re.M), From 8660d3a07e53eb9b232d5be21ad7002cf555ba44 Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Thu, 6 Sep 2018 19:59:35 +0300 Subject: [PATCH 2/4] Add new option admin_environment - This is environment which is intended to be passed only to the batch commands, not to the user servers. However, it is unknown if any spawners support this. Don't rely on it unless the spawner explicitely says it supports it. --- CHANGELOG.md | 1 + README.md | 4 ++++ SPAWNERS.md | 2 ++ batchspawner/batchspawner.py | 17 +++++++++++++++++ 4 files changed, 24 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e85d183..dfe3c60b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Added (user) * Add `req_prologue` and `req_epilogue` options to scripts which are inserted before/after the main jupyterhub-singleuser command, which allow for generic setup/cleanup without overriding the entire script. #96 * SlurmSpawner: add the `req_reservation` option. #91 * Add basic support for JupyterHub progress updates, but this is not used much yet. #86 ++* Add the option `admin_environment` which get passed to the batch submission commands (like for authentication) but *not* to the `--export Added (developer) diff --git a/README.md b/README.md index c5046c21..e978a741 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,10 @@ shell which will execute arbitrary user environment configuration access to their own cluster user account. This is something which we are working on. +The `admin_environment` option claims to pass environment only to the +batch spawners, and not the user servers. This relies on each spawner +handling environment correctly, and this is *not* checked so you +should *not* rely on this yet. ## Provide different configurations of BatchSpawner diff --git a/SPAWNERS.md b/SPAWNERS.md index cce52e6e..d99d8b68 100644 --- a/SPAWNERS.md +++ b/SPAWNERS.md @@ -34,6 +34,8 @@ environment to be used, set `req_srun=''`. However, this is not perfect: there is still a bash shell begun as the user which could run arbitrary startup, define shell aliases for `srun`, etc. +`admin_environment` does work with SlurmSpawner. + Use of `srun` is required to gracefully terminate. diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index ca7359cf..641d3ccb 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -213,6 +213,23 @@ def cmd_formatted_for_batch(self): """The command which is substituted inside of the batch script""" return ' '.join([self.batchspawner_singleuser_cmd] + self.cmd + self.get_args()) + def get_admin_env(self): + """Get the environment passed to the batch submit/cancel/etc commands. + + This contains all of the output from self.get_env(), plus any + variables defined in self.admin_environment. In fact, only this + command is used to pass to batch commands, and the only use of + self.get_env() is producing the list of variables to be + --export='ed to. Everything in get_env *must* also be in here + or else it won't be used. + """ + env = self.get_env() + if self.admin_environment: + for key in self.admin_environment.split(','): + if key in os.environ and key not in env: + env[key] = os.environ[key] + return env + async def run_command(self, cmd, input=None, env=None): proc = await asyncio.create_subprocess_shell(cmd, env=env, stdin=asyncio.subprocess.PIPE, From 12100a23f3674360fa643232e4c08ffc159c881d Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Tue, 23 Oct 2018 15:30:00 +0300 Subject: [PATCH 3/4] admin_environment: test and make active --- batchspawner/batchspawner.py | 6 ++--- batchspawner/tests/test_spawners.py | 41 +++++++++++++++++++++++------ 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index 641d3ccb..a54ab288 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -284,7 +284,7 @@ async def submit_batch_script(self): script = await self._get_batch_script(**subvars) self.log.info('Spawner submitting job using ' + cmd) self.log.info('Spawner submitted script:\n' + script) - out = await self.run_command(cmd, input=script, env=self.get_env()) + out = await self.run_command(cmd, input=script, env=self.get_admin_env()) try: self.log.info('Job submitted. cmd: ' + cmd + ' output: ' + out) self.job_id = self.parse_job_id(out) @@ -310,7 +310,7 @@ async def read_job_state(self): format_template(self.batch_query_cmd, **subvars))) self.log.debug('Spawner querying job: ' + cmd) try: - out = await self.run_command(cmd) + out = await self.run_command(cmd, env=self.get_admin_env()) self.job_status = out except Exception as e: self.log.error('Error querying job ' + self.job_id) @@ -328,7 +328,7 @@ async def cancel_batch_job(self): cmd = ' '.join((format_template(self.exec_prefix, **subvars), format_template(self.batch_cancel_cmd, **subvars))) self.log.info('Cancelling job ' + self.job_id + ': ' + cmd) - await self.run_command(cmd) + await self.run_command(cmd, env=self.get_admin_env()) def load_state(self, state): """load job_id from state""" diff --git a/batchspawner/tests/test_spawners.py b/batchspawner/tests/test_spawners.py index a92d00b6..adbd1ff8 100644 --- a/batchspawner/tests/test_spawners.py +++ b/batchspawner/tests/test_spawners.py @@ -1,5 +1,6 @@ """Test BatchSpawner and subclasses""" +import itertools import re from unittest import mock from .. import BatchSpawnerRegexStates @@ -29,11 +30,17 @@ class BatchDummy(BatchSpawnerRegexStates): state_running_re = Unicode('RUN') state_exechost_re = Unicode('RUN (.*)$') - cmd_expectlist = None + cmd_expectlist = None #List of (re) out_expectlist = None + env_testlist = None # List of functions to call on env dict, function should assert within. def run_command(self, *args, **kwargs): """Overwriten run command to test templating and outputs""" cmd = args[0] + # Test the environment + if self.env_testlist: # if first item is None, also pop and advance + env_test = self.env_testlist.pop(0) + if env_test: + env_test(kwargs['env']) # Test that the command matches the expectations if self.cmd_expectlist: run_re = self.cmd_expectlist.pop(0) @@ -221,13 +228,14 @@ def run_command(self, cmd, *args, **kwargs): assert status == 1 def run_spawner_script(db, io_loop, spawner, script, - batch_script_re_list=None, spawner_kwargs={}): + batch_script_re_list=None, spawner_kwargs={}, + env_test=None): """Run a spawner script and test that the output and behavior is as expected. db: same as in this module io_loop: same as in this module spawner: the BatchSpawnerBase subclass to test - script: list of (input_re_to_match, output) + script: list of (input_re_to_match, output, env_testfunc) batch_script_re_list: if given, assert batch script matches all of these """ # Create the expected scripts @@ -238,6 +246,9 @@ def run_spawner_script(db, io_loop, spawner, script, class BatchDummyTestScript(spawner): @gen.coroutine def run_command(self, cmd, input=None, env=None): + # Test the environment + if env_test: + env_test(env) # Test the input run_re = cmd_expectlist.pop(0) if run_re: @@ -402,7 +413,8 @@ def run_typical_slurm_spawner(db, io_loop, spawner=SlurmSpawner, script=normal_slurm_script, batch_script_re_list=None, - spawner_kwargs={}): + spawner_kwargs={}, + env_test=None): """Run a full slurm job with default (overrideable) parameters. This is useful, for example, for changing options and testing effect @@ -410,7 +422,8 @@ def run_typical_slurm_spawner(db, io_loop, """ return run_spawner_script(db, io_loop, spawner, script, batch_script_re_list=batch_script_re_list, - spawner_kwargs=spawner_kwargs) + spawner_kwargs=spawner_kwargs, + env_test=env_test) #def test_gridengine(db, io_loop): @@ -489,6 +502,8 @@ def test_lfs(db, io_loop): def test_keepvars(db, io_loop): + """Test of environment handling + """ # req_keepvars spawner_kwargs = { 'req_keepvars_default': 'ABCDE', @@ -496,9 +511,12 @@ def test_keepvars(db, io_loop): batch_script_re_list = [ re.compile(r'--export=ABCDE', re.X|re.M), ] + def env_test(env): + assert 'ABCDE' in env run_typical_slurm_spawner(db, io_loop, spawner_kwargs=spawner_kwargs, - batch_script_re_list=batch_script_re_list) + batch_script_re_list=batch_script_re_list, + env_test=env_test) # req_keepvars spawner_kwargs = { @@ -516,11 +534,18 @@ def test_keepvars(db, io_loop): 'admin_environment': 'ABCDE', } batch_script_re_list = [ - re.compile(r'^((?!ABCDE).)*$', re.X|re.S), + re.compile(r'^((?!ABCDE).)*$', re.X|re.S), # ABCDE not in the script ] + def env_test(env): + assert 'ABCDE' in env + assert 'VWXYZ' not in env + os.environ['ABCDE'] = 'TEST1' + os.environ['VWXYZ'] = 'TEST2' run_typical_slurm_spawner(db, io_loop, spawner_kwargs=spawner_kwargs, - batch_script_re_list=batch_script_re_list) + batch_script_re_list=batch_script_re_list, + env_test=env_test) + del os.environ['ABCDE'], os.environ['VWXYZ'] # req_keepvars AND req_keepvars together spawner_kwargs = { From 88f6523fc005fdd1c85f43cf8383ff8b4d597828 Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Wed, 5 Dec 2018 11:21:51 +0200 Subject: [PATCH 4/4] Fix all the environment variable handling, make tests work - This fixes up all the previous commits on this branch --- batchspawner/batchspawner.py | 26 +++++++-- batchspawner/tests/test_spawners.py | 81 +++++++++++++++++++++-------- 2 files changed, 79 insertions(+), 28 deletions(-) diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index a54ab288..1cf087fe 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -148,16 +148,18 @@ def _req_homedir_default(self): "to a default by JupyterHub, and if you change this list " "the previous ones are _not_ included and your submissions will " "break unless you re-add necessary variables. Consider " - "req_keepvars for most use cases." -) - @default('req_keepvars_all') - def _req_keepvars_all_default(self): - return ','.join(self.get_env().keys()) + "req_keepvars for most use cases, don't edit this under normal " + "use.").tag(config=True) + + @default('req_keepvarsdefault') + def _req_keepvars_default_default(self): + return ','.join(super(BatchSpawnerBase, self).get_env().keys()) req_keepvars = Unicode( help="Extra environment variables which should be configured, " "added to the defaults in keepvars, " "comma separated list.").tag(config=True) + admin_environment = Unicode( help="Comma-separated list of environment variables to be passed to " "the batch submit/cancel commands, but _not_ to the batch script " @@ -213,6 +215,20 @@ def cmd_formatted_for_batch(self): """The command which is substituted inside of the batch script""" return ' '.join([self.batchspawner_singleuser_cmd] + self.cmd + self.get_args()) + def get_env(self): + """Get the env dict from JH, adding req_keepvars options + + get_env() returns the variables given by JH, but not anything + specified by req_keepvars since that's our creation. Add those + (from the JH environment) to the environment passed to the batch + start/stop/cancel/etc commands. + """ + env = super(BatchSpawnerBase, self).get_env() + for k in self.req_keepvars.split(',') + self.req_keepvars_default.split(','): + if k in os.environ: + env[k] = os.environ[k] + return env + def get_admin_env(self): """Get the environment passed to the batch submit/cancel/etc commands. diff --git a/batchspawner/tests/test_spawners.py b/batchspawner/tests/test_spawners.py index adbd1ff8..b71b3536 100644 --- a/batchspawner/tests/test_spawners.py +++ b/batchspawner/tests/test_spawners.py @@ -1,6 +1,8 @@ """Test BatchSpawner and subclasses""" +import contextlib import itertools +import os import re from unittest import mock from .. import BatchSpawnerRegexStates @@ -20,6 +22,21 @@ testjob = "12345" testport = 54321 +@contextlib.contextmanager +def setenv_context(**kwargs): + """Context manage which sets and restores environment variables.""" + orig = { } + for k, v in kwargs.items(): + orig[k] = os.environ.get(k, None) + os.environ[k] = v + yield + for k in kwargs: + if orig[k] is not None: + os.environ[k] = orig[k] + else: + del os.environ[k] + + class BatchDummy(BatchSpawnerRegexStates): exec_prefix = '' batch_submit_cmd = Unicode('cat > /dev/null; echo '+testjob) @@ -504,32 +521,50 @@ def test_lfs(db, io_loop): def test_keepvars(db, io_loop): """Test of environment handling """ - # req_keepvars + environment = {'ABCDE': 'TEST1', 'VWXYZ': 'TEST2', 'XYZ': 'TEST3',} + + + # req_keepvars_default - anything NOT here should not be propogated. spawner_kwargs = { 'req_keepvars_default': 'ABCDE', } batch_script_re_list = [ re.compile(r'--export=ABCDE', re.X|re.M), + re.compile(r'^((?!JUPYTERHUB_API_TOKEN).)*$', re.X|re.S), # *not* in the script ] def env_test(env): - assert 'ABCDE' in env - run_typical_slurm_spawner(db, io_loop, - spawner_kwargs=spawner_kwargs, - batch_script_re_list=batch_script_re_list, - env_test=env_test) - - # req_keepvars + # We can't test these - becasue removing these from the environment is + # a job of the batch system itself, which we do *not* run here. + #assert 'ABCDE' in env + #assert 'JUPYTERHUB_API_TOKEN' not in env + pass + with setenv_context(**environment): + run_typical_slurm_spawner(db, io_loop, + spawner_kwargs=spawner_kwargs, + batch_script_re_list=batch_script_re_list, + env_test=env_test) + + # req_keepvars - this should be added to the environment spawner_kwargs = { 'req_keepvars': 'ABCDE', } batch_script_re_list = [ re.compile(r'--export=.*ABCDE', re.X|re.M), + re.compile(r'^((?!VWXYZ).)*$', re.X|re.M), # *not* in line + re.compile(r'--export=.*JUPYTERHUB_API_TOKEN', re.X|re.S), ] - run_typical_slurm_spawner(db, io_loop, - spawner_kwargs=spawner_kwargs, - batch_script_re_list=batch_script_re_list) - - # req_keepvars + def env_test(env): + assert 'ABCDE' in env + assert 'VWXYZ' not in env + with setenv_context(**environment): + run_typical_slurm_spawner(db, io_loop, + spawner_kwargs=spawner_kwargs, + batch_script_re_list=batch_script_re_list, + env_test=env_test) + + # admin_environment - this should be in the environment passed to + # run commands but not the --export command which is included in + # the batch scripts spawner_kwargs = { 'admin_environment': 'ABCDE', } @@ -539,13 +574,12 @@ def env_test(env): def env_test(env): assert 'ABCDE' in env assert 'VWXYZ' not in env - os.environ['ABCDE'] = 'TEST1' - os.environ['VWXYZ'] = 'TEST2' - run_typical_slurm_spawner(db, io_loop, - spawner_kwargs=spawner_kwargs, - batch_script_re_list=batch_script_re_list, - env_test=env_test) - del os.environ['ABCDE'], os.environ['VWXYZ'] + assert 'JUPYTERHUB_API_TOKEN' in env + with setenv_context(**environment): + run_typical_slurm_spawner(db, io_loop, + spawner_kwargs=spawner_kwargs, + batch_script_re_list=batch_script_re_list, + env_test=env_test) # req_keepvars AND req_keepvars together spawner_kwargs = { @@ -555,6 +589,7 @@ def env_test(env): batch_script_re_list = [ re.compile(r'--export=ABCDE,XYZ', re.X|re.M), ] - run_typical_slurm_spawner(db, io_loop, - spawner_kwargs=spawner_kwargs, - batch_script_re_list=batch_script_re_list) + with setenv_context(**environment): + run_typical_slurm_spawner(db, io_loop, + spawner_kwargs=spawner_kwargs, + batch_script_re_list=batch_script_re_list)