diff --git a/CHANGELOG.md b/CHANGELOG.md index 243aacfb..dfe3c60b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,10 @@ Added (user) * Add Jinja2 templating as an option for all scripts and commands. If '{{' or `{%` is used anywhere in the string, it is used as a jinja2 template. * Add new option exec_prefix, which defaults to `sudo -E -u {username}`. This replaces explicit `sudo` in every batch command - changes in local commands may be needed. -* New option: `req_keepvars_extra`, which allows keeping extra variables in addition to what is defined by JupyterHub itself (addition of variables to keep instead of replacement). #99 * Add `req_prologue` and `req_epilogue` options to scripts which are inserted before/after the main jupyterhub-singleuser command, which allow for generic setup/cleanup without overriding the entire script. #96 * SlurmSpawner: add the `req_reservation` option. #91 * Add basic support for JupyterHub progress updates, but this is not used much yet. #86 ++* Add the option `admin_environment` which get passed to the batch submission commands (like for authentication) but *not* to the `--export Added (developer) @@ -25,6 +25,7 @@ Changed - Add a new option `batchspawner_singleuser_cmd` which is used as a wrapper in the single-user servers, which conveys the remote port back to JupyterHub. This is now an integral part of the spawn process. - If you have installed with `pip install -e`, you will have to re-install so that the new script `batchspawner-singleuser` is added to `$PATH`. * Update minimum requirements to JupyterHub 0.9 and Python 3.5. #143 ++* Change meaning of `req_keepvars`. Previously, setting this would override everything that JupyterHub requires to be set (so you'd have to make sure you set it back). Now, `req_keepvars` only *adds* to the JupyterHub defaults, and `req_keepvars_default` gets set to the JupyterHub upstream and can be used to completly override the JupyterHub defaults. #99, #?? * Update Slurm batch script. Now, the single-user notebook is run in a job step, with a wrapper of `srun`. This may need to be removed using `req_srun=''` if you don't want environment variables limited. * Pass the environment dictionary to the queue and cancel commands as well. This is mostly user environment, but may be useful to these commands as well in some cases. #108, #111 If these environment variables were used for authentication as an admin, be aware that there are pre-existing security issues because they may be passed to the user via the batch submit command, see #82. diff --git a/README.md b/README.md index c5046c21..e978a741 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,10 @@ shell which will execute arbitrary user environment configuration access to their own cluster user account. This is something which we are working on. +The `admin_environment` option claims to pass environment only to the +batch spawners, and not the user servers. This relies on each spawner +handling environment correctly, and this is *not* checked so you +should *not* rely on this yet. ## Provide different configurations of BatchSpawner diff --git a/SPAWNERS.md b/SPAWNERS.md index cce52e6e..d99d8b68 100644 --- a/SPAWNERS.md +++ b/SPAWNERS.md @@ -34,6 +34,8 @@ environment to be used, set `req_srun=''`. However, this is not perfect: there is still a bash shell begun as the user which could run arbitrary startup, define shell aliases for `srun`, etc. +`admin_environment` does work with SlurmSpawner. + Use of `srun` is required to gracefully terminate. diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index 3cb51ac0..1cf087fe 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -143,15 +143,32 @@ def _req_username_default(self): def _req_homedir_default(self): return pwd.getpwnam(self.user.name).pw_dir - req_keepvars = Unicode() - @default('req_keepvars') - def _req_keepvars_default(self): - return ','.join(self.get_env().keys()) - - req_keepvars_extra = Unicode( + req_keepvars_default = Unicode( + help="All environment variables to pass to the spawner. This is set " + "to a default by JupyterHub, and if you change this list " + "the previous ones are _not_ included and your submissions will " + "break unless you re-add necessary variables. Consider " + "req_keepvars for most use cases, don't edit this under normal " + "use.").tag(config=True) + + @default('req_keepvarsdefault') + def _req_keepvars_default_default(self): + return ','.join(super(BatchSpawnerBase, self).get_env().keys()) + + req_keepvars = Unicode( help="Extra environment variables which should be configured, " "added to the defaults in keepvars, " - "comma separated list.") + "comma separated list.").tag(config=True) + + admin_environment = Unicode( + help="Comma-separated list of environment variables to be passed to " + "the batch submit/cancel commands, but _not_ to the batch script " + "via --export. This could be used, for example, to authenticate " + "the submit command as an admin user so that it can submit jobs " + "as another user. These are _not_ included in an " + "--export={keepvars} type line in the batch script, but you " + "should check that your batch system actually does the right " + "thing here.").tag(config=True) batch_script = Unicode('', help="Template for job submission script. Traits on this class named like req_xyz " @@ -177,8 +194,13 @@ def get_req_subvars(self): subvars = {} for t in reqlist: subvars[t[4:]] = getattr(self, t) - if subvars.get('keepvars_extra'): - subvars['keepvars'] += ',' + subvars['keepvars_extra'] + # 'keepvars' is special: 'keepvars' goes through as the + # variable, but 'keepvars_default' is prepended to it. + # 'keepvars_default' is JH-required stuff so you have to try + # extra hard to override it. + if subvars.get('keepvars'): + subvars['keepvars_default'] += ',' + subvars['keepvars'] + subvars['keepvars'] = subvars['keepvars_default'] return subvars batch_submit_cmd = Unicode('', @@ -193,6 +215,37 @@ def cmd_formatted_for_batch(self): """The command which is substituted inside of the batch script""" return ' '.join([self.batchspawner_singleuser_cmd] + self.cmd + self.get_args()) + def get_env(self): + """Get the env dict from JH, adding req_keepvars options + + get_env() returns the variables given by JH, but not anything + specified by req_keepvars since that's our creation. Add those + (from the JH environment) to the environment passed to the batch + start/stop/cancel/etc commands. + """ + env = super(BatchSpawnerBase, self).get_env() + for k in self.req_keepvars.split(',') + self.req_keepvars_default.split(','): + if k in os.environ: + env[k] = os.environ[k] + return env + + def get_admin_env(self): + """Get the environment passed to the batch submit/cancel/etc commands. + + This contains all of the output from self.get_env(), plus any + variables defined in self.admin_environment. In fact, only this + command is used to pass to batch commands, and the only use of + self.get_env() is producing the list of variables to be + --export='ed to. Everything in get_env *must* also be in here + or else it won't be used. + """ + env = self.get_env() + if self.admin_environment: + for key in self.admin_environment.split(','): + if key in os.environ and key not in env: + env[key] = os.environ[key] + return env + async def run_command(self, cmd, input=None, env=None): proc = await asyncio.create_subprocess_shell(cmd, env=env, stdin=asyncio.subprocess.PIPE, @@ -247,7 +300,7 @@ async def submit_batch_script(self): script = await self._get_batch_script(**subvars) self.log.info('Spawner submitting job using ' + cmd) self.log.info('Spawner submitted script:\n' + script) - out = await self.run_command(cmd, input=script, env=self.get_env()) + out = await self.run_command(cmd, input=script, env=self.get_admin_env()) try: self.log.info('Job submitted. cmd: ' + cmd + ' output: ' + out) self.job_id = self.parse_job_id(out) @@ -273,7 +326,7 @@ async def read_job_state(self): format_template(self.batch_query_cmd, **subvars))) self.log.debug('Spawner querying job: ' + cmd) try: - out = await self.run_command(cmd) + out = await self.run_command(cmd, env=self.get_admin_env()) self.job_status = out except Exception as e: self.log.error('Error querying job ' + self.job_id) @@ -291,7 +344,7 @@ async def cancel_batch_job(self): cmd = ' '.join((format_template(self.exec_prefix, **subvars), format_template(self.batch_cancel_cmd, **subvars))) self.log.info('Cancelling job ' + self.job_id + ': ' + cmd) - await self.run_command(cmd) + await self.run_command(cmd, env=self.get_admin_env()) def load_state(self, state): """load job_id from state""" diff --git a/batchspawner/tests/test_spawners.py b/batchspawner/tests/test_spawners.py index f249e6fd..b71b3536 100644 --- a/batchspawner/tests/test_spawners.py +++ b/batchspawner/tests/test_spawners.py @@ -1,5 +1,8 @@ """Test BatchSpawner and subclasses""" +import contextlib +import itertools +import os import re from unittest import mock from .. import BatchSpawnerRegexStates @@ -19,6 +22,21 @@ testjob = "12345" testport = 54321 +@contextlib.contextmanager +def setenv_context(**kwargs): + """Context manage which sets and restores environment variables.""" + orig = { } + for k, v in kwargs.items(): + orig[k] = os.environ.get(k, None) + os.environ[k] = v + yield + for k in kwargs: + if orig[k] is not None: + os.environ[k] = orig[k] + else: + del os.environ[k] + + class BatchDummy(BatchSpawnerRegexStates): exec_prefix = '' batch_submit_cmd = Unicode('cat > /dev/null; echo '+testjob) @@ -29,11 +47,17 @@ class BatchDummy(BatchSpawnerRegexStates): state_running_re = Unicode('RUN') state_exechost_re = Unicode('RUN (.*)$') - cmd_expectlist = None + cmd_expectlist = None #List of (re) out_expectlist = None + env_testlist = None # List of functions to call on env dict, function should assert within. def run_command(self, *args, **kwargs): """Overwriten run command to test templating and outputs""" cmd = args[0] + # Test the environment + if self.env_testlist: # if first item is None, also pop and advance + env_test = self.env_testlist.pop(0) + if env_test: + env_test(kwargs['env']) # Test that the command matches the expectations if self.cmd_expectlist: run_re = self.cmd_expectlist.pop(0) @@ -221,13 +245,14 @@ def run_command(self, cmd, *args, **kwargs): assert status == 1 def run_spawner_script(db, io_loop, spawner, script, - batch_script_re_list=None, spawner_kwargs={}): + batch_script_re_list=None, spawner_kwargs={}, + env_test=None): """Run a spawner script and test that the output and behavior is as expected. db: same as in this module io_loop: same as in this module spawner: the BatchSpawnerBase subclass to test - script: list of (input_re_to_match, output) + script: list of (input_re_to_match, output, env_testfunc) batch_script_re_list: if given, assert batch script matches all of these """ # Create the expected scripts @@ -238,6 +263,9 @@ def run_spawner_script(db, io_loop, spawner, script, class BatchDummyTestScript(spawner): @gen.coroutine def run_command(self, cmd, input=None, env=None): + # Test the environment + if env_test: + env_test(env) # Test the input run_re = cmd_expectlist.pop(0) if run_re: @@ -402,7 +430,8 @@ def run_typical_slurm_spawner(db, io_loop, spawner=SlurmSpawner, script=normal_slurm_script, batch_script_re_list=None, - spawner_kwargs={}): + spawner_kwargs={}, + env_test=None): """Run a full slurm job with default (overrideable) parameters. This is useful, for example, for changing options and testing effect @@ -410,7 +439,8 @@ def run_typical_slurm_spawner(db, io_loop, """ return run_spawner_script(db, io_loop, spawner, script, batch_script_re_list=batch_script_re_list, - spawner_kwargs=spawner_kwargs) + spawner_kwargs=spawner_kwargs, + env_test=env_test) #def test_gridengine(db, io_loop): @@ -489,25 +519,77 @@ def test_lfs(db, io_loop): def test_keepvars(db, io_loop): - # req_keepvars + """Test of environment handling + """ + environment = {'ABCDE': 'TEST1', 'VWXYZ': 'TEST2', 'XYZ': 'TEST3',} + + + # req_keepvars_default - anything NOT here should not be propogated. spawner_kwargs = { - 'req_keepvars': 'ABCDE', + 'req_keepvars_default': 'ABCDE', } batch_script_re_list = [ re.compile(r'--export=ABCDE', re.X|re.M), + re.compile(r'^((?!JUPYTERHUB_API_TOKEN).)*$', re.X|re.S), # *not* in the script ] - run_typical_slurm_spawner(db, io_loop, - spawner_kwargs=spawner_kwargs, - batch_script_re_list=batch_script_re_list) + def env_test(env): + # We can't test these - becasue removing these from the environment is + # a job of the batch system itself, which we do *not* run here. + #assert 'ABCDE' in env + #assert 'JUPYTERHUB_API_TOKEN' not in env + pass + with setenv_context(**environment): + run_typical_slurm_spawner(db, io_loop, + spawner_kwargs=spawner_kwargs, + batch_script_re_list=batch_script_re_list, + env_test=env_test) + + # req_keepvars - this should be added to the environment + spawner_kwargs = { + 'req_keepvars': 'ABCDE', + } + batch_script_re_list = [ + re.compile(r'--export=.*ABCDE', re.X|re.M), + re.compile(r'^((?!VWXYZ).)*$', re.X|re.M), # *not* in line + re.compile(r'--export=.*JUPYTERHUB_API_TOKEN', re.X|re.S), + ] + def env_test(env): + assert 'ABCDE' in env + assert 'VWXYZ' not in env + with setenv_context(**environment): + run_typical_slurm_spawner(db, io_loop, + spawner_kwargs=spawner_kwargs, + batch_script_re_list=batch_script_re_list, + env_test=env_test) + + # admin_environment - this should be in the environment passed to + # run commands but not the --export command which is included in + # the batch scripts + spawner_kwargs = { + 'admin_environment': 'ABCDE', + } + batch_script_re_list = [ + re.compile(r'^((?!ABCDE).)*$', re.X|re.S), # ABCDE not in the script + ] + def env_test(env): + assert 'ABCDE' in env + assert 'VWXYZ' not in env + assert 'JUPYTERHUB_API_TOKEN' in env + with setenv_context(**environment): + run_typical_slurm_spawner(db, io_loop, + spawner_kwargs=spawner_kwargs, + batch_script_re_list=batch_script_re_list, + env_test=env_test) # req_keepvars AND req_keepvars together spawner_kwargs = { - 'req_keepvars': 'ABCDE', - 'req_keepvars_extra': 'XYZ', + 'req_keepvars_default': 'ABCDE', + 'req_keepvars': 'XYZ', } batch_script_re_list = [ re.compile(r'--export=ABCDE,XYZ', re.X|re.M), ] - run_typical_slurm_spawner(db, io_loop, - spawner_kwargs=spawner_kwargs, - batch_script_re_list=batch_script_re_list) + with setenv_context(**environment): + run_typical_slurm_spawner(db, io_loop, + spawner_kwargs=spawner_kwargs, + batch_script_re_list=batch_script_re_list)