diff --git a/examples-proposed/004-time-loop/mymodule/components.py b/examples-proposed/004-time-loop/mymodule/components.py index 8e8ec33..fd079c5 100644 --- a/examples-proposed/004-time-loop/mymodule/components.py +++ b/examples-proposed/004-time-loop/mymodule/components.py @@ -43,13 +43,6 @@ def step(self, timestamp=0.0): # TODO - perhaps monitor timestep does not need to be called every step, but only every 20 steps? self.services.call(monitor, 'step', t) - # With this second "example" notebook, we only create it once and only write to it once. - self.services.initialize_jupyter_notebook( - dest_notebook_name=NOTEBOOK_2_NAME, # path is relative to JupyterHub directory - source_notebook_path=NOTEBOOK_2_TEMPLATE, # path is relative to input directory - initial_data_files=self.services.get_staged_jupyterhub_files(), - ) - self.services.call(worker, 'finalize', 0) @@ -102,9 +95,9 @@ def step(self, timestamp=0.0, **keywords): data = f.read() # stage the state file in the JupyterHub directory - data_file = self.services.jupyterhub_make_state(state_file, timestamp) - print('ADD DATA FILE', data_file) - self.services.add_data_file_to_notebook(NOTEBOOK_1_NAME, data_file) + self.services.add_data_file_to_notebook(state_file, timestamp, NOTEBOOK_1_NAME) print('SEND PORTAL DATA', timestamp, data, file=stderr) self.services.send_portal_data(timestamp, data) + + # TODO add a basic sleep to this example for demonstration purposes diff --git a/examples-proposed/004-time-loop/sim.conf b/examples-proposed/004-time-loop/sim.conf index 8fada16..cfb6250 100644 --- a/examples-proposed/004-time-loop/sim.conf +++ b/examples-proposed/004-time-loop/sim.conf @@ -13,8 +13,8 @@ INPUT_DIR = $SIM_ROOT/input_dir/ USER_W3_DIR = $PWD/www USER_W3_BASEURL = -PORTAL_URL = http://localhost:5000 -#PORTAL_URL = https://lb.ipsportal.development.svc.spin.nersc.org +#PORTAL_URL = http://localhost:5000 +PORTAL_URL = https://lb.ipsportal.development.svc.spin.nersc.org # OPTIONAL # The BASE DIRECTORY of 
your machine's JupyterHub web server directory. This is used strictly for moving files around on the machine itself. diff --git a/examples-proposed/004-time-loop/sim/input_dir/base-notebook-iterative.ipynb b/examples-proposed/004-time-loop/sim/input_dir/base-notebook-iterative.ipynb index 988a582..b4b7208 100644 --- a/examples-proposed/004-time-loop/sim/input_dir/base-notebook-iterative.ipynb +++ b/examples-proposed/004-time-loop/sim/input_dir/base-notebook-iterative.ipynb @@ -8,12 +8,12 @@ "outputs": [], "source": [ "# Notebook template, the IPS Framework will add a cell before this one\n", - "# defining FILES as a list of state file paths.\n", + "# defining IPS_STATE_FILES as a list of state file paths.\n", "\n", "# In this example, this notebook is generated during the time loop.\n", "\n", "mapping = {}\n", - "for file in FILES:\n", + "for file in IPS_STATE_FILES:\n", " with open(file, 'rb') as f:\n", " mapping[file] = f.read()\n", "print(mapping)\n" diff --git a/examples-proposed/004-time-loop/sim/input_dir/base-notebook-one-pass.ipynb b/examples-proposed/004-time-loop/sim/input_dir/base-notebook-one-pass.ipynb index 925b512..02b9626 100644 --- a/examples-proposed/004-time-loop/sim/input_dir/base-notebook-one-pass.ipynb +++ b/examples-proposed/004-time-loop/sim/input_dir/base-notebook-one-pass.ipynb @@ -8,12 +8,12 @@ "outputs": [], "source": [ "# Notebook template, the IPS Framework will add a cell before this one\n", - "# defining FILES as a list of state file paths.\n", + "# defining IPS_STATE_FILES as a list of state file paths.\n", "\n", "# In this example, this notebook is only generated at the end of the run.\n", "\n", "mapping = {}\n", - "for file in FILES:\n", + "for file in IPS_STATE_FILES:\n", " with open(file, 'rb') as f:\n", " mapping[file] = f.read()\n", "print(mapping)\n" diff --git a/ipsframework/configurationManager.py b/ipsframework/configurationManager.py index 7f40243..6fa63ba 100644 --- a/ipsframework/configurationManager.py +++ 
b/ipsframework/configurationManager.py @@ -246,6 +246,8 @@ def initialize(self, data_mgr, resource_mgr, task_mgr): # Override platform value for PORTAL_URL if in simulation if 'PORTAL_URL' in conf: self.platform_conf['PORTAL_URL'] = conf['PORTAL_URL'] + if '_IPS_PORTAL_URL_HOST' in conf: + self.platform_conf['_IPS_PORTAL_URL_HOST'] = conf['_IPS_PORTAL_URL_HOST'] except (IOError, SyntaxError): self.fwk.exception('Error opening config file %s: ', conf_file) @@ -383,6 +385,17 @@ def _initialize_fwk_components(self): portal_conf['PORTAL_URL'] = self.get_platform_parameter('PORTAL_URL', silent=True) + if portal_conf['PORTAL_URL']: + from urllib.parse import urlparse + + parsed_url = urlparse(portal_conf['PORTAL_URL']) + if parsed_url.port: # get the host but be sure to strip out the username/password, so don't use netloc + # don't use colons in filepaths + portal_url_host = f'{parsed_url.hostname}_{parsed_url.port}' + else: + portal_url_host = parsed_url.hostname + portal_conf['_IPS_PORTAL_URL_HOST'] = portal_url_host + component_id = self._create_component(portal_conf, self.sim_map[self.fwk_sim_name]) self.fwk_components.append(component_id) diff --git a/ipsframework/jupyter.py b/ipsframework/jupyter.py index 02c77d0..d75fa0d 100644 --- a/ipsframework/jupyter.py +++ b/ipsframework/jupyter.py @@ -10,7 +10,9 @@ ...in a shell on Jupyter NERSC. 
""" -from typing import List, Optional +from os.path import sep +from pathlib import Path +from typing import Optional import nbformat as nbf @@ -27,31 +29,26 @@ def replace_last(source_string: str, old: str, new: str) -> str: return f'{head}{new}{tail}' -def _initial_jupyter_file_notebook_cell(variable: str, initial_data_files: Optional[List[str]] = None) -> str: - if not initial_data_files: - initial = '' - else: - itemsep = '\n' - initial = '\n' + itemsep.join([f"'{file}'," for file in initial_data_files]) +def _initial_jupyter_file_notebook_cell(dest: str, variable: str) -> str: return f"""{HOOK} import os # NOTE: directory should be sim_name plus the run id from the Portal -# NOTE: add absolute path as a comment to the notebook cell -# Uncomment below line to use any state files saved +IPS_DATA_DIR = '{str(Path(dest).parent / 'data') + sep}' +# Uncomment below line to implicitly use any state files saved in the data directory, note that the IPS framework explicitly lists out each file used #{variable} = os.listdir('data') # files created during the run -{variable} = [{initial} +{variable} = [ ] """ -def initialize_jupyter_notebook(dest: str, src: str, variable_name: str, index: int, initial_data_files: Optional[List[str]] = None): +def initialize_jupyter_notebook(dest: str, src: str, variable_name: str, index: int): """Create a new notebook from an old notebook, copying the result from 'src' to 'dest'. 
Params: - - dest - location of notebook to create on filesystem + - dest - location of notebook to create on filesystem (absolute file path) - src - location of source notebook on filesystem (is not overwritten unless src == dest) - variable_name: what to call the variable - index: insert new cells at position before this value (will not remove preexisting cells) @@ -59,13 +56,12 @@ def initialize_jupyter_notebook(dest: str, src: str, variable_name: str, index: """ # to avoid conversion, use as_version=nbf.NO_CONVERT - # nb: nbf.NotebookNode = nbf.read(src, as_version=4) header = '# Next cell generated by IPS Framework' nb['cells'] = ( nb['cells'][:index] - + [nbf.v4.new_markdown_cell(header), nbf.v4.new_code_cell(_initial_jupyter_file_notebook_cell(variable_name, initial_data_files))] + + [nbf.v4.new_markdown_cell(header), nbf.v4.new_code_cell(_initial_jupyter_file_notebook_cell(dest, variable_name))] + nb['cells'][index:] ) @@ -90,7 +86,7 @@ def add_data_file_to_notebook(dest: str, data_file: str, index: Optional[int] = ips_cell = nb['cells'][index]['source'] # search from right of string for the ']' character, should work assuming user does not modify the cell past the variable definition - result = replace_last(ips_cell, ']', f"'{data_file}',\n]") + result = replace_last(ips_cell, ']', f"f'{{IPS_DATA_DIR}}{data_file}',\n]") nb['cells'][index]['source'] = result with open(dest, 'w') as f: diff --git a/ipsframework/portalBridge.py b/ipsframework/portalBridge.py index d293094..f96f3ce 100644 --- a/ipsframework/portalBridge.py +++ b/ipsframework/portalBridge.py @@ -362,8 +362,9 @@ def check_send_post_responses(self): try: data = json.loads(msg) - if 'runid' in data: + if 'runid' in data and 'simname' in data: self.services.info('Run Portal URL = %s/%s', self.portal_url, data.get('runid')) + self.services.set_config_param('_IPS_PORTAL_RUNID', str(data.get('runid')), target_sim_name=data.get('simname')) msg = json.dumps(data) except (TypeError, 
json.decoder.JSONDecodeError): @@ -404,14 +405,6 @@ def check_data_send_post_responses(self): except (EOFError, OSError): break - try: - data = json.loads(msg) - if 'runid' in data: - self.services.info('Run Portal URL = %s/%s', self.portal_url, data.get('runid')) - - msg = json.dumps(data) - except (TypeError, json.decoder.JSONDecodeError): - pass if code == -1: # disable portal, stop trying to send more data self.portal_url = None @@ -446,15 +439,6 @@ def send_notebook_url(self, sim_data, event_data): except (EOFError, OSError): break - print('PUT RESPONSE', code, msg) - try: - data = json.loads(msg) - if 'runid' in data: - self.services.info('Run Portal URL = %s/%s', self.portal_url, data.get('runid')) - - msg = json.dumps(data) - except (TypeError, json.decoder.JSONDecodeError): - pass if code == -1: # disable portal, stop trying to send more data self.portal_url = None @@ -616,6 +600,7 @@ def init_simulation(self, sim_name, sim_root): sim_data = self.SimulationData() sim_data.sim_name = sim_name sim_data.sim_root = sim_root + self.services.set_config_param('_IPS_PORTAL_URL_HOST', self._IPS_PORTAL_URL_HOST, target_sim_name=sim_name) d = datetime.datetime.now() date_str = '%s.%03d' % (d.strftime('%Y-%m-%dT%H:%M:%S'), int(d.microsecond / 1000)) diff --git a/ipsframework/services.py b/ipsframework/services.py index 2e0436f..07d918a 100644 --- a/ipsframework/services.py +++ b/ipsframework/services.py @@ -1796,15 +1796,49 @@ def setMonitorURL(self, url=''): self.monitor_url = url self._send_monitor_event(eventType='IPS_SET_MONITOR_URL', comment='SUCCESS') + def _get_jupyter_host_directory(self) -> str: + """Get the host directory name Jupyter will associate with this run. + Generally this will be the portal url's hostname, but we will try to allow for fallbacks in certain cases. 
+ """ + try: + return self.get_config_param('_IPS_PORTAL_URL_HOST') + except Exception: + self.warning('_get_jupyter_host_directory: PORTAL_URL_HOST was not defined, falling back to random ID') + return str(uuid.uuid4()) + def _get_jupyter_runid(self) -> str: """Get the runid Jupyter will associate with this run. Generally this will be the Portal RUNID but we will try to allow for fallbacks in certain cases. """ + + # first, check to see if the portal URL was even initialized, fall back if not + try: + self.get_config_param('_IPS_PORTAL_URL_HOST') + except Exception: + self.warning('_get_jupyter_runid: PORTAL_URL was not defined, falling back to random ID') + return str(uuid.uuid4()) + + # see if the portal response has updated + attempts = 0 + max_attempts = 30 + while True: + try: + return self.get_config_param('_IPS_PORTAL_RUNID') + except Exception: + attempts += 1 + if attempts >= max_attempts: + break + time.sleep(1.0) + + # at this point, we must fall back to using an ID the framework generates + self.warning('_get_jupyter_runid: Unable to get RUNID directly from remote portal, using fallback identifier') try: return self.get_config_param('PORTAL_RUNID') except Exception: - # TODO this does NOT work across components. - self.warning('_get_root_path: PORTAL_RUNID was not defined, falling back to random ID') + # this code shouldn't execute unless the user forgot a configuration value somewhere + self.warning( + '_get_jupyter_runid: PORTAL_RUNID not defined - the simulation configuration probably forgot to specify PORTAL_URL and USE_PORTAL. 
Using randomly generated ID instead' + ) return str(uuid.uuid4()) def _init_jupyter(self) -> bool: @@ -1821,7 +1855,7 @@ def _init_jupyter(self) -> bool: self.warning('JUPYTERHUB_DIR should be an absolute path, skipping Jupyter config') return False - root_dir = os.path.join(root_dir, 'ipsframework', 'runs', self._get_jupyter_runid()) + os.path.sep + root_dir = os.path.join(root_dir, 'ipsframework', 'runs', self._get_jupyter_host_directory(), self._get_jupyter_runid()) + os.path.sep # TODO - it may make sense to also reattempt to create this, especially with long simulations try: @@ -1846,31 +1880,6 @@ def get_staged_jupyterhub_files(self) -> List[str]: data_dir = pathlib.Path(pathlib.Path(self._jupyterhub_dir) / 'data') return [str(p.resolve()) for p in data_dir.glob('*')] - def jupyterhub_make_state(self, state_file_path: str, timestamp: float) -> str: - """ - Move a state file into the JupyterHub directory. - - Returns: - - the path to the state file in the JupyterHub directory. This will be an absolute path. - - Raises: - - Exception, if unable to move file to the provided JUPYTERHUB_DIR - """ - if not self._jupyterhub_dir: - if not self._init_jupyter(): - # TODO generic exception - raise Exception('Unable to initialize base JupyterHub dir') - - file_parts = state_file_path.split('.') - if len(file_parts) > 2: - extension = f'.{file_parts[-1]}' - else: - extension = '' - new_state_file_path = os.path.join(self._jupyterhub_dir, 'data', f'{timestamp}{extension}') - # this may raise an OSError, it is the responsibility of the caller to handle it. 
- shutil.copyfile(state_file_path, new_state_file_path) - return new_state_file_path - def _get_jupyterhub_url(self) -> Optional[str]: url: str = self.get_config_param('JUPYTERHUB_URL') if not url: @@ -1878,21 +1887,28 @@ def _get_jupyterhub_url(self) -> Optional[str]: return None if not url.endswith('/'): url += '/' + + try: + portal_url_host = self.get_config_param('_IPS_PORTAL_URL_HOST') + except Exception: + self.warning('PORTAL_URL was not defined, skipping JupyterHub configuration') + return None + try: runid = self.get_config_param('PORTAL_RUNID') except Exception: # TODO Figure out how to associate value across components (may need to use a state file?) self.warning("Couldn't get PORTAL_RUNID, skipping Jupyter URL association of data") return None - url += f'ipsframework/runs/{runid}/' + + url += f'ipsframework/runs/{portal_url_host}/{runid}/' return url def initialize_jupyter_notebook( self, dest_notebook_name: str, source_notebook_path: str, - initial_data_files: Optional[List[str]] = None, - variable_name: str = 'FILES', + variable_name: str = 'IPS_STATE_FILES', cell_to_modify: int = 0, ) -> None: """Loads a notebook from source_notebook_path, adds a cell to load the data, and then saves it to source_notebook_path. Will also try to register the notebook with the IPS Portal, if available. @@ -1902,7 +1918,7 @@ def initialize_jupyter_notebook( Params: - dest_notebook_name: name of the JupyterNotebook you want to write (do not include file paths). - source_notebook_path: location you want to load the source notebook from - - variable_name: name of the variable you want to load files from (default: "FILES") + - variable_name: name of the variable you want to load files from (default: "IPS_STATE_FILES") - cell_to_modify: which cell in the JupyterNotebook you want to add the data call to (0-indexed). (This will not overwrite any cells, just appends.) By default, the data listing will happen in the FIRST cell. 
@@ -1912,7 +1928,7 @@ def initialize_jupyter_notebook( raise Exception('Unable to initialize base JupyterHub dir') # adds notebook to JupyterHub - initialize_jupyter_notebook(f'{self._jupyterhub_dir}{dest_notebook_name}', source_notebook_path, variable_name, cell_to_modify, initial_data_files) + initialize_jupyter_notebook(f'{self._jupyterhub_dir}{dest_notebook_name}', source_notebook_path, variable_name, cell_to_modify) # register notebook with IPS Portal url = self._get_jupyterhub_url() @@ -1931,21 +1947,37 @@ def initialize_jupyter_notebook( self.publish('_IPS_MONITOR', 'PORTAL_REGISTER_NOTEBOOK', event_data) self._send_monitor_event('IPS_PORTAL_REGISTER_NOTEBOOK', f'URL = {url}') - def add_data_file_to_notebook(self, notebook_name: str, state_file: str, index: Optional[int] = None): + def add_data_file_to_notebook(self, state_file_path: str, timestamp: float, notebook_name: str, index: Optional[int] = None): """Add data file to notebook list. This function assumes that a notebook has already been created with intialize_jupyter_notebook. Using this function does not call the IPS Portal. Params: + - state_file_path: location of the current state file we want to copy to the Jupyter directory + - timestamp: label to assign to the data (currently must be a floating point value) - notebook_name: name of notebook which will be modified. Note that this path is relative to the JupyterHub directory. - - data_file: data file we add to the notebook (simple string). This value should almost always be the return value from "self.services.jupyterhub_make_state". - index: optional index of the IPS notebook cell. If not provided, the IPS Framework will attempt to automatically find the cell it created, which should work for every usecase where you don't anticipate modifying the notebook until after the run is complete. 
""" + if not self._jupyterhub_dir: if not self._init_jupyter(): + # TODO generic exception raise Exception('Unable to initialize base JupyterHub dir') - add_data_file_to_notebook(f'{self._jupyterhub_dir}{notebook_name}', state_file, index) + + file_parts = state_file_path.split('.') + if len(file_parts) > 2: # name of the file could just be a floating point value with no extension + extension = f'.{file_parts[-1]}' + else: + extension = '' + + state_file_name = f'{timestamp}{extension}' + jupyter_data_dir = os.path.join(self._jupyterhub_dir, 'data', state_file_name) + # this may raise an OSError, it is the responsibility of the caller to handle it. + shutil.copyfile(state_file_path, jupyter_data_dir) + + # TODO - maybe add flag which allows us to replace old state files + add_data_file_to_notebook(f'{self._jupyterhub_dir}{notebook_name}', state_file_name, index) def publish(self, topicName, eventName, eventBody): """