From 3f2893e7f4921bfcd939667625f7110bc8d93fee Mon Sep 17 00:00:00 2001 From: Oliver Wissett Date: Fri, 4 Oct 2024 19:24:40 +0100 Subject: [PATCH] Adds sentinel return value to prevent sweeper from crashing (#2479) --- .../hydra_plugins/hydra_submitit_launcher/config.py | 2 ++ .../hydra_submitit_launcher/submitit_launcher.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/plugins/hydra_submitit_launcher/hydra_plugins/hydra_submitit_launcher/config.py b/plugins/hydra_submitit_launcher/hydra_plugins/hydra_submitit_launcher/config.py index e4da9957bd..0345eb8608 100644 --- a/plugins/hydra_submitit_launcher/hydra_plugins/hydra_submitit_launcher/config.py +++ b/plugins/hydra_submitit_launcher/hydra_plugins/hydra_submitit_launcher/config.py @@ -29,6 +29,8 @@ class BaseQueueConf: stderr_to_stdout: bool = False # If True, the launcher will not wait for the job to finish (useful for very long runs) + # This value is not passed to submitit; it is used by the launcher itself. When enabled, + # a set of sentinel values is returned to the sweeper to indicate that the job is running. 
no_block: bool = False diff --git a/plugins/hydra_submitit_launcher/hydra_plugins/hydra_submitit_launcher/submitit_launcher.py b/plugins/hydra_submitit_launcher/hydra_plugins/hydra_submitit_launcher/submitit_launcher.py index e97e1cad2a..de42635d50 100644 --- a/plugins/hydra_submitit_launcher/hydra_plugins/hydra_submitit_launcher/submitit_launcher.py +++ b/plugins/hydra_submitit_launcher/hydra_plugins/hydra_submitit_launcher/submitit_launcher.py @@ -5,7 +5,13 @@ from typing import Any, Dict, List, Optional, Sequence from hydra.core.singleton import Singleton -from hydra.core.utils import JobReturn, filter_overrides, run_job, setup_globals +from hydra.core.utils import ( + JobReturn, + filter_overrides, + run_job, + setup_globals, + JobStatus, +) from hydra.plugins.launcher import Launcher from hydra.types import HydraContext, TaskFunction from omegaconf import DictConfig, OmegaConf, open_dict @@ -145,7 +151,8 @@ def launch( jobs = executor.map_array(self, *zip(*job_params)) if self.no_block: - return [] + sentinal = JobReturn(status=JobStatus.COMPLETED, _return_value=None) + return [sentinal] * num_jobs else: return [j.results()[0] for j in jobs]