[Execution] Add support for chained experiment instances (#91)

* Add support for chained experiment instances * Update docs * Add test
geoffxy · Oct 8, 2023 · 334fca3 · 334fca3
1 parent 99401a2
commit 334fca3
Show file tree

Hide file tree

Showing 5 changed files with 82 additions and 5 deletions.
diff --git a/src/conductor/task_types/stdlib/run_experiment_group.py b/src/conductor/task_types/stdlib/run_experiment_group.py
@@ -18,10 +18,12 @@ def run_experiment_group(
     name: str,
     run: str,
     experiments: Iterable[ExperimentInstance],
+    chain_experiments: bool = False,
     deps: Optional[Sequence[str]] = None,
 ) -> None:
     task_deps = deps if deps is not None else []
     relative_experiment_identifiers = []
+    prev_experiment_identifier: Optional[str] = None
 
     try:
         seen_experiment_names = set()
@@ -34,6 +36,13 @@ def run_experiment_group(
                 )
 
             seen_experiment_names.add(experiment.name)
+
+            # Add the previously-processed task as a dependency if
+            # `chain_experiments` is set to `True`.
+            experiment_deps = task_deps
+            if chain_experiments and prev_experiment_identifier is not None:
+                experiment_deps = [*task_deps, prev_experiment_identifier]
+
             # run_experiment(): Defined by Conductor at runtime
             # pylint: disable=undefined-variable
             run_experiment(  # type: ignore
@@ -42,9 +51,11 @@ def run_experiment_group(
                 parallelizable=experiment.parallelizable,
                 args=experiment.args,
                 options=experiment.options,
-                deps=task_deps,
+                deps=experiment_deps,
             )
-            relative_experiment_identifiers.append(":" + experiment.name)
+            experiment_identifier = ":" + experiment.name
+            relative_experiment_identifiers.append(experiment_identifier)
+            prev_experiment_identifier = experiment_identifier
 
     except TypeError as ex:
         raise ExperimentGroupInvalidExperimentInstance(task_name=name) from ex

diff --git a/tests/cond_run_test.py b/tests/cond_run_test.py
@@ -414,3 +414,10 @@ def test_cond_run_multiple_failures_stop_early(tmp_path: pathlib.Path):
     # The top level `run_experiment_group()` task is technically not a Conductor experiment task.
     sweep_out = cond.find_task_output_dir("//multiple:sweep", is_experiment=False)
     assert sweep_out is None
+
+
+def test_cond_run_chained(tmp_path: pathlib.Path):
+    cond = ConductorRunner.from_template(tmp_path, FIXTURE_TEMPLATES["experiments"])
+    # The task checks that chaining occurs.
+    result = cond.run("//sweep:chained-test", jobs=5)
+    assert result.returncode == 0
diff --git a/tests/fixture-projects/experiments/sweep/COND b/tests/fixture-projects/experiments/sweep/COND
@@ -21,3 +21,26 @@ run_experiment_group(
     for threads in range(1, 5)
   ],
 )
+
+run_experiment_group(
+  name="chained-test",
+  run="./run_check_file.sh",
+  experiments=[
+    ExperimentInstance(
+      name="chained-test-1",
+      args=["chained", "1"],
+      parallelizable=True,
+    ),
+    ExperimentInstance(
+      name="chained-test-2",
+      args=["chained", "2"],
+      parallelizable=True,
+    ),
+    ExperimentInstance(
+      name="chained-test-3",
+      args=["chained", "3"],
+      parallelizable=True,
+    ),
+  ],
+  chain_experiments=True,
+)
diff --git a/tests/fixture-projects/experiments/sweep/run_check_file.sh b/tests/fixture-projects/experiments/sweep/run_check_file.sh
@@ -0,0 +1,16 @@
+#! /bin/bash
+
+prefix=$1
+num=$2
+
+if [ $num -gt 1 ] && [ -z "$COND_DEPS" ]; then
+  exit 1
+fi
+
+if [ $num = 1 || -f "$COND_DEPS/$prefix_$num" ]; then
+  next_num=(($num + 1))
+  touch $COND_OUT/$prefix_$next_num
+  exit 0
+else
+  exit 1
+fi
diff --git a/website/docs/task-types/run-experiment-group.md b/website/docs/task-types/run-experiment-group.md
@@ -4,7 +4,7 @@ id: run-experiment-group
 ---
 
 ```python
-run_experiment_group(name, run, experiments=[], deps=[])
+run_experiment_group(name, run, experiments=[], chain_experiments=False, deps=[])
 ```
 
 A `run_experiment_group()` task lets you specify a list of experiments that
@@ -70,6 +70,20 @@ the experiment's enclosing `run_experiment_group()`.
 
 :::
 
+### `chain_experiments`
+
+**Type:** Boolean (optional)
+
+If set to `True`, Conductor will add dependency constraints between the
+experiment instances listed in `experiments`. Conductor adds the dependencies in
+the order the experiment instances are defined, creating a "dependency chain."
+The usage example at the bottom of this page shows what this argument
+does.
+
+This argument is useful when you want to run different experiment _groups_
+concurrently, but do not want the experiments within one group to run
+concurrently.
+
 ### `deps`
 
 **Type:** List of task identifiers (default: `[]`)
@@ -108,6 +122,7 @@ run_experiment_group(
     # comprehension when defining your experiments.
     for threads in range(1, 3)
   ],
+  chain_experiments=True,
   deps=[
     ":compile",
   ],
@@ -124,7 +139,9 @@ run_experiment(
     "threads": 1,
   },
   parallelizable=False,
-  deps=[":compile"],
+  deps=[
+    ":compile",
+  ],
 )
 
 run_experiment(
@@ -134,7 +151,10 @@ run_experiment(
     "threads": 2,
   },
   parallelizable=False,
-  deps=[":compile"],
+  deps=[
+    ":compile",
+    ":sweep-1",
+  ],
 )
 
 combine(