From 54e371220e9091940746351c18272e2d192f0a43 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sun, 3 Mar 2024 19:11:51 -0500 Subject: [PATCH] [Lib] Add where() helper function to locate experiment results --- src/conductor/cli/where.py | 21 ++++++---------- src/conductor/lib/path.py | 34 +++++++++++++++++++++++++- website/docs/python-support-library.md | 23 +++++++++++++++++ 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/src/conductor/cli/where.py b/src/conductor/cli/where.py index fb04a98..e2e276e 100644 --- a/src/conductor/cli/where.py +++ b/src/conductor/cli/where.py @@ -1,7 +1,6 @@ -from conductor.context import Context from conductor.errors import NoTaskOutputPath -from conductor.task_identifier import TaskIdentifier from conductor.utils.user_code import cli_command +from conductor.lib.path import where def register_command(subparsers): @@ -34,17 +33,11 @@ def register_command(subparsers): @cli_command def main(args): - ctx = Context.from_cwd() - task_identifier = TaskIdentifier.from_str( + result = where( args.task_identifier, - require_prefix=False, + relative_to_project_root=args.project, + non_existent_ok=args.non_existent_ok, ) - ctx.task_index.load_single_task(task_identifier) - task = ctx.task_index.get_task(task_identifier) - output_path = task.get_output_path(ctx) - if output_path is None or (not output_path.exists() and not args.non_existent_ok): - raise NoTaskOutputPath(task_identifier=str(task_identifier)) - if args.project: - print(output_path.relative_to(ctx.project_root)) - else: - print(output_path) + if result is None: + raise NoTaskOutputPath(task_identifier=args.task_identifier) + print(result) diff --git a/src/conductor/lib/path.py b/src/conductor/lib/path.py index 3fb74bf..71053f8 100644 --- a/src/conductor/lib/path.py +++ b/src/conductor/lib/path.py @@ -1,12 +1,14 @@ import os import pathlib -from typing import List, Union +from typing import List, Union, Optional from conductor.config import ( DEPS_ENV_VARIABLE_NAME, 
DEPS_ENV_PATH_SEPARATOR, OUTPUT_ENV_VARIABLE_NAME, ) +from conductor.context import Context +from conductor.task_identifier import TaskIdentifier def get_deps_paths() -> List[pathlib.Path]: @@ -53,3 +55,33 @@ def in_output_dir(file_path: Union[pathlib.Path, str]) -> pathlib.Path: return pathlib.Path(file_path) else: return file_path + + +def where( + identifier: str, relative_to_project_root: bool = False, non_existent_ok: bool = False +) -> Optional[pathlib.Path]: + """ + Returns the output location path of the given task identifier. If this + returns `None`, it indicates no output location is available (e.g., the task + has not run before). + + If `relative_to_project_root` is set to True, this will return a relative + path to the project root. Otherwise, it returns an absolute path. + + If `non_existent_ok` is set to True, this will return the task's output path + even if the path does not yet exist. + """ + ctx = Context.from_cwd() + task_identifier = TaskIdentifier.from_str( + identifier, + require_prefix=False, + ) + ctx.task_index.load_single_task(task_identifier) + task = ctx.task_index.get_task(task_identifier) + output_path = task.get_output_path(ctx) + if output_path is None or (not output_path.exists() and not non_existent_ok): + return None + if relative_to_project_root: + return output_path.relative_to(ctx.project_root) + else: + return output_path diff --git a/website/docs/python-support-library.md b/website/docs/python-support-library.md index b1b7ae3..cbd844d 100644 --- a/website/docs/python-support-library.md +++ b/website/docs/python-support-library.md @@ -105,3 +105,26 @@ stored. Otherwise, this function returns `file_path` unchanged (but as a This is meant to be useful for scripts that may be run independently of Conductor. Note that `file_path` should be a relative path. 
+### `where()`
+
+```python
+def where(
+    identifier: str,
+    relative_to_project_root: bool = False,
+    non_existent_ok: bool = False,
+) -> Optional[pathlib.Path]
+```
+
+Returns the output location path of the given task identifier. This function
+will only work when executed from inside a Conductor project (i.e., in a path
+that is under the project root). This function is useful when retrieving
+experimental results in scripts or notebooks.
+
+If this function returns `None`, it indicates no output location is available
+(e.g., the task has not run before).
+
+If `relative_to_project_root` is set to `True`, this will return a path
+relative to the project root. Otherwise, it returns an absolute path.
+
+If `non_existent_ok` is set to `True`, this will return the task's output path
+even if the path does not yet exist.