diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index cf8fa182b..365a8b1b1 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -12,12 +12,11 @@ jobs:
     runs-on: ubuntu-latest
     name: upload release to PyPI
     permissions:
-      # IMPORTANT: this permission is mandatory for trusted publishing
-      id-token: write
+      id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
         with:
           python-version: "3.x"
       - name: Install dependencies
@@ -25,9 +24,6 @@ jobs:
           python -m pip install --upgrade pip
           pip install setuptools wheel twine
       - name: Build and publish
-        env:
-          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
         run: |
           python setup.py sdist bdist_wheel
       - name: Publish package distributions to PyPI
diff --git a/.gitignore b/.gitignore
index 584209944..2bf77713d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -64,7 +64,7 @@ open_pipelines/
 .coverage*
 .pytest_cache
 .vscode/
-.looper.yaml
+/tests/data/hello_looper-dev/.gitignore
 
 # Reserved files for comparison
 *RESERVE*
diff --git a/.looper.yaml b/.looper.yaml
new file mode 100644
index 000000000..d4cfc108f
--- /dev/null
+++ b/.looper.yaml
@@ -0,0 +1,5 @@
+pep_config: example/pep/path
+output_dir: .
+pipeline_interfaces:
+  sample: []
+  project: []
diff --git a/MANIFEST.in b/MANIFEST.in
index 5bc61acec..15473d351 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -6,3 +6,4 @@ include looper/default_config/*
 include looper/default_config/divvy_templates/*
 include looper/jinja_templates_old/*
 include looper/schemas/*
+include looper/command_models/*
diff --git a/docs/changelog.md b/docs/changelog.md
index 81a32a764..24c3b9b86 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -2,6 +2,35 @@
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.
 
+## [1.8.1] -- 2024-06-06
+
+### Fixed
+- added `-v` and `--version` to the CLI
+- fixed running project level with `--project` argument
+
+## [1.8.0] -- 2024-06-04
+
+### Added
+- looper destroy now destroys individual results when pipestat is configured: https://github.com/pepkit/looper/issues/469
+- comprehensive smoketests: https://github.com/pepkit/looper/issues/464
+- allow rerun to work on both failed or waiting flags: https://github.com/pepkit/looper/issues/463
+
+### Changed
+- Migrated `argparse` CLI definition to a pydantic basis for all commands. See: https://github.com/pepkit/looper/issues/438
+- during project load, check if PEP file path is a file first, then check if it is a registry path: https://github.com/pepkit/looper/issues/456
+- Looper now uses FutureYamlConfigManager due to the yacman refactor v0.9.3: https://github.com/pepkit/looper/issues/452
+
+### Fixed
+- inferring project name when loading PEP from csv: https://github.com/pepkit/looper/issues/484
+- fix inconsistency resolving pipeline interface paths if multiple paths are supplied: https://github.com/pepkit/looper/issues/474
+- fix bug with checking for completed flags: https://github.com/pepkit/looper/issues/470
+- fix looper destroy not properly destroying all related files: https://github.com/pepkit/looper/issues/468
+- looper rerun now only runs failed jobs as intended: https://github.com/pepkit/looper/issues/467
+- looper inspect now inspects the looper config: https://github.com/pepkit/looper/issues/462
+- Load PEP from CSV: https://github.com/pepkit/looper/issues/456
+- looper now works with sample_table_index https://github.com/pepkit/looper/issues/458
+
 ## [1.7.1] -- 2024-05-28
 
 ### Fixed
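The `### Changed` entry above is the heart of this release: the hand-built `argparse` tree in `cli_looper.py` (deleted at the bottom of this diff) is replaced by pydantic command models in `looper/command_models/` (now shipped via the `MANIFEST.in` change). A minimal sketch of the general idea follows; `RunArgs` and `parser_from_model` are illustrative names, not looper internals:

```python
# Illustrative only: a pydantic model as the single source of truth for a CLI,
# the approach the 1.8.0 changelog describes. Not looper's actual code.
import argparse
from typing import Optional

from pydantic import BaseModel, Field


class RunArgs(BaseModel):
    ignore_flags: bool = Field(False, description="Ignore run status flags")
    time_delay: int = Field(0, ge=0, le=30, description="Delay between job submissions")
    limit: Optional[int] = Field(None, description="Limit to n samples")


def parser_from_model(model: type) -> argparse.ArgumentParser:
    """Derive an argparse parser from the model's fields."""
    parser = argparse.ArgumentParser()
    for name, field in model.model_fields.items():
        flag = "--" + name.replace("_", "-")
        if field.annotation is bool:
            parser.add_argument(flag, action="store_true", help=field.description)
        else:
            parser.add_argument(flag, default=field.default, help=field.description)
    return parser


# Parsed values round-trip through the model, so validation (e.g. the 0-30
# bound on time_delay, visible in the regenerated help text below) lives in
# one place instead of being duplicated in parser definitions.
args = RunArgs(**vars(parser_from_model(RunArgs).parse_args([])))
```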
diff --git a/docs/usage.md b/docs/usage.md
index fe102ddcd..c8c58a5fe 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -26,16 +26,14 @@ Each task is controlled by one of the following commands: `run`, `rerun`, `runp`
 Here you can see the command-line usage instructions for the main looper command and for each subcommand:
 
 ## `looper --help`
 ```console
-version: 1.7.0
-usage: looper [-h] [--version] [--logfile LOGFILE] [--dbg] [--silent]
-              [--verbosity V] [--logdev] [--commands]
-              {run,rerun,runp,table,report,destroy,check,clean,inspect,init,init-piface,link}
+usage: looper [-h] [-v] [--silent] [--verbosity VERBOSITY] [--logdev]
+              {run,rerun,runp,table,report,destroy,check,clean,init,init_piface,link,inspect}
               ...
 
-looper - A project job submission engine and project manager.
+Looper Pydantic Argument Parser
 
-positional arguments:
-  {run,rerun,runp,table,report,destroy,check,clean,inspect,init,init-piface,link}
+commands:
+  {run,rerun,runp,table,report,destroy,check,clean,init,init_piface,link,inspect}
     run                 Run or submit sample jobs.
     rerun               Resubmit sample jobs with failed flags.
     runp                Run or submit project jobs.
@@ -44,436 +42,628 @@ positional arguments:
     destroy             Remove output files of the project.
     check               Check flag status of current runs.
     clean               Run clean scripts of already processed jobs.
-    inspect             Print information about a project.
     init                Initialize looper config file.
-    init-piface         Initialize generic pipeline interface.
+    init_piface         Initialize generic pipeline interface.
     link                Create directory of symlinks for reported results.
+    inspect             Print information about a project.
 
-options:
+optional arguments:
+  --silent              Whether to silence logging (default: False)
+  --verbosity VERBOSITY
+                        Alternate mode of expression for logging level that
+                        better accords with intuition about how to convey
+                        this. (default: None)
+  --logdev              Whether to log in development mode; possibly among
+                        other behavioral changes to logs handling, use a more
+                        information-rich message format template. (default:
+                        False)
+
+help:
   -h, --help            show this help message and exit
-  --version             show program's version number and exit
-  --logfile LOGFILE     Optional output file for looper logs (default: None)
-  --dbg                 Turn on debug mode (default: False)
-  --silent              Silence logging. Overrides verbosity.
-  --verbosity V         Set logging level (1-5 or logging module level name)
-  --logdev              Expand content of logging message format.
-  --commands            show program's version number and exit
-
-For subcommand-specific options, type: 'looper <subcommand> -h'
-https://github.com/pepkit/looper
+  -v, --version         show program's version number and exit
 ```
 
 ## `looper run --help`
 ```console
-usage: looper run [-h] [-i] [-d] [-t S] [-x S] [-y S] [-f] [--divvy DIVCFG] [-p P] [-s S]
-                  [-c K [K ...]] [-u X] [-n N] [-j J] [--looper-config LOOPER_CONFIG]
-                  [-S YAML [YAML ...]] [-P YAML [YAML ...]] [-l N] [-k N]
-                  [--sel-attr ATTR] [--sel-excl [E ...] | --sel-incl [I ...]]
-                  [--sel-flag [SELFLAG ...]] [--exc-flag [EXCFLAG ...]] [-a A [A ...]]
-                  [config_file]
-
-Run or submit sample jobs.
-
-positional arguments:
-  config_file                         Project configuration file (YAML) or pephub registry
-                                      path.
-
-options:
-  -h, --help                          show this help message and exit
-  -i, --ignore-flags                  Ignore run status flags? Default=False
-  -d, --dry-run                       Don't actually submit the jobs. Default=False
-  -t S, --time-delay S                Time delay in seconds between job submissions
-  -x S, --command-extra S             String to append to every command
-  -y S, --command-extra-override S    Same as command-extra, but overrides values in PEP
-  -f, --skip-file-checks              Do not perform input file checks
-  -u X, --lump-s X                    Lump by size: total input file size (GB) to batch
-                                      into one job
-  -n N, --lump-n N                    Lump by number: number of samples to batch into one
-                                      job
-  -j J, --lump-j J                    Lump samples into number of jobs.
-  --looper-config LOOPER_CONFIG       Looper configuration file (YAML)
-  -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper sample config file
-  -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper project config file
-  -a A [A ...], --amend A [A ...]     List of amendments to activate
-
-divvy arguments:
-  Configure divvy to change computing settings
-
-  --divvy DIVCFG                      Path to divvy configuration file. Default=$DIVCFG env
-                                      variable. Currently: not set
-  -p P, --package P                   Name of computing resource package to use
-  -s S, --settings S                  Path to a YAML settings file with compute settings
-  -c K [K ...], --compute K [K ...]   List of key-value pairs (k1=v1)
-
-sample selection arguments:
-  Specify samples to include or exclude based on sample attribute values
-
-  -l N, --limit N                     Limit to n samples
-  -k N, --skip N                      Skip samples by numerical index
-  --sel-attr ATTR                     Attribute for sample exclusion OR inclusion
-  --sel-excl [E ...]                  Exclude samples with these values
-  --sel-incl [I ...]                  Include only samples with these values
-  --sel-flag [SELFLAG ...]            Include samples with this flag status, e.g. completed
-  --exc-flag [EXCFLAG ...]            Exclude samples with this flag status, e.g. completed
+usage: looper run [-h] [-i] [-t TIME_DELAY] [-d] [-x COMMAND_EXTRA]
+                  [-y COMMAND_EXTRA_OVERRIDE] [-u LUMP] [-n LUMP_N]
+                  [-j LUMP_J] [--divvy DIVVY] [-f] [-c COMPUTE [COMPUTE ...]]
+                  [--package PACKAGE] [--settings SETTINGS]
+                  [--exc-flag EXC_FLAG [EXC_FLAG ...]]
+                  [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR]
+                  [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL]
+                  [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG]
+                  [-o OUTPUT_DIR] [--config-file CONFIG_FILE]
+                  [--looper-config LOOPER_CONFIG]
+                  [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]]
+                  [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]]
+                  [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]]
+                  [--project]
+
+optional arguments:
+  -i, --ignore-flags    Ignore run status flags (default: False)
+  -t TIME_DELAY, --time-delay TIME_DELAY
+                        Time delay in seconds between job submissions (min: 0,
+                        max: 30) (default: 0)
+  -d, --dry-run         Don't actually submit jobs (default: False)
+  -x COMMAND_EXTRA, --command-extra COMMAND_EXTRA
+                        String to append to every command (default: )
+  -y COMMAND_EXTRA_OVERRIDE, --command-extra-override COMMAND_EXTRA_OVERRIDE
+                        Same as command-extra, but overrides values in PEP
+                        (default: )
+  -u LUMP, --lump LUMP  Total input file size (GB) to batch into one job
+                        (default: None)
+  -n LUMP_N, --lump-n LUMP_N
+                        Number of commands to batch into one job (default:
+                        None)
+  -j LUMP_J, --lump-j LUMP_J
+                        Lump samples into number of jobs. (default: None)
+  --divvy DIVVY         Path to divvy configuration file. Default=$DIVCFG env
+                        variable. Currently: not set (default: None)
+  -f, --skip-file-checks
+                        Do not perform input file checks (default: False)
+  -c COMPUTE [COMPUTE ...], --compute COMPUTE [COMPUTE ...]
+                        List of key-value pairs (k1=v1) (default: [])
+  --package PACKAGE     Name of computing resource package to use (default:
+                        None)
+  --settings SETTINGS   Path to a YAML settings file with compute settings
+                        (default: )
+  --exc-flag EXC_FLAG [EXC_FLAG ...]
+                        Sample exclusion flag (default: [])
+  --sel-flag SEL_FLAG [SEL_FLAG ...]
+                        Sample selection flag (default: [])
+  --sel-attr SEL_ATTR   Attribute for sample exclusion OR inclusion (default:
+                        toggle)
+  --sel-incl SEL_INCL [SEL_INCL ...]
+                        Include only samples with these values (default: [])
+  --sel-excl SEL_EXCL   Exclude samples with these values (default: )
+  -l LIMIT, --limit LIMIT
+                        Limit to n samples (default: None)
+  -k SKIP, --skip SKIP  Skip samples by numerical index (default: None)
+  --pep-config PEP_CONFIG
+                        PEP configuration file (default: None)
+  -o OUTPUT_DIR, --output-dir OUTPUT_DIR
+                        Output directory (default: None)
+  --config-file CONFIG_FILE
+                        Project configuration file (default: None)
+  --looper-config LOOPER_CONFIG
+                        Looper configuration file (YAML) (default: None)
+  -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]
+                        Paths to looper sample pipeline interfaces (default:
+                        [])
+  -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]
+                        Paths to looper project pipeline interfaces (default:
+                        [])
+  --pipestat PIPESTAT   Path to pipestat files. (default: None)
+  --amend AMEND [AMEND ...]
+                        List of amendments to activate (default: [])
+  --project             Is this command executed for project-level? (default:
+                        False)
+
+help:
+  -h, --help            show this help message and exit
 ```
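The `-c/--compute` option above takes `k1=v1` pairs; the deleted `_proc_resources_spec` helper near the end of this diff shows how looper interpreted them. A self-contained sketch of the same parsing rule (`parse_compute` is an illustrative name, and this variant uses `partition` rather than the original's `split("=")`):

```python
# Sketch of the "-c k1=v1 k2=v2" parsing rule: each item must contain "=",
# and the itemized settings end up in a plain dict of strings.
def parse_compute(pairs: list) -> dict:
    settings = {}
    bad = []
    for item in pairs:
        key, sep, value = item.partition("=")
        if not sep:
            bad.append(item)  # no "=" at all: not a key-value pair
        else:
            settings[key] = value
    if bad:
        raise ValueError(f"Not key=value pairs: {bad}")
    return settings


assert parse_compute(["mem=16000", "cores=4"]) == {"mem": "16000", "cores": "4"}
```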
## `looper runp --help`
```console
-usage: looper runp [-h] [-i] [-d] [-t S] [-x S] [-y S] [-f] [--divvy DIVCFG] [-p P] [-s S]
-                   [-c K [K ...]] [--looper-config LOOPER_CONFIG] [-S YAML [YAML ...]]
-                   [-P YAML [YAML ...]] [-l N] [-k N] [--sel-attr ATTR]
-                   [--sel-excl [E ...] | --sel-incl [I ...]] [--sel-flag [SELFLAG ...]]
-                   [--exc-flag [EXCFLAG ...]] [-a A [A ...]]
-                   [config_file]
-
-Run or submit project jobs.
-
-positional arguments:
-  config_file                         Project configuration file (YAML) or pephub registry
-                                      path.
-
-options:
-  -h, --help                          show this help message and exit
-  -i, --ignore-flags                  Ignore run status flags? Default=False
-  -d, --dry-run                       Don't actually submit the jobs. Default=False
-  -t S, --time-delay S                Time delay in seconds between job submissions
-  -x S, --command-extra S             String to append to every command
-  -y S, --command-extra-override S    Same as command-extra, but overrides values in PEP
-  -f, --skip-file-checks              Do not perform input file checks
-  --looper-config LOOPER_CONFIG       Looper configuration file (YAML)
-  -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper sample config file
-  -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper project config file
-  -a A [A ...], --amend A [A ...]     List of amendments to activate
-
-divvy arguments:
-  Configure divvy to change computing settings
-
-  --divvy DIVCFG                      Path to divvy configuration file. Default=$DIVCFG env
-                                      variable. Currently: not set
-  -p P, --package P                   Name of computing resource package to use
-  -s S, --settings S                  Path to a YAML settings file with compute settings
-  -c K [K ...], --compute K [K ...]   List of key-value pairs (k1=v1)
-
-sample selection arguments:
-  Specify samples to include or exclude based on sample attribute values
-
-  -l N, --limit N                     Limit to n samples
-  -k N, --skip N                      Skip samples by numerical index
-  --sel-attr ATTR                     Attribute for sample exclusion OR inclusion
-  --sel-excl [E ...]                  Exclude samples with these values
-  --sel-incl [I ...]                  Include only samples with these values
-  --sel-flag [SELFLAG ...]            Include samples with this flag status, e.g. completed
-  --exc-flag [EXCFLAG ...]            Exclude samples with this flag status, e.g. completed
+usage: looper runp [-h] [-i] [-t TIME_DELAY] [-d] [-x COMMAND_EXTRA]
+                   [-y COMMAND_EXTRA_OVERRIDE] [-u LUMP] [-n LUMP_N]
+                   [--divvy DIVVY] [-f] [-c COMPUTE [COMPUTE ...]]
+                   [--package PACKAGE] [--settings SETTINGS]
+                   [--exc-flag EXC_FLAG [EXC_FLAG ...]]
+                   [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR]
+                   [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL]
+                   [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG]
+                   [-o OUTPUT_DIR] [--config-file CONFIG_FILE]
+                   [--looper-config LOOPER_CONFIG]
+                   [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]]
+                   [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]]
+                   [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]]
+                   [--project]
+
+optional arguments:
+  -i, --ignore-flags    Ignore run status flags (default: False)
+  -t TIME_DELAY, --time-delay TIME_DELAY
+                        Time delay in seconds between job submissions (min: 0,
+                        max: 30) (default: 0)
+  -d, --dry-run         Don't actually submit jobs (default: False)
+  -x COMMAND_EXTRA, --command-extra COMMAND_EXTRA
+                        String to append to every command (default: )
+  -y COMMAND_EXTRA_OVERRIDE, --command-extra-override COMMAND_EXTRA_OVERRIDE
+                        Same as command-extra, but overrides values in PEP
+                        (default: )
+  -u LUMP, --lump LUMP  Total input file size (GB) to batch into one job
+                        (default: None)
+  -n LUMP_N, --lump-n LUMP_N
+                        Number of commands to batch into one job (default:
+                        None)
+  --divvy DIVVY         Path to divvy configuration file. Default=$DIVCFG env
+                        variable. Currently: not set (default: None)
+  -f, --skip-file-checks
+                        Do not perform input file checks (default: False)
+  -c COMPUTE [COMPUTE ...], --compute COMPUTE [COMPUTE ...]
+                        List of key-value pairs (k1=v1) (default: [])
+  --package PACKAGE     Name of computing resource package to use (default:
+                        None)
+  --settings SETTINGS   Path to a YAML settings file with compute settings
+                        (default: )
+  --exc-flag EXC_FLAG [EXC_FLAG ...]
+                        Sample exclusion flag (default: [])
+  --sel-flag SEL_FLAG [SEL_FLAG ...]
+                        Sample selection flag (default: [])
+  --sel-attr SEL_ATTR   Attribute for sample exclusion OR inclusion (default:
+                        toggle)
+  --sel-incl SEL_INCL [SEL_INCL ...]
+                        Include only samples with these values (default: [])
+  --sel-excl SEL_EXCL   Exclude samples with these values (default: )
+  -l LIMIT, --limit LIMIT
+                        Limit to n samples (default: None)
+  -k SKIP, --skip SKIP  Skip samples by numerical index (default: None)
+  --pep-config PEP_CONFIG
+                        PEP configuration file (default: None)
+  -o OUTPUT_DIR, --output-dir OUTPUT_DIR
+                        Output directory (default: None)
+  --config-file CONFIG_FILE
+                        Project configuration file (default: None)
+  --looper-config LOOPER_CONFIG
+                        Looper configuration file (YAML) (default: None)
+  -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]
+                        Paths to looper sample pipeline interfaces (default:
+                        [])
+  -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]
+                        Paths to looper project pipeline interfaces (default:
+                        [])
+  --pipestat PIPESTAT   Path to pipestat files. (default: None)
+  --amend AMEND [AMEND ...]
+                        List of amendments to activate (default: [])
+  --project             Is this command executed for project-level? (default:
+                        False)
+
+help:
+  -h, --help            show this help message and exit
 ```
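The `-u/--lump` and `-n/--lump-n` options above batch many sample commands into fewer cluster jobs. A rough illustration of size-based lumping follows; it is not looper's implementation, and all names are made up:

```python
# Greedy size-based lumping: close a batch once adding the next sample would
# push the accumulated input size past the limit. Purely illustrative.
def lump_by_size(samples, size_gb, limit_gb):
    batches, batch, total = [], [], 0.0
    for sample in samples:
        if batch and total + size_gb[sample] > limit_gb:
            batches.append(batch)
            batch, total = [], 0.0
        batch.append(sample)
        total += size_gb[sample]
    if batch:
        batches.append(batch)
    return batches


sizes = {"s1": 3.0, "s2": 2.5, "s3": 4.0}
assert lump_by_size(["s1", "s2", "s3"], sizes, limit_gb=6.0) == [["s1", "s2"], ["s3"]]
```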
## `looper rerun --help`
```console
-usage: looper rerun [-h] [-i] [-d] [-t S] [-x S] [-y S] [-f] [--divvy DIVCFG] [-p P]
-                    [-s S] [-c K [K ...]] [-u X] [-n N] [-j J]
-                    [--looper-config LOOPER_CONFIG] [-S YAML [YAML ...]]
-                    [-P YAML [YAML ...]] [-l N] [-k N] [--sel-attr ATTR]
-                    [--sel-excl [E ...] | --sel-incl [I ...]] [--sel-flag [SELFLAG ...]]
-                    [--exc-flag [EXCFLAG ...]] [-a A [A ...]]
-                    [config_file]
-
-Resubmit sample jobs with failed flags.
-
-positional arguments:
-  config_file                         Project configuration file (YAML) or pephub registry
-                                      path.
-
-options:
-  -h, --help                          show this help message and exit
-  -i, --ignore-flags                  Ignore run status flags? Default=False
-  -d, --dry-run                       Don't actually submit the jobs. Default=False
-  -t S, --time-delay S                Time delay in seconds between job submissions
-  -x S, --command-extra S             String to append to every command
-  -y S, --command-extra-override S    Same as command-extra, but overrides values in PEP
-  -f, --skip-file-checks              Do not perform input file checks
-  -u X, --lump-s X                    Lump by size: total input file size (GB) to batch
-                                      into one job
-  -n N, --lump-n N                    Lump by number: number of samples to batch into one
-                                      job
-  -j J, --lump-j J                    Lump samples into number of jobs.
-  --looper-config LOOPER_CONFIG       Looper configuration file (YAML)
-  -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper sample config file
-  -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper project config file
-  -a A [A ...], --amend A [A ...]     List of amendments to activate
-
-divvy arguments:
-  Configure divvy to change computing settings
-
-  --divvy DIVCFG                      Path to divvy configuration file. Default=$DIVCFG env
-                                      variable. Currently: not set
-  -p P, --package P                   Name of computing resource package to use
-  -s S, --settings S                  Path to a YAML settings file with compute settings
-  -c K [K ...], --compute K [K ...]   List of key-value pairs (k1=v1)
-
-sample selection arguments:
-  Specify samples to include or exclude based on sample attribute values
-
-  -l N, --limit N                     Limit to n samples
-  -k N, --skip N                      Skip samples by numerical index
-  --sel-attr ATTR                     Attribute for sample exclusion OR inclusion
-  --sel-excl [E ...]                  Exclude samples with these values
-  --sel-incl [I ...]                  Include only samples with these values
-  --sel-flag [SELFLAG ...]            Include samples with this flag status, e.g. completed
-  --exc-flag [EXCFLAG ...]            Exclude samples with this flag status, e.g. completed
+usage: looper rerun [-h] [-i] [-t TIME_DELAY] [-d] [-x COMMAND_EXTRA]
+                    [-y COMMAND_EXTRA_OVERRIDE] [-u LUMP] [-n LUMP_N]
+                    [-j LUMP_J] [--divvy DIVVY] [-f]
+                    [-c COMPUTE [COMPUTE ...]] [--package PACKAGE]
+                    [--settings SETTINGS] [--exc-flag EXC_FLAG [EXC_FLAG ...]]
+                    [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR]
+                    [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL]
+                    [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG]
+                    [-o OUTPUT_DIR] [--config-file CONFIG_FILE]
+                    [--looper-config LOOPER_CONFIG]
+                    [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]]
+                    [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]]
+                    [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]]
+                    [--project]
+
+optional arguments:
+  -i, --ignore-flags    Ignore run status flags (default: False)
+  -t TIME_DELAY, --time-delay TIME_DELAY
+                        Time delay in seconds between job submissions (min: 0,
+                        max: 30) (default: 0)
+  -d, --dry-run         Don't actually submit jobs (default: False)
+  -x COMMAND_EXTRA, --command-extra COMMAND_EXTRA
+                        String to append to every command (default: )
+  -y COMMAND_EXTRA_OVERRIDE, --command-extra-override COMMAND_EXTRA_OVERRIDE
+                        Same as command-extra, but overrides values in PEP
+                        (default: )
+  -u LUMP, --lump LUMP  Total input file size (GB) to batch into one job
+                        (default: None)
+  -n LUMP_N, --lump-n LUMP_N
+                        Number of commands to batch into one job (default:
+                        None)
+  -j LUMP_J, --lump-j LUMP_J
+                        Lump samples into number of jobs. (default: None)
+  --divvy DIVVY         Path to divvy configuration file. Default=$DIVCFG env
+                        variable. Currently: not set (default: None)
+  -f, --skip-file-checks
+                        Do not perform input file checks (default: False)
+  -c COMPUTE [COMPUTE ...], --compute COMPUTE [COMPUTE ...]
+                        List of key-value pairs (k1=v1) (default: [])
+  --package PACKAGE     Name of computing resource package to use (default:
+                        None)
+  --settings SETTINGS   Path to a YAML settings file with compute settings
+                        (default: )
+  --exc-flag EXC_FLAG [EXC_FLAG ...]
+                        Sample exclusion flag (default: [])
+  --sel-flag SEL_FLAG [SEL_FLAG ...]
+                        Sample selection flag (default: [])
+  --sel-attr SEL_ATTR   Attribute for sample exclusion OR inclusion (default:
+                        toggle)
+  --sel-incl SEL_INCL [SEL_INCL ...]
+                        Include only samples with these values (default: [])
+  --sel-excl SEL_EXCL   Exclude samples with these values (default: )
+  -l LIMIT, --limit LIMIT
+                        Limit to n samples (default: None)
+  -k SKIP, --skip SKIP  Skip samples by numerical index (default: None)
+  --pep-config PEP_CONFIG
+                        PEP configuration file (default: None)
+  -o OUTPUT_DIR, --output-dir OUTPUT_DIR
+                        Output directory (default: None)
+  --config-file CONFIG_FILE
+                        Project configuration file (default: None)
+  --looper-config LOOPER_CONFIG
+                        Looper configuration file (YAML) (default: None)
+  -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]
+                        Paths to looper sample pipeline interfaces (default:
+                        [])
+  -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]
+                        Paths to looper project pipeline interfaces (default:
+                        [])
+  --pipestat PIPESTAT   Path to pipestat files. (default: None)
+  --amend AMEND [AMEND ...]
+                        List of amendments to activate (default: [])
+  --project             Is this command executed for project-level? (default:
+                        False)
+
+help:
+  -h, --help            show this help message and exit
 ```
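`rerun` resubmits samples whose flags mark them as failed, and per the 1.8.0 changelog it now also picks up waiting flags. An illustrative flag-file check follows; looper's real flag handling lives in its pipestat integration, so the file layout and naming here are assumptions:

```python
# Illustrative only: decide whether a sample qualifies for rerun by looking
# for "<sample>_<flag>.flag" files. This layout is an assumption, not looper's.
from pathlib import Path


def eligible_for_rerun(results_dir, sample, flags=("failed", "waiting")):
    return any(
        (Path(results_dir) / f"{sample}_{flag}.flag").exists() for flag in flags
    )
```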
## `looper report --help`
```console
-usage: looper report [-h] [--looper-config LOOPER_CONFIG] [-S YAML [YAML ...]]
-                     [-P YAML [YAML ...]] [-l N] [-k N] [--sel-attr ATTR]
-                     [--sel-excl [E ...] | --sel-incl [I ...]] [--sel-flag [SELFLAG ...]]
-                     [--exc-flag [EXCFLAG ...]] [-a A [A ...]] [--project] [--portable]
-                     [config_file]
-
-Create browsable HTML report of project results.
-
-positional arguments:
-  config_file                         Project configuration file (YAML) or pephub registry
-                                      path.
-
-options:
-  -h, --help                          show this help message and exit
-  --looper-config LOOPER_CONFIG       Looper configuration file (YAML)
-  -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper sample config file
-  -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper project config file
-  -a A [A ...], --amend A [A ...]     List of amendments to activate
-  --project                           Process project-level pipelines
-  --portable                          Makes html report portable.
-
-sample selection arguments:
-  Specify samples to include or exclude based on sample attribute values
-
-  -l N, --limit N                     Limit to n samples
-  -k N, --skip N                      Skip samples by numerical index
-  --sel-attr ATTR                     Attribute for sample exclusion OR inclusion
-  --sel-excl [E ...]                  Exclude samples with these values
-  --sel-incl [I ...]                  Include only samples with these values
-  --sel-flag [SELFLAG ...]            Include samples with this flag status, e.g. completed
-  --exc-flag [EXCFLAG ...]            Exclude samples with this flag status, e.g. completed
+usage: looper report [-h] [--portable] [--settings SETTINGS]
+                     [--exc-flag EXC_FLAG [EXC_FLAG ...]]
+                     [--sel-flag SEL_FLAG [SEL_FLAG ...]]
+                     [--sel-attr SEL_ATTR]
+                     [--sel-incl SEL_INCL [SEL_INCL ...]]
+                     [--sel-excl SEL_EXCL] [-l LIMIT] [-k SKIP]
+                     [--pep-config PEP_CONFIG] [-o OUTPUT_DIR]
+                     [--config-file CONFIG_FILE]
+                     [--looper-config LOOPER_CONFIG]
+                     [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]]
+                     [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]]
+                     [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]]
+                     [--project]
+
+optional arguments:
+  --portable            Makes html report portable. (default: False)
+  --settings SETTINGS   Path to a YAML settings file with compute settings
+                        (default: )
+  --exc-flag EXC_FLAG [EXC_FLAG ...]
+                        Sample exclusion flag (default: [])
+  --sel-flag SEL_FLAG [SEL_FLAG ...]
+                        Sample selection flag (default: [])
+  --sel-attr SEL_ATTR   Attribute for sample exclusion OR inclusion (default:
+                        toggle)
+  --sel-incl SEL_INCL [SEL_INCL ...]
+                        Include only samples with these values (default: [])
+  --sel-excl SEL_EXCL   Exclude samples with these values (default: )
+  -l LIMIT, --limit LIMIT
+                        Limit to n samples (default: None)
+  -k SKIP, --skip SKIP  Skip samples by numerical index (default: None)
+  --pep-config PEP_CONFIG
+                        PEP configuration file (default: None)
+  -o OUTPUT_DIR, --output-dir OUTPUT_DIR
+                        Output directory (default: None)
+  --config-file CONFIG_FILE
+                        Project configuration file (default: None)
+  --looper-config LOOPER_CONFIG
+                        Looper configuration file (YAML) (default: None)
+  -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]
+                        Paths to looper sample pipeline interfaces (default:
+                        [])
+  -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]
+                        Paths to looper project pipeline interfaces (default:
+                        [])
+  --pipestat PIPESTAT   Path to pipestat files. (default: None)
+  --amend AMEND [AMEND ...]
+                        List of amendments to activate (default: [])
+  --project             Is this command executed for project-level? (default:
+                        False)
+
+help:
+  -h, --help            show this help message and exit
 ```
 
 ## `looper table --help`
 ```console
-usage: looper table [-h] [--looper-config LOOPER_CONFIG] [-S YAML [YAML ...]]
-                    [-P YAML [YAML ...]] [-l N] [-k N] [--sel-attr ATTR]
-                    [--sel-excl [E ...] | --sel-incl [I ...]] [--sel-flag [SELFLAG ...]]
-                    [--exc-flag [EXCFLAG ...]] [-a A [A ...]] [--project]
-                    [config_file]
-
-Write summary stats table for project samples.
-
-positional arguments:
-  config_file                         Project configuration file (YAML) or pephub registry
-                                      path.
-
-options:
-  -h, --help                          show this help message and exit
-  --looper-config LOOPER_CONFIG       Looper configuration file (YAML)
-  -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper sample config file
-  -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper project config file
-  -a A [A ...], --amend A [A ...]     List of amendments to activate
-  --project                           Process project-level pipelines
-
-sample selection arguments:
-  Specify samples to include or exclude based on sample attribute values
-
-  -l N, --limit N                     Limit to n samples
-  -k N, --skip N                      Skip samples by numerical index
-  --sel-attr ATTR                     Attribute for sample exclusion OR inclusion
-  --sel-excl [E ...]                  Exclude samples with these values
-  --sel-incl [I ...]                  Include only samples with these values
-  --sel-flag [SELFLAG ...]            Include samples with this flag status, e.g. completed
-  --exc-flag [EXCFLAG ...]            Exclude samples with this flag status, e.g. completed
+usage: looper table [-h] [--settings SETTINGS]
+                    [--exc-flag EXC_FLAG [EXC_FLAG ...]]
+                    [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR]
+                    [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL]
+                    [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG]
+                    [-o OUTPUT_DIR] [--config-file CONFIG_FILE]
+                    [--looper-config LOOPER_CONFIG]
+                    [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]]
+                    [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]]
+                    [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]]
+                    [--project]
+
+optional arguments:
+  --settings SETTINGS   Path to a YAML settings file with compute settings
+                        (default: )
+  --exc-flag EXC_FLAG [EXC_FLAG ...]
+                        Sample exclusion flag (default: [])
+  --sel-flag SEL_FLAG [SEL_FLAG ...]
+                        Sample selection flag (default: [])
+  --sel-attr SEL_ATTR   Attribute for sample exclusion OR inclusion (default:
+                        toggle)
+  --sel-incl SEL_INCL [SEL_INCL ...]
+                        Include only samples with these values (default: [])
+  --sel-excl SEL_EXCL   Exclude samples with these values (default: )
+  -l LIMIT, --limit LIMIT
+                        Limit to n samples (default: None)
+  -k SKIP, --skip SKIP  Skip samples by numerical index (default: None)
+  --pep-config PEP_CONFIG
+                        PEP configuration file (default: None)
+  -o OUTPUT_DIR, --output-dir OUTPUT_DIR
+                        Output directory (default: None)
+  --config-file CONFIG_FILE
+                        Project configuration file (default: None)
+  --looper-config LOOPER_CONFIG
+                        Looper configuration file (YAML) (default: None)
+  -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]
+                        Paths to looper sample pipeline interfaces (default:
+                        [])
+  -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]
+                        Paths to looper project pipeline interfaces (default:
+                        [])
+  --pipestat PIPESTAT   Path to pipestat files. (default: None)
+  --amend AMEND [AMEND ...]
+                        List of amendments to activate (default: [])
+  --project             Is this command executed for project-level? (default:
+                        False)
+
+help:
+  -h, --help            show this help message and exit
 ```
 
 ## `looper inspect --help`
 ```console
-usage: looper inspect [-h] [--looper-config LOOPER_CONFIG] [-S YAML [YAML ...]]
-                      [-P YAML [YAML ...]] [-l N] [-k N] [--sel-attr ATTR]
-                      [--sel-excl [E ...] | --sel-incl [I ...]] [--sel-flag [SELFLAG ...]]
-                      [--exc-flag [EXCFLAG ...]] [-a A [A ...]]
-                      [--sample-names [SAMPLE_NAMES ...]] [--attr-limit ATTR_LIMIT]
-                      [config_file]
-
-Print information about a project.
-
-positional arguments:
-  config_file                         Project configuration file (YAML) or pephub registry
-                                      path.
-
-options:
-  -h, --help                          show this help message and exit
-  --looper-config LOOPER_CONFIG       Looper configuration file (YAML)
-  -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper sample config file
-  -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper project config file
-  -a A [A ...], --amend A [A ...]     List of amendments to activate
-  --sample-names [SAMPLE_NAMES ...]   Names of the samples to inspect
-  --attr-limit ATTR_LIMIT             Number of attributes to display
-
-sample selection arguments:
-  Specify samples to include or exclude based on sample attribute values
-
-  -l N, --limit N                     Limit to n samples
-  -k N, --skip N                      Skip samples by numerical index
-  --sel-attr ATTR                     Attribute for sample exclusion OR inclusion
-  --sel-excl [E ...]                  Exclude samples with these values
-  --sel-incl [I ...]                  Include only samples with these values
-  --sel-flag [SELFLAG ...]            Include samples with this flag status, e.g. completed
-  --exc-flag [EXCFLAG ...]            Exclude samples with this flag status, e.g. completed
+usage: looper inspect [-h] [--settings SETTINGS]
+                      [--exc-flag EXC_FLAG [EXC_FLAG ...]]
+                      [--sel-flag SEL_FLAG [SEL_FLAG ...]]
+                      [--sel-attr SEL_ATTR]
+                      [--sel-incl SEL_INCL [SEL_INCL ...]]
+                      [--sel-excl SEL_EXCL] [-l LIMIT] [-k SKIP]
+                      [--pep-config PEP_CONFIG] [-o OUTPUT_DIR]
+                      [--config-file CONFIG_FILE]
+                      [--looper-config LOOPER_CONFIG]
+                      [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]]
+                      [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]]
+                      [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]]
+                      [--project]
+
+optional arguments:
+  --settings SETTINGS   Path to a YAML settings file with compute settings
+                        (default: )
+  --exc-flag EXC_FLAG [EXC_FLAG ...]
+                        Sample exclusion flag (default: [])
+  --sel-flag SEL_FLAG [SEL_FLAG ...]
+                        Sample selection flag (default: [])
+  --sel-attr SEL_ATTR   Attribute for sample exclusion OR inclusion (default:
+                        toggle)
+  --sel-incl SEL_INCL [SEL_INCL ...]
+                        Include only samples with these values (default: [])
+  --sel-excl SEL_EXCL   Exclude samples with these values (default: )
+  -l LIMIT, --limit LIMIT
+                        Limit to n samples (default: None)
+  -k SKIP, --skip SKIP  Skip samples by numerical index (default: None)
+  --pep-config PEP_CONFIG
+                        PEP configuration file (default: None)
+  -o OUTPUT_DIR, --output-dir OUTPUT_DIR
+                        Output directory (default: None)
+  --config-file CONFIG_FILE
+                        Project configuration file (default: None)
+  --looper-config LOOPER_CONFIG
+                        Looper configuration file (YAML) (default: None)
+  -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]
+                        Paths to looper sample pipeline interfaces (default:
+                        [])
+  -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]
+                        Paths to looper project pipeline interfaces (default:
+                        [])
+  --pipestat PIPESTAT   Path to pipestat files. (default: None)
+  --amend AMEND [AMEND ...]
+                        List of amendments to activate (default: [])
+  --project             Is this command executed for project-level? (default:
+                        False)
+
+help:
+  -h, --help            show this help message and exit
 ```
 
 ## `looper init --help`
 ```console
-usage: looper init [-h] [-f] [-o DIR] [-S YAML [YAML ...]] [-P YAML [YAML ...]] [-p]
-                   pep_config
-
-Initialize looper config file.
-
-positional arguments:
-  pep_config                          Project configuration file (PEP)
-
-options:
-  -h, --help                          show this help message and exit
-  -f, --force                         Force overwrite
-  -o DIR, --output-dir DIR
-  -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper sample config file
-  -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper project config file
-  -p, --piface                        Generates generic pipeline interface
+usage: looper init [-h] [-f] [-o OUTPUT_DIR] [--pep-config PEP_CONFIG]
+                   [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]]
+                   [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]]
+
+optional arguments:
+  -f, --force-yes       Provide upfront confirmation of destruction intent, to
+                        skip console query. Default=False (default: False)
+  -o OUTPUT_DIR, --output-dir OUTPUT_DIR
+                        Output directory (default: None)
+  --pep-config PEP_CONFIG
+                        PEP configuration file (default: None)
+  -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]
+                        Paths to looper sample pipeline interfaces (default:
+                        [])
+  -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]
+                        Paths to looper project pipeline interfaces (default:
+                        [])
+
+help:
+  -h, --help            show this help message and exit
 ```
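`looper init` writes a starter config like the five-line `.looper.yaml` added at the top of this diff. The equivalent by hand, as a sketch (PyYAML assumed; `looper init` may write more than this):

```python
# Write the same minimal config shown in the .looper.yaml hunk above.
import yaml  # PyYAML, a looper dependency

config = {
    "pep_config": "example/pep/path",
    "output_dir": ".",
    "pipeline_interfaces": {"sample": [], "project": []},
}
with open(".looper.yaml", "w") as f:
    yaml.safe_dump(config, f, sort_keys=False)
```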
## `looper destroy --help`
```console
-usage: looper destroy [-h] [-d] [--force-yes] [--looper-config LOOPER_CONFIG]
-                      [-S YAML [YAML ...]] [-P YAML [YAML ...]] [-l N] [-k N]
-                      [--sel-attr ATTR] [--sel-excl [E ...] | --sel-incl [I ...]]
-                      [--sel-flag [SELFLAG ...]] [--exc-flag [EXCFLAG ...]] [-a A [A ...]]
+usage: looper destroy [-h] [-d] [-f] [--settings SETTINGS]
+                      [--exc-flag EXC_FLAG [EXC_FLAG ...]]
+                      [--sel-flag SEL_FLAG [SEL_FLAG ...]]
+                      [--sel-attr SEL_ATTR]
+                      [--sel-incl SEL_INCL [SEL_INCL ...]]
+                      [--sel-excl SEL_EXCL] [-l LIMIT] [-k SKIP]
+                      [--pep-config PEP_CONFIG] [-o OUTPUT_DIR]
+                      [--config-file CONFIG_FILE]
+                      [--looper-config LOOPER_CONFIG]
+                      [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]]
+                      [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]]
+                      [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]]
                       [--project]
-                      [config_file]
-
-Remove output files of the project.
-
-positional arguments:
-  config_file                         Project configuration file (YAML) or pephub registry
-                                      path.
-
-options:
-  -h, --help                          show this help message and exit
-  -d, --dry-run                       Don't actually submit the jobs. Default=False
-  --force-yes                         Provide upfront confirmation of destruction intent,
-                                      to skip console query. Default=False
-  --looper-config LOOPER_CONFIG       Looper configuration file (YAML)
-  -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper sample config file
-  -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper project config file
-  -a A [A ...], --amend A [A ...]     List of amendments to activate
-  --project                           Process project-level pipelines
-
-sample selection arguments:
-  Specify samples to include or exclude based on sample attribute values
-
-  -l N, --limit N                     Limit to n samples
-  -k N, --skip N                      Skip samples by numerical index
-  --sel-attr ATTR                     Attribute for sample exclusion OR inclusion
-  --sel-excl [E ...]                  Exclude samples with these values
-  --sel-incl [I ...]                  Include only samples with these values
-  --sel-flag [SELFLAG ...]            Include samples with this flag status, e.g. completed
-  --exc-flag [EXCFLAG ...]            Exclude samples with this flag status, e.g. completed
+
+optional arguments:
+  -d, --dry-run         Don't actually submit jobs (default: False)
+  -f, --force-yes       Provide upfront confirmation of destruction intent, to
+                        skip console query. Default=False (default: False)
+  --settings SETTINGS   Path to a YAML settings file with compute settings
+                        (default: )
+  --exc-flag EXC_FLAG [EXC_FLAG ...]
+                        Sample exclusion flag (default: [])
+  --sel-flag SEL_FLAG [SEL_FLAG ...]
+                        Sample selection flag (default: [])
+  --sel-attr SEL_ATTR   Attribute for sample exclusion OR inclusion (default:
+                        toggle)
+  --sel-incl SEL_INCL [SEL_INCL ...]
+                        Include only samples with these values (default: [])
+  --sel-excl SEL_EXCL   Exclude samples with these values (default: )
+  -l LIMIT, --limit LIMIT
+                        Limit to n samples (default: None)
+  -k SKIP, --skip SKIP  Skip samples by numerical index (default: None)
+  --pep-config PEP_CONFIG
+                        PEP configuration file (default: None)
+  -o OUTPUT_DIR, --output-dir OUTPUT_DIR
+                        Output directory (default: None)
+  --config-file CONFIG_FILE
+                        Project configuration file (default: None)
+  --looper-config LOOPER_CONFIG
+                        Looper configuration file (YAML) (default: None)
+  -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]
+                        Paths to looper sample pipeline interfaces (default:
+                        [])
+  -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]
+                        Paths to looper project pipeline interfaces (default:
+                        [])
+  --pipestat PIPESTAT   Path to pipestat files. (default: None)
+  --amend AMEND [AMEND ...]
+                        List of amendments to activate (default: [])
+  --project             Is this command executed for project-level? (default:
+                        False)
+
+help:
+  -h, --help            show this help message and exit
 ```
 
 ## `looper check --help`
 ```console
-usage: looper check [-h] [--describe-codes] [--itemized] [-f [F ...]]
-                    [--looper-config LOOPER_CONFIG] [-S YAML [YAML ...]]
-                    [-P YAML [YAML ...]] [-l N] [-k N] [--sel-attr ATTR]
-                    [--sel-excl [E ...] | --sel-incl [I ...]] [--sel-flag [SELFLAG ...]]
-                    [--exc-flag [EXCFLAG ...]] [-a A [A ...]] [--project]
-                    [config_file]
-
-Check flag status of current runs.
-
-positional arguments:
-  config_file                         Project configuration file (YAML) or pephub registry
-                                      path.
-
-options:
-  -h, --help                          show this help message and exit
-  --describe-codes                    Show status codes description
-  --itemized                          Show a detailed, by sample statuses
-  -f [F ...], --flags [F ...]         Check on only these flags/status values
-  --looper-config LOOPER_CONFIG       Looper configuration file (YAML)
-  -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper sample config file
-  -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper project config file
-  -a A [A ...], --amend A [A ...]     List of amendments to activate
-  --project                           Process project-level pipelines
-
-sample selection arguments:
-  Specify samples to include or exclude based on sample attribute values
-
-  -l N, --limit N                     Limit to n samples
-  -k N, --skip N                      Skip samples by numerical index
-  --sel-attr ATTR                     Attribute for sample exclusion OR inclusion
-  --sel-excl [E ...]                  Exclude samples with these values
-  --sel-incl [I ...]                  Include only samples with these values
-  --sel-flag [SELFLAG ...]            Include samples with this flag status, e.g. completed
-  --exc-flag [EXCFLAG ...]            Exclude samples with this flag status, e.g. completed
+usage: looper check [-h] [--describe-codes] [--itemized]
+                    [-f FLAGS [FLAGS ...]] [--settings SETTINGS]
+                    [--exc-flag EXC_FLAG [EXC_FLAG ...]]
+                    [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR]
+                    [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL]
+                    [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG]
+                    [-o OUTPUT_DIR] [--config-file CONFIG_FILE]
+                    [--looper-config LOOPER_CONFIG]
+                    [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]]
+                    [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]]
+                    [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]]
+                    [--project]
+
+optional arguments:
+  --describe-codes      Show status codes description. Default=False (default:
+                        False)
+  --itemized            Show detailed overview of sample statuses.
+                        Default=False (default: False)
+  -f FLAGS [FLAGS ...], --flags FLAGS [FLAGS ...]
+                        Only check samples based on these status flags.
+                        (default: [])
+  --settings SETTINGS   Path to a YAML settings file with compute settings
+                        (default: )
+  --exc-flag EXC_FLAG [EXC_FLAG ...]
+                        Sample exclusion flag (default: [])
+  --sel-flag SEL_FLAG [SEL_FLAG ...]
+                        Sample selection flag (default: [])
+  --sel-attr SEL_ATTR   Attribute for sample exclusion OR inclusion (default:
+                        toggle)
+  --sel-incl SEL_INCL [SEL_INCL ...]
+                        Include only samples with these values (default: [])
+  --sel-excl SEL_EXCL   Exclude samples with these values (default: )
+  -l LIMIT, --limit LIMIT
+                        Limit to n samples (default: None)
+  -k SKIP, --skip SKIP  Skip samples by numerical index (default: None)
+  --pep-config PEP_CONFIG
+                        PEP configuration file (default: None)
+  -o OUTPUT_DIR, --output-dir OUTPUT_DIR
+                        Output directory (default: None)
+  --config-file CONFIG_FILE
+                        Project configuration file (default: None)
+  --looper-config LOOPER_CONFIG
+                        Looper configuration file (YAML) (default: None)
+  -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]
+                        Paths to looper sample pipeline interfaces (default:
+                        [])
+  -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]
+                        Paths to looper project pipeline interfaces (default:
+                        [])
+  --pipestat PIPESTAT   Path to pipestat files. (default: None)
+  --amend AMEND [AMEND ...]
+                        List of amendments to activate (default: [])
+  --project             Is this command executed for project-level? (default:
+                        False)
+
+help:
+  -h, --help            show this help message and exit
 ```
 
 ## `looper clean --help`
 ```console
-usage: looper clean [-h] [-d] [--force-yes] [--looper-config LOOPER_CONFIG]
-                    [-S YAML [YAML ...]] [-P YAML [YAML ...]] [-l N] [-k N]
-                    [--sel-attr ATTR] [--sel-excl [E ...] | --sel-incl [I ...]]
-                    [--sel-flag [SELFLAG ...]] [--exc-flag [EXCFLAG ...]] [-a A [A ...]]
-                    [config_file]
-
-Run clean scripts of already processed jobs.
-
-positional arguments:
-  config_file                         Project configuration file (YAML) or pephub registry
-                                      path.
-
-options:
-  -h, --help                          show this help message and exit
-  -d, --dry-run                       Don't actually submit the jobs. Default=False
-  --force-yes                         Provide upfront confirmation of destruction intent,
-                                      to skip console query. Default=False
-  --looper-config LOOPER_CONFIG       Looper configuration file (YAML)
-  -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper sample config file
-  -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...]
-                                      Path to looper project config file
-  -a A [A ...], --amend A [A ...]     List of amendments to activate
-
-sample selection arguments:
-  Specify samples to include or exclude based on sample attribute values
-
-  -l N, --limit N                     Limit to n samples
-  -k N, --skip N                      Skip samples by numerical index
-  --sel-attr ATTR                     Attribute for sample exclusion OR inclusion
-  --sel-excl [E ...]                  Exclude samples with these values
-  --sel-incl [I ...]                  Include only samples with these values
-  --sel-flag [SELFLAG ...]            Include samples with this flag status, e.g. completed
-  --exc-flag [EXCFLAG ...]            Exclude samples with this flag status, e.g. completed
+usage: looper clean [-h] [-d] [-f] [--settings SETTINGS]
+                    [--exc-flag EXC_FLAG [EXC_FLAG ...]]
+                    [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR]
+                    [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL]
+                    [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG]
+                    [-o OUTPUT_DIR] [--config-file CONFIG_FILE]
+                    [--looper-config LOOPER_CONFIG]
+                    [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]]
+                    [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]]
+                    [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]]
+                    [--project]
+
+optional arguments:
+  -d, --dry-run         Don't actually submit jobs (default: False)
+  -f, --force-yes       Provide upfront confirmation of destruction intent, to
+                        skip console query. Default=False (default: False)
+  --settings SETTINGS   Path to a YAML settings file with compute settings
+                        (default: )
+  --exc-flag EXC_FLAG [EXC_FLAG ...]
+                        Sample exclusion flag (default: [])
+  --sel-flag SEL_FLAG [SEL_FLAG ...]
+                        Sample selection flag (default: [])
+  --sel-attr SEL_ATTR   Attribute for sample exclusion OR inclusion (default:
+                        toggle)
+  --sel-incl SEL_INCL [SEL_INCL ...]
+                        Include only samples with these values (default: [])
+  --sel-excl SEL_EXCL   Exclude samples with these values (default: )
+  -l LIMIT, --limit LIMIT
+                        Limit to n samples (default: None)
+  -k SKIP, --skip SKIP  Skip samples by numerical index (default: None)
+  --pep-config PEP_CONFIG
+                        PEP configuration file (default: None)
+  -o OUTPUT_DIR, --output-dir OUTPUT_DIR
+                        Output directory (default: None)
+  --config-file CONFIG_FILE
+                        Project configuration file (default: None)
+  --looper-config LOOPER_CONFIG
+                        Looper configuration file (YAML) (default: None)
+  -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]
+                        Paths to looper sample pipeline interfaces (default:
+                        [])
+  -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]
+                        Paths to looper project pipeline interfaces (default:
+                        [])
+  --pipestat PIPESTAT   Path to pipestat files. (default: None)
+  --amend AMEND [AMEND ...]
+                        List of amendments to activate (default: [])
+  --project             Is this command executed for project-level? (default:
+                        False)
+
+help:
+  -h, --help            show this help message and exit
 ```
diff --git a/looper/__main__.py b/looper/__main__.py
index 5ec266e80..3e9816554 100644
--- a/looper/__main__.py
+++ b/looper/__main__.py
@@ -1,6 +1,6 @@
 import sys
 
-from .cli_looper import main
+from .cli_pydantic import main
 from .cli_divvy import main as divvy_main
 
 if __name__ == "__main__":
diff --git a/looper/_version.py b/looper/_version.py
index 3c1e9cbd3..2ce1f6586 100644
--- a/looper/_version.py
+++ b/looper/_version.py
@@ -1 +1,2 @@
-__version__ = "1.7.1"
+__version__ = "1.8.1"
+# You must change the version in parser = pydantic2_argparse.ArgumentParser in cli_pydantic.py!!!
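The new comment in `_version.py` flags a dual-maintenance hazard: the version string must also be updated where `cli_pydantic.py` constructs its `pydantic2_argparse.ArgumentParser`. One way to keep a single source of truth, sketched with plain argparse (whether the pydantic parser accepts a version argument the same way is not shown in this diff):

```python
# Sketch: feed the one __version__ constant into the parser instead of
# repeating the string, so a release bump is a single edit.
# Assumes the looper package is importable.
import argparse

from looper._version import __version__

parser = argparse.ArgumentParser(prog="looper")
parser.add_argument(
    "-v", "--version", action="version", version=f"%(prog)s {__version__}"
)
```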
- """ - - def __init__(self, option_strings, dest, type, default, required=False, help=None): - super(_StoreBoolActionType, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - const=not default, - default=default, - type=type, - required=required, - help=help, - ) - - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, self.const) - - -def build_parser(): - """ - Building argument parser. - - :return argparse.ArgumentParser - """ - # Main looper program help text messages - banner = "%(prog)s - A project job submission engine and project manager." - additional_description = ( - "For subcommand-specific options, " "type: '%(prog)s -h'" - ) - additional_description += "\nhttps://github.com/pepkit/looper" - - parser = VersionInHelpParser( - prog="looper", - description=banner, - epilog=additional_description, - version=__version__, - ) - - aux_parser = VersionInHelpParser( - prog="looper", - description=banner, - epilog=additional_description, - version=__version__, - ) - result = [] - for parser in [parser, aux_parser]: - # Logging control - parser.add_argument( - "--logfile", - help="Optional output file for looper logs " "(default: %(default)s)", - ) - parser.add_argument("--logging-level", help=argparse.SUPPRESS) - parser.add_argument( - "--dbg", - action="store_true", - help="Turn on debug mode (default: %(default)s)", - ) - - parser = logmuse.add_logging_options(parser) - subparsers = parser.add_subparsers(dest="command") - - def add_subparser(cmd): - message = MESSAGE_BY_SUBCOMMAND[cmd] - return subparsers.add_parser( - cmd, - description=message, - help=message, - formatter_class=lambda prog: argparse.HelpFormatter( - prog, max_help_position=37, width=90 - ), - ) - - # Run and rerun command - run_subparser = add_subparser("run") - rerun_subparser = add_subparser("rerun") - collate_subparser = add_subparser("runp") - table_subparser = add_subparser("table") - report_subparser = add_subparser("report") - destroy_subparser = add_subparser("destroy") - check_subparser = add_subparser("check") - clean_subparser = add_subparser("clean") - inspect_subparser = add_subparser("inspect") - init_subparser = add_subparser("init") - init_piface = add_subparser("init-piface") - link_subparser = add_subparser("link") - - # Flag arguments - #################################################################### - for subparser in [run_subparser, rerun_subparser, collate_subparser]: - subparser.add_argument( - "-i", - "--ignore-flags", - default=False, - action=_StoreBoolActionType, - type=html_checkbox(checked=False), - help="Ignore run status flags? Default=False", - ) - - for subparser in [ - run_subparser, - rerun_subparser, - destroy_subparser, - clean_subparser, - collate_subparser, - ]: - subparser.add_argument( - "-d", - "--dry-run", - action=_StoreBoolActionType, - default=False, - type=html_checkbox(checked=False), - help="Don't actually submit the jobs. 
Default=False", - ) - - # Parameter arguments - #################################################################### - for subparser in [run_subparser, rerun_subparser, collate_subparser]: - subparser.add_argument( - "-t", - "--time-delay", - metavar="S", - type=html_range(min_val=0, max_val=30, value=0), - default=0, - help="Time delay in seconds between job submissions", - ) - - subparser.add_argument( - "-x", - "--command-extra", - default="", - metavar="S", - help="String to append to every command", - ) - subparser.add_argument( - "-y", - "--command-extra-override", - metavar="S", - default="", - help="Same as command-extra, but overrides values in PEP", - ) - subparser.add_argument( - "-f", - "--skip-file-checks", - action=_StoreBoolActionType, - default=False, - type=html_checkbox(checked=False), - help="Do not perform input file checks", - ) - - divvy_group = subparser.add_argument_group( - "divvy arguments", "Configure divvy to change computing settings" - ) - divvy_group.add_argument( - "--divvy", - default=None, - metavar="DIVCFG", - help="Path to divvy configuration file. Default=$DIVCFG env " - "variable. Currently: {}".format( - os.getenv("DIVCFG", None) or "not set" - ), - ) - divvy_group.add_argument( - "-p", - "--package", - metavar="P", - help="Name of computing resource package to use", - ) - divvy_group.add_argument( - "-s", - "--settings", - default="", - metavar="S", - help="Path to a YAML settings file with compute settings", - ) - divvy_group.add_argument( - "-c", - "--compute", - metavar="K", - nargs="+", - help="List of key-value pairs (k1=v1)", - ) - - for subparser in [run_subparser, rerun_subparser]: - subparser.add_argument( - "-u", - "--lump-s", - default=None, - metavar="X", - type=html_range(min_val=0, max_val=100, step=0.1, value=0), - help="Lump by size: total input file size (GB) to batch into one job", - ) - subparser.add_argument( - "-n", - "--lump-n", - default=None, - metavar="N", - type=html_range(min_val=1, max_val="num_samples", value=1), - help="Lump by number: number of samples to batch into one job", - ) - subparser.add_argument( - "-j", - "--lump-j", - default=None, - metavar="J", - type=int, - help="Lump samples into number of jobs.", - ) - - check_subparser.add_argument( - "--describe-codes", - help="Show status codes description", - action="store_true", - default=False, - ) - - check_subparser.add_argument( - "--itemized", - help="Show a detailed, by sample statuses", - action="store_true", - default=False, - ) - - check_subparser.add_argument( - "-f", - "--flags", - nargs="*", - default=FLAGS, - type=html_select(choices=FLAGS), - metavar="F", - help="Check on only these flags/status values", - ) - - for subparser in [destroy_subparser, clean_subparser]: - subparser.add_argument( - "--force-yes", - action=_StoreBoolActionType, - default=False, - type=html_checkbox(checked=False), - help="Provide upfront confirmation of destruction intent, " - "to skip console query. 
Default=False", - ) - - init_subparser.add_argument( - "pep_config", help="Project configuration file (PEP)" - ) - - init_subparser.add_argument( - "-f", "--force", help="Force overwrite", action="store_true", default=False - ) - - init_subparser.add_argument( - "-o", - "--output-dir", - dest="output_dir", - metavar="DIR", - default=None, - type=str, - ) - - init_subparser.add_argument( - "-S", - "--sample-pipeline-interfaces", - dest=SAMPLE_PL_ARG, - metavar="YAML", - default=None, - nargs="+", - type=str, - help="Path to looper sample config file", - ) - init_subparser.add_argument( - "-P", - "--project-pipeline-interfaces", - dest=PROJECT_PL_ARG, - metavar="YAML", - default=None, - nargs="+", - type=str, - help="Path to looper project config file", - ) - - # TODO: add ouput dir, sample, project pifaces - - init_subparser.add_argument( - "-p", - "--piface", - help="Generates generic pipeline interface", - action="store_true", - default=False, - ) - - # Common arguments - for subparser in [ - run_subparser, - rerun_subparser, - table_subparser, - report_subparser, - destroy_subparser, - check_subparser, - clean_subparser, - collate_subparser, - inspect_subparser, - link_subparser, - ]: - subparser.add_argument( - "config_file", - nargs="?", - default=None, - help="Project configuration file (YAML) or pephub registry path.", - ) - subparser.add_argument( - "--looper-config", - required=False, - default=None, - type=str, - help="Looper configuration file (YAML)", - ) - # help="Path to the looper config file" - subparser.add_argument( - "-S", - "--sample-pipeline-interfaces", - dest=SAMPLE_PL_ARG, - metavar="YAML", - default=None, - nargs="+", - type=str, - help="Path to looper sample config file", - ) - subparser.add_argument( - "-P", - "--project-pipeline-interfaces", - dest=PROJECT_PL_ARG, - metavar="YAML", - default=None, - nargs="+", - type=str, - help="Path to looper project config file", - ) - # help="Path to the output directory" - subparser.add_argument( - "-o", - "--output-dir", - dest="output_dir", - metavar="DIR", - default=None, - type=str, - help=argparse.SUPPRESS, - ) - # "Submission subdirectory name" - subparser.add_argument( - "--submission-subdir", metavar="DIR", help=argparse.SUPPRESS - ) - # "Results subdirectory name" - subparser.add_argument( - "--results-subdir", metavar="DIR", help=argparse.SUPPRESS - ) - # "Sample attribute for pipeline interface sources" - subparser.add_argument( - "--pipeline-interfaces-key", metavar="K", help=argparse.SUPPRESS - ) - # "Paths to pipeline interface files" - subparser.add_argument( - "--pipeline-interfaces", - metavar="P", - nargs="+", - action="append", - help=argparse.SUPPRESS, - ) - - for subparser in [ - run_subparser, - rerun_subparser, - table_subparser, - report_subparser, - destroy_subparser, - check_subparser, - clean_subparser, - collate_subparser, - inspect_subparser, - link_subparser, - ]: - fetch_samples_group = subparser.add_argument_group( - "sample selection arguments", - "Specify samples to include or exclude based on sample attribute values", - ) - fetch_samples_group.add_argument( - "-l", - "--limit", - default=None, - metavar="N", - type=html_range(min_val=1, max_val="num_samples", value="num_samples"), - help="Limit to n samples", - ) - fetch_samples_group.add_argument( - "-k", - "--skip", - default=None, - metavar="N", - type=html_range(min_val=1, max_val="num_samples", value="num_samples"), - help="Skip samples by numerical index", - ) - - fetch_samples_group.add_argument( - 
f"--{SAMPLE_SELECTION_ATTRIBUTE_OPTNAME}", - default="toggle", - metavar="ATTR", - help="Attribute for sample exclusion OR inclusion", - ) - - protocols = fetch_samples_group.add_mutually_exclusive_group() - protocols.add_argument( - f"--{SAMPLE_EXCLUSION_OPTNAME}", - nargs="*", - metavar="E", - help="Exclude samples with these values", - ) - protocols.add_argument( - f"--{SAMPLE_INCLUSION_OPTNAME}", - nargs="*", - metavar="I", - help="Include only samples with these values", - ) - fetch_samples_group.add_argument( - f"--{SAMPLE_SELECTION_FLAG_OPTNAME}", - default=None, - nargs="*", - metavar="SELFLAG", - help="Include samples with this flag status, e.g. completed", - ) - - fetch_samples_group.add_argument( - f"--{SAMPLE_EXCLUSION_FLAG_OPTNAME}", - default=None, - nargs="*", - metavar="EXCFLAG", - help="Exclude samples with this flag status, e.g. completed", - ) - - subparser.add_argument( - "-a", - "--amend", - nargs="+", - metavar="A", - help="List of amendments to activate", - ) - for subparser in [ - report_subparser, - table_subparser, - check_subparser, - destroy_subparser, - link_subparser, - ]: - subparser.add_argument( - "--project", - help="Process project-level pipelines", - action="store_true", - default=False, - ) - inspect_subparser.add_argument( - "--sample-names", - help="Names of the samples to inspect", - nargs="*", - default=None, - ) - - inspect_subparser.add_argument( - "--attr-limit", - help="Number of attributes to display", - type=int, - ) - parser.add_argument( - "--commands", - action="version", - version="{}".format(" ".join(subparsers.choices.keys())), - ) - - report_subparser.add_argument( - "--portable", - help="Makes html report portable.", - action="store_true", - ) - - result.append(parser) - return result - - -def opt_attr_pair(name: str) -> Tuple[str, str]: - return f"--{name}", name.replace("-", "_") - - -def validate_post_parse(args: argparse.Namespace) -> List[str]: - problems = [] - used_exclusives = [ - opt - for opt, attr in map( - opt_attr_pair, - [ - "skip", - "limit", - SAMPLE_EXCLUSION_OPTNAME, - SAMPLE_INCLUSION_OPTNAME, - ], - ) - if getattr(args, attr, None) - ] - if len(used_exclusives) > 1: - problems.append( - f"Used multiple mutually exclusive options: {', '.join(used_exclusives)}" - ) - return problems - - -def _proc_resources_spec(args): - """ - Process CLI-sources compute setting specification. There are two sources - of compute settings in the CLI alone: - * YAML file (--settings argument) - * itemized compute settings (--compute argument) - - The itemized compute specification is given priority - - :param argparse.Namespace: arguments namespace - :return Mapping[str, str]: binding between resource setting name and value - :raise ValueError: if interpretation of the given specification as encoding - of key-value pairs fails - """ - spec = getattr(args, "compute", None) - try: - settings_data = read_yaml_file(args.settings) or {} - except yaml.YAMLError: - _LOGGER.warning( - "Settings file ({}) does not follow YAML format," - " disregarding".format(args.settings) - ) - settings_data = {} - if not spec: - return settings_data - pairs = [(kv, kv.split("=")) for kv in spec] - bads = [] - for orig, pair in pairs: - try: - k, v = pair - except ValueError: - bads.append(orig) - else: - settings_data[k] = v - if bads: - raise ValueError( - "Could not correctly parse itemized compute specification. 
" - "Correct format: " + EXAMPLE_COMPUTE_SPEC_FMT - ) - return settings_data - - -def main(test_args=None): - """Primary workflow""" - global _LOGGER - - parser, aux_parser = build_parser() - aux_parser.suppress_defaults() - - if test_args: - args, remaining_args = parser.parse_known_args(args=test_args) - else: - args, remaining_args = parser.parse_known_args() - - cli_use_errors = validate_post_parse(args) - if cli_use_errors: - parser.print_help(sys.stderr) - parser.error( - f"{len(cli_use_errors)} CLI use problem(s): {', '.join(cli_use_errors)}" - ) - if args.command is None: - parser.print_help(sys.stderr) - sys.exit(1) - - if args.command == "init": - return int( - not initiate_looper_config( - dotfile_path(), - args.pep_config, - args.output_dir, - args.sample_pipeline_interfaces, - args.project_pipeline_interfaces, - args.force, - ) - ) - - if args.command == "init-piface": - sys.exit(int(not init_generic_pipeline())) - - _LOGGER = logmuse.logger_via_cli(args, make_root=True) - _LOGGER.info("Looper version: {}\nCommand: {}".format(__version__, args.command)) - - if "config_file" in vars(args): - if args.config_file is None: - looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) - try: - if args.looper_config: - looper_config_dict = read_looper_config_file(args.looper_config) - else: - looper_config_dict = read_looper_dotfile() - _LOGGER.info(f"Using looper config ({looper_cfg_path}).") - - for looper_config_key, looper_config_item in looper_config_dict.items(): - setattr(args, looper_config_key, looper_config_item) - - except OSError: - parser.print_help(sys.stderr) - _LOGGER.warning( - f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." - ) - sys.exit(1) - else: - _LOGGER.warning( - "This PEP configures looper through the project config. This approach is deprecated and will " - "be removed in future versions. Please use a looper config file. For more information see " - "looper.databio.org/en/latest/looper-config" - ) - - args = enrich_args_via_cfg(args, aux_parser, test_args) - - # If project pipeline interface defined in the cli, change name to: "pipeline_interface" - if vars(args)[PROJECT_PL_ARG]: - args.pipeline_interfaces = vars(args)[PROJECT_PL_ARG] - - if len(remaining_args) > 0: - _LOGGER.warning( - "Unrecognized arguments: {}".format( - " ".join([str(x) for x in remaining_args]) - ) - ) - - divcfg = ( - select_divvy_config(filepath=args.divvy) if hasattr(args, "divvy") else None - ) - - # Ignore flags if user is selecting or excluding on flags: - if args.sel_flag or args.exc_flag: - args.ignore_flags = True - - # Initialize project - if is_registry_path(args.config_file): - if vars(args)[SAMPLE_PL_ARG]: - p = Project( - amendments=args.amend, - divcfg_path=divcfg, - runp=args.command == "runp", - project_dict=PEPHubClient()._load_raw_pep( - registry_path=args.config_file - ), - **{ - attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args - }, - ) - else: - raise MisconfigurationException( - f"`sample_pipeline_interface` is missing. Provide it in the parameters." 
- ) - else: - try: - p = Project( - cfg=args.config_file, - amendments=args.amend, - divcfg_path=divcfg, - runp=args.command == "runp", - **{ - attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args - }, - ) - except yaml.parser.ParserError as e: - _LOGGER.error(f"Project config parse failed -- {e}") - sys.exit(1) - - selected_compute_pkg = p.selected_compute_package or DEFAULT_COMPUTE_RESOURCES_NAME - if p.dcc is not None and not p.dcc.activate_package(selected_compute_pkg): - _LOGGER.info( - "Failed to activate '{}' computing package. " - "Using the default one".format(selected_compute_pkg) - ) - - with ProjectContext( - prj=p, - selector_attribute=args.sel_attr, - selector_include=args.sel_incl, - selector_exclude=args.sel_excl, - selector_flag=args.sel_flag, - exclusion_flag=args.exc_flag, - ) as prj: - if args.command in ["run", "rerun"]: - run = Runner(prj) - try: - compute_kwargs = _proc_resources_spec(args) - return run(args, rerun=(args.command == "rerun"), **compute_kwargs) - except SampleFailedException: - sys.exit(1) - except IOError: - _LOGGER.error( - "{} pipeline_interfaces: '{}'".format( - prj.__class__.__name__, prj.pipeline_interface_sources - ) - ) - raise - - if args.command == "runp": - compute_kwargs = _proc_resources_spec(args) - collate = Collator(prj) - collate(args, **compute_kwargs) - return collate.debug - - if args.command == "destroy": - return Destroyer(prj)(args) - - # pipestat support introduces breaking changes and pipelines run - # with no pipestat reporting would not be compatible with - # commands: table, report and check. Therefore we plan maintain - # the old implementations for a couple of releases. - # if hasattr(args, "project"): - # use_pipestat = ( - # prj.pipestat_configured_project - # if args.project - # else prj.pipestat_configured - # ) - use_pipestat = ( - prj.pipestat_configured_project if args.project else prj.pipestat_configured - ) - if args.command == "table": - if use_pipestat: - Tabulator(prj)(args) - else: - raise PipestatConfigurationException("table") - - if args.command == "report": - if use_pipestat: - Reporter(prj)(args) - else: - raise PipestatConfigurationException("report") - - if args.command == "link": - if use_pipestat: - Linker(prj)(args) - else: - raise PipestatConfigurationException("link") - - if args.command == "check": - if use_pipestat: - return Checker(prj)(args) - else: - raise PipestatConfigurationException("check") - - if args.command == "clean": - return Cleaner(prj)(args) - - if args.command == "inspect": - inspect_project(p, args.sample_names, args.attr_limit) - from warnings import warn - - warn( - "The inspect feature has moved to eido and will be removed in the future release of looper. " - "Use `eido inspect` from now on.", - ) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py new file mode 100644 index 000000000..035a80434 --- /dev/null +++ b/looper/cli_pydantic.py @@ -0,0 +1,390 @@ +""" +CLI script using `pydantic-argparse` for parsing of arguments + +Arguments / commands are defined in `command_models/` and are given, eventually, as +`pydantic` models, allowing for type-checking and validation of arguments. + +Note: this is only a test script so far, and coexists next to the current CLI +(`cli_looper.py`), which uses `argparse` directly. The goal is to eventually +replace the current CLI with a CLI based on above-mentioned `pydantic` models, +but whether this will happen with `pydantic-argparse` or another, possibly self- +written library is not yet clear. 
+It is entirely possible that this script will be removed again.
+"""
+
+# Note: The following import is used for forward annotations (Python 3.8)
+# to prevent potential 'TypeError' related to the use of the '|' operator
+# with types.
+from __future__ import annotations
+
+import os
+import sys
+
+import logmuse
+import pydantic2_argparse
+import yaml
+from eido import inspect_project
+from pephubclient import PEPHubClient
+from pydantic2_argparse.argparse.parser import ArgumentParser
+
+from divvy import select_divvy_config
+
+from .const import PipelineLevel
+from . import __version__
+
+from .command_models.arguments import ArgumentEnum
+
+from .command_models.commands import (
+    SUPPORTED_COMMANDS,
+    TopLevelParser,
+    add_short_arguments,
+)
+from .const import *
+from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config
+from .exceptions import *
+from .looper import *
+from .parser_types import *
+from .project import Project, ProjectContext
+from .utils import (
+    dotfile_path,
+    enrich_args_via_cfg,
+    is_pephub_registry_path,
+    read_looper_config_file,
+    read_looper_dotfile,
+    initiate_looper_config,
+    init_generic_pipeline,
+    read_yaml_file,
+    inspect_looper_config_file,
+    is_PEP_file_type,
+)
+
+from typing import List, Tuple
+
+
+def opt_attr_pair(name: str) -> Tuple[str, str]:
+    """Return the long CLI option and the corresponding attribute name for an argument name."""
+    return f"--{name}", name.replace("-", "_")
+
+
+def validate_post_parse(args: argparse.Namespace) -> List[str]:
+    """Check whether the user is attempting to combine mutually exclusive options."""
+    problems = []
+    used_exclusives = [
+        opt
+        for opt, attr in map(
+            opt_attr_pair,
+            [
+                "skip",
+                "limit",
+                SAMPLE_EXCLUSION_OPTNAME,
+                SAMPLE_INCLUSION_OPTNAME,
+            ],
+        )
+        # Depending on the subcommand used, the above options might either be in
+        # the top-level namespace or in the subcommand namespace (the latter due
+        # to `modify_args_namespace()`)
+        if getattr(
+            args, attr, None
+        )  # or (getattr(args.run, attr, None) if hasattr(args, "run") else False)
+    ]
+    if len(used_exclusives) > 1:
+        problems.append(
+            f"Used multiple mutually exclusive options: {', '.join(used_exclusives)}"
+        )
+    return problems
+
+
+# TODO rename to run_looper_via_cli for running looper as a python library:
+# https://github.com/pepkit/looper/pull/472#discussion_r1521970763
+def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None):
+    # adapted from the original `cli_looper.py` code
+    global _LOGGER
+
+    _LOGGER = logmuse.logger_via_cli(args, make_root=True)
+
+    # Find out which subcommand was used
+    supported_command_names = [cmd.name for cmd in SUPPORTED_COMMANDS]
+    subcommand_valued_args = [
+        (arg, value)
+        for arg, value in vars(args).items()
+        if arg and arg in supported_command_names and value is not None
+    ]
+    # Exactly one subcommand argument will be non-`None`; anything else indicates a bug in `pydantic-argparse`
+    [(subcommand_name, subcommand_args)] = subcommand_valued_args
+
+    cli_use_errors = validate_post_parse(subcommand_args)
+    if cli_use_errors:
+        parser.print_help(sys.stderr)
+        parser.error(
+            f"{len(cli_use_errors)} CLI use problem(s): {', '.join(cli_use_errors)}"
+        )
+
+    if subcommand_name is None:
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+
+    if subcommand_name == "init":
+        return int(
+            not initiate_looper_config(
+                dotfile_path(),
+                subcommand_args.pep_config,
+                subcommand_args.output_dir,
+                subcommand_args.sample_pipeline_interfaces,
+                subcommand_args.project_pipeline_interfaces,
+                subcommand_args.force_yes,
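+                # `init` reuses the shared FORCE_YES argument here; the old
+                # argparse CLI exposed this as a dedicated `--force` flag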
) + ) + + if subcommand_name == "init_piface": + sys.exit(int(not init_generic_pipeline())) + + _LOGGER.info("Looper version: {}\nCommand: {}".format(__version__, subcommand_name)) + + if subcommand_args.config_file is None: + looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) + try: + if subcommand_args.looper_config: + looper_config_dict = read_looper_config_file( + subcommand_args.looper_config + ) + else: + looper_config_dict = read_looper_dotfile() + _LOGGER.info(f"Using looper config ({looper_cfg_path}).") + + for looper_config_key, looper_config_item in looper_config_dict.items(): + setattr(subcommand_args, looper_config_key, looper_config_item) + + except OSError: + parser.print_help(sys.stderr) + _LOGGER.warning( + f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." + ) + sys.exit(1) + else: + _LOGGER.warning( + "This PEP configures looper through the project config. This approach is deprecated and will " + "be removed in future versions. Please use a looper config file. For more information see " + "looper.databio.org/en/latest/looper-config" + ) + + subcommand_args = enrich_args_via_cfg( + subcommand_name, subcommand_args, parser, test_args=test_args + ) + + # If project pipeline interface defined in the cli, change name to: "pipeline_interface" + if vars(subcommand_args)[PROJECT_PL_ARG]: + subcommand_args.pipeline_interfaces = vars(subcommand_args)[PROJECT_PL_ARG] + + divcfg = ( + select_divvy_config(filepath=subcommand_args.divvy) + if hasattr(subcommand_args, "divvy") + else None + ) + # Ignore flags if user is selecting or excluding on flags: + if subcommand_args.sel_flag or subcommand_args.exc_flag: + subcommand_args.ignore_flags = True + + # Initialize project + if is_PEP_file_type(subcommand_args.config_file) and os.path.exists( + subcommand_args.config_file + ): + try: + p = Project( + cfg=subcommand_args.config_file, + amendments=subcommand_args.amend, + divcfg_path=divcfg, + runp=subcommand_name == "runp", + **{ + attr: getattr(subcommand_args, attr) + for attr in CLI_PROJ_ATTRS + if attr in subcommand_args + }, + ) + except yaml.parser.ParserError as e: + _LOGGER.error(f"Project config parse failed -- {e}") + sys.exit(1) + elif is_pephub_registry_path(subcommand_args.config_file): + if vars(subcommand_args)[SAMPLE_PL_ARG]: + p = Project( + amendments=subcommand_args.amend, + divcfg_path=divcfg, + runp=subcommand_name == "runp", + project_dict=PEPHubClient()._load_raw_pep( + registry_path=subcommand_args.config_file + ), + **{ + attr: getattr(subcommand_args, attr) + for attr in CLI_PROJ_ATTRS + if attr in subcommand_args + }, + ) + else: + raise MisconfigurationException( + f"`sample_pipeline_interface` is missing. Provide it in the parameters." + ) + else: + raise MisconfigurationException( + f"Cannot load PEP. Check file path or registry path to pep." + ) + + selected_compute_pkg = p.selected_compute_package or DEFAULT_COMPUTE_RESOURCES_NAME + if p.dcc is not None and not p.dcc.activate_package(selected_compute_pkg): + _LOGGER.info( + "Failed to activate '{}' computing package. 
" + "Using the default one".format(selected_compute_pkg) + ) + + with ProjectContext( + prj=p, + selector_attribute=subcommand_args.sel_attr, + selector_include=subcommand_args.sel_incl, + selector_exclude=subcommand_args.sel_excl, + selector_flag=subcommand_args.sel_flag, + exclusion_flag=subcommand_args.exc_flag, + ) as prj: + + # Check at the beginning if user wants to use pipestat and pipestat is configurable + is_pipestat_configured = ( + prj._check_if_pipestat_configured(pipeline_type=PipelineLevel.PROJECT.value) + if getattr(subcommand_args, "project", None) + else prj._check_if_pipestat_configured() + ) + + if subcommand_name in ["run", "rerun"]: + if getattr(subcommand_args, "project", None): + _LOGGER.warning( + "Project flag set but 'run' command was used. Please use 'runp' to run at project-level." + ) + rerun = subcommand_name == "rerun" + run = Runner(prj) + try: + # compute_kwargs = _proc_resources_spec(args) + compute_kwargs = _proc_resources_spec(subcommand_args) + + # TODO Shouldn't top level args and subcommand args be accessible on the same object? + return run( + subcommand_args, top_level_args=args, rerun=rerun, **compute_kwargs + ) + except SampleFailedException: + sys.exit(1) + except IOError: + _LOGGER.error( + "{} pipeline_interfaces: '{}'".format( + prj.__class__.__name__, prj.pipeline_interface_sources + ) + ) + raise + + if subcommand_name == "runp": + compute_kwargs = _proc_resources_spec(subcommand_args) + collate = Collator(prj) + collate(subcommand_args, **compute_kwargs) + return collate.debug + + if subcommand_name == "destroy": + return Destroyer(prj)(subcommand_args) + + if subcommand_name == "table": + if is_pipestat_configured: + return Tabulator(prj)(subcommand_args) + else: + raise PipestatConfigurationException("table") + + if subcommand_name == "report": + if is_pipestat_configured: + return Reporter(prj)(subcommand_args) + else: + raise PipestatConfigurationException("report") + + if subcommand_name == "link": + if is_pipestat_configured: + Linker(prj)(subcommand_args) + else: + raise PipestatConfigurationException("link") + + if subcommand_name == "check": + if is_pipestat_configured: + return Checker(prj)(subcommand_args) + else: + raise PipestatConfigurationException("check") + + if subcommand_name == "clean": + return Cleaner(prj)(subcommand_args) + + if subcommand_name == "inspect": + # Inspect PEP from Eido + sample_names = [] + for sample in p.samples: + sample_names.append(sample["sample_name"]) + inspect_project(p, sample_names) + # Inspect looper config file + if looper_config_dict: + inspect_looper_config_file(looper_config_dict) + else: + _LOGGER.warning("No looper configuration was supplied.") + + +def main(test_args=None) -> None: + parser = pydantic2_argparse.ArgumentParser( + model=TopLevelParser, + prog="looper", + description="Looper: A job submitter for Portable Encapsulated Projects", + add_help=True, + version="1.8.1", + ) + + parser = add_short_arguments(parser, ArgumentEnum) + + if test_args: + args = parser.parse_typed_args(args=test_args) + else: + args = parser.parse_typed_args() + + return run_looper(args, parser, test_args=test_args) + + +def _proc_resources_spec(args): + """ + Process CLI-sources compute setting specification. 
There are two sources
+    of compute settings in the CLI alone:
+        * YAML file (--settings argument)
+        * itemized compute settings (--compute argument)
+
+    The itemized compute specification is given priority.
+
+    :param argparse.Namespace args: arguments namespace
+    :return Mapping[str, str]: binding between resource setting name and value
+    :raise ValueError: if interpretation of the given specification as encoding
+        of key-value pairs fails
+    """
+    spec = getattr(args, "compute", None)
+    settings = args.settings
+    try:
+        settings_data = read_yaml_file(settings) or {}
+    except yaml.YAMLError:
+        _LOGGER.warning(
+            "Settings file ({}) does not follow YAML format,"
+            " disregarding".format(settings)
+        )
+        settings_data = {}
+    if not spec:
+        return settings_data
+    pairs = [(kv, kv.split("=")) for kv in spec]
+    bads = []
+    for orig, pair in pairs:
+        try:
+            k, v = pair
+        except ValueError:
+            bads.append(orig)
+        else:
+            settings_data[k] = v
+    if bads:
+        raise ValueError(
+            "Could not correctly parse itemized compute specification. "
+            "Correct format: " + EXAMPLE_COMPUTE_SPEC_FMT
+        )
+    return settings_data
+
+
+if __name__ == "__main__":
+    main()
diff --git a/looper/command_models/DEVELOPER.md b/looper/command_models/DEVELOPER.md
new file mode 100644
index 000000000..d71f7bf65
--- /dev/null
+++ b/looper/command_models/DEVELOPER.md
@@ -0,0 +1,85 @@
+# Developer documentation
+
+## Adding new command models
+
+To add a new model (command) to the project, follow these steps:
+
+1. Add new arguments in `looper/command_models/arguments.py` if necessary.
+
+- Add a new entry to the `ArgumentEnum` class.
+- For example:
+
+```python
+# arguments.py
+
+class ArgumentEnum(enum.Enum):
+    ...
+
+    NEW_ARGUMENT = Argument(
+        name="new_argument",
+        default=(new_argument_type, "default_value"),
+        description="Description of the new argument",
+    )
+
+```
+
+2. Create a new command in the existing command creation logic in `looper/command_models/commands.py`.
+
+- Create a new `Command` instance.
+- Create a `pydantic` model for this new command.
+- Add the new `Command` instance to `SUPPORTED_COMMANDS`.
+- For example:
+
+```python
+NewCommandParser = Command(
+    "new_command",
+    MESSAGE_BY_SUBCOMMAND["new_command"],
+    [
+        ...
+        ArgumentEnum.NEW_ARGUMENT.value,
+        # Add more arguments as needed for the new command
+    ],
+)
+NewCommandParserModel = NewCommandParser.create_model()
+
+SUPPORTED_COMMANDS = [..., NewCommandParser]
+```
+
+3. Register the new command and argument(s) in `TopLevelParser` in `looper/command_models/commands.py`.
+
+- Add a new field for the new command.
+- Add a new field for the new argument(s).
+- For example:
+
+```python
+class TopLevelParser(pydantic.BaseModel):
+
+    # commands
+    ...
+    new_command: Optional[NewCommandParserModel] = pydantic.Field(description=NewCommandParser.description)
+
+    # arguments
+    ...
+    new_argument: Optional[new_argument_type] = ArgumentEnum.NEW_ARGUMENT.value.with_reduced_default()
+```
+
+## Special treatment for the `run` command
+
+The `run` command in our project requires special treatment to accommodate hierarchical namespaces
+and properly handle its unique characteristics. Several functions have been adapted to ensure the
+correct behavior of the run command, and similar adaptations may be necessary for other commands.
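+
+To make the hierarchical namespace concrete, here is a minimal, self-contained sketch
+(illustrative only: `DemoRun` and `DemoCli` are hypothetical models, not part of looper)
+of the pattern used by `TopLevelParser`, where a subcommand's arguments live on a
+nested model rather than on the top-level namespace:
+
+```python
+from typing import Optional
+
+import pydantic.v1 as pydantic
+import pydantic2_argparse
+
+
+class DemoRun(pydantic.BaseModel):
+    # analogous to an `ArgumentEnum` entry attached to the `run` command
+    dry_run: bool = pydantic.Field(False, description="Don't actually submit jobs")
+
+
+class DemoCli(pydantic.BaseModel):
+    # each subcommand is an Optional field on the top-level model
+    run: Optional[DemoRun] = pydantic.Field(description="Run or submit sample jobs.")
+
+
+parser = pydantic2_argparse.ArgumentParser(model=DemoCli, prog="demo")
+args = parser.parse_typed_args(["run", "--dry-run"])
+# the option is found on the nested subcommand model, not on the top level
+assert args.run is not None and args.run.dry_run is True
+```
+
+This nesting is why `run_looper()` in `looper/cli_pydantic.py` first extracts the
+subcommand's namespace (`subcommand_args`) and hands that, rather than the top-level
+`args`, to the executors.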
+
+For developers looking to understand the details of the special treatment given to the `run`
+command and its associated changes, we recommend inspecting the following functions / parts of the
+code:
+- `looper/cli_looper.py`:
+  - `make_hierarchical_if_needed()`
+  - assignment of the `divcfg` variable
+  - assignment of the `project_args` variable
+  - `_proc_resources_spec()`
+  - `validate_post_parse()`
+- `looper/utils.py`:
+  - `enrich_args_via_cfg()`
+
+If you are adding new commands to the project or migrating existing commands to a `pydantic` model-based definition, adapt these parts of the code with equivalent behavior for your new command.
+Likewise, adapt the argument accesses in the corresponding executor in `looper/looper.py` to take into account the hierarchical organization of the command's arguments.
diff --git a/looper/command_models/README.md b/looper/command_models/README.md
new file mode 100644
index 000000000..dea00d8bd
--- /dev/null
+++ b/looper/command_models/README.md
@@ -0,0 +1,4 @@
+# `pydantic`-based definitions of `looper` commands and their arguments
+
+With the goal of writing an HTTP API that is in sync with the `looper` CLI, this module defines `looper` commands as `pydantic` models and their arguments as fields within them.
+These can then be used by the [`pydantic-argparse`](https://pydantic-argparse.supimdos.com/) library to create a type-validated CLI (see `../cli_pydantic.py`), and by the future HTTP API for validating `POST`ed JSON data. Eventually, the `pydantic-argparse`-based CLI will replace the existing `argparse`-based CLI defined in `../cli_looper.py`.
diff --git a/looper/command_models/__init__.py b/looper/command_models/__init__.py
new file mode 100644
index 000000000..4258506b0
--- /dev/null
+++ b/looper/command_models/__init__.py
@@ -0,0 +1,6 @@
+"""
+This package holds `pydantic` models that describe commands and their arguments.
+
+These can be used either by an HTTP API or with the `pydantic-argparse`
+library to build a CLI.
+"""
diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py
new file mode 100644
index 000000000..8c484d33d
--- /dev/null
+++ b/looper/command_models/arguments.py
@@ -0,0 +1,283 @@
+"""
+Argument definitions via a thin wrapper around `pydantic.fields.FieldInfo`
+"""
+
+import enum
+import os
+from copy import copy
+from typing import Any, List
+
+import pydantic.v1 as pydantic
+
+
+class Argument(pydantic.fields.FieldInfo):
+    """
+    CLI argument / flag definition
+
+    This class is designed to define CLI arguments or flags. It leverages
+    Pydantic for data validation and serves as a source of truth for multiple
+    interfaces, including a CLI.
+
+    Naively, one would think one could just subclass `pydantic.Field`,
+    but actually `pydantic.Field` is a function, and not a class.
+    `pydantic.Field()` returns a validated `FieldInfo` instance,
+    so we instead subclass `FieldInfo` directly and validate it in the
+    constructor.
+
+    :param str name: argument name, e.g. "ignore-args"
+    :param Any default: a tuple of the form (type, default_value). If the
+        default value is `...` (Ellipsis), then the argument is required.
+    :param str description: argument description, which will appear as the
+        help text for this argument
+    :param dict kwargs: additional keyword arguments supported by
+        `FieldInfo`. These are passed along as they are.
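+
+    Example (an illustrative sketch; the real definitions live in
+    `ArgumentEnum` below):
+
+        >>> force = Argument(
+        ...     name="force_yes",
+        ...     default=(bool, False),
+        ...     description="Provide upfront confirmation of destruction intent",
+        ... )
+        >>> force.name
+        'force_yes'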
+ """ + + def __init__( + self, name: str, default: Any, description: str, alias: str = None, **kwargs + ) -> None: + self._name = name + super().__init__( + default=default, description=description, alias=alias, **kwargs + ) + self._validate() + + @property + def name(self): + """ + Argument name as used in the CLI, e.g. "ignore-args" + """ + return self._name + + def with_reduced_default(self) -> pydantic.fields.FieldInfo: + """ + Convert to a `FieldInfo` instance with reduced default value + + Returns a copy of an instance, but with the `default` attribute + replaced by only the default value, without the type information. + This is required when using an instance in a direct `pydantic` + model definition, instead of creating a model dynamically using + `pydantic.create_model`. + + TODO: this is due to this issue: + https://github.com/pydantic/pydantic/issues/2248#issuecomment-757448447 + and it's a bit tedious. + + """ + c = copy(self) + _, default_value = self.default + c.default = default_value + return c + + +class ArgumentEnum(enum.Enum): + """ + Lists all available arguments + + Having a single "repository" of arguments allows us to re-use them easily across different commands. + + TODO: not sure whether an enum is the ideal data structure for that + """ + + IGNORE_FLAGS = Argument( + name="ignore_flags", + alias="-i", + default=(bool, False), + description="Ignore run status flags", + ) + FORCE_YES = Argument( + name="force_yes", + alias="-f", + default=(bool, False), + description="Provide upfront confirmation of destruction intent, to skip console query. Default=False", + ) + + DESCRIBE_CODES = Argument( + name="describe_codes", + default=(bool, False), + description="Show status codes description. Default=False", + ) + + ITEMIZED = Argument( + name="itemized", + default=(bool, False), + description="Show detailed overview of sample statuses. 
Default=False", + ) + + FLAGS = Argument( + name="flags", + alias="-f", + default=(List, []), + description="Only check samples based on these status flags.", + ) + + TIME_DELAY = Argument( + name="time_delay", + alias="-t", + default=(int, 0), + description="Time delay in seconds between job submissions (min: 0, max: 30)", + ) + DRY_RUN = Argument( + name="dry_run", + alias="-d", + default=(bool, False), + description="Don't actually submit jobs", + ) + COMMAND_EXTRA = Argument( + name="command_extra", + alias="-x", + default=(str, ""), + description="String to append to every command", + ) + COMMAND_EXTRA_OVERRIDE = Argument( + name="command_extra_override", + alias="-y", + default=(str, ""), + description="Same as command-extra, but overrides values in PEP", + ) + LUMP = Argument( + name="lump", + alias="-u", + default=(float, None), + description="Total input file size (GB) to batch into one job", + ) + LUMPN = Argument( + name="lump_n", + alias="-n", + default=(int, None), + description="Number of commands to batch into one job", + ) + LUMPJ = Argument( + name="lump_j", + alias="-j", + default=(int, None), + description="Lump samples into number of jobs.", + ) + LIMIT = Argument( + name="limit", alias="-l", default=(int, None), description="Limit to n samples" + ) + SKIP = Argument( + name="skip", + alias="-k", + default=(int, None), + description="Skip samples by numerical index", + ) + CONFIG_FILE = Argument( + name="config_file", + default=(str, None), + description="Project configuration file", + ) + LOOPER_CONFIG = Argument( + name="looper_config", + default=(str, None), + description="Looper configuration file (YAML)", + ) + SETTINGS = Argument( + name="settings", + default=(str, ""), + description="Path to a YAML settings file with compute settings", + ) + PEP_CONFIG = Argument( + name="pep_config", + default=(str, None), + description="PEP configuration file", + ) + OUTPUT_DIR = Argument( + name="output_dir", + alias="-o", + default=(str, None), + description="Output directory", + ) + SAMPLE_PIPELINE_INTERFACES = Argument( + name="sample_pipeline_interfaces", + alias="-S", + default=(List, []), + description="Paths to looper sample pipeline interfaces", + ) + PROJECT_PIPELINE_INTERFACES = Argument( + name="project_pipeline_interfaces", + alias="-P", + default=(List, []), + description="Paths to looper project pipeline interfaces", + ) + AMEND = Argument( + name="amend", default=(List, []), description="List of amendments to activate" + ) + SEL_ATTR = Argument( + name="sel_attr", + default=(str, "toggle"), + description="Attribute for sample exclusion OR inclusion", + ) + SEL_INCL = Argument( + name="sel_incl", + default=(List, []), + description="Include only samples with these values", + ) + SEL_EXCL = Argument( + name="sel_excl", + default=(str, ""), + description="Exclude samples with these values", + ) + SEL_FLAG = Argument( + name="sel_flag", default=(List, []), description="Sample selection flag" + ) + EXC_FLAG = Argument( + name="exc_flag", default=(List, []), description="Sample exclusion flag" + ) + SKIP_FILE_CHECKS = Argument( + name="skip_file_checks", + alias="-f", + default=(bool, False), + description="Do not perform input file checks", + ) + PACKAGE = Argument( + name="package", + default=(str, None), + description="Name of computing resource package to use", + ) + COMPUTE = Argument( + name="compute", + alias="-c", + default=(List, []), + description="List of key-value pairs (k1=v1)", + ) + DIVVY = Argument( + name="divvy", + default=(str, os.getenv("DIVCFG", 
None)),
+        description=(
+            "Path to divvy configuration file. Default=$DIVCFG env "
+            "variable. Currently: {}".format(os.getenv("DIVCFG") or "not set")
+        ),
+    )
+    # Arguments for logger compatible with logmuse
+    SILENT = Argument(
+        name="silent", default=(bool, False), description="Whether to silence logging"
+    )
+    VERBOSITY = Argument(
+        name="verbosity",
+        default=(int, None),
+        description="Alternate mode of expression for logging level that better "
+        "accords with intuition about how to convey this.",
+    )
+    LOGDEV = Argument(
+        name="logdev",
+        default=(bool, False),
+        description="Whether to log in development mode; possibly among other "
+        "behavioral changes to logs handling, use a more information-rich "
+        "message format template.",
+    )
+    PIPESTAT = Argument(
+        name="pipestat",
+        default=(str, None),
+        description="Path to pipestat files.",
+    )
+    PORTABLE = Argument(
+        name="portable",
+        default=(bool, False),
+        description="Makes html report portable.",
+    )
+    PROJECT_LEVEL = Argument(
+        name="project",
+        default=(bool, False),
+        description="Whether to run the command at project level.",
+    )
diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py
new file mode 100644
index 000000000..233cfd0b7
--- /dev/null
+++ b/looper/command_models/commands.py
@@ -0,0 +1,332 @@
+"""
+`pydantic` models for `looper` commands and a wrapper class.
+"""
+
+from dataclasses import dataclass
+from typing import List, Optional, Type, Union
+
+import pydantic.v1 as pydantic
+
+from ..const import MESSAGE_BY_SUBCOMMAND
+from .arguments import Argument, ArgumentEnum
+from pydantic2_argparse import ArgumentParser
+
+
+@dataclass
+class Command:
+    """
+    Representation of a command
+
+    :param str name: command name
+    :param str description: command description
+    :param list[Argument] arguments: list of arguments supported by this command
+    """
+
+    name: str
+    description: str
+    arguments: List[Argument]
+
+    def create_model(self) -> Type[pydantic.BaseModel]:
+        """
+        Creates a `pydantic` model for this command
+        """
+        arguments = dict()
+        for arg in self.arguments:
+            # These gymnastics are necessary because of
+            # https://github.com/pydantic/pydantic/issues/2248#issuecomment-757448447
+            arg_type, arg_default_value = arg.default
+            arguments[arg.name] = (
+                arg_type,
+                pydantic.Field(arg_default_value, description=arg.description),
+            )
+        return pydantic.create_model(self.name, **arguments)
+
+
+SHARED_ARGUMENTS = [
+    ArgumentEnum.SETTINGS.value,
+    ArgumentEnum.EXC_FLAG.value,
+    ArgumentEnum.SEL_FLAG.value,
+    ArgumentEnum.SEL_ATTR.value,
+    ArgumentEnum.SEL_INCL.value,
+    ArgumentEnum.SEL_EXCL.value,
+    ArgumentEnum.LIMIT.value,
+    ArgumentEnum.SKIP.value,
+    ArgumentEnum.PEP_CONFIG.value,
+    ArgumentEnum.OUTPUT_DIR.value,
+    ArgumentEnum.CONFIG_FILE.value,
+    ArgumentEnum.LOOPER_CONFIG.value,
+    ArgumentEnum.SAMPLE_PIPELINE_INTERFACES.value,
+    ArgumentEnum.PROJECT_PIPELINE_INTERFACES.value,
+    ArgumentEnum.PIPESTAT.value,
+    ArgumentEnum.AMEND.value,
+    ArgumentEnum.PROJECT_LEVEL.value,
+]
+
+RunParser = Command(
+    "run",
+    MESSAGE_BY_SUBCOMMAND["run"],
+    [
+        ArgumentEnum.IGNORE_FLAGS.value,
+        ArgumentEnum.TIME_DELAY.value,
+        ArgumentEnum.DRY_RUN.value,
+        ArgumentEnum.COMMAND_EXTRA.value,
+        ArgumentEnum.COMMAND_EXTRA_OVERRIDE.value,
+        ArgumentEnum.LUMP.value,
+        ArgumentEnum.LUMPN.value,
+        ArgumentEnum.LUMPJ.value,
+        ArgumentEnum.DIVVY.value,
+        ArgumentEnum.SKIP_FILE_CHECKS.value,
+        ArgumentEnum.COMPUTE.value,
+        ArgumentEnum.PACKAGE.value,
+    ],
+)
+
+# RERUN
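+# (identical submission argument set to `run`; `runp` below drops only `--lump-j`)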
+RerunParser = Command( + "rerun", + MESSAGE_BY_SUBCOMMAND["rerun"], + [ + ArgumentEnum.IGNORE_FLAGS.value, + ArgumentEnum.TIME_DELAY.value, + ArgumentEnum.DRY_RUN.value, + ArgumentEnum.COMMAND_EXTRA.value, + ArgumentEnum.COMMAND_EXTRA_OVERRIDE.value, + ArgumentEnum.LUMP.value, + ArgumentEnum.LUMPN.value, + ArgumentEnum.LUMPJ.value, + ArgumentEnum.DIVVY.value, + ArgumentEnum.SKIP_FILE_CHECKS.value, + ArgumentEnum.COMPUTE.value, + ArgumentEnum.PACKAGE.value, + ], +) + +# RUNP +RunProjectParser = Command( + "runp", + MESSAGE_BY_SUBCOMMAND["runp"], + [ + ArgumentEnum.IGNORE_FLAGS.value, + ArgumentEnum.TIME_DELAY.value, + ArgumentEnum.DRY_RUN.value, + ArgumentEnum.COMMAND_EXTRA.value, + ArgumentEnum.COMMAND_EXTRA_OVERRIDE.value, + ArgumentEnum.LUMP.value, + ArgumentEnum.LUMPN.value, + ArgumentEnum.DIVVY.value, + ArgumentEnum.SKIP_FILE_CHECKS.value, + ArgumentEnum.COMPUTE.value, + ArgumentEnum.PACKAGE.value, + ], +) + +# TABLE +TableParser = Command( + "table", + MESSAGE_BY_SUBCOMMAND["table"], + [], +) + + +# REPORT +ReportParser = Command( + "report", + MESSAGE_BY_SUBCOMMAND["report"], + [ + ArgumentEnum.PORTABLE.value, + ], +) + +# DESTROY +DestroyParser = Command( + "destroy", + MESSAGE_BY_SUBCOMMAND["destroy"], + [ + ArgumentEnum.DRY_RUN.value, + ArgumentEnum.FORCE_YES.value, + ], +) + +# CHECK +CheckParser = Command( + "check", + MESSAGE_BY_SUBCOMMAND["check"], + [ + ArgumentEnum.DESCRIBE_CODES.value, + ArgumentEnum.ITEMIZED.value, + ArgumentEnum.FLAGS.value, + ], +) + +# CLEAN +CleanParser = Command( + "clean", + MESSAGE_BY_SUBCOMMAND["clean"], + [ + ArgumentEnum.DRY_RUN.value, + ArgumentEnum.FORCE_YES.value, + ], +) + +# INSPECT +InspectParser = Command( + "inspect", + MESSAGE_BY_SUBCOMMAND["inspect"], + [], +) + + +# INIT +InitParser = Command( + "init", + MESSAGE_BY_SUBCOMMAND["init"], + [ + # Original command has force flag which is technically a different flag, but we should just use FORCE_YES + ArgumentEnum.FORCE_YES.value, + ArgumentEnum.OUTPUT_DIR.value, + ArgumentEnum.PEP_CONFIG.value, + ArgumentEnum.SAMPLE_PIPELINE_INTERFACES.value, + ArgumentEnum.PROJECT_PIPELINE_INTERFACES.value, + ], +) + + +# INIT-PIFACE +InitPifaceParser = Command( + "init_piface", + MESSAGE_BY_SUBCOMMAND["init-piface"], + [], +) + + +# LINK +LinkParser = Command( + "link", + MESSAGE_BY_SUBCOMMAND["link"], + [], +) + + +# Add shared arguments for all commands that use them +for arg in SHARED_ARGUMENTS: + RunParser.arguments.append(arg) + RerunParser.arguments.append(arg) + RunProjectParser.arguments.append(arg) + ReportParser.arguments.append(arg) + DestroyParser.arguments.append(arg) + CheckParser.arguments.append(arg) + CleanParser.arguments.append(arg) + TableParser.arguments.append(arg) + LinkParser.arguments.append(arg) + InspectParser.arguments.append(arg) + +# Create all Models +RunParserModel = RunParser.create_model() +RerunParserModel = RerunParser.create_model() +RunProjectParserModel = RunProjectParser.create_model() +ReportParserModel = ReportParser.create_model() +DestroyParserModel = DestroyParser.create_model() +CheckParserModel = CheckParser.create_model() +CleanParserModel = CleanParser.create_model() +TableParserModel = TableParser.create_model() +LinkParserModel = LinkParser.create_model() +InspectParserModel = InspectParser.create_model() +InitParserModel = InitParser.create_model() +InitPifaceParserModel = InitPifaceParser.create_model() + + +def add_short_arguments( + parser: ArgumentParser, argument_enums: Type[ArgumentEnum] +) -> ArgumentParser: + """ + This function takes a parser 
object created under pydantic argparse and adds the short arguments AFTER the initial creation. + This is a workaround as pydantic-argparse does not currently support this during initial parser creation. + + :param ArgumentParser parser: parser before adding short arguments + :param Type[ArgumentEnum] argument_enums: enumeration of arguments that contain names and aliases + :return ArgumentParser parser: parser after short arguments have been added + """ + + for cmd in parser._subcommands.choices.keys(): + + for argument_enum in list(argument_enums): + # First check there is an alias for the argument otherwise skip + if argument_enum.value.alias: + short_key = argument_enum.value.alias + long_key = "--" + argument_enum.value.name.replace( + "_", "-" + ) # We must do this because the ArgumentEnum names are transformed during parser creation + if long_key in parser._subcommands.choices[cmd]._option_string_actions: + argument = parser._subcommands.choices[cmd]._option_string_actions[ + long_key + ] + argument.option_strings = (short_key, long_key) + parser._subcommands.choices[cmd]._option_string_actions[ + short_key + ] = argument + + return parser + + +SUPPORTED_COMMANDS = [ + RunParser, + RerunParser, + RunProjectParser, + TableParser, + ReportParser, + DestroyParser, + CheckParser, + CleanParser, + InitParser, + InitPifaceParser, + LinkParser, + InspectParser, +] + + +class TopLevelParser(pydantic.BaseModel): + """ + Top level parser that takes + - commands (run, runp, check...) + - arguments that are required no matter the subcommand + """ + + # commands + run: Optional[RunParserModel] = pydantic.Field(description=RunParser.description) + rerun: Optional[RerunParserModel] = pydantic.Field( + description=RerunParser.description + ) + runp: Optional[RunProjectParserModel] = pydantic.Field( + description=RunProjectParser.description + ) + table: Optional[TableParserModel] = pydantic.Field( + description=TableParser.description + ) + report: Optional[ReportParserModel] = pydantic.Field( + description=ReportParser.description + ) + destroy: Optional[DestroyParserModel] = pydantic.Field( + description=DestroyParser.description + ) + check: Optional[CheckParserModel] = pydantic.Field( + description=CheckParser.description + ) + clean: Optional[CleanParserModel] = pydantic.Field( + description=CleanParser.description + ) + init: Optional[InitParserModel] = pydantic.Field(description=InitParser.description) + init_piface: Optional[InitPifaceParserModel] = pydantic.Field( + description=InitPifaceParser.description + ) + link: Optional[LinkParserModel] = pydantic.Field(description=LinkParser.description) + + inspect: Optional[InspectParserModel] = pydantic.Field( + description=InspectParser.description + ) + + # Additional arguments for logging, added to ALL commands + # These must be used before the command + silent: Optional[bool] = ArgumentEnum.SILENT.value.with_reduced_default() + verbosity: Optional[int] = ArgumentEnum.VERBOSITY.value.with_reduced_default() + logdev: Optional[bool] = ArgumentEnum.LOGDEV.value.with_reduced_default() diff --git a/looper/conductor.py b/looper/conductor.py index 807d34f3e..ffbb1b547 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -21,12 +21,13 @@ from pipestat import PipestatError from ubiquerg import expandpath, is_command_callable from yaml import dump -from yacman import YAMLConfigManager +from yacman import FutureYAMLConfigManager as YAMLConfigManager from .const import * from .exceptions import JobSubmissionException, SampleFailedException from 
.processed_project import populate_sample_paths
 from .utils import fetch_sample_flags, jinja_render_template_strictly
+from .const import PipelineLevel
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -85,11 +86,23 @@ def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename=
 def write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict):
     """
-    This is run at the project level, not at the sample level.
+    This writes a combined configuration file to be passed to a PipestatManager.
+    :param str looper_pipestat_config_path: path to the created pipestat configuration file
+    :param dict pipestat_config_dict: the dict containing key value pairs to be written to the pipestat configuration
+    :return bool: True if the configuration file was written
     """
+
+    if not os.path.exists(os.path.dirname(looper_pipestat_config_path)):
+        try:
+            os.makedirs(os.path.dirname(looper_pipestat_config_path))
+        except FileExistsError:
+            pass
+
     with open(looper_pipestat_config_path, "w") as f:
         yaml.dump(pipestat_config_dict, f)
-    print(f"Initialized looper config file: {looper_pipestat_config_path}")
+    _LOGGER.debug(
+        msg=f"Initialized pipestat config file: {looper_pipestat_config_path}"
+    )
 
     return True
 
@@ -261,8 +274,12 @@ def is_project_submittable(self, force=False):
 
         :param bool frorce: whether to force the project submission (ignore status/flags)
         """
+        psms = {}
         if self.prj.pipestat_configured_project:
-            psm = self.prj.get_pipestat_managers(project_level=True)[self.pl_name]
+            for piface in self.prj.project_pipeline_interfaces:
+                if piface.psm.pipeline_type == PipelineLevel.PROJECT.value:
+                    psms[piface.psm.pipeline_name] = piface.psm
+            psm = psms[self.pl_name]
             status = psm.get_status()
             if not force and status is not None:
                 _LOGGER.info(f"> Skipping project. Determined status: {status}")
@@ -288,12 +305,11 @@ def add_sample(self, sample, rerun=False):
                 )
             )
         if self.prj.pipestat_configured:
-            psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name)
-            sample_statuses = psms[self.pl_name].get_status(
+            sample_statuses = self.pl_iface.psm.get_status(
                 record_identifier=sample.sample_name
            )
             if sample_statuses == "failed" and rerun is True:
-                psms[self.pl_name].set_status(
+                self.pl_iface.psm.set_status(
                     record_identifier=sample.sample_name, status_identifier="waiting"
                 )
                 sample_statuses = "waiting"
@@ -303,23 +319,27 @@ def add_sample(self, sample, rerun=False):
         use_this_sample = True  # default to running this sample
         msg = None
+        if rerun and sample_statuses == []:
+            msg = "> Skipping sample because rerun requested, but no failed or waiting flag found."
+            use_this_sample = False
         if sample_statuses:
             status_str = ", ".join(sample_statuses)
             failed_flag = any("failed" in x for x in sample_statuses)
+            waiting_flag = any("waiting" in x for x in sample_statuses)
             if self.ignore_flags:
                 msg = f"> Found existing status: {status_str}. Ignoring."
             else:  # this pipeline already has a status
                 msg = f"> Found existing status: {status_str}. Skipping sample."
-                if failed_flag:
+                if failed_flag and not rerun:
                     msg += " Use rerun to ignore failed status."  # help guidance
                 use_this_sample = False
             if rerun:
                 # Rescue the sample if rerun requested, and failed flag is found
-                if failed_flag:
-                    msg = f"> Re-running failed sample. Status: {status_str}"
+                if failed_flag or waiting_flag:
+                    msg = f"> Re-running sample. Status: {status_str}"
                     use_this_sample = True
                 else:
-                    msg = f"> Skipping sample because rerun requested, but no failed flag found. Status: {status_str}"
+                    msg = f"> Skipping sample because rerun requested, but no failed or waiting flag found. 
Status: {status_str}" use_this_sample = False if msg: _LOGGER.info(msg) @@ -528,12 +548,7 @@ def _set_pipestat_namespace( :return yacman.YAMLConfigManager: pipestat namespace """ try: - psms = ( - self.prj.get_pipestat_managers(sample_name) - if sample_name - else self.prj.get_pipestat_managers(project_level=True) - ) - psm = psms[self.pl_iface.pipeline_name] + psm = self.pl_iface.psm except (PipestatError, AttributeError) as e: # pipestat section faulty or not found in project.looper or sample # or project is missing required pipestat attributes @@ -621,7 +636,6 @@ def write_script(self, pool, size): argstring = jinja_render_template_strictly( template=templ, namespaces=namespaces ) - print(argstring) except UndefinedError as jinja_exception: _LOGGER.warning(NOT_SUB_MSG.format(str(jinja_exception))) except KeyError as e: diff --git a/looper/const.py b/looper/const.py index a866f2d84..ca70851da 100644 --- a/looper/const.py +++ b/looper/const.py @@ -1,6 +1,7 @@ """ Shared project constants """ import os +from enum import Enum __author__ = "Databio lab" __email__ = "nathan@code.databio.org" @@ -268,3 +269,10 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): "init-piface": "Initialize generic pipeline interface.", "link": "Create directory of symlinks for reported results.", } + +# Add project/sample enum + + +class PipelineLevel(Enum): + SAMPLE = "sample" + PROJECT = "project" diff --git a/looper/divvy.py b/looper/divvy.py index 9107907f9..bd880f948 100644 --- a/looper/divvy.py +++ b/looper/divvy.py @@ -6,11 +6,14 @@ import sys import shutil import yaml -from yaml import SafeLoader -from shutil import copytree + +from shutil import copytree +from yacman import FutureYAMLConfigManager as YAMLConfigManager +from yacman import write_lock, FILEPATH_KEY, load_yaml, select_config +from yaml import SafeLoader from ubiquerg import is_writable, VersionInHelpParser -import yacman + from .const import ( COMPUTE_SETTINGS_VARNAME, @@ -28,7 +31,7 @@ # This is the divvy.py submodule from divvy -class ComputingConfiguration(yacman.YAMLConfigManager): +class ComputingConfiguration(YAMLConfigManager): """ Represents computing configuration objects. @@ -44,36 +47,31 @@ class ComputingConfiguration(yacman.YAMLConfigManager): `DIVCFG` file) """ - def __init__(self, entries=None, filepath=None): - if not entries and not filepath: - # Handle the case of an empty one, when we'll use the default - filepath = select_divvy_config(None) - - super(ComputingConfiguration, self).__init__( - entries=entries, - filepath=filepath, - schema_source=DEFAULT_CONFIG_SCHEMA, - validate_on_write=True, + def __init__( + self, + entries=None, + wait_max=None, + strict_ro_locks=False, + schema_source=None, + validate_on_write=False, + ): + super().__init__( + entries, wait_max, strict_ro_locks, schema_source, validate_on_write ) - if not "compute_packages" in self: - raise Exception( - "Your divvy config file is not in divvy config format " - "(it lacks a compute_packages section): '{}'".format(filepath) - ) - # We require that compute_packages be present, even if empty + if "compute_packages" not in self: self["compute_packages"] = {} - # Initialize default compute settings. 
_LOGGER.debug("Establishing project compute settings") self.compute = None self.setdefault("adapters", None) self.activate_package(DEFAULT_COMPUTE_RESOURCES_NAME) - self.config_file = self.filepath def write(self, filename=None): - super(ComputingConfiguration, self).write(filepath=filename, exclude_case=True) - filename = filename or getattr(self, yacman.FILEPATH_KEY) + with write_lock(self) as locked_ym: + locked_ym.rebase() + locked_ym.write() + filename = filename or getattr(self, FILEPATH_KEY) filedir = os.path.dirname(filename) # For this object, we *also* have to write the template files for pkg_name, pkg in self["compute_packages"].items(): @@ -151,12 +149,12 @@ def activate_package(self, package_name): # Augment compute, creating it if needed. if self.compute is None: _LOGGER.debug("Creating Project compute") - self.compute = yacman.YAMLConfigManager() + self.compute = YAMLConfigManager() _LOGGER.debug( "Adding entries for package_name '{}'".format(package_name) ) - self.compute.update(self["compute_packages"][package_name]) + self.compute.update_from_obj(self["compute_packages"][package_name]) # Ensure submission template is absolute. This *used to be* handled # at update (so the paths were stored as absolutes in the packages), @@ -165,7 +163,7 @@ def activate_package(self, package_name): if not os.path.isabs(self.compute["submission_template"]): try: self.compute["submission_template"] = os.path.join( - os.path.dirname(self.filepath), + os.path.dirname(self.default_config_file), self.compute["submission_template"], ) except AttributeError as e: @@ -200,11 +198,11 @@ def clean_start(self, package_name): self.reset_active_settings() return self.activate_package(package_name) - def get_active_package(self): + def get_active_package(self) -> YAMLConfigManager: """ Returns settings for the currently active compute package - :return yacman.YacAttMap: data defining the active compute package + :return YAMLConfigManager: data defining the active compute package """ return self.compute @@ -222,7 +220,7 @@ def reset_active_settings(self): :return bool: success flag """ - self.compute = yacman.YacAttMap() + self.compute = YAMLConfigManager() return True def update_packages(self, config_file): @@ -235,11 +233,11 @@ def update_packages(self, config_file): :param str config_file: path to file with new divvy configuration data """ - entries = yacman.load_yaml(config_file) + entries = load_yaml(config_file) self.update(entries) return True - def get_adapters(self): + def get_adapters(self) -> YAMLConfigManager: """ Get current adapters, if defined. @@ -248,9 +246,9 @@ def get_adapters(self): package-specific set of adapters, if any defined in 'adapters' section under currently active compute package. 
- :return yacman.YAMLConfigManager: current adapters mapping + :return YAMLConfigManager: current adapters mapping """ - adapters = yacman.YAMLConfigManager() + adapters = YAMLConfigManager() if "adapters" in self and self["adapters"] is not None: adapters.update(self["adapters"]) if "compute" in self and "adapters" in self.compute: @@ -376,7 +374,7 @@ def select_divvy_config(filepath): :param str | NoneType filepath: direct file path specification :return str: path to the config file to read """ - divcfg = yacman.select_config( + divcfg = select_config( config_filepath=filepath, config_env_vars=COMPUTE_SETTINGS_VARNAME, default_config_filepath=DEFAULT_CONFIG_FILEPATH, diff --git a/looper/looper.py b/looper/looper.py index 51a9ee02a..1eea6edd6 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -6,6 +6,7 @@ import abc import argparse import csv +import glob import logging import subprocess import yaml @@ -15,6 +16,8 @@ # Need specific sequence of actions for colorama imports? from colorama import init +from .const import PipelineLevel + init() from shutil import rmtree @@ -45,7 +48,6 @@ sample_folder, ) from pipestat.reports import get_file_for_table -from pipestat.reports import get_file_for_project _PKGNAME = "looper" _LOGGER = logging.getLogger(_PKGNAME) @@ -88,21 +90,29 @@ def __call__(self, args): # aggregate pipeline status data status = {} - if args.project: - psms = self.prj.get_pipestat_managers(project_level=True) - for pipeline_name, psm in psms.items(): - s = psm.get_status() or "unknown" - status.setdefault(pipeline_name, {}) - status[pipeline_name][self.prj.name] = s - _LOGGER.debug(f"{self.prj.name} ({pipeline_name}): {s}") + + psms = {} + if getattr(args, "project", None): + + for piface in self.prj.project_pipeline_interfaces: + if piface.psm.pipeline_type == PipelineLevel.PROJECT.value: + psms[piface.psm.pipeline_name] = piface.psm + s = piface.psm.get_status() or "unknown" + status.setdefault(piface.psm.pipeline_name, {}) + status[piface.psm.pipeline_name][self.prj.name] = s + _LOGGER.debug(f"{self.prj.name} ({piface.psm.pipeline_name}): {s}") + else: for sample in self.prj.samples: - psms = self.prj.get_pipestat_managers(sample_name=sample.sample_name) - for pipeline_name, psm in psms.items(): - s = psm.get_status(record_identifier=sample.sample_name) - status.setdefault(pipeline_name, {}) - status[pipeline_name][sample.sample_name] = s - _LOGGER.debug(f"{sample.sample_name} ({pipeline_name}): {s}") + for piface in sample.project.pipeline_interfaces: + if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value: + psms[piface.psm.pipeline_name] = piface.psm + s = piface.psm.get_status(record_identifier=sample.sample_name) + status.setdefault(piface.psm.pipeline_name, {}) + status[piface.psm.pipeline_name][sample.sample_name] = s + _LOGGER.debug( + f"{sample.sample_name} ({piface.psm.pipeline_name}): {s}" + ) console = Console() @@ -116,14 +126,14 @@ def __call__(self, args): ) table.add_column(f"Status", justify="center") table.add_column("Jobs count/total jobs", justify="center") - for status_id in psm.status_schema.keys(): + for status_id in psms[pipeline_name].status_schema.keys(): status_list = list(pipeline_status.values()) if status_id in status_list: status_count = status_list.count(status_id) table.add_row(status_id, f"{status_count}/{len(status_list)}") console.print(table) - if args.itemized: + if getattr(args, "itemized", None): for pipeline_name, pipeline_status in status.items(): table_title = f"Pipeline: '{pipeline_name}'" table = Table( @@ -141,7 +151,7 
@@ def __call__(self, args): for name, status_id in pipeline_status.items(): try: color = Color.from_rgb( - *psm.status_schema[status_id]["color"] + *psms[pipeline_name].status_schema[status_id]["color"] ).name except KeyError: color = "#bcbcbc" @@ -150,16 +160,17 @@ def __call__(self, args): console.print(table) if args.describe_codes: + # TODO this needs to be redone because it only takes the last psm in the list and gets status code and descriptions table = Table( show_header=True, header_style="bold magenta", title=f"Status codes description", - width=len(psm.status_schema_source) + 20, - caption=f"Descriptions source: {psm.status_schema_source}", + width=len(psms[pipeline_name].status_schema_source) + 20, + caption=f"Descriptions source: {psms[pipeline_name].status_schema_source}", ) table.add_column("Status code", justify="center") table.add_column("Description", justify="left") - for status, status_obj in psm.status_schema.items(): + for status, status_obj in psms[pipeline_name].status_schema.items(): if "description" in status_obj: desc = status_obj["description"] else: @@ -199,10 +210,10 @@ def __call__(self, args, preview_flag=True): if not preview_flag: _LOGGER.info("Clean complete.") return 0 - if args.dry_run: + if getattr(args, "dry_run", None): _LOGGER.info("Dry run. No files cleaned.") return 0 - if not args.force_yes and not query_yes_no( + if not getattr(args, "force_yes", None) and not query_yes_no( "Are you sure you want to permanently delete all " "intermediate pipeline results for this project?" ): @@ -241,8 +252,22 @@ def __call__(self, args, preview_flag=True): :param bool preview_flag: whether to halt before actually removing files """ - _LOGGER.info("Removing results:") + use_pipestat = ( + self.prj.pipestat_configured_project + if getattr(args, "project", None) + else self.prj.pipestat_configured + ) + + if use_pipestat: + _LOGGER.info("Removing summary:") + destroy_summary( + self.prj, + dry_run=preview_flag, + project_level=getattr(args, "project", None), + ) + _LOGGER.info("Removing results:") + psms = {} for sample in select_samples(prj=self.prj, args=args): _LOGGER.info(self.counter.show(sample.sample_name)) sample_output_folder = sample_folder(self.prj, sample) @@ -250,30 +275,26 @@ def __call__(self, args, preview_flag=True): # Preview: Don't actually delete, just show files. _LOGGER.info(str(sample_output_folder)) else: - _remove_or_dry_run(sample_output_folder, args.dry_run) - - _LOGGER.info("Removing summary:") - use_pipestat = ( - self.prj.pipestat_configured_project - if args.project - else self.prj.pipestat_configured - ) - if use_pipestat: - destroy_summary(self.prj, args.dry_run, args.project) - else: - _LOGGER.warning( - "Pipestat must be configured to destroy any created summaries." - ) + if use_pipestat: + for piface in sample.project.pipeline_interfaces: + if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value: + psms[piface.psm.pipeline_name] = piface.psm + for pipeline_name, psm in psms.items(): + psm.backend.remove_record( + record_identifier=sample.sample_name, rm_record=True + ) + else: + _remove_or_dry_run(sample_output_folder, args.dry_run) if not preview_flag: _LOGGER.info("Destroy complete.") return 0 - if args.dry_run: + if getattr(args, "dry_run", None): _LOGGER.info("Dry run. No files destroyed.") return 0 - if not args.force_yes and not query_yes_no( + if not getattr(args, "force_yes", None) and not query_yes_no( "Are you sure you want to permanently delete all pipeline " "results for this project?" 
): @@ -308,7 +329,7 @@ def __call__(self, args, **compute_kwargs): """ jobs = 0 self.debug = {} - project_pifaces = self.prj.project_pipeline_interface_sources + project_pifaces = self.prj.project_pipeline_interfaces if not project_pifaces: raise MisconfigurationException( "Looper requires a pointer to at least one project pipeline. " @@ -318,36 +339,26 @@ def __call__(self, args, **compute_kwargs): ) self.counter = LooperCounter(len(project_pifaces)) for project_piface in project_pifaces: - try: - project_piface_object = PipelineInterface( - project_piface, pipeline_type="project" - ) - except (IOError, ValidationError) as e: - _LOGGER.warning( - "Ignoring invalid pipeline interface source: {}. " - "Caught exception: {}".format( - project_piface, getattr(e, "message", repr(e)) - ) - ) - continue _LOGGER.info( self.counter.show( name=self.prj.name, type="project", - pipeline_name=project_piface_object.pipeline_name, + pipeline_name=project_piface.pipeline_name, ) ) conductor = SubmissionConductor( - pipeline_interface=project_piface_object, + pipeline_interface=project_piface, prj=self.prj, compute_variables=compute_kwargs, - delay=args.time_delay, - extra_args=args.command_extra, - extra_args_override=args.command_extra_override, - ignore_flags=args.ignore_flags, + delay=getattr(args, "time_delay", None), + extra_args=getattr(args, "command_extra", None), + extra_args_override=getattr(args, "command_extra_override", None), + ignore_flags=getattr(args, "ignore_flags", None), collate=True, ) - if conductor.is_project_submittable(force=args.ignore_flags): + if conductor.is_project_submittable( + force=getattr(args, "ignore_flags", None) + ): conductor._pool = [None] conductor.submit() jobs += conductor.num_job_submissions @@ -360,7 +371,7 @@ def __call__(self, args, **compute_kwargs): class Runner(Executor): """The true submitter of pipelines""" - def __call__(self, args, rerun=False, **compute_kwargs): + def __call__(self, args, top_level_args=None, rerun=False, **compute_kwargs): """ Do the Sample submission. 
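A recurring change in these hunks swaps direct attribute access such as args.time_delay for getattr(args, "time_delay", None). A minimal sketch of the motivation, assuming only that the pydantic-derived subcommand namespaces can lack fields a particular subcommand does not define (the namespace below is hypothetical):

    from argparse import Namespace

    # Hypothetical namespace for a subcommand that defines no time_delay field.
    args = Namespace(ignore_flags=True)

    # Old style: raises AttributeError when the field is absent.
    try:
        delay = args.time_delay
    except AttributeError:
        delay = None

    # New style used throughout this diff: degrades to None so downstream
    # defaults apply.
    delay = getattr(args, "time_delay", None)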
@@ -395,25 +406,24 @@ def __call__(self, args, rerun=False, **compute_kwargs): ) submission_conductors = {} + for piface in self.prj.pipeline_interfaces: conductor = SubmissionConductor( pipeline_interface=piface, prj=self.prj, compute_variables=comp_vars, - delay=args.time_delay, - extra_args=args.command_extra, - extra_args_override=args.command_extra_override, - ignore_flags=args.ignore_flags, - max_cmds=args.lump_n, - max_size=args.lump_s, - max_jobs=args.lump_j, + delay=getattr(args, "time_delay", None), + extra_args=getattr(args, "command_extra", None), + extra_args_override=getattr(args, "command_extra_override", None), + ignore_flags=getattr(args, "ignore_flags", None), + max_cmds=getattr(args, "lump_n", None), + max_size=getattr(args, "lump", None), + max_jobs=getattr(args, "lump_j", None), ) submission_conductors[piface.pipe_iface_file] = conductor - _LOGGER.info(f"Pipestat compatible: {self.prj.pipestat_configured_project}") - self.debug["Pipestat compatible"] = ( - self.prj.pipestat_configured_project or self.prj.pipestat_configured - ) + _LOGGER.debug(f"Pipestat compatible: {self.prj.pipestat_configured}") + self.debug["Pipestat compatible"] = self.prj.pipestat_configured for sample in select_samples(prj=self.prj, args=args): pl_fails = [] @@ -485,15 +495,15 @@ def __call__(self, args, rerun=False, **compute_kwargs): len(processed_samples), num_samples ) ) - _LOGGER.info("Commands submitted: {} of {}".format(cmd_sub_total, max_cmds)) + _LOGGER.debug("Commands submitted: {} of {}".format(cmd_sub_total, max_cmds)) self.debug[DEBUG_COMMANDS] = "{} of {}".format(cmd_sub_total, max_cmds) - if args.dry_run: + if getattr(args, "dry_run", None): job_sub_total_if_real = job_sub_total job_sub_total = 0 _LOGGER.info( f"Dry run. No jobs were actually submitted, but {job_sub_total_if_real} would have been." ) - _LOGGER.info("Jobs submitted: {}".format(job_sub_total)) + _LOGGER.debug("Jobs submitted: {}".format(job_sub_total)) self.debug[DEBUG_JOBS] = job_sub_total # Restructure sample/failure data for display. 
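The Checker, Destroyer, Reporter, Linker, and Tabulator hunks all collect PipestatManagers with the same filtering idiom. A condensed sketch of that pattern; the PipelineLevel values ("sample"/"project") are assumed from looper/const.py, which this diff imports but does not show:

    from enum import Enum

    class PipelineLevel(Enum):  # assumed to mirror looper/const.py
        SAMPLE = "sample"
        PROJECT = "project"

    def collect_psms(pifaces, level):
        # Map pipeline_name -> PipestatManager for interfaces at the given level.
        psms = {}
        for piface in pifaces:
            if piface.psm.pipeline_type == level.value:
                psms[piface.psm.pipeline_name] = piface.psm
        return psms

    # e.g. collect_psms(prj.pipeline_interfaces, PipelineLevel.SAMPLE)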
@@ -545,37 +555,35 @@ class Reporter(Executor): def __call__(self, args): # initialize the report builder + self.debug = {} p = self.prj - project_level = args.project + project_level = getattr(args, "project", None) portable = args.portable + psms = {} + if project_level: - psms = self.prj.get_pipestat_managers(project_level=True) - print(psms) - for name, psm in psms.items(): - # Summarize will generate the static HTML Report Function - report_directory = psm.summarize( - looper_samples=self.prj.samples, portable=portable - ) + + for piface in self.prj.project_pipeline_interfaces: + if piface.psm.pipeline_type == PipelineLevel.PROJECT.value: + psms[piface.psm.pipeline_name] = piface.psm + report_directory = piface.psm.summarize( + looper_samples=self.prj.samples, portable=portable + ) print(f"Report directory: {report_directory}") + self.debug["report_directory"] = report_directory + return self.debug else: - for piface_source_samples in self.prj._samples_by_piface( - self.prj.piface_key - ).values(): - # For each piface_key, we have a list of samples, but we only need one sample from the list to - # call the related pipestat manager object which will pull ALL samples when using psm.summarize - first_sample_name = list(piface_source_samples)[0] - psms = self.prj.get_pipestat_managers( - sample_name=first_sample_name, project_level=False - ) - print(psms) - for name, psm in psms.items(): - # Summarize will generate the static HTML Report Function - report_directory = psm.summarize( + for piface in self.prj.pipeline_interfaces: + if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value: + psms[piface.psm.pipeline_name] = piface.psm + report_directory = piface.psm.summarize( looper_samples=self.prj.samples, portable=portable ) print(f"Report directory: {report_directory}") + self.debug["report_directory"] = report_directory + return self.debug class Linker(Executor): @@ -584,26 +592,22 @@ class Linker(Executor): def __call__(self, args): # initialize the report builder p = self.prj - project_level = args.project - link_dir = args.output_dir + project_level = getattr(args, "project", None) + link_dir = getattr(args, "output_dir", None) + + psms = {} if project_level: - psms = self.prj.get_pipestat_managers(project_level=True) - for name, psm in psms.items(): - linked_results_path = psm.link(link_dir=link_dir) - print(f"Linked directory: {linked_results_path}") + for piface in self.prj.project_pipeline_interfaces: + if piface.psm.pipeline_type == PipelineLevel.PROJECT.value: + psms[piface.psm.pipeline_name] = piface.psm + linked_results_path = piface.psm.link(link_dir=link_dir) + print(f"Linked directory: {linked_results_path}") else: - for piface_source_samples in self.prj._samples_by_piface( - self.prj.piface_key - ).values(): - # For each piface_key, we have a list of samples, but we only need one sample from the list to - # call the related pipestat manager object which will pull ALL samples when using psm.summarize - first_sample_name = list(piface_source_samples)[0] - psms = self.prj.get_pipestat_managers( - sample_name=first_sample_name, project_level=False - ) - for name, psm in psms.items(): - linked_results_path = psm.link(link_dir=link_dir) + for piface in self.prj.pipeline_interfaces: + if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value: + psms[piface.psm.pipeline_name] = piface.psm + linked_results_path = piface.psm.link(link_dir=link_dir) print(f"Linked directory: {linked_results_path}") @@ -615,24 +619,19 @@ class Tabulator(Executor): def __call__(self, args): # p = 
self.prj - project_level = args.project + project_level = getattr(args, "project", None) results = [] + psms = {} if project_level: - psms = self.prj.get_pipestat_managers(project_level=True) - for name, psm in psms.items(): - results = psm.table() + for piface in self.prj.project_pipeline_interfaces: + if piface.psm.pipeline_type == PipelineLevel.PROJECT.value: + psms[piface.psm.pipeline_name] = piface.psm + results = piface.psm.table() else: - for piface_source_samples in self.prj._samples_by_piface( - self.prj.piface_key - ).values(): - # For each piface_key, we have a list of samples, but we only need one sample from the list to - # call the related pipestat manager object which will pull ALL samples when using psm.table - first_sample_name = list(piface_source_samples)[0] - psms = self.prj.get_pipestat_managers( - sample_name=first_sample_name, project_level=False - ) - for name, psm in psms.items(): - results = psm.table() + for piface in self.prj.pipeline_interfaces: + if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value: + psms[piface.psm.pipeline_name] = piface.psm + results = piface.psm.table() # Results contains paths to stats and object summaries. return results @@ -672,64 +671,60 @@ def destroy_summary(prj, dry_run=False, project_level=False): This function is for use with pipestat configured projects. """ + psms = {} if project_level: - psms = prj.get_pipestat_managers(project_level=True) + for piface in prj.pipeline_interfaces: + if piface.psm.pipeline_type == PipelineLevel.PROJECT.value: + psms[piface.psm.pipeline_name] = piface.psm + for name, psm in psms.items(): _remove_or_dry_run( [ - get_file_for_project( - psm, - pipeline_name=psm["_pipeline_name"], - directory="reports", + get_file_for_table( + psm, pipeline_name=psm.pipeline_name, directory="reports" ), get_file_for_table( psm, - pipeline_name=psm["_pipeline_name"], + pipeline_name=psm.pipeline_name, appendix="stats_summary.tsv", ), get_file_for_table( psm, - pipeline_name=psm["_pipeline_name"], + pipeline_name=psm.pipeline_name, appendix="objs_summary.yaml", ), - get_file_for_table( - psm, pipeline_name=psm["_pipeline_name"], appendix="reports" + os.path.join( + os.path.dirname(psm.config_path), "aggregate_results.yaml" ), ], dry_run, ) else: - for piface_source_samples in prj._samples_by_piface(prj.piface_key).values(): - # For each piface_key, we have a list of samples, but we only need one sample from the list to - # call the related pipestat manager object which will pull ALL samples when using psm.table - first_sample_name = list(piface_source_samples)[0] - psms = prj.get_pipestat_managers( - sample_name=first_sample_name, project_level=False + for piface in prj.pipeline_interfaces: + if piface.psm.pipeline_type == PipelineLevel.SAMPLE.value: + psms[piface.psm.pipeline_name] = piface.psm + for name, psm in psms.items(): + _remove_or_dry_run( + [ + get_file_for_table( + psm, pipeline_name=psm.pipeline_name, directory="reports" + ), + get_file_for_table( + psm, + pipeline_name=psm.pipeline_name, + appendix="stats_summary.tsv", + ), + get_file_for_table( + psm, + pipeline_name=psm.pipeline_name, + appendix="objs_summary.yaml", + ), + os.path.join( + os.path.dirname(psm.config_path), "aggregate_results.yaml" + ), + ], + dry_run, ) - for name, psm in psms.items(): - _remove_or_dry_run( - [ - get_file_for_project( - psm, - pipeline_name=psm["_pipeline_name"], - directory="reports", - ), - get_file_for_table( - psm, - pipeline_name=psm["_pipeline_name"], - appendix="stats_summary.tsv", - ), - get_file_for_table( 
- psm, - pipeline_name=psm["_pipeline_name"], - appendix="objs_summary.yaml", - ), - get_file_for_table( - psm, pipeline_name=psm["_pipeline_name"], appendix="reports" - ), - ], - dry_run, - ) class LooperCounter(object): diff --git a/looper/project.py b/looper/project.py index 6607db6e2..16684ba74 100644 --- a/looper/project.py +++ b/looper/project.py @@ -3,6 +3,8 @@ import itertools import os +from yaml import safe_load + try: from functools import cached_property except ImportError: @@ -26,6 +28,7 @@ from .pipeline_interface import PipelineInterface from .processed_project import populate_project_paths, populate_sample_paths from .utils import * +from .const import PipelineLevel __all__ = ["Project"] @@ -126,6 +129,12 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): self[EXTRA_KEY] = {} + try: + # For loading PEPs via CSV, Peppy cannot infer project name. + name = self.name + except NotImplementedError: + self.name = None + # add sample pipeline interface to the project if kwargs.get(SAMPLE_PL_ARG): self.set_sample_piface(kwargs.get(SAMPLE_PL_ARG)) @@ -144,7 +153,7 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): self.dcc = ( None if divcfg_path is None - else ComputingConfiguration(filepath=divcfg_path) + else ComputingConfiguration.from_yaml_file(filepath=divcfg_path) ) if DRY_RUN_KEY in self and not self[DRY_RUN_KEY]: _LOGGER.debug("Ensuring project directories exist") @@ -300,7 +309,7 @@ def project_pipeline_interfaces(self): :return list[looper.PipelineInterface]: list of pipeline interfaces """ return [ - PipelineInterface(pi, pipeline_type="project") + PipelineInterface(pi, pipeline_type=PipelineLevel.PROJECT.value) for pi in self.project_pipeline_interface_sources ] @@ -343,7 +352,9 @@ def pipestat_configured_project(self): :return bool: whether pipestat configuration is complete """ - return self._check_if_pipestat_configured(project_level=True) + return self._check_if_pipestat_configured( + pipeline_type=PipelineLevel.PROJECT.value + ) def get_sample_piface(self, sample_name): """ @@ -441,73 +452,91 @@ def get_schemas(pifaces, schema_key=INPUT_SCHEMA_KEY): schema_set.update([schema_file]) return list(schema_set) - def get_pipestat_managers(self, sample_name=None, project_level=False): - """ - Get a collection of pipestat managers for the selected sample or project. + def _check_if_pipestat_configured(self, pipeline_type=PipelineLevel.SAMPLE.value): - The number of pipestat managers corresponds to the number of unique - output schemas in the pipeline interfaces specified by the sample or project. + # First check if pipestat key is in looper_config, if not return false - :param str sample_name: sample name to get pipestat managers for - :param bool project_level: whether the project PipestatManagers - should be returned - :return dict[str, pipestat.PipestatManager]: a mapping of pipestat - managers by pipeline interface name - """ - pipestat_configs = self._get_pipestat_configuration( - sample_name=sample_name, project_level=project_level - ) - return { - pipeline_name: PipestatManager(**pipestat_vars) - for pipeline_name, pipestat_vars in pipestat_configs.items() - } + if PIPESTAT_KEY not in self[EXTRA_KEY]: + return False + elif PIPESTAT_KEY in self[EXTRA_KEY]: + if self[EXTRA_KEY][PIPESTAT_KEY] is None: + return False + else: + # If pipestat key is available assume user desires pipestat usage + # This should return True OR raise an exception at this point. 
+                return self._get_pipestat_configuration(pipeline_type)

-    def _check_if_pipestat_configured(self, project_level=False):
-        """
-        A helper method determining whether pipestat configuration is complete
+    def _get_pipestat_configuration(self, pipeline_type=PipelineLevel.SAMPLE.value):

-        :param bool project_level: whether the project pipestat config should be checked
-        :return bool: whether pipestat configuration is complete
-        """
-        try:
-            if project_level:
-                pipestat_configured = self._get_pipestat_configuration(
-                    sample_name=None, project_level=project_level
+        # First check if a pipestat config already exists for each pipeline interface
+
+        if pipeline_type == PipelineLevel.SAMPLE.value:
+            for piface in self.pipeline_interfaces:
+
+                pipestat_config_path = self._check_for_existing_pipestat_config(piface)
+
+                if not pipestat_config_path:
+                    self._create_pipestat_config(piface)
+                else:
+                    piface.psm = PipestatManager(
+                        config_file=pipestat_config_path, multi_pipelines=True
+                    )
+
+        elif pipeline_type == PipelineLevel.PROJECT.value:
+            for prj_piface in self.project_pipeline_interfaces:
+                pipestat_config_path = self._check_for_existing_pipestat_config(
+                    prj_piface
                 )
-            else:
-                for s in self.samples:
-                    pipestat_configured = self._get_pipestat_configuration(
-                        sample_name=s.sample_name
+
+                if not pipestat_config_path:
+                    self._create_pipestat_config(prj_piface)
+                else:
+                    prj_piface.psm = PipestatManager(
+                        config_file=pipestat_config_path, multi_pipelines=True
                     )
-        except Exception as e:
-            context = (
-                f"Project '{self.name}'"
-                if project_level
-                else f"Sample '{s.sample_name}'"
-            )
-            _LOGGER.debug(
-                f"Pipestat configuration incomplete for {context}; "
-                f"caught exception: {getattr(e, 'message', repr(e))}"
-            )
-            return False
         else:
-            if pipestat_configured is not None and pipestat_configured != {}:
-                return True
-            else:
-                return False
+            _LOGGER.error(
+                msg="No pipeline type specified during pipestat configuration"
+            )
+
+        return True

-    def _get_pipestat_configuration(self, sample_name=None, project_level=False):
+    def _check_for_existing_pipestat_config(self, piface):
         """
-        Get all required pipestat configuration variables from looper_config file
+
+        Config files should be in the looper output directory and named as:
+
+        pipestat_config_{pipeline_name}.yaml
+
         """
-        ret = {}
-        if not project_level and sample_name is None:
-            raise ValueError(
-                "Must provide the sample_name to determine the "
-                "sample to get the PipestatManagers for"
+        # Cannot do much if we cannot retrieve the pipeline_name
+        try:
+            pipeline_name = piface.data["pipeline_name"]
+        except KeyError:
+            raise Exception(
+                "To use pipestat, a pipeline_name must be set in the pipeline interface."
             )
+        config_file_name = f"pipestat_config_{pipeline_name}.yaml"
+        output_dir = expandpath(self.output_dir)
+
+        config_file_path = os.path.join(
+            # os.path.dirname(output_dir), config_file_name
+            output_dir,
+            config_file_name,
+        )
+
+        if os.path.exists(config_file_path):
+            return config_file_path
+        else:
+            return None
+
+    def _create_pipestat_config(self, piface):
+        """
+        Each pipeline interface needs its own pipestat config file and associated psm
+        """
+
         if PIPESTAT_KEY in self[EXTRA_KEY]:
             pipestat_config_dict = self[EXTRA_KEY][PIPESTAT_KEY]
         else:
@@ -521,13 +550,58 @@ def _get_pipestat_configuration(self, sample_name=None, project_level=False):
         # Expand paths in the event ENV variables were used in config files
         output_dir = expandpath(self.output_dir)

-        # Get looper user configured items first and update the pipestat_config_dict
+        pipestat_config_dict.update({"output_dir": output_dir})
+
+        if "output_schema" in piface.data:
+            schema_path = expandpath(piface.data["output_schema"])
+            if not os.path.isabs(schema_path):
+                # Get path relative to the pipeline_interface
+                schema_path = os.path.join(
+                    os.path.dirname(piface.pipe_iface_file), schema_path
+                )
+            pipestat_config_dict.update({"schema_path": schema_path})
+            try:
+                with open(schema_path, "r") as f:
+                    output_schema_data = safe_load(f)
+                    output_schema_pipeline_name = output_schema_data[
+                        PIPELINE_INTERFACE_PIPELINE_NAME_KEY
+                    ]
+            except Exception:
+                output_schema_pipeline_name = None
+        else:
+            output_schema_pipeline_name = None
+        if "pipeline_name" in piface.data:
+            pipeline_name = piface.data["pipeline_name"]
+            pipestat_config_dict.update({"pipeline_name": piface.data["pipeline_name"]})
+        else:
+            pipeline_name = None
+        if "pipeline_type" in piface.data:
+            pipestat_config_dict.update({"pipeline_type": piface.data["pipeline_type"]})
+
+        # Warn the user if there is a pipeline_name mismatch between sources
+        if pipeline_name != output_schema_pipeline_name:
+            _LOGGER.warning(
+                msg=f"Pipeline name mismatch detected. Pipeline interface: {pipeline_name}; output schema: {output_schema_pipeline_name}. Defaulting to the pipeline interface value."
+            )
+
         try:
             results_file_path = expandpath(pipestat_config_dict["results_file_path"])
-            if not os.path.exists(os.path.dirname(results_file_path)):
-                results_file_path = os.path.join(
-                    os.path.dirname(output_dir), results_file_path
-                )
+
+            if not os.path.isabs(results_file_path):
+                # e.g. user configures "results.yaml" as results_file_path
user configures "results.yaml" as results_file_path + if "{record_identifier}" in results_file_path: + # this is specifically to check if the user wishes tro generate a file for EACH record + if not os.path.exists(os.path.dirname(results_file_path)): + results_file_path = os.path.join(output_dir, results_file_path) + else: + if not os.path.exists(os.path.dirname(results_file_path)): + results_file_path = os.path.join( + output_dir, f"{pipeline_name}/", results_file_path + ) + else: + # Do nothing because the user has given an absolute file path + pass + pipestat_config_dict.update({"results_file_path": results_file_path}) except KeyError: results_file_path = None @@ -540,57 +614,20 @@ def _get_pipestat_configuration(self, sample_name=None, project_level=False): except KeyError: flag_file_dir = None - if sample_name: - pipestat_config_dict.update({"record_identifier": sample_name}) - - if project_level and "project_name" in pipestat_config_dict: - pipestat_config_dict.update( - {"project_name": pipestat_config_dict["project_name"]} - ) - - if project_level and "{record_identifier}" in results_file_path: - # if project level and using {record_identifier}, pipestat needs some sort of record_identifier during creation - pipestat_config_dict.update( - {"record_identifier": "default_project_record_identifier"} - ) - - pipestat_config_dict.update({"output_dir": output_dir}) - - pifaces = ( - self.project_pipeline_interfaces - if project_level - else self._interfaces_by_sample[sample_name] + # Pipestat_dict_ is now updated from all sources and can be written to a yaml. + pipestat_config_path = os.path.join( + output_dir, + f"pipestat_config_{pipeline_name}.yaml", ) - for piface in pifaces: - # We must also obtain additional pipestat items from the pipeline author's piface - if "output_schema" in piface.data: - schema_path = expandpath(piface.data["output_schema"]) - if not os.path.isabs(schema_path): - # Get path relative to the pipeline_interface - schema_path = os.path.join( - os.path.dirname(piface.pipe_iface_file), schema_path - ) - pipestat_config_dict.update({"schema_path": schema_path}) - if "pipeline_name" in piface.data: - pipestat_config_dict.update( - {"pipeline_name": piface.data["pipeline_name"]} - ) - if "pipeline_type" in piface.data: - pipestat_config_dict.update( - {"pipeline_type": piface.data["pipeline_type"]} - ) + # Two end goals, create a config file + write_pipestat_config(pipestat_config_path, pipestat_config_dict) - # Pipestat_dict_ is now updated from all sources and can be written to a yaml. 
- looper_pipestat_config_path = os.path.join( - os.path.dirname(output_dir), "looper_pipestat_config.yaml" - ) - write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict) + piface.psm = PipestatManager( + config_file=pipestat_config_path, multi_pipelines=True + ) - ret[piface.pipeline_name] = { - "config_file": looper_pipestat_config_path, - } - return ret + return None def populate_pipeline_outputs(self): """ @@ -657,7 +694,7 @@ def _piface_by_samples(self): pifaces_by_sample = {} for source, sample_names in self._samples_by_interface.items(): try: - pi = PipelineInterface(source, pipeline_type="sample") + pi = PipelineInterface(source, pipeline_type=PipelineLevel.SAMPLE.value) except PipelineInterfaceConfigError as e: _LOGGER.debug(f"Skipping pipeline interface creation: {e}") else: @@ -708,7 +745,9 @@ def _samples_by_piface(self, piface_key): for source in piface_srcs: source = self._resolve_path_with_cfg(source) try: - PipelineInterface(source, pipeline_type="sample") + PipelineInterface( + source, pipeline_type=PipelineLevel.SAMPLE.value + ) except ( ValidationError, IOError, diff --git a/looper/utils.py b/looper/utils.py index 3796cbc6f..fc6671722 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -16,9 +16,10 @@ from peppy.const import * from ubiquerg import convert_value, expandpath, parse_registry_path from pephubclient.constants import RegistryPath -from pydantic.error_wrappers import ValidationError +from pydantic import ValidationError from .const import * +from .command_models.commands import SUPPORTED_COMMANDS from .exceptions import MisconfigurationException, RegistryPathException _LOGGER = getLogger(__name__) @@ -94,7 +95,9 @@ def fetch_sample_flags(prj, sample, pl_name, flag_dir=None): return [ x for x in folder_contents - if os.path.splitext(x)[1] == ".flag" and os.path.basename(x).startswith(pl_name) + if os.path.splitext(x)[1] == ".flag" + and os.path.basename(x).startswith(pl_name) + and sample.sample_name in x ] @@ -250,19 +253,20 @@ def read_yaml_file(filepath): return data -def enrich_args_via_cfg(parser_args, aux_parser, test_args=None): +def enrich_args_via_cfg(subcommand_name, parser_args, aux_parser, test_args=None): """ Read in a looper dotfile and set arguments. 
    Priority order: CLI > dotfile/config > parser default

+    :param str subcommand_name: the name of the subcommand being run
     :param argparse.Namespace parser_args: parsed args by the original parser
     :param argparse.Namespace aux_parser: parsed args by a parser with defaults suppressed
     :return argparse.Namespace: selected argument values
     """
     cfg_args_all = (
-        _get_subcommand_args(parser_args)
+        _get_subcommand_args(subcommand_name, parser_args)
         if os.path.exists(parser_args.config_file)
         else dict()
     )
@@ -273,23 +277,42 @@ def enrich_args_via_cfg(parser_args, aux_parser, test_args=None):
     else:
         cli_args, _ = aux_parser.parse_known_args()

-    for dest in vars(parser_args):
-        if dest not in POSITIONAL or not hasattr(result, dest):
-            if dest in cli_args:
-                x = getattr(cli_args, dest)
-                r = convert_value(x) if isinstance(x, str) else x
-            elif cfg_args_all is not None and dest in cfg_args_all:
-                if isinstance(cfg_args_all[dest], list):
-                    r = [convert_value(i) for i in cfg_args_all[dest]]
+    def set_single_arg(argname, default_source_namespace, result_namespace):
+        if argname not in POSITIONAL or not hasattr(result, argname):
+            if argname in cli_args:
+                cli_provided_value = getattr(cli_args, argname)
+                r = (
+                    convert_value(cli_provided_value)
+                    if isinstance(cli_provided_value, str)
+                    else cli_provided_value
+                )
+            elif cfg_args_all is not None and argname in cfg_args_all:
+                if isinstance(cfg_args_all[argname], list):
+                    r = [convert_value(i) for i in cfg_args_all[argname]]
                 else:
-                    r = convert_value(cfg_args_all[dest])
+                    r = convert_value(cfg_args_all[argname])
             else:
-                r = getattr(parser_args, dest)
-            setattr(result, dest, r)
+                r = getattr(default_source_namespace, argname)
+            setattr(result_namespace, argname, r)
+
+    for top_level_argname in vars(parser_args):
+        if top_level_argname not in [cmd.name for cmd in SUPPORTED_COMMANDS]:
+            # this argument is a top-level argument
+            set_single_arg(top_level_argname, parser_args, result)
+        else:
+            # this argument is actually a subcommand namespace
+            enriched_command_namespace = argparse.Namespace()
+            command_namespace = getattr(parser_args, top_level_argname)
+            if command_namespace:
+                for argname in vars(command_namespace):
+                    set_single_arg(
+                        argname, command_namespace, enriched_command_namespace
                    )
+            setattr(result, top_level_argname, enriched_command_namespace)
     return result

-def _get_subcommand_args(parser_args):
+def _get_subcommand_args(subcommand_name, parser_args):
     """
     Get the union of values for the subcommand arguments from
     Project.looper, Project.looper.cli, and Project.looper.cli.all.
@@ -321,8 +344,8 @@ def _get_subcommand_args(parser_args):
         else dict()
     )
     args.update(
-        cfg_args[parser_args.command] or dict()
-        if parser_args.command in cfg_args
+        cfg_args[subcommand_name] or dict()
+        if subcommand_name in cfg_args
         else dict()
     )
     except (TypeError, KeyError, AttributeError, ValueError) as e:
@@ -449,7 +472,7 @@ def initiate_looper_config(
         return False

     if pep_path:
-        if is_registry_path(pep_path):
+        if is_pephub_registry_path(pep_path):
             pass
         else:
             pep_path = expandpath(pep_path)
@@ -537,12 +560,25 @@ def read_looper_config_file(looper_config_path: str) -> dict:

     # Expand paths in case ENV variables are used
     for k, v in return_dict.items():
+        if k == SAMPLE_PL_ARG or k == PROJECT_PL_ARG:
+            # Pipeline interfaces are resolved at a later point. Do it there only to maintain consistency. #474
+            pass
         if isinstance(v, str):
             v = expandpath(v)
-            if not os.path.isabs(v) and not is_registry_path(v):
-                return_dict[k] = os.path.join(config_dir_path, v)
-            else:
+            # TODO this is messy because is_pephub_registry needs to fail on anything NOT a pephub registry path
+            # https://github.com/pepkit/ubiquerg/issues/43
+            if is_PEP_file_type(v):
+                if not os.path.isabs(v):
+                    return_dict[k] = os.path.join(config_dir_path, v)
+                else:
+                    return_dict[k] = v
+            elif is_pephub_registry_path(v):
                 return_dict[k] = v
+            else:
+                if not os.path.isabs(v):
+                    return_dict[k] = os.path.join(config_dir_path, v)
+                else:
+                    return_dict[k] = v

     return return_dict
@@ -575,19 +611,23 @@ def dotfile_path(directory=os.getcwd(), must_exist=False):
         cur_dir = parent_dir

-def is_registry_path(input_string: str) -> bool:
+def is_PEP_file_type(input_string: str) -> bool:
+    """
+    Determine whether the provided path is a file type that looper can use to load a PEP
+
+    :param str input_string: path to check
+    :return bool: True if the path ends in a supported PEP file extension (yaml or csv)
+    """
+
+    PEP_FILE_TYPES = ["yaml", "csv"]
+
+    res = list(filter(input_string.endswith, PEP_FILE_TYPES)) != []
+    return res
+
+
+def is_pephub_registry_path(input_string: str) -> bool:
     """
     Check if input is a registry path to pephub

     :param str input_string: path to the PEP (or registry path)
     :return bool: True if input is a registry path
     """
-    try:
-        if input_string.endswith(".yaml"):
-            return False
-    except AttributeError:
-        raise RegistryPathException(
-            msg=f"Malformed registry path. Unable to parse {input_string} as a registry path."
-        )
     try:
         registry_path = RegistryPath(**parse_registry_path(input_string))
     except (ValidationError, TypeError):
@@ -767,3 +807,15 @@ def write_submit_script(fp, content, data):
     with open(fp, "w") as f:
         f.write(content)
     return fp
+
+
+def inspect_looper_config_file(looper_config_dict) -> None:
+    """
+    Inspect the looper config by printing it to the terminal.
+
+    :param dict looper_config_dict: dict representing the looper config
+    """
+    # Simply print this to terminal
+    print("LOOPER INSPECT")
+    for key, value in looper_config_dict.items():
+        print(f"{key} {value}")
diff --git a/looper_init.py b/looper_init.py
new file mode 100644
index 000000000..9d7a3c5f3
--- /dev/null
+++ b/looper_init.py
@@ -0,0 +1,68 @@
+# A simple utility, to be run in the root of a project, to prompt a user through
+# configuring a .looper.yaml file for a new project. To be used as `looper init`.
+
+import os
+
+cfg = {}
+
+print("This utility will walk you through creating a .looper.yaml file.")
+print("See `looper init --help` for details.")
+print("Use `looper run` afterwards to run the pipeline.")
+print("Press ^C at any time to quit.\n")
+
+looper_cfg_path = ".looper.yaml"  # not changeable
+
+if os.path.exists(looper_cfg_path):
+    print(f"File exists at '{looper_cfg_path}'.
Delete it to re-initialize.") + raise SystemExit + +DEFAULTS = { # What you get if you just press enter + "pep_config": "databio/example", + "output_dir": "results", + "piface_path": "pipeline_interface.yaml", + "project_name": os.path.basename(os.getcwd()), +} + + +cfg["project_name"] = ( + input(f"Project name: ({DEFAULTS['project_name']}) ") or DEFAULTS["project_name"] +) + +cfg["pep_config"] = ( + input(f"Registry path or file path to PEP: ({DEFAULTS['pep_config']}) ") + or DEFAULTS["pep_config"] +) + +if not os.path.exists(cfg["pep_config"]): + print(f"Warning: PEP file does not exist at '{cfg['pep_config']}'") + +cfg["output_dir"] = ( + input(f"Path to output directory: ({DEFAULTS['output_dir']}) ") + or DEFAULTS["output_dir"] +) + +# TODO: Right now this assumes you will have one pipeline interface, and a sample pipeline +# but this is not the only way you could configure things. + +piface_path = ( + input("Path to sample pipeline interface: (pipeline_interface.yaml) ") + or DEFAULTS["piface_path"] +) + +if not os.path.exists(piface_path): + print(f"Warning: file does not exist at {piface_path}") + +print(f"Writing config file to {looper_cfg_path}") +print(f"PEP path: {cfg['pep_config']}") +print(f"Pipeline interface path: {piface_path}") + + +with open(looper_cfg_path, "w") as fp: + fp.write( + f"""\ +pep_config: {cfg['pep_config']} +output_dir: {cfg['output_dir']} +pipeline_interfaces: + sample: {piface_path} +""" + ) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index d1848735a..a78b632dd 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -4,10 +4,11 @@ eido>=0.2.1 jinja2 logmuse>=0.2.0 pandas>=2.0.2 -pephubclient>=0.1.2 -peppy>=0.40.0 -pipestat>=0.8.0,<0.9.0 +pephubclient>=0.4.0 +pipestat>=0.9.2 +peppy>=0.40.2 pyyaml>=3.12 rich>=9.10.0 ubiquerg>=0.5.2 -yacman>=0.9.2 +yacman==0.9.3 +pydantic2-argparse>=0.9.2 \ No newline at end of file diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index f02a8bc9d..87d100866 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -4,3 +4,4 @@ pytest pytest-cov pytest-remotedata veracitools +GitPython \ No newline at end of file diff --git a/setup.py b/setup.py index a1150555b..db8d94595 100644 --- a/setup.py +++ b/setup.py @@ -79,7 +79,7 @@ def get_static(name, condition=None): license="BSD2", entry_points={ "console_scripts": [ - "looper = looper.__main__:main", + "looper = looper.cli_pydantic:main", "divvy = looper.__main__:divvy_main", ], }, diff --git a/tests/conftest.py b/tests/conftest.py index 29f601f4d..ef2176feb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,8 @@ +import shutil from contextlib import contextmanager import os import subprocess -from shutil import copyfile, rmtree +from shutil import copyfile, rmtree, copytree import tempfile from typing import * @@ -12,6 +13,8 @@ from looper.const import * +REPO_URL = "https://github.com/pepkit/hello_looper.git" + CFG = "project_config.yaml" PIPESTAT_CONFIG = "global_pipestat_config.yaml" PROJECT_CFG_PIPESTAT = "project_config_pipestat.yaml" @@ -43,7 +46,23 @@ def get_outdir(pth): """ with open(pth, "r") as conf_file: config_data = safe_load(conf_file) - return config_data[LOOPER_KEY][OUTDIR_KEY] + + output_path = config_data[OUTDIR_KEY] + dirname = os.path.dirname(pth) + + return os.path.join(dirname, output_path) + + +def get_project_config_path(looper_config_pth): + """ + Get project config file path from a looper 
    config file path, since the two are located relative to one another

+    :param str looper_config_pth: path to the looper config file
+    :return str: path to the project config file
+    """
+    dirname = os.path.dirname(looper_config_pth)
+
+    return os.path.join(dirname, "project/project_config.yaml")

 def _assert_content_in_files(fs: Union[str, Iterable[str]], query: str, negate: bool):
@@ -116,9 +135,16 @@ def test_args_expansion(pth=None, cmd=None, appendix=list(), dry=True) -> List[s
     :param bool dry: whether to append dry run flag
     :return list of strings to pass to looper.main for testing
     """
-    x = [cmd, "-d" if dry else ""]
+    # --looper-config .looper.yaml run --dry-run
+    # x = [cmd, "-d" if dry else ""]
+    x = []
+    if cmd:
+        x.append(cmd)
     if pth:
+        x.append("--looper-config")
         x.append(pth)
+    if dry:
+        x.append("--dry-run")
     x.extend(appendix)
     return x
@@ -170,61 +196,50 @@ def example_pep_piface_path_cfg(example_pep_piface_path):

 @pytest.fixture
 def prep_temp_pep(example_pep_piface_path):
-    # temp dir
-    td = tempfile.mkdtemp()
-    out_td = os.path.join(td, "output")
-    # ori paths
-    cfg_path = os.path.join(example_pep_piface_path, CFG)
-    output_schema_path = os.path.join(example_pep_piface_path, OS)
-    sample_table_path = os.path.join(example_pep_piface_path, ST)
-    piface1p_path = os.path.join(example_pep_piface_path, PIP.format("1"))
-    piface2p_path = os.path.join(example_pep_piface_path, PIP.format("2"))
-    piface1s_path = os.path.join(example_pep_piface_path, PIS.format("1"))
-    piface2s_path = os.path.join(example_pep_piface_path, PIS.format("2"))
+    # Get path to local copy of hello_looper
+    hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev")

-    res_proj_path = os.path.join(example_pep_piface_path, RES.format("project"))
-    res_samp_path = os.path.join(example_pep_piface_path, RES.format("sample"))
-    # temp copies
-    temp_path_cfg = os.path.join(td, CFG)
-    temp_path_output_schema = os.path.join(td, OS)
-    temp_path_sample_table = os.path.join(td, ST)
-    temp_path_piface1s = os.path.join(td, PIS.format("1"))
-    temp_path_piface2s = os.path.join(td, PIS.format("2"))
-    temp_path_piface1p = os.path.join(td, PIP.format("1"))
-    temp_path_piface2p = os.path.join(td, PIP.format("2"))
-    temp_path_res_proj = os.path.join(td, RES.format("project"))
-    temp_path_res_samp = os.path.join(td, RES.format("sample"))
-    # copying
-    copyfile(cfg_path, temp_path_cfg)
-    copyfile(sample_table_path, temp_path_sample_table)
-    copyfile(piface1s_path, temp_path_piface1s)
-    copyfile(piface2s_path, temp_path_piface2s)
-    copyfile(piface1p_path, temp_path_piface1p)
-    copyfile(piface2p_path, temp_path_piface2p)
-    copyfile(output_schema_path, temp_path_output_schema)
-    copyfile(res_proj_path, temp_path_res_proj)
-    copyfile(res_samp_path, temp_path_res_samp)
-    # modififactions
-    from yaml import dump, safe_load
-
-    with open(temp_path_cfg, "r") as f:
-        piface_data = safe_load(f)
-    piface_data[LOOPER_KEY][OUTDIR_KEY] = out_td
-    piface_data[LOOPER_KEY][CLI_KEY] = {}
-    piface_data[LOOPER_KEY][CLI_KEY]["runp"] = {}
-    piface_data[LOOPER_KEY][CLI_KEY]["runp"][PIPELINE_INTERFACES_KEY] = [
-        temp_path_piface1p,
-        temp_path_piface2p,
-    ]
-    piface_data[SAMPLE_MODS_KEY][CONSTANT_KEY][PIPELINE_INTERFACES_KEY] = [
-        temp_path_piface1s,
-        temp_path_piface2s,
-    ]
-    with open(temp_path_cfg, "w") as f:
-        dump(piface_data, f)
-
-    return temp_path_cfg
+    # Make local temp copy of hello_looper
+    d = tempfile.mkdtemp()
+    shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True)
+
+    advanced_dir = os.path.join(d, "advanced")
+    path_to_looper_config = os.path.join(advanced_dir, ".looper.yaml")
+
+    return
path_to_looper_config + + +@pytest.fixture +def prep_temp_pep_basic(example_pep_piface_path): + + # Get Path to local copy of hello_looper + hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") + + # Make local temp copy of hello_looper + d = tempfile.mkdtemp() + shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) + + advanced_dir = os.path.join(d, "intermediate") + path_to_looper_config = os.path.join(advanced_dir, ".looper.yaml") + + return path_to_looper_config + + +@pytest.fixture +def prep_temp_pep_csv(example_pep_piface_path): + + # Get Path to local copy of hello_looper + hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") + + # Make local temp copy of hello_looper + d = tempfile.mkdtemp() + shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) + + advanced_dir = os.path.join(d, "csv") + path_to_looper_config = os.path.join(advanced_dir, ".looper.yaml") + + return path_to_looper_config @pytest.fixture @@ -249,99 +264,51 @@ def prep_temp_config_with_pep(example_pep_piface_path): @pytest.fixture -def prepare_pep_with_dot_file(prep_temp_pep): - pep_config = prep_temp_pep - with open(pep_config) as f: - pep_data = safe_load(f) +def prep_temp_pep_pipestat(example_pep_piface_path): + + # Get Path to local copy of hello_looper + + hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") + + # Make local temp copy of hello_looper + d = tempfile.mkdtemp() + shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) - output_dir = pep_data["looper"]["output_dir"] - project_piface = pep_data["looper"]["cli"]["runp"]["pipeline_interfaces"] - sample_piface = pep_data["sample_modifiers"]["append"]["pipeline_interfaces"] + advanced_dir = os.path.join(d, "pipestat") + path_to_looper_config = os.path.join(advanced_dir, ".looper.yaml") - pep_data.pop("looper") - pep_data["sample_modifiers"].pop("append") + return path_to_looper_config - with open(pep_config, "w") as f: - config = dump(pep_data, f) - looper_config = { - "pep_config": pep_config, - "output_dir": output_dir, - "pipeline_interfaces": { - "sample": sample_piface, - "project": project_piface, - }, - } +@pytest.fixture +def prep_temp_pep_pipestat_advanced(example_pep_piface_path): + + # Get Path to local copy of hello_looper - # looper_config_path = os.path.join(os.path.dirname(pep_config), "looper_config.yaml") - # - # with open(looper_config_path, "w") as f: - # config = dump(looper_config, f) - # - # looper_dot_file_content = {"looper_config": looper_config_path} + hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") - dot_file_path = ".looper.yaml" - with open(dot_file_path, "w") as f: - config = dump(looper_config, f) + # Make local temp copy of hello_looper + d = tempfile.mkdtemp() + shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) - return dot_file_path + advanced_dir = os.path.join(d, "advanced") + path_to_looper_config = os.path.join(advanced_dir, ".looper_advanced_pipestat.yaml") + + return path_to_looper_config @pytest.fixture -def prep_temp_pep_pipestat(example_pep_piface_path): - # TODO this should be combined with the other prep_temp_pep - # temp dir - td = tempfile.mkdtemp() - out_td = os.path.join(td, "output") - # ori paths +def prep_temp_pep_pephub(example_pep_piface_path): - cfg_path = os.path.join(example_pep_piface_path, LOOPER_CFG) - project_cfg_pipestat_path = os.path.join( - example_pep_piface_path, PROJECT_CFG_PIPESTAT - ) - output_schema_path = 
os.path.join(example_pep_piface_path, PIPESTAT_OS) + # Get Path to local copy of hello_looper - sample_table_path = os.path.join(example_pep_piface_path, ST) - piface1s_path = os.path.join(example_pep_piface_path, PIPESTAT_PI) - piface1p_path = os.path.join(example_pep_piface_path, PIPESTAT_PI_PRJ) + hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") - res_proj_path = os.path.join(example_pep_piface_path, RES.format("project")) - res_samp_path = os.path.join(example_pep_piface_path, RES.format("sample")) - # temp copies - temp_path_cfg = os.path.join(td, LOOPER_CFG) - temp_path_project_cfg_pipestat = os.path.join(td, PROJECT_CFG_PIPESTAT) - temp_path_output_schema = os.path.join(td, PIPESTAT_OS) + # Make local temp copy of hello_looper + d = tempfile.mkdtemp() + shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) - temp_path_sample_table = os.path.join(td, ST) - temp_path_piface1s = os.path.join(td, PIPESTAT_PI) - temp_path_piface1p = os.path.join(td, PIPESTAT_PI_PRJ) - temp_path_res_proj = os.path.join(td, RES.format("project")) - temp_path_res_samp = os.path.join(td, RES.format("sample")) - # copying - copyfile(cfg_path, temp_path_cfg) - copyfile(project_cfg_pipestat_path, temp_path_project_cfg_pipestat) + advanced_dir = os.path.join(d, "pephub") + path_to_looper_config = os.path.join(advanced_dir, ".looper.yaml") - copyfile(sample_table_path, temp_path_sample_table) - copyfile(piface1s_path, temp_path_piface1s) - copyfile(piface1p_path, temp_path_piface1p) - copyfile(output_schema_path, temp_path_output_schema) - copyfile(res_proj_path, temp_path_res_proj) - copyfile(res_samp_path, temp_path_res_samp) - # modifications - from yaml import dump, safe_load - - with open(temp_path_cfg, "r") as f: - piface_data = safe_load(f) - piface_data[LOOPER_KEY][OUTDIR_KEY] = out_td - piface_data[LOOPER_KEY][CLI_KEY] = {} - piface_data[LOOPER_KEY][CLI_KEY]["runp"] = {} - piface_data[LOOPER_KEY][CLI_KEY]["runp"][PIPELINE_INTERFACES_KEY] = [ - temp_path_piface1p, - ] - piface_data[SAMPLE_MODS_KEY][CONSTANT_KEY][PIPELINE_INTERFACES_KEY] = [ - temp_path_piface1s, - ] - with open(temp_path_cfg, "w") as f: - dump(piface_data, f) - - return temp_path_cfg + return path_to_looper_config diff --git a/tests/data/hello_looper-dev/.looper.yaml b/tests/data/hello_looper-dev/.looper.yaml new file mode 100644 index 000000000..e812a1ea8 --- /dev/null +++ b/tests/data/hello_looper-dev/.looper.yaml @@ -0,0 +1,4 @@ +pep_config: ./project/project_config.yaml # pephub registry path or local path +output_dir: "./results" +pipeline_interfaces: + sample: ../pipeline/pipeline_interface.yaml diff --git a/tests/data/hello_looper-dev/README.md b/tests/data/hello_looper-dev/README.md new file mode 100644 index 000000000..6c213b1a9 --- /dev/null +++ b/tests/data/hello_looper-dev/README.md @@ -0,0 +1,20 @@ +# Hello World! example for looper + +This repository provides minimal working examples for the [looper pipeline submission engine](http://pep.databio.org/looper). + +This repository contains examples + +1. `/minimal` - A basic example pipeline and project. +2. `/intermediate` - An intermediate example pipeline and project with a couple extra options. +3. `/advanced` - A more advanced example, showcasing the capabilities of Looper. +4. `/pephub` - Example of how to point looper to PEPhub. +5. `/pipestat` - Example of a pipeline that uses pipestat for recording results. +6. `/csv` - How to use a pipeline with a CSV sample table (no YAML config) + +Each example contains: + +1. 
A looper config file (`.looper.yaml`). +2. Sample data plus metadata in PEP format (or pointer to PEPhub). +3. A looper-compatible pipeline. + +Explanation and results of running the above examples can be found at [Looper: Hello World](https://pep.databio.org/looper/code/hello-world/) diff --git a/tests/data/hello_looper-dev/advanced/.looper.yaml b/tests/data/hello_looper-dev/advanced/.looper.yaml new file mode 100644 index 000000000..d2c5797f8 --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/.looper.yaml @@ -0,0 +1,10 @@ +pep_config: project/project_config.yaml +output_dir: "results" +pipeline_interfaces: + sample: + - ../pipeline/pipeline_interface1_sample.yaml + - ../pipeline/pipeline_interface2_sample.yaml + project: + - ../pipeline/pipeline_interface1_project.yaml + - ../pipeline/pipeline_interface2_project.yaml + diff --git a/tests/data/hello_looper-dev/advanced/.looper_advanced_pipestat.yaml b/tests/data/hello_looper-dev/advanced/.looper_advanced_pipestat.yaml new file mode 100644 index 000000000..74da1a3fb --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/.looper_advanced_pipestat.yaml @@ -0,0 +1,9 @@ +pep_config: project/project_config.yaml +output_dir: "results" +pipeline_interfaces: + sample: + - ../pipeline/pipestat_pipeline_interface1_sample.yaml + - ../pipeline/pipestat_pipeline_interface2_sample.yaml +pipestat: + results_file_path: results.yaml + flag_file_dir: results/flags \ No newline at end of file diff --git a/tests/data/hello_looper-dev/advanced/pipeline/output_schema.yaml b/tests/data/hello_looper-dev/advanced/pipeline/output_schema.yaml new file mode 100644 index 000000000..8bc1f6f8e --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/pipeline/output_schema.yaml @@ -0,0 +1,27 @@ +description: Sample objects produced by test pipeline. 
+properties: + samples: + type: array + items: + type: object + properties: + test_property: + type: string + description: "Test sample property" + path: "~/sample/{sample_name}_file.txt" + test_property1: + type: string + description: "Test sample property" + path: "~/sample/{sample_name}_file1.txt" + test_property: + type: image + title: "Test title" + description: "Test project property" + thumbnail_path: "~/test_{name}.png" + path: "~/test_{name}.pdf" + test_property1: + type: image + title: "Test title1" + description: "Test project property1" + thumbnail_path: "~/test_{name}.png" + path: "~/test_{name}1.pdf" diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_project.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_project.yaml new file mode 100644 index 000000000..cddc14b76 --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_project.yaml @@ -0,0 +1,11 @@ +pipeline_name: PIPELINE1 +pipeline_type: project +output_schema: output_schema.yaml +var_templates: + path: "{looper.piface_dir}/pipelines/col_pipeline1.py" +command_template: > + {pipeline.var_templates.path} --project-name {project.name} + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_sample.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_sample.yaml new file mode 100644 index 000000000..43638d923 --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface1_sample.yaml @@ -0,0 +1,15 @@ +pipeline_name: PIPELINE1 +pipeline_type: sample +input_schema: https://schema.databio.org/pep/2.0.0.yaml +output_schema: output_schema.yaml +var_templates: + path: "{looper.piface_dir}/pipelines/pipeline1.py" +pre_submit: + python_functions: + - looper.write_sample_yaml +command_template: > + {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_project.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_project.yaml new file mode 100644 index 000000000..7c4a42238 --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_project.yaml @@ -0,0 +1,13 @@ +pipeline_name: OTHER_PIPELINE2 +pipeline_type: project +output_schema: output_schema.yaml +var_templates: + path: "{looper.piface_dir}/pipelines/col_pipeline2.py" +command_template: > + {pipeline.var_templates.path} --project-name {project.name} +compute: + size_dependent_variables: resources-project.tsv + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_sample.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_sample.yaml new file mode 100644 index 000000000..987f7873d --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipeline_interface2_sample.yaml @@ -0,0 +1,16 @@ +pipeline_name: OTHER_PIPELINE2 +pipeline_type: sample +output_schema: output_schema.yaml +var_templates: + path: "{looper.piface_dir}/pipelines/other_pipeline2.py" +pre_submit: + python_functions: + - looper.write_sample_yaml +command_template: > + {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} +compute: + size_dependent_variables: resources-sample.tsv + +bioconductor: + readFunName: readData + readFunPath: 
readData.R diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipestat_output_schema.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipestat_output_schema.yaml new file mode 100644 index 000000000..d6b05c2ac --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipestat_output_schema.yaml @@ -0,0 +1,5 @@ +pipeline_name: example_pipestat_pipeline +samples: + number_of_lines: + type: integer + description: "Number of lines in the input file." \ No newline at end of file diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface1_sample.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface1_sample.yaml new file mode 100644 index 000000000..ff40c411a --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface1_sample.yaml @@ -0,0 +1,15 @@ +pipeline_name: example_pipestat_pipeline +pipeline_type: sample +input_schema: https://schema.databio.org/pep/2.0.0.yaml +output_schema: pipestat_output_schema.yaml +var_templates: + path: "{looper.piface_dir}/pipelines/pipeline1.py" +pre_submit: + python_functions: + - looper.write_sample_yaml +command_template: > + {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface2_sample.yaml b/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface2_sample.yaml new file mode 100644 index 000000000..79dcf50f8 --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/pipeline/pipestat_pipeline_interface2_sample.yaml @@ -0,0 +1,17 @@ +pipeline_name: example_pipestat_pipeline +pipeline_type: sample +input_schema: https://schema.databio.org/pep/2.0.0.yaml +output_schema: pipestat_output_schema.yaml +var_templates: + path: "{looper.piface_dir}/pipelines/other_pipeline2.py" +pre_submit: + python_functions: + - looper.write_sample_yaml +command_template: > + {pipeline.var_templates.path} --sample-name {sample.sample_name} --req-attr {sample.attr} +compute: + size_dependent_variables: resources-sample.tsv + +bioconductor: + readFunName: readData + readFunPath: readData.R diff --git a/tests/data/hello_looper-dev/advanced/pipeline/readData.R b/tests/data/hello_looper-dev/advanced/pipeline/readData.R new file mode 100644 index 000000000..89557a11b --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/pipeline/readData.R @@ -0,0 +1,10 @@ +readData = function(project, sampleName="sample1") { + lapply(getOutputsBySample(project, sampleName), function(x) { + lapply(x, function(x1){ + message("Reading: ", basename(x1)) + df = read.table(x1, stringsAsFactors=F) + colnames(df)[1:3] = c('chr', 'start', 'end') + GenomicRanges::GRanges(df) + }) + }) +} diff --git a/tests/data/hello_looper-dev/advanced/pipeline/resources-project.tsv b/tests/data/hello_looper-dev/advanced/pipeline/resources-project.tsv new file mode 100644 index 000000000..4efd0f19c --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/pipeline/resources-project.tsv @@ -0,0 +1,6 @@ +max_file_size cores mem time +0.05 1 12000 00-01:00:00 +0.5 1 16000 00-01:00:00 +1 1 16000 00-01:00:00 +10 1 16000 00-01:00:00 +NaN 1 32000 00-02:00:00 diff --git a/tests/data/hello_looper-dev/advanced/pipeline/resources-sample.tsv b/tests/data/hello_looper-dev/advanced/pipeline/resources-sample.tsv new file mode 100644 index 000000000..20ec284b6 --- /dev/null +++ 
b/tests/data/hello_looper-dev/advanced/pipeline/resources-sample.tsv @@ -0,0 +1,7 @@ +max_file_size cores mem time +0.001 1 8000 00-04:00:00 +0.05 2 12000 00-08:00:00 +0.5 4 16000 00-12:00:00 +1 8 16000 00-24:00:00 +10 16 32000 02-00:00:00 +NaN 32 32000 04-00:00:00 diff --git a/tests/data/hello_looper-dev/advanced/project/annotation_sheet.csv b/tests/data/hello_looper-dev/advanced/project/annotation_sheet.csv new file mode 100644 index 000000000..2d0e1265c --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/project/annotation_sheet.csv @@ -0,0 +1,4 @@ +sample_name,protocol,data_source,SRR,Sample_geo_accession,read1,read2 +sample1,PROTO1,SRA,SRR5210416,GSM2471255,SRA_1,SRA_2 +sample2,PROTO1,SRA,SRR5210450,GSM2471300,SRA_1,SRA_2 +sample3,PROTO2,SRA,SRR5210398,GSM2471249,SRA_1,SRA_2 diff --git a/tests/data/hello_looper-dev/advanced/project/project_config.yaml b/tests/data/hello_looper-dev/advanced/project/project_config.yaml new file mode 100644 index 000000000..54db02372 --- /dev/null +++ b/tests/data/hello_looper-dev/advanced/project/project_config.yaml @@ -0,0 +1,12 @@ +name: looper_advanced_test +pep_version: "2.0.0" +sample_table: annotation_sheet.csv + +sample_modifiers: + append: + attr: "val" + derive: + attributes: [read1, read2] + sources: + SRA_1: "{SRR}_1.fastq.gz" + SRA_2: "{SRR}_2.fastq.gz" diff --git a/tests/data/hello_looper-dev/csv/.looper.yaml b/tests/data/hello_looper-dev/csv/.looper.yaml new file mode 100644 index 000000000..c88f0c9a5 --- /dev/null +++ b/tests/data/hello_looper-dev/csv/.looper.yaml @@ -0,0 +1,5 @@ +pep_config: project/sample_annotation.csv # local path to CSV +# pep_config: pepkit/hello_looper:default # you can also use a pephub registry path +output_dir: "results" +pipeline_interfaces: + sample: pipeline/pipeline_interface.yaml diff --git a/tests/data/hello_looper-dev/csv/data/frog1_data.txt b/tests/data/hello_looper-dev/csv/data/frog1_data.txt new file mode 100644 index 000000000..815c0cf7c --- /dev/null +++ b/tests/data/hello_looper-dev/csv/data/frog1_data.txt @@ -0,0 +1,4 @@ +ribbit +ribbit +ribbit +CROAK! diff --git a/tests/data/hello_looper-dev/csv/data/frog2_data.txt b/tests/data/hello_looper-dev/csv/data/frog2_data.txt new file mode 100644 index 000000000..e6fdd5350 --- /dev/null +++ b/tests/data/hello_looper-dev/csv/data/frog2_data.txt @@ -0,0 +1,7 @@ +ribbit +ribbit +ribbit + +ribbit, ribbit +ribbit, ribbit +CROAK! 
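The csv example above exercises the new CSV-as-PEP loading introduced earlier in this diff. A usage sketch for the two path-dispatch helpers added to looper/utils.py; the printed values are expected results, not verified output:

    from looper.utils import is_PEP_file_type, is_pephub_registry_path

    print(is_PEP_file_type("project/sample_annotation.csv"))  # True (ends in csv)
    print(is_PEP_file_type("project/project_config.yaml"))    # True (ends in yaml)
    print(is_PEP_file_type("pepkit/hello_looper:default"))    # False
    print(is_pephub_registry_path("pepkit/hello_looper:default"))  # True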
diff --git a/tests/data/hello_looper-dev/csv/pipeline/count_lines.sh b/tests/data/hello_looper-dev/csv/pipeline/count_lines.sh new file mode 100755 index 000000000..71b887fe7 --- /dev/null +++ b/tests/data/hello_looper-dev/csv/pipeline/count_lines.sh @@ -0,0 +1,3 @@ +#!/bin/bash +linecount=`wc -l $1 | sed -E 's/^[[:space:]]+//' | cut -f1 -d' '` +echo "Number of lines: $linecount" diff --git a/tests/data/hello_looper-dev/csv/pipeline/pipeline_interface.yaml b/tests/data/hello_looper-dev/csv/pipeline/pipeline_interface.yaml new file mode 100644 index 000000000..732e69761 --- /dev/null +++ b/tests/data/hello_looper-dev/csv/pipeline/pipeline_interface.yaml @@ -0,0 +1,6 @@ +pipeline_name: count_lines +pipeline_type: sample +var_templates: + pipeline: '{looper.piface_dir}/count_lines.sh' +command_template: > + {pipeline.var_templates.pipeline} {sample.file} diff --git a/tests/data/hello_looper-dev/csv/pipeline/pipeline_interface_project.yaml b/tests/data/hello_looper-dev/csv/pipeline/pipeline_interface_project.yaml new file mode 100644 index 000000000..9063c7d61 --- /dev/null +++ b/tests/data/hello_looper-dev/csv/pipeline/pipeline_interface_project.yaml @@ -0,0 +1,6 @@ +pipeline_name: count_lines +pipeline_type: project +var_templates: + pipeline: '{looper.piface_dir}/count_lines.sh' +command_template: > + {pipeline.var_templates.pipeline} "data/*.txt" diff --git a/tests/data/hello_looper-dev/csv/project/sample_annotation.csv b/tests/data/hello_looper-dev/csv/project/sample_annotation.csv new file mode 100644 index 000000000..05bf4d172 --- /dev/null +++ b/tests/data/hello_looper-dev/csv/project/sample_annotation.csv @@ -0,0 +1,3 @@ +sample_name,library,file,toggle +frog_1,anySampleType,data/frog1_data.txt,1 +frog_2,anySampleType,data/frog2_data.txt,1 diff --git a/tests/data/hello_looper-dev/intermediate/.looper.yaml b/tests/data/hello_looper-dev/intermediate/.looper.yaml new file mode 100644 index 000000000..19fac81d4 --- /dev/null +++ b/tests/data/hello_looper-dev/intermediate/.looper.yaml @@ -0,0 +1,5 @@ +pep_config: project/project_config.yaml # local path to pep config +# pep_config: pepkit/hello_looper:default # you can also use a pephub registry path +output_dir: "results" +pipeline_interfaces: + sample: pipeline/pipeline_interface.yaml diff --git a/tests/data/hello_looper-dev/intermediate/.looper_project.yaml b/tests/data/hello_looper-dev/intermediate/.looper_project.yaml new file mode 100644 index 000000000..b44ef03b7 --- /dev/null +++ b/tests/data/hello_looper-dev/intermediate/.looper_project.yaml @@ -0,0 +1,4 @@ +pep_config: project/project_config.yaml # local path to pep config +output_dir: "results" +pipeline_interfaces: + project: pipeline/pipeline_interface_project.yaml diff --git a/tests/data/hello_looper-dev/intermediate/data/frog_1.txt b/tests/data/hello_looper-dev/intermediate/data/frog_1.txt new file mode 100644 index 000000000..815c0cf7c --- /dev/null +++ b/tests/data/hello_looper-dev/intermediate/data/frog_1.txt @@ -0,0 +1,4 @@ +ribbit +ribbit +ribbit +CROAK! diff --git a/tests/data/hello_looper-dev/intermediate/data/frog_2.txt b/tests/data/hello_looper-dev/intermediate/data/frog_2.txt new file mode 100644 index 000000000..e6fdd5350 --- /dev/null +++ b/tests/data/hello_looper-dev/intermediate/data/frog_2.txt @@ -0,0 +1,7 @@ +ribbit +ribbit +ribbit + +ribbit, ribbit +ribbit, ribbit +CROAK! 
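The count_lines pipeline interfaces above attach the shell script to each sample through a command_template. A simplified, hypothetical illustration of the substitution; looper's real rendering lives in its submission machinery and supports additional namespaces such as {looper.*} and {project.*}:

    from types import SimpleNamespace

    # Stand-ins for the sample and pipeline namespaces looper would supply.
    sample = SimpleNamespace(file="data/frog1_data.txt")
    pipeline = SimpleNamespace(
        var_templates=SimpleNamespace(pipeline="/pipelines/count_lines.sh")
    )
    template = "{pipeline.var_templates.pipeline} {sample.file}"
    print(template.format(pipeline=pipeline, sample=sample))
    # /pipelines/count_lines.sh data/frog1_data.txt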
diff --git a/tests/data/hello_looper-dev/intermediate/pipeline/count_lines.sh b/tests/data/hello_looper-dev/intermediate/pipeline/count_lines.sh new file mode 100755 index 000000000..71b887fe7 --- /dev/null +++ b/tests/data/hello_looper-dev/intermediate/pipeline/count_lines.sh @@ -0,0 +1,3 @@ +#!/bin/bash +linecount=`wc -l $1 | sed -E 's/^[[:space:]]+//' | cut -f1 -d' '` +echo "Number of lines: $linecount" diff --git a/tests/data/hello_looper-dev/intermediate/pipeline/pipeline_interface.yaml b/tests/data/hello_looper-dev/intermediate/pipeline/pipeline_interface.yaml new file mode 100644 index 000000000..732e69761 --- /dev/null +++ b/tests/data/hello_looper-dev/intermediate/pipeline/pipeline_interface.yaml @@ -0,0 +1,6 @@ +pipeline_name: count_lines +pipeline_type: sample +var_templates: + pipeline: '{looper.piface_dir}/count_lines.sh' +command_template: > + {pipeline.var_templates.pipeline} {sample.file} diff --git a/tests/data/hello_looper-dev/intermediate/pipeline/pipeline_interface_project.yaml b/tests/data/hello_looper-dev/intermediate/pipeline/pipeline_interface_project.yaml new file mode 100644 index 000000000..9063c7d61 --- /dev/null +++ b/tests/data/hello_looper-dev/intermediate/pipeline/pipeline_interface_project.yaml @@ -0,0 +1,6 @@ +pipeline_name: count_lines +pipeline_type: project +var_templates: + pipeline: '{looper.piface_dir}/count_lines.sh' +command_template: > + {pipeline.var_templates.pipeline} "data/*.txt" diff --git a/tests/data/hello_looper-dev/intermediate/project/project_config.yaml b/tests/data/hello_looper-dev/intermediate/project/project_config.yaml new file mode 100644 index 000000000..2ba1efdde --- /dev/null +++ b/tests/data/hello_looper-dev/intermediate/project/project_config.yaml @@ -0,0 +1,7 @@ +pep_version: 2.0.0 +sample_table: sample_annotation.csv +sample_modifiers: + derive: + attributes: [file] + sources: + source1: "data/{sample_name}.txt" \ No newline at end of file diff --git a/tests/data/hello_looper-dev/intermediate/project/sample_annotation.csv b/tests/data/hello_looper-dev/intermediate/project/sample_annotation.csv new file mode 100644 index 000000000..8a2a0565f --- /dev/null +++ b/tests/data/hello_looper-dev/intermediate/project/sample_annotation.csv @@ -0,0 +1,3 @@ +sample_name,library,file,toggle +frog_1,anySampleType,source1,1 +frog_2,anySampleType,source1,1 diff --git a/tests/data/hello_looper-dev/minimal/.looper.yaml b/tests/data/hello_looper-dev/minimal/.looper.yaml new file mode 100644 index 000000000..19fac81d4 --- /dev/null +++ b/tests/data/hello_looper-dev/minimal/.looper.yaml @@ -0,0 +1,5 @@ +pep_config: project/project_config.yaml # local path to pep config +# pep_config: pepkit/hello_looper:default # you can also use a pephub registry path +output_dir: "results" +pipeline_interfaces: + sample: pipeline/pipeline_interface.yaml diff --git a/tests/data/hello_looper-dev/minimal/data/frog_1.txt b/tests/data/hello_looper-dev/minimal/data/frog_1.txt new file mode 100644 index 000000000..815c0cf7c --- /dev/null +++ b/tests/data/hello_looper-dev/minimal/data/frog_1.txt @@ -0,0 +1,4 @@ +ribbit +ribbit +ribbit +CROAK! diff --git a/tests/data/hello_looper-dev/minimal/data/frog_2.txt b/tests/data/hello_looper-dev/minimal/data/frog_2.txt new file mode 100644 index 000000000..e6fdd5350 --- /dev/null +++ b/tests/data/hello_looper-dev/minimal/data/frog_2.txt @@ -0,0 +1,7 @@ +ribbit +ribbit +ribbit + +ribbit, ribbit +ribbit, ribbit +CROAK! 
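With `minimal/.looper.yaml` in place, that fixture is runnable end to end. The smoketests later in this diff drive looper through its Python entry point rather than a subprocess; a sketch mirroring that call pattern, using the fixture path added above:

```python
# Sketch mirroring how the smoketests in this diff invoke looper in-process.
from looper.cli_pydantic import main

looper_cfg = "tests/data/hello_looper-dev/minimal/.looper.yaml"
result = main(test_args=["run", "--looper-config", looper_cfg, "--dry-run"])
```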
diff --git a/tests/data/hello_looper-dev/minimal/pipeline/count_lines.sh b/tests/data/hello_looper-dev/minimal/pipeline/count_lines.sh new file mode 100755 index 000000000..71b887fe7 --- /dev/null +++ b/tests/data/hello_looper-dev/minimal/pipeline/count_lines.sh @@ -0,0 +1,3 @@ +#!/bin/bash +linecount=`wc -l $1 | sed -E 's/^[[:space:]]+//' | cut -f1 -d' '` +echo "Number of lines: $linecount" diff --git a/tests/data/hello_looper-dev/minimal/pipeline/pipeline_interface.yaml b/tests/data/hello_looper-dev/minimal/pipeline/pipeline_interface.yaml new file mode 100644 index 000000000..732e69761 --- /dev/null +++ b/tests/data/hello_looper-dev/minimal/pipeline/pipeline_interface.yaml @@ -0,0 +1,6 @@ +pipeline_name: count_lines +pipeline_type: sample +var_templates: + pipeline: '{looper.piface_dir}/count_lines.sh' +command_template: > + {pipeline.var_templates.pipeline} {sample.file} diff --git a/tests/data/hello_looper-dev/minimal/project/project_config.yaml b/tests/data/hello_looper-dev/minimal/project/project_config.yaml new file mode 100644 index 000000000..5456cca30 --- /dev/null +++ b/tests/data/hello_looper-dev/minimal/project/project_config.yaml @@ -0,0 +1,2 @@ +pep_version: 2.0.0 +sample_table: sample_annotation.csv \ No newline at end of file diff --git a/tests/data/hello_looper-dev/minimal/project/sample_annotation.csv b/tests/data/hello_looper-dev/minimal/project/sample_annotation.csv new file mode 100644 index 000000000..97f223700 --- /dev/null +++ b/tests/data/hello_looper-dev/minimal/project/sample_annotation.csv @@ -0,0 +1,3 @@ +sample_name,library,file,toggle +frog_1,anySampleType,data/frog_1.txt,1 +frog_2,anySampleType,data/frog_2.txt,1 diff --git a/tests/data/hello_looper-dev/pephub/.looper.yaml b/tests/data/hello_looper-dev/pephub/.looper.yaml new file mode 100644 index 000000000..00e60ded6 --- /dev/null +++ b/tests/data/hello_looper-dev/pephub/.looper.yaml @@ -0,0 +1,4 @@ +pep_config: pepkit/hello_looper:default # pephub registry path or local path +output_dir: results +pipeline_interfaces: + sample: pipeline/pipeline_interface.yaml diff --git a/tests/data/hello_looper-dev/pephub/data/frog1_data.txt b/tests/data/hello_looper-dev/pephub/data/frog1_data.txt new file mode 100644 index 000000000..815c0cf7c --- /dev/null +++ b/tests/data/hello_looper-dev/pephub/data/frog1_data.txt @@ -0,0 +1,4 @@ +ribbit +ribbit +ribbit +CROAK! diff --git a/tests/data/hello_looper-dev/pephub/data/frog2_data.txt b/tests/data/hello_looper-dev/pephub/data/frog2_data.txt new file mode 100644 index 000000000..e6fdd5350 --- /dev/null +++ b/tests/data/hello_looper-dev/pephub/data/frog2_data.txt @@ -0,0 +1,7 @@ +ribbit +ribbit +ribbit + +ribbit, ribbit +ribbit, ribbit +CROAK! 
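Note that `pephub/.looper.yaml` above sets `pep_config` to a PEPhub registry path rather than a local file, so looper must decide which loader to use. A sketch using the helpers that the smoketests in this diff exercise; the registry example assumes the `namespace/name:tag` convention shown in the fixture's own comment:

```python
# Sketch of the two pep_config forms, using helpers from looper.utils that
# the smoketests below exercise.
from looper.utils import is_PEP_file_type, is_pephub_registry_path

print(is_PEP_file_type("project/sample_annotation.csv"))       # local file -> True
print(is_pephub_registry_path("pepkit/hello_looper:default"))  # registry path -> True
```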
diff --git a/tests/data/hello_looper-dev/pephub/pipeline/count_lines.sh b/tests/data/hello_looper-dev/pephub/pipeline/count_lines.sh new file mode 100755 index 000000000..71b887fe7 --- /dev/null +++ b/tests/data/hello_looper-dev/pephub/pipeline/count_lines.sh @@ -0,0 +1,3 @@ +#!/bin/bash +linecount=`wc -l $1 | sed -E 's/^[[:space:]]+//' | cut -f1 -d' '` +echo "Number of lines: $linecount" diff --git a/tests/data/hello_looper-dev/pephub/pipeline/pipeline_interface.yaml b/tests/data/hello_looper-dev/pephub/pipeline/pipeline_interface.yaml new file mode 100644 index 000000000..732e69761 --- /dev/null +++ b/tests/data/hello_looper-dev/pephub/pipeline/pipeline_interface.yaml @@ -0,0 +1,6 @@ +pipeline_name: count_lines +pipeline_type: sample +var_templates: + pipeline: '{looper.piface_dir}/count_lines.sh' +command_template: > + {pipeline.var_templates.pipeline} {sample.file} diff --git a/tests/data/hello_looper-dev/pephub/pipeline/pipeline_interface_project.yaml b/tests/data/hello_looper-dev/pephub/pipeline/pipeline_interface_project.yaml new file mode 100644 index 000000000..9063c7d61 --- /dev/null +++ b/tests/data/hello_looper-dev/pephub/pipeline/pipeline_interface_project.yaml @@ -0,0 +1,6 @@ +pipeline_name: count_lines +pipeline_type: project +var_templates: + pipeline: '{looper.piface_dir}/count_lines.sh' +command_template: > + {pipeline.var_templates.pipeline} "data/*.txt" diff --git a/tests/data/hello_looper-dev/pipestat/.looper.yaml b/tests/data/hello_looper-dev/pipestat/.looper.yaml new file mode 100644 index 000000000..852c6fa41 --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/.looper.yaml @@ -0,0 +1,8 @@ +pep_config: ./project/project_config.yaml # pephub registry path or local path +output_dir: ./results +pipeline_interfaces: + sample: ./pipeline_pipestat/pipeline_interface.yaml + project: ./pipeline_pipestat/pipeline_interface_project.yaml +pipestat: + results_file_path: results.yaml + flag_file_dir: results/flags \ No newline at end of file diff --git a/tests/data/hello_looper-dev/pipestat/.looper_pipestat_shell.yaml b/tests/data/hello_looper-dev/pipestat/.looper_pipestat_shell.yaml new file mode 100644 index 000000000..fb645a9bd --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/.looper_pipestat_shell.yaml @@ -0,0 +1,7 @@ +pep_config: ./project/project_config.yaml # pephub registry path or local path +output_dir: ./results +pipeline_interfaces: + sample: ./pipeline_pipestat/pipeline_interface_shell.yaml +pipestat: + results_file_path: results.yaml + flag_file_dir: results/flags \ No newline at end of file diff --git a/tests/data/hello_looper-dev/pipestat/data/frog_1.txt b/tests/data/hello_looper-dev/pipestat/data/frog_1.txt new file mode 100644 index 000000000..815c0cf7c --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/data/frog_1.txt @@ -0,0 +1,4 @@ +ribbit +ribbit +ribbit +CROAK! diff --git a/tests/data/hello_looper-dev/pipestat/data/frog_2.txt b/tests/data/hello_looper-dev/pipestat/data/frog_2.txt new file mode 100644 index 000000000..e6fdd5350 --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/data/frog_2.txt @@ -0,0 +1,7 @@ +ribbit +ribbit +ribbit + +ribbit, ribbit +ribbit, ribbit +CROAK! 
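The `pipestat:` block in `pipestat/.looper.yaml` above tells looper where reported results land (`results_file_path`) and where status flags are written (`flag_file_dir`); the pipelines in this fixture report through pipestat instead of writing ad-hoc outputs. A minimal sketch of that reporting surface, with constructor and method arguments mirroring the `count_lines.py` pipeline added below (the reported value here is illustrative):

```python
# Sketch of the pipestat calls these fixtures rely on; mirrors count_lines.py
# below and the status handling in tests/test_comprehensive.py.
import pipestat

psm = pipestat.PipestatManager(
    schema_path="pipeline_pipestat/pipestat_output_schema.yaml",
    results_file_path="results.yaml",
    record_identifier="frog_1",
)
psm.report(record_identifier="frog_1", values={"number_of_lines": 4})
psm.set_status(record_identifier="frog_1", status_identifier="completed")
```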
diff --git a/tests/data/hello_looper-dev/pipestat/looper_pipestat_config.yaml b/tests/data/hello_looper-dev/pipestat/looper_pipestat_config.yaml new file mode 100644 index 000000000..0a04ac6f9 --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/looper_pipestat_config.yaml @@ -0,0 +1,7 @@ +results_file_path: /home/drc/GITHUB/hello_looper/hello_looper/pipestat/./results.yaml +flag_file_dir: /home/drc/GITHUB/hello_looper/hello_looper/pipestat/./results/flags +output_dir: /home/drc/GITHUB/hello_looper/hello_looper/pipestat/./results +record_identifier: frog_2 +schema_path: /home/drc/GITHUB/hello_looper/hello_looper/pipestat/./pipeline_pipestat/pipestat_output_schema.yaml +pipeline_name: test_pipe +pipeline_type: sample diff --git a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/count_lines.py b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/count_lines.py new file mode 100755 index 000000000..97e866ee4 --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/count_lines.py @@ -0,0 +1,26 @@ +import pipestat +import sys + +# Very simple pipeline that calls pipestat +# takes arguments invoked during looper submission via command templates +text_file = sys.argv[ + 1 +] # this is the sample we wish to process by reading the number of lines +sample_name = sys.argv[2] +results_file = sys.argv[3] + +# Create pipestat manager and then report values +psm = pipestat.PipestatManager( + schema_path="pipeline_pipestat/pipestat_output_schema.yaml", + results_file_path=results_file, + record_identifier=sample_name, +) + +# Read text file and count lines +with open(text_file, "r") as f: + result = {"number_of_lines": len(f.readlines())} + +# The results are defined in the pipestat output schema. +psm.report(record_identifier=sample_name, values=result) + +# end of pipeline diff --git a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/count_lines_pipestat.sh b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/count_lines_pipestat.sh new file mode 100755 index 000000000..99f83f906 --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/count_lines_pipestat.sh @@ -0,0 +1,4 @@ +#!/bin/bash +linecount=`wc -l $1 | sed -E 's/^[[:space:]]+//' | cut -f1 -d' '` +pipestat report -r $2 -i 'number_of_lines' -v $linecount -c $3 +echo "Number of lines: $linecount" diff --git a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface.yaml b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface.yaml new file mode 100644 index 000000000..1d26ac435 --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface.yaml @@ -0,0 +1,5 @@ +pipeline_name: example_pipestat_pipeline +pipeline_type: sample +output_schema: pipestat_output_schema.yaml +command_template: > + python {looper.piface_dir}/count_lines.py {sample.file} {sample.sample_name} {pipestat.results_file} \ No newline at end of file diff --git a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface_project.yaml b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface_project.yaml new file mode 100644 index 000000000..2237c2f39 --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface_project.yaml @@ -0,0 +1,8 @@ +pipeline_name: example_pipestat_project_pipeline +pipeline_type: project +output_schema: pipestat_output_schema.yaml +var_templates: + pipeline: '{looper.piface_dir}/count_lines.sh' +command_template: > + {pipeline.var_templates.pipeline} "data/*.txt" + diff 
--git a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface_shell.yaml b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface_shell.yaml new file mode 100644 index 000000000..82df8b942 --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipeline_interface_shell.yaml @@ -0,0 +1,5 @@ +pipeline_name: example_pipestat_pipeline +pipeline_type: sample +output_schema: pipestat_output_schema.yaml +command_template: > + {looper.piface_dir}/count_lines_pipestat.sh {sample.file} {sample.sample_name} {pipestat.config_file} \ No newline at end of file diff --git a/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipestat_output_schema.yaml b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipestat_output_schema.yaml new file mode 100644 index 000000000..d6b05c2ac --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/pipeline_pipestat/pipestat_output_schema.yaml @@ -0,0 +1,5 @@ +pipeline_name: example_pipestat_pipeline +samples: + number_of_lines: + type: integer + description: "Number of lines in the input file." \ No newline at end of file diff --git a/tests/data/hello_looper-dev/pipestat/project/project_config.yaml b/tests/data/hello_looper-dev/pipestat/project/project_config.yaml new file mode 100644 index 000000000..2ba1efdde --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/project/project_config.yaml @@ -0,0 +1,7 @@ +pep_version: 2.0.0 +sample_table: sample_annotation.csv +sample_modifiers: + derive: + attributes: [file] + sources: + source1: "data/{sample_name}.txt" \ No newline at end of file diff --git a/tests/data/hello_looper-dev/pipestat/project/sample_annotation.csv b/tests/data/hello_looper-dev/pipestat/project/sample_annotation.csv new file mode 100644 index 000000000..8a2a0565f --- /dev/null +++ b/tests/data/hello_looper-dev/pipestat/project/sample_annotation.csv @@ -0,0 +1,3 @@ +sample_name,library,file,toggle +frog_1,anySampleType,source1,1 +frog_2,anySampleType,source1,1 diff --git a/tests/divvytests/conftest.py b/tests/divvytests/conftest.py index c194a82af..2fa0c9049 100644 --- a/tests/divvytests/conftest.py +++ b/tests/divvytests/conftest.py @@ -3,11 +3,17 @@ import looper.divvy as divvy import pytest +from looper.divvy import select_divvy_config, DEFAULT_CONFIG_SCHEMA + THIS_DIR = os.path.dirname(os.path.abspath(__file__)) DATA_DIR = os.path.join(THIS_DIR, "data/divcfg-master") FILES = glob.glob(DATA_DIR + "/*.yaml") -DCC_ATTRIBUTES = divvy.ComputingConfiguration().keys() + + +dcc_filepath = select_divvy_config(None) +DCC = divvy.ComputingConfiguration.from_yaml_file(filepath=dcc_filepath) +DCC_ATTRIBUTES = DCC.keys() @pytest.fixture @@ -19,7 +25,7 @@ def empty_dcc(): @pytest.fixture(params=FILES) def dcc(request): """Provide ComputingConfiguration objects for all files in divcfg repository""" - return divvy.ComputingConfiguration(filepath=request.param) + return divvy.ComputingConfiguration.from_yaml_file(filepath=request.param) @pytest.fixture diff --git a/tests/divvytests/divvy_tests/test_divvy.py b/tests/divvytests/divvy_tests/test_divvy.py index aa8fa85ee..da9ce7c83 100644 --- a/tests/divvytests/divvy_tests/test_divvy.py +++ b/tests/divvytests/divvy_tests/test_divvy.py @@ -6,7 +6,7 @@ from tests.divvytests.conftest import DCC_ATTRIBUTES, FILES, mock_env_missing -class DefaultDCCTests: +class TestDefaultDCC: """Tests the default divvy.ComputingConfiguration object creation""" def test_no_args(self, empty_dcc): @@ -22,7 +22,7 @@ def test_no_env_var(self, mock_env_missing, empty_dcc): 
empty_dcc -class DCCTests: +class TestDCC: """Tests the divvy.ComputingConfiguration object creation""" def test_object_creation(self, dcc): @@ -35,7 +35,7 @@ def test_attrs_produced(self, att, dcc): dcc[att] -class ActivatingTests: +class TestActivating: """Test for the activate_package method""" def test_activating_default_package(self, dcc): @@ -56,7 +56,7 @@ def test_not_activating_faulty_package(self, dcc, package): assert not dcc.activate_package(package) -class GettingActivePackageTests: +class TestGettingActivePackage: """Test for the get_active_package method""" def test_settings_nonempty(self, dcc): @@ -65,7 +65,7 @@ def test_settings_nonempty(self, dcc): assert settings != YacAttMap() -class ListingPackagesTests: +class TestListingPackages: """Test for the list_compute_packages method""" def test_list_compute_packages_is_set(self, dcc): @@ -77,7 +77,7 @@ def test_list_compute_packages_result_nonempty(self, dcc): assert dcc.list_compute_packages() != set() -class ResettingSettingsTests: +class TestResettingSettings: """ " Test for the reset_active_settings method""" def test_reset_active_settings(self, dcc): diff --git a/tests/divvytests/regression/test_write_script.py b/tests/divvytests/regression/test_write_script.py index c5b071fbf..1159ee46a 100644 --- a/tests/divvytests/regression/test_write_script.py +++ b/tests/divvytests/regression/test_write_script.py @@ -3,7 +3,7 @@ from copy import deepcopy import random import pytest -from looper.divvy import ComputingConfiguration +from looper.divvy import ComputingConfiguration, select_divvy_config from tests.divvytests.helpers import get_random_key __author__ = "Vince Reuter" @@ -19,7 +19,8 @@ ) def test_write_script_is_effect_free(tmpdir, extras): """Writing script doesn't change computing configuration.""" - cc = ComputingConfiguration() + dcc_filepath = select_divvy_config(None) + cc = ComputingConfiguration.from_yaml_file(filepath=dcc_filepath) compute1 = deepcopy(cc["compute_packages"]) cc.write_script(tmpdir.join(get_random_key(20) + ".sh").strpath, extras) assert cc["compute_packages"] == compute1 diff --git a/tests/divvytests/test_divvy_simple.py b/tests/divvytests/test_divvy_simple.py index 6fa2c5ffa..f7795696a 100644 --- a/tests/divvytests/test_divvy_simple.py +++ b/tests/divvytests/test_divvy_simple.py @@ -4,6 +4,7 @@ from collections import OrderedDict from yacman import YacAttMap +from divvy import select_divvy_config # For interactive debugging: # import logmuse @@ -12,7 +13,8 @@ class TestPackageAtivation: def test_activate_package(self): - dcc = divvy.ComputingConfiguration() + dcc_filepath = select_divvy_config(None) + dcc = divvy.ComputingConfiguration().from_yaml_file(filepath=dcc_filepath) dcc.activate_package("default") t = dcc.compute["submission_template"] t2 = dcc["compute_packages"]["default"]["submission_template"] @@ -25,7 +27,8 @@ def test_activate_package(self): class TestWriting: def test_write_script(self): - dcc = divvy.ComputingConfiguration() + dcc_filepath = select_divvy_config(None) + dcc = divvy.ComputingConfiguration().from_yaml_file(filepath=dcc_filepath) dcc dcc.activate_package("singularity_slurm") extra_vars = { diff --git a/tests/smoketests/.looper.yaml b/tests/smoketests/.looper.yaml new file mode 100644 index 000000000..d4cfc108f --- /dev/null +++ b/tests/smoketests/.looper.yaml @@ -0,0 +1,5 @@ +pep_config: example/pep/path +output_dir: . 
+pipeline_interfaces: + sample: [] + project: [] diff --git a/tests/smoketests/test_cli_validation.py b/tests/smoketests/test_cli_validation.py index be3ea91ee..82e6b4eb1 100644 --- a/tests/smoketests/test_cli_validation.py +++ b/tests/smoketests/test_cli_validation.py @@ -10,7 +10,7 @@ SAMPLE_INCLUSION_OPTNAME, ) from tests.conftest import print_standard_stream, subp_exec, test_args_expansion -from looper.cli_looper import main +from looper.cli_pydantic import main SUBCOMMANDS_WHICH_SUPPORT_SKIP_XOR_LIMIT = ["run", "destroy"] diff --git a/tests/smoketests/test_other.py b/tests/smoketests/test_other.py index a724c7602..2527f4f25 100644 --- a/tests/smoketests/test_other.py +++ b/tests/smoketests/test_other.py @@ -1,18 +1,54 @@ +import os.path + import pytest from peppy import Project -from looper.const import FLAGS -from looper.exceptions import PipestatConfigurationException +from looper.exceptions import PipestatConfigurationException, MisconfigurationException from tests.conftest import * -from looper.cli_looper import main +from looper.cli_pydantic import main +import pandas as pd + + +def _make_flags_pipestat(cfg, type, pipeline_name): + """This makes flags for projects where pipestat is configured and used""" + + # get flag dir from .looper.yaml + with open(cfg, "r") as f: + looper_cfg_data = safe_load(f) + flag_dir = looper_cfg_data[PIPESTAT_KEY]["flag_file_dir"] + + flag_dir = os.path.join(os.path.dirname(cfg), flag_dir) + # get samples from the project config via Peppy + project_config_path = get_project_config_path(cfg) + p = Project(project_config_path) + + for s in p.samples: + sf = flag_dir + if not os.path.exists(sf): + os.makedirs(sf) + flag_path = os.path.join( + sf, pipeline_name + "_" + s.sample_name + "_" + type + ".flag" + ) + with open(flag_path, "w") as f: + f.write(type) def _make_flags(cfg, type, pipeline_name): - p = Project(cfg) - out_dir = p[CONFIG_KEY][LOOPER_KEY][OUTDIR_KEY] - print(p.samples) + """This makes flags for projects where pipestat is NOT configured""" + + # get flag dir from .looper.yaml + with open(cfg, "r") as f: + looper_cfg_data = safe_load(f) + output_dir = looper_cfg_data[OUTDIR_KEY] + + output_dir = os.path.join(os.path.dirname(cfg), output_dir) + # get samples from the project config via Peppy + project_config_path = get_project_config_path(cfg) + p = Project(project_config_path) + for s in p.samples: - sf = os.path.join(out_dir, "results_pipeline") + # Make flags in sample subfolder, e.g /tmp/tmphqxdmxnl/advanced/results/results_pipeline/sample1 + sf = os.path.join(output_dir, "results_pipeline", s.sample_name) if not os.path.exists(sf): os.makedirs(sf) flag_path = os.path.join( @@ -23,11 +59,12 @@ def _make_flags(cfg, type, pipeline_name): class TestLooperPipestat: + @pytest.mark.parametrize("cmd", ["report", "table", "check"]) def test_fail_no_pipestat_config(self, prep_temp_pep, cmd): "report, table, and check should fail if pipestat is NOT configured." 
tp = prep_temp_pep - x = test_args_expansion(tp, cmd) + x = [cmd, "--looper-config", tp] with pytest.raises(PipestatConfigurationException): main(test_args=x) @@ -35,7 +72,11 @@ def test_fail_no_pipestat_config(self, prep_temp_pep, cmd): def test_pipestat_configured(self, prep_temp_pep_pipestat, cmd): tp = prep_temp_pep_pipestat - x = [cmd, "-d", "--looper-config", tp] + if cmd in ["run", "runp"]: + x = [cmd, "--looper-config", tp, "--dry-run"] + else: + # Not every command supports dry run + x = [cmd, "--looper-config", tp] try: result = main(test_args=x) @@ -45,17 +86,54 @@ def test_pipestat_configured(self, prep_temp_pep_pipestat, cmd): raise pytest.fail("DID RAISE {0}".format(Exception)) +class TestLooperRerun: + @pytest.mark.parametrize( + "flags", [FLAGS[2], FLAGS[3]] + ) # Waiting and Failed flags should work + @pytest.mark.parametrize("pipeline_name", ["example_pipestat_pipeline"]) + def test_pipestat_rerun(self, prep_temp_pep_pipestat, pipeline_name, flags): + """Verify that rerun works with either failed or waiting flags""" + tp = prep_temp_pep_pipestat + _make_flags_pipestat(tp, flags, pipeline_name) + + x = ["rerun", "--looper-config", tp] + try: + result = main(test_args=x) + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + assert result["Jobs submitted"] == 2 + + @pytest.mark.parametrize( + "flags", [FLAGS[2], FLAGS[3]] + ) # Waiting and Failed flags should work + @pytest.mark.parametrize("pipeline_name", ["PIPELINE1"]) + def test_rerun_no_pipestat(self, prep_temp_pep, pipeline_name, flags): + """Verify that rerun works with either failed or waiting flags""" + tp = prep_temp_pep + _make_flags(tp, flags, pipeline_name) + + x = ["rerun", "--looper-config", tp] + try: + result = main(test_args=x) + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + # Only 3 failed flags exist for PIPELINE1, so only 3 samples should be submitted + assert result["Jobs submitted"] == 3 + + class TestLooperCheck: @pytest.mark.parametrize("flag_id", FLAGS) @pytest.mark.parametrize( - "pipeline_name", ["test_pipe"] + "pipeline_name", ["example_pipestat_pipeline"] ) # This is given in the pipestat_output_schema.yaml def test_check_works(self, prep_temp_pep_pipestat, flag_id, pipeline_name): """Verify that checking works""" tp = prep_temp_pep_pipestat - _make_flags(tp, flag_id, pipeline_name) + _make_flags_pipestat(tp, flag_id, pipeline_name) - x = ["check", "-d", "--looper-config", tp] + x = ["check", "--looper-config", tp] try: results = main(test_args=x) @@ -67,27 +145,27 @@ def test_check_works(self, prep_temp_pep_pipestat, flag_id, pipeline_name): raise pytest.fail("DID RAISE {0}".format(Exception)) @pytest.mark.parametrize("flag_id", FLAGS) - @pytest.mark.parametrize("pipeline_name", ["test_pipe"]) + @pytest.mark.parametrize("pipeline_name", ["example_pipestat_pipeline"]) def test_check_multi(self, prep_temp_pep_pipestat, flag_id, pipeline_name): """Verify that checking works when multiple flags are created""" tp = prep_temp_pep_pipestat - _make_flags(tp, flag_id, pipeline_name) - _make_flags(tp, FLAGS[1], pipeline_name) + _make_flags_pipestat(tp, flag_id, pipeline_name) + _make_flags_pipestat(tp, FLAGS[1], pipeline_name) - x = ["check", "-d", "--looper-config", tp] + x = ["check", "--looper-config", tp] # Multiple flag files SHOULD cause pipestat to throw an assertion error if flag_id != FLAGS[1]: with pytest.raises(AssertionError): main(test_args=x) @pytest.mark.parametrize("flag_id", ["3333", "tonieflag", "bogus", "ms"]) - 
@pytest.mark.parametrize("pipeline_name", ["test_pipe"]) + @pytest.mark.parametrize("pipeline_name", ["example_pipestat_pipeline"]) def test_check_bogus(self, prep_temp_pep_pipestat, flag_id, pipeline_name): """Verify that checking works when bogus flags are created""" tp = prep_temp_pep_pipestat - _make_flags(tp, flag_id, pipeline_name) + _make_flags_pipestat(tp, flag_id, pipeline_name) - x = ["check", "-d", "--looper-config", tp] + x = ["check", "--looper-config", tp] try: results = main(test_args=x) result_key = list(results.keys())[0] @@ -101,18 +179,26 @@ def test_check_bogus(self, prep_temp_pep_pipestat, flag_id, pipeline_name): class TestSelector: @pytest.mark.parametrize("flag_id", ["completed"]) @pytest.mark.parametrize( - "pipeline_name", ["PIPELINE1"] + "pipeline_name", ["example_pipestat_pipeline"] ) # This is given in the pipestat_output_schema.yaml def test_selecting_flags_works( self, prep_temp_pep_pipestat, flag_id, pipeline_name ): - """Verify that checking works""" + """Verify selecting on a single flag""" tp = prep_temp_pep_pipestat - p = Project(tp) - out_dir = p[CONFIG_KEY][LOOPER_KEY][OUTDIR_KEY] + project_config_path = get_project_config_path(tp) + p = Project(project_config_path) + + # get flag dir from .looper.yaml + with open(tp, "r") as f: + looper_cfg_data = safe_load(f) + flag_dir = looper_cfg_data[PIPESTAT_KEY]["flag_file_dir"] + + flag_dir = os.path.join(os.path.dirname(tp), flag_dir) + count = 0 for s in p.samples: - sf = os.path.join(out_dir, "results_pipeline") + sf = flag_dir if not os.path.exists(sf): os.makedirs(sf) flag_path = os.path.join( @@ -122,7 +208,7 @@ def test_selecting_flags_works( f.write(FLAGS[count]) count += 1 - x = ["run", "-d", "--looper-config", tp, "--sel-flag", "failed"] + x = ["run", "--looper-config", tp, "--sel-flag", "completed", "--dry-run"] try: results = main(test_args=x) @@ -135,19 +221,25 @@ def test_selecting_flags_works( @pytest.mark.parametrize("flag_id", ["completed"]) @pytest.mark.parametrize( - "pipeline_name", ["PIPELINE1"] + "pipeline_name", ["example_pipestat_pipeline"] ) # This is given in the pipestat_output_schema.yaml def test_excluding_flags_works( self, prep_temp_pep_pipestat, flag_id, pipeline_name ): - """Verify that checking works""" + """Verify that excluding a single flag works""" tp = prep_temp_pep_pipestat - # _make_flags(tp, flag_id, pipeline_name) - p = Project(tp) - out_dir = p[CONFIG_KEY][LOOPER_KEY][OUTDIR_KEY] + project_config_path = get_project_config_path(tp) + p = Project(project_config_path) + + # get flag dir from .looper.yaml + with open(tp, "r") as f: + looper_cfg_data = safe_load(f) + flag_dir = looper_cfg_data[PIPESTAT_KEY]["flag_file_dir"] + + flag_dir = os.path.join(os.path.dirname(tp), flag_dir) count = 0 for s in p.samples: - sf = os.path.join(out_dir, "results_pipeline") + sf = flag_dir if not os.path.exists(sf): os.makedirs(sf) flag_path = os.path.join( @@ -157,7 +249,7 @@ def test_excluding_flags_works( f.write(FLAGS[count]) count += 1 - x = ["run", "-d", "--looper-config", tp, "--exc-flag", "failed"] + x = ["run", "--looper-config", tp, "--exc-flag", "running", "--dry-run"] try: results = main(test_args=x) @@ -167,23 +259,30 @@ def test_excluding_flags_works( sd = os.path.join(get_outdir(tp), "submission") subs_list = [os.path.join(sd, f) for f in os.listdir(sd) if f.endswith(".sub")] - assert len(subs_list) == 2 + assert len(subs_list) == 1 @pytest.mark.parametrize("flag_id", ["completed"]) @pytest.mark.parametrize( - "pipeline_name", ["PIPELINE1"] + "pipeline_name", 
["example_pipestat_pipeline"] ) # This is given in the pipestat_output_schema.yaml def test_excluding_multi_flags_works( self, prep_temp_pep_pipestat, flag_id, pipeline_name ): - """Verify that checking works""" + """Verify excluding multi flags""" tp = prep_temp_pep_pipestat + project_config_path = get_project_config_path(tp) + p = Project(project_config_path) + + # get flag dir from .looper.yaml + with open(tp, "r") as f: + looper_cfg_data = safe_load(f) + flag_dir = looper_cfg_data[PIPESTAT_KEY]["flag_file_dir"] + + flag_dir = os.path.join(os.path.dirname(tp), flag_dir) - p = Project(tp) - out_dir = p[CONFIG_KEY][LOOPER_KEY][OUTDIR_KEY] count = 0 for s in p.samples: - sf = os.path.join(out_dir, "results_pipeline") + sf = flag_dir if not os.path.exists(sf): os.makedirs(sf) flag_path = os.path.join( @@ -193,33 +292,46 @@ def test_excluding_multi_flags_works( f.write(FLAGS[count]) count += 1 - x = ["run", "-d", "--looper-config", tp, "--exc-flag", "failed", "running"] + x = [ + "run", + "--looper-config", + tp, + "--exc-flag", + "completed", + "running", + "--dry-run", + ] try: results = main(test_args=x) except Exception: raise pytest.fail("DID RAISE {0}".format(Exception)) + # No submission directory will exist because both samples are excluded. sd = os.path.join(get_outdir(tp), "submission") - subs_list = [os.path.join(sd, f) for f in os.listdir(sd) if f.endswith(".sub")] - - assert len(subs_list) == 1 + assert os.path.exists(sd) is False @pytest.mark.parametrize("flag_id", ["completed"]) @pytest.mark.parametrize( - "pipeline_name", ["PIPELINE1"] + "pipeline_name", ["example_pipestat_pipeline"] ) # This is given in the pipestat_output_schema.yaml def test_selecting_multi_flags_works( self, prep_temp_pep_pipestat, flag_id, pipeline_name ): - """Verify that checking works""" + """Verify selecting multiple flags""" tp = prep_temp_pep_pipestat + project_config_path = get_project_config_path(tp) + p = Project(project_config_path) + + # get flag dir from .looper.yaml + with open(tp, "r") as f: + looper_cfg_data = safe_load(f) + flag_dir = looper_cfg_data[PIPESTAT_KEY]["flag_file_dir"] - p = Project(tp) - out_dir = p[CONFIG_KEY][LOOPER_KEY][OUTDIR_KEY] + flag_dir = os.path.join(os.path.dirname(tp), flag_dir) count = 0 for s in p.samples: - sf = os.path.join(out_dir, "results_pipeline") + sf = flag_dir if not os.path.exists(sf): os.makedirs(sf) flag_path = os.path.join( @@ -229,7 +341,15 @@ def test_selecting_multi_flags_works( f.write(FLAGS[count]) count += 1 - x = ["run", "-d", "--looper-config", tp, "--sel-flag", "failed", "running"] + x = [ + "run", + "--dry-run", + "--looper-config", + tp, + "--sel-flag", + "completed", + "running", + ] try: results = main(test_args=x) @@ -243,19 +363,26 @@ def test_selecting_multi_flags_works( @pytest.mark.parametrize("flag_id", ["completed"]) @pytest.mark.parametrize( - "pipeline_name", ["PIPELINE1"] + "pipeline_name", ["example_pipestat_pipeline"] ) # This is given in the pipestat_output_schema.yaml def test_selecting_attr_and_flags_works( - self, prep_temp_pep_pipestat, flag_id, pipeline_name + self, prep_temp_pep_pipestat_advanced, flag_id, pipeline_name ): - """Verify that checking works""" - tp = prep_temp_pep_pipestat + """Verify selecting via attr and flags""" + + tp = prep_temp_pep_pipestat_advanced + project_config_path = get_project_config_path(tp) + p = Project(project_config_path) + + # get flag dir from .looper.yaml + with open(tp, "r") as f: + looper_cfg_data = safe_load(f) + flag_dir = looper_cfg_data[PIPESTAT_KEY]["flag_file_dir"] - p = 
Project(tp) - out_dir = p[CONFIG_KEY][LOOPER_KEY][OUTDIR_KEY] + flag_dir = os.path.join(os.path.dirname(tp), flag_dir) count = 0 for s in p.samples: - sf = os.path.join(out_dir, "results_pipeline") + sf = flag_dir if not os.path.exists(sf): os.makedirs(sf) flag_path = os.path.join( @@ -267,7 +394,7 @@ def test_selecting_attr_and_flags_works( x = [ "run", - "-d", + "--dry-run", "--looper-config", tp, "--sel-flag", @@ -290,19 +417,25 @@ def test_selecting_attr_and_flags_works( @pytest.mark.parametrize("flag_id", ["completed"]) @pytest.mark.parametrize( - "pipeline_name", ["PIPELINE1"] + "pipeline_name", ["example_pipestat_pipeline"] ) # This is given in the pipestat_output_schema.yaml def test_excluding_attr_and_flags_works( - self, prep_temp_pep_pipestat, flag_id, pipeline_name + self, prep_temp_pep_pipestat_advanced, flag_id, pipeline_name ): - """Verify that checking works""" - tp = prep_temp_pep_pipestat + """Verify excluding via attr and flags""" + tp = prep_temp_pep_pipestat_advanced + project_config_path = get_project_config_path(tp) + p = Project(project_config_path) - p = Project(tp) - out_dir = p[CONFIG_KEY][LOOPER_KEY][OUTDIR_KEY] + # get flag dir from .looper.yaml + with open(tp, "r") as f: + looper_cfg_data = safe_load(f) + flag_dir = looper_cfg_data[PIPESTAT_KEY]["flag_file_dir"] + + flag_dir = os.path.join(os.path.dirname(tp), flag_dir) count = 0 for s in p.samples: - sf = os.path.join(out_dir, "results_pipeline") + sf = flag_dir if not os.path.exists(sf): os.makedirs(sf) flag_path = os.path.join( @@ -314,7 +447,7 @@ def test_excluding_attr_and_flags_works( x = [ "run", - "-d", + "--dry-run", "--looper-config", tp, "--exc-flag", @@ -338,19 +471,33 @@ def test_excluding_attr_and_flags_works( @pytest.mark.parametrize("flag_id", ["completed"]) @pytest.mark.parametrize( - "pipeline_name", ["PIPELINE1"] + "pipeline_name", ["example_pipestat_pipeline"] ) # This is given in the pipestat_output_schema.yaml def test_excluding_toggle_attr( - self, prep_temp_pep_pipestat, flag_id, pipeline_name + self, prep_temp_pep_pipestat_advanced, flag_id, pipeline_name ): - """Verify that checking works""" - tp = prep_temp_pep_pipestat + """Verify excluding based on toggle attr""" + tp = prep_temp_pep_pipestat_advanced + project_config_path = get_project_config_path(tp) + p = Project(project_config_path) + + # get flag dir from .looper.yaml + with open(tp, "r") as f: + looper_cfg_data = safe_load(f) + flag_dir = looper_cfg_data[PIPESTAT_KEY]["flag_file_dir"] + + # Manually add a toggle column to the PEP for this specific test + sample_csv = os.path.join( + os.path.dirname(project_config_path), "annotation_sheet.csv" + ) + df = pd.read_csv(sample_csv) + df["toggle"] = 1 + df.to_csv(sample_csv, index=False) - p = Project(tp) - out_dir = p[CONFIG_KEY][LOOPER_KEY][OUTDIR_KEY] + flag_dir = os.path.join(os.path.dirname(tp), flag_dir) count = 0 for s in p.samples: - sf = os.path.join(out_dir, "results_pipeline") + sf = flag_dir if not os.path.exists(sf): os.makedirs(sf) flag_path = os.path.join( @@ -362,7 +509,7 @@ def test_excluding_toggle_attr( x = [ "run", - "-d", + "--dry-run", "--looper-config", tp, "--sel-attr", @@ -385,19 +532,34 @@ def test_excluding_toggle_attr( @pytest.mark.parametrize("flag_id", ["completed"]) @pytest.mark.parametrize( - "pipeline_name", ["PIPELINE1"] + "pipeline_name", ["example_pipestat_pipeline"] ) # This is given in the pipestat_output_schema.yaml def test_including_toggle_attr( - self, prep_temp_pep_pipestat, flag_id, pipeline_name + self, prep_temp_pep_pipestat_advanced, 
flag_id, pipeline_name ): - """Verify that checking works""" - tp = prep_temp_pep_pipestat + """Verify including based on toggle attr""" + + tp = prep_temp_pep_pipestat_advanced + project_config_path = get_project_config_path(tp) + p = Project(project_config_path) - p = Project(tp) - out_dir = p[CONFIG_KEY][LOOPER_KEY][OUTDIR_KEY] + # get flag dir from .looper.yaml + with open(tp, "r") as f: + looper_cfg_data = safe_load(f) + flag_dir = looper_cfg_data[PIPESTAT_KEY]["flag_file_dir"] + + # Manually add a toggle column to the PEP for this specific test + sample_csv = os.path.join( + os.path.dirname(project_config_path), "annotation_sheet.csv" + ) + df = pd.read_csv(sample_csv) + df["toggle"] = 1 + df.to_csv(sample_csv, index=False) + + flag_dir = os.path.join(os.path.dirname(tp), flag_dir) count = 0 for s in p.samples: - sf = os.path.join(out_dir, "results_pipeline") + sf = flag_dir if not os.path.exists(sf): os.makedirs(sf) flag_path = os.path.join( @@ -409,7 +571,7 @@ def test_including_toggle_attr( x = [ "run", - "-d", + "--dry-run", "--looper-config", tp, "--sel-attr", @@ -427,3 +589,22 @@ def test_including_toggle_attr( subs_list = [os.path.join(sd, f) for f in os.listdir(sd) if f.endswith(".sub")] assert len(subs_list) == 3 + + +class TestLooperInspect: + @pytest.mark.parametrize("cmd", ["inspect"]) + def test_inspect_config(self, prep_temp_pep, cmd): + "Checks inspect command" + tp = prep_temp_pep + x = [cmd, "--looper-config", tp] + try: + results = main(test_args=x) + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + @pytest.mark.parametrize("cmd", ["inspect"]) + def test_inspect_no_config_found(self, cmd): + "Checks inspect command" + x = [cmd] + with pytest.raises(MisconfigurationException): + results = main(test_args=x) diff --git a/tests/smoketests/test_run.py b/tests/smoketests/test_run.py index 059c2721a..6c0bdd8d8 100644 --- a/tests/smoketests/test_run.py +++ b/tests/smoketests/test_run.py @@ -1,3 +1,5 @@ +import os.path + import pytest from peppy.const import * from yaml import dump @@ -6,7 +8,7 @@ from looper.project import Project from tests.conftest import * from looper.utils import * -from looper.cli_looper import main +from looper.cli_pydantic import main CMD_STRS = ["string", " --string", " --sjhsjd 212", "7867#$@#$cc@@"] @@ -14,13 +16,55 @@ def test_cli(prep_temp_pep): tp = prep_temp_pep - x = test_args_expansion(tp, "run") + x = ["run", "--looper-config", tp, "--dry-run"] + try: + main(test_args=x) + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + +def test_cli_shortform(prep_temp_pep): + tp = prep_temp_pep + + x = ["run", "--looper-config", tp, "-d"] + try: + main(test_args=x) + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + x = ["run", "--looper-config", tp, "-d", "-l", "2"] + try: + main(test_args=x) + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + tp = prep_temp_pep + x = ["run", "--looper-config", tp, "-d", "-n", "2"] + try: + main(test_args=x) + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + +def test_running_csv_pep(prep_temp_pep_csv): + tp = prep_temp_pep_csv + + x = ["run", "--looper-config", tp, "--dry-run"] try: main(test_args=x) except Exception: raise pytest.fail("DID RAISE {0}".format(Exception)) +@pytest.mark.parametrize( + "path", ["something/example.yaml", "somethingelse/example2.csv"] +) +def test_is_PEP_file_type(path): + + result = is_PEP_file_type(path) + assert result == True + + def is_connected(): 
"""Determines if local machine can connect to the internet.""" import socket @@ -39,16 +83,20 @@ class TestLooperBothRuns: def test_looper_cfg_invalid(self, cmd): """Verify looper does not accept invalid cfg paths""" - x = test_args_expansion("jdfskfds/dsjfklds/dsjklsf.yaml", cmd) - with pytest.raises(OSError): - main(test_args=x) + x = test_args_expansion( + cmd, "--looper-config", "jdfskfds/dsjfklds/dsjklsf.yaml" + ) + with pytest.raises(SystemExit): + result = main(test_args=x) + print(result) @pytest.mark.parametrize("cmd", ["run", "runp"]) def test_looper_cfg_required(self, cmd): """Verify looper does not accept invalid cfg paths""" x = test_args_expansion("", cmd) - with pytest.raises(SystemExit): + + with pytest.raises(MisconfigurationException): ff = main(test_args=x) print(ff) @@ -87,15 +135,8 @@ def test_unrecognized_args_not_passing(self, prep_temp_pep, cmd): tp = prep_temp_pep x = test_args_expansion(tp, cmd, ["--unknown-arg", "4"]) - try: + with pytest.raises(SystemExit): main(test_args=x) - sd = os.path.join(get_outdir(tp), "submission") - subs_list = [ - os.path.join(sd, f) for f in os.listdir(sd) if f.endswith(".sub") - ] - assert_content_not_in_any_files(subs_list, "--unknown-arg") - except Exception: - raise pytest.fail("DID RAISE {0}".format(Exception)) class TestLooperRunBehavior: @@ -119,13 +160,12 @@ def test_looper_multi_pipeline(self, prep_temp_pep): def test_looper_single_pipeline(self, prep_temp_pep): tp = prep_temp_pep + with mod_yaml_data(tp) as config_data: - pifaces = config_data[SAMPLE_MODS_KEY][CONSTANT_KEY][ - PIPELINE_INTERFACES_KEY - ] - config_data[SAMPLE_MODS_KEY][CONSTANT_KEY][PIPELINE_INTERFACES_KEY] = ( - pifaces[1] - ) + + pifaces = config_data[PIPELINE_INTERFACES_KEY] + config_data[PIPELINE_INTERFACES_KEY]["sample"] = pifaces["sample"][1] + del config_data[PIPELINE_INTERFACES_KEY]["project"] x = test_args_expansion(tp, "run") try: @@ -136,17 +176,12 @@ def test_looper_single_pipeline(self, prep_temp_pep): def test_looper_var_templates(self, prep_temp_pep): tp = prep_temp_pep - with mod_yaml_data(tp) as config_data: - pifaces = config_data[SAMPLE_MODS_KEY][CONSTANT_KEY][ - PIPELINE_INTERFACES_KEY - ] - config_data[SAMPLE_MODS_KEY][CONSTANT_KEY][PIPELINE_INTERFACES_KEY] = ( - pifaces[1] - ) x = test_args_expansion(tp, "run") + x.pop(-1) # remove the --dry-run argument for this specific test + try: # Test that {looper.piface_dir} is correctly rendered to a path which will show up in the final .sub file - main(test_args=x) + results = main(test_args=x) sd = os.path.join(get_outdir(tp), "submission") subs_list = [ os.path.join(sd, f) for f in os.listdir(sd) if f.endswith(".sub") @@ -158,8 +193,10 @@ def test_looper_var_templates(self, prep_temp_pep): def test_looper_cli_pipeline(self, prep_temp_pep): """CLI-specified pipelines overwrite ones from config""" tp = prep_temp_pep - pi_pth = os.path.join(os.path.dirname(tp), PIS.format("1")) - x = test_args_expansion(tp, "run", ["--pipeline-interfaces", pi_pth]) + with mod_yaml_data(tp) as config_data: + pifaces = config_data[PIPELINE_INTERFACES_KEY] + pi_pth = pifaces["sample"][1] + x = test_args_expansion(tp, "run", ["--sample-pipeline-interfaces", pi_pth]) try: result = main(test_args=x) @@ -174,12 +211,12 @@ def test_looper_no_pipeline(self, prep_temp_pep): """ tp = prep_temp_pep with mod_yaml_data(tp) as config_data: - del config_data[SAMPLE_MODS_KEY][CONSTANT_KEY][PIPELINE_INTERFACES_KEY] + del config_data[PIPELINE_INTERFACES_KEY] x = test_args_expansion(tp, "run") try: result = main(test_args=x) - 
assert result[DEBUG_JOBS] == 0 + assert "No pipeline interfaces defined" in list(result.keys()) except Exception: raise pytest.fail("DID RAISE {0}".format(Exception)) @@ -189,34 +226,7 @@ def test_looper_pipeline_not_found(self, prep_temp_pep): """ tp = prep_temp_pep with mod_yaml_data(tp) as config_data: - config_data[SAMPLE_MODS_KEY][CONSTANT_KEY][PIPELINE_INTERFACES_KEY] = [ - "bogus" - ] - x = test_args_expansion(tp, "run") - try: - result = main(test_args=x) - - assert result[DEBUG_JOBS] == 0 - assert "No pipeline interfaces defined" in result.keys() - except Exception: - raise pytest.fail("DID RAISE {0}".format(Exception)) - - def test_looper_pipeline_invalid(self, prep_temp_pep): - """ - Pipeline is ignored when does not validate successfully - against a schema - """ - tp = prep_temp_pep - with mod_yaml_data(tp) as config_data: - pifaces = config_data[SAMPLE_MODS_KEY][CONSTANT_KEY][ - PIPELINE_INTERFACES_KEY - ] - config_data[SAMPLE_MODS_KEY][CONSTANT_KEY][PIPELINE_INTERFACES_KEY] = ( - pifaces[1] - ) - piface_path = os.path.join(os.path.dirname(tp), pifaces[1]) - with mod_yaml_data(piface_path) as piface_data: - del piface_data["pipeline_name"] + config_data[PIPELINE_INTERFACES_KEY]["sample"] = ["bogus"] x = test_args_expansion(tp, "run") try: result = main(test_args=x) @@ -226,35 +236,24 @@ def test_looper_pipeline_invalid(self, prep_temp_pep): except Exception: raise pytest.fail("DID RAISE {0}".format(Exception)) - def test_looper_sample_attr_missing(self, prep_temp_pep): - """ - Piface is ignored when it does not exist - """ - tp = prep_temp_pep - with mod_yaml_data(tp) as config_data: - del config_data[SAMPLE_MODS_KEY][CONSTANT_KEY]["attr"] - x = test_args_expansion(tp, "run") - try: - result = main(test_args=x) - - assert result[DEBUG_JOBS] == 0 - except Exception: - raise pytest.fail("DID RAISE {0}".format(Exception)) - - @pytest.mark.skipif(not is_connected(), reason="Test needs an internet connection") def test_looper_sample_name_whitespace(self, prep_temp_pep): """ Piface is ignored when it does not exist """ tp = prep_temp_pep + imply_whitespace = [ { IMPLIED_IF_KEY: {"sample_name": "sample1"}, IMPLIED_THEN_KEY: {"sample_name": "sample whitespace"}, } ] - with mod_yaml_data(tp) as config_data: - config_data[SAMPLE_MODS_KEY][IMPLIED_KEY] = imply_whitespace + + project_config_path = get_project_config_path(tp) + + with mod_yaml_data(project_config_path) as project_config_data: + project_config_data[SAMPLE_MODS_KEY][IMPLIED_KEY] = imply_whitespace + x = test_args_expansion(tp, "run") with pytest.raises(Exception): result = main(test_args=x) @@ -266,12 +265,16 @@ def test_looper_toggle(self, prep_temp_pep): If all samples have toggle attr set to 0, no jobs are submitted """ tp = prep_temp_pep - with mod_yaml_data(tp) as config_data: - config_data[SAMPLE_MODS_KEY][CONSTANT_KEY][SAMPLE_TOGGLE_ATTR] = 0 + project_config_path = get_project_config_path(tp) + + with mod_yaml_data(project_config_path) as project_config_data: + project_config_data[SAMPLE_MODS_KEY][CONSTANT_KEY][SAMPLE_TOGGLE_ATTR] = 0 + x = test_args_expansion(tp, "run") + x.pop(-1) # remove dry run for this test + try: result = main(test_args=x) - assert result[DEBUG_JOBS] == 0 except Exception: raise pytest.fail("DID RAISE {0}".format(Exception)) @@ -283,9 +286,10 @@ def test_cmd_extra_sample(self, prep_temp_pep, arg): appended to the pipelinecommand """ tp = prep_temp_pep - with mod_yaml_data(tp) as config_data: - config_data[SAMPLE_MODS_KEY][CONSTANT_KEY]["command_extra"] = arg + project_config_path = 
get_project_config_path(tp) + with mod_yaml_data(project_config_path) as project_config_data: + project_config_data[SAMPLE_MODS_KEY][CONSTANT_KEY]["command_extra"] = arg x = test_args_expansion(tp, "run") try: main(test_args=x) @@ -303,8 +307,11 @@ def test_cmd_extra_override_sample(self, prep_temp_pep, arg): pipeline command """ tp = prep_temp_pep - with mod_yaml_data(tp) as config_data: - config_data[SAMPLE_MODS_KEY][CONSTANT_KEY]["command_extra"] = arg + project_config_path = get_project_config_path(tp) + + with mod_yaml_data(project_config_path) as project_config_data: + project_config_data[SAMPLE_MODS_KEY][CONSTANT_KEY]["command_extra"] = arg + x = test_args_expansion(tp, "run", ["--command-extra-override='different'"]) try: main(test_args=x) @@ -336,12 +343,17 @@ def test_looper_multi_pipeline(self, prep_temp_pep): def test_looper_single_pipeline(self, prep_temp_pep): tp = prep_temp_pep + with mod_yaml_data(tp) as config_data: - piface_path = os.path.join(os.path.dirname(tp), PIP.format("1")) - config_data[LOOPER_KEY][CLI_KEY]["runp"][ - PIPELINE_INTERFACES_KEY - ] = piface_path + # Modifying in this way due to https://github.com/pepkit/looper/issues/474 + config_data[PIPELINE_INTERFACES_KEY]["project"] = os.path.join( + os.path.dirname(tp), "pipeline/pipeline_interface1_project.yaml" + ) + del config_data[PIPELINE_INTERFACES_KEY]["sample"] + + print(tp) x = test_args_expansion(tp, "runp") + x.pop(-1) # remove the --dry-run argument for this specific test try: result = main(test_args=x) assert result[DEBUG_JOBS] != 2 @@ -351,10 +363,16 @@ def test_looper_single_pipeline(self, prep_temp_pep): @pytest.mark.parametrize("arg", CMD_STRS) def test_cmd_extra_project(self, prep_temp_pep, arg): + tp = prep_temp_pep - with mod_yaml_data(tp) as config_data: - config_data[LOOPER_KEY]["command_extra"] = arg + + project_config_path = get_project_config_path(tp) + + with mod_yaml_data(project_config_path) as project_config_data: + project_config_data["looper"] = {} + project_config_data["looper"]["command_extra"] = arg x = test_args_expansion(tp, "runp") + try: main(test_args=x) except Exception: @@ -383,16 +401,18 @@ def test_looper_basic_plugin(self, prep_temp_pep): ("looper.write_sample_yaml_cwl", "cwl.yaml"), ], ) - @pytest.mark.skipif(not is_connected(), reason="Test needs an internet connection") def test_looper_other_plugins(self, prep_temp_pep, plugin, appendix): tp = prep_temp_pep - for path in { - piface.pipe_iface_file for piface in Project(tp).pipeline_interfaces - }: - with mod_yaml_data(path) as piface_data: - piface_data[PRE_SUBMIT_HOOK_KEY][PRE_SUBMIT_PY_FUN_KEY] = [plugin] + pep_dir = os.path.dirname(tp) + pipeline_interface1 = os.path.join( + pep_dir, "pipeline/pipeline_interface1_sample.yaml" + ) + + with mod_yaml_data(pipeline_interface1) as piface_data: + piface_data[PRE_SUBMIT_HOOK_KEY][PRE_SUBMIT_PY_FUN_KEY] = [plugin] x = test_args_expansion(tp, "run") + x.pop(-1) try: main(test_args=x) except Exception as err: @@ -409,11 +429,13 @@ def test_looper_other_plugins(self, prep_temp_pep, plugin, appendix): ) def test_looper_command_templates_hooks(self, prep_temp_pep, cmd): tp = prep_temp_pep - for path in { - piface.pipe_iface_file for piface in Project(tp).pipeline_interfaces - }: - with mod_yaml_data(path) as piface_data: - piface_data[PRE_SUBMIT_HOOK_KEY][PRE_SUBMIT_CMD_KEY] = [cmd] + pep_dir = os.path.dirname(tp) + pipeline_interface1 = os.path.join( + pep_dir, "pipeline/pipeline_interface1_sample.yaml" + ) + + with mod_yaml_data(pipeline_interface1) as piface_data: + 
piface_data[PRE_SUBMIT_HOOK_KEY][PRE_SUBMIT_CMD_KEY] = [cmd] x = test_args_expansion(tp, "run") try: main(test_args=x) @@ -426,9 +448,8 @@ def test_looper_command_templates_hooks(self, prep_temp_pep, cmd): class TestLooperRunSubmissionScript: def test_looper_run_produces_submission_scripts(self, prep_temp_pep): tp = prep_temp_pep - with open(tp, "r") as conf_file: - config_data = safe_load(conf_file) - outdir = config_data[LOOPER_KEY][OUTDIR_KEY] + + outdir = get_outdir(tp) x = test_args_expansion(tp, "run") try: main(test_args=x) @@ -533,7 +554,7 @@ def test_cli_yaml_settings_passes_settings(self, prep_temp_pep, cmd): dump({"mem": "testin_mem"}, sf) x = test_args_expansion( - tp, cmd, ["--settings", settings_file_path, "-p", "slurm"] + tp, cmd, ["--settings", settings_file_path, "--package", "slurm"] ) try: main(test_args=x) @@ -553,7 +574,14 @@ def test_cli_compute_overwrites_yaml_settings_spec(self, prep_temp_pep, cmd): x = test_args_expansion( tp, cmd, - ["--settings", settings_file_path, "--compute", "mem=10", "-p", "slurm"], + [ + "--settings", + settings_file_path, + "--compute", + "mem=10", + "--package", + "slurm", + ], ) try: main(test_args=x) @@ -566,33 +594,15 @@ def test_cli_compute_overwrites_yaml_settings_spec(self, prep_temp_pep, cmd): class TestLooperConfig: - @pytest.mark.parametrize("cmd", ["run", "runp"]) - def test_init_config_file(self, prep_temp_pep, cmd, dotfile_path): + + def test_init_config_file(self, prep_temp_pep): tp = prep_temp_pep - x = test_args_expansion(tp, "init") + x = ["init", "--force-yes"] try: result = main(test_args=x) except Exception as err: raise pytest.fail(f"DID RAISE: {err}") assert result == 0 - assert_content_in_all_files(dotfile_path, tp) - x = test_args_expansion(tp, cmd) - try: - result = main(test_args=x) - except Exception as err: - raise pytest.fail(f"DID RAISE {err}") - - def test_correct_execution_of_config(self, prepare_pep_with_dot_file): - """ - Test executing dot file and looper_config - """ - dot_file_path = os.path.abspath(prepare_pep_with_dot_file) - x = test_args_expansion("", "run") - try: - main(test_args=x) - except Exception as err: - raise pytest.fail(f"DID RAISE {err}") - os.remove(dot_file_path) class TestLooperPEPhub: @@ -605,20 +615,7 @@ class TestLooperPEPhub: ], ) def test_pephub_registry_path_recognition(self, pep_path): - assert is_registry_path(pep_path) is True - - @pytest.mark.parametrize( - "pep_path", - [ - "some/path/to/pep.yaml", - "different/path.yaml", - "default/path/to/file/without/yaml", - "file_in_folder.yaml", - "not_yaml_file", - ], - ) - def test_config_recognition(self, pep_path): - assert is_registry_path(pep_path) is False + assert is_pephub_registry_path(pep_path) is True def test_init_project_using_dict(self, prep_temp_config_with_pep): """Verify looper runs using pephub in a basic case and return code is 0""" @@ -628,3 +625,14 @@ def test_init_project_using_dict(self, prep_temp_config_with_pep): ) assert len(init_project.pipeline_interfaces) == 3 + + def test_init_project_using_csv(self, prep_temp_pep_csv): + """Verify looper runs using pephub in a basic case and return code is 0""" + tp = prep_temp_pep_csv + with mod_yaml_data(tp) as config_data: + pep_config_csv = config_data["pep_config"] + + pep_config_csv = os.path.join(os.path.dirname(tp), pep_config_csv) + init_project = Project(cfg=pep_config_csv) + + assert len(init_project.samples) == 2 diff --git a/tests/test_comprehensive.py b/tests/test_comprehensive.py new file mode 100644 index 000000000..cce74ca54 --- /dev/null +++ 
b/tests/test_comprehensive.py @@ -0,0 +1,185 @@ +import os.path + +import pytest +from peppy.const import * +from yaml import dump + +from looper.const import * +from looper.project import Project +from tests.conftest import * +from looper.utils import * +from looper.cli_pydantic import main +from tests.smoketests.test_run import is_connected +from tempfile import TemporaryDirectory +from pipestat import PipestatManager +from pipestat.exceptions import RecordNotFoundError + +from yaml import dump, safe_load + +CMD_STRS = ["string", " --string", " --sjhsjd 212", "7867#$@#$cc@@"] + + +def test_comprehensive_advanced_looper_no_pipestat(prep_temp_pep): + + path_to_looper_config = prep_temp_pep + + x = ["run", "--looper-config", path_to_looper_config] + + try: + results = main(test_args=x) + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + +def test_comprehensive_looper_no_pipestat(prep_temp_pep_basic): + + path_to_looper_config = prep_temp_pep_basic + basic_dir = os.path.dirname(path_to_looper_config) + + # open up the project config and replace the derived attributes with the path to the data. In a way, this simulates using the environment variables. + basic_project_file = os.path.join(basic_dir, "project", "project_config.yaml") + with open(basic_project_file, "r") as f: + basic_project_data = safe_load(f) + + basic_project_data["sample_modifiers"]["derive"]["sources"]["source1"] = ( + os.path.join(basic_dir, "data/{sample_name}.txt") + ) + + with open(basic_project_file, "w") as f: + dump(basic_project_data, f) + + x = ["run", "--looper-config", path_to_looper_config] + + try: + results = main(test_args=x) + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + +def test_comprehensive_looper_pipestat(prep_temp_pep_pipestat): + + cmd = "run" + + path_to_looper_config = prep_temp_pep_pipestat + pipestat_dir = os.path.dirname(path_to_looper_config) + + # open up the project config and replace the derived attributes with the path to the data. In a way, this simulates using the environment variables. 
+ pipestat_project_file = get_project_config_path(path_to_looper_config) + + pipestat_pipeline_interface_file = os.path.join( + pipestat_dir, "pipeline_pipestat/pipeline_interface.yaml" + ) + + with open(pipestat_project_file, "r") as f: + pipestat_project_data = safe_load(f) + + pipestat_project_data["sample_modifiers"]["derive"]["sources"]["source1"] = ( + os.path.join(pipestat_dir, "data/{sample_name}.txt") + ) + + with open(pipestat_pipeline_interface_file, "r") as f: + pipestat_piface_data = safe_load(f) + + pipeline_name = pipestat_piface_data["pipeline_name"] + + with open(pipestat_project_file, "w") as f: + dump(pipestat_project_data, f) + + x = [cmd, "--looper-config", path_to_looper_config] + + try: + result = main(test_args=x) + if cmd == "run": + assert result["Pipestat compatible"] is True + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + # TODO TEST PROJECT LEVEL RUN + # Must add this to hello_looper for pipestat example + + # TEST LOOPER CHECK + + # looper cannot create flags, the pipeline or pipestat does that + # if you do not specify flag dir, pipestat places them in the same dir as config file + path_to_pipestat_config = os.path.join( + pipestat_dir, f"results/pipestat_config_{pipeline_name}.yaml" + ) + + psm = PipestatManager(config_file=path_to_pipestat_config) + psm.set_status(record_identifier="frog_1", status_identifier="completed") + psm.set_status(record_identifier="frog_2", status_identifier="completed") + + # Now use looper check to get statuses + x = ["check", "--looper-config", path_to_looper_config] + + try: + result = main(test_args=x) + assert result["example_pipestat_pipeline"]["frog_1"] == "completed" + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + # Now use looper check to get project level statuses + x = ["check", "--looper-config", path_to_looper_config, "--project"] + + try: + result = main(test_args=x) + assert result == {"example_pipestat_project_pipeline": {"project": "unknown"}} + + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + # TEST LOOPER REPORT + + x = ["report", "--looper-config", path_to_looper_config] + + try: + result = main(test_args=x) + assert "report_directory" in result + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + # TEST LOOPER Table + + x = ["table", "--looper-config", path_to_looper_config] + + try: + result = main(test_args=x) + assert "example_pipestat_pipeline_stats_summary.tsv" in result[0] + except Exception: + raise pytest.fail("DID RAISE {0}".format(Exception)) + + # TEST LOOPER DESTROY + # TODO add destroying individual samples via pipestat + + x = [ + "destroy", + "--looper-config", + path_to_looper_config, + "--force-yes", + ] # Must force yes or pytest will throw an exception "OSError: pytest: reading from stdin while output is captured!" 
+
+    try:
+        result = main(test_args=x)
+    except Exception:
+        raise pytest.fail("DID RAISE {0}".format(Exception))
+
+    sd = os.path.dirname(path_to_looper_config)
+    tsv_list = [os.path.join(sd, f) for f in os.listdir(sd) if f.endswith(".tsv")]
+    assert len(tsv_list) == 0
+    with pytest.raises(RecordNotFoundError):
+        retrieved_result = psm.retrieve_one(record_identifier="frog_2")
+
+
+@pytest.mark.skipif(not is_connected(), reason="This test needs internet access.")
+@pytest.mark.skip(reason="user must be logged into pephub otherwise this will fail.")
+def test_comprehensive_looper_pephub(prep_temp_pep_pephub):
+    """Basic test to determine if Looper can run a PEP from PEPHub"""
+    # TODO need to add way to check if user is logged into pephub and then run test otherwise skip
+    path_to_looper_config = prep_temp_pep_pephub
+
+    x = ["run", "--looper-config", path_to_looper_config]
+
+    try:
+        results = main(test_args=x)
+    except Exception:
+        raise pytest.fail("DID RAISE {0}".format(Exception))
diff --git a/tests/update_test_data.sh b/tests/update_test_data.sh
new file mode 100644
index 000000000..ece3c1ea8
--- /dev/null
+++ b/tests/update_test_data.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+branch='dev'
+
+wget https://github.com/pepkit/hello_looper/archive/refs/heads/${branch}.zip
+mv ${branch}.zip data/
+cd data/
+rm -rf hello_looper-${branch}
+unzip ${branch}.zip
+rm ${branch}.zip
\ No newline at end of file
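`update_test_data.sh` assumes it is run from the `tests/` directory, since it moves the downloaded zip into `data/` relative to the current working directory. For environments without `wget` or `unzip`, an equivalent stdlib-only Python sketch under the same working-directory assumption:

```python
# Cross-platform sketch equivalent to tests/update_test_data.sh; assumes the
# working directory is tests/, like the shell script.
import io
import shutil
import urllib.request
import zipfile

branch = "dev"
url = f"https://github.com/pepkit/hello_looper/archive/refs/heads/{branch}.zip"

shutil.rmtree(f"data/hello_looper-{branch}", ignore_errors=True)  # drop stale copy
with urllib.request.urlopen(url) as resp:
    zipfile.ZipFile(io.BytesIO(resp.read())).extractall("data/")
```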