From 985177b0ecaa0eb9ae00950550d3e24773b63fff Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 13 Aug 2024 14:30:22 -0400 Subject: [PATCH 01/10] refactor: import shared functions from ccbr_tools --- pyproject.toml | 1 + src/xavier/__main__.py | 12 +- src/xavier/gui.py | 17 +- src/xavier/options.py | 12 +- src/xavier/run.py | 9 +- src/xavier/shells.py | 8 +- src/xavier/util.py | 395 ----------------------------------------- 7 files changed, 22 insertions(+), 432 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5a0c99c..13bc863 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ classifiers = [ requires-python = ">=3.11" dependencies = [ "argparse", + "ccbr_tools@git+https://github.com/CCBR/Tools", "Click >= 8.1.3", "PySimpleGui < 5", "snakemake >= 7, < 8", diff --git a/src/xavier/__main__.py b/src/xavier/__main__.py index 5df5326..1ddbebc 100755 --- a/src/xavier/__main__.py +++ b/src/xavier/__main__.py @@ -47,17 +47,9 @@ from .run import init, setup, bind, dryrun, runner, run from .shells import bash from .options import genome_options -from .util import ( - err, - exists, - fatal, - permissions, - check_cache, - require, - get_version, - get_genomes_list, -) +from .util import err, exists, fatal, permissions, check_cache, require from .gui import launch_gui +from .util import xavier_base, get_version __version__ = get_version() __email__ = "ccbr@mail.nih.gov" diff --git a/src/xavier/gui.py b/src/xavier/gui.py index 72ee41e..fe3ca56 100644 --- a/src/xavier/gui.py +++ b/src/xavier/gui.py @@ -8,18 +8,18 @@ from .util import ( get_genomes_dict, get_tmp_dir, - xavier_base, - get_version, get_hpcname, check_python_version, ) +from .util import xavier_base, get_version from .run import run_in_context from .cache import get_sif_cache_dir + def launch_gui(DEBUG=True): check_python_version() # get drop down genome options - jsons = get_genomes_dict() + jsons = get_genomes_dict(repo_base=xavier_base) genome_annotation_combinations = list(jsons.keys()) genome_annotation_combinations.sort() if DEBUG: @@ -165,7 +165,9 @@ def launch_gui(DEBUG=True): if DEBUG: print("layout is ready!") - window = sg.Window(f"XAVIER {get_version()}", layout, location=(0, 500), finalize=True) + window = sg.Window( + f"XAVIER {get_version()}", layout, location=(0, 500), finalize=True + ) if DEBUG: print("window created!") @@ -277,7 +279,11 @@ def launch_gui(DEBUG=True): input=list(glob.glob(os.path.join(values["-INDIR-"], "*.fastq.gz"))), output=output_dir, genome=genome, - targets=values["-TARGETS-"] if values["-TARGETS-"] else xavier_base('resources', 'Agilent_SSv7_allExons_hg38.bed'), # TODO should this be part of the genome config file? + targets=values["-TARGETS-"] + if values["-TARGETS-"] + else xavier_base( + "resources", "Agilent_SSv7_allExons_hg38.bed" + ), # TODO should this be part of the genome config file? mode="slurm", job_name="pl:xavier", callers=["mutect2", "mutect", "strelka", "vardict", "varscan"], @@ -344,6 +350,7 @@ def launch_gui(DEBUG=True): continue window.close() + def copy_to_clipboard(string): r = Tk() r.withdraw() diff --git a/src/xavier/options.py b/src/xavier/options.py index 5676adf..069df95 100644 --- a/src/xavier/options.py +++ b/src/xavier/options.py @@ -1,13 +1,9 @@ #!/usr/bin/env python3 # -*- coding: UTF-8 -*- -# Python standard library -from __future__ import print_function - -# Local imports -from .util import permissions - import os +from __future__ import print_function +from ccbr_tools.pipeline.util import permissions def genome_options(parser, user_option, prebuilt): @@ -45,7 +41,3 @@ def genome_options(parser, user_option, prebuilt): ) return user_option - - -if __name__ == "__main__": - pass diff --git a/src/xavier/run.py b/src/xavier/run.py index 516c0bf..c7b0bee 100644 --- a/src/xavier/run.py +++ b/src/xavier/run.py @@ -11,21 +11,20 @@ import shutil import sys import subprocess - -# Local imports -from .util import ( +from ccbr_tools.pipeline.util import ( git_commit_hash, join_jsons, fatal, which, exists, err, - get_version, - xavier_base, require, get_hpcname, ) +# Local imports +from .util import get_version, xavier_base + def run(sub_args): """Initialize, setup, and run the XAVIER pipeline. diff --git a/src/xavier/shells.py b/src/xavier/shells.py index e9ded44..6480c1f 100644 --- a/src/xavier/shells.py +++ b/src/xavier/shells.py @@ -8,7 +8,7 @@ import subprocess # Local imports -from .util import err +from ccbr_tools.pipeline.util import err def set_options(strict): @@ -65,9 +65,3 @@ def bash( ) return exitcode - - -if __name__ == "__main__": - # Tests - bash("ls -la /home/") - bash("ls -la /fake/path") diff --git a/src/xavier/util.py b/src/xavier/util.py index 3cf87b8..2c13af8 100644 --- a/src/xavier/util.py +++ b/src/xavier/util.py @@ -30,398 +30,3 @@ def get_version(): with open(xavier_base("VERSION"), "r") as vfile: version = f"v{vfile.read().strip()}" return version - - -def scontrol_show(): - """Run scontrol show config and parse the output as a dictionary - @return scontrol_dict : - """ - scontrol_dict = dict() - scontrol_out = subprocess.run( - "scontrol show config", shell=True, capture_output=True, text=True - ).stdout - if len(scontrol_out) > 0: - for line in scontrol_out.split("\n"): - line_split = line.split("=") - if len(line_split) > 1: - scontrol_dict[line_split[0].strip()] = line_split[1].strip() - return scontrol_dict - - -def get_hpcname(): - """Get the HPC name (biowulf, frce, or an empty string) - @return hpcname - """ - scontrol_out = scontrol_show() - hpc = scontrol_out["ClusterName"] if "ClusterName" in scontrol_out.keys() else "" - if hpc == "fnlcr": - hpc = "frce" - return hpc - - -def get_tmp_dir(tmp_dir, outdir, hpc=get_hpcname()): - """Get default temporary directory for biowulf and frce. Allow user override.""" - if not tmp_dir: - if hpc == "biowulf": - tmp_dir = "/lscratch/$SLURM_JOBID" - elif hpc == "frce": - tmp_dir = outdir - else: - tmp_dir = None - return tmp_dir - - -def get_genomes_list(hpcname=get_hpcname(), error_on_warnings=False): - """Get list of genome annotations available for the current platform - @return genomes_list - """ - return sorted( - list( - get_genomes_dict( - hpcname=hpcname, error_on_warnings=error_on_warnings - ).keys() - ) - ) - - -def get_genomes_dict(hpcname=get_hpcname(), error_on_warnings=False): - """Get dictionary of genome annotation versions and the paths to the corresponding JSON files - @return genomes_dict { genome_name: json_file_path } - """ - if error_on_warnings: - warnings.filterwarnings("error") - genomes_dir = xavier_base(os.path.join("config", "genomes", hpcname)) - if not os.path.exists(genomes_dir): - warnings.warn(f"Folder does not exist: {genomes_dir}") - search_term = genomes_dir + "/*.json" - json_files = glob.glob(search_term) - if len(json_files) == 0: - warnings.warn( - f"No Genome+Annotation JSONs found in {genomes_dir}. Please specify a custom genome json file with `--genome`" - ) - genomes_dict = { - os.path.basename(json_file).replace(".json", ""): json_file - for json_file in json_files - } - warnings.resetwarnings() - return genomes_dict - - -def md5sum(filename, first_block_only=False, blocksize=65536): - """Gets md5checksum of a file in memory-safe manner. - The file is read in blocks/chunks defined by the blocksize parameter. This is - a safer option to reading the entire file into memory if the file is very large. - @param filename : - Input file on local filesystem to find md5 checksum - @param first_block_only : - Calculate md5 checksum of the first block/chunk only - @param blocksize : - Blocksize of reading N chunks of data to reduce memory profile - @return hasher.hexdigest() : - MD5 checksum of the file's contents - """ - hasher = hashlib.md5() - with open(filename, "rb") as fh: - buf = fh.read(blocksize) - if first_block_only: - # Calculate MD5 of first block or chunk of file. - # This is a useful heuristic for when potentially - # calculating an MD5 checksum of thousand or - # millions of file. - hasher.update(buf) - return hasher.hexdigest() - while len(buf) > 0: - # Calculate MD5 checksum of entire file - hasher.update(buf) - buf = fh.read(blocksize) - - return hasher.hexdigest() - - -## copied directly from rna-seek -def check_cache(parser, cache, *args, **kwargs): - """Check if provided SINGULARITY_CACHE is valid. Singularity caches cannot be - shared across users (and must be owned by the user). Singularity strictly enforces - 0700 user permission on on the cache directory and will return a non-zero exitcode. - @param parser : - Argparse parser object - @param cache : - Singularity cache directory - @return cache : - If singularity cache dir is valid - """ - if not exists(cache): - # Cache directory does not exist on filesystem - os.makedirs(cache) - elif os.path.isfile(cache): - # Cache directory exists as file, raise error - parser.error( - """\n\t\x1b[6;37;41mFatal: Failed to provided a valid singularity cache!\x1b[0m - The provided --singularity-cache already exists on the filesystem as a file. - Please run {} again with a different --singularity-cache location. - """.format( - sys.argv[0] - ) - ) - elif os.path.isdir(cache): - # Provide cache exists as directory - # Check that the user owns the child cache directory - # May revert to os.getuid() if user id is not sufficient - if ( - exists(os.path.join(cache, "cache")) - and os.stat(os.path.join(cache, "cache")).st_uid != os.getuid() - ): - # User does NOT own the cache directory, raise error - parser.error( - """\n\t\x1b[6;37;41mFatal: Failed to provided a valid singularity cache!\x1b[0m - The provided --singularity-cache already exists on the filesystem with a different owner. - Singularity strictly enforces that the cache directory is not shared across users. - Please run {} again with a different --singularity-cache location. - """.format( - sys.argv[0] - ) - ) - - return cache - - -def permissions(parser, path, *args, **kwargs): - """Checks permissions using os.access() to see the user is authorized to access - a file/directory. Checks for existence, readability, writability and executability via: - os.F_OK (tests existence), os.R_OK (tests read), os.W_OK (tests write), os.X_OK (tests exec). - @param parser : - Argparse parser object - @param path : - Name of path to check - @return path : - Returns abs path if it exists and permissions are correct - """ - if not exists(path): - parser.error( - "Path '{}' does not exists! Failed to provide valid input.".format(path) - ) - if not os.access(path, *args, **kwargs): - parser.error( - "Path '{}' exists, but cannot read path due to permissions!".format(path) - ) - - return os.path.abspath(path) - - -def standard_input(parser, path, *args, **kwargs): - """Checks for standard input when provided or permissions using permissions(). - @param parser : - Argparse parser object - @param path : - Name of path to check - @return path : - If path exists and user can read from location - """ - # Checks for standard input - if not sys.stdin.isatty(): - # Standard input provided, set path as an - # empty string to prevent searching of '-' - path = "" - return path - - # Checks for positional arguments as paths - path = permissions(parser, path, *args, **kwargs) - - return path - - -def exists(testpath): - """Checks if file exists on the local filesystem. - @param parser : - argparse parser object - @param testpath : - Name of file/directory to check - @return does_exist : - True when file/directory exists, False when file/directory does not exist - """ - does_exist = True - if not os.path.exists(testpath): - does_exist = False # File or directory does not exist on the filesystem - - return does_exist - - -def ln(files, outdir): - """Creates symlinks for files to an output directory. - @param files list[]: - List of filenames - @param outdir : - Destination or output directory to create symlinks - """ - # Create symlinks for each file in the output directory - for file in files: - ln = os.path.join(outdir, os.path.basename(file)) - if not exists(ln): - os.symlink(os.path.abspath(os.path.realpath(file)), ln) - - -def which(cmd, path=None): - """Checks if an executable is in $PATH - @param cmd : - Name of executable to check - @param path : - Optional list of PATHs to check [default: $PATH] - @return : - True if exe in PATH, False if not in PATH - """ - if path is None: - path = os.environ["PATH"].split(os.pathsep) - - for prefix in path: - filename = os.path.join(prefix, cmd) - executable = os.access(filename, os.X_OK) - is_not_directory = os.path.isfile(filename) - if executable and is_not_directory: - return True - return False - - -def err(*message, **kwargs): - """Prints any provided args to standard error. - kwargs can be provided to modify print functions - behavior. - @param message : - Values printed to standard error - @params kwargs - Key words to modify print function behavior - """ - print(*message, file=sys.stderr, **kwargs) - - -def fatal(*message, **kwargs): - """Prints any provided args to standard error - and exits with an exit code of 1. - @param message : - Values printed to standard error - @params kwargs - Key words to modify print function behavior - """ - err(*message, **kwargs) - sys.exit(1) - - -def require(cmds, suggestions, path=None): - """Enforces an executable is in $PATH - @param cmds list[]: - List of executable names to check - @param suggestions list[]: - Name of module to suggest loading for a given index - in param cmd. - @param path list[]]: - Optional list of PATHs to check [default: $PATH] - """ - error = False - for i in range(len(cmds)): - available = which(cmds[i]) - if not available: - error = True - err( - """\x1b[6;37;41m\n\tFatal: {} is not in $PATH and is required during runtime! - └── Solution: please 'module load {}' and run again!\x1b[0m""".format( - cmds[i], suggestions[i] - ) - ) - - if error: - fatal() - - return - - -def safe_copy(source, target, resources=[]): - """Private function: Given a list paths it will recursively copy each to the - target location. If a target path already exists, it will NOT over-write the - existing paths data. - @param resources : - List of paths to copy over to target location - @params source : - Add a prefix PATH to each resource - @param target : - Target path to copy templates and required resources - """ - - for resource in resources: - destination = os.path.join(target, resource) - if not exists(destination): - # Required resources do not exist - copytree(os.path.join(source, resource), destination) - - -def git_commit_hash(repo_path): - """Gets the git commit hash of the RNA-seek repo. - @param repo_path : - Path to RNA-seek git repo - @return githash : - Latest git commit hash - """ - try: - githash = ( - subprocess.check_output( - ["git", "rev-parse", "HEAD"], stderr=subprocess.STDOUT, cwd=repo_path - ) - .strip() - .decode("utf-8") - ) - # Typecast to fix python3 TypeError (Object of type bytes is not JSON serializable) - # subprocess.check_output() returns a byte string - githash = str(githash) - except Exception as e: - # Github releases are missing the .git directory, - # meaning you cannot get a commit hash, set the - # commit hash to indicate its from a GH release - githash = "github_release" - return githash - - -def join_jsons(templates): - """Joins multiple JSON files to into one data structure - Used to join multiple template JSON files to create a global config dictionary. - @params templates : - List of template JSON files to join together - @return aggregated : - Dictionary containing the contents of all the input JSON files - """ - # Get absolute PATH to templates in rna-seek git repo - repo_path = os.path.dirname(os.path.abspath(__file__)) - aggregated = {} - - for file in templates: - with open(os.path.join(repo_path, file), "r") as fh: - aggregated.update(json.load(fh)) - - return aggregated - - -def check_python_version(): - # version check - # glob.iglob requires 3.11 for using "include_hidden=True" - MIN_PYTHON = (3, 11) - try: - assert sys.version_info >= MIN_PYTHON - print( - "Python version: {0}.{1}.{2}".format( - sys.version_info.major, sys.version_info.minor, sys.version_info.micro - ) - ) - except AssertionError: - exit( - f"{sys.argv[0]} requires Python {'.'.join([str(n) for n in MIN_PYTHON])} or newer" - ) - - -if __name__ == "__main__": - # Calculate MD5 checksum of entire file - print("{} {}".format(md5sum(sys.argv[0]), sys.argv[0])) - # Calculate MD5 checksum of 512 byte chunk of file, - # which is similar to following unix command: - # dd if=utils.py bs=512 count=1 2>/dev/null | md5sum - print( - "{} {}".format( - md5sum(sys.argv[0], first_block_only=True, blocksize=512), sys.argv[0] - ) - ) From e7efb66d4a76ec32f90c19ddae89bc11d2a61439 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 13 Aug 2024 14:38:26 -0400 Subject: [PATCH 02/10] refactor: use pathlib for xavier_base() --- src/xavier/util.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/xavier/util.py b/src/xavier/util.py index 2c13af8..ba0c925 100644 --- a/src/xavier/util.py +++ b/src/xavier/util.py @@ -17,10 +17,8 @@ def xavier_base(*paths): """Get the absolute path to a file in the repository @return abs_path """ - basedir = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.realpath(__file__))) - ) - return os.path.join(basedir, *paths) + basedir = pathlib.Path(__file__).absolute().parent.parent.parent + return basedir.joinpath(*paths) def get_version(): From 2c8e1b9ad0024e8db3c364f8ba0f39d69aa991ce Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 13 Aug 2024 14:59:20 -0400 Subject: [PATCH 03/10] refactor: update tests and cache functions from ccbr_tools --- src/xavier/gui.py | 14 +++++++------ src/xavier/run.py | 53 ----------------------------------------------- tests/test_cli.py | 2 +- tests/test_run.py | 23 ++++++++++++-------- 4 files changed, 23 insertions(+), 69 deletions(-) diff --git a/src/xavier/gui.py b/src/xavier/gui.py index fe3ca56..588edcf 100644 --- a/src/xavier/gui.py +++ b/src/xavier/gui.py @@ -5,15 +5,17 @@ import glob import PySimpleGUI as sg -from .util import ( +from ccbr_tools.pipeline.util import ( get_genomes_dict, get_tmp_dir, get_hpcname, check_python_version, ) +from ccbr_tools.pipeline.cache import get_sif_cache_dir +from ccbr_tools.shell import exec_in_context + from .util import xavier_base, get_version -from .run import run_in_context -from .cache import get_sif_cache_dir +from .run import run def launch_gui(DEBUG=True): @@ -298,9 +300,9 @@ def launch_gui(DEBUG=True): tmp_dir=get_tmp_dir(None, output_dir), threads=2, ) - allout_init = run_in_context(run_args) + allout_init = exec_in_context(run, run_args) run_args.runmode = "dryrun" - allout_dryrun = run_in_context(run_args) + allout_dryrun = exec_in_context(run, run_args) allout = "\n".join([allout_init, allout_dryrun]) if DEBUG: print(allout) @@ -314,7 +316,7 @@ def launch_gui(DEBUG=True): ) if ch == "Yes": run_args.runmode = "run" - allout = run_in_context(run_args) + allout = exec_in_context(run, run_args) sg.popup_scrolled( allout, title="Slurmrun:STDOUT/STDERR", diff --git a/src/xavier/run.py b/src/xavier/run.py index c7b0bee..b464586 100644 --- a/src/xavier/run.py +++ b/src/xavier/run.py @@ -629,49 +629,6 @@ def add_rawdata_information(sub_args, config, ifiles): return config -def image_cache(sub_args, config, repo_path): - """Adds Docker Image URIs, or SIF paths to config if singularity cache option is provided. - If singularity cache option is provided and a local SIF does not exist, a warning is - displayed and the image will be pulled from URI in 'config/containers/images.json'. - @param sub_args : - Parsed arguments for run sub-command - @params config : - Docker Image config file - @param repo_path : - Path to RNA-seek source code and its templates - @return config : - Updated config dictionary containing user information (username and home directory) - """ - images = os.path.join(repo_path, "config", "containers", "images.json") - - # Read in config for docker image uris - with open(images, "r") as fh: - data = json.load(fh) - # Check if local sif exists - for image, uri in data["images"].items(): - if sub_args.sif_cache: - sif = os.path.join( - sub_args.sif_cache, - "{}.sif".format(os.path.basename(uri).replace(":", "_")), - ) - if not exists(sif): - # If local sif does not exist on in cache, print warning - # and default to pulling from URI in config/containers/images.json - print( - 'Warning: Local image "{}" does not exist in singularity cache'.format( - sif - ), - file=sys.stderr, - ) - else: - # Change pointer to image from Registry URI to local SIF - data["images"][image] = sif - - config.update(data) - - return config - - def get_nends(ifiles): """Determines whether the dataset is paired-end or single-end. If paired-end data, checks to see if both mates (R1 and R2) are present for each sample. @@ -968,13 +925,3 @@ def runner( ) return masterjob - - -def run_in_context(args): - """Execute the run function in a context manager to capture stdout/stderr""" - with contextlib.redirect_stdout(io.StringIO()) as out_f, contextlib.redirect_stderr( - io.StringIO() - ) as err_f: - run(args) - allout = out_f.getvalue() + "\n" + err_f.getvalue() - return allout diff --git a/tests/test_cli.py b/tests/test_cli.py index 878b6dc..8b3ad74 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3,7 +3,7 @@ import subprocess import tempfile from xavier.src.xavier.__main__ import main -from xavier.src.xavier.util import get_hpcname +from ccbr_tools.pipeline.util import get_hpcname xavier_run = ( "xavier run " diff --git a/tests/test_run.py b/tests/test_run.py index 8fd28d6..91ca785 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -2,10 +2,12 @@ import glob import os import tempfile +from ccbr_tools.pipeline.util import get_tmp_dir, get_hpcname +from ccbr_tools.pipeline.cache import get_sif_cache_dir +from ccbr_tools.shell import exec_in_context -from xavier.src.xavier.util import get_tmp_dir, xavier_base, get_hpcname -from xavier.src.xavier.cache import get_sif_cache_dir -from xavier.src.xavier.run import run, run_in_context +from xavier.src.xavier.util import xavier_base +from xavier.src.xavier.run import run def test_dryrun(): @@ -32,11 +34,14 @@ def test_dryrun(): threads=2, ) # init - allout_1 = run_in_context(run_args) + allout_1 = exec_in_context(run, run_args) run_args.runmode = "dryrun" # dryrun - allout_2 = run_in_context(run_args) - assert (all([ - "--Initializing" in allout_1, - "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." in allout_2 - ])) + allout_2 = exec_in_context(run, run_args) + assert all( + [ + "--Initializing" in allout_1, + "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." + in allout_2, + ] + ) From 6c98ef106401c17c742df90977f91516d8981da1 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 13 Aug 2024 15:04:02 -0400 Subject: [PATCH 04/10] fix: remove __future__ imports --- src/xavier/__main__.py | 1 - src/xavier/options.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/xavier/__main__.py b/src/xavier/__main__.py index 1ddbebc..ceb5221 100755 --- a/src/xavier/__main__.py +++ b/src/xavier/__main__.py @@ -36,7 +36,6 @@ """ # Python standard library -from __future__ import print_function import sys, os, subprocess, re, json, textwrap diff --git a/src/xavier/options.py b/src/xavier/options.py index 069df95..5f73c0f 100644 --- a/src/xavier/options.py +++ b/src/xavier/options.py @@ -2,7 +2,6 @@ # -*- coding: UTF-8 -*- import os -from __future__ import print_function from ccbr_tools.pipeline.util import permissions From 6aad53c2e796f95eb4ff58b10235470e1cfe5268 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 13 Aug 2024 15:07:44 -0400 Subject: [PATCH 05/10] fix: imports --- src/xavier/__main__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xavier/__main__.py b/src/xavier/__main__.py index ceb5221..8442c2b 100755 --- a/src/xavier/__main__.py +++ b/src/xavier/__main__.py @@ -41,12 +41,13 @@ # 3rd party imports from pypi import argparse # potential python3 3rd party package, added in python/3.5 +from ccbr_tools.pipeline.util import err, exists, fatal, permissions, require +from ccbr_tools.pipeline.cache import check_cache # Local imports from .run import init, setup, bind, dryrun, runner, run from .shells import bash from .options import genome_options -from .util import err, exists, fatal, permissions, check_cache, require from .gui import launch_gui from .util import xavier_base, get_version From fc4efb7b66fc31bad39da58db0cc2f2508ada986 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 13 Aug 2024 15:10:15 -0400 Subject: [PATCH 06/10] fix: import pathlib --- src/xavier/util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xavier/util.py b/src/xavier/util.py index ba0c925..6bdcbe9 100644 --- a/src/xavier/util.py +++ b/src/xavier/util.py @@ -10,6 +10,7 @@ import json import glob import os +import pathlib import warnings From 9080d0030000dcc557e30558963cdede897112a4 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 13 Aug 2024 15:36:44 -0400 Subject: [PATCH 07/10] test: fix xavier_base test --- tests/test_util.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 4cccd51..ad5df41 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,10 +1,7 @@ import os import warnings -from xavier.src.xavier.util import ( - xavier_base -) +from xavier.src.xavier.util import xavier_base + def test_xavier_base(): - test_base = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) - xavier_base() - assert xavier_base("a","b","c").endswith('/a/b/c') + assert str(xavier_base("a", "b", "c")).endswith("/a/b/c") From 1883ede090a81a31dcc985cbf6fd8cbade11a5f4 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 13 Aug 2024 17:01:50 -0400 Subject: [PATCH 08/10] fix: make sure snakemake/7 is loaded --- bin/redirect | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bin/redirect b/bin/redirect index 99a1086..7b36ca3 100755 --- a/bin/redirect +++ b/bin/redirect @@ -56,13 +56,11 @@ fi # - snakemake # are in PATH if [[ $ISBIOWULF == true ]];then - # module purge load_module_if_needed singularity - load_module_if_needed snakemake + load_module_if_needed snakemake/7 elif [[ $ISFRCE == true ]];then # snakemake module on FRCE does not work as expected # use the conda installed version of snakemake instead - # module purge load_module_if_needed load singularity export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH" fi From 44ff43b10d900fdd66550d7465c0ba70c733c11d Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 13 Aug 2024 17:11:07 -0400 Subject: [PATCH 09/10] fix: use image_cache from ccbr_utils --- src/xavier/run.py | 5 +++-- src/xavier/util.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/xavier/run.py b/src/xavier/run.py index b464586..8f9fbbf 100644 --- a/src/xavier/run.py +++ b/src/xavier/run.py @@ -21,6 +21,7 @@ require, get_hpcname, ) +from ccbr_tools.pipeline.cache import image_cache # Local imports from .util import get_version, xavier_base @@ -37,7 +38,7 @@ def run(sub_args): # Step 0. Check for required dependencies # The pipelines has only two requirements: # snakemake and singularity - require(["snakemake", "singularity"], ["snakemake", "singularity"]) + require(["snakemake", "singularity"], ["snakemake/7", "singularity"]) # Optional Step. Initialize working directory, # copy over required resources to run @@ -359,7 +360,7 @@ def setup(sub_args, repo_path, output_path, create_nidap_folder_YN="no", links=[ # Resolves if an image needs to be pulled from an OCI registry or # a local SIF generated from the rna-seek cache subcommand exists - config = image_cache(sub_args, config, repo_path) + config = image_cache(sub_args, config) # Add other cli collected info config["project"]["annotation"] = sub_args.genome diff --git a/src/xavier/util.py b/src/xavier/util.py index 6bdcbe9..4f04bfa 100644 --- a/src/xavier/util.py +++ b/src/xavier/util.py @@ -19,7 +19,7 @@ def xavier_base(*paths): @return abs_path """ basedir = pathlib.Path(__file__).absolute().parent.parent.parent - return basedir.joinpath(*paths) + return str(basedir.joinpath(*paths)) def get_version(): From af637a70bc1dceb64a696bc6cd6806a533786972 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 13 Aug 2024 17:11:33 -0400 Subject: [PATCH 10/10] test: use shell_run() to simplify tests --- tests/test_cli.py | 21 +++++++-------------- tests/test_run.py | 2 +- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 8b3ad74..c5a642e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,6 +4,7 @@ import tempfile from xavier.src.xavier.__main__ import main from ccbr_tools.pipeline.util import get_hpcname +from ccbr_tools.shell import exec_in_context, shell_run xavier_run = ( "xavier run " @@ -17,11 +18,8 @@ def run_in_temp(command_str): with tempfile.TemporaryDirectory() as tmp_dir: outdir = os.path.join(tmp_dir, "testout") run_command = f"{command_str} --output {outdir}" - output = subprocess.run( + output = shell_run( f"{run_command} --runmode init && {run_command} --runmode dryrun", - capture_output=True, - shell=True, - text=True, ) if os.path.exists(os.path.join(outdir, "config.json")): with open(os.path.join(outdir, "config.json"), "r") as infile: @@ -32,12 +30,7 @@ def run_in_temp(command_str): def test_help(): - assert ( - "XAVIER" - in subprocess.run( - "./bin/xavier --help", capture_output=True, shell=True, text=True - ).stdout - ) + assert "XAVIER" in shell_run("./bin/xavier --help") def test_dryrun_targets(): @@ -53,13 +46,13 @@ def test_dryrun_targets(): assert all( [ "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." - in output_human.stdout, + in output_human, "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." - in output_mouse.stdout, + in output_mouse, "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." - in output_custom.stdout, + in output_custom, "error: Path 'not/a/file.txt' does not exists! Failed to provide valid input." - in output_invalid.stderr, + in output_invalid, config_human["input_params"]["EXOME_TARGETS"].endswith( "resources/Agilent_SSv7_allExons_hg38.bed" ), diff --git a/tests/test_run.py b/tests/test_run.py index 91ca785..c705137 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -15,7 +15,7 @@ def test_dryrun(): with tempfile.TemporaryDirectory() as tmp_dir: run_args = argparse.Namespace( runmode="init", - input=list(glob.glob(xavier_base(".tests/*.fastq.gz"))), + input=list(glob.glob(f"{xavier_base('.tests')}/*.fastq.gz")), output=tmp_dir, genome="hg38", targets=xavier_base("resources/Agilent_SSv7_allExons_hg38.bed"),