From 9a6e275196bdb112e0d6f6ac4cdfbacc43db351f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 22 Feb 2024 16:02:53 +0100 Subject: [PATCH 1/6] Fixed #41 --- bedboss/bedboss.py | 14 ++++--- bedboss/bedmaker/bedmaker.py | 1 - bedboss/bedqc/bedqc.py | 2 - bedboss/bedstat/bedstat.py | 1 - bedboss/cli.py | 81 +++++++++++++----------------------- bedboss/models.py | 59 ++++++++++++++++++++++++++ test/test_bedboss.py | 3 +- 7 files changed, 96 insertions(+), 65 deletions(-) diff --git a/bedboss/bedboss.py b/bedboss/bedboss.py index c7033ea..3a0884b 100644 --- a/bedboss/bedboss.py +++ b/bedboss/bedboss.py @@ -28,7 +28,7 @@ BEDSTAT_OUTPUT, BED_PEP_REGISTRY, ) -from bedboss.models import BedMetadata +from bedboss.models import BedMetadata, BedStatCLIModel, BedMakerCLIModel, BedQCCLIModel from bedboss.utils import ( extract_file_name, standardize_genome_name, @@ -310,6 +310,7 @@ def insert_pep( force_overwrite: bool = False, upload_s3: bool = False, upload_pephub: bool = False, + upload_qdrant: bool = False, pm: pypiper.PipelineManager = None, *args, **kwargs, @@ -327,11 +328,12 @@ def insert_pep( :param bool check_qc: whether to run quality control during badmaking :param bool standardize: "Standardize bed files: remove non-standard chromosomes and headers if necessary Default: False" :param str ensdb: a full path to the ensdb gtf file required for genomes not in GDdata - :param bool just_db_commit: whether just to commit the JSON to the database - :param bool no_db_commit: whether the JSON commit to the database should be skipped + :param bool just_db_commit: whether save only to the database (Without saving locally ) + :param bool db_commit: whether to upload data to the database :param bool force_overwrite: whether to overwrite the existing record :param bool upload_s3: whether to upload to s3 :param bool upload_pephub: whether to push bedfiles and metadata to pephub (default: False) + :param bool upload_qdrant: whether to execute qdrant indexing :param pypiper.PipelineManager pm: pypiper object :return: None """ @@ -436,11 +438,11 @@ def main(test_args: dict = None) -> NoReturn: elif args_dict["command"] == "insert": insert_pep(pm=pm, **args_dict) elif args_dict["command"] == "make": - make_all(pm=pm, **args_dict) + make_all(**BedMakerCLIModel(pm=pm, **args_dict).model_dump()) elif args_dict["command"] == "qc": - bedqc(pm=pm, **args_dict) + bedqc(**BedQCCLIModel(pm=pm, **args_dict).model_dump()) elif args_dict["command"] == "stat": - bedstat(pm=pm, **args_dict) + bedstat(**BedStatCLIModel(pm=pm, **args_dict).model_dump()) elif args_dict["command"] == "bunch": run_bedbuncher(pm=pm, **args_dict) elif args_dict["command"] == "index": diff --git a/bedboss/bedmaker/bedmaker.py b/bedboss/bedmaker/bedmaker.py index d16c77d..fa81392 100755 --- a/bedboss/bedmaker/bedmaker.py +++ b/bedboss/bedmaker/bedmaker.py @@ -61,7 +61,6 @@ def __init__( standardize: bool = False, check_qc: bool = True, pm: pypiper.PipelineManager = None, - **kwargs, ): """ Pypiper pipeline to convert supported file formats into diff --git a/bedboss/bedqc/bedqc.py b/bedboss/bedqc/bedqc.py index 233bf31..0ba791b 100755 --- a/bedboss/bedqc/bedqc.py +++ b/bedboss/bedqc/bedqc.py @@ -18,7 +18,6 @@ def bedqc( max_region_number: int = MAX_REGION_NUMBER, min_region_width: int = MIN_REGION_WIDTH, pm: pypiper.PipelineManager = None, - **kwargs, ) -> bool: """ Perform quality checks on a BED file. @@ -32,7 +31,6 @@ def bedqc( :return: True if the file passes the quality check. """ _LOGGER.info("Running bedqc...") - _LOGGER.warning(f"Unused arguments: {kwargs}") output_file = os.path.join(outfolder, "failed_qc.csv") bedfile_name = os.path.basename(bedfile) diff --git a/bedboss/bedstat/bedstat.py b/bedboss/bedstat/bedstat.py index 265e215..ba8d74d 100755 --- a/bedboss/bedstat/bedstat.py +++ b/bedboss/bedstat/bedstat.py @@ -74,7 +74,6 @@ def bedstat( open_signal_matrix: str = None, just_db_commit: bool = False, pm: pypiper.PipelineManager = None, - **kwargs, ) -> dict: """ Run bedstat pipeline - pipeline for obtaining statistics about bed files diff --git a/bedboss/cli.py b/bedboss/cli.py index b9a54d0..821568f 100644 --- a/bedboss/cli.py +++ b/bedboss/cli.py @@ -153,18 +153,19 @@ def build_argparser() -> ArgumentParser: ) sub_all.add_argument( "--no-db-commit", - action="store_true", - help="skip the JSON commit to the database", + dest="db_commit", + action="store_false", + help="skip the JSON commit to the database [Default: False]", ) sub_all.add_argument( "--just-db-commit", action="store_true", - help="just commit the JSON to the database", + help="Do not save the results locally", ) sub_all.add_argument( - "--skip-qdrant", - action="store_true", - help="whether to skip qdrant indexing", + "--upload_qdrant", + action="store_false", + help="whether to execute qdrant indexing", ) sub_all.add_argument( "--upload-pephub", @@ -217,9 +218,9 @@ def build_argparser() -> ArgumentParser: action="store_true", ) sub_all_pep.add_argument( - "--skip-qdrant", - action="store_true", - help="whether to skip qdrant indexing", + "--upload_qdrant", + action="store_false", + help="whether to execute qdrant indexing", ) sub_all_pep.add_argument( "--ensdb", @@ -230,8 +231,9 @@ def build_argparser() -> ArgumentParser: ) sub_all_pep.add_argument( "--no-db-commit", - action="store_true", - help="skip the JSON commit to the database", + dest="db_commit", + action="store_false", + help="skip the JSON commit to the database [Default: False]", ) sub_all_pep.add_argument( "--just-db-commit", @@ -347,6 +349,14 @@ def build_argparser() -> ArgumentParser: sub_stat.add_argument( "--bedfile", help="a full path to bed file to process [Required]", required=True ) + sub_stat.add_argument( + "--genome", + dest="genome", + type=str, + required=True, + help="genome assembly of the sample [Required]", + ) + sub_stat.add_argument( "--outfolder", required=True, @@ -354,62 +364,27 @@ def build_argparser() -> ArgumentParser: type=str, ) sub_stat.add_argument( - "--open-signal-matrix", + "--bigbed", type=str, required=False, default=None, - help="a full path to the openSignalMatrix required for the tissue " - "specificity plots", + help="a full path to the bigbed files", ) - sub_stat.add_argument( - "--ensdb", + "--open-signal-matrix", type=str, required=False, default=None, - help="a full path to the ensdb gtf file required for genomes not in GDdata ", + help="a full path to the openSignalMatrix required for the tissue " + "specificity plots", ) sub_stat.add_argument( - "--bigbed", + "--ensdb", type=str, required=False, default=None, - help="a full path to the bigbed files", - ) - - sub_stat.add_argument( - "--bedbase-config", - dest="bedbase_config", - type=str, - required=True, - help="a path to the bedbase configuration file [Required]", - ) - sub_stat.add_argument( - "-y", - "--sample-yaml", - dest="sample_yaml", - type=str, - required=False, - help="a yaml config file with sample attributes to pass on more metadata " - "into the database", - ) - sub_stat.add_argument( - "--genome", - dest="genome", - type=str, - required=True, - help="genome assembly of the sample [Required]", - ) - sub_stat.add_argument( - "--no-db-commit", - action="store_true", - help="whether the JSON commit to the database should be skipped", - ) - sub_stat.add_argument( - "--just-db-commit", - action="store_true", - help="whether just to commit the JSON to the database", + help="a full path to the ensdb gtf file required for genomes not in GDdata ", ) sub_bunch.add_argument( diff --git a/bedboss/models.py b/bedboss/models.py index eba5407..7ae01b7 100644 --- a/bedboss/models.py +++ b/bedboss/models.py @@ -1,6 +1,11 @@ from pydantic import BaseModel, ConfigDict, Field from enum import Enum +import pypiper +import pathlib +from typing import Union + +from bedboss.const import MAX_FILE_SIZE, MAX_REGION_NUMBER, MIN_REGION_WIDTH class FILE_TYPE(str, Enum): @@ -41,3 +46,57 @@ class BedMetadata(BaseModel): populate_by_name=True, extra="allow", ) + + +class BedStatCLIModel(BaseModel): + """ + CLI model for bedstat + """ + + bedfile: Union[str, pathlib.Path] + genome: str + outfolder: Union[str, pathlib.Path] + bed_digest: str = None + bigbed: Union[str, pathlib.Path] = None + ensdb: str = None + open_signal_matrix: str = None + just_db_commit: bool = False + pm: pypiper.PipelineManager = None + + model_config = ConfigDict(extra="ignore", arbitrary_types_allowed=True) + + +class BedQCCLIModel(BaseModel): + """ + CLI model for bedqc + """ + + bedfile: Union[str, pathlib.Path] + outfolder: Union[str, pathlib.Path] + max_file_size: int = MAX_FILE_SIZE + max_region_number: int = MAX_REGION_NUMBER + min_region_width: int = MIN_REGION_WIDTH + pm: pypiper.PipelineManager = None + + model_config = ConfigDict(extra="ignore", arbitrary_types_allowed=True) + + +class BedMakerCLIModel(BaseModel): + """ + CLI model for bedmaker + """ + + input_file: Union[str, pathlib.Path] + input_type: str + output_bed: Union[str, pathlib.Path] + output_bigbed: Union[str, pathlib.Path] + sample_name: str + genome: str + rfg_config: Union[str, pathlib.Path] = None + chrom_sizes: str = None + narrowpeak: bool = False + standardize: bool = False + check_qc: bool = True + pm: pypiper.PipelineManager = None + + model_config = ConfigDict(extra="ignore", arbitrary_types_allowed=True) diff --git a/test/test_bedboss.py b/test/test_bedboss.py index 25b2879..60a1c33 100644 --- a/test/test_bedboss.py +++ b/test/test_bedboss.py @@ -60,7 +60,7 @@ def test_qc(bedfile, tmpdir): { "command": "qc", "bedfile": bedfile, - "outfolder": tmpdir, + "outfolder": str(tmpdir), "multy": True, } ) @@ -193,7 +193,6 @@ def test_boss(self, input_file, genome, input_type, output_temp_dir): "bedbase_config": BEDBASE_CONFIG, "no_db_commit": True, "outfolder": output_temp_dir, - "skip_qdrant": True, "multy": True, } ) From d391a1253fbd2a75bd4b1f155750cbfa6bbfbfbc Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 22 Feb 2024 16:16:30 +0100 Subject: [PATCH 2/6] Fixed #42 --- bedboss/bedqc/bedqc.py | 1 + bedboss/exceptions.py | 1 + 2 files changed, 2 insertions(+) diff --git a/bedboss/bedqc/bedqc.py b/bedboss/bedqc/bedqc.py index 0ba791b..d71f44d 100755 --- a/bedboss/bedqc/bedqc.py +++ b/bedboss/bedqc/bedqc.py @@ -29,6 +29,7 @@ def bedqc( :param min_region_width: Minimum region width threshold to pass the quality check. :param pm: Pypiper object for managing pipeline operations. :return: True if the file passes the quality check. + :raises QualityException: if the file does not pass the quality """ _LOGGER.info("Running bedqc...") diff --git a/bedboss/exceptions.py b/bedboss/exceptions.py index afd6f03..2aea22b 100644 --- a/bedboss/exceptions.py +++ b/bedboss/exceptions.py @@ -33,6 +33,7 @@ def __init__(self, reason: str = ""): :param str reason: reason why quality control wasn't successful """ + self.reason = reason super(QualityException, self).__init__(reason) From 8c56f33542367db1b739e24fd47f68e3bdd28482 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 22 Feb 2024 19:31:31 +0100 Subject: [PATCH 3/6] Fixed #32 --- bedboss/bedboss.py | 68 ++++++++++++++++++-------------- bedboss/bedbuncher/bedbuncher.py | 34 +++++++++++----- bedboss/bedmaker/bedmaker.py | 17 +++++--- 3 files changed, 74 insertions(+), 45 deletions(-) diff --git a/bedboss/bedboss.py b/bedboss/bedboss.py index 3a0884b..3b7df28 100644 --- a/bedboss/bedboss.py +++ b/bedboss/bedboss.py @@ -301,7 +301,6 @@ def insert_pep( pep: Union[str, peppy.Project], rfg_config: str = None, create_bedset: bool = True, - skip_qdrant: bool = True, check_qc: bool = True, standardize: bool = False, ensdb: str = None, @@ -324,7 +323,7 @@ def insert_pep( :param Union[str, peppy.Project] pep: path to the pep file or pephub registry path :param str rfg_config: path to the genome config file (refgenie) :param bool create_bedset: whether to create bedset - :param bool skip_qdrant: whether to skip qdrant indexing + :param bool upload_qdrant: whether to upload bedfiles to qdrant :param bool check_qc: whether to run quality control during badmaking :param bool standardize: "Standardize bed files: remove non-standard chromosomes and headers if necessary Default: False" :param str ensdb: a full path to the ensdb gtf file required for genomes not in GDdata @@ -338,6 +337,8 @@ def insert_pep( :return: None """ + _LOGGER.warning(f"!Unused arguments: {kwargs}") + failed_samples = [] pephub_registry_path = None if isinstance(pep, peppy.Project): pass @@ -356,36 +357,41 @@ def insert_pep( for i, pep_sample in enumerate(pep.samples): _LOGGER.info(f"Running bedboss pipeline for {pep_sample.sample_name}") - - if pep_sample.get("file_type").lower() == "narrowpeak": - is_narrow_peak = True + if pep_sample.get("file_type"): + if pep_sample.get("file_type").lower() == "narrowpeak": + is_narrow_peak = True + else: + is_narrow_peak = False else: is_narrow_peak = False - - bed_id = run_all( - sample_name=pep_sample.sample_name, - input_file=pep_sample.input_file, - input_type=pep_sample.input_type, - genome=pep_sample.genome, - narrowpeak=is_narrow_peak, - chrom_sizes=pep_sample.get("chrom_sizes"), - open_signal_matrix=pep_sample.get("open_signal_matrix"), - other_metadata=pep_sample.to_dict(), - outfolder=output_folder, - bedbase_config=bbc, - rfg_config=rfg_config, - check_qc=check_qc, - standardize=standardize, - ensdb=ensdb, - just_db_commit=just_db_commit, - no_db_commit=no_db_commit, - force_overwrite=force_overwrite, - skip_qdrant=skip_qdrant, - upload_s3=upload_s3, - upload_pephub=upload_pephub, - pm=pm, - ) - pep.samples[i].record_identifier = bed_id + try: + bed_id = run_all( + sample_name=pep_sample.sample_name, + input_file=pep_sample.input_file, + input_type=pep_sample.input_type, + genome=pep_sample.genome, + narrowpeak=is_narrow_peak, + chrom_sizes=pep_sample.get("chrom_sizes"), + open_signal_matrix=pep_sample.get("open_signal_matrix"), + other_metadata=pep_sample.to_dict(), + outfolder=output_folder, + bedbase_config=bbc, + rfg_config=rfg_config, + check_qc=check_qc, + standardize=standardize, + ensdb=ensdb, + just_db_commit=just_db_commit, + no_db_commit=no_db_commit, + force_overwrite=force_overwrite, + upload_qdrant=upload_qdrant, + upload_s3=upload_s3, + upload_pephub=upload_pephub, + pm=pm, + ) + pep.samples[i].record_identifier = bed_id + except BedBossException as e: + _LOGGER.error(f"Failed to process {pep_sample.sample_name}. See {e}") + failed_samples.append(pep_sample.sample_name) else: _LOGGER.info("Skipping uploading to s3. Flag `upload_s3` is set to False") @@ -396,11 +402,13 @@ def insert_pep( bedbase_config=bbc, bedset_pep=pep, pephub_registry_path=pephub_registry_path, + upload_pephub=upload_pephub, ) else: _LOGGER.info( f"Skipping bedset creation. Create_bedset is set to {create_bedset}" ) + _LOGGER.info(f"Failed samples: {failed_samples}") def main(test_args: dict = None) -> NoReturn: diff --git a/bedboss/bedbuncher/bedbuncher.py b/bedboss/bedbuncher/bedbuncher.py index 01efd64..bee498e 100644 --- a/bedboss/bedbuncher/bedbuncher.py +++ b/bedboss/bedbuncher/bedbuncher.py @@ -12,11 +12,12 @@ import pephubclient from pephubclient.helpers import is_registry_path import logging +from ubiquerg import parse_registry_path from bedboss.const import ( DEFAULT_BEDBASE_API_URL, DEFAULT_BEDBASE_CACHE_PATH, - OUTPUT_FOLDER_NAME, + BED_PEP_REGISTRY, ) @@ -37,11 +38,14 @@ def create_bedset_from_pep( _LOGGER.info("Creating bedset from pep.") new_bedset = BedSet() for bedfile_id in pep.samples: - bedfile_object = BBClient( - cache_folder=cache_folder, - bedbase_api=bedbase_api, - ).load_bed(bedfile_id.get("record_identifier") or bedfile_id.sample_name) - new_bedset.add(bedfile_object) + try: + bedfile_object = BBClient( + cache_folder=cache_folder, + bedbase_api=bedbase_api, + ).load_bed(bedfile_id.get("record_identifier") or bedfile_id.sample_name) + new_bedset.add(bedfile_object) + except Exception as err: + pass _LOGGER.info("Bedset was created successfully") return new_bedset @@ -231,6 +235,7 @@ def run_bedbuncher( bedbase_api: str = DEFAULT_BEDBASE_API_URL, cache_path: str = DEFAULT_BEDBASE_CACHE_PATH, heavy: bool = False, + upload_pephub: bool = False, *args, **kwargs, ) -> None: @@ -244,6 +249,7 @@ def run_bedbuncher( :param cache_path: path to the cache folder [DEFAULT: ./bedbase_cache] :param heavy: whether to use heavy processing (add all columns to the database). if False -> R-script won't be executed, only basic statistics will be calculated + :param upload_pephub: whether to upload bedset to pephub :return: None """ @@ -278,17 +284,27 @@ def run_bedbuncher( _LOGGER.warning( f"Description for bedset {bedset_name or pep_of_bed.get('name')} was not provided." ) - + record_id = bedset_name or pep_of_bed.name add_bedset_to_database( bbc, - record_id=bedset_name or pep_of_bed.name, + record_id=record_id, bed_set=bedset, bedset_name=bedset_name or pep_of_bed.name, genome=dict(pep_of_bed.config.get("genome", {})), description=pep_of_bed.description or "", - pephub_registry_path=pephub_registry_path, + # pephub_registry_path=pephub_registry_path, heavy=heavy, ) + if upload_pephub: + phc = pephubclient.PEPHubClient() + reg_path_obj = parse_registry_path(pephub_registry_path) + phc.view.create( + namespace=reg_path_obj["namespace"], + name=reg_path_obj["item"], + tag=reg_path_obj["tag"], + view_name=record_id, + sample_list=[sample.identifier for sample in bedset], + ) return None diff --git a/bedboss/bedmaker/bedmaker.py b/bedboss/bedmaker/bedmaker.py index fa81392..96e8ea2 100755 --- a/bedboss/bedmaker/bedmaker.py +++ b/bedboss/bedmaker/bedmaker.py @@ -185,11 +185,16 @@ def make(self) -> dict: # we need this exception to catch the case when the input file is not a bed file bed_type, bed_format = get_bed_type(self.output_bed) if self.check_qc: - bedqc( - self.output_bed, - outfolder=os.path.join(self.bed_parent, QC_FOLDER_NAME), - pm=self.pm, - ) + try: + bedqc( + self.output_bed, + outfolder=os.path.join(self.bed_parent, QC_FOLDER_NAME), + pm=self.pm, + ) + except Exception as e: + raise BedBossException( + f"Quality control failed for {self.output_bed}. Error: {e}" + ) self.make_bigbed(bed_type=bed_type) @@ -355,7 +360,7 @@ def copy_with_standardization(self): except (pd.errors.ParserError, pd.errors.EmptyDataError) as e: if row_count <= max_rows: row_count += 1 - if not df: + if not isinstance(df, pd.DataFrame): raise BedBossException( reason=f"Bed file is broken and could not be parsed due to CSV parse error." ) From c4c75ad0e6fabecd280f2a61ef0421b337a8d57d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 22 Feb 2024 20:30:45 +0100 Subject: [PATCH 4/6] fixed bedsets --- bedboss/bedbuncher/bedbuncher.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/bedboss/bedbuncher/bedbuncher.py b/bedboss/bedbuncher/bedbuncher.py index bee498e..7d9fc64 100644 --- a/bedboss/bedbuncher/bedbuncher.py +++ b/bedboss/bedbuncher/bedbuncher.py @@ -285,19 +285,24 @@ def run_bedbuncher( f"Description for bedset {bedset_name or pep_of_bed.get('name')} was not provided." ) record_id = bedset_name or pep_of_bed.name - add_bedset_to_database( - bbc, - record_id=record_id, - bed_set=bedset, - bedset_name=bedset_name or pep_of_bed.name, - genome=dict(pep_of_bed.config.get("genome", {})), - description=pep_of_bed.description or "", - # pephub_registry_path=pephub_registry_path, - heavy=heavy, - ) + try: + add_bedset_to_database( + bbc, + record_id=record_id, + bed_set=bedset, + bedset_name=bedset_name or pep_of_bed.name, + genome=dict(pep_of_bed.config.get("genome", {})), + description=pep_of_bed.description or "", + # pephub_registry_path=pephub_registry_path, + heavy=heavy, + ) + except Exception as err: + pass if upload_pephub: phc = pephubclient.PEPHubClient() - reg_path_obj = parse_registry_path(pephub_registry_path) + reg_path_obj = parse_registry_path(BED_PEP_REGISTRY) + bed_ids = [sample.identifier for sample in bedset if sample.identifier is not None] + print(bed_ids) phc.view.create( namespace=reg_path_obj["namespace"], name=reg_path_obj["item"], From 6506df2b07abe47a0bdc9200958d324842aad904 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 22 Feb 2024 21:11:37 +0100 Subject: [PATCH 5/6] fixed bedsets 2 --- bedboss/bedbuncher/bedbuncher.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bedboss/bedbuncher/bedbuncher.py b/bedboss/bedbuncher/bedbuncher.py index 7d9fc64..7e737c0 100644 --- a/bedboss/bedbuncher/bedbuncher.py +++ b/bedboss/bedbuncher/bedbuncher.py @@ -301,14 +301,18 @@ def run_bedbuncher( if upload_pephub: phc = pephubclient.PEPHubClient() reg_path_obj = parse_registry_path(BED_PEP_REGISTRY) - bed_ids = [sample.identifier for sample in bedset if sample.identifier is not None] + bed_ids = [ + sample.record_identifier + for sample in pep_of_bed.samples + if sample.get("record_identifier") is not None + ] print(bed_ids) phc.view.create( namespace=reg_path_obj["namespace"], name=reg_path_obj["item"], tag=reg_path_obj["tag"], view_name=record_id, - sample_list=[sample.identifier for sample in bedset], + sample_list=bed_ids, ) return None From 96ca0a86219177db0991c40a79b6accc7537c2de Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 26 Feb 2024 21:54:24 +0100 Subject: [PATCH 6/6] added requirement test to cli --- bedboss/bedboss.py | 19 +++++++++++++++++-- bedboss/bedbuncher/bedbuncher.py | 10 +++++----- bedboss/cli.py | 4 ++++ .../requirements_test.sh | 0 docs/changelog.md | 2 +- test/test_bedboss.py | 18 +++++++++++------- 6 files changed, 38 insertions(+), 15 deletions(-) rename test/bash_requirements_test.sh => bedboss/requirements_test.sh (100%) diff --git a/bedboss/bedboss.py b/bedboss/bedboss.py index 3b7df28..b04ca67 100644 --- a/bedboss/bedboss.py +++ b/bedboss/bedboss.py @@ -8,6 +8,7 @@ import peppy from eido import validate_project import bbconf +import subprocess import pephubclient from pephubclient import PEPHubClient @@ -113,6 +114,18 @@ def load_to_s3( pm.run(cmd=command, lock_name="s3_sync_bedstat") +def requirements_check() -> None: + """ + Check if all requirements are installed + + :return: None + """ + _LOGGER.info("Checking requirements...") + subprocess.run( + ["bash", f"{os.path.dirname(os.path.abspath(__file__))}/requirements_test.sh"] + ) + + def run_all( sample_name: str, input_file: str, @@ -433,13 +446,13 @@ def main(test_args: dict = None) -> NoReturn: or "test_outfolder", ) pm_out_folder = os.path.join(os.path.abspath(pm_out_folder[0]), "pipeline_manager") - pm = pypiper.PipelineManager( name="bedboss-pipeline", outfolder=pm_out_folder, version=__version__, - args=args, + # args=args, multi=args_dict.get("multy", False), + recover=True, ) if args_dict["command"] == "all": run_all(pm=pm, **args_dict) @@ -455,6 +468,8 @@ def main(test_args: dict = None) -> NoReturn: run_bedbuncher(pm=pm, **args_dict) elif args_dict["command"] == "index": add_to_qdrant(pm=pm, **args_dict) + elif args_dict["command"] == "requirements-check": + requirements_check() else: parser.print_help() # raise Exception("Incorrect pipeline name.") diff --git a/bedboss/bedbuncher/bedbuncher.py b/bedboss/bedbuncher/bedbuncher.py index 7e737c0..ec8932b 100644 --- a/bedboss/bedbuncher/bedbuncher.py +++ b/bedboss/bedbuncher/bedbuncher.py @@ -317,8 +317,8 @@ def run_bedbuncher( return None -if __name__ == "__main__": - run_bedbuncher( - "/media/alex/Extreme SSD/databio/repos/bedbase_all/bedhost/bedbase_configuration_compose.yaml", - "databio/excluderanges:id3", - ) +# if __name__ == "__main__": +# run_bedbuncher( +# "/media/alex/Extreme SSD/databio/repos/bedbase_all/bedhost/bedbase_configuration_compose.yaml", +# "databio/excluderanges:id3", +# ) diff --git a/bedboss/cli.py b/bedboss/cli.py index 821568f..9e0dce2 100644 --- a/bedboss/cli.py +++ b/bedboss/cli.py @@ -51,6 +51,10 @@ def build_argparser() -> ArgumentParser: "index", help="Index not indexed bed files and add them to the qdrant database " ) + subparser.add_parser( + "requirements-check", help="Check if all requirements are installed" + ) + sub_all.add_argument( "--outfolder", required=True, diff --git a/test/bash_requirements_test.sh b/bedboss/requirements_test.sh similarity index 100% rename from test/bash_requirements_test.sh rename to bedboss/requirements_test.sh diff --git a/docs/changelog.md b/docs/changelog.md index 5026ad7..e874224 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.1.0a1] - 2023-08-02 +## [0.1.0] - 2024-01-26 ### Added - Initial alpha release diff --git a/test/test_bedboss.py b/test/test_bedboss.py index 60a1c33..20e70a1 100644 --- a/test/test_bedboss.py +++ b/test/test_bedboss.py @@ -1,4 +1,5 @@ from bedboss.bedboss import main +import bedboss import os import warnings import subprocess @@ -13,7 +14,9 @@ ) BEDBASE_CONFIG = os.path.join(FILE_DIR, "test_dependencies", "bedbase_config_test.yaml") -DEPENDENCIES_TEST_SCRIPT = f"{FILE_DIR}/bash_requirements_test.sh" +DEPENDENCIES_TEST_SCRIPT = ( + f"{os.path.dirname(os.path.abspath(bedboss.__file__))}/requirements_test.sh" +) pytest_db_skip_reason = "Database is not set up... To run this test, set up the database. Go to test/README.md for more information." @@ -23,16 +26,17 @@ def check_dependencies_installed() -> bool: print("Testing dependencies...") # key = "PATH" # value = os.getenv(key) - test_dep_return_code = subprocess.run([DEPENDENCIES_TEST_SCRIPT], shell=True) - if not (1 > test_dep_return_code.returncode): + test_dep_return_code = subprocess.run(["bash", DEPENDENCIES_TEST_SCRIPT]) + if test_dep_return_code.returncode == 127: + raise Exception(f"test script '{DEPENDENCIES_TEST_SCRIPT}' doesn't exist.") + elif not (1 > test_dep_return_code.returncode): warnings.warn(UserWarning(f"{pytest_db_skip_reason}")) return False return True # return 1 > test_dep_return_code.returncode -# dependencies_installed = check_dependencies_installed() -dependencies_installed = True +dependencies_installed = check_dependencies_installed() def db_setup(): @@ -45,8 +49,8 @@ def db_setup(): return True -# def test_dependencies(): -# assert dependencies_installed +def test_dependencies(): + assert dependencies_installed @pytest.mark.parametrize(