diff --git a/.gitignore b/.gitignore index c5f7c23..19c66fc 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ # Distribution / packaging .Python +.ruff_cache/ build/ develop-eggs/ dist/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..20df14e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: + # Run the Ruff linter. + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.1.3 + hooks: + # Run the Ruff linter. + - id: ruff + # Run the Ruff formatter. + - id: ruff-format diff --git a/MANIFEST.in b/MANIFEST.in index 1c82bfe..5520e14 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,4 +5,6 @@ include bedboss/bedstat/* include bedboss/bedstat/tools/* include bedboss/bedmaker/* include bedboss/bedqc/* -include bedboss/qdrant_index/* \ No newline at end of file +include bedboss/qdrant_index/* +include bedboss/bedbuncher/* +include bedboss/bedbuncher/tools/* \ No newline at end of file diff --git a/README.md b/README.md index 8877f1b..ed62fe2 100644 --- a/README.md +++ b/README.md @@ -30,4 +30,4 @@ Calculates statistics about BED files. Detailed information about each pipeline can be found in the [bedboss Readme](./docs/README.md). -For the specific bedbase.org instance, see instructions in the bedbase.org repo. \ No newline at end of file +For the specific bedbase.org instance, see instructions in the bedbase.org repo. 
diff --git a/bedboss/__init__.py b/bedboss/__init__.py index ba8f9e0..57bf34b 100644 --- a/bedboss/__init__.py +++ b/bedboss/__init__.py @@ -2,13 +2,20 @@ import logmuse import coloredlogs -from bedboss import * +# from bedboss import * # from bedboss.bedqc.bedqc import bedqc # from bedboss.bedmaker.bedmaker import BedMaker # from bedboss.bedstat.bedstat import bedstat from bedboss._version import __version__ -from bedboss.bedboss import run_all, run_all_by_pep, bedqc, BedMaker, bedstat +from bedboss.bedboss import ( + run_all, + run_all_by_pep, + bedqc, + BedMaker, + bedstat, + run_bedbuncher, +) __package_name__ = "bedboss" @@ -33,6 +40,7 @@ "bedstat", "run_all", "run_all_by_pep", + "run_bedbuncher", ] _LOGGER = logmuse.init_logger("bedboss") diff --git a/bedboss/_version.py b/bedboss/_version.py index b0548b6..0a0820d 100644 --- a/bedboss/_version.py +++ b/bedboss/_version.py @@ -1 +1 @@ -__version__ = "0.1.0a4" +__version__ = "0.1.0a5" diff --git a/bedboss/bedboss.py b/bedboss/bedboss.py index ed0fb4e..5d1e124 100644 --- a/bedboss/bedboss.py +++ b/bedboss/bedboss.py @@ -1,8 +1,7 @@ import logging import os -from typing import NoReturn, Union, Dict +from typing import NoReturn, Union -import peppy import pypiper from argparse import Namespace import logmuse @@ -11,6 +10,7 @@ from bedboss.bedstat.bedstat import bedstat from bedboss.bedmaker.bedmaker import BedMaker from bedboss.bedqc.bedqc import bedqc +from bedboss.bedbuncher import run_bedbuncher from bedboss.qdrant_index import add_to_qdrant from bedboss.cli import build_argparser from bedboss.const import ( @@ -42,7 +42,7 @@ def get_osm_path(genome: str) -> Union[str, None]: :return: path to the Open Signal Matrix """ # TODO: add more osm - _LOGGER.info(f"Getting Open Signal Matrix file path...") + _LOGGER.info("Getting Open Signal Matrix file path...") if genome == "hg19" or genome == "GRCh37": osm_name = OS_HG19 elif genome == "hg38" or genome == "GRCh38": @@ -243,6 +243,8 @@ def main(test_args: dict = 
def create_bedset_from_pep(
    pep: peppy.Project, bedbase_api: str, cache_folder: str = DEFAULT_BEDBASE_CACHE_PATH
) -> BedSet:
    """
    Create bedset from pep file, where sample_name is bed identifier

    :param pep: project listing the bed files of the bedset; the
        ``sample_name`` of every sample is used as the bed file identifier
    :param bedbase_api: bedbase API url handed to the BBClient
    :param cache_folder: cache folder handed to the BBClient
    :return: BedSet holding one loaded bed file per sample of the pep
    """
    # The client settings are identical for every sample, so build it once
    # instead of once per bed file.
    bbclient = BBClient(
        cache_folder=cache_folder,
        bedbase_api=bedbase_api,
    )

    new_bedset = BedSet()
    for sample in pep.samples:
        new_bedset.add(bbclient.load_bed(sample.sample_name))
    return new_bedset
def create_bed_list_file(bedset: BedSet, file_path: str) -> None:
    """
    Create a file with bed_set_list (Later this file is used in R script)

    One bed file path is written per line, in bedset iteration order.

    :param bedset: bed_set object
    :param file_path: path to the file
    :return: None
    """
    with open(file_path, "w") as f:
        f.writelines(f"{sample.path}\n" for sample in bedset)


def create_plots(
    bbc: BedBaseConf,
    bedset: BedSet,
    bedset_name: str,
) -> dict:
    """
    Create plots for a bedset (commonality region plot)

    Runs the bundled ``bedsetStat.R`` script on the bed files of the bedset
    and reads back the JSON summary the script writes.

    :param bbc: BedBaseConf object
    :param bedset: Bedset object
    :param bedset_name: bed_set name (currently unused; output files are
        named after the bedset identifier)
    :return: dict with information about created plots
    :raises FileNotFoundError: if the R script (or the Rscript executable)
        cannot be found
    :raises subprocess.CalledProcessError: if the R script exits with a
        non-zero status
    """
    bedset_md5sum = bedset.bedset_identifier

    output_folder = os.path.abspath(
        bbc.config[CFG_PATH_KEY][CFG_PATH_BEDBUNCHER_DIR_KEY]
    )

    rscript_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        "bedbuncher",
        "tools",
        "bedsetStat.R",
    )
    # Explicit raise instead of assert: asserts are stripped under `python -O`
    # and would surface as AssertionError rather than FileNotFoundError.
    # Checked before any file is written so a failure leaves nothing behind.
    if not os.path.exists(rscript_path):
        raise FileNotFoundError(f"'{rscript_path}' script not found")

    os.makedirs(output_folder, exist_ok=True)
    bedset_list_path = os.path.join(output_folder, f"{bedset_md5sum}_bedset.txt")
    json_file_path = os.path.join(output_folder, bedset_md5sum + ".json")

    # Argument list without a shell: paths containing spaces or shell
    # metacharacters are passed to Rscript verbatim.
    command = [
        "Rscript",
        rscript_path,
        f"--outputfolder={output_folder}",
        f"--bedfilelist={bedset_list_path}",
        f"--id={bedset_md5sum}",
        f"--json={json_file_path}",
    ]

    try:
        create_bed_list_file(bedset, bedset_list_path)
        # check=True: a failing R run is reported as such instead of showing
        # up later as a missing JSON file.
        subprocess.run(command, check=True)

        with open(json_file_path, "r", encoding="utf-8") as f:
            bedset_summary_info = json.load(f)
    finally:
        # Both files are temporary hand-over files for the R script; remove
        # them whether or not the run succeeded.
        for temporary_file in (bedset_list_path, json_file_path):
            if os.path.exists(temporary_file):
                os.remove(temporary_file)

    return bedset_summary_info["plots"][0]
def run_bedbuncher(
    bedbase_config: str,
    bedset_pep: str,
    bedset_name: str = None,
    bedbase_api: str = DEFAULT_BEDBASE_API_URL,
    cache_path: str = DEFAULT_BEDBASE_CACHE_PATH,
    heavy: bool = False,
    *args,
    **kwargs,
) -> None:
    """
    Create bedset using file with a list of bedfiles

    :param bedbase_config: bed base configuration file path
    :param bedset_pep: bedset pep path or pephub registry path containing bedset pep
    :param bedset_name: name of the bedset, can be provided here or as pep name
    :param bedbase_api: bedbase api url [DEFAULT: DEFAULT_BEDBASE_API_URL]
    :param cache_path: path to the cache folder [DEFAULT: DEFAULT_BEDBASE_CACHE_PATH]
    :param heavy: whether to use heavy processing (add all columns to the database).
        if False -> R-script won't be executed, only basic statistics will be calculated
    :param args: ignored; accepted so the whole CLI argument dict can be passed in
    :param kwargs: ignored; accepted so the whole CLI argument dict can be passed in
    :return: None
    :raises ValueError: if no bedset name is given as argument or in the pep
    """

    bbc = BedBaseConf(bedbase_config)
    if is_registry_path(bedset_pep):
        pep_of_bed = pephubclient.PEPHubClient().load_project(bedset_pep)
        bedset_record_id = bedset_pep
    else:
        pep_of_bed = peppy.Project(bedset_pep)
        bedset_record_id = os.path.basename(bedset_pep)

    # Resolve the name once and fail before any bed file is fetched; the same
    # error would otherwise only be raised after the whole bedset was loaded.
    resolved_name = bedset_name or pep_of_bed.get("name")
    if not resolved_name:
        raise ValueError(
            "bedset_name was not provided correctly. Please provide it in pep name or as argument"
        )

    # `or {}` also covers a `genome:` key that is present but empty/null,
    # which `.get("genome", {})` would hand to dict() as None.
    genome = pep_of_bed.config.get("genome") or {}
    if not genome:
        _LOGGER.warning(f"Genome for bedset {resolved_name} was not provided.")
    # NOTE(review): the check below reads the description through `.get`, while
    # the stored value uses the `.description` attribute - confirm both agree.
    if not pep_of_bed.get("description"):
        _LOGGER.warning(f"Description for bedset {resolved_name} was not provided.")

    bedset = create_bedset_from_pep(
        pep=pep_of_bed, bedbase_api=bedbase_api, cache_folder=cache_path
    )

    add_bedset_to_database(
        bbc,
        record_id=bedset_record_id,
        bed_set=bedset,
        bedset_name=resolved_name,
        genome=dict(genome),
        description=pep_of_bed.description or "",
        heavy=heavy,
    )
    _LOGGER.info(f"bedset {resolved_name} was added successfully to the database")
    return None
#' Generate a universe matrix
#'
#' Generates a universe matrix based on a list of regionsets. The universe is
#' the concatenation of all regionsets in \code{queryList}.
#'
#' @param queryList collection of regionsets; each element is overlapped
#'   against the universe with \code{findOverlaps}
#'
#' @return matrix where rows are regions and cols are a binary indications
#'   whether a regionset includes the region
.getUniverseMtx <- function(queryList) {
  message("creating universe...")
  universe = Reduce(c, queryList)
  mtx = matrix(data=0, nrow=length(universe), ncol=length(queryList))
  message("finding overlaps...")
  # Plain loop instead of sapply(): sapply may simplify its result to a
  # non-list, which would break the per-regionset indexing.
  for (e in seq_along(queryList)) {
    hits = findOverlaps(queryList[[e]], universe)
    # subjectHits() is the public accessor for the universe-side indices,
    # replacing direct access to the internal @to slot.
    mtx[S4Vectors::subjectHits(hits), e] = 1
  }
  mtx
}
#' Calculate region commonality in a regionset
#'
#' Calculates how many regionsets (bedfiles) overlap at least said percentage
#' of regions included in the universe. The universe is considered a union of
#' all regionsets (bedfiles) in the collection of
#' regionsets (bedset, or set of bedfiles)
#'
#' @param queryList GRangesList object with regionsets to be considered
#'
#' @return data.table with two columns: Perc with percentages and Counts with
#'   number of regionsets having at least this percentage of overlaps with
#'   the universe
calcRegionCommonality <- function(queryList){
  universeMtx = .getUniverseMtx(queryList)
  # percentage of universe regions covered by each regionset
  coverage = (colSums(universeMtx) / nrow(universeMtx)) * 100
  thresholds = unique(c(0, coverage))
  # for every threshold, count the regionsets reaching at least that coverage
  atLeast = vapply(
    thresholds,
    function(threshold) sum(coverage >= threshold, na.rm=TRUE),
    integer(1)
  )
  data.table(Perc=thresholds, Counts=atLeast)
}

#' Plot region commonality in a regionset
#'
#' @param percCounts data.table with two columns: Perc with percentages and
#'   Counts with number of regionsets having at least this percentage of
#'   overlaps with the universe
#'
#' @return ggplot object
plotRegionCommonality <- function(percCounts) {
  layers = ggplot(percCounts, aes(x=Perc, y=Counts)) +
    geom_point() +
    theme_bw() +
    geom_line(linetype="dotted", linewidth=0.1) +
    theme(aspect.ratio=1)
  labelled = layers +
    xlab("Percentage of regions in universe (BED set) covered") +
    ylab("Regionset (BED file) count") +
    ggtitle("Region commonality")
  # both axes are fixed to the 0-100 range
  framed = labelled + xlim(0, 100) + ylim(0, 100)
  return(framed)
}

# Save plot `g` as both PNG and PDF, named <outputfolder>/<id>_<plotId>.<ext>
plotBoth <- function(plotId, g){
  target = file.path(opt$outputfolder, paste0(opt$id, "_", plotId))
  print(paste0("Plotting: ", target))
  pngFile = paste0(target, ".png")
  pdfFile = paste0(target, ".pdf")
  ggplot2::ggsave(pngFile, g, device="png", width=8, height=8, units="in")
  ggplot2::ggsave(pdfFile, g, device="pdf", width=8, height=8, units="in")
}

# Build the one-row data.frame describing a saved plot (name, title and the
# PNG/PDF paths relative to three levels above the output folder)
getPlotReportDF <- function(plotId, title){
  target = file.path(opt$outputfolder, paste0(opt$id, "_", plotId))
  print(paste0("Writing: ", target))
  relTarget = getRelativePath(target, paste0(opt$outputfolder, "/../../../"))
  print(paste0("Writing: ", relTarget))
  data.frame(
    "name"=plotId,
    "title"=title,
    "thumbnail_path"=paste0(relTarget, ".png"),
    "path"=paste0(relTarget, ".pdf"),
    stringsAsFactors = FALSE
  )
}

# Read every BED file listed in opt$bedfilelist, plot the region commonality
# of the set and write the plot report to opt$json
doItAll <- function(opt) {
  bedPaths = read.table(file=opt$bedfilelist, stringsAsFactors=FALSE)
  regionSets = GRangesList()
  for (idx in seq_len(NROW(bedPaths))) {
    bedFile = paste0(bedPaths[idx, 1])
    if (!file.exists(bedFile)) {
      stop("File not found: ", bedFile)
    }
    message("reading BED: ", bedFile)
    regionSets[[idx]] = LOLA::readBed(bedFile)
  }
  commonality = calcRegionCommonality(regionSets)
  plotBoth("region_commonality", plotRegionCommonality(commonality))
  print(paste0("done plotting "))
  plots = getPlotReportDF("region_commonality", "BED region commonality in BED set")
  # Note: names of the list elements MUST match what's defined in: https://github.com/databio/bbconf/blob/master/bbconf/schemas/bedsets_schema.yaml
  reportJson = jsonlite::toJSON(list(plots=plots), pretty=TRUE)
  write(reportJson, opt$json)
  message("Saved JSON: ", opt$json)
}
f.write(f"{bedfile_name}\t{detail} \n") else: with open(output_file, "w") as f: - f.write(f"file_name\tdetail \n") + f.write("file_name\tdetail \n") f.write(f"{bedfile_name}\t{detail} \n") raise QualityException(f"{str(detail)}") diff --git a/bedboss/bedstat/bedstat.py b/bedboss/bedstat/bedstat.py index e2d05c0..0a90d22 100755 --- a/bedboss/bedstat/bedstat.py +++ b/bedboss/bedstat/bedstat.py @@ -104,9 +104,6 @@ def bedstat( os.path.abspath(os.path.join(outfolder_stats, os.pardir, os.pardir)), ) if not just_db_commit: - if force_overwrite: - new_start = True - if not pm: pm = pypiper.PipelineManager( name="bedstat-pipeline", diff --git a/bedboss/cli.py b/bedboss/cli.py index a41f3e3..6cdf6f3 100644 --- a/bedboss/cli.py +++ b/bedboss/cli.py @@ -3,7 +3,7 @@ import logmuse from bedboss._version import __version__ -from bedboss.const import DEFAULT_BEDBASE_API_URL +from bedboss.const import DEFAULT_BEDBASE_API_URL, DEFAULT_BEDBASE_CACHE_PATH def build_argparser() -> ArgumentParser: @@ -41,6 +41,11 @@ def build_argparser() -> ArgumentParser: "in JSON format.", ) + sub_bunch = subparser.add_parser( + "bunch", + help="A pipeline to create bedsets (sets of BED files) that will be retrieved from bedbase.", + ) + sub_index = subparser.add_parser( "index", help="Index not indexed bed files and add them to the qdrant database " ) @@ -325,6 +330,52 @@ def build_argparser() -> ArgumentParser: help="whether just to commit the JSON to the database", ) + sub_bunch.add_argument( + "--bedbase-config", + dest="bedbase_config", + type=str, + required=True, + help="a path to the bedbase configuration file [Required]", + ) + sub_bunch.add_argument( + "--bedset-name", + dest="bedset_name", + type=str, + required=True, + help="a name of the bedset [Required]", + ) + + sub_bunch.add_argument( + "--bedset-pep", + dest="bedset_pep", + type=str, + required=True, + help="bedset pep path or pephub registry path containing bedset pep [Required]", + ) + sub_bunch.add_argument( + "--base-api", + 
dest="bedbase_api", + type=str, + default=f"{DEFAULT_BEDBASE_API_URL}", + required=False, + help=f"Bedbase API to use. Default is {DEFAULT_BEDBASE_API_URL}", + ) + + sub_bunch.add_argument( + "--cache-path", + dest="cache_path", + type=str, + default=f"{DEFAULT_BEDBASE_CACHE_PATH}", + required=False, + help=f"Path to the cache folder. Default is {DEFAULT_BEDBASE_CACHE_PATH}", + ) + sub_bunch.add_argument( + "--heavy", + dest="heavy", + action="store_true", + help="whether to use heavy processing (Calculate and crate plots using R script). ", + ) + sub_index.add_argument( "--bedbase-config", dest="bedbase_config", diff --git a/bedboss/const.py b/bedboss/const.py index a68a1d0..3a7d4fd 100644 --- a/bedboss/const.py +++ b/bedboss/const.py @@ -1,4 +1,5 @@ -DEFAULT_BEDBASE_API_URL = "https://bedbase.org/api" +# DEFAULT_BEDBASE_API_URL = "https://bedbase.org/api" +DEFAULT_BEDBASE_API_URL = "http://localhost:8000/api" OPEN_SIGNAL_FOLDER = "./openSignalMatrix" OPEN_SIGNAL_URL = "http://big.databio.org/open_chromatin_matrix/" @@ -45,3 +46,6 @@ MIN_REGION_WIDTH = 10 # bedstat + +# bedbuncher +DEFAULT_BEDBASE_CACHE_PATH = "./bedabse_cache" diff --git a/bedboss/utils.py b/bedboss/utils.py index fab4694..3182124 100644 --- a/bedboss/utils.py +++ b/bedboss/utils.py @@ -55,12 +55,12 @@ def download_file(url: str, path: str, no_fail: bool = False) -> NoReturn: _LOGGER.info(f"Local path: {os.path.abspath(path)}") try: urllib.request.urlretrieve(url, path) - _LOGGER.info(f"File downloaded successfully!") + _LOGGER.info("File downloaded successfully!") except Exception as e: - _LOGGER.error(f"File download failed.") + _LOGGER.error("File download failed.") if not no_fail: raise e - _LOGGER.error(f"File download failed. Continuing anyway...") + _LOGGER.error("File download failed. 
Continuing anyway...") def check_db_connection(bedbase_config: str) -> bool: @@ -70,14 +70,14 @@ def check_db_connection(bedbase_config: str) -> bool: :param bedbase_config: path to the bedbase config file :return: True if connection is successful, False otherwise """ - _LOGGER.info(f"Checking database connection...") + _LOGGER.info("Checking database connection...") if not os.path.exists(bedbase_config): raise FileNotFoundError(f"Bedbase config file {bedbase_config} was not found.") else: _LOGGER.info(f"Bedbase config file {bedbase_config} was found.") try: BedBaseConf(bedbase_config) - _LOGGER.info(f"Database connection is successful.") + _LOGGER.info("Database connection is successful.") return True except Exception as e: _LOGGER.error(f"Database connection failed. Error: {e}") diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 8e0796a..c294986 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -1,2 +1,5 @@ mock>=2.0.0 pytest==3.10.1 +black +ruff +pre-commit \ No newline at end of file diff --git a/setup.py b/setup.py index a76bd42..94820a1 100644 --- a/setup.py +++ b/setup.py @@ -17,10 +17,10 @@ def read_reqs(reqs_name): deps = [] with open(os.path.join(REQDIR, f"requirements-{reqs_name}.txt"), "r") as f: - for l in f: - if not l.strip(): + for line in f: + if not line.strip(): continue - deps.append(l) + deps.append(line) return deps diff --git a/test/test_bedboss.py b/test/test_bedboss.py index a27bd23..6d3774f 100644 --- a/test/test_bedboss.py +++ b/test/test_bedboss.py @@ -21,8 +21,8 @@ def check_dependencies_installed() -> bool: # Make sure bedToBigBed etc is in your PATH. 
print("Testing dependencies...") - key = "PATH" - value = os.getenv(key) + # key = "PATH" + # value = os.getenv(key) test_dep_return_code = subprocess.run([DEPENDENCIES_TEST_SCRIPT], shell=True) if not (1 > test_dep_return_code.returncode): warnings.warn(UserWarning(f"{pytest_db_skip_reason}")) @@ -38,7 +38,7 @@ def db_setup(): # Check if the database is setup try: BedBaseConf(BEDBASE_CONFIG) - except Exception as err: + except Exception: warnings.warn(UserWarning(f"{pytest_db_skip_reason}")) return False return True @@ -234,3 +234,9 @@ def test_check_file_exists(self, file, output_temp_dir): file, ) ) + + +@pytest.mark.skipif(True, reason="Not implemented") +class TestBedbuncher: + def test_bedbuncher_run(self): + pass