Skip to content

Commit

Permalink
added pep option + many fixes + docs
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Oct 3, 2023
1 parent aac750a commit 91ffcac
Show file tree
Hide file tree
Showing 9 changed files with 181 additions and 70 deletions.
13 changes: 11 additions & 2 deletions bedboss/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
""" Package-level data """
import logmuse
import coloredlogs

from bedboss import *
from bedboss.bedqc import bedqc
from bedboss.bedmaker import bedmaker
from bedboss.bedstat import bedstat
from bedboss._version import __version__
import logmuse


__package_name__ = "bedboss"

__author__ = [
"Oleksandr Khoroshevskyi",
"Michal Stolarczyk",
Expand All @@ -25,4 +29,9 @@
"bedstat",
]

logmuse.init_logger(__version__)
_LOGGER = logmuse.init_logger("bedboss")
coloredlogs.install(
logger=_LOGGER,
datefmt="%H:%M:%S",
fmt="[%(levelname)s] [%(asctime)s] %(message)s",
)
44 changes: 43 additions & 1 deletion bedboss/bedboss.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import logging
import os
from typing import NoReturn, Union, Dict

import peppy
import pypiper
from argparse import Namespace
import logmuse
import peppy

from bedboss.bedstat.bedstat import bedstat
from bedboss.bedmaker.bedmaker import BedMaker
Expand Down Expand Up @@ -54,7 +57,7 @@ def get_osm_path(genome: str) -> Union[str, None]:
if not os.path.exists(osm_path):
if not os.path.exists(OPEN_SIGNAL_FOLDER):
os.makedirs(OPEN_SIGNAL_FOLDER)
download_file(url=f"{OPEN_SIGNAL_URL}{osm_name}", path=osm_path)
download_file(url=f"{OPEN_SIGNAL_URL}{osm_name}", path=osm_path, no_fail=True,)
return osm_path


Expand Down Expand Up @@ -163,6 +166,43 @@ def run_all(
)


def run_all_by_pep(pep: Union[str, peppy.Project]) -> NoReturn:
    """
    Run the bedboss pipeline once for every sample of a PEP.

    :param pep: path to the pep config file or peppy.Project object
    :raises TypeError: if pep is neither a string nor a peppy.Project
    """
    if isinstance(pep, str):
        pep = peppy.Project(pep)
    elif not isinstance(pep, peppy.Project):
        raise TypeError(
            f"Incorrect pep type: {type(pep).__name__}. "
            f"Expected a path (str) or a peppy.Project object."
        )

    # Sample attributes that may be absent from the PEP.
    # NOTE(review): absent fields are forwarded to run_all as None (same as
    # the previous per-field `.get(...)` calls), which overrides any non-None
    # default run_all may declare -- confirm against run_all's signature.
    optional_fields = (
        "rfg_config",
        "narrowpeak",
        "check_qc",
        "standard_chrom",
        "chrom_sizes",
        "open_signal_matrix",
        "ensdb",
        "sample_yaml",
        "just_db_commit",
        "no_db_commit",
        "force_overwrite",
        "skip_qdrant",
    )

    for pep_sample in pep.samples:
        _LOGGER.info(f"Running bedboss pipeline for {pep_sample.sample_name}")
        optional_kwargs = {
            field: pep_sample.get(field) for field in optional_fields
        }
        run_all(
            # Required fields: attribute access fails if the PEP lacks them.
            sample_name=pep_sample.sample_name,
            input_file=pep_sample.input_file,
            input_type=pep_sample.input_type,
            outfolder=pep_sample.outfolder,
            genome=pep_sample.genome,
            bedbase_config=pep_sample.bedbase_config,
            **optional_kwargs,
        )


def main(test_args: dict = None) -> NoReturn:
"""
Run pipeline that was specified in as positional argument.
Expand Down Expand Up @@ -196,6 +236,8 @@ def main(test_args: dict = None) -> NoReturn:
bedqc(pm=pm, **args_dict)
elif args_dict["command"] == "stat":
bedstat(pm=pm, **args_dict)
elif args_dict["command"] == "all-pep":
run_all_by_pep(args_dict["pep_config"])
else:
parser.print_help()
# raise Exception("Incorrect pipeline name.")
Expand Down
16 changes: 9 additions & 7 deletions bedboss/bedmaker/bedmaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,14 @@ def __init__(
If true, filter the input file to contain only
the standard chromosomes, remove regions on
ChrUn chromosomes
:param check_qc: run quality control during badmaking
:param check_qc: run quality control during bedmaking
:param pm: pypiper object
:return: noReturn
"""

# Define file paths
self.input_file = input_file
self.input_type = input_type
self.input_type = input_type.lower()
self.output_bed = output_bed
self.output_bigbed = output_bigbed
self.file_name = os.path.basename(input_file)
Expand Down Expand Up @@ -186,7 +186,7 @@ def make_bed(self) -> NoReturn:
on input file type and execute the command.
"""

_LOGGER.info(f"Converting {self.input_file} to BED format.")
_LOGGER.info(f"Converting {os.path.abspath(self.input_file)} to BED format.")
temp_bed_path = os.path.splitext(self.output_bed)[0]

# creat cmd to run that convert non bed file to bed file
Expand All @@ -195,14 +195,14 @@ def make_bed(self) -> NoReturn:

# Use the gzip and shutil modules to produce temporary unzipped files
if self.input_extension == ".gz":
input_file = os.path.join(
temp_input_file = os.path.join(
os.path.dirname(self.output_bed),
os.path.splitext(self.file_name)[0],
)
with gzip.open(self.input_file, "rb") as f_in:
with open(input_file, "wb") as f_out:
with open(temp_input_file, "wb") as f_out:
shutil.copyfileobj(f_in, f_out)
self.pm.clean_add(input_file)
self.pm.clean_add(temp_input_file)

# creating cmd for bedGraph files
if self.input_type == "bedGraph":
Expand Down Expand Up @@ -309,6 +309,7 @@ def make_bed(self) -> NoReturn:
),
]
self.pm.run(cmd, target=self.output_bed)
self.pm._cleanup()

def make_bigbed(self) -> NoReturn:
"""
Expand Down Expand Up @@ -337,7 +338,7 @@ def make_bigbed(self) -> NoReturn:
"https://genome.ucsc.edu/goldenpath/help/bigBed.html"
)
if bedtype is not None:
cmd = "zcat " + self.output_bed + " | sort -k1,1 -k2,2n > " + temp
cmd = f"zcat {self.output_bed} | sort -k1,1 -k2,2n > {temp}"
self.pm.run(cmd, temp)

cmd = f"{BED_TO_BIGBED_PROGRAM} -type={bedtype} {temp} {self.chrom_sizes} {big_narrow_peak}"
Expand Down Expand Up @@ -371,6 +372,7 @@ def make_bigbed(self) -> NoReturn:
f"unable to validate genome assembly with Refgenie. "
f"Error: {err}"
)
self.pm._cleanup()

def get_rgc(self) -> str:
"""
Expand Down
19 changes: 19 additions & 0 deletions bedboss/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ def build_argparser() -> ArgumentParser:
sub_all = subparser.add_parser(
"all", help="Run all bedboss pipelines and insert data into bedbase"
)
sub_all_pep = subparser.add_parser(
"all-pep",
help="Run all bedboss pipelines using one PEP and insert data into bedbase",
)
sub_make = subparser.add_parser(
"make",
help="A pipeline to convert bed, bigbed, bigwig or bedgraph "
Expand Down Expand Up @@ -136,6 +140,21 @@ def build_argparser() -> ArgumentParser:
help="just commit the JSON to the database",
)

# all-pep
sub_all_pep.add_argument(
"--pep_config",
dest="pep_config",
required=True,
help="Path to the pep configuration file [Required]\n "
"Required fields in PEP are: "
"sample_name, input_file, input_type,outfolder, genome, bedbase_config.\n "
"Optional fields in PEP are: "
"rfg_config, narrowpeak, check_qc, standard_chrom, chrom_sizes, "
"open_signal_matrix, ensdb, sample_yaml, no_db_commit, just_db_commit, "
"no_db_commit, force_overwrite, skip_qdrant",
type=str,
)

# bed_qc
sub_qc.add_argument(
"--bedfile",
Expand Down
10 changes: 6 additions & 4 deletions bedboss/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import logging
import requests
import urllib
from bbconf import BedBaseConf
from typing import NoReturn
Expand Down Expand Up @@ -43,22 +42,25 @@ def standardize_genome_name(input_genome: str) -> str:
return input_genome


def download_file(url: str, path: str, no_fail: bool = False) -> NoReturn:
    """
    Download file from the url to specific location

    :param url: URL of the file
    :param path: Local path with filename
    :param no_fail: If True, do not raise exception if download fails
    :return: NoReturn
    """
    # A plain `import urllib` does not guarantee that the `request` submodule
    # is loaded, so import it explicitly where it is used.
    import urllib.request

    _LOGGER.info(f"Downloading remote file: {url}")
    _LOGGER.info(f"Local path: {os.path.abspath(path)}")
    try:
        urllib.request.urlretrieve(url, path)
    except Exception:
        if not no_fail:
            _LOGGER.error("File download failed.")
            # Bare raise keeps the original traceback intact.
            raise
        # Best-effort mode: report the failure once and let the caller go on.
        _LOGGER.error("File download failed. Continuing anyway...")
    else:
        _LOGGER.info("File downloaded successfully!")


def check_db_connection(bedbase_config: str) -> bool:
Expand Down
16 changes: 16 additions & 0 deletions docs/templates/usage.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Usage reference

BEDboss is a command-line tool-warehouse of 3 pipelines for genomic interval files.

BEDboss includes: bedmaker, bedqc, and bedstat. These pipelines can be run using the following positional arguments:

- `bedboss all`: Runs all pipelines in order: bedmaker -> bedqc -> bedstat

- `bedboss make`: Creates Bed and BigBed files from other types of genomic interval files [bigwig|bedgraph|bed|bigbed|wig]

- `bedboss qc`: Runs quality control for a bed file (works only with bed files)

- `bedboss stat`: Runs statistics for bed and bigbed files.

Here you can see the command-line usage instructions for the main bedboss command and for each subcommand:

Loading

0 comments on commit 91ffcac

Please sign in to comment.