Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 0.1.0a4 #17

Merged
merged 2 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions bedboss/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
""" Package-level data """
import logmuse
import coloredlogs

from bedboss import *
from bedboss.bedqc import bedqc
from bedboss.bedmaker import bedmaker
from bedboss.bedstat import bedstat

# from bedboss.bedqc.bedqc import bedqc
# from bedboss.bedmaker.bedmaker import BedMaker
# from bedboss.bedstat.bedstat import bedstat
from bedboss._version import __version__
import logmuse
from bedboss.bedboss import run_all, run_all_by_pep, bedqc, BedMaker, bedstat


__package_name__ = "bedboss"

__author__ = [
"Oleksandr Khoroshevskyi",
"Michal Stolarczyk",
Expand All @@ -20,9 +26,18 @@
"__version__",
"__package_name__",
"__author__",
"bedboss",
"bedqc",
"bedmaker",
"BedMaker",
"bedstat",
"run_all",
"run_all_by_pep",
]

logmuse.init_logger(__version__)
_LOGGER = logmuse.init_logger("bedboss")
coloredlogs.install(
logger=_LOGGER,
datefmt="%H:%M:%S",
fmt="[%(levelname)s] [%(asctime)s] %(message)s",
)
2 changes: 1 addition & 1 deletion bedboss/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.0a3"
__version__ = "0.1.0a4"
50 changes: 48 additions & 2 deletions bedboss/bedboss.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import logging
import os
from typing import NoReturn, Union, Dict

import peppy
import pypiper
from argparse import Namespace
import logmuse
import peppy

from bedboss.bedstat.bedstat import bedstat
from bedboss.bedmaker.bedmaker import BedMaker
Expand All @@ -25,7 +28,7 @@
check_db_connection,
)
from bedboss.exceptions import OpenSignalMatrixException
from bedboss import __version__
from bedboss._version import __version__

_LOGGER = logging.getLogger("bedboss")

Expand Down Expand Up @@ -54,7 +57,11 @@ def get_osm_path(genome: str) -> Union[str, None]:
if not os.path.exists(osm_path):
if not os.path.exists(OPEN_SIGNAL_FOLDER):
os.makedirs(OPEN_SIGNAL_FOLDER)
download_file(url=f"{OPEN_SIGNAL_URL}{osm_name}", path=osm_path)
download_file(
url=f"{OPEN_SIGNAL_URL}{osm_name}",
path=osm_path,
no_fail=True,
)
return osm_path


Expand Down Expand Up @@ -163,6 +170,43 @@ def run_all(
)


def run_all_by_pep(pep: Union[str, peppy.Project]) -> NoReturn:
    """
    Run bedboss pipeline by providing pep config file.

    The project is loaded (when a path is given) and ``run_all`` is called
    once per sample, in the order the project yields its samples.

    :param pep: path to the pep config file or peppy.Project object
    :raises TypeError: if pep is neither a string nor a peppy.Project
    """
    if isinstance(pep, str):
        pep = peppy.Project(pep)
    elif not isinstance(pep, peppy.Project):
        # TypeError is a subclass of Exception, so callers that caught the
        # previous generic Exception keep working.
        raise TypeError(
            "Incorrect pep type. Exiting... "
            f"Expected str or peppy.Project, got {type(pep).__name__}."
        )

    for pep_sample in pep.samples:
        _LOGGER.info(f"Running bedboss pipeline for {pep_sample.sample_name}")
        run_all(
            # Required sample attributes: read directly from the sample.
            sample_name=pep_sample.sample_name,
            input_file=pep_sample.input_file,
            input_type=pep_sample.input_type,
            outfolder=pep_sample.outfolder,
            genome=pep_sample.genome,
            bedbase_config=pep_sample.bedbase_config,
            # Optional sample attributes: looked up with .get().
            # NOTE(review): presumably .get() yields None for attributes the
            # sample does not define, and that None is passed explicitly --
            # confirm it does not override a non-None default in run_all.
            rfg_config=pep_sample.get("rfg_config"),
            narrowpeak=pep_sample.get("narrowpeak"),
            check_qc=pep_sample.get("check_qc"),
            standard_chrom=pep_sample.get("standard_chrom"),
            chrom_sizes=pep_sample.get("chrom_sizes"),
            open_signal_matrix=pep_sample.get("open_signal_matrix"),
            ensdb=pep_sample.get("ensdb"),
            sample_yaml=pep_sample.get("sample_yaml"),
            just_db_commit=pep_sample.get("just_db_commit"),
            no_db_commit=pep_sample.get("no_db_commit"),
            force_overwrite=pep_sample.get("force_overwrite"),
            skip_qdrant=pep_sample.get("skip_qdrant"),
        )


def main(test_args: dict = None) -> NoReturn:
"""
Run the pipeline that was specified as a positional argument.
Expand Down Expand Up @@ -196,6 +240,8 @@ def main(test_args: dict = None) -> NoReturn:
bedqc(pm=pm, **args_dict)
elif args_dict["command"] == "stat":
bedstat(pm=pm, **args_dict)
elif args_dict["command"] == "all-pep":
run_all_by_pep(args_dict["pep_config"])
else:
parser.print_help()
# raise Exception("Incorrect pipeline name.")
Expand Down
16 changes: 9 additions & 7 deletions bedboss/bedmaker/bedmaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,14 @@ def __init__(
If true, filter the input file to contain only
the standard chromosomes, remove regions on
ChrUn chromosomes
:param check_qc: run quality control during badmaking
:param check_qc: run quality control during bedmaking
:param pm: pypiper object
:return: noReturn
"""

# Define file paths
self.input_file = input_file
self.input_type = input_type
self.input_type = input_type.lower()
self.output_bed = output_bed
self.output_bigbed = output_bigbed
self.file_name = os.path.basename(input_file)
Expand Down Expand Up @@ -186,7 +186,7 @@ def make_bed(self) -> NoReturn:
on input file type and execute the command.
"""

_LOGGER.info(f"Converting {self.input_file} to BED format.")
_LOGGER.info(f"Converting {os.path.abspath(self.input_file)} to BED format.")
temp_bed_path = os.path.splitext(self.output_bed)[0]

# create the cmd that converts a non-bed file to a bed file
Expand All @@ -195,14 +195,14 @@ def make_bed(self) -> NoReturn:

# Use the gzip and shutil modules to produce temporary unzipped files
if self.input_extension == ".gz":
input_file = os.path.join(
temp_input_file = os.path.join(
os.path.dirname(self.output_bed),
os.path.splitext(self.file_name)[0],
)
with gzip.open(self.input_file, "rb") as f_in:
with open(input_file, "wb") as f_out:
with open(temp_input_file, "wb") as f_out:
shutil.copyfileobj(f_in, f_out)
self.pm.clean_add(input_file)
self.pm.clean_add(temp_input_file)

# creating cmd for bedGraph files
if self.input_type == "bedGraph":
Expand Down Expand Up @@ -309,6 +309,7 @@ def make_bed(self) -> NoReturn:
),
]
self.pm.run(cmd, target=self.output_bed)
self.pm._cleanup()

def make_bigbed(self) -> NoReturn:
"""
Expand Down Expand Up @@ -337,7 +338,7 @@ def make_bigbed(self) -> NoReturn:
"https://genome.ucsc.edu/goldenpath/help/bigBed.html"
)
if bedtype is not None:
cmd = "zcat " + self.output_bed + " | sort -k1,1 -k2,2n > " + temp
cmd = f"zcat {self.output_bed} | sort -k1,1 -k2,2n > {temp}"
self.pm.run(cmd, temp)

cmd = f"{BED_TO_BIGBED_PROGRAM} -type={bedtype} {temp} {self.chrom_sizes} {big_narrow_peak}"
Expand Down Expand Up @@ -371,6 +372,7 @@ def make_bigbed(self) -> NoReturn:
f"unable to validate genome assembly with Refgenie. "
f"Error: {err}"
)
self.pm._cleanup()

def get_rgc(self) -> str:
"""
Expand Down
2 changes: 1 addition & 1 deletion bedboss/bedstat/bedstat.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def bedstat(
del plot["name"]
data.update({plot_id: plot})
bbc.bed.report(
sample_name=bed_digest,
record_identifier=bed_digest,
values=data,
force_overwrite=force_overwrite,
)
Expand Down
23 changes: 21 additions & 2 deletions bedboss/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from argparse import ArgumentParser
import logmuse

from bedboss import __version__, __package_name__
from bedboss._version import __version__


def build_argparser() -> ArgumentParser:
Expand All @@ -11,7 +11,7 @@ def build_argparser() -> ArgumentParser:
:return: Tuple[pipeline, arguments]
"""
parser = VersionInHelpParser(
prog=__package_name__,
prog="bedboss",
description="Warehouse of pipelines for BED-like files: "
"bedmaker, bedstat, and bedqc.",
epilog="",
Expand All @@ -22,6 +22,10 @@ def build_argparser() -> ArgumentParser:
sub_all = subparser.add_parser(
"all", help="Run all bedboss pipelines and insert data into bedbase"
)
sub_all_pep = subparser.add_parser(
"all-pep",
help="Run all bedboss pipelines using one PEP and insert data into bedbase",
)
sub_make = subparser.add_parser(
"make",
help="A pipeline to convert bed, bigbed, bigwig or bedgraph "
Expand Down Expand Up @@ -136,6 +140,21 @@ def build_argparser() -> ArgumentParser:
help="just commit the JSON to the database",
)

# all-pep
sub_all_pep.add_argument(
"--pep_config",
dest="pep_config",
required=True,
help="Path to the pep configuration file [Required]\n "
"Required fields in PEP are: "
"sample_name, input_file, input_type,outfolder, genome, bedbase_config.\n "
"Optional fields in PEP are: "
"rfg_config, narrowpeak, check_qc, standard_chrom, chrom_sizes, "
"open_signal_matrix, ensdb, sample_yaml, no_db_commit, just_db_commit, "
"no_db_commit, force_overwrite, skip_qdrant",
type=str,
)

# bed_qc
sub_qc.add_argument(
"--bedfile",
Expand Down
10 changes: 6 additions & 4 deletions bedboss/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import logging
import requests
import urllib
from bbconf import BedBaseConf
from typing import NoReturn
Expand Down Expand Up @@ -43,22 +42,25 @@ def standardize_genome_name(input_genome: str) -> str:
return input_genome


def download_file(url: str, path: str) -> NoReturn:
def download_file(url: str, path: str, no_fail: bool = False) -> NoReturn:
"""
Download file from the url to specific location

:param url: URL of the file
:param path: Local path with filename
:param no_fail: If True, do not raise exception if download fails
:return: NoReturn
"""
_LOGGER.info(f"Downloading remote file: {url}")
_LOGGER.info(f"Local path: {path}")
_LOGGER.info(f"Local path: {os.path.abspath(path)}")
try:
urllib.request.urlretrieve(url, path)
_LOGGER.info(f"File downloaded successfully!")
except Exception as e:
_LOGGER.error(f"File download failed.")
raise e
if not no_fail:
raise e
_LOGGER.error(f"File download failed. Continuing anyway...")


def check_db_connection(bedbase_config: str) -> bool:
Expand Down
77 changes: 77 additions & 0 deletions docs/how_run_script.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# How to run bedboss as a Python API

## Install bedboss

```bash
pip install bedboss
```

## Run bedboss all

```python
from bedboss import run_all

run_all(
sample_name="example_sample_name",
input_file="example/path/to/input_file",
input_type="bed",
outfolder="example/path/to/outfolder",
genome="hg38",
bedbase_config="example/path/to/bedbase_config.yaml",
# + other optional arguments
)


```


## Run bedboss all-pep

```python
from bedboss import run_all_by_pep

run_all_by_pep(
pep="example/path/to/pep.yaml"
)
```

## Run bedboss make

```python
from bedboss import BedMaker

BedMaker(
input_file="example/path/to/input_file",
input_type="bed",
output_bed="example/path/to/output_bed",
output_bigbed="example/path/to/output_bigbed",
sample_name="example_sample_name",
genome="hg38",
)

```

## Run bedboss stat

```python
from bedboss import bedstat

bedstat(
bedfile="example/path/to/bedfile.bed",
bedbase_config="example/path/to/bedbase_config.yaml",
genome="hg38",
outfolder="example/path/to/outfolder",
)

```

## Run bedboss qc

```python
from bedboss import bedqc

bedqc(
bedfile="example/path/to/bedfile.bed",
outfolder="example/path/to/outfolder",
)
```
16 changes: 16 additions & 0 deletions docs/templates/usage.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Usage reference

BEDboss is a command-line tool: a warehouse of 3 pipelines for genomic interval files.

BEDboss includes bedmaker, bedqc, and bedstat. These pipelines can be run using the following positional arguments:

- `bedboss all`: Runs all pipelines in order: bedmaker -> bedqc -> bedstat

- `bedboss make`: Creates BED and BigBed files from other types of genomic interval files [bigwig|bedgraph|bed|bigbed|wig]

- `bedboss qc`: Runs quality control for a BED file (works only with BED files)

- `bedboss stat`: Runs statistics for BED and BigBed files.

Here you can see the command-line usage instructions for the main bedboss command and for each subcommand:

Loading
Loading