Skip to content

Commit

Permalink
fixed pipeline manager bugs
Browse files Browse the repository at this point in the history
(temp files cleanup, pipestat incorrect pipeline name)
  • Loading branch information
khoroshevskyi committed Oct 2, 2024
1 parent 0cd9281 commit 460d045
Show file tree
Hide file tree
Showing 7 changed files with 33,989 additions and 5 deletions.
2 changes: 1 addition & 1 deletion bedboss/bedboss.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def run_all(
pm=pm,
)
if not other_metadata:
other_metadata = {}
other_metadata = {"sample_name": name}

statistics_dict = bedstat(
bedfile=bed_metadata.bed_file,
Expand Down
6 changes: 4 additions & 2 deletions bedboss/bedmaker/bedmaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from bedboss.bedmaker.utils import get_chrom_sizes
from bedboss.bedqc.bedqc import bedqc
from bedboss.exceptions import BedBossException, RequirementsException
from bedboss.utils import cleanup_pm_temp

_LOGGER = logging.getLogger("bedboss")

Expand Down Expand Up @@ -102,6 +103,7 @@ def make_bigbed(
_LOGGER.info(f"Running: {cmd}")
pm.run(cmd, big_bed_path, nofail=False)
except Exception as err:
cleanup_pm_temp(pm)
raise BedBossException(
f"Fail to generating bigBed files for {bed_path}: "
f"unable to validate genome assembly with Refgenie. "
Expand All @@ -120,12 +122,12 @@ def make_bigbed(
try:
pm.run(cmd, big_bed_path, nofail=True)
except Exception as err:
cleanup_pm_temp(pm)
_LOGGER.info(
f"Fail to generating bigBed files for {bed_path}: "
f"unable to validate genome assembly with Refgenie. "
f"Error: {err}"
)
pm._cleanup()
if pm_clean:
pm.stop_pipeline()
return big_bed_path
Expand Down Expand Up @@ -367,7 +369,7 @@ def make_all(
narrowpeak=narrowpeak,
rfg_config=rfg_config,
chrom_sizes=chrom_sizes,
pm=None,
pm=pm,
)
bed_type, bed_format = get_bed_type(output_bed)
if check_qc:
Expand Down
22 changes: 22 additions & 0 deletions bedboss/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import logging
import os
import urllib.request
import glob

import requests
from pephubclient.files_manager import FilesManager
import peppy
from peppy.const import SAMPLE_RAW_DICT_KEY
from bedms import AttrStandardizer
from pypiper import PipelineManager

_LOGGER = logging.getLogger("bedboss")

Expand Down Expand Up @@ -167,3 +169,23 @@ def standardize_pep(
del raw_pep[SAMPLE_RAW_DICT_KEY][original_key]

return peppy.Project.from_dict(raw_pep)


def cleanup_pm_temp(pm: PipelineManager) -> None:
    """
    Cleanup temporary files from the PipelineManager

    Each entry of ``pm.cleanup_list_conditional`` is expanded with
    ``glob.glob``. Matched regular files are deleted with ``os.remove`` and
    matched directories with ``os.rmdir`` (so only empty directories are
    removed). Cleanup is best effort: every failure is logged and the
    remaining items are still processed. Afterwards the list is reset to
    an empty list.

    :param pm: PipelineManager
    """
    if not pm.cleanup_list_conditional:
        return

    dirs_to_remove = []
    for pattern in pm.cleanup_list_conditional:
        try:
            matches = glob.glob(pattern)
        except Exception as e:
            # A malformed entry must not stop cleanup of the other entries.
            _LOGGER.error(f"Error cleaning up: {e}")
            continue

        for path in matches:
            if os.path.isfile(path):
                # Handle errors per item so that one undeletable file does
                # not leave the rest of the matches behind.
                try:
                    os.remove(path)
                except OSError as e:
                    _LOGGER.error(f"Error cleaning up: {e}")
            elif os.path.isdir(path):
                dirs_to_remove.append(path)

    # Directories are removed only after all files are gone, longest path
    # first, so that nested directories are emptied before their parents.
    # The set drops directories matched by more than one pattern.
    for path in sorted(set(dirs_to_remove), key=len, reverse=True):
        try:
            os.rmdir(path)
        except OSError as e:
            _LOGGER.error(f"Error cleaning up: {e}")

    pm.cleanup_list_conditional = []
6 changes: 4 additions & 2 deletions requirements/requirements-all.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
logmuse>=0.2.7
coloredlogs>=15.0.1
eido>=0.2.3
peppy>=0.40.6
eido>=0.2.4
peppy>=0.40.7
yacman>=0.8.4
requests>=2.28.2
piper>=v0.14.0
# Fixed bugs in pipestat
pipestat>=0.11.0
bbconf>=0.7.0
# bbconf @ git+https://github.com/databio/bbconf.git@dev#egg=bbconf
refgenconf>=0.12.2
Expand Down
27 changes: 27 additions & 0 deletions scripts/profiling/prof.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
def runn():
    """
    Profile one ``run_all`` call and dump the collected statistics.

    The call is executed under ``cProfile``; the resulting stats are sorted
    by ``pstats.SortKey.TIME`` and written to the file ``test_profiling``.
    """
    import cProfile
    import pstats

    from bedboss.bedboss import run_all

    # Arguments of the profiled run_all call.
    run_all_kwargs = {
        "bedbase_config": "/home/bnt4me/virginia/repos/bbuploader/config_db_local.yaml",
        "outfolder": "/home/bnt4me/virginia/repos/bbuploader/data",
        "genome": "hg38",
        "input_file": "/test/data/bed/hg38/GSM6732293_Con_liver-IP2.bed",
        "input_type": "bed",
        "force_overwrite": True,
        "upload_pephub": True,
        "upload_s3": True,
        "upload_qdrant": True,
        "name": "test",
    }

    profiler = cProfile.Profile()
    with profiler:
        run_all(**run_all_kwargs)

    pstats.Stats(profiler).sort_stats(pstats.SortKey.TIME).dump_stats(
        filename="test_profiling"
    )


if __name__ == "__main__":
    runn()
Loading

0 comments on commit 460d045

Please sign in to comment.