Skip to content

Commit

Permalink
add exception handling to bed classifier, re-add pypiper as the pipeline manager
Browse files Browse the repository at this point in the history
  • Loading branch information
donaldcampbelljr committed May 30, 2024
1 parent 8ca32cc commit fffe63f
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 7 deletions.
10 changes: 10 additions & 0 deletions bedboss/bedclassifier/bedclassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,16 @@ def get_bed_type(bed: str, no_fail: Optional[bool] = True) -> Tuple[str, str]:
raise BedTypeException(
reason=f"Bed type could not be determined due to CSV parse error {e}"
)
except UnicodeDecodeError as e:
if no_fail:
_LOGGER.warning(
f"Unable to parse bed file {bed}, due to error {e}, setting bed_type = unknown_bedtype"
)
return "unknown_bedtype", "unknown_bedtype"
else:
raise BedTypeException(
reason=f"Bed type could not be determined due to CSV parse error {e}"
)

if df is not None:
df = df.dropna(axis=1)
Expand Down
30 changes: 23 additions & 7 deletions scripts/bedclassifier_tuning/bedclassify.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Optional

from bedboss.bedclassifier import get_bed_type
from bedboss.exceptions import BedTypeException

_LOGGER = logging.getLogger("bedboss")

Expand Down Expand Up @@ -53,6 +54,7 @@ def __init__(
)
# Use existing Pipeline Manager or Construct New one
# Want to use Pipeline Manager to log work AND cleanup unzipped gz files.
self.pm = pm
# if pm is not None:
# self.pm = pm
# self.pm_created = False
Expand Down Expand Up @@ -86,9 +88,16 @@ def __init__(
with open(unzipped_input_file, "wb") as f_out:
shutil.copyfileobj(f_in, f_out)
self.input_file = unzipped_input_file
# self.pm.clean_add(unzipped_input_file)
if self.pm:
self.pm.clean_add(unzipped_input_file)

try:
self.bed_type, self.bed_type_named = get_bed_type(self.input_file)
except BedTypeException as e:
_LOGGER.warning(msg=f"FAILED {bed_digest} Exception {e}")
self.bed_type = "unknown_bedtype"
self.bed_type_named = "unknown_bedtype"

self.bed_type, self.bed_type_named = get_bed_type(self.input_file)
# return f"bed{bedtype}+{n}", bed_type_named

if self.input_type is not None:
Expand Down Expand Up @@ -126,8 +135,8 @@ def __init__(

# self.pm.report_result(key="bedtype", value=self.bed_type)

# if self.pm_created is True:
# self.pm.stop_pipeline()
if self.pm:
self.pm.stop_pipeline()


def main():
Expand All @@ -143,6 +152,14 @@ def main():
# gse_list = gse_obj.get_gse_all()
# gse_obj.generate_file("data/output.txt", gse_list=gse_list)

logs_dir = os.path.join(os.path.abspath("results"), "logs")
pm = pypiper.PipelineManager(
name="bedclassifier",
outfolder=logs_dir,
recover=True,
)
pm.start_pipeline()

# for geo in gse_list:
geofetcher_obj = Geofetcher(
filter="\.(bed|narrowPeak|broadPeak)\.",
Expand Down Expand Up @@ -185,12 +202,11 @@ def main():
output_dir=os.path.abspath("results"),
input_type=bed_type_from_geo,
psm=psm,
pm=pm,
gsm=geo_accession,
)

# Get list of Bed Files and Download them

# Open Bed Files, Classify them, Report them.
pm.stop_pipeline()


if __name__ == "__main__":
Expand Down

0 comments on commit fffe63f

Please sign in to comment.