Skip to content

Commit

Permalink
Add better exception handling and allow for integer/float chromosomes in column 0 #34
Browse files Browse the repository at this point in the history
  • Loading branch information
donaldcampbelljr committed Feb 13, 2024
1 parent 4ba8f75 commit 55d3b88
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 6 deletions.
41 changes: 37 additions & 4 deletions bedboss/bedclassifier/bedclassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pandas as pd

from bedboss.const import STANDARD_CHROM_LIST
from bedboss.exceptions import BedTypeException

_LOGGER = logging.getLogger("bedboss")

Expand Down Expand Up @@ -93,7 +94,9 @@ def __init__(
self.pm.stop_pipeline()


def get_bed_type(bed: str, standard_chrom: Optional[str] = None) -> Union[str, None]:
def get_bed_type(
bed: str, standard_chrom: Optional[str] = None, no_fail: Optional[bool] = True
) -> Union[str, None]:
"""
get the bed file type (ex. bed3, bed3+n )
standardize chromosomes if necessary:
Expand All @@ -119,11 +122,22 @@ def get_bed_type(bed: str, standard_chrom: Optional[str] = None) -> Union[str, N
# int[blockCount] chromStarts; "Start positions relative to chromStart"

# Use nrows to read only a few lines of the BED file (We don't need all of it)

df = None

try:
df = pd.read_csv(bed, sep="\t", header=None, nrows=4)
except pandas.errors.ParserError as e:
_LOGGER.warning(f"Unable to parse bed file {bed}, setting bed_type = Unknown")
if no_fail:
_LOGGER.warning(
f"Unable to parse bed file {bed}, setting bed_type = Unknown"
)
return "unknown_bedtype"
else:
raise BedTypeException(
reason=f"Bed type could not be determined due to CSV parse error {e}"
)

print(df)
if df is not None:
df = df.dropna(axis=1)
Expand All @@ -144,13 +158,32 @@ def get_bed_type(bed: str, standard_chrom: Optional[str] = None) -> Union[str, N
if col == 0:
if df[col].dtype == "O":
bedtype += 1
elif df[col].dtype == "int" or df[col].dtype == "float":
bedtype += 1
else:
return "unknown_bedtype"
if no_fail:
_LOGGER.warning(
f"Bed type could not be determined at column 0 with data type: {df[col].dtype}"
)
return "unknown_bedtype"
else:
raise BedTypeException(
reason=f"Bed type could not be determined at column {0} with data type: {df[col].dtype}"
)

else:
if df[col].dtype == "int" and (df[col] >= 0).all():
bedtype += 1
else:
return "unknown_bedtype"
if no_fail:
_LOGGER.warning(
f"Bed type could not be determined at column {col} with data type: {df[col].dtype}"
)
return "unknown_bedtype"
else:
raise BedTypeException(
reason=f"Bed type could not be determined at column 0 with data type: {df[col].dtype}"
)
else:
if col == 3:
if df[col].dtype == "O":
Expand Down
13 changes: 13 additions & 0 deletions bedboss/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,16 @@ def __init__(self, reason: str = ""):
:param str reason: additional info about requirements exception
"""
super(RequirementsException, self).__init__(reason)


class BedTypeException(BedBossException):
    """Raised when the type of a BED file cannot be determined."""

    def __init__(self, reason: str = ""):
        """
        Build the exception, optionally with an explanatory message.

        :param str reason: context describing why the bed type could not
            be determined; forwarded unchanged to the parent exception
        """
        super().__init__(reason)
9 changes: 7 additions & 2 deletions test/test_bedclassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def test_get_bed_type():
def test_manual_dir_beds():
"""This test is currently just for local manual testing"""
local_dir = "/home/drc/Downloads/test_beds_BED_classifier/"
# local_dir = "/home/drc/Downloads/individual_beds/"
output_dir = "/home/drc/Downloads/BED_CLASSIFIER_OUTPUT/"

for root, dirs, files in os.walk(local_dir):
Expand All @@ -41,8 +42,12 @@ def test_manual_dir_beds():
print("+++++++++++++++++++")


def test_from_PEPhub_beds():
    """Placeholder test: performs no checks and returns None."""
    # TODO(review): presumably meant to exercise BED files from PEPhub; not implemented yet.
    return None


if __name__ == "__main__":
print("DEBUG FROM MAIN")
test_get_bed_type()
test_classification()
# test_get_bed_type()
# test_classification()
test_manual_dir_beds()

0 comments on commit 55d3b88

Please sign in to comment.