Skip to content

Commit

Permalink
added concise to ref gen validator return
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Sep 17, 2024
1 parent 156376b commit 85f449e
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 22 deletions.
3 changes: 2 additions & 1 deletion bedboss/refgenome_validator/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,5 @@ class CompatibilityConcise(BaseModel):
xs: float = 0.0
oobr: Union[float, None] = None
sequence_fit: Union[float, None] = None
compatibility: Union[RatingModel, None] = None
assigned_points: int
tier_ranking: int
47 changes: 27 additions & 20 deletions bedboss/refgenome_validator/refgenomevalidator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Union, List
from typing import Optional, Union, List, Dict
import os
import pandas as pd
import subprocess
Expand Down Expand Up @@ -136,17 +136,17 @@ def calculate_chrom_stats(
# Calculate recall/sensitivity for chrom lengths
# defined as OOBR -> Out of Bounds Range
sensitivity = num_chrom_within_bounds / (
num_chrom_within_bounds + num_of_chrom_beyond
num_chrom_within_bounds + num_of_chrom_beyond
)
length_stats = ChromLengthStats(
oobr=sensitivity,
beyond_range=chroms_beyond_range,
num_of_chrm_beyond=num_of_chrom_beyond,
num_of_chrom_beyond=num_of_chrom_beyond,
percentage_bed_chrom_beyond=(
100 * num_of_chrom_beyond / len(bed_chrom_set)
100 * num_of_chrom_beyond / len(bed_chrom_set)
),
percentage_genome_chrom_beyond=(
100 * num_of_chrom_beyond / len(genome_chrom_set)
100 * num_of_chrom_beyond / len(genome_chrom_set)
),
)

Expand Down Expand Up @@ -218,7 +218,7 @@ def determine_compatibility(
bedfile: str,
ref_filter: Optional[List[str]] = None,
concise: Optional[bool] = False,
) -> Union[List[CompatibilityStats], List[CompatibilityConcise]]:
) -> Union[Dict[str, CompatibilityStats], Dict[str, CompatibilityConcise]]:
"""
Given a bedfile, determine compatibility with reference genomes (GenomeModels) created at Validator initialization.
Expand All @@ -241,7 +241,7 @@ def determine_compatibility(

if not bed_chrom_info:
# if there is trouble parsing the bed file, raise ValidatorException instead of returning None
return None
raise ValidatorException

model_compat_stats = {}
final_compatibility_list = []
Expand All @@ -264,17 +264,18 @@ def determine_compatibility(
)

# Once all stats are collected, process them and add compatibility rating
model_compat_stats[genome_model.genome_alias].compatibility = self.calculate_rating(model_compat_stats[genome_model.genome_alias])
model_compat_stats[genome_model.genome_alias].compatibility = (
self.calculate_rating(model_compat_stats[genome_model.genome_alias])
)

if concise:
concise_dict = {}
for name, stats in model_compat_stats.items():
concise_dict[name] = self._create_concise_output(stats)

if concise:
...
return concise_dict

return model_compat_stats
# if concise:
# # TODO just return XS, OOBR, SEQ FIT, COMPAT TIER
# return final_compatibility_list
#
# return final_compatibility_list

def calculate_rating(self, compat_stats: CompatibilityStats) -> RatingModel:
"""
Expand Down Expand Up @@ -345,7 +346,6 @@ def calculate_rating(self, compat_stats: CompatibilityStats) -> RatingModel:
if compat_stats.igd_stats and compat_stats.igd_stats != {}:
self.process_igd_stats(compat_stats.igd_stats)


tier_ranking = 0
if points_rating == 0:
tier_ranking = 1
Expand All @@ -361,10 +361,7 @@ def calculate_rating(self, compat_stats: CompatibilityStats) -> RatingModel:
)
tier_ranking = 4

return RatingModel(
assigned_points=points_rating, tier_ranking=tier_ranking
)

return RatingModel(assigned_points=points_rating, tier_ranking=tier_ranking)

def process_igd_stats(self, igd_stats: dict):
"""
Expand Down Expand Up @@ -395,6 +392,16 @@ def _build_default_models() -> list[GenomeModel]:

return all_genome_models

@staticmethod
def _create_concise_output(output: CompatibilityStats) -> CompatibilityConcise:
    """
    Reduce a full compatibility report to its headline values.

    :param output: full compatibility stats for one genome model; its
        ``compatibility`` rating is read here, so it must already be populated
    :return: CompatibilityConcise holding xs, oobr, sequence_fit,
        assigned_points and tier_ranking copied from ``output``
    """
    # Collect the fields in the same order the concise model is populated.
    summary_fields = {
        "xs": output.chrom_name_stats.xs,
        "oobr": output.chrom_length_stats.oobr,
        "sequence_fit": output.chrom_sequence_fit_stats.sequence_fit,
    }

    rating = output.compatibility
    summary_fields["assigned_points"] = rating.assigned_points
    summary_fields["tier_ranking"] = rating.tier_ranking

    return CompatibilityConcise(**summary_fields)


# ----------------------------
# Helper Functions
Expand Down
9 changes: 8 additions & 1 deletion test/test_ref_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,12 @@


def test_main():
    """
    Smoke-test ReferenceValidator.determine_compatibility on FILE_PATH.

    Runs the validator in both full and concise mode and checks that each
    call returns a mapping and that the concise mapping covers exactly the
    same genome aliases as the full one.
    """
    validator = ReferenceValidator()

    full_report = validator.determine_compatibility(FILE_PATH)
    concise_report = validator.determine_compatibility(
        FILE_PATH,
        concise=True,
    )

    assert isinstance(full_report, dict)
    assert isinstance(concise_report, dict)
    # The concise output is derived entry-by-entry from the full stats,
    # so both mappings must share the same keys.
    assert set(concise_report) == set(full_report)

0 comments on commit 85f449e

Please sign in to comment.