Skip to content

Commit

Permalink
add sensitivity calculations for chrom names and chrom lengths
Browse files Browse the repository at this point in the history
  • Loading branch information
donaldcampbelljr committed Sep 6, 2024
1 parent 5006b79 commit 8a0ce95
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion bedboss/refgenome_validator/refgenomevalidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def compare_chrom_names_lengths(
if key not in bed_chrom_sizes:
not_q_and_m += 1

# Calculate the Jaccard Index
# Calculate the Jaccard Index for Chrom Names
bed_chrom_set = set(list(bed_chrom_sizes.keys()))
genome_chrom_set = set(list(genome_chrom_sizes.keys()))
chrom_intersection = bed_chrom_set.intersection(genome_chrom_set)
Expand All @@ -93,6 +93,11 @@ def compare_chrom_names_lengths(
if q_and_not_m > 1:
passed_chrom_names = False

# Calculate sensitivity for chrom names, reported as XS (Extra Sequences):
# the fraction of BED chrom names that are also present in the genome.
sensitivity = q_and_m / (q_and_m + q_and_not_m)
name_stats["XS"] = sensitivity

name_stats["q_and_m"] = q_and_m
name_stats["q_and_not_m"] = q_and_not_m
name_stats["not_q_and_m"] = not_q_and_m
Expand All @@ -105,12 +110,22 @@ def compare_chrom_names_lengths(

chroms_beyond_range = False
num_of_chrm_beyond = 0
num_chrm_within_bounds = 0

for key in list(bed_chrom_sizes.keys()):
if key in genome_chrom_sizes:
if bed_chrom_sizes[key] > genome_chrom_sizes[key]:
num_of_chrm_beyond += 1
chroms_beyond_range = True
else:
num_chrm_within_bounds += 1

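# Calculate sensitivity for chrom lengths, reported as OOBR (Out of Bounds Range):
# of the BED chroms also present in the genome, the fraction whose BED length
# does not exceed the genome length.
# NOTE(review): both counters appear to be updated only for chroms found in the
# genome, so the denominator is zero when no BED chrom name matches - confirm
# this case is handled.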
sensitivity = num_chrm_within_bounds / (
num_chrm_within_bounds + num_of_chrm_beyond
)
length_stats["OOBR"] = sensitivity

length_stats["beyond_range"] = chroms_beyond_range
length_stats["num_of_chrm_beyond"] = num_of_chrm_beyond
Expand Down

0 comments on commit 8a0ce95

Please sign in to comment.