From 8a0ce95a1a73665eff5a23c40ed5fbe799c412cb Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 6 Sep 2024 15:46:31 -0400 Subject: [PATCH] add sensitivity calculations for chrom names and chrom lengths --- .../refgenome_validator/refgenomevalidator.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/bedboss/refgenome_validator/refgenomevalidator.py b/bedboss/refgenome_validator/refgenomevalidator.py index 4618c6a..2900769 100644 --- a/bedboss/refgenome_validator/refgenomevalidator.py +++ b/bedboss/refgenome_validator/refgenomevalidator.py @@ -82,7 +82,7 @@ def compare_chrom_names_lengths( if key not in bed_chrom_sizes: not_q_and_m += 1 - # Calculate the Jaccard Index + # Calculate the Jaccard Index for Chrom Names bed_chrom_set = set(list(bed_chrom_sizes.keys())) genome_chrom_set = set(list(genome_chrom_sizes.keys())) chrom_intersection = bed_chrom_set.intersection(genome_chrom_set) @@ -93,6 +93,11 @@ def compare_chrom_names_lengths( if q_and_not_m > 1: passed_chrom_names = False + # Calculate sensitivity for chrom names + # defined as XS -> Extra Sequences + sensitivity = q_and_m / (q_and_m + q_and_not_m) + name_stats["XS"] = sensitivity + name_stats["q_and_m"] = q_and_m name_stats["q_and_not_m"] = q_and_not_m name_stats["not_q_and_m"] = not_q_and_m @@ -105,12 +110,22 @@ def compare_chrom_names_lengths( chroms_beyond_range = False num_of_chrm_beyond = 0 + num_chrm_within_bounds = 0 for key in list(bed_chrom_sizes.keys()): if key in genome_chrom_sizes: if bed_chrom_sizes[key] > genome_chrom_sizes[key]: num_of_chrm_beyond += 1 chroms_beyond_range = True + else: + num_chrm_within_bounds += 1 + + # Calculate sensitivity for chrom lengths + # defined as OOBR -> Out of Bounds Range + sensitivity = num_chrm_within_bounds / ( + num_chrm_within_bounds + num_of_chrm_beyond + ) + length_stats["OOBR"] = sensitivity length_stats["beyond_range"] = chroms_beyond_range length_stats["num_of_chrm_beyond"] = num_of_chrm_beyond