Skip to content

Commit

Permalink
refactor chrom.sizes locales, add default genomemodel func
Browse files Browse the repository at this point in the history
  • Loading branch information
donaldcampbelljr committed Sep 13, 2024
1 parent 8a3e913 commit ea85fbd
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,4 @@ chrUn_gl000231 27386
chrUn_gl000229 19913
chrM 16571
chrUn_gl000226 15008
chr18_gl000207_random 4262
chr18_gl000207_random 4262
66 changes: 66 additions & 0 deletions bedboss/refgenome_validator/chrom_sizes/ucsc_mm10.chrom.sizes
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
chr1 195471971
chr2 182113224
chrX 171031299
chr3 160039680
chr4 156508116
chr5 151834684
chr6 149736546
chr7 145441459
chr10 130694993
chr8 129401213
chr14 124902244
chr9 124595110
chr11 122082543
chr13 120421639
chr12 120129022
chr15 104043685
chr16 98207768
chr17 94987271
chrY 91744698
chr18 90702639
chr19 61431566
chr5_JH584299_random 953012
chrX_GL456233_random 336933
chrY_JH584301_random 259875
chr1_GL456211_random 241735
chr4_GL456350_random 227966
chr4_JH584293_random 207968
chr1_GL456221_random 206961
chr5_JH584297_random 205776
chr5_JH584296_random 199368
chr5_GL456354_random 195993
chr4_JH584294_random 191905
chr5_JH584298_random 184189
chrY_JH584300_random 182347
chr7_GL456219_random 175968
chr1_GL456210_random 169725
chrY_JH584303_random 158099
chrY_JH584302_random 155838
chr1_GL456212_random 153618
chrUn_JH584304 114452
chrUn_GL456379 72385
chr4_GL456216_random 66673
chrUn_GL456393 55711
chrUn_GL456366 47073
chrUn_GL456367 42057
chrUn_GL456239 40056
chr1_GL456213_random 39340
chrUn_GL456383 38659
chrUn_GL456385 35240
chrUn_GL456360 31704
chrUn_GL456378 31602
chrUn_GL456389 28772
chrUn_GL456372 28664
chrUn_GL456370 26764
chrUn_GL456381 25871
chrUn_GL456387 24685
chrUn_GL456390 24668
chrUn_GL456394 24323
chrUn_GL456392 23629
chrUn_GL456382 23158
chrUn_GL456359 22974
chrUn_GL456396 21240
chrUn_GL456368 20208
chrM 16299
chr4_JH584292_random 14945
chr4_JH584295_random 1976
61 changes: 61 additions & 0 deletions bedboss/refgenome_validator/chrom_sizes/ucsc_mm39.chrom.sizes
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
chr1 195154279
chr2 181755017
chrX 169476592
chr3 159745316
chr4 156860686
chr5 151758149
chr6 149588044
chr7 144995196
chr10 130530862
chr8 130127694
chr14 125139656
chr9 124359700
chr11 121973369
chr13 120883175
chr12 120092757
chr15 104073951
chr16 98008968
chr17 95294699
chrY 91455967
chr18 90720763
chr19 61420004
chr5_JH584299v1_random 953012
chrX_GL456233v2_random 559103
chrY_JH584301v1_random 259875
chr1_GL456211v1_random 241735
chr1_GL456221v1_random 206961
chr5_JH584297v1_random 205776
chr5_JH584296v1_random 199368
chr5_GL456354v1_random 195993
chr5_JH584298v1_random 184189
chrY_JH584300v1_random 182347
chr7_GL456219v1_random 175968
chr1_GL456210v1_random 169725
chrY_JH584303v1_random 158099
chrY_JH584302v1_random 155838
chr1_GL456212v1_random 153618
chrUn_JH584304v1 114452
chrUn_GL456379v1 72385
chrUn_GL456366v1 47073
chrUn_GL456367v1 42057
chr1_GL456239v1_random 40056
chrUn_GL456383v1 38659
chrUn_GL456385v1 35240
chrUn_GL456360v1 31704
chrUn_GL456378v1 31602
chrUn_MU069435v1 31129
chrUn_GL456389v1 28772
chrUn_GL456372v1 28664
chrUn_GL456370v1 26764
chrUn_GL456381v1 25871
chrUn_GL456387v1 24685
chrUn_GL456390v1 24668
chrUn_GL456394v1 24323
chrUn_GL456392v1 23629
chrUn_GL456382v1 23158
chrUn_GL456359v1 22974
chrUn_GL456396v1 21240
chrUn_GL456368v1 20208
chrM 16299
chr1_MU069434v1_random 8412
chr4_JH584295v1_random 1976
32 changes: 31 additions & 1 deletion bedboss/refgenome_validator/refgenomevalidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ class RefValidator:
"""

def __init__(
self, genome_models: List[GenomeModel], igd_path: Optional[str] = None
self,
genome_models: Optional[List[GenomeModel]] = None,
igd_path: Optional[str] = None,
):
"""
Initialization method
Expand All @@ -31,6 +33,8 @@ def __init__(
:param str igd_path: path to a local IGD file containing ALL excluded ranges intervals for IGD overlap assessment, if not provided these metrics are not computed.
"""
if not genome_models:
genome_models = self.build_default_models()

if isinstance(genome_models, str):
genome_models = list(genome_models)
Expand Down Expand Up @@ -381,6 +385,32 @@ def process_igd_stats(self, igd_stats: dict):
"""
pass

def build_default_models(self):
    """
    Builds a default list of GenomeModels from the bundled chrom_sizes folder.

    The chrom_sizes folder is resolved relative to this module's location,
    not the current working directory, so the defaults are found no matter
    where the process was started from.
    Every file ending in ".sizes" produces one GenomeModel; the file name
    (including its extension) is used as the genome alias.

    :return list[GenomeModel]: one model per chrom.sizes file found, empty list if none are found
    """

    module_directory = os.path.dirname(os.path.abspath(__file__))
    chrm_sizes_directory = os.path.join(module_directory, "chrom_sizes")

    all_genome_models = []
    for root, _dirs, files in os.walk(chrm_sizes_directory):
        # os.walk gives no ordering guarantee; sort so the model list is reproducible
        for file in sorted(files):
            if file.endswith(".sizes"):
                curr_genome_model = GenomeModel(
                    genome_alias=file,
                    # os.walk yields bare file names, so join with root to get
                    # a path that can be opened from any working directory
                    chrom_sizes_file=os.path.join(root, file),
                )
                all_genome_models.append(curr_genome_model)

    return all_genome_models


# ----------------------------
# Helper Functions
Expand Down

0 comments on commit ea85fbd

Please sign in to comment.