Skip to content

Commit

Permalink
Move vcf data into separate data files in test/parse.test.ts
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Dec 8, 2024
1 parent a3aeb51 commit fb0a11e
Show file tree
Hide file tree
Showing 8 changed files with 210 additions and 198 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"devDependencies": {
"@babel/core": "^7.20.5",
"@eslint/js": "^9.7.0",
"@types/node": "^22.10.1",
"@typescript-eslint/eslint-plugin": "^8.8.1",
"@typescript-eslint/parser": "^8.8.1",
"@vitest/coverage-v8": "^2.1.3",
Expand Down
164 changes: 82 additions & 82 deletions test/__snapshots__/parse.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -674,88 +674,6 @@ exports[`can parse a line from the VCF spec 2`] = `
}
`;

exports[`can parse a line from the VCF spec 3`] = `
{
"ALT": [
"<DEL>",
],
"CHROM": "8",
"FILTER": "PASS",
"GENOTYPES": [Function],
"ID": [
"28329_0",
],
"INFO": {
"AF": [
0.971429,
],
"CHR2": [
"8",
],
"END": [
17709148,
],
"Kurtosis_quant_start": [
"20.524521",
],
"Kurtosis_quant_stop": [
"3.925926",
],
"PRECISE": true,
"RE": [
34,
],
"STD_quant_start": [
"0.000000",
],
"STD_quant_stop": [
"0.000000",
],
"STRANDS": [
"+-",
],
"STRANDS2": [
"20",
"14",
"20",
"14",
],
"SUPTYPE": [
"AL",
],
"SVLEN": [
33,
],
"SVMETHOD": [
"Snifflesv1.0.3",
],
"SVTYPE": [
"DEL",
],
},
"POS": 17709115,
"QUAL": undefined,
"REF": "N",
"SAMPLES": [Function],
}
`;

exports[`can parse a line from the VCF spec 4`] = `
{
"/seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam": {
"DR": [
1,
],
"DV": [
34,
],
"GT": [
"1/1",
],
},
}
`;

exports[`can parse a line from the VCF spec Y chrom (haploid)) 1`] = `
{
"ALT": [
Expand Down Expand Up @@ -2337,6 +2255,88 @@ exports[`shortcut parsing with vcf 4.3 bnd example 1`] = `
]
`;

exports[`sniffles vcf 1`] = `
{
"ALT": [
"<DEL>",
],
"CHROM": "8",
"FILTER": "PASS",
"GENOTYPES": [Function],
"ID": [
"28329_0",
],
"INFO": {
"AF": [
0.971429,
],
"CHR2": [
"8",
],
"END": [
17709148,
],
"Kurtosis_quant_start": [
"20.524521",
],
"Kurtosis_quant_stop": [
"3.925926",
],
"PRECISE": true,
"RE": [
34,
],
"STD_quant_start": [
"0.000000",
],
"STD_quant_stop": [
"0.000000",
],
"STRANDS": [
"+-",
],
"STRANDS2": [
"20",
"14",
"20",
"14",
],
"SUPTYPE": [
"AL",
],
"SVLEN": [
33,
],
"SVMETHOD": [
"Snifflesv1.0.3",
],
"SVTYPE": [
"DEL",
],
},
"POS": 17709115,
"QUAL": undefined,
"REF": "N",
"SAMPLES": [Function],
}
`;

exports[`sniffles vcf 2`] = `
{
"/seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam": {
"DR": [
1,
],
"DV": [
34,
],
"GT": [
"1/1",
],
},
}
`;

exports[`snippet from VCF 4.3 spec 1`] = `
[
{
Expand Down
4 changes: 4 additions & 0 deletions test/data/breakends.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT BAMs/caudaus.sorted.sam
11 94975747 MantaBND:0:2:3:0:0:0:1 G G]8:107653520] . PASS SVTYPE=BND;MATEID=MantaBND:0:2:3:0:0:0:0;CIPOS=0,2;HOMLEN=2;HOMSEQ=TT;BND_DEPTH=216;MATE_BND_DEPTH=735 PR:SR 722,9:463,15
11 94975753 MantaDEL:0:1:2:0:0:0 T <DEL> . PASS END=94987865;SVTYPE=DEL;SVLEN=12112;IMPRECISE;CIPOS=-156,156;CIEND=-150,150 PR 161,13
11 94987872 MantaBND:0:0:1:0:0:0:0 T T[8:107653411[ . PASS SVTYPE=BND;MATEID=MantaBND:0:0:1:0:0:0:1;BND_DEPTH=171;MATE_BND_DEPTH=830 PR:SR 489,4:520,19
23 changes: 23 additions & 0 deletions test/data/sniffles.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
##fileformat=VCFv4.2
##source=Sniffles
##fileDate=20170420
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=DUP,Description="Duplication">
##ALT=<ID=INV,Description="Inversion">
##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
##ALT=<ID=TRA,Description="Translocation">
##ALT=<ID=INS,Description="Insertion">
##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT /seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam
8 17709115 28329_0 N <DEL> . PASS PRECISE;SVMETHOD=Snifflesv1.0.3;CHR2=8;END=17709148;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=20.524521;Kurtosis_quant_stop=3.925926;SVTYPE=DEL;SUPTYPE=AL;SVLEN=33;STRANDS=+-;STRANDS2=20,14,20,14;RE=34;AF=0.971429 GT:DR:DV 1/1:1:34
23 changes: 23 additions & 0 deletions test/data/spec-example.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
##fileformat=VCFv4.3
##fileDate=20090805
##source=myImputationProgramV3.1
##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
##phasing=partial
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
##INFO=<ID=TEST,Number=1,Type=String,Description="Used for testing">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##FILTER=<ID=q10,Description="Quality below 10">
##FILTER=<ID=s50,Description="Less than 50% of samples have data">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
##FORMAT=<ID=TEST,Number=1,Type=String,Description="Used for testing">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
42 changes: 42 additions & 0 deletions test/data/y-chrom-haploid.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##fileDate=20150218
##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz
##contig=<ID=Y,length=59373566,assembly=b37>
##source=freeBayes v0.9.9.2 | GT values over-written with maximum likelihood state (subject to threshold) OR phylogenetic imputation
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##source=GenomeSTRiP_v1.04
##ALT=<ID=CNV,Description="Copy number polymorphism">
##FILTER=<ID=ALIGNLENGTH,Description="GSELENGTH < 200">
##FILTER=<ID=CLUSTERSEP,Description="GSCLUSTERSEP == NA || GSCLUSTERSEP <= 2.0">
##FILTER=<ID=DUPLICATE,Description="GSDUPLICATESCORE != NA && GSDUPLICATEOVERLAP >= 0.5 && GSDUPLICATESCORE >= 0.0">
##FILTER=<ID=GTDEPTH,Description="GSM1 == NA || GSM1 <= 0.5 || GSM1 >= 2.0">
##FILTER=<ID=INBREEDINGCOEFF,Description="GLINBREEDINGCOEFF != NA && GLINBREEDINGCOEFF < -0.15">
##FILTER=<ID=NONVARIANT,Description="GSNONVARSCORE != NA && GSNONVARSCORE >= 13.0">
##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
##FORMAT=<ID=CNL,Number=.,Type=Float,Description="Copy number likelihoods with no frequency prior">
##FORMAT=<ID=CNP,Number=.,Type=Float,Description="Copy number likelihoods">
##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events">
##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype likelihoods">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=FT,Number=1,Type=String,Description="Per-sample genotype filter">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of this variant">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral allele">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=SAS_AF,Number=A,Type=Float,Description="Allele frequency in the SAS populations calculated from AC and AN, in the range (0,1)">
##INFO=<ID=EUR_AF,Number=A,Type=Float,Description="Allele frequency in the EUR populations calculated from AC and AN, in the range (0,1)">
##INFO=<ID=AFR_AF,Number=A,Type=Float,Description="Allele frequency in the AFR populations calculated from AC and AN, in the range (0,1)">
##INFO=<ID=AMR_AF,Number=A,Type=Float,Description="Allele frequency in the AMR populations calculated from AC and AN, in the range (0,1)">
##INFO=<ID=EAS_AF,Number=A,Type=Float,Description="Allele frequency in the EAS populations calculated from AC and AN, in the range (0,1)">
##INFO=<ID=VT,Number=.,Type=String,Description="indicates what type of variant the line represents">
##INFO=<ID=EX_TARGET,Number=0,Type=Flag,Description="indicates whether a variant is within the exon pull down target boundaries">
##INFO=<ID=MULTI_ALLELIC,Number=0,Type=Flag,Description="indicates whether a site is multi-allelic">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG001055
Y 14483990 CNV_Y_14483990_15232198 C <CN0> 100 PASS AC=1;AF=0.000817661;AN=1223;END=15232198;NS=1233;SVTYPE=CNV;AMR_AF=0;AFR_AF=0;EUR_AF=0.0042;SAS_AF=0;EAS_AF=0;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-119.08:-1000,0,-218.16:99:0,-1000:99:0,10000 0:1:-1000,0,-43.56:-1000,0,-142.64:99:0,-1000:99:0,10000 .:.:.:.:.:.:.:. .:.:.:.:.:.:.:.
Y 2655180 rs11575897 G A 100 PASS AA=G;AC=22;AF=0.0178427;AN=1233;DP=84761;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0902;VT=SNP;EX_TARGET GT 0 0 0 .
Loading

0 comments on commit fb0a11e

Please sign in to comment.