From fb0a11edb7c719e11893c23f791f12a64f846390 Mon Sep 17 00:00:00 2001 From: Colin <colin.diesh@gmail.com> Date: Sun, 8 Dec 2024 15:53:20 -0500 Subject: [PATCH] Move vcf data into separate data files in test/parse.test.ts --- package.json | 1 + test/__snapshots__/parse.test.ts.snap | 164 +++++++++++++------------- test/data/breakends.vcf | 4 + test/data/sniffles.vcf | 23 ++++ test/data/spec-example.vcf | 23 ++++ test/data/y-chrom-haploid.vcf | 42 +++++++ test/parse.test.ts | 139 ++++------------------ yarn.lock | 12 ++ 8 files changed, 210 insertions(+), 198 deletions(-) create mode 100644 test/data/breakends.vcf create mode 100644 test/data/sniffles.vcf create mode 100644 test/data/spec-example.vcf create mode 100644 test/data/y-chrom-haploid.vcf diff --git a/package.json b/package.json index b03ea28..0445a55 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,7 @@ "devDependencies": { "@babel/core": "^7.20.5", "@eslint/js": "^9.7.0", + "@types/node": "^22.10.1", "@typescript-eslint/eslint-plugin": "^8.8.1", "@typescript-eslint/parser": "^8.8.1", "@vitest/coverage-v8": "^2.1.3", diff --git a/test/__snapshots__/parse.test.ts.snap b/test/__snapshots__/parse.test.ts.snap index ba2fb77..f33d218 100644 --- a/test/__snapshots__/parse.test.ts.snap +++ b/test/__snapshots__/parse.test.ts.snap @@ -674,88 +674,6 @@ exports[`can parse a line from the VCF spec 2`] = ` } `; -exports[`can parse a line from the VCF spec 3`] = ` -{ - "ALT": [ - "<DEL>", - ], - "CHROM": "8", - "FILTER": "PASS", - "GENOTYPES": [Function], - "ID": [ - "28329_0", - ], - "INFO": { - "AF": [ - 0.971429, - ], - "CHR2": [ - "8", - ], - "END": [ - 17709148, - ], - "Kurtosis_quant_start": [ - "20.524521", - ], - "Kurtosis_quant_stop": [ - "3.925926", - ], - "PRECISE": true, - "RE": [ - 34, - ], - "STD_quant_start": [ - "0.000000", - ], - "STD_quant_stop": [ - "0.000000", - ], - "STRANDS": [ - "+-", - ], - "STRANDS2": [ - "20", - "14", - "20", - "14", - ], - "SUPTYPE": [ - "AL", - ], - "SVLEN": [ - 33, - ], - "SVMETHOD": [ - "Snifflesv1.0.3", - ], - "SVTYPE": [ - "DEL", - ], - }, - "POS": 17709115, - "QUAL": undefined, - "REF": "N", - "SAMPLES": [Function], -} -`; - -exports[`can parse a line from the VCF spec 4`] = ` -{ - "/seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam": { - "DR": [ - 1, - ], - "DV": [ - 34, - ], - "GT": [ - "1/1", - ], - }, -} -`; - exports[`can parse a line from the VCF spec Y chrom (haploid)) 1`] = ` { "ALT": [ @@ -2337,6 +2255,88 @@ exports[`shortcut parsing with vcf 4.3 bnd example 1`] = ` ] `; +exports[`sniffles vcf 1`] = ` +{ + "ALT": [ + "<DEL>", + ], + "CHROM": "8", + "FILTER": "PASS", + "GENOTYPES": [Function], + "ID": [ + "28329_0", + ], + "INFO": { + "AF": [ + 0.971429, + ], + "CHR2": [ + "8", + ], + "END": [ + 17709148, + ], + "Kurtosis_quant_start": [ + "20.524521", + ], + "Kurtosis_quant_stop": [ + "3.925926", + ], + "PRECISE": true, + "RE": [ + 34, + ], + "STD_quant_start": [ + "0.000000", + ], + "STD_quant_stop": [ + "0.000000", + ], + "STRANDS": [ + "+-", + ], + "STRANDS2": [ + "20", + "14", + "20", + "14", + ], + "SUPTYPE": [ + "AL", + ], + "SVLEN": [ + 33, + ], + "SVMETHOD": [ + "Snifflesv1.0.3", + ], + "SVTYPE": [ + "DEL", + ], + }, + "POS": 17709115, + "QUAL": undefined, + "REF": "N", + "SAMPLES": [Function], +} +`; + +exports[`sniffles vcf 2`] = ` +{ + "/seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam": { + "DR": [ + 1, + ], + "DV": [ + 34, + ], + "GT": [ + "1/1", + ], + }, +} +`; + exports[`snippet from VCF 4.3 spec 1`] = ` [ { diff --git a/test/data/breakends.vcf b/test/data/breakends.vcf new file mode 100644 index 0000000..1cd8526 --- /dev/null +++ b/test/data/breakends.vcf @@ -0,0 +1,4 @@ +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT BAMs/caudaus.sorted.sam +11 94975747 MantaBND:0:2:3:0:0:0:1 G G]8:107653520] . PASS SVTYPE=BND;MATEID=MantaBND:0:2:3:0:0:0:0;CIPOS=0,2;HOMLEN=2;HOMSEQ=TT;BND_DEPTH=216;MATE_BND_DEPTH=735 PR:SR 722,9:463,15 +11 94975753 MantaDEL:0:1:2:0:0:0 T <DEL> . PASS END=94987865;SVTYPE=DEL;SVLEN=12112;IMPRECISE;CIPOS=-156,156;CIEND=-150,150 PR 161,13 +11 94987872 MantaBND:0:0:1:0:0:0:0 T T[8:107653411[ . PASS SVTYPE=BND;MATEID=MantaBND:0:0:1:0:0:0:1;BND_DEPTH=171;MATE_BND_DEPTH=830 PR:SR 489,4:520,19 diff --git a/test/data/sniffles.vcf b/test/data/sniffles.vcf new file mode 100644 index 0000000..2763b6b --- /dev/null +++ b/test/data/sniffles.vcf @@ -0,0 +1,23 @@ +##fileformat=VCFv4.2 +##source=Sniffles +##fileDate=20170420 +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries"> +##ALT=<ID=TRA,Description="Translocation"> +##ALT=<ID=INS,Description="Insertion"> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads"> +##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT /seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam +8 17709115 28329_0 N <DEL> . PASS PRECISE;SVMETHOD=Snifflesv1.0.3;CHR2=8;END=17709148;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=20.524521;Kurtosis_quant_stop=3.925926;SVTYPE=DEL;SUPTYPE=AL;SVLEN=33;STRANDS=+-;STRANDS2=20,14,20,14;RE=34;AF=0.971429 GT:DR:DV 1/1:1:34 diff --git a/test/data/spec-example.vcf b/test/data/spec-example.vcf new file mode 100644 index 0000000..40f0761 --- /dev/null +++ b/test/data/spec-example.vcf @@ -0,0 +1,23 @@ +##fileformat=VCFv4.3 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta +##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x> +##phasing=partial +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency"> +##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele"> +##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129"> +##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership"> +##INFO=<ID=TEST,Number=1,Type=String,Description="Used for testing"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##FILTER=<ID=q10,Description="Quality below 10"> +##FILTER=<ID=s50,Description="Less than 50% of samples have data"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods"> +##FORMAT=<ID=TEST,Number=1,Type=String,Description="Used for testing"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 diff --git a/test/data/y-chrom-haploid.vcf b/test/data/y-chrom-haploid.vcf new file mode 100644 index 0000000..9018bfb --- /dev/null +++ b/test/data/y-chrom-haploid.vcf @@ -0,0 +1,42 @@ +##fileformat=VCFv4.1 +##FILTER=<ID=PASS,Description="All filters passed"> +##fileDate=20150218 +##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz +##contig=<ID=Y,length=59373566,assembly=b37> +##source=freeBayes v0.9.9.2 | GT values over-written with maximum likelihood state (subject to threshold) OR phylogenetic imputation +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##source=GenomeSTRiP_v1.04 +##ALT=<ID=CNV,Description="Copy number polymorphism"> +##FILTER=<ID=ALIGNLENGTH,Description="GSELENGTH < 200"> +##FILTER=<ID=CLUSTERSEP,Description="GSCLUSTERSEP == NA || GSCLUSTERSEP <= 2.0"> +##FILTER=<ID=DUPLICATE,Description="GSDUPLICATESCORE != NA && GSDUPLICATEOVERLAP >= 0.5 && GSDUPLICATESCORE >= 0.0"> +##FILTER=<ID=GTDEPTH,Description="GSM1 == NA || GSM1 <= 0.5 || GSM1 >= 2.0"> +##FILTER=<ID=INBREEDINGCOEFF,Description="GLINBREEDINGCOEFF != NA && GLINBREEDINGCOEFF < -0.15"> +##FILTER=<ID=NONVARIANT,Description="GSNONVARSCORE != NA && GSNONVARSCORE >= 13.0"> +##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events"> +##FORMAT=<ID=CNL,Number=.,Type=Float,Description="Copy number likelihoods with no frequency prior"> +##FORMAT=<ID=CNP,Number=.,Type=Float,Description="Copy number likelihoods"> +##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events"> +##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype likelihoods"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=FT,Number=1,Type=String,Description="Per-sample genotype filter"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of this variant"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral allele"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=SAS_AF,Number=A,Type=Float,Description="Allele frequency in the SAS populations calculated from AC and AN, in the range (0,1)"> +##INFO=<ID=EUR_AF,Number=A,Type=Float,Description="Allele frequency in the EUR populations calculated from AC and AN, in the range (0,1)"> +##INFO=<ID=AFR_AF,Number=A,Type=Float,Description="Allele frequency in the AFR populations calculated from AC and AN, in the range (0,1)"> +##INFO=<ID=AMR_AF,Number=A,Type=Float,Description="Allele frequency in the AMR populations calculated from AC and AN, in the range (0,1)"> +##INFO=<ID=EAS_AF,Number=A,Type=Float,Description="Allele frequency in the EAS populations calculated from AC and AN, in the range (0,1)"> +##INFO=<ID=VT,Number=.,Type=String,Description="indicates what type of variant the line represents"> +##INFO=<ID=EX_TARGET,Number=0,Type=Flag,Description="indicates whether a variant is within the exon pull down target boundaries"> +##INFO=<ID=MULTI_ALLELIC,Number=0,Type=Flag,Description="indicates whether a site is multi-allelic"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG001055 +Y 14483990 CNV_Y_14483990_15232198 C <CN0> 100 PASS AC=1;AF=0.000817661;AN=1223;END=15232198;NS=1233;SVTYPE=CNV;AMR_AF=0;AFR_AF=0;EUR_AF=0.0042;SAS_AF=0;EAS_AF=0;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-119.08:-1000,0,-218.16:99:0,-1000:99:0,10000 0:1:-1000,0,-43.56:-1000,0,-142.64:99:0,-1000:99:0,10000 .:.:.:.:.:.:.:. .:.:.:.:.:.:.:. +Y 2655180 rs11575897 G A 100 PASS AA=G;AC=22;AF=0.0178427;AN=1233;DP=84761;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0902;VT=SNP;EX_TARGET GT 0 0 0 . diff --git a/test/parse.test.ts b/test/parse.test.ts index 127aed2..6c58b60 100644 --- a/test/parse.test.ts +++ b/test/parse.test.ts @@ -1,13 +1,12 @@ -// @ts-nocheck import { test, expect } from 'vitest' import fs from 'fs' import VCF, { parseBreakend } from '../src' -const readVcf = file => { +const readVcf = (file: string) => { const f = fs.readFileSync(file, 'utf8') const lines = f.split('\n') - const header = [] - const rest = [] + const header = [] as string[] + const rest = [] as string[] lines.forEach(line => { if (line.startsWith('#')) { header.push(line) @@ -15,35 +14,16 @@ const readVcf = file => { rest.push(line) } }) - return { header: header.join('\n'), lines: rest } + return { + header: header.join('\n'), + lines: rest, + } } function makeParser() { + const { header } = readVcf('test/data/spec-example.vcf') return new VCF({ - header: `##fileformat=VCFv4.3 -##fileDate=20090805 -##source=myImputationProgramV3.1 -##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta -##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x> -##phasing=partial -##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data"> -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth"> -##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency"> -##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele"> -##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129"> -##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership"> -##INFO=<ID=TEST,Number=1,Type=String,Description="Used for testing"> -##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> -##FILTER=<ID=q10,Description="Quality below 10"> -##FILTER=<ID=s50,Description="Less than 50% of samples have data"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality"> -##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods"> -##FORMAT=<ID=TEST,Number=1,Type=String,Description="Used for testing"> -#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\tNA00002\tNA00003 -`, + header, }) } @@ -70,12 +50,8 @@ test('can get metadata from the header', () => { Number: 1, Type: 'String', }) -}) -test('can get default metadata not in the header', () => { - const VCFParser = makeParser() - const metadata = VCFParser.getMetadata() - expect(metadata.INFO.AC).toEqual({ + expect(VCFParser.getMetadata('INFO', 'AC')).toEqual({ Number: 'A', Type: 'Integer', Description: @@ -106,7 +82,7 @@ test('parses a line with a breakend ALT', () => { const variant = VCFParser.parseLine( '2\t321681\tbnd_W\tG\tG]17:198982]\t6\tPASS\tSVTYPE=BND', ) - expect(variant.ALT.length).toBe(1) + expect(variant.ALT?.length).toBe(1) expect(variant.INFO.SVTYPE).toEqual(['BND']) expect(variant).toMatchSnapshot() }) @@ -116,7 +92,7 @@ test(`parses a line with mix of multiple breakends and non breakends`, () => { const variant = VCFParser.parseLine( `13\t123456\tbnd_U\tC\tCTATGTCG,C[2 : 321682[,C[17 : 198983[\t6\tPASS\tSVTYPE=BND;MATEID=bnd V,bnd Z`, ) - expect(variant.ALT.length).toBe(3) + expect(variant.ALT?.length).toBe(3) expect(variant.INFO.SVTYPE).toEqual(['BND']) expect(variant).toMatchSnapshot() }) @@ -140,87 +116,24 @@ test('throws errors with bad header lines', () => { }).toThrow(/No format line/) }) -test('can parse a line from the VCF spec', () => { +test('sniffles vcf', () => { + const { header, lines } = readVcf('test/data/sniffles.vcf') const VCFParser = new VCF({ - header: `##fileformat=VCFv4.2 -##source=Sniffles -##fileDate=20170420 -##ALT=<ID=DEL,Description="Deletion"> -##ALT=<ID=DUP,Description="Duplication"> -##ALT=<ID=INV,Description="Inversion"> -##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries"> -##ALT=<ID=TRA,Description="Translocation"> -##ALT=<ID=INS,Description="Insertion"> -##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> -##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> -##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends"> -##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> -##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> -##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> -##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> -##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV"> -##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads"> -##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT /seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam`, + header, }) - const variant = VCFParser.parseLine( - '8\t17709115\t28329_0\tN\t<DEL>\t.\tPASS\tPRECISE;SVMETHOD=Snifflesv1.0.3;CHR2=8;END=17709148;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=20.524521;Kurtosis_quant_stop=3.925926;SVTYPE=DEL;SUPTYPE=AL;SVLEN=33;STRANDS=+-;STRANDS2=20,14,20,14;RE=34;AF=0.971429\tGT:DR:DV\t1/1:1:34', - ) + const variant = VCFParser.parseLine(lines[0]) expect(variant).toMatchSnapshot() expect(variant.SAMPLES()).toMatchSnapshot() }) test('can parse a line from the VCF spec Y chrom (haploid))', () => { + const { header, lines } = readVcf('test/data/y-chrom-haploid.vcf') const VCFParser = new VCF({ - header: `##fileformat=VCFv4.1 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20150218 -##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz -##contig=<ID=Y,length=59373566,assembly=b37> -##source=freeBayes v0.9.9.2 | GT values over-written with maximum likelihood state (subject to threshold) OR phylogenetic imputation -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##source=GenomeSTRiP_v1.04 -##ALT=<ID=CNV,Description="Copy number polymorphism"> -##FILTER=<ID=ALIGNLENGTH,Description="GSELENGTH < 200"> -##FILTER=<ID=CLUSTERSEP,Description="GSCLUSTERSEP == NA || GSCLUSTERSEP <= 2.0"> -##FILTER=<ID=DUPLICATE,Description="GSDUPLICATESCORE != NA && GSDUPLICATEOVERLAP >= 0.5 && GSDUPLICATESCORE >= 0.0"> -##FILTER=<ID=GTDEPTH,Description="GSM1 == NA || GSM1 <= 0.5 || GSM1 >= 2.0"> -##FILTER=<ID=INBREEDINGCOEFF,Description="GLINBREEDINGCOEFF != NA && GLINBREEDINGCOEFF < -0.15"> -##FILTER=<ID=NONVARIANT,Description="GSNONVARSCORE != NA && GSNONVARSCORE >= 13.0"> -##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events"> -##FORMAT=<ID=CNL,Number=.,Type=Float,Description="Copy number likelihoods with no frequency prior"> -##FORMAT=<ID=CNP,Number=.,Type=Float,Description="Copy number likelihoods"> -##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events"> -##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype likelihoods"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> -##FORMAT=<ID=FT,Number=1,Type=String,Description="Per-sample genotype filter"> -##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> -##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of this variant"> -##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> -##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral allele"> -##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes"> -##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]"> -##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##INFO=<ID=SAS_AF,Number=A,Type=Float,Description="Allele frequency in the SAS populations calculated from AC and AN, in the range (0,1)"> -##INFO=<ID=EUR_AF,Number=A,Type=Float,Description="Allele frequency in the EUR populations calculated from AC and AN, in the range (0,1)"> -##INFO=<ID=AFR_AF,Number=A,Type=Float,Description="Allele frequency in the AFR populations calculated from AC and AN, in the range (0,1)"> -##INFO=<ID=AMR_AF,Number=A,Type=Float,Description="Allele frequency in the AMR populations calculated from AC and AN, in the range (0,1)"> -##INFO=<ID=EAS_AF,Number=A,Type=Float,Description="Allele frequency in the EAS populations calculated from AC and AN, in the range (0,1)"> -##INFO=<ID=VT,Number=.,Type=String,Description="indicates what type of variant the line represents"> -##INFO=<ID=EX_TARGET,Number=0,Type=Flag,Description="indicates whether a variant is within the exon pull down target boundaries"> -##INFO=<ID=MULTI_ALLELIC,Number=0,Type=Flag,Description="indicates whether a site is multi-allelic"> -#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tHG00096\tHG00101\tHG00103\tHG001055`, + header, }) - const variant = VCFParser.parseLine( - 'Y\t14483990\tCNV_Y_14483990_15232198\tC\t<CN0>\t100\tPASS\tAC=1;AF=0.000817661;AN=1223;END=15232198;NS=1233;SVTYPE=CNV;AMR_AF=0;AFR_AF=0;EUR_AF=0.0042;SAS_AF=0;EAS_AF=0;VT=SV;EX_TARGET\tGT:CN:CNL:CNP:CNQ:GP:GQ:PL\t0:1:-1000,0,-119.08:-1000,0,-218.16:99:0,-1000:99:0,10000\t0:1:-1000,0,-43.56:-1000,0,-142.64:99:0,-1000:99:0,10000\t.:.:.:.:.:.:.:.\t.:.:.:.:.:.:.:.', - ) - const variant2 = VCFParser.parseLine( - 'Y\t2655180\trs11575897\tG\tA\t100\tPASS\tAA=G;AC=22;AF=0.0178427;AN=1233;DP=84761;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0902;VT=SNP;EX_TARGET\tGT\t0\t0\t0\t.', - ) + console.log({ lines }) + const variant = VCFParser.parseLine(lines[0]) + const variant2 = VCFParser.parseLine(lines[1]) expect(variant).toMatchSnapshot() expect(variant.SAMPLES()).toMatchSnapshot() expect(variant2).toMatchSnapshot() @@ -237,16 +150,10 @@ test('snippet from VCF 4.3 spec', () => { expect(variants.map(variant => variant.SAMPLES())).toMatchSnapshot() }) test('can parse breakends', () => { - const header = `#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tBAMs/caudaus.sorted.sam` + const { header, lines } = readVcf('test/data/breakends.vcf') const VCFParser = new VCF({ header, }) - const lines = - `11 94975747 MantaBND:0:2:3:0:0:0:1 G G]8:107653520] . PASS SVTYPE=BND;MATEID=MantaBND:0:2:3:0:0:0:0;CIPOS=0,2;HOMLEN=2;HOMSEQ=TT;BND_DEPTH=216;MATE_BND_DEPTH=735 PR:SR 722,9:463,15 -11 94975753 MantaDEL:0:1:2:0:0:0 T <DEL> . PASS END=94987865;SVTYPE=DEL;SVLEN=12112;IMPRECISE;CIPOS=-156,156;CIEND=-150,150 PR 161,13 -11 94987872 MantaBND:0:0:1:0:0:0:0 T T[8:107653411[ . PASS SVTYPE=BND;MATEID=MantaBND:0:0:1:0:0:0:1;BND_DEPTH=171;MATE_BND_DEPTH=830 PR:SR 489,4:520,19`.split( - '\n', - ) expect(lines.map(line => VCFParser.parseLine(line))).toMatchSnapshot() }) @@ -297,7 +204,7 @@ test('shortcut parsing with vcf 4.3 bnd example', () => { const VCFParser = new VCF({ header }) const variants = lines.map(line => VCFParser.parseLine(line)) - expect(variants.map(m => m.ALT[0].toString())).toEqual( + expect(variants.map(m => m.ALT?.[0].toString())).toEqual( lines.map(line => line.split('\t')[4]), ) diff --git a/yarn.lock b/yarn.lock index edd89c1..53254f4 100644 --- a/yarn.lock +++ b/yarn.lock @@ -576,6 +576,13 @@ resolved "https://registry.yarnpkg.com/@types/ms/-/ms-0.7.34.tgz#10964ba0dee6ac4cd462e2795b6bebd407303433" integrity sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g== +"@types/node@^22.10.1": + version "22.10.1" + resolved "https://registry.yarnpkg.com/@types/node/-/node-22.10.1.tgz#41ffeee127b8975a05f8c4f83fb89bcb2987d766" + integrity sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ== + dependencies: + undici-types "~6.20.0" + "@types/normalize-package-data@^2.4.0", "@types/normalize-package-data@^2.4.1", "@types/normalize-package-data@^2.4.3": version "2.4.4" resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.4.tgz#56e2cc26c397c038fab0e3a917a12d5c5909e901" @@ -3523,6 +3530,11 @@ unc-path-regex@^0.1.2: resolved "https://registry.yarnpkg.com/unc-path-regex/-/unc-path-regex-0.1.2.tgz#e73dd3d7b0d7c5ed86fbac6b0ae7d8c6a69d50fa" integrity sha512-eXL4nmJT7oCpkZsHZUOJo8hcX3GbsiDOa0Qu9F646fi8dT3XuSVopVqAcEiVzSKKH7UoDti23wNX3qGFxcW5Qg== +undici-types@~6.20.0: + version "6.20.0" + resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.20.0.tgz#8171bf22c1f588d1554d55bf204bc624af388433" + integrity sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg== + unicorn-magic@^0.1.0: version "0.1.0" resolved "https://registry.yarnpkg.com/unicorn-magic/-/unicorn-magic-0.1.0.tgz#1bb9a51c823aaf9d73a8bfcd3d1a23dde94b0ce4"