diff --git a/package.json b/package.json index b03ea28..0445a55 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,7 @@ "devDependencies": { "@babel/core": "^7.20.5", "@eslint/js": "^9.7.0", + "@types/node": "^22.10.1", "@typescript-eslint/eslint-plugin": "^8.8.1", "@typescript-eslint/parser": "^8.8.1", "@vitest/coverage-v8": "^2.1.3", diff --git a/test/__snapshots__/parse.test.ts.snap b/test/__snapshots__/parse.test.ts.snap index ba2fb77..f33d218 100644 --- a/test/__snapshots__/parse.test.ts.snap +++ b/test/__snapshots__/parse.test.ts.snap @@ -674,88 +674,6 @@ exports[`can parse a line from the VCF spec 2`] = ` } `; -exports[`can parse a line from the VCF spec 3`] = ` -{ - "ALT": [ - "", - ], - "CHROM": "8", - "FILTER": "PASS", - "GENOTYPES": [Function], - "ID": [ - "28329_0", - ], - "INFO": { - "AF": [ - 0.971429, - ], - "CHR2": [ - "8", - ], - "END": [ - 17709148, - ], - "Kurtosis_quant_start": [ - "20.524521", - ], - "Kurtosis_quant_stop": [ - "3.925926", - ], - "PRECISE": true, - "RE": [ - 34, - ], - "STD_quant_start": [ - "0.000000", - ], - "STD_quant_stop": [ - "0.000000", - ], - "STRANDS": [ - "+-", - ], - "STRANDS2": [ - "20", - "14", - "20", - "14", - ], - "SUPTYPE": [ - "AL", - ], - "SVLEN": [ - 33, - ], - "SVMETHOD": [ - "Snifflesv1.0.3", - ], - "SVTYPE": [ - "DEL", - ], - }, - "POS": 17709115, - "QUAL": undefined, - "REF": "N", - "SAMPLES": [Function], -} -`; - -exports[`can parse a line from the VCF spec 4`] = ` -{ - "/seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam": { - "DR": [ - 1, - ], - "DV": [ - 34, - ], - "GT": [ - "1/1", - ], - }, -} -`; - exports[`can parse a line from the VCF spec Y chrom (haploid)) 1`] = ` { "ALT": [ @@ -2337,6 +2255,88 @@ exports[`shortcut parsing with vcf 4.3 bnd example 1`] = ` ] `; +exports[`sniffles vcf 1`] = ` +{ + "ALT": [ + "", + ], + "CHROM": "8", + "FILTER": "PASS", + "GENOTYPES": [Function], + "ID": [ + "28329_0", + ], + "INFO": { + "AF": [ + 0.971429, + ], + "CHR2": [ + "8", + ], + "END": [ + 17709148, + ], + "Kurtosis_quant_start": [ + "20.524521", + ], + "Kurtosis_quant_stop": [ + "3.925926", + ], + "PRECISE": true, + "RE": [ + 34, + ], + "STD_quant_start": [ + "0.000000", + ], + "STD_quant_stop": [ + "0.000000", + ], + "STRANDS": [ + "+-", + ], + "STRANDS2": [ + "20", + "14", + "20", + "14", + ], + "SUPTYPE": [ + "AL", + ], + "SVLEN": [ + 33, + ], + "SVMETHOD": [ + "Snifflesv1.0.3", + ], + "SVTYPE": [ + "DEL", + ], + }, + "POS": 17709115, + "QUAL": undefined, + "REF": "N", + "SAMPLES": [Function], +} +`; + +exports[`sniffles vcf 2`] = ` +{ + "/seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam": { + "DR": [ + 1, + ], + "DV": [ + 34, + ], + "GT": [ + "1/1", + ], + }, +} +`; + exports[`snippet from VCF 4.3 spec 1`] = ` [ { diff --git a/test/data/breakends.vcf b/test/data/breakends.vcf new file mode 100644 index 0000000..1cd8526 --- /dev/null +++ b/test/data/breakends.vcf @@ -0,0 +1,4 @@ +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT BAMs/caudaus.sorted.sam +11 94975747 MantaBND:0:2:3:0:0:0:1 G G]8:107653520] . PASS SVTYPE=BND;MATEID=MantaBND:0:2:3:0:0:0:0;CIPOS=0,2;HOMLEN=2;HOMSEQ=TT;BND_DEPTH=216;MATE_BND_DEPTH=735 PR:SR 722,9:463,15 +11 94975753 MantaDEL:0:1:2:0:0:0 T . PASS END=94987865;SVTYPE=DEL;SVLEN=12112;IMPRECISE;CIPOS=-156,156;CIEND=-150,150 PR 161,13 +11 94987872 MantaBND:0:0:1:0:0:0:0 T T[8:107653411[ . PASS SVTYPE=BND;MATEID=MantaBND:0:0:1:0:0:0:1;BND_DEPTH=171;MATE_BND_DEPTH=830 PR:SR 489,4:520,19 diff --git a/test/data/sniffles.vcf b/test/data/sniffles.vcf new file mode 100644 index 0000000..2763b6b --- /dev/null +++ b/test/data/sniffles.vcf @@ -0,0 +1,23 @@ +##fileformat=VCFv4.2 +##source=Sniffles +##fileDate=20170420 +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT /seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam +8 17709115 28329_0 N . PASS PRECISE;SVMETHOD=Snifflesv1.0.3;CHR2=8;END=17709148;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=20.524521;Kurtosis_quant_stop=3.925926;SVTYPE=DEL;SUPTYPE=AL;SVLEN=33;STRANDS=+-;STRANDS2=20,14,20,14;RE=34;AF=0.971429 GT:DR:DV 1/1:1:34 diff --git a/test/data/spec-example.vcf b/test/data/spec-example.vcf new file mode 100644 index 0000000..40f0761 --- /dev/null +++ b/test/data/spec-example.vcf @@ -0,0 +1,23 @@ +##fileformat=VCFv4.3 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta +##contig= +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 diff --git a/test/data/y-chrom-haploid.vcf b/test/data/y-chrom-haploid.vcf new file mode 100644 index 0000000..9018bfb --- /dev/null +++ b/test/data/y-chrom-haploid.vcf @@ -0,0 +1,42 @@ +##fileformat=VCFv4.1 +##FILTER= +##fileDate=20150218 +##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz +##contig= +##source=freeBayes v0.9.9.2 | GT values over-written with maximum likelihood state (subject to threshold) OR phylogenetic imputation +##INFO= +##FORMAT= +##source=GenomeSTRiP_v1.04 +##ALT= +##FILTER= +##FILTER= +##FILTER== 0.5 && GSDUPLICATESCORE >= 0.0"> +##FILTER== 2.0"> +##FILTER= +##FILTER== 13.0"> +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG001055 +Y 14483990 CNV_Y_14483990_15232198 C 100 PASS AC=1;AF=0.000817661;AN=1223;END=15232198;NS=1233;SVTYPE=CNV;AMR_AF=0;AFR_AF=0;EUR_AF=0.0042;SAS_AF=0;EAS_AF=0;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-119.08:-1000,0,-218.16:99:0,-1000:99:0,10000 0:1:-1000,0,-43.56:-1000,0,-142.64:99:0,-1000:99:0,10000 .:.:.:.:.:.:.:. .:.:.:.:.:.:.:. +Y 2655180 rs11575897 G A 100 PASS AA=G;AC=22;AF=0.0178427;AN=1233;DP=84761;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0902;VT=SNP;EX_TARGET GT 0 0 0 . diff --git a/test/parse.test.ts b/test/parse.test.ts index 127aed2..6c58b60 100644 --- a/test/parse.test.ts +++ b/test/parse.test.ts @@ -1,13 +1,12 @@ -// @ts-nocheck import { test, expect } from 'vitest' import fs from 'fs' import VCF, { parseBreakend } from '../src' -const readVcf = file => { +const readVcf = (file: string) => { const f = fs.readFileSync(file, 'utf8') const lines = f.split('\n') - const header = [] - const rest = [] + const header = [] as string[] + const rest = [] as string[] lines.forEach(line => { if (line.startsWith('#')) { header.push(line) @@ -15,35 +14,16 @@ const readVcf = file => { rest.push(line) } }) - return { header: header.join('\n'), lines: rest } + return { + header: header.join('\n'), + lines: rest, + } } function makeParser() { + const { header } = readVcf('test/data/spec-example.vcf') return new VCF({ - header: `##fileformat=VCFv4.3 -##fileDate=20090805 -##source=myImputationProgramV3.1 -##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta -##contig= -##phasing=partial -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\tNA00002\tNA00003 -`, + header, }) } @@ -70,12 +50,8 @@ test('can get metadata from the header', () => { Number: 1, Type: 'String', }) -}) -test('can get default metadata not in the header', () => { - const VCFParser = makeParser() - const metadata = VCFParser.getMetadata() - expect(metadata.INFO.AC).toEqual({ + expect(VCFParser.getMetadata('INFO', 'AC')).toEqual({ Number: 'A', Type: 'Integer', Description: @@ -106,7 +82,7 @@ test('parses a line with a breakend ALT', () => { const variant = VCFParser.parseLine( '2\t321681\tbnd_W\tG\tG]17:198982]\t6\tPASS\tSVTYPE=BND', ) - expect(variant.ALT.length).toBe(1) + expect(variant.ALT?.length).toBe(1) expect(variant.INFO.SVTYPE).toEqual(['BND']) expect(variant).toMatchSnapshot() }) @@ -116,7 +92,7 @@ test(`parses a line with mix of multiple breakends and non breakends`, () => { const variant = VCFParser.parseLine( `13\t123456\tbnd_U\tC\tCTATGTCG,C[2 : 321682[,C[17 : 198983[\t6\tPASS\tSVTYPE=BND;MATEID=bnd V,bnd Z`, ) - expect(variant.ALT.length).toBe(3) + expect(variant.ALT?.length).toBe(3) expect(variant.INFO.SVTYPE).toEqual(['BND']) expect(variant).toMatchSnapshot() }) @@ -140,87 +116,24 @@ test('throws errors with bad header lines', () => { }).toThrow(/No format line/) }) -test('can parse a line from the VCF spec', () => { +test('sniffles vcf', () => { + const { header, lines } = readVcf('test/data/sniffles.vcf') const VCFParser = new VCF({ - header: `##fileformat=VCFv4.2 -##source=Sniffles -##fileDate=20170420 -##ALT= -##ALT= -##ALT= -##ALT= -##ALT= -##ALT= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FORMAT= -##FORMAT= -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT /seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam`, + header, }) - const variant = VCFParser.parseLine( - '8\t17709115\t28329_0\tN\t\t.\tPASS\tPRECISE;SVMETHOD=Snifflesv1.0.3;CHR2=8;END=17709148;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=20.524521;Kurtosis_quant_stop=3.925926;SVTYPE=DEL;SUPTYPE=AL;SVLEN=33;STRANDS=+-;STRANDS2=20,14,20,14;RE=34;AF=0.971429\tGT:DR:DV\t1/1:1:34', - ) + const variant = VCFParser.parseLine(lines[0]) expect(variant).toMatchSnapshot() expect(variant.SAMPLES()).toMatchSnapshot() }) test('can parse a line from the VCF spec Y chrom (haploid))', () => { + const { header, lines } = readVcf('test/data/y-chrom-haploid.vcf') const VCFParser = new VCF({ - header: `##fileformat=VCFv4.1 -##FILTER= -##fileDate=20150218 -##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz -##contig= -##source=freeBayes v0.9.9.2 | GT values over-written with maximum likelihood state (subject to threshold) OR phylogenetic imputation -##INFO= -##FORMAT= -##source=GenomeSTRiP_v1.04 -##ALT= -##FILTER= -##FILTER= -##FILTER== 0.5 && GSDUPLICATESCORE >= 0.0"> -##FILTER== 2.0"> -##FILTER= -##FILTER== 13.0"> -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tHG00096\tHG00101\tHG00103\tHG001055`, + header, }) - const variant = VCFParser.parseLine( - 'Y\t14483990\tCNV_Y_14483990_15232198\tC\t\t100\tPASS\tAC=1;AF=0.000817661;AN=1223;END=15232198;NS=1233;SVTYPE=CNV;AMR_AF=0;AFR_AF=0;EUR_AF=0.0042;SAS_AF=0;EAS_AF=0;VT=SV;EX_TARGET\tGT:CN:CNL:CNP:CNQ:GP:GQ:PL\t0:1:-1000,0,-119.08:-1000,0,-218.16:99:0,-1000:99:0,10000\t0:1:-1000,0,-43.56:-1000,0,-142.64:99:0,-1000:99:0,10000\t.:.:.:.:.:.:.:.\t.:.:.:.:.:.:.:.', - ) - const variant2 = VCFParser.parseLine( - 'Y\t2655180\trs11575897\tG\tA\t100\tPASS\tAA=G;AC=22;AF=0.0178427;AN=1233;DP=84761;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0902;VT=SNP;EX_TARGET\tGT\t0\t0\t0\t.', - ) + console.log({ lines }) + const variant = VCFParser.parseLine(lines[0]) + const variant2 = VCFParser.parseLine(lines[1]) expect(variant).toMatchSnapshot() expect(variant.SAMPLES()).toMatchSnapshot() expect(variant2).toMatchSnapshot() @@ -237,16 +150,10 @@ test('snippet from VCF 4.3 spec', () => { expect(variants.map(variant => variant.SAMPLES())).toMatchSnapshot() }) test('can parse breakends', () => { - const header = `#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tBAMs/caudaus.sorted.sam` + const { header, lines } = readVcf('test/data/breakends.vcf') const VCFParser = new VCF({ header, }) - const lines = - `11 94975747 MantaBND:0:2:3:0:0:0:1 G G]8:107653520] . PASS SVTYPE=BND;MATEID=MantaBND:0:2:3:0:0:0:0;CIPOS=0,2;HOMLEN=2;HOMSEQ=TT;BND_DEPTH=216;MATE_BND_DEPTH=735 PR:SR 722,9:463,15 -11 94975753 MantaDEL:0:1:2:0:0:0 T . PASS END=94987865;SVTYPE=DEL;SVLEN=12112;IMPRECISE;CIPOS=-156,156;CIEND=-150,150 PR 161,13 -11 94987872 MantaBND:0:0:1:0:0:0:0 T T[8:107653411[ . PASS SVTYPE=BND;MATEID=MantaBND:0:0:1:0:0:0:1;BND_DEPTH=171;MATE_BND_DEPTH=830 PR:SR 489,4:520,19`.split( - '\n', - ) expect(lines.map(line => VCFParser.parseLine(line))).toMatchSnapshot() }) @@ -297,7 +204,7 @@ test('shortcut parsing with vcf 4.3 bnd example', () => { const VCFParser = new VCF({ header }) const variants = lines.map(line => VCFParser.parseLine(line)) - expect(variants.map(m => m.ALT[0].toString())).toEqual( + expect(variants.map(m => m.ALT?.[0].toString())).toEqual( lines.map(line => line.split('\t')[4]), ) diff --git a/yarn.lock b/yarn.lock index edd89c1..53254f4 100644 --- a/yarn.lock +++ b/yarn.lock @@ -576,6 +576,13 @@ resolved "https://registry.yarnpkg.com/@types/ms/-/ms-0.7.34.tgz#10964ba0dee6ac4cd462e2795b6bebd407303433" integrity sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g== +"@types/node@^22.10.1": + version "22.10.1" + resolved "https://registry.yarnpkg.com/@types/node/-/node-22.10.1.tgz#41ffeee127b8975a05f8c4f83fb89bcb2987d766" + integrity sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ== + dependencies: + undici-types "~6.20.0" + "@types/normalize-package-data@^2.4.0", "@types/normalize-package-data@^2.4.1", "@types/normalize-package-data@^2.4.3": version "2.4.4" resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.4.tgz#56e2cc26c397c038fab0e3a917a12d5c5909e901" @@ -3523,6 +3530,11 @@ unc-path-regex@^0.1.2: resolved "https://registry.yarnpkg.com/unc-path-regex/-/unc-path-regex-0.1.2.tgz#e73dd3d7b0d7c5ed86fbac6b0ae7d8c6a69d50fa" integrity sha512-eXL4nmJT7oCpkZsHZUOJo8hcX3GbsiDOa0Qu9F646fi8dT3XuSVopVqAcEiVzSKKH7UoDti23wNX3qGFxcW5Qg== +undici-types@~6.20.0: + version "6.20.0" + resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.20.0.tgz#8171bf22c1f588d1554d55bf204bc624af388433" + integrity sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg== + unicorn-magic@^0.1.0: version "0.1.0" resolved "https://registry.yarnpkg.com/unicorn-magic/-/unicorn-magic-0.1.0.tgz#1bb9a51c823aaf9d73a8bfcd3d1a23dde94b0ce4"