From fb0a11edb7c719e11893c23f791f12a64f846390 Mon Sep 17 00:00:00 2001
From: Colin <colin.diesh@gmail.com>
Date: Sun, 8 Dec 2024 15:53:20 -0500
Subject: [PATCH] Move inline VCF data from test/parse.test.ts into separate test/data/*.vcf files

---
 package.json                          |   1 +
 test/__snapshots__/parse.test.ts.snap | 164 +++++++++++++-------------
 test/data/breakends.vcf               |   4 +
 test/data/sniffles.vcf                |  23 ++++
 test/data/spec-example.vcf            |  23 ++++
 test/data/y-chrom-haploid.vcf         |  42 +++++++
 test/parse.test.ts                    | 139 ++++------------------
 yarn.lock                             |  12 ++
 8 files changed, 210 insertions(+), 198 deletions(-)
 create mode 100644 test/data/breakends.vcf
 create mode 100644 test/data/sniffles.vcf
 create mode 100644 test/data/spec-example.vcf
 create mode 100644 test/data/y-chrom-haploid.vcf

diff --git a/package.json b/package.json
index b03ea28..0445a55 100644
--- a/package.json
+++ b/package.json
@@ -40,6 +40,7 @@
   "devDependencies": {
     "@babel/core": "^7.20.5",
     "@eslint/js": "^9.7.0",
+    "@types/node": "^22.10.1",
     "@typescript-eslint/eslint-plugin": "^8.8.1",
     "@typescript-eslint/parser": "^8.8.1",
     "@vitest/coverage-v8": "^2.1.3",
diff --git a/test/__snapshots__/parse.test.ts.snap b/test/__snapshots__/parse.test.ts.snap
index ba2fb77..f33d218 100644
--- a/test/__snapshots__/parse.test.ts.snap
+++ b/test/__snapshots__/parse.test.ts.snap
@@ -674,88 +674,6 @@ exports[`can parse a line from the VCF spec 2`] = `
 }
 `;
 
-exports[`can parse a line from the VCF spec 3`] = `
-{
-  "ALT": [
-    "<DEL>",
-  ],
-  "CHROM": "8",
-  "FILTER": "PASS",
-  "GENOTYPES": [Function],
-  "ID": [
-    "28329_0",
-  ],
-  "INFO": {
-    "AF": [
-      0.971429,
-    ],
-    "CHR2": [
-      "8",
-    ],
-    "END": [
-      17709148,
-    ],
-    "Kurtosis_quant_start": [
-      "20.524521",
-    ],
-    "Kurtosis_quant_stop": [
-      "3.925926",
-    ],
-    "PRECISE": true,
-    "RE": [
-      34,
-    ],
-    "STD_quant_start": [
-      "0.000000",
-    ],
-    "STD_quant_stop": [
-      "0.000000",
-    ],
-    "STRANDS": [
-      "+-",
-    ],
-    "STRANDS2": [
-      "20",
-      "14",
-      "20",
-      "14",
-    ],
-    "SUPTYPE": [
-      "AL",
-    ],
-    "SVLEN": [
-      33,
-    ],
-    "SVMETHOD": [
-      "Snifflesv1.0.3",
-    ],
-    "SVTYPE": [
-      "DEL",
-    ],
-  },
-  "POS": 17709115,
-  "QUAL": undefined,
-  "REF": "N",
-  "SAMPLES": [Function],
-}
-`;
-
-exports[`can parse a line from the VCF spec 4`] = `
-{
-  "/seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam": {
-    "DR": [
-      1,
-    ],
-    "DV": [
-      34,
-    ],
-    "GT": [
-      "1/1",
-    ],
-  },
-}
-`;
-
 exports[`can parse a line from the VCF spec Y chrom (haploid)) 1`] = `
 {
   "ALT": [
@@ -2337,6 +2255,88 @@ exports[`shortcut parsing with vcf 4.3 bnd example 1`] = `
 ]
 `;
 
+exports[`sniffles vcf 1`] = `
+{
+  "ALT": [
+    "<DEL>",
+  ],
+  "CHROM": "8",
+  "FILTER": "PASS",
+  "GENOTYPES": [Function],
+  "ID": [
+    "28329_0",
+  ],
+  "INFO": {
+    "AF": [
+      0.971429,
+    ],
+    "CHR2": [
+      "8",
+    ],
+    "END": [
+      17709148,
+    ],
+    "Kurtosis_quant_start": [
+      "20.524521",
+    ],
+    "Kurtosis_quant_stop": [
+      "3.925926",
+    ],
+    "PRECISE": true,
+    "RE": [
+      34,
+    ],
+    "STD_quant_start": [
+      "0.000000",
+    ],
+    "STD_quant_stop": [
+      "0.000000",
+    ],
+    "STRANDS": [
+      "+-",
+    ],
+    "STRANDS2": [
+      "20",
+      "14",
+      "20",
+      "14",
+    ],
+    "SUPTYPE": [
+      "AL",
+    ],
+    "SVLEN": [
+      33,
+    ],
+    "SVMETHOD": [
+      "Snifflesv1.0.3",
+    ],
+    "SVTYPE": [
+      "DEL",
+    ],
+  },
+  "POS": 17709115,
+  "QUAL": undefined,
+  "REF": "N",
+  "SAMPLES": [Function],
+}
+`;
+
+exports[`sniffles vcf 2`] = `
+{
+  "/seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam": {
+    "DR": [
+      1,
+    ],
+    "DV": [
+      34,
+    ],
+    "GT": [
+      "1/1",
+    ],
+  },
+}
+`;
+
 exports[`snippet from VCF 4.3 spec 1`] = `
 [
   {
diff --git a/test/data/breakends.vcf b/test/data/breakends.vcf
new file mode 100644
index 0000000..1cd8526
--- /dev/null
+++ b/test/data/breakends.vcf
@@ -0,0 +1,4 @@
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	BAMs/caudaus.sorted.sam
+11	94975747	MantaBND:0:2:3:0:0:0:1	G	G]8:107653520]	.	PASS	SVTYPE=BND;MATEID=MantaBND:0:2:3:0:0:0:0;CIPOS=0,2;HOMLEN=2;HOMSEQ=TT;BND_DEPTH=216;MATE_BND_DEPTH=735	PR:SR	722,9:463,15
+11	94975753	MantaDEL:0:1:2:0:0:0	T	<DEL>	.	PASS	END=94987865;SVTYPE=DEL;SVLEN=12112;IMPRECISE;CIPOS=-156,156;CIEND=-150,150	PR	161,13
+11	94987872	MantaBND:0:0:1:0:0:0:0	T	T[8:107653411[	.	PASS	SVTYPE=BND;MATEID=MantaBND:0:0:1:0:0:0:1;BND_DEPTH=171;MATE_BND_DEPTH=830	PR:SR	489,4:520,19
diff --git a/test/data/sniffles.vcf b/test/data/sniffles.vcf
new file mode 100644
index 0000000..2763b6b
--- /dev/null
+++ b/test/data/sniffles.vcf
@@ -0,0 +1,23 @@
+##fileformat=VCFv4.2
+##source=Sniffles
+##fileDate=20170420
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
+##ALT=<ID=TRA,Description="Translocation">
+##ALT=<ID=INS,Description="Insertion">
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	/seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam
+8	17709115	28329_0	N	<DEL>	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.3;CHR2=8;END=17709148;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=20.524521;Kurtosis_quant_stop=3.925926;SVTYPE=DEL;SUPTYPE=AL;SVLEN=33;STRANDS=+-;STRANDS2=20,14,20,14;RE=34;AF=0.971429	GT:DR:DV	1/1:1:34
diff --git a/test/data/spec-example.vcf b/test/data/spec-example.vcf
new file mode 100644
index 0000000..40f0761
--- /dev/null
+++ b/test/data/spec-example.vcf
@@ -0,0 +1,23 @@
+##fileformat=VCFv4.3
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
+##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##INFO=<ID=TEST,Number=1,Type=String,Description="Used for testing">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
+##FORMAT=<ID=TEST,Number=1,Type=String,Description="Used for testing">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
diff --git a/test/data/y-chrom-haploid.vcf b/test/data/y-chrom-haploid.vcf
new file mode 100644
index 0000000..9018bfb
--- /dev/null
+++ b/test/data/y-chrom-haploid.vcf
@@ -0,0 +1,42 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##fileDate=20150218
+##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz
+##contig=<ID=Y,length=59373566,assembly=b37>
+##source=freeBayes v0.9.9.2 | GT values over-written with maximum likelihood state (subject to threshold) OR phylogenetic imputation
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##source=GenomeSTRiP_v1.04
+##ALT=<ID=CNV,Description="Copy number polymorphism">
+##FILTER=<ID=ALIGNLENGTH,Description="GSELENGTH < 200">
+##FILTER=<ID=CLUSTERSEP,Description="GSCLUSTERSEP == NA || GSCLUSTERSEP <= 2.0">
+##FILTER=<ID=DUPLICATE,Description="GSDUPLICATESCORE != NA && GSDUPLICATEOVERLAP >= 0.5 && GSDUPLICATESCORE >= 0.0">
+##FILTER=<ID=GTDEPTH,Description="GSM1 == NA || GSM1 <= 0.5 || GSM1 >= 2.0">
+##FILTER=<ID=INBREEDINGCOEFF,Description="GLINBREEDINGCOEFF != NA && GLINBREEDINGCOEFF < -0.15">
+##FILTER=<ID=NONVARIANT,Description="GSNONVARSCORE != NA && GSNONVARSCORE >= 13.0">
+##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
+##FORMAT=<ID=CNL,Number=.,Type=Float,Description="Copy number likelihoods with no frequency prior">
+##FORMAT=<ID=CNP,Number=.,Type=Float,Description="Copy number likelihoods">
+##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events">
+##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype likelihoods">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=FT,Number=1,Type=String,Description="Per-sample genotype filter">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of this variant">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral allele">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=SAS_AF,Number=A,Type=Float,Description="Allele frequency in the SAS populations calculated from AC and AN, in the range (0,1)">
+##INFO=<ID=EUR_AF,Number=A,Type=Float,Description="Allele frequency in the EUR populations calculated from AC and AN, in the range (0,1)">
+##INFO=<ID=AFR_AF,Number=A,Type=Float,Description="Allele frequency in the AFR populations calculated from AC and AN, in the range (0,1)">
+##INFO=<ID=AMR_AF,Number=A,Type=Float,Description="Allele frequency in the AMR populations calculated from AC and AN, in the range (0,1)">
+##INFO=<ID=EAS_AF,Number=A,Type=Float,Description="Allele frequency in the EAS populations calculated from AC and AN, in the range (0,1)">
+##INFO=<ID=VT,Number=.,Type=String,Description="indicates what type of variant the line represents">
+##INFO=<ID=EX_TARGET,Number=0,Type=Flag,Description="indicates whether a variant is within the exon pull down target boundaries">
+##INFO=<ID=MULTI_ALLELIC,Number=0,Type=Flag,Description="indicates whether a site is multi-allelic">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HG00096	HG00101	HG00103	HG001055
+Y	14483990	CNV_Y_14483990_15232198	C	<CN0>	100	PASS	AC=1;AF=0.000817661;AN=1223;END=15232198;NS=1233;SVTYPE=CNV;AMR_AF=0;AFR_AF=0;EUR_AF=0.0042;SAS_AF=0;EAS_AF=0;VT=SV;EX_TARGET	GT:CN:CNL:CNP:CNQ:GP:GQ:PL	0:1:-1000,0,-119.08:-1000,0,-218.16:99:0,-1000:99:0,10000	0:1:-1000,0,-43.56:-1000,0,-142.64:99:0,-1000:99:0,10000	.:.:.:.:.:.:.:.	.:.:.:.:.:.:.:.
+Y	2655180	rs11575897	G	A	100	PASS	AA=G;AC=22;AF=0.0178427;AN=1233;DP=84761;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0902;VT=SNP;EX_TARGET	GT	0	0	0	.
diff --git a/test/parse.test.ts b/test/parse.test.ts
index 127aed2..6c58b60 100644
--- a/test/parse.test.ts
+++ b/test/parse.test.ts
@@ -1,13 +1,12 @@
-// @ts-nocheck
 import { test, expect } from 'vitest'
 import fs from 'fs'
 import VCF, { parseBreakend } from '../src'
 
-const readVcf = file => {
+const readVcf = (file: string) => {
   const f = fs.readFileSync(file, 'utf8')
   const lines = f.split('\n')
-  const header = []
-  const rest = []
+  const header = [] as string[]
+  const rest = [] as string[]
   lines.forEach(line => {
     if (line.startsWith('#')) {
       header.push(line)
@@ -15,35 +14,16 @@ const readVcf = file => {
       rest.push(line)
     }
   })
-  return { header: header.join('\n'), lines: rest }
+  return {
+    header: header.join('\n'),
+    lines: rest,
+  }
 }
 
 function makeParser() {
+  const { header } = readVcf('test/data/spec-example.vcf')
   return new VCF({
-    header: `##fileformat=VCFv4.3
-##fileDate=20090805
-##source=myImputationProgramV3.1
-##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
-##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
-##phasing=partial
-##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
-##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
-##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
-##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
-##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
-##INFO=<ID=TEST,Number=1,Type=String,Description="Used for testing">
-##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
-##FILTER=<ID=q10,Description="Quality below 10">
-##FILTER=<ID=s50,Description="Less than 50% of samples have data">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
-##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
-##FORMAT=<ID=TEST,Number=1,Type=String,Description="Used for testing">
-#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\tNA00002\tNA00003
-`,
+    header,
   })
 }
 
@@ -70,12 +50,8 @@ test('can get metadata from the header', () => {
     Number: 1,
     Type: 'String',
   })
-})
 
-test('can get default metadata not in the header', () => {
-  const VCFParser = makeParser()
-  const metadata = VCFParser.getMetadata()
-  expect(metadata.INFO.AC).toEqual({
+  expect(VCFParser.getMetadata('INFO', 'AC')).toEqual({
     Number: 'A',
     Type: 'Integer',
     Description:
@@ -106,7 +82,7 @@ test('parses a line with a breakend ALT', () => {
   const variant = VCFParser.parseLine(
     '2\t321681\tbnd_W\tG\tG]17:198982]\t6\tPASS\tSVTYPE=BND',
   )
-  expect(variant.ALT.length).toBe(1)
+  expect(variant.ALT?.length).toBe(1)
   expect(variant.INFO.SVTYPE).toEqual(['BND'])
   expect(variant).toMatchSnapshot()
 })
@@ -116,7 +92,7 @@ test(`parses a line with mix of multiple breakends and non breakends`, () => {
   const variant = VCFParser.parseLine(
     `13\t123456\tbnd_U\tC\tCTATGTCG,C[2 : 321682[,C[17 : 198983[\t6\tPASS\tSVTYPE=BND;MATEID=bnd V,bnd Z`,
   )
-  expect(variant.ALT.length).toBe(3)
+  expect(variant.ALT?.length).toBe(3)
   expect(variant.INFO.SVTYPE).toEqual(['BND'])
   expect(variant).toMatchSnapshot()
 })
@@ -140,87 +116,24 @@ test('throws errors with bad header lines', () => {
   }).toThrow(/No format line/)
 })
 
-test('can parse a line from the VCF spec', () => {
+test('sniffles vcf', () => {
+  const { header, lines } = readVcf('test/data/sniffles.vcf')
   const VCFParser = new VCF({
-    header: `##fileformat=VCFv4.2
-##source=Sniffles
-##fileDate=20170420
-##ALT=<ID=DEL,Description="Deletion">
-##ALT=<ID=DUP,Description="Duplication">
-##ALT=<ID=INV,Description="Inversion">
-##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
-##ALT=<ID=TRA,Description="Translocation">
-##ALT=<ID=INS,Description="Insertion">
-##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
-##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
-##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
-##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
-##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
-##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
-##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
-##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
-##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
-##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	/seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam`,
+    header,
   })
-  const variant = VCFParser.parseLine(
-    '8\t17709115\t28329_0\tN\t<DEL>\t.\tPASS\tPRECISE;SVMETHOD=Snifflesv1.0.3;CHR2=8;END=17709148;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=20.524521;Kurtosis_quant_stop=3.925926;SVTYPE=DEL;SUPTYPE=AL;SVLEN=33;STRANDS=+-;STRANDS2=20,14,20,14;RE=34;AF=0.971429\tGT:DR:DV\t1/1:1:34',
-  )
+  const variant = VCFParser.parseLine(lines[0])
   expect(variant).toMatchSnapshot()
   expect(variant.SAMPLES()).toMatchSnapshot()
 })
 
 test('can parse a line from the VCF spec Y chrom (haploid))', () => {
+  const { header, lines } = readVcf('test/data/y-chrom-haploid.vcf')
   const VCFParser = new VCF({
-    header: `##fileformat=VCFv4.1
-##FILTER=<ID=PASS,Description="All filters passed">
-##fileDate=20150218
-##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz
-##contig=<ID=Y,length=59373566,assembly=b37>
-##source=freeBayes v0.9.9.2 | GT values over-written with maximum likelihood state (subject to threshold) OR phylogenetic imputation
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##source=GenomeSTRiP_v1.04
-##ALT=<ID=CNV,Description="Copy number polymorphism">
-##FILTER=<ID=ALIGNLENGTH,Description="GSELENGTH < 200">
-##FILTER=<ID=CLUSTERSEP,Description="GSCLUSTERSEP == NA || GSCLUSTERSEP <= 2.0">
-##FILTER=<ID=DUPLICATE,Description="GSDUPLICATESCORE != NA && GSDUPLICATEOVERLAP >= 0.5 && GSDUPLICATESCORE >= 0.0">
-##FILTER=<ID=GTDEPTH,Description="GSM1 == NA || GSM1 <= 0.5 || GSM1 >= 2.0">
-##FILTER=<ID=INBREEDINGCOEFF,Description="GLINBREEDINGCOEFF != NA && GLINBREEDINGCOEFF < -0.15">
-##FILTER=<ID=NONVARIANT,Description="GSNONVARSCORE != NA && GSNONVARSCORE >= 13.0">
-##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
-##FORMAT=<ID=CNL,Number=.,Type=Float,Description="Copy number likelihoods with no frequency prior">
-##FORMAT=<ID=CNP,Number=.,Type=Float,Description="Copy number likelihoods">
-##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events">
-##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype likelihoods">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=FT,Number=1,Type=String,Description="Per-sample genotype filter">
-##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
-##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of this variant">
-##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
-##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral allele">
-##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
-##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
-##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
-##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
-##INFO=<ID=SAS_AF,Number=A,Type=Float,Description="Allele frequency in the SAS populations calculated from AC and AN, in the range (0,1)">
-##INFO=<ID=EUR_AF,Number=A,Type=Float,Description="Allele frequency in the EUR populations calculated from AC and AN, in the range (0,1)">
-##INFO=<ID=AFR_AF,Number=A,Type=Float,Description="Allele frequency in the AFR populations calculated from AC and AN, in the range (0,1)">
-##INFO=<ID=AMR_AF,Number=A,Type=Float,Description="Allele frequency in the AMR populations calculated from AC and AN, in the range (0,1)">
-##INFO=<ID=EAS_AF,Number=A,Type=Float,Description="Allele frequency in the EAS populations calculated from AC and AN, in the range (0,1)">
-##INFO=<ID=VT,Number=.,Type=String,Description="indicates what type of variant the line represents">
-##INFO=<ID=EX_TARGET,Number=0,Type=Flag,Description="indicates whether a variant is within the exon pull down target boundaries">
-##INFO=<ID=MULTI_ALLELIC,Number=0,Type=Flag,Description="indicates whether a site is multi-allelic">
-#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tHG00096\tHG00101\tHG00103\tHG001055`,
+    header,
   })
-  const variant = VCFParser.parseLine(
-    'Y\t14483990\tCNV_Y_14483990_15232198\tC\t<CN0>\t100\tPASS\tAC=1;AF=0.000817661;AN=1223;END=15232198;NS=1233;SVTYPE=CNV;AMR_AF=0;AFR_AF=0;EUR_AF=0.0042;SAS_AF=0;EAS_AF=0;VT=SV;EX_TARGET\tGT:CN:CNL:CNP:CNQ:GP:GQ:PL\t0:1:-1000,0,-119.08:-1000,0,-218.16:99:0,-1000:99:0,10000\t0:1:-1000,0,-43.56:-1000,0,-142.64:99:0,-1000:99:0,10000\t.:.:.:.:.:.:.:.\t.:.:.:.:.:.:.:.',
-  )
-  const variant2 = VCFParser.parseLine(
-    'Y\t2655180\trs11575897\tG\tA\t100\tPASS\tAA=G;AC=22;AF=0.0178427;AN=1233;DP=84761;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0902;VT=SNP;EX_TARGET\tGT\t0\t0\t0\t.',
-  )
+  console.log({ lines })
+  const variant = VCFParser.parseLine(lines[0])
+  const variant2 = VCFParser.parseLine(lines[1])
   expect(variant).toMatchSnapshot()
   expect(variant.SAMPLES()).toMatchSnapshot()
   expect(variant2).toMatchSnapshot()
@@ -237,16 +150,10 @@ test('snippet from VCF 4.3 spec', () => {
   expect(variants.map(variant => variant.SAMPLES())).toMatchSnapshot()
 })
 test('can parse breakends', () => {
-  const header = `#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tBAMs/caudaus.sorted.sam`
+  const { header, lines } = readVcf('test/data/breakends.vcf')
   const VCFParser = new VCF({
     header,
   })
-  const lines =
-    `11	94975747	MantaBND:0:2:3:0:0:0:1	G	G]8:107653520]	.	PASS	SVTYPE=BND;MATEID=MantaBND:0:2:3:0:0:0:0;CIPOS=0,2;HOMLEN=2;HOMSEQ=TT;BND_DEPTH=216;MATE_BND_DEPTH=735	PR:SR	722,9:463,15
-11	94975753	MantaDEL:0:1:2:0:0:0	T	<DEL>	.	PASS	END=94987865;SVTYPE=DEL;SVLEN=12112;IMPRECISE;CIPOS=-156,156;CIEND=-150,150	PR	161,13
-11	94987872	MantaBND:0:0:1:0:0:0:0	T	T[8:107653411[	.	PASS	SVTYPE=BND;MATEID=MantaBND:0:0:1:0:0:0:1;BND_DEPTH=171;MATE_BND_DEPTH=830	PR:SR	489,4:520,19`.split(
-      '\n',
-    )
 
   expect(lines.map(line => VCFParser.parseLine(line))).toMatchSnapshot()
 })
@@ -297,7 +204,7 @@ test('shortcut parsing with vcf 4.3 bnd example', () => {
 
   const VCFParser = new VCF({ header })
   const variants = lines.map(line => VCFParser.parseLine(line))
-  expect(variants.map(m => m.ALT[0].toString())).toEqual(
+  expect(variants.map(m => m.ALT?.[0].toString())).toEqual(
     lines.map(line => line.split('\t')[4]),
   )
 
diff --git a/yarn.lock b/yarn.lock
index edd89c1..53254f4 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -576,6 +576,13 @@
   resolved "https://registry.yarnpkg.com/@types/ms/-/ms-0.7.34.tgz#10964ba0dee6ac4cd462e2795b6bebd407303433"
   integrity sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g==
 
+"@types/node@^22.10.1":
+  version "22.10.1"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-22.10.1.tgz#41ffeee127b8975a05f8c4f83fb89bcb2987d766"
+  integrity sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==
+  dependencies:
+    undici-types "~6.20.0"
+
 "@types/normalize-package-data@^2.4.0", "@types/normalize-package-data@^2.4.1", "@types/normalize-package-data@^2.4.3":
   version "2.4.4"
   resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.4.tgz#56e2cc26c397c038fab0e3a917a12d5c5909e901"
@@ -3523,6 +3530,11 @@ unc-path-regex@^0.1.2:
   resolved "https://registry.yarnpkg.com/unc-path-regex/-/unc-path-regex-0.1.2.tgz#e73dd3d7b0d7c5ed86fbac6b0ae7d8c6a69d50fa"
   integrity sha512-eXL4nmJT7oCpkZsHZUOJo8hcX3GbsiDOa0Qu9F646fi8dT3XuSVopVqAcEiVzSKKH7UoDti23wNX3qGFxcW5Qg==
 
+undici-types@~6.20.0:
+  version "6.20.0"
+  resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.20.0.tgz#8171bf22c1f588d1554d55bf204bc624af388433"
+  integrity sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==
+
 unicorn-magic@^0.1.0:
   version "0.1.0"
   resolved "https://registry.yarnpkg.com/unicorn-magic/-/unicorn-magic-0.1.0.tgz#1bb9a51c823aaf9d73a8bfcd3d1a23dde94b0ce4"