-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
bugfix on missing last variant + problems with trios of multiallelics…
… + testing
- Loading branch information
Pablo Riesgo Ferreiro
committed
Feb 5, 2020
1 parent
17740c1
commit 65bd2fb
Showing
21 changed files
with
357 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
##fileformat=VCFv4.2 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##contig=<ID=chr1,length=249250621> | ||
##contig=<ID=chr2,length=243199373> | ||
##contig=<ID=chr3,length=198022430> | ||
##contig=<ID=chr4,length=191154276> | ||
##contig=<ID=chr5,length=180915260> | ||
##contig=<ID=chr6,length=171115067> | ||
##INFO=<ID=tumor_af,Number=1,Type=String,Description=""> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##vafator_command_line={"name": "multiallelic-filter", "version": "0.2.0", "date": "Wed Feb 5 14:11:29 2020", "timestamp": 1580908289.074858, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test1.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test1_output.vcf"} | ||
##INFO=<ID=multiallelic,Number=.,Type=String,Description="Indicates multiallelic variants filtered and their frequencies if any (e.g.: T,0.12)"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor | ||
chr1 25734793 . C T . PASS . GT 0/1 | ||
chr1 37323930 . C T . PASS . GT 0/1 | ||
chr2 1234 . C T . PASS . GT 0/1 | ||
chr2 1235 . C T . PASS . GT 0/1 | ||
chr3 1234 . C T . PASS . GT 0/1 | ||
chr4 1234 . C T . PASS . GT 0/1 | ||
chr4 1235 . C T . PASS . GT 0/1 | ||
chr4 1236 . C T . PASS . GT 0/1 | ||
chr5 1234 . C T . PASS . GT 0/1 | ||
chr6 1234 . C T . PASS . GT 0/1 | ||
chr6 1235 . C T . PASS . GT 0/1 | ||
chr6 1236 . C T . PASS . GT 0/1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
##fileformat=VCFv4.2 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##contig=<ID=chr1,length=249250621> | ||
##contig=<ID=chr2,length=243199373> | ||
##contig=<ID=chr3,length=198022430> | ||
##contig=<ID=chr4,length=191154276> | ||
##contig=<ID=chr5,length=180915260> | ||
##contig=<ID=chr6,length=171115067> | ||
##INFO=<ID=tumor_af,Number=1,Type=String,Description=""> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##vafator_command_line={"name": "multiallelic-filter", "version": "0.2.0", "date": "Wed Feb 5 14:11:29 2020", "timestamp": 1580908289.074858, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test2.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test2_output.vcf"} | ||
##INFO=<ID=multiallelic,Number=.,Type=String,Description="Indicates multiallelic variants filtered and their frequencies if any (e.g.: T,0.12)"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor | ||
chr1 25734793 . C T . PASS . GT 0/1 | ||
chr1 37323930 . C T . PASS . GT 0/1 | ||
chr2 1234 . C T . PASS . GT 0/1 | ||
chr2 1235 . C T . PASS . GT 0/1 | ||
chr3 1234 . C T . PASS . GT 0/1 | ||
chr4 1234 . C T . PASS . GT 0/1 | ||
chr4 1235 . C A . PASS tumor_af=0.2;multiallelic=,T,0.1 GT 0/1 | ||
chr5 1234 . C T . PASS . GT 0/1 | ||
chr6 1234 . C T . PASS tumor_af=0.5 GT 0/1 | ||
chr6 1235 . C G . PASS tumor_af=0.2;multiallelic=,A,0.01,T,0.1 GT 0/1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
##fileformat=VCFv4.2 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##contig=<ID=chr1,length=249250621> | ||
##contig=<ID=chr2,length=243199373> | ||
##contig=<ID=chr3,length=198022430> | ||
##contig=<ID=chr4,length=191154276> | ||
##contig=<ID=chr5,length=180915260> | ||
##contig=<ID=chr6,length=171115067> | ||
##INFO=<ID=tumor_af,Number=1,Type=String,Description=""> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##vafator_command_line={"name": "multiallelic-filter", "version": "0.2.0", "date": "Wed Feb 5 14:11:29 2020", "timestamp": 1580908289.074858, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test3.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test3_output.vcf"} | ||
##INFO=<ID=multiallelic,Number=.,Type=String,Description="Indicates multiallelic variants filtered and their frequencies if any (e.g.: T,0.12)"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor | ||
chr1 1234 . C T . PASS . GT 0/1 | ||
chr1 1234 . G T . PASS . GT 0/1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
##fileformat=VCFv4.2 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##contig=<ID=chr1,length=249250621> | ||
##contig=<ID=chr2,length=243199373> | ||
##contig=<ID=chr3,length=198022430> | ||
##contig=<ID=chr4,length=191154276> | ||
##contig=<ID=chr5,length=180915260> | ||
##contig=<ID=chr6,length=171115067> | ||
##INFO=<ID=tumor_af,Number=1,Type=String,Description=""> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##vafator_command_line={"name": "multiallelic-filter", "version": "0.2.0", "date": "Wed Feb 5 14:11:29 2020", "timestamp": 1580908289.074858, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test4.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test4_output.vcf"} | ||
##INFO=<ID=multiallelic,Number=.,Type=String,Description="Indicates multiallelic variants filtered and their frequencies if any (e.g.: T,0.12)"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor | ||
chr4 1235 . C A . PASS tumor_af=0.2;multiallelic=,G,0.15 GT 0/1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
##fileformat=VCFv4.2 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##contig=<ID=chr1,length=249250621> | ||
##contig=<ID=chr2,length=243199373> | ||
##contig=<ID=chr3,length=198022430> | ||
##contig=<ID=chr4,length=191154276> | ||
##contig=<ID=chr5,length=180915260> | ||
##contig=<ID=chr6,length=171115067> | ||
##INFO=<ID=tumor_af,Number=1,Type=String,Description=""> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##vafator_command_line={"name": "multiallelic-filter", "version": "0.2.0", "date": "Wed Feb 5 14:11:29 2020", "timestamp": 1580908289.074858, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test5.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test5_output.vcf"} | ||
##INFO=<ID=multiallelic,Number=.,Type=String,Description="Indicates multiallelic variants filtered and their frequencies if any (e.g.: T,0.12)"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor | ||
chr4 1235 . C A . PASS tumor_af=0.1;multiallelic=,G,0.1 GT 0/1 |
29 changes: 29 additions & 0 deletions
29
vafator/tests/resources/results/test_annotator1_output.vcf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
##fileformat=VCFv4.2 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##contig=<ID=chr1,length=249250621> | ||
##contig=<ID=chr2,length=243199373> | ||
##contig=<ID=chr3,length=198022430> | ||
##contig=<ID=chr4,length=191154276> | ||
##contig=<ID=chr5,length=180915260> | ||
##contig=<ID=chr6,length=171115067> | ||
##INFO=<ID=tumor_af,Number=1,Type=String,Description=""> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##vafator_command_line={"name": "vafator", "version": "0.2.0", "date": "Wed Feb 5 14:11:37 2020", "timestamp": 1580908297.673633, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test1.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test_annotator1_output.vcf", "normal_bams": ["/home/priesgof/src/vafator/vafator/tests/resources/COLO_829_n1.bam"], "tumor_bams": ["/home/priesgof/src/vafator/vafator/tests/resources/COLO_829_t1.bam"], "mapping_quality_threshold": 0, "base_call_quality_threshold": 29} | ||
##INFO=<ID=tumor_dp,Number=A,Type=Float,Description="Total depth of coverage in the tumor samples"> | ||
##INFO=<ID=tumor_ac,Number=A,Type=Integer,Description="Allele count for the alternate alleles in the tumor samples"> | ||
##INFO=<ID=normal_af,Number=A,Type=Float,Description="Allele frequency for the alternate alleles in the normal samples"> | ||
##INFO=<ID=normal_dp,Number=A,Type=Float,Description="Total depth of coverage in the normal samples"> | ||
##INFO=<ID=normal_ac,Number=A,Type=Integer,Description="Allele count for the alternate alleles in the normal samples"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor | ||
chr1 25734793 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 | ||
chr1 37323930 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 | ||
chr2 1234 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 | ||
chr2 1235 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 | ||
chr3 1234 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 | ||
chr4 1234 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 | ||
chr4 1235 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 | ||
chr4 1236 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 | ||
chr5 1234 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 | ||
chr6 1234 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 | ||
chr6 1235 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 | ||
chr6 1236 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
##fileformat=VCFv4.2 | ||
##contig=<ID=chr1,length=249250621> | ||
##contig=<ID=chr2,length=243199373> | ||
##contig=<ID=chr3,length=198022430> | ||
##contig=<ID=chr4,length=191154276> | ||
##contig=<ID=chr5,length=180915260> | ||
##contig=<ID=chr6,length=171115067> | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##INFO=<ID=tumor_af,Number=1,Type=String,Description=""> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor | ||
chr1 25734793 . C T . PASS . GT 0/1 | ||
chr1 37323930 . C T . PASS . GT 0/1 | ||
chr2 1234 . C T . PASS . GT 0/1 | ||
chr2 1235 . C T . PASS . GT 0/1 | ||
chr3 1234 . C T . PASS . GT 0/1 | ||
chr4 1234 . C T . PASS . GT 0/1 | ||
chr4 1235 . C T . PASS . GT 0/1 | ||
chr4 1236 . C T . PASS . GT 0/1 | ||
chr5 1234 . C T . PASS . GT 0/1 | ||
chr6 1234 . C T . PASS . GT 0/1 | ||
chr6 1235 . C T . PASS . GT 0/1 | ||
chr6 1236 . C T . PASS . GT 0/1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
##fileformat=VCFv4.2 | ||
##contig=<ID=chr1,length=249250621> | ||
##contig=<ID=chr2,length=243199373> | ||
##contig=<ID=chr3,length=198022430> | ||
##contig=<ID=chr4,length=191154276> | ||
##contig=<ID=chr5,length=180915260> | ||
##contig=<ID=chr6,length=171115067> | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##INFO=<ID=tumor_af,Number=1,Type=String,Description=""> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor | ||
chr1 25734793 . C T . PASS . GT 0/1 | ||
chr1 37323930 . C T . PASS . GT 0/1 | ||
chr2 1234 . C T . PASS . GT 0/1 | ||
chr2 1235 . C T . PASS . GT 0/1 | ||
chr3 1234 . C T . PASS . GT 0/1 | ||
chr4 1234 . C T . PASS . GT 0/1 | ||
chr4 1235 . C T . PASS tumor_af=0.1 GT 0/1 | ||
chr4 1235 . C A . PASS tumor_af=0.2 GT 0/1 | ||
chr5 1234 . C T . PASS . GT 0/1 | ||
chr6 1234 . C T . PASS tumor_af=0.5 GT 0/1 | ||
chr6 1235 . C A . PASS tumor_af=0.01 GT 0/1 | ||
chr6 1235 . C G . PASS tumor_af=0.2 GT 0/1 | ||
chr6 1235 . C T . PASS tumor_af=0.1 GT 0/1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
##fileformat=VCFv4.2 | ||
##contig=<ID=chr1,length=249250621> | ||
##contig=<ID=chr2,length=243199373> | ||
##contig=<ID=chr3,length=198022430> | ||
##contig=<ID=chr4,length=191154276> | ||
##contig=<ID=chr5,length=180915260> | ||
##contig=<ID=chr6,length=171115067> | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##INFO=<ID=tumor_af,Number=1,Type=String,Description=""> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor | ||
chr1 1234 . C T . PASS . GT 0/1 | ||
chr1 1234 . G T . PASS . GT 0/1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
##fileformat=VCFv4.2 | ||
##contig=<ID=chr1,length=249250621> | ||
##contig=<ID=chr2,length=243199373> | ||
##contig=<ID=chr3,length=198022430> | ||
##contig=<ID=chr4,length=191154276> | ||
##contig=<ID=chr5,length=180915260> | ||
##contig=<ID=chr6,length=171115067> | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##INFO=<ID=tumor_af,Number=1,Type=String,Description=""> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor | ||
chr4 1235 . C G . PASS tumor_af=0.15 GT 0/1 | ||
chr4 1235 . C T . PASS tumor_af=0.1 GT 0/1 | ||
chr4 1235 . C A . PASS tumor_af=0.2 GT 0/1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
##fileformat=VCFv4.2 | ||
##contig=<ID=chr1,length=249250621> | ||
##contig=<ID=chr2,length=243199373> | ||
##contig=<ID=chr3,length=198022430> | ||
##contig=<ID=chr4,length=191154276> | ||
##contig=<ID=chr5,length=180915260> | ||
##contig=<ID=chr6,length=171115067> | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##INFO=<ID=tumor_af,Number=1,Type=String,Description=""> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor | ||
chr4 1235 . C G . PASS tumor_af=0.1 GT 0/1 | ||
chr4 1235 . C T . PASS tumor_af=0.1 GT 0/1 | ||
chr4 1235 . C A . PASS tumor_af=0.1 GT 0/1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import os | ||
import pkg_resources | ||
from unittest import TestCase | ||
from cyvcf2 import VCF | ||
from vafator.annotator import Annotator | ||
import vafator.tests.utils as test_utils | ||
|
||
|
||
class TestAnnotator(TestCase):
    """Tests that run the vafator ``Annotator`` over the bundled resources."""

    def test_annotator(self):
        """Run the annotator on ``test1.vcf`` and check no variant is lost.

        The annotator is given one normal BAM and one tumor BAM. The test
        passes when the output VCF exists and the variant count reported by
        ``test_utils._get_count_variants`` is the same for input and output.
        """
        input_file = pkg_resources.resource_filename(__name__, "resources/test1.vcf")
        output_vcf = pkg_resources.resource_filename(__name__, "resources/results/test_annotator1_output.vcf")
        bam1 = pkg_resources.resource_filename(__name__, "resources/COLO_829_n1.bam")
        bam2 = pkg_resources.resource_filename(__name__, "resources/COLO_829_t1.bam")
        annotator = Annotator(input_vcf=input_file, output_vcf=output_vcf, normal_bams=[bam1], tumor_bams=[bam2])
        annotator.run()

        self.assertTrue(os.path.exists(output_vcf))
        n_variants_input = test_utils._get_count_variants(input_file)
        n_variants_output = test_utils._get_count_variants(output_vcf)
        # assertEqual reports both counts on failure, unlike assertTrue(a == b).
        self.assertEqual(n_variants_input, n_variants_output)

    def _get_info_at(self, input_file, chromosome, position, annotation):
        """Return an INFO annotation of the first variant at a given locus.

        :param input_file: path of the VCF file to read
        :param chromosome: value compared against each record's ``CHROM``
        :param position: value compared against each record's ``POS``
        :param annotation: INFO key to look up on the matching record
        :return: ``INFO.get(annotation)`` of the first record matching both
            chromosome and position, or ``{}`` when no record matches
        """
        vcf = VCF(input_file)
        self.assertIsNotNone(vcf)
        try:
            for v in vcf:
                if v.POS == position and v.CHROM == chromosome:
                    # Read the value while the reader is still open; the
                    # ``finally`` clause closes it afterwards.
                    return v.INFO.get(annotation)
        finally:
            # Close the reader on every exit path, including exceptions.
            vcf.close()
        return {}
Oops, something went wrong.