Skip to content

Commit

Permalink
Merge branch 'master' of gitlab.rlp.net:tron/vafator
Browse files Browse the repository at this point in the history
  • Loading branch information
Pablo Riesgo Ferreiro committed Feb 5, 2020
2 parents fe66b8b + 6b00d91 commit b879bbe
Show file tree
Hide file tree
Showing 21 changed files with 357 additions and 6 deletions.
7 changes: 5 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ Input:
Optional input:
* output: the folder where to publish output
* skip_multiallelic_filter: skip the filtering of multiallelics by frequency in the tumor (only highest frequency variant at the same position is kept)
* base_call_quality: threshold for the base call quality, only base calls with a higher value are considered (default: 29)
* mapping_quality: threshold for the mapping quality, only reads with a higher value are considered (default: 0)
Output:
* Annotated VCF file
Expand Down Expand Up @@ -93,11 +96,11 @@ else {

output:
set val("${name}"), val("${publish_dir}/${name}/${vcf.baseName}.filtered_multiallelics.vcf") into output_files
file("${vcf.baseName}.filtered_multiallelics.vcf") into annotated_vcf
file("${vcf.baseName}.filtered_multiallelics.vcf") into filtered_vcf

script:
"""
/home/priesgof/src/vafator/venv/bin/multiallelic-filter --input-vcf ${vcf} --output-vcf ${vcf.baseName}.filtered_multiallelics.vcf
/home/priesgof/src/vafator/venv/bin/multiallelics-filter --input-vcf ${vcf} --output-vcf ${vcf.baseName}.filtered_multiallelics.vcf
"""
}
}
Expand Down
11 changes: 7 additions & 4 deletions vafator/multiallelic_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ def run(self):
# first time stores the variant and moves on
prev_variant = variant
continue
if variant.is_snp and variant.CHROM == prev_variant.CHROM and variant.POS == prev_variant.POS:
# considers only SNVs with same chromosome, position and reference
if variant.is_snp and variant.CHROM == prev_variant.CHROM and variant.POS == prev_variant.POS and \
variant.REF == prev_variant.REF:
af1 = self.get_tumor_af(prev_variant)
af2 = self.get_tumor_af(variant)
# keeps the variant with the highest AF
Expand All @@ -64,15 +66,16 @@ def run(self):
if len(batch) >= 1000:
self._write_batch(batch)
batch = []
if len(batch) > 0:
self._write_batch(batch)
# do not forget to add the last variant!
batch.append(prev_variant)
self._write_batch(batch)

self.vcf_writer.close()
self.vcf.close()

def set_multiallelic_annotation(self, variant, alt, af):
variant.INFO[self.multiallelic_annotation_name] = \
",".join(variant.INFO.get(self.multiallelic_annotation_name, []) + [alt, str(af)])
",".join(variant.INFO.get(self.multiallelic_annotation_name, "").split(",") + [alt, str(af)])

def get_tumor_af(self, prev_variant):
return prev_variant.INFO.get("{}_af".format(self.tumor_sample_name), 0.0)
Expand Down
Empty file added vafator/tests/__init__.py
Empty file.
Binary file added vafator/tests/resources/COLO_829_n1.bam
Binary file not shown.
Binary file added vafator/tests/resources/COLO_829_n1.bam.bai
Binary file not shown.
Binary file added vafator/tests/resources/COLO_829_t1.bam
Binary file not shown.
Binary file added vafator/tests/resources/COLO_829_t1.bam.bai
Binary file not shown.
25 changes: 25 additions & 0 deletions vafator/tests/resources/results/test1_output.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##INFO=<ID=tumor_af,Number=1,Type=String,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##vafator_command_line={"name": "multiallelic-filter", "version": "0.2.0", "date": "Wed Feb 5 14:11:29 2020", "timestamp": 1580908289.074858, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test1.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test1_output.vcf"}
##INFO=<ID=multiallelic,Number=.,Type=String,Description="Indicates multiallelic variants filtered and their frequencies if any (e.g.: T,0.12)">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor
chr1 25734793 . C T . PASS . GT 0/1
chr1 37323930 . C T . PASS . GT 0/1
chr2 1234 . C T . PASS . GT 0/1
chr2 1235 . C T . PASS . GT 0/1
chr3 1234 . C T . PASS . GT 0/1
chr4 1234 . C T . PASS . GT 0/1
chr4 1235 . C T . PASS . GT 0/1
chr4 1236 . C T . PASS . GT 0/1
chr5 1234 . C T . PASS . GT 0/1
chr6 1234 . C T . PASS . GT 0/1
chr6 1235 . C T . PASS . GT 0/1
chr6 1236 . C T . PASS . GT 0/1
23 changes: 23 additions & 0 deletions vafator/tests/resources/results/test2_output.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##INFO=<ID=tumor_af,Number=1,Type=String,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##vafator_command_line={"name": "multiallelic-filter", "version": "0.2.0", "date": "Wed Feb 5 14:11:29 2020", "timestamp": 1580908289.074858, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test2.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test2_output.vcf"}
##INFO=<ID=multiallelic,Number=.,Type=String,Description="Indicates multiallelic variants filtered and their frequencies if any (e.g.: T,0.12)">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor
chr1 25734793 . C T . PASS . GT 0/1
chr1 37323930 . C T . PASS . GT 0/1
chr2 1234 . C T . PASS . GT 0/1
chr2 1235 . C T . PASS . GT 0/1
chr3 1234 . C T . PASS . GT 0/1
chr4 1234 . C T . PASS . GT 0/1
chr4 1235 . C A . PASS tumor_af=0.2;multiallelic=,T,0.1 GT 0/1
chr5 1234 . C T . PASS . GT 0/1
chr6 1234 . C T . PASS tumor_af=0.5 GT 0/1
chr6 1235 . C G . PASS tumor_af=0.2;multiallelic=,A,0.01,T,0.1 GT 0/1
15 changes: 15 additions & 0 deletions vafator/tests/resources/results/test3_output.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##INFO=<ID=tumor_af,Number=1,Type=String,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##vafator_command_line={"name": "multiallelic-filter", "version": "0.2.0", "date": "Wed Feb 5 14:11:29 2020", "timestamp": 1580908289.074858, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test3.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test3_output.vcf"}
##INFO=<ID=multiallelic,Number=.,Type=String,Description="Indicates multiallelic variants filtered and their frequencies if any (e.g.: T,0.12)">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor
chr1 1234 . C T . PASS . GT 0/1
chr1 1234 . G T . PASS . GT 0/1
14 changes: 14 additions & 0 deletions vafator/tests/resources/results/test4_output.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##INFO=<ID=tumor_af,Number=1,Type=String,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##vafator_command_line={"name": "multiallelic-filter", "version": "0.2.0", "date": "Wed Feb 5 14:11:29 2020", "timestamp": 1580908289.074858, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test4.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test4_output.vcf"}
##INFO=<ID=multiallelic,Number=.,Type=String,Description="Indicates multiallelic variants filtered and their frequencies if any (e.g.: T,0.12)">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor
chr4 1235 . C A . PASS tumor_af=0.2;multiallelic=,G,0.15 GT 0/1
14 changes: 14 additions & 0 deletions vafator/tests/resources/results/test5_output.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##INFO=<ID=tumor_af,Number=1,Type=String,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##vafator_command_line={"name": "multiallelic-filter", "version": "0.2.0", "date": "Wed Feb 5 14:11:29 2020", "timestamp": 1580908289.074858, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test5.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test5_output.vcf"}
##INFO=<ID=multiallelic,Number=.,Type=String,Description="Indicates multiallelic variants filtered and their frequencies if any (e.g.: T,0.12)">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor
chr4 1235 . C A . PASS tumor_af=0.1;multiallelic=,G,0.1 GT 0/1
29 changes: 29 additions & 0 deletions vafator/tests/resources/results/test_annotator1_output.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##INFO=<ID=tumor_af,Number=1,Type=String,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##vafator_command_line={"name": "vafator", "version": "0.2.0", "date": "Wed Feb 5 14:11:37 2020", "timestamp": 1580908297.673633, "input_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/test1.vcf", "output_vcf": "/home/priesgof/src/vafator/vafator/tests/resources/results/test_annotator1_output.vcf", "normal_bams": ["/home/priesgof/src/vafator/vafator/tests/resources/COLO_829_n1.bam"], "tumor_bams": ["/home/priesgof/src/vafator/vafator/tests/resources/COLO_829_t1.bam"], "mapping_quality_threshold": 0, "base_call_quality_threshold": 29}
##INFO=<ID=tumor_dp,Number=A,Type=Float,Description="Total depth of coverage in the tumor samples">
##INFO=<ID=tumor_ac,Number=A,Type=Integer,Description="Allele count for the alternate alleles in the tumor samples">
##INFO=<ID=normal_af,Number=A,Type=Float,Description="Allele frequency for the alternate alleles in the normal samples">
##INFO=<ID=normal_dp,Number=A,Type=Float,Description="Total depth of coverage in the normal samples">
##INFO=<ID=normal_ac,Number=A,Type=Integer,Description="Allele count for the alternate alleles in the normal samples">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor
chr1 25734793 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
chr1 37323930 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
chr2 1234 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
chr2 1235 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
chr3 1234 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
chr4 1234 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
chr4 1235 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
chr4 1236 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
chr5 1234 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
chr6 1234 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
chr6 1235 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
chr6 1236 . C T . PASS tumor_af=0.0;tumor_ac=0;tumor_dp=0;normal_af=0.0;normal_ac=0;normal_dp=0 GT 0/1
23 changes: 23 additions & 0 deletions vafator/tests/resources/test1.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
##fileformat=VCFv4.2
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=tumor_af,Number=1,Type=String,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor
chr1 25734793 . C T . PASS . GT 0/1
chr1 37323930 . C T . PASS . GT 0/1
chr2 1234 . C T . PASS . GT 0/1
chr2 1235 . C T . PASS . GT 0/1
chr3 1234 . C T . PASS . GT 0/1
chr4 1234 . C T . PASS . GT 0/1
chr4 1235 . C T . PASS . GT 0/1
chr4 1236 . C T . PASS . GT 0/1
chr5 1234 . C T . PASS . GT 0/1
chr6 1234 . C T . PASS . GT 0/1
chr6 1235 . C T . PASS . GT 0/1
chr6 1236 . C T . PASS . GT 0/1
24 changes: 24 additions & 0 deletions vafator/tests/resources/test2.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
##fileformat=VCFv4.2
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=tumor_af,Number=1,Type=String,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor
chr1 25734793 . C T . PASS . GT 0/1
chr1 37323930 . C T . PASS . GT 0/1
chr2 1234 . C T . PASS . GT 0/1
chr2 1235 . C T . PASS . GT 0/1
chr3 1234 . C T . PASS . GT 0/1
chr4 1234 . C T . PASS . GT 0/1
chr4 1235 . C T . PASS tumor_af=0.1 GT 0/1
chr4 1235 . C A . PASS tumor_af=0.2 GT 0/1
chr5 1234 . C T . PASS . GT 0/1
chr6 1234 . C T . PASS tumor_af=0.5 GT 0/1
chr6 1235 . C A . PASS tumor_af=0.01 GT 0/1
chr6 1235 . C G . PASS tumor_af=0.2 GT 0/1
chr6 1235 . C T . PASS tumor_af=0.1 GT 0/1
13 changes: 13 additions & 0 deletions vafator/tests/resources/test3.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
##fileformat=VCFv4.2
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=tumor_af,Number=1,Type=String,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor
chr1 1234 . C T . PASS . GT 0/1
chr1 1234 . G T . PASS . GT 0/1
14 changes: 14 additions & 0 deletions vafator/tests/resources/test4.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
##fileformat=VCFv4.2
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=tumor_af,Number=1,Type=String,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor
chr4 1235 . C G . PASS tumor_af=0.15 GT 0/1
chr4 1235 . C T . PASS tumor_af=0.1 GT 0/1
chr4 1235 . C A . PASS tumor_af=0.2 GT 0/1
14 changes: 14 additions & 0 deletions vafator/tests/resources/test5.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
##fileformat=VCFv4.2
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=tumor_af,Number=1,Type=String,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tumor
chr4 1235 . C G . PASS tumor_af=0.1 GT 0/1
chr4 1235 . C T . PASS tumor_af=0.1 GT 0/1
chr4 1235 . C A . PASS tumor_af=0.1 GT 0/1
32 changes: 32 additions & 0 deletions vafator/tests/test_annotator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
import pkg_resources
from unittest import TestCase
from cyvcf2 import VCF
from vafator.annotator import Annotator
import vafator.tests.utils as test_utils


class TestAnnotator(TestCase):

def test_annotator(self):
input_file = pkg_resources.resource_filename(__name__, "resources/test1.vcf")
output_vcf = pkg_resources.resource_filename(__name__, "resources/results/test_annotator1_output.vcf")
bam1 = pkg_resources.resource_filename(__name__, "resources/COLO_829_n1.bam")
bam2 = pkg_resources.resource_filename(__name__, "resources/COLO_829_t1.bam")
annotator = Annotator(input_vcf=input_file, output_vcf=output_vcf, normal_bams=[bam1], tumor_bams=[bam2])
annotator.run()

self.assertTrue(os.path.exists(output_vcf))
n_variants_input = test_utils._get_count_variants(input_file)
n_variants_output = test_utils._get_count_variants(output_vcf)
self.assertTrue(n_variants_input == n_variants_output)

def _get_info_at(self, input_file, chromosome, position, annotation):
vcf = VCF(input_file)
self.assertIsNotNone(vcf)
for v in vcf:
if v.POS == position and v.CHROM == chromosome:
vcf.close()
return v.INFO.get(annotation)
vcf.close()
return {}
Loading

0 comments on commit b879bbe

Please sign in to comment.