Skip to content

Commit

Permalink
Can split multiallelic calls
Browse files Browse the repository at this point in the history
  • Loading branch information
moonso committed Dec 12, 2014
1 parent b77db2f commit 851342b
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 12 deletions.
16 changes: 16 additions & 0 deletions examples/multi_allele_example.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
##fileformat=VCFv4.1
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##contig=<ID=1,length=249250621,assembly=b37>
##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT father mother proband
1 11900 . A T 100 PASS MQ=1 GT:GQ 0/1:60 0/1:60 1/1:60
1 879585 . A T 100 PASS MQ=1 GT:GQ 0/1:60 0/0:60 0/1:60
1 879586 . A T 100 PASS MQ=1 GT:GQ 0/0:60 0/1:60 0/1:60
1 947378 . A T 100 PASS MQ=1 GT:GQ 0/0:60 0/0:60 0/1:60
1 973348 . G A 100 PASS MQ=1 GT:GQ 0/0:60 0/0:60 0/1:60
3 879585 . A T 100 PASS MQ=1 GT:GQ 0/1:60 0/0:60 0/1:60
3 879586 . A T 100 PASS MQ=1 GT:GQ 0/0:60 0/1:60 0/1:60
3 947378 . A T 100 PASS MQ=1 GT:GQ:AD:DP 0/0:60:5,7:12 0/0:60:4,6:14 0/1:60:7,8:16
3 947379 . A T,C 100 PASS MQ=1 GT:GQ:AD:DP 1/1:60:0,7,0:12 0/2:60:7,0,10:17 1/2:60:0,7,8:16
3 973348 . G A 100 PASS MQ=1 GT:GQ:TP 0/0:60:23 0/0:60 0/1:60
3 973349 . G A,T 100 PASS MQ=1 GT:GQ:TP 0/1:60:23 1/2:60 0/2:60
14 changes: 8 additions & 6 deletions genmod/commands/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ def check_tabix_index(compressed_file, file_type='cadd', verbose=False):
is_flag=True,
help='Do not print the variants.'
)
@click.option('-split' ,'--split_variants',
is_flag=True,
help='If the variants should be splitted.'
)
@click.option('-g' ,'--whole_gene',
is_flag=True,
help="""If compounds should be checked in the whole gene regions.
Expand Down Expand Up @@ -203,7 +207,7 @@ def check_tabix_index(compressed_file, file_type='cadd', verbose=False):
)
def annotate(family_file, variant_file, family_type, vep, silent, phased, strict, cadd_raw, whole_gene,
annotation_dir, cadd_file, cadd_1000g, cadd_esp, cadd_indels, thousand_g, exac, outfile,
chr_prefix, processes, verbose):
chr_prefix, split_variants, processes, verbose):
"""Annotate variants in a VCF file.\n
The main function with genmod is to annotate genetic inheritance patterns for variants in families.
Use flag --family together with a .ped file to describe which individuals in the vcf you wish to check inheritance for in the analysis.
Expand All @@ -226,17 +230,15 @@ def annotate(family_file, variant_file, family_type, vep, silent, phased, strict
######### Setup a variant parser #########

if variant_file == '-':
variant_parser = vcf_parser.VCFParser(fsock = sys.stdin)
variant_parser = vcf_parser.VCFParser(fsock = sys.stdin, split_variants=split_variants)
else:
variant_parser = vcf_parser.VCFParser(infile = variant_file)
variant_parser = vcf_parser.VCFParser(infile = variant_file, split_variants=split_variants)

# These are the individuals in from the vcf file
individuals = variant_parser.individuals

head = variant_parser.metadata



######### Start to parse the ped file (if there is one) #########

families = {}
Expand Down Expand Up @@ -347,7 +349,7 @@ def annotate(family_file, variant_file, family_type, vep, silent, phased, strict

if verbosity:
print('Number of CPU:s %s' % cpu_count())
print('Number of model checkers %s' % num_model_checkers)
print('Number of model checkers: %s' % num_model_checkers)

# These are the workers that do the heavy part of the analysis
model_checkers = [variant_consumer.VariantConsumer(variant_queue,
Expand Down
11 changes: 6 additions & 5 deletions genmod/variant_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,11 +233,12 @@ def check_vep_annotation(self, variant):

annotation = set()
# vep_info is a dictionary with genes as key and annotation as values
for gene in variant.get('vep_info',{}):
for consequence in variant['vep_info'][gene].get('Consequence', '').split('&'):
if consequence in self.INTERESTING_SO_TERMS:
annotation.add(gene)

for allele in variant.get('vep_info',{}):
if allele != 'gene_ids':
for vep_annotation in variant['vep_info'][allele]:
for consequence in vep_annotation.get('Consequence', '').split('&'):
if consequence in self.INTERESTING_SO_TERMS:
annotation.add(vep_annotation.get('SYMBOL', ''))
return annotation


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
license = 'MIT License',
install_requires=[
'ped_parser >= 1.0',
'vcf_parser == 0.8.3',
'vcf_parser >= 1.0',
'pytabix',
'pytest',
'interval_tree',
Expand Down

0 comments on commit 851342b

Please sign in to comment.