-
Notifications
You must be signed in to change notification settings - Fork 0
/
VCF_parser.py
124 lines (96 loc) · 3.41 KB
/
VCF_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python
from __future__ import division
"""
Script for parsing and editing a VCF file
"""
from pysam import VariantFile
import vcf
import sys
import argparse
import os.path
import vcf
epi = ('\
\n\
VCF file parser, allowing for custom advanced filtering of VCF files\n\
\n\
')
# Describe what the script does
parser = argparse.ArgumentParser(description='This script parses a VCF file and applies custom filtering', epilog= epi, formatter_class=argparse.RawTextHelpFormatter)
# Get inputs
parser.add_argument('-i', '--input', default=None, dest='vcf', action='store', required=True, help="VCF file")
#parser.add_argument('-f', '--filter-string', default=None, dest='fi', action='store', required=True, help="String of filters to apply")
# Check for no input
if len(sys.argv)==1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
# Check if input files exist
if not os.path.isfile(args.vcf)==True:
print("Cannot find input file ",args.vcf)
sys.exit(1)
# Read input
o1=args.vcf+".pyVCF.vcf"
output=args.vcf+".filtered.vcf"
#print(args.vcf, output)
vcf_trans = vcf.Reader(filename=args.vcf)
vcf_reader = vcf.Reader(filename=args.vcf)
#vcf_writer = vcf.Writer(open('/dev/null', 'w'), vcf_reader)
vcf_wt = vcf.Writer(open(o1, 'w'), vcf_trans)
vcf_writer = vcf.Writer(open(output, 'w'), vcf_reader)
# Write the file with nothing done, just the changes introduced by pyVCF
for r in vcf_trans:
vcf_wt.write_record(r)
for r in (vcf_reader):
try:
#print(r.FORMAT)
if (r.FORMAT == 'DP:FDP:SDP:SUBDP:AU:CU:GU:TU'):
#print(r.REF,r.ALT)
refb=r.REF
altb=r.ALT
refd=0
altd=0
if (r.REF=='A'):
refd=r.samples[0]['AU'][0]
elif (r.REF=='C'):
refd = r.samples[0]['CU'][0]
elif (r.REF=='G'):
refd = r.samples[0]['GU'][0]
elif (r.REF=='T'):
refd = r.samples[0]['TU'][0]
else:
print("WARN: ", r.REF , " is not A,C,G or T :", r.ID)
if (r.ALT[0]=='A'):
altd = (r.samples[0]['AU'][0])
elif (r.ALT[0]=='C'):
altd = r.samples[0]['CU'][0]
elif (r.ALT[0]=='G'):
altd = r.samples[0]['GU'][0]
elif (r.ALT[0]=='T'):
altd = r.samples[0]['TU'][0]
else:
print("WARN: ", r.REF , " is not A,C,G or T :", r.ID)
r.FORMAT='DP:FDP:SDP:SUBDP:AU:CU:GU:TU:GT:FQ'
freq=altd/(altd+refd)
#r.samples[0]['FQ'] = freq
#r.FORMAT['FQ']=freq
#print("ref", refd, r.REF ,"samp", r.samples[0])
#print("alt", altd, r.ALT, "samp", r.samples[0])
#print(freq, refd, altd)
#print ("INFO",r.FORMAT)
print ("BF",r.samples[0])
r.samples[0].add_field('FOO', 'BAR')
print ("AF",r.samples[0])
#for s in r.samples:
# s.add_field('FOO', 'BAR')
# #s['DP']="Hi"
# print("New ",s)
#print ("SNP",r.FORMAT)
elif (r.FORMAT=='DP:DP2:TAR:TIR:TOR:DP50:FDP50:SUBDP50'):
pass
#print ("INDEL", r.FORMAT)
else:
print ("Warn: not sure which type of variant this is ", r.FORMAT)
vcf_writer.write_record(r)
except:
pass
#print ("EXCEPT: ", r.CHROM, r.POS)