-
Notifications
You must be signed in to change notification settings - Fork 17
/
bamCaller.py
executable file
·67 lines (59 loc) · 2.13 KB
/
bamCaller.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python3
import sys
import re
import gzip
import utils
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("depth", type=float)
parser.add_argument("mask")
parser.add_argument("--minMapQ", type=float, default=20)
parser.add_argument("--minConsQ", type=float, default=20)
parser.add_argument("--legend_file", help="Impute2 reference panel legend file, can be gzipped or not")
args = parser.parse_args()
sites_parser = None
if args.legend_file is not None:
sites_parser = utils.LegendParser(args.legend_file)
minDepth = args.depth / 2.0
maxDepth = args.depth * 2.0
lastPos = 0
line_cnt = 0
chr_ = ""
for line in sys.stdin:
if line[0] == '#':
print(line, end="")
continue
fields = line.strip().split('\t')
if chr_ == "":
chr_ = fields[0]
mask = utils.MaskGenerator(args.mask, chr_)
else:
assert fields[0] == chr_, "found multiple chromosomes"
pos = int(fields[1])
refAllele = fields[3]
altAllele = fields[4]
info = fields[7]
genotypes = fields[9]
if line_cnt % 10000 == 0:
print("parsing position {}".format(pos), file=sys.stderr)
line_cnt += 1
if sites_parser is not None:
while not sites_parser.end and sites_parser.pos < pos:
sites_parser.tick()
if re.match("^[ACTGactg]$", refAllele) and re.match("^[ACTGactg\.]$", altAllele):
dp_match = re.search("DP=(\d+)", info)
mq_match = re.search("MQ=(\d+)", info)
fq_match = re.search("FQ=([\d-]+)", info)
if not (dp_match and mq_match and fq_match):
continue
dp = int(dp_match.group(1))
mq = int(mq_match.group(1))
fq = int(fq_match.group(1))
if dp >= minDepth and dp <= maxDepth and mq >= args.minMapQ and abs(fq) >= args.minConsQ:
mask.addCalledPosition(pos)
if altAllele != "." and re.match("^[01][/|][01]", genotypes):
print(line, end="")
elif sites_parser is not None and sites_parser.pos == pos:
assert refAllele == sites_parser.ref_a
fields[4] = sites_parser.alt_a
print("\t".join(fields))