-
Notifications
You must be signed in to change notification settings - Fork 0
/
filterMnps.py
35 lines (33 loc) · 1.66 KB
/
filterMnps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import sys
import math
import argparse
from collections import OrderedDict
from pysam import VariantFile
from pysam import tabix_index
def filterSitesAlleleCount(bcfIn, minAlleles, maxAlleles, bcfOut):
minAlleles=int(minAlleles)
maxAlleles=int(maxAlleles)
bcf_in=VariantFile(bcfIn)
bcf_out = VariantFile(bcfOut, 'w', header=bcf_in.header)
for rec in bcf_in.fetch():
if rec.alts!=None:
if len(rec.alts)>=minAlleles and len(rec.alts)<=maxAlleles and "*" not in rec.alts and len(rec.ref)==1:
bcf_out.write(rec)
bcf_out.close()
tabix_index(bcfOut, preset='vcf')
if __name__=="__main__":
parser = argparse.ArgumentParser(description="A small python script to filter the sites based on \
user defined total number of alternative alleles identified ", epilog="author: Maulik Upadhyay ([email protected])")
parser.add_argument("-v","--vcfF",metavar="File",help="compressed or uncompressed vcf or \
bcf file",required=True)
parser.add_argument("-m","--minAlt",metavar="Int",help="minimum number of alternative allle counts required to be \
present at each record. OPTIONAL FLAG",default=1,required=False)
parser.add_argument("-M","--maxAlt",metavar="Int",help="max. number of alternative allele counts required to be \
present at each record. OPTIONAL FLAG", default=1, required=False)
parser.add_argument("-o","--outputF",metavar="Str",help="bcf or vcf file name for the output", required=True)
args = parser.parse_args()
if len(sys.argv)==1:
parser.print_help(sys.stderr)
sys.exit(1)
else:
filterSitesAlleleCount(args.vcfF, args.minAlt, args.maxAlt, args.outputF)