-
Notifications
You must be signed in to change notification settings - Fork 0
/
ParseDADA2Tabs
63 lines (54 loc) · 2.13 KB
/
ParseDADA2Tabs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python3
# This is the third script in the PopMLST pipeline. Run it
# without arguments to get the usage. Please provide
# proper attribution if you use this code.
#
# https://github.com/marade/PopMLST
#
# author: M Radey (email: marad_at_uw.edu)
import sys, os, argparse, re, tempfile, glob
from subprocess import call, Popen, PIPE, STDOUT
import pandas as pd
from operator import itemgetter
from Bio import SeqIO
from colorama import init as cinit
from colorama import Fore, Back, Style
# Initialise colorama once at import time. autoreset=True is passed so the
# Fore/Style codes prefixed to the status messages below do not need an
# explicit reset after each print.
cinit(autoreset=True)
# Version string of this script (not referenced by the code visible here).
version = "1.0.0"
def do_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which
            is the behaviour of the original zero-argument call.

    Returns:
        argparse.Namespace with the positional string attributes
        ``indir``, ``inpre``, ``insuf`` and ``outfile``.

    Raises:
        SystemExit: Raised by argparse when the arguments are missing or
            invalid (after printing the usage message).
    """
    desc = "PopMLST tool for parsing ASV files from DADA2"
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=desc,
    )
    parser.add_argument("indir", help="specifies the input file directory")
    parser.add_argument(
        "inpre",
        help="specifies the input file prefix "
             "to be used to grab multiple DADA2 ASV files",
    )
    parser.add_argument(
        "insuf",
        help="specifies the input file suffix "
             "to be used to grab multiple DADA2 ASV files",
    )
    parser.add_argument(
        "outfile",
        help="specifies the file where "
             "you want the output to go",
    )
    # parse_args(None) falls back to sys.argv[1:], so existing callers that
    # pass nothing are unaffected.
    return parser.parse_args(argv)
def combine_asvs(indir, inpre, insuf, outfile):
    """Merge several DADA2 ASV tables into one tab-separated table.

    Every file matching ``<indir>/<inpre>*<insuf>`` is read as a
    tab-separated table whose first column is used as the index. Index
    labels are truncated at their first underscore, the tables are
    outer-joined on the index, missing cells are set to 0, column labels
    are truncated at their first underscore, and the result is written
    to ``outfile``.

    Args:
        indir: Directory searched for the input tables.
        inpre: File-name prefix of the input tables.
        insuf: File-name suffix of the input tables.
        outfile: Path of the combined tab-separated output file.

    Raises:
        FileNotFoundError: If no file matches the search pattern.
    """
    print(Fore.CYAN + Style.BRIGHT + "Combining ASV tables...")
    pattern = indir + "/" + inpre + "*" + insuf
    # glob returns matches in arbitrary order; sorting makes the column
    # order of the merged table reproducible between runs and machines.
    infiles = sorted(glob.glob(pattern))
    if not infiles:
        # Fail loudly instead of silently writing an empty output file.
        raise FileNotFoundError("No ASV tables match pattern: " + pattern)
    # A None sentinel marks "nothing merged yet". Testing DataFrame.empty
    # instead would discard an already-read table that has zero rows or
    # zero columns.
    alldfs = None
    for infile in infiles:
        df = pd.read_csv(infile, sep='\t', index_col=0)
        # str() guards against index labels that pandas parsed as numbers,
        # which have no .split method.
        df = df.rename(index=lambda x: str(x).split('_')[0])
        if alldfs is None:
            alldfs = df
            continue
        alldfs = pd.merge(alldfs, df, how='outer', left_index=True,
                          right_index=True)
    # Cells absent from a table become NaN in the outer join; store them as 0.
    alldfs = alldfs.fillna(0)
    alldfs = alldfs.rename(columns=lambda x: x.split('_')[0])
    # '%.f' writes floats without a fractional part (the NaN fill above
    # turns affected columns into floats).
    alldfs.to_csv(outfile, sep='\t', float_format='%.f')
def main():
    """Parse the command line, combine the ASV tables and report completion.

    Returns:
        Always 0.
    """
    opts = do_args()
    # Resolve the input directory and output file to absolute paths before
    # handing them to the combiner.
    src_dir = os.path.abspath(opts.indir)
    dest_file = os.path.abspath(opts.outfile)
    combine_asvs(src_dir, opts.inpre, opts.insuf, dest_file)
    print(Fore.CYAN + Style.BRIGHT + "Done.")
    return 0
# Run only when executed as a script; the value returned by main() becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())