This repository has been archived by the owner on Dec 6, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index_fixer.py
executable file
·146 lines (130 loc) · 5.3 KB
/
index_fixer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import sys
import re
import os
import click
from flowcell_parser.classes import SampleSheetParser
def generate_samplesheet(ss_reader):
    """Serialise a parsed samplesheet back into CSV text.

    The output is a ``[Header]`` section with one ``key,value`` line per entry
    of ``ss_reader.header`` (key and value right-stripped), followed by a
    ``[Data]`` section made of the column names in ``ss_reader.datafields``
    and one comma-joined line per row of ``ss_reader.data``. Every line,
    including the last one, ends with ``os.linesep``.
    """
    eol = os.linesep

    # Header section: section marker, then one "key,value" line per entry.
    lines = ["[Header]"]
    lines.extend(
        "{},{}".format(key.rstrip(), ss_reader.header[key].rstrip())
        for key in ss_reader.header
    )

    # Data section: section marker, column names, then the rows themselves.
    columns = list(ss_reader.datafields)
    lines.append("[Data]")
    lines.append(",".join(columns))
    lines.extend(
        ",".join([row[column] for column in columns])
        for row in ss_reader.data
    )

    return "".join(line + eol for line in lines)
def nuc_compliment(nuc):
    """Return the complementary base of a single nucleotide.

    Only the upper-case letters ``A``, ``T``, ``C`` and ``G`` are recognised.
    Any other value terminates the program through ``sys.exit`` with an
    error message naming the offending value.
    """
    pairs = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    if nuc not in pairs:
        sys.exit("Critical error. Unknown nucleotide found: {}.".format(nuc))
    return pairs[nuc]
# Pair of text types for the running interpreter: ``unicode`` on Python 2,
# ``str`` on Python 3. On any other major version the name stays undefined.
if sys.version_info[0] == 2:
    ss_type = (unicode,) * 2  # noqa: F821 -- only evaluated on Python 2
elif sys.version_info[0] == 3:
    ss_type = (str,) * 2
def _reverse_complement(sequence):
    """Return the reverse complement of ``sequence`` using ``nuc_compliment``."""
    return "".join(nuc_compliment(nuc) for nuc in reversed(sequence))


def _rows_for_project(rows, project):
    """Yield rows whose ``Sample_ID`` contains ``project``; all rows if ``project`` is falsy."""
    for row in rows:
        if (not project) or (project in row['Sample_ID']):
            yield row


@click.command()
@click.option('--path', required=True,help='Path to the Samplesheet. E.g. ~/fc/161111_M01320_0095_000000000-AWE6P.csv')
@click.option('--project', required=False,help='Project ID, e.g. P10001. Only the indexes of samples with this specific project ID will be changed')
@click.option('--swap', is_flag=True,help='Swaps index 1 with 2 and vice versa.')
@click.option('--rc1', is_flag=True,help='Exchanges index 1 for its reverse compliment.')
@click.option('--rc2', is_flag=True,help='Exchanges index 2 for its reverse compliment.')
@click.option('--platform', required=True, type=click.Choice(['miseq', 'novaseq', 'nextseq']), help="Run platform ('miseq', 'novaseq', 'nextseq')")
def main(path, project, swap, rc1, rc2, platform):
    """Rewrite the indexes of a samplesheet and save the result as a new file.

    Index 1 and/or index 2 can be replaced by their reverse complement and
    the two indexes can be swapped (reverse complement is applied first).
    With --project only samples whose Sample_ID contains the given project
    ID are changed. The result is written to '<name>_redemux.csv' in the
    current directory, where <name> is 'SampleSheet' for miseq and the input
    file name without extension for novaseq and nextseq.
    """
    ss_reader = SampleSheetParser(path)
    ss_data = ss_reader.data

    # The index layout is read from the first row, so an empty sheet cannot
    # be processed; fail with a clear message instead of an IndexError.
    if not ss_data:
        sys.exit("No sample rows found in samplesheet: {}".format(path))

    # A sheet is dual-indexed when its rows carry an 'index2' column.
    dual_index = 'index2' in ss_data[0]

    if not dual_index:
        # Sanity check
        if rc2 or swap:
            sys.exit("Single index. Cannot change index 2, nor swap indexes")

        if rc1:
            for row in _rows_for_project(ss_data, project):
                # Only values starting with 4-12 bases are touched; anything
                # else (e.g. an empty or named index) is left unchanged.
                # NOTE(review): an index longer than 12 bases is replaced by
                # the reverse complement of its first 12 only -- confirm
                # whether that truncation is intended.
                index_in = re.match(r'([ATCG]{4,12})', row['index'])
                if index_in:
                    row['index'] = _reverse_complement(index_in.group(1))
    elif rc1 or rc2 or swap:
        # On miseq the index ID columns mirror the index sequences; on
        # novaseq/nextseq only the sequences themselves are rewritten.
        update_ids = platform == "miseq"
        rewrite = update_ids or platform in ("novaseq", "nextseq")

        for row in _rows_for_project(ss_data, project):
            if not rewrite:
                continue
            # Reverse complement first, then swap, matching the documented order.
            if rc1:
                row['index'] = _reverse_complement(row['index'])
                if update_ids:
                    row['I7_Index_ID'] = row['index']
            if rc2:
                row['index2'] = _reverse_complement(row['index2'])
                if update_ids:
                    row['I5_Index_ID'] = row['index2']
            if swap:
                row['index'], row['index2'] = row['index2'], row['index']
                if update_ids:
                    row['I7_Index_ID'] = row['index']
                    row['I5_Index_ID'] = row['index2']

    redemux_ss = generate_samplesheet(ss_reader)

    if platform in ("novaseq", "nextseq"):
        # Take the input file's base name without its extension. Unlike a
        # '/<word>.csv' pattern match this also works for paths without a
        # directory part and for names containing characters such as '-'.
        filename = os.path.splitext(os.path.basename(path))[0]
    else:
        filename = "SampleSheet"

    with open('{}_redemux.csv'.format(filename), 'w') as fh_out:
        fh_out.write(redemux_ss)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()