-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
150 lines (120 loc) · 7.04 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
"""
The main script to handle arguments and run commands
"""
from dna import NucleotideSequence
from sys import argv
# Characters retained from an input file (compared case-insensitively);
# anything else, such as whitespace or digits, is discarded.
_SEQUENCE_ALPHABET = 'ACGTYRWSKMDVHBXN-'

# Horizontal rule printed/written between output sections.
_RULE = '========================================================================================================================'

_USAGE = 'COMMANDS:\n--analyze <file.txt>\n--cut <file.txt> <enzyme>\n--search <file.txt> <sequence>\n--makereport <file.txt>'


def _print_usage():
    """Print the command summary framed by two rules."""
    print(_RULE)
    print(_USAGE)
    print(_RULE)


def _load_sequence(path):
    """Read *path* and wrap its recognised characters in a NucleotideSequence.

    Only characters whose upper-case form appears in ``_SEQUENCE_ALPHABET``
    are kept; the original case of each kept character is preserved.
    """
    with open(path, 'r') as handle:
        raw = handle.read()
    bases = ''.join(ch for ch in raw if ch.upper() in _SEQUENCE_ALPHABET)
    return NucleotideSequence(bases, identify='USER')


def _with_sites(enzymes):
    """Return the entries of *enzymes* whose element 1 is non-empty."""
    return [entry for entry in enzymes if len(entry[1]) > 0]


def _summary_lines(sequence):
    """Build the enzyme summary shared by ``--analyze`` and ``--makereport``.

    Returns a ``(lines, one_cut_hits)`` pair: *lines* is the summary as a list
    of strings without trailing newlines, *one_cut_hits* is the list of
    non-empty entries returned by the ``threshhold=1`` search.
    """
    all_hits = _with_sites(sequence.search_restriction_enzymes())
    # 'threshhold' (sic) is the keyword spelled by the NucleotideSequence API.
    one_cut_hits = _with_sites(sequence.search_restriction_enzymes(threshhold=1))
    lines = [
        _RULE,
        'DNA SEQUENCE LENGTH: ' + str(sequence.get_length()) + 'b.p.',
        'TOTAL RECOGNIZED RESTRICTION ENZYMES: ' + str(len(all_hits)),
        'ONE-CUT RESTRICTION ENZYMES: ' + str(len(one_cut_hits)),
        _RULE,
    ]
    # Zero-pad to three digits; unlike slicing str(i + 1000), this does not
    # wrap around to '000' once the list reaches 1000 entries.
    for number, hit in enumerate(one_cut_hits, start=1):
        lines.append('{:03d}: '.format(number) + str(hit))
    lines.append(_RULE)
    return lines, one_cut_hits


def _describe_cut(cuts):
    """Return the 'LEFT OF CUT' and 'RIGHT OF CUT' texts for ``cuts[0]`` and ``cuts[1]``."""
    left = 'LEFT OF CUT: ' + str(cuts[0].get_length()) + 'b.p.' + '\n' + str(cuts[0])
    right = 'RIGHT OF CUT: ' + str(cuts[1].get_length()) + 'b.p.' + '\n' + str(cuts[1])
    return left, right


def _report_path(path):
    """Return the report filename for *path*: its extension replaced by '_report.txt'.

    Uses ``splitext`` instead of chopping four characters, so inputs whose
    extension is not exactly three letters (or that have none) still yield a
    sensible name.
    """
    from os.path import splitext
    return splitext(path)[0] + '_report.txt'


def _cmd_analyze(path):
    """Print the enzyme summary for the sequence stored in *path*."""
    lines, _ = _summary_lines(_load_sequence(path))
    print('\n'.join(lines))


def _cmd_cut(path, enzyme):
    """Cut the sequence in *path* with *enzyme* and print both fragments."""
    sequence = _load_sequence(path)
    print(_RULE)
    left, right = _describe_cut(sequence.cut(enzyme))
    print(left)
    print(right)
    print(_RULE)


def _cmd_search(path, query):
    """Print the result of searching the sequence in *path* for *query*."""
    sequence = _load_sequence(path)
    print(_RULE)
    print('FOUND QUERY SEQUENCE IN FOLLOWING LOCATIONS FROM LEFT = 0:')
    print(str(sequence.search(query)))
    print(_RULE)


def _cmd_makereport(path):
    """Write '<name>_report.txt' with the summary and every one-cut digest.

    Progress is echoed to stdout. An enzyme whose cut raises ``IndexError``
    is reported as failed and skipped; nothing is written for it.
    """
    sequence = _load_sequence(path)
    lines, one_cut_hits = _summary_lines(sequence)
    report = _report_path(path)
    with open(report, 'w') as out:
        out.write('\n'.join(lines) + '\n')
        print(_RULE)
        print('CREATING ' + report + ': with all possible one-cuts...')
        print(str(len(one_cut_hits)) + ' restriction enzymes to process...')
        for hit in one_cut_hits:
            name = hit[0].name
            print(_RULE)
            print('Processing cut with ' + name)
            # Only the cut itself is guarded; the writes cannot raise IndexError.
            try:
                cuts = sequence.cut(name)
            except IndexError:
                print('Failed...')
                continue
            left, right = _describe_cut(cuts)
            out.write('\n\n' + name + ':\n\n')
            out.write(left + '\n\n')
            out.write(right + '\n')
            out.write('\n\n')
            # NOTE(review): the original indentation was lost; the closing
            # rule is assumed to be written once per enzyme -- confirm.
            out.write(_RULE + '\n')
    print(_RULE)


# (long flag, short flag, number of required operands, handler).
# Long flags match case-insensitively, short flags only in upper case.
_COMMANDS = (
    ('--analyze', '-A', 1, _cmd_analyze),
    ('--cut', '-C', 2, _cmd_cut),
    ('--search', '-S', 2, _cmd_search),
    ('--makereport', '-M', 1, _cmd_makereport),
)


def main(args):
    """Dispatch the command line *args* (same layout as ``sys.argv``).

    Returns 0 after printing usage (no command given) or after running a
    command, and 2 when the command is unknown or is missing operands; in
    the latter cases a short message and the usage text are printed instead
    of failing with an IndexError traceback.
    """
    if len(args) <= 1:
        _print_usage()
        return 0

    command = args[1]
    for long_flag, short_flag, arity, handler in _COMMANDS:
        if command.lower() == long_flag or command == short_flag:
            operands = args[2:2 + arity]
            if len(operands) < arity:
                print('Missing argument(s) for ' + command)
                _print_usage()
                return 2
            handler(*operands)
            return 0

    print('Unknown command: ' + command)
    _print_usage()
    return 2


if __name__ == '__main__':
    raise SystemExit(main(argv))