-
Notifications
You must be signed in to change notification settings - Fork 0
/
backtranslate.py
65 lines (52 loc) · 1.39 KB
/
backtranslate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import sys
def read_fasta(infile):
    """Parse a FASTA file into a dict mapping header text to sequence.

    The key of each record is the full header line with the leading ">"
    and the trailing newline removed (any description text is kept).
    The sequence lines of a record are concatenated after stripping
    surrounding whitespace. Lines that appear before the first header are
    ignored. If the same header occurs more than once, the last record
    wins.

    Args:
        infile: Path of the FASTA file to read.

    Returns:
        dict: header -> sequence string, in the order records were read.
    """
    fasta_dict = {}
    # None means "no header seen yet"; using None instead of a placeholder
    # key keeps a record that is genuinely named "dummy" from being dropped.
    name = None
    chunks = []
    with open(infile, "r") as F:
        for line in F:
            if line.startswith(">"):
                if name is not None:
                    fasta_dict[name] = "".join(chunks)
                name = line.lstrip(">").rstrip("\n")
                chunks = []
            elif name is not None:
                # Strip all surrounding whitespace so stray spaces or
                # carriage returns never end up counted as residues.
                chunks.append(line.strip())
    # Flush the last record, if the file contained any.
    if name is not None:
        fasta_dict[name] = "".join(chunks)
    return fasta_dict
def cds_aln_from_aa_aln_and_cds_unalign(aa_aln_file, cds_unalign_infile):
    """Thread unaligned CDS sequences onto a protein alignment.

    For every sequence in the protein alignment, each non-gap column
    consumes the next three characters of the CDS with the same header and
    each gap column ("-") becomes "---". The result is written as FASTA to
    ``aa_aln_file + ".backtranslated.aln"``.

    Sequences are paired by their exact FASTA header. CDS records without
    a matching protein are ignored. Proteins without a matching CDS are
    skipped with a warning on stderr instead of aborting the whole run.
    No check is made that the codons actually translate to the residues.

    Args:
        aa_aln_file: Path to the aligned protein FASTA file.
        cds_unalign_infile: Path to the unaligned CDS FASTA file.

    Returns:
        None. The alignment is written to disk.
    """
    aa_seqs = read_fasta(aa_aln_file)
    cds_unaligned_seqs = read_fasta(cds_unalign_infile)

    out_lines = []
    for name, aa_aln in aa_seqs.items():
        cds_unaln = cds_unaligned_seqs.get(name)
        if cds_unaln is None:
            sys.stderr.write(
                "warning: no CDS sequence found for '%s'; skipping\n" % name
            )
            continue

        codons = []
        cds_idx = 0
        for char in aa_aln:
            if char != "-":
                codons.append(cds_unaln[cds_idx:cds_idx + 3])
                cds_idx += 3
            else:
                codons.append("---")

        # A CDS longer than needed is not reported (the extra tail is
        # simply left unused); a shorter one yields truncated codons and
        # a ragged alignment, so flag it.
        if cds_idx > len(cds_unaln):
            sys.stderr.write(
                "warning: CDS for '%s' has %d characters but %d are needed\n"
                % (name, len(cds_unaln), cds_idx)
            )

        out_lines.append(">" + name)
        out_lines.append("".join(codons))

    outname = aa_aln_file + ".backtranslated.aln"
    with open(outname, "w") as OUTF:
        OUTF.write("\n".join(out_lines) + "\n")
####
# Command-line entry point. Guarded so that importing this module (e.g. to
# reuse read_fasta) does not parse sys.argv or terminate the interpreter.
if __name__ == "__main__":
    if len(sys.argv) != 3:
        # sys.exit with a string prints it to stderr and exits with
        # status 1, so a usage error is visible to calling scripts.
        sys.exit("usage: python backtranslate.py aa_aln_file cds_unalign_infile")
    cds_aln_from_aa_aln_and_cds_unalign(sys.argv[1], sys.argv[2])