-
Notifications
You must be signed in to change notification settings - Fork 2
/
fc_rough.py
executable file
·55 lines (43 loc) · 1.57 KB
/
fc_rough.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/python
import os.path, subprocess
from Bio import Phylo, AlignIO
from Bio.Align import MultipleSeqAlignment
def build_msa(node, sequence_msa_map):
    """Write a Stockholm alignment of every leaf found below ``node``.

    The output file is named ``<hash of node>.msa``; the name is relative,
    so it is created in the current working directory, replacing any
    existing file with the same name.

    Args:
        node: Tree node exposing ``get_terminals()``. The ``name`` of each
            terminal is used as a key into ``sequence_msa_map``.
        sequence_msa_map: Mapping from leaf name to the alignment record
            belonging to that leaf.

    Returns:
        The name of the alignment file that was written.

    Raises:
        KeyError: If a terminal's name is not present in
            ``sequence_msa_map``.
    """
    file_name = str(hash(node)) + '.msa'
    # Resolve the records first so a failed lookup does not leave an empty
    # output file behind.
    records = [sequence_msa_map[terminal.name]
               for terminal in node.get_terminals()]
    alignment = MultipleSeqAlignment(records)
    # The context manager closes (and therefore flushes) the file before we
    # return, so the data is on disk before any other program reads it.
    with open(file_name, 'w') as file_handle:
        AlignIO.write(alignment, file_handle, 'stockholm')
    return file_name
def build_hmm(msa_file_name):
    """Run ``hmmbuild`` on an alignment file and return the HMM file name.

    The HMM path is the alignment path with its extension replaced by
    ``.hmm``. The function blocks until ``hmmbuild`` has exited.

    Args:
        msa_file_name: Path of the alignment file handed to ``hmmbuild``.

    Returns:
        The HMM path that was passed to ``hmmbuild`` as its output file.
        The exit status of ``hmmbuild`` is not inspected, so the name is
        returned even when the tool reports a failure.

    Raises:
        OSError: If the ``hmmbuild`` executable cannot be started.
    """
    hmm_file_name = os.path.splitext(msa_file_name)[0] + '.hmm'
    # Pass the arguments as a list and bypass the shell, so that file names
    # containing spaces or shell metacharacters reach hmmbuild unchanged.
    process = subprocess.Popen(['hmmbuild', hmm_file_name, msa_file_name])
    process.wait()
    return hmm_file_name
def treewalker(root, sequence_msa_map):
    """Build an alignment file and an HMM for ``root`` and all descendants.

    The node itself is handled first, then each entry of ``root.clades`` is
    visited recursively in order (pre-order traversal).

    Args:
        root: Node to start from; must expose ``clades`` and whatever
            ``build_msa`` requires of a node.
        sequence_msa_map: Mapping from leaf name to alignment record,
            forwarded unchanged to ``build_msa``.

    Returns:
        None.
    """
    build_hmm(build_msa(root, sequence_msa_map))
    # A falsy ``clades`` (e.g. an empty list) means there is nothing to
    # descend into.
    for child in root.clades or ():
        treewalker(child, sequence_msa_map)
def main():
    """Generate per-node alignment and HMM files for the sample inputs.

    Reads a Newick tree from ``sample_tree.ml`` and a FASTA alignment from
    ``sample_alignment.msa``, indexes the alignment records by id, and walks
    the tree from its root with ``treewalker``.
    """
    tree_file = "sample_tree.ml"
    msa_file = "sample_alignment.msa"
    # NOTE(review): assigned but never read in this function.
    new_sequence_file = "new_sequence.fasta"

    tree = Phylo.read(tree_file, "newick")
    msa = AlignIO.read(msa_file, "fasta")

    # Tree leaf names are expected to equal the ids of the alignment
    # entries, so the id is used as the lookup key.
    records_by_id = {record.id: record for record in msa}
    treewalker(tree.root, records_by_id)
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()