-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdssr.py
147 lines (134 loc) · 3.98 KB
/
dssr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""
ARIADNE - DSSR module
Interface with 3DNA-DSSR
"""
import os
import numpy as np
import pandas as pd
import seq_utils
DSSR_CMD = "x3dna-dssr"
MISSING = "----"
DSSR_SECTIONS = {
"Local base-pair parameters": {
"bp": str,
"Shear": float,
"Stretch": float,
"Stagger": float,
"Buckle": float,
"Propeller": float,
"Opening": float,
},
"Local base-pair step parameters": {
"bp": str,
"Shift": float,
"Slide": float,
"Rise": float,
"Tilt": float,
"Roll": float,
"Twist": float,
},
"Local base-pair helical parameters": {
"bp": str,
"X-disp": float,
"Y-disp": float,
"h-Rise": float,
"Incl.": float,
"Tip": float,
"h-Twist": float,
},
"Simple base-pair parameters based on RC8--YC6 vectors": {
"bp": str,
"Shear": float,
"Stretch": float,
"Stagger": float,
"Buckle": float,
"Propeller": float,
"Opening": float,
},
"Simple step parameters based on consecutive C1'-C1' vectors": {
"bp": str,
"Shift": float,
"Slide": float,
"Rise": float,
"Tilt": float,
"Roll": float,
"Twist": float,
},
"Simple helical parameters based on consecutive C1'-C1' vectors": {
"bp": str,
"X-disp": float,
"Y-disp": float,
"h-Rise": float,
"Incl.": float,
"Tip": float,
"h-Twist": float,
},
"Classification of dinucleotide steps in right-handed nucleic acid structures": {
"bp": str,
"Xp": float,
"Yp": float,
"Zp": float,
"XpH": float,
"YpH": float,
"ZpH": float,
"Form": str,
"ABI": float,
},
}
def parse_dssr_section(output_file, line):
section = None
while section is None:
sections = [section for section in DSSR_SECTIONS if section in line]
if sections:
assert len(sections) == 1
section = sections[0]
line = output_file.readline()
if line == "":
# end of file
return None
fields = DSSR_SECTIONS[section]
field_names = set(fields)
while set(line.split()) != field_names:
line = output_file.readline()
if line == "":
# end of file
return None
field_names_order = line.split()
field_types = {field: fields[field] for field in field_names_order}
section_data = {field: list() for field in field_names if field_types[field] in [int, float]}
line = output_file.readline()
while line.strip() != "":
for field, value in zip(field_names_order, line.split()[1:]):
if field not in section_data:
continue
if value == MISSING:
section_data[field].append(np.nan)
else:
section_data[field].append(field_types[field](value))
line = output_file.readline()
section_data = pd.DataFrame.from_dict(section_data)
return section, section_data
def parse_dssr_output(output_file_name):
with open(output_file_name) as output_file:
output_data = dict()
for i in range(len(DSSR_SECTIONS)):
result = parse_dssr_section(output_file, output_file.readline())
if result:
section, section_data = result
output_data[section] = section_data
else:
assert False
return output_data
def analyze_pdb(pdb_file, clobber=False):
"""
Determine the results of DSSR.
:return:
"""
base_name, ext = os.path.splitext(pdb_file)
output_file_name = f"{base_name}_dssr.txt"
if clobber or not os.path.isfile(output_file_name):
cmd = f"{DSSR_CMD} --analyze -i={seq_utils.escape_path(pdb_file)} -o={seq_utils.escape_path(output_file_name)}"
xc = os.system(cmd)
assert xc == 0
output_data = parse_dssr_output(output_file_name)
return output_data