-
Notifications
You must be signed in to change notification settings - Fork 0
/
LE4DNA_extras.py
483 lines (388 loc) · 17.8 KB
/
LE4DNA_extras.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
# coding: utf-8
def com_traj(ftraj, nfrs, nmon, massarr):
'''converts a formatted trajectory of monomers from the output of
LE4DNA.format_traj() into a trajectory of the center of mass of each monomer.
args:
ftraj (numpy array) : output of LE4DNA.format_traj(), trajectory of monomers
nfrs (int) : np.shape(ftraj)[1], number of frames in trajectory
nmon (int) : np.shape(ftraj)[0]/(3*len(massarr)), number of monomers in molecule
massarr (numpy array) : masses of atoms in each monomer. i.e. massarr[i] = mass
of ith atom in the monomer.
'''
import numpy as np
monsize = len(massarr)
totalM = massarr.sum()
com_traj = np.zeros( ( 3*nmon , nfrs ) )
for i in range(0, 3*nmon, 3):
for j in range(monsize):
com_traj[i] += massarr[j]*ftraj[3*j + monsize*i] #xij
com_traj[i+1] += massarr[j]*ftraj[3*j + monsize*i + 1] #yij
com_traj[i+2] += massarr[j]*ftraj[3*j + monsize*i + 2] #zij
com_traj *= 1/totalM
return com_traj
def save_single_particle_traj( filename, ftraj, nparticles):
import numpy as np
if int(np.shape(ftraj)[0]/3) != nparticles:
print("Wrong number of particles or wrong trajectory.")
for i in range(nparticles):
np.savetxt(filename + f'particle_{i+1}', ftraj[3*i:3*i+3].T)
def fric_calc_SPmodel(TOP, protname, N, nfrs, avblsq, T, intv = 2.71828, viscosity = 3.1e-4, fd20 = 0.0, path_to_resarea = './', cgmodel = 'SP'):
# !!!! TOP must contain only coarse grained sites. !!!!
import numpy as np
import sys
import os
import subprocess
pi = np.pi
print(protname, cgmodel, N, nfrs)
if cgmodel != 'SP':
raise ValueError('''Available coarse grain models are: SP.''')
# All volumes given are pulled from the papers below.
#Deoxynucleotide volumes from "Standard atomic volumes in double-stranded DNA andpacking in protein–DNA interfaces" by Nadassy et al. NAR, 2001.
#Ribonucleotide volumes from "Calculation of Standard Atomic Volumes for RNA andComparison with Proteins: RNA is Packed More Tightly" by Voss and Gersteain, JMB, 2005.
mrad_dict = { "DA5" : np.cbrt((3*310.9) / (4*pi)) ,
"DA3" : np.cbrt((3*310.9) / (4*pi)) ,
"DA" : np.cbrt((3*310.9) / (4*pi)) ,
"DC5" : np.cbrt((3*288.0) / (4*pi)) ,
"DC3" : np.cbrt((3*288.0) / (4*pi)) ,
"DC" : np.cbrt((3*288.0) / (4*pi)) ,
"DG5" : np.cbrt((3*318.6) / (4*pi)) ,
"DG3" : np.cbrt((3*318.6) / (4*pi)) ,
"DG" : np.cbrt((3*318.6) / (4*pi)) ,
"DT5" : np.cbrt((3*307.4) / (4*pi)) ,
"DT3" : np.cbrt((3*307.4) / (4*pi)) ,
"DT" : np.cbrt((3*307.4) / (4*pi)) ,
"A5" : np.cbrt((3*315.3) / (4*pi)) ,
"A3" : np.cbrt((3*315.3) / (4*pi)) ,
"A" : np.cbrt((3*315.3) / (4*pi)) ,
"C5" : np.cbrt((3*291.1) / (4*pi)) ,
"C3" : np.cbrt((3*291.1) / (4*pi)) ,
"C" : np.cbrt((3*291.1) / (4*pi)) ,
"G5" : np.cbrt((3*322.0) / (4*pi)) ,
"G3" : np.cbrt((3*322.0) / (4*pi)) ,
"G" : np.cbrt((3*322.0) / (4*pi)) ,
"U5" : np.cbrt((3*286.9) / (4*pi)) ,
"U3" : np.cbrt((3*286.9) / (4*pi)) ,
"U" : np.cbrt((3*286.9) / (4*pi)) }
# sugar and base volumes, excluding volumes of atoms in phosphates (P, O1P, O2P, O3', O5')
# A = 243.7, C = 220.8, G = 251.4, T = 240.2
#volume of phosphate (P + O1P + O2P + O3' + O5')
Prad_dict = { "P" : np.cbrt((3*67.2) / (4*pi)) }
#volume of sugar+base
Srad_dict = { "DA5" : np.cbrt((3*243.7) / (4*pi)) ,
"DA3" : np.cbrt((3*243.7) / (4*pi)) ,
"DA" : np.cbrt((3*243.7) / (4*pi)) ,
"DC5" : np.cbrt((3*220.8) / (4*pi)) ,
"DC3" : np.cbrt((3*220.8) / (4*pi)) ,
"DC" : np.cbrt((3*220.8) / (4*pi)) ,
"DG5" : np.cbrt((3*251.4) / (4*pi)) ,
"DG3" : np.cbrt((3*251.4) / (4*pi)) ,
"DG" : np.cbrt((3*251.4) / (4*pi)) ,
"DT5" : np.cbrt((3*240.2) / (4*pi)) ,
"DT3" : np.cbrt((3*240.2) / (4*pi)) ,
"DT" : np.cbrt((3*240.2) / (4*pi)) }
#Calculate the Miller radius per bead
mradlist = []
#P_mradlist = []
#S_mradlist = []
with open(TOP) as f:
for line in f:
if line[0:4] != 'ATOM':
pass
elif line[0:4] == 'ATOM': # and line.split()[2] == "P":
dummy = line.split()
if dummy[2] == "P":
mradlist.append( Prad_dict[ "P" ] )
elif dummy[2] == "S":
mradlist.append( Srad_dict[ dummy[3] ])
mradlist = np.array(mradlist)
#file should look like: "./SP_resarea.xvg"
if os.path.exists(path_to_resarea + cgmodel + "_resarea.xvg"):
pass
else:
raise FileNotFoundError('''I can't find the resarea.xvg file containing the solvent-exposed surface area of each residue.
Please either run the process.sh file, if you have not already done so, and move the resarea.xvg
file into the current working directory.''')
# To compute the solvent exposed surface area, run the following GROMACS command:
# gmx_mpi sasa -f after_rot_sim0.xtc -s dsAXA.pdb -n SP_index.ndx -or SP_resarea.xvg -dt 1000 -surface 1 -output 2 3
# This assumes that SP_index.ndx contains the atom indices for the sugar+base and the phosphates in selections
# 2 and 3, respectively.
#determine solvent exposed surface areas of each group from the .xvg file
# this assumes lines in .xvg have the following format:
# res resarea std Sarea std Parea std
# 1 2.211 0.179 1.118 0.102 0.000 0.000
# 2 1.942 0.195 0.603 0.085 0.842 0.076 ...
S_resarea = []
P_resarea = []
with open(path_to_resarea + cgmodel + "_resarea.xvg") as f:
for line in f:
if line[0] == '#' or line[0] == '@':
pass
else:
S_resarea.append(float(line.split()[3]))
P_resarea.append(float(line.split()[5]))
S_resarea = np.array(S_resarea)
P_resarea = np.array(P_resarea)
P_resarea = P_resarea[ P_resarea != 0.0] #each res has a sugar+base but not all have a P.
#build the list of SASA for each group in the same order as mrad_list.
#the list slicing below yields SP_resarea = [S,P,...,P,S,S,P,...,P,S]
SP_resarea = np.zeros(N)
SP_resarea[0:int(N/2):2] = S_resarea[0: int(len(S_resarea)/2)]
SP_resarea[int(N/2)::2] = S_resarea[int(len(S_resarea)/2):]
SP_resarea[1:int(N/2):2] = P_resarea[0: int(len(P_resarea)/2)]
SP_resarea[int(N/2)+1:-1:2] = P_resarea[int(len(P_resarea)/2):]
#create list of CG-site solvent exposed radii
# S_rad = ((S_resarea/(4*pi))**0.5)*10
# P_rad = ((P_resarea/(4*pi))**0.5)*10
SP_rad = ((SP_resarea/(4*pi))**0.5)*10
#np.savetxt('avresrad',np.array(rad),fmt="%f")
fratio = (SP_rad.sum()/N)/10
print('fratio: ', fratio)
#Calculate the friction coefficients
kB = 1.38066E-23
print('Temperature (K): ',T)
print('Internal viscosity factor: ',intv)
#Use NIST formula for viscosity -- good NEAR room temperature and physiological.
#Won't work higher than, say, 360 K.
if viscosity == 0:
print('No viscosity given. Using the NIST formula, which is only valid for physiological conditions,\n')
print('i.e. between about 273 and 310 K.')
viscosity = (.2131590-1.96290E-3*T+(.00246411*T)**2+(-.0018462*T)**3)
print("Viscosity (Pa s): ", viscosity)
print("fd20", fd20)
rv = mradlist #np.array(mradlist)
rw = SP_rad #np.array(rad)
rp = np.zeros(N)
friw = np.zeros(N)
fri = np.zeros(N)
friwt = 0
frit = 0
for i in range(N):
if rw[i] < rv[i]:
rp[i] = (rv[i]**2 - rw[i]**2)**0.5
else:
rp[i] = 0
friw[i] = 6.0*pi*(rw[i]/10)*viscosity
fri[i] = 6.0*pi*(rp[i]/10)*(intv*viscosity) + 6.0*pi*(rw[i]/10)*viscosity
friwt += friw[i]
frit += fri[i]
avfr = frit/float(N)
avfrw = friwt/float(N)
#np.savetxt('avfr',np.array([avfr*1.0E-9]))
#avblsq = float(np.loadtxt('avblsq'))
sigma = (3*kB*T*1E15)/(avblsq*avfr)
#with open('sigma','w') as f:
# f.write('sigma, 1/ps\n')
# f.write(str(sigma)+'\n')
#with open('sigma.dat','w') as f:
# f.write(str(sigma)+'\n')
fric = np.zeros((N+1, 2))
fric[0,0] = avfrw
fric[0,1] = avfr
for i in range(N):
fric[i+1,:] = np.column_stack([friw[i],fri[i]])
#np.savetxt('fric',fric)
return fratio, sigma, fric, avfr
def autocorr(nbonds, path):
import numpy as np
autocorr = []
for num in range(nbonds):
col1 = []
col2 = []
with open( path + str(num + 1)) as file:
for line in file:
line = ' '.join(line.split())
line = line.split()
col1.append(float(line[0]))
col2.append(float(line[1]))
file.close()
autocorr.append( [col1, col2] )
autocorr = np.array(autocorr)
print(autocorr.shape)
return autocorr
def sequence_data(pdb_filename, path = './'):
import numpy as np
from string import ascii_uppercase
if '.pdb' in pdb_filename:
file = path + pdb_filename
else:
file = path + pdb_filename + '.pdb'
linelist = []
with open(file) as pdb:
for line in pdb:
if line.split()[0] == 'ATOM':
linelist.append(line.split()[1:6])
linelist = np.array(linelist)
chains = len(set(linelist[:,3]))
chainindex = ascii_uppercase[0:chains]
bases_by_chain = np.zeros(chains)
for i in range(chains):
bases_by_chain[i] = max( linelist[:,4][ linelist[:,3] == chainindex[i] ].astype('int32'))
chain_sequence = []
for i in range(chains):
enum_list = np.unique( linelist[:,2:5:2][ linelist[:,3] == chainindex[i] ] , axis = 0 )
for j in range(1, int(bases_by_chain[i]) + 1):
chain_sequence.append( enum_list[:,0][ enum_list[:,1 ] == str(j) ][0] )
return chain_sequence, chains, bases_by_chain, linelist
def ftraj2pdb( ftraj_frame , atomlist, chain_sequence, ref_pdb, filename, path = './'):
import numpy as np
import platform
import subprocess
import os
from string import ascii_uppercase
linelist = []
with open(ref_pdb) as pdb:
for line in pdb:
if line.split()[0] == 'ATOM':
linelist.append(line.split()[1:6])
linelist = np.array(linelist)
chains = len(set(linelist[:,3]))
chainindex = ascii_uppercase[0:chains]
sites = len(atomlist)
with open( path + filename , 'w+') as newpdb:
for i in range(sites):
c1 = 'ATOM'
c2 = i+1 #str(i+1).rjust(7)
c3 = atomlist[i,0] #str(atomlist[i,0])
c4 = ''
c5 = atomlist[i,1] #str(atomlist[i,1])
c6 = atomlist[i,2]
c7 = i+1 #str(i+1)
c8 = ''
c9 = "%.3f" % np.around(ftraj_frame[3*i], 3)
c10 = "%.3f" % np.around(ftraj_frame[3*i+1],3)
c11 = "%.3f" % np.around(ftraj_frame[3*i+2],3)
c12= 1.00
c13= 0.00
c14= atomlist[i,0] #str(atomlist[i,0])
c15= ''
#line=c1+c2+c3+c4+c5+c6+c7+c8+c9+c10+c11+c12+c13
line= "{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}{:1s} {:8s}{:8s}{:8s}{:6.2f}{:6.2f} {:>2s}{:2s}".format(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15) +'\n'
newpdb.write(line)
return print('Done!')
def fric_calc_Pmodel(TOP, protname, N, nfrs, avblsq, T, intv = 2.71828, viscosity = 3.1e-4, fd20 = 0.0, path_to_resarea = './', cgmodel = 'P'):
# !!!! TOP must contain only coarse grained sites. !!!!
import numpy as np
import sys
import os
import subprocess
pi = np.pi
print(protname, cgmodel, N, nfrs)
if cgmodel != 'P':
raise ValueError('''Available coarse grain models are: P.''')
# All volumes given are pulled from the papers below.
#Deoxynucleotide volumes from "Standard atomic volumes in double-stranded DNA andpacking in protein–DNA interfaces" by Nadassy et al. NAR, 2001.
#Ribonucleotide volumes from "Calculation of Standard Atomic Volumes for RNA andComparison with Proteins: RNA is Packed More Tightly" by Voss and Gersteain, JMB, 2005.
mrad_dict = { "DA5" : np.cbrt((3*310.9) / (4*pi)) ,
"DA3" : np.cbrt((3*310.9) / (4*pi)) ,
"DA" : np.cbrt((3*310.9) / (4*pi)) ,
"DC5" : np.cbrt((3*288.0) / (4*pi)) ,
"DC3" : np.cbrt((3*288.0) / (4*pi)) ,
"DC" : np.cbrt((3*288.0) / (4*pi)) ,
"DG5" : np.cbrt((3*318.6) / (4*pi)) ,
"DG3" : np.cbrt((3*318.6) / (4*pi)) ,
"DG" : np.cbrt((3*318.6) / (4*pi)) ,
"DT5" : np.cbrt((3*307.4) / (4*pi)) ,
"DT3" : np.cbrt((3*307.4) / (4*pi)) ,
"DT" : np.cbrt((3*307.4) / (4*pi)) ,
"A5" : np.cbrt((3*315.3) / (4*pi)) ,
"A3" : np.cbrt((3*315.3) / (4*pi)) ,
"A" : np.cbrt((3*315.3) / (4*pi)) ,
"C5" : np.cbrt((3*291.1) / (4*pi)) ,
"C3" : np.cbrt((3*291.1) / (4*pi)) ,
"C" : np.cbrt((3*291.1) / (4*pi)) ,
"G5" : np.cbrt((3*322.0) / (4*pi)) ,
"G3" : np.cbrt((3*322.0) / (4*pi)) ,
"G" : np.cbrt((3*322.0) / (4*pi)) ,
"U5" : np.cbrt((3*286.9) / (4*pi)) ,
"U3" : np.cbrt((3*286.9) / (4*pi)) ,
"U" : np.cbrt((3*286.9) / (4*pi)) }
#Calculate the Miller radius per bead
mradlist = []
#P_mradlist = []
#S_mradlist = []
with open(TOP) as f:
for line in f:
if line[0:4]=='ATOM':
dummy=line.split()
if dummy[2] == "P":
mradlist.append( mrad_dict[dummy[3]])
else:
raise Exception('The TOP file contains atoms which are not phosphates.')
else:
pass
mradlist = np.array(mradlist)
#file should look like: "./SP_resarea.xvg"
if os.path.exists(path_to_resarea + cgmodel + "_resarea.xvg"):
pass
else:
raise FileNotFoundError('''I can't find the resarea.xvg file containing the solvent-exposed surface area of each residue.
Please either run the process.sh file, if you have not already done so, and move the resarea.xvg
file into the current working directory.''')
#gmx_mpi sasa -f after_rot_sim0.xtc -s SP_dsAXA.pdb -n SP_index.ndx -or SP_resarea.xvg -dt 1000 -surface 1 -output 2 3
#determine solvent exposed surface areas of each group from the .xvg file
# this assumes lines in .xvg have the following format:
# res resarea std Sarea std Parea std
# 1 2.211 0.179 1.118 0.102 0.000 0.000
# 2 1.942 0.195 0.603 0.085 0.842 0.076 ...
P_resarea = []
with open('./' + 'P' + "_resarea.xvg") as f:
for line in f:
if line[0] == '#' or line[0] == '@':
pass
else:
dummy=line.split()
if float(dummy[3])==0.0:
pass
else:
P_resarea.append(float(dummy[1]))
P_resarea = np.array(P_resarea)
#create list of CG-site solvent exposed radii
P_rad = ((P_resarea/(4*pi))**0.5)*10
#np.savetxt('avresrad',np.array(rad),fmt="%f")
fratio = (P_rad.sum()/N)/10
print('fratio: ', fratio)
#Calculate the friction coefficients
kB = 1.38066E-23
print('Temperature (K): ',T)
print('Internal viscosity factor: ',intv)
#Use NIST formula for viscosity -- good NEAR room temperature and physiological.
#Won't work higher than, say, 360 K.
if viscosity == 0:
print('No viscosity given. Using the NIST formula, which is only valid for physiological conditions,\n')
print('i.e. between about 273 and 310 K.')
viscosity = (.2131590-1.96290E-3*T+(.00246411*T)**2+(-.0018462*T)**3)
print("Viscosity (Pa s): ", viscosity)
print("fd20", fd20)
rv = mradlist #np.array(mradlist)
rw = P_rad #np.array(rad)
rp = np.zeros(N)
friw = np.zeros(N)
fri = np.zeros(N)
friwt = 0
frit = 0
for i in range(N):
if rw[i] < rv[i]:
rp[i] = (rv[i]**2 - rw[i]**2)**0.5
else:
rp[i] = 0
friw[i] = 6.0*pi*(rw[i]/10)*viscosity
fri[i] = 6.0*pi*(rp[i]/10)*(intv*viscosity) + 6.0*pi*(rw[i]/10)*viscosity
friwt += friw[i]
frit += fri[i]
avfr = frit/float(N)
avfrw = friwt/float(N)
#np.savetxt('avfr',np.array([avfr*1.0E-9]))
#avblsq = float(np.loadtxt('avblsq'))
sigma = (3*kB*T*1E15)/(avblsq*avfr)
#with open('sigma','w') as f:
# f.write('sigma, 1/ps\n')
# f.write(str(sigma)+'\n')
#with open('sigma.dat','w') as f:
# f.write(str(sigma)+'\n')
fric = np.zeros((N+1, 2))
fric[0,0] = avfrw
fric[0,1] = avfr
for i in range(N):
fric[i+1,:] = np.column_stack([friw[i],fri[i]])
#np.savetxt('fric',fric)
return fratio, sigma, fric, avfr