-
Notifications
You must be signed in to change notification settings - Fork 4
/
surface_cont_lig.py
209 lines (178 loc) · 7.69 KB
/
surface_cont_lig.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# Developed by Natália Teruel
# Najmanovich Research Group
# Cite this work as Surfaces: A software to quantify and visualize interactions within and between proteins and ligands - Teruel, N. F. B., Borges, V. M., & Najmanovich, R. (2023)
#Imports
import argparse
import sys
import os
import re
import pandas as pd
#Useful dicts
aa = {'C':'CYS', 'D':'ASP', 'S':'SER', 'Q':'GLN', 'K':'LYS', 'I':'ILE', 'P':'PRO', 'T':'THR', 'F':'PHE', 'N':'ASN', 'G':'GLY', 'H':'HIS', 'L':'LEU', 'R':'ARG', 'W':'TRP', 'A':'ALA', 'V':'VAL', 'E':'GLU', 'Y':'TYR', 'M':'MET'}
#Input of pdb file and the 2 chains between which we want to evaluate the interactions
def read_residues(pdb_file, chains, ligands):
list_chains = []
list_atoms = []
f = open(pdb_file, 'r')
Lines = f.readlines()
for line in Lines:
if (line[:4] == 'ATOM' or line[:4] == 'HETA'):
if line[21] in chains:
res_num = re.findall('[+-]?\d+',line[22:27])
res_name = line[17:20]
string = res_name + str(res_num[0]) + line[21]
if string not in list_chains and res_name not in ligands:
list_chains.append(string)
if line[17:20] in ligands:
atom_name = line[12:16].strip()
atom_number = re.findall('[+-]?\d+',line[6:12])
string = str(atom_number[0]) + atom_name
list_atoms.append(string)
atoms_numbers = []
for k in range(len(chains)):
atoms_numbers.append([])
for line in Lines:
for i in range(len(chains)):
if (line[:4] == 'ATOM' or line[:4] == 'HETA') and line[21] == chains[i]:
atoms_numbers[i].append(int(line[6:12]))
return (list_chains, list_atoms, atoms_numbers)
#Function to generate the file with the output of perl code vcont.pl
def vcon(pdb_name):
string = f'{os.path.join(".", "vcon")} {pdb_name} > vcon_file.txt'
os.system(string)
#Functions to fix the names of the chains
def total_chains(pdb_file):
all_chains = []
f = open(pdb_file, 'r')
Lines = f.readlines()
for line in Lines:
if (line[:4] == 'ATOM' or line[:4] == 'HETA'):
if line[21] not in all_chains:
all_chains.append(line[21])
return (all_chains)
def get_chain(atom, og_chain, chains, atom_numbers):
if og_chain in chains:
index = chains.index(og_chain)
else:
return (og_chain)
if atom in atom_numbers[index]:
return (og_chain)
else:
for i in range(len(chains)):
if i != index:
if atom in atom_numbers[i]:
return (chains[i])
return (0)
#Functions to open vcon results, AMINO.def and MC_st0r5.2_6.dat for atom types interactions
def read_atom(line):
atnum = int(line[:6])
attype = line[7:11].strip()
resnum = int(line[12:17])
res = line[19:22]
chain = line[23:24]
return (atnum,attype,resnum,res,chain)
def read_surface(line):
surf = (float(line[-12:-6]))
return (surf)
def read_interactions(file, matrix, chains, ligands, def_file, dat_file, all_chains, atom_numbers, scale_factor):
f = open(file, 'r')
Lines = f.readlines()
for line in Lines:
if line[:1] != '#' and line != '\n':
if line[31:34] == 'Sol':
atnum,attype,resnum,res,chain = read_atom(line)
fixed_chain = get_chain(atnum,chain,all_chains,atom_numbers)
if res in ligands:
atom_name = str(atnum) + attype
else:
main_atnum,main_attype,main_resnum,main_res,main_chain = read_atom(line[22:])
fixed_main_chain = get_chain(main_atnum,main_chain,all_chains,atom_numbers)
if fixed_main_chain != 0:
if main_res not in ligands:
main_residue = main_res+str(main_resnum)+fixed_main_chain
surf = read_surface(line)
if (fixed_main_chain in chains) and (res in ligands):
matrix.loc[main_residue, atom_name] += (surf * score(main_attype, main_res, attype, res, def_file, dat_file) * scale_factor)
return(matrix)
#get the atom type number from def file of choice
def atomtype_num(def_file, res, attyp):
attyp = attyp.replace(" ", "")
f = open(def_file, 'r')
Lines = f.readlines()
for i in range(len(Lines)):
if Lines[i][:3] == res:
ind = Lines[i].index(attyp+':')
ind_end = Lines[i][ind:].index(',')
attype_num = int(Lines[i][ind+len(attyp)+1:ind+ind_end])
f.close()
return(attype_num)
#get the interaction between atom type 1 and atom type 2 from dat file of choice
def interactions(dat_file, at1, at2):
if len(str(at1)) == 1:
at1 = ' ' + str(at1)
if len(str(at2)) == 1:
at2 = ' ' + str(at2)
f = open(dat_file, 'r')
Lines = f.readlines()
for line in Lines:
if str(at1)+'-'+str(at2) == line[5:10] or str(at2)+'-'+str(at1) == line[5:10]:
interact = float(line[13:])
f.close()
return(interact)
#get the final score based on atom type num and interactions
def score(attype1, res1, attype2, res2, def_file, dat_file):
at1 = atomtype_num(def_file, res1, attype1)
at2 = atomtype_num(def_file, res2, attype2)
value = interactions(dat_file, at1, at2)
return (value)
#create file of list of interactions
def list_file(matrix,output_name):
residues1 = []
residues2 = []
values = []
abs_values = []
for i in range(len(matrix.index)):
for j in range(len(matrix.columns)):
num = matrix.loc[matrix.index[i], matrix.columns[j]]
if num != 0:
residues1.append(matrix.index[i])
residues2.append(matrix.columns[j])
values.append(num)
abs_values.append(abs(num))
sorted_residues1 = [x for _,x in sorted(zip(abs_values,residues1),reverse=True)]
sorted_residues2 = [x for _,x in sorted(zip(abs_values,residues2),reverse=True)]
sorted_values = [x for _,x in sorted(zip(abs_values,values),reverse=True)]
f = open("List_" + output_name[:-4] + ".txt", "w")
for k in range(len(values)):
f.write(sorted_residues1[k] + "," + sorted_residues2[k] + "," + str(sorted_values[k]) + "\n")
f.close()
return
def main():
parser= argparse.ArgumentParser(description="the arguments.", add_help=False)
parser.add_argument("-f","--pdb_file", action="store")
parser.add_argument("-c","--chains", action="store")
parser.add_argument("-lig","--ligand", action="store")
parser.add_argument("-o","--output_name", action="store")
parser.add_argument("-def","--atomtypes_definition", action="store")
parser.add_argument("-dat","--atomtypes_interactions", action="store")
args=parser.parse_args()
list_ligands = args.ligand.split(",")
all_chains = total_chains(args.pdb_file)
res, atoms, atom_numbers = read_residues(args.pdb_file, args.chains, list_ligands)
#print (res, atoms)
#print (args.chains, atom_numbers)
vcon(args.pdb_file)
matrix = [ [ 0 for i in range(len(atoms)) ] for j in range(len(res)) ]
matrix = pd.DataFrame(matrix)
matrix.columns = atoms
matrix.index = res
# Determined according to the AB-Bind dataset results
scale_factor = 0.00024329
res, atoms, atom_numbers = read_residues(args.pdb_file, all_chains, list_ligands)
matrix = read_interactions('vcon_file.txt', matrix, args.chains, list_ligands, args.atomtypes_definition, args.atomtypes_interactions, all_chains, atom_numbers, scale_factor)
matrix.to_csv(args.output_name)
list_file(matrix,args.output_name)
# remove files
os.remove("vcon_file.txt")
return ()
main()