-
Notifications
You must be signed in to change notification settings - Fork 0
/
chembl.py
51 lines (39 loc) · 1.96 KB
/
chembl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
'''
Requires: pip install chembl_webresource_client
Web services documentation: https://www.ebi.ac.uk/chembl/ws

Given a CSV file listing molecule ChEMBL IDs, produce another CSV file that
maps every compound ID to a list of UniProt accession numbers.
'''
import csv
from chembl_webresource_client.new_client import new_client
# Resulting structure: maps each compound ChEMBL ID to a set of target IDs.
# The sets hold target ChEMBL IDs at first and are replaced by sets of UniProt
# accessions further down.
compounds2targets = dict()

# First, parse the input CSV to extract the compound ChEMBL IDs (first column).
# The csv module on Python 3 expects a text-mode file opened with newline=''.
with open('compounds_list.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # Blank lines come back as empty rows; skip them so row[0] cannot
        # raise IndexError.
        if not row:
            continue
        compounds2targets[row[0]] = set()

# We have our source IDs; process them in chunks so that each request carries
# at most `chunk_size` IDs.
chunk_size = 50
# Materialize the keys as a list: dict views cannot be sliced on Python 3.
keys = list(compounds2targets)

for i in range(0, len(keys), chunk_size):
    # We jump from compounds to targets through activities.
    activities = new_client.activity.filter(
        molecule_chembl_id__in=keys[i:i + chunk_size])
    # Extract the target ChEMBL ID from every activity record.
    for act in activities:
        compounds2targets[act['molecule_chembl_id']].add(act['target_chembl_id'])

# Now the dictionary maps compound ChEMBL IDs to target ChEMBL IDs.
# Replace the target ChEMBL IDs with UniProt accessions.
# Iterate over the key list (not the dict itself) because values are rebound
# inside the loop.
for key in keys:
    # We don't know how many targets are assigned to a given compound, so the
    # targets are processed in chunks as well.
    lval = list(compounds2targets[key])
    uniprots = set()
    for i in range(0, len(lval), chunk_size):
        targets = new_client.target.filter(
            target_chembl_id__in=lval[i:i + chunk_size])
        # Collect the accession of every component of every returned target;
        # feeding a generator to update() avoids building and concatenating
        # intermediate lists.
        uniprots.update(
            comp['accession']
            for t in targets
            for comp in t['target_components'])
    compounds2targets[key] = uniprots

# Finally, write the mapping to the output CSV file: one row per compound,
# the compound ID first, followed by its accessions.
with open('compounds_2_targets.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    for key, val in compounds2targets.items():
        writer.writerow([key] + list(val))