forked from AldisiRana/masters_thesis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
references.bib
2361 lines (2190 loc) · 198 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@inproceedings{mcgarry_identifying_2015,
  author    = {McGarry, Kenneth and Slater, Nicole and Amanning, Angela},
  title     = {Identifying candidate drugs for repositioning by graph based modeling techniques based on drug side-effects},
  booktitle = {The 15th {UK} Workshop on Computational Intelligence ({UKCI}-2015)},
  year      = {2015},
  annote    = {Review note: booktitle was missing (required for inproceedings) and was added during cleanup; confirm the venue name against the workshop record.},
}
@article{sheikh_gat2vec:_2018,
  author     = {Sheikh, Nasrullah and Kefato, Zekarias and Montresor, Alberto},
  title      = {gat2vec: representation learning for attributed graphs},
  shorttitle = {gat2vec},
  journal    = {Computing},
  month      = apr,
  year       = {2018},
  issn       = {0010-485X, 1436-5057},
  doi        = {10.1007/s00607-018-0622-9},
  url        = {http://link.springer.com/10.1007/s00607-018-0622-9},
  urldate    = {2019-02-17},
  language   = {en},
}
@article{su_network_2018,
  author   = {Su, Chang and Tong, Jie and Zhu, Yongjun and Cui, Peng and Wang, Fei},
  title    = {Network embedding in biomedical data science},
  journal  = {Briefings in Bioinformatics},
  month    = dec,
  year     = {2018},
  issn     = {1467-5463, 1477-4054},
  doi      = {10.1093/bib/bby117},
  url      = {https://academic.oup.com/bib/advance-article/doi/10.1093/bib/bby117/5228144},
  urldate  = {2019-02-17},
  language = {en},
}
@article{ali_biokeen:_2018,
  author     = {Ali, Mehdi and Hoyt, Charles Tapley and Domingo-Fernandez, Daniel and Lehmann, Jens and Jabeen, Hajira},
  title      = {{BioKEEN}: {A} library for learning and evaluating biological knowledge graph embeddings},
  shorttitle = {{BioKEEN}},
  journal    = {bioRxiv},
  month      = nov,
  year       = {2018},
  doi        = {10.1101/475202},
  url        = {http://biorxiv.org/lookup/doi/10.1101/475202},
  urldate    = {2019-02-17},
  abstract   = {Knowledge graph embeddings (KGEs) have received significant attention in other domains due to their ability to predict links and create dense representations for graphs' nodes and edges. However, the software ecosystem for their application to bioinformatics remains limited and inaccessible for users without expertise in programming and machine learning. Therefore, we developed BioKEEN (Biological KnowlEdge EmbeddiNgs) and PyKEEN (Python KnowlEdge EmbeddiNgs) to facilitate their easy use through an interactive command line interface. Finally, we present a case study in which we used a novel biological pathway mapping resource to predict links that represent pathway crosstalks and hierarchies.
Availability: BioKEEN and PyKEEN are open source Python packages publicly available under the MIT License at https://github.com/SmartDataAnalytics/BioKEEN and https://github.com/SmartDataAnalytics/PyKEEN as well as through PyPI.},
}
@misc{cui_survey_2017,
  author        = {Cui, Peng and Wang, Xiao and Pei, Jian and Zhu, Wenwu},
  title         = {A {Survey} on {Network} {Embedding}},
  month         = nov,
  year          = {2017},
  eprint        = {1711.08752},
  archiveprefix = {arXiv},
  primaryclass  = {cs.SI},
  url           = {http://arxiv.org/abs/1711.08752},
  urldate       = {2019-02-17},
  keywords      = {Computer Science - Social and Information Networks},
  abstract      = {Network embedding assigns nodes in a network to low-dimensional representations and effectively preserves the network structure. Recently, a significant amount of progresses have been made toward this emerging network analysis paradigm. In this survey, we focus on categorizing and then reviewing the current development on network embedding methods, and point out its future research directions. We first summarize the motivation of network embedding. We discuss the classical graph embedding algorithms and their relationship with network embedding. Afterwards and primarily, we provide a comprehensive overview of a large number of network embedding methods in a systematic manner, covering the structure- and property-preserving network embedding methods, the network embedding methods with side information and the advanced information preserving network embedding methods. Moreover, several evaluation approaches for network embedding and some useful online resources, including the network data sets and softwares, are reviewed, too. Finally, we discuss the framework of exploiting these network embedding methods to build an effective system and point out some potential future directions.},
}
@inproceedings{grover_node2vec:_2016,
  author     = {Grover, Aditya and Leskovec, Jure},
  title      = {node2vec: {Scalable} {Feature} {Learning} for {Networks}},
  shorttitle = {node2vec},
  booktitle  = {Proceedings of the 22nd {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining} - {KDD} '16},
  pages      = {855--864},
  year       = {2016},
  publisher  = {ACM Press},
  address    = {San Francisco, California, USA},
  isbn       = {978-1-4503-4232-2},
  doi        = {10.1145/2939672.2939754},
  url        = {http://dl.acm.org/citation.cfm?doid=2939672.2939754},
  urldate    = {2019-02-17},
  language   = {en},
}
@inproceedings{tang_line:_2015,
  author     = {Tang, Jian and Qu, Meng and Wang, Mingzhe and Zhang, Ming and Yan, Jun and Mei, Qiaozhu},
  title      = {{LINE}: {Large}-scale {Information} {Network} {Embedding}},
  shorttitle = {{LINE}},
  booktitle  = {Proceedings of the 24th {International} {Conference} on {World} {Wide} {Web} - {WWW} '15},
  pages      = {1067--1077},
  year       = {2015},
  publisher  = {ACM Press},
  address    = {Florence, Italy},
  isbn       = {978-1-4503-3469-3},
  doi        = {10.1145/2736277.2741093},
  url        = {http://dl.acm.org/citation.cfm?doid=2736277.2741093},
  urldate    = {2019-02-17},
  language   = {en},
}
@article{doria_contribution_2016,
  author   = {Doria, Margaux and Maugest, Lucie and Moreau, Thibault and Lizard, G{\'e}rard and Vejux, Anne},
  title    = {Contribution of cholesterol and oxysterols to the pathophysiology of {Parkinson}'s disease},
  journal  = {Free Radical Biology and Medicine},
  volume   = {101},
  pages    = {393--400},
  month    = dec,
  year     = {2016},
  issn     = {0891-5849},
  doi      = {10.1016/j.freeradbiomed.2016.10.008},
  url      = {https://linkinghub.elsevier.com/retrieve/pii/S0891584916304506},
  urldate  = {2019-02-22},
  language = {en},
}
@article{hoyt_re-curation_2019,
  author     = {Hoyt, Charles Tapley and Domingo-Fernandez, Daniel and Aldisi, Rana and Xu, Lingling and Kolpeja, Kristian and Spalek, Sandra and Wollert, Esther and Bachman, John and Gyori, Benjamin and Greene, Patrick and Hofmann-Apitius, Martin},
  title      = {Re-curation and {Rational} {Enrichment} of {Knowledge} {Graphs} in {Biological} {Expression} {Language}},
  shorttitle = {Re-curation and {Rational} {Enrichment} of {Knowledge} {Graphs} in {Biological} {Expression} {Language}},
  journal    = {bioRxiv},
  month      = jan,
  year       = {2019},
  doi        = {10.1101/536409},
  url        = {http://biorxiv.org/lookup/doi/10.1101/536409},
  urldate    = {2019-02-24},
  abstract   = {The rapid accumulation of new biomedical literature not only causes curated knowledge graphs to become outdated and incomplete, but also makes manual curation an impractical and unsustainable solution. Automated or semi-automated workflows are necessary to assist in prioritizing and curating the literature to update and enrich knowledge graphs. We have developed two workflows: one for re-curating a given knowledge graph to assure its syntactic and semantic quality and another for rationally enriching it by manually revising automatically extracted relations for nodes with low information density. We applied these workflows to the knowledge graphs encoded in Biological Expression Language from the NeuroMMSig database using content that was pre-extracted from MEDLINE abstracts and PubMed Central full text articles using text mining output integrated by INDRA. We have made this workflow freely available at https://github.com/bel-enrichment.},
}
@article{ciani_role_2014,
  author   = {Ciani, Oriana and Jommi, Claudio},
  title    = {The role of health technology assessment bodies in shaping drug development},
  journal  = {Drug Design, Development and Therapy},
  pages    = {2273},
  month    = nov,
  year     = {2014},
  issn     = {1177-8881},
  doi      = {10.2147/DDDT.S49935},
  url      = {http://www.dovepress.com/the-role-of-health-technology-assessment-bodies-in-shaping-drug-develo-peer-reviewed-article-DDDT},
  urldate  = {2019-03-07},
  language = {en},
}
@article{dimitri_drugclust:_2017,
  author     = {Dimitri, Giovanna Maria and Li{\'o}, Pietro},
  title      = {{DrugClust}: {A} machine learning approach for drugs side effects prediction},
  shorttitle = {{DrugClust}},
  journal    = {Computational Biology and Chemistry},
  volume     = {68},
  pages      = {204--210},
  month      = jun,
  year       = {2017},
  issn       = {1476-9271},
  doi        = {10.1016/j.compbiolchem.2017.03.008},
  url        = {https://linkinghub.elsevier.com/retrieve/pii/S1476927116302195},
  urldate    = {2019-03-07},
  language   = {en},
}
@article{boolell_sildenafil:_1996,
  author     = {Boolell, M. and Allen, M. J. and Ballard, S. A. and Gepi-Attee, S. and Muirhead, G. J. and Naylor, A. M. and Osterloh, I. H. and Gingell, C.},
  title      = {Sildenafil: an orally active type 5 cyclic {GMP}-specific phosphodiesterase inhibitor for the treatment of penile erectile dysfunction},
  shorttitle = {Sildenafil},
  journal    = {International Journal of Impotence Research},
  volume     = {8},
  number     = {2},
  pages      = {47--52},
  month      = jun,
  year       = {1996},
  issn       = {0955-9930},
  pmid       = {8858389},
  language   = {eng},
  keywords   = {Administration, Oral, Cross-Over Studies, Cyclic GMP, Double-Blind Method, Enzyme Inhibitors, Erectile Dysfunction, Humans, Isoenzymes, Male, Middle Aged, Penis, Phosphodiesterase Inhibitors, Phosphoric Diester Hydrolases, Piperazines, Purines, Sildenafil Citrate, Sulfones, Treatment Outcome},
  abstract   = {Sildenafil (Viagra, UK-92,480) is a novel oral agent under development for the treatment of penile erectile dysfunction. Erection is dependent on nitric oxide and its second messenger, cyclic guanosine monophosphate (cGMP). However, the relative importance of phosphodiesterase (PDE) isozymes is not clear. We have identified both cGMP- and cyclic adenosine monophosphate-specific phosphodiesterases (PDEs) in human corpora cavernosa in vitro. The main PDE activity in this tissue was due to PDE5, with PDE2 and 3 also identified. Sildenafil is a selective inhibitor of PDE5 with a mean IC50 of 0.0039 microM. In human volunteers, we have shown sildenafil to have suitable pharmacokinetic and pharmacodynamic properties (rapid absorption, relatively short half-life, no significant effect on heart rate and blood pressure) for an oral agent to be taken, as required, prior to sexual activity. Moreover, in a clinical study of 12 patients with erectile dysfunction without an established organic cause, we have shown sildenafil to enhance the erectile response (duration and rigidity of erection) to visual sexual stimulation, thus highlighting the important role of PDE5 in human penile erection. Sildenafil holds promise as a new effective oral treatment for penile erectile dysfunction.},
}
@article{campillos_drug_2008,
  author   = {Campillos, Monica and Kuhn, Michael and Gavin, Anne-Claude and Jensen, Lars Juhl and Bork, Peer},
  title    = {Drug target identification using side-effect similarity},
  journal  = {Science},
  volume   = {321},
  number   = {5886},
  pages    = {263--266},
  month    = jul,
  year     = {2008},
  issn     = {1095-9203},
  doi      = {10.1126/science.1158140},
  pmid     = {18621671},
  language = {eng},
  keywords = {Humans, Adverse Drug Reaction Reporting Systems, Algorithms, Chemistry, Pharmaceutical, Databases, Factual, Drug Evaluation, Preclinical, Drug Labeling, Drug Therapy, Drug-Related Side Effects and Adverse Reactions, Pharmaceutical Preparations, Probability, Proteins},
  abstract = {Targets for drugs have so far been predicted on the basis of molecular or cellular features, for example, by exploiting similarity in chemical structure or in activity across cell lines. We used phenotypic side-effect similarities to infer whether two drugs share a target. Applied to 746 marketed drugs, a network of 1018 side effect-driven drug-drug relations became apparent, 261 of which are formed by chemically dissimilar drugs from different therapeutic indications. We experimentally tested 20 of these unexpected drug-drug relations and validated 13 implied drug-target relations by in vitro binding assays, of which 11 reveal inhibition constants equal to less than 10 micromolar. Nine of these were tested and confirmed in cell assays, documenting the feasibility of using phenotypic information to infer molecular interactions and hinting at new uses of marketed drugs.},
}
@article{vargesson_thalidomideinduced_2015,
  author     = {Vargesson, Neil},
  title      = {Thalidomide-induced teratogenesis: {History} and mechanisms},
  shorttitle = {Thalidomide-induced teratogenesis},
  journal    = {Birth Defects Research Part C: Embryo Today: Reviews},
  volume     = {105},
  number     = {2},
  pages      = {140--156},
  month      = jun,
  year       = {2015},
  issn       = {1542-975X},
  doi        = {10.1002/bdrc.21096},
  pmid       = {26043938},
  pmcid      = {PMC4737249},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4737249/},
  urldate    = {2019-03-07},
  abstract   = {Nearly 60 years ago thalidomide was prescribed to treat morning sickness in pregnant women. What followed was the biggest man‐made medical disaster ever, where over 10,000 children were born with a range of severe and debilitating malformations. Despite this, the drug is now used successfully to treat a range of adult conditions, including multiple myeloma and complications of leprosy. Tragically, a new generation of thalidomide damaged children has been identified in Brazil. Yet, how thalidomide caused its devastating effects in the forming embryo remains unclear. However, studies in the past few years have greatly enhanced our understanding of the molecular mechanisms the drug. This review will look at the history of the drug, and the range and type of damage the drug caused, and outline the mechanisms of action the drug uses including recent molecular advances and new findings. Some of the remaining challenges facing thalidomide biologists are also discussed. Birth Defects Research (Part C) 105:140–156, 2015. © 2015 The Authors Birth Defects Research Part C: Embryo Today: Reviews Published by Wiley Periodicals, Inc.},
}
@misc{zhang_network_2017,
  author        = {Zhang, Daokun and Yin, Jie and Zhu, Xingquan and Zhang, Chengqi},
  title         = {Network {Representation} {Learning}: {A} {Survey}},
  shorttitle    = {Network {Representation} {Learning}},
  month         = dec,
  year          = {2017},
  eprint        = {1801.05852},
  archiveprefix = {arXiv},
  primaryclass  = {cs.SI},
  url           = {http://arxiv.org/abs/1801.05852},
  urldate       = {2019-03-07},
  keywords      = {Computer Science - Social and Information Networks, Computer Science - Machine Learning, Statistics - Machine Learning},
  abstract      = {With the widespread use of information technologies, information networks are becoming increasingly popular to capture complex relationships across various disciplines, such as social networks, citation networks, telecommunication networks, and biological networks. Analyzing these networks sheds light on different aspects of social life such as the structure of societies, information diffusion, and communication patterns. In reality, however, the large scale of information networks often makes network analytic tasks computationally expensive or intractable. Network representation learning has been recently proposed as a new learning paradigm to embed network vertices into a low-dimensional vector space, by preserving network topology structure, vertex content, and other side information. This facilitates the original network to be easily handled in the new vector space for further analysis. In this survey, we perform a comprehensive review of the current literature on network representation learning in the data mining and machine learning field. We propose new taxonomies to categorize and summarize the state-of-the-art network representation learning techniques according to the underlying learning mechanisms, the network information intended to preserve, as well as the algorithmic designs and methodologies. We summarize evaluation protocols used for validating network representation learning including published benchmark datasets, evaluation methods, and open source algorithms. We also perform empirical studies to compare the performance of representative algorithms on common datasets, and analyze their computational complexity. Finally, we suggest promising research directions to facilitate future study.},
}
@misc{noauthor_sider_nodate,
  key     = {SIDER},
  title   = {{SIDER} {Side} {Effect} {Resource}},
  url     = {http://sideeffects.embl.de/},
  urldate = {2019-03-14},
}
@article{kuhn_side_2010,
  author  = {Kuhn, Michael and Campillos, Monica and Letunic, Ivica and Jensen, Lars Juhl and Bork, Peer},
  title   = {A side effect resource to capture phenotypic effects of drugs},
  journal = {Molecular Systems Biology},
  volume  = {6},
  month   = jan,
  year    = {2010},
  issn    = {1744-4292},
  doi     = {10.1038/msb.2009.98},
  url     = {http://msb.embopress.org/cgi/doi/10.1038/msb.2009.98},
  urldate = {2019-03-14},
}
@article{pourpak_understanding_2008,
  author     = {Pourpak, Zahra and Fazlollahi, Mohammad R. and Fattahi, Fatemeh},
  title      = {Understanding adverse drug reactions and drug allergies: principles, diagnosis and treatment aspects},
  shorttitle = {Understanding adverse drug reactions and drug allergies},
  journal    = {Recent Patents on Inflammation \& Allergy Drug Discovery},
  volume     = {2},
  number     = {1},
  pages      = {24--46},
  month      = jan,
  year       = {2008},
  issn       = {1872-213X},
  pmid       = {19075990},
  language   = {eng},
  keywords   = {Humans, Treatment Outcome, Adverse Drug Reaction Reporting Systems, Drug-Related Side Effects and Adverse Reactions, Drug Hypersensitivity, Patents as Topic, Quality of Life, Risk Factors},
  abstract   = {Adverse Drug Reactions (ADRs) and drug allergies- as a subset of ADRs- make a significant public health concern, complicating 5 to 15\% of therapeutic drug courses. They may result in diminished quality of life, increased physician visits, health care costs, hospitalizations, and even death. The incidence of serious ADRs in hospitalized patients was estimated to be 6.7\% and for fatal ADRs to be 0.32\%, so recognizing and taking action on ADRs is an important aspect of medication management. Allergic reactions to drugs refer to those ADRs that involve immune mechanisms which account up to 15\% of ADRs and can be identified as being a type I through IV immune reaction that the most common immunologic mechanism is IgE-mediated- type I reaction. Clinical manifestations of allergic reactions range from pruritus and rash to serious reactions such as systemic anaphylaxis and cardiovascular emergencies and they are responsible for 2-3\% of hospitalized patients. Health professionals should be aware of the ADRs presenting clinical features and the risk factors and should be able to differentiate between allergic and non-allergic adverse drug reactions. This will lead to increased opportunities to review drug selection and prescribing practices affecting patients' outcome. This article will review the definition and estimated incidence, the features, classification and types of ADRs and drug allergies and related patents. It will highlight the role of detecting, reporting, and assessing suspected ADRs and drug allergies in the most clinically relevant drugs group. Priorities in the evaluation and management of the conditions of patients who have experienced allergic and non-allergic drug reactions also will be discussed.},
}
@article{mcgarry_resko:_2018,
  author     = {McGarry, Ken and Graham, Yitka and McDonald, Sharon and Rashid, Anuam},
  title      = {{RESKO}: {Repositioning} drugs by using side effects and knowledge from ontologies},
  shorttitle = {{RESKO}},
  journal    = {Knowledge-Based Systems},
  volume     = {160},
  pages      = {34--48},
  month      = nov,
  year       = {2018},
  issn       = {0950-7051},
  doi        = {10.1016/j.knosys.2018.06.017},
  url        = {https://linkinghub.elsevier.com/retrieve/pii/S0950705118303332},
  urldate    = {2019-03-15},
  language   = {en},
}
@article{kuhn_systematic_2014,
  author   = {Kuhn, M. and Al Banchaabouchi, M. and Campillos, M. and Jensen, L. J. and Gross, C. and Gavin, A.-C. and Bork, P.},
  title    = {Systematic identification of proteins that elicit drug side effects},
  journal  = {Molecular Systems Biology},
  volume   = {9},
  number   = {1},
  pages    = {663},
  month    = apr,
  year     = {2013},
  issn     = {1744-4292},
  doi      = {10.1038/msb.2013.10},
  url      = {http://msb.embopress.org/cgi/doi/10.1038/msb.2013.10},
  urldate  = {2019-03-15},
  language = {en},
}
@article{wu_computational_2013,
  author   = {Wu, Chao and Gudivada, Ranga C. and Aronow, Bruce J. and Jegga, Anil G.},
  title    = {Computational drug repositioning through heterogeneous network clustering},
  journal  = {BMC Systems Biology},
  volume   = {7},
  number   = {Suppl 5},
  pages    = {S6},
  year     = {2013},
  issn     = {1752-0509},
  doi      = {10.1186/1752-0509-7-S5-S6},
  pmid     = {24564976},
  pmcid    = {PMC4029299},
  language = {eng},
  keywords = {Humans, Databases, Factual, Alzheimer Disease, Amyloid Precursor Protein Secretases, Anilides, Basal Cell Nevus Syndrome, Cluster Analysis, Computational Biology, Computer Graphics, Drug Repositioning, Hidradenitis Suppurativa, Phenotype, Protease Inhibitors, Pyridines},
  abstract = {BACKGROUND: Given the costly and time consuming process and high attrition rates in drug discovery and development, drug repositioning or drug repurposing is considered as a viable strategy both to replenish the drying out drug pipelines and to surmount the innovation gap. Although there is a growing recognition that mechanistic relationships from molecular to systems level should be integrated into drug discovery paradigms, relatively few studies have integrated information about heterogeneous networks into computational drug-repositioning candidate discovery platforms.
RESULTS: Using known disease-gene and drug-target relationships from the KEGG database, we built a weighted disease and drug heterogeneous network. The nodes represent drugs or diseases while the edges represent shared gene, biological process, pathway, phenotype or a combination of these features. We clustered this weighted network to identify modules and then assembled all possible drug-disease pairs (putative drug repositioning candidates) from these modules. We validated our predictions by testing their robustness and evaluated them by their overlap with drug indications that were either reported in published literature or investigated in clinical trials.
CONCLUSIONS: Previous computational approaches for drug repositioning focused either on drug-drug and disease-disease similarity approaches whereas we have taken a more holistic approach by considering drug-disease relationships also. Further, we considered not only gene but also other features to build the disease drug networks. Despite the relative simplicity of our approach, based on the robustness analyses and the overlap of some of our predictions with drug indications that are under investigation, we believe our approach could complement the current computational approaches for drug repositioning candidate discovery.},
}
@article{luo_drug_2016,
  author   = {Luo, Huimin and Wang, Jianxin and Li, Min and Luo, Junwei and Peng, Xiaoqing and Wu, Fang-Xiang and Pan, Yi},
  title    = {Drug repositioning based on comprehensive similarity measures and {Bi}-{Random} walk algorithm},
  journal  = {Bioinformatics},
  volume   = {32},
  number   = {17},
  pages    = {2664--2671},
  year     = {2016},
  issn     = {1367-4811},
  doi      = {10.1093/bioinformatics/btw228},
  pmid     = {27153662},
  language = {eng},
  keywords = {Algorithms, Computational Biology, Drug Repositioning, Models, Theoretical},
  abstract = {MOTIVATION: Drug repositioning, which aims to identify new indications for existing drugs, offers a promising alternative to reduce the total time and cost of traditional drug development. Many computational strategies for drug repositioning have been proposed, which are based on similarities among drugs and diseases. Current studies typically use either only drug-related properties (e.g. chemical structures) or only disease-related properties (e.g. phenotypes) to calculate drug or disease similarity, respectively, while not taking into account the influence of known drug-disease association information on the similarity measures.
RESULTS: In this article, based on the assumption that similar drugs are normally associated with similar diseases and vice versa, we propose a novel computational method named MBiRW, which utilizes some comprehensive similarity measures and Bi-Random walk (BiRW) algorithm to identify potential novel indications for a given drug. By integrating drug or disease features information with known drug-disease associations, the comprehensive similarity measures are firstly developed to calculate similarity for drugs and diseases. Then drug similarity network and disease similarity network are constructed, and they are incorporated into a heterogeneous network with known drug-disease interactions. Based on the drug-disease heterogeneous network, BiRW algorithm is adopted to predict novel potential drug-disease associations. Computational experiment results from various datasets demonstrate that the proposed approach has reliable prediction performance and outperforms several recent computational drug repositioning approaches. Moreover, case studies of five selected drugs further confirm the superior performance of our method to discover potential indications for drugs practically.
AVAILABILITY AND IMPLEMENTATION: http://github.com//bioinfomaticsCSU/MBiRW CONTACT: [email protected]
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
}
@article{wishart_drugbank_2018,
  author     = {Wishart, David S and Feunang, Yannick D and Guo, An C and Lo, Elvis J and Marcu, Ana and Grant, Jason R and Sajed, Tanvir and Johnson, Daniel and Li, Carin and Sayeeda, Zinat and Assempour, Nazanin and Iynkkaran, Ithayavani and Liu, Yifeng and Maciejewski, Adam and Gale, Nicola and Wilson, Alex and Chin, Lucy and Cummings, Ryan and Le, Diana and Pon, Allison and Knox, Craig and Wilson, Michael},
  title      = {{DrugBank} 5.0: a major update to the {DrugBank} database for 2018},
  shorttitle = {{DrugBank} 5.0},
  journal    = {Nucleic Acids Research},
  volume     = {46},
  number     = {D1},
  pages      = {D1074--D1082},
  month      = jan,
  year       = {2018},
  issn       = {0305-1048, 1362-4962},
  doi        = {10.1093/nar/gkx1037},
  url        = {http://academic.oup.com/nar/article/46/D1/D1074/4602867},
  urldate    = {2019-05-07},
  language   = {en},
}
@article{montanari_virtual_2017,
  author   = {Montanari, Floriane and Cseke, Anna and Wlcek, Katrin and Ecker, Gerhard F.},
  title    = {Virtual {Screening} of {DrugBank} {Reveals} {Two} {Drugs} as {New} {BCRP} {Inhibitors}},
  journal  = {SLAS DISCOVERY: Advancing Life Sciences R\&D},
  volume   = {22},
  number   = {1},
  pages    = {86--93},
  month    = jan,
  year     = {2017},
  issn     = {2472-5552, 2472-5560},
  doi      = {10.1177/1087057116657513},
  url      = {http://journals.sagepub.com/doi/10.1177/1087057116657513},
  urldate  = {2019-05-07},
  language = {en},
}
@article{wang_exploring_2014,
  author   = {Wang, Fei and Zhang, Ping and Cao, Nan and Hu, Jianying and Sorrentino, Robert},
  title    = {Exploring the associations between drug side-effects and therapeutic indications},
  journal  = {Journal of Biomedical Informatics},
  volume   = {51},
  pages    = {15--23},
  year     = {2014},
  issn     = {1532-0464},
  doi      = {10.1016/j.jbi.2014.03.014},
  url      = {http://www.sciencedirect.com/science/article/pii/S1532046414000811},
  keywords = {Associations, Side-effects, Theraputic indications},
  abstract = {Drug therapeutic indications and side-effects are both measurable patient phenotype changes in response to the treatment. Inferring potential drug therapeutic indications and identifying clinically interesting drug side-effects are both important and challenging tasks. Previous studies have utilized either chemical structures or protein targets to predict indications and side-effects. In this study, we compared drug therapeutic indication prediction using various information including chemical structures, protein targets and side-effects. We also compared drug side-effect prediction with various information sources including chemical structures, protein targets and therapeutic indication. Prediction performance based on 10-fold cross-validation demonstrates that drug side-effects and therapeutic indications are the most predictive information source for each other. In addition, we extracted 6706 statistically significant indication-side-effect associations from all known drug-disease and drug-side-effect relationships. We further developed a novel user interface that allows the user to interactively explore these associations in the form of a dynamic bipartitie graph. Many relationship pairs provide explicit repositioning hypotheses (e.g., drugs causing postural hypotension are potential candidates for hypertension) and clear adverse-reaction watch lists (e.g., drugs for heart failure possibly cause impotence). All data sets and highly correlated disease-side-effect relationships are available at http://astro.temple.edu/∼tua87106/druganalysis.html.},
}
% Ashburn and Thor (2004): drug repositioning, Nature Reviews Drug Discovery 3(8).
@article{ashburn_drug_2004,
  author     = {Ashburn, Ted T. and Thor, Karl B.},
  title      = {Drug repositioning: identifying and developing new uses for existing drugs},
  shorttitle = {Drug repositioning},
  journal    = {Nature Reviews Drug Discovery},
  volume     = {3},
  number     = {8},
  pages      = {673--683},
  year       = {2004},
  month      = aug,
  issn       = {1474-1776, 1474-1784},
  doi        = {10.1038/nrd1468},
  url        = {http://www.nature.com/articles/nrd1468},
  urldate    = {2019-06-24},
  language   = {en},
}
% Parthasarathi and Dhawan (2018): book chapter in "In Vitro Toxicology".
% The chapter number is stored in the chapter field so the style decides how to print it.
@incollection{parthasarathi_chapter_2018,
  author    = {Parthasarathi, Ramakrishnan and Dhawan, Alok},
  editor    = {Dhawan, Alok and Kwon, Seok},
  title     = {{In} {Silico} {Approaches} for {Predictive} {Toxicology}},
  booktitle = {In {Vitro} {Toxicology}},
  chapter   = {5},
  pages     = {91--109},
  publisher = {Academic Press},
  year      = {2018},
  month     = jan,
  isbn      = {978-0-12-804667-8},
  doi       = {10.1016/B978-0-12-804667-8.00005-5},
  url       = {http://www.sciencedirect.com/science/article/pii/B9780128046678000055},
  urldate   = {2019-07-04},
  keywords  = {Descriptors, In silico toxicology, Predictive toxicology, QSAR, Quantitative structure toxicity relationship, Toxicity database, Toxicity endpoints},
  abstract  = {In silico toxicology plays a vital role in the assessment of safety/toxicity of chemicals and the drug development process. Computational approaches continue to increase in capability and applicability to predictive toxicology. These advanced methodology are utilized in various stages of the development of substance by prediction of properties that correlate with toxicity endpoints, structure activity relationship models for new chemical formulations, and building/retrieving information on chemical databases. This chapter covers different aspects of computational approaches that focuses on in silico toxicology, that aims to complement prevailing in vitro/in vivo toxicity tests to predict toxicity and prioritize chemicals/drugs to minimize harmful effects. The state-of-the-art computational approaches used in in silico toxicology are highlighted in this chapter. Special attention has been drawn on the usefulness of quantitative structure activity relationship models for toxicity prediction, descriptor development for predictive toxicology and databases/in silico tools used for toxicity prediction.},
}
% Ehrlinger and Woess (2016): definition of the term "knowledge graph", SEMANTiCS.
@inproceedings{ehrlinger_towards_2016,
  author    = {Ehrlinger, Lisa and Wöß, Wolfram},
  title     = {Towards a {Definition} of {Knowledge} {Graphs}},
  booktitle = {{SEMANTiCS}},
  year      = {2016},
  keywords  = {Cloud computing, Documentation, Knowledge Graph, Linked data, Semantic Web},
  abstract  = {Recently, the term knowledge graph has been used frequently in research and business, usually in close association with Semantic Web technologies, linked data, large-scale data analytics and cloud computing. Its popularity is clearly influenced by the introduction of Google’s Knowledge Graph in 2012, and since then the term has been widely used without a definition. A large variety of interpretations has hampered the evolution of a common understanding of knowledge graphs. Numerous research papers refer to Google’s Knowledge Graph, although no official documentation about the used methods exists. The prerequisite for widespread academic and commercial adoption of a concept or technology is a common understanding, based ideally on a definition that is free from ambiguity. We tackle this issue by discussing and defining the term knowledge graph, considering its history and diversity in interpretations and use. Our goal is to propose a definition of knowledge graphs that serves as basis for discussions on this topic and contributes to a common vision.},
}
% Foster (2015): Basic Pharmacology textbook, Elsevier.
@book{foster_basic_2015,
  author    = {Foster, R. W.},
  title     = {Basic {Pharmacology}},
  publisher = {Elsevier},
  year      = {2015},
  month     = may,
  isbn      = {978-1-4831-4202-9},
  language  = {en},
  note      = {Google-Books-ID: WbMgBQAAQBAJ},
  keywords  = {Medical / Pharmacology},
  abstract  = {Basic Pharmacology, Third Edition aims to present accounts of drug actions and their mechanisms in a compact, inexpensive, and updated form, and explain the basis of the therapeutic exploitation of drugs. This book is divided into sections that follow a particular theme and is introduced by the relevant pharmacological general principles. In each section, the major groups of drugs related to the theme are discussed with detailed expositions of the important “type substances. Drugs of lesser importance are placed in proper context. A list of abbreviations that are referenced throughout the book is provided after the introduction. An index is also included at the end. This edition is designed to help students taking pharmacology, including medical students of subjects affiliated to medicine, to appreciate the rationale underlying the uses of drugs in therapeutics.},
}
% Satoskar, Bhandarkar and Rege: Pharmacology and Pharmacotherapeutics.
% Three separate authors, each in "Last, First" form and joined with "and".
@book{satoskar_pharmacology_1973,
  author    = {Satoskar, R. S. and Bhandarkar, S. D. and Rege, Nirmala N.},
  title     = {Pharmacology and {Pharmacotherapeutics}},
  publisher = {Popular Prakashan},
  year      = {1973},
  isbn      = {978-81-7991-527-1},
  language  = {en},
  note      = {Google-Books-ID: 7d493VOD4P8C},
}
% Rang, Ritter, Flower and Henderson (2014): Rang and Dale's Pharmacology.
% The title holds only the book's name; the abstract identifies this as the 8th edition.
@book{rang_rang_2014,
  author     = {Rang, Humphrey P. and Ritter, James M. and Flower, Rod J. and Henderson, Graeme},
  title      = {Rang \& {Dale}'s {Pharmacology}},
  shorttitle = {Rang \& {Dale}'s {Pharmacology}},
  publisher  = {Elsevier Health Sciences},
  year       = {2014},
  month      = dec,
  isbn       = {978-0-7020-5497-6},
  language   = {en},
  note       = {Google-Books-ID: iOLTBQAAQBAJ},
  keywords   = {Medical / Pharmacology},
  abstract   = {For 25 years, Rang and Dale’s Pharmacology has delivered the core basic and clinical science information required by students and healthcare practitioners worldwide. Authors H. P. Rang, J. M. Ritter, R. J. Flower, and G. Henderson have ensured that the 8th Edition of this easy-to-read, comprehensive text continues the tradition of excellence with new coverage of drugs affecting the skin and new components online at studentconsult.com.Consult this title on your favorite e-reader. Get the essential pharmacology information you need from one authoritative source with an outstanding global reputation for excellence. Progress confidently through all relevant aspects of pharmacology, beginning with a molecular understanding of receptors and drug actions through clinical uses of key groups of drugs. Find important content quickly thanks to a color-coded layout that enables easy navigation and cross-referencing. Master difficult concepts with Key Points boxes, Clinical Uses boxes, and full-color illustrations throughout. Stay up to date with new information in the field, including an all-new chapter on drugs that affect the skin. Take advantage of new and unique features online, including 500+ chapter-specific multiple choice questions for immediate self-assessment. eBook version included! For the first time, you can access the entire book online or offline across all devices with the Student Consult eBook!},
}
% Gashaw et al. (2011): criteria for a good drug target, Drug Discovery Today 16(23--24).
% This is a double issue, so number holds a range written with an en dash.
@article{gashaw_what_2011,
  author   = {Gashaw, Isabella and Ellinghaus, Peter and Sommer, Anette and Asadullah, Khusru},
  title    = {What makes a good drug target?},
  journal  = {Drug Discovery Today},
  volume   = {16},
  number   = {23--24},
  pages    = {1037--1043},
  year     = {2011},
  month    = dec,
  issn     = {1359-6446},
  doi      = {10.1016/j.drudis.2011.09.007},
  url      = {https://linkinghub.elsevier.com/retrieve/pii/S1359644611002972},
  urldate  = {2019-07-17},
  language = {en},
}
% Kuhn et al. (2016): SIDER 4 database paper, Nucleic Acids Research 44(D1).
% number holds the issue designator D1, the form used by the other Nucleic Acids Research entries in this file.
@article{kuhn_sider_2016,
  author   = {Kuhn, Michael and Letunic, Ivica and Jensen, Lars Juhl and Bork, Peer},
  title    = {The {SIDER} database of drugs and side effects},
  journal  = {Nucleic Acids Research},
  volume   = {44},
  number   = {D1},
  pages    = {D1075--D1079},
  year     = {2016},
  month    = jan,
  issn     = {0305-1048},
  doi      = {10.1093/nar/gkv1075},
  url      = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4702794/},
  urldate  = {2019-07-21},
  pmid     = {26481350},
  pmcid    = {PMC4702794},
  abstract = {Unwanted side effects of drugs are a burden on patients and a severe impediment in the development of new drugs. At the same time, adverse drug reactions (ADRs) recorded during clinical trials are an important source of human phenotypic data. It is therefore essential to combine data on drugs, targets and side effects into a more complete picture of the therapeutic mechanism of actions of drugs and the ways in which they cause adverse reactions. To this end, we have created the SIDER (‘Side Effect Resource’, http://sideeffects.embl.de) database of drugs and ADRs. The current release, SIDER 4, contains data on 1430 drugs, 5880 ADRs and 140 064 drug–ADR pairs, which is an increase of 40\% compared to the previous version. For more fine-grained analyses, we extracted the frequency with which side effects occur from the package inserts. This information is available for 39\% of drug–ADR pairs, 19\% of which can be compared to the frequency under placebo treatment. SIDER furthermore contains a data set of drug indications, extracted from the package inserts using Natural Language Processing. These drug indications are used to reduce the rate of false positives by identifying medical terms that do not correspond to ADRs.},
}
% Goyal and Ferrara (2018): graph embedding survey, Knowledge-Based Systems 151.
% The journal version is the primary record; the arXiv preprint id is kept in the eprint fields.
% NOTE(review): primaryclass cs.SI is taken from the first arXiv keyword -- confirm.
@article{goyal_graph_2018,
  author        = {Goyal, Palash and Ferrara, Emilio},
  title         = {Graph {Embedding} {Techniques}, {Applications}, and {Performance}: {A} {Survey}},
  shorttitle    = {Graph {Embedding} {Techniques}, {Applications}, and {Performance}},
  journal       = {Knowledge-Based Systems},
  volume        = {151},
  pages         = {78--94},
  year          = {2018},
  month         = jul,
  issn          = {0950-7051},
  doi           = {10.1016/j.knosys.2018.03.022},
  eprint        = {1705.02801},
  archiveprefix = {arXiv},
  primaryclass  = {cs.SI},
  url           = {http://arxiv.org/abs/1705.02801},
  urldate       = {2019-07-21},
  keywords      = {Computer Science - Social and Information Networks, Computer Science - Machine Learning, Physics - Data Analysis, Statistics and Probability},
  abstract      = {Graphs, such as social networks, word co-occurrence networks, and communication networks, occur naturally in various real-world applications. Analyzing them yields insight into the structure of society, language, and different patterns of communication. Many approaches have been proposed to perform the analysis. Recently, methods which use the representation of graph nodes in vector space have gained traction from the research community. In this survey, we provide a comprehensive and structured analysis of various graph embedding techniques proposed in the literature. We first introduce the embedding task and its challenges such as scalability, choice of dimensionality, and features to be preserved, and their possible solutions. We then present three categories of approaches based on factorization methods, random walks, and deep learning, with examples of representative algorithms in each category and analysis of their performance on various tasks. We evaluate these state-of-the-art methods on a few common datasets and compare their performance against one another. Our analysis concludes by suggesting some potential applications and future directions. We finally present the open-source Python library we developed, named GEM (Graph Embedding Methods, available at https://github.com/palash1992/GEM), which provides all presented algorithms within a unified interface to foster and facilitate research on the topic.},
}
% Wang et al. (2017): knowledge graph embedding survey, IEEE TKDE 29(12).
@article{wang_knowledge_2017,
  author     = {Wang, Quan and Mao, Zhendong and Wang, Bin and Guo, Li},
  title      = {Knowledge {Graph} {Embedding}: {A} {Survey} of {Approaches} and {Applications}},
  shorttitle = {Knowledge {Graph} {Embedding}},
  journal    = {IEEE Transactions on Knowledge and Data Engineering},
  volume     = {29},
  number     = {12},
  pages      = {2724--2743},
  year       = {2017},
  month      = dec,
  issn       = {1041-4347},
  doi        = {10.1109/TKDE.2017.2754499},
  url        = {http://ieeexplore.ieee.org/document/8047276/},
  urldate    = {2019-07-22},
}
% Bordes et al. (2013): the TransE paper, NIPS'13 proceedings.
@inproceedings{bordes_translating_2013,
  author    = {Bordes, Antoine and Usunier, Nicolas and Garcia-Durán, Alberto and Weston, Jason and Yakhnenko, Oksana},
  title     = {Translating {Embeddings} for {Modeling} {Multi}-relational {Data}},
  booktitle = {Proceedings of the 26th {International} {Conference} on {Neural} {Information} {Processing} {Systems} - {Volume} 2},
  series    = {{NIPS}'13},
  pages     = {2787--2795},
  publisher = {Curran Associates Inc.},
  address   = {USA},
  year      = {2013},
  url       = {http://dl.acm.org/citation.cfm?id=2999792.2999923},
  urldate   = {2019-07-22},
  note      = {event-place: Lake Tahoe, Nevada},
  abstract  = {We consider the problem of embedding entities and relationships of multi-relational data in low-dimensional vector spaces. Our objective is to propose a canonical model which is easy to train, contains a reduced number of parameters and can scale up to very large databases. Hence, we propose TransE, a method which models relationships by interpreting them as translations operating on the low-dimensional embeddings of the entities. Despite its simplicity, this assumption proves to be powerful since extensive experiments show that TransE significantly outperforms state-of-the-art methods in link prediction on two knowledge bases. Besides, it can be successfully trained on a large scale data set with 1M entities, 25k relationships and more than 17M training samples.},
}
% Wang et al. (2014): the TransH paper, AAAI 2014.
% booktitle is the full proceedings title, in the same form as the AAAI 2015 entry in this file.
@inproceedings{wang_knowledge_2014,
  author    = {Wang, Zhen and Zhang, Jianwen and Feng, Jianlin and Chen, Zhigang},
  title     = {Knowledge {Graph} {Embedding} by {Translating} on {Hyperplanes}},
  booktitle = {Proceedings of the {Twenty}-{Eighth} {AAAI} {Conference} on {Artificial} {Intelligence}},
  year      = {2014},
  keywords  = {Knowledge Graph, Benchmark (computing), Entity, Experiment, Freebase, Graph embedding, Many-to-many (data model), Negative feedback, One-to-many (data model), Triplet state, WordNet},
  abstract  = {We deal with embedding a large scale knowledge graph composed of entities and relations into a continuous vector space. TransE is a promising method proposed recently, which is very efficient while achieving state-of-the-art predictive performance. We discuss some mapping properties of relations which should be considered in embedding, such as reflexive, one-to-many, many-to-one, and many-to-many. We note that TransE does not do well in dealing with these properties. Some complex models are capable of preserving these mapping properties but sacrifice efficiency in the process. To make a good trade-off between model capacity and efficiency, in this paper we propose TransH which models a relation as a hyperplane together with a translation operation on it. In this way, we can well preserve the above mapping properties of relations with almost the same model complexity of TransE. Additionally, as a practical knowledge graph is often far from completed, how to construct negative examples to reduce false negative labels in training is very important. Utilizing the one-to-many/many-to-one mapping property of a relation, we propose a simple trick to reduce the possibility of false negative labeling. We conduct extensive experiments on link prediction, triplet classification and fact extraction on benchmark datasets like WordNet and Freebase. Experiments show TransH delivers significant improvements over TransE on predictive accuracy with comparable capability to scale up.},
}
% Cai, Zheng and Chang (2017): graph embedding survey, cited as an arXiv preprint.
% The arXiv identifier is stored in the eprint fields; there is no journal for this version.
% NOTE(review): primaryclass cs.AI is taken from the arXiv keyword -- confirm.
@misc{cai_comprehensive_2017,
  author        = {Cai, Hongyun and Zheng, Vincent W. and Chang, Kevin Chen-Chuan},
  title         = {A {Comprehensive} {Survey} of {Graph} {Embedding}: {Problems}, {Techniques} and {Applications}},
  shorttitle    = {A {Comprehensive} {Survey} of {Graph} {Embedding}},
  year          = {2017},
  month         = sep,
  eprint        = {1709.07604},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
  url           = {http://arxiv.org/abs/1709.07604},
  urldate       = {2019-07-22},
  keywords      = {Computer Science - Artificial Intelligence},
  abstract      = {Graph is an important data representation which appears in a wide diversity of real-world scenarios. Effective graph analytics provides users a deeper understanding of what is behind the data, and thus can benefit a lot of useful applications such as node classification, node recommendation, link prediction, etc. However, most graph analytics methods suffer the high computation and space cost. Graph embedding is an effective yet efficient way to solve the graph analytics problem. It converts the graph data into a low dimensional space in which the graph structural information and graph properties are maximally preserved. In this survey, we conduct a comprehensive review of the literature in graph embedding. We first introduce the formal definition of graph embedding as well as the related concepts. After that, we propose two taxonomies of graph embedding which correspond to what challenges exist in different graph embedding problem settings and how the existing work address these challenges in their solutions. Finally, we summarize the applications that graph embedding enables and suggest four promising future research directions in terms of computation efficiency, problem settings, techniques and application scenarios.},
}
% Lotfi Shahreza et al. (2018): review of network-based drug repositioning, Briefings in Bioinformatics 19(5).
@article{lotfi_shahreza_review_2018,
  author   = {Lotfi Shahreza, Maryam and Ghadiri, Nasser and Mousavi, Sayed Rasoul and Varshosaz, Jaleh and Green, James R.},
  title    = {A review of network-based approaches to drug repositioning},
  journal  = {Briefings in Bioinformatics},
  volume   = {19},
  number   = {5},
  pages    = {878--892},
  year     = {2018},
  issn     = {1477-4054},
  doi      = {10.1093/bib/bbx017},
  pmid     = {28334136},
  language = {eng},
  keywords = {Humans, Drug-Related Side Effects and Adverse Reactions, Computational Biology, Drug Repositioning, Databases, Pharmaceutical, Drug Interactions, Gene Regulatory Networks, Machine Learning, Metabolic Networks and Pathways, Molecular Docking Simulation, Protein Interaction Maps},
  abstract = {Experimental drug development is time-consuming, expensive and limited to a relatively small number of targets. However, recent studies show that repositioning of existing drugs can function more efficiently than de novo experimental drug development to minimize costs and risks. Previous studies have proven that network analysis is a versatile platform for this purpose, as the biological networks are used to model interactions between many different biological concepts. The present study is an attempt to review network-based methods in predicting drug targets for drug repositioning. For each method, the preferred type of data set is described, and their advantages and limitations are discussed. For each method, we seek to provide a brief description, as well as an evaluation based on its performance metrics.We conclude that integrating distinct and complementary data should be used because each type of data set reveals a unique aspect of information about an organism. We also suggest that applying a standard set of evaluation metrics and data sets would be essential in this fast-growing research domain.},
}
% Mizutani et al. (2012): drug-protein interaction network and side effects, Bioinformatics 28(18).
% Given names are spelled out so Pauwels, Stoven and Yamanishi match their spelling in pauwels_predicting_2011.
@article{mizutani_relating_2012,
  author   = {Mizutani, Sayaka and Pauwels, Edouard and Stoven, Véronique and Goto, Susumu and Yamanishi, Yoshihiro},
  title    = {Relating drug-protein interaction network with drug side effects},
  journal  = {Bioinformatics},
  volume   = {28},
  number   = {18},
  pages    = {i522--i528},
  year     = {2012},
  month    = sep,
  issn     = {1367-4803, 1460-2059},
  doi      = {10.1093/bioinformatics/bts383},
  url      = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/bts383},
  urldate  = {2019-07-23},
  language = {en},
}
% Scheiber et al. (2009): adverse drug reactions in chemical space, Journal of Medicinal Chemistry 52(9).
@article{scheiber_mapping_2009,
  author   = {Scheiber, Josef and Jenkins, Jeremy L. and Sukuru, Sai Chetan K. and Bender, Andreas and Mikhailov, Dmitri and Milik, Mariusz and Azzaoui, Kamal and Whitebread, Steven and Hamon, Jacques and Urban, Laszlo and Glick, Meir and Davies, John W.},
  title    = {Mapping {Adverse} {Drug} {Reactions} in {Chemical} {Space}},
  journal  = {Journal of Medicinal Chemistry},
  volume   = {52},
  number   = {9},
  pages    = {3103--3107},
  year     = {2009},
  month    = may,
  issn     = {0022-2623, 1520-4804},
  doi      = {10.1021/jm801546k},
  url      = {https://pubs.acs.org/doi/10.1021/jm801546k},
  urldate  = {2019-07-23},
  language = {en},
}
% Atias and Sharan (2011): side-effect prediction framework, Journal of Computational Biology 18(3).
@article{atias_algorithmic_2011,
  author   = {Atias, Nir and Sharan, Roded},
  title    = {An {Algorithmic} {Framework} for {Predicting} {Side} {Effects} of {Drugs}},
  journal  = {Journal of Computational Biology},
  volume   = {18},
  number   = {3},
  pages    = {207--218},
  year     = {2011},
  month    = mar,
  issn     = {1066-5277, 1557-8666},
  doi      = {10.1089/cmb.2010.0255},
  url      = {http://www.liebertpub.com/doi/10.1089/cmb.2010.0255},
  urldate  = {2019-07-24},
  language = {en},
}
% Pauwels, Stoven and Yamanishi (2011): fragment-based side-effect prediction, BMC Bioinformatics 12(1).
@article{pauwels_predicting_2011,
  author     = {Pauwels, Edouard and Stoven, Véronique and Yamanishi, Yoshihiro},
  title      = {Predicting drug side-effect profiles: a chemical fragment-based approach},
  shorttitle = {Predicting drug side-effect profiles},
  journal    = {BMC Bioinformatics},
  volume     = {12},
  number     = {1},
  pages      = {169},
  year       = {2011},
  issn       = {1471-2105},
  doi        = {10.1186/1471-2105-12-169},
  url        = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-169},
  urldate    = {2019-07-24},
  language   = {en},
}
% Yang and Agarwal (2011): drug repositioning from clinical side effects, PLoS ONE 6(12).
@article{yang_systematic_2011,
  author   = {Yang, Lun and Agarwal, Pankaj},
  editor   = {Csermely, Peter},
  title    = {Systematic {Drug} {Repositioning} {Based} on {Clinical} {Side}-{Effects}},
  journal  = {PLoS ONE},
  volume   = {6},
  number   = {12},
  pages    = {e28025},
  year     = {2011},
  month    = dec,
  issn     = {1932-6203},
  doi      = {10.1371/journal.pone.0028025},
  url      = {https://dx.plos.org/10.1371/journal.pone.0028025},
  urldate  = {2019-07-24},
  language = {en},
}
% Ye, Liu and Wei (2014): side-effect-based drug network for repositioning, PLoS ONE 9(2).
@article{ye_construction_2014,
  author   = {Ye, Hao and Liu, Qi and Wei, Jia},
  editor   = {Keskin, Ozlem},
  title    = {Construction of {Drug} {Network} {Based} on {Side} {Effects} and {Its} {Application} for {Drug} {Repositioning}},
  journal  = {PLoS ONE},
  volume   = {9},
  number   = {2},
  pages    = {e87864},
  year     = {2014},
  month    = feb,
  issn     = {1932-6203},
  doi      = {10.1371/journal.pone.0087864},
  url      = {http://dx.plos.org/10.1371/journal.pone.0087864},
  urldate  = {2019-07-24},
  language = {en},
}
% Kim et al. (2019): PubChem 2019 update, Nucleic Acids Research 47(D1).
@article{kim_pubchem_2019,
  author     = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A. and Thiessen, Paul A. and Yu, Bo and Zaslavsky, Leonid and Zhang, Jian and Bolton, Evan E.},
  title      = {{PubChem} 2019 update: improved access to chemical data},
  shorttitle = {{PubChem} 2019 update},
  journal    = {Nucleic Acids Research},
  volume     = {47},
  number     = {D1},
  pages      = {D1102--D1109},
  year       = {2019},
  month      = jan,
  issn       = {1362-4962},
  doi        = {10.1093/nar/gky1033},
  pmid       = {30371825},
  pmcid      = {PMC6324075},
  language   = {eng},
  abstract   = {PubChem (https://pubchem.ncbi.nlm.nih.gov) is a key chemical information resource for the biomedical research community. Substantial improvements were made in the past few years. New data content was added, including spectral information, scientific articles mentioning chemicals, and information for food and agricultural chemicals. PubChem released new web interfaces, such as PubChem Target View page, Sources page, Bioactivity dyad pages and Patent View page. PubChem also released a major update to PubChem Widgets and introduced a new programmatic access interface, called PUG-View. This paper describes these new developments in PubChem.},
}
% Kim et al. (2016): PubChem Substance and Compound databases, Nucleic Acids Research 44(D1).
% Database-issue page numbers carry a D prefix on both ends of the range.
@article{kim_pubchem_2016,
  author   = {Kim, Sunghwan and Thiessen, Paul A. and Bolton, Evan E. and Chen, Jie and Fu, Gang and Gindulyte, Asta and Han, Lianyi and He, Jane and He, Siqian and Shoemaker, Benjamin A. and Wang, Jiyao and Yu, Bo and Zhang, Jian and Bryant, Stephen H.},
  title    = {{PubChem} {Substance} and {Compound} databases},
  journal  = {Nucleic Acids Research},
  volume   = {44},
  number   = {D1},
  pages    = {D1202--D1213},
  year     = {2016},
  month    = jan,
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkv951},
  pmid     = {26400175},
  pmcid    = {PMC4702940},
  language = {eng},
  keywords = {Pharmaceutical Preparations, Databases, Chemical, Internet, Molecular Structure, Software},
  abstract = {PubChem (https://pubchem.ncbi.nlm.nih.gov) is a public repository for information on chemical substances and their biological activities, launched in 2004 as a component of the Molecular Libraries Roadmap Initiatives of the US National Institutes of Health (NIH). For the past 11 years, PubChem has grown to a sizable system, serving as a chemical information resource for the scientific research community. PubChem consists of three inter-linked databases, Substance, Compound and BioAssay. The Substance database contains chemical information deposited by individual data contributors to PubChem, and the Compound database stores unique chemical structures extracted from the Substance database. Biological activity data of chemical substances tested in assay experiments are contained in the BioAssay database. This paper provides an overview of the PubChem Substance and Compound databases, including data sources and contents, data organization, data submission using PubChem Upload, chemical structure standardization, web-based interfaces for textual and non-textual searches, and programmatic access. It also gives a brief description of PubChem3D, a resource derived from theoretical three-dimensional structures of compounds in PubChem, as well as PubChemRDF, Resource Description Framework (RDF)-formatted PubChem data for data sharing, analysis and integration with information contained in other databases.},
}
% Lin et al. (2015): the TransR paper, AAAI 2015 proceedings.
@inproceedings{lin_learning_2015,
  author    = {Lin, Yankai and Liu, Zhiyuan and Sun, Maosong and Liu, Yang and Zhu, Xuan},
  title     = {Learning {Entity} and {Relation} {Embeddings} for {Knowledge} {Graph} {Completion}},
  booktitle = {Proceedings of the {Twenty}-{Ninth} {AAAI} {Conference} on {Artificial} {Intelligence}},
  series    = {{AAAI}'15},
  pages     = {2181--2187},
  publisher = {AAAI Press},
  year      = {2015},
  isbn      = {978-0-262-51129-2},
  url       = {http://dl.acm.org/citation.cfm?id=2886521.2886624},
  urldate   = {2019-07-24},
  note      = {event-place: Austin, Texas},
  abstract  = {Knowledge graph completion aims to perform link prediction between entities. In this paper, we consider the approach of knowledge graph embeddings. Recently, models such as TransE and TransH build entity and relation embeddings by regarding a relation as translation from head entity to tail entity. We note that these models simply put both entities and relations within the same semantic space. In fact, an entity may have multiple aspects and various relations may focus on different aspects of entities, which makes a common space insufficient for modeling. In this paper, we propose TransR to build entity and relation embeddings in separate entity space and relation spaces. Afterwards, we learn embeddings by first projecting entities from entity space to corresponding relation space and then building translations between projected entities. In experiments, we evaluate our models on three tasks including link prediction, triple classification and relational fact extraction. Experimental results show significant and consistent improvements compared to state-of-the-art baselines including TransE and TransH. The source code of this paper can be obtained from https://github.com/mrlyk423/relation\_extraction.},
}
% Ou et al. (2016): the HOPE graph embedding paper, KDD '16 proceedings.
% The ordinal in booktitle is written "22nd", as in the other KDD '16 entry in this file.
@inproceedings{ou_asymmetric_2016,
  author    = {Ou, Mingdong and Cui, Peng and Pei, Jian and Zhang, Ziwei and Zhu, Wenwu},
  title     = {Asymmetric {Transitivity} {Preserving} {Graph} {Embedding}},
  booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}},
  series    = {{KDD} '16},
  pages     = {1105--1114},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2016},
  isbn      = {978-1-4503-4232-2},
  doi       = {10.1145/2939672.2939751},
  url       = {http://doi.acm.org/10.1145/2939672.2939751},
  urldate   = {2019-07-24},
  note      = {event-place: San Francisco, California, USA},
  keywords  = {asymmetric transitivity, directed graph, graph embedding, high-order proximity},
  abstract  = {Graph embedding algorithms embed a graph into a vector space where the structure and the inherent properties of the graph are preserved. The existing graph embedding methods cannot preserve the asymmetric transitivity well, which is a critical property of directed graphs. Asymmetric transitivity depicts the correlation among directed edges, that is, if there is a directed path from u to v, then there is likely a directed edge from u to v. Asymmetric transitivity can help in capturing structures of graphs and recovering from partially observed graphs. To tackle this challenge, we propose the idea of preserving asymmetric transitivity by approximating high-order proximity which are based on asymmetric transitivity. In particular, we develop a novel graph embedding algorithm, High-Order Proximity preserved Embedding (HOPE for short), which is scalable to preserve high-order proximities of large scale graphs and capable of capturing the asymmetric transitivity. More specifically, we first derive a general formulation that cover multiple popular high-order proximity measurements, then propose a scalable embedding algorithm to approximate the high-order proximity measurements based on their general formulation. Moreover, we provide a theoretical upper bound on the RMSE (Root Mean Squared Error) of the approximation. Our empirical experiments on a synthetic dataset and three real-world datasets demonstrate that HOPE can approximate the high-order proximities significantly better than the state-of-art algorithms and outperform the state-of-art algorithms in tasks of reconstruction, link prediction and vertex recommendation.},
}
% Wang, Cui and Zhu (2016): the SDNE paper, KDD '16 proceedings.
% address is the publisher's city and the conference location is in venue;
% booktitle, series and publisher follow the other KDD '16 entry in this file.
@inproceedings{wang_structural_2016,
  author    = {Wang, Daixin and Cui, Peng and Zhu, Wenwu},
  title     = {Structural {Deep} {Network} {Embedding}},
  booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}},
  series    = {{KDD} '16},
  venue     = {San Francisco, California, USA},
  pages     = {1225--1234},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2016},
  isbn      = {978-1-4503-4232-2},
  doi       = {10.1145/2939672.2939753},
  url       = {http://dl.acm.org/citation.cfm?doid=2939672.2939753},
  urldate   = {2019-07-24},
  language  = {en},
  abstract  = {Network embedding is an important method to learn low-dimensional representations of vertexes in networks, aiming to capture and preserve the network structure. Almost all the existing network embedding methods adopt shallow models. However, since the underlying network structure is complex, shallow models cannot capture the highly non-linear network structure, resulting in sub-optimal network representations. Therefore, how to find a method that is able to effectively capture the highly non-linear network structure and preserve the global and local structure is an open yet important problem. To solve this problem, in this paper we propose a Structural Deep Network Embedding method, namely SDNE. More specifically, we first propose a semi-supervised deep model, which has multiple layers of non-linear functions, thereby being able to capture the highly non-linear network structure. Then we propose to exploit the first-order and second-order proximity jointly to preserve the network structure. The second-order proximity is used by the unsupervised component to capture the global network structure. While the first-order proximity is used as the supervised information in the supervised component to preserve the local network structure. By jointly optimizing them in the semi-supervised deep model, our method can preserve both the local and global network structure and is robust to sparse networks. Empirically, we conduct the experiments on five real-world networks, including a language network, a citation network and three social networks. The results show that compared to the baselines, our method can reconstruct the original network significantly better and achieves substantial gains in three applications, i.e. multi-label classification, link prediction and visualization.},
}
% Hoyt et al. (2019), bioRxiv preprint: Bio2BEL packages for converting structured biological data sources to BEL.
% The abstract is stored as plain text (section headings kept as inline labels) so it renders without HTML residue.
% The particle "de" is placed in front of the surname so BibTeX parses it as the von part of the name.
@article{hoyt_integration_2019,
  author    = {Hoyt, Charles Tapley and Domingo-Fernández, Daniel and Mubeen, Sarah and Llaó, Josep Marin and Konotopez, Andrej and Ebeling, Christian and Birkenbihl, Colin and Muslu, Özlem and English, Bradley and Müller, Simon and de Lacerda, Mauricio Pio and Ali, Mehdi and Colby, Scott and Türei, Dénes and Palacio-Escat, Nicolàs and Hofmann-Apitius, Martin},
  title     = {Integration of {Structured} {Biological} {Data} {Sources} using {Biological} {Expression} {Language}},
  journal   = {bioRxiv},
  pages     = {631812},
  month     = may,
  year      = {2019},
  doi       = {10.1101/631812},
  url       = {https://www.biorxiv.org/content/10.1101/631812v1},
  urldate   = {2019-07-25},
  language  = {en},
  copyright = {© 2019, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/},
  abstract  = {Background: The integration of heterogeneous, multiscale, and multimodal knowledge and data has become a common prerequisite for joint analysis to unravel the mechanisms and aetiologies of complex diseases. Because of its unique ability to capture this variety, Biological Expression Language (BEL) is well suited to be further used as a platform for semantic integration and harmonization in networks and systems biology. Results: We have developed numerous independent packages capable of downloading, structuring, and serializing various biological data sources to BEL. Each Bio2BEL package is implemented in the Python programming language and distributed through GitHub (https://github.com/bio2bel) and PyPI. Conclusions: The philosophy of Bio2BEL encourages reproducibility, accessibility, and democratization of biological databases. We present several applications of Bio2BEL packages including their ability to support the curation of pathway mappings, integration of pathway databases, and machine learning applications. Tweet: A suite of independent Python packages for downloading, parsing, warehousing, and converting multi-modal and multi-scale biological databases to Biological Expression Language}
}
% Online tutorial by Chris McCormick on the word2vec skip-gram model (web page; no publication date recorded).
% Word2Vec is brace-protected so that sentence-casing styles keep its mixed-case spelling.
@misc{mccormick_word2vec_nodate,
  author  = {McCormick, Chris},
  title   = {{Word2Vec} {Tutorial} - {The} {Skip}-{Gram} {Model}},
  url     = {http://www.mccormickml.com},
  urldate = {2019-07-25}
}
% Muslu et al. (2019), bioRxiv preprint: GuiltyTargets, target prioritization via network representation learning.
% The abstract is stored as plain text (section headings kept as inline labels) so it renders without HTML residue.
@article{muslu_guiltytargets:_2019,
  author     = {Muslu, Özlem and Hoyt, Charles Tapley and Hofmann-Apitius, Martin and Fröhlich, Holger},
  title      = {{GuiltyTargets}: {Prioritization} of {Novel} {Therapeutic} {Targets} with {Deep} {Network} {Representation} {Learning}},
  shorttitle = {{GuiltyTargets}},
  journal    = {bioRxiv},
  pages      = {521161},
  month      = jan,
  year       = {2019},
  doi        = {10.1101/521161},
  url        = {https://www.biorxiv.org/content/10.1101/521161v1},
  urldate    = {2019-07-25},
  language   = {en},
  copyright  = {© 2019, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/},
  abstract   = {The majority of clinical trial failures are caused by low efficacy of investigated drugs, often due to a poor choice of target protein. Computational prioritization approaches aim to support target selection by ranking candidate targets in the context of a given disease. We propose a novel target prioritization approach, GuiltyTargets, which relies on deep network representation learning of a genome-wide protein-protein interaction network annotated with disease-specific differential gene expression and uses positive-unlabeled machine learning for candidate ranking. We evaluated our approach on six diseases of different types (cancer, metabolic, neurodegenerative) within a 10 times repeated 5-fold stratified cross-validation and achieved AUROC values between 0.92 - 0.94, significantly outperforming a previous approach, which relies on manually engineered topological features. Moreover, we showed that GuiltyTargets allows for target repositioning across related disease areas. Applying GuiltyTargets to Alzheimer’s disease resulted into a number of highly ranked candidates that are currently discussed as targets in the literature. Interestingly, one (COMT) is also the target of an approved drug (Tolcapone) for Parkinson’s disease, highlighting the potential for target repositioning of our method. Availability: The GuiltyTargets Python package is available on PyPI and all code used for analysis can be found under the MIT License at https://github.com/GuiltyTargets. Author summary: Many drug candidates fail in clinical trials due to low efficacy. One of the reasons is the choice of the wrong target protein, i.e. perturbation of the protein does not effectively modulate the disease phenotype on a molecular level. In consequence many patients do not demonstrate a clear response to the drug candidate. Traditionally, targets are selected based on evidence from the literature and follow-up experiments. However, this process is very labor intensive and often biased by subjective choices. Computational tools could help a more rational and unbiased choice of target proteins and thus increase the chance of drug discovery programs. In this work we propose a novel machine learning based method for target candidate ranking. The method (GuiltyTargets) captures properties of known targets to learn a ranking of candidates. GuiltyTargets compares favorably against existing machine learning based target prioritization methods and allowed us to propose novel targets for Alzheimer’s disease.}
}
% Braschi et al. (2019), Nucleic Acids Research: overview of the HGNC and VGNC gene nomenclature resources.
@article{braschi_genenames.org:_2019,
  author     = {Braschi, Bryony and Denny, Paul and Gray, Kristian and Jones, Tamsin and Seal, Ruth and Tweedie, Susan and Yates, Bethan and Bruford, Elspeth},
  title      = {Genenames.org: the {HGNC} and {VGNC} resources in 2019},
  shorttitle = {Genenames.org},
  journal    = {Nucleic Acids Research},
  volume     = {47},
  number     = {D1},
  pages      = {D786--D792},
  month      = jan,
  year       = {2019},
  issn       = {1362-4962},
  doi        = {10.1093/nar/gky930},
  pmid       = {30304474},
  pmcid      = {PMC6324057},
  language   = {eng},
  abstract   = {The HUGO Gene Nomenclature Committee (HGNC) based at EMBL's European Bioinformatics Institute (EMBL-EBI) assigns unique symbols and names to human genes. There are over 40 000 approved gene symbols in our current database of which over 19 000 are for protein-coding genes. The Vertebrate Gene Nomenclature Committee (VGNC) was established in 2016 to assign standardized nomenclature in line with human for vertebrate species that lack their own nomenclature committees. The VGNC initially assigned nomenclature for over 15000 protein-coding genes in chimpanzee. We have extended this process to other vertebrate species, naming over 14000 protein-coding genes in cow and dog and over 13 000 in horse to date. Our HGNC website https://www.genenames.org has undergone a major design update, simplifying the homepage to provide easy access to our search tools and making the site more mobile friendly. Our gene families pages are now known as 'gene groups' and have increased in number to over 1200, with nearly half of all named genes currently assigned to at least one gene group. This article provides an overview of our online data and resources, focusing on our work over the last two years.}
}
% UniProt database update paper, Nucleic Acids Research 47 (2019).
% The corporate author is double-braced so BibTeX treats it as one indivisible name instead of first/last parts.
% The issue is recorded as D1, matching how the other Nucleic Acids Research volume 47 entry in this file is numbered.
@article{noauthor_uniprot:_2019,
  author     = {{UniProt Consortium}},
  title      = {{UniProt}: a worldwide hub of protein knowledge},
  shorttitle = {{UniProt}},
  journal    = {Nucleic Acids Research},
  volume     = {47},
  number     = {D1},
  pages      = {D506--D515},
  month      = jan,
  year       = {2019},
  issn       = {0305-1048},
  doi        = {10.1093/nar/gky1049},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6323992/},
  urldate    = {2019-07-26},
  pmid       = {30395287},
  pmcid      = {PMC6323992},
  abstract   = {The UniProt Knowledgebase is a collection of sequences and annotations for over 120 million proteins across all branches of life. Detailed annotations extracted from the literature by expert curators have been collected for over half a million of these proteins. These annotations are supplemented by annotations provided by rule based automated systems, and those imported from other resources. In this article we describe significant updates that we have made over the last 2 years to the resource. We have greatly expanded the number of Reference Proteomes that we provide and in particular we have focussed on improving the number of viral Reference Proteomes. The UniProt website has been augmented with new data visualizations for the subcellular localization of proteins as well as their structure and interactions. UniProt resources are available under a CC-BY (4.0) license via the web at https://www.uniprot.org/.}
}
% Roberts (2001), PNAS 98(2): commentary on PubMed Central.
@article{roberts_pubmed_2001,
  author     = {Roberts, Richard J.},
  title      = {{PubMed} {Central}: {The} {GenBank} of the published literature},
  shorttitle = {{PubMed} {Central}},
  journal    = {Proceedings of the National Academy of Sciences of the United States of America},
  volume     = {98},
  number     = {2},
  pages      = {381--382},
  month      = jan,
  year       = {2001},
  issn       = {0027-8424},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC33354/},
  urldate    = {2019-07-26},
  pmid       = {11209037},
  pmcid      = {PMC33354}
}
% Gene Ontology project status paper, Nucleic Acids Research 36 (2008), Database issue.
% The corporate author is double-braced so BibTeX treats it as one indivisible name instead of first/last parts.
@article{noauthor_gene_2008,
  author   = {{Gene Ontology Consortium}},
  title    = {The {Gene} {Ontology} project in 2008},
  journal  = {Nucleic Acids Research},
  volume   = {36},
  number   = {Database issue},
  pages    = {D440--D444},
  month    = jan,
  year     = {2008},
  issn     = {0305-1048},
  doi      = {10.1093/nar/gkm883},
  url      = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2238979/},
  urldate  = {2019-07-26},
  pmid     = {17984083},
  pmcid    = {PMC2238979},
  abstract = {The Gene Ontology (GO) project (http://www.geneontology.org/) provides a set of structured, controlled vocabularies for community use in annotating genes, gene products and sequences (also see http://www.sequenceontology.org/). The ontologies have been extended and refined for several biological areas, and improvements to the structure of the ontologies have been implemented. To improve the quantity and quality of gene product annotations available from its public repository, the GO Consortium has launched a focused effort to provide comprehensive and detailed annotation of orthologous genes across a number of ‘reference’ genomes, including human and several key model organisms. Software developments include two releases of the ontology-editing tool OBO-Edit, and improvements to the AmiGO browser interface.}
}
% Baralis and Fiori (2008), DEXA workshop paper surveying heterogeneous biological data sources.
@inproceedings{baralis_exploring_2008,
  author    = {Baralis, E. and Fiori, A.},
  title     = {Exploring {Heterogeneous} {Biological} {Data} {Sources}},
  booktitle = {2008 19th {International} {Workshop} on {Database} and {Expert} {Systems} {Applications}},
  pages     = {647--651},
  month     = sep,
  year      = {2008},
  doi       = {10.1109/DEXA.2008.116},
  keywords  = {Proteins, Bioinformatics, array expression analysis, Arrays, bioinformatics, biological data analysis, biological data querying, biological databases, Biological information theory, Biology, biology computing, data analysis, data repositories, Databases, heterogeneous biological data sources, knowledge repositories, life science area, Ontologies, protein-protein interaction, proteins, query processing, querying, research activity},
  abstract  = {Research activity in the life science area is becoming increasingly data intensive. Huge amounts of highly heterogeneous data, including high throughput experiment results, publication collections, and clinical records are generated at a fast pace by researchers all over the world. The capability of correlating heterogeneous information stored in separated data repositories is a compelling, yet currently unsatisfied, need for bioinformatics scientists. Developed systems address this issue by building knowledge repositories for specific bioinformatics sub-domains such as protein-protein interaction or array expression analysis. In this paper we present an overview of heterogeneous biological data sources and discuss the many difficulties faced by biological data querying and analysis.}
}
% Huang, Névéol and Lu (2011), JAMIA 18(5): learning-to-rank approach for recommending MeSH terms.
% The journal field holds the full journal name only, without the abbreviation suffix added by the PubMed export.
@article{huang_recommending_2011,
  author   = {Huang, Minlie and Névéol, Aurélie and Lu, Zhiyong},
  title    = {Recommending {MeSH} terms for annotating biomedical articles},
  journal  = {Journal of the American Medical Informatics Association},
  volume   = {18},
  number   = {5},
  pages    = {660--667},
  year     = {2011},
  issn     = {1067-5027},
  doi      = {10.1136/amiajnl-2010-000055},
  url      = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3168302/},
  urldate  = {2019-07-26},
  pmid     = {21613640},
  pmcid    = {PMC3168302},
  abstract = {Background
Due to the high cost of manual curation of key aspects from the scientific literature, automated methods for assisting this process are greatly desired. Here, we report a novel approach to facilitate MeSH indexing, a challenging task of assigning MeSH terms to MEDLINE citations for their archiving and retrieval.
Methods
Unlike previous methods for automatic MeSH term assignment, we reformulate the indexing task as a ranking problem such that relevant MeSH headings are ranked higher than those irrelevant ones. Specifically, for each document we retrieve 20 neighbor documents, obtain a list of MeSH main headings from neighbors, and rank the MeSH main headings using ListNet–a learning-to-rank algorithm. We trained our algorithm on 200 documents and tested on a previously used benchmark set of 200 documents and a larger dataset of 1000 documents.
Results
Tested on the benchmark dataset, our method achieved a precision of 0.390, recall of 0.712, and mean average precision (MAP) of 0.626. In comparison to the state of the art, we observe statistically significant improvements as large as 39\% in MAP (p-value {\textless}0.001). Similar significant improvements were also obtained on the larger document set.
Conclusion
Experimental results show that our approach makes the most accurate MeSH predictions to date, which suggests its great potential in making a practical impact on MeSH indexing. Furthermore, as discussed the proposed learning framework is robust and can be adapted to many other similar tasks beyond MeSH indexing in the biomedical domain. All data sets are available at: http://www.ncbi.nlm.nih.gov/CBBresearch/Lu/indexing.}
}
% Web page introducing MeSH, hosted on nlm.nih.gov (no publication date recorded).
% NOTE(review): the corporate author is inferred from the hosting site (nlm.nih.gov); confirm the preferred attribution.
% It is double-braced so BibTeX treats it as one indivisible name, and it gives styles something to sort and label by.
@misc{noauthor_introduction_nodate,
  author    = {{National Library of Medicine}},
  title     = {Introduction to {MeSH}},
  type      = {Technical {Documentation}},
  url       = {https://www.nlm.nih.gov/mesh/introduction.html},
  urldate   = {2019-07-26},
  language  = {eng},
  copyright = {Public Domain}
}
% Bodenreider (2004), Nucleic Acids Research 32, Database issue: description of the UMLS.
@article{bodenreider_unified_2004,
  author     = {Bodenreider, Olivier},
  title      = {The {Unified} {Medical} {Language} {System} ({UMLS}): integrating biomedical terminology},
  shorttitle = {The {Unified} {Medical} {Language} {System} ({UMLS})},
  journal    = {Nucleic Acids Research},
  volume     = {32},
  number     = {Database issue},
  pages      = {D267--D270},
  month      = jan,
  year       = {2004},
  issn       = {0305-1048},
  doi        = {10.1093/nar/gkh061},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC308795/},
  urldate    = {2019-07-26},
  pmid       = {14681409},
  pmcid      = {PMC308795},
  abstract   = {The Unified Medical Language System (http://umlsks.nlm.nih.gov) is a repository of biomedical vocabularies developed by the US National Library of Medicine. The UMLS integrates over 2 million names for some 900 000 concepts from more than 60 families of biomedical vocabularies, as well as 12 million relations among these concepts. Vocabularies integrated in the UMLS Metathesaurus include the NCBI taxonomy, Gene Ontology, the Medical Subject Headings (MeSH), OMIM and the Digital Anatomist Symbolic Knowledge Base. UMLS concepts are not only inter-related, but may also be linked to external resources such as GenBank. In addition to data, the UMLS includes tools for customizing the Metathesaurus (MetamorphoSys), for generating lexical variants of concept names (lvg) and for extracting UMLS concepts from text (MetaMap). The UMLS knowledge sources are updated quarterly. All vocabularies are available at no fee for research purposes within an institution, but UMLS users are required to sign a license agreement. The UMLS knowledge sources are distributed on CD-ROM and by FTP.}
}
% Hamilton, Ying and Leskovec: review of representation learning on graphs.
% NOTE(review): journal, volume, issue, page range, year and arXiv identifier were filled in to satisfy the
% required article fields; verify them against the published version. The previous pages value (23) was a
% PDF page count rather than a page range.
@article{hamilton_representation_nodate,
  author   = {Hamilton, William L. and Ying, Rex and Leskovec, Jure},
  title    = {Representation {Learning} on {Graphs}: {Methods} and {Applications}},
  journal  = {IEEE Data Engineering Bulletin},
  volume   = {40},
  number   = {3},
  pages    = {52--74},
  year     = {2017},
  url      = {http://arxiv.org/abs/1709.05584},
  note     = {arXiv: 1709.05584},
  language = {en},
  abstract = {Machine learning on graphs is an important and ubiquitous task with applications ranging from drug design to friendship recommendation in social networks. The primary challenge in this domain is finding a way to represent, or encode, graph structure so that it can be easily exploited by machine learning models. Traditionally, machine learning approaches relied on user-defined heuristics to extract features encoding structural information about a graph (e.g., degree statistics or kernel functions). However, recent years have seen a surge in approaches that automatically learn to encode graph structure into low-dimensional embeddings, using techniques based on deep learning and nonlinear dimensionality reduction. Here we provide a conceptual review of key advancements in this area of representation learning on graphs, including matrix factorization-based methods, random-walk based algorithms, and graph convolutional networks. We review methods to embed individual nodes as well as approaches to embed entire (sub)graphs. In doing so, we develop a unified framework to describe these recent approaches, and we highlight a number of important applications and directions for future work.}
}
% Dančík, Basu and Clemons (2013): book chapter on properties of biological networks (Springer, edited volume).
@incollection{dancik_properties_2013,
  author    = {Dančík, Vlado and Basu, Amrita and Clemons, Paul},
  title     = {Properties of {Biological} {Networks}},
  editor    = {Prokop, Aleš and Csukás, Béla},
  booktitle = {Systems {Biology}: {Integrative} {Biology} and {Simulation} {Tools}},
  publisher = {Springer Netherlands},
  address   = {Dordrecht},
  pages     = {129--178},
  year      = {2013},
  isbn      = {978-94-007-6803-1},
  doi       = {10.1007/978-94-007-6803-1_5},
  url       = {https://doi.org/10.1007/978-94-007-6803-1_5},
  urldate   = {2019-07-26},
  language  = {en},
  keywords  = {Biological networks, Emergent network properties, Network conservation, Network dynamics, Network medicine, Network modularity, Network robustness},
  abstract  = {Relationships in biological systems are frequently represented as networks with the goal of abstracting a system’s components to nodes and connections between them. While such representations allow modeling and analysis using abstract computational methods, there are certain aspects of such modeling that are particularly important for biological networks. We explore features that are deemed necessary for living and evolving organisms and reflect the evolutionary origins of biological networks. Biological networks are robust to random alterations of their nodes and connections yet may be vulnerable to attacks targeting essential genes. Biological systems are dynamic and modular, and so are their network representations. Comparisons of biological networks across species can reveal conserved and evolved regions and shed light on evolutionary events and processes. It is important to understand networks as a whole, as significant insights might emerge from the network approach that cannot be attributed to properties of the nodes alone. Network-based approaches have a potential to significantly increase our understanding of biological systems and consequently, our understanding and treatment of human diseases.}
}
% Lobo, Jiménez-Valverde and Real (2008), Global Ecology and Biogeography: critique of AUC for distribution models.
% Typed as a journal article so that the journal field is actually printed by standard styles.
% NOTE(review): volume, issue and page range were added to complete the article record; verify against the publisher page.
@article{lobo_auc:_2008,
  author     = {Lobo, Jorge M. and Jiménez-Valverde, Alberto and Real, Raimundo},
  title      = {{AUC}: a misleading measure of the performance of predictive distribution models},
  shorttitle = {{AUC}},
  journal    = {Global Ecology and Biogeography},
  volume     = {17},
  number     = {2},
  pages      = {145--151},
  month      = mar,
  year       = {2008},
  doi        = {10.1111/j.1466-8238.2007.00358.x},
  url        = {https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1466-8238.2007.00358.x},
  urldate    = {2019-08-06},
  language   = {en}
}
% Nickel et al. (2016), Proceedings of the IEEE 104(1): review of relational machine learning for knowledge graphs.
@article{nickel_review_2016,
  author   = {Nickel, Maximilian and Murphy, Kevin and Tresp, Volker and Gabrilovich, Evgeniy},
  title    = {A {Review} of {Relational} {Machine} {Learning} for {Knowledge} {Graphs}},
  journal  = {Proceedings of the IEEE},
  volume   = {104},
  number   = {1},
  pages    = {11--33},
  month    = jan,
  year     = {2016},
  issn     = {0018-9219, 1558-2256},
  doi      = {10.1109/JPROC.2015.2483592},
  url      = {http://arxiv.org/abs/1503.00759},
  urldate  = {2019-08-23},
  note     = {arXiv: 1503.00759},
  language = {en},
  keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
  abstract = {Relational machine learning studies methods for the statistical analysis of relational, or graph-structured, data. In this paper, we provide a review of how such statistical models can be “trained” on large knowledge graphs, and then used to predict new facts about the world (which is equivalent to predicting new edges in the graph). In particular, we discuss two different kinds of statistical relational models, both of which can scale to massive datasets. The first is based on tensor factorization methods and related latent variable models. The second is based on mining observable patterns in the graph. We also show how to combine these latent and observable models to get improved modeling power at decreased computational cost. Finally, we discuss how such statistical models of graphs can be combined with text-based information extraction methods for automatically constructing knowledge graphs from the Web. In particular, we discuss Google’s Knowledge Vault project.}
}
@article{akiba_optuna:_2019,
title = {Optuna: {A} {Next}-generation {Hyperparameter} {Optimization} {Framework}},
shorttitle = {Optuna},
url = {http://arxiv.org/abs/1907.10902},
abstract = {The purpose of this study is to introduce new design-criteria for next-generation hyperparameter optimization software. The criteria we propose include (1) define-by-run API that allows users to construct the parameter search space dynamically, (2) efficient implementation of both searching and pruning strategies, and (3) easy-to-setup, versatile architecture that can be deployed for various purposes, ranging from scalable distributed computing to light-weight experiment conducted via interactive interface. In order to prove our point, we will introduce Optuna, an optimization software which is a culmination of our effort in the development of a next generation optimization software. As an optimization software designed with define-by-run principle, Optuna is particularly the first of its kind. We will present the design-techniques that became necessary in the development of the software that meets the above criteria, and demonstrate the power of our new design through experimental results and real world applications. Our software is available under the MIT license (https://github.com/pfnet/optuna/).},
urldate = {2019-08-23},
journal = {arXiv:1907.10902 [cs, stat]},
author = {Akiba, Takuya and Sano, Shotaro and Yanase, Toshihiko and Ohta, Takeru and Koyama, Masanori},
month = jul,
year = {2019},
note = {arXiv: 1907.10902},