forked from ridhipatil/RL_complex_detection
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy patheval_complex_RL.py
121 lines (104 loc) · 5.21 KB
/
eval_complex_RL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from argparse import ArgumentParser as argparse_ArgumentParser, ArgumentParser
from pickle import load as pickle_load
from yaml import load as yaml_load, dump as yaml_dump, Loader as yaml_Loader
from eval_cmplx_sc import eval_complex, remove_unknown_prots
from main6_eval import run_metrics
import os
def _read_complexes(path):
    """Read a complexes file and return it as a list of token lists.

    Every line of the file is treated as one known complex and is split on
    whitespace, so the result has one (possibly empty) list per line.
    """
    with open(path, 'r') as f:
        return [line.split() for line in f.read().splitlines()]


def _as_flag(value):
    """Interpret a command-line flag value as a boolean.

    argparse hands back a string when the option is given on the command
    line, and any non-empty string (including "0") is truthy; the common
    "off" spellings are therefore mapped to False explicitly.
    """
    if isinstance(value, str):
        return value.strip().lower() not in ("", "0", "false", "no", "off")
    return bool(value)


def _evaluate_split(header, suffix, label, complex_files, predicted, inputs,
                    out_comp_nm, id_name_path, additional_metrics):
    """Evaluate the predicted complexes against one set of known complexes.

    Args:
        header: Line appended to ``<out_comp_nm>_metrics.out`` before evaluating.
        suffix: Suffix handed to ``eval_complex`` and ``run_metrics``.
        label: Short name of the set, used only in the error message.
        complex_files: Paths of the known-complex files; their complexes are
            concatenated in the given order.
        predicted: Predicted complexes as loaded from the pickle file.
        inputs: Parameters loaded from the YAML input file.
        out_comp_nm: Common prefix of all output files.
        id_name_path: Path passed to ``eval_complex`` as ``id_name_map``.
        additional_metrics: When true, ``run_metrics`` is called as well.
    """
    with open(out_comp_nm + '_metrics.out', "a") as fid:
        print(header, file=fid)

    # Files are read afresh for every set so that nothing a previous
    # evaluation may have done to its lists can leak into this one.
    known_complex_nodes_list = []
    for path in complex_files:
        known_complex_nodes_list.extend(_read_complexes(path))
    prot_list = {n for sublist in known_complex_nodes_list for n in sublist}

    # Remove all proteins in predicted complexes that are not present in the
    # known complex protein list.
    fin_list_graphs = remove_unknown_prots(predicted, prot_list)

    eval_complex(0, 0, inputs, known_complex_nodes_list, prot_list,
                 fin_list_graphs, out_comp_nm, suffix=suffix,
                 id_name_map=id_name_path)

    if additional_metrics:
        # Additional metrics are best-effort: report the failure and carry on.
        try:
            run_metrics(known_complex_nodes_list, fin_list_graphs, out_comp_nm, suffix)
        except Exception as exc:
            print("Error in running additional metrics for " + label + ": " + repr(exc))


def main():
    """Evaluate predicted complexes on the training, testing and combined sets.

    Reads the YAML parameter file and the pickled predicted complexes from
    the results directory, then evaluates them against the known complexes
    of the training file, the testing file, and both together.

    Raises:
        ValueError: If ``overlap_method`` in the parameter file is neither
            ``'qi'`` nor ``'1'`` (and ``dir_nm`` is not ``'toy_network'``).
    """
    # NOTE: the first positional argument of ArgumentParser is `prog`, so this
    # string is shown as the program name in usage output.
    parser = argparse_ArgumentParser("Input parameters")
    parser.add_argument("--input_file_name", default="", help="Input parameters file name")
    parser.add_argument("--input_training_file", default="", help="Training Graph file path")
    parser.add_argument("--input_testing_file", default="", help="Testing Graph file path")
    parser.add_argument("--out_dir_name", default="", help="Output directory name")
    parser.add_argument("--evaluate_additional_metrics", default=1,
                        help="Whether to compute additional metrics (0 to disable)")
    parser.add_argument("--id_name_path", default="", help="Path for id to gene name file")
    args = parser.parse_args()
    print(args.input_file_name)

    # NOTE(security): yaml_Loader is the full PyYAML loader; only use this
    # with trusted parameter files.
    with open(args.input_file_name, 'r') as f:
        inputs = yaml_load(f, yaml_Loader)

    # The toy network always uses the qi results directory; otherwise the
    # directory follows the overlap method.
    if inputs['dir_nm'] == 'toy_network' or inputs['overlap_method'] == 'qi':
        results_dir = args.out_dir_name + '/qi_results'
    elif inputs['overlap_method'] == '1':  # jaccard coeff
        results_dir = args.out_dir_name + '/jacc_results'
    else:
        raise ValueError(
            "Unsupported overlap_method %r: expected 'qi' or '1'"
            % (inputs['overlap_method'],))
    out_comp_nm = results_dir + '/res'

    with open(out_comp_nm + "_input_eval_train.yaml", 'w') as outfile:
        yaml_dump(inputs, outfile, default_flow_style=False)

    # NOTE(security): unpickling executes arbitrary code; the predictions file
    # must come from a trusted run.
    with open(out_comp_nm + "_pred_complexes_pp.pkl", 'rb') as f:
        fin_list_graphs_orig = pickle_load(f)

    additional_metrics = _as_flag(args.evaluate_additional_metrics)
    train_file = args.input_training_file
    test_file = args.input_testing_file

    # (header written to the metrics file, output suffix, label, known-complex files)
    splits = [
        ("\n --- On training set ---", "_train", "train", [train_file]),
        ("\n --- On testing set ---", "_test", "test", [test_file]),
        ("\n --- On both sets ---", "_both", "both", [test_file, train_file]),
    ]
    for header, suffix, label, complex_files in splits:
        _evaluate_split(header, suffix, label, complex_files, fin_list_graphs_orig,
                        inputs, out_comp_nm, args.id_name_path, additional_metrics)
# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()