knn_dynamic_benchmark.py (forked from AKSW/natuke)
"""Dynamic kNN benchmark over the natuke heterogeneous information network (HIN):
hides a split of edges of a chosen type, embeds the disturbed graph (DeepWalk,
Node2Vec, metapath2vec, or regularization), evaluates kNN-based edge restoration,
and restores part of the hidden edges before each next stage."""
import time

import pickle5 as pickle  # pickle5 backports pickle protocol 5 to Python < 3.8

from ge import DeepWalk
from ge import Node2Vec

from natuke_utils import metapath2vec
from natuke_utils import disturbed_hin
from natuke_utils import regularization
from natuke_utils import restore_hin
from natuke_utils import embedding_graph
from natuke_utils import true_restore

path = 'path-to-data-repository'
file_name = 'knn_results'
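
# Note (inferred from the paths used below): `path` must point to a directory
# containing the input `<network_name>.gpickle` graph and a `results/`
# subdirectory, to which timing logs and per-stage CSVs are appended.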

def execution(G, algorithm, split, iteration, edge_group, percentual_to_time):
    # Hide a split of `edge_group` edges, then alternate embedding, kNN-based
    # restoration, and staged re-insertion of hidden edges, logging each stage.
    G_disturbed, train, test, hidden = disturbed_hin(G, split=split, random_state=(1 + iteration), edge_group=edge_group)
    G_found, hidden, train, test = true_restore(G_disturbed, hidden, train, test, percentual=0.0, edge_group=edge_group)
    if algorithm == 'deep_walk':
        for key, value in percentual_to_time.items():
            print(f'Evaluation for {algorithm},{split},{iteration},{edge_group},{key}')
            start_time = time.time()
            model_deep_walk = DeepWalk(G_found, walk_length=10, num_walks=80, workers=1)
            model_deep_walk.train(window_size=5, iter=3, embed_size=512)
            embeddings_deep_walk = model_deep_walk.get_embeddings()
            G_found = embedding_graph(G_found, embeddings_deep_walk)
            restored_df = restore_hin(G_found, test)
            with open("{}results/execution_time.txt".format(path), 'a') as f:
                f.write(f'{algorithm},{split},{iteration},{edge_group},{key},{(time.time() - start_time)}\n')
            restored_df.to_csv("{}results/{}_{}_{}_{}_{}_{}.csv".format(path, file_name, algorithm, split, edge_group, iteration, key), index=False)
            G_found, hidden, train, test = true_restore(G_found, hidden, train, test, percentual=value, edge_group=edge_group)
    elif algorithm == 'node2vec':
        for key, value in percentual_to_time.items():
            print(f'Evaluation for {algorithm},{split},{iteration},{edge_group},{key}')
            start_time = time.time()
            model_node2vec = Node2Vec(G_found, walk_length=10, num_walks=80, p=0.5, q=1, workers=1)
            model_node2vec.train(window_size=5, iter=3, embed_size=512)
            embeddings_node2vec = model_node2vec.get_embeddings()
            G_found = embedding_graph(G_found, embeddings_node2vec)
            restored_df = restore_hin(G_found, test)
            with open("{}results/execution_time.txt".format(path), 'a') as f:
                f.write(f'{algorithm},{split},{iteration},{edge_group},{key},{(time.time() - start_time)}\n')
            restored_df.to_csv("{}results/{}_{}_{}_{}_{}_{}.csv".format(path, file_name, algorithm, split, edge_group, iteration, key), index=False)
            G_found, hidden, train, test = true_restore(G_found, hidden, train, test, percentual=value, edge_group=edge_group)
    elif algorithm == 'metapath2vec':
        for key, value in percentual_to_time.items():
            print(f'Evaluation for {algorithm},{split},{iteration},{edge_group},{key}')
            start_time = time.time()
            embeddings_metapath2vec = metapath2vec(G_found, dimensions=512)
            G_found = embedding_graph(G_found, embeddings_metapath2vec)
            restored_df = restore_hin(G_found, test)
            with open("{}results/execution_time.txt".format(path), 'a') as f:
                f.write(f'{algorithm},{split},{iteration},{edge_group},{key},{(time.time() - start_time)}\n')
            restored_df.to_csv("{}results/{}_{}_{}_{}_{}_{}.csv".format(path, file_name, algorithm, split, edge_group, iteration, key), index=False)
            G_found, hidden, train, test = true_restore(G_found, hidden, train, test, percentual=value, edge_group=edge_group)
    elif algorithm == 'regularization':
        iterations = 30
        for key, value in percentual_to_time.items():
            print(f'Evaluation for {algorithm},{split},{iteration},{edge_group},{key}')
            start_time = time.time()
            G_found = regularization(G_found, iterations=iterations, mi=0.85)
            restored_df = restore_hin(G_found, test)
            with open("{}results/execution_time.txt".format(path), 'a') as f:
                f.write(f'{algorithm},{split},{iteration},{edge_group},{key},{(time.time() - start_time)}\n')
            restored_df.to_csv("{}results/{}_{}_{}_{}_{}_{}.csv".format(path, file_name, algorithm, split, edge_group, iteration, key), index=False)
            # After the first stage, later stages run with fewer iterations.
            iterations = 20
            G_found, hidden, train, test = true_restore(G_found, hidden, train, test, percentual=value, edge_group=edge_group)

if __name__ == '__main__':
    network_name = "hin03-05"
    splits = [0.8]
    #edge_groups = ['doi_name', 'doi_bioActivity', 'doi_collectionSpecie', 'doi_collectionSite', 'doi_collectionType']
    edge_groups = ['doi_collectionType']
    algorithms = ['deep_walk', 'node2vec', 'metapath2vec', 'regularization']
    # Per-stage fraction of hidden edges to restore after that stage's
    # evaluation (passed to true_restore as `percentual`).
    percentual_to_time = {'1st': 0.3, '2nd': 0.32, '3rd': 0.5, '4th': 0.0}
    with open("{}{}.gpickle".format(path, network_name), "rb") as fh:
        G = pickle.load(fh)
    # Run every (split, iteration, edge group, algorithm) combination.
    for split in splits:
        for iteration in range(10):
            for edge_group in edge_groups:
                for algorithm in algorithms:
                    execution(G, algorithm, split, iteration, edge_group, percentual_to_time)
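
For reference, a minimal sketch (not part of the original script) of how the timing log appended to by `execution` could be inspected afterwards. The column names are assumptions chosen here, since the log is written without a header row, and pandas is only needed for this snippet:

import pandas as pd

# Load the comma-separated timing log written by `execution`; replace the
# placeholder with the same directory used for `path` above.
timings = pd.read_csv(
    'path-to-data-repository/results/execution_time.txt',
    names=['algorithm', 'split', 'iteration', 'edge_group', 'stage', 'seconds'],
)

# Mean wall-clock time per algorithm and benchmark stage.
print(timings.groupby(['algorithm', 'stage'])['seconds'].mean())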