-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
329 lines (269 loc) · 13 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
import os
import numpy as np
import time
import copy
import torch
import torch.nn.functional as F
from torch.utils.data import Subset
from torch_geometric.profile import count_parameters
from torch.optim import lr_scheduler
import pandas as pd
import argparse
from LCNAData import LCNAData
from torch_geometric.loader import DataLoader
from model import GNN
from loss import topk_loss, unit_loss, consist_loss
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
def _make_subject_folds(subjects_with_both_labels, subjects_with_one_label, kfold):
    """Draw the held-out subjects for every cross-validation fold.

    Each fold receives two subjects that have both labels and two subjects
    that have a single label, sampled without replacement from the global
    NumPy random state. Subjects that are never drawn are simply not part of
    any held-out fold (the caller puts them in the training set).

    Parameters:
    - subjects_with_both_labels: subject ids that have graphs of both classes.
    - subjects_with_one_label: subject ids that have graphs of one class only.
    - kfold (int): number of folds to build.

    Returns:
    - list of np.ndarray: one shuffled array of four subject ids per fold.

    Raises:
    - ValueError: (from np.random.choice) if fewer than two subjects remain
      in either pool when a fold is drawn.
    """
    remaining_both = list(subjects_with_both_labels)
    remaining_one = list(subjects_with_one_label)
    subject_folds = []
    for _ in range(kfold):
        # The order of the random calls is kept stable on purpose: with a
        # fixed seed it determines which subjects end up in which fold.
        picked_both = np.random.choice(remaining_both, 2, replace=False)
        remaining_both = list(set(remaining_both) - set(picked_both))
        picked_one = np.random.choice(remaining_one, 2, replace=False)
        remaining_one = list(set(remaining_one) - set(picked_one))
        fold_subjects = np.concatenate([picked_both, picked_one])
        np.random.shuffle(fold_subjects)
        subject_folds.append(fold_subjects)
    return subject_folds


def main():
    """Train and evaluate the GNN with subject-wise k-fold cross-validation.

    Reads the project directory from the command line, loads the dataset and
    its label table from ``<project_dir>/data/gnn_data/preproc``, and for each
    fold trains a fresh model, keeps the weights with the lowest validation
    loss, evaluates them on the fold's test split and writes the best
    weights, the pooling scores and the per-epoch metrics to
    ``<project_dir>/gnn/model``. Finally prints the test accuracy and loss
    averaged over all folds.
    """
    parser = argparse.ArgumentParser(description='Path to the project directory.')
    # nargs='?' makes the positional argument optional; without it argparse
    # always requires the argument and the default below is never used.
    parser.add_argument(
        'project_dir', type=str, nargs='?',
        help='Path to the project repository.',
        default='/home/Arash-Sal-Moslehian/Playground/EPFL/epfl-ml4science/')
    args = parser.parse_args()
    current_dir = args.project_dir

    # Change this accordingly
    data_path = os.path.join(current_dir, 'data', 'gnn_data', 'preproc')
    model_path = os.path.join(current_dir, 'gnn', 'model')
    print(model_path)

    torch.manual_seed(7)
    np.random.seed(7)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Model Parameters
    ratio = 0.5
    n_roi = 68
    indim = n_roi
    # Adam Optimizer Parameters
    lr = 0.001
    weightdecay = 0.2
    # Learning Rate Parameters
    stepsize = 10
    gamma = 0.5
    # Setting up
    num_epoch = 50
    # Read up how batching is performed on graphs https://pytorch-geometric.readthedocs.io/en/latest/advanced/batching.html
    batch_size = 32
    kfold = 5
    # Weights of the individual loss terms
    lamb_ce = 1
    lamb0 = 0.1
    lamb1 = 0.1
    lamb2 = 0.4
    n_class = 2

    os.makedirs(model_path, exist_ok=True)

    # Load dataset
    dataset = LCNAData(data_path)
    data_labels = pd.read_csv(os.path.join(data_path, 'data_labels.csv'))
    # Fraction of the held-out graphs that goes to the test split; the rest
    # becomes the validation split.
    test_to_valid_ratio = 0.5

    # Since the labels for subjects are not distributed equally, we create the folds manually.
    # In each fold, we make sure to have at least two subjects that have both labels.
    # Whatever is left from subjects with one label is added to the training set in each fold.
    subjects_with_both_labels = [1619, 1623, 1633, 1634, 1635, 1646, 1674, 1675, 2108, 2123]
    subjects_with_one_label = [1620, 1644, 1663, 1669, 1670, 2073, 2081, 2084, 2109, 2110, 2129, 2130, 2174]
    all_subjects = subjects_with_both_labels + subjects_with_one_label
    subject_folds = _make_subject_folds(subjects_with_both_labels, subjects_with_one_label, kfold)

    def compute_loss(output, w1, s1, labels):
        """Return the weighted sum of all loss terms for one batch.

        loss = classification loss + unit loss + topk pooling loss
               + group-level consistency loss (summed over the classes).
        """
        loss_c = F.nll_loss(output, labels)
        loss_p1 = unit_loss(w1)
        loss_tpk1 = topk_loss(s1, ratio)
        loss_consist = 0
        for c in range(n_class):
            loss_consist += consist_loss(s1[labels == c], device)
        return (lamb_ce * loss_c) + (lamb0 * loss_p1) + (lamb1 * loss_tpk1) + (lamb2 * loss_consist)

    def train(model, loader, optimizer, scheduler):
        """Train the model for one epoch.

        Parameters:
        - model: network to optimise.
        - loader: DataLoader yielding the training batches.
        - optimizer: optimizer updating the model parameters.
        - scheduler: learning-rate scheduler, stepped once at the end of the epoch.

        Returns:
        - float: loss averaged over all graphs of the training set.
        - dict: 's1' and 'p1' arrays, each the per-batch model outputs stacked
          vertically over the whole epoch.
        - w1: the third-from-last... see note below.
        """
        # NOTE: the third return value is the `w1` output of the model for the
        # last batch of the epoch.
        for param_group in optimizer.param_groups:
            print("LR: ", param_group['lr'])

        model.train()
        scores_dict = {
            's1': [],
            'p1': [],
        }
        total_loss = 0

        for data in loader:
            data = data.to(device)
            optimizer.zero_grad()
            output, w1, s1, p1 = model(
                data.x, data.edge_index, data.batch, data.edge_attr)

            # Per the original author's note, s1 and p1 have shape
            # [|batch|, (|N|*ratio)]; the per-batch blocks are collected here
            # and stacked after the loop.
            scores_dict['s1'].append(s1.detach().cpu().numpy())
            scores_dict['p1'].append(p1.detach().cpu().numpy())

            loss = compute_loss(output, w1, s1, data.y)
            loss.backward()
            # Weight by the number of graphs so the epoch average is per graph.
            total_loss += loss.item() * data.num_graphs
            optimizer.step()

        # Change LR (once per epoch)
        scheduler.step()

        # One row per graph seen during the epoch.
        scores_dict['s1'] = np.vstack(scores_dict['s1'])
        scores_dict['p1'] = np.vstack(scores_dict['p1'])

        return total_loss / len(loader.dataset), scores_dict, w1

    def evaluate_accuracy(model, loader):
        """Return the fraction of correctly classified graphs in `loader`."""
        model.eval()
        correct = 0
        # No gradients are needed for evaluation; skipping them avoids
        # building the autograd graph.
        with torch.no_grad():
            for data in loader:
                data = data.to(device)
                outputs = model(data.x, data.edge_index, data.batch,
                                data.edge_attr)
                # Index of the highest score per graph is the predicted label.
                pred = outputs[0].max(dim=1)[1]
                correct += pred.eq(data.y).sum().item()
        return correct / len(loader.dataset)

    def evaluate_loss(model, loader):
        """Return the combined loss averaged over all graphs in `loader`."""
        model.eval()
        total_loss = 0
        with torch.no_grad():
            for data in loader:
                data = data.to(device)
                output, w1, s1, p1 = model(data.x, data.edge_index, data.batch, data.edge_attr)
                loss = compute_loss(output, w1, s1, data.y)
                total_loss += loss.item() * data.num_graphs
        return total_loss / len(loader.dataset)

    # Created once, outside the fold loop, so that the final summary averages
    # the test results of every fold rather than only the last one.
    cross_val_recorder = {
        'test_accuracy': [],
        'test_loss': []
    }

    for fold, subjects_in_test_valid in enumerate(subject_folds):
        subjects_in_train = list(set(all_subjects) - set(subjects_in_test_valid))

        # Get the indices for all the graphs for these subjects
        test_valid_indices = data_labels[data_labels['subject_id'].isin(subjects_in_test_valid)].index.tolist()
        train_indices = data_labels[data_labels['subject_id'].isin(subjects_in_train)].index.tolist()
        # Split the held-out graphs at random into test and validation parts.
        test_indices = np.random.choice(
            test_valid_indices, int(len(test_valid_indices) * test_to_valid_ratio), replace=False)
        valid_indices = list(set(test_valid_indices) - set(test_indices))

        train_set = Subset(dataset, indices=train_indices)
        val_set = Subset(dataset, indices=valid_indices)
        test_set = Subset(dataset, indices=test_indices)

        # Create DataLoader for each set
        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
        test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

        # Initialize a fresh GNN model for this fold and print its structure
        model = GNN(indim, ratio).to(device)
        print(model)
        print('Total parameters: ', count_parameters(model))

        # Set up Adam optimizer and learning rate scheduler
        optimizer = torch.optim.Adam(
            model.parameters(), lr=lr, weight_decay=weightdecay)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=stepsize, gamma=gamma)

        # Per-epoch metrics of this fold
        metric_recorder = {
            'epoch': [],
            'train_loss': [],
            'valid_loss': [],
            'train_accuracy': [],
            'valid_accuracy': [],
        }

        # Track the best model weights, scores and validation loss
        best_model_weights = copy.deepcopy(model.state_dict())
        best_model_scores = {}
        best_loss = np.inf

        for epoch in range(0, num_epoch):
            since = time.time()

            training_loss, scores_dict, w1 = train(model, train_loader, optimizer, scheduler)
            training_accuracy = evaluate_accuracy(model, train_loader)
            validation_accuracy = evaluate_accuracy(model, val_loader)
            validation_loss = evaluate_loss(model, val_loader)

            time_elapsed = time.time() - since

            # Print epoch summary
            print('---')
            print(f'{time_elapsed // 60}m {time_elapsed % 60}s')
            print(f'Epoch: {epoch}, Train Loss: {training_loss}, Train Acc: {training_accuracy}, Valid Loss: {validation_loss}, Valid Acc: {validation_accuracy}')
            print('---')

            # Log metrics
            metric_recorder['epoch'].append(epoch)
            metric_recorder['train_loss'].append(training_loss)
            metric_recorder['valid_loss'].append(validation_loss)
            metric_recorder['train_accuracy'].append(training_accuracy)
            metric_recorder['valid_accuracy'].append(validation_accuracy)

            # Keep the model and its scores when the validation loss improves;
            # the first three epochs are never selected.
            if validation_loss < best_loss and epoch > 2:
                print("New best model.")
                best_loss = validation_loss
                best_model_weights = copy.deepcopy(model.state_dict())
                best_model_scores = copy.deepcopy(scores_dict)

        # Save the model, scores, and metrics of this fold on disk.
        torch.save(best_model_weights, os.path.join(model_path, f'best-model-{str(fold)}.pth'))
        np.savez_compressed(os.path.join(model_path, f'scores-{str(fold)}.npz'), **best_model_scores)
        pd.DataFrame(metric_recorder).to_pickle(os.path.join(model_path, f'metrics-{str(fold)}.pkl'))

        # Use the best model weights obtained during training
        model.load_state_dict(best_model_weights)
        model.eval()

        # Evaluate the model on the testing set
        test_accuracy = evaluate_accuracy(model, test_loader)
        test_loss = evaluate_loss(model, test_loader)
        cross_val_recorder['test_accuracy'].append(test_accuracy)
        cross_val_recorder['test_loss'].append(test_loss)
        print("---")
        print(f"Test Acc: {test_accuracy}, Test Loss: {test_loss}")
        print("---")

    print(f"Cross-validation: Test Acc: {np.mean(cross_val_recorder['test_accuracy'])}, Test Loss: {np.mean(cross_val_recorder['test_loss'])}")
# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()