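"""experiment.py

Hyperparameter search driver: samples configurations with scikit-learn's
ParameterSampler and launches repeated training trials through the
project's Trainer, appending each trial's best scores to a CSV log.
"""
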
from trainer import Trainer, config
import random
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import ParameterSampler
from dataset.image_dataset import get_train_val_test_loaders as get_image_loaders
from model.image_cnn import CNN
import os
from tqdm import tqdm
import torch.nn as nn
import collections


class Experiment(object):
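    """Runs a randomized hyperparameter search over `param_grid`, repeating
    each sampled configuration `repeat` times with different random seeds
    and recording each trial's best results in `log/df_search.csv`."""
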
    def __init__(self, optimizer, device, config_str, model_type, model_name,
                 param_grid, save_best_num, savename, budget=1, repeat=1,
                 name='tmp', save_every=None, eval_train=True, metadata="",
                 pretrain_file=""):
        self.eval_train = eval_train
        self.model_name = model_name
        self.model_type = model_type
        self.budget = budget  # number of hyperparameter configurations to sample
        self.repeat = repeat  # number of restarts with different random seeds
        self.param_grid = param_grid
        self.param_sampler = ParameterSampler(param_grid, n_iter=self.budget, random_state=0)
        self.config_str = config_str
        self.device = device
        self.save_every = save_every
        self.optimizer = optimizer
        self.save_best_num = save_best_num
        self.savename = savename
        self.metadata = metadata
        self.pretrain_file = pretrain_file
        self.va_loader = []
        self.tr_loader = []
        self.te_loader = []
        self.bias_te_loader = []
        self.loader_names = config(config_str + ".loader_names")

    def run(self):
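        """Iterate over sampled hyperparameter configurations, skipping any
        trial whose checkpoint name already appears in the search log, and
        return the accumulated results as a DataFrame."""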
        log_path = '{}/log/df_search.csv'.format(self.savename)
        if os.path.exists(log_path):
            df_search = pd.read_csv(log_path)
        else:
            df_search = pd.DataFrame(columns=['best_score', 'best_iter', 'seed', 'savename']
                                     + list(self.param_grid.keys()))
        iterator = 0
        for run, params in tqdm(enumerate(self.param_sampler), desc='params' + str(iterator), leave=False):
            print(self.config_str, '\t', 'Run:', run, '/', self.budget)
            for i in range(self.repeat):
                # Use a fixed seed from the config if one is given; otherwise
                # seed each restart with its repeat index.
                try:
                    seed = config(self.config_str + ".seed")
                except Exception:
                    seed = i
                if not os.path.exists('{}/seed_{}/'.format(self.savename, seed)):
                    os.makedirs('{}/seed_{}/'.format(self.savename, seed))
                params_ordered = collections.OrderedDict()
                for k in sorted(params.keys()):
                    params_ordered[k] = params[k]
                savename = '{}/seed_{}/{}_checkpoint.pth.tar'.format(self.savename, seed, params_ordered)
                print("SAVENAME:", savename)
                if not sum(df_search['savename'] == savename):
                    results = self._run_trial(seed, params)
                    # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
                    if self.save_best_num > 1:
                        df_search = pd.concat([df_search, pd.DataFrame(results)], ignore_index=True)
                    else:
                        df_search = pd.concat([df_search, pd.DataFrame([results])], ignore_index=True)
                else:
                    df_search = df_search.drop_duplicates(subset=["savename"], keep="first")
                    print("Already ran this trial, moving on to the next one")
                # Persist the log after every trial so completed runs are
                # skipped on resume; the write path must match the read path above.
                df_search.to_csv(log_path, index=False)
            iterator += 1
        return df_search

    def _run_trial(self, seed, params_unordered):
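        """Train one model with the given seed and hyperparameters, and
        return a result row (or rows, when save_best_num > 1) for the
        search log."""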
print("Running trial:", seed)
params = collections.OrderedDict()
for i in sorted (params_unordered.keys()):
params[i] = params_unordered[i]
savename = '{}/seed_{}/{}_checkpoint.pth.tar'.format(self.savename, seed, params)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
print("Getting model")
self.model, criterion, optimizer = self._get_model(params)
self.model = nn.DataParallel(self.model)
print("Getting data")
self._get_data_loaders(seed, params)
print("Initializing trainer")
self.trainer = Trainer(seed,self.model, criterion, optimizer, params, self.loaders, self.loader_names, self.device, savename, self.config_str, save_best_num = self.save_best_num, save_every = self.save_every, eval_train = self.eval_train)
print("Training")
self.trainer.fit()
if (self.save_best_num > 1):
all_scores = [ {
'best_score': self.trainer.best_checkpoint_to_score[checkpoint], 'best_iter': 0,
'savename': checkpoint, 'seed': seed,
**params,
} for checkpoint in self.trainer.best_checkpoint_to_score]
else:
best_score = self.trainer.best_score
_best_iter = self.trainer.best_iter
all_scores = self.trainer.scores
all_losses = self.trainer.losses
best_lower = self.trainer.best_lower
best_upper = self.trainer.best_upper
best_test = self.trainer.best_test
best_loss = self.trainer.best_loss
all_data = collections.OrderedDict()
for loader_name in self.loader_names:
all_data[loader_name + "_score"] = best_test[loader_name]
all_data[loader_name + "_loss"] = best_loss[loader_name]
if (loader_name != "train"):
all_data[loader_name + "_best_lower"] = best_lower[loader_name]
all_data[loader_name + "_best_upper"] = best_upper[loader_name]
all_scores = {
'best_score': best_score, 'best_iter': _best_iter,
'savename': savename, 'seed': seed,
**params,
**all_data,
}
return all_scores
    def _get_data_loaders(self, seed_no, params):
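        """Build the train/val/test loaders for this seed, batch size, and
        augmentation setting."""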
        self.loaders, _ = get_image_loaders(seed_no, self.config_str,
                                            params["batch_size"], params["augmentation"],
                                            config(self.config_str + '.num_classes'),
                                            data=self.metadata)
        return self.loaders

    def _get_model(self, params):
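        """Construct the CNN, choose which parameters to train, and return
        (model, criterion, optimizer)."""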
        model = CNN(self.model_type, self.model_name,
                    config(self.config_str + ".pretrain"), self.device, params,
                    pretrain_file=self.pretrain_file).model.to(self.device)
        try:
            tune_classifier = config(self.config_str + ".tune_classifier")
        except Exception:
            tune_classifier = False
        if tune_classifier:
            print("Updating classifier")
            # Freeze the feature extractor and fine-tune only the classifier head.
            for param in model.features.parameters():
                param.requires_grad = False
            for param in model.classifier.parameters():
                param.requires_grad = True
            parameters = model.classifier.parameters()
        else:
            print("All parameters are being updated")
            parameters = model.parameters()
        if self.optimizer == "sgd":
            print("Optimizer: SGD")
            optimizer = torch.optim.SGD(parameters, lr=params["lr"], momentum=params["momentum"])
        else:
            print("Optimizer: Adam")
            # Use the same parameter selection as above so a classifier-only
            # freeze is also respected when training with Adam.
            optimizer = torch.optim.Adam(parameters, lr=params["lr"])
        criterion = torch.nn.BCEWithLogitsLoss()
        return model, criterion, optimizer
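

# A minimal usage sketch (not part of the original module): the argument
# values below, including the "demo" config section name, the model names,
# and the hyperparameter grid, are illustrative assumptions rather than
# values taken from this repository's configs.
if __name__ == "__main__":
    param_grid = {
        "lr": [1e-3, 1e-4],      # assumed grid; any values the Trainer accepts work
        "momentum": [0.9],
        "batch_size": [32],
        "augmentation": [False],
    }
    experiment = Experiment(
        optimizer="sgd",
        device="cuda" if torch.cuda.is_available() else "cpu",
        config_str="demo",        # hypothetical config section name
        model_type="resnet",      # hypothetical; must match model.image_cnn.CNN
        model_name="resnet18",
        param_grid=param_grid,
        save_best_num=1,
        savename="results/demo",  # base directory for checkpoints and logs
        budget=2,                 # sample two configurations
        repeat=1,
    )
    df = experiment.run()
    print(df)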