utils.py
"""
Do not modify this file.
"""
import numpy as np
import json


### Loss functions ###

# Softmax loss and Softmax gradient
class softmax_cross_entropy:
    def __init__(self):
        self.expand_Y = None
        self.calib_logit = None
        self.sum_exp_calib_logit = None
        self.prob = None

    def forward(self, X, Y):
        # One-hot encode the integer labels Y into expand_Y, shape (N, C).
        self.expand_Y = np.zeros(X.shape).reshape(-1)
        self.expand_Y[Y.astype(int).reshape(-1) + np.arange(X.shape[0]) * X.shape[1]] = 1.0
        self.expand_Y = self.expand_Y.reshape(X.shape)

        # Subtract the row-wise max for numerical stability, then compute softmax probabilities.
        self.calib_logit = X - np.amax(X, axis=1, keepdims=True)
        self.sum_exp_calib_logit = np.sum(np.exp(self.calib_logit), axis=1, keepdims=True)
        self.prob = np.exp(self.calib_logit) / self.sum_exp_calib_logit

        # Average cross-entropy loss over the mini-batch.
        forward_output = -np.sum(np.multiply(self.expand_Y, self.calib_logit - np.log(self.sum_exp_calib_logit))) / X.shape[0]
        return forward_output

    def backward(self, X, Y):
        # Gradient of the average cross-entropy loss with respect to the logits X.
        backward_output = -(self.expand_Y - self.prob) / X.shape[0]
        return backward_output
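

# A minimal sketch of the forward/backward interface on synthetic logits; the
# shapes below (5 examples, 3 classes) and the helper name are illustrative
# assumptions only, not part of the training pipeline.
def _softmax_cross_entropy_example():
    rng = np.random.RandomState(0)
    X = rng.randn(5, 3)                 # logits, shape (N, C)
    Y = rng.randint(0, 3, size=(5, 1))  # integer class labels, shape (N, 1)
    loss_fn = softmax_cross_entropy()
    loss = loss_fn.forward(X, Y)        # scalar: average cross-entropy over N
    grad = loss_fn.backward(X, Y)       # gradient w.r.t. X, shape (N, C)
    return loss, grad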

### Momentum ###

def add_momentum(model):
    # Create one zero-initialized momentum buffer per trainable parameter,
    # keyed as '<module_name>_<param_name>'.
    momentum = dict()
    for module_name, module in model.items():
        if hasattr(module, 'params'):
            for key, _ in module.params.items():
                momentum[module_name + '_' + key] = np.zeros(module.gradient[key].shape)
    return momentum
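

# A sketch of how the momentum buffers might be consumed by an SGD-with-momentum
# update. The learning rate `alpha`, the coefficient `beta`, and the helper name
# are assumptions; the key naming and the `params`/`gradient` attributes mirror
# add_momentum above.
def _sgd_with_momentum_step_example(model, momentum, alpha=0.01, beta=0.9):
    for module_name, module in model.items():
        if hasattr(module, 'params'):
            for key in module.params:
                m_key = module_name + '_' + key
                # Exponential moving average of the gradient, then a parameter step.
                momentum[m_key] = beta * momentum[m_key] + (1.0 - beta) * module.gradient[key]
                module.params[key] -= alpha * momentum[m_key]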


def data_loader_mnist(dataset):
    # This function reads the MNIST data (stored as JSON) and separates it into
    # train, validation, and test sets.
    with open(dataset, 'r') as f:
        data_set = json.load(f)
    train_set, valid_set, test_set = data_set['train'], data_set['valid'], data_set['test']

    Xtrain = np.array(train_set[0])
    Ytrain = np.array(train_set[1])
    Xvalid = np.array(valid_set[0])
    Yvalid = np.array(valid_set[1])
    Xtest = np.array(test_set[0])
    Ytest = np.array(test_set[1])
    return Xtrain, Ytrain, Xvalid, Yvalid, Xtest, Ytest


def predict_label(f):
    # Determine the predicted label given the scores f: threshold at 0 when
    # there is a single output column, otherwise take the arg-max over classes.
    if f.shape[1] == 1:
        return (f > 0).astype(float)
    else:
        return np.argmax(f, axis=1).astype(float).reshape((f.shape[0], -1))


class DataSplit:
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        self.N, self.d = self.X.shape

    def get_example(self, idx):
        # Gather the rows listed in idx into a mini-batch of features with
        # shape (len(idx), d) and labels with shape (len(idx), 1).
        batchX = np.zeros((len(idx), self.d))
        batchY = np.zeros((len(idx), 1))
        for i in range(len(idx)):
            batchX[i] = self.X[idx[i]]
            batchY[i, :] = self.Y[idx[i]]
        return batchX, batchY
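

# A self-contained usage sketch on synthetic data; the array shapes, the
# mini-batch size of 5, and the random scores standing in for model outputs
# are assumptions for illustration only.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    X = rng.randn(20, 4)
    Y = rng.randint(0, 3, size=(20, 1)).astype(float)
    split = DataSplit(X, Y)
    idx = rng.choice(split.N, size=5, replace=False)
    batchX, batchY = split.get_example(idx)   # shapes (5, 4) and (5, 1)
    scores = rng.randn(5, 3)                  # stand-in for model scores
    preds = predict_label(scores)             # shape (5, 1)
    print(batchX.shape, batchY.shape, preds.shape)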