# optimal_learning_rates.py
# Learning Rate Range Test on the MNIST dataset, comparing SGD and Adam.
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.optimizers import SGD, Adam
import matplotlib.pyplot as plt
from keras_lr_finder import LRFinder
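# Note: LRFinder comes from the third-party keras_lr_finder package
# (https://github.com/surmenok/keras_lr_finder), which must be installed separately.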
# Model configuration
batch_size = 250
img_width, img_height, img_num_channels = 28, 28, 1
loss_function = sparse_categorical_crossentropy
no_classes = 10
no_epochs = 5
start_lr = 0.0001
end_lr = 1
moving_average = 20
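# The range test sweeps the learning rate from start_lr up to end_lr while
# training; moving_average is the window (in steps) used to smooth loss deltas.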
# Load MNIST data
(input_train, target_train), (input_test, target_test) = mnist.load_data()
# Determine shape of the data
input_shape = (img_width, img_height, img_num_channels)
# Cast numbers to float32 format and reshape data
input_train = input_train.astype('float32').reshape(input_train.shape[0], img_width, img_height, img_num_channels)
input_test = input_test.astype('float32').reshape(input_test.shape[0], img_width, img_height, img_num_channels)
# Normalize data
input_train = input_train / 255
input_test = input_test / 255
# Create the model
# A fresh, untrained model is built for every test, so that the second
# optimizer's range test does not start from weights already updated by the first.
def build_model():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(no_classes, activation='softmax'))
    return model
# Determine tests you want to perform
tests = [
(SGD(), 'SGD optimizer'),
(Adam(), 'Adam optimizer'),
]
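# Additional optimizers (e.g. RMSprop) can be appended as (optimizer, label) pairs.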
# Set containers for tests
test_learning_rates = []
test_losses = []
test_loss_changes = []
labels = []
# Perform each test
for test_optimizer, label in tests:

    # Build and compile a fresh model for this test
    model = build_model()
    model.compile(loss=loss_function,
                  optimizer=test_optimizer,
                  metrics=['accuracy'])

    # Instantiate the Learning Rate Range Test / LR Finder
    lr_finder = LRFinder(model)

    # Perform the Learning Rate Range Test: find() trains the model while
    # increasing the learning rate, recording the loss at every step
    lr_finder.find(input_train, target_train, start_lr=start_lr, end_lr=end_lr, batch_size=batch_size, epochs=no_epochs)

    # Get the recorded values
    learning_rates = lr_finder.lrs
    losses = lr_finder.losses
    loss_changes = []

    # Compute smoothed loss changes over a moving_average-sized window
    # Inspired by Keras LR Finder: https://github.com/surmenok/keras_lr_finder/blob/master/keras_lr_finder/lr_finder.py
    for i in range(moving_average, len(learning_rates)):
        loss_changes.append((losses[i] - losses[i - moving_average]) / moving_average)

    # Append values to the containers
    test_learning_rates.append(learning_rates)
    test_losses.append(losses)
    test_loss_changes.append(loss_changes)
    labels.append(label)
# Generate plot for Loss Deltas
# (learning rates are offset by moving_average so they align with the smoothed deltas)
for i in range(len(test_learning_rates)):
    plt.plot(test_learning_rates[i][moving_average:], test_loss_changes[i], label=labels[i])
plt.xscale('log')
plt.legend(loc='upper left')
plt.ylabel('loss delta')
plt.xlabel('learning rate (log scale)')
plt.title('Results for Learning Rate Range Test / Loss Deltas for Learning Rate')
plt.show()
# Generate plot for Loss Values
for i in range(len(test_learning_rates)):
    plt.plot(test_learning_rates[i], test_losses[i], label=labels[i])
plt.xscale('log')
plt.legend(loc='upper left')
plt.ylabel('loss')
plt.xlabel('learning rate (log scale)')
plt.title('Results for Learning Rate Range Test / Loss Values for Learning Rate')
plt.show()
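
# A minimal follow-up sketch (not part of the original script): one common
# heuristic is to pick the learning rate where the smoothed loss falls fastest,
# i.e. where the loss delta is most negative. This snippet assumes the
# containers filled above and prints that rate for each optimizer.
import numpy as np

for label, lrs, deltas in zip(labels, test_learning_rates, test_loss_changes):
    steepest = int(np.argmin(deltas))  # index of the most negative loss delta
    print(f'{label}: steepest loss descent around learning rate '
          f'{lrs[moving_average + steepest]:.2e}')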