neural_net.py (forked from HIPS/autograd)

"""A multi-layer perceptron for classification of MNIST handwritten digits."""
from __future__ import absolute_import, division
from __future__ import print_function
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd.scipy.misc import logsumexp
from autograd import grad
from autograd.util import flatten
from autograd.optimizers import adam
from data import load_mnist


def init_random_params(scale, layer_sizes, rs=npr.RandomState(0)):
    """Build a list of (weights, biases) tuples,
       one for each layer in the net."""
    return [(scale * rs.randn(m, n),   # weight matrix
             scale * rs.randn(n))      # bias vector
            for m, n in zip(layer_sizes[:-1], layer_sizes[1:])]
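
# With layer_sizes = [784, 200, 100, 10] (set below), this gives weight matrices
# of shape (784, 200), (200, 100), (100, 10) plus matching bias vectors.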

def neural_net_predict(params, inputs):
    """Implements a deep neural network for classification.
       params is a list of (weights, bias) tuples.
       inputs is an (N x D) matrix.
       returns normalized class log-probabilities."""
    for W, b in params:
        outputs = np.dot(inputs, W) + b
        inputs = np.tanh(outputs)
    return outputs - logsumexp(outputs, axis=1, keepdims=True)
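
# Note: tanh is applied between layers, but the value returned is the final
# layer's pre-activation ("outputs") normalized by logsumexp, i.e. log-softmax
# class log-probabilities; the tanh of the last layer is computed and discarded.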

def l2_norm(params):
    """Computes squared l2 norm of params by flattening them into a vector."""
    flattened, _ = flatten(params)
    return np.dot(flattened, flattened)
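
# flatten packs every weight matrix and bias vector in params into a single
# 1-D array, so the dot product above is the sum of squares of all parameters.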

def log_posterior(params, inputs, targets, L2_reg):
    log_prior = -L2_reg * l2_norm(params)
    log_lik = np.sum(neural_net_predict(params, inputs) * targets)
    return log_prior + log_lik
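
# Since the targets are one-hot, the elementwise product above picks out the
# log-probability of each example's true class, so -log_lik is the usual
# cross-entropy loss; the -L2_reg * l2_norm(params) term is a Gaussian log-prior
# on the parameters (up to an additive constant), i.e. L2 weight decay.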

def accuracy(params, inputs, targets):
    target_class    = np.argmax(targets, axis=1)
    predicted_class = np.argmax(neural_net_predict(params, inputs), axis=1)
    return np.mean(predicted_class == target_class)


if __name__ == '__main__':
    # Model parameters
    layer_sizes = [784, 200, 100, 10]
    L2_reg = 1.0

    # Training parameters
    param_scale = 0.1
    batch_size = 256
    num_epochs = 5
    step_size = 0.001

    print("Loading training data...")
    N, train_images, train_labels, test_images, test_labels = load_mnist()

    init_params = init_random_params(param_scale, layer_sizes)

    num_batches = int(np.ceil(len(train_images) / batch_size))
    def batch_indices(iter):
        idx = iter % num_batches
        return slice(idx * batch_size, (idx+1) * batch_size)
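
    # Example: with batch_size = 256, iteration 0 maps to slice(0, 256),
    # iteration 1 to slice(256, 512), and the indices wrap back to the start
    # of the training set every num_batches iterations.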

    # Define training objective
    def objective(params, iter):
        idx = batch_indices(iter)
        return -log_posterior(params, train_images[idx], train_labels[idx], L2_reg)

    # Get gradient of objective using autograd.
    objective_grad = grad(objective)
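
    # grad differentiates objective with respect to its first argument (params),
    # so objective_grad(params, iter) returns gradients with the same nested
    # (weights, biases) structure as params.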
print(" Epoch | Train accuracy | Test accuracy ")
def print_perf(params, iter, gradient):
if iter % num_batches == 0:
train_acc = accuracy(params, train_images, train_labels)
test_acc = accuracy(params, test_images, test_labels)
print("{:15}|{:20}|{:20}".format(iter//num_batches, train_acc, test_acc))
# The optimizers provided can optimize lists, tuples, or dicts of parameters.
optimized_params = adam(objective_grad, init_params, step_size=step_size,
num_iters=num_epochs * num_batches, callback=print_perf)
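
    # A minimal follow-up sketch (not part of the original example): report the
    # final test accuracy and compare predictions with labels for a few test
    # images, using only functions already defined above.
    print("Final test accuracy:",
          accuracy(optimized_params, test_images, test_labels))
    print("Predicted classes (first 5 test images):",
          np.argmax(neural_net_predict(optimized_params, test_images[:5]), axis=1))
    print("True classes:                            ",
          np.argmax(test_labels[:5], axis=1))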