-
Notifications
You must be signed in to change notification settings - Fork 90
/
Copy pathmlnn.py
73 lines (61 loc) · 2.58 KB
/
mlnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import numpy as np
from gate import MultiplyGate, AddGate
from output import Softmax
from layer import Tanh
class Model:
    """Fully connected multi-layer network trained with batch gradient descent.

    Each layer computes ``Tanh(AddGate(MultiplyGate(W, x), b))``; the output of
    the final layer (which is also passed through ``Tanh``) is handed to
    ``Softmax`` for loss, gradient and prediction.

    Attributes:
        W: list of weight matrices; ``W[i]`` has shape
            ``(layers_dim[i], layers_dim[i + 1])``.
        b: list of bias rows; ``b[i]`` has shape ``(1, layers_dim[i + 1])``.
    """

    def __init__(self, layers_dim):
        """Randomly initialise one weight matrix and one bias row per layer.

        Args:
            layers_dim: sequence of layer widths, input width first. A
                sequence with fewer than two entries yields a model with no
                layers.
        """
        self.b = []
        self.W = []
        # W is drawn before b for every layer so that a seeded NumPy RNG
        # reproduces the same parameters as before.
        for fan_in, fan_out in zip(layers_dim[:-1], layers_dim[1:]):
            # Standard-normal weights scaled by 1/sqrt(fan_in).
            self.W.append(np.random.randn(fan_in, fan_out) / np.sqrt(fan_in))
            self.b.append(np.random.randn(fan_out).reshape(1, fan_out))

    def _forward_pass(self, X, mul_gate, add_gate, activation):
        """Run X through every layer and record the intermediates.

        Returns:
            A list of ``(mul, add, activation_output)`` tuples with one entry
            per layer, preceded by ``(None, None, X)`` so that entry ``k``
            holds the input of layer ``k`` in its last slot and entry
            ``k + 1`` holds that layer's intermediates.
        """
        trace = [(None, None, X)]
        for weights, bias in zip(self.W, self.b):
            mul = mul_gate.forward(weights, trace[-1][2])
            add = add_gate.forward(mul, bias)
            trace.append((mul, add, activation.forward(add)))
        return trace

    def calculate_loss(self, X, y):
        """Return ``Softmax.loss`` of the network output for inputs X and labels y.

        NOTE(review): only the value from ``Softmax.loss`` is returned; the
        ``reg_lambda`` penalty applied to the gradients in ``train`` is not
        added here.
        """
        trace = self._forward_pass(X, MultiplyGate(), AddGate(), Tanh())
        return Softmax().loss(trace[-1][2], y)

    def predict(self, X):
        """Return, for each row of the Softmax output, the index of its maximum.

        Presumably X holds one sample per row, so the result is one predicted
        class index per sample -- confirm against the gate implementations.
        """
        trace = self._forward_pass(X, MultiplyGate(), AddGate(), Tanh())
        probs = Softmax().predict(trace[-1][2])
        return np.argmax(probs, axis=1)

    def train(self, X, y, num_passes=20000, epsilon=0.01, reg_lambda=0.01, print_loss=False):
        """Fit ``W`` and ``b`` in place with full-batch gradient descent.

        Args:
            X: training inputs, fed unchanged to the first layer.
            y: training labels, passed to ``Softmax.diff`` / ``Softmax.loss``.
            num_passes: number of gradient-descent iterations over X.
            epsilon: learning rate.
            reg_lambda: coefficient of the weight term added to each ``dW``
                (biases are not regularised).
            print_loss: when true, print the loss every 1000 iterations.
        """
        mul_gate = MultiplyGate()
        add_gate = AddGate()
        activation = Tanh()
        softmax = Softmax()
        num_layers = len(self.W)

        for epoch in range(num_passes):
            # Forward propagation, keeping every intermediate for backprop.
            trace = self._forward_pass(X, mul_gate, add_gate, activation)

            # Back propagation, last layer first.
            upstream = softmax.diff(trace[-1][2], y)
            for k in reversed(range(num_layers)):
                mul, add, _ = trace[k + 1]
                layer_input = trace[k][2]

                dadd = activation.backward(add, upstream)
                db, dmul = add_gate.backward(mul, self.b[k], dadd)
                # The gradient w.r.t. this layer's input is computed with the
                # not-yet-updated weights and becomes the upstream gradient
                # for layer k - 1.
                dW, upstream = mul_gate.backward(self.W[k], layer_input, dmul)

                # Add regularization terms (biases don't have regularization terms)
                dW += reg_lambda * self.W[k]

                # Gradient descent parameter update (in place).
                self.b[k] -= epsilon * db
                self.W[k] -= epsilon * dW

            if print_loss and epoch % 1000 == 0:
                print("Loss after iteration %i: %f" %(epoch, self.calculate_loss(X, y)))