# Forked from teganmaharaj/zoneout
# initialization.py -- 53 lines (39 loc), 1.57 KB
import numpy
import theano
from fractions import gcd
from blocks.initialization import NdarrayInitialization
class NormalizedInitialization(NdarrayInitialization):
    """Glorot (Xavier) uniform initialization.

    Weights are drawn from U(-b, b) with b = sqrt(6) / sqrt(fan_in + fan_out).

    Notes
    -----
    For details see
    Understanding the difficulty of training deep feedforward neural networks,
    Glorot, Bengio, 2010
    """
    def generate(self, rng, shape):
        # A 1-D shape denotes a diagonal weight vector (e.g. LSTM
        # cell-to-gate connections); those start at zero.
        if len(shape) == 1:
            return numpy.zeros(shape=shape).astype(theano.config.floatX)
        fan_in, fan_out = shape
        bound = numpy.sqrt(6) / numpy.sqrt(fan_in + fan_out)
        sample = rng.uniform(-bound, bound, size=shape)
        return sample.astype(theano.config.floatX)
class IdentityInitialization(NdarrayInitialization):
    """Initialize parameters as the identity matrix scaled by a constant c."""
    def __init__(self, c):
        # Scalar multiplier applied to the identity matrix.
        self.c = c

    def generate(self, rng, shape):
        # numpy.eye accepts (N,) or (N, M), so non-square shapes work too.
        eye = numpy.eye(*shape, dtype=theano.config.floatX)
        return self.c * eye
class OrthogonalInitialization(NdarrayInitialization):
    """Block-orthogonal initialization (credit: Janos Kramar).

    The (rows, cols) matrix is tiled into square sub-blocks of side
    gcd(rows, cols); each sub-block is replaced by the U factor of its
    SVD, which makes every sub-block an orthogonal matrix.
    """
    def generate(self, rng, shape):
        # fractions.gcd was removed in Python 3.9; math.gcd (3.5+) is
        # the supported replacement. Imported locally so this block does
        # not depend on the module-level `from fractions import gcd`.
        from math import gcd

        W = rng.normal(0.0, 1.0, shape)
        rows, cols = W.shape
        factor = gcd(rows, cols)
        # The tiling below assumes the block size equals one of the two
        # dimensions (i.e. one dimension divides the other).
        assert factor in W.shape
        # BUG FIX: the original used `/`, which is true division in
        # Python 3 and makes range() raise TypeError; `//` keeps the
        # loop bounds integral.
        for i in range(rows // factor):
            for j in range(cols // factor):
                block = numpy.s_[factor * i:factor * (i + 1),
                                 factor * j:factor * (j + 1)]
                # Keep only U, the orthogonal left factor of the SVD.
                W[block], _, _ = numpy.linalg.svd(W[block])
        return W.astype(theano.config.floatX)