-- RecGRU.lua
local _ = require 'moses'

local RecGRU, parent = torch.class('nn.RecGRU', 'nn.AbstractRecurrent')

function RecGRU:__init(inputsize, outputsize)
   local stepmodule = nn.StepGRU(inputsize, outputsize)
   parent.__init(self, stepmodule)
   self.inputsize = inputsize
   self.outputsize = outputsize
   self.zeroOutput = torch.Tensor()
end
function RecGRU:maskZero(v1)
   assert(torch.isTypeOf(self.modules[1], 'nn.StepGRU'))
   for i,stepmodule in pairs(self.sharedClones) do
      stepmodule:maskZero(v1)
   end
   self.modules[1]:maskZero(v1)
   return self
end
------------------------- forward backward -----------------------------

function RecGRU:_updateOutput(input)
   assert(input:dim() == 2, "RecGRU expecting batchsize x inputsize tensor (Only supports batchmode)")
   local prevOutput = self:getHiddenState(self.step-1, input)

   -- output(t) = gru{input(t), output(t-1)}
   local output
   if self.train ~= false then
      -- training : use a dedicated shared clone for this time-step so that the
      -- intermediate buffers needed for backpropagation through time are preserved
      local stepmodule = self:getStepModule(self.step)
      output = stepmodule:updateOutput({input, prevOutput})
   else
      -- evaluation : reuse the same module at every step, on a copy of the
      -- previous output (no per-step state needs to be kept)
      self._prevOutput = self._prevOutput or prevOutput.new()
      self._prevOutput:resizeAs(prevOutput):copy(prevOutput)
      output = self.modules[1]:updateOutput({input, self._prevOutput})
   end

   return output
end
function RecGRU:_updateGradInput(input, gradOutput)
   assert(self.step > 1, "expecting at least one updateOutput")
   local step = self.updateGradInputStep - 1
   assert(step >= 1)

   -- set the output/gradOutput states of current Module
   local stepmodule = self:getStepModule(step)

   -- backward propagate through this step
   -- the gradient w.r.t. output(t) is the sum of the gradient coming from the
   -- next time-step (hidden state) and the gradOutput coming from the layer above
   local _gradOutput = assert(self:getGradHiddenState(step, input))
   self._gradOutputs[step] = self._gradOutputs[step] or _gradOutput.new()
   self._gradOutputs[step]:resizeAs(_gradOutput)
   self._gradOutputs[step]:add(_gradOutput, gradOutput)
   gradOutput = self._gradOutputs[step]

   local inputTable = {input, self:getHiddenState(step-1)}
   local gradInputTable = stepmodule:updateGradInput(inputTable, gradOutput)

   self:setGradHiddenState(step-1, gradInputTable[2])

   return gradInputTable[1]
end
function RecGRU:_accGradParameters(input, gradOutput, scale)
   local step = self.accGradParametersStep - 1
   assert(step >= 1)

   -- set the output/gradOutput states of current Module
   local stepmodule = self:getStepModule(step)

   -- backward propagate through this step
   local inputTable = {input, self:getHiddenState(step-1)}
   local gradOutput = self._gradOutputs[step] or self:getGradHiddenState(step)
   stepmodule:accGradParameters(inputTable, gradOutput, scale)
end
function RecGRU:clearState()
   self.startState = nil
   self.zeroOutput:set()
   return parent.clearState(self)
end

function RecGRU:type(type, ...)
   if type then
      self:forget()
      self:clearState()
   end
   return parent.type(self, type, ...)
end

function RecGRU:initZeroTensor(input)
   if input then
      if input:dim() == 2 then
         self.zeroOutput:resize(input:size(1), self.outputsize):zero()
      else
         self.zeroOutput:resize(self.outputsize):zero()
      end
   end
end
function RecGRU:getHiddenState(step, input)
   step = step == nil and (self.step - 1) or (step < 0) and (self.step - step - 1) or step
   local prevOutput
   if step == 0 then
      if self.startState then
         prevOutput = self.startState
         if input and input:dim() == 2 then
            assert(prevOutput:size(2) == self.outputsize)
            assert(prevOutput:size(1) == input:size(1))
         end
      else
         prevOutput = self.zeroOutput
         self:initZeroTensor(input)
      end
   else
      -- previous output of this module
      prevOutput = self.outputs[step]
   end
   return prevOutput
end
function RecGRU:setHiddenState(step, hiddenState)
   step = step == nil and (self.step - 1) or (step < 0) and (self.step - step - 1) or step
   assert(torch.isTensor(hiddenState))
   if step == 0 then
      -- this hack bypasses the fact that Sequencer calls forget() when remember is false,
      -- which makes it impossible to use self.outputs to set h[0] (it is forgotten)
      self:setStartState(hiddenState)
   else
      -- previous output of this module
      self.outputs[step] = hiddenState
   end
end
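
--[[
A minimal usage sketch (not part of the module): seeding the initial hidden
state h[0] through setHiddenState before stepping the module. The sizes and
tensors below are illustrative assumptions; setStartState is inherited from
nn.AbstractRecurrent.

   local gru = nn.RecGRU(10, 20)
   local h0 = torch.zeros(3, 20)               -- batchsize x outputsize
   gru:setHiddenState(0, h0)                   -- stored as the start state h[0]
   local y1 = gru:forward(torch.randn(3, 10))  -- batchsize x inputsize
--]]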
function RecGRU:getGradHiddenState(step, input)
   self.gradOutputs = self.gradOutputs or {}
   local _step = self.updateGradInputStep or self.step
   step = step == nil and (_step - 1) or (step < 0) and (_step - step - 1) or step
   local gradOutput
   if step == self.step-1 then
      if self.startState and not self.gradOutputs[step] then
         self:initZeroTensor(input)
      end
      gradOutput = self.gradOutputs[step] or self.zeroOutput
   else
      gradOutput = self.gradOutputs[step]
   end
   return gradOutput
end
function RecGRU:setGradHiddenState(step, gradHiddenState)
   local _step = self.updateGradInputStep or self.step
   step = step == nil and (_step - 1) or (step < 0) and (_step - step - 1) or step
   assert(torch.isTensor(gradHiddenState))
   self.gradOutputs[step] = gradHiddenState
end
function RecGRU:__tostring__()
   if self.weightO then
      -- weightO/hiddensize are presumably only set when the step module uses an
      -- output projection; plain RecGRU only ever reports (inputsize -> outputsize)
      return self.__typename .. string.format("(%d -> %d -> %d)", self.inputsize, self.hiddensize, self.outputsize)
   else
      return self.__typename .. string.format("(%d -> %d)", self.inputsize, self.outputsize)
   end
end
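
--[[
A hedged end-to-end sketch of how nn.RecGRU is typically driven one time-step
per call by wrapping it in nn.Sequencer from the rnn package. The sizes and the
random data below are assumptions for illustration, not part of this file.

   require 'rnn'

   local seqlen, batchsize, inputsize, outputsize = 5, 3, 10, 20
   local rnn = nn.Sequencer(nn.RecGRU(inputsize, outputsize))

   -- a table of seqlen tensors, each batchsize x inputsize
   local inputs, gradOutputs = {}, {}
   for t = 1, seqlen do
      inputs[t] = torch.randn(batchsize, inputsize)
      gradOutputs[t] = torch.randn(batchsize, outputsize)
   end

   local outputs = rnn:forward(inputs)   -- table of batchsize x outputsize tensors
   rnn:backward(inputs, gradOutputs)     -- backpropagation through time
--]]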