Merge pull request #12 from MartinuzziFrancesco/fm/ds
More docstrings
MartinuzziFrancesco authored Nov 27, 2024
2 parents 5164499 + b71c6d8 commit 28d6646
Showing 9 changed files with 132 additions and 11 deletions.
7 changes: 6 additions & 1 deletion src/indrnn_cell.jl
@@ -70,7 +70,7 @@ end

Flux.@layer :expand IndRNN

"""
@doc raw"""
IndRNN((input_size, hidden_size)::Pair, σ = tanh;
    kwargs...)
@@ -84,6 +84,11 @@ See [`IndRNNCell`](@ref) for a layer that processes a single sequence.
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\mathbf{h}_{t} = \sigma(\mathbf{W} \mathbf{x}_t + \mathbf{u} \odot \mathbf{h}_{t-1} + \mathbf{b})
```
"""
function IndRNN((input_size, hidden_size)::Pair, σ = tanh; kwargs...)
cell = IndRNNCell(input_size, hidden_size, σ; kwargs...)
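A minimal usage sketch for the constructor documented above. The `RecurrentLayers` package name, the `4 => 8` sizes, and the `(features, timesteps, batch)` input layout are illustrative assumptions, not guarantees of this diff:

```julia
using Flux, RecurrentLayers  # assumed package names

layer = IndRNN(4 => 8, tanh)   # 4 input features, 8 hidden units; σ defaults to tanh
x = rand(Float32, 4, 12, 3)    # assumed layout: (features, timesteps, batch)
h = layer(x)                   # hidden states over the whole sequence
```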
11 changes: 10 additions & 1 deletion src/lightru_cell.jl
@@ -77,7 +77,7 @@ end

Flux.@layer :expand LightRU

"""
@doc raw"""
LightRU((input_size => hidden_size)::Pair; kwargs...)
[Light recurrent unit network](https://www.mdpi.com/2079-9292/13/16/3204).
@@ -89,6 +89,15 @@ See [`LightRUCell`](@ref) for a layer that processes a single sequence.
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
\tilde{h}_t &= \tanh(W_h x_t), \\
f_t &= \sigma(W_f x_t + U_f h_{t-1} + b_f), \\
h_t &= (1 - f_t) \odot h_{t-1} + f_t \odot \tilde{h}_t.
\end{aligned}
```
"""
function LightRU((input_size, hidden_size)::Pair; kwargs...)
cell = LightRUCell(input_size => hidden_size; kwargs...)
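A similar hedged sketch for `LightRU`, exercising the documented `bias` keyword (same package and input-layout assumptions as the `IndRNN` example above):

```julia
using Flux, RecurrentLayers

layer = LightRU(4 => 8; bias = false)  # hypothetical sizes; omit the bias vector
x = rand(Float32, 4, 12, 3)
h = layer(x)
```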
11 changes: 10 additions & 1 deletion src/ligru_cell.jl
@@ -75,7 +75,7 @@ end

Flux.@layer :expand LiGRU

"""
@doc raw"""
LiGRU((input_size => hidden_size)::Pair; kwargs...)
[Light gated recurrent network](https://arxiv.org/pdf/1803.10225).
@@ -89,6 +89,15 @@ See [`LiGRUCell`](@ref) for a layer that processes a single sequence.
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
z_t &= \sigma(W_z x_t + U_z h_{t-1}), \\
\tilde{h}_t &= \text{ReLU}(W_h x_t + U_h h_{t-1}), \\
h_t &= z_t \odot h_{t-1} + (1 - z_t) \odot \tilde{h}_t
\end{aligned}
```
"""
function LiGRU((input_size, hidden_size)::Pair; kwargs...)
cell = LiGRUCell(input_size => hidden_size; kwargs...)
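For `LiGRU`, a sketch passing one of the documented initializer keywords; `Flux.glorot_uniform` is a standard Flux initializer, and everything else follows the assumptions above:

```julia
using Flux, RecurrentLayers

# swap the input-to-hidden initializer via the documented keyword
layer = LiGRU(4 => 8; init_kernel = Flux.glorot_uniform)
x = rand(Float32, 4, 12, 3)
h = layer(x)
```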
11 changes: 10 additions & 1 deletion src/mgu_cell.jl
@@ -76,7 +76,7 @@ end

Flux.@layer :expand MGU

"""
@doc raw"""
MGU((input_size => hidden_size)::Pair; kwargs...)
[Minimal gated unit network](https://arxiv.org/pdf/1603.09420).
@@ -88,6 +88,15 @@ See [`MGUCell`](@ref) for a layer that processes a single sequence.
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
f_t &= \sigma(W_f x_t + U_f h_{t-1} + b_f), \\
\tilde{h}_t &= \tanh(W_h x_t + U_h (f_t \odot h_{t-1}) + b_h), \\
h_t &= (1 - f_t) \odot h_{t-1} + f_t \odot \tilde{h}_t
\end{aligned}
```
"""
function MGU((input_size, hidden_size)::Pair; kwargs...)
cell = MGUCell(input_size => hidden_size; kwargs...)
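The MGU constructor follows the same pattern; a bare-bones sketch under the same assumptions:

```julia
using Flux, RecurrentLayers

layer = MGU(4 => 8)          # the single forget gate drives both mixing terms above
x = rand(Float32, 4, 12, 3)
h = layer(x)
```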
36 changes: 33 additions & 3 deletions src/mut_cell.jl
@@ -79,7 +79,7 @@ end

Flux.@layer :expand MUT1

"""
@doc raw"""
MUT1((input_size => hidden_size); kwargs...)
[Mutated unit 1 network](https://proceedings.mlr.press/v37/jozefowicz15.pdf).
@@ -91,6 +91,16 @@ See [`MUT1Cell`](@ref) for a layer that processes a single sequence.
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
z &= \sigma(W_z x_t + b_z), \\
r &= \sigma(W_r x_t + U_r h_t + b_r), \\
h_{t+1} &= \tanh(U_h (r \odot h_t) + \tanh(W_h x_t) + b_h) \odot z \\
&\quad + h_t \odot (1 - z).
\end{aligned}
```
"""
function MUT1((input_size, hidden_size)::Pair; kwargs...)
cell = MUT1Cell(input_size => hidden_size; kwargs...)
@@ -194,7 +204,7 @@ end

Flux.@layer :expand MUT2

"""
@doc raw"""
MUT2((input_size => hidden_size); kwargs...)
[Mutated unit 2 network](https://proceedings.mlr.press/v37/jozefowicz15.pdf).
@@ -206,6 +216,16 @@ See [`MUT2Cell`](@ref) for a layer that processes a single sequence.
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
z &= \sigma(W_z x_t + U_z h_t + b_z), \\
r &= \sigma(x_t + U_r h_t + b_r), \\
h_{t+1} &= \tanh(U_h (r \odot h_t) + W_h x_t + b_h) \odot z \\
&\quad + h_t \odot (1 - z).
\end{aligned}
```
"""
function MUT2((input_size, hidden_size)::Pair; kwargs...)
cell = MUT2Cell(input_size => hidden_size; kwargs...)
@@ -306,7 +326,7 @@ end

Flux.@layer :expand MUT3

"""
@doc raw"""
MUT3((input_size => hidden_size); kwargs...)
[Mutated unit 3 network](https://proceedings.mlr.press/v37/jozefowicz15.pdf).
@@ -318,6 +338,16 @@ See [`MUT3Cell`](@ref) for a layer that processes a single sequence.
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
z &= \sigma(W_z x_t + U_z \tanh(h_t) + b_z), \\
r &= \sigma(W_r x_t + U_r h_t + b_r), \\
h_{t+1} &= \tanh(U_h (r \odot h_t) + W_h x_t + b_h) \odot z \\
&\quad + h_t \odot (1 - z).
\end{aligned}
```
"""
function MUT3((input_size, hidden_size)::Pair; kwargs...)
cell = MUT3Cell(input_size => hidden_size; kwargs...)
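The three mutated units share one constructor shape, so a single combined sketch covers them (assumptions as in the earlier examples):

```julia
using Flux, RecurrentLayers

x = rand(Float32, 4, 12, 3)       # assumed (features, timesteps, batch)
for net in (MUT1(4 => 8), MUT2(4 => 8), MUT3(4 => 8))
    h = net(x)                    # each variant consumes the full sequence
end
```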
31 changes: 30 additions & 1 deletion src/nas_cell.jl
@@ -142,7 +142,7 @@ end

Flux.@layer :expand NAS

"""
@doc raw"""
NAS((input_size => hidden_size)::Pair; kwargs...)
@@ -155,6 +155,35 @@ See [`NASCell`](@ref) for a layer that processes a single sequence.
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
\text{First Layer Outputs:} & \\
o_1 &= \sigma(W_i^{(1)} x_t + W_h^{(1)} h_{t-1} + b^{(1)}), \\
o_2 &= \text{ReLU}(W_i^{(2)} x_t + W_h^{(2)} h_{t-1} + b^{(2)}), \\
o_3 &= \sigma(W_i^{(3)} x_t + W_h^{(3)} h_{t-1} + b^{(3)}), \\
o_4 &= \text{ReLU}(W_i^{(4)} x_t \cdot W_h^{(4)} h_{t-1}), \\
o_5 &= \tanh(W_i^{(5)} x_t + W_h^{(5)} h_{t-1} + b^{(5)}), \\
o_6 &= \sigma(W_i^{(6)} x_t + W_h^{(6)} h_{t-1} + b^{(6)}), \\
o_7 &= \tanh(W_i^{(7)} x_t + W_h^{(7)} h_{t-1} + b^{(7)}), \\
o_8 &= \sigma(W_i^{(8)} x_t + W_h^{(8)} h_{t-1} + b^{(8)}). \\
\text{Second Layer Computations:} & \\
l_1 &= \tanh(o_1 \cdot o_2) \\
l_2 &= \tanh(o_3 + o_4) \\
l_3 &= \tanh(o_5 \cdot o_6) \\
l_4 &= \sigma(o_7 + o_8) \\
\text{Inject Cell State:} & \\
l_1 &= \tanh(l_1 + c_{\text{state}}) \\
\text{Final Layer Computations:} & \\
c_{\text{new}} &= l_1 \cdot l_2 \\
l_5 &= \tanh(l_3 + l_4) \\
h_{\text{new}} &= \tanh(c_{\text{new}} \cdot l_5)
\end{aligned}
```
"""
function NAS((input_size, hidden_size)::Pair; kwargs...)
cell = NASCell(input_size => hidden_size; kwargs...)
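The NAS equations above thread both a hidden state and a cell state; assuming the wrapper manages that state internally (as Flux's LSTM wrapper does), usage would look like:

```julia
using Flux, RecurrentLayers

layer = NAS(4 => 8)          # hidden and cell state both assumed sized by hidden_size
x = rand(Float32, 4, 12, 3)
h = layer(x)
```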
12 changes: 11 additions & 1 deletion src/ran_cell.jl
@@ -108,7 +108,7 @@ end

Flux.@layer :expand RAN

"""
@doc raw"""
RAN(input_size => hidden_size; kwargs...)
The `RANCell`, introduced in [this paper](https://arxiv.org/pdf/1705.07393),
@@ -126,6 +126,16 @@ See [`RANCell`](@ref) for a layer that processes a single sequence.
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
\tilde{c}_t &= W_c x_t, \\
i_t &= \sigma(W_i x_t + U_i h_{t-1} + b_i), \\
f_t &= \sigma(W_f x_t + U_f h_{t-1} + b_f), \\
c_t &= i_t \odot \tilde{c}_t + f_t \odot c_{t-1}, \\
h_t &= g(c_t)
\end{aligned}
```
"""
function RAN((input_size, hidden_size)::Pair; kwargs...)
cell = RANCell(input_size => hidden_size; kwargs...)
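A sketch for `RAN` under the same assumptions; per the equations above, the candidate content is a linear function of the input, with the gates doing the mixing:

```julia
using Flux, RecurrentLayers

layer = RAN(4 => 8)           # hypothetical sizes
x = rand(Float32, 4, 12, 3)
h = layer(x)
```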
13 changes: 12 additions & 1 deletion src/rhn_cell.jl
@@ -140,7 +140,7 @@ end

Flux.@layer :expand RHN

"""
@doc raw"""
RHN((input_size => hidden_size)::Pair, depth=3; kwargs...)
[Recurrent highway network](https://arxiv.org/pdf/1607.03474).
@@ -154,6 +154,17 @@ See [`RHNCell`](@ref) for a layer that processes a single sequence.
- `couple_carry`: couples the carry gate and the transform gate. Default `true`
- `init_kernel`: initializer for the input to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
s_{\ell}^{[t]} &= h_{\ell}^{[t]} \odot t_{\ell}^{[t]} + s_{\ell-1}^{[t]} \odot c_{\ell}^{[t]}, \\
\text{where} \\
h_{\ell}^{[t]} &= \tanh(W_h x^{[t]}\mathbb{I}_{\ell = 1} + U_{h_{\ell}} s_{\ell-1}^{[t]} + b_{h_{\ell}}), \\
t_{\ell}^{[t]} &= \sigma(W_t x^{[t]}\mathbb{I}_{\ell = 1} + U_{t_{\ell}} s_{\ell-1}^{[t]} + b_{t_{\ell}}), \\
c_{\ell}^{[t]} &= \sigma(W_c x^{[t]}\mathbb{I}_{\ell = 1} + U_{c_{\ell}} s_{\ell-1}^{[t]} + b_{c_{\ell}})
\end{aligned}
```
"""
function RHN((input_size, hidden_size)::Pair, depth=3; kwargs...)
cell = RHNCell(input_size => hidden_size, depth; kwargs...)
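For `RHN`, the recurrence depth is the positional argument documented above; a sketch with a non-default depth (other assumptions as before):

```julia
using Flux, RecurrentLayers

layer = RHN(4 => 8, 5)        # 5 highway layers per timestep instead of the default 3
x = rand(Float32, 4, 12, 3)
h = layer(x)
```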
11 changes: 10 additions & 1 deletion src/scrn_cell.jl
@@ -86,7 +86,7 @@ end

Flux.@layer :expand SCRN

"""
@doc raw"""
SCRN((input_size => hidden_size)::Pair;
init_kernel = glorot_uniform,
init_recurrent_kernel = glorot_uniform,
@@ -103,6 +103,15 @@ See [`SCRNCell`](@ref) for a layer that processes a single sequence.
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
- `alpha`: structural constraint. Default is `0.0`
# Equations
```math
\begin{aligned}
s_t &= (1 - \alpha) W_s x_t + \alpha s_{t-1}, \\
h_t &= \sigma(W_h s_t + U_h h_{t-1} + b_h), \\
y_t &= f(U_y h_t + W_y s_t)
\end{aligned}
```
"""
function SCRN((input_size, hidden_size)::Pair; kwargs...)
cell = SCRNCell(input_size => hidden_size; kwargs...)
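Finally, a sketch for `SCRN` passing the documented `alpha` keyword; the `0.5` value is purely illustrative:

```julia
using Flux, RecurrentLayers

layer = SCRN(4 => 8; alpha = 0.5)  # alpha weights the slow context-state update s_t
x = rand(Float32, 4, 12, 3)
h = layer(x)
```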
