Merge pull request #11 from MartinuzziFrancesco/fm/docs
More docstrings
MartinuzziFrancesco authored Nov 24, 2024
2 parents 77d18f7 + 53c62ed commit 5164499
Showing 4 changed files with 163 additions and 6 deletions.
1 change: 1 addition & 0 deletions docs/make.jl
@@ -21,4 +21,5 @@ makedocs(;
deploydocs(;
    repo="github.com/MartinuzziFrancesco/RecurrentLayers.jl",
    devbranch="main",
    push_preview=true,
)
108 changes: 105 additions & 3 deletions src/mut_cell.jl
@@ -7,11 +7,35 @@ end

Flux.@layer MUT1Cell

"""
@doc raw"""
MUT1Cell((input_size => hidden_size);
init_kernel = glorot_uniform,
init_recurrent_kernel = glorot_uniform,
bias = true)
[Mutated unit 1 cell](https://proceedings.mlr.press/v37/jozefowicz15.pdf).
See [`MUT1`](@ref) for a layer that processes entire sequences.
# Arguments
- `input_size => hidden_size`: input and inner dimension of the layer
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
z &= \sigma(W_z x_t + b_z), \\
r &= \sigma(W_r x_t + U_r h_t + b_r), \\
h_{t+1} &= \tanh(U_h (r \odot h_t) + \tanh(W_h x_t) + b_h) \odot z \\
&\quad + h_t \odot (1 - z).
\end{aligned}
```
# Forward
rnncell(inp, [state])
"""
function MUT1Cell((input_size, hidden_size)::Pair;
    init_kernel = glorot_uniform,
@@ -57,6 +81,16 @@ Flux.@layer :expand MUT1

"""
MUT1((input_size => hidden_size); kwargs...)
[Mutated unit 1 network](https://proceedings.mlr.press/v37/jozefowicz15.pdf).
See [`MUT1Cell`](@ref) for a layer that processes a single sequence.
# Arguments
- `input_size => hidden_size`: input and inner dimension of the layer
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
"""
function MUT1((input_size, hidden_size)::Pair; kwargs...)
    cell = MUT1Cell(input_size => hidden_size; kwargs...)
@@ -88,11 +122,35 @@ end

Flux.@layer MUT2Cell

"""
@doc raw"""
MUT2Cell((input_size => hidden_size);
init_kernel = glorot_uniform,
init_recurrent_kernel = glorot_uniform,
bias = true)
[Mutated unit 2 cell](https://proceedings.mlr.press/v37/jozefowicz15.pdf).
See [`MUT2`](@ref) for a layer that processes entire sequences.
# Arguments
- `input_size => hidden_size`: input and inner dimension of the layer
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
z &= \sigma(W_z x_t + U_z h_t + b_z), \\
r &= \sigma(x_t + U_r h_t + b_r), \\
h_{t+1} &= \tanh(U_h (r \odot h_t) + W_h x_t + b_h) \odot z \\
&\quad + h_t \odot (1 - z).
\end{aligned}
```
# Forward
rnncell(inp, [state])
"""
function MUT2Cell((input_size, hidden_size)::Pair;
    init_kernel = glorot_uniform,
@@ -138,6 +196,16 @@ Flux.@layer :expand MUT2

"""
MUT2Cell((input_size => hidden_size); kwargs...)
[Mutated unit 2 network](https://proceedings.mlr.press/v37/jozefowicz15.pdf).
See [`MUT2Cell`](@ref) for a layer that processes a single sequence.
# Arguments
- `input_size => hidden_size`: input and inner dimension of the layer
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
"""
function MUT2((input_size, hidden_size)::Pair; kwargs...)
    cell = MUT2Cell(input_size => hidden_size; kwargs...)
@@ -168,11 +236,35 @@ end

Flux.@layer MUT3Cell

"""
@doc raw"""
MUT3Cell((input_size => hidden_size);
init_kernel = glorot_uniform,
init_recurrent_kernel = glorot_uniform,
bias = true)
[Mutated unit 3 cell](https://proceedings.mlr.press/v37/jozefowicz15.pdf).
See [`MUT3`](@ref) for a layer that processes entire sequences.
# Arguments
- `input_size => hidden_size`: input and inner dimension of the layer
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
z &= \sigma(W_z x_t + U_z \tanh(h_t) + b_z), \\
r &= \sigma(W_r x_t + U_r h_t + b_r), \\
h_{t+1} &= \tanh(U_h (r \odot h_t) + W_h x_t + b_h) \odot z \\
&\quad + h_t \odot (1 - z).
\end{aligned}
```
# Forward
rnncell(inp, [state])
"""
function MUT3Cell((input_size, hidden_size)::Pair;
    init_kernel = glorot_uniform,
@@ -216,6 +308,16 @@ Flux.@layer :expand MUT3

"""
MUT3((input_size => hidden_size); kwargs...)
[Mutated unit 3 network](https://proceedings.mlr.press/v37/jozefowicz15.pdf).
See [`MUT3Cell`](@ref) for a layer that processes a single sequence.
# Arguments
- `input_size => hidden_size`: input and inner dimension of the layer
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
"""
function MUT3((input_size, hidden_size)::Pair; kwargs...)
    cell = MUT3Cell(input_size => hidden_size; kwargs...)
56 changes: 55 additions & 1 deletion src/nas_cell.jl
@@ -31,11 +31,54 @@ end

Flux.@layer NASCell

"""
@doc raw"""
NASCell((input_size => hidden_size);
init_kernel = glorot_uniform,
init_recurrent_kernel = glorot_uniform,
bias = true)
[Neural Architecture Search unit](https://arxiv.org/pdf/1611.01578).
See [`NAS`](@ref) for a layer that processes entire sequences.
# Arguments
- `input_size => hidden_size`: input and inner dimension of the layer
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
# Equations
```math
\begin{aligned}
\text{First Layer Outputs:} & \\
o_1 &= \sigma(W_i^{(1)} x_t + W_h^{(1)} h_{t-1} + b^{(1)}), \\
o_2 &= \text{ReLU}(W_i^{(2)} x_t + W_h^{(2)} h_{t-1} + b^{(2)}), \\
o_3 &= \sigma(W_i^{(3)} x_t + W_h^{(3)} h_{t-1} + b^{(3)}), \\
o_4 &= \text{ReLU}(W_i^{(4)} x_t \cdot W_h^{(4)} h_{t-1}), \\
o_5 &= \tanh(W_i^{(5)} x_t + W_h^{(5)} h_{t-1} + b^{(5)}), \\
o_6 &= \sigma(W_i^{(6)} x_t + W_h^{(6)} h_{t-1} + b^{(6)}), \\
o_7 &= \tanh(W_i^{(7)} x_t + W_h^{(7)} h_{t-1} + b^{(7)}), \\
o_8 &= \sigma(W_i^{(8)} x_t + W_h^{(8)} h_{t-1} + b^{(8)}). \\
\text{Second Layer Computations:} & \\
l_1 &= \tanh(o_1 \cdot o_2) \\
l_2 &= \tanh(o_3 + o_4) \\
l_3 &= \tanh(o_5 \cdot o_6) \\
l_4 &= \sigma(o_7 + o_8) \\
\text{Inject Cell State:} & \\
l_1 &= \tanh(l_1 + c_{\text{state}}) \\
\text{Final Layer Computations:} & \\
c_{\text{new}} &= l_1 \cdot l_2 \\
l_5 &= \tanh(l_3 + l_4) \\
h_{\text{new}} &= \tanh(c_{\text{new}} \cdot l_5)
\end{aligned}
```
# Forward
rnncell(inp, [state])
"""
function NASCell((input_size, hidden_size)::Pair;
    init_kernel = glorot_uniform,
@@ -101,6 +144,17 @@ Flux.@layer :expand NAS

"""
NAS((input_size => hidden_size)::Pair; kwargs...)
[Neural Architecture Search unit](https://arxiv.org/pdf/1611.01578).
See [`NASCell`](@ref) for a layer that processes a single sequence.
# Arguments
- `input_size => hidden_size`: input and inner dimension of the layer
- `init_kernel`: initializer for the input to hidden weights
- `init_recurrent_kernel`: initializer for the hidden to hidden weights
- `bias`: include a bias or not. Default is `true`
"""
function NAS((input_size, hidden_size)::Pair; kwargs...)
    cell = NASCell(input_size => hidden_size; kwargs...)
4 changes: 2 additions & 2 deletions src/peepholelstm_cell.jl
@@ -26,13 +26,13 @@ See [`PeepholeLSTM`](@ref) for a layer that processes entire sequences.
# Equations
```math
\begin{aligned}
f_t &= \sigma_g(W_f x_t + U_f c_{t-1} + b_f), \\
i_t &= \sigma_g(W_i x_t + U_i c_{t-1} + b_i), \\
o_t &= \sigma_g(W_o x_t + U_o c_{t-1} + b_o), \\
c_t &= f_t \odot c_{t-1} + i_t \odot \sigma_c(W_c x_t + b_c), \\
h_t &= o_t \odot \sigma_h(c_t).
\end{aligned}
```
# Forward
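
What distinguishes the peephole variant is that all three gates read the previous cell state `c_{t-1}` rather than the hidden state `h_{t-1}`. Below is a minimal sketch transcribing the equations above into plain Julia; every name in it is hypothetical and independent of the package's actual implementation:

```julia
# A scalar logistic function, broadcast below.
σ(z) = 1 / (1 + exp(-z))

# One step of the peephole LSTM equations above. The parameter containers
# W, U, b and their gate keys are hypothetical names chosen for this sketch.
function peephole_step(x, c_prev, W, U, b)
    f = σ.(W[:f] * x .+ U[:f] * c_prev .+ b[:f])  # forget gate peeks at c_{t-1}
    i = σ.(W[:i] * x .+ U[:i] * c_prev .+ b[:i])  # input gate peeks at c_{t-1}
    o = σ.(W[:o] * x .+ U[:o] * c_prev .+ b[:o])  # output gate peeks at c_{t-1}
    c = f .* c_prev .+ i .* tanh.(W[:c] * x .+ b[:c])
    h = o .* tanh.(c)
    return h, c
end

in_dim, hid = 3, 4
W = Dict(g => randn(hid, in_dim) for g in (:f, :i, :o, :c))
U = Dict(g => randn(hid, hid) for g in (:f, :i, :o))
b = Dict(g => zeros(hid) for g in (:f, :i, :o, :c))

h, c = peephole_step(randn(in_dim), zeros(hid), W, U, b)
```
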
