Doc corrections and unicode(theta) #184
base: master
Changes from 3 commits
@@ -28,10 +28,9 @@ end
Segment-wise linear approximation of sigmoid.
See [BinaryConnect: Training Deep Neural Networks with binary weights during propagations](https://arxiv.org/pdf/1511.00363.pdf).
"""
-hardσ(x::Real, a=0.2) = oftype(x/1, max(zero(x/1), min(one(x/1), oftype(x/1,a) * x + oftype(x/1,0.5))))
+hardσ(x::Real, a=0.2) = oftype(x / 1, max(zero(x / 1), min(one(x / 1), oftype(x / 1, a) * x + oftype(x / 1, 0.5))))
const hardsigmoid = hardσ
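Not part of the diff: a minimal, hedged sketch (under the made-up name `hardsig`, so it does not clash with NNlib's export) of what the `oftype(x / 1, …)` pattern in the line above buys — the clamped output follows the input's float type.

```julia
# Standalone copy of the hard sigmoid above; `oftype(x / 1, ...)` converts the
# constants and the result to the float type obtained by dividing x by 1.
hardsig(x::Real, a=0.2) = oftype(x / 1, max(zero(x / 1), min(one(x / 1), oftype(x / 1, a) * x + oftype(x / 1, 0.5))))

hardsig(0.5f0)   # ≈ 0.6f0 — Float32 in, Float32 out
hardsig(2)       # ≈ 0.9   — Int in, Float64 out (since 2 / 1 is a Float64)
hardsig(-10.0)   # 0.0     — clamped at zero
```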
""" | ||
logσ(x) | ||
|
||
|
@@ -48,7 +47,6 @@ Return `log(σ(x))` which is computed in a numerically stable way.
logσ(x::Real) = -softplus(-x)
const logsigmoid = logσ
""" | ||
hardtanh(x) = max(-1, min(1, x)) | ||
|
||
|
@@ -57,7 +55,6 @@ See [Large Scale Machine Learning](http://ronan.collobert.org/pub/matos/2004_phd
""" | ||
hardtanh(x::Real) = max(-one(x), min( one(x), x)) | ||
|
||
|
||
""" | ||
relu(x) = max(0, x) | ||
|
||
|
@@ -66,15 +63,14 @@ activation function.
""" | ||
relu(x::Real) = max(zero(x), x) | ||
|
||
|
||
""" | ||
leakyrelu(x, a=0.01) = max(a*x, x) | ||
|
||
Leaky [Rectified Linear Unit](https://en.wikipedia.org/wiki/Rectifier_(neural_networks)) | ||
activation function. | ||
You can also specify the coefficient explicitly, e.g. `leakyrelu(x, 0.01)`. | ||
""" | ||
leakyrelu(x::Real, a = oftype(x / 1, 0.01)) = max(a * x, x / one(x)) | ||
leakyrelu(x::Real, a=0.01) = max(oftype(x / 1, a) * x, x / 1) | ||
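Not part of the diff: a hedged sketch with standalone copies of the old and new definitions (renamed here to avoid clashing with NNlib), showing the behavioural difference when the coefficient is passed explicitly as a Float64 literal.

```julia
leakyrelu_old(x::Real, a = oftype(x / 1, 0.01)) = max(a * x, x / one(x))
leakyrelu_new(x::Real, a = 0.01) = max(oftype(x / 1, a) * x, x / 1)

# With the default coefficient both are type-stable, but an explicit Float64
# coefficient used to promote Float32 inputs; converting `a` inside the body avoids that.
typeof(leakyrelu_old(-1f0, 0.2))   # Float64
typeof(leakyrelu_new(-1f0, 0.2))   # Float32
```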
"""
    relu6(x) = min(max(0, x), 6)
@@ -107,8 +103,7 @@ Exponential Linear Unit activation function.
See [Fast and Accurate Deep Network Learning by Exponential Linear Units](https://arxiv.org/abs/1511.07289).
You can also specify the coefficient explicitly, e.g. `elu(x, 1)`.
"""
-elu(x::Real, α = one(x)) = ifelse(x ≥ 0, x / one(x), α * (exp(x) - one(x)))
+elu(x::Real, α = one(x)) = ifelse(x ≥ 0, x / 1, α * (exp(x) - one(x)))
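Not part of the diff: a quick hedged check that `x / 1` and `x / one(x)` are interchangeable here, so the rewrite above is purely a readability change.

```julia
x = 3
x / one(x) == x / 1                       # true — both give 3.0 (Float64)
typeof(3f0 / one(3f0)), typeof(3f0 / 1)   # (Float32, Float32)
```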
"""
    gelu(x) = 0.5x * (1 + tanh(√(2/π) * (x + 0.044715x^3)))
@@ -124,7 +119,6 @@ function gelu(x::Real)
    h * x * (one(x) + tanh(λ * (x + α * x^3)))
end
""" | ||
swish(x) = x * σ(x) | ||
|
||
|
@@ -133,7 +127,6 @@ See [Swish: a Self-Gated Activation Function](https://arxiv.org/pdf/1710.05941.p
""" | ||
swish(x::Real) = x * σ(x) | ||
|
||
|
||
""" | ||
lisht(x) = x * tanh(x) | ||
|
||
|
@@ -142,7 +135,6 @@ See [LiSHT](https://arxiv.org/abs/1901.05894)
""" | ||
lisht(x::Real) = x * tanh(x) | ||
|
||
|
||
""" | ||
selu(x) = λ * (x ≥ 0 ? x : α * (exp(x) - 1)) | ||
|
||
|
@@ -155,53 +147,47 @@ See [Self-Normalizing Neural Networks](https://arxiv.org/pdf/1706.02515.pdf).
function selu(x::Real)
    λ = oftype(x / 1, 1.0507009873554804934193349852946)
    α = oftype(x / 1, 1.6732632423543772848170429916717)
-    λ * ifelse(x > 0, x / one(x), α * (exp(x) - one(x)))
+    λ * ifelse(x > 0, x / 1, α * (exp(x) - one(x)))
end
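Not part of the diff: a hedged, self-contained sketch (the name `selu_sketch` is made up) of the self-normalizing property the λ and α constants above are chosen for — applying SELU to zero-mean, unit-variance inputs should roughly preserve both moments.

```julia
using Statistics

selu_sketch(x::Real) = 1.0507009873554804934193349852946 *
    ifelse(x > 0, x / 1, 1.6732632423543772848170429916717 * (exp(x) - one(x)))

z = randn(10^6)    # approximately N(0, 1) samples
y = selu_sketch.(z)
mean(y), var(y)    # both should come out close to (0, 1)
```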
""" | ||
celu(x, α=1) = | ||
(x ≥ 0 ? x : α * (exp(x/α) - 1)) | ||
|
||
Continuously Differentiable Exponential Linear Units | ||
See [Continuously Differentiable Exponential Linear Units](https://arxiv.org/pdf/1704.07483.pdf). | ||
""" | ||
celu(x::Real, α::Real = one(x)) = ifelse(x ≥ 0, x / one(x), α * (exp(x/α) - one(x))) | ||
|
||
celu(x::Real, α::Real = one(x)) = ifelse(x ≥ 0, x / 1, α * (exp(x/α) - one(x))) | ||
""" | ||
trelu(x, theta = 1.0) = x > theta ? x : 0 | ||
trelu(x, θ=1.0) = x > θ ? x : 0 | ||
|
||
Threshold Gated Rectified Linear. | ||
See [ThresholdRelu](https://arxiv.org/pdf/1402.3337.pdf) | ||
""" | ||
trelu(x::Real,theta = one(x)) = ifelse(x> theta, x, zero(x)) | ||
trelu(x::Real,θ = one(x)) = ifelse(x> θ, x, zero(x)) | ||
const thresholdrelu = trelu | ||
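Not part of the diff: the only change to `trelu` is the ASCII `theta` becoming the Unicode `θ` (typed as `\theta` + TAB in the Julia REPL and most editors). The argument is positional, so call sites are unaffected — a hedged sketch under a made-up name:

```julia
trelu_sketch(x::Real, θ = one(x)) = ifelse(x > θ, x, zero(x))

trelu_sketch(2.0)         # 2.0 — above the default threshold of 1
trelu_sketch(0.5)         # 0.0 — at or below the threshold is zeroed
trelu_sketch(0.5, 0.25)   # 0.5 — the threshold is still passed positionally
```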
""" | ||
softsign(x) = x / (1 + |x|) | ||
|
||
See [Quadratic Polynomials Learn Better Image Features](http://www.iro.umontreal.ca/~lisa/publications2/index.php/attachments/single/205). | ||
""" | ||
softsign(x::Real) = x / (one(x) + abs(x)) | ||
""" | ||
softplus(x) = log(exp(x) + 1) | ||
|
||
See [Deep Sparse Rectifier Neural Networks](http://proceedings.mlr.press/v15/glorot11a/glorot11a.pdf). | ||
""" | ||
softplus(x::Real) = ifelse(x > 0, x + log1p(exp(-x)), log1p(exp(x))) | ||
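Not part of the diff: a hedged sketch of why the branched form above is used instead of the literal `log(exp(x) + 1)` from the docstring — the naive form overflows for large inputs.

```julia
softplus_naive(x::Real) = log(exp(x) + 1)
softplus_stable(x::Real) = ifelse(x > 0, x + log1p(exp(-x)), log1p(exp(x)))

softplus_naive(1000.0)    # Inf — exp(1000.0) overflows Float64
softplus_stable(1000.0)   # 1000.0 — x + log1p(exp(-x)) never forms exp of a large positive number
```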
""" | ||
logcosh(x) | ||
logcosh(x) = x + softplus(-2x) - log(2) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the mathematical definition better down in the docstring There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this should be changed for all the existing functions? As in all the rest functions, mathematical definition is written beside the function. |
||
|
||
Return `log(cosh(x))` which is computed in a numerically stable way. | ||
""" | ||
logcosh(x::Real) = x + softplus(-2x) - log(oftype(x, 2)) | ||
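Not part of the diff: a self-contained, hedged comparison (with its own stable softplus) of the naive `log(cosh(x))` against the rearranged form used above.

```julia
stable_softplus(x::Real) = ifelse(x > 0, x + log1p(exp(-x)), log1p(exp(x)))
logcosh_sketch(x::Real) = x + stable_softplus(-2x) - log(oftype(x, 2))

log(cosh(10.0)), logcosh_sketch(10.0)   # both ≈ 9.30685
log(cosh(1000.0))                       # Inf — cosh(1000.0) overflows Float64
logcosh_sketch(1000.0)                  # ≈ 999.3068528194401
```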
""" | ||
mish(x) = x * tanh(softplus(x)) | ||
|
||
|
@@ -223,7 +209,7 @@ tanhshrink(x::Real) = x - tanh(x)
See [Softshrink Activation Function](https://www.gabormelli.com/RKB/Softshrink_Activation_Function).
"""
-softshrink(x::Real, λ = oftype(x/1, 0.5)) = min(max(zero(x), x - λ), x + λ)
+softshrink(x::Real, λ = oftype(x / 1, 0.5)) = min(max(zero(x), x - λ), x + λ)
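Not part of the diff: a hedged sketch of softshrink's behaviour with the default λ = 0.5 — inputs inside [-λ, λ] are zeroed and everything else is shrunk toward zero by λ.

```julia
softshrink_sketch(x::Real, λ = oftype(x / 1, 0.5)) = min(max(zero(x), x - λ), x + λ)

softshrink_sketch(0.2)    # 0.0  — inside the dead zone
softshrink_sketch(1.0)    # 0.5  — shrunk by λ
softshrink_sketch(-1.0)   # -0.5 — shrunk toward zero from below
```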
# Provide an informative error message if activation functions are called with an array
for f in (:σ, :σ_stable, :hardσ, :logσ, :hardtanh, :relu, :leakyrelu, :relu6, :rrelu, :elu, :gelu, :swish, :lisht, :selu, :celu, :trelu, :softsign, :softplus, :logcosh, :mish, :tanhshrink, :softshrink)
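The body of this loop is cut off by the end of the hunk. Purely as a hedged illustration (not NNlib's actual code or error text), the usual pattern is to `@eval` one `AbstractArray` method per symbol that points users at broadcasting:

```julia
for f in (:relu, :σ)
    @eval $f(x::AbstractArray, args...) =
        error("Use broadcasting (`", $(string(f)), ".(x)`) to apply `", $(string(f)), "` to an array.")
end

relu([1.0, -1.0])   # ERROR: Use broadcasting (`relu.(x)`) to apply `relu` to an array.
```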