From 367227a0c3725662376bd695424846ba2931c62c Mon Sep 17 00:00:00 2001
From: CarloLucibello
Date: Wed, 28 Dec 2022 13:02:21 +0100
Subject: [PATCH 1/4] revisit losses

---
 src/losses/functions.jl | 89 ++++++++++++++++++++---------------------
 1 file changed, 44 insertions(+), 45 deletions(-)

diff --git a/src/losses/functions.jl b/src/losses/functions.jl
index ffda2ff99a..2028c69136 100644
--- a/src/losses/functions.jl
+++ b/src/losses/functions.jl
@@ -20,7 +20,7 @@ julia> Flux.mae(y_model, 1:3)
 """
 function mae(ŷ, y; agg = mean)
     _check_sizes(ŷ, y)
-    agg(abs.(ŷ .- y))
+    return agg(abs.(ŷ .- y))
 end
 
 """
@@ -44,7 +44,7 @@ julia> Flux.mse(y_model, y_true)
 """
 function mse(ŷ, y; agg = mean)
     _check_sizes(ŷ, y)
-    agg(abs2.(ŷ .- y))
+    return agg(abs2.(ŷ .- y))
 end
 
 """
@@ -93,12 +93,11 @@ julia> Flux.huber_loss(ŷ, 1:3, δ=0.05) # changes behaviour as |ŷ - y| > δ
 ```
 """
 function huber_loss(ŷ, y; agg = mean, δ = ofeltype(ŷ, 1))
-  _check_sizes(ŷ, y)
-  abs_error = abs.(ŷ .- y)
-  #TODO: remove dropgrad when Zygote can handle this function with CuArrays
-  temp = Zygote.dropgrad(abs_error .< δ)
-  x = ofeltype(ŷ, 0.5)
-  agg(((abs_error .^ 2) .* temp) .* x .+ δ * (abs_error .- x * δ) .* (1 .- temp))
+    _check_sizes(ŷ, y)
+    abs_error = abs.(ŷ .- y)
+    temp = abs_error .< δ
+    x = ofeltype(ŷ, 0.5)
+    return agg(((abs_error .^ 2) .* temp) .* x .+ δ * (abs_error .- x * δ) .* (1 .- temp))
 end
 
 """
@@ -153,17 +152,17 @@ true
 ```
 """
 function label_smoothing(y::Union{AbstractArray,Number}, α::Number; dims::Int = 1)
-  if !(0 < α < 1)
-    throw(ArgumentError("α must be between 0 and 1"))
-  end
-  if dims == 0
-    y_smoothed = y .* (1 - α) .+ α*1//2
-  elseif dims == 1
-    y_smoothed = y .* (1 - α) .+ α* 1 // size(y, 1)
-  else
-    throw(ArgumentError("`dims` should be either 0 or 1"))
-  end
-  return y_smoothed
+    if !(0 < α < 1)
+        throw(ArgumentError("α must be between 0 and 1"))
+    end
+    if dims == 0
+        y_smoothed = y .* (1 - α) .+ α*1//2
+    elseif dims == 1
+        y_smoothed = y .* (1 - α) .+ α* 1 // size(y, 1)
+    else
+        throw(ArgumentError("`dims` should be either 0 or 1"))
+    end
+    return y_smoothed
 end
 
 """
@@ -224,7 +223,7 @@ julia> Flux.crossentropy(y_model, y_smooth)
 """
 function crossentropy(ŷ, y; dims = 1, agg = mean, ϵ = epseltype(ŷ))
     _check_sizes(ŷ, y)
-    agg(.-sum(xlogy.(y, ŷ .+ ϵ); dims = dims))
+    return agg(.-sum(xlogy.(y, ŷ .+ ϵ); dims = dims))
 end
 
 """
@@ -263,7 +262,7 @@ julia> Flux.crossentropy(softmax(y_model), y_label)
 """
 function logitcrossentropy(ŷ, y; dims = 1, agg = mean)
     _check_sizes(ŷ, y)
-    agg(.-sum(y .* logsoftmax(ŷ; dims = dims); dims = dims))
+    return agg(.-sum(y .* logsoftmax(ŷ; dims = dims); dims = dims))
 end
 
 """
@@ -312,7 +311,7 @@ julia> Flux.crossentropy(y_prob, y_hot)
 """
 function binarycrossentropy(ŷ, y; agg = mean, ϵ = epseltype(ŷ))
     _check_sizes(ŷ, y)
-    agg(@.(-xlogy(y, ŷ + ϵ) - xlogy(1 - y, 1 - ŷ + ϵ)))
+    return agg(@.(-xlogy(y, ŷ + ϵ) - xlogy(1 - y, 1 - ŷ + ϵ)))
 end
 
 """
@@ -342,7 +341,7 @@ julia> Flux.binarycrossentropy(sigmoid.(y_model), y_bin)
 """
 function logitbinarycrossentropy(ŷ, y; agg = mean)
     _check_sizes(ŷ, y)
-    agg(@.((1 - y) * ŷ - logσ(ŷ)))
+    return agg(@.((1 - y) * ŷ - logσ(ŷ)))
 end
 
 """
@@ -407,7 +406,7 @@ julia> Flux.poisson_loss(y_model, 1:3)
 """
 function poisson_loss(ŷ, y; agg = mean)
     _check_sizes(ŷ, y)
-    agg(ŷ .- xlogy.(y, ŷ))
+    return agg(ŷ .- xlogy.(y, ŷ))
 end
 
 """
@@ -442,7 +441,7 @@ true
 """
 function hinge_loss(ŷ, y; agg = mean)
     _check_sizes(ŷ, y)
-    agg(max.(0, 1 .- ŷ .* y))
+    return agg(max.(0, 1 .- ŷ .* y))
 end
 
 """
@@ -477,7 +476,7 @@ true
 """
 function squared_hinge_loss(ŷ, y; agg = mean)
     _check_sizes(ŷ, y)
-    agg((max.(0, 1 .- ŷ .* y)) .^ 2)
+    return agg((max.(0, 1 .- ŷ .* y)) .^ 2)
 end
 
 """
@@ -503,7 +502,7 @@ julia> 1 - Flux.dice_coeff_loss(y_pred, 1:3) # ~ F1 score for image segmentatio
 """
 function dice_coeff_loss(ŷ, y; smooth = ofeltype(ŷ, 1.0))
     _check_sizes(ŷ, y)
-    1 - (2 * sum(y .* ŷ) + smooth) / (sum(y .^ 2) + sum(ŷ .^ 2) + smooth) #TODO agg
+    return 1 - (2 * sum(y .* ŷ) + smooth) / (sum(y .^ 2) + sum(ŷ .^ 2) + smooth) #TODO agg
 end
 
 """
@@ -518,11 +517,11 @@ Calculated as:
 
 """
 function tversky_loss(ŷ, y; β = ofeltype(ŷ, 0.7))
-  _check_sizes(ŷ, y)
-  #TODO add agg
-  num = sum(y .* ŷ) + 1
-  den = sum(y .* ŷ + β * (1 .- y) .* ŷ + (1 - β) * y .* (1 .- ŷ)) + 1
-  1 - num / den
+    _check_sizes(ŷ, y)
+    #TODO add agg
+    num = sum(y .* ŷ) + 1
+    den = sum(y .* ŷ + β * (1 .- y) .* ŷ + (1 - β) * y .* (1 .- ŷ)) + 1
+    return 1 - num / den
 end
 
 """
@@ -554,13 +553,13 @@ true
 ```
 """
 function binary_focal_loss(ŷ, y; agg=mean, γ=2, ϵ=epseltype(ŷ))
-  _check_sizes(ŷ, y)
-  ŷ = ŷ .+ ϵ
-  p_t = y .* ŷ + (1 .- y) .* (1 .- ŷ)
-  ce = -log.(p_t)
-  weight = (1 .- p_t) .^ γ
-  loss = weight .* ce
-  agg(loss)
+    _check_sizes(ŷ, y)
+    ŷ = ŷ .+ ϵ
+    p_t = y .* ŷ + (1 .- y) .* (1 .- ŷ)
+    ce = -log.(p_t)
+    weight = (1 .- p_t) .^ γ
+    loss = weight .* ce
+    return agg(loss)
 end
 
 """
@@ -598,9 +597,9 @@ See also: [`Losses.binary_focal_loss`](@ref) for binary (not one-hot) labels
 
 """
 function focal_loss(ŷ, y; dims=1, agg=mean, γ=2, ϵ=epseltype(ŷ))
-  _check_sizes(ŷ, y)
-  ŷ = ŷ .+ ϵ
-  agg(sum(@. -y * (1 - ŷ)^γ * log(ŷ); dims=dims))
+    _check_sizes(ŷ, y)
+    ŷ = ŷ .+ ϵ
+    return agg(sum(@. -y * (1 - ŷ)^γ * log(ŷ); dims=dims))
 end
 
 """
@@ -625,9 +624,9 @@ julia> Flux.siamese_contrastive_loss(ŷ, 1:3, margin = 2)
 ```
 """
 function siamese_contrastive_loss(ŷ, y; agg = mean, margin::Real = 1)
-  _check_sizes(ŷ, y)
-  margin < 0 && throw(DomainError(margin, "Margin must be non-negative"))
-  return agg(@. (1 - y) * ŷ^2 + y * max(0, margin - ŷ)^2)
+    _check_sizes(ŷ, y)
+    margin < 0 && throw(DomainError(margin, "Margin must be non-negative"))
+    return agg(@. (1 - y) * ŷ^2 + y * max(0, margin - ŷ)^2)
 end
 
 ```@meta

From 4fd18df1c09d62960eb1d9e2823d709b1a4d2387 Mon Sep 17 00:00:00 2001
From: CarloLucibello
Date: Wed, 28 Dec 2022 13:05:00 +0100
Subject: [PATCH 2/4] more in losses

---
 src/losses/functions.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/losses/functions.jl b/src/losses/functions.jl
index 2028c69136..dc609f5fe2 100644
--- a/src/losses/functions.jl
+++ b/src/losses/functions.jl
@@ -223,7 +223,7 @@ julia> Flux.crossentropy(y_model, y_smooth)
 """
 function crossentropy(ŷ, y; dims = 1, agg = mean, ϵ = epseltype(ŷ))
     _check_sizes(ŷ, y)
-    return agg(.-sum(xlogy.(y, ŷ .+ ϵ); dims = dims))
+    return agg(.-sum(xlogy.(y, ŷ .+ ϵ); dims))
 end
 
 """
@@ -262,7 +262,7 @@ julia> Flux.crossentropy(softmax(y_model), y_label)
 """
 function logitcrossentropy(ŷ, y; dims = 1, agg = mean)
     _check_sizes(ŷ, y)
-    return agg(.-sum(y .* logsoftmax(ŷ; dims = dims); dims = dims))
+    return agg(.-sum(y .* logsoftmax(ŷ; dims); dims))
 end
 
 """
@@ -381,8 +381,8 @@ Inf
 """
 function kldivergence(ŷ, y; dims = 1, agg = mean, ϵ = epseltype(ŷ))
     _check_sizes(ŷ, y)
-    entropy = agg(sum(xlogx.(y), dims = dims))
-    cross_entropy = crossentropy(ŷ, y; dims = dims, agg = agg, ϵ = ϵ)
+    entropy = agg(sum(xlogx.(y); dims))
+    cross_entropy = crossentropy(ŷ, y; dims, agg, ϵ)
     return entropy + cross_entropy
 end
 
@@ -596,10 +596,10 @@ true
 
 See also: [`Losses.binary_focal_loss`](@ref) for binary (not one-hot) labels
 """
-function focal_loss(ŷ, y; dims=1, agg=mean, γ=2, ϵ=epseltype(ŷ))
+function focal_loss(ŷ, y; dims = 1, agg = mean, γ = 2, ϵ = epseltype(ŷ))
     _check_sizes(ŷ, y)
     ŷ = ŷ .+ ϵ
-    return agg(sum(@. -y * (1 - ŷ)^γ * log(ŷ); dims=dims))
+    return agg(sum(@. -y * (1 - ŷ)^γ * log(ŷ); dims))
 end
 
 """

From e36480812fa019f5635cecadd8cb8889f6ca3438 Mon Sep 17 00:00:00 2001
From: CarloLucibello
Date: Wed, 28 Dec 2022 15:49:57 +0100
Subject: [PATCH 3/4] remove same warnings

---
 test/runtests.jl | 32 ++++++++++++++++----------------
 test/train.jl    |  1 +
 test/utils.jl    | 12 ------------
 3 files changed, 17 insertions(+), 28 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 29b2bad311..db5d7616cc 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,5 +1,5 @@
 using Flux
-using Flux.Data
+using Flux: DataLoader
 using Flux: OneHotArray, OneHotMatrix, OneHotVector
 using Flux: params
 using Test
@@ -12,24 +12,24 @@ Random.seed!(0)
 
 @testset verbose=true "Flux.jl" begin
 
-  @testset "Utils" begin
-    include("utils.jl")
-  end
+  # @testset "Utils" begin
+  #   include("utils.jl")
+  # end
 
-  @testset "Optimise / Train" begin
-    include("optimise.jl")
-    include("train.jl")
-  end
+  # @testset "Optimise / Train" begin
+  #   include("optimise.jl")
+  #   include("train.jl")
+  # end
 
-  @testset "Data" begin
-    include("data.jl")
-  end
+  # @testset "Data" begin
+  #   include("data.jl")
+  # end
 
-  @testset "Losses" begin
-    include("losses.jl")
-    include("ctc.jl")
-    CUDA.functional() && include("ctc-gpu.jl")
-  end
+  # @testset "Losses" begin
+  #   include("losses.jl")
+  #   include("ctc.jl")
+  #   CUDA.functional() && include("ctc-gpu.jl")
+  # end
 
   @testset "Layers" begin
     include("layers/basic.jl")
diff --git a/test/train.jl b/test/train.jl
index 310102331e..2f6674ed20 100644
--- a/test/train.jl
+++ b/test/train.jl
@@ -24,6 +24,7 @@ using Random
 
   # Test direct use of Optimisers.jl rule, only really OK for `Descent`:
   @testset "without setup, $opt" for opt in [Descent(0.1), Optimisers.Descent(0.1), Optimisers.Adam()]
+    opt isa Optimisers.Adam && @info "should see a warning about discarded state"
     loss(m, x) = Flux.Losses.mse(w*x, m.weight*x .+ m.bias)
     model = (weight=copy(w2), bias=zeros(10), ignore=nothing)
     @test loss(model, rand(10, 10)) > 1
diff --git a/test/utils.jl b/test/utils.jl
index fbb7f7d9d1..77bb7832c4 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -385,18 +385,6 @@ end
     @test_skip typeof(l1.bias) === typeof(l2.bias)
   end
 
-  @testset "loadparams!" begin
-    pars(w, b) = [w, b]
-    pars(l) = pars(l.weight, l.bias)
-    pararray(m) = mapreduce(pars, vcat, m)
-    weights(m) = mapreduce(l -> [l.weight], vcat, m)
-    @testset "Bias type $bt" for bt in (Flux.zeros32, nobias)
-      m = dm(bt)
-      Flux.loadparams!(m, params(m))
-      testdense(m, bt)
-    end
-  end
-
   @testset "loadmodel!(dst, src)" begin
     m1 = Chain(Dense(10, 5), Dense(5, 2, relu))
     m2 = Chain(Dense(10, 5), Dense(5, 2))

From 0334edc658c80f2ef2d293af3f6fe861b04b45bc Mon Sep 17 00:00:00 2001
From: CarloLucibello
Date: Wed, 28 Dec 2022 15:51:25 +0100
Subject: [PATCH 4/4] cleanup

---
 test/runtests.jl | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index db5d7616cc..8d65a8725e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -12,24 +12,24 @@ Random.seed!(0)
 
 @testset verbose=true "Flux.jl" begin
 
-  # @testset "Utils" begin
-  #   include("utils.jl")
-  # end
+  @testset "Utils" begin
+    include("utils.jl")
+  end
 
-  # @testset "Optimise / Train" begin
-  #   include("optimise.jl")
-  #   include("train.jl")
-  # end
+  @testset "Optimise / Train" begin
+    include("optimise.jl")
+    include("train.jl")
+  end
 
-  # @testset "Data" begin
-  #   include("data.jl")
-  # end
+  @testset "Data" begin
+    include("data.jl")
+  end
 
-  # @testset "Losses" begin
-  #   include("losses.jl")
-  #   include("ctc.jl")
-  #   CUDA.functional() && include("ctc-gpu.jl")
-  # end
+  @testset "Losses" begin
+    include("losses.jl")
+    include("ctc.jl")
+    CUDA.functional() && include("ctc-gpu.jl")
+  end
 
   @testset "Layers" begin
     include("layers/basic.jl")
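Note on the keyword pattern used in PATCH 2/4: since Julia 1.5, writing `f(x; dims)` where a local variable named `dims` is in scope is shorthand for `f(x; dims = dims)`, so `agg(.-sum(xlogy.(y, ŷ .+ ϵ); dims))` computes exactly what the longer form it replaces did; likewise, the explicit `return` added in PATCH 1/4 does not change what is computed, since a Julia function already returns its last expression. A minimal, self-contained sketch of the same `dims`/`agg` convention — the toy function, inputs, and the plain `log`-based formula below are illustrative stand-ins, not Flux code:

```julia
using Statistics: mean

# Toy cross-entropy following the same keyword conventions as the patched losses.
# `sum(...; dims)` forwards the local `dims`, i.e. it is sugar for `dims = dims`.
function toy_crossentropy(ŷ, y; dims = 1, agg = mean, ϵ = eps(eltype(ŷ)))
    return agg(.-sum(y .* log.(ŷ .+ ϵ); dims))
end

ŷ = [0.9 0.2; 0.1 0.8]   # made-up predicted probabilities (columns sum to 1)
y = [1.0 0.0; 0.0 1.0]   # made-up one-hot targets
toy_crossentropy(ŷ, y)                     # scalar: mean loss over the columns
toy_crossentropy(ŷ, y; agg = identity)     # 1×2 matrix: one loss per column
```

Passing `agg = identity` skips the reduction and returns the per-column losses, which is the same role `agg` plays throughout `src/losses/functions.jl`.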