From 034ef471c7a28d129a3eafea8480d3207bfa2c2f Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Tue, 17 Sep 2024 23:53:58 -0400
Subject: [PATCH] fix: update to use test_gradients macro

---
 test/Project.toml                        |  2 +-
 test/common_ops/activation_tests.jl      |  6 +++---
 test/common_ops/bias_act_tests.jl        |  6 +++---
 test/common_ops/conv_tests.jl            |  2 +-
 test/common_ops/dense_tests.jl           |  2 +-
 test/common_ops/dropout_tests.jl         |  8 ++++----
 test/normalization/batchnorm_tests.jl    |  4 ++--
 test/normalization/groupnorm_tests.jl    |  2 +-
 test/normalization/instancenorm_tests.jl |  4 ++--
 test/normalization/layernorm_tests.jl    |  4 ++--
 test/others/bmm_tests.jl                 | 24 ++++++++++++------------
 11 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/test/Project.toml b/test/Project.toml
index 79a435ea..51b229fc 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -44,7 +44,7 @@ ForwardDiff = "0.10.36"
 Hwloc = "3.2"
 InteractiveUtils = "<0.0.1, 1"
 JLArrays = "0.1.5"
-LuxTestUtils = "1.1.2"
+LuxTestUtils = "1.2"
 MKL = "0.7"
 MLDataDevices = "1.0.0"
 NNlib = "0.9.21"
diff --git a/test/common_ops/activation_tests.jl b/test/common_ops/activation_tests.jl
index ca78ae41..a5c3e2f8 100644
--- a/test/common_ops/activation_tests.jl
+++ b/test/common_ops/activation_tests.jl
@@ -39,9 +39,9 @@
             end
             @test @inferred(Zygote.gradient(apply_act_fast2, f, x)) isa Any
 
-            test_gradients(Base.Fix1(apply_act, f), x; atol, rtol)
-            test_gradients(Base.Fix1(apply_act_fast, f), x; atol, rtol)
-            test_gradients(Base.Fix1(apply_act_fast2, f), x; atol, rtol)
+            @test_gradients(Base.Fix1(apply_act, f), x; atol, rtol)
+            @test_gradients(Base.Fix1(apply_act_fast, f), x; atol, rtol)
+            @test_gradients(Base.Fix1(apply_act_fast2, f), x; atol, rtol)
 
             ∂x1 = Zygote.gradient(apply_act, f, x)[2]
             ∂x2 = Zygote.gradient(apply_act_fast, f, x)[2]
diff --git a/test/common_ops/bias_act_tests.jl b/test/common_ops/bias_act_tests.jl
index 40d84eeb..2bdbc830 100644
--- a/test/common_ops/bias_act_tests.jl
+++ b/test/common_ops/bias_act_tests.jl
@@ -50,11 +50,11 @@
                 @test_broken @inferred(Zygote.gradient(bias_act_loss3, act, x, b)) isa Any
             end
 
-            test_gradients(__Fix1(bias_act_loss1, act), x, b; atol, rtol,
+            @test_gradients(__Fix1(bias_act_loss1, act), x, b; atol, rtol,
                 soft_fail=fp16 ? [AutoFiniteDiff()] : [])
-            test_gradients(__Fix1(bias_act_loss2, act), x, b; atol, rtol,
+            @test_gradients(__Fix1(bias_act_loss2, act), x, b; atol, rtol,
                 soft_fail=fp16 ? [AutoFiniteDiff()] : [])
-            test_gradients(__Fix1(bias_act_loss3, act), x, b; atol, rtol,
+            @test_gradients(__Fix1(bias_act_loss3, act), x, b; atol, rtol,
                 soft_fail=fp16 ? [AutoFiniteDiff()] : [])
 
             ∂x1, ∂b1 = Zygote.gradient(__Fix1(bias_act_loss1, act), x, b)
diff --git a/test/common_ops/conv_tests.jl b/test/common_ops/conv_tests.jl
index ea498dae..5c208cd4 100644
--- a/test/common_ops/conv_tests.jl
+++ b/test/common_ops/conv_tests.jl
@@ -68,7 +68,7 @@ function run_conv_testing(gen_f::Function, activation, kernel, stride, padding,
     mp && push!(skip_backends, AutoReverseDiff())
     ((mp && ongpu) || (mode == "amdgpu" && (Tx == Float64 || Tw == Float64))) &&
         push!(skip_backends, AutoTracker())
-    test_gradients(__f_grad, weight, x, bias; atol, rtol, skip_backends, soft_fail=fp16)
+    @test_gradients(__f_grad, weight, x, bias; atol, rtol, skip_backends, soft_fail=fp16)
 end
 
 anonact = x -> gelu(x)
diff --git a/test/common_ops/dense_tests.jl b/test/common_ops/dense_tests.jl
index 92af93ba..a14906b6 100644
--- a/test/common_ops/dense_tests.jl
+++ b/test/common_ops/dense_tests.jl
@@ -46,7 +46,7 @@ function run_dense_testing(Tw, Tx, M, N, hasbias, activation, aType, mode, ongpu
     __f_grad = let activation = activation
         (w, x, b) -> __f(activation, w, x, b)
     end
-    test_gradients(__f_grad, w, x, bias; atol, rtol, skip_backends, soft_fail=fp16)
+    @test_gradients(__f_grad, w, x, bias; atol, rtol, skip_backends, soft_fail=fp16)
 
     y_simple = dense_simple(activation, w, x, bias)
     y_zyg = fused_dense_bias_activation(activation, w, x, bias)
diff --git a/test/common_ops/dropout_tests.jl b/test/common_ops/dropout_tests.jl
index 5d3baa28..8d409fee 100644
--- a/test/common_ops/dropout_tests.jl
+++ b/test/common_ops/dropout_tests.jl
@@ -27,7 +27,7 @@
             __f = let rng = rng, T = T
                 x -> sum(first(dropout(rng, x, T(0.5), Val(true), T(2), dims)))
             end
-            test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
+            @test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
                 soft_fail=(T == Float16 ? [AutoFiniteDiff()] : []),
                 broken_backends=(T == Float16 && Sys.iswindows() ? [AutoEnzyme()] : []))
 
@@ -74,7 +74,7 @@ end
             __f = let rng = rng, mask = mask, p = T(0.5), invp = T(2)
                 x -> sum(first(dropout(rng, x, mask, p, Val(true), Val(true), invp, :)))
             end
-            test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
+            @test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
                 soft_fail=(T == Float16 ? [AutoFiniteDiff()] : []))
 
             @jet sum(first(dropout(
@@ -105,7 +105,7 @@
             end
             soft_fail = T == Float16 ? Any[AutoFiniteDiff()] : []
             skip_backends = length(x_shape) == 5 ? [AutoEnzyme()] : []
-            test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3, soft_fail, skip_backends)
+            @test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3, soft_fail, skip_backends)
 
             @jet sum(first(dropout(
                 rng, x, mask, T(0.5), Val(true), Val(false), T(2), :)))
@@ -154,7 +154,7 @@ end
             __f = let rng = rng
                 x -> sum(first(alpha_dropout(rng, x, T(0.5), Val(true))))
             end
-            test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
+            @test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
                 soft_fail=(T == Float16 ? [AutoFiniteDiff()] : []),
                 broken_backends=(T == Float16 && Sys.iswindows() ? [AutoEnzyme()] : []))
 
diff --git a/test/normalization/batchnorm_tests.jl b/test/normalization/batchnorm_tests.jl
index 553cc8c0..38d14a2d 100644
--- a/test/normalization/batchnorm_tests.jl
+++ b/test/normalization/batchnorm_tests.jl
@@ -98,7 +98,7 @@ function run_batchnorm_testing(
         __f = (args...) -> sum(first(batchnorm(
             args..., rm, rv, training, act, T(0.9), epsilon)))
 
-        test_gradients(
+        @test_gradients(
             __f, x, scale, bias; atol, rtol, skip_backends, soft_fail, broken_backends)
     end
 
@@ -183,6 +183,6 @@ end
 
         __f = (args...) -> sum(first(batchnorm(
             args..., running_mean, running_var, Val(true), identity, 0.9f0, 1.0f-5)))
-        test_gradients(__f, x, scale, bias; atol=1.0f-3, rtol=1.0f-3)
+        @test_gradients(__f, x, scale, bias; atol=1.0f-3, rtol=1.0f-3)
     end
 end
diff --git a/test/normalization/groupnorm_tests.jl b/test/normalization/groupnorm_tests.jl
index 6a512148..3d5e821a 100644
--- a/test/normalization/groupnorm_tests.jl
+++ b/test/normalization/groupnorm_tests.jl
@@ -74,7 +74,7 @@ function run_groupnorm_testing(T, sz, groups, affine, act, aType, mode, ongpu)
 
     if affine
         __f = (args...) -> sum(groupnorm(args..., groups, act, epsilon))
-        test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
+        @test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
     end
 end
 
diff --git a/test/normalization/instancenorm_tests.jl b/test/normalization/instancenorm_tests.jl
index 9091a436..a48a502d 100644
--- a/test/normalization/instancenorm_tests.jl
+++ b/test/normalization/instancenorm_tests.jl
@@ -39,7 +39,7 @@ function run_instancenorm_testing(gen_f, T, sz, training, act, aType, mode, ongp
     if is_training(training)
         __f = (args...) -> sum(first(instancenorm(args..., training, act, epsilon)))
         soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
-        test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
+        @test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
     end
 
     # Now test with running stats
@@ -67,7 +67,7 @@ function run_instancenorm_testing(gen_f, T, sz, training, act, aType, mode, ongp
             args..., rm, rv, training, act, T(0.1), epsilon)))
         soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
         skip_backends = (Sys.iswindows() && fp16) ? [AutoEnzyme()] : []
-        test_gradients(__f, x, scale, bias; atol, rtol, soft_fail, skip_backends)
+        @test_gradients(__f, x, scale, bias; atol, rtol, soft_fail, skip_backends)
     end
 end
 
diff --git a/test/normalization/layernorm_tests.jl b/test/normalization/layernorm_tests.jl
index 63386f4a..bdfccb47 100644
--- a/test/normalization/layernorm_tests.jl
+++ b/test/normalization/layernorm_tests.jl
@@ -58,10 +58,10 @@ function run_layernorm_testing_core(
     soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
     if affine_shape !== nothing
         __f = (args...) -> sum(_f(args...))
-        test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
+        @test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
     else
         __f = x -> sum(_f(x, scale, bias))
-        test_gradients(__f, x; atol, rtol, soft_fail)
+        @test_gradients(__f, x; atol, rtol, soft_fail)
     end
 
     if anonact !== act
diff --git a/test/others/bmm_tests.jl b/test/others/bmm_tests.jl
index df51df15..ea847568 100644
--- a/test/others/bmm_tests.jl
+++ b/test/others/bmm_tests.jl
@@ -264,36 +264,36 @@
         end
         B = 3
 
         @testset "Two 3-arrays" begin
-            test_gradients(fn, aType(randn(rng, M, P, B)),
+            @test_gradients(fn, aType(randn(rng, M, P, B)),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, batched_adjoint(aType(randn(rng, P, M, B))),
+            @test_gradients(fn, batched_adjoint(aType(randn(rng, P, M, B))),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, aType(randn(rng, M, P, B)),
+            @test_gradients(fn, aType(randn(rng, M, P, B)),
                 batched_transpose(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)
         end
 
         @testset "One a matrix..." begin
-            test_gradients(fn, aType(randn(rng, M, P)),
+            @test_gradients(fn, aType(randn(rng, M, P)),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, adjoint(aType(randn(rng, P, M))),
+            @test_gradients(fn, adjoint(aType(randn(rng, P, M))),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, aType(randn(rng, M, P)),
+            @test_gradients(fn, aType(randn(rng, M, P)),
                 batched_adjoint(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, aType(randn(rng, M, P)),
+            @test_gradients(fn, aType(randn(rng, M, P)),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, adjoint(aType(randn(rng, P, M))),
+            @test_gradients(fn, adjoint(aType(randn(rng, P, M))),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, aType(randn(rng, M, P)),
+            @test_gradients(fn, aType(randn(rng, M, P)),
                 batched_adjoint(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)
         end
 
         @testset "... or equivalent to a matrix" begin
-            test_gradients(fn, aType(randn(rng, M, P, 1)),
+            @test_gradients(fn, aType(randn(rng, M, P, 1)),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, batched_transpose(aType(randn(rng, P, M, 1))),
+            @test_gradients(fn, batched_transpose(aType(randn(rng, P, M, 1))),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, aType(randn(rng, M, P, 1)),
+            @test_gradients(fn, aType(randn(rng, M, P, 1)),
                 batched_transpose(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)
         end
     end
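
Note: LuxTestUtils 1.2 keeps the call syntax of the old `test_gradients` function for the new `@test_gradients` macro, so every hunk above is a mechanical rename. A rough standalone sketch of the pattern, assuming only that LuxTestUtils is installed; the loss `f` and input `x` below are placeholders, not taken from the test suite:

    using LuxTestUtils

    f(x) = sum(abs2, x)       # placeholder scalar-valued loss
    x = randn(Float32, 4, 3)  # placeholder input array

    # Before (LuxTestUtils 1.1.2): plain function call
    # test_gradients(f, x; atol=1.0f-3, rtol=1.0f-3)

    # After (LuxTestUtils 1.2): macro call with the same positional and keyword arguments
    @test_gradients(f, x; atol=1.0f-3, rtol=1.0f-3)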