From 034ef471c7a28d129a3eafea8480d3207bfa2c2f Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Tue, 17 Sep 2024 23:53:58 -0400
Subject: [PATCH] fix: update to use test_gradients macro

---
 test/Project.toml                        |  2 +-
 test/common_ops/activation_tests.jl      |  6 +++---
 test/common_ops/bias_act_tests.jl        |  6 +++---
 test/common_ops/conv_tests.jl            |  2 +-
 test/common_ops/dense_tests.jl           |  2 +-
 test/common_ops/dropout_tests.jl         |  8 ++++----
 test/normalization/batchnorm_tests.jl    |  4 ++--
 test/normalization/groupnorm_tests.jl    |  2 +-
 test/normalization/instancenorm_tests.jl |  4 ++--
 test/normalization/layernorm_tests.jl    |  4 ++--
 test/others/bmm_tests.jl                 | 24 ++++++++++++------------
 11 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/test/Project.toml b/test/Project.toml
index 79a435ea..51b229fc 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -44,7 +44,7 @@ ForwardDiff = "0.10.36"
 Hwloc = "3.2"
 InteractiveUtils = "<0.0.1, 1"
 JLArrays = "0.1.5"
-LuxTestUtils = "1.1.2"
+LuxTestUtils = "1.2"
 MKL = "0.7"
 MLDataDevices = "1.0.0"
 NNlib = "0.9.21"
diff --git a/test/common_ops/activation_tests.jl b/test/common_ops/activation_tests.jl
index ca78ae41..a5c3e2f8 100644
--- a/test/common_ops/activation_tests.jl
+++ b/test/common_ops/activation_tests.jl
@@ -39,9 +39,9 @@
             end
             @test @inferred(Zygote.gradient(apply_act_fast2, f, x)) isa Any
 
-            test_gradients(Base.Fix1(apply_act, f), x; atol, rtol)
-            test_gradients(Base.Fix1(apply_act_fast, f), x; atol, rtol)
-            test_gradients(Base.Fix1(apply_act_fast2, f), x; atol, rtol)
+            @test_gradients(Base.Fix1(apply_act, f), x; atol, rtol)
+            @test_gradients(Base.Fix1(apply_act_fast, f), x; atol, rtol)
+            @test_gradients(Base.Fix1(apply_act_fast2, f), x; atol, rtol)
 
             ∂x1 = Zygote.gradient(apply_act, f, x)[2]
             ∂x2 = Zygote.gradient(apply_act_fast, f, x)[2]
diff --git a/test/common_ops/bias_act_tests.jl b/test/common_ops/bias_act_tests.jl
index 40d84eeb..2bdbc830 100644
--- a/test/common_ops/bias_act_tests.jl
+++ b/test/common_ops/bias_act_tests.jl
@@ -50,11 +50,11 @@
                 @test_broken @inferred(Zygote.gradient(bias_act_loss3, act, x, b)) isa Any
             end
 
-            test_gradients(__Fix1(bias_act_loss1, act), x, b; atol, rtol,
+            @test_gradients(__Fix1(bias_act_loss1, act), x, b; atol, rtol,
                 soft_fail=fp16 ? [AutoFiniteDiff()] : [])
-            test_gradients(__Fix1(bias_act_loss2, act), x, b; atol, rtol,
+            @test_gradients(__Fix1(bias_act_loss2, act), x, b; atol, rtol,
                 soft_fail=fp16 ? [AutoFiniteDiff()] : [])
-            test_gradients(__Fix1(bias_act_loss3, act), x, b; atol, rtol,
+            @test_gradients(__Fix1(bias_act_loss3, act), x, b; atol, rtol,
                 soft_fail=fp16 ? [AutoFiniteDiff()] : [])
 
             ∂x1, ∂b1 = Zygote.gradient(__Fix1(bias_act_loss1, act), x, b)
diff --git a/test/common_ops/conv_tests.jl b/test/common_ops/conv_tests.jl
index ea498dae..5c208cd4 100644
--- a/test/common_ops/conv_tests.jl
+++ b/test/common_ops/conv_tests.jl
@@ -68,7 +68,7 @@ function run_conv_testing(gen_f::Function, activation, kernel, stride, padding,
     mp && push!(skip_backends, AutoReverseDiff())
     ((mp && ongpu) || (mode == "amdgpu" && (Tx == Float64 || Tw == Float64))) &&
         push!(skip_backends, AutoTracker())
-    test_gradients(__f_grad, weight, x, bias; atol, rtol, skip_backends, soft_fail=fp16)
+    @test_gradients(__f_grad, weight, x, bias; atol, rtol, skip_backends, soft_fail=fp16)
 end
 
 anonact = x -> gelu(x)
diff --git a/test/common_ops/dense_tests.jl b/test/common_ops/dense_tests.jl
index 92af93ba..a14906b6 100644
--- a/test/common_ops/dense_tests.jl
+++ b/test/common_ops/dense_tests.jl
@@ -46,7 +46,7 @@ function run_dense_testing(Tw, Tx, M, N, hasbias, activation, aType, mode, ongpu
     __f_grad = let activation = activation
         (w, x, b) -> __f(activation, w, x, b)
     end
-    test_gradients(__f_grad, w, x, bias; atol, rtol, skip_backends, soft_fail=fp16)
+    @test_gradients(__f_grad, w, x, bias; atol, rtol, skip_backends, soft_fail=fp16)
 
     y_simple = dense_simple(activation, w, x, bias)
     y_zyg = fused_dense_bias_activation(activation, w, x, bias)
diff --git a/test/common_ops/dropout_tests.jl b/test/common_ops/dropout_tests.jl
index 5d3baa28..8d409fee 100644
--- a/test/common_ops/dropout_tests.jl
+++ b/test/common_ops/dropout_tests.jl
@@ -27,7 +27,7 @@
             __f = let rng = rng, T = T
                 x -> sum(first(dropout(rng, x, T(0.5), Val(true), T(2), dims)))
             end
-            test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
+            @test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
                 soft_fail=(T == Float16 ? [AutoFiniteDiff()] : []),
                 broken_backends=(T == Float16 && Sys.iswindows() ? [AutoEnzyme()] : []))
 
@@ -74,7 +74,7 @@ end
             __f = let rng = rng, mask = mask, p = T(0.5), invp = T(2)
                 x -> sum(first(dropout(rng, x, mask, p, Val(true), Val(true), invp, :)))
             end
-            test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
+            @test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
                 soft_fail=(T == Float16 ? [AutoFiniteDiff()] : []))
 
             @jet sum(first(dropout(
@@ -105,7 +105,7 @@
             end
             soft_fail = T == Float16 ? Any[AutoFiniteDiff()] : []
             skip_backends = length(x_shape) == 5 ? [AutoEnzyme()] : []
-            test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3, soft_fail, skip_backends)
+            @test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3, soft_fail, skip_backends)
 
             @jet sum(first(dropout(
                 rng, x, mask, T(0.5), Val(true), Val(false), T(2), :)))
@@ -154,7 +154,7 @@ end
             __f = let rng = rng
                 x -> sum(first(alpha_dropout(rng, x, T(0.5), Val(true))))
             end
-            test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
+            @test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
                 soft_fail=(T == Float16 ? [AutoFiniteDiff()] : []),
                 broken_backends=(T == Float16 && Sys.iswindows() ? [AutoEnzyme()] : []))
 
diff --git a/test/normalization/batchnorm_tests.jl b/test/normalization/batchnorm_tests.jl
index 553cc8c0..38d14a2d 100644
--- a/test/normalization/batchnorm_tests.jl
+++ b/test/normalization/batchnorm_tests.jl
@@ -98,7 +98,7 @@ function run_batchnorm_testing(
         __f = (args...) -> sum(first(batchnorm(
             args..., rm, rv, training, act, T(0.9), epsilon)))
 
-        test_gradients(
+        @test_gradients(
             __f, x, scale, bias; atol, rtol, skip_backends, soft_fail, broken_backends)
     end
 
@@ -183,6 +183,6 @@ end
 
         __f = (args...) -> sum(first(batchnorm(
             args..., running_mean, running_var, Val(true), identity, 0.9f0, 1.0f-5)))
-        test_gradients(__f, x, scale, bias; atol=1.0f-3, rtol=1.0f-3)
+        @test_gradients(__f, x, scale, bias; atol=1.0f-3, rtol=1.0f-3)
     end
 end
diff --git a/test/normalization/groupnorm_tests.jl b/test/normalization/groupnorm_tests.jl
index 6a512148..3d5e821a 100644
--- a/test/normalization/groupnorm_tests.jl
+++ b/test/normalization/groupnorm_tests.jl
@@ -74,7 +74,7 @@ function run_groupnorm_testing(T, sz, groups, affine, act, aType, mode, ongpu)
 
     if affine
         __f = (args...) -> sum(groupnorm(args..., groups, act, epsilon))
-        test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
+        @test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
     end
 end
 
diff --git a/test/normalization/instancenorm_tests.jl b/test/normalization/instancenorm_tests.jl
index 9091a436..a48a502d 100644
--- a/test/normalization/instancenorm_tests.jl
+++ b/test/normalization/instancenorm_tests.jl
@@ -39,7 +39,7 @@ function run_instancenorm_testing(gen_f, T, sz, training, act, aType, mode, ongp
     if is_training(training)
         __f = (args...) -> sum(first(instancenorm(args..., training, act, epsilon)))
         soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
-        test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
+        @test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
     end
 
     # Now test with running stats
@@ -67,7 +67,7 @@ function run_instancenorm_testing(gen_f, T, sz, training, act, aType, mode, ongp
             args..., rm, rv, training, act, T(0.1), epsilon)))
         soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
         skip_backends = (Sys.iswindows() && fp16) ? [AutoEnzyme()] : []
-        test_gradients(__f, x, scale, bias; atol, rtol, soft_fail, skip_backends)
+        @test_gradients(__f, x, scale, bias; atol, rtol, soft_fail, skip_backends)
     end
 end
 
diff --git a/test/normalization/layernorm_tests.jl b/test/normalization/layernorm_tests.jl
index 63386f4a..bdfccb47 100644
--- a/test/normalization/layernorm_tests.jl
+++ b/test/normalization/layernorm_tests.jl
@@ -58,10 +58,10 @@ function run_layernorm_testing_core(
     soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
     if affine_shape !== nothing
         __f = (args...) -> sum(_f(args...))
-        test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
+        @test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
     else
         __f = x -> sum(_f(x, scale, bias))
-        test_gradients(__f, x; atol, rtol, soft_fail)
+        @test_gradients(__f, x; atol, rtol, soft_fail)
     end
 
     if anonact !== act
diff --git a/test/others/bmm_tests.jl b/test/others/bmm_tests.jl
index df51df15..ea847568 100644
--- a/test/others/bmm_tests.jl
+++ b/test/others/bmm_tests.jl
@@ -264,36 +264,36 @@
         end
         B = 3
 
         @testset "Two 3-arrays" begin
-            test_gradients(fn, aType(randn(rng, M, P, B)),
+            @test_gradients(fn, aType(randn(rng, M, P, B)),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, batched_adjoint(aType(randn(rng, P, M, B))),
+            @test_gradients(fn, batched_adjoint(aType(randn(rng, P, M, B))),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, aType(randn(rng, M, P, B)),
+            @test_gradients(fn, aType(randn(rng, M, P, B)),
                 batched_transpose(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)
         end
 
         @testset "One a matrix..." begin
-            test_gradients(fn, aType(randn(rng, M, P)),
+            @test_gradients(fn, aType(randn(rng, M, P)),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, adjoint(aType(randn(rng, P, M))),
+            @test_gradients(fn, adjoint(aType(randn(rng, P, M))),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, aType(randn(rng, M, P)),
+            @test_gradients(fn, aType(randn(rng, M, P)),
                 batched_adjoint(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, aType(randn(rng, M, P)),
+            @test_gradients(fn, aType(randn(rng, M, P)),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, adjoint(aType(randn(rng, P, M))),
+            @test_gradients(fn, adjoint(aType(randn(rng, P, M))),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, aType(randn(rng, M, P)),
+            @test_gradients(fn, aType(randn(rng, M, P)),
                 batched_adjoint(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)
         end
 
         @testset "... or equivalent to a matrix" begin
-            test_gradients(fn, aType(randn(rng, M, P, 1)),
+            @test_gradients(fn, aType(randn(rng, M, P, 1)),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, batched_transpose(aType(randn(rng, P, M, 1))),
+            @test_gradients(fn, batched_transpose(aType(randn(rng, P, M, 1))),
                 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-            test_gradients(fn, aType(randn(rng, M, P, 1)),
+            @test_gradients(fn, aType(randn(rng, M, P, 1)),
                 batched_transpose(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)
         end
     end
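
Note: LuxTestUtils 1.2 keeps the call syntax of the old `test_gradients` function for the new `@test_gradients` macro, so every hunk above is a mechanical rename. A rough standalone sketch of the pattern, assuming only that LuxTestUtils is installed; the loss `f` and input `x` below are placeholders, not taken from the test suite:

    using LuxTestUtils

    f(x) = sum(abs2, x)       # placeholder scalar-valued loss
    x = randn(Float32, 4, 3)  # placeholder input array

    # Before (LuxTestUtils 1.1.2): plain function call
    # test_gradients(f, x; atol=1.0f-3, rtol=1.0f-3)

    # After (LuxTestUtils 1.2): macro call with the same positional and keyword arguments
    @test_gradients(f, x; atol=1.0f-3, rtol=1.0f-3)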