diff --git a/benchmarks/benchmarks_suite.jl b/benchmarks/benchmarks_suite.jl
index 04e6a65cb..ef00117ee 100644
--- a/benchmarks/benchmarks_suite.jl
+++ b/benchmarks/benchmarks_suite.jl
@@ -63,7 +63,7 @@ n_adapts = 2_000
 BenchmarkSuite["mnormal"]["hmc"] = @benchmarkable sample($(target(dim)), $(HMC(0.1, 5)), $n_samples)
 
-## MvNormal: ForwardDiff vs BackwardDiff (Tracker)
+## MvNormal: ForwardDiff vs ReverseDiff
 
 @model function mdemo(d, N)
     Θ = Vector(undef, N)
@@ -77,10 +77,8 @@ A = rand(Wishart(dim2, Matrix{Float64}(I, dim2, dim2)));
 d = MvNormal(zeros(dim2), A)
 
 # ForwardDiff
-Turing.setadbackend(:forwarddiff)
-BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)
+BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0))), 5000)
 
-# BackwardDiff
-Turing.setadbackend(:reversediff)
-BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)
+# ReverseDiff
+BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoReverseDiff(false))), 5000)
diff --git a/ext/TuringDynamicHMCExt.jl b/ext/TuringDynamicHMCExt.jl
index baa50f04f..13686b394 100644
--- a/ext/TuringDynamicHMCExt.jl
+++ b/ext/TuringDynamicHMCExt.jl
@@ -34,7 +34,7 @@ end
 function DynamicNUTS(
     spl::DynamicHMC.NUTS = DynamicHMC.NUTS(),
     space::Tuple = ();
-    adtype::ADTypes.AbstractADType = Turing.ADBackend()
+    adtype::ADTypes.AbstractADType = ADTypes.AutoForwardDiff(; chunksize=0)
 )
     return DynamicNUTS{typeof(adtype),space,typeof(spl)}(spl, adtype)
 end
diff --git a/ext/TuringOptimExt.jl b/ext/TuringOptimExt.jl
index eb594929d..14fbf106e 100644
--- a/ext/TuringOptimExt.jl
+++ b/ext/TuringOptimExt.jl
@@ -179,7 +179,6 @@ map_est = optimize(model, MAP())
 map_est = optimize(model, MAP(), NelderMead())
 ```
 """
-
 function Optim.optimize(model::DynamicPPL.Model, ::Turing.MAP, options::Optim.Options=Optim.Options(); kwargs...)
     ctx = Turing.OptimizationContext(DynamicPPL.DefaultContext())
     f = Turing.OptimLogDensity(model, ctx)
diff --git a/src/Turing.jl b/src/Turing.jl
index 5dbf3d4fa..b91f0608d 100644
--- a/src/Turing.jl
+++ b/src/Turing.jl
@@ -98,10 +98,6 @@ export @model, # modelling
     @prob_str,
     externalsampler,
 
-    setchunksize, # helper
-    setadbackend,
-    setadsafe,
-
     setprogress!, # debugging
 
     Flat,
diff --git a/src/essential/Essential.jl b/src/essential/Essential.jl
index ed3f972ed..e92e541c5 100644
--- a/src/essential/Essential.jl
+++ b/src/essential/Essential.jl
@@ -38,20 +38,11 @@ export @model,
     effectiveSampleSize,
     sweep!,
     ResampleWithESSThreshold,
-    ADBackend,
-    setadbackend,
-    setadsafe,
     AutoForwardDiff,
     AutoTracker,
     AutoZygote,
     AutoReverseDiff,
     value,
-    CHUNKSIZE,
-    ADBACKEND,
-    setchunksize,
-    setrdcache,
-    getrdcache,
-    verifygrad,
     @logprob_str,
     @prob_str
diff --git a/src/essential/ad.jl b/src/essential/ad.jl
index 01cdb1657..c873e3a03 100644
--- a/src/essential/ad.jl
+++ b/src/essential/ad.jl
@@ -1,75 +1,19 @@
-##############################
-# Global variables/constants #
-##############################
-const ADBACKEND = Ref(:forwarddiff)
-setadbackend(backend_sym::Symbol) = setadbackend(Val(backend_sym))
-function setadbackend(backend::Val)
-    _setadbackend(backend)
-    AdvancedVI.setadbackend(backend)
-end
-
-function _setadbackend(::Val{:forwarddiff})
-    ADBACKEND[] = :forwarddiff
-end
-function _setadbackend(::Val{:tracker})
-    @warn "Usage of Tracker.jl with Turing.jl is no longer being actively tested and maintained; please use at your own risk. See Zygote.jl or ReverseDiff.jl for fully supported reverse-mode backends."
-    ADBACKEND[] = :tracker
-end
-function _setadbackend(::Val{:zygote})
-    ADBACKEND[] = :zygote
-end
-function _setadbackend(::Val{:reversediff})
-    ADBACKEND[] = :reversediff
-end
-
-const ADSAFE = Ref(false)
-function setadsafe(switch::Bool)
-    @info("[Turing]: global ADSAFE is set as $switch")
-    ADSAFE[] = switch
-end
-
-const CHUNKSIZE = Ref(0) # 0 means letting ForwardDiff set it automatically
-
-function setchunksize(chunk_size::Int)
-    @info("[Turing]: AD chunk size is set as $chunk_size")
-    CHUNKSIZE[] = chunk_size
-    AdvancedVI.setchunksize(chunk_size)
-end
-
 getchunksize(::AutoForwardDiff{chunk}) where {chunk} = chunk
 
 standardtag(::AutoForwardDiff{<:Any,Nothing}) = true
 standardtag(::AutoForwardDiff) = false
 
-const RDCache = Ref(false)
-
-setrdcache(b::Bool) = setrdcache(Val(b))
-setrdcache(::Val{false}) = RDCache[] = false
-setrdcache(::Val{true}) = RDCache[] = true
-
-getrdcache() = RDCache[]
-
-ADBackend() = ADBackend(ADBACKEND[])
-ADBackend(T::Symbol) = ADBackend(Val(T))
-
-ADBackend(::Val{:forwarddiff}) = AutoForwardDiff(; chunksize=CHUNKSIZE[])
-ADBackend(::Val{:tracker}) = AutoTracker()
-ADBackend(::Val{:zygote}) = AutoZygote()
-ADBackend(::Val{:reversediff}) = AutoReverseDiff(; compile=getrdcache())
-
-ADBackend(::Val) = error("The requested AD backend is not available. Make sure to load all required packages.")
-
 """
     getADbackend(alg)
 
 Find the autodifferentiation backend of the algorithm `alg`.
""" getADbackend(spl::Sampler) = getADbackend(spl.alg) -getADbackend(::SampleFromPrior) = ADBackend() +getADbackend(::SampleFromPrior) = AutoForwardDiff(; chunksize=0) # TODO: remove `getADbackend` getADbackend(ctx::DynamicPPL.SamplingContext) = getADbackend(ctx.sampler) getADbackend(ctx::DynamicPPL.AbstractContext) = getADbackend(DynamicPPL.NodeTrait(ctx), ctx) -getADbackend(::DynamicPPL.IsLeaf, ctx::DynamicPPL.AbstractContext) = ADBackend() +getADbackend(::DynamicPPL.IsLeaf, ctx::DynamicPPL.AbstractContext) = AutoForwardDiff(; chunksize=0) getADbackend(::DynamicPPL.IsParent, ctx::DynamicPPL.AbstractContext) = getADbackend(DynamicPPL.childcontext(ctx)) function LogDensityProblemsAD.ADgradient(ℓ::Turing.LogDensityFunction) diff --git a/src/mcmc/hmc.jl b/src/mcmc/hmc.jl index d5ace1ddc..d9a47f5c5 100644 --- a/src/mcmc/hmc.jl +++ b/src/mcmc/hmc.jl @@ -32,7 +32,7 @@ end ### """ - HMC(ϵ::Float64, n_leapfrog::Int; adtype::ADTypes.AbstractADType = Turing.ADBackend()) + HMC(ϵ::Float64, n_leapfrog::Int; adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0)) Hamiltonian Monte Carlo sampler with static trajectory. @@ -41,7 +41,7 @@ Hamiltonian Monte Carlo sampler with static trajectory. - `ϵ`: The leapfrog step size to use. - `n_leapfrog`: The number of leapfrog steps to use. - `adtype`: The automatic differentiation (AD) backend. - If it is not provided, the currently activated AD backend in Turing is used. + If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined. # Usage @@ -67,15 +67,15 @@ struct HMC{AD, space, metricT <: AHMC.AbstractMetric} <: StaticHamiltonian adtype::AD end -function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType = ADBackend()) where {metricT <: AHMC.AbstractMetric} - return HMC{typeof(adtype), space, metricT}(ϵ, n_leapfrog, adtype) +function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0)) where {metricT<:AHMC.AbstractMetric} + return HMC{typeof(adtype),space,metricT}(ϵ, n_leapfrog, adtype) end function HMC( ϵ::Float64, n_leapfrog::Int, space::Symbol...; metricT=AHMC.UnitEuclideanMetric, - adtype::ADTypes.AbstractADType = ADBackend(), + adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0), ) return HMC(ϵ, n_leapfrog, metricT, space; adtype = adtype) end @@ -280,7 +280,7 @@ end """ HMCDA( n_adapts::Int, δ::Float64, λ::Float64; ϵ::Float64 = 0.0; - adtype::ADTypes.AbstractADType = Turing.ADBackend(), + adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0), ) Hamiltonian Monte Carlo sampler with Dual Averaging algorithm. @@ -298,7 +298,7 @@ HMCDA(200, 0.65, 0.3) - `λ`: Target leapfrog length. - `ϵ`: Initial step size; 0 means automatically search by Turing. - `adtype`: The automatic differentiation (AD) backend. - If it is not provided, the currently activated AD backend in Turing is used. + If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined. 
 
 # Reference
@@ -316,8 +316,8 @@ struct HMCDA{AD, space, metricT <: AHMC.AbstractMetric} <: AdaptiveHamiltonian
     adtype::AD
 end
 
-function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType = ADBackend()) where {metricT <: AHMC.AbstractMetric}
-    return HMCDA{typeof(adtype), space, metricT}(n_adapts, δ, λ, ϵ, adtype)
+function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0)) where {metricT<:AHMC.AbstractMetric}
+    return HMCDA{typeof(adtype),space,metricT}(n_adapts, δ, λ, ϵ, adtype)
 end
 
 function HMCDA(
@@ -325,7 +325,7 @@ function HMCDA(
     λ::Float64;
     init_ϵ::Float64=0.0,
     metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
     return HMCDA(-1, δ, λ, init_ϵ, metricT, (); adtype = adtype)
 end
@@ -347,14 +347,14 @@ function HMCDA(
     space::Symbol...;
     init_ϵ::Float64=0.0,
     metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
     return HMCDA(n_adapts, δ, λ, init_ϵ, metricT, space; adtype = adtype)
 end
 
 """
-    NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0)
+    NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0, adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0))
 
 No-U-Turn Sampler (NUTS) sampler.
 
@@ -372,6 +372,8 @@ Arguments:
 - `max_depth::Int` : Maximum doubling tree depth.
 - `Δ_max::Float64` : Maximum divergence during doubling tree.
 - `init_ϵ::Float64` : Initial step size; 0 means automatically searching using a heuristic procedure.
+- `adtype::ADTypes.AbstractADType` : The automatic differentiation (AD) backend.
+  If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined.
 
 """
 struct NUTS{AD,space,metricT<:AHMC.AbstractMetric} <: AdaptiveHamiltonian
@@ -391,9 +393,9 @@ function NUTS(
     ϵ::Float64,
     ::Type{metricT},
     space::Tuple;
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 ) where {metricT}
-    return NUTS{typeof(adtype), space, metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype)
+    return NUTS{typeof(adtype),space,metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype)
 end
 
 function NUTS(
@@ -413,9 +415,9 @@ function NUTS(
     Δ_max::Float64=1000.0,
     init_ϵ::Float64=0.0,
     metricT=AHMC.DiagEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
-    NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype = adtype)
+    NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype=adtype)
 end
 
 function NUTS(
@@ -424,9 +426,9 @@ function NUTS(
     Δ_max::Float64=1000.0,
     init_ϵ::Float64=0.0,
     metricT=AHMC.DiagEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
-    NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype = adtype)
+    NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype=adtype)
 end
 
 function NUTS(; kwargs...)
diff --git a/src/mcmc/sghmc.jl b/src/mcmc/sghmc.jl
index eda3a5fa4..aa89e5192 100644
--- a/src/mcmc/sghmc.jl
+++ b/src/mcmc/sghmc.jl
@@ -23,13 +23,13 @@ end
         space::Symbol...;
         learning_rate::Real,
        momentum_decay::Real,
-        adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+        adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
     )
 
 Create a Stochastic Gradient Hamiltonian Monte Carlo (SGHMC) sampler.
 
-If the automatic differentiation (AD) backend `adtype` is not provided, the currently activated
-AD backend in Turing is used.
+If the automatic differentiation (AD) backend `adtype` is not provided, ForwardDiff
+with automatically determined `chunksize` is used.
 
 # Reference
 
@@ -41,7 +41,7 @@ function SGHMC(
     space::Symbol...;
     learning_rate::Real,
     momentum_decay::Real,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
     _learning_rate, _momentum_decay = promote(learning_rate, momentum_decay)
     return SGHMC{typeof(adtype),space,typeof(_learning_rate)}(_learning_rate, _momentum_decay, adtype)
@@ -163,15 +163,15 @@ end
     SGLD(
         space::Symbol...;
         stepsize = PolynomialStepsize(0.01),
-        adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+        adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
     )
 
 Stochastic gradient Langevin dynamics (SGLD) sampler.
 
 By default, a polynomially decaying stepsize is used.
 
-If the automatic differentiation (AD) backend `adtype` is not provided, the currently activated
-AD backend in Turing is used.
+If the automatic differentiation (AD) backend `adtype` is not provided, ForwardDiff
+with automatically determined `chunksize` is used.
 
 # Reference
 
@@ -184,7 +184,7 @@ See also: [`PolynomialStepsize`](@ref)
 function SGLD(
     space::Symbol...;
     stepsize = PolynomialStepsize(0.01),
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
 )
     return SGLD{typeof(adtype),space,typeof(stepsize)}(stepsize, adtype)
 end
diff --git a/test/essential/ad.jl b/test/essential/ad.jl
index 351d7fe41..f245a87b1 100644
--- a/test/essential/ad.jl
+++ b/test/essential/ad.jl
@@ -84,31 +84,24 @@
         @model function dir()
             theta ~ Dirichlet(1 ./ fill(4, 4))
         end
-        Turing.setadbackend(:zygote)
-        sample(dir(), HMC(0.01, 1), 1000)
-        Turing.setadbackend(:reversediff)
-        Turing.setrdcache(false)
-        sample(dir(), HMC(0.01, 1), 1000)
-        Turing.setrdcache(true)
-        sample(dir(), HMC(0.01, 1), 1000)
-        Turing.setrdcache(false)
+        sample(dir(), HMC(0.01, 1; adtype=AutoZygote()), 1000)
+        sample(dir(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000)
+        sample(dir(), HMC(0.01, 1; adtype=AutoReverseDiff(true)), 1000)
     end
     @testset "PDMatDistribution AD" begin
         @model function wishart()
             theta ~ Wishart(4, Matrix{Float64}(I, 4, 4))
         end
-        Turing.setadbackend(:reversediff)
-        sample(wishart(), HMC(0.01, 1), 1000)
-        Turing.setadbackend(:zygote)
-        sample(wishart(), HMC(0.01, 1), 1000)
+
+        sample(wishart(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000)
+        sample(wishart(), HMC(0.01, 1; adtype=AutoZygote()), 1000)
 
         @model function invwishart()
             theta ~ InverseWishart(4, Matrix{Float64}(I, 4, 4))
         end
-        Turing.setadbackend(:reversediff)
-        sample(invwishart(), HMC(0.01, 1), 1000)
-        Turing.setadbackend(:zygote)
-        sample(invwishart(), HMC(0.01, 1), 1000)
+
+        sample(invwishart(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000)
+        sample(invwishart(), HMC(0.01, 1; adtype=AutoZygote()), 1000)
     end
     @testset "Hessian test" begin
         @model function tst(x, ::Type{TV}=Vector{Float64}) where {TV}
@@ -156,8 +149,6 @@
     end
@testset "memoization: issue #1393" begin - Turing.setadbackend(:reversediff) - Turing.setrdcache(true) @model function demo(data) sigma ~ Uniform(0.0, 20.0) @@ -168,27 +159,13 @@ for i in 1:5 d = Normal(0.0, i) data = rand(d, N) - chn = sample(demo(data), NUTS(0.65), 1000) + chn = sample(demo(data), NUTS(0.65; adtype=AutoReverseDiff(true)), 1000) @test mean(Array(chn[:sigma])) ≈ std(data) atol = 0.5 end - Turing.setrdcache(false) - end - - @testset "chunksize" begin - # Default value is 0 (automatic choice by ForwardDiff) - @test Turing.CHUNKSIZE[] == 0 - - setchunksize(8) - @test Turing.CHUNKSIZE[] == 8 - @test Turing.AdvancedVI.CHUNKSIZE[] == 8 - setchunksize(0) - @test Turing.CHUNKSIZE[] == 0 - @test Turing.AdvancedVI.CHUNKSIZE[] == 0 end @testset "tag" begin - @test Turing.ADBackend(Val(:forwarddiff)) === Turing.AutoForwardDiff(; chunksize=Turing.CHUNKSIZE[]) for chunksize in (0, 1, 10) ad = Turing.AutoForwardDiff(; chunksize=chunksize) @test ad === Turing.AutoForwardDiff(; chunksize=chunksize) diff --git a/test/mcmc/Inference.jl b/test/mcmc/Inference.jl index 9b10f9470..1f5a14869 100644 --- a/test/mcmc/Inference.jl +++ b/test/mcmc/Inference.jl @@ -1,4 +1,4 @@ -@testset "inference.jl" begin +@testset "Testing inference.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false)) # Only test threading if 1.3+. if VERSION > v"1.2" @testset "threaded sampling" begin @@ -10,19 +10,19 @@ # https://github.com/TuringLang/Turing.jl/issues/1571 samplers = @static if VERSION <= v"1.5.3" || VERSION >= v"1.6.0" ( - HMC(0.1, 7), + HMC(0.1, 7; adtype=adbackend), PG(10), IS(), MH(), - Gibbs(PG(3, :s), HMC(0.4, 8, :m)), - Gibbs(HMC(0.1, 5, :s), ESS(:m)), + Gibbs(PG(3, :s), HMC(0.4, 8, :m; adtype=adbackend)), + Gibbs(HMC(0.1, 5, :s; adtype=adbackend), ESS(:m)), ) else ( - HMC(0.1, 7), + HMC(0.1, 7; adtype=adbackend), IS(), MH(), - Gibbs(HMC(0.1, 5, :s), ESS(:m)), + Gibbs(HMC(0.1, 5, :s; adtype=adbackend), ESS(:m)), ) end for sampler in samplers @@ -51,12 +51,12 @@ # Smoke test for default sample call. 
             Random.seed!(100)
-            chain = sample(gdemo_default, HMC(0.1, 7), MCMCThreads(), 1000, 4)
+            chain = sample(gdemo_default, HMC(0.1, 7; adtype=adbackend), MCMCThreads(), 1000, 4)
             check_gdemo(chain)
 
             # run sampler: progress logging should be disabled and
             # it should return a Chains object
-            sampler = Sampler(HMC(0.1, 7), gdemo_default)
+            sampler = Sampler(HMC(0.1, 7; adtype=adbackend), gdemo_default)
             chains = sample(gdemo_default, sampler, MCMCThreads(), 1000, 4)
             @test chains isa MCMCChains.Chains
         end
@@ -64,9 +64,9 @@
     @testset "chain save/resume" begin
         Random.seed!(1234)
 
-        alg1 = HMCDA(1000, 0.65, 0.15)
+        alg1 = HMCDA(1000, 0.65, 0.15; adtype=adbackend)
         alg2 = PG(20)
-        alg3 = Gibbs(PG(30, :s), HMC(0.2, 4, :m))
+        alg3 = Gibbs(PG(30, :s), HMC(0.2, 4, :m; adtype=adbackend))
 
         chn1 = sample(gdemo_default, alg1, 5000; save_state=true)
         check_gdemo(chn1)
@@ -200,7 +200,7 @@
 
         smc = SMC()
         pg = PG(10)
-        gibbs = Gibbs(HMC(0.2, 3, :p), PG(10, :x))
+        gibbs = Gibbs(HMC(0.2, 3, :p; adtype=adbackend), PG(10, :x))
 
         chn_s = sample(testbb(obs), smc, 1000)
         chn_p = sample(testbb(obs), pg, 2000)
@@ -227,7 +227,7 @@
             return s, m
         end
 
-        gibbs = Gibbs(PG(10, :s), HMC(0.4, 8, :m))
+        gibbs = Gibbs(PG(10, :s), HMC(0.4, 8, :m; adtype=adbackend))
         chain = sample(fggibbstest(xs), gibbs, 2)
     end
     @testset "new grammar" begin
@@ -303,7 +303,7 @@
             end
         end
 
-        chain = sample(noreturn([1.5 2.0]), HMC(0.1, 10), 4000)
+        chain = sample(noreturn([1.5 2.0]), HMC(0.1, 10; adtype=adbackend), 4000)
         check_numerical(chain, [:s, :m], [49 / 24, 7 / 6])
     end
     @testset "observe" begin
@@ -333,87 +333,85 @@
         @test all(isone, res_pg[:x])
     end
     @testset "sample" begin
-        alg = Gibbs(HMC(0.2, 3, :m), PG(10, :s))
+        alg = Gibbs(HMC(0.2, 3, :m; adtype=adbackend), PG(10, :s))
         chn = sample(gdemo_default, alg, 1000)
     end
     @testset "vectorization @." begin
-        # https://github.com/FluxML/Tracker.jl/issues/119
-        if !(Turing.ADBackend() isa Turing.AutoTracker)
-            @model function vdemo1(x)
-                s ~ InverseGamma(2, 3)
-                m ~ Normal(0, sqrt(s))
-                @. x ~ Normal(m, sqrt(s))
-                return s, m
-            end
+        @model function vdemo1(x)
+            s ~ InverseGamma(2, 3)
+            m ~ Normal(0, sqrt(s))
+            @. x ~ Normal(m, sqrt(s))
+            return s, m
+        end
 
-            alg = HMC(0.01, 5)
-            x = randn(100)
-            res = sample(vdemo1(x), alg, 250)
+        alg = HMC(0.01, 5; adtype=adbackend)
+        x = randn(100)
+        res = sample(vdemo1(x), alg, 250)
 
-            @model function vdemo1b(x)
-                s ~ InverseGamma(2, 3)
-                m ~ Normal(0, sqrt(s))
-                @. x ~ Normal(m, $(sqrt(s)))
-                return s, m
-            end
+        @model function vdemo1b(x)
+            s ~ InverseGamma(2, 3)
+            m ~ Normal(0, sqrt(s))
+            @. x ~ Normal(m, $(sqrt(s)))
+            return s, m
+        end
 
-            res = sample(vdemo1b(x), alg, 250)
+        res = sample(vdemo1b(x), alg, 250)
 
-            @model function vdemo2(x)
-                μ ~ MvNormal(zeros(size(x, 1)), I)
-                @. x ~ $(MvNormal(μ, I))
-            end
+        @model function vdemo2(x)
+            μ ~ MvNormal(zeros(size(x, 1)), I)
+            @. x ~ $(MvNormal(μ, I))
+        end
 
-            D = 2
-            alg = HMC(0.01, 5)
-            res = sample(vdemo2(randn(D, 100)), alg, 250)
+        D = 2
+        alg = HMC(0.01, 5; adtype=adbackend)
+        res = sample(vdemo2(randn(D, 100)), alg, 250)
 
-            # Vector assumptions
-            N = 10
-            alg = HMC(0.2, 4)
+        # Vector assumptions
+        N = 10
+        alg = HMC(0.2, 4; adtype=adbackend)
 
-            @model function vdemo3()
-                x = Vector{Real}(undef, N)
-                for i in 1:N
-                    x[i] ~ Normal(0, sqrt(4))
-                end
+        @model function vdemo3()
+            x = Vector{Real}(undef, N)
+            for i in 1:N
+                x[i] ~ Normal(0, sqrt(4))
             end
+        end
 
-            t_loop = @elapsed res = sample(vdemo3(), alg, 1000)
-
-            # Test for vectorize UnivariateDistribution
-            @model function vdemo4()
-                x = Vector{Real}(undef, N)
-                @. x ~ Normal(0, 2)
-            end
+        t_loop = @elapsed res = sample(vdemo3(), alg, 1000)
 
-            t_vec = @elapsed res = sample(vdemo4(), alg, 1000)
+        # Test for vectorize UnivariateDistribution
+        @model function vdemo4()
+            x = Vector{Real}(undef, N)
+            @. x ~ Normal(0, 2)
+        end
 
-            @model vdemo5() = x ~ MvNormal(zeros(N), 4 * I)
+        t_vec = @elapsed res = sample(vdemo4(), alg, 1000)
 
-            t_mv = @elapsed res = sample(vdemo5(), alg, 1000)
+        @model vdemo5() = x ~ MvNormal(zeros(N), 4 * I)
 
-            println("Time for")
-            println(" Loop : ", t_loop)
-            println(" Vec : ", t_vec)
-            println(" Mv : ", t_mv)
+        t_mv = @elapsed res = sample(vdemo5(), alg, 1000)
 
-            # Transformed test
-            @model function vdemo6()
-                x = Vector{Real}(undef, N)
-                @. x ~ InverseGamma(2, 3)
-            end
+        println("Time for")
+        println(" Loop : ", t_loop)
+        println(" Vec : ", t_vec)
+        println(" Mv : ", t_mv)
 
-            sample(vdemo6(), alg, 1000)
+        # Transformed test
+        @model function vdemo6()
+            x = Vector{Real}(undef, N)
+            @. x ~ InverseGamma(2, 3)
+        end
 
-            N = 3
-            @model function vdemo7()
-                x = Array{Real}(undef, N, N)
-                @. x ~ [InverseGamma(2, 3) for i in 1:N]
-            end
+        sample(vdemo6(), alg, 1000)
 
-            sample(vdemo7(), alg, 1000)
+        N = 3
+        @model function vdemo7()
+            x = Array{Real}(undef, N, N)
+            @. x ~ [InverseGamma(2, 3) for i in 1:N]
         end
+
+        sample(vdemo7(), alg, 1000)
     end
     @testset "vectorization .~" begin
         @model function vdemo1(x)
@@ -423,7 +421,7 @@
             return s, m
         end
 
-        alg = HMC(0.01, 5)
+        alg = HMC(0.01, 5; adtype=adbackend)
         x = randn(100)
         res = sample(vdemo1(x), alg, 250)
 
@@ -433,12 +431,12 @@
         end
 
         D = 2
-        alg = HMC(0.01, 5)
+        alg = HMC(0.01, 5; adtype=adbackend)
         res = sample(vdemo2(randn(D, 100)), alg, 250)
 
         # Vector assumptions
         N = 10
-        alg = HMC(0.2, 4)
+        alg = HMC(0.2, 4; adtype=adbackend)
 
         @model function vdemo3()
             x = Vector{Real}(undef, N)
@@ -483,7 +481,7 @@
     end
     @testset "Type parameters" begin
         N = 10
-        alg = HMC(0.01, 5)
+        alg = HMC(0.01, 5; adtype=adbackend)
         x = randn(1000)
         @model function vdemo1(::Type{T}=Float64) where {T}
             x = Vector{T}(undef, N)
diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl
index ec2821222..ef2299dca 100644
--- a/test/mcmc/gibbs.jl
+++ b/test/mcmc/gibbs.jl
@@ -1,12 +1,12 @@
-@testset "gibbs.jl" begin
+@testset "Testing gibbs.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false))
    @turing_testset "gibbs constructor" begin
        N = 500
-        s1 = Gibbs(HMC(0.1, 5, :s, :m))
+        s1 = Gibbs(HMC(0.1, 5, :s, :m; adtype=adbackend))
         s2 = Gibbs(PG(10, :s, :m))
-        s3 = Gibbs(PG(3, :s), HMC( 0.4, 8, :m))
-        s4 = Gibbs(PG(3, :s), HMC(0.4, 8, :m))
-        s5 = Gibbs(CSMC(3, :s), HMC(0.4, 8, :m))
-        s6 = Gibbs(HMC(0.1, 5, :s), ESS(:m))
+        s3 = Gibbs(PG(3, :s), HMC(0.4, 8, :m; adtype=adbackend))
+        s4 = Gibbs(PG(3, :s), HMC(0.4, 8, :m; adtype=adbackend))
+        s5 = Gibbs(CSMC(3, :s), HMC(0.4, 8, :m; adtype=adbackend))
+        s6 = Gibbs(HMC(0.1, 5, :s; adtype=adbackend), ESS(:m))
         for s in (s1, s2, s3, s4, s5, s6)
             @test DynamicPPL.alg_str(Turing.Sampler(s, gdemo_default)) == "Gibbs"
         end
@@ -32,13 +32,13 @@
     end
     @numerical_testset "gibbs inference" begin
         Random.seed!(100)
-        alg = Gibbs(CSMC(15, :s), HMC(0.2, 4, :m))
+        alg = Gibbs(CSMC(15, :s), HMC(0.2, 4, :m; adtype=adbackend))
         chain = sample(gdemo(1.5, 2.0), alg, 10_000)
         check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.15)
 
         Random.seed!(100)
-        alg = Gibbs(MH(:s), HMC(0.2, 4, :m))
+        alg = Gibbs(MH(:s), HMC(0.2, 4, :m; adtype=adbackend))
         chain = sample(gdemo(1.5, 2.0), alg, 10_000)
         check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1)
 
@@ -51,14 +51,14 @@
         check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1)
 
         Random.seed!(200)
-        gibbs = Gibbs(PG(15, :z1, :z2, :z3, :z4), HMC(0.15, 3, :mu1, :mu2))
+        gibbs = Gibbs(PG(15, :z1, :z2, :z3, :z4), HMC(0.15, 3, :mu1, :mu2; adtype=adbackend))
         chain = sample(MoGtest_default, gibbs, 10_000)
         check_MoGtest_default(chain, atol=0.15)
 
         Random.seed!(200)
         for alg in [
-            Gibbs((MH(:s), 2), (HMC(0.2, 4, :m), 1)),
-            Gibbs((MH(:s), 1), (HMC(0.2, 4, :m), 2)),
+            Gibbs((MH(:s), 2), (HMC(0.2, 4, :m; adtype=adbackend), 1)),
+            Gibbs((MH(:s), 1), (HMC(0.2, 4, :m; adtype=adbackend), 2)),
         ]
             chain = sample(gdemo(1.5, 2.0), alg, 10_000)
             check_gdemo(chain; atol=0.15)
@@ -75,7 +75,7 @@
         end
         model = gdemo_copy()
 
-        function AbstractMCMC.bundle_samples(
+        @nospecialize function AbstractMCMC.bundle_samples(
             samples::Vector,
             ::typeof(model),
             ::Turing.Sampler{<:Gibbs},
@@ -93,7 +93,7 @@
             return
         end
 
-        alg = Gibbs(MH(:s), HMC(0.2, 4, :m))
+        alg = Gibbs(MH(:s), HMC(0.2, 4, :m; adtype=adbackend))
         sample(model, alg, 100; callback = callback)
     end
     @turing_testset "dynamic model" begin
@@ -119,6 +119,6 @@
         model = imm(randn(100), 1.0);
         # https://github.com/TuringLang/Turing.jl/issues/1725
         # sample(model, Gibbs(MH(:z), HMC(0.01, 4, :m)), 100);
-        sample(model, Gibbs(PG(10, :z), HMC(0.01, 4, :m)), 100);
+        sample(model, Gibbs(PG(10, :z), HMC(0.01, 4, :m; adtype=adbackend)), 100)
     end
 end
diff --git a/test/mcmc/gibbs_conditional.jl b/test/mcmc/gibbs_conditional.jl
index 094c949aa..d7752da64 100644
--- a/test/mcmc/gibbs_conditional.jl
+++ b/test/mcmc/gibbs_conditional.jl
@@ -1,4 +1,4 @@
-@turing_testset "gibbs conditionals.jl" begin
+@turing_testset "Testing gibbs conditionals.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false))
     Random.seed!(1000);
     rng = StableRNG(123)
     @turing_testset "gdemo" begin
@@ -131,7 +131,7 @@
         # Compare three Gibbs samplers
         sampler1 = Gibbs(GibbsConditional(:z, cond_z), GibbsConditional(:μ, cond_μ))
         sampler2 = Gibbs(GibbsConditional(:z, cond_z), MH(:μ))
-        sampler3 = Gibbs(GibbsConditional(:z, cond_z), HMC(0.01, 7, :μ))
+        sampler3 = Gibbs(GibbsConditional(:z, cond_z), HMC(0.01, 7, :μ; adtype=adbackend))
 
         for sampler in (sampler1, sampler2, sampler3)
             chain = sample(rng, model, sampler, 10_000)
diff --git a/test/mcmc/hmc.jl b/test/mcmc/hmc.jl
index 52aff59e9..fe18fa773 100644
--- a/test/mcmc/hmc.jl
+++ b/test/mcmc/hmc.jl
@@ -1,4 +1,4 @@
-@testset "hmc.jl" begin
+@testset "Testing hmc.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false))
     # Set a seed
     rng = StableRNG(123)
     @numerical_testset "constrained bounded" begin
@@ -15,7 +15,7 @@
         chain = sample(
             rng,
             constrained_test(obs),
-            HMC(1.5, 3),# using a large step size (1.5)
+            HMC(1.5, 3; adtype=adbackend),# using a large step size (1.5)
             1000)
 
         check_numerical(chain, [:p], [10/14], atol=0.1)
@@ -35,13 +35,13 @@
         chain = sample(
             rng,
             constrained_simplex_test(obs12),
-            HMC(0.75, 2),
+            HMC(0.75, 2; adtype=adbackend),
             1000)
 
         check_numerical(chain, ["ps[1]", "ps[2]"], [5/16, 11/16], atol=0.015)
     end
     @numerical_testset "hmc reverse diff" begin
-        alg = HMC(0.1, 10)
+        alg = HMC(0.1, 10; adtype=adbackend)
         res = sample(rng, gdemo_default, alg, 4000)
         check_gdemo(res, rtol=0.1)
     end
@@ -53,7 +53,7 @@
         model_f = hmcmatrixsup()
         n_samples = 1_000
         vs = map(1:3) do _
-            chain = sample(rng, model_f, HMC(0.15, 7), n_samples)
+            chain = sample(rng, model_f, HMC(0.15, 7; adtype=adbackend), n_samples)
             r = reshape(Array(group(chain, :v)), n_samples, 2, 2)
             reshape(mean(r; dims = 1), 2, 2)
         end
@@ -103,14 +103,14 @@
         end
 
         # Sampling
-        chain = sample(rng, bnn(ts), HMC(0.1, 5), 10)
+        chain = sample(rng, bnn(ts), HMC(0.1, 5; adtype=adbackend), 10)
     end
 
-    @numerical_testset "hmcda inference" begin
-        alg1 = HMCDA(500, 0.8, 0.015)
-        # alg2 = Gibbs(HMCDA(200, 0.8, 0.35, :m), HMC(0.25, 3, :s))
-
-        # alg3 = Gibbs(HMC(0.25, 3, :m), PG(30, 3, :s))
+    @numerical_testset "hmcda inference" begin
+        alg1 = HMCDA(500, 0.8, 0.015; adtype=adbackend)
+        # alg2 = Gibbs(HMCDA(200, 0.8, 0.35, :m; adtype=adbackend), HMC(0.25, 3, :s; adtype=adbackend))
+
+        # alg3 = Gibbs(HMC(0.25, 3, :m; adtype=adbackend), PG(30, 3, :s))
         # alg3 = PG(50, 2000)
 
         res1 = sample(rng, gdemo_default, alg1, 3000)
@@ -122,27 +122,27 @@
         # @test mean(res2[:m]) ≈ 7/6 atol=0.2
     end
 
-    @numerical_testset "hmcda+gibbs inference" begin
+    @numerical_testset "hmcda+gibbs inference" begin
         rng = StableRNG(123)
         Random.seed!(12345) # particle samplers do not support user-provided `rng` yet
-        alg3 = Gibbs(PG(20, :s), HMCDA(500, 0.8, 0.25, init_ϵ = 0.05, :m))
+        alg3 = Gibbs(PG(20, :s), HMCDA(500, 0.8, 0.25, :m; init_ϵ=0.05, adtype=adbackend))
         res3 = sample(rng, gdemo_default, alg3, 3000, discard_initial=1000)
         check_gdemo(res3)
     end
 
     @turing_testset "hmcda constructor" begin
-        alg = HMCDA(0.8, 0.75)
+        alg = HMCDA(0.8, 0.75; adtype=adbackend)
         println(alg)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "HMCDA"
 
-        alg = HMCDA(200, 0.8, 0.75)
+        alg = HMCDA(200, 0.8, 0.75; adtype=adbackend)
         println(alg)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "HMCDA"
 
-        alg = HMCDA(200, 0.8, 0.75, :s)
+        alg = HMCDA(200, 0.8, 0.75, :s; adtype=adbackend)
         println(alg)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "HMCDA"
@@ -151,36 +151,36 @@
         @test isa(sampler, Sampler{<:Turing.Hamiltonian})
     end
     @numerical_testset "nuts inference" begin
-        alg = NUTS(1000, 0.8)
+        alg = NUTS(1000, 0.8; adtype=adbackend)
         res = sample(rng, gdemo_default, alg, 6000)
         check_gdemo(res)
     end
     @turing_testset "nuts constructor" begin
-        alg = NUTS(200, 0.65)
+        alg = NUTS(200, 0.65; adtype=adbackend)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "NUTS"
 
-        alg = NUTS(0.65)
+        alg = NUTS(0.65; adtype=adbackend)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "NUTS"
 
-        alg = NUTS(200, 0.65, :m)
+        alg = NUTS(200, 0.65, :m; adtype=adbackend)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "NUTS"
     end
     @turing_testset "check discard" begin
-        alg = NUTS(100, 0.8)
+        alg = NUTS(100, 0.8; adtype=adbackend)
 
-        c1 = sample(rng, gdemo_default, alg, 500, discard_adapt = true)
-        c2 = sample(rng, gdemo_default, alg, 500, discard_adapt = false)
+        c1 = sample(rng, gdemo_default, alg, 500, discard_adapt=true)
+        c2 = sample(rng, gdemo_default, alg, 500, discard_adapt=false)
 
         @test size(c1, 1) == 500
         @test size(c2, 1) == 500
     end
     @turing_testset "AHMC resize" begin
-        alg1 = Gibbs(PG(10, :m), NUTS(100, 0.65, :s))
-        alg2 = Gibbs(PG(10, :m), HMC(0.1, 3, :s))
-        alg3 = Gibbs(PG(10, :m), HMCDA(100, 0.65, 0.3, :s))
+        alg1 = Gibbs(PG(10, :m), NUTS(100, 0.65, :s; adtype=adbackend))
+        alg2 = Gibbs(PG(10, :m), HMC(0.1, 3, :s; adtype=adbackend))
+        alg3 = Gibbs(PG(10, :m), HMCDA(100, 0.65, 0.3, :s; adtype=adbackend))
         @test sample(rng, gdemo_default, alg1, 300) isa Chains
         @test sample(rng, gdemo_default, alg2, 300) isa Chains
         @test sample(rng, gdemo_default, alg3, 300) isa Chains
@@ -192,25 +192,25 @@
             m = Matrix{T}(undef, 2, 3)
             m .~ MvNormal(zeros(2), I)
         end
-        @test sample(rng, mwe1(), HMC(0.2, 4), 1_000) isa Chains
+        @test sample(rng, mwe1(), HMC(0.2, 4; adtype=adbackend), 1_000) isa Chains
 
-        @model function mwe2(::Type{T} = Matrix{Float64}) where T
+        @model function mwe2(::Type{T}=Matrix{Float64}) where {T}
             m = T(undef, 2, 3)
             m .~ MvNormal(zeros(2), I)
         end
-        @test sample(rng, mwe2(), HMC(0.2, 4), 1_000) isa Chains
+        @test sample(rng, mwe2(), HMC(0.2, 4; adtype=adbackend), 1_000) isa Chains
 
         # https://github.com/TuringLang/Turing.jl/issues/1308
-        @model function mwe3(::Type{T} = Array{Float64}) where T
+        @model function mwe3(::Type{T}=Array{Float64}) where {T}
             m = T(undef, 2, 3)
             m .~ MvNormal(zeros(2), I)
         end
-        @test sample(rng, mwe3(), HMC(0.2, 4), 1_000) isa Chains
+        @test sample(rng, mwe3(), HMC(0.2, 4; adtype=adbackend), 1_000) isa Chains
     end
 
     # issue #1923
     @turing_testset "reproducibility" begin
-        alg = NUTS(1000, 0.8)
+        alg = NUTS(1000, 0.8; adtype=adbackend)
         res1 = sample(StableRNG(123), gdemo_default, alg, 1000)
         res2 = sample(StableRNG(123), gdemo_default, alg, 1000)
         res3 = sample(StableRNG(123), gdemo_default, alg, 1000)
@@ -224,7 +224,7 @@
             s ~ truncated(Normal(3, 1), lower=0)
             m ~ Normal(0, sqrt(s))
         end
-        alg = NUTS(1000, 0.8)
+        alg = NUTS(1000, 0.8; adtype=adbackend)
         gdemo_default_prior = DynamicPPL.contextualize(demo_hmc_prior(), DynamicPPL.PriorContext())
         chain = sample(gdemo_default_prior, alg, 10_000)
         check_numerical(chain, [:s, :m], [mean(truncated(Normal(3, 1); lower=0)), 0], atol=0.1)
@@ -243,7 +243,7 @@
             :warn,
             "failed to find valid initial parameters in 10 tries; consider providing explicit initial parameters using the `init_params` keyword",
         ) (:info,) match_mode=:any begin
-            sample(demo_warn_init_params(), NUTS(), 5)
+            sample(demo_warn_init_params(), NUTS(; adtype=adbackend), 5)
         end
     end
 end
diff --git a/test/mcmc/sghmc.jl b/test/mcmc/sghmc.jl
index 959f40008..4405b505a 100644
--- a/test/mcmc/sghmc.jl
+++ b/test/mcmc/sghmc.jl
@@ -1,16 +1,16 @@
-@testset "sghmc.jl" begin
+@testset "Testing sghmc.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false))
     @turing_testset "sghmc constructor" begin
-        alg = SGHMC(; learning_rate=0.01, momentum_decay=0.1)
+        alg = SGHMC(; learning_rate=0.01, momentum_decay=0.1, adtype=adbackend)
         @test alg isa SGHMC
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGHMC}
 
-        alg = SGHMC(:m; learning_rate=0.01, momentum_decay=0.1)
+        alg = SGHMC(:m; learning_rate=0.01, momentum_decay=0.1, adtype=adbackend)
         @test alg isa SGHMC
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGHMC}
 
-        alg = SGHMC(:s; learning_rate=0.01, momentum_decay=0.1)
+        alg = SGHMC(:s; learning_rate=0.01, momentum_decay=0.1, adtype=adbackend)
         @test alg isa SGHMC
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGHMC}
@@ -18,25 +18,25 @@
 
     @numerical_testset "sghmc inference" begin
         rng = StableRNG(123)
-        alg = SGHMC(; learning_rate=0.02, momentum_decay=0.5)
+        alg = SGHMC(; learning_rate=0.02, momentum_decay=0.5, adtype=adbackend)
         chain = sample(rng, gdemo_default, alg, 10_000)
-        check_gdemo(chain, atol = 0.1)
+        check_gdemo(chain, atol=0.1)
     end
 end
 
-@testset "sgld.jl" begin
+@testset "Testing sgld.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false))
     @turing_testset "sgld constructor" begin
-        alg = SGLD(; stepsize = PolynomialStepsize(0.25))
+        alg = SGLD(; stepsize=PolynomialStepsize(0.25), adtype=adbackend)
         @test alg isa SGLD
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGLD}
 
-        alg = SGLD(:m; stepsize = PolynomialStepsize(0.25))
+        alg = SGLD(:m; stepsize=PolynomialStepsize(0.25), adtype=adbackend)
         @test alg isa SGLD
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGLD}
 
-        alg = SGLD(:s; stepsize = PolynomialStepsize(0.25))
+        alg = SGLD(:s; stepsize=PolynomialStepsize(0.25), adtype=adbackend)
         @test alg isa SGLD
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGLD}
diff --git a/test/optimisation/OptimInterface.jl b/test/optimisation/OptimInterface.jl
index 5ac338876..919de9702 100644
--- a/test/optimisation/OptimInterface.jl
+++ b/test/optimisation/OptimInterface.jl
@@ -120,56 +120,54 @@ end
     end
 
     # FIXME: Some models doesn't work for Tracker and ReverseDiff.
-    if Turing.Essential.ADBACKEND[] === :forwarddiff
-        @testset "MAP for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS
-            result_true = DynamicPPL.TestUtils.posterior_optima(model)
+    @testset "MAP for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS
+        result_true = DynamicPPL.TestUtils.posterior_optima(model)
 
-            @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(), NelderMead()]
-                result = optimize(model, MAP(), optimizer)
-                vals = result.values
+        @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(), NelderMead()]
+            result = optimize(model, MAP(), optimizer)
+            vals = result.values
 
-                for vn in DynamicPPL.TestUtils.varnames(model)
-                    for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn))
-                        @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05
-                    end
+            for vn in DynamicPPL.TestUtils.varnames(model)
+                for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn))
+                    @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05
                 end
             end
         end
+    end
 
-        # Some of the models have one variance parameter per observation, and so
-        # the MLE should have the variances set to 0. Since we're working in
-        # transformed space, this corresponds to `-Inf`, which is of course not achievable.
-        # In particular, it can result in "early termniation" of the optimization process
-        # because we hit NaNs, etc. To avoid this, we set the `g_tol` and the `f_tol` to
-        # something larger than the default.
-        allowed_incorrect_mle = [
-            DynamicPPL.TestUtils.demo_dot_assume_dot_observe,
-            DynamicPPL.TestUtils.demo_assume_index_observe,
-            DynamicPPL.TestUtils.demo_assume_multivariate_observe,
-            DynamicPPL.TestUtils.demo_assume_observe_literal,
-            DynamicPPL.TestUtils.demo_dot_assume_observe_submodel,
-            DynamicPPL.TestUtils.demo_dot_assume_dot_observe_matrix,
-            DynamicPPL.TestUtils.demo_dot_assume_matrix_dot_observe_matrix,
-            DynamicPPL.TestUtils.demo_assume_submodel_observe_index_literal,
-            DynamicPPL.TestUtils.demo_dot_assume_observe_index_literal,
-            DynamicPPL.TestUtils.demo_assume_matrix_dot_observe_matrix
-        ]
+    # Some of the models have one variance parameter per observation, and so
+    # the MLE should have the variances set to 0. Since we're working in
+    # transformed space, this corresponds to `-Inf`, which is of course not achievable.
+    # In particular, it can result in "early termination" of the optimization process
+    # because we hit NaNs, etc. To avoid this, we set the `g_tol` and the `f_tol` to
+    # something larger than the default.
+    allowed_incorrect_mle = [
+        DynamicPPL.TestUtils.demo_dot_assume_dot_observe,
+        DynamicPPL.TestUtils.demo_assume_index_observe,
+        DynamicPPL.TestUtils.demo_assume_multivariate_observe,
+        DynamicPPL.TestUtils.demo_assume_observe_literal,
+        DynamicPPL.TestUtils.demo_dot_assume_observe_submodel,
+        DynamicPPL.TestUtils.demo_dot_assume_dot_observe_matrix,
+        DynamicPPL.TestUtils.demo_dot_assume_matrix_dot_observe_matrix,
+        DynamicPPL.TestUtils.demo_assume_submodel_observe_index_literal,
+        DynamicPPL.TestUtils.demo_dot_assume_observe_index_literal,
+        DynamicPPL.TestUtils.demo_assume_matrix_dot_observe_matrix
+    ]
-        @testset "MLE for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS
-            result_true = DynamicPPL.TestUtils.likelihood_optima(model)
-
-            # `NelderMead` seems to struggle with convergence here, so we exclude it.
-            @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(),]
-                result = optimize(model, MLE(), optimizer, Optim.Options(g_tol=1e-3, f_tol=1e-3))
-                vals = result.values
-
-                for vn in DynamicPPL.TestUtils.varnames(model)
-                    for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn))
-                        if model.f in allowed_incorrect_mle
-                            @test isfinite(get(result_true, vn_leaf))
-                        else
-                            @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05
-                        end
+    @testset "MLE for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS
+        result_true = DynamicPPL.TestUtils.likelihood_optima(model)
+
+        # `NelderMead` seems to struggle with convergence here, so we exclude it.
+        @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(),]
+            result = optimize(model, MLE(), optimizer, Optim.Options(g_tol=1e-3, f_tol=1e-3))
+            vals = result.values
+
+            for vn in DynamicPPL.TestUtils.varnames(model)
+                for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn))
+                    if model.f in allowed_incorrect_mle
+                        @test isfinite(get(result_true, vn_leaf))
+                    else
+                        @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05
                     end
                 end
             end
diff --git a/test/runtests.jl b/test/runtests.jl
index 5cb3ed51f..0000e32a6 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -64,34 +64,26 @@ macro timeit_include(path::AbstractString) :(@timeit TIMEROUTPUT $path include($
         @timeit_include("mcmc/ess.jl")
         @timeit_include("mcmc/is.jl")
     end
+
+    @timeit TIMEROUTPUT "inference" begin
+        @testset "inference with samplers" begin
+            @timeit_include("mcmc/gibbs.jl")
+            @timeit_include("mcmc/gibbs_conditional.jl")
+            @timeit_include("mcmc/hmc.jl")
+            @timeit_include("mcmc/Inference.jl")
+            @timeit_include("mcmc/sghmc.jl")
+            @timeit_include("mcmc/abstractmcmc.jl")
+            @timeit_include("mcmc/mh.jl")
+            @timeit_include("ext/dynamichmc.jl")
+        end
 
-    Turing.setrdcache(false)
-    for adbackend in (:forwarddiff, :reversediff)
-        @timeit TIMEROUTPUT "inference: $adbackend" begin
-            Turing.setadbackend(adbackend)
-            @info "Testing $(adbackend)"
-            @testset "inference: $adbackend" begin
-                @testset "samplers" begin
-                    @timeit_include("mcmc/gibbs.jl")
-                    @timeit_include("mcmc/gibbs_conditional.jl")
-                    @timeit_include("mcmc/hmc.jl")
-                    @timeit_include("mcmc/Inference.jl")
-                    @timeit_include("mcmc/sghmc.jl")
-                    @timeit_include("mcmc/abstractmcmc.jl")
-                    @timeit_include("mcmc/mh.jl")
-                    @timeit_include("ext/dynamichmc.jl")
-                end
-            end
-
-            @testset "variational algorithms : $adbackend" begin
-                @timeit_include("variational/advi.jl")
-            end
-
-            @testset "mode estimation : $adbackend" begin
-                @timeit_include("optimisation/OptimInterface.jl")
-                @timeit_include("ext/Optimisation.jl")
-            end
+        @testset "variational algorithms" begin
+            @timeit_include("variational/advi.jl")
+        end
+        @testset "mode estimation" begin
+            @timeit_include("optimisation/OptimInterface.jl")
+            @timeit_include("ext/Optimisation.jl")
         end
     end
 
@@ -99,7 +91,6 @@ macro timeit_include(path::AbstractString) :(@timeit TIMEROUTPUT $path include($
         @timeit_include("variational/optimisers.jl")
     end
 
-    Turing.setadbackend(:forwarddiff)
@testset "stdlib" begin @timeit_include("stdlib/distributions.jl") @timeit_include("stdlib/RandomMeasures.jl")