Remove global AD flag "ADBACKEND" and function suite #2134

Merged · 15 commits · Dec 13, 2023
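In short: the global, mutable AD switches (`ADBACKEND`, `setadbackend`, `setadsafe`, `setchunksize`, `setrdcache`, `getrdcache`) are gone, and every sampler now takes an `adtype` keyword instead. A minimal before/after sketch — the `demo` model and the data are hypothetical, added here only for illustration, and it assumes the `Auto*` types remain exported (they do appear in the export lists below):

```julia
using Turing
using ReverseDiff  # assumption: the backend package must be loaded for AutoReverseDiff to work at sampling time

# Hypothetical toy model, not part of this PR.
@model function demo(x)
    μ ~ Normal(0, 1)
    x ~ Normal(μ, 1)
end

# Before this PR: one process-wide switch affected every subsequent sampler.
# Turing.setadbackend(:reversediff)      # removed by this PR
# sample(demo(1.0), HMC(0.1, 5), 1_000)

# After this PR: each sampler carries its own AD backend.
sample(demo(1.0), HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0)), 1_000)
sample(demo(1.0), HMC(0.1, 5; adtype=AutoReverseDiff(false)), 1_000)
```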
10 changes: 4 additions & 6 deletions benchmarks/benchmarks_suite.jl
@@ -63,7 +63,7 @@ n_adapts = 2_000

BenchmarkSuite["mnormal"]["hmc"] = @benchmarkable sample($(target(dim)), $(HMC(0.1, 5)), $n_samples)

-## MvNormal: ForwardDiff vs BackwardDiff (Tracker)
+## MvNormal: ForwardDiff vs ReverseDiff

@model function mdemo(d, N)
Θ = Vector(undef, N)
@@ -77,10 +77,8 @@ A = rand(Wishart(dim2, Matrix{Float64}(I, dim2, dim2)));
d = MvNormal(zeros(dim2), A)

# ForwardDiff
-Turing.setadbackend(:forwarddiff)
-BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)
+BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0))), 5000)


-# BackwardDiff
-Turing.setadbackend(:reversediff)
-BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)
+# ReverseDiff
+BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoReverseDiff(false))), 5000)
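A note on the `AutoReverseDiff(false)` spelling above: the positional `Bool` is the `compile` flag (the removed `ADBackend(::Val{:reversediff})` method further down constructed it via `compile=getrdcache()`), so these two forms should be equivalent:

```julia
AutoReverseDiff(false)            # tape compilation disabled, positional form
AutoReverseDiff(; compile=false)  # same thing, keyword form
```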
2 changes: 1 addition & 1 deletion ext/TuringDynamicHMCExt.jl
@@ -34,7 +34,7 @@ end
function DynamicNUTS(
spl::DynamicHMC.NUTS = DynamicHMC.NUTS(),
space::Tuple = ();
-    adtype::ADTypes.AbstractADType = Turing.ADBackend()
+    adtype::ADTypes.AbstractADType = ADTypes.AutoForwardDiff(; chunksize=0)
)
return DynamicNUTS{typeof(adtype),space,typeof(spl)}(spl, adtype)
end
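A hedged usage sketch for this extension after the change — it assumes DynamicHMC.jl is loaded (so the extension activates), that the sampler is reachable as `Turing.DynamicNUTS`, and it reuses the hypothetical `demo` model from the first sketch:

```julia
using Turing, DynamicHMC

sample(demo(1.0), Turing.DynamicNUTS(), 1_000)  # ForwardDiff default; no global flag consulted
sample(demo(1.0), Turing.DynamicNUTS(; adtype=AutoReverseDiff(false)), 1_000)  # per-call override
```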
1 change: 0 additions & 1 deletion ext/TuringOptimExt.jl
@@ -179,7 +179,6 @@ map_est = optimize(model, MAP())
map_est = optimize(model, MAP(), NelderMead())
```
"""
-
function Optim.optimize(model::DynamicPPL.Model, ::Turing.MAP, options::Optim.Options=Optim.Options(); kwargs...)
ctx = Turing.OptimizationContext(DynamicPPL.DefaultContext())
f = Turing.OptimLogDensity(model, ctx)
4 changes: 0 additions & 4 deletions src/Turing.jl
@@ -98,10 +98,6 @@ export @model, # modelling
@prob_str,
externalsampler,

-setchunksize, # helper
-setadbackend,
-setadsafe,
-
setprogress!, # debugging

Flat,
9 changes: 0 additions & 9 deletions src/essential/Essential.jl
@@ -38,20 +38,11 @@ export @model,
effectiveSampleSize,
sweep!,
ResampleWithESSThreshold,
-ADBackend,
-setadbackend,
-setadsafe,
AutoForwardDiff,
AutoTracker,
AutoZygote,
AutoReverseDiff,
-value,
-CHUNKSIZE,
-ADBACKEND,
-setchunksize,
-setrdcache,
-getrdcache,
verifygrad,
@logprob_str,
@prob_str

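Note that the `Auto*` backend types stay exported; only the mutable-global helpers disappear. As a hedged reading of the export list above:

```julia
using Turing: AutoForwardDiff, AutoTracker, AutoZygote, AutoReverseDiff  # still exported
# Turing.setadbackend(:forwarddiff)  # no longer defined after this PR
```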
60 changes: 2 additions & 58 deletions src/essential/ad.jl
@@ -1,75 +1,19 @@
-##############################
-# Global variables/constants #
-##############################
-const ADBACKEND = Ref(:forwarddiff)
-setadbackend(backend_sym::Symbol) = setadbackend(Val(backend_sym))
-function setadbackend(backend::Val)
-    _setadbackend(backend)
-    AdvancedVI.setadbackend(backend)
-end
-
-function _setadbackend(::Val{:forwarddiff})
-    ADBACKEND[] = :forwarddiff
-end
-function _setadbackend(::Val{:tracker})
-    @warn "Usage of Tracker.jl with Turing.jl is no longer being actively tested and maintained; please use at your own risk. See Zygote.jl or ReverseDiff.jl for fully supported reverse-mode backends."
-    ADBACKEND[] = :tracker
-end
-function _setadbackend(::Val{:zygote})
-    ADBACKEND[] = :zygote
-end
-function _setadbackend(::Val{:reversediff})
-    ADBACKEND[] = :reversediff
-end
-
-const ADSAFE = Ref(false)
-function setadsafe(switch::Bool)
-    @info("[Turing]: global ADSAFE is set as $switch")
-    ADSAFE[] = switch
-end
-
-const CHUNKSIZE = Ref(0) # 0 means letting ForwardDiff set it automatically
-
-function setchunksize(chunk_size::Int)
-    @info("[Turing]: AD chunk size is set as $chunk_size")
-    CHUNKSIZE[] = chunk_size
-    AdvancedVI.setchunksize(chunk_size)
-end
getchunksize(::AutoForwardDiff{chunk}) where {chunk} = chunk

standardtag(::AutoForwardDiff{<:Any,Nothing}) = true
standardtag(::AutoForwardDiff) = false

-const RDCache = Ref(false)
-
-setrdcache(b::Bool) = setrdcache(Val(b))
-setrdcache(::Val{false}) = RDCache[] = false
-setrdcache(::Val{true}) = RDCache[] = true
-
-getrdcache() = RDCache[]
-
-ADBackend() = ADBackend(ADBACKEND[])
-ADBackend(T::Symbol) = ADBackend(Val(T))
-
-ADBackend(::Val{:forwarddiff}) = AutoForwardDiff(; chunksize=CHUNKSIZE[])
-ADBackend(::Val{:tracker}) = AutoTracker()
-ADBackend(::Val{:zygote}) = AutoZygote()
-ADBackend(::Val{:reversediff}) = AutoReverseDiff(; compile=getrdcache())
-
-ADBackend(::Val) = error("The requested AD backend is not available. Make sure to load all required packages.")

"""
getADbackend(alg)

Find the autodifferentiation backend of the algorithm `alg`.
"""
getADbackend(spl::Sampler) = getADbackend(spl.alg)
-getADbackend(::SampleFromPrior) = ADBackend()
+getADbackend(::SampleFromPrior) = AutoForwardDiff(; chunksize=0) # TODO: remove `getADbackend`
getADbackend(ctx::DynamicPPL.SamplingContext) = getADbackend(ctx.sampler)
getADbackend(ctx::DynamicPPL.AbstractContext) = getADbackend(DynamicPPL.NodeTrait(ctx), ctx)

-getADbackend(::DynamicPPL.IsLeaf, ctx::DynamicPPL.AbstractContext) = ADBackend()
+getADbackend(::DynamicPPL.IsLeaf, ctx::DynamicPPL.AbstractContext) = AutoForwardDiff(; chunksize=0)
getADbackend(::DynamicPPL.IsParent, ctx::DynamicPPL.AbstractContext) = getADbackend(DynamicPPL.childcontext(ctx))

function LogDensityProblemsAD.ADgradient(ℓ::Turing.LogDensityFunction)
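One practical consequence of dropping the global state: samplers in the same session can use different backends without interfering. A hedged sketch, reusing the hypothetical `demo` model from the first example:

```julia
chain_fwd = sample(demo(1.0), HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0)), 500)
chain_rev = sample(demo(1.0), HMC(0.1, 5; adtype=AutoReverseDiff(false)), 500)
# Neither call mutates process-wide state, so their order no longer matters.
```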
38 changes: 20 additions & 18 deletions src/mcmc/hmc.jl
@@ -32,7 +32,7 @@ end
###

"""
-    HMC(ϵ::Float64, n_leapfrog::Int; adtype::ADTypes.AbstractADType = Turing.ADBackend())
+    HMC(ϵ::Float64, n_leapfrog::Int; adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0))

Hamiltonian Monte Carlo sampler with static trajectory.

@@ -41,7 +41,7 @@ Hamiltonian Monte Carlo sampler with static trajectory.
- `ϵ`: The leapfrog step size to use.
- `n_leapfrog`: The number of leapfrog steps to use.
- `adtype`: The automatic differentiation (AD) backend.
-  If it is not provided, the currently activated AD backend in Turing is used.
+  If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined.

# Usage

@@ -67,15 +67,15 @@ struct HMC{AD, space, metricT <: AHMC.AbstractMetric} <: StaticHamiltonian
adtype::AD
end

-function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType = ADBackend()) where {metricT <: AHMC.AbstractMetric}
-    return HMC{typeof(adtype), space, metricT}(ϵ, n_leapfrog, adtype)
+function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0)) where {metricT<:AHMC.AbstractMetric}
+    return HMC{typeof(adtype),space,metricT}(ϵ, n_leapfrog, adtype)
end
function HMC(
ϵ::Float64,
n_leapfrog::Int,
space::Symbol...;
metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
return HMC(ϵ, n_leapfrog, metricT, space; adtype = adtype)
end
Expand Down Expand Up @@ -280,7 +280,7 @@ end
"""
HMCDA(
        n_adapts::Int, δ::Float64, λ::Float64; ϵ::Float64 = 0.0,
-       adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+       adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
)

Hamiltonian Monte Carlo sampler with Dual Averaging algorithm.
@@ -298,7 +298,7 @@ HMCDA(200, 0.65, 0.3)
- `λ`: Target leapfrog length.
- `ϵ`: Initial step size; 0 means it is determined automatically by Turing.
- `adtype`: The automatic differentiation (AD) backend.
-  If it is not provided, the currently activated AD backend in Turing is used.
+  If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined.

# Reference

@@ -316,16 +316,16 @@ struct HMCDA{AD, space, metricT <: AHMC.AbstractMetric} <: AdaptiveHamiltonian
adtype::AD
end

-function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType = ADBackend()) where {metricT <: AHMC.AbstractMetric}
-    return HMCDA{typeof(adtype), space, metricT}(n_adapts, δ, λ, ϵ, adtype)
+function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0)) where {metricT<:AHMC.AbstractMetric}
+    return HMCDA{typeof(adtype),space,metricT}(n_adapts, δ, λ, ϵ, adtype)
end

function HMCDA(
δ::Float64,
λ::Float64;
init_ϵ::Float64=0.0,
metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
return HMCDA(-1, δ, λ, init_ϵ, metricT, (); adtype = adtype)
end
@@ -347,14 +347,14 @@ function HMCDA(
space::Symbol...;
init_ϵ::Float64=0.0,
metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
return HMCDA(n_adapts, δ, λ, init_ϵ, metricT, space; adtype = adtype)
end
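Hedged examples of the resulting HMCDA call patterns (the numeric arguments are arbitrary):

```julia
HMCDA(200, 0.65, 0.3)                                 # ForwardDiff default
HMCDA(200, 0.65, 0.3; adtype=AutoReverseDiff(false))  # explicit backend per sampler
```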


"""
NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0)
    NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0, adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0))

No-U-Turn Sampler (NUTS) sampler.

@@ -372,6 +372,8 @@ Arguments:
- `max_depth::Int` : Maximum doubling tree depth.
- `Δ_max::Float64` : Maximum divergence during doubling tree.
- `init_ϵ::Float64` : Initial step size; 0 means it is determined automatically using a heuristic procedure.
+- `adtype::ADTypes.AbstractADType` : The automatic differentiation (AD) backend.
+  If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined.

"""
struct NUTS{AD,space,metricT<:AHMC.AbstractMetric} <: AdaptiveHamiltonian
@@ -391,9 +393,9 @@ function NUTS(
ϵ::Float64,
::Type{metricT},
space::Tuple;
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
) where {metricT}
-    return NUTS{typeof(adtype), space, metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype)
+    return NUTS{typeof(adtype),space,metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype)
end

function NUTS(
@@ -413,9 +415,9 @@ function NUTS(
Δ_max::Float64=1000.0,
init_ϵ::Float64=0.0,
metricT=AHMC.DiagEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
-    NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype = adtype)
+    NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype=adtype)
end

function NUTS(
@@ -424,9 +426,9 @@ function NUTS(
Δ_max::Float64=1000.0,
init_ϵ::Float64=0.0,
metricT=AHMC.DiagEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
-    NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype = adtype)
+    NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype=adtype)
end

function NUTS(; kwargs...)
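For completeness, hedged examples of the NUTS constructors touched above (values arbitrary):

```julia
NUTS()                                     # all defaults
NUTS(0.65; adtype=AutoReverseDiff(false))  # target acceptance rate plus explicit backend
NUTS(1_000, 0.65; max_depth=8)             # explicit number of adaptation steps
```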
16 changes: 8 additions & 8 deletions src/mcmc/sghmc.jl
@@ -23,13 +23,13 @@ end
space::Symbol...;
learning_rate::Real,
momentum_decay::Real,
-    adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+    adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
)

Create a Stochastic Gradient Hamiltonian Monte Carlo (SGHMC) sampler.

-If the automatic differentiation (AD) backend `adtype` is not provided, the currently activated
-AD backend in Turing is used.
+If the automatic differentiation (AD) backend `adtype` is not provided, ForwardDiff
+with automatically determined `chunksize` is used.

# Reference

@@ -41,7 +41,7 @@ function SGHMC(
space::Symbol...;
learning_rate::Real,
momentum_decay::Real,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
_learning_rate, _momentum_decay = promote(learning_rate, momentum_decay)
return SGHMC{typeof(adtype),space,typeof(_learning_rate)}(_learning_rate, _momentum_decay, adtype)
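A hedged construction example matching the signature above (the rates are arbitrary):

```julia
SGHMC(; learning_rate=0.01, momentum_decay=0.1, adtype=AutoReverseDiff(false))
```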
@@ -163,15 +163,15 @@ end
SGLD(
space::Symbol...;
stepsize = PolynomialStepsize(0.01),
-    adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+    adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
)

Stochastic gradient Langevin dynamics (SGLD) sampler.

By default, a polynomially decaying stepsize is used.

-If the automatic differentiation (AD) backend `adtype` is not provided, the currently activated
-AD backend in Turing is used.
+If the automatic differentiation (AD) backend `adtype` is not provided, ForwardDiff
+with automatically determined `chunksize` is used.

# Reference

@@ -184,7 +184,7 @@ See also: [`PolynomialStepsize`](@ref)
function SGLD(
space::Symbol...;
stepsize = PolynomialStepsize(0.01),
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
)
return SGLD{typeof(adtype),space,typeof(stepsize)}(stepsize, adtype)
end
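And the SGLD counterpart, again as a hedged sketch using the `PolynomialStepsize` referenced in the docstring above:

```julia
SGLD(; stepsize=PolynomialStepsize(0.01), adtype=AutoForwardDiff(; chunksize=0))
```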