Remove global AD flag "ADBACKEND" and function suite #2134

Merged · 15 commits · Dec 13, 2023
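In short: the global, mutable AD switches (`ADBACKEND`, `setadbackend`, `setadsafe`, `setchunksize`, `setrdcache`, `getrdcache`) are gone, and every sampler now takes an `adtype` keyword instead. A minimal before/after sketch — the `demo` model and the data are hypothetical, added here only for illustration, and it assumes the `Auto*` types remain exported (they do appear in the export lists below):

```julia
using Turing
using ReverseDiff  # assumption: the backend package must be loaded for AutoReverseDiff to work at sampling time

# Hypothetical toy model, not part of this PR.
@model function demo(x)
    μ ~ Normal(0, 1)
    x ~ Normal(μ, 1)
end

# Before this PR: one process-wide switch affected every subsequent sampler.
# Turing.setadbackend(:reversediff)      # removed by this PR
# sample(demo(1.0), HMC(0.1, 5), 1_000)

# After this PR: each sampler carries its own AD backend.
sample(demo(1.0), HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0)), 1_000)
sample(demo(1.0), HMC(0.1, 5; adtype=AutoReverseDiff(false)), 1_000)
```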
10 changes: 4 additions & 6 deletions benchmarks/benchmarks_suite.jl
@@ -63,7 +63,7 @@ n_adapts = 2_000

BenchmarkSuite["mnormal"]["hmc"] = @benchmarkable sample($(target(dim)), $(HMC(0.1, 5)), $n_samples)

-## MvNormal: ForwardDiff vs BackwardDiff (Tracker)
+## MvNormal: ForwardDiff vs ReverseDiff

@model function mdemo(d, N)
Θ = Vector(undef, N)
@@ -77,10 +77,8 @@ A = rand(Wishart(dim2, Matrix{Float64}(I, dim2, dim2)));
d = MvNormal(zeros(dim2), A)

# ForwardDiff
-Turing.setadbackend(:forwarddiff)
-BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)
+BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0))), 5000)


-# BackwardDiff
-Turing.setadbackend(:reversediff)
-BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)
+# ReverseDiff
+BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoReverseDiff(false))), 5000)
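A note on the `AutoReverseDiff(false)` spelling above: the positional `Bool` is the `compile` flag (the removed `ADBackend(::Val{:reversediff})` method further down constructed it via `compile=getrdcache()`), so these two forms should be equivalent:

```julia
AutoReverseDiff(false)            # tape compilation disabled, positional form
AutoReverseDiff(; compile=false)  # same thing, keyword form
```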
2 changes: 1 addition & 1 deletion ext/TuringDynamicHMCExt.jl
@@ -34,7 +34,7 @@ end
function DynamicNUTS(
spl::DynamicHMC.NUTS = DynamicHMC.NUTS(),
space::Tuple = ();
-    adtype::ADTypes.AbstractADType = Turing.ADBackend()
+    adtype::ADTypes.AbstractADType = ADTypes.AutoForwardDiff(; chunksize=0)
)
return DynamicNUTS{typeof(adtype),space,typeof(spl)}(spl, adtype)
end
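A hedged usage sketch for this extension after the change — it assumes DynamicHMC.jl is loaded (so the extension activates), that the sampler is reachable as `Turing.DynamicNUTS`, and it reuses the hypothetical `demo` model from the first sketch:

```julia
using Turing, DynamicHMC

sample(demo(1.0), Turing.DynamicNUTS(), 1_000)  # ForwardDiff default; no global flag consulted
sample(demo(1.0), Turing.DynamicNUTS(; adtype=AutoReverseDiff(false)), 1_000)  # per-call override
```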
1 change: 0 additions & 1 deletion ext/TuringOptimExt.jl
@@ -179,7 +179,6 @@ map_est = optimize(model, MAP())
map_est = optimize(model, MAP(), NelderMead())
```
"""
-
function Optim.optimize(model::DynamicPPL.Model, ::Turing.MAP, options::Optim.Options=Optim.Options(); kwargs...)
ctx = Turing.OptimizationContext(DynamicPPL.DefaultContext())
f = Turing.OptimLogDensity(model, ctx)
4 changes: 0 additions & 4 deletions src/Turing.jl
@@ -98,10 +98,6 @@ export @model, # modelling
@prob_str,
externalsampler,

-setchunksize, # helper
-setadbackend,
-setadsafe,
-
setprogress!, # debugging

Flat,
9 changes: 0 additions & 9 deletions src/essential/Essential.jl
@@ -38,20 +38,11 @@ export @model,
effectiveSampleSize,
sweep!,
ResampleWithESSThreshold,
-ADBackend,
-setadbackend,
-setadsafe,
AutoForwardDiff,
AutoTracker,
AutoZygote,
AutoReverseDiff,
-value,
-CHUNKSIZE,
-ADBACKEND,
-setchunksize,
-setrdcache,
-getrdcache,
verifygrad,
@logprob_str,
@prob_str

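Note that the `Auto*` backend types stay exported; only the mutable-global helpers disappear. As a hedged reading of the export list above:

```julia
using Turing: AutoForwardDiff, AutoTracker, AutoZygote, AutoReverseDiff  # still exported
# Turing.setadbackend(:forwarddiff)  # no longer defined after this PR
```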
60 changes: 2 additions & 58 deletions src/essential/ad.jl
@@ -1,75 +1,19 @@
-##############################
-# Global variables/constants #
-##############################
-const ADBACKEND = Ref(:forwarddiff)
-setadbackend(backend_sym::Symbol) = setadbackend(Val(backend_sym))
-function setadbackend(backend::Val)
-    _setadbackend(backend)
-    AdvancedVI.setadbackend(backend)
-end
-
-function _setadbackend(::Val{:forwarddiff})
-    ADBACKEND[] = :forwarddiff
-end
-function _setadbackend(::Val{:tracker})
-    @warn "Usage of Tracker.jl with Turing.jl is no longer being actively tested and maintained; please use at your own risk. See Zygote.jl or ReverseDiff.jl for fully supported reverse-mode backends."
-    ADBACKEND[] = :tracker
-end
-function _setadbackend(::Val{:zygote})
-    ADBACKEND[] = :zygote
-end
-function _setadbackend(::Val{:reversediff})
-    ADBACKEND[] = :reversediff
-end
-
-const ADSAFE = Ref(false)
-function setadsafe(switch::Bool)
-    @info("[Turing]: global ADSAFE is set as $switch")
-    ADSAFE[] = switch
-end
-
-const CHUNKSIZE = Ref(0) # 0 means letting ForwardDiff set it automatically
-
-function setchunksize(chunk_size::Int)
-    @info("[Turing]: AD chunk size is set as $chunk_size")
-    CHUNKSIZE[] = chunk_size
-    AdvancedVI.setchunksize(chunk_size)
-end
getchunksize(::AutoForwardDiff{chunk}) where {chunk} = chunk

standardtag(::AutoForwardDiff{<:Any,Nothing}) = true
standardtag(::AutoForwardDiff) = false

-const RDCache = Ref(false)
-
-setrdcache(b::Bool) = setrdcache(Val(b))
-setrdcache(::Val{false}) = RDCache[] = false
-setrdcache(::Val{true}) = RDCache[] = true
-
-getrdcache() = RDCache[]
-
-ADBackend() = ADBackend(ADBACKEND[])
-ADBackend(T::Symbol) = ADBackend(Val(T))
-
-ADBackend(::Val{:forwarddiff}) = AutoForwardDiff(; chunksize=CHUNKSIZE[])
-ADBackend(::Val{:tracker}) = AutoTracker()
-ADBackend(::Val{:zygote}) = AutoZygote()
-ADBackend(::Val{:reversediff}) = AutoReverseDiff(; compile=getrdcache())
-
-ADBackend(::Val) = error("The requested AD backend is not available. Make sure to load all required packages.")

"""
getADbackend(alg)

Find the autodifferentiation backend of the algorithm `alg`.
"""
getADbackend(spl::Sampler) = getADbackend(spl.alg)
-getADbackend(::SampleFromPrior) = ADBackend()
+getADbackend(::SampleFromPrior) = AutoForwardDiff(; chunksize=0) # TODO: remove `getADbackend`
getADbackend(ctx::DynamicPPL.SamplingContext) = getADbackend(ctx.sampler)
getADbackend(ctx::DynamicPPL.AbstractContext) = getADbackend(DynamicPPL.NodeTrait(ctx), ctx)

-getADbackend(::DynamicPPL.IsLeaf, ctx::DynamicPPL.AbstractContext) = ADBackend()
+getADbackend(::DynamicPPL.IsLeaf, ctx::DynamicPPL.AbstractContext) = AutoForwardDiff(; chunksize=0)
getADbackend(::DynamicPPL.IsParent, ctx::DynamicPPL.AbstractContext) = getADbackend(DynamicPPL.childcontext(ctx))

function LogDensityProblemsAD.ADgradient(ℓ::Turing.LogDensityFunction)
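One practical consequence of dropping the global state: samplers in the same session can use different backends without interfering. A hedged sketch, reusing the hypothetical `demo` model from the first example:

```julia
chain_fwd = sample(demo(1.0), HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0)), 500)
chain_rev = sample(demo(1.0), HMC(0.1, 5; adtype=AutoReverseDiff(false)), 500)
# Neither call mutates process-wide state, so their order no longer matters.
```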
38 changes: 20 additions & 18 deletions src/mcmc/hmc.jl
@@ -32,7 +32,7 @@ end
###

"""
-    HMC(ϵ::Float64, n_leapfrog::Int; adtype::ADTypes.AbstractADType = Turing.ADBackend())
+    HMC(ϵ::Float64, n_leapfrog::Int; adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0))

Hamiltonian Monte Carlo sampler with static trajectory.

@@ -41,7 +41,7 @@ Hamiltonian Monte Carlo sampler with static trajectory.
- `ϵ`: The leapfrog step size to use.
- `n_leapfrog`: The number of leapfrog steps to use.
- `adtype`: The automatic differentiation (AD) backend.
-  If it is not provided, the currently activated AD backend in Turing is used.
+  If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined.

# Usage

@@ -67,15 +67,15 @@ struct HMC{AD, space, metricT <: AHMC.AbstractMetric} <: StaticHamiltonian
adtype::AD
end

-function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType = ADBackend()) where {metricT <: AHMC.AbstractMetric}
-    return HMC{typeof(adtype), space, metricT}(ϵ, n_leapfrog, adtype)
+function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0)) where {metricT<:AHMC.AbstractMetric}
+    return HMC{typeof(adtype),space,metricT}(ϵ, n_leapfrog, adtype)
end
function HMC(
ϵ::Float64,
n_leapfrog::Int,
space::Symbol...;
metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
return HMC(ϵ, n_leapfrog, metricT, space; adtype = adtype)
end
Expand Down Expand Up @@ -280,7 +280,7 @@ end
"""
HMCDA(
        n_adapts::Int, δ::Float64, λ::Float64; ϵ::Float64 = 0.0,
-       adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+       adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
)

Hamiltonian Monte Carlo sampler with Dual Averaging algorithm.
@@ -298,7 +298,7 @@ HMCDA(200, 0.65, 0.3)
- `λ`: Target leapfrog length.
- `ϵ`: Initial step size; 0 means it is determined automatically by Turing.
- `adtype`: The automatic differentiation (AD) backend.
-  If it is not provided, the currently activated AD backend in Turing is used.
+  If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined.

# Reference

@@ -316,16 +316,16 @@ struct HMCDA{AD, space, metricT <: AHMC.AbstractMetric} <: AdaptiveHamiltonian
adtype::AD
end

-function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType = ADBackend()) where {metricT <: AHMC.AbstractMetric}
-    return HMCDA{typeof(adtype), space, metricT}(n_adapts, δ, λ, ϵ, adtype)
+function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0)) where {metricT<:AHMC.AbstractMetric}
+    return HMCDA{typeof(adtype),space,metricT}(n_adapts, δ, λ, ϵ, adtype)
end

function HMCDA(
δ::Float64,
λ::Float64;
init_ϵ::Float64=0.0,
metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
return HMCDA(-1, δ, λ, init_ϵ, metricT, (); adtype = adtype)
end
@@ -347,14 +347,14 @@ function HMCDA(
space::Symbol...;
init_ϵ::Float64=0.0,
metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
return HMCDA(n_adapts, δ, λ, init_ϵ, metricT, space; adtype = adtype)
end
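Hedged examples of the resulting HMCDA call patterns (the numeric arguments are arbitrary):

```julia
HMCDA(200, 0.65, 0.3)                                 # ForwardDiff default
HMCDA(200, 0.65, 0.3; adtype=AutoReverseDiff(false))  # explicit backend per sampler
```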


"""
NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0)
    NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0, adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0))

No-U-Turn Sampler (NUTS) sampler.

@@ -372,6 +372,8 @@ Arguments:
- `max_depth::Int` : Maximum doubling tree depth.
- `Δ_max::Float64` : Maximum divergence during doubling tree.
- `init_ϵ::Float64` : Initial step size; 0 means it is determined automatically using a heuristic procedure.
+- `adtype::ADTypes.AbstractADType` : The automatic differentiation (AD) backend.
+  If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined.

"""
struct NUTS{AD,space,metricT<:AHMC.AbstractMetric} <: AdaptiveHamiltonian
@@ -391,9 +393,9 @@ function NUTS(
ϵ::Float64,
::Type{metricT},
space::Tuple;
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
) where {metricT}
-    return NUTS{typeof(adtype), space, metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype)
+    return NUTS{typeof(adtype),space,metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype)
end

function NUTS(
@@ -413,9 +415,9 @@ function NUTS(
Δ_max::Float64=1000.0,
init_ϵ::Float64=0.0,
metricT=AHMC.DiagEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
-    NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype = adtype)
+    NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype=adtype)
end

function NUTS(
@@ -424,9 +426,9 @@ function NUTS(
Δ_max::Float64=1000.0,
init_ϵ::Float64=0.0,
metricT=AHMC.DiagEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
-    NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype = adtype)
+    NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype=adtype)
end

function NUTS(; kwargs...)
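For completeness, hedged examples of the NUTS constructors touched above (values arbitrary):

```julia
NUTS()                                     # all defaults
NUTS(0.65; adtype=AutoReverseDiff(false))  # target acceptance rate plus explicit backend
NUTS(1_000, 0.65; max_depth=8)             # explicit number of adaptation steps
```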
16 changes: 8 additions & 8 deletions src/mcmc/sghmc.jl
@@ -23,13 +23,13 @@ end
space::Symbol...;
learning_rate::Real,
momentum_decay::Real,
-    adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+    adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
)

Create a Stochastic Gradient Hamiltonian Monte Carlo (SGHMC) sampler.

-If the automatic differentiation (AD) backend `adtype` is not provided, the currently activated
-AD backend in Turing is used.
+If the automatic differentiation (AD) backend `adtype` is not provided, ForwardDiff
+with automatically determined `chunksize` is used.

# Reference

@@ -41,7 +41,7 @@ function SGHMC(
space::Symbol...;
learning_rate::Real,
momentum_decay::Real,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
)
_learning_rate, _momentum_decay = promote(learning_rate, momentum_decay)
return SGHMC{typeof(adtype),space,typeof(_learning_rate)}(_learning_rate, _momentum_decay, adtype)
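A hedged construction example matching the signature above (the rates are arbitrary):

```julia
SGHMC(; learning_rate=0.01, momentum_decay=0.1, adtype=AutoReverseDiff(false))
```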
@@ -163,15 +163,15 @@ end
SGLD(
space::Symbol...;
stepsize = PolynomialStepsize(0.01),
-    adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+    adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
)

Stochastic gradient Langevin dynamics (SGLD) sampler.

By default, a polynomially decaying stepsize is used.

-If the automatic differentiation (AD) backend `adtype` is not provided, the currently activated
-AD backend in Turing is used.
+If the automatic differentiation (AD) backend `adtype` is not provided, ForwardDiff
+with automatically determined `chunksize` is used.

# Reference

@@ -184,7 +184,7 @@ See also: [`PolynomialStepsize`](@ref)
function SGLD(
space::Symbol...;
stepsize = PolynomialStepsize(0.01),
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
)
return SGLD{typeof(adtype),space,typeof(stepsize)}(stepsize, adtype)
end
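And the SGLD counterpart, again as a hedged sketch using the `PolynomialStepsize` referenced in the docstring above:

```julia
SGLD(; stepsize=PolynomialStepsize(0.01), adtype=AutoForwardDiff(; chunksize=0))
```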