Remove global AD flag "ADBACKEND" and function suite (#2134)
* Remove usage of `ADBackend` from samplers

* Remove `ADBackend`-related code from tests

* Add package for AutoForwardDiff in DynamicHMCExt

* More removals

* More fixes

* Fix test code

* Revert type signature of bundle_samples overload

* More error fixes

* Even more fixes

* Remove formatting

* Bring back test for `tag`

* Undo more formatting

* Apply suggestions from code review

Co-authored-by: Hong Ge <[email protected]>

* Bump minor version

* Revert minor version bump

Co-authored-by: Hong Ge <[email protected]>

---------

Co-authored-by: Hong Ge <[email protected]>
sunxd3 and yebai authored Dec 13, 2023
1 parent 6649f10 commit 239263e
Showing 16 changed files with 238 additions and 344 deletions.
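In practice the migration is mechanical: the removed global setters become a per-sampler `adtype` keyword. A minimal before/after sketch (the model and data here are illustrative, not taken from this diff):

```julia
using Turing, ADTypes, ReverseDiff

@model function gdemo(x)
    s² ~ InverseGamma(2, 3)
    m ~ Normal(0, sqrt(s²))
    x .~ Normal(m, sqrt(s²))
end

# Before this commit: the backend was a global, process-wide flag.
# Turing.setadbackend(:reversediff)
# sample(gdemo([1.5, 2.0]), HMC(0.1, 5), 1000)

# After this commit: the backend travels with the sampler instance.
sample(gdemo([1.5, 2.0]), HMC(0.1, 5; adtype=AutoReverseDiff(false)), 1000)
```

Because the backend is now part of the sampler's type, samplers with different backends can coexist in one session, which the global flag could not express.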
10 changes: 4 additions & 6 deletions benchmarks/benchmarks_suite.jl
@@ -63,7 +63,7 @@ n_adapts = 2_000
 
 BenchmarkSuite["mnormal"]["hmc"] = @benchmarkable sample($(target(dim)), $(HMC(0.1, 5)), $n_samples)
 
-## MvNormal: ForwardDiff vs BackwardDiff (Tracker)
+## MvNormal: ForwardDiff vs ReverseDiff
 
 @model function mdemo(d, N)
     Θ = Vector(undef, N)
@@ -77,10 +77,8 @@ A = rand(Wishart(dim2, Matrix{Float64}(I, dim2, dim2)));
 d = MvNormal(zeros(dim2), A)
 
 # ForwardDiff
-Turing.setadbackend(:forwarddiff)
-BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)
+BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0))), 5000)
 
 
-# BackwardDiff
-Turing.setadbackend(:reversediff)
-BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)
+# ReverseDiff
+BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoReverseDiff(false))), 5000)
2 changes: 1 addition & 1 deletion ext/TuringDynamicHMCExt.jl
@@ -34,7 +34,7 @@ end
 function DynamicNUTS(
     spl::DynamicHMC.NUTS = DynamicHMC.NUTS(),
     space::Tuple = ();
-    adtype::ADTypes.AbstractADType = Turing.ADBackend()
+    adtype::ADTypes.AbstractADType = ADTypes.AutoForwardDiff(; chunksize=0)
 )
     return DynamicNUTS{typeof(adtype),space,typeof(spl)}(spl, adtype)
 end
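The extension now constructs its default backend from ADTypes directly instead of consulting the removed global. A hedged usage sketch (assumes DynamicHMC and ReverseDiff are loaded; the model is illustrative):

```julia
using Turing, DynamicHMC, ADTypes, ReverseDiff

@model function coinflip(y)
    p ~ Beta(1, 1)
    y .~ Bernoulli(p)
end

# Default: ADTypes.AutoForwardDiff(; chunksize=0), i.e. ForwardDiff with an
# automatically chosen chunk size.
chain = sample(coinflip([1, 0, 1, 1]), DynamicNUTS(), 500)

# Override per call rather than via the removed global flag:
chain_rd = sample(coinflip([1, 0, 1, 1]), DynamicNUTS(; adtype=AutoReverseDiff(false)), 500)
```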
1 change: 0 additions & 1 deletion ext/TuringOptimExt.jl
@@ -179,7 +179,6 @@ map_est = optimize(model, MAP())
 map_est = optimize(model, MAP(), NelderMead())
 ```
 """
-
 function Optim.optimize(model::DynamicPPL.Model, ::Turing.MAP, options::Optim.Options=Optim.Options(); kwargs...)
     ctx = Turing.OptimizationContext(DynamicPPL.DefaultContext())
     f = Turing.OptimLogDensity(model, ctx)
4 changes: 0 additions & 4 deletions src/Turing.jl
@@ -98,10 +98,6 @@ export @model, # modelling
     @prob_str,
     externalsampler,
 
-    setchunksize, # helper
-    setadbackend,
-    setadsafe,
-
     setprogress!, # debugging
 
     Flat,
9 changes: 0 additions & 9 deletions src/essential/Essential.jl
@@ -38,20 +38,11 @@ export @model,
     effectiveSampleSize,
     sweep!,
     ResampleWithESSThreshold,
-    ADBackend,
-    setadbackend,
-    setadsafe,
     AutoForwardDiff,
     AutoTracker,
     AutoZygote,
     AutoReverseDiff,
     value,
-    CHUNKSIZE,
-    ADBACKEND,
-    setchunksize,
-    setrdcache,
-    getrdcache,
-    verifygrad,
     @logprob_str,
     @prob_str
 
60 changes: 2 additions & 58 deletions src/essential/ad.jl
@@ -1,75 +1,19 @@
-##############################
-# Global variables/constants #
-##############################
-const ADBACKEND = Ref(:forwarddiff)
-setadbackend(backend_sym::Symbol) = setadbackend(Val(backend_sym))
-function setadbackend(backend::Val)
-    _setadbackend(backend)
-    AdvancedVI.setadbackend(backend)
-end
-
-function _setadbackend(::Val{:forwarddiff})
-    ADBACKEND[] = :forwarddiff
-end
-function _setadbackend(::Val{:tracker})
-    @warn "Usage of Tracker.jl with Turing.jl is no longer being actively tested and maintained; please use at your own risk. See Zygote.jl or ReverseDiff.jl for fully supported reverse-mode backends."
-    ADBACKEND[] = :tracker
-end
-function _setadbackend(::Val{:zygote})
-    ADBACKEND[] = :zygote
-end
-function _setadbackend(::Val{:reversediff})
-    ADBACKEND[] = :reversediff
-end
-
-const ADSAFE = Ref(false)
-function setadsafe(switch::Bool)
-    @info("[Turing]: global ADSAFE is set as $switch")
-    ADSAFE[] = switch
-end
-
-const CHUNKSIZE = Ref(0) # 0 means letting ForwardDiff set it automatically
-
-function setchunksize(chunk_size::Int)
-    @info("[Turing]: AD chunk size is set as $chunk_size")
-    CHUNKSIZE[] = chunk_size
-    AdvancedVI.setchunksize(chunk_size)
-end
-
 getchunksize(::AutoForwardDiff{chunk}) where {chunk} = chunk
 
 standardtag(::AutoForwardDiff{<:Any,Nothing}) = true
 standardtag(::AutoForwardDiff) = false
 
-const RDCache = Ref(false)
-
-setrdcache(b::Bool) = setrdcache(Val(b))
-setrdcache(::Val{false}) = RDCache[] = false
-setrdcache(::Val{true}) = RDCache[] = true
-
-getrdcache() = RDCache[]
-
-ADBackend() = ADBackend(ADBACKEND[])
-ADBackend(T::Symbol) = ADBackend(Val(T))
-
-ADBackend(::Val{:forwarddiff}) = AutoForwardDiff(; chunksize=CHUNKSIZE[])
-ADBackend(::Val{:tracker}) = AutoTracker()
-ADBackend(::Val{:zygote}) = AutoZygote()
-ADBackend(::Val{:reversediff}) = AutoReverseDiff(; compile=getrdcache())
-
-ADBackend(::Val) = error("The requested AD backend is not available. Make sure to load all required packages.")
-
 """
     getADbackend(alg)
 
 Find the autodifferentiation backend of the algorithm `alg`.
 """
 getADbackend(spl::Sampler) = getADbackend(spl.alg)
-getADbackend(::SampleFromPrior) = ADBackend()
+getADbackend(::SampleFromPrior) = AutoForwardDiff(; chunksize=0) # TODO: remove `getADbackend`
 getADbackend(ctx::DynamicPPL.SamplingContext) = getADbackend(ctx.sampler)
 getADbackend(ctx::DynamicPPL.AbstractContext) = getADbackend(DynamicPPL.NodeTrait(ctx), ctx)
 
-getADbackend(::DynamicPPL.IsLeaf, ctx::DynamicPPL.AbstractContext) = ADBackend()
+getADbackend(::DynamicPPL.IsLeaf, ctx::DynamicPPL.AbstractContext) = AutoForwardDiff(; chunksize=0)
 getADbackend(::DynamicPPL.IsParent, ctx::DynamicPPL.AbstractContext) = getADbackend(DynamicPPL.childcontext(ctx))
 
 function LogDensityProblemsAD.ADgradient(ℓ::Turing.LogDensityFunction)
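For reference, the deleted `ADBackend(::Val)` dispatch table maps one-to-one onto explicit ADTypes values; this sketch mirrors the removed code above (the matching backend package must still be loaded for gradients to work):

```julia
using ADTypes

# setadbackend(:forwarddiff)  ->  AutoForwardDiff(; chunksize=0)   # 0 lets ForwardDiff choose
# setadbackend(:tracker)      ->  AutoTracker()
# setadbackend(:zygote)       ->  AutoZygote()
# setadbackend(:reversediff)  ->  AutoReverseDiff(; compile=false)
# setrdcache(true)            ->  AutoReverseDiff(; compile=true)  # replaces the RDCache flag
adtype = AutoForwardDiff(; chunksize=0)
```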
38 changes: 20 additions & 18 deletions src/mcmc/hmc.jl
@@ -32,7 +32,7 @@ end
 ###
 
 """
-    HMC(ϵ::Float64, n_leapfrog::Int; adtype::ADTypes.AbstractADType = Turing.ADBackend())
+    HMC(ϵ::Float64, n_leapfrog::Int; adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0))
 
 Hamiltonian Monte Carlo sampler with static trajectory.
@@ -41,7 +41,7 @@ Hamiltonian Monte Carlo sampler with static trajectory.
 - `ϵ`: The leapfrog step size to use.
 - `n_leapfrog`: The number of leapfrog steps to use.
 - `adtype`: The automatic differentiation (AD) backend.
-  If it is not provided, the currently activated AD backend in Turing is used.
+  If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined.
 
 # Usage
@@ -67,15 +67,15 @@ struct HMC{AD, space, metricT <: AHMC.AbstractMetric} <: StaticHamiltonian
     adtype::AD
 end
 
-function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType = ADBackend()) where {metricT <: AHMC.AbstractMetric}
-    return HMC{typeof(adtype), space, metricT}(ϵ, n_leapfrog, adtype)
+function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0)) where {metricT<:AHMC.AbstractMetric}
+    return HMC{typeof(adtype),space,metricT}(ϵ, n_leapfrog, adtype)
 end
 function HMC(
     ϵ::Float64,
     n_leapfrog::Int,
     space::Symbol...;
     metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
     return HMC(ϵ, n_leapfrog, metricT, space; adtype = adtype)
 end
@@ -280,7 +280,7 @@ end
 """
     HMCDA(
         n_adapts::Int, δ::Float64, λ::Float64; ϵ::Float64 = 0.0;
-        adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+        adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
     )
 
 Hamiltonian Monte Carlo sampler with Dual Averaging algorithm.
@@ -298,7 +298,7 @@ HMCDA(200, 0.65, 0.3)
 - `λ`: Target leapfrog length.
 - `ϵ`: Initial step size; 0 means automatically search by Turing.
 - `adtype`: The automatic differentiation (AD) backend.
-  If it is not provided, the currently activated AD backend in Turing is used.
+  If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined.
 
 # Reference
@@ -316,16 +316,16 @@ struct HMCDA{AD, space, metricT <: AHMC.AbstractMetric} <: AdaptiveHamiltonian
     adtype::AD
 end
 
-function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType = ADBackend()) where {metricT <: AHMC.AbstractMetric}
-    return HMCDA{typeof(adtype), space, metricT}(n_adapts, δ, λ, ϵ, adtype)
+function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0)) where {metricT<:AHMC.AbstractMetric}
+    return HMCDA{typeof(adtype),space,metricT}(n_adapts, δ, λ, ϵ, adtype)
 end
 
 function HMCDA(
     δ::Float64,
     λ::Float64;
     init_ϵ::Float64=0.0,
     metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
     return HMCDA(-1, δ, λ, init_ϵ, metricT, (); adtype = adtype)
 end
@@ -347,14 +347,14 @@ function HMCDA(
     space::Symbol...;
     init_ϵ::Float64=0.0,
     metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
     return HMCDA(n_adapts, δ, λ, init_ϵ, metricT, space; adtype = adtype)
 end
 
 
 """
-    NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0)
+    NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0, adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0))
 
 No-U-Turn Sampler (NUTS) sampler.
@@ -372,6 +372,8 @@ Arguments:
 - `max_depth::Int` : Maximum doubling tree depth.
 - `Δ_max::Float64` : Maximum divergence during doubling tree.
 - `init_ϵ::Float64` : Initial step size; 0 means automatically searching using a heuristic procedure.
+- `adtype::ADTypes.AbstractADType` : The automatic differentiation (AD) backend.
+  If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined.
 
 """
 struct NUTS{AD,space,metricT<:AHMC.AbstractMetric} <: AdaptiveHamiltonian
@@ -391,9 +393,9 @@ function NUTS(
     ϵ::Float64,
     ::Type{metricT},
     space::Tuple;
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 ) where {metricT}
-    return NUTS{typeof(adtype), space, metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype)
+    return NUTS{typeof(adtype),space,metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype)
 end
 
 function NUTS(
@@ -413,9 +415,9 @@ function NUTS(
     Δ_max::Float64=1000.0,
     init_ϵ::Float64=0.0,
     metricT=AHMC.DiagEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
-    NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype = adtype)
+    NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype=adtype)
 end
 
 function NUTS(
@@ -424,9 +426,9 @@ function NUTS(
     Δ_max::Float64=1000.0,
     init_ϵ::Float64=0.0,
     metricT=AHMC.DiagEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
-    NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype = adtype)
+    NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype=adtype)
 end
 
 function NUTS(; kwargs...)
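All of the Hamiltonian constructors above gained the same keyword. A short sketch of the resulting call sites (hyperparameters and model are illustrative, not from this diff):

```julia
using Turing, ADTypes, ReverseDiff

@model function gauss(x)
    m ~ Normal(0, 1)
    x .~ Normal(m, 1)
end

x = [0.1, -0.3, 0.2]
sample(gauss(x), HMC(0.1, 5), 1000)   # default: AutoForwardDiff(; chunksize=0)
sample(gauss(x), HMCDA(0.65, 0.3; adtype=AutoForwardDiff(; chunksize=4)), 1000)
sample(gauss(x), NUTS(0.65; adtype=AutoReverseDiff(false)), 1000)
```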
16 changes: 8 additions & 8 deletions src/mcmc/sghmc.jl
@@ -23,13 +23,13 @@ end
         space::Symbol...;
         learning_rate::Real,
         momentum_decay::Real,
-        adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+        adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
     )
 
 Create a Stochastic Gradient Hamiltonian Monte Carlo (SGHMC) sampler.
 
-If the automatic differentiation (AD) backend `adtype` is not provided, the currently activated
-AD backend in Turing is used.
+If the automatic differentiation (AD) backend `adtype` is not provided, ForwardDiff
+with automatically determined `chunksize` is used.
 
 # Reference
@@ -41,7 +41,7 @@ function SGHMC(
     space::Symbol...;
     learning_rate::Real,
     momentum_decay::Real,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
     _learning_rate, _momentum_decay = promote(learning_rate, momentum_decay)
     return SGHMC{typeof(adtype),space,typeof(_learning_rate)}(_learning_rate, _momentum_decay, adtype)
@@ -163,15 +163,15 @@ end
     SGLD(
         space::Symbol...;
         stepsize = PolynomialStepsize(0.01),
-        adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+        adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
     )
 
 Stochastic gradient Langevin dynamics (SGLD) sampler.
 
 By default, a polynomially decaying stepsize is used.
 
-If the automatic differentiation (AD) backend `adtype` is not provided, the currently activated
-AD backend in Turing is used.
+If the automatic differentiation (AD) backend `adtype` is not provided, ForwardDiff
+with automatically determined `chunksize` is used.
 
 # Reference
@@ -184,7 +184,7 @@ See also: [`PolynomialStepsize`](@ref)
 function SGLD(
     space::Symbol...;
     stepsize = PolynomialStepsize(0.01),
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
 )
     return SGLD{typeof(adtype),space,typeof(stepsize)}(stepsize, adtype)
 end
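The stochastic-gradient samplers follow the same pattern; a hedged sketch (model and values illustrative):

```julia
using Turing, ADTypes

@model function gauss(x)
    m ~ Normal(0, 1)
    x .~ Normal(m, 1)
end

spl = SGLD(; stepsize=Turing.PolynomialStepsize(0.01), adtype=AutoForwardDiff(; chunksize=0))
sample(gauss([0.1, -0.3, 0.2]), spl, 1_000)

# SGHMC takes its required hyperparameters as keywords alongside `adtype`:
spl2 = SGHMC(; learning_rate=0.01, momentum_decay=0.1, adtype=AutoForwardDiff(; chunksize=0))
```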
(Diff truncated: 8 of the 16 changed files are shown above.)
