diff --git a/benchmarks/benchmarks_suite.jl b/benchmarks/benchmarks_suite.jl
index 04e6a65cb..ef00117ee 100644
--- a/benchmarks/benchmarks_suite.jl
+++ b/benchmarks/benchmarks_suite.jl
@@ -63,7 +63,7 @@ n_adapts = 2_000
 BenchmarkSuite["mnormal"]["hmc"] = @benchmarkable sample($(target(dim)), $(HMC(0.1, 5)), $n_samples)
 
-## MvNormal: ForwardDiff vs BackwardDiff (Tracker)
+## MvNormal: ForwardDiff vs ReverseDiff
 
 @model function mdemo(d, N)
     Θ = Vector(undef, N)
@@ -77,10 +77,8 @@ A = rand(Wishart(dim2, Matrix{Float64}(I, dim2, dim2)));
 d = MvNormal(zeros(dim2), A)
 
 # ForwardDiff
-Turing.setadbackend(:forwarddiff)
-BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)
+BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0))), 5000)
 
-# BackwardDiff
-Turing.setadbackend(:reversediff)
-BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5)), 5000)
+# ReverseDiff
+BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoReverseDiff(false))), 5000)
diff --git a/ext/TuringDynamicHMCExt.jl b/ext/TuringDynamicHMCExt.jl
index baa50f04f..13686b394 100644
--- a/ext/TuringDynamicHMCExt.jl
+++ b/ext/TuringDynamicHMCExt.jl
@@ -34,7 +34,7 @@ end
 function DynamicNUTS(
     spl::DynamicHMC.NUTS = DynamicHMC.NUTS(),
     space::Tuple = ();
-    adtype::ADTypes.AbstractADType = Turing.ADBackend()
+    adtype::ADTypes.AbstractADType = ADTypes.AutoForwardDiff(; chunksize=0)
 )
     return DynamicNUTS{typeof(adtype),space,typeof(spl)}(spl, adtype)
 end
diff --git a/ext/TuringOptimExt.jl b/ext/TuringOptimExt.jl
index eb594929d..14fbf106e 100644
--- a/ext/TuringOptimExt.jl
+++ b/ext/TuringOptimExt.jl
@@ -179,7 +179,6 @@ map_est = optimize(model, MAP())
 map_est = optimize(model, MAP(), NelderMead())
 ```
 """
-
 function Optim.optimize(model::DynamicPPL.Model, ::Turing.MAP, options::Optim.Options=Optim.Options(); kwargs...)
     ctx = Turing.OptimizationContext(DynamicPPL.DefaultContext())
     f = Turing.OptimLogDensity(model, ctx)
diff --git a/src/Turing.jl b/src/Turing.jl
index 5dbf3d4fa..b91f0608d 100644
--- a/src/Turing.jl
+++ b/src/Turing.jl
@@ -98,10 +98,6 @@ export @model, # modelling
     @prob_str,
     externalsampler,
 
-    setchunksize, # helper
-    setadbackend,
-    setadsafe,
-
     setprogress!, # debugging
 
     Flat,
diff --git a/src/essential/Essential.jl b/src/essential/Essential.jl
index ed3f972ed..e92e541c5 100644
--- a/src/essential/Essential.jl
+++ b/src/essential/Essential.jl
@@ -38,20 +38,11 @@ export @model,
     effectiveSampleSize,
     sweep!,
     ResampleWithESSThreshold,
-    ADBackend,
-    setadbackend,
-    setadsafe,
     AutoForwardDiff,
     AutoTracker,
     AutoZygote,
     AutoReverseDiff,
     value,
-    CHUNKSIZE,
-    ADBACKEND,
-    setchunksize,
-    setrdcache,
-    getrdcache,
-    verifygrad,
     @logprob_str,
     @prob_str
diff --git a/src/essential/ad.jl b/src/essential/ad.jl
index 01cdb1657..c873e3a03 100644
--- a/src/essential/ad.jl
+++ b/src/essential/ad.jl
@@ -1,75 +1,19 @@
-##############################
-# Global variables/constants #
-##############################
-const ADBACKEND = Ref(:forwarddiff)
-setadbackend(backend_sym::Symbol) = setadbackend(Val(backend_sym))
-function setadbackend(backend::Val)
-    _setadbackend(backend)
-    AdvancedVI.setadbackend(backend)
-end
-
-function _setadbackend(::Val{:forwarddiff})
-    ADBACKEND[] = :forwarddiff
-end
-function _setadbackend(::Val{:tracker})
-    @warn "Usage of Tracker.jl with Turing.jl is no longer being actively tested and maintained; please use at your own risk. See Zygote.jl or ReverseDiff.jl for fully supported reverse-mode backends."
-    ADBACKEND[] = :tracker
-end
-function _setadbackend(::Val{:zygote})
-    ADBACKEND[] = :zygote
-end
-function _setadbackend(::Val{:reversediff})
-    ADBACKEND[] = :reversediff
-end
-
-const ADSAFE = Ref(false)
-function setadsafe(switch::Bool)
-    @info("[Turing]: global ADSAFE is set as $switch")
-    ADSAFE[] = switch
-end
-
-const CHUNKSIZE = Ref(0) # 0 means letting ForwardDiff set it automatically
-
-function setchunksize(chunk_size::Int)
-    @info("[Turing]: AD chunk size is set as $chunk_size")
-    CHUNKSIZE[] = chunk_size
-    AdvancedVI.setchunksize(chunk_size)
-end
-
 getchunksize(::AutoForwardDiff{chunk}) where {chunk} = chunk
 
 standardtag(::AutoForwardDiff{<:Any,Nothing}) = true
 standardtag(::AutoForwardDiff) = false
 
-const RDCache = Ref(false)
-
-setrdcache(b::Bool) = setrdcache(Val(b))
-setrdcache(::Val{false}) = RDCache[] = false
-setrdcache(::Val{true}) = RDCache[] = true
-
-getrdcache() = RDCache[]
-
-ADBackend() = ADBackend(ADBACKEND[])
-ADBackend(T::Symbol) = ADBackend(Val(T))
-
-ADBackend(::Val{:forwarddiff}) = AutoForwardDiff(; chunksize=CHUNKSIZE[])
-ADBackend(::Val{:tracker}) = AutoTracker()
-ADBackend(::Val{:zygote}) = AutoZygote()
-ADBackend(::Val{:reversediff}) = AutoReverseDiff(; compile=getrdcache())
-
-ADBackend(::Val) = error("The requested AD backend is not available. Make sure to load all required packages.")
-
 """
     getADbackend(alg)
 
 Find the autodifferentiation backend of the algorithm `alg`.
""" getADbackend(spl::Sampler) = getADbackend(spl.alg) -getADbackend(::SampleFromPrior) = ADBackend() +getADbackend(::SampleFromPrior) = AutoForwardDiff(; chunksize=0) # TODO: remove `getADbackend` getADbackend(ctx::DynamicPPL.SamplingContext) = getADbackend(ctx.sampler) getADbackend(ctx::DynamicPPL.AbstractContext) = getADbackend(DynamicPPL.NodeTrait(ctx), ctx) -getADbackend(::DynamicPPL.IsLeaf, ctx::DynamicPPL.AbstractContext) = ADBackend() +getADbackend(::DynamicPPL.IsLeaf, ctx::DynamicPPL.AbstractContext) = AutoForwardDiff(; chunksize=0) getADbackend(::DynamicPPL.IsParent, ctx::DynamicPPL.AbstractContext) = getADbackend(DynamicPPL.childcontext(ctx)) function LogDensityProblemsAD.ADgradient(ℓ::Turing.LogDensityFunction) diff --git a/src/mcmc/hmc.jl b/src/mcmc/hmc.jl index d5ace1ddc..d9a47f5c5 100644 --- a/src/mcmc/hmc.jl +++ b/src/mcmc/hmc.jl @@ -32,7 +32,7 @@ end ### """ - HMC(ϵ::Float64, n_leapfrog::Int; adtype::ADTypes.AbstractADType = Turing.ADBackend()) + HMC(ϵ::Float64, n_leapfrog::Int; adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0)) Hamiltonian Monte Carlo sampler with static trajectory. @@ -41,7 +41,7 @@ Hamiltonian Monte Carlo sampler with static trajectory. - `ϵ`: The leapfrog step size to use. - `n_leapfrog`: The number of leapfrog steps to use. - `adtype`: The automatic differentiation (AD) backend. - If it is not provided, the currently activated AD backend in Turing is used. + If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined. # Usage @@ -67,15 +67,15 @@ struct HMC{AD, space, metricT <: AHMC.AbstractMetric} <: StaticHamiltonian adtype::AD end -function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType = ADBackend()) where {metricT <: AHMC.AbstractMetric} - return HMC{typeof(adtype), space, metricT}(ϵ, n_leapfrog, adtype) +function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0)) where {metricT<:AHMC.AbstractMetric} + return HMC{typeof(adtype),space,metricT}(ϵ, n_leapfrog, adtype) end function HMC( ϵ::Float64, n_leapfrog::Int, space::Symbol...; metricT=AHMC.UnitEuclideanMetric, - adtype::ADTypes.AbstractADType = ADBackend(), + adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0), ) return HMC(ϵ, n_leapfrog, metricT, space; adtype = adtype) end @@ -280,7 +280,7 @@ end """ HMCDA( n_adapts::Int, δ::Float64, λ::Float64; ϵ::Float64 = 0.0; - adtype::ADTypes.AbstractADType = Turing.ADBackend(), + adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0), ) Hamiltonian Monte Carlo sampler with Dual Averaging algorithm. @@ -298,7 +298,7 @@ HMCDA(200, 0.65, 0.3) - `λ`: Target leapfrog length. - `ϵ`: Initial step size; 0 means automatically search by Turing. - `adtype`: The automatic differentiation (AD) backend. - If it is not provided, the currently activated AD backend in Turing is used. + If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined. 
 
 # Reference
@@ -316,8 +316,8 @@ struct HMCDA{AD, space, metricT <: AHMC.AbstractMetric} <: AdaptiveHamiltonian
     adtype::AD
 end
 
-function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType = ADBackend()) where {metricT <: AHMC.AbstractMetric}
-    return HMCDA{typeof(adtype), space, metricT}(n_adapts, δ, λ, ϵ, adtype)
+function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0)) where {metricT<:AHMC.AbstractMetric}
+    return HMCDA{typeof(adtype),space,metricT}(n_adapts, δ, λ, ϵ, adtype)
 end
 
 function HMCDA(
@@ -325,7 +325,7 @@ function HMCDA(
     λ::Float64;
     init_ϵ::Float64=0.0,
     metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
     return HMCDA(-1, δ, λ, init_ϵ, metricT, (); adtype = adtype)
 end
@@ -347,14 +347,14 @@ function HMCDA(
     space::Symbol...;
     init_ϵ::Float64=0.0,
     metricT=AHMC.UnitEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
     return HMCDA(n_adapts, δ, λ, init_ϵ, metricT, space; adtype = adtype)
 end
 
 """
-    NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0)
+    NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0, adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0))
 
 No-U-Turn Sampler (NUTS) sampler.
 
@@ -372,6 +372,8 @@ Arguments:
 - `max_depth::Int` : Maximum doubling tree depth.
 - `Δ_max::Float64` : Maximum divergence during doubling tree.
 - `init_ϵ::Float64` : Initial step size; 0 means automatically searching using a heuristic procedure.
+- `adtype::ADTypes.AbstractADType` : The automatic differentiation (AD) backend.
+  If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined.
 
 """
 struct NUTS{AD,space,metricT<:AHMC.AbstractMetric} <: AdaptiveHamiltonian
@@ -391,9 +393,9 @@ function NUTS(
     ϵ::Float64,
     ::Type{metricT},
     space::Tuple;
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 ) where {metricT}
-    return NUTS{typeof(adtype), space, metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype)
+    return NUTS{typeof(adtype),space,metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype)
 end
 
 function NUTS(
@@ -413,9 +415,9 @@ function NUTS(
     Δ_max::Float64=1000.0,
     init_ϵ::Float64=0.0,
     metricT=AHMC.DiagEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
-    NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype = adtype)
+    NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype=adtype)
 end
 
 function NUTS(
@@ -424,9 +426,9 @@ function NUTS(
     Δ_max::Float64=1000.0,
     init_ϵ::Float64=0.0,
     metricT=AHMC.DiagEuclideanMetric,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
-    NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype = adtype)
+    NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype=adtype)
 end
 
 function NUTS(; kwargs...)
diff --git a/src/mcmc/sghmc.jl b/src/mcmc/sghmc.jl
index eda3a5fa4..aa89e5192 100644
--- a/src/mcmc/sghmc.jl
+++ b/src/mcmc/sghmc.jl
@@ -23,13 +23,13 @@ end
         space::Symbol...;
         learning_rate::Real,
        momentum_decay::Real,
-        adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+        adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
     )
 
 Create a Stochastic Gradient Hamiltonian Monte Carlo (SGHMC) sampler.
 
-If the automatic differentiation (AD) backend `adtype` is not provided, the currently activated
-AD backend in Turing is used.
+If the automatic differentiation (AD) backend `adtype` is not provided, ForwardDiff
+with automatically determined `chunksize` is used.
 
 # Reference
 
@@ -41,7 +41,7 @@ function SGHMC(
     space::Symbol...;
     learning_rate::Real,
     momentum_decay::Real,
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType=AutoForwardDiff(; chunksize=0),
 )
     _learning_rate, _momentum_decay = promote(learning_rate, momentum_decay)
     return SGHMC{typeof(adtype),space,typeof(_learning_rate)}(_learning_rate, _momentum_decay, adtype)
@@ -163,15 +163,15 @@ end
     SGLD(
         space::Symbol...;
         stepsize = PolynomialStepsize(0.01),
-        adtype::ADTypes.AbstractADType = Turing.ADBackend(),
+        adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
     )
 
 Stochastic gradient Langevin dynamics (SGLD) sampler.
 
 By default, a polynomially decaying stepsize is used.
 
-If the automatic differentiation (AD) backend `adtype` is not provided, the currently activated
-AD backend in Turing is used.
+If the automatic differentiation (AD) backend `adtype` is not provided, ForwardDiff
+with automatically determined `chunksize` is used.
 
 # Reference
 
@@ -184,7 +184,7 @@ See also: [`PolynomialStepsize`](@ref)
 function SGLD(
     space::Symbol...;
     stepsize = PolynomialStepsize(0.01),
-    adtype::ADTypes.AbstractADType = ADBackend(),
+    adtype::ADTypes.AbstractADType = AutoForwardDiff(; chunksize=0),
 )
     return SGLD{typeof(adtype),space,typeof(stepsize)}(stepsize, adtype)
 end
diff --git a/test/essential/ad.jl b/test/essential/ad.jl
index 351d7fe41..f245a87b1 100644
--- a/test/essential/ad.jl
+++ b/test/essential/ad.jl
@@ -84,31 +84,24 @@
         @model function dir()
             theta ~ Dirichlet(1 ./ fill(4, 4))
         end
-        Turing.setadbackend(:zygote)
-        sample(dir(), HMC(0.01, 1), 1000)
-        Turing.setadbackend(:reversediff)
-        Turing.setrdcache(false)
-        sample(dir(), HMC(0.01, 1), 1000)
-        Turing.setrdcache(true)
-        sample(dir(), HMC(0.01, 1), 1000)
-        Turing.setrdcache(false)
+        sample(dir(), HMC(0.01, 1; adtype=AutoZygote()), 1000)
+        sample(dir(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000)
+        sample(dir(), HMC(0.01, 1; adtype=AutoReverseDiff(true)), 1000)
     end
     @testset "PDMatDistribution AD" begin
         @model function wishart()
             theta ~ Wishart(4, Matrix{Float64}(I, 4, 4))
         end
-        Turing.setadbackend(:reversediff)
-        sample(wishart(), HMC(0.01, 1), 1000)
-        Turing.setadbackend(:zygote)
-        sample(wishart(), HMC(0.01, 1), 1000)
+
+        sample(wishart(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000)
+        sample(wishart(), HMC(0.01, 1; adtype=AutoZygote()), 1000)
 
         @model function invwishart()
             theta ~ InverseWishart(4, Matrix{Float64}(I, 4, 4))
         end
-        Turing.setadbackend(:reversediff)
-        sample(invwishart(), HMC(0.01, 1), 1000)
-        Turing.setadbackend(:zygote)
-        sample(invwishart(), HMC(0.01, 1), 1000)
+
+        sample(invwishart(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000)
+        sample(invwishart(), HMC(0.01, 1; adtype=AutoZygote()), 1000)
     end
     @testset "Hessian test" begin
         @model function tst(x, ::Type{TV}=Vector{Float64}) where {TV}
@@ -156,8 +149,6 @@
     end
@testset "memoization: issue #1393" begin - Turing.setadbackend(:reversediff) - Turing.setrdcache(true) @model function demo(data) sigma ~ Uniform(0.0, 20.0) @@ -168,27 +159,13 @@ for i in 1:5 d = Normal(0.0, i) data = rand(d, N) - chn = sample(demo(data), NUTS(0.65), 1000) + chn = sample(demo(data), NUTS(0.65; adtype=AutoReverseDiff(true)), 1000) @test mean(Array(chn[:sigma])) ≈ std(data) atol = 0.5 end - Turing.setrdcache(false) - end - - @testset "chunksize" begin - # Default value is 0 (automatic choice by ForwardDiff) - @test Turing.CHUNKSIZE[] == 0 - - setchunksize(8) - @test Turing.CHUNKSIZE[] == 8 - @test Turing.AdvancedVI.CHUNKSIZE[] == 8 - setchunksize(0) - @test Turing.CHUNKSIZE[] == 0 - @test Turing.AdvancedVI.CHUNKSIZE[] == 0 end @testset "tag" begin - @test Turing.ADBackend(Val(:forwarddiff)) === Turing.AutoForwardDiff(; chunksize=Turing.CHUNKSIZE[]) for chunksize in (0, 1, 10) ad = Turing.AutoForwardDiff(; chunksize=chunksize) @test ad === Turing.AutoForwardDiff(; chunksize=chunksize) diff --git a/test/mcmc/Inference.jl b/test/mcmc/Inference.jl index 9b10f9470..1f5a14869 100644 --- a/test/mcmc/Inference.jl +++ b/test/mcmc/Inference.jl @@ -1,4 +1,4 @@ -@testset "inference.jl" begin +@testset "Testing inference.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false)) # Only test threading if 1.3+. if VERSION > v"1.2" @testset "threaded sampling" begin @@ -10,19 +10,19 @@ # https://github.com/TuringLang/Turing.jl/issues/1571 samplers = @static if VERSION <= v"1.5.3" || VERSION >= v"1.6.0" ( - HMC(0.1, 7), + HMC(0.1, 7; adtype=adbackend), PG(10), IS(), MH(), - Gibbs(PG(3, :s), HMC(0.4, 8, :m)), - Gibbs(HMC(0.1, 5, :s), ESS(:m)), + Gibbs(PG(3, :s), HMC(0.4, 8, :m; adtype=adbackend)), + Gibbs(HMC(0.1, 5, :s; adtype=adbackend), ESS(:m)), ) else ( - HMC(0.1, 7), + HMC(0.1, 7; adtype=adbackend), IS(), MH(), - Gibbs(HMC(0.1, 5, :s), ESS(:m)), + Gibbs(HMC(0.1, 5, :s; adtype=adbackend), ESS(:m)), ) end for sampler in samplers @@ -51,12 +51,12 @@ # Smoke test for default sample call. 
             Random.seed!(100)
-            chain = sample(gdemo_default, HMC(0.1, 7), MCMCThreads(), 1000, 4)
+            chain = sample(gdemo_default, HMC(0.1, 7; adtype=adbackend), MCMCThreads(), 1000, 4)
             check_gdemo(chain)
 
             # run sampler: progress logging should be disabled and
             # it should return a Chains object
-            sampler = Sampler(HMC(0.1, 7), gdemo_default)
+            sampler = Sampler(HMC(0.1, 7; adtype=adbackend), gdemo_default)
             chains = sample(gdemo_default, sampler, MCMCThreads(), 1000, 4)
             @test chains isa MCMCChains.Chains
         end
@@ -64,9 +64,9 @@
     @testset "chain save/resume" begin
         Random.seed!(1234)
 
-        alg1 = HMCDA(1000, 0.65, 0.15)
+        alg1 = HMCDA(1000, 0.65, 0.15; adtype=adbackend)
         alg2 = PG(20)
-        alg3 = Gibbs(PG(30, :s), HMC(0.2, 4, :m))
+        alg3 = Gibbs(PG(30, :s), HMC(0.2, 4, :m; adtype=adbackend))
 
         chn1 = sample(gdemo_default, alg1, 5000; save_state=true)
         check_gdemo(chn1)
@@ -200,7 +200,7 @@
 
         smc = SMC()
         pg = PG(10)
-        gibbs = Gibbs(HMC(0.2, 3, :p), PG(10, :x))
+        gibbs = Gibbs(HMC(0.2, 3, :p; adtype=adbackend), PG(10, :x))
 
         chn_s = sample(testbb(obs), smc, 1000)
         chn_p = sample(testbb(obs), pg, 2000)
@@ -227,7 +227,7 @@
             return s, m
         end
 
-        gibbs = Gibbs(PG(10, :s), HMC(0.4, 8, :m))
+        gibbs = Gibbs(PG(10, :s), HMC(0.4, 8, :m; adtype=adbackend))
         chain = sample(fggibbstest(xs), gibbs, 2)
     end
     @testset "new grammar" begin
@@ -303,7 +303,7 @@
             end
         end
 
-        chain = sample(noreturn([1.5 2.0]), HMC(0.1, 10), 4000)
+        chain = sample(noreturn([1.5 2.0]), HMC(0.1, 10; adtype=adbackend), 4000)
         check_numerical(chain, [:s, :m], [49 / 24, 7 / 6])
     end
     @testset "observe" begin
@@ -333,87 +333,85 @@
         @test all(isone, res_pg[:x])
     end
     @testset "sample" begin
-        alg = Gibbs(HMC(0.2, 3, :m), PG(10, :s))
+        alg = Gibbs(HMC(0.2, 3, :m; adtype=adbackend), PG(10, :s))
         chn = sample(gdemo_default, alg, 1000)
     end
     @testset "vectorization @." begin
-        # https://github.com/FluxML/Tracker.jl/issues/119
-        if !(Turing.ADBackend() isa Turing.AutoTracker)
-            @model function vdemo1(x)
-                s ~ InverseGamma(2, 3)
-                m ~ Normal(0, sqrt(s))
-                @. x ~ Normal(m, sqrt(s))
-                return s, m
-            end
+        @model function vdemo1(x)
+            s ~ InverseGamma(2, 3)
+            m ~ Normal(0, sqrt(s))
+            @. x ~ Normal(m, sqrt(s))
+            return s, m
+        end
 
-            alg = HMC(0.01, 5)
-            x = randn(100)
-            res = sample(vdemo1(x), alg, 250)
+        alg = HMC(0.01, 5; adtype=adbackend)
+        x = randn(100)
+        res = sample(vdemo1(x), alg, 250)
 
-            @model function vdemo1b(x)
-                s ~ InverseGamma(2, 3)
-                m ~ Normal(0, sqrt(s))
-                @. x ~ Normal(m, $(sqrt(s)))
-                return s, m
-            end
+        @model function vdemo1b(x)
+            s ~ InverseGamma(2, 3)
+            m ~ Normal(0, sqrt(s))
+            @. x ~ Normal(m, $(sqrt(s)))
+            return s, m
+        end
 
-            res = sample(vdemo1b(x), alg, 250)
+        res = sample(vdemo1b(x), alg, 250)
 
-            @model function vdemo2(x)
-                μ ~ MvNormal(zeros(size(x, 1)), I)
-                @. x ~ $(MvNormal(μ, I))
-            end
+        @model function vdemo2(x)
+            μ ~ MvNormal(zeros(size(x, 1)), I)
+            @. x ~ $(MvNormal(μ, I))
+        end
 
-            D = 2
-            alg = HMC(0.01, 5)
-            res = sample(vdemo2(randn(D, 100)), alg, 250)
+        D = 2
+        alg = HMC(0.01, 5; adtype=adbackend)
+        res = sample(vdemo2(randn(D, 100)), alg, 250)
 
-            # Vector assumptions
-            N = 10
-            alg = HMC(0.2, 4)
+        # Vector assumptions
+        N = 10
+        alg = HMC(0.2, 4; adtype=adbackend)
 
-            @model function vdemo3()
-                x = Vector{Real}(undef, N)
-                for i in 1:N
-                    x[i] ~ Normal(0, sqrt(4))
-                end
+        @model function vdemo3()
+            x = Vector{Real}(undef, N)
+            for i in 1:N
+                x[i] ~ Normal(0, sqrt(4))
             end
+        end
 
-            t_loop = @elapsed res = sample(vdemo3(), alg, 1000)
-
-            # Test for vectorize UnivariateDistribution
-            @model function vdemo4()
-                x = Vector{Real}(undef, N)
-                @. x ~ Normal(0, 2)
-            end
+        t_loop = @elapsed res = sample(vdemo3(), alg, 1000)
 
-            t_vec = @elapsed res = sample(vdemo4(), alg, 1000)
+        # Test for vectorize UnivariateDistribution
+        @model function vdemo4()
+            x = Vector{Real}(undef, N)
+            @. x ~ Normal(0, 2)
+        end
 
-            @model vdemo5() = x ~ MvNormal(zeros(N), 4 * I)
+        t_vec = @elapsed res = sample(vdemo4(), alg, 1000)
 
-            t_mv = @elapsed res = sample(vdemo5(), alg, 1000)
+        @model vdemo5() = x ~ MvNormal(zeros(N), 4 * I)
 
-            println("Time for")
-            println(" Loop : ", t_loop)
-            println(" Vec : ", t_vec)
-            println(" Mv : ", t_mv)
+        t_mv = @elapsed res = sample(vdemo5(), alg, 1000)
 
-            # Transformed test
-            @model function vdemo6()
-                x = Vector{Real}(undef, N)
-                @. x ~ InverseGamma(2, 3)
-            end
+        println("Time for")
+        println(" Loop : ", t_loop)
+        println(" Vec : ", t_vec)
+        println(" Mv : ", t_mv)
 
-            sample(vdemo6(), alg, 1000)
+        # Transformed test
+        @model function vdemo6()
+            x = Vector{Real}(undef, N)
+            @. x ~ InverseGamma(2, 3)
+        end
 
-            N = 3
-            @model function vdemo7()
-                x = Array{Real}(undef, N, N)
-                @. x ~ [InverseGamma(2, 3) for i in 1:N]
-            end
+        sample(vdemo6(), alg, 1000)
 
-            sample(vdemo7(), alg, 1000)
+        N = 3
+        @model function vdemo7()
+            x = Array{Real}(undef, N, N)
+            @. x ~ [InverseGamma(2, 3) for i in 1:N]
         end
+
+        sample(vdemo7(), alg, 1000)
     end
     @testset "vectorization .~" begin
         @model function vdemo1(x)
@@ -423,7 +421,7 @@
             return s, m
         end
 
-        alg = HMC(0.01, 5)
+        alg = HMC(0.01, 5; adtype=adbackend)
         x = randn(100)
         res = sample(vdemo1(x), alg, 250)
 
@@ -433,12 +431,12 @@
         end
 
         D = 2
-        alg = HMC(0.01, 5)
+        alg = HMC(0.01, 5; adtype=adbackend)
         res = sample(vdemo2(randn(D, 100)), alg, 250)
 
         # Vector assumptions
         N = 10
-        alg = HMC(0.2, 4)
+        alg = HMC(0.2, 4; adtype=adbackend)
 
         @model function vdemo3()
             x = Vector{Real}(undef, N)
@@ -483,7 +481,7 @@
     end
     @testset "Type parameters" begin
         N = 10
-        alg = HMC(0.01, 5)
+        alg = HMC(0.01, 5; adtype=adbackend)
         x = randn(1000)
         @model function vdemo1(::Type{T}=Float64) where {T}
             x = Vector{T}(undef, N)
diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl
index ec2821222..ef2299dca 100644
--- a/test/mcmc/gibbs.jl
+++ b/test/mcmc/gibbs.jl
@@ -1,12 +1,12 @@
-@testset "gibbs.jl" begin
+@testset "Testing gibbs.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false))
    @turing_testset "gibbs constructor" begin
        N = 500
-        s1 = Gibbs(HMC(0.1, 5, :s, :m))
+        s1 = Gibbs(HMC(0.1, 5, :s, :m; adtype=adbackend))
         s2 = Gibbs(PG(10, :s, :m))
-        s3 = Gibbs(PG(3, :s), HMC( 0.4, 8, :m))
-        s4 = Gibbs(PG(3, :s), HMC(0.4, 8, :m))
-        s5 = Gibbs(CSMC(3, :s), HMC(0.4, 8, :m))
-        s6 = Gibbs(HMC(0.1, 5, :s), ESS(:m))
+        s3 = Gibbs(PG(3, :s), HMC(0.4, 8, :m; adtype=adbackend))
+        s4 = Gibbs(PG(3, :s), HMC(0.4, 8, :m; adtype=adbackend))
+        s5 = Gibbs(CSMC(3, :s), HMC(0.4, 8, :m; adtype=adbackend))
+        s6 = Gibbs(HMC(0.1, 5, :s; adtype=adbackend), ESS(:m))
         for s in (s1, s2, s3, s4, s5, s6)
             @test DynamicPPL.alg_str(Turing.Sampler(s, gdemo_default)) == "Gibbs"
         end
@@ -32,13 +32,13 @@
     end
     @numerical_testset "gibbs inference" begin
         Random.seed!(100)
-        alg = Gibbs(CSMC(15, :s), HMC(0.2, 4, :m))
+        alg = Gibbs(CSMC(15, :s), HMC(0.2, 4, :m; adtype=adbackend))
         chain = sample(gdemo(1.5, 2.0), alg, 10_000)
         check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.15)
 
         Random.seed!(100)
-        alg = Gibbs(MH(:s), HMC(0.2, 4, :m))
+        alg = Gibbs(MH(:s), HMC(0.2, 4, :m; adtype=adbackend))
         chain = sample(gdemo(1.5, 2.0), alg, 10_000)
         check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1)
 
@@ -51,14 +51,14 @@
         check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1)
 
         Random.seed!(200)
-        gibbs = Gibbs(PG(15, :z1, :z2, :z3, :z4), HMC(0.15, 3, :mu1, :mu2))
+        gibbs = Gibbs(PG(15, :z1, :z2, :z3, :z4), HMC(0.15, 3, :mu1, :mu2; adtype=adbackend))
         chain = sample(MoGtest_default, gibbs, 10_000)
         check_MoGtest_default(chain, atol=0.15)
 
         Random.seed!(200)
         for alg in [
-            Gibbs((MH(:s), 2), (HMC(0.2, 4, :m), 1)),
-            Gibbs((MH(:s), 1), (HMC(0.2, 4, :m), 2)),
+            Gibbs((MH(:s), 2), (HMC(0.2, 4, :m; adtype=adbackend), 1)),
+            Gibbs((MH(:s), 1), (HMC(0.2, 4, :m; adtype=adbackend), 2)),
         ]
             chain = sample(gdemo(1.5, 2.0), alg, 10_000)
             check_gdemo(chain; atol=0.15)
@@ -75,7 +75,7 @@
         end
         model = gdemo_copy()
 
-        function AbstractMCMC.bundle_samples(
+        @nospecialize function AbstractMCMC.bundle_samples(
             samples::Vector,
             ::typeof(model),
             ::Turing.Sampler{<:Gibbs},
@@ -93,7 +93,7 @@
             return
         end
 
-        alg = Gibbs(MH(:s), HMC(0.2, 4, :m))
+        alg = Gibbs(MH(:s), HMC(0.2, 4, :m; adtype=adbackend))
         sample(model, alg, 100; callback = callback)
     end
     @turing_testset "dynamic model" begin
@@ -119,6 +119,6 @@
         model = imm(randn(100), 1.0);
         # https://github.com/TuringLang/Turing.jl/issues/1725
         # sample(model, Gibbs(MH(:z), HMC(0.01, 4, :m)), 100);
-        sample(model, Gibbs(PG(10, :z), HMC(0.01, 4, :m)), 100);
+        sample(model, Gibbs(PG(10, :z), HMC(0.01, 4, :m; adtype=adbackend)), 100)
     end
 end
diff --git a/test/mcmc/gibbs_conditional.jl b/test/mcmc/gibbs_conditional.jl
index 094c949aa..d7752da64 100644
--- a/test/mcmc/gibbs_conditional.jl
+++ b/test/mcmc/gibbs_conditional.jl
@@ -1,4 +1,4 @@
-@turing_testset "gibbs conditionals.jl" begin
+@turing_testset "Testing gibbs conditionals.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false))
     Random.seed!(1000);
     rng = StableRNG(123)
     @turing_testset "gdemo" begin
@@ -131,7 +131,7 @@
         # Compare three Gibbs samplers
         sampler1 = Gibbs(GibbsConditional(:z, cond_z), GibbsConditional(:μ, cond_μ))
         sampler2 = Gibbs(GibbsConditional(:z, cond_z), MH(:μ))
-        sampler3 = Gibbs(GibbsConditional(:z, cond_z), HMC(0.01, 7, :μ))
+        sampler3 = Gibbs(GibbsConditional(:z, cond_z), HMC(0.01, 7, :μ; adtype=adbackend))
 
         for sampler in (sampler1, sampler2, sampler3)
             chain = sample(rng, model, sampler, 10_000)
diff --git a/test/mcmc/hmc.jl b/test/mcmc/hmc.jl
index 52aff59e9..fe18fa773 100644
--- a/test/mcmc/hmc.jl
+++ b/test/mcmc/hmc.jl
@@ -1,4 +1,4 @@
-@testset "hmc.jl" begin
+@testset "Testing hmc.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false))
     # Set a seed
     rng = StableRNG(123)
     @numerical_testset "constrained bounded" begin
@@ -15,7 +15,7 @@
         chain = sample(
             rng,
             constrained_test(obs),
-            HMC(1.5, 3),# using a large step size (1.5)
+            HMC(1.5, 3; adtype=adbackend),# using a large step size (1.5)
             1000)
 
         check_numerical(chain, [:p], [10/14], atol=0.1)
@@ -35,13 +35,13 @@
         chain = sample(
             rng,
             constrained_simplex_test(obs12),
-            HMC(0.75, 2),
+            HMC(0.75, 2; adtype=adbackend),
             1000)
 
         check_numerical(chain, ["ps[1]", "ps[2]"], [5/16, 11/16], atol=0.015)
     end
     @numerical_testset "hmc reverse diff" begin
-        alg = HMC(0.1, 10)
+        alg = HMC(0.1, 10; adtype=adbackend)
         res = sample(rng, gdemo_default, alg, 4000)
         check_gdemo(res, rtol=0.1)
     end
@@ -53,7 +53,7 @@
         model_f = hmcmatrixsup()
         n_samples = 1_000
         vs = map(1:3) do _
-            chain = sample(rng, model_f, HMC(0.15, 7), n_samples)
+            chain = sample(rng, model_f, HMC(0.15, 7; adtype=adbackend), n_samples)
             r = reshape(Array(group(chain, :v)), n_samples, 2, 2)
             reshape(mean(r; dims = 1), 2, 2)
         end
@@ -103,14 +103,14 @@
         end
 
         # Sampling
-        chain = sample(rng, bnn(ts), HMC(0.1, 5), 10)
+        chain = sample(rng, bnn(ts), HMC(0.1, 5; adtype=adbackend), 10)
     end
 
-    @numerical_testset "hmcda inference" begin
-        alg1 = HMCDA(500, 0.8, 0.015)
-        # alg2 = Gibbs(HMCDA(200, 0.8, 0.35, :m), HMC(0.25, 3, :s))
-
-        # alg3 = Gibbs(HMC(0.25, 3, :m), PG(30, 3, :s))
+    @numerical_testset "hmcda inference" begin
+        alg1 = HMCDA(500, 0.8, 0.015; adtype=adbackend)
+        # alg2 = Gibbs(HMCDA(200, 0.8, 0.35, :m; adtype=adbackend), HMC(0.25, 3, :s; adtype=adbackend))
+
+        # alg3 = Gibbs(HMC(0.25, 3, :m; adtype=adbackend), PG(30, 3, :s))
         # alg3 = PG(50, 2000)
 
         res1 = sample(rng, gdemo_default, alg1, 3000)
@@ -122,27 +122,27 @@
         # @test mean(res2[:m]) ≈ 7/6 atol=0.2
     end
 
-    @numerical_testset "hmcda+gibbs inference" begin
+    @numerical_testset "hmcda+gibbs inference" begin
         rng = StableRNG(123)
         Random.seed!(12345) # particle samplers do not support user-provided `rng` yet
-        alg3 = Gibbs(PG(20, :s), HMCDA(500, 0.8, 0.25, init_ϵ = 0.05, :m))
+        alg3 = Gibbs(PG(20, :s), HMCDA(500, 0.8, 0.25, :m; init_ϵ=0.05, adtype=adbackend))
         res3 = sample(rng, gdemo_default, alg3, 3000, discard_initial=1000)
         check_gdemo(res3)
     end
 
     @turing_testset "hmcda constructor" begin
-        alg = HMCDA(0.8, 0.75)
+        alg = HMCDA(0.8, 0.75; adtype=adbackend)
         println(alg)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "HMCDA"
 
-        alg = HMCDA(200, 0.8, 0.75)
+        alg = HMCDA(200, 0.8, 0.75; adtype=adbackend)
         println(alg)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "HMCDA"
 
-        alg = HMCDA(200, 0.8, 0.75, :s)
+        alg = HMCDA(200, 0.8, 0.75, :s; adtype=adbackend)
         println(alg)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "HMCDA"
@@ -151,36 +151,36 @@
         @test isa(sampler, Sampler{<:Turing.Hamiltonian})
     end
     @numerical_testset "nuts inference" begin
-        alg = NUTS(1000, 0.8)
+        alg = NUTS(1000, 0.8; adtype=adbackend)
         res = sample(rng, gdemo_default, alg, 6000)
         check_gdemo(res)
     end
     @turing_testset "nuts constructor" begin
-        alg = NUTS(200, 0.65)
+        alg = NUTS(200, 0.65; adtype=adbackend)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "NUTS"
 
-        alg = NUTS(0.65)
+        alg = NUTS(0.65; adtype=adbackend)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "NUTS"
 
-        alg = NUTS(200, 0.65, :m)
+        alg = NUTS(200, 0.65, :m; adtype=adbackend)
         sampler = Sampler(alg, gdemo_default)
         @test DynamicPPL.alg_str(sampler) == "NUTS"
     end
     @turing_testset "check discard" begin
-        alg = NUTS(100, 0.8)
+        alg = NUTS(100, 0.8; adtype=adbackend)
 
-        c1 = sample(rng, gdemo_default, alg, 500, discard_adapt = true)
-        c2 = sample(rng, gdemo_default, alg, 500, discard_adapt = false)
+        c1 = sample(rng, gdemo_default, alg, 500, discard_adapt=true)
+        c2 = sample(rng, gdemo_default, alg, 500, discard_adapt=false)
 
         @test size(c1, 1) == 500
         @test size(c2, 1) == 500
     end
     @turing_testset "AHMC resize" begin
-        alg1 = Gibbs(PG(10, :m), NUTS(100, 0.65, :s))
-        alg2 = Gibbs(PG(10, :m), HMC(0.1, 3, :s))
-        alg3 = Gibbs(PG(10, :m), HMCDA(100, 0.65, 0.3, :s))
+        alg1 = Gibbs(PG(10, :m), NUTS(100, 0.65, :s; adtype=adbackend))
+        alg2 = Gibbs(PG(10, :m), HMC(0.1, 3, :s; adtype=adbackend))
+        alg3 = Gibbs(PG(10, :m), HMCDA(100, 0.65, 0.3, :s; adtype=adbackend))
         @test sample(rng, gdemo_default, alg1, 300) isa Chains
         @test sample(rng, gdemo_default, alg2, 300) isa Chains
         @test sample(rng, gdemo_default, alg3, 300) isa Chains
@@ -192,25 +192,25 @@
             m = Matrix{T}(undef, 2, 3)
             m .~ MvNormal(zeros(2), I)
         end
-        @test sample(rng, mwe1(), HMC(0.2, 4), 1_000) isa Chains
+        @test sample(rng, mwe1(), HMC(0.2, 4; adtype=adbackend), 1_000) isa Chains
 
-        @model function mwe2(::Type{T} = Matrix{Float64}) where T
+        @model function mwe2(::Type{T}=Matrix{Float64}) where {T}
             m = T(undef, 2, 3)
             m .~ MvNormal(zeros(2), I)
         end
-        @test sample(rng, mwe2(), HMC(0.2, 4), 1_000) isa Chains
+        @test sample(rng, mwe2(), HMC(0.2, 4; adtype=adbackend), 1_000) isa Chains
 
         # https://github.com/TuringLang/Turing.jl/issues/1308
-        @model function mwe3(::Type{T} = Array{Float64}) where T
+        @model function mwe3(::Type{T}=Array{Float64}) where {T}
             m = T(undef, 2, 3)
             m .~ MvNormal(zeros(2), I)
         end
-        @test sample(rng, mwe3(), HMC(0.2, 4), 1_000) isa Chains
+        @test sample(rng, mwe3(), HMC(0.2, 4; adtype=adbackend), 1_000) isa Chains
     end
 
     # issue #1923
     @turing_testset "reproducibility" begin
-        alg = NUTS(1000, 0.8)
+        alg = NUTS(1000, 0.8; adtype=adbackend)
         res1 = sample(StableRNG(123), gdemo_default, alg, 1000)
         res2 = sample(StableRNG(123), gdemo_default, alg, 1000)
         res3 = sample(StableRNG(123), gdemo_default, alg, 1000)
@@ -224,7 +224,7 @@
             s ~ truncated(Normal(3, 1), lower=0)
             m ~ Normal(0, sqrt(s))
         end
-        alg = NUTS(1000, 0.8)
+        alg = NUTS(1000, 0.8; adtype=adbackend)
         gdemo_default_prior = DynamicPPL.contextualize(demo_hmc_prior(), DynamicPPL.PriorContext())
         chain = sample(gdemo_default_prior, alg, 10_000)
         check_numerical(chain, [:s, :m], [mean(truncated(Normal(3, 1); lower=0)), 0], atol=0.1)
@@ -243,7 +243,7 @@
             :warn,
             "failed to find valid initial parameters in 10 tries; consider providing explicit initial parameters using the `init_params` keyword",
         ) (:info,) match_mode=:any begin
-            sample(demo_warn_init_params(), NUTS(), 5)
+            sample(demo_warn_init_params(), NUTS(; adtype=adbackend), 5)
         end
     end
 end
diff --git a/test/mcmc/sghmc.jl b/test/mcmc/sghmc.jl
index 959f40008..4405b505a 100644
--- a/test/mcmc/sghmc.jl
+++ b/test/mcmc/sghmc.jl
@@ -1,16 +1,16 @@
-@testset "sghmc.jl" begin
+@testset "Testing sghmc.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false))
     @turing_testset "sghmc constructor" begin
-        alg = SGHMC(; learning_rate=0.01, momentum_decay=0.1)
+        alg = SGHMC(; learning_rate=0.01, momentum_decay=0.1, adtype=adbackend)
         @test alg isa SGHMC
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGHMC}
 
-        alg = SGHMC(:m; learning_rate=0.01, momentum_decay=0.1)
+        alg = SGHMC(:m; learning_rate=0.01, momentum_decay=0.1, adtype=adbackend)
         @test alg isa SGHMC
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGHMC}
 
-        alg = SGHMC(:s; learning_rate=0.01, momentum_decay=0.1)
+        alg = SGHMC(:s; learning_rate=0.01, momentum_decay=0.1, adtype=adbackend)
         @test alg isa SGHMC
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGHMC}
@@ -18,25 +18,25 @@
 
     @numerical_testset "sghmc inference" begin
         rng = StableRNG(123)
-        alg = SGHMC(; learning_rate=0.02, momentum_decay=0.5)
+        alg = SGHMC(; learning_rate=0.02, momentum_decay=0.5, adtype=adbackend)
         chain = sample(rng, gdemo_default, alg, 10_000)
-        check_gdemo(chain, atol = 0.1)
+        check_gdemo(chain, atol=0.1)
     end
 end
 
-@testset "sgld.jl" begin
+@testset "Testing sgld.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false))
     @turing_testset "sgld constructor" begin
-        alg = SGLD(; stepsize = PolynomialStepsize(0.25))
+        alg = SGLD(; stepsize=PolynomialStepsize(0.25), adtype=adbackend)
         @test alg isa SGLD
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGLD}
 
-        alg = SGLD(:m; stepsize = PolynomialStepsize(0.25))
+        alg = SGLD(:m; stepsize=PolynomialStepsize(0.25), adtype=adbackend)
         @test alg isa SGLD
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGLD}
 
-        alg = SGLD(:s; stepsize = PolynomialStepsize(0.25))
+        alg = SGLD(:s; stepsize=PolynomialStepsize(0.25), adtype=adbackend)
         @test alg isa SGLD
         sampler = Turing.Sampler(alg)
         @test sampler isa Turing.Sampler{<:SGLD}
diff --git a/test/optimisation/OptimInterface.jl b/test/optimisation/OptimInterface.jl
index 5ac338876..919de9702 100644
--- a/test/optimisation/OptimInterface.jl
+++ b/test/optimisation/OptimInterface.jl
@@ -120,56 +120,54 @@ end
     end
 
     # FIXME: Some models doesn't work for Tracker and ReverseDiff.
-    if Turing.Essential.ADBACKEND[] === :forwarddiff
-        @testset "MAP for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS
-            result_true = DynamicPPL.TestUtils.posterior_optima(model)
+    @testset "MAP for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS
+        result_true = DynamicPPL.TestUtils.posterior_optima(model)
 
-            @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(), NelderMead()]
-                result = optimize(model, MAP(), optimizer)
-                vals = result.values
+        @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(), NelderMead()]
+            result = optimize(model, MAP(), optimizer)
+            vals = result.values
 
-                for vn in DynamicPPL.TestUtils.varnames(model)
-                    for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn))
-                        @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05
-                    end
+            for vn in DynamicPPL.TestUtils.varnames(model)
+                for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn))
+                    @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05
                 end
             end
         end
+    end
 
-        # Some of the models have one variance parameter per observation, and so
-        # the MLE should have the variances set to 0. Since we're working in
-        # transformed space, this corresponds to `-Inf`, which is of course not achievable.
-        # In particular, it can result in "early termniation" of the optimization process
-        # because we hit NaNs, etc. To avoid this, we set the `g_tol` and the `f_tol` to
-        # something larger than the default.
-        allowed_incorrect_mle = [
-            DynamicPPL.TestUtils.demo_dot_assume_dot_observe,
-            DynamicPPL.TestUtils.demo_assume_index_observe,
-            DynamicPPL.TestUtils.demo_assume_multivariate_observe,
-            DynamicPPL.TestUtils.demo_assume_observe_literal,
-            DynamicPPL.TestUtils.demo_dot_assume_observe_submodel,
-            DynamicPPL.TestUtils.demo_dot_assume_dot_observe_matrix,
-            DynamicPPL.TestUtils.demo_dot_assume_matrix_dot_observe_matrix,
-            DynamicPPL.TestUtils.demo_assume_submodel_observe_index_literal,
-            DynamicPPL.TestUtils.demo_dot_assume_observe_index_literal,
-            DynamicPPL.TestUtils.demo_assume_matrix_dot_observe_matrix
-        ]
+    # Some of the models have one variance parameter per observation, and so
+    # the MLE should have the variances set to 0. Since we're working in
+    # transformed space, this corresponds to `-Inf`, which is of course not achievable.
+    # In particular, it can result in "early termination" of the optimization process
+    # because we hit NaNs, etc. To avoid this, we set the `g_tol` and the `f_tol` to
+    # something larger than the default.
+    allowed_incorrect_mle = [
+        DynamicPPL.TestUtils.demo_dot_assume_dot_observe,
+        DynamicPPL.TestUtils.demo_assume_index_observe,
+        DynamicPPL.TestUtils.demo_assume_multivariate_observe,
+        DynamicPPL.TestUtils.demo_assume_observe_literal,
+        DynamicPPL.TestUtils.demo_dot_assume_observe_submodel,
+        DynamicPPL.TestUtils.demo_dot_assume_dot_observe_matrix,
+        DynamicPPL.TestUtils.demo_dot_assume_matrix_dot_observe_matrix,
+        DynamicPPL.TestUtils.demo_assume_submodel_observe_index_literal,
+        DynamicPPL.TestUtils.demo_dot_assume_observe_index_literal,
+        DynamicPPL.TestUtils.demo_assume_matrix_dot_observe_matrix
+    ]
-        @testset "MLE for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS
-            result_true = DynamicPPL.TestUtils.likelihood_optima(model)
-
-            # `NelderMead` seems to struggle with convergence here, so we exclude it.
-            @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(),]
-                result = optimize(model, MLE(), optimizer, Optim.Options(g_tol=1e-3, f_tol=1e-3))
-                vals = result.values
-
-                for vn in DynamicPPL.TestUtils.varnames(model)
-                    for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn))
-                        if model.f in allowed_incorrect_mle
-                            @test isfinite(get(result_true, vn_leaf))
-                        else
-                            @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05
-                        end
+    @testset "MLE for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS
+        result_true = DynamicPPL.TestUtils.likelihood_optima(model)
+
+        # `NelderMead` seems to struggle with convergence here, so we exclude it.
+        @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(),]
+            result = optimize(model, MLE(), optimizer, Optim.Options(g_tol=1e-3, f_tol=1e-3))
+            vals = result.values
+
+            for vn in DynamicPPL.TestUtils.varnames(model)
+                for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn))
+                    if model.f in allowed_incorrect_mle
+                        @test isfinite(get(result_true, vn_leaf))
+                    else
+                        @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05
                     end
                 end
             end
diff --git a/test/runtests.jl b/test/runtests.jl
index 5cb3ed51f..0000e32a6 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -64,34 +64,26 @@ macro timeit_include(path::AbstractString) :(@timeit TIMEROUTPUT $path include($
         @timeit_include("mcmc/ess.jl")
         @timeit_include("mcmc/is.jl")
     end
+
+    @timeit TIMEROUTPUT "inference" begin
+        @testset "inference with samplers" begin
+            @timeit_include("mcmc/gibbs.jl")
+            @timeit_include("mcmc/gibbs_conditional.jl")
+            @timeit_include("mcmc/hmc.jl")
+            @timeit_include("mcmc/Inference.jl")
+            @timeit_include("mcmc/sghmc.jl")
+            @timeit_include("mcmc/abstractmcmc.jl")
+            @timeit_include("mcmc/mh.jl")
+            @timeit_include("ext/dynamichmc.jl")
+        end
 
-    Turing.setrdcache(false)
-    for adbackend in (:forwarddiff, :reversediff)
-        @timeit TIMEROUTPUT "inference: $adbackend" begin
-            Turing.setadbackend(adbackend)
-            @info "Testing $(adbackend)"
-            @testset "inference: $adbackend" begin
-                @testset "samplers" begin
-                    @timeit_include("mcmc/gibbs.jl")
-                    @timeit_include("mcmc/gibbs_conditional.jl")
-                    @timeit_include("mcmc/hmc.jl")
-                    @timeit_include("mcmc/Inference.jl")
-                    @timeit_include("mcmc/sghmc.jl")
-                    @timeit_include("mcmc/abstractmcmc.jl")
-                    @timeit_include("mcmc/mh.jl")
-                    @timeit_include("ext/dynamichmc.jl")
-                end
-            end
-
-            @testset "variational algorithms : $adbackend" begin
-                @timeit_include("variational/advi.jl")
-            end
-
-            @testset "mode estimation : $adbackend" begin
-                @timeit_include("optimisation/OptimInterface.jl")
-                @timeit_include("ext/Optimisation.jl")
-            end
+        @testset "variational algorithms" begin
+            @timeit_include("variational/advi.jl")
+        end
+        @testset "mode estimation" begin
+            @timeit_include("optimisation/OptimInterface.jl")
+            @timeit_include("ext/Optimisation.jl")
         end
     end
 
@@ -99,7 +91,6 @@ macro timeit_include(path::AbstractString) :(@timeit TIMEROUTPUT $path include($
         @timeit_include("variational/optimisers.jl")
     end
 
-    Turing.setadbackend(:forwarddiff)
@testset "stdlib" begin @timeit_include("stdlib/distributions.jl") @timeit_include("stdlib/RandomMeasures.jl")