From 5b34d8015e741c8cd5103a520a80b03982900331 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 24 Sep 2020 12:46:49 +0100 Subject: [PATCH 01/12] added the main file from SciML --- src/TuringTutorials.jl | 123 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 src/TuringTutorials.jl diff --git a/src/TuringTutorials.jl b/src/TuringTutorials.jl new file mode 100644 index 000000000..2b336428c --- /dev/null +++ b/src/TuringTutorials.jl @@ -0,0 +1,123 @@ +module TuringTutorials + +using Weave, Pkg, InteractiveUtils, IJulia + +repo_directory = joinpath(@__DIR__,"..") +cssfile = joinpath(@__DIR__, "..", "templates", "skeleton_css.css") +latexfile = joinpath(@__DIR__, "..", "templates", "julia_tex.tpl") + +function weave_file(folder,file,build_list=(:script,:html,:pdf,:github,:notebook); kwargs...) + tmp = joinpath(repo_directory,"tutorials",folder,file) + Pkg.activate(dirname(tmp)) + Pkg.instantiate() + args = Dict{Symbol,String}(:folder=>folder,:file=>file) + if :script ∈ build_list + println("Building Script") + dir = joinpath(repo_directory,"script",folder) + isdir(dir) || mkpath(dir) + args[:doctype] = "script" + tangle(tmp;out_path=dir) + end + if :html ∈ build_list + println("Building HTML") + dir = joinpath(repo_directory,"html",folder) + isdir(dir) || mkpath(dir) + args[:doctype] = "html" + weave(tmp,doctype = "md2html",out_path=dir,args=args; fig_ext=".svg", css=cssfile, kwargs...) + end + if :pdf ∈ build_list + println("Building PDF") + dir = joinpath(repo_directory,"pdf",folder) + isdir(dir) || mkpath(dir) + args[:doctype] = "pdf" + try + weave(tmp,doctype="md2pdf",out_path=dir,args=args; template=latexfile, kwargs...) + catch ex + @warn "PDF generation failed" exception=(ex, catch_backtrace()) + end + end + if :github ∈ build_list + println("Building Github Markdown") + dir = joinpath(repo_directory,"markdown",folder) + isdir(dir) || mkpath(dir) + args[:doctype] = "github" + weave(tmp,doctype = "github",out_path=dir,args=args; kwargs...) + end + if :notebook ∈ build_list + println("Building Notebook") + dir = joinpath(repo_directory,"notebook",folder) + isdir(dir) || mkpath(dir) + args[:doctype] = "notebook" + Weave.convert_doc(tmp,joinpath(dir,file[1:end-4]*".ipynb")) + end +end + +function weave_all() + for folder in readdir(joinpath(repo_directory,"tutorials")) + folder == "test.jmd" && continue + weave_folder(folder) + end +end + +function weave_folder(folder) + for file in readdir(joinpath(repo_directory,"tutorials",folder)) + println("Building $(joinpath(folder,file)))") + try + weave_file(folder,file) + catch + end + end +end + +function tutorial_footer(folder=nothing, file=nothing; remove_homedir=true) + display("text/markdown", """ + ## Appendix + This tutorial is part of the TuringTutorials.jl repository, found at: .. 
+ """) + if folder !== nothing && file !== nothing + display("text/markdown", """ + To locally run this tutorial, do the following commands: + ``` + using TuringTutorials + TuringTutorials.weave_file("$folder","$file") + ``` + """) + end + display("text/markdown", "Computer Information:") + vinfo = sprint(InteractiveUtils.versioninfo) + display("text/markdown", """ + ``` + $(vinfo) + ``` + """) + + ctx = Pkg.API.Context() + pkgs = Pkg.Display.status(Pkg.API.Context(), use_as_api=true); + projfile = ctx.env.project_file + remove_homedir && (projfile = replace(projfile, homedir() => "~")) + + display("text/markdown",""" + Package Information: + """) + + md = "" + md *= "```\nStatus `$(projfile)`\n" + + for pkg in pkgs + if !isnothing(pkg.old) && pkg.old.ver !== nothing + md *= "[$(string(pkg.uuid))] $(string(pkg.name)) $(string(pkg.old.ver))\n" + else + md *= "[$(string(pkg.uuid))] $(string(pkg.name))\n" + end + end + md *= "```" + display("text/markdown", md) +end + +function open_notebooks() + Base.eval(Main, Meta.parse("import IJulia")) + path = joinpath(repo_directory,"notebook") + IJulia.notebook(;dir=path) +end + +end From c9814b6211f88e076953553d44809b49e36a2884 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 24 Sep 2020 12:48:59 +0100 Subject: [PATCH 02/12] re-did the introduction to VI file --- .../01-vi_introduction.jmd | 820 ++++++++++++++++++ tutorials/variational-inference/Project.toml | 13 + 2 files changed, 833 insertions(+) create mode 100644 tutorials/variational-inference/01-vi_introduction.jmd create mode 100644 tutorials/variational-inference/Project.toml diff --git a/tutorials/variational-inference/01-vi_introduction.jmd b/tutorials/variational-inference/01-vi_introduction.jmd new file mode 100644 index 000000000..3378bf45b --- /dev/null +++ b/tutorials/variational-inference/01-vi_introduction.jmd @@ -0,0 +1,820 @@ +--- +title: Variational inference (VI) in Turing.jl +permalink: /:collection/:name/ +--- + +In this post we'll have a look at what's know as **variational inference (VI)**, a family of _approximate_ Bayesian inference methods, and how to use it in Turing.jl as an alternative to other approaches such as MCMC. In particular, we will focus on one of the more standard VI methods called **Automatic Differentation Variational Inference (ADVI)**. + +Here we will focus on how to use VI in Turing and not much on the theory underlying VI. If you're interested in understanding the mathematics you can checkout [our write-up](../../docs/for-developers/variational_inference) or any other resource online (there a lot of great ones). + +Using VI in Turing.jl is very straight forward. If `model` denotes a definition of a `Turing.Model`, performing VI is as simple as +```julia; eval = false +m = model(data...) # instantiate model on the data +q = vi(m, vi_alg) # perform VI on `m` using the VI method `vi_alg`, which returns a `VariationalPosterior` +``` +Thus it's no more work than standard MCMC sampling in Turing. + +To get a bit more into what we can do with `vi`, we'll first have a look at a simple example and then we'll reproduce the [tutorial on Bayesian linear regression](../../tutorials/5-linearregression) using VI instead of MCMC. Finally we'll look at some of the different parameters of `vi` and how you for example can use your own custom variational family. 
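+
+To make the comparison concrete, here is a minimal sketch of the two workflows side by side (a preview only; both calls are run on an actual model later in this tutorial):
+
+```julia; eval = false
+m = model(data...)                         # instantiate model on the data
+chain = sample(m, NUTS(200, 0.65), 10000)  # "exact" posterior samples via MCMC
+q = vi(m, ADVI(10, 1000))                  # approximate posterior via ADVI
+rand(q, 1000)                              # draw samples from the approximate posterior
+```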
+ +## Setup + +```julia; results = "hidden" +using Random +using Turing +using Turing: Variational + +Random.seed!(42); +``` + +## Simple example: Normal-Gamma conjugate model + +The Normal-(Inverse)Gamma conjugate model is defined by the following generative process + +\begin{align} + s &\sim \mathrm{InverseGamma}(2, 3) \\\\ + m &\sim \mathcal{N}(0, s) \\\\ + x_i &\overset{\text{i.i.d.}}{=} \mathcal{N}(m, s), \quad i = 1, \dots, n +\end{align} + +Recall that *conjugate* refers to the fact that we can obtain a closed-form expression for the posterior. Of course one wouldn't use something like variational inference for a conjugate model, but it's useful as a simple demonstration as we can compare the result to the true posterior. + +First we generate some synthetic data, define the `Turing.Model` and instantiate the model on the data: + +```julia; results = "hidden" +# generate data +x = randn(2000); +``` + +```julia +@model model(x) = begin + s ~ InverseGamma(2, 3) + m ~ Normal(0.0, sqrt(s)) + for i = 1:length(x) + x[i] ~ Normal(m, sqrt(s)) + end +end; +``` + +```julia; results = "hidden" +# Instantiate model +m = model(x); +``` + +Now we'll produce some samples from the posterior using a MCMC method, which in constrast to VI is guaranteed to converge to the *exact* posterior (as the number of samples go to infinity). + +We'll produce 10 000 samples with 200 steps used for adaptation and a target acceptance rate of 0.65 + +If you don't understand what "adaptation" or "target acceptance rate" refers to, all you really need to know is that `NUTS` is known to be one of the most accurate and efficient samplers (when applicable) while requiring little to no hand-tuning to work well. + + +```julia; results = "hidden" +samples_nuts = sample(m, NUTS(200, 0.65), 10000); +``` + +Now let's try VI. The most important function you need to now about to do VI in Turing is `vi`: + + +```julia +print(@doc(Variational.vi)) +``` + +Additionally, you can pass +- an initial variational posterior `q`, for which we assume there exists a implementation of `update(::typeof(q), θ::AbstractVector)` returning an updated posterior `q` with parameters `θ`. +- a function mapping $$\theta \mapsto q_{\theta}$$ (denoted above `getq`) together with initial parameters `θ`. This provides more flexibility in the types of variational families that we can use, and can sometimes be slightly more convenient for quick and rough work. + +By default, i.e. when calling `vi(m, advi)`, Turing use a *mean-field* approximation with a multivariate normal as the base-distribution. Mean-field refers to the fact that we assume all the latent variables to be *independent*. This the "standard" ADVI approach; see [Automatic Differentiation Variational Inference (2016)](https://arxiv.org/abs/1603.00788) for more. In Turing, one can obtain such a mean-field approximation by calling `Variational.meanfield(model)` for which there exists an internal implementation for `update`: + + +```julia +print(@doc(Variational.meanfield)) +``` + +Currently the only implementation of `VariationalInference` available is `ADVI`, which is very convenient and applicable as long as your `Model` is differentiable with respect to the *variational parameters*, that is, the parameters of your variational distribution, e.g. mean and variance in the mean-field approximation. 
+ + +```julia +print(@doc(Variational.ADVI)) +``` + +To perform VI on the model `m` using 10 samples for gradient estimation and taking 1000 gradient steps is then as simple as: + + +```julia; results = "hidden" +# ADVI +advi = ADVI(10, 1000) +q = vi(m, advi); +``` + +Unfortunately, for such a small problem Turing's new `NUTS` sampler is *so* efficient now that it's not that much more efficient to use ADVI. So, so very unfortunate... + +With that being said, this is not the case in general. For very complex models we'll later find that `ADVI` produces very reasonable results in a much shorter time than `NUTS`. + +And one significant advantage of using `vi` is that we can sample from the resulting `q` with ease. In fact, the result of the `vi` call is a `TransformedDistribution` from Bijectors.jl, and it implements the Distributions.jl interface for a `Distribution`: + + +```julia +q isa MultivariateDistribution +``` + +This means that we can call `rand` to sample from the variational posterior `q` + + +```julia +rand(q) +``` + +and `logpdf` to compute the log-probability + + +```julia +logpdf(q, rand(q)) +``` + +Let's check the first and second moments of the data to see how our approximation compares to the point-estimates form the data: + + +```julia +var(x), mean(x) +``` + +```julia +(mean(rand(q, 1000); dims = 2)..., ) +``` + +That's pretty close! But we're Bayesian so we're not interested in *just* matching the mean. +Let's instead look the actual density `q`. + +For that we need samples: + + +```julia; results = "hidden" +samples = rand(q, 10000); +``` + +```julia +# setup for plotting +using Plots, LaTeXStrings, StatsPlots +pyplot() +``` + +```julia +p1 = histogram(samples[1, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") +density!(samples[1, :], label = "s (ADVI)", color = :blue, linewidth = 2) +density!(collect(skipmissing(samples_nuts[:s].data)), label = "s (NUTS)", color = :green, linewidth = 2) +vline!([var(x)], label = "s (data)", color = :black) +vline!([mean(samples[1, :])], color = :blue, label ="") + +p2 = histogram(samples[2, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") +density!(samples[2, :], label = "m (ADVI)", color = :blue, linewidth = 2) +density!(collect(skipmissing(samples_nuts[:m].data)), label = "m (NUTS)", color = :green, linewidth = 2) +vline!([mean(x)], color = :black, label = "m (data)") +vline!([mean(samples[2, :])], color = :blue, label="") + +plot(p1, p2, layout=(2, 1), size=(900, 500)) +``` + +For this particular `Model`, we can in fact obtain the posterior of the latent variables in closed form. This allows us to compare both `NUTS` and `ADVI` to the true posterior $$p(s, m \mid \{x_i\}_{i = 1}^n )$$. + +*The code below is just work to get the marginals $$p(s \mid \{x_i\}_{i = 1}^n)$$ and $$p(m \mid \{x_i\}_{i = 1}^n)$$ from the posterior obtained using ConjugatePriors.jl. Feel free to skip it.* + + +```julia +# used to compute closed form expression of posterior +using ConjugatePriors + +# closed form computation +# notation mapping has been verified by explicitly computing expressions +# in "Conjugate Bayesian analysis of the Gaussian distribution" by Murphy +μ₀ = 0.0 # => μ +κ₀ = 1.0 # => ν, which scales the precision of the Normal +α₀ = 2.0 # => "shape" +β₀ = 3.0 # => "rate", which is 1 / θ, where θ is "scale" + +# prior +pri = NormalGamma(μ₀, κ₀, α₀, β₀) + +# posterior +post = posterior(pri, Normal, x) + +# marginal distribution of τ = 1 / σ² +# Eq. 
(90) in "Conjugate Bayesian analysis of the Gaussian distribution" by Murphy +# `scale(post)` = θ +p_τ = Gamma(post.shape, scale(post)) +p_σ²_pdf = z -> pdf(p_τ, 1 / z) # τ => 1 / σ² + +# marginal of μ +# Eq. (91) in "Conjugate Bayesian analysis of the Gaussian distribution" by Murphy +p_μ = TDist(2 * post.shape) + +μₙ = post.mu # μ → μ +κₙ = post.nu # κ → ν +αₙ = post.shape # α → shape +βₙ = post.rate # β → rate + +# numerically more stable but doesn't seem to have effect; issue is probably internal to +# `pdf` which needs to compute ≈ Γ(1000) +p_μ_pdf = z -> exp(logpdf(p_μ, (z - μₙ) * exp(- 0.5 * log(βₙ) + 0.5 * log(αₙ) + 0.5 * log(κₙ)))) + +# posterior plots +p1 = plot(); +histogram!(samples[1, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") +density!(samples[1, :], label = "s (ADVI)", color = :blue) +density!(vec(samples_nuts[:s].data), label = "s (NUTS)", color = :green) +vline!([mean(samples[1, :])], linewidth = 1.5, color = :blue, label ="") + +# normalize using Riemann approx. because of (almost certainly) numerical issues +Δ = 0.001 +r = 0.75:0.001:1.50 +norm_const = sum(p_σ²_pdf.(r) .* Δ) +plot!(r, p_σ²_pdf, label = "s (posterior)", color = :red); +vline!([var(x)], label = "s (data)", linewidth = 1.5, color = :black, alpha = 0.7); +xlims!(0.75, 1.35); + +p2 = plot(); +histogram!(samples[2, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") +density!(samples[2, :], label = "m (ADVI)", color = :blue) +density!(vec(samples_nuts[:m].data), label = "m (NUTS)", color = :green) +vline!([mean(samples[2, :])], linewidth = 1.5, color = :blue, label="") + + +# normalize using Riemann approx. because of (almost certainly) numerical issues +Δ = 0.0001 +r = -0.1 + mean(x):Δ:0.1 + mean(x) +norm_const = sum(p_μ_pdf.(r) .* Δ) +plot!(r, z -> p_μ_pdf(z) / norm_const, label = "m (posterior)", color = :red); +vline!([mean(x)], label = "m (data)", linewidth = 1.5, color = :black, alpha = 0.7); + +xlims!(-0.25, 0.25); + +p = plot(p1, p2; layout=(2, 1), size=(900, 500)) +``` + + +# Bayesian linear regression example using `ADVI` + +This is simply a duplication of the tutorial [5. Linear regression](../../tutorials/5-linearregression) but now with the addition of an approximate posterior obtained using `ADVI`. + +As we'll see, there is really no additional work required to apply variational inference to a more complex `Model`. + +## Copy-paste from [5. Linear regression](../../tutorials/5-linearregression) + +This section is basically copy-pasting the code from the [linear regression tutorial](../../tutorials/5-linearregression). + + +```julia; results = "hidden" +Random.seed!(1); +``` + + +```julia; results = "hidden" +# Import RDatasets. +using RDatasets + +# Hide the progress prompt while sampling. +Turing.turnprogress(true); +``` + + +```julia +# Import the "Default" dataset. +data = RDatasets.dataset("datasets", "mtcars"); + +# Show the first six rows of the dataset. +first(data, 6) +``` + +```julia +# Function to split samples. +function split_data(df, at = 0.70) + r = size(df,1) + index = Int(round(r * at)) + train = df[1:index, :] + test = df[(index+1):end, :] + return train, test +end + +# A handy helper function to rescale our dataset. +function standardize(x) + return (x .- mean(x, dims=1)) ./ std(x, dims=1), x +end + +# Another helper function to unstandardize our datasets. +function unstandardize(x, orig) + return (x .+ mean(orig, dims=1)) .* std(orig, dims=1) +end +``` + +```julia; results = "hidden" +# Remove the model column. 
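+# (It holds the car names as strings, so it cannot be standardized along with the numeric columns.)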
+select!(data, Not(:Model)) + +# Standardize our dataset. +(std_data, data_arr) = standardize(Matrix(data)) + +# Split our dataset 70%/30% into training/test sets. +train, test = split_data(std_data, 0.7) + +# Save dataframe versions of our dataset. +train_cut = DataFrame(train, names(data)) +test_cut = DataFrame(test, names(data)) + +# Create our labels. These are the values we are trying to predict. +train_label = train_cut[:, :MPG] +test_label = test_cut[:, :MPG] + +# Get the list of columns to keep. +remove_names = filter(x->!in(x, [:MPG, :Model]), names(data)) + +# Filter the test and train sets. +train = Matrix(train_cut[:,remove_names]); +test = Matrix(test_cut[:,remove_names]); +``` + + +```julia +# Bayesian linear regression. +@model linear_regression(x, y, n_obs, n_vars, ::Type{T}=Vector{Float64}) where {T} = begin + # Set variance prior. + σ₂ ~ truncated(Normal(0,100), 0, Inf) + + # Set intercept prior. + intercept ~ Normal(0, 3) + + # Set the priors on our coefficients. + coefficients ~ MvNormal(zeros(n_vars), 10 * ones(n_vars)) + + # Calculate all the mu terms. + mu = intercept .+ x * coefficients + y ~ MvNormal(mu, σ₂) +end; +``` + + +```julia; results = "hidden" +n_obs, n_vars = size(train) +m = linear_regression(train, train_label, n_obs, n_vars); +``` + +## Performing VI + +First we define the initial variational distribution, or, equivalently, the family of distributions to consider. We're going to use the same mean-field approximation as Turing will use by default when we call `vi(m, advi)`, which we obtain by calling `Variational.meanfield`. This returns a `TransformedDistribution` with a `TuringDiagMvNormal` as the underlying distribution and the transformation mapping from the reals to the domain of the latent variables. + + +```julia +q0 = Variational.meanfield(m) +typeof(q0) +``` + +```julia +advi = ADVI(10, 10_000) +``` + +Turing also provides a couple of different optimizers: +- `TruncatedADAGrad` (default) +- `DecayedADAGrad` +as these are well-suited for problems with high-variance stochastic objectives, which is usually what the ELBO ends up being at different times in our optimization process. + +With that being said, thanks to Requires.jl, if we add a `using Flux` prior to `using Turing` we can also make use of all the optimizers in `Flux`, e.g. `ADAM`, without any additional changes to your code! For example: +```julia; eval = false +using Flux, Turing +using Turing.Variational + +vi(m, advi; optimizer = Flux.ADAM()) +``` +just works. + +For this problem we'll use the `DecayedADAGrad` from Turing: + + +```julia +opt = Variational.DecayedADAGrad(1e-2, 1.1, 0.9) +``` + + +```julia +q = vi(m, advi, q0; optimizer = opt) +typeof(q) +``` + +*Note: as mentioned before, we internally define a `update(q::TransformedDistribution{<:TuringDiagMvNormal}, θ::AbstractVector)` method which takes in the current variational approximation `q` together with new parameters `z` and returns the new variational approximation. This is required so that we can actually update the `Distribution` object after each optimization step.* + +*Alternatively, we can instead provide the mapping $$\theta \mapsto q_{\theta}$$ directly together with initial parameters using the signature `vi(m, advi, getq, θ_init)` as mentioned earlier. 
We'll see an explicit example of this later on!* + +To compute statistics for our approximation we need samples: + + +```julia; results = "hidden" +z = rand(q, 10_000); +``` + +Now we can for example look at the average + + +```julia +avg = vec(mean(z; dims = 2)) +``` + +The vector has the same ordering as the model, e.g. in this case `σ₂` has index `1`, `intercept` has index `2` and `coefficients` has indices `3:12`. If you forget or you might want to do something programmatically with the result, you can obtain the `sym → indices` mapping as follows: + + +```julia +_, sym2range = bijector(m, Val(true)); +sym2range +``` + +```julia +avg[union(sym2range[:σ₂]...)] +``` + +```julia +avg[union(sym2range[:intercept]...)] +``` + +```julia +avg[union(sym2range[:coefficients]...)] +``` + +*Note: as you can see, this is slightly awkward to work with at the moment. We'll soon add a better way of dealing with this.* + +With a bit of work (this will be much easier in the future), we can also visualize the approximate marginals of the different variables, similar to `plot(chain)`: + + +```julia +function plot_variational_marginals(z, sym2range) + ps = [] + + for (i, sym) in enumerate(keys(sym2range)) + indices = union(sym2range[sym]...) # <= array of ranges + if sum(length.(indices)) > 1 + offset = 1 + for r in indices + for j in r + p = density(z[j, :], title = "$(sym)[$offset]", titlefontsize = 10, label = "") + push!(ps, p) + + offset += 1 + end + end + else + p = density(z[first(indices), :], title = "$(sym)", titlefontsize = 10, label = "") + push!(ps, p) + end + end + + return plot(ps..., layout = (length(ps), 1), size = (500, 1500)) +end +``` + + +```julia +plot_variational_marginals(z, sym2range) +``` + +And let's compare this to using the `NUTS` sampler: + + +```julia; results = "hidden" +chain = sample(m, NUTS(0.65), 10_000); +``` + +```julia +plot(chain) +``` + + +```julia +vi_mean = vec(mean(z; dims = 2))[[union(sym2range[:coefficients]...)..., union(sym2range[:intercept]...)..., union(sym2range[:σ₂]...)...]] +``` + +```julia +mean(chain).nt.mean +``` + +One thing we can look at is simply the squared error between the means: + + +```julia +sum(abs2, mean(chain).nt.mean .- vi_mean) +``` + +That looks pretty good! But let's see how the predictive distributions looks for the two. + +## Prediction + +Similarily to the linear regression tutorial, we're going to compare to multivariate ordinary linear regression using the `GLM` package: + + +```julia; results = "hidden" +# Import the GLM package. +using GLM + +# Perform multivariate OLS. +ols = lm(@formula(MPG ~ Cyl + Disp + HP + DRat + WT + QSec + VS + AM + Gear + Carb), train_cut) + +# Store our predictions in the original dataframe. +train_cut.OLSPrediction = unstandardize(GLM.predict(ols), data.MPG); +test_cut.OLSPrediction = unstandardize(GLM.predict(ols, test_cut), data.MPG); +``` + + +```julia +# Make a prediction given an input vector. +function prediction_chain(chain, x) + p = get_params(chain) + α = mean(p.intercept) + β = collect(mean.(p.coefficients)) + return α .+ x * β +end +``` + +```julia +# Make a prediction using samples from the variational posterior given an input vector. 
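+# Two methods: one for a single posterior draw (a parameter vector), one for a matrix whose columns are draws.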
+function prediction(samples::AbstractVector, sym2ranges, x) + α = mean(samples[union(sym2ranges[:intercept]...)]) + β = vec(mean(samples[union(sym2ranges[:coefficients]...)]; dims = 2)) + return α .+ x * β +end + +function prediction(samples::AbstractMatrix, sym2ranges, x) + α = mean(samples[union(sym2ranges[:intercept]...), :]) + β = vec(mean(samples[union(sym2ranges[:coefficients]...), :]; dims = 2)) + return α .+ x * β +end +``` + +```julia; results = "hidden" +# Unstandardize the dependent variable. +train_cut.MPG = unstandardize(train_cut.MPG, data.MPG); +test_cut.MPG = unstandardize(test_cut.MPG, data.MPG); +``` + + +```julia +# Show the first side rows of the modified dataframe. +first(test_cut, 6) +``` + +```julia; results = "hidden" +z = rand(q, 10_000); +``` + + +```julia; results = "hidden" +# Calculate the predictions for the training and testing sets using the samples `z` from variational posterior +train_cut.VIPredictions = unstandardize(prediction(z, sym2range, train), data.MPG); +test_cut.VIPredictions = unstandardize(prediction(z, sym2range, test), data.MPG); + +train_cut.BayesPredictions = unstandardize(prediction_chain(chain, train), data.MPG); +test_cut.BayesPredictions = unstandardize(prediction_chain(chain, test), data.MPG); +``` + + +```julia +vi_loss1 = mean((train_cut.VIPredictions - train_cut.MPG).^2) +bayes_loss1 = mean((train_cut.BayesPredictions - train_cut.MPG).^2) +ols_loss1 = mean((train_cut.OLSPrediction - train_cut.MPG).^2) + +vi_loss2 = mean((test_cut.VIPredictions - test_cut.MPG).^2) +bayes_loss2 = mean((test_cut.BayesPredictions - test_cut.MPG).^2) +ols_loss2 = mean((test_cut.OLSPrediction - test_cut.MPG).^2) + +println("Training set: + VI loss: $vi_loss1 + Bayes loss: $bayes_loss1 + OLS loss: $ols_loss1 +Test set: + VI loss: $vi_loss2 + Bayes loss: $bayes_loss2 + OLS loss: $ols_loss2") +``` + + +Interestingly the squared difference between true- and mean-prediction on the test-set is actually *better* for the mean-field variational posterior than for the "true" posterior obtained by MCMC sampling using `NUTS`. But, as Bayesians, we know that the mean doesn't tell the entire story. One quick check is to look at the mean predictions ± standard deviation of the two different approaches: + + +```julia +z = rand(q, 1000); +preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); + +scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) +scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") +xaxis!(1:size(test, 1)) +ylims!(95, 140) +title!("Mean-field ADVI (Normal)") +``` + +```julia +preds = hcat([unstandardize(prediction_chain(chain[i], test), data.MPG) for i = 1:5:size(chain, 1)]...); + +scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) +scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") +xaxis!(1:size(test, 1)) +ylims!(95, 140) +title!("MCMC (NUTS)") +``` + +Indeed we see that the MCMC approach generally provides better uncertainty estimates than the mean-field ADVI approach! Good. So all the work we've done to make MCMC fast isn't for nothing. 
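+
+If you prefer a rough numerical counterpart to the visual comparison above, a sketch along the following lines (reusing `q`, `chain`, `sym2range`, `prediction`, and `prediction_chain` defined earlier) compares the average width of the two predictive bands:
+
+```julia; eval = false
+z = rand(q, 1000);
+vi_preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...);
+mcmc_preds = hcat([unstandardize(prediction_chain(chain[i], test), data.MPG) for i = 1:5:size(chain, 1)]...);
+
+# Average per-test-point predictive standard deviation; the narrower VI bands reflect the
+# well-known tendency of mean-field approximations to underestimate posterior uncertainty.
+mean(std(vi_preds; dims = 2)), mean(std(mcmc_preds; dims = 2))
+```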
+ +## Alternative: provide parameter-to-distribution instead of `q` with`update` implemented + +As mentioned earlier, it's also possible to just provide the mapping $$\theta \mapsto q_{\theta}$$ rather than the variational family / initial variational posterior `q`, i.e. use the interface `vi(m, advi, getq, θ_init)` where `getq` is the mapping $$\theta \mapsto q_{\theta}$$ + +In this section we're going to construct a mean-field approximation to the model by hand using a composition of`Shift` and `Scale` from Bijectors.jl togheter with a standard multivariate Gaussian as the base distribution. + + +```julia +using Bijectors +``` + + +```julia +using Bijectors: Scale, Shift +``` + + +```julia +d = length(q) +base_dist = Turing.DistributionsAD.TuringDiagMvNormal(zeros(d), ones(d)) +``` + +`bijector(model::Turing.Model)` is defined by Turing, and will return a `bijector` which takes you from the space of the latent variables to the real space. In this particular case, this is a mapping `((0, ∞) × ℝ × ℝ¹⁰) → ℝ¹²`. We're interested in using a normal distribution as a base-distribution and transform samples to the latent space, thus we need the inverse mapping from the reals to the latent space: + + +```julia; results = "hidden" +to_constrained = inv(bijector(m)); +``` + + +```julia +function getq(θ) + d = length(θ) ÷ 2 + A = @inbounds θ[1:d] + b = @inbounds θ[d + 1: 2 * d] + + b = to_constrained ∘ Shift(b; dim = Val(1)) ∘ Scale(exp.(A); dim = Val(1)) + + return transformed(base_dist, b) +end +``` + +```julia; results = "hidden" +q_mf_normal = vi(m, advi, getq, randn(2 * d)); +``` + +```julia +p1 = plot_variational_marginals(rand(q_mf_normal, 10_000), sym2range) # MvDiagNormal + Affine transformation + to_constrained +p2 = plot_variational_marginals(rand(q, 10_000), sym2range) # Turing.meanfield(m) + +plot(p1, p2, layout = (1, 2), size = (800, 2000)) +``` +As expected, the fits look pretty much identical. + +But using this interface it becomes trivial to go beyond the mean-field assumption we made for the variational posterior, as we'll see in the next section. + +### Relaxing the mean-field assumption + +Here we'll instead consider the variational family to be a full non-diagonal multivariate Gaussian. As in the previous section we'll implement this by transforming a standard multivariate Gaussian using `Scale` and `Shift`, but now `Scale` will instead be using a lower-triangular matrix (representing the Cholesky of the covariance matrix of a multivariate normal) in constrast to the diagonal matrix we used in for the mean-field approximate posterior. + + +```julia +using LinearAlgebra +``` + +```julia +# Using `ComponentArrays.jl` together with `UnPack.jl` makes our lives much easier. +using ComponentArrays, UnPack +``` + +```julia +proto_arr = ComponentArray( + L = zeros(d, d), + b = zeros(d) +) +proto_axes = proto_arr |> getaxes +num_params = length(proto_arr) + +function getq(θ) + L, b = begin + @unpack L, b = ComponentArray(θ, proto_axes) + LowerTriangular(L), b + end + # For this to represent a covariance matrix we need to ensure that the diagonal is positive. + # We can enforce this by zeroing out the diagonal and then adding back the diagonal exponentiated. 
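+    # (With a strictly positive diagonal, `A` below is a valid Cholesky factor: `A * A'` is positive definite and the `Scale` bijector is invertible.)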
+ D = Diagonal(diag(L)) + A = L - D + exp(D) # exp for Diagonal is the same as exponentiating only the diagonal entries + + b = to_constrained ∘ Shift(b; dim = Val(1)) ∘ Scale(A; dim = Val(1)) + + return transformed(base_dist, b) +end +``` + +```julia +advi = ADVI(10, 20_000) +``` + +```julia; results = "hidden" +q_full_normal = vi(m, advi, getq, randn(num_params); optimizer = Variational.DecayedADAGrad(1e-2)); +``` + +Let's have a look at the learned covariance matrix: + + +```julia +A = q_full_normal.transform.ts[1].a +``` + +```julia +heatmap(cov(A * A')) +``` + +```julia; results = "hidden" +zs = rand(q_full_normal, 10_000); +``` + + +```julia +p1 = plot_variational_marginals(rand(q_mf_normal, 10_000), sym2range) +p2 = plot_variational_marginals(rand(q_full_normal, 10_000), sym2range) + +plot(p1, p2, layout = (1, 2), size = (800, 2000)) +``` + + +So it seems like the "full" ADVI approach, i.e. no mean-field assumption, obtain the same modes as the mean-field approach but with greater uncertainty for some of the `coefficients`. This + + +```julia; results = "hidden" +# Unfortunately, it seems like this has quite a high variance which is likely to be due to numerical instability, +# so we consider a larger number of samples. If we get a couple of outliers due to numerical issues, +# these kind affect the mean prediction greatly. +z = rand(q_full_normal, 10_000); +``` + + +```julia; results = "hidden" +train_cut.VIFullPredictions = unstandardize(prediction(z, sym2range, train), data.MPG); +test_cut.VIFullPredictions = unstandardize(prediction(z, sym2range, test), data.MPG); +``` + + +```julia +vi_loss1 = mean((train_cut.VIPredictions - train_cut.MPG).^2) +vifull_loss1 = mean((train_cut.VIFullPredictions - train_cut.MPG).^2) +bayes_loss1 = mean((train_cut.BayesPredictions - train_cut.MPG).^2) +ols_loss1 = mean((train_cut.OLSPrediction - train_cut.MPG).^2) + +vi_loss2 = mean((test_cut.VIPredictions - test_cut.MPG).^2) +vifull_loss2 = mean((test_cut.VIFullPredictions - test_cut.MPG).^2) +bayes_loss2 = mean((test_cut.BayesPredictions - test_cut.MPG).^2) +ols_loss2 = mean((test_cut.OLSPrediction - test_cut.MPG).^2) + +println("Training set: + VI loss: $vi_loss1 + Bayes loss: $bayes_loss1 + OLS loss: $ols_loss1 +Test set: + VI loss: $vi_loss2 + Bayes loss: $bayes_loss2 + OLS loss: $ols_loss2") +``` + +```julia +z = rand(q_mf_normal, 1000); +preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); + +p1 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) +scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") +xaxis!(1:size(test, 1)) +ylims!(95, 140) +title!("Mean-field ADVI (Normal)") +``` + +```julia +z = rand(q_full_normal, 1000); +preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); + +p2 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) +scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") +xaxis!(1:size(test, 1)) +ylims!(95, 140) +title!("Full ADVI (Normal)") +``` + +```julia +preds = hcat([unstandardize(prediction_chain(chain[i], test), data.MPG) for i = 1:5:size(chain, 1)]...); + +p3 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) +scatter!(1:size(test, 1), 
unstandardize(test_label, data.MPG), label="true") +xaxis!(1:size(test, 1)) +ylims!(95, 140) +title!("MCMC (NUTS)") +``` + +```julia +plot(p1, p2, p3, layout = (1, 3), size = (900, 250), label="") +``` + +Here we actually see that indeed both the full ADVI and the MCMC approaches does a much better job of quantifying the uncertainty of predictions for never-before-seen samples, with full ADVI seemingly *overestimating* the variance slightly compared to MCMC. + +So now you know how to do perform VI on your Turing.jl model! Great isn't it? diff --git a/tutorials/variational-inference/Project.toml b/tutorials/variational-inference/Project.toml new file mode 100644 index 000000000..1082228ed --- /dev/null +++ b/tutorials/variational-inference/Project.toml @@ -0,0 +1,13 @@ +[deps] +Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" +ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +ConjugatePriors = "1624bea9-42b1-5fc1-afd3-e96f729c8d6c" +GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" +RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" +Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" +UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" From 84d3861c694916a0514b64a5a9f2daf73b69a83d Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 24 Sep 2020 12:53:25 +0100 Subject: [PATCH 03/12] added compat section to VI tutorial --- tutorials/variational-inference/Project.toml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tutorials/variational-inference/Project.toml b/tutorials/variational-inference/Project.toml index 1082228ed..dd28a2f4e 100644 --- a/tutorials/variational-inference/Project.toml +++ b/tutorials/variational-inference/Project.toml @@ -11,3 +11,17 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" + +[compat] +Bijectors = "0.8.5" +ComponentArrays = "0.8.2" +ConjugatePriors = "0.4.0" +GLM = "1.3.10" +LaTeXStrings = "1.2.0" +Plots = "1.6.6" +PyPlot = "2.9.0" +RDatasets = "0.6.10" +StatsPlots = "0.14.13" +Turing = "0.14.3" +UnPack = "1.0.2" + From 653dfc05bc71a98d3fea87211d271ccacb9f4979 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 24 Sep 2020 12:55:10 +0100 Subject: [PATCH 04/12] updated Project.toml --- Project.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Project.toml b/Project.toml index f947bb9a0..6e6537360 100644 --- a/Project.toml +++ b/Project.toml @@ -1,3 +1,6 @@ +name = "TuringTutorials" +version = "0.1.0" + [deps] Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" ConjugatePriors = "1624bea9-42b1-5fc1-afd3-e96f729c8d6c" @@ -11,10 +14,13 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" MLDataUtils = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Plots = 
"91a5bcdd-55d7-5caf-9e0b-520d859cae80" PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" @@ -22,6 +28,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" +UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" Weave = "44d3d7a6-8a23-5bf8-98c5-b353f8df5ec9" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" From 7ea0e8fd078f4b3f5eba62aa022056f26d0babab Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 24 Sep 2020 14:15:20 +0100 Subject: [PATCH 05/12] renamed file --- .../01-variational-inference.jmd | 820 ++++++++++++++++++ 1 file changed, 820 insertions(+) create mode 100644 tutorials/variational-inference/01-variational-inference.jmd diff --git a/tutorials/variational-inference/01-variational-inference.jmd b/tutorials/variational-inference/01-variational-inference.jmd new file mode 100644 index 000000000..3378bf45b --- /dev/null +++ b/tutorials/variational-inference/01-variational-inference.jmd @@ -0,0 +1,820 @@ +--- +title: Variational inference (VI) in Turing.jl +permalink: /:collection/:name/ +--- + +In this post we'll have a look at what's know as **variational inference (VI)**, a family of _approximate_ Bayesian inference methods, and how to use it in Turing.jl as an alternative to other approaches such as MCMC. In particular, we will focus on one of the more standard VI methods called **Automatic Differentation Variational Inference (ADVI)**. + +Here we will focus on how to use VI in Turing and not much on the theory underlying VI. If you're interested in understanding the mathematics you can checkout [our write-up](../../docs/for-developers/variational_inference) or any other resource online (there a lot of great ones). + +Using VI in Turing.jl is very straight forward. If `model` denotes a definition of a `Turing.Model`, performing VI is as simple as +```julia; eval = false +m = model(data...) # instantiate model on the data +q = vi(m, vi_alg) # perform VI on `m` using the VI method `vi_alg`, which returns a `VariationalPosterior` +``` +Thus it's no more work than standard MCMC sampling in Turing. + +To get a bit more into what we can do with `vi`, we'll first have a look at a simple example and then we'll reproduce the [tutorial on Bayesian linear regression](../../tutorials/5-linearregression) using VI instead of MCMC. Finally we'll look at some of the different parameters of `vi` and how you for example can use your own custom variational family. + +## Setup + +```julia; results = "hidden" +using Random +using Turing +using Turing: Variational + +Random.seed!(42); +``` + +## Simple example: Normal-Gamma conjugate model + +The Normal-(Inverse)Gamma conjugate model is defined by the following generative process + +\begin{align} + s &\sim \mathrm{InverseGamma}(2, 3) \\\\ + m &\sim \mathcal{N}(0, s) \\\\ + x_i &\overset{\text{i.i.d.}}{=} \mathcal{N}(m, s), \quad i = 1, \dots, n +\end{align} + +Recall that *conjugate* refers to the fact that we can obtain a closed-form expression for the posterior. Of course one wouldn't use something like variational inference for a conjugate model, but it's useful as a simple demonstration as we can compare the result to the true posterior. 
+ +First we generate some synthetic data, define the `Turing.Model` and instantiate the model on the data: + +```julia; results = "hidden" +# generate data +x = randn(2000); +``` + +```julia +@model model(x) = begin + s ~ InverseGamma(2, 3) + m ~ Normal(0.0, sqrt(s)) + for i = 1:length(x) + x[i] ~ Normal(m, sqrt(s)) + end +end; +``` + +```julia; results = "hidden" +# Instantiate model +m = model(x); +``` + +Now we'll produce some samples from the posterior using a MCMC method, which in constrast to VI is guaranteed to converge to the *exact* posterior (as the number of samples go to infinity). + +We'll produce 10 000 samples with 200 steps used for adaptation and a target acceptance rate of 0.65 + +If you don't understand what "adaptation" or "target acceptance rate" refers to, all you really need to know is that `NUTS` is known to be one of the most accurate and efficient samplers (when applicable) while requiring little to no hand-tuning to work well. + + +```julia; results = "hidden" +samples_nuts = sample(m, NUTS(200, 0.65), 10000); +``` + +Now let's try VI. The most important function you need to now about to do VI in Turing is `vi`: + + +```julia +print(@doc(Variational.vi)) +``` + +Additionally, you can pass +- an initial variational posterior `q`, for which we assume there exists a implementation of `update(::typeof(q), θ::AbstractVector)` returning an updated posterior `q` with parameters `θ`. +- a function mapping $$\theta \mapsto q_{\theta}$$ (denoted above `getq`) together with initial parameters `θ`. This provides more flexibility in the types of variational families that we can use, and can sometimes be slightly more convenient for quick and rough work. + +By default, i.e. when calling `vi(m, advi)`, Turing use a *mean-field* approximation with a multivariate normal as the base-distribution. Mean-field refers to the fact that we assume all the latent variables to be *independent*. This the "standard" ADVI approach; see [Automatic Differentiation Variational Inference (2016)](https://arxiv.org/abs/1603.00788) for more. In Turing, one can obtain such a mean-field approximation by calling `Variational.meanfield(model)` for which there exists an internal implementation for `update`: + + +```julia +print(@doc(Variational.meanfield)) +``` + +Currently the only implementation of `VariationalInference` available is `ADVI`, which is very convenient and applicable as long as your `Model` is differentiable with respect to the *variational parameters*, that is, the parameters of your variational distribution, e.g. mean and variance in the mean-field approximation. + + +```julia +print(@doc(Variational.ADVI)) +``` + +To perform VI on the model `m` using 10 samples for gradient estimation and taking 1000 gradient steps is then as simple as: + + +```julia; results = "hidden" +# ADVI +advi = ADVI(10, 1000) +q = vi(m, advi); +``` + +Unfortunately, for such a small problem Turing's new `NUTS` sampler is *so* efficient now that it's not that much more efficient to use ADVI. So, so very unfortunate... + +With that being said, this is not the case in general. For very complex models we'll later find that `ADVI` produces very reasonable results in a much shorter time than `NUTS`. + +And one significant advantage of using `vi` is that we can sample from the resulting `q` with ease. 
In fact, the result of the `vi` call is a `TransformedDistribution` from Bijectors.jl, and it implements the Distributions.jl interface for a `Distribution`: + + +```julia +q isa MultivariateDistribution +``` + +This means that we can call `rand` to sample from the variational posterior `q` + + +```julia +rand(q) +``` + +and `logpdf` to compute the log-probability + + +```julia +logpdf(q, rand(q)) +``` + +Let's check the first and second moments of the data to see how our approximation compares to the point-estimates form the data: + + +```julia +var(x), mean(x) +``` + +```julia +(mean(rand(q, 1000); dims = 2)..., ) +``` + +That's pretty close! But we're Bayesian so we're not interested in *just* matching the mean. +Let's instead look the actual density `q`. + +For that we need samples: + + +```julia; results = "hidden" +samples = rand(q, 10000); +``` + +```julia +# setup for plotting +using Plots, LaTeXStrings, StatsPlots +pyplot() +``` + +```julia +p1 = histogram(samples[1, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") +density!(samples[1, :], label = "s (ADVI)", color = :blue, linewidth = 2) +density!(collect(skipmissing(samples_nuts[:s].data)), label = "s (NUTS)", color = :green, linewidth = 2) +vline!([var(x)], label = "s (data)", color = :black) +vline!([mean(samples[1, :])], color = :blue, label ="") + +p2 = histogram(samples[2, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") +density!(samples[2, :], label = "m (ADVI)", color = :blue, linewidth = 2) +density!(collect(skipmissing(samples_nuts[:m].data)), label = "m (NUTS)", color = :green, linewidth = 2) +vline!([mean(x)], color = :black, label = "m (data)") +vline!([mean(samples[2, :])], color = :blue, label="") + +plot(p1, p2, layout=(2, 1), size=(900, 500)) +``` + +For this particular `Model`, we can in fact obtain the posterior of the latent variables in closed form. This allows us to compare both `NUTS` and `ADVI` to the true posterior $$p(s, m \mid \{x_i\}_{i = 1}^n )$$. + +*The code below is just work to get the marginals $$p(s \mid \{x_i\}_{i = 1}^n)$$ and $$p(m \mid \{x_i\}_{i = 1}^n)$$ from the posterior obtained using ConjugatePriors.jl. Feel free to skip it.* + + +```julia +# used to compute closed form expression of posterior +using ConjugatePriors + +# closed form computation +# notation mapping has been verified by explicitly computing expressions +# in "Conjugate Bayesian analysis of the Gaussian distribution" by Murphy +μ₀ = 0.0 # => μ +κ₀ = 1.0 # => ν, which scales the precision of the Normal +α₀ = 2.0 # => "shape" +β₀ = 3.0 # => "rate", which is 1 / θ, where θ is "scale" + +# prior +pri = NormalGamma(μ₀, κ₀, α₀, β₀) + +# posterior +post = posterior(pri, Normal, x) + +# marginal distribution of τ = 1 / σ² +# Eq. (90) in "Conjugate Bayesian analysis of the Gaussian distribution" by Murphy +# `scale(post)` = θ +p_τ = Gamma(post.shape, scale(post)) +p_σ²_pdf = z -> pdf(p_τ, 1 / z) # τ => 1 / σ² + +# marginal of μ +# Eq. 
(91) in "Conjugate Bayesian analysis of the Gaussian distribution" by Murphy +p_μ = TDist(2 * post.shape) + +μₙ = post.mu # μ → μ +κₙ = post.nu # κ → ν +αₙ = post.shape # α → shape +βₙ = post.rate # β → rate + +# numerically more stable but doesn't seem to have effect; issue is probably internal to +# `pdf` which needs to compute ≈ Γ(1000) +p_μ_pdf = z -> exp(logpdf(p_μ, (z - μₙ) * exp(- 0.5 * log(βₙ) + 0.5 * log(αₙ) + 0.5 * log(κₙ)))) + +# posterior plots +p1 = plot(); +histogram!(samples[1, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") +density!(samples[1, :], label = "s (ADVI)", color = :blue) +density!(vec(samples_nuts[:s].data), label = "s (NUTS)", color = :green) +vline!([mean(samples[1, :])], linewidth = 1.5, color = :blue, label ="") + +# normalize using Riemann approx. because of (almost certainly) numerical issues +Δ = 0.001 +r = 0.75:0.001:1.50 +norm_const = sum(p_σ²_pdf.(r) .* Δ) +plot!(r, p_σ²_pdf, label = "s (posterior)", color = :red); +vline!([var(x)], label = "s (data)", linewidth = 1.5, color = :black, alpha = 0.7); +xlims!(0.75, 1.35); + +p2 = plot(); +histogram!(samples[2, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") +density!(samples[2, :], label = "m (ADVI)", color = :blue) +density!(vec(samples_nuts[:m].data), label = "m (NUTS)", color = :green) +vline!([mean(samples[2, :])], linewidth = 1.5, color = :blue, label="") + + +# normalize using Riemann approx. because of (almost certainly) numerical issues +Δ = 0.0001 +r = -0.1 + mean(x):Δ:0.1 + mean(x) +norm_const = sum(p_μ_pdf.(r) .* Δ) +plot!(r, z -> p_μ_pdf(z) / norm_const, label = "m (posterior)", color = :red); +vline!([mean(x)], label = "m (data)", linewidth = 1.5, color = :black, alpha = 0.7); + +xlims!(-0.25, 0.25); + +p = plot(p1, p2; layout=(2, 1), size=(900, 500)) +``` + + +# Bayesian linear regression example using `ADVI` + +This is simply a duplication of the tutorial [5. Linear regression](../../tutorials/5-linearregression) but now with the addition of an approximate posterior obtained using `ADVI`. + +As we'll see, there is really no additional work required to apply variational inference to a more complex `Model`. + +## Copy-paste from [5. Linear regression](../../tutorials/5-linearregression) + +This section is basically copy-pasting the code from the [linear regression tutorial](../../tutorials/5-linearregression). + + +```julia; results = "hidden" +Random.seed!(1); +``` + + +```julia; results = "hidden" +# Import RDatasets. +using RDatasets + +# Hide the progress prompt while sampling. +Turing.turnprogress(true); +``` + + +```julia +# Import the "Default" dataset. +data = RDatasets.dataset("datasets", "mtcars"); + +# Show the first six rows of the dataset. +first(data, 6) +``` + +```julia +# Function to split samples. +function split_data(df, at = 0.70) + r = size(df,1) + index = Int(round(r * at)) + train = df[1:index, :] + test = df[(index+1):end, :] + return train, test +end + +# A handy helper function to rescale our dataset. +function standardize(x) + return (x .- mean(x, dims=1)) ./ std(x, dims=1), x +end + +# Another helper function to unstandardize our datasets. +function unstandardize(x, orig) + return (x .+ mean(orig, dims=1)) .* std(orig, dims=1) +end +``` + +```julia; results = "hidden" +# Remove the model column. +select!(data, Not(:Model)) + +# Standardize our dataset. +(std_data, data_arr) = standardize(Matrix(data)) + +# Split our dataset 70%/30% into training/test sets. 
+train, test = split_data(std_data, 0.7) + +# Save dataframe versions of our dataset. +train_cut = DataFrame(train, names(data)) +test_cut = DataFrame(test, names(data)) + +# Create our labels. These are the values we are trying to predict. +train_label = train_cut[:, :MPG] +test_label = test_cut[:, :MPG] + +# Get the list of columns to keep. +remove_names = filter(x->!in(x, [:MPG, :Model]), names(data)) + +# Filter the test and train sets. +train = Matrix(train_cut[:,remove_names]); +test = Matrix(test_cut[:,remove_names]); +``` + + +```julia +# Bayesian linear regression. +@model linear_regression(x, y, n_obs, n_vars, ::Type{T}=Vector{Float64}) where {T} = begin + # Set variance prior. + σ₂ ~ truncated(Normal(0,100), 0, Inf) + + # Set intercept prior. + intercept ~ Normal(0, 3) + + # Set the priors on our coefficients. + coefficients ~ MvNormal(zeros(n_vars), 10 * ones(n_vars)) + + # Calculate all the mu terms. + mu = intercept .+ x * coefficients + y ~ MvNormal(mu, σ₂) +end; +``` + + +```julia; results = "hidden" +n_obs, n_vars = size(train) +m = linear_regression(train, train_label, n_obs, n_vars); +``` + +## Performing VI + +First we define the initial variational distribution, or, equivalently, the family of distributions to consider. We're going to use the same mean-field approximation as Turing will use by default when we call `vi(m, advi)`, which we obtain by calling `Variational.meanfield`. This returns a `TransformedDistribution` with a `TuringDiagMvNormal` as the underlying distribution and the transformation mapping from the reals to the domain of the latent variables. + + +```julia +q0 = Variational.meanfield(m) +typeof(q0) +``` + +```julia +advi = ADVI(10, 10_000) +``` + +Turing also provides a couple of different optimizers: +- `TruncatedADAGrad` (default) +- `DecayedADAGrad` +as these are well-suited for problems with high-variance stochastic objectives, which is usually what the ELBO ends up being at different times in our optimization process. + +With that being said, thanks to Requires.jl, if we add a `using Flux` prior to `using Turing` we can also make use of all the optimizers in `Flux`, e.g. `ADAM`, without any additional changes to your code! For example: +```julia; eval = false +using Flux, Turing +using Turing.Variational + +vi(m, advi; optimizer = Flux.ADAM()) +``` +just works. + +For this problem we'll use the `DecayedADAGrad` from Turing: + + +```julia +opt = Variational.DecayedADAGrad(1e-2, 1.1, 0.9) +``` + + +```julia +q = vi(m, advi, q0; optimizer = opt) +typeof(q) +``` + +*Note: as mentioned before, we internally define a `update(q::TransformedDistribution{<:TuringDiagMvNormal}, θ::AbstractVector)` method which takes in the current variational approximation `q` together with new parameters `z` and returns the new variational approximation. This is required so that we can actually update the `Distribution` object after each optimization step.* + +*Alternatively, we can instead provide the mapping $$\theta \mapsto q_{\theta}$$ directly together with initial parameters using the signature `vi(m, advi, getq, θ_init)` as mentioned earlier. We'll see an explicit example of this later on!* + +To compute statistics for our approximation we need samples: + + +```julia; results = "hidden" +z = rand(q, 10_000); +``` + +Now we can for example look at the average + + +```julia +avg = vec(mean(z; dims = 2)) +``` + +The vector has the same ordering as the model, e.g. in this case `σ₂` has index `1`, `intercept` has index `2` and `coefficients` has indices `3:12`. 
If you forget or you might want to do something programmatically with the result, you can obtain the `sym → indices` mapping as follows: + + +```julia +_, sym2range = bijector(m, Val(true)); +sym2range +``` + +```julia +avg[union(sym2range[:σ₂]...)] +``` + +```julia +avg[union(sym2range[:intercept]...)] +``` + +```julia +avg[union(sym2range[:coefficients]...)] +``` + +*Note: as you can see, this is slightly awkward to work with at the moment. We'll soon add a better way of dealing with this.* + +With a bit of work (this will be much easier in the future), we can also visualize the approximate marginals of the different variables, similar to `plot(chain)`: + + +```julia +function plot_variational_marginals(z, sym2range) + ps = [] + + for (i, sym) in enumerate(keys(sym2range)) + indices = union(sym2range[sym]...) # <= array of ranges + if sum(length.(indices)) > 1 + offset = 1 + for r in indices + for j in r + p = density(z[j, :], title = "$(sym)[$offset]", titlefontsize = 10, label = "") + push!(ps, p) + + offset += 1 + end + end + else + p = density(z[first(indices), :], title = "$(sym)", titlefontsize = 10, label = "") + push!(ps, p) + end + end + + return plot(ps..., layout = (length(ps), 1), size = (500, 1500)) +end +``` + + +```julia +plot_variational_marginals(z, sym2range) +``` + +And let's compare this to using the `NUTS` sampler: + + +```julia; results = "hidden" +chain = sample(m, NUTS(0.65), 10_000); +``` + +```julia +plot(chain) +``` + + +```julia +vi_mean = vec(mean(z; dims = 2))[[union(sym2range[:coefficients]...)..., union(sym2range[:intercept]...)..., union(sym2range[:σ₂]...)...]] +``` + +```julia +mean(chain).nt.mean +``` + +One thing we can look at is simply the squared error between the means: + + +```julia +sum(abs2, mean(chain).nt.mean .- vi_mean) +``` + +That looks pretty good! But let's see how the predictive distributions looks for the two. + +## Prediction + +Similarily to the linear regression tutorial, we're going to compare to multivariate ordinary linear regression using the `GLM` package: + + +```julia; results = "hidden" +# Import the GLM package. +using GLM + +# Perform multivariate OLS. +ols = lm(@formula(MPG ~ Cyl + Disp + HP + DRat + WT + QSec + VS + AM + Gear + Carb), train_cut) + +# Store our predictions in the original dataframe. +train_cut.OLSPrediction = unstandardize(GLM.predict(ols), data.MPG); +test_cut.OLSPrediction = unstandardize(GLM.predict(ols, test_cut), data.MPG); +``` + + +```julia +# Make a prediction given an input vector. +function prediction_chain(chain, x) + p = get_params(chain) + α = mean(p.intercept) + β = collect(mean.(p.coefficients)) + return α .+ x * β +end +``` + +```julia +# Make a prediction using samples from the variational posterior given an input vector. +function prediction(samples::AbstractVector, sym2ranges, x) + α = mean(samples[union(sym2ranges[:intercept]...)]) + β = vec(mean(samples[union(sym2ranges[:coefficients]...)]; dims = 2)) + return α .+ x * β +end + +function prediction(samples::AbstractMatrix, sym2ranges, x) + α = mean(samples[union(sym2ranges[:intercept]...), :]) + β = vec(mean(samples[union(sym2ranges[:coefficients]...), :]; dims = 2)) + return α .+ x * β +end +``` + +```julia; results = "hidden" +# Unstandardize the dependent variable. +train_cut.MPG = unstandardize(train_cut.MPG, data.MPG); +test_cut.MPG = unstandardize(test_cut.MPG, data.MPG); +``` + + +```julia +# Show the first side rows of the modified dataframe. 
+first(test_cut, 6) +``` + +```julia; results = "hidden" +z = rand(q, 10_000); +``` + + +```julia; results = "hidden" +# Calculate the predictions for the training and testing sets using the samples `z` from variational posterior +train_cut.VIPredictions = unstandardize(prediction(z, sym2range, train), data.MPG); +test_cut.VIPredictions = unstandardize(prediction(z, sym2range, test), data.MPG); + +train_cut.BayesPredictions = unstandardize(prediction_chain(chain, train), data.MPG); +test_cut.BayesPredictions = unstandardize(prediction_chain(chain, test), data.MPG); +``` + + +```julia +vi_loss1 = mean((train_cut.VIPredictions - train_cut.MPG).^2) +bayes_loss1 = mean((train_cut.BayesPredictions - train_cut.MPG).^2) +ols_loss1 = mean((train_cut.OLSPrediction - train_cut.MPG).^2) + +vi_loss2 = mean((test_cut.VIPredictions - test_cut.MPG).^2) +bayes_loss2 = mean((test_cut.BayesPredictions - test_cut.MPG).^2) +ols_loss2 = mean((test_cut.OLSPrediction - test_cut.MPG).^2) + +println("Training set: + VI loss: $vi_loss1 + Bayes loss: $bayes_loss1 + OLS loss: $ols_loss1 +Test set: + VI loss: $vi_loss2 + Bayes loss: $bayes_loss2 + OLS loss: $ols_loss2") +``` + + +Interestingly the squared difference between true- and mean-prediction on the test-set is actually *better* for the mean-field variational posterior than for the "true" posterior obtained by MCMC sampling using `NUTS`. But, as Bayesians, we know that the mean doesn't tell the entire story. One quick check is to look at the mean predictions ± standard deviation of the two different approaches: + + +```julia +z = rand(q, 1000); +preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); + +scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) +scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") +xaxis!(1:size(test, 1)) +ylims!(95, 140) +title!("Mean-field ADVI (Normal)") +``` + +```julia +preds = hcat([unstandardize(prediction_chain(chain[i], test), data.MPG) for i = 1:5:size(chain, 1)]...); + +scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) +scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") +xaxis!(1:size(test, 1)) +ylims!(95, 140) +title!("MCMC (NUTS)") +``` + +Indeed we see that the MCMC approach generally provides better uncertainty estimates than the mean-field ADVI approach! Good. So all the work we've done to make MCMC fast isn't for nothing. + +## Alternative: provide parameter-to-distribution instead of `q` with`update` implemented + +As mentioned earlier, it's also possible to just provide the mapping $$\theta \mapsto q_{\theta}$$ rather than the variational family / initial variational posterior `q`, i.e. use the interface `vi(m, advi, getq, θ_init)` where `getq` is the mapping $$\theta \mapsto q_{\theta}$$ + +In this section we're going to construct a mean-field approximation to the model by hand using a composition of`Shift` and `Scale` from Bijectors.jl togheter with a standard multivariate Gaussian as the base distribution. 
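+
+Concretely (a sketch of the construction implemented below, not a derivation), a draw from the resulting variational distribution is obtained by pushing a standard normal sample through the composed bijector:
+
+\begin{align}
+    z &\sim \mathcal{N}(0, I) \\\\
+    x &= t\big(b + e^{A} \odot z\big)
+\end{align}
+
+where the exponential is applied elementwise, $$b$$ (the shift) and $$A$$ (the log-scale) are the variational parameters, and $$t$$ is the inverse of the model's bijector, i.e. the map from the reals back to the support of the latent variables (`to_constrained` in the code below).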
+ + +```julia +using Bijectors +``` + + +```julia +using Bijectors: Scale, Shift +``` + + +```julia +d = length(q) +base_dist = Turing.DistributionsAD.TuringDiagMvNormal(zeros(d), ones(d)) +``` + +`bijector(model::Turing.Model)` is defined by Turing, and will return a `bijector` which takes you from the space of the latent variables to the real space. In this particular case, this is a mapping `((0, ∞) × ℝ × ℝ¹⁰) → ℝ¹²`. We're interested in using a normal distribution as a base-distribution and transform samples to the latent space, thus we need the inverse mapping from the reals to the latent space: + + +```julia; results = "hidden" +to_constrained = inv(bijector(m)); +``` + + +```julia +function getq(θ) + d = length(θ) ÷ 2 + A = @inbounds θ[1:d] + b = @inbounds θ[d + 1: 2 * d] + + b = to_constrained ∘ Shift(b; dim = Val(1)) ∘ Scale(exp.(A); dim = Val(1)) + + return transformed(base_dist, b) +end +``` + +```julia; results = "hidden" +q_mf_normal = vi(m, advi, getq, randn(2 * d)); +``` + +```julia +p1 = plot_variational_marginals(rand(q_mf_normal, 10_000), sym2range) # MvDiagNormal + Affine transformation + to_constrained +p2 = plot_variational_marginals(rand(q, 10_000), sym2range) # Turing.meanfield(m) + +plot(p1, p2, layout = (1, 2), size = (800, 2000)) +``` +As expected, the fits look pretty much identical. + +But using this interface it becomes trivial to go beyond the mean-field assumption we made for the variational posterior, as we'll see in the next section. + +### Relaxing the mean-field assumption + +Here we'll instead consider the variational family to be a full non-diagonal multivariate Gaussian. As in the previous section we'll implement this by transforming a standard multivariate Gaussian using `Scale` and `Shift`, but now `Scale` will instead be using a lower-triangular matrix (representing the Cholesky of the covariance matrix of a multivariate normal) in constrast to the diagonal matrix we used in for the mean-field approximate posterior. + + +```julia +using LinearAlgebra +``` + +```julia +# Using `ComponentArrays.jl` together with `UnPack.jl` makes our lives much easier. +using ComponentArrays, UnPack +``` + +```julia +proto_arr = ComponentArray( + L = zeros(d, d), + b = zeros(d) +) +proto_axes = proto_arr |> getaxes +num_params = length(proto_arr) + +function getq(θ) + L, b = begin + @unpack L, b = ComponentArray(θ, proto_axes) + LowerTriangular(L), b + end + # For this to represent a covariance matrix we need to ensure that the diagonal is positive. + # We can enforce this by zeroing out the diagonal and then adding back the diagonal exponentiated. + D = Diagonal(diag(L)) + A = L - D + exp(D) # exp for Diagonal is the same as exponentiating only the diagonal entries + + b = to_constrained ∘ Shift(b; dim = Val(1)) ∘ Scale(A; dim = Val(1)) + + return transformed(base_dist, b) +end +``` + +```julia +advi = ADVI(10, 20_000) +``` + +```julia; results = "hidden" +q_full_normal = vi(m, advi, getq, randn(num_params); optimizer = Variational.DecayedADAGrad(1e-2)); +``` + +Let's have a look at the learned covariance matrix: + + +```julia +A = q_full_normal.transform.ts[1].a +``` + +```julia +heatmap(cov(A * A')) +``` + +```julia; results = "hidden" +zs = rand(q_full_normal, 10_000); +``` + + +```julia +p1 = plot_variational_marginals(rand(q_mf_normal, 10_000), sym2range) +p2 = plot_variational_marginals(rand(q_full_normal, 10_000), sym2range) + +plot(p1, p2, layout = (1, 2), size = (800, 2000)) +``` + + +So it seems like the "full" ADVI approach, i.e. 
no mean-field assumption, obtain the same modes as the mean-field approach but with greater uncertainty for some of the `coefficients`. This + + +```julia; results = "hidden" +# Unfortunately, it seems like this has quite a high variance which is likely to be due to numerical instability, +# so we consider a larger number of samples. If we get a couple of outliers due to numerical issues, +# these kind affect the mean prediction greatly. +z = rand(q_full_normal, 10_000); +``` + + +```julia; results = "hidden" +train_cut.VIFullPredictions = unstandardize(prediction(z, sym2range, train), data.MPG); +test_cut.VIFullPredictions = unstandardize(prediction(z, sym2range, test), data.MPG); +``` + + +```julia +vi_loss1 = mean((train_cut.VIPredictions - train_cut.MPG).^2) +vifull_loss1 = mean((train_cut.VIFullPredictions - train_cut.MPG).^2) +bayes_loss1 = mean((train_cut.BayesPredictions - train_cut.MPG).^2) +ols_loss1 = mean((train_cut.OLSPrediction - train_cut.MPG).^2) + +vi_loss2 = mean((test_cut.VIPredictions - test_cut.MPG).^2) +vifull_loss2 = mean((test_cut.VIFullPredictions - test_cut.MPG).^2) +bayes_loss2 = mean((test_cut.BayesPredictions - test_cut.MPG).^2) +ols_loss2 = mean((test_cut.OLSPrediction - test_cut.MPG).^2) + +println("Training set: + VI loss: $vi_loss1 + Bayes loss: $bayes_loss1 + OLS loss: $ols_loss1 +Test set: + VI loss: $vi_loss2 + Bayes loss: $bayes_loss2 + OLS loss: $ols_loss2") +``` + +```julia +z = rand(q_mf_normal, 1000); +preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); + +p1 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) +scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") +xaxis!(1:size(test, 1)) +ylims!(95, 140) +title!("Mean-field ADVI (Normal)") +``` + +```julia +z = rand(q_full_normal, 1000); +preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); + +p2 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) +scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") +xaxis!(1:size(test, 1)) +ylims!(95, 140) +title!("Full ADVI (Normal)") +``` + +```julia +preds = hcat([unstandardize(prediction_chain(chain[i], test), data.MPG) for i = 1:5:size(chain, 1)]...); + +p3 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) +scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") +xaxis!(1:size(test, 1)) +ylims!(95, 140) +title!("MCMC (NUTS)") +``` + +```julia +plot(p1, p2, p3, layout = (1, 3), size = (900, 250), label="") +``` + +Here we actually see that indeed both the full ADVI and the MCMC approaches does a much better job of quantifying the uncertainty of predictions for never-before-seen samples, with full ADVI seemingly *overestimating* the variance slightly compared to MCMC. + +So now you know how to do perform VI on your Turing.jl model! Great isn't it? 
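As a final, optional check, we can also put a single number on the comparison in the three plots above: the average per-point predictive standard deviation on the test set for each approach. The snippet below is only a sketch that reuses objects defined earlier in this tutorial (`q_mf_normal`, `q_full_normal`, `chain`, `prediction`, `prediction_chain`, `sym2range`, `test`, `data`, `unstandardize`); the names `z_mf`, `preds_mf`, etc. are illustrative only.

```julia; eval = false
# Average predictive standard deviation per test point: a rough one-number
# summary of the error bars in the plots above (larger ⇒ wider uncertainty).
z_mf   = rand(q_mf_normal, 1000);
z_full = rand(q_full_normal, 1000);

preds_mf   = hcat([unstandardize(prediction(z_mf[:, i], sym2range, test), data.MPG) for i = 1:size(z_mf, 2)]...);
preds_full = hcat([unstandardize(prediction(z_full[:, i], sym2range, test), data.MPG) for i = 1:size(z_full, 2)]...);
preds_nuts = hcat([unstandardize(prediction_chain(chain[i], test), data.MPG) for i = 1:5:size(chain, 1)]...);

mean(std(preds_mf; dims = 2)), mean(std(preds_full; dims = 2)), mean(std(preds_nuts; dims = 2))
```

If the plots above are representative, the mean-field number should come out noticeably smaller than the other two, reflecting its underestimated predictive uncertainty.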
From 65263c83e4f09c95afe24d5a793d2ef8e92d9126 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 24 Sep 2020 14:16:26 +0100 Subject: [PATCH 06/12] renamed file CORRECTLY --- .../01-vi_introduction.jmd | 820 ------------------ ...rence.jmd => 01_variational-inference.jmd} | 0 2 files changed, 820 deletions(-) delete mode 100644 tutorials/variational-inference/01-vi_introduction.jmd rename tutorials/variational-inference/{01-variational-inference.jmd => 01_variational-inference.jmd} (100%) diff --git a/tutorials/variational-inference/01-vi_introduction.jmd b/tutorials/variational-inference/01-vi_introduction.jmd deleted file mode 100644 index 3378bf45b..000000000 --- a/tutorials/variational-inference/01-vi_introduction.jmd +++ /dev/null @@ -1,820 +0,0 @@ ---- -title: Variational inference (VI) in Turing.jl -permalink: /:collection/:name/ ---- - -In this post we'll have a look at what's know as **variational inference (VI)**, a family of _approximate_ Bayesian inference methods, and how to use it in Turing.jl as an alternative to other approaches such as MCMC. In particular, we will focus on one of the more standard VI methods called **Automatic Differentation Variational Inference (ADVI)**. - -Here we will focus on how to use VI in Turing and not much on the theory underlying VI. If you're interested in understanding the mathematics you can checkout [our write-up](../../docs/for-developers/variational_inference) or any other resource online (there a lot of great ones). - -Using VI in Turing.jl is very straight forward. If `model` denotes a definition of a `Turing.Model`, performing VI is as simple as -```julia; eval = false -m = model(data...) # instantiate model on the data -q = vi(m, vi_alg) # perform VI on `m` using the VI method `vi_alg`, which returns a `VariationalPosterior` -``` -Thus it's no more work than standard MCMC sampling in Turing. - -To get a bit more into what we can do with `vi`, we'll first have a look at a simple example and then we'll reproduce the [tutorial on Bayesian linear regression](../../tutorials/5-linearregression) using VI instead of MCMC. Finally we'll look at some of the different parameters of `vi` and how you for example can use your own custom variational family. - -## Setup - -```julia; results = "hidden" -using Random -using Turing -using Turing: Variational - -Random.seed!(42); -``` - -## Simple example: Normal-Gamma conjugate model - -The Normal-(Inverse)Gamma conjugate model is defined by the following generative process - -\begin{align} - s &\sim \mathrm{InverseGamma}(2, 3) \\\\ - m &\sim \mathcal{N}(0, s) \\\\ - x_i &\overset{\text{i.i.d.}}{=} \mathcal{N}(m, s), \quad i = 1, \dots, n -\end{align} - -Recall that *conjugate* refers to the fact that we can obtain a closed-form expression for the posterior. Of course one wouldn't use something like variational inference for a conjugate model, but it's useful as a simple demonstration as we can compare the result to the true posterior. 
- -First we generate some synthetic data, define the `Turing.Model` and instantiate the model on the data: - -```julia; results = "hidden" -# generate data -x = randn(2000); -``` - -```julia -@model model(x) = begin - s ~ InverseGamma(2, 3) - m ~ Normal(0.0, sqrt(s)) - for i = 1:length(x) - x[i] ~ Normal(m, sqrt(s)) - end -end; -``` - -```julia; results = "hidden" -# Instantiate model -m = model(x); -``` - -Now we'll produce some samples from the posterior using a MCMC method, which in constrast to VI is guaranteed to converge to the *exact* posterior (as the number of samples go to infinity). - -We'll produce 10 000 samples with 200 steps used for adaptation and a target acceptance rate of 0.65 - -If you don't understand what "adaptation" or "target acceptance rate" refers to, all you really need to know is that `NUTS` is known to be one of the most accurate and efficient samplers (when applicable) while requiring little to no hand-tuning to work well. - - -```julia; results = "hidden" -samples_nuts = sample(m, NUTS(200, 0.65), 10000); -``` - -Now let's try VI. The most important function you need to now about to do VI in Turing is `vi`: - - -```julia -print(@doc(Variational.vi)) -``` - -Additionally, you can pass -- an initial variational posterior `q`, for which we assume there exists a implementation of `update(::typeof(q), θ::AbstractVector)` returning an updated posterior `q` with parameters `θ`. -- a function mapping $$\theta \mapsto q_{\theta}$$ (denoted above `getq`) together with initial parameters `θ`. This provides more flexibility in the types of variational families that we can use, and can sometimes be slightly more convenient for quick and rough work. - -By default, i.e. when calling `vi(m, advi)`, Turing use a *mean-field* approximation with a multivariate normal as the base-distribution. Mean-field refers to the fact that we assume all the latent variables to be *independent*. This the "standard" ADVI approach; see [Automatic Differentiation Variational Inference (2016)](https://arxiv.org/abs/1603.00788) for more. In Turing, one can obtain such a mean-field approximation by calling `Variational.meanfield(model)` for which there exists an internal implementation for `update`: - - -```julia -print(@doc(Variational.meanfield)) -``` - -Currently the only implementation of `VariationalInference` available is `ADVI`, which is very convenient and applicable as long as your `Model` is differentiable with respect to the *variational parameters*, that is, the parameters of your variational distribution, e.g. mean and variance in the mean-field approximation. - - -```julia -print(@doc(Variational.ADVI)) -``` - -To perform VI on the model `m` using 10 samples for gradient estimation and taking 1000 gradient steps is then as simple as: - - -```julia; results = "hidden" -# ADVI -advi = ADVI(10, 1000) -q = vi(m, advi); -``` - -Unfortunately, for such a small problem Turing's new `NUTS` sampler is *so* efficient now that it's not that much more efficient to use ADVI. So, so very unfortunate... - -With that being said, this is not the case in general. For very complex models we'll later find that `ADVI` produces very reasonable results in a much shorter time than `NUTS`. - -And one significant advantage of using `vi` is that we can sample from the resulting `q` with ease. 
In fact, the result of the `vi` call is a `TransformedDistribution` from Bijectors.jl, and it implements the Distributions.jl interface for a `Distribution`: - - -```julia -q isa MultivariateDistribution -``` - -This means that we can call `rand` to sample from the variational posterior `q` - - -```julia -rand(q) -``` - -and `logpdf` to compute the log-probability - - -```julia -logpdf(q, rand(q)) -``` - -Let's check the first and second moments of the data to see how our approximation compares to the point-estimates form the data: - - -```julia -var(x), mean(x) -``` - -```julia -(mean(rand(q, 1000); dims = 2)..., ) -``` - -That's pretty close! But we're Bayesian so we're not interested in *just* matching the mean. -Let's instead look the actual density `q`. - -For that we need samples: - - -```julia; results = "hidden" -samples = rand(q, 10000); -``` - -```julia -# setup for plotting -using Plots, LaTeXStrings, StatsPlots -pyplot() -``` - -```julia -p1 = histogram(samples[1, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") -density!(samples[1, :], label = "s (ADVI)", color = :blue, linewidth = 2) -density!(collect(skipmissing(samples_nuts[:s].data)), label = "s (NUTS)", color = :green, linewidth = 2) -vline!([var(x)], label = "s (data)", color = :black) -vline!([mean(samples[1, :])], color = :blue, label ="") - -p2 = histogram(samples[2, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") -density!(samples[2, :], label = "m (ADVI)", color = :blue, linewidth = 2) -density!(collect(skipmissing(samples_nuts[:m].data)), label = "m (NUTS)", color = :green, linewidth = 2) -vline!([mean(x)], color = :black, label = "m (data)") -vline!([mean(samples[2, :])], color = :blue, label="") - -plot(p1, p2, layout=(2, 1), size=(900, 500)) -``` - -For this particular `Model`, we can in fact obtain the posterior of the latent variables in closed form. This allows us to compare both `NUTS` and `ADVI` to the true posterior $$p(s, m \mid \{x_i\}_{i = 1}^n )$$. - -*The code below is just work to get the marginals $$p(s \mid \{x_i\}_{i = 1}^n)$$ and $$p(m \mid \{x_i\}_{i = 1}^n)$$ from the posterior obtained using ConjugatePriors.jl. Feel free to skip it.* - - -```julia -# used to compute closed form expression of posterior -using ConjugatePriors - -# closed form computation -# notation mapping has been verified by explicitly computing expressions -# in "Conjugate Bayesian analysis of the Gaussian distribution" by Murphy -μ₀ = 0.0 # => μ -κ₀ = 1.0 # => ν, which scales the precision of the Normal -α₀ = 2.0 # => "shape" -β₀ = 3.0 # => "rate", which is 1 / θ, where θ is "scale" - -# prior -pri = NormalGamma(μ₀, κ₀, α₀, β₀) - -# posterior -post = posterior(pri, Normal, x) - -# marginal distribution of τ = 1 / σ² -# Eq. (90) in "Conjugate Bayesian analysis of the Gaussian distribution" by Murphy -# `scale(post)` = θ -p_τ = Gamma(post.shape, scale(post)) -p_σ²_pdf = z -> pdf(p_τ, 1 / z) # τ => 1 / σ² - -# marginal of μ -# Eq. 
(91) in "Conjugate Bayesian analysis of the Gaussian distribution" by Murphy -p_μ = TDist(2 * post.shape) - -μₙ = post.mu # μ → μ -κₙ = post.nu # κ → ν -αₙ = post.shape # α → shape -βₙ = post.rate # β → rate - -# numerically more stable but doesn't seem to have effect; issue is probably internal to -# `pdf` which needs to compute ≈ Γ(1000) -p_μ_pdf = z -> exp(logpdf(p_μ, (z - μₙ) * exp(- 0.5 * log(βₙ) + 0.5 * log(αₙ) + 0.5 * log(κₙ)))) - -# posterior plots -p1 = plot(); -histogram!(samples[1, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") -density!(samples[1, :], label = "s (ADVI)", color = :blue) -density!(vec(samples_nuts[:s].data), label = "s (NUTS)", color = :green) -vline!([mean(samples[1, :])], linewidth = 1.5, color = :blue, label ="") - -# normalize using Riemann approx. because of (almost certainly) numerical issues -Δ = 0.001 -r = 0.75:0.001:1.50 -norm_const = sum(p_σ²_pdf.(r) .* Δ) -plot!(r, p_σ²_pdf, label = "s (posterior)", color = :red); -vline!([var(x)], label = "s (data)", linewidth = 1.5, color = :black, alpha = 0.7); -xlims!(0.75, 1.35); - -p2 = plot(); -histogram!(samples[2, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") -density!(samples[2, :], label = "m (ADVI)", color = :blue) -density!(vec(samples_nuts[:m].data), label = "m (NUTS)", color = :green) -vline!([mean(samples[2, :])], linewidth = 1.5, color = :blue, label="") - - -# normalize using Riemann approx. because of (almost certainly) numerical issues -Δ = 0.0001 -r = -0.1 + mean(x):Δ:0.1 + mean(x) -norm_const = sum(p_μ_pdf.(r) .* Δ) -plot!(r, z -> p_μ_pdf(z) / norm_const, label = "m (posterior)", color = :red); -vline!([mean(x)], label = "m (data)", linewidth = 1.5, color = :black, alpha = 0.7); - -xlims!(-0.25, 0.25); - -p = plot(p1, p2; layout=(2, 1), size=(900, 500)) -``` - - -# Bayesian linear regression example using `ADVI` - -This is simply a duplication of the tutorial [5. Linear regression](../../tutorials/5-linearregression) but now with the addition of an approximate posterior obtained using `ADVI`. - -As we'll see, there is really no additional work required to apply variational inference to a more complex `Model`. - -## Copy-paste from [5. Linear regression](../../tutorials/5-linearregression) - -This section is basically copy-pasting the code from the [linear regression tutorial](../../tutorials/5-linearregression). - - -```julia; results = "hidden" -Random.seed!(1); -``` - - -```julia; results = "hidden" -# Import RDatasets. -using RDatasets - -# Hide the progress prompt while sampling. -Turing.turnprogress(true); -``` - - -```julia -# Import the "Default" dataset. -data = RDatasets.dataset("datasets", "mtcars"); - -# Show the first six rows of the dataset. -first(data, 6) -``` - -```julia -# Function to split samples. -function split_data(df, at = 0.70) - r = size(df,1) - index = Int(round(r * at)) - train = df[1:index, :] - test = df[(index+1):end, :] - return train, test -end - -# A handy helper function to rescale our dataset. -function standardize(x) - return (x .- mean(x, dims=1)) ./ std(x, dims=1), x -end - -# Another helper function to unstandardize our datasets. -function unstandardize(x, orig) - return (x .+ mean(orig, dims=1)) .* std(orig, dims=1) -end -``` - -```julia; results = "hidden" -# Remove the model column. -select!(data, Not(:Model)) - -# Standardize our dataset. -(std_data, data_arr) = standardize(Matrix(data)) - -# Split our dataset 70%/30% into training/test sets. 
-train, test = split_data(std_data, 0.7) - -# Save dataframe versions of our dataset. -train_cut = DataFrame(train, names(data)) -test_cut = DataFrame(test, names(data)) - -# Create our labels. These are the values we are trying to predict. -train_label = train_cut[:, :MPG] -test_label = test_cut[:, :MPG] - -# Get the list of columns to keep. -remove_names = filter(x->!in(x, [:MPG, :Model]), names(data)) - -# Filter the test and train sets. -train = Matrix(train_cut[:,remove_names]); -test = Matrix(test_cut[:,remove_names]); -``` - - -```julia -# Bayesian linear regression. -@model linear_regression(x, y, n_obs, n_vars, ::Type{T}=Vector{Float64}) where {T} = begin - # Set variance prior. - σ₂ ~ truncated(Normal(0,100), 0, Inf) - - # Set intercept prior. - intercept ~ Normal(0, 3) - - # Set the priors on our coefficients. - coefficients ~ MvNormal(zeros(n_vars), 10 * ones(n_vars)) - - # Calculate all the mu terms. - mu = intercept .+ x * coefficients - y ~ MvNormal(mu, σ₂) -end; -``` - - -```julia; results = "hidden" -n_obs, n_vars = size(train) -m = linear_regression(train, train_label, n_obs, n_vars); -``` - -## Performing VI - -First we define the initial variational distribution, or, equivalently, the family of distributions to consider. We're going to use the same mean-field approximation as Turing will use by default when we call `vi(m, advi)`, which we obtain by calling `Variational.meanfield`. This returns a `TransformedDistribution` with a `TuringDiagMvNormal` as the underlying distribution and the transformation mapping from the reals to the domain of the latent variables. - - -```julia -q0 = Variational.meanfield(m) -typeof(q0) -``` - -```julia -advi = ADVI(10, 10_000) -``` - -Turing also provides a couple of different optimizers: -- `TruncatedADAGrad` (default) -- `DecayedADAGrad` -as these are well-suited for problems with high-variance stochastic objectives, which is usually what the ELBO ends up being at different times in our optimization process. - -With that being said, thanks to Requires.jl, if we add a `using Flux` prior to `using Turing` we can also make use of all the optimizers in `Flux`, e.g. `ADAM`, without any additional changes to your code! For example: -```julia; eval = false -using Flux, Turing -using Turing.Variational - -vi(m, advi; optimizer = Flux.ADAM()) -``` -just works. - -For this problem we'll use the `DecayedADAGrad` from Turing: - - -```julia -opt = Variational.DecayedADAGrad(1e-2, 1.1, 0.9) -``` - - -```julia -q = vi(m, advi, q0; optimizer = opt) -typeof(q) -``` - -*Note: as mentioned before, we internally define a `update(q::TransformedDistribution{<:TuringDiagMvNormal}, θ::AbstractVector)` method which takes in the current variational approximation `q` together with new parameters `z` and returns the new variational approximation. This is required so that we can actually update the `Distribution` object after each optimization step.* - -*Alternatively, we can instead provide the mapping $$\theta \mapsto q_{\theta}$$ directly together with initial parameters using the signature `vi(m, advi, getq, θ_init)` as mentioned earlier. We'll see an explicit example of this later on!* - -To compute statistics for our approximation we need samples: - - -```julia; results = "hidden" -z = rand(q, 10_000); -``` - -Now we can for example look at the average - - -```julia -avg = vec(mean(z; dims = 2)) -``` - -The vector has the same ordering as the model, e.g. in this case `σ₂` has index `1`, `intercept` has index `2` and `coefficients` has indices `3:12`. 
If you forget or you might want to do something programmatically with the result, you can obtain the `sym → indices` mapping as follows: - - -```julia -_, sym2range = bijector(m, Val(true)); -sym2range -``` - -```julia -avg[union(sym2range[:σ₂]...)] -``` - -```julia -avg[union(sym2range[:intercept]...)] -``` - -```julia -avg[union(sym2range[:coefficients]...)] -``` - -*Note: as you can see, this is slightly awkward to work with at the moment. We'll soon add a better way of dealing with this.* - -With a bit of work (this will be much easier in the future), we can also visualize the approximate marginals of the different variables, similar to `plot(chain)`: - - -```julia -function plot_variational_marginals(z, sym2range) - ps = [] - - for (i, sym) in enumerate(keys(sym2range)) - indices = union(sym2range[sym]...) # <= array of ranges - if sum(length.(indices)) > 1 - offset = 1 - for r in indices - for j in r - p = density(z[j, :], title = "$(sym)[$offset]", titlefontsize = 10, label = "") - push!(ps, p) - - offset += 1 - end - end - else - p = density(z[first(indices), :], title = "$(sym)", titlefontsize = 10, label = "") - push!(ps, p) - end - end - - return plot(ps..., layout = (length(ps), 1), size = (500, 1500)) -end -``` - - -```julia -plot_variational_marginals(z, sym2range) -``` - -And let's compare this to using the `NUTS` sampler: - - -```julia; results = "hidden" -chain = sample(m, NUTS(0.65), 10_000); -``` - -```julia -plot(chain) -``` - - -```julia -vi_mean = vec(mean(z; dims = 2))[[union(sym2range[:coefficients]...)..., union(sym2range[:intercept]...)..., union(sym2range[:σ₂]...)...]] -``` - -```julia -mean(chain).nt.mean -``` - -One thing we can look at is simply the squared error between the means: - - -```julia -sum(abs2, mean(chain).nt.mean .- vi_mean) -``` - -That looks pretty good! But let's see how the predictive distributions looks for the two. - -## Prediction - -Similarily to the linear regression tutorial, we're going to compare to multivariate ordinary linear regression using the `GLM` package: - - -```julia; results = "hidden" -# Import the GLM package. -using GLM - -# Perform multivariate OLS. -ols = lm(@formula(MPG ~ Cyl + Disp + HP + DRat + WT + QSec + VS + AM + Gear + Carb), train_cut) - -# Store our predictions in the original dataframe. -train_cut.OLSPrediction = unstandardize(GLM.predict(ols), data.MPG); -test_cut.OLSPrediction = unstandardize(GLM.predict(ols, test_cut), data.MPG); -``` - - -```julia -# Make a prediction given an input vector. -function prediction_chain(chain, x) - p = get_params(chain) - α = mean(p.intercept) - β = collect(mean.(p.coefficients)) - return α .+ x * β -end -``` - -```julia -# Make a prediction using samples from the variational posterior given an input vector. -function prediction(samples::AbstractVector, sym2ranges, x) - α = mean(samples[union(sym2ranges[:intercept]...)]) - β = vec(mean(samples[union(sym2ranges[:coefficients]...)]; dims = 2)) - return α .+ x * β -end - -function prediction(samples::AbstractMatrix, sym2ranges, x) - α = mean(samples[union(sym2ranges[:intercept]...), :]) - β = vec(mean(samples[union(sym2ranges[:coefficients]...), :]; dims = 2)) - return α .+ x * β -end -``` - -```julia; results = "hidden" -# Unstandardize the dependent variable. -train_cut.MPG = unstandardize(train_cut.MPG, data.MPG); -test_cut.MPG = unstandardize(test_cut.MPG, data.MPG); -``` - - -```julia -# Show the first side rows of the modified dataframe. 
-first(test_cut, 6) -``` - -```julia; results = "hidden" -z = rand(q, 10_000); -``` - - -```julia; results = "hidden" -# Calculate the predictions for the training and testing sets using the samples `z` from variational posterior -train_cut.VIPredictions = unstandardize(prediction(z, sym2range, train), data.MPG); -test_cut.VIPredictions = unstandardize(prediction(z, sym2range, test), data.MPG); - -train_cut.BayesPredictions = unstandardize(prediction_chain(chain, train), data.MPG); -test_cut.BayesPredictions = unstandardize(prediction_chain(chain, test), data.MPG); -``` - - -```julia -vi_loss1 = mean((train_cut.VIPredictions - train_cut.MPG).^2) -bayes_loss1 = mean((train_cut.BayesPredictions - train_cut.MPG).^2) -ols_loss1 = mean((train_cut.OLSPrediction - train_cut.MPG).^2) - -vi_loss2 = mean((test_cut.VIPredictions - test_cut.MPG).^2) -bayes_loss2 = mean((test_cut.BayesPredictions - test_cut.MPG).^2) -ols_loss2 = mean((test_cut.OLSPrediction - test_cut.MPG).^2) - -println("Training set: - VI loss: $vi_loss1 - Bayes loss: $bayes_loss1 - OLS loss: $ols_loss1 -Test set: - VI loss: $vi_loss2 - Bayes loss: $bayes_loss2 - OLS loss: $ols_loss2") -``` - - -Interestingly the squared difference between true- and mean-prediction on the test-set is actually *better* for the mean-field variational posterior than for the "true" posterior obtained by MCMC sampling using `NUTS`. But, as Bayesians, we know that the mean doesn't tell the entire story. One quick check is to look at the mean predictions ± standard deviation of the two different approaches: - - -```julia -z = rand(q, 1000); -preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); - -scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) -scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") -xaxis!(1:size(test, 1)) -ylims!(95, 140) -title!("Mean-field ADVI (Normal)") -``` - -```julia -preds = hcat([unstandardize(prediction_chain(chain[i], test), data.MPG) for i = 1:5:size(chain, 1)]...); - -scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) -scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") -xaxis!(1:size(test, 1)) -ylims!(95, 140) -title!("MCMC (NUTS)") -``` - -Indeed we see that the MCMC approach generally provides better uncertainty estimates than the mean-field ADVI approach! Good. So all the work we've done to make MCMC fast isn't for nothing. - -## Alternative: provide parameter-to-distribution instead of `q` with`update` implemented - -As mentioned earlier, it's also possible to just provide the mapping $$\theta \mapsto q_{\theta}$$ rather than the variational family / initial variational posterior `q`, i.e. use the interface `vi(m, advi, getq, θ_init)` where `getq` is the mapping $$\theta \mapsto q_{\theta}$$ - -In this section we're going to construct a mean-field approximation to the model by hand using a composition of`Shift` and `Scale` from Bijectors.jl togheter with a standard multivariate Gaussian as the base distribution. 
- - -```julia -using Bijectors -``` - - -```julia -using Bijectors: Scale, Shift -``` - - -```julia -d = length(q) -base_dist = Turing.DistributionsAD.TuringDiagMvNormal(zeros(d), ones(d)) -``` - -`bijector(model::Turing.Model)` is defined by Turing, and will return a `bijector` which takes you from the space of the latent variables to the real space. In this particular case, this is a mapping `((0, ∞) × ℝ × ℝ¹⁰) → ℝ¹²`. We're interested in using a normal distribution as a base-distribution and transform samples to the latent space, thus we need the inverse mapping from the reals to the latent space: - - -```julia; results = "hidden" -to_constrained = inv(bijector(m)); -``` - - -```julia -function getq(θ) - d = length(θ) ÷ 2 - A = @inbounds θ[1:d] - b = @inbounds θ[d + 1: 2 * d] - - b = to_constrained ∘ Shift(b; dim = Val(1)) ∘ Scale(exp.(A); dim = Val(1)) - - return transformed(base_dist, b) -end -``` - -```julia; results = "hidden" -q_mf_normal = vi(m, advi, getq, randn(2 * d)); -``` - -```julia -p1 = plot_variational_marginals(rand(q_mf_normal, 10_000), sym2range) # MvDiagNormal + Affine transformation + to_constrained -p2 = plot_variational_marginals(rand(q, 10_000), sym2range) # Turing.meanfield(m) - -plot(p1, p2, layout = (1, 2), size = (800, 2000)) -``` -As expected, the fits look pretty much identical. - -But using this interface it becomes trivial to go beyond the mean-field assumption we made for the variational posterior, as we'll see in the next section. - -### Relaxing the mean-field assumption - -Here we'll instead consider the variational family to be a full non-diagonal multivariate Gaussian. As in the previous section we'll implement this by transforming a standard multivariate Gaussian using `Scale` and `Shift`, but now `Scale` will instead be using a lower-triangular matrix (representing the Cholesky of the covariance matrix of a multivariate normal) in constrast to the diagonal matrix we used in for the mean-field approximate posterior. - - -```julia -using LinearAlgebra -``` - -```julia -# Using `ComponentArrays.jl` together with `UnPack.jl` makes our lives much easier. -using ComponentArrays, UnPack -``` - -```julia -proto_arr = ComponentArray( - L = zeros(d, d), - b = zeros(d) -) -proto_axes = proto_arr |> getaxes -num_params = length(proto_arr) - -function getq(θ) - L, b = begin - @unpack L, b = ComponentArray(θ, proto_axes) - LowerTriangular(L), b - end - # For this to represent a covariance matrix we need to ensure that the diagonal is positive. - # We can enforce this by zeroing out the diagonal and then adding back the diagonal exponentiated. - D = Diagonal(diag(L)) - A = L - D + exp(D) # exp for Diagonal is the same as exponentiating only the diagonal entries - - b = to_constrained ∘ Shift(b; dim = Val(1)) ∘ Scale(A; dim = Val(1)) - - return transformed(base_dist, b) -end -``` - -```julia -advi = ADVI(10, 20_000) -``` - -```julia; results = "hidden" -q_full_normal = vi(m, advi, getq, randn(num_params); optimizer = Variational.DecayedADAGrad(1e-2)); -``` - -Let's have a look at the learned covariance matrix: - - -```julia -A = q_full_normal.transform.ts[1].a -``` - -```julia -heatmap(cov(A * A')) -``` - -```julia; results = "hidden" -zs = rand(q_full_normal, 10_000); -``` - - -```julia -p1 = plot_variational_marginals(rand(q_mf_normal, 10_000), sym2range) -p2 = plot_variational_marginals(rand(q_full_normal, 10_000), sym2range) - -plot(p1, p2, layout = (1, 2), size = (800, 2000)) -``` - - -So it seems like the "full" ADVI approach, i.e. 
no mean-field assumption, obtain the same modes as the mean-field approach but with greater uncertainty for some of the `coefficients`. This - - -```julia; results = "hidden" -# Unfortunately, it seems like this has quite a high variance which is likely to be due to numerical instability, -# so we consider a larger number of samples. If we get a couple of outliers due to numerical issues, -# these kind affect the mean prediction greatly. -z = rand(q_full_normal, 10_000); -``` - - -```julia; results = "hidden" -train_cut.VIFullPredictions = unstandardize(prediction(z, sym2range, train), data.MPG); -test_cut.VIFullPredictions = unstandardize(prediction(z, sym2range, test), data.MPG); -``` - - -```julia -vi_loss1 = mean((train_cut.VIPredictions - train_cut.MPG).^2) -vifull_loss1 = mean((train_cut.VIFullPredictions - train_cut.MPG).^2) -bayes_loss1 = mean((train_cut.BayesPredictions - train_cut.MPG).^2) -ols_loss1 = mean((train_cut.OLSPrediction - train_cut.MPG).^2) - -vi_loss2 = mean((test_cut.VIPredictions - test_cut.MPG).^2) -vifull_loss2 = mean((test_cut.VIFullPredictions - test_cut.MPG).^2) -bayes_loss2 = mean((test_cut.BayesPredictions - test_cut.MPG).^2) -ols_loss2 = mean((test_cut.OLSPrediction - test_cut.MPG).^2) - -println("Training set: - VI loss: $vi_loss1 - Bayes loss: $bayes_loss1 - OLS loss: $ols_loss1 -Test set: - VI loss: $vi_loss2 - Bayes loss: $bayes_loss2 - OLS loss: $ols_loss2") -``` - -```julia -z = rand(q_mf_normal, 1000); -preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); - -p1 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) -scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") -xaxis!(1:size(test, 1)) -ylims!(95, 140) -title!("Mean-field ADVI (Normal)") -``` - -```julia -z = rand(q_full_normal, 1000); -preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); - -p2 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) -scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") -xaxis!(1:size(test, 1)) -ylims!(95, 140) -title!("Full ADVI (Normal)") -``` - -```julia -preds = hcat([unstandardize(prediction_chain(chain[i], test), data.MPG) for i = 1:5:size(chain, 1)]...); - -p3 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) -scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") -xaxis!(1:size(test, 1)) -ylims!(95, 140) -title!("MCMC (NUTS)") -``` - -```julia -plot(p1, p2, p3, layout = (1, 3), size = (900, 250), label="") -``` - -Here we actually see that indeed both the full ADVI and the MCMC approaches does a much better job of quantifying the uncertainty of predictions for never-before-seen samples, with full ADVI seemingly *overestimating* the variance slightly compared to MCMC. - -So now you know how to do perform VI on your Turing.jl model! Great isn't it? 
diff --git a/tutorials/variational-inference/01-variational-inference.jmd b/tutorials/variational-inference/01_variational-inference.jmd similarity index 100% rename from tutorials/variational-inference/01-variational-inference.jmd rename to tutorials/variational-inference/01_variational-inference.jmd From 6f30cee95f09621bb9be6eba7fe8fa812304c0f8 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 24 Sep 2020 14:19:59 +0100 Subject: [PATCH 07/12] added modifications --- .../01_variational-inference.jmd | 641 ++---------------- 1 file changed, 69 insertions(+), 572 deletions(-) diff --git a/tutorials/variational-inference/01_variational-inference.jmd b/tutorials/variational-inference/01_variational-inference.jmd index f025b8775..3378bf45b 100644 --- a/tutorials/variational-inference/01_variational-inference.jmd +++ b/tutorials/variational-inference/01_variational-inference.jmd @@ -8,7 +8,7 @@ In this post we'll have a look at what's know as **variational inference (VI)**, Here we will focus on how to use VI in Turing and not much on the theory underlying VI. If you're interested in understanding the mathematics you can checkout [our write-up](../../docs/for-developers/variational_inference) or any other resource online (there a lot of great ones). Using VI in Turing.jl is very straight forward. If `model` denotes a definition of a `Turing.Model`, performing VI is as simple as -```julia +```julia; eval = false m = model(data...) # instantiate model on the data q = vi(m, vi_alg) # perform VI on `m` using the VI method `vi_alg`, which returns a `VariationalPosterior` ``` @@ -18,8 +18,7 @@ To get a bit more into what we can do with `vi`, we'll first have a look at a si ## Setup - -```julia +```julia; results = "hidden" using Random using Turing using Turing: Variational @@ -32,8 +31,8 @@ Random.seed!(42); The Normal-(Inverse)Gamma conjugate model is defined by the following generative process \begin{align} - s &\sim \mathrm{InverseGamma}(2, 3) \\ - m &\sim \mathcal{N}(0, s) \\ + s &\sim \mathrm{InverseGamma}(2, 3) \\\\ + m &\sim \mathcal{N}(0, s) \\\\ x_i &\overset{\text{i.i.d.}}{=} \mathcal{N}(m, s), \quad i = 1, \dots, n \end{align} @@ -41,15 +40,11 @@ Recall that *conjugate* refers to the fact that we can obtain a closed-form expr First we generate some synthetic data, define the `Turing.Model` and instantiate the model on the data: - -```julia -# generate data, n = 2000 +```julia; results = "hidden" +# generate data x = randn(2000); ``` - - - ```julia @model model(x) = begin s ~ InverseGamma(2, 3) @@ -57,19 +52,11 @@ x = randn(2000); for i = 1:length(x) x[i] ~ Normal(m, sqrt(s)) end -end +end; ``` - - - - ##model#344 (generic function with 2 methods) - - - - -```julia -# construct model +```julia; results = "hidden" +# Instantiate model m = model(x); ``` @@ -80,16 +67,10 @@ We'll produce 10 000 samples with 200 steps used for adaptation and a target acc If you don't understand what "adaptation" or "target acceptance rate" refers to, all you really need to know is that `NUTS` is known to be one of the most accurate and efficient samplers (when applicable) while requiring little to no hand-tuning to work well. -```julia +```julia; results = "hidden" samples_nuts = sample(m, NUTS(200, 0.65), 10000); ``` - ┌ Info: Found initial step size - │ ϵ = 0.025 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/cReBm/src/inference/hmc.jl:556 - Sampling: 100%|█████████████████████████████████████████| Time: 0:00:02 - - Now let's try VI. 
The most important function you need to now about to do VI in Turing is `vi`: @@ -97,25 +78,6 @@ Now let's try VI. The most important function you need to now about to do VI in print(@doc(Variational.vi)) ``` - ``` - vi(model, alg::VariationalInference) - vi(model, alg::VariationalInference, q::VariationalPosterior) - vi(model, alg::VariationalInference, getq::Function, θ::AbstractArray) - ``` - - Constructs the variational posterior from the `model` and performs the optimization following the configuration of the given `VariationalInference` instance. - - # Arguments - - * `model`: `Turing.Model` or `Function` z ↦ log p(x, z) where `x` denotes the observations - * `alg`: the VI algorithm used - * `q`: a `VariationalPosterior` for which it is assumed a specialized implementation of the variational objective used exists. - * `getq`: function taking parameters `θ` as input and returns a `VariationalPosterior` - * `θ`: only required if `getq` is used, in which case it is the initial parameters for the variational posterior - - -`vi` takes the `Model` you want to approximate, a `VariationalInference` whose type specifies the method to use and then its fields specify the configuration of the method. - Additionally, you can pass - an initial variational posterior `q`, for which we assume there exists a implementation of `update(::typeof(q), θ::AbstractVector)` returning an updated posterior `q` with parameters `θ`. - a function mapping $$\theta \mapsto q_{\theta}$$ (denoted above `getq`) together with initial parameters `θ`. This provides more flexibility in the types of variational families that we can use, and can sometimes be slightly more convenient for quick and rough work. @@ -127,14 +89,6 @@ By default, i.e. when calling `vi(m, advi)`, Turing use a *mean-field* approxima print(@doc(Variational.meanfield)) ``` - ``` - meanfield(model::Model) - meanfield(rng::AbstractRNG, model::Model) - ``` - - Creates a mean-field approximation with multivariate normal as underlying distribution. - - Currently the only implementation of `VariationalInference` available is `ADVI`, which is very convenient and applicable as long as your `Model` is differentiable with respect to the *variational parameters*, that is, the parameters of your variational distribution, e.g. mean and variance in the mean-field approximation. @@ -142,43 +96,15 @@ Currently the only implementation of `VariationalInference` available is `ADVI`, print(@doc(Variational.ADVI)) ``` - ```julia - struct ADVI{AD} <: Turing.Variational.VariationalInference{AD} - ``` - - Automatic Differentiation Variational Inference (ADVI) with automatic differentiation backend `AD`. - - # Fields - - * `samples_per_step::Int64` - - Number of samples used to estimate the ELBO in each optimization step. - * `max_iters::Int64` - - Maximum number of gradient steps. - - ``` - ADVI([samples_per_step=1, max_iters=1000]) - ``` - - Create an [`ADVI`](@ref) with the currently enabled automatic differentiation backend `ADBackend()`. 
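Since, as the docstring above says, an `ADVI` object is created with the currently enabled automatic differentiation backend, you can control which backend is used for the ELBO gradients through Turing's global AD setting. A minimal sketch (the choice of backend here is just an example; other backends require their corresponding package to be loaded):

```julia; eval = false
# Illustrative only: the AD backend is read off when the `ADVI` object is
# constructed, so switch it *before* calling `ADVI(...)` / `vi(...)`.
Turing.setadbackend(:forwarddiff)   # ForwardDiff is the default backend
```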
- - To perform VI on the model `m` using 10 samples for gradient estimation and taking 1000 gradient steps is then as simple as: -```julia +```julia; results = "hidden" # ADVI advi = ADVI(10, 1000) q = vi(m, advi); ``` - ┌ Info: [ADVI] Should only be seen once: optimizer created for θ - │ objectid(θ) = 12334556482979097499 - └ @ Turing.Variational /home/cameron/.julia/packages/Turing/cReBm/src/variational/VariationalInference.jl:204 - [ADVI] Optimizing...: 100%|█████████████████████████████████████████| Time: 0:00:03 - - Unfortunately, for such a small problem Turing's new `NUTS` sampler is *so* efficient now that it's not that much more efficient to use ADVI. So, so very unfortunate... With that being said, this is not the case in general. For very complex models we'll later find that `ADVI` produces very reasonable results in a much shorter time than `NUTS`. @@ -190,13 +116,6 @@ And one significant advantage of using `vi` is that we can sample from the resul q isa MultivariateDistribution ``` - - - - true - - - This means that we can call `rand` to sample from the variational posterior `q` @@ -204,15 +123,6 @@ This means that we can call `rand` to sample from the variational posterior `q` rand(q) ``` - - - - 2-element Array{Float64,1}: - 1.0134702063474585 - -0.07429020521027016 - - - and `logpdf` to compute the log-probability @@ -220,13 +130,6 @@ and `logpdf` to compute the log-probability logpdf(q, rand(q)) ``` - - - - 4.277478745320889 - - - Let's check the first and second moments of the data to see how our approximation compares to the point-estimates form the data: @@ -234,80 +137,45 @@ Let's check the first and second moments of the data to see how our approximatio var(x), mean(x) ``` - - - - (1.021109459575047, -0.028838703049547422) - - - - ```julia (mean(rand(q, 1000); dims = 2)..., ) ``` - - - - (1.02716749432684, -0.02510701319723139) - - - That's pretty close! But we're Bayesian so we're not interested in *just* matching the mean. Let's instead look the actual density `q`. 
For that we need samples: -```julia +```julia; results = "hidden" samples = rand(q, 10000); ``` - ```julia # setup for plotting using Plots, LaTeXStrings, StatsPlots pyplot() ``` - ┌ Info: Precompiling PyPlot [d330b81b-6aea-500a-939a-2ce795aea3ee] - └ @ Base loading.jl:1260 - - - - - - Plots.PyPlotBackend() - - - - ```julia p1 = histogram(samples[1, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") density!(samples[1, :], label = "s (ADVI)", color = :blue, linewidth = 2) -density!(collect(skipmissing(samples_nuts[:s].value)), label = "s (NUTS)", color = :green, linewidth = 2) +density!(collect(skipmissing(samples_nuts[:s].data)), label = "s (NUTS)", color = :green, linewidth = 2) vline!([var(x)], label = "s (data)", color = :black) vline!([mean(samples[1, :])], color = :blue, label ="") p2 = histogram(samples[2, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") density!(samples[2, :], label = "m (ADVI)", color = :blue, linewidth = 2) -density!(collect(skipmissing(samples_nuts[:m].value)), label = "m (NUTS)", color = :green, linewidth = 2) +density!(collect(skipmissing(samples_nuts[:m].data)), label = "m (NUTS)", color = :green, linewidth = 2) vline!([mean(x)], color = :black, label = "m (data)") vline!([mean(samples[2, :])], color = :blue, label="") plot(p1, p2, layout=(2, 1), size=(900, 500)) ``` - - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_34_0.png) - - - For this particular `Model`, we can in fact obtain the posterior of the latent variables in closed form. This allows us to compare both `NUTS` and `ADVI` to the true posterior $$p(s, m \mid \{x_i\}_{i = 1}^n )$$. -*The code below is just work to get the marginals $$p(s \mid \{x_i\}_{i = 1}^n)$$ and $$p(m \mid \{x_i\}_{i = 1}^n$$ from the posterior obtained using ConjugatePriors.jl. Feel free to skip it.* +*The code below is just work to get the marginals $$p(s \mid \{x_i\}_{i = 1}^n)$$ and $$p(m \mid \{x_i\}_{i = 1}^n)$$ from the posterior obtained using ConjugatePriors.jl. Feel free to skip it.* ```julia @@ -351,7 +219,7 @@ p_μ_pdf = z -> exp(logpdf(p_μ, (z - μₙ) * exp(- 0.5 * log(βₙ) + 0.5 * lo p1 = plot(); histogram!(samples[1, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") density!(samples[1, :], label = "s (ADVI)", color = :blue) -density!(collect(skipmissing(samples_nuts[:s].value)), label = "s (NUTS)", color = :green) +density!(vec(samples_nuts[:s].data), label = "s (NUTS)", color = :green) vline!([mean(samples[1, :])], linewidth = 1.5, color = :blue, label ="") # normalize using Riemann approx. 
because of (almost certainly) numerical issues @@ -365,7 +233,7 @@ xlims!(0.75, 1.35); p2 = plot(); histogram!(samples[2, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "") density!(samples[2, :], label = "m (ADVI)", color = :blue) -density!(collect(skipmissing(samples_nuts[:m].value)), label = "m (NUTS)", color = :green) +density!(vec(samples_nuts[:m].data), label = "m (NUTS)", color = :green) vline!([mean(samples[2, :])], linewidth = 1.5, color = :blue, label="") @@ -381,16 +249,6 @@ xlims!(-0.25, 0.25); p = plot(p1, p2; layout=(2, 1), size=(900, 500)) ``` - ┌ Info: Precompiling ConjugatePriors [1624bea9-42b1-5fc1-afd3-e96f729c8d6c] - └ @ Base loading.jl:1260 - - - - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_36_1.png) - - # Bayesian linear regression example using `ADVI` @@ -403,12 +261,12 @@ As we'll see, there is really no additional work required to apply variational i This section is basically copy-pasting the code from the [linear regression tutorial](../../tutorials/5-linearregression). -```julia +```julia; results = "hidden" Random.seed!(1); ``` -```julia +```julia; results = "hidden" # Import RDatasets. using RDatasets @@ -416,10 +274,6 @@ using RDatasets Turing.turnprogress(true); ``` - ┌ Info: [Turing]: progress logging is enabled globally - └ @ Turing /home/cameron/.julia/packages/Turing/cReBm/src/Turing.jl:22 - - ```julia # Import the "Default" dataset. @@ -429,14 +283,6 @@ data = RDatasets.dataset("datasets", "mtcars"); first(data, 6) ``` - - - -

6 rows × 12 columns (omitted printing of 3 columns)

|   | Model             | MPG  | Cyl | Disp  | HP  | DRat | WT    | QSec  | VS |
|---|-------------------|------|-----|-------|-----|------|-------|-------|----|
| 1 | Mazda RX4         | 21.0 | 6   | 160.0 | 110 | 3.9  | 2.62  | 16.46 | 0  |
| 2 | Mazda RX4 Wag     | 21.0 | 6   | 160.0 | 110 | 3.9  | 2.875 | 17.02 | 0  |
| 3 | Datsun 710        | 22.8 | 4   | 108.0 | 93  | 3.85 | 2.32  | 18.61 | 1  |
| 4 | Hornet 4 Drive    | 21.4 | 6   | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1  |
| 5 | Hornet Sportabout | 18.7 | 8   | 360.0 | 175 | 3.15 | 3.44  | 17.02 | 0  |
| 6 | Valiant           | 18.1 | 6   | 225.0 | 105 | 2.76 | 3.46  | 20.22 | 1  |
- - - - ```julia # Function to split samples. function split_data(df, at = 0.70) @@ -458,15 +304,7 @@ function unstandardize(x, orig) end ``` - - - - unstandardize (generic function with 1 method) - - - - -```julia +```julia; results = "hidden" # Remove the model column. select!(data, Not(:Model)) @@ -512,7 +350,7 @@ end; ``` -```julia +```julia; results = "hidden" n_obs, n_vars = size(train) m = linear_regression(train, train_label, n_obs, n_vars); ``` @@ -527,32 +365,17 @@ q0 = Variational.meanfield(m) typeof(q0) ``` - - - - Bijectors.TransformedDistribution{DistributionsAD.TuringDiagMvNormal{Array{Float64,1},Array{Float64,1}},Bijectors.Stacked{Tuple{Bijectors.Inverse{Bijectors.TruncatedBijector{Float64},0},Bijectors.Identity{0},Bijectors.Identity{1}},3},Multivariate} - - - - ```julia advi = ADVI(10, 10_000) ``` - - - - ADVI{Turing.Core.ForwardDiffAD{40}}(10, 10000) - - - Turing also provides a couple of different optimizers: - `TruncatedADAGrad` (default) - `DecayedADAGrad` as these are well-suited for problems with high-variance stochastic objectives, which is usually what the ELBO ends up being at different times in our optimization process. With that being said, thanks to Requires.jl, if we add a `using Flux` prior to `using Turing` we can also make use of all the optimizers in `Flux`, e.g. `ADAM`, without any additional changes to your code! For example: -```julia +```julia; eval = false using Flux, Turing using Turing.Variational @@ -568,28 +391,11 @@ opt = Variational.DecayedADAGrad(1e-2, 1.1, 0.9) ``` - - - Turing.Variational.DecayedADAGrad(0.01, 1.1, 0.9, IdDict{Any,Any}()) - - - - ```julia q = vi(m, advi, q0; optimizer = opt) typeof(q) ``` - [ADVI] Optimizing...: 100%|█████████████████████████████████████████| Time: 0:00:05 - - - - - - Bijectors.TransformedDistribution{DistributionsAD.TuringDiagMvNormal{Array{Float64,1},Array{Float64,1}},Bijectors.Stacked{Tuple{Bijectors.Inverse{Bijectors.TruncatedBijector{Float64},0},Bijectors.Identity{0},Bijectors.Identity{1}},3},Multivariate} - - - *Note: as mentioned before, we internally define a `update(q::TransformedDistribution{<:TuringDiagMvNormal}, θ::AbstractVector)` method which takes in the current variational approximation `q` together with new parameters `z` and returns the new variational approximation. This is required so that we can actually update the `Distribution` object after each optimization step.* *Alternatively, we can instead provide the mapping $$\theta \mapsto q_{\theta}$$ directly together with initial parameters using the signature `vi(m, advi, getq, θ_init)` as mentioned earlier. We'll see an explicit example of this later on!* @@ -597,7 +403,7 @@ typeof(q) To compute statistics for our approximation we need samples: -```julia +```julia; results = "hidden" z = rand(q, 10_000); ``` @@ -608,88 +414,26 @@ Now we can for example look at the average avg = vec(mean(z; dims = 2)) ``` - - - - 12-element Array{Float64,1}: - 0.4606389176400052 - 0.05202909837745655 - 0.4064267006145497 - -0.11468688188714653 - -0.09745310785481277 - 0.6148587707658169 - 0.01308179579131569 - 0.09698898180610954 - -0.07232304322690832 - 0.13320265040493984 - 0.28561578772443025 - -0.829825963610117 - - - The vector has the same ordering as the model, e.g. in this case `σ₂` has index `1`, `intercept` has index `2` and `coefficients` has indices `3:12`. 
If you forget or you might want to do something programmatically with the result, you can obtain the `sym → indices` mapping as follows: ```julia -_, sym2range = Variational.bijector(m; sym_to_ranges = Val(true)); +_, sym2range = bijector(m, Val(true)); sym2range ``` - - - - (intercept = UnitRange{Int64}[2:2], σ₂ = UnitRange{Int64}[1:1], coefficients = UnitRange{Int64}[3:12]) - - - - ```julia avg[union(sym2range[:σ₂]...)] ``` - - - - 1-element Array{Float64,1}: - 0.4606389176400052 - - - - ```julia avg[union(sym2range[:intercept]...)] ``` - - - - 1-element Array{Float64,1}: - 0.05202909837745655 - - - - ```julia avg[union(sym2range[:coefficients]...)] ``` - - - - 10-element Array{Float64,1}: - 0.4064267006145497 - -0.11468688188714653 - -0.09745310785481277 - 0.6148587707658169 - 0.01308179579131569 - 0.09698898180610954 - -0.07232304322690832 - 0.13320265040493984 - 0.28561578772443025 - -0.829825963610117 - - - *Note: as you can see, this is slightly awkward to work with at the moment. We'll soon add a better way of dealing with this.* With a bit of work (this will be much easier in the future), we can also visualize the approximate marginals of the different variables, similar to `plot(chain)`: @@ -705,14 +449,14 @@ function plot_variational_marginals(z, sym2range) offset = 1 for r in indices for j in r - p = density(z[j, :], title = "$$(sym)[$$offset]", titlefontsize = 10, label = "") + p = density(z[j, :], title = "$(sym)[$offset]", titlefontsize = 10, label = "") push!(ps, p) offset += 1 end end else - p = density(z[first(indices), :], title = "$$(sym)", titlefontsize = 10, label = "") + p = density(z[first(indices), :], title = "$(sym)", titlefontsize = 10, label = "") push!(ps, p) end end @@ -722,97 +466,30 @@ end ``` - - - plot_variational_marginals (generic function with 1 method) - - - - ```julia plot_variational_marginals(z, sym2range) ``` - - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_68_0.png) - - - And let's compare this to using the `NUTS` sampler: -```julia +```julia; results = "hidden" chain = sample(m, NUTS(0.65), 10_000); ``` - ┌ Info: Found initial step size - │ ϵ = 0.4 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/cReBm/src/inference/hmc.jl:556 - Sampling: 100%|█████████████████████████████████████████| Time: 0:00:04 - - - ```julia plot(chain) ``` - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_71_0.png) - - - - ```julia vi_mean = vec(mean(z; dims = 2))[[union(sym2range[:coefficients]...)..., union(sym2range[:intercept]...)..., union(sym2range[:σ₂]...)...]] ``` - - - - 12-element Array{Float64,1}: - 0.4064267006145497 - -0.11468688188714653 - -0.09745310785481277 - 0.6148587707658169 - 0.01308179579131569 - 0.09698898180610954 - -0.07232304322690832 - 0.13320265040493984 - 0.28561578772443025 - -0.829825963610117 - 0.05202909837745655 - 0.4606389176400052 - - - - ```julia mean(chain).nt.mean ``` - - - - 12-element Array{Float64,1}: - 0.40737234076000634 - -0.12119407949255825 - -0.09258229213058687 - 0.6075161662165318 - 0.010710254061742489 - 0.0962666098260057 - -0.07340041375352217 - 0.14124748712473906 - 0.2782293300542158 - -0.8234179979734787 - 0.049650076749642606 - 0.47011974512236054 - - - One thing we can look at is simply the squared error between the means: @@ -820,13 +497,6 @@ One thing we can look at is simply the squared error between the means: sum(abs2, mean(chain).nt.mean .- vi_mean) ``` - - - - 0.00038407031406971286 - - - That looks pretty good! 
But let's see how the predictive distributions looks for the two. ## Prediction @@ -834,7 +504,7 @@ That looks pretty good! But let's see how the predictive distributions looks for Similarily to the linear regression tutorial, we're going to compare to multivariate ordinary linear regression using the `GLM` package: -```julia +```julia; results = "hidden" # Import the GLM package. using GLM @@ -857,14 +527,6 @@ function prediction_chain(chain, x) end ``` - - - - prediction_chain (generic function with 1 method) - - - - ```julia # Make a prediction using samples from the variational posterior given an input vector. function prediction(samples::AbstractVector, sym2ranges, x) @@ -880,15 +542,7 @@ function prediction(samples::AbstractMatrix, sym2ranges, x) end ``` - - - - prediction (generic function with 2 methods) - - - - -```julia +```julia; results = "hidden" # Unstandardize the dependent variable. train_cut.MPG = unstandardize(train_cut.MPG, data.MPG); test_cut.MPG = unstandardize(test_cut.MPG, data.MPG); @@ -900,20 +554,12 @@ test_cut.MPG = unstandardize(test_cut.MPG, data.MPG); first(test_cut, 6) ``` - - - -

6 rows × 12 columns (omitted printing of 4 columns)

|   | MPG     | Cyl      | Disp      | HP        | DRat      | WT       | QSec      | VS        |
|---|---------|----------|-----------|-----------|-----------|----------|-----------|-----------|
|   | Float64 | Float64  | Float64   | Float64   | Float64   | Float64  | Float64   | Float64   |
| 1 | 116.195 | 1.01488  | 0.591245  | 0.0483133 | -0.835198 | 0.222544 | -0.307089 | -0.868028 |
| 2 | 114.295 | 1.01488  | 0.962396  | 1.4339    | 0.249566  | 0.636461 | -1.36476  | -0.868028 |
| 3 | 120.195 | 1.01488  | 1.36582   | 0.412942  | -0.966118 | 0.641571 | -0.446992 | -0.868028 |
| 4 | 128.295 | -1.22486 | -1.22417  | -1.17684  | 0.904164  | -1.31048 | 0.588295  | 1.11604   |
| 5 | 126.995 | -1.22486 | -0.890939 | -0.812211 | 1.55876   | -1.10097 | -0.642858 | -0.868028 |
| 6 | 131.395 | -1.22486 | -1.09427  | -0.491337 | 0.324377  | -1.74177 | -0.530935 | 1.11604   |
- - - - -```julia +```julia; results = "hidden" z = rand(q, 10_000); ``` -```julia +```julia; results = "hidden" # Calculate the predictions for the training and testing sets using the samples `z` from variational posterior train_cut.VIPredictions = unstandardize(prediction(z, sym2range, train), data.MPG); test_cut.VIPredictions = unstandardize(prediction(z, sym2range, test), data.MPG); @@ -933,24 +579,15 @@ bayes_loss2 = mean((test_cut.BayesPredictions - test_cut.MPG).^2) ols_loss2 = mean((test_cut.OLSPrediction - test_cut.MPG).^2) println("Training set: - VI loss: $$vi_loss1 - Bayes loss: $$bayes_loss1 - OLS loss: $$ols_loss1 + VI loss: $vi_loss1 + Bayes loss: $bayes_loss1 + OLS loss: $ols_loss1 Test set: - VI loss: $$vi_loss2 - Bayes loss: $$bayes_loss2 - OLS loss: $$ols_loss2") + VI loss: $vi_loss2 + Bayes loss: $bayes_loss2 + OLS loss: $ols_loss2") ``` - Training set: - VI loss: 3.0784608943296643 - Bayes loss: 3.0716118391411906 - OLS loss: 3.070926124893019 - Test set: - VI loss: 27.159605003619333 - Bayes loss: 26.58835451660728 - OLS loss: 27.094813070760107 - Interestingly the squared difference between true- and mean-prediction on the test-set is actually *better* for the mean-field variational posterior than for the "true" posterior obtained by MCMC sampling using `NUTS`. But, as Bayesians, we know that the mean doesn't tell the entire story. One quick check is to look at the mean predictions ± standard deviation of the two different approaches: @@ -959,38 +596,23 @@ Interestingly the squared difference between true- and mean-prediction on the te z = rand(q, 1000); preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); -scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500)) +scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") xaxis!(1:size(test, 1)) ylims!(95, 140) title!("Mean-field ADVI (Normal)") ``` - - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_88_0.png) - - - - ```julia preds = hcat([unstandardize(prediction_chain(chain[i], test), data.MPG) for i = 1:5:size(chain, 1)]...); -scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500)) +scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") xaxis!(1:size(test, 1)) ylims!(95, 140) title!("MCMC (NUTS)") ``` - - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_89_0.png) - - - Indeed we see that the MCMC approach generally provides better uncertainty estimates than the mean-field ADVI approach! Good. So all the work we've done to make MCMC fast isn't for nothing. 
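If you want a number to go with that visual impression, one crude check is to compare the average per-point predictive standard deviation of the two approaches on the test set. This is only a sketch reusing the `prediction`, `prediction_chain`, `q`, `chain`, `test` and `sym2range` objects from above; the names `z_mf`, `advi_preds` and `nuts_preds` are ours:

```julia; eval = false
# Average predictive standard deviation across test points (illustrative check only).
z_mf = rand(q, 1000);
advi_preds = hcat([unstandardize(prediction(z_mf[:, i], sym2range, test), data.MPG) for i = 1:size(z_mf, 2)]...);
nuts_preds = hcat([unstandardize(prediction_chain(chain[i], test), data.MPG) for i = 1:5:size(chain, 1)]...);

mean(std(advi_preds; dims = 2)), mean(std(nuts_preds; dims = 2))
```

If the first number comes out noticeably smaller than the second, that is the same under-dispersion of the mean-field approximation we just saw in the plots.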
## Alternative: provide parameter-to-distribution instead of `q` with`update` implemented @@ -1015,21 +637,10 @@ d = length(q) base_dist = Turing.DistributionsAD.TuringDiagMvNormal(zeros(d), ones(d)) ``` - - - - DistributionsAD.TuringDiagMvNormal{Array{Float64,1},Array{Float64,1}}( - m: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - σ: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] - ) - - - - `bijector(model::Turing.Model)` is defined by Turing, and will return a `bijector` which takes you from the space of the latent variables to the real space. In this particular case, this is a mapping `((0, ∞) × ℝ × ℝ¹⁰) → ℝ¹²`. We're interested in using a normal distribution as a base-distribution and transform samples to the latent space, thus we need the inverse mapping from the reals to the latent space: -```julia +```julia; results = "hidden" to_constrained = inv(bijector(m)); ``` @@ -1046,39 +657,16 @@ function getq(θ) end ``` - - - - getq (generic function with 1 method) - - - - -```julia +```julia; results = "hidden" q_mf_normal = vi(m, advi, getq, randn(2 * d)); ``` - ┌ Info: [ADVI] Should only be seen once: optimizer created for θ - │ objectid(θ) = 8127634262038331167 - └ @ Turing.Variational /home/cameron/.julia/packages/Turing/cReBm/src/variational/VariationalInference.jl:204 - [ADVI] Optimizing...: 100%|█████████████████████████████████████████| Time: 0:00:06 - - - ```julia p1 = plot_variational_marginals(rand(q_mf_normal, 10_000), sym2range) # MvDiagNormal + Affine transformation + to_constrained p2 = plot_variational_marginals(rand(q, 10_000), sym2range) # Turing.meanfield(m) plot(p1, p2, layout = (1, 2), size = (800, 2000)) ``` - - - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_100_0.png) - - - As expected, the fits look pretty much identical. But using this interface it becomes trivial to go beyond the mean-field assumption we made for the variational posterior, as we'll see in the next section. @@ -1092,16 +680,24 @@ Here we'll instead consider the variational family to be a full non-diagonal mul using LinearAlgebra ``` +```julia +# Using `ComponentArrays.jl` together with `UnPack.jl` makes our lives much easier. +using ComponentArrays, UnPack +``` ```julia -d = 12 +proto_arr = ComponentArray( + L = zeros(d, d), + b = zeros(d) +) +proto_axes = proto_arr |> getaxes +num_params = length(proto_arr) function getq(θ) - offset = 0 - L = LowerTriangular(reshape(@inbounds(θ[offset + 1: offset + d^2]), (d, d))) - offset += d^2 - b = @inbounds θ[offset + 1: offset + d] - + L, b = begin + @unpack L, b = ComponentArray(θ, proto_axes) + LowerTriangular(L), b + end # For this to represent a covariance matrix we need to ensure that the diagonal is positive. # We can enforce this by zeroing out the diagonal and then adding back the diagonal exponentiated. 
D = Diagonal(diag(L)) @@ -1113,33 +709,14 @@ function getq(θ) end ``` - - - - getq (generic function with 1 method) - - - - ```julia advi = ADVI(10, 20_000) ``` - - - - ADVI{Turing.Core.ForwardDiffAD{40}}(10, 20000) - - - - -```julia -q_full_normal = vi(m, advi, getq, randn(d^2 + d); optimizer = Variational.DecayedADAGrad(1e-2)); +```julia; results = "hidden" +q_full_normal = vi(m, advi, getq, randn(num_params); optimizer = Variational.DecayedADAGrad(1e-2)); ``` - [ADVI] Optimizing...: 100%|█████████████████████████████████████████| Time: 0:01:05 - - Let's have a look at the learned covariance matrix: @@ -1147,39 +724,11 @@ Let's have a look at the learned covariance matrix: A = q_full_normal.transform.ts[1].a ``` - - - - 12×12 LowerTriangular{Float64,Array{Float64,2}}: - 0.154572 ⋅ ⋅ … ⋅ ⋅ ⋅ - 0.00674249 0.169072 ⋅ ⋅ ⋅ ⋅ - -0.00288782 -0.0283984 0.413288 ⋅ ⋅ ⋅ - -0.030621 0.0450533 -0.0415525 ⋅ ⋅ ⋅ - -0.0115003 0.208366 -0.0420414 ⋅ ⋅ ⋅ - 0.00139553 -0.0619506 0.0853589 … ⋅ ⋅ ⋅ - 0.0129097 -0.0647154 0.00228644 ⋅ ⋅ ⋅ - -0.0128701 -0.0531755 0.0999936 ⋅ ⋅ ⋅ - 0.00169318 0.0274239 0.0903744 ⋅ ⋅ ⋅ - -0.0172387 -0.0304655 0.0661713 0.14843 ⋅ ⋅ - -0.000468924 0.300281 0.0789093 … -0.131391 0.128256 ⋅ - 0.00160201 -0.122274 -0.0776935 0.0468996 -0.00752499 0.120458 - - - - ```julia heatmap(cov(A * A')) ``` - - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_110_0.png) - - - - -```julia +```julia; results = "hidden" zs = rand(q_full_normal, 10_000); ``` @@ -1192,16 +741,10 @@ plot(p1, p2, layout = (1, 2), size = (800, 2000)) ``` - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_112_0.png) - - - So it seems like the "full" ADVI approach, i.e. no mean-field assumption, obtain the same modes as the mean-field approach but with greater uncertainty for some of the `coefficients`. This -```julia +```julia; results = "hidden" # Unfortunately, it seems like this has quite a high variance which is likely to be due to numerical instability, # so we consider a larger number of samples. If we get a couple of outliers due to numerical issues, # these kind affect the mean prediction greatly. 
@@ -1209,7 +752,7 @@ z = rand(q_full_normal, 10_000); ``` -```julia +```julia; results = "hidden" train_cut.VIFullPredictions = unstandardize(prediction(z, sym2range, train), data.MPG); test_cut.VIFullPredictions = unstandardize(prediction(z, sym2range, test), data.MPG); ``` @@ -1227,97 +770,51 @@ bayes_loss2 = mean((test_cut.BayesPredictions - test_cut.MPG).^2) ols_loss2 = mean((test_cut.OLSPrediction - test_cut.MPG).^2) println("Training set: - VI loss: $$vi_loss1 - VI (full) loss: $$vifull_loss1 - Bayes loss: $$bayes_loss1 - OLS loss: $$ols_loss1 + VI loss: $vi_loss1 + Bayes loss: $bayes_loss1 + OLS loss: $ols_loss1 Test set: - VI loss: $$vi_loss2 - VI (full) loss: $$vifull_loss2 - Bayes loss: $$bayes_loss2 - OLS loss: $$ols_loss2") + VI loss: $vi_loss2 + Bayes loss: $bayes_loss2 + OLS loss: $ols_loss2") ``` - Training set: - VI loss: 3.0784608943296643 - VI (full) loss: 3.0926834377972288 - Bayes loss: 3.0716118391411906 - OLS loss: 3.070926124893019 - Test set: - VI loss: 27.159605003619333 - VI (full) loss: 26.912162732716684 - Bayes loss: 26.58835451660728 - OLS loss: 27.094813070760107 - - - ```julia z = rand(q_mf_normal, 1000); preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); -p1 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500)) +p1 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") xaxis!(1:size(test, 1)) ylims!(95, 140) title!("Mean-field ADVI (Normal)") ``` - - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_117_0.png) - - - - ```julia z = rand(q_full_normal, 1000); preds = hcat([unstandardize(prediction(z[:, i], sym2range, test), data.MPG) for i = 1:size(z, 2)]...); -p2 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500)) +p2 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") xaxis!(1:size(test, 1)) ylims!(95, 140) title!("Full ADVI (Normal)") ``` - - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_118_0.png) - - - - ```julia preds = hcat([unstandardize(prediction_chain(chain[i], test), data.MPG) for i = 1:5:size(chain, 1)]...); -p3 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500)) +p3 = scatter(1:size(test, 1), mean(preds; dims = 2), yerr=std(preds; dims = 2), label="prediction (mean ± std)", size = (900, 500), markersize = 8) scatter!(1:size(test, 1), unstandardize(test_label, data.MPG), label="true") xaxis!(1:size(test, 1)) ylims!(95, 140) title!("MCMC (NUTS)") ``` - - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_119_0.png) - - - - ```julia plot(p1, p2, p3, layout = (1, 3), size = (900, 250), label="") ``` - - - -![png](/tutorials/9_VariationalInference_files/9_VariationalInference_120_0.png) - - - Here we actually see that indeed both the full ADVI and the MCMC approaches does a much better job of quantifying the uncertainty of predictions for never-before-seen samples, with full ADVI seemingly *overestimating* the variance slightly compared to MCMC. 
So now you know how to do perform VI on your Turing.jl model! Great isn't it? From 4e0fe4d2db734450b0af181c51e3a050d0c85341 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 24 Sep 2020 14:26:09 +0100 Subject: [PATCH 08/12] removed dep --- Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6e6537360..6456cb271 100644 --- a/Project.toml +++ b/Project.toml @@ -28,7 +28,6 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" -UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" Weave = "44d3d7a6-8a23-5bf8-98c5-b353f8df5ec9" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" From 8dda8eb269c044465923a0797dd79108c0b8e364 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 24 Sep 2020 14:28:38 +0100 Subject: [PATCH 09/12] removed redundant dependencies for main Project.toml --- Manifest.toml | 1498 ------------------------------------------------- Project.toml | 23 - 2 files changed, 1521 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 04ca7ad9e..7a49ca026 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,698 +1,42 @@ # This file is machine-generated - editing it directly is not advised -[[AbstractAlgebra]] -deps = ["InteractiveUtils", "LinearAlgebra", "Markdown", "Random", "SparseArrays", "Test"] -git-tree-sha1 = "8fa03ecf25341ff3e8fb301dba3f41c6fe09952e" -uuid = "c3fe647b-3220-5bb0-a1ea-a7954cac585d" -version = "0.10.0" - -[[AbstractFFTs]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "051c95d6836228d120f5f4b984dd5aba1624f716" -uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" -version = "0.5.0" - -[[AbstractMCMC]] -deps = ["ConsoleProgressMonitor", "Distributed", "Logging", "LoggingExtras", "ProgressLogging", "Random", "StatsBase", "TerminalLoggers"] -git-tree-sha1 = "31a0a7b957525748e05599488ca6eef476fef12b" -uuid = "80f14c24-f653-4e6a-9b94-39d6b0f70001" -version = "1.0.1" - -[[AbstractTrees]] -deps = ["Markdown"] -git-tree-sha1 = "33e450545eaf7699da1a6e755f9ea65f14077a45" -uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" -version = "0.3.3" - -[[Adapt]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "95f8bda0555209f122bc796b0382ea4a3a121720" -uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "2.1.0" - -[[AdvancedHMC]] -deps = ["ArgCheck", "DocStringExtensions", "InplaceOps", "LinearAlgebra", "Parameters", "ProgressMeter", "Random", "Requires", "Statistics", "StatsBase", "StatsFuns"] -git-tree-sha1 = "573080c224795309a965ff61d2b442c7e14d8c04" -uuid = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" -version = "0.2.25" - -[[AdvancedMH]] -deps = ["AbstractMCMC", "Distributions", "Random", "Requires"] -git-tree-sha1 = "3d25126440a0d3412c9608498db6008309163670" -uuid = "5b7e9947-ddc0-4b3f-9b55-0d8042f74170" -version = "0.5.1" - -[[AdvancedVI]] -deps = ["Bijectors", "Distributions", "DistributionsAD", "DocStringExtensions", "ForwardDiff", "LinearAlgebra", "ProgressMeter", "Random", "Requires", "StatsBase", "StatsFuns", "Tracker"] -git-tree-sha1 = "dd4b7c101e15b23ebde935a9f89c74b00e245916" -uuid = "b5ca4192-6429-45e5-a2d9-87aec30a685c" -version = "0.1.0" - -[[ApproxBayes]] -deps = ["DelimitedFiles", "Distances", "Distributions", "Printf", "ProgressMeter", "Random", "RecipesBase", "Statistics", "StatsBase"] -git-tree-sha1 = "8ece4d5d6c4c1157cbcc1c21e3082eb8c0c00c7b" -uuid = "f5f396d3-230c-5e07-80e6-9fadf06146cc" -version = "0.3.2" - -[[ArgCheck]] -git-tree-sha1 = 
"dedbbb2ddb876f899585c4ec4433265e3017215a" -uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197" -version = "2.1.0" - -[[ArnoldiMethod]] -deps = ["DelimitedFiles", "LinearAlgebra", "Random", "SparseArrays", "StaticArrays", "Test"] -git-tree-sha1 = "2b6845cea546604fb4dca4e31414a6a59d39ddcd" -uuid = "ec485272-7323-5ecc-a04f-4719b315124d" -version = "0.0.4" - -[[Arpack]] -deps = ["Arpack_jll", "Libdl", "LinearAlgebra"] -git-tree-sha1 = "2ff92b71ba1747c5fdd541f8fc87736d82f40ec9" -uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" -version = "0.4.0" - -[[Arpack_jll]] -deps = ["Libdl", "OpenBLAS_jll", "Pkg"] -git-tree-sha1 = "e214a9b9bd1b4e1b4f15b22c0994862b66af7ff7" -uuid = "68821587-b530-5797-8361-c406ea357684" -version = "3.5.0+3" - -[[ArrayInterface]] -deps = ["LinearAlgebra", "Requires", "SparseArrays"] -git-tree-sha1 = "a2b4a1b7c725297565105f98dcee04e362d955d6" -uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" -version = "2.12.0" - -[[ArrayLayouts]] -deps = ["Compat", "FillArrays", "LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "d6f1ecde467019346f7601fb2ee58cb2bc60d121" -uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" -version = "0.4.8" - -[[AxisAlgorithms]] -deps = ["LinearAlgebra", "Random", "SparseArrays", "WoodburyMatrices"] -git-tree-sha1 = "a4d07a1c313392a77042855df46c5f534076fab9" -uuid = "13072b0f-2c55-5437-9ae7-d433b7a33950" -version = "1.0.0" - -[[AxisArrays]] -deps = ["Dates", "IntervalSets", "IterTools", "RangeArrays"] -git-tree-sha1 = "f31f50712cbdf40ee8287f0443b57503e34122ef" -uuid = "39de3d68-74b9-583c-8d2d-e117c070f3a9" -version = "0.4.3" - -[[BandedMatrices]] -deps = ["ArrayLayouts", "FillArrays", "LinearAlgebra", "Random", "SparseArrays"] -git-tree-sha1 = "78b756299a35dc553c8f98b29fbcfa0cd187ee71" -uuid = "aae01518-5342-5314-be14-df237901396f" -version = "0.15.20" - [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" -[[BenchmarkTools]] -deps = ["JSON", "Logging", "Printf", "Statistics", "UUIDs"] -git-tree-sha1 = "9e62e66db34540a0c919d72172cc2f642ac71260" -uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" -version = "0.5.0" - -[[Bijectors]] -deps = ["ArgCheck", "Compat", "Distributions", "LinearAlgebra", "MappedArrays", "NNlib", "Random", "Reexport", "Requires", "Roots", "SparseArrays", "Statistics", "StatsFuns"] -git-tree-sha1 = "eef13682b2a23ebe30d9460a3829fd52ce0713b4" -uuid = "76274a88-744f-5084-9051-94815aaf08c4" -version = "0.8.5" - -[[BinaryProvider]] -deps = ["Libdl", "Logging", "SHA"] -git-tree-sha1 = "ecdec412a9abc8db54c0efc5548c64dfce072058" -uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232" -version = "0.5.10" - -[[BoundaryValueDiffEq]] -deps = ["BandedMatrices", "DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "NLsolve", "Reexport", "SparseArrays"] -git-tree-sha1 = "3f2969de608af70db755cee9d4490a7294a6afc3" -uuid = "764a87c0-6b3e-53db-9096-fe964310641d" -version = "2.5.0" - -[[Bzip2_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "03a44490020826950c68005cafb336e5ba08b7e8" -uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" -version = "1.0.6+4" - -[[CEnum]] -git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.4.1" - -[[CSV]] -deps = ["CategoricalArrays", "DataFrames", "Dates", "Mmap", "Parsers", "PooledArrays", "SentinelArrays", "Tables", "Unicode"] -git-tree-sha1 = "a390152e6850405a48ca51bd7ca33d11a21d6230" -uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" -version = "0.7.7" - -[[CUDA]] -deps = ["AbstractFFTs", "Adapt", "BinaryProvider", "CEnum", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", 
"LLVM", "Libdl", "LinearAlgebra", "Logging", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "Requires", "SparseArrays", "Statistics", "TimerOutputs"] -git-tree-sha1 = "83bfd180e2f842f6d4ee315a6db8665e9aa0c19b" -uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" -version = "1.3.3" - -[[CanonicalTraits]] -deps = ["MLStyle"] -git-tree-sha1 = "e4581e3fadda3824e0df04396c85258a2107035d" -uuid = "a603d957-0e48-4f86-8fbd-0b7bc66df689" -version = "0.2.2" - -[[CategoricalArrays]] -deps = ["DataAPI", "Future", "JSON", "Missings", "Printf", "Statistics", "StructTypes", "Unicode"] -git-tree-sha1 = "e7cb4f23938914f09afae58b611a59f3aa8d8f66" -uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597" -version = "0.8.2" - -[[ChainRules]] -deps = ["ChainRulesCore", "LinearAlgebra", "Random", "Reexport", "Requires", "Statistics"] -git-tree-sha1 = "60b76639ff1dc573b0708a68924539d03ed6520b" -uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" -version = "0.7.20" - -[[ChainRulesCore]] -deps = ["LinearAlgebra", "MuladdMacro"] -git-tree-sha1 = "ac64a416997ae87eb86550020d0607ff608253d1" -uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -version = "0.9.10" - -[[Clustering]] -deps = ["Distances", "LinearAlgebra", "NearestNeighbors", "Printf", "SparseArrays", "Statistics", "StatsBase"] -git-tree-sha1 = "b11c8d607af357776a046889a7c32567d05f1319" -uuid = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" -version = "0.14.1" - -[[CodecZlib]] -deps = ["TranscodingStreams", "Zlib_jll"] -git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da" -uuid = "944b1d66-785c-5afd-91f1-9de20f533193" -version = "0.7.0" - -[[ColorSchemes]] -deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random", "StaticArrays"] -git-tree-sha1 = "7a15e3690529fd1042f0ab954dff7445b1efc8a5" -uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4" -version = "3.9.0" - -[[ColorTypes]] -deps = ["FixedPointNumbers", "Random"] -git-tree-sha1 = "4bffea7ed1a9f0f3d1a131bbcd4b925548d75288" -uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" -version = "0.10.9" - -[[Colors]] -deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Reexport"] -git-tree-sha1 = "008d6bc68dea6beb6303fdc37188cb557391ebf2" -uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" -version = "0.12.4" - -[[Combinatorics]] -git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860" -uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" -version = "1.0.2" - -[[CommonSubexpressions]] -deps = ["MacroTools", "Test"] -git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" -uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" -version = "0.3.0" - -[[Compat]] -deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "7c7f4cda0d58ec999189d70f5ee500348c4b4df1" -uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.16.0" - -[[CompilerSupportLibraries_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "7c4f882c41faa72118841185afc58a2eb00ef612" -uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "0.3.3+0" - -[[Conda]] -deps = ["JSON", "VersionParsing"] -git-tree-sha1 = "7a58bb32ce5d85f8bf7559aa7c2842f9aecf52fc" -uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d" -version = "1.4.1" - -[[ConjugatePriors]] -deps = ["Distributions", "LinearAlgebra", "PDMats", "SpecialFunctions", "Statistics", "StatsFuns"] -git-tree-sha1 = "bcfc470f3aca36a78c1736fbfe669406e2327ea6" -uuid = 
"1624bea9-42b1-5fc1-afd3-e96f729c8d6c" -version = "0.4.0" - -[[ConsoleProgressMonitor]] -deps = ["Logging", "ProgressMeter"] -git-tree-sha1 = "3ab7b2136722890b9af903859afcf457fa3059e8" -uuid = "88cd18e8-d9cc-4ea6-8889-5259c0d15c8b" -version = "0.1.2" - -[[ConstructionBase]] -git-tree-sha1 = "a2a6a5fea4d6f730ec4c18a76d27ec10e8ec1c50" -uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" -version = "1.0.0" - -[[Contour]] -deps = ["StaticArrays"] -git-tree-sha1 = "d05a3a25b762720d40246d5bedf518c9c2614ef5" -uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" -version = "0.5.5" - -[[CpuId]] -deps = ["Markdown", "Test"] -git-tree-sha1 = "f0464e499ab9973b43c20f8216d088b61fda80c6" -uuid = "adafc99b-e345-5852-983c-f28acb93d879" -version = "0.2.2" - -[[Crayons]] -git-tree-sha1 = "3f71217b538d7aaee0b69ab47d9b7724ca8afa0d" -uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" -version = "4.0.4" - [[DataAPI]] git-tree-sha1 = "176e23402d80e7743fc26c19c681bfb11246af32" uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" version = "1.3.0" -[[DataFrames]] -deps = ["CategoricalArrays", "Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "Missings", "PooledArrays", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] -git-tree-sha1 = "a7c1c9a6e47a92321bbc9d500dab9b04cc4a6a39" -uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "0.21.7" - -[[DataStructures]] -deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "0347f23484a96d56e7096eb1f55c6975be34b11a" -uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.6" - [[DataValueInterfaces]] git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" version = "1.0.0" -[[DataValues]] -deps = ["DataValueInterfaces", "Dates"] -git-tree-sha1 = "d88a19299eba280a6d062e135a43f00323ae70bf" -uuid = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" -version = "0.4.13" - [[Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" -[[DelayDiffEq]] -deps = ["DataStructures", "DiffEqBase", "LinearAlgebra", "Logging", "OrdinaryDiffEq", "Printf", "RecursiveArrayTools", "Reexport", "Roots", "UnPack"] -git-tree-sha1 = "a74a10a4c9885313b00e1f6409ef0fc8141af68e" -uuid = "bcd4f6db-9728-5f36-b5f7-82caef46ccdb" -version = "5.24.2" - -[[DelimitedFiles]] -deps = ["Mmap"] -uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" - -[[DiffEqBase]] -deps = ["ArrayInterface", "ChainRulesCore", "DataStructures", "Distributed", "DocStringExtensions", "FunctionWrappers", "IterativeSolvers", "IteratorInterfaceExtensions", "LabelledArrays", "LinearAlgebra", "Logging", "MuladdMacro", "Parameters", "Printf", "RecipesBase", "RecursiveArrayTools", "RecursiveFactorization", "Requires", "Roots", "SparseArrays", "StaticArrays", "Statistics", "SuiteSparse", "TableTraits", "Tables", "TreeViews", "ZygoteRules"] -git-tree-sha1 = "949596defa7b0e24a35037253c8e8e87bd7a6c2c" -uuid = "2b5f629d-d688-5b77-993f-72d75c75574e" -version = "6.47.1" - -[[DiffEqBayes]] -deps = ["ApproxBayes", "DiffEqBase", "DiffResults", "Distances", "Distributed", "Distributions", "DocStringExtensions", "DynamicHMC", "ForwardDiff", "LabelledArrays", "LinearAlgebra", "LogDensityProblems", "MacroTools", "Missings", "ModelingToolkit", "Optim", "PDMats", "Parameters", "Random", "RecursiveArrayTools", "Reexport", "Requires", "StructArrays", "TransformVariables", "Turing"] -git-tree-sha1 = "91621cfb8f1df974dbaa0d23f967aca8fd5b93c0" -uuid = "ebbdde9d-f333-5424-9be2-dbf1e9acfb5e" -version = "2.17.0" - 
-[[DiffEqCallbacks]] -deps = ["DataStructures", "DiffEqBase", "ForwardDiff", "LinearAlgebra", "NLsolve", "OrdinaryDiffEq", "RecipesBase", "RecursiveArrayTools", "StaticArrays"] -git-tree-sha1 = "c252e7a153d902f7c535feb3d296fdd9812049c3" -uuid = "459566f4-90b8-5000-8ac3-15dfb0a30def" -version = "2.14.1" - -[[DiffEqFinancial]] -deps = ["DiffEqBase", "DiffEqNoiseProcess", "LinearAlgebra", "Markdown", "RandomNumbers"] -git-tree-sha1 = "db08e0def560f204167c58fd0637298e13f58f73" -uuid = "5a0ffddc-d203-54b0-88ba-2c03c0fc2e67" -version = "2.4.0" - -[[DiffEqJump]] -deps = ["ArrayInterface", "Compat", "DataStructures", "DiffEqBase", "FunctionWrappers", "LinearAlgebra", "PoissonRandom", "Random", "RandomNumbers", "RecursiveArrayTools", "StaticArrays", "TreeViews", "UnPack"] -git-tree-sha1 = "68c389b108388d09f01065cf6d6df426f126d5a0" -uuid = "c894b116-72e5-5b58-be3c-e6d8d4ac2b12" -version = "6.10.1" - -[[DiffEqNoiseProcess]] -deps = ["DiffEqBase", "Distributions", "LinearAlgebra", "PoissonRandom", "Random", "RandomNumbers", "RecipesBase", "RecursiveArrayTools", "Requires", "ResettableStacks", "StaticArrays", "Statistics"] -git-tree-sha1 = "5f8ba8bbdc8a4d6f2504c9cf34ba42a0274b7e97" -uuid = "77a26b50-5914-5dd7-bc55-306e6241c503" -version = "5.3.0" - -[[DiffEqPhysics]] -deps = ["DiffEqBase", "DiffEqCallbacks", "ForwardDiff", "LinearAlgebra", "Printf", "Random", "RecipesBase", "RecursiveArrayTools", "Reexport", "StaticArrays"] -git-tree-sha1 = "6bf21882775dc8f7b5fd4f97989a01ab65df157f" -uuid = "055956cb-9e8b-5191-98cc-73ae4a59e68a" -version = "3.6.0" - -[[DiffEqSensitivity]] -deps = ["Adapt", "DiffEqBase", "DiffEqCallbacks", "DiffEqNoiseProcess", "Distributions", "FFTW", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "Parameters", "QuadGK", "QuasiMonteCarlo", "RecursiveArrayTools", "Requires", "ReverseDiff", "Statistics", "StochasticDiffEq", "Tracker", "Zygote", "ZygoteRules"] -git-tree-sha1 = "4883872dacae2bb8a86abbedf22785013d33691b" -uuid = "41bf760c-e81c-5289-8e54-58b1f1f8abe2" -version = "6.31.5" - -[[DiffResults]] -deps = ["StaticArrays"] -git-tree-sha1 = "da24935df8e0c6cf28de340b958f6aac88eaa0cc" -uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" -version = "1.0.2" - -[[DiffRules]] -deps = ["NaNMath", "Random", "SpecialFunctions"] -git-tree-sha1 = "eb0c34204c8410888844ada5359ac8b96292cfd1" -uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" -version = "1.0.1" - -[[DifferentialEquations]] -deps = ["BoundaryValueDiffEq", "DelayDiffEq", "DiffEqBase", "DiffEqCallbacks", "DiffEqFinancial", "DiffEqJump", "DiffEqNoiseProcess", "DiffEqPhysics", "DimensionalPlotRecipes", "LinearAlgebra", "MultiScaleArrays", "OrdinaryDiffEq", "ParameterizedFunctions", "Random", "RecursiveArrayTools", "Reexport", "SteadyStateDiffEq", "StochasticDiffEq", "Sundials"] -git-tree-sha1 = "84502233aa79c6b2d0b9d5d50d205b88a02ffd8b" -uuid = "0c46a032-eb83-5123-abaf-570d42b7fbaa" -version = "6.15.0" - -[[DimensionalPlotRecipes]] -deps = ["LinearAlgebra", "RecipesBase"] -git-tree-sha1 = "af883a26bbe6e3f5f778cb4e1b81578b534c32a6" -uuid = "c619ae07-58cd-5f6d-b883-8f17bd6a98f9" -version = "1.2.0" - -[[Distances]] -deps = ["LinearAlgebra", "Statistics"] -git-tree-sha1 = "bed62cc5afcff16de797a9f38fb358b74071f785" -uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" -version = "0.9.0" - [[Distributed]] deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" -[[Distributions]] -deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"] 
-git-tree-sha1 = "3676697fd903ba314aaaa0ec8d6813b354edb875" -uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" -version = "0.23.11" - -[[DistributionsAD]] -deps = ["Adapt", "ChainRules", "ChainRulesCore", "Compat", "DiffRules", "Distributions", "FillArrays", "ForwardDiff", "LinearAlgebra", "NaNMath", "PDMats", "Random", "Requires", "SpecialFunctions", "StaticArrays", "StatsBase", "StatsFuns", "ZygoteRules"] -git-tree-sha1 = "309458fa6cd189cb080a349130918ace2bce0256" -uuid = "ced4e74d-a319-5a8a-b0ac-84af2272839c" -version = "0.6.9" - [[DocStringExtensions]] deps = ["LibGit2", "Markdown", "Pkg", "Test"] git-tree-sha1 = "50ddf44c53698f5e784bbebb3f4b21c5807401b1" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.8.3" -[[Documenter]] -deps = ["Base64", "Dates", "DocStringExtensions", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "fb1ff838470573adc15c71ba79f8d31328f035da" -uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.25.2" - -[[DynamicHMC]] -deps = ["ArgCheck", "DocStringExtensions", "LinearAlgebra", "LogDensityProblems", "NLSolversBase", "Optim", "Parameters", "ProgressMeter", "Random", "Statistics"] -git-tree-sha1 = "7aa21d9ff8d2dafb8a4bf9f1b18c69bcc8960f8d" -uuid = "bbc10e6e-7c05-544b-b16e-64fede858acb" -version = "2.2.0" - -[[DynamicPPL]] -deps = ["AbstractMCMC", "Bijectors", "ChainRulesCore", "Distributions", "MacroTools", "NaturalSort", "Random"] -git-tree-sha1 = "0d48b8509b4d40a673b63d10ad3f8681acdb86da" -uuid = "366bfd00-2699-11ea-058f-f148b4cae6d8" -version = "0.9.1" - -[[EarCut_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "eabac56550a7d7e0be499125673fbff560eb8b20" -uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" -version = "2.1.5+0" - -[[EllipsisNotation]] -git-tree-sha1 = "65dad386e877850e6fce4fc77f60fe75a468ce9d" -uuid = "da5c29d0-fa7d-589e-88eb-ea29b0a81949" -version = "0.4.0" - -[[EllipticalSliceSampling]] -deps = ["AbstractMCMC", "ArrayInterface", "Distributions", "Random", "Statistics"] -git-tree-sha1 = "38227940de7a53f62da3d288876b96cd6892c239" -uuid = "cad2338a-1db2-11e9-3401-43bc07c9ede2" -version = "0.2.2" - -[[ExponentialUtilities]] -deps = ["LinearAlgebra", "Printf", "Requires", "SparseArrays"] -git-tree-sha1 = "4e7db935d55d4a11acb74856ee6cb113a7808c6f" -uuid = "d4d017d3-3776-5f7e-afef-a10c40355c18" -version = "1.8.0" - -[[ExprTools]] -git-tree-sha1 = "7fce513fcda766962ff67c5596cb16c463dfd371" -uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" -version = "0.1.2" - -[[EzXML]] -deps = ["Printf", "XML2_jll"] -git-tree-sha1 = "0fa3b52a04a4e210aeb1626def9c90df3ae65268" -uuid = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" -version = "1.1.0" - -[[FFMPEG]] -deps = ["FFMPEG_jll", "x264_jll"] -git-tree-sha1 = "9a73ffdc375be61b0e4516d83d880b265366fe1f" -uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" -version = "0.4.0" - -[[FFMPEG_jll]] -deps = ["Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "LAME_jll", "LibVPX_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "Pkg", "Zlib_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] -git-tree-sha1 = "13a934b9e74a8722bf1786c989de346a9602e695" -uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" -version = "4.3.1+2" - -[[FFTW]] -deps = ["AbstractFFTs", "FFTW_jll", "IntelOpenMP_jll", "Libdl", "LinearAlgebra", "MKL_jll", "Reexport"] -git-tree-sha1 = "8b7c16b56936047ca41bf25effa137ae0b381ae8" -uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" -version = "1.2.4" - -[[FFTW_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "6c975cd606128d45d1df432fb812d6eb10fee00b" 
-uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a" -version = "3.3.9+5" - -[[FastClosures]] -git-tree-sha1 = "acebe244d53ee1b461970f8910c235b259e772ef" -uuid = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a" -version = "0.3.2" - -[[FileIO]] -deps = ["Pkg"] -git-tree-sha1 = "992b4aeb62f99b69fcf0cb2085094494cc05dfb3" -uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" -version = "1.4.3" - -[[FillArrays]] -deps = ["LinearAlgebra", "Random", "SparseArrays"] -git-tree-sha1 = "b955c227b0d1413a1a97e2ca0635a5de019d7337" -uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.9.6" - -[[FiniteDiff]] -deps = ["ArrayInterface", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays"] -git-tree-sha1 = "a78ee56e4636c20c2db9ccde8afe57065f6ab387" -uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" -version = "2.7.0" - -[[FixedPointNumbers]] -deps = ["Statistics"] -git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" -uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" -version = "0.8.4" - -[[Flux]] -deps = ["AbstractTrees", "Adapt", "CUDA", "CodecZlib", "Colors", "DelimitedFiles", "Functors", "Juno", "LinearAlgebra", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "SHA", "Statistics", "StatsBase", "Test", "ZipFile", "Zygote"] -git-tree-sha1 = "ceb09ce8510ef31fd86a791bbd0e1d72a60f27d7" -uuid = "587475ba-b771-5e3f-ad9e-33799f191a9c" -version = "0.11.1" - -[[Formatting]] -deps = ["Printf"] -git-tree-sha1 = "a0c901c29c0e7c763342751c0a94211d56c0de5c" -uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" -version = "0.4.1" - -[[ForwardDiff]] -deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "NaNMath", "Random", "SpecialFunctions", "StaticArrays"] -git-tree-sha1 = "1d090099fb82223abc48f7ce176d3f7696ede36d" -uuid = "f6369f11-7733-5829-9624-2563aa707210" -version = "0.10.12" - -[[FreeType2_jll]] -deps = ["Bzip2_jll", "Libdl", "Pkg", "Zlib_jll"] -git-tree-sha1 = "720eee04e3b496c15e5e2269669c2532fb5005c0" -uuid = "d7e528f0-a631-5988-bf34-fe36492bcfd7" -version = "2.10.1+4" - -[[FriBidi_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "cfc3485a0a968263c789e314fca5d66daf75ed6c" -uuid = "559328eb-81f9-559d-9380-de523a88c83c" -version = "1.0.5+5" - -[[FunctionWrappers]] -git-tree-sha1 = "e4813d187be8c7b993cb7f85cbf2b7bfbaadc694" -uuid = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e" -version = "1.1.1" - -[[Functors]] -deps = ["MacroTools"] -git-tree-sha1 = "f40adc6422f548176bb4351ebd29e4abf773040a" -uuid = "d9f16b24-f501-4c13-a1f2-28368ffc5196" -version = "0.1.0" - -[[Future]] -deps = ["Random"] -uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" - -[[GLM]] -deps = ["Distributions", "LinearAlgebra", "Printf", "Random", "Reexport", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "StatsModels"] -git-tree-sha1 = "11d47ad2699a26d5557e265a2acf3e5c3799e83d" -uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -version = "1.3.10" - -[[GPUArrays]] -deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization"] -git-tree-sha1 = "d3a68bdf4868f3922f5b9bcb2cbf2776d5c0ecf5" -uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" -version = "5.2.0" - -[[GPUCompiler]] -deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "05097d81898c527e3bf218bb083ad0ead4378e5f" -uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" -version = "0.6.1" - -[[GR]] -deps = ["Base64", "DelimitedFiles", "HTTP", "JSON", "LinearAlgebra", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"] -git-tree-sha1 = "cd0f34bd097d4d5eb6bbe01778cf8a7ed35f29d9" -uuid = 
"28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" -version = "0.52.0" - -[[GeneralizedGenerated]] -deps = ["CanonicalTraits", "DataStructures", "JuliaVariables", "MLStyle"] -git-tree-sha1 = "50e0ed8fbcd56ae2e65b9aa73394f20b30269b2d" -uuid = "6b9d7cbe-bcb9-11e9-073f-15a7a543e2eb" -version = "0.2.7" - -[[GenericSVD]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "62909c3eda8a25b5673a367d1ad2392ebb265211" -uuid = "01680d73-4ee2-5a08-a1aa-533608c188bb" -version = "0.3.0" - -[[GeometryBasics]] -deps = ["EarCut_jll", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] -git-tree-sha1 = "49d13ebd048bd71315ff98bdc2c560ec16eda2b4" -uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" -version = "0.3.1" - -[[GeometryTypes]] -deps = ["ColorTypes", "FixedPointNumbers", "LinearAlgebra", "StaticArrays"] -git-tree-sha1 = "34bfa994967e893ab2f17b864eec221b3521ba4d" -uuid = "4d00f742-c7ba-57c2-abde-4428a4b178cb" -version = "0.8.3" - -[[HTTP]] -deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets"] -git-tree-sha1 = "c7ec02c4c6a039a98a15f955462cd7aea5df4508" -uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "0.8.19" - [[Highlights]] deps = ["DocStringExtensions", "InteractiveUtils", "REPL"] git-tree-sha1 = "f823a2d04fb233d52812c8024a6d46d9581904a4" uuid = "eafb193a-b7ab-5a9e-9068-77385905fa72" version = "0.4.5" -[[IRTools]] -deps = ["InteractiveUtils", "MacroTools", "Test"] -git-tree-sha1 = "a8d88c05a23b44b4da6cf4fb5659e13ff95e0f47" -uuid = "7869d1d1-7146-5819-86e3-90919afe41df" -version = "0.4.1" - -[[IfElse]] -git-tree-sha1 = "28e837ff3e7a6c3cdb252ce49fb412c8eb3caeef" -uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" -version = "0.1.0" - -[[Inflate]] -git-tree-sha1 = "f5fc07d4e706b84f72d54eedcc1c13d92fb0871c" -uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" -version = "0.1.2" - -[[IniFile]] -deps = ["Test"] -git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" -uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f" -version = "0.5.0" - -[[InplaceOps]] -deps = ["LinearAlgebra", "Test"] -git-tree-sha1 = "50b41d59e7164ab6fda65e71049fee9d890731ff" -uuid = "505f98c9-085e-5b2c-8e89-488be7bf1f34" -version = "0.3.0" - -[[IntelOpenMP_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "fb8e1c7a5594ba56f9011310790e03b5384998d6" -uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" -version = "2018.0.3+0" - [[InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -[[Interpolations]] -deps = ["AxisAlgorithms", "LinearAlgebra", "OffsetArrays", "Random", "Ratios", "SharedArrays", "SparseArrays", "StaticArrays", "WoodburyMatrices"] -git-tree-sha1 = "2b7d4e9be8b74f03115e64cf36ed2f48ae83d946" -uuid = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" -version = "0.12.10" - -[[IntervalSets]] -deps = ["Dates", "EllipsisNotation", "Statistics"] -git-tree-sha1 = "3b1cef135bc532b3c3401b309e1b8a2a2ba26af5" -uuid = "8197267c-284f-5f27-9208-e0e47529a953" -version = "0.5.1" - -[[InvertedIndices]] -deps = ["Test"] -git-tree-sha1 = "15732c475062348b0165684ffe28e85ea8396afc" -uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" -version = "1.0.0" - -[[IterTools]] -git-tree-sha1 = "05110a2ab1fc5f932622ffea2a003221f4782c18" -uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" -version = "1.3.0" - -[[IterativeSolvers]] -deps = ["LinearAlgebra", "Printf", "Random", "RecipesBase", "SparseArrays"] -git-tree-sha1 = "3b7e2aac8c94444947facea7cc7ca91c49169be0" -uuid = "42fd0dbc-a981-5370-80f2-aaf504508153" -version = "0.8.4" - [[IteratorInterfaceExtensions]] git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" uuid = 
"82899510-4779-5014-852e-03e436cf321d" @@ -704,383 +48,33 @@ git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" version = "0.21.1" -[[JuliaVariables]] -deps = ["MLStyle", "NameResolution"] -git-tree-sha1 = "e0fcfa0a2f6122fbe13603764c5310dde00c5593" -uuid = "b14d175d-62b4-44ba-8fb7-3064adc8c3ec" -version = "0.2.3" - -[[Juno]] -deps = ["Base64", "Logging", "Media", "Profile"] -git-tree-sha1 = "90976c3ab792a98d240d42f9df07420ccfc60668" -uuid = "e5e0dc1b-0480-54bc-9374-aad01c23163d" -version = "0.8.3" - -[[KernelDensity]] -deps = ["Distributions", "DocStringExtensions", "FFTW", "Interpolations", "StatsBase"] -git-tree-sha1 = "c4cd736343d767c0e78e37c7d95548782b8f9424" -uuid = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" -version = "0.6.0" - -[[LAME_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "a7999edc634307964d5651265ebf7c2e14b4ef91" -uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" -version = "3.100.0+2" - -[[LLVM]] -deps = ["CEnum", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "a662366a5d485dee882077e8da3e1a95a86d097f" -uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "2.0.0" - -[[LaTeXStrings]] -git-tree-sha1 = "de44b395389b84fd681394d4e8d39ef14e3a2ea8" -uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" -version = "1.1.0" - -[[LabelledArrays]] -deps = ["ArrayInterface", "LinearAlgebra", "MacroTools", "StaticArrays"] -git-tree-sha1 = "5e04374019448f8509349948ab504f117e3b575a" -uuid = "2ee39098-c373-598a-b85f-a56591580800" -version = "1.3.0" - -[[Latexify]] -deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "Printf", "Requires"] -git-tree-sha1 = "829b033e31573b8ffdd14e0d47154fd3ddc7abbf" -uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" -version = "0.14.0" - -[[LatinHypercubeSampling]] -deps = ["Random", "StatsBase", "Test"] -git-tree-sha1 = "f6df9a2cd3c2ee1123c39fac3cca18993fc88247" -uuid = "a5e1c1ea-c99a-51d3-a14d-a9a37257b02d" -version = "1.6.4" - -[[LearnBase]] -deps = ["LinearAlgebra", "StatsBase"] -git-tree-sha1 = "47e6f4623c1db88570c7a7fa66c6528b92ba4725" -uuid = "7f8f8fb0-2700-5f03-b4bd-41f8cfc144b6" -version = "0.3.0" - -[[LeftChildRightSiblingTrees]] -deps = ["AbstractTrees"] -git-tree-sha1 = "71be1eb5ad19cb4f61fa8c73395c0338fd092ae0" -uuid = "1d6d02ad-be62-4b6b-8a6d-2f90e265016e" -version = "0.1.2" - [[LibGit2]] deps = ["Printf"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" -[[LibVPX_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "e02378f5707d0f94af22b99e4aba798e20368f6e" -uuid = "dd192d2f-8180-539f-9fb4-cc70b1dcf69a" -version = "1.9.0+0" - [[Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" -[[Libiconv_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "7c36dfe7971e55da03d8f54b67d4b3fb8ee01d63" -uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" -version = "1.16.0+6" - -[[Libtask]] -deps = ["BinaryProvider", "Libdl", "Pkg"] -git-tree-sha1 = "68a658db4792dfc468ea2aabcf06f3f74f153f23" -uuid = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" -version = "0.4.1" - -[[LightGraphs]] -deps = ["ArnoldiMethod", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] -git-tree-sha1 = "a0d4bcea4b9c056da143a5ded3c2b7f7740c2d41" -uuid = "093fc24a-ae57-5d10-9952-331d41423f4d" -version = "1.3.0" - -[[LineSearches]] -deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"] -git-tree-sha1 = "d6e6b2ed397a402a22e474a3f1859c8c1db82c8c" -uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" -version = "7.1.0" - [[LinearAlgebra]] deps = 
["Libdl"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -[[LogDensityProblems]] -deps = ["ArgCheck", "BenchmarkTools", "DiffResults", "DocStringExtensions", "Parameters", "Random", "Requires", "TransformVariables"] -git-tree-sha1 = "77ac6c4bb1eae4072a5d0fcfba621020f7802cc2" -uuid = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" -version = "0.10.3" - [[Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" -[[LoggingExtras]] -deps = ["Dates"] -git-tree-sha1 = "03289aba73c0abc25ff0229bed60f2a4129cd15c" -uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" -version = "0.4.2" - -[[LoopVectorization]] -deps = ["DocStringExtensions", "LinearAlgebra", "OffsetArrays", "SIMDPirates", "SLEEFPirates", "UnPack", "VectorizationBase"] -git-tree-sha1 = "3242a8f411e19eda9adc49d0b877681975c11375" -uuid = "bdcacae8-1622-11e9-2a5c-532679323890" -version = "0.8.26" - -[[MCMCChains]] -deps = ["AbstractFFTs", "AbstractMCMC", "AxisArrays", "Compat", "Distributions", "Formatting", "IteratorInterfaceExtensions", "LinearAlgebra", "MLJModelInterface", "NaturalSort", "PrettyTables", "Random", "RecipesBase", "Serialization", "SpecialFunctions", "Statistics", "StatsBase", "TableTraits", "Tables"] -git-tree-sha1 = "f02b83d0adb4318f66ba73f5e1f50ef139fee6c2" -uuid = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" -version = "4.2.1" - -[[METIS_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "3f52ed323683398498ef163a45ce998f1ceca363" -uuid = "d00139f3-1899-568f-a2f0-47f597d42d70" -version = "5.1.0+4" - -[[MKL_jll]] -deps = ["IntelOpenMP_jll", "Libdl", "Pkg"] -git-tree-sha1 = "eb540ede3aabb8284cb482aa41d00d6ca850b1f8" -uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" -version = "2020.2.254+0" - -[[MLDataPattern]] -deps = ["LearnBase", "MLLabelUtils", "Random", "SparseArrays", "StatsBase"] -git-tree-sha1 = "b6c21ad63e622a56c9ad9ce3a14f91b2e2af5f5e" -uuid = "9920b226-0b2a-5f5f-9153-9aa70a013f8b" -version = "0.5.3" - -[[MLDataUtils]] -deps = ["DataFrames", "DelimitedFiles", "LearnBase", "MLDataPattern", "MLLabelUtils", "Statistics", "StatsBase"] -git-tree-sha1 = "6c74dba2cba82d441190447f988e5eb48a815f48" -uuid = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d" -version = "0.5.2" - -[[MLJModelInterface]] -deps = ["Random", "ScientificTypes"] -git-tree-sha1 = "6d719b5831d2dffc579895e8070b65aa70322609" -uuid = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" -version = "0.3.5" - -[[MLLabelUtils]] -deps = ["LearnBase", "MappedArrays", "StatsBase"] -git-tree-sha1 = "50bbf776ef1b88075e27f1eb97e50dccfa382d58" -uuid = "66a33bbf-0c2b-5fc8-a008-9da813334f0a" -version = "0.5.2" - -[[MLStyle]] -git-tree-sha1 = "937eda9ce36fcce082a42edd7181c8d23f4eb550" -uuid = "d8e11817-5142-5d16-987a-aa16d5891078" -version = "0.4.6" - -[[MacroTools]] -deps = ["Markdown", "Random"] -git-tree-sha1 = "f7d2e3f654af75f01ec49be82c231c382214223a" -uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.5" - -[[MappedArrays]] -git-tree-sha1 = "e2a02fe7ee86a10c707ff1756ab1650b40b140bb" -uuid = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900" -version = "0.2.2" - [[Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" -[[MbedTLS]] -deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"] -git-tree-sha1 = "426a6978b03a97ceb7ead77775a1da066343ec6e" -uuid = "739be429-bea8-5141-9913-cc70e7f3736d" -version = "1.0.2" - -[[MbedTLS_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "c0b1286883cac4e2b617539de41111e0776d02e8" -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.16.8+0" - -[[Measures]] -git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f" -uuid = 
"442fdcdd-2543-5da2-b0f3-8c86c306513e" -version = "0.3.1" - -[[Media]] -deps = ["MacroTools", "Test"] -git-tree-sha1 = "75a54abd10709c01f1b86b84ec225d26e840ed58" -uuid = "e89f7d12-3494-54d1-8411-f7d8b9ae1f27" -version = "0.5.0" - -[[Missings]] -deps = ["DataAPI"] -git-tree-sha1 = "ed61674a0864832495ffe0a7e889c0da76b0f4c8" -uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "0.4.4" - [[Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" -[[Mocking]] -deps = ["ExprTools"] -git-tree-sha1 = "916b850daad0d46b8c71f65f719c49957e9513ed" -uuid = "78c3b35d-d492-501b-9361-3d52fe80e533" -version = "0.7.1" - -[[ModelingToolkit]] -deps = ["ArrayInterface", "DataStructures", "DiffEqBase", "DiffEqJump", "DiffRules", "Distributed", "DocStringExtensions", "GeneralizedGenerated", "IfElse", "LabelledArrays", "Latexify", "Libdl", "LightGraphs", "LinearAlgebra", "MacroTools", "NaNMath", "RecursiveArrayTools", "Requires", "SafeTestsets", "SparseArrays", "SpecialFunctions", "StaticArrays", "SymbolicUtils", "TreeViews", "UnPack", "Unitful"] -git-tree-sha1 = "aea52f25fdd87e2f31d511c338da12c89c6a4838" -uuid = "961ee093-0014-501f-94e3-6117800e7a78" -version = "3.20.0" - -[[MuladdMacro]] -git-tree-sha1 = "c6190f9a7fc5d9d5915ab29f2134421b12d24a68" -uuid = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221" -version = "0.2.2" - -[[MultiScaleArrays]] -deps = ["DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "OrdinaryDiffEq", "Random", "RecursiveArrayTools", "SparseDiffTools", "Statistics", "StochasticDiffEq", "TreeViews"] -git-tree-sha1 = "258f3be6770fe77be8870727ba9803e236c685b8" -uuid = "f9640e96-87f6-5992-9c3b-0743c6a49ffa" -version = "1.8.1" - -[[MultivariateStats]] -deps = ["Arpack", "LinearAlgebra", "SparseArrays", "Statistics", "StatsBase"] -git-tree-sha1 = "352fae519b447bf52e6de627b89f448bcd469e4e" -uuid = "6f286f6a-111f-5878-ab1e-185364afe411" -version = "0.7.0" - [[Mustache]] deps = ["Printf", "Tables"] git-tree-sha1 = "17e60d71d720c33ac2fbac21298ee495bae27587" uuid = "ffc61752-8dc7-55ee-8c37-f3e9cdd09e70" version = "1.0.5" -[[NLSolversBase]] -deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"] -git-tree-sha1 = "db63edd0199a4813a1fa117e3c840a4528b59d72" -uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" -version = "7.7.0" - -[[NLsolve]] -deps = ["Distances", "LineSearches", "LinearAlgebra", "NLSolversBase", "Printf", "Reexport"] -git-tree-sha1 = "650f266dcb5a24b4095fdab92f0137c0f4ee9392" -uuid = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" -version = "4.4.1" - -[[NNlib]] -deps = ["Libdl", "LinearAlgebra", "Pkg", "Requires", "Statistics"] -git-tree-sha1 = "8ec4693a5422f0b064ce324f59351f24aa474893" -uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" -version = "0.7.4" - -[[NaNMath]] -git-tree-sha1 = "c84c576296d0e2fbb3fc134d3e09086b3ea617cd" -uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" -version = "0.3.4" - -[[NameResolution]] -deps = ["PrettyPrint"] -git-tree-sha1 = "1a0fa0e9613f46c9b8c11eee38ebb4f590013c5e" -uuid = "71a1bf82-56d0-4bbc-8a3c-48b961074391" -version = "0.1.5" - -[[NamedArrays]] -deps = ["Combinatorics", "DataStructures", "DelimitedFiles", "InvertedIndices", "LinearAlgebra", "Random", "Requires", "SparseArrays", "Statistics"] -git-tree-sha1 = "7d96d4c09526458d66ff84d7648be7eb7c38a547" -uuid = "86f7a689-2022-50b4-a561-43c23ac3c673" -version = "0.9.4" - -[[NaturalSort]] -git-tree-sha1 = "eda490d06b9f7c00752ee81cfa451efe55521e21" -uuid = "c020b1a1-e9b0-503a-9c33-f039bfc54a85" -version = "1.0.0" - -[[NearestNeighbors]] -deps = ["Distances", "StaticArrays"] -git-tree-sha1 = 
"93107e3cdada73d63245ed8170dcae680f0c8fd8" -uuid = "b8a86587-4115-5ab1-83bc-aa920d37bbce" -version = "0.4.6" - -[[Observables]] -git-tree-sha1 = "11832878355305984235a2e90d0e3737383c634c" -uuid = "510215fc-4207-5dde-b226-833fc4488ee2" -version = "0.3.1" - -[[OffsetArrays]] -git-tree-sha1 = "663d3402efa943c95f4736fa7b462e9dd97be1a9" -uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -version = "1.2.0" - -[[Ogg_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "4c3275cda1ba99d1244d0b82a9d0ca871c3cf66b" -uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051" -version = "1.3.4+1" - -[[OpenBLAS_jll]] -deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] -git-tree-sha1 = "5fae4d1510bdcf7768cc951878b8aa48666c58a8" -uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.10+0" - -[[OpenSSL_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "997359379418d233767f926ea0c43f0e731735c0" -uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" -version = "1.1.1+5" - -[[OpenSpecFun_jll]] -deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] -git-tree-sha1 = "d51c416559217d974a1113522d5919235ae67a87" -uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" -version = "0.5.3+3" - -[[Optim]] -deps = ["Compat", "FillArrays", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"] -git-tree-sha1 = "c434c4853a7e61506caf0340b3c153fc08865088" -uuid = "429524aa-4258-5aef-a3af-852621145aeb" -version = "1.2.0" - -[[Opus_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "cc90a125aa70dbb069adbda2b913b02cf2c5f6fe" -uuid = "91d4177d-7536-5919-b921-800302f37372" -version = "1.3.1+2" - -[[OrderedCollections]] -git-tree-sha1 = "16c08bf5dba06609fe45e30860092d6fa41fde7b" -uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.3.1" - -[[OrdinaryDiffEq]] -deps = ["Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "ExponentialUtilities", "FastClosures", "FiniteDiff", "ForwardDiff", "GenericSVD", "LinearAlgebra", "Logging", "MacroTools", "MuladdMacro", "NLsolve", "RecursiveArrayTools", "Reexport", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] -git-tree-sha1 = "53de3dbb29a134e94c57025398c4420df6817434" -uuid = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" -version = "5.42.8" - -[[PDMats]] -deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"] -git-tree-sha1 = "b3405086eb6a974eba1958923d46bc0e1c2d2d63" -uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" -version = "0.10.0" - -[[ParameterizedFunctions]] -deps = ["DataStructures", "DiffEqBase", "Latexify", "LinearAlgebra", "ModelingToolkit", "Reexport"] -git-tree-sha1 = "291279c720121d7f5c6a145726bb94da79e9b42c" -uuid = "65888b18-ceab-5e60-b2b9-181511a3b968" -version = "5.6.0" - -[[Parameters]] -deps = ["OrderedCollections", "UnPack"] -git-tree-sha1 = "38b2e970043613c187bd56a995fe2e551821eb4a" -uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" -version = "0.12.1" - [[Parsers]] deps = ["Dates", "Test"] git-tree-sha1 = "8077624b3c450b15c087944363606a6ba12f925e" @@ -1091,109 +85,10 @@ version = "1.0.10" deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -[[PlotThemes]] -deps = ["PlotUtils", "Requires", "Statistics"] -git-tree-sha1 = "c6f5ea535551b3b16835134697f0c65d06c94b91" -uuid = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a" -version = "2.0.0" - -[[PlotUtils]] -deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "Statistics"] -git-tree-sha1 = "8d23333aee2e2733a5cd4e1985462df8203f47f7" -uuid = 
"995b91a9-d308-5afd-9ec6-746e21dbc043" -version = "1.0.6" - -[[Plots]] -deps = ["Base64", "Contour", "Dates", "FFMPEG", "FixedPointNumbers", "GR", "GeometryBasics", "GeometryTypes", "JSON", "LinearAlgebra", "Measures", "NaNMath", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "Requires", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"] -git-tree-sha1 = "392a4007f3584cbf98c89273d2453f9441ead3d4" -uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -version = "1.6.4" - -[[PoissonRandom]] -deps = ["Random", "Statistics", "Test"] -git-tree-sha1 = "44d018211a56626288b5d3f8c6497d28c26dc850" -uuid = "e409e4f3-bfea-5376-8464-e040bb5c01ab" -version = "0.4.0" - -[[PooledArrays]] -deps = ["DataAPI"] -git-tree-sha1 = "b1333d4eced1826e15adbdf01a4ecaccca9d353c" -uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" -version = "0.5.3" - -[[PositiveFactorizations]] -deps = ["LinearAlgebra", "Test"] -git-tree-sha1 = "127c47b91990c101ee3752291c4f45640eeb03d1" -uuid = "85a6dd25-e78a-55b7-8502-1745935b8125" -version = "0.2.3" - -[[PrettyPrint]] -git-tree-sha1 = "632eb4abab3449ab30c5e1afaa874f0b98b586e4" -uuid = "8162dcfd-2161-5ef2-ae6c-7681170c5f98" -version = "0.2.0" - -[[PrettyTables]] -deps = ["Crayons", "Formatting", "Parameters", "Reexport", "Tables"] -git-tree-sha1 = "8458dc04a493ae5c2fed3796c1d3117972c69694" -uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" -version = "0.9.1" - [[Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" -[[Profile]] -deps = ["Printf"] -uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" - -[[ProgressLogging]] -deps = ["Logging", "SHA", "UUIDs"] -git-tree-sha1 = "59398022b661b6fd569f25de6b18fde39843196a" -uuid = "33c8b6b6-d38a-422a-b730-caa89a2f386c" -version = "0.1.3" - -[[ProgressMeter]] -deps = ["Distributed", "Printf"] -git-tree-sha1 = "ddfd3ab9d50916fa39c4167c0324f56163482d6a" -uuid = "92933f4c-e287-5a05-a399-4b506db050ca" -version = "1.3.3" - -[[PyCall]] -deps = ["Conda", "Dates", "Libdl", "LinearAlgebra", "MacroTools", "Serialization", "VersionParsing"] -git-tree-sha1 = "3a3fdb9000d35958c9ba2323ca7c4958901f115d" -uuid = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" -version = "1.91.4" - -[[PyPlot]] -deps = ["Colors", "LaTeXStrings", "PyCall", "Sockets", "Test", "VersionParsing"] -git-tree-sha1 = "67dde2482fe1a72ef62ed93f8c239f947638e5a2" -uuid = "d330b81b-6aea-500a-939a-2ce795aea3ee" -version = "2.9.0" - -[[QuadGK]] -deps = ["DataStructures", "LinearAlgebra"] -git-tree-sha1 = "12fbe86da16df6679be7521dfb39fbc861e1dc7b" -uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" -version = "2.4.1" - -[[QuasiMonteCarlo]] -deps = ["Distributions", "LatinHypercubeSampling", "Sobol"] -git-tree-sha1 = "b90f9ff53b4c1f14eee512626cd8e7f9791b20a2" -uuid = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" -version = "0.2.0" - -[[RData]] -deps = ["CategoricalArrays", "CodecZlib", "DataFrames", "Dates", "FileIO", "Requires", "TimeZones", "Unicode"] -git-tree-sha1 = "10693c581956334a368c26b7c544e406c4c94385" -uuid = "df47a6cb-8c03-5eed-afd8-b6050d6c41da" -version = "0.7.2" - -[[RDatasets]] -deps = ["CSV", "CodecZlib", "DataFrames", "FileIO", "Printf", "RData", "Reexport"] -git-tree-sha1 = "68e425aa9461f45e286553cc4d049c0710dcc3aa" -uuid = "ce6b1742-4840-55fa-b093-852dadbb1d8b" -version = "0.6.10" - [[REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" @@ -1202,266 +97,21 @@ uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" deps = ["Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 
-[[RandomNumbers]] -deps = ["Random", "Requires"] -git-tree-sha1 = "441e6fc35597524ada7f85e13df1f4e10137d16f" -uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" -version = "1.4.0" - -[[RangeArrays]] -git-tree-sha1 = "b9039e93773ddcfc828f12aadf7115b4b4d225f5" -uuid = "b3c3ace0-ae52-54e7-9d0b-2c1406fd6b9d" -version = "0.3.2" - -[[Ratios]] -git-tree-sha1 = "37d210f612d70f3f7d57d488cb3b6eff56ad4e41" -uuid = "c84ed2f1-dad5-54f0-aa8e-dbefe2724439" -version = "0.4.0" - -[[RecipesBase]] -git-tree-sha1 = "6ee6c35fe69e79e17c455a386c1ccdc66d9f7da4" -uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" -version = "1.1.0" - -[[RecipesPipeline]] -deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase"] -git-tree-sha1 = "4a325c9bcc2d8e62a8f975b9666d0251d53b63b9" -uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" -version = "0.1.13" - -[[RecursiveArrayTools]] -deps = ["ArrayInterface", "LinearAlgebra", "RecipesBase", "Requires", "StaticArrays", "Statistics", "ZygoteRules"] -git-tree-sha1 = "47e117a002fc1dbbe905557b333a84126c93671c" -uuid = "731186ca-8d62-57ce-b412-fbd966d074cd" -version = "2.7.0" - -[[RecursiveFactorization]] -deps = ["LinearAlgebra", "LoopVectorization", "VectorizationBase"] -git-tree-sha1 = "4ca0bdad1d69abbd59c35af89a9a2ab6cd5ef0f1" -uuid = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" -version = "0.1.4" - -[[Reexport]] -deps = ["Pkg"] -git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0" -uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "0.2.0" - [[Requires]] deps = ["UUIDs"] git-tree-sha1 = "2fc2e1ab606a5dca7bbad9036a694553c3a57926" uuid = "ae029012-a4dd-5104-9daa-d747884805df" version = "1.0.3" -[[ResettableStacks]] -deps = ["StaticArrays"] -git-tree-sha1 = "d19e9c93de6020a96dbb2820567c78d0ab8f7248" -uuid = "ae5879a3-cd67-5da8-be7f-38c6eb64a37b" -version = "1.0.0" - -[[ReverseDiff]] -deps = ["DiffResults", "DiffRules", "ForwardDiff", "FunctionWrappers", "LinearAlgebra", "MacroTools", "NaNMath", "Random", "SpecialFunctions", "StaticArrays", "Statistics"] -git-tree-sha1 = "97c6f7dc9ef6ca1d5bd3aa0072d804281af5072d" -uuid = "37e2e3b7-166d-5795-8a7a-e32c996b4267" -version = "1.4.3" - -[[Rmath]] -deps = ["Random", "Rmath_jll"] -git-tree-sha1 = "86c5647b565873641538d8f812c04e4c9dbeb370" -uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" -version = "0.6.1" - -[[Rmath_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "d76185aa1f421306dec73c057aa384bad74188f0" -uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" -version = "0.2.2+1" - -[[Roots]] -deps = ["Printf"] -git-tree-sha1 = "1211c7c1928c1ed29cdcef65979b7a791e3b9fbe" -uuid = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" -version = "1.0.5" - [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" -[[SIMDPirates]] -deps = ["VectorizationBase"] -git-tree-sha1 = "450d163d3279a1d35e3aad3352a5167ef21b84a4" -uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a" -version = "0.8.25" - -[[SLEEFPirates]] -deps = ["Libdl", "SIMDPirates", "VectorizationBase"] -git-tree-sha1 = "67ae90a18aa8c22bf159318300e765fbd89ddf6e" -uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" -version = "0.5.5" - -[[SafeTestsets]] -deps = ["Test"] -git-tree-sha1 = "36ebc5622c82eb9324005cc75e7e2cc51181d181" -uuid = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" -version = "0.0.1" - -[[ScientificTypes]] -git-tree-sha1 = "3c88d3db0ffed7dabc94aa3d09798f97f1d7316f" -uuid = "321657f4-b219-11e9-178b-2701a2544e81" -version = "1.0.0" - -[[SentinelArrays]] -deps = ["Dates", "Random"] -git-tree-sha1 = "7a74946ace3b34fbb6c10e61b6e250b33d7e758c" -uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" -version = "1.2.15" - [[Serialization]] uuid 
= "9e88b42a-f829-5b0c-bbe9-9e923198166b" -[[SharedArrays]] -deps = ["Distributed", "Mmap", "Random", "Serialization"] -uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" - -[[ShiftedArrays]] -git-tree-sha1 = "22395afdcf37d6709a5a0766cc4a5ca52cb85ea0" -uuid = "1277b4bf-5013-50f5-be3d-901d8477a67a" -version = "1.0.0" - -[[Showoff]] -deps = ["Dates"] -git-tree-sha1 = "e032c9df551fb23c9f98ae1064de074111b7bc39" -uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f" -version = "0.3.1" - -[[SimpleTraits]] -deps = ["InteractiveUtils", "MacroTools"] -git-tree-sha1 = "daf7aec3fe3acb2131388f93a4c409b8c7f62226" -uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" -version = "0.9.3" - -[[Sobol]] -deps = ["DelimitedFiles", "Random"] -git-tree-sha1 = "c267048df70f47d47d5272fe1a63a66c51c53014" -uuid = "ed01d8cd-4d21-5b2a-85b4-cc3bdc58bad4" -version = "1.4.0" - [[Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" -[[SortingAlgorithms]] -deps = ["DataStructures", "Random", "Test"] -git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd" -uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" -version = "0.3.1" - -[[SparseArrays]] -deps = ["LinearAlgebra", "Random"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - -[[SparseDiffTools]] -deps = ["Adapt", "ArrayInterface", "Compat", "DataStructures", "FiniteDiff", "ForwardDiff", "LightGraphs", "LinearAlgebra", "Requires", "SparseArrays", "VertexSafeGraphs"] -git-tree-sha1 = "69de355cb5e2b9a0e89f383c1762bba5ae70b580" -uuid = "47a9eef4-7e08-11e9-0b38-333d64bd3804" -version = "1.10.0" - -[[SpecialFunctions]] -deps = ["OpenSpecFun_jll"] -git-tree-sha1 = "d8d8b8a9f4119829410ecd706da4cc8594a1e020" -uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "0.10.3" - -[[StaticArrays]] -deps = ["LinearAlgebra", "Random", "Statistics"] -git-tree-sha1 = "016d1e1a00fabc556473b07161da3d39726ded35" -uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "0.12.4" - -[[Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - -[[StatsBase]] -deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"] -git-tree-sha1 = "d72a47c47c522e283db774fc8c459dd5ed773710" -uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -version = "0.33.1" - -[[StatsFuns]] -deps = ["Rmath", "SpecialFunctions"] -git-tree-sha1 = "04a5a8e6ab87966b43f247920eab053fd5fdc925" -uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "0.9.5" - -[[StatsModels]] -deps = ["DataAPI", "DataStructures", "Distributions", "LinearAlgebra", "Printf", "ShiftedArrays", "SparseArrays", "StatsBase", "Tables"] -git-tree-sha1 = "c7363985627baf75f3f857988e0218e05c38014a" -uuid = "3eaba693-59b7-5ba5-a881-562e759f1c8d" -version = "0.6.14" - -[[StatsPlots]] -deps = ["Clustering", "DataStructures", "DataValues", "Distributions", "Interpolations", "KernelDensity", "LinearAlgebra", "MultivariateStats", "Observables", "Plots", "RecipesBase", "RecipesPipeline", "Reexport", "StatsBase", "TableOperations", "Tables", "Widgets"] -git-tree-sha1 = "90ee7fe8d026ffdfa2eca8632c4b2af359b8b289" -uuid = "f3b207a7-027a-5e70-b257-86293d7955fd" -version = "0.14.13" - -[[SteadyStateDiffEq]] -deps = ["DiffEqBase", "DiffEqCallbacks", "LinearAlgebra", "NLsolve", "Reexport"] -git-tree-sha1 = "75f258513b7ef8b235876f4cf146577ffd545094" -uuid = "9672c7b4-1e72-59bd-8a11-6ac3964bc41f" -version = "1.5.1" - -[[StochasticDiffEq]] -deps = ["ArrayInterface", "DataStructures", "DiffEqBase", "DiffEqJump", "DiffEqNoiseProcess", "FillArrays", "FiniteDiff", "ForwardDiff", 
"LinearAlgebra", "Logging", "MuladdMacro", "NLsolve", "OrdinaryDiffEq", "Random", "RandomNumbers", "RecursiveArrayTools", "Reexport", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] -git-tree-sha1 = "09c24c310da843a3c6b41984089f2a28c301d3b1" -uuid = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0" -version = "6.26.0" - -[[StructArrays]] -deps = ["Adapt", "DataAPI", "Tables"] -git-tree-sha1 = "8099ed9fb90b6e754d6ba8c6ed8670f010eadca0" -uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" -version = "0.4.4" - -[[StructTypes]] -deps = ["Dates", "UUIDs"] -git-tree-sha1 = "1ed04f622a39d2e5a6747c3a70be040c00333933" -uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" -version = "1.1.0" - -[[SuiteSparse]] -deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] -uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" - -[[SuiteSparse_jll]] -deps = ["Libdl", "METIS_jll", "OpenBLAS_jll", "Pkg"] -git-tree-sha1 = "4a2295b63d67e6f13a0b539c935ccbf218fa1143" -uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" -version = "5.4.0+9" - -[[Sundials]] -deps = ["CEnum", "DataStructures", "DiffEqBase", "Libdl", "LinearAlgebra", "Logging", "Reexport", "SparseArrays", "Sundials_jll"] -git-tree-sha1 = "9e7036b5690b264248d99090170f60884105a5e1" -uuid = "c3572dad-4567-51f8-b174-8c6c989267f4" -version = "4.3.0" - -[[Sundials_jll]] -deps = ["CompilerSupportLibraries_jll", "Libdl", "OpenBLAS_jll", "Pkg", "SuiteSparse_jll"] -git-tree-sha1 = "013ff4504fc1d475aa80c63b455b6b3a58767db2" -uuid = "fb77eaff-e24c-56d4-86b1-d163f2edb164" -version = "5.2.0+1" - -[[SymbolicUtils]] -deps = ["AbstractAlgebra", "Combinatorics", "DataStructures", "NaNMath", "SpecialFunctions", "TimerOutputs"] -git-tree-sha1 = "3cd0b83054bd456ac5c8740900ef4d1f830462c0" -uuid = "d1185830-fcd6-423d-90d6-eec64667417b" -version = "0.5.1" - -[[TableOperations]] -deps = ["Tables", "Test"] -git-tree-sha1 = "208630a14884abd110a8f8008b0882f0d0f5632c" -uuid = "ab02a1b2-a7df-11e8-156e-fb1833f50b87" -version = "0.2.1" - [[TableTraits]] deps = ["IteratorInterfaceExtensions"] git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" @@ -1474,173 +124,25 @@ git-tree-sha1 = "b7f762e9820b7fab47544c36f26f54ac59cf8abf" uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" version = "1.0.5" -[[TerminalLoggers]] -deps = ["LeftChildRightSiblingTrees", "Logging", "Markdown", "Printf", "ProgressLogging", "UUIDs"] -git-tree-sha1 = "cbea752b5eef52a3e1188fb31580c3e4fa0cbc35" -uuid = "5d786b92-1e48-4d6f-9151-6b4477ca9bed" -version = "0.1.2" - [[Test]] deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -[[TimeZones]] -deps = ["Dates", "EzXML", "Mocking", "Pkg", "Printf", "RecipesBase", "Serialization", "Unicode"] -git-tree-sha1 = "338ddbb2b9b50a9a423ba6c3fad1824553535988" -uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53" -version = "1.3.2" - -[[TimerOutputs]] -deps = ["Printf"] -git-tree-sha1 = "f458ca23ff80e46a630922c555d838303e4b9603" -uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -version = "0.5.6" - -[[Tracker]] -deps = ["Adapt", "DiffRules", "ForwardDiff", "LinearAlgebra", "MacroTools", "NNlib", "NaNMath", "Printf", "Random", "Requires", "SpecialFunctions", "Statistics"] -git-tree-sha1 = "5ecb538f7a537377f95fa6cc2690bf208192f35a" -uuid = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" -version = "0.2.11" - -[[TranscodingStreams]] -deps = ["Random", "Test"] -git-tree-sha1 = "7c53c35547de1c5b9d46a4797cf6d8253807108c" -uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.9.5" - -[[TransformVariables]] -deps = ["ArgCheck", "DocStringExtensions", 
"ForwardDiff", "LinearAlgebra", "MacroTools", "Parameters", "Pkg", "Random"] -git-tree-sha1 = "cd253a2ff93ee97e8d2bc938a3dc71d822d89dd9" -uuid = "84d833dd-6860-57f9-a1a7-6da5db126cff" -version = "0.3.10" - -[[TreeViews]] -deps = ["Test"] -git-tree-sha1 = "8d0d7a3fe2f30d6a7f833a5f19f7c7a5b396eae6" -uuid = "a2a6695c-b41b-5b7d-aed9-dbfdeacea5d7" -version = "0.3.0" - -[[Turing]] -deps = ["AbstractMCMC", "AdvancedHMC", "AdvancedMH", "AdvancedVI", "Bijectors", "Distributions", "DistributionsAD", "DocStringExtensions", "DynamicPPL", "EllipticalSliceSampling", "ForwardDiff", "Libtask", "LinearAlgebra", "LogDensityProblems", "MCMCChains", "NamedArrays", "Printf", "Random", "Reexport", "Requires", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Tracker", "ZygoteRules"] -git-tree-sha1 = "d538335b12cbf32692fe5027cba38b36bd961e7c" -uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" -version = "0.14.3" - [[UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" -[[UnPack]] -git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" -uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" -version = "1.0.2" - [[Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" -[[Unitful]] -deps = ["ConstructionBase", "LinearAlgebra", "Random"] -git-tree-sha1 = "ad27b1a82c81d2bb65fa3a94fa05b98136eefaad" -uuid = "1986cc42-f94f-5a68-af5c-568840ba703d" -version = "1.4.1" - -[[VectorizationBase]] -deps = ["CpuId", "Libdl", "LinearAlgebra"] -git-tree-sha1 = "03e2fbb479a1ea350398195b6fbf439bae0f8260" -uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" -version = "0.12.33" - -[[VersionParsing]] -git-tree-sha1 = "80229be1f670524750d905f8fc8148e5a8c4537f" -uuid = "81def892-9a0e-5fdd-b105-ffc91e053289" -version = "1.2.0" - -[[VertexSafeGraphs]] -deps = ["LightGraphs"] -git-tree-sha1 = "b9b450c99a3ca1cc1c6836f560d8d887bcbe356e" -uuid = "19fa3120-7c27-5ec5-8db8-b0b0aa330d6f" -version = "0.1.2" - [[Weave]] deps = ["Base64", "Dates", "Highlights", "JSON", "Markdown", "Mustache", "Pkg", "Printf", "REPL", "Requires", "Serialization", "YAML"] git-tree-sha1 = "258dc2c65b93710c489dc7c56389fc5fad5e2061" uuid = "44d3d7a6-8a23-5bf8-98c5-b353f8df5ec9" version = "0.10.3" -[[Widgets]] -deps = ["Colors", "Dates", "Observables", "OrderedCollections"] -git-tree-sha1 = "fc0feda91b3fef7fe6948ee09bb628f882b49ca4" -uuid = "cc8bc4a8-27d6-5769-a93b-9d913e69aa62" -version = "0.6.2" - -[[WoodburyMatrices]] -deps = ["LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "28ffe06d28b1ba8fdb2f36ec7bb079fac81bac0d" -uuid = "efce3f68-66dc-5838-9240-27a6d6f5f9b6" -version = "0.5.2" - -[[XML2_jll]] -deps = ["Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"] -git-tree-sha1 = "ecff6bff03b35d482ad5eb51276d6fc4c823cd39" -uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" -version = "2.9.10+2" - [[YAML]] deps = ["Base64", "Dates", "Printf"] git-tree-sha1 = "209c033ada051007a934f7ab4738a4776bc041c3" uuid = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" version = "0.4.2" - -[[ZipFile]] -deps = ["Libdl", "Printf", "Zlib_jll"] -git-tree-sha1 = "254975fef2fc526583bb9b7c9420fe66ffe09f2f" -uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" -version = "0.9.2" - -[[Zlib_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "fdd89e5ab270ea0f2a0174bd9093e557d06d4bfa" -uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.11+16" - -[[Zygote]] -deps = ["AbstractFFTs", "ArrayLayouts", "ChainRules", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "Future", "IRTools", "InteractiveUtils", "LinearAlgebra", "LoopVectorization", "MacroTools", "NNlib", "NaNMath", "Random", "Requires", 
"SpecialFunctions", "Statistics", "ZygoteRules"] -git-tree-sha1 = "e7b3106f045bd6e526708d1a7821ee9ecc24d094" -uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" -version = "0.5.7" - -[[ZygoteRules]] -deps = ["MacroTools"] -git-tree-sha1 = "b3b4882cc9accf6731a08cc39543fbc6b669dca8" -uuid = "700de1a5-db45-46bc-99cf-38207098b444" -version = "0.2.0" - -[[libass_jll]] -deps = ["Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "Libdl", "Pkg", "Zlib_jll"] -git-tree-sha1 = "f02d0db58888592e98c5f4953cef620ce9274eee" -uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" -version = "0.14.0+3" - -[[libfdk_aac_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "e17b4513993b4413d31cffd1b36a63625ebbc3d3" -uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280" -version = "0.1.6+3" - -[[libvorbis_jll]] -deps = ["Libdl", "Ogg_jll", "Pkg"] -git-tree-sha1 = "8014e1c1033009edcfe820ec25877a9f1862ba4c" -uuid = "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a" -version = "1.3.6+5" - -[[x264_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "e496625b900df1b02ab0e02fad316b77446616ef" -uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a" -version = "2020.7.14+1" - -[[x265_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "ac7d44fa1639a780d0ae79ca1a5a7f4181131825" -uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76" -version = "3.0.0+2" diff --git a/Project.toml b/Project.toml index 6456cb271..a0fbf9981 100644 --- a/Project.toml +++ b/Project.toml @@ -2,34 +2,11 @@ name = "TuringTutorials" version = "0.1.0" [deps] -Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" -ConjugatePriors = "1624bea9-42b1-5fc1-afd3-e96f729c8d6c" -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" -DiffEqBayes = "ebbdde9d-f333-5424-9be2-dbf1e9acfb5e" -DiffEqSensitivity = "41bf760c-e81c-5289-8e54-58b1f1f8abe2" -DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa" -Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" -Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" -GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" -MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" -MLDataUtils = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d" -NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" -RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" -Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" Weave = "44d3d7a6-8a23-5bf8-98c5-b353f8df5ec9" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] julia = "1" From 578f94105c8082e37033d5fdfc813fc05bddb59f Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 24 Sep 2020 14:30:38 +0100 Subject: [PATCH 10/12] removed Maniftest.toml --- Manifest.toml | 148 -------------------------------------------------- 1 file changed, 148 deletions(-) delete mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index 7a49ca026..000000000 --- a/Manifest.toml +++ /dev/null @@ -1,148 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -[[Base64]] -uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" - -[[DataAPI]] 
-git-tree-sha1 = "176e23402d80e7743fc26c19c681bfb11246af32" -uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.3.0" - -[[DataValueInterfaces]] -git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" -uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" -version = "1.0.0" - -[[Dates]] -deps = ["Printf"] -uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" - -[[Distributed]] -deps = ["Random", "Serialization", "Sockets"] -uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" - -[[DocStringExtensions]] -deps = ["LibGit2", "Markdown", "Pkg", "Test"] -git-tree-sha1 = "50ddf44c53698f5e784bbebb3f4b21c5807401b1" -uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.8.3" - -[[Highlights]] -deps = ["DocStringExtensions", "InteractiveUtils", "REPL"] -git-tree-sha1 = "f823a2d04fb233d52812c8024a6d46d9581904a4" -uuid = "eafb193a-b7ab-5a9e-9068-77385905fa72" -version = "0.4.5" - -[[InteractiveUtils]] -deps = ["Markdown"] -uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - -[[IteratorInterfaceExtensions]] -git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" -uuid = "82899510-4779-5014-852e-03e436cf321d" -version = "1.0.0" - -[[JSON]] -deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" -uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.1" - -[[LibGit2]] -deps = ["Printf"] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[LinearAlgebra]] -deps = ["Libdl"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[[Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[Markdown]] -deps = ["Base64"] -uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[[Mmap]] -uuid = "a63ad114-7e13-5084-954f-fe012c677804" - -[[Mustache]] -deps = ["Printf", "Tables"] -git-tree-sha1 = "17e60d71d720c33ac2fbac21298ee495bae27587" -uuid = "ffc61752-8dc7-55ee-8c37-f3e9cdd09e70" -version = "1.0.5" - -[[Parsers]] -deps = ["Dates", "Test"] -git-tree-sha1 = "8077624b3c450b15c087944363606a6ba12f925e" -uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "1.0.10" - -[[Pkg]] -deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" - -[[Printf]] -deps = ["Unicode"] -uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" - -[[REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets"] -uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" - -[[Random]] -deps = ["Serialization"] -uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[[Requires]] -deps = ["UUIDs"] -git-tree-sha1 = "2fc2e1ab606a5dca7bbad9036a694553c3a57926" -uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.0.3" - -[[SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" - -[[Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[TableTraits]] -deps = ["IteratorInterfaceExtensions"] -git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" -uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" -version = "1.0.0" - -[[Tables]] -deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] -git-tree-sha1 = "b7f762e9820b7fab47544c36f26f54ac59cf8abf" -uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -version = "1.0.5" - -[[Test]] -deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[Unicode]] -uuid = 
"4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - -[[Weave]] -deps = ["Base64", "Dates", "Highlights", "JSON", "Markdown", "Mustache", "Pkg", "Printf", "REPL", "Requires", "Serialization", "YAML"] -git-tree-sha1 = "258dc2c65b93710c489dc7c56389fc5fad5e2061" -uuid = "44d3d7a6-8a23-5bf8-98c5-b353f8df5ec9" -version = "0.10.3" - -[[YAML]] -deps = ["Base64", "Dates", "Printf"] -git-tree-sha1 = "209c033ada051007a934f7ab4738a4776bc041c3" -uuid = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" -version = "0.4.2" From 3379a7f0ef60b12859850e7a775106f9c049ae3c Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Fri, 25 Sep 2020 04:51:48 +0100 Subject: [PATCH 11/12] made all tutorials compatible with the new setup, also made sure they work --- Project.toml | 2 +- .../01_bayesian-neural-network.jmd | 109 +---- tutorials/bayesian-deep-learning/Project.toml | 8 + .../01_bayesian-diff-eq.jmd | 427 +----------------- tutorials/differential-equations/Project.toml | 12 + .../01_hidden-markov-model.jmd | 63 +-- tutorials/graphical-models/Project.toml | 5 + tutorials/introduction/01_introduction.jmd | 43 +- .../02_gaussian-mixture-model.jmd | 84 +--- tutorials/introduction/Project.toml | 7 + .../01_infinite-mixture-model.jmd | 121 ++--- tutorials/non-parameteric/Project.toml | 4 + .../regression/01_logistic-regression.jmd | 99 +--- tutorials/regression/02_linear-regression.jmd | 142 +----- .../regression/03_poisson-regression.jmd | 294 +----------- .../04_multinomial-logistic-regression.jmd | 133 +----- tutorials/regression/Project.toml | 14 + .../01_variational-inference.jmd | 44 +- tutorials/variational-inference/Project.toml | 1 - 19 files changed, 199 insertions(+), 1413 deletions(-) create mode 100644 tutorials/bayesian-deep-learning/Project.toml create mode 100644 tutorials/differential-equations/Project.toml create mode 100644 tutorials/graphical-models/Project.toml create mode 100644 tutorials/introduction/Project.toml create mode 100644 tutorials/non-parameteric/Project.toml create mode 100644 tutorials/regression/Project.toml diff --git a/Project.toml b/Project.toml index 72e38e72e..a0fbf9981 100644 --- a/Project.toml +++ b/Project.toml @@ -9,4 +9,4 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Weave = "44d3d7a6-8a23-5bf8-98c5-b353f8df5ec9" [compat] -julia = "1" \ No newline at end of file +julia = "1" diff --git a/tutorials/bayesian-deep-learning/01_bayesian-neural-network.jmd b/tutorials/bayesian-deep-learning/01_bayesian-neural-network.jmd index fc5af42f0..9b82181ca 100644 --- a/tutorials/bayesian-deep-learning/01_bayesian-neural-network.jmd +++ b/tutorials/bayesian-deep-learning/01_bayesian-neural-network.jmd @@ -10,7 +10,7 @@ We will begin with importing the relevant libraries. ```julia # Import libraries. -using Turing, Flux, Plots, Random +using Turing, Flux, Plots, Random, ReverseDiff # Hide sampling progress. Turing.turnprogress(false); @@ -19,17 +19,6 @@ Turing.turnprogress(false); Turing.setadbackend(:reversediff) ``` - ┌ Info: [Turing]: progress logging is disabled globally - └ @ Turing /home/cameron/.julia/packages/Turing/cReBm/src/Turing.jl:22 - - - - - - :reversediff - - - Our goal here is to use a Bayesian neural network to classify points in an artificial dataset. The code below generates data points arranged in a box-like pattern and displays a graph of the dataset we'll be working with. 
@@ -68,13 +57,6 @@ end plot_data() ``` - - - -![svg](/tutorials/3_BayesNN_files/3_BayesNN_4_0.svg) - - - ## Building a Neural Network The next step is to define a [feedforward neural network](https://en.wikipedia.org/wiki/Feedforward_neural_network) where we express our parameters as distribtuions, and not single points as with traditional neural networks. The two functions below, `unpack` and `nn_forward` are helper functions we need when we specify our model in Turing. @@ -121,7 +103,7 @@ alpha = 0.09 sig = sqrt(1.0 / alpha) # Specify the probabalistic model. -@model bayes_nn(xs, ts) = begin +@model function bayes_nn(xs, ts) # Create the weight and bias vector. nn_params ~ MvNormal(zeros(20), sig .* ones(20)) @@ -138,7 +120,6 @@ end; Inference can now be performed by calling `sample`. We use the `HMC` sampler here. - ```julia # Perform inference. N = 5000 @@ -147,10 +128,9 @@ ch = sample(bayes_nn(hcat(xs...), ts), HMC(0.05, 4), N); Now we extract the weights and biases from the sampled chain. We'll use these primarily in determining how good a classifier our model is. - ```julia # Extract all weight and bias parameters. -theta = ch[:nn_params].value.data; +theta = MCMCChains.group(ch, :nn_params).value; ``` ## Prediction Visualization @@ -163,7 +143,7 @@ We can use [MAP estimation](https://en.wikipedia.org/wiki/Maximum_a_posteriori_e plot_data() # Find the index that provided the highest log posterior in the chain. -_, i = findmax(ch[:lp].value.data) +_, i = findmax(ch[:lp]) # Extract the max row value from i. i = i.I[1] @@ -175,24 +155,16 @@ Z = [nn_forward([x, y], theta[i, :])[1] for x=x_range, y=y_range] contour!(x_range, y_range, Z) ``` - - - -![svg](/tutorials/3_BayesNN_files/3_BayesNN_16_0.svg) - - - The contour plot above shows that the MAP method is not too bad at classifying our data. Now we can visualize our predictions. -\$\$ +$$ p(\tilde{x} | X, \alpha) = \int_{\theta} p(\tilde{x} | \theta) p(\theta | X, \alpha) \approx \sum_{\theta \sim p(\theta | X, \alpha)}f_{\theta}(\tilde{x}) -\$\$ +$$ The `nn_predict` function takes the average predicted value from a network parameterized by weights drawn from the MCMC chain. - ```julia # Return the average predicted value across # multiple weights. @@ -203,7 +175,6 @@ end; Next, we use the `nn_predict` function to predict the value at a sample of points where the `x` and `y` coordinates range between -6 and 6. As we can see below, we still have a satisfactory fit to our data. - ```julia # Plot the average prediction. plot_data() @@ -215,16 +186,8 @@ Z = [nn_predict([x, y], theta, n_end)[1] for x=x_range, y=y_range] contour!(x_range, y_range, Z) ``` - - - -![svg](/tutorials/3_BayesNN_files/3_BayesNN_21_0.svg) - - - If you are interested in how the predictive power of our Bayesian neural network evolved between samples, the following graph displays an animation of the contour plot generated from the network weights in samples 1 to 1,000. - ```julia # Number of iterations to plot. 
n_end = 500 @@ -232,24 +195,10 @@ n_end = 500 anim = @gif for i=1:n_end plot_data() Z = [nn_forward([x, y], theta[i,:])[1] for x=x_range, y=y_range] - contour!(x_range, y_range, Z, title="Iteration $$i", clim = (0,1)) + contour!(x_range, y_range, Z, title="Iteration $i", clim = (0,1)) end every 5 - - ``` - ┌ Info: Saved animation to - │ fn = /home/cameron/code/TuringTutorials/tmp.gif - └ @ Plots /home/cameron/.julia/packages/Plots/cc8wh/src/animation.jl:98 - - - - - - - - - ## Variational Inference (ADVI) We can also use Turing's variational inference tools to estimate the parameters of this model. See [variational inference](https://turing.ml/dev/docs/for-developers/variational_inference) for more information. @@ -258,6 +207,7 @@ We can also use Turing's variational inference tools to estimate the parameters ```julia using Bijectors using Turing: Variational +using AdvancedVI m = bayes_nn(hcat(xs...), ts); @@ -266,27 +216,20 @@ q = Variational.meanfield(m) μ = randn(length(q)) ω = -1 .* ones(length(q)) -q = Variational.update(q, μ, exp.(ω)); +q = AdvancedVI.update(q, μ, exp.(ω)); -advi = ADVI(10, 1000) +advi = ADVI(10, 5_000) q_hat = vi(m, advi, q); ``` - ┌ Info: [ADVI] Should only be seen once: optimizer created for θ - │ objectid(θ) = 3812708583762184342 - └ @ Turing.Variational /home/cameron/.julia/packages/Turing/cReBm/src/variational/VariationalInference.jl:204 - - - ```julia samples = transpose(rand(q_hat, 5000)) -ch_vi = Chains(reshape(samples, size(samples)..., 1), ["nn_params[$$i]" for i = 1:20]); +ch_vi = Chains(reshape(samples, size(samples)..., 1), string.(MCMCChains.namesingroup(ch, :nn_params))); # Extract all weight and bias parameters. -theta = ch_vi[:nn_params].value.data; +theta = MCMCChains.group(ch_vi, :nn_params).value; ``` - ```julia # Plot the average prediction. plot_data() @@ -298,13 +241,6 @@ Z = [nn_predict([x, y], theta, n_end)[1] for x=x_range, y=y_range] contour!(x_range, y_range, Z) ``` - - - -![svg](/tutorials/3_BayesNN_files/3_BayesNN_28_0.svg) - - - ## Generic Bayesian Neural Networks The below code is intended for use in more general applications, where you need to be able to change the basic network shape fluidly. The code above is highly rigid, and adapting it for other architectures would be time consuming. Currently the code below only supports networks of `Dense` layers. @@ -366,23 +302,11 @@ end end end -# Set the backend. -Turing.setadbackend(:reverse_diff) - # Perform inference. num_samples = 500 ch2 = sample(bayes_nn_general(hcat(xs...), ts, network_shape, num_params), NUTS(0.65), num_samples); ``` - ┌ Warning: `Turing.setadbackend(:reverse_diff)` is deprecated. Please use `Turing.setadbackend(:tracker)` to use `Tracker` or `Turing.setadbackend(:reversediff)` to use `ReverseDiff`. To use `ReverseDiff`, please make sure it is loaded separately with `using ReverseDiff`. - │ caller = setadbackend(::Symbol) at ad.jl:5 - └ @ Turing.Core /home/cameron/.julia/packages/Turing/cReBm/src/core/ad.jl:5 - ┌ Info: Found initial step size - │ ϵ = 0.2 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/cReBm/src/inference/hmc.jl:556 - - - ```julia # This function makes predictions based on network shape. function nn_predict(x, theta, num, network_shape) @@ -390,7 +314,7 @@ function nn_predict(x, theta, num, network_shape) end; # Extract the θ parameters from the sampled chain. 
-params2 = ch2[:θ].value.data +params2 = MCMCChains.group(ch2, :θ).value plot_data() @@ -400,11 +324,4 @@ Z = [nn_predict([x, y], params2, length(ch2), network_shape)[1] for x=x_range, y contour!(x_range, y_range, Z) ``` - - - -![svg](/tutorials/3_BayesNN_files/3_BayesNN_31_0.svg) - - - This has been an introduction to the applications of Turing and Flux in defining Bayesian neural networks. diff --git a/tutorials/bayesian-deep-learning/Project.toml b/tutorials/bayesian-deep-learning/Project.toml new file mode 100644 index 000000000..df324f01c --- /dev/null +++ b/tutorials/bayesian-deep-learning/Project.toml @@ -0,0 +1,8 @@ +[deps] +AdvancedVI = "b5ca4192-6429-45e5-a2d9-87aec30a685c" +Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" +Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" diff --git a/tutorials/differential-equations/01_bayesian-diff-eq.jmd b/tutorials/differential-equations/01_bayesian-diff-eq.jmd index 7f01456ce..4c70407dd 100644 --- a/tutorials/differential-equations/01_bayesian-diff-eq.jmd +++ b/tutorials/differential-equations/01_bayesian-diff-eq.jmd @@ -25,8 +25,6 @@ $$\frac{dx}{dt} = (\alpha - \beta y)x$$ $$\frac{dy}{dt} = (\delta x - \gamma)y$$ - - ```julia function lotka_volterra(du,u,p,t) x, y = u @@ -41,30 +39,13 @@ sol = solve(prob,Tsit5()) plot(sol) ``` - - - -![svg](/tutorials/10_BayesianDiffEq_files/10_BayesianDiffEq_3_0.svg) - - - We'll generate the data to use for the parameter estimation from simulation. With the `saveat` [argument](https://docs.sciml.ai/latest/basics/common_solver_opts/) we specify that the solution is stored only at `0.1` time units. - ```julia odedata = Array(solve(prob,Tsit5(),saveat=0.1)) ``` - - - - 2×101 Array{Float64,2}: - 1.0 1.03981 1.05332 1.03247 0.972908 … 0.133965 0.148601 0.165247 - 1.0 1.22939 1.52387 1.88714 2.30908 0.476902 0.450153 0.426924 - - - ## Fitting Lotka-Volterra with DiffEqBayes [DiffEqBayes.jl](https://github.com/SciML/DiffEqBayes.jl) is a high level package that set of extension functionality for estimating the parameters of differential equations using Bayesian methods. It allows the choice of using CmdStan.jl, Turing.jl, DynamicHMC.jl and ApproxBayes.jl to perform a Bayesian estimation of a differential equation problem specified via the DifferentialEquations.jl interface. You can read the [docs](https://docs.sciml.ai/latest/analysis/parameter_estimation/#Bayesian-Methods-1) for an understanding of the available functionality. @@ -77,145 +58,18 @@ priors = [truncated(Normal(1.5,0.5),0.5,2.5),truncated(Normal(1.2,0.5),0,2),trun bayesian_result_turing = turing_inference(prob,Tsit5(),t,odedata,priors,num_samples=10_000) ``` - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Info: Found initial step size - │ ϵ = 0.00625 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/GMBTf/src/inference/hmc.jl:629 - ┌ Warning: The current proposal will be rejected due to numerical error(s). 
- │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). 
- │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - - - - - - Object of type Chains, with data of type 9000×17×1 Array{Float64,3} - - Iterations = 1:9000 - Thinning interval = 1 - Chains = 1 - Samples per chain = 9000 - internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth - parameters = theta[1], theta[2], theta[3], theta[4], σ[1] - - 2-element Array{ChainDataFrame,1} - - Summary Statistics - parameters mean std naive_se mcse ess r_hat - ────────── ────── ────── ──────── ────── ───────── ────── - theta[1] 2.3263 0.1073 0.0011 0.0021 2202.3643 1.0000 - theta[2] 1.5434 0.0957 0.0010 0.0019 2575.4033 1.0002 - theta[3] 3.1259 0.1983 0.0021 0.0031 4127.1344 1.0000 - theta[4] 1.8356 0.0827 0.0009 0.0017 2189.2825 1.0000 - σ[1] 0.8569 0.0436 0.0005 0.0005 6856.5421 0.9999 - - Quantiles - parameters 2.5% 25.0% 50.0% 75.0% 97.5% - ────────── ────── ────── ────── ────── ────── - theta[1] 2.1185 2.2428 2.3337 2.4169 2.4916 - theta[2] 1.3655 1.4750 1.5422 1.6075 1.7367 - theta[3] 2.7571 2.9893 3.1166 3.2546 3.5440 - theta[4] 1.6902 1.7708 1.8307 1.9006 1.9868 - σ[1] 0.7755 0.8266 0.8551 0.8847 0.9484 - - - - The estimated parameters are clearly very close to the desired parameter values. We can also check that the chains have converged in the plot. 
- ```julia plot(bayesian_result_turing) ``` - - - -![svg](/tutorials/10_BayesianDiffEq_files/10_BayesianDiffEq_9_0.svg) - - - ## Direct Handling of Bayesian Estimation with Turing You could want to do some sort of reduction with the differential equation's solution or use it in some other way as well. In those cases DiffEqBayes might not be useful. Turing and DifferentialEquations are completely composable and you can write of the differential equation inside a Turing `@model` and it will just work. We can rewrite the Lotka Volterra parameter estimation problem with a Turing `@model` interface as below - ```julia Turing.setadbackend(:forwarddiff) @@ -239,47 +93,6 @@ model = fitlv(odedata) chain = sample(model, NUTS(.65),10000) ``` - ┌ Info: Found initial step size - │ ϵ = 0.2 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/GMBTf/src/inference/hmc.jl:629 - Sampling: 100%|█████████████████████████████████████████| Time: 0:02:48 - - - - - - Object of type Chains, with data of type 9000×17×1 Array{Float64,3} - - Iterations = 1:9000 - Thinning interval = 1 - Chains = 1 - Samples per chain = 9000 - internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth - parameters = α, β, γ, δ, σ - - 2-element Array{ChainDataFrame,1} - - Summary Statistics - parameters mean std naive_se mcse ess r_hat - ────────── ────── ────── ──────── ────── ───────── ────── - α 1.4999 0.0060 0.0001 0.0001 2341.1779 0.9999 - β 0.9999 0.0037 0.0000 0.0001 2440.6968 0.9999 - γ 3.0001 0.0047 0.0000 0.0001 4070.6419 1.0003 - δ 1.0001 0.0032 0.0000 0.0001 2324.4733 0.9999 - σ 0.0151 0.0011 0.0000 0.0000 4591.2728 0.9999 - - Quantiles - parameters 2.5% 25.0% 50.0% 75.0% 97.5% - ────────── ────── ────── ────── ────── ────── - α 1.4881 1.4960 1.4998 1.5038 1.5118 - β 0.9925 0.9975 0.9999 1.0024 1.0074 - γ 2.9911 2.9970 3.0000 3.0032 3.0095 - δ 0.9937 0.9979 1.0001 1.0022 1.0066 - σ 0.0131 0.0143 0.0150 0.0158 0.0173 - - - - ## Scaling to Large Models: Adjoint Sensitivities DifferentialEquations.jl's efficiency for large stiff models has been shown in multiple [benchmarks](https://github.com/SciML/DiffEqBenchmarks.jl). To learn more about how to optimize solving performance for stiff problems you can take a look at the [docs](https://docs.sciml.ai/latest/tutorials/advanced_ode_example/). 
@@ -297,7 +110,9 @@ All we had to do is switch the AD backend to one of the adjoint-compatible backe ```julia +using Zygote Turing.setadbackend(:zygote) + @model function fitlv(data) σ ~ InverseGamma(2, 3) α ~ truncated(Normal(1.5,0.5),0.5,2.5) @@ -315,50 +130,8 @@ model = fitlv(odedata) chain = sample(model, NUTS(.65),1000) ``` - ┌ Info: Found initial step size - │ ϵ = 0.2 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/GMBTf/src/inference/hmc.jl:629 - Sampling: 100%|█████████████████████████████████████████| Time: 0:10:42 - - - - - - Object of type Chains, with data of type 500×17×1 Array{Float64,3} - - Iterations = 1:500 - Thinning interval = 1 - Chains = 1 - Samples per chain = 500 - internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth - parameters = α, β, γ, δ, σ - - 2-element Array{ChainDataFrame,1} - - Summary Statistics - parameters mean std naive_se mcse ess r_hat - ────────── ────── ────── ──────── ────── ──────── ────── - α 1.4997 0.0052 0.0002 0.0003 201.5277 1.0046 - β 0.9999 0.0033 0.0001 0.0001 219.1974 1.0027 - γ 3.0003 0.0047 0.0002 0.0003 290.3332 1.0014 - δ 1.0002 0.0029 0.0001 0.0002 210.0807 1.0046 - σ 0.0151 0.0010 0.0000 0.0001 246.6502 1.0017 - - Quantiles - parameters 2.5% 25.0% 50.0% 75.0% 97.5% - ────────── ────── ────── ────── ────── ────── - α 1.4892 1.4962 1.5002 1.5030 1.5108 - β 0.9934 0.9978 1.0000 1.0019 1.0066 - γ 2.9910 2.9971 3.0002 3.0039 3.0084 - δ 0.9943 0.9983 1.0000 1.0021 1.0060 - σ 0.0131 0.0143 0.0151 0.0158 0.0172 - - - - Now we can exercise control of the sensitivity analysis method that is used by using the `sensealg` keyword argument. Let's choose the `InterpolatingAdjoint` from the available AD [methods](https://docs.sciml.ai/latest/analysis/sensitivity/#Sensitivity-Algorithms-1) and enable a compiled ReverseDiff vector-Jacobian product: - ```julia @model function fitlv(data) σ ~ InverseGamma(2, 3) @@ -377,98 +150,6 @@ model = fitlv(odedata) @time chain = sample(model, NUTS(.65),1000) ``` - ┌ Info: Found initial step size - │ ϵ = 0.2 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/GMBTf/src/inference/hmc.jl:629 - Sampling: 11%|████▍ | ETA: 0:06:27┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 13%|█████▍ | ETA: 0:05:58┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 15%|██████▎ | ETA: 0:05:27┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 21%|████████▌ | ETA: 0:04:20┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 23%|█████████▎ | ETA: 0:04:03┌ Warning: The current proposal will be rejected due to numerical error(s). 
- │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 24%|██████████ | ETA: 0:03:48┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 28%|███████████▌ | ETA: 0:03:27┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 29%|███████████▊ | ETA: 0:03:24┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 29%|████████████ | ETA: 0:03:20┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 36%|███████████████ | ETA: 0:02:45┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 37%|███████████████▏ | ETA: 0:02:44┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 39%|████████████████ | ETA: 0:02:36┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 46%|██████████████████▉ | ETA: 0:02:08┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 48%|███████████████████▊ | ETA: 0:02:03┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 49%|████████████████████▏ | ETA: 0:02:01┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 50%|████████████████████▎ | ETA: 0:02:00┌ Warning: The current proposal will be rejected due to numerical error(s). 
- │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 100%|█████████████████████████████████████████| Time: 0:03:32 - - - 225.663919 seconds (1.41 G allocations: 66.216 GiB, 5.25% gc time) - - - - - - Object of type Chains, with data of type 500×17×1 Array{Float64,3} - - Iterations = 1:500 - Thinning interval = 1 - Chains = 1 - Samples per chain = 500 - internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth - parameters = α, β, γ, δ, σ - - 2-element Array{ChainDataFrame,1} - - Summary Statistics - parameters mean std naive_se mcse ess r_hat - ────────── ────── ────── ──────── ────── ──────── ────── - α 0.9122 0.2810 0.0126 0.0152 211.4497 0.9992 - β 1.8499 0.1141 0.0051 0.0055 302.7650 1.0018 - γ 2.5879 0.3299 0.0148 0.0228 307.5110 0.9997 - δ 0.1259 0.0221 0.0010 0.0007 219.5371 1.0006 - σ 0.8746 0.0437 0.0020 0.0017 342.6660 1.0008 - - Quantiles - parameters 2.5% 25.0% 50.0% 75.0% 97.5% - ────────── ────── ────── ────── ────── ────── - α 0.5060 0.6920 0.8932 1.0874 1.5467 - β 1.5810 1.7796 1.8709 1.9437 1.9873 - γ 1.9519 2.3707 2.5999 2.8158 3.1966 - δ 0.0843 0.1103 0.1245 0.1410 0.1704 - σ 0.7984 0.8444 0.8722 0.9044 0.9651 - - - - For more examples of adjoint usage on large parameter models, consult the [DiffEqFlux documentation](https://diffeqflux.sciml.ai/dev/) ## Including Process Noise: Estimation of Stochastic Differential Equations @@ -483,7 +164,6 @@ $$dx = (\alpha - \beta y)xdt + \phi_1 xdW_1$$ $$dy = (\delta x - \gamma)ydt + \phi_2 ydW_2$$ - ```julia function lotka_volterra_noise(du,u,p,t) du[1] = p[5]*u[1] @@ -493,18 +173,8 @@ p = [1.5, 1.0, 3.0, 1.0, 0.3, 0.3] prob = SDEProblem(lotka_volterra,lotka_volterra_noise,u0,(0.0,10.0),p) ``` - - - - SDEProblem with uType Array{Float64,1} and tType Float64. In-place: true - timespan: (0.0, 10.0) - u0: [1.0, 1.0] - - - Solving it repeatedly confirms the randomness of the solution - ```julia sol = solve(prob,saveat=0.01) p1 = plot(sol) @@ -515,63 +185,29 @@ p3 = plot(sol) plot(p1,p2,p3) ``` - - - -![svg](/tutorials/10_BayesianDiffEq_files/10_BayesianDiffEq_23_0.svg) - - - With the `MonteCarloSummary` it is easy to summarize the results from multiple runs through the `EnsembleProblem` interface, here we run the problem for 1000 `trajectories` and visualize the summary: - ```julia sol = solve(EnsembleProblem(prob),SRIW1(),saveat=0.1,trajectories=500) summ = MonteCarloSummary(sol) plot(summ) ``` - - - -![svg](/tutorials/10_BayesianDiffEq_files/10_BayesianDiffEq_25_0.svg) - - - Get data from the means to fit: - ```julia using DiffEqBase.EnsembleAnalysis averagedata = Array(timeseries_steps_mean(sol)) ``` - - - - 2×101 Array{Float64,2}: - 1.0 1.04218 1.05885 1.03187 0.967422 … 0.190811 0.197071 0.203714 - 1.0 1.22803 1.5283 1.89036 2.30967 1.16424 1.11006 1.07984 - - - Now fit the means with Turing. We will utilize multithreading with the [`EnsembleProblem`](https://docs.sciml.ai/stable/tutorials/sde_example/#Ensemble-Simulations-1) interface to speed up the SDE parameter estimation. 
- ```julia Threads.nthreads() ``` - - - - 16 - - - - ```julia Turing.setadbackend(:forwarddiff) @@ -596,63 +232,4 @@ end; model = fitlv(averagedata) chain = sample(model, NUTS(.65),500) -``` - - ┌ Info: Found initial step size - │ ϵ = 0.2 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/GMBTf/src/inference/hmc.jl:629 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 0%|▏ | ETA: 0:03:49┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - Sampling: 100%|█████████████████████████████████████████| Time: 2:33:35 - - - - - - Object of type Chains, with data of type 250×19×1 Array{Float64,3} - - Iterations = 1:250 - Thinning interval = 1 - Chains = 1 - Samples per chain = 250 - internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth - parameters = α, β, γ, δ, σ, ϕ1, ϕ2 - - 2-element Array{ChainDataFrame,1} - - Summary Statistics - parameters mean std naive_se mcse ess r_hat - ────────── ────── ────── ──────── ────── ────── ────── - α 1.6255 0.0000 0.0000 0.0000 2.0325 2.5501 - β 1.1163 0.0000 0.0000 0.0000 2.0325 Inf - γ 3.2056 0.0000 0.0000 0.0000 2.0325 0.9960 - δ 0.9268 0.0000 0.0000 0.0000 2.0325 2.9880 - σ 0.0669 0.0000 0.0000 0.0000 2.0325 1.1011 - ϕ1 0.2329 0.0000 0.0000 0.0000 2.0325 3.2549 - ϕ2 0.2531 0.0000 0.0000 0.0000 2.0325 0.9960 - - Quantiles - parameters 2.5% 25.0% 50.0% 75.0% 97.5% - ────────── ────── ────── ────── ────── ────── - α 1.6255 1.6255 1.6255 1.6255 1.6255 - β 1.1163 1.1163 1.1163 1.1163 1.1163 - γ 3.2056 3.2056 3.2056 3.2056 3.2056 - δ 0.9268 0.9268 0.9268 0.9268 0.9268 - σ 0.0669 0.0669 0.0669 0.0669 0.0669 - ϕ1 0.2329 0.2329 0.2329 0.2329 0.2329 - ϕ2 0.2531 0.2531 0.2531 0.2531 0.2531 - - - - - -```julia - ``` diff --git a/tutorials/differential-equations/Project.toml b/tutorials/differential-equations/Project.toml new file mode 100644 index 000000000..4330259fe --- /dev/null +++ b/tutorials/differential-equations/Project.toml @@ -0,0 +1,12 @@ +[deps] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" +DiffEqBayes = "ebbdde9d-f333-5424-9be2-dbf1e9acfb5e" +DiffEqSensitivity = "41bf760c-e81c-5289-8e54-58b1f1f8abe2" +DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" +Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" diff --git a/tutorials/graphical-models/01_hidden-markov-model.jmd b/tutorials/graphical-models/01_hidden-markov-model.jmd index 2dc5f9057..bbe0bccd6 100644 --- a/tutorials/graphical-models/01_hidden-markov-model.jmd +++ b/tutorials/graphical-models/01_hidden-markov-model.jmd @@ -36,13 +36,6 @@ N = length(y); K = 
3; plot(y, xlim = (0,15), ylim = (-1,5), size = (500, 250)) ``` - - - -![svg](/tutorials/4_BayesHmm_files/4_BayesHmm_3_0.svg) - - - We can see that we have three states, one for each height of the plot (1, 2, 3). This height is also our emission parameter, so state one produces a value of one, state two produces a value of two, and so on. Ultimately, we would like to understand three major parameters: @@ -53,9 +46,9 @@ Ultimately, we would like to understand three major parameters: With this in mind, let's set up our model. We are going to use some of our knowledge as modelers to provide additional information about our system. This takes the form of the prior on our emission parameter. -\$\$ -m_i \sim Normal(i, 0.5), \space m = \{1,2,3\} -\$\$ +$$ +m_i \sim \mathrm{Normal}(i, 0.5) \quad \text{where} \quad m = \{1,2,3\} +$$ Simply put, this says that we expect state one to emit values in a Normally distributed manner, where the mean of each state's emissions is that state's value. The variance of 0.5 helps the model converge more quickly — consider the case where we have a variance of 1 or 2. In this case, the likelihood of observing a 2 when we are in state 1 is actually quite high, as it is within a standard deviation of the true emission value. Applying the prior that we are likely to be tightly centered around the mean prevents our model from being too confused about the state that is generating our observations. @@ -120,8 +113,8 @@ The code below generates an animation showing the graph of the data above, and t using StatsPlots # Extract our m and s parameters from the chain. -m_set = c[:m].value.data -s_set = c[:s].value.data +m_set = MCMCChains.group(c, :m).value +s_set = MCMCChains.group(c, :s).value # Iterate through the MCMC samples. Ns = 1:length(c) @@ -130,7 +123,7 @@ Ns = 1:length(c) animation = @animate for i in Ns m = m_set[i, :]; s = Int.(s_set[i,:]); - emissions = collect(skipmissing(m[s])) + emissions = m[s] p = plot(y, c = :red, size = (500, 250), @@ -139,60 +132,32 @@ animation = @animate for i in Ns legend = :topright, label = "True data", xlim = (0,15), ylim = (-1,5)); - plot!(emissions, color = :blue, label = "Sample $$N") -end every 10; + plot!(emissions, color = :blue, label = "Sample $N") +end every 10 ``` -![animation](https://user-images.githubusercontent.com/422990/50612436-de588980-0e8e-11e9-8635-4e3e97c0d7f9.gif) - Looks like our model did a pretty good job, but we should also check to make sure our chain converges. A quick check is to examine whether the diagonal (representing the probability of remaining in the current state) of the transition matrix appears to be stationary. The code below extracts the diagonal and shows a traceplot of each persistence probability. ```julia # Index the chain with the persistence probabilities. -subchain = c[:,["T[$$i][$$i]" for i in 1:K],:] +subchain = MCMCChains.group(c, :T) +# TODO: This doesn't work anymore. Note sure what it was originally doing # Plot the chain. -plot(subchain, +plot( + subchain, colordim = :parameter, seriestype=:traceplot, title = "Persistence Probability", legend=:right - ) +) ``` - - - -![svg](/tutorials/4_BayesHmm_files/4_BayesHmm_11_0.svg) - - - A cursory examination of the traceplot above indicates that at least `T[3,3]` and possibly `T[2,2]` have converged to something resembling stationary. `T[1,1]`, on the other hand, has a slight "wobble", and seems less consistent than the others. 
We can use the diagnostic functions provided by [MCMCChain](https://github.com/TuringLang/MCMCChain.jl) to engage in some formal tests, like the Heidelberg and Welch diagnostic: - ```julia -heideldiag(c[:T]) +heideldiag(MCMCChains.group(c, :T)) ``` - - - - 1-element Array{ChainDataFrame{NamedTuple{(:parameters, Symbol("Burn-in"), :Stationarity, Symbol("p-value"), :Mean, :Halfwidth, :Test),Tuple{Array{String,1},Array{Float64,1},Array{Float64,1},Array{Float64,1},Array{Float64,1},Array{Float64,1},Array{Float64,1}}}},1}: - Heidelberger and Welch Diagnostic - Chain 1 - parameters Burn-in Stationarity p-value Mean Halfwidth Test - ────────── ─────── ──────────── ─────── ────── ───────── ────── - T[1][1] 50.0000 0.0000 0.0001 0.5329 0.0063 1.0000 - T[1][2] 50.0000 0.0000 0.0189 0.1291 0.0043 1.0000 - T[1][3] 50.0000 0.0000 0.0230 0.3381 0.0032 1.0000 - T[2][1] 30.0000 1.0000 0.2757 0.0037 0.0000 1.0000 - T[2][2] 0.0000 1.0000 0.1689 0.0707 0.0022 1.0000 - T[2][3] 0.0000 1.0000 0.1365 0.9255 0.0022 1.0000 - T[3][1] 50.0000 0.0000 0.0454 0.4177 0.0147 1.0000 - T[3][2] 40.0000 1.0000 0.0909 0.2549 0.0080 1.0000 - T[3][3] 50.0000 0.0000 0.0098 0.3274 0.0067 1.0000 - - - - The p-values on the test suggest that we cannot reject the hypothesis that the observed sequence comes from a stationary distribution, so we can be somewhat more confident that our transition matrix has converged to something reasonable. diff --git a/tutorials/graphical-models/Project.toml b/tutorials/graphical-models/Project.toml new file mode 100644 index 000000000..2704e9aad --- /dev/null +++ b/tutorials/graphical-models/Project.toml @@ -0,0 +1,5 @@ +[deps] +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" +Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" diff --git a/tutorials/introduction/01_introduction.jmd b/tutorials/introduction/01_introduction.jmd index 0fec4556d..452ff0a42 100644 --- a/tutorials/introduction/01_introduction.jmd +++ b/tutorials/introduction/01_introduction.jmd @@ -53,18 +53,6 @@ data = rand(Bernoulli(p_true), last(Ns)) data[1:5] ``` - - - - 5-element Array{Bool,1}: - 1 - 0 - 1 - 1 - 0 - - - After flipping all our coins, we want to set a prior belief about what we think the distribution of coin flips look like. In this case, we are going to choose a common prior distribution called the [Beta](https://en.wikipedia.org/wiki/Beta_distribution) distribution. @@ -81,11 +69,11 @@ For the mathematically inclined, the `Beta` distribution is updated by adding ea This works because mean of the `Beta` distribution is defined as the following: -\$\$ \text{E}[\text{Beta}] = \dfrac{\alpha}{\alpha+\beta} \$\$ +$$\text{E}[\text{Beta}] = \dfrac{\alpha}{\alpha+\beta}$$ Which is 0.5 when $$\alpha = \beta$$, as we expect for a large enough number of coin flips. As we increase the number of samples, our variance will also decrease, such that the distribution will reflect less uncertainty about the probability of receiving a heads. The definition of the variance for the `Beta` distribution is the following: -\$\$ \text{var}[\text{Beta}] = \dfrac{\alpha\beta}{(\alpha + \beta)^2 (\alpha + \beta + 1)} \$\$ +$$\text{var}[\text{Beta}] = \dfrac{\alpha\beta}{(\alpha + \beta)^2 (\alpha + \beta + 1)}$$ The intuition about this definition is that the variance of the distribution will approach 0 with more and more samples, as the denominator will grow faster than will the numerator. More samples means less variance. 
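Before animating the updated beliefs, here is a small illustrative sketch (not part of the original tutorial) of the exact conjugate update in code, reusing the `data` vector of coin flips generated above:

```julia
using Distributions  # already loaded earlier in the tutorial

heads = sum(data)             # number of heads observed
tails = length(data) - heads  # number of tails observed

# Conjugate update: add the observed counts to the prior's parameters.
exact_posterior = Beta(1 + heads, 1 + tails)

mean(exact_posterior)  # approaches the true probability of heads as flips accumulate
var(exact_posterior)   # shrinks towards zero as more flips are observed
```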
@@ -107,23 +95,16 @@ animation = @gif for (i, N) in enumerate(Ns) # Plotting plot(updated_belief, size = (500, 250), - title = "Updated belief after $$N observations", + title = "Updated belief after $N observations", xlabel = "probability of heads", ylabel = "", legend = nothing, xlim = (0,1), fill=0, α=0.3, w=3) vline!([p_true]) -end; +end ``` - ┌ Info: Saved animation to - │ fn = /home/cameron/code/TuringTutorials/tmp.gif - └ @ Plots /home/cameron/.julia/packages/Plots/Xnzc7/src/animation.jl:104 - - -![animation](https://user-images.githubusercontent.com/7974003/44995702-37c1b200-af9c-11e8-8b26-c88a528956af.gif) - The animation above shows that with increasing evidence our belief about the probability of heads in a coin flip slowly adjusts towards the true value. The orange line in the animation represents the true probability of seeing heads on a single coin flip, while the mode of the distribution shows what the model believes the probability of a heads is given the evidence it has seen. ### Coin Flipping With Turing @@ -151,7 +132,7 @@ First, we define the coin-flip model using Turing. @model coinflip(y) = begin # Our prior belief about the probability of heads in a coin. - p ~ Beta(1, 1) + p ~ Beta(1, 1) # The number of observations. N = length(y) @@ -184,13 +165,6 @@ p_summary = chain[:p] plot(p_summary, seriestype = :histogram) ``` - - - -![svg](/tutorials/0_Introduction_files/0_Introduction_21_0.svg) - - - Now we can build our plot: @@ -212,11 +186,4 @@ plot!(p, range(0, stop = 1, length = 100), pdf.(Ref(updated_belief), range(0, st vline!(p, [p_true], label = "True probability", c = :red) ``` - - - -![svg](/tutorials/0_Introduction_files/0_Introduction_23_0.svg) - - - As we can see, the Turing model closely approximates the true probability. Hopefully this tutorial has provided an easy-to-follow, yet informative introduction to Turing's simpler applications. More advanced usage will be demonstrated in later tutorials. diff --git a/tutorials/introduction/02_gaussian-mixture-model.jmd b/tutorials/introduction/02_gaussian-mixture-model.jmd index 4ed0c4110..e444183ba 100644 --- a/tutorials/introduction/02_gaussian-mixture-model.jmd +++ b/tutorials/introduction/02_gaussian-mixture-model.jmd @@ -27,31 +27,24 @@ x = mapreduce(c -> rand(MvNormal([μs[c], μs[c]], 1.), N), hcat, 1:2) scatter(x[1,:], x[2,:], legend = false, title = "Synthetic Dataset") ``` - - - -![svg](/tutorials/1_GaussianMixtureModel_files/1_GaussianMixtureModel_2_0.svg) - - - ## Gaussian Mixture Model in Turing To cluster the data points shown above, we use a model that consists of two mixture components (clusters) and assigns each datum to one of the components. The assignment thereof determines the distribution that the data point is generated from. In particular, in a Bayesian Gaussian mixture model with $$1 \leq k \leq K$$ components for 1-D data each data point $$x_i$$ with $$1 \leq i \leq N$$ is generated according to the following generative process. First we draw the parameters for each cluster, i.e. in our example we draw location of the distributions from a Normal: -\$\$ -\mu_k \sim Normal() \, , \; \forall k \\ -\$\$ +$$ +\mu_k \sim \mathrm{Normal}() \, , \; \forall k +$$ and then draw mixing weight for the $$K$$ clusters from a Dirichlet distribution, i.e. -\$\$ - w \sim Dirichlet(K, \alpha) \, . \\ -\$\$ +$$ + w \sim \mathrm{Dirichlet}(K, \alpha) \, . 
+$$ After having constructed all the necessary model parameters, we can generate an observation by first selecting one of the clusters and then drawing the datum accordingly, i.e. -\$\$ - z_i \sim Categorical(w) \, , \; \forall i \\ - x_i \sim Normal(\mu_{z_i}, 1.) \, , \; \forall i -\$\$ +$$ + z_i \sim \mathrm{Categorical}(w) \, , \; \forall i \\ + x_i \sim \mathrm{Normal}(\mu_{z_i}, 1.) \, , \; \forall i +$$ For more details on Gaussian mixture models, we refer to Christopher M. Bishop, *Pattern Recognition and Machine Learning*, Section 9. @@ -60,21 +53,9 @@ For more details on Gaussian mixture models, we refer to Christopher M. Bishop, using Turing, MCMCChains # Turn off the progress monitor. -Turing.turnprogress(false) +Turing.turnprogress(false); ``` - ┌ Info: [Turing]: progress logging is disabled globally - └ @ Turing /home/cameron/.julia/packages/Turing/cReBm/src/Turing.jl:22 - - - - - - false - - - - ```julia @model GaussianMixtureModel(x) = begin @@ -107,13 +88,6 @@ Turing.turnprogress(false) end ``` - - - - ##GaussianMixtureModel#361 (generic function with 2 methods) - - - After having specified the model in Turing, we can construct the model function and run a MCMC simulation to obtain assignments of the data points. @@ -139,17 +113,10 @@ In particular, in this example we consider the sample values of the location par ```julia -ids = findall(map(name -> occursin("μ", name), names(tchain))); +ids = findall(map(name -> occursin("μ", string(name)), names(tchain))); p=plot(tchain[:, ids, :], legend=true, labels = ["Mu 1" "Mu 2"], colordim=:parameter) ``` - - - -![svg](/tutorials/1_GaussianMixtureModel_files/1_GaussianMixtureModel_13_0.svg) - - - You'll note here that it appears the location means are switching between chains. We will address this in future tutorials. For those who are keenly interested, see [this](https://mc-stan.org/users/documentation/case-studies/identifying_mixture_models.html) article on potential solutions. For the moment, we will just use the first chain to ensure the validity of our inference. @@ -173,44 +140,23 @@ function predict(x, y, w, μ) end ``` - - - - predict (generic function with 1 method) - - - - ```julia contour(range(-5, stop = 3), range(-6, stop = 2), - (x, y) -> predict(x, y, [0.5, 0.5], [mean(tchain[:μ1].value), mean(tchain[:μ2].value)]) + (x, y) -> predict(x, y, [0.5, 0.5], [mean(tchain[:μ1]), mean(tchain[:μ2])]) ) scatter!(x[1,:], x[2,:], legend = false, title = "Synthetic Dataset") ``` - - - -![svg](/tutorials/1_GaussianMixtureModel_files/1_GaussianMixtureModel_18_0.svg) - - - ## Infered Assignments Finally, we can inspect the assignments of the data points infered using Turing. As we can see, the dataset is partitioned into two distinct groups. ```julia -assignments = collect(skipmissing(mean(tchain[:k].value, dims=1).data)) +# TODO: is there a better way than this icky `.nt.mean` stuff? 
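# (A possible alternative, untested here: collapse the grouped chain to a matrix and
# take the column means, e.g. `vec(mean(Array(MCMCChains.group(tchain, :k)); dims = 1))`.)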
+assignments = mean(MCMCChains.group(tchain, :k)).nt.mean scatter(x[1,:], x[2,:], legend = false, title = "Assignments on Synthetic Dataset", zcolor = assignments) ``` - - - - -![svg](/tutorials/1_GaussianMixtureModel_files/1_GaussianMixtureModel_21_0.svg) - - diff --git a/tutorials/introduction/Project.toml b/tutorials/introduction/Project.toml new file mode 100644 index 000000000..c3d3e22a9 --- /dev/null +++ b/tutorials/introduction/Project.toml @@ -0,0 +1,7 @@ +[deps] +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" +Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" diff --git a/tutorials/non-parameteric/01_infinite-mixture-model.jmd b/tutorials/non-parameteric/01_infinite-mixture-model.jmd index 310cad70a..5ac4ef9d8 100644 --- a/tutorials/non-parameteric/01_infinite-mixture-model.jmd +++ b/tutorials/non-parameteric/01_infinite-mixture-model.jmd @@ -23,12 +23,12 @@ The generative process of such a model can be written as: $$ \begin{align} -\pi_1 &\sim Beta(a, b) \\ +\pi_1 &\sim \mathrm{Beta}(a, b) \\ \pi_2 &= 1-\pi_1 \\ -\mu_1 &\sim Normal(\mu_0, \Sigma_0) \\ -\mu_2 &\sim Normal(\mu_0, \Sigma_0) \\ -z_i &\sim Categorical(\pi_1, \pi_2) \\ -x_i &\sim Normal(\mu_{z_i}, \Sigma) +\mu_1 &\sim \mathrm{Normal}(\mu_0, \Sigma_0) \\ +\mu_2 &\sim \mathrm{Normal}(\mu_0, \Sigma_0) \\ +z_i &\sim \mathrm{Categorical}(\pi_1, \pi_2) \\ +x_i &\sim \mathrm{Normal}(\mu_{z_i}, \Sigma) \end{align} $$ @@ -36,41 +36,32 @@ where $$\pi_1, \pi_2$$ are the mixing weights of the mixture model, i.e. $$\pi_1 We can implement this model in Turing for 1D data as follows: - ```julia -@model two_model(x) = begin - +@model function two_model(x) # Hyper-parameters μ0 = 0.0 σ0 = 1.0 # Draw weights. - π1 ~ Beta(1,1) + π1 ~ Beta(1,1) π2 = 1-π1 # Draw locations of the components. - μ1 ~ Normal(μ0, σ0) - μ2 ~ Normal(μ0, σ0) + μ1 ~ Normal(μ0, σ0) + μ2 ~ Normal(μ0, σ0) # Draw latent assignment. - z ~ Categorical([π1, π2]) + z ~ Categorical([π1, π2]) # Draw observation from selected component. if z == 1 - x ~ Normal(μ1, 1.0) + x ~ Normal(μ1, 1.0) else - x ~ Normal(μ2, 1.0) + x ~ Normal(μ2, 1.0) end end ``` - - - - DynamicPPL.ModelGen{var"###generator#282",(:x,),(),Tuple{}}(##generator#282, NamedTuple()) - - - #### Finite Mixture Model If we have more than two components, this model can elegantly be extend using a Dirichlet distribution as prior for the mixing weights $$\pi_1, \dots, \pi_K$$. Note that the Dirichlet distribution is the multivariate generalization of the beta distribution. 
The resulting model can be written as: @@ -78,9 +69,9 @@ If we have more than two components, this model can elegantly be extend using a $$ \begin{align} (\pi_1, \dots, \pi_K) &\sim Dirichlet(K, \alpha) \\ -\mu_k &\sim Normal(\mu_0, \Sigma_0), \;\; \forall k \\ +\mu_k &\sim \mathrm{Normal}(\mu_0, \Sigma_0), \;\; \forall k \\ z &\sim Categorical(\pi_1, \dots, \pi_K) \\ -x &\sim Normal(\mu_z, \Sigma) +x &\sim \mathrm{Normal}(\mu_z, \Sigma) \end{align} $$ @@ -101,9 +92,9 @@ We now will utilize the fact that one can integrate out the mixing weights in a In fact, if the mixing weights are integrated out, the conditional prior for the latent variable $$z$$ is given by: -\$\$ -p(z_i = k \mid z_{\not i}, \alpha) = \frac{n_k + \alpha/K}{N - 1 + \alpha} -\$\$ +$$ +p(z_i = k \mid z_{\not i}, \alpha) = \frac{n_k + \alpha K}{N - 1 + \alpha} +$$ where $$z_{\not i}$$ are the latent assignments of all observations except observation $$i$$. Note that we use $$n_k$$ to denote the number of observations at component $$k$$ excluding observation $$i$$. The parameter $$\alpha$$ is the concentration parameter of the Dirichlet distribution used as prior over the mixing weights. @@ -113,15 +104,15 @@ To obtain the Chinese restaurant process construction, we can now derive the con For $$n_k > 0$$ we obtain: -\$\$ -p(z_i = k \mid z_{\not i}, \alpha) = \frac{n_k}{N - 1 + \alpha} -\$\$ +$$ +p(z_i = k \mid z_{\not i}, \alpha) = \frac{n_k}{N - 1 + \alpha} +$$ and for all infinitely many clusters that are empty (combined) we get: -\$\$ +$$ p(z_i = k \mid z_{\not i}, \alpha) = \frac{\alpha}{N - 1 + \alpha} -\$\$ +$$ Those equations show that the conditional prior for component assignments is proportional to the number of such observations, meaning that the Chinese restaurant process has a rich get richer property. @@ -159,21 +150,14 @@ using Plots # Plot the cluster assignments over time @gif for i in 1:Nmax scatter(collect(1:i), z[1:i], markersize = 2, xlabel = "observation (i)", ylabel = "cluster (k)", legend = false) -end; +end ``` - ┌ Info: Saved animation to - │ fn = /home/cameron/code/TuringTutorials/tmp.gif - └ @ Plots /home/cameron/.julia/packages/Plots/Xnzc7/src/animation.jl:104 - - -![tmp](https://user-images.githubusercontent.com/422990/55284032-90cfa980-5323-11e9-8a99-f9315db170cb.gif) - Further, we can see that the number of clusters is logarithmic in the number of observations and data points. This is a side-effect of the "rich-get-richer" phenomenon, i.e. we expect large clusters and thus the number of clusters has to be smaller than the number of observations. -\$\$ -E[K \mid N] \approx \alpha \cdot log \big(1 + \frac{N}{\alpha}\big) -\$\$ +$$ +\mathbb{E}[K \mid N] \approx \alpha \cdot log \big(1 + \frac{N}{\alpha}\big) +$$ We can see from the equation that the concentration parameter $$\alpha$$ allows us to control the number of clusters formed *a priori*. @@ -181,8 +165,7 @@ In Turing we can implement an infinite Gaussian mixture model using the Chinese ```julia -@model infiniteGMM(x) = begin - +@model function infiniteGMM(x) # Hyper-parameters, i.e. concentration parameter and parameters of H. α = 1.0 μ0 = 0.0 @@ -207,14 +190,14 @@ In Turing we can implement an infinite Gaussian mixture model using the Chinese nk = Vector{Int}(map(k -> sum(z .== k), 1:K)) # Draw the latent assignment. - z[i] ~ ChineseRestaurantProcess(rpm, nk) + z[i] ~ ChineseRestaurantProcess(rpm, nk) # Create a new cluster? if z[i] > K push!(μ, 0.0) # Draw location of new cluster. - μ[z[i]] ~ H + μ[z[i]] ~ H end # Draw observation. 
@@ -223,13 +206,6 @@ In Turing we can implement an infinite Gaussian mixture model using the Chinese end ``` - - - - DynamicPPL.ModelGen{var"###generator#800",(:x,),(),Tuple{}}(##generator#800, NamedTuple()) - - - We can now use Turing to infer the assignments of some data points. First, we will create some random data that comes from three clusters, with means of 0, -5, and 10. @@ -254,60 +230,45 @@ model_fun = infiniteGMM(data); chain = sample(model_fun, SMC(), iterations); ``` - Sampling: 100%|█████████████████████████████████████████| Time: 0:00:00 - - Finally, we can plot the number of clusters in each sample. ```julia # Extract the number of clusters for each sample of the Markov chain. -k = map(t -> length(unique(chain[:z].value[t,:,:])), 1:iterations); +k = map( + t -> length(unique(vec(chain[t, MCMCChains.namesingroup(chain, :z), :].value))), + 1:iterations +); # Visualize the number of clusters. plot(k, xlabel = "Iteration", ylabel = "Number of clusters", label = "Chain 1") ``` - - - -![svg](/tutorials/6_InfiniteMixtureModel_files/6_InfiniteMixtureModel_31_0.svg) - - - If we visualize the histogram of the number of clusters sampled from our posterior, we observe that the model seems to prefer 3 clusters, which is the true number of clusters. Note that the number of clusters in a Dirichlet process mixture model is not limited a priori and will grow to infinity with probability one. However, if conditioned on data the posterior will concentrate on a finite number of clusters enforcing the resulting model to have a finite amount of clusters. It is, however, not given that the posterior of a Dirichlet process Gaussian mixture model converges to the true number of clusters, given that data comes from a finite mixture model. See Jeffrey Miller and Matthew Harrison: [A simple example of Dirichlet process mixture inconsitency for the number of components](https://arxiv.org/pdf/1301.2708.pdf) for details. - ```julia histogram(k, xlabel = "Number of clusters", legend = false) ``` - - - -![svg](/tutorials/6_InfiniteMixtureModel_files/6_InfiniteMixtureModel_33_0.svg) - - - One issue with the Chinese restaurant process construction is that the number of latent parameters we need to sample scales with the number of observations. It may be desirable to use alternative constructions in certain cases. Alternative methods of constructing a Dirichlet process can be employed via the following representations: Size-Biased Sampling Process -\$\$ -j_k \sim Beta(1, \alpha) * surplus -\$\$ +$$ +j_k \sim \mathrm{Beta}(1, \alpha) \cdot \mathrm{surplus} +$$ Stick-Breaking Process -\$\$ -v_k \sim Beta(1, \alpha) -\$\$ +$$ +v_k \sim \mathrm{Beta}(1, \alpha) +$$ Chinese Restaurant Process -\$\$ +$$ p(z_n = k | z_{1:n-1}) \propto \begin{cases} \frac{m_k}{n-1+\alpha}, \text{ if } m_k > 0\\\ \frac{\alpha}{n-1+\alpha} \end{cases} -\$\$ +$$ For more details see [this article](https://www.stats.ox.ac.uk/~teh/research/npbayes/Teh2010a.pdf). 
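To make the stick-breaking representation above a little more concrete, here is a rough, self-contained sketch (a truncated approximation for illustration, not code from the tutorial) of how mixture weights arise from `Beta(1, α)` draws:

```julia
using Distributions  # `Beta` is also re-exported by Turing

# Truncated stick-breaking: break off Beta(1, α)-distributed fractions of the
# remaining stick and use the pieces as mixture weights.
function stick_breaking_weights(α, K_trunc)
    v = rand(Beta(1, α), K_trunc)  # fractions broken off the remaining stick
    w = zeros(K_trunc)
    remaining = 1.0                # surplus stick left over so far
    for k in 1:K_trunc
        w[k] = v[k] * remaining
        remaining *= 1 - v[k]
    end
    return w
end

w = stick_breaking_weights(1.0, 25)  # α = 1 and truncation level 25, chosen for illustration
sum(w)                               # close to 1 once the truncation level is large enough
```

A smaller `α` concentrates most of the weight on the first few components, which matches the rich-get-richer behaviour discussed earlier.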
diff --git a/tutorials/non-parameteric/Project.toml b/tutorials/non-parameteric/Project.toml new file mode 100644 index 000000000..dc3ff2545 --- /dev/null +++ b/tutorials/non-parameteric/Project.toml @@ -0,0 +1,4 @@ +[deps] +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" diff --git a/tutorials/regression/01_logistic-regression.jmd b/tutorials/regression/01_logistic-regression.jmd index 037ec08b2..7e4544379 100644 --- a/tutorials/regression/01_logistic-regression.jmd +++ b/tutorials/regression/01_logistic-regression.jmd @@ -33,17 +33,6 @@ Random.seed!(0); Turing.turnprogress(false) ``` - ┌ Info: [Turing]: progress logging is disabled globally - └ @ Turing /home/cameron/.julia/packages/Turing/cReBm/src/Turing.jl:22 - - - - - - false - - - ## Data Cleaning & Set Up Now we're going to import our dataset. The first six rows of the dataset are shown below so you can get a good feel for what kind of data we have. @@ -57,13 +46,6 @@ data = RDatasets.dataset("ISLR", "Default"); first(data, 6) ``` - - - -

6 rows × 4 columns: Default (Categorical), Student (Categorical), Balance (Float64), Income (Float64)
- - - Most machine learning processes require some effort to tidy up the data, and this is no different. We need to convert the `Default` and `Student` columns, which say "Yes" or "No" into 1s and 0s. Afterwards, we'll get rid of the old words-based columns. @@ -78,14 +60,6 @@ select!(data, Not([:Default, :Student])) # Show the first six rows of our edited dataset. first(data, 6) ``` - - - - -

6 rows × 4 columns: Balance (Float64), Income (Float64), DefaultNum (Float64), StudentNum (Float64)
- - - After we've done that tidying, it's time to split our dataset into training and testing sets, and separate the labels from the data. We separate our data into two halves, `train` and `test`. You can use a higher percentage of splitting (or a lower one) by modifying the `at = 0.05` argument. We have highlighted the use of only a 5% sample to show the power of Bayesian inference with small sample sizes. We must rescale our variables so that they are centered around zero by subtracting each column by the mean and dividing it by the standard deviation. Without this step, Turing's sampler will have a hard time finding a place to start searching for parameter estimates. To do this we will leverage `MLDataUtils`, which also lets us effortlessly shuffle our observations and perform a stratified split to get a representative test set. @@ -164,30 +138,6 @@ chain = mapreduce(c -> sample(logistic_regression(train, train_label, n, 1), HMC describe(chain) ``` - - - - 2-element Array{ChainDataFrame,1} - - Summary Statistics - parameters mean std naive_se mcse ess r_hat - ────────── ─────── ────── ──────── ────── ───────── ────── - balance 1.6517 0.3099 0.0046 0.0080 110.2122 1.0004 - income -0.5174 0.3241 0.0048 0.0081 1440.4337 1.0010 - intercept -3.8265 0.5148 0.0077 0.0148 54.8792 1.0004 - student -1.8662 0.6088 0.0091 0.0223 840.9122 1.0037 - - Quantiles - parameters 2.5% 25.0% 50.0% 75.0% 97.5% - ────────── ─────── ─────── ─────── ─────── ─────── - balance 1.1418 1.4534 1.6331 1.8242 2.2196 - income -1.1678 -0.7300 -0.5094 -0.3006 0.1079 - intercept -4.6202 -4.0685 -3.7947 -3.5465 -3.0855 - student -3.0690 -2.2803 -1.8574 -1.4528 -0.7137 - - - - Since we ran multiple chains, we may as well do a spot check to make sure each chain converges around similar points. @@ -195,13 +145,6 @@ Since we ran multiple chains, we may as well do a spot check to make sure each c plot(chain) ``` - - - -![svg](/tutorials/2_LogisticRegression_files/2_LogisticRegression_13_0.svg) - - - Looks good! We can also use the `corner` function from MCMCChains to show the distributions of the various parameters of our logistic regression. @@ -215,13 +158,6 @@ l = [:student, :balance, :income] corner(chain, l) ``` - - - -![svg](/tutorials/2_LogisticRegression_files/2_LogisticRegression_15_0.svg) - - - Fortunately the corner plot appears to demonstrate unimodal distributions for each of our parameters, so it should be straightforward to take the means of each parameter's sampled values to estimate our model to make predictions. ## Making Predictions @@ -233,10 +169,10 @@ The `prediction` function below takes a `Matrix` and a `Chain` object. It takes ```julia function prediction(x::Matrix, chain, threshold) # Pull the means from each parameter's sampled values in the chain. - intercept = mean(chain[:intercept].value) - student = mean(chain[:student].value) - balance = mean(chain[:balance].value) - income = mean(chain[:income].value) + intercept = mean(chain[:intercept]) + student = mean(chain[:student]) + balance = mean(chain[:balance]) + income = mean(chain[:income]) # Retrieve the number of rows. n, _ = size(x) @@ -271,13 +207,6 @@ predictions = prediction(test, chain, threshold) loss = sum((predictions - test_label).^2) / length(test_label) ``` - - - - 0.1163157894736842 - - - Perhaps more important is to see what percentage of defaults we correctly predicted. The code below simply counts defaults and predictions and presents the results. 
@@ -288,23 +217,15 @@ not_defaults = length(test_label) - defaults predicted_defaults = sum(test_label .== predictions .== 1) predicted_not_defaults = sum(test_label .== predictions .== 0) -println("Defaults: $$defaults - Predictions: $$predicted_defaults - Percentage defaults correct $$(predicted_defaults/defaults)") +println("Defaults: $defaults + Predictions: $predicted_defaults + Percentage defaults correct $(predicted_defaults/defaults)") -println("Not defaults: $$not_defaults - Predictions: $$predicted_not_defaults - Percentage non-defaults correct $$(predicted_not_defaults/not_defaults)") +println("Not defaults: $not_defaults + Predictions: $predicted_not_defaults + Percentage non-defaults correct $(predicted_not_defaults/not_defaults)") ``` - Defaults: 316.0 - Predictions: 265 - Percentage defaults correct 0.8386075949367089 - Not defaults: 9184.0 - Predictions: 8130 - Percentage non-defaults correct 0.8852351916376306 - - The above shows that with a threshold of 0.07, we correctly predict a respectable portion of the defaults, and correctly identify most non-defaults. This is fairly sensitive to a choice of threshold, and you may wish to experiment with it. This tutorial has demonstrated how to use Turing to perform Bayesian logistic regression. diff --git a/tutorials/regression/02_linear-regression.jmd b/tutorials/regression/02_linear-regression.jmd index 72226a5da..f3f41c07e 100644 --- a/tutorials/regression/02_linear-regression.jmd +++ b/tutorials/regression/02_linear-regression.jmd @@ -34,20 +34,6 @@ Random.seed!(0) Turing.turnprogress(false); ``` - ┌ Info: Precompiling Turing [fce5fe82-541a-59a6-adf8-730c64b5f9a0] - └ @ Base loading.jl:1260 - ┌ Info: Precompiling RDatasets [ce6b1742-4840-55fa-b093-852dadbb1d8b] - └ @ Base loading.jl:1260 - ┌ Info: Precompiling Plots [91a5bcdd-55d7-5caf-9e0b-520d859cae80] - └ @ Base loading.jl:1260 - ┌ Info: Precompiling StatsPlots [f3b207a7-027a-5e70-b257-86293d7955fd] - └ @ Base loading.jl:1260 - ┌ Info: Precompiling MLDataUtils [cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d] - └ @ Base loading.jl:1260 - ┌ Info: [Turing]: progress logging is disabled globally - └ @ Turing /home/cameron/.julia/packages/Turing/GMBTf/src/Turing.jl:22 - - We will use the `mtcars` dataset from the [RDatasets](https://github.com/johnmyleswhite/RDatasets.jl) package. `mtcars` contains a variety of statistics on different car models, including their miles per gallon, number of cylinders, and horsepower, among others. We want to know if we can construct a Bayesian linear regression model to predict the miles per gallon of a car, given the other statistics it has. Lets take a look at the data we have. @@ -61,25 +47,10 @@ data = RDatasets.dataset("datasets", "mtcars"); first(data, 6) ``` - - - -

6 rows × 12 columns (omitted printing of 3 columns): Model (String), MPG (Float64), Cyl (Int64), Disp (Float64), HP (Int64), DRat (Float64), WT (Float64), QSec (Float64), VS (Int64)
- - - - ```julia size(data) ``` - - - - (32, 12) - - - The next step is to get our data ready for testing. We'll split the `mtcars` dataset into two subsets, one for training our model and one for evaluating our model. Then, we separate the targets we want to learn (`MPG`, in this case) and standardize the datasets by subtracting each column's means and dividing by the standard deviation of that column. The resulting data is not very familiar looking, but this standardization process helps the sampler converge far easier. @@ -110,15 +81,15 @@ rescale!(test_target, μtarget, σtarget; obsdim = 1); In a traditional frequentist model using [OLS](https://en.wikipedia.org/wiki/Ordinary_least_squares), our model might look like: -\$\$ -MPG_i = \alpha + \boldsymbol{\beta}^\mathsf{T}\boldsymbol{X_i} -\$\$ +$$ +\mathrm{MPG}_i = \alpha + \boldsymbol{\beta}^\mathsf{T}\boldsymbol{X_i} +$$ where $$\boldsymbol{\beta}$$ is a vector of coefficients and $$\boldsymbol{X}$$ is a vector of inputs for observation $$i$$. The Bayesian model we are more concerned with is the following: -\$\$ -MPG_i \sim \mathcal{N}(\alpha + \boldsymbol{\beta}^\mathsf{T}\boldsymbol{X_i}, \sigma^2) -\$\$ +$$ +\mathrm{MPG}_i \sim \mathcal{N}(\alpha + \boldsymbol{\beta}^\mathsf{T}\boldsymbol{X_i}, \sigma^2) +$$ where $$\alpha$$ is an intercept term common to all observations, $$\boldsymbol{\beta}$$ is a coefficient vector, $$\boldsymbol{X_i}$$ is the observed data for car $$i$$, and $$\sigma^2$$ is a common variance term. @@ -144,32 +115,13 @@ For $$\sigma^2$$, we assign a prior of `truncated(Normal(0, 100), 0, Inf)`. This end ``` - - - - DynamicPPL.ModelGen{var"###generator#273",(:x, :y),(),Tuple{}}(##generator#273, NamedTuple()) - - - With our model specified, we can call the sampler. We will use the No U-Turn Sampler ([NUTS](http://turing.ml/docs/library/#-turingnuts--type)) here. - ```julia model = linear_regression(train, train_target) chain = sample(model, NUTS(0.65), 3_000); ``` - ┌ Info: Found initial step size - │ ϵ = 1.6 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/GMBTf/src/inference/hmc.jl:629 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/P9wqk/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/P9wqk/src/hamiltonian.jl:47 - - As a visual check to confirm that our coefficients have converged, we show the densities and trace plots for our parameters using the `plot` functionality. @@ -177,60 +129,12 @@ As a visual check to confirm that our coefficients have converged, we show the d plot(chain) ``` - - - -![svg](/tutorials/5_LinearRegression_files/5_LinearRegression_12_0.svg) - - - It looks like each of our parameters has converged. We can check our numerical esimates using `describe(chain)`, as below. 
- ```julia describe(chain) ``` - - - - 2-element Array{ChainDataFrame,1} - - Summary Statistics - parameters mean std naive_se mcse ess r_hat - ──────────────── ─────── ────── ──────── ────── ──────── ────── - coefficients[1] -0.0413 0.5648 0.0126 0.0389 265.1907 1.0010 - coefficients[2] 0.2770 0.6994 0.0156 0.0401 375.2777 1.0067 - coefficients[3] -0.4116 0.3850 0.0086 0.0160 695.3990 1.0032 - coefficients[4] 0.1805 0.2948 0.0066 0.0126 479.9290 1.0010 - coefficients[5] -0.2669 0.7168 0.0160 0.0316 373.0291 1.0009 - coefficients[6] 0.0256 0.3461 0.0077 0.0119 571.0954 1.0028 - coefficients[7] 0.0277 0.3899 0.0087 0.0174 637.1596 1.0007 - coefficients[8] 0.1535 0.3050 0.0068 0.0117 579.1998 1.0032 - coefficients[9] 0.1223 0.2839 0.0063 0.0105 587.6752 0.9995 - coefficients[10] -0.2839 0.3975 0.0089 0.0195 360.9612 1.0019 - intercept 0.0058 0.1179 0.0026 0.0044 580.0222 0.9995 - σ₂ 0.3017 0.1955 0.0044 0.0132 227.2322 1.0005 - - Quantiles - parameters 2.5% 25.0% 50.0% 75.0% 97.5% - ──────────────── ─────── ─────── ─────── ─────── ────── - coefficients[1] -1.0991 -0.4265 -0.0199 0.3244 1.1093 - coefficients[2] -1.1369 -0.1523 0.2854 0.7154 1.6488 - coefficients[3] -1.1957 -0.6272 -0.3986 -0.1800 0.3587 - coefficients[4] -0.3896 -0.0155 0.1663 0.3593 0.7818 - coefficients[5] -1.6858 -0.6835 -0.2683 0.1378 1.1995 - coefficients[6] -0.6865 -0.1672 0.0325 0.2214 0.7251 - coefficients[7] -0.7644 -0.1976 0.0090 0.2835 0.8185 - coefficients[8] -0.4980 -0.0194 0.1451 0.3428 0.7685 - coefficients[9] -0.4643 -0.0294 0.1237 0.2807 0.7218 - coefficients[10] -1.0898 -0.5091 -0.2846 -0.0413 0.5163 - intercept -0.2240 -0.0671 0.0083 0.0746 0.2364 - σ₂ 0.1043 0.1860 0.2525 0.3530 0.8490 - - - - ## Comparing to OLS A satisfactory test of our model is to evaluate how well it predicts. Importantly, we want to compare our model to existing tools like OLS. The code below uses the [GLM.jl]() package to generate a traditional OLS multiple regression model on the same data as our probabalistic model. @@ -256,10 +160,6 @@ p = GLM.predict(ols, test_with_intercept) test_prediction_ols = μtarget .+ σtarget .* p; ``` - ┌ Info: Precompiling GLM [38e38edf-8417-5370-95a0-9cbb8c7f171a] - └ @ Base loading.jl:1260 - - The function below accepts a chain and an input matrix and calculates predictions. We use the samples of the model parameters in the chain starting with sample 200, which is where the warm-up period for the NUTS sampler ended. @@ -272,16 +172,8 @@ function prediction(chain, x) end ``` - - - - prediction (generic function with 1 method) - - - When we make predictions, we unstandardize them so they are more understandable. - ```julia # Calculate the predictions for the training and testing sets # and unstandardize them. @@ -298,20 +190,12 @@ DataFrame( ) ``` - - - -

10 rows × 3 columns: MPG (Float64), Bayes (Float64), OLS (Float64)
- - - Now let's evaluate the loss for each method, and each prediction set. We will use the mean squared error to evaluate loss, given by -\$\$ -\text{MSE} = \frac{1}{n} \sum_{i=1}^n {(y_i - \hat{y_i})^2} -\$\$ +$$ +\mathrm{MSE} = \frac{1}{n} \sum_{i=1}^n {(y_i - \hat{y_i})^2} +$$ where $$y_i$$ is the actual value (true MPG) and $$\hat{y_i}$$ is the predicted value using either OLS or Bayesian linear regression. A lower SSE indicates a closer fit to the data. - ```julia println( "Training set:", @@ -330,12 +214,4 @@ println( ) ``` - Training set: - Bayes loss: 4.664508273535872 - OLS loss: 4.648142085690519 - Test set: - Bayes loss: 14.66153554719035 - OLS loss: 14.796847779051628 - - As we can see above, OLS and our Bayesian model fit our training and test data set about the same. diff --git a/tutorials/regression/03_poisson-regression.jmd b/tutorials/regression/03_poisson-regression.jmd index 2ca7349bb..db0dae1a6 100644 --- a/tutorials/regression/03_poisson-regression.jmd +++ b/tutorials/regression/03_poisson-regression.jmd @@ -26,17 +26,6 @@ Random.seed!(12); Turing.turnprogress(false) ``` - ┌ Info: [Turing]: progress logging is disabled globally - └ @ Turing /home/cameron/.julia/packages/Turing/cReBm/src/Turing.jl:22 - - - - - - false - - - # Generating data We start off by creating a toy dataset. We take the case of a person who takes medicine to prevent excessive sneezing. Alcohol consumption increases the rate of sneezing for that person. Thus, the two factors affecting the number of sneezes in a given day are alcohol consumption and whether the person has taken his medicine. Both these variable are taken as boolean valued while the number of sneezes will be a count valued variable. We also take into consideration that the interaction between the two boolean variables will affect the number of sneezes @@ -66,13 +55,6 @@ df = DataFrame(nsneeze = nsneeze_data, alcohol_taken = alcohol_data, nomeds_take df[sample(1:nrow(df), 5, replace = false), :] ``` - - - -

5 rows × 4 columns: nsneeze (Int64), alcohol_taken (Float64), nomeds_taken (Float64), product_alcohol_meds (Float64)
- - - # Visualisation of the dataset We plot the distribution of the number of sneezes for the 4 different cases taken above. As expected, the person sneezes the most when he has taken alcohol and not taken his medicine. He sneezes the least when he doesn't consume alcohol and takes his medicine. @@ -87,13 +69,6 @@ p4 = Plots.histogram((df[(df[:,:alcohol_taken] .== 1) .& (df[:,:nomeds_taken] .= plot(p1, p2, p3, p4, layout = (2, 2), legend = false) ``` - - - -![svg](/tutorials/7_PoissonRegression_files/7_PoissonRegression_5_0.svg) - - - We must convert our `DataFrame` data into the `Matrix` form as the manipulations that we are about are designed to work with `Matrix` data. We also separate the features from the labels which will be later used by the Turing sampler to generate samples from the posterior. @@ -104,39 +79,6 @@ data_labels = df[:,:nsneeze] data ``` - - - - 400×3 Array{Float64,2}: - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - ⋮ - 1.0 1.0 1.0 - 1.0 1.0 1.0 - 1.0 1.0 1.0 - 1.0 1.0 1.0 - 1.0 1.0 1.0 - 1.0 1.0 1.0 - 1.0 1.0 1.0 - 1.0 1.0 1.0 - 1.0 1.0 1.0 - 1.0 1.0 1.0 - 1.0 1.0 1.0 - 1.0 1.0 1.0 - - - We must recenter our data about 0 to help the Turing sampler in initialising the parameter estimates. So, normalising the data in each column by subtracting the mean and dividing by the standard deviation: @@ -145,39 +87,6 @@ We must recenter our data about 0 to help the Turing sampler in initialising the data = (data .- mean(data, dims=1)) ./ std(data, dims=1) ``` - - - - 400×3 Array{Float64,2}: - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - -0.998749 -0.998749 -0.576628 - ⋮ - 0.998749 0.998749 1.72988 - 0.998749 0.998749 1.72988 - 0.998749 0.998749 1.72988 - 0.998749 0.998749 1.72988 - 0.998749 0.998749 1.72988 - 0.998749 0.998749 1.72988 - 0.998749 0.998749 1.72988 - 0.998749 0.998749 1.72988 - 0.998749 0.998749 1.72988 - 0.998749 0.998749 1.72988 - 0.998749 0.998749 1.72988 - 0.998749 0.998749 1.72988 - - - # Declaring the Model: Poisson Regression Our model, `poisson_regression` takes four arguments: @@ -228,110 +137,6 @@ chain = mapreduce( 1:num_chains); ``` - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). 
- │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). 
- │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Info: Found initial step size - │ ϵ = 2.384185791015625e-8 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/cReBm/src/inference/hmc.jl:556 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Info: Found initial step size - │ ϵ = 0.00078125 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/cReBm/src/inference/hmc.jl:556 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - ┌ Info: Found initial step size - │ ϵ = 0.000390625 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/cReBm/src/inference/hmc.jl:556 - ┌ Info: Found initial step size - │ ϵ = 0.05 - └ @ Turing.Inference /home/cameron/.julia/packages/Turing/cReBm/src/inference/hmc.jl:556 - ┌ Warning: The current proposal will be rejected due to numerical error(s). - │ isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false) - └ @ AdvancedHMC /home/cameron/.julia/packages/AdvancedHMC/WJCQA/src/hamiltonian.jl:47 - - # Viewing the Diagnostics We use the Gelman, Rubin, and Brooks Diagnostic to check whether our chains have converged. Note that we require multiple chains to use this diagnostic which analyses the difference between these multiple chains. @@ -342,20 +147,6 @@ We expect the chains to have converged. 
This is because we have taken sufficient gelmandiag(chain) ``` - - - - Gelman, Rubin, and Brooks Diagnostic - parameters PSRF 97.5% - ────────── ────── ────── - b0 1.1861 1.3924 - b1 1.1307 1.2582 - b2 1.1350 1.2865 - b3 1.0660 1.1118 - - - - From the above diagnostic, we can conclude that the chains have converged because the PSRF values of the coefficients are close to 1. So, we have obtained the posterior distributions of the parameters. We transform the coefficients and recover theta values by taking the exponent of the meaned values of the coefficients `b0`, `b1`, `b2` and `b3`. We take the exponent of the means to get a better comparison of the relative values of the coefficients. We then compare this with the intuitive meaning that was described earlier. @@ -366,38 +157,22 @@ So, we have obtained the posterior distributions of the parameters. We transform c1 = chain[:,:,1] # Calculating the exponentiated means -b0_exp = exp(mean(c1[:b0].value)) -b1_exp = exp(mean(c1[:b1].value)) -b2_exp = exp(mean(c1[:b2].value)) -b3_exp = exp(mean(c1[:b3].value)) +b0_exp = exp(mean(c1[:b0])) +b1_exp = exp(mean(c1[:b1])) +b2_exp = exp(mean(c1[:b2])) +b3_exp = exp(mean(c1[:b3])) print("The exponent of the meaned values of the weights (or coefficients are): \n") print("b0: ", b0_exp, " \n", "b1: ", b1_exp, " \n", "b2: ", b2_exp, " \n", "b3: ", b3_exp, " \n") print("The posterior distributions obtained after sampling can be visualised as :\n") ``` - The exponent of the meaned values of the weights (or coefficients are): - b0: 5.116678482496325 - b1: 1.8791946940293356 - b2: 2.5245646467859904 - b3: 1.3005130214177183 - The posterior distributions obtained after sampling can be visualised as : - - - Visualising the posterior by plotting it: - +Visualising the posterior by plotting it: ```julia plot(chain) ``` - - - -![svg](/tutorials/7_PoissonRegression_files/7_PoissonRegression_19_0.svg) - - - # Interpreting the Obtained Mean Values The exponentiated mean of the coefficient `b1` is roughly half of that of `b2`. This makes sense because in the data that we generated, the number of sneezes was more sensitive to the medicinal intake as compared to the alcohol consumption. We also get a weaker dependence on the interaction between the alcohol consumption and the medicinal intake as can be seen from the value of `b3`. @@ -413,73 +188,14 @@ To remove these warmup values, we take all values except the first 200. This is describe(chain) ``` - - - - 2-element Array{ChainDataFrame,1} - - Summary Statistics - parameters mean std naive_se mcse ess r_hat - ────────── ────── ────── ──────── ────── ─────── ────── - b0 1.2639 2.1637 0.0216 0.2114 42.6654 1.0565 - b1 0.7091 0.8433 0.0084 0.0728 41.7860 1.0620 - b2 1.1998 1.7572 0.0176 0.1676 42.5718 1.0675 - b3 0.2357 0.7392 0.0074 0.0596 91.3888 1.0240 - - Quantiles - parameters 2.5% 25.0% 50.0% 75.0% 97.5% - ────────── ─────── ────── ────── ────── ────── - b0 -4.7815 1.6189 1.6409 1.6624 1.7026 - b1 0.4366 0.5151 0.5548 0.5986 3.7771 - b2 0.7707 0.8461 0.8848 0.9259 8.4861 - b3 -1.7651 0.2497 0.2882 0.3275 0.4136 - - - - - ```julia # Removing the first 200 values of the chains. 
chains_new = chain[201:2500,:,:] describe(chains_new) ``` - - - - 2-element Array{ChainDataFrame,1} - - Summary Statistics - parameters mean std naive_se mcse ess r_hat - ────────── ────── ────── ──────── ────── ─────── ────── - b0 1.6378 0.0823 0.0009 0.0055 46.6518 1.0182 - b1 0.5639 0.1729 0.0018 0.0117 45.5782 1.0196 - b2 0.8932 0.1727 0.0018 0.0118 45.0961 1.0195 - b3 0.2798 0.1544 0.0016 0.0104 46.0058 1.0195 - - Quantiles - parameters 2.5% 25.0% 50.0% 75.0% 97.5% - ────────── ────── ────── ────── ────── ────── - b0 1.5791 1.6226 1.6427 1.6637 1.7024 - b1 0.4413 0.5142 0.5516 0.5919 0.6726 - b2 0.7764 0.8448 0.8819 0.9187 0.9973 - b3 0.1785 0.2544 0.2893 0.3266 0.3942 - - - - -Visualising the new posterior by plotting it: - - ```julia plot(chains_new) ``` - - - -![svg](/tutorials/7_PoissonRegression_files/7_PoissonRegression_25_0.svg) - - - As can be seen from the numeric values and the plots above, the standard deviation values have decreased and all the plotted values are from the estimated posteriors. The exponentiated mean values, with the warmup samples removed, have not changed by much and they are still in accordance with their intuitive meanings as described earlier. diff --git a/tutorials/regression/04_multinomial-logistic-regression.jmd b/tutorials/regression/04_multinomial-logistic-regression.jmd index 4440737ea..2bf5934ad 100644 --- a/tutorials/regression/04_multinomial-logistic-regression.jmd +++ b/tutorials/regression/04_multinomial-logistic-regression.jmd @@ -33,25 +33,10 @@ Random.seed!(0) Turing.turnprogress(false); ``` - ┌ Info: Precompiling Turing [fce5fe82-541a-59a6-adf8-730c64b5f9a0] - └ @ Base loading.jl:1260 - ┌ Info: Precompiling RDatasets [ce6b1742-4840-55fa-b093-852dadbb1d8b] - └ @ Base loading.jl:1260 - ┌ Info: Precompiling StatsPlots [f3b207a7-027a-5e70-b257-86293d7955fd] - └ @ Base loading.jl:1260 - ┌ Info: Precompiling MLDataUtils [cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d] - └ @ Base loading.jl:1260 - ┌ Info: [Turing]: progress logging is disabled globally - └ @ Turing /home/cameron/.julia/packages/Turing/3goIa/src/Turing.jl:23 - ┌ Info: [AdvancedVI]: global PROGRESS is set as false - └ @ AdvancedVI /home/cameron/.julia/packages/AdvancedVI/PaSeO/src/AdvancedVI.jl:15 - - ## Data Cleaning & Set Up Now we're going to import our dataset. Twenty rows of the dataset are shown below so you can get a good feel for what kind of data we have. - ```julia # Import the "iris" dataset. data = RDatasets.dataset("datasets", "iris"); @@ -60,16 +45,8 @@ data = RDatasets.dataset("datasets", "iris"); data[rand(1:size(data, 1), 20), :] ``` - - - -

20 rows × 5 columns: SepalLength (Float64), SepalWidth (Float64), PetalLength (Float64), PetalWidth (Float64), Species (Cat…)
- - - In this data set, the outcome `Species` is currently coded as a string. We convert it to a numerical value by using indices `1`, `2`, and `3` to indicate species `setosa`, `versicolor`, and `virginica`, respectively. - ```julia # Recode the `Species` column. species = ["setosa", "versicolor", "virginica"] @@ -79,13 +56,6 @@ data[!, :Species_index] = indexin(data[!, :Species], species) data[rand(1:size(data, 1), 20), [:Species, :Species_index]] ``` - - - -

20 rows × 2 columns: Species (Cat…), Species_index (Union…)
- - - After we've done that tidying, it's time to split our dataset into training and testing sets, and separate the features and target from the data. Additionally, we must rescale our feature variables so that they are centered around zero by subtracting each column by the mean and dividing it by the standard deviation. Without this step, Turing's sampler will have a hard time finding a place to start searching for parameter estimates. @@ -151,99 +121,30 @@ Now we can run our sampler. This time we'll use [`HMC`](http://turing.ml/docs/li chain = sample(logistic_regression(train_features, train_target, 1), HMC(0.05, 10), MCMCThreads(), 1500, 3) ``` - - - - Chains MCMC chain (1500×19×3 Array{Float64,3}): - - Iterations = 1:1500 - Thinning interval = 1 - Chains = 1, 2, 3 - Samples per chain = 1500 - parameters = coefficients_versicolor[1], coefficients_versicolor[2], coefficients_versicolor[3], coefficients_versicolor[4], coefficients_virginica[1], coefficients_virginica[2], coefficients_virginica[3], coefficients_virginica[4], intercept_versicolor, intercept_virginica - internals = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, n_steps, nom_step_size, step_size - - Summary Statistics -  parameters   mean   std   naive_se   mcse   ess   rhat   -  Symbol   Float64   Float64   Float64   Float64   Float64   Float64   -                -  coefficients_versicolor[1]   1.5404   0.6753   0.0101   0.0335   332.4769   1.0017   -  coefficients_versicolor[2]   -1.4298   0.5098   0.0076   0.0171   786.5622   1.0015   -  coefficients_versicolor[3]   1.1382   0.7772   0.0116   0.0398   328.8508   1.0091   -  coefficients_versicolor[4]   0.0693   0.7300   0.0109   0.0374   368.3007   1.0048   -  coefficients_virginica[1]   0.4251   0.6983   0.0104   0.0294   381.6545   1.0017   -  coefficients_virginica[2]   -0.6744   0.6036   0.0090   0.0250   654.1030   1.0012   -  coefficients_virginica[3]   2.0076   0.8424   0.0126   0.0390   344.6077   1.0067   -  coefficients_virginica[4]   2.6704   0.7982   0.0119   0.0423   337.9600   1.0043   -  intercept_versicolor   0.8408   0.5257   0.0078   0.0167   874.4821   1.0044   -  intercept_virginica   -0.7351   0.6639   0.0099   0.0285   525.8135   1.0039   - - Quantiles -  parameters   2.5%   25.0%   50.0%   75.0%   97.5%   -  Symbol   Float64   Float64   Float64   Float64   Float64   -              -  coefficients_versicolor[1]   0.2659   1.0755   1.5231   1.9860   2.9059   -  coefficients_versicolor[2]   -2.4714   -1.7610   -1.4109   -1.0749   -0.4921   -  coefficients_versicolor[3]   -0.4377   0.6358   1.1456   1.6500   2.6215   -  coefficients_versicolor[4]   -1.3741   -0.4381   0.0652   0.5711   1.4808   -  coefficients_virginica[1]   -0.9452   -0.0487   0.4287   0.8991   1.7973   -  coefficients_virginica[2]   -1.8717   -1.0756   -0.6641   -0.2501   0.4867   -  coefficients_virginica[3]   0.3740   1.4180   1.9941   2.5862   3.6788   -  coefficients_virginica[4]   1.1985   2.1347   2.6359   3.1795   4.3502   -  intercept_versicolor   -0.1652   0.4888   0.8340   1.1858   1.8891   -  intercept_virginica   -2.0101   -1.1944   -0.7453   -0.2834   0.5836   - - - - Since we ran multiple chains, we may as well do a spot check to make sure each chain converges around similar points. - ```julia plot(chain) ``` - - - -![svg](/tutorials/8_MultinomialLogisticRegression_files/8_MultinomialLogisticRegression_13_0.svg) - - - Looks good! 
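Since we sampled three chains, a numerical cross-chain check can complement the visual spot check. A minimal sketch using the Gelman, Rubin, and Brooks diagnostic (also used in the Poisson regression tutorial above); values close to 1 suggest the chains agree:

```julia
# Potential scale reduction factors close to 1 indicate agreement between chains.
gelmandiag(chain)
```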
We can also use the `corner` function from MCMCChains to show the distributions of the various parameters of our multinomial logistic regression. The corner function requires MCMCChains and StatsPlots. - ```julia corner( - chain, [Symbol("coefficients_versicolor[$$i]") for i in 1:4]; - label=[string(i) for i in 1:4], fmt=:png + chain, MCMCChains.namesingroup(chain, :coefficients_versicolor); + label=[string(i) for i in 1:4] ) ``` - - - -![png](/tutorials/8_MultinomialLogisticRegression_files/8_MultinomialLogisticRegression_15_0.png) - - - - ```julia corner( - chain, [Symbol("coefficients_virginica[$$i]") for i in 1:4]; - label=[string(i) for i in 1:4], fmt=:png + chain, MCMCChains.namesingroup(chain, :coefficients_virginica); + label=[string(i) for i in 1:4] ) ``` - - - -![png](/tutorials/8_MultinomialLogisticRegression_files/8_MultinomialLogisticRegression_16_0.png) - - - Fortunately the corner plots appear to demonstrate unimodal distributions for each of our parameters, so it should be straightforward to take the means of each parameter's sampled values to estimate our model to make predictions. ## Making Predictions @@ -258,8 +159,14 @@ function prediction(x::Matrix, chain) # Pull the means from each parameter's sampled values in the chain. intercept_versicolor = mean(chain, :intercept_versicolor) intercept_virginica = mean(chain, :intercept_virginica) - coefficients_versicolor = [mean(chain, "coefficients_versicolor[$$i]") for i in 1:4] - coefficients_virginica = [mean(chain, "coefficients_virginica[$$i]") for i in 1:4] + coefficients_versicolor = [ + mean(chain, k) for k in + MCMCChains.namesingroup(chain, :coefficients_versicolor) + ] + coefficients_virginica = [ + mean(chain, k) for k in + MCMCChains.namesingroup(chain, :coefficients_virginica) + ] # Compute the index of the species with the highest probability for each observation. values_versicolor = intercept_versicolor .+ x * coefficients_versicolor @@ -281,16 +188,8 @@ predictions = prediction(test_features, chain) mean(predictions .== testset[!, :Species_index]) ``` - - - - 0.8533333333333334 - - - Perhaps more important is to see the accuracy per class. - ```julia for s in 1:3 rows = testset[!, :Species_index] .== s @@ -300,12 +199,4 @@ for s in 1:3 end ``` - Number of `setosa`: 22 - Percentage of `setosa` predicted correctly: 1.0 - Number of `versicolor`: 24 - Percentage of `versicolor` predicted correctly: 0.875 - Number of `virginica`: 29 - Percentage of `virginica` predicted correctly: 0.7241379310344828 - - This tutorial has demonstrated how to use Turing to perform Bayesian multinomial logistic regression. 
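As a closing aside (not part of the original tutorial), the same per-class comparison can be condensed into a confusion matrix, reusing the `predictions` and `testset` objects from above:

```julia
# Rows are the true species index, columns the predicted species index.
confusion = zeros(Int, 3, 3)
for (truth, pred) in zip(testset[!, :Species_index], predictions)
    confusion[truth, pred] += 1
end
confusion
```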
diff --git a/tutorials/regression/Project.toml b/tutorials/regression/Project.toml
new file mode 100644
index 000000000..51d942647
--- /dev/null
+++ b/tutorials/regression/Project.toml
@@ -0,0 +1,14 @@
+[deps]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
+MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
+MLDataUtils = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d"
+NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
+StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
+Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
diff --git a/tutorials/variational-inference/01_variational-inference.jmd b/tutorials/variational-inference/01_variational-inference.jmd
index 3378bf45b..18f247582 100644
--- a/tutorials/variational-inference/01_variational-inference.jmd
+++ b/tutorials/variational-inference/01_variational-inference.jmd
@@ -18,7 +18,7 @@ To get a bit more into what we can do with `vi`, we'll first have a look at a si

## Setup

-```julia; results = "hidden"
+```julia
using Random
using Turing
using Turing: Variational
@@ -40,7 +40,7 @@ Recall that *conjugate* refers to the fact that we can obtain a closed-form expr

First we generate some synthetic data, define the `Turing.Model` and instantiate the model on the data:

-```julia; results = "hidden"
+```julia
# generate data
x = randn(2000);
```
@@ -55,7 +55,7 @@ x = randn(2000);
end;
```

-```julia; results = "hidden"
+```julia
# Instantiate model
m = model(x);
```
@@ -67,7 +67,7 @@ We'll produce 10 000 samples with 200 steps used for adaptation and a target acc

If you don't understand what "adaptation" or "target acceptance rate" refers to, all you really need to know is that `NUTS` is known to be one of the most accurate and efficient samplers (when applicable) while requiring little to no hand-tuning to work well.

-```julia; results = "hidden"
+```julia
samples_nuts = sample(m, NUTS(200, 0.65), 10000);
```
@@ -99,7 +99,7 @@ print(@doc(Variational.ADVI))

To perform VI on the model `m` using 10 samples for gradient estimation and taking 1000 gradient steps is then as simple as:

-```julia; results = "hidden"
+```julia
# ADVI
advi = ADVI(10, 1000)
q = vi(m, advi);
@@ -147,7 +147,7 @@ Let's instead look the actual density `q`.

For that we need samples:

-```julia; results = "hidden"
+```julia
samples = rand(q, 10000);
```
@@ -261,12 +261,12 @@ As we'll see, there is really no additional work required to apply variational i

This section is basically copy-pasting the code from the [linear regression tutorial](../../tutorials/5-linearregression).

-```julia; results = "hidden"
+```julia
Random.seed!(1);
```

-```julia; results = "hidden"
+```julia
# Import RDatasets.
using RDatasets
@@ -304,7 +304,7 @@ function unstandardize(x, orig)
end
```

-```julia; results = "hidden"
+```julia
# Remove the model column.
select!(data, Not(:Model))
@@ -350,7 +350,7 @@ end;
```

-```julia; results = "hidden"
+```julia
n_obs, n_vars = size(train)
m = linear_regression(train, train_label, n_obs, n_vars);
```
@@ -403,7 +403,7 @@ typeof(q)

To compute statistics for our approximation we need samples:

-```julia; results = "hidden"
+```julia
z = rand(q, 10_000);
```
@@ -473,7 +473,7 @@ plot_variational_marginals(z, sym2range)

And let's compare this to using the `NUTS` sampler:

-```julia; results = "hidden"
+```julia
chain = sample(m, NUTS(0.65), 10_000);
```
@@ -504,7 +504,7 @@ That looks pretty good! But let's see how the predictive distributions looks for

Similarily to the linear regression tutorial, we're going to compare to multivariate ordinary linear regression using the `GLM` package:

-```julia; results = "hidden"
+```julia
# Import the GLM package.
using GLM
@@ -542,7 +542,7 @@ function prediction(samples::AbstractMatrix, sym2ranges, x)
end
```

-```julia; results = "hidden"
+```julia
# Unstandardize the dependent variable.
train_cut.MPG = unstandardize(train_cut.MPG, data.MPG);
test_cut.MPG = unstandardize(test_cut.MPG, data.MPG);
@@ -554,12 +554,12 @@ test_cut.MPG = unstandardize(test_cut.MPG, data.MPG);
first(test_cut, 6)
```

-```julia; results = "hidden"
+```julia
z = rand(q, 10_000);
```

-```julia; results = "hidden"
+```julia
# Calculate the predictions for the training and testing sets using the samples `z` from variational posterior
train_cut.VIPredictions = unstandardize(prediction(z, sym2range, train), data.MPG);
test_cut.VIPredictions = unstandardize(prediction(z, sym2range, test), data.MPG);
@@ -640,7 +640,7 @@ base_dist = Turing.DistributionsAD.TuringDiagMvNormal(zeros(d), ones(d))

`bijector(model::Turing.Model)` is defined by Turing, and will return a `bijector` which takes you from the space of the latent variables to the real space. In this particular case, this is a mapping `((0, ∞) × ℝ × ℝ¹⁰) → ℝ¹²`. We're interested in using a normal distribution as a base-distribution and transform samples to the latent space, thus we need the inverse mapping from the reals to the latent space:

-```julia; results = "hidden"
+```julia
to_constrained = inv(bijector(m));
```
@@ -657,7 +657,7 @@ function getq(θ)
end
```

-```julia; results = "hidden"
+```julia
q_mf_normal = vi(m, advi, getq, randn(2 * d));
```
@@ -713,7 +713,7 @@ end
advi = ADVI(10, 20_000)
```

-```julia; results = "hidden"
+```julia
q_full_normal = vi(m, advi, getq, randn(num_params); optimizer = Variational.DecayedADAGrad(1e-2));
```
@@ -728,7 +728,7 @@ A = q_full_normal.transform.ts[1].a
heatmap(cov(A * A'))
```

-```julia; results = "hidden"
+```julia
zs = rand(q_full_normal, 10_000);
```
@@ -744,7 +744,7 @@ plot(p1, p2, layout = (1, 2), size = (800, 2000))

So it seems like the "full" ADVI approach, i.e. no mean-field assumption, obtain the same modes as the mean-field approach but with greater uncertainty for some of the `coefficients`. This

-```julia; results = "hidden"
+```julia
# Unfortunately, it seems like this has quite a high variance which is likely to be due to numerical instability,
# so we consider a larger number of samples. If we get a couple of outliers due to numerical issues,
# these kind affect the mean prediction greatly.
@@ -752,7 +752,7 @@ z = rand(q_full_normal, 10_000);
```

-```julia; results = "hidden"
+```julia
train_cut.VIFullPredictions = unstandardize(prediction(z, sym2range, train), data.MPG);
test_cut.VIFullPredictions = unstandardize(prediction(z, sym2range, test), data.MPG);
```
diff --git a/tutorials/variational-inference/Project.toml b/tutorials/variational-inference/Project.toml
index dd28a2f4e..638a1fd97 100644
--- a/tutorials/variational-inference/Project.toml
+++ b/tutorials/variational-inference/Project.toml
@@ -24,4 +24,3 @@ RDatasets = "0.6.10"
StatsPlots = "0.14.13"
Turing = "0.14.3"
UnPack = "1.0.2"
-

From 6d38bcda70ef3a5a6223bdaa06219defadfcc107 Mon Sep 17 00:00:00 2001
From: Tor Erlend Fjelde
Date: Fri, 25 Sep 2020 05:03:58 +0100
Subject: [PATCH 12/12] small update to the VI tutorial

---
 .../01_variational-inference.jmd              | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/tutorials/variational-inference/01_variational-inference.jmd b/tutorials/variational-inference/01_variational-inference.jmd
index 18f247582..d65e01cd2 100644
--- a/tutorials/variational-inference/01_variational-inference.jmd
+++ b/tutorials/variational-inference/01_variational-inference.jmd
@@ -31,8 +31,8 @@ Random.seed!(42);

The Normal-(Inverse)Gamma conjugate model is defined by the following generative process

\begin{align}
-  s &\sim \mathrm{InverseGamma}(2, 3) \\\\
-  m &\sim \mathcal{N}(0, s) \\\\
+  s &\sim \mathrm{InverseGamma}(2, 3) \\
+  m &\sim \mathcal{N}(0, s) \\
  x_i &\overset{\text{i.i.d.}}{=} \mathcal{N}(m, s), \quad i = 1, \dots, n
\end{align}
@@ -154,19 +154,18 @@ samples = rand(q, 10000);

```julia
# setup for plotting
using Plots, LaTeXStrings, StatsPlots
-pyplot()
```

```julia
p1 = histogram(samples[1, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "")
density!(samples[1, :], label = "s (ADVI)", color = :blue, linewidth = 2)
-density!(collect(skipmissing(samples_nuts[:s].data)), label = "s (NUTS)", color = :green, linewidth = 2)
+density!(samples_nuts, :s; label = "s (NUTS)", color = :green, linewidth = 2)
vline!([var(x)], label = "s (data)", color = :black)
vline!([mean(samples[1, :])], color = :blue, label ="")

p2 = histogram(samples[2, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "")
density!(samples[2, :], label = "m (ADVI)", color = :blue, linewidth = 2)
-density!(collect(skipmissing(samples_nuts[:m].data)), label = "m (NUTS)", color = :green, linewidth = 2)
+density!(samples_nuts, :m; label = "m (NUTS)", color = :green, linewidth = 2)
vline!([mean(x)], color = :black, label = "m (data)")
vline!([mean(samples[2, :])], color = :blue, label="")
@@ -219,7 +218,7 @@ p_μ_pdf = z -> exp(logpdf(p_μ, (z - μₙ) * exp(- 0.5 * log(βₙ) + 0.5 * lo
p1 = plot();
histogram!(samples[1, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "")
density!(samples[1, :], label = "s (ADVI)", color = :blue)
-density!(vec(samples_nuts[:s].data), label = "s (NUTS)", color = :green)
+density!(samples_nuts, :s; label = "s (NUTS)", color = :green)
vline!([mean(samples[1, :])], linewidth = 1.5, color = :blue, label ="")

# normalize using Riemann approx. because of (almost certainly) numerical issues
@@ -233,7 +232,7 @@ xlims!(0.75, 1.35);
p2 = plot();
histogram!(samples[2, :], bins=100, normed=true, alpha=0.2, color = :blue, label = "")
density!(samples[2, :], label = "m (ADVI)", color = :blue)
-density!(vec(samples_nuts[:m].data), label = "m (NUTS)", color = :green)
+density!(samples_nuts, :m; label = "m (NUTS)", color = :green)
vline!([mean(samples[2, :])], linewidth = 1.5, color = :blue, label="")

@@ -252,13 +251,13 @@ p = plot(p1, p2; layout=(2, 1), size=(900, 500))

# Bayesian linear regression example using `ADVI`

-This is simply a duplication of the tutorial [5. Linear regression](../../tutorials/5-linearregression) but now with the addition of an approximate posterior obtained using `ADVI`.
+This is simply a duplication of the tutorial [5. Linear regression](../regression/02_linear-regression) but now with the addition of an approximate posterior obtained using `ADVI`.
As we'll see, there is really no additional work required to apply variational inference to a more complex `Model`.

-## Copy-paste from [5. Linear regression](../../tutorials/5-linearregression)
+## Copy-paste from [5. Linear regression](../regression/02_linear-regression)

-This section is basically copy-pasting the code from the [linear regression tutorial](../../tutorials/5-linearregression).
+This section is basically copy-pasting the code from the [linear regression tutorial](../regression/02_linear-regression).

```julia