From edd122a141c1731590885367ab42b06f45e4e419 Mon Sep 17 00:00:00 2001 From: athowes Date: Thu, 30 May 2024 15:32:51 +0100 Subject: [PATCH 01/28] Laplace and pathfinder via brms --- vignettes/approx-inference.Rmd | 179 +++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 vignettes/approx-inference.Rmd diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd new file mode 100644 index 000000000..6793965af --- /dev/null +++ b/vignettes/approx-inference.Rmd @@ -0,0 +1,179 @@ +--- +title: "Approximate Bayesian inference in `epidist`" +description: "..." +output: + bookdown::html_document2: + fig_caption: yes + code_folding: show + number_sections: true +pkgdown: + as_is: true +# csl: https://raw.githubusercontent.com/citation-style-language/styles/master/apa-numeric-superscript-brackets.csl +link-citations: true +vignette: > + %\VignetteIndexEntry{Getting started with epidist} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +bibliography: references.bib +--- + +```{r setup, include=FALSE} +# exclude compile warnings from cmdstanr +knitr::opts_chunk$set( + fig.path = "figures/epidist-", + cache = TRUE, + collapse = TRUE, + comment = "#>", + message = FALSE, + warning = FALSE, + error = FALSE +) +``` + +```{r load-requirements} +library(epidist) +library(data.table) +library(purrr) +library(ggplot2) +library(gt) +library(dplyr) +``` + +```{r} +meanlog <- 1.8 +sdlog <- 0.5 +obs_time <- 25 +sample_size <- 200 + +obs_cens_trunc <- simulate_gillespie() |> + simulate_secondary( + meanlog = meanlog, + sdlog = sdlog + ) |> + observe_process() |> + filter_obs_by_obs_time(obs_time = obs_time) + +obs_cens_trunc_samp <- obs_cens_trunc[sample(seq_len(.N), sample_size, replace = FALSE)] + +data <- obs_cens_trunc_samp + +formula <- brms::bf(delay_central | vreal(obs_t, pwindow_upr, swindow_upr) ~ 1, sigma ~ 1) + +fn <- brms::brm + +family <- brms::custom_family( + "latent_lognormal", + dpars = c("mu", "sigma"), + links = c("identity", "log"), + lb = c(NA, 0), + ub = c(NA, NA), + type = "real", + vars = c("pwindow", "swindow", "vreal1"), + loop = FALSE +) + +scode_functions <- " + \n real latent_lognormal_lpdf(vector y, vector mu, vector sigma, + \n vector pwindow, vector swindow, + \n array[] real obs_t) { + \n int n = num_elements(y); + \n vector[n] d = y - pwindow + swindow; + \n vector[n] obs_time = to_vector(obs_t) - pwindow; + \n return lognormal_lpdf(d | mu, sigma) - + \n lognormal_lcdf(obs_time | mu, sigma); + \n } + \n +" + +scode_parameters <- " + \n vector[N] swindow_raw; + \n vector[N] pwindow_raw; + \n +" + +scode_tparameters <- " + \n vector[N] pwindow;\n vector[N] swindow; + \n swindow = to_vector(vreal3) .* swindow_raw; + \n pwindow[noverlap] = to_vector(vreal2[noverlap]) .* pwindow_raw[noverlap]; + \n if (wN) { + \n pwindow[woverlap] = swindow[woverlap] .* pwindow_raw[woverlap]; + \n } + \n +" + +scode_priors <- " + \n swindow_raw ~ uniform(0, 1); + \n pwindow_raw ~ uniform(0, 1); + \n +" + +data <- data.table::as.data.table(data) +data[, id := 1:.N] +data[, obs_t := obs_at - ptime_lwr] + +data[, pwindow_upr := ifelse( + stime_lwr < ptime_upr, + stime_upr - ptime_lwr, + ptime_upr - ptime_lwr +)] + +data[, woverlap := as.numeric(stime_lwr < ptime_upr)] +data[, swindow_upr := stime_upr - stime_lwr] +data[, delay_central := stime_lwr - ptime_lwr] +data[, row_id := 1:.N] + +if (nrow(data) > 1) { + data <- data[, id := as.factor(id)] +} + +stanvars_functions <- brms::stanvar( + block = "functions", scode = scode_functions +) + 
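+# Each brms::stanvar() call below injects one of the Stan code strings defined
+# above into the model code that brms generates: data blocks pass the indices
+# of observations whose primary and secondary censoring windows overlap, the
+# parameters and transformed parameters blocks add the latent uniform
+# censoring windows, and the model block adds their uniform(0, 1) priors.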
+stanvars_data <- brms::stanvar( + block = "data", scode = "int wN;", + x = nrow(data[woverlap > 0]), + name = "wN" +) + + +brms::stanvar( + block = "data", scode = "array[N - wN] int noverlap;", + x = data[woverlap == 0][, row_id], + name = "noverlap" +) + +brms::stanvar( + block = "data", scode = "array[wN] int woverlap;", + x = data[woverlap > 0][, row_id], + name = "woverlap" +) + +stanvars_parameters <- brms::stanvar( + block = "parameters", scode = scode_parameters +) + +stanvars_tparameters <- brms::stanvar( + block = "tparameters", scode = scode_tparameters +) + +stanvars_priors <- brms::stanvar(block = "model", scode = scode_priors) + +stanvars_all <- stanvars_functions + stanvars_data + stanvars_parameters + + stanvars_tparameters + stanvars_priors + +fit_laplace <- fn( + formula = formula, family = family, stanvars = stanvars_all, + backend = "cmdstanr", data = data, algorithm = "laplace" +) + +fit_pathfinder <- fn( + formula = formula, family = family, stanvars = stanvars_all, + backend = "cmdstanr", data = data, algorithm = "pathfinder" +) + +fit_hmc <- fn( + formula = formula, family = family, stanvars = stanvars_all, + backend = "cmdstanr", data = data, algorithm = "sampling" +) +``` + +## Bibliography {-} From 7d824b6b16f21808aa561dd49b4582e258a30817 Mon Sep 17 00:00:00 2001 From: athowes Date: Mon, 10 Jun 2024 15:49:50 +0100 Subject: [PATCH 02/28] Update vignette outline and use S3 versions --- vignettes/approx-inference.Rmd | 147 +++++++-------------------------- 1 file changed, 29 insertions(+), 118 deletions(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index 6793965af..e005d162c 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -30,6 +30,30 @@ knitr::opts_chunk$set( ) ``` +# Background + +* What is the default inference method used in `epidist` (HMC) + * How does it work + * Why is it the default + * What are its strengths + * What are its drawbacks +* What are the atlernatives + * Why might you consider them + +## Laplace + +* Briefly, how does it work + +## Variational inference + +* Briefly, how does it work + +## Pathfinder + +* Briefly, how does it work + +# Demonstration + ```{r load-requirements} library(epidist) library(data.table) @@ -55,125 +79,12 @@ obs_cens_trunc <- simulate_gillespie() |> obs_cens_trunc_samp <- obs_cens_trunc[sample(seq_len(.N), sample_size, replace = FALSE)] -data <- obs_cens_trunc_samp - -formula <- brms::bf(delay_central | vreal(obs_t, pwindow_upr, swindow_upr) ~ 1, sigma ~ 1) - -fn <- brms::brm - -family <- brms::custom_family( - "latent_lognormal", - dpars = c("mu", "sigma"), - links = c("identity", "log"), - lb = c(NA, 0), - ub = c(NA, NA), - type = "real", - vars = c("pwindow", "swindow", "vreal1"), - loop = FALSE -) - -scode_functions <- " - \n real latent_lognormal_lpdf(vector y, vector mu, vector sigma, - \n vector pwindow, vector swindow, - \n array[] real obs_t) { - \n int n = num_elements(y); - \n vector[n] d = y - pwindow + swindow; - \n vector[n] obs_time = to_vector(obs_t) - pwindow; - \n return lognormal_lpdf(d | mu, sigma) - - \n lognormal_lcdf(obs_time | mu, sigma); - \n } - \n -" - -scode_parameters <- " - \n vector[N] swindow_raw; - \n vector[N] pwindow_raw; - \n -" - -scode_tparameters <- " - \n vector[N] pwindow;\n vector[N] swindow; - \n swindow = to_vector(vreal3) .* swindow_raw; - \n pwindow[noverlap] = to_vector(vreal2[noverlap]) .* pwindow_raw[noverlap]; - \n if (wN) { - \n pwindow[woverlap] = swindow[woverlap] .* pwindow_raw[woverlap]; - \n } - 
\n -" - -scode_priors <- " - \n swindow_raw ~ uniform(0, 1); - \n pwindow_raw ~ uniform(0, 1); - \n -" - -data <- data.table::as.data.table(data) -data[, id := 1:.N] -data[, obs_t := obs_at - ptime_lwr] - -data[, pwindow_upr := ifelse( - stime_lwr < ptime_upr, - stime_upr - ptime_lwr, - ptime_upr - ptime_lwr -)] - -data[, woverlap := as.numeric(stime_lwr < ptime_upr)] -data[, swindow_upr := stime_upr - stime_lwr] -data[, delay_central := stime_lwr - ptime_lwr] -data[, row_id := 1:.N] - -if (nrow(data) > 1) { - data <- data[, id := as.factor(id)] -} - -stanvars_functions <- brms::stanvar( - block = "functions", scode = scode_functions -) - -stanvars_data <- brms::stanvar( - block = "data", scode = "int wN;", - x = nrow(data[woverlap > 0]), - name = "wN" -) + - -brms::stanvar( - block = "data", scode = "array[N - wN] int noverlap;", - x = data[woverlap == 0][, row_id], - name = "noverlap" -) + -brms::stanvar( - block = "data", scode = "array[wN] int woverlap;", - x = data[woverlap > 0][, row_id], - name = "woverlap" -) - -stanvars_parameters <- brms::stanvar( - block = "parameters", scode = scode_parameters -) - -stanvars_tparameters <- brms::stanvar( - block = "tparameters", scode = scode_tparameters -) +data <- epidist_prepare(obs_cens_trunc_samp, model = "ltcad") -stanvars_priors <- brms::stanvar(block = "model", scode = scode_priors) - -stanvars_all <- stanvars_functions + stanvars_data + stanvars_parameters + - stanvars_tparameters + stanvars_priors - -fit_laplace <- fn( - formula = formula, family = family, stanvars = stanvars_all, - backend = "cmdstanr", data = data, algorithm = "laplace" -) - -fit_pathfinder <- fn( - formula = formula, family = family, stanvars = stanvars_all, - backend = "cmdstanr", data = data, algorithm = "pathfinder" -) - -fit_hmc <- fn( - formula = formula, family = family, stanvars = stanvars_all, - backend = "cmdstanr", data = data, algorithm = "sampling" -) +fit_hmc <- epidist(data = data, algorithm = "sampling") +fit_laplace <- epidist(data = data, algorithm = "laplace") +fit_variational <- epidist(data = data, algorithm = "meanfield") +fit_pathfinder <- epidist(data = data, algorithm = "pathfinder") ``` ## Bibliography {-} From e67f068098033a597e81d9471b57d3f9d3464b39 Mon Sep 17 00:00:00 2001 From: athowes Date: Mon, 10 Jun 2024 15:51:49 +0100 Subject: [PATCH 03/28] Fix typo --- vignettes/approx-inference.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index e005d162c..2d0530654 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -37,7 +37,7 @@ knitr::opts_chunk$set( * Why is it the default * What are its strengths * What are its drawbacks -* What are the atlernatives +* What are the alternatives * Why might you consider them ## Laplace From 7171327e9e308773eebc288415484189210077ba Mon Sep 17 00:00:00 2001 From: athowes Date: Tue, 11 Jun 2024 09:40:07 +0100 Subject: [PATCH 04/28] Rename ltcad to latent_individual --- vignettes/approx-inference.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index 2d0530654..2d0e4ed55 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -79,7 +79,7 @@ obs_cens_trunc <- simulate_gillespie() |> obs_cens_trunc_samp <- obs_cens_trunc[sample(seq_len(.N), sample_size, replace = FALSE)] -data <- epidist_prepare(obs_cens_trunc_samp, model = "ltcad") +data <- epidist_prepare(obs_cens_trunc_samp, model = 
"latent_individual") fit_hmc <- epidist(data = data, algorithm = "sampling") fit_laplace <- epidist(data = data, algorithm = "laplace") From bc4cb4edfadc1336748a944f3cbbeab509987d43 Mon Sep 17 00:00:00 2001 From: athowes Date: Tue, 11 Jun 2024 10:51:25 +0100 Subject: [PATCH 05/28] Background and Laplace --- vignettes/approx-inference.Rmd | 49 +++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index 2d0e4ed55..99ab87216 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -32,27 +32,40 @@ knitr::opts_chunk$set( # Background -* What is the default inference method used in `epidist` (HMC) - * How does it work - * Why is it the default - * What are its strengths - * What are its drawbacks -* What are the alternatives - * Why might you consider them +The `epidist` package uses Bayesian inference to estimate delay distributions. +Doing Bayesian inference amounts to approximating the posterior distribution of each model parameter^[See a work-in-progress vignette about the model structure.]. + +By default, `epidist` uses the No-U-Turn Sampler (NUTS) Hamiltonian Monte Carlo (HMC) algorithm to approximate the posterior distribution. +NUTS works by simulating from a Markov chain which has the posterior distribution as its stationary distribution. +As a result, when NUTS is run for sufficiently many iterations, the samples can be considered to be drawn from the posterior distribution, and used to compute relevant posterior quantities such as expectations. +A drawback of NUTS, and other Markov chain Monte Carlo (MCMC) methods, is that they can be quite computational intensive, especially for complex models or large data. + +The `epidist` package is built using `brms`, itself built on the Stan probabilistic programming language. +One benefit of this design, is that as other approximate Bayesian inference algorithms are implemented in Stan (and then `brms`), they automatically are available in `epidist`. +As such, if you are using `epidist` and having difficulties using the default NUTS algorithm, you may want to consider an alternative. + +In this vignette, we will first briefly describe the alternative algorithms available in Section \@ref(other), as well as directing you to more detailed resources. +Then in Section \@ref(demo) we demonstrate their application to fitting simulated data, before extracting and comparing posterior distributions. +By comparing the resulting inferences to those from NUTS, to hope to help you make informed decisions about which algorithm to use in your problem. + +# Alternative approximate inference algorithms {#other} ## Laplace -* Briefly, how does it work +The Laplace method approximates a posterior distribution by a Gaussian distribution. +In Stan, the Gaussian distribution is constructed on the unconstrained parameter space, and samples may then be transformed to the constrained parameter space. +See the section [Laplace sampling](https://mc-stan.org/docs/cmdstan-guide/laplace_sample_config.html) of the `CmdStan` User's Guide for more information. ## Variational inference * Briefly, how does it work +* It's the mean field version (only?) 
## Pathfinder * Briefly, how does it work -# Demonstration +# Demonstration {#demo} ```{r load-requirements} library(epidist) @@ -63,6 +76,8 @@ library(gt) library(dplyr) ``` +Simulate data: + ```{r} meanlog <- 1.8 sdlog <- 0.5 @@ -80,11 +95,27 @@ obs_cens_trunc <- simulate_gillespie() |> obs_cens_trunc_samp <- obs_cens_trunc[sample(seq_len(.N), sample_size, replace = FALSE)] data <- epidist_prepare(obs_cens_trunc_samp, model = "latent_individual") +``` +Fit models: + +```{r} fit_hmc <- epidist(data = data, algorithm = "sampling") fit_laplace <- epidist(data = data, algorithm = "laplace") fit_variational <- epidist(data = data, algorithm = "meanfield") fit_pathfinder <- epidist(data = data, algorithm = "pathfinder") ``` +Extract posterior distribution: + +```{r} +# TODO +``` + +Compare with a figure or table or both: + +```{r} +# TODO +``` + ## Bibliography {-} From dde007492b1ee47411c2951ecea7390dfd7905ac Mon Sep 17 00:00:00 2001 From: athowes Date: Tue, 11 Jun 2024 11:58:13 +0100 Subject: [PATCH 06/28] Decent way through first draft now --- vignettes/approx-inference.Rmd | 83 +++++++++++++++++++++++++--------- vignettes/references.bib | 22 +++++++++ 2 files changed, 84 insertions(+), 21 deletions(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index 99ab87216..6804b3b1e 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -50,30 +50,28 @@ By comparing the resulting inferences to those from NUTS, to hope to help you ma # Alternative approximate inference algorithms {#other} -## Laplace +## Laplace method The Laplace method approximates a posterior distribution by a Gaussian distribution. In Stan, the Gaussian distribution is constructed on the unconstrained parameter space, and samples may then be transformed to the constrained parameter space. -See the section [Laplace sampling](https://mc-stan.org/docs/cmdstan-guide/laplace_sample_config.html) of the `CmdStan` User's Guide for more information. +See the section [Laplace Sampling](https://mc-stan.org/docs/cmdstan-guide/laplace_sample_config.html) of the `CmdStan` User's Guide for more information. -## Variational inference +## Variational inference using ADVI -* Briefly, how does it work -* It's the mean field version (only?) +Automatic differentiation variational inference [ADVI; @kucukelbir2017advi] is... +See the section [Variational Inference using ADVI](https://mc-stan.org/docs/cmdstan-guide/variational_config.html) of the `CmdStan` User's Guide for more information. ## Pathfinder -* Briefly, how does it work +Pathfinder is a variational inference method more recently developed by @zhang2022pathfinder. +See the section [Pathfinder Method for Approximate Bayesian Inference](https://mc-stan.org/docs/cmdstan-guide/pathfinder_config.html) of the `CmdStan` User's Guide for more information. 
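+
+As a sketch of the general mechanism (using a toy intercept-only model rather than the `epidist` latent model, and assuming a `brms` version recent enough to expose these algorithms via the `cmdstanr` backend), switching between all of the above amounts to changing a single `algorithm` argument:
+
+```{r toy-algorithms, eval=FALSE}
+# toy is a stand-in dataset for illustration only
+toy <- data.frame(y = rlnorm(100, meanlog = 1.8, sdlog = 0.5))
+fit_toy_laplace <- brms::brm(
+  y ~ 1, data = toy, backend = "cmdstanr", algorithm = "laplace"
+)
+fit_toy_advi <- brms::brm(
+  y ~ 1, data = toy, backend = "cmdstanr", algorithm = "meanfield"
+)
+fit_toy_pathfinder <- brms::brm(
+  y ~ 1, data = toy, backend = "cmdstanr", algorithm = "pathfinder"
+)
+```
+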
# Demonstration {#demo} +This demonstration requires the following packages: + ```{r load-requirements} library(epidist) -library(data.table) -library(purrr) -library(ggplot2) -library(gt) -library(dplyr) ``` Simulate data: @@ -92,30 +90,73 @@ obs_cens_trunc <- simulate_gillespie() |> observe_process() |> filter_obs_by_obs_time(obs_time = obs_time) -obs_cens_trunc_samp <- obs_cens_trunc[sample(seq_len(.N), sample_size, replace = FALSE)] +obs_cens_trunc_samp <- + obs_cens_trunc[sample(seq_len(.N), sample_size, replace = FALSE)] +``` + +Fit the latent individual model using each approximate inference method: +```{r results='hide'} data <- epidist_prepare(obs_cens_trunc_samp, model = "latent_individual") + +fit_hmc <- epidist(data = data, algorithm = "sampling") +fit_laplace <- epidist(data = data, algorithm = "laplace", draws = 4000) +fit_advi <- epidist(data = data, algorithm = "meanfield", draws = 4000) +fit_pathfinder <- epidist(data = data, algorithm = "pathfinder", draws = 4000) ``` -Fit models: +Extract posterior distribution for the delay parameters: ```{r} -fit_hmc <- epidist(data = data, algorithm = "sampling") -fit_laplace <- epidist(data = data, algorithm = "laplace") -fit_variational <- epidist(data = data, algorithm = "meanfield") -fit_pathfinder <- epidist(data = data, algorithm = "pathfinder") +draws_hmc <- extract_lognormal_draws(fit_hmc) +draws_laplace <- extract_lognormal_draws(fit_laplace) +draws_advi<- extract_lognormal_draws(fit_advi) +draws_pathfinder <- extract_lognormal_draws(fit_pathfinder) ``` -Extract posterior distribution: +Compare with a figure or table or both: ```{r} -# TODO +draws_hmc <- draws_hmc |> + draws_to_long() |> + filter(parameter %in% c("mean", "sd")) |> + mutate(method = "hmc") + +draws_laplace <- draws_laplace |> + draws_to_long() |> + filter(parameter %in% c("mean", "sd")) |> + mutate(method = "laplace") + +draws_advi <- draws_advi |> + draws_to_long() |> + filter(parameter %in% c("mean", "sd")) |> + mutate(method = "advi") + +draws_pathfinder <- draws_pathfinder |> + draws_to_long() |> + filter(parameter %in% c("mean", "sd")) |> + mutate(method = "pathfinder") + +rbind(draws_hmc, draws_laplace, draws_advi) |> + filter(parameter == "mean") |> + ggplot(aes(x = value)) + + geom_histogram() + + facet_grid(method ~ parameter) + + theme_minimal() + +rbind(draws_hmc, draws_laplace, draws_advi) |> + filter(parameter == "sd") |> + ggplot(aes(x = value)) + + geom_histogram() + + facet_grid(method ~ parameter) + + theme_minimal() ``` -Compare with a figure or table or both: +Compare the time taken: ```{r} -# TODO +rstan::get_elapsed_time(fit_hmc$fit) +# Remainst to extract from the others ``` ## Bibliography {-} diff --git a/vignettes/references.bib b/vignettes/references.bib index d07f00314..a4caa6b8b 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -30,4 +30,26 @@ @Article{brms pages = {1--28}, doi = {10.18637/jss.v080.i01}, encoding = {UTF-8}, +} + +@article{kucukelbir2017advi, + author = {Alp Kucukelbir and Dustin Tran and Rajesh Ranganath and Andrew Gelman and David M. 
Blei}, + title = {Automatic Differentiation Variational Inference}, + journal = {Journal of Machine Learning Research}, + year = {2017}, + volume = {18}, + number = {14}, + pages = {1--45}, + url = {http://jmlr.org/papers/v18/16-107.html} +} + +@article{zhang2022pathfinder, + author = {Lu Zhang and Bob Carpenter and Andrew Gelman and Aki Vehtari}, + title = {Pathfinder: Parallel quasi-Newton variational inference}, + journal = {Journal of Machine Learning Research}, + year = {2022}, + volume = {23}, + number = {306}, + pages = {1--49}, + url = {http://jmlr.org/papers/v23/21-0889.html} } \ No newline at end of file From 10aaaefb8e57cf2b47725e5e8823ea48ee7ed558 Mon Sep 17 00:00:00 2001 From: athowes Date: Wed, 12 Jun 2024 13:35:23 +0100 Subject: [PATCH 07/28] Fix lint --- vignettes/approx-inference.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index 6804b3b1e..fe78eeaf1 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -110,7 +110,7 @@ Extract posterior distribution for the delay parameters: ```{r} draws_hmc <- extract_lognormal_draws(fit_hmc) draws_laplace <- extract_lognormal_draws(fit_laplace) -draws_advi<- extract_lognormal_draws(fit_advi) +draws_advi <- extract_lognormal_draws(fit_advi) draws_pathfinder <- extract_lognormal_draws(fit_pathfinder) ``` From 7a1a2d2ef6a395de6dbdd3ad745c1948fe7330da Mon Sep 17 00:00:00 2001 From: athowes Date: Wed, 12 Jun 2024 13:46:19 +0100 Subject: [PATCH 08/28] Add required package deps --- vignettes/approx-inference.Rmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index fe78eeaf1..93ba7759f 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -72,6 +72,8 @@ This demonstration requires the following packages: ```{r load-requirements} library(epidist) +library(ggplot2) +library(dplyr) ``` Simulate data: From 1dc24492b0ae1fdf425e20bcf08246091b4706a7 Mon Sep 17 00:00:00 2001 From: athowes Date: Wed, 12 Jun 2024 14:10:41 +0100 Subject: [PATCH 09/28] Improve text, figure, and note pathfinder error --- vignettes/approx-inference.Rmd | 50 +++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index 93ba7759f..c9271abd8 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -37,7 +37,7 @@ Doing Bayesian inference amounts to approximating the posterior distribution of By default, `epidist` uses the No-U-Turn Sampler (NUTS) Hamiltonian Monte Carlo (HMC) algorithm to approximate the posterior distribution. NUTS works by simulating from a Markov chain which has the posterior distribution as its stationary distribution. -As a result, when NUTS is run for sufficiently many iterations, the samples can be considered to be drawn from the posterior distribution, and used to compute relevant posterior quantities such as expectations. +When NUTS is run for sufficiently many iterations, and has "reached convergence", the samples can be considered to be drawn from the posterior distribution, and used to compute relevant posterior quantities such as expectations. A drawback of NUTS, and other Markov chain Monte Carlo (MCMC) methods, is that they can be quite computational intensive, especially for complex models or large data. 
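+
+To illustrate how such samples are used (a generic Monte Carlo identity with made-up numbers, not output from `epidist`), posterior quantities are estimated by simple averaging over draws:
+
+```{r}
+theta <- rnorm(4000, mean = 1.8, sd = 0.1) # stand-in for posterior samples
+mean(theta) # Monte Carlo estimate of the posterior mean
+quantile(theta, c(0.025, 0.975)) # and of a 95% credible interval
+```
+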
The `epidist` package is built using `brms`, itself built on the Stan probabilistic programming language. @@ -50,6 +50,8 @@ By comparing the resulting inferences to those from NUTS, to hope to help you ma # Alternative approximate inference algorithms {#other} +Here we describe three alternative approximate Bayesian inference algorithms that are available to use in `epidist`. + ## Laplace method The Laplace method approximates a posterior distribution by a Gaussian distribution. @@ -96,17 +98,26 @@ obs_cens_trunc_samp <- obs_cens_trunc[sample(seq_len(.N), sample_size, replace = FALSE)] ``` -Fit the latent individual model using each approximate inference method: +Fit the latent individual model using HMC: ```{r results='hide'} data <- epidist_prepare(obs_cens_trunc_samp, model = "latent_individual") fit_hmc <- epidist(data = data, algorithm = "sampling") +``` + +Fit the latent individual model using each method in Section \@ref(other) and draw 4000 samples to match the four Markov chains of 1000 in HMC: + +```{r} fit_laplace <- epidist(data = data, algorithm = "laplace", draws = 4000) fit_advi <- epidist(data = data, algorithm = "meanfield", draws = 4000) fit_pathfinder <- epidist(data = data, algorithm = "pathfinder", draws = 4000) ``` +Pathfinder runs into the error "Error evaluating model log probability: Non-finite gradient." here. +Strange! +Find a fix. + Extract posterior distribution for the delay parameters: ```{r} @@ -139,19 +150,38 @@ draws_pathfinder <- draws_pathfinder |> filter(parameter %in% c("mean", "sd")) |> mutate(method = "pathfinder") -rbind(draws_hmc, draws_laplace, draws_advi) |> - filter(parameter == "mean") |> +fct_reorg <- function(fac, ...) { + forcats::fct_recode(forcats::fct_relevel(fac, ...), ...) +} + +df <- rbind(draws_hmc, draws_laplace, draws_advi) |> + mutate( + method = forcats::fct_recode(method, + "HMC" = "hmc", + "Laplace" = "laplace", + "ADVI" = "advi", + "Pathfinder" = "pathfinder") |> + forcats::fct_relevel("HMC", "Laplace", "ADVI", "Pathfinder"), + parameter = forcats::fct_recode(parameter, + "Mean" = "mean", + "SD" = "sd" + ) + ) + +filter(df, parameter == "Mean") |> ggplot(aes(x = value)) + - geom_histogram() + + geom_histogram(aes(y = ..density..)) + facet_grid(method ~ parameter) + - theme_minimal() + theme_minimal() + + labs(x = "", y = "") -rbind(draws_hmc, draws_laplace, draws_advi) |> - filter(parameter == "sd") |> +rbind(df, draws_hmc, draws_laplace, draws_advi) |> + filter(parameter == "SD") |> ggplot(aes(x = value)) + - geom_histogram() + + geom_histogram(aes(y= ..density..)) + facet_grid(method ~ parameter) + - theme_minimal() + theme_minimal() + + labs(x = "", y = "") ``` Compare the time taken: From 6d82cc6d29765a61415d537e2e9a7985b120cdc3 Mon Sep 17 00:00:00 2001 From: athowes Date: Fri, 14 Jun 2024 11:38:27 +0100 Subject: [PATCH 10/28] Text and code improvements for approximate inference vignette --- vignettes/approx-inference.Rmd | 121 +++++++++++++++------------------ vignettes/references.bib | 21 +++++- 2 files changed, 75 insertions(+), 67 deletions(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index c9271abd8..b5e9ea0a1 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -32,45 +32,49 @@ knitr::opts_chunk$set( # Background -The `epidist` package uses Bayesian inference to estimate delay distributions. -Doing Bayesian inference amounts to approximating the posterior distribution of each model parameter^[See a work-in-progress vignette about the model structure.]. 
+The `epidist` package uses Bayesian inference to estimate delay distributions and other quantities. +Doing Bayesian inference amounts to approximating the posterior distribution of each parameter in the statistical model^[See a work-in-progress vignette about the model structure.]. -By default, `epidist` uses the No-U-Turn Sampler (NUTS) Hamiltonian Monte Carlo (HMC) algorithm to approximate the posterior distribution. -NUTS works by simulating from a Markov chain which has the posterior distribution as its stationary distribution. -When NUTS is run for sufficiently many iterations, and has "reached convergence", the samples can be considered to be drawn from the posterior distribution, and used to compute relevant posterior quantities such as expectations. -A drawback of NUTS, and other Markov chain Monte Carlo (MCMC) methods, is that they can be quite computational intensive, especially for complex models or large data. +By default, `epidist` uses the No-U-Turn Sampler [NUTS; @hoffman2014no] Hamiltonian Monte Carlo (HMC) algorithm to approximate the posterior distribution. +NUTS is an example of a broader class of Markov chain Monte Carlo (MCMC) methods which work by simulating from a Markov chain with the targetted posterior distribution as stationary distribution. +When MCMC algorithms are run for sufficiently many iterations, and have "reached convergence", the samples can be treated as being being drawn from the posterior distribution and used to compute relevant posterior quantities such as expectations. +A drawback of MCMC methods like NUTS, is that they can be quite computational intensive, especially for complex models or large data. -The `epidist` package is built using `brms`, itself built on the Stan probabilistic programming language. -One benefit of this design, is that as other approximate Bayesian inference algorithms are implemented in Stan (and then `brms`), they automatically are available in `epidist`. -As such, if you are using `epidist` and having difficulties using the default NUTS algorithm, you may want to consider an alternative. +The `epidist` package is built using `brms` [@brms], which stands for "Bayesian Regression Models using Stan", where Stan [@carpenter2017stan] is a probabilistic programming language. +NUTS is not the only inference algorithm implemented in Stan. +By relying on `brms` (and Stan) these additional inference algorithms are also available in `epidist`. +As such, if you are using `epidist` and having difficulties (for example you may have a long runtime, or your chains are failing to converge) using the default NUTS algorithm, you may want to consider an alternative. -In this vignette, we will first briefly describe the alternative algorithms available in Section \@ref(other), as well as directing you to more detailed resources. -Then in Section \@ref(demo) we demonstrate their application to fitting simulated data, before extracting and comparing posterior distributions. -By comparing the resulting inferences to those from NUTS, to hope to help you make informed decisions about which algorithm to use in your problem. +In this vignette, we first briefly describe the alternative algorithms available (Section \@ref(other)) as well as directing you to more detailed resources. +Then (Section \@ref(demo)) we demonstrate their application to fitting simulated data, before extracting and comparing posterior distributions. 
+By comparing the resulting inferences to those from NUTS, we hope to help you make informed decisions about which algorithm to use in your applied problem.

# Alternative approximate inference algorithms {#other}

-Here we describe three alternative approximate Bayesian inference algorithms that are available to use in `epidist`.
+Here we describe three alternative approximate Bayesian inference algorithms that are available to use in `brms`, and therefore also available in `epidist`.
+It's worth noting that further inference algorithms may have become available since this vignette was last updated.
+Please check `brms` package updates if interested!

## Laplace method

The Laplace method approximates a posterior distribution by a Gaussian distribution.
-In Stan, the Gaussian distribution is constructed on the unconstrained parameter space, and samples may then be transformed to the constrained parameter space.
+In Stan, the Gaussian approximation is constructed on the unconstrained parameter space as the domain of a Gaussian distribution is the real line.
+Samples from the Gaussian approximation may then be transformed to the constrained parameter space.
See the section [Laplace Sampling](https://mc-stan.org/docs/cmdstan-guide/laplace_sample_config.html) of the `CmdStan` User's Guide for more information.

## Variational inference using ADVI

-Automatic differentiation variational inference [ADVI; @kucukelbir2017advi] is...
+Automatic differentiation variational inference [ADVI; @kucukelbir2017advi] is a variety of variational inference algorithm.
See the section [Variational Inference using ADVI](https://mc-stan.org/docs/cmdstan-guide/variational_config.html) of the `CmdStan` User's Guide for more information.

## Pathfinder

-Pathfinder is a variational inference method more recently developed by @zhang2022pathfinder.
+Pathfinder is another variational inference method, more recently developed by @zhang2022pathfinder.
See the section [Pathfinder Method for Approximate Bayesian Inference](https://mc-stan.org/docs/cmdstan-guide/pathfinder_config.html) of the `CmdStan` User's Guide for more information.

# Demonstration {#demo}

-This demonstration requires the following packages:
+In this demonstration, we use the following packages:

```{r load-requirements}
library(epidist)
library(ggplot2)
library(dplyr)
```

-Simulate data:
+First, we begin by simulating data.
+The example data simulation process follows that used in the [Getting started with epidist](https://epidist.epinowcast.org/articles/epidist.html#data) vignette: ```{r} meanlog <- 1.8 @@ -86,7 +91,7 @@ sdlog <- 0.5 obs_time <- 25 sample_size <- 200 -obs_cens_trunc <- simulate_gillespie() |> +obs_cens_trunc <- simulate_gillespie(seed = 101) |> simulate_secondary( meanlog = meanlog, sdlog = sdlog @@ -98,7 +103,7 @@ obs_cens_trunc_samp <- obs_cens_trunc[sample(seq_len(.N), sample_size, replace = FALSE)] ``` -Fit the latent individual model using HMC: +We now prepare the data for fitting with the latent individual model, and perform inference with HMC: ```{r results='hide'} data <- epidist_prepare(obs_cens_trunc_samp, model = "latent_individual") @@ -106,7 +111,10 @@ data <- epidist_prepare(obs_cens_trunc_samp, model = "latent_individual") fit_hmc <- epidist(data = data, algorithm = "sampling") ``` -Fit the latent individual model using each method in Section \@ref(other) and draw 4000 samples to match the four Markov chains of 1000 in HMC: +Note that for clarity above we specify `algorithm = "sampling"`, but if you were to call `epidist(data = data)` the result would be the same since `"sampling"` (i.e. HMC) is the default value for the `algorithm` argument. + +Now, we fit the same latent individual model using each method in Section \@ref(other). +To match the four Markov chains of length 1000 in HMC above, we then draw 4000 samples from each approximate posterior: ```{r} fit_laplace <- epidist(data = data, algorithm = "laplace", draws = 4000) @@ -114,11 +122,11 @@ fit_advi <- epidist(data = data, algorithm = "meanfield", draws = 4000) fit_pathfinder <- epidist(data = data, algorithm = "pathfinder", draws = 4000) ``` -Pathfinder runs into the error "Error evaluating model log probability: Non-finite gradient." here. -Strange! -Find a fix. +Although both the Laplace and ADVI methods ran without problem, the Pathfinder algorithm produced errors of the form: + +> "Error evaluating model log probability: Non-finite gradient." -Extract posterior distribution for the delay parameters: +We now extract posterior distribution for the delay parameters from the fitted model for each inference method: ```{r} draws_hmc <- extract_lognormal_draws(fit_hmc) @@ -130,53 +138,34 @@ draws_pathfinder <- extract_lognormal_draws(fit_pathfinder) Compare with a figure or table or both: ```{r} -draws_hmc <- draws_hmc |> - draws_to_long() |> - filter(parameter %in% c("mean", "sd")) |> - mutate(method = "hmc") - -draws_laplace <- draws_laplace |> - draws_to_long() |> - filter(parameter %in% c("mean", "sd")) |> - mutate(method = "laplace") - -draws_advi <- draws_advi |> - draws_to_long() |> - filter(parameter %in% c("mean", "sd")) |> - mutate(method = "advi") - -draws_pathfinder <- draws_pathfinder |> - draws_to_long() |> - filter(parameter %in% c("mean", "sd")) |> - mutate(method = "pathfinder") - -fct_reorg <- function(fac, ...) { - forcats::fct_recode(forcats::fct_relevel(fac, ...), ...) 
+process_draws <- function(draws, name) { + draws |> + draws_to_long() |> + filter(parameter %in% c("mean", "sd")) |> + mutate( + parameter = recode(parameter, "mean" = "Mean", "sd" = "SD"), + method = name + ) } -df <- rbind(draws_hmc, draws_laplace, draws_advi) |> - mutate( - method = forcats::fct_recode(method, - "HMC" = "hmc", - "Laplace" = "laplace", - "ADVI" = "advi", - "Pathfinder" = "pathfinder") |> - forcats::fct_relevel("HMC", "Laplace", "ADVI", "Pathfinder"), - parameter = forcats::fct_recode(parameter, - "Mean" = "mean", - "SD" = "sd" - ) - ) - -filter(df, parameter == "Mean") |> +draws_hmc <- process_draws(draws_hmc, "HMC") +draws_laplace <- process_draws(draws_laplace, "Laplace") +draws_advi <- process_draws(draws_advi, "ADVI") +draws_pathfinder <- process_draws(draws_pathfinder, "Pathfinder") + +df <- rbind(draws_hmc, draws_laplace, draws_advi, draws_pathfinder) |> + mutate(method = forcats::as_factor(method)) + +df |> + filter(parameter == "Mean", method != "Pathfinder") |> ggplot(aes(x = value)) + geom_histogram(aes(y = ..density..)) + facet_grid(method ~ parameter) + theme_minimal() + labs(x = "", y = "") -rbind(df, draws_hmc, draws_laplace, draws_advi) |> - filter(parameter == "SD") |> +df |> + filter(parameter == "SD", method != "Pathfinder") |> ggplot(aes(x = value)) + geom_histogram(aes(y= ..density..)) + facet_grid(method ~ parameter) + @@ -184,11 +173,11 @@ rbind(df, draws_hmc, draws_laplace, draws_advi) |> labs(x = "", y = "") ``` -Compare the time taken: +How long did each of the methods take? ```{r} rstan::get_elapsed_time(fit_hmc$fit) -# Remainst to extract from the others +# Remains to find way to do this with other methods ``` ## Bibliography {-} diff --git a/vignettes/references.bib b/vignettes/references.bib index a4caa6b8b..89d6e295b 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -52,4 +52,23 @@ @article{zhang2022pathfinder number = {306}, pages = {1--49}, url = {http://jmlr.org/papers/v23/21-0889.html} -} \ No newline at end of file +} + +@article{hoffman2014no, + title={The No-U-Turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo.}, + author={Hoffman, Matthew D and Gelman, Andrew and others}, + journal={J. Mach. Learn. 
Res.}, + volume={15}, + number={1}, + pages={1593--1623}, + year={2014} +} + +@article{carpenter2017stan, + title={Stan: A probabilistic programming language}, + author={Carpenter, Bob and Gelman, Andrew and Hoffman, Matthew D and Lee, Daniel and Goodrich, Ben and Betancourt, Michael and Brubaker, Marcus A and Guo, Jiqiang and Li, Peter and Riddell, Allen}, + journal={Journal of statistical software}, + volume={76}, + year={2017}, + publisher={NIH Public Access} +} From c8af32ebe60fe0c9baebf7ed617e55ea80cf0dea Mon Sep 17 00:00:00 2001 From: athowes Date: Tue, 18 Jun 2024 14:59:50 +0100 Subject: [PATCH 11/28] Sam comment --- vignettes/approx-inference.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index b5e9ea0a1..9c66668f9 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -74,7 +74,7 @@ See the section [Pathfinder Method for Approximate Bayesian Inference](https://m # Demonstration {#demo} -In this demonstration, we use the following packages: +In this demonstration, we use `epidist` alongside the following packages: ```{r load-requirements} library(epidist) From 4ea1009fc2e7f1b4bdc713a0be185655482b15d2 Mon Sep 17 00:00:00 2001 From: athowes Date: Tue, 25 Jun 2024 14:03:23 +0100 Subject: [PATCH 12/28] Better describe ADVI --- vignettes/approx-inference.Rmd | 13 +++++++++---- vignettes/references.bib | 12 ++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index 9c66668f9..f55295b58 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -36,9 +36,10 @@ The `epidist` package uses Bayesian inference to estimate delay distributions an Doing Bayesian inference amounts to approximating the posterior distribution of each parameter in the statistical model^[See a work-in-progress vignette about the model structure.]. By default, `epidist` uses the No-U-Turn Sampler [NUTS; @hoffman2014no] Hamiltonian Monte Carlo (HMC) algorithm to approximate the posterior distribution. -NUTS is an example of a broader class of Markov chain Monte Carlo (MCMC) methods which work by simulating from a Markov chain with the targetted posterior distribution as stationary distribution. +NUTS is an example of a broader class of Markov chain Monte Carlo (MCMC) methods. +These methods work by simulating from a Markov chain with the intended posterior distribution as its stationary distribution. When MCMC algorithms are run for sufficiently many iterations, and have "reached convergence", the samples can be treated as being being drawn from the posterior distribution and used to compute relevant posterior quantities such as expectations. -A drawback of MCMC methods like NUTS, is that they can be quite computational intensive, especially for complex models or large data. +A drawback of MCMC methods like NUTS, is that these simulations can be quite computational intensive, especially for complex models or large data. The `epidist` package is built using `brms` [@brms], which stands for "Bayesian Regression Models using Stan", where Stan [@carpenter2017stan] is a probabilistic programming language. NUTS is not the only inference algorithm implemented in Stan. 
@@ -51,7 +52,7 @@ By comparing the resulting inferences to those from NUTS, to hope to help you ma # Alternative approximate inference algorithms {#other} -Here we describe three alternative approximate Bayesian inference algorithms that are available to use in `brms` and therefore `epidist`. +Here we describe three alternative approximate Bayesian inference algorithms that are available to use in `brms`, and therefore also available in `epidist`. It's worth noting that further inference algorithms may have become available since this vignette was last updated. Please check `brms` package updates if interested! @@ -60,11 +61,15 @@ Please check `brms` package updates if interested! The Laplace method approximates a posterior distribution by a Gaussian distribution. In Stan, the Gaussian approximation is constructed on the unconstrained parameter space as the domain of a Gaussian distribution is the real line. Samples from the Gaussian approximation may then be transformed to the constrained parameter space. +To access the Laplace method, specify `algorithm = "laplace"` within `brms::brm`. See the section [Laplace Sampling](https://mc-stan.org/docs/cmdstan-guide/laplace_sample_config.html) of the `CmdStan` User's Guide for more information. ## Variational inference using ADVI -Automatic differentiation variational inference [ADVI; @kucukelbir2017advi] is a variety of variational inference algorithm. +Automatic differentiation variational inference [ADVI; @kucukelbir2017advi] is a type of variational inference [VI; @blei2017variational] algorithm. +VI works by restricting to a family of distributions, and then selecting the member of that family which is the most similar to the posterior distribution. +Most commonly, and in Stan, (dis-)similarity is measured using the Kullback–Leibler divergence. +There are two options for the family of distributions, either a fully factorised Gaussian with `algorithm = "meanfield"` or a Gaussian with a full-rank covariance matrix with `algorithm = "fullrank"`. See the section [Variational Inference using ADVI](https://mc-stan.org/docs/cmdstan-guide/variational_config.html) of the `CmdStan` User's Guide for more information. 
## Pathfinder diff --git a/vignettes/references.bib b/vignettes/references.bib index 89d6e295b..82f971d71 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -72,3 +72,15 @@ @article{carpenter2017stan year={2017}, publisher={NIH Public Access} } + +@article{blei2017variational, + title={Variational inference: A review for statisticians}, + author={Blei, David M and Kucukelbir, Alp and McAuliffe, Jon D}, + journal={Journal of the American statistical Association}, + volume={112}, + number={518}, + pages={859--877}, + year={2017}, + publisher={Taylor \& Francis} +} + From 305160674cf81c3deb52f42a5ba54544d1e8ee85 Mon Sep 17 00:00:00 2001 From: athowes Date: Tue, 25 Jun 2024 14:36:20 +0100 Subject: [PATCH 13/28] Some writing, layout, and use of purrr --- vignettes/approx-inference.Rmd | 61 ++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index f55295b58..928aaa674 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -79,7 +79,7 @@ See the section [Pathfinder Method for Approximate Bayesian Inference](https://m # Demonstration {#demo} -In this demonstration, we use `epidist` alongside the following packages: +In this demonstration, we use `epidist` alongside `ggplot2` and `dplyr`: ```{r load-requirements} library(epidist) @@ -88,7 +88,7 @@ library(dplyr) ``` First, we begin by simulating data. -The example data simulation process follows that used in the [Getting started with epidist](https://epidist.epinowcast.org/articles/epidist.html#data) vignette: +The example data simulation process follows that used in the [Getting started with epidist](https://epidist.epinowcast.org/articles/epidist.html#data) vignette, so we will not detail exactly what is happening here, but please consult that vignette if interested: ```{r} meanlog <- 1.8 @@ -118,10 +118,10 @@ fit_hmc <- epidist(data = data, algorithm = "sampling") Note that for clarity above we specify `algorithm = "sampling"`, but if you were to call `epidist(data = data)` the result would be the same since `"sampling"` (i.e. HMC) is the default value for the `algorithm` argument. -Now, we fit the same latent individual model using each method in Section \@ref(other). -To match the four Markov chains of length 1000 in HMC above, we then draw 4000 samples from each approximate posterior: +Now, we fit^[Note that in this section, and above for the MCMC, the output of the call is hidden, but if you were to call these functions yourself they would display information about the fitting procedure as it occurs] the same latent individual model using each method in Section \@ref(other). +To match the four Markov chains of length 1000 in HMC above, we then draw 4000 samples from each approximate posterior. -```{r} +```{r results='hide'} fit_laplace <- epidist(data = data, algorithm = "laplace", draws = 4000) fit_advi <- epidist(data = data, algorithm = "meanfield", draws = 4000) fit_pathfinder <- epidist(data = data, algorithm = "pathfinder", draws = 4000) @@ -131,37 +131,32 @@ Although both the Laplace and ADVI methods ran without problem, the Pathfinder a > "Error evaluating model log probability: Non-finite gradient." -We now extract posterior distribution for the delay parameters from the fitted model for each inference method: +We now extract posterior distribution for the delay parameters from the fitted model for each inference method. 
+Thankfully, as each algorithm is implemented to sample ```{r} -draws_hmc <- extract_lognormal_draws(fit_hmc) -draws_laplace <- extract_lognormal_draws(fit_laplace) -draws_advi <- extract_lognormal_draws(fit_advi) -draws_pathfinder <- extract_lognormal_draws(fit_pathfinder) -``` - -Compare with a figure or table or both: +fits <- list( + "HMC" = fit_hmc, + "Laplace" = fit_laplace, + "ADVI" = fit_advi, + "Pathfinder" = fit_pathfinder +) -```{r} -process_draws <- function(draws, name) { - draws |> +draws <- purrr::map2(fits, names(fits), function(fit, name) { + extract_lognormal_draws(fit) |> draws_to_long() |> filter(parameter %in% c("mean", "sd")) |> - mutate( - parameter = recode(parameter, "mean" = "Mean", "sd" = "SD"), - method = name - ) -} + mutate(parameter = recode(parameter, "mean" = "Mean", "sd" = "SD"), + method = as.factor(name)) +}) -draws_hmc <- process_draws(draws_hmc, "HMC") -draws_laplace <- process_draws(draws_laplace, "Laplace") -draws_advi <- process_draws(draws_advi, "ADVI") -draws_pathfinder <- process_draws(draws_pathfinder, "Pathfinder") +draws <- bind_rows(draws) +``` -df <- rbind(draws_hmc, draws_laplace, draws_advi, draws_pathfinder) |> - mutate(method = forcats::as_factor(method)) +## Comparison of parameter posterior distributions -df |> +```{r} +draws |> filter(parameter == "Mean", method != "Pathfinder") |> ggplot(aes(x = value)) + geom_histogram(aes(y = ..density..)) + @@ -169,7 +164,7 @@ df |> theme_minimal() + labs(x = "", y = "") -df |> +draws |> filter(parameter == "SD", method != "Pathfinder") |> ggplot(aes(x = value)) + geom_histogram(aes(y= ..density..)) + @@ -178,6 +173,10 @@ df |> labs(x = "", y = "") ``` +## Comparison resulting delay distributions + +## Comparison of time taken + How long did each of the methods take? ```{r} @@ -185,4 +184,8 @@ rstan::get_elapsed_time(fit_hmc$fit) # Remains to find way to do this with other methods ``` +# Concluion + +Remains to write! 
+ ## Bibliography {-} From ef531cc5e1e861a63cd34015999bbf865920cc04 Mon Sep 17 00:00:00 2001 From: athowes Date: Tue, 25 Jun 2024 14:57:55 +0100 Subject: [PATCH 14/28] Add plot of delay distributions --- vignettes/approx-inference.Rmd | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index 928aaa674..00e3bf8f2 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -145,9 +145,8 @@ fits <- list( draws <- purrr::map2(fits, names(fits), function(fit, name) { extract_lognormal_draws(fit) |> draws_to_long() |> - filter(parameter %in% c("mean", "sd")) |> - mutate(parameter = recode(parameter, "mean" = "Mean", "sd" = "SD"), - method = as.factor(name)) + filter(parameter %in% c("meanlog", "sdlog")) |> + mutate(method = as.factor(name)) }) draws <- bind_rows(draws) @@ -157,7 +156,7 @@ draws <- bind_rows(draws) ```{r} draws |> - filter(parameter == "Mean", method != "Pathfinder") |> + filter(parameter == "meanlog", method != "Pathfinder") |> ggplot(aes(x = value)) + geom_histogram(aes(y = ..density..)) + facet_grid(method ~ parameter) + @@ -165,9 +164,9 @@ draws |> labs(x = "", y = "") draws |> - filter(parameter == "SD", method != "Pathfinder") |> + filter(parameter == "sdlog", method != "Pathfinder") |> ggplot(aes(x = value)) + - geom_histogram(aes(y= ..density..)) + + geom_histogram(aes(y = ..density..)) + facet_grid(method ~ parameter) + theme_minimal() + labs(x = "", y = "") @@ -175,6 +174,24 @@ draws |> ## Comparison resulting delay distributions +```{r} +pars <- draws |> + group_by(method, parameter) |> + summarise(value = mean(value)) |> + tidyr::pivot_wider(names_from = parameter, values_from = value) |> + filter(!method == "Pathfinder") + +purrr::pmap_df( + pars, ~ tibble( + x = seq(0, 25, by = 0.1), + method = ..1, density = dlnorm(x, ..2, ..3)) + ) |> + ggplot(aes(x = x, y = density, col = method)) + + geom_line() + + theme_minimal() + + labs(x = "", y = "", col = "Method") +``` + ## Comparison of time taken How long did each of the methods take? From 12bea732bbdebf629aeaf0997ae0f3498cafd480 Mon Sep 17 00:00:00 2001 From: athowes Date: Tue, 25 Jun 2024 15:18:55 +0100 Subject: [PATCH 15/28] Add time taken manually (looks like no inbuilt Stan functions for this) --- vignettes/approx-inference.Rmd | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index 00e3bf8f2..1a31b1635 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -113,7 +113,9 @@ We now prepare the data for fitting with the latent individual model, and perfor ```{r results='hide'} data <- epidist_prepare(obs_cens_trunc_samp, model = "latent_individual") +t <- proc.time() fit_hmc <- epidist(data = data, algorithm = "sampling") +time_hmc <- proc.time() - t ``` Note that for clarity above we specify `algorithm = "sampling"`, but if you were to call `epidist(data = data)` the result would be the same since `"sampling"` (i.e. HMC) is the default value for the `algorithm` argument. @@ -122,9 +124,18 @@ Now, we fit^[Note that in this section, and above for the MCMC, the output of th To match the four Markov chains of length 1000 in HMC above, we then draw 4000 samples from each approximate posterior. 
```{r results='hide'}
+t <- proc.time()
 fit_laplace <- epidist(data = data, algorithm = "laplace", draws = 4000)
+time_laplace <- proc.time() - t
+
+
+t <- proc.time()
 fit_advi <- epidist(data = data, algorithm = "meanfield", draws = 4000)
+time_advi <- proc.time() - t
+
+t <- proc.time()
 fit_pathfinder <- epidist(data = data, algorithm = "pathfinder", draws = 4000)
+time_pathfinder <- proc.time() - t
 ```

Although both the Laplace and ADVI methods ran without problem, the Pathfinder algorithm produced errors of the form:
@@ -197,8 +208,10 @@ purrr::pmap_df(
How long did each of the methods take?

```{r}
-rstan::get_elapsed_time(fit_hmc$fit)
-# Remains to find way to do this with other methods
+time_hmc
+time_laplace
+time_advi
+time_pathfinder
```

From bd124f684fad51150d3d1e71982b98a99901ba33 Mon Sep 17 00:00:00 2001
From: athowes
Date: Thu, 27 Jun 2024 23:00:26 +0100
Subject: [PATCH 16/28] What is Pathfinder (basic)

---
 vignettes/approx-inference.Rmd | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd
index 1a31b1635..15f8b303b 100644
--- a/vignettes/approx-inference.Rmd
+++ b/vignettes/approx-inference.Rmd
@@ -74,7 +74,8 @@ See the section [Variational Inference using ADVI](https://mc-stan.org/docs/cmds

## Pathfinder

-Pathfinder is another variational inference method, more recently developed by @zhang2022pathfinder.
+Pathfinder is a method closely related to variational inference, more recently developed by @zhang2022pathfinder.
+It works by generating Gaussian approximations along each step of an iterative optimisation algorithm (such as L-BFGS).
See the section [Pathfinder Method for Approximate Bayesian Inference](https://mc-stan.org/docs/cmdstan-guide/pathfinder_config.html) of the `CmdStan` User's Guide for more information. 
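+
+As a heavily simplified one-dimensional caricature of that idea (illustration only: the real algorithm uses L-BFGS, works on the full posterior, and importance-resamples across the candidate approximations; the `numDeriv` package is assumed here for derivatives, and the chunk is not evaluated):
+
+```{r pathfinder-sketch, eval=FALSE}
+log_post <- function(theta) dnorm(theta, mean = 2, sd = 0.5, log = TRUE)
+theta <- -3 # a deliberately poor starting point
+for (i in 1:20) {
+  theta <- theta + 0.1 * numDeriv::grad(log_post, theta) # gradient ascent
+}
+# A Gaussian approximation built from local curvature at a point on the path:
+h <- numDeriv::hessian(log_post, theta)
+c(mean = theta, sd = sqrt(-1 / h))
+```
+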
# Demonstration {#demo} From 8935e94b612d684ed75184897dde9af870e6bc80 Mon Sep 17 00:00:00 2001 From: athowes Date: Fri, 28 Jun 2024 15:14:23 +0100 Subject: [PATCH 17/28] Standardise colours across figures --- vignettes/approx-inference.Rmd | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index 15f8b303b..41a228e27 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -169,18 +169,22 @@ draws <- bind_rows(draws) ```{r} draws |> filter(parameter == "meanlog", method != "Pathfinder") |> - ggplot(aes(x = value)) + + ggplot(aes(x = value, fill = method)) + geom_histogram(aes(y = ..density..)) + + scale_fill_manual(values = c("#56B4E9", "#009E73", "#E69F00")) + facet_grid(method ~ parameter) + theme_minimal() + + guides(fill = "none") + labs(x = "", y = "") draws |> filter(parameter == "sdlog", method != "Pathfinder") |> - ggplot(aes(x = value)) + + ggplot(aes(x = value, fill = method)) + geom_histogram(aes(y = ..density..)) + + scale_fill_manual(values = c("#56B4E9", "#009E73", "#E69F00")) + facet_grid(method ~ parameter) + theme_minimal() + + guides(fill = "none") + labs(x = "", y = "") ``` @@ -200,6 +204,7 @@ purrr::pmap_df( ) |> ggplot(aes(x = x, y = density, col = method)) + geom_line() + + scale_color_manual(values = c("#56B4E9", "#009E73", "#E69F00")) + theme_minimal() + labs(x = "", y = "", col = "Method") ``` From b308d959f3a195306a9cdd329352ea6dd0027a26 Mon Sep 17 00:00:00 2001 From: athowes Date: Fri, 28 Jun 2024 16:24:09 +0100 Subject: [PATCH 18/28] Clarify the stochastic behaviour of Pathfinder, set a seed that works, use single path --- vignettes/approx-inference.Rmd | 39 ++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd index 41a228e27..e70281d3e 100644 --- a/vignettes/approx-inference.Rmd +++ b/vignettes/approx-inference.Rmd @@ -1,5 +1,5 @@ --- -title: "Approximate Bayesian inference in `epidist`" +title: "Approximate Bayesian inference in epidist" description: "..." output: bookdown::html_document2: @@ -18,7 +18,9 @@ bibliography: references.bib --- ```{r setup, include=FALSE} -# exclude compile warnings from cmdstanr +# With set.seed(1) then Pathfinder fails +set.seed(2) + knitr::opts_chunk$set( fig.path = "figures/epidist-", cache = TRUE, @@ -135,16 +137,16 @@ fit_advi <- epidist(data = data, algorithm = "meanfield", draws = 4000) time_advi <- proc.time() - t t <- proc.time() -fit_pathfinder <- epidist(data = data, algorithm = "pathfinder", draws = 4000) +fit_pathfinder <- epidist(data = data, algorithm = "pathfinder", draws = 4000, num_paths = 1) time_pathfinder <- proc.time() - t ``` -Although both the Laplace and ADVI methods ran without problem, the Pathfinder algorithm produced errors of the form: +Although both the Laplace and ADVI methods ran without problem, the Pathfinder algorithm is liable to produce errors of the form: > "Error evaluating model log probability: Non-finite gradient." We now extract posterior distribution for the delay parameters from the fitted model for each inference method. 
-Thankfully, as each algorithm is implemented to sample
+Thankfully, as each algorithm is implemented to sample draws from the posterior then post-processing is simple
 
 ```{r}
 fits <- list(
   "HMC" = fit_hmc,
   "Laplace" = fit_laplace,
   "ADVI" = fit_advi,
   "Pathfinder" = fit_pathfinder
 )
 
 draws <- purrr::map2(fits, names(fits), function(fit, name) {
   extract_lognormal_draws(fit) |>
     draws_to_long() |>
     filter(parameter %in% c("meanlog", "sdlog")) |>
 })
 
 draws <- bind_rows(draws)
+
+pars <- draws |>
+  group_by(method, parameter) |>
+  summarise(value = mean(value)) |>
+  tidyr::pivot_wider(names_from = parameter, values_from = value)
+
+pars
 ```
 
 ## Comparison of parameter posterior distributions
 
 ```{r}
 draws |>
-  filter(parameter == "meanlog", method != "Pathfinder") |>
+  filter(parameter == "meanlog") |>
   ggplot(aes(x = value, fill = method)) +
   geom_histogram(aes(y = ..density..)) +
-  scale_fill_manual(values = c("#56B4E9", "#009E73", "#E69F00")) +
+  scale_fill_manual(values = c("#56B4E9", "#009E73", "#E69F00", "#CC79A7")) +
   facet_grid(method ~ parameter) +
   theme_minimal() +
   guides(fill = "none") +
   labs(x = "", y = "")
 
 draws |>
-  filter(parameter == "sdlog", method != "Pathfinder") |>
+  filter(parameter == "sdlog") |>
   ggplot(aes(x = value, fill = method)) +
   geom_histogram(aes(y = ..density..)) +
-  scale_fill_manual(values = c("#56B4E9", "#009E73", "#E69F00")) +
+  scale_fill_manual(values = c("#56B4E9", "#009E73", "#E69F00", "#CC79A7")) +
   facet_grid(method ~ parameter) +
   theme_minimal() +
   guides(fill = "none") +
@@ -191,20 +200,14 @@ draws |>
 ## Comparison resulting delay distributions
 
 ```{r}
-pars <- draws |>
-  group_by(method, parameter) |>
-  summarise(value = mean(value)) |>
-  tidyr::pivot_wider(names_from = parameter, values_from = value) |>
-  filter(!method == "Pathfinder")
-
 purrr::pmap_df(
-  pars, ~ tibble(
+  filter(pars), ~ tibble(
     x = seq(0, 25, by = 0.1),
     method = ..1, density = dlnorm(x, ..2, ..3))
 ) |>
   ggplot(aes(x = x, y = density, col = method)) +
   geom_line() +
-  scale_color_manual(values = c("#56B4E9", "#009E73", "#E69F00")) +
+  scale_color_manual(values = c("#56B4E9", "#009E73", "#E69F00", "#CC79A7")) +
   theme_minimal() +
   labs(x = "", y = "", col = "Method")
 ```
@@ -220,7 +223,7 @@ time_advi
 time_pathfinder
 ```
 
-# Concluion
+# Conclusion
 
 Remains to write!
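The histogram comparisons in the patch above invite a numerical summary of how close each approximation sits to the HMC reference. Below is a hedged sketch, assuming the long-format `draws` data frame (columns `method`, `parameter`, `value`) constructed in that patch; the helper `ks_to_hmc` is invented here and is not part of the package.

```r
library(dplyr)
library(purrr)

# Kolmogorov-Smirnov distance from each approximate method to HMC, per
# parameter. Smaller values mean closer agreement of the marginal posterior.
ks_to_hmc <- function(df) {
  hmc_values <- filter(df, method == "HMC")$value
  df |>
    filter(method != "HMC") |>
    group_by(method) |>
    summarise(ks = ks.test(value, hmc_values)$statistic, .groups = "drop")
}

draws |>
  group_by(parameter) |>
  group_modify(~ ks_to_hmc(.x)) |>
  arrange(parameter, ks)
```

The KS statistic is the largest gap between the two empirical distribution functions, so values near zero indicate that an approximation's draws are nearly indistinguishable from the HMC draws for that parameter.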
From ea872b377d45298e93e6103392d7483fe6c337f5 Mon Sep 17 00:00:00 2001
From: athowes
Date: Mon, 8 Jul 2024 10:20:31 +0100
Subject: [PATCH 19/28] Use imap as suggested by Damon

---
 vignettes/approx-inference.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd
index e70281d3e..296cf7e4a 100644
--- a/vignettes/approx-inference.Rmd
+++ b/vignettes/approx-inference.Rmd
@@ -156,7 +156,7 @@ fits <- list(
   "Pathfinder" = fit_pathfinder
 )
 
-draws <- purrr::map2(fits, names(fits), function(fit, name) {
+draws <- purrr::imap(fits, function(fit, name) {
   extract_lognormal_draws(fit) |>
     draws_to_long() |>
     filter(parameter %in% c("meanlog", "sdlog")) |>

From 9ccad892f3047840671c78907d30bf0e337d3600 Mon Sep 17 00:00:00 2001
From: athowes
Date: Mon, 8 Jul 2024 11:36:37 +0100
Subject: [PATCH 20/28] Finish first draft of approximate inference vignette
 with conclusions, and pass lintr

---
 vignettes/approx-inference.Rmd | 97 +++++++++++++++++++++++-----------
 vignettes/references.bib       |  7 +++
 2 files changed, 73 insertions(+), 31 deletions(-)

diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd
index 296cf7e4a..37c73c838 100644
--- a/vignettes/approx-inference.Rmd
+++ b/vignettes/approx-inference.Rmd
@@ -18,8 +18,7 @@ bibliography: references.bib
 ---
 
 ```{r setup, include=FALSE}
-# With set.seed(1) then Pathfinder fails
-set.seed(2)
+set.seed(1)
 
 knitr::opts_chunk$set(
   fig.path = "figures/epidist-",
   cache = TRUE,
@@ -35,18 +34,19 @@ knitr::opts_chunk$set(
 # Background
 
 The `epidist` package uses Bayesian inference to estimate delay distributions and other quantities.
-Doing Bayesian inference amounts to approximating the posterior distribution of each parameter in the statistical model^[See a work-in-progress vignette about the model structure.].
+Doing Bayesian inference amounts to approximating the posterior distribution of each parameter in the statistical model^[We are currently developing a vignette explaining the statistical model in more detail!].
+A range of methods exist to perform this approximation.
 
-By default, `epidist` uses the No-U-Turn Sampler [NUTS; @hoffman2014no] Hamiltonian Monte Carlo (HMC) algorithm to approximate the posterior distribution.
+By default, `epidist` uses the No-U-Turn Sampler [NUTS; @hoffman2014no] Hamiltonian Monte Carlo (HMC) algorithm.
 NUTS is an example of a broader class of Markov chain Monte Carlo (MCMC) methods.
+These methods work by simulating from a Markov chain with the target posterior distribution as its stationary distribution.
+When MCMC algorithms are run for sufficiently many iterations, and have reached convergence, the samples can be treated as being being drawn from the posterior distribution.
+Relevant posterior quantities such as expectations may then be computed using these samples.
+A drawback of MCMC methods, like NUTS, is that simulations can be quite computationally intensive, especially for complex models or large data.
 The `epidist` package is built using `brms` [@brms], which stands for "Bayesian Regression Models using Stan", where Stan [@carpenter2017stan] is a probabilistic programming language.
-NUTS is not the only inference algorithm implemented in Stan.
-By relying on `brms` (and Stan) these additional inference algorithms are also available in `epidist`.
-As such, if you are using `epidist` and having difficulties (for example you may have a long runtime, or your chains are failing to converge) using the default NUTS algorithm, you may want to consider an alternative.
+Although NUTS is the primary inference algorithm used in Stan, additional options are available.
+These additional inference algorithms are also available in `epidist` due to its dependence on `brms`.
 
 In this vignette, we first briefly describe the alternative algorithms available (Section \@ref(other)) as well as directing you to more detailed resources.
 Then (Section \@ref(demo)) we demonstrate their application to fitting simulated data, before extracting and comparing posterior distributions.
+By comparing the resulting inferences to those from NUTS, to hope to help you make informed decisions about which algorithm to use in your applied problem.
@@ -60,8 +60,8 @@ Please check `brms` package updates if interested!
 
 ## Laplace method
 
-The Laplace method approximates a posterior distribution by a Gaussian distribution.
-In Stan, the Gaussian approximation is constructed on the unconstrained parameter space as the domain of a Gaussian distribution is the real line.
+The Laplace method approximates a posterior distribution by a Gaussian distribution centered (by default) at the posterior mode.
+In Stan, the Gaussian approximation is constructed on the unconstrained parameter space (as the domain of a Gaussian distribution is the real line).
 Samples from the Gaussian approximation may then be transformed to the constrained parameter space.
 To access the Laplace method, specify `algorithm = "laplace"` within `brms::brm`.
 See the section [Laplace Sampling](https://mc-stan.org/docs/cmdstan-guide/laplace_sample_config.html) of the `CmdStan` User's Guide for more information.
@@ -70,7 +70,7 @@
 Automatic differentiation variational inference [ADVI; @kucukelbir2017advi] is a type of variational inference [VI; @blei2017variational] algorithm.
 VI works by restricting to a family of distributions, and then selecting the member of that family which is the most similar to the posterior distribution.
-Most commonly, and in Stan, (dis-)similarity is measured using the Kullback–Leibler divergence.
+Most commonly, and in Stan, (dis-)similarity is measured using the Kullback–Leibler (KL) divergence.
 There are two options for the family of distributions, either a fully factorised Gaussian with `algorithm = "meanfield"` or a Gaussian with a full-rank covariance matrix with `algorithm = "fullrank"`.
 See the section [Variational Inference using ADVI](https://mc-stan.org/docs/cmdstan-guide/variational_config.html) of the `CmdStan` User's Guide for more information.
@@ -78,6 +78,10 @@ See the section [Variational Inference using ADVI](https://mc-stan.org/docs/cmds
 
 ## Pathfinder
 
 Pathfinder is method closely related to variational inference, more recently developed by @zhang2022pathfinder.
 It works by generating Gaussian approximations along each step of an iterative optimisation algorithm (such as L-BFGS).
+The KL divergence from each approximation to the posterior is measured, with the best approximation chosen.
+Pareto smoothed importance sampling [PSIS; @vehtari2015pareto] is optionally used to resample draws from the chosen Gaussian distribution.
+When multiple paths are specified (using `num_paths`) then the Pathfinder algorithm is run multiple times, initialising the optimisation at different points.
+The resulting approximation is a mixture of Gaussian distributions, rather than a single Gaussian distribution.
 See the section [Pathfinder Method for Approximate Bayesian Inference](https://mc-stan.org/docs/cmdstan-guide/pathfinder_config.html) of the `CmdStan` User's Guide for more information.
 
 # Demonstration {#demo}
 
 ```{r load-requirements}
 library(epidist)
 library(data.table)
 library(purrr)
 library(ggplot2)
 library(gt)
 library(dplyr)
@@ -131,22 +135,25 @@ t <- proc.time()
 fit_laplace <- epidist(data = data, algorithm = "laplace", draws = 4000)
 time_laplace <- proc.time() - t
-
 t <- proc.time()
 fit_advi <- epidist(data = data, algorithm = "meanfield", draws = 4000)
 time_advi <- proc.time() - t
+```
+
+For the Pathfinder algorithm we will set `num_paths = 1`.
+Although in testing both the Laplace and ADVI methods ran without problem in all cases, we found Pathfinder often produced "Error evaluating model log probability: Non-finite gradient.".
+Although a `save_single_paths` option is available, which may have allowed recovery of individual Pathfinder paths (and therefore removing faulty paths), it does not appear to be working currently^[See https://github.com/stan-dev/cmdstanr/issues/878].
+
+```{r}
 t <- proc.time()
-fit_pathfinder <- epidist(data = data, algorithm = "pathfinder", draws = 4000, num_paths = 1)
+fit_pathfinder <- epidist(
+  data = data, algorithm = "pathfinder", draws = 4000, num_paths = 1
+)
 time_pathfinder <- proc.time() - t
 ```
 
 We now extract posterior distribution for the delay parameters from the fitted model for each inference method.
-Thankfully, as each algorithm is implemented to sample draws from the posterior then post-processing is simple
+Thankfully, each algorithm is implemented to sample draws from the posterior distribution, and so post-processing is simple.
 
 ```{r}
 fits <- list(
   "HMC" = fit_hmc,
   "Laplace" = fit_laplace,
   "ADVI" = fit_advi,
   "Pathfinder" = fit_pathfinder
 )
 
 draws <- purrr::imap(fits, function(fit, name) {
   extract_lognormal_draws(fit) |>
     draws_to_long() |>
     filter(parameter %in% c("meanlog", "sdlog")) |>
 })
 
 draws <- bind_rows(draws)
+```
+
+## Comparison of parameter posterior distributions
+
+The mean estimated value of each parameter, from each method is as follows.
+
+```{r}
 pars <- draws |>
   group_by(method, parameter) |>
   summarise(value = mean(value)) |>
   tidyr::pivot_wider(names_from = parameter, values_from = value)
 
-pars
+pars |>
+  ungroup() |>
+  gt::gt()
 ```
 
-## Comparison of parameter posterior distributions
+More comprehensively, the estimated posterior distributions are as follows.
 
 ```{r}
 draws |>
   filter(parameter == "meanlog") |>
   ggplot(aes(x = value, fill = method)) +
   geom_histogram(aes(y = ..density..)) +
   scale_fill_manual(values = c("#56B4E9", "#009E73", "#E69F00", "#CC79A7")) +
   facet_grid(method ~ parameter) +
   theme_minimal() +
   guides(fill = "none") +
   labs(x = "", y = "")
+```
 
+```{r}
 draws |>
   filter(parameter == "sdlog") |>
   ggplot(aes(x = value, fill = method)) +
   geom_histogram(aes(y = ..density..)) +
   scale_fill_manual(values = c("#56B4E9", "#009E73", "#E69F00", "#CC79A7")) +
   facet_grid(method ~ parameter) +
   theme_minimal() +
   guides(fill = "none") +
   labs(x = "", y = "")
 ```
 
 ## Comparison resulting delay distributions
 
+How do these different distributions on the `meanlog` and `sdlog` parameters affect the estimated delay distribution?
 ```{r}
 purrr::pmap_df(
   filter(pars), ~ tibble(
     x = seq(0, 25, by = 0.1),
-    method = ..1, density = dlnorm(x, ..2, ..3))
-) |>
+    method = ..1, density = dlnorm(x, ..2, ..3)
+  )
+) |>
   ggplot(aes(x = x, y = density, col = method)) +
   geom_line() +
-  scale_color_manual(values = c("#56B4E9", "#009E73", "#E69F00", "#CC79A7")) +
+  scale_color_manual(values = c("#56B4E9", "#009E73", "#E69F00", "#CC79A7")) +
   theme_minimal() +
   labs(x = "", y = "", col = "Method")
 ```
 
 ## Comparison of time taken
 
-How long did each of the methods take?
+In this example, HMC took much longer than the other methods, with Pathfinder being the fastest method.
+That said, even for HMC the computation time in this case is unlikely to be prohibitive.
 
 ```{r}
-time_hmc
-time_laplace
-time_advi
-time_pathfinder
+times <- list(
+  "HMC" = time_hmc,
+  "Laplace" = time_laplace,
+  "ADVI" = time_advi,
+  "Pathfinder" = time_pathfinder
+)
+
+purrr::imap(times, function(time, name) {
+  data.frame(
+    method = name,
+    time = time[["elapsed"]]
+  )
+}) |>
+  bind_rows() |>
+  gt::gt()
 ```
 
 # Conclusion
 
-Remains to write!
+The range of alternative approximation algorithms available, and their ease of use, is an attractive feature of `brms`.
+We found that these algorithms do produce reasonable approximations in far less time than HMC.
+Of course, this vignette only includes one example, and a more thorough investigation would be required to make specific recommendations.
+That said, currently we do not recommend use of the Pathfinder algorithm due to its unstable performance in our testing, and early stage software implementation.
 
 ## Bibliography {-}

diff --git a/vignettes/references.bib b/vignettes/references.bib
index 82f971d71..082bed95a 100644
--- a/vignettes/references.bib
+++ b/vignettes/references.bib
@@ -84,3 +84,10 @@ @article{blei2017variational
   publisher={Taylor \& Francis}
 }
 
+@article{vehtari2015pareto,
+  title={Pareto smoothed importance sampling},
+  author={Vehtari, Aki and Simpson, Daniel and Gelman, Andrew and Yao, Yuling and Gabry, Jonah},
+  journal={arXiv preprint arXiv:1507.02646},
+  year={2015}
+}
+

From 3b2f78b79eb15c6a027df88a70d31ddf69867091 Mon Sep 17 00:00:00 2001
From: athowes
Date: Tue, 9 Jul 2024 15:01:08 +0100
Subject: [PATCH 21/28] Try adding back rstan to see if it fixes CI

---
 DESCRIPTION | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index bfd80f933..ac285e398 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,7 +22,7 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE, roclets = c("collate", "namespace", "rd",
     "roxyglobals::global_roclet"))
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Imports:
     brms,
     cmdstanr,
@@ -33,7 +33,8 @@ Imports:
     ggridges,
     here,
     stats,
-    cli
+    cli,
+    rstan
 Suggests:
     bookdown,
     epinowcast,

From 12c14656ef4fdf02c39f83b0c19deba1d70aa138 Mon Sep 17 00:00:00 2001
From: athowes
Date: Tue, 9 Jul 2024 16:10:50 +0100
Subject: [PATCH 22/28] Add description

---
 vignettes/approx-inference.Rmd | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd
index 37c73c838..07e85f20d 100644
--- a/vignettes/approx-inference.Rmd
+++ b/vignettes/approx-inference.Rmd
@@ -1,6 +1,8 @@
 ---
 title: "Approximate Bayesian inference in epidist"
-description: "..."
+description: |
+  "A demonstration of how to fit models in epidist using approximate Bayesian
+  inference methods such as the Laplace approximation, ADVI, and Pathfinder."
 output:
   bookdown::html_document2:
     fig_caption: yes
     code_folding: show
     number_sections: true
 pkgdown:
   as_is: true
 # csl: https://raw.githubusercontent.com/citation-style-language/styles/master/apa-numeric-superscript-brackets.csl
 link-citations: true
 vignette: >
   %\VignetteIndexEntry{Getting started with epidist}
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 bibliography: references.bib
 ---
 
 ```{r setup, include=FALSE}
-set.seed(1)
+set.seed(2)
 
 knitr::opts_chunk$set(
   fig.path = "figures/epidist-",
   cache = TRUE,

From cfbe00b8513c4fe0a1ba4a39cc9ac183c5e3db4c Mon Sep 17 00:00:00 2001
From: athowes
Date: Wed, 10 Jul 2024 10:02:24 +0100
Subject: [PATCH 23/28] Add GH remote for brms to get latest bug fix

---
 DESCRIPTION | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index ac285e398..cec0e7dc5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -48,7 +48,8 @@ Suggests:
 Remotes:
     stan-dev/cmdstanr,
     Rdatatable/data.table,
-    epinowcast/epinowcast
+    epinowcast/epinowcast,
+    paul-buerkner/brms
 Config/Needs/website:
     r-lib/pkgdown,
     epinowcast/enwtheme

From fe525c0854b971a9f3008a8fdcdc7109450fe11b Mon Sep 17 00:00:00 2001
From: athowes
Date: Fri, 12 Jul 2024 10:30:54 +0100
Subject: [PATCH 24/28] Fix typos

---
 vignettes/approx-inference.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd
index 07e85f20d..d2e13f9d3 100644
--- a/vignettes/approx-inference.Rmd
+++ b/vignettes/approx-inference.Rmd
@@ -78,7 +78,7 @@ See the section [Variational Inference using ADVI](https://mc-stan.org/docs/cmds
 
 ## Pathfinder
 
-Pathfinder is method closely related to variational inference, more recently developed by @zhang2022pathfinder.
+Pathfinder is a method closely related to variational inference, which has been more recently developed by @zhang2022pathfinder.
 It works by generating Gaussian approximations along each step of an iterative optimisation algorithm (such as L-BFGS).
 The KL divergence from each approximation to the posterior is measured, with the best approximation chosen.
 Pareto smoothed importance sampling [PSIS; @vehtari2015pareto] is optionally used to resample draws from the chosen Gaussian distribution.
@@ -216,7 +216,7 @@ draws |>
   labs(x = "", y = "")
 ```
 
-## Comparison resulting delay distributions
+## Comparison of resulting delay distributions
 
 How do these different distributions on the `meanlog` and `sdlog` parameters affect the estimated delay distribution?
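The question left as trailing context above — how `meanlog` and `sdlog` feed through to the delay distribution — also has a closed-form answer via the standard lognormal identities: the mean is exp(meanlog + sdlog^2 / 2), the median is exp(meanlog), and the standard deviation is the mean times sqrt(exp(sdlog^2) - 1). A small sketch with illustrative parameter values only (not estimates from any fit); `lognormal_summary` is a helper invented here.

```r
# Natural-scale summaries implied by lognormal parameters (standard identities)
lognormal_summary <- function(meanlog, sdlog) {
  mean <- exp(meanlog + sdlog^2 / 2)
  data.frame(
    meanlog = meanlog,
    sdlog = sdlog,
    mean = mean,
    median = exp(meanlog),
    sd = mean * sqrt(exp(sdlog^2) - 1)
  )
}

# Small differences in sdlog mostly move the mean and the right tail
lognormal_summary(meanlog = 1.6, sdlog = c(0.4, 0.5, 0.6))
```

This is why visually similar posteriors for `meanlog` can still yield noticeably different delay densities when the `sdlog` posteriors disagree.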
From df0178a8e431f3545405c106cfdb2c5be84eb2dd Mon Sep 17 00:00:00 2001
From: athowes
Date: Fri, 12 Jul 2024 11:08:09 +0100
Subject: [PATCH 25/28] As of recent PR then prepare has been replaced

---
 vignettes/approx-inference.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd
index d2e13f9d3..611792553 100644
--- a/vignettes/approx-inference.Rmd
+++ b/vignettes/approx-inference.Rmd
@@ -120,7 +120,7 @@ obs_cens_trunc_samp <-
 We now prepare the data for fitting with the latent individual model, and perform inference with HMC:
 
 ```{r results='hide'}
-data <- epidist_prepare(obs_cens_trunc_samp, model = "latent_individual")
+data <- as_latent_individual(obs_cens_trunc_samp)
 
 t <- proc.time()
 fit_hmc <- epidist(data = data, algorithm = "sampling")

From c9a1de5517ca0aaca6ddca9b34b2f35d77951a20 Mon Sep 17 00:00:00 2001
From: athowes
Date: Sun, 14 Jul 2024 10:54:24 +0100
Subject: [PATCH 26/28] Add units to times

---
 vignettes/approx-inference.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd
index 611792553..3dae3eb7f 100644
--- a/vignettes/approx-inference.Rmd
+++ b/vignettes/approx-inference.Rmd
@@ -249,8 +249,8 @@ times <- list(
 
 purrr::imap(times, function(time, name) {
   data.frame(
-    method = name,
-    time = time[["elapsed"]]
+    "method" = name,
+    "time (s)" = time[["elapsed"]]
   )
 }) |>
   bind_rows() |>

From 875772d6b1a2b86108d2534a6e4435d0389c4f9a Mon Sep 17 00:00:00 2001
From: athowes
Date: Sun, 14 Jul 2024 10:58:43 +0100
Subject: [PATCH 27/28] Fix typo spotted by Katie

---
 vignettes/approx-inference.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd
index 3dae3eb7f..324b8723a 100644
--- a/vignettes/approx-inference.Rmd
+++ b/vignettes/approx-inference.Rmd
@@ -52,7 +52,7 @@ These additional inference algorithms are also available in `epidist` due to its
 In this vignette, we first briefly describe the alternative algorithms available (Section \@ref(other)) as well as directing you to more detailed resources.
 Then (Section \@ref(demo)) we demonstrate their application to fitting simulated data, before extracting and comparing posterior distributions.
-By comparing the resulting inferences to those from NUTS, to hope to help you make informed decisions about which algorithm to use in your applied problem.
+By comparing the resulting inferences to those from NUTS, we hope to help you make informed decisions about which algorithm to use in your applied problem.
 
 # Alternative approximate inference algorithms {#other}

From c28e6da92bbb7de11a7a9614728f3c83fe01aaf5 Mon Sep 17 00:00:00 2001
From: athowes
Date: Mon, 15 Jul 2024 09:46:32 +0100
Subject: [PATCH 28/28] Fix typo spotted by Daniel

---
 vignettes/approx-inference.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/approx-inference.Rmd b/vignettes/approx-inference.Rmd
index 324b8723a..2b4cb1bbc 100644
--- a/vignettes/approx-inference.Rmd
+++ b/vignettes/approx-inference.Rmd
@@ -42,7 +42,7 @@ A range of methods exist to perform this approximation.
 By default, `epidist` uses the No-U-Turn Sampler [NUTS; @hoffman2014no] Hamiltonian Monte Carlo (HMC) algorithm.
 NUTS is an example of a broader class of Markov chain Monte Carlo (MCMC) methods.
 These methods work by simulating from a Markov chain with the target posterior distribution as its stationary distribution.
-When MCMC algorithms are run for sufficiently many iterations, and have reached convergence, the samples can be treated as being being drawn from the posterior distribution.
+When MCMC algorithms are run for sufficiently many iterations, and have reached convergence, the samples can be treated as being drawn from the posterior distribution.
 Relevant posterior quantities such as expectations may then be computed using these samples.
 A drawback of MCMC methods, like NUTS, is that simulations can be quite computationally intensive, especially for complex models or large data.
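The convergence condition referenced in this final hunk can be checked in practice. Below is a minimal sketch using `rstan::Rhat` and `rstan::ess_bulk` (rstan entered Imports in patch 21); the draws matrix is simulated here purely for illustration, standing in for the iterations-by-chains draws of a single parameter.

```r
library(rstan)

# Stand-in for 1000 post-warmup iterations from each of 4 well-mixed chains
sims <- matrix(rnorm(4000), nrow = 1000, ncol = 4)

Rhat(sims)     # rank-normalised split R-hat; values near 1 suggest convergence
ess_bulk(sims) # bulk effective sample size; larger means more usable draws
```

Diagnostics like these apply to the MCMC chains only; for the Laplace, ADVI, and Pathfinder fits discussed in this series, the analogous question is how close the chosen approximating distribution is to the posterior, which is exactly what the comparisons against HMC earlier in the vignette probe.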