Skip to content

Commit

Permalink
implement all.negative.pools option
Browse files Browse the repository at this point in the history
Also bit the the bullet and imported dplyr and made some improvements to documentation (moving some things to 'details' rather than in parameter descriptions). Also made getPrevalence a proper S3 generic. Also fixed an issue with PoolRegBayes having priors that don't correspond to model parameters when you have a model with population effects
  • Loading branch information
AngusMcLure committed Mar 19, 2024
1 parent e5e8c01 commit 829bd73
Show file tree
Hide file tree
Showing 11 changed files with 279 additions and 158 deletions.
5 changes: 4 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
# Generated by roxygen2: do not edit by hand

S3method(getPrevalence,brmsfit)
S3method(getPrevalence,glm)
S3method(getPrevalence,glmerMod)
export(HierPoolPrev)
export(PoolLink)
export(PoolPrev)
export(PoolReg)
export(PoolRegBayes)
export(getPrevalence)
import(Rcpp)
import(dplyr)
import(methods)
import(rstan)
importFrom(dplyr,"%>%")
useDynLib(PoolTestR, .registration = TRUE)
101 changes: 65 additions & 36 deletions R/HierPoolPrev.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,27 +32,14 @@
#' defined by these columns
#' @param prior List of parameters specifying the parameters for the the priors
#' on the population intercept and standard deviations of group-effect terms.
#' The default value (NULL) uses the following parameters:
#' \code{list(intercept = list(nu = 3, mu = 0, sigma = 4.0),
#' group_sd = list(nu = 3, mu = 0, sigma = 2.5),
#' individual_sd = FALSE)}
#' This models the prior of the linear scale intercept as t-distributed with
#' parameters in `intercept` and the standard deviation of the group-level
#' effects as truncated (non-negative) t-distribution. `individual_sd = FALSE`
#' means that this prior is for the root-sum-square of group-effect standard
#' deviations for models with multiple grouping levels. The default implies a
#' prior on population prevalence that is approximately distributed as
#' beta(0.5,0.5). To set custom priors, use the same nested list format. Any
#' omitted parameters will be replaced with the default values and additional
#' parameters ignored silently. For example, to change the parameters to be
#' equal to the defaults for intercept-only random-effect model in
#' PoolRegBayes you can use:
#' \code{list(individual_sd = TRUE)},
#' which puts a prior on each the standard deviations of each of group-level
#' effects separately, but doesn't change the priors used.
#'
#' See details.
#' @param level The confidence level to be used for the confidence and credible
#' intervals. Defaults to 0.95 (i.e. 95\% intervals)
#' @param all.negative.pools The kind of point estimate and interval to use when
#' all pools are negative. If 'consistent' (default) result is the same as for
#' the case where at least one pool is positive. If 'zero' uses 0 as the point
#' estimate and lower bound for the interval and \code{level} posterior
#' quantile the upper bound of the interval.
#' @param verbose Logical indicating whether to print progress to screen.
#' Defaults to false (no printing to screen)
#' @param cores The number of CPU cores to be used. By default one core is used
Expand All @@ -77,16 +64,39 @@
#' @seealso \code{\link{PoolPrev}}, \code{\link{getPrevalence}}
#'
#' @example examples/HierPrevalence.R
#'
#' @details
#'
#' When using the default value of the \code{prior} argument (NULL), the model
#' uses the following prior:
#' \code{list(intercept = list(nu = 3, mu = 0, sigma = 4.0),
#' group_sd = list(nu = 3, mu = 0, sigma = 2.5),
#' individual_sd = FALSE)}
#' This models the prior of the linear scale intercept as t-distributed with
#' parameters in `intercept` and the standard deviation of the group-level
#' effects as truncated (non-negative) t-distribution. `individual_sd = FALSE`
#' means that this prior is for the root-sum-square of group-effect standard
#' deviations for models with multiple grouping levels. The default implies a
#' prior on population prevalence that is approximately distributed as
#' beta(0.5,0.5). To set custom priors, use the same nested list format. Any
#' omitted parameters will be replaced with the default values and additional
#' parameters ignored silently. For example, to change the parameters to be
#' equal to the defaults for intercept-only random-effect model in PoolRegBayes
#' you can use: \code{list(individual_sd = TRUE)}, which puts a prior on each
#' the standard deviations of each of group-level effects separately, but
#' doesn't change the priors used.
#'


HierPoolPrev <- function(data,result,poolSize,hierarchy,...,
prior = NULL,
level = 0.95, verbose = FALSE, cores = NULL,
iter = 2000, warmup = iter/2,
chains = 4, control = list(adapt_delta = 0.9)){
result <- dplyr::enquo(result) #The name of column with the result of each test on each pooled sample
poolSize <- dplyr::enquo(poolSize) #The name of the column with number of bugs in each pool
groupVar <- dplyr::enquos(...) #optional name(s) of columns with other variable to group by. If omitted uses the complete dataset of pooled sample results to calculate a single prevalence
chains = 4, control = list(adapt_delta = 0.9),
all.negative.pools = 'consistent'){
result <- enquo(result) #The name of column with the result of each test on each pooled sample
poolSize <- enquo(poolSize) #The name of the column with number of bugs in each pool
groupVar <- enquos(...) #optional name(s) of columns with other variable to group by. If omitted uses the complete dataset of pooled sample results to calculate a single prevalence

# Ideally I would like to:
# Set number of cores to use (use all the cores! BUT when checking R
Expand All @@ -110,9 +120,9 @@ HierPoolPrev <- function(data,result,poolSize,hierarchy,...,

#Make the model matrix for the group effects - there might be a simpler way of doing this...
G <- data[,hierarchy,drop = FALSE] %>%
dplyr::mutate_all(as.factor) %>%
mutate_all(as.factor) %>%
droplevels %>%
dplyr::mutate_all(as.integer)
mutate_all(as.integer)
NumGroups <- G %>% sapply(max)
G <- t(t(G) + cumsum(NumGroups) - NumGroups)
Z <- matrix(0,nrow = nrow(data), ncol = sum(NumGroups))
Expand All @@ -125,8 +135,8 @@ HierPoolPrev <- function(data,result,poolSize,hierarchy,...,
NumGroups = array(NumGroups), #Number of groups at each level of hierarchy
TotalGroups = sum(NumGroups),
#Result = array(data$Result), #PERHAPS TRY REMOVING COLUMN NAMES?
Result = dplyr::select(data, !! result)[,1] %>% as.matrix %>% as.numeric %>% array, #This seems a rather obscene way to select a column, but other more sensible methods have inexplicible errors when passed to rstan::sampling
PoolSize = dplyr::select(data, !! poolSize)[,1] %>% as.matrix %>% array,
Result = select(data, !! result)[,1] %>% as.matrix %>% as.numeric %>% array, #This seems a rather obscene way to select a column, but other more sensible methods have inexplicible errors when passed to rstan::sampling
PoolSize = select(data, !! poolSize)[,1] %>% as.matrix %>% array,
#G = G, #The group membership for each data point and level of hierarchy
Z = Z,
# Parameters for t-distributed priors for:
Expand Down Expand Up @@ -162,27 +172,45 @@ HierPoolPrev <- function(data,result,poolSize,hierarchy,...,
sfit$total_group_sd,
list(stats::plogis))

out <- data.frame(PrevBayes = mean(sfit$p))
out[,'CrILow'] <- stats::quantile(sfit$p,(1-level)/2)
out[,'CrIHigh'] <- stats::quantile(sfit$p,(1+level)/2)
if(all.negative.pools == 'zero' & sum(sdata$Result) == 0){
estimate.type <- 'zero'
}else if(!(all.negative.pools %in% c('zero', 'consistent'))){
stop(all.negative.pools, ' is not a valid option for `all.negative.pools`')
} else{
estimate.type = 'consistent'
}

out <- data.frame(PrevBayes =
switch(estimate.type,
consistent = mean(sfit$p),
zero = 0)
)
out[,'CrILow'] <- switch(estimate.type,
consistent = stats::quantile(sfit$p,(1-level)/2),
zero = 0)

out[,'CrIHigh'] <- switch(estimate.type,
consistent = stats::quantile(sfit$p,(1+level)/2),
zero = stats::quantile(sfit$p,level))


out[,'NumberOfPools'] <- sdata$N
out[,'NumberPositive'] <- sum(sdata$Result)

out <- out %>%
dplyr::select('PrevBayes',
select('PrevBayes',
'CrILow','CrIHigh',
'NumberOfPools', 'NumberPositive')

out
}else{ #if there are stratifying variables the function calls itself iteratively on each stratum
data <- data %>%
dplyr::group_by(!!! groupVar)
nGroups <- dplyr::n_groups(data)
group_by(!!! groupVar)
nGroups <- n_groups(data)
ProgBar <- progress::progress_bar$new(format = "[:bar] :current/:total (:percent)", total = nGroups)
ProgBar$tick(-1)
out <- data %>%
dplyr::group_modify(function(x,...){
group_modify(function(x,...){
ProgBar$tick(1)
HierPoolPrev(x,!! result,!! poolSize,
hierarchy,
Expand All @@ -191,11 +219,12 @@ HierPoolPrev <- function(data,result,poolSize,hierarchy,...,
verbose = verbose,
cores = cores,
iter = iter, warmup = warmup,
chains = chains, control = control)
chains = chains, control = control,
all.negative.pools = all.negative.pools)
}) %>%
as.data.frame()
ProgBar$tick(1)
}
dplyr::tibble(out)
tibble(out)
}

24 changes: 12 additions & 12 deletions R/PoolPrev.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ PoolPrev <- function(data,result,poolSize,...,
verbose = FALSE, cores = NULL,
iter = 2000, warmup = iter/2,
chains = 4, control = list(adapt_delta = 0.9)){
result <- dplyr::enquo(result) #The name of column with the result of each test on each pooled sample
poolSize <- dplyr::enquo(poolSize) #The name of the column with number of bugs in each pool
groupVar <- dplyr::enquos(...) #optional name(s) of columns with other variable to group by. If omitted uses the complete dataset of pooled sample results to calculate a single prevalence
result <- enquo(result) #The name of column with the result of each test on each pooled sample
poolSize <- enquo(poolSize) #The name of the column with number of bugs in each pool
groupVar <- enquos(...) #optional name(s) of columns with other variable to group by. If omitted uses the complete dataset of pooled sample results to calculate a single prevalence

useJefferysPrior <- is.null(prior)
if(bayesian){
Expand Down Expand Up @@ -133,8 +133,8 @@ PoolPrev <- function(data,result,poolSize,...,
rplnull <- function(x,replacement){if(is.null(x)){replacement}else{x}}
sdata <- list(N = nrow(data),
#Result = array(data$Result), #PERHAPS TRY REMOVING COLUMN NAMES?
Result = dplyr::select(data, !! result)[,1] %>% as.matrix %>% as.numeric %>% array, #This seems a rather obscene way to select a column, but other more sensible methods have inexplicible errors when passed to rstan::sampling
PoolSize = dplyr::select(data, !! poolSize)[,1] %>% as.matrix %>% array,
Result = select(data, !! result)[,1] %>% as.matrix %>% as.numeric %>% array, #This seems a rather obscene way to select a column, but other more sensible methods have inexplicible errors when passed to rstan::sampling
PoolSize = select(data, !! poolSize)[,1] %>% as.matrix %>% array,
PriorAlpha = rplnull(prior$alpha,0),
PriorBeta = rplnull(prior$beta,0),
JeffreysPrior = useJefferysPrior
Expand Down Expand Up @@ -249,16 +249,16 @@ PoolPrev <- function(data,result,poolSize,...,

if(bayesian){
out <- out %>%
dplyr::rename('PrevBayes' = mean) %>%
dplyr::select('PrevMLE',
rename('PrevBayes' = mean) %>%
select('PrevMLE',
'CILow', 'CIHigh',
'PrevBayes',
'CrILow','CrIHigh',
'ProbAbsent',
'NumberOfPools', 'NumberPositive')
}else{
out <- out %>%
dplyr::select('PrevMLE',
select('PrevMLE',
'CILow', 'CIHigh',
'NumberOfPools', 'NumberPositive')
}
Expand All @@ -268,11 +268,11 @@ PoolPrev <- function(data,result,poolSize,...,
out
}else{ #if there are stratifying variables the function calls itself iteratively on each stratum
data <- data %>%
dplyr::group_by(!!! groupVar)
nGroups <- dplyr::n_groups(data)
group_by(!!! groupVar)
nGroups <- n_groups(data)
ProgBar <- progress::progress_bar$new(format = "[:bar] :current/:total (:percent)", total = nGroups)
ProgBar$tick(-1)
out <- data %>% dplyr::group_modify(function(x,...){
out <- data %>% group_modify(function(x,...){
ProgBar$tick(1)
PoolPrev(x,!! result,!! poolSize,
level=level,verbose = verbose,
Expand All @@ -286,6 +286,6 @@ PoolPrev <- function(data,result,poolSize,...,
as.data.frame()
ProgBar$tick(1)
}
dplyr::tibble(out)
tibble(out)
}

15 changes: 7 additions & 8 deletions R/PoolRegBayes.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,11 @@
#'
#'




PoolRegBayes <- function (formula, data, poolSize,
link = 'logit', prior = NULL, cores = NULL, ...){
poolSize <- dplyr::enquo(poolSize)
poolSize <- enquo(poolSize)
AllVars <- all.vars(formula)
PoolSizeName <- dplyr::as_label(poolSize)
PoolSizeName <- as_label(poolSize)

# Ideally I would like to:
# Set number of cores to use (use all the cores! BUT when checking R
Expand Down Expand Up @@ -117,9 +114,11 @@ PoolRegBayes <- function (formula, data, poolSize,
stop('Invalid link function. Options are logit, log, loglogit, or cloglog')),
block = "functions")

if(is.null(prior)){
prior <- brms::set_prior("student_t(6, 0, 1.5)", class = "b",
nlpar = "")
if(is.null(prior) | !("b" %in% prior$class)){ #if there is no supplied prior on the population-level ('fixed') effects
if("b" %in% brms::get_prior(formula, data, family)$class){ #if there are population-level ('fixed') effects in the model
prior <- brms::set_prior("student_t(6, 0, 1.5)", class = "b",
nlpar = "")
}
}

code <- brms::make_stancode(bform,
Expand Down
2 changes: 1 addition & 1 deletion R/PoolTestR-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#' @name PoolTestR-package
#' @aliases PoolTestR
#' @useDynLib PoolTestR, .registration = TRUE
#' @importFrom dplyr %>%
#' @import dplyr
#' @import methods
#' @import Rcpp
#' @import rstan
Expand Down
Loading

0 comments on commit 829bd73

Please sign in to comment.