Skip to content

Commit

Permalink
add test for warnings on N<60 with GLMM
Browse files Browse the repository at this point in the history
  • Loading branch information
MikeDMorgan committed Aug 6, 2024
1 parent 76a6c40 commit a9f0832
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 4 deletions.
24 changes: 23 additions & 1 deletion R/testNhoods.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
#' @param fail.on.error A logical scalar the determines the behaviour of the error reporting. Used for debugging only.
#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying the arguments for parallelisation. By default
#' this will evaluate using \code{SerialParam()}. See \code{details}on how to use parallelisation in \code{testNhoods}.
#' @param force A logical scalar that overrides the default behaviour to nicely error when N < 50 and using a mixed
#' effect model. This is because model parameter estimation may be unstable with these sample sizes, and hence the
#' fixed effect GLM is recommended instead. If used with the LMM, a warning will be produced.
#'
#' @details
#' This function wraps up several steps of differential abundance testing using
Expand All @@ -84,6 +87,14 @@
#' consistency. While it is strictly feasible to compute multiple contrasts at once, the
#' recommendation, for ease of interpretability, is to compute one at a time.
#'
#' If using the GLMM option, i.e. including a random effect variable in the \code{design}
#' formula, then \code{testNhoods} will check for the sample size of the analysis. If this is
#' less than 60 it will stop and produce an error. It is \emph{strongly} recommended that
#' the GLMM is not used with relatively small sample sizes, i.e. N<60, and even up to N~100
#' may have unstable parameter estimates across nhoods. This behaviour can be overriden by
#' setting \code{force=TRUE}, but also be aware that parameter estimates may not be
#' accurate. A warning will be produced to alert you to this fact.
#'
#' @return A \code{data.frame} of model results, which contain:
#' \describe{
#' \item{\code{logFC}:}{Numeric, the log fold change between conditions, or for
Expand Down Expand Up @@ -152,7 +163,8 @@ testNhoods <- function(x, design, design.df, kinship=NULL,
min.mean=0, model.contrasts=NULL, robust=TRUE, reduced.dim="PCA", REML=TRUE,
norm.method=c("TMM", "RLE", "logMS"), cell.sizes=NULL,
max.iters = 50, max.tol = 1e-5, glmm.solver=NULL,
subset.nhoods=NULL, fail.on.error=FALSE, BPPARAM=SerialParam()){
subset.nhoods=NULL, fail.on.error=FALSE, BPPARAM=SerialParam(),
force=FALSE){
is.lmm <- FALSE
geno.only <- FALSE
if(is(design, "formula")){
Expand Down Expand Up @@ -223,6 +235,16 @@ testNhoods <- function(x, design, design.df, kinship=NULL,
stop("design must be either a formula or model matrix")
}

check.n <- nrow(x.model) < 60

if(is.lmm & check.n & isFALSE(force)){
stop("You are attempting to use the GLMM with N=", nrow(x.model), ". It is ",
"strongly discouraged. To override this behaviour set force=TRUE")
} else if(is.lmm & check.n & force){
warning("Running GLMM with small sample size, N=", nrow(x.model), ". Model ",
"estimates may not be reliable")
}

if(!is(x, "Milo")){
stop("Unrecognised input type - must be of class Milo")
} else if(.check_empty(x, "nhoodCounts")){
Expand Down
12 changes: 9 additions & 3 deletions tests/testthat/test_testNhoods.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,16 +250,22 @@ test_that("Singular Hessians are detectable and fail appropriately", {

# collinear fixed and random effects
expect_error(suppressWarnings(testNhoods(sim1.mylo, design=~Condition + (1|Condition),
design.df=sim1.meta, glmm.solver="Fisher", fail.on.error=TRUE)),
"matrix is singular")
design.df=sim1.meta, glmm.solver="Fisher", force=TRUE, fail.on.error=TRUE)),
"Lowest traceback returned")
})

test_that("Invalid formulae give expected errors", {
expect_error(suppressWarnings(testNhoods(sim1.mylo, design=~Condition + (50|Condition),
design.df=sim1.meta, glmm.solver="Fisher")),
design.df=sim1.meta, force=TRUE, glmm.solver="Fisher")),
"is an invalid formula for random effects")
})

test_that("Small sample sizes produce expected warnings and errors", {
expect_error(suppressWarnings(testNhoods(sim1.mylo, design=~Condition + (1|Condition),
design.df=sim1.meta, force=FALSE, glmm.solver="Fisher")),
"You are attempting to use the GLMM with N")
})

test_that("NA or Inf cell sizes causes the expected errors", {
cell.sizes.na <- colSums(nhoodCounts(sim1.mylo))
cell.sizes.na[1] <- NA
Expand Down

0 comments on commit a9f0832

Please sign in to comment.