Merge pull request #9 from JeffreyRStevens/devel

Add format_stats.merMod method, test, and documentation
JeffreyRStevens · Dec 2, 2024 · ad4bd93 · ad4bd93
2 parents b148f47 + 1154524
commit ad4bd93
Show file tree

Hide file tree

Showing 33 changed files with 524 additions and 182 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -18,6 +18,7 @@ Suggests:
     BayesFactor,
     correlation,
     knitr,
+    lme4,
     rmarkdown,
     testthat (>= 3.0.0)
 Config/testthat/edition: 3

diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,7 @@ S3method(format_stats,default)
 S3method(format_stats,easycorrelation)
 S3method(format_stats,htest)
 S3method(format_stats,lm)
+S3method(format_stats,merMod)
 export(format_bf)
 export(format_chr)
 export(format_corr)

diff --git a/R/format_numbers.R b/R/format_numbers.R
@@ -1,9 +1,9 @@
 #' Format numbers
 #'
-#' @param x Number
-#' @param digits Number of digits after the decimal
+#' @param x Number.
+#' @param digits Number of digits after the decimal.
 #' @param pzero Logical value (default = TRUE) for whether to include leading
-#' zero numbers less than 1
+#' zero for numbers less than 1.
 #'
 #' @return
 #' A character string formatting the number with specified number of digits
@@ -30,9 +30,9 @@ format_num <- function(x,
 
 #' Format numbers in scientific notation
 #'
-#' @param x Number
-#' @param digits Number of digits after the decimal
-#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX)
+#' @param x Number.
+#' @param digits Number of digits after the decimal.
+#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX).
 #'
 #' @return
 #' A character string of a number in scientific notation formatted in Markdown
@@ -69,10 +69,10 @@ format_scientific <- function(x,
 
 #' Format character strings with italics and type
 #'
-#' @param x Character string
+#' @param x Character string.
 #' @param italics Logical value (default = TRUE) for whether text should be
-#' italicized
-#' @param type Type of formatting (`"md"` = markdown, `"latex"` = LaTeX)
+#' italicized.
+#' @param type Type of formatting (`"md"` = markdown, `"latex"` = LaTeX).
 #'
 #' @return
 #' A character string that has either Markdown or LaTeX formatting for italics
@@ -102,8 +102,8 @@ format_chr <- function(x,
 
 #' Format subscript text
 #'
-#' @param subscript Character string or NULL
-#' @param type Type of formatting (`"md"` = markdown, `"latex"` = LaTeX)
+#' @param subscript Character string or NULL.
+#' @param type Type of formatting (`"md"` = markdown, `"latex"` = LaTeX).
 #'
 #' @return
 #' A character string that is formatted as subscript for either Markdown or

diff --git a/R/format_stats.R b/R/format_stats.R
@@ -16,8 +16,11 @@
 #' @param x Statistical object.
 #' @param ... Additional arguments passed to methods. For method-specific
 #' arguments, see [format_stats.htest()] for htest correlations, t-tests,
-#' and Wilcoxon tests and [format_stats.BFBayesFactor()] for Bayes factors from
-#' the \{[BayesFactor](https://cran.r-project.org/package=BayesFactor)\} package.
+#' and Wilcoxon tests, [format_stats.easycorrelation()] for easycorrelation
+#' correlations, [format_stats.lm()] for linear models,
+#' [format_stats.merMod()] for linear mixed models, and
+#' [format_stats.BFBayesFactor()] for Bayes factors from the
+#' \{[BayesFactor](https://cran.r-project.org/package=BayesFactor)\} package.
 #'
 #' @return
 #' A character string of statistical information formatted in Markdown or LaTeX.
@@ -35,6 +38,17 @@
 #' # Format t.test() object
 #' format_stats(t.test(mtcars$vs, mtcars$am))
 #'
+#' # Format aov() object
+#' format_stats(aov(mpg ~ cyl * hp, data = mtcars), term = "cyl")
+#'
+#' # Format lm() or glm() object
+#' format_stats(lm(mpg ~ cyl * hp, data = mtcars), term = "cyl")
+#' format_stats(glm(am ~ cyl * hp, data = mtcars, family = binomial), term = "cyl")
+#'
+#' # Format lme4::lmer() or lme4::glmer() object
+#' format_stats(lme4::lmer(mpg ~ hp + (1 | cyl), data = mtcars), term = "hp")
+#' format_stats(lme4::glmer(am ~ hp + (1 | cyl), data = mtcars, family = binomial), term = "hp")
+#'
 #' # Format BFBayesFactor object from {BayesFactor} package
 #' format_stats(BayesFactor::ttestBF(mtcars$vs, mtcars$am))
 format_stats <- function(x, ...) {

diff --git a/R/format_stats_BFBayesFactor.R b/R/format_stats_BFBayesFactor.R
@@ -10,19 +10,19 @@
 #' cutoffs (e.g., BF > 1000 or BF < 0.001). Numbers of digits, cutoffs,
 #' italics, and label subscripts are all customizable.
 #'
-#' @param x BayesFactor object or vector of numeric Bayes factor values
-#' @param digits1 Number of digits after the decimal for Bayes factors > 1
-#' @param digits2 Number of digits after the decimal for Bayes factors < 1
+#' @param x BayesFactor object or vector of numeric Bayes factor values.
+#' @param digits1 Number of digits after the decimal for Bayes factors > 1.
+#' @param digits2 Number of digits after the decimal for Bayes factors < 1.
 #' @param cutoff Cutoff for using `_BF_~10~ > <cutoff>` or
-#' `_BF_~10~ < 1 / <cutoff>` (value must be > 1)
+#' `_BF_~10~ < 1 / <cutoff>` (value must be > 1).
 #' @param label Character string for label before Bayes factor. Default is BF.
 #' Set `label = ""` to return just the formatted Bayes factor value with no
-#' label or operator (`=`, `<`, `>`)
+#' label or operator (`=`, `<`, `>`).
 #' @param italics Logical value (default = TRUE) for whether label should be
-#' italicized (_BF_ or BF)
+#' italicized (_BF_ or BF).
 #' @param subscript Subscript to include with _BF_ label (`"10"`, `"01"`, or
-#' `""` for no subscript)
-#' @param type Type of formatting (`"md"` = markdown, `"latex"` = LaTeX)
+#' `""` for no subscript).
+#' @param type Type of formatting (`"md"` = markdown, `"latex"` = LaTeX).
 #' @param ... Additional arguments passed to methods.
 #'
 #'

diff --git a/R/format_stats_aov.R b/R/format_stats_aov.R
@@ -7,21 +7,21 @@
 #' over numbers of digits, leading zeros, italics, degrees of freedom,
 #' and output format of Markdown or LaTeX.
 #'
-#' @param x An `aov` object
+#' @param x An `aov` object from [stats::aov()].
 #' @param term Character string for row name of term to extract statistics for.
 #' This must be the exact string returned in the `summary()` output from the
-#' `aov` object
+#' `aov` object.
 #' @param digits Number of digits after the decimal for means, confidence
-#' intervals, and test statistics
+#' intervals, and test statistics.
 #' @param pdigits Number of digits after the decimal for p-values, ranging
-#' between 1-5 (also controls cutoff for small p-values)
+#' between 1-5 (also controls cutoff for small p-values).
 #' @param pzero Logical value (default = FALSE) for whether to include
-#' leading zero for p-values
+#' leading zero for p-values.
 #' @param italics Logical value (default = TRUE) for whether _p_ label should be
-#' italicized
+#' italicized.
 #' @param dfs Formatting for degrees of freedom ("par" = parenthetical,
-#' "sub" = subscript, "none" = do not print degrees of freedom)
-#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX)
+#' "sub" = subscript, "none" = do not print degrees of freedom).
+#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX).
 #' @param ... Additional arguments passed to methods.
 #'
 #' @return

diff --git a/R/format_stats_htest.R b/R/format_stats_htest.R
@@ -12,21 +12,21 @@
 #' italics, degrees of freedom, and mean labels, and output format of
 #' Markdown or LaTeX.
 #'
-#' @param x An `htest` object
+#' @param x An `htest` object from [cor.test()], [t.test()], or [wilcox.test()].
 #' @param digits Number of digits after the decimal for means, confidence
-#' intervals, and test statistics
+#' intervals, and test statistics.
 #' @param pdigits Number of digits after the decimal for p-values, ranging
-#' between 1-5 (also controls cutoff for small p-values)
+#' between 1-5 (also controls cutoff for small p-values).
 #' @param pzero Logical value (default = FALSE) for whether to include
-#' leading zero for p-values
+#' leading zero for p-values.
 #' @param full Logical value (default = TRUE) for whether to include means
-#' and confidence intervals or just test statistic and p-value
+#' and confidence intervals or just test statistic and p-value.
 #' @param italics Logical value (default = TRUE) for whether _p_ label should be
-#' italicized
+#' italicized.
 #' @param dfs Formatting for degrees of freedom ("par" = parenthetical,
-#' "sub" = subscript, "none" = do not print degrees of freedom)
-#' @param mean Formatting for mean label ("abbr" = M, "word" = Mean)
-#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX)
+#' "sub" = subscript, "none" = do not print degrees of freedom).
+#' @param mean Formatting for mean label ("abbr" = M, "word" = Mean).
+#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX).
 #' @param ... Additional arguments passed to methods.
 #'
 #' @return

diff --git a/R/format_stats_lm.R b/R/format_stats_lm.R
@@ -1,8 +1,8 @@
 
-#' Format (generalized) linear regression statistics
+#' Format linear model statistics
 #'
 #' @description
-#' This method formats (generalized) linear regression statistics from the class
+#' This method formats (generalized) linear model statistics from the class
 #' `lm` or `glm`. If no term is specified, overall model statistics are
 #' returned. For linear models (`lm` objects), this includes the R-squared,
 #' F statistic, and p-value. For generalized linear models (`glm` objects),
@@ -11,24 +11,23 @@
 #' control over numbers of digits, leading zeros, italics, degrees of freedom,
 #' and output format of Markdown or LaTeX.
 #'
-#' @param x An `lm` or `glm` object
+#' @param x An `lm` or `glm` object from [stats::lm()] or [stats::glm()].
 #' @param term Character string for row name of term to extract statistics for.
 #' This must be the exact string returned in the `summary()` output from the
-#' `lm` or `glm` object
-#' @param digits Number of digits after the decimal for means, confidence
-#' intervals, and test statistics
+#' `lm` or `glm` object.
+#' @param digits Number of digits after the decimal for test statistics.
 #' @param pdigits Number of digits after the decimal for p-values, ranging
-#' between 1-5 (also controls cutoff for small p-values)
+#' between 1-5 (also controls cutoff for small p-values).
 #' @param pzero Logical value (default = FALSE) for whether to include
-#' leading zero for p-values
+#' leading zero for p-values.
 #' @param full Logical value (default = TRUE) for whether to include extra
 #' info (e.g., standard errors and t-values or z-values for terms)
-#' or just test statistic and p-value
+#' or just test statistic and p-value.
 #' @param italics Logical value (default = TRUE) for whether statistics labels
-#' should be italicized
+#' should be italicized.
 #' @param dfs Formatting for degrees of freedom ("par" = parenthetical,
-#' "sub" = subscript, "none" = do not print degrees of freedom)
-#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX)
+#' "sub" = subscript, "none" = do not print degrees of freedom).
+#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX).
 #' @param ... Additional arguments passed to methods.
 #'
 #' @return
@@ -85,7 +84,7 @@ format_stats.lm <- function(x,
   model_type <- ifelse(inherits(x, "glm"), "glm", "lm")
   summ <- summary(x)
 
-  # Overall statistics for linear regression
+  # Overall statistics for linear model
   if (is.null(term) & model_type == "lm") {
     r2 <- summ$adj.r.squared
     f <- summ$fstatistic
@@ -151,7 +150,7 @@ format_stats.lm <- function(x,
     }
     # Term-specific statistics for linear and generalized linear models
   } else {
-    # For linear regression
+    # For linear model
     if (model_type == "lm") {
       terms <- names(x$coefficients)
       stopifnot("Argument `term` not found in model terms." = term %in% terms)
@@ -162,7 +161,7 @@ format_stats.lm <- function(x,
       z <- summ$coefficients[term_num, "t value"]
       p_value <- summ$coefficients[term_num, "Pr(>|t|)"]
       z_lab <- "t"
-      # For generalized linear regression
+      # For generalized linear model
     } else {
       terms <- rownames(summ$coefficients)
       stopifnot("Argument `term` not found in model terms." = term %in% terms)

diff --git a/R/format_stats_merMod.R b/R/format_stats_merMod.R
@@ -0,0 +1,125 @@
+
+#' Format linear mixed model statistics
+#'
+#' @description
+#' This method formats (generalized) linear mixed model statistics from the
+#' class `lmerMod` or `glmerMod` from the
+#' \{[lme4](https://cran.r-project.org/package=lme4)\} package.
+#' Only fixed effects can be extracted.
+#' The default output is APA formatted, but this function allows
+#' control over numbers of digits, leading zeros, italics,
+#' and output format of Markdown or LaTeX.
+#'
+#' @param x An `lmerMod` or `glmerMod` object from [lme4::lmer()] or
+#' [lme4::glmer()].
+#' @param term Character string for row name of term to extract statistics for.
+#' This must be the exact string returned in the `summary()` output from the
+#' `lmerMod` or `glmerMod` object and can only be fixed effects.
+#' @param digits Number of digits after the decimal for test statistics.
+#' @param pdigits Number of digits after the decimal for p-values, ranging
+#' between 1-5 (also controls cutoff for small p-values).
+#' @param pzero Logical value (default = FALSE) for whether to include
+#' leading zero for p-values.
+#' @param full Logical value (default = TRUE) for whether to include extra
+#' info (e.g., standard errors and t-values or z-values for terms)
+#' or just test statistic and p-value.
+#' @param italics Logical value (default = TRUE) for whether statistics labels
+#' should be italicized.
+#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX).
+#' @param ... Additional arguments passed to methods.
+#'
+#' @return
+#' A character string of statistical information formatted in Markdown or LaTeX.
+#'
+#' @method format_stats merMod
+#' @family functions for printing statistical objects
+#' @export
+#'
+#' @examples
+#' library(lme4)
+#' test_lmer <- lmer(mpg ~ hp + (1 | cyl), data = mtcars)
+#' test_glmer <- glmer(am ~ hp + (1 | cyl), data = mtcars, family = binomial)
+#'
+#' # Format linear mixed model term statistics
+#' format_stats(test_lmer, term = "hp")
+#'
+#' # Format generalized linear mixed model term statistics
+#' format_stats(test_glmer, term = "hp")
+#'
+#' # Remove italics
+#' format_stats(test_lmer, term = "hp", italics = FALSE)
+#'
+#' # Change digits and add leading zero to p-value
+#' format_stats(test_lmer, term = "hp", digits = 3, pdigits = 4, pzero = TRUE)
+#'
+#' # Format for LaTeX
+#' format_stats(test_lmer, term = "hp", type = "latex")
+format_stats.merMod <- function(x,
+                                term = NULL,
+                                digits = 3,
+                                pdigits = 3,
+                                pzero = FALSE,
+                                full = TRUE,
+                                italics = TRUE,
+                                type = "md",
+                                ...) {
+  # Validate arguments
+  # `term` must be a single string: it is looked up as one row of the
+  # fixed-effects coefficient table, so a multi-element vector would be
+  # recycled against the row names and silently select the wrong rows.
+  check_string(term)
+  check_number_whole(digits, min = 0, allow_null = TRUE)
+  check_number_whole(pdigits, min = 1, max = 5)
+  check_bool(pzero)
+  check_bool(full)
+  check_bool(italics)
+  check_string(type)
+  check_match(type, c("md", "latex"))
+
+  # Generalized models (glmerMod) report z-values and p-values in their
+  # summary; every other merMod object is treated as a linear mixed model,
+  # for which only t-values are extracted (no p-value is printed).
+  is_glmer <- inherits(x, "glmerMod")
+
+  # Fixed-effects coefficient table from the model summary
+  coeffs <- as.data.frame(summary(x)$coefficients)
+  terms <- rownames(coeffs)
+  stopifnot("Argument `term` not found in model terms." = term %in% terms)
+  term_num <- match(term, terms)
+
+  estimate <- coeffs[term_num, "Estimate"]
+  se <- coeffs[term_num, "Std. Error"]
+  if (is_glmer) {
+    # For generalized linear mixed model
+    z <- coeffs[term_num, "z value"]
+    z_lab <- "z"
+    pvalue <- format_p(coeffs[term_num, "Pr(>|z|)"],
+      digits = pdigits, pzero = pzero,
+      italics = italics, type = type
+    )
+  } else {
+    # For linear mixed model
+    z <- coeffs[term_num, "t value"]
+    z_lab <- "t"
+  }
+
+  # Format values
+  stat_value <- format_num(estimate, digits = digits, pzero = TRUE)
+  se_value <- format_num(se, digits = digits, pzero = TRUE)
+  z_value <- format_num(z, digits = digits, pzero = TRUE)
+
+  # Build label (`type` has already been restricted to "md" or "latex")
+  if (italics) {
+    beta_chr <- if (type == "md") "\u03B2" else "\\beta"
+    stat_label <- format_chr(beta_chr, italics = TRUE, type = type)
+  } else {
+    stat_label <- if (type == "md") "\u03B2" else "\\textbeta"
+  }
+
+  # Create statistics string: the estimate is always reported, the standard
+  # error and test statistic only when `full = TRUE`, and the p-value only
+  # for generalized models.
+  stats_string <- paste0(stat_label, " = ", stat_value)
+  if (full) {
+    stats_string <- paste0(
+      stats_string, ", SE = ", se_value, ", ",
+      format_chr(z_lab, italics = italics, type = type), " = ", z_value
+    )
+  }
+  if (is_glmer) {
+    stats_string <- paste0(stats_string, ", ", pvalue)
+  }
+  stats_string
+}