Merge pull request #8 from JeffreyRStevens/devel

Add format_stats lm method
JeffreyRStevens · Dec 1, 2024 · b148f47 · b148f47
2 parents ba77757 + fc7b3ff
commit b148f47
Show file tree

Hide file tree

Showing 21 changed files with 469 additions and 36 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -5,6 +5,7 @@ S3method(format_stats,aov)
 S3method(format_stats,default)
 S3method(format_stats,easycorrelation)
 S3method(format_stats,htest)
+S3method(format_stats,lm)
 export(format_bf)
 export(format_chr)
 export(format_corr)

diff --git a/R/format_stats_lm.R b/R/format_stats_lm.R
@@ -0,0 +1,202 @@
+
+#' Format (generalized) linear regression statistics
+#'
+#' @description
+#' This method formats (generalized) linear regression statistics from the class
+#' `lm` or `glm`. If no term is specified, overall model statistics are
+#' returned. For linear models (`lm` objects), this includes the R-squared,
+#' F statistic, and p-value. For generalized linear models (`glm` objects),
+#' this includes deviance and AIC.
+#' The default output is APA formatted, but this function allows
+#' control over numbers of digits, leading zeros, italics, degrees of freedom,
+#' and output format of Markdown or LaTeX.
+#'
+#' @param x An `lm` or `glm` object
+#' @param term Character string for row name of term to extract statistics for.
+#' This must be the exact string returned in the `summary()` output from the
+#' `lm` or `glm` object
+#' @param digits Number of digits after the decimal for means, confidence
+#' intervals, and test statistics
+#' @param pdigits Number of digits after the decimal for p-values, ranging
+#' between 1-5 (also controls cutoff for small p-values)
+#' @param pzero Logical value (default = FALSE) for whether to include
+#' leading zero for p-values
+#' @param full Logical value (default = TRUE) for whether to include extra
+#' info (e.g., standard errors and t-values or z-values for terms)
+#' or just test statistic and p-value
+#' @param italics Logical value (default = TRUE) for whether statistics labels
+#' should be italicized
+#' @param dfs Formatting for degrees of freedom ("par" = parenthetical,
+#' "sub" = subscript, "none" = do not print degrees of freedom)
+#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX)
+#' @param ... Additional arguments passed to methods.
+#'
+#' @return
+#' A character string of statistical information formatted in Markdown or LaTeX.
+#'
+#' @method format_stats lm
+#' @family functions for printing statistical objects
+#' @export
+#'
+#' @examples
+#' test_lm <- lm(mpg ~ cyl * hp, data = mtcars)
+#' test_glm <- glm(am ~ cyl * hp, data = mtcars, family = binomial)
+#'
+#' # Format linear model overall statistics
+#' format_stats(test_lm)
+#'
+#' # Format linear model term statistics
+#' format_stats(test_lm, term = "cyl")
+#'
+#' # Format generalized linear model overall statistics
+#' format_stats(test_glm)
+#'
+#' # Format generalized linear model term statistics
+#' format_stats(test_glm, term = "cyl")
+#'
+#' # Remove italics and make degrees of freedom subscripts
+#' format_stats(test_lm, term = "cyl", italics = FALSE, dfs = "sub")
+#'
+#' # Change digits and add leading zero to p-value
+#' format_stats(test_lm, term = "hp", digits = 3, pdigits = 4, pzero = TRUE)
+#'
+#' # Format for LaTeX
+#' format_stats(test_lm, term = "hp", type = "latex")
+format_stats.lm <- function(x,
+                            term = NULL,
+                            digits = 3,
+                            pdigits = 3,
+                            pzero = FALSE,
+                            full = TRUE,
+                            italics = TRUE,
+                            dfs = "par",
+                            type = "md",
+                            ...) {
+  # Validate arguments
+  check_character(term, allow_null = TRUE)
+  check_number_whole(digits, min = 0, allow_null = TRUE)
+  check_number_whole(pdigits, min = 1, max = 5)
+  check_bool(pzero)
+  check_bool(full)
+  check_bool(italics)
+  check_string(type)
+  check_match(type, c("md", "latex"))
+
+  model_type <- ifelse(inherits(x, "glm"), "glm", "lm")
+  summ <- summary(x)
+
+  # Overall statistics for linear regression
+  if (is.null(term) & model_type == "lm") {
+    r2 <- summ$adj.r.squared
+    f <- summ$fstatistic
+    f_stat <- f[1]
+    df1 <- f[2]
+    df2 <- f[3]
+    p_value <- stats::pf(f[1], f[2], f[3], lower.tail = FALSE)
+
+    # Build label
+    r2_label <- dplyr::case_when(
+      italics & identical(type, "md") ~ paste0(format_chr("R", italics = italics, type = type), "^2^"),
+      identical(type, "latex") ~ paste0(format_chr("R", italics = italics, type = type), "$^{2}$")
+    )
+    r2_value <- format_num(r2, digits = digits)
+
+    fstatlab <- "F"
+    fstat_label <- dplyr::case_when(
+      !italics ~ paste0(fstatlab),
+      identical(type, "md") ~ paste0("_", fstatlab, "_"),
+      identical(type, "latex") ~ paste0("$", fstatlab, "$")
+    )
+    fstat_label <- dplyr::case_when(identical(dfs, "par") ~ paste0(fstat_label, "(", df1, ", ", df2, ")"),
+                                   identical(dfs, "sub") & identical(type, "md") ~ paste0(fstat_label, "~", df1, ",", df2, "~"),
+                                   identical(dfs, "sub") & identical(type, "latex") ~ paste0(fstat_label, "$_{", df1, ",", df2, "}$"),
+                                   .default = fstat_label
+    )[1]
+    fstat_value <- format_num(f_stat, digits = digits, pzero = TRUE)
+    pvalue <- format_p(p_value,
+                       digits = pdigits, pzero = pzero,
+                       italics = italics, type = type
+    )
+
+    # Create statistics string
+    if (full) {
+      mean_label <- paste0(r2_label, " = ")
+      mean_value <- r2_value
+      stat_label <- fstat_label
+      stat_value <- fstat_value
+      cis <- NULL
+    } else {
+      stat_label <- r2_label
+      stat_value <- r2_value
+      mean_label <- mean_value <- cis <- NULL
+    }
+
+    build_string(mean_label = mean_label,
+                 mean_value = mean_value,
+                 cis = cis,
+                 stat_label = stat_label,
+                 stat_value = stat_value,
+                 pvalue = pvalue,
+                 full = full)
+    # Overall statistics for generalized linear model
+  } else if (is.null(term) & model_type == "glm") {
+    if (full) {
+      stat_label <- dplyr::case_when(
+        italics & identical(type, "md") ~ paste0(format_chr("\u03C7", italics = italics, type = type), "^2^ = "),
+        identical(type, "latex") ~ paste0(format_chr("\\chi", italics = italics, type = type), "$^{2}$ = ")
+      )
+      paste0("Deviance = ", format_num(summ$deviance, digits = digits), ", ", stat_label, format_num(summ$null.deviance - summ$deviance, digits = digits), ", AIC = ", format_num(summ$aic, digits = digits))
+    } else {
+      paste0("Deviance = ", format_num(summ$deviance, digits = digits), ", AIC = ", format_num(summ$aic, digits = digits))
+    }
+    # Term-specific statistics for linear and generalized linear models
+  } else {
+    # For linear regression
+    if (model_type == "lm") {
+      terms <- names(x$coefficients)
+      stopifnot("Argument `term` not found in model terms." = term %in% terms)
+      term_num <- which(terms == term)
+
+      estimate <- summ$coefficients[term_num]
+      se <- summ$coefficients[term_num, "Std. Error"]
+      z <- summ$coefficients[term_num, "t value"]
+      p_value <- summ$coefficients[term_num, "Pr(>|t|)"]
+      z_lab <- "t"
+      # For generalized linear regression
+    } else {
+      terms <- rownames(summ$coefficients)
+      stopifnot("Argument `term` not found in model terms." = term %in% terms)
+      term_num <- which(terms == term)
+
+      estimate <- summ$coefficients[term_num, "Estimate"]
+      se <- summ$coefficients[term_num, "Std. Error"]
+      z <- summ$coefficients[term_num, "z value"]
+      p_value <- summ$coefficients[term_num, "Pr(>|z|)"]
+      z_lab <- "z"
+    }
+
+    # Format values
+    stat_value <- format_num(estimate, digits = digits, pzero = TRUE)
+    se_value <- format_num(se, digits = digits, pzero = TRUE)
+    z_value <- format_num(z, digits = digits, pzero = TRUE)
+    pvalue <- format_p(p_value,
+                       digits = pdigits, pzero = pzero,
+                       italics = italics, type = type
+    )
+
+    # Build label
+    stat_label <- dplyr::case_when(
+      !italics & identical(type, "md") ~ "\u03B2",
+      !italics & identical(type, "latex") ~ "\\textbeta",
+      italics & identical(type, "md") ~ format_chr("\u03B2", italics = TRUE, type = "md"),
+      italics & identical(type, "latex") ~ format_chr("\\beta", italics = TRUE, type = "latex")
+    )
+
+    # Create statistics string
+    if(full) {
+      paste0(stat_label, " = ", stat_value, ", SE = ", se_value, ", ", format_chr(z_lab, italics = italics, type = type), " = ", z_value, ", ", pvalue)
+    } else {
+      paste0(stat_label, " = ", stat_value, ", ", pvalue)
+    }
+  }
+}
diff --git a/R/format_statvalues.R b/R/format_statvalues.R
@@ -61,14 +61,16 @@ format_corr <- function(x,
 
   # Build label
   stat_label <- dplyr::case_when(
-    !italics & identical(corr_method, "pearson") ~ paste0("r"),
-    !italics & identical(corr_method, "spearman") & identical(type, "md") ~ paste0("\u03C1"),
-    !italics & identical(corr_method, "spearman") & identical(type, "latex") ~ paste0("\\rho"),
-    !italics & identical(corr_method, "kendall") & identical(type, "md") ~ paste0("\u03C4"),
-    !italics & identical(corr_method, "kendall") & identical(type, "latex") ~ paste0("\\tau"),
-    identical(corr_method, "pearson") ~ paste0(format_chr("r", italics = italics, type = type)),
-    identical(corr_method, "kendall") ~ paste0(format_chr("\u03C4", italics = italics, type = type)),
-    identical(corr_method, "spearman") ~ paste0(format_chr("\u03C1", italics = italics, type = type)),
+    !italics & identical(corr_method, "pearson") ~ "r",
+    !italics & identical(corr_method, "spearman") & identical(type, "md") ~ "\u03C1",
+    !italics & identical(corr_method, "spearman") & identical(type, "latex") ~ "\\textrho",
+    !italics & identical(corr_method, "kendall") & identical(type, "md") ~ "\u03C4",
+    !italics & identical(corr_method, "kendall") & identical(type, "latex") ~ "\\texttau",
+    identical(corr_method, "pearson") ~ format_chr("r", italics = italics, type = type),
+    identical(corr_method, "kendall") & identical(type, "md") ~ format_chr("\u03C4", italics = italics, type = type),
+    identical(corr_method, "kendall") & identical(type, "latex") ~ format_chr("\\rho", italics = italics, type = type),
+    identical(corr_method, "spearman") & identical(type, "md") ~ format_chr("\u03C1", italics = italics, type = type),
+    identical(corr_method, "spearman") & identical(type, "latex") ~ format_chr("\\tau", italics = italics, type = type)
   )
 
   # Create statistics string

diff --git a/R/format_summary.R b/R/format_summary.R
@@ -260,23 +260,3 @@ format_medianiqr <- function(x = NULL,
                              type = "md") {
   format_summary(x = x, tendency = tendency, error = error, values = values, digits = digits, tendlabel = tendlabel, italics = italics, subscript = subscript, units = units, display = display, errorlabel = errorlabel, type = type)
 }
-
-
-#' @keywords internal
-build_string <- function(mean_label = NULL,
-                         mean_value = NULL,
-                         cis = NULL,
-                         stat_label,
-                         stat_value,
-                         pvalue,
-                         full) {
-  dplyr::case_when(full & !is.null(mean_label) & !is.null(mean_value) & !is.null(cis) ~
-                     paste0(mean_label, mean_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", stat_label, " = ", stat_value, ", ", pvalue),
-                   full & is.null(mean_label) & is.null(mean_value) & !is.null(cis) ~
-                     paste0(stat_label, " = ", stat_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", pvalue),
-                   !full | (is.null(mean_label) & !is.null(mean_value) & !is.null(cis)) ~
-                     paste0(stat_label, " = ", stat_value, ", ", pvalue))
-}
-
-
-
diff --git a/R/utils.R b/R/utils.R
@@ -0,0 +1,18 @@
+#' @keywords internal
+build_string <- function(mean_label = NULL,
+                         mean_value = NULL,
+                         cis = NULL,
+                         stat_label,
+                         stat_value,
+                         pvalue,
+                         full) {
+  dplyr::case_when(full & !is.null(mean_label) & !is.null(mean_value) & !is.null(cis) ~
+                     paste0(mean_label, mean_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", stat_label, " = ", stat_value, ", ", pvalue),
+                   full & is.null(mean_label) & is.null(mean_value) & !is.null(cis) ~
+                     paste0(stat_label, " = ", stat_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", pvalue),
+                   full & !is.null(mean_label) & !is.null(mean_value) & is.null(cis) ~
+                     paste0(mean_label, mean_value, ", ", stat_label, " = ", stat_value, ", ", pvalue),
+                   !full | (is.null(mean_label) & !is.null(mean_value) & !is.null(cis)) ~
+                     paste0(stat_label, " = ", stat_value, ", ", pvalue))
+}
+
diff --git a/README.Rmd b/README.Rmd
@@ -101,6 +101,7 @@ Fuel efficiency and engine displacement were highly correlated
 from `t.test()` and `wilcox.test()`, including one-sample, two-sample 
 independent, and paired tests)
   - ANOVAs from `aov()`
+  - Linear models from `lm()` and generalized linear models from `glm()`
   - Bayes factors (output from [`{BayesFactor}`](https://cran.r-project.org/package=BayesFactor) package)
 * `format_summary()`: Means and error (calculates from vector or uses vector 
 of mean and error interval or mean, lower error limit, and upper error limit)

diff --git a/README.md b/README.md
@@ -107,6 +107,7 @@ Fuel efficiency and engine displacement were highly correlated (r =
     from `t.test()` and `wilcox.test()`, including one-sample,
     two-sample independent, and paired tests)
   - ANOVAs from `aov()`
+  - Linear models from `lm()` and generalized linear models from `glm()`
   - Bayes factors (output from
     [`{BayesFactor}`](https://cran.r-project.org/package=BayesFactor)
     package)

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -8,26 +8,27 @@ template:
     fg: "#f8f9fa"
 
 reference:
-- title: Format numbers/characters
-  contents:
-  - format_num
-  - format_scientific
-  - format_chr
-  - format_sub
 - title: Format statistical objects
   contents:
   - format_stats
   - format_stats.aov
   - format_stats.BFBayesFactor
   - format_stats.easycorrelation
   - format_stats.htest
+  - format_stats.lm
 - title: Format statistical values
   contents:
   - format_summary
   - format_p
   - format_bf
   - format_corr
   - format_ttest
+- title: Format numbers/characters
+  contents:
+  - format_num
+  - format_scientific
+  - format_chr
+  - format_sub
 
 authors:
   Jeffrey R. Stevens:

diff --git a/man/format_bf.Rd b/man/format_bf.Rd
diff --git a/man/format_corr.Rd b/man/format_corr.Rd
diff --git a/man/format_stats.BFBayesFactor.Rd b/man/format_stats.BFBayesFactor.Rd
diff --git a/man/format_stats.Rd b/man/format_stats.Rd
diff --git a/man/format_stats.aov.Rd b/man/format_stats.aov.Rd
diff --git a/man/format_stats.easycorrelation.Rd b/man/format_stats.easycorrelation.Rd
diff --git a/man/format_stats.htest.Rd b/man/format_stats.htest.Rd