diff --git a/NAMESPACE b/NAMESPACE index ba73d2d..f7b1c60 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ S3method(format_stats,aov) S3method(format_stats,default) S3method(format_stats,easycorrelation) S3method(format_stats,htest) +S3method(format_stats,lm) export(format_bf) export(format_chr) export(format_corr) diff --git a/R/format_stats_lm.R b/R/format_stats_lm.R new file mode 100644 index 0000000..4219bdc --- /dev/null +++ b/R/format_stats_lm.R @@ -0,0 +1,202 @@ + +#' Format (generalized) linear regression statistics +#' +#' @description +#' This method formats (generalized) linear regression statistics from the class +#' `lm` or `glm`. If no term is specified, overall model statistics are +#' returned. For linear models (`lm` objects), this includes the R-squared, +#' F statistic, and p-value. For generalized linear models (`glm` objects), +#' this includes deviance and AIC. +#' The default output is APA formatted, but this function allows +#' control over numbers of digits, leading zeros, italics, degrees of freedom, +#' and output format of Markdown or LaTeX. +#' +#' @param x An `lm` or `glm` object +#' @param term Character string for row name of term to extract statistics for. 
+#' This must be the exact string returned in the `summary()` output from the +#' `lm` or `glm` object +#' @param digits Number of digits after the decimal for means, confidence +#' intervals, and test statistics +#' @param pdigits Number of digits after the decimal for p-values, ranging +#' between 1-5 (also controls cutoff for small p-values) +#' @param pzero Logical value (default = FALSE) for whether to include +#' leading zero for p-values +#' @param full Logical value (default = TRUE) for whether to include extra +#' info (e.g., standard errors and t-values or z-values for terms) +#' or just test statistic and p-value +#' @param italics Logical value (default = TRUE) for whether statistics labels +#' should be italicized +#' @param dfs Formatting for degrees of freedom ("par" = parenthetical, +#' "sub" = subscript, "none" = do not print degrees of freedom) +#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX) +#' @param ... Additional arguments passed to methods. +#' +#' @return +#' A character string of statistical information formatted in Markdown or LaTeX. 
+#' +#' @method format_stats lm +#' @family functions for printing statistical objects +#' @export +#' +#' @examples +#' test_lm <- lm(mpg ~ cyl * hp, data = mtcars) +#' test_glm <- glm(am ~ cyl * hp, data = mtcars, family = binomial) +#' +#' # Format linear model overall statistics +#' format_stats(test_lm) +#' +#' # Format linear model term statistics +#' format_stats(test_lm, term = "cyl") +#' +#' # Format generalized linear model overall statistics +#' format_stats(test_glm) +#' +#' # Format generalized linear model term statistics +#' format_stats(test_glm, term = "cyl") +#' +#' # Remove italics and make degrees of freedom subscripts +#' format_stats(test_lm, term = "cyl", italics = FALSE, dfs = "sub") +#' +#' # Change digits and add leading zero to p-value +#' format_stats(test_lm, term = "hp", digits = 3, pdigits = 4, pzero = TRUE) +#' +#' # Format for LaTeX +#' format_stats(test_lm, term = "hp", type = "latex") +format_stats.lm <- function(x, + term = NULL, + digits = 3, + pdigits = 3, + pzero = FALSE, + full = TRUE, + italics = TRUE, + dfs = "par", + type = "md", + ...) 
{ + # Validate arguments + check_character(term, allow_null = TRUE) + check_number_whole(digits, min = 0, allow_null = TRUE) + check_number_whole(pdigits, min = 1, max = 5) + check_bool(pzero) + check_bool(full) + check_bool(italics) + check_string(type) + check_match(type, c("md", "latex")) + + model_type <- if (inherits(x, "glm")) "glm" else "lm" + summ <- summary(x) + + # Overall statistics for linear regression + if (is.null(term) && model_type == "lm") { + r2 <- summ$adj.r.squared # the adjusted R-squared is the value reported + f <- summ$fstatistic + f_stat <- f[1] + df1 <- f[2] + df2 <- f[3] + p_value <- stats::pf(f_stat, df1, df2, lower.tail = FALSE) + + # Build label + r2_label <- dplyr::case_when( + identical(type, "md") ~ paste0(format_chr("R", italics = italics, type = type), "^2^"), + identical(type, "latex") ~ paste0(format_chr("R", italics = italics, type = type), "$^{2}$") + ) + r2_value <- format_num(r2, digits = digits) + + fstatlab <- "F" + fstat_label <- dplyr::case_when( + !italics ~ paste0(fstatlab), + identical(type, "md") ~ paste0("_", fstatlab, "_"), + identical(type, "latex") ~ paste0("$", fstatlab, "$") + ) + fstat_label <- dplyr::case_when(identical(dfs, "par") ~ paste0(fstat_label, "(", df1, ", ", df2, ")"), + identical(dfs, "sub") & identical(type, "md") ~ paste0(fstat_label, "~", df1, ",", df2, "~"), + identical(dfs, "sub") & identical(type, "latex") ~ paste0(fstat_label, "$_{", df1, ",", df2, "}$"), + .default = fstat_label + )[1] + fstat_value <- format_num(f_stat, digits = digits, pzero = TRUE) + pvalue <- format_p(p_value, + digits = pdigits, pzero = pzero, + italics = italics, type = type + ) + + # Create statistics string + if (full) { + mean_label <- paste0(r2_label, " = ") + mean_value <- r2_value + stat_label <- fstat_label + stat_value <- fstat_value + cis <- NULL + } else { + stat_label <- r2_label + stat_value <- r2_value + mean_label <- mean_value <- cis <- NULL + } + + build_string(mean_label = mean_label, + mean_value = mean_value, + cis = cis, + stat_label 
= stat_label, + stat_value = stat_value, + pvalue = pvalue, + full = full) + # Overall statistics for generalized linear model + } else if (is.null(term) && model_type == "glm") { + if (full) { + stat_label <- dplyr::case_when( + identical(type, "md") ~ paste0(format_chr("\u03C7", italics = italics, type = type), "^2^ = "), + identical(type, "latex") ~ paste0(format_chr("\\chi", italics = italics, type = type), "$^{2}$ = ") + ) + paste0("Deviance = ", format_num(summ$deviance, digits = digits), ", ", stat_label, format_num(summ$null.deviance - summ$deviance, digits = digits), ", AIC = ", format_num(summ$aic, digits = digits)) + } else { + paste0("Deviance = ", format_num(summ$deviance, digits = digits), ", AIC = ", format_num(summ$aic, digits = digits)) + } + # Term-specific statistics for linear and generalized linear models + } else { + # For linear regression + if (model_type == "lm") { + terms <- rownames(summ$coefficients) # summary() omits aliased (NA) coefficients, so match against its own rows + stopifnot("Argument `term` not found in model terms." = term %in% terms) + term_num <- which(terms == term) + + estimate <- summ$coefficients[term_num, "Estimate"] + se <- summ$coefficients[term_num, "Std. Error"] + z <- summ$coefficients[term_num, "t value"] + p_value <- summ$coefficients[term_num, "Pr(>|t|)"] + z_lab <- "t" + # For generalized linear regression + } else { + terms <- rownames(summ$coefficients) + stopifnot("Argument `term` not found in model terms." = term %in% terms) + term_num <- which(terms == term) + + estimate <- summ$coefficients[term_num, "Estimate"] + se <- summ$coefficients[term_num, "Std. 
Error"] + z <- summ$coefficients[term_num, 3] # "z value", or "t value" when the family estimates dispersion + p_value <- summ$coefficients[term_num, 4] # "Pr(>|z|)" or "Pr(>|t|)" + z_lab <- substr(colnames(summ$coefficients)[3], 1, 1) # "z" or "t", taken from the column name + } + + # Format values + stat_value <- format_num(estimate, digits = digits, pzero = TRUE) + se_value <- format_num(se, digits = digits, pzero = TRUE) + z_value <- format_num(z, digits = digits, pzero = TRUE) + pvalue <- format_p(p_value, + digits = pdigits, pzero = pzero, + italics = italics, type = type + ) + + # Build label + stat_label <- dplyr::case_when( + !italics & identical(type, "md") ~ "\u03B2", + !italics & identical(type, "latex") ~ "\\textbeta", + italics & identical(type, "md") ~ format_chr("\u03B2", italics = TRUE, type = "md"), + italics & identical(type, "latex") ~ format_chr("\\beta", italics = TRUE, type = "latex") + ) + + # Create statistics string + if (full) { + paste0(stat_label, " = ", stat_value, ", SE = ", se_value, ", ", format_chr(z_lab, italics = italics, type = type), " = ", z_value, ", ", pvalue) + } else { + paste0(stat_label, " = ", stat_value, ", ", pvalue) + } + } +} diff --git a/R/format_statvalues.R b/R/format_statvalues.R index 4b7400c..4f5fa2e 100644 --- a/R/format_statvalues.R +++ b/R/format_statvalues.R @@ -61,14 +61,16 @@ format_corr <- function(x, # Build label stat_label <- dplyr::case_when( - !italics & identical(corr_method, "pearson") ~ paste0("r"), - !italics & identical(corr_method, "spearman") & identical(type, "md") ~ paste0("\u03C1"), - !italics & identical(corr_method, "spearman") & identical(type, "latex") ~ paste0("\\rho"), - !italics & identical(corr_method, "kendall") & identical(type, "md") ~ paste0("\u03C4"), - !italics & identical(corr_method, "kendall") & identical(type, "latex") ~ paste0("\\tau"), - identical(corr_method, "pearson") ~ paste0(format_chr("r", italics = italics, type = type)), - identical(corr_method, "kendall") ~ paste0(format_chr("\u03C4", italics = italics, type = type)), - identical(corr_method, "spearman") ~ paste0(format_chr("\u03C1", italics = italics, type = 
type)), + !italics & identical(corr_method, "pearson") ~ "r", + !italics & identical(corr_method, "spearman") & identical(type, "md") ~ "\u03C1", + !italics & identical(corr_method, "spearman") & identical(type, "latex") ~ "\\textrho", + !italics & identical(corr_method, "kendall") & identical(type, "md") ~ "\u03C4", + !italics & identical(corr_method, "kendall") & identical(type, "latex") ~ "\\texttau", + identical(corr_method, "pearson") ~ format_chr("r", italics = italics, type = type), + identical(corr_method, "kendall") & identical(type, "md") ~ format_chr("\u03C4", italics = italics, type = type), + identical(corr_method, "kendall") & identical(type, "latex") ~ format_chr("\\tau", italics = italics, type = type), + identical(corr_method, "spearman") & identical(type, "md") ~ format_chr("\u03C1", italics = italics, type = type), + identical(corr_method, "spearman") & identical(type, "latex") ~ format_chr("\\rho", italics = italics, type = type) ) # Create statistics string diff --git a/R/format_summary.R b/R/format_summary.R index 1bd45f4..b5ac11e 100644 --- a/R/format_summary.R +++ b/R/format_summary.R @@ -260,23 +260,3 @@ format_medianiqr <- function(x = NULL, type = "md") { format_summary(x = x, tendency = tendency, error = error, values = values, digits = digits, tendlabel = tendlabel, italics = italics, subscript = subscript, units = units, display = display, errorlabel = errorlabel, type = type) } - - -#' @keywords internal -build_string <- function(mean_label = NULL, - mean_value = NULL, - cis = NULL, - stat_label, - stat_value, - pvalue, - full) { - dplyr::case_when(full & !is.null(mean_label) & !is.null(mean_value) & !is.null(cis) ~ - paste0(mean_label, mean_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", stat_label, " = ", stat_value, ", ", pvalue), - full & is.null(mean_label) & is.null(mean_value) & !is.null(cis) ~ - paste0(stat_label, " = ", stat_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", pvalue), - !full | (is.null(mean_label) & 
!is.null(mean_value) & !is.null(cis)) ~ - paste0(stat_label, " = ", stat_value, ", ", pvalue)) -} - - - diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..a12aeeb --- /dev/null +++ b/R/utils.R @@ -0,0 +1,20 @@ +#' @keywords internal +build_string <- function(mean_label = NULL, + mean_value = NULL, + cis = NULL, + stat_label, + stat_value, + pvalue, + full) { + # Cases are tried in order; the final `TRUE` case is the fallback, so any + # combination of NULL pieces yields "statistic, p-value" instead of NA. + dplyr::case_when(full & !is.null(mean_label) & !is.null(mean_value) & !is.null(cis) ~ + paste0(mean_label, mean_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", stat_label, " = ", stat_value, ", ", pvalue), + full & is.null(mean_label) & is.null(mean_value) & !is.null(cis) ~ + paste0(stat_label, " = ", stat_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", pvalue), + full & !is.null(mean_label) & !is.null(mean_value) & is.null(cis) ~ + paste0(mean_label, mean_value, ", ", stat_label, " = ", stat_value, ", ", pvalue), + TRUE ~ + paste0(stat_label, " = ", stat_value, ", ", pvalue)) +} + diff --git a/README.Rmd b/README.Rmd index 5ba9664..9be1f5e 100644 --- a/README.Rmd +++ b/README.Rmd @@ -101,6 +101,7 @@ Fuel efficiency and engine displacement were highly correlated from `t.test()` and `wilcox.test()`, including one-sample, two-sample independent, and paired tests) - ANOVAs from `aov()` + - Linear models from `lm()` and generalized linear models from `glm()` - Bayes factors (output from [`{BayesFactor}`](https://cran.r-project.org/package=BayesFactor) package) * `format_summary()`: Means and error (calculates from vector or uses vector of mean and error interval or mean, lower error limit, and upper error limit) diff --git a/README.md b/README.md index fa061a0..1a3c67c 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,7 @@ Fuel efficiency and engine displacement were highly correlated (r = from `t.test()` and `wilcox.test()`, including one-sample, two-sample independent, and paired tests) - ANOVAs from `aov()` + - Linear 
models from `lm()` and generalized linear models from `glm()` - Bayes factors (output from [`{BayesFactor}`](https://cran.r-project.org/package=BayesFactor) package) diff --git a/_pkgdown.yml b/_pkgdown.yml index f5b0049..cd01f9f 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -8,12 +8,6 @@ template: fg: "#f8f9fa" reference: -- title: Format numbers/characters - contents: - - format_num - - format_scientific - - format_chr - - format_sub - title: Format statistical objects contents: - format_stats @@ -21,6 +15,7 @@ reference: - format_stats.BFBayesFactor - format_stats.easycorrelation - format_stats.htest + - format_stats.lm - title: Format statistical values contents: - format_summary @@ -28,6 +23,12 @@ reference: - format_bf - format_corr - format_ttest +- title: Format numbers/characters + contents: + - format_num + - format_scientific + - format_chr + - format_sub authors: Jeffrey R. Stevens: diff --git a/man/format_bf.Rd b/man/format_bf.Rd index 461fde7..355bdf8 100644 --- a/man/format_bf.Rd +++ b/man/format_bf.Rd @@ -95,6 +95,7 @@ Other functions for printing statistical objects: \code{\link{format_stats.aov}()}, \code{\link{format_stats.easycorrelation}()}, \code{\link{format_stats.htest}()}, +\code{\link{format_stats.lm}()}, \code{\link{format_ttest}()} } \concept{functions for printing statistical objects} diff --git a/man/format_corr.Rd b/man/format_corr.Rd index 9980c7c..fd5a09e 100644 --- a/man/format_corr.Rd +++ b/man/format_corr.Rd @@ -51,6 +51,7 @@ Other functions for printing statistical objects: \code{\link{format_stats.aov}()}, \code{\link{format_stats.easycorrelation}()}, \code{\link{format_stats.htest}()}, +\code{\link{format_stats.lm}()}, \code{\link{format_ttest}()} } \concept{functions for printing statistical objects} diff --git a/man/format_stats.BFBayesFactor.Rd b/man/format_stats.BFBayesFactor.Rd index df3d7bb..8514739 100644 --- a/man/format_stats.BFBayesFactor.Rd +++ b/man/format_stats.BFBayesFactor.Rd @@ -80,6 +80,7 @@ Other functions 
for printing statistical objects: \code{\link{format_stats.aov}()}, \code{\link{format_stats.easycorrelation}()}, \code{\link{format_stats.htest}()}, +\code{\link{format_stats.lm}()}, \code{\link{format_ttest}()} } \concept{functions for printing statistical objects} diff --git a/man/format_stats.Rd b/man/format_stats.Rd index 090270d..9c11856 100644 --- a/man/format_stats.Rd +++ b/man/format_stats.Rd @@ -53,6 +53,7 @@ Other functions for printing statistical objects: \code{\link{format_stats.aov}()}, \code{\link{format_stats.easycorrelation}()}, \code{\link{format_stats.htest}()}, +\code{\link{format_stats.lm}()}, \code{\link{format_ttest}()} } \concept{functions for printing statistical objects} diff --git a/man/format_stats.aov.Rd b/man/format_stats.aov.Rd index 5380e86..37d16e8 100644 --- a/man/format_stats.aov.Rd +++ b/man/format_stats.aov.Rd @@ -74,6 +74,7 @@ Other functions for printing statistical objects: \code{\link{format_stats.BFBayesFactor}()}, \code{\link{format_stats.easycorrelation}()}, \code{\link{format_stats.htest}()}, +\code{\link{format_stats.lm}()}, \code{\link{format_ttest}()} } \concept{functions for printing statistical objects} diff --git a/man/format_stats.easycorrelation.Rd b/man/format_stats.easycorrelation.Rd index 46d3700..d9c81c2 100644 --- a/man/format_stats.easycorrelation.Rd +++ b/man/format_stats.easycorrelation.Rd @@ -74,6 +74,7 @@ Other functions for printing statistical objects: \code{\link{format_stats.BFBayesFactor}()}, \code{\link{format_stats.aov}()}, \code{\link{format_stats.htest}()}, +\code{\link{format_stats.lm}()}, \code{\link{format_ttest}()} } \concept{functions for printing statistical objects} diff --git a/man/format_stats.htest.Rd b/man/format_stats.htest.Rd index 3fab08c..32d2b61 100644 --- a/man/format_stats.htest.Rd +++ b/man/format_stats.htest.Rd @@ -98,6 +98,7 @@ Other functions for printing statistical objects: \code{\link{format_stats.BFBayesFactor}()}, \code{\link{format_stats.aov}()}, 
\code{\link{format_stats.easycorrelation}()}, +\code{\link{format_stats.lm}()}, \code{\link{format_ttest}()} } \concept{functions for printing statistical objects} diff --git a/man/format_stats.lm.Rd b/man/format_stats.lm.Rd new file mode 100644 index 0000000..efaf61c --- /dev/null +++ b/man/format_stats.lm.Rd @@ -0,0 +1,99 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/format_stats_lm.R +\name{format_stats.lm} +\alias{format_stats.lm} +\title{Format (generalized) linear regression statistics} +\usage{ +\method{format_stats}{lm}( + x, + term = NULL, + digits = 3, + pdigits = 3, + pzero = FALSE, + full = TRUE, + italics = TRUE, + dfs = "par", + type = "md", + ... +) +} +\arguments{ +\item{x}{An \code{lm} or \code{glm} object} + +\item{term}{Character string for row name of term to extract statistics for. +This must be the exact string returned in the \code{summary()} output from the +\code{lm} or \code{glm} object} + +\item{digits}{Number of digits after the decimal for means, confidence +intervals, and test statistics} + +\item{pdigits}{Number of digits after the decimal for p-values, ranging +between 1-5 (also controls cutoff for small p-values)} + +\item{pzero}{Logical value (default = FALSE) for whether to include +leading zero for p-values} + +\item{full}{Logical value (default = TRUE) for whether to include extra +info (e.g., standard errors and t-values or z-values for terms) +or just test statistic and p-value} + +\item{italics}{Logical value (default = TRUE) for whether statistics labels +should be italicized} + +\item{dfs}{Formatting for degrees of freedom ("par" = parenthetical, +"sub" = subscript, "none" = do not print degrees of freedom)} + +\item{type}{Type of formatting ("md" = markdown, "latex" = LaTeX)} + +\item{...}{Additional arguments passed to methods.} +} +\value{ +A character string of statistical information formatted in Markdown or LaTeX. 
+} +\description{ +This method formats (generalized) linear regression statistics from the class +\code{lm} or \code{glm}. If no term is specified, overall model statistics are +returned. For linear models (\code{lm} objects), this includes the R-squared, +F statistic, and p-value. For generalized linear models (\code{glm} objects), +this includes deviance and AIC. +The default output is APA formatted, but this function allows +control over numbers of digits, leading zeros, italics, degrees of freedom, +and output format of Markdown or LaTeX. +} +\examples{ +test_lm <- lm(mpg ~ cyl * hp, data = mtcars) +test_glm <- glm(am ~ cyl * hp, data = mtcars, family = binomial) + +# Format linear model overall statistics +format_stats(test_lm) + +# Format linear model term statistics +format_stats(test_lm, term = "cyl") + +# Format generalized linear model overall statistics +format_stats(test_glm) + +# Format generalized linear model term statistics +format_stats(test_glm, term = "cyl") + +# Remove italics and make degrees of freedom subscripts +format_stats(test_lm, term = "cyl", italics = FALSE, dfs = "sub") + +# Change digits and add leading zero to p-value +format_stats(test_lm, term = "hp", digits = 3, pdigits = 4, pzero = TRUE) + +# Format for LaTeX +format_stats(test_lm, term = "hp", type = "latex") +} +\seealso{ +Other functions for printing statistical objects: +\code{\link{format_bf}()}, +\code{\link{format_corr}()}, +\code{\link{format_stats}()}, +\code{\link{format_stats.BFBayesFactor}()}, +\code{\link{format_stats.aov}()}, +\code{\link{format_stats.easycorrelation}()}, +\code{\link{format_stats.htest}()}, +\code{\link{format_ttest}()} +} +\concept{functions for printing statistical objects} diff --git a/man/format_ttest.Rd b/man/format_ttest.Rd index 20868ba..4575b56 100644 --- a/man/format_ttest.Rd +++ b/man/format_ttest.Rd @@ -53,6 +53,7 @@ Other functions for printing statistical objects: \code{\link{format_stats.BFBayesFactor}()}, 
\code{\link{format_stats.aov}()}, \code{\link{format_stats.easycorrelation}()}, -\code{\link{format_stats.htest}()} +\code{\link{format_stats.htest}()}, +\code{\link{format_stats.lm}()} } \concept{functions for printing statistical objects} diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index 8662778..32606a8 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -1,4 +1,8 @@ -df <- data.frame(a = 1:10, b = 2:11, c = c(1, 8, 3, 7, 8, 2, 4, 1, 4, 5)) +set.seed(2024) +df <- data.frame(a = 1:10, + b = 2:11, + c = c(1, 8, 3, 7, 8, 2, 4, 1, 4, 5), + d = sample(0:1, 10, replace = TRUE)) test_corr <- cor.test(df$a, df$b) test_corr2 <- cor.test(df$a, df$c) test_easycorr <- correlation::correlation(df, select = "a", select2 = "c") @@ -12,6 +16,8 @@ test_ttest4 <- suppressWarnings(wilcox.test(df$a, df$b)) test_ttest5 <- suppressWarnings(wilcox.test(df$a, c(df$b, 120))) test_chisq <- chisq.test(as.table(rbind(c(762, 327, 468), c(484, 239, 477)))) test_aov <- aov(c ~ a, data = df) +test_lm <- lm(c ~ a, data = df) +test_glm <- glm(d ~ a, data = df, family = binomial) test_bf <- BayesFactor::ttestBF(df$a, mu = 5) library(rlang) diff --git a/tests/testthat/test-format_stats_aov.R b/tests/testthat/test-format_stats_aov.R index 0883c2d..aadccdc 100644 --- a/tests/testthat/test-format_stats_aov.R +++ b/tests/testthat/test-format_stats_aov.R @@ -4,6 +4,10 @@ test_that("aov ANOVAs are validated properly", { format_stats(test_aov), '`term` must be a character vector, not absent' )) + suppressMessages(expect_error( + format_stats(test_aov, term = "d"), + 'Argument `term` not found in model terms' + )) suppressMessages(expect_error( format_stats(test_aov, term = "c", digits = "xxx"), '`digits` must be a whole number or `NULL`, not the string "xxx"' diff --git a/tests/testthat/test-format_stats_lm.R b/tests/testthat/test-format_stats_lm.R new file mode 100644 index 0000000..7fb63fe --- /dev/null +++ b/tests/testthat/test-format_stats_lm.R @@ -0,0 +1,74 @@ + 
+test_that("lm linear regressions are validated properly", { + suppressMessages(expect_error( + format_stats(test_lm, term = "d"), + 'Argument `term` not found in model terms' + )) + suppressMessages(expect_error( + format_stats(test_lm, term = "c", digits = "xxx"), + '`digits` must be a whole number or `NULL`, not the string "xxx"' + )) + suppressMessages(expect_error( + format_stats(test_lm, term = "c", digits = -1), + '`digits` must be a whole number larger than or equal to 0 or `NULL`, not the number -1' + )) + suppressMessages(expect_error( + format_stats(test_lm, term = "c", digits = 1.5), + '`digits` must be a whole number or `NULL`, not the number 1.5' + )) + suppressMessages(expect_error( + format_stats(test_lm, term = "c", pdigits = "xxx"), + '`pdigits` must be a whole number, not the string "xxx"' + )) + suppressMessages(expect_error( + format_stats(test_lm, term = "c", pdigits = 0), + '`pdigits` must be a whole number between 1 and 5, not the number 0' + )) + suppressMessages(expect_error( + format_stats(test_lm, term = "c", pdigits = 7), + '`pdigits` must be a whole number between 1 and 5, not the number 7' + )) + suppressMessages(expect_error( + format_stats(test_lm, term = "c", pzero = "xxx"), + '`pzero` must be `TRUE` or `FALSE`, not the string "xxx"' + )) + suppressMessages(expect_error( + format_stats(test_lm, term = "c", full = "xxx"), + '`full` must be `TRUE` or `FALSE`, not the string "xxx"' + )) + suppressMessages(expect_error( + format_stats(test_lm, term = "c", italics = "xxx"), + '`italics` must be `TRUE` or `FALSE`, not the string "xxx"' + )) + suppressMessages(expect_error( + format_stats(test_lm, term = "c", type = "xxx"), + '`type` must be "md" or "latex", not the string "xxx"' + )) +}) + + +test_that("formatting lm linear models works properly", { + expect_equal(format_stats(test_lm), "_R_^2^ = -0.110, _F_(1, 8) = 0.111, _p_ = .748") + expect_equal(format_stats(test_lm, full = FALSE), "_R_^2^ = -0.110, _p_ = .748") + 
expect_equal(format_stats(test_lm, "a"), "_β_ = -0.103, SE = 0.310, _t_ = -0.333, _p_ = .748") + expect_equal(format_stats(test_lm, "a", digits = 2), "_β_ = -0.10, SE = 0.31, _t_ = -0.33, _p_ = .748") + expect_equal(format_stats(test_lm, "a", pdigits = 2), "_β_ = -0.103, SE = 0.310, _t_ = -0.333, _p_ = .75") + expect_equal(format_stats(test_lm, "a", pzero = TRUE), "_β_ = -0.103, SE = 0.310, _t_ = -0.333, _p_ = 0.748") + expect_equal(format_stats(test_lm, "a", full = FALSE), "_β_ = -0.103, _p_ = .748") + expect_equal(format_stats(test_lm, "a", italics = FALSE), "β = -0.103, SE = 0.310, t = -0.333, p = .748") + expect_equal(format_stats(test_lm, "a", type = "latex"), "$\\beta$ = -0.103, SE = 0.310, $t$ = -0.333, $p$ = .748") + expect_equal(format_stats(test_lm, "a", type = "latex", dfs = "sub"), "$\\beta$ = -0.103, SE = 0.310, $t$ = -0.333, $p$ = .748") # dfs does not change term-level output +}) + +test_that("formatting glm linear models works properly", { + expect_equal(format_stats(test_glm), "Deviance = 13.410, _χ_^2^ = 0.051, AIC = 17.410") + expect_equal(format_stats(test_glm, full = FALSE), "Deviance = 13.410, AIC = 17.410") + expect_equal(format_stats(test_glm, "a"), "_β_ = 0.051, SE = 0.226, _z_ = 0.224, _p_ = .822") + expect_equal(format_stats(test_glm, "a", digits = 2), "_β_ = 0.05, SE = 0.23, _z_ = 0.22, _p_ = .822") + expect_equal(format_stats(test_glm, "a", pdigits = 2), "_β_ = 0.051, SE = 0.226, _z_ = 0.224, _p_ = .82") + expect_equal(format_stats(test_glm, "a", pzero = TRUE), "_β_ = 0.051, SE = 0.226, _z_ = 0.224, _p_ = 0.822") + expect_equal(format_stats(test_glm, "a", full = FALSE), "_β_ = 0.051, _p_ = .822") + expect_equal(format_stats(test_glm, "a", italics = FALSE), "β = 0.051, SE = 0.226, z = 0.224, p = .822") + expect_equal(format_stats(test_glm, "a", type = "latex"), "$\\beta$ = 0.051, SE = 0.226, $z$ = 0.224, $p$ = .822") + expect_equal(format_stats(test_glm, "a", type = "latex", dfs = "sub"), "$\\beta$ = 0.051, SE = 0.226, $z$ = 0.224, $p$ = .822") # dfs does not change term-level output +}) diff --git 
a/vignettes/cocoon.Rmd b/vignettes/cocoon.Rmd index 07bf885..e00bbd2 100644 --- a/vignettes/cocoon.Rmd +++ b/vignettes/cocoon.Rmd @@ -128,6 +128,42 @@ To use `format_stats()` on ANOVAs, you must pass the `aov` object and a characte | `format_stats(aov_mpg_cyl_hp, term = "cyl", dfs = "sub")` | `r format_stats(aov_mpg_cyl_hp, term = "cyl", dfs = "sub")` | +### Linear models + +The `format_stats()` function also accepts objects returned by the `lm()` and `glm()` functions. It can report overall model statistics (e.g., R-squared, AIC) and term-specific statistics (e.g., coefficients, p-values). + +Let's start by creating a linear model and generalized linear model: + +```{r} +lm_mpg_cyl_hp <- lm(mpg ~ cyl * hp, data = mtcars) +summary(lm_mpg_cyl_hp) +glm_am_cyl_hp <- glm(am ~ cyl * hp, data = mtcars, family = binomial) +summary(glm_am_cyl_hp) +``` + +To extract overall model statistics from `format_stats()`, pass the `lm` or `glm` object but omit any terms. + +| Code | Output | +|----------|--------| +| `format_stats(lm_mpg_cyl_hp)` | `r format_stats(lm_mpg_cyl_hp)` | +| `format_stats(lm_mpg_cyl_hp, full = FALSE)` | `r format_stats(lm_mpg_cyl_hp, full = FALSE)` | +| `format_stats(glm_am_cyl_hp)` | `r format_stats(glm_am_cyl_hp)` | +| `format_stats(glm_am_cyl_hp, full = FALSE)` | `r format_stats(glm_am_cyl_hp, full = FALSE)` | + + +To extract term-specific statistics, pass the object and a character string describing which term to extract. Apply `summary()` to your `lm` or `glm` object and copy the text of the term you want to extract. Then you can format the number of digits of coefficients with `digits` and digits of p-values with `pdigits`. Include the leading zero for p-values with `pzero = TRUE`. Remove italics with `italics = FALSE`. With `dfs`, format degrees of freedom as parenthetical (`par`) or subscripts (`sub`) or remove them (`none`); note that `dfs` only affects the _F_ statistic in the overall statistics of `lm` objects. 
+ +| Code | Output | +|----------|-------| +| `format_stats(lm_mpg_cyl_hp, term = "cyl")` | `r format_stats(lm_mpg_cyl_hp, term = "cyl")` | +| `format_stats(lm_mpg_cyl_hp, term = "cyl:hp")` | `r format_stats(lm_mpg_cyl_hp, term = "cyl:hp")` | +| `format_stats(glm_am_cyl_hp, term = "cyl")` | `r format_stats(glm_am_cyl_hp, term = "cyl")` | +| `format_stats(lm_mpg_cyl_hp, term = "cyl", digits = 2, pdigits = 2)` | `r format_stats(lm_mpg_cyl_hp, term = "cyl", digits = 2, pdigits = 2)` | +| `format_stats(lm_mpg_cyl_hp, term = "cyl", pzero = TRUE)` | `r format_stats(lm_mpg_cyl_hp, term = "cyl", pzero = TRUE)` | +| `format_stats(lm_mpg_cyl_hp, term = "cyl", italics = FALSE)` | `r format_stats(lm_mpg_cyl_hp, term = "cyl", italics = FALSE)` | +| `format_stats(lm_mpg_cyl_hp, term = "cyl", dfs = "sub")` | `r format_stats(lm_mpg_cyl_hp, term = "cyl", dfs = "sub")` | + + ### Bayes factors The `format_stats()` function can also extract and format Bayes factors from a `BFBayesFactor` object from the [`{BayesFactor}`](https://cran.r-project.org/package=BayesFactor) package. Bayes factors are not as standardized in how they are formatted. One issue is that Bayes factors can be referenced from either the alternative hypothesis (H~1~) or the null hypothesis (H~0~). Also, as a ratio, digits after the decimal are more important below 1 than above 1.