Skip to content

Commit

Permalink
Merge pull request #8 from JeffreyRStevens/devel
Browse files Browse the repository at this point in the history
Add format_stats lm method
  • Loading branch information
JeffreyRStevens authored Dec 1, 2024
2 parents ba77757 + fc7b3ff commit b148f47
Show file tree
Hide file tree
Showing 21 changed files with 469 additions and 36 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ S3method(format_stats,aov)
S3method(format_stats,default)
S3method(format_stats,easycorrelation)
S3method(format_stats,htest)
S3method(format_stats,lm)
export(format_bf)
export(format_chr)
export(format_corr)
Expand Down
202 changes: 202 additions & 0 deletions R/format_stats_lm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@

#' Format (generalized) linear regression statistics
#'
#' @description
#' This method formats (generalized) linear regression statistics from the class
#' `lm` or `glm`. If no term is specified, overall model statistics are
#' returned. For linear models (`lm` objects), this includes the R-squared,
#' F statistic, and p-value. For generalized linear models (`glm` objects),
#' this includes deviance and AIC.
#' The default output is APA formatted, but this function allows
#' control over numbers of digits, leading zeros, italics, degrees of freedom,
#' and output format of Markdown or LaTeX.
#'
#' @param x An `lm` or `glm` object
#' @param term Character string for row name of term to extract statistics for.
#' This must be the exact string returned in the `summary()` output from the
#' `lm` or `glm` object
#' @param digits Number of digits after the decimal for means, confidence
#' intervals, and test statistics
#' @param pdigits Number of digits after the decimal for p-values, ranging
#' between 1-5 (also controls cutoff for small p-values)
#' @param pzero Logical value (default = FALSE) for whether to include
#' leading zero for p-values
#' @param full Logical value (default = TRUE) for whether to include extra
#' info (e.g., standard errors and t-values or z-values for terms)
#' or just test statistic and p-value
#' @param italics Logical value (default = TRUE) for whether statistics labels
#' should be italicized
#' @param dfs Formatting for degrees of freedom ("par" = parenthetical,
#' "sub" = subscript, "none" = do not print degrees of freedom)
#' @param type Type of formatting ("md" = markdown, "latex" = LaTeX)
#' @param ... Additional arguments passed to methods.
#'
#' @return
#' A character string of statistical information formatted in Markdown or LaTeX.
#'
#' @method format_stats lm
#' @family functions for printing statistical objects
#' @export
#'
#' @examples
#' test_lm <- lm(mpg ~ cyl * hp, data = mtcars)
#' test_glm <- glm(am ~ cyl * hp, data = mtcars, family = binomial)
#'
#' # Format linear model overall statistics
#' format_stats(test_lm)
#'
#' # Format linear model term statistics
#' format_stats(test_lm, term = "cyl")
#'
#' # Format generalized linear model overall statistics
#' format_stats(test_glm)
#'
#' # Format generalized linear model term statistics
#' format_stats(test_glm, term = "cyl")
#'
#' # Remove italics and make degrees of freedom subscripts
#' format_stats(test_lm, term = "cyl", italics = FALSE, dfs = "sub")
#'
#' # Change digits and add leading zero to p-value
#' format_stats(test_lm, term = "hp", digits = 3, pdigits = 4, pzero = TRUE)
#'
#' # Format for LaTeX
#' format_stats(test_lm, term = "hp", type = "latex")
format_stats.lm <- function(x,
term = NULL,
digits = 3,
pdigits = 3,
pzero = FALSE,
full = TRUE,
italics = TRUE,
dfs = "par",
type = "md",
...) {
# Validate arguments
check_character(term, allow_null = TRUE)
check_number_whole(digits, min = 0, allow_null = TRUE)
check_number_whole(pdigits, min = 1, max = 5)
check_bool(pzero)
check_bool(full)
check_bool(italics)
check_string(type)
check_match(type, c("md", "latex"))

model_type <- ifelse(inherits(x, "glm"), "glm", "lm")
summ <- summary(x)

# Overall statistics for linear regression
if (is.null(term) & model_type == "lm") {
r2 <- summ$adj.r.squared
f <- summ$fstatistic
f_stat <- f[1]
df1 <- f[2]
df2 <- f[3]
p_value <- stats::pf(f[1], f[2], f[3], lower.tail = FALSE)

# Build label
r2_label <- dplyr::case_when(
italics & identical(type, "md") ~ paste0(format_chr("R", italics = italics, type = type), "^2^"),
identical(type, "latex") ~ paste0(format_chr("R", italics = italics, type = type), "$^{2}$")
)
r2_value <- format_num(r2, digits = digits)

fstatlab <- "F"
fstat_label <- dplyr::case_when(
!italics ~ paste0(fstatlab),
identical(type, "md") ~ paste0("_", fstatlab, "_"),
identical(type, "latex") ~ paste0("$", fstatlab, "$")
)
fstat_label <- dplyr::case_when(identical(dfs, "par") ~ paste0(fstat_label, "(", df1, ", ", df2, ")"),
identical(dfs, "sub") & identical(type, "md") ~ paste0(fstat_label, "~", df1, ",", df2, "~"),
identical(dfs, "sub") & identical(type, "latex") ~ paste0(fstat_label, "$_{", df1, ",", df2, "}$"),
.default = fstat_label
)[1]
fstat_value <- format_num(f_stat, digits = digits, pzero = TRUE)
pvalue <- format_p(p_value,
digits = pdigits, pzero = pzero,
italics = italics, type = type
)

# Create statistics string
if (full) {
mean_label <- paste0(r2_label, " = ")
mean_value <- r2_value
stat_label <- fstat_label
stat_value <- fstat_value
cis <- NULL
} else {
stat_label <- r2_label
stat_value <- r2_value
mean_label <- mean_value <- cis <- NULL
}

build_string(mean_label = mean_label,
mean_value = mean_value,
cis = cis,
stat_label = stat_label,
stat_value = stat_value,
pvalue = pvalue,
full = full)
# Overall statistics for generalized linear model
} else if (is.null(term) & model_type == "glm") {
if (full) {
stat_label <- dplyr::case_when(
italics & identical(type, "md") ~ paste0(format_chr("\u03C7", italics = italics, type = type), "^2^ = "),
identical(type, "latex") ~ paste0(format_chr("\\chi", italics = italics, type = type), "$^{2}$ = ")
)
paste0("Deviance = ", format_num(summ$deviance, digits = digits), ", ", stat_label, format_num(summ$null.deviance - summ$deviance, digits = digits), ", AIC = ", format_num(summ$aic, digits = digits))
} else {
paste0("Deviance = ", format_num(summ$deviance, digits = digits), ", AIC = ", format_num(summ$aic, digits = digits))
}
# Term-specific statistics for linear and generalized linear models
} else {
# For linear regression
if (model_type == "lm") {
terms <- names(x$coefficients)
stopifnot("Argument `term` not found in model terms." = term %in% terms)
term_num <- which(terms == term)

estimate <- summ$coefficients[term_num]
se <- summ$coefficients[term_num, "Std. Error"]
z <- summ$coefficients[term_num, "t value"]
p_value <- summ$coefficients[term_num, "Pr(>|t|)"]
z_lab <- "t"
# For generalized linear regression
} else {
terms <- rownames(summ$coefficients)
stopifnot("Argument `term` not found in model terms." = term %in% terms)
term_num <- which(terms == term)

estimate <- summ$coefficients[term_num, "Estimate"]
se <- summ$coefficients[term_num, "Std. Error"]
z <- summ$coefficients[term_num, "z value"]
p_value <- summ$coefficients[term_num, "Pr(>|z|)"]
z_lab <- "z"
}

# Format values
stat_value <- format_num(estimate, digits = digits, pzero = TRUE)
se_value <- format_num(se, digits = digits, pzero = TRUE)
z_value <- format_num(z, digits = digits, pzero = TRUE)
pvalue <- format_p(p_value,
digits = pdigits, pzero = pzero,
italics = italics, type = type
)

# Build label
stat_label <- dplyr::case_when(
!italics & identical(type, "md") ~ "\u03B2",
!italics & identical(type, "latex") ~ "\\textbeta",
italics & identical(type, "md") ~ format_chr("\u03B2", italics = TRUE, type = "md"),
italics & identical(type, "latex") ~ format_chr("\\beta", italics = TRUE, type = "latex")
)

# Create statistics string
if(full) {
paste0(stat_label, " = ", stat_value, ", SE = ", se_value, ", ", format_chr(z_lab, italics = italics, type = type), " = ", z_value, ", ", pvalue)
} else {
paste0(stat_label, " = ", stat_value, ", ", pvalue)
}
}
}
18 changes: 10 additions & 8 deletions R/format_statvalues.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,16 @@ format_corr <- function(x,

# Build label
stat_label <- dplyr::case_when(
!italics & identical(corr_method, "pearson") ~ paste0("r"),
!italics & identical(corr_method, "spearman") & identical(type, "md") ~ paste0("\u03C1"),
!italics & identical(corr_method, "spearman") & identical(type, "latex") ~ paste0("\\rho"),
!italics & identical(corr_method, "kendall") & identical(type, "md") ~ paste0("\u03C4"),
!italics & identical(corr_method, "kendall") & identical(type, "latex") ~ paste0("\\tau"),
identical(corr_method, "pearson") ~ paste0(format_chr("r", italics = italics, type = type)),
identical(corr_method, "kendall") ~ paste0(format_chr("\u03C4", italics = italics, type = type)),
identical(corr_method, "spearman") ~ paste0(format_chr("\u03C1", italics = italics, type = type)),
!italics & identical(corr_method, "pearson") ~ "r",
!italics & identical(corr_method, "spearman") & identical(type, "md") ~ "\u03C1",
!italics & identical(corr_method, "spearman") & identical(type, "latex") ~ "\\textrho",
!italics & identical(corr_method, "kendall") & identical(type, "md") ~ "\u03C4",
!italics & identical(corr_method, "kendall") & identical(type, "latex") ~ "\\texttau",
identical(corr_method, "pearson") ~ format_chr("r", italics = italics, type = type),
identical(corr_method, "kendall") & identical(type, "md") ~ format_chr("\u03C4", italics = italics, type = type),
identical(corr_method, "kendall") & identical(type, "latex") ~ format_chr("\\rho", italics = italics, type = type),
identical(corr_method, "spearman") & identical(type, "md") ~ format_chr("\u03C1", italics = italics, type = type),
identical(corr_method, "spearman") & identical(type, "latex") ~ format_chr("\\tau", italics = italics, type = type)
)

# Create statistics string
Expand Down
20 changes: 0 additions & 20 deletions R/format_summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -260,23 +260,3 @@ format_medianiqr <- function(x = NULL,
type = "md") {
format_summary(x = x, tendency = tendency, error = error, values = values, digits = digits, tendlabel = tendlabel, italics = italics, subscript = subscript, units = units, display = display, errorlabel = errorlabel, type = type)
}


#' @keywords internal
build_string <- function(mean_label = NULL,
mean_value = NULL,
cis = NULL,
stat_label,
stat_value,
pvalue,
full) {
dplyr::case_when(full & !is.null(mean_label) & !is.null(mean_value) & !is.null(cis) ~
paste0(mean_label, mean_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", stat_label, " = ", stat_value, ", ", pvalue),
full & is.null(mean_label) & is.null(mean_value) & !is.null(cis) ~
paste0(stat_label, " = ", stat_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", pvalue),
!full | (is.null(mean_label) & !is.null(mean_value) & !is.null(cis)) ~
paste0(stat_label, " = ", stat_value, ", ", pvalue))
}



18 changes: 18 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#' @keywords internal
build_string <- function(mean_label = NULL,
mean_value = NULL,
cis = NULL,
stat_label,
stat_value,
pvalue,
full) {
dplyr::case_when(full & !is.null(mean_label) & !is.null(mean_value) & !is.null(cis) ~
paste0(mean_label, mean_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", stat_label, " = ", stat_value, ", ", pvalue),
full & is.null(mean_label) & is.null(mean_value) & !is.null(cis) ~
paste0(stat_label, " = ", stat_value, ", 95% CI [", cis[1], ", ", cis[2], "], ", pvalue),
full & !is.null(mean_label) & !is.null(mean_value) & is.null(cis) ~
paste0(mean_label, mean_value, ", ", stat_label, " = ", stat_value, ", ", pvalue),
!full | (is.null(mean_label) & !is.null(mean_value) & !is.null(cis)) ~
paste0(stat_label, " = ", stat_value, ", ", pvalue))
}

1 change: 1 addition & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ Fuel efficiency and engine displacement were highly correlated
from `t.test()` and `wilcox.test()`, including one-sample, two-sample
independent, and paired tests)
- ANOVAs from `aov()`
- Linear models from `lm()` and generalized linear models from `glm()`
- Bayes factors (output from [`{BayesFactor}`](https://cran.r-project.org/package=BayesFactor) package)
* `format_summary()`: Means and error (calculates from vector or uses vector
of mean and error interval or mean, lower error limit, and upper error limit)
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ Fuel efficiency and engine displacement were highly correlated (r =
from `t.test()` and `wilcox.test()`, including one-sample,
two-sample independent, and paired tests)
- ANOVAs from `aov()`
- Linear models from `lm()` and generalized linear models from `glm()`
- Bayes factors (output from
[`{BayesFactor}`](https://cran.r-project.org/package=BayesFactor)
package)
Expand Down
13 changes: 7 additions & 6 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,27 @@ template:
fg: "#f8f9fa"

reference:
- title: Format numbers/characters
contents:
- format_num
- format_scientific
- format_chr
- format_sub
- title: Format statistical objects
contents:
- format_stats
- format_stats.aov
- format_stats.BFBayesFactor
- format_stats.easycorrelation
- format_stats.htest
- format_stats.lm
- title: Format statistical values
contents:
- format_summary
- format_p
- format_bf
- format_corr
- format_ttest
- title: Format numbers/characters
contents:
- format_num
- format_scientific
- format_chr
- format_sub

authors:
Jeffrey R. Stevens:
Expand Down
1 change: 1 addition & 0 deletions man/format_bf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/format_corr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/format_stats.BFBayesFactor.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/format_stats.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/format_stats.aov.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/format_stats.easycorrelation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/format_stats.htest.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit b148f47

Please sign in to comment.