From ae39b22f9b3ad6d6b42ec49ae89efe744ccac59c Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Mon, 4 Dec 2023 13:34:56 -0800 Subject: [PATCH] Dynamic column headers (#1576) * in progress * progress * progress --- NAMESPACE | 2 + R/as_tibble.R | 219 +++++++++++++++++++++++++++++++++++++ R/assign_summary_type.R | 53 +++++---- R/bridge_summary.R | 77 ++++++++++++- R/modify_header.R | 42 +++++++ R/tbl_summary.R | 23 ++++ man/as_gt.Rd | 4 + man/as_tibble.gtsummary.Rd | 63 +++++++++++ 8 files changed, 460 insertions(+), 23 deletions(-) create mode 100644 R/as_tibble.R create mode 100644 man/as_tibble.gtsummary.Rd diff --git a/NAMESPACE b/NAMESPACE index 6a8cfdeab0..2f037b3f32 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method(as.data.frame,gtsummary) +S3method(as_tibble,gtsummary) S3method(print,gtsummary) export("%>%") export(.table_styling_expr_to_row_number) diff --git a/R/as_tibble.R b/R/as_tibble.R new file mode 100644 index 0000000000..539878cf9d --- /dev/null +++ b/R/as_tibble.R @@ -0,0 +1,219 @@ +#' Convert gtsummary object to a tibble +#' +#' Function converts a gtsummary object to a tibble. +#' +#' @inheritParams as_gt +#' @param col_labels Logical argument adding column labels to output tibble. +#' Default is `TRUE`. +#' @param fmt_missing Logical argument adding the missing value formats. +#' @param ... Not used +#' @return a [tibble][tibble::tibble-package] +#' @family gtsummary output types +#' @author Daniel D. Sjoberg +#' @name as_tibble.gtsummary +#' @examples +#' \donttest{ +#' tbl <- +#' trial %>% +#' select(trt, age, grade, response) %>% +#' tbl_summary(by = trt) +#' +#' as_tibble(tbl) +#' +#' # without column labels +#' as_tibble(tbl, col_labels = FALSE) +#' } +NULL + +#' @export +#' @rdname as_tibble.gtsummary +as_tibble.gtsummary <- function(x, include = everything(), col_labels = TRUE, + return_calls = FALSE, fmt_missing = FALSE, ...) { + # running pre-conversion function, if present -------------------------------- + x <- do.call(get_theme_element("pkgwide-fun:pre_conversion", default = identity), list(x)) + + # converting row specifications to row numbers, and removing old cmds -------- + x <- .table_styling_expr_to_row_number(x) + + # creating list of calls to get formatted tibble ----------------------------- + tibble_calls <- + table_styling_to_tibble_calls( + x = x, + col_labels = col_labels, + fmt_missing = fmt_missing + ) + + # converting to character vector --------------------------------------------- + include <- + .select_to_varnames( + select = {{ include }}, + var_info = names(tibble_calls), + arg_name = "include" + ) + + # making list of commands to include ----------------------------------------- + # this ensures list is in the same order as names(x$kable_calls) + include <- names(tibble_calls) %>% intersect(include) + # user cannot exclude the first 'tibble' command + include <- "tibble" %>% union(include) + + # return calls, if requested ------------------------------------------------- + if (return_calls == TRUE) { + return(tibble_calls[include]) + } + + # taking each gt function call, concatenating them with %>% separating them + .eval_list_of_exprs(tibble_calls[include]) +} + +#' @export +#' @rdname as_tibble.gtsummary +as.data.frame.gtsummary <- function(...) { + res <- as_tibble(...) + + if (inherits(res, "data.frame")) + return(as.data.frame(res)) + + res +} + + +table_styling_to_tibble_calls <- function(x, col_labels = TRUE, fmt_missing = FALSE) { + tibble_calls <- list() + + # tibble --------------------------------------------------------------------- + tibble_calls[["tibble"]] <- expr(x$table_body) + + # ungroup -------------------------------------------------------------------- + if ("groupname_col" %in% x$table_styling$header$column) { + tibble_calls[["ungroup"]] <- + list( + expr(dplyr::group_by(.data$groupname_col)), + expr(dplyr::mutate(groupname_col = ifelse(dplyr::row_number() == 1, + as.character(.data$groupname_col), + NA_character_ + ))), + expr(dplyr::ungroup()) + ) + } + + # fmt (part 1) --------------------------------------------------------------- + # this needs to be called in as_tibble() before the bolding and italic function, + # but the bolding and italic code needs to executed on pre-formatted data + # (e.g. `bold_p()`) this holds its place for when it is finally run + tibble_calls[["fmt"]] <- list() + + # cols_merge ----------------------------------------------------------------- + tibble_calls[["cols_merge"]] <- + map( + seq_len(nrow(x$table_styling$cols_merge)), + ~ expr( + dplyr::mutate( + !!x$table_styling$cols_merge$column[.x] := + ifelse( + dplyr::row_number() %in% !!x$table_styling$cols_merge$rows[[.x]], + glue::glue(!!x$table_styling$cols_merge$pattern[.x]) %>% as.character(), + !!rlang::sym(x$table_styling$cols_merge$column[.x]) + ) + ) + ) + ) + + # tab_style_bold ------------------------------------------------------------- + df_bold <- x$table_styling$text_format %>% dplyr::filter(.data$format_type == "bold") + + tibble_calls[["tab_style_bold"]] <- + map( + seq_len(nrow(df_bold)), + ~ expr(dplyr::mutate_at( + gt::vars(!!!syms(df_bold$column[[.x]])), + ~ ifelse(row_number() %in% !!df_bold$row_numbers[[.x]], + paste0("__", ., "__"), . + ) + )) + ) + + # tab_style_italic ------------------------------------------------------------- + df_italic <- x$table_styling$text_format %>% dplyr::filter(.data$format_type == "italic") + + tibble_calls[["tab_style_italic"]] <- + map( + seq_len(nrow(df_italic)), + ~ expr(dplyr::mutate_at( + gt::vars(!!!syms(df_italic$column[[.x]])), + ~ ifelse(dplyr::row_number() %in% !!df_italic$row_numbers[[.x]], + paste0("_", ., "_"), . + ) + )) + ) + + # fmt (part 2) --------------------------------------------------------------- + tibble_calls[["fmt"]] <- + map( + seq_len(nrow(x$table_styling$fmt_fun)), + ~ expr((!!expr(!!eval(parse_expr("gtsummary:::.apply_fmt_fun"))))( + columns = !!x$table_styling$fmt_fun$column[[.x]], + row_numbers = !!x$table_styling$fmt_fun$row_numbers[[.x]], + fmt_fun = !!x$table_styling$fmt_fun$fmt_fun[[.x]], + update_from = !!x$table_body + )) + ) + + # fmt_missing ---------------------------------------------------------------- + if (isTRUE(fmt_missing)) { + tibble_calls[["fmt_missing"]] <- + map( + seq_len(nrow(x$table_styling$fmt_missing)), + ~ expr( + ifelse( + dplyr::row_number() %in% !!x$table_styling$fmt_missing$row_numbers[[.x]] & is.na(!!sym(x$table_styling$fmt_missing$column[.x])), + !!x$table_styling$fmt_missing$symbol[.x], + !!sym(x$table_styling$fmt_missing$column[.x]) + ) + ) + ) %>% + rlang::set_names(x$table_styling$fmt_missing$column) %>% + { + expr(dplyr::mutate(!!!.)) + } %>% + list() + } else { + tibble_calls[["fmt_missing"]] <- list() + } + + # cols_hide ------------------------------------------------------------------ + # cols_to_keep object created above in fmt section + tibble_calls[["cols_hide"]] <- + expr(dplyr::select(any_of("groupname_col"), !!!syms(.cols_to_show(x)))) + + # cols_label ----------------------------------------------------------------- + if (col_labels) { + df_col_labels <- + dplyr::filter(x$table_styling$header, .data$hide == FALSE) + + tibble_calls[["cols_label"]] <- + expr(rlang::set_names(!!df_col_labels$label)) + } + + tibble_calls +} + +.apply_fmt_fun <- function(data, columns, row_numbers, fmt_fun, update_from) { + # apply formatting functions + df_updated <- + update_from[row_numbers, columns, drop = FALSE] %>% + map(~ fmt_fun(.x)) |> + dplyr::bind_cols() + + # convert underlying column to character if updated col is character + for (v in columns) { + if (is.character(df_updated[[v]]) && !is.character(data[[v]])) { + data[[v]] <- as.character(data[[v]]) + } + } + + # update data and return + data[row_numbers, columns, drop = FALSE] <- df_updated + + data +} diff --git a/R/assign_summary_type.R b/R/assign_summary_type.R index 654d1c99fc..f09466332b 100644 --- a/R/assign_summary_type.R +++ b/R/assign_summary_type.R @@ -32,28 +32,7 @@ assign_summary_type <- function(data, variables, value, type = NULL) { return(type[[variable]]) } - # logical variables are dichotomous - if (inherits(data[[variable]], "logical")) { - return("dichotomous") - } - - # numeric variables that are 0 and 1 only, will be dichotomous - if (inherits(data[[variable]], c("integer", "numeric")) && - length(setdiff(stats::na.omit(data[[variable]]), c(0, 1))) == 0) { - return("dichotomous") - } - - # factor variables that are "No" and "Yes" only, will be dichotomous - if (inherits(data[[variable]], "factor") && - length(levels(data[[variable]])) == 2L && - setequal(toupper(levels(data[[variable]])), c("NO", "YES"))) { - return("dichotomous") - } - - # character variables that are "No" and "Yes" only, will be dichotomous - if (inherits(data[[variable]], "character") && - setequal(toupper(stats::na.omit(data[[variable]])), c("NO", "YES")) && - length(stats::na.omit(data[[variable]])) == 2L) { + if (!is.null(.get_default_dichotomous_value(data[[variable]]))) { return("dichotomous") } @@ -83,6 +62,36 @@ assign_summary_type <- function(data, variables, value, type = NULL) { type } +.get_default_dichotomous_value <- function(x) { + # logical variables are dichotomous + if (inherits(x, "logical")) { + return(TRUE) + } + + # numeric variables that are 0 and 1 only, will be dichotomous + if (inherits(x, c("integer", "numeric")) && + length(setdiff(stats::na.omit(x), c(0, 1))) == 0) { + return(stats::na.omit(x) |> unique() |> sort() |> dplyr::last()) + } + + # factor variables that are "No" and "Yes" only, will be dichotomous + if (inherits(x, "factor") && + length(levels(x)) == 2L && + setequal(toupper(levels(x)), c("NO", "YES"))) { + return(levels(x)[toupper(levels(x)) %in% "YES"]) + } + + # character variables that are "No" and "Yes" only, will be dichotomous + if (inherits(x, "character") && + setequal(toupper(stats::na.omit(x)), c("NO", "YES")) && + length(stats::na.omit(x)) == 2L) { + return(unique(x)[toupper(unique(x)) %in% "YES"]) + } + + # otherwise, return NULL + NULL +} + .add_summary_type_as_attr <- function(data, type) { type <- type[names(type) %in% names(data)] type_names <- names(type) diff --git a/R/bridge_summary.R b/R/bridge_summary.R index 871d4e3999..e3e038673c 100644 --- a/R/bridge_summary.R +++ b/R/bridge_summary.R @@ -129,11 +129,16 @@ brdg_summary <- function(x, calling_function = "tbl_summary") { # construct default table_styling -------------------------------------------- x <- construct_initial_table_styling(x) + # add info to x$table_styling$header for dynamic headers --------------------- + x <- .add_table_styling_stats(x) + + x } #' @rdname bridge_summary #' @export pier_summary_dichotomous <- function(x, variables, value = x$inputs$value) { + if (is_empty(variables)) return(dplyr::tibble()) # subsetting cards object on dichotomous summaries --------------------------- x$cards <- x$cards |> @@ -154,6 +159,7 @@ pier_summary_dichotomous <- function(x, variables, value = x$inputs$value) { #' @rdname bridge_summary #' @export pier_summary_categorical <- function(x, variables, missing, missing_text, missing_stat) { + if (is_empty(variables)) return(dplyr::tibble()) # subsetting cards object on categorical summaries ---------------------------- card <- x$cards |> @@ -265,6 +271,7 @@ pier_summary_categorical <- function(x, variables, missing, missing_text, missin #' @rdname bridge_summary #' @export pier_summary_continuous2 <- function(x, variables, missing, missing_text, missing_stat) { + if (is_empty(variables)) return(dplyr::tibble()) # subsetting cards object on continuous2 summaries ---------------------------- card <- x$cards |> @@ -275,7 +282,7 @@ pier_summary_continuous2 <- function(x, variables, missing, missing_text, missin df_glued <- # construct stat columns with glue by grouping variables and primary summary variable card |> - dplyr::group_by(across(c("gts_column" ,cards::all_ard_groups(), "variable"))) |> + dplyr::group_by(across(c("gts_column", cards::all_ard_groups(), "variable"))) |> dplyr::group_map( function(.x, .y) { dplyr::mutate( @@ -418,6 +425,7 @@ pier_summary_continuous <- function(x, variables, missing, missing_text, missing #' @rdname bridge_summary #' @export pier_summary_missing_row <- function(x, variables = x$inputs$include) { + if (is_empty(variables)) return(dplyr::tibble()) # keeping variables to report missing obs for (or returning empty df if none) if (x$inputs$missing == "no") return(dplyr::tibble()) if (x$inputs$missing == "ifany") { @@ -445,3 +453,70 @@ pier_summary_missing_row <- function(x, variables = x$inputs$include) { label = x$inputs$missing_text ) } + +.add_table_styling_stats <- function(x) { + if (is_empty(x$inputs$by)) { + x$table_styling$header <- + x$table_styling$header |> + dplyr::mutate( + modify_stat_N = + x$cards |> + dplyr::filter(.data$stat_name %in% "N_obs") |> + dplyr::pull("statistic") |> + unlist() |> + getElement(1), + modify_stat_n = .data$modify_stat_N, + modify_stat_p = 1, + modify_stat_level = "Overall" + ) + } + else { + df_by_stats <- x$cards |> + dplyr::filter(.data$variable %in% .env$x$inputs$by & .data$stat_name %in% c("N", "n", "p")) + + # get a data frame with the by variable stats + df_by_stats_wide <- + df_by_stats |> + dplyr::filter(.data$stat_name %in% c("n", "p")) |> + dplyr::mutate( + .by = .data$variable_level, + column = paste0("stat_", dplyr::cur_group_id()) + ) %>% + {dplyr::bind_rows( + ., + dplyr::select(., "variable_level", "column", statistic = "variable_level") |> + dplyr::mutate(stat_name = "level") |> + dplyr::distinct() + )} |> + tidyr::pivot_wider( + id_cols = "column", + names_from = "stat_name", + values_from = "statistic" + ) |> + dplyr::mutate( + dplyr::across(-"column", unlist), + dplyr::across("level", as.character) + ) |> + dplyr::rename_with( + function(x) paste0("modify_stat_", x), + .cols = -"column" + ) + + # add the stats here to the header data frame + x$table_styling$header <- + x$table_styling$header |> + dplyr::mutate( + modify_stat_N = + df_by_stats |> + dplyr::filter(.data$stat_name %in% "N") |> + dplyr::pull("statistic") |> + unlist() + ) |> + dplyr::left_join( + df_by_stats_wide, + by = "column" + ) + } + + x +} diff --git a/R/modify_header.R b/R/modify_header.R index 5128e787aa..f0a91daf7c 100644 --- a/R/modify_header.R +++ b/R/modify_header.R @@ -23,6 +23,14 @@ modify_header <- function(x, ..., text_interpret = c("md", "html"), } cards::process_formula_selectors(data = x$table_body, dots = dots) + cards::check_list_elements( + dots = function(x) is_string(x), + error_msg = list(dots = c("All values passed in {.arg ...} must be strings.", + "i" = "For example, {.code label = '**Variable**'}")) + ) + + # evaluate the strings with glue + dots <- .evaluate_string_with_glue(x, dots) # updated header meta data x$table_styling$header <- @@ -38,3 +46,37 @@ modify_header <- function(x, ..., text_interpret = c("md", "html"), # return object x } + +.evaluate_string_with_glue <- function(x, dots) { + # only keep values that are in the table_body + dots <- dots[intersect(names(dots), x$table_styling$header$column)] + + df_header_subset <- + x$table_styling$header |> + dplyr::select("column", starts_with("modify_stat_")) |> + dplyr::rename_with( + .fn = function(x) gsub("^modify_stat_", "", x), + .cols = starts_with("modify_stat_") + ) + + imap( + dots, + function(value, variable) { + df_header_subset <- + df_header_subset |> + dplyr::filter(.data$column %in% .env$variable) |> + dplyr::select(-"column") + + glued_value <- + cards::eval_capture_conditions( + expr(glue::glue(value)), + data = df_header_subset + ) + + if (!is.null(glued_value$result)) + return(glued_value$result) + + cli::cli_abort("There was an error the {.field glue} evaluation of {.val {value}}.") + } + ) +} diff --git a/R/tbl_summary.R b/R/tbl_summary.R index 8752f13da8..6122719e95 100644 --- a/R/tbl_summary.R +++ b/R/tbl_summary.R @@ -72,6 +72,8 @@ tbl_summary <- function(data, type <- utils::modifyList(default_types, type) } + value <- .assign_default_values(data[include], value, type) + # evaluate the remaining list-formula arguments ------------------------------ # processed arguments are saved into this env cards::process_formula_selectors( @@ -167,3 +169,24 @@ tbl_summary <- function(data, include[!is_all_na] } + +.assign_default_values <- function(data, value, type) { + lapply( + names(data), + function(variable) { + # if user passed value, then use it + if (!is.null(value[[variable]])) return(value[[variable]]) + # if not a dichotomous summary type, then return NULL + if (!type[[variable]] %in% "dichotomous") return(NULL) + + # otherwise, return default value + default_value <- .get_default_dichotomous_value(data[[variable]]) + if (!is.null(default_value)) return(default_value) + cli::cli_abort(c( + "Error in argument {.arg value} for variable {.val {variable}}.", + "i" = "Summary type is {.val dichotomous} but no summary value has been assigned." + )) + } + ) |> + stats::setNames(names(data)) +} diff --git a/man/as_gt.Rd b/man/as_gt.Rd index d0146bcdd1..349ee2a314 100644 --- a/man/as_gt.Rd +++ b/man/as_gt.Rd @@ -36,6 +36,10 @@ available via the \href{https://gt.rstudio.com/index.html}{gt package}. # tbl_summary(by = trt) \%>\% # as_gt() } +\seealso{ +Other gtsummary output types: +\code{\link{as_tibble.gtsummary}()} +} \author{ Daniel D. Sjoberg } diff --git a/man/as_tibble.gtsummary.Rd b/man/as_tibble.gtsummary.Rd new file mode 100644 index 0000000000..8939339e44 --- /dev/null +++ b/man/as_tibble.gtsummary.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/as_tibble.R +\name{as_tibble.gtsummary} +\alias{as_tibble.gtsummary} +\alias{as.data.frame.gtsummary} +\title{Convert gtsummary object to a tibble} +\usage{ +\method{as_tibble}{gtsummary}( + x, + include = everything(), + col_labels = TRUE, + return_calls = FALSE, + fmt_missing = FALSE, + ... +) + +\method{as.data.frame}{gtsummary}(...) +} +\arguments{ +\item{x}{An object of class `"gtsummary"} + +\item{include}{Commands to include in output. Input may be a vector of +quoted or unquoted names. tidyselect and gtsummary select helper +functions are also accepted. +Default is \code{everything()}.} + +\item{col_labels}{Logical argument adding column labels to output tibble. +Default is \code{TRUE}.} + +\item{return_calls}{Logical. Default is \code{FALSE}. If \code{TRUE}, the calls are returned +as a list of expressions.} + +\item{fmt_missing}{Logical argument adding the missing value formats.} + +\item{...}{Not used} +} +\value{ +a \link[tibble:tibble-package]{tibble} +} +\description{ +Function converts a gtsummary object to a tibble. +} +\examples{ +\donttest{ +tbl <- + trial \%>\% + select(trt, age, grade, response) \%>\% + tbl_summary(by = trt) + +as_tibble(tbl) + +# without column labels +as_tibble(tbl, col_labels = FALSE) +} +} +\seealso{ +Other gtsummary output types: +\code{\link{as_gt}()} +} +\author{ +Daniel D. Sjoberg +} +\concept{gtsummary output types}