Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic column headers #1576

Merged
merged 3 commits into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ URL: https://github.com/ddsjoberg/gtsummary,
BugReports: https://github.com/ddsjoberg/gtsummary/issues
Imports:
broom.helpers,
cards (>= 0.0.0.9002),
cards (>= 0.0.0.9003),
cli (>= 3.6.1),
dplyr (>= 1.1.3),
glue (>= 1.6.2),
Expand Down
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method(as.data.frame,gtsummary)
S3method(as_tibble,gtsummary)
S3method(print,gtsummary)
export("%>%")
export(.table_styling_expr_to_row_number)
Expand Down Expand Up @@ -33,6 +35,16 @@ export(pier_summary_dichotomous)
export(pier_summary_missing_row)
export(select)
export(starts_with)
export(styfn_number)
export(styfn_percent)
export(styfn_pvalue)
export(styfn_ratio)
export(styfn_sigfig)
export(style_number)
export(style_percent)
export(style_pvalue)
export(style_ratio)
export(style_sigfig)
export(tbl_summary)
export(vars)
export(where)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

* If a column is all `NA` it is now removed from the summary table created with `tbl_summary()`.

* Added a family of function `styfn_*()` that are similar to the `style_*()` except they return a styling _function_, rather than a styled value.

* Previously, in a `tbl_summary()` variables that were `c(0, 1)`, `c("no", "yes")`, `c("No", "Yes")`, and `c("NO", "YES")` would default to a dichotomous summary with the `1` and `yes` level being shown in the table. This would occur even in the case when, for example, only `0` was observed. In this release, the line shown for dichotomous variables must be observed OR the unobserved level must be explicitly defined in a factor.

#### Internal Updates
Expand Down
219 changes: 219 additions & 0 deletions R/as_tibble.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
#' Convert gtsummary object to a tibble
#'
#' Function converts a gtsummary object to a tibble.
#'
#' @inheritParams as_gt
#' @param col_labels Logical argument adding column labels to output tibble.
#' Default is `TRUE`.
#' @param fmt_missing Logical argument adding the missing value formats.
#' @param ... Not used
#' @return a [tibble][tibble::tibble-package]
#' @family gtsummary output types
#' @author Daniel D. Sjoberg
#' @name as_tibble.gtsummary
#' @examples
#' \donttest{
#' tbl <-
#' trial %>%
#' select(trt, age, grade, response) %>%
#' tbl_summary(by = trt)
#'
#' as_tibble(tbl)
#'
#' # without column labels
#' as_tibble(tbl, col_labels = FALSE)
#' }
NULL

#' @export
#' @rdname as_tibble.gtsummary
as_tibble.gtsummary <- function(x, include = everything(), col_labels = TRUE,
return_calls = FALSE, fmt_missing = FALSE, ...) {
# running pre-conversion function, if present --------------------------------
x <- do.call(get_theme_element("pkgwide-fun:pre_conversion", default = identity), list(x))

# converting row specifications to row numbers, and removing old cmds --------
x <- .table_styling_expr_to_row_number(x)

# creating list of calls to get formatted tibble -----------------------------
tibble_calls <-
table_styling_to_tibble_calls(
x = x,
col_labels = col_labels,
fmt_missing = fmt_missing
)

# converting to character vector ---------------------------------------------
include <-
.select_to_varnames(
select = {{ include }},
var_info = names(tibble_calls),
arg_name = "include"
)

# making list of commands to include -----------------------------------------
# this ensures list is in the same order as names(x$kable_calls)
include <- names(tibble_calls) %>% intersect(include)
# user cannot exclude the first 'tibble' command
include <- "tibble" %>% union(include)

# return calls, if requested -------------------------------------------------
if (return_calls == TRUE) {
return(tibble_calls[include])
}

# taking each gt function call, concatenating them with %>% separating them
.eval_list_of_exprs(tibble_calls[include])
}

#' @export
#' @rdname as_tibble.gtsummary
as.data.frame.gtsummary <- function(...) {
res <- as_tibble(...)

if (inherits(res, "data.frame"))
return(as.data.frame(res))

res
}


table_styling_to_tibble_calls <- function(x, col_labels = TRUE, fmt_missing = FALSE) {
tibble_calls <- list()

# tibble ---------------------------------------------------------------------
tibble_calls[["tibble"]] <- expr(x$table_body)

# ungroup --------------------------------------------------------------------
if ("groupname_col" %in% x$table_styling$header$column) {
tibble_calls[["ungroup"]] <-
list(
expr(dplyr::group_by(.data$groupname_col)),
expr(dplyr::mutate(groupname_col = ifelse(dplyr::row_number() == 1,
as.character(.data$groupname_col),
NA_character_
))),
expr(dplyr::ungroup())
)
}

# fmt (part 1) ---------------------------------------------------------------
# this needs to be called in as_tibble() before the bolding and italic function,
# but the bolding and italic code needs to executed on pre-formatted data
# (e.g. `bold_p()`) this holds its place for when it is finally run
tibble_calls[["fmt"]] <- list()

# cols_merge -----------------------------------------------------------------
tibble_calls[["cols_merge"]] <-
map(
seq_len(nrow(x$table_styling$cols_merge)),
~ expr(
dplyr::mutate(
!!x$table_styling$cols_merge$column[.x] :=
ifelse(
dplyr::row_number() %in% !!x$table_styling$cols_merge$rows[[.x]],
glue::glue(!!x$table_styling$cols_merge$pattern[.x]) %>% as.character(),
!!rlang::sym(x$table_styling$cols_merge$column[.x])
)
)
)
)

# tab_style_bold -------------------------------------------------------------
df_bold <- x$table_styling$text_format %>% dplyr::filter(.data$format_type == "bold")

tibble_calls[["tab_style_bold"]] <-
map(
seq_len(nrow(df_bold)),
~ expr(dplyr::mutate_at(
gt::vars(!!!syms(df_bold$column[[.x]])),
~ ifelse(row_number() %in% !!df_bold$row_numbers[[.x]],
paste0("__", ., "__"), .
)
))
)

# tab_style_italic -------------------------------------------------------------
df_italic <- x$table_styling$text_format %>% dplyr::filter(.data$format_type == "italic")

tibble_calls[["tab_style_italic"]] <-
map(
seq_len(nrow(df_italic)),
~ expr(dplyr::mutate_at(
gt::vars(!!!syms(df_italic$column[[.x]])),
~ ifelse(dplyr::row_number() %in% !!df_italic$row_numbers[[.x]],
paste0("_", ., "_"), .
)
))
)

# fmt (part 2) ---------------------------------------------------------------
tibble_calls[["fmt"]] <-
map(
seq_len(nrow(x$table_styling$fmt_fun)),
~ expr((!!expr(!!eval(parse_expr("gtsummary:::.apply_fmt_fun"))))(
columns = !!x$table_styling$fmt_fun$column[[.x]],
row_numbers = !!x$table_styling$fmt_fun$row_numbers[[.x]],
fmt_fun = !!x$table_styling$fmt_fun$fmt_fun[[.x]],
update_from = !!x$table_body
))
)

# fmt_missing ----------------------------------------------------------------
if (isTRUE(fmt_missing)) {
tibble_calls[["fmt_missing"]] <-
map(
seq_len(nrow(x$table_styling$fmt_missing)),
~ expr(
ifelse(
dplyr::row_number() %in% !!x$table_styling$fmt_missing$row_numbers[[.x]] & is.na(!!sym(x$table_styling$fmt_missing$column[.x])),
!!x$table_styling$fmt_missing$symbol[.x],
!!sym(x$table_styling$fmt_missing$column[.x])
)
)
) %>%
rlang::set_names(x$table_styling$fmt_missing$column) %>%
{
expr(dplyr::mutate(!!!.))
} %>%
list()
} else {
tibble_calls[["fmt_missing"]] <- list()
}

# cols_hide ------------------------------------------------------------------
# cols_to_keep object created above in fmt section
tibble_calls[["cols_hide"]] <-
expr(dplyr::select(any_of("groupname_col"), !!!syms(.cols_to_show(x))))

# cols_label -----------------------------------------------------------------
if (col_labels) {
df_col_labels <-
dplyr::filter(x$table_styling$header, .data$hide == FALSE)

tibble_calls[["cols_label"]] <-
expr(rlang::set_names(!!df_col_labels$label))
}

tibble_calls
}

.apply_fmt_fun <- function(data, columns, row_numbers, fmt_fun, update_from) {
# apply formatting functions
df_updated <-
update_from[row_numbers, columns, drop = FALSE] %>%
map(~ fmt_fun(.x)) |>
dplyr::bind_cols()

# convert underlying column to character if updated col is character
for (v in columns) {
if (is.character(df_updated[[v]]) && !is.character(data[[v]])) {
data[[v]] <- as.character(data[[v]])
}
}

# update data and return
data[row_numbers, columns, drop = FALSE] <- df_updated

data
}
132 changes: 114 additions & 18 deletions R/assign_summary_digits.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,116 @@


# assign_summary_digits <- function(data, statistic, type, digits = NULL) {
# # extract the statistics
# statistic <- lapply(statistic, function(x) .extract_glue_elements(x) |> unlist())
#
# lapply(
# names(statistic),
# function(variable) {
# if (!is.null(digits[[variable]])){
# return(rep_named(statistic[[variable]], digits[[variable]]))
# }
#
# if (type[[variable]] %in% c("cateogrical", "dichotomous")) {
#
# }
# }
# )
#
# }
assign_summary_digits <- function(data, statistic, type, digits = NULL) {
# stats returned for all variables
lst_cat_summary_fns <- .categorical_summary_functions(c("n", "p"))
lst_all_fmt_fns <- .categorical_summary_functions()

# extract the statistics
statistic <- lapply(statistic, function(x) .extract_glue_elements(x) |> unlist())

lapply(
names(statistic),
function(variable) {
# if user passed digits AND they've specified every statistic, use the passed value
# otherwise, we need to calculate the defaults, and later we can update with the pieces the user passed
if (!is.null(digits[[variable]])) {
# if a scalar or vector passed, convert it to a list
if (!is.list(digits[[variable]]) && is_vector(digits[[variable]])) {
digits[[variable]] <- as.list(digits[[variable]])
}

# if user-passed value is not named, repeat the passed value to the length of 'statistic'
if (!is_named(digits[[variable]])) {
digits[[variable]] <- rep_named(statistic[[variable]], digits[[variable]])
}

# convert integers to a proper function
digits[[variable]] <- .convert_integer_to_fmt_fn(digits[[variable]])

# if the passed value fully specifies the formatting for each 'statistic',
# then return it. Otherwise, the remaining stat will be filled below
if (setequal(statistic[[variable]], names(digits[[variable]]))) {
return(digits[[variable]])
}
}

if (type[[variable]] %in% c("categorical", "dichotomous")) {
return(
c(lst_cat_summary_fns, lst_all_fmt_fns) |>
utils::modifyList(digits[[variable]] %||% list())
)
}

if (type[[variable]] %in% c("continuous", "continuous2")) {
return(
rep_named(
statistic[[variable]],
list(.guess_continuous_summary_digits(data[[variable]]))
) |>
utils::modifyList(lst_all_fmt_fns) |>
utils::modifyList(digits[[variable]] %||% list())
)
}
}
) |>
stats::setNames(names(statistic))
}

.convert_integer_to_fmt_fn <- function(x) {
imap(
x,
function(value, stat_name) {
# if not an integer, simply return the value
if (!is_integerish(value)) return(value)
# if an integer is passed for a percentage, process stat with style_percent()
if (stat_name %in% c("p", "p_miss", "p_nonmiss", "p_unweighted"))
return(styfn_percent(digits = value))
# otherwise, use style_numer() to style number
return(styfn_number(digits = value))
}
)
}

.guess_continuous_summary_digits <- function(x) {
# if all missing, return 0
if (all(is.na(x))) return(styfn_number(digits = 0L))

# if class is integer, then round everything to nearest integer
if (inherits(x, "integer")) {
return(styfn_number(digits = 0L))
}

# otherwise guess the number of dignits to use based on the spread
# calculate the spread of the variable
var_spread <-
stats::quantile(x, probs = c(0.95), na.rm = TRUE) -
stats::quantile(x, probs = c(0.05), na.rm = TRUE)

styfn_number(
digits =
dplyr::case_when(
var_spread < 0.01 ~ 4L,
var_spread >= 0.01 & var_spread < 0.1 ~ 3L,
var_spread >= 0.1 & var_spread < 10 ~ 2L,
var_spread >= 10 & var_spread < 20 ~ 1L,
var_spread >= 20 ~ 0L
)
)
}

.categorical_summary_functions <-
function(statistics = c("
N", "N_obs", "N_miss", "N_nonmiss", "n_unweighted", "N_unweighted",
"p_miss", "p_nonmiss", "p_unweighted")) {
lst_defaults <-
c(
c("n", "N", "N_obs", "N_miss", "N_nonmiss", "n_unweighted", "N_unweighted") |>
intersect(statistics) |>
rep_named(list(styfn_number())),
c("p", "p_miss", "p_nonmiss", "p_unweighted") |>
intersect(statistics) |>
rep_named(list(styfn_percent()))
)

lst_defaults
}
Loading
Loading