Skip to content

Commit

Permalink
* standalone-helpers updated
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewallenbruce committed Dec 17, 2024
1 parent 69def8d commit 787fa93
Show file tree
Hide file tree
Showing 58 changed files with 1,430 additions and 87 deletions.
2 changes: 0 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ Suggests:
clock,
dplyr,
fipio,
fixtuRes,
fontawesome,
forcats,
fs (>= 1.6.5),
Expand All @@ -54,7 +53,6 @@ Suggests:
tidyr,
timeplyr,
usethis,
wakefield,
zipcodeR
Config/roxyglobals/filename: generated-globals.R
Config/roxyglobals/unique: TRUE
Expand Down
44 changes: 44 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ export("%or%")
export(add_counties)
export(age_days)
export(arrows)
export(as_chr)
export(as_date)
export(as_int)
export(as_num)
export(bracks)
export(change)
export(change_lagged)
Expand All @@ -29,18 +33,25 @@ export(count_prop_multi)
export(count_wide)
export(create_vec)
export(data_types)
export(delist)
export(delister)
export(density)
export(describe)
export(describe2)
export(describe_unique)
export(desplit)
export(display_long)
export(duration_vec)
export(empty)
export(expand_date_range)
export(false)
export(fancy_ts)
export(find_common_order)
export(gchop)
export(gelm)
export(geomean)
export(get_pin)
export(getelem)
export(gg_theme)
export(gh_raw)
export(glue_chr)
Expand All @@ -49,35 +60,68 @@ export(gluestick)
export(gt_marks)
export(histo)
export(histogram)
export(if_empty_null)
export(iif_else)
export(initialize_package)
export(interpolate)
export(invert_named)
export(is_valid_npi)
export(is_valid_npi2)
export(list_pins)
export(make_interval)
export(max_vlen)
export(mock_forager)
export(mock_provider)
export(mount_board)
export(na)
export(na_if)
export(na_if_common)
export(named_group_split)
export(new_value)
export(not_na)
export(not_null)
export(null)
export(pad_number)
export(parens)
export(percentage_calculator)
export(percentage_change)
export(percentage_difference)
export(print_ls)
export(random_npi_generator)
export(random_string)
export(rate_of_return)
export(remove_all_na)
export(remove_quiet)
export(remove_quotes)
export(rename_seq)
export(roundup)
export(search_for)
export(search_in)
export(sf_at)
export(sf_c)
export(sf_chars)
export(sf_conv)
export(sf_detect)
export(sf_extract)
export(sf_ndetect)
export(sf_nextract)
export(sf_remove)
export(sf_replace)
export(sf_smush)
export(sf_strsplit)
export(sf_sub)
export(single_line_string)
export(sorted_bars)
export(splitter)
export(strsort)
export(summary_stats)
export(true)
export(ttimestamp)
export(uniq)
export(uniq_narm)
export(uniq_vlen)
export(update_personal_packages)
export(vlen)
export(years_df)
export(years_floor)
export(years_vec)
Expand Down
84 changes: 81 additions & 3 deletions R/describe.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,75 @@
#' Describe 2
#'
#' @param df `<data.frame>` desc
#'
#' @param ... `<dots>` tidyselect columns
#'
#' @returns `<tibble>` of summary statistics
#'
#' @examples
#' describe2(mock_provider(2000:2020))
#'
#' describe2(mock_forager(200))
#'
#' @autoglobal
#'
#' @export
describe2 <- function(df, ...) {

get_type <- \(x) {
cheapr::enframe_(
purrr::map_vec(x, function(x)
glue_chr("<{pillar::type_sum(x)}>")),
name = "column",
value = "type")
}

fiqr <- \(x) diff(collapse::.quantile(as.numeric(x), c(0.25, 0.75)))

if (nargs() > 1) df <- dplyr::select(df, ...)

dates <- dplyr::select(df, dplyr::where(\(x) inherits(x, "Date")))
df <- dplyr::select(df, dplyr::where(\(x) !inherits(x, "Date")))

sums <- df |>
dplyr::mutate(
dplyr::across(dplyr::where(is.character), stringr::str_length),
dplyr::across(dplyr::where(\(x) is.factor(x) | is.logical(x)), as.numeric)) |>
tidyr::pivot_longer(dplyr::everything(), names_to = "column") |>
dplyr::mutate(n = 1 - cheapr::is_na(value)) |>
dplyr::reframe(
n = collapse::fsum(value, nthreads = 4L),
min = collapse::fmin(value),
mean = collapse::fmean(value, nthreads = 4L),
iqr = fiqr(value),
max = collapse::fmax(value),
med = collapse::fmedian(value),
sd = collapse::fsd(value),
mad = mad(value, na.rm = TRUE),
distribution = histo(value),
.by = column) |>
dplyr::left_join(get_type(df), by = dplyr::join_by(column))

topn <- \(x, limit = 10) {
dplyr::tibble(
column = names(x),
uniq = collapse::fnunique(collapse::na_rm(x)),
top = collapse::fcount(collapse::na_rm(x), name = "n") |>
dplyr::arrange(dplyr::desc(n)) |>
dplyr::slice(seq(1, limit)) |>
dplyr::pull(x) |>
stringr::str_flatten_comma())
}

tops <- purrr::map(df, topn) |>
purrr::list_rbind(names_to = "column") |>
dplyr::filter(uniq != nrow(df))

dplyr::left_join(sums, tops, by = dplyr::join_by(column)) |>
dplyr::arrange(dplyr::desc(type)) |>
dplyr::select(column, type, n, min, mean, med, max, iqr, sd, mad, distribution, uniq, top)
}

#' Describe a dataset
#'
#' @param df `<data.frame>` desc
Expand Down Expand Up @@ -140,9 +212,13 @@ histo <- function(x, width = 10) {
#'
#' @examples
#'
#' describe_unique(mock_forager(), ins_class, payer)
#' describe_unique(mock_forager(), class, payer)
#'
#' # describe_unique(mock_forager(200), names(df)[2:3])
#' describe_unique(
#' mock_forager(50),
#' names(mock_forager())[c(2:3, 6:9)]) |>
#' dplyr::filter(n > 2) |>
#' print(n = 30)
#'
#' @autoglobal
#'
Expand All @@ -157,7 +233,9 @@ describe_unique <- function(df,

df <- dplyr::select(df, ...)

.set_names <- if (is.null(.set_names)) names(df) else .set_names
.set_names <- if (null(.set_names)) names(df) else .set_names

df <- dplyr::mutate(df, dplyr::across(!dplyr::where(is.character), as.character))

df <- columns_to_character(df) |>
names() |>
Expand Down
13 changes: 13 additions & 0 deletions R/generated-globals.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ utils::globalVariables(c(
# <rate_of_return>
# <combine>
":=",
# <describe2>
"column",
# <combine>
"combined",
# <rate_of_return>
Expand All @@ -23,35 +25,46 @@ utils::globalVariables(c(
"date_of_service",
# <mock_forager>
"date_of_submission",
# <describe2>
"distribution",
# <id_runs>
"group",
# <id_runs>
"group_size",
# <make_interval>
"interval",
# <describe2>
# <describe>
"iqr",
# <id_runs>
"key",
# <rate_of_return>
"lg",
# <describe2>
# <describe>
"mad",
# <describe2>
# <describe>
"med",
# <describe2>
# <describe>
# <count_prop>
# <count_prop_multi>
# <count_wide>
"n",
# <describe>
"nuniq",
# <describe2>
# <describe>
"sd",
# <describe2>
"top",
# <describe>
"top_n",
# <describe2>
# <describe>
"type",
# <describe2>
# <describe>
# <id_runs>
"value",
Expand Down
52 changes: 38 additions & 14 deletions R/mock.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ mock_provider <- \(years) {
#'
#' @param rows `<int>` number of rows to generate; default is 10
#'
#' @param nest `<lgl>` whether to nest the dates column; default is `TRUE`
#' @param nest `<lgl>` whether to nest the dates column; default is `FALSE`
#'
#' @returns A [tibble][tibble::tibble-package]
#'
Expand All @@ -41,23 +41,47 @@ mock_provider <- \(years) {
#' @family mock
#'
#' @export
mock_forager <- function(rows = 10, nest = TRUE){
mock_forager <- function(rows = 10, nest = FALSE){

payer_names <- c("Medicare", "Medicaid", "Cigna", "Humana", "UHC", "Anthem", "BCBS", "Centene")
payers <- sample(
x = factor(
x = c("Medicare",
"Medicaid",
"Cigna",
"Humana",
"UHC",
"Anthem",
"BCBS",
"Centene",
"MAO")),
size = rows,
replace = TRUE)

classes <- sample(
x = ordered(
c("Primary",
"Secondary")),
size = rows,
replace = TRUE)

x <- dplyr::tibble(
claim_id = as.character(wakefield::id(n = rows)),
date_of_service = wakefield::dob(n = rows, start = Sys.Date() - 730, random = TRUE, k = 12, by = "-1 months"),
payer = fixtuRes::set_vector(rows, set = payer_names),
ins_class = fixtuRes::set_vector(rows, set = c("Primary", "Secondary")),
balance = as.double(wakefield::income(n = rows, digits = 2) / 300),
date_of_release = date_of_service + round(abs(stats::rnorm(length(date_of_service), 11, 4))),
date_of_submission = date_of_release + round(abs(stats::rnorm(length(date_of_release), 2, 2))),
date_of_acceptance = date_of_submission + round(abs(stats::rnorm(length(date_of_submission), 3, 2))),
date_of_adjudication = date_of_acceptance + round(abs(stats::rnorm(length(date_of_acceptance), 30, 3))))
id = sprintf(paste0("%0", nchar(rows) + 3, "d"), seq_len(rows)),
payer = payers,
class = classes,
balance = roundup(stats::rgamma(n = rows, 2) * 20000) / 300,
date_of_service = sample(x = seq.Date(from = Sys.Date(), by = "-1 months", length.out = 12), size = rows, replace = TRUE),
date_of_release = date_of_service + roundup(abs(stats::rnorm(rows, 11, 4))),
date_of_submission = date_of_release + roundup(abs(stats::rnorm(rows, 2, 2))),
date_of_acceptance = date_of_submission + roundup(abs(stats::rnorm(rows, 3, 2))),
date_of_adjudication = date_of_acceptance + roundup(abs(stats::rnorm(rows, 30, 3)))
)

if (nest)
return(tidyr::nest(x, dates = tidyr::contains("date")))

return(
tidyr::nest(
x,
dates = tidyr::contains("date")
)
)
x
}
Loading

0 comments on commit 787fa93

Please sign in to comment.