Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update key_colnames, revision_summary #540

Draft
wants to merge 9 commits into
base: dev
Choose a base branch
from
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: epiprocess
Title: Tools for basic signal processing in epidemiology
Version: 0.9.4
Version: 0.9.6
Authors@R: c(
person("Jacob", "Bien", role = "ctb"),
person("Logan", "Brooks", , "[email protected]", role = c("aut", "cre")),
Expand Down
11 changes: 10 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ S3method(guess_period,Date)
S3method(guess_period,POSIXt)
S3method(guess_period,default)
S3method(key_colnames,data.frame)
S3method(key_colnames,default)
S3method(key_colnames,epi_archive)
S3method(key_colnames,epi_df)
S3method(key_colnames,tbl_ts)
S3method(mean,epi_df)
S3method(next_after,Date)
S3method(next_after,integer)
Expand Down Expand Up @@ -99,12 +99,14 @@ export(time_column_names)
export(ungroup)
export(unnest)
export(validate_epi_archive)
export(vec_position_lag)
export(version_column_names)
importFrom(checkmate,anyInfinite)
importFrom(checkmate,anyMissing)
importFrom(checkmate,assert)
importFrom(checkmate,assert_character)
importFrom(checkmate,assert_class)
importFrom(checkmate,assert_count)
importFrom(checkmate,assert_data_frame)
importFrom(checkmate,assert_function)
importFrom(checkmate,assert_int)
Expand Down Expand Up @@ -191,6 +193,8 @@ importFrom(rlang,as_label)
importFrom(rlang,caller_arg)
importFrom(rlang,caller_env)
importFrom(rlang,check_dots_empty)
importFrom(rlang,check_dots_empty0)
importFrom(rlang,dots_n)
importFrom(rlang,enquo)
importFrom(rlang,enquos)
importFrom(rlang,env)
Expand Down Expand Up @@ -230,3 +234,8 @@ importFrom(tidyselect,starts_with)
importFrom(tsibble,as_tsibble)
importFrom(utils,capture.output)
importFrom(utils,tail)
importFrom(vctrs,obj_check_vector)
importFrom(vctrs,vec_c)
importFrom(vctrs,vec_cast)
importFrom(vctrs,vec_size)
importFrom(vctrs,vec_slice)
21 changes: 19 additions & 2 deletions R/archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,23 @@ removed_by_compactify <- function(df, keys, tolerance) {
)) # nolint: object_usage_linter
}

#' Lag entries in a vctrs-style vector by their position in the vector
#'
#' @importFrom checkmate assert_count
#' @importFrom vctrs obj_check_vector vec_slice vec_size
#' @keywords internal
#' @importFrom vctrs vec_c vec_slice vec_size
#' @export
vec_position_lag <- function(x, n) {
# obj_check_vector(x)
assert_count(n)
if (length(x) == 0L) {
x
} else {
vec_c(rep(NA, n), vec_slice(x, seq_len(vec_size(x) - 1L)))
}
}

#' Checks to see if a value in a vector is LOCF
#' @description
#' LOCF meaning last observation carried forward. lags the vector by 1, then
Expand All @@ -394,8 +411,8 @@ removed_by_compactify <- function(df, keys, tolerance) {
#' @importFrom dplyr lag if_else near
#' @keywords internal
is_locf <- function(vec, tolerance) { # nolint: object_usage_linter
lag_vec <- dplyr::lag(vec)
if (typeof(vec) == "double") {
lag_vec <- vec_position_lag(vec, 1L)
if (inherits(vec, "numeric")) { # (no matrix/array/general support)
res <- if_else(
!is.na(vec) & !is.na(lag_vec),
near(vec, lag_vec, tol = tolerance),
Expand Down
131 changes: 109 additions & 22 deletions R/key_colnames.R
Original file line number Diff line number Diff line change
@@ -1,47 +1,134 @@
#' Grab any keys associated to an epi_df
#' Get names of columns that form a (unique) key associated with an object
#'
#' @param x a data.frame, tibble, or epi_df
#' This is entirely based on metadata and arguments passed; there are no
#' explicit checks that the key actually is unique in any associated data
#' structures.
#'
#' @param x an object, such as an [`epi_df`]
#' @param ... additional arguments passed on to methods
#' @param other_keys an optional character vector of other keys to include
#' @param exclude an optional character vector of keys to exclude
#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`.
#' @param geo_keys optional character vector; which columns (if any) to consider
#' keys specifying the geographical region? Defaults to `"geo_value"` if
#' present; must be `"geo_value"` if `x` is an `epi_df`.
#' @param other_keys character vector; which columns (if any) to consider keys
#' specifying demographical or identifying/grouping information besides the
#' geographical region and time interval? Mandatory if `x` is a vanilla
#' `data.frame` or `tibble`. Optional if `x` is an `epi_df`; default is the
#' `epi_df`'s `other_keys`; if you provide `other_keys`, they must match the
#' default. (This behavior is to enable consistent and sane results when you
#' can't guarantee whether `x` is an `epi_df` or just a
#' `tibble`/`data.frame`.)
#' @param time_keys optional character vector; which columns (if any) to
#' consider keys specifying the time interval during which associated events
#' occurred? Defaults to `"time_value"` if present; must be `"time_value"` if
#' `x` is an `epi_df`.
#' @param exclude an optional character vector of key column names to exclude
#' from the result
#' @return character vector
#' @keywords internal
#' @export
key_colnames <- function(x, ...) {
UseMethod("key_colnames")
}

#' @rdname key_colnames
#' @method key_colnames default
#' @export
key_colnames.default <- function(x, ...) {
character(0L)
key_colnames <- function(x, ..., exclude = character()) {
provided_args <- rlang::call_args_names(rlang::call_match())
if ("extra_keys" %in% provided_args) {
lifecycle::deprecate_soft("0.9.6", "key_colnames(extra_keys=)", "key_colnames(other_keys=)")
redispatch <- function(..., extra_keys) {
key_colnames(..., other_keys = extra_keys)
}
redispatch(x, ..., exclude = exclude)
} else {
UseMethod("key_colnames")
}
}

#' @rdname key_colnames
#' @importFrom rlang check_dots_empty0
#' @method key_colnames data.frame
#' @export
key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) {
key_colnames.data.frame <- function(x, ...,
geo_keys = intersect("geo_value", names(x)),
other_keys,
time_keys = intersect("time_value", names(x)),
exclude = character()) {
check_dots_empty0(...)
assert_character(geo_keys)
assert_character(time_keys)
assert_character(other_keys)
assert_character(exclude)
nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude)
intersect(nm, colnames(x))
keys <- c(geo_keys, other_keys, time_keys)
if (!all(keys %in% names(x))) {
cli_abort(c(
"Some of the specified key columns aren't present in `x`",
"i" = "Specified keys: {format_varnames(keys)}",
"i" = "Columns of x: {format_varnames(names(x))}",
"x" = "Missing keys: {format_varnames(setdiff(keys, names(x)))}"
), class = "epiprocess__key_colnames__keys_not_in_colnames")
}
setdiff(keys, exclude)
}

#' @rdname key_colnames
#' @method key_colnames epi_df
#' @export
key_colnames.epi_df <- function(x, exclude = character(0L), ...) {
key_colnames.epi_df <- function(x, ...,
geo_keys = "geo_value",
other_keys = NULL,
time_keys = "time_value",
exclude = character()) {
check_dots_empty0(...)
if (!identical(geo_keys, "geo_value")) {
cli_abort('If `x` is an `epi_df`, then `geo_keys` must be `"geo_value"`',
class = "epiprocess__key_colnames__mismatched_geo_keys"
)
}
if (!identical(time_keys, "time_value")) {
cli_abort('If `x` is an `epi_df`, then `time_keys` must be `"time_value"`',
class = "epiprocess__key_colnames__mismatched_time_keys"
)
}
expected_other_keys <- attr(x, "metadata")$other_keys
if (is.null(other_keys)) {
other_keys <- expected_other_keys
} else {
if (!identical(other_keys, expected_other_keys)) {
cli_abort(c(
"The provided `other_keys` argument didn't match the `other_keys` of `x`",
"*" = "`other_keys` was {format_chr_with_quotes(other_keys)}",
"*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}",
"i" = "If you know that `x` will always be an `epi_df` and
resolve this discrepancy by adjusting the metadata of `x`, you
shouldn't have to pass `other_keys =` here anymore,
unless you want to continue to perform this check."
), class = "epiprocess__key_colnames__mismatched_other_keys")
}
}
assert_character(exclude)
other_keys <- attr(x, "metadata")$other_keys
setdiff(c("geo_value", other_keys, "time_value"), exclude)
}

#' @rdname key_colnames
#' @method key_colnames tbl_ts
#' @export
key_colnames.tbl_ts <- function(x, ..., exclude = character()) {
check_dots_empty0(...)
assert_character(exclude)
idx <- tsibble::index_var(x)
idx2 <- tsibble::index2_var(x)
if (!identical(idx, idx2)) {
cli_abort(c(
"`x` is in the middle of a re-indexing operation with `index_by()`; it's unclear
whether we should output the old unique key or the new unique key-to-be",
"i" = "Old index: {format_varname(idx)}",
"i" = "Pending new index: {format_varname(idx2)}",
"Please complete (e.g., with `summarise()`) or remove the re-indexing operation."
), class = "epiprocess__key_colnames__incomplete_reindexing_operation")
}
setdiff(c(tsibble::key_vars(x), idx), exclude)
}

#' @rdname key_colnames
#' @method key_colnames epi_archive
#' @export
key_colnames.epi_archive <- function(x, exclude = character(0L), ...) {
key_colnames.epi_archive <- function(x, ..., exclude = character()) {
check_dots_empty0(...)
assert_character(exclude)
other_keys <- attr(x, "metadata")$other_keys
setdiff(c("geo_value", other_keys, "time_value"), exclude)
setdiff(c("geo_value", x$other_keys, "time_value", "version"), exclude)
}
Loading
Loading