From 1125ce57f424e9185e9ae5d6a983c06a80edf8ae Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Thu, 26 Sep 2024 10:35:14 -0500 Subject: [PATCH 1/5] daily -> weekly and upsampler: basic code --- R/time_converters.R | 72 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 R/time_converters.R diff --git a/R/time_converters.R b/R/time_converters.R new file mode 100644 index 00000000..1866b44f --- /dev/null +++ b/R/time_converters.R @@ -0,0 +1,72 @@ +#' convert an archive from daily data to weekly data, summing where appropriate +#' @details +#' this function is slow, so make sure you are calling it correctly, and +#' consider testing it on a small portion of your archive first +#' @param day_of_week integer, day of the week, starting from Sunday, select the +#' date to represent the week in the time_value column, based on it's +#' corresponding day of the week. The default value represents the week using +#' Wednesday. +#' @param day_of_week_end integer, day of the week starting on Sunday. +#' Represents the last day, so the week consists of data summed to this day. +#' The default value `6` means that the week is summed from Sunday through +#' Saturday. +daily_to_weekly <- function(epi_arch, + agg_columns, + agg_method = c("total", "mean", "median"), + day_of_week = 4L, + day_of_week_end = 6L) { + keys <- grep("time_value", key_colnames(epi_arch), invert = TRUE, value = TRUE) + too_many_tibbles <- epix_slide( + epi_arch, + before = 99999999L, + ref_time_values = ref_time_values, + function(x, group, ref_time) { + x %>% + group_by(across(all_of(keys))) %>% + epi_slide_sum(agg_columns, before = 6L) %>% + select(-all_of(agg_columns)) %>% + rename_with(~ gsub("slide_value_", "", .x)) %>% + # only keep 1/week + filter(wday(time_value) == day_of_week_end) %>% + # switch time_value to the designated day of the week + mutate(time_value = time_value - 7L + day_of_week) %>% + as_tibble() + } + ) + too_many_tibbles %>% + rename(version = time_value) %>% + rename_with(~ gsub("slide_value_", "", .x)) %>% + as_epi_archive(compactify = TRUE) +} + + + +#' fill in values between, and divide any numeric values equally between them +#' @param to_complete epi_archive +#' @param groups to be grouped by. Should include both time_value and version, and any epi_keys +#' @param columns_to_complete any columns that need their values extended +#' @param aggregate_columns any columns which have numerical data that is a sum +#' across days, and thus needs to be divided into equal parts distributed +#' accross days +convert_to_period_upsample <- function(to_complete, groups, columns_to_complete, + aggregate_columns, source_period = 7, target_period = 1) { + to_complete_datatable <- to_complete$DT + completed_time_values <- + to_complete_datatable %>% + group_by(across(all_of(groups))) %>% + reframe( + time_value = seq(from = time_value, to = time_value + source_period - 1, by = target_period) + ) %>% + unique() + completed <- to_complete_datatable %>% + full_join( + completed_time_values, + by = join_by(season, geo_value, version, time_value) + ) %>% + arrange(geo_value, version, time_value) %>% + fill(all_of(columns_to_complete), .direction = "down") %>% + mutate(across(all_of(aggregate_columns), \(x) x / 7)) + completed %>% + arrange(geo_value, time_value) %>% + as_epi_archive(compactify = TRUE) +} From 4804c34889fe99d539b9e92e671a79a6ff8a8b5d Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Thu, 26 Sep 2024 10:54:25 -0500 Subject: [PATCH 2/5] conversions to/from week/date and season --- R/time_converters.R | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/R/time_converters.R b/R/time_converters.R index 1866b44f..a2c294a9 100644 --- a/R/time_converters.R +++ b/R/time_converters.R @@ -70,3 +70,51 @@ convert_to_period_upsample <- function(to_complete, groups, columns_to_complete, arrange(geo_value, time_value) %>% as_epi_archive(compactify = TRUE) } + + + + + +#' get a season e.g. "2020/21" for a given year-week pair +#' @keywords internal +convert_epiweek_to_season <- function(epiyear, epiweek, season_start_week = 40) { + # Convert epiweek to season + update_inds <- epiweek < season_start_week + epiyear <- ifelse(update_inds, epiyear - 1, epiyear) + + season <- paste0(epiyear, "/", substr((epiyear + 1), 3, 4)) + return(season) +} + +#' get a total count of the epiweeks in a given year +#' @keywords internal +epiweeks_in_year <- function(year) { + last_week_of_year <- seq.Date(as.Date(paste0(year, "-12-24")), + as.Date(paste0(year, "-12-31")), + by = 1 + ) + return(max(as.numeric(MMWRweek(last_week_of_year)$MMWRweek))) +} + +#' get the week in a season +#' @keywords internal +convert_epiweek_to_season_week <- function(epiyear, epiweek, season_start = 40) { + season_week <- epiweek - season_start+1 + + update_inds <- season_week <= 0 + # last year's # of epiweeks determines which week in the season we're at at + # the beginning of the year + season_week[update_inds] <- season_week[update_inds] + + sapply(epiyear[update_inds] - 1, epiweeks_in_year) + + return(season_week) +} + +#' get a canonical date to represent a given epiweek +#' @keywords internal +convert_epiweek_to_date <- function(epiyear, epiweek, + day_of_week = 1) { + end_date <- MMWRweek2Date(epiyear, epiweek, day_of_week) + + return(end_date) +} From 44fe03f3effea277399acb1c6dcaeed91af0009d Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Thu, 26 Sep 2024 10:59:27 -0500 Subject: [PATCH 3/5] actually does both sum and mean --- R/time_converters.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/R/time_converters.R b/R/time_converters.R index a2c294a9..61a5ae42 100644 --- a/R/time_converters.R +++ b/R/time_converters.R @@ -12,9 +12,15 @@ #' Saturday. daily_to_weekly <- function(epi_arch, agg_columns, - agg_method = c("total", "mean", "median"), + agg_method = c("sum", "mean"), day_of_week = 4L, day_of_week_end = 6L) { + agg_method <- arg_match(agg_method) + if (agg_method =="total") { + agg_fun <- epi_slide_sum + } else if (agg_method == "mean") { + agg_fun <- epi_slide_mean + } keys <- grep("time_value", key_colnames(epi_arch), invert = TRUE, value = TRUE) too_many_tibbles <- epix_slide( epi_arch, @@ -23,7 +29,7 @@ daily_to_weekly <- function(epi_arch, function(x, group, ref_time) { x %>% group_by(across(all_of(keys))) %>% - epi_slide_sum(agg_columns, before = 6L) %>% + agg_fun(agg_columns, before = 6L) %>% select(-all_of(agg_columns)) %>% rename_with(~ gsub("slide_value_", "", .x)) %>% # only keep 1/week @@ -41,6 +47,7 @@ daily_to_weekly <- function(epi_arch, + #' fill in values between, and divide any numeric values equally between them #' @param to_complete epi_archive #' @param groups to be grouped by. Should include both time_value and version, and any epi_keys From a9375cebce2712117aab3cf2e5e6575e2c2e1d92 Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Thu, 26 Sep 2024 16:04:16 +0000 Subject: [PATCH 4/5] docs: document (GHA) --- DESCRIPTION | 1 + man/convert_epiweek_to_date.Rd | 12 ++++++++++ man/convert_epiweek_to_season.Rd | 12 ++++++++++ man/convert_epiweek_to_season_week.Rd | 12 ++++++++++ man/convert_to_period_upsample.Rd | 29 ++++++++++++++++++++++++ man/daily_to_weekly.Rd | 32 +++++++++++++++++++++++++++ man/epiprocess.Rd | 2 +- man/epiweeks_in_year.Rd | 12 ++++++++++ 8 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 man/convert_epiweek_to_date.Rd create mode 100644 man/convert_epiweek_to_season.Rd create mode 100644 man/convert_epiweek_to_season_week.Rd create mode 100644 man/convert_to_period_upsample.Rd create mode 100644 man/daily_to_weekly.Rd create mode 100644 man/epiweeks_in_year.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 18aec43f..5eb996b7 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -86,5 +86,6 @@ Collate: 'reexports.R' 'revision_analysis.R' 'slide.R' + 'time_converters.R' 'utils.R' 'utils_pipe.R' diff --git a/man/convert_epiweek_to_date.Rd b/man/convert_epiweek_to_date.Rd new file mode 100644 index 00000000..a333d0cd --- /dev/null +++ b/man/convert_epiweek_to_date.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/time_converters.R +\name{convert_epiweek_to_date} +\alias{convert_epiweek_to_date} +\title{get a canonical date to represent a given epiweek} +\usage{ +convert_epiweek_to_date(epiyear, epiweek, day_of_week = 1) +} +\description{ +get a canonical date to represent a given epiweek +} +\keyword{internal} diff --git a/man/convert_epiweek_to_season.Rd b/man/convert_epiweek_to_season.Rd new file mode 100644 index 00000000..c0ca5ea1 --- /dev/null +++ b/man/convert_epiweek_to_season.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/time_converters.R +\name{convert_epiweek_to_season} +\alias{convert_epiweek_to_season} +\title{get a season e.g. "2020/21" for a given year-week pair} +\usage{ +convert_epiweek_to_season(epiyear, epiweek, season_start_week = 40) +} +\description{ +get a season e.g. "2020/21" for a given year-week pair +} +\keyword{internal} diff --git a/man/convert_epiweek_to_season_week.Rd b/man/convert_epiweek_to_season_week.Rd new file mode 100644 index 00000000..3e46ecfe --- /dev/null +++ b/man/convert_epiweek_to_season_week.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/time_converters.R +\name{convert_epiweek_to_season_week} +\alias{convert_epiweek_to_season_week} +\title{get the week in a season} +\usage{ +convert_epiweek_to_season_week(epiyear, epiweek, season_start = 40) +} +\description{ +get the week in a season +} +\keyword{internal} diff --git a/man/convert_to_period_upsample.Rd b/man/convert_to_period_upsample.Rd new file mode 100644 index 00000000..2fc0b0bf --- /dev/null +++ b/man/convert_to_period_upsample.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/time_converters.R +\name{convert_to_period_upsample} +\alias{convert_to_period_upsample} +\title{fill in values between, and divide any numeric values equally between them} +\usage{ +convert_to_period_upsample( + to_complete, + groups, + columns_to_complete, + aggregate_columns, + source_period = 7, + target_period = 1 +) +} +\arguments{ +\item{to_complete}{epi_archive} + +\item{groups}{to be grouped by. Should include both time_value and version, and any epi_keys} + +\item{columns_to_complete}{any columns that need their values extended} + +\item{aggregate_columns}{any columns which have numerical data that is a sum +across days, and thus needs to be divided into equal parts distributed +accross days} +} +\description{ +fill in values between, and divide any numeric values equally between them +} diff --git a/man/daily_to_weekly.Rd b/man/daily_to_weekly.Rd new file mode 100644 index 00000000..3600719f --- /dev/null +++ b/man/daily_to_weekly.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/time_converters.R +\name{daily_to_weekly} +\alias{daily_to_weekly} +\title{convert an archive from daily data to weekly data, summing where appropriate} +\usage{ +daily_to_weekly( + epi_arch, + agg_columns, + agg_method = c("sum", "mean"), + day_of_week = 4L, + day_of_week_end = 6L +) +} +\arguments{ +\item{day_of_week}{integer, day of the week, starting from Sunday, select the +date to represent the week in the time_value column, based on it's +corresponding day of the week. The default value represents the week using +Wednesday.} + +\item{day_of_week_end}{integer, day of the week starting on Sunday. +Represents the last day, so the week consists of data summed to this day. +The default value \code{6} means that the week is summed from Sunday through +Saturday.} +} +\description{ +convert an archive from daily data to weekly data, summing where appropriate +} +\details{ +this function is slow, so make sure you are calling it correctly, and +consider testing it on a small portion of your archive first +} diff --git a/man/epiprocess.Rd b/man/epiprocess.Rd index a3e98366..f6345cbe 100644 --- a/man/epiprocess.Rd +++ b/man/epiprocess.Rd @@ -2,8 +2,8 @@ % Please edit documentation in R/epiprocess.R \docType{package} \name{epiprocess} -\alias{epiprocess} \alias{epiprocess-package} +\alias{epiprocess} \title{epiprocess: Tools for basic signal processing in epidemiology} \description{ This package introduces a common data structure for epidemiological data sets diff --git a/man/epiweeks_in_year.Rd b/man/epiweeks_in_year.Rd new file mode 100644 index 00000000..b79a0745 --- /dev/null +++ b/man/epiweeks_in_year.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/time_converters.R +\name{epiweeks_in_year} +\alias{epiweeks_in_year} +\title{get a total count of the epiweeks in a given year} +\usage{ +epiweeks_in_year(year) +} +\description{ +get a total count of the epiweeks in a given year +} +\keyword{internal} From 09cf3676774325271142a661dc6f3e6b836252b8 Mon Sep 17 00:00:00 2001 From: dsweber2 Date: Thu, 26 Sep 2024 16:04:45 +0000 Subject: [PATCH 5/5] style: styler (GHA) --- R/time_converters.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/time_converters.R b/R/time_converters.R index 61a5ae42..394fa3cf 100644 --- a/R/time_converters.R +++ b/R/time_converters.R @@ -16,7 +16,7 @@ daily_to_weekly <- function(epi_arch, day_of_week = 4L, day_of_week_end = 6L) { agg_method <- arg_match(agg_method) - if (agg_method =="total") { + if (agg_method == "total") { agg_fun <- epi_slide_sum } else if (agg_method == "mean") { agg_fun <- epi_slide_mean @@ -106,7 +106,7 @@ epiweeks_in_year <- function(year) { #' get the week in a season #' @keywords internal convert_epiweek_to_season_week <- function(epiyear, epiweek, season_start = 40) { - season_week <- epiweek - season_start+1 + season_week <- epiweek - season_start + 1 update_inds <- season_week <= 0 # last year's # of epiweeks determines which week in the season we're at at