diff --git a/R/aedseo.R b/R/aedseo.R index 32c2227..dae5ed4 100644 --- a/R/aedseo.R +++ b/R/aedseo.R @@ -13,9 +13,14 @@ #' @param level The confidence level for parameter estimates, a numeric value #' between 0 and 1. #' @param disease_threshold An integer specifying the threshold for considering -#' a disease outbreak. +#' a disease outbreak. It defines the per time-step disease threshold that has +#' to be surpassed to possibly trigger a seasonal onset alarm. If the total +#' number of cases in a window of size k exceeds `disease_threshold * k`, +#' a seasonal onset alarm can be triggered. #' @param family A character string specifying the family for modeling. #' Choose between "poisson," or "quasipoisson". +#' @param na_fraction_allowed Numeric value specifying the fraction of +#' observables in the window of size k that are allowed to be NA. #' #' @return A `aedseo` object containing: #' - 'reference_time': The time point for which the growth rate is estimated. @@ -32,6 +37,7 @@ #' disease threshold? #' - 'seasonal_onset_alarm': Logical. Is there a seasonal onset alarm? #' - 'converged': Logical. Was the IWLS judged to have converged? +#' - 'skipped_window': Logical. Was the window skipped due to missing? #' #' @export #' @@ -56,7 +62,8 @@ #' k = 3, #' level = 0.95, #' disease_threshold = 200, -#' family = "poisson" +#' family = "poisson", +#' na_fraction_allowed = 0.4, #' ) #' #' # Print the AEDSEO results @@ -70,7 +77,8 @@ aedseo <- function( "poisson", "quasipoisson" # TODO: #10 Include negative.binomial regressions. @telkamp7 - )) { + ), + na_fraction_allowed = 0.4) { # Throw an error if any of the inputs are not supported family <- rlang::arg_match(family) @@ -79,26 +87,35 @@ aedseo <- function( # Allocate space for growth rate estimates res <- tibble::tibble() + skipped_window <- base::rep(FALSE, base::nrow(tsd)) for (i in k:n) { # Index observations for this iteration obs_iter <- tsd[(i - k + 1):i, ] - # Calculate growth rates - growth_rates <- fit_growth_rate( - observations = obs_iter$observed, - level = level, - family = family - ) + # Evaluate NA values in windows + if (sum(is.na(obs_iter)) >= k * na_fraction_allowed) { + skipped_window[i] <- TRUE + # Set fields to NA since the window is skipped + growth_rates <- list(estimate = c(NA, NA, NA), + fit = list(converged = FALSE)) + } else { + # Calculate growth rates + growth_rates <- fit_growth_rate( + observations = obs_iter$observed, + level = level, + family = family + ) + } # See if the growth rate is significantly higher than zero growth_warning <- growth_rates$estimate[2] > 0 # Calculate Sum of Cases (sum_of_cases) - sum_of_cases <- base::sum(obs_iter$observed) + sum_of_cases <- base::sum(obs_iter$observed, na.rm = TRUE) # Evaluate if sum_of_cases exceeds disease_threshold - sum_of_cases_warning <- sum_of_cases > disease_threshold + sum_of_cases_warning <- sum_of_cases > (disease_threshold * k) # Give an seasonal_onset_alarm if both criteria are met seasonal_onset_alarm <- growth_warning & sum_of_cases_warning @@ -116,6 +133,7 @@ aedseo <- function( sum_of_cases = sum_of_cases, sum_of_cases_warning = sum_of_cases_warning, seasonal_onset_alarm = seasonal_onset_alarm, + skipped_window = skipped_window[i], converged = growth_rates$fit$converged ) ) diff --git a/man/aedseo.Rd b/man/aedseo.Rd index 576ffa7..e6611b8 100644 --- a/man/aedseo.Rd +++ b/man/aedseo.Rd @@ -9,7 +9,8 @@ aedseo( k = 5, level = 0.95, disease_threshold = NA_integer_, - family = c("poisson", "quasipoisson") + family = c("poisson", "quasipoisson"), + na_fraction_allowed = 0.4 ) } \arguments{ @@ -26,6 +27,9 @@ a disease outbreak.} \item{family}{A character string specifying the family for modeling. Choose between "poisson," or "quasipoisson".} + +\item{na_fraction_allowed}{A fraction that determines how many NA values +that are allowed in the observations for each k window} } \value{ A \code{aedseo} object containing: @@ -43,6 +47,7 @@ zero? \item 'sum_of_cases_warning': Logical. Does the Sum of Cases exceed the disease threshold? \item 'seasonal_onset_alarm': Logical. Is there a seasonal onset alarm? +\item 'skipped_window': Logical. Was the window skipped? \item 'converged': Logical. Was the IWLS judged to have converged? } } @@ -74,7 +79,8 @@ aedseo_results <- aedseo( k = 3, level = 0.95, disease_threshold = 200, - family = "poisson" + family = "poisson", + na_fraction_allowed = 0.4 ) # Print the AEDSEO results diff --git a/tests/testthat/test-aedseo.R b/tests/testthat/test-aedseo.R index 36ab2d0..34b247e 100644 --- a/tests/testthat/test-aedseo.R +++ b/tests/testthat/test-aedseo.R @@ -25,16 +25,78 @@ test_that("The growth rate models converge", { tsd = tsd_data_poisson, k = 3, level = 0.95, - family = "poisson" + family = "poisson", + disease_threshold = 20, + na_fraction_allowed = 0.2 ) aedseo_quasipoisson <- aedseo( tsd = tsd_data_nbinom, k = 3, level = 0.95, - family = "quasipoisson" + family = "quasipoisson", + disease_threshold = 20, + na_fraction_allowed = 0.2 ) # Check if they all converge expect_true(object = all(aedseo_poisson$converged)) expect_true(object = all(aedseo_quasipoisson$converged)) }) + +test_that("Test if it works with weeks with NA values", { + from_date <- as.Date("2021-01-01") + to_date <- as.Date("2021-01-31") + + # Choose some time dates + time <- seq.Date(from = from_date, to = to_date, by = "day") + + # Count the number of observations + n <- length(time) + + # Add NA values to observed + na_count <- 15 + + # Randomly select indices to replace with NA + na_indices <- sample(1:n, na_count, replace = FALSE) + + # Create observable + observed <- rpois(n = n, lambda = 1:n) + + # Add NA values + observed[na_indices] <- NA + + # Data + tsd_data_poisson_na <- tsd( + observed = observed, + time = time, + time_interval = "day" + ) + + # Calculate AEDSEO with a 3-day window + aedseo_poisson_na <- aedseo( + tsd = tsd_data_poisson_na, + k = 3, + level = 0.95, + family = "poisson", + disease_threshold = 20, + na_fraction_allowed = 0.4 + ) + + # Test if correct amount of windows with NA are skipped + k <- 3 + na_fraction_allowed <- 0.4 + n <- base::nrow(tsd_data_poisson_na) + skipped_window_count <- 0 + + for (i in k:n) { + obs_iter <- tsd_data_poisson_na[(i - k + 1):i, ] + if (sum(is.na(obs_iter)) >= k * na_fraction_allowed) { + skipped_window_count <- skipped_window_count + 1 + } + } + + # Not all will be converged due to NA injections + expect_false(all(aedseo_poisson_na$converged)) + # Count if the skipped windows are = ones in output + expect_equal(skipped_window_count, sum(aedseo_poisson_na$skipped_window)) +}) diff --git a/vignettes/aedseo_introduction.Rmd b/vignettes/aedseo_introduction.Rmd index 29d92df..8a6c3be 100644 --- a/vignettes/aedseo_introduction.Rmd +++ b/vignettes/aedseo_introduction.Rmd @@ -174,7 +174,7 @@ In the following figure, the simulated data (solid circles) is visualized alongs plot(tsd_data) ``` -Next, the time series data object is passed to the `aedseo()` function. Here, a window width of $k=5$ is specified, meaning that a total of 5 weeks is used in the local estimate of the exponential growth rate. Additionally, a 95\% confidence interval is specified. Finally, the exponential growth rate is estimated using quasi-Poisson regression to account for overdispersion in the data. +Next, the time series data object is passed to the `aedseo()` function. Here, a window size of `k=5` is specified, meaning that a total of 5 weeks is used in the local estimate of the exponential growth rate. `na_fraction_allowed = 0.4` defines how large a fraction of observables in the k window that are allowed to be NA, here $0.4*5 = 2$ observables. Additionally, a 95\% confidence interval is specified. Finally, the exponential growth rate is estimated using quasi-Poisson regression to account for overdispersion in the data. ```{r} # Apply the 'aedseo' algorithm @@ -183,7 +183,8 @@ aedseo_results <- aedseo( k = 5, level = 0.95, disease_threshold = 2000, - family = "quasipoisson" + family = "quasipoisson", + na_fraction_allowed = 0.4 ) ```