Skip to content

Commit

Permalink
Merge pull request #32 from ssi-dk/change_disease_threshold
Browse files Browse the repository at this point in the history
Changing threshold and adding functionallity if observables have NA
  • Loading branch information
SofiaOtero authored Oct 14, 2024
2 parents 8336df0 + 4bfbedf commit 4dd00e3
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 17 deletions.
40 changes: 29 additions & 11 deletions R/aedseo.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@
#' @param level The confidence level for parameter estimates, a numeric value
#' between 0 and 1.
#' @param disease_threshold An integer specifying the threshold for considering
#' a disease outbreak.
#' a disease outbreak. It defines the per time-step disease threshold that has
#' to be surpassed to possibly trigger a seasonal onset alarm. If the total
#' number of cases in a window of size k exceeds `disease_threshold * k`,
#' a seasonal onset alarm can be triggered.
#' @param family A character string specifying the family for modeling.
#' Choose between "poisson," or "quasipoisson".
#' @param na_fraction_allowed Numeric value specifying the fraction of
#' observables in the window of size k that are allowed to be NA.
#'
#' @return A `aedseo` object containing:
#' - 'reference_time': The time point for which the growth rate is estimated.
Expand All @@ -32,6 +37,7 @@
#' disease threshold?
#' - 'seasonal_onset_alarm': Logical. Is there a seasonal onset alarm?
#' - 'converged': Logical. Was the IWLS judged to have converged?
#' - 'skipped_window': Logical. Was the window skipped due to missing?
#'
#' @export
#'
Expand All @@ -56,7 +62,8 @@
#' k = 3,
#' level = 0.95,
#' disease_threshold = 200,
#' family = "poisson"
#' family = "poisson",
#' na_fraction_allowed = 0.4,
#' )
#'
#' # Print the AEDSEO results
Expand All @@ -70,7 +77,8 @@ aedseo <- function(
"poisson",
"quasipoisson"
# TODO: #10 Include negative.binomial regressions. @telkamp7
)) {
),
na_fraction_allowed = 0.4) {
# Throw an error if any of the inputs are not supported
family <- rlang::arg_match(family)

Expand All @@ -79,26 +87,35 @@ aedseo <- function(

# Allocate space for growth rate estimates
res <- tibble::tibble()
skipped_window <- base::rep(FALSE, base::nrow(tsd))

for (i in k:n) {
# Index observations for this iteration
obs_iter <- tsd[(i - k + 1):i, ]

# Calculate growth rates
growth_rates <- fit_growth_rate(
observations = obs_iter$observed,
level = level,
family = family
)
# Evaluate NA values in windows
if (sum(is.na(obs_iter)) >= k * na_fraction_allowed) {
skipped_window[i] <- TRUE
# Set fields to NA since the window is skipped
growth_rates <- list(estimate = c(NA, NA, NA),
fit = list(converged = FALSE))
} else {
# Calculate growth rates
growth_rates <- fit_growth_rate(
observations = obs_iter$observed,
level = level,
family = family
)
}

# See if the growth rate is significantly higher than zero
growth_warning <- growth_rates$estimate[2] > 0

# Calculate Sum of Cases (sum_of_cases)
sum_of_cases <- base::sum(obs_iter$observed)
sum_of_cases <- base::sum(obs_iter$observed, na.rm = TRUE)

# Evaluate if sum_of_cases exceeds disease_threshold
sum_of_cases_warning <- sum_of_cases > disease_threshold
sum_of_cases_warning <- sum_of_cases > (disease_threshold * k)

# Give an seasonal_onset_alarm if both criteria are met
seasonal_onset_alarm <- growth_warning & sum_of_cases_warning
Expand All @@ -116,6 +133,7 @@ aedseo <- function(
sum_of_cases = sum_of_cases,
sum_of_cases_warning = sum_of_cases_warning,
seasonal_onset_alarm = seasonal_onset_alarm,
skipped_window = skipped_window[i],
converged = growth_rates$fit$converged
)
)
Expand Down
10 changes: 8 additions & 2 deletions man/aedseo.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 64 additions & 2 deletions tests/testthat/test-aedseo.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,78 @@ test_that("The growth rate models converge", {
tsd = tsd_data_poisson,
k = 3,
level = 0.95,
family = "poisson"
family = "poisson",
disease_threshold = 20,
na_fraction_allowed = 0.2
)
aedseo_quasipoisson <- aedseo(
tsd = tsd_data_nbinom,
k = 3,
level = 0.95,
family = "quasipoisson"
family = "quasipoisson",
disease_threshold = 20,
na_fraction_allowed = 0.2
)

# Check if they all converge
expect_true(object = all(aedseo_poisson$converged))
expect_true(object = all(aedseo_quasipoisson$converged))
})

test_that("Test if it works with weeks with NA values", {
from_date <- as.Date("2021-01-01")
to_date <- as.Date("2021-01-31")

# Choose some time dates
time <- seq.Date(from = from_date, to = to_date, by = "day")

# Count the number of observations
n <- length(time)

# Add NA values to observed
na_count <- 15

# Randomly select indices to replace with NA
na_indices <- sample(1:n, na_count, replace = FALSE)

# Create observable
observed <- rpois(n = n, lambda = 1:n)

# Add NA values
observed[na_indices] <- NA

# Data
tsd_data_poisson_na <- tsd(
observed = observed,
time = time,
time_interval = "day"
)

# Calculate AEDSEO with a 3-day window
aedseo_poisson_na <- aedseo(
tsd = tsd_data_poisson_na,
k = 3,
level = 0.95,
family = "poisson",
disease_threshold = 20,
na_fraction_allowed = 0.4
)

# Test if correct amount of windows with NA are skipped
k <- 3
na_fraction_allowed <- 0.4
n <- base::nrow(tsd_data_poisson_na)
skipped_window_count <- 0

for (i in k:n) {
obs_iter <- tsd_data_poisson_na[(i - k + 1):i, ]
if (sum(is.na(obs_iter)) >= k * na_fraction_allowed) {
skipped_window_count <- skipped_window_count + 1
}
}

# Not all will be converged due to NA injections
expect_false(all(aedseo_poisson_na$converged))
# Count if the skipped windows are = ones in output
expect_equal(skipped_window_count, sum(aedseo_poisson_na$skipped_window))
})
5 changes: 3 additions & 2 deletions vignettes/aedseo_introduction.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ In the following figure, the simulated data (solid circles) is visualized alongs
plot(tsd_data)
```

Next, the time series data object is passed to the `aedseo()` function. Here, a window width of $k=5$ is specified, meaning that a total of 5 weeks is used in the local estimate of the exponential growth rate. Additionally, a 95\% confidence interval is specified. Finally, the exponential growth rate is estimated using quasi-Poisson regression to account for overdispersion in the data.
Next, the time series data object is passed to the `aedseo()` function. Here, a window size of `k=5` is specified, meaning that a total of 5 weeks is used in the local estimate of the exponential growth rate. `na_fraction_allowed = 0.4` defines how large a fraction of observables in the k window that are allowed to be NA, here $0.4*5 = 2$ observables. Additionally, a 95\% confidence interval is specified. Finally, the exponential growth rate is estimated using quasi-Poisson regression to account for overdispersion in the data.

```{r}
# Apply the 'aedseo' algorithm
Expand All @@ -183,7 +183,8 @@ aedseo_results <- aedseo(
k = 5,
level = 0.95,
disease_threshold = 2000,
family = "quasipoisson"
family = "quasipoisson",
na_fraction_allowed = 0.4
)
```

Expand Down

0 comments on commit 4dd00e3

Please sign in to comment.