From 4cdcae32e222da8d2849cea3859ca4c9e1b7d95c Mon Sep 17 00:00:00 2001 From: SofiaOtero Date: Fri, 1 Nov 2024 16:54:19 +0100 Subject: [PATCH] Changing functions --- NAMESPACE | 1 + R/compute_relative_dist_intensity_levels.R | 80 ------- R/fit_quantiles.R | 10 +- R/intensity_levels.R | 111 ---------- R/seasonal_burden_levels.R | 200 ++++++++++++++++++ man/compute_relative_dist_intensity_levels.Rd | 64 ------ man/fit_quantiles.Rd | 7 +- ...ty_levels.Rd => seasonal_burden_levels.Rd} | 104 ++++++--- ...t-compute_relative_dist_intensity_levels.R | 16 -- tests/testthat/test-fit_quantiles.R | 2 +- tests/testthat/test-intensity_levels.R | 87 -------- tests/testthat/test-seasonal_burden_levels.R | 142 +++++++++++++ 12 files changed, 431 insertions(+), 393 deletions(-) delete mode 100644 R/compute_relative_dist_intensity_levels.R delete mode 100644 R/intensity_levels.R create mode 100644 R/seasonal_burden_levels.R delete mode 100644 man/compute_relative_dist_intensity_levels.Rd rename man/{intensity_levels.Rd => seasonal_burden_levels.Rd} (50%) delete mode 100644 tests/testthat/test-compute_relative_dist_intensity_levels.R delete mode 100644 tests/testthat/test-intensity_levels.R create mode 100644 tests/testthat/test-seasonal_burden_levels.R diff --git a/NAMESPACE b/NAMESPACE index b7dd387..9608ee9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(autoplot) export(epi_calendar) export(fit_growth_rate) export(fit_quantiles) +export(seasonal_burden_levels) export(tsd) importFrom(ggplot2,autoplot) importFrom(grDevices,devAskNewPage) diff --git a/R/compute_relative_dist_intensity_levels.R b/R/compute_relative_dist_intensity_levels.R deleted file mode 100644 index 182ce77..0000000 --- a/R/compute_relative_dist_intensity_levels.R +++ /dev/null @@ -1,80 +0,0 @@ -#' Compute intensity levels with weighted time series observations (relative dist model). -#' -#' @description -#' `r lifecycle::badge("stable")` -#' -#' This function calculates the intensity levels of weighted time series observations. The output contains very low, -#' low, medium, high intensity levels, that predict the intensity levels of the next series of observations. -#' It uses the highest level from the `compute_weighted_intensity_levels` function as the high intensity level and -#' the disease_threshold as the very low intensity level. -#' -#' @param weighted_observations A tibble containing two columns of length n; `observed`, which contains the data -#' points, and `weight`, which is the importance assigned to the observation. Higher weights indicate that an -#' observation has more influence on the model outcome, while lower weights reduce its impact. -#' @param disease_threshold An integer specifying the threshold for considering a disease outbreak. It defines the per -#' time-step disease threshold that has to be surpassed for the observation to be included in the calculations. -#' @param ... arguments that can be passed to the `compute_weighted_intensity_levels` function. -#' -#' @return A tibble containing: -#' - 'very_low_level' The very low intensity level. -#' - 'low_level': The low intensity level. -#' - 'medium_level': The medium intensity level. -#' - 'high_level': The high intensity level. -#' - 'relative_dist': The relative distance between levels. -#' - 'high_conf_level': The conf level for the high intensity level. -#' - 'optim_fit_par_1': The first fit parameter for the chosen family. -#' - For 'weibull': Shape parameter (k). -#' - For 'lnorm': Mean of the log-transformed observations. -#' - For 'exp': Rate parameter (λ). -#' - 'optim_fit_par_2': The second fit parameter for the chosen family. -#' - For 'weibull': Scale parameter (λ). -#' - For 'lnorm': Standard deviation of the log-transformed observations. -#' - For 'exp': Not applicable (set to NA). -#' - 'obj_value': The value of the objective function. -#' (negative log-likelihood), which represent the minimized objective -#' function value from the optimisation. Smaller value equals better -#' optimisation. -#' - 'converged': Logical. TRUE if the optimisation converged. -#' - 'family': The distribution family used for the optimization. -#' - 'weibull': Uses the Weibull distribution for fitting. -#' - 'lnorm': Uses the Log-normal distribution for fitting. -#' - 'exp': Uses the Exponential distribution for fitting. -#' -#' -compute_relative_dist_intensity_levels <- function( - weighted_observations, - disease_threshold = 20, - ... -) { - # Check input arguments - coll <- checkmate::makeAssertCollection() - checkmate::assert_tibble(weighted_observations, add = coll) - checkmate::assert_names(colnames(weighted_observations), - identical.to = c("observed", "weight"), add = coll) - checkmate::assert_integerish(disease_threshold, len = 1, add = coll) - checkmate::reportAssertions(coll) - - # Run `compute_weighted_intensity_levels` function on data - weighted_intensity_levels <- compute_weighted_intensity_levels(weighted_observations = weighted_observations, ...) - - # Log transform levels - high_level_log <- log(weighted_intensity_levels$high_level) - very_low_level_log <- log(disease_threshold) - - # Relative distance between very low and high level - relative_dist <- (high_level_log - very_low_level_log) / 3 - - # Calculate low and medium levels - medium_level <- exp(high_level_log - relative_dist) - low_level <- exp(very_low_level_log + relative_dist) - - # Combine output results - relative_dist_results <- weighted_intensity_levels |> - dplyr::select(-c("low_level", "medium_level")) |> - dplyr::mutate(medium_level = medium_level, - low_level = low_level, - very_low_level = disease_threshold, - relative_dist = relative_dist) - - print(relative_dist_results) -} diff --git a/R/fit_quantiles.R b/R/fit_quantiles.R index 2c767e5..8ce4dbf 100644 --- a/R/fit_quantiles.R +++ b/R/fit_quantiles.R @@ -22,11 +22,11 @@ #' - 'conf_levels': The conf_levels chosen to fit the quantiles. #' - 'quantiles': The quantile results from the fit. #' - 'par': The fit parameters for the chosen family. -#' - par[1]: +#' - par_1: #' - For 'weibull': Shape parameter (k). #' - For 'lnorm': Mean of the log-transformed observations. #' - For 'exp': Rate parameter (λ). -#' - 'par[2]': +#' - 'par_2': #' - For 'weibull': Scale parameter (λ). #' - For 'lnorm': Standard deviation of the log-transformed observations. #' - For 'exp': Not applicable (set to NA). @@ -54,10 +54,10 @@ #' ) #' #' # Use the model -#' fit_quantiles(weighted_observations = data_input, +#' quantile_results <- fit_quantiles(weighted_observations = data_input, #' conf_levels = c(0.50, 0.90, 0.95), #' family = "weibull") - +#' print(quantile_results) fit_quantiles <- function( weighted_observations, conf_levels = c(0.50, 0.90, 0.95), @@ -136,7 +136,7 @@ fit_quantiles <- function( # Create return fit parameters return(list( conf_levels = conf_levels, - quantiles = as.numeric(quantiles), + values = as.numeric(quantiles), par = list(par_fit[1], ifelse(length(par_fit) == 2, par_fit[2], NA)), obj_value = optim_obj$value, converged = optim_obj$convergence == 0, diff --git a/R/intensity_levels.R b/R/intensity_levels.R deleted file mode 100644 index 7080983..0000000 --- a/R/intensity_levels.R +++ /dev/null @@ -1,111 +0,0 @@ -#' Compute burden levels with seasonal time series observations. -#' -#' @description -#' `r lifecycle::badge("stable")` -#' -#' This function calculates the burden levels of time series of observations that are stratified by season. -#' It uses the previous seasons to calculate the levels of the newest season. -#' -#' @param tsd A `aedseo_tsd` object containing time series data with 'time' and 'observed'. -#' @param method A character string specifying the model to be used in the level calculations. -#' Choose between intensity_levels or peak_levels. Both model predict the levels of the next series of observations. -#' - `intensity_levels`: models the risk compared to what has been observed in previous years. -#' - `peak_levels`: models the risk compared to what has been observed in the `n_peak` observations each season. -#' @param conf_levels -#' @param decay_factor A numeric value between 0 and 1, that specifies the weight applied to previous seasons in -#' calculations. It is used as `decay_factor`^(number of seasons back), whereby the weight for the most recent season -#' will be `decay_factor`^0 = 1. This parameter allows for a decreasing weight assigned to prior seasons, such that -#' the influence of older seasons diminishes exponentially. -#' @param disease_threshold An integer specifying the threshold for considering a disease outbreak. It defines the per -#' time-step disease threshold that has to be surpassed for the observation to be included in the calculations. -#' @param n_peak A numeric value specifying the number of peak observations to be selected from each season in the -#' level calculations. The `n_peak` observations have to surpass the `disease_threshold` to be included. -#' @param season_weeks A numeric vector of length 2, `c(start, end)`, with the start and end weeks of the seasons to -#' stratify the observations by. Must span the new year; e.g.: `season_weeks = c(21, 20)`. -#' @param ... arguments that can be passed to the `compute_weighted_intensity_levels()` function. -#' -#' @return A tibble containing: -#' - 'very_low_level': The very low burden level -#' - 'low_level': The low burden level -#' - 'medium_level': The medium burden level -#' - 'high_level': The high burden level -#' - 'optim_fit_par_1': The first fit parameter for the chosen family. -#' - For 'weibull': Shape parameter (k). -#' - For 'lnorm': Mean of the log-transformed observations. -#' - For 'exp': Rate parameter (λ). -#' - 'optim_fit_par_2': The second fit parameter for the chosen family. -#' - For 'weibull': Scale parameter (λ). -#' - For 'lnorm': Standard deviation of the log-transformed observations. -#' - For 'exp': Not applicable (set to NA). -#' - 'obj_value': The value of the objective function -#' (negative log-likelihood), which represent the minimized objective -#' function value from the optimisation. Smaller value equals better -#' optimisation. -#' - 'converged': Logical. TRUE if the optimisation converged. -#' - 'family': The distribution family used for the optimization. -#' - 'weibull': Uses the Weibull distribution for fitting. -#' - 'lnorm': Uses the Log-normal distribution for fitting. -#' - 'exp': Uses the Exponential distribution for fitting. - -intensity_levels <- function( - tsd, - method = c("intensity_levels", "peak_levels"), - conf_levels = 0.95, - decay_factor = 0.8, - disease_threshold = 20, - n_peak = 6, - season_weeks = c(21, 20), - ... -) { - # Check input arguments - method <- rlang::arg_match(method) - coll <- checkmate::makeAssertCollection() - checkmate::assert_data_frame(tsd, add = coll) - checkmate::assert_class(tsd, "aedseo_tsd", add = coll) - checkmate::assert_names(colnames(tsd), identical.to = c("time", "observed"), add = coll) - checkmate::assert_numeric(decay_factor, lower = 0, upper = 1, len = 1, add = coll) - checkmate::assert_numeric(n_peak, lower = 1, len = 1, add = coll) - checkmate::assert_integerish(disease_threshold, len = 1, add = coll) - checkmate::assert_integerish(season_weeks, len = 2, lower = 1, upper = 53, - null.ok = FALSE, add = coll) - # Assert conf_levels based on the method chosen - if (method == "intensity_levels") { - checkmate::assert_numeric(conf_levels, lower = 0, upper = 1, len = 1, - unique = TRUE, sorted = TRUE, add = coll) - } else if (method == "peak_levels") { - checkmate::assert_numeric(conf_levels, lower = 0, upper = 1, len = 3, - unique = TRUE, sorted = TRUE, add = coll) - } - checkmate::reportAssertions(coll) - - # Add the seasons to data - seasonal_tsd <- tsd |> - dplyr::mutate(season = epi_calendar(.data$time, start = season_weeks[1], end = season_weeks[2])) |> - dplyr::arrange(dplyr::desc(.data$season)) - - # Add weights and remove newest season to get predictions for this season - weighted_seasonal_tsd <- seasonal_tsd |> - dplyr::filter(.data$season != max(.data$season)) |> - dplyr::mutate(year = purrr::map_chr(.data$season, ~ stringr::str_extract(.x, "[0-9]+")) |> - as.numeric()) |> - dplyr::mutate(weight = decay_factor^(max(.data$year) - .data$year)) - - # Select n_peak highest observations and filter observations >= disease_threshold - season_observations_and_weights <- weighted_seasonal_tsd |> - dplyr::select(-c("year", "time")) |> - dplyr::filter(.data$observed >= disease_threshold) |> - dplyr::slice_max(.data$observed, n = n_peak, with_ties = FALSE, by = "season") - - # Run with chosen method - burden_levels <- switch(method, - peak_levels = fit_quantiles(weighted_observations = season_observations_and_weights |> - dplyr::select(.data$observed, .data$weight), ...) - ) - - # Select the newest season for intensity level output - #season_peak_levels <- computed_peak_levels |> - # dplyr::mutate(season = max(seasonal_tsd$season)) |> - # dplyr::select(.data$season, dplyr::everything()) - - return(burden_levels) -} diff --git a/R/seasonal_burden_levels.R b/R/seasonal_burden_levels.R new file mode 100644 index 0000000..82a10c6 --- /dev/null +++ b/R/seasonal_burden_levels.R @@ -0,0 +1,200 @@ +#' Compute burden levels with seasonal time series observations. +#' +#' @description +#' `r lifecycle::badge("stable")` +#' +#' This function calculates the burden levels of time series of observations that are stratified by season. +#' It uses the previous seasons to calculate the levels of the newest season. +#' +#' @param tsd A `aedseo_tsd` object containing time series data with 'time' and 'observed'. +#' @param season_weeks A numeric vector of length 2, `c(start, end)`, with the start and end weeks of the seasons to +#' stratify the observations by. Must span the new year; e.g.: `season_weeks = c(21, 20)`. +#' NOTE: The data must include data for a complete previous season to make predictions for the newest season. +#' @param method A character string specifying the model to be used in the level calculations. +#' Choose between intensity_levels or peak_levels. Both model predict the levels of the newest series of observations. +#' - `intensity_levels`: models the risk compared to what has been observed in previous years. +#' - `peak_levels`: models the risk compared to what has been observed in the `n_peak` observations each season. +#' @param conf_levels A numeric vector specifying the confidence levels for parameter estimates. The values have +#' to be unique and in ascending order, such as the lowest level is first and highest level is last. +#' The `conf_levels` are specific for each method: +#' - for `intensity_levels` only specify the highest confidence level e.g.: `0.95`, which is the highest level +#' that has been observed in previous seasons. +#' - for `peak_levels` specify three confidence levels e.g.: `c(0.5, 0.9, 0.95)`, which are the three confidence +#' levels low, medium and high that reflect the levels of the peaks that have been observed in previous seasons. +#' @param decay_factor A numeric value between 0 and 1, that specifies the weight applied to previous seasons in +#' calculations. It is used as `decay_factor`^(number of seasons back), whereby the weight for the most recent season +#' will be `decay_factor`^0 = 1. This parameter allows for a decreasing weight assigned to prior seasons, such that +#' the influence of older seasons diminishes exponentially. +#' @param disease_threshold An integer specifying the threshold for considering a disease outbreak. It defines the per +#' time-step disease threshold that has to be surpassed for the observation to be included in the calculations. +#' @param n_peak A numeric value specifying the number of peak observations to be selected from each season in the +#' level calculations. The `n_peak` observations have to surpass the `disease_threshold` to be included. +#' @param ... arguments that can be passed to the `compute_weighted_intensity_levels()` function. +#' +#' @return A list containing: +#' - 'season': The season that burden levels are calculated for. +#' - 'high_conf_level': (intensity_level method) The conf_level chosen for the high level. +#' - 'conf_levels': (peak_level method) The conf_levels chosen to fit the low, medium and high levels. +#' - 'levels': The output levels from the model, always; very low, low, medium, high. +#' - 'values': The level values from corresponding method. +#' - 'par': The fit parameters for the chosen family. +#' - par_1: +#' - For 'weibull': Shape parameter (k). +#' - For 'lnorm': Mean of the log-transformed observations. +#' - For 'exp': Rate parameter (λ). +#' - 'par_2': +#' - For 'weibull': Scale parameter (λ). +#' - For 'lnorm': Standard deviation of the log-transformed observations. +#' - For 'exp': Not applicable (set to NA). +#' - 'obj_value': The value of the objective function - (negative log-likelihood), which represent the minimized +#' objective function value from the optimisation. Smaller value equals better optimisation. +#' - 'converged': Logical. TRUE if the optimisation converged. +#' - 'family': The distribution family used for the optimization. +#' - 'weibull': Uses the Weibull distribution for fitting. +#' - 'lnorm': Uses the Log-normal distribution for fitting. +#' - 'exp': Uses the Exponential distribution for fitting. +#' - 'disease_threshold': The input disease threshold, which is also the very low level. +#' +#' @export +#' +#' @examples +#' # Generate random flu season +#' generate_flu_season <- function(start = 1, end = 1000) { +#' random_increasing_obs <- round(sort(runif(24, min = start, max = end))) +#' random_decreasing_obs <- round(rev(random_increasing_obs)) +#' +#' # Generate peak numbers +#' add_to_max <- c(50, 100, 200, 100) +#' peak <- add_to_max + max(random_increasing_obs) +#' +#' # Combine into a single observed sequence +#' observed <- c(random_increasing_obs, peak, random_decreasing_obs) +#' +#' return(observed) +#' } +#' +#' season_1 <- generate_flu_season() +#' season_2 <- generate_flu_season() +#' +#' start_date <- as.Date("2022-05-29") +#' end_date <- as.Date("2024-05-20") +#' +#' weekly_dates <- seq.Date(from = start_date, +#' to = end_date, +#' by = "week") +#' +#' tsd_data <- tsd( +#' observed = c(season_1, season_2), +#' time = as.Date(weekly_dates), +#' time_interval = "week" +#' ) +#' +#' # Print seasonal burden results +#' intensity_levels <- seasonal_burden_levels(tsd_data) +#' print(intensity_levels) +seasonal_burden_levels <- function( + tsd, + season_weeks = c(21, 20), + method = c("intensity_levels", "peak_levels"), + conf_levels = 0.95, + decay_factor = 0.8, + disease_threshold = 20, + n_peak = 6, + ... +) { + # Check input arguments + method <- rlang::arg_match(method) + coll <- checkmate::makeAssertCollection() + checkmate::assert_data_frame(tsd, add = coll) + checkmate::assert_class(tsd, "aedseo_tsd", add = coll) + checkmate::assert_names(colnames(tsd), identical.to = c("time", "observed"), add = coll) + checkmate::assert_integerish(season_weeks, len = 2, lower = 1, upper = 53, + null.ok = FALSE, add = coll) + checkmate::assert_numeric(decay_factor, lower = 0, upper = 1, len = 1, add = coll) + checkmate::assert_numeric(n_peak, lower = 1, len = 1, add = coll) + checkmate::assert_integerish(disease_threshold, len = 1, add = coll) + # Assert conf_levels based on the method chosen + if (method == "intensity_levels") { + checkmate::assert_numeric(conf_levels, lower = 0, upper = 1, len = 1, + unique = TRUE, sorted = TRUE, add = coll) + } else if (method == "peak_levels") { + checkmate::assert_numeric(conf_levels, lower = 0, upper = 1, len = 3, + unique = TRUE, sorted = TRUE, add = coll) + } + # Add the seasons to data + seasonal_tsd <- tsd |> + dplyr::mutate(season = epi_calendar(.data$time, start = season_weeks[1], end = season_weeks[2])) |> + dplyr::arrange(dplyr::desc(.data$season)) + + # Check that there is at least two seasons of data + if (length(unique(seasonal_tsd$season)) <= 1) coll$push("There must be at least two unique seasons in the data.") + checkmate::reportAssertions(coll) + + # Add weights and remove newest season to get predictions for this season + weighted_seasonal_tsd <- seasonal_tsd |> + dplyr::filter(.data$season != max(.data$season)) |> + dplyr::mutate(year = purrr::map_chr(.data$season, ~ stringr::str_extract(.x, "[0-9]+")) |> + as.numeric()) |> + dplyr::mutate(weight = decay_factor^(max(.data$year) - .data$year)) + + # Select n_peak highest observations and filter observations >= disease_threshold + season_observations_and_weights <- weighted_seasonal_tsd |> + dplyr::select(-c("year", "time")) |> + dplyr::filter(.data$observed >= disease_threshold) |> + dplyr::slice_max(.data$observed, n = n_peak, with_ties = FALSE, by = "season") + + # Run quantiles_fit function which selected method + # Wrap to pass ... arguments + wrap_fit_quantiles <- function(method, conf_levels, ...) { + switch(method, + peak_levels = + fit_quantiles(weighted_observations = season_observations_and_weights |> + dplyr::select(.data$observed, .data$weight), + conf_levels, + ...), + intensity_levels = + fit_quantiles(weighted_observations = season_observations_and_weights |> + dplyr::select(.data$observed, .data$weight), + conf_levels, + ...) + ) + } + quantiles_fit <- wrap_fit_quantiles(method, conf_levels, ...) + + # If method intensity_levels was chosen; use the high level from the `fit_quantiles` function as the high + # level and the disease_threshold as the very low level. The low and medium levels are defined as the relative + # increase between the very low level and high level. + results <- switch(method, + peak_levels = { + model_output <- append(quantiles_fit, list(season = max(seasonal_tsd$season)), after = 0) + model_output <- append(model_output, list(levels = c("very low", "low", "medium", "high")), after = 2) + model_output$values <- c(disease_threshold, model_output$values) + model_output <- append(model_output, list(disease_threshold = disease_threshold)) + }, + intensity_levels = { + # Log transform levels + high_level_log <- log(quantiles_fit$values) + very_low_level_log <- log(disease_threshold) + + # Relative distance between very low and high level + relative_dist <- (log(quantiles_fit$values) - log(disease_threshold)) / 3 + + # Calculate low and medium levels + medium_level <- exp(high_level_log - relative_dist) + low_level <- exp(very_low_level_log + relative_dist) + + model_output <- list( + season = max(seasonal_tsd$season), + high_conf_level = quantiles_fit$conf_levels, + levels = c("very low", "low", "medium", "high"), + values = c(disease_threshold, low_level, medium_level, quantiles_fit$values), + par = quantiles_fit$par, + obj_value = quantiles_fit$conf_levels, + converged = quantiles_fit$converged, + family = quantiles_fit$family, + disease_threshold = disease_threshold + ) + } + ) + return(results) +} diff --git a/man/compute_relative_dist_intensity_levels.Rd b/man/compute_relative_dist_intensity_levels.Rd deleted file mode 100644 index f9eb245..0000000 --- a/man/compute_relative_dist_intensity_levels.Rd +++ /dev/null @@ -1,64 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compute_relative_dist_intensity_levels.R -\name{compute_relative_dist_intensity_levels} -\alias{compute_relative_dist_intensity_levels} -\title{Compute intensity levels with weighted time series observations (relative dist model).} -\usage{ -compute_relative_dist_intensity_levels( - weighted_observations, - disease_threshold = 20, - ... -) -} -\arguments{ -\item{weighted_observations}{A tibble containing two columns of length n; \code{observed}, which contains the data -points, and \code{weight}, which is the importance assigned to the observation. Higher weights indicate that an -observation has more influence on the model outcome, while lower weights reduce its impact.} - -\item{disease_threshold}{An integer specifying the threshold for considering a disease outbreak. It defines the per -time-step disease threshold that has to be surpassed for the observation to be included in the calculations.} - -\item{...}{arguments that can be passed to the \code{compute_weighted_intensity_levels} function.} -} -\value{ -A tibble containing: -\itemize{ -\item 'very_low_level' The very low intensity level. -\item 'low_level': The low intensity level. -\item 'medium_level': The medium intensity level. -\item 'high_level': The high intensity level. -\item 'relative_dist': The relative distance between levels. -\item 'high_conf_level': The conf level for the high intensity level. -\item 'optim_fit_par_1': The first fit parameter for the chosen family. -\itemize{ -\item For 'weibull': Shape parameter (k). -\item For 'lnorm': Mean of the log-transformed observations. -\item For 'exp': Rate parameter (λ). -} -\item 'optim_fit_par_2': The second fit parameter for the chosen family. -\itemize{ -\item For 'weibull': Scale parameter (λ). -\item For 'lnorm': Standard deviation of the log-transformed observations. -\item For 'exp': Not applicable (set to NA). -} -\item 'obj_value': The value of the objective function. -(negative log-likelihood), which represent the minimized objective -function value from the optimisation. Smaller value equals better -optimisation. -\item 'converged': Logical. TRUE if the optimisation converged. -\item 'family': The distribution family used for the optimization. -\itemize{ -\item 'weibull': Uses the Weibull distribution for fitting. -\item 'lnorm': Uses the Log-normal distribution for fitting. -\item 'exp': Uses the Exponential distribution for fitting. -} -} -} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} - -This function calculates the intensity levels of weighted time series observations. The output contains very low, -low, medium, high intensity levels, that predict the intensity levels of the next series of observations. -It uses the highest level from the \code{compute_weighted_intensity_levels} function as the high intensity level and -the disease_threshold as the very low intensity level. -} diff --git a/man/fit_quantiles.Rd b/man/fit_quantiles.Rd index dfe73ba..21ceca2 100644 --- a/man/fit_quantiles.Rd +++ b/man/fit_quantiles.Rd @@ -38,13 +38,13 @@ A list containing: \item 'quantiles': The quantile results from the fit. \item 'par': The fit parameters for the chosen family. \itemize{ -\item par\link{1}: +\item par_1: \itemize{ \item For 'weibull': Shape parameter (k). \item For 'lnorm': Mean of the log-transformed observations. \item For 'exp': Rate parameter (λ). } -\item 'par\link{2}': +\item 'par_2': \itemize{ \item For 'weibull': Scale parameter (λ). \item For 'lnorm': Standard deviation of the log-transformed observations. @@ -82,7 +82,8 @@ data_input <- tibble::tibble( ) # Use the model -fit_quantiles(weighted_observations = data_input, +quantile_results <- fit_quantiles(weighted_observations = data_input, conf_levels = c(0.50, 0.90, 0.95), family = "weibull") +print(quantile_results) } diff --git a/man/intensity_levels.Rd b/man/seasonal_burden_levels.Rd similarity index 50% rename from man/intensity_levels.Rd rename to man/seasonal_burden_levels.Rd index 96e7798..f02164a 100644 --- a/man/intensity_levels.Rd +++ b/man/seasonal_burden_levels.Rd @@ -1,21 +1,44 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/intensity_levels.R -\name{intensity_levels} -\alias{intensity_levels} +% Please edit documentation in R/seasonal_burden_levels.R +\name{seasonal_burden_levels} +\alias{seasonal_burden_levels} \title{Compute burden levels with seasonal time series observations.} \usage{ -intensity_levels( +seasonal_burden_levels( tsd, + season_weeks = c(21, 20), + method = c("intensity_levels", "peak_levels"), + conf_levels = 0.95, decay_factor = 0.8, disease_threshold = 20, n_peak = 6, - season_weeks = c(21, 20), ... ) } \arguments{ \item{tsd}{A \code{aedseo_tsd} object containing time series data with 'time' and 'observed'.} +\item{season_weeks}{A numeric vector of length 2, \code{c(start, end)}, with the start and end weeks of the seasons to +stratify the observations by. Must span the new year; e.g.: \code{season_weeks = c(21, 20)}. +NOTE: The data must include data for a complete previous season to make predictions for the newest season.} + +\item{method}{A character string specifying the model to be used in the level calculations. +Choose between intensity_levels or peak_levels. Both model predict the levels of the newest series of observations. +\itemize{ +\item \code{intensity_levels}: models the risk compared to what has been observed in previous years. +\item \code{peak_levels}: models the risk compared to what has been observed in the \code{n_peak} observations each season. +}} + +\item{conf_levels}{A numeric vector specifying the confidence levels for parameter estimates. The values have +to be unique and in ascending order, such as the lowest level is first and highest level is last. +The \code{conf_levels} are specific for each method: +\itemize{ +\item for \code{intensity_levels} only specify the highest confidence level e.g.: \code{0.95}, which is the highest level +that has been observed in previous seasons. +\item for \code{peak_levels} specify three confidence levels e.g.: c(0.5, 0.9, 0.95), which are the three confidence +levels low, medium and high that reflect the levels of the peaks that have been observed in previous seasons. +}} + \item{decay_factor}{A numeric value between 0 and 1, that specifies the weight applied to previous seasons in calculations. It is used as \code{decay_factor}^(number of seasons back), whereby the weight for the most recent season will be \code{decay_factor}^0 = 1. This parameter allows for a decreasing weight assigned to prior seasons, such that @@ -27,47 +50,40 @@ time-step disease threshold that has to be surpassed for the observation to be i \item{n_peak}{A numeric value specifying the number of peak observations to be selected from each season in the level calculations. The \code{n_peak} observations have to surpass the \code{disease_threshold} to be included.} -\item{season_weeks}{A numeric vector of length 2, \code{c(start, end)}, with the start and end weeks of the seasons to -stratify the observations by. Must span the new year; e.g.: \code{season_weeks = c(21, 20)}.} - \item{...}{arguments that can be passed to the \code{compute_weighted_intensity_levels()} function.} - -\item{method}{A character string specifying the model to be used in the level calculations. -Choose between intensity_levels or peak_levels. Both model predict the levels of the next series of observations. -\itemize{ -\item \code{intensity_levels}: models the risk compared to what has been observed in previous years. -\item \code{peak_levels}: models the risk compared to what has been observed in the \code{n_peak} observations each season. -}} } \value{ -A tibble containing: +A list containing: +\itemize{ +\item 'season': The season that burden levels are calculated for. +\item 'high_conf_level': (intensity_level method) The conf_level chosen for the high level. +\item 'conf_levels': (peak_level method) The conf_levels chosen to fit the low, medium and high levels. +\item 'levels': The output levels from the model, always; very low, low, medium, high. +\item 'values': The level values from corresponding method. +\item 'par': The fit parameters for the chosen family. \itemize{ -\item 'very_low_level': The very low burden level -\item 'low_level': The low burden level -\item 'medium_level': The medium burden level -\item 'high_level': The high burden level -\item 'optim_fit_par_1': The first fit parameter for the chosen family. +\item par_1: \itemize{ \item For 'weibull': Shape parameter (k). \item For 'lnorm': Mean of the log-transformed observations. \item For 'exp': Rate parameter (λ). } -\item 'optim_fit_par_2': The second fit parameter for the chosen family. +\item 'par_2': \itemize{ \item For 'weibull': Scale parameter (λ). \item For 'lnorm': Standard deviation of the log-transformed observations. \item For 'exp': Not applicable (set to NA). } -\item 'obj_value': The value of the objective function -(negative log-likelihood), which represent the minimized objective -function value from the optimisation. Smaller value equals better -optimisation. +} +\item 'obj_value': The value of the objective function - (negative log-likelihood), which represent the minimized +objective function value from the optimisation. Smaller value equals better optimisation. \item 'converged': Logical. TRUE if the optimisation converged. \item 'family': The distribution family used for the optimization. \itemize{ \item 'weibull': Uses the Weibull distribution for fitting. \item 'lnorm': Uses the Log-normal distribution for fitting. \item 'exp': Uses the Exponential distribution for fitting. +\item 'disease_threshold': The input disease threshold, which is also the very low level. } } } @@ -77,3 +93,39 @@ optimisation. This function calculates the burden levels of time series of observations that are stratified by season. It uses the previous seasons to calculate the levels of the newest season. } +\examples{ +# Generate random flu season +generate_flu_season <- function(start = 1, end = 1000) { + random_increasing_obs <- round(sort(runif(21, min = start, max = end))) + random_decreasing_obs <- round(rev(random_increasing_obs)) + + # Generate peak numbers + add_to_max <- c(50, 75, 100, 200, 250, 300, 200, 100, 75, 50) + peak <- add_to_max + max(random_increasing_obs) + + # Combine into a single observed sequence + observed <- c(random_increasing_obs, peak, random_decreasing_obs) + + return(observed) +} + +season_1 <- generate_flu_season() +season_2 <- generate_flu_season() + +start_date <- as.Date("2022-05-29") +end_date <- as.Date("2024-05-20") + +weekly_dates <- seq.Date(from = start_date, + to = end_date, + by = "week") + +tsd_data <- tsd( + observed = c(season_1, season_2), + time = as.Date(weekly_dates), + time_interval = "week" +) + +# Print seasonal burden results +intensity_levels <- seasonal_burden_levels(tsd_data) +print(intensity_levels) +} diff --git a/tests/testthat/test-compute_relative_dist_intensity_levels.R b/tests/testthat/test-compute_relative_dist_intensity_levels.R deleted file mode 100644 index cb61398..0000000 --- a/tests/testthat/test-compute_relative_dist_intensity_levels.R +++ /dev/null @@ -1,16 +0,0 @@ -test_that("Test if checkmate checks work", { - - # Create data - obs <- 10 - season <- c("2018/2019", "2019/2020", "2020/2021") - season_num_rev <- rev(seq(from = 1, to = length(season))) - observations <- rep(stats::rnorm(obs, 1000), length(season)) - - peak_input <- tibble::tibble( - observed = observations, - weight = 0.8^rep(season_num_rev, each = obs) - ) - - compute_relative_dist_intensity_levels(weighted_observations = peak_input) - -}) diff --git a/tests/testthat/test-fit_quantiles.R b/tests/testthat/test-fit_quantiles.R index e4c37ab..8fd52d6 100644 --- a/tests/testthat/test-fit_quantiles.R +++ b/tests/testthat/test-fit_quantiles.R @@ -11,7 +11,7 @@ test_that("Test if checkmate checks work", { weight = 0.8^rep(season_num_rev, each = obs) ) - fit_quantiles(weighted_observations = peak_input, conf_levels = c(0.1,0.2,0.4,0.8,0.9)) + fit_quantiles(weighted_observations = peak_input, conf_levels = c(0.1, 0.2, 0.4, 0.8, 0.9)) # Exp fit expect_no_error(fit_quantiles(weighted_observations = peak_input, diff --git a/tests/testthat/test-intensity_levels.R b/tests/testthat/test-intensity_levels.R deleted file mode 100644 index 4a7f9f8..0000000 --- a/tests/testthat/test-intensity_levels.R +++ /dev/null @@ -1,87 +0,0 @@ -test_that("Test that input argument checks work", { - - start_date <- as.Date("2021-01-04") - end_date <- as.Date("2023-12-31") - - weekly_dates <- seq.Date(from = start_date, - to = end_date, - by = "week") - - obs <- stats::rpois(length(weekly_dates), 1000) - - tsd_data <- tsd( - observed = obs, - time = as.Date(weekly_dates), - time_interval = "week" - ) - - expect_no_error(intensity_levels(tsd_data)) - - expect_error( - checkmate_err_msg(intensity_levels(tsd_data, decay_factor = 3), - "Variable 'decay_factor': Element 1 is not <= 1." - ) - ) - - expect_error( - checkmate_err_msg(intensity_levels(tsd_data, n_peak = c(1, 2)), - "Variable 'n_peak': Must have length 1, but has length 2." - ) - ) - - expect_error( - checkmate_err_msg(intensity_levels(tsd_data, disease_threshold = "hey"), - "'disease_threshold': Must be of type 'integerish', not 'character'." - ) - ) - - expect_error( - checkmate_err_msg(intensity_levels(tsd_data, season_weeks = c(2, 10)), - "`start` must be greater than `end`!" - ) - ) - - tsd_fail <- tsd_data |> dplyr::rename(week = time) - - expect_error( - checkmate_err_msg(intensity_levels(tsd_fail), - "Variable 'colnames(tsd)': Names must be a identical to set {'time','observed'}, but is {'week','observed'}.", - fixed = TRUE - ) - ) - - #Finally check that dots arguments work - expect_no_error(intensity_levels(tsd_data, conf_levels = c(0.2, 0.5, 0.9))) - - expect_no_error(intensity_levels(tsd_data, family = "exp", optim_method = "Brent", - lower_optim = 1, upper_optim = 1000)) - -}) - -test_that("Test that we get correct season output for newest season", { - - start_date <- as.Date("2021-01-04") - end_date <- as.Date("2023-12-31") - - weekly_dates <- seq.Date(from = start_date, - to = end_date, - by = "week") - - obs <- rpois(length(weekly_dates), 1000) - - tsd_data <- tsd( - observed = obs, - time = as.Date(weekly_dates), - time_interval = "week" - ) - - seasonal_tsd <- tsd_data |> - dplyr::mutate(season = epi_calendar(.data$time, start = 21, end = 20)) - - newest_season <- max(seasonal_tsd$season) - - intensity_levels <- intensity_levels(tsd_data) - - expect_equal(newest_season, intensity_levels$season) - -}) diff --git a/tests/testthat/test-seasonal_burden_levels.R b/tests/testthat/test-seasonal_burden_levels.R new file mode 100644 index 0000000..72c0ff2 --- /dev/null +++ b/tests/testthat/test-seasonal_burden_levels.R @@ -0,0 +1,142 @@ +test_that("Test that input argument checks work", { + + start_date <- as.Date("2021-01-04") + end_date <- as.Date("2023-12-31") + + weekly_dates <- seq.Date(from = start_date, + to = end_date, + by = "week") + + obs <- stats::rpois(length(weekly_dates), 1000) + + tsd_data <- tsd( + observed = obs, + time = as.Date(weekly_dates), + time_interval = "week" + ) + + expect_error( + checkmate_err_msg(seasonal_burden_levels(tsd_data, decay_factor = 3), + "Variable 'decay_factor': Element 1 is not <= 1." + ) + ) + + expect_error( + checkmate_err_msg(seasonal_burden_levels(tsd_data, n_peak = c(1, 2)), + "Variable 'n_peak': Must have length 1, but has length 2." + ) + ) + + expect_error( + checkmate_err_msg(seasonal_burden_levels(tsd_data, disease_threshold = "hey"), + "'disease_threshold': Must be of type 'integerish', not 'character'." + ) + ) + + expect_error( + checkmate_err_msg(seasonal_burden_levels(tsd_data, season_weeks = c(2, 10)), + "`start` must be greater than `end`!" + ) + ) + + expect_error( + checkmate_err_msg(seasonal_burden_levels(tsd_data, method = "peak_levels"), + "Variable 'conf_levels': Must have length 3, but has length 1." + ) + ) + + expect_error( + checkmate_err_msg(seasonal_burden_levels(tsd_data, conf_levels = c(0.2, 0.5)), + "Variable 'conf_levels': Must have length 1, but has length 2." + ) + ) + + tsd_fail <- tsd_data |> dplyr::rename(week = time) + + expect_error( + checkmate_err_msg(seasonal_burden_levels(tsd_fail), + "Variable 'colnames(tsd)': Names must be a identical to set {'time','observed'}, but is {'week','observed'}.", + fixed = TRUE + ) + ) + + #Finally check that dots arguments work + model_output <- seasonal_burden_levels(tsd_data, family = "exp", optim_method = "Brent", + lower_optim = 1, upper_optim = 1000) + expect_equal(model_output$family, "exp") +}) + +test_that("Test that we get correct season output for newest season", { + + start_date <- as.Date("2021-01-04") + end_date <- as.Date("2023-12-31") + + weekly_dates <- seq.Date(from = start_date, + to = end_date, + by = "week") + + obs <- stats::rpois(length(weekly_dates), 1000) + + tsd_data <- tsd( + observed = obs, + time = as.Date(weekly_dates), + time_interval = "week" + ) + + seasonal_tsd <- tsd_data |> + dplyr::mutate(season = epi_calendar(.data$time, start = 21, end = 20)) + + newest_season <- max(seasonal_tsd$season) + + intensity_levels <- seasonal_burden_levels(tsd_data, method = "intensity_levels") + + expect_equal(newest_season, intensity_levels$season) +}) + +test_that("Test that we have same numbers of outputs for both methods", { + + start_date <- as.Date("2021-01-04") + end_date <- as.Date("2023-12-31") + + weekly_dates <- seq.Date(from = start_date, + to = end_date, + by = "week") + + obs <- stats::rpois(length(weekly_dates), 1000) + + tsd_data <- tsd( + observed = obs, + time = as.Date(weekly_dates), + time_interval = "week" + ) + + intensity_levels <- seasonal_burden_levels(tsd_data, method = "intensity_levels") + + peak_levels <- seasonal_burden_levels(tsd_data, method = "peak_levels", conf_levels = c(0.4, 0.9, 0.99)) + + expect_equal(length(intensity_levels), length(peak_levels)) +}) + +test_that("Test that function fail with less than two seasons", { + + start_date <- as.Date("2021-06-04") + end_date <- as.Date("2021-12-31") + + weekly_dates <- seq.Date(from = start_date, + to = end_date, + by = "week") + + obs <- stats::rpois(length(weekly_dates), 1000) + + tsd_one_season <- tsd( + observed = obs, + time = as.Date(weekly_dates), + time_interval = "week" + ) + + expect_error( + checkmate_err_msg(seasonal_burden_levels(tsd_one_season), + "There must be at least two unique seasons in the data." + ) + ) +})