Skip to content

Commit

Permalink
Merge pull request #37 from ssi-dk/add_compute_relative_dist_intensit…
Browse files Browse the repository at this point in the history
…y_levels

Change function structure (intensity_levels and dependencies)
  • Loading branch information
SofiaOtero committed Nov 4, 2024
2 parents 1857b03 + d23c2c2 commit 73a25c7
Show file tree
Hide file tree
Showing 34 changed files with 609 additions and 395 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Imports:
lifecycle,
magrittr,
purrr,
pracma,
rlang,
stats,
stringr,
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ S3method(summary,aedseo)
export("%>%")
export(aedseo)
export(autoplot)
export(compute_weighted_intensity_levels)
export(epi_calendar)
export(fit_growth_rate)
export(fit_quantiles)
export(seasonal_burden_levels)
export(tsd)
importFrom(ggplot2,autoplot)
importFrom(grDevices,devAskNewPage)
Expand Down
4 changes: 2 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

## Features

* Added the `intensity_levels()` function, which currently calculates peak intensity levels with the `compute_weighted_intensity_levels()` function for the newest season in input tsd data. Soon the `compute_relative_dist_intensity_levels()` function will be an option to calculate intensity levels in the `intensity_levels()` function (#36).
* Added the `seasonal_burden_levels()` function, which calculates burden levels based on data from previous seasons with two different methods; "peak_levels" or "intensity_levels" (#37).

* Added the `compute_weighted_intensity_levels()` function, which optimises a user selected distribution and calculates the intensity levels based on observations and weights. It is meant to be used within the soon coming `intensity_level()` function (#35).
* Added the `fit_quantiles()` function, which optimises a user selected distribution and calculates the quantiles based on observations and weights. It is meant to be used within the `seasonal_burden_levels()` function (#35, #37).

## Improvements

Expand Down
15 changes: 7 additions & 8 deletions R/aedseo.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
#' Automated and Early Detection of Seasonal Epidemic Onset
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' This function performs automated and early detection of seasonal epidemic
#' onsets (aedseo) on a time series dataset. It estimates growth rates for
#' consecutive time intervals and calculates the sum of cases (sum_of_cases).
#'
#' @param tsd A `aedseo_tsd` object containing time series data with 'time' and
#' 'observed.'
#' 'observation.'
#' @param k An integer specifying the window size for modeling growth rates.
#' @param level The confidence level for parameter estimates, a numeric value
#' between 0 and 1.
Expand All @@ -28,7 +27,7 @@
#'
#' @return A `aedseo` object containing:
#' - 'reference_time': The time point for which the growth rate is estimated.
#' - 'observed': The observed value in the reference time point.
#' - 'observation': The observation in the reference time point.
#' - 'season': The stratification of observables in corresponding seasons.
#' - 'growth_rate': The estimated growth rate.
#' - 'lower_growth_rate': The lower bound of the growth rate's confidence
Expand All @@ -49,7 +48,7 @@
#' @examples
#' # Create a tibble object from sample data
#' tsd_data <- tsd(
#' observed = c(100, 120, 150, 180, 220, 270),
#' observation = c(100, 120, 150, 180, 220, 270),
#' time = as.Date(c(
#' "2023-01-01",
#' "2023-01-02",
Expand Down Expand Up @@ -90,7 +89,7 @@ aedseo <- function(
coll <- checkmate::makeAssertCollection()
checkmate::assert_data_frame(tsd, add = coll)
checkmate::assert_class(tsd, "aedseo_tsd", add = coll)
checkmate::assert_names(colnames(tsd), identical.to = c("time", "observed"), add = coll)
checkmate::assert_names(colnames(tsd), identical.to = c("time", "observation"), add = coll)
checkmate::assert_numeric(level, lower = 0, upper = 1, add = coll)
checkmate::assert_numeric(na_fraction_allowed, lower = 0, upper = 1,
add = coll)
Expand Down Expand Up @@ -130,7 +129,7 @@ aedseo <- function(
} else {
# Calculate growth rates
growth_rates <- fit_growth_rate(
observations = obs_iter$observed,
observations = obs_iter$observation,
level = level,
family = family
)
Expand All @@ -140,7 +139,7 @@ aedseo <- function(
growth_warning <- growth_rates$estimate[2] > 0

# Calculate Sum of Cases (sum_of_cases)
sum_of_cases <- base::sum(obs_iter$observed, na.rm = TRUE)
sum_of_cases <- base::sum(obs_iter$observation, na.rm = TRUE)

# Evaluate if sum_of_cases exceeds disease_threshold
sum_of_cases_warning <- sum_of_cases > (disease_threshold * k)
Expand All @@ -153,7 +152,7 @@ aedseo <- function(
res,
tibble::tibble(
reference_time = tsd$time[i],
observed = tsd$observed[i],
observation = tsd$observation[i],
season = tsd$season[i],
growth_rate = growth_rates$estimate[1],
lower_growth_rate = growth_rates$estimate[2],
Expand Down
7 changes: 3 additions & 4 deletions R/autoplot.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#' Create a complete 'ggplot' appropriate to a particular data type
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' This function generates a complete 'ggplot' object suitable for
#' visualizing time series data in an `aedseo_tsd` object. It creates a line
Expand All @@ -23,7 +22,7 @@
#' @examples
#' # Create an example aedseo_tsd object
#' aedseo_tsd_object <- tsd(
#' observed = c(100, 120, 150, 180, 220, 270),
#' observation = c(100, 120, 150, 180, 220, 270),
#' time = as.Date(c(
#' "2023-01-01",
#' "2023-01-02",
Expand Down Expand Up @@ -62,7 +61,7 @@ autoplot.aedseo_tsd <- function(object, ...) {
ggplot2::ggplot(
mapping = ggplot2::aes(
x = .data$time,
y = .data$observed
y = .data$observation
)
) +
ggplot2::geom_point() +
Expand All @@ -87,7 +86,7 @@ autoplot.aedseo <- function(
ggplot2::ggplot(
mapping = ggplot2::aes(
x = .data$reference_time,
y = .data$observed
y = .data$observation
)
) +
ggplot2::geom_point(
Expand Down
1 change: 0 additions & 1 deletion R/epi_calendar.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#' Determine Epidemiological Season
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' This function identifies the epidemiological season, (must span new year)
#' to which a given date belongs.
Expand Down
1 change: 0 additions & 1 deletion R/fit_growth_rate.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#' Fit a growth rate model to time series observations.
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' This function fits a growth rate model to time series observations and
#' provides parameter estimates along with confidence intervals.
Expand Down
82 changes: 37 additions & 45 deletions R/compute_weighted_intensity_levels.R → R/fit_quantiles.R
Original file line number Diff line number Diff line change
@@ -1,39 +1,36 @@
#' Compute intensity levels with weighted time series observations.
#' Fits weighted observations to distribution and returns quantiles
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' This function calculates the intensity levels of weighted time series observations. The output contain low, medium,
#' high intensity levels, that predict the intensity levels of the next series of observations.
#' This function calculates the quantiles of weighted time series observations. The output contains the quantiles
#' from the fitted distribution.
#'
#' @param weighted_observations A tibble containing two columns of length n; `observed`, which contains the data
#' @param weighted_observations A tibble containing two columns of length n; `observation`, which contains the data
#' points, and `weight`, which is the importance assigned to the observation. Higher weights indicate that an
#' observation has more influence on the model outcome, while lower weights reduce its impact.
#' @param conf_levels The confidence levels for parameter estimates, a numeric vector of length 3. The three values
#' have to be unique and in ascending order, such as the low level is first and high level is last.
#' @param conf_levels A numeric vector specifying the confidence levels for parameter estimates. The values have
#' to be unique and in ascending order, that is the lowest level is first and highest level is last.
#' @param family A character string specifying the family for modeling. Choose between "weibull", "lnorm" or "exp".
#' @param optim_method A character string specifying the method to be used in the optimisation. Lookup `?optim::stats`
#' for details about methods.
#' If using the exp family it is recommended to use Brent as it is a one-dimensional optimisation.
#' @param lower_optim A numeric value for the optimisation.
#' @param upper_optim A numeric value for the optimisation.
#'
#' @return A tibble containing:
#' - 'low_level': The lowest intensity level
#' - 'medium_level': The medium intensity level
#' - 'high_level': The highest intensity level
#' - 'optim_fit_par_1': The first fit parameter for the chosen family.
#' - For 'weibull': Shape parameter (k).
#' - For 'lnorm': Mean of the log-transformed observations.
#' - For 'exp': Rate parameter (λ).
#' - 'optim_fit_par_2': The second fit parameter for the chosen family.
#' - For 'weibull': Scale parameter (λ).
#' - For 'lnorm': Standard deviation of the log-transformed observations.
#' - For 'exp': Not applicable (set to NA).
#' - 'obj_value': The value of the objective function
#' (negative log-likelihood), which represent the minimized objective
#' function value from the optimisation. Smaller value equals better
#' optimisation.
#' @return A list containing:
#' - 'conf_levels': The conf_levels chosen to fit the quantiles.
#' - 'quantiles': The quantile results from the fit.
#' - 'par': The fit parameters for the chosen family.
#' - par_1:
#' - For 'weibull': Shape parameter (k).
#' - For 'lnorm': Mean of the log-transformed observations.
#' - For 'exp': Rate parameter (λ).
#' - 'par_2':
#' - For 'weibull': Scale parameter (λ).
#' - For 'lnorm': Standard deviation of the log-transformed observations.
#' - For 'exp': Not applicable (set to NA).
#' - 'obj_value': The value of the objective function - (negative log-likelihood), which represent the minimized
#' objective function value from the optimisation. Smaller value equals better optimisation.
#' - 'converged': Logical. TRUE if the optimisation converged.
#' - 'family': The distribution family used for the optimization.
#' - 'weibull': Uses the Weibull distribution for fitting.
Expand All @@ -51,16 +48,15 @@
#'
#' # Add into a tibble with decreasing weight for older seasons
#' data_input <- tibble::tibble(
#' observed = observations,
#' observation = observations,
#' weight = 0.8^rep(season_num_rev, each = obs)
#' )
#'
#' # Use the model
#' compute_weighted_intensity_levels(weighted_observations = data_input,
#' conf_levels = c(0.50, 0.90, 0.95),
#' family = "weibull")

compute_weighted_intensity_levels <- function(
#' fit_quantiles(weighted_observations = data_input,
#' conf_levels = c(0.50, 0.90, 0.95),
#' family = "weibull")
fit_quantiles <- function(
weighted_observations,
conf_levels = c(0.50, 0.90, 0.95),
family = c("weibull",
Expand All @@ -78,10 +74,10 @@ compute_weighted_intensity_levels <- function(
# Check input arguments
coll <- checkmate::makeAssertCollection()
checkmate::assert_tibble(weighted_observations, add = coll)
checkmate::assert_numeric(conf_levels, lower = 0, upper = 1, len = 3,
checkmate::assert_numeric(conf_levels, lower = 0, upper = 1,
unique = TRUE, sorted = TRUE, add = coll)
checkmate::assert_names(colnames(weighted_observations),
identical.to = c("observed", "weight"), add = coll)
identical.to = c("observation", "weight"), add = coll)
checkmate::assert_numeric(lower_optim, add = coll)
checkmate::assert_numeric(upper_optim, add = coll)
checkmate::reportAssertions(coll)
Expand All @@ -102,16 +98,16 @@ compute_weighted_intensity_levels <- function(
# The weighted negative loglikelihood function
nll <- function(par, weighted_observations, family = family) {
log_probability <- switch(family,
weibull = stats::dweibull(weighted_observations$observed, shape = exp(par[1]), scale = exp(par[2]),
weibull = stats::dweibull(weighted_observations$observation, shape = exp(par[1]), scale = exp(par[2]),
log = TRUE),
lnorm = stats::dlnorm(weighted_observations$observed, meanlog = par[1], sdlog = par[2], log = TRUE),
exp = stats::dexp(weighted_observations$observed, rate = exp(par[1]), log = TRUE)
lnorm = stats::dlnorm(weighted_observations$observation, meanlog = par[1], sdlog = par[2], log = TRUE),
exp = stats::dexp(weighted_observations$observation, rate = exp(par[1]), log = TRUE)
)
return(-sum(log_probability * weighted_observations$weight))
}

# Run optimisation for weighted observations
optim_obj <- stats::optim(par = init_par_fun(family = family, observations = weighted_observations$observed),
optim_obj <- stats::optim(par = init_par_fun(family = family, observations = weighted_observations$observation),
fn = nll,
weighted_observations = weighted_observations,
family = family,
Expand All @@ -135,17 +131,13 @@ compute_weighted_intensity_levels <- function(
exp = stats::qexp(p = conf_levels, rate = par_fit[1])
)

# Create a tibble with the fit parameters
optim_results <- tibble::tibble(
low_level = quantiles[1],
medium_level = quantiles[2],
high_level = quantiles[3],
optim_fit_par_1 = par_fit[1],
optim_fit_par_2 = ifelse(length(par_fit) == 2, par_fit[2], NA),
# Create return fit parameters
return(list(
conf_levels = conf_levels,
values = as.numeric(quantiles),
par = par_fit[1:2], # Returns NA in second position if optim_method = "exp"
obj_value = optim_obj$value,
converged = optim_obj$convergence == 0,
family = family
)

return(optim_results)
))
}
75 changes: 0 additions & 75 deletions R/intensity_levels.R

This file was deleted.

3 changes: 1 addition & 2 deletions R/plot.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#' Create a complete 'ggplot' appropriate to a particular data type
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' This function generates a complete 'ggplot' object suitable for
#' visualizing time series data in an `aedseo_tsd` object. It creates a line
Expand All @@ -19,7 +18,7 @@
#' @examples
#' # Create an example aedseo_tsd object
#' aedseo_tsd_object <- tsd(
#' observed = c(100, 120, 150, 180, 220, 270),
#' observation = c(100, 120, 150, 180, 220, 270),
#' time = as.Date(c(
#' "2023-01-01",
#' "2023-01-02",
Expand Down
Loading

0 comments on commit 73a25c7

Please sign in to comment.