From 7fc61fdb2196b232ed38508e4989efa02012d616 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Mon, 16 Sep 2024 11:17:35 +0100 Subject: [PATCH 01/39] Add message about NA treatment for option "missing" --- R/opts.R | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/R/opts.R b/R/opts.R index 66d8c56d2..ab5831683 100644 --- a/R/opts.R +++ b/R/opts.R @@ -644,9 +644,23 @@ obs_opts <- function(family = c("negbin", "poisson"), .frequency = "regularly", .frequency_id = "obs_opts" ) - #nolint end + } else { + cli_inform( + c( + "i" = "{col_red(\"Explicit NA values will be treated as missing as + compared to being treated as zero observations in previous + versions.\")}", + "i" = "If you prefer for NA's to be treated as zero observations, see + solutions in {.url https://github.com/epiforecasts/EpiNow2/issues/767#issuecomment-2348805272}", #nolint + "i" = "If your data is reported at less regular intervals (for example + weekly), see the \"accumulate\" option.", + "i" = "This behaviour was introduced in version 1.5.0." + ), + .frequency = "regularly", + .frequency_id = "obs_opts" + ) } - + #nolint end if (length(phi) == 2 && is.numeric(phi)) { cli_abort( c( From 8639fda097fc4e37f9e6e39f44b0eecebc011eeb Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Mon, 16 Sep 2024 11:18:02 +0100 Subject: [PATCH 02/39] Add missing tests for obs_opts --- tests/testthat/test-obs_opts.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/testthat/test-obs_opts.R diff --git a/tests/testthat/test-obs_opts.R b/tests/testthat/test-obs_opts.R new file mode 100644 index 000000000..57d6ea9fb --- /dev/null +++ b/tests/testthat/test-obs_opts.R @@ -0,0 +1,24 @@ +test_that("obs_opts returns expected default values", { + result <- suppressWarnings(obs_opts()) + + expect_s3_class(result, "obs_opts") + expect_equal(result$family, "negbin") + expect_equal(result$weight, 1) + expect_true(result$week_effect) + expect_equal(result$week_length, 7L) + expect_equal(result$scale, list(mean = 1, sd = 0)) + expect_equal(result$accumulate, 0) + expect_true(result$likelihood) + expect_false(result$return_likelihood) +}) + +test_that("obs_opts returns expected messages", { + expect_message( + obs_opts(), + "NA values will be treated as missing" + ) + expect_message( + obs_opts(na = "accumulate"), + "modelled values that correspond to NA values" + ) +}) From 68e8e1ed78514ea38272922aa74605a21fcbec4e Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Mon, 16 Sep 2024 11:18:24 +0100 Subject: [PATCH 03/39] Add NEWS item --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 9260f8137..d200d3b17 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,6 +27,7 @@ A release that introduces model improvements to the Gaussian Process models, alo - Switch to broadcasting the day of the week effect. By @seabbs in #746 and reviewed by @jamesmbaazam. - A warning is now thrown if nonparametric PMFs passed to delay options have consecutive tail values that are below a certain low threshold as these lead to loss in speed with little gain in accuracy. By @jamesmbaazam in #752 and reviewed by @seabbs, with a subsequent bug fix in #802. - `dist_fit()` can now accept any number of `samples` without throwing a warning when `samples` < 1000 in #751 by @jamesmbaazam and reviewed by @seabbs and @sbfnk. +- `obs_opts()` now informs users about how NA observations are treated to help them decide on existing alternatives. By @jamesmbaazam in # and reviewed by . ## Package changes From bcfa71474f31a8b5e5402151c01f58554112e1c4 Mon Sep 17 00:00:00 2001 From: James Azam Date: Mon, 16 Sep 2024 14:31:41 +0100 Subject: [PATCH 04/39] Improve messages Co-authored-by: Sebastian Funk --- R/opts.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/opts.R b/R/opts.R index ab5831683..8d4695032 100644 --- a/R/opts.R +++ b/R/opts.R @@ -647,14 +647,14 @@ obs_opts <- function(family = c("negbin", "poisson"), } else { cli_inform( c( - "i" = "{col_red(\"Explicit NA values will be treated as missing as - compared to being treated as zero observations in previous - versions.\")}", - "i" = "If you prefer for NA's to be treated as zero observations, see + "i" = "{col_red(\"As of version 1.5.0 missing dates or dates with `NA` + cases are treated as missing. This is in contrast to previous versions + where these were interpreted as dates with zero cases. \")}", + "i" = "In order to treat missing or `NA` cases as zeroes, see solutions in {.url https://github.com/epiforecasts/EpiNow2/issues/767#issuecomment-2348805272}", #nolint - "i" = "If your data is reported at less regular intervals (for example - weekly), see the \"accumulate\" option.", - "i" = "This behaviour was introduced in version 1.5.0." + "i" = "If the data is reported at less regular intervals (for example + weekly), consider using `obs_opts(na=\"accumulate\")`.", + "i" = "For more information on these options, see `?obs_opts`." ), .frequency = "regularly", .frequency_id = "obs_opts" From 49d22ee6fe00878f89c40e88c81caa0a8ba4bd61 Mon Sep 17 00:00:00 2001 From: James Azam Date: Mon, 16 Sep 2024 14:32:16 +0100 Subject: [PATCH 05/39] Check if na is explicitly specified by user Co-authored-by: Sebastian Funk --- R/opts.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/opts.R b/R/opts.R index 8d4695032..40fadd1aa 100644 --- a/R/opts.R +++ b/R/opts.R @@ -644,7 +644,7 @@ obs_opts <- function(family = c("negbin", "poisson"), .frequency = "regularly", .frequency_id = "obs_opts" ) - } else { + } else if (missing(na)) { cli_inform( c( "i" = "{col_red(\"As of version 1.5.0 missing dates or dates with `NA` From 4ae8b97b693a868e6956e6ed5645afb017ccd3af Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 12:54:27 +0100 Subject: [PATCH 06/39] Change less regular to non-daily --- R/opts.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/opts.R b/R/opts.R index 40fadd1aa..6c85f6159 100644 --- a/R/opts.R +++ b/R/opts.R @@ -652,7 +652,7 @@ obs_opts <- function(family = c("negbin", "poisson"), where these were interpreted as dates with zero cases. \")}", "i" = "In order to treat missing or `NA` cases as zeroes, see solutions in {.url https://github.com/epiforecasts/EpiNow2/issues/767#issuecomment-2348805272}", #nolint - "i" = "If the data is reported at less regular intervals (for example + "i" = "If the data is reported at non-daily intervals (for example weekly), consider using `obs_opts(na=\"accumulate\")`.", "i" = "For more information on these options, see `?obs_opts`." ), From d814317aa17ae896c0d106a58962b5db37818c75 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 15:07:11 +0100 Subject: [PATCH 07/39] Create function to test for complete data --- R/checks.R | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/R/checks.R b/R/checks.R index 7d82a09a9..79cdf4980 100644 --- a/R/checks.R +++ b/R/checks.R @@ -179,3 +179,35 @@ check_sparse_pmf_tail <- function(pmf, span = 5, tol = 1e-6) { ) } } + +#' Check if data has either explicit NA values or implicit missing dates. +#' +#' @data The data to be checked +#' +#' @return FALSE, if data is incomplete +#' @keywords internal +test_data_complete <- function(data) { + data <- setDT(data) # Convert data to data.table + + # Check for explicit missingness in required columns + # (date, confirm, primary, secondary) + columns_to_check <- c( + "date", + intersect(c("confirm", "primary", "secondary"), names(data)) + ) + if (any(sapply(data[, ..columns_to_check], anyNA))) { + return(FALSE) + } + + # Check for implicit missingness by comparing the expected full date sequence + complete_dates <- seq( + min(data$date, na.rm = TRUE), + max(data$date, na.rm = TRUE), + by = "1 day" + ) + if (length(complete_dates) > length(unique(data$date))) { + return(FALSE) + } + + return(TRUE) # Return TRUE if no missing values or gaps in date sequence +} From 4902e4157efa65aba8ff382b253d87c511c2416b Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 15:07:46 +0100 Subject: [PATCH 08/39] Add tests for test_data_complete --- tests/testthat/test-checks.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 9a3fc6525..b0d7e4f52 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -146,3 +146,27 @@ test_that("check_sparse_pmf_tail throws a warning as expected", { pmf <- c(0.4, 0.30, 0.20, 0.05, 0.049995, 4.5e-06, rep(1e-7, 5)) expect_warning(check_sparse_pmf_tail(pmf), "PMF tail has") }) + +test_that("test_data_complete detects complete and incomplete data", { + # example_confirmed with explicit missing dates + ec_missing_date <- copy(example_confirmed)[c(1, 3), date := NA] + # example_confirmed with explicit missing confirm + ec_missing_confirm <- copy(example_confirmed)[c(1, 3), confirm := NA] + # example_confirmed with implicit missing (missing entries) + ec_implicit_missing <- copy(example_confirmed)[-c(1,3,5), ] + # Create a hypothetical complete example_secondary + es <- example_confirmed[, primary := confirm][, secondary := primary * 0.4] + # example_secondary with explicit missing primary + es_missing_primary <- copy(es)[c(1, 3), primary := NA] + # example_secondary with explicit missing secondary + es_missing_secondary <- copy(es)[c(1, 3), secondary := NA] + + # Expectations + expect_true(test_data_complete(example_confirmed)) + expect_true(test_data_complete(es)) + expect_false(test_data_complete(ec_missing_date)) + expect_false(test_data_complete(ec_missing_confirm)) + expect_false(test_data_complete(es_missing_primary)) + expect_false(test_data_complete(es_missing_secondary)) + expect_false(test_data_complete(ec_implicit_missing)) +}) From 6b6d695bcc902ac66c12203a1a7d573ced592120 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 17:43:56 +0100 Subject: [PATCH 09/39] Document test_data_complete --- R/checks.R | 5 +++-- man/test_data_complete.Rd | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 man/test_data_complete.Rd diff --git a/R/checks.R b/R/checks.R index 79cdf4980..0151d973e 100644 --- a/R/checks.R +++ b/R/checks.R @@ -182,9 +182,10 @@ check_sparse_pmf_tail <- function(pmf, span = 5, tol = 1e-6) { #' Check if data has either explicit NA values or implicit missing dates. #' -#' @data The data to be checked +#' @param data The data to be checked #' -#' @return FALSE, if data is incomplete +#' @return `TRUE` if data is complete, else if data has implicit or explicit +#' missingness, `FALSE`. #' @keywords internal test_data_complete <- function(data) { data <- setDT(data) # Convert data to data.table diff --git a/man/test_data_complete.Rd b/man/test_data_complete.Rd new file mode 100644 index 000000000..70a2e84d2 --- /dev/null +++ b/man/test_data_complete.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{test_data_complete} +\alias{test_data_complete} +\title{Check if data has either explicit NA values or implicit missing dates.} +\usage{ +test_data_complete(data) +} +\arguments{ +\item{data}{The data to be checked} +} +\value{ +\code{TRUE} if data is complete, else if data has implicit or explicit +missingness, \code{FALSE}. +} +\description{ +Check if data has either explicit NA values or implicit missing dates. +} +\keyword{internal} From 82f1751aa54b1782b45a3cda9f998fecd67bc7f0 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 17:44:29 +0100 Subject: [PATCH 10/39] Add function to crosscheck na as missing setting with data --- R/checks.R | 40 ++++++++++++++++++++++++++++ man/check_na_setting_against_data.Rd | 29 ++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 man/check_na_setting_against_data.Rd diff --git a/R/checks.R b/R/checks.R index 0151d973e..4a63d3e6c 100644 --- a/R/checks.R +++ b/R/checks.R @@ -212,3 +212,43 @@ test_data_complete <- function(data) { return(TRUE) # Return TRUE if no missing values or gaps in date sequence } + +#' Check the na settings in obs_opts() with the data and throw messages +#' where necessary +#' +#' @details +#' This function checks the data to see if it is complete or not and then +#' checks if the user specified na to be treated as missing. If the latter is +#' false, it does nothing. If TRUE, it informs the user about how the implicit +#' or explicit missingness is treated. This function is necessary because +#' the data argument and observation model do not interact internally. +#' +#' @param obs A call to [obs_opts()] +#' @param data The raw data +#' +#' @return Called for its side effects +#' @keywords internal +check_na_setting_against_data <- function(data, obs) { + # If users are using the default treatment of NA's and their data has + # implicit or explicit NA's, inform them of what's happening and alternatives + if (!obs$accumulate && + obs$na_as_missing_default_used && + !test_data_complete(data)) { + #nolint start: duplicate_argument_linter + cli_inform( + c( + "i" = "{col_red(\"As of version 1.5.0 missing dates or dates with `NA` + cases are treated as missing. This is in contrast to previous versions + where these were interpreted as dates with zero cases. \")}", + "i" = "In order to treat missing or `NA` cases as zeroes, see + solutions in {.url https://github.com/epiforecasts/EpiNow2/issues/767#issuecomment-2348805272}", #nolint + "i" = "If the data is reported at non-daily intervals (for example + weekly), consider using `obs_opts(na=\"accumulate\")`.", + "i" = "For more information on these options, see `?obs_opts`." + ), + .frequency = "regularly", + .frequency_id = "check_na_setting_against_data" + ) + #nolint end + } +} diff --git a/man/check_na_setting_against_data.Rd b/man/check_na_setting_against_data.Rd new file mode 100644 index 000000000..4774caf80 --- /dev/null +++ b/man/check_na_setting_against_data.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{check_na_setting_against_data} +\alias{check_na_setting_against_data} +\title{Check the na settings in obs_opts() with the data and throw messages +where necessary} +\usage{ +check_na_setting_against_data(data, obs) +} +\arguments{ +\item{data}{The raw data} + +\item{obs}{A call to \code{\link[=obs_opts]{obs_opts()}}} +} +\value{ +Called for its side effects +} +\description{ +Check the na settings in obs_opts() with the data and throw messages +where necessary +} +\details{ +This function checks the data to see if it is complete or not and then +checks if the user specified na to be treated as missing. If the latter is +false, it does nothing. If TRUE, it informs the user about how the implicit +or explicit missingness is treated. This function is necessary because +the data argument and observation model do not interact internally. +} +\keyword{internal} From 552cf0050905f40aa8339b4b13b5d7c59bbb033d Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 17:45:53 +0100 Subject: [PATCH 11/39] Move message elsewhere and only check if na is specified for later use --- R/opts.R | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/R/opts.R b/R/opts.R index 6c85f6159..eff96d5d6 100644 --- a/R/opts.R +++ b/R/opts.R @@ -628,6 +628,10 @@ obs_opts <- function(family = c("negbin", "poisson"), na = c("missing", "accumulate"), likelihood = TRUE, return_likelihood = FALSE) { + # Check if the user explicitly specified the "na" argument. + # NB: This has to be checked first before the na argument is used anywhere. + # See ?missing for more details. + na_default_used <- missing(na) na <- arg_match(na) if (na == "accumulate") { #nolint start: duplicate_argument_linter @@ -644,21 +648,6 @@ obs_opts <- function(family = c("negbin", "poisson"), .frequency = "regularly", .frequency_id = "obs_opts" ) - } else if (missing(na)) { - cli_inform( - c( - "i" = "{col_red(\"As of version 1.5.0 missing dates or dates with `NA` - cases are treated as missing. This is in contrast to previous versions - where these were interpreted as dates with zero cases. \")}", - "i" = "In order to treat missing or `NA` cases as zeroes, see - solutions in {.url https://github.com/epiforecasts/EpiNow2/issues/767#issuecomment-2348805272}", #nolint - "i" = "If the data is reported at non-daily intervals (for example - weekly), consider using `obs_opts(na=\"accumulate\")`.", - "i" = "For more information on these options, see `?obs_opts`." - ), - .frequency = "regularly", - .frequency_id = "obs_opts" - ) } #nolint end if (length(phi) == 2 && is.numeric(phi)) { @@ -678,7 +667,8 @@ obs_opts <- function(family = c("negbin", "poisson"), scale = scale, accumulate = as.integer(na == "accumulate"), likelihood = likelihood, - return_likelihood = return_likelihood + return_likelihood = return_likelihood, + na_as_missing_default_used = na_default_used ) for (param in c("phi", "scale")) { From b5271b73d260a76f04f69ec42d3335ef9fd7e2dc Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 17:46:08 +0100 Subject: [PATCH 12/39] Add new global --- R/utilities.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index 91867b062..014845800 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -441,6 +441,7 @@ globalVariables( "central_lower", "central_upper", "mean_sd", "sd_sd", "average_7_day", "..lowers", "..upper_CrI", "..uppers", "timing", "dataset", "last_confirm", "report_date", "secondary", "id", "conv", "meanlog", "primary", "scaled", - "scaling", "sdlog", "lookup", "new_draw", ".draw", "p", "distribution" + "scaling", "sdlog", "lookup", "new_draw", ".draw", "p", "distribution", + "columns_to_check" ) ) From 9037633b451e40b41378b4e0cd40db655953e1fb Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 17:46:52 +0100 Subject: [PATCH 13/39] Apply na cross checking function in main functions --- R/estimate_infections.R | 11 +++++++++++ R/estimate_secondary.R | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/R/estimate_infections.R b/R/estimate_infections.R index fde83e449..6a850b594 100644 --- a/R/estimate_infections.R +++ b/R/estimate_infections.R @@ -167,6 +167,15 @@ estimate_infections <- function(data, name = "EpiNow2.epinow.estimate_infections" ) } + + # If the user is using the default treatment of NA's as missing and + # their data has implicit or explicit NA's, inform them of what's + # happening and alternatives. + check_na_setting_against_data(obs = obs, data = dirty_reported_cases) + # Remove "na_as_missing_default_used" after using it above + obs$na_as_missing_default_used <- NULL + + # Create clean and complete cases # Order cases reported_cases <- create_clean_reported_cases( data, horizon, @@ -200,6 +209,8 @@ estimate_infections <- function(data, ) reported_cases <- reported_cases[-(1:backcalc$prior_window)] + + # Define stan model parameters stan_data <- create_stan_data( reported_cases, diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index 077825381..2621ef90a 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -188,6 +188,14 @@ estimate_secondary <- function(data, filter_leading_zeros = filter_leading_zeros, zero_threshold = zero_threshold ) + + # If the user is using the default treatment of NA's as missing and + # their data has implicit or explicit NA's, inform them of what's + # happening and alternatives. + check_na_setting_against_data(obs = obs, data = secondary_reports_dirty) + # Remove "na_as_missing_default_used" after using it above + obs$na_as_missing_default_used <- NULL + ## fill in missing data (required if fitting to prevalence) complete_secondary <- create_complete_cases(secondary_reports) From a26ef4ff91b7d288089e06071e7a347cea5791dd Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 17:47:34 +0100 Subject: [PATCH 14/39] Rename dirty data --- R/estimate_secondary.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index 2621ef90a..ddc112fb5 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -182,9 +182,9 @@ estimate_secondary <- function(data, assert_logical(verbose) reports <- data.table::as.data.table(data) - secondary_reports <- reports[, list(date, confirm = secondary)] + secondary_reports_dirty <- reports[, list(date, confirm = secondary)] secondary_reports <- create_clean_reported_cases( - secondary_reports, + secondary_reports_dirty, filter_leading_zeros = filter_leading_zeros, zero_threshold = zero_threshold ) @@ -198,7 +198,6 @@ estimate_secondary <- function(data, ## fill in missing data (required if fitting to prevalence) complete_secondary <- create_complete_cases(secondary_reports) - ## fill down secondary_reports[, confirm := nafill(confirm, type = "locf")] ## fill any early data up From 3b60cf377381e66178c2f180eec4b562d19579cd Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 17:48:00 +0100 Subject: [PATCH 15/39] Add test for na cross checking function --- tests/testthat/test-checks.R | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index b0d7e4f52..e989b7ea9 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -170,3 +170,30 @@ test_that("test_data_complete detects complete and incomplete data", { expect_false(test_data_complete(es_missing_secondary)) expect_false(test_data_complete(ec_implicit_missing)) }) + +test_that("check_na_setting_against_data works as expected", { + # If data is incomplete and the default na = "missing" is being used, + # expect a message + expect_message( + check_na_setting_against_data( + obs = obs_opts(), + data = copy(example_confirmed)[c(1, 3), confirm := NA] + ), + "cases are treated as missing" + ) + # If data is incomplete but the user set na = "missing", then expect no + # message + expect_no_message( + check_na_setting_against_data( + obs = obs_opts(na = "missing"), + data = copy(example_confirmed)[c(1, 3), confirm := NA] + ) + ) + # If data is complete, expect no message + expect_no_message( + check_na_setting_against_data( + obs = obs_opts(), + data = example_confirmed + ) + ) +}) From 9f356ac5c47363ad111e64d247b2b70c36c1833e Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 17:48:54 +0100 Subject: [PATCH 16/39] Add more tests to obs_opts --- tests/testthat/test-obs_opts.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/testthat/test-obs_opts.R b/tests/testthat/test-obs_opts.R index 57d6ea9fb..7517509d6 100644 --- a/tests/testthat/test-obs_opts.R +++ b/tests/testthat/test-obs_opts.R @@ -22,3 +22,8 @@ test_that("obs_opts returns expected messages", { "modelled values that correspond to NA values" ) }) + +test_that("obs_opts behaves as expected for user specified na treatment", { +# If user explicitly specifies NA as missing, then don't throw message + expect_true(obs_opts(na = "missing")$na_as_missing_default_used) +}) \ No newline at end of file From 225dac6ef8b71567a77f1672a7ed56b6a280218d Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 18:09:47 +0100 Subject: [PATCH 17/39] Remove global variable --- R/utilities.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 014845800..91867b062 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -441,7 +441,6 @@ globalVariables( "central_lower", "central_upper", "mean_sd", "sd_sd", "average_7_day", "..lowers", "..upper_CrI", "..uppers", "timing", "dataset", "last_confirm", "report_date", "secondary", "id", "conv", "meanlog", "primary", "scaled", - "scaling", "sdlog", "lookup", "new_draw", ".draw", "p", "distribution", - "columns_to_check" + "scaling", "sdlog", "lookup", "new_draw", ".draw", "p", "distribution" ) ) From 6f9f4e9ecc8fef70aef632c75ecec73b4799969c Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 18:10:12 +0100 Subject: [PATCH 18/39] Use alternative syntax to avoid linting issues --- R/checks.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/checks.R b/R/checks.R index 4a63d3e6c..168f71745 100644 --- a/R/checks.R +++ b/R/checks.R @@ -196,7 +196,7 @@ test_data_complete <- function(data) { "date", intersect(c("confirm", "primary", "secondary"), names(data)) ) - if (any(sapply(data[, ..columns_to_check], anyNA))) { + if (any(sapply(data[, columns_to_check, with = FALSE], anyNA))) { return(FALSE) } @@ -225,6 +225,7 @@ test_data_complete <- function(data) { #' #' @param obs A call to [obs_opts()] #' @param data The raw data +#' @importFrom cli cli_inform col_red #' #' @return Called for its side effects #' @keywords internal From eecf8a1f2875186936a096bfbcf2ce2b2c85a59f Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 17 Sep 2024 21:03:37 +0100 Subject: [PATCH 19/39] copy before modifying data.table --- tests/testthat/test-checks.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index e989b7ea9..2301c3788 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -155,7 +155,11 @@ test_that("test_data_complete detects complete and incomplete data", { # example_confirmed with implicit missing (missing entries) ec_implicit_missing <- copy(example_confirmed)[-c(1,3,5), ] # Create a hypothetical complete example_secondary - es <- example_confirmed[, primary := confirm][, secondary := primary * 0.4] + es <- copy(example_confirmed)[ + , primary := confirm + ][ + , secondary := primary * 0.4 + ] # example_secondary with explicit missing primary es_missing_primary <- copy(es)[c(1, 3), primary := NA] # example_secondary with explicit missing secondary From 165fbcecd084a147a9236e2fd9393999336b27cb Mon Sep 17 00:00:00 2001 From: James Azam Date: Tue, 17 Sep 2024 20:18:46 +0100 Subject: [PATCH 20/39] Fix test --- tests/testthat/test-obs_opts.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-obs_opts.R b/tests/testthat/test-obs_opts.R index 7517509d6..268c98d8c 100644 --- a/tests/testthat/test-obs_opts.R +++ b/tests/testthat/test-obs_opts.R @@ -25,5 +25,5 @@ test_that("obs_opts returns expected messages", { test_that("obs_opts behaves as expected for user specified na treatment", { # If user explicitly specifies NA as missing, then don't throw message - expect_true(obs_opts(na = "missing")$na_as_missing_default_used) + expect_false(obs_opts(na = "missing")$na_as_missing_default_used) }) \ No newline at end of file From 954890069f132bab0d1789a2ce9ad252bb2fd53d Mon Sep 17 00:00:00 2001 From: James Azam Date: Tue, 17 Sep 2024 21:05:38 +0100 Subject: [PATCH 21/39] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index d200d3b17..189f3d2fc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -28,6 +28,7 @@ A release that introduces model improvements to the Gaussian Process models, alo - A warning is now thrown if nonparametric PMFs passed to delay options have consecutive tail values that are below a certain low threshold as these lead to loss in speed with little gain in accuracy. By @jamesmbaazam in #752 and reviewed by @seabbs, with a subsequent bug fix in #802. - `dist_fit()` can now accept any number of `samples` without throwing a warning when `samples` < 1000 in #751 by @jamesmbaazam and reviewed by @seabbs and @sbfnk. - `obs_opts()` now informs users about how NA observations are treated to help them decide on existing alternatives. By @jamesmbaazam in # and reviewed by . +- Users are now informed that `NA` observations will be treated as missing instead of zero when using the default `obs_opts()`. Options to treat `NA` as zeros or accumulate them are also provided. By @jamesmbaazam in #774 and reviewed by @sbfnk. ## Package changes From 77f74af37a490520d2b4fad3ef66fa137c0e39fc Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Wed, 18 Sep 2024 10:31:52 +0100 Subject: [PATCH 22/39] Make message/warning verbose and test for message/warning --- tests/testthat/test-checks.R | 25 +++++++++++++++++++------ tests/testthat/test-obs_opts.R | 6 ++++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 2301c3788..6c5d00eae 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -143,8 +143,16 @@ test_that("check_reports_valid errors for bad 'secondary' specifications", { }) test_that("check_sparse_pmf_tail throws a warning as expected", { + # NB: The warning is set to be thrown once every 8 hours, so hard to test + # regularly. The fix is to change the local setting here to throw the + # warning on demand for the sake of multiple runs of the test within + # 8 hours. That's what the rlang call below does + rlang::local_options(rlib_warning_verbosity = "verbose") pmf <- c(0.4, 0.30, 0.20, 0.05, 0.049995, 4.5e-06, rep(1e-7, 5)) - expect_warning(check_sparse_pmf_tail(pmf), "PMF tail has") + expect_warning( + check_sparse_pmf_tail(pmf), + "PMF tail has" + ) }) test_that("test_data_complete detects complete and incomplete data", { @@ -177,23 +185,28 @@ test_that("test_data_complete detects complete and incomplete data", { test_that("check_na_setting_against_data works as expected", { # If data is incomplete and the default na = "missing" is being used, - # expect a message + # expect a message thrown once every 8 hours. + # NB: We change the local setting here to throw the message on demand, rather + # than every 8 hours, for the sake of multiple runs of the test within + # 8 hours. + rlang::local_options(rlib_message_verbosity = "verbose") expect_message( check_na_setting_against_data( obs = obs_opts(), data = copy(example_confirmed)[c(1, 3), confirm := NA] ), - "cases are treated as missing" + "version 1.5.0 missing dates or dates" ) - # If data is incomplete but the user set na = "missing", then expect no - # message + # If data is incomplete but the user explicitly set na = "missing", then + # expect no message expect_no_message( check_na_setting_against_data( obs = obs_opts(na = "missing"), data = copy(example_confirmed)[c(1, 3), confirm := NA] ) ) - # If data is complete, expect no message + # If data is complete, expect no message even when using default na as + # missing setting expect_no_message( check_na_setting_against_data( obs = obs_opts(), diff --git a/tests/testthat/test-obs_opts.R b/tests/testthat/test-obs_opts.R index 268c98d8c..5b853e80e 100644 --- a/tests/testthat/test-obs_opts.R +++ b/tests/testthat/test-obs_opts.R @@ -13,6 +13,12 @@ test_that("obs_opts returns expected default values", { }) test_that("obs_opts returns expected messages", { + # The option na = "accumulate" informs the user of what is + # going to be done once every 8 hours, so hard to test regularly. + # NB: We change the local setting here to throw the message on demand, rather + # than every 8 hours, for the sake of multiple runs of the test within + # 8 hours. + rlang::local_options(rlib_message_verbosity = "verbose") expect_message( obs_opts(), "NA values will be treated as missing" From 84823dcfbef57a38f447ad8341cd97d6e9757844 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Fri, 20 Sep 2024 17:23:02 +0100 Subject: [PATCH 23/39] Simplify comment --- R/opts.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/opts.R b/R/opts.R index eff96d5d6..bd08b2b44 100644 --- a/R/opts.R +++ b/R/opts.R @@ -628,9 +628,7 @@ obs_opts <- function(family = c("negbin", "poisson"), na = c("missing", "accumulate"), likelihood = TRUE, return_likelihood = FALSE) { - # Check if the user explicitly specified the "na" argument. - # NB: This has to be checked first before the na argument is used anywhere. - # See ?missing for more details. + # NB: This has to be checked first before the na argument is touched anywhere. na_default_used <- missing(na) na <- arg_match(na) if (na == "accumulate") { From 954443439fd146f6862d2b9d3a2257408df599fb Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Fri, 20 Sep 2024 17:27:02 +0100 Subject: [PATCH 24/39] Use vapply for type safety --- R/checks.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/checks.R b/R/checks.R index 168f71745..36270c990 100644 --- a/R/checks.R +++ b/R/checks.R @@ -196,7 +196,7 @@ test_data_complete <- function(data) { "date", intersect(c("confirm", "primary", "secondary"), names(data)) ) - if (any(sapply(data[, columns_to_check, with = FALSE], anyNA))) { + if (any(vapply(data[, columns_to_check, with = FALSE], anyNA, logical(1)))) { return(FALSE) } @@ -252,4 +252,5 @@ check_na_setting_against_data <- function(data, obs) { ) #nolint end } + obs$na_as_missing_default_used <- NULL } From 5b1ce6ee805fba1b0976981b19b3195e408e329f Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Fri, 20 Sep 2024 18:38:55 +0100 Subject: [PATCH 25/39] Add lifecycle badge --- man/check_na_setting_against_data.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/check_na_setting_against_data.Rd b/man/check_na_setting_against_data.Rd index 4774caf80..f1e67e119 100644 --- a/man/check_na_setting_against_data.Rd +++ b/man/check_na_setting_against_data.Rd @@ -16,8 +16,8 @@ check_na_setting_against_data(data, obs) Called for its side effects } \description{ -Check the na settings in obs_opts() with the data and throw messages -where necessary +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + } \details{ This function checks the data to see if it is complete or not and then From 27069a576917cc4d9d5689dfc6f7bf7b78b9d94d Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Fri, 20 Sep 2024 18:39:44 +0100 Subject: [PATCH 26/39] Add lifecycle badge --- R/checks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/checks.R b/R/checks.R index 36270c990..80ba2821b 100644 --- a/R/checks.R +++ b/R/checks.R @@ -215,8 +215,8 @@ test_data_complete <- function(data) { #' Check the na settings in obs_opts() with the data and throw messages #' where necessary +#' @description `r lifecycle::badge("experimental")` #' -#' @details #' This function checks the data to see if it is complete or not and then #' checks if the user specified na to be treated as missing. If the latter is #' false, it does nothing. If TRUE, it informs the user about how the implicit From ceab729c0d734e9828100d87884a44224a567275 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Fri, 20 Sep 2024 18:40:31 +0100 Subject: [PATCH 27/39] Improve docs --- R/checks.R | 19 +++++++++++-------- man/check_na_setting_against_data.Rd | 20 ++++++++++---------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/R/checks.R b/R/checks.R index 80ba2821b..d577bb5b0 100644 --- a/R/checks.R +++ b/R/checks.R @@ -213,21 +213,24 @@ test_data_complete <- function(data) { return(TRUE) # Return TRUE if no missing values or gaps in date sequence } -#' Check the na settings in obs_opts() with the data and throw messages -#' where necessary +#' Cross-check treatment of `NA` in obs_opts() against input data +#' #' @description `r lifecycle::badge("experimental")` #' -#' This function checks the data to see if it is complete or not and then -#' checks if the user specified na to be treated as missing. If the latter is -#' false, it does nothing. If TRUE, it informs the user about how the implicit -#' or explicit missingness is treated. This function is necessary because -#' the data argument and observation model do not interact internally. +#' This function checks the input data for implicit and/or explicit missingness +#' and checks if the user specified `na = "missing"` in [obs_opts()]. +#' If the two are TRUE, it throws a message about how the model treats +#' missingness and provides alternatives. It returns an unmodified [obs_opts()]. +#' +#' This function is necessary because the data and observation model +#' do not currently interact internally. It will be deprecated in future +#' versions when the data specification interface is enhanced. #' #' @param obs A call to [obs_opts()] #' @param data The raw data #' @importFrom cli cli_inform col_red #' -#' @return Called for its side effects +#' @return [obs_opts()] #' @keywords internal check_na_setting_against_data <- function(data, obs) { # If users are using the default treatment of NA's and their data has diff --git a/man/check_na_setting_against_data.Rd b/man/check_na_setting_against_data.Rd index f1e67e119..6bbab7cce 100644 --- a/man/check_na_setting_against_data.Rd +++ b/man/check_na_setting_against_data.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/checks.R \name{check_na_setting_against_data} \alias{check_na_setting_against_data} -\title{Check the na settings in obs_opts() with the data and throw messages -where necessary} +\title{Cross-check treatment of \code{NA} in obs_opts() against input data} \usage{ check_na_setting_against_data(data, obs) } @@ -13,17 +12,18 @@ check_na_setting_against_data(data, obs) \item{obs}{A call to \code{\link[=obs_opts]{obs_opts()}}} } \value{ -Called for its side effects +\code{\link[=obs_opts]{obs_opts()}} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} -} -\details{ -This function checks the data to see if it is complete or not and then -checks if the user specified na to be treated as missing. If the latter is -false, it does nothing. If TRUE, it informs the user about how the implicit -or explicit missingness is treated. This function is necessary because -the data argument and observation model do not interact internally. +This function checks the input data for implicit and/or explicit missingness +and checks if the user specified \code{na = "missing"} in \code{\link[=obs_opts]{obs_opts()}}. +If the two are TRUE, it throws a message about how the model treats +missingness and provides alternatives. It returns an unmodified \code{\link[=obs_opts]{obs_opts()}}. + +This function is necessary because the data and observation model +do not currently interact internally. It will be deprecated in future +versions when the data specification interface is enhanced. } \keyword{internal} From 94dccd71ebac96c780f470f505c96d84f776452c Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Fri, 20 Sep 2024 18:40:49 +0100 Subject: [PATCH 28/39] Return unmodified obs_opts --- R/checks.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/checks.R b/R/checks.R index d577bb5b0..c683df8e7 100644 --- a/R/checks.R +++ b/R/checks.R @@ -256,4 +256,5 @@ check_na_setting_against_data <- function(data, obs) { #nolint end } obs$na_as_missing_default_used <- NULL + return(obs) } From aa85bf6b3e781db051c2f5c262fddac0ad652c48 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Fri, 20 Sep 2024 18:41:42 +0100 Subject: [PATCH 29/39] Move removal of element of obs_opts() into checker function --- R/estimate_infections.R | 10 +++++----- R/estimate_secondary.R | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/estimate_infections.R b/R/estimate_infections.R index 6a850b594..a76f5275a 100644 --- a/R/estimate_infections.R +++ b/R/estimate_infections.R @@ -170,11 +170,11 @@ estimate_infections <- function(data, # If the user is using the default treatment of NA's as missing and # their data has implicit or explicit NA's, inform them of what's - # happening and alternatives. - check_na_setting_against_data(obs = obs, data = dirty_reported_cases) - # Remove "na_as_missing_default_used" after using it above - obs$na_as_missing_default_used <- NULL - + # happening and provide alternatives. + obs <- check_na_setting_against_data( + obs = obs, + data = dirty_reported_cases + ) # Create clean and complete cases # Order cases reported_cases <- create_clean_reported_cases( diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index ddc112fb5..13a01c09f 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -191,11 +191,11 @@ estimate_secondary <- function(data, # If the user is using the default treatment of NA's as missing and # their data has implicit or explicit NA's, inform them of what's - # happening and alternatives. - check_na_setting_against_data(obs = obs, data = secondary_reports_dirty) - # Remove "na_as_missing_default_used" after using it above - obs$na_as_missing_default_used <- NULL - + # happening and provide alternatives. + obs <- check_na_setting_against_data( + obs = obs, + data = secondary_reports_dirty + ) ## fill in missing data (required if fitting to prevalence) complete_secondary <- create_complete_cases(secondary_reports) ## fill down From f515a946fbcbab9bade6b716c64db2c6ba03d388 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Fri, 20 Sep 2024 18:45:16 +0100 Subject: [PATCH 30/39] Use setequal instead of checking lengths --- R/checks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/checks.R b/R/checks.R index c683df8e7..f6744a694 100644 --- a/R/checks.R +++ b/R/checks.R @@ -206,7 +206,7 @@ test_data_complete <- function(data) { max(data$date, na.rm = TRUE), by = "1 day" ) - if (length(complete_dates) > length(unique(data$date))) { + if (!setequal(complete_dates, unique(data$date))) { return(FALSE) } From 4a068fc6e4982ef68ad3b5ef14b939851ee183f2 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Fri, 20 Sep 2024 18:48:06 +0100 Subject: [PATCH 31/39] Remove new unnecessary new lines --- R/estimate_infections.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/estimate_infections.R b/R/estimate_infections.R index a76f5275a..67e59e318 100644 --- a/R/estimate_infections.R +++ b/R/estimate_infections.R @@ -209,8 +209,6 @@ estimate_infections <- function(data, ) reported_cases <- reported_cases[-(1:backcalc$prior_window)] - - # Define stan model parameters stan_data <- create_stan_data( reported_cases, From 6d9f2cfce68ad7fa9e6a336cfb7a204cbba75d7b Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 24 Sep 2024 09:54:12 +0000 Subject: [PATCH 32/39] Remove trailing white space --- R/checks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/checks.R b/R/checks.R index f6744a694..9e32df1e2 100644 --- a/R/checks.R +++ b/R/checks.R @@ -221,7 +221,7 @@ test_data_complete <- function(data) { #' and checks if the user specified `na = "missing"` in [obs_opts()]. #' If the two are TRUE, it throws a message about how the model treats #' missingness and provides alternatives. It returns an unmodified [obs_opts()]. -#' +#' #' This function is necessary because the data and observation model #' do not currently interact internally. It will be deprecated in future #' versions when the data specification interface is enhanced. From 4a09c465d88c8f1a8beac4953cc43031493942e0 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 24 Sep 2024 09:54:47 +0000 Subject: [PATCH 33/39] Add test to check for expected element introduced --- tests/testthat/test-checks.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 6c5d00eae..9637fd7fd 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -213,4 +213,18 @@ test_that("check_na_setting_against_data works as expected", { data = example_confirmed ) ) + expect_identical( + setdiff( + names( + obs_opts() + ), + names( + check_na_setting_against_data( + obs = obs_opts(), + data = example_confirmed + ) + ) + ), + "na_as_missing_default_used" + ) }) From 71fd1ebe3363be64486301a79c211c73d926a002 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 8 Oct 2024 16:17:14 +0100 Subject: [PATCH 34/39] Add new argument to pass column names --- R/checks.R | 6 +++--- R/estimate_infections.R | 3 ++- R/estimate_secondary.R | 3 ++- man/check_na_setting_against_data.Rd | 4 +++- man/test_data_complete.Rd | 4 +++- tests/testthat/test-checks.R | 29 +++++++++++++++++----------- 6 files changed, 31 insertions(+), 18 deletions(-) diff --git a/R/checks.R b/R/checks.R index 9e32df1e2..99e9a0539 100644 --- a/R/checks.R +++ b/R/checks.R @@ -183,11 +183,11 @@ check_sparse_pmf_tail <- function(pmf, span = 5, tol = 1e-6) { #' Check if data has either explicit NA values or implicit missing dates. #' #' @param data The data to be checked -#' +#' @param cols_to_check A character vector of the columns to check #' @return `TRUE` if data is complete, else if data has implicit or explicit #' missingness, `FALSE`. #' @keywords internal -test_data_complete <- function(data) { +test_data_complete <- function(data, cols_to_check) { data <- setDT(data) # Convert data to data.table # Check for explicit missingness in required columns @@ -232,7 +232,7 @@ test_data_complete <- function(data) { #' #' @return [obs_opts()] #' @keywords internal -check_na_setting_against_data <- function(data, obs) { +check_na_setting_against_data <- function(data, cols_to_check, obs) { # If users are using the default treatment of NA's and their data has # implicit or explicit NA's, inform them of what's happening and alternatives if (!obs$accumulate && diff --git a/R/estimate_infections.R b/R/estimate_infections.R index 67e59e318..7af5b2f1a 100644 --- a/R/estimate_infections.R +++ b/R/estimate_infections.R @@ -173,7 +173,8 @@ estimate_infections <- function(data, # happening and provide alternatives. obs <- check_na_setting_against_data( obs = obs, - data = dirty_reported_cases + data = dirty_reported_cases, + cols_to_check = c("date", "confirm") ) # Create clean and complete cases # Order cases diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index 13a01c09f..26c6aeeb5 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -194,7 +194,8 @@ estimate_secondary <- function(data, # happening and provide alternatives. obs <- check_na_setting_against_data( obs = obs, - data = secondary_reports_dirty + data = secondary_reports_dirty, + cols_to_check = c("date", "primary", "secondary") ) ## fill in missing data (required if fitting to prevalence) complete_secondary <- create_complete_cases(secondary_reports) diff --git a/man/check_na_setting_against_data.Rd b/man/check_na_setting_against_data.Rd index 6bbab7cce..54631c09c 100644 --- a/man/check_na_setting_against_data.Rd +++ b/man/check_na_setting_against_data.Rd @@ -4,11 +4,13 @@ \alias{check_na_setting_against_data} \title{Cross-check treatment of \code{NA} in obs_opts() against input data} \usage{ -check_na_setting_against_data(data, obs) +check_na_setting_against_data(data, cols_to_check, obs) } \arguments{ \item{data}{The raw data} +\item{cols_to_check}{A character vector of the columns to check} + \item{obs}{A call to \code{\link[=obs_opts]{obs_opts()}}} } \value{ diff --git a/man/test_data_complete.Rd b/man/test_data_complete.Rd index 70a2e84d2..ceef12f30 100644 --- a/man/test_data_complete.Rd +++ b/man/test_data_complete.Rd @@ -4,10 +4,12 @@ \alias{test_data_complete} \title{Check if data has either explicit NA values or implicit missing dates.} \usage{ -test_data_complete(data) +test_data_complete(data, cols_to_check) } \arguments{ \item{data}{The data to be checked} + +\item{cols_to_check}{A character vector of the columns to check} } \value{ \code{TRUE} if data is complete, else if data has implicit or explicit diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 9637fd7fd..14d207cdb 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -173,14 +173,17 @@ test_that("test_data_complete detects complete and incomplete data", { # example_secondary with explicit missing secondary es_missing_secondary <- copy(es)[c(1, 3), secondary := NA] + # cols to check + ep_cols <- c("date", "confirm") + es_cols <- c("date", "primary", "secondary") # Expectations - expect_true(test_data_complete(example_confirmed)) - expect_true(test_data_complete(es)) - expect_false(test_data_complete(ec_missing_date)) - expect_false(test_data_complete(ec_missing_confirm)) - expect_false(test_data_complete(es_missing_primary)) - expect_false(test_data_complete(es_missing_secondary)) - expect_false(test_data_complete(ec_implicit_missing)) + expect_true(test_data_complete(example_confirmed, ep_cols)) + expect_true(test_data_complete(es, es_cols)) + expect_false(test_data_complete(ec_missing_date, ep_cols)) + expect_false(test_data_complete(ec_missing_confirm, ep_cols)) + expect_false(test_data_complete(es_missing_primary, es_cols)) + expect_false(test_data_complete(es_missing_secondary, es_cols)) + expect_false(test_data_complete(ec_implicit_missing, ep_cols)) }) test_that("check_na_setting_against_data works as expected", { @@ -193,7 +196,8 @@ test_that("check_na_setting_against_data works as expected", { expect_message( check_na_setting_against_data( obs = obs_opts(), - data = copy(example_confirmed)[c(1, 3), confirm := NA] + data = copy(example_confirmed)[c(1, 3), confirm := NA], + cols_to_check = c("date", "confirm") ), "version 1.5.0 missing dates or dates" ) @@ -202,7 +206,8 @@ test_that("check_na_setting_against_data works as expected", { expect_no_message( check_na_setting_against_data( obs = obs_opts(na = "missing"), - data = copy(example_confirmed)[c(1, 3), confirm := NA] + data = copy(example_confirmed)[c(1, 3), confirm := NA], + cols_to_check = c("date", "confirm") ) ) # If data is complete, expect no message even when using default na as @@ -210,7 +215,8 @@ test_that("check_na_setting_against_data works as expected", { expect_no_message( check_na_setting_against_data( obs = obs_opts(), - data = example_confirmed + data = example_confirmed, + cols_to_check = c("date", "confirm") ) ) expect_identical( @@ -221,7 +227,8 @@ test_that("check_na_setting_against_data works as expected", { names( check_na_setting_against_data( obs = obs_opts(), - data = example_confirmed + data = example_confirmed, + cols_to_check = c("date", "confirm") ) ) ), From 7a0d2c497d8b2ce5545e22db86221f4ed233bd44 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 8 Oct 2024 16:18:19 +0100 Subject: [PATCH 35/39] Add error for column mismatch --- R/checks.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/R/checks.R b/R/checks.R index 99e9a0539..c72914b9a 100644 --- a/R/checks.R +++ b/R/checks.R @@ -186,17 +186,21 @@ check_sparse_pmf_tail <- function(pmf, span = 5, tol = 1e-6) { #' @param cols_to_check A character vector of the columns to check #' @return `TRUE` if data is complete, else if data has implicit or explicit #' missingness, `FALSE`. +#' @importFrom cli cli_abort col_blue #' @keywords internal test_data_complete <- function(data, cols_to_check) { data <- setDT(data) # Convert data to data.table - # Check for explicit missingness in required columns - # (date, confirm, primary, secondary) - columns_to_check <- c( - "date", - intersect(c("confirm", "primary", "secondary"), names(data)) - ) - if (any(vapply(data[, columns_to_check, with = FALSE], anyNA, logical(1)))) { + if (!all(cols_to_check %in% names(data))) { + cli_abort( + c( + "x" = "{.var cols_to_check} must be present in the data.", + "i" = "{.var data} has columns: {col_blue(names(data))} but you + specified {.var cols_to_expect}: {col_blue(cols_to_check)}." + ) + ) + } + # Check for explicit missingness in the specified columns return(FALSE) } From c40529378d0e8e7922aa75a424230667b2c00b19 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 8 Oct 2024 16:18:51 +0100 Subject: [PATCH 36/39] Refactor for readability --- R/checks.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/R/checks.R b/R/checks.R index c72914b9a..20150cb51 100644 --- a/R/checks.R +++ b/R/checks.R @@ -201,6 +201,10 @@ test_data_complete <- function(data, cols_to_check) { ) } # Check for explicit missingness in the specified columns + data_has_explicit_na <- any( + vapply(data[, cols_to_check, with = FALSE], anyNA, logical(1)) + ) + if (data_has_explicit_na) { return(FALSE) } @@ -210,7 +214,8 @@ test_data_complete <- function(data, cols_to_check) { max(data$date, na.rm = TRUE), by = "1 day" ) - if (!setequal(complete_dates, unique(data$date))) { + data_has_implicit_na <- !all(complete_dates %in% data$date) + if (data_has_implicit_na) { return(FALSE) } @@ -232,6 +237,7 @@ test_data_complete <- function(data, cols_to_check) { #' #' @param obs A call to [obs_opts()] #' @param data The raw data +#' @inheritParams test_data_complete #' @importFrom cli cli_inform col_red #' #' @return [obs_opts()] @@ -239,9 +245,10 @@ test_data_complete <- function(data, cols_to_check) { check_na_setting_against_data <- function(data, cols_to_check, obs) { # If users are using the default treatment of NA's and their data has # implicit or explicit NA's, inform them of what's happening and alternatives + data_is_complete <- test_data_complete(data, cols_to_check) if (!obs$accumulate && obs$na_as_missing_default_used && - !test_data_complete(data)) { + !data_is_complete) { #nolint start: duplicate_argument_linter cli_inform( c( From 28c373fff91c824799c33b5aecbc495ca37b7a94 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 8 Oct 2024 17:21:52 +0100 Subject: [PATCH 37/39] Perform missingness check on the raw data --- R/estimate_secondary.R | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index 26c6aeeb5..7de6b32d5 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -182,21 +182,20 @@ estimate_secondary <- function(data, assert_logical(verbose) reports <- data.table::as.data.table(data) - secondary_reports_dirty <- reports[, list(date, confirm = secondary)] - secondary_reports <- create_clean_reported_cases( - secondary_reports_dirty, - filter_leading_zeros = filter_leading_zeros, - zero_threshold = zero_threshold - ) - # If the user is using the default treatment of NA's as missing and # their data has implicit or explicit NA's, inform them of what's # happening and provide alternatives. obs <- check_na_setting_against_data( obs = obs, - data = secondary_reports_dirty, + data = reports, cols_to_check = c("date", "primary", "secondary") ) + secondary_reports_dirty <- reports[, list(date, confirm = secondary)] + secondary_reports <- create_clean_reported_cases( + secondary_reports_dirty, + filter_leading_zeros = filter_leading_zeros, + zero_threshold = zero_threshold + ) ## fill in missing data (required if fitting to prevalence) complete_secondary <- create_complete_cases(secondary_reports) ## fill down From b1b1efbfd9c69f4a7b03d7782a86e798513d3822 Mon Sep 17 00:00:00 2001 From: jamesaazam Date: Tue, 8 Oct 2024 17:24:56 +0100 Subject: [PATCH 38/39] Remove test that no longer applies --- tests/testthat/test-obs_opts.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/testthat/test-obs_opts.R b/tests/testthat/test-obs_opts.R index 5b853e80e..a9cb17db0 100644 --- a/tests/testthat/test-obs_opts.R +++ b/tests/testthat/test-obs_opts.R @@ -19,10 +19,6 @@ test_that("obs_opts returns expected messages", { # than every 8 hours, for the sake of multiple runs of the test within # 8 hours. rlang::local_options(rlib_message_verbosity = "verbose") - expect_message( - obs_opts(), - "NA values will be treated as missing" - ) expect_message( obs_opts(na = "accumulate"), "modelled values that correspond to NA values" From 9c578e508890913fddf4786f624f0d6d1321f1e8 Mon Sep 17 00:00:00 2001 From: James Azam Date: Fri, 11 Oct 2024 10:57:46 +0100 Subject: [PATCH 39/39] Add reviewer and PR number Co-authored-by: Sebastian Funk --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 189f3d2fc..2353c946d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,7 +27,7 @@ A release that introduces model improvements to the Gaussian Process models, alo - Switch to broadcasting the day of the week effect. By @seabbs in #746 and reviewed by @jamesmbaazam. - A warning is now thrown if nonparametric PMFs passed to delay options have consecutive tail values that are below a certain low threshold as these lead to loss in speed with little gain in accuracy. By @jamesmbaazam in #752 and reviewed by @seabbs, with a subsequent bug fix in #802. - `dist_fit()` can now accept any number of `samples` without throwing a warning when `samples` < 1000 in #751 by @jamesmbaazam and reviewed by @seabbs and @sbfnk. -- `obs_opts()` now informs users about how NA observations are treated to help them decide on existing alternatives. By @jamesmbaazam in # and reviewed by . +- `obs_opts()` now informs users about how NA observations are treated to help them decide on existing alternatives. By @jamesmbaazam in #774 and reviewed by @sbfnk. - Users are now informed that `NA` observations will be treated as missing instead of zero when using the default `obs_opts()`. Options to treat `NA` as zeros or accumulate them are also provided. By @jamesmbaazam in #774 and reviewed by @sbfnk. ## Package changes