From 7fc61fdb2196b232ed38508e4989efa02012d616 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Mon, 16 Sep 2024 11:17:35 +0100
Subject: [PATCH 01/39] Add message about NA treatment for option "missing"

---
 R/opts.R | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/R/opts.R b/R/opts.R
index 66d8c56d2..ab5831683 100644
--- a/R/opts.R
+++ b/R/opts.R
@@ -644,9 +644,23 @@ obs_opts <- function(family = c("negbin", "poisson"),
       .frequency = "regularly",
       .frequency_id = "obs_opts"
     )
-  #nolint end
+  } else {
+    cli_inform(
+      c(
+        "i" = "{col_red(\"Explicit NA values will be treated as missing as
+        compared to being treated as zero observations in previous
+        versions.\")}",
+        "i" = "If you prefer for NA's to be treated as zero observations, see
+        solutions in {.url https://github.com/epiforecasts/EpiNow2/issues/767#issuecomment-2348805272}", #nolint
+        "i" = "If your data is reported at less regular intervals (for example
+        weekly), see the \"accumulate\" option.",
+        "i"  = "This behaviour was introduced in version 1.5.0."
+      ),
+      .frequency = "regularly",
+      .frequency_id = "obs_opts"
+    )
   }
-
+  #nolint end
   if (length(phi) == 2 && is.numeric(phi)) {
     cli_abort(
       c(

From 8639fda097fc4e37f9e6e39f44b0eecebc011eeb Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Mon, 16 Sep 2024 11:18:02 +0100
Subject: [PATCH 02/39] Add missing tests for obs_opts

---
 tests/testthat/test-obs_opts.R | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 tests/testthat/test-obs_opts.R

diff --git a/tests/testthat/test-obs_opts.R b/tests/testthat/test-obs_opts.R
new file mode 100644
index 000000000..57d6ea9fb
--- /dev/null
+++ b/tests/testthat/test-obs_opts.R
@@ -0,0 +1,24 @@
+test_that("obs_opts returns expected default values", {
+  result <- suppressWarnings(obs_opts())
+  
+  expect_s3_class(result, "obs_opts")
+  expect_equal(result$family, "negbin")
+  expect_equal(result$weight, 1)
+  expect_true(result$week_effect)
+  expect_equal(result$week_length, 7L)
+  expect_equal(result$scale, list(mean = 1, sd = 0))
+  expect_equal(result$accumulate, 0)
+  expect_true(result$likelihood)
+  expect_false(result$return_likelihood)
+})
+
+test_that("obs_opts returns expected messages", {
+  expect_message(
+    obs_opts(),
+    "NA values will be treated as missing"
+  )
+  expect_message(
+    obs_opts(na = "accumulate"),
+    "modelled values that correspond to NA values"
+  )
+})

From 68e8e1ed78514ea38272922aa74605a21fcbec4e Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Mon, 16 Sep 2024 11:18:24 +0100
Subject: [PATCH 03/39] Add NEWS item

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index 9260f8137..d200d3b17 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -27,6 +27,7 @@ A release that introduces model improvements to the Gaussian Process models, alo
 - Switch to broadcasting the day of the week effect. By @seabbs in #746 and reviewed by @jamesmbaazam.
 - A warning is now thrown if nonparametric PMFs passed to delay options have consecutive tail values that are below a certain low threshold as these lead to loss in speed with little gain in accuracy. By @jamesmbaazam in #752 and reviewed by @seabbs, with a subsequent bug fix in #802.
 - `dist_fit()` can now accept any number of `samples` without throwing a warning when `samples` < 1000 in #751 by @jamesmbaazam and reviewed by @seabbs and @sbfnk.
+- `obs_opts()` now informs users about how NA observations are treated to help them decide on existing alternatives. By @jamesmbaazam in # and reviewed by <REVIEWER>. 
 
 ## Package changes
 

From bcfa71474f31a8b5e5402151c01f58554112e1c4 Mon Sep 17 00:00:00 2001
From: James Azam <james.azam@lshtm.ac.uk>
Date: Mon, 16 Sep 2024 14:31:41 +0100
Subject: [PATCH 04/39] Improve messages

Co-authored-by: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
---
 R/opts.R | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/R/opts.R b/R/opts.R
index ab5831683..8d4695032 100644
--- a/R/opts.R
+++ b/R/opts.R
@@ -647,14 +647,14 @@ obs_opts <- function(family = c("negbin", "poisson"),
   } else {
     cli_inform(
       c(
-        "i" = "{col_red(\"Explicit NA values will be treated as missing as
-        compared to being treated as zero observations in previous
-        versions.\")}",
-        "i" = "If you prefer for NA's to be treated as zero observations, see
+        "i" = "{col_red(\"As of version 1.5.0 missing dates or dates with `NA`
+          cases are treated as missing. This is in contrast to previous versions
+          where these were interpreted as dates with zero cases. \")}",
+        "i" = "In order to treat missing or `NA` cases as zeroes, see
         solutions in {.url https://github.com/epiforecasts/EpiNow2/issues/767#issuecomment-2348805272}", #nolint
-        "i" = "If your data is reported at less regular intervals (for example
-        weekly), see the \"accumulate\" option.",
-        "i"  = "This behaviour was introduced in version 1.5.0."
+        "i" = "If the data is reported at less regular intervals (for example
+        weekly), consider using `obs_opts(na=\"accumulate\")`.",
+        "i"  = "For more information on these options, see `?obs_opts`."
       ),
       .frequency = "regularly",
       .frequency_id = "obs_opts"

From 49d22ee6fe00878f89c40e88c81caa0a8ba4bd61 Mon Sep 17 00:00:00 2001
From: James Azam <james.azam@lshtm.ac.uk>
Date: Mon, 16 Sep 2024 14:32:16 +0100
Subject: [PATCH 05/39] Check if na is explicitly specified by user

Co-authored-by: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
---
 R/opts.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/opts.R b/R/opts.R
index 8d4695032..40fadd1aa 100644
--- a/R/opts.R
+++ b/R/opts.R
@@ -644,7 +644,7 @@ obs_opts <- function(family = c("negbin", "poisson"),
       .frequency = "regularly",
       .frequency_id = "obs_opts"
     )
-  } else {
+  } else if (missing(na)) {
     cli_inform(
       c(
         "i" = "{col_red(\"As of version 1.5.0 missing dates or dates with `NA`

From 4ae8b97b693a868e6956e6ed5645afb017ccd3af Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 12:54:27 +0100
Subject: [PATCH 06/39] Change less regular to non-daily

---
 R/opts.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/opts.R b/R/opts.R
index 40fadd1aa..6c85f6159 100644
--- a/R/opts.R
+++ b/R/opts.R
@@ -652,7 +652,7 @@ obs_opts <- function(family = c("negbin", "poisson"),
           where these were interpreted as dates with zero cases. \")}",
         "i" = "In order to treat missing or `NA` cases as zeroes, see
         solutions in {.url https://github.com/epiforecasts/EpiNow2/issues/767#issuecomment-2348805272}", #nolint
-        "i" = "If the data is reported at less regular intervals (for example
+        "i" = "If the data is reported at non-daily intervals (for example
         weekly), consider using `obs_opts(na=\"accumulate\")`.",
         "i"  = "For more information on these options, see `?obs_opts`."
       ),

From d814317aa17ae896c0d106a58962b5db37818c75 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 15:07:11 +0100
Subject: [PATCH 07/39] Create function to test for complete data

---
 R/checks.R | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/R/checks.R b/R/checks.R
index 7d82a09a9..79cdf4980 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -179,3 +179,35 @@ check_sparse_pmf_tail <- function(pmf, span = 5, tol = 1e-6) {
     )
   }
 }
+
+#' Check if data has either explicit NA values or implicit missing dates.
+#'
+#' @data The data to be checked
+#'
+#' @return FALSE, if data is incomplete
+#' @keywords internal
+test_data_complete <- function(data) {
+  data <- setDT(data) # Convert data to data.table
+
+  # Check for explicit missingness in required columns
+  # (date, confirm, primary, secondary)
+  columns_to_check <- c(
+    "date",
+    intersect(c("confirm", "primary", "secondary"), names(data))
+  )
+  if (any(sapply(data[, ..columns_to_check], anyNA))) {
+    return(FALSE)
+  }
+
+  # Check for implicit missingness by comparing the expected full date sequence
+  complete_dates <- seq(
+    min(data$date, na.rm = TRUE),
+    max(data$date, na.rm = TRUE),
+    by = "1 day"
+  )
+  if (length(complete_dates) > length(unique(data$date))) {
+    return(FALSE)
+  }
+
+  return(TRUE) # Return TRUE if no missing values or gaps in date sequence
+}

From 4902e4157efa65aba8ff382b253d87c511c2416b Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 15:07:46 +0100
Subject: [PATCH 08/39] Add tests for test_data_complete

---
 tests/testthat/test-checks.R | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R
index 9a3fc6525..b0d7e4f52 100644
--- a/tests/testthat/test-checks.R
+++ b/tests/testthat/test-checks.R
@@ -146,3 +146,27 @@ test_that("check_sparse_pmf_tail throws a warning as expected", {
   pmf <- c(0.4, 0.30, 0.20, 0.05,  0.049995, 4.5e-06, rep(1e-7, 5))
   expect_warning(check_sparse_pmf_tail(pmf), "PMF tail has")
 })
+
+test_that("test_data_complete detects complete and incomplete data", {
+  # example_confirmed with explicit missing dates
+  ec_missing_date <- copy(example_confirmed)[c(1, 3), date := NA]
+  # example_confirmed with explicit missing confirm
+  ec_missing_confirm <- copy(example_confirmed)[c(1, 3), confirm := NA]
+  # example_confirmed with implicit missing (missing entries)
+  ec_implicit_missing <- copy(example_confirmed)[-c(1,3,5), ]
+  # Create a hypothetical complete example_secondary
+  es <- example_confirmed[, primary := confirm][, secondary := primary * 0.4]
+  # example_secondary with explicit missing primary
+  es_missing_primary <- copy(es)[c(1, 3), primary := NA]
+  # example_secondary with explicit missing secondary
+  es_missing_secondary <- copy(es)[c(1, 3), secondary := NA]
+  
+  # Expectations
+  expect_true(test_data_complete(example_confirmed))
+  expect_true(test_data_complete(es))
+  expect_false(test_data_complete(ec_missing_date))
+  expect_false(test_data_complete(ec_missing_confirm))
+  expect_false(test_data_complete(es_missing_primary))
+  expect_false(test_data_complete(es_missing_secondary))
+  expect_false(test_data_complete(ec_implicit_missing))
+})

From 6b6d695bcc902ac66c12203a1a7d573ced592120 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 17:43:56 +0100
Subject: [PATCH 09/39] Document test_data_complete

---
 R/checks.R                |  5 +++--
 man/test_data_complete.Rd | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 man/test_data_complete.Rd

diff --git a/R/checks.R b/R/checks.R
index 79cdf4980..0151d973e 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -182,9 +182,10 @@ check_sparse_pmf_tail <- function(pmf, span = 5, tol = 1e-6) {
 
 #' Check if data has either explicit NA values or implicit missing dates.
 #'
-#' @data The data to be checked
+#' @param data The data to be checked
 #'
-#' @return FALSE, if data is incomplete
+#' @return `TRUE` if data is complete, else if data has implicit or explicit
+#' missingness, `FALSE`.
 #' @keywords internal
 test_data_complete <- function(data) {
   data <- setDT(data) # Convert data to data.table
diff --git a/man/test_data_complete.Rd b/man/test_data_complete.Rd
new file mode 100644
index 000000000..70a2e84d2
--- /dev/null
+++ b/man/test_data_complete.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/checks.R
+\name{test_data_complete}
+\alias{test_data_complete}
+\title{Check if data has either explicit NA values or implicit missing dates.}
+\usage{
+test_data_complete(data)
+}
+\arguments{
+\item{data}{The data to be checked}
+}
+\value{
+\code{TRUE} if data is complete, else if data has implicit or explicit
+missingness, \code{FALSE}.
+}
+\description{
+Check if data has either explicit NA values or implicit missing dates.
+}
+\keyword{internal}

From 82f1751aa54b1782b45a3cda9f998fecd67bc7f0 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 17:44:29 +0100
Subject: [PATCH 10/39] Add function to crosscheck na as missing setting with
 data

---
 R/checks.R                           | 40 ++++++++++++++++++++++++++++
 man/check_na_setting_against_data.Rd | 29 ++++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 man/check_na_setting_against_data.Rd

diff --git a/R/checks.R b/R/checks.R
index 0151d973e..4a63d3e6c 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -212,3 +212,43 @@ test_data_complete <- function(data) {
 
   return(TRUE) # Return TRUE if no missing values or gaps in date sequence
 }
+
+#' Check the na settings in obs_opts() with the data and throw messages
+#' where necessary
+#'
+#' @details
+#' This function checks the data to see if it is complete or not and then
+#' checks if the user specified na to be treated as missing. If the latter is
+#' false, it does nothing. If TRUE, it informs the user about how the implicit
+#' or explicit missingness is treated. This function is necessary because
+#' the data argument and observation model do not interact internally.
+#'
+#' @param obs A call to [obs_opts()]
+#' @param data The raw data
+#'
+#' @return Called for its side effects
+#' @keywords internal
+check_na_setting_against_data <- function(data, obs) {
+  # If users are using the default treatment of NA's and their data has
+  # implicit or explicit NA's, inform them of what's happening and alternatives
+  if (!obs$accumulate &&
+      obs$na_as_missing_default_used &&
+      !test_data_complete(data)) {
+    #nolint start: duplicate_argument_linter
+    cli_inform(
+      c(
+        "i" = "{col_red(\"As of version 1.5.0 missing dates or dates with `NA`
+          cases are treated as missing. This is in contrast to previous versions
+          where these were interpreted as dates with zero cases. \")}",
+        "i" = "In order to treat missing or `NA` cases as zeroes, see
+        solutions in {.url https://github.com/epiforecasts/EpiNow2/issues/767#issuecomment-2348805272}", #nolint
+        "i" = "If the data is reported at non-daily intervals (for example
+        weekly), consider using `obs_opts(na=\"accumulate\")`.",
+        "i"  = "For more information on these options, see `?obs_opts`."
+      ),
+      .frequency = "regularly",
+      .frequency_id = "check_na_setting_against_data"
+    )
+    #nolint end
+  }
+}
diff --git a/man/check_na_setting_against_data.Rd b/man/check_na_setting_against_data.Rd
new file mode 100644
index 000000000..4774caf80
--- /dev/null
+++ b/man/check_na_setting_against_data.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/checks.R
+\name{check_na_setting_against_data}
+\alias{check_na_setting_against_data}
+\title{Check the na settings in obs_opts() with the data and throw messages
+where necessary}
+\usage{
+check_na_setting_against_data(data, obs)
+}
+\arguments{
+\item{data}{The raw data}
+
+\item{obs}{A call to \code{\link[=obs_opts]{obs_opts()}}}
+}
+\value{
+Called for its side effects
+}
+\description{
+Check the na settings in obs_opts() with the data and throw messages
+where necessary
+}
+\details{
+This function checks the data to see if it is complete or not and then
+checks if the user specified na to be treated as missing. If the latter is
+false, it does nothing. If TRUE, it informs the user about how the implicit
+or explicit missingness is treated. This function is necessary because
+the data argument and observation model do not interact internally.
+}
+\keyword{internal}

From 552cf0050905f40aa8339b4b13b5d7c59bbb033d Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 17:45:53 +0100
Subject: [PATCH 11/39] Move message elsewhere and only check if na is
 specified for later use

---
 R/opts.R | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/R/opts.R b/R/opts.R
index 6c85f6159..eff96d5d6 100644
--- a/R/opts.R
+++ b/R/opts.R
@@ -628,6 +628,10 @@ obs_opts <- function(family = c("negbin", "poisson"),
                      na = c("missing", "accumulate"),
                      likelihood = TRUE,
                      return_likelihood = FALSE) {
+  # Check if the user explicitly specified the "na" argument.
+  # NB: This has to be checked first before the na argument is used anywhere.
+  # See ?missing for more details.
+  na_default_used <- missing(na)
   na <- arg_match(na)
   if (na == "accumulate") {
     #nolint start: duplicate_argument_linter
@@ -644,21 +648,6 @@ obs_opts <- function(family = c("negbin", "poisson"),
       .frequency = "regularly",
       .frequency_id = "obs_opts"
     )
-  } else if (missing(na)) {
-    cli_inform(
-      c(
-        "i" = "{col_red(\"As of version 1.5.0 missing dates or dates with `NA`
-          cases are treated as missing. This is in contrast to previous versions
-          where these were interpreted as dates with zero cases. \")}",
-        "i" = "In order to treat missing or `NA` cases as zeroes, see
-        solutions in {.url https://github.com/epiforecasts/EpiNow2/issues/767#issuecomment-2348805272}", #nolint
-        "i" = "If the data is reported at non-daily intervals (for example
-        weekly), consider using `obs_opts(na=\"accumulate\")`.",
-        "i"  = "For more information on these options, see `?obs_opts`."
-      ),
-      .frequency = "regularly",
-      .frequency_id = "obs_opts"
-    )
   }
   #nolint end
   if (length(phi) == 2 && is.numeric(phi)) {
@@ -678,7 +667,8 @@ obs_opts <- function(family = c("negbin", "poisson"),
     scale = scale,
     accumulate = as.integer(na == "accumulate"),
     likelihood = likelihood,
-    return_likelihood = return_likelihood
+    return_likelihood = return_likelihood,
+    na_as_missing_default_used = na_default_used
   )
 
   for (param in c("phi", "scale")) {

From b5271b73d260a76f04f69ec42d3335ef9fd7e2dc Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 17:46:08 +0100
Subject: [PATCH 12/39] Add new global

---
 R/utilities.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/utilities.R b/R/utilities.R
index 91867b062..014845800 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -441,6 +441,7 @@ globalVariables(
     "central_lower", "central_upper", "mean_sd", "sd_sd", "average_7_day",
     "..lowers", "..upper_CrI", "..uppers", "timing", "dataset", "last_confirm",
     "report_date", "secondary", "id", "conv", "meanlog", "primary", "scaled",
-    "scaling", "sdlog", "lookup", "new_draw", ".draw", "p", "distribution"
+    "scaling", "sdlog", "lookup", "new_draw", ".draw", "p", "distribution",
+    "columns_to_check"
   )
 )

From 9037633b451e40b41378b4e0cd40db655953e1fb Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 17:46:52 +0100
Subject: [PATCH 13/39] Apply na cross checking function in main functions

---
 R/estimate_infections.R | 11 +++++++++++
 R/estimate_secondary.R  |  8 ++++++++
 2 files changed, 19 insertions(+)

diff --git a/R/estimate_infections.R b/R/estimate_infections.R
index fde83e449..6a850b594 100644
--- a/R/estimate_infections.R
+++ b/R/estimate_infections.R
@@ -167,6 +167,15 @@ estimate_infections <- function(data,
       name = "EpiNow2.epinow.estimate_infections"
     )
   }
+
+  # If the user is using the default treatment of NA's as missing and
+  # their data has implicit or explicit NA's, inform them of what's
+  # happening and alternatives.
+  check_na_setting_against_data(obs = obs, data = dirty_reported_cases)
+  # Remove "na_as_missing_default_used" after using it above
+  obs$na_as_missing_default_used <- NULL
+
+  # Create clean and complete cases
   # Order cases
   reported_cases <- create_clean_reported_cases(
     data, horizon,
@@ -200,6 +209,8 @@ estimate_infections <- function(data,
   )
   reported_cases <- reported_cases[-(1:backcalc$prior_window)]
 
+
+
   # Define stan model parameters
   stan_data <- create_stan_data(
     reported_cases,
diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R
index 077825381..2621ef90a 100644
--- a/R/estimate_secondary.R
+++ b/R/estimate_secondary.R
@@ -188,6 +188,14 @@ estimate_secondary <- function(data,
     filter_leading_zeros = filter_leading_zeros,
     zero_threshold = zero_threshold
   )
+
+  # If the user is using the default treatment of NA's as missing and
+  # their data has implicit or explicit NA's, inform them of what's
+  # happening and alternatives.
+  check_na_setting_against_data(obs = obs, data = secondary_reports_dirty)
+  # Remove "na_as_missing_default_used" after using it above
+  obs$na_as_missing_default_used <- NULL
+
   ## fill in missing data (required if fitting to prevalence)
   complete_secondary <- create_complete_cases(secondary_reports)
 

From a26ef4ff91b7d288089e06071e7a347cea5791dd Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 17:47:34 +0100
Subject: [PATCH 14/39] Rename dirty data

---
 R/estimate_secondary.R | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R
index 2621ef90a..ddc112fb5 100644
--- a/R/estimate_secondary.R
+++ b/R/estimate_secondary.R
@@ -182,9 +182,9 @@ estimate_secondary <- function(data,
   assert_logical(verbose)
 
   reports <- data.table::as.data.table(data)
-  secondary_reports <- reports[, list(date, confirm = secondary)]
+  secondary_reports_dirty <- reports[, list(date, confirm = secondary)]
   secondary_reports <- create_clean_reported_cases(
-    secondary_reports,
+    secondary_reports_dirty,
     filter_leading_zeros = filter_leading_zeros,
     zero_threshold = zero_threshold
   )
@@ -198,7 +198,6 @@ estimate_secondary <- function(data,
 
   ## fill in missing data (required if fitting to prevalence)
   complete_secondary <- create_complete_cases(secondary_reports)
-
   ## fill down
   secondary_reports[, confirm := nafill(confirm, type = "locf")]
   ## fill any early data up

From 3b60cf377381e66178c2f180eec4b562d19579cd Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 17:48:00 +0100
Subject: [PATCH 15/39] Add test for na cross checking function

---
 tests/testthat/test-checks.R | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R
index b0d7e4f52..e989b7ea9 100644
--- a/tests/testthat/test-checks.R
+++ b/tests/testthat/test-checks.R
@@ -170,3 +170,30 @@ test_that("test_data_complete detects complete and incomplete data", {
   expect_false(test_data_complete(es_missing_secondary))
   expect_false(test_data_complete(ec_implicit_missing))
 })
+
+test_that("check_na_setting_against_data works as expected", {
+  # If data is incomplete and the default na = "missing" is being used,
+  # expect a message
+  expect_message(
+    check_na_setting_against_data(
+      obs = obs_opts(),
+      data = copy(example_confirmed)[c(1, 3), confirm := NA]
+    ),
+    "cases are treated as missing"
+  )
+  # If data is incomplete but the user set na = "missing", then expect no
+  # message
+  expect_no_message(
+    check_na_setting_against_data(
+      obs = obs_opts(na = "missing"),
+      data = copy(example_confirmed)[c(1, 3), confirm := NA]
+    )
+  )
+  # If data is complete, expect no message
+  expect_no_message(
+    check_na_setting_against_data(
+      obs = obs_opts(),
+      data = example_confirmed
+    )
+  )
+})

From 9f356ac5c47363ad111e64d247b2b70c36c1833e Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 17:48:54 +0100
Subject: [PATCH 16/39] Add more tests to obs_opts

---
 tests/testthat/test-obs_opts.R | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/testthat/test-obs_opts.R b/tests/testthat/test-obs_opts.R
index 57d6ea9fb..7517509d6 100644
--- a/tests/testthat/test-obs_opts.R
+++ b/tests/testthat/test-obs_opts.R
@@ -22,3 +22,8 @@ test_that("obs_opts returns expected messages", {
     "modelled values that correspond to NA values"
   )
 })
+
+test_that("obs_opts behaves as expected for user specified na treatment", {
+# If user explicitly specifies NA as missing, then don't throw message
+  expect_true(obs_opts(na = "missing")$na_as_missing_default_used)
+})
\ No newline at end of file

From 225dac6ef8b71567a77f1672a7ed56b6a280218d Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 18:09:47 +0100
Subject: [PATCH 17/39] Remove global variable

---
 R/utilities.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/utilities.R b/R/utilities.R
index 014845800..91867b062 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -441,7 +441,6 @@ globalVariables(
     "central_lower", "central_upper", "mean_sd", "sd_sd", "average_7_day",
     "..lowers", "..upper_CrI", "..uppers", "timing", "dataset", "last_confirm",
     "report_date", "secondary", "id", "conv", "meanlog", "primary", "scaled",
-    "scaling", "sdlog", "lookup", "new_draw", ".draw", "p", "distribution",
-    "columns_to_check"
+    "scaling", "sdlog", "lookup", "new_draw", ".draw", "p", "distribution"
   )
 )

From 6f9f4e9ecc8fef70aef632c75ecec73b4799969c Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 18:10:12 +0100
Subject: [PATCH 18/39] Use alternative syntax to avoid linting issues

---
 R/checks.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/checks.R b/R/checks.R
index 4a63d3e6c..168f71745 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -196,7 +196,7 @@ test_data_complete <- function(data) {
     "date",
     intersect(c("confirm", "primary", "secondary"), names(data))
   )
-  if (any(sapply(data[, ..columns_to_check], anyNA))) {
+  if (any(sapply(data[, columns_to_check, with = FALSE], anyNA))) {
     return(FALSE)
   }
 
@@ -225,6 +225,7 @@ test_data_complete <- function(data) {
 #'
 #' @param obs A call to [obs_opts()]
 #' @param data The raw data
+#' @importFrom cli cli_inform col_red
 #'
 #' @return Called for its side effects
 #' @keywords internal

From eecf8a1f2875186936a096bfbcf2ce2b2c85a59f Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 21:03:37 +0100
Subject: [PATCH 19/39] copy before modifying data.table

---
 tests/testthat/test-checks.R | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R
index e989b7ea9..2301c3788 100644
--- a/tests/testthat/test-checks.R
+++ b/tests/testthat/test-checks.R
@@ -155,7 +155,11 @@ test_that("test_data_complete detects complete and incomplete data", {
   # example_confirmed with implicit missing (missing entries)
   ec_implicit_missing <- copy(example_confirmed)[-c(1,3,5), ]
   # Create a hypothetical complete example_secondary
-  es <- example_confirmed[, primary := confirm][, secondary := primary * 0.4]
+  es <- copy(example_confirmed)[
+    , primary := confirm
+  ][
+    , secondary := primary * 0.4
+  ]
   # example_secondary with explicit missing primary
   es_missing_primary <- copy(es)[c(1, 3), primary := NA]
   # example_secondary with explicit missing secondary

From 165fbcecd084a147a9236e2fd9393999336b27cb Mon Sep 17 00:00:00 2001
From: James Azam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 20:18:46 +0100
Subject: [PATCH 20/39] Fix test

---
 tests/testthat/test-obs_opts.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-obs_opts.R b/tests/testthat/test-obs_opts.R
index 7517509d6..268c98d8c 100644
--- a/tests/testthat/test-obs_opts.R
+++ b/tests/testthat/test-obs_opts.R
@@ -25,5 +25,5 @@ test_that("obs_opts returns expected messages", {
 
 test_that("obs_opts behaves as expected for user specified na treatment", {
 # If user explicitly specifies NA as missing, then don't throw message
-  expect_true(obs_opts(na = "missing")$na_as_missing_default_used)
+  expect_false(obs_opts(na = "missing")$na_as_missing_default_used)
 })
\ No newline at end of file

From 954890069f132bab0d1789a2ce9ad252bb2fd53d Mon Sep 17 00:00:00 2001
From: James Azam <james.azam@lshtm.ac.uk>
Date: Tue, 17 Sep 2024 21:05:38 +0100
Subject: [PATCH 21/39] Update NEWS.md

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index d200d3b17..189f3d2fc 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -28,6 +28,7 @@ A release that introduces model improvements to the Gaussian Process models, alo
 - A warning is now thrown if nonparametric PMFs passed to delay options have consecutive tail values that are below a certain low threshold as these lead to loss in speed with little gain in accuracy. By @jamesmbaazam in #752 and reviewed by @seabbs, with a subsequent bug fix in #802.
 - `dist_fit()` can now accept any number of `samples` without throwing a warning when `samples` < 1000 in #751 by @jamesmbaazam and reviewed by @seabbs and @sbfnk.
 - `obs_opts()` now informs users about how NA observations are treated to help them decide on existing alternatives. By @jamesmbaazam in # and reviewed by <REVIEWER>. 
+- Users are now informed that `NA` observations will be treated as missing instead of zero when using the default `obs_opts()`. Options to treat `NA` as zeros or accumulate them are also provided. By @jamesmbaazam in #774 and reviewed by @sbfnk.
 
 ## Package changes
 

From 77f74af37a490520d2b4fad3ef66fa137c0e39fc Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Wed, 18 Sep 2024 10:31:52 +0100
Subject: [PATCH 22/39] Make message/warning verbose and test for
 message/warning

---
 tests/testthat/test-checks.R   | 25 +++++++++++++++++++------
 tests/testthat/test-obs_opts.R |  6 ++++++
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R
index 2301c3788..6c5d00eae 100644
--- a/tests/testthat/test-checks.R
+++ b/tests/testthat/test-checks.R
@@ -143,8 +143,16 @@ test_that("check_reports_valid errors for bad 'secondary' specifications", {
 })
 
 test_that("check_sparse_pmf_tail throws a warning as expected", {
+  # NB: The warning is set to be thrown once every 8 hours, so hard to test
+  # regularly. The fix is to change the local setting here to throw the
+  # warning on demand for the sake of multiple runs of the test within
+  # 8 hours. That's what the rlang call below does
+  rlang::local_options(rlib_warning_verbosity = "verbose")
   pmf <- c(0.4, 0.30, 0.20, 0.05,  0.049995, 4.5e-06, rep(1e-7, 5))
-  expect_warning(check_sparse_pmf_tail(pmf), "PMF tail has")
+  expect_warning(
+    check_sparse_pmf_tail(pmf),
+    "PMF tail has"
+  )
 })
 
 test_that("test_data_complete detects complete and incomplete data", {
@@ -177,23 +185,28 @@ test_that("test_data_complete detects complete and incomplete data", {
 
 test_that("check_na_setting_against_data works as expected", {
   # If data is incomplete and the default na = "missing" is being used,
-  # expect a message
+  # expect a message thrown once every 8 hours.
+  # NB: We change the local setting here to throw the message on demand, rather
+  # than every 8 hours, for the sake of multiple runs of the test within
+  # 8 hours.
+  rlang::local_options(rlib_message_verbosity = "verbose")
   expect_message(
     check_na_setting_against_data(
       obs = obs_opts(),
       data = copy(example_confirmed)[c(1, 3), confirm := NA]
     ),
-    "cases are treated as missing"
+    "version 1.5.0 missing dates or dates"
   )
-  # If data is incomplete but the user set na = "missing", then expect no
-  # message
+  # If data is incomplete but the user explicitly set na = "missing", then
+  # expect no message
   expect_no_message(
     check_na_setting_against_data(
       obs = obs_opts(na = "missing"),
       data = copy(example_confirmed)[c(1, 3), confirm := NA]
     )
   )
-  # If data is complete, expect no message
+  # If data is complete, expect no message even when using default na as
+  # missing setting
   expect_no_message(
     check_na_setting_against_data(
       obs = obs_opts(),
diff --git a/tests/testthat/test-obs_opts.R b/tests/testthat/test-obs_opts.R
index 268c98d8c..5b853e80e 100644
--- a/tests/testthat/test-obs_opts.R
+++ b/tests/testthat/test-obs_opts.R
@@ -13,6 +13,12 @@ test_that("obs_opts returns expected default values", {
 })
 
 test_that("obs_opts returns expected messages", {
+  # The option na = "accumulate" informs the user of what is
+  # going to be done once every 8 hours, so hard to test regularly.
+  # NB: We change the local setting here to throw the message on demand, rather
+  # than every 8 hours, for the sake of multiple runs of the test within
+  # 8 hours.
+  rlang::local_options(rlib_message_verbosity = "verbose")
   expect_message(
     obs_opts(),
     "NA values will be treated as missing"

From 84823dcfbef57a38f447ad8341cd97d6e9757844 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Fri, 20 Sep 2024 17:23:02 +0100
Subject: [PATCH 23/39] Simplify comment

---
 R/opts.R | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/R/opts.R b/R/opts.R
index eff96d5d6..bd08b2b44 100644
--- a/R/opts.R
+++ b/R/opts.R
@@ -628,9 +628,7 @@ obs_opts <- function(family = c("negbin", "poisson"),
                      na = c("missing", "accumulate"),
                      likelihood = TRUE,
                      return_likelihood = FALSE) {
-  # Check if the user explicitly specified the "na" argument.
-  # NB: This has to be checked first before the na argument is used anywhere.
-  # See ?missing for more details.
+  # NB: This has to be checked first before the na argument is touched anywhere.
   na_default_used <- missing(na)
   na <- arg_match(na)
   if (na == "accumulate") {

From 954443439fd146f6862d2b9d3a2257408df599fb Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Fri, 20 Sep 2024 17:27:02 +0100
Subject: [PATCH 24/39] Use vapply for type safety

---
 R/checks.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/checks.R b/R/checks.R
index 168f71745..36270c990 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -196,7 +196,7 @@ test_data_complete <- function(data) {
     "date",
     intersect(c("confirm", "primary", "secondary"), names(data))
   )
-  if (any(sapply(data[, columns_to_check, with = FALSE], anyNA))) {
+  if (any(vapply(data[, columns_to_check, with = FALSE], anyNA, logical(1)))) {
     return(FALSE)
   }
 
@@ -252,4 +252,5 @@ check_na_setting_against_data <- function(data, obs) {
     )
     #nolint end
   }
+  obs$na_as_missing_default_used <- NULL
 }

From 5b1ce6ee805fba1b0976981b19b3195e408e329f Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Fri, 20 Sep 2024 18:38:55 +0100
Subject: [PATCH 25/39] Add lifecycle badge

---
 man/check_na_setting_against_data.Rd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/man/check_na_setting_against_data.Rd b/man/check_na_setting_against_data.Rd
index 4774caf80..f1e67e119 100644
--- a/man/check_na_setting_against_data.Rd
+++ b/man/check_na_setting_against_data.Rd
@@ -16,8 +16,8 @@ check_na_setting_against_data(data, obs)
 Called for its side effects
 }
 \description{
-Check the na settings in obs_opts() with the data and throw messages
-where necessary
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
+
 }
 \details{
 This function checks the data to see if it is complete or not and then

From 27069a576917cc4d9d5689dfc6f7bf7b78b9d94d Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Fri, 20 Sep 2024 18:39:44 +0100
Subject: [PATCH 26/39] Add lifecycle badge

---
 R/checks.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/checks.R b/R/checks.R
index 36270c990..80ba2821b 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -215,8 +215,8 @@ test_data_complete <- function(data) {
 
 #' Check the na settings in obs_opts() with the data and throw messages
 #' where necessary
+#' @description `r lifecycle::badge("experimental")`
 #'
-#' @details
 #' This function checks the data to see if it is complete or not and then
 #' checks if the user specified na to be treated as missing. If the latter is
 #' false, it does nothing. If TRUE, it informs the user about how the implicit

From ceab729c0d734e9828100d87884a44224a567275 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Fri, 20 Sep 2024 18:40:31 +0100
Subject: [PATCH 27/39] Improve docs

---
 R/checks.R                           | 19 +++++++++++--------
 man/check_na_setting_against_data.Rd | 20 ++++++++++----------
 2 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/R/checks.R b/R/checks.R
index 80ba2821b..d577bb5b0 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -213,21 +213,24 @@ test_data_complete <- function(data) {
   return(TRUE) # Return TRUE if no missing values or gaps in date sequence
 }
 
-#' Check the na settings in obs_opts() with the data and throw messages
-#' where necessary
+#' Cross-check treatment of `NA` in obs_opts() against input data
+#'
 #' @description `r lifecycle::badge("experimental")`
 #'
-#' This function checks the data to see if it is complete or not and then
-#' checks if the user specified na to be treated as missing. If the latter is
-#' false, it does nothing. If TRUE, it informs the user about how the implicit
-#' or explicit missingness is treated. This function is necessary because
-#' the data argument and observation model do not interact internally.
+#' This function checks the input data for implicit and/or explicit missingness
+#' and checks if the user specified `na = "missing"` in [obs_opts()].
+#' If the two are TRUE, it throws a message about how the model treats
+#' missingness and provides alternatives. It returns an unmodified [obs_opts()].
+#' 
+#' This function is necessary because the data and observation model
+#' do not currently interact internally. It will be deprecated in future
+#' versions when the data specification interface is enhanced.
 #'
 #' @param obs A call to [obs_opts()]
 #' @param data The raw data
 #' @importFrom cli cli_inform col_red
 #'
-#' @return Called for its side effects
+#' @return [obs_opts()]
 #' @keywords internal
 check_na_setting_against_data <- function(data, obs) {
   # If users are using the default treatment of NA's and their data has
diff --git a/man/check_na_setting_against_data.Rd b/man/check_na_setting_against_data.Rd
index f1e67e119..6bbab7cce 100644
--- a/man/check_na_setting_against_data.Rd
+++ b/man/check_na_setting_against_data.Rd
@@ -2,8 +2,7 @@
 % Please edit documentation in R/checks.R
 \name{check_na_setting_against_data}
 \alias{check_na_setting_against_data}
-\title{Check the na settings in obs_opts() with the data and throw messages
-where necessary}
+\title{Cross-check treatment of \code{NA} in obs_opts() against input data}
 \usage{
 check_na_setting_against_data(data, obs)
 }
@@ -13,17 +12,18 @@ check_na_setting_against_data(data, obs)
 \item{obs}{A call to \code{\link[=obs_opts]{obs_opts()}}}
 }
 \value{
-Called for its side effects
+\code{\link[=obs_opts]{obs_opts()}}
 }
 \description{
 \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
 
-}
-\details{
-This function checks the data to see if it is complete or not and then
-checks if the user specified na to be treated as missing. If the latter is
-false, it does nothing. If TRUE, it informs the user about how the implicit
-or explicit missingness is treated. This function is necessary because
-the data argument and observation model do not interact internally.
+This function checks the input data for implicit and/or explicit missingness
+and checks if the user specified \code{na = "missing"} in \code{\link[=obs_opts]{obs_opts()}}.
+If the two are TRUE, it throws a message about how the model treats
+missingness and provides alternatives. It returns an unmodified \code{\link[=obs_opts]{obs_opts()}}.
+
+This function is necessary because the data and observation model
+do not currently interact internally. It will be deprecated in future
+versions when the data specification interface is enhanced.
 }
 \keyword{internal}

From 94dccd71ebac96c780f470f505c96d84f776452c Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Fri, 20 Sep 2024 18:40:49 +0100
Subject: [PATCH 28/39] Return unmodified obs_opts

---
 R/checks.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/checks.R b/R/checks.R
index d577bb5b0..c683df8e7 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -256,4 +256,5 @@ check_na_setting_against_data <- function(data, obs) {
     #nolint end
   }
   obs$na_as_missing_default_used <- NULL
+  return(obs)
 }

From aa85bf6b3e781db051c2f5c262fddac0ad652c48 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Fri, 20 Sep 2024 18:41:42 +0100
Subject: [PATCH 29/39] Move removal of element of obs_opts() into checker
 function

---
 R/estimate_infections.R | 10 +++++-----
 R/estimate_secondary.R  | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/R/estimate_infections.R b/R/estimate_infections.R
index 6a850b594..a76f5275a 100644
--- a/R/estimate_infections.R
+++ b/R/estimate_infections.R
@@ -170,11 +170,11 @@ estimate_infections <- function(data,
 
   # If the user is using the default treatment of NA's as missing and
   # their data has implicit or explicit NA's, inform them of what's
-  # happening and alternatives.
-  check_na_setting_against_data(obs = obs, data = dirty_reported_cases)
-  # Remove "na_as_missing_default_used" after using it above
-  obs$na_as_missing_default_used <- NULL
-
+  # happening and provide alternatives.
+  obs <- check_na_setting_against_data(
+    obs = obs,
+    data = dirty_reported_cases
+  )
   # Create clean and complete cases
   # Order cases
   reported_cases <- create_clean_reported_cases(
diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R
index ddc112fb5..13a01c09f 100644
--- a/R/estimate_secondary.R
+++ b/R/estimate_secondary.R
@@ -191,11 +191,11 @@ estimate_secondary <- function(data,
 
   # If the user is using the default treatment of NA's as missing and
   # their data has implicit or explicit NA's, inform them of what's
-  # happening and alternatives.
-  check_na_setting_against_data(obs = obs, data = secondary_reports_dirty)
-  # Remove "na_as_missing_default_used" after using it above
-  obs$na_as_missing_default_used <- NULL
-
+  # happening and provide alternatives.
+  obs <- check_na_setting_against_data(
+    obs = obs,
+    data = secondary_reports_dirty
+  )
   ## fill in missing data (required if fitting to prevalence)
   complete_secondary <- create_complete_cases(secondary_reports)
   ## fill down

From f515a946fbcbab9bade6b716c64db2c6ba03d388 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Fri, 20 Sep 2024 18:45:16 +0100
Subject: [PATCH 30/39] Use setequal instead of checking lengths

---
 R/checks.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/checks.R b/R/checks.R
index c683df8e7..f6744a694 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -206,7 +206,7 @@ test_data_complete <- function(data) {
     max(data$date, na.rm = TRUE),
     by = "1 day"
   )
-  if (length(complete_dates) > length(unique(data$date))) {
+  if (!setequal(complete_dates, unique(data$date))) {
     return(FALSE)
   }
 

From 4a068fc6e4982ef68ad3b5ef14b939851ee183f2 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Fri, 20 Sep 2024 18:48:06 +0100
Subject: [PATCH 31/39] Remove new unnecessary new lines

---
 R/estimate_infections.R | 2 --
 1 file changed, 2 deletions(-)

diff --git a/R/estimate_infections.R b/R/estimate_infections.R
index a76f5275a..67e59e318 100644
--- a/R/estimate_infections.R
+++ b/R/estimate_infections.R
@@ -209,8 +209,6 @@ estimate_infections <- function(data,
   )
   reported_cases <- reported_cases[-(1:backcalc$prior_window)]
 
-
-
   # Define stan model parameters
   stan_data <- create_stan_data(
     reported_cases,

From 6d9f2cfce68ad7fa9e6a336cfb7a204cbba75d7b Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 24 Sep 2024 09:54:12 +0000
Subject: [PATCH 32/39] Remove trailing white space

---
 R/checks.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/checks.R b/R/checks.R
index f6744a694..9e32df1e2 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -221,7 +221,7 @@ test_data_complete <- function(data) {
 #' and checks if the user specified `na = "missing"` in [obs_opts()].
 #' If the two are TRUE, it throws a message about how the model treats
 #' missingness and provides alternatives. It returns an unmodified [obs_opts()].
-#' 
+#'
 #' This function is necessary because the data and observation model
 #' do not currently interact internally. It will be deprecated in future
 #' versions when the data specification interface is enhanced.

From 4a09c465d88c8f1a8beac4953cc43031493942e0 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 24 Sep 2024 09:54:47 +0000
Subject: [PATCH 33/39] Add test to check for expected element introduced

---
 tests/testthat/test-checks.R | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R
index 6c5d00eae..9637fd7fd 100644
--- a/tests/testthat/test-checks.R
+++ b/tests/testthat/test-checks.R
@@ -213,4 +213,18 @@ test_that("check_na_setting_against_data works as expected", {
       data = example_confirmed
     )
   )
+  expect_identical(
+    setdiff(
+      names(
+        obs_opts()
+      ),
+      names(
+        check_na_setting_against_data(
+          obs = obs_opts(),
+          data = example_confirmed
+        )
+      )
+    ),
+    "na_as_missing_default_used"
+  )
 })

From 71fd1ebe3363be64486301a79c211c73d926a002 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 8 Oct 2024 16:17:14 +0100
Subject: [PATCH 34/39] Add new argument to pass column names

---
 R/checks.R                           |  6 +++---
 R/estimate_infections.R              |  3 ++-
 R/estimate_secondary.R               |  3 ++-
 man/check_na_setting_against_data.Rd |  4 +++-
 man/test_data_complete.Rd            |  4 +++-
 tests/testthat/test-checks.R         | 29 +++++++++++++++++-----------
 6 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/R/checks.R b/R/checks.R
index 9e32df1e2..99e9a0539 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -183,11 +183,11 @@ check_sparse_pmf_tail <- function(pmf, span = 5, tol = 1e-6) {
 #' Check if data has either explicit NA values or implicit missing dates.
 #'
 #' @param data The data to be checked
-#'
+#' @param cols_to_check A character vector of the columns to check
 #' @return `TRUE` if data is complete, else if data has implicit or explicit
 #' missingness, `FALSE`.
 #' @keywords internal
-test_data_complete <- function(data) {
+test_data_complete <- function(data, cols_to_check) {
   data <- setDT(data) # Convert data to data.table
 
   # Check for explicit missingness in required columns
@@ -232,7 +232,7 @@ test_data_complete <- function(data) {
 #'
 #' @return [obs_opts()]
 #' @keywords internal
-check_na_setting_against_data <- function(data, obs) {
+check_na_setting_against_data <- function(data, cols_to_check, obs) {
   # If users are using the default treatment of NA's and their data has
   # implicit or explicit NA's, inform them of what's happening and alternatives
   if (!obs$accumulate &&
diff --git a/R/estimate_infections.R b/R/estimate_infections.R
index 67e59e318..7af5b2f1a 100644
--- a/R/estimate_infections.R
+++ b/R/estimate_infections.R
@@ -173,7 +173,8 @@ estimate_infections <- function(data,
   # happening and provide alternatives.
   obs <- check_na_setting_against_data(
     obs = obs,
-    data = dirty_reported_cases
+    data = dirty_reported_cases,
+    cols_to_check = c("date", "confirm")
   )
   # Create clean and complete cases
   # Order cases
diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R
index 13a01c09f..26c6aeeb5 100644
--- a/R/estimate_secondary.R
+++ b/R/estimate_secondary.R
@@ -194,7 +194,8 @@ estimate_secondary <- function(data,
   # happening and provide alternatives.
   obs <- check_na_setting_against_data(
     obs = obs,
-    data = secondary_reports_dirty
+    data = secondary_reports_dirty,
+    cols_to_check = c("date", "primary", "secondary")
   )
   ## fill in missing data (required if fitting to prevalence)
   complete_secondary <- create_complete_cases(secondary_reports)
diff --git a/man/check_na_setting_against_data.Rd b/man/check_na_setting_against_data.Rd
index 6bbab7cce..54631c09c 100644
--- a/man/check_na_setting_against_data.Rd
+++ b/man/check_na_setting_against_data.Rd
@@ -4,11 +4,13 @@
 \alias{check_na_setting_against_data}
 \title{Cross-check treatment of \code{NA} in obs_opts() against input data}
 \usage{
-check_na_setting_against_data(data, obs)
+check_na_setting_against_data(data, cols_to_check, obs)
 }
 \arguments{
 \item{data}{The raw data}
 
+\item{cols_to_check}{A character vector of the columns to check}
+
 \item{obs}{A call to \code{\link[=obs_opts]{obs_opts()}}}
 }
 \value{
diff --git a/man/test_data_complete.Rd b/man/test_data_complete.Rd
index 70a2e84d2..ceef12f30 100644
--- a/man/test_data_complete.Rd
+++ b/man/test_data_complete.Rd
@@ -4,10 +4,12 @@
 \alias{test_data_complete}
 \title{Check if data has either explicit NA values or implicit missing dates.}
 \usage{
-test_data_complete(data)
+test_data_complete(data, cols_to_check)
 }
 \arguments{
 \item{data}{The data to be checked}
+
+\item{cols_to_check}{A character vector of the columns to check}
 }
 \value{
 \code{TRUE} if data is complete, else if data has implicit or explicit
diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R
index 9637fd7fd..14d207cdb 100644
--- a/tests/testthat/test-checks.R
+++ b/tests/testthat/test-checks.R
@@ -173,14 +173,17 @@ test_that("test_data_complete detects complete and incomplete data", {
   # example_secondary with explicit missing secondary
   es_missing_secondary <- copy(es)[c(1, 3), secondary := NA]
   
+  # cols to check
+  ep_cols <- c("date", "confirm")
+  es_cols <- c("date", "primary", "secondary")
   # Expectations
-  expect_true(test_data_complete(example_confirmed))
-  expect_true(test_data_complete(es))
-  expect_false(test_data_complete(ec_missing_date))
-  expect_false(test_data_complete(ec_missing_confirm))
-  expect_false(test_data_complete(es_missing_primary))
-  expect_false(test_data_complete(es_missing_secondary))
-  expect_false(test_data_complete(ec_implicit_missing))
+  expect_true(test_data_complete(example_confirmed, ep_cols))
+  expect_true(test_data_complete(es, es_cols))
+  expect_false(test_data_complete(ec_missing_date, ep_cols))
+  expect_false(test_data_complete(ec_missing_confirm, ep_cols))
+  expect_false(test_data_complete(es_missing_primary, es_cols))
+  expect_false(test_data_complete(es_missing_secondary, es_cols))
+  expect_false(test_data_complete(ec_implicit_missing, ep_cols))
 })
 
 test_that("check_na_setting_against_data works as expected", {
@@ -193,7 +196,8 @@ test_that("check_na_setting_against_data works as expected", {
   expect_message(
     check_na_setting_against_data(
       obs = obs_opts(),
-      data = copy(example_confirmed)[c(1, 3), confirm := NA]
+      data = copy(example_confirmed)[c(1, 3), confirm := NA],
+      cols_to_check = c("date", "confirm")
     ),
     "version 1.5.0 missing dates or dates"
   )
@@ -202,7 +206,8 @@ test_that("check_na_setting_against_data works as expected", {
   expect_no_message(
     check_na_setting_against_data(
       obs = obs_opts(na = "missing"),
-      data = copy(example_confirmed)[c(1, 3), confirm := NA]
+      data = copy(example_confirmed)[c(1, 3), confirm := NA],
+      cols_to_check = c("date", "confirm")
     )
   )
   # If data is complete, expect no message even when using default na as
@@ -210,7 +215,8 @@ test_that("check_na_setting_against_data works as expected", {
   expect_no_message(
     check_na_setting_against_data(
       obs = obs_opts(),
-      data = example_confirmed
+      data = example_confirmed,
+      cols_to_check = c("date", "confirm")
     )
   )
   expect_identical(
@@ -221,7 +227,8 @@ test_that("check_na_setting_against_data works as expected", {
       names(
         check_na_setting_against_data(
           obs = obs_opts(),
-          data = example_confirmed
+          data = example_confirmed,
+          cols_to_check = c("date", "confirm")
         )
       )
     ),

From 7a0d2c497d8b2ce5545e22db86221f4ed233bd44 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 8 Oct 2024 16:18:19 +0100
Subject: [PATCH 35/39] Add error for column mismatch

---
 R/checks.R | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/R/checks.R b/R/checks.R
index 99e9a0539..c72914b9a 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -186,17 +186,21 @@ check_sparse_pmf_tail <- function(pmf, span = 5, tol = 1e-6) {
 #' @param cols_to_check A character vector of the columns to check
 #' @return `TRUE` if data is complete, else if data has implicit or explicit
 #' missingness, `FALSE`.
+#' @importFrom cli cli_abort col_blue
 #' @keywords internal
 test_data_complete <- function(data, cols_to_check) {
   data <- setDT(data) # Convert data to data.table
 
-  # Check for explicit missingness in required columns
-  # (date, confirm, primary, secondary)
-  columns_to_check <- c(
-    "date",
-    intersect(c("confirm", "primary", "secondary"), names(data))
-  )
-  if (any(vapply(data[, columns_to_check, with = FALSE], anyNA, logical(1)))) {
+  if (!all(cols_to_check %in% names(data))) {
+    cli_abort(
+      c(
+        "x" = "{.var cols_to_check} must be present in the data.",
+        "i" = "{.var data} has columns: {col_blue(names(data))} but you
+        specified {.var cols_to_expect}: {col_blue(cols_to_check)}."
+      )
+    )
+  }
+  # Check for explicit missingness in the specified columns
     return(FALSE)
   }
 

From c40529378d0e8e7922aa75a424230667b2c00b19 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 8 Oct 2024 16:18:51 +0100
Subject: [PATCH 36/39] Refactor for readability

---
 R/checks.R | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/R/checks.R b/R/checks.R
index c72914b9a..20150cb51 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -201,6 +201,10 @@ test_data_complete <- function(data, cols_to_check) {
     )
   }
   # Check for explicit missingness in the specified columns
+  data_has_explicit_na <- any(
+    vapply(data[, cols_to_check, with = FALSE], anyNA, logical(1))
+  )
+  if (data_has_explicit_na) {
     return(FALSE)
   }
 
@@ -210,7 +214,8 @@ test_data_complete <- function(data, cols_to_check) {
     max(data$date, na.rm = TRUE),
     by = "1 day"
   )
-  if (!setequal(complete_dates, unique(data$date))) {
+  data_has_implicit_na <- !all(complete_dates %in% data$date)
+  if (data_has_implicit_na) {
     return(FALSE)
   }
 
@@ -232,6 +237,7 @@ test_data_complete <- function(data, cols_to_check) {
 #'
 #' @param obs A call to [obs_opts()]
 #' @param data The raw data
+#' @inheritParams test_data_complete
 #' @importFrom cli cli_inform col_red
 #'
 #' @return [obs_opts()]
@@ -239,9 +245,10 @@ test_data_complete <- function(data, cols_to_check) {
 check_na_setting_against_data <- function(data, cols_to_check, obs) {
   # If users are using the default treatment of NA's and their data has
   # implicit or explicit NA's, inform them of what's happening and alternatives
+  data_is_complete <- test_data_complete(data, cols_to_check)
   if (!obs$accumulate &&
       obs$na_as_missing_default_used &&
-      !test_data_complete(data)) {
+      !data_is_complete) {
     #nolint start: duplicate_argument_linter
     cli_inform(
       c(

From 28c373fff91c824799c33b5aecbc495ca37b7a94 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 8 Oct 2024 17:21:52 +0100
Subject: [PATCH 37/39] Perform missingness check on the raw data

---
 R/estimate_secondary.R | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R
index 26c6aeeb5..7de6b32d5 100644
--- a/R/estimate_secondary.R
+++ b/R/estimate_secondary.R
@@ -182,21 +182,20 @@ estimate_secondary <- function(data,
   assert_logical(verbose)
 
   reports <- data.table::as.data.table(data)
-  secondary_reports_dirty <- reports[, list(date, confirm = secondary)]
-  secondary_reports <- create_clean_reported_cases(
-    secondary_reports_dirty,
-    filter_leading_zeros = filter_leading_zeros,
-    zero_threshold = zero_threshold
-  )
-
   # If the user is using the default treatment of NA's as missing and
   # their data has implicit or explicit NA's, inform them of what's
   # happening and provide alternatives.
   obs <- check_na_setting_against_data(
     obs = obs,
-    data = secondary_reports_dirty,
+    data = reports,
     cols_to_check = c("date", "primary", "secondary")
   )
+  secondary_reports_dirty <- reports[, list(date, confirm = secondary)]
+  secondary_reports <- create_clean_reported_cases(
+    secondary_reports_dirty,
+    filter_leading_zeros = filter_leading_zeros,
+    zero_threshold = zero_threshold
+  )
   ## fill in missing data (required if fitting to prevalence)
   complete_secondary <- create_complete_cases(secondary_reports)
   ## fill down

From b1b1efbfd9c69f4a7b03d7782a86e798513d3822 Mon Sep 17 00:00:00 2001
From: jamesaazam <james.azam@lshtm.ac.uk>
Date: Tue, 8 Oct 2024 17:24:56 +0100
Subject: [PATCH 38/39] Remove test that no longer applies

---
 tests/testthat/test-obs_opts.R | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/testthat/test-obs_opts.R b/tests/testthat/test-obs_opts.R
index 5b853e80e..a9cb17db0 100644
--- a/tests/testthat/test-obs_opts.R
+++ b/tests/testthat/test-obs_opts.R
@@ -19,10 +19,6 @@ test_that("obs_opts returns expected messages", {
   # than every 8 hours, for the sake of multiple runs of the test within
   # 8 hours.
   rlang::local_options(rlib_message_verbosity = "verbose")
-  expect_message(
-    obs_opts(),
-    "NA values will be treated as missing"
-  )
   expect_message(
     obs_opts(na = "accumulate"),
     "modelled values that correspond to NA values"

From 9c578e508890913fddf4786f624f0d6d1321f1e8 Mon Sep 17 00:00:00 2001
From: James Azam <james.azam@lshtm.ac.uk>
Date: Fri, 11 Oct 2024 10:57:46 +0100
Subject: [PATCH 39/39] Add reviewer and PR number

Co-authored-by: Sebastian Funk <sebastian.funk@lshtm.ac.uk>
---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 189f3d2fc..2353c946d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -27,7 +27,7 @@ A release that introduces model improvements to the Gaussian Process models, alo
 - Switch to broadcasting the day of the week effect. By @seabbs in #746 and reviewed by @jamesmbaazam.
 - A warning is now thrown if nonparametric PMFs passed to delay options have consecutive tail values that are below a certain low threshold as these lead to loss in speed with little gain in accuracy. By @jamesmbaazam in #752 and reviewed by @seabbs, with a subsequent bug fix in #802.
 - `dist_fit()` can now accept any number of `samples` without throwing a warning when `samples` < 1000 in #751 by @jamesmbaazam and reviewed by @seabbs and @sbfnk.
-- `obs_opts()` now informs users about how NA observations are treated to help them decide on existing alternatives. By @jamesmbaazam in # and reviewed by <REVIEWER>. 
+- `obs_opts()` now informs users about how NA observations are treated to help them decide on existing alternatives. By @jamesmbaazam in #774 and reviewed by @sbfnk. 
 - Users are now informed that `NA` observations will be treated as missing instead of zero when using the default `obs_opts()`. Options to treat `NA` as zeros or accumulate them are also provided. By @jamesmbaazam in #774 and reviewed by @sbfnk.
 
 ## Package changes