Merge pull request #136 from UCD-SERG/add-group_by-option-to-summary

Add group by option to summary.pop_data()
UCD-SERG · Jul 15, 2024 · 662c265 · 662c265
2 parents bde2b98 + 6388182
commit 662c265
Show file tree

Hide file tree

Showing 15 changed files with 296 additions and 232 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: serocalculator
 Type: Package
 Title: Estimating Infection Rates from Serological Data
-Version: 1.1.0.9000
+Version: 1.2.0
 Authors@R: c(
   person(given = "Peter", family = "Teunis", email = "[email protected]", role = c("aut", "cph"), comment = "Author of the method and original code."),
   person(given = "Kristina", family = "Lai", email = "[email protected]", role = c("aut", "cre")), 

diff --git a/NAMESPACE b/NAMESPACE
@@ -14,6 +14,7 @@ S3method(get_value,pop_data)
 S3method(get_value_var,pop_data)
 S3method(print,seroincidence)
 S3method(print,seroincidence.by)
+S3method(print,summary.pop_data)
 S3method(print,summary.seroincidence.by)
 S3method(set_age,pop_data)
 S3method(set_id,pop_data)

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,9 @@
-# serocalculator (development version)
+# serocalculator 1.2.0
+* Added `test-summary.pop_data` test
+
+* Modified `test-est.incidence` test
+
+* Added stratification to `summary.pop_data`
 
 * Added `verbose` option for `check_pop_data()`, changing default behavior
 to avoid printing an OK message.

diff --git a/data-raw/nlm_exit_codes.R → R/nlm_exit_codes.R b/data-raw/nlm_exit_codes.R → R/nlm_exit_codes.R
@@ -6,4 +6,3 @@ nlm_exit_codes <- c(
   "5" = "5: maximum step size `stepmax` exceeded five consecutive times. Either the function is unbounded below, becomes asymptotic to a finite value from above in some direction or `stepmax` is too small."
 )
 
-usethis::use_data(nlm_exit_codes, overwrite = TRUE, internal = TRUE)
diff --git a/R/summary.pop_data.R b/R/summary.pop_data.R
@@ -1,55 +1,96 @@
 #'
-#' @title Summarize a cross-sectional antibody survey data set
+#' @title Summarize cross-sectional antibody survey data
 #' @description
-#' This function is a `summary()` method for `pop_data` objects
+#' [summary()] method for `pop_data` objects
 #'
-#' @param object a `pop_data` object
+#' @param object a `pop_data` object (from [as_pop_data()])
+#' @param strata a [character()] specifying grouping column(s)
 #' @param ...  unused
 #'
-#' @returns a list containing two summary tables: one of `age` and one of `value`, stratified by `antigen_iso`
+#' @returns a `summary.pop_data` object, which is a list containing two summary tables:
+#'
+#' * `age_summary` summarizing `age`
+#' * `ab_summary` summarizing `value`, stratified by `antigen_iso`
+#'
 #' @export
 #' @examples
 #' library(dplyr)
 #'
 #' xs_data <- load_pop_data("https://osf.io/download//n6cp3/")
+#' summary(xs_data, strata = "Country")
 #'
-#' summary(xs_data)
-#'
-summary.pop_data <- function(object, ...) {
+summary.pop_data <- function(object, strata = NULL, ...) {
+  # get relevant columns from object
+  age_column <- object %>% get_age_var()
+  value_column <- object %>% get_value_var()
+  id_column <- object %>% get_id_var()
+
+  # create a list of the columns
+  cols <- c(age_column, id_column, strata)
+
   ages <-
     object %>%
-    distinct(.data$id, .data$age)
-
-  cat("\nn =", nrow(ages), "\n")
+    distinct(across(all_of(cols)))
 
-  cat("\nDistribution of age: \n\n")
   age_summary <-
     ages %>%
-    pull("age") %>%
-    summary() %>%
-    print()
-
-  cat("\nDistributions of antigen-isotype measurements:\n\n")
+    summarise(
+      .by = all_of(strata),
+      n = n(),
+      min = min(.data[[age_column]]),
+      first_quartile = quantile(.data[[age_column]], 0.25),
+      median = median(.data[[age_column]]),
+      mean = mean(.data[[age_column]]),
+      third_quartile = quantile(.data[[age_column]], 0.75),
+      max = max(.data[[age_column]])
+    )
 
   ab_summary <-
     object %>%
     dplyr::summarize(
-      .by = .data$antigen_iso,
-      Min = object %>% get_value() %>% min(na.rm = TRUE),
-      `1st Qu.` = object %>% get_value() %>% quantile(.25, na.rm = TRUE),
-      Median = object %>% get_value() %>% median(),
-      `3rd Qu.` = object %>% get_value() %>% quantile(.75, na.rm = TRUE),
-      Max = object %>% get_value() %>% max(na.rm = TRUE),
-      `# NAs` = object %>% get_value() %>% is.na() %>% sum()
-    ) %>%
-    as.data.frame() %>%
-    print()
-
-  to_return <- list(
-    n = nrow(ages),
-    age_summary = age_summary,
-    ab_summary = ab_summary
-  )
-
-  return(invisible(to_return))
+      .by = all_of(c("antigen_iso", strata)),
+      across(
+        .cols = all_of(value_column),
+        .fns = list(
+          Min = ~ min(.x, na.rm = TRUE),
+          `1st Qu.` = ~ quantile(.x, p = .25, na.rm = TRUE),
+          Median = ~ median(.x, na.rm = TRUE),
+          `3rd Qu.` = ~ quantile(.x, p = .75, na.rm = TRUE),
+          Max = ~ max(.x, na.rm = TRUE),
+          `# NAs` = ~ is.na(.x) %>% sum()
+        ),
+        .names = "{.fn}"
+      ))
+
+  to_return <- list(n = nrow(ages),
+                    age_summary = age_summary,
+                    ab_summary = ab_summary)
+
+  class(to_return) = "summary.pop_data"
+
+  return(to_return)
+}
+
+
+#' Print method for [summary.pop_data] objects
+#' @param x an object of class `"summary.pop_data"`; usually, the result of a call to [summary.pop_data()]
+#' @rdname summary.pop_data
+#' @export
+print.summary.pop_data = function(x, ...)
+{
+  n_obs = x$age_summary %>% pull("n") %>% sum()
+
+  cat("\nn =", n_obs, "\n")
+
+  cat("\nDistribution of age: \n\n")
+
+  x$age_summary %>% print()
+
+  cat("\nDistributions of antigen-isotype measurements:\n\n")
+
+  x$ab_summary %>% print()
+
+  cat("\n")
+
+  invisible(x)
 }
diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/data-raw/typhoid_results.R b/data-raw/typhoid_results.R
@@ -0,0 +1,40 @@
+# Filter population data for Pakistan
+xs_data <- load_pop_data(
+  file_path = "https://osf.io/download//n6cp3/",
+  age = "Age",
+  value = "result",
+  id = "index_id",
+  standardize = TRUE
+) %>%
+  filter(Country == "Pakistan")
+
+# get noise data
+noise <- load_noise_params("https://osf.io/download//hqy4v/") %>%
+  filter(Country == "Pakistan")
+
+# get curve data
+curve <- load_curve_params("https://osf.io/download/rtw5k/")
+
+# Initial estimates for lambda
+start <- .05
+
+# Estimate incidence
+fit <- est.incidence(
+  pop_data = xs_data,
+  curve_param = curve,
+  noise_param = noise,
+  antigen_isos = c("HlyE_IgG", "HlyE_IgA")
+)
+
+typhoid_results <- fit %>%
+  summary.seroincidence(
+    coverage = .95,
+    start = start
+  ) %>%
+  mutate(
+    ageCat = NULL,
+    antigen.iso = paste(collapse = "+", "HlyE_IgG")
+  ) %>%
+  structure(noise.parameters = noise)
+
+saveRDS(object = typhoid_results,file = "tests/testthat/fixtures/typhoid_results.rds")
diff --git a/data-raw/typhoid_results.qmd b/data-raw/typhoid_results.qmd
diff --git a/man/summary.pop_data.Rd b/man/summary.pop_data.Rd
diff --git a/tests/testthat/_snaps/est.incidence.md b/tests/testthat/_snaps/est.incidence.md
@@ -0,0 +1,12 @@
+# est.incidence() produces expected results for typhoid data
+
+    Code
+      typhoid_results
+    Output
+      # A tibble: 1 x 11
+        est.start incidence.rate      SE CI.lwr CI.upr coverage log.lik iterations
+      *     <dbl>          <dbl>   <dbl>  <dbl>  <dbl>    <dbl>   <dbl>      <int>
+      1       0.1          0.128 0.00682  0.115  0.142     0.95  -2376.          4
+      # i 3 more variables: antigen.isos <chr>, nlm.convergence.code <ord>,
+      #   antigen.iso <chr>
+