diff --git a/DESCRIPTION b/DESCRIPTION index fe03659a..a1b4a88e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: serocalculator Type: Package Title: Estimating Infection Rates from Serological Data -Version: 1.1.0.9000 +Version: 1.2.0 Authors@R: c( person(given = "Peter", family = "Teunis", email = "p.teunis@emory.edu", role = c("aut", "cph"), comment = "Author of the method and original code."), person(given = "Kristina", family = "Lai", email = "kwlai@ucdavis.edu", role = c("aut", "cre")), diff --git a/NAMESPACE b/NAMESPACE index 67dabb29..0286bee0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ S3method(get_value,pop_data) S3method(get_value_var,pop_data) S3method(print,seroincidence) S3method(print,seroincidence.by) +S3method(print,summary.pop_data) S3method(print,summary.seroincidence.by) S3method(set_age,pop_data) S3method(set_id,pop_data) diff --git a/NEWS.md b/NEWS.md index cedd2a2f..61d31397 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,9 @@ -# serocalculator (development version) +# serocalculator 1.2.0 +* Added `test-summary.pop_data` test + +* Modified `test-est.incidence` test + +* Added stratification to `summary.pop_data` * Added `verbose` option for `check_pop_data()`, changing default behavior to avoid printing an OK message. diff --git a/data-raw/nlm_exit_codes.R b/R/nlm_exit_codes.R similarity index 91% rename from data-raw/nlm_exit_codes.R rename to R/nlm_exit_codes.R index c22577ac..82711539 100644 --- a/data-raw/nlm_exit_codes.R +++ b/R/nlm_exit_codes.R @@ -6,4 +6,3 @@ nlm_exit_codes <- c( "5" = "5: maximum step size `stepmax` exceeded five consecutive times. Either the function is unbounded below, becomes asymptotic to a finite value from above in some direction or `stepmax` is too small." ) -usethis::use_data(nlm_exit_codes, overwrite = TRUE, internal = TRUE) diff --git a/R/summary.pop_data.R b/R/summary.pop_data.R index b602dbb9..bdb718c5 100644 --- a/R/summary.pop_data.R +++ b/R/summary.pop_data.R @@ -1,55 +1,96 @@ #' -#' @title Summarize a cross-sectional antibody survey data set +#' @title Summarize cross-sectional antibody survey data #' @description -#' This function is a `summary()` method for `pop_data` objects +#' [summary()] method for `pop_data` objects #' -#' @param object a `pop_data` object +#' @param object a `pop_data` object (from [as_pop_data()]) +#' @param strata a [character()] specifying grouping column(s) #' @param ... unused #' -#' @returns a list containing two summary tables: one of `age` and one of `value`, stratified by `antigen_iso` +#' @returns a `summary.pop_data` object, which is a list containing two summary tables: +#' +#' * `age_summary` summarizing `age` +#' * `ab_summary` summarizing `value`, stratified by `antigen_iso` +#' #' @export #' @examples #' library(dplyr) #' #' xs_data <- load_pop_data("https://osf.io/download//n6cp3/") +#' summary(xs_data, strata = "Country") #' -#' summary(xs_data) -#' -summary.pop_data <- function(object, ...) { +summary.pop_data <- function(object, strata = NULL, ...) { + # get relevant columns from object + age_column <- object %>% get_age_var() + value_column <- object %>% get_value_var() + id_column <- object %>% get_id_var() + + # create a list of the columns + cols <- c(age_column, id_column, strata) + ages <- object %>% - distinct(.data$id, .data$age) - - cat("\nn =", nrow(ages), "\n") + distinct(across(all_of(cols))) - cat("\nDistribution of age: \n\n") age_summary <- ages %>% - pull("age") %>% - summary() %>% - print() - - cat("\nDistributions of antigen-isotype measurements:\n\n") + summarise( + .by = all_of(strata), + n = n(), + min = min(.data[[age_column]]), + first_quartile = quantile(.data[[age_column]], 0.25), + median = median(.data[[age_column]]), + mean = mean(.data[[age_column]]), + third_quartile = quantile(.data[[age_column]], 0.75), + max = max(.data[[age_column]]) + ) ab_summary <- object %>% dplyr::summarize( - .by = .data$antigen_iso, - Min = object %>% get_value() %>% min(na.rm = TRUE), - `1st Qu.` = object %>% get_value() %>% quantile(.25, na.rm = TRUE), - Median = object %>% get_value() %>% median(), - `3rd Qu.` = object %>% get_value() %>% quantile(.75, na.rm = TRUE), - Max = object %>% get_value() %>% max(na.rm = TRUE), - `# NAs` = object %>% get_value() %>% is.na() %>% sum() - ) %>% - as.data.frame() %>% - print() - - to_return <- list( - n = nrow(ages), - age_summary = age_summary, - ab_summary = ab_summary - ) - - return(invisible(to_return)) + .by = all_of(c("antigen_iso", strata)), + across( + .cols = all_of(value_column), + .fns = list( + Min = ~ min(.x, na.rm = TRUE), + `1st Qu.` = ~ quantile(.x, p = .25, na.rm = TRUE), + Median = ~ median(.x, na.rm = TRUE), + `3rd Qu.` = ~ quantile(.x, p = .75, na.rm = TRUE), + Max = ~ max(.x, na.rm = TRUE), + `# NAs` = ~ is.na(.x) %>% sum() + ), + .names = "{.fn}" + )) + + to_return <- list(n = nrow(ages), + age_summary = age_summary, + ab_summary = ab_summary) + + class(to_return) = "summary.pop_data" + + return(to_return) +} + + +#' Print method for [summary.pop_data] objects +#' @param x an object of class `"summary.pop_data"`; usually, the result of a call to [summary.pop_data()] +#' @rdname summary.pop_data +#' @export +print.summary.pop_data = function(x, ...) +{ + n_obs = x$age_summary %>% pull("n") %>% sum() + + cat("\nn =", n_obs, "\n") + + cat("\nDistribution of age: \n\n") + + x$age_summary %>% print() + + cat("\nDistributions of antigen-isotype measurements:\n\n") + + x$ab_summary %>% print() + + cat("\n") + + invisible(x) } diff --git a/R/sysdata.rda b/R/sysdata.rda deleted file mode 100644 index 8c5f3fc1..00000000 Binary files a/R/sysdata.rda and /dev/null differ diff --git a/data-raw/typhoid_results.R b/data-raw/typhoid_results.R new file mode 100644 index 00000000..56838c0c --- /dev/null +++ b/data-raw/typhoid_results.R @@ -0,0 +1,40 @@ +# Filter population data for Pakistan +xs_data <- load_pop_data( + file_path = "https://osf.io/download//n6cp3/", + age = "Age", + value = "result", + id = "index_id", + standardize = TRUE +) %>% + filter(Country == "Pakistan") + +# get noise data +noise <- load_noise_params("https://osf.io/download//hqy4v/") %>% + filter(Country == "Pakistan") + +# get curve data +curve <- load_curve_params("https://osf.io/download/rtw5k/") + +# Initial estimates for lambda +start <- .05 + +# Estimate incidence +fit <- est.incidence( + pop_data = xs_data, + curve_param = curve, + noise_param = noise, + antigen_isos = c("HlyE_IgG", "HlyE_IgA") +) + +typhoid_results <- fit %>% + summary.seroincidence( + coverage = .95, + start = start + ) %>% + mutate( + ageCat = NULL, + antigen.iso = paste(collapse = "+", "HlyE_IgG") + ) %>% + structure(noise.parameters = noise) + +saveRDS(object = typhoid_results,file = "tests/testthat/fixtures/typhoid_results.rds") diff --git a/data-raw/typhoid_results.qmd b/data-raw/typhoid_results.qmd deleted file mode 100644 index e9b15ab4..00000000 --- a/data-raw/typhoid_results.qmd +++ /dev/null @@ -1,98 +0,0 @@ ---- -title: "typhoid_results" -format: html ---- - - - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup} -# library(devtools) -# install_github("UCD-SERG/serocalculator") -library(serocalculator) -library(tidyverse) -``` - -```{r longdata} -c.hlye.IgG <- - fs::path_package( - "extdata", - "dmcmc_hlyeigg_09.30.rds", - package = "serocalculator" - ) %>% # Load longitudinal parameters dataset - readRDS() %>% - mutate( - alpha = alpha * 365.25, # Create alpha and d - d = r - 1 - ) %>% - select(y1, alpha, d) # Select only the variables needed for analysis -``` - -``` {r simdata} -library(fs) # filesystem utility functions -p.hlye.IgG <- - fs::path_package( - package = "serocalculator", - "extdata/simpophlyeigg.2.csv" - ) %>% # Load simulated cross-sectional dataset - read_csv() %>% - rename( # rename variables - y = y.smpl, - a = a.smpl - ) %>% - select(y, a) # Select only the variables needed for analysis -``` - -``` {r conditions} -cond.hlye.IgG <- data.frame( - nu = 1.027239, # B noise - eps = 0.2, # M noise - y.low = 0.0, # low cutoff - y.high = 5e4, - antigen_iso = "HlyE_IgG" -) -``` - -```{r seroinc} -start <- .05 - -lambda <- start # initial estimate: starting value -log.lambda <- log(lambda) -log.lmin <- log(lambda / 10) -log.lmax <- log(10 * lambda) - - - -objfunc <- function(llam) { - return(res <- f_dev(llam, p.hlye.IgG, c.hlye.IgG, cond.hlye.IgG)) -} - - -fit <- nlm( - objfunc, - p = log.lambda, - hessian = TRUE, - print.level = 0, - stepmax = 1, - iterlim = 100 -) - -typhoid_results <- fit %>% - summary.seroincidence( - coverage = .95, - start = start - ) %>% - mutate( - ageCat = NULL, - antigen.iso = paste(collapse = "+", "HlyE_IgG") - ) %>% - structure(noise.parameters = cond.hlye.IgG) - -usethis::use_data(typhoid_results, internal = TRUE, overwrite = TRUE) -``` diff --git a/man/summary.pop_data.Rd b/man/summary.pop_data.Rd index 1a36d356..63faaec2 100644 --- a/man/summary.pop_data.Rd +++ b/man/summary.pop_data.Rd @@ -2,26 +2,36 @@ % Please edit documentation in R/summary.pop_data.R \name{summary.pop_data} \alias{summary.pop_data} -\title{Summarize a cross-sectional antibody survey data set} +\alias{print.summary.pop_data} +\title{Summarize cross-sectional antibody survey data} \usage{ -\method{summary}{pop_data}(object, ...) +\method{summary}{pop_data}(object, strata = NULL, ...) + +\method{print}{summary.pop_data}(x, ...) } \arguments{ -\item{object}{a \code{pop_data} object} +\item{object}{a \code{pop_data} object (from \code{\link[=as_pop_data]{as_pop_data()}})} + +\item{strata}{a \code{\link[=character]{character()}} specifying grouping column(s)} \item{...}{unused} + +\item{x}{an object of class \code{"summary.pop_data"}; usually, the result of a call to \code{\link[=summary.pop_data]{summary.pop_data()}}} } \value{ -a list containing two summary tables: one of \code{age} and one of \code{value}, stratified by \code{antigen_iso} +a \code{summary.pop_data} object, which is a list containing two summary tables: +\itemize{ +\item \code{age_summary} summarizing \code{age} +\item \code{ab_summary} summarizing \code{value}, stratified by \code{antigen_iso} +} } \description{ -This function is a \code{summary()} method for \code{pop_data} objects +\code{\link[=summary]{summary()}} method for \code{pop_data} objects } \examples{ library(dplyr) xs_data <- load_pop_data("https://osf.io/download//n6cp3/") - -summary(xs_data) +summary(xs_data, strata = "Country") } diff --git a/tests/testthat/_snaps/est.incidence.md b/tests/testthat/_snaps/est.incidence.md new file mode 100644 index 00000000..6c19b509 --- /dev/null +++ b/tests/testthat/_snaps/est.incidence.md @@ -0,0 +1,12 @@ +# est.incidence() produces expected results for typhoid data + + Code + typhoid_results + Output + # A tibble: 1 x 11 + est.start incidence.rate SE CI.lwr CI.upr coverage log.lik iterations + * + 1 0.1 0.128 0.00682 0.115 0.142 0.95 -2376. 4 + # i 3 more variables: antigen.isos , nlm.convergence.code , + # antigen.iso + diff --git a/tests/testthat/_snaps/summary.pop_data.md b/tests/testthat/_snaps/summary.pop_data.md new file mode 100644 index 00000000..31775ed4 --- /dev/null +++ b/tests/testthat/_snaps/summary.pop_data.md @@ -0,0 +1,54 @@ +# `summary.pop_data()` produces stable results when `strata = NULL` + + Code + xs_data %>% summary(strata = NULL) + Output + + n = 3336 + + Distribution of age: + + # A tibble: 1 x 7 + n min first_quartile median mean third_quartile max + + 1 3336 0.6 5 10 10.5 15 25 + + Distributions of antigen-isotype measurements: + + # A tibble: 2 x 7 + antigen_iso Min `1st Qu.` Median `3rd Qu.` Max `# NAs` + + 1 HlyE_IgA 0 0.851 1.74 3.66 133. 0 + 2 HlyE_IgG 0 1.15 2.70 6.74 219. 0 + + +# `summary.pop_data()` produces stable results with stratification + + Code + xs_data %>% summary(strata = "Country") + Output + + n = 3336 + + Distribution of age: + + # A tibble: 3 x 8 + Country n min first_quartile median mean third_quartile max + + 1 Bangladesh 802 0.6 4.9 9.2 9.43 14 18 + 2 Nepal 1546 0.9 5.3 10.8 11.3 16.7 25 + 3 Pakistan 988 0.8 4.9 9 10.1 14.8 24.3 + + Distributions of antigen-isotype measurements: + + # A tibble: 6 x 8 + antigen_iso Country Min `1st Qu.` Median `3rd Qu.` Max `# NAs` + + 1 HlyE_IgA Bangladesh 0.0418 2.11 3.58 6.70 113. 0 + 2 HlyE_IgG Bangladesh 0.119 4.97 9.32 18.9 219. 0 + 3 HlyE_IgA Nepal 0 0.563 1.02 2.05 57.5 0 + 4 HlyE_IgG Nepal 0 0.897 1.62 3.37 184. 0 + 5 HlyE_IgA Pakistan 0 1.13 2.12 3.89 133. 0 + 6 HlyE_IgG Pakistan 0.192 1.04 2.40 5.15 135. 0 + + diff --git a/tests/testthat/test-est.incidence-status.R b/tests/testthat/test-est.incidence-status.R deleted file mode 100644 index 9b513fcc..00000000 --- a/tests/testthat/test-est.incidence-status.R +++ /dev/null @@ -1,35 +0,0 @@ -test_that("`est.incidence()` produces expected results", { - curves <- load_curve_params("https://osf.io/download/rtw5k/") - noise <- load_noise_params("https://osf.io/download//hqy4v/") - xs_data_true <- load_pop_data( - file_path = "https://osf.io/download//n6cp3/", - age = "Age", - value = "result", - id = "index_id", - standardize = TRUE - ) - - est_true <- est.incidence( - pop_data = xs_data_true %>% filter(Country == "Pakistan"), - curve_params = curves, - noise_params = noise %>% filter(Country == "Pakistan"), - antigen_isos = c("HlyE_IgG", "HlyE_IgA") - ) - - xs_data_false <- load_pop_data( - file_path = "https://osf.io/download//n6cp3/", - age = "Age", - value = "result", - id = "index_id", - standardize = FALSE - ) - - est_false <- est.incidence( - pop_data = xs_data_false %>% filter(Country == "Pakistan"), - curve_params = curves, - noise_params = noise %>% filter(Country == "Pakistan"), - antigen_isos = c("HlyE_IgG", "HlyE_IgA") - ) - - expect_equal(est_true, est_false) -}) diff --git a/tests/testthat/test-est.incidence.R b/tests/testthat/test-est.incidence.R index 40eb35a9..ddd01868 100644 --- a/tests/testthat/test-est.incidence.R +++ b/tests/testthat/test-est.incidence.R @@ -1,66 +1,78 @@ test_that( "est.incidence() produces expected results for typhoid data", { - skip(message = "Skipping test of `est.incidence()` for now, because github was producing miniscule differences in SE (and thus CIs) for some reason that I don't have time to hunt down.") + # get pop data + xs_data <- load_pop_data( + file_path = "https://osf.io/download//n6cp3/", + age = "Age", + value = "result", + id = "index_id", + standardize = TRUE + ) %>% + filter(Country == "Pakistan") - library(readr) - library(dplyr) - c.hlye.IgG <- - fs::path_package( - "extdata", - "dmcmc_hlyeigg_09.30.rds", - package = "serocalculator" - ) %>% # Load longitudinal parameters dataset - readRDS() %>% - select(y1, alpha, r, antigen_iso) + # get noise data + noise <- load_noise_params("https://osf.io/download//hqy4v/") %>% + filter(Country == "Pakistan") - p.hlye.IgG <- - fs::path_package( - package = "serocalculator", - "extdata/simpophlyeigg.2.csv" - ) %>% # Load simulated cross-sectional dataset - read_csv( - col_types = cols( - a.smpl = col_double(), - y.smpl = col_double(), - i = col_double(), - t = col_double() - ) + # get curve data + curve <- load_curve_params("https://osf.io/download/rtw5k/") + + # set start + start <- .05 + + typhoid_results <- est.incidence( + pop_data = xs_data, + curve_param = curve, + noise_param = noise, + antigen_isos = c("HlyE_IgG", "HlyE_IgA") + ) %>% + summary.seroincidence( + coverage = .95, + start = start ) %>% - rename( # rename variables - y = y.smpl, - a = a.smpl + mutate( + ageCat = NULL, + antigen.iso = paste(collapse = "+", "HlyE_IgG") ) %>% - select(y, a) %>% - mutate(antigen_iso = "HlyE_IgG") + structure(noise.parameters = noise) - cond.hlye.IgG <- data.frame( - nu = 1.027239, # B noise - eps = 0.2, # M noise - y.low = 0.0, # low cutoff - y.high = 5e4, - antigen_iso = "HlyE_IgG" - ) + expect_snapshot(x = typhoid_results) + } +) - start <- .05 +test_that("`est.incidence()` produces expected results", { + curves <- load_curve_params("https://osf.io/download/rtw5k/") + noise <- load_noise_params("https://osf.io/download//hqy4v/") + xs_data_true <- load_pop_data( + file_path = "https://osf.io/download//n6cp3/", + age = "Age", + value = "result", + id = "index_id", + standardize = TRUE + ) - fit <- est.incidence( - dpop = p.hlye.IgG, - dmcmc = c.hlye.IgG, - c.age = NULL, - antigen_isos = "HlyE_IgG", - noise_params = cond.hlye.IgG, - start = start, - print.level = 2, - iterlim = 100, - stepmax = 1 - ) + est_true <- est.incidence( + pop_data = xs_data_true %>% filter(Country == "Pakistan"), + curve_params = curves, + noise_params = noise %>% filter(Country == "Pakistan"), + antigen_isos = c("HlyE_IgG", "HlyE_IgA") + ) - # compare with `typhoid_results` from data-raw/typhoid_results.qmd + xs_data_false <- load_pop_data( + file_path = "https://osf.io/download//n6cp3/", + age = "Age", + value = "result", + id = "index_id", + standardize = FALSE + ) - expect_equal( - object = fit, - expected = typhoid_results - ) - } -) + est_false <- est.incidence( + pop_data = xs_data_false %>% filter(Country == "Pakistan"), + curve_params = curves, + noise_params = noise %>% filter(Country == "Pakistan"), + antigen_isos = c("HlyE_IgG", "HlyE_IgA") + ) + + expect_equal(est_true, est_false) +}) diff --git a/tests/testthat/test-summary.pop_data.R b/tests/testthat/test-summary.pop_data.R new file mode 100644 index 00000000..298f712e --- /dev/null +++ b/tests/testthat/test-summary.pop_data.R @@ -0,0 +1,23 @@ +xs_data <- load_pop_data( + file_path = "https://osf.io/download//n6cp3/", + age = "Age", + value = "result", + id = "index_id", + standardize = TRUE +) + +test_that("`summary.pop_data()` produces an error when wrong stratification is provided", { + expect_error( + object = xs_data %>% summary(strata = "province"), + regexp = "Element `province` doesn't exist.", + fixed = TRUE + ) +}) + +test_that("`summary.pop_data()` produces stable results when `strata = NULL`", { + expect_snapshot(xs_data %>% summary(strata = NULL)) +}) + +test_that("`summary.pop_data()` produces stable results with stratification", { + expect_snapshot(xs_data %>% summary(strata = "Country")) +}) diff --git a/vignettes/articles/scrubTyphus_example.Rmd b/vignettes/articles/scrubTyphus_example.Rmd index 07763627..b1cc5045 100644 --- a/vignettes/articles/scrubTyphus_example.Rmd +++ b/vignettes/articles/scrubTyphus_example.Rmd @@ -113,7 +113,7 @@ xs_data %>% check_pop_data(verbose = TRUE) We can compute numerical summaries of our cross-sectional antibody data with a `summary()` method for `pop_data` objects: ```{r} -xs_data %>% summary() +xs_data %>% summary(strata = "country") ```