Skip to content

Commit

Permalink
Merge pull request #136 from UCD-SERG/add-group_by-option-to-summary
Browse files Browse the repository at this point in the history
Add group by option to summary.pop_data()
  • Loading branch information
d-morrison authored Jul 15, 2024
2 parents bde2b98 + 6388182 commit 662c265
Show file tree
Hide file tree
Showing 15 changed files with 296 additions and 232 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: serocalculator
Type: Package
Title: Estimating Infection Rates from Serological Data
Version: 1.1.0.9000
Version: 1.2.0
Authors@R: c(
person(given = "Peter", family = "Teunis", email = "[email protected]", role = c("aut", "cph"), comment = "Author of the method and original code."),
person(given = "Kristina", family = "Lai", email = "[email protected]", role = c("aut", "cre")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ S3method(get_value,pop_data)
S3method(get_value_var,pop_data)
S3method(print,seroincidence)
S3method(print,seroincidence.by)
S3method(print,summary.pop_data)
S3method(print,summary.seroincidence.by)
S3method(set_age,pop_data)
S3method(set_id,pop_data)
Expand Down
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# serocalculator (development version)
# serocalculator 1.2.0
* Added `test-summary.pop_data` test

* Modified `test-est.incidence` test

* Added stratification to `summary.pop_data`

* Added `verbose` option for `check_pop_data()`, changing default behavior
to avoid printing an OK message.
Expand Down
1 change: 0 additions & 1 deletion data-raw/nlm_exit_codes.R → R/nlm_exit_codes.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,3 @@ nlm_exit_codes <- c(
"5" = "5: maximum step size `stepmax` exceeded five consecutive times. Either the function is unbounded below, becomes asymptotic to a finite value from above in some direction or `stepmax` is too small."
)

usethis::use_data(nlm_exit_codes, overwrite = TRUE, internal = TRUE)
109 changes: 75 additions & 34 deletions R/summary.pop_data.R
Original file line number Diff line number Diff line change
@@ -1,55 +1,96 @@
#'
#' @title Summarize a cross-sectional antibody survey data set
#' @title Summarize cross-sectional antibody survey data
#' @description
#' This function is a `summary()` method for `pop_data` objects
#' [summary()] method for `pop_data` objects
#'
#' @param object a `pop_data` object
#' @param object a `pop_data` object (from [as_pop_data()])
#' @param strata a [character()] specifying grouping column(s)
#' @param ... unused
#'
#' @returns a list containing two summary tables: one of `age` and one of `value`, stratified by `antigen_iso`
#' @returns a `summary.pop_data` object, which is a list containing two summary tables:
#'
#' * `age_summary` summarizing `age`
#' * `ab_summary` summarizing `value`, stratified by `antigen_iso`
#'
#' @export
#' @examples
#' library(dplyr)
#'
#' xs_data <- load_pop_data("https://osf.io/download//n6cp3/")
#' summary(xs_data, strata = "Country")
#'
#' summary(xs_data)
#'
summary.pop_data <- function(object, ...) {
summary.pop_data <- function(object, strata = NULL, ...) {
# get relevant columns from object
age_column <- object %>% get_age_var()
value_column <- object %>% get_value_var()
id_column <- object %>% get_id_var()

# create a list of the columns
cols <- c(age_column, id_column, strata)

ages <-
object %>%
distinct(.data$id, .data$age)

cat("\nn =", nrow(ages), "\n")
distinct(across(all_of(cols)))

cat("\nDistribution of age: \n\n")
age_summary <-
ages %>%
pull("age") %>%
summary() %>%
print()

cat("\nDistributions of antigen-isotype measurements:\n\n")
summarise(
.by = all_of(strata),
n = n(),
min = min(.data[[age_column]]),
first_quartile = quantile(.data[[age_column]], 0.25),
median = median(.data[[age_column]]),
mean = mean(.data[[age_column]]),
third_quartile = quantile(.data[[age_column]], 0.75),
max = max(.data[[age_column]])
)

ab_summary <-
object %>%
dplyr::summarize(
.by = .data$antigen_iso,
Min = object %>% get_value() %>% min(na.rm = TRUE),
`1st Qu.` = object %>% get_value() %>% quantile(.25, na.rm = TRUE),
Median = object %>% get_value() %>% median(),
`3rd Qu.` = object %>% get_value() %>% quantile(.75, na.rm = TRUE),
Max = object %>% get_value() %>% max(na.rm = TRUE),
`# NAs` = object %>% get_value() %>% is.na() %>% sum()
) %>%
as.data.frame() %>%
print()

to_return <- list(
n = nrow(ages),
age_summary = age_summary,
ab_summary = ab_summary
)

return(invisible(to_return))
.by = all_of(c("antigen_iso", strata)),
across(
.cols = all_of(value_column),
.fns = list(
Min = ~ min(.x, na.rm = TRUE),
`1st Qu.` = ~ quantile(.x, p = .25, na.rm = TRUE),
Median = ~ median(.x, na.rm = TRUE),
`3rd Qu.` = ~ quantile(.x, p = .75, na.rm = TRUE),
Max = ~ max(.x, na.rm = TRUE),
`# NAs` = ~ is.na(.x) %>% sum()
),
.names = "{.fn}"
))

to_return <- list(n = nrow(ages),
age_summary = age_summary,
ab_summary = ab_summary)

class(to_return) = "summary.pop_data"

return(to_return)
}


#' Print method for [summary.pop_data] objects
#' @param x an object of class `"summary.pop_data"`; usually, the result of a call to [summary.pop_data()]
#' @rdname summary.pop_data
#' @export
print.summary.pop_data = function(x, ...)
{
n_obs = x$age_summary %>% pull("n") %>% sum()

cat("\nn =", n_obs, "\n")

cat("\nDistribution of age: \n\n")

x$age_summary %>% print()

cat("\nDistributions of antigen-isotype measurements:\n\n")

x$ab_summary %>% print()

cat("\n")

invisible(x)
}
Binary file removed R/sysdata.rda
Binary file not shown.
40 changes: 40 additions & 0 deletions data-raw/typhoid_results.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Filter population data for Pakistan
xs_data <- load_pop_data(
file_path = "https://osf.io/download//n6cp3/",
age = "Age",
value = "result",
id = "index_id",
standardize = TRUE
) %>%
filter(Country == "Pakistan")

# get noise data
noise <- load_noise_params("https://osf.io/download//hqy4v/") %>%
filter(Country == "Pakistan")

# get curve data
curve <- load_curve_params("https://osf.io/download/rtw5k/")

# Initial estimates for lambda
start <- .05

# Estimate incidence
fit <- est.incidence(
pop_data = xs_data,
curve_param = curve,
noise_param = noise,
antigen_isos = c("HlyE_IgG", "HlyE_IgA")
)

typhoid_results <- fit %>%
summary.seroincidence(
coverage = .95,
start = start
) %>%
mutate(
ageCat = NULL,
antigen.iso = paste(collapse = "+", "HlyE_IgG")
) %>%
structure(noise.parameters = noise)

saveRDS(object = typhoid_results,file = "tests/testthat/fixtures/typhoid_results.rds")
98 changes: 0 additions & 98 deletions data-raw/typhoid_results.qmd

This file was deleted.

24 changes: 17 additions & 7 deletions man/summary.pop_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions tests/testthat/_snaps/est.incidence.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# est.incidence() produces expected results for typhoid data

Code
typhoid_results
Output
# A tibble: 1 x 11
est.start incidence.rate SE CI.lwr CI.upr coverage log.lik iterations
* <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 0.1 0.128 0.00682 0.115 0.142 0.95 -2376. 4
# i 3 more variables: antigen.isos <chr>, nlm.convergence.code <ord>,
# antigen.iso <chr>

Loading

0 comments on commit 662c265

Please sign in to comment.