diff --git a/DESCRIPTION b/DESCRIPTION index d28b74f..748cb70 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,6 @@ Imports: dplyr (>= 1.1.0), glue (>= 1.5.0), omopgenerics (>= 0.1.2), - lifecycle, lubridate (>= 1.0.0), magrittr (>= 2.0.0), PatientProfiles (>= 1.1.0), diff --git a/NAMESPACE b/NAMESPACE index ad48845..48462d0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,8 @@ export(plotIncidence) export(plotPrevalence) export(settings) export(suppress) +export(tableIncidence) +export(tablePrevalence) importFrom(magrittr,"%>%") importFrom(omopgenerics,attrition) importFrom(omopgenerics,bind) diff --git a/R/estimateIncidence.R b/R/estimateIncidence.R index f0a912a..309e773 100644 --- a/R/estimateIncidence.R +++ b/R/estimateIncidence.R @@ -83,7 +83,7 @@ estimateIncidence <- function(cdm, strata = list(), includeOverallStrata = TRUE) { - summarisedResult <- FALSE + summarisedResult <- TRUE startCollect <- Sys.time() @@ -401,6 +401,9 @@ estimateIncidence <- function(cdm, irs <- irs |> visOmopResults::uniteStrata() } + if(nrow(irs) == 0){ + irs <- omopgenerics::emptySummarisedResult() + } else { irs <- irs |> dplyr::distinct() |> dplyr::mutate("analysis_id" = as.integer(.data$analysis_id)) |> @@ -439,6 +442,7 @@ estimateIncidence <- function(cdm, "strata_name" = dplyr::if_else(.data$strata_name == "Overall", "overall", gsub(" and ", " &&& ", .data$strata_name)), "strata_level" = dplyr::if_else(.data$strata_level == "Overall", "overall", gsub(" and ", " &&& ", .data$strata_level)) ) + } irs <- omopgenerics::newSummarisedResult(irs, settings = analysisSettings) |> omopgenerics::suppress(minCellCount = minCellCount) diff --git a/R/estimatePrevalence.R b/R/estimatePrevalence.R index b79030f..7aeb993 100644 --- a/R/estimatePrevalence.R +++ b/R/estimatePrevalence.R @@ -193,7 +193,7 @@ estimatePrevalence <- function(cdm, includeOverallStrata = TRUE, minCellCount = 5) { - summarisedResult <- FALSE + summarisedResult <- TRUE startCollect <- Sys.time() @@ -449,6 +449,10 @@ estimatePrevalence <- function(cdm, prs <- prs |> visOmopResults::uniteStrata() } + + if(nrow(prs) == 0){ + prs <- omopgenerics::emptySummarisedResult() + } else { prs <- prs |> dplyr::distinct() |> dplyr::mutate("analysis_id" = as.integer(.data$analysis_id)) |> @@ -486,6 +490,7 @@ estimatePrevalence <- function(cdm, "strata_name" = dplyr::if_else(.data$strata_name == "Overall", "overall", gsub(" and ", " &&& ", .data$strata_name)), "strata_level" = dplyr::if_else(.data$strata_level == "Overall", "overall", gsub(" and ", " &&& ", .data$strata_level)) ) + } prs <- omopgenerics::newSummarisedResult(prs, settings = analysisSettings) |> omopgenerics::suppress(minCellCount = minCellCount) diff --git a/R/tables.R b/R/tables.R index 726cf11..806bc72 100644 --- a/R/tables.R +++ b/R/tables.R @@ -14,75 +14,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # -#' Format a point_prevalence object into a visual table. + + +#' Table of prevalence results #' -#' `r lifecycle::badge("experimental")` +#' @param result Prevalence results +#' @param type Type of table. Can be "gt", "flextable", or "tibble". +#' @param .options Table options to apply #' -#' @param result A summarised_result object with results from -#' estimatePointPrevalence() or estimatePeriodPrevalence(). -#' @param prevalenceType Type of prevalence estimates: "point" or "period". -#' @param formatEstimateName Named list of estimate name's to join, sorted by -#' computation order. Indicate estimate_name's between <...>. -#' @param header A vector containing which elements should go into the header -#' in order. Allowed are: `cdm_name`, `group`, `strata`, `additional`, -#' `variable`, `estimate`, `settings`. -#' @param splitStrata If TRUE strata columns will be splitted. -#' @param cdmName If TRUE database names will be displayed. -#' @param outcomeName If TRUE outcome cohort names will be displayed. -#' @param outcomeSettings If TRUE settings related to the outcome cohorts will -#' be displayed. -#' @param denominatorName If TRUE denominator cohort names will be displayed. -#' @param denominatorSettings If TRUE settings related to the denominator cohorts -#' will be displayed. -#' @param analysisSettings If TRUE database names will be displayed. -#' @param groupColumn Column to use as group labels. -#' @param type Type of desired formatted table, possibilities: "gt", -#' "flextable", "tibble". -#' @param .options Named list with additional formatting options. -#' IncidencePrevalence::optionsTablePrevalence() shows allowed -#' arguments and their default values. +#' @return Table of prevalence results +#' @export #' #' @examples -#' \donttest{ -#' library(IncidencePrevalence) -#' -#' cdm <- mockIncidencePrevalenceRef() -#' -#' cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator") -#' -#' prev <- estimatePointPrevalence( -#' cdm = cdm, -#' denominatorTable = "denominator", -#' outcomeTable = "outcome", -#' summarisedResult = TRUE -#' ) -#' -#' tablePrevalence(prev, prevalenceType = "point") -#' -#' CDMConnector::cdmDisconnect(cdm = cdm) -#' } -#' -#' @return A table with a formatted version of a prevalence result. -#' -#' @noRd -#' tablePrevalence <- function( result, - prevalenceType, - formatEstimateName = c( - "Denominator (N)" = "", - "Outcome (N)" = "", - "Prevalence [95% CI]" = " ( - )" - ), - header = c("variable", "estimate"), - splitStrata = TRUE, - cdmName = TRUE, - outcomeName = TRUE, - outcomeSettings = FALSE, - denominatorName = TRUE, - denominatorSettings = TRUE, - analysisSettings = FALSE, - groupColumn = NULL, type = "gt", .options = list() ) { @@ -92,6 +37,21 @@ tablePrevalence <- function( cli::cli_abort(c("x" = "Table functionality only works with results in a summarised_result format.", "i" = "These can be obtained with the argument `summarisedResult` in estimatePeriodPrevalence() and estimatePointPrevalence().")) } + formatEstimateName <- c( + "Denominator (N)" = "", + "Outcome (N)" = "", + "Prevalence [95% CI]" = " ( - )" + ) + header <- c("variable", "estimate") + splitStrata <- TRUE + cdmName <- TRUE + outcomeName <- TRUE + outcomeSettings <- FALSE + denominatorName <- TRUE + denominatorSettings <- TRUE + analysisSettings <- FALSE + groupColumn <- NULL + tableInternal( result = result, formatEstimateName = formatEstimateName, @@ -105,81 +65,24 @@ tablePrevalence <- function( analysisSettings = analysisSettings, groupColumn = groupColumn, type = type, - resultType = paste0(prevalenceType, "_prevalence"), + resultType = c("point_prevalence", "period_prevalence"), .options = .options ) } -#' Format a point_prevalence object into a visual table. -#' -#' `r lifecycle::badge("experimental")` -#' -#' @param result A summarised_result object with results from -#' estimateIncidence(). -#' @param formatEstimateName Named list of estimate name's to join, sorted by -#' computation order. Indicate estimate_name's between <...>. -#' @param header A vector containing which elements should go into the header -#' in order. Allowed are: `cdm_name`, `group`, `strata`, `additional`, -#' `variable`, `estimate`, `settings`. -#' @param splitStrata If TRUE strata columns will be splitted. -#' @param cdmName If TRUE database names will be displayed. -#' @param outcomeName If TRUE outcome cohort names will be displayed. -#' @param outcomeSettings If TRUE settings related to the outcome cohorts will -#' be displayed. -#' @param denominatorName If TRUE denominator cohort names will be displayed. -#' @param denominatorSettings If TRUE settings related to the denominator cohorts -#' will be displayed. -#' @param analysisSettings If TRUE database names will be displayed. -#' @param groupColumn Column to use as group labels. -#' @param type Type of desired formatted table, possibilities: "gt", -#' "flextable", "tibble". -#' @param .options Named list with additional formatting options. -#' IncidencePrevalence::optionsTableIncidence() shows allowed -#' arguments and their default values. -#' -#' @examples -#' \donttest{ -#' library(IncidencePrevalence) -#' -#' cdm <- mockIncidencePrevalenceRef() -#' -#' cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator") -#' -#' inc <- estimateIncidence( -#' cdm = cdm, -#' denominatorTable = "denominator", -#' outcomeTable = "outcome", -#' summarisedResult = TRUE -#' ) -#' -#' tableIncidence(inc) + +#' Table of incidence results #' -#' CDMConnector::cdmDisconnect(cdm = cdm) -#' } +#' @param result Incidence results +#' @param type Type of table. Can be "gt", "flextable", or "tibble". +#' @param .options Table options to apply #' -#' @return A table with a formatted version of incidence results. +#' @return Table of results +#' @export #' -#' @noRd tableIncidence <- function( result, - formatEstimateName = c( - "Denominator (N)" = "", - "Person-years" = "", - "Outcome (N)" = "", - "Incidence 100 person-years [95% CI]" = - " ( - - )" - ), - header = c("variable", "estimate"), - splitStrata = TRUE, - cdmName = TRUE, - outcomeName = TRUE, - outcomeSettings = FALSE, - denominatorName = TRUE, - denominatorSettings = TRUE, - analysisSettings = FALSE, - groupColumn = NULL, type = "gt", .options = list() ) { @@ -189,9 +92,26 @@ tableIncidence <- function( cli::cli_abort(c("x" = "Table functionality only works with results in a summarised_result format.", "i" = "These can be obtained with the argument `summarisedResult` in estimateIncidence().")) } + header <- c("variable", "estimate") + splitStrata <- TRUE + cdmName <- TRUE + outcomeName <- TRUE + outcomeSettings <- FALSE + denominatorName <- TRUE + denominatorSettings <- TRUE + analysisSettings <- FALSE + groupColumn <- NULL + tableInternal( result = result, - formatEstimateName = formatEstimateName, + formatEstimateName = c( + "Denominator (N)" = "", + "Person-years" = "", + "Outcome (N)" = "", + "Incidence 100 person-years [95% CI]" = + " ( - + )" + ), header = header, splitStrata = splitStrata, cdmName = cdmName, @@ -230,7 +150,7 @@ tableInternal <- function( .options = list() ) { result <- omopgenerics::newSummarisedResult(result) |> - visOmopResults::filterSettings(.data$result_type == resultType) + visOmopResults::filterSettings(.data$result_type %in% resultType) if (nrow(result) == 0) { cli::cli_abort("No results of the type {resultType} were found in the summarised result provided.") } @@ -332,7 +252,7 @@ defaultTableIncidencePrevalence <- function(.options, type) { defaults <- visOmopResults::optionsVisOmopTable() - if (type == "incidence") { + if ("incidence" %in% type) { defaults$keepNotFormatted = FALSE } diff --git a/man/tableIncidence.Rd b/man/tableIncidence.Rd new file mode 100644 index 0000000..ed27a90 --- /dev/null +++ b/man/tableIncidence.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tables.R +\name{tableIncidence} +\alias{tableIncidence} +\title{Table of incidence results} +\usage{ +tableIncidence(result, type = "gt", .options = list()) +} +\arguments{ +\item{result}{Incidence results} + +\item{type}{Type of table. Can be "gt", "flextable", or "tibble".} + +\item{.options}{Table options to apply} +} +\value{ +Table of results +} +\description{ +Table of incidence results +} diff --git a/man/tablePrevalence.Rd b/man/tablePrevalence.Rd new file mode 100644 index 0000000..56afa32 --- /dev/null +++ b/man/tablePrevalence.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tables.R +\name{tablePrevalence} +\alias{tablePrevalence} +\title{Table of prevalence results} +\usage{ +tablePrevalence(result, type = "gt", .options = list()) +} +\arguments{ +\item{result}{Prevalence results} + +\item{type}{Type of table. Can be "gt", "flextable", or "tibble".} + +\item{.options}{Table options to apply} +} +\value{ +Table of prevalence results +} +\description{ +Table of prevalence results +} diff --git a/tests/testthat/test-estimateIncidence.R b/tests/testthat/test-estimateIncidence.R index 425c616..37c84d9 100644 --- a/tests/testthat/test-estimateIncidence.R +++ b/tests/testthat/test-estimateIncidence.R @@ -10,77 +10,18 @@ test_that("mock db: check output format", { interval = "months" ) - # check estimates tibble - expect_true(all(c( - "analysis_id", - "n_persons", - "person_days", - "person_years", - "n_events", - "incidence_100000_pys", - "incidence_100000_pys_95CI_lower", - "incidence_100000_pys_95CI_upper", - "incidence_start_date", - "incidence_end_date", - "cohort_obscured", - "result_obscured", - "analysis_outcome_washout", - "analysis_repeated_events", - "analysis_interval", - "analysis_complete_database_intervals", - "analysis_min_cell_count", - "outcome_cohort_id", - "outcome_cohort_name", - "denominator_cohort_id", - "denominator_age_group", - "denominator_sex", - "denominator_days_prior_observation", - "denominator_start_date", - "denominator_end_date", - "denominator_target_cohort_definition_id", - "denominator_target_cohort_name", - "cdm_name" - ) %in% - names(inc))) - - expect_true(all(c( - "analysis_id", "number_records", "number_subjects", - "reason_id", "reason", - "excluded_records", "excluded_subjects", - "analysis_outcome_washout", - "analysis_repeated_events", - "analysis_interval", - "analysis_complete_database_intervals", - "analysis_min_cell_count", - "outcome_cohort_id", - "outcome_cohort_name", - "denominator_cohort_id", - "denominator_age_group", - "denominator_sex", - "denominator_days_prior_observation", - "denominator_start_date", - "denominator_end_date", - "denominator_target_cohort_definition_id", - "denominator_target_cohort_name", - "cdm_name" - ) %in% - names(attrition(inc)))) - my_settings <- settings(inc) expect_true(nrow(my_settings) > 0) - - - # inc <- estimateIncidence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # interval = "months", - # summarisedResult = TRUE - # ) - # expect_true("summarised_result" %in% class(inc)) - # expect_equal(colnames(inc), - # omopgenerics::resultColumns()) + inc <- estimateIncidence( + cdm = cdm, + denominatorTable = "denominator", + outcomeTable = "outcome", + interval = "months" + ) + expect_true("summarised_result" %in% class(inc)) + expect_equal(colnames(inc), + omopgenerics::resultColumns()) CDMConnector::cdm_disconnect(cdm) }) @@ -194,7 +135,10 @@ test_that("mock db: check working example 2", { minCellCount = 0, completeDatabaseIntervals = FALSE ) - expect_true(sum(inc$n_events) == 1) + expect_true(sum(as.numeric(inc |> + dplyr::filter( + estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == 1) inc <- estimateIncidence(cdm, denominatorTable = "denominator", @@ -204,7 +148,10 @@ test_that("mock db: check working example 2", { minCellCount = 0, completeDatabaseIntervals = FALSE ) - expect_true(sum(inc$n_events) == 3) + expect_true(sum(as.numeric(inc |> + dplyr::filter( + estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == 3) inc <- estimateIncidence(cdm, denominatorTable = "denominator", @@ -214,7 +161,10 @@ test_that("mock db: check working example 2", { minCellCount = 0, completeDatabaseIntervals = FALSE ) - expect_true(sum(inc$n_events) == 2) + expect_true(sum(as.numeric(inc |> + dplyr::filter( + estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == 2) # even if repeatedEvents = TRUE, # if outcomeWashout=NULL (all of history) @@ -227,7 +177,10 @@ test_that("mock db: check working example 2", { minCellCount = 0, completeDatabaseIntervals = FALSE ) - expect_true(sum(inc$n_events) == 1) + expect_true(sum(as.numeric(inc |> + dplyr::filter( + estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == 1) inc <- estimateIncidence(cdm, denominatorTable = "denominator", @@ -238,7 +191,10 @@ test_that("mock db: check working example 2", { interval = "weeks", completeDatabaseIntervals = FALSE ) - expect_true(sum(inc$n_events) == 1) + expect_true(sum(as.numeric(inc |> + dplyr::filter( + estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == 1) CDMConnector::cdm_disconnect(cdm) }) @@ -294,8 +250,8 @@ test_that("mock db: check study periods", { # we expect 12 months of which the last in december # the last month should also be included # as the person goes up to the last day of the month - expect_true(nrow(inc) == 12) - + expect_true(nrow(inc |> + dplyr::filter(estimate_name == "outcome_count")) == 12) inc <- estimateIncidence(cdm, denominatorTable = "denominator", @@ -309,7 +265,8 @@ test_that("mock db: check study periods", { # now with completeDatabaseIntervals is TRUE # we expect 10 months of which the last in november - expect_true(nrow(inc) == 10) + expect_true(nrow(inc |> + dplyr::filter(estimate_name == "outcome_count")) == 10) CDMConnector::cdm_disconnect(cdm) }) @@ -375,12 +332,18 @@ test_that("mock db: check overall", { # one person had the event before the study period # (but washout was 0 so was included) # one person had the event during the study period - expect_true(nrow(inc) == 1) - expect_true(inc$n_persons == 2) - expect_true(inc$incidence_start_date == - as.Date("2007-01-01")) - expect_true(inc$incidence_end_date == - as.Date("2010-02-05")) # date of first event + expect_true(nrow(inc |> + dplyr::filter(estimate_name == "outcome_count")) == 1) + expect_true(inc |> + dplyr::filter(estimate_name == "denominator_count") |> + dplyr::pull("estimate_value") == "2") + + expect_true(all(inc |> + visOmopResults::splitAdditional() |> + dplyr::pull("incidence_start_date") == as.Date("2007-01-01"))) + expect_true(all(inc |> + visOmopResults::splitAdditional() |> + dplyr::pull("incidence_end_date") == as.Date("2010-02-05"))) # date of first event inc <- estimateIncidence(cdm, @@ -392,11 +355,14 @@ test_that("mock db: check overall", { minCellCount = 0, completeDatabaseIntervals = FALSE ) - expect_true(nrow(inc) == 1) - expect_true(inc$incidence_start_date == - as.Date("2007-01-01")) - expect_true(inc$incidence_end_date == - as.Date("2011-06-15")) # date of end of obs + expect_true(nrow(inc |> + dplyr::filter(estimate_name == "outcome_count")) == 1) + expect_true(all(inc |> + visOmopResults::splitAdditional() |> + dplyr::pull("incidence_start_date") == as.Date("2007-01-01"))) + expect_true(all(inc |> + visOmopResults::splitAdditional() |> + dplyr::pull("incidence_end_date") == as.Date("2011-06-15"))) # date of end of obs CDMConnector::cdm_disconnect(cdm) }) @@ -452,27 +418,36 @@ test_that("mock db: check person days", { ) # in 2019 we expect person 2 to contribute from 1st july to end of December - expect_true(inc$person_days[1] == - as.numeric(difftime( - as.Date("2019-12-31"), - as.Date("2019-07-01") - )) + 1) + expect_true(inc |> + dplyr::filter(estimate_name == "person_days") |> + head(1) |> + dplyr::pull("estimate_value") == + as.numeric(difftime( + as.Date("2019-12-31"), + as.Date("2019-07-01") + )) + 1) # in 2020 we expect person 2 to contribute all year # and person 1 from 1st January to end of December - expect_true(inc$person_days[2] == - (as.numeric(difftime( - as.Date("2020-12-31"), - as.Date("2020-07-01") - )) + 1) + - (as.numeric(difftime( - as.Date("2020-12-31"), - as.Date("2020-01-01") - ) + 1))) + expect_true(inc |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value") == + (as.numeric(difftime( + as.Date("2020-12-31"), + as.Date("2020-07-01") + )) + 1) + + (as.numeric(difftime( + as.Date("2020-12-31"), + as.Date("2020-01-01") + ) + 1))) # in 2021 we expect person 2 to contribute all year # and person 1 from 1st January up to 27th june (date of their outcome) - expect_true(inc$person_days[3] == + expect_true(inc |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::filter(dplyr::row_number() == 3) |> + dplyr::pull("estimate_value") == (as.numeric(difftime( as.Date("2021-12-31"), as.Date("2021-01-01") @@ -482,9 +457,13 @@ test_that("mock db: check person days", { as.Date("2021-01-01") ) + 1))) + # in 2022 we expect person 2 to contribute all year # (person 1 is out- they have had an event) - expect_true(inc$person_days[4] == + expect_true(inc |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::filter(dplyr::row_number() == 4) |> + dplyr::pull("estimate_value") == (as.numeric(difftime( as.Date("2021-10-05"), as.Date("2021-01-01") @@ -548,9 +527,15 @@ test_that("mock db: check periods follow calendar dates", { minCellCount = 0, completeDatabaseIntervals = FALSE ) - expect_true(inc$n_events[1] == 1) - expect_true(inc$n_events[2] == 3) + expect_true(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value") == "1") + expect_true(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value") == "3") # startDate during a month (with month as interval) cdm <- generateDenominatorCohortSet( @@ -568,9 +553,18 @@ test_that("mock db: check periods follow calendar dates", { minCellCount = 0, completeDatabaseIntervals = FALSE ) - expect_true(inc$n_events[1] == 1) - expect_true(inc$n_events[2] == 1) - expect_true(inc$n_events[3] == 1) + expect_true(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value") == "1") + expect_true(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value") == "1") + expect_true(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::filter(dplyr::row_number() == 3) |> + dplyr::pull("estimate_value") == "1") CDMConnector::cdm_disconnect(cdm) }) @@ -627,7 +621,9 @@ test_that("mock db: check washout windows", { minCellCount = 0 ) # expect all events if we have zero days washout - expect_true(sum(incW0$n_events) == 4) + expect_true(sum(as.numeric(incW0 |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == 4) incW1 <- estimateIncidence(cdm, denominatorTable = "denominator", @@ -638,7 +634,9 @@ test_that("mock db: check washout windows", { minCellCount = 0 ) # expect three events if we have one days washout - expect_true(sum(incW1$n_events) == 3) + expect_true(sum(as.numeric(incW1 |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == 3) incW2 <- estimateIncidence(cdm, denominatorTable = "denominator", @@ -649,7 +647,9 @@ test_that("mock db: check washout windows", { minCellCount = 0 ) # expect two events if we have two days washout - expect_true(sum(incW2$n_events) == 2) + expect_true(sum(as.numeric(incW2 |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == 2) incW365 <- estimateIncidence(cdm, denominatorTable = "denominator", @@ -660,9 +660,11 @@ test_that("mock db: check washout windows", { minCellCount = 0 ) # expect one event if we have 365 days washout - expect_true(sum(incW365$n_events) == 1) + expect_true(sum(as.numeric(incW365 |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == 1) - incNull <- estimateIncidence(cdm, + incInf <- estimateIncidence(cdm, denominatorTable = "denominator", outcomeTable = "outcome", repeatedEvents = TRUE, @@ -671,12 +673,18 @@ test_that("mock db: check washout windows", { minCellCount = 0 ) # expect one event if we have NULL (all history washout) - expect_true(sum(incNull$n_events) == 1) + expect_true(sum(as.numeric(incInf |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == 1) # but, we will have move days when using the 365 day washout # as the person came back to contribute more time at risk - expect_true(sum(incNull$person_days) < - sum(incW365$person_days)) + expect_true(sum(as.numeric(incInf |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::pull("estimate_value"))) < + sum(as.numeric(incW365 |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::pull("estimate_value")))) CDMConnector::cdm_disconnect(cdm) @@ -779,7 +787,9 @@ test_that("mock db: check events overlapping with start of a period", { minCellCount = 0 ) - expect_true(all(inc$n_persons == 1)) + expect_true(all(inc |> + dplyr::filter(estimate_name == "denominator_count") |> + dplyr::pull("estimate_value") == 1)) CDMConnector::cdm_disconnect(cdm) # another example @@ -829,7 +839,9 @@ test_that("mock db: check events overlapping with start of a period", { interval = c("Years"), minCellCount = 0 ) - expect_true(all(inc2$n_persons == 1)) + expect_true(all(inc2 |> + dplyr::filter(estimate_name == "denominator_count") |> + dplyr::pull("estimate_value") == 1)) CDMConnector::cdm_disconnect(cdm) }) @@ -890,22 +902,34 @@ test_that("mock db: compare results from months and years", { ) # consistent results for months and years - expect_true(sum(incMonths$n_events) == - sum(incYears$n_events)) - expect_equal( - sum(incMonths$person_days), - sum(incYears$person_days) - ) - expect_equal( - sum(incMonths$person_years), - sum(incYears$person_years) - ) + expect_true(sum(as.numeric(incMonths |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == + sum(as.numeric(incYears |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value")))) + + expect_true(sum(as.numeric(incMonths |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::pull("estimate_value"))) == + sum(as.numeric(incYears |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::pull("estimate_value")))) + + expect_equal(sum(as.numeric(incMonths |> + dplyr::filter(estimate_name == "person_years") |> + dplyr::pull("estimate_value"))), + sum(as.numeric(incYears |> + dplyr::filter(estimate_name == "person_years") |> + dplyr::pull("estimate_value")))) + CDMConnector::cdm_disconnect(cdm) cdm <- mockIncidencePrevalenceRef(sampleSize = 1000) cdm <- generateDenominatorCohortSet( - cdm = cdm, name = "denominator", cohortDateRange = c(as.Date("2010-01-01"), as.Date("2011-12-31")) + cdm = cdm, name = "denominator", + cohortDateRange = c(as.Date("2010-01-01"), as.Date("2011-12-31")) ) incWeeks <- estimateIncidence( @@ -942,41 +966,45 @@ test_that("mock db: compare results from months and years", { ) # consistent results for months and years - expect_true(sum(incWeeks$n_events) == - sum(incYears$n_events)) - expect_true(sum(incQuarters$n_events) == - sum(incYears$n_events)) - expect_true(sum(incMonths$n_events) == - sum(incYears$n_events)) - - expect_equal( - sum(incWeeks$person_days), - sum(incYears$person_days) - ) - expect_equal( - sum(incQuarters$person_days), - sum(incYears$person_days) - ) - expect_equal( - sum(incMonths$person_days), - sum(incYears$person_days) - ) - - expect_equal( - sum(incWeeks$person_years), - sum(incYears$person_years) - ) - expect_equal( - sum(incQuarters$person_years), - sum(incYears$person_years) - ) - expect_equal( - sum(incMonths$person_years), - sum(incYears$person_years) - ) - - - CDMConnector::cdm_disconnect(cdm) + expect_equal(sum(as.numeric(incWeeks |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))), + sum(as.numeric(incYears |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value")))) + expect_equal(sum(as.numeric(incQuarters |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))), + sum(as.numeric(incYears |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value")))) + expect_equal(sum(as.numeric(incMonths |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))), + sum(as.numeric(incYears |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value")))) + + expect_equal(sum(as.numeric(incWeeks |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::pull("estimate_value"))), + sum(as.numeric(incYears |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::pull("estimate_value")))) + expect_equal(sum(as.numeric(incQuarters |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::pull("estimate_value"))), + sum(as.numeric(incYears |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::pull("estimate_value")))) + expect_equal(sum(as.numeric(incMonths |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::pull("estimate_value"))), + sum(as.numeric(incYears |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::pull("estimate_value")))) + + CDMConnector::cdm_disconnect(cdm) }) test_that("mock db: check entry and event on same day", { @@ -1023,7 +1051,10 @@ test_that("mock db: check entry and event on same day", { minCellCount = 0, completeDatabaseIntervals = FALSE ) - expect_true(sum(incWithoutRep$n_events) == 1) + + expect_true(sum(as.numeric(incWithoutRep |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == "1") incWithRep <- estimateIncidence( cdm = cdm, @@ -1035,7 +1066,9 @@ test_that("mock db: check entry and event on same day", { minCellCount = 0, completeDatabaseIntervals = FALSE ) - expect_true(sum(incWithRep$n_events) == 1) + expect_true(sum(as.numeric(incWithRep |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"))) == "1") CDMConnector::cdm_disconnect(cdm) }) @@ -1089,8 +1122,9 @@ test_that("mock db: cohort start overlaps with the outcome", { interval = c("Years"), minCellCount = 0 ) - - expect_true(all(inc$n_persons == 1L)) + expect_true(all(inc |> + dplyr::filter(estimate_name == "denominator_count") |> + dplyr::pull("estimate_value") == "1")) CDMConnector::cdm_disconnect(cdm) }) @@ -1147,7 +1181,9 @@ test_that("mock db: check outcome in previous obeservation period", { interval = c("Years"), minCellCount = 0 ) - expect_true(all(incRep$n_persons == 2)) + expect_true(all(incRep |> + dplyr::filter(estimate_name == "denominator_count") |> + dplyr::pull("estimate_value") == "2")) # with inf wash out- should only have 1 person incNoRep <- estimateIncidence( @@ -1159,7 +1195,9 @@ test_that("mock db: check outcome in previous obeservation period", { interval = c("Years"), minCellCount = 0 ) - expect_true(all(incNoRep$n_persons == 1)) + expect_true(all(incNoRep |> + dplyr::filter(estimate_name == "denominator_count") |> + dplyr::pull("estimate_value") == "1")) # with 5 year wash out- should have 2 people at the start of the study period incNoRep2 <- estimateIncidence( @@ -1171,8 +1209,10 @@ test_that("mock db: check outcome in previous obeservation period", { interval = c("Years"), minCellCount = 0 ) - expect_true(max(incNoRep2$n_persons) == 2) + expect_true(max(as.numeric(incNoRep2 |> + dplyr::filter(estimate_name == "denominator_count") |> + dplyr::pull("estimate_value"))) == 2) CDMConnector::cdm_disconnect(cdm) @@ -1236,20 +1276,56 @@ test_that("mock db: check minimum counts", { minCellCount = 0, completeDatabaseIntervals = FALSE ) - expect_true(inc$n_persons[1] == 20) - expect_true(inc$n_persons[2] == 3) - expect_true(!is.na(inc$person_days[1])) - expect_true(!is.na(inc$person_days[2])) - expect_true(!is.na(inc$person_years[1])) - expect_true(!is.na(inc$person_years[2])) - expect_true(inc$n_events[1] == 17) - expect_true(inc$n_events[2] == 3) - expect_true(!is.na(inc$incidence_100000_pys[1])) - expect_true(!is.na(inc$incidence_100000_pys[2])) - expect_true(!is.na(inc$incidence_100000_pys_95CI_lower[1])) - expect_true(!is.na(inc$incidence_100000_pys_95CI_lower[2])) - expect_true(!is.na(inc$incidence_100000_pys_95CI_upper[1])) - expect_true(!is.na(inc$incidence_100000_pys_95CI_upper[2])) + + + expect_equal(inc |> + dplyr::filter(estimate_name == "denominator_count") |> + dplyr::pull("estimate_value"), c("20", "3")) + expect_equal(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"), c("17", "3")) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value"))) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value"))) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "person_years") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value"))) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "person_years") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value"))) + + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value"))) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value"))) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys_95CI_lower") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value"))) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys_95CI_lower") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value"))) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys_95CI_upper") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value"))) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys_95CI_upper") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value"))) + inc <- estimateIncidence( cdm = cdm, @@ -1260,27 +1336,66 @@ test_that("mock db: check minimum counts", { minCellCount = 5, completeDatabaseIntervals = FALSE ) - expect_true(inc$n_persons[1] == 20) - expect_true(is.na(inc$n_persons[2])) - expect_true(!is.na(inc$person_days[1])) - expect_true(is.na(inc$person_days[2])) - expect_true(!is.na(inc$person_years[1])) - expect_true(is.na(inc$person_years[2])) - expect_true(inc$n_events[1] == 17) - expect_true(is.na(inc$n_events[2])) - expect_true(!is.na(inc$incidence_100000_pys[1])) - expect_true(is.na(inc$incidence_100000_pys[2])) - expect_true(!is.na(inc$incidence_100000_pys_95CI_lower[1])) - expect_true(is.na(inc$incidence_100000_pys_95CI_lower[2])) - expect_true(!is.na(inc$incidence_100000_pys_95CI_upper[1])) - expect_true(is.na(inc$incidence_100000_pys_95CI_upper[2])) + expect_equal(inc |> + dplyr::filter(estimate_name == "denominator_count") |> + dplyr::pull("estimate_value"), c("20", NA)) + expect_equal(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value"), c("17", NA)) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value"))) + expect_true(is.na(inc |> + dplyr::filter(estimate_name == "person_days") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value"))) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "person_years") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value"))) + expect_true(is.na(inc |> + dplyr::filter(estimate_name == "person_years") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value"))) + + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value"))) + expect_true(is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value"))) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys_95CI_lower") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value"))) + expect_true(is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys_95CI_lower") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value"))) + expect_true(!is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys_95CI_upper") |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("estimate_value"))) + expect_true(is.na(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys_95CI_upper") |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("estimate_value"))) + + CDMConnector::cdm_disconnect(cdm) }) test_that("mock db: multiple overlapping outcomes", { + + # technically overlapping outcomes are not allowed + # check this edge case, but validation might also not allow this + skip_on_cran() - # two + # two people personTable <- dplyr::tibble( person_id = c(1L, 2L), gender_concept_id = c(8507L, 8532L), @@ -1300,6 +1415,7 @@ test_that("mock db: multiple overlapping outcomes", { as.Date("2021-12-31") ) ) + # two outcomes for person one outcomeTable <- dplyr::tibble( cohort_definition_id = c(1L, 1L), subject_id = c(1L, 1L), @@ -1321,10 +1437,16 @@ test_that("mock db: multiple overlapping outcomes", { outcomeTable = "outcome", outcomeWashout = 180, repeatedEvents = TRUE, - interval = c("Years"), + interval = "overall", minCellCount = 0 ) - expect_true(all(inc$n_persons) == 1) + + expect_true(inc |> + dplyr::filter(estimate_name == "denominator_count") |> + dplyr::pull("estimate_value") == "2") + expect_true(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value") == "1") CDMConnector::cdm_disconnect(cdm) # three @@ -1376,10 +1498,15 @@ test_that("mock db: multiple overlapping outcomes", { outcomeTable = "outcome", outcomeWashout = 180, repeatedEvents = TRUE, - interval = c("Years"), + interval = "overall", minCellCount = 0 ) - expect_true(all(inc$n_persons) == 1) + expect_true(inc |> + dplyr::filter(estimate_name == "denominator_count") |> + dplyr::pull("estimate_value") == "2") + expect_true(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value") == "1") CDMConnector::cdm_disconnect(cdm) }) @@ -1443,7 +1570,9 @@ test_that("mock db: cohort before period start ending after period", { completeDatabaseIntervals = FALSE, minCellCount = 0 ) - expect_true(all(inc$n_events == 1L)) + expect_true(all(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value") == "1")) # washout inc <- estimateIncidence( @@ -1456,7 +1585,9 @@ test_that("mock db: cohort before period start ending after period", { completeDatabaseIntervals = FALSE, minCellCount = 0 ) - expect_true(all(inc$n_events == 1L)) + expect_true(all(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value") == "1")) CDMConnector::cdm_disconnect(cdm) }) @@ -1561,7 +1692,10 @@ test_that("mock db: check full period requirement - year", { interval = c("Years"), minCellCount = 0 ) - expect_true(nrow(inc) == 1) + + expect_true(nrow(inc |> + dplyr::filter(estimate_name == "outcome_count")) == + 1) CDMConnector::cdm_disconnect(cdm) }) @@ -1666,7 +1800,6 @@ test_that("mock db: check full period requirement - month", { interval = c("Months"), minCellCount = 0 ) - expect_true(inc$n_persons == 1) expect_true(nrow(inc) >= 1) CDMConnector::cdm_disconnect(cdm) @@ -1724,9 +1857,20 @@ test_that("mock db: check completeDatabaseIntervals", { completeDatabaseIntervals = TRUE, minCellCount = 0 ) - expect_true(nrow(inc) == 2) - expect_true(lubridate::year(inc$incidence_start_date[1]) == "2020") - expect_true(lubridate::year(inc$incidence_start_date[2]) == "2021") + + expect_true(nrow(inc |> + dplyr::filter(estimate_name == "outcome_count")) == 2) + expect_true(lubridate::year(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("incidence_start_date")) == "2020") + expect_true(lubridate::year(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("incidence_start_date")) == "2021") + # repetitive events FALSE # - now we expect only to use 2020 (id 2 obs end is in 21) inc <- estimateIncidence( @@ -1738,8 +1882,13 @@ test_that("mock db: check completeDatabaseIntervals", { completeDatabaseIntervals = TRUE, minCellCount = 0 ) - expect_true(nrow(inc) == 1) - expect_true(lubridate::year(inc$incidence_start_date[1]) == "2020") + expect_true(nrow(inc |> + dplyr::filter(estimate_name == "outcome_count")) == 1) + expect_true(lubridate::year(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("incidence_start_date")) == "2020") # full periods required FALSE # repetitive events TRUE @@ -1755,11 +1904,29 @@ test_that("mock db: check completeDatabaseIntervals", { completeDatabaseIntervals = FALSE, minCellCount = 0 ) - expect_true(nrow(inc) == 4) - expect_true(lubridate::year(inc$incidence_start_date[1]) == "2019") - expect_true(lubridate::year(inc$incidence_start_date[2]) == "2020") - expect_true(lubridate::year(inc$incidence_start_date[3]) == "2021") - expect_true(lubridate::year(inc$incidence_start_date[4]) == "2022") + expect_true(nrow(inc |> + dplyr::filter(estimate_name == "outcome_count")) == 4) + expect_true(lubridate::year(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("incidence_start_date")) == "2019") + expect_true(lubridate::year(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("incidence_start_date")) == "2020") + expect_true(lubridate::year(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(dplyr::row_number() == 3) |> + dplyr::pull("incidence_start_date")) == "2021") + expect_true(lubridate::year(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(dplyr::row_number() == 4) |> + dplyr::pull("incidence_start_date")) == "2022") + # repetitive events FALSE inc <- estimateIncidence( cdm = cdm, @@ -1770,10 +1937,23 @@ test_that("mock db: check completeDatabaseIntervals", { completeDatabaseIntervals = FALSE, minCellCount = 0 ) - expect_true(nrow(inc) == 3) - expect_true(lubridate::year(inc$incidence_start_date[1]) == "2019") - expect_true(lubridate::year(inc$incidence_start_date[2]) == "2020") - expect_true(lubridate::year(inc$incidence_start_date[3]) == "2021") + expect_true(nrow(inc |> + dplyr::filter(estimate_name == "outcome_count")) == 3) + expect_true(lubridate::year(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::pull("incidence_start_date")) == "2019") + expect_true(lubridate::year(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(dplyr::row_number() == 2) |> + dplyr::pull("incidence_start_date")) == "2020") + expect_true(lubridate::year(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(dplyr::row_number() == 3) |> + dplyr::pull("incidence_start_date")) == "2021") CDMConnector::cdm_disconnect(cdm) }) @@ -1941,30 +2121,38 @@ test_that("mock db: check with and without study start and end date", { ) # given the settings above we would expect the same results for 2010 - expect_true(inc1A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull() == - inc2A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull()) - expect_true(inc1A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull() == - inc2A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull()) - expect_true(inc1A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull() == - inc2A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull()) + expect_equal(inc1A |> + dplyr::filter(estimate_name == "denominator_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value"), + inc2A |> + dplyr::filter(estimate_name == "denominator_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value")) + + expect_equal(inc1A |> + dplyr::filter(estimate_name == "person_days") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value"), + inc2A |> + dplyr::filter(estimate_name == "person_days") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value")) + + expect_equal(inc1A |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value"), + inc2A |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value")) # 365 washout, repetitive events inc1B <- estimateIncidence(cdm, @@ -1986,31 +2174,38 @@ test_that("mock db: check with and without study start and end date", { completeDatabaseIntervals = FALSE ) # given the settings above we would expect the same results for 2010 - expect_true(inc1B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull() == - inc2B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull()) - expect_true(inc1B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull() == - inc2B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull()) - - expect_true(inc1B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull() == - inc2B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull()) + expect_equal(inc1B |> + dplyr::filter(estimate_name == "denominator_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value"), + inc2B |> + dplyr::filter(estimate_name == "denominator_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value")) + + expect_equal(inc1B |> + dplyr::filter(estimate_name == "person_days") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value"), + inc2B |> + dplyr::filter(estimate_name == "person_days") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value")) + + expect_equal(inc1B |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value"), + inc2B |> + dplyr::filter(estimate_name == "outcome_count") |> + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(incidence_start_date) == 2010) |> + dplyr::pull("estimate_value")) CDMConnector::cdm_disconnect(cdm) }) @@ -2050,38 +2245,40 @@ test_that("mock db: check study start and end date 1000", { completeDatabaseIntervals = FALSE ) + + + + expect_true(inc1A %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull() == - inc2A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull()) - expect_true(inc1A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull() == + dplyr::filter(estimate_name == "denominator_count") %>% + dplyr::pull("estimate_value") == inc2A %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull()) + dplyr::filter(estimate_name == "denominator_count") %>% + dplyr::pull("estimate_value")) expect_true(inc1A %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull() == + dplyr::filter(estimate_name == "person_days") %>% + dplyr::pull("estimate_value") == inc2A %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull()) + dplyr::filter(estimate_name == "person_days") %>% + dplyr::pull("estimate_value")) expect_true(inc1A %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("incidence_100000_pys") %>% - dplyr::pull() == + dplyr::filter(estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value") == inc2A %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("incidence_100000_pys") %>% - dplyr::pull()) + dplyr::filter(estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value")) # 365 washout, repetitive events inc1B <- estimateIncidence(cdm, @@ -2104,38 +2301,15 @@ test_that("mock db: check study start and end date 1000", { ) expect_true(inc1B %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull() == - inc2B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull()) - expect_true(inc1B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull() == + dplyr::filter(estimate_name == "incidence_100000_pys") %>% + dplyr::pull("estimate_value") == inc2B %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull()) - - expect_true(inc1B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull() == - inc2B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull()) - expect_true(inc1B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("incidence_100000_pys") %>% - dplyr::pull() == - inc1B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("incidence_100000_pys") %>% - dplyr::pull()) + dplyr::filter(estimate_name == "incidence_100000_pys") %>% + dplyr::pull("estimate_value")) CDMConnector::cdm_disconnect(cdm) # with multiple outcomes per person @@ -2174,38 +2348,18 @@ test_that("mock db: check study start and end date 1000", { completeDatabaseIntervals = FALSE ) + expect_true(inc1A %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull() == - inc2A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull()) - expect_true(inc1A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull() == - inc2A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull()) - expect_true(inc1A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull() == - inc2A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull()) - expect_true(inc1A %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("incidence_100000_pys") %>% - dplyr::pull() == + dplyr::filter(estimate_name == "incidence_100000_pys") %>% + dplyr::pull("estimate_value") == inc2A %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("incidence_100000_pys") %>% - dplyr::pull()) + dplyr::filter(estimate_name == "incidence_100000_pys") %>% + dplyr::pull("estimate_value")) + # 365 washout, repetitive events inc1B <- estimateIncidence(cdm, @@ -2227,38 +2381,17 @@ test_that("mock db: check study start and end date 1000", { completeDatabaseIntervals = FALSE ) + expect_true(inc1B %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull() == - inc2B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_persons") %>% - dplyr::pull()) - expect_true(inc1B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull() == - inc2B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("person_days") %>% - dplyr::pull()) - expect_true(inc1B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull() == - inc2B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("n_events") %>% - dplyr::pull()) - expect_true(inc1B %>% - dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("incidence_100000_pys") %>% - dplyr::pull() == + dplyr::filter(estimate_name == "incidence_100000_pys") %>% + dplyr::pull("estimate_value") == inc2B %>% + visOmopResults::splitAdditional() |> dplyr::filter(lubridate::year(incidence_start_date) == 2010) %>% - dplyr::select("incidence_100000_pys") %>% - dplyr::pull()) + dplyr::filter(estimate_name == "incidence_100000_pys") %>% + dplyr::pull("estimate_value")) CDMConnector::cdm_disconnect(cdm) }) @@ -2422,7 +2555,9 @@ test_that("mock db: multiple observation periods", { minCellCount = 0 ) # expect all events if we have zero days washout - expect_true(sum(incW0$n_events) == 2) + expect_true(sum(as.numeric(incW0 %>% + dplyr::filter(estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value"))) == 2) CDMConnector::cdm_disconnect(cdm) # Change the inclusion so that both patients have valid observation periods. Now 1 should have two, and 2 one. @@ -2464,7 +2599,9 @@ test_that("mock db: multiple observation periods", { minCellCount = 0 ) # expect all events if we have ten days washout - expect_true(sum(incW10$n_events) == 3) + expect_true(sum(as.numeric(incW10 %>% + dplyr::filter(estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value"))) == 3) CDMConnector::cdm_disconnect(cdm) # try event not counted for outcome but counted for washout as denominator (before observ period) @@ -2507,8 +2644,17 @@ test_that("mock db: multiple observation periods", { completeDatabaseIntervals = FALSE, minCellCount = 0 ) - expect_true(sum(inc_PreWashout$n_events) == 3) - expect_true(inc_PreWashout %>% dplyr::select(person_days) %>% sum() == as.numeric(difftime(as.Date("2005-08-11"), as.Date("2005-07-19"))) + 1 - 2 + as.numeric(difftime(as.Date("2015-01-02"), as.Date("2009-04-10"))) + 1 - 10 + as.numeric(difftime(as.Date("2011-12-11"), as.Date("2010-12-11"))) + 1 - 10 - 3) + expect_true(sum(as.numeric(inc_PreWashout %>% + dplyr::filter(estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value"))) == 3) + expect_true(sum(as.numeric(inc_PreWashout %>% + dplyr::filter(estimate_name == "person_days") %>% + dplyr::pull("estimate_value"))) == + as.numeric(difftime(as.Date("2005-08-11"), as.Date("2005-07-19"))) + + 1 - 2 + as.numeric(difftime(as.Date("2015-01-02"), + as.Date("2009-04-10"))) + 1 - 10 + + as.numeric(difftime(as.Date("2011-12-11"), as.Date("2010-12-11"))) + + 1 - 10 - 3) CDMConnector::cdm_disconnect(cdm) # multiple events in one of the observation periods of person 1 @@ -2559,17 +2705,25 @@ test_that("mock db: multiple observation periods", { ) # we should have 4 events with washout 0, but 3 events with washout 30 - expect_true(sum(inc_Mult1_W0$n_events) == 4) - expect_true(sum(inc_Mult1_W30$n_events) == 3) - expect_true(inc_Mult1_W0 %>% dplyr::select(person_days) %>% - sum() == as.numeric(difftime(as.Date("2005-08-11"), + expect_true(sum(as.numeric(inc_Mult1_W0 %>% + dplyr::filter(estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value"))) == 4) + expect_true(sum(as.numeric(inc_Mult1_W30 %>% + dplyr::filter(estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value"))) == 3) + expect_true(sum(as.numeric(inc_Mult1_W0 %>% + dplyr::filter(estimate_name == "person_days") %>% + dplyr::pull("estimate_value"))) == + as.numeric(difftime(as.Date("2005-08-11"), as.Date("2005-06-19"))) + 1 + as.numeric(difftime(as.Date("2015-01-02"), as.Date("2009-04-10"))) + 1 + as.numeric(difftime(as.Date("2011-12-11"), as.Date("2010-12-11"))) + 1) - expect_true(inc_Mult1_W30 %>% dplyr::select(person_days) %>% - sum() == as.numeric(difftime(as.Date("2005-08-11"), + expect_true(sum(as.numeric(inc_Mult1_W30 %>% + dplyr::filter(estimate_name == "person_days") %>% + dplyr::pull("estimate_value"))) == + as.numeric(difftime(as.Date("2005-08-11"), as.Date("2005-06-19"))) - 30 + as.numeric(difftime(as.Date("2015-01-02"), as.Date("2009-04-10"))) + @@ -2617,8 +2771,13 @@ test_that("mock db: multiple observation periods", { ) # we should have 2 events with washout 30 - expect_true(sum(inc_PreWashEv$n_events) == 2) - expect_true(inc_PreWashEv %>% dplyr::select(person_days) %>% sum() == + expect_true(sum(as.numeric(inc_PreWashEv %>% + dplyr::filter(estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value"))) == 2) + + expect_true(sum(as.numeric(inc_PreWashEv %>% + dplyr::filter(estimate_name == "person_days") %>% + dplyr::pull("estimate_value"))) == as.numeric(difftime(as.Date("2005-08-11"), as.Date("2005-07-19"))) - 30 + 7 + as.numeric(difftime(as.Date("2015-01-02"), @@ -2709,7 +2868,9 @@ test_that("mock db: multiple observation periods", { ) # we should have 5 events with washout 1 - expect_true(sum(inc_3op$n_events) == 5) + expect_true(sum(as.numeric(inc_3op %>% + dplyr::filter(estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value"))) == 5) # try repeated events FALSE. inc_repev <- estimateIncidence(cdm, @@ -2720,9 +2881,9 @@ test_that("mock db: multiple observation periods", { completeDatabaseIntervals = FALSE, minCellCount = 0 ) - - # we should have 1 event, - expect_true(sum(inc_repev$n_events) == 2) + expect_true(sum(as.numeric(inc_repev %>% + dplyr::filter(estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value"))) == 2) CDMConnector::cdm_disconnect(cdm) }) @@ -2746,12 +2907,21 @@ test_that("mock db: check confidence intervals", { completeDatabaseIntervals = TRUE ) - expect_equal(inc$incidence_100000_pys_95CI_lower, - epitools::pois.exact(inc$n_events, inc$person_years)$lower * 100000, + pkg_est <- inc %>% + dplyr::select("estimate_name", + "estimate_value", "additional_level") |> + tidyr::pivot_wider(names_from = "estimate_name", + values_from = "estimate_value") |> + dplyr::filter(denominator_count > 1) + + expect_equal(as.numeric(pkg_est$incidence_100000_pys_95CI_lower), + epitools::pois.exact(as.numeric(pkg_est$outcome_count), + as.numeric(pkg_est$person_years))$lower* 100000 , tolerance = 1e-2 ) - expect_equal(inc$incidence_100000_pys_95CI_upper, - epitools::pois.exact(inc$n_events, inc$person_years)$upper * 100000, + expect_equal(as.numeric(pkg_est$incidence_100000_pys_95CI_upper), + epitools::pois.exact(as.numeric(pkg_est$outcome_count), + as.numeric(pkg_est$person_years))$upper* 100000 , tolerance = 1e-2 ) @@ -2773,18 +2943,20 @@ test_that("mock db: check attrition", { # for female cohort we should have a row for those excluded for not being male expect_true(any("Not Female" == attrition(inc) %>% + dplyr::left_join(settings(inc), by = "result_id") |> dplyr::filter(denominator_sex == "Female") %>% dplyr::pull(.data$reason))) # for male, the opposite expect_true(any("Not Male" == attrition(inc) %>% + dplyr::left_join(settings(inc), by = "result_id") |> dplyr::filter(denominator_sex == "Male") %>% dplyr::pull(.data$reason))) # check we can pick out specific analysis attrition expect_true(nrow(attrition(inc) %>% - dplyr::filter(analysis_id == 1)) > 1) + dplyr::filter(result_id == 1)) > 1) expect_true(nrow(attrition(inc) %>% - dplyr::filter(analysis_id == 2)) > 1) + dplyr::filter(result_id == 2)) > 1) CDMConnector::cdm_disconnect(cdm) # check obscuring counts @@ -2875,25 +3047,7 @@ test_that("mock db: check compute permanent", { outcomeTable = "outcome", interval = "overall" ) - # if using temp tables - # we have temp tables created by dbplyr - # expect_true(any(stringr::str_starts( - # CDMConnector::listTables(attr(attr(cdm, "cdm_source"), "dbcon")), - # "dbplyr_" - # ))) - CDMConnector::cdm_disconnect(cdm) - - # using permanent - cdm <- mockIncidencePrevalenceRef(sampleSize = 1000) - cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator") - inc <- estimateIncidence( - cdm = cdm, - denominatorTable = "denominator", - outcomeTable = "outcome", - interval = "overall" - ) - - # no temp tables created by dbplyr + # we have no temp tables created by dbplyr expect_false(any(stringr::str_starts( CDMConnector::listTables(attr(attr(cdm, "cdm_source"), "dbcon")), "dbplyr_" @@ -2927,14 +3081,23 @@ test_that("mock db: empty outcome cohort", { cdm = cdm, denominatorTable = "denominator", outcomeTable = "outcome", - interval = "months" + interval = "months", + minCellCount = 0 )) - expect_true(sum(inc$n_events)==0) + expect_true(sum(as.numeric(inc |> + dplyr::filter(estimate_name == "outcome_count") |> + dplyr::pull("estimate_value")))==0) # make sure we also have a confidence interval even in the case of an empty outcome cohort - expect_true(all(inc$incidence_100000_pys == 0)) - expect_true(all(inc$incidence_100000_pys_95CI_lower == 0)) - expect_true(all(inc$incidence_100000_pys_95CI_upper > 0)) + expect_true(all(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys") |> + dplyr::pull("estimate_value")=="0")) + expect_true(all(as.numeric(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys_95CI_lower") |> + dplyr::pull("estimate_value")) == 0)) + expect_true(all(as.numeric(inc |> + dplyr::filter(estimate_name == "incidence_100000_pys_95CI_upper") |> + dplyr::pull("estimate_value")) > 0)) CDMConnector::cdm_disconnect(cdm) @@ -2961,7 +3124,8 @@ test_that("mock db: incidence using strata vars", { cdm$denominator <- cdm$denominator %>% dplyr::mutate(my_strata = dplyr::if_else(year(cohort_start_date) < 1995L, "first", "second")) %>% - dplyr::compute() + dplyr::compute(name = "denominator", + temporary = FALSE) inc <- estimateIncidence( cdm = cdm, @@ -2970,28 +3134,23 @@ test_that("mock db: incidence using strata vars", { interval = "months", strata = list(c("my_strata")) ) - expect_true(all(c("strata_name", "strata_level") %in% - colnames(inc))) - expect_true(all(c("Overall", "my_strata") %in% - unique(inc %>% - dplyr::pull("strata_name")))) - expect_true(all(c("Overall", + expect_true(all(c("overall", "my_strata") %in% + unique(inc$strata_name))) + expect_true(all(c("overall", "first", "second") %in% - unique(inc %>% - dplyr::pull("strata_level")))) + unique(inc$strata_level))) # original without strata should be the same as "Overall" strata expect_equal(inc_orig, inc %>% - dplyr::filter(strata_name == "Overall") %>% - dplyr::select(!c("strata_name", "strata_level"))) + dplyr::filter(strata_level == "overall")) cdm$denominator <- cdm$denominator %>% dplyr::mutate(my_strata2 = dplyr::if_else(month(cohort_start_date)<7, "a", "b")) %>% - dplyr::compute() - + dplyr::compute(name = "denominator", + temporary = FALSE) inc2 <- estimateIncidence( cdm = cdm, denominatorTable = "denominator", @@ -2999,16 +3158,15 @@ test_that("mock db: incidence using strata vars", { interval = "months", strata = list(c("my_strata","my_strata2")) ) - expect_true(all(c("strata_name", "strata_level") %in% - colnames(inc2))) - expect_true(all(c("Overall", "my_strata and my_strata2") %in% - unique(inc2 %>% - dplyr::pull("strata_name")))) - expect_true(all(c("Overall", - "first and a", "first and b", - "second and a", "second and b") %in% - unique(inc2 %>% - dplyr::pull("strata_level")))) + + expect_true(all(c("overall", "my_strata &&& my_strata2") %in% + unique(inc2$strata_name))) + expect_true(all(c("overall", + "first &&& a", + "second &&& a", + "first &&& b", + "second &&& b") %in% + unique(inc2$strata_level))) inc3 <- estimateIncidence( cdm = cdm, @@ -3019,20 +3177,19 @@ test_that("mock db: incidence using strata vars", { c("my_strata2"), c("my_strata", "my_strata2"))) - expect_true(all(c("strata_name", "strata_level") %in% - colnames(inc3))) - expect_true(all(c("Overall", "my_strata", "my_strata2", - "my_strata and my_strata2") %in% - unique(inc3 %>% - dplyr::pull("strata_name")))) - expect_true(all(c("Overall", + expect_true(all(c("overall", + "my_strata" , + "my_strata2" , + "my_strata &&& my_strata2") %in% + unique(inc3$strata_name))) + expect_true(all(c("overall", "first", "second", "a", "b", - "first and a", "first and b", - "second and a", "second and b") %in% - unique(inc3 %>% - dplyr::pull("strata_level")))) - + "first &&& a", + "second &&& a", + "first &&& b", + "second &&& b") %in% + unique(inc3$strata_level))) # without overall strata inc4 <- estimateIncidence( @@ -3044,20 +3201,18 @@ test_that("mock db: incidence using strata vars", { c("my_strata2"), c("my_strata", "my_strata2")), includeOverallStrata = FALSE) - expect_false("Overall" %in% unique(inc4 %>% - dplyr::pull("strata_name"))) - expect_true(all(c("my_strata", "my_strata2", - "my_strata and my_strata2") %in% - unique(inc4 %>% - dplyr::pull("strata_name")))) - expect_false("Overall" %in% unique(inc4 %>% - dplyr::pull("strata_level"))) + expect_true(all(c("my_strata" , + "my_strata2" , + "my_strata &&& my_strata2") %in% + unique(inc4$strata_name))) expect_true(all(c("first", "second", "a", "b", - "first and a", "first and b", - "second and a", "second and b") %in% - unique(inc4 %>% - dplyr::pull("strata_level")))) + "first &&& a", + "second &&& a", + "first &&& b", + "second &&& b") %in% + unique(inc4$strata_level))) + expect_error(estimateIncidence( cdm = cdm, @@ -3153,98 +3308,22 @@ test_that("mock db: multiple outcome cohort id", { expect_equal( inc_all_outcome %>% - dplyr::filter(outcome_cohort_id == 1) %>% - dplyr::pull("incidence_100000_pys"), - inc_all_outcome_1 %>% - dplyr::pull("incidence_100000_pys") + dplyr::filter(variable_level == "cohort_1")|> + dplyr::pull("estimate_value"), + inc_all_outcome_1 |> + dplyr::pull("estimate_value") ) + expect_equal( inc_all_outcome %>% - dplyr::filter(outcome_cohort_id == 2) %>% - dplyr::pull("incidence_100000_pys"), - inc_all_outcome_2 %>% - dplyr::pull("incidence_100000_pys") + dplyr::filter(variable_level == "cohort_2")|> + dplyr::pull("estimate_value"), + inc_all_outcome_2 |> + dplyr::pull("estimate_value") ) CDMConnector::cdm_disconnect(cdm) }) -test_that("test summarised result working", { -# -# cdm <- mockIncidencePrevalenceRef() -# -# cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator") -# -# inc <- estimateIncidence( -# cdm = cdm, -# denominatorTable = "denominator", -# outcomeTable = "outcome", -# interval = "months", -# summarisedResult = TRUE -# ) -# -# expect_true("summarised_result" %in% class(inc)) -# expect_no_error(visOmopResults::tidy(inc)) -# expect_true( -# all(colnames(settings(inc)) %in% c( -# 'result_id', 'result_type', 'package_name', 'package_version', 'analysis_interval', -# 'analysis_complete_database_intervals', 'denominator_cohort_name', 'denominator_age_group', -# 'denominator_sex', 'denominator_days_prior_observation', 'denominator_start_date', -# 'denominator_end_date', 'denominator_target_cohort_name', 'outcome_cohort_name', "min_cell_count" -# )) -# ) -# expect_true(unique(settings(inc)$result_type) == "incidence") -# -# # strata -# cdm$denominator <- cdm$denominator %>% -# dplyr::mutate(my_strata = dplyr::if_else(year(cohort_start_date) < 1995L, -# "first", "second")) %>% -# dplyr::compute() -# inc <- estimateIncidence( -# cdm = cdm, -# denominatorTable = "denominator", -# outcomeTable = "outcome", -# interval = "months", -# strata = list(c("my_strata")), -# summarisedResult = TRUE, -# minCellCount = 0 -# ) -# expect_true("summarised_result" %in% class(inc)) -# inc2 <- estimateIncidence( -# cdm = cdm, -# denominatorTable = "denominator", -# outcomeTable = "outcome", -# interval = "months", -# strata = list(c("my_strata")), -# minCellCount = 0 -# ) -# expect_equal( -# attrition(inc2) |> -# dplyr::mutate(result_id = as.integer(analysis_id)) |> -# dplyr::select(result_id, number_records, number_subjects, reason_id, reason, excluded_records, excluded_subjects), -# attrition(inc) -# ) -# -# inc_tidy <- inc |> visOmopResults::tidy() -# inc_sup <- inc |> omopgenerics::suppress() |> visOmopResults::tidy() -# expect_no_error(inc_tidy <- visOmopResults::tidy(inc, addSettings = FALSE)) -# expect_true(nrow(inc_tidy) == nrow(inc2)) -# expect_true(all(c("my_strata") %in% colnames(inc_tidy))) -# expect_true(all(c("overall", "first") %in% -# unique(inc_tidy %>% -# dplyr::pull("my_strata")))) -# # suppress -# sup <- inc_tidy |> dplyr::filter(denominator_count < 5 & denominator_count > 0) |> -# dplyr::select(!c("denominator_count", "outcome_count", "incidence_100000_pys", "incidence_100000_pys_95CI_lower", -# "incidence_100000_pys_95CI_upper", "person_days", "person_years")) -# -# expect_true(all(is.na(inc_sup |> dplyr::inner_join(sup) |> dplyr::pull("denominator_count")))) -# expect_true(all(is.na(inc_sup |> dplyr::inner_join(sup) |> dplyr::pull("person_days")))) -# expect_true(all(is.na(inc_sup |> dplyr::inner_join(sup) |> dplyr::pull("incidence_100000_pys")))) -# expect_false(any(is.na(inc_sup |> dplyr::inner_join(sup) |> dplyr::pull("outcome_count")))) -# -# CDMConnector::cdm_disconnect(cdm) -# -}) diff --git a/tests/testthat/test-estimatePrevalence.R b/tests/testthat/test-estimatePrevalence.R index 03f02df..020810c 100644 --- a/tests/testthat/test-estimatePrevalence.R +++ b/tests/testthat/test-estimatePrevalence.R @@ -8,69 +8,10 @@ test_that("mock db: check output format", { outcomeTable = "outcome", interval = "years" ) - - # check estimates tibble - expect_true(all(c( - "analysis_id", - "n_cases", - "n_population", - "prevalence", - "prevalence_95CI_lower", - "prevalence_95CI_upper", - "prevalence_start_date", - "prevalence_end_date", - "population_obscured", - "cases_obscured", - "result_obscured", - "outcome_cohort_id", - "outcome_cohort_name", - "analysis_type", - "analysis_time_point", - "analysis_interval", - "analysis_full_contribution", - "analysis_complete_database_intervals", - "analysis_min_cell_count", - "denominator_cohort_id", - "denominator_age_group", - "denominator_sex", - "denominator_days_prior_observation", - "denominator_start_date", - "denominator_end_date", - "denominator_target_cohort_definition_id", - "denominator_target_cohort_name", - "cdm_name" - ) %in% - names(prev))) - - expect_true(all(c( - "analysis_id", "number_records", "number_subjects", - "reason_id", "reason", - "excluded_records", "excluded_subjects", - "outcome_cohort_id", - "outcome_cohort_name", - "analysis_type", - "analysis_time_point", - "analysis_interval", - "analysis_full_contribution", - "analysis_complete_database_intervals", - "analysis_min_cell_count", - "denominator_cohort_id", - "denominator_age_group", - "denominator_sex", - "denominator_days_prior_observation", - "denominator_start_date", - "denominator_end_date", - "denominator_target_cohort_definition_id", - "denominator_target_cohort_name", - "cdm_name" - ) %in% - names(attrition(prev)))) - my_settings <- settings(prev) expect_true(nrow(my_settings) > 0) CDMConnector::cdm_disconnect(cdm) - }) test_that("mock db: checks on working example", { @@ -246,41 +187,55 @@ test_that("mock db: check minimum counts", { type = "period", interval = "months" ) - expect_true(prev$n_cases[1] == 17) - expect_true(prev$n_cases[2] == 3) - expect_true(prev$n_cases[3] == 0) - expect_true(prev$n_population[1] == 20) - expect_true(prev$n_population[2] == 3) - expect_true(prev$n_population[3] == 3) - expect_true(!is.na(prev$prevalence[1])) - expect_true(!is.na(prev$prevalence[2])) - expect_true(!is.na(prev$prevalence[3])) - expect_true(!is.na(prev$prevalence_95CI_lower[1])) - expect_true(!is.na(prev$prevalence_95CI_upper[1])) - - prev <- estimatePrevalence( - cdm = cdm, - denominatorTable = "denominator", - outcomeTable = "outcome", - minCellCount = 5, - type = "period", - interval = "months" - ) - expect_true(prev$n_cases[1] == 17) - expect_true(is.na(prev$n_cases[2])) - expect_true(!is.na(prev$n_cases[3])) # don't suppress zero - expect_true(prev$n_population[1] == 20) - expect_true(is.na(prev$n_population[2])) - expect_true(is.na(prev$n_population[3])) - expect_true(!is.na(prev$prevalence[1])) - expect_true(is.na(prev$prevalence[2])) - expect_true(is.na(prev$prevalence[3])) - expect_true(!is.na(prev$prevalence_95CI_lower[1])) - expect_true(is.na(prev$prevalence_95CI_lower[2])) - expect_true(is.na(prev$prevalence_95CI_lower[3])) - expect_true(!is.na(prev$prevalence_95CI_upper[1])) - expect_true(is.na(prev$prevalence_95CI_upper[2])) - expect_true(is.na(prev$prevalence_95CI_upper[3])) + prev_est <- prev |> + dplyr::filter(estimate_name == "outcome_count") + expect_true(prev_est$estimate_value[1] == "17") + expect_true(prev_est$estimate_value[2] == "3") + expect_true(prev_est$estimate_value[3] == "0") + prev_est <- prev |> + dplyr::filter(estimate_name == "denominator_count") + expect_true(prev_est$estimate_value[1] == "20") + expect_true(prev_est$estimate_value[2] == "3") + expect_true(prev_est$estimate_value[3] == "3") + prev_est <- prev |> + dplyr::filter(estimate_name == "prevalence") + expect_true(!is.na(prev_est$estimate_value[1])) + expect_true(!is.na(prev_est$estimate_value[2])) + expect_true(!is.na(prev_est$estimate_value[3])) + prev_est <- prev |> + dplyr::filter(estimate_name == "prevalence_95CI_lower") + expect_true(!is.na(prev_est$estimate_value[1])) + prev_est <- prev |> + dplyr::filter(estimate_name == "prevalence_95CI_upper") + expect_true(!is.na(prev_est$estimate_value[1])) + + # suppress results + prev <- omopgenerics::suppress(prev, minCellCount = 5) + prev_est <- prev |> + dplyr::filter(estimate_name == "outcome_count") + expect_true(prev_est$estimate_value[1] == "17") + expect_true(is.na(prev_est$estimate_value[2])) + expect_true(prev_est$estimate_value[3] == "0") # don't suppress zero + prev_est <- prev |> + dplyr::filter(estimate_name == "denominator_count") + expect_true(prev_est$estimate_value[1] == "20") + expect_true(is.na(prev_est$estimate_value[2])) + expect_true(is.na(prev_est$estimate_value[3])) + prev_est <- prev |> + dplyr::filter(estimate_name == "prevalence") + expect_true(!is.na(prev_est$estimate_value[1])) + expect_true(is.na(prev_est$estimate_value[2])) + expect_true(is.na(prev_est$estimate_value[3])) + prev_est <- prev |> + dplyr::filter(estimate_name == "prevalence_95CI_lower") + expect_true(!is.na(prev_est$estimate_value[1])) + expect_true(is.na(prev_est$estimate_value[2])) + expect_true(is.na(prev_est$estimate_value[3])) + prev_est <- prev |> + dplyr::filter(estimate_name == "prevalence_95CI_upper") + expect_true(!is.na(prev_est$estimate_value[1])) + expect_true(is.na(prev_est$estimate_value[2])) + expect_true(is.na(prev_est$estimate_value[3])) CDMConnector::cdm_disconnect(cdm) }) @@ -333,7 +288,9 @@ test_that("mock db: check study time periods", { # we expect 12 months of which the last in December # the last month should also be included # as the person goes up to the last day of the month - expect_true(nrow(prev) == 12) + expect_true(nrow(prev |> + dplyr::filter( + estimate_name == "outcome_count")) == 12) # overall period @@ -344,7 +301,8 @@ test_that("mock db: check study time periods", { interval = "overall" ) # just one row - expect_true(nrow(prev) == 1) + expect_true(nrow(prev |> + dplyr::filter(estimate_name == "outcome_count")) == 1) CDMConnector::cdm_disconnect(cdm) @@ -392,7 +350,8 @@ test_that("mock db: check study time periods", { fullContribution = TRUE, interval = "weeks" ) - expect_true(nrow(prev) == 45) + expect_true(nrow(prev |> + dplyr::filter(estimate_name == "outcome_count")) == 45) prev <- estimatePrevalence(cdm, denominatorTable = "denominator", @@ -401,7 +360,8 @@ test_that("mock db: check study time periods", { fullContribution = TRUE, interval = "months" ) - expect_true(nrow(prev) == 10) + expect_true(nrow(prev |> + dplyr::filter(estimate_name == "outcome_count")) == 10) prev <- estimatePrevalence(cdm, denominatorTable = "denominator", @@ -410,7 +370,8 @@ test_that("mock db: check study time periods", { fullContribution = TRUE, interval = "years" ) - expect_true(nrow(prev) == 0) + expect_true(nrow(prev |> + dplyr::filter(estimate_name == "outcome_count")) == 0) CDMConnector::cdm_disconnect(cdm) }) @@ -470,7 +431,10 @@ test_that("mock db: check fullContribution requirement", { completeDatabaseIntervals = FALSE, minCellCount = 0 ) - expect_true(all(prev$n_population == 2)) + expect_true(all(prev |> + dplyr::filter( + estimate_name == "denominator_count") |> + dplyr::pull("estimate_value") == "2")) prev <- estimatePrevalence(cdm, denominatorTable = "denominator", @@ -481,8 +445,10 @@ test_that("mock db: check fullContribution requirement", { completeDatabaseIntervals = FALSE, minCellCount = 0 ) - - expect_true(all(prev$n_population == c(2, 1, 1))) + expect_true(all(prev |> + dplyr::filter( + estimate_name == "denominator_count") |> + dplyr::pull("estimate_value") == c("2", "1", "1"))) expect_true(attrition(prev) %>% dplyr::filter(reason == @@ -545,9 +511,15 @@ test_that("mock db: check periods follow calendar dates", { fullContribution = FALSE, completeDatabaseIntervals = FALSE ) - expect_true(nrow(prev1) == 4) - expect_true(all(lubridate::year(prev1$prevalence_start_date) == - c("2010", "2011", "2012", "2013"))) + expect_true(nrow(prev1 |> + dplyr::filter( + estimate_name == "denominator_count")) == 4) + expect_true(all(lubridate::year( + prev1 |> visOmopResults::splitAdditional() |> + dplyr::filter( + estimate_name == "denominator_count") |> + dplyr::pull("prevalence_start_date")) == + c("2010", "2011", "2012", "2013"))) prev2 <- estimatePrevalence(cdm, denominatorTable = "denominator", @@ -558,9 +530,12 @@ test_that("mock db: check periods follow calendar dates", { fullContribution = FALSE, completeDatabaseIntervals = TRUE ) - expect_true(nrow(prev2) == 2) - expect_true(all(lubridate::year(prev2$prevalence_start_date) == - c("2011", "2012"))) + expect_true(all(lubridate::year( + prev2 |> visOmopResults::splitAdditional() |> + dplyr::filter( + estimate_name == "denominator_count") |> + dplyr::pull("prevalence_start_date")) == + c("2011", "2012"))) # for months cdm <- generateDenominatorCohortSet( @@ -578,8 +553,13 @@ test_that("mock db: check periods follow calendar dates", { fullContribution = FALSE, completeDatabaseIntervals = FALSE ) - expect_true(prev$prevalence_start_date[1] == - as.Date("2011-01-15")) + + expect_true(prev |> visOmopResults::splitAdditional() |> + dplyr::filter( + estimate_name == "denominator_count") |> + head(1) |> + dplyr::pull("prevalence_start_date") == + "2011-01-15") # where we expect the study to start the next month prev <- estimatePrevalence(cdm, denominatorTable = "denominator", @@ -590,8 +570,12 @@ test_that("mock db: check periods follow calendar dates", { fullContribution = FALSE, completeDatabaseIntervals = TRUE ) - expect_true(prev$prevalence_start_date[1] == - as.Date("2011-02-01")) + expect_true(prev |> visOmopResults::splitAdditional() |> + dplyr::filter( + estimate_name == "denominator_count") |> + head(1) |> + dplyr::pull("prevalence_start_date") == + "2011-02-01") # for overall prev <- estimatePrevalence(cdm, @@ -603,10 +587,18 @@ test_that("mock db: check periods follow calendar dates", { fullContribution = FALSE, completeDatabaseIntervals = FALSE ) - expect_true(prev$prevalence_start_date[1] == - as.Date("2011-01-15")) - expect_true(prev$prevalence_end_date[1] == - as.Date("2013-06-15")) + expect_true(prev |> visOmopResults::splitAdditional() |> + dplyr::filter( + estimate_name == "denominator_count") |> + head(1) |> + dplyr::pull("prevalence_start_date") == + "2011-01-15") + expect_true(prev |> visOmopResults::splitAdditional() |> + dplyr::filter( + estimate_name == "denominator_count") |> + head(1) |> + dplyr::pull("prevalence_end_date") == + "2013-06-15") CDMConnector::cdm_disconnect(cdm) }) @@ -653,7 +645,11 @@ test_that("mock db: check multiple outcome ids", { interval = "years", minCellCount = 0 ) - expect_true(all(prev[["n_cases"]] == 1)) + + expect_true(all(prev |> + dplyr::filter( + estimate_name == "outcome_count") |> + dplyr::pull("estimate_value") == "1")) CDMConnector::cdm_disconnect(cdm) }) @@ -758,7 +754,6 @@ test_that("mock db: check expected errors", { denominatorCohortId = 1 )) - CDMConnector::cdm_disconnect(cdm) }) @@ -902,19 +897,28 @@ test_that("mock db: multiple observation periods", { ) # nobody should appear in 2006 expect_true(ppe %>% - dplyr::filter(prevalence_start_date == "2006-01-01") %>% - dplyr::pull("n_population") == 0) + visOmopResults::splitAdditional() |> + dplyr::filter(prevalence_start_date == "2006-01-01", + estimate_name == "denominator_count") %>% + dplyr::pull("estimate_value") == 0) expect_true(ppe %>% - dplyr::filter(prevalence_start_date == "2006-01-01") %>% - dplyr::pull("n_cases") == 0) + visOmopResults::splitAdditional() |> + dplyr::filter(prevalence_start_date == "2006-01-01", + estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value") == 0) # one person with an event in 2005 expect_true(ppe %>% - dplyr::filter(lubridate::year(prevalence_start_date) == "2005") %>% - dplyr::pull("n_population") == 1) + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(prevalence_start_date) == "2005", + estimate_name == "denominator_count") %>% + dplyr::pull("estimate_value") == "1") expect_true(ppe %>% - dplyr::filter(lubridate::year(prevalence_start_date) == "2005") %>% - dplyr::pull("n_cases") == 1) + visOmopResults::splitAdditional() |> + dplyr::filter(lubridate::year(prevalence_start_date) == "2005", + estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value") == "1") + # as for point prevalence, we would expect no positive n_cases at default ppo <- estimatePointPrevalence( @@ -924,7 +928,11 @@ test_that("mock db: multiple observation periods", { interval = "months", minCellCount = 0 ) - expect_true(sum(ppo$n_cases) == 0) + + expect_true(sum(as.numeric(ppo %>% + visOmopResults::splitAdditional() |> + dplyr::filter(estimate_name == "outcome_count") %>% + dplyr::pull("estimate_value"))) == 0) CDMConnector::cdm_disconnect(cdm) }) @@ -941,20 +949,29 @@ test_that("mock db: check confidence intervals", { type = "point", interval = "years", minCellCount = 0 - ) %>% - dplyr::filter(n_population > 1) + ) + + pkg_est <- prev %>% + dplyr::select("estimate_name", + "estimate_value", "additional_level") |> + tidyr::pivot_wider(names_from = "estimate_name", + values_from = "estimate_value") |> + dplyr::filter(denominator_count > 1) # compare our wilson CIs with those from Hmisc - hmisc_ci <- Hmisc::binconf(prev$n_cases, prev$n_population, + hmisc_ci <- Hmisc::binconf(as.numeric(pkg_est$outcome_count), + as.numeric(pkg_est$denominator_count), alpha = 0.05, method = c("wilson"), return.df = TRUE ) - expect_equal(prev$prevalence_95CI_lower, hmisc_ci$Lower, + expect_equal(as.numeric(pkg_est$prevalence_95CI_lower), + hmisc_ci$Lower, tolerance = 1e-2 ) - expect_equal(prev$prevalence_95CI_upper, hmisc_ci$Upper, + expect_equal(as.numeric(pkg_est$prevalence_95CI_upper), + hmisc_ci$Upper, tolerance = 1e-2 ) @@ -978,19 +995,17 @@ test_that("mock db: check attrition", { # for female cohort we should have a row for those excluded for not being male expect_true(any("Not Female" == attrition(prev) %>% + dplyr::left_join(settings(prev), + by = "result_id") |> dplyr::filter(denominator_sex == "Female") %>% dplyr::pull(.data$reason))) # for male, the opposite expect_true(any("Not Male" == attrition(prev) %>% + dplyr::left_join(settings(prev), + by = "result_id") |> dplyr::filter(denominator_sex == "Male") %>% dplyr::pull(.data$reason))) - # check we can pick out specific analysis attrition - expect_true(nrow(attrition(prev) %>% - dplyr::filter(analysis_id == 1)) > 1) - expect_true(nrow(attrition(prev) %>% - dplyr::filter(analysis_id == 2)) > 1) - CDMConnector::cdm_disconnect(cdm) }) @@ -1076,7 +1091,6 @@ test_that("mock db: check compute permanent", { # using permanent (no prefix) cdm <- mockIncidencePrevalenceRef(sampleSize = 1000) - attr(attr(cdm, "cdm_source"), "write_schema") <- "main" cdm <- generateDenominatorCohortSet( cdm = cdm, name = "dpop" @@ -1129,7 +1143,8 @@ test_that("mock db: test empty outcome table works", { cdm <- mockIncidencePrevalenceRef(sampleSize = 1000) - cdm[["outcome"]] <- cdm[["outcome"]] %>% dplyr::filter(cohort_definition_id == 33) + cdm[["outcome"]] <- cdm[["outcome"]] %>% + dplyr::filter(cohort_definition_id == 33) cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator") @@ -1140,6 +1155,7 @@ test_that("mock db: test empty outcome table works", { interval = "years" )) + CDMConnector::cdm_disconnect(cdm) }) test_that("mock db: prevalence using strata vars", { @@ -1160,7 +1176,8 @@ test_that("mock db: prevalence using strata vars", { cdm$denominator <- cdm$denominator %>% dplyr::mutate(my_strata = dplyr::if_else(year(cohort_start_date) < 1990, "first", "second")) %>% - dplyr::compute() + dplyr::compute(temporary = FALSE, + name = "denominator") prev <- estimatePrevalence( cdm = cdm, @@ -1169,27 +1186,21 @@ test_that("mock db: prevalence using strata vars", { interval = "years", strata = list(c("my_strata")) ) - expect_true(all(c("strata_name", "strata_level") %in% - colnames(prev))) - expect_true(all(c("Overall", "my_strata") %in% - unique(prev %>% - dplyr::pull("strata_name")))) - expect_true(all(c("Overall", - "first", "second") %in% - unique(prev %>% - dplyr::pull("strata_level")))) + + expect_true(all(c("overall", "first", "second") %in% + unique(prev |> dplyr::pull("strata_level")))) # original without strata should be the same as "Overall" strata expect_equal(prev_orig, prev %>% - dplyr::filter(strata_name == "Overall") %>% - dplyr::select(!c("strata_name", "strata_level"))) + dplyr::filter(strata_level == "overall")) cdm$denominator <- cdm$denominator %>% dplyr::mutate(my_strata2 = dplyr::if_else(month(cohort_start_date)<7, "a", "b")) %>% - dplyr::compute() + dplyr::compute(temporary = FALSE, + name = "denominator") prev2 <- estimatePrevalence( cdm = cdm, @@ -1198,16 +1209,12 @@ test_that("mock db: prevalence using strata vars", { interval = "years", strata = list(c("my_strata","my_strata2")) ) - expect_true(all(c("strata_name", "strata_level") %in% - colnames(prev2))) - expect_true(all(c("Overall", "my_strata and my_strata2") %in% - unique(prev2 %>% - dplyr::pull("strata_name")))) - expect_true(all(c("Overall", - "first and a", "first and b", - "second and a", "second and b") %in% - unique(prev2 %>% - dplyr::pull("strata_level")))) + + expect_true(all(c("overall", "first &&& a", + "first &&& b", + "second &&& a", + "second &&& b") %in% + unique(prev2 |> dplyr::pull("strata_level")))) prev3 <- estimatePrevalence( cdm = cdm, @@ -1218,19 +1225,15 @@ test_that("mock db: prevalence using strata vars", { c("my_strata2"), c("my_strata", "my_strata2"))) - expect_true(all(c("strata_name", "strata_level") %in% - colnames(prev3))) - expect_true(all(c("Overall", "my_strata", "my_strata2", - "my_strata and my_strata2") %in% - unique(prev3 %>% - dplyr::pull("strata_name")))) - expect_true(all(c("Overall", - "first", "second", - "a", "b", - "first and a", "first and b", - "second and a", "second and b") %in% - unique(prev3 %>% - dplyr::pull("strata_level")))) + expect_true(all(c("overall", + "first", + "second", + "first &&& a", + "first &&& b", + "second &&& a", + "second &&& b") %in% + unique(prev3 |> dplyr::pull("strata_level")))) + # without overall strata @@ -1243,20 +1246,15 @@ test_that("mock db: prevalence using strata vars", { c("my_strata2"), c("my_strata", "my_strata2")), includeOverallStrata = FALSE) - expect_false("Overall" %in% unique(prev4 %>% - dplyr::pull("strata_name"))) - expect_true(all(c("my_strata", "my_strata2", - "my_strata and my_strata2") %in% - unique(prev4 %>% - dplyr::pull("strata_name")))) - expect_false("Overall" %in% unique(prev4 %>% - dplyr::pull("strata_level"))) - expect_true(all(c("first", "second", - "a", "b", - "first and a", "first and b", - "second and a", "second and b") %in% - unique(prev4 %>% - dplyr::pull("strata_level")))) + expect_true(all(c("first", + "second", + "first &&& a", + "first &&& b", + "second &&& a", + "second &&& b") %in% + unique(prev4 |> dplyr::pull("strata_level")))) + expect_false(c("overall") %in% + unique(prev4 |> dplyr::pull("strata_level"))) expect_error(estimatePrevalence( cdm = cdm, @@ -1282,105 +1280,4 @@ test_that("mock db: prevalence using strata vars", { CDMConnector::cdm_disconnect(cdm) }) -test_that("summarise result works", { - - # cdm <- mockIncidencePrevalenceRef(sampleSize = 1000, - # outPre = 0.7) - # - # cdm <- generateDenominatorCohortSet(cdm = cdm, - # name = "denominator") - # - # prev_orig <- estimatePrevalence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # interval = "years" - # ) - # - # cdm$denominator <- cdm$denominator %>% - # dplyr::mutate(my_strata = dplyr::if_else(year(cohort_start_date) < 1990, - # "first", "second")) %>% - # dplyr::compute() - # - # prev_sr <- estimatePrevalence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # interval = "years", - # strata = list(c("my_strata")), - # summarisedResult = TRUE, - # minCellCount = 0 - # ) - # - # expect_true("summarised_result" %in% class(prev_sr)) - # # suppress - # prev_sup <- prev_sr |> omopgenerics::suppress(minCellCount = 2) |> visOmopResults::tidy() - # expect_no_error(prev_tidy <- visOmopResults::tidy(prev_sr, addSettings = FALSE)) - # expect_true(all(c("my_strata") %in% colnames(prev_tidy))) - # expect_true(all(c("overall", "first") %in% - # unique(prev_tidy %>% - # dplyr::pull("my_strata")))) - # expect_true( - # all(colnames(settings(prev_sr)) == c( - # 'result_id', 'result_type', 'package_name', 'package_version', 'analysis_type', - # 'analysis_interval', 'analysis_complete_database_intervals', 'analysis_full_contribution', - # 'denominator_cohort_name', 'denominator_age_group', 'denominator_sex', - # 'denominator_days_prior_observation', 'denominator_start_date', 'denominator_end_date', - # 'denominator_target_cohort_name', 'outcome_cohort_name', 'min_cell_count' - # )) - # ) - # expect_true(unique(settings(prev_sr)$result_type) == "point_prevalence") - # - # sup <- prev_tidy |> dplyr::filter(outcome_count < 2 & outcome_count > 0) |> - # dplyr::select(!c("denominator_count", "outcome_count", "prevalence", "prevalence_95CI_lower", "prevalence_95CI_upper")) - # - # expect_true(all(is.na(prev_sup |> dplyr::inner_join(sup) |> dplyr::pull("outcome_count")))) - # expect_true(all(is.na(prev_sup |> dplyr::inner_join(sup) |> dplyr::pull("prevalence")))) - # expect_true(all(is.na(prev_sup |> dplyr::inner_join(sup) |> dplyr::pull("prevalence_95CI_upper")))) - # expect_false(any(is.na(prev_sup |> dplyr::inner_join(sup) |> dplyr::pull("denominator_count")))) - # - # prev <- estimatePrevalence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # interval = "years", - # strata = list(c("my_strata")), - # minCellCount = 0 - # ) - # expect_equal( - # attrition(prev_sr), - # attrition(prev) |> - # dplyr::mutate(result_id = as.integer(analysis_id)) |> - # dplyr::select(result_id, number_records, number_subjects, reason_id, reason, excluded_records, excluded_subjects)) - # expect_true(nrow(prev_tidy) == nrow(prev)) - # - # prev_sr <- estimatePeriodPrevalence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # interval = "years", - # summarisedResult = TRUE - # ) - # expect_true("summarised_result" %in% class(prev_sr)) - # expect_no_error(prev_tidy <- visOmopResults::tidy(prev_sr, addSettings = FALSE)) - # expect_true( - # all(colnames(prev_tidy) == c( - # 'result_id', 'cdm_name', 'denominator_cohort_name', 'variable_name', 'variable_level', - # 'prevalence_start_date', 'prevalence_end_date', 'denominator_count', 'outcome_count', 'prevalence', - # 'prevalence_95CI_lower', 'prevalence_95CI_upper' - # )) - # ) - # expect_true( - # all(colnames(settings(prev_sr)) == c( - # 'result_id', 'result_type', 'package_name', 'package_version', 'analysis_type', - # 'analysis_interval', 'analysis_complete_database_intervals', 'analysis_full_contribution', - # 'denominator_cohort_name', 'denominator_age_group', 'denominator_sex', - # 'denominator_days_prior_observation', 'denominator_start_date', 'denominator_end_date', - # 'denominator_target_cohort_name', 'outcome_cohort_name', 'min_cell_count' - # )) - # ) - # expect_true(unique(settings(prev_sr)$result_type) == "period_prevalence") - # - # CDMConnector::cdm_disconnect(cdm) -}) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index 79ea81a..8ed9f68 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -1,239 +1,9 @@ -test_that("basic incidence plot", { - # skip_if_not_installed("ggplot2") - # skip_if_not_installed("scales") - # - # cdm <- mockIncidencePrevalenceRef(sampleSize = 1000) - # cdm <- generateDenominatorCohortSet( - # cdm = cdm, name = "denominator", - # cohortDateRange = c(as.Date("2008-01-01"), as.Date("2018-01-01")) - # ) - # inc <- estimateIncidence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # summarisedResult = TRUE - # ) - # plot <- plotIncidence(inc) - # expect_true(ggplot2::is.ggplot(plot)) - # - # # with a different x axis - # cdm <- generateDenominatorCohortSet( - # cdm = cdm,name = "denominator", - # ageGroup = list( - # c(0, 30), - # c(31, 100) - # ) - # ) - # inc <- estimateIncidence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", interval = "overall", - # summarisedResult = TRUE - # ) - # plot <- plotIncidence(inc, x = "denominator_age_group") - # expect_true(ggplot2::is.ggplot(plot)) - # - # CDMConnector::cdm_disconnect(cdm) -}) - -test_that("basic prevalence plot", { - # skip_if_not_installed("ggplot2") - # skip_if_not_installed("scales") - # cdm <- mockIncidencePrevalenceRef(sampleSize = 1000) - # cdm <- generateDenominatorCohortSet( - # cdm = cdm, name = "denominator", - # cohortDateRange = c(as.Date("2008-01-01"), as.Date("2018-01-01")) - # ) - # prev <- estimatePrevalence( - # cdm = cdm, interval = "years", - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # summarisedResult = TRUE - # ) - # plot <- plotPrevalence(prev) - # expect_true(ggplot2::is.ggplot(plot)) - # - # # with a different x axis - # cdm <- generateDenominatorCohortSet( - # cdm = cdm,name = "denominator", - # cohortDateRange = c(as.Date("2010-01-01"), as.Date("2010-06-01")), - # ageGroup = list( - # c(0, 30), - # c(31, 100) - # ) - # ) - # prev <- estimatePrevalence( - # cdm = cdm, interval = "years", - # denominatorTable = "denominator", - # outcomeTable = "outcome", minCellCount = 0 - # ) - # plot <- plotPrevalence(prev, - # x = "denominator_age_group" - # ) - # expect_true(ggplot2::is.ggplot(plot)) - # - # CDMConnector::cdm_disconnect(cdm) -}) - -test_that("plot facets", { - # skip_if_not_installed("ggplot2") - # skip_if_not_installed("scales") - # cdm <- mockIncidencePrevalenceRef(sampleSize = 1000) - # cdm <- generateDenominatorCohortSet( - # cdm = cdm,name = "denominator", - # ageGroup = list( - # c(0, 30), - # c(31, 100) - # ) - # ) - # inc <- estimateIncidence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", minCellCount = 0 - # ) - # plot_orig <- plotIncidence(inc, facet = "denominator_age_group") - # inc <- estimateIncidence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # summarisedResult = TRUE, minCellCount = 0 - # ) - # plot_sr <- plotIncidence(inc, facet = "denominator_age_group") - # - # expect_true(ggplot2::is.ggplot(plot_sr)) - # - # - # - # # multiple facet grouping - # cdm <- generateDenominatorCohortSet( - # cdm = cdm,name = "denominator", - # ageGroup = list( - # c(0, 30), - # c(31, 100) - # ), - # sex = c("Male", "Female") - # ) - # inc <- estimateIncidence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # summarisedResult = TRUE - # ) - # - # plot <- plotIncidence(inc, - # facet = c( - # "denominator_age_group", - # "denominator_sex" - # ) - # ) - # expect_true(ggplot2::is.ggplot(plot)) - # - # CDMConnector::cdm_disconnect(cdm) -}) - -test_that("plot colour", { - # skip_if_not_installed("ggplot2") - # skip_if_not_installed("scales") - # cdm <- mockIncidencePrevalenceRef(sampleSize = 1000) - # cdm <- generateDenominatorCohortSet( - # cdm = cdm,name = "denominator", - # ageGroup = list( - # c(0, 30), - # c(31, 100) - # ) - # ) - # inc <- estimateIncidence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # summarisedResult = TRUE - # ) - # - # plot <- plotIncidence(inc, - # colour = "denominator_age_group", - # colour_name = "Age group" - # ) - # expect_true(ggplot2::is.ggplot(plot)) - # - # # multiple grouping - # cdm <- generateDenominatorCohortSet( - # cdm = cdm,name = "denominator", - # ageGroup = list( - # c(0, 30), - # c(31, 100) - # ), - # sex = c("Male", "Female") - # ) - # inc <- estimateIncidence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # summarisedResult = TRUE - # ) - # - # plot <- plotIncidence(inc, - # colour = c( - # "denominator_age_group", - # "denominator_sex" - # ) - # ) - # - # expect_true(ggplot2::is.ggplot(plot)) - # - # CDMConnector::cdm_disconnect(cdm) -}) - -test_that("plot options", { - # skip_if_not_installed("ggplot2") - # skip_if_not_installed("scales") - # cdm <- mockIncidencePrevalenceRef(sampleSize = 10000) - # cdm <- generateDenominatorCohortSet( - # cdm = cdm, - # name = "denominator", - # ageGroup = list(c(0, 30), - # c(31, 100)) - # ) - # inc <- estimateIncidence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # summarisedResult = TRUE - # ) - # - # plotOptions <- list('hideConfidenceInterval' = TRUE, - # 'facetNcols' = 1) - # plot <- plotIncidence(inc, - # colour = "denominator_age_group", - # colour_name = "Age group", - # options = plotOptions) - # expect_true(ggplot2::is.ggplot(plot)) - # - # # prevalence - # prev <- estimatePrevalence( - # cdm = cdm, interval = "years", - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # minCellCount = 0 - # ) - # - # plot <- plotPrevalence(prev, - # colour = c("denominator_age_group", - # "denominator_sex"), - # options = plotOptions) - # - # expect_true(ggplot2::is.ggplot(plot)) - # - # CDMConnector::cdm_disconnect(cdm) -}) - - -# original result format - test_that("basic incidence plot", { skip_on_cran() skip_if_not_installed("ggplot2") skip_if_not_installed("scales") - cdm <- mockIncidencePrevalenceRef(sampleSize = 1000) + + cdm <- mockIncidencePrevalenceRef(sampleSize = 100) cdm <- generateDenominatorCohortSet( cdm = cdm, name = "denominator", cohortDateRange = c(as.Date("2008-01-01"), as.Date("2018-01-01")) @@ -263,7 +33,7 @@ test_that("basic incidence plot", { expect_true(ggplot2::is.ggplot(plot)) CDMConnector::cdm_disconnect(cdm) - }) +}) test_that("basic prevalence plot", { skip_on_cran() @@ -297,7 +67,7 @@ test_that("basic prevalence plot", { outcomeTable = "outcome", minCellCount = 0 ) plot <- plotPrevalence(prev, - x = "denominator_age_group" + x = "denominator_age_group" ) expect_true(ggplot2::is.ggplot(plot)) @@ -316,14 +86,22 @@ test_that("plot facets", { c(31, 100) ) ) + inc <- estimateIncidence( + cdm = cdm, + denominatorTable = "denominator", + outcomeTable = "outcome", minCellCount = 0 + ) + plot_orig <- plotIncidence(inc, facet = "denominator_age_group") inc <- estimateIncidence( cdm = cdm, denominatorTable = "denominator", outcomeTable = "outcome" ) + plot_sr <- plotIncidence(inc, facet = "denominator_age_group") + + expect_true(ggplot2::is.ggplot(plot_sr)) + - plot <- plotIncidence(inc, facet = "denominator_age_group") - expect_true(ggplot2::is.ggplot(plot)) # multiple facet grouping cdm <- generateDenominatorCohortSet( @@ -341,10 +119,10 @@ test_that("plot facets", { ) plot <- plotIncidence(inc, - facet = c( - "denominator_age_group", - "denominator_sex" - ) + facet = c( + "denominator_age_group", + "denominator_sex" + ) ) expect_true(ggplot2::is.ggplot(plot)) @@ -370,8 +148,8 @@ test_that("plot colour", { ) plot <- plotIncidence(inc, - colour = "denominator_age_group", - colour_name = "Age group" + colour = "denominator_age_group", + colour_name = "Age group" ) expect_true(ggplot2::is.ggplot(plot)) @@ -391,10 +169,10 @@ test_that("plot colour", { ) plot <- plotIncidence(inc, - colour = c( - "denominator_age_group", - "denominator_sex" - ) + colour = c( + "denominator_age_group", + "denominator_sex" + ) ) expect_true(ggplot2::is.ggplot(plot)) @@ -437,7 +215,7 @@ test_that("plot options", { plot <- plotPrevalence(prev, colour = c("denominator_age_group", - "denominator_sex"), + "denominator_sex"), options = plotOptions) expect_true(ggplot2::is.ggplot(plot)) @@ -445,3 +223,4 @@ test_that("plot options", { CDMConnector::cdm_disconnect(cdm) }) + diff --git a/tests/testthat/test-tables.R b/tests/testthat/test-tables.R index 15f2819..255a8b2 100644 --- a/tests/testthat/test-tables.R +++ b/tests/testthat/test-tables.R @@ -1,118 +1,40 @@ test_that("test tables", { - # cdm <- mockIncidencePrevalenceRef() - # - # cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator") - # - # prev_period <- estimatePeriodPrevalence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # summarisedResult = TRUE - # ) - # - # gt1 <- tablePrevalence(prev_period, prevalenceType = "period", - # header = c("group", "strata"), - # denominatorSettings = FALSE) - # expect_true("gt_tbl" %in% class(gt1)) - # expect_true(all(colnames(gt1$`_data`) == c( - # 'Database name', 'Outcome cohort name', 'Estimate name', 'Prevalence start date', 'Prevalence end date', '[header]Denominator cohort name\n[header_level]Denominator cohort 1' - # ))) - # expect_true(all(unique(gt1$`_data`$`Estimate name`) == c( - # "Denominator (N)", "Outcome (N)", "Prevalence [95% CI]" - # ))) - # - # # denominator name false, outcome settings true - # expect_message( - # tib1 <- tablePrevalence( - # prev_period, - # prevalenceType = "period", - # type = "tibble", - # denominatorName = FALSE, - # outcomeSettings = TRUE, - # header = c("group", "strata"), - # denominatorSettings = FALSE - # ) - # ) - # expect_true(all(colnames(tib1) == c('Database name', 'Outcome cohort name', 'Estimate name', 'Estimate value', 'Prevalence start date', 'Prevalence end date'))) - # - # # no split strata - # fx <- tablePrevalence( - # prev_period, - # prevalenceType = "period", - # denominatorName = FALSE, - # analysisSettings = TRUE, - # outcomeName = FALSE, - # header = "cdm_name", - # splitStrata = FALSE, - # type = "tibble", - # denominatorSettings = FALSE - # ) - # expect_true(all(colnames(fx$body$dataset) == c( - # 'Strata name', 'Strata level', 'Estimate name', 'Prevalence start date', 'Prevalence end date', 'Analysis type', - # 'Analysis interval', 'Analysis complete database intervals', 'Analysis full contribution', 'Database name\nmock' - # ))) - # - # # estimate in header - # fx2 <- tablePrevalence( - # prev_period, - # header = c("strata", "estimate"), - # splitStrata = TRUE, - # type = "flextable", - # prevalenceType = "period", - # denominatorSettings = FALSE - # ) - # expect_true("flextable" %in% class(fx2)) - # expect_true(all(colnames(fx2$body$dataset) == c( - # 'Database name', 'Denominator cohort name', 'Outcome cohort name', 'Prevalence start date', 'Prevalence end date', 'Denominator (N)', 'Outcome (N)', 'Prevalence [95% CI]' - # ))) - # - # # point prevalence - # prev_point <- estimatePointPrevalence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # summarisedResult = TRUE - # ) - # gt2 <- tablePrevalence(prev_point, denominatorSettings = TRUE, prevalenceType = "point") - # expect_true(all(colnames(gt2$`_data`) == c( - # 'Database name', 'Denominator cohort name', 'Prevalence start date', 'Prevalence end date', - # 'Denominator age group', 'Denominator sex', 'Denominator days prior observation', 'Denominator start date', - # 'Denominator end date', 'Denominator target cohort name', - # '[header_level]Outcome cohort name\n[header_level]Cohort 1\n[header_level]Denominator (N)', - # '[header_level]Outcome cohort name\n[header_level]Cohort 1\n[header_level]Outcome (N)', - # '[header_level]Outcome cohort name\n[header_level]Cohort 1\n[header_level]Prevalence [95% CI]' - # ))) - # - # # incidence - # cdm$denominator <- cdm$denominator %>% - # dplyr::mutate(my_strata = dplyr::if_else(year(cohort_start_date) < 1995, - # "first", "second")) %>% - # dplyr::compute() - # inc <- estimateIncidence( - # cdm = cdm, - # denominatorTable = "denominator", - # outcomeTable = "outcome", - # interval = "months", - # strata = list(c("my_strata")), - # summarisedResult = TRUE - # ) - # tableInc <- tableIncidence(inc, - # outcomeName = FALSE, - # outcomeSettings = FALSE, - # type = "flextable", - # header = c("group", "strata"), - # denominatorSettings = FALSE) - # expect_true("flextable" %in% class(tableInc)) - # expect_true(all(colnames(tableInc$body$dataset) == c( - # 'Database name', 'Estimate name', 'Incidence start date', 'Incidence end date', - # 'Denominator cohort name\nDenominator cohort 1\nMy strata\nOverall', - # 'Denominator cohort name\nDenominator cohort 1\nMy strata\nFirst' - # ))) - # - # - # # no split strata - # expect_no_error(tableIncidence(inc, outcomeName = FALSE, - # outcomeSettings = TRUE, type = "flextable")) - # - # CDMConnector::cdm_disconnect(cdm) + cdm <- mockIncidencePrevalenceRef() + + cdm <- generateDenominatorCohortSet(cdm = cdm, name = "denominator") + + prev_period <- estimatePeriodPrevalence( + cdm = cdm, + denominatorTable = "denominator", + outcomeTable = "outcome" + ) + + gt1 <- tablePrevalence(prev_period, type = "gt") + expect_true("gt_tbl" %in% class(gt1)) + + # point prevalence + prev_point <- estimatePointPrevalence( + cdm = cdm, + denominatorTable = "denominator", + outcomeTable = "outcome" + ) + expect_no_error(gt2 <- tablePrevalence(prev_point)) + + # incidence + cdm$denominator <- cdm$denominator %>% + dplyr::mutate(my_strata = dplyr::if_else(year(cohort_start_date) < 1995, + "first", "second")) %>% + dplyr::compute() + inc <- estimateIncidence( + cdm = cdm, + denominatorTable = "denominator", + outcomeTable = "outcome", + interval = "months", + strata = list(c("my_strata")) + ) + tableInc <- tableIncidence(inc, + type = "flextable") + expect_true("flextable" %in% class(tableInc)) + + CDMConnector::cdm_disconnect(cdm) })