v0.6

darwin-eu · Dec 11, 2023 · a4969b8 · a4969b8
1 parent dc1704d
commit a4969b8
Show file tree

Hide file tree

Showing 38 changed files with 800 additions and 901 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: IncidencePrevalence
 Title: Estimate Incidence and Prevalence using the OMOP Common Data Model
-Version: 0.5.3
+Version: 0.6.0
 Authors@R: c(
     person("Edward", "Burn", email = "[email protected]", 
     role = c("aut", "cre"), 
@@ -43,14 +43,14 @@ Imports:
     purrr (>= 0.3.5),
     rlang (>= 1.0.0),
     stringr (>= 1.5.0),
+    tibble,
     tidyr (>= 1.2.0),
     tidyselect (>= 1.2.0),
     zip (>= 2.2.0)
 Suggests: 
     knitr,
     rmarkdown,
     RPostgres,
-    tibble,
     duckdb,
     odbc,
     here,

diff --git a/NAMESPACE b/NAMESPACE
@@ -14,6 +14,7 @@ export(estimatePeriodPrevalence)
 export(estimatePointPrevalence)
 export(exportIncidencePrevalenceResults)
 export(generateDenominatorCohortSet)
+export(generateTargetDenominatorCohortSet)
 export(incidenceAttrition)
 export(incidenceSet)
 export(mockIncidencePrevalenceRef)

diff --git a/R/benchmarkIncidencePrevalence.R b/R/benchmarkIncidencePrevalence.R
@@ -89,7 +89,9 @@ benchmarkIncidencePrevalence <- function(cdm,
     )
   }
   if (!is.null(outputFolder)) {
-    checkmate::assertDirectoryExists(outputFolder)
+    if(!dir.exists(outputFolder)){
+      cli::cli_abort("{outputFolder} does not exist")
+    }
   }
   if (!is.null(outputFolder)) {
     checkmate::assertCharacter(fileName,

diff --git a/R/estimateIncidence.R b/R/estimateIncidence.R
@@ -36,9 +36,8 @@
 #' start to last cohort end) can also be estimated. If more than one option is
 #' chosen then results will be estimated for each chosen interval.
 #' @param completeDatabaseIntervals TRUE/ FALSE. Where TRUE, incidence will
-#' only be estimated for those intervals where the database
-#' captures all the interval (based on the earliest and latest observation
-#' period start dates, respectively).
+#' only be estimated for those intervals where the denominator cohort
+#' captures all the interval.
 #' @param outcomeWashout The number of days used for a 'washout' period
 #' between the end of one outcome and an individual starting to contribute
 #' time at risk. If Inf, no time can be contributed after an event has
@@ -124,6 +123,11 @@ estimateIncidence <- function(cdm,
       dplyr::pull("cohort_definition_id")
   }
 
+  if(denominatorTable == outcomeTable &&
+     any(denominatorCohortId %in% outcomeCohortId)){
+    cli::cli_abort("Denominator cohort can not be the same as the outcome cohort")
+  }
+
   ## add outcome from attribute
   outcomeRef <- CDMConnector::cohortSet(cdm[[outcomeTable]]) %>%
     dplyr::filter(.env$outcomeCohortId %in% .data$cohort_definition_id) %>%
@@ -464,6 +468,9 @@ estimateIncidence <- function(cdm,
   }
   attrition <- attrition %>%
     dplyr::left_join(analysisSettings, by = "analysis_id")
+  attrition <- obscureAttrition(attrition,
+                                minCellCount = minCellCount
+  )
 
   # return results as an IncidencePrevalenceResult class
   attr(irs, "attrition") <- attrition

diff --git a/R/estimatePrevalence.R b/R/estimatePrevalence.R
@@ -29,9 +29,6 @@
 #' @param outcomeCohortId The cohort definition ids of the outcome
 #' cohorts of interest. If NULL all cohorts will be considered in the
 #' analysis.
-#' @param outcomeLookbackDays Days lookback when considering an outcome
-#' as prevalent. If NULL any prior outcome will be considered as prevalent. If
-#' 0, only ongoing outcomes will be considered as prevalent.
 #' @param interval Time intervals over which period prevalence is estimated. Can
 #' be "weeks", "months", "quarters", or "years". ISO weeks will
 #' be used for weeks. Calendar months, quarters, or years can be used as
@@ -73,7 +70,6 @@ estimatePointPrevalence <- function(cdm,
                                     outcomeTable,
                                     denominatorCohortId = NULL,
                                     outcomeCohortId = NULL,
-                                    outcomeLookbackDays = 0,
                                     interval = "years",
                                     timePoint = "start",
                                     strata = list(),
@@ -98,7 +94,6 @@ estimatePointPrevalence <- function(cdm,
     outcomeTable = outcomeTable,
     denominatorCohortId = denominatorCohortId,
     outcomeCohortId = outcomeCohortId,
-    outcomeLookbackDays = outcomeLookbackDays,
     type = "point",
     interval = interval,
     completeDatabaseIntervals = FALSE,
@@ -127,9 +122,6 @@ estimatePointPrevalence <- function(cdm,
 #' @param outcomeCohortId The cohort definition ids of the outcome
 #' cohorts of interest. If NULL all cohorts will be considered in the
 #' analysis.
-#' @param outcomeLookbackDays Days lookback when considering an outcome
-#' as prevalent. If NULL any prior outcome will be considered as prevalent. If
-#' 0, only ongoing outcomes will be considered as prevalent.
 #' @param interval Time intervals over which period prevalence is estimated.
 #' This can be "weeks", "months", "quarters", "years", or "overall".
 #' ISO weeks will be used for weeks. Calendar months, quarters, or
@@ -178,7 +170,6 @@ estimatePeriodPrevalence <- function(cdm,
                                      outcomeTable,
                                      denominatorCohortId = NULL,
                                      outcomeCohortId = NULL,
-                                     outcomeLookbackDays = 0,
                                      interval = "years",
                                      completeDatabaseIntervals = TRUE,
                                      fullContribution = FALSE,
@@ -193,7 +184,6 @@ estimatePeriodPrevalence <- function(cdm,
     outcomeTable = outcomeTable,
     denominatorCohortId = denominatorCohortId,
     outcomeCohortId = outcomeCohortId,
-    outcomeLookbackDays = outcomeLookbackDays,
     type = "period",
     interval = interval,
     completeDatabaseIntervals = completeDatabaseIntervals,
@@ -212,7 +202,6 @@ estimatePrevalence <- function(cdm,
                                outcomeTable,
                                denominatorCohortId = NULL,
                                outcomeCohortId = NULL,
-                               outcomeLookbackDays = 0,
                                type = "point",
                                interval = "months",
                                completeDatabaseIntervals = TRUE,
@@ -239,7 +228,7 @@ estimatePrevalence <- function(cdm,
   checkInputEstimatePrevalence(
     cdm, denominatorTable, outcomeTable,
     denominatorCohortId, outcomeCohortId,
-    outcomeLookbackDays, type,
+    type,
     interval, completeDatabaseIntervals,
     fullContribution, timePoint,
     minCellCount, temporary,
@@ -278,16 +267,12 @@ estimatePrevalence <- function(cdm,
 
   studySpecs <- tidyr::expand_grid(
     outcomeCohortId = outcomeCohortId,
-    outcomeLookbackDays = outcomeLookbackDays,
     denominatorCohortId = denominatorCohortId,
     interval = interval,
     timePoint = timePoint,
     fullContribution = fullContribution,
     completeDatabaseIntervals = completeDatabaseIntervals
   )
-  if (is.null(outcomeLookbackDays)) {
-    studySpecs$outcomeLookbackDays <- NA
-  }
 
   studySpecs <- studySpecs %>%
     dplyr::mutate(analysis_id = as.character(dplyr::row_number()))
@@ -317,7 +302,6 @@ estimatePrevalence <- function(cdm,
       denominatorCohortId = x$denominatorCohortId,
       outcomeTable = outcomeTable,
       outcomeCohortId = x$outcomeCohortId,
-      outcomeLookbackDays = x$outcomeLookbackDays,
       type = type,
       interval = x$interval,
       completeDatabaseIntervals = x$completeDatabaseIntervals,
@@ -342,7 +326,6 @@ estimatePrevalence <- function(cdm,
       analysis_id = x$analysis_id,
       outcome_cohort_id = x$outcomeCohortId,
       denominator_cohort_id = x$denominatorCohortId,
-      analysis_outcome_lookback_days = x$outcomeLookbackDays,
       analysis_type = type,
       analysis_interval = x$interval,
       analysis_complete_database_intervals = x$completeDatabaseIntervals,
@@ -518,6 +501,10 @@ estimatePrevalence <- function(cdm,
   ) %>%
     dplyr::select(-"denominator_cohort_id") %>%
     dplyr::relocate("analysis_id")
+  # obscure counts
+  attrition <- obscureAttrition(attrition,
+                       minCellCount = minCellCount
+  )
 
   analysisSettings <- analysisSettings %>%
     dplyr::left_join(outcomeRef, by = "outcome_cohort_id") %>%

diff --git a/R/exportIncidencePrevalenceResults.R b/R/exportIncidencePrevalenceResults.R
@@ -65,11 +65,13 @@ exportIncidencePrevalenceResults <- function(resultList,
   checkmate::assertTRUE(checkResultType,
     add = errorMessage
   )
-  checkmate::assertDirectoryExists(outputFolder,
-    add = errorMessage
-  )
   checkmate::reportAssertions(collection = errorMessage)
 
+  if(!dir.exists(outputFolder)){
+    cli::cli_abort("{outputFolder} does not exist")
+  }
+
+
   tempDir <- zipName
   tempDirCreated <- FALSE
   if (!dir.exists(tempDir)) {

diff --git a/R/generateDenominatorCohortSet.R b/R/generateDenominatorCohortSet.R
@@ -46,12 +46,83 @@
 #' all combinations of ageGroup, sex, and daysPriorObservation. If FALSE, only the
 #' first value specified for the other factors will be used. Consequently,
 #' order of values matters when requirementInteractions is FALSE.
+#' @param overwrite Whether to overwrite any existing table with the same name
+#'
+#' @return A cohort reference
+#' @importFrom rlang .data
+#' @export
+#'
+#' @examples
+#' \donttest{
+#' cdm <- mockIncidencePrevalenceRef(sampleSize = 10000)
+#' cdm <- generateDenominatorCohortSet(
+#'   cdm = cdm,
+#'   name = "denominator",
+#'   cohortDateRange = as.Date(c("2008-01-01", "2020-01-01"))
+#' )
+#' cdm$denominator
+#' }
+generateDenominatorCohortSet <- function(cdm,
+                                         name,
+                                         cohortDateRange = as.Date(c(NA, NA)),
+                                         ageGroup = list(c(0, 150)),
+                                         sex = "Both",
+                                         daysPriorObservation = 0,
+                                         requirementInteractions = TRUE,
+                                         overwrite = TRUE){
+
+  fetchDenominatorCohortSet(
+    cdm = cdm,
+    name = name,
+    cohortDateRange = cohortDateRange,
+    ageGroup = ageGroup,
+    sex = sex,
+    daysPriorObservation = daysPriorObservation,
+    requirementInteractions = requirementInteractions,
+    targetCohortTable = NULL,
+    targetCohortId = NULL,
+    overwrite = overwrite
+  )
+
+}
+
+#' Identify a set of denominator populations using a target cohort
+#'
+#' @description
+#' `generateTargetDenominatorCohortSet()` creates a set of cohorts that
+#' can be used for the denominator population in analyses of incidence,
+#' using `estimateIncidence()`, or prevalence, using `estimatePointPrevalence()`
+#' or `estimatePeriodPrevalence()`.
+#'
+#' @param cdm A CDM reference object
+#' @param name Name of the cohort table to be created.
 #' @param targetCohortTable A cohort table in the cdm reference to use
 #' to limit cohort entry and exit (with individuals only contributing to a
 #' cohort when they are contributing to the cohort in the target table).
 #' @param targetCohortId The cohort definition id for the cohort of interest
 #'  in the target table. If targetCohortTable is specified, a single targetCohortId
 #'  must also be specified.
+#' @param cohortDateRange Two dates. The first indicating the earliest cohort
+#' start date and the second indicating the latest possible cohort end date. If
+#' NULL or the first date is set as missing, the earliest observation_start_date
+#' in the observation_period table will be used for the former. If  NULL or the
+#' second date is set as missing, the latest observation_end_date in the
+#' observation_period table will be used for the latter.
+#' @param ageGroup A list of age groups for which cohorts will be generated. A
+#' value of `list(c(0,17), c(18,30))` would, for example, lead to the creation
+#' of cohorts for those aged from 0 to 17, and from 18 to 30. In this example
+#' an individual turning 18 during the time period would appear in both
+#' cohorts (leaving the first cohort the day before their 18th birthday and
+#' entering the second from the day of their 18th birthday).
+#' @param sex Sex of the cohorts. This can be one or more of: `"Male"`,
+#' `"Female"`, or `"Both"`.
+#' @param daysPriorObservation The number of days of prior observation observed in
+#' the database required for an individual to start contributing time in
+#' a cohort.
+#' @param requirementInteractions If TRUE, cohorts will be created for
+#' all combinations of ageGroup, sex, and daysPriorObservation. If FALSE, only the
+#' first value specified for the other factors will be used. Consequently,
+#' order of values matters when requirementInteractions is FALSE.
 #' @param overwrite Whether to overwrite any existing table with the same name
 #'
 #' @return A cohort reference
@@ -61,14 +132,43 @@
 #' @examples
 #' \donttest{
 #' cdm <- mockIncidencePrevalenceRef(sampleSize = 10000)
-#' cdm <- generateDenominatorCohortSet(
+#' cdm <- generateTargetDenominatorCohortSet(
 #'   cdm = cdm,
 #'   name = "denominator",
+#'   targetCohortTable = "target",
 #'   cohortDateRange = as.Date(c("2008-01-01", "2020-01-01"))
 #' )
 #' cdm$denominator
 #' }
-generateDenominatorCohortSet <- function(cdm,
+generateTargetDenominatorCohortSet <- function(cdm,
+                                               name,
+                                               targetCohortTable,
+                                               targetCohortId = NULL,
+                                               cohortDateRange = as.Date(c(NA, NA)),
+                                               ageGroup = list(c(0, 150)),
+                                               sex = "Both",
+                                               daysPriorObservation = 0,
+                                               requirementInteractions = TRUE,
+                                               overwrite = TRUE){
+
+  fetchDenominatorCohortSet(
+    cdm = cdm,
+    name = name,
+    cohortDateRange = cohortDateRange,
+    ageGroup = ageGroup,
+    sex = sex,
+    daysPriorObservation = daysPriorObservation,
+    requirementInteractions = requirementInteractions,
+    targetCohortTable = targetCohortTable,
+    targetCohortId = targetCohortId,
+    overwrite = overwrite
+  )
+
+}
+
+
+
+fetchDenominatorCohortSet <- function(cdm,
                                          name,
                                          cohortDateRange = as.Date(c(NA, NA)),
                                          ageGroup = list(c(0, 150)),
@@ -157,7 +257,7 @@ generateDenominatorCohortSet <- function(cdm,
   cohortCountRef <- NULL
   cohortAttritionRef <- NULL
 for(i in 1:length(denominatorSet)){
-  denom <-  generateSingleTargetDenominatorCohortSet(cdm = cdm,
+  denom <-  fetchSingleTargetDenominatorCohortSet(cdm = cdm,
                                                      name = name,
                                                      intermediateTable = paste0(intermediateTable, i),
                                                      popSpecs = denominatorSet[[i]])
@@ -226,7 +326,7 @@ for(i in 1:length(denominatorSet)){
 
 
 # Generates denominator cohorts for a single target id or no target
-generateSingleTargetDenominatorCohortSet <- function(cdm,
+fetchSingleTargetDenominatorCohortSet <- function(cdm,
                                          name,
                                          intermediateTable,
                                          popSpecs) {
@@ -259,7 +359,12 @@ generateSingleTargetDenominatorCohortSet <- function(cdm,
     dplyr::pull()
 
   if (denominatorPopulationNrows == 0) {
-    cli::cli_warn("- No people found for any denominator population")
+    if(all(is.na(popSpecs$targetCohortId))){
+      cli::cli_warn("- No people found for denominator population")
+    } else {
+      cli::cli_alert_info("- No people found for target cohort id {unique(popSpecs$targetCohortId)}")
+    }
+
     studyPops <- dpop$denominator_population %>%
       dplyr::select(
         "cohort_definition_id" = "gender_concept_id",