Skip to content

Commit

Permalink
v0.6
Browse files Browse the repository at this point in the history
  • Loading branch information
edward-burn committed Dec 11, 2023
1 parent dc1704d commit a4969b8
Show file tree
Hide file tree
Showing 38 changed files with 800 additions and 901 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: IncidencePrevalence
Title: Estimate Incidence and Prevalence using the OMOP Common Data Model
Version: 0.5.3
Version: 0.6.0
Authors@R: c(
person("Edward", "Burn", email = "[email protected]",
role = c("aut", "cre"),
Expand Down Expand Up @@ -43,14 +43,14 @@ Imports:
purrr (>= 0.3.5),
rlang (>= 1.0.0),
stringr (>= 1.5.0),
tibble,
tidyr (>= 1.2.0),
tidyselect (>= 1.2.0),
zip (>= 2.2.0)
Suggests:
knitr,
rmarkdown,
RPostgres,
tibble,
duckdb,
odbc,
here,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export(estimatePeriodPrevalence)
export(estimatePointPrevalence)
export(exportIncidencePrevalenceResults)
export(generateDenominatorCohortSet)
export(generateTargetDenominatorCohortSet)
export(incidenceAttrition)
export(incidenceSet)
export(mockIncidencePrevalenceRef)
Expand Down
4 changes: 3 additions & 1 deletion R/benchmarkIncidencePrevalence.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ benchmarkIncidencePrevalence <- function(cdm,
)
}
if (!is.null(outputFolder)) {
checkmate::assertDirectoryExists(outputFolder)
if(!dir.exists(outputFolder)){
cli::cli_abort("{outputFolder} does not exist")
}
}
if (!is.null(outputFolder)) {
checkmate::assertCharacter(fileName,
Expand Down
13 changes: 10 additions & 3 deletions R/estimateIncidence.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,8 @@
#' start to last cohort end) can also be estimated. If more than one option is
#' chosen then results will be estimated for each chosen interval.
#' @param completeDatabaseIntervals TRUE/ FALSE. Where TRUE, incidence will
#' only be estimated for those intervals where the database
#' captures all the interval (based on the earliest and latest observation
#' period start dates, respectively).
#' only be estimated for those intervals where the denominator cohort
#' captures all the interval.
#' @param outcomeWashout The number of days used for a 'washout' period
#' between the end of one outcome and an individual starting to contribute
#' time at risk. If Inf, no time can be contributed after an event has
Expand Down Expand Up @@ -124,6 +123,11 @@ estimateIncidence <- function(cdm,
dplyr::pull("cohort_definition_id")
}

if(denominatorTable == outcomeTable &&
any(denominatorCohortId %in% outcomeCohortId)){
cli::cli_abort("Denominator cohort can not be the same as the outcome cohort")
}

## add outcome from attribute
outcomeRef <- CDMConnector::cohortSet(cdm[[outcomeTable]]) %>%
dplyr::filter(.env$outcomeCohortId %in% .data$cohort_definition_id) %>%
Expand Down Expand Up @@ -464,6 +468,9 @@ estimateIncidence <- function(cdm,
}
attrition <- attrition %>%
dplyr::left_join(analysisSettings, by = "analysis_id")
attrition <- obscureAttrition(attrition,
minCellCount = minCellCount
)

# return results as an IncidencePrevalenceResult class
attr(irs, "attrition") <- attrition
Expand Down
23 changes: 5 additions & 18 deletions R/estimatePrevalence.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@
#' @param outcomeCohortId The cohort definition ids of the outcome
#' cohorts of interest. If NULL all cohorts will be considered in the
#' analysis.
#' @param outcomeLookbackDays Days lookback when considering an outcome
#' as prevalent. If NULL any prior outcome will be considered as prevalent. If
#' 0, only ongoing outcomes will be considered as prevalent.
#' @param interval Time intervals over which period prevalence is estimated. Can
#' be "weeks", "months", "quarters", or "years". ISO weeks will
#' be used for weeks. Calendar months, quarters, or years can be used as
Expand Down Expand Up @@ -73,7 +70,6 @@ estimatePointPrevalence <- function(cdm,
outcomeTable,
denominatorCohortId = NULL,
outcomeCohortId = NULL,
outcomeLookbackDays = 0,
interval = "years",
timePoint = "start",
strata = list(),
Expand All @@ -98,7 +94,6 @@ estimatePointPrevalence <- function(cdm,
outcomeTable = outcomeTable,
denominatorCohortId = denominatorCohortId,
outcomeCohortId = outcomeCohortId,
outcomeLookbackDays = outcomeLookbackDays,
type = "point",
interval = interval,
completeDatabaseIntervals = FALSE,
Expand Down Expand Up @@ -127,9 +122,6 @@ estimatePointPrevalence <- function(cdm,
#' @param outcomeCohortId The cohort definition ids of the outcome
#' cohorts of interest. If NULL all cohorts will be considered in the
#' analysis.
#' @param outcomeLookbackDays Days lookback when considering an outcome
#' as prevalent. If NULL any prior outcome will be considered as prevalent. If
#' 0, only ongoing outcomes will be considered as prevalent.
#' @param interval Time intervals over which period prevalence is estimated.
#' This can be "weeks", "months", "quarters", "years", or "overall".
#' ISO weeks will be used for weeks. Calendar months, quarters, or
Expand Down Expand Up @@ -178,7 +170,6 @@ estimatePeriodPrevalence <- function(cdm,
outcomeTable,
denominatorCohortId = NULL,
outcomeCohortId = NULL,
outcomeLookbackDays = 0,
interval = "years",
completeDatabaseIntervals = TRUE,
fullContribution = FALSE,
Expand All @@ -193,7 +184,6 @@ estimatePeriodPrevalence <- function(cdm,
outcomeTable = outcomeTable,
denominatorCohortId = denominatorCohortId,
outcomeCohortId = outcomeCohortId,
outcomeLookbackDays = outcomeLookbackDays,
type = "period",
interval = interval,
completeDatabaseIntervals = completeDatabaseIntervals,
Expand All @@ -212,7 +202,6 @@ estimatePrevalence <- function(cdm,
outcomeTable,
denominatorCohortId = NULL,
outcomeCohortId = NULL,
outcomeLookbackDays = 0,
type = "point",
interval = "months",
completeDatabaseIntervals = TRUE,
Expand All @@ -239,7 +228,7 @@ estimatePrevalence <- function(cdm,
checkInputEstimatePrevalence(
cdm, denominatorTable, outcomeTable,
denominatorCohortId, outcomeCohortId,
outcomeLookbackDays, type,
type,
interval, completeDatabaseIntervals,
fullContribution, timePoint,
minCellCount, temporary,
Expand Down Expand Up @@ -278,16 +267,12 @@ estimatePrevalence <- function(cdm,

studySpecs <- tidyr::expand_grid(
outcomeCohortId = outcomeCohortId,
outcomeLookbackDays = outcomeLookbackDays,
denominatorCohortId = denominatorCohortId,
interval = interval,
timePoint = timePoint,
fullContribution = fullContribution,
completeDatabaseIntervals = completeDatabaseIntervals
)
if (is.null(outcomeLookbackDays)) {
studySpecs$outcomeLookbackDays <- NA
}

studySpecs <- studySpecs %>%
dplyr::mutate(analysis_id = as.character(dplyr::row_number()))
Expand Down Expand Up @@ -317,7 +302,6 @@ estimatePrevalence <- function(cdm,
denominatorCohortId = x$denominatorCohortId,
outcomeTable = outcomeTable,
outcomeCohortId = x$outcomeCohortId,
outcomeLookbackDays = x$outcomeLookbackDays,
type = type,
interval = x$interval,
completeDatabaseIntervals = x$completeDatabaseIntervals,
Expand All @@ -342,7 +326,6 @@ estimatePrevalence <- function(cdm,
analysis_id = x$analysis_id,
outcome_cohort_id = x$outcomeCohortId,
denominator_cohort_id = x$denominatorCohortId,
analysis_outcome_lookback_days = x$outcomeLookbackDays,
analysis_type = type,
analysis_interval = x$interval,
analysis_complete_database_intervals = x$completeDatabaseIntervals,
Expand Down Expand Up @@ -518,6 +501,10 @@ estimatePrevalence <- function(cdm,
) %>%
dplyr::select(-"denominator_cohort_id") %>%
dplyr::relocate("analysis_id")
# obscure counts
attrition <- obscureAttrition(attrition,
minCellCount = minCellCount
)

analysisSettings <- analysisSettings %>%
dplyr::left_join(outcomeRef, by = "outcome_cohort_id") %>%
Expand Down
8 changes: 5 additions & 3 deletions R/exportIncidencePrevalenceResults.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,13 @@ exportIncidencePrevalenceResults <- function(resultList,
checkmate::assertTRUE(checkResultType,
add = errorMessage
)
checkmate::assertDirectoryExists(outputFolder,
add = errorMessage
)
checkmate::reportAssertions(collection = errorMessage)

if(!dir.exists(outputFolder)){
cli::cli_abort("{outputFolder} does not exist")
}


tempDir <- zipName
tempDirCreated <- FALSE
if (!dir.exists(tempDir)) {
Expand Down
115 changes: 110 additions & 5 deletions R/generateDenominatorCohortSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,83 @@
#' all combinations of ageGroup, sex, and daysPriorObservation. If FALSE, only the
#' first value specified for the other factors will be used. Consequently,
#' order of values matters when requirementInteractions is FALSE.
#' @param overwrite Whether to overwrite any existing table with the same name
#'
#' @return A cohort reference
#' @importFrom rlang .data
#' @export
#'
#' @examples
#' \donttest{
#' cdm <- mockIncidencePrevalenceRef(sampleSize = 10000)
#' cdm <- generateDenominatorCohortSet(
#' cdm = cdm,
#' name = "denominator",
#' cohortDateRange = as.Date(c("2008-01-01", "2020-01-01"))
#' )
#' cdm$denominator
#' }
generateDenominatorCohortSet <- function(cdm,
name,
cohortDateRange = as.Date(c(NA, NA)),
ageGroup = list(c(0, 150)),
sex = "Both",
daysPriorObservation = 0,
requirementInteractions = TRUE,
overwrite = TRUE){

fetchDenominatorCohortSet(
cdm = cdm,
name = name,
cohortDateRange = cohortDateRange,
ageGroup = ageGroup,
sex = sex,
daysPriorObservation = daysPriorObservation,
requirementInteractions = requirementInteractions,
targetCohortTable = NULL,
targetCohortId = NULL,
overwrite = overwrite
)

}

#' Identify a set of denominator populations using a target cohort
#'
#' @description
#' `generateTargetDenominatorCohortSet()` creates a set of cohorts that
#' can be used for the denominator population in analyses of incidence,
#' using `estimateIncidence()`, or prevalence, using `estimatePointPrevalence()`
#' or `estimatePeriodPrevalence()`.
#'
#' @param cdm A CDM reference object
#' @param name Name of the cohort table to be created.
#' @param targetCohortTable A cohort table in the cdm reference to use
#' to limit cohort entry and exit (with individuals only contributing to a
#' cohort when they are contributing to the cohort in the target table).
#' @param targetCohortId The cohort definition id for the cohort of interest
#' in the target table. If targetCohortTable is specified, a single targetCohortId
#' must also be specified.
#' @param cohortDateRange Two dates. The first indicating the earliest cohort
#' start date and the second indicating the latest possible cohort end date. If
#' NULL or the first date is set as missing, the earliest observation_start_date
#' in the observation_period table will be used for the former. If NULL or the
#' second date is set as missing, the latest observation_end_date in the
#' observation_period table will be used for the latter.
#' @param ageGroup A list of age groups for which cohorts will be generated. A
#' value of `list(c(0,17), c(18,30))` would, for example, lead to the creation
#' of cohorts for those aged from 0 to 17, and from 18 to 30. In this example
#' an individual turning 18 during the time period would appear in both
#' cohorts (leaving the first cohort the day before their 18th birthday and
#' entering the second from the day of their 18th birthday).
#' @param sex Sex of the cohorts. This can be one or more of: `"Male"`,
#' `"Female"`, or `"Both"`.
#' @param daysPriorObservation The number of days of prior observation observed in
#' the database required for an individual to start contributing time in
#' a cohort.
#' @param requirementInteractions If TRUE, cohorts will be created for
#' all combinations of ageGroup, sex, and daysPriorObservation. If FALSE, only the
#' first value specified for the other factors will be used. Consequently,
#' order of values matters when requirementInteractions is FALSE.
#' @param overwrite Whether to overwrite any existing table with the same name
#'
#' @return A cohort reference
Expand All @@ -61,14 +132,43 @@
#' @examples
#' \donttest{
#' cdm <- mockIncidencePrevalenceRef(sampleSize = 10000)
#' cdm <- generateDenominatorCohortSet(
#' cdm <- generateTargetDenominatorCohortSet(
#' cdm = cdm,
#' name = "denominator",
#' targetCohortTable = "target",
#' cohortDateRange = as.Date(c("2008-01-01", "2020-01-01"))
#' )
#' cdm$denominator
#' }
generateDenominatorCohortSet <- function(cdm,
generateTargetDenominatorCohortSet <- function(cdm,
name,
targetCohortTable,
targetCohortId = NULL,
cohortDateRange = as.Date(c(NA, NA)),
ageGroup = list(c(0, 150)),
sex = "Both",
daysPriorObservation = 0,
requirementInteractions = TRUE,
overwrite = TRUE){

fetchDenominatorCohortSet(
cdm = cdm,
name = name,
cohortDateRange = cohortDateRange,
ageGroup = ageGroup,
sex = sex,
daysPriorObservation = daysPriorObservation,
requirementInteractions = requirementInteractions,
targetCohortTable = targetCohortTable,
targetCohortId = targetCohortId,
overwrite = overwrite
)

}



fetchDenominatorCohortSet <- function(cdm,
name,
cohortDateRange = as.Date(c(NA, NA)),
ageGroup = list(c(0, 150)),
Expand Down Expand Up @@ -157,7 +257,7 @@ generateDenominatorCohortSet <- function(cdm,
cohortCountRef <- NULL
cohortAttritionRef <- NULL
for(i in 1:length(denominatorSet)){
denom <- generateSingleTargetDenominatorCohortSet(cdm = cdm,
denom <- fetchSingleTargetDenominatorCohortSet(cdm = cdm,
name = name,
intermediateTable = paste0(intermediateTable, i),
popSpecs = denominatorSet[[i]])
Expand Down Expand Up @@ -226,7 +326,7 @@ for(i in 1:length(denominatorSet)){


# Generates denominator cohorts for a single target id or no target
generateSingleTargetDenominatorCohortSet <- function(cdm,
fetchSingleTargetDenominatorCohortSet <- function(cdm,
name,
intermediateTable,
popSpecs) {
Expand Down Expand Up @@ -259,7 +359,12 @@ generateSingleTargetDenominatorCohortSet <- function(cdm,
dplyr::pull()

if (denominatorPopulationNrows == 0) {
cli::cli_warn("- No people found for any denominator population")
if(all(is.na(popSpecs$targetCohortId))){
cli::cli_warn("- No people found for denominator population")
} else {
cli::cli_alert_info("- No people found for target cohort id {unique(popSpecs$targetCohortId)}")
}

studyPops <- dpop$denominator_population %>%
dplyr::select(
"cohort_definition_id" = "gender_concept_id",
Expand Down
Loading

0 comments on commit a4969b8

Please sign in to comment.