diff --git a/DESCRIPTION b/DESCRIPTION index f0ef76c..fc924cf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: CodelistGenerator Title: Generate Code Lists for the OMOP Common Data Model -Version: 2.0.0 +Version: 2.1.0 Authors@R: c( person("Edward", "Burn", email = "edward.burn@ndorms.ox.ac.uk", role = c("aut", "cre"), diff --git a/R/codesFromConceptSet.R b/R/codesFromConceptSet.R index c296391..0710fe1 100644 --- a/R/codesFromConceptSet.R +++ b/R/codesFromConceptSet.R @@ -184,8 +184,10 @@ extractCodes <- function(file, unknown) { appendDescendants <- function(codelistTibble, cdm) { cdm[["concept_ancestor"]] %>% dplyr::select("ancestor_concept_id", "descendant_concept_id") %>% + dplyr::mutate(ancestor_concept_id = as.integer(.data$ancestor_concept_id)) %>% dplyr::inner_join( - codelistTibble %>% + codelistTibble%>% + dplyr::mutate(concept_id = as.integer(.data$concept_id)) %>% dplyr::filter(.data$include_descendants == TRUE) %>% dplyr::rename("ancestor_concept_id" = "concept_id"), by = "ancestor_concept_id", @@ -283,14 +285,16 @@ addDetails <- function(conceptList, cdm){ formatConceptList <- function(conceptList, cdm) { conceptList <- conceptList %>% dplyr::filter(.data$include_descendants == FALSE) %>% - dplyr::union( + dplyr::union_all( cdm[["concept_ancestor"]] %>% dplyr::select( "concept_id" = "ancestor_concept_id", "descendant_concept_id" ) %>% + dplyr::mutate(concept_id = as.integer(.data$concept_id)) %>% dplyr::inner_join( conceptList %>% + dplyr::mutate(concept_id = as.integer(.data$concept_id)) %>% dplyr::filter(.data$include_descendants == TRUE), copy = TRUE, by = "concept_id" diff --git a/R/drugCodes.R b/R/drugCodes.R index ac40c60..f176260 100644 --- a/R/drugCodes.R +++ b/R/drugCodes.R @@ -25,12 +25,15 @@ #' @param doseForm Only descendants codes with the specified dose form #' will be returned. If NULL, descendant codes will be returned regardless #' of dose form. -#' @param withConceptDetails If FALSE a vector of concept IDs will be returned -#' for each ATC group If TRUE a tibble will be returned with additional +#' @param withConceptDetails If FALSE, each item in the list of results (one per +#' ATC group) will contain a vector of concept IDs for each ingredient. If +#' TRUE each item in the list of results will contain a tibble with additional #' information on the identified concepts. #' -#' @return A named list, with each element containing the descendant -#' concepts for a particular ATC group +#' @return A named list, with each item containing a vector of descendant +#' concepts of an ATC group (if withConceptDetails was set as FALSE) or a +#' tibble with the descendant concepts along with additional details about them +#' (if withConceptDetails was set as TRUE). #' @export #' #' @examples @@ -159,12 +162,15 @@ getATCCodes <- function(cdm, #' @param doseForm Only descendants codes with the specified dose form #' will be returned. If NULL, descendant codes will be returned regardless #' of dose form. -#' @param withConceptDetails If FALSE a vector of concept IDs will be returned -#' for each ingredient. If TRUE a tibble will be returned with additional +#' @param withConceptDetails If FALSE, each item in the list of results (one per +#' ingredient) will contain a vector of concept IDs for each ingredient. If +#' TRUE each item in the list of results will contain a tibble with additional #' information on the identified concepts. #' -#' @return A named list, with each item containing descendant concepts of -#' an ingredient +#' @return A named list, with each item containing a vector of descendant +#' concepts of an ingredient (if withConceptDetails was set as FALSE) or a +#' tibble with the descendant concepts along with additional details about them +#' (if withConceptDetails was set as TRUE). #' @export #' #' @examples diff --git a/R/summariseCodeUse.R b/R/summariseCodeUse.R index a550618..00ffc89 100644 --- a/R/summariseCodeUse.R +++ b/R/summariseCodeUse.R @@ -25,18 +25,31 @@ summariseCodeUse <- function(x, ageGroup = NULL, minCellCount = 5){ - codeUse <- getCodeUse(x, - cdm = cdm, - cohortTable = NULL, - cohortId = NULL, - timing = "any", - countBy = countBy, - byConcept = byConcept, - byYear = byYear, - bySex = bySex, - ageGroup = ageGroup, - minCellCount = minCellCount) %>% - dplyr::mutate(cohort_name = NA) + checkmate::assertList(x) + if(length(names(x)) != length(x)){ + cli::cli_abort("Must be a named list") + } + + codeUse <- list() + for(i in seq_along(x)){ + cli::cli_inform("Getting use of codes from {names(x)[i]} ({i} of {length(x)})") + codeUse[[i]] <- getCodeUse(x[[i]], + cdm = cdm, + cohortTable = NULL, + cohortId = NULL, + timing = "any", + countBy = countBy, + byConcept = byConcept, + byYear = byYear, + bySex = bySex, + ageGroup = ageGroup, + minCellCount = minCellCount) %>% + dplyr::mutate(codelist_name = names(x)[i]) %>% + dplyr::mutate(cohort_name = NA) + } + codeUse <- dplyr::bind_rows(codeUse) + + return(codeUse) @@ -78,7 +91,10 @@ summariseCohortCodeUse <- function(x, ageGroup = NULL, minCellCount = 5){ - + checkmate::assertList(x) + if(length(names(x)) != length(x)){ + cli::cli_abort("Must be a named list") + } checkDbType(cdm = cdm, type = "cdm_reference") checkmate::assertTRUE("GeneratedCohortSet" %in% class(cdm[[cohortTable]])) checkmate::assertTRUE(all(c("cohort_definition_id", "subject_id", "cohort_start_date", @@ -91,10 +107,13 @@ summariseCohortCodeUse <- function(x, cohortCodeUse <- list() for(i in seq_along(cohortId)){ + for(j in seq_along(x)){ workingCohortName <- CDMConnector::cohort_set(cdm[[cohortTable]]) %>% dplyr::filter(.data$cohort_definition_id == cohortId[[i]]) %>% dplyr::pull("cohort_name") - cohortCodeUse[[i]] <- getCodeUse(x, + + cli::cli_inform(" Getting counts of {names(x)[j]} codes for cohort {workingCohortName}") + cohortCodeUse[[paste0(i, "_", j)]] <- getCodeUse(x[[j]], cdm = cdm, cohortTable = cohortTable, cohortId = cohortId[[i]], @@ -105,8 +124,9 @@ summariseCohortCodeUse <- function(x, bySex = bySex, ageGroup = ageGroup, minCellCount = minCellCount) %>% + dplyr::mutate(codelist_name = names(x)[j]) %>% dplyr::mutate(cohort_name = workingCohortName) - } + }} cohortCodeUse <- dplyr::bind_rows(cohortCodeUse) return(cohortCodeUse) @@ -144,7 +164,6 @@ getCodeUse <- function(x, checkAgeGroup(ageGroup = ageGroup) - if(is.null(attr(cdm, "write_schema"))){ cli::cli_abort("cdm must have a write_schema specified", call = call) @@ -154,7 +173,6 @@ getCodeUse <- function(x, tolower(paste0(sample(LETTERS, 4, replace = TRUE), collapse = ""))) - codes <- dplyr::tibble(concept_id = x) %>% dplyr::left_join(cdm[["concept"]] %>% dplyr::select("concept_id", "domain_id"), @@ -171,12 +189,8 @@ getCodeUse <- function(x, timing = timing, intermediateTable = intermediateTable) - if(!is.null(records)) { - records <- records %>% - dplyr::left_join(cdm[["concept"]] %>% - dplyr::select("concept_id", "concept_name"), - by = "concept_id") - + if(!is.null(records) && + (records %>% utils::head(1) %>% dplyr::tally() %>% dplyr::pull("n") > 0)) { if(bySex == TRUE | !is.null(ageGroup)){ records <- records %>% PatientProfiles::addDemographics(cdm = cdm, @@ -206,8 +220,13 @@ getCodeUse <- function(x, codeCounts <- codeCounts %>% dplyr::mutate(group_level = dplyr::if_else(.data$group_name == "By concept", - paste0(.data$concept_name, " (", - .data$concept_id, ")"), + paste0("Standard concept: ", + .data$standard_concept_name, " (", + .data$standard_concept_id, ")", + " Source concept: ", + .data$source_concept_name, " (", + .data$source_concept_id, ")", + " Domain: ", .data$domain_id), "Overall")) %>% dplyr::mutate(variable_type = "Numeric", variable_level = "Overall", @@ -218,7 +237,13 @@ getCodeUse <- function(x, "variable_type", "estimate_type", "estimate", - "estimate_suppressed"))) + "estimate_suppressed", + "standard_concept_name", + "standard_concept_id", + "source_concept_name", + "source_concept_id", + "domain_id" + ))) } else { codeCounts <- dplyr::tibble() @@ -249,19 +274,32 @@ addDomainInfo <- function(codes, stringr::str_detect(domain_id,"observation") ~ "observation", stringr::str_detect(domain_id,"measurement") ~ "measurement", stringr::str_detect(domain_id,"visit") ~ "visit_occurrence", - stringr::str_detect(domain_id,"procedure") ~ "procedure_occurrence" + stringr::str_detect(domain_id,"procedure") ~ "procedure_occurrence", + stringr::str_detect(domain_id,"device") ~ "device_exposure" ) ) %>% - dplyr::mutate(concept_id_name = + dplyr::mutate(standard_concept_id_name = dplyr::case_when( stringr::str_detect(domain_id,"condition") ~ "condition_concept_id", stringr::str_detect(domain_id,"drug") ~ "drug_concept_id", stringr::str_detect(domain_id,"observation") ~ "observation_concept_id", stringr::str_detect(domain_id,"measurement") ~ "measurement_concept_id", stringr::str_detect(domain_id,"visit") ~ "visit_concept_id", - stringr::str_detect(domain_id,"procedure") ~ "procedure_concept_id" + stringr::str_detect(domain_id,"procedure") ~ "procedure_concept_id", + stringr::str_detect(domain_id,"device") ~ "device_concept_id" ) ) %>% + dplyr::mutate(source_concept_id_name = + dplyr::case_when( + stringr::str_detect(domain_id,"condition") ~ "condition_source_concept_id", + stringr::str_detect(domain_id,"drug") ~ "drug_source_concept_id", + stringr::str_detect(domain_id,"observation") ~ "observation_source_concept_id", + stringr::str_detect(domain_id,"measurement") ~ "measurement_source_concept_id", + stringr::str_detect(domain_id,"visit") ~ "visit_source_concept_id", + stringr::str_detect(domain_id,"procedure") ~ "procedure_source_concept_id", + stringr::str_detect(domain_id,"device") ~ "device_source_concept_id" + ) + ) %>% dplyr::mutate(date_name = dplyr::case_when( stringr::str_detect(domain_id,"condition") ~ "condition_start_date", @@ -269,10 +307,20 @@ addDomainInfo <- function(codes, stringr::str_detect(domain_id,"observation") ~ "observation_date", stringr::str_detect(domain_id,"measurement") ~ "measurement_date", stringr::str_detect(domain_id,"visit") ~ "visit_start_date", - stringr::str_detect(domain_id,"procedure") ~ "procedure_date" + stringr::str_detect(domain_id,"procedure") ~ "procedure_date", + stringr::str_detect(domain_id,"device") ~ "device_exposure_start_date" ) ) + unsupported_domains <- codes %>% + dplyr::filter(is.na(.data$table_name)) %>% + dplyr::pull("domain_id") + + if(length(unsupported_domains)>0){ + cli::cli_warn("Concepts included from non-supported domains + ({unsupported_domains})") + } + return(codes) } @@ -286,7 +334,8 @@ getRelevantRecords <- function(codes, intermediateTable){ tableName <- purrr::discard(unique(codes$table_name), is.na) - conceptIdName <- purrr::discard(unique(codes$concept_id_name), is.na) + standardConceptIdName <- purrr::discard(unique(codes$standard_concept_id_name), is.na) + sourceConceptIdName <- purrr::discard(unique(codes$source_concept_id_name), is.na) dateName <- purrr::discard(unique(codes$date_name), is.na) if(!is.null(cohortTable)){ @@ -319,13 +368,16 @@ if(length(tableName)>0){ codeRecords <- codeRecords %>% dplyr::mutate(date = !!dplyr::sym(dateName[[1]])) %>% dplyr::mutate(year = lubridate::year(date)) %>% - dplyr::select(dplyr::all_of(c("person_id", conceptIdName[[1]], + dplyr::select(dplyr::all_of(c("person_id", + standardConceptIdName[[1]], + sourceConceptIdName[[1]], "date", "year"))) %>% - dplyr::rename("concept_id" = .env$conceptIdName[[1]]) %>% + dplyr::rename("standard_concept_id" = .env$standardConceptIdName[[1]], + "source_concept_id" = .env$sourceConceptIdName[[1]]) %>% dplyr::inner_join(codes %>% dplyr::filter(.data$table_name == tableName[[1]]) %>% - dplyr::select("concept_id"), - by = "concept_id", + dplyr::select("concept_id", "domain_id"), + by = c("standard_concept_id"="concept_id"), copy = TRUE) %>% CDMConnector::computeQuery( name = paste0(intermediateTable,"_grr"), @@ -335,7 +387,7 @@ if(length(tableName)>0){ ) } else { - codeRecords <- NULL + return(NULL) } # get for any additional domains and union @@ -355,13 +407,16 @@ if(length(tableName)>0){ workingRecords <- workingRecords %>% dplyr::mutate(date = !!dplyr::sym(dateName[[i+1]])) %>% dplyr::mutate(year = lubridate::year(date)) %>% - dplyr::select(dplyr::all_of(c("person_id", conceptIdName[[i+1]], + dplyr::select(dplyr::all_of(c("person_id", + standardConceptIdName[[i+1]], + sourceConceptIdName[[i+1]], "date", "year"))) %>% - dplyr::rename("concept_id" = .env$conceptIdName[[i+1]]) %>% + dplyr::rename("standard_concept_id" = .env$standardConceptIdName[[i+1]], + "source_concept_id" = .env$sourceConceptIdName[[i+1]]) %>% dplyr::inner_join(codes %>% dplyr::filter(.data$table_name == tableName[[i+1]]) %>% - dplyr::select("concept_id"), - by = "concept_id", + dplyr::select("concept_id", "domain_id"), + by = c("standard_concept_id"="concept_id"), copy = TRUE) codeRecords <- codeRecords %>% dplyr::union_all(workingRecords) %>% @@ -374,6 +429,24 @@ if(length(tableName)>0){ } } + if(codeRecords %>% utils::head(1) %>% dplyr::tally() %>% dplyr::pull("n") >0){ + codeRecords <- codeRecords %>% + dplyr::left_join(cdm[["concept"]] %>% + dplyr::select("concept_id", "concept_name"), + by = c("standard_concept_id"="concept_id")) %>% + dplyr::rename("standard_concept_name"="concept_name") %>% + dplyr::left_join(cdm[["concept"]] %>% + dplyr::select("concept_id", "concept_name"), + by = c("source_concept_id"="concept_id")) %>% + dplyr::rename("source_concept_name"="concept_name") %>% + CDMConnector::computeQuery( + name = paste0(intermediateTable,"_grr_cr"), + temporary = FALSE, + schema = attr(cdm, "write_schema"), + overwrite = TRUE + ) + } + return(codeRecords) } @@ -396,7 +469,11 @@ recordSummary <- records %>% if(isTRUE(byConcept)) { recordSummary <- dplyr::bind_rows(recordSummary, records %>% - dplyr::group_by(.data$concept_id, .data$concept_name) %>% + dplyr::group_by(.data$standard_concept_id, + .data$standard_concept_name, + .data$source_concept_id, + .data$source_concept_name, + .data$domain_id) %>% dplyr::tally(name = "estimate") %>% dplyr::mutate(estimate = as.integer(.data$estimate), group_name = "By concept") %>% @@ -423,9 +500,15 @@ personSummary <- records %>% if(isTRUE(byConcept)) { personSummary <- dplyr::bind_rows(personSummary, records %>% - dplyr::select("person_id", "concept_id", "concept_name") %>% + dplyr::select("person_id", + "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", "domain_id") %>% dplyr::distinct() %>% - dplyr::group_by(.data$concept_id, .data$concept_name) %>% + dplyr::group_by(.data$standard_concept_id, + .data$standard_concept_name, + .data$source_concept_id, + .data$source_concept_name, + .data$domain_id) %>% dplyr::tally(name = "estimate") %>% dplyr::mutate(estimate = as.integer(.data$estimate), group_name = "By concept") %>% @@ -531,7 +614,9 @@ groupedCounts <- dplyr::bind_rows( dplyr::collect(), records %>% dplyr::group_by(dplyr::pick(.env$groupBy, - "concept_id", "concept_name")) %>% + "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", + "domain_id")) %>% dplyr::tally(name = "estimate") %>% dplyr::mutate(estimate = as.integer(.data$estimate), group_name = "By concept" @@ -565,11 +650,15 @@ getGroupedPersonCount <- function(records, dplyr::collect(), records %>% dplyr::select(dplyr::all_of(c("person_id", - "concept_id", "concept_name", + "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", + "domain_id", .env$groupBy))) %>% dplyr::distinct() %>% dplyr::group_by(dplyr::pick(.env$groupBy, - "concept_id", "concept_name")) %>% + "standard_concept_id", "standard_concept_name", + "source_concept_id", "source_concept_name", + "domain_id")) %>% dplyr::tally(name = "estimate") %>% dplyr::mutate(estimate = as.integer(.data$estimate), group_name = "By concept" diff --git a/man/getATCCodes.Rd b/man/getATCCodes.Rd index ee8f917..38970b8 100644 --- a/man/getATCCodes.Rd +++ b/man/getATCCodes.Rd @@ -26,13 +26,16 @@ concepts for these two particular ATC groups.} will be returned. If NULL, descendant codes will be returned regardless of dose form.} -\item{withConceptDetails}{If FALSE a vector of concept IDs will be returned -for each ATC group If TRUE a tibble will be returned with additional +\item{withConceptDetails}{If FALSE, each item in the list of results (one per +ATC group) will contain a vector of concept IDs for each ingredient. If +TRUE each item in the list of results will contain a tibble with additional information on the identified concepts.} } \value{ -A named list, with each element containing the descendant -concepts for a particular ATC group +A named list, with each item containing a vector of descendant +concepts of an ATC group (if withConceptDetails was set as FALSE) or a +tibble with the descendant concepts along with additional details about them +(if withConceptDetails was set as TRUE). } \description{ Get descendant codes for ATC levels diff --git a/man/getDrugIngredientCodes.Rd b/man/getDrugIngredientCodes.Rd index e78b0e8..9fb378d 100644 --- a/man/getDrugIngredientCodes.Rd +++ b/man/getDrugIngredientCodes.Rd @@ -22,13 +22,16 @@ concepts for these two particular drug ingredients.} will be returned. If NULL, descendant codes will be returned regardless of dose form.} -\item{withConceptDetails}{If FALSE a vector of concept IDs will be returned -for each ingredient. If TRUE a tibble will be returned with additional +\item{withConceptDetails}{If FALSE, each item in the list of results (one per +ingredient) will contain a vector of concept IDs for each ingredient. If +TRUE each item in the list of results will contain a tibble with additional information on the identified concepts.} } \value{ -A named list, with each item containing descendant concepts of -an ingredient +A named list, with each item containing a vector of descendant +concepts of an ingredient (if withConceptDetails was set as FALSE) or a +tibble with the descendant concepts along with additional details about them +(if withConceptDetails was set as TRUE). } \description{ Get descendant codes for drug ingredients diff --git a/tests/testthat/test-codesFrom.R b/tests/testthat/test-codesFrom.R index 08b9633..ff4a18e 100644 --- a/tests/testthat/test-codesFrom.R +++ b/tests/testthat/test-codesFrom.R @@ -1,6 +1,9 @@ test_that("test inputs - mock", { + backends <- c("database", "arrow", "data_frame") - cdm <- mockVocabRef() + for (i in seq_along(backends)) { + # mock db + cdm <- mockVocabRef(backends[[i]]) # expected errors expect_error(codesFromConceptSet()) @@ -86,8 +89,10 @@ test_that("test inputs - mock", { "cohorts_for_mock_dups") )) + if (backends[[i]] == "database") { + CDMConnector::cdmDisconnect(cdm)} - CDMConnector::cdmDisconnect(cdm) + } }) diff --git a/tests/testthat/test-summariseCodeUse.R b/tests/testthat/test-summariseCodeUse.R index 0443e3b..fc122a0 100644 --- a/tests/testthat/test-summariseCodeUse.R +++ b/tests/testthat/test-summariseCodeUse.R @@ -13,8 +13,12 @@ skip_on_cran() cdm <- CDMConnector::cdm_from_con(con, cdm_schem = "main", write_schema = "main") acetiminophen <- c(1125315, 1127433, 40229134, - 40231925, 40162522, 19133768, 1127078) - results <- summariseCodeUse(acetiminophen, + 40231925, 40162522, 19133768, 1127078) + poliovirus_vaccine <- c(40213160) + cs <- list(acetiminophen = acetiminophen, + poliovirus_vaccine = poliovirus_vaccine) + + results <- summariseCodeUse(cs, cdm = cdm, byYear = TRUE, bySex = TRUE, @@ -30,14 +34,22 @@ skip_on_cran() "variable_name", "variable_level", "variable_type", "estimate_type", "estimate", - "estimate_suppressed", "cohort_name")) + "estimate_suppressed", + "standard_concept_name", + "standard_concept_id", + "source_concept_name", + "source_concept_id", + "domain_id", + "codelist_name", + "cohort_name")) # overall record count expect_true(results %>% dplyr::filter(group_name == "Codelist" & strata_name == "Overall" & - strata_level == "Overall", + strata_level == "Overall" & + codelist_name == "acetiminophen" & variable_name == "Record count") %>% dplyr::pull("estimate") == cdm$drug_exposure %>% @@ -50,6 +62,7 @@ skip_on_cran() dplyr::filter(group_name == "Codelist" & strata_name == "Overall" & strata_level == "Overall" & + codelist_name == "acetiminophen" & variable_name == "Person count") %>% dplyr::pull("estimate") == cdm$drug_exposure %>% @@ -64,7 +77,8 @@ skip_on_cran() expect_true(results %>% dplyr::filter(group_name == "Codelist" & strata_name == "Year" & - strata_level == "2008", + strata_level == "2008" & + codelist_name == "acetiminophen" & variable_name == "Record count") %>% dplyr::pull("estimate") == cdm$drug_exposure %>% @@ -77,7 +91,8 @@ skip_on_cran() expect_true(results %>% dplyr::filter(group_name == "Codelist" & strata_name == "Year" & - strata_level == "2008", + strata_level == "2008" & + codelist_name == "acetiminophen" & variable_name == "Person count") %>% dplyr::pull("estimate") == cdm$drug_exposure %>% @@ -93,7 +108,8 @@ skip_on_cran() expect_true(results %>% dplyr::filter(group_name == "Codelist" & strata_name == "Sex" & - strata_level == "Male", + strata_level == "Male" & + codelist_name == "acetiminophen" & variable_name == "Record count") %>% dplyr::pull("estimate") == cdm$drug_exposure %>% @@ -106,7 +122,8 @@ skip_on_cran() expect_true(results %>% dplyr::filter(group_name == "Codelist" & strata_name == "Age group and sex" & - strata_level == "18 to 65 and Male", + strata_level == "18 to 65 and Male" & + codelist_name == "acetiminophen" & variable_name == "Record count") %>% dplyr::pull("estimate") == cdm$drug_exposure %>% @@ -124,7 +141,8 @@ skip_on_cran() expect_true(results %>% dplyr::filter(group_name == "Codelist" & strata_name == "Age group and sex" & - strata_level == "18 to 65 and Male", + strata_level == "18 to 65 and Male" & + codelist_name == "acetiminophen" & variable_name == "Person count") %>% dplyr::pull("estimate") == cdm$drug_exposure %>% @@ -140,10 +158,7 @@ skip_on_cran() dplyr::tally() %>% dplyr::pull("n")) - - - - results <- summariseCodeUse(acetiminophen, + results <- summariseCodeUse(list("acetiminophen" = acetiminophen), cdm = cdm, countBy = "person", byYear = FALSE, bySex = FALSE, @@ -153,7 +168,7 @@ skip_on_cran() expect_true(nrow(results %>% dplyr::filter(variable_name == "Record count")) == 0) - results <- summariseCodeUse(acetiminophen, + results <- summariseCodeUse(list("acetiminophen" = acetiminophen), cdm = cdm, countBy = "record", byYear = FALSE, bySex = FALSE, @@ -164,7 +179,7 @@ skip_on_cran() dplyr::filter(variable_name == "Record count")) > 0) # check min cell count - results <- summariseCodeUse(acetiminophen, + results <- summariseCodeUse(list("acetiminophen" = acetiminophen), cdm = cdm, byYear = FALSE, bySex = FALSE, @@ -173,44 +188,43 @@ skip_on_cran() expect_true(max(results$estimate, na.rm = TRUE) >=75) # domains covered - # condition - expect_true(nrow(summariseCodeUse(c(4112343), + expect_true(nrow(summariseCodeUse(list(cs= c(4112343)), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = NULL))>1) # visit - expect_true(nrow(summariseCodeUse(9201, + expect_true(nrow(summariseCodeUse(list(cs= c(9201)), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = NULL))>1) # drug - expect_true(nrow(summariseCodeUse(40213160, + expect_true(nrow(summariseCodeUse(list(cs= c(40213160)), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = NULL))>1) # measurement - expect_true(nrow(summariseCodeUse(3006322, + expect_true(nrow(summariseCodeUse(list(cs= c(3006322)), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = NULL))>1) # procedure and condition - expect_true(nrow(summariseCodeUse(c(4107731,4112343), + expect_true(nrow(summariseCodeUse(list(cs= c(4107731,4112343)), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = NULL))>1) # no records - expect_message(results <- summariseCodeUse(c(999999), + expect_warning(results <- summariseCodeUse(list(cs= c(999999)), cdm = cdm, byYear = FALSE, bySex = FALSE, @@ -225,39 +239,48 @@ skip_on_cran() byYear = FALSE, bySex = FALSE, ageGroup = NULL)) - expect_error(summariseCodeUse(123, + expect_error(summariseCodeUse("123", + cdm = cdm, + byYear = FALSE, + bySex = FALSE, + ageGroup = NULL)) + expect_error(summariseCodeUse(list("123"), # not named + cdm = cdm, + byYear = FALSE, + bySex = FALSE, + ageGroup = NULL)) + expect_error(summariseCodeUse(list(a = 123), cdm = "not a cdm", byYear = FALSE, bySex = FALSE, ageGroup = NULL)) - expect_error(summariseCodeUse(123, + expect_error(summariseCodeUse(list(a = 123), cdm = cdm, byYear = "Maybe", bySex = FALSE, ageGroup = NULL)) - expect_error(summariseCodeUse(123, + expect_error(summariseCodeUse(list(a = 123), cdm = cdm, byYear = FALSE, bySex = "Maybe", ageGroup = NULL)) - expect_error(summariseCodeUse(123, + expect_error(summariseCodeUse(list(a = 123), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = 25)) - expect_error(summariseCodeUse(123, + expect_error(summariseCodeUse(list(a = 123), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = list(c(18,17)))) - expect_error(summariseCodeUse(123, + expect_error(summariseCodeUse(list(a = 123), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = list(c(0,17), c(15,20)))) - CDMConnector::cdmDisconnect(cdm) @@ -286,9 +309,9 @@ test_that("summarise cohort code use - eunomia", { overwrite = TRUE) # any - results_all <- summariseCodeUse(4134304, + results_all <- summariseCodeUse(list(cs = 4134304), cdm = cdm, minCellCount = 0) - results_cohort <- summariseCohortCodeUse(4134304, + results_cohort <- summariseCohortCodeUse(list(cs = 4134304), cdm = cdm, cohortTable = "pharyngitis", timing = "any", minCellCount = 0) @@ -301,6 +324,12 @@ test_that("summarise cohort code use - eunomia", { "variable_type", "estimate_type", "estimate", "estimate_suppressed", + "standard_concept_name", + "standard_concept_id", + "source_concept_name", + "source_concept_id", + "domain_id", + "codelist_name", "cohort_name") )) @@ -322,7 +351,7 @@ test_that("summarise cohort code use - eunomia", { # at entry - everyone in the cohort should have the code - results_cohort <- summariseCohortCodeUse(pharyngitis, + results_cohort <- summariseCohortCodeUse(list(pharyngitis = pharyngitis), cdm = cdm, cohortTable = "pharyngitis", timing = "entry", @@ -350,7 +379,7 @@ test_that("summarise cohort code use - eunomia", { dplyr::count() %>% dplyr::pull() - results_cohort_260139 <- summariseCohortCodeUse(260139, + results_cohort_260139 <- summariseCohortCodeUse(list(cs = 260139), cdm = cdm, cohortTable = "pharyngitis", timing = "entry", @@ -381,7 +410,7 @@ test_that("summarise cohort code use - eunomia", { dplyr::count() %>% dplyr::pull() - results_cohort_260139_19133873_1127433<- summariseCohortCodeUse(c(260139,19133873,1127433), + results_cohort_260139_19133873_1127433<- summariseCohortCodeUse(list(cs = c(260139,19133873,1127433)), cdm = cdm, cohortTable = "pharyngitis", timing = "entry", @@ -395,8 +424,8 @@ test_that("summarise cohort code use - eunomia", { index_260139_19133873_1127433) expect_equal(results_cohort_260139_19133873_1127433 %>% - dplyr::filter(group_level == "Acute bronchitis (260139)" & - strata_name == "Overall" & + dplyr::filter(stringr::str_detect(group_level, "Acute bronchitis")) %>% + dplyr::filter(strata_name == "Overall" & strata_level == "Overall" & variable_name == "Person count") %>% dplyr::pull("estimate"), @@ -411,20 +440,20 @@ test_that("summarise cohort code use - eunomia", { end = "observation_period_end_date", overwrite = TRUE) - results_cohort_mult <- summariseCohortCodeUse(c(260139,19133873,1127433), + results_cohort_mult <- summariseCohortCodeUse(list(cs = c(260139,19133873,1127433)), cdm = cdm, cohortTable = "cohorts", timing = "entry", minCellCount = 0) expect_true(nrow(results_cohort_mult %>% - dplyr::filter(group_level == "Acute bronchitis (260139)" & - strata_name == "Overall" & + dplyr::filter(stringr::str_detect(group_level, "Acute bronchitis")) %>% + dplyr::filter(strata_name == "Overall" & strata_level == "Overall" & variable_name == "Person count")) == 2) expect_equal(c("a", "b"), results_cohort_mult %>% - dplyr::filter(group_level == "Acute bronchitis (260139)" & - strata_name == "Overall" & + dplyr::filter(stringr::str_detect(group_level, "Acute bronchitis")) %>% + dplyr::filter(strata_name == "Overall" & strata_level == "Overall" & variable_name == "Person count") %>% dplyr::pull("cohort_name")) @@ -433,22 +462,29 @@ test_that("summarise cohort code use - eunomia", { # empty cohort - no results cdm$pharyngitis <- cdm$pharyngitis %>% dplyr::filter(cohort_definition_id == 99) - expect_true(max(summariseCohortCodeUse(4134304, + expect_true(nrow(summariseCohortCodeUse(list(cs = 4134304), cdm = cdm, cohortTable = "pharyngitis", - timing = "any", minCellCount = 0) %>% - dplyr::pull("estimate")) == 0) + timing = "any", minCellCount = 0)) == 0) # expected errors expect_error(summariseCohortCodeUse(4134304, cdm = cdm, cohortTable = "not_a_cohort", timing = "any")) - expect_error(summariseCohortCodeUse(4134304, + expect_error(summariseCohortCodeUse(list(4134304), + cdm = cdm, + cohortTable = "not_a_cohort", + timing = "any")) + expect_error(summariseCohortCodeUse(list(cs = 4134304), + cdm = cdm, + cohortTable = "not_a_cohort", + timing = "any")) + expect_error(summariseCohortCodeUse(list(cs = 4134304), cdm = cdm, cohortTable = "pharyngitis", timing = "not_a_option")) - expect_error(summariseCohortCodeUse(4134304, + expect_error(summariseCohortCodeUse(list(cs = 4134304), cdm = cdm, cohortTable = "pharyngitis", timing = c("any", "entry"))) @@ -472,7 +508,7 @@ test_that("summarise code use - redshift", { cdm_schema = Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA"), write_schema = Sys.getenv("CDM5_REDSHIFT_SCRATCH_SCHEMA")) - asthma <- c(317009, 257581) + asthma <- list(asthma = c(317009, 257581)) results <- summariseCodeUse(asthma, cdm = cdm, @@ -490,7 +526,14 @@ test_that("summarise code use - redshift", { "variable_name", "variable_level", "variable_type", "estimate_type", "estimate", - "estimate_suppressed") + "estimate_suppressed", + "standard_concept_name", + "standard_concept_id", + "source_concept_name", + "source_concept_id", + "domain_id", + "codelist_name", + "cohort_name") )) @@ -502,7 +545,7 @@ test_that("summarise code use - redshift", { variable_name == "Record count") %>% dplyr::pull("estimate") == cdm$condition_occurrence %>% - dplyr::filter(condition_concept_id %in% asthma) %>% + dplyr::filter(condition_concept_id %in% !!asthma[[1]]) %>% dplyr::tally() %>% dplyr::pull("n")) @@ -514,7 +557,7 @@ test_that("summarise code use - redshift", { variable_name == "Person count") %>% dplyr::pull("estimate") == cdm$condition_occurrence %>% - dplyr::filter(condition_concept_id %in% asthma) %>% + dplyr::filter(condition_concept_id %in% !!asthma[[1]]) %>% dplyr::select("person_id") %>% dplyr::distinct() %>% dplyr::tally() %>% @@ -529,7 +572,7 @@ test_that("summarise code use - redshift", { variable_name == "Record count") %>% dplyr::pull("estimate") == cdm$condition_occurrence %>% - dplyr::filter(condition_concept_id %in% asthma) %>% + dplyr::filter(condition_concept_id %in% !!asthma[[1]]) %>% dplyr::filter(year(condition_start_date) == 2008) %>% dplyr::tally() %>% dplyr::pull("n")) @@ -542,7 +585,7 @@ test_that("summarise code use - redshift", { variable_name == "Person count") %>% dplyr::pull("estimate") == cdm$condition_occurrence %>% - dplyr::filter(condition_concept_id %in% asthma) %>% + dplyr::filter(condition_concept_id %in% !!asthma[[1]]) %>% dplyr::filter(year(condition_start_date) == 2008) %>% dplyr::select("person_id") %>% dplyr::distinct() %>% @@ -558,7 +601,7 @@ test_that("summarise code use - redshift", { variable_name == "Record count") %>% dplyr::pull("estimate") == cdm$condition_occurrence %>% - dplyr::filter(condition_concept_id %in% asthma) %>% + dplyr::filter(condition_concept_id %in% !!asthma[[1]]) %>% PatientProfiles::addSex(cdm) %>% dplyr::filter(sex == "Male") %>% dplyr::tally() %>% @@ -571,7 +614,7 @@ test_that("summarise code use - redshift", { variable_name == "Record count") %>% dplyr::pull("estimate") == cdm$condition_occurrence %>% - dplyr::filter(condition_concept_id %in% asthma) %>% + dplyr::filter(condition_concept_id %in% !!asthma[[1]]) %>% PatientProfiles::addAge(cdm, indexDate = "condition_start_date") %>% PatientProfiles::addSex(cdm) %>% @@ -589,7 +632,7 @@ test_that("summarise code use - redshift", { variable_name == "Person count") %>% dplyr::pull("estimate") == cdm$condition_occurrence %>% - dplyr::filter(condition_concept_id %in% asthma) %>% + dplyr::filter(condition_concept_id %in% !!asthma[[1]]) %>% PatientProfiles::addAge(cdm, indexDate = "condition_start_date") %>% PatientProfiles::addSex(cdm) %>% @@ -636,42 +679,42 @@ test_that("summarise code use - redshift", { # domains covered # condition - expect_true(nrow(summariseCodeUse(c(317009), + expect_true(nrow(summariseCodeUse(list(cs = c(317009)), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = NULL))>1) # visit - expect_true(nrow(summariseCodeUse(9201, + expect_true(nrow(summariseCodeUse(list(cs = 9201), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = NULL))>1) # drug -expect_true(nrow(summariseCodeUse(19071493, +expect_true(nrow(summariseCodeUse(list(cs = 19071493), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = NULL))>1) # measurement -expect_true(nrow(summariseCodeUse(2212542, +expect_true(nrow(summariseCodeUse(list(cs = 2212542), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = NULL))>1) # procedure and condition -expect_true(nrow(summariseCodeUse(c(4261206,317009), +expect_true(nrow(summariseCodeUse(list(cs = c(4261206,317009)), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = NULL))>1) # no records -expect_message(results <- summariseCodeUse(c(999999), +expect_message(results <- summariseCodeUse(list(cs = c(999999)), cdm = cdm, byYear = FALSE, bySex = FALSE, @@ -681,38 +724,38 @@ expect_true(nrow(results) == 0) # expected errors - expect_error(summariseCodeUse("not a concept", + expect_error(summariseCodeUse(list(cs = "not a concept"), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = NULL)) - expect_error(summariseCodeUse(123, + expect_error(summariseCodeUse(list(cs = 123), cdm = "not a cdm", byYear = FALSE, bySex = FALSE, ageGroup = NULL)) - expect_error(summariseCodeUse(123, + expect_error(summariseCodeUse(list(cs = 123), cdm = cdm, byYear = "Maybe", bySex = FALSE, ageGroup = NULL)) - expect_error(summariseCodeUse(123, + expect_error(summariseCodeUse(list(cs = 123), cdm = cdm, byYear = FALSE, bySex = "Maybe", ageGroup = NULL)) - expect_error(summariseCodeUse(123, + expect_error(summariseCodeUse(list(cs = 123), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = 25)) - expect_error(summariseCodeUse(123, + expect_error(summariseCodeUse(list(cs = 123), cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = list(c(18,17)))) - expect_error(summariseCodeUse(123, - cdm = cdm, + expect_error(summariseCodeUse(list(cs = 123), + cdm = cdm, byYear = FALSE, bySex = FALSE, ageGroup = list(c(0,17),