diff --git a/DESCRIPTION b/DESCRIPTION index 8eb712e..b517584 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: CodelistGenerator -Title: Generate Code Lists for the OMOP Common Data Model -Version: 2.1.2 +Title: Identify Relevant Clinical Codes and Evaluate Their Use +Version: 2.2.0 Authors@R: c( person("Edward", "Burn", email = "edward.burn@ndorms.ox.ac.uk", role = c("aut", "cre"), @@ -13,10 +13,13 @@ License: Apache License (>= 2) Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 +Depends: + R (>= 3.5.0) Imports: CDMConnector (>= 1.1.2), checkmate (>= 2.0.0), DBI (>= 1.1.0), + duckdb, dplyr (>= 1.1.0), magrittr (>= 2.0.0), rlang (>= 1.0.0), @@ -28,24 +31,20 @@ Imports: purrr, lubridate, PatientProfiles (>= 0.3.0), - RJSONIO + RJSONIO, + vctrs Suggests: covr, - dbplyr (>= 2.2.1), knitr, - readr (>= 2.1.0), - duckdb, - DT, rmarkdown, - here (>= 1.0.0), testthat (>= 3.0.0), - kableExtra (>= 1.0.0), RPostgres, odbc, spelling, tibble Config/testthat/edition: 3 +Config/testthat/parallel: true VignetteBuilder: knitr URL: https://darwin-eu.github.io/CodelistGenerator/ Language: en-US -Config/testthat/parallel: true +LazyData: true diff --git a/NAMESPACE b/NAMESPACE index df86e87..4bf9b39 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,9 @@ export("%>%") export(achillesCodeUse) export(codesFromCohort) export(codesFromConceptSet) +export(codesInUse) export(compareCodelists) +export(findOrphanCodes) export(getATCCodes) export(getCandidateCodes) export(getConceptClassId) @@ -17,6 +19,8 @@ export(getMappings) export(getVocabVersion) export(getVocabularies) export(mockVocabRef) +export(restrictToCodesInUse) +export(sourceCodesInUse) export(summariseCodeUse) export(summariseCohortCodeUse) importFrom(magrittr,"%>%") diff --git a/NEWS.md b/NEWS.md index 6fbfc81..9247be1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# CodelistGenerator 2.2.0 +* Added functions findOrphanCodes, restrictToCodesInUse, sourceCodesInUse. +* Speed improvements in getCandidateCodes from doing search in place (e.g. on database side). +* Dropped explicit support of an Arrow cdm. + +# CodelistGenerator 2.1.1 +* Improved support of device domain. + # CodelistGenerator 2.0.0 * Simplified the interface of getCandidateCodes, with a number of arguments removed. * Added function summariseCohortCodeUse. diff --git a/R/achillesCodeUse.R b/R/achillesCodeUse.R index c7247b9..4f4ccdf 100644 --- a/R/achillesCodeUse.R +++ b/R/achillesCodeUse.R @@ -47,6 +47,7 @@ achillesCodeUse <- function(x, if("record" %in% countBy){ allRecordCount <- getAchillesRecordCounts(cdm = cdm, conceptId = allCodes) + if(nrow(allRecordCount)>=1){ allRecordCount <- allRecordCount %>% dplyr::mutate(concept_id = as.character(.data$concept_id)) %>% dplyr::left_join(codesWithDetails %>% @@ -65,12 +66,13 @@ achillesCodeUse <- function(x, standard_concept_id = .data$concept_id ) %>% dplyr::mutate(codelist_name = names(x)[i]) - + } } } if("person" %in% countBy){ allPersonCount <- getAchillesPersonCounts(cdm = cdm, conceptId = allCodes) + if(nrow(allPersonCount)>=1){ allPersonCount <- allPersonCount %>% dplyr::mutate(concept_id = as.character(.data$concept_id)) %>% dplyr::left_join(codesWithDetails %>% @@ -91,7 +93,15 @@ achillesCodeUse <- function(x, dplyr::mutate(codelist_name = names(x)[i]) } } + } + if(length(codeUse) == 0){ + cli::cli_inform( + c( + "i" = "No achilles counts found for the concepts provided." + )) + return(dplyr::tibble()) + } else { codeUse <- dplyr::bind_rows(codeUse) %>% dplyr::mutate(group_name = "By concept", strata_name = "Overall", @@ -113,12 +123,13 @@ achillesCodeUse <- function(x, "source_concept_id", "domain_id", "codelist_name","cohort_name") + codeUse <- codeUse %>% + dplyr::mutate(estimate = dplyr::if_else(.data$estimate < .env$minCellCount & + .data$estimate > 0, + NA, .data$estimate)) %>% + dplyr::mutate(standard_concept_id = as.integer(.data$standard_concept_id), + source_concept_id = as.integer(.data$source_concept_id)) - if(nrow(codeUse) == 0){ - cli::cli_inform( - c( - "i" = "No achilles counts found for the concepts provided." - )) } return(codeUse) diff --git a/R/codesFromConceptSet.R b/R/codesFromConceptSet.R index 513e015..5b7b728 100644 --- a/R/codesFromConceptSet.R +++ b/R/codesFromConceptSet.R @@ -253,18 +253,25 @@ tibbleToList <- function(codelistTibble) { addDetails <- function(conceptList, cdm){ + # will accept either a list or tibble + # will return the same type as the input + inputIsTbl <- inherits(conceptList, "tbl_df") + + if(isFALSE(inputIsTbl)){ for(i in seq_along(conceptList)){ conceptList[[i]] <- dplyr::tibble(concept_id = conceptList[[i]], concept_set = names(conceptList)[i]) } + conceptList <- dplyr::bind_rows(conceptList) + } - conceptList <- dplyr::bind_rows(conceptList) %>% - dplyr::left_join(cdm[["concept"]] %>% + conceptList <- conceptList %>% + dplyr::left_join(cdm[["concept"]] %>% dplyr::select("concept_id", "concept_name", "domain_id", "vocabulary_id", "standard_concept"), by = "concept_id", - copy = TRUE)%>% + copy = TRUE) %>% dplyr::mutate( standard_concept = ifelse(is.na(.data$standard_concept), "non-standard", .data$standard_concept @@ -281,10 +288,11 @@ addDetails <- function(conceptList, cdm){ ) ) + if(isFALSE(inputIsTbl)){ conceptList <- split( x = conceptList %>% dplyr::select(!"concept_set"), f = as.factor(conceptList$concept_set) - ) + )} return(conceptList) diff --git a/R/codesInUse.R b/R/codesInUse.R new file mode 100644 index 0000000..5589fa5 --- /dev/null +++ b/R/codesInUse.R @@ -0,0 +1,227 @@ +#' Filter a codelist to keep only the codes used in the database +#' +#' @param x A codelist +#' @param cdm cdm_reference via CDMConnector +#' @param table cdm table +#' +#' @return +#' @export +#' +#' @examples +restrictToCodesInUse <- function(x, + cdm, + table = c("condition_occurrence", + "device_exposure", + "drug_exposure", + "measurement", + "observation", + "procedure_occurrence", + "visit_occurrence")){ + +dbCodes <- codesInUse(cdm = cdm, + table = table) + +if(is.null(dbCodes)){ + for(i in seq_along(x)){ + cli::cli_inform("No codes from any codelist found in the database") + return(invisible(NULL)) + } +} else { + for(i in seq_along(x)){ + x[[i]] <- intersect(x[[i]], dbCodes) + if(!length(x[[i]]) >= 1){ + cli::cli_inform("No codes from codelist {names(x)[i]} found in the database") + } + } +} + +x <- vctrs::list_drop_empty(x) + +if(length(x) == 0){ + return(invisible(NULL)) +} + +x + +} + +#' Get codes used in the database +#' +#' @param cdm cdm_reference via CDMConnector +#' @param table cdm table +#' +#' @return +#' @export +#' +#' @examples +codesInUse <- function(cdm, + table = c("condition_occurrence", + "device_exposure", + "drug_exposure", + "measurement", + "observation", + "procedure_occurrence", + "visit_occurrence")){ + +if(!is.null(cdm[["achilles_results"]])){ + codes <- fetchAchillesCodesInUse(cdm) +} else { + # if achilles not available, query cdm + codes <- list() + for(i in seq_along(table)){ + workingTable <- table[i] + workingConcept <- dplyr::case_when( + workingTable == "condition_occurrence" ~ "condition_concept_id", + workingTable == "device_exposure" ~ "device_concept_id", + workingTable == "drug_exposure" ~ "drug_concept_id", + workingTable == "measurement" ~ "measurement_concept_id", + workingTable == "observation" ~ "observation_concept_id", + workingTable == "procedure_occurrence" ~ "procedure_concept_id", + workingTable == "visit_occurrence" ~ "visit_concept_id" + ) + + if(!is.null(cdm[[workingTable]])){ + codes[[i]] <- as.integer(cdm[[workingTable]] %>% + dplyr::select(workingConcept) %>% + dplyr::distinct() %>% + dplyr::pull()) + } else { + codes[[i]] <- NULL + } + + } + codes <- unlist(codes) + } + + codes +} + +#' Get source codes used in the database +#' +#' @param cdm cdm_reference via CDMConnector +#' @param table cdm table +#' +#' @return +#' @export +#' +#' @examples +sourceCodesInUse <- function(cdm, + table = c("condition_occurrence", + "device_exposure", + "drug_exposure", + "measurement", + "observation", + "procedure_occurrence", + "visit_occurrence")){ + + if(!is.null(cdm[["achilles_results"]])){ + codes <- fetchAchillesCodesInUse(cdm) + } else { + # if achilles not available, query cdm + codes <- list() + for(i in seq_along(table)){ + workingTable <- table[i] + workingConcept <- dplyr::case_when( + workingTable == "condition_occurrence" ~ "condition_source_concept_id", + workingTable == "device_exposure" ~ "device_source_concept_id", + workingTable == "drug_exposure" ~ "drug_source_concept_id", + workingTable == "measurement" ~ "measurement_source_concept_id", + workingTable == "observation" ~ "observation_source_concept_id", + workingTable == "procedure_occurrence" ~ "procedure_source_concept_id", + workingTable == "visit_occurrence" ~ "visit_source_concept_id" + ) + + codes[[i]] <- as.integer(cdm[[workingTable]] %>% + dplyr::select(workingConcept) %>% + dplyr::distinct() %>% + dplyr::pull()) + } + codes <- unlist(codes) + } + + codes +} + +unmappedSourceCodesInUse <- function(cdm, + table = c("condition_occurrence", + "device_exposure", + "drug_exposure", + "measurement", + "observation", + "procedure_occurrence", + "visit_occurrence")){ + + # note, no achilles query for this so will have to query the cdm + + codes <- list() + for(i in seq_along(table)){ + workingTable <- table[i] + standardConcept <- dplyr::case_when( + workingTable == "condition_occurrence" ~ "condition_concept_id", + workingTable == "device_exposure" ~ "device_concept_id", + workingTable == "drug_exposure" ~ "drug_concept_id", + workingTable == "measurement" ~ "measurement_concept_id", + workingTable == "observation" ~ "observation_concept_id", + workingTable == "procedure_occurrence" ~ "procedure_concept_id", + workingTable == "visit_occurrence" ~ "visit_concept_id" + ) + + workingConcept <- dplyr::case_when( + workingTable == "condition_occurrence" ~ "condition_source_concept_id", + workingTable == "device_exposure" ~ "device_source_concept_id", + workingTable == "drug_exposure" ~ "drug_source_concept_id", + workingTable == "measurement" ~ "measurement_source_concept_id", + workingTable == "observation" ~ "observation_source_concept_id", + workingTable == "procedure_occurrence" ~ "procedure_source_concept_id", + workingTable == "visit_occurrence" ~ "visit_source_concept_id" + ) + + # keep unmapped codes + codes[[i]] <- as.integer(cdm[[workingTable]] %>% + dplyr::filter(!!rlang::sym(standardConcept) == 0) %>% + dplyr::select(dplyr::all_of(workingConcept)) %>% + dplyr::distinct() %>% + dplyr::pull()) + codes[[i]] <- stats::na.omit(codes[[i]]) + } + + codes <- unlist(codes) + + codes +} + +fetchAchillesCodesInUse <- function(cdm){ + cdm[["achilles_results"]] %>% + dplyr::filter(.data$analysis_id %in% + c( + 401, # condition occurrence + 701, # drug_exposure + 801, # observation + 1801, # measurement + 201, # visit_occurrence + 601, # procedure_occurrence + 2101 # device_exposure + )) %>% + dplyr::select("stratum_1") %>% + dplyr::distinct() %>% + dplyr::mutate(stratum_1 = as.integer(.data$stratum_1)) %>% + dplyr::pull("stratum_1") +} + +fetchAchillesSourceCodesInUse <- function(cdm){ + cdm[["achilles_results"]] %>% + dplyr::filter(.data$analysis_id %in% + c( + 425, # condition occurrence + 725, # drug_exposure + 825, # observation + 1825, # measurement + 225, # visit_occurrence + 625, # procedure_occurrence + 2125 # device_exposure + )) %>% + dplyr::select("stratum_1") %>% + dplyr::distinct() %>% + dplyr::mutate(stratum_1 = as.integer(.data$stratum_1)) %>% + dplyr::pull("stratum_1") +} diff --git a/R/findOrphanCodes.R b/R/findOrphanCodes.R new file mode 100644 index 0000000..3d70b38 --- /dev/null +++ b/R/findOrphanCodes.R @@ -0,0 +1,82 @@ + +#' Find orphan codes related to a codelist +#' +#' @param x Codes for which to find codes related but not included (orphan +#' codes) +#' @param cdm cdm_reference via CDMConnector +#' @param domains Character vector with one or more of the OMOP CDM domain. +#' @param standardConcept Character vector with one or more of "Standard", +#' "Classification", and "Non-standard". These correspond to the flags used +#' for the standard_concept field in the concept table of the cdm. +#' @param searchInSynonyms Either TRUE or FALSE. If TRUE the code will also +#' search using both the primary name in the concept table and synonyms from +#' the concept synonym table. +#' @param searchNonStandard Either TRUE or FALSE. If TRUE the code will also +#' search via non-standard concepts. +#' @param includeDescendants Either TRUE or FALSE. +#' If TRUE descendant concepts of identified concepts +#' will be included in the candidate codelist. +#' @param includeAncestor Either TRUE or FALSE. +#' If TRUE the direct ancestor concepts of identified concepts +#' will be included in the candidate codelist. +#' @param minCellCount The minimum number of counts to reported, below which +#' results will be suppressed. If 0, all results will be reported. +#' +#' @return A codelist containing code related to (but not in) the target +#' codelist that are present used in the cdm +#' @export +#' +#' @examples +findOrphanCodes <- function(x, + cdm, + domains = "Condition", + standardConcept = "Standard", + searchInSynonyms = TRUE, + searchNonStandard = TRUE, + includeDescendants = TRUE, + includeAncestor = TRUE, + minCellCount = 5){ + + +x <- addDetails(cdm = cdm, conceptList = x) + +orphanConcepts <- list() +# rerun search +for(i in seq_along(x)){ +candidateCodes <- getCandidateCodes( + cdm = cdm, + keywords = x[[i]]$concept_name, + domains = domains, + standardConcept = standardConcept, + searchInSynonyms = searchInSynonyms, + searchNonStandard = searchNonStandard, + includeDescendants = includeDescendants, + includeAncestor = includeAncestor) + +# Exclude codes that are in the original set of codes +candidateCodes <- candidateCodes %>% + dplyr::anti_join(x[[i]] %>% + dplyr::select("concept_id"), + by = "concept_id") +# Use achilles counts to summarise code use +orphanConcepts[[i]] <- achillesCodeUse( + x = list("cs" = candidateCodes$concept_id), + cdm = cdm, + minCellCount = minCellCount +) +if(nrow(orphanConcepts[[i]]) >= 1 ){ + orphanConcepts[[i]] <- orphanConcepts[[i]] %>% + dplyr::mutate(codelist = names(x)[i]) +} else { + cli::cli_inform("-- No orphan codes found for codelist {names(x)[i]}") +} +} + +orphanConcepts <- dplyr::bind_rows(orphanConcepts) + +orphanConcepts +} + + + + diff --git a/R/findUnmappedCodes.R b/R/findUnmappedCodes.R new file mode 100644 index 0000000..d9629ea --- /dev/null +++ b/R/findUnmappedCodes.R @@ -0,0 +1,10 @@ + + +findUnmappedCodes <- function(){ + +# search non-standard codes + +# see if any have been mapped to 0 + + +} diff --git a/R/getCandidateCodes.R b/R/getCandidateCodes.R index 73c7d7f..8573c79 100644 --- a/R/getCandidateCodes.R +++ b/R/getCandidateCodes.R @@ -68,7 +68,7 @@ getCandidateCodes <- function(cdm, includeDescendants = TRUE, includeAncestor = FALSE) { - start <- Sys.time() + start <- Sys.time() ## checks for standard types of user error errorMessage <- checkmate::makeAssertCollection() @@ -148,12 +148,17 @@ getCandidateCodes <- function(cdm, if (nrow(searchResults) == 0) { cli::cli_inform("No codes found for the given search strategy") - } else { - cli::cli_alert_success( - "{nrow(searchResults)} candidate concept{?s} identified" - ) + return(searchResults) } + # add concept info + searchResults <- addDetails(cdm = cdm, + conceptList = searchResults) %>% + dplyr::filter(tolower(.data$domain_id) %in% tolower(.env$domains)) + + cli::cli_alert_success( + "{nrow(searchResults)} candidate concept{?s} identified" + ) duration <- abs(as.numeric(Sys.time() - start, units = "secs")) cli::cli_inform( "Time taken: {floor(duration/60)} minutes and {duration %% 60 %/% 1} seconds" diff --git a/R/mockVocabRef.R b/R/mockVocabRef.R index a8e9180..d3d5c01 100644 --- a/R/mockVocabRef.R +++ b/R/mockVocabRef.R @@ -17,7 +17,7 @@ #' Generate example vocabulary database #' -#' @param backend 'database' (duckdb), 'arrow' (parquet files), or 'data_frame' +#' @param backend 'database' (duckdb) or 'data_frame' #' @return cdm reference with mock vocabulary #' @export #' @@ -27,11 +27,23 @@ #' DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) mockVocabRef <- function(backend = "database") { errorMessage <- checkmate::makeAssertCollection() - checkmate::assertTRUE(backend %in% c("database", "arrow", "data_frame")) + checkmate::assertTRUE(backend %in% c("database", "data_frame")) checkmate::assertTRUE(length(backend) == 1) checkmate::reportAssertions(collection = errorMessage) - # tables + # compulsory tables + person <- dplyr::tibble( + person_id = 1, gender_concept_id = 0, year_of_birth = 1990, + race_concept_id = 0, ethnicity_concept_id = 0 + ) + observationPeriod <- dplyr::tibble( + observation_period_id = 1, person_id = 1, + observation_period_start_date = as.Date("2000-01-01"), + observation_period_end_date = as.Date("2025-12-31"), + period_type_concept_id = 0 + ) + + # vocab tables concept <- data.frame( concept_id = 1:19, concept_name = c( @@ -253,7 +265,9 @@ mockVocabRef <- function(backend = "database") { numerator_unit_concept_id = 8576, denominator_value = 0.5, denominator_unit_concept_id = 8587, - box_size = NA + box_size = NA, + valid_start_date = NA, + valid_end_date = NA ) ) @@ -267,14 +281,37 @@ mockVocabRef <- function(backend = "database") { cdm_etl_reference = NA, source_release_date = NA, cdm_release_date = NA, - cdm_version = NA, + cdm_version = "5.3", vocabulary_version = NA ) ) + # achilles tables + # count of 400 records for knee osteoarthritis + achillesAnalysis <- dplyr::tibble(analysis_id = 1, + analysis_name = 1) + achillesResults <- dplyr::tibble(analysis_id = 401, + stratum_1 = 4, + stratum_2 = NA, + stratum_3 = NA, + count_value = 100) + achillesResultsDist <- dplyr::tibble(analysis_id = 1, + count_value = 5) + # into in-memory duckdb db <- DBI::dbConnect(duckdb::duckdb(), ":memory:") - + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "person", + person, + overwrite = TRUE + ) + }) + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "observation_period", + observationPeriod, + overwrite = TRUE + ) + }) DBI::dbWithTransaction(db, { DBI::dbWriteTable(db, "concept", concept, @@ -318,40 +355,37 @@ mockVocabRef <- function(backend = "database") { ) }) - cdm <- CDMConnector::cdm_from_con(db) - return(cdm) - # if (backend == "database") { - # return(cdm) - # } - # - # if (backend %in% c("arrow", "data_frame")) { - # dOut <- tempfile() - # dir.create(dOut) - # CDMConnector::stow(cdm = cdm, path = dOut, format = "csv") - # - # if (backend == "arrow") { - # cdmArrow <- CDMConnector::cdm_from_files( - # path = dOut, - # as_data_frame = FALSE, cdm_name = "mock_vocab" - # ) - # DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) - # return(cdmArrow) - # } - # - # if (backend == "data_frame") { - # if(utils::packageVersion("CDMConnector")<"1.1.0"){ - # cdmDF <- CDMConnector::cdm_from_files( - # path = dOut, - # as_data_frame = TRUE - # ) - # } else { - # cdmDF <- CDMConnector::cdm_from_files( - # path = dOut, cdm_name = "mock", - # as_data_frame = TRUE - # ) - # } - # DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) - # return(cdmDF) - # } - # } + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "achilles_analysis", + achillesAnalysis, + overwrite = TRUE + ) + }) + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "achilles_results", + achillesResults, + overwrite = TRUE + ) + }) + DBI::dbWithTransaction(db, { + DBI::dbWriteTable(db, "achilles_results_dist", + achillesResultsDist, + overwrite = TRUE + ) + }) + + cdm <- CDMConnector::cdm_from_con(con = db, + cdm_schema = "main", + write_schema = "main", + cdm_name = "mock") + + cdm$achilles_analysis <- dplyr::tbl(db, "achilles_analysis") + cdm$achilles_results <- dplyr::tbl(db, "achilles_results") + cdm$achilles_results_dist <- dplyr::tbl(db, "achilles_results_dist") + + if (backend %in% c("data_frame")) { + cdm <- cdm %>% dplyr::collect() + } + + cdm } diff --git a/R/runSearch.R b/R/runSearch.R index 1187061..03609d0 100644 --- a/R/runSearch.R +++ b/R/runSearch.R @@ -23,6 +23,12 @@ runSearch <- function(keywords, searchNonStandard, includeDescendants, includeAncestor) { + + if(!is.null(attr(cdm, "dbcon"))){ + prefix <- paste0(sample(letters, 5, TRUE), + collapse = "") + } + # connect to relevant vocabulary tables # will return informative error if not found conceptDb <- cdm$concept @@ -31,75 +37,36 @@ runSearch <- function(keywords, conceptRelationshipDb <- cdm$concept_relationship drugStrengthDb <- cdm$drug_strength - # formatting of conceptDb variables - conceptDb <- conceptDb %>% - dplyr::mutate(domain_id = tolower(.data$domain_id)) %>% - dplyr::mutate( - standard_concept = ifelse(is.na(.data$standard_concept), - "non-standard", .data$standard_concept - ) - ) %>% - dplyr::mutate( - standard_concept = ifelse(.data$standard_concept == "C", - "classification", .data$standard_concept - ) - ) %>% - dplyr::mutate( - standard_concept = ifelse(.data$standard_concept == "S", - "standard", .data$standard_concept - ) - ) - ## domains, standardConcept vocab to lower domains <- tolower(domains) standardConcept <- tolower(standardConcept) - # new name for clarity standardConceptFlags <- standardConcept - cli::cli_inform("{domains} domain: Limiting to domains of interest") + # formatting of conceptDb variables conceptDb <- conceptDb %>% - dplyr::filter(.data$domain_id %in% .env$domains) - - concept <- conceptDb %>% - dplyr::mutate(standard_concept = tolower(.data$standard_concept)) %>% - dplyr::filter(.data$standard_concept %in% .env$standardConceptFlags) %>% - dplyr::collect() %>% - dplyr::rename_with(tolower) - - conceptAncestorDb <- conceptAncestorDb %>% - dplyr::left_join( - conceptDb %>% - dplyr::rename("ancestor_concept_id" = "concept_id") %>% - dplyr::select("ancestor_concept_id", "domain_id", "standard_concept"), - by = "ancestor_concept_id" + dplyr::mutate( + domain_id = tolower(.data$domain_id), + standard_concept = dplyr::case_when( + is.na(.data$standard_concept) ~ "non-standard", + .data$standard_concept == "C" ~ "classification", + .data$standard_concept == "S" ~ "standard", + .default = as.character(.data$standard_concept) + ) ) - if(!is.null(attr(cdm, "dbcon"))){ - conceptAncestorDb <- conceptAncestorDb %>% - CDMConnector::compute_query() - } - conceptAncestorDb <- conceptAncestorDb %>% - dplyr::filter(.data$domain_id %in% .env$domains & - .data$standard_concept %in% .env$standardConceptFlags) %>% - dplyr::select(-c("domain_id", "standard_concept")) - conceptAncestor <- conceptAncestorDb %>% - dplyr::left_join( - conceptDb %>% - dplyr::rename("descendant_concept_id" = "concept_id") %>% - dplyr::select("descendant_concept_id", "domain_id", "standard_concept"), - by = "descendant_concept_id" - ) if(!is.null(attr(cdm, "dbcon"))){ - conceptAncestor <- conceptAncestor %>% - CDMConnector::compute_query() + conceptDb <- conceptDb %>% + CDMConnector::computeQuery(name = paste0("cg_",prefix, "_1"), + temporary = FALSE, + schema = attr(cdm, "write_schema"), + overwrite = TRUE) } - conceptAncestor <- conceptAncestor %>% - dplyr::filter(.data$domain_id %in% .env$domains & - .data$standard_concept %in% .env$standardConceptFlags) %>% - dplyr::select(-c("domain_id", "standard_concept")) %>% - dplyr::collect() %>% - dplyr::rename_with(tolower) + + cli::cli_inform("{domains} domain: Limiting to domains of interest") + concept <- conceptDb %>% + dplyr::filter(.data$standard_concept %in% .env$standardConceptFlags, + .data$domain_id %in% .env$domains) # will only collect conceptSynonym later if needed if (searchInSynonyms == TRUE) { @@ -111,7 +78,10 @@ runSearch <- function(keywords, ) if(!is.null(attr(cdm, "dbcon"))){ conceptSynonymDb <- conceptSynonymDb %>% - CDMConnector::compute_query() + CDMConnector::computeQuery(name = paste0("cg_",prefix, "_2"), + temporary = FALSE, + schema = attr(cdm, "write_schema"), + overwrite = TRUE) } conceptSynonymDb <- conceptSynonymDb %>% @@ -120,7 +90,7 @@ runSearch <- function(keywords, dplyr::select(-c("domain_id", "standard_concept")) conceptSynonym <- conceptSynonymDb %>% - dplyr::collect() %>% + # dplyr::collect() %>% dplyr::rename_with(tolower) } @@ -135,7 +105,7 @@ runSearch <- function(keywords, ) %>% dplyr::filter(.data$domain_id %in% .env$domains & .data$standard_concept %in% .env$standardConceptFlags) %>% - dplyr::collect() %>% + # dplyr::collect() %>% dplyr::rename_with(tolower) } @@ -185,7 +155,10 @@ runSearch <- function(keywords, # run exclusion if (length(exclude) > 0) { - if (nrow(excludeCodes) > 0) { + if (excludeCodes %>% + utils::head(10) %>% + dplyr::tally() %>% + dplyr::pull("n") > 0) { candidateCodes <- candidateCodes %>% dplyr::anti_join( excludeCodes %>% @@ -207,10 +180,10 @@ runSearch <- function(keywords, dplyr::select("concept_id") %>% dplyr::distinct() %>% dplyr::left_join(workingConcept, - by = "concept_id" + by = "concept_id", copy = TRUE ) - candidateCodes <- dplyr::bind_rows( + candidateCodes <- dplyr::union_all( candidateCodes, candidateCodesInSynonyms %>% dplyr::mutate(found_from = "In synonyms") @@ -220,7 +193,10 @@ runSearch <- function(keywords, # run exclusion if (length(exclude) > 0) { - if (nrow(excludeCodes) > 0) { + if (excludeCodes %>% + utils::head(10) %>% + dplyr::tally() %>% + dplyr::pull("n") > 0) { candidateCodes <- candidateCodes %>% dplyr::anti_join( excludeCodes %>% @@ -232,21 +208,25 @@ runSearch <- function(keywords, candidateCodesList[[domains]] <- candidateCodes + # candidateCodes <- dplyr::bind_rows(candidateCodes %>% + # dplyr::collect()) %>% + # dplyr::distinct() - candidateCodes <- dplyr::bind_rows(candidateCodesList) - # 5) add any codes lower in the hierarchy if (includeDescendants == TRUE) { - if (nrow(candidateCodes) > 0) { + if (candidateCodes %>% + utils::head(10) %>% + dplyr::tally() %>% + dplyr::pull("n") > 0) { cli::cli_inform("{domains} domain: Adding descendants") candidateCodeDescendants <- addDescendants( workingCandidateCodes = candidateCodes, - conceptAncestorDf = conceptAncestor, + conceptAncestorDf = conceptAncestorDb, conceptDf = concept ) - candidateCodes <- dplyr::bind_rows( + candidateCodes <- dplyr::union_all( candidateCodes, candidateCodeDescendants %>% dplyr::mutate(found_from = "From descendants") %>% @@ -256,10 +236,14 @@ runSearch <- function(keywords, # run exclusion if (length(exclude) > 0) { - if (nrow(excludeCodes) > 0) { + if (excludeCodes %>% + utils::head(10) %>% + dplyr::tally() %>% + dplyr::pull("n") > 0) { candidateCodes <- candidateCodes %>% - dplyr::anti_join(excludeCodes %>% dplyr::select("concept_id"), - by = "concept_id" + dplyr::anti_join(excludeCodes %>% + dplyr::select("concept_id"), + by = "concept_id", copy = TRUE ) } } @@ -268,16 +252,19 @@ runSearch <- function(keywords, # 6) add any codes one level above in the hierarchy if (includeAncestor == TRUE) { - if (nrow(candidateCodes) > 0) { + if (candidateCodes %>% + utils::head(10) %>% + dplyr::tally() %>% + dplyr::pull("n") > 0) { cli::cli_inform("{domains} domain: Adding ancestor") candidateCodeAncestor <- addAncestor( workingCandidateCodes = candidateCodes, - conceptAncestorDf = conceptAncestor, - conceptDf = concept + conceptAncestorDf = conceptAncestorDb, + conceptDf = conceptDb ) - candidateCodes <- dplyr::bind_rows( + candidateCodes <- dplyr::union_all( candidateCodes, candidateCodeAncestor %>% dplyr::mutate(found_from = "From ancestor") @@ -286,7 +273,10 @@ runSearch <- function(keywords, # run exclusion if (length(exclude) > 0) { - if (nrow(excludeCodes) > 0) { + if (excludeCodes %>% + utils::head(10) %>% + dplyr::tally() %>% + dplyr::pull("n") > 0) { candidateCodes <- candidateCodes %>% dplyr::anti_join(excludeCodes %>% dplyr::select("concept_id"), by = "concept_id" @@ -303,37 +293,40 @@ runSearch <- function(keywords, if (searchNonStandard == TRUE) { cli::cli_inform("{domains} domain: Adding codes from non-standard") conceptNs <- conceptDb %>% - dplyr::filter(.data$domain_id %in% .env$domains) %>% dplyr::filter(.data$standard_concept == "non-standard") %>% - dplyr::collect() %>% dplyr::rename_with(tolower) - if (nrow(conceptNs) > 0) { + if (conceptNs %>% + utils::head(10) %>% + dplyr::tally() %>% + dplyr::pull("n") > 0) { candidateCodesNs <- getMatches( words = tidyWords(keywords), conceptDf = conceptNs ) } - if (nrow(conceptNs) > 0) { + if (conceptNs %>% + utils::head(10) %>% + dplyr::tally() %>% + dplyr::pull("n") > 0) { candidateCodesNs <- candidateCodesNs %>% dplyr::select("concept_id") %>% dplyr::left_join( conceptRelationshipDb %>% dplyr::filter(.data$relationship_id == "Mapped from") %>% - dplyr::collect() %>% + # dplyr::collect() %>% dplyr::rename_with(tolower), - by = c("concept_id" = "concept_id_2") + by = c("concept_id" = "concept_id_2"), copy = TRUE ) %>% dplyr::select("concept_id_1") %>% dplyr::rename("concept_id" = "concept_id_1") %>% dplyr::distinct() %>% dplyr::left_join(concept, - by = "concept_id" - ) %>% - dplyr::mutate(concept_name = tidyWords(.data$concept_name)) + by = "concept_id", copy = TRUE + ) - candidateCodes <- dplyr::bind_rows( + candidateCodes <- dplyr::union_all( candidateCodes, candidateCodesNs %>% dplyr::mutate(found_from = "From non-standard") @@ -343,7 +336,10 @@ runSearch <- function(keywords, # run exclusion if (length(exclude) > 0) { - if (nrow(excludeCodes) > 0) { + if (excludeCodes %>% + utils::head(10) %>% + dplyr::tally() %>% + dplyr::pull("n") > 0) { candidateCodes <- candidateCodes %>% dplyr::anti_join(excludeCodes %>% dplyr::select("concept_id"), by = "concept_id" @@ -352,23 +348,14 @@ runSearch <- function(keywords, } } + candidateCodes <- candidateCodes %>% + dplyr::collect() - if (nrow(candidateCodes) == 0) { - candidateCodes - } else { - # 8) Finish up - # get original names back - candidateCodes <- candidateCodes %>% - dplyr::filter(!is.na(.data$concept_id)) %>% - dplyr::select("concept_id", "found_from") %>% - dplyr::left_join(concept, - by = c("concept_id") - ) %>% - dplyr::distinct() - # if domain = "drug", add drug_strength information and dose form - if (domains == "drug") { + if (nrow(candidateCodes) > 0) { + # 8) Finish up + if (domains == "drug") { #add drug_strength information and dose form candidateCodes <- candidateCodes %>% dplyr::left_join( drugStrength %>% @@ -380,7 +367,7 @@ runSearch <- function(keywords, "numerator_value", "numerator_unit_concept_id", "denominator_value", "denominator_unit_concept_id", "box_size" ), - by = "concept_id" + by = "concept_id", copy = TRUE ) candidateCodes <- candidateCodes %>% @@ -425,21 +412,23 @@ runSearch <- function(keywords, by = "concept_id" ) } - } else { - candidateCodes <- candidateCodes %>% - dplyr::select( - "concept_id", "concept_name", - "domain_id", "concept_class_id", - "vocabulary_id", "found_from" - ) } candidateCodes <- candidateCodes %>% + dplyr::select(dplyr::any_of(c("concept_id", "found_from", + "ingredient_concept_id", "amount_value", + "amount_unit_concept_id", + "numerator_value", + "numerator_unit_concept_id", + "denominator_value", + "denominator_unit_concept_id", + "box_size", "dose_form"))) %>% dplyr::distinct() # remove duplicates (found in different ways) # keep first time it was found - # for drug, same concept_id with different ingredient_concept_id will be removed as well. + # for drug, same concept_id with different ingredient_concept_id + # will be removed as well (with only the first kept). candidateCodes <- candidateCodes %>% dplyr::group_by(.data$concept_id) %>% dplyr::mutate(seq = dplyr::row_number(.data$concept_id)) %>% @@ -447,8 +436,16 @@ runSearch <- function(keywords, dplyr::select(-"seq") %>% dplyr::ungroup() - return(candidateCodes) } + + + if(!is.null(attr(cdm, "dbcon"))){ + CDMConnector::dropTable(cdm = cdm, + name = dplyr::starts_with(paste0("cg_",prefix))) + } + + return(candidateCodes) + } @@ -476,7 +473,7 @@ tidyWords <- function(words) { getMatches <- function(words, conceptDf) { conceptDf <- conceptDf %>% # start with all - dplyr::mutate(concept_name = tidyWords(.data$concept_name)) + dplyr::mutate(concept_name = tolower(.data$concept_name)) # because there may be a lot of synonyms, get these from a loop # (stringr::str_detect slows considerably @@ -494,16 +491,38 @@ getMatches <- function(words, for (j in seq_along(workingExclude)) { if (nchar(workingExclude[j]) >= 1) { - workingConcepts <- workingConcepts %>% - dplyr::filter(stringr::str_detect( - .data$concept_name, - .env$workingExclude[j] - )) + + if(inherits(workingConcepts, "tbl_sql")){ + workingConcepts <- workingConcepts %>% + dplyr::filter(dplyr::sql(paste0("concept_name LIKE '%", .env$workingExclude[j], "%'"))) + } else { + workingConcepts <- workingConcepts %>% + dplyr::filter(stringr::str_detect( + .data$concept_name, + .env$workingExclude[j] + )) + } + } } conceptsFound[[i]] <- workingConcepts + + # %>% dplyr::collect() + } + + if(length(conceptsFound)==1){ + conceptsFound <- conceptsFound[[1]] %>% dplyr::distinct() + } else { + conceptsFoundList <- list() + for(i in 1:(length(conceptsFound)-1)){ + conceptsFoundList <- dplyr::union_all(conceptsFound[[i]], + conceptsFound[[i+1]]) + + } + conceptsFound <- conceptsFoundList %>% dplyr::distinct() + } - conceptsFound <- dplyr::bind_rows(conceptsFound) %>% dplyr::distinct() + return(conceptsFound) } @@ -511,6 +530,7 @@ getMatches <- function(words, addDescendants <- function(workingCandidateCodes, conceptAncestorDf, conceptDf) { + candidateCodeDescendants <- workingCandidateCodes %>% dplyr::select("concept_id") %>% dplyr::rename("ancestor_concept_id" = "concept_id") %>% @@ -518,17 +538,15 @@ addDescendants <- function(workingCandidateCodes, dplyr::left_join( conceptAncestorDf %>% dplyr::filter("ancestor_concept_id" != "descendant_concept_id"), - by = "ancestor_concept_id" + by = "ancestor_concept_id", copy = TRUE ) %>% dplyr::select("descendant_concept_id") %>% dplyr::filter(!is.na(.data$descendant_concept_id)) %>% dplyr::distinct() %>% dplyr::rename("concept_id" = "descendant_concept_id") - candidateCodeDescendants <- - candidateCodeDescendants %>% - dplyr::left_join(conceptDf, by = "concept_id") %>% - dplyr::mutate(concept_name = tidyWords(.data$concept_name)) + candidateCodeDescendants <- candidateCodeDescendants %>% + dplyr::left_join(conceptDf, by = "concept_id", copy = TRUE) return(candidateCodeDescendants) } @@ -540,15 +558,14 @@ addAncestor <- function(workingCandidateCodes, dplyr::select("concept_id") %>% dplyr::rename("descendant_concept_id" = "concept_id") %>% dplyr::left_join(conceptAncestorDf, - by = "descendant_concept_id" + by = "descendant_concept_id", copy = TRUE ) %>% dplyr::filter(.data$min_levels_of_separation == "1") %>% dplyr::select("ancestor_concept_id") %>% dplyr::rename("concept_id" = "ancestor_concept_id") %>% dplyr::left_join(conceptDf, - by = "concept_id" - ) %>% - dplyr::mutate(concept_name = tidyWords(.data$concept_name)) + by = "concept_id", copy = TRUE + ) # keep if not already in candidateCodes candidateCodeAncestor <- candidateCodeAncestor %>% @@ -556,8 +573,7 @@ addAncestor <- function(workingCandidateCodes, workingCandidateCodes %>% dplyr::select("concept_id"), by = "concept_id" - ) %>% - dplyr::left_join(conceptDf, by = "concept_id") + ) return(candidateCodeAncestor) } diff --git a/R/summariseCodeUse.R b/R/summariseCodeUse.R index ca00e9e..a04aa74 100644 --- a/R/summariseCodeUse.R +++ b/R/summariseCodeUse.R @@ -50,7 +50,6 @@ summariseCodeUse <- function(x, codeUse <- dplyr::bind_rows(codeUse) - return(codeUse) } @@ -366,6 +365,11 @@ if(length(tableName)>0){ dplyr::filter(.data$cohort_start_date == !!dplyr::sym(dateName[[1]])) } } + + if(is.null(codeRecords)){ + return(NULL) + } + codeRecords <- codeRecords %>% dplyr::mutate(date = !!dplyr::sym(dateName[[1]])) %>% dplyr::mutate(year = lubridate::year(date)) %>% diff --git a/README.md b/README.md index 7989b2f..e0296ab 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ getDrugIngredientCodes(cdm = cdm, name = "aspirin", withConceptDetails = TRUE) #> $aspirin #> # A tibble: 2 × 4 #> concept_id concept_name domain_id vocabulary_id -#> +#> #> 1 1112807 Aspirin Drug RxNorm #> 2 19059056 Aspirin 81 MG Oral Tablet Drug RxNorm ``` @@ -126,12 +126,12 @@ asthma_codes1 %>% glimpse() #> Rows: 2 #> Columns: 6 -#> $ concept_id 4051466, 317009 +#> $ concept_id 4051466, 317009 +#> $ found_from "From initial search", "From initial search" #> $ concept_name "Childhood asthma", "Asthma" -#> $ domain_id "condition", "condition" -#> $ concept_class_id "Clinical Finding", "Clinical Finding" +#> $ domain_id "Condition", "Condition" #> $ vocabulary_id "SNOMED", "SNOMED" -#> $ found_from "From initial search", "From initial search" +#> $ standard_concept "standard", "standard" ``` But perhaps we want to exclude certain concepts as part of the search @@ -148,12 +148,12 @@ asthma_codes2 %>% glimpse() #> Rows: 1 #> Columns: 6 -#> $ concept_id 317009 +#> $ concept_id 317009 +#> $ found_from "From initial search" #> $ concept_name "Asthma" -#> $ domain_id "condition" -#> $ concept_class_id "Clinical Finding" +#> $ domain_id "Condition" #> $ vocabulary_id "SNOMED" -#> $ found_from "From initial search" +#> $ standard_concept "standard" ``` We can compare these two code lists like so @@ -162,7 +162,7 @@ We can compare these two code lists like so compareCodelists(asthma_codes1, asthma_codes2) #> # A tibble: 2 × 3 #> concept_id concept_name codelist -#> +#> #> 1 4051466 Childhood asthma Only codelist 1 #> 2 317009 Asthma Both ``` @@ -182,12 +182,12 @@ Gastrointestinal_hemorrhage %>% glimpse() #> Rows: 1 #> Columns: 6 -#> $ concept_id 192671 +#> $ concept_id 192671 +#> $ found_from "From initial search" #> $ concept_name "Gastrointestinal hemorrhage" -#> $ domain_id "condition" -#> $ concept_class_id "Clinical Finding" +#> $ domain_id "Condition" #> $ vocabulary_id "SNOMED" -#> $ found_from "From initial search" +#> $ standard_concept "standard" ``` ## Summarising code use @@ -209,9 +209,9 @@ summariseCodeUse(list("asthma" = asthma_codes1$concept_id), #> $ estimate 101, 96, 5, 101, 96, 5 #> $ estimate_suppressed "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", "FA… #> $ standard_concept_name NA, "Childhood asthma", "Asthma", NA, "Childhood… -#> $ standard_concept_id NA, 4051466, 317009, NA, 4051466, 317009 +#> $ standard_concept_id NA, 4051466, 317009, NA, 4051466, 317009 #> $ source_concept_name NA, "Childhood asthma", "Asthma", NA, "Childhood… -#> $ source_concept_id NA, 4051466, 317009, NA, 4051466, 317009 +#> $ source_concept_id NA, 4051466, 317009, NA, 4051466, 317009 #> $ domain_id NA, "condition", "condition", NA, "condition", "… #> $ codelist_name "asthma", "asthma", "asthma", "asthma", "asthma"… #> $ cohort_name NA, NA, NA, NA, NA, NA diff --git a/extras/precomputeVignetteData.R b/extras/precomputeVignetteData.R index 79d092b..72579f0 100644 --- a/extras/precomputeVignetteData.R +++ b/extras/precomputeVignetteData.R @@ -25,95 +25,51 @@ vocabularyDatabaseSchema <- Sys.getenv("DB_VOCAB_SCHEMA") # create cdm reference cdm <- CDMConnector::cdm_from_con(con = db, cdm_schema = vocabularyDatabaseSchema, - cdm_tables = tidyselect::all_of(c("concept", - "concept_relationship", - "concept_ancestor", - "concept_synonym", - "drug_strength", - "vocabulary"))) -# vocab to arrow -# save in temp folder for this example -dOut<-here(tempdir(), "db_vocab") -dir.create(dOut) -CDMConnector::stow(cdm, dOut) - -# new cdm reference using arrow -cdm_arrow <- CDMConnector::cdm_from_files(path = dOut, - as_data_frame = FALSE) - -rm(cdm) + write_schema = "results") # intro vignette ---- -vocabVersion <- getVocabVersion(cdm = cdm_arrow) +vocabVersion <- getVocabVersion(cdm = cdm) -save( - vocabVersion, - file = here("vignettes", "introVocab.RData") -) - - -codesFromDescendants <- cdm_arrow$concept_ancestor %>% +codesFromDescendants <- cdm$concept_ancestor %>% filter(.data$ancestor_concept_id == 4182210) %>% select("descendant_concept_id") %>% rename("concept_id" = "descendant_concept_id") %>% select("concept_id") %>% - left_join(cdm_arrow$concept, + left_join(cdm$concept, by="concept_id") %>% select("concept_id", "concept_name", "domain_id", "vocabulary_id") %>% collect() -saveRDS( - codesFromDescendants, - here("vignettes", "introData01.RData") -) -dementiaCodes1 <- getCandidateCodes(cdm = cdm_arrow, +dementiaCodes1 <- getCandidateCodes(cdm = cdm, keywords = "dementia", domains = "Condition", - #searchViaSynonyms = FALSE, - #fuzzyMatch = FALSE, exclude = NULL, includeDescendants = TRUE, includeAncestor = FALSE, - #verbose=TRUE -) -saveRDS( - dementiaCodes1, - here("vignettes", "introData02.RData") ) + codeComparison <- compareCodelists(codesFromDescendants, dementiaCodes1) -saveRDS( - codeComparison, - here("vignettes", "introData03.RData") -) -icdMappings <- getMappings(cdm = cdm_arrow, + +icdMappings <- getMappings(cdm = cdm, candidateCodelist = dementiaCodes1, nonStandardVocabularies = "ICD10CM" ) -saveRDS( - icdMappings, - here("vignettes", "introData04.RData") -) -readMappings <- getMappings(cdm = cdm_arrow, + +readMappings <- getMappings(cdm = cdm, candidateCodelist = dementiaCodes1, nonStandardVocabularies = "Read" ) -saveRDS( - readMappings, - here("vignettes", "introData05.RData") -) # options vignette ------ -oaCodes1 <- getCandidateCodes(cdm = cdm_arrow, +oaCodes1 <- getCandidateCodes(cdm = cdm, keywords = "osteoarthritis", domains = "Condition", - #searchViaSynonyms = FALSE, - #fuzzyMatch = FALSE, exclude = c( "post-infection", "post-traumatic" @@ -122,17 +78,11 @@ oaCodes1 <- getCandidateCodes(cdm = cdm_arrow, includeAncestor = FALSE, #verbose = TRUE ) -saveRDS( - oaCodes1, - here("vignettes", "optionsData01.RData") -) # include desc -oaCodes2 <- getCandidateCodes(cdm = cdm_arrow, +oaCodes2 <- getCandidateCodes(cdm = cdm, keywords = "osteoarthritis", domains = "Condition", - #searchViaSynonyms = FALSE, - #fuzzyMatch = FALSE, exclude = c( "post-infection", "post-traumatic" @@ -140,18 +90,11 @@ oaCodes2 <- getCandidateCodes(cdm = cdm_arrow, includeDescendants = TRUE, includeAncestor = FALSE ) -saveRDS( - oaCodes2, - here("vignettes", "optionsData02.RData") -) # include obs -oaCodes3 <- getCandidateCodes(cdm = cdm_arrow, +oaCodes3 <- getCandidateCodes(cdm = cdm, keywords = "osteoarthritis", domains = c("Condition", "Observation"), - #searchViaSynonyms = FALSE, - #fuzzyMatch = FALSE, - #maxDistanceCost = 0.1, exclude = c( "post-infection", "post-traumatic" @@ -159,19 +102,12 @@ oaCodes3 <- getCandidateCodes(cdm = cdm_arrow, includeDescendants = FALSE, includeAncestor = FALSE ) -saveRDS( - oaCodes3, - here("vignettes", "optionsData03.RData") -) # search syn -oaCodes4 <- getCandidateCodes(cdm = cdm_arrow, +oaCodes4 <- getCandidateCodes(cdm = cdm, keywords = "osteoarthritis", domains = "Condition", searchInSynonyms = TRUE, - #searchViaSynonyms = TRUE, - #fuzzyMatch = FALSE, - #maxDistanceCost = 0.1, exclude = c( "post-infection", "post-traumatic" @@ -179,13 +115,10 @@ oaCodes4 <- getCandidateCodes(cdm = cdm_arrow, includeDescendants = FALSE, includeAncestor = FALSE ) -saveRDS( - oaCodes4, - here("vignettes", "optionsData04.RData") -) + # search source -oaCodes5 <- getCandidateCodes(cdm = cdm_arrow, +oaCodes5 <- getCandidateCodes(cdm = cdm, keywords = "osteoarthritis", domains = "Condition", searchNonStandard = TRUE, @@ -196,18 +129,12 @@ oaCodes5 <- getCandidateCodes(cdm = cdm_arrow, includeDescendants = FALSE, includeAncestor = FALSE ) -saveRDS( - oaCodes5, - here("vignettes", "optionsData04.RData") -) + # fuzzy search -oaCodes6 <- getCandidateCodes(cdm = cdm_arrow, +oaCodes6 <- getCandidateCodes(cdm = cdm, keywords = "osteoarthritis", domains = "Condition", - #searchViaSynonyms = FALSE, - #fuzzyMatch = TRUE, - #maxDistanceCost = 0.1, exclude = c( "post-infection", "post-traumatic" @@ -215,18 +142,12 @@ oaCodes6 <- getCandidateCodes(cdm = cdm_arrow, includeDescendants = FALSE, includeAncestor = FALSE ) -saveRDS( - oaCodes6, - here("vignettes", "optionsData05.RData") -) + # fuzzy search 0.2 -oaCodes7 <- getCandidateCodes(cdm = cdm_arrow, +oaCodes7 <- getCandidateCodes(cdm = cdm, keywords = "osteoarthritis", domains = "Condition", - #searchViaSynonyms = FALSE, - #fuzzyMatch = TRUE, - #maxDistanceCost = 0.2, exclude = c( "post-infection", "post-traumatic" @@ -234,18 +155,12 @@ oaCodes7 <- getCandidateCodes(cdm = cdm_arrow, includeDescendants = FALSE, includeAncestor = FALSE ) -saveRDS( - oaCodes7, - here("vignettes", "optionsData06.RData") -) + # include ancestor -oaCodes8 <- getCandidateCodes(cdm = cdm_arrow, +oaCodes8 <- getCandidateCodes(cdm = cdm, keywords = "osteoarthritis", domains = "Condition", - #searchViaSynonyms = FALSE, - #fuzzyMatch = FALSE, - #maxDistanceCost = 0.2, exclude = c( "post-infection", "post-traumatic" @@ -253,64 +168,124 @@ oaCodes8 <- getCandidateCodes(cdm = cdm_arrow, includeDescendants = FALSE, includeAncestor = TRUE ) -saveRDS( - oaCodes8, - here("vignettes", "optionsData07.RData") -) # medication vignette ------ -ac_codes_1 <- getCandidateCodes(cdm = cdm_arrow, +ac_codes_1 <- getCandidateCodes(cdm = cdm, keywords="acetaminophen", domains="drug", standardConcept="standard", includeDescendants = TRUE) -saveRDS( - ac_codes_1, - here("vignettes", "medData01.RData") -) -ac_codes_2a <- getCandidateCodes(cdm = cdm_arrow, +ac_codes_2a <- getCandidateCodes(cdm = cdm, keywords= c("acetaminophen injection", "acetaminophen intravenous"), domains="drug", standardConcept="standard", includeDescendants = TRUE) -saveRDS( - ac_codes_2a, - here("vignettes", "medData02a.RData") -) -ac_codes_2b <- getCandidateCodes(cdm = cdm_arrow, + +ac_codes_2b <- getCandidateCodes(cdm = cdm, keywords="acetaminophen", domains="drug", #doseForm = c("injection", "intravenous"), standardConcept="standard", includeDescendants = TRUE) -saveRDS( - ac_codes_2b, - here("vignettes", "medData02b.RData") -) -ac_dose_forms <- CodelistGenerator::getDoseForm(cdm = cdm_arrow) -saveRDS( - ac_dose_forms, - here("vignettes", "medDataDoseForms.RData")) -c <- compareCodelists(ac_codes_2, ac_codes_2a) -ac_codes_3 <- getCandidateCodes(cdm = cdm_arrow, +ac_dose_forms <- CodelistGenerator::getDoseForm(cdm = cdm) + + +c <- compareCodelists(ac_codes_2a, ac_codes_2b) +ac_codes_3 <- getCandidateCodes(cdm = cdm, keywords="acetaminophen", domains="drug", #conceptClassId = c("Quant Clinical Drug"), #doseForm = c("injection", "intravenous"), standardConcept="standard", includeDescendants = TRUE) + +ac_concept_class <- CodelistGenerator::getConceptClassId(cdm = cdm, + domain = "drug") + + +# save ----- +save( + vocabVersion, + file = here("inst", "introVocab.RData") +) +saveRDS( + codesFromDescendants, + here("inst", "introData01.RData") +) +saveRDS( + dementiaCodes1, + here("inst", "introData02.RData") +) +saveRDS( + codeComparison, + here("inst", "introData03.RData") +) +saveRDS( + icdMappings, + here("inst", "introData04.RData") +) +saveRDS( + readMappings, + here("inst", "introData05.RData") +) +saveRDS( + oaCodes1, + here("inst", "optionsData01.RData") +) +saveRDS( + oaCodes2, + here("inst", "optionsData02.RData") +) +saveRDS( + oaCodes3, + here("inst", "optionsData03.RData") +) +saveRDS( + oaCodes4, + here("inst", "optionsData04.RData") +) +saveRDS( + oaCodes5, + here("inst", "optionsData04.RData") +) +saveRDS( + oaCodes6, + here("inst", "optionsData05.RData") +) +saveRDS( + oaCodes7, + here("inst", "optionsData06.RData") +) +saveRDS( + oaCodes8, + here("inst", "optionsData07.RData") +) +saveRDS( + ac_codes_1, + here("inst", "medData01.RData") +) +saveRDS( + ac_codes_2a, + here("inst", "medData02a.RData") +) +saveRDS( + ac_codes_2b, + here("inst", "medData02b.RData") +) +saveRDS( + ac_dose_forms, + here("inst", "medDataDoseForms.RData") +) saveRDS( ac_codes_3, - here("vignettes", "medData03.RData") + here("inst", "medData03.RData") ) - -ac_concept_class <- CodelistGenerator::getConceptClassId(cdm = cdm_arrow, - domain = "drug") saveRDS( ac_concept_class, - here("vignettes", "medDataConceptClass.RData")) + here("inst", "medDataConceptClass.RData") +) diff --git a/vignettes/introData01.RData b/inst/introData01.RData similarity index 100% rename from vignettes/introData01.RData rename to inst/introData01.RData diff --git a/inst/introData02.RData b/inst/introData02.RData new file mode 100644 index 0000000..61e1598 Binary files /dev/null and b/inst/introData02.RData differ diff --git a/inst/introData03.RData b/inst/introData03.RData new file mode 100644 index 0000000..1269b2f Binary files /dev/null and b/inst/introData03.RData differ diff --git a/vignettes/introData04.RData b/inst/introData04.RData similarity index 100% rename from vignettes/introData04.RData rename to inst/introData04.RData diff --git a/vignettes/introData05.RData b/inst/introData05.RData similarity index 100% rename from vignettes/introData05.RData rename to inst/introData05.RData diff --git a/vignettes/introVocab.RData b/inst/introVocab.RData similarity index 59% rename from vignettes/introVocab.RData rename to inst/introVocab.RData index f9d94a4..7ac3cda 100644 Binary files a/vignettes/introVocab.RData and b/inst/introVocab.RData differ diff --git a/inst/medData01.RData b/inst/medData01.RData new file mode 100644 index 0000000..f9b1e0f Binary files /dev/null and b/inst/medData01.RData differ diff --git a/inst/medData02a.RData b/inst/medData02a.RData new file mode 100644 index 0000000..1814913 Binary files /dev/null and b/inst/medData02a.RData differ diff --git a/inst/medData02b.RData b/inst/medData02b.RData new file mode 100644 index 0000000..f9b1e0f Binary files /dev/null and b/inst/medData02b.RData differ diff --git a/inst/medData03.RData b/inst/medData03.RData new file mode 100644 index 0000000..f9b1e0f Binary files /dev/null and b/inst/medData03.RData differ diff --git a/vignettes/medDataConceptClass.RData b/inst/medDataConceptClass.RData similarity index 100% rename from vignettes/medDataConceptClass.RData rename to inst/medDataConceptClass.RData diff --git a/vignettes/medDataDoseForms.RData b/inst/medDataDoseForms.RData similarity index 100% rename from vignettes/medDataDoseForms.RData rename to inst/medDataDoseForms.RData diff --git a/inst/optionsData01.RData b/inst/optionsData01.RData new file mode 100644 index 0000000..984cff3 Binary files /dev/null and b/inst/optionsData01.RData differ diff --git a/inst/optionsData02.RData b/inst/optionsData02.RData new file mode 100644 index 0000000..18a7c27 Binary files /dev/null and b/inst/optionsData02.RData differ diff --git a/inst/optionsData03.RData b/inst/optionsData03.RData new file mode 100644 index 0000000..daf608b Binary files /dev/null and b/inst/optionsData03.RData differ diff --git a/inst/optionsData04.RData b/inst/optionsData04.RData new file mode 100644 index 0000000..674a4b6 Binary files /dev/null and b/inst/optionsData04.RData differ diff --git a/inst/optionsData05.RData b/inst/optionsData05.RData new file mode 100644 index 0000000..984cff3 Binary files /dev/null and b/inst/optionsData05.RData differ diff --git a/inst/optionsData06.RData b/inst/optionsData06.RData new file mode 100644 index 0000000..984cff3 Binary files /dev/null and b/inst/optionsData06.RData differ diff --git a/inst/optionsData07.RData b/inst/optionsData07.RData new file mode 100644 index 0000000..cff9533 Binary files /dev/null and b/inst/optionsData07.RData differ diff --git a/man/CodelistGenerator-package.Rd b/man/CodelistGenerator-package.Rd index eee3208..233ee7b 100644 --- a/man/CodelistGenerator-package.Rd +++ b/man/CodelistGenerator-package.Rd @@ -4,7 +4,7 @@ \name{CodelistGenerator-package} \alias{CodelistGenerator} \alias{CodelistGenerator-package} -\title{CodelistGenerator: Generate Code Lists for the OMOP Common Data Model} +\title{CodelistGenerator: Identify Relevant Clinical Codes and Evaluate Their Use} \description{ Generate a candidate code list for the Observational Medical Outcomes Partnership (OMOP) common data model based on string matching. For a given search strategy, a candidate code list will be returned. } diff --git a/man/codesInUse.Rd b/man/codesInUse.Rd new file mode 100644 index 0000000..ebc1aa4 --- /dev/null +++ b/man/codesInUse.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/codesInUse.R +\name{codesInUse} +\alias{codesInUse} +\title{Get codes used in the database} +\usage{ +codesInUse( + cdm, + table = c("condition_occurrence", "device_exposure", "drug_exposure", "measurement", + "observation", "procedure_occurrence", "visit_occurrence") +) +} +\arguments{ +\item{cdm}{cdm_reference via CDMConnector} + +\item{table}{cdm table} +} +\description{ +Get codes used in the database +} diff --git a/man/findOrphanCodes.Rd b/man/findOrphanCodes.Rd new file mode 100644 index 0000000..8c4ab21 --- /dev/null +++ b/man/findOrphanCodes.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/findOrphanCodes.R +\name{findOrphanCodes} +\alias{findOrphanCodes} +\title{Find orphan codes related to a codelist} +\usage{ +findOrphanCodes( + x, + cdm, + domains = "Condition", + standardConcept = "Standard", + searchInSynonyms = TRUE, + searchNonStandard = TRUE, + includeDescendants = TRUE, + includeAncestor = TRUE, + minCellCount = 5 +) +} +\arguments{ +\item{x}{Codes for which to find codes related but not included (orphan +codes)} + +\item{cdm}{cdm_reference via CDMConnector} + +\item{domains}{Character vector with one or more of the OMOP CDM domain.} + +\item{standardConcept}{Character vector with one or more of "Standard", +"Classification", and "Non-standard". These correspond to the flags used +for the standard_concept field in the concept table of the cdm.} + +\item{searchInSynonyms}{Either TRUE or FALSE. If TRUE the code will also +search using both the primary name in the concept table and synonyms from +the concept synonym table.} + +\item{searchNonStandard}{Either TRUE or FALSE. If TRUE the code will also +search via non-standard concepts.} + +\item{includeDescendants}{Either TRUE or FALSE. +If TRUE descendant concepts of identified concepts +will be included in the candidate codelist.} + +\item{includeAncestor}{Either TRUE or FALSE. +If TRUE the direct ancestor concepts of identified concepts +will be included in the candidate codelist.} + +\item{minCellCount}{The minimum number of counts to reported, below which +results will be suppressed. If 0, all results will be reported.} +} +\value{ +A codelist containing code related to (but not in) the target +codelist that are present used in the cdm +} +\description{ +Find orphan codes related to a codelist +} diff --git a/man/mockVocabRef.Rd b/man/mockVocabRef.Rd index e932b40..dd7b121 100644 --- a/man/mockVocabRef.Rd +++ b/man/mockVocabRef.Rd @@ -7,7 +7,7 @@ mockVocabRef(backend = "database") } \arguments{ -\item{backend}{'database' (duckdb), 'arrow' (parquet files), or 'data_frame'} +\item{backend}{'database' (duckdb) or 'data_frame'} } \value{ cdm reference with mock vocabulary diff --git a/man/restrictToCodesInUse.Rd b/man/restrictToCodesInUse.Rd new file mode 100644 index 0000000..79f6edb --- /dev/null +++ b/man/restrictToCodesInUse.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/codesInUse.R +\name{restrictToCodesInUse} +\alias{restrictToCodesInUse} +\title{Filter a codelist to keep only the codes used in the database} +\usage{ +restrictToCodesInUse( + x, + cdm, + table = c("condition_occurrence", "device_exposure", "drug_exposure", "measurement", + "observation", "procedure_occurrence", "visit_occurrence") +) +} +\arguments{ +\item{x}{A codelist} + +\item{cdm}{cdm_reference via CDMConnector} + +\item{table}{cdm table} +} +\description{ +Filter a codelist to keep only the codes used in the database +} diff --git a/man/sourceCodesInUse.Rd b/man/sourceCodesInUse.Rd new file mode 100644 index 0000000..2e3b1e6 --- /dev/null +++ b/man/sourceCodesInUse.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/codesInUse.R +\name{sourceCodesInUse} +\alias{sourceCodesInUse} +\title{Get source codes used in the database} +\usage{ +sourceCodesInUse( + cdm, + table = c("condition_occurrence", "device_exposure", "drug_exposure", "measurement", + "observation", "procedure_occurrence", "visit_occurrence") +) +} +\arguments{ +\item{cdm}{cdm_reference via CDMConnector} + +\item{table}{cdm table} +} +\description{ +Get source codes used in the database +} diff --git a/tests/testthat/test-achillesCodeUse.R b/tests/testthat/test-achillesCodeUse.R index ca51696..0457f14 100644 --- a/tests/testthat/test-achillesCodeUse.R +++ b/tests/testthat/test-achillesCodeUse.R @@ -1,99 +1,29 @@ test_that("achilles code use", { - testthat::skip_if(Sys.getenv("CDM5_REDSHIFT_DBNAME") == "") + # mock db + cdm <- mockVocabRef("database") - db <- DBI::dbConnect(RPostgres::Redshift(), - dbname = Sys.getenv("CDM5_REDSHIFT_DBNAME"), - host = Sys.getenv("CDM5_REDSHIFT_HOST"), - port = Sys.getenv("CDM5_REDSHIFT_PORT"), - user = Sys.getenv("CDM5_REDSHIFT_USER"), - password = Sys.getenv("CDM5_REDSHIFT_PASSWORD")) + oa <- getCandidateCodes(cdm = cdm, keywords = "osteoarthritis") + # two codes: "Osteoarthritis of knee" "Osteoarthritis of hip" + # in achilles we only have a count for "Osteoarthritis of knee" + result_achilles <- achillesCodeUse(list(oa = oa$concept_id), + cdm = cdm) + expect_true(result_achilles %>% + dplyr::pull("estimate") == 100) - cdm <- CDMConnector::cdm_from_con(con = db, - cdm_schema = Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA"), - write_schema = Sys.getenv("CDM5_REDSHIFT_SCRATCH_SCHEMA")) + # applying min cell count where estimate should be obscured + result_achilles <- achillesCodeUse(list(oa = oa$concept_id), + cdm = cdm, + minCellCount = 150) + expect_true(is.na(result_achilles %>% + dplyr::pull("estimate"))) - cdm$achilles_results <- cdm$condition_occurrence %>% - dplyr::group_by(condition_concept_id) %>% - dplyr::tally(name = "count_value") %>% - dplyr::rename("stratum_1" = "condition_concept_id") %>% - dplyr::mutate(stratum_2 = NA, - stratum_3 = NA, - analysis_id = 401) %>% - CDMConnector::computeQuery() - - asthma <- list(asthma = c(317009, 257581)) - result_achilles <- achillesCodeUse(asthma, - cdm = cdm) - result_cdm <- summariseCodeUse(asthma, cdm = cdm) - - expect_equal(result_achilles %>% - dplyr::filter(standard_concept_id == 317009, - group_name == "By concept", - variable_name == "Record count") %>% - dplyr::pull("estimate"), - result_cdm %>% - dplyr::filter(standard_concept_id == 317009, - group_name == "By concept", - variable_name == "Record count") %>% - dplyr::pull("estimate")) - - expect_equal(result_achilles %>% - dplyr::filter(standard_concept_id == 257581, - group_name == "By concept", - variable_name == "Record count") %>% - dplyr::pull("estimate"), - result_cdm %>% - dplyr::filter(standard_concept_id == 257581, - group_name == "By concept", - variable_name == "Record count") %>% - dplyr::pull("estimate")) - - - cdm$achilles_results <- cdm$condition_occurrence %>% - dplyr::group_by(person_id, condition_concept_id) %>% - dplyr::tally() %>% - dplyr::ungroup() %>% - dplyr::group_by(condition_concept_id) %>% - dplyr::tally(name = "count_value") %>% - dplyr::rename("stratum_1" = "condition_concept_id") %>% - dplyr::mutate(stratum_2 = NA, - stratum_3 = NA, - analysis_id = 400) %>% - CDMConnector::computeQuery() - - asthma <- list(asthma = c(317009, 257581)) - result_achilles <- achillesCodeUse(asthma, - cdm = cdm) - result_cdm <- summariseCodeUse(asthma, cdm = cdm) - - - expect_equal(result_achilles %>% - dplyr::filter(standard_concept_id == 317009, - group_name == "By concept", - variable_name == "Person count") %>% - dplyr::pull("estimate"), - result_cdm %>% - dplyr::filter(standard_concept_id == 317009, - group_name == "By concept", - variable_name == "Person count") %>% - dplyr::pull("estimate")) - - expect_equal(result_achilles %>% - dplyr::filter(standard_concept_id == 257581, - group_name == "By concept", - variable_name == "Person count") %>% - dplyr::pull("estimate"), - result_cdm %>% - dplyr::filter(standard_concept_id == 257581, - group_name == "By concept", - variable_name == "Person count") %>% - dplyr::pull("estimate")) # edge cases # concept id not in achilles - expect_message(achillesCodeUse(list(asthma = 123), + expect_message(result_achilles <- achillesCodeUse(list(asthma = 123), cdm = cdm)) + expect_true(nrow(result_achilles) == 0) # expected errors expect_error(achillesCodeUse(123, #not a named list diff --git a/tests/testthat/test-codesFrom.R b/tests/testthat/test-codesFrom.R index ff4a18e..32fe83c 100644 --- a/tests/testthat/test-codesFrom.R +++ b/tests/testthat/test-codesFrom.R @@ -1,5 +1,5 @@ test_that("test inputs - mock", { - backends <- c("database", "arrow", "data_frame") + backends <- c("database", "data_frame") for (i in seq_along(backends)) { # mock db diff --git a/tests/testthat/test-codesInUse.R b/tests/testthat/test-codesInUse.R new file mode 100644 index 0000000..7d57070 --- /dev/null +++ b/tests/testthat/test-codesInUse.R @@ -0,0 +1,76 @@ +test_that("tests with mock db", { + + # mock db + cdm <- mockVocabRef("database") + + codes <- getCandidateCodes( + cdm = cdm, + keywords = "arthritis", + domains = "Condition", + includeDescendants = FALSE + ) + expect_true(4 %in% + restrictToCodesInUse(list("cs" = codes$concept_id), + cdm = cdm)) + + expect_true(length(restrictToCodesInUse(list("cs1" = codes$concept_id, + "cs2" = 999), + cdm = cdm)) == 1) # will just have cs1 + + # no codes in db + codes <- getCandidateCodes( + cdm = cdm, + keywords = "hip osteoarthritis", + domains = "Condition", + includeDescendants = FALSE + ) + expect_message(restrictToCodesInUse(list("cs" = codes$concept_id), + cdm = cdm)) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) + +test_that("sql server with achilles", { + + testthat::skip_if(Sys.getenv("CDM5_SQL_SERVER_SERVER") == "") + testthat::skip_if(Sys.getenv("SQL_SERVER_DRIVER") == "") + testthat::skip_if(packageVersion("CDMConnector") <= "1.2.0") + + db <- DBI::dbConnect(odbc::odbc(), + Driver = Sys.getenv("SQL_SERVER_DRIVER"), + Server = Sys.getenv("CDM5_SQL_SERVER_SERVER"), + Database = Sys.getenv("CDM5_SQL_SERVER_CDM_DATABASE"), + UID = Sys.getenv("CDM5_SQL_SERVER_USER"), + PWD = Sys.getenv("CDM5_SQL_SERVER_PASSWORD"), + TrustServerCertificate="yes", + Port = Sys.getenv("CDM5_SQL_SERVER_PORT")) + cdm <- CDMConnector::cdm_from_con(db, + cdm_schema = c("CDMV54", "dbo"), + achilles_schema = c("CDMV54", "dbo"), + write_schema = c("ohdsi", "dbo")) + + + + asthma_codes <- getCandidateCodes( + cdm = cdm, + keywords = "asthma", + domains = c("Condition"), + includeDescendants = TRUE + ) + asthma_cl <- list("cs" = asthma_codes$concept_id) + + asthma_codes_present <- restrictToCodesInUse(x = asthma_cl, + cdm = cdm) + +expect_equal(sort(asthma_codes_present[[1]]), + sort(cdm$condition_occurrence %>% + dplyr::filter(.data$condition_concept_id %in% + !!asthma_codes$concept_id) %>% + dplyr::select("condition_concept_id") %>% + dplyr::distinct() %>% + dplyr::pull())) + + + CDMConnector::cdm_disconnect(cdm) +}) diff --git a/tests/testthat/test-dbms.R b/tests/testthat/test-dbms.R new file mode 100644 index 0000000..971e09b --- /dev/null +++ b/tests/testthat/test-dbms.R @@ -0,0 +1,115 @@ +# Testing against different database platforms + + +test_that("achilles code use", { + + testthat::skip_if(Sys.getenv("CDM5_REDSHIFT_DBNAME") == "") + + db <- DBI::dbConnect(RPostgres::Redshift(), + dbname = Sys.getenv("CDM5_REDSHIFT_DBNAME"), + host = Sys.getenv("CDM5_REDSHIFT_HOST"), + port = Sys.getenv("CDM5_REDSHIFT_PORT"), + user = Sys.getenv("CDM5_REDSHIFT_USER"), + password = Sys.getenv("CDM5_REDSHIFT_PASSWORD")) + + cdm <- CDMConnector::cdm_from_con(con = db, + cdm_schema = Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA"), + write_schema = Sys.getenv("CDM5_REDSHIFT_SCRATCH_SCHEMA")) + + cdm$achilles_results <- cdm$condition_occurrence %>% + dplyr::group_by(condition_concept_id) %>% + dplyr::tally(name = "count_value") %>% + dplyr::rename("stratum_1" = "condition_concept_id") %>% + dplyr::mutate(stratum_2 = NA, + stratum_3 = NA, + analysis_id = 401) %>% + CDMConnector::computeQuery() + + asthma <- list(asthma = c(317009, 257581)) + result_achilles <- achillesCodeUse(asthma, + cdm = cdm) + result_cdm <- summariseCodeUse(asthma, cdm = cdm) + + expect_equal(result_achilles %>% + dplyr::filter(standard_concept_id == 317009, + group_name == "By concept", + variable_name == "Record count") %>% + dplyr::pull("estimate"), + result_cdm %>% + dplyr::filter(standard_concept_id == 317009, + group_name == "By concept", + variable_name == "Record count") %>% + dplyr::pull("estimate")) + + expect_equal(result_achilles %>% + dplyr::filter(standard_concept_id == 257581, + group_name == "By concept", + variable_name == "Record count") %>% + dplyr::pull("estimate"), + result_cdm %>% + dplyr::filter(standard_concept_id == 257581, + group_name == "By concept", + variable_name == "Record count") %>% + dplyr::pull("estimate")) + + + cdm$achilles_results <- cdm$condition_occurrence %>% + dplyr::group_by(person_id, condition_concept_id) %>% + dplyr::tally() %>% + dplyr::ungroup() %>% + dplyr::group_by(condition_concept_id) %>% + dplyr::tally(name = "count_value") %>% + dplyr::rename("stratum_1" = "condition_concept_id") %>% + dplyr::mutate(stratum_2 = NA, + stratum_3 = NA, + analysis_id = 400) %>% + CDMConnector::computeQuery() + + asthma <- list(asthma = c(317009, 257581)) + result_achilles <- achillesCodeUse(asthma, + cdm = cdm) + result_cdm <- summariseCodeUse(asthma, cdm = cdm) + + + expect_equal(result_achilles %>% + dplyr::filter(standard_concept_id == 317009, + group_name == "By concept", + variable_name == "Person count") %>% + dplyr::pull("estimate"), + result_cdm %>% + dplyr::filter(standard_concept_id == 317009, + group_name == "By concept", + variable_name == "Person count") %>% + dplyr::pull("estimate")) + + expect_equal(result_achilles %>% + dplyr::filter(standard_concept_id == 257581, + group_name == "By concept", + variable_name == "Person count") %>% + dplyr::pull("estimate"), + result_cdm %>% + dplyr::filter(standard_concept_id == 257581, + group_name == "By concept", + variable_name == "Person count") %>% + dplyr::pull("estimate")) + + # edge cases + # concept id not in achilles + expect_message(achillesCodeUse(list(asthma = 123), + cdm = cdm)) + + # expected errors + expect_error(achillesCodeUse(123, #not a named list + cdm = cdm)) + expect_error(achillesCodeUse(asthma, + cdm = "cdm")) # not a cdm + expect_error(achillesCodeUse(asthma, + cdm = cdm, + countBy = "not an option")) + expect_error(achillesCodeUse(asthma, + cdm = cdm, + minCellCount = "not a number")) + + CDMConnector::cdm_disconnect(cdm) +}) + diff --git a/tests/testthat/test-drugCodes.R b/tests/testthat/test-drugCodes.R index f63ee13..338e6aa 100644 --- a/tests/testthat/test-drugCodes.R +++ b/tests/testthat/test-drugCodes.R @@ -1,6 +1,6 @@ test_that("getATCCodes working", { - backends <- c("database", "arrow", "data_frame") + backends <- c("database", "data_frame") for (i in seq_along(backends)) { cdm <- mockVocabRef(backend = backends[i]) atcCodes <- getATCCodes(cdm, level = "ATC 1st") @@ -28,7 +28,7 @@ test_that("getATCCodes working", { test_that("getATCCodes expected errors", { - backends <- c("database", "arrow", "data_frame") + backends <- c("database", "data_frame") for (i in seq_along(backends)) { cdm <- mockVocabRef(backend = backends[i]) expect_error(getATCCodes(cdm, level = "Not an ATC level")) @@ -44,7 +44,7 @@ test_that("getATCCodes expected errors", { test_that("getDrugIngredientCodes working", { - backends <- c("database", "arrow", "data_frame") + backends <- c("database", "data_frame") for (i in seq_along(backends)) { cdm <- mockVocabRef(backend = backends[i]) ing_codes <- getDrugIngredientCodes(cdm) @@ -106,7 +106,7 @@ test_that("getDrugIngredientCodes working", { test_that("getDrugIngredientCodes expected errors", { - backends <- c("database", "arrow", "data_frame") + backends <- c("database","data_frame") for (i in seq_along(backends)) { cdm <- mockVocabRef(backend = backends[i]) expect_error(getDrugIngredientCodes(cdm, name = "Not an Ingredient")) diff --git a/tests/testthat/test-findOrphanCodes.R b/tests/testthat/test-findOrphanCodes.R new file mode 100644 index 0000000..fc4722c --- /dev/null +++ b/tests/testthat/test-findOrphanCodes.R @@ -0,0 +1,67 @@ +test_that("tests with mock db", { + + # mock db + cdm <- mockVocabRef("database") + + codes <- getCandidateCodes( + cdm = cdm, + keywords = "Musculoskeletal disorder", + domains = "Condition", + includeDescendants = FALSE + ) + + orphan_codes <- findOrphanCodes(x = list("msk" = codes$concept_id), + cdm = cdm, + domains = "Condition", + standardConcept = "Standard", + searchInSynonyms = FALSE, + searchNonStandard = FALSE, + includeDescendants = TRUE, + includeAncestor = FALSE) + + # we should pick up knee osteoarthritis from our achilles tables + expect_true(orphan_codes %>% + dplyr::pull("standard_concept_id") == 4) + expect_true(orphan_codes %>% + dplyr::pull("estimate") == 100) + + orphan_codes <- findOrphanCodes(x = list("msk" = codes$concept_id), + cdm = cdm, + domains = "Condition", + standardConcept = "Standard", + searchInSynonyms = FALSE, + searchNonStandard = FALSE, + includeDescendants = FALSE, + includeAncestor = FALSE) + # we will not find records now we're not looking in descendants + expect_true(nrow(orphan_codes) == 0) + + + # we shouldn't have our original codes + expect_true(nrow(findOrphanCodes(x = list("knee_oa" = 4), + cdm = cdm, + domains = "Condition", + standardConcept = "Standard", + searchInSynonyms = FALSE, + searchNonStandard = FALSE, + includeDescendants = FALSE, + includeAncestor = FALSE)) == 0) + + # min cell count + orphan_codes <- findOrphanCodes(x = list("msk" = codes$concept_id), + cdm = cdm, + domains = "Condition", + standardConcept = "Standard", + searchInSynonyms = FALSE, + searchNonStandard = FALSE, + includeDescendants = TRUE, + includeAncestor = FALSE, + minCellCount = 150) + expect_true(orphan_codes %>% + dplyr::pull("standard_concept_id") == 4) + expect_true(is.na(orphan_codes %>% + dplyr::pull("estimate"))) + + DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) + +}) diff --git a/tests/testthat/test-findUnmappedCodes.R b/tests/testthat/test-findUnmappedCodes.R new file mode 100644 index 0000000..b74a648 --- /dev/null +++ b/tests/testthat/test-findUnmappedCodes.R @@ -0,0 +1,42 @@ +test_that("achilles code use", { + + testthat::skip_if(Sys.getenv("CDM5_REDSHIFT_DBNAME") == "") + + db <- DBI::dbConnect(RPostgres::Redshift(), + dbname = Sys.getenv("CDM5_REDSHIFT_DBNAME"), + host = Sys.getenv("CDM5_REDSHIFT_HOST"), + port = Sys.getenv("CDM5_REDSHIFT_PORT"), + user = Sys.getenv("CDM5_REDSHIFT_USER"), + password = Sys.getenv("CDM5_REDSHIFT_PASSWORD")) + + cdm <- CDMConnector::cdm_from_con(con = db, + cdm_schema = Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA"), + write_schema = Sys.getenv("CDM5_REDSHIFT_SCRATCH_SCHEMA")) + + + x <- "Nonallopathic lesions" + + candidateCodes <- getCandidateCodes( + cdm = cdm, + keywords = "Nonallopathic lesions", + domains = "Condition", + standardConcept = "Non-standard", + searchInSynonyms = FALSE, + searchNonStandard = FALSE, + includeDescendants = FALSE, + includeAncestor = FALSE) + + # source codes used in the database + dbCandidateCodes <- intersect(unmappedSourceCodesInUse(cdm = cdm), + candidateCodes$concept_id) + + + a<- candidateCodes + + a<-cdm$condition_occurrence %>% + dplyr::filter(condition_concept_id == 0) %>% + dplyr::collect() + + + CDMConnector::cdm_disconnect(cdm) +}) diff --git a/tests/testthat/test-getCandidateCodes.R b/tests/testthat/test-getCandidateCodes.R index f16acaf..98639b8 100644 --- a/tests/testthat/test-getCandidateCodes.R +++ b/tests/testthat/test-getCandidateCodes.R @@ -1,5 +1,6 @@ test_that("tests with mock db", { - backends <- c("database", "arrow", "data_frame") + backends <- c("database", + "data_frame") for (i in seq_along(backends)) { # mock db @@ -31,9 +32,13 @@ test_that("tests with mock db", { domains = "Condition", includeDescendants = FALSE ) - expect_true((nrow(codes) == 2 & - codes$concept_name[1] == "Osteoarthritis of knee" & - codes$concept_name[2] == "Osteoarthritis of hip")) + expect_true(nrow(codes) == 2) + expect_true("Osteoarthritis of knee" %in% + (codes %>% + dplyr::pull("concept_name"))) + expect_true("Osteoarthritis of hip" %in% + (codes %>% + dplyr::pull("concept_name"))) # test include descendants codes <- getCandidateCodes( @@ -256,7 +261,8 @@ test_that("tests with mock db", { }) test_that("tests with mock db - multiple domains", { - backends <- c("database") + backends <- c("database", + "data_frame") for (i in seq_along(backends)) { # mock db @@ -276,12 +282,13 @@ test_that("tests with mock db - multiple domains", { codes <- getCandidateCodes( cdm = cdm, - keywords = "H/O osteoarthritis", + keywords = "h o osteoarthritis", domains = c("Condition", "Observation"), includeDescendants = FALSE ) - expect_true(all(nrow(codes) == 1 & - codes$concept_id == 9)) + expect_true(all( + nrow(codes) == 3 & + c(4,5,9) %in% codes$concept_id)) if (backends[[i]] == "database") { DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE) diff --git a/tests/testthat/test-getICD10StandardCodes.R b/tests/testthat/test-getICD10StandardCodes.R index 2392f4a..d2881c6 100644 --- a/tests/testthat/test-getICD10StandardCodes.R +++ b/tests/testthat/test-getICD10StandardCodes.R @@ -1,5 +1,5 @@ test_that("db without icd10 codes loaded", { - backends <- c("database", "arrow", "data_frame") + backends <- c("database", "data_frame") for (i in seq_along(backends)) { cdm <- mockVocabRef(backend = backends[i]) codes <- getICD10StandardCodes(cdm = cdm, diff --git a/tests/testthat/test-mockVocabRef.R b/tests/testthat/test-mockVocabRef.R index 80497a1..93ce18f 100644 --- a/tests/testthat/test-mockVocabRef.R +++ b/tests/testthat/test-mockVocabRef.R @@ -1,18 +1,13 @@ test_that("mock vocab db", { cdmDb <- mockVocabRef("database") cdmDF <- mockVocabRef("data_frame") - cdmArrow <- mockVocabRef("arrow") conceptFromDb <- cdmDb$concept %>% dplyr::collect() conceptFromDf <- cdmDF$concept %>% dplyr::collect() - conceptFromArrow <- cdmArrow$concept %>% dplyr::collect() expect_equal(conceptFromDb, conceptFromDf, ignore_attr = TRUE) - expect_equal(conceptFromDb, - conceptFromArrow, - ignore_attr = TRUE) DBI::dbDisconnect(attr(cdmDb, "dbcon"), shutdown = TRUE) }) diff --git a/tests/testthat/test-vocabUtilities.R b/tests/testthat/test-vocabUtilities.R index 7a3a42f..b18d7e7 100644 --- a/tests/testthat/test-vocabUtilities.R +++ b/tests/testthat/test-vocabUtilities.R @@ -1,5 +1,5 @@ test_that("tests with mock db", { - backends <- c("database", "arrow", "data_frame") + backends <- c("database", "data_frame") for (i in seq_along(backends)) { # mock db diff --git a/vignettes/a01_Introduction_to_CodelistGenerator.Rmd b/vignettes/a01_Introduction_to_CodelistGenerator.Rmd index 797cd04..7ccd525 100644 --- a/vignettes/a01_Introduction_to_CodelistGenerator.Rmd +++ b/vignettes/a01_Introduction_to_CodelistGenerator.Rmd @@ -20,14 +20,8 @@ knitr::opts_chunk$set( For this example we are going to generate a candidate codelist for dementia, only looking for codes in the condition domain. Let's first load some libraries ```{r, message=FALSE, warning=FALSE,echo=FALSE} -library(here) -library(readr) library(DBI) -library(here) library(dplyr) -library(stringr) -library(DT) -library(kableExtra) library(CodelistGenerator) library(CDMConnector) ``` @@ -60,7 +54,8 @@ getVocabVersion(cdm = cdm) ``` ```{r, message=FALSE, warning=FALSE,echo=FALSE} -vocabVersion <- load(here("vignettes", "introVocab.RData")) +vocabVersion <- load(system.file("introVocab.RData", + package = "CodelistGenerator")) vocabVersion ``` @@ -93,17 +88,13 @@ codesFromDescendants <- tbl( ``` ```{r, message=FALSE, warning=FALSE,echo=FALSE} -codesFromDescendants <- readRDS(here("vignettes", "introData01.RData")) +codesFromDescendants <- readRDS(system.file("introData01.RData", + package = "CodelistGenerator")) ``` ```{r, message=FALSE, warning=FALSE } -datatable(codesFromDescendants, - rownames = FALSE, - options = list( - pageLength = 10, - lengthMenu = c(10, 20, 50) - ) -) +codesFromDescendants %>% + glimpse() ``` This looks to pick up most relevant codes. But, this approach misses codes that are not a descendant of 4182210. For example, codes such as "Wandering due to dementia" (37312577; https://athena.ohdsi.org/search-terms/terms/37312577) and "Anxiety due to dementia" (37312031; https://athena.ohdsi.org/search-terms/terms/37312031) are not picked up. @@ -122,17 +113,13 @@ dementiaCodes1 <- getCandidateCodes( ``` ```{r, message=FALSE, warning=FALSE,echo=FALSE} -dementiaCodes1 <- readRDS(here("vignettes", "introData02.RData")) +dementiaCodes1 <- readRDS(system.file("introData02.RData", + package = "CodelistGenerator")) ``` ```{r, message=FALSE, warning=FALSE } -datatable(dementiaCodes1, - rownames = FALSE, - options = list( - pageLength = 10, - lengthMenu = c(10, 20, 50) - ) -) +dementiaCodes1%>% + glimpse() ``` @@ -146,33 +133,29 @@ codeComparison <- compareCodelists( ``` ```{r, message=FALSE, warning=FALSE,echo=FALSE} -codeComparison <- readRDS(here("vignettes", "introData03.RData")) +codeComparison <- readRDS(system.file("introData03.RData", + package = "CodelistGenerator")) ``` ```{r, message=FALSE, warning=FALSE } -kable(codeComparison %>% +codeComparison %>% group_by(codelist) %>% - tally()) + tally() ``` What are these extra codes picked up by CodelistGenerator? ```{r, message=FALSE, warning=FALSE } -datatable( - codeComparison %>% - filter(codelist == "Only codelist 2"), - rownames = FALSE, - options = list( - pageLength = 10, - lengthMenu = c(10, 20, 50) - ) -) +codeComparison %>% + filter(codelist == "Only codelist 2") %>% + glimpse() ``` ## Review mappings from non-standard vocabularies Perhaps we want to see what ICD10CM codes map to our candidate code list. We can get these by running ```{r, message=FALSE, warning=FALSE,echo=FALSE} -icdMappings <- readRDS(here("vignettes", "introData04.RData")) +icdMappings <- readRDS(system.file("introData04.RData", + package = "CodelistGenerator")) ``` ```{r, eval=FALSE } @@ -184,18 +167,14 @@ icdMappings <- getMappings( ``` ```{r, message=FALSE, warning=FALSE } -datatable(icdMappings, - rownames = FALSE, - options = list( - pageLength = 10, - lengthMenu = c(10, 20, 50) - ) -) +icdMappings %>% + glimpse() ``` ```{r, message=FALSE, warning=FALSE,echo=FALSE} -readMappings <- readRDS(here("vignettes", "introData05.RData")) +readMappings <- readRDS(system.file("introData05.RData", + package = "CodelistGenerator")) ``` ```{r, eval=FALSE } @@ -207,11 +186,6 @@ readMappings <- getMappings( ``` ```{r, message=FALSE, warning=FALSE } -datatable(readMappings, - rownames = FALSE, - options = list( - pageLength = 10, - lengthMenu = c(10, 20, 50) - ) -) +readMappings %>% + glimpse() ``` diff --git a/vignettes/a02_Candidate_codes_OA.Rmd b/vignettes/a02_Candidate_codes_OA.Rmd index 1da1626..28a1db2 100644 --- a/vignettes/a02_Candidate_codes_OA.Rmd +++ b/vignettes/a02_Candidate_codes_OA.Rmd @@ -17,15 +17,8 @@ knitr::opts_chunk$set( ``` ```{r, message=FALSE, warning=FALSE,echo=FALSE} -library(here) -library(readr) library(DBI) -library(here) library(dplyr) -library(dbplyr) -library(stringr) -library(DT) -library(kableExtra) library(CodelistGenerator) library(CDMConnector) ``` @@ -69,7 +62,8 @@ cdm <- CDMConnector::cdm_from_con( ## Search strategies ### Condition domain, without searching synonyms, with exclusions, without including descendants or ancestor ```{r, message=FALSE, warning=FALSE,echo=FALSE} -oaCodes1 <- readRDS(here("vignettes", "optionsData01.RData")) +oaCodes1 <- readRDS(system.file("optionsData01.RData", + package = "CodelistGenerator")) ``` To start we will search for "osteoarthritis", while excluding "post-infection" and "post-traumatic", but without searching synonyms, without searching via non-standard codes, and without including descendants or the direct ancestor of the included concepts. @@ -92,19 +86,15 @@ oaCodes1 <- getCandidateCodes( What is the candidate codelist? ```{r, message=FALSE, warning=FALSE } -datatable(oaCodes1, - rownames = FALSE, - options = list( - pageLength = 10, - lengthMenu = c(10, 20, 250) - ) -) +oaCodes1 %>% + glimpse() ``` ### Including descendants ```{r, message=FALSE, warning=FALSE,echo=FALSE} -oaCodes2 <- readRDS(here("vignettes", "optionsData02.RData")) +oaCodes2 <- readRDS(system.file("optionsData02.RData", + package = "CodelistGenerator")) ``` Now we will also include the descendants of included concepts. @@ -130,18 +120,14 @@ newCodes1To2 <- compareCodelists(oaCodes1, oaCodes2) %>% filter(codelist == "Only codelist 2") %>% select(-"codelist") -datatable(newCodes1To2, - rownames = FALSE, - options = list( - pageLength = 10, - lengthMenu = c(10, 20, 50) - ) -) +newCodes1To2 %>% + glimpse() ``` ### Including observation domain ```{r, message=FALSE, warning=FALSE,echo=FALSE} -oaCodes3 <- readRDS(here("vignettes", "optionsData03.RData")) +oaCodes3 <- readRDS(system.file("optionsData03.RData", + package = "CodelistGenerator")) ``` Now we will search the observation domain as well as the condition domain. @@ -167,18 +153,14 @@ newCodes1To3 <- compareCodelists(oaCodes1, oaCodes3) %>% filter(codelist == "Only codelist 2") %>% select(-"codelist") -datatable(newCodes1To3, - rownames = FALSE, - options = list( - pageLength = 10, - lengthMenu = c(10, 20, 50) - ) -) +newCodes1To3 %>% + glimpse() ``` ### Search synonyms ```{r, message=FALSE, warning=FALSE,echo=FALSE} -oaCodes4 <- readRDS(here("vignettes", "optionsData04.RData")) +oaCodes4 <- readRDS(system.file("optionsData04.RData", + package = "CodelistGenerator")) ``` Now we will search the concept synonym table to identify concepts to include. @@ -204,18 +186,14 @@ newCodes1To4 <- compareCodelists(oaCodes1, oaCodes4) %>% filter(codelist == "Only codelist 2") %>% select(-"codelist") -datatable(newCodes1To4, - rownames = FALSE, - options = list( - pageLength = 10, - lengthMenu = c(10, 20, 50) - ) -) +newCodes1To4 %>% + glimpse() ``` ### Search via non-standard ```{r, message=FALSE, warning=FALSE,echo=FALSE} -oaCodes5 <- readRDS(here("vignettes", "optionsData04.RData")) +oaCodes5 <- readRDS(system.file("optionsData05.RData", + package = "CodelistGenerator")) ``` Now we will search the concept synonym table to identify concepts to include. @@ -241,18 +219,14 @@ newCodes1To5 <- compareCodelists(oaCodes1, oaCodes5) %>% filter(codelist == "Only codelist 2") %>% select(-"codelist") -datatable(newCodes1To5, - rownames = FALSE, - options = list( - pageLength = 10, - lengthMenu = c(10, 20, 50) - ) -) +newCodes1To5 %>% + glimpse() ``` ### Include ancestor ```{r, message=FALSE, warning=FALSE,echo=FALSE} -oaCodes8 <- readRDS(here("vignettes", "optionsData07.RData")) +oaCodes8 <- readRDS(system.file("optionsData07.RData", + package = "CodelistGenerator")) ``` Now we include the direct ancestor of included terms. @@ -278,11 +252,6 @@ newCodes1To8 <- compareCodelists(oaCodes1, oaCodes8) %>% filter(codelist == "Only codelist 2") %>% select(-"codelist") -datatable(newCodes1To8, - rownames = FALSE, - options = list( - pageLength = 10, - lengthMenu = c(10, 20, 50) - ) -) +newCodes1To8 %>% + glimpse() ``` diff --git a/vignettes/a03_Options_for_CodelistGenerator.Rmd b/vignettes/a03_Options_for_CodelistGenerator.Rmd index 5c63617..bad5981 100644 --- a/vignettes/a03_Options_for_CodelistGenerator.Rmd +++ b/vignettes/a03_Options_for_CodelistGenerator.Rmd @@ -20,9 +20,6 @@ knitr::opts_chunk$set( ```{r, message=FALSE, warning=FALSE,echo=FALSE} library(DBI) library(dplyr) -library(dbplyr) -library(here) -library(kableExtra) library(CodelistGenerator) library(CDMConnector) ``` @@ -51,7 +48,8 @@ codes <- getCandidateCodes( includeDescendants = FALSE, ) -kable(codes) +codes %>% + glimpse() ``` Note, we would also identify it based on a partial match @@ -63,7 +61,8 @@ codes <- getCandidateCodes( includeDescendants = FALSE ) -kable(codes) +codes %>% + glimpse() ``` ## Add descendants @@ -73,12 +72,13 @@ knitr::include_graphics("mock_db_fig7.png") To include descendants of an identified code, we can set includeDescendants to TRUE ```{r} -kable(getCandidateCodes( +getCandidateCodes( cdm = cdm, keywords = "Musculoskeletal disorder", domains = "Condition", includeDescendants = TRUE -)) +) %>% + glimpse() ``` ## Multiple search terms @@ -95,7 +95,8 @@ codes <- getCandidateCodes( includeDescendants = FALSE ) -kable(codes) +codes %>% + glimpse() ``` ## Add ancestor @@ -113,7 +114,8 @@ codes <- getCandidateCodes( domains = "Condition" ) -kable(codes) +codes %>% + glimpse() ``` ## Searches with multiple words @@ -131,7 +133,8 @@ codes <- getCandidateCodes( includeDescendants = TRUE ) -kable(codes) +codes %>% + glimpse() ``` @@ -150,7 +153,8 @@ codes <- getCandidateCodes( domains = "Condition" ) -kable(codes) +codes %>% + glimpse() ``` ## Search using synonyms @@ -168,7 +172,8 @@ codes <- getCandidateCodes( searchInSynonyms = TRUE ) -kable(codes) +codes %>% + glimpse() ``` ## Search via non-standard @@ -182,7 +187,8 @@ codes <- getCandidateCodes( searchNonStandard = TRUE ) -kable(codes) +codes %>% + glimpse() ``` ## Search for both standard and non-standard concepts @@ -206,7 +212,8 @@ codes <- getCandidateCodes( standardConcept = c("Standard", "Non-standard") ) -kable(codes) +codes %>% + glimpse() ``` ```{r,echo=FALSE} diff --git a/vignettes/a04_codelists_for_medications.Rmd b/vignettes/a04_codelists_for_medications.Rmd index e9f32e9..a36f12f 100644 --- a/vignettes/a04_codelists_for_medications.Rmd +++ b/vignettes/a04_codelists_for_medications.Rmd @@ -15,15 +15,8 @@ knitr::opts_chunk$set( ``` ```{r, message=FALSE, warning=FALSE,echo=FALSE} -library(here) -library(readr) library(DBI) -library(here) library(dplyr) -library(dbplyr) -library(stringr) -library(DT) -library(kableExtra) library(CodelistGenerator) ``` @@ -57,9 +50,6 @@ First we can follow the approach of identifying high-level codes and include all ```{r, eval=FALSE} library(dplyr) library(CodelistGenerator) -library(stringr) -library(DT) -library(kableExtra) ``` ```{r, eval=FALSE} @@ -73,7 +63,8 @@ acetaminophen1 <- getCandidateCodes( ``` ```{r, message=FALSE, warning=FALSE, echo=FALSE} -acetaminophen1 <- readRDS(here("vignettes", "medData01.RData")) +acetaminophen1 <- readRDS(system.file("medData01.RData", + package = "CodelistGenerator")) ``` ```{r, message=FALSE, warning=FALSE } diff --git a/vignettes/introData02.RData b/vignettes/introData02.RData deleted file mode 100644 index c8e7e28..0000000 Binary files a/vignettes/introData02.RData and /dev/null differ diff --git a/vignettes/introData03.RData b/vignettes/introData03.RData deleted file mode 100644 index e5accdd..0000000 Binary files a/vignettes/introData03.RData and /dev/null differ diff --git a/vignettes/medData01.RData b/vignettes/medData01.RData deleted file mode 100644 index 00279f3..0000000 Binary files a/vignettes/medData01.RData and /dev/null differ diff --git a/vignettes/medData02a.RData b/vignettes/medData02a.RData deleted file mode 100644 index e1c4b0d..0000000 Binary files a/vignettes/medData02a.RData and /dev/null differ diff --git a/vignettes/medData02b.RData b/vignettes/medData02b.RData deleted file mode 100644 index 1da6877..0000000 Binary files a/vignettes/medData02b.RData and /dev/null differ diff --git a/vignettes/medData03.RData b/vignettes/medData03.RData deleted file mode 100644 index b96ef70..0000000 Binary files a/vignettes/medData03.RData and /dev/null differ diff --git a/vignettes/optionsData01.RData b/vignettes/optionsData01.RData deleted file mode 100644 index 4f0b144..0000000 Binary files a/vignettes/optionsData01.RData and /dev/null differ diff --git a/vignettes/optionsData02.RData b/vignettes/optionsData02.RData deleted file mode 100644 index 2977e0d..0000000 Binary files a/vignettes/optionsData02.RData and /dev/null differ diff --git a/vignettes/optionsData03.RData b/vignettes/optionsData03.RData deleted file mode 100644 index 9b5e3b8..0000000 Binary files a/vignettes/optionsData03.RData and /dev/null differ diff --git a/vignettes/optionsData04.RData b/vignettes/optionsData04.RData deleted file mode 100644 index 8975123..0000000 Binary files a/vignettes/optionsData04.RData and /dev/null differ diff --git a/vignettes/optionsData05.RData b/vignettes/optionsData05.RData deleted file mode 100644 index 6f66f11..0000000 Binary files a/vignettes/optionsData05.RData and /dev/null differ diff --git a/vignettes/optionsData06.RData b/vignettes/optionsData06.RData deleted file mode 100644 index 3e3e9d9..0000000 Binary files a/vignettes/optionsData06.RData and /dev/null differ diff --git a/vignettes/optionsData07.RData b/vignettes/optionsData07.RData deleted file mode 100644 index 3c675ee..0000000 Binary files a/vignettes/optionsData07.RData and /dev/null differ