darwin-eu · edward-burn · Jan 24, 2024 · Dec 29, 2023 · Dec 30, 2023 · Dec 31, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: CodelistGenerator
-Title: Generate Code Lists for the OMOP Common Data Model
-Version: 2.1.2
+Title: Identify Relevant Clinical Codes and Evaluate Their Use
+Version: 2.2.0
 Authors@R: c(
     person("Edward", "Burn", email = "[email protected]", 
     role = c("aut", "cre"),
@@ -13,10 +13,13 @@ License: Apache License (>= 2)
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
+Depends:
+    R (>= 3.5.0)
 Imports:
     CDMConnector (>= 1.1.2),
     checkmate (>= 2.0.0),
     DBI (>= 1.1.0),
+    duckdb,
     dplyr (>= 1.1.0),
     magrittr (>= 2.0.0),
     rlang (>= 1.0.0),
@@ -28,24 +31,20 @@ Imports:
     purrr,
     lubridate,
     PatientProfiles (>= 0.3.0),
-    RJSONIO
+    RJSONIO,
+    vctrs
 Suggests: 
     covr,
-    dbplyr (>= 2.2.1),
     knitr,
-    readr (>= 2.1.0),
-    duckdb,
-    DT,
     rmarkdown,
-    here (>= 1.0.0),
     testthat (>= 3.0.0),
-    kableExtra (>= 1.0.0),
     RPostgres,
     odbc,
     spelling,
     tibble
 Config/testthat/edition: 3
+Config/testthat/parallel: true
 VignetteBuilder: knitr
 URL: https://darwin-eu.github.io/CodelistGenerator/
 Language: en-US
-Config/testthat/parallel: true
+LazyData: true
diff --git a/NAMESPACE b/NAMESPACE
@@ -4,7 +4,9 @@ export("%>%")
 export(achillesCodeUse)
 export(codesFromCohort)
 export(codesFromConceptSet)
+export(codesInUse)
 export(compareCodelists)
+export(findOrphanCodes)
 export(getATCCodes)
 export(getCandidateCodes)
 export(getConceptClassId)
@@ -17,6 +19,8 @@ export(getMappings)
 export(getVocabVersion)
 export(getVocabularies)
 export(mockVocabRef)
+export(restrictToCodesInUse)
+export(sourceCodesInUse)
 export(summariseCodeUse)
 export(summariseCohortCodeUse)
 importFrom(magrittr,"%>%")

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,11 @@
+# CodelistGenerator 2.2.0
+* Added functions codesInUse, findOrphanCodes, restrictToCodesInUse, sourceCodesInUse.
+* Improved the speed of getCandidateCodes by performing the search in place (e.g. on the database side).
+* Dropped explicit support for an Arrow cdm.
+
+# CodelistGenerator 2.1.1
+* Improved support of device domain.
+
 # CodelistGenerator 2.0.0
 * Simplified the interface of getCandidateCodes, with a number of arguments removed.
 * Added function summariseCohortCodeUse.

diff --git a/R/achillesCodeUse.R b/R/achillesCodeUse.R
@@ -12,12 +12,12 @@
 #' @export
 #'
 #' @examples
 achillesCodeUse <- function(x,
                            cdm,
                            countBy = c("record", "person"),
                            minCellCount = 5) {

  errorMessage <- checkmate::makeAssertCollection()
  checkDbType(cdm = cdm, type = "cdm_reference", messageStore = errorMessage)
  checkmate::assertTRUE(all(countBy %in% c("record", "person")),
                        add = errorMessage)
@@ -26,11 +26,11 @@
  checkmate::reportAssertions(collection = errorMessage)

  checkmate::assertList(x)
  if(length(names(x)) != length(x)){
    cli::cli_abort("Must be a named list")
  }

  if(is.null(cdm[["achilles_results"]])){
    cli::cli_abort("No achilles tables found in cdm reference")
  }

@@ -47,6 +47,7 @@
 
   if("record" %in% countBy){
   allRecordCount <- getAchillesRecordCounts(cdm = cdm, conceptId = allCodes)
+  if(nrow(allRecordCount)>=1){
   allRecordCount <- allRecordCount %>%
     dplyr::mutate(concept_id = as.character(.data$concept_id)) %>%
     dplyr::left_join(codesWithDetails %>%
@@ -65,12 +66,13 @@
                     standard_concept_id = .data$concept_id
                     ) %>%
       dplyr::mutate(codelist_name = names(x)[i])
-
+    }
   }
   }
 
   if("person" %in% countBy){
   allPersonCount <- getAchillesPersonCounts(cdm = cdm, conceptId = allCodes)
+  if(nrow(allPersonCount)>=1){
   allPersonCount <- allPersonCount %>%
     dplyr::mutate(concept_id = as.character(.data$concept_id)) %>%
     dplyr::left_join(codesWithDetails %>%
@@ -91,7 +93,15 @@
       dplyr::mutate(codelist_name = names(x)[i])
   }
   }
+  }
 
+  if(length(codeUse) == 0){
+    cli::cli_inform(
+      c(
+        "i" = "No achilles counts found for the concepts provided."
+      ))
+    return(dplyr::tibble())
+  } else {
   codeUse <- dplyr::bind_rows(codeUse) %>%
     dplyr::mutate(group_name = "By concept",
            strata_name = "Overall",
@@ -113,12 +123,13 @@
                   "source_concept_id",
                   "domain_id", "codelist_name","cohort_name")
 
+    codeUse <- codeUse %>%
+      dplyr::mutate(estimate = dplyr::if_else(.data$estimate < .env$minCellCount &
+                                                .data$estimate > 0,
+                                              NA, .data$estimate)) %>%
+      dplyr::mutate(standard_concept_id = as.integer(.data$standard_concept_id),
+                    source_concept_id = as.integer(.data$source_concept_id))
 
-  if(nrow(codeUse) == 0){
-    cli::cli_inform(
-      c(
-        "i" = "No achilles counts found for the concepts provided."
-      ))
   }
 
   return(codeUse)

diff --git a/R/codesFromConceptSet.R b/R/codesFromConceptSet.R
@@ -253,18 +253,25 @@ tibbleToList <- function(codelistTibble) {
 
 addDetails <- function(conceptList, cdm){
 
+  # will accept either a list or tibble
+  # will return the same type as the input
+  inputIsTbl <- inherits(conceptList, "tbl_df")
+
+  if(isFALSE(inputIsTbl)){
   for(i in seq_along(conceptList)){
     conceptList[[i]] <- dplyr::tibble(concept_id = conceptList[[i]],
                                       concept_set = names(conceptList)[i])
   }
+    conceptList <- dplyr::bind_rows(conceptList)
+    }
 
-  conceptList <- dplyr::bind_rows(conceptList) %>%
-    dplyr::left_join(cdm[["concept"]] %>%
+  conceptList <- conceptList %>%
+     dplyr::left_join(cdm[["concept"]] %>%
       dplyr::select("concept_id", "concept_name",
                 "domain_id", "vocabulary_id",
                 "standard_concept"),
                      by = "concept_id",
-                     copy = TRUE)%>%
+                     copy = TRUE) %>%
     dplyr::mutate(
       standard_concept = ifelse(is.na(.data$standard_concept),
                                 "non-standard", .data$standard_concept
@@ -281,10 +288,11 @@ addDetails <- function(conceptList, cdm){
       )
     )
 
+  if(isFALSE(inputIsTbl)){
    conceptList <- split(
     x = conceptList %>% dplyr::select(!"concept_set"),
     f = as.factor(conceptList$concept_set)
-  )
+  )}
 
    return(conceptList)