diff --git a/DESCRIPTION b/DESCRIPTION
index 8eb712e..b517584 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: CodelistGenerator
-Title: Generate Code Lists for the OMOP Common Data Model
-Version: 2.1.2
+Title: Identify Relevant Clinical Codes and Evaluate Their Use
+Version: 2.2.0
 Authors@R: c(
     person("Edward", "Burn", email = "edward.burn@ndorms.ox.ac.uk", 
     role = c("aut", "cre"),
@@ -13,10 +13,13 @@ License: Apache License (>= 2)
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
+Depends:
+    R (>= 3.5.0)
 Imports:
     CDMConnector (>= 1.1.2),
     checkmate (>= 2.0.0),
     DBI (>= 1.1.0),
+    duckdb,
     dplyr (>= 1.1.0),
     magrittr (>= 2.0.0),
     rlang (>= 1.0.0),
@@ -28,24 +31,20 @@ Imports:
     purrr,
     lubridate,
     PatientProfiles (>= 0.3.0),
-    RJSONIO
+    RJSONIO,
+    vctrs
 Suggests: 
     covr,
-    dbplyr (>= 2.2.1),
     knitr,
-    readr (>= 2.1.0),
-    duckdb,
-    DT,
     rmarkdown,
-    here (>= 1.0.0),
     testthat (>= 3.0.0),
-    kableExtra (>= 1.0.0),
     RPostgres,
     odbc,
     spelling,
     tibble
 Config/testthat/edition: 3
+Config/testthat/parallel: true
 VignetteBuilder: knitr
 URL: https://darwin-eu.github.io/CodelistGenerator/
 Language: en-US
-Config/testthat/parallel: true
+LazyData: true
diff --git a/NAMESPACE b/NAMESPACE
index df86e87..4bf9b39 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,7 +4,9 @@ export("%>%")
 export(achillesCodeUse)
 export(codesFromCohort)
 export(codesFromConceptSet)
+export(codesInUse)
 export(compareCodelists)
+export(findOrphanCodes)
 export(getATCCodes)
 export(getCandidateCodes)
 export(getConceptClassId)
@@ -17,6 +19,8 @@ export(getMappings)
 export(getVocabVersion)
 export(getVocabularies)
 export(mockVocabRef)
+export(restrictToCodesInUse)
+export(sourceCodesInUse)
 export(summariseCodeUse)
 export(summariseCohortCodeUse)
 importFrom(magrittr,"%>%")
diff --git a/NEWS.md b/NEWS.md
index 6fbfc81..9247be1 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,11 @@
+# CodelistGenerator 2.2.0
+* Added functions findOrphanCodes, restrictToCodesInUse, sourceCodesInUse.
+* Speed improvements in getCandidateCodes from doing search in place (e.g. on database side).
+* Dropped explicit support of an Arrow cdm.
+
+# CodelistGenerator 2.1.1
+* Improved support of device domain.
+
 # CodelistGenerator 2.0.0
 * Simplified the interface of getCandidateCodes, with a number of arguments removed.
 * Added function summariseCohortCodeUse.
diff --git a/R/achillesCodeUse.R b/R/achillesCodeUse.R
index c7247b9..4f4ccdf 100644
--- a/R/achillesCodeUse.R
+++ b/R/achillesCodeUse.R
@@ -47,6 +47,7 @@ achillesCodeUse <- function(x,
 
   if("record" %in% countBy){
   allRecordCount <- getAchillesRecordCounts(cdm = cdm, conceptId = allCodes)
+  if(nrow(allRecordCount)>=1){
   allRecordCount <- allRecordCount %>%
     dplyr::mutate(concept_id = as.character(.data$concept_id)) %>%
     dplyr::left_join(codesWithDetails %>%
@@ -65,12 +66,13 @@ achillesCodeUse <- function(x,
                     standard_concept_id = .data$concept_id
                     ) %>%
       dplyr::mutate(codelist_name = names(x)[i])
-
+    }
   }
   }
 
   if("person" %in% countBy){
   allPersonCount <- getAchillesPersonCounts(cdm = cdm, conceptId = allCodes)
+  if(nrow(allPersonCount)>=1){
   allPersonCount <- allPersonCount %>%
     dplyr::mutate(concept_id = as.character(.data$concept_id)) %>%
     dplyr::left_join(codesWithDetails %>%
@@ -91,7 +93,15 @@ achillesCodeUse <- function(x,
       dplyr::mutate(codelist_name = names(x)[i])
   }
   }
+  }
 
+  if(length(codeUse) == 0){
+    cli::cli_inform(
+      c(
+        "i" = "No achilles counts found for the concepts provided."
+      ))
+    return(dplyr::tibble())
+  } else {
   codeUse <- dplyr::bind_rows(codeUse) %>%
     dplyr::mutate(group_name = "By concept",
            strata_name = "Overall",
@@ -113,12 +123,13 @@ achillesCodeUse <- function(x,
                   "source_concept_id",
                   "domain_id", "codelist_name","cohort_name")
 
+    codeUse <- codeUse %>%
+      dplyr::mutate(estimate = dplyr::if_else(.data$estimate < .env$minCellCount &
+                                                .data$estimate > 0,
+                                              NA, .data$estimate)) %>%
+      dplyr::mutate(standard_concept_id = as.integer(.data$standard_concept_id),
+                    source_concept_id = as.integer(.data$source_concept_id))
 
-  if(nrow(codeUse) == 0){
-    cli::cli_inform(
-      c(
-        "i" = "No achilles counts found for the concepts provided."
-      ))
   }
 
   return(codeUse)
diff --git a/R/codesFromConceptSet.R b/R/codesFromConceptSet.R
index 513e015..5b7b728 100644
--- a/R/codesFromConceptSet.R
+++ b/R/codesFromConceptSet.R
@@ -253,18 +253,25 @@ tibbleToList <- function(codelistTibble) {
 
 addDetails <- function(conceptList, cdm){
 
+  # will accept either a list or tibble
+  # will return the same type as the input
+  inputIsTbl <- inherits(conceptList, "tbl_df")
+
+  if(isFALSE(inputIsTbl)){
   for(i in seq_along(conceptList)){
     conceptList[[i]] <- dplyr::tibble(concept_id = conceptList[[i]],
                                       concept_set = names(conceptList)[i])
   }
+    conceptList <- dplyr::bind_rows(conceptList)
+    }
 
-  conceptList <- dplyr::bind_rows(conceptList) %>%
-    dplyr::left_join(cdm[["concept"]] %>%
+  conceptList <- conceptList %>%
+     dplyr::left_join(cdm[["concept"]] %>%
       dplyr::select("concept_id", "concept_name",
                 "domain_id", "vocabulary_id",
                 "standard_concept"),
                      by = "concept_id",
-                     copy = TRUE)%>%
+                     copy = TRUE) %>%
     dplyr::mutate(
       standard_concept = ifelse(is.na(.data$standard_concept),
                                 "non-standard", .data$standard_concept
@@ -281,10 +288,11 @@ addDetails <- function(conceptList, cdm){
       )
     )
 
+  if(isFALSE(inputIsTbl)){
    conceptList <- split(
     x = conceptList %>% dplyr::select(!"concept_set"),
     f = as.factor(conceptList$concept_set)
-  )
+  )}
 
    return(conceptList)
 
diff --git a/R/codesInUse.R b/R/codesInUse.R
new file mode 100644
index 0000000..5589fa5
--- /dev/null
+++ b/R/codesInUse.R
@@ -0,0 +1,227 @@
+#' Filter a codelist to keep only the codes used in the database
+#'
+#' @param x A codelist
+#' @param cdm cdm_reference via CDMConnector
+#' @param table cdm table
+#'
+#' @return
+#' @export
+#'
+#' @examples
+restrictToCodesInUse <- function(x,
+                               cdm,
+                               table = c("condition_occurrence",
+                                         "device_exposure",
+                                         "drug_exposure",
+                                         "measurement",
+                                         "observation",
+                                         "procedure_occurrence",
+                                         "visit_occurrence")){
+
+dbCodes <- codesInUse(cdm = cdm,
+                      table = table)
+
+if(is.null(dbCodes)){
+  for(i in seq_along(x)){
+    cli::cli_inform("No codes from any codelist found in the database")
+    return(invisible(NULL))
+  }
+} else {
+  for(i in seq_along(x)){
+    x[[i]] <- intersect(x[[i]], dbCodes)
+    if(!length(x[[i]]) >= 1){
+    cli::cli_inform("No codes from codelist {names(x)[i]} found in the database")
+    }
+  }
+}
+
+x <- vctrs::list_drop_empty(x)
+
+if(length(x) == 0){
+  return(invisible(NULL))
+}
+
+x
+
+}
+
+#' Get codes used in the database
+#'
+#' @param cdm cdm_reference via CDMConnector
+#' @param table cdm table
+#'
+#' @return
+#' @export
+#'
+#' @examples
+codesInUse <- function(cdm,
+                       table = c("condition_occurrence",
+                                 "device_exposure",
+                                 "drug_exposure",
+                                 "measurement",
+                                 "observation",
+                                 "procedure_occurrence",
+                                 "visit_occurrence")){
+
+if(!is.null(cdm[["achilles_results"]])){
+  codes <- fetchAchillesCodesInUse(cdm)
+} else {
+  # if achilles not available, query cdm
+  codes <- list()
+  for(i in seq_along(table)){
+  workingTable <- table[i]
+  workingConcept <- dplyr::case_when(
+    workingTable == "condition_occurrence" ~ "condition_concept_id",
+    workingTable == "device_exposure" ~ "device_concept_id",
+    workingTable == "drug_exposure" ~ "drug_concept_id",
+    workingTable == "measurement" ~ "measurement_concept_id",
+    workingTable == "observation" ~ "observation_concept_id",
+    workingTable == "procedure_occurrence" ~ "procedure_concept_id",
+    workingTable == "visit_occurrence" ~ "visit_concept_id"
+    )
+
+  if(!is.null(cdm[[workingTable]])){
+    codes[[i]] <- as.integer(cdm[[workingTable]] %>%
+                               dplyr::select(workingConcept) %>%
+                               dplyr::distinct() %>%
+                               dplyr::pull())
+  } else {
+    codes[[i]] <- NULL
+  }
+
+  }
+  codes <- unlist(codes)
+  }
+
+  codes
+}
+
+#' Get source codes used in the database
+#'
+#' @param cdm cdm_reference via CDMConnector
+#' @param table cdm table
+#'
+#' @return
+#' @export
+#'
+#' @examples
+sourceCodesInUse <- function(cdm,
+                                 table = c("condition_occurrence",
+                                           "device_exposure",
+                                           "drug_exposure",
+                                           "measurement",
+                                           "observation",
+                                           "procedure_occurrence",
+                                           "visit_occurrence")){
+
+  if(!is.null(cdm[["achilles_results"]])){
+    codes <- fetchAchillesCodesInUse(cdm)
+  } else {
+    # if achilles not available, query cdm
+    codes <- list()
+    for(i in seq_along(table)){
+      workingTable <- table[i]
+      workingConcept <- dplyr::case_when(
+        workingTable == "condition_occurrence" ~ "condition_source_concept_id",
+        workingTable == "device_exposure" ~ "device_source_concept_id",
+        workingTable == "drug_exposure" ~ "drug_source_concept_id",
+        workingTable == "measurement" ~ "measurement_source_concept_id",
+        workingTable == "observation" ~ "observation_source_concept_id",
+        workingTable == "procedure_occurrence" ~ "procedure_source_concept_id",
+        workingTable == "visit_occurrence" ~ "visit_source_concept_id"
+      )
+
+      codes[[i]] <- as.integer(cdm[[workingTable]] %>%
+                                 dplyr::select(workingConcept) %>%
+                                 dplyr::distinct() %>%
+                                 dplyr::pull())
+    }
+    codes <- unlist(codes)
+  }
+
+  codes
+}
+
+unmappedSourceCodesInUse <- function(cdm,
+                                         table = c("condition_occurrence",
+                                                   "device_exposure",
+                                                   "drug_exposure",
+                                                   "measurement",
+                                                   "observation",
+                                                   "procedure_occurrence",
+                                                   "visit_occurrence")){
+
+  # note, no achilles query for this so will have to query the cdm
+
+  codes <- list()
+  for(i in seq_along(table)){
+    workingTable <- table[i]
+    standardConcept <- dplyr::case_when(
+      workingTable == "condition_occurrence" ~ "condition_concept_id",
+      workingTable == "device_exposure" ~ "device_concept_id",
+      workingTable == "drug_exposure" ~ "drug_concept_id",
+      workingTable == "measurement" ~ "measurement_concept_id",
+      workingTable == "observation" ~ "observation_concept_id",
+      workingTable == "procedure_occurrence" ~ "procedure_concept_id",
+      workingTable == "visit_occurrence" ~ "visit_concept_id"
+    )
+
+    workingConcept <- dplyr::case_when(
+      workingTable == "condition_occurrence" ~ "condition_source_concept_id",
+      workingTable == "device_exposure" ~ "device_source_concept_id",
+      workingTable == "drug_exposure" ~ "drug_source_concept_id",
+      workingTable == "measurement" ~ "measurement_source_concept_id",
+      workingTable == "observation" ~ "observation_source_concept_id",
+      workingTable == "procedure_occurrence" ~ "procedure_source_concept_id",
+      workingTable == "visit_occurrence" ~ "visit_source_concept_id"
+    )
+
+    # keep unmapped codes
+    codes[[i]] <- as.integer(cdm[[workingTable]] %>%
+      dplyr::filter(!!rlang::sym(standardConcept) == 0) %>%
+      dplyr::select(dplyr::all_of(workingConcept)) %>%
+      dplyr::distinct() %>%
+      dplyr::pull())
+    codes[[i]] <- stats::na.omit(codes[[i]])
+  }
+
+  codes <- unlist(codes)
+
+  codes
+}
+
+fetchAchillesCodesInUse <- function(cdm){
+  cdm[["achilles_results"]] %>%
+    dplyr::filter(.data$analysis_id %in%
+      c(
+        401, # condition occurrence
+        701, # drug_exposure
+        801, # observation
+        1801, # measurement
+        201, # visit_occurrence
+        601, # procedure_occurrence
+        2101 # device_exposure
+      )) %>%
+    dplyr::select("stratum_1") %>%
+    dplyr::distinct() %>%
+    dplyr::mutate(stratum_1 = as.integer(.data$stratum_1)) %>%
+    dplyr::pull("stratum_1")
+}
+
+fetchAchillesSourceCodesInUse <- function(cdm){
+  cdm[["achilles_results"]] %>%
+    dplyr::filter(.data$analysis_id %in%
+                    c(
+                      425, # condition occurrence
+                      725, # drug_exposure
+                      825, # observation
+                      1825, # measurement
+                      225, # visit_occurrence
+                      625, # procedure_occurrence
+                      2125 # device_exposure
+                    )) %>%
+    dplyr::select("stratum_1") %>%
+    dplyr::distinct() %>%
+    dplyr::mutate(stratum_1 = as.integer(.data$stratum_1)) %>%
+    dplyr::pull("stratum_1")
+}
diff --git a/R/findOrphanCodes.R b/R/findOrphanCodes.R
new file mode 100644
index 0000000..3d70b38
--- /dev/null
+++ b/R/findOrphanCodes.R
@@ -0,0 +1,82 @@
+
+#' Find orphan codes related to a codelist
+#'
+#' @param x Codes for which to find codes related but not included (orphan
+#' codes)
+#' @param cdm cdm_reference via CDMConnector
+#' @param domains Character vector with one or more of the OMOP CDM domain.
+#' @param standardConcept  Character vector with one or more of "Standard",
+#' "Classification", and "Non-standard". These correspond to the flags used
+#' for the standard_concept field in the concept table of the cdm.
+#' @param searchInSynonyms Either TRUE or FALSE. If TRUE the code will also
+#' search using both the primary name in the concept table and synonyms from
+#' the concept synonym table.
+#' @param searchNonStandard Either TRUE or FALSE. If TRUE the code will also
+#' search via non-standard concepts.
+#' @param includeDescendants Either TRUE or FALSE.
+#' If TRUE descendant concepts of identified concepts
+#' will be included in the candidate codelist.
+#' @param includeAncestor Either TRUE or FALSE.
+#' If TRUE the direct ancestor concepts of identified concepts
+#'  will be included in the candidate codelist.
+#' @param minCellCount The minimum number of counts to reported, below which
+#' results will be suppressed. If 0, all results will be reported.
+#'
+#' @return A codelist containing code related to (but not in) the target
+#' codelist that are present used in the cdm
+#' @export
+#'
+#' @examples
+findOrphanCodes <- function(x,
+                            cdm,
+                            domains = "Condition",
+                            standardConcept = "Standard",
+                            searchInSynonyms = TRUE,
+                            searchNonStandard = TRUE,
+                            includeDescendants = TRUE,
+                            includeAncestor = TRUE,
+                            minCellCount = 5){
+
+
+x <- addDetails(cdm = cdm, conceptList = x)
+
+orphanConcepts <- list()
+# rerun search
+for(i in seq_along(x)){
+candidateCodes <- getCandidateCodes(
+    cdm = cdm,
+    keywords = x[[i]]$concept_name,
+    domains = domains,
+    standardConcept = standardConcept,
+    searchInSynonyms = searchInSynonyms,
+    searchNonStandard = searchNonStandard,
+    includeDescendants = includeDescendants,
+    includeAncestor = includeAncestor)
+
+# Exclude codes that are in the original set of codes
+candidateCodes <- candidateCodes %>%
+  dplyr::anti_join(x[[i]] %>%
+                     dplyr::select("concept_id"),
+                   by = "concept_id")
+# Use achilles counts to summarise code use
+orphanConcepts[[i]] <- achillesCodeUse(
+  x = list("cs" = candidateCodes$concept_id),
+  cdm = cdm,
+  minCellCount = minCellCount
+)
+if(nrow(orphanConcepts[[i]]) >= 1 ){
+  orphanConcepts[[i]] <- orphanConcepts[[i]] %>%
+    dplyr::mutate(codelist = names(x)[i])
+} else {
+  cli::cli_inform("-- No orphan codes found for codelist {names(x)[i]}")
+}
+}
+
+orphanConcepts <- dplyr::bind_rows(orphanConcepts)
+
+orphanConcepts
+}
+
+
+
+
diff --git a/R/findUnmappedCodes.R b/R/findUnmappedCodes.R
new file mode 100644
index 0000000..d9629ea
--- /dev/null
+++ b/R/findUnmappedCodes.R
@@ -0,0 +1,10 @@
+
+
+findUnmappedCodes <- function(){
+
+# search non-standard codes
+
+# see if any have been mapped to 0
+
+
+}
diff --git a/R/getCandidateCodes.R b/R/getCandidateCodes.R
index 73c7d7f..8573c79 100644
--- a/R/getCandidateCodes.R
+++ b/R/getCandidateCodes.R
@@ -68,7 +68,7 @@ getCandidateCodes <- function(cdm,
                               includeDescendants = TRUE,
                               includeAncestor = FALSE) {
 
-    start <- Sys.time()
+  start <- Sys.time()
 
   ## checks for standard types of user error
   errorMessage <- checkmate::makeAssertCollection()
@@ -148,12 +148,17 @@ getCandidateCodes <- function(cdm,
 
   if (nrow(searchResults) == 0) {
     cli::cli_inform("No codes found for the given search strategy")
-  } else {
-    cli::cli_alert_success(
-      "{nrow(searchResults)} candidate concept{?s} identified"
-    )
+    return(searchResults)
   }
 
+  # add concept info
+  searchResults <- addDetails(cdm = cdm,
+             conceptList = searchResults) %>%
+    dplyr::filter(tolower(.data$domain_id) %in% tolower(.env$domains))
+
+  cli::cli_alert_success(
+    "{nrow(searchResults)} candidate concept{?s} identified"
+  )
     duration <- abs(as.numeric(Sys.time() - start, units = "secs"))
     cli::cli_inform(
       "Time taken: {floor(duration/60)} minutes and {duration %% 60 %/% 1} seconds"
diff --git a/R/mockVocabRef.R b/R/mockVocabRef.R
index a8e9180..d3d5c01 100644
--- a/R/mockVocabRef.R
+++ b/R/mockVocabRef.R
@@ -17,7 +17,7 @@
 
 #' Generate example vocabulary database
 #'
-#' @param backend 'database' (duckdb), 'arrow' (parquet files), or 'data_frame'
+#' @param backend 'database' (duckdb) or 'data_frame'
 #' @return cdm reference with mock vocabulary
 #' @export
 #'
@@ -27,11 +27,23 @@
 #' DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
 mockVocabRef <- function(backend = "database") {
   errorMessage <- checkmate::makeAssertCollection()
-  checkmate::assertTRUE(backend %in% c("database", "arrow", "data_frame"))
+  checkmate::assertTRUE(backend %in% c("database", "data_frame"))
   checkmate::assertTRUE(length(backend) == 1)
   checkmate::reportAssertions(collection = errorMessage)
 
-  # tables
+  # compulsory tables
+  person <- dplyr::tibble(
+    person_id = 1, gender_concept_id = 0, year_of_birth = 1990,
+    race_concept_id = 0, ethnicity_concept_id = 0
+  )
+  observationPeriod <- dplyr::tibble(
+    observation_period_id = 1, person_id = 1,
+    observation_period_start_date = as.Date("2000-01-01"),
+    observation_period_end_date = as.Date("2025-12-31"),
+    period_type_concept_id = 0
+  )
+
+  # vocab tables
   concept <- data.frame(
     concept_id = 1:19,
     concept_name = c(
@@ -253,7 +265,9 @@ mockVocabRef <- function(backend = "database") {
       numerator_unit_concept_id = 8576,
       denominator_value = 0.5,
       denominator_unit_concept_id = 8587,
-      box_size = NA
+      box_size = NA,
+      valid_start_date = NA,
+      valid_end_date = NA
     )
   )
 
@@ -267,14 +281,37 @@ mockVocabRef <- function(backend = "database") {
       cdm_etl_reference = NA,
       source_release_date = NA,
       cdm_release_date = NA,
-      cdm_version = NA,
+      cdm_version = "5.3",
       vocabulary_version  = NA
     )
   )
 
+  # achilles tables
+  # count of 400 records for knee osteoarthritis
+  achillesAnalysis <- dplyr::tibble(analysis_id = 1,
+                                    analysis_name = 1)
+  achillesResults <- dplyr::tibble(analysis_id = 401,
+                             stratum_1 = 4,
+                             stratum_2 = NA,
+                             stratum_3 = NA,
+                             count_value = 100)
+  achillesResultsDist <- dplyr::tibble(analysis_id = 1,
+                                  count_value = 5)
+
   # into in-memory duckdb
   db <- DBI::dbConnect(duckdb::duckdb(), ":memory:")
-
+  DBI::dbWithTransaction(db, {
+    DBI::dbWriteTable(db, "person",
+                      person,
+                      overwrite = TRUE
+    )
+  })
+  DBI::dbWithTransaction(db, {
+    DBI::dbWriteTable(db, "observation_period",
+                      observationPeriod,
+                      overwrite = TRUE
+    )
+  })
   DBI::dbWithTransaction(db, {
     DBI::dbWriteTable(db, "concept",
       concept,
@@ -318,40 +355,37 @@ mockVocabRef <- function(backend = "database") {
     )
   })
 
-  cdm <- CDMConnector::cdm_from_con(db)
-  return(cdm)
-  # if (backend == "database") {
-  #   return(cdm)
-  # }
-  #
-  # if (backend %in% c("arrow", "data_frame")) {
-  #   dOut <- tempfile()
-  #   dir.create(dOut)
-  #   CDMConnector::stow(cdm = cdm, path = dOut, format = "csv")
-  #
-  #   if (backend == "arrow") {
-  #     cdmArrow <- CDMConnector::cdm_from_files(
-  #       path = dOut,
-  #       as_data_frame = FALSE, cdm_name = "mock_vocab"
-  #     )
-  #     DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
-  #     return(cdmArrow)
-  #   }
-  #
-  #   if (backend == "data_frame") {
-  #     if(utils::packageVersion("CDMConnector")<"1.1.0"){
-  #       cdmDF <- CDMConnector::cdm_from_files(
-  #         path = dOut,
-  #         as_data_frame = TRUE
-  #       )
-  #     } else {
-  #       cdmDF <- CDMConnector::cdm_from_files(
-  #         path = dOut, cdm_name = "mock",
-  #         as_data_frame = TRUE
-  #       )
-  #     }
-  #     DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
-  #     return(cdmDF)
-  #   }
-  # }
+  DBI::dbWithTransaction(db, {
+    DBI::dbWriteTable(db, "achilles_analysis",
+                      achillesAnalysis,
+                      overwrite = TRUE
+    )
+  })
+  DBI::dbWithTransaction(db, {
+    DBI::dbWriteTable(db, "achilles_results",
+                      achillesResults,
+                      overwrite = TRUE
+    )
+  })
+  DBI::dbWithTransaction(db, {
+    DBI::dbWriteTable(db, "achilles_results_dist",
+                      achillesResultsDist,
+                      overwrite = TRUE
+    )
+  })
+
+  cdm <- CDMConnector::cdm_from_con(con = db,
+                                    cdm_schema = "main",
+                                    write_schema = "main",
+                                    cdm_name = "mock")
+
+  cdm$achilles_analysis <- dplyr::tbl(db, "achilles_analysis")
+  cdm$achilles_results <- dplyr::tbl(db, "achilles_results")
+  cdm$achilles_results_dist <- dplyr::tbl(db, "achilles_results_dist")
+
+  if (backend %in% c("data_frame")) {
+    cdm <- cdm %>% dplyr::collect()
+  }
+
+  cdm
 }
diff --git a/R/runSearch.R b/R/runSearch.R
index 1187061..03609d0 100644
--- a/R/runSearch.R
+++ b/R/runSearch.R
@@ -23,6 +23,12 @@ runSearch <- function(keywords,
                       searchNonStandard,
                       includeDescendants,
                       includeAncestor) {
+
+  if(!is.null(attr(cdm, "dbcon"))){
+   prefix <- paste0(sample(letters, 5, TRUE),
+                    collapse = "")
+  }
+
   # connect to relevant vocabulary tables
   # will return informative error if not found
   conceptDb <- cdm$concept
@@ -31,75 +37,36 @@ runSearch <- function(keywords,
   conceptRelationshipDb <- cdm$concept_relationship
   drugStrengthDb <- cdm$drug_strength
 
-  # formatting of conceptDb variables
-  conceptDb <- conceptDb %>%
-    dplyr::mutate(domain_id = tolower(.data$domain_id)) %>%
-    dplyr::mutate(
-      standard_concept = ifelse(is.na(.data$standard_concept),
-        "non-standard", .data$standard_concept
-      )
-    ) %>%
-    dplyr::mutate(
-      standard_concept = ifelse(.data$standard_concept == "C",
-        "classification", .data$standard_concept
-      )
-    ) %>%
-    dplyr::mutate(
-      standard_concept = ifelse(.data$standard_concept == "S",
-        "standard", .data$standard_concept
-      )
-    )
-
   ## domains, standardConcept vocab to lower
   domains <- tolower(domains)
   standardConcept <- tolower(standardConcept)
-
   # new name for clarity
   standardConceptFlags <- standardConcept
 
-  cli::cli_inform("{domains} domain: Limiting to domains of interest")
+  # formatting of conceptDb variables
   conceptDb <- conceptDb %>%
-    dplyr::filter(.data$domain_id %in% .env$domains)
-
-  concept <- conceptDb %>%
-    dplyr::mutate(standard_concept = tolower(.data$standard_concept)) %>%
-    dplyr::filter(.data$standard_concept %in% .env$standardConceptFlags) %>%
-    dplyr::collect() %>%
-    dplyr::rename_with(tolower)
-
-  conceptAncestorDb <- conceptAncestorDb %>%
-    dplyr::left_join(
-      conceptDb %>%
-        dplyr::rename("ancestor_concept_id" = "concept_id") %>%
-        dplyr::select("ancestor_concept_id", "domain_id", "standard_concept"),
-      by = "ancestor_concept_id"
+    dplyr::mutate(
+      domain_id = tolower(.data$domain_id),
+      standard_concept = dplyr::case_when(
+        is.na(.data$standard_concept) ~ "non-standard",
+        .data$standard_concept == "C" ~ "classification",
+        .data$standard_concept == "S" ~ "standard",
+        .default = as.character(.data$standard_concept)
+      )
     )
-  if(!is.null(attr(cdm, "dbcon"))){
-    conceptAncestorDb <- conceptAncestorDb %>%
-      CDMConnector::compute_query()
-  }
-  conceptAncestorDb <- conceptAncestorDb %>%
-    dplyr::filter(.data$domain_id %in% .env$domains &
-                  .data$standard_concept %in% .env$standardConceptFlags) %>%
-    dplyr::select(-c("domain_id", "standard_concept"))
 
-  conceptAncestor <- conceptAncestorDb %>%
-    dplyr::left_join(
-      conceptDb %>%
-        dplyr::rename("descendant_concept_id" = "concept_id") %>%
-        dplyr::select("descendant_concept_id", "domain_id", "standard_concept"),
-      by = "descendant_concept_id"
-    )
   if(!is.null(attr(cdm, "dbcon"))){
-    conceptAncestor <- conceptAncestor %>%
-      CDMConnector::compute_query()
+    conceptDb <- conceptDb %>%
+    CDMConnector::computeQuery(name = paste0("cg_",prefix, "_1"),
+                                temporary = FALSE,
+                                schema = attr(cdm, "write_schema"),
+                                overwrite = TRUE)
   }
-  conceptAncestor <- conceptAncestor %>%
-    dplyr::filter(.data$domain_id %in% .env$domains &
-                  .data$standard_concept %in% .env$standardConceptFlags) %>%
-    dplyr::select(-c("domain_id", "standard_concept")) %>%
-    dplyr::collect() %>%
-    dplyr::rename_with(tolower)
+
+  cli::cli_inform("{domains} domain: Limiting to domains of interest")
+  concept <- conceptDb %>%
+    dplyr::filter(.data$standard_concept %in% .env$standardConceptFlags,
+                  .data$domain_id %in% .env$domains)
 
   # will only collect conceptSynonym later if needed
   if (searchInSynonyms == TRUE) {
@@ -111,7 +78,10 @@ runSearch <- function(keywords,
     )
   if(!is.null(attr(cdm, "dbcon"))){
   conceptSynonymDb <- conceptSynonymDb %>%
-    CDMConnector::compute_query()
+    CDMConnector::computeQuery(name = paste0("cg_",prefix, "_2"),
+                               temporary = FALSE,
+                               schema = attr(cdm, "write_schema"),
+                               overwrite = TRUE)
   }
 
   conceptSynonymDb <- conceptSynonymDb %>%
@@ -120,7 +90,7 @@ runSearch <- function(keywords,
     dplyr::select(-c("domain_id", "standard_concept"))
 
     conceptSynonym <- conceptSynonymDb %>%
-      dplyr::collect() %>%
+      # dplyr::collect() %>%
       dplyr::rename_with(tolower)
   }
 
@@ -135,7 +105,7 @@ runSearch <- function(keywords,
       ) %>%
       dplyr::filter(.data$domain_id %in% .env$domains &
                     .data$standard_concept %in% .env$standardConceptFlags) %>%
-      dplyr::collect() %>%
+      # dplyr::collect() %>%
       dplyr::rename_with(tolower)
   }
 
@@ -185,7 +155,10 @@ runSearch <- function(keywords,
 
   # run exclusion
   if (length(exclude) > 0) {
-    if (nrow(excludeCodes) > 0) {
+    if (excludeCodes %>%
+             utils::head(10) %>%
+             dplyr::tally() %>%
+             dplyr::pull("n") > 0) {
       candidateCodes <- candidateCodes %>%
         dplyr::anti_join(
           excludeCodes %>%
@@ -207,10 +180,10 @@ runSearch <- function(keywords,
         dplyr::select("concept_id") %>%
         dplyr::distinct() %>%
         dplyr::left_join(workingConcept,
-          by = "concept_id"
+          by = "concept_id", copy = TRUE
         )
 
-    candidateCodes <- dplyr::bind_rows(
+    candidateCodes <- dplyr::union_all(
       candidateCodes,
       candidateCodesInSynonyms %>%
         dplyr::mutate(found_from = "In synonyms")
@@ -220,7 +193,10 @@ runSearch <- function(keywords,
 
   # run exclusion
   if (length(exclude) > 0) {
-    if (nrow(excludeCodes) > 0) {
+    if (excludeCodes %>%
+        utils::head(10) %>%
+        dplyr::tally() %>%
+        dplyr::pull("n") > 0) {
       candidateCodes <- candidateCodes %>%
         dplyr::anti_join(
           excludeCodes %>%
@@ -232,21 +208,25 @@ runSearch <- function(keywords,
 
   candidateCodesList[[domains]] <- candidateCodes
 
+  # candidateCodes <- dplyr::bind_rows(candidateCodes %>%
+  #                                      dplyr::collect()) %>%
+  #   dplyr::distinct()
 
 
-  candidateCodes <- dplyr::bind_rows(candidateCodesList)
-
   # 5) add any codes lower in the hierarchy
   if (includeDescendants == TRUE) {
-    if (nrow(candidateCodes) > 0) {
+    if (candidateCodes %>%
+          utils::head(10) %>%
+          dplyr::tally() %>%
+          dplyr::pull("n") > 0) {
       cli::cli_inform("{domains} domain: Adding descendants")
       candidateCodeDescendants <- addDescendants(
         workingCandidateCodes = candidateCodes,
-        conceptAncestorDf = conceptAncestor,
+        conceptAncestorDf = conceptAncestorDb,
         conceptDf = concept
       )
 
-      candidateCodes <- dplyr::bind_rows(
+      candidateCodes <- dplyr::union_all(
         candidateCodes,
         candidateCodeDescendants %>%
           dplyr::mutate(found_from = "From descendants") %>%
@@ -256,10 +236,14 @@ runSearch <- function(keywords,
 
       # run exclusion
       if (length(exclude) > 0) {
-        if (nrow(excludeCodes) > 0) {
+        if (excludeCodes %>%
+            utils::head(10) %>%
+            dplyr::tally() %>%
+            dplyr::pull("n") > 0) {
           candidateCodes <- candidateCodes %>%
-            dplyr::anti_join(excludeCodes %>% dplyr::select("concept_id"),
-              by = "concept_id"
+            dplyr::anti_join(excludeCodes %>%
+                               dplyr::select("concept_id"),
+              by = "concept_id", copy = TRUE
             )
         }
       }
@@ -268,16 +252,19 @@ runSearch <- function(keywords,
 
   # 6) add any codes one level above in the hierarchy
   if (includeAncestor == TRUE) {
-    if (nrow(candidateCodes) > 0) {
+    if (candidateCodes %>%
+        utils::head(10) %>%
+        dplyr::tally() %>%
+        dplyr::pull("n") > 0) {
       cli::cli_inform("{domains} domain: Adding ancestor")
 
       candidateCodeAncestor <- addAncestor(
         workingCandidateCodes = candidateCodes,
-        conceptAncestorDf = conceptAncestor,
-        conceptDf = concept
+        conceptAncestorDf = conceptAncestorDb,
+        conceptDf = conceptDb
       )
 
-      candidateCodes <- dplyr::bind_rows(
+      candidateCodes <- dplyr::union_all(
         candidateCodes,
         candidateCodeAncestor %>%
           dplyr::mutate(found_from = "From ancestor")
@@ -286,7 +273,10 @@ runSearch <- function(keywords,
 
       # run exclusion
       if (length(exclude) > 0) {
-        if (nrow(excludeCodes) > 0) {
+        if (excludeCodes %>%
+            utils::head(10) %>%
+            dplyr::tally() %>%
+            dplyr::pull("n") > 0) {
           candidateCodes <- candidateCodes %>%
             dplyr::anti_join(excludeCodes %>% dplyr::select("concept_id"),
               by = "concept_id"
@@ -303,37 +293,40 @@ runSearch <- function(keywords,
   if (searchNonStandard == TRUE) {
     cli::cli_inform("{domains} domain: Adding codes from non-standard")
     conceptNs <- conceptDb %>%
-      dplyr::filter(.data$domain_id %in% .env$domains) %>%
       dplyr::filter(.data$standard_concept == "non-standard") %>%
-      dplyr::collect() %>%
       dplyr::rename_with(tolower)
 
-    if (nrow(conceptNs) > 0) {
+    if (conceptNs %>%
+        utils::head(10) %>%
+        dplyr::tally() %>%
+        dplyr::pull("n") > 0) {
       candidateCodesNs <- getMatches(
         words = tidyWords(keywords),
         conceptDf = conceptNs
       )
     }
 
-    if (nrow(conceptNs) > 0) {
+    if (conceptNs %>%
+        utils::head(10) %>%
+        dplyr::tally() %>%
+        dplyr::pull("n") > 0) {
       candidateCodesNs <- candidateCodesNs %>%
         dplyr::select("concept_id") %>%
         dplyr::left_join(
           conceptRelationshipDb %>%
             dplyr::filter(.data$relationship_id == "Mapped from") %>%
-            dplyr::collect() %>%
+            # dplyr::collect() %>%
             dplyr::rename_with(tolower),
-          by = c("concept_id" = "concept_id_2")
+          by = c("concept_id" = "concept_id_2"), copy = TRUE
         ) %>%
         dplyr::select("concept_id_1") %>%
         dplyr::rename("concept_id" = "concept_id_1") %>%
         dplyr::distinct() %>%
         dplyr::left_join(concept,
-          by = "concept_id"
-        ) %>%
-        dplyr::mutate(concept_name = tidyWords(.data$concept_name))
+          by = "concept_id", copy = TRUE
+        )
 
-      candidateCodes <- dplyr::bind_rows(
+      candidateCodes <- dplyr::union_all(
         candidateCodes,
         candidateCodesNs %>%
           dplyr::mutate(found_from = "From non-standard")
@@ -343,7 +336,10 @@ runSearch <- function(keywords,
 
     # run exclusion
     if (length(exclude) > 0) {
-      if (nrow(excludeCodes) > 0) {
+      if (excludeCodes %>%
+          utils::head(10) %>%
+          dplyr::tally() %>%
+          dplyr::pull("n") > 0) {
         candidateCodes <- candidateCodes %>%
           dplyr::anti_join(excludeCodes %>% dplyr::select("concept_id"),
             by = "concept_id"
@@ -352,23 +348,14 @@ runSearch <- function(keywords,
     }
   }
 
+  candidateCodes <- candidateCodes %>%
+    dplyr::collect()
 
-  if (nrow(candidateCodes) == 0) {
-    candidateCodes
-  } else {
-    # 8) Finish up
-    # get original names back
-    candidateCodes <- candidateCodes %>%
-      dplyr::filter(!is.na(.data$concept_id)) %>%
-      dplyr::select("concept_id", "found_from") %>%
-      dplyr::left_join(concept,
-        by = c("concept_id")
-      ) %>%
-      dplyr::distinct()
 
-    # if domain = "drug", add drug_strength information and dose form
 
-    if (domains == "drug") {
+  if (nrow(candidateCodes) > 0) {
+  # 8) Finish up
+    if (domains == "drug") { #add drug_strength information and dose form
       candidateCodes <- candidateCodes %>%
         dplyr::left_join(
           drugStrength %>%
@@ -380,7 +367,7 @@ runSearch <- function(keywords,
               "numerator_value", "numerator_unit_concept_id", "denominator_value",
               "denominator_unit_concept_id", "box_size"
             ),
-          by = "concept_id"
+          by = "concept_id", copy = TRUE
         )
 
       candidateCodes <- candidateCodes %>%
@@ -425,21 +412,23 @@ runSearch <- function(keywords,
             by = "concept_id"
           )
       }
-    } else {
-      candidateCodes <- candidateCodes %>%
-        dplyr::select(
-          "concept_id", "concept_name",
-          "domain_id", "concept_class_id",
-          "vocabulary_id", "found_from"
-        )
     }
 
     candidateCodes <- candidateCodes %>%
+      dplyr::select(dplyr::any_of(c("concept_id", "found_from",
+                                    "ingredient_concept_id", "amount_value",
+                                    "amount_unit_concept_id",
+                                    "numerator_value",
+                                    "numerator_unit_concept_id",
+                                    "denominator_value",
+                                    "denominator_unit_concept_id",
+                                    "box_size", "dose_form"))) %>%
       dplyr::distinct()
 
     # remove duplicates (found in different ways)
     # keep first time it was found
-    # for drug,  same concept_id with different ingredient_concept_id will be removed as well.
+    # for drug,  same concept_id with different ingredient_concept_id
+    # will be removed as well (with only the first kept).
     candidateCodes <- candidateCodes %>%
       dplyr::group_by(.data$concept_id) %>%
       dplyr::mutate(seq = dplyr::row_number(.data$concept_id)) %>%
@@ -447,8 +436,16 @@ runSearch <- function(keywords,
       dplyr::select(-"seq") %>%
       dplyr::ungroup()
 
-    return(candidateCodes)
   }
+
+
+  if(!is.null(attr(cdm, "dbcon"))){
+    CDMConnector::dropTable(cdm = cdm,
+                            name = dplyr::starts_with(paste0("cg_",prefix)))
+  }
+
+  return(candidateCodes)
+
 }
 
 
@@ -476,7 +473,7 @@ tidyWords <- function(words) {
 getMatches <- function(words,
                        conceptDf) {
   conceptDf <- conceptDf %>% # start with all
-    dplyr::mutate(concept_name = tidyWords(.data$concept_name))
+    dplyr::mutate(concept_name = tolower(.data$concept_name))
 
   # because there may be a lot of synonyms, get these from a loop
   # (stringr::str_detect slows considerably
@@ -494,16 +491,38 @@ getMatches <- function(words,
 
     for (j in seq_along(workingExclude)) {
       if (nchar(workingExclude[j]) >= 1) {
-        workingConcepts <- workingConcepts %>%
-          dplyr::filter(stringr::str_detect(
-            .data$concept_name,
-            .env$workingExclude[j]
-          ))
+
+        if(inherits(workingConcepts, "tbl_sql")){
+          workingConcepts <- workingConcepts %>%
+            dplyr::filter(dplyr::sql(paste0("concept_name LIKE '%", .env$workingExclude[j], "%'")))
+        } else {
+          workingConcepts <- workingConcepts %>%
+            dplyr::filter(stringr::str_detect(
+              .data$concept_name,
+              .env$workingExclude[j]
+            ))
+        }
+
       }
     }
     conceptsFound[[i]] <- workingConcepts
+
+    # %>% dplyr::collect()
+  }
+
+  if(length(conceptsFound)==1){
+    conceptsFound <- conceptsFound[[1]] %>% dplyr::distinct()
+  } else {
+    conceptsFoundList <- list()
+    for(i in 1:(length(conceptsFound)-1)){
+      conceptsFoundList <- dplyr::union_all(conceptsFound[[i]],
+                                            conceptsFound[[i+1]])
+
+    }
+    conceptsFound <- conceptsFoundList %>% dplyr::distinct()
+
   }
-  conceptsFound <- dplyr::bind_rows(conceptsFound) %>% dplyr::distinct()
+
 
   return(conceptsFound)
 }
@@ -511,6 +530,7 @@ getMatches <- function(words,
 addDescendants <- function(workingCandidateCodes,
                            conceptAncestorDf,
                            conceptDf) {
+
   candidateCodeDescendants <- workingCandidateCodes %>%
     dplyr::select("concept_id") %>%
     dplyr::rename("ancestor_concept_id" = "concept_id") %>%
@@ -518,17 +538,15 @@ addDescendants <- function(workingCandidateCodes,
     dplyr::left_join(
       conceptAncestorDf %>%
         dplyr::filter("ancestor_concept_id" != "descendant_concept_id"),
-      by = "ancestor_concept_id"
+      by = "ancestor_concept_id", copy = TRUE
     ) %>%
     dplyr::select("descendant_concept_id") %>%
     dplyr::filter(!is.na(.data$descendant_concept_id)) %>%
     dplyr::distinct() %>%
     dplyr::rename("concept_id" = "descendant_concept_id")
 
-  candidateCodeDescendants <-
-    candidateCodeDescendants %>%
-    dplyr::left_join(conceptDf, by = "concept_id") %>%
-    dplyr::mutate(concept_name = tidyWords(.data$concept_name))
+  candidateCodeDescendants <- candidateCodeDescendants %>%
+    dplyr::left_join(conceptDf, by = "concept_id", copy = TRUE)
 
   return(candidateCodeDescendants)
 }
@@ -540,15 +558,14 @@ addAncestor <- function(workingCandidateCodes,
     dplyr::select("concept_id") %>%
     dplyr::rename("descendant_concept_id" = "concept_id") %>%
     dplyr::left_join(conceptAncestorDf,
-      by = "descendant_concept_id"
+      by = "descendant_concept_id", copy = TRUE
     ) %>%
     dplyr::filter(.data$min_levels_of_separation == "1") %>%
     dplyr::select("ancestor_concept_id") %>%
     dplyr::rename("concept_id" = "ancestor_concept_id") %>%
     dplyr::left_join(conceptDf,
-      by = "concept_id"
-    ) %>%
-    dplyr::mutate(concept_name = tidyWords(.data$concept_name))
+      by = "concept_id", copy = TRUE
+    )
 
   # keep if not already in candidateCodes
   candidateCodeAncestor <- candidateCodeAncestor %>%
@@ -556,8 +573,7 @@ addAncestor <- function(workingCandidateCodes,
       workingCandidateCodes %>%
         dplyr::select("concept_id"),
       by = "concept_id"
-    ) %>%
-    dplyr::left_join(conceptDf, by = "concept_id")
+    )
 
   return(candidateCodeAncestor)
 }
diff --git a/R/summariseCodeUse.R b/R/summariseCodeUse.R
index ca00e9e..a04aa74 100644
--- a/R/summariseCodeUse.R
+++ b/R/summariseCodeUse.R
@@ -50,7 +50,6 @@ summariseCodeUse <- function(x,
   codeUse <- dplyr::bind_rows(codeUse)
 
 
-
  return(codeUse)
 
 }
@@ -366,6 +365,11 @@ if(length(tableName)>0){
         dplyr::filter(.data$cohort_start_date == !!dplyr::sym(dateName[[1]]))
     }
   }
+
+  if(is.null(codeRecords)){
+    return(NULL)
+  }
+
   codeRecords <- codeRecords %>%
     dplyr::mutate(date = !!dplyr::sym(dateName[[1]])) %>%
     dplyr::mutate(year = lubridate::year(date)) %>%
diff --git a/README.md b/README.md
index 7989b2f..e0296ab 100644
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ getDrugIngredientCodes(cdm = cdm, name = "aspirin", withConceptDetails = TRUE)
 #> $aspirin
 #> # A tibble: 2 × 4
 #>   concept_id concept_name              domain_id vocabulary_id
-#>        <dbl> <chr>                     <chr>     <chr>        
+#>        <int> <chr>                     <chr>     <chr>        
 #> 1    1112807 Aspirin                   Drug      RxNorm       
 #> 2   19059056 Aspirin 81 MG Oral Tablet Drug      RxNorm
 ```
@@ -126,12 +126,12 @@ asthma_codes1 %>%
   glimpse()
 #> Rows: 2
 #> Columns: 6
-#> $ concept_id       <dbl> 4051466, 317009
+#> $ concept_id       <int> 4051466, 317009
+#> $ found_from       <chr> "From initial search", "From initial search"
 #> $ concept_name     <chr> "Childhood asthma", "Asthma"
-#> $ domain_id        <chr> "condition", "condition"
-#> $ concept_class_id <chr> "Clinical Finding", "Clinical Finding"
+#> $ domain_id        <chr> "Condition", "Condition"
 #> $ vocabulary_id    <chr> "SNOMED", "SNOMED"
-#> $ found_from       <chr> "From initial search", "From initial search"
+#> $ standard_concept <chr> "standard", "standard"
 ```
 
 But perhaps we want to exclude certain concepts as part of the search
@@ -148,12 +148,12 @@ asthma_codes2 %>%
   glimpse()
 #> Rows: 1
 #> Columns: 6
-#> $ concept_id       <dbl> 317009
+#> $ concept_id       <int> 317009
+#> $ found_from       <chr> "From initial search"
 #> $ concept_name     <chr> "Asthma"
-#> $ domain_id        <chr> "condition"
-#> $ concept_class_id <chr> "Clinical Finding"
+#> $ domain_id        <chr> "Condition"
 #> $ vocabulary_id    <chr> "SNOMED"
-#> $ found_from       <chr> "From initial search"
+#> $ standard_concept <chr> "standard"
 ```
 
 We can compare these two code lists like so
@@ -162,7 +162,7 @@ We can compare these two code lists like so
 compareCodelists(asthma_codes1, asthma_codes2)
 #> # A tibble: 2 × 3
 #>   concept_id concept_name     codelist       
-#>        <dbl> <chr>            <chr>          
+#>        <int> <chr>            <chr>          
 #> 1    4051466 Childhood asthma Only codelist 1
 #> 2     317009 Asthma           Both
 ```
@@ -182,12 +182,12 @@ Gastrointestinal_hemorrhage %>%
   glimpse()
 #> Rows: 1
 #> Columns: 6
-#> $ concept_id       <dbl> 192671
+#> $ concept_id       <int> 192671
+#> $ found_from       <chr> "From initial search"
 #> $ concept_name     <chr> "Gastrointestinal hemorrhage"
-#> $ domain_id        <chr> "condition"
-#> $ concept_class_id <chr> "Clinical Finding"
+#> $ domain_id        <chr> "Condition"
 #> $ vocabulary_id    <chr> "SNOMED"
-#> $ found_from       <chr> "From initial search"
+#> $ standard_concept <chr> "standard"
 ```
 
 ## Summarising code use
@@ -209,9 +209,9 @@ summariseCodeUse(list("asthma" = asthma_codes1$concept_id),
 #> $ estimate              <int> 101, 96, 5, 101, 96, 5
 #> $ estimate_suppressed   <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", "FA…
 #> $ standard_concept_name <chr> NA, "Childhood asthma", "Asthma", NA, "Childhood…
-#> $ standard_concept_id   <dbl> NA, 4051466, 317009, NA, 4051466, 317009
+#> $ standard_concept_id   <int> NA, 4051466, 317009, NA, 4051466, 317009
 #> $ source_concept_name   <chr> NA, "Childhood asthma", "Asthma", NA, "Childhood…
-#> $ source_concept_id     <dbl> NA, 4051466, 317009, NA, 4051466, 317009
+#> $ source_concept_id     <int> NA, 4051466, 317009, NA, 4051466, 317009
 #> $ domain_id             <chr> NA, "condition", "condition", NA, "condition", "…
 #> $ codelist_name         <chr> "asthma", "asthma", "asthma", "asthma", "asthma"…
 #> $ cohort_name           <lgl> NA, NA, NA, NA, NA, NA
diff --git a/extras/precomputeVignetteData.R b/extras/precomputeVignetteData.R
index 79d092b..72579f0 100644
--- a/extras/precomputeVignetteData.R
+++ b/extras/precomputeVignetteData.R
@@ -25,95 +25,51 @@ vocabularyDatabaseSchema <- Sys.getenv("DB_VOCAB_SCHEMA")
 # create cdm reference
 cdm <- CDMConnector::cdm_from_con(con = db,
                                   cdm_schema = vocabularyDatabaseSchema,
-                                  cdm_tables = tidyselect::all_of(c("concept",
-                                                                    "concept_relationship",
-                                                                    "concept_ancestor",
-                                                                    "concept_synonym",
-                                                                    "drug_strength",
-                                                                    "vocabulary")))
-# vocab to arrow
-# save in temp folder for this example
-dOut<-here(tempdir(), "db_vocab")
-dir.create(dOut)
-CDMConnector::stow(cdm, dOut)
-
-# new cdm reference using arrow
-cdm_arrow <- CDMConnector::cdm_from_files(path = dOut,
-                                          as_data_frame = FALSE)
-
-rm(cdm)
+                                  write_schema = "results")
 
 # intro vignette ----
-vocabVersion <- getVocabVersion(cdm = cdm_arrow)
+vocabVersion <- getVocabVersion(cdm = cdm)
 
-save(
-  vocabVersion,
-  file = here("vignettes", "introVocab.RData")
-)
-
-
-codesFromDescendants <- cdm_arrow$concept_ancestor %>%
+codesFromDescendants <- cdm$concept_ancestor %>%
   filter(.data$ancestor_concept_id == 4182210) %>%
   select("descendant_concept_id") %>%
   rename("concept_id" = "descendant_concept_id") %>%
   select("concept_id")   %>%
-  left_join(cdm_arrow$concept,
+  left_join(cdm$concept,
             by="concept_id")  %>%
   select("concept_id", "concept_name", "domain_id", "vocabulary_id") %>%
   collect()
 
-saveRDS(
-  codesFromDescendants,
-  here("vignettes", "introData01.RData")
-)
 
-dementiaCodes1 <- getCandidateCodes(cdm = cdm_arrow,
+dementiaCodes1 <- getCandidateCodes(cdm = cdm,
   keywords = "dementia",
   domains = "Condition",
-  #searchViaSynonyms = FALSE,
-  #fuzzyMatch = FALSE,
   exclude = NULL,
   includeDescendants = TRUE,
   includeAncestor = FALSE,
-  #verbose=TRUE
-)
-saveRDS(
-  dementiaCodes1,
-  here("vignettes", "introData02.RData")
 )
 
+
 codeComparison <- compareCodelists(codesFromDescendants,
                   dementiaCodes1)
-saveRDS(
-  codeComparison,
-  here("vignettes", "introData03.RData")
-)
 
-icdMappings <- getMappings(cdm = cdm_arrow,
+
+icdMappings <- getMappings(cdm = cdm,
   candidateCodelist = dementiaCodes1,
   nonStandardVocabularies = "ICD10CM"
 )
-saveRDS(
-  icdMappings,
-  here("vignettes", "introData04.RData")
-)
 
-readMappings <- getMappings(cdm = cdm_arrow,
+
+readMappings <- getMappings(cdm = cdm,
   candidateCodelist = dementiaCodes1,
   nonStandardVocabularies = "Read"
 )
-saveRDS(
-  readMappings,
-  here("vignettes", "introData05.RData")
-)
 
 
 # options vignette ------
-oaCodes1 <- getCandidateCodes(cdm = cdm_arrow,
+oaCodes1 <- getCandidateCodes(cdm = cdm,
   keywords = "osteoarthritis",
   domains = "Condition",
-  #searchViaSynonyms = FALSE,
-  #fuzzyMatch = FALSE,
   exclude = c(
     "post-infection",
     "post-traumatic"
@@ -122,17 +78,11 @@ oaCodes1 <- getCandidateCodes(cdm = cdm_arrow,
   includeAncestor = FALSE,
   #verbose = TRUE
 )
-saveRDS(
-  oaCodes1,
-  here("vignettes", "optionsData01.RData")
-)
 
 # include desc
-oaCodes2 <- getCandidateCodes(cdm = cdm_arrow,
+oaCodes2 <- getCandidateCodes(cdm = cdm,
   keywords = "osteoarthritis",
   domains = "Condition",
-  #searchViaSynonyms = FALSE,
-  #fuzzyMatch = FALSE,
   exclude = c(
     "post-infection",
     "post-traumatic"
@@ -140,18 +90,11 @@ oaCodes2 <- getCandidateCodes(cdm = cdm_arrow,
   includeDescendants = TRUE,
   includeAncestor = FALSE
 )
-saveRDS(
-  oaCodes2,
-  here("vignettes", "optionsData02.RData")
-)
 
 # include obs
-oaCodes3 <- getCandidateCodes(cdm = cdm_arrow,
+oaCodes3 <- getCandidateCodes(cdm = cdm,
   keywords = "osteoarthritis",
   domains = c("Condition", "Observation"),
-  #searchViaSynonyms = FALSE,
-  #fuzzyMatch = FALSE,
-  #maxDistanceCost = 0.1,
   exclude = c(
     "post-infection",
     "post-traumatic"
@@ -159,19 +102,12 @@ oaCodes3 <- getCandidateCodes(cdm = cdm_arrow,
   includeDescendants = FALSE,
   includeAncestor = FALSE
 )
-saveRDS(
-  oaCodes3,
-  here("vignettes", "optionsData03.RData")
-)
 
 # search syn
-oaCodes4 <- getCandidateCodes(cdm = cdm_arrow,
+oaCodes4 <- getCandidateCodes(cdm = cdm,
   keywords = "osteoarthritis",
   domains = "Condition",
   searchInSynonyms = TRUE,
-  #searchViaSynonyms = TRUE,
-  #fuzzyMatch = FALSE,
-  #maxDistanceCost = 0.1,
   exclude = c(
     "post-infection",
     "post-traumatic"
@@ -179,13 +115,10 @@ oaCodes4 <- getCandidateCodes(cdm = cdm_arrow,
   includeDescendants = FALSE,
   includeAncestor = FALSE
 )
-saveRDS(
-  oaCodes4,
-  here("vignettes", "optionsData04.RData")
-)
+
 
 # search source
-oaCodes5 <- getCandidateCodes(cdm = cdm_arrow,
+oaCodes5 <- getCandidateCodes(cdm = cdm,
   keywords = "osteoarthritis",
   domains = "Condition",
   searchNonStandard = TRUE,
@@ -196,18 +129,12 @@ oaCodes5 <- getCandidateCodes(cdm = cdm_arrow,
   includeDescendants = FALSE,
   includeAncestor = FALSE
 )
-saveRDS(
-  oaCodes5,
-  here("vignettes", "optionsData04.RData")
-)
+
 
 # fuzzy search
-oaCodes6 <- getCandidateCodes(cdm = cdm_arrow,
+oaCodes6 <- getCandidateCodes(cdm = cdm,
   keywords = "osteoarthritis",
   domains = "Condition",
-  #searchViaSynonyms = FALSE,
-  #fuzzyMatch = TRUE,
-  #maxDistanceCost = 0.1,
   exclude = c(
     "post-infection",
     "post-traumatic"
@@ -215,18 +142,12 @@ oaCodes6 <- getCandidateCodes(cdm = cdm_arrow,
   includeDescendants = FALSE,
   includeAncestor = FALSE
 )
-saveRDS(
-  oaCodes6,
-  here("vignettes", "optionsData05.RData")
-)
+
 
 # fuzzy search 0.2
-oaCodes7 <- getCandidateCodes(cdm = cdm_arrow,
+oaCodes7 <- getCandidateCodes(cdm = cdm,
   keywords = "osteoarthritis",
   domains = "Condition",
-  #searchViaSynonyms = FALSE,
-  #fuzzyMatch = TRUE,
-  #maxDistanceCost = 0.2,
   exclude = c(
     "post-infection",
     "post-traumatic"
@@ -234,18 +155,12 @@ oaCodes7 <- getCandidateCodes(cdm = cdm_arrow,
   includeDescendants = FALSE,
   includeAncestor = FALSE
 )
-saveRDS(
-  oaCodes7,
-  here("vignettes", "optionsData06.RData")
-)
+
 
 # include ancestor
-oaCodes8 <- getCandidateCodes(cdm = cdm_arrow,
+oaCodes8 <- getCandidateCodes(cdm = cdm,
   keywords = "osteoarthritis",
   domains = "Condition",
-  #searchViaSynonyms = FALSE,
-  #fuzzyMatch = FALSE,
-  #maxDistanceCost = 0.2,
   exclude = c(
     "post-infection",
     "post-traumatic"
@@ -253,64 +168,124 @@ oaCodes8 <- getCandidateCodes(cdm = cdm_arrow,
   includeDescendants = FALSE,
   includeAncestor = TRUE
 )
-saveRDS(
-  oaCodes8,
-  here("vignettes", "optionsData07.RData")
-)
 
 # medication vignette ------
-ac_codes_1 <- getCandidateCodes(cdm = cdm_arrow,
+ac_codes_1 <- getCandidateCodes(cdm = cdm,
                                      keywords="acetaminophen",
                                      domains="drug",
                                      standardConcept="standard",
                                      includeDescendants = TRUE)
-saveRDS(
-  ac_codes_1,
-  here("vignettes", "medData01.RData")
-)
 
-ac_codes_2a <- getCandidateCodes(cdm = cdm_arrow,
+ac_codes_2a <- getCandidateCodes(cdm = cdm,
                                  keywords= c("acetaminophen injection",
                                              "acetaminophen intravenous"),
                                  domains="drug",
                                  standardConcept="standard",
                                  includeDescendants = TRUE)
-saveRDS(
-  ac_codes_2a,
-  here("vignettes", "medData02a.RData")
-)
 
-ac_codes_2b <- getCandidateCodes(cdm = cdm_arrow,
+
+ac_codes_2b <- getCandidateCodes(cdm = cdm,
                                 keywords="acetaminophen",
                                 domains="drug",
                                 #doseForm = c("injection", "intravenous"),
                                 standardConcept="standard",
                                 includeDescendants = TRUE)
-saveRDS(
-  ac_codes_2b,
-  here("vignettes", "medData02b.RData")
-)
 
-ac_dose_forms <- CodelistGenerator::getDoseForm(cdm = cdm_arrow)
-saveRDS(
-  ac_dose_forms,
-  here("vignettes", "medDataDoseForms.RData"))
 
-c <- compareCodelists(ac_codes_2, ac_codes_2a)
-ac_codes_3 <- getCandidateCodes(cdm = cdm_arrow,
+ac_dose_forms <- CodelistGenerator::getDoseForm(cdm = cdm)
+
+
+c <- compareCodelists(ac_codes_2a, ac_codes_2b)
+ac_codes_3 <- getCandidateCodes(cdm = cdm,
                                 keywords="acetaminophen",
                                 domains="drug",
                                 #conceptClassId = c("Quant Clinical Drug"),
                                 #doseForm = c("injection", "intravenous"),
                                 standardConcept="standard",
                                 includeDescendants = TRUE)
+
+ac_concept_class <- CodelistGenerator::getConceptClassId(cdm = cdm,
+                                                         domain = "drug")
+
+
+# save -----
+save(
+  vocabVersion,
+  file = here("inst", "introVocab.RData")
+)
+saveRDS(
+  codesFromDescendants,
+  here("inst", "introData01.RData")
+)
+saveRDS(
+  dementiaCodes1,
+  here("inst", "introData02.RData")
+)
+saveRDS(
+  codeComparison,
+  here("inst", "introData03.RData")
+)
+saveRDS(
+  icdMappings,
+  here("inst", "introData04.RData")
+)
+saveRDS(
+  readMappings,
+  here("inst", "introData05.RData")
+)
+saveRDS(
+  oaCodes1,
+  here("inst", "optionsData01.RData")
+)
+saveRDS(
+  oaCodes2,
+  here("inst", "optionsData02.RData")
+)
+saveRDS(
+  oaCodes3,
+  here("inst", "optionsData03.RData")
+)
+saveRDS(
+  oaCodes4,
+  here("inst", "optionsData04.RData")
+)
+saveRDS(
+  oaCodes5,
+  here("inst", "optionsData04.RData")
+)
+saveRDS(
+  oaCodes6,
+  here("inst", "optionsData05.RData")
+)
+saveRDS(
+  oaCodes7,
+  here("inst", "optionsData06.RData")
+)
+saveRDS(
+  oaCodes8,
+  here("inst", "optionsData07.RData")
+)
+saveRDS(
+  ac_codes_1,
+  here("inst", "medData01.RData")
+)
+saveRDS(
+  ac_codes_2a,
+  here("inst", "medData02a.RData")
+)
+saveRDS(
+  ac_codes_2b,
+  here("inst", "medData02b.RData")
+)
+saveRDS(
+  ac_dose_forms,
+  here("inst", "medDataDoseForms.RData")
+)
 saveRDS(
   ac_codes_3,
-  here("vignettes", "medData03.RData")
+  here("inst", "medData03.RData")
 )
-
-ac_concept_class <- CodelistGenerator::getConceptClassId(cdm = cdm_arrow,
-                                                         domain = "drug")
 saveRDS(
   ac_concept_class,
-  here("vignettes", "medDataConceptClass.RData"))
+  here("inst", "medDataConceptClass.RData")
+)
diff --git a/vignettes/introData01.RData b/inst/introData01.RData
similarity index 100%
rename from vignettes/introData01.RData
rename to inst/introData01.RData
diff --git a/inst/introData02.RData b/inst/introData02.RData
new file mode 100644
index 0000000..61e1598
Binary files /dev/null and b/inst/introData02.RData differ
diff --git a/inst/introData03.RData b/inst/introData03.RData
new file mode 100644
index 0000000..1269b2f
Binary files /dev/null and b/inst/introData03.RData differ
diff --git a/vignettes/introData04.RData b/inst/introData04.RData
similarity index 100%
rename from vignettes/introData04.RData
rename to inst/introData04.RData
diff --git a/vignettes/introData05.RData b/inst/introData05.RData
similarity index 100%
rename from vignettes/introData05.RData
rename to inst/introData05.RData
diff --git a/vignettes/introVocab.RData b/inst/introVocab.RData
similarity index 59%
rename from vignettes/introVocab.RData
rename to inst/introVocab.RData
index f9d94a4..7ac3cda 100644
Binary files a/vignettes/introVocab.RData and b/inst/introVocab.RData differ
diff --git a/inst/medData01.RData b/inst/medData01.RData
new file mode 100644
index 0000000..f9b1e0f
Binary files /dev/null and b/inst/medData01.RData differ
diff --git a/inst/medData02a.RData b/inst/medData02a.RData
new file mode 100644
index 0000000..1814913
Binary files /dev/null and b/inst/medData02a.RData differ
diff --git a/inst/medData02b.RData b/inst/medData02b.RData
new file mode 100644
index 0000000..f9b1e0f
Binary files /dev/null and b/inst/medData02b.RData differ
diff --git a/inst/medData03.RData b/inst/medData03.RData
new file mode 100644
index 0000000..f9b1e0f
Binary files /dev/null and b/inst/medData03.RData differ
diff --git a/vignettes/medDataConceptClass.RData b/inst/medDataConceptClass.RData
similarity index 100%
rename from vignettes/medDataConceptClass.RData
rename to inst/medDataConceptClass.RData
diff --git a/vignettes/medDataDoseForms.RData b/inst/medDataDoseForms.RData
similarity index 100%
rename from vignettes/medDataDoseForms.RData
rename to inst/medDataDoseForms.RData
diff --git a/inst/optionsData01.RData b/inst/optionsData01.RData
new file mode 100644
index 0000000..984cff3
Binary files /dev/null and b/inst/optionsData01.RData differ
diff --git a/inst/optionsData02.RData b/inst/optionsData02.RData
new file mode 100644
index 0000000..18a7c27
Binary files /dev/null and b/inst/optionsData02.RData differ
diff --git a/inst/optionsData03.RData b/inst/optionsData03.RData
new file mode 100644
index 0000000..daf608b
Binary files /dev/null and b/inst/optionsData03.RData differ
diff --git a/inst/optionsData04.RData b/inst/optionsData04.RData
new file mode 100644
index 0000000..674a4b6
Binary files /dev/null and b/inst/optionsData04.RData differ
diff --git a/inst/optionsData05.RData b/inst/optionsData05.RData
new file mode 100644
index 0000000..984cff3
Binary files /dev/null and b/inst/optionsData05.RData differ
diff --git a/inst/optionsData06.RData b/inst/optionsData06.RData
new file mode 100644
index 0000000..984cff3
Binary files /dev/null and b/inst/optionsData06.RData differ
diff --git a/inst/optionsData07.RData b/inst/optionsData07.RData
new file mode 100644
index 0000000..cff9533
Binary files /dev/null and b/inst/optionsData07.RData differ
diff --git a/man/CodelistGenerator-package.Rd b/man/CodelistGenerator-package.Rd
index eee3208..233ee7b 100644
--- a/man/CodelistGenerator-package.Rd
+++ b/man/CodelistGenerator-package.Rd
@@ -4,7 +4,7 @@
 \name{CodelistGenerator-package}
 \alias{CodelistGenerator}
 \alias{CodelistGenerator-package}
-\title{CodelistGenerator: Generate Code Lists for the OMOP Common Data Model}
+\title{CodelistGenerator: Identify Relevant Clinical Codes and Evaluate Their Use}
 \description{
 Generate a candidate code list for the Observational Medical Outcomes Partnership (OMOP) common data model based on string matching. For a given search strategy, a candidate code list will be returned.
 }
diff --git a/man/codesInUse.Rd b/man/codesInUse.Rd
new file mode 100644
index 0000000..ebc1aa4
--- /dev/null
+++ b/man/codesInUse.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/codesInUse.R
+\name{codesInUse}
+\alias{codesInUse}
+\title{Get codes used in the database}
+\usage{
+codesInUse(
+  cdm,
+  table = c("condition_occurrence", "device_exposure", "drug_exposure", "measurement",
+    "observation", "procedure_occurrence", "visit_occurrence")
+)
+}
+\arguments{
+\item{cdm}{cdm_reference via CDMConnector}
+
+\item{table}{cdm table}
+}
+\description{
+Get codes used in the database
+}
diff --git a/man/findOrphanCodes.Rd b/man/findOrphanCodes.Rd
new file mode 100644
index 0000000..8c4ab21
--- /dev/null
+++ b/man/findOrphanCodes.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/findOrphanCodes.R
+\name{findOrphanCodes}
+\alias{findOrphanCodes}
+\title{Find orphan codes related to a codelist}
+\usage{
+findOrphanCodes(
+  x,
+  cdm,
+  domains = "Condition",
+  standardConcept = "Standard",
+  searchInSynonyms = TRUE,
+  searchNonStandard = TRUE,
+  includeDescendants = TRUE,
+  includeAncestor = TRUE,
+  minCellCount = 5
+)
+}
+\arguments{
+\item{x}{Codes for which to find codes related but not included (orphan
+codes)}
+
+\item{cdm}{cdm_reference via CDMConnector}
+
+\item{domains}{Character vector with one or more of the OMOP CDM domain.}
+
+\item{standardConcept}{Character vector with one or more of "Standard",
+"Classification", and "Non-standard". These correspond to the flags used
+for the standard_concept field in the concept table of the cdm.}
+
+\item{searchInSynonyms}{Either TRUE or FALSE. If TRUE the code will also
+search using both the primary name in the concept table and synonyms from
+the concept synonym table.}
+
+\item{searchNonStandard}{Either TRUE or FALSE. If TRUE the code will also
+search via non-standard concepts.}
+
+\item{includeDescendants}{Either TRUE or FALSE.
+If TRUE descendant concepts of identified concepts
+will be included in the candidate codelist.}
+
+\item{includeAncestor}{Either TRUE or FALSE.
+If TRUE the direct ancestor concepts of identified concepts
+will be included in the candidate codelist.}
+
+\item{minCellCount}{The minimum number of counts to reported, below which
+results will be suppressed. If 0, all results will be reported.}
+}
+\value{
+A codelist containing code related to (but not in) the target
+codelist that are present used in the cdm
+}
+\description{
+Find orphan codes related to a codelist
+}
diff --git a/man/mockVocabRef.Rd b/man/mockVocabRef.Rd
index e932b40..dd7b121 100644
--- a/man/mockVocabRef.Rd
+++ b/man/mockVocabRef.Rd
@@ -7,7 +7,7 @@
 mockVocabRef(backend = "database")
 }
 \arguments{
-\item{backend}{'database' (duckdb), 'arrow' (parquet files), or 'data_frame'}
+\item{backend}{'database' (duckdb) or 'data_frame'}
 }
 \value{
 cdm reference with mock vocabulary
diff --git a/man/restrictToCodesInUse.Rd b/man/restrictToCodesInUse.Rd
new file mode 100644
index 0000000..79f6edb
--- /dev/null
+++ b/man/restrictToCodesInUse.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/codesInUse.R
+\name{restrictToCodesInUse}
+\alias{restrictToCodesInUse}
+\title{Filter a codelist to keep only the codes used in the database}
+\usage{
+restrictToCodesInUse(
+  x,
+  cdm,
+  table = c("condition_occurrence", "device_exposure", "drug_exposure", "measurement",
+    "observation", "procedure_occurrence", "visit_occurrence")
+)
+}
+\arguments{
+\item{x}{A codelist}
+
+\item{cdm}{cdm_reference via CDMConnector}
+
+\item{table}{cdm table}
+}
+\description{
+Filter a codelist to keep only the codes used in the database
+}
diff --git a/man/sourceCodesInUse.Rd b/man/sourceCodesInUse.Rd
new file mode 100644
index 0000000..2e3b1e6
--- /dev/null
+++ b/man/sourceCodesInUse.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/codesInUse.R
+\name{sourceCodesInUse}
+\alias{sourceCodesInUse}
+\title{Get source codes used in the database}
+\usage{
+sourceCodesInUse(
+  cdm,
+  table = c("condition_occurrence", "device_exposure", "drug_exposure", "measurement",
+    "observation", "procedure_occurrence", "visit_occurrence")
+)
+}
+\arguments{
+\item{cdm}{cdm_reference via CDMConnector}
+
+\item{table}{cdm table}
+}
+\description{
+Get source codes used in the database
+}
diff --git a/tests/testthat/test-achillesCodeUse.R b/tests/testthat/test-achillesCodeUse.R
index ca51696..0457f14 100644
--- a/tests/testthat/test-achillesCodeUse.R
+++ b/tests/testthat/test-achillesCodeUse.R
@@ -1,99 +1,29 @@
 test_that("achilles code use", {
 
-  testthat::skip_if(Sys.getenv("CDM5_REDSHIFT_DBNAME") == "")
+  # mock db
+  cdm <- mockVocabRef("database")
 
-  db <-  DBI::dbConnect(RPostgres::Redshift(),
-                        dbname   = Sys.getenv("CDM5_REDSHIFT_DBNAME"),
-                        host     = Sys.getenv("CDM5_REDSHIFT_HOST"),
-                        port     = Sys.getenv("CDM5_REDSHIFT_PORT"),
-                        user     = Sys.getenv("CDM5_REDSHIFT_USER"),
-                        password = Sys.getenv("CDM5_REDSHIFT_PASSWORD"))
+  oa <- getCandidateCodes(cdm = cdm, keywords = "osteoarthritis")
+  # two codes: "Osteoarthritis of knee" "Osteoarthritis of hip"
+  # in achilles we only have a count for "Osteoarthritis of knee"
+  result_achilles <- achillesCodeUse(list(oa = oa$concept_id),
+                                     cdm = cdm)
+  expect_true(result_achilles %>%
+    dplyr::pull("estimate") == 100)
 
-  cdm <- CDMConnector::cdm_from_con(con = db,
-                                    cdm_schema = Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA"),
-                                    write_schema = Sys.getenv("CDM5_REDSHIFT_SCRATCH_SCHEMA"))
+  # applying min cell count where estimate should be obscured
+  result_achilles <- achillesCodeUse(list(oa = oa$concept_id),
+                                     cdm = cdm,
+                                     minCellCount = 150)
+  expect_true(is.na(result_achilles %>%
+                dplyr::pull("estimate")))
 
-  cdm$achilles_results <- cdm$condition_occurrence %>%
-    dplyr::group_by(condition_concept_id) %>%
-    dplyr::tally(name = "count_value") %>%
-    dplyr::rename("stratum_1" = "condition_concept_id") %>%
-    dplyr::mutate(stratum_2 = NA,
-                  stratum_3 = NA,
-                  analysis_id = 401) %>%
-    CDMConnector::computeQuery()
-
-  asthma <- list(asthma = c(317009, 257581))
-  result_achilles <- achillesCodeUse(asthma,
-                  cdm = cdm)
-  result_cdm <- summariseCodeUse(asthma, cdm = cdm)
-
- expect_equal(result_achilles %>%
-    dplyr::filter(standard_concept_id == 317009,
-                  group_name == "By concept",
-                  variable_name == "Record count") %>%
-    dplyr::pull("estimate"),
-  result_cdm %>%
-    dplyr::filter(standard_concept_id == 317009,
-                  group_name == "By concept",
-                  variable_name == "Record count") %>%
-    dplyr::pull("estimate"))
-
- expect_equal(result_achilles %>%
-                dplyr::filter(standard_concept_id == 257581,
-                              group_name == "By concept",
-                              variable_name == "Record count") %>%
-                dplyr::pull("estimate"),
-              result_cdm %>%
-                dplyr::filter(standard_concept_id == 257581,
-                              group_name == "By concept",
-                              variable_name == "Record count") %>%
-                dplyr::pull("estimate"))
-
-
- cdm$achilles_results <- cdm$condition_occurrence %>%
-   dplyr::group_by(person_id, condition_concept_id) %>%
-   dplyr::tally() %>%
-   dplyr::ungroup() %>%
-   dplyr::group_by(condition_concept_id) %>%
-   dplyr::tally(name = "count_value") %>%
-   dplyr::rename("stratum_1" = "condition_concept_id") %>%
-   dplyr::mutate(stratum_2 = NA,
-                 stratum_3 = NA,
-                 analysis_id = 400) %>%
-   CDMConnector::computeQuery()
-
- asthma <- list(asthma = c(317009, 257581))
- result_achilles <- achillesCodeUse(asthma,
-                                    cdm = cdm)
- result_cdm <- summariseCodeUse(asthma, cdm = cdm)
-
-
- expect_equal(result_achilles %>%
-                dplyr::filter(standard_concept_id == 317009,
-                              group_name == "By concept",
-                              variable_name == "Person count") %>%
-                dplyr::pull("estimate"),
-              result_cdm %>%
-                dplyr::filter(standard_concept_id == 317009,
-                              group_name == "By concept",
-                              variable_name == "Person count") %>%
-                dplyr::pull("estimate"))
-
- expect_equal(result_achilles %>%
-                dplyr::filter(standard_concept_id == 257581,
-                              group_name == "By concept",
-                              variable_name == "Person count") %>%
-                dplyr::pull("estimate"),
-              result_cdm %>%
-                dplyr::filter(standard_concept_id == 257581,
-                              group_name == "By concept",
-                              variable_name == "Person count") %>%
-                dplyr::pull("estimate"))
 
  # edge cases
  # concept id not in achilles
- expect_message(achillesCodeUse(list(asthma = 123),
+ expect_message(result_achilles <- achillesCodeUse(list(asthma = 123),
                                     cdm = cdm))
+ expect_true(nrow(result_achilles) == 0)
 
  # expected errors
  expect_error(achillesCodeUse(123, #not a named list
diff --git a/tests/testthat/test-codesFrom.R b/tests/testthat/test-codesFrom.R
index ff4a18e..32fe83c 100644
--- a/tests/testthat/test-codesFrom.R
+++ b/tests/testthat/test-codesFrom.R
@@ -1,5 +1,5 @@
 test_that("test inputs - mock", {
-  backends <- c("database", "arrow", "data_frame")
+  backends <- c("database", "data_frame")
 
   for (i in seq_along(backends)) {
     # mock db
diff --git a/tests/testthat/test-codesInUse.R b/tests/testthat/test-codesInUse.R
new file mode 100644
index 0000000..7d57070
--- /dev/null
+++ b/tests/testthat/test-codesInUse.R
@@ -0,0 +1,76 @@
+test_that("tests with mock db", {
+
+  # mock db
+  cdm <- mockVocabRef("database")
+
+  codes <- getCandidateCodes(
+    cdm = cdm,
+    keywords = "arthritis",
+    domains = "Condition",
+    includeDescendants = FALSE
+  )
+  expect_true(4 %in%
+                restrictToCodesInUse(list("cs" = codes$concept_id),
+                       cdm = cdm))
+
+  expect_true(length(restrictToCodesInUse(list("cs1" = codes$concept_id,
+                       "cs2" = 999),
+                       cdm = cdm)) == 1) # will just have cs1
+
+  # no codes in db
+  codes <- getCandidateCodes(
+    cdm = cdm,
+    keywords = "hip osteoarthritis",
+    domains = "Condition",
+    includeDescendants = FALSE
+  )
+ expect_message(restrictToCodesInUse(list("cs" = codes$concept_id),
+                       cdm = cdm))
+
+  DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+
+})
+
+test_that("sql server with achilles", {
+
+  testthat::skip_if(Sys.getenv("CDM5_SQL_SERVER_SERVER") == "")
+  testthat::skip_if(Sys.getenv("SQL_SERVER_DRIVER") == "")
+  testthat::skip_if(packageVersion("CDMConnector") <= "1.2.0")
+
+  db <- DBI::dbConnect(odbc::odbc(),
+                       Driver   = Sys.getenv("SQL_SERVER_DRIVER"),
+                       Server   = Sys.getenv("CDM5_SQL_SERVER_SERVER"),
+                       Database = Sys.getenv("CDM5_SQL_SERVER_CDM_DATABASE"),
+                       UID      = Sys.getenv("CDM5_SQL_SERVER_USER"),
+                       PWD      = Sys.getenv("CDM5_SQL_SERVER_PASSWORD"),
+                       TrustServerCertificate="yes",
+                       Port     = Sys.getenv("CDM5_SQL_SERVER_PORT"))
+  cdm <- CDMConnector::cdm_from_con(db,
+                                    cdm_schema = c("CDMV54", "dbo"),
+                                    achilles_schema = c("CDMV54", "dbo"),
+                                    write_schema = c("ohdsi", "dbo"))
+
+
+
+  asthma_codes <- getCandidateCodes(
+    cdm = cdm,
+    keywords = "asthma",
+    domains = c("Condition"),
+    includeDescendants = TRUE
+  )
+  asthma_cl <- list("cs" = asthma_codes$concept_id)
+
+  asthma_codes_present <- restrictToCodesInUse(x = asthma_cl,
+                                               cdm = cdm)
+
+expect_equal(sort(asthma_codes_present[[1]]),
+             sort(cdm$condition_occurrence %>%
+   dplyr::filter(.data$condition_concept_id %in%
+                   !!asthma_codes$concept_id) %>%
+   dplyr::select("condition_concept_id") %>%
+   dplyr::distinct() %>%
+   dplyr::pull()))
+
+
+  CDMConnector::cdm_disconnect(cdm)
+})
diff --git a/tests/testthat/test-dbms.R b/tests/testthat/test-dbms.R
new file mode 100644
index 0000000..971e09b
--- /dev/null
+++ b/tests/testthat/test-dbms.R
@@ -0,0 +1,115 @@
+# Testing against different database platforms
+
+
+test_that("achilles code use", {
+
+  testthat::skip_if(Sys.getenv("CDM5_REDSHIFT_DBNAME") == "")
+
+  db <-  DBI::dbConnect(RPostgres::Redshift(),
+                        dbname   = Sys.getenv("CDM5_REDSHIFT_DBNAME"),
+                        host     = Sys.getenv("CDM5_REDSHIFT_HOST"),
+                        port     = Sys.getenv("CDM5_REDSHIFT_PORT"),
+                        user     = Sys.getenv("CDM5_REDSHIFT_USER"),
+                        password = Sys.getenv("CDM5_REDSHIFT_PASSWORD"))
+
+  cdm <- CDMConnector::cdm_from_con(con = db,
+                                    cdm_schema = Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA"),
+                                    write_schema = Sys.getenv("CDM5_REDSHIFT_SCRATCH_SCHEMA"))
+
+  cdm$achilles_results <- cdm$condition_occurrence %>%
+    dplyr::group_by(condition_concept_id) %>%
+    dplyr::tally(name = "count_value") %>%
+    dplyr::rename("stratum_1" = "condition_concept_id") %>%
+    dplyr::mutate(stratum_2 = NA,
+                  stratum_3 = NA,
+                  analysis_id = 401) %>%
+    CDMConnector::computeQuery()
+
+  asthma <- list(asthma = c(317009, 257581))
+  result_achilles <- achillesCodeUse(asthma,
+                                     cdm = cdm)
+  result_cdm <- summariseCodeUse(asthma, cdm = cdm)
+
+  expect_equal(result_achilles %>%
+                 dplyr::filter(standard_concept_id == 317009,
+                               group_name == "By concept",
+                               variable_name == "Record count") %>%
+                 dplyr::pull("estimate"),
+               result_cdm %>%
+                 dplyr::filter(standard_concept_id == 317009,
+                               group_name == "By concept",
+                               variable_name == "Record count") %>%
+                 dplyr::pull("estimate"))
+
+  expect_equal(result_achilles %>%
+                 dplyr::filter(standard_concept_id == 257581,
+                               group_name == "By concept",
+                               variable_name == "Record count") %>%
+                 dplyr::pull("estimate"),
+               result_cdm %>%
+                 dplyr::filter(standard_concept_id == 257581,
+                               group_name == "By concept",
+                               variable_name == "Record count") %>%
+                 dplyr::pull("estimate"))
+
+
+  cdm$achilles_results <- cdm$condition_occurrence %>%
+    dplyr::group_by(person_id, condition_concept_id) %>%
+    dplyr::tally() %>%
+    dplyr::ungroup() %>%
+    dplyr::group_by(condition_concept_id) %>%
+    dplyr::tally(name = "count_value") %>%
+    dplyr::rename("stratum_1" = "condition_concept_id") %>%
+    dplyr::mutate(stratum_2 = NA,
+                  stratum_3 = NA,
+                  analysis_id = 400) %>%
+    CDMConnector::computeQuery()
+
+  asthma <- list(asthma = c(317009, 257581))
+  result_achilles <- achillesCodeUse(asthma,
+                                     cdm = cdm)
+  result_cdm <- summariseCodeUse(asthma, cdm = cdm)
+
+
+  expect_equal(result_achilles %>%
+                 dplyr::filter(standard_concept_id == 317009,
+                               group_name == "By concept",
+                               variable_name == "Person count") %>%
+                 dplyr::pull("estimate"),
+               result_cdm %>%
+                 dplyr::filter(standard_concept_id == 317009,
+                               group_name == "By concept",
+                               variable_name == "Person count") %>%
+                 dplyr::pull("estimate"))
+
+  expect_equal(result_achilles %>%
+                 dplyr::filter(standard_concept_id == 257581,
+                               group_name == "By concept",
+                               variable_name == "Person count") %>%
+                 dplyr::pull("estimate"),
+               result_cdm %>%
+                 dplyr::filter(standard_concept_id == 257581,
+                               group_name == "By concept",
+                               variable_name == "Person count") %>%
+                 dplyr::pull("estimate"))
+
+  # edge cases
+  # concept id not in achilles
+  expect_message(achillesCodeUse(list(asthma = 123),
+                                 cdm = cdm))
+
+  # expected errors
+  expect_error(achillesCodeUse(123, #not a named list
+                               cdm = cdm))
+  expect_error(achillesCodeUse(asthma,
+                               cdm = "cdm")) # not a cdm
+  expect_error(achillesCodeUse(asthma,
+                               cdm = cdm,
+                               countBy = "not an option"))
+  expect_error(achillesCodeUse(asthma,
+                               cdm = cdm,
+                               minCellCount = "not a number"))
+
+  CDMConnector::cdm_disconnect(cdm)
+})
+
diff --git a/tests/testthat/test-drugCodes.R b/tests/testthat/test-drugCodes.R
index f63ee13..338e6aa 100644
--- a/tests/testthat/test-drugCodes.R
+++ b/tests/testthat/test-drugCodes.R
@@ -1,6 +1,6 @@
 test_that("getATCCodes working", {
 
-  backends <- c("database", "arrow", "data_frame")
+  backends <- c("database", "data_frame")
   for (i in seq_along(backends)) {
   cdm <- mockVocabRef(backend = backends[i])
   atcCodes <- getATCCodes(cdm, level = "ATC 1st")
@@ -28,7 +28,7 @@ test_that("getATCCodes working", {
 
 test_that("getATCCodes expected errors", {
 
-  backends <- c("database", "arrow", "data_frame")
+  backends <- c("database", "data_frame")
   for (i in seq_along(backends)) {
     cdm <- mockVocabRef(backend = backends[i])
     expect_error(getATCCodes(cdm, level = "Not an ATC level"))
@@ -44,7 +44,7 @@ test_that("getATCCodes expected errors", {
 
 test_that("getDrugIngredientCodes working", {
 
-  backends <- c("database", "arrow", "data_frame")
+  backends <- c("database", "data_frame")
   for (i in seq_along(backends)) {
     cdm <- mockVocabRef(backend = backends[i])
     ing_codes <- getDrugIngredientCodes(cdm)
@@ -106,7 +106,7 @@ test_that("getDrugIngredientCodes working", {
 
 test_that("getDrugIngredientCodes expected errors", {
 
-  backends <- c("database", "arrow", "data_frame")
+  backends <- c("database","data_frame")
   for (i in seq_along(backends)) {
     cdm <- mockVocabRef(backend = backends[i])
     expect_error(getDrugIngredientCodes(cdm, name = "Not an Ingredient"))
diff --git a/tests/testthat/test-findOrphanCodes.R b/tests/testthat/test-findOrphanCodes.R
new file mode 100644
index 0000000..fc4722c
--- /dev/null
+++ b/tests/testthat/test-findOrphanCodes.R
@@ -0,0 +1,67 @@
+test_that("tests with mock db", {
+
+    # mock db
+    cdm <- mockVocabRef("database")
+
+    codes <- getCandidateCodes(
+      cdm = cdm,
+      keywords = "Musculoskeletal disorder",
+      domains = "Condition",
+      includeDescendants = FALSE
+    )
+
+   orphan_codes <- findOrphanCodes(x = list("msk" = codes$concept_id),
+                    cdm = cdm,
+                    domains = "Condition",
+                    standardConcept = "Standard",
+                    searchInSynonyms = FALSE,
+                    searchNonStandard = FALSE,
+                    includeDescendants = TRUE,
+                    includeAncestor = FALSE)
+
+   # we should pick up knee osteoarthritis from our achilles tables
+   expect_true(orphan_codes %>%
+     dplyr::pull("standard_concept_id") == 4)
+   expect_true(orphan_codes %>%
+                       dplyr::pull("estimate") == 100)
+
+   orphan_codes <- findOrphanCodes(x = list("msk" = codes$concept_id),
+                                   cdm = cdm,
+                                   domains = "Condition",
+                                   standardConcept = "Standard",
+                                   searchInSynonyms = FALSE,
+                                   searchNonStandard = FALSE,
+                                   includeDescendants = FALSE,
+                                   includeAncestor = FALSE)
+   # we will not find records now we're not looking in descendants
+   expect_true(nrow(orphan_codes) == 0)
+
+
+   # we shouldn't have our original codes
+   expect_true(nrow(findOrphanCodes(x = list("knee_oa" = 4),
+                   cdm = cdm,
+                   domains = "Condition",
+                   standardConcept = "Standard",
+                   searchInSynonyms = FALSE,
+                   searchNonStandard = FALSE,
+                   includeDescendants = FALSE,
+                   includeAncestor = FALSE)) == 0)
+
+   # min cell count
+   orphan_codes <- findOrphanCodes(x = list("msk" = codes$concept_id),
+                                   cdm = cdm,
+                                   domains = "Condition",
+                                   standardConcept = "Standard",
+                                   searchInSynonyms = FALSE,
+                                   searchNonStandard = FALSE,
+                                   includeDescendants = TRUE,
+                                   includeAncestor = FALSE,
+                                   minCellCount = 150)
+   expect_true(orphan_codes %>%
+                 dplyr::pull("standard_concept_id") == 4)
+   expect_true(is.na(orphan_codes %>%
+     dplyr::pull("estimate")))
+
+ DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
+
+})
diff --git a/tests/testthat/test-findUnmappedCodes.R b/tests/testthat/test-findUnmappedCodes.R
new file mode 100644
index 0000000..b74a648
--- /dev/null
+++ b/tests/testthat/test-findUnmappedCodes.R
@@ -0,0 +1,42 @@
+test_that("achilles code use", {
+
+  testthat::skip_if(Sys.getenv("CDM5_REDSHIFT_DBNAME") == "")
+
+  db <-  DBI::dbConnect(RPostgres::Redshift(),
+                        dbname   = Sys.getenv("CDM5_REDSHIFT_DBNAME"),
+                        host     = Sys.getenv("CDM5_REDSHIFT_HOST"),
+                        port     = Sys.getenv("CDM5_REDSHIFT_PORT"),
+                        user     = Sys.getenv("CDM5_REDSHIFT_USER"),
+                        password = Sys.getenv("CDM5_REDSHIFT_PASSWORD"))
+
+  cdm <- CDMConnector::cdm_from_con(con = db,
+                                    cdm_schema = Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA"),
+                                    write_schema = Sys.getenv("CDM5_REDSHIFT_SCRATCH_SCHEMA"))
+
+
+  x <- "Nonallopathic lesions"
+
+  candidateCodes <- getCandidateCodes(
+    cdm = cdm,
+    keywords = "Nonallopathic lesions",
+    domains = "Condition",
+    standardConcept = "Non-standard",
+    searchInSynonyms = FALSE,
+    searchNonStandard = FALSE,
+    includeDescendants = FALSE,
+    includeAncestor = FALSE)
+
+  # source codes used in the database
+  dbCandidateCodes <- intersect(unmappedSourceCodesInUse(cdm = cdm),
+                                candidateCodes$concept_id)
+
+
+  a<- candidateCodes
+
+  a<-cdm$condition_occurrence %>%
+    dplyr::filter(condition_concept_id == 0) %>%
+    dplyr::collect()
+
+
+  CDMConnector::cdm_disconnect(cdm)
+})
diff --git a/tests/testthat/test-getCandidateCodes.R b/tests/testthat/test-getCandidateCodes.R
index f16acaf..98639b8 100644
--- a/tests/testthat/test-getCandidateCodes.R
+++ b/tests/testthat/test-getCandidateCodes.R
@@ -1,5 +1,6 @@
 test_that("tests with mock db", {
-  backends <- c("database", "arrow", "data_frame")
+  backends <- c("database",
+                "data_frame")
 
   for (i in seq_along(backends)) {
     # mock db
@@ -31,9 +32,13 @@ test_that("tests with mock db", {
       domains = "Condition",
       includeDescendants = FALSE
     )
-    expect_true((nrow(codes) == 2 &
-      codes$concept_name[1] == "Osteoarthritis of knee" &
-      codes$concept_name[2] == "Osteoarthritis of hip"))
+    expect_true(nrow(codes) == 2)
+    expect_true("Osteoarthritis of knee" %in%
+      (codes %>%
+      dplyr::pull("concept_name")))
+    expect_true("Osteoarthritis of hip" %in%
+                  (codes %>%
+                     dplyr::pull("concept_name")))
 
     # test include descendants
     codes <- getCandidateCodes(
@@ -256,7 +261,8 @@ test_that("tests with mock db", {
 })
 
 test_that("tests with mock db - multiple domains", {
-  backends <- c("database")
+  backends <- c("database",
+                "data_frame")
 
   for (i in seq_along(backends)) {
     # mock db
@@ -276,12 +282,13 @@ test_that("tests with mock db - multiple domains", {
 
     codes <- getCandidateCodes(
       cdm = cdm,
-      keywords = "H/O osteoarthritis",
+      keywords = "h o osteoarthritis",
       domains = c("Condition", "Observation"),
       includeDescendants = FALSE
     )
-    expect_true(all(nrow(codes) == 1 &
-      codes$concept_id == 9))
+    expect_true(all(
+      nrow(codes) == 3 &
+      c(4,5,9) %in% codes$concept_id))
 
     if (backends[[i]] == "database") {
       DBI::dbDisconnect(attr(cdm, "dbcon"), shutdown = TRUE)
diff --git a/tests/testthat/test-getICD10StandardCodes.R b/tests/testthat/test-getICD10StandardCodes.R
index 2392f4a..d2881c6 100644
--- a/tests/testthat/test-getICD10StandardCodes.R
+++ b/tests/testthat/test-getICD10StandardCodes.R
@@ -1,5 +1,5 @@
 test_that("db without icd10 codes loaded", {
-  backends <- c("database", "arrow", "data_frame")
+  backends <- c("database", "data_frame")
   for (i in seq_along(backends)) {
     cdm <- mockVocabRef(backend = backends[i])
     codes <- getICD10StandardCodes(cdm = cdm,
diff --git a/tests/testthat/test-mockVocabRef.R b/tests/testthat/test-mockVocabRef.R
index 80497a1..93ce18f 100644
--- a/tests/testthat/test-mockVocabRef.R
+++ b/tests/testthat/test-mockVocabRef.R
@@ -1,18 +1,13 @@
 test_that("mock vocab db", {
   cdmDb <- mockVocabRef("database")
   cdmDF <- mockVocabRef("data_frame")
-  cdmArrow <- mockVocabRef("arrow")
 
   conceptFromDb <- cdmDb$concept %>% dplyr::collect()
   conceptFromDf <- cdmDF$concept %>% dplyr::collect()
-  conceptFromArrow <- cdmArrow$concept %>% dplyr::collect()
 
   expect_equal(conceptFromDb,
                     conceptFromDf,
                     ignore_attr = TRUE)
-  expect_equal(conceptFromDb,
-               conceptFromArrow,
-               ignore_attr = TRUE)
 
   DBI::dbDisconnect(attr(cdmDb, "dbcon"), shutdown = TRUE)
 })
diff --git a/tests/testthat/test-vocabUtilities.R b/tests/testthat/test-vocabUtilities.R
index 7a3a42f..b18d7e7 100644
--- a/tests/testthat/test-vocabUtilities.R
+++ b/tests/testthat/test-vocabUtilities.R
@@ -1,5 +1,5 @@
 test_that("tests with mock db", {
-  backends <- c("database", "arrow", "data_frame")
+  backends <- c("database", "data_frame")
 
   for (i in seq_along(backends)) {
     # mock db
diff --git a/vignettes/a01_Introduction_to_CodelistGenerator.Rmd b/vignettes/a01_Introduction_to_CodelistGenerator.Rmd
index 797cd04..7ccd525 100644
--- a/vignettes/a01_Introduction_to_CodelistGenerator.Rmd
+++ b/vignettes/a01_Introduction_to_CodelistGenerator.Rmd
@@ -20,14 +20,8 @@ knitr::opts_chunk$set(
 For this example we are going to generate a candidate codelist for dementia, only looking for codes in the condition domain. Let's first load some libraries
 
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-library(here)
-library(readr)
 library(DBI)
-library(here)
 library(dplyr)
-library(stringr)
-library(DT)
-library(kableExtra)
 library(CodelistGenerator)
 library(CDMConnector)
 ```
@@ -60,7 +54,8 @@ getVocabVersion(cdm = cdm)
 ```
 
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-vocabVersion <- load(here("vignettes", "introVocab.RData"))
+vocabVersion <- load(system.file("introVocab.RData", 
+                 package = "CodelistGenerator"))
 vocabVersion
 ```
 
@@ -93,17 +88,13 @@ codesFromDescendants <- tbl(
 ```
 
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-codesFromDescendants <- readRDS(here("vignettes", "introData01.RData"))
+codesFromDescendants <- readRDS(system.file("introData01.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 ```{r,  message=FALSE, warning=FALSE }  
-datatable(codesFromDescendants,
-  rownames = FALSE,
-  options = list(
-    pageLength = 10,
-    lengthMenu = c(10, 20, 50)
-  )
-)
+codesFromDescendants %>% 
+  glimpse()
 ```
 
 This looks to pick up most relevant codes. But, this approach misses codes that are not a descendant of 4182210. For example, codes such as "Wandering due to dementia" (37312577; https://athena.ohdsi.org/search-terms/terms/37312577) and "Anxiety due to dementia" (37312031; https://athena.ohdsi.org/search-terms/terms/37312031) are not picked up.
@@ -122,17 +113,13 @@ dementiaCodes1 <- getCandidateCodes(
 ```
 
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-dementiaCodes1 <- readRDS(here("vignettes", "introData02.RData"))
+dementiaCodes1 <- readRDS(system.file("introData02.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 ```{r,  message=FALSE, warning=FALSE }
-datatable(dementiaCodes1,
-  rownames = FALSE,
-  options = list(
-    pageLength = 10,
-    lengthMenu = c(10, 20, 50)
-  )
-)
+dementiaCodes1%>% 
+  glimpse()
 ```
 
 
@@ -146,33 +133,29 @@ codeComparison <- compareCodelists(
 ```
 
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-codeComparison <- readRDS(here("vignettes", "introData03.RData"))
+codeComparison <- readRDS(system.file("introData03.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 ```{r,  message=FALSE, warning=FALSE } 
-kable(codeComparison %>%
+codeComparison %>%
   group_by(codelist) %>%
-  tally())
+  tally()
 ```
 
 What are these extra codes picked up by CodelistGenerator?
 ```{r,  message=FALSE, warning=FALSE }
-datatable(
-  codeComparison %>%
-    filter(codelist == "Only codelist 2"),
-  rownames = FALSE,
-  options = list(
-    pageLength = 10,
-    lengthMenu = c(10, 20, 50)
-  )
-)
+codeComparison %>%
+  filter(codelist == "Only codelist 2") %>% 
+  glimpse()
 ```
 
 ## Review mappings from non-standard vocabularies
 Perhaps we want to see what ICD10CM codes map to our candidate code list. We can get these by running
 
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-icdMappings <- readRDS(here("vignettes", "introData04.RData"))
+icdMappings <- readRDS(system.file("introData04.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 ```{r,  eval=FALSE }
@@ -184,18 +167,14 @@ icdMappings <- getMappings(
 ```
 
 ```{r,  message=FALSE, warning=FALSE }
-datatable(icdMappings,
-  rownames = FALSE,
-  options = list(
-    pageLength = 10,
-    lengthMenu = c(10, 20, 50)
-  )
-)
+icdMappings %>% 
+  glimpse()
 ```
 
 
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-readMappings <- readRDS(here("vignettes", "introData05.RData"))
+readMappings <- readRDS(system.file("introData05.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 ```{r,  eval=FALSE }
@@ -207,11 +186,6 @@ readMappings <- getMappings(
 ```
 
 ```{r,  message=FALSE, warning=FALSE }
-datatable(readMappings,
-  rownames = FALSE,
-  options = list(
-    pageLength = 10,
-    lengthMenu = c(10, 20, 50)
-  )
-)
+readMappings %>% 
+  glimpse()
 ```
diff --git a/vignettes/a02_Candidate_codes_OA.Rmd b/vignettes/a02_Candidate_codes_OA.Rmd
index 1da1626..28a1db2 100644
--- a/vignettes/a02_Candidate_codes_OA.Rmd
+++ b/vignettes/a02_Candidate_codes_OA.Rmd
@@ -17,15 +17,8 @@ knitr::opts_chunk$set(
 ```
 
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-library(here)
-library(readr)
 library(DBI)
-library(here)
 library(dplyr)
-library(dbplyr)
-library(stringr)
-library(DT)
-library(kableExtra)
 library(CodelistGenerator)
 library(CDMConnector)
 ```
@@ -69,7 +62,8 @@ cdm <- CDMConnector::cdm_from_con(
 ## Search strategies
 ### Condition domain, without searching synonyms, with exclusions, without including descendants or ancestor
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-oaCodes1 <- readRDS(here("vignettes", "optionsData01.RData"))
+oaCodes1 <- readRDS(system.file("optionsData01.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 To start we will search for "osteoarthritis", while excluding "post-infection" and "post-traumatic", but without searching synonyms, without searching via non-standard codes, and without including descendants or the direct ancestor of the included concepts.
@@ -92,19 +86,15 @@ oaCodes1 <- getCandidateCodes(
 
 What is the candidate codelist?
 ```{r,  message=FALSE, warning=FALSE }  
-datatable(oaCodes1,
-  rownames = FALSE,
-  options = list(
-    pageLength = 10,
-    lengthMenu = c(10, 20, 250)
-  )
-)
+oaCodes1 %>% 
+  glimpse()
 ```
 
 
 ### Including descendants
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-oaCodes2 <- readRDS(here("vignettes", "optionsData02.RData"))
+oaCodes2 <- readRDS(system.file("optionsData02.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 Now we will also include the descendants of included concepts.
@@ -130,18 +120,14 @@ newCodes1To2 <- compareCodelists(oaCodes1, oaCodes2) %>%
   filter(codelist == "Only codelist 2") %>%
   select(-"codelist")
 
-datatable(newCodes1To2,
-  rownames = FALSE,
-  options = list(
-    pageLength = 10,
-    lengthMenu = c(10, 20, 50)
-  )
-)
+newCodes1To2 %>% 
+  glimpse()
 ```
 
 ### Including observation domain
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-oaCodes3 <- readRDS(here("vignettes", "optionsData03.RData"))
+oaCodes3 <- readRDS(system.file("optionsData03.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 Now we will search the observation domain as well as the condition domain.
@@ -167,18 +153,14 @@ newCodes1To3 <- compareCodelists(oaCodes1, oaCodes3) %>%
   filter(codelist == "Only codelist 2") %>%
   select(-"codelist")
 
-datatable(newCodes1To3,
-  rownames = FALSE,
-  options = list(
-    pageLength = 10,
-    lengthMenu = c(10, 20, 50)
-  )
-)
+newCodes1To3 %>% 
+  glimpse()
 ```
 
 ### Search synonyms
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-oaCodes4 <- readRDS(here("vignettes", "optionsData04.RData"))
+oaCodes4 <- readRDS(system.file("optionsData04.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 Now we will search the concept synonym table to identify concepts to include.
@@ -204,18 +186,14 @@ newCodes1To4 <- compareCodelists(oaCodes1, oaCodes4) %>%
   filter(codelist == "Only codelist 2") %>%
   select(-"codelist")
 
-datatable(newCodes1To4,
-  rownames = FALSE,
-  options = list(
-    pageLength = 10,
-    lengthMenu = c(10, 20, 50)
-  )
-)
+newCodes1To4 %>% 
+  glimpse()
 ```
 
 ### Search via non-standard
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-oaCodes5 <- readRDS(here("vignettes", "optionsData04.RData"))
+oaCodes5 <- readRDS(system.file("optionsData05.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 Now we will search the concept synonym table to identify concepts to include.
@@ -241,18 +219,14 @@ newCodes1To5 <- compareCodelists(oaCodes1, oaCodes5) %>%
   filter(codelist == "Only codelist 2") %>%
   select(-"codelist")
 
-datatable(newCodes1To5,
-  rownames = FALSE,
-  options = list(
-    pageLength = 10,
-    lengthMenu = c(10, 20, 50)
-  )
-)
+newCodes1To5 %>% 
+  glimpse()
 ```
 
 ### Include ancestor
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-oaCodes8 <- readRDS(here("vignettes", "optionsData07.RData"))
+oaCodes8 <- readRDS(system.file("optionsData07.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 Now we include the direct ancestor of included terms.
@@ -278,11 +252,6 @@ newCodes1To8 <- compareCodelists(oaCodes1, oaCodes8) %>%
   filter(codelist == "Only codelist 2") %>%
   select(-"codelist")
 
-datatable(newCodes1To8,
-  rownames = FALSE,
-  options = list(
-    pageLength = 10,
-    lengthMenu = c(10, 20, 50)
-  )
-)
+newCodes1To8 %>% 
+  glimpse()
 ```
diff --git a/vignettes/a03_Options_for_CodelistGenerator.Rmd b/vignettes/a03_Options_for_CodelistGenerator.Rmd
index 5c63617..bad5981 100644
--- a/vignettes/a03_Options_for_CodelistGenerator.Rmd
+++ b/vignettes/a03_Options_for_CodelistGenerator.Rmd
@@ -20,9 +20,6 @@ knitr::opts_chunk$set(
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
 library(DBI)
 library(dplyr)
-library(dbplyr)
-library(here)
-library(kableExtra)
 library(CodelistGenerator)
 library(CDMConnector)
 ```
@@ -51,7 +48,8 @@ codes <- getCandidateCodes(
   includeDescendants = FALSE,
 )
 
-kable(codes)
+codes %>% 
+  glimpse()
 ```
 
 Note, we would also identify it based on a partial match
@@ -63,7 +61,8 @@ codes <- getCandidateCodes(
   includeDescendants = FALSE
 )
 
-kable(codes)
+codes %>% 
+  glimpse()
 ```
 
 ## Add descendants
@@ -73,12 +72,13 @@ knitr::include_graphics("mock_db_fig7.png")
 
 To include descendants of an identified code, we can set includeDescendants to TRUE
 ```{r}
-kable(getCandidateCodes(
+getCandidateCodes(
   cdm = cdm,
   keywords = "Musculoskeletal disorder",
   domains = "Condition",
   includeDescendants = TRUE
-))
+) %>% 
+  glimpse()
 ```
 
 ## Multiple search terms 
@@ -95,7 +95,8 @@ codes <- getCandidateCodes(
   includeDescendants = FALSE
 )
 
-kable(codes)
+codes %>% 
+  glimpse()
 ```
 
 ## Add ancestor
@@ -113,7 +114,8 @@ codes <- getCandidateCodes(
   domains = "Condition"
 )
 
-kable(codes)
+codes %>% 
+  glimpse()
 ```
 
 ## Searches with multiple words
@@ -131,7 +133,8 @@ codes <- getCandidateCodes(
   includeDescendants = TRUE
 )
 
-kable(codes)
+codes %>% 
+  glimpse()
 ```
 
 
@@ -150,7 +153,8 @@ codes <- getCandidateCodes(
   domains = "Condition"
 )
 
-kable(codes)
+codes %>% 
+  glimpse()
 ```
 
 ## Search using synonyms
@@ -168,7 +172,8 @@ codes <- getCandidateCodes(
   searchInSynonyms = TRUE
 )
 
-kable(codes)
+codes %>% 
+  glimpse()
 ```
 
 ## Search via non-standard
@@ -182,7 +187,8 @@ codes <- getCandidateCodes(
   searchNonStandard = TRUE
 )
 
-kable(codes)
+codes %>% 
+  glimpse()
 ```
 
 ## Search for both standard and non-standard concepts
@@ -206,7 +212,8 @@ codes <- getCandidateCodes(
   standardConcept = c("Standard", "Non-standard")
 )
 
-kable(codes)
+codes %>% 
+  glimpse()
 ```
 
 ```{r,echo=FALSE}
diff --git a/vignettes/a04_codelists_for_medications.Rmd b/vignettes/a04_codelists_for_medications.Rmd
index e9f32e9..a36f12f 100644
--- a/vignettes/a04_codelists_for_medications.Rmd
+++ b/vignettes/a04_codelists_for_medications.Rmd
@@ -15,15 +15,8 @@ knitr::opts_chunk$set(
 ```
 
 ```{r,  message=FALSE, warning=FALSE,echo=FALSE}
-library(here)
-library(readr)
 library(DBI)
-library(here)
 library(dplyr)
-library(dbplyr)
-library(stringr)
-library(DT)
-library(kableExtra)
 library(CodelistGenerator)
 ```
 
@@ -57,9 +50,6 @@ First we can follow the approach of identifying high-level codes and include all
 ```{r, eval=FALSE}
 library(dplyr)
 library(CodelistGenerator)
-library(stringr)
-library(DT)
-library(kableExtra)
 ```
 
 ```{r, eval=FALSE}
@@ -73,7 +63,8 @@ acetaminophen1 <- getCandidateCodes(
 ```
 
 ```{r,  message=FALSE, warning=FALSE, echo=FALSE}
-acetaminophen1 <- readRDS(here("vignettes", "medData01.RData"))
+acetaminophen1 <- readRDS(system.file("medData01.RData", 
+                                            package = "CodelistGenerator"))
 ```
 
 ```{r,  message=FALSE, warning=FALSE }  
diff --git a/vignettes/introData02.RData b/vignettes/introData02.RData
deleted file mode 100644
index c8e7e28..0000000
Binary files a/vignettes/introData02.RData and /dev/null differ
diff --git a/vignettes/introData03.RData b/vignettes/introData03.RData
deleted file mode 100644
index e5accdd..0000000
Binary files a/vignettes/introData03.RData and /dev/null differ
diff --git a/vignettes/medData01.RData b/vignettes/medData01.RData
deleted file mode 100644
index 00279f3..0000000
Binary files a/vignettes/medData01.RData and /dev/null differ
diff --git a/vignettes/medData02a.RData b/vignettes/medData02a.RData
deleted file mode 100644
index e1c4b0d..0000000
Binary files a/vignettes/medData02a.RData and /dev/null differ
diff --git a/vignettes/medData02b.RData b/vignettes/medData02b.RData
deleted file mode 100644
index 1da6877..0000000
Binary files a/vignettes/medData02b.RData and /dev/null differ
diff --git a/vignettes/medData03.RData b/vignettes/medData03.RData
deleted file mode 100644
index b96ef70..0000000
Binary files a/vignettes/medData03.RData and /dev/null differ
diff --git a/vignettes/optionsData01.RData b/vignettes/optionsData01.RData
deleted file mode 100644
index 4f0b144..0000000
Binary files a/vignettes/optionsData01.RData and /dev/null differ
diff --git a/vignettes/optionsData02.RData b/vignettes/optionsData02.RData
deleted file mode 100644
index 2977e0d..0000000
Binary files a/vignettes/optionsData02.RData and /dev/null differ
diff --git a/vignettes/optionsData03.RData b/vignettes/optionsData03.RData
deleted file mode 100644
index 9b5e3b8..0000000
Binary files a/vignettes/optionsData03.RData and /dev/null differ
diff --git a/vignettes/optionsData04.RData b/vignettes/optionsData04.RData
deleted file mode 100644
index 8975123..0000000
Binary files a/vignettes/optionsData04.RData and /dev/null differ
diff --git a/vignettes/optionsData05.RData b/vignettes/optionsData05.RData
deleted file mode 100644
index 6f66f11..0000000
Binary files a/vignettes/optionsData05.RData and /dev/null differ
diff --git a/vignettes/optionsData06.RData b/vignettes/optionsData06.RData
deleted file mode 100644
index 3e3e9d9..0000000
Binary files a/vignettes/optionsData06.RData and /dev/null differ
diff --git a/vignettes/optionsData07.RData b/vignettes/optionsData07.RData
deleted file mode 100644
index 3c675ee..0000000
Binary files a/vignettes/optionsData07.RData and /dev/null differ