Skip to content

Commit

Permalink
Merge pull request #100 from darwin-eu/develop
Browse files Browse the repository at this point in the history
various updates for v2.0
  • Loading branch information
edward-burn authored Oct 9, 2023
2 parents cd266a6 + 1352674 commit d3cfcd0
Show file tree
Hide file tree
Showing 24 changed files with 1,061 additions and 1,055 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: CodelistGenerator
Title: Generate Code Lists for the OMOP Common Data Model
Version: 1.7.0
Version: 2.0.0
Authors@R: c(
person("Edward", "Burn", email = "[email protected]",
role = c("aut", "cre"),
Expand All @@ -14,7 +14,7 @@ Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Imports:
CDMConnector (>= 1.0.0),
CDMConnector (>= 1.1.2),
checkmate (>= 2.0.0),
DBI (>= 1.1.0),
dplyr (>= 1.1.0),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export(getVocabVersion)
export(getVocabularies)
export(mockVocabRef)
export(summariseCodeUse)
export(summariseCohortCodeUse)
importFrom(magrittr,"%>%")
importFrom(rlang,.data)
importFrom(rlang,.env)
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# CodelistGenerator 2.0.0
* Simplified the interface of getCandidateCodes, with a number of arguments removed.
* Added function summariseCohortCodeUse.

# CodelistGenerator 1.7.0
* Added function codesFromCohort.

Expand Down
15 changes: 7 additions & 8 deletions R/drugCodes.R
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ getATCCodes <- function(cdm,
workingName <- atc_groups %>%
dplyr::filter(.data$concept_id == names(atc_descendants)[i]) %>%
dplyr::pull("concept_name")
workingName <- stringr::str_to_lower(workingName)
workingName <- stringr::str_replace_all(workingName, " ", "_")

if(isFALSE(withConceptDetails)){
atc_descendants[[i]] <- atc_descendants[[i]] %>%
Expand All @@ -142,10 +144,7 @@ getATCCodes <- function(cdm,
dplyr::select(!"ancestor_concept_id")
}

names(atc_descendants)[i] <- paste0(
workingLevel, ": ", workingName,
" (", names(atc_descendants)[i], ")"
)
names(atc_descendants)[i] <- workingName
}
}
return(atc_descendants)
Expand Down Expand Up @@ -249,6 +248,9 @@ getDrugIngredientCodes <- function(cdm,
workingName <- ingredientConcepts %>%
dplyr::filter(.data$concept_id == names(ingredientCodes)[[i]]) %>%
dplyr::pull("concept_name")
workingName <- stringr::str_to_lower(workingName)
workingName <- stringr::str_replace_all(workingName, " ", "_")


if(isFALSE(withConceptDetails)){
ingredientCodes[[i]] <- ingredientCodes[[i]] %>%
Expand All @@ -260,10 +262,7 @@ getDrugIngredientCodes <- function(cdm,
dplyr::select(!"ancestor_concept_id")
}

names(ingredientCodes)[[i]] <- paste0(
"Ingredient", ": ", workingName,
" (", names(ingredientCodes)[[i]], ")"
)
names(ingredientCodes)[[i]] <- workingName
}
return(ingredientCodes)
}
Expand Down
117 changes: 15 additions & 102 deletions R/getCandidateCodes.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,39 +31,20 @@
#' @param exclude Character vector of words
#' to identify concepts to exclude.
#' @param domains Character vector with one or more of the OMOP CDM domain.
#' @param conceptClassId Character vector with one or more concept class
#' of the Concept
#' @param doseForm The dose form associated with a drug
#' @param vocabularyId Character vector with one or more vocabulary
#' of the Concept
#' @param standardConcept Character vector with one or more of "Standard",
#' "Classification", and "Non-standard". These correspond to the flags used
#' for the standard_concept field in the concept table of the cdm.
#' @param exactMatch Either TRUE or FALSE. If TRUE only exact matches of
#' keywords will be identified when running the initial search.
#' @param searchInSynonyms Either TRUE or FALSE. If TRUE the code will also
#' search using both the primary name in the concept table and synonyms from
#' the concept synonym table.
#' @param searchViaSynonyms Either TRUE or FALSE. If TRUE the code will also
#' search via the concept synonym table.
#' @param searchNonStandard Either TRUE or FALSE. If TRUE the code will also
#' search via non-standard concepts.
#' @param includeSequela Either TRUE or FALSE. If TRUE, codes associated via
#' a concept relationship of 'Due to of' or 'Occurs before' will also be
#' identified.
#' @param includeDescendants Either TRUE or FALSE.
#' If TRUE descendant concepts of identified concepts
#' will be included in the candidate codelist.
#' @param includeAncestor Either TRUE or FALSE.
#' If TRUE the direct ancestor concepts of identified concepts
#' will be included in the candidate codelist.
#' @param fuzzyMatch Either TRUE or FALSE. If TRUE the fuzzy matching
#' will be used, with approximate matches identified.
#' @param maxDistanceCost, The
#' maximum number/fraction of match cost (generalized Levenshtein distance)
#' for fuzzy matching (see ??base::agrep for further details).
#' @param verbose Either TRUE or FALSE.
#' If TRUE, progress will be reported.

#'
#' @return tibble
Expand All @@ -81,64 +62,23 @@ getCandidateCodes <- function(cdm,
keywords,
exclude = NULL,
domains = "Condition",
conceptClassId = NULL,
doseForm = NULL,
vocabularyId = NULL,
standardConcept = "Standard",
exactMatch = FALSE,
searchInSynonyms = FALSE,
searchViaSynonyms = FALSE,
searchNonStandard = FALSE,
includeSequela = FALSE,
includeDescendants = TRUE,
includeAncestor = FALSE,
fuzzyMatch = FALSE,
maxDistanceCost = 0.1,
verbose = FALSE) {
if (verbose == TRUE) {
# to report time taken at the end
start <- Sys.time()

# summary of search strategy
message(glue::glue("Search strategy"))
message(glue::glue("-- keywords: {toString(keywords)}"))
message(glue::glue("-- domains: {toString(domains)}"))
message(glue::glue("-- conceptClassId: {toString(conceptClassId)}"))
message(glue::glue("-- vocabularyId: {toString(vocabularyId)}"))
message(glue::glue("-- exactMatch: {toString(exactMatch)}"))
message(glue::glue("-- standardConcept: {toString(standardConcept)}"))
message(glue::glue("-- searchInSynonyms: {toString(searchInSynonyms)}"))
message(glue::glue("-- searchViaSynonyms: {toString(searchViaSynonyms)}"))
message(glue::glue("-- searchNonStandard: {toString(searchNonStandard)}"))
message(glue::glue("-- fuzzyMatch: {toString(fuzzyMatch)}"))
message(glue::glue("-- maxDistanceCost: {toString(maxDistanceCost)}"))
message(glue::glue("-- exclude: {toString(exclude)}"))
message(glue::glue("-- includeDescendants: {toString(includeDescendants)}"))
message(glue::glue("-- includeAncestor: {toString(includeAncestor)}"))
includeAncestor = FALSE) {

# now we´ll start checking the inputs
message("Checking inputs")
}
start <- Sys.time()

## checks for standard types of user error
errorMessage <- checkmate::makeAssertCollection()
checkDbType(cdm = cdm, type = "cdm_reference", messageStore = errorMessage)
checkmate::assertVector(keywords, add = errorMessage)
checkmate::assertVector(exclude,
checkmate::assertCharacter(keywords, add = errorMessage)
checkmate::assertCharacter(exclude,
null.ok = TRUE,
add = errorMessage
)
checkmate::assertVector(domains, add = errorMessage)
checkmate::assertVector(conceptClassId,
add = errorMessage,
null.ok = TRUE
)
checkmate::assertCharacter(doseForm, add = errorMessage,
null.ok = TRUE)
checkmate::assertVector(vocabularyId,
add = errorMessage,
null.ok = TRUE
)
checkmate::assertVector(standardConcept, add = errorMessage)
standardConceptCheck <- all(tolower(standardConcept) %in%
c(
Expand All @@ -148,19 +88,14 @@ getCandidateCodes <- function(cdm,
))
if (!isTRUE(standardConceptCheck)) {
errorMessage$push(
"- standardConcept must be from Standard, Non-stanadard, or Classification"
"- standardConcept must be from Standard, Non-standard, or Classification"
)
}
checkmate::assertTRUE(standardConceptCheck, add = errorMessage)
checkmate::assert_logical(exactMatch, add = errorMessage)
checkmate::assert_logical(searchInSynonyms, add = errorMessage)
checkmate::assert_logical(searchViaSynonyms, add = errorMessage)
checkmate::assert_logical(searchNonStandard, add = errorMessage)
checkmate::assert_logical(includeDescendants, add = errorMessage)
checkmate::assert_logical(includeAncestor, add = errorMessage)
checkmate::assert_logical(fuzzyMatch, add = errorMessage)
checkmate::assert_numeric(maxDistanceCost, add = errorMessage)
checkmate::assert_logical(verbose, add = errorMessage)
checkmate::reportAssertions(collection = errorMessage)

errorMessage <- checkmate::makeAssertCollection()
Expand All @@ -177,20 +112,8 @@ getCandidateCodes <- function(cdm,
checkmate::reportAssertions(collection = errorMessage)

errorMessage <- checkmate::makeAssertCollection()
if(exactMatch == TRUE) {
checkmate::assert_false(fuzzyMatch, add = errorMessage)
if (!isFALSE(fuzzyMatch)) {
errorMessage$push(
"- fuzzyMatch must be FALSE if exactMatch is TRUE"
)
}
}
checkmate::reportAssertions(collection = errorMessage)

if (verbose == TRUE) {
message("Starting search")
}

# run search by domain
searchSpecs <- data.frame(
id = seq_along(domains),
Expand All @@ -206,45 +129,35 @@ getCandidateCodes <- function(cdm,
cdm = cdm,
exclude = exclude,
domains = x$domain,
conceptClassId = conceptClassId,
doseForm = doseForm,
vocabularyId = vocabularyId,
standardConcept = standardConcept,
exactMatch = exactMatch,
searchInSynonyms = searchInSynonyms,
searchViaSynonyms = searchViaSynonyms,
searchNonStandard = searchNonStandard,
includeSequela = includeSequela,
fuzzyMatch = fuzzyMatch,
maxDistanceCost = maxDistanceCost,
includeDescendants = includeDescendants,
includeAncestor = includeAncestor,
verbose = verbose
includeAncestor = includeAncestor
)

return(result)
})

# drop any empty tibbles
# drop any empty tibbles and put results from each domain together
searchResults <- searchResults[lapply(searchResults, nrow) > 0]

# put the results from each domain together
searchResults <- dplyr::bind_rows(searchResults,
.id = NULL
) %>%
dplyr::distinct()

if (nrow(searchResults) == 0) {
message(glue::glue("-- No codes found for given search strategy"))
cli::cli_inform("No codes found for the given search strategy")
} else {
cli::cli_alert_success(
"{nrow(searchResults)} candidate concept{?s} identified"
)
}

# return results
if (verbose == TRUE) {
duration <- abs(as.numeric(Sys.time() - start, units = "secs"))
message(glue::glue(
"Time: {floor(duration/60)} minutes and {duration %% 60 %/% 1} seconds"
))
}
cli::cli_inform(
"Time taken: {floor(duration/60)} minutes and {duration %% 60 %/% 1} seconds"
)

return(searchResults)
}
Loading

0 comments on commit d3cfcd0

Please sign in to comment.