Skip to content

Commit

Permalink
Merge pull request #202 from darwin-eu/stratifyByConcept
Browse files Browse the repository at this point in the history
stratifyByConcept
  • Loading branch information
edward-burn authored Jul 11, 2024
2 parents a4b213f + 44e1aee commit 4bb5cb9
Show file tree
Hide file tree
Showing 11 changed files with 190 additions and 5 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export(getVocabularies)
export(mockVocabRef)
export(restrictToCodesInUse)
export(sourceCodesInUse)
export(stratifyByConcept)
export(stratifyByDoseUnit)
export(stratifyByRouteCategory)
export(subsetOnDoseUnit)
Expand Down
87 changes: 87 additions & 0 deletions R/stratifyByConcept.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Copyright 2024 DARWIN EU®
#
# This file is part of CodelistGenerator
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


#' Stratify a codelist by the concepts included within it
#'
#' @param x A codelist
#' @param cdm A cdm reference
#' @param keepOriginal Whether to keep the original codelist and append the
#' stratify (if TRUE) or just return the stratified codelist (if FALSE).
#'
#' @return A codelist
#' @export
#'
stratifyByConcept <- function(x,
cdm,
keepOriginal = FALSE){

x_start <- x

if(inherits(x_start, "codelist")){
x <- addDetails(x, cdm = cdm)
}

for(i in seq_along(x)){
x[[i]] <- x[[i]] |>
dplyr::mutate(c_name = names(x[i])) |>
dplyr::mutate(new_c_name = paste0(.data$c_name, "_",
omopgenerics::toSnakeCase(.data$concept_name)))
}

x <- purrr::list_rbind(x)

if(any(is.na(x$concept_name))){
nMissingConceptName <- sum(is.na(x$concept_name))
cli::cli_warn("Dropping {nMissingConceptName} concepts that do not have a concept name")
x <- x |>
dplyr::filter(!is.na(.data$concept_name))
}

x <- split(x,
x[, c("new_c_name")]
)

if(inherits(x_start, "codelist")){
for(i in seq_along(x)){
x[[i]] <- x[[i]] |>
dplyr::pull("concept_id")
}
}

if(inherits(x_start, "codelist_with_details")){
for(i in seq_along(x)){
x[[i]] <- x[[i]] |>
dplyr::select(!"c_name") |>
dplyr::select(!"new_c_name")
}
}

if(isTRUE(keepOriginal)){
x <- purrr::list_flatten(list(x_start, x))
}

x <- x[order(names(x))]

if(inherits(x_start, "codelist")){
x <- omopgenerics::newCodelist(x)
} else{
x <- omopgenerics::newCodelistWithDetails(x)
}

x

}
2 changes: 2 additions & 0 deletions R/stratifyByDoseUnit.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,15 @@ stratifyByDoseUnit <- function(x, cdm, keepOriginal = FALSE){

CDMConnector::dropTable(cdm = cdm, name = tableCodelist)

result <- result[order(names(result))]

if(isFALSE(withDetails)){
result <- omopgenerics::newCodelist(result)
} else{
result <- omopgenerics::newCodelistWithDetails(result)
}


result

}
2 changes: 2 additions & 0 deletions R/stratifyByRoute.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,15 @@ stratifyByRouteCategory <- function(x, cdm, keepOriginal = FALSE){

CDMConnector::dropTable(cdm = cdm, name = tableCodelist)

result <- result[order(names(result))]

if(isFALSE(withDetails)){
result <- omopgenerics::newCodelist(result)
} else{
result <- omopgenerics::newCodelistWithDetails(result)
}


result

}
2 changes: 1 addition & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ reference:
- matches("codesFromCohort|codesFromConceptSet")
- subtitle: Codelist utility functions
- contents:
- matches("codesInUse|compareCodelists|subsetToCodesInUse|restrictToCodesInUse|subsetOnRouteCategory|stratifyByRouteCategory|stratifyByDoseUnit|subsetOnDoseUnit")
- matches("codesInUse|compareCodelists|subsetToCodesInUse|restrictToCodesInUse|subsetOnRouteCategory|stratifyByRouteCategory|stratifyByDoseUnit|stratifyByConcept|subsetOnDoseUnit")
- subtitle: Vocabulary utility functions
- contents:
- matches("getVocabVersion|getVocabularies|getConceptClassId|getDomains|getDescendants|getDoseForm|doseFormToRoute|getRouteCategories|getRoutes|getDoseUnit|getRelationshipId|getMappings|sourceCodesInUse")
Expand Down
2 changes: 1 addition & 1 deletion man/CodelistGenerator-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/stratifyByConcept.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions tests/testthat/test-dbms.R
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,12 @@ test_that("postgres", {
drug_codes <- getDrugIngredientCodes(cdm, name = c("metformin",
"diclofenac"))

# we can stratify by each concept contained
drug_codes_by_concept <- stratifyByConcept(drug_codes,
cdm = cdm)

drug_codes_by_concept_used <- subsetToCodesInUse(drug_codes_by_concept, cdm)

# if we subset to oral both should still have codes
expect_true(length(subsetOnRouteCategory(drug_codes, cdm,
routeCategory = "oral")) == 2)
Expand Down
37 changes: 37 additions & 0 deletions tests/testthat/test-stratifyByConcept.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
test_that("multiplication works", {

cdm <- mockVocabRef()
cl <- omopgenerics::newCodelist(list(a = c(1,2,3),
b = c(3,4,5)))

cl_s1 <- stratifyByConcept(cl, cdm, keepOriginal = FALSE)
expect_true(length(cl_s1) == 6)


cl_s2 <- stratifyByConcept(cl, cdm, keepOriginal = TRUE)
expect_true(length(cl_s2) == 8)
expect_true(all(sort(names(cl)) == sort(setdiff(names(cl_s2), names(cl_s1)))))



cl <- omopgenerics::newCodelistWithDetails(list(a = data.frame(concept_id = c(1,2,3),
concept_name = c("a", "b", "c")),
b = data.frame(concept_id = c(1,2,3),
concept_name = c("c", "d", "e"))))

cl_s1 <- stratifyByConcept(cl, cdm, keepOriginal = FALSE)
expect_true(length(cl_s1) == 6)


cl_s2 <- stratifyByConcept(cl, cdm, keepOriginal = TRUE)
expect_true(length(cl_s2) == 8)
expect_true(all(sort(names(cl)) == sort(setdiff(names(cl_s2), names(cl_s1)))))

# if concepts are not in the cdm
cdm <- mockVocabRef()
cl <- omopgenerics::newCodelist(list(a = c(1,2,3),
b = c(3,4,5,99999)))
expect_warning(cl_s1 <- stratifyByConcept(cl, cdm, keepOriginal = FALSE))
expect_true(length(cl_s1) == 6) # concept 99999 will have been dropped

})
13 changes: 13 additions & 0 deletions tests/testthat/test-stratifyByDoseUnit.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
test_that("stratifyByDoseUnit in mock", {
cdm <- mockVocabRef()
ing <- getDrugIngredientCodes(cdm = cdm)

# no dose units in the mock
expect_no_error(stratifyByDoseUnit(x = ing, cdm = cdm))


# expected errors
expect_error(stratifyByDoseUnit(x = ing, cdm = "a"))
expect_error(stratifyByDoseUnit(x = "a", cdm = cdm))

})
21 changes: 18 additions & 3 deletions tests/testthat/test-subsetOnDoseUnit.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
test_that("multiplication works", {
expect_equal(2 * 2, 4)
})
test_that("subsetOnDoseUnit in mock", {
cdm <- mockVocabRef()
ing <- getDrugIngredientCodes(cdm = cdm)

# no dose units in the mock
expect_no_error(subsetOnDoseUnit(x = ing, cdm = cdm,
doseUnit = "milligram"))


# expected errors
expect_error(subsetOnDoseUnit(x = ing, cdm = cdm,
doseUnit = 1))
expect_error(subsetOnDoseUnit(x = ing, cdm = "a",
doseUnit = "milligram"))
expect_error(subsetOnDoseUnit(x = "a", cdm = "a",
doseUnit = "milligram"))

})

0 comments on commit 4bb5cb9

Please sign in to comment.