From 29c233fc6b29d771cd164778dfdf887f0708cf7f Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Tue, 26 Sep 2023 12:46:32 +0200 Subject: [PATCH 01/12] Fixing source link --- _pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index cf0dc04..e02c7c5 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -7,7 +7,7 @@ search: repo: url: home: https://github.com/pharmaverse/datacutr - source: https://github.com/pharmaverse/datacutr + source: https://github.com/pharmaverse/datacutr/blob/main/ issue: https://github.com/pharmaverse/datacutr/issues/ user: https://github.com/ news: From 68a9661aa293eef87dde71c0e2576e50532c3b26 Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Mon, 2 Oct 2023 18:20:34 +0200 Subject: [PATCH 02/12] Updating special_dm_cut and date_cut to account for the case that DCUTDTM (datacut date) is null. --- .lintr | 2 +- R/date_cut.R | 20 ++++++----- R/global.R | 3 +- R/special_dm_cut.R | 16 +++++---- tests/testthat/test-date_cut.R | 46 +++++++++++++++++++++++++ tests/testthat/test-special_dm_cut.R | 50 ++++++++++++++++++++++++++++ 6 files changed, 119 insertions(+), 18 deletions(-) diff --git a/.lintr b/.lintr index abb2318..5ac008d 100644 --- a/.lintr +++ b/.lintr @@ -1,4 +1,4 @@ -linters: with_defaults(line_length_linter(100), object_usage_linter=NULL, cyclocomp_linter(complexity_limit = 20)) +linters: with_defaults(line_length_linter(100), object_name_linter = NULL, object_usage_linter=NULL, cyclocomp_linter(complexity_limit = 20)) exclusions: list( "R/data.R", "tests/testthat") diff --git a/R/date_cut.R b/R/date_cut.R index 02989d4..6f3e58c 100644 --- a/R/date_cut.R +++ b/R/date_cut.R @@ -64,20 +64,19 @@ date_cut <- function(dataset_sdtm, (length(get_duplicates(dataset_cut$USUBJID)) == 0), msg = "Duplicate patients in the DCUT (dataset_cut) dataset, please update." ) - assert_that( - (any(is.na(mutate(dataset_cut, !!cut_var))) == FALSE), - msg = "At least one patient with missing datacut date (cut_var) in the DCUT - (dataset_cut) dataset, please update." + ifelse(any(is.na(mutate(dataset_cut, !!cut_var))) == TRUE, + print("At least one patient with missing datacut date (cut_var) in the DCUT + (dataset_cut) dataset, all records for this/these patient(s) will be kept."), NA ) - dcut <- dataset_cut %>% mutate(DCUT_TEMP_DCUTDTM = !!cut_var) %>% - subset(select = c(USUBJID, DCUT_TEMP_DCUTDTM)) + subset(select = c(USUBJID, DCUT_TEMP_DCUTDTM)) %>% + mutate(TEMP_DCUT_KEEP = "Y") - assert_that(is.POSIXt(dcut$DCUT_TEMP_DCUTDTM), + ifelse(!is.na(dcut$DCUT_TEMP_DCUTDTM), assert_that(is.POSIXt(dcut$DCUT_TEMP_DCUTDTM), msg = "cut_var is expected to be of date type POSIXt" - ) + ), NA) attributes(dcut$USUBJID)$label <- attributes(dataset_sdtm$USUBJID)$label @@ -92,7 +91,10 @@ date_cut <- function(dataset_sdtm, # Flag records to be removed - those occurring after cut date and patients not in dcut dataset dataset <- dataset_sdtm_pt %>% mutate(DCUT_TEMP_REMOVE = ifelse((DCUT_TEMP_SDTM_DATE > DCUT_TEMP_DCUTDTM) | - is.na(DCUT_TEMP_DCUTDTM), "Y", NA_character_)) + is.na(TEMP_DCUT_KEEP), "Y", NA_character_)) + + # Ensure variable is character + dataset$DCUT_TEMP_REMOVE <- as.character(dataset$DCUT_TEMP_REMOVE) dataset <- drop_temp_vars(dsin = dataset, drop_dcut_temp = FALSE) diff --git a/R/global.R b/R/global.R index 8f12d61..304559f 100644 --- a/R/global.R +++ b/R/global.R @@ -20,5 +20,6 @@ globalVariables(c( "DCUT_TEMP_DATE", "source_data", "DCUT_TEMP_REMOVE", - "DCUT_TEMP_DTHCHANGE" + "DCUT_TEMP_DTHCHANGE", + "TEMP_DCUT_KEEP" )) diff --git a/R/special_dm_cut.R b/R/special_dm_cut.R index 8cea8a6..3c4b3bf 100644 --- a/R/special_dm_cut.R +++ b/R/special_dm_cut.R @@ -48,10 +48,9 @@ special_dm_cut <- function(dataset_dm, (length(get_duplicates(dataset_cut$USUBJID)) == 0), msg = "Duplicate patients in the DCUT (dataset_cut) dataset, please update." ) - assert_that( - (any(is.na(mutate(dataset_cut, !!cut_var))) == FALSE), - msg = "At least one patient with missing datacut date (cut_var) in the DCUT - (dataset_cut) dataset, please update." + ifelse(any(is.na(mutate(dataset_cut, !!cut_var))) == TRUE, + print("At least one patient with missing datacut date (cut_var) in the DCUT + (dataset_cut) dataset, all records for this/these patient(s) will be kept."), NA ) assert_data_frame(dataset_dm, required_vars = exprs(USUBJID, DTHDTC) @@ -69,16 +68,19 @@ special_dm_cut <- function(dataset_dm, by = "USUBJID" ) - assert_that(is.POSIXt(dm_temp$DCUT_TEMP_DCUTDTM), + ifelse(!is.na(dm_temp$DCUT_TEMP_DCUTDTM), assert_that(is.POSIXt(dm_temp$DCUT_TEMP_DCUTDTM), msg = "cut_var is expected to be of date type POSIXt" - ) + ), NA) # Flag records with Death Date after Cut date dataset_updatedth <- dm_temp %>% mutate(DCUT_TEMP_DTHCHANGE = case_when( - DCUT_TEMP_DTHDT > DCUT_TEMP_DCUTDTM ~ "Y", + !is.na(DCUT_TEMP_DCUTDTM) & (DCUT_TEMP_DTHDT > DCUT_TEMP_DCUTDTM) ~ "Y", TRUE ~ as.character(NA) )) + # Ensure variable is character + dataset_updatedth$DCUT_TEMP_REMOVE <- as.character(dataset_updatedth$DCUT_TEMP_REMOVE) + dataset_updatedth } diff --git a/tests/testthat/test-date_cut.R b/tests/testthat/test-date_cut.R index c6bc055..b77a59a 100644 --- a/tests/testthat/test-date_cut.R +++ b/tests/testthat/test-date_cut.R @@ -178,3 +178,49 @@ test_that("All SDTMv dates are after datacut date", { expected_ae4 ) }) + +# Test 5 - Datacut date is NA + +input_ae5 <- tibble::tribble( + ~STUDYID, ~USUBJID, ~AESEQ, ~AESTDTC, + "my_study", "subject1", 1, "2021-01-02", + "my_study", "subject1", 2, "2021-08-31", + "my_study", "subject1", 3, "2021-10-10", + "my_study", "subject2", 2, "2021-02-20", + "my_study", "subject3", 1, "2021-03-02" +) + +input_dcut5 <- tibble::tribble( + ~STUDYID, ~USUBJID, ~DCUTDTM, + "my_study", "subject1", NA, + "my_study", "subject2", NA, + "my_study", "subject3", NA +) + + +expected_ae5 <- tibble::tribble( + ~STUDYID, ~USUBJID, ~AESEQ, ~AESTDTC, ~DCUT_TEMP_SDTM_DATE, ~DCUT_TEMP_DCUTDTM, + ~DCUT_TEMP_REMOVE, + "my_study", "subject1", 1, "2021-01-02", ymd_hms("2021-01-02T00:00:00"), + NA, NA_character_, + "my_study", "subject1", 2, "2021-08-31", ymd_hms("2021-08-31T00:00:00"), + NA, NA_character_, + "my_study", "subject1", 3, "2021-10-10", ymd_hms("2021-10-10T00:00:00"), + NA, NA_character_, + "my_study", "subject2", 2, "2021-02-20", ymd_hms("2021-02-20T00:00:00"), + NA, NA_character_, + "my_study", "subject3", 1, "2021-03-02", ymd_hms("2021-03-02T00:00:00"), + NA, NA_character_ +) + +test_that("DCUTDTM is NA", { + expect_equal( + date_cut( + dataset_sdtm = input_ae5, + sdtm_date_var = AESTDTC, + dataset_cut = input_dcut5, + cut_var = DCUTDTM + ), + expected_ae5 + ) +}) diff --git a/tests/testthat/test-special_dm_cut.R b/tests/testthat/test-special_dm_cut.R index 145f781..e47cc90 100644 --- a/tests/testthat/test-special_dm_cut.R +++ b/tests/testthat/test-special_dm_cut.R @@ -60,3 +60,53 @@ test_that("Error thrown if cut_var is not a POSIXt input", { regexp = "cut_var is expected to be of date type POSIXt" ) }) + +dcut_na <- tibble::tribble( + ~USUBJID, ~DCUTDTM, + "01-701-1015", NA, + "01-701-1023", NA, + "01-701-1028", NA, + "01-701-1033", NA, + "01-701-1047", NA, + "01-701-1057", NA, + "01-701-1097", NA, + "01-701-1111", NA, + "01-701-1115", NA, + "01-701-1118", NA +) + +dm_expect_na <- tibble::tribble( + ~USUBJID, ~DTHDTC, ~DTHFL, ~DCUT_TEMP_REMOVE, + ~DCUT_TEMP_DTHDT, ~DCUT_TEMP_DCUTDTM, ~DCUT_TEMP_DTHCHANGE, + "01-701-1015", "", "", NA_character_, + NA, NA, NA_character_, + "01-701-1023", "2014-10-20", "Y", NA_character_, + ymd_hms("2014-10-20T00:00:00"), NA, NA_character_, + "01-701-1028", "2014-10-21", "Y", NA_character_, + ymd_hms("2014-10-21T00:00:00"), NA, NA_character_, + "01-701-1033", "2014-10-19", "Y", NA_character_, + ymd_hms("2014-10-19T00:00:00"), NA, NA_character_, + "01-701-1047", "2014-10-31", "Y", NA_character_, + ymd_hms("2014-10-31T00:00:00"), NA, NA_character_, + "01-701-1057", "2025-10-20", "Y", NA_character_, + ymd_hms("2025-10-20T00:00:00"), NA, NA_character_, + "01-701-1097", "2002-10-20", "Y", NA_character_, + ymd_hms("2002-10-20T00:00:00"), NA, NA_character_, + "01-701-1111", "", "Y", NA_character_, + NA, NA, NA_character_, + "01-701-1115", "", "Y", NA_character_, + NA, NA, NA_character_, + "01-701-1118", "2014-11-20", "", NA_character_, + ymd_hms("2014-11-20T00:00:00"), NA, NA_character_ +) + +test_that("Tests all expected outcomes when datacut date is NA", { + testthat::expect_equal( + special_dm_cut( + dataset_dm = dm, + dataset_cut = dcut_na, + cut_var = DCUTDTM + ), + dm_expect_na + ) +}) From 9f61f8d9992f700a53a755bb55a9186494a18d7a Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Wed, 11 Oct 2023 12:55:50 +0200 Subject: [PATCH 03/12] Updating warning message. --- R/date_cut.R | 3 +-- R/special_dm_cut.R | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/date_cut.R b/R/date_cut.R index 6f3e58c..a2bdb40 100644 --- a/R/date_cut.R +++ b/R/date_cut.R @@ -65,8 +65,7 @@ date_cut <- function(dataset_sdtm, msg = "Duplicate patients in the DCUT (dataset_cut) dataset, please update." ) ifelse(any(is.na(mutate(dataset_cut, !!cut_var))) == TRUE, - print("At least one patient with missing datacut date (cut_var) in the DCUT - (dataset_cut) dataset, all records for this/these patient(s) will be kept."), NA + print("At least 1 patient with missing datacut date, all records will be kept."), NA ) dcut <- dataset_cut %>% diff --git a/R/special_dm_cut.R b/R/special_dm_cut.R index 3c4b3bf..b854373 100644 --- a/R/special_dm_cut.R +++ b/R/special_dm_cut.R @@ -49,8 +49,7 @@ special_dm_cut <- function(dataset_dm, msg = "Duplicate patients in the DCUT (dataset_cut) dataset, please update." ) ifelse(any(is.na(mutate(dataset_cut, !!cut_var))) == TRUE, - print("At least one patient with missing datacut date (cut_var) in the DCUT - (dataset_cut) dataset, all records for this/these patient(s) will be kept."), NA + print("At least 1 patient with missing datacut date, all records will be kept."), NA ) assert_data_frame(dataset_dm, required_vars = exprs(USUBJID, DTHDTC) From ae8afeeefbad71775804abcbe18d672177d71746 Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Thu, 12 Oct 2023 13:03:17 +0200 Subject: [PATCH 04/12] Update news file. --- NEWS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 41199cd..700459f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,9 @@ - N/A ## Updates of Existing Functions -- N/A +- Update to `date_cut()` and special_dm_cut()` functions to allow for +datacut date to be null. In this case, all records for this patient +will be kept/left unchanged. ## Breaking Changes - N/A From 7faba8a0208a07856b5b8c667352e444d61edf3f Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Thu, 12 Oct 2023 13:03:17 +0200 Subject: [PATCH 05/12] Update news file. --- NEWS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 41199cd..b14d78a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,9 @@ - N/A ## Updates of Existing Functions -- N/A +- Update to `date_cut()` and `special_dm_cut()` functions to allow for +datacut date to be null. In this case, all records for this patient +will be kept/left unchanged. ## Breaking Changes - N/A From 7e87ab161e5dc969919cbfd43a5bd67f9c2f76f8 Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Tue, 17 Oct 2023 17:56:57 +0200 Subject: [PATCH 06/12] Fixing lintr error --- .lintr | 1 + 1 file changed, 1 insertion(+) diff --git a/.lintr b/.lintr index b5d74ab..b78e223 100644 --- a/.lintr +++ b/.lintr @@ -2,6 +2,7 @@ linters: linters_with_defaults( line_length_linter(100), object_usage_linter=NULL, infix_spaces_linter=NULL, + indentation_linter=NULL, cyclocomp_linter(complexity_limit = 22) ) exclusions: list( From 3ebef49e864087dcaf993bf7b9d2064be1420414 Mon Sep 17 00:00:00 2001 From: "Harris, Alana {MDBB~Welwyn}" Date: Tue, 31 Oct 2023 13:10:24 +0100 Subject: [PATCH 07/12] Updating news file --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index ba11b08..001acc6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,7 +13,7 @@ - N/A ## Various -- Minor documentation updates #171 #173 #177 +- Minor documentation updates #171 #173 #177 #184 # datacutr 0.1.0 From 6721c1191ef3cc8b8f644a1af9c1201356e02de5 Mon Sep 17 00:00:00 2001 From: Tim Barnett Date: Tue, 28 Nov 2023 11:33:00 +0000 Subject: [PATCH 08/12] Updated warning for expectation of dm naming for special_dm #172 --- R/process_cut.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/process_cut.R b/R/process_cut.R index c413fba..b987483 100644 --- a/R/process_cut.R +++ b/R/process_cut.R @@ -91,6 +91,9 @@ no_cut_v empty, in which case a default value of vector() will be used." msg = "special_dm must be either TRUE or FALSE" ) if (special_dm) { + assert_that("dm" %in% names(source_data), + msg = "dataset `dm` is missing but special_dm processing expects this" + ) assert_that( setequal(names(source_sdtm_data), c( patient_cut_v, date_cut_m[, 1], no_cut_v, From de2b7fd2f754f969368c67d29bea4bd9332660f4 Mon Sep 17 00:00:00 2001 From: Tim Barnett Date: Tue, 28 Nov 2023 11:57:45 +0000 Subject: [PATCH 09/12] Adjust style of message #172 --- R/process_cut.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/process_cut.R b/R/process_cut.R index b987483..64eb2cf 100644 --- a/R/process_cut.R +++ b/R/process_cut.R @@ -92,7 +92,7 @@ no_cut_v empty, in which case a default value of vector() will be used." ) if (special_dm) { assert_that("dm" %in% names(source_data), - msg = "dataset `dm` is missing but special_dm processing expects this" + msg = "dataset `dm` is missing but special_dm processing expects this" ) assert_that( setequal(names(source_sdtm_data), c( From 6e53a2b6a5d6cd56abb733f433ab445b43a6c1e4 Mon Sep 17 00:00:00 2001 From: Tim Barnett Date: Tue, 28 Nov 2023 12:56:46 +0000 Subject: [PATCH 10/12] Amend source data ref to source_sdtm_data #172 --- R/process_cut.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/process_cut.R b/R/process_cut.R index 64eb2cf..2fe5918 100644 --- a/R/process_cut.R +++ b/R/process_cut.R @@ -91,7 +91,7 @@ no_cut_v empty, in which case a default value of vector() will be used." msg = "special_dm must be either TRUE or FALSE" ) if (special_dm) { - assert_that("dm" %in% names(source_data), + assert_that("dm" %in% names(source_sdtm_data), msg = "dataset `dm` is missing but special_dm processing expects this" ) assert_that( From 09d9974d1ed449b3a5307151b5d7656002d535e1 Mon Sep 17 00:00:00 2001 From: Tim Barnett Date: Wed, 29 Nov 2023 10:13:14 +0000 Subject: [PATCH 11/12] Update pharmaverse links #172 --- .github/pull_request_template.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index eecd262..4016cd5 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,11 +1,11 @@ -Thank you for your Pull Request! [admiraldev](https://pharmaverse.github.io/admiraldev/devel/index.html) have developed this task checklist from the [Development Process Guide](https://pharmaverse.github.io/admiraldev/main/articles/development_process.html) to help with the final steps of the process. Completing the below tasks helps to ensure our reviewers can maximize their time on your code as well as making sure the datacutr codebase remains robust and consistent. +Thank you for your Pull Request! [admiraldev](https://pharmaverse.github.io/admiraldev/index.html) have developed this task checklist from the [Development Process Guide](https://pharmaverse.github.io/admiraldev/articles/development_process.html) to help with the final steps of the process. Completing the below tasks helps to ensure our reviewers can maximize their time on your code as well as making sure the datacutr codebase remains robust and consistent. Please check off each taskbox as an acknowledgment that you completed the task or check off that it is not relevant to your Pull Request. This checklist is part of the Github Action workflows and the Pull Request will not be merged into the `devel` branch until you have checked off each task. - [ ] Place Closes # into the beginning of your Pull Request Title (Use Edit button in top-right if you need to update) - [ ] Code is formatted according to the [tidyverse style guide](https://style.tidyverse.org/). Run `styler::style_file()` to style R and Rmd files -- [ ] Updated relevant unit tests or have written new unit tests - See [Unit Test Guide](https://pharmaverse.github.io/admiraldev/main/articles/unit_test_guidance.html#writing-unit-tests-in-admiral-) -- [ ] If you removed/replaced any function and/or function parameters, did you fully follow the [deprecation guidance](https://pharmaverse.github.io/admiraldev/main/articles/programming_strategy.html#deprecation-1)? +- [ ] Updated relevant unit tests or have written new unit tests - See [Unit Test Guide](https://pharmaverse.github.io/admiraldev/articles/unit_test_guidance.html#writing-unit-tests-in-admiral-) +- [ ] If you removed/replaced any function and/or function parameters, did you fully follow the [deprecation guidance](https://pharmaverse.github.io/admiraldev/articles/programming_strategy.html#deprecation)? - [ ] Update to all relevant roxygen headers and examples - [ ] Run `devtools::document()` so all `.Rd` files in the `man` folder and the `NAMESPACE` file in the project root are updated appropriately - [ ] Address any updates needed for vignettes and/or templates From 87d03d0c25e0444c2fd55ccecee23b90b777d25d Mon Sep 17 00:00:00 2001 From: Tim Barnett Date: Mon, 11 Dec 2023 14:13:04 +0000 Subject: [PATCH 12/12] Update NEWS.md for process_cut warning addition #172 --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 2a609d9..6fc23d7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ - Update to `date_cut()` and `special_dm_cut()` functions to allow for datacut date to be null. In this case, all records for this patient will be kept/left unchanged. +- Warning added to `process_cut` if expected dataset `dm` is missing ## Breaking Changes - N/A