Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #188 Update process_cut to use NULL as the default for empty input arguments #201

Merged
merged 5 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .lintr
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ linters: linters_with_defaults(
line_length_linter(100),
object_usage_linter=NULL,
infix_spaces_linter=NULL,
indentation_linter=NULL,
cyclocomp_linter(complexity_limit = 22)
)
exclusions: list(
Expand Down
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,14 @@
## Updates of Existing Functions
- Update to `impute_dcutdtc()`, `date_cut()` and `special_dm_cut()` functions to allow for
datacut date to be null. In this case, all records for this patient
will be kept/left unchanged. (#179, #188, #189, #190)
will be kept/left unchanged. (#179, #189, #190)
- Warning added to `process_cut` if expected dataset `dm` is missing (#172)
- Warning added to `create_dcut` if cut date being passed as `NULL`,
and not valid date or `NA`/`""` (#181)
- `process_cut` updated so that the `patient_cut_v`, `date_cut_m` and `no_cut_v`
arguments have a default value of `NULL` (#188)
- `process_cut` updated to have more detailed error messages when incorrect datasets
are fed in (#180)

## Breaking Changes
- Added dependency on `admiraldev` >= 0.3.0 (#173)
Expand Down
130 changes: 81 additions & 49 deletions R/process_cut.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,80 +52,112 @@
#' )
#'
process_cut <- function(source_sdtm_data,
patient_cut_v = vector(),
date_cut_m = matrix(nrow = 0, ncol = 2),
no_cut_v = vector(),
patient_cut_v = NULL,
date_cut_m = NULL,
no_cut_v = NULL,
dataset_cut,
cut_var,
special_dm = TRUE) {
# Assertions for input parameters -----------------------------------------------
assert_that(is.list(source_sdtm_data),
msg = "source_sdtm_data must be a list"
msg = "source_sdtm_data must be of class list"
)

assert_that(all(unlist(lapply(source_sdtm_data, is.data.frame))),
msg = "All elements of the list source_sdtm_data must be a dataframe"
msg = "All elements of source_sdtm_data must be a dataframe"
)
assert_that(all(is.vector(patient_cut_v), patient_cut_v != ""),
msg = "patient_cut_v must be a vector. \n
Note: If you do not wish to use a patient cut on any SDTMv domains, then please leave
patient_cut_v empty, in which case a default value of vector() will be used."

assert_that(all(is.vector(patient_cut_v) | is.null(patient_cut_v), patient_cut_v != ""),
msg = "patient_cut_v must be a vector or NULL. \n
Note: If no SDTMv domains use a patient cut, then please leave patient_cut_v
empty, in which case a default value of NULL will be used."
)
assert_that(all(is.matrix(date_cut_m), date_cut_m != ""),
msg = "date_cut_m must be a matrix \n
Note: If you do not wish to use a date cut on any SDTMv domains, then please leave
date_cut_m empty, in which case a default value of matrix(nrow=0, ncol=2) will be used."

assert_that(all(is.matrix(date_cut_m) | is.null(date_cut_m), date_cut_m != ""),
msg = "date_cut_m must be a matrix or NULL. \n
Note: If no SDTMv domains use a date cut, then please leave date_cut_m
empty, in which case a default value of NULL will be used."
)
assert_that(ncol(date_cut_m) == 2,
msg = "date_cut_m must be a matrix with two columns"

assert_that(any(ncol(date_cut_m) == 2, is.null(date_cut_m)),
msg = "date_cut_m must be a matrix with two columns or NULL."
)
assert_that(all(is.vector(no_cut_v), no_cut_v != ""),
msg = "no_cut_v must be a vector. \n

assert_that(all(is.vector(no_cut_v) | is.null(no_cut_v), no_cut_v != ""),
msg = "no_cut_v must be a vector or NULL. \n
Note: If you do not wish to leave any SDTMv domains uncut, then please leave
no_cut_v empty, in which case a default value of vector() will be used."
no_cut_v empty, in which case a default value of NULL will be used."
)

cut_var <- assert_symbol(enexpr(cut_var))
assert_data_frame(dataset_cut,
required_vars = exprs(USUBJID, !!cut_var)
)

assert_that(is.logical(special_dm),
msg = "special_dm must be either TRUE or FALSE"
)

sdtm_inputs <- names(source_sdtm_data)
cut_inputs <- c(patient_cut_v, date_cut_m[, 1], no_cut_v)

if (special_dm) {
assert_that("dm" %in% names(source_sdtm_data),
msg = "dataset `dm` is missing but special_dm processing expects this"
)
assert_that(
setequal(names(source_sdtm_data), c(
patient_cut_v, date_cut_m[, 1], no_cut_v,
"dm"
)),
msg = "Inconsistency between input SDTMv datasets and the SDTMv datasets
listed under each cut approach. Please check for the two likely issues below... \n
1) There are input SDTMv datasets where no cut method has been defined.
2) A cut method has been defined for a SDTMv dataset that does not exist in the
source SDTMv data."
)
assert_that(
length(unique(c(patient_cut_v, date_cut_m[, 1], no_cut_v, "dm")))
== length(c(patient_cut_v, date_cut_m[, 1], no_cut_v, "dm")),
msg = "The number of SDTMv datasets in the source data does not match the
number of SDTMv datasets in which a cut approach has been defined."
)
} else {
assert_that(setequal(names(source_sdtm_data), c(patient_cut_v, date_cut_m[, 1], no_cut_v)),
msg = "Inconsistency between input SDTMv datasets and the SDTMv datasets
listed under each cut approach. Please check for the two likely issues below... \n
1) There are input SDTMv datasets where no cut method has been defined.
2) A cut method has been defined for a SDTMv dataset that does not exist in the source SDTMv data."
)
assert_that(
length(unique(c(patient_cut_v, date_cut_m[, 1], no_cut_v)))
== length(c(patient_cut_v, date_cut_m[, 1], no_cut_v)),
msg = "The number of SDTMv datasets in the source data does not match the
number of SDTMv datasets in which a cut approach has been defined."
msg = "dataset `dm` is missing from source_sdtm_data but special_dm processing expects this"
)
cut_inputs <- append(cut_inputs, "dm")
}

sdtm_inputs_dups <- c()
no_cut_method <- c()
for (i in seq_len(length(sdtm_inputs))) {
if ((sdtm_inputs[i] %in% sdtm_inputs[-i]) && !(sdtm_inputs[i] %in% sdtm_inputs_dups)) {
sdtm_inputs_dups <- append(sdtm_inputs_dups, sdtm_inputs[i])
}
if (!(sdtm_inputs[i] %in% cut_inputs) && !(sdtm_inputs[i] %in% no_cut_method)) {
no_cut_method <- append(no_cut_method, sdtm_inputs[i])
}
}
error_msg1 <- paste0(
paste(sdtm_inputs_dups, collapse = " & "),
" exists more than once in source_sdtm_data"
)
assert_that(is.null(sdtm_inputs_dups),
msg = error_msg1
)
error_msg2 <- paste0(
paste(no_cut_method, collapse = " & "),
" exists in source_sdtm_data but no cut method has been assigned"
)
assert_that(is.null(no_cut_method),
msg = error_msg2
)

cut_inputs_dups <- c()
no_sdtm <- c()
for (i in seq_len(length(cut_inputs))) {
if ((cut_inputs[i] %in% cut_inputs[-i]) && !(cut_inputs[i] %in% cut_inputs_dups)) {
cut_inputs_dups <- append(cut_inputs_dups, cut_inputs[i])
}
if (!(cut_inputs[i] %in% sdtm_inputs) && !(cut_inputs[i] %in% no_sdtm)) {
no_sdtm <- append(no_sdtm, cut_inputs[i])
}
}
error_msg3 <- paste0(
"Multiple cut types have been assigned for ",
paste(cut_inputs_dups, collapse = " & ")
)
assert_that(is.null(cut_inputs_dups),
msg = error_msg3
)
error_msg4 <- paste0(
"Cut types have been assigned for ", paste(no_sdtm, collapse = " & "),
" which does not exist in source_sdtm_data"
)
assert_that(is.null(no_sdtm),
msg = error_msg4
)

# Conduct Patient-Level Cut ------------------------------------------------------

patient_cut_data <- lapply(
Expand Down
6 changes: 3 additions & 3 deletions man/process_cut.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 4 additions & 8 deletions tests/testthat/test-process_cut.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,7 @@ test_that("Test that process_cut() errors when a source SDTM dataset is not
cut_var = DCUTDTM,
special_dm = TRUE
),
regexp = "Inconsistency between input SDTMv datasets and the SDTMv datasets
listed under each cut approach."
regexp = "sc exists in source_sdtm_data but no cut method has been assigned"
)
})

Expand All @@ -124,8 +123,7 @@ test_that("Test that process_cut() errors when an input list includes a source
cut_var = DCUTDTM,
special_dm = TRUE
),
regexp = "Inconsistency between input SDTMv datasets and the SDTMv datasets
listed under each cut approach."
regexp = "Cut types have been assigned for vs which does not exist in source_sdtm_data"
)
})

Expand All @@ -147,8 +145,7 @@ test_that("Test that process_cut() errors when a source SDTMv dataset is
cut_var = DCUTDTM,
special_dm = TRUE
),
regexp = "The number of SDTMv datasets in the source data does not match the
number of SDTMv datasets in which a cut approach has been defined."
regexp = "Multiple cut types have been assigned for ae"
)
})

Expand All @@ -170,8 +167,7 @@ test_that("Test that process_cut() errors when special_dm = TRUE and dm is also
cut_var = DCUTDTM,
special_dm = TRUE
),
regexp = "The number of SDTMv datasets in the source data does not match the
number of SDTMv datasets in which a cut approach has been defined."
regexp = "Multiple cut types have been assigned for dm"
)
})

Expand Down
Loading