Skip to content

Commit

Permalink
Add round_id_variable expected pattern match check when validating co…
Browse files Browse the repository at this point in the history
…nfig. Resolves #68
  • Loading branch information
annakrystalli committed Dec 13, 2024
1 parent ac8f7bc commit f051b6a
Show file tree
Hide file tree
Showing 6 changed files with 630 additions and 1 deletion.
67 changes: 66 additions & 1 deletion R/validate-config-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,72 @@ validate_mt_property_unique_vals <- function(model_task_grp,
}
}

# Check that modeling task round ids match the expected round ID patterns when
# round_id_from_variable = TRUE
validate_mt_round_id_pattern <- function(model_task_grp,
model_task_i,
round_i,
schema,
round_id_from_variable,
round_id_var) {
if (!round_id_from_variable) {
return(NULL)
}
round_id_var_vals <- purrr::pluck(
model_task_grp, "task_ids", round_id_var
)

invalid_vals <- purrr::map(
round_id_var_vals,
\(.x) {
if (is.null(.x)) {
return(NULL)
}
valid <- stringr::str_detect(
.x,
"^(\\d{4}-\\d{2}-\\d{2})$|^[A-Za-z0-9_]+$"
)
invalid <- .x[!valid]
if (length(invalid) == 0L) {
return(NULL)
}
invalid
}
)
if (any(lengths(invalid_vals) > 0L)) {
# Collapse invalid values into a single string
invalid_vals_msg <- purrr::compact(invalid_vals) |>
purrr::map_chr(
~ glue::glue_collapse(glue::glue("'{.x}'"), ", ", last = " and ")
)
invalid_vals_msg <- glue::glue_collapse(
glue::glue("{names(invalid_vals_msg)}: {invalid_vals_msg}"),
sep = "; "
)

error_row <- data.frame(
instancePath = paste0(
glue::glue(
get_error_path(schema, "/task_ids", "instance")
), "/",
round_id_var
),
schemaPath = get_error_path(
schema,
glue::glue("task_ids/{round_id_var}"),
"schema"
),
keyword = "round_id variable pattern",
message = glue::glue(
"round_id variable '{round_id_var}' values must be either ISO formatted
dates or alphanumeric characters separated by '_'."
),
schema = "^([0-9]{4}-[0-9]{2}-[0-9]{2})$|^[A-Za-z0-9_]+$",
data = glue::glue("invalid values: {invalid_vals_msg}")
)
return(error_row)
}
}
## ROUND LEVEL VALIDATIONS ----
# Check that round id variables are consistent across modeling tasks
validate_round_ids_consistent <- function(round, round_i,
Expand Down Expand Up @@ -596,7 +662,6 @@ validate_round_derived_task_ids <- function(round, round_i, schema) {
out
}


## CONFIG LEVEL VALIDATIONS ----
# Validate that round IDs are unique across all rounds in config file
validate_round_ids_unique <- function(config_tasks, schema) {
Expand Down
14 changes: 14 additions & 0 deletions R/validate_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ perform_dynamic_config_validations <- function(validation) {
## Dynamic schema validation utilities ----
val_round <- function(round, round_i, schema) {
model_task_grps <- round[["model_tasks"]]
round_id_from_variable <- round[["round_id_from_variable"]]
round_id_var <- round[["round_id"]]

c(
purrr::imap(
Expand Down Expand Up @@ -233,6 +235,18 @@ val_round <- function(round, round_i, schema) {
schema = schema
)
),
purrr::imap(
model_task_grps,
\(.x, .y) {
validate_mt_round_id_pattern(
model_task_grp = .x, model_task_i = .y,
round_i = round_i,
schema = schema,
round_id_from_variable = round_id_from_variable,
round_id_var = round_id_var
)
}
),
list(
validate_round_ids_consistent(
round = round,
Expand Down
23 changes: 23 additions & 0 deletions R/view_config_val_errors.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,20 @@ clean_error_df <- function(errors_tbl) {
if (is.null(errors_tbl)) {
return(NULL)
}

# Move any custom error messages to the message column
if (!is.null(errors_tbl$parentSchema$errorMessage)) {
error_msg <- !is.na(errors_tbl$parentSchema$errorMessage)
errors_tbl$message[error_msg] <- errors_tbl$parentSchema$errorMessage[error_msg]
}

errors_tbl[c("dataPath", "parentSchema")] <- NULL
errors_tbl <- errors_tbl[!grepl("oneOf.+", errors_tbl$schemaPath), ]
# remove superfluous if error. The "then" error is what we are interested in
errors_tbl <- errors_tbl[!errors_tbl$keyword == "if", ]
errors_tbl <- remove_superfluous_enum_rows(errors_tbl)


# Get rid of unnecessarily verbose data entry when a data column is a data.frame
if (inherits(errors_tbl$data, "data.frame")) {
errors_tbl$data <- ""
Expand Down Expand Up @@ -269,6 +279,16 @@ extract_params_to_data <- function(errors_tbl,
errors_tbl
}

escape_pattern_dollar <- function(error_df) {
is_pattern <- grepl("pattern", error_df[["keyword"]])
error_df[["schema"]][is_pattern] <- gsub(
"$", "&#36;",
error_df[["schema"]][is_pattern],
fixed = TRUE
)
error_df
}

render_errors_df <- function(error_df) {
schema_version <- attr(error_df, "schema_version")
schema_url <- attr(error_df, "schema_url")
Expand All @@ -286,6 +306,9 @@ render_errors_df <- function(error_df) {
error_df[["schemaPath"]] <- purrr::map_chr(error_df[["schemaPath"]], path_to_tree)
error_df[["instancePath"]] <- purrr::map_chr(error_df[["instancePath"]], path_to_tree)
error_df[["message"]] <- paste("\u274c", error_df[["message"]])
# Escape `$` characters to ensure regex pattern does not trigger equation
# formatting in markdown
error_df <- escape_pattern_dollar(error_df)


# Create table ----
Expand Down
64 changes: 64 additions & 0 deletions tests/testthat/test-validate_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -251,3 +251,67 @@ test_that("v4 validation works", {
)
)
})

test_that("v4.0.1 round_id pattern validation works", {
skip_if_offline()
# TODO: remove branch argument when v4.0.1 is released.
schema <- download_tasks_schema("v4.0.1", branch = "br-v4.0.1")

# Test that regex pattern matching for round_id properties in jsonvalidate
# identifies expected errors (when round_id_from_variable: false).
expect_false(
res_round_id <- suppressMessages(
validate_config(
config_path = testthat::test_path(
"testdata",
"v4.0.1-tasks-fail-round-id-pattern.json"
),
branch = "br-v4.0.1"
)
)
)
errors_id <- attr(res_round_id, "errors")
expect_equal(nrow(errors_id), 2L)
expect_equal(
errors_id$message,
c(
"must match pattern \"^([0-9]{4}-[0-9]{2}-[0-9]{2})$|^[A-Za-z0-9_]+$\"",
"must match \"then\" schema"
)
)
expect_equal(
errors_id$schema[[1]],
"^([0-9]{4}-[0-9]{2}-[0-9]{2})$|^[A-Za-z0-9_]+$"
)

# Test that dynamic regex pattern matching for round_id variable values
# identifies expected errors (when round_id_from_variable: true).
expect_false(
res_round_id_val <- suppressMessages(
validate_config(
config_path = testthat::test_path(
"testdata",
"v4.0.1-tasks-fail-round-id-val-pattern.json"
),
branch = "br-v4.0.1"
)
)
)

errors_vals <- attr(res_round_id_val, "errors")
expect_equal(nrow(errors_vals), 1L)
expect_equal(
errors_vals$message,
structure(
"round_id variable 'round_id_var' values must be either ISO formatted\ndates or alphanumeric characters separated by '_'.", # nolint: line_length_linter
class = c(
"glue",
"character"
)
)
)
expect_equal(
errors_vals$schema,
"^([0-9]{4}-[0-9]{2}-[0-9]{2})$|^[A-Za-z0-9_]+$"
)
})
Loading

0 comments on commit f051b6a

Please sign in to comment.