Skip to content

Commit

Permalink
Merge pull request #166 from frictionlessdata/cli_schema
Browse files Browse the repository at this point in the history
Use CLI for schema-related tests
  • Loading branch information
peterdesmet authored Dec 28, 2023
2 parents db9a07e + e02ec29 commit 848ef02
Show file tree
Hide file tree
Showing 13 changed files with 209 additions and 164 deletions.
22 changes: 22 additions & 0 deletions R/check_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' Check data
#'
#' Check if an object is a non-empty data frame.
#'
#' @param data A data frame.
#' @return `TRUE` or error.
#' @family check functions
#' @noRd
check_data <- function(data) {
if (
!is.data.frame(data) ||
replace_null(dim(data)[1], 0) == 0 ||
replace_null(dim(data)[2], 0) == 0
) {
cli::cli_abort(
"{.arg data} must be a data frame containing data.",
class = "frictionless_error_data_invalid"
)
}

return(TRUE)
}
5 changes: 2 additions & 3 deletions R/check_package.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ check_package <- function(package) {
cli::cli_abort(
"{.arg package} must be a list describing a Data Package created with
{.fun read_package} or {.fun create_package}.",
class = "frictionless_error_package_incorrect"
class = "frictionless_error_package_invalid"
)
}

Expand All @@ -30,6 +30,5 @@ check_package <- function(package) {
)
}

# Return TRUE
TRUE
return(TRUE)
}
90 changes: 39 additions & 51 deletions R/check_schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,31 @@
#' @noRd
check_schema <- function(schema, data = NULL) {
# Check schema is list with property fields
assertthat::assert_that(
is.list(schema) & "fields" %in% names(schema) & is.list(schema["fields"]),
msg = glue::glue("`schema` must be a list with property `fields`.")
)
if (
!is.list(schema) ||
!"fields" %in% names(schema) ||
!is.list(schema["fields"])
) {
cli::cli_abort(
"{.arg schema} must be a list with a {.field fields} property.",
class = "frictionless_error_schema_invalid"
)
}
fields <- schema$fields

# Check fields have names
field_names <- purrr::map_chr(fields, ~ replace_null(.x$name, NA_character_))
assertthat::assert_that(
all(!is.na(field_names)),
msg = glue::glue(
"All fields in `schema` must have property `name`.",
"\u2139 Field(s) {field_numbers_collapse} don't have a name.",
.sep = "\n",
field_numbers_collapse = glue::glue_collapse(
glue::backtick(which(is.na(field_names))),
sep = ", "
)
fields_without_name <- as.character(which(is.na(field_names)))
if (any(is.na(field_names))) {
cli::cli_abort(
c(
"All fields in {.arg schema} must have a {.field name} property.",
"x" = "Field{?s} {fields_without_name} {?doesn't/don't} have a
{.field name}."
),
class = "frictionless_error_fields_without_name"
)
)
}

# Check fields have valid types (a mix of valid types and undefined is ok)
field_types <- purrr::map_chr(fields, ~ replace_null(.x$type, NA_character_))
Expand All @@ -39,49 +44,32 @@ check_schema <- function(schema, data = NULL) {
NA_character_
)
invalid_types <- setdiff(field_types, valid_types)
assertthat::assert_that(
all(is.na(field_types)) | length(invalid_types) == 0,
msg = glue::glue(
"All fields in `schema` must have valid `type`.",
"Type {invalid_types_collapse} is invalid.",
.sep = " ",
invalid_types_collapse = glue::glue_collapse(
glue::backtick(invalid_types),
sep = ", "
)
if (length(invalid_types) > 0) {
cli::cli_abort(
c(
"All fields in {.arg schema} must have a valid {.field type}.",
"x" = "Type{?s} {.val {invalid_types}} {?is/are} invalid."
),
class = "frictionless_error_fields_type_invalid"
)
)
}

# Check data when present
if (!is.null(data)) {
assertthat::assert_that(
is.data.frame(data) &
replace_null(dim(data)[1], 0) != 0 &
replace_null(dim(data)[2], 0) != 0,
msg = glue::glue(
"`data` must be a data frame containing data."
)
)
check_data(data)

col_names <- colnames(data)
assertthat::assert_that(
identical(field_names, col_names),
msg = glue::glue(
"Field names in `schema` must match column names in data:",
"\u2139 Field names: {field_names_collapse}",
"\u2139 Column names: {col_names_collapse}",
.sep = "\n",
field_names_collapse = glue::glue_collapse(
glue::backtick(field_names),
sep = ", "
if (!identical(field_names, col_names)) {
cli::cli_abort(
c(
"Field names in {.arg schema} must match column names in {.arg data}.",
"i" = "Field name{?s}: {.val {field_names}}",
"i" = "Column name{?s}: {.val {col_names}}"
),
col_names_collapse = glue::glue_collapse(
glue::backtick(col_names),
sep = ", "
)
class = "frictionless_error_fields_colnames_mismatch"
)
)
} else {
return(TRUE)
}
}

return(TRUE)
}
9 changes: 1 addition & 8 deletions R/create_schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,7 @@
#' str(schema)
create_schema <- function(data) {
# Check data
assertthat::assert_that(
is.data.frame(data) &
replace_null(dim(data)[1], 0) != 0 &
replace_null(dim(data)[2], 0) != 0,
msg = glue::glue(
"`data` must be a data frame containing data."
)
)
check_data(data)

# Columns with all NA are considered logical by R (and read_delim)
# Set those to character, since string is a better default for Table Schema
Expand Down
23 changes: 12 additions & 11 deletions R/get_schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,21 @@ get_schema <- function(package, resource_name) {
resource <- get_resource(package, resource_name)

# Check resource is tabular-data-resource (expected for resources with schema)
assertthat::assert_that(
replace_null(resource$profile, "") == "tabular-data-resource",
msg = glue::glue(
"Resource `{resource_name}` must have property `profile` with value",
"`tabular-data-resource`.",
.sep = " "
if (replace_null(resource$profile, "") != "tabular-data-resource") {
cli::cli_abort(
"Resource {.val {resource_name}} must have a {.field profile} property
with value {.val tabular-data-resource}.",
class = "frictionless_error_resource_not_tabular"
)
)
}

# Get schema
assertthat::assert_that(
!is.null(resource$schema),
msg = glue::glue("Resource `{resource_name}` must have property `schema`.")
)
if (is.null(resource$schema)) {
cli::cli_abort(
"Resource {.val {resource_name}} must have a {.field schema} property.",
class = "frictionless_error_resource_without_schema"
)
}
schema <- read_descriptor(resource$schema, package$directory, safe = TRUE)

# Check schema
Expand Down
20 changes: 4 additions & 16 deletions tests/testthat/test-add_resource.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ test_that("add_resource() returns a valid Data Package", {
))
})

test_that("add_resource() returns error on incorrect Data Package", {
test_that("add_resource() returns error on invalid Data Package", {
df <- data.frame("col_1" = c(1, 2), "col_2" = c("a", "b"))
expect_error(
add_resource(list(), "new", df),
class = "frictionless_error_package_incorrect"
class = "frictionless_error_package_invalid"
)
})

Expand Down Expand Up @@ -124,25 +124,13 @@ test_that("add_resource() returns error on mismatching schema and data", {
# df
expect_error(
add_resource(p, "new", df, schema_invalid),
paste(
"Field names in `schema` must match column names in data:",
"ℹ Field names: `no_such_col`, `col_2`",
"ℹ Column names: `col_1`, `col_2`",
sep = "\n"
),
fixed = TRUE
class = "frictionless_error_fields_colnames_mismatch"
)

# csv
expect_error(
add_resource(p, "new", df_csv, schema_invalid),
paste(
"Field names in `schema` must match column names in data:",
"ℹ Field names: `no_such_col`, `col_2`",
"ℹ Column names: `col_1`, `col_2`",
sep = "\n"
),
fixed = TRUE
class = "frictionless_error_fields_colnames_mismatch"
)

# For more tests see test-check_schema.R
Expand Down
19 changes: 19 additions & 0 deletions tests/testthat/test-check_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
test_that("check_data() returns error on invalid or empty data frame", {
expect_error(
check_data("not_a_df"),
class = "frictionless_error_data_invalid"
)
expect_error(
check_data(data.frame()),
class = "frictionless_error_data_invalid"
)
expect_error(
check_data(data.frame("col_1" = character(0))),
class = "frictionless_error_data_invalid"
)
expect_error(
create_schema("not_a_df"),
regexp = "`data` must be a data frame containing data.",
fixed = TRUE
)
})
12 changes: 6 additions & 6 deletions tests/testthat/test-check_package.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ test_that("check_package() returns TRUE on valid Data Package", {
expect_true(check_package(example_package))
})

test_that("check_package() returns error on incorrect Data Package", {
test_that("check_package() returns error on invalid Data Package", {
# Valid package
p <- list(
resources = list(),
Expand All @@ -21,32 +21,32 @@ test_that("check_package() returns error on incorrect Data Package", {
# Must be a list
expect_error(
check_package("not_a_list"),
class = "frictionless_error_package_incorrect"
class = "frictionless_error_package_invalid"
)
# Must have resources as list
p_invalid <- p
p_invalid$resources <- NULL
expect_error(
check_package(p_invalid),
class = "frictionless_error_package_incorrect"
class = "frictionless_error_package_invalid"
)
p_invalid$resources <- vector(mode = "character")
expect_error(
check_package(p_invalid),
class = "frictionless_error_package_incorrect"
class = "frictionless_error_package_invalid"
)

# Must have directory as character
p_invalid <- p
p_invalid$directory <- NULL
expect_error(
check_package(p_invalid),
class = "frictionless_error_package_incorrect"
class = "frictionless_error_package_invalid"
)
p_invalid$directory <- logical()
expect_error(
check_package(p_invalid),
class = "frictionless_error_package_incorrect"
class = "frictionless_error_package_invalid"
)
})

Expand Down
Loading

0 comments on commit 848ef02

Please sign in to comment.