From c97f6bd79014129deb88bc71b6a3e4f455fd1824 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Thu, 9 Nov 2023 17:22:23 +0000 Subject: [PATCH] Error if wrong schema in `pl$DataFrame()` (#486) --- NEWS.md | 2 ++ R/dataframe__frame.R | 9 ++++++++- tests/testthat/test-dataframe.R | 9 ++++++--- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3789dc834..b6a0fe110 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ - The argument `quote_style` in `$write_csv()` and `$sink_csv()` can now take the value `"never"` (#483). +- `pl$DataFrame()` now errors if the variables specified in `schema` do not exist + in the data (#486). # polars 0.10.0 diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index ad4ccd4cf..7c97143b8 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -136,6 +136,13 @@ DataFrame pl$DataFrame = function(..., make_names_unique = TRUE, schema = NULL) { largs = unpack_list(...) + uw = \(res) unwrap(res, "in $DataFrame():") + + if (!is.null(schema) && !all(names(schema) %in% names(largs))) { + Err_plain("Some columns in `schema` are not in the DataFrame.") |> + uw() + } + # no args crete empty DataFrame if (length(largs) == 0L) { return(.pr$DataFrame$default()) @@ -188,7 +195,7 @@ pl$DataFrame = function(..., make_names_unique = TRUE, schema = NULL) { }) |> do.call(what = pl$select) }) |> - unwrap("in pl$DataFrame()") + uw() } diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 9a754f5c6..a883abb67 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -160,15 +160,18 @@ test_that("DataFrame, custom schema", { expect_no_error( pl$DataFrame(list(schema = 1), schema = list(schema = pl$Float32)) ) - # errors if incorrect datatype + # incorrect datatype expect_error(pl$DataFrame(x = 1, schema = list(schema = foo))) expect_error( pl$DataFrame(x = 1, schema = list(x = "foo")), "expected RPolarsDataType" ) - # TODO: why doesn't this error? - # expect_error(pl$DataFrame(x = 1, schema = list(schema = pl$foo))) + # wrong variable name in schema + expect_error( + pl$DataFrame(x = 1, schema = list(schema = pl$Float32)), + "Some columns in `schema` are not in the DataFrame" + ) })