From 9f4a59485ba310856dc7892a31415456dd04d15d Mon Sep 17 00:00:00 2001 From: eitsupi <50911393+eitsupi@users.noreply.github.com> Date: Thu, 11 Apr 2024 01:23:48 +0900 Subject: [PATCH] fix!: convert R list to Series correctly via `as_polars_series()` (#1021) Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- NAMESPACE | 2 ++ NEWS.md | 30 ++++++++++++++++++++++++++++++ R/as_polars.R | 22 ++++++++++++++++++++++ man/as_polars_series.Rd | 3 +++ tests/testthat/test-as_polars.R | 13 +++++++++++++ tests/testthat/test-expr_list.R | 2 +- 6 files changed, 71 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 446c0104a..97ac52165 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -176,8 +176,10 @@ S3method(as_polars_series,clock_time_point) S3method(as_polars_series,clock_zoned_time) S3method(as_polars_series,data.frame) S3method(as_polars_series,default) +S3method(as_polars_series,list) S3method(as_polars_series,nanoarrow_array) S3method(as_polars_series,nanoarrow_array_stream) +S3method(as_polars_series,rpolars_raw_list) S3method(as_polars_series,vctrs_rcrd) S3method(c,RPolarsSeries) S3method(dim,RPolarsDataFrame) diff --git a/NEWS.md b/NEWS.md index f664bd63b..1c8dbb753 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,36 @@ ### Other breaking changes +- R objects inside an R list are now converted to Polars data types via + `as_polars_series()` (#1021). For example, up to polars 0.15.1, + data.frames inside a list were converted to a nested List type: + + ```r + pl$select(nested_data = pl$lit(list(data.frame(a = 1)))) + #> shape: (1, 1) + #> ┌─────────────────┐ + #> │ nested_data │ + #> │ --- │ + #> │ list[list[f64]] │ + #> ╞═════════════════╡ + #> │ [[1.0]] │ + #> └─────────────────┘ + ``` + + From 0.16.0, data.frames inside a list are converted to the polars Struct type: + + ```r + pl$select(nested_data = pl$lit(list(data.frame(a = 1)))) + #> shape: (1, 1) + #> ┌─────────────────┐ + #> │ nested_data │ + #> │ --- │ + #> │ list[struct[1]] │ + #> ╞═════════════════╡ + #> │ [{1.0}] │ + #> └─────────────────┘ + ``` + - Several functions have been rewritten to match the behavior of Python Polars. - In `pl$Series()` arguments are changed. diff --git a/R/as_polars.R b/R/as_polars.R index 24b6c0260..b6daf615b 100644 --- a/R/as_polars.R +++ b/R/as_polars.R @@ -523,3 +523,25 @@ as_polars_series.clock_zoned_time = function(x, name = NULL, ...) { ... )$dt$replace_time_zone(time_zone) } + + +# TODO: rewrite `recursive_robjname2series_tree` in Rust side +#' @rdname as_polars_series +#' @export +as_polars_series.list = function(x, name = NULL, ...) { + lapply(x, \(child) { + if (is.null(child)) { + NULL # if `NULL`, the type will be resolved later + } else { + as_polars_series(child) + } + }) |> + as_polars_series.default(name = name) +} + + +# TODO: reconsider `rpolars_raw_list` +#' @export +as_polars_series.rpolars_raw_list = function(x, name = NULL, ...) { + as_polars_series.default(x, name = name) +} diff --git a/man/as_polars_series.Rd b/man/as_polars_series.Rd index 09b4e942b..88e32c06b 100644 --- a/man/as_polars_series.Rd +++ b/man/as_polars_series.Rd @@ -17,6 +17,7 @@ \alias{as_polars_series.clock_time_point} \alias{as_polars_series.clock_sys_time} \alias{as_polars_series.clock_zoned_time} +\alias{as_polars_series.list} \title{To polars Series} \usage{ as_polars_series(x, name = NULL, ...) @@ -50,6 +51,8 @@ as_polars_series(x, name = NULL, ...) \method{as_polars_series}{clock_sys_time}(x, name = NULL, ...) \method{as_polars_series}{clock_zoned_time}(x, name = NULL, ...) + +\method{as_polars_series}{list}(x, name = NULL, ...) } \arguments{ \item{x}{Object to convert into \link[=Series_class]{a polars Series}.} diff --git a/tests/testthat/test-as_polars.R b/tests/testthat/test-as_polars.R index 82390034c..423edba8d 100644 --- a/tests/testthat/test-as_polars.R +++ b/tests/testthat/test-as_polars.R @@ -427,3 +427,16 @@ test_that("clock_zoned_time may returns empty time zone", { expect_s3_class(as_polars_series(clock::zoned_time_now(zone = "")), "RPolarsSeries") }) + + +test_that("as_polars_series for nested type", { + expect_true( + as_polars_series(list(list(data.frame(a = 1))))$dtype == pl$List(pl$List(pl$Struct(a = pl$Float64))) + ) + + # TODO: this shouldn't error + expect_grepl_error( + as_polars_series(list(as_polars_series(NULL), as_polars_series(1L))), + "One element was null and another was i32" + ) +}) diff --git a/tests/testthat/test-expr_list.R b/tests/testthat/test-expr_list.R index 607064af1..eff33755f 100644 --- a/tests/testthat/test-expr_list.R +++ b/tests/testthat/test-expr_list.R @@ -1,5 +1,5 @@ test_that("list$len", { - df = pl$DataFrame(list_of_strs = as_polars_series(list(c("a", "b"), "c", character(), list(), NULL))) + df = pl$DataFrame(list_of_strs = as_polars_series(list(c("a", "b"), "c", character(), NULL, NULL))) l = df$with_columns(pl$col("list_of_strs")$list$len()$alias("list_of_strs_lengths"))$to_list() expect_identical(