From 5be67e8c21af0286a32ebd141f628126766cef59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Havelund=20Welling?= Date: Tue, 8 Aug 2023 15:17:24 +0200 Subject: [PATCH] Fix `$describe()` bug for column names containing a `:` (#342) Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- R/dataframe__frame.R | 6 +++--- tests/testthat/test-dataframe.R | 11 +++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 46f5e3e0a..fe451c570 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1465,12 +1465,12 @@ DataFrame_describe = function(percentiles = c(.25, .75)) { # compute aggregates df_aggs = do.call(self$select, largs) - e_col_row_names = pl$lit(df_aggs$columns)$str$split(":") + e_col_row_names = pl$lit(df_aggs$columns)$str$splitn(":", 2) # pivotize df_pivot = pl$select( - e_col_row_names$arr$first()$alias("rowname"), - e_col_row_names$arr$last()$alias("colname"), + e_col_row_names$struct$field("field_0")$alias("rowname"), + e_col_row_names$struct$field("field_1")$alias("colname"), pl$lit(unlist(as.data.frame(df_aggs)))$alias("value") )$pivot( values = "value", index = "rowname", columns = "colname" diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index 7809a8cc7..a795481e9 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -965,6 +965,13 @@ test_that("describe", { pl$DataFrame(mtcars)$describe(perc = numeric())$to_list(), pl$DataFrame(mtcars)$describe(perc = NULL)$to_list() ) + + # names using internal separator ":" in column names, should also just work. + df = pl$DataFrame("foo:bar:jazz" = 1, pl$Series(2, name = ""), "foobar" = 3) + expect_identical( + df$describe()$columns, + c("describe", df$columns) + ) }) test_that("glimpse", { @@ -1019,8 +1026,8 @@ test_that("explode", { ) ) }) - + test_that("with_row_count", { df = pl$DataFrame(mtcars) - expect_identical(df$with_row_count("idx", 42)$select(pl$col("idx"))$to_data_frame()$idx, as.double(42:(41+nrow(mtcars)))) + expect_identical(df$with_row_count("idx", 42)$select(pl$col("idx"))$to_data_frame()$idx, as.double(42:(41 + nrow(mtcars)))) })