diff --git a/NEWS.md b/NEWS.md index 3626dd379..61df3a836 100644 --- a/NEWS.md +++ b/NEWS.md @@ -103,6 +103,7 @@ - New functions `pl$int_range()` and `pl$int_ranges()` (#968). - New string method `$str$extract_groups()` (#979). - New string method `$str$find()` (#985). +- Method `$over()` gains an argument `mapping_strategy` (#984, #988). ### Bug fixes diff --git a/R/expr__expr.R b/R/expr__expr.R index 4306fb1fc..05a4003bf 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -1829,7 +1829,7 @@ Expr_last = use_extendr_wrapper #' df = pl$DataFrame( #' a = c("a", "a", "b", "b", "b"), #' b = c(1, 2, 3, 5, 3), -#' c = c(5, 4, 3, 2, 1) +#' c = c(5, 4, 2, 1, 3) #' ) #' #' df$with_columns( @@ -1855,6 +1855,11 @@ Expr_last = use_extendr_wrapper #' df$with_columns( #' pl$col("c")$min()$over("a", pl$col("b") %% 2)$name$suffix("_min") #' ) +#' +#' # Alternative mapping strategy: join values in a list output +#' df$with_columns( +#' top_2 = pl$col("c")$top_k(2)$over("a", mapping_strategy = "join") +#' ) Expr_over = function(..., mapping_strategy = "group_to_rows") { list_of_exprs = list2(...) |> lapply(\(x) { diff --git a/man/Expr_over.Rd b/man/Expr_over.Rd index 08eca74fa..84bcc9256 100644 --- a/man/Expr_over.Rd +++ b/man/Expr_over.Rd @@ -35,7 +35,7 @@ The outcome is similar to how window functions work in df = pl$DataFrame( a = c("a", "a", "b", "b", "b"), b = c(1, 2, 3, 5, 3), - c = c(5, 4, 3, 2, 1) + c = c(5, 4, 2, 1, 3) ) df$with_columns( @@ -61,4 +61,9 @@ df$with_columns( df$with_columns( pl$col("c")$min()$over("a", pl$col("b") \%\% 2)$name$suffix("_min") ) + +# Alternative mapping strategy: join values in a list output +df$with_columns( + top_2 = pl$col("c")$top_k(2)$over("a", mapping_strategy = "join") +) } diff --git a/tests/testthat/test-expr_expr.R b/tests/testthat/test-expr_expr.R index b1eb9d8bf..2a1905a3e 100644 --- a/tests/testthat/test-expr_expr.R +++ b/tests/testthat/test-expr_expr.R @@ -241,30 +241,30 @@ test_that("min max", { expect_equal(names(fails), character()) }) -test_that("over", { - df = pl$DataFrame(list( +test_that("$over()", { + df = pl$DataFrame( val = 1:5, a = c("+", "+", "-", "-", "+"), b = c("+", "-", "+", "-", "+") - ))$select( + )$select( pl$col("val")$count()$over("a", pl$col("b")) ) # with vector of column names - df2 = pl$DataFrame(list( + df2 = pl$DataFrame( val = 1:5, a = c("+", "+", "-", "-", "+"), b = c("+", "-", "+", "-", "+") - ))$select( + )$select( pl$col("val")$count()$over(c("a", "b")) ) over_vars = c("a", "b") - df3 = pl$DataFrame(list( + df3 = pl$DataFrame( val = 1:5, a = c("+", "+", "-", "-", "+"), b = c("+", "-", "+", "-", "+") - ))$select( + )$select( pl$col("val")$count()$over(over_vars) ) @@ -299,6 +299,25 @@ test_that("over", { ) }) +test_that("$over() with mapping_strategy", { + df = pl$DataFrame( + val = 1:5, + a = c("+", "+", "-", "-", "+") + ) + + expect_grepl_error( + df$select(pl$col("val")$top_k(2)$over("a")), + "length of the window expression did not match that of the group" + ) + + expect_identical( + df$select(pl$col("val")$top_k(2)$over("a", mapping_strategy = "join"))$to_list(), + list( + val = list(c(5L, 2L), c(5L, 2L), c(4L, 3L), c(4L, 3L), c(5L, 2L)) + ) + ) +}) + test_that("col DataType + col(s) + col regex", { # one Datatype expect_equal(