Skip to content

Commit

Permalink
Implement concat_str() (#349)
Browse files Browse the repository at this point in the history
Co-authored-by: sorhawell <[email protected]>
Co-authored-by: Macronova <[email protected]>
  • Loading branch information
3 people authored Aug 8, 2023
1 parent 53a6243 commit d3c8077
Show file tree
Hide file tree
Showing 25 changed files with 172 additions and 63 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
- New method `$clone()` for `LazyFrame` (#347).
- `$with_column()` is now deprecated (following upstream `polars`). It will be
removed in 0.9.0. It should be replaced with `$with_columns()` (#313).
- New lazy function translated: `concat_str()` to concatenate several columns
into one (#349).
- New stat functions `pl$cov()`, `pl$rolling_cov()`, `pl$corr()`, `pl$rolling_corr()` (#351).

# polars 0.7.0
Expand Down
9 changes: 4 additions & 5 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -1003,13 +1003,13 @@ DataFrame_to_list = function(unnest_structs = TRUE) {
#' @keywords DataFrame
#' @examples
#' # inner join by default
#' df1 <- pl$DataFrame(list(key = 1:3, payload = c("f", "i", NA)))
#' df2 <- pl$DataFrame(list(key = c(3L, 4L, 5L, NA_integer_)))
#' df1 = pl$DataFrame(list(key = 1:3, payload = c("f", "i", NA)))
#' df2 = pl$DataFrame(list(key = c(3L, 4L, 5L, NA_integer_)))
#' df1$join(other = df2, on = "key")
#'
#' # cross join
#' df1 <- pl$DataFrame(x = letters[1:3])
#' df2 <- pl$DataFrame(y = 1:4)
#' df1 = pl$DataFrame(x = letters[1:3])
#' df2 = pl$DataFrame(y = 1:4)
#' df1$join(other = df2, how = "cross")
#'
DataFrame_join = function(
Expand Down Expand Up @@ -1560,7 +1560,6 @@ DataFrame_glimpse = function(..., return_as_string = FALSE) {
#' df
#'
#' df$explode("numbers")

DataFrame_explode = function(columns, ...) {
  # Delegate to the lazy API: convert with lazy(), explode there, then
  # call collect() on what explode() returned.
  exploded_lazy = self$lazy()$explode(columns, ...)
  exploded_lazy$collect()
}
12 changes: 6 additions & 6 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -3322,7 +3322,7 @@ Expr_diff = function(n = 1, null_behavior = "ignore") {
#' @keywords Expr
#' @examples
#' df = pl$DataFrame(list(a = c(10L, 11L, 12L, NA_integer_, 12L)))
#' df$with_column(pl$col("a")$pct_change()$alias("pct_change"))
#' df$with_columns(pl$col("a")$pct_change()$alias("pct_change"))
Expr_pct_change = function(n = 1) {
  # Forward `n` to the internal pct_change method and pass its result
  # through unwrap().
  .pr$Expr$pct_change(self, n) |> unwrap()
}
Expand Down Expand Up @@ -3417,7 +3417,7 @@ Expr_kurtosis = function(fisher = TRUE, bias = TRUE) {
#'
#' @examples
#' df = pl$DataFrame(foo = c(-50L, 5L, NA_integer_, 50L))
#' df$with_column(pl$col("foo")$clip(1L, 10L)$alias("foo_clipped"))
#' df$with_columns(pl$col("foo")$clip(1L, 10L)$alias("foo_clipped"))
Expr_clip = function(min, max) {
  # Both bounds are passed through wrap_e() before being handed to the
  # internal clip method; the result then goes through unwrap().
  .pr$Expr$clip(self, wrap_e(min), wrap_e(max)) |> unwrap()
}
Expand All @@ -3427,7 +3427,7 @@ Expr_clip = function(min, max) {
#' @aliases clip_min
#' @keywords Expr
#' @examples
#' df$with_column(pl$col("foo")$clip_min(1L)$alias("foo_clipped"))
#' df$with_columns(pl$col("foo")$clip_min(1L)$alias("foo_clipped"))
Expr_clip_min = function(min) {
  # The lower bound is passed through wrap_e() before being handed to the
  # internal clip_min method; the result then goes through unwrap().
  .pr$Expr$clip_min(self, wrap_e(min)) |> unwrap()
}
Expand All @@ -3437,7 +3437,7 @@ Expr_clip_min = function(min) {
#' @aliases clip_max
#' @keywords Expr
#' @examples
#' df$with_column(pl$col("foo")$clip_max(10L)$alias("foo_clipped"))
#' df$with_columns(pl$col("foo")$clip_max(10L)$alias("foo_clipped"))
Expr_clip_max = function(max) {
  # The upper bound is passed through wrap_e() before being handed to the
  # internal clip_max method; the result then goes through unwrap().
  .pr$Expr$clip_max(self, wrap_e(max)) |> unwrap()
}
Expand Down Expand Up @@ -4182,7 +4182,7 @@ Expr_list = function() {
#' f = c("a", "b", "c"),
#' g = c(0.1, 1.32, 0.12),
#' h = c(TRUE, NA, FALSE)
#' )$with_column(pl$col("b")$cast(pl$Int64) * 32L)$select(pl$all()$shrink_dtype())
#' )$with_columns(pl$col("b")$cast(pl$Int64) * 32L)$select(pl$all()$shrink_dtype())
Expr_shrink_dtype = "use_extendr_wrapper" # NOTE(review): placeholder string, presumably swapped for the extendr-generated method elsewhere - confirm


Expand All @@ -4204,7 +4204,7 @@ Expr_shrink_dtype = "use_extendr_wrapper"
#' )$agg(
#' pl$col("value") * 3L
#' )
#' df_with_list$with_column(
#' df_with_list$with_columns(
#' pl$col("value")$arr$lengths()$alias("group_size")
#' )
Expr_arr = method_as_property(function() {
Expand Down
4 changes: 2 additions & 2 deletions R/expr__list.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#' @aliases lengths arr.lengths arr_lengths
#' @examples
#' df = pl$DataFrame(list_of_strs = pl$Series(list(c("a", "b"), "c")))
#' df$with_column(pl$col("list_of_strs")$arr$lengths()$alias("list_of_strs_lengths"))
#' df$with_columns(pl$col("list_of_strs")$arr$lengths()$alias("list_of_strs_lengths"))
ExprArr_lengths = function() {
  # Direct pass-through to the internal arr_lengths method; the result is
  # returned as-is (no unwrap() here).
  .pr$Expr$arr_lengths(self)
}

#' Sum lists
Expand Down Expand Up @@ -415,7 +415,7 @@ ExprArr_to_struct = function(
#' @aliases arr_eval arr.eval
#' @examples
#' df = pl$DataFrame(a = list(c(1, 8, 3), b = c(4, 5, 2)))
#' df$select(pl$all()$cast(pl$dtypes$Int64))$with_column(
#' df$select(pl$all()$cast(pl$dtypes$Int64))$with_columns(
#' pl$concat_list(c("a", "b"))$arr$eval(pl$element()$rank())$alias("rank")
#' )
ExprArr_eval = function(expr, parallel = FALSE) {
Expand Down
2 changes: 2 additions & 0 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ sum_exprs <- function(exprs) .Call(wrap__sum_exprs, exprs)

# Forwards `exprs` unchanged to the native routine `wrap__concat_list` via .Call().
concat_list <- function(exprs) .Call(wrap__concat_list, exprs)

# Forwards `dotdotdot` and `separator` unchanged to the native routine
# `wrap__concat_str` via .Call().
concat_str <- function(dotdotdot, separator) .Call(wrap__concat_str, dotdotdot, separator)

# Forwards all seven arguments, in order, to the native routine
# `wrap__r_date_range` via .Call().
r_date_range <- function(start, stop, every, closed, name, tu, tz) .Call(wrap__r_date_range, start, stop, every, closed, name, tu, tz)

# Forwards all five arguments, in order, to the native routine
# `wrap__r_date_range_lazy` via .Call().
r_date_range_lazy <- function(start, end, every, closed, tz) .Call(wrap__r_date_range_lazy, start, end, every, closed, tz)
Expand Down
44 changes: 37 additions & 7 deletions R/functions__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -529,12 +529,12 @@ pl$approx_unique = function(column) { #-> int or Expr
#' df = pl$DataFrame(a = 1:2, b = 3:4, c = 5:6)
#'
#' # column as list
#' df$with_column(pl$sum(list("a", "c")))
#' df$with_column(pl$sum(list("a", "c", 42L)))
#' df$with_columns(pl$sum(list("a", "c")))
#' df$with_columns(pl$sum(list("a", "c", 42L)))
#'
#' # two equivalent lines
#' df$with_column(pl$sum(list(pl$col("a") + pl$col("b"), "c")))
#' df$with_column(pl$sum(list("*")))
#' df$with_columns(pl$sum(list(pl$col("a") + pl$col("b"), "c")))
#' df$with_columns(pl$sum(list("*")))
pl$sum = function(...) {
column = list2(...)
if (length(column) == 1L) column <- column[[1L]]
Expand Down Expand Up @@ -576,7 +576,7 @@ pl$sum = function(...) {
#' d = c(1:2, NA_real_, -Inf)
#' )
#' # use min to get the smallest value of "a", "b", "c" and the literal 99.9 for each row
#' df$with_column(
#' df$with_columns(
#' pl$min("a", "b", "c", 99.9)$alias("d")
#' )
#'
Expand Down Expand Up @@ -623,7 +623,7 @@ pl$min = function(...) {
#' c = c(1:3, NA_real_)
#' )
#' # use coalesce to get first non Null value for each row, otherwise insert 99.9
#' df$with_column(
#' df$with_columns(
#' pl$coalesce("a", "b", "c", 99.9)$alias("d")
#' )
#'
Expand Down Expand Up @@ -669,7 +669,7 @@ pl$max = function(...) {
#' c = c(1:3, NA_real_)
#' )
#' # use coalesce to get first non Null value for each row, otherwise insert 99.9
#' df$with_column(
#' df$with_columns(
#' pl$coalesce("a", "b", "c", 99.9)$alias("d")
#' )
#'
Expand Down Expand Up @@ -845,6 +845,36 @@ pl$struct = function(
)
}

#' Horizontally concatenate columns into a single string column
#'
#' @param ... Columns to concatenate into a single string column. Accepts
#' expressions. Strings are parsed as column names, other non-expression inputs
#' are parsed as literals. Non-Utf8 columns are cast to Utf8.
#' @param separator String that will be used to separate the values of each
#' column.
#' @name pl_concat_str
#' @return Expr
#' @examples
#' df = pl$DataFrame(
#' a = c(1, 2, 3),
#' b = c("dogs", "cats", NA),
#' c = c("play", "swim", "walk")
#' )
#'
#' df$with_columns(
#' pl$concat_str(
#' pl$col("a") * 2,
#' "b",
#' "c",
#' pl$lit("!"),
#' separator = " "
#' )$alias("full_sentence")
#' )
#'
pl$concat_str = function(..., separator = "") {
  # Collect the dots with list2(), hand them and the separator to the
  # concat_str() wrapper, and unwrap the result with a context label that
  # names this function.
  unwrap(concat_str(list2(...), separator), "in $concat_str()")
}

#' Covariance
#' @name pl_cov
#' @description Calculates the covariance between two columns / expressions.
Expand Down
8 changes: 4 additions & 4 deletions man/DataFrame_join.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/Expr_arr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/Expr_clip.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/Expr_pct_change.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/Expr_shrink_dtype.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/arr_eval.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/arr_lengths.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/pl_coalesce.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions man/pl_concat_str.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/pl_max.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/pl_min.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/pl_pl.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/pl_sum.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions src/rust/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ impl LazyFrame {
}

fn with_column(&self, expr: &Expr) -> LazyFrame {
    // Deprecated entry point: evaluate an R `warning(...)` call on every use so
    // callers are pointed at `with_columns()`.
    // NOTE(review): `expect` panics if evaluating the warning returns an error
    // (presumably possible when R escalates warnings to errors, e.g.
    // `options(warn = 2)`) - confirm this is acceptable.
    let deprecation_notice = R!("warning('`with_column()` is deprecated and will be removed in polars 0.9.0. Please use `with_columns()` instead.')");
    deprecation_notice.expect("warning will not fail");

    // Clone the wrapped frame and expression, then add the column to the clone.
    let (frame, column) = (self.0.clone(), expr.0.clone());
    LazyFrame(frame.with_column(column))
}

Expand Down
Loading

0 comments on commit d3c8077

Please sign in to comment.