Skip to content

Commit

Permalink
Implement $with_columns_seq() and $select_seq() (#1003)
Browse files Browse the repository at this point in the history
  • Loading branch information
etiennebacher authored Apr 1, 2024
1 parent 5565192 commit 67769df
Show file tree
Hide file tree
Showing 14 changed files with 374 additions and 71 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@
- Method `$over()` gains an argument `mapping_strategy` (#984, #988).
- New method `$item()` for `DataFrame` and `Series` (#992).
- New active binding `<Series>$struct$fields` (#1002).
- New methods `$select_seq()` and `$with_columns_seq()` for `DataFrame` and
`LazyFrame` (#1003).

### Bug fixes

Expand Down
53 changes: 53 additions & 0 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,26 @@ DataFrame_select = function(...) {
unwrap("in $select()")
}

#' @inherit DataFrame_select title params return
#'
#' @description
#' Similar to `dplyr::mutate()`. However, it discards unmentioned columns (like
#' `.()` in `data.table`).
#'
#' All expressions are evaluated sequentially rather than in parallel. Use this
#' when the work per expression is cheap; otherwise `$select()` should be
#' preferred.
#'
#' @examples
#' pl$DataFrame(iris)$select_seq(
#' pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#' (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
DataFrame_select_seq = function(...) {
  # Collect the expressions passed through `...`, forward them to the Rust
  # binding, and unwrap the returned result with the method name as context.
  unwrap(
    .pr$DataFrame$select_seq(
      self,
      unpack_list(..., .context = "in $select_seq()")
    ),
    "in $select_seq()"
  )
}

#' Drop in place
#' @name DataFrame_drop_in_place
#' @description Drop a single column in-place and return the dropped column.
Expand Down Expand Up @@ -821,6 +841,39 @@ DataFrame_with_columns = function(...) {
unwrap("in $with_columns()")
}

#' @inherit DataFrame_with_columns title params return
#'
#' @description
#' Add columns or modify existing ones with expressions. This is
#' the equivalent of `dplyr::mutate()` as it keeps unmentioned columns (unlike
#' `$select()`).
#'
#' All expressions are evaluated sequentially rather than in parallel. Use this
#' when the work per expression is cheap; otherwise `$with_columns()` should be
#' preferred.
#'
#' @examples
#' pl$DataFrame(iris)$with_columns_seq(
#' pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#' (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
#'
#' # same query
#' l_expr = list(
#' pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#' (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
#' pl$DataFrame(iris)$with_columns_seq(l_expr)
#'
#' pl$DataFrame(iris)$with_columns_seq(
#' pl$col("Sepal.Length")$abs(), # not named expr will keep name "Sepal.Length"
#' SW_add_2 = (pl$col("Sepal.Width") + 2)
#' )
DataFrame_with_columns_seq = function(...) {
  # Collect the expressions passed through `...`, forward them to the Rust
  # binding, and unwrap the returned result with the method name as context.
  unwrap(
    .pr$DataFrame$with_columns_seq(
      self,
      unpack_list(..., .context = "in $with_columns_seq()")
    ),
    "in $with_columns_seq()"
  )
}


#' @inherit LazyFrame_head title details
#' @param n Number of rows to return. If a negative value is passed,
Expand Down
8 changes: 7 additions & 1 deletion R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,12 @@ RPolarsDataFrame$drop_in_place <- function(names) .Call(wrap__RPolarsDataFrame__

RPolarsDataFrame$select <- function(exprs) .Call(wrap__RPolarsDataFrame__select, self, exprs)

RPolarsDataFrame$select_seq <- function(exprs) .Call(wrap__RPolarsDataFrame__select_seq, self, exprs)

RPolarsDataFrame$with_columns <- function(exprs) .Call(wrap__RPolarsDataFrame__with_columns, self, exprs)

RPolarsDataFrame$with_columns_seq <- function(exprs) .Call(wrap__RPolarsDataFrame__with_columns_seq, self, exprs)

RPolarsDataFrame$to_struct <- function(name) .Call(wrap__RPolarsDataFrame__to_struct, self, name)

RPolarsDataFrame$unnest <- function(names) .Call(wrap__RPolarsDataFrame__unnest, self, names)
Expand Down Expand Up @@ -1144,11 +1148,13 @@ RPolarsLazyFrame$slice <- function(offset, length) .Call(wrap__RPolarsLazyFrame_

RPolarsLazyFrame$with_columns <- function(exprs) .Call(wrap__RPolarsLazyFrame__with_columns, self, exprs)

RPolarsLazyFrame$with_columns_seq <- function(exprs) .Call(wrap__RPolarsLazyFrame__with_columns_seq, self, exprs)

RPolarsLazyFrame$unnest <- function(names) .Call(wrap__RPolarsLazyFrame__unnest, self, names)

RPolarsLazyFrame$select <- function(exprs) .Call(wrap__RPolarsLazyFrame__select, self, exprs)

RPolarsLazyFrame$select_str_as_lit <- function(exprs) .Call(wrap__RPolarsLazyFrame__select_str_as_lit, self, exprs)
RPolarsLazyFrame$select_seq <- function(exprs) .Call(wrap__RPolarsLazyFrame__select_seq, self, exprs)

RPolarsLazyFrame$tail <- function(n) .Call(wrap__RPolarsLazyFrame__tail, self, n)

Expand Down
44 changes: 42 additions & 2 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,23 @@ LazyFrame_select = function(...) {
unwrap("in $select()")
}

#' @title Select and modify columns of a LazyFrame
#' @inherit LazyFrame_select title
#' @inherit DataFrame_select_seq description params
#' @return A LazyFrame
#' @examples
#' pl$LazyFrame(iris)$select_seq(
#' pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#' (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
LazyFrame_select_seq = function(...) {
  # Collect the expressions passed through `...`, forward them to the Rust
  # binding, and unwrap the returned result with the method name as context.
  unwrap(
    .pr$LazyFrame$select_seq(
      self,
      unpack_list(..., .context = "in $select_seq()")
    ),
    "in $select_seq()"
  )
}

#' Select and modify columns of a LazyFrame
#'
#' @inherit DataFrame_with_columns description params
#' @keywords LazyFrame
#'
#' @return A LazyFrame
#' @examples
#' pl$LazyFrame(iris)$with_columns(
Expand All @@ -298,6 +312,32 @@ LazyFrame_with_columns = function(...) {
unwrap("in $with_columns()")
}

#' @inherit LazyFrame_with_columns title
#' @inherit DataFrame_with_columns_seq description params
#'
#' @return A LazyFrame
#' @examples
#' pl$LazyFrame(iris)$with_columns_seq(
#' pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#' (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
#'
#' # same query
#' l_expr = list(
#' pl$col("Sepal.Length")$abs()$alias("abs_SL"),
#' (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
#' pl$LazyFrame(iris)$with_columns_seq(l_expr)
#'
#' pl$LazyFrame(iris)$with_columns_seq(
#' pl$col("Sepal.Length")$abs(), # not named expr will keep name "Sepal.Length"
#' SW_add_2 = (pl$col("Sepal.Width") + 2)
#' )
LazyFrame_with_columns_seq = function(...) {
  # Collect the expressions passed through `...`, forward them to the Rust
  # binding, and unwrap the returned result with the method name as context.
  unwrap(
    .pr$LazyFrame$with_columns_seq(
      self,
      unpack_list(..., .context = "in $with_columns_seq()")
    ),
    "in $with_columns_seq()"
  )
}


#' @inherit DataFrame_with_row_index title description params
#' @return A new LazyFrame with a counter column in front
Expand Down
30 changes: 30 additions & 0 deletions man/DataFrame_select_seq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions man/DataFrame_with_columns_seq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions man/LazyFrame_select_seq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/LazyFrame_with_columns.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions man/LazyFrame_with_columns_seq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 11 additions & 8 deletions src/rust/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::lazy::dsl::*;
use crate::rdataframe::RPolarsDataFrame as RDF;
use crate::rdatatype::{new_ipc_compression, new_parquet_compression, RPolarsDataType};
use crate::robj_to;
use crate::rpolarserr::{polars_to_rpolars_err, RPolarsErr, RResult, WithRctx};
use crate::rpolarserr::{polars_to_rpolars_err, RPolarsErr, RResult};
use crate::utils::{r_result_list, try_f64_into_usize};
use extendr_api::prelude::*;
use pl::{AsOfOptions, Duration, RollingGroupOptions};
Expand Down Expand Up @@ -291,24 +291,27 @@ impl RPolarsLazyFrame {
}

pub fn with_columns(&self, exprs: Robj) -> RResult<Self> {
let exprs =
robj_to!(VecPLExprColNamed, exprs).when("preparing expressions for $with_columns()")?;
let exprs = robj_to!(VecPLExprColNamed, exprs)?;
Ok(RPolarsLazyFrame(self.clone().0.with_columns(exprs)))
}

// Converts `exprs` with `robj_to!(VecPLExprColNamed, ..)`, returning early on
// a conversion error, then applies `with_columns_seq` to a clone of the
// wrapped lazy frame and re-wraps the result.
pub fn with_columns_seq(&self, exprs: Robj) -> RResult<Self> {
    let named_exprs = robj_to!(VecPLExprColNamed, exprs)?;
    let updated = self.clone().0.with_columns_seq(named_exprs);
    Ok(RPolarsLazyFrame(updated))
}

// Applies `unnest(names)` to a clone of the wrapped lazy frame and re-wraps
// the result; this function itself always returns `Ok`.
pub fn unnest(&self, names: Vec<String>) -> RResult<Self> {
    let unnested = self.clone().0.unnest(names);
    Ok(RPolarsLazyFrame(unnested))
}

pub fn select(&self, exprs: Robj) -> RResult<Self> {
let exprs =
robj_to!(VecPLExprColNamed, exprs).when("preparing expressions for $select()")?;
let exprs = robj_to!(VecPLExprColNamed, exprs)?;
Ok(RPolarsLazyFrame(self.clone().0.select(exprs)))
}

pub fn select_str_as_lit(&self, exprs: Robj) -> RResult<Self> {
let exprs = robj_to!(VecPLExprNamed, exprs).when("preparing columns for DataFrame")?;
Ok(RPolarsLazyFrame(self.clone().0.select(exprs)))
// Converts `exprs` with `robj_to!(VecPLExprColNamed, ..)`, returning early on
// a conversion error, then applies `select_seq` to a clone of the wrapped
// lazy frame and re-wraps the result.
pub fn select_seq(&self, exprs: Robj) -> RResult<Self> {
    let named_exprs = robj_to!(VecPLExprColNamed, exprs)?;
    let selected = self.clone().0.select_seq(named_exprs);
    Ok(RPolarsLazyFrame(selected))
}

fn tail(&self, n: Robj) -> Result<RPolarsLazyFrame, String> {
Expand Down
8 changes: 8 additions & 0 deletions src/rust/src/rdataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,10 +308,18 @@ impl RPolarsDataFrame {
self.lazy().select(exprs)?.collect()
}

// Builds the query through the lazy API: `lazy()`, then `select_seq(exprs)`
// (errors are propagated with `?`), then `collect()` for the final result.
pub fn select_seq(&self, exprs: Robj) -> RResult<RPolarsDataFrame> {
    let lazy_query = self.lazy().select_seq(exprs)?;
    lazy_query.collect()
}

// Builds the query through the lazy API: `lazy()`, then `with_columns(exprs)`
// (errors are propagated with `?`), then `collect()` for the final result.
pub fn with_columns(&self, exprs: Robj) -> RResult<RPolarsDataFrame> {
    let lazy_query = self.lazy().with_columns(exprs)?;
    lazy_query.collect()
}

// Builds the query through the lazy API: `lazy()`, then
// `with_columns_seq(exprs)` (errors are propagated with `?`), then
// `collect()` for the final result.
pub fn with_columns_seq(&self, exprs: Robj) -> RResult<RPolarsDataFrame> {
    let lazy_query = self.lazy().with_columns_seq(exprs)?;
    lazy_query.collect()
}

pub fn to_struct(&self, name: Robj) -> RResult<RPolarsSeries> {
use pl::IntoSeries;
let name = robj_to!(Option, str, name)?.unwrap_or("");
Expand Down
Loading

0 comments on commit 67769df

Please sign in to comment.