Skip to content

Commit

Permalink
Enable $explode() for Data/LazyFrame (#314)
Browse files Browse the repository at this point in the history
Co-authored-by: sorhawell <[email protected]>
  • Loading branch information
etiennebacher and sorhawell authored Jul 27, 2023
1 parent 4b4ce97 commit c24eb71
Show file tree
Hide file tree
Showing 11 changed files with 235 additions and 12 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# polars (development version)

## What's changed

- New method `$explode()` for `DataFrame` and `LazyFrame`.

# polars 0.7.0

## BREAKING CHANGES
Expand Down
18 changes: 18 additions & 0 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -1545,3 +1545,21 @@ DataFrame_glimpse = function(..., return_as_string = FALSE) {
# chose return type
if (return_as_string) output else invisible(cat(output))
}


#' @inherit LazyFrame_explode title params
#'
#' @keywords DataFrame
#' @return DataFrame
#' @examples
#' df = pl$DataFrame(
#'   letters = c("a", "a", "b", "c"),
#'   numbers = list(1, c(2, 3), c(4, 5), c(6, 7, 8))
#' )
#' df
#'
#' df$explode("numbers")
DataFrame_explode = function(columns = list(), ...) {
  # `columns` defaults to an empty list, the same default LazyFrame_explode
  # uses. Without a default, a call that supplies no columns forwards a missing
  # argument and fails with R's generic "argument is missing" error instead of
  # the dedicated error raised by the lazy explode() implementation.
  #
  # The work itself is delegated to the lazy engine: convert to a LazyFrame,
  # explode there, then collect the result back into a DataFrame.
  self$lazy()$explode(columns, ...)$collect()
}
2 changes: 2 additions & 0 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,8 @@ LazyFrame$rename <- function(existing, new) .Call(wrap__LazyFrame__rename, self,

LazyFrame$schema <- function() .Call(wrap__LazyFrame__schema, self)

# Binding for the LazyFrame `explode` method: passes `self`, `columns` and
# `dotdotdot_args` unchanged to the native routine `wrap__LazyFrame__explode`
# through `.Call`.
LazyFrame$explode <- function(columns, dotdotdot_args) .Call(wrap__LazyFrame__explode, self, columns, dotdotdot_args)

#' @export
`$.LazyFrame` <- function (self, name) { func <- LazyFrame[[name]]; environment(func) <- environment(); func }

Expand Down
22 changes: 22 additions & 0 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -925,3 +925,25 @@ LazyFrame_dtypes = method_as_property(function() {
result() |>
unwrap("in $dtypes()")
})

#' @title Explode the DataFrame to long format by exploding the given columns
#' @keywords LazyFrame
#'
#' @param columns The column(s) to explode: an `Into<Expr>`, a list of
#' `Into<Expr>`, or a character vector. Only columns of DataType `List` or
#' `Utf8` can be exploded.
#' @param ... Additional columns to explode, given in the same forms as
#' `columns` but passed as separate arguments.
#'
#' @return LazyFrame
#' @examples
#' df = pl$LazyFrame(
#'   letters = c("a", "a", "b", "c"),
#'   numbers = list(1, c(2, 3), c(4, 5), c(6, 7, 8))
#' )
#' df
#'
#' df$explode("numbers")$collect()
LazyFrame_explode = function(columns = list(), ...) {
  # gather the extra columns passed via `...` before calling into Rust
  extra_columns = list2(...)
  # the Rust side returns a result object; unwrap() turns it into a value or
  # raises the error with the given context
  explode_result = .pr$LazyFrame$explode(self, columns, extra_columns)
  unwrap(explode_result, "in explode():")
}
30 changes: 30 additions & 0 deletions man/DataFrame_explode.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions man/LazyFrame_explode.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/nanoarrow.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions src/rust/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::rdatatype::new_quantile_interpolation_option;
use crate::rdatatype::new_unique_keep_strategy;
use crate::rdatatype::{new_asof_strategy, RPolarsDataType};
use crate::robj_to;
use crate::rpolarserr::rerr;
use crate::rpolarserr::RResult;
use crate::rpolarserr::{Rctx, WithRctx};
use crate::utils::wrappers::null_to_opt;
Expand Down Expand Up @@ -388,6 +389,18 @@ impl LazyFrame {
pairs.map(|(name, ty)| (name, RPolarsDataType(ty.clone()))),
))
}

// Explode the given columns of this lazy frame.
//
// `columns` and `dotdotdot_args` are each converted into a `Vec<pl::Expr>`
// with `robj_to!(Vec, PLExprCol, ..)` and concatenated in that order. If the
// combined list is empty an error is returned; otherwise `explode` is applied
// to a clone of the wrapped lazy frame.
fn explode(&self, columns: Robj, dotdotdot_args: Robj) -> RResult<LazyFrame> {
    let mut exprs: Vec<pl::Expr> = robj_to!(Vec, PLExprCol, columns)?;
    let extra_exprs: Vec<pl::Expr> = robj_to!(Vec, PLExprCol, dotdotdot_args)?;
    exprs.extend(extra_exprs);

    // explode needs at least one expression to work on
    if exprs.is_empty() {
        return rerr()
            .plain("neither have any elements, cannot use explode without Expr(s)")
            .when("joining Exprs from input [columns] and input [...]");
    }

    let exploded = self.0.clone().explode(exprs);
    Ok(exploded.into())
}
}

#[derive(Clone)]
Expand Down
31 changes: 23 additions & 8 deletions src/rust/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,7 @@ pub fn robj_to_datatype(robj: extendr_api::Robj) -> RResult<RPolarsDataType> {
// wrap_e also allows converting any allowed non-Expr value into an Expr
pub fn robj_to_rexpr(robj: extendr_api::Robj, str_to_lit: bool) -> RResult<Expr> {
let robj = unpack_r_result_list(robj)?;
let robj_clone = robj.clone(); //reserve shallowcopy for writing err msg

// use the R-side wrap_e to convert any R value into an Expr (or an error result)
use extendr_api::*;
Expand All @@ -616,7 +617,9 @@ pub fn robj_to_rexpr(robj: extendr_api::Robj, str_to_lit: bool) -> RResult<Expr>
.plain("internal error: polars:::result failed to catch this error")?;

// handle any error from wrap_e
let robj_expr = unpack_r_result_list(robj_result_expr).when("converting R value to expr")?;
let robj_expr = unpack_r_result_list(robj_result_expr)
.bad_robj(&robj_clone)
.plain("cannot be converted into an Expr")?;

//PolarsExpr -> RExpr
let res: ExtendrResult<ExternalPtr<Expr>> = robj_expr.clone().try_into();
Expand Down Expand Up @@ -644,9 +647,11 @@ pub fn list_expr_to_vec_pl_expr(robj: Robj, str_to_lit: bool) -> RResult<Vec<pl:
.as_list()
.ok_or(RPolarsErr::new())
.mistyped(tn::<List>())?;
let iter = l
.iter()
.map(|(_, robj)| robj_to_rexpr(robj, str_to_lit).map(|e| e.0));
let iter = l.iter().enumerate().map(|(i, (_, robj))| {
robj_to_rexpr(robj.clone(), str_to_lit)
.when(format!("converting element {} into an Expr", i + 1))
.map(|e| e.0)
});
crate::utils::collect_hinted_result_rerr::<pl::Expr>(l.len(), iter)
}

Expand Down Expand Up @@ -711,6 +716,10 @@ macro_rules! robj_to_inner {
$crate::utils::robj_to_rexpr($a, false)
};

(PLExprCol, $a:ident) => {
$crate::utils::robj_to_rexpr($a, false).map(|ok| ok.0)
};

(VecPLExpr, $a:ident) => {
$crate::utils::list_expr_to_vec_pl_expr($a, true)
};
Expand Down Expand Up @@ -783,10 +792,16 @@ macro_rules! robj_to {
};
if x.is_list() {
// convert each element in list to $type
let iter = x.as_list().unwrap().iter().enumerate().map(|(i, (_, $a))| {
robj_to!($type, $a, format!("element no. [{}] of ", i + 1))
});
$crate::utils::collect_hinted_result_rerr::<$type>(x.len(), iter)
let iter =
x.as_list().unwrap().iter().enumerate().map(|(i, (_, $a))| {
robj_to!($type, $a, format!("element no. [{}] ", i + 1))
});

// TODO: reintroduce collect_hinted_result_rerr as a trait instead of a generic fn;
// the generic forces $type to be a literal type in scope, not e.g. PLExprCol
//$crate::utils::collect_hinted_result_rerr::<$type>(x.len(), iter)
let x: Result<_, _> = iter.collect();
x
} else {
// single value without list, convert as is and wrap in a list
let $a = x;
Expand Down
44 changes: 44 additions & 0 deletions tests/testthat/test-dataframe.R
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,50 @@ test_that("glimpse", {
expect_true(is_string(pl$DataFrame(iris)$glimpse(return_as_string = TRUE)))
})

test_that("explode", {
  # one output row per list element; the other column is repeated accordingly
  nested = pl$DataFrame(
    letters = c("a", "a", "b", "c"),
    numbers = list(1, c(2, 3), c(4, 5), c(6, 7, 8))
  )
  actual = nested$explode("numbers")$to_data_frame()
  expected = data.frame(
    letters = rep(c("a", "b", "c"), times = c(3, 2, 3)),
    numbers = 1:8
  )
  expect_equal(actual, expected)

  # a NULL list entry is kept as a single row holding NA
  nested_with_null = pl$DataFrame(
    letters = c("a", "a", "b", "c"),
    numbers = list(1, NULL, c(4, 5), c(6, 7, 8))
  )
  actual = nested_with_null$explode("numbers")$to_data_frame()
  expected = data.frame(
    letters = rep(c("a", "b", "c"), times = c(2, 2, 3)),
    numbers = c(1, NA, 4:8)
  )
  expect_equal(actual, expected)

  # several columns at once: one passed as a string, one as an Expr via `...`
  two_nested = pl$DataFrame(
    letters = c("a", "a", "b", "c"),
    numbers = list(1, NULL, c(4, 5), c(6, 7, 8)),
    numbers2 = list(1, NULL, c(4, 5), c(6, 7, 8))
  )
  actual = two_nested$explode("numbers", pl$col("numbers2"))$to_data_frame()
  expected = data.frame(
    letters = rep(c("a", "b", "c"), times = c(2, 2, 3)),
    numbers = c(1, NA, 4:8),
    numbers2 = c(1, NA, 4:8)
  )
  expect_equal(actual, expected)
})

test_that("with_row_count", {
df = pl$DataFrame(mtcars)
expect_identical(df$with_row_count("idx", 42)$select(pl$col("idx"))$to_data_frame()$idx, as.double(42:(41+nrow(mtcars))))
Expand Down
45 changes: 45 additions & 0 deletions tests/testthat/test-lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,51 @@ test_that("select with list of exprs", {
expect_equal(x6$columns, c("mpg", "hp"))
})


test_that("explode", {
  # one output row per list element; the other column is repeated accordingly
  nested = pl$LazyFrame(
    letters = c("a", "a", "b", "c"),
    numbers = list(1, c(2, 3), c(4, 5), c(6, 7, 8))
  )
  actual = nested$explode("numbers")$collect()$to_data_frame()
  expected = data.frame(
    letters = rep(c("a", "b", "c"), times = c(3, 2, 3)),
    numbers = 1:8
  )
  expect_equal(actual, expected)

  # a NULL list entry is kept as a single row holding NA
  nested_with_null = pl$LazyFrame(
    letters = c("a", "a", "b", "c"),
    numbers = list(1, NULL, c(4, 5), c(6, 7, 8))
  )
  actual = nested_with_null$explode("numbers")$collect()$to_data_frame()
  expected = data.frame(
    letters = rep(c("a", "b", "c"), times = c(2, 2, 3)),
    numbers = c(1, NA, 4:8)
  )
  expect_equal(actual, expected)

  # several columns at once: one passed as a string, one as an Expr via `...`
  two_nested = pl$LazyFrame(
    letters = c("a", "a", "b", "c"),
    numbers = list(1, NULL, c(4, 5), c(6, 7, 8)),
    numbers2 = list(1, NULL, c(4, 5), c(6, 7, 8))
  )
  actual = two_nested$explode("numbers", pl$col("numbers2"))$collect()$to_data_frame()
  expected = data.frame(
    letters = rep(c("a", "b", "c"), times = c(2, 2, 3)),
    numbers = c(1, NA, 4:8),
    numbers2 = c(1, NA, 4:8)
  )
  expect_equal(actual, expected)
})

test_that("width", {
dat = pl$LazyFrame(mtcars)
expect_equal(dat$width, 11)
Expand Down

0 comments on commit c24eb71

Please sign in to comment.