Skip to content

Commit

Permalink
implement $with_context()
Browse files Browse the repository at this point in the history
  • Loading branch information
etiennebacher committed Nov 8, 2023
1 parent cb53e75 commit ca7238e
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 0 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
- New methods `$peak_min()` and `$peak_max()` to find local minima and maxima in
an Expr (#462).
- New methods `$read_ndjson()` and `$scan_ndjson()` (#471).
- New method `$with_context()` for `LazyFrame`, which gives expressions access to
columns from other DataFrames or LazyFrames during the computation.

# polars 0.9.0

Expand Down
2 changes: 2 additions & 0 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,8 @@ LazyFrame$explode <- function(dotdotdot) .Call(wrap__LazyFrame__explode, self, d

# Forwards to the native routine `wrap__LazyFrame__clone_see_me_macro`, passing `self`.
LazyFrame$clone_see_me_macro <- function() .Call(wrap__LazyFrame__clone_see_me_macro, self)

# Forwards to the native routine `wrap__LazyFrame__with_context`, passing `self`
# and the user-supplied `other` unchanged.
LazyFrame$with_context <- function(other) .Call(wrap__LazyFrame__with_context, self, other)

# `$` accessor for LazyFrame objects: looks up `name` in the `LazyFrame`
# method table and rebinds the found function's environment to this call's
# environment, so that `self` is visible inside the method body.
#' @export
`$.LazyFrame` <- function (self, name) { func <- LazyFrame[[name]]; environment(func) <- environment(); func }

Expand Down
34 changes: 34 additions & 0 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -1547,3 +1547,37 @@ LazyFrame_unnest = function(names = NULL) {
}
unwrap(.pr$LazyFrame$unnest(self, names), "in $unnest():")
}

#' Add an external context to the computation graph
#'
#' This allows expressions to also access columns from DataFrames or LazyFrames
#' that are not part of this one.
#'
#' @param other A DataFrame or LazyFrame whose columns should be made
#' available to expressions, or a list of DataFrames and/or LazyFrames.
#' @return A LazyFrame
#'
#' @examples
#' lf = pl$LazyFrame(a = c(1, 2, 3), b = c("a", "c", NA))
#' lf_other = pl$LazyFrame(c = c("foo", "ham"))
#'
#' lf$with_context(lf_other)$select(
#'   pl$col("b") + pl$col("c")$first()
#' )$collect()
#'
#' # Fill nulls with the median from another lazyframe:
#' train_lf = pl$LazyFrame(
#'   feature_0 = c(-1.0, 0, 1), feature_1 = c(-1.0, 0, 1)
#' )
#' test_lf = pl$LazyFrame(
#'   feature_0 = c(-1.0, NA, 1), feature_1 = c(-1.0, 0, 1)
#' )
#'
#' test_lf$with_context(train_lf$select(pl$all()$name$suffix("_train")))$select(
#'   pl$col("feature_0")$fill_null(pl$col("feature_0_train")$median())
#' )$collect()
LazyFrame_with_context = function(other) {
  # The error context uses the `$method()` form, matching the other LazyFrame
  # methods in this file (e.g. "in $unnest():").
  .pr$LazyFrame$with_context(self, other) |>
    unwrap("in $with_context():")
}
39 changes: 39 additions & 0 deletions man/LazyFrame_with_context.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions src/rust/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,14 @@ impl LazyFrame {
/// Returns a clone of this `LazyFrame` wrapper.
///
/// NOTE(review): the name suggests this exists for a macro defined
/// elsewhere — confirm against that macro before renaming or removing.
pub fn clone_see_me_macro(&self) -> LazyFrame {
self.clone()
}

/// Attaches additional frames as context to this lazy query.
///
/// `contexts` is converted with `robj_to!(Vec, LazyFrame, ...)`; a failed
/// conversion is returned to the caller as the error of the `RResult`.
/// Each converted wrapper is unwrapped to its inner frame, and the inner
/// `with_context` is called on a clone of this frame's inner value, so
/// `self` is left untouched.
pub fn with_context(&self, contexts: Robj) -> RResult<Self> {
    let wrappers = robj_to!(Vec, LazyFrame, contexts)?;

    // Strip the wrapper type: the inner call expects the wrapped frames.
    let mut inner_frames = Vec::with_capacity(wrappers.len());
    for wrapper in wrappers {
        inner_frames.push(wrapper.0);
    }

    let extended = self.0.clone().with_context(inner_frames);
    Ok(extended.into())
}
}

#[derive(Clone)]
Expand Down
26 changes: 26 additions & 0 deletions tests/testthat/test-lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -845,3 +845,29 @@ test_that("opt_toggles", {
lf_new_opts$sink_ipc(tmpf, inherit_optimization = TRUE)
expect_identical(pl$scan_ipc(tmpf, memmap = FALSE)$collect()$to_data_frame(), df_defaults)
})

test_that("with_context works", {
  # A column from the context frame can be combined with a column of `lf`.
  lf = pl$LazyFrame(a = c(1, 2, 3), b = c("a", "c", NA))
  lf_other = pl$LazyFrame(c = c("foo", "ham"))

  concatenated = lf$with_context(lf_other)$select(
    pl$col("b") + pl$col("c")$first()
  )$collect()$to_data_frame()

  expect_identical(
    concatenated,
    data.frame(b = c("afoo", "cfoo", NA))
  )

  # Nulls in the test frame are filled with the median of the train frame,
  # whose columns are exposed under a "_train" suffix.
  train_lf = pl$LazyFrame(
    feature_0 = c(-1.0, 0, 1), feature_1 = c(-1.0, 0, 1)
  )
  test_lf = pl$LazyFrame(
    feature_0 = c(-1.0, NA, 1), feature_1 = c(-1.0, 0, 1)
  )

  train_context = train_lf$select(pl$all()$name$suffix("_train"))
  filled = test_lf$with_context(train_context)$select(
    pl$col("feature_0")$fill_null(pl$col("feature_0_train")$median())
  )$collect()$to_data_frame()

  expect_identical(
    filled,
    data.frame(feature_0 = c(-1, 0, 1))
  )
})

0 comments on commit ca7238e

Please sign in to comment.