diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 5896a24ce..0a2645912 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -1049,7 +1049,9 @@ LazyFrame$schema <- function() .Call(wrap__LazyFrame__schema, self) LazyFrame$fetch <- function(n_rows) .Call(wrap__LazyFrame__fetch, self, n_rows) -LazyFrame$optimization_toggle <- function(type_coercion, predicate_pushdown, projection_pushdown, simplify_expr, slice_pushdown, comm_subplan_elim, comm_subexpr_elim, streaming) .Call(wrap__LazyFrame__optimization_toggle, self, type_coercion, predicate_pushdown, projection_pushdown, simplify_expr, slice_pushdown, comm_subplan_elim, comm_subexpr_elim, streaming) +LazyFrame$set_optimization_toggle <- function(type_coercion, predicate_pushdown, projection_pushdown, simplify_expression, slice_pushdown, comm_subplan_elim, comm_subexpr_elim, streaming) .Call(wrap__LazyFrame__set_optimization_toggle, self, type_coercion, predicate_pushdown, projection_pushdown, simplify_expression, slice_pushdown, comm_subplan_elim, comm_subexpr_elim, streaming) + +LazyFrame$get_optimization_toggle <- function() .Call(wrap__LazyFrame__get_optimization_toggle, self) LazyFrame$profile <- function() .Call(wrap__LazyFrame__profile, self) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 527997b93..89bbdee79 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -276,9 +276,20 @@ LazyFrame_with_row_count = function(name, offset = NULL) { #' @examples pl$LazyFrame(iris)$filter(pl$col("Species") == "setosa")$collect() LazyFrame_filter = "use_extendr_wrapper" -#' @title Collect a query into a DataFrame -#' @description `$collect()` performs the query on the LazyFrame. It returns a -#' DataFrame +#' @title Get optimization settings +#' @description Get the current optimization toggles for the lazy query +#' @keywords LazyFrame +#' @return List of optimization toggles +#' @examples +#' pl$LazyFrame(mtcars)$get_optimization_toggle() +LazyFrame_get_optimization_toggle = function() { + self |> + .pr$LazyFrame$get_optimization_toggle() +} + +#' @title Configure optimization toggles +#' @description Configure the optimization toggles for the lazy query +#' @keywords LazyFrame #' @param type_coercion Boolean. Coerce types such that operations succeed and #' run on minimal required memory. #' @param predicate_pushdown Boolean. Applies filters as early as possible at @@ -293,11 +304,42 @@ LazyFrame_filter = "use_extendr_wrapper" #' occur on self-joins or unions. #' @param comm_subexpr_elim Boolean. Common subexpressions will be cached and #' reused. +#' @param streaming Boolean. Run parts of the query in a streaming fashion +#' (this is in an alpha state). +#' @return LazyFrame with specified optimization toggles +#' @examples +#' pl$LazyFrame(mtcars)$set_optimization_toggle(type_coercion = FALSE) +LazyFrame_set_optimization_toggle = function( + type_coercion = TRUE, + predicate_pushdown = TRUE, + projection_pushdown = TRUE, + simplify_expression = TRUE, + slice_pushdown = TRUE, + comm_subplan_elim = TRUE, + comm_subexpr_elim = TRUE, + streaming = FALSE) { + self |> + .pr$LazyFrame$set_optimization_toggle( + type_coercion, + predicate_pushdown, + projection_pushdown, + simplify_expression, + slice_pushdown, + comm_subplan_elim, + comm_subexpr_elim, + streaming + ) +} + +#' @title Collect a query into a DataFrame +#' @description `$collect()` performs the query on the LazyFrame. It returns a +#' DataFrame +#' @inheritParams LazyFrame_set_optimization_toggle #' @param no_optimization Boolean. Sets the following parameters to `FALSE`: #' `predicate_pushdown`, `projection_pushdown`, `slice_pushdown`, #' `comm_subplan_elim`, `comm_subexpr_elim`. -#' @param streaming Boolean. Run parts of the query in a streaming fashion -#' (this is in an alpha state). +#' @param inherit_optimization Boolean. Use existing optimization settings +#' regardless the settings specified in this function call. #' @param collect_in_background Boolean. Detach this query from R session. #' Computation will start in background. Get a handle which later can be converted #' into the resulting DataFrame. Useful in interactive mode to not lock R session. @@ -326,8 +368,9 @@ LazyFrame_collect = function( slice_pushdown = TRUE, comm_subplan_elim = TRUE, comm_subexpr_elim = TRUE, - no_optimization = FALSE, streaming = FALSE, + no_optimization = FALSE, + inherit_optimization = FALSE, collect_in_background = FALSE) { if (isTRUE(no_optimization)) { predicate_pushdown = FALSE @@ -341,14 +384,12 @@ LazyFrame_collect = function( comm_subplan_elim = FALSE } - collect_f = if (isTRUE(collect_in_background)) { - \(...) Ok(.pr$LazyFrame$collect_in_background(...)) - } else { - .pr$LazyFrame$collect - } + collect_f = ifelse(isTRUE(collect_in_background), \(...) Ok(.pr$LazyFrame$collect_in_background(...)), .pr$LazyFrame$collect) - self |> - .pr$LazyFrame$optimization_toggle( + lf = self + + if (isFALSE(inherit_optimization)) { + lf = self$set_optimization_toggle( type_coercion, predicate_pushdown, projection_pushdown, @@ -357,8 +398,11 @@ LazyFrame_collect = function( comm_subplan_elim, comm_subexpr_elim, streaming - ) |> - and_then(collect_f) |> + ) |> unwrap("in $collect():") + } + + lf |> + collect_f() |> unwrap("in $collect():") } @@ -459,16 +503,19 @@ LazyFrame_sink_parquet = function( predicate_pushdown = TRUE, projection_pushdown = TRUE, simplify_expression = TRUE, + slice_pushdown = TRUE, no_optimization = FALSE, - slice_pushdown = TRUE) { + inherit_optimization = FALSE) { if (isTRUE(no_optimization)) { predicate_pushdown = FALSE projection_pushdown = FALSE slice_pushdown = FALSE } - call_ctx = "in $sink_parquet(...)" - self |> - .pr$LazyFrame$optimization_toggle( + + lf = self + + if (isFALSE(inherit_optimization)) { + lf = self$set_optimization_toggle( type_coercion, predicate_pushdown, projection_pushdown, @@ -476,9 +523,11 @@ LazyFrame_sink_parquet = function( slice_pushdown, comm_subplan_elim = FALSE, comm_subexpr_elim = FALSE, - streaming = TRUE - ) |> - unwrap(call_ctx) |> + streaming = FALSE + ) |> unwrap("in $sink_parquet()") + } + + lf |> .pr$LazyFrame$sink_parquet( path, compression, @@ -488,7 +537,7 @@ LazyFrame_sink_parquet = function( data_pagesize_limit, maintain_order ) |> - unwrap(call_ctx) |> + unwrap("in $sink_parquet()") |> invisible() } @@ -526,16 +575,19 @@ LazyFrame_sink_ipc = function( predicate_pushdown = TRUE, projection_pushdown = TRUE, simplify_expression = TRUE, + slice_pushdown = TRUE, no_optimization = FALSE, - slice_pushdown = TRUE) { + inherit_optimization = FALSE) { if (isTRUE(no_optimization)) { predicate_pushdown = FALSE projection_pushdown = FALSE slice_pushdown = FALSE } - self |> - .pr$LazyFrame$optimization_toggle( + lf = self + + if (isFALSE(inherit_optimization)) { + lf = self$set_optimization_toggle( type_coercion, predicate_pushdown, projection_pushdown, @@ -543,15 +595,17 @@ LazyFrame_sink_ipc = function( slice_pushdown, comm_subplan_elim = FALSE, comm_subexpr_elim = FALSE, - streaming = TRUE - ) |> - unwrap("in $sink_ipc(...)") |> + streaming = FALSE + ) |> unwrap("in $sink_ipc()") + } + + lf |> .pr$LazyFrame$sink_ipc( path, compression, maintain_order ) |> - unwrap("in LazyFrame$sink_ipc(...)") |> + unwrap("in $sink_ipc()") |> invisible() } @@ -1179,8 +1233,9 @@ LazyFrame_fetch = function( slice_pushdown = TRUE, comm_subplan_elim = TRUE, comm_subexpr_elim = TRUE, + streaming = FALSE, no_optimization = FALSE, - streaming = FALSE) { + inherit_optimization = FALSE) { if (isTRUE(no_optimization)) { predicate_pushdown = FALSE projection_pushdown = FALSE @@ -1193,8 +1248,10 @@ LazyFrame_fetch = function( comm_subplan_elim = FALSE } - self |> - .pr$LazyFrame$optimization_toggle( + lf = self + + if (isFALSE(inherit_optimization)) { + lf = self$set_optimization_toggle( type_coercion, predicate_pushdown, projection_pushdown, @@ -1203,8 +1260,10 @@ LazyFrame_fetch = function( comm_subplan_elim, comm_subexpr_elim, streaming - ) |> - and_then(\(self) .pr$LazyFrame$fetch(self, n_rows)) |> + ) |> unwrap("in $fetch()") + } + + .pr$LazyFrame$fetch(lf, n_rows) |> unwrap("in $fetch()") } diff --git a/R/rust_result.R b/R/rust_result.R index dce58a82f..03b9e427d 100644 --- a/R/rust_result.R +++ b/R/rust_result.R @@ -56,7 +56,7 @@ Err = function(x) { #' @param f a closure that takes the err part as input #' @return same R object wrapped in a Err-result map_err = function(x, f) { - if (is_err(x)) x$err = f(x$err) + if (is_err(x)) x$err <- f(x$err) x } @@ -66,7 +66,7 @@ map_err = function(x, f) { #' @return same R object wrapped in a Err-result #' @noRd map = function(x, f) { - if (is_ok(x)) x$ok = f(x$ok) + if (is_ok(x)) x$ok <- f(x$ok) x } diff --git a/flake.lock b/flake.lock index a65decfac..adb712d36 100644 --- a/flake.lock +++ b/flake.lock @@ -6,11 +6,11 @@ "rust-analyzer-src": "rust-analyzer-src" }, "locked": { - "lastModified": 1680416435, - "narHash": "sha256-YlOhVkns/dIte+Yne64+FfmSmF74z3dtodBvuQ200i0=", + "lastModified": 1695795747, + "narHash": "sha256-XJSUxeaNLJwV1G428zcEbnflZpXA46syQIqUpX/I2WU=", "owner": "nix-community", "repo": "fenix", - "rev": "a53d9e5683ffbba0e76ae22b875ddf27eedcd4dd", + "rev": "1f42ae94a17632a9021820d279b23e53e3c10f90", "type": "github" }, "original": { @@ -24,11 +24,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1689068808, - "narHash": "sha256-6ixXo3wt24N/melDWjq70UuHQLxGV8jZvooRanIHXw0=", + "lastModified": 1694529238, + "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", "owner": "numtide", "repo": "flake-utils", - "rev": "919d646de7be200f3bf08cb76ae1f09402b6f9b4", + "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", "type": "github" }, "original": { @@ -39,11 +39,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1689940971, - "narHash": "sha256-397xShPnFqPC59Bmpo3lS+/Aw0yoDRMACGo1+h2VJMo=", + "lastModified": 1695644571, + "narHash": "sha256-asS9dCCdlt1lPq0DLwkVBbVoEKuEuz+Zi3DG7pR/RxA=", "owner": "nixos", "repo": "nixpkgs", - "rev": "9ca785644d067445a4aa749902b29ccef61f7476", + "rev": "6500b4580c2a1f3d0f980d32d285739d8e156d92", "type": "github" }, "original": { @@ -55,11 +55,11 @@ }, "nixpkgs_2": { "locked": { - "lastModified": 1690031011, - "narHash": "sha256-kzK0P4Smt7CL53YCdZCBbt9uBFFhE0iNvCki20etAf4=", + "lastModified": 1695644571, + "narHash": "sha256-asS9dCCdlt1lPq0DLwkVBbVoEKuEuz+Zi3DG7pR/RxA=", "owner": "nixos", "repo": "nixpkgs", - "rev": "12303c652b881435065a98729eb7278313041e49", + "rev": "6500b4580c2a1f3d0f980d32d285739d8e156d92", "type": "github" }, "original": { @@ -79,11 +79,11 @@ "rust-analyzer-src": { "flake": false, "locked": { - "lastModified": 1689936585, - "narHash": "sha256-tpEKMKIkzq3MoDviXdqE/AjDMLj3H4zlIZmeQicyPsA=", + "lastModified": 1695737913, + "narHash": "sha256-TDnQ5qOnDa1sIIJxJtUCYifAQ71h78A+Xoci2PkcR4I=", "owner": "rust-lang", "repo": "rust-analyzer", - "rev": "899dd84b4dbc53bab02553f77f6d7c3187d33637", + "rev": "3b1b58c225d35414d90078dc7e06ca74a49cad0c", "type": "github" }, "original": { diff --git a/man/Expr_map.Rd b/man/Expr_map.Rd index 6a693d74a..30de117e3 100644 --- a/man/Expr_map.Rd +++ b/man/Expr_map.Rd @@ -49,10 +49,10 @@ to see and view number of parallel R sessions. \examples{ pl$DataFrame(iris)$ select( - pl$col("Sepal.Length")$map(\(x) { - paste("cheese", as.character(x$to_vector())) - }, pl$dtypes$Utf8) - ) + pl$col("Sepal.Length")$map(\(x) { + paste("cheese", as.character(x$to_vector())) + }, pl$dtypes$Utf8) +) # R parallel process example, use Sys.sleep() to imitate some CPU expensive # computation. diff --git a/man/IO_sink_ipc.Rd b/man/IO_sink_ipc.Rd index 0f4db37e2..da4b2a9d3 100644 --- a/man/IO_sink_ipc.Rd +++ b/man/IO_sink_ipc.Rd @@ -12,8 +12,9 @@ LazyFrame_sink_ipc( predicate_pushdown = TRUE, projection_pushdown = TRUE, simplify_expression = TRUE, + slice_pushdown = TRUE, no_optimization = FALSE, - slice_pushdown = TRUE + inherit_optimization = FALSE ) } \arguments{ @@ -40,12 +41,15 @@ at the scan level.} \item{simplify_expression}{Boolean. Various optimizations, such as constant folding and replacing expensive operations with faster alternatives.} +\item{slice_pushdown}{Boolean. Only load the required slice from the scan +level. Don't materialize sliced outputs (e.g. \code{join$head(10)}).} + \item{no_optimization}{Boolean. Sets the following parameters to \code{FALSE}: \code{predicate_pushdown}, \code{projection_pushdown}, \code{slice_pushdown}, \code{comm_subplan_elim}, \code{comm_subexpr_elim}.} -\item{slice_pushdown}{Boolean. Only load the required slice from the scan -level. Don't materialize sliced outputs (e.g. \code{join$head(10)}).} +\item{inherit_optimization}{Boolean. Use existing optimization settings +regardless the settings specified in this function call.} } \description{ This writes the output of a query directly to an Arrow IPC file without collecting diff --git a/man/IO_sink_parquet.Rd b/man/IO_sink_parquet.Rd index a331e5b4f..16ef6d2d2 100644 --- a/man/IO_sink_parquet.Rd +++ b/man/IO_sink_parquet.Rd @@ -16,8 +16,9 @@ LazyFrame_sink_parquet( predicate_pushdown = TRUE, projection_pushdown = TRUE, simplify_expression = TRUE, + slice_pushdown = TRUE, no_optimization = FALSE, - slice_pushdown = TRUE + inherit_optimization = FALSE ) } \arguments{ @@ -71,12 +72,15 @@ at the scan level.} \item{simplify_expression}{Boolean. Various optimizations, such as constant folding and replacing expensive operations with faster alternatives.} +\item{slice_pushdown}{Boolean. Only load the required slice from the scan +level. Don't materialize sliced outputs (e.g. \code{join$head(10)}).} + \item{no_optimization}{Boolean. Sets the following parameters to \code{FALSE}: \code{predicate_pushdown}, \code{projection_pushdown}, \code{slice_pushdown}, \code{comm_subplan_elim}, \code{comm_subexpr_elim}.} -\item{slice_pushdown}{Boolean. Only load the required slice from the scan -level. Don't materialize sliced outputs (e.g. \code{join$head(10)}).} +\item{inherit_optimization}{Boolean. Use existing optimization settings +regardless the settings specified in this function call.} } \description{ This writes the output of a query directly to a Parquet file without collecting diff --git a/man/LazyFrame_collect.Rd b/man/LazyFrame_collect.Rd index 850ce2b9e..69b8e4a82 100644 --- a/man/LazyFrame_collect.Rd +++ b/man/LazyFrame_collect.Rd @@ -12,8 +12,9 @@ LazyFrame_collect( slice_pushdown = TRUE, comm_subplan_elim = TRUE, comm_subexpr_elim = TRUE, - no_optimization = FALSE, streaming = FALSE, + no_optimization = FALSE, + inherit_optimization = FALSE, collect_in_background = FALSE ) } @@ -39,12 +40,15 @@ occur on self-joins or unions.} \item{comm_subexpr_elim}{Boolean. Common subexpressions will be cached and reused.} +\item{streaming}{Boolean. Run parts of the query in a streaming fashion +(this is in an alpha state).} + \item{no_optimization}{Boolean. Sets the following parameters to \code{FALSE}: \code{predicate_pushdown}, \code{projection_pushdown}, \code{slice_pushdown}, \code{comm_subplan_elim}, \code{comm_subexpr_elim}.} -\item{streaming}{Boolean. Run parts of the query in a streaming fashion -(this is in an alpha state).} +\item{inherit_optimization}{Boolean. Use existing optimization settings +regardless the settings specified in this function call.} \item{collect_in_background}{Boolean. Detach this query from R session. Computation will start in background. Get a handle which later can be converted diff --git a/man/LazyFrame_fetch.Rd b/man/LazyFrame_fetch.Rd index 8a4e792fa..88c5102ba 100644 --- a/man/LazyFrame_fetch.Rd +++ b/man/LazyFrame_fetch.Rd @@ -13,8 +13,9 @@ LazyFrame_fetch( slice_pushdown = TRUE, comm_subplan_elim = TRUE, comm_subexpr_elim = TRUE, + streaming = FALSE, no_optimization = FALSE, - streaming = FALSE + inherit_optimization = FALSE ) } \arguments{ @@ -41,12 +42,15 @@ occur on self-joins or unions.} \item{comm_subexpr_elim}{Boolean. Common subexpressions will be cached and reused.} +\item{streaming}{Boolean. Run parts of the query in a streaming fashion +(this is in an alpha state).} + \item{no_optimization}{Boolean. Sets the following parameters to \code{FALSE}: \code{predicate_pushdown}, \code{projection_pushdown}, \code{slice_pushdown}, \code{comm_subplan_elim}, \code{comm_subexpr_elim}.} -\item{streaming}{Boolean. Run parts of the query in a streaming fashion -(this is in an alpha state).} +\item{inherit_optimization}{Boolean. Use existing optimization settings +regardless the settings specified in this function call.} } \value{ A DataFrame of maximum n_rows diff --git a/man/LazyFrame_get_optimization_toggle.Rd b/man/LazyFrame_get_optimization_toggle.Rd new file mode 100644 index 000000000..c23bdebab --- /dev/null +++ b/man/LazyFrame_get_optimization_toggle.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_get_optimization_toggle} +\alias{LazyFrame_get_optimization_toggle} +\title{Get optimization settings} +\usage{ +LazyFrame_get_optimization_toggle() +} +\value{ +List of optimization toggles +} +\description{ +Get the current optimization toggles for the lazy query +} +\examples{ +pl$LazyFrame(mtcars)$get_optimization_toggle() +} +\keyword{LazyFrame} diff --git a/man/LazyFrame_set_optimization_toggle.Rd b/man/LazyFrame_set_optimization_toggle.Rd new file mode 100644 index 000000000..d1d3db90e --- /dev/null +++ b/man/LazyFrame_set_optimization_toggle.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe__lazy.R +\name{LazyFrame_set_optimization_toggle} +\alias{LazyFrame_set_optimization_toggle} +\title{Configure optimization toggles} +\usage{ +LazyFrame_set_optimization_toggle( + type_coercion = TRUE, + predicate_pushdown = TRUE, + projection_pushdown = TRUE, + simplify_expression = TRUE, + slice_pushdown = TRUE, + comm_subplan_elim = TRUE, + comm_subexpr_elim = TRUE, + streaming = FALSE +) +} +\arguments{ +\item{type_coercion}{Boolean. Coerce types such that operations succeed and +run on minimal required memory.} + +\item{predicate_pushdown}{Boolean. Applies filters as early as possible at +scan level.} + +\item{projection_pushdown}{Boolean. Select only the columns that are needed +at the scan level.} + +\item{simplify_expression}{Boolean. Various optimizations, such as constant +folding and replacing expensive operations with faster alternatives.} + +\item{slice_pushdown}{Boolean. Only load the required slice from the scan +level. Don't materialize sliced outputs (e.g. \code{join$head(10)}).} + +\item{comm_subplan_elim}{Boolean. Will try to cache branching subplans that +occur on self-joins or unions.} + +\item{comm_subexpr_elim}{Boolean. Common subexpressions will be cached and +reused.} + +\item{streaming}{Boolean. Run parts of the query in a streaming fashion +(this is in an alpha state).} +} +\value{ +LazyFrame with specified optimization toggles +} +\description{ +Configure the optimization toggles for the lazy query +} +\examples{ +pl$LazyFrame(mtcars)$set_optimization_toggle(type_coercion = FALSE) +} +\keyword{LazyFrame} diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 1ea33e5de..057efef5e 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -442,12 +442,12 @@ impl LazyFrame { } #[allow(clippy::too_many_arguments)] - fn optimization_toggle( + fn set_optimization_toggle( &self, type_coercion: Robj, predicate_pushdown: Robj, projection_pushdown: Robj, - simplify_expr: Robj, + simplify_expression: Robj, slice_pushdown: Robj, comm_subplan_elim: Robj, comm_subexpr_elim: Robj, @@ -458,7 +458,7 @@ impl LazyFrame { .clone() .with_type_coercion(robj_to!(bool, type_coercion)?) .with_predicate_pushdown(robj_to!(bool, predicate_pushdown)?) - .with_simplify_expr(robj_to!(bool, simplify_expr)?) + .with_simplify_expr(robj_to!(bool, simplify_expression)?) .with_slice_pushdown(robj_to!(bool, slice_pushdown)?) .with_streaming(robj_to!(bool, streaming)?) .with_projection_pushdown(robj_to!(bool, projection_pushdown)?) @@ -468,6 +468,30 @@ impl LazyFrame { Ok(ldf.into()) } + fn get_optimization_toggle(&self) -> List { + let pl::OptState { + projection_pushdown, + predicate_pushdown, + type_coercion, + simplify_expr, + file_caching: _, + slice_pushdown, + comm_subplan_elim, + comm_subexpr_elim, + streaming, + } = self.0.get_current_optimizations(); + list!( + type_coercion = type_coercion, + predicate_pushdown = predicate_pushdown, + projection_pushdown = projection_pushdown, + simplify_expression = simplify_expr, + slice_pushdown = slice_pushdown, + comm_subplan_elim = comm_subplan_elim, + comm_subexpr_elim = comm_subexpr_elim, + streaming = streaming, + ) + } + fn profile(&self) -> RResult { profile_with_r_func_support(self.0.clone()).map(|(r, p)| list!(result = r, profile = p)) } diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs index 433fbc95e..1d90cd526 100644 --- a/src/rust/src/rlib.rs +++ b/src/rust/src/rlib.rs @@ -2,11 +2,11 @@ use crate::lazy::dsl::Expr; use crate::lazy::dsl::ProtoExprArray; use crate::rdataframe::DataFrame; use crate::robj_to; +use crate::rpolarserr::{rdbg, RResult}; +use crate::series::Series; use crate::utils::extendr_concurrent::{ParRObj, ThreadCom}; use crate::RFnSignature; use crate::CONFIG; -use crate::rpolarserr::{rdbg, RResult}; -use crate::series::Series; use extendr_api::prelude::*; use polars::prelude as pl; use polars_core::functions as pl_functions; @@ -62,7 +62,7 @@ fn r_date_range_lazy( time_zone: Robj, explode: Robj, ) -> RResult { - let expr = polars::lazy::dsl::functions::date_range( + let expr = polars::lazy::prelude::date_range( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, robj_to!(pl_duration, every)?, diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index b5688e81b..b976162cf 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -794,3 +794,40 @@ test_that("unnest", { to_data_frame() ) }) + +test_that("opt_toggles", { + # some optimization settings + opt_settings = list( + type_coercion = FALSE, + predicate_pushdown = TRUE, + projection_pushdown = TRUE, + simplify_expression = TRUE, + slice_pushdown = FALSE, + comm_subplan_elim = FALSE, + comm_subexpr_elim = FALSE, + streaming = TRUE + ) + opt_settings2 = lapply(opt_settings, `!`) + + # some LazyFrames + lf = pl$LazyFrame(mtcars)$select(pl$col("mpg") * 0.42) + lf_new_opts = do.call(lf$set_optimization_toggle, opt_settings)$ok + lf_new_opts2 = do.call(lf$set_optimization_toggle, opt_settings2)$ok + + # Check set/get roundtrip + expect_identical(lf_new_opts$get_optimization_toggle(), opt_settings) + expect_identical(lf_new_opts2$get_optimization_toggle(), opt_settings2) + + # collect - same result, no matter opts + df_new_opts = lf_new_opts$collect(inherit_optimization = TRUE)$to_data_frame() + df_new_opts2 = lf_new_opts2$collect(inherit_optimization = TRUE)$to_data_frame() + df_defaults = lf$collect()$to_data_frame() + expect_identical(df_new_opts, df_defaults) + expect_identical(df_new_opts2, df_defaults) + + # sink_ipc - same results + tmpf = tempfile() + on.exit(unlink(tmpf)) + lf_new_opts$sink_ipc(tmpf, inherit_optimization = TRUE) + expect_identical(pl$scan_ipc(tmpf, memmap = FALSE)$collect()$to_data_frame(), df_defaults) +})