diff --git a/R/expr__expr.R b/R/expr__expr.R index 6df25d8e9..c41d23f35 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -701,7 +701,7 @@ construct_ProtoExprArray = function(...) { #' # map a,b,c,d sequentially #' pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select( #' pl$all()$map(\(s) { -#' Sys.sleep(.5) +#' Sys.sleep(.1) #' s * 2 #' }) #' )$collect() |> system.time() @@ -712,7 +712,7 @@ construct_ProtoExprArray = function(...) { #' pl$options$rpool_cap #' pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select( #' pl$all()$map(\(s) { -#' Sys.sleep(.5) +#' Sys.sleep(.1) #' s * 2 #' }, in_background = TRUE) #' )$collect() |> system.time() @@ -721,7 +721,7 @@ construct_ProtoExprArray = function(...) { #' pl$options$rpool_cap #' pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select( #' pl$all()$map(\(s) { -#' Sys.sleep(.5) +#' Sys.sleep(.1) #' s * 2 #' }, in_background = TRUE) #' )$collect() |> system.time() diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index da5cb4844..95a8d9d60 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -431,7 +431,7 @@ LazyFrame_collect = function( #' # Some expression which does contain a map #' expr = pl$col("mpg")$map( #' \(x) { -#' Sys.sleep(.5) +#' Sys.sleep(.1) #' x * 0.43 #' }, #' in_background = TRUE # set TRUE if collecting in background queries with $map or $apply @@ -1408,20 +1408,19 @@ LazyFrame_fetch = function( #' agg(pl$col(pl$Float64)$apply(r_func))$ #' profile() LazyFrame_profile = function( - type_coercion = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - comm_subplan_elim = TRUE, - comm_subexpr_elim = TRUE, - streaming = FALSE, - no_optimization = FALSE, - inherit_optimization = FALSE, - collect_in_background = FALSE, - show_plot = FALSE, - truncate_nodes = 0) { - + type_coercion = TRUE, + predicate_pushdown = TRUE, + projection_pushdown = TRUE, + simplify_expression = TRUE, + slice_pushdown = TRUE, + comm_subplan_elim = TRUE, + comm_subexpr_elim = TRUE, + streaming = FALSE, + no_optimization = FALSE, + inherit_optimization = FALSE, + collect_in_background = FALSE, + show_plot = FALSE, + truncate_nodes = 0) { if (isTRUE(no_optimization)) { predicate_pushdown = FALSE projection_pushdown = FALSE diff --git a/R/options.R b/R/options.R index 4b6d510d6..b2f86d37a 100644 --- a/R/options.R +++ b/R/options.R @@ -52,6 +52,8 @@ polars_optreq$rpool_cap = list() # rust-side options already check args #' @docType NULL #' #' @details +#' All args must be explicitly and fully named. +#' #' `pl$options$rpool_active` indicates the number of R sessions already #' spawned in pool. `pl$options$rpool_cap` indicates the maximum number of new R #' sessions that can be spawned. Anytime a polars thread worker needs a background @@ -100,8 +102,19 @@ pl$set_options = function( # modified in the first call) args_modified = names(as.list(sys.call()[-1])) + if (is.null(args_modified) || any(nchar(args_modified) == 0L)) { + Err_plain("all args must be named") |> + unwrap("in pl$set_options") + } + for (i in seq_along(args_modified)) { - value = get(args_modified[i]) + value = result(get(args_modified[i])) |> + map_err(\(rp_err) { + rp_err$ + hint("arg-name does not match any defined args of `?set_options`")$ + bad_arg(args_modified[i]) + }) |> + unwrap("in pl$set_options") # each argument has its own input requirements validation = c() @@ -263,26 +276,28 @@ pl$with_string_cache = function(expr) { #' Multi-process communication has overhead because all data must be #' serialized/de-serialized and sent via buffers. Using multiple R sessions #' will likely only give a speed-up in a `low io - high cpu` scenario. Native -#' polars query syntax runs in threads and have no overhead. +#' polars query syntax runs in threads and have no overhead. Polars has as default +#' double as many thread workers as cores. If any worker are queuing for or using R sessions, +#' other workers can still continue any native polars parts as much as possible. #' #' @return -#' `pl$get_global_rpool_cap()` returns a list with two elements `available` -#' and `capacity`. `available` is the number of R sessions are already spawned -#' in pool. `capacity` is the limit of new R sessions to spawn. Anytime a polars +#' `pl$options$rpool_cap` returns the capacity ("limit") of co-running external R sessions / +#' processes. `pl$options$rpool_active` is the number of R sessions are already spawned +#' in the pool. `rpool_cap` is the limit of new R sessions to spawn. Anytime a polars #' thread worker needs a background R session specifically to run R code embedded #' in a query via `$map(..., in_background = TRUE)` or #' `$apply(..., in_background = TRUE)`, it will obtain any R session idling in -#' rpool, or spawn a new R session (process) and add it to pool if `capacity` +#' rpool, or spawn a new R session (process) if `capacity` #' is not already reached. If `capacity` is already reached, the thread worker -#' will sleep until an R session is idling. +#' will sleep and in a R job queue until an R session is idle. #' #' @keywords options #' @examples -#' default = pl$get_global_rpool_cap() -#' print(default) -#' pl$set_global_rpool_cap(8) -#' pl$get_global_rpool_cap() -#' pl$set_global_rpool_cap(default$capacity) +#' default = pl$options$rpool_cap |> print() +#' pl$set_options(rpool_cap = 8) +#' pl$options$rpool_cap +#' pl$set_options(rpool_cap = default) +#' pl$options$rpool_cap pl$get_global_rpool_cap = function() { warning( "in pl$get_global_rpool_cap(): Deprecated. Use pl$options$rpool_cap instead.", diff --git a/R/rbackground.R b/R/rbackground.R index 8f222000c..a9491c7a4 100644 --- a/R/rbackground.R +++ b/R/rbackground.R @@ -65,7 +65,7 @@ print.RThreadHandle = function(x, ...) as.character(x) |> cat("\n") #' [`$apply()`][Expr_apply] #' @examples #' prexpr = pl$col("mpg")$map(\(x) { -#' Sys.sleep(1.5) +#' Sys.sleep(.1) #' x * 0.43 #' }, in_background = TRUE)$alias("kml") #' handle = pl$LazyFrame(mtcars)$with_columns(prexpr)$collect_in_background() diff --git a/R/zzz.R b/R/zzz.R index 3d2bad0a3..383f78724 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -105,7 +105,8 @@ move_env_elements(Expr, pl, c("lit"), remove = FALSE) #' Get Memory Address #' @name pl_mem_address -#' @description mimics pl$mem_address +#' @description Get underlying mem address a rust object (via ExtPtr). Expert use only. +#' @details Does not give meaningful answers for regular R objects. #' @param robj an R object #' @aliases mem_address #' @return String of mem address @@ -147,22 +148,24 @@ pl$mem_address = mem_address makeActiveBinding( "rpool_cap", \(arg) { - if(missing(arg)) { + if (missing(arg)) { unwrap(get_global_rpool_cap())$capacity } else { unwrap(set_global_rpool_cap(arg)) } - }, env = polars_optenv + }, + env = polars_optenv ) makeActiveBinding( "rpool_active", \(arg) { - if(missing(arg)) { + if (missing(arg)) { unwrap(get_global_rpool_cap())$active } else { unwrap(stop("internal error: polars_optenv$rpool_active cannot be set directly")) } - }, env = polars_optenv + }, + env = polars_optenv ) setup_renv() diff --git a/man/Expr_map.Rd b/man/Expr_map.Rd index 30de117e3..348d3ab21 100644 --- a/man/Expr_map.Rd +++ b/man/Expr_map.Rd @@ -60,7 +60,7 @@ pl$DataFrame(iris)$ # map a,b,c,d sequentially pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select( pl$all()$map(\(s) { - Sys.sleep(.5) + Sys.sleep(.1) s * 2 }) )$collect() |> system.time() @@ -71,7 +71,7 @@ pl$set_options(rpool_cap = 4) # set back to 4, the default pl$options$rpool_cap pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select( pl$all()$map(\(s) { - Sys.sleep(.5) + Sys.sleep(.1) s * 2 }, in_background = TRUE) )$collect() |> system.time() @@ -80,7 +80,7 @@ pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select( pl$options$rpool_cap pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select( pl$all()$map(\(s) { - Sys.sleep(.5) + Sys.sleep(.1) s * 2 }, in_background = TRUE) )$collect() |> system.time() diff --git a/man/LazyFrame_collect_in_background.Rd b/man/LazyFrame_collect_in_background.Rd index 75822e26a..6dba86dbe 100644 --- a/man/LazyFrame_collect_in_background.Rd +++ b/man/LazyFrame_collect_in_background.Rd @@ -28,7 +28,7 @@ R session is not available for polars execution. See also examples below. # Some expression which does contain a map expr = pl$col("mpg")$map( \(x) { - Sys.sleep(.5) + Sys.sleep(.1) x * 0.43 }, in_background = TRUE # set TRUE if collecting in background queries with $map or $apply diff --git a/man/RThreadHandle_RThreadHandle_class.Rd b/man/RThreadHandle_RThreadHandle_class.Rd index 95a7bb512..b1afd3588 100644 --- a/man/RThreadHandle_RThreadHandle_class.Rd +++ b/man/RThreadHandle_RThreadHandle_class.Rd @@ -29,7 +29,7 @@ session. } \examples{ prexpr = pl$col("mpg")$map(\(x) { - Sys.sleep(1.5) + Sys.sleep(.1) x * 0.43 }, in_background = TRUE)$alias("kml") handle = pl$LazyFrame(mtcars)$with_columns(prexpr)$collect_in_background() diff --git a/man/global_rpool_cap.Rd b/man/global_rpool_cap.Rd index b99b4bbb6..aea602d42 100644 --- a/man/global_rpool_cap.Rd +++ b/man/global_rpool_cap.Rd @@ -8,15 +8,15 @@ \item{n}{Integer, the capacity limit R sessions to process R code.} } \value{ -\code{pl$get_global_rpool_cap()} returns a list with two elements \code{available} -and \code{capacity}. \code{available} is the number of R sessions are already spawned -in pool. \code{capacity} is the limit of new R sessions to spawn. Anytime a polars +\code{pl$options$rpool_cap} returns the capacity ("limit") of co-running external R sessions / +processes. \code{pl$options$rpool_active} is the number of R sessions are already spawned +in the pool. \code{rpool_cap} is the limit of new R sessions to spawn. Anytime a polars thread worker needs a background R session specifically to run R code embedded in a query via \verb{$map(..., in_background = TRUE)} or \verb{$apply(..., in_background = TRUE)}, it will obtain any R session idling in -rpool, or spawn a new R session (process) and add it to pool if \code{capacity} +rpool, or spawn a new R session (process) if \code{capacity} is not already reached. If \code{capacity} is already reached, the thread worker -will sleep until an R session is idling. +will sleep and in a R job queue until an R session is idle. } \description{ Deprecated. Use pl$options to get, and pl$set_options() to set. @@ -27,13 +27,15 @@ serialize + shared memory buffers via the rust crate \code{ipc-channel}. Multi-process communication has overhead because all data must be serialized/de-serialized and sent via buffers. Using multiple R sessions will likely only give a speed-up in a \verb{low io - high cpu} scenario. Native -polars query syntax runs in threads and have no overhead. +polars query syntax runs in threads and have no overhead. Polars has as default +double as many thread workers as cores. If any worker are queuing for or using R sessions, +other workers can still continue any native polars parts as much as possible. } \examples{ -default = pl$get_global_rpool_cap() -print(default) -pl$set_global_rpool_cap(8) -pl$get_global_rpool_cap() -pl$set_global_rpool_cap(default$capacity) +default = pl$options$rpool_cap |> print() +pl$set_options(rpool_cap = 8) +pl$options$rpool_cap +pl$set_options(rpool_cap = default) +pl$options$rpool_cap } \keyword{options} diff --git a/man/pl_mem_address.Rd b/man/pl_mem_address.Rd index c64f49955..2a0e31b31 100644 --- a/man/pl_mem_address.Rd +++ b/man/pl_mem_address.Rd @@ -11,7 +11,10 @@ String of mem address } \description{ -mimics pl$mem_address +Get underlying mem address a rust object (via ExtPtr). Expert use only. +} +\details{ +Does not give meaningful answers for regular R objects. } \examples{ pl$mem_address(pl$Series(1:3)) diff --git a/man/polars_options.Rd b/man/polars_options.Rd index 4ffb43b87..985f9b182 100644 --- a/man/polars_options.Rd +++ b/man/polars_options.Rd @@ -35,6 +35,8 @@ Get and set polars options. See sections "Value" and "Examples" below for more details. } \details{ +All args must be explicitly and fully named. + \code{pl$options$rpool_active} indicates the number of R sessions already spawned in pool. \code{pl$options$rpool_cap} indicates the maximum number of new R sessions that can be spawned. Anytime a polars thread worker needs a background diff --git a/tests/testthat/test-options.R b/tests/testthat/test-options.R index b0e0f1bb4..e21224c29 100644 --- a/tests/testthat/test-options.R +++ b/tests/testthat/test-options.R @@ -35,4 +35,20 @@ test_that("pl$options$ read-write", { # reset_options() works pl$reset_options() expect_identical(pl$options, old_options) + + # all set_options args must be named + expect_identical( + pl$set_options(42) |> get_err_ctx("Plain"), + "all args must be named" + ) + expect_identical( + pl$set_options(rpool_cap = 42, 42) |> get_err_ctx("Plain"), + "all args must be named" + ) + + # incomplete/misspelled name not allowed + expect_identical( + pl$set_options(rpo = 42) |> get_err_ctx("Hint"), + "arg-name does not match any defined args of `?set_options`" + ) }) diff --git a/tests/testthat/test-rbackground.R b/tests/testthat/test-rbackground.R index 8c6c229ee..6a454d560 100644 --- a/tests/testthat/test-rbackground.R +++ b/tests/testthat/test-rbackground.R @@ -24,7 +24,7 @@ test_that("Test using $map() in background", { compute = lf$select(pl$col("y")$map(\(x) x * x, in_background = FALSE)) compute_bg = lf$select(pl$col("y")$map(\(x) { - Sys.sleep(2) + Sys.sleep(.3) x * x }, in_background = TRUE)) res_ref = compute$collect()$to_data_frame()