Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

minor error handling set_options + misc docs #421

Merged
merged 10 commits into from
Oct 26, 2023
Merged
6 changes: 3 additions & 3 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,7 @@ construct_ProtoExprArray = function(...) {
#' # map a,b,c,d sequentially
#' pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select(
#' pl$all()$map(\(s) {
#' Sys.sleep(.5)
#' Sys.sleep(.1)
#' s * 2
#' })
#' )$collect() |> system.time()
Expand All @@ -712,7 +712,7 @@ construct_ProtoExprArray = function(...) {
#' pl$options$rpool_cap
#' pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select(
#' pl$all()$map(\(s) {
#' Sys.sleep(.5)
#' Sys.sleep(.1)
#' s * 2
#' }, in_background = TRUE)
#' )$collect() |> system.time()
Expand All @@ -721,7 +721,7 @@ construct_ProtoExprArray = function(...) {
#' pl$options$rpool_cap
#' pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select(
#' pl$all()$map(\(s) {
#' Sys.sleep(.5)
#' Sys.sleep(.1)
#' s * 2
#' }, in_background = TRUE)
#' )$collect() |> system.time()
Expand Down
29 changes: 14 additions & 15 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ LazyFrame_collect = function(
#' # Some expression which does contain a map
#' expr = pl$col("mpg")$map(
#' \(x) {
#' Sys.sleep(.5)
#' Sys.sleep(.1)
#' x * 0.43
#' },
#' in_background = TRUE # set TRUE if collecting in background queries with $map or $apply
Expand Down Expand Up @@ -1408,20 +1408,19 @@ LazyFrame_fetch = function(
#' agg(pl$col(pl$Float64)$apply(r_func))$
#' profile()
LazyFrame_profile = function(
type_coercion = TRUE,
predicate_pushdown = TRUE,
projection_pushdown = TRUE,
simplify_expression = TRUE,
slice_pushdown = TRUE,
comm_subplan_elim = TRUE,
comm_subexpr_elim = TRUE,
streaming = FALSE,
no_optimization = FALSE,
inherit_optimization = FALSE,
collect_in_background = FALSE,
show_plot = FALSE,
truncate_nodes = 0) {

type_coercion = TRUE,
predicate_pushdown = TRUE,
projection_pushdown = TRUE,
simplify_expression = TRUE,
slice_pushdown = TRUE,
comm_subplan_elim = TRUE,
comm_subexpr_elim = TRUE,
streaming = FALSE,
no_optimization = FALSE,
inherit_optimization = FALSE,
collect_in_background = FALSE,
show_plot = FALSE,
truncate_nodes = 0) {
if (isTRUE(no_optimization)) {
predicate_pushdown = FALSE
projection_pushdown = FALSE
Expand Down
39 changes: 27 additions & 12 deletions R/options.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ polars_optreq$rpool_cap = list() # rust-side options already check args
#' @docType NULL
#'
#' @details
#' All args must be explicitly and fully named.
#'
#' `pl$options$rpool_active` indicates the number of R sessions already
#' spawned in pool. `pl$options$rpool_cap` indicates the maximum number of new R
#' sessions that can be spawned. Anytime a polars thread worker needs a background
Expand Down Expand Up @@ -100,8 +102,19 @@ pl$set_options = function(
# modified in the first call)
args_modified = names(as.list(sys.call()[-1]))

if (is.null(args_modified) || any(nchar(args_modified) == 0L)) {
Err_plain("all args must be named") |>
unwrap("in pl$set_options")
}

for (i in seq_along(args_modified)) {
value = get(args_modified[i])
value = result(get(args_modified[i])) |>
map_err(\(rp_err) {
rp_err$
hint("arg-name does not match any defined args of `?set_options`")$
bad_arg(args_modified[i])
}) |>
unwrap("in pl$set_options")

# each argument has its own input requirements
validation = c()
Expand Down Expand Up @@ -263,26 +276,28 @@ pl$with_string_cache = function(expr) {
#' Multi-process communication has overhead because all data must be
#' serialized/de-serialized and sent via buffers. Using multiple R sessions
#' will likely only give a speed-up in a `low io - high cpu` scenario. Native
#' polars query syntax runs in threads and have no overhead.
#' polars query syntax runs in threads and have no overhead. Polars has as default
#' double as many thread workers as cores. If any worker are queuing for or using R sessions,
#' other workers can still continue any native polars parts as much as possible.
#'
#' @return
#' `pl$get_global_rpool_cap()` returns a list with two elements `available`
#' and `capacity`. `available` is the number of R sessions are already spawned
#' in pool. `capacity` is the limit of new R sessions to spawn. Anytime a polars
#' `pl$options$rpool_cap` returns the capacity ("limit") of co-running external R sessions /
#' processes. `pl$options$rpool_active` is the number of R sessions are already spawned
#' in the pool. `rpool_cap` is the limit of new R sessions to spawn. Anytime a polars
#' thread worker needs a background R session specifically to run R code embedded
#' in a query via `$map(..., in_background = TRUE)` or
#' `$apply(..., in_background = TRUE)`, it will obtain any R session idling in
#' rpool, or spawn a new R session (process) and add it to pool if `capacity`
#' rpool, or spawn a new R session (process) if `capacity`
#' is not already reached. If `capacity` is already reached, the thread worker
#' will sleep until an R session is idling.
#' will sleep and in a R job queue until an R session is idle.
#'
#' @keywords options
#' @examples
#' default = pl$get_global_rpool_cap()
#' print(default)
#' pl$set_global_rpool_cap(8)
#' pl$get_global_rpool_cap()
#' pl$set_global_rpool_cap(default$capacity)
#' default = pl$options$rpool_cap |> print()
#' pl$set_options(rpool_cap = 8)
#' pl$options$rpool_cap
#' pl$set_options(rpool_cap = default)
#' pl$options$rpool_cap
pl$get_global_rpool_cap = function() {
warning(
"in pl$get_global_rpool_cap(): Deprecated. Use pl$options$rpool_cap instead.",
Expand Down
2 changes: 1 addition & 1 deletion R/rbackground.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ print.RThreadHandle = function(x, ...) as.character(x) |> cat("\n")
#' [`<Expr>$apply()`][Expr_apply]
#' @examples
#' prexpr = pl$col("mpg")$map(\(x) {
#' Sys.sleep(1.5)
#' Sys.sleep(.1)
#' x * 0.43
#' }, in_background = TRUE)$alias("kml")
#' handle = pl$LazyFrame(mtcars)$with_columns(prexpr)$collect_in_background()
Expand Down
13 changes: 8 additions & 5 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ move_env_elements(Expr, pl, c("lit"), remove = FALSE)

#' Get Memory Address
#' @name pl_mem_address
#' @description mimics pl$mem_address
#' @description Get underlying mem address a rust object (via ExtPtr). Expert use only.
#' @details Does not give meaningful answers for regular R objects.
#' @param robj an R object
#' @aliases mem_address
#' @return String of mem address
Expand Down Expand Up @@ -147,22 +148,24 @@ pl$mem_address = mem_address
makeActiveBinding(
"rpool_cap",
\(arg) {
if(missing(arg)) {
if (missing(arg)) {
unwrap(get_global_rpool_cap())$capacity
} else {
unwrap(set_global_rpool_cap(arg))
}
}, env = polars_optenv
},
env = polars_optenv
)
makeActiveBinding(
"rpool_active",
\(arg) {
if(missing(arg)) {
if (missing(arg)) {
unwrap(get_global_rpool_cap())$active
} else {
unwrap(stop("internal error: polars_optenv$rpool_active cannot be set directly"))
}
}, env = polars_optenv
},
env = polars_optenv
)

setup_renv()
Expand Down
6 changes: 3 additions & 3 deletions man/Expr_map.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/LazyFrame_collect_in_background.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/RThreadHandle_RThreadHandle_class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 13 additions & 11 deletions man/global_rpool_cap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/pl_mem_address.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/polars_options.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test-options.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,20 @@ test_that("pl$options$ read-write", {
# reset_options() works
pl$reset_options()
expect_identical(pl$options, old_options)

# all set_options args must be named
expect_identical(
pl$set_options(42) |> get_err_ctx("Plain"),
"all args must be named"
)
expect_identical(
pl$set_options(rpool_cap = 42, 42) |> get_err_ctx("Plain"),
"all args must be named"
)

# incomplete/misspelled name not allowed
expect_identical(
pl$set_options(rpo = 42) |> get_err_ctx("Hint"),
"arg-name does not match any defined args of `?set_options`"
)
})
2 changes: 1 addition & 1 deletion tests/testthat/test-rbackground.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ test_that("Test using $map() in background", {

compute = lf$select(pl$col("y")$map(\(x) x * x, in_background = FALSE))
compute_bg = lf$select(pl$col("y")$map(\(x) {
Sys.sleep(2)
Sys.sleep(.3)
x * x
}, in_background = TRUE))
res_ref = compute$collect()$to_data_frame()
Expand Down