Skip to content

Commit

Permalink
minor error handling set_options + misc docs (#421)
Browse files Browse the repository at this point in the history
Co-authored-by: eitsupi <[email protected]>
  • Loading branch information
sorhawell and eitsupi authored Oct 26, 2023
1 parent b475e98 commit 17bea56
Show file tree
Hide file tree
Showing 13 changed files with 94 additions and 54 deletions.
6 changes: 3 additions & 3 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,7 @@ construct_ProtoExprArray = function(...) {
#' # map a,b,c,d sequentially
#' pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select(
#' pl$all()$map(\(s) {
#' Sys.sleep(.5)
#' Sys.sleep(.1)
#' s * 2
#' })
#' )$collect() |> system.time()
Expand All @@ -712,7 +712,7 @@ construct_ProtoExprArray = function(...) {
#' pl$options$rpool_cap
#' pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select(
#' pl$all()$map(\(s) {
#' Sys.sleep(.5)
#' Sys.sleep(.1)
#' s * 2
#' }, in_background = TRUE)
#' )$collect() |> system.time()
Expand All @@ -721,7 +721,7 @@ construct_ProtoExprArray = function(...) {
#' pl$options$rpool_cap
#' pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select(
#' pl$all()$map(\(s) {
#' Sys.sleep(.5)
#' Sys.sleep(.1)
#' s * 2
#' }, in_background = TRUE)
#' )$collect() |> system.time()
Expand Down
29 changes: 14 additions & 15 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ LazyFrame_collect = function(
#' # Some expression which does contain a map
#' expr = pl$col("mpg")$map(
#' \(x) {
#' Sys.sleep(.5)
#' Sys.sleep(.1)
#' x * 0.43
#' },
#' in_background = TRUE # set TRUE if collecting in background queries with $map or $apply
Expand Down Expand Up @@ -1408,20 +1408,19 @@ LazyFrame_fetch = function(
#' agg(pl$col(pl$Float64)$apply(r_func))$
#' profile()
LazyFrame_profile = function(
type_coercion = TRUE,
predicate_pushdown = TRUE,
projection_pushdown = TRUE,
simplify_expression = TRUE,
slice_pushdown = TRUE,
comm_subplan_elim = TRUE,
comm_subexpr_elim = TRUE,
streaming = FALSE,
no_optimization = FALSE,
inherit_optimization = FALSE,
collect_in_background = FALSE,
show_plot = FALSE,
truncate_nodes = 0) {

type_coercion = TRUE,
predicate_pushdown = TRUE,
projection_pushdown = TRUE,
simplify_expression = TRUE,
slice_pushdown = TRUE,
comm_subplan_elim = TRUE,
comm_subexpr_elim = TRUE,
streaming = FALSE,
no_optimization = FALSE,
inherit_optimization = FALSE,
collect_in_background = FALSE,
show_plot = FALSE,
truncate_nodes = 0) {
if (isTRUE(no_optimization)) {
predicate_pushdown = FALSE
projection_pushdown = FALSE
Expand Down
39 changes: 27 additions & 12 deletions R/options.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ polars_optreq$rpool_cap = list() # rust-side options already check args
#' @docType NULL
#'
#' @details
#' All args must be explicitly and fully named.
#'
#' `pl$options$rpool_active` indicates the number of R sessions already
#' spawned in pool. `pl$options$rpool_cap` indicates the maximum number of new R
#' sessions that can be spawned. Anytime a polars thread worker needs a background
Expand Down Expand Up @@ -100,8 +102,19 @@ pl$set_options = function(
# modified in the first call)
args_modified = names(as.list(sys.call()[-1]))

if (is.null(args_modified) || any(nchar(args_modified) == 0L)) {
Err_plain("all args must be named") |>
unwrap("in pl$set_options")
}

for (i in seq_along(args_modified)) {
value = get(args_modified[i])
value = result(get(args_modified[i])) |>
map_err(\(rp_err) {
rp_err$
hint("arg-name does not match any defined args of `?set_options`")$
bad_arg(args_modified[i])
}) |>
unwrap("in pl$set_options")

# each argument has its own input requirements
validation = c()
Expand Down Expand Up @@ -263,26 +276,28 @@ pl$with_string_cache = function(expr) {
#' Multi-process communication has overhead because all data must be
#' serialized/de-serialized and sent via buffers. Using multiple R sessions
#' will likely only give a speed-up in a `low io - high cpu` scenario. Native
#' polars query syntax runs in threads and have no overhead.
#' polars query syntax runs in threads and have no overhead. Polars has as default
#' double as many thread workers as cores. If any worker are queuing for or using R sessions,
#' other workers can still continue any native polars parts as much as possible.
#'
#' @return
#' `pl$get_global_rpool_cap()` returns a list with two elements `available`
#' and `capacity`. `available` is the number of R sessions are already spawned
#' in pool. `capacity` is the limit of new R sessions to spawn. Anytime a polars
#' `pl$options$rpool_cap` returns the capacity ("limit") of co-running external R sessions /
#' processes. `pl$options$rpool_active` is the number of R sessions are already spawned
#' in the pool. `rpool_cap` is the limit of new R sessions to spawn. Anytime a polars
#' thread worker needs a background R session specifically to run R code embedded
#' in a query via `$map(..., in_background = TRUE)` or
#' `$apply(..., in_background = TRUE)`, it will obtain any R session idling in
#' rpool, or spawn a new R session (process) and add it to pool if `capacity`
#' rpool, or spawn a new R session (process) if `capacity`
#' is not already reached. If `capacity` is already reached, the thread worker
#' will sleep until an R session is idling.
#' will sleep and in a R job queue until an R session is idle.
#'
#' @keywords options
#' @examples
#' default = pl$get_global_rpool_cap()
#' print(default)
#' pl$set_global_rpool_cap(8)
#' pl$get_global_rpool_cap()
#' pl$set_global_rpool_cap(default$capacity)
#' default = pl$options$rpool_cap |> print()
#' pl$set_options(rpool_cap = 8)
#' pl$options$rpool_cap
#' pl$set_options(rpool_cap = default)
#' pl$options$rpool_cap
pl$get_global_rpool_cap = function() {
warning(
"in pl$get_global_rpool_cap(): Deprecated. Use pl$options$rpool_cap instead.",
Expand Down
2 changes: 1 addition & 1 deletion R/rbackground.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ print.RThreadHandle = function(x, ...) as.character(x) |> cat("\n")
#' [`<Expr>$apply()`][Expr_apply]
#' @examples
#' prexpr = pl$col("mpg")$map(\(x) {
#' Sys.sleep(1.5)
#' Sys.sleep(.1)
#' x * 0.43
#' }, in_background = TRUE)$alias("kml")
#' handle = pl$LazyFrame(mtcars)$with_columns(prexpr)$collect_in_background()
Expand Down
13 changes: 8 additions & 5 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ move_env_elements(Expr, pl, c("lit"), remove = FALSE)

#' Get Memory Address
#' @name pl_mem_address
#' @description mimics pl$mem_address
#' @description Get underlying mem address a rust object (via ExtPtr). Expert use only.
#' @details Does not give meaningful answers for regular R objects.
#' @param robj an R object
#' @aliases mem_address
#' @return String of mem address
Expand Down Expand Up @@ -147,22 +148,24 @@ pl$mem_address = mem_address
makeActiveBinding(
"rpool_cap",
\(arg) {
if(missing(arg)) {
if (missing(arg)) {
unwrap(get_global_rpool_cap())$capacity
} else {
unwrap(set_global_rpool_cap(arg))
}
}, env = polars_optenv
},
env = polars_optenv
)
makeActiveBinding(
"rpool_active",
\(arg) {
if(missing(arg)) {
if (missing(arg)) {
unwrap(get_global_rpool_cap())$active
} else {
unwrap(stop("internal error: polars_optenv$rpool_active cannot be set directly"))
}
}, env = polars_optenv
},
env = polars_optenv
)

setup_renv()
Expand Down
6 changes: 3 additions & 3 deletions man/Expr_map.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/LazyFrame_collect_in_background.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/RThreadHandle_RThreadHandle_class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 13 additions & 11 deletions man/global_rpool_cap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/pl_mem_address.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/polars_options.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test-options.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,20 @@ test_that("pl$options$ read-write", {
# reset_options() works
pl$reset_options()
expect_identical(pl$options, old_options)

# all set_options args must be named
expect_identical(
pl$set_options(42) |> get_err_ctx("Plain"),
"all args must be named"
)
expect_identical(
pl$set_options(rpool_cap = 42, 42) |> get_err_ctx("Plain"),
"all args must be named"
)

# incomplete/misspelled name not allowed
expect_identical(
pl$set_options(rpo = 42) |> get_err_ctx("Hint"),
"arg-name does not match any defined args of `?set_options`"
)
})
2 changes: 1 addition & 1 deletion tests/testthat/test-rbackground.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ test_that("Test using $map() in background", {

compute = lf$select(pl$col("y")$map(\(x) x * x, in_background = FALSE))
compute_bg = lf$select(pl$col("y")$map(\(x) {
Sys.sleep(2)
Sys.sleep(.3)
x * x
}, in_background = TRUE))
res_ref = compute$collect()$to_data_frame()
Expand Down

0 comments on commit 17bea56

Please sign in to comment.