diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index ba38b27a7..accac13c7 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -65,6 +65,8 @@ test_robj_to_expr <- function(robj) .Call(wrap__test_robj_to_expr, robj) test_wrong_call_pl_lit <- function(robj) .Call(wrap__test_wrong_call_pl_lit, robj) +test_robj_to_rchoice <- function(robj) .Call(wrap__test_robj_to_rchoice, robj) + polars_features <- function() .Call(wrap__polars_features) concat_lf <- function(l, rechunk, parallel, to_supertypes) .Call(wrap__concat_lf, l, rechunk, parallel, to_supertypes) @@ -317,6 +319,8 @@ RPolarsErr$mistyped <- function(s) .Call(wrap__RPolarsErr__mistyped, self, s) RPolarsErr$misvalued <- function(s) .Call(wrap__RPolarsErr__misvalued, self, s) +RPolarsErr$notachoice <- function(s) .Call(wrap__RPolarsErr__notachoice, self, s) + RPolarsErr$plain <- function(s) .Call(wrap__RPolarsErr__plain, self, s) RPolarsErr$rcall <- function(c) .Call(wrap__RPolarsErr__rcall, self, c) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 785b0ffa9..193525a5a 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -982,35 +982,30 @@ LazyFrame_join = function( suffix = "_right", allow_parallel = TRUE, force_parallel = FALSE) { - if (inherits(other, "LazyFrame")) { - # nothing - } else if (inherits(other, "DataFrame")) { - other = other - } else { - stop(paste("Expected a `LazyFrame` as join table, got ", class(other))) - } - how_opts = c("inner", "left", "outer", "semi", "anti", "cross") - how = match.arg(how[1L], how_opts) + uw = \(res) unwrap(res, "in $join():") + + if (inherits(other, "DataFrame")) { + other = other$lazy() + } if (!is.null(on)) { - rexprs = do.call(construct_ProtoExprArray, as.list(on)) - rexprs_left = rexprs - rexprs_right = rexprs + rexprs_right = rexprs_left = as.list(on) } else if ((!is.null(left_on) && !is.null(right_on))) { - rexprs_left = do.call(construct_ProtoExprArray, as.list(left_on)) - rexprs_right = do.call(construct_ProtoExprArray, as.list(right_on)) + rexprs_left = as.list(left_on) + rexprs_right = as.list(right_on) } else if (how != "cross") { - stop("must specify `on` OR ( `left_on` AND `right_on` ) ") + Err_plain("must specify `on` OR ( `left_on` AND `right_on` ) ") |> uw() } else { - rexprs_left = do.call(construct_ProtoExprArray, as.list(self$columns)) - rexprs_right = do.call(construct_ProtoExprArray, as.list(other$columns)) + rexprs_left = as.list(self$columns) + rexprs_right = as.list(other$columns) } .pr$LazyFrame$join( self, other, rexprs_left, rexprs_right, how, suffix, allow_parallel, force_parallel - ) + ) |> + uw() } diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 5949de990..cb60a4025 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -7,8 +7,8 @@ use crate::lazy::dsl::*; use crate::rdataframe::DataFrame as RDF; use crate::rdatatype::{ - new_asof_strategy, new_ipc_compression, new_join_type, new_parquet_compression, - new_unique_keep_strategy, RPolarsDataType, + new_asof_strategy, new_ipc_compression, new_parquet_compression, new_unique_keep_strategy, + RPolarsDataType, }; use crate::robj_to; use crate::rpolarserr::{polars_to_rpolars_err, RResult, Rctx, WithRctx}; @@ -406,31 +406,27 @@ impl LazyFrame { #[allow(clippy::too_many_arguments)] fn join( &self, - other: &LazyFrame, - left_on: &ProtoExprArray, - right_on: &ProtoExprArray, - how: &str, - suffix: &str, - allow_parallel: bool, - force_parallel: bool, - ) -> LazyFrame { - let ldf = self.0.clone(); - let other = other.0.clone(); - let left_on = pra_to_vec(left_on, "select"); - let right_on = pra_to_vec(right_on, "select"); - let how = new_join_type(how); - - LazyFrame( - ldf.join_builder() - .with(other) - .left_on(left_on) - .right_on(right_on) - .allow_parallel(allow_parallel) - .force_parallel(force_parallel) - .how(how) - .suffix(suffix) + other: Robj, + left_on: Robj, + right_on: Robj, + how: Robj, + suffix: Robj, + allow_parallel: Robj, + force_parallel: Robj, + ) -> RResult { + Ok(LazyFrame( + self.0 + .clone() + .join_builder() + .with(robj_to!(PLLazyFrame, other)?) + .left_on(robj_to!(VecPLExprCol, left_on)?) + .right_on(robj_to!(VecPLExprCol, right_on)?) + .allow_parallel(robj_to!(bool, allow_parallel)?) + .force_parallel(robj_to!(bool, force_parallel)?) + .how(robj_to!(JoinType, how)?) + .suffix(robj_to!(str, suffix)?) .finish(), - ) + )) } pub fn sort_by_exprs( diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 1b8632a6c..0a6d982b3 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -668,7 +668,7 @@ impl Expr { min_periods: robj_to!(usize, min_periods)?, center: robj_to!(bool, center)?, by: robj_to!(Option, String, by)?, - closed_window: robj_to!(Option, new_closed_window, closed)?, + closed_window: robj_to!(Option, ClosedWindow, closed)?, fn_params: Some(pl::Arc::new(pl::RollingQuantileParams { prob: robj_to!(f64, quantile)?, interpol: robj_to!(new_quantile_interpolation_option, interpolation)?, @@ -2472,7 +2472,7 @@ pub fn make_rolling_options( min_periods: robj_to!(usize, min_periods)?, center: robj_to!(bool, center)?, by: robj_to!(Option, String, by_null)?, - closed_window: robj_to!(Option, new_closed_window, closed_null)?, + closed_window: robj_to!(Option, ClosedWindow, closed_null)?, ..Default::default() }) } diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index cc3a41252..79756da7d 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -14,6 +14,8 @@ pub struct RField(pub pl::Field); use pl::UniqueKeepStrategy; use polars::prelude::AsofStrategy; +use crate::utils::robj_to_rchoice; + #[extendr] impl RField { fn new(name: String, datatype: &RPolarsDataType) -> RField { @@ -245,18 +247,6 @@ impl DataTypeVector { } } -pub fn new_join_type(s: &str) -> pl::JoinType { - match s { - "cross" => pl::JoinType::Cross, - "inner" => pl::JoinType::Inner, - "left" => pl::JoinType::Left, - "outer" => pl::JoinType::Outer, - "semi" => pl::JoinType::Semi, - "anti" => pl::JoinType::Anti, - _ => panic!("polars internal error: jointype not recognized"), - } -} - pub fn new_asof_strategy(s: &str) -> Result { match s { "forward" => Ok(AsofStrategy::Forward), @@ -296,20 +286,6 @@ pub fn new_quantile_interpolation_option(robj: Robj) -> RResult RResult { - let s = robj_to_string(robj.clone())?; - use pl::ClosedWindow as CW; - match s.as_str() { - "both" => Ok(CW::Both), - "left" => Ok(CW::Left), - "none" => Ok(CW::None), - "right" => Ok(CW::Right), - _ => rerr() - .bad_val("ClosedWindow choice: [{}] is not any of 'both', 'left', 'none' or 'right'") - .bad_robj(&robj), - } -} - pub fn new_null_behavior( s: &str, ) -> std::result::Result { @@ -512,6 +488,34 @@ pub fn new_rolling_cov_options( }) } +pub fn robj_to_join_type(robj: Robj) -> RResult { + let s = robj_to_rchoice(robj)?; + match s.as_str() { + "cross" => Ok(pl::JoinType::Cross), + "inner" => Ok(pl::JoinType::Inner), + "left" => Ok(pl::JoinType::Left), + "outer" => Ok(pl::JoinType::Outer), + "semi" => Ok(pl::JoinType::Semi), + "anti" => Ok(pl::JoinType::Anti), + s => rerr().bad_val(format!( + "JoinType choice ['{s}'] should be one of 'cross', 'inner', 'left', 'outer', 'semi', 'anti'" + )), + } +} + +pub fn robj_to_closed_window(robj: Robj) -> RResult { + use pl::ClosedWindow as CW; + match robj_to_rchoice(robj)?.as_str() { + "both" => Ok(CW::Both), + "left" => Ok(CW::Left), + "none" => Ok(CW::None), + "right" => Ok(CW::Right), + s => rerr().bad_val(format!( + "ClosedWindow choice ['{s}'] should be one of 'both', 'left', 'none', 'right'" + )), + } +} + extendr_module! { mod rdatatype; impl RPolarsDataType; diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs index 9bf4eadd5..3b5c7dea5 100644 --- a/src/rust/src/rlib.rs +++ b/src/rust/src/rlib.rs @@ -5,11 +5,11 @@ use crate::robj_to; use crate::rpolarserr::{rdbg, RResult}; use crate::series::Series; use crate::utils::extendr_concurrent::{ParRObj, ThreadCom}; +use crate::utils::robj_to_rchoice; use crate::RFnSignature; use crate::CONFIG; use extendr_api::prelude::*; use polars::prelude as pl; - use std::result::Result; #[extendr] @@ -65,7 +65,7 @@ fn r_date_range_lazy( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, robj_to!(pl_duration, every)?, - robj_to!(new_closed_window, closed)?, + robj_to!(ClosedWindow, closed)?, robj_to!(Option, timeunit, time_unit)?, robj_to!(Option, String, time_zone)?, ); @@ -221,6 +221,12 @@ fn test_wrong_call_pl_lit(robj: Robj) -> RResult { Ok(R!("pl$lit({{robj}})")?) // this call should have been polars::pl$lit(... } +#[extendr] +fn test_robj_to_rchoice(robj: Robj) -> RResult { + // robj can be any non-zero length char vec, will return first string. + robj_to_rchoice(robj) +} + #[extendr] fn polars_features() -> List { list!( @@ -301,6 +307,7 @@ extendr_module! { fn test_print_string; fn test_robj_to_expr; fn test_wrong_call_pl_lit; + fn test_robj_to_rchoice; //feature flags fn polars_features; diff --git a/src/rust/src/rpolarserr.rs b/src/rust/src/rpolarserr.rs index 78b98eba4..4e2575e2d 100644 --- a/src/rust/src/rpolarserr.rs +++ b/src/rust/src/rpolarserr.rs @@ -25,6 +25,8 @@ pub enum Rctx { Mistyped(String), #[error("Expected a value that {0}")] Misvalued(String), + #[error("Not a valid R choice because {0}")] + NotAChoice(String), #[error("{0}")] Plain(String), #[error("Encountered the following error in Rust-Polars:\n\t{0}")] @@ -51,6 +53,7 @@ pub trait WithRctx { fn hint(self, cause: impl Into) -> RResult; fn mistyped(self, ty: impl Into) -> RResult; fn misvalued(self, scope: impl Into) -> RResult; + fn notachoice(self, scope: impl Into) -> RResult; fn plain(self, msg: impl Into) -> RResult; fn when(self, env: impl Into) -> RResult; } @@ -96,6 +99,10 @@ impl> WithRctx for core::result::Result { self.ctx(Rctx::Misvalued(scope.into())) } + fn notachoice(self, scope: impl Into) -> RResult { + self.ctx(Rctx::NotAChoice(scope.into())) + } + fn plain(self, msg: impl Into) -> RResult { self.ctx(Rctx::Plain(msg.into())) } @@ -129,6 +136,7 @@ impl RPolarsErr { Hint(msg) => ("Hint", msg), Mistyped(ty) => ("TypeMismatch", ty), Misvalued(scope) => ("ValueOutOfScope", scope), + NotAChoice(err) => ("NotAChoice", err), Plain(msg) => ("PlainErrorMessage", msg), Polars(err) => ("PolarsError", err), When(target) => ("When", target), @@ -166,6 +174,10 @@ impl RPolarsErr { self.push_back_rctx(Rctx::Misvalued(s)) } + pub fn notachoice(&self, s: String) -> Self { + self.push_back_rctx(Rctx::NotAChoice(s)) + } + pub fn plain(&self, s: String) -> Self { self.push_back_rctx(Rctx::Plain(s)) } diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index b7d3760c5..10fe8609a 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -8,16 +8,19 @@ use crate::lazy::dsl::Expr; use crate::rdatatype::RPolarsDataType; use crate::rpolarserr::{polars_to_rpolars_err, rdbg, rerr, RPolarsErr, RResult, WithRctx}; use crate::series::Series; -use extendr_api::prelude::list; + use std::any::type_name as tn; //use std::intrinsics::read_via_copy; use crate::lazy::dsl::robj_to_col; use crate::rdataframe::{DataFrame, LazyFrame}; use extendr_api::eval_string_with_params; +use extendr_api::prelude::{list, Result as EResult, Strings}; use extendr_api::Attributes; +use extendr_api::CanBeNA; use extendr_api::ExternalPtr; use extendr_api::Result as ExtendrResult; use extendr_api::R; + use polars::prelude as pl; //macro to translate polars NULLs and emulate R NA value of any type @@ -531,6 +534,37 @@ pub fn robj_to_str<'a>(robj: extendr_api::Robj) -> RResult<&'a str> { } } +// This conversion assists conversion of R choice char vec e.g. c("a", "b") +// Only the first element "a" will passed on as String +// conversion error if not a char vec with none-zero length. +// NA is not allowed +// other conversions will use it e.g. robj_to_join_type() or robj_to_closed_window() +pub fn robj_to_rchoice(robj: extendr_api::Robj) -> RResult { + let robj = unpack_r_result_list(robj)?; + let robj_clone = robj.clone(); + let s_res: EResult = robj.try_into(); + let opt_str = s_res.map(|s| s.iter().next().map(|rstr| rstr.clone())); + match opt_str { + // NA_CHARACTER not allowed as first element return error + Ok(Some(rstr)) if rstr.is_na() => { + Err(RPolarsErr::new().notachoice("NA_character is not allowed".into())) + } + + // At least one string, return first string + Ok(Some(rstr)) => Ok(rstr.to_string()), + + // Not character vector, return Error + Err(_extendr_err) => { + //let rpolars_err: RPolarsErr = _extendr_err.into(); extendr error not that helpful + Err(RPolarsErr::new().notachoice("input is not a character vector".into())) + } + + // An empty chr vec, return Error + Ok(None) => Err(RPolarsErr::new().notachoice("character vector has zero length".into())), + } + .map_err(|err| err.bad_robj(robj_clone)) +} + pub fn robj_to_usize(robj: extendr_api::Robj) -> RResult { robj_to_u64(robj).and_then(try_u64_into_usize) } @@ -930,8 +964,8 @@ macro_rules! robj_to_inner { (timeunit, $a:ident) => { $crate::rdatatype::robj_to_timeunit($a) }; - (new_closed_window, $a:ident) => { - $crate::rdatatype::new_closed_window($a) + (ClosedWindow, $a:ident) => { + $crate::rdatatype::robj_to_closed_window($a) }; (new_quantile_interpolation_option, $a:ident) => { $crate::rdatatype::new_quantile_interpolation_option($a) @@ -1016,6 +1050,10 @@ macro_rules! robj_to_inner { $crate::utils::robj_to_quote_style($a) }; + (JoinType, $a:ident) => { + $crate::rdatatype::robj_to_join_type($a) + }; + (RArrow_schema, $a:ident) => { $crate::utils::robj_to_rarrow_schema($a) }; diff --git a/tests/testthat/test-joins.R b/tests/testthat/test-joins.R index 8b63dd607..dd003a503 100644 --- a/tests/testthat/test-joins.R +++ b/tests/testthat/test-joins.R @@ -28,6 +28,16 @@ test_that("lazyframe join examples", { apple = c("x", "y", "z", NA) ) ) + + # error on unknown how choice + ctx = df$join(other_df, on = "ham", how = "not a choice") |> get_err_ctx() + expect_true(startsWith(ctx$BadValue, "JoinType choice ['not a choice'] should be one of")) + + # error on invalid choice + ctx = df$join(other_df, on = "ham", how = 42) |> get_err_ctx() + expect_true("NotAChoice" %in% names(ctx)) + + }) @@ -117,4 +127,5 @@ test_that("cross join, DataFrame", { x_right = rep(letters[1:3], 3) ) ) + }) diff --git a/tests/testthat/test-robj_to_rchoice.R b/tests/testthat/test-robj_to_rchoice.R new file mode 100644 index 000000000..5bfc4c106 --- /dev/null +++ b/tests/testthat/test-robj_to_rchoice.R @@ -0,0 +1,23 @@ +test_that("robj_to_rchoice", { + + # gets first value in char vec + expect_identical(test_robj_to_rchoice(c("a","b", NA))$ok, "a") + + # ... or string + expect_identical(test_robj_to_rchoice(c("a"))$ok, "a") + + # NA chr not allowed as first element + ctx = test_robj_to_rchoice(NA_character_)$err$contexts() + expect_identical(ctx$NotAChoice, "NA_character is not allowed") + + # empty chr vec not allowed as first element + ctx = test_robj_to_rchoice(character())$err$contexts() + expect_identical( + ctx$NotAChoice, + "character vector has zero length" + ) + + # non char vec / string not allowed + ctx = test_robj_to_rchoice(42)$err$contexts() + expect_identical(ctx$NotAChoice, "input is not a character vector") +})