Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
etiennebacher committed Mar 29, 2024
1 parent 5812f3e commit ea39e00
Show file tree
Hide file tree
Showing 15 changed files with 91 additions and 164 deletions.
2 changes: 0 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ S3method("$",RPolarsExprStructNameSpace)
S3method("$",RPolarsGroupBy)
S3method("$",RPolarsLazyFrame)
S3method("$",RPolarsLazyGroupBy)
S3method("$",RPolarsProtoExprArray)
S3method("$",RPolarsRField)
S3method("$",RPolarsRNullValues)
S3method("$",RPolarsRThreadHandle)
Expand Down Expand Up @@ -106,7 +105,6 @@ S3method("[[",RPolarsExpr)
S3method("[[",RPolarsGroupBy)
S3method("[[",RPolarsLazyFrame)
S3method("[[",RPolarsLazyGroupBy)
S3method("[[",RPolarsProtoExprArray)
S3method("[[",RPolarsRField)
S3method("[[",RPolarsRNullValues)
S3method("[[",RPolarsRThreadHandle)
Expand Down
1 change: 0 additions & 1 deletion R/after-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ extendr_method_to_pure_functions = function(env, class_name = NULL) {
.pr$DataTypeVector = extendr_method_to_pure_functions(RPolarsDataTypeVector)
.pr$RField = extendr_method_to_pure_functions(RPolarsRField)
.pr$Expr = extendr_method_to_pure_functions(RPolarsExpr)
.pr$ProtoExprArray = extendr_method_to_pure_functions(RPolarsProtoExprArray)
.pr$When = extendr_method_to_pure_functions(RPolarsWhen)
.pr$Then = extendr_method_to_pure_functions(RPolarsThen)
.pr$ChainedWhen = extendr_method_to_pure_functions(RPolarsChainedWhen)
Expand Down
85 changes: 19 additions & 66 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -661,68 +661,6 @@ Expr_is_null = use_extendr_wrapper
Expr_is_not_null = use_extendr_wrapper


# TODO move this function in to rust with input list of args
# TODO deprecate context feature
#' construct proto Expr array from args
#' @noRd
#' @param ... any Expr or string
#'
#'
#'
#' @return RPolarsProtoExprArray object
#'
#' @examples .pr$env$construct_ProtoExprArray(pl$col("Species"), "Sepal.Width")
construct_ProtoExprArray = function(...) {
pra = RPolarsProtoExprArray$new()
args = list2(...)

# deal with list of expressions
is_list = which(vapply(args, is.list, FUN.VALUE = logical(1L)))
for (i in seq_along(is_list)) {
tmp = unlist(args[[is_list[i]]], recursive = FALSE)
args[[is_list[i]]] = NULL
args = append(tmp, args)
}
args = Filter(Negate(is.null), args)

arg_names = names(args)


# if args not named load in Expr and string
if (is.null(arg_names)) {
if (length(args) == 1 && is.list(args)) {
args = unlist(args)
}
for (i in args) {
# if (is_string(i)) {
# pra$push_back_str(i)
# next
# }
pra$push_back_rexpr(wrap_e(i, str_to_lit = FALSE))
}

# if args named, convert string to col and alias any column by name if a name
} else {
for (i in seq_along(args)) {
arg = args[[i]]
name = arg_names[i]

expr = wrap_e(arg, str_to_lit = FALSE)


if (nchar(name) >= 1L) {
expr = expr$alias(name)
}
pra$push_back_rexpr(expr) # rust method
}
}



pra
}


## TODO allow list to be formed from recursive R lists
## TODO Contribute polars, seems polars now prefer word f or function in map/apply/rolling/apply
# over lambda. However lambda is still in examples.
Expand Down Expand Up @@ -1872,7 +1810,17 @@ Expr_last = use_extendr_wrapper
#' This applies an expression on groups and returns the same number of rows as
#' the input (contrarily to `$group_by()` + `$agg()`).
#'
#' @param ... Character vector indicating the columns to group by.
#' @param expr Columns to group by. Can be an Expr or something coercible to an
#' Expr. Strings are parsed as column names.
#' @param ... Not used.
#' @param mapping_strategy One of the following:
#' * `"group_to_rows"` (default): if the aggregation results in multiple values,
#' assign them back to their position in the DataFrame. This can only be done
#' if the group yields the same elements before aggregation as after.
#' * `"join"`: join the groups as `List<group_dtype>` to the row positions. Note
#' that this can be memory intensive.
#' * `"explode"`: don’t do any mapping, but simply flatten the group. This only
#' makes sense if the input data is sorted.
#'
#' @return Expr
#' @examples
Expand All @@ -1892,9 +1840,14 @@ Expr_last = use_extendr_wrapper
#' )$with_columns(
#' count = pl$col("val")$count()$over(over_vars)
#' )
Expr_over = function(...) {
pra = construct_ProtoExprArray(...)
.pr$Expr$over(self, pra)
Expr_over = function(expr, ..., mapping_strategy = "group_to_rows") {
expr = c(
wrap_elist_result(expr, str_to_lit = FALSE) |>
unwrap("in $over():"),
list2(...)
)
.pr$Expr$over(self, expr, mapping_strategy) |>
unwrap("in $over():")
}

#' Check whether each value is unique
Expand Down
18 changes: 1 addition & 17 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -918,7 +918,7 @@ RPolarsExpr$div <- function(other) .Call(wrap__RPolarsExpr__div, self, other)

RPolarsExpr$pow <- function(exponent) .Call(wrap__RPolarsExpr__pow, self, exponent)

RPolarsExpr$over <- function(proto_exprs) .Call(wrap__RPolarsExpr__over, self, proto_exprs)
RPolarsExpr$over <- function(expr, mapping) .Call(wrap__RPolarsExpr__over, self, expr, mapping)

RPolarsExpr$print <- function() invisible(.Call(wrap__RPolarsExpr__print, self))

Expand Down Expand Up @@ -1084,22 +1084,6 @@ RPolarsExpr$rolling <- function(index_column, period, offset, closed, check_sort
#' @export
`[[.RPolarsExpr` <- `$.RPolarsExpr`

RPolarsProtoExprArray <- new.env(parent = emptyenv())

RPolarsProtoExprArray$new <- function() .Call(wrap__RPolarsProtoExprArray__new)

RPolarsProtoExprArray$push_back_str <- function(s) invisible(.Call(wrap__RPolarsProtoExprArray__push_back_str, self, s))

RPolarsProtoExprArray$push_back_rexpr <- function(r) invisible(.Call(wrap__RPolarsProtoExprArray__push_back_rexpr, self, r))

RPolarsProtoExprArray$print <- function() invisible(.Call(wrap__RPolarsProtoExprArray__print, self))

#' @export
`$.RPolarsProtoExprArray` <- function (self, name) { func <- RPolarsProtoExprArray[[name]]; environment(func) <- environment(); func }

#' @export
`[[.RPolarsProtoExprArray` <- `$.RPolarsProtoExprArray`

RPolarsLazyFrame <- new.env(parent = emptyenv())

RPolarsLazyFrame$print <- function() .Call(wrap__RPolarsLazyFrame__print, self)
Expand Down
11 changes: 5 additions & 6 deletions R/functions__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -500,18 +500,17 @@ pl_max = function(...) {
#' @examples
#' df = pl$DataFrame(
#' a = NA_real_,
#' b = c(1:2, NA_real_, NA_real_),
#' c = c(1:3, NA_real_)
#' b = c(1L, 4L, NA_real_, NA_real_),
#' c = c(2:4, NA_real_)
#' )
#'
#' # use coalesce to get first non Null value for each row, otherwise insert 99.9
#' df$with_columns(
#' pl$coalesce("a", "b", "c", 99.9)$alias("d")
#' )
#'
pl_coalesce = function(...) {
column = list2(...)
pra = do.call(construct_ProtoExprArray, column)
coalesce_exprs(pra)
coalesce_exprs(list2(...)) |>
unwrap("in pl$coalesce():")
}


Expand Down
5 changes: 3 additions & 2 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -1089,8 +1089,9 @@ LazyFrame_tail = function(n = 5L) {
#' tmp$drop_nulls("mpg")$collect()$height
#' tmp$drop_nulls(c("mpg", "hp"))$collect()$height
LazyFrame_drop_nulls = function(subset = NULL) {
pra = do.call(construct_ProtoExprArray, as.list(subset))
.pr$LazyFrame$drop_nulls(self, pra)
if (!is.null(subset)) subset = as.list(subset)
.pr$LazyFrame$drop_nulls(self, subset) |>
unwrap("in $drop_nulls():")
}

#' @inherit DataFrame_unique title description params
Expand Down
18 changes: 16 additions & 2 deletions man/Expr_over.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/pl_coalesce.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 7 additions & 6 deletions src/rust/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,13 +320,14 @@ impl RPolarsLazyFrame {
RPolarsLazyFrame(new_df)
}

fn drop_nulls(&self, subset: &RPolarsProtoExprArray) -> RPolarsLazyFrame {
if subset.0.is_empty() {
RPolarsLazyFrame(self.0.clone().drop_nulls(None))
fn drop_nulls(&self, subset: Robj) -> RResult<Self> {
let subset = robj_to!(Option, VecPLExprCol, subset)?;
let out = if subset.is_some() {
RPolarsLazyFrame(self.0.clone().drop_nulls(subset))
} else {
let vec = pra_to_vec(subset, "select");
RPolarsLazyFrame(self.0.clone().drop_nulls(Some(vec)))
}
RPolarsLazyFrame(self.0.clone().drop_nulls(None))
};
Ok(out.into())
}

fn unique(&self, subset: Robj, keep: Robj, maintain_order: Robj) -> RResult<RPolarsLazyFrame> {
Expand Down
55 changes: 9 additions & 46 deletions src/rust/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1774,10 +1774,15 @@ impl RPolarsExpr {
Ok(self.0.clone().pow(robj_to!(PLExpr, exponent)?).into())
}

//expr "funnies"
pub fn over(&self, proto_exprs: &RPolarsProtoExprArray) -> Self {
let ve = pra_to_vec(proto_exprs, "select");
self.0.clone().over(ve).into()
pub fn over(&self, expr: Robj, mapping: Robj) -> RResult<Self> {
Ok(self
.0
.clone()
.over_with_options(
robj_to!(VecPLExprCol, expr)?,
robj_to!(WindowMapping, mapping)?,
)
.into())
}

pub fn print(&self) {
Expand Down Expand Up @@ -2617,47 +2622,6 @@ impl ProtoRexpr {
}
}

//and array of expression or proto expressions.
#[derive(Clone, Debug)]
pub struct RPolarsProtoExprArray(pub Vec<ProtoRexpr>);

impl Default for RPolarsProtoExprArray {
fn default() -> Self {
Self::new()
}
}

#[extendr]
impl RPolarsProtoExprArray {
pub fn new() -> Self {
RPolarsProtoExprArray(Vec::new())
}

pub fn push_back_str(&mut self, s: &str) {
self.0.push(ProtoRexpr::new_str(s));
}

pub fn push_back_rexpr(&mut self, r: &RPolarsExpr) {
self.0.push(ProtoRexpr::new_expr(r));
}

pub fn print(&self) {
rprintln!("{:#?}", self);
}
}

impl RPolarsProtoExprArray {
pub fn to_vec(&self, context: &str) -> Vec<pl::Expr> {
self.0.iter().map(|re| re.to_rexpr(context).0).collect()
}
}

//external function as extendr-api do not allow methods returning unwrapped structs
//deprecate use method instead
pub fn pra_to_vec(pra: &RPolarsProtoExprArray, context: &str) -> Vec<pl::Expr> {
pra.0.iter().map(|re| re.to_rexpr(context).0).collect()
}

//make options rolling options from R friendly arguments, handle conversion errors
pub fn make_rolling_options(
window_size: Robj,
Expand Down Expand Up @@ -2711,7 +2675,6 @@ pub fn create_cols_from_datatypes(list_of_dtypes: Robj) -> RResult<RPolarsExpr>
extendr_module! {
mod dsl;
impl RPolarsExpr;
impl RPolarsProtoExprArray;
fn internal_wrap_e;
fn create_col;
fn create_cols_from_strs;
Expand Down
12 changes: 12 additions & 0 deletions src/rust/src/rdatatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,18 @@ pub fn robj_to_non_existent(robj: Robj) -> RResult<pl::NonExistent> {
}
}

pub fn robj_to_window_mapping(robj: Robj) -> RResult<pl::WindowMapping> {
use pl::WindowMapping as WM;
match robj_to_rchoice(robj)?.to_lowercase().as_str() {
"group_to_rows" => Ok(WM::GroupsToRows),
"join" => Ok(WM::Join),
"explode" => Ok(WM::Explode),
s => rerr().bad_val(format!(
"WindowMapping choice ('{s}') must be one of 'group_to_rows', 'join', 'explode'"
)),
}
}

pub fn literal_to_any_value(litval: pl::LiteralValue) -> RResult<pl::AnyValue<'static>> {
use pl::AnyValue as av;
use pl::LiteralValue as lv;
Expand Down
7 changes: 3 additions & 4 deletions src/rust/src/rlib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::lazy::dsl::{RPolarsExpr, RPolarsProtoExprArray};
use crate::lazy::dsl::RPolarsExpr;
use crate::rdataframe::RPolarsDataFrame;
use crate::robj_to;
use crate::series::RPolarsSeries;
Expand Down Expand Up @@ -65,9 +65,8 @@ fn any_horizontal(dotdotdot: Robj) -> RResult<RPolarsExpr> {
}

#[extendr]
fn coalesce_exprs(exprs: &RPolarsProtoExprArray) -> RPolarsExpr {
let exprs: Vec<pl::Expr> = exprs.to_vec("select");
pl::coalesce(exprs.as_slice()).into()
fn coalesce_exprs(exprs: Robj) -> RResult<RPolarsExpr> {
Ok(pl::coalesce(&robj_to!(VecPLExprCol, exprs)?).into())
}

#[extendr]
Expand Down
4 changes: 4 additions & 0 deletions src/rust/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,10 @@ macro_rules! robj_to_inner {
(NonExistent, $a:ident) => {
$crate::rdatatype::robj_to_non_existent($a)
};

(WindowMapping, $a:ident) => {
$crate::rdatatype::robj_to_window_mapping($a)
};
}

//convert any Robj to appropriate rust type with informative error Strings
Expand Down
Loading

0 comments on commit ea39e00

Please sign in to comment.