From b98ac3a08c285316290077146a90553f907daa9c Mon Sep 17 00:00:00 2001 From: sorhawell Date: Fri, 13 Oct 2023 21:42:28 +0200 Subject: [PATCH 01/14] quote some {} in doc text --- R/expr__expr.R | 28 ++++++++++++++-------------- R/series__series.R | 12 ++++++------ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/R/expr__expr.R b/R/expr__expr.R index 06a0c0ca2..8888cc02d 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -1373,7 +1373,7 @@ Expr_rechunk = "use_extendr_wrapper" #' @aliases Expr_cumsum #' @name Expr_cumsum #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before summing to prevent overflow issues. #' @format NULL #' @examples @@ -1394,7 +1394,7 @@ Expr_cumsum = function(reverse = FALSE) { #' @aliases cumprod #' @name Expr_cumprod #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before summing to prevent overflow issues. #' #' @format NULL @@ -1415,7 +1415,7 @@ Expr_cumprod = function(reverse = FALSE) { #' @aliases cummin #' @name Expr_cummin #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before summing to prevent overflow issues. #' #' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} @@ -1437,7 +1437,7 @@ Expr_cummin = function(reverse = FALSE) { #' @aliases cummin #' @name Expr_cummin #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before summing to prevent overflow issues. #' #' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} @@ -1460,7 +1460,7 @@ Expr_cummax = function(reverse = FALSE) { #' @aliases cumcount #' @name Expr_cumcount #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before summing to prevent overflow issues. #' #' cumcount does not seem to count within lists. @@ -2065,7 +2065,7 @@ Expr_nan_min = "use_extendr_wrapper" #' Get sum value #' #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before summing to prevent overflow issues. #' #' @return Expr @@ -2664,7 +2664,7 @@ Expr_reinterpret = function(signed = TRUE) { #' The printing will happen when the expression evaluates, not when it is formed. #' @param fmt format string, should contain one set of `{}` where object will be printed #' This formatting mimics python "string".format() use in pypolars. The string can -#' contain any thing but should have exactly one set of curly bracket {}. +#' contain any thing but should have exactly one set of curly bracket `{}`. #' @return Expr #' @aliases inspect #' @examples @@ -2782,7 +2782,7 @@ prepare_rolling_window_args = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed : {'left', 'right', 'both', 'none'} +#' @param closed String options `{'left', 'right', 'both', 'none'}` #' Define whether the temporal window interval is closed or not. #' #' @@ -2848,7 +2848,7 @@ Expr_rolling_min = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed : {'left', 'right', 'both', 'none'} +#' @param closed String options `{'left', 'right', 'both', 'none'}` #' Define whether the temporal window interval is closed or not. #' #' @@ -2914,7 +2914,7 @@ Expr_rolling_max = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed : {'left', 'right', 'both', 'none'} +#' @param closed String options `{'left', 'right', 'both', 'none'}` #' Define whether the temporal window interval is closed or not. #' @details #' This functionality is experimental and may change without it being considered a @@ -2980,7 +2980,7 @@ Expr_rolling_mean = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed : {'left', 'right', 'both', 'none'} +#' @param closed String options `{'left', 'right', 'both', 'none'}` #' Define whether the temporal window interval is closed or not. #' @details #' This functionality is experimental and may change without it being considered a @@ -3111,7 +3111,7 @@ Expr_rolling_std = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed : {'left', 'right', 'both', 'none'} +#' @param closed String options `{'left', 'right', 'both', 'none'}` #' Define whether the temporal window interval is closed or not. #' #' @@ -3177,7 +3177,7 @@ Expr_rolling_var = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed : {'left', 'right', 'both', 'none'} +#' @param closed String options `{'left', 'right', 'both', 'none'}` #' Define whether the temporal window interval is closed or not. #' #' @@ -3248,7 +3248,7 @@ Expr_rolling_median = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed : {'left', 'right', 'both', 'none'} +#' @param closed String options `{'left', 'right', 'both', 'none'}` #' Define whether the temporal window interval is closed or not. #' #' diff --git a/R/series__series.R b/R/series__series.R index e34b57c77..8fde08f8a 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -572,7 +572,7 @@ Series_clone = "use_extendr_wrapper" #' @aliases Series_cumsum #' @name Series_cumsum #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before summing to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, NaN, 4, Inf))$cumsum() @@ -586,7 +586,7 @@ Series_cumsum = function(reverse = FALSE) { #' @return R scalar value #' @keywords Series #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before summing to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, 5))$sum() # a NA is dropped always @@ -601,7 +601,7 @@ Series_sum = function() { #' @return R scalar value #' @keywords Series #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before meanming to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, 5))$mean() # a NA is dropped always @@ -616,7 +616,7 @@ Series_mean = function() { #' @return R scalar value #' @keywords Series #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before medianming to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, 5))$median() # a NA is dropped always @@ -631,7 +631,7 @@ Series_median = function() { #' @return R scalar value #' @keywords Series #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before maxming to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, 5))$max() # a NA is dropped always @@ -646,7 +646,7 @@ Series_max = function() { #' @return R scalar value #' @keywords Series #' @details -#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to #' Int64 before taking the min to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, 5))$min() # a NA is dropped always From a9986353a64c5d6a8d8cbf9051e120a531ca8ee8 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Fri, 13 Oct 2023 23:04:39 +0200 Subject: [PATCH 02/14] roxygen --- man/Expr_cumcount.Rd | 2 +- man/Expr_cummin.Rd | 4 ++-- man/Expr_cumprod.Rd | 2 +- man/Expr_cumsum.Rd | 2 +- man/Expr_inspect.Rd | 2 +- man/Expr_rolling_max.Rd | 2 +- man/Expr_rolling_mean.Rd | 2 +- man/Expr_rolling_median.Rd | 2 +- man/Expr_rolling_min.Rd | 2 +- man/Expr_rolling_quantile.Rd | 2 +- man/Expr_rolling_sum.Rd | 2 +- man/Expr_rolling_var.Rd | 2 +- man/Expr_sum.Rd | 2 +- man/Series_cumsum.Rd | 2 +- man/Series_max.Rd | 2 +- man/Series_mean.Rd | 2 +- man/Series_median.Rd | 2 +- man/Series_min.Rd | 2 +- man/Series_sum.Rd | 2 +- 19 files changed, 20 insertions(+), 20 deletions(-) diff --git a/man/Expr_cumcount.Rd b/man/Expr_cumcount.Rd index 0db25280e..4ae4ff14d 100644 --- a/man/Expr_cumcount.Rd +++ b/man/Expr_cumcount.Rd @@ -18,7 +18,7 @@ Get an array with the cumulative count computed at every element. Counting from 0 to len } \details{ -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before summing to prevent overflow issues. cumcount does not seem to count within lists. diff --git a/man/Expr_cummin.Rd b/man/Expr_cummin.Rd index 62ad143c8..febc059f2 100644 --- a/man/Expr_cummin.Rd +++ b/man/Expr_cummin.Rd @@ -24,12 +24,12 @@ Get an array with the cumulative min computed at every element. Get an array with the cumulative max computed at every element. } \details{ -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before summing to prevent overflow issues. See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before summing to prevent overflow issues. See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} diff --git a/man/Expr_cumprod.Rd b/man/Expr_cumprod.Rd index 27a8af3fb..bf101a40e 100644 --- a/man/Expr_cumprod.Rd +++ b/man/Expr_cumprod.Rd @@ -17,7 +17,7 @@ Expr Get an array with the cumulative product computed at every element. } \details{ -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before summing to prevent overflow issues. } \examples{ diff --git a/man/Expr_cumsum.Rd b/man/Expr_cumsum.Rd index c21021528..0d6786e94 100644 --- a/man/Expr_cumsum.Rd +++ b/man/Expr_cumsum.Rd @@ -16,7 +16,7 @@ Expr Get an array with the cumulative sum computed at every element. } \details{ -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before summing to prevent overflow issues. } \examples{ diff --git a/man/Expr_inspect.Rd b/man/Expr_inspect.Rd index bd45bc60b..f5c7807ed 100644 --- a/man/Expr_inspect.Rd +++ b/man/Expr_inspect.Rd @@ -10,7 +10,7 @@ Expr_inspect(fmt = "{}") \arguments{ \item{fmt}{format string, should contain one set of \code{{}} where object will be printed This formatting mimics python "string".format() use in pypolars. The string can -contain any thing but should have exactly one set of curly bracket {}.} +contain any thing but should have exactly one set of curly bracket \code{{}}.} } \value{ Expr diff --git a/man/Expr_rolling_max.Rd b/man/Expr_rolling_max.Rd index 9a9791af9..a5d50fd77 100644 --- a/man/Expr_rolling_max.Rd +++ b/man/Expr_rolling_max.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{: {'left', 'right', 'both', 'none'} +\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_mean.Rd b/man/Expr_rolling_mean.Rd index e2e3e0401..89d413520 100644 --- a/man/Expr_rolling_mean.Rd +++ b/man/Expr_rolling_mean.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{: {'left', 'right', 'both', 'none'} +\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_median.Rd b/man/Expr_rolling_median.Rd index 157a9b2f7..a5f2c4365 100644 --- a/man/Expr_rolling_median.Rd +++ b/man/Expr_rolling_median.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{: {'left', 'right', 'both', 'none'} +\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_min.Rd b/man/Expr_rolling_min.Rd index a223eeedc..633f90362 100644 --- a/man/Expr_rolling_min.Rd +++ b/man/Expr_rolling_min.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{: {'left', 'right', 'both', 'none'} +\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_quantile.Rd b/man/Expr_rolling_quantile.Rd index 95ad33667..e5284056c 100644 --- a/man/Expr_rolling_quantile.Rd +++ b/man/Expr_rolling_quantile.Rd @@ -50,7 +50,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{: {'left', 'right', 'both', 'none'} +\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_sum.Rd b/man/Expr_rolling_sum.Rd index 4fda3efe1..5e1db9a98 100644 --- a/man/Expr_rolling_sum.Rd +++ b/man/Expr_rolling_sum.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{: {'left', 'right', 'both', 'none'} +\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_var.Rd b/man/Expr_rolling_var.Rd index 3d0bd8850..b9bbdf487 100644 --- a/man/Expr_rolling_var.Rd +++ b/man/Expr_rolling_var.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{: {'left', 'right', 'both', 'none'} +\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_sum.Rd b/man/Expr_sum.Rd index 68c84cb34..2a3ce301b 100644 --- a/man/Expr_sum.Rd +++ b/man/Expr_sum.Rd @@ -13,7 +13,7 @@ Expr Get sum value } \details{ -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before summing to prevent overflow issues. } \examples{ diff --git a/man/Series_cumsum.Rd b/man/Series_cumsum.Rd index b5668604e..df997c19d 100644 --- a/man/Series_cumsum.Rd +++ b/man/Series_cumsum.Rd @@ -16,7 +16,7 @@ Series Get an array with the cumulative sum computed at every element. } \details{ -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before summing to prevent overflow issues. } \examples{ diff --git a/man/Series_max.Rd b/man/Series_max.Rd index 873fa3f72..492108215 100644 --- a/man/Series_max.Rd +++ b/man/Series_max.Rd @@ -13,7 +13,7 @@ R scalar value Reduce Series with max } \details{ -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before maxming to prevent overflow issues. } \examples{ diff --git a/man/Series_mean.Rd b/man/Series_mean.Rd index 9f1f4fba0..89b258cd9 100644 --- a/man/Series_mean.Rd +++ b/man/Series_mean.Rd @@ -13,7 +13,7 @@ R scalar value Reduce Series with mean } \details{ -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before meanming to prevent overflow issues. } \examples{ diff --git a/man/Series_median.Rd b/man/Series_median.Rd index d02454fa0..1c7302f5f 100644 --- a/man/Series_median.Rd +++ b/man/Series_median.Rd @@ -13,7 +13,7 @@ R scalar value Reduce Series with median } \details{ -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before medianming to prevent overflow issues. } \examples{ diff --git a/man/Series_min.Rd b/man/Series_min.Rd index d8602f1af..dab6f5015 100644 --- a/man/Series_min.Rd +++ b/man/Series_min.Rd @@ -13,7 +13,7 @@ R scalar value Reduce Series with min } \details{ -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before taking the min to prevent overflow issues. } \examples{ diff --git a/man/Series_sum.Rd b/man/Series_sum.Rd index 55ae68b60..ea6b86239 100644 --- a/man/Series_sum.Rd +++ b/man/Series_sum.Rd @@ -13,7 +13,7 @@ R scalar value Reduce Series with sum } \details{ -Dtypes in {Int8, UInt8, Int16, UInt16} are cast to +Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to Int64 before summing to prevent overflow issues. } \examples{ From f74524a4fea4e0c23bb9aa2e28796672ed509903 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Fri, 13 Oct 2023 23:32:44 +0200 Subject: [PATCH 03/14] one extra pair of braces --- R/expr__expr.R | 2 +- man/Expr_rolling_std.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/expr__expr.R b/R/expr__expr.R index 8888cc02d..e0060c21b 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -3045,7 +3045,7 @@ Expr_rolling_sum = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed : {'left', 'right', 'both', 'none'} +#' @param closed : `{'left', 'right', 'both', 'none'}` #' Define whether the temporal window interval is closed or not. #' #' diff --git a/man/Expr_rolling_std.Rd b/man/Expr_rolling_std.Rd index dec916d0a..bad50284a 100644 --- a/man/Expr_rolling_std.Rd +++ b/man/Expr_rolling_std.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{: {'left', 'right', 'both', 'none'} +\item{closed}{: \verb{\{'left', 'right', 'both', 'none'\}} Define whether the temporal window interval is closed or not.} } \value{ From 45eb76638518c2fc23f6f16b07ffc9e0ff2d9847 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Sat, 14 Oct 2023 12:40:47 +0200 Subject: [PATCH 04/14] try remove braces --- R/expr__expr.R | 15 ++++++++------- man/Expr_rolling_std.Rd | 8 ++++---- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/R/expr__expr.R b/R/expr__expr.R index e0060c21b..23ad981ed 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -3042,10 +3042,10 @@ Expr_rolling_sum = function( #' @param center #' Set the labels at the center of the window #' @param by -#' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must +#' If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must #' set the column that will be used to determine the windows. This column must -#' be of dtype `{Date, Datetime}` -#' @param closed : `{'left', 'right', 'both', 'none'}` +#' be of DataType: Date or DateTime. +#' @param closed string option `c("left", "right", "both", "none")`. #' Define whether the temporal window interval is closed or not. #' #' @@ -3066,13 +3066,14 @@ Expr_rolling_std = function( min_periods = NULL, center = FALSE, # :bool, by = NULL, # : Nullable, - closed = "left" # ;: Nullable, + closed = c("left", "right", "both", "none") ) { wargs = prepare_rolling_window_args(window_size, min_periods) - unwrap(.pr$Expr$rolling_std( + .pr$Expr$rolling_std( self, wargs$window_size, weights, - wargs$min_periods, center, by, closed - )) + wargs$min_periods, center, by, closed[1L] + ) |> + unwrap("in $rolling_std(): ") } #' Rolling var diff --git a/man/Expr_rolling_std.Rd b/man/Expr_rolling_std.Rd index bad50284a..b42617e09 100644 --- a/man/Expr_rolling_std.Rd +++ b/man/Expr_rolling_std.Rd @@ -10,7 +10,7 @@ Expr_rolling_std( min_periods = NULL, center = FALSE, by = NULL, - closed = "left" + closed = c("left", "right", "both", "none") ) } \arguments{ @@ -40,11 +40,11 @@ a result. If None, it will be set equal to window size.} \item{center}{Set the labels at the center of the window} -\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \verb{"3s}, you must +\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you must set the column that will be used to determine the windows. This column must -be of dtype \verb{\{Date, Datetime\}}} +be of DataType: Date or DateTime.} -\item{closed}{: \verb{\{'left', 'right', 'both', 'none'\}} +\item{closed}{string option \code{c("left", "right", "both", "none")}. Define whether the temporal window interval is closed or not.} } \value{ From 87e5693f382007567005db0320faa981ab6de776 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Tue, 17 Oct 2023 00:52:51 +0200 Subject: [PATCH 05/14] robj_to_roption --- R/expr__expr.R | 2 +- R/extendr-wrappers.R | 2 ++ src/rust/src/rdatatype.rs | 11 +++++------ src/rust/src/rlib.rs | 10 ++++++++-- src/rust/src/utils/mod.rs | 26 +++++++++++++++++++++++++- tests/testthat/test-datatype.R | 16 ++++++++++++++++ 6 files changed, 57 insertions(+), 10 deletions(-) diff --git a/R/expr__expr.R b/R/expr__expr.R index 23ad981ed..918a4bd83 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -3071,7 +3071,7 @@ Expr_rolling_std = function( wargs = prepare_rolling_window_args(window_size, min_periods) .pr$Expr$rolling_std( self, wargs$window_size, weights, - wargs$min_periods, center, by, closed[1L] + wargs$min_periods, center, by, closed ) |> unwrap("in $rolling_std(): ") } diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 0a2645912..88f7c16d3 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -65,6 +65,8 @@ test_robj_to_expr <- function(robj) .Call(wrap__test_robj_to_expr, robj) test_wrong_call_pl_lit <- function(robj) .Call(wrap__test_wrong_call_pl_lit, robj) +test_robj_to_roption <- function(robj) .Call(wrap__test_robj_to_roption, robj) + polars_features <- function() .Call(wrap__polars_features) concat_lf <- function(l, rechunk, parallel, to_supertypes) .Call(wrap__concat_lf, l, rechunk, parallel, to_supertypes) diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index cc3a41252..9e72cf0e4 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -1,6 +1,6 @@ use crate::robj_to; use crate::utils::wrappers::Wrap; -use crate::utils::{r_result_list, robj_to_string}; +use crate::utils::{r_result_list, robj_to_roption, robj_to_string}; use extendr_api::prelude::*; use polars::prelude as pl; use polars_core::prelude::QuantileInterpolOptions; @@ -297,16 +297,15 @@ pub fn new_quantile_interpolation_option(robj: Robj) -> RResult RResult { - let s = robj_to_string(robj.clone())?; use pl::ClosedWindow as CW; - match s.as_str() { + match robj_to_roption(robj)?.as_str() { "both" => Ok(CW::Both), "left" => Ok(CW::Left), "none" => Ok(CW::None), "right" => Ok(CW::Right), - _ => rerr() - .bad_val("ClosedWindow choice: [{}] is not any of 'both', 'left', 'none' or 'right'") - .bad_robj(&robj), + s => rerr().bad_val(format!( + "ClosedWindow choice ['{s}'] is not any of 'both', 'left', 'none' or 'right'" + )), } } diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs index 1d90cd526..9314db0f4 100644 --- a/src/rust/src/rlib.rs +++ b/src/rust/src/rlib.rs @@ -5,12 +5,11 @@ use crate::robj_to; use crate::rpolarserr::{rdbg, RResult}; use crate::series::Series; use crate::utils::extendr_concurrent::{ParRObj, ThreadCom}; +use crate::utils::robj_to_roption; use crate::RFnSignature; use crate::CONFIG; use extendr_api::prelude::*; use polars::prelude as pl; -use polars_core::functions as pl_functions; - use std::result::Result; #[extendr] @@ -222,6 +221,12 @@ fn test_wrong_call_pl_lit(robj: Robj) -> RResult { Ok(R!("pl$lit({{robj}})")?) // this call should have been polars::pl$lit(... } +#[extendr] +fn test_robj_to_roption(robj: Robj) -> RResult { + // robj can be any non-zero length char vec, will return first string. + robj_to_roption(robj) +} + #[extendr] fn polars_features() -> List { list!( @@ -302,6 +307,7 @@ extendr_module! { fn test_print_string; fn test_robj_to_expr; fn test_wrong_call_pl_lit; + fn test_robj_to_roption; //feature flags fn polars_features; diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index aa28bb6eb..df1f993c1 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -8,16 +8,19 @@ use crate::lazy::dsl::Expr; use crate::rdatatype::RPolarsDataType; use crate::rpolarserr::{polars_to_rpolars_err, rdbg, rerr, RPolarsErr, RResult, WithRctx}; use crate::series::Series; -use extendr_api::prelude::list; + use std::any::type_name as tn; //use std::intrinsics::read_via_copy; use crate::lazy::dsl::robj_to_col; use crate::rdataframe::{DataFrame, LazyFrame}; use extendr_api::eval_string_with_params; +use extendr_api::prelude::{list, Result as EResult, Strings}; use extendr_api::Attributes; +use extendr_api::CanBeNA; use extendr_api::ExternalPtr; use extendr_api::Result as ExtendrResult; use extendr_api::R; + use polars::prelude as pl; //macro to translate polars NULLs and emulate R NA value of any type @@ -531,6 +534,27 @@ pub fn robj_to_str<'a>(robj: extendr_api::Robj) -> RResult<&'a str> { } } +// this conversion exists to support R option char vec e.g. c("a", "b"), it will only get "a" and igonore the rest +// conversion error if not a char vec or zero length +pub fn robj_to_roption(robj: extendr_api::Robj) -> RResult { + let robj = unpack_r_result_list(robj)?; + let robj_clone = robj.clone(); + let s_res: EResult = robj.try_into(); + let opt_str = s_res.map(|s| s.iter().next().map(|rstr| rstr.clone())); + match opt_str { + Ok(Some(rstr)) if rstr.is_na() => { + Err(RPolarsErr::new().plain("an R option/choice should not be a NA_character".into())) + } + Ok(Some(rstr)) => Ok(rstr.to_string()), + Err(_) => { + Err(RPolarsErr::new().plain("an R option/choice should be a character vector".into())) + } + Ok(None) => Err(RPolarsErr::new() + .plain("an R option/choice character vector cannot have zero length".into())), + } + .map_err(|err| err.bad_robj(robj_clone)) +} + pub fn robj_to_usize(robj: extendr_api::Robj) -> RResult { robj_to_u64(robj).and_then(try_u64_into_usize) } diff --git a/tests/testthat/test-datatype.R b/tests/testthat/test-datatype.R index b524cedfb..8098eeee4 100644 --- a/tests/testthat/test-datatype.R +++ b/tests/testthat/test-datatype.R @@ -32,3 +32,19 @@ test_that("plStruct", { err_state = result(pl$Struct(bin = pl$Binary, pl$Boolean, bob = 42)) }) + + +test_that("robj_to_roption",{ + + + + # gets first value in char vec + expect_identical(test_robj_to_roption(c("a","b"))$ok, "a") + + # gets only string value + expect_identical(test_robj_to_roption(c("a"))$ok, "a") + + # gets + expect_identical(test_robj_to_roption(NA_character_) ) + +}) From 63aa6d0cc7c00484a703fa64c002845ee6b7777b Mon Sep 17 00:00:00 2001 From: sorhawell Date: Fri, 20 Oct 2023 21:31:27 +0200 Subject: [PATCH 06/14] revert braces changes --- R/expr__expr.R | 41 ++++++++++++++++++------------------ R/extendr-wrappers.R | 2 +- R/series__series.R | 12 +++++------ man/Expr_cumcount.Rd | 2 +- man/Expr_cummin.Rd | 4 ++-- man/Expr_cumprod.Rd | 2 +- man/Expr_cumsum.Rd | 2 +- man/Expr_inspect.Rd | 2 +- man/Expr_rolling_max.Rd | 2 +- man/Expr_rolling_mean.Rd | 2 +- man/Expr_rolling_median.Rd | 2 +- man/Expr_rolling_min.Rd | 2 +- man/Expr_rolling_quantile.Rd | 2 +- man/Expr_rolling_std.Rd | 8 +++---- man/Expr_rolling_sum.Rd | 2 +- man/Expr_rolling_var.Rd | 2 +- man/Expr_sum.Rd | 2 +- man/Series_cumsum.Rd | 2 +- man/Series_max.Rd | 2 +- man/Series_mean.Rd | 2 +- man/Series_median.Rd | 2 +- man/Series_min.Rd | 2 +- man/Series_sum.Rd | 2 +- 23 files changed, 51 insertions(+), 52 deletions(-) diff --git a/R/expr__expr.R b/R/expr__expr.R index 8da5ecab7..11558c3a9 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -1373,7 +1373,7 @@ Expr_rechunk = "use_extendr_wrapper" #' @aliases Expr_cumsum #' @name Expr_cumsum #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before summing to prevent overflow issues. #' @format NULL #' @examples @@ -1394,7 +1394,7 @@ Expr_cumsum = function(reverse = FALSE) { #' @aliases cumprod #' @name Expr_cumprod #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before summing to prevent overflow issues. #' #' @format NULL @@ -1415,7 +1415,7 @@ Expr_cumprod = function(reverse = FALSE) { #' @aliases cummin #' @name Expr_cummin #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before summing to prevent overflow issues. #' #' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} @@ -1437,7 +1437,7 @@ Expr_cummin = function(reverse = FALSE) { #' @aliases cummin #' @name Expr_cummin #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before summing to prevent overflow issues. #' #' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} @@ -1460,7 +1460,7 @@ Expr_cummax = function(reverse = FALSE) { #' @aliases cumcount #' @name Expr_cumcount #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before summing to prevent overflow issues. #' #' cumcount does not seem to count within lists. @@ -2065,7 +2065,7 @@ Expr_nan_min = "use_extendr_wrapper" #' Get sum value #' #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before summing to prevent overflow issues. #' #' @return Expr @@ -2664,7 +2664,7 @@ Expr_reinterpret = function(signed = TRUE) { #' The printing will happen when the expression evaluates, not when it is formed. #' @param fmt format string, should contain one set of `{}` where object will be printed #' This formatting mimics python "string".format() use in pypolars. The string can -#' contain any thing but should have exactly one set of curly bracket `{}`. +#' contain any thing but should have exactly one set of curly bracket {}. #' @return Expr #' @aliases inspect #' @examples @@ -2782,7 +2782,7 @@ prepare_rolling_window_args = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed String options `{'left', 'right', 'both', 'none'}` +#' @param closed : {'left', 'right', 'both', 'none'} #' Define whether the temporal window interval is closed or not. #' #' @@ -2848,7 +2848,7 @@ Expr_rolling_min = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed String options `{'left', 'right', 'both', 'none'}` +#' @param closed : {'left', 'right', 'both', 'none'} #' Define whether the temporal window interval is closed or not. #' #' @@ -2914,7 +2914,7 @@ Expr_rolling_max = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed String options `{'left', 'right', 'both', 'none'}` +#' @param closed : {'left', 'right', 'both', 'none'} #' Define whether the temporal window interval is closed or not. #' @details #' This functionality is experimental and may change without it being considered a @@ -2980,7 +2980,7 @@ Expr_rolling_mean = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed String options `{'left', 'right', 'both', 'none'}` +#' @param closed : {'left', 'right', 'both', 'none'} #' Define whether the temporal window interval is closed or not. #' @details #' This functionality is experimental and may change without it being considered a @@ -3042,10 +3042,10 @@ Expr_rolling_sum = function( #' @param center #' Set the labels at the center of the window #' @param by -#' If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must +#' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must -#' be of DataType: Date or DateTime. -#' @param closed string option `c("left", "right", "both", "none")`. +#' be of dtype `{Date, Datetime}` +#' @param closed : {'left', 'right', 'both', 'none'} #' Define whether the temporal window interval is closed or not. #' #' @@ -3066,14 +3066,13 @@ Expr_rolling_std = function( min_periods = NULL, center = FALSE, # :bool, by = NULL, # : Nullable, - closed = c("left", "right", "both", "none") + closed = "left" # ;: Nullable, ) { wargs = prepare_rolling_window_args(window_size, min_periods) - .pr$Expr$rolling_std( + unwrap(.pr$Expr$rolling_std( self, wargs$window_size, weights, wargs$min_periods, center, by, closed - ) |> - unwrap("in $rolling_std(): ") + )) } #' Rolling var @@ -3112,7 +3111,7 @@ Expr_rolling_std = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed String options `{'left', 'right', 'both', 'none'}` +#' @param closed : {'left', 'right', 'both', 'none'} #' Define whether the temporal window interval is closed or not. #' #' @@ -3178,7 +3177,7 @@ Expr_rolling_var = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed String options `{'left', 'right', 'both', 'none'}` +#' @param closed : {'left', 'right', 'both', 'none'} #' Define whether the temporal window interval is closed or not. #' #' @@ -3249,7 +3248,7 @@ Expr_rolling_median = function( #' If the `window_size` is temporal for instance `"5h"` or `"3s`, you must #' set the column that will be used to determine the windows. This column must #' be of dtype `{Date, Datetime}` -#' @param closed String options `{'left', 'right', 'both', 'none'}` +#' @param closed : {'left', 'right', 'both', 'none'} #' Define whether the temporal window interval is closed or not. #' #' diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index d56e7fa23..82a0a0037 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -985,7 +985,7 @@ LazyFrame$sink_parquet <- function(path, compression_method, compression_level, LazyFrame$sink_ipc <- function(path, compression_method, maintain_order) .Call(wrap__LazyFrame__sink_ipc, self, path, compression_method, maintain_order) -LazyFrame$sink_csv <- function(path, has_header, separator, line_terminator, quote, batch_size, datetime_format, date_format, time_format, float_precision, null_values, quote_style, maintain_order) .Call(wrap__LazyFrame__sink_csv, self, path, has_header, separator, line_terminator, quote, batch_size, datetime_format, date_format, time_format, float_precision, null_values, quote_style, maintain_order) +LazyFrame$sink_csv <- function(path, has_header, separator, line_terminator, quote, batch_size, datetime_format, date_format, time_format, float_precision, null_value, quote_style, maintain_order) .Call(wrap__LazyFrame__sink_csv, self, path, has_header, separator, line_terminator, quote, batch_size, datetime_format, date_format, time_format, float_precision, null_value, quote_style, maintain_order) LazyFrame$first <- function() .Call(wrap__LazyFrame__first, self) diff --git a/R/series__series.R b/R/series__series.R index f7896224b..0b602c47f 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -572,7 +572,7 @@ Series_clone = "use_extendr_wrapper" #' @aliases Series_cumsum #' @name Series_cumsum #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before summing to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, NaN, 4, Inf))$cumsum() @@ -586,7 +586,7 @@ Series_cumsum = function(reverse = FALSE) { #' @return R scalar value #' @keywords Series #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before summing to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, 5))$sum() # a NA is dropped always @@ -601,7 +601,7 @@ Series_sum = function() { #' @return R scalar value #' @keywords Series #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before meanming to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, 5))$mean() # a NA is dropped always @@ -616,7 +616,7 @@ Series_mean = function() { #' @return R scalar value #' @keywords Series #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before medianming to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, 5))$median() # a NA is dropped always @@ -631,7 +631,7 @@ Series_median = function() { #' @return R scalar value #' @keywords Series #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before maxming to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, 5))$max() # a NA is dropped always @@ -646,7 +646,7 @@ Series_max = function() { #' @return R scalar value #' @keywords Series #' @details -#' Dtypes in `{Int8, UInt8, Int16, UInt16}` are cast to +#' Dtypes in {Int8, UInt8, Int16, UInt16} are cast to #' Int64 before taking the min to prevent overflow issues. #' @examples #' pl$Series(c(1:2, NA, 3, 5))$min() # a NA is dropped always diff --git a/man/Expr_cumcount.Rd b/man/Expr_cumcount.Rd index 4ae4ff14d..0db25280e 100644 --- a/man/Expr_cumcount.Rd +++ b/man/Expr_cumcount.Rd @@ -18,7 +18,7 @@ Get an array with the cumulative count computed at every element. Counting from 0 to len } \details{ -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues. cumcount does not seem to count within lists. diff --git a/man/Expr_cummin.Rd b/man/Expr_cummin.Rd index febc059f2..62ad143c8 100644 --- a/man/Expr_cummin.Rd +++ b/man/Expr_cummin.Rd @@ -24,12 +24,12 @@ Get an array with the cumulative min computed at every element. Get an array with the cumulative max computed at every element. } \details{ -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues. See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues. See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} diff --git a/man/Expr_cumprod.Rd b/man/Expr_cumprod.Rd index bf101a40e..27a8af3fb 100644 --- a/man/Expr_cumprod.Rd +++ b/man/Expr_cumprod.Rd @@ -17,7 +17,7 @@ Expr Get an array with the cumulative product computed at every element. } \details{ -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues. } \examples{ diff --git a/man/Expr_cumsum.Rd b/man/Expr_cumsum.Rd index 0d6786e94..c21021528 100644 --- a/man/Expr_cumsum.Rd +++ b/man/Expr_cumsum.Rd @@ -16,7 +16,7 @@ Expr Get an array with the cumulative sum computed at every element. } \details{ -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues. } \examples{ diff --git a/man/Expr_inspect.Rd b/man/Expr_inspect.Rd index f5c7807ed..bd45bc60b 100644 --- a/man/Expr_inspect.Rd +++ b/man/Expr_inspect.Rd @@ -10,7 +10,7 @@ Expr_inspect(fmt = "{}") \arguments{ \item{fmt}{format string, should contain one set of \code{{}} where object will be printed This formatting mimics python "string".format() use in pypolars. The string can -contain any thing but should have exactly one set of curly bracket \code{{}}.} +contain any thing but should have exactly one set of curly bracket {}.} } \value{ Expr diff --git a/man/Expr_rolling_max.Rd b/man/Expr_rolling_max.Rd index a5d50fd77..9a9791af9 100644 --- a/man/Expr_rolling_max.Rd +++ b/man/Expr_rolling_max.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} +\item{closed}{: {'left', 'right', 'both', 'none'} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_mean.Rd b/man/Expr_rolling_mean.Rd index 89d413520..e2e3e0401 100644 --- a/man/Expr_rolling_mean.Rd +++ b/man/Expr_rolling_mean.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} +\item{closed}{: {'left', 'right', 'both', 'none'} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_median.Rd b/man/Expr_rolling_median.Rd index a5f2c4365..157a9b2f7 100644 --- a/man/Expr_rolling_median.Rd +++ b/man/Expr_rolling_median.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} +\item{closed}{: {'left', 'right', 'both', 'none'} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_min.Rd b/man/Expr_rolling_min.Rd index 633f90362..a223eeedc 100644 --- a/man/Expr_rolling_min.Rd +++ b/man/Expr_rolling_min.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} +\item{closed}{: {'left', 'right', 'both', 'none'} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_quantile.Rd b/man/Expr_rolling_quantile.Rd index e5284056c..95ad33667 100644 --- a/man/Expr_rolling_quantile.Rd +++ b/man/Expr_rolling_quantile.Rd @@ -50,7 +50,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} +\item{closed}{: {'left', 'right', 'both', 'none'} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_std.Rd b/man/Expr_rolling_std.Rd index b42617e09..dec916d0a 100644 --- a/man/Expr_rolling_std.Rd +++ b/man/Expr_rolling_std.Rd @@ -10,7 +10,7 @@ Expr_rolling_std( min_periods = NULL, center = FALSE, by = NULL, - closed = c("left", "right", "both", "none") + closed = "left" ) } \arguments{ @@ -40,11 +40,11 @@ a result. If None, it will be set equal to window size.} \item{center}{Set the labels at the center of the window} -\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you must +\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \verb{"3s}, you must set the column that will be used to determine the windows. This column must -be of DataType: Date or DateTime.} +be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{string option \code{c("left", "right", "both", "none")}. +\item{closed}{: {'left', 'right', 'both', 'none'} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_sum.Rd b/man/Expr_rolling_sum.Rd index 5e1db9a98..4fda3efe1 100644 --- a/man/Expr_rolling_sum.Rd +++ b/man/Expr_rolling_sum.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} +\item{closed}{: {'left', 'right', 'both', 'none'} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_rolling_var.Rd b/man/Expr_rolling_var.Rd index b9bbdf487..3d0bd8850 100644 --- a/man/Expr_rolling_var.Rd +++ b/man/Expr_rolling_var.Rd @@ -44,7 +44,7 @@ a result. If None, it will be set equal to window size.} set the column that will be used to determine the windows. This column must be of dtype \verb{\{Date, Datetime\}}} -\item{closed}{String options \verb{\{'left', 'right', 'both', 'none'\}} +\item{closed}{: {'left', 'right', 'both', 'none'} Define whether the temporal window interval is closed or not.} } \value{ diff --git a/man/Expr_sum.Rd b/man/Expr_sum.Rd index 2a3ce301b..68c84cb34 100644 --- a/man/Expr_sum.Rd +++ b/man/Expr_sum.Rd @@ -13,7 +13,7 @@ Expr Get sum value } \details{ -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues. } \examples{ diff --git a/man/Series_cumsum.Rd b/man/Series_cumsum.Rd index df997c19d..b5668604e 100644 --- a/man/Series_cumsum.Rd +++ b/man/Series_cumsum.Rd @@ -16,7 +16,7 @@ Series Get an array with the cumulative sum computed at every element. } \details{ -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues. } \examples{ diff --git a/man/Series_max.Rd b/man/Series_max.Rd index 492108215..873fa3f72 100644 --- a/man/Series_max.Rd +++ b/man/Series_max.Rd @@ -13,7 +13,7 @@ R scalar value Reduce Series with max } \details{ -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before maxming to prevent overflow issues. } \examples{ diff --git a/man/Series_mean.Rd b/man/Series_mean.Rd index 89b258cd9..9f1f4fba0 100644 --- a/man/Series_mean.Rd +++ b/man/Series_mean.Rd @@ -13,7 +13,7 @@ R scalar value Reduce Series with mean } \details{ -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before meanming to prevent overflow issues. } \examples{ diff --git a/man/Series_median.Rd b/man/Series_median.Rd index 1c7302f5f..d02454fa0 100644 --- a/man/Series_median.Rd +++ b/man/Series_median.Rd @@ -13,7 +13,7 @@ R scalar value Reduce Series with median } \details{ -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before medianming to prevent overflow issues. } \examples{ diff --git a/man/Series_min.Rd b/man/Series_min.Rd index dab6f5015..d8602f1af 100644 --- a/man/Series_min.Rd +++ b/man/Series_min.Rd @@ -13,7 +13,7 @@ R scalar value Reduce Series with min } \details{ -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before taking the min to prevent overflow issues. } \examples{ diff --git a/man/Series_sum.Rd b/man/Series_sum.Rd index ea6b86239..55ae68b60 100644 --- a/man/Series_sum.Rd +++ b/man/Series_sum.Rd @@ -13,7 +13,7 @@ R scalar value Reduce Series with sum } \details{ -Dtypes in \verb{\{Int8, UInt8, Int16, UInt16\}} are cast to +Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues. } \examples{ From f497d08e411f8873d2c48271fb5d9b87e6d231b1 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Fri, 20 Oct 2023 21:53:20 +0200 Subject: [PATCH 07/14] move utest to new file --- tests/testthat/test-datatype.R | 13 ------------- tests/testthat/test-robj_to_option.R | 12 ++++++++++++ 2 files changed, 12 insertions(+), 13 deletions(-) create mode 100644 tests/testthat/test-robj_to_option.R diff --git a/tests/testthat/test-datatype.R b/tests/testthat/test-datatype.R index 8098eeee4..e6f685fbe 100644 --- a/tests/testthat/test-datatype.R +++ b/tests/testthat/test-datatype.R @@ -34,17 +34,4 @@ test_that("plStruct", { }) -test_that("robj_to_roption",{ - - - # gets first value in char vec - expect_identical(test_robj_to_roption(c("a","b"))$ok, "a") - - # gets only string value - expect_identical(test_robj_to_roption(c("a"))$ok, "a") - - # gets - expect_identical(test_robj_to_roption(NA_character_) ) - -}) diff --git a/tests/testthat/test-robj_to_option.R b/tests/testthat/test-robj_to_option.R new file mode 100644 index 000000000..b500a3bd1 --- /dev/null +++ b/tests/testthat/test-robj_to_option.R @@ -0,0 +1,12 @@ +test_that("robj_to_roption",{ + + # gets first value in char vec + expect_identical(test_robj_to_roption(c("a","b"))$ok, "a") + + # gets only string value + expect_identical(test_robj_to_roption(c("a"))$ok, "a") + + # gets + expect_identical(test_robj_to_roption(NA_character_) ) + +}) From d07b3767f559a047e207b9c93a80c2106f683180 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Fri, 20 Oct 2023 21:53:51 +0200 Subject: [PATCH 08/14] add comments include extendr conversion err as ctx --- src/rust/src/utils/mod.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index 3392dd27d..cc1fae852 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -542,13 +542,21 @@ pub fn robj_to_roption(robj: extendr_api::Robj) -> RResult { let s_res: EResult = robj.try_into(); let opt_str = s_res.map(|s| s.iter().next().map(|rstr| rstr.clone())); match opt_str { + // NA_CHARACTER not allowed as first element return error Ok(Some(rstr)) if rstr.is_na() => { Err(RPolarsErr::new().plain("an R option/choice should not be a NA_character".into())) } + + // At least one string, return first string Ok(Some(rstr)) => Ok(rstr.to_string()), - Err(_) => { - Err(RPolarsErr::new().plain("an R option/choice should be a character vector".into())) + + // Not character vector, return Error + Err(extendr_err) => { + let rpolars_err: RPolarsErr = extendr_err.into(); + Err(rpolars_err.plain("an R option/choice should be a character vector".into())) } + + // An empty chr vec, return Error Ok(None) => Err(RPolarsErr::new() .plain("an R option/choice character vector cannot have zero length".into())), } From 87c8f1d4be1da4446544927910da7275a677906a Mon Sep 17 00:00:00 2001 From: sorhawell Date: Fri, 20 Oct 2023 22:01:31 +0200 Subject: [PATCH 09/14] update utest --- tests/testthat/test-robj_to_option.R | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-robj_to_option.R b/tests/testthat/test-robj_to_option.R index b500a3bd1..998493680 100644 --- a/tests/testthat/test-robj_to_option.R +++ b/tests/testthat/test-robj_to_option.R @@ -1,12 +1,23 @@ -test_that("robj_to_roption",{ +test_that("robj_to_roption", { # gets first value in char vec - expect_identical(test_robj_to_roption(c("a","b"))$ok, "a") + expect_identical(test_robj_to_roption(c("a","b", NA))$ok, "a") - # gets only string value + # ... or string expect_identical(test_robj_to_roption(c("a"))$ok, "a") - # gets - expect_identical(test_robj_to_roption(NA_character_) ) + # NA chr not allowed as first element + ctx = test_robj_to_roption(NA_character_)$err$contexts() + expect_identical(ctx$PlainErrorMessage, "an R option/choice should not be a NA_character") + # empty chr vec not allowed as first element + ctx = test_robj_to_roption(character())$err$contexts() + expect_identical( + ctx$PlainErrorMessage, + "an R option/choice character vector cannot have zero length" + ) + + # non char vec / string not allowed + ctx = test_robj_to_roption(42)$err$contexts() + expect_identical(ctx$PlainErrorMessage, "an R option/choice should be a character vector") }) From 4131dd652511722d7d8922eaead9ca3e89d9e78a Mon Sep 17 00:00:00 2001 From: sorhawell Date: Tue, 24 Oct 2023 00:19:05 +0200 Subject: [PATCH 10/14] refactor LazyFrame$join using it --- R/lazyframe__lazy.R | 31 +++++++++------------- src/rust/src/lazy/dataframe.rs | 48 ++++++++++++++++------------------ src/rust/src/rdatatype.rs | 12 --------- src/rust/src/utils/mod.rs | 19 ++++++++++++++ 4 files changed, 54 insertions(+), 56 deletions(-) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 097fc4f3b..05178db01 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -979,35 +979,30 @@ LazyFrame_join = function( suffix = "_right", allow_parallel = TRUE, force_parallel = FALSE) { - if (inherits(other, "LazyFrame")) { - # nothing - } else if (inherits(other, "DataFrame")) { - other = other - } else { - stopf(paste("Expected a `LazyFrame` as join table, got ", class(other))) - } - how_opts = c("inner", "left", "outer", "semi", "anti", "cross") - how = match.arg(how[1L], how_opts) + uw = \(res) unwrap(res, "in $join():") + + if (inherits(other, "DataFrame")) { + other = other$lazy() + } if (!is.null(on)) { - rexprs = do.call(construct_ProtoExprArray, as.list(on)) - rexprs_left = rexprs - rexprs_right = rexprs + rexprs_right = rexprs_left = as.list(on) } else if ((!is.null(left_on) && !is.null(right_on))) { - rexprs_left = do.call(construct_ProtoExprArray, as.list(left_on)) - rexprs_right = do.call(construct_ProtoExprArray, as.list(right_on)) + rexprs_left = as.list(left_on) + rexprs_right = as.list(right_on) } else if (how != "cross") { - stopf("must specify `on` OR ( `left_on` AND `right_on` ) ") + Err_plain("must specify `on` OR ( `left_on` AND `right_on` ) ") |> uw() } else { - rexprs_left = do.call(construct_ProtoExprArray, as.list(self$columns)) - rexprs_right = do.call(construct_ProtoExprArray, as.list(other$columns)) + rexprs_left = as.list(self$columns) + rexprs_right = as.list(other$columns) } .pr$LazyFrame$join( self, other, rexprs_left, rexprs_right, how, suffix, allow_parallel, force_parallel - ) + ) |> + uw() } diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index f219d4a87..674e2a935 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -7,8 +7,8 @@ use crate::lazy::dsl::*; use crate::rdataframe::DataFrame as RDF; use crate::rdatatype::{ - new_asof_strategy, new_ipc_compression, new_join_type, new_parquet_compression, - new_unique_keep_strategy, RPolarsDataType, + new_asof_strategy, new_ipc_compression, new_parquet_compression, new_unique_keep_strategy, + RPolarsDataType, }; use crate::robj_to; use crate::rpolarserr::{polars_to_rpolars_err, RResult, Rctx, WithRctx}; @@ -407,31 +407,27 @@ impl LazyFrame { #[allow(clippy::too_many_arguments)] fn join( &self, - other: &LazyFrame, - left_on: &ProtoExprArray, - right_on: &ProtoExprArray, - how: &str, - suffix: &str, - allow_parallel: bool, - force_parallel: bool, - ) -> LazyFrame { - let ldf = self.0.clone(); - let other = other.0.clone(); - let left_on = pra_to_vec(left_on, "select"); - let right_on = pra_to_vec(right_on, "select"); - let how = new_join_type(how); - - LazyFrame( - ldf.join_builder() - .with(other) - .left_on(left_on) - .right_on(right_on) - .allow_parallel(allow_parallel) - .force_parallel(force_parallel) - .how(how) - .suffix(suffix) + other: Robj, + left_on: Robj, + right_on: Robj, + how: Robj, + suffix: Robj, + allow_parallel: Robj, + force_parallel: Robj, + ) -> RResult { + Ok(LazyFrame( + self.0 + .clone() + .join_builder() + .with(robj_to!(PLLazyFrame, other)?) + .left_on(robj_to!(VecPLExprCol, left_on)?) + .right_on(robj_to!(VecPLExprCol, right_on)?) + .allow_parallel(robj_to!(bool, allow_parallel)?) + .force_parallel(robj_to!(bool, force_parallel)?) + .how(robj_to!(JoinType, how)?) + .suffix(robj_to!(str, suffix)?) .finish(), - ) + )) } pub fn sort_by_exprs( diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index 9e72cf0e4..824606b23 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -245,18 +245,6 @@ impl DataTypeVector { } } -pub fn new_join_type(s: &str) -> pl::JoinType { - match s { - "cross" => pl::JoinType::Cross, - "inner" => pl::JoinType::Inner, - "left" => pl::JoinType::Left, - "outer" => pl::JoinType::Outer, - "semi" => pl::JoinType::Semi, - "anti" => pl::JoinType::Anti, - _ => panic!("polars internal error: jointype not recognized"), - } -} - pub fn new_asof_strategy(s: &str) -> Result { match s { "forward" => Ok(AsofStrategy::Forward), diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index cc1fae852..6f768f933 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -488,6 +488,21 @@ pub fn unpack_r_result_list(robj: extendr_api::Robj) -> RResult { res } +pub fn robj_to_join_type(robj: Robj) -> RResult { + let s = robj_to_roption(robj)?; + match s.as_str() { + "cross" => Ok(pl::JoinType::Cross), + "inner" => Ok(pl::JoinType::Inner), + "left" => Ok(pl::JoinType::Left), + "outer" => Ok(pl::JoinType::Outer), + "semi" => Ok(pl::JoinType::Semi), + "anti" => Ok(pl::JoinType::Anti), + s => rerr().bad_val(format!( + "[{s}] JoinType is not any of 'cross', 'inner', 'left', 'outer', 'semi', 'anti'" + )), + } +} + //None if not real or Na. pub fn robj_bit64_to_opt_i64(robj: Robj) -> Option { robj.as_real() @@ -1048,6 +1063,10 @@ macro_rules! robj_to_inner { $crate::utils::robj_to_quote_style($a) }; + (JoinType, $a:ident) => { + $crate::utils::robj_to_join_type($a) + }; + (RArrow_schema, $a:ident) => { $crate::utils::robj_to_rarrow_schema($a) }; From 37f82de5c1b7807385547d7a2eeb1ea4e0a70771 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Sun, 5 Nov 2023 22:55:47 +0100 Subject: [PATCH 11/14] add NotAChoice error type + tidy up --- R/extendr-wrappers.R | 4 ++- src/rust/src/lazy/dsl.rs | 4 +-- src/rust/src/rdatatype.rs | 45 ++++++++++++++++++--------- src/rust/src/rlib.rs | 10 +++--- src/rust/src/rpolarserr.rs | 12 +++++++ src/rust/src/utils/mod.rs | 37 +++++++--------------- tests/testthat/test-robj_to_option.R | 23 -------------- tests/testthat/test-robj_to_rchoice.R | 23 ++++++++++++++ 8 files changed, 88 insertions(+), 70 deletions(-) delete mode 100644 tests/testthat/test-robj_to_option.R create mode 100644 tests/testthat/test-robj_to_rchoice.R diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 1a06e0c3f..2bda128af 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -65,7 +65,7 @@ test_robj_to_expr <- function(robj) .Call(wrap__test_robj_to_expr, robj) test_wrong_call_pl_lit <- function(robj) .Call(wrap__test_wrong_call_pl_lit, robj) -test_robj_to_roption <- function(robj) .Call(wrap__test_robj_to_roption, robj) +test_robj_to_rchoice <- function(robj) .Call(wrap__test_robj_to_rchoice, robj) polars_features <- function() .Call(wrap__polars_features) @@ -319,6 +319,8 @@ RPolarsErr$mistyped <- function(s) .Call(wrap__RPolarsErr__mistyped, self, s) RPolarsErr$misvalued <- function(s) .Call(wrap__RPolarsErr__misvalued, self, s) +RPolarsErr$notachoice <- function(s) .Call(wrap__RPolarsErr__notachoice, self, s) + RPolarsErr$plain <- function(s) .Call(wrap__RPolarsErr__plain, self, s) RPolarsErr$rcall <- function(c) .Call(wrap__RPolarsErr__rcall, self, c) diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 1b8632a6c..0a6d982b3 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -668,7 +668,7 @@ impl Expr { min_periods: robj_to!(usize, min_periods)?, center: robj_to!(bool, center)?, by: robj_to!(Option, String, by)?, - closed_window: robj_to!(Option, new_closed_window, closed)?, + closed_window: robj_to!(Option, ClosedWindow, closed)?, fn_params: Some(pl::Arc::new(pl::RollingQuantileParams { prob: robj_to!(f64, quantile)?, interpol: robj_to!(new_quantile_interpolation_option, interpolation)?, @@ -2472,7 +2472,7 @@ pub fn make_rolling_options( min_periods: robj_to!(usize, min_periods)?, center: robj_to!(bool, center)?, by: robj_to!(Option, String, by_null)?, - closed_window: robj_to!(Option, new_closed_window, closed_null)?, + closed_window: robj_to!(Option, ClosedWindow, closed_null)?, ..Default::default() }) } diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index 824606b23..098a169df 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -1,6 +1,6 @@ use crate::robj_to; use crate::utils::wrappers::Wrap; -use crate::utils::{r_result_list, robj_to_roption, robj_to_string}; +use crate::utils::{r_result_list, robj_to_string}; use extendr_api::prelude::*; use polars::prelude as pl; use polars_core::prelude::QuantileInterpolOptions; @@ -14,6 +14,8 @@ pub struct RField(pub pl::Field); use pl::UniqueKeepStrategy; use polars::prelude::AsofStrategy; +use crate::utils::robj_to_rchoice; + #[extendr] impl RField { fn new(name: String, datatype: &RPolarsDataType) -> RField { @@ -284,19 +286,6 @@ pub fn new_quantile_interpolation_option(robj: Robj) -> RResult RResult { - use pl::ClosedWindow as CW; - match robj_to_roption(robj)?.as_str() { - "both" => Ok(CW::Both), - "left" => Ok(CW::Left), - "none" => Ok(CW::None), - "right" => Ok(CW::Right), - s => rerr().bad_val(format!( - "ClosedWindow choice ['{s}'] is not any of 'both', 'left', 'none' or 'right'" - )), - } -} - pub fn new_null_behavior( s: &str, ) -> std::result::Result { @@ -499,6 +488,34 @@ pub fn new_rolling_cov_options( }) } +pub fn robj_to_join_type(robj: Robj) -> RResult { + let s = robj_to_rchoice(robj)?; + match s.as_str() { + "cross" => Ok(pl::JoinType::Cross), + "inner" => Ok(pl::JoinType::Inner), + "left" => Ok(pl::JoinType::Left), + "outer" => Ok(pl::JoinType::Outer), + "semi" => Ok(pl::JoinType::Semi), + "anti" => Ok(pl::JoinType::Anti), + s => rerr().bad_val(format!( + "JoinType choice ['{s}'] is not any of 'cross', 'inner', 'left', 'outer', 'semi', 'anti'" + )), + } +} + +pub fn robj_to_closed_window(robj: Robj) -> RResult { + use pl::ClosedWindow as CW; + match robj_to_rchoice(robj)?.as_str() { + "both" => Ok(CW::Both), + "left" => Ok(CW::Left), + "none" => Ok(CW::None), + "right" => Ok(CW::Right), + s => rerr().bad_val(format!( + "ClosedWindow choice ['{s}'] is not any of 'both', 'left', 'none' or 'right'" + )), + } +} + extendr_module! { mod rdatatype; impl RPolarsDataType; diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs index b3c7ac4a5..3b5c7dea5 100644 --- a/src/rust/src/rlib.rs +++ b/src/rust/src/rlib.rs @@ -5,7 +5,7 @@ use crate::robj_to; use crate::rpolarserr::{rdbg, RResult}; use crate::series::Series; use crate::utils::extendr_concurrent::{ParRObj, ThreadCom}; -use crate::utils::robj_to_roption; +use crate::utils::robj_to_rchoice; use crate::RFnSignature; use crate::CONFIG; use extendr_api::prelude::*; @@ -65,7 +65,7 @@ fn r_date_range_lazy( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, robj_to!(pl_duration, every)?, - robj_to!(new_closed_window, closed)?, + robj_to!(ClosedWindow, closed)?, robj_to!(Option, timeunit, time_unit)?, robj_to!(Option, String, time_zone)?, ); @@ -222,9 +222,9 @@ fn test_wrong_call_pl_lit(robj: Robj) -> RResult { } #[extendr] -fn test_robj_to_roption(robj: Robj) -> RResult { +fn test_robj_to_rchoice(robj: Robj) -> RResult { // robj can be any non-zero length char vec, will return first string. - robj_to_roption(robj) + robj_to_rchoice(robj) } #[extendr] @@ -307,7 +307,7 @@ extendr_module! { fn test_print_string; fn test_robj_to_expr; fn test_wrong_call_pl_lit; - fn test_robj_to_roption; + fn test_robj_to_rchoice; //feature flags fn polars_features; diff --git a/src/rust/src/rpolarserr.rs b/src/rust/src/rpolarserr.rs index 78b98eba4..4e2575e2d 100644 --- a/src/rust/src/rpolarserr.rs +++ b/src/rust/src/rpolarserr.rs @@ -25,6 +25,8 @@ pub enum Rctx { Mistyped(String), #[error("Expected a value that {0}")] Misvalued(String), + #[error("Not a valid R choice because {0}")] + NotAChoice(String), #[error("{0}")] Plain(String), #[error("Encountered the following error in Rust-Polars:\n\t{0}")] @@ -51,6 +53,7 @@ pub trait WithRctx { fn hint(self, cause: impl Into) -> RResult; fn mistyped(self, ty: impl Into) -> RResult; fn misvalued(self, scope: impl Into) -> RResult; + fn notachoice(self, scope: impl Into) -> RResult; fn plain(self, msg: impl Into) -> RResult; fn when(self, env: impl Into) -> RResult; } @@ -96,6 +99,10 @@ impl> WithRctx for core::result::Result { self.ctx(Rctx::Misvalued(scope.into())) } + fn notachoice(self, scope: impl Into) -> RResult { + self.ctx(Rctx::NotAChoice(scope.into())) + } + fn plain(self, msg: impl Into) -> RResult { self.ctx(Rctx::Plain(msg.into())) } @@ -129,6 +136,7 @@ impl RPolarsErr { Hint(msg) => ("Hint", msg), Mistyped(ty) => ("TypeMismatch", ty), Misvalued(scope) => ("ValueOutOfScope", scope), + NotAChoice(err) => ("NotAChoice", err), Plain(msg) => ("PlainErrorMessage", msg), Polars(err) => ("PolarsError", err), When(target) => ("When", target), @@ -166,6 +174,10 @@ impl RPolarsErr { self.push_back_rctx(Rctx::Misvalued(s)) } + pub fn notachoice(&self, s: String) -> Self { + self.push_back_rctx(Rctx::NotAChoice(s)) + } + pub fn plain(&self, s: String) -> Self { self.push_back_rctx(Rctx::Plain(s)) } diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index 6f768f933..827fb5f34 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -488,21 +488,6 @@ pub fn unpack_r_result_list(robj: extendr_api::Robj) -> RResult { res } -pub fn robj_to_join_type(robj: Robj) -> RResult { - let s = robj_to_roption(robj)?; - match s.as_str() { - "cross" => Ok(pl::JoinType::Cross), - "inner" => Ok(pl::JoinType::Inner), - "left" => Ok(pl::JoinType::Left), - "outer" => Ok(pl::JoinType::Outer), - "semi" => Ok(pl::JoinType::Semi), - "anti" => Ok(pl::JoinType::Anti), - s => rerr().bad_val(format!( - "[{s}] JoinType is not any of 'cross', 'inner', 'left', 'outer', 'semi', 'anti'" - )), - } -} - //None if not real or Na. pub fn robj_bit64_to_opt_i64(robj: Robj) -> Option { robj.as_real() @@ -549,9 +534,12 @@ pub fn robj_to_str<'a>(robj: extendr_api::Robj) -> RResult<&'a str> { } } -// this conversion exists to support R option char vec e.g. c("a", "b"), it will only get "a" and igonore the rest -// conversion error if not a char vec or zero length -pub fn robj_to_roption(robj: extendr_api::Robj) -> RResult { +// This conversion assists conversion of R choice char vec e.g. c("a", "b") +// Only the first element "a" will passed on as String +// conversion error if not a char vec with none-zero length. +// NA is not allowed +// other conversions will use it e.g. robj_to_join_type() or robj_to_closed_window() +pub fn robj_to_rchoice(robj: extendr_api::Robj) -> RResult { let robj = unpack_r_result_list(robj)?; let robj_clone = robj.clone(); let s_res: EResult = robj.try_into(); @@ -559,7 +547,7 @@ pub fn robj_to_roption(robj: extendr_api::Robj) -> RResult { match opt_str { // NA_CHARACTER not allowed as first element return error Ok(Some(rstr)) if rstr.is_na() => { - Err(RPolarsErr::new().plain("an R option/choice should not be a NA_character".into())) + Err(RPolarsErr::new().notachoice("NA_character is not allowed".into())) } // At least one string, return first string @@ -568,12 +556,11 @@ pub fn robj_to_roption(robj: extendr_api::Robj) -> RResult { // Not character vector, return Error Err(extendr_err) => { let rpolars_err: RPolarsErr = extendr_err.into(); - Err(rpolars_err.plain("an R option/choice should be a character vector".into())) + Err(rpolars_err.notachoice("input is not a character vector".into())) } // An empty chr vec, return Error - Ok(None) => Err(RPolarsErr::new() - .plain("an R option/choice character vector cannot have zero length".into())), + Ok(None) => Err(RPolarsErr::new().notachoice("character vector has zero length".into())), } .map_err(|err| err.bad_robj(robj_clone)) } @@ -977,8 +964,8 @@ macro_rules! robj_to_inner { (timeunit, $a:ident) => { $crate::rdatatype::robj_to_timeunit($a) }; - (new_closed_window, $a:ident) => { - $crate::rdatatype::new_closed_window($a) + (ClosedWindow, $a:ident) => { + $crate::rdatatype::robj_to_closed_window($a) }; (new_quantile_interpolation_option, $a:ident) => { $crate::rdatatype::new_quantile_interpolation_option($a) @@ -1064,7 +1051,7 @@ macro_rules! robj_to_inner { }; (JoinType, $a:ident) => { - $crate::utils::robj_to_join_type($a) + $crate::rdatatype::robj_to_join_type($a) }; (RArrow_schema, $a:ident) => { diff --git a/tests/testthat/test-robj_to_option.R b/tests/testthat/test-robj_to_option.R deleted file mode 100644 index 998493680..000000000 --- a/tests/testthat/test-robj_to_option.R +++ /dev/null @@ -1,23 +0,0 @@ -test_that("robj_to_roption", { - - # gets first value in char vec - expect_identical(test_robj_to_roption(c("a","b", NA))$ok, "a") - - # ... or string - expect_identical(test_robj_to_roption(c("a"))$ok, "a") - - # NA chr not allowed as first element - ctx = test_robj_to_roption(NA_character_)$err$contexts() - expect_identical(ctx$PlainErrorMessage, "an R option/choice should not be a NA_character") - - # empty chr vec not allowed as first element - ctx = test_robj_to_roption(character())$err$contexts() - expect_identical( - ctx$PlainErrorMessage, - "an R option/choice character vector cannot have zero length" - ) - - # non char vec / string not allowed - ctx = test_robj_to_roption(42)$err$contexts() - expect_identical(ctx$PlainErrorMessage, "an R option/choice should be a character vector") -}) diff --git a/tests/testthat/test-robj_to_rchoice.R b/tests/testthat/test-robj_to_rchoice.R new file mode 100644 index 000000000..5bfc4c106 --- /dev/null +++ b/tests/testthat/test-robj_to_rchoice.R @@ -0,0 +1,23 @@ +test_that("robj_to_rchoice", { + + # gets first value in char vec + expect_identical(test_robj_to_rchoice(c("a","b", NA))$ok, "a") + + # ... or string + expect_identical(test_robj_to_rchoice(c("a"))$ok, "a") + + # NA chr not allowed as first element + ctx = test_robj_to_rchoice(NA_character_)$err$contexts() + expect_identical(ctx$NotAChoice, "NA_character is not allowed") + + # empty chr vec not allowed as first element + ctx = test_robj_to_rchoice(character())$err$contexts() + expect_identical( + ctx$NotAChoice, + "character vector has zero length" + ) + + # non char vec / string not allowed + ctx = test_robj_to_rchoice(42)$err$contexts() + expect_identical(ctx$NotAChoice, "input is not a character vector") +}) From 4308ce58cd1bd06be86441b99f6e241fcb2e8adf Mon Sep 17 00:00:00 2001 From: sorhawell Date: Sun, 5 Nov 2023 23:06:03 +0100 Subject: [PATCH 12/14] add a utest on join --- tests/testthat/test-joins.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/testthat/test-joins.R b/tests/testthat/test-joins.R index 8b63dd607..d008678c6 100644 --- a/tests/testthat/test-joins.R +++ b/tests/testthat/test-joins.R @@ -28,6 +28,16 @@ test_that("lazyframe join examples", { apple = c("x", "y", "z", NA) ) ) + + # error on unknown how choice + ctx = df$join(other_df, on = "ham", how = "not a choice") |> get_err_ctx() + expect_true(startsWith(ctx$BadValue, "JoinType choice ['not a choice'] is not any of")) + + # error on invalid choice + ctx = df$join(other_df, on = "ham", how = 42) |> get_err_ctx() + expect_true("NotAChoice" %in% names(ctx)) + + }) @@ -117,4 +127,7 @@ test_that("cross join, DataFrame", { x_right = rep(letters[1:3], 3) ) ) + }) + + From 1f68394a0b9551688fd96553a29f53c9a8ebf267 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Sun, 5 Nov 2023 23:30:34 +0100 Subject: [PATCH 13/14] drop wrapping extendr error on invalid R choice --- src/rust/src/utils/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index 827fb5f34..7014d12f6 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -554,9 +554,9 @@ pub fn robj_to_rchoice(robj: extendr_api::Robj) -> RResult { Ok(Some(rstr)) => Ok(rstr.to_string()), // Not character vector, return Error - Err(extendr_err) => { - let rpolars_err: RPolarsErr = extendr_err.into(); - Err(rpolars_err.notachoice("input is not a character vector".into())) + Err(_extendr_err) => { + //let rpolars_err: RPolarsErr = _extendr_err.into(); extendr error not that helpful + Err(RPolarsErr::new().notachoice("input is not a character vector".into())) } // An empty chr vec, return Error From 2fd69c1d4c1ed0455bd5d182aaf1e033d6b61bb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Havelund=20Welling?= Date: Mon, 6 Nov 2023 23:14:26 +0100 Subject: [PATCH 14/14] Apply suggestions from code review Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- src/rust/src/rdatatype.rs | 4 ++-- tests/testthat/test-datatype.R | 3 --- tests/testthat/test-joins.R | 4 +--- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index 098a169df..79756da7d 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -498,7 +498,7 @@ pub fn robj_to_join_type(robj: Robj) -> RResult { "semi" => Ok(pl::JoinType::Semi), "anti" => Ok(pl::JoinType::Anti), s => rerr().bad_val(format!( - "JoinType choice ['{s}'] is not any of 'cross', 'inner', 'left', 'outer', 'semi', 'anti'" + "JoinType choice ['{s}'] should be one of 'cross', 'inner', 'left', 'outer', 'semi', 'anti'" )), } } @@ -511,7 +511,7 @@ pub fn robj_to_closed_window(robj: Robj) -> RResult { "none" => Ok(CW::None), "right" => Ok(CW::Right), s => rerr().bad_val(format!( - "ClosedWindow choice ['{s}'] is not any of 'both', 'left', 'none' or 'right'" + "ClosedWindow choice ['{s}'] should be one of 'both', 'left', 'none', 'right'" )), } } diff --git a/tests/testthat/test-datatype.R b/tests/testthat/test-datatype.R index e6f685fbe..b524cedfb 100644 --- a/tests/testthat/test-datatype.R +++ b/tests/testthat/test-datatype.R @@ -32,6 +32,3 @@ test_that("plStruct", { err_state = result(pl$Struct(bin = pl$Binary, pl$Boolean, bob = 42)) }) - - - diff --git a/tests/testthat/test-joins.R b/tests/testthat/test-joins.R index d008678c6..dd003a503 100644 --- a/tests/testthat/test-joins.R +++ b/tests/testthat/test-joins.R @@ -31,7 +31,7 @@ test_that("lazyframe join examples", { # error on unknown how choice ctx = df$join(other_df, on = "ham", how = "not a choice") |> get_err_ctx() - expect_true(startsWith(ctx$BadValue, "JoinType choice ['not a choice'] is not any of")) + expect_true(startsWith(ctx$BadValue, "JoinType choice ['not a choice'] should be one of")) # error on invalid choice ctx = df$join(other_df, on = "ham", how = 42) |> get_err_ctx() @@ -129,5 +129,3 @@ test_that("cross join, DataFrame", { ) }) - -