Skip to content

Commit

Permalink
Merge branch 'main' into rewrite-makevars
Browse files Browse the repository at this point in the history
  • Loading branch information
eitsupi authored Jan 14, 2024
2 parents 3f86100 + 1f2a4d0 commit 16f207e
Show file tree
Hide file tree
Showing 34 changed files with 1,638 additions and 190 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ Collate:
'functions__lazy.R'
'functions__whenthen.R'
'group_by.R'
'group_by_dynamic.R'
'group_by_rolling.R'
'info.R'
'ipc.R'
'is_polars.R'
Expand Down
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ S3method("$",RPolarsChainedWhen)
S3method("$",RPolarsDataFrame)
S3method("$",RPolarsDataType)
S3method("$",RPolarsDataTypeVector)
S3method("$",RPolarsDynamicGroupBy)
S3method("$",RPolarsErr)
S3method("$",RPolarsExpr)
S3method("$",RPolarsExprBinNameSpace)
Expand All @@ -26,6 +27,7 @@ S3method("$",RPolarsProtoExprArray)
S3method("$",RPolarsRField)
S3method("$",RPolarsRNullValues)
S3method("$",RPolarsRThreadHandle)
S3method("$",RPolarsRollingGroupBy)
S3method("$",RPolarsSQLContext)
S3method("$",RPolarsSeries)
S3method("$",RPolarsStringCacheHolder)
Expand Down Expand Up @@ -68,6 +70,7 @@ S3method("[[",RPolarsChainedWhen)
S3method("[[",RPolarsDataFrame)
S3method("[[",RPolarsDataType)
S3method("[[",RPolarsDataTypeVector)
S3method("[[",RPolarsDynamicGroupBy)
S3method("[[",RPolarsErr)
S3method("[[",RPolarsExpr)
S3method("[[",RPolarsGroupBy)
Expand All @@ -77,6 +80,7 @@ S3method("[[",RPolarsProtoExprArray)
S3method("[[",RPolarsRField)
S3method("[[",RPolarsRNullValues)
S3method("[[",RPolarsRThreadHandle)
S3method("[[",RPolarsRollingGroupBy)
S3method("[[",RPolarsSQLContext)
S3method("[[",RPolarsSeries)
S3method("[[",RPolarsStringCacheHolder)
Expand All @@ -88,12 +92,14 @@ S3method("|",RPolarsExpr)
S3method(.DollarNames,RPolarsChainedThen)
S3method(.DollarNames,RPolarsChainedWhen)
S3method(.DollarNames,RPolarsDataFrame)
S3method(.DollarNames,RPolarsDynamicGroupBy)
S3method(.DollarNames,RPolarsErr)
S3method(.DollarNames,RPolarsExpr)
S3method(.DollarNames,RPolarsGroupBy)
S3method(.DollarNames,RPolarsLazyFrame)
S3method(.DollarNames,RPolarsRField)
S3method(.DollarNames,RPolarsRThreadHandle)
S3method(.DollarNames,RPolarsRollingGroupBy)
S3method(.DollarNames,RPolarsSQLContext)
S3method(.DollarNames,RPolarsSeries)
S3method(.DollarNames,RPolarsThen)
Expand All @@ -112,9 +118,11 @@ S3method(as.matrix,RPolarsLazyFrame)
S3method(as.vector,RPolarsSeries)
S3method(as_polars_df,ArrowTabular)
S3method(as_polars_df,RPolarsDataFrame)
S3method(as_polars_df,RPolarsDynamicGroupBy)
S3method(as_polars_df,RPolarsGroupBy)
S3method(as_polars_df,RPolarsLazyFrame)
S3method(as_polars_df,RPolarsLazyGroupBy)
S3method(as_polars_df,RPolarsRollingGroupBy)
S3method(as_polars_df,RPolarsSeries)
S3method(as_polars_df,data.frame)
S3method(as_polars_df,default)
Expand Down Expand Up @@ -158,13 +166,15 @@ S3method(print,RPolarsChainedThen)
S3method(print,RPolarsChainedWhen)
S3method(print,RPolarsDataFrame)
S3method(print,RPolarsDataType)
S3method(print,RPolarsDynamicGroupBy)
S3method(print,RPolarsErr)
S3method(print,RPolarsExpr)
S3method(print,RPolarsGroupBy)
S3method(print,RPolarsLazyFrame)
S3method(print,RPolarsLazyGroupBy)
S3method(print,RPolarsRField)
S3method(print,RPolarsRThreadHandle)
S3method(print,RPolarsRollingGroupBy)
S3method(print,RPolarsSQLContext)
S3method(print,RPolarsSeries)
S3method(print,RPolarsThen)
Expand Down
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@

### What's changed

- New method `$rolling()` for `DataFrame` and `LazyFrame` (#682).
- New method `$rolling()` for `DataFrame` and `LazyFrame`. When this is
applied, it creates an object of class `RPolarsRollingGroupBy` (#682, #694).
- New method `$group_by_dynamic()` for `DataFrame` and `LazyFrame`. When this
is applied, it creates an object of class `RPolarsDynamicGroupBy` (#691).
- New method `$sink_ndjson()` for LazyFrame (#681).
- New function `pl$duration()` to create a duration by components (week, day,
hour, etc.), and use them with date(time) variables (#692).
Expand Down
7 changes: 7 additions & 0 deletions R/as_polars.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ as_polars_df.RPolarsGroupBy = function(x, ...) {
x$ungroup()
}

#' @rdname as_polars_df
#' @export
as_polars_df.RPolarsRollingGroupBy = as_polars_df.RPolarsGroupBy

#' @rdname as_polars_df
#' @export
as_polars_df.RPolarsDynamicGroupBy = as_polars_df.RPolarsGroupBy

#' @rdname as_polars_df
#' @export
Expand Down
96 changes: 92 additions & 4 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -1828,8 +1828,96 @@ DataFrame_write_ndjson = function(file) {
#' pl$max("a")$alias("max_a")
#' )
DataFrame_rolling = function(index_column, period, offset = NULL, closed = "right", by = NULL, check_sorted = TRUE) {
out = self$lazy()$rolling(index_column, period, offset, closed, by, check_sorted)
attr(out, "is_rolling_group_by") = TRUE
class(out) = "RPolarsGroupBy"
out
if (is.null(offset)) {
offset = paste0("-", period)
}
construct_rolling_group_by(self, index_column, period, offset, closed, by, check_sorted)
}

#' @inherit LazyFrame_group_by_dynamic title description details params
#' @return A [GroupBy][GroupBy_class] object
#'
#' @examples
#' df = pl$DataFrame(
#' time = pl$date_range(
#' start = strptime("2021-12-16 00:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
#' end = strptime("2021-12-16 03:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
#' interval = "30m",
#' eager = TRUE,
#' ),
#' n = 0:6
#' )
#'
#' # get the sum in the following hour relative to the "time" column
#' df$group_by_dynamic("time", every = "1h")$agg(
#' vals = pl$col("n"),
#' sum = pl$col("n")$sum()
#' )
#'
#' # using "include_boundaries = TRUE" is helpful to see the period considered
#' df$group_by_dynamic("time", every = "1h", include_boundaries = TRUE)$agg(
#' vals = pl$col("n")
#' )
#'
#' # in the example above, the values didn't include the one *exactly* 1h after
#' # the start because "closed = 'left'" by default.
#' # Changing it to "right" includes values that are exactly 1h after. Note that
#' # the value at 00:00:00 now becomes included in the interval [23:00:00 - 00:00:00],
#' # even if this interval wasn't there originally
#' df$group_by_dynamic("time", every = "1h", closed = "right")$agg(
#' vals = pl$col("n")
#' )
#' # To keep both boundaries, we use "closed = 'both'". Some values now belong to
#' # several groups:
#' df$group_by_dynamic("time", every = "1h", closed = "both")$agg(
#' vals = pl$col("n")
#' )
#'
#' # Dynamic group bys can also be combined with grouping on normal keys
#' df = df$with_columns(groups = pl$Series(c("a", "a", "a", "b", "b", "a", "a")))
#' df
#'
#' df$group_by_dynamic(
#' "time",
#' every = "1h",
#' closed = "both",
#' by = "groups",
#' include_boundaries = TRUE
#' )$agg(pl$col("n"))
#'
#' # We can also create a dynamic group by based on an index column
#' df = pl$LazyFrame(
#' idx = 0:5,
#' A = c("A", "A", "B", "B", "B", "C")
#' )$with_columns(pl$col("idx")$set_sorted())
#' df
#'
#' df$group_by_dynamic(
#' "idx",
#' every = "2i",
#' period = "3i",
#' include_boundaries = TRUE,
#' closed = "right"
#' )$agg(A_agg_list = pl$col("A"))
DataFrame_group_by_dynamic = function(
index_column,
every,
period = NULL,
offset = NULL,
include_boundaries = FALSE,
closed = "left",
label = "left",
by = NULL,
start_by = "window",
check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", every)
}
if (is.null(period)) {
period = every
}
construct_group_by_dynamic(
self, index_column, every, period, offset, include_boundaries, closed, label,
by, start_by, check_sorted
)
}
8 changes: 6 additions & 2 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -3507,6 +3507,7 @@ Expr_peak_max = function() {
#' column represents an index, it has to be either Int32 or Int64. Note that
#' Int32 gets temporarily cast to Int64, so if performance matters use an Int64
#' column.
#' @param ... Ignored.
#' @param period Length of the window, must be non-negative.
#' @param offset Offset of the window. Default is `-period`.
#' @param closed Define which sides of the temporal interval are closed
Expand Down Expand Up @@ -3569,8 +3570,11 @@ Expr_peak_max = function() {
#' df$with_columns(
#' sum_a_offset1 = pl$sum("a")$rolling(index_column = "dt", period = "2d", offset = "1d")
#' )
Expr_rolling = function(index_column, period, offset = NULL,
closed = "right", check_sorted = TRUE) {
Expr_rolling = function(
index_column,
...,
period, offset = NULL,
closed = "right", check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", period)
}
Expand Down
2 changes: 2 additions & 0 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1121,6 +1121,8 @@ RPolarsLazyFrame$with_context <- function(contexts) .Call(wrap__RPolarsLazyFrame

RPolarsLazyFrame$rolling <- function(index_column, period, offset, closed, by, check_sorted) .Call(wrap__RPolarsLazyFrame__rolling, self, index_column, period, offset, closed, by, check_sorted)

RPolarsLazyFrame$group_by_dynamic <- function(index_column, every, period, offset, label, include_boundaries, closed, by, start_by, check_sorted) .Call(wrap__RPolarsLazyFrame__group_by_dynamic, self, index_column, every, period, offset, label, include_boundaries, closed, by, start_by, check_sorted)

#' @export
`$.RPolarsLazyFrame` <- function (self, name) { func <- RPolarsLazyFrame[[name]]; environment(func) <- environment(); func }

Expand Down
67 changes: 28 additions & 39 deletions R/group_by.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
NULL



RPolarsGroupBy = new.env(parent = emptyenv())

#' @export
Expand All @@ -25,28 +24,30 @@ RPolarsGroupBy = new.env(parent = emptyenv())
#' @export
`[[.RPolarsGroupBy` = `$.RPolarsGroupBy`

#' @title auto complete $-access into a polars object
#' @description called by the interactive R session internally
#' @param x GroupBy
#' @param pattern code-stump as string to auto-complete
#' @return char vec
#' @export
#' @inherit .DollarNames.RPolarsDataFrame return
#' @noRd
.DollarNames.RPolarsGroupBy = function(x, pattern = "") {
paste0(ls(RPolarsGroupBy, pattern = pattern), "()")
}


#' The internal GroupBy constructor
#' @return The input as grouped DataFrame
#' @noRd
construct_group_by = function(df, groupby_input, maintain_order) {
if (!inherits(df, "RPolarsDataFrame")) stop("internal error: construct_group called not on DataFrame")
df = df$clone()
attr(df, "private") = list(groupby_input = unlist(groupby_input), maintain_order = maintain_order)
class(df) = "RPolarsGroupBy"
df
if (!inherits(df, "RPolarsDataFrame")) {
stop("internal error: construct_group called not on DataFrame")
}
# Make an empty object. Store everything (including data) in attributes, so
# that we can keep the RPolarsDataFrame class on the data but still return
# a RPolarsGroupBy object here.
out = c(" ")
attr(out, "private") = list(
dat = df$clone(),
groupby_input = unlist(groupby_input),
maintain_order = maintain_order
)
class(out) = "RPolarsGroupBy"
out
}


Expand All @@ -58,13 +59,13 @@ construct_group_by = function(df, groupby_input, maintain_order) {
#' @return self
#' @export
#'
#' @examples pl$DataFrame(iris)$group_by("Species")
#' @examples
#' pl$DataFrame(iris)$group_by("Species")
print.RPolarsGroupBy = function(x, ...) {
.pr$DataFrame$print(x)
cat("groups: ")
prv = attr(x, "private")
cat(toString(prv$groupby_input))
cat("\nmaintain order: ", prv$maintain_order)
.pr$DataFrame$print(prv$dat)
cat("groups:", toString(prv$groupby_input))
cat("\nmaintain order:", prv$maintain_order)
invisible(x)
}

Expand All @@ -86,18 +87,13 @@ print.RPolarsGroupBy = function(x, ...) {
#' pl$col("bar")$mean()$alias("bar_tail_sum")
#' )
GroupBy_agg = function(...) {
if (isTRUE(attributes(self)[["is_rolling_group_by"]])) {
class(self) = "RPolarsLazyGroupBy"
self$agg(unpack_list(..., .context = "in $agg():"))$collect(no_optimization = TRUE)
} else {
class(self) = "RPolarsDataFrame"
self$lazy()$group_by(
attr(self, "private")$groupby_input,
maintain_order = attr(self, "private")$maintain_order
)$
agg(...)$
collect(no_optimization = TRUE)
}
prv = attr(self, "private")
prv$dat$lazy()$group_by(
prv$groupby_input,
maintain_order = prv$maintain_order
)$
agg(...)$
collect(no_optimization = TRUE)
}


Expand Down Expand Up @@ -300,13 +296,6 @@ GroupBy_null_count = function() {
#'
#' gb$ungroup()
GroupBy_ungroup = function() {
if (isTRUE(attributes(self)[["is_rolling_group_by"]])) {
class(self) = "RPolarsLazyGroupBy"
self = self$ungroup()$collect(no_optimization = TRUE)
} else {
self = .pr$DataFrame$clone_in_rust(self)
class(self) = "RPolarsDataFrame"
attr(self, "private") = NULL
}
self
prv = attr(self, "private")
prv$dat
}
Loading

0 comments on commit 16f207e

Please sign in to comment.