Skip to content

Commit

Permalink
Merge branch 'main' into struct-named-elements
Browse files Browse the repository at this point in the history
  • Loading branch information
eitsupi committed May 22, 2024
2 parents 8ca2570 + a1d34a4 commit 9fea4e8
Show file tree
Hide file tree
Showing 53 changed files with 948 additions and 959 deletions.
3 changes: 3 additions & 0 deletions .github/actions/setup/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ inputs:
runs:
using: composite
steps:
- name: Should not update rustup
shell: bash
run: rustup set auto-self-update disable
- name: Update Rust
if: inputs.rust-nightly != 'true' && env.LIBR_POLARS_FEATURES != 'full_features'
shell: bash
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: polars
Title: Lightning-Fast 'DataFrame' Library
Version: 0.16.3.9000
Version: 0.16.4.9000
Depends: R (>= 4.2)
Imports: utils, codetools, methods
Authors@R:
Expand Down Expand Up @@ -118,5 +118,5 @@ Collate:
'zzz.R'
Config/rextendr/version: 0.3.1
VignetteBuilder: knitr
Config/polars/LibVersion: 0.39.3
Config/polars/LibVersion: 0.39.4
Config/polars/RustToolchainVersion: nightly-2024-04-15
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ S3method(as_polars_df,RPolarsLazyFrame)
S3method(as_polars_df,RPolarsLazyGroupBy)
S3method(as_polars_df,RPolarsRollingGroupBy)
S3method(as_polars_df,RPolarsSeries)
S3method(as_polars_df,RecordBatchReader)
S3method(as_polars_df,data.frame)
S3method(as_polars_df,default)
S3method(as_polars_df,nanoarrow_array)
Expand All @@ -171,6 +172,7 @@ S3method(as_polars_series,RPolarsChainedThen)
S3method(as_polars_series,RPolarsExpr)
S3method(as_polars_series,RPolarsSeries)
S3method(as_polars_series,RPolarsThen)
S3method(as_polars_series,RecordBatchReader)
S3method(as_polars_series,clock_sys_time)
S3method(as_polars_series,clock_time_point)
S3method(as_polars_series,clock_zoned_time)
Expand Down Expand Up @@ -210,6 +212,7 @@ S3method(names,RPolarsGroupBy)
S3method(names,RPolarsLazyFrame)
S3method(names,RPolarsLazyGroupBy)
S3method(parse_as_polars_duration_string,character)
S3method(parse_as_polars_duration_string,default)
S3method(parse_as_polars_duration_string,difftime)
S3method(plain,RPolarsErr)
S3method(plain,character)
Expand Down
15 changes: 15 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@

### Breaking changes

- As warned in v0.16.0, the order of arguments in `pl$Series` is changed (#1071).
The first argument is now `name`, and the second argument is `values`.
- `$to_struct()` on an Expr is removed. This method is now only available for
`Series`, `DataFrame`, and in the `$list` and `$arr` subnamespaces. For example,
`pl$col("a", "b", "c")$to_struct()` should be replaced with
`pl$struct(c("a", "b", "c"))` (#1092).
- `pl$Struct()` now only accepts named inputs and objects of class `RPolarsField`.
For example, `pl$Struct(pl$Boolean)` doesn't work anymore and should be named
like `pl$Struct(a = pl$Boolean)` (#1053).

## Polars R Package 0.16.4

### New features

- `pl$read_ipc()` can read a raw vector of Apache Arrow IPC file (#1072).
Expand All @@ -21,6 +29,13 @@
- New S3 methods `nanoarrow::as_nanoarrow_array_stream()` and `nanoarrow::infer_nanoarrow_schema()`
for `RPolarsSeries` (#1076).
- New method `$dt$is_leap_year()` (#1077).
- `as_polars_df()` and `as_polars_series()` support `arrow::RecordBatchReader` (#1078).
- The new `experimental` argument for `as_polars_df(<ArrowTabular>)`, `as_polars_df(<RecordBatchReader>)`,
`as_polars_series(<nanoarrow_array_stream>)`, and `as_polars_df(<nanoarrow_array_stream>)` (#1078).
If `experimental = TRUE`, these functions switch to using
[the Arrow C stream interface](https://arrow.apache.org/docs/format/CStreamInterface.html) internally.
At this point, this degrades performance in the expected use cases,
so the default is set to `experimental = FALSE`.

## Polars R Package 0.16.3

Expand Down
116 changes: 80 additions & 36 deletions R/as_polars.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,36 @@
#' [$collect()][LazyFrame_collect] or [$fetch()][LazyFrame_fetch], depending on
#' whether the number of rows to fetch is infinite or not.
#' @rdname as_polars_df
#' @inheritParams as_polars_series
#' @param x Object to convert to a polars DataFrame.
#' @param ... Additional arguments passed to methods.
#' @return a [DataFrame][DataFrame_class]
#' @examplesIf requireNamespace("arrow", quietly = TRUE)
#' # Convert the row names of a data frame to a column
#' as_polars_df(mtcars, rownames = "car")
#'
#' # Convert an arrow Table to a polars DataFrame
#' at = arrow::arrow_table(x = 1:5, y = 6:10)
#' as_polars_df(at)
#'
#' # Convert an arrow Table, with renaming all columns
#' # Convert a data frame, with renaming all columns
#' as_polars_df(
#' at,
#' data.frame(x = 1, y = 2),
#' schema = c("a", "b")
#' )
#'
#' # Convert an arrow Table, with renaming and casting all columns
#' # Convert a data frame, with renaming and casting all columns
#' as_polars_df(
#' at,
#' data.frame(x = 1, y = 2),
#' schema = list(b = pl$Int64, a = pl$String)
#' )
#'
#' # Convert an arrow Table, with casting some columns
#' # Convert a data frame, with casting some columns
#' as_polars_df(
#' at,
#' data.frame(x = 1, y = 2),
#' schema_overrides = list(y = pl$String) # cast some columns
#' )
#'
#' # Convert an arrow Table to a polars DataFrame
#' at = arrow::arrow_table(x = 1:5, y = 6:10)
#' as_polars_df(at)
#'
#' # Create a polars DataFrame from a data.frame
#' lf = as_polars_df(mtcars)$lazy()
#'
Expand Down Expand Up @@ -212,13 +213,33 @@ as_polars_df.ArrowTabular = function(
...,
rechunk = TRUE,
schema = NULL,
schema_overrides = NULL) {
schema_overrides = NULL,
experimental = FALSE) {
arrow_to_rpldf(
x,
rechunk = rechunk,
schema = schema,
schema_overrides = schema_overrides
)
schema_overrides = schema_overrides,
experimental = experimental
) |>
result() |>
unwrap("in as_polars_df():")
}


#' @rdname as_polars_df
#' @export
as_polars_df.RecordBatchReader = function(x, ..., experimental = FALSE) {
  # Error context passed to `unwrap()` on both code paths.
  err_context = "in as_polars_df(<RecordBatchReader>):"

  if (!isTRUE(experimental)) {
    # Default path: hand the reader's record batches (`x$batches()`) to the
    # internal DataFrame constructor and unwrap its result.
    return(
      unwrap(.pr$DataFrame$from_arrow_record_batches(x$batches()), err_context)
    )
  }

  # Experimental path: convert the reader to a single Series named "" via
  # `as_polars_series()`, put it in a one-column frame, and unnest that ""
  # column. The whole chain is evaluated inside `result()` before unwrapping
  # (presumably so that errors raised by the chain are captured -- confirm
  # against the definition of `result()`).
  unwrap(
    result(as_polars_series(x, name = "")$to_frame()$unnest("")),
    err_context
  )
}


Expand Down Expand Up @@ -247,20 +268,16 @@ as_polars_df.nanoarrow_array = function(x, ...) {

#' @rdname as_polars_df
#' @export
as_polars_df.nanoarrow_array_stream = function(x, ...) {
if (!inherits(nanoarrow::infer_nanoarrow_ptype(x$get_schema()), "data.frame")) {
as_polars_df.nanoarrow_array_stream = function(x, ..., experimental = FALSE) {
if (!identical(nanoarrow::nanoarrow_schema_parse(x$get_schema())$type, "struct")) {
Err_plain("Can't convert non-struct array stream to RPolarsDataFrame") |>
unwrap("in as_polars_df(<nanoarrow_array_stream>):")
}

series = as_polars_series.nanoarrow_array_stream(x, name = NULL)

if (length(series)) {
series$to_frame()$unnest("")
} else {
# TODO: support 0-length array stream
pl$DataFrame()
}
as_polars_series.nanoarrow_array_stream(
x,
name = "", experimental = experimental
)$to_frame()$unnest("")
}


Expand Down Expand Up @@ -397,6 +414,20 @@ as_polars_series.Array = function(x, name = NULL, ..., rechunk = TRUE) {
as_polars_series.ChunkedArray = as_polars_series.Array


#' @rdname as_polars_series
#' @export
as_polars_series.RecordBatchReader = function(x, name = NULL, ...) {
  # Obtain a stream object from the package helper and have the reader export
  # itself into it (`export_to_c()` is a method of the reader `x`).
  c_stream = polars_allocate_array_stream()
  x$export_to_c(c_stream)

  # Import the exported stream as a Series; a `NULL` name falls back to "".
  unwrap(
    .pr$Series$import_stream(name %||% "", c_stream),
    "in as_polars_series(<RecordBatchReader>):"
  )
}


#' @rdname as_polars_series
#' @export
as_polars_series.nanoarrow_array = function(x, name = NULL, ...) {
Expand All @@ -406,26 +437,39 @@ as_polars_series.nanoarrow_array = function(x, name = NULL, ...) {
}


#' @param experimental If `TRUE`, use the experimental Arrow C stream interface inside the function.
#' This argument is experimental and may be removed in the future.
#' @rdname as_polars_series
#' @export
as_polars_series.nanoarrow_array_stream = function(x, name = NULL, ...) {
as_polars_series.nanoarrow_array_stream = function(x, name = NULL, ..., experimental = FALSE) {
on.exit(x$release())

list_of_arrays = nanoarrow::collect_array_stream(x, validate = FALSE)
if (isTRUE(experimental)) {
stream_out = polars_allocate_array_stream()
nanoarrow::nanoarrow_pointer_export(x, stream_out)

if (length(list_of_arrays) < 1L) {
# TODO: support 0-length array stream
out = pl$Series(name = name)
} else {
out = as_polars_series.nanoarrow_array(list_of_arrays[[1L]], name = name)
lapply(
list_of_arrays[-1L],
\(array) .pr$Series$append_mut(out, as_polars_series.nanoarrow_array(array))
.pr$Series$import_stream(
name %||% "",
stream_out
) |>
invisible()
}
unwrap("in as_polars_series(<nanoarrow_array_stream>):")
} else {
list_of_arrays = nanoarrow::collect_array_stream(x, validate = FALSE)

out
if (length(list_of_arrays) < 1L) {
# TODO: support 0-length array stream
out = pl$Series(name = name)
} else {
out = as_polars_series.nanoarrow_array(list_of_arrays[[1L]], name = name)
lapply(
list_of_arrays[-1L],
\(array) .pr$Series$append_mut(out, as_polars_series.nanoarrow_array(array))
) |>
invisible()
}

out
}
}


Expand Down
8 changes: 4 additions & 4 deletions R/construction.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
#' If schema names or types do not match `x`, the columns will be renamed/recast.
#' If `NULL` (default), convert columns as is.
#' @param schema_overrides named list of DataTypes. Cast some columns to the DataType.
#' @param experimental If `TRUE`, use the Arrow C stream interface.
#' @noRd
#' @return RPolarsDataFrame
arrow_to_rpldf = function(at, schema = NULL, schema_overrides = NULL, rechunk = TRUE) {
arrow_to_rpldf = function(
at, schema = NULL, schema_overrides = NULL, rechunk = TRUE, ..., experimental = FALSE) {
# new column names by schema, #todo get names if schema not NULL
n_cols = at$num_columns

Expand Down Expand Up @@ -53,9 +55,7 @@ arrow_to_rpldf = function(at, schema = NULL, schema_overrides = NULL, rechunk =
if (tbl$num_rows == 0L) {
rdf = pl$DataFrame() # TODO: support creating 0-row DataFrame
} else {
rdf = unwrap(
.pr$DataFrame$from_arrow_record_batches(arrow::as_record_batch_reader(tbl)$batches())
)
rdf = as_polars_df(arrow::as_record_batch_reader(tbl), experimental = experimental)
}
} else {
rdf = pl$DataFrame()
Expand Down
18 changes: 7 additions & 11 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -1127,8 +1127,8 @@ DataFrame_to_struct = function(name = "") {
#' c = 6:10
#' )$
#' select(
#' pl$col("b")$to_struct(),
#' pl$col("a", "c")$to_struct()$alias("a_and_c")
#' pl$struct("b"),
#' pl$struct(c("a", "c"))$alias("a_and_c")
#' )
#' df
#'
Expand Down Expand Up @@ -2131,9 +2131,8 @@ DataFrame_rolling = function(
closed = "right",
group_by = NULL,
check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", period) # TODO: `paste0` should be executed after `period` is parsed as string
}
period = parse_as_polars_duration_string(period)
offset = parse_as_polars_duration_string(offset) %||% negate_duration_string(period)
construct_rolling_group_by(self, index_column, period, offset, closed, group_by, check_sorted)
}

Expand Down Expand Up @@ -2216,12 +2215,9 @@ DataFrame_group_by_dynamic = function(
group_by = NULL,
start_by = "window",
check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", every) # TODO: `paste0` should be executed after `period` is parsed as string
}
if (is.null(period)) {
period = every
}
every = parse_as_polars_duration_string(every)
offset = parse_as_polars_duration_string(offset) %||% negate_duration_string(every)
period = parse_as_polars_duration_string(period) %||% every
construct_group_by_dynamic(
self, index_column, every, period, offset, include_boundaries, closed, label,
group_by, start_by, check_sorted
Expand Down
6 changes: 3 additions & 3 deletions R/datatype.R
Original file line number Diff line number Diff line change
Expand Up @@ -240,14 +240,14 @@ DataType_Duration = function(time_unit = "us") {
#' )
#' }
#'
#' # Finally, one can use the method `$to_struct()` to convert existing columns
#' # or `Series` to a `Struct`:
#' # Finally, one can use `pl$struct()` to convert existing columns or `Series`
#' # to a `Struct`:
#' x = pl$DataFrame(
#' a = 1:2,
#' b = list(c("x", "y"), "z")
#' )
#'
#' out = x$select(pl$col("a", "b")$to_struct())
#' out = x$select(pl$struct(c("a", "b")))
#' out
#'
#' out$schema
Expand Down
10 changes: 2 additions & 8 deletions R/expr__array.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,35 +10,29 @@
#' df$with_columns(sum = pl$col("values")$arr$sum())
ExprArr_sum = function() {
  # Delegate to the internal `arr_sum` binding, passing `self` (not an
  # argument here; it is resolved from the enclosing environment).
  .pr$Expr$arr_sum(self)
}

# TODO: add example with NA when this is fixed:
# https://github.com/pola-rs/polars/issues/14359

#' Find the maximum value in an array
#'
#' @return Expr
#' @inherit ExprStr_to_titlecase details
#' @aliases arr_max
#' @examples
#' df = pl$DataFrame(
#' values = list(c(1, 2), c(3, 4), c(5, 6)),
#' values = list(c(1, 2), c(3, 4), c(NA_real_, NA_real_)),
#' schema = list(values = pl$Array(pl$Float64, 2))
#' )
#' df$with_columns(max = pl$col("values")$arr$max())
ExprArr_max = function() {
# Delegate to the internal `arr_max` binding, passing `self` (not an
# argument here; it is resolved from the enclosing environment).
.pr$Expr$arr_max(self)
}

# TODO: add example with NA when this is fixed:
# https://github.com/pola-rs/polars/issues/14359

#' Find the minimum value in an array
#'
#' @inherit ExprStr_to_titlecase details
#' @return Expr
#' @aliases arr_min
#' @examples
#' df = pl$DataFrame(
#' values = list(c(1, 2), c(3, 4), c(5, 6)),
#' values = list(c(1, 2), c(3, 4), c(NA_real_, NA_real_)),
#' schema = list(values = pl$Array(pl$Float64, 2))
#' )
#' df$with_columns(min = pl$col("values")$arr$min())
Expand Down
Loading

0 comments on commit 9fea4e8

Please sign in to comment.