diff --git a/NAMESPACE b/NAMESPACE index a9d6625f4..334e29b6e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,10 +9,10 @@ S3method("$",ChainedWhen) S3method("$",DataFrame) S3method("$",DataTypeVector) S3method("$",Expr) -S3method("$",ExprArrNameSpace) S3method("$",ExprBinNameSpace) S3method("$",ExprCatNameSpace) S3method("$",ExprDTNameSpace) +S3method("$",ExprListNameSpace) S3method("$",ExprMetaNameSpace) S3method("$",ExprStrNameSpace) S3method("$",ExprStructNameSpace) @@ -54,7 +54,7 @@ S3method(">",Series) S3method(">=",Expr) S3method(">=",Series) S3method("[",DataFrame) -S3method("[",ExprArrNameSpace) +S3method("[",ExprListNameSpace) S3method("[",LazyFrame) S3method("[[",ChainedThen) S3method("[[",ChainedWhen) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 8d0756b6c..4800a9a7e 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -117,7 +117,7 @@ DataFrame #' d = list(1:1, 1:2, 1:3, 1:4, 1:5) #' ) # directly from vectors #' -#' # from a list of vectors or data.frame +#' # from a list of vectors #' pl$DataFrame(list( #' a = c(1, 2, 3, 4, 5), #' b = 1:5, @@ -125,6 +125,9 @@ DataFrame #' d = list(1L, 1:2, 1:3, 1:4, 1:5) #' )) #' +#' # from a data.frame +#' pl$DataFrame(mtcars) + pl$DataFrame = function(..., make_names_unique = TRUE, parallel = FALSE, via_select =TRUE) { @@ -321,6 +324,15 @@ DataFrame.property_setters = new.env(parent = emptyenv()) #' @param offset positive integer offset for the start of the counter #' @return A new `DataFrame` object with a counter column in front #' @docType NULL +#' @examples +#' df = pl$DataFrame(mtcars) +#' +#' # by default, the index starts at 0 (to mimic the behavior of Python Polars) +#' df$with_row_count("idx") +#' +#' # but in R, we use a 1-index +#' df$with_row_count("idx", offset = 1) + DataFrame_with_row_count = function(name, offset = NULL) { .pr$DataFrame$with_row_count(self, name, offset) |> unwrap() } @@ -341,50 +353,61 @@ DataFrame_with_row_count = function(name, offset = NULL) { #' # set + 
get values #' df$columns = letters[1:5] # <- is fine too #' df$columns + DataFrame_columns = method_as_property(function() { .pr$DataFrame$columns(self) }, setter = TRUE) + # define setter function -DataFrame.property_setters$columns = - function(self, names) unwrap(.pr$DataFrame$set_column_names_mut(self, names)) +DataFrame.property_setters$columns = function(self, names) { + unwrap(.pr$DataFrame$set_column_names_mut(self, names)) +} #' @title Drop columns of a DataFrame #' @keywords DataFrame -#' @param columns A character vector containing the names of the column(s) to -#' remove from the DataFrame. +#' @param columns A character vector with the names of the column(s) to remove +#' from the DataFrame. #' @return DataFrame #' @examples pl$DataFrame(mtcars)$drop(c("mpg", "hp")) + DataFrame_drop = function(columns) { self$lazy()$drop(columns)$collect() } -#' @title Drop nulls -#' @description Drop all rows that contain null values. +#' @title Drop nulls (missing values) +#' @description Drop all rows that contain nulls (which correspond to `NA` in R). #' @keywords DataFrame -#' @param subset string or vector of strings. Column name(s) for which null values are considered. If set to NULL (default), use all columns. +#' @param subset A character vector with the names of the column(s) for which +#' nulls are considered. If `NULL` (default), use all columns. 
#' #' @return DataFrame #' @examples #' tmp = mtcars #' tmp[1:3, "mpg"] = NA #' tmp[4, "hp"] = NA -#' pl$DataFrame(tmp)$drop_nulls()$height -#' pl$DataFrame(tmp)$drop_nulls("mpg")$height -#' pl$DataFrame(tmp)$drop_nulls(c("mpg", "hp"))$height +#' tmp = pl$DataFrame(tmp) +#' +#' # number of rows in `tmp` before dropping nulls +#' tmp$height +#' +#' tmp$drop_nulls()$height +#' tmp$drop_nulls("mpg")$height +#' tmp$drop_nulls(c("mpg", "hp"))$height + DataFrame_drop_nulls = function(subset = NULL) { self$lazy()$drop_nulls(subset)$collect() } -#' @title DataFrame_unique -#' @description Drop duplicate rows from this dataframe. +#' @title Drop duplicated rows +#' #' @keywords DataFrame #' -#' @param subset string or vector of strings. Column name(s) to consider when -#' identifying duplicates. If set to NULL (default), use all columns. -#' @param keep string. Which of the duplicate rows to keep: +#' @param subset A character vector with the names of the column(s) to use to +#' identify duplicates. If `NULL` (default), use all columns. +#' @param keep Which of the duplicate rows to keep: #' * "first": Keep first unique row. #' * "last": Keep last unique row. #' * "none": Don’t keep duplicate rows. 
@@ -395,100 +418,106 @@ DataFrame_drop_nulls = function(subset = NULL) { #' @return DataFrame #' @examples #' df = pl$DataFrame( -#' x = as.numeric(c(1, 1:5)), -#' y = as.numeric(c(1, 1:5)), -#' z = as.numeric(c(1, 1, 1:4)) +#' x = sample(10, 100, rep = TRUE), +#' y = sample(10, 100, rep = TRUE) #' ) +#' df$height +#' #' df$unique()$height -#' df$unique(subset = c("x", "z"), keep = "last")$height +#' df$unique(subset = "x")$height +#' +#' df$unique(keep = "last") +#' +#' # only keep unique rows +#' df$unique(keep = "none") + DataFrame_unique = function(subset = NULL, keep = "first", maintain_order = FALSE) { self$lazy()$unique(subset, keep, maintain_order)$collect() } -#' Shape of DataFrame +#' Dimensions of a DataFrame #' @name DataFrame_shape #' @description Get shape/dimensions of DataFrame #' -#' @return two length numeric vector of c(nrows,ncols) -#' @keywords DataFrame +#' @return Numeric vector of length two with the number of rows and the number +#' of columns. +#' @keywords DataFrame #' @examples -#' df = pl$DataFrame(iris)$shape -#' +#' pl$DataFrame(iris)$shape + DataFrame_shape = method_as_property(function() { .pr$DataFrame$shape(self) }) -#' Height of DataFrame +#' Number of rows of a DataFrame #' @name DataFrame_height -#' @description Get height(nrow) of DataFrame +#' @description Get the number of rows (height) of a DataFrame #' -#' @return height as numeric +#' @return The number of rows of the DataFrame #' @aliases height nrow -#' @keywords DataFrame +#' @keywords DataFrame #' @examples #' pl$DataFrame(iris)$height -#' + DataFrame_height = method_as_property(function() { .pr$DataFrame$shape(self)[1L] }) - -#' Width of DataFrame +#' Number of columns of a DataFrame #' @name DataFrame_width -#' @description Get width(ncol) of DataFrame +#' @description Get the number of columns (width) of a DataFrame #' -#' @return width as numeric scalar -#' @keywords DataFrame +#' @return The number of columns of a DataFrame +#' @keywords DataFrame #' @examples #' 
pl$DataFrame(iris)$width -#' + DataFrame_width = method_as_property(function() { .pr$DataFrame$shape(self)[2L] }) - - -#' DataFrame dtypes +#' Data types information #' @name DataFrame_dtypes -#' @description Get the data types of columns in DataFrame. -#' Data types can also be found in column headers when printing the DataFrame. -#' -#' @return width as numeric scalar -#' @keywords DataFrame +#' @description Get the data type of all columns. You can see all available +#' types with `names(pl$dtypes)`. The data type of each column is also shown +#' when printing the DataFrame. +#' +#' @return +#' `$dtypes` returns an unnamed list with the data type of each column. +#' `$schema` returns a named list with the column names and the data type of +#' each column. +#' @keywords DataFrame #' @examples #' pl$DataFrame(iris)$dtypes #' +#' pl$DataFrame(iris)$schema + DataFrame_dtypes = method_as_property(function() { .pr$DataFrame$dtypes(self) }) -#' DataFrame dtype strings +#' Data types information #' @name DataFrame_dtype_strings -#' @description Get column types as strings. +#' @description Get the data type of all columns as strings. You can see all +#' available types with `names(pl$dtypes)`. The data type of each column is also +#' shown when printing the DataFrame. #' #' @docType NULL #' @format NULL -#' @return string vector +#' @return A character vector with the data type of each column #' @keywords DataFrame #' @examples #' pl$DataFrame(iris)$dtype_strings() + DataFrame_dtype_strings = "use_extendr_wrapper" -#' DataFrame dtypes -#' @name DataFrame_dtypes -#' @description Get dtypes of columns in DataFrame. -#' Dtypes can also be found in column headers when printing the DataFrame. 
-#' -#' @return width as numeric scalar -#' @keywords DataFrame -#' @examples -#' pl$DataFrame(iris)$schema -#' +#' @rdname DataFrame_dtypes + DataFrame_schema = method_as_property(function() { .pr$DataFrame$schema(self) }) @@ -537,7 +566,7 @@ DataFrameCompareToOtherDF = function(self, other, op) { #' @keywords DataFrame LazyFrame_new #' @examples #' pl$DataFrame(iris)$lazy() -#' + DataFrame_lazy = "use_extendr_wrapper" #' Clone a DataFrame @@ -554,53 +583,66 @@ DataFrame_lazy = "use_extendr_wrapper" #' df3 = df1 #' pl$mem_address(df1) != pl$mem_address(df2) #' pl$mem_address(df1) == pl$mem_address(df3) -#' + DataFrame_clone = function() { .pr$DataFrame$clone_see_me_macro(self) } #' Get columns (as Series) #' @name DataFrame_get_columns -#' @description get columns as list of series +#' @description Extract all DataFrame columns as a list of Polars series. #' -#' @return list of series +#' @return A list of series #' @keywords DataFrame #' @docType NULL #' @format NULL #' @examples -#' df = pl$DataFrame(iris[1, ]) +#' df = pl$DataFrame(iris[1:2, ]) #' df$get_columns() DataFrame_get_columns = "use_extendr_wrapper" -#' Get Column (as one Series) +#' Get column (as one Series) #' @name DataFrame_get_column -#' @description get one column by name as series +#' @description Extract a DataFrame column as a Polars series. #' -#' @param name name of column to extract as Series +#' @param name Name of the column to extract. #' #' @return Series #' @aliases DataFrame_get_column #' @keywords DataFrame #' @examples -#' df = pl$DataFrame(iris[1, ]) +#' df = pl$DataFrame(iris[1:2, ]) #' df$get_column("Species") + DataFrame_get_column = function(name) { unwrap(.pr$DataFrame$get_column(self, name), "in $get_column():") } -#' Get Series by idx, if there -#' -#' @param idx numeric default 0, zero-index of what column to return as Series +#' Get column by index #' #' @name DataFrame_to_series -#' @description get one column by idx as series from DataFrame. 
-#' Unlike get_column this method will not fail if no series found at idx but -#' return a NULL, idx is zero idx. +#' @description Extract a DataFrame column (by index) as a Polars series. Unlike +#' `get_column()`, this method will not fail but will return a `NULL` if the +#' index doesn't exist in the DataFrame. Keep in mind that Polars is 0-indexed +#' so "0" is the first column. +#' +#' @param idx Index of the column to return as Series. Defaults to 0, which is +#' the first column. #' #' @return Series or NULL #' @keywords DataFrame #' @examples -#' pl$DataFrame(a = 1:4)$to_series() +#' df = pl$DataFrame(iris[1:10, ]) +#' +#' # default is to extract the first column +#' df$to_series() +#' +#' # Polars is 0-indexed, so we use idx = 1 to extract the *2nd* column +#' df$to_series(idx = 1) +#' +#' # doesn't error if the column isn't there +#' df$to_series(idx = 8) + DataFrame_to_series = function(idx = 0) { if (!is.numeric(idx) || isTRUE(idx < 0)) { pstop(err = "idx must be non-negative numeric") @@ -608,7 +650,7 @@ DataFrame_to_series = function(idx = 0) { .pr$DataFrame$select_at_idx(self, idx)$ok } -#' DataFrame Sort +#' Sort a DataFrame #' @inherit LazyFrame_sort details description params #' @return DataFrame #' @keywords DataFrame @@ -623,6 +665,7 @@ DataFrame_to_series = function(idx = 0) { #' df$sort(c("cyl", "mpg"), descending = TRUE) #' df$sort(c("cyl", "mpg"), descending = c(TRUE, FALSE)) #' df$sort(pl$col("cyl"), pl$col("mpg")) + DataFrame_sort = function( by, ..., @@ -638,7 +681,7 @@ DataFrame_sort = function( #' Select and modify columns of a DataFrame #' @name DataFrame_select -#' @description Related to dplyr `mutate()`. However, it discards unmentioned +#' @description Similar to `dplyr::mutate()`. However, it discards unmentioned #' columns (like `.()` in `data.table`). #' #' @param ... Columns to keep. 
Those can be expressions (e.g `pl$col("a")`), @@ -654,6 +697,7 @@ DataFrame_sort = function( #' pl$col("Sepal.Length")$abs()$alias("abs_SL"), #' (pl$col("Sepal.Length") + 2)$alias("add_2_SL") #' ) + DataFrame_select = function(...) { .pr$DataFrame$select(self, unpack_list(...)) |> unwrap("in $select()") @@ -671,58 +715,84 @@ DataFrame_select = function(...) { #' x = dat$drop_in_place("Species") #' x #' dat$columns + DataFrame_drop_in_place = function(name) { .pr$DataFrame$drop_in_place(self, name) } -#' Drop in place +#' Compare two DataFrames #' @name DataFrame_frame_equal -#' @description Check if DataFrame is equal to other. +#' @description Check if two DataFrames are equal. #' #' @param other DataFrame to compare with. -#' @return bool -#' @keywords DataFrame +#' @return A boolean. +#' @keywords DataFrame #' @examples #' dat1 = pl$DataFrame(iris) #' dat2 = pl$DataFrame(iris) #' dat3 = pl$DataFrame(mtcars) #' dat1$frame_equal(dat2) #' dat1$frame_equal(dat3) + DataFrame_frame_equal = function(other) { .pr$DataFrame$frame_equal(self, other) } -#' @title Shift -#' @description Shift the values by a given period. +#' Shift a DataFrame +#' +#' @description Shift the values by a given period. If the period (`n`) is positive, +#' then `n` rows will be inserted at the top of the DataFrame and the last `n` +#' rows will be discarded. Vice-versa if the period is negative. In the end, +#' the total number of rows of the DataFrame doesn't change. +#' #' @keywords DataFrame -#' @param periods integer Number of periods to shift (may be negative). +#' @param periods Number of periods to shift (can be negative). #' @return DataFrame -#' @examples pl$DataFrame(mtcars)$shift(2) +#' @examples +#' pl$DataFrame(mtcars)$shift(2) +#' +#' pl$DataFrame(mtcars)$shift(-2) + DataFrame_shift = function(periods = 1) { self$lazy()$shift(periods)$collect() } #' @title Shift and fill -#' @description Shift the values by a given period and fill the resulting null values. 
+#' +#' @description Shift the values by a given period and fill the resulting null +#' values. See the docs of `$shift()` for more details on shifting. #' @keywords DataFrame -#' @param fill_value Fill values with the result of this expression. -#' @param periods Integer indicating the number of periods to shift (may be -#' negative). +#' +#' @param fill_value Fill new `NULL` values with this value. Must be of length 1. +#' A logical value will be converted to numeric. +#' @param periods Number of periods to shift (can be negative). #' @return DataFrame -#' @examples pl$DataFrame(mtcars)$shift_and_fill(0, 2) +#' @examples +#' df = pl$DataFrame(mtcars) +#' +#' # insert two rows filled with 0 at the top of the DataFrame +#' df$shift_and_fill(0, 2) +#' +#' # automatic conversion of logical value to numeric +#' df$shift_and_fill(TRUE, 2) + DataFrame_shift_and_fill = function(fill_value, periods = 1) { self$lazy()$shift_and_fill(fill_value, periods)$collect() } -#' @title Modify/append column(s) -#' @description Add or modify columns with expressions +#' Modify/append column(s) +#' +#' Add columns or modify existing ones with expressions. This is +#' the equivalent of `dplyr::mutate()` as it keeps unmentioned columns (unlike +#' `$select()`). +#' **`$with_column()` function is deprecated, use `$with_columns()` instead.** +#' #' @name DataFrame_with_columns #' @aliases with_columns -#' @param ... any expressions or string column name, or same wrapped in a list. If first and only -#' element is a list, it is unwrap as a list of args. -#' @keywords DataFrame -#' @return DataFrame -#' @details Like dplyr `mutate()` as it keeps unmentioned columns unlike $select(). +#' @param ... Any expressions or string column name, or same wrapped in a list. +#' If first and only element is a list, it is unwrapped as a list of args.
+#' @keywords DataFrame +#' @return A DataFrame #' @examples #' pl$DataFrame(iris)$with_columns( #' pl$col("Sepal.Length")$abs()$alias("abs_SL"), @@ -742,18 +812,16 @@ DataFrame_shift_and_fill = function(fill_value, periods = 1) { #' pl$col("Sepal.Length")$abs(), # not named expr will keep name "Sepal.Length" #' SW_add_2 = (pl$col("Sepal.Width") + 2) #' ) + DataFrame_with_columns = function(...) { .pr$DataFrame$with_columns(self, unpack_list(...)) |> unwrap("in $with_columns()") } -#' modify/append one column #' @rdname DataFrame_with_columns #' @aliases with_column #' @param expr a single expression or string -#' @keywords DataFrame -#' @return DataFrame -#' @details with_column is derived from with_columns but takes only one expression argument + DataFrame_with_column = function(expr) { warning("`with_column()` is deprecated and will be removed in polars 0.9.0. Please use `with_columns()` instead.") self$with_columns(expr) @@ -764,58 +832,74 @@ DataFrame_with_column = function(expr) { #' Limit a DataFrame #' @name DataFrame_limit #' @description Take some maximum number of rows. -#' @param n Positive numeric or integer number not larger than 2^32 +#' @param n Positive number not larger than 2^32. #' -#' @details Any number will converted to u32. +#' @details Any number will be converted to u32. Negative values raise an error. #' @keywords DataFrame #' @return DataFrame #' @examples #' pl$DataFrame(iris)$limit(6) -#' + DataFrame_limit = function(n) { self$lazy()$limit(n)$collect() } #' Head of a DataFrame #' @name DataFrame_head -#' @description Get the first n rows of the query. -#' @param n positive numeric or integer number not larger than 2^32 +#' @description Get the first `n` rows of the query. +#' @param n Positive number not larger than 2^32. #' -#' @details any number will converted to u32.
Negative raises error +#' @inherit DataFrame_limit details #' @keywords DataFrame #' @return DataFrame + DataFrame_head = function(n) { self$lazy()$head(n)$collect() } #' Tail a DataFrame #' @name DataFrame_tail -#' @description Get the last n rows. -#' @param n positive numeric of integer number not larger than 2^32 +#' @description Get the last `n` rows. +#' @param n Positive number not larger than 2^32. #' -#' @details any number will converted to u32. Negative raises error +#' @inherit DataFrame_limit details #' @keywords DataFrame #' @return DataFrame + DataFrame_tail = function(n) { self$lazy()$tail(n)$collect() } -#' filter DataFrame -#' @aliases DataFrame_filter -#' @description DataFrame$filter(bool_expr) +#' Filter rows of a DataFrame +#' @name DataFrame_filter +#' +#' @description This is equivalent to `dplyr::filter()`. Note that rows where +#' the condition returns `NA` are dropped, unlike base subsetting with `[`. #' -#' @param bool_expr Polars expression which will evaluate to a bool pl$Series +#' @param bool_expr Polars expression which will evaluate to a boolean. #' @keywords DataFrame -#' @return filtered DataFrame -#' @examples pl$DataFrame(iris)$lazy()$filter(pl$col("Sepal.Length") > 5)$collect() -#' @name DataFrame_filter +#' @return A DataFrame with only the rows where the conditions are `TRUE`. +#' @examples +#' df = pl$DataFrame(iris) +#' +#' df$filter(pl$col("Sepal.Length") > 5) +#' +#' # rows where condition is NA are dropped +#' iris2 = iris +#' iris2[c(1, 3, 5), "Species"] = NA +#' df = pl$DataFrame(iris2) +#' +#' df$filter(pl$col("Species") == "setosa") + DataFrame_filter = function(bool_expr) { .pr$DataFrame$lazy(self)$filter(bool_expr)$collect() } -#' groupby a DataFrame -#' @description create GroupBy from DataFrame +#' Group a DataFrame +#' @description This doesn't modify the data but only stores information about +#' the group structure. This structure can then be used by several functions +#' (`$agg()`, `$filter()`, etc.). 
#' @inherit LazyFrame_groupby #' @keywords DataFrame #' @return GroupBy (a DataFrame with special groupby methods like `$agg()`) @@ -824,30 +908,30 @@ DataFrame_filter = function(bool_expr) { #' foo = c("one", "two", "two", "one", "two"), #' bar = c(5, 3, 2, 4, 1) #' )$groupby("foo", maintain_order = TRUE) -#' print(gb) +#' +#' gb #' #' gb$agg( #' pl$col("bar")$sum()$suffix("_sum"), #' pl$col("bar")$mean()$alias("bar_tail_sum") #' ) + DataFrame_groupby = function(..., maintain_order = pl$options$default_maintain_order()) { # clone the DataFrame, bundle args as attributes. Non fallible. construct_groupby(self, groupby_input = unpack_list(...), maintain_order = maintain_order) } - - - #' Return Polars DataFrame as R data.frame #' -#' @param ... any args pased to as.data.frame() +#' @param ... Any args passed to `as.data.frame()`. #' #' @return An R data.frame #' @keywords DataFrame #' @examples #' df = pl$DataFrame(iris[1:3, ]) #' df$to_data_frame() + DataFrame_to_data_frame = function(...) { # do not unnest structs and mark with I to also preserve categoricals as is l = lapply(self$to_list(unnest_structs = FALSE), I) @@ -873,8 +957,7 @@ DataFrame_as_data_frame = DataFrame_to_data_frame # DataFrame_to_data_frame = DataFrame_to_data_frame #' @rdname DataFrame_to_data_frame -#' @param x DataFrame -#' @param ... any params passed to as.data.frame +#' @param x A DataFrame #' #' @return data.frame #' @export @@ -882,17 +965,18 @@ as.data.frame.DataFrame = function(x, ...) { x$to_data_frame(...) } -#' return polars DataFrame as R lit of vectors +#' Return Polars DataFrame as a list of vectors #' -#' @param unnest_structs bool default true, as calling $unnest() on any struct column +#' @param unnest_structs Boolean. If `TRUE` (default), then `$unnest()` is applied +#' on any struct column. #' #' @name to_list #' #' @details -#' This implementation for simplicity reasons relies on unnesting all structs before -#' exporting to R.
unnest_structs = FALSE, the previous struct columns will be re- -#' nested. A struct in a R is a lists of lists, where each row is a list of values. -#' Such a structure is not very typical or efficient in R. +#' For simplicity reasons, this implementation relies on unnesting all structs +#' before exporting to R. If `unnest_structs = FALSE`, then `struct` columns +#' will be returned as nested lists, where each row is a list of values. Such a +#' structure is not very typical or efficient in R. #' #' @return R list of vectors #' @keywords DataFrame @@ -906,19 +990,23 @@ DataFrame_to_list = function(unnest_structs = TRUE) { } } - - -#' join DataFrame with other DataFrame +#' Join DataFrames #' +#' This function can do both mutating joins (adding columns based on matching +#' observations, for example with `how = "left"`) and filtering joins (keeping +#' observations based on matching observations, for example with `how = "inner"`). #' #' @param other DataFrame -#' @param on named columns as char vector of named columns, or list of expressions and/or strings. -#' @param left_on names of columns in self LazyFrame, order should match. Type, see on param. -#' @param right_on names of columns in other LazyFrame, order should match. Type, see on param. -#' @param how a string selecting one of the following methods: inner, left, outer, semi, anti, cross -#' @param suffix name to added right table -#' @param allow_parallel bool -#' @param force_parallel bool +#' @param on Either a vector of column names or a list of expressions and/or +#' strings. Use `left_on` and `right_on` if the column names to match on are +#' different between the two DataFrames. +#' @param left_on,right_on Same as `on` but only for the left or the right +#' DataFrame. They must have the same length. +#' @param how One of the following methods: "inner", "left", "outer", "semi", +#' "anti", "cross". +#' @param suffix Suffix to add to duplicated column names. +#' @param allow_parallel Boolean. 
+#' @param force_parallel Boolean. #' @return DataFrame #' @keywords DataFrame #' @examples @@ -931,7 +1019,7 @@ DataFrame_to_list = function(unnest_structs = TRUE) { #' df1 = pl$DataFrame(x = letters[1:3]) #' df2 = pl$DataFrame(y = 1:4) #' df1$join(other = df2, how = "cross") -#' + DataFrame_join = function( other, # : LazyFrame or DataFrame, left_on = NULL, # : str | pli.Expr | Sequence[str | pli.Expr] | None = None, @@ -948,9 +1036,9 @@ DataFrame_join = function( )$collect() } -#' to_struct -#' @param name name of new Series -#' @return to_struct() returns a Series +#' Convert DataFrame to a Series of type "struct" +#' @param name Name given to the new Series +#' @return A Series of type "struct" #' @aliases to_struct #' @keywords DataFrame #' @examples @@ -958,9 +1046,14 @@ DataFrame_join = function( #' df = pl$DataFrame(a = 1:5, b = c("one", "two", "three", "four", "five")) #' s = df$to_struct() #' s -#' s$to_r() # to r list -#' df_s = s$to_frame() # place series in a new DataFrame -#' df_s$unnest() # back to starting df +#' +#' # convert to an R list +#' s$to_r() +#' +#' # Convert back to a DataFrame +#' df_s = s$to_frame() +#' df_s + DataFrame_to_struct = function(name = "") { .pr$DataFrame$to_struct(self, name) } @@ -969,29 +1062,37 @@ DataFrame_to_struct = function(name = "") { ## TODO contribute polars add r-polars defaults for to_struct and unnest #' Unnest a DataFrame struct columns. #' @keywords DataFrame -#' @param names names of struct columns to unnest, default NULL unnest any struct column -#' @return $unnest() returns a DataFrame with all column including any that has been unnested +#' @param names Names of the struct columns to unnest. If `NULL` (default), then +#' all "struct" columns are unnested. +#' @return A DataFrame where all "struct" columns are unnested. Non-struct +#' columns are not modified. 
+#' @examples +#' df = pl$DataFrame(a = 1:5, b = c("one", "two", "three", "four", "five")) +#' df = df$to_struct()$to_frame() +#' df +#' +#' df$unnest() + DataFrame_unnest = function(names = NULL) { unwrap(.pr$DataFrame$unnest(self, names), "in $unnest():") } - -#' @title First -#' @description Get the first row of the DataFrame. +#' @title Get the first row of the DataFrame. #' @keywords DataFrame -#' @return A new `DataFrame` object with applied filter. +#' @return A DataFrame with one row. #' @examples pl$DataFrame(mtcars)$first() + DataFrame_first = function() { self$lazy()$first()$collect() } -#' @title Last -#' @description Get the last row of the DataFrame. +#' @title Get the last row of the DataFrame. #' @keywords DataFrame -#' @return A new `DataFrame` object with applied filter. +#' @return A DataFrame with one row. #' @examples pl$DataFrame(mtcars)$last() + DataFrame_last = function() { self$lazy()$last()$collect() } @@ -999,8 +1100,9 @@ DataFrame_last = function() { #' @title Max #' @description Aggregate the columns in the DataFrame to their maximum value. #' @keywords DataFrame -#' @return A new `DataFrame` object with applied aggregation. +#' @return A DataFrame with one row. #' @examples pl$DataFrame(mtcars)$max() + DataFrame_max = function() { self$lazy()$max()$collect() } @@ -1008,8 +1110,9 @@ DataFrame_max = function() { #' @title Mean #' @description Aggregate the columns in the DataFrame to their mean value. #' @keywords DataFrame -#' @return A new `DataFrame` object with applied aggregation. +#' @return A DataFrame with one row. #' @examples pl$DataFrame(mtcars)$mean() + DataFrame_mean = function() { self$lazy()$mean()$collect() } @@ -1017,8 +1120,9 @@ DataFrame_mean = function() { #' @title Median #' @description Aggregate the columns in the DataFrame to their median value. #' @keywords DataFrame -#' @return A new `DataFrame` object with applied aggregation. +#' @return A DataFrame with one row. 
#' @examples pl$DataFrame(mtcars)$median() + DataFrame_median = function() { self$lazy()$median()$collect() } @@ -1026,8 +1130,9 @@ DataFrame_median = function() { #' @title Min #' @description Aggregate the columns in the DataFrame to their minimum value. #' @keywords DataFrame -#' @return A new `DataFrame` object with applied aggregation. +#' @return A DataFrame with one row. #' @examples pl$DataFrame(mtcars)$min() + DataFrame_min = function() { self$lazy()$min()$collect() } @@ -1035,8 +1140,9 @@ DataFrame_min = function() { #' @title Sum #' @description Aggregate the columns of this DataFrame to their sum values. #' @keywords DataFrame -#' @return A new `DataFrame` object with applied aggregation. +#' @return A DataFrame with one row. #' @examples pl$DataFrame(mtcars)$sum() + DataFrame_sum = function() { self$lazy()$sum()$collect() } @@ -1044,47 +1150,55 @@ DataFrame_sum = function() { #' @title Var #' @description Aggregate the columns of this DataFrame to their variance values. #' @keywords DataFrame -#' @param ddof integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. -#' @return A new `DataFrame` object with applied aggregation. +#' @param ddof Delta Degrees of Freedom: the divisor used in the calculation is +#' N - ddof, where N represents the number of elements. By default ddof is 1. +#' @return A DataFrame with one row. #' @examples pl$DataFrame(mtcars)$var() + DataFrame_var = function(ddof = 1) { self$lazy()$var(ddof)$collect() } #' @title Std -#' @description Aggregate the columns of this DataFrame to their standard deviation values. +#' @description Aggregate the columns of this DataFrame to their standard +#' deviation values. #' @keywords DataFrame -#' @param ddof integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. 
-#' @return A new `DataFrame` object with applied aggregation. +#' @param ddof Delta Degrees of Freedom: the divisor used in the calculation is +#' N - ddof, where N represents the number of elements. By default ddof is 1. +#' @return A DataFrame with one row. #' @examples pl$DataFrame(mtcars)$std() + DataFrame_std = function(ddof = 1) { self$lazy()$std(ddof)$collect() } #' @title Quantile -#' @description Aggregate the columns in the DataFrame to their quantile value. +#' @description Aggregate the columns in the DataFrame to a unique quantile +#' value. Use `$describe()` to specify several quantiles. #' @keywords DataFrame -#' @param quantile numeric Quantile between 0.0 and 1.0. -#' @param interpolation string Interpolation method: "nearest", "higher", "lower", "midpoint", or "linear". +#' @param quantile Numeric of length 1 between 0 and 1. +#' @param interpolation Interpolation method: "nearest", "higher", "lower", +#' "midpoint", or "linear". #' @return DataFrame #' @examples pl$DataFrame(mtcars)$quantile(.4) + DataFrame_quantile = function(quantile, interpolation = "nearest") { self$lazy()$quantile(quantile, interpolation)$collect() } #' @title Reverse -#' @description Reverse the DataFrame. -#' @keywords LazyFrame +#' @description Reverse the DataFrame (the last row becomes the first one, etc.). #' @return DataFrame #' @examples pl$DataFrame(mtcars)$reverse() + DataFrame_reverse = function() { self$lazy()$reverse()$collect() } -#' @title Fill NaN -#' @description Fill floating point NaN values by an Expression evaluation. +#' @title Fill `NaN` +#' @description Fill `NaN` values by an Expression evaluation. #' @keywords DataFrame -#' @param fill_value Value to fill NaN with. +#' @param fill_value Value to fill `NaN` with. 
#' @return DataFrame #' @examples #' df = pl$DataFrame( @@ -1092,40 +1206,53 @@ DataFrame_reverse = function() { #' b = c(1.5, NaN, NaN, 4) #' ) #' df$fill_nan(99) + DataFrame_fill_nan = function(fill_value) { self$lazy()$fill_nan(fill_value)$collect() } -#' @title Fill null -#' @description Fill null values using the specified value or strategy. +#' @title Fill nulls +#' @description Fill null values (which correspond to `NA` in R) using the +#' specified value or strategy. #' @keywords DataFrame -#' @param fill_value Value to fill `NA` with. +#' @param fill_value Value to fill nulls with. #' @return DataFrame #' @examples -#' pl$DataFrame( +#' df = pl$DataFrame( #' a = c(1.5, 2, NA, 4), #' b = c(1.5, NA, NA, 4) -#' )$fill_null(99) +#' ) +#' +#' df$fill_null(99) +#' +#' df$fill_null(pl$col("a")$mean()) + DataFrame_fill_null = function(fill_value) { self$lazy()$fill_null(fill_value)$collect() } #' @title Slice -#' @description Get a slice of this DataFrame. -#' @keywords LazyFrame +#' @description Get a slice of the DataFrame. #' @return DataFrame -#' @param offset integer -#' @param length integer or NULL +#' @param offset Start index, can be a negative value. This is 0-indexed, so +#' `offset = 1` doesn't include the first row. +#' @param length Length of the slice. If `NULL` (default), all rows starting at +#' the offset will be selected. #' @examples +#' # skip the first 2 rows and take the 4 following rows #' pl$DataFrame(mtcars)$slice(2, 4) -#' mtcars[2:6, ] +#' +#' # this is equivalent to: +#' mtcars[3:6, ] + DataFrame_slice = function(offset, length = NULL) { self$lazy()$slice(offset, length)$collect() } -#' @title Null count -#' @description Create a new DataFrame that shows the null counts per column. +#' @title Count null values +#' @description Create a new DataFrame that shows the null (which correspond +#' to `NA` in R) counts per column. 
#' @keywords DataFrame #' @return DataFrame #' @docType NULL @@ -1135,18 +1262,21 @@ DataFrame_slice = function(offset, length = NULL) { #' x = mtcars #' x[1, 2:3] = NA #' pl$DataFrame(x)$null_count() + DataFrame_null_count = "use_extendr_wrapper" #' @title Estimated size -#' @description Return an estimation of the total (heap) allocated size of the DataFrame. +#' @description Return an estimation of the total (heap) allocated size of the +#' DataFrame. #' @keywords DataFrame -#' @return Bytes +#' @return Estimated size in bytes #' @docType NULL #' @format NULL #' @format function #' @examples #' pl$DataFrame(mtcars)$estimated_size() + DataFrame_estimated_size = "use_extendr_wrapper" @@ -1155,9 +1285,9 @@ DataFrame_estimated_size = "use_extendr_wrapper" #' @inherit LazyFrame_join_asof #' @param other DataFrame or LazyFrame #' @keywords DataFrame -#' @return new joined DataFrame +#' @return New joined DataFrame #' @examples -#' # create two DataFrame to join asof +#' # create two DataFrames to join asof #' gdp = pl$DataFrame( #' date = as.Date(c("2015-1-1", "2016-1-1", "2017-5-1", "2018-1-1", "2019-1-1")), #' gdp = c(4321, 4164, 4411, 4566, 4696), @@ -1189,6 +1319,7 @@ DataFrame_estimated_size = "use_extendr_wrapper" #' #' # only look 11 days back (numeric tolerance depends on polars type, is in days) #' pop$join_asof(gdp, on = "date", strategy = "backward", tolerance = 11) + DataFrame_join_asof = function( other, ..., @@ -1239,9 +1370,11 @@ DataFrame_join_asof = function( #' df = pl$DataFrame( #' a = c("x", "y", "z"), #' b = c(1, 3, 5), -#' c = c(2, 4, 6) +#' c = c(2, 4, 6), +#' d = c(7, 8, 9) #' ) -#' df$melt(id_vars = "a", value_vars = c("b", "c")) +#' df$melt(id_vars = "a", value_vars = c("b", "c", "d")) + DataFrame_melt = function( id_vars = NULL, value_vars = NULL, @@ -1255,17 +1388,19 @@ DataFrame_melt = function( -#' Create a spreadsheet-style pivot table as a DataFrame. -#' @param values Column values to aggregate. 
Can be multiple columns if the `columns` -#' arguments contains multiple columns as well. +#' Pivot data from long to wide +#' @param values Column values to aggregate. Can be multiple columns if the +#' `columns` argument contains multiple columns as well. #' @param index One or multiple keys to group by. -#' @param columns Name of the column(s) whose values will be used as the header of the output -#' DataFrame. -#' @param aggregate_function -#' String naming Expr to aggregate with, or an Expr e.g. `pl$element()$sum()`, -#' examples of strings:'first', 'sum', 'max', 'min', 'mean', 'median', 'last', 'count' -#' @param maintain_order Sort the grouped keys so that the output order is predictable. -#' @param sort_columns Sort the transposed columns by name. Default is by order of discovery. +#' @param columns Name of the column(s) whose values will be used as the header +#' of the output DataFrame. +#' @param aggregate_function One of: +#' - string indicating the expression to aggregate with, such as 'first', +#' 'sum', 'max', 'min', 'mean', 'median', 'last', 'count', +#' - an Expr e.g. `pl$element()$sum()` +#' @inheritParams DataFrame_unique +#' @param sort_columns Sort the transposed columns by name. Default is by order +#' of discovery. #' @param separator Used as separator/delimiter in generated column names. 
#' #' @return DataFrame @@ -1276,23 +1411,27 @@ DataFrame_melt = function( #' bar = c("A", "B", "C", "A", "B", "C"), #' baz = c(1, 2, 3, 4, 5, 6) #' ) +#' df +#' #' df$pivot( -#' values = "baz", index = "foo", columns = "bar", aggregate_function = "first" +#' values = "baz", index = "foo", columns = "bar" #' ) #' -#' #' # Run an expression as aggregation function #' df = pl$DataFrame( #' col1 = c("a", "a", "a", "b", "b", "b"), #' col2 = c("x", "x", "x", "x", "y", "y"), #' col3 = c(6, 7, 3, 2, 5, 7) #' ) +#' df +#' #' df$pivot( #' index = "col1", #' columns = "col2", #' values = "col3", #' aggregate_function = pl$element()$tanh()$mean() #' ) + DataFrame_pivot = function( values, index, @@ -1327,22 +1466,33 @@ DataFrame_pivot = function( #' @keywords DataFrame #' @param ... One of the following: #' - params like `new_name = "old_name"` to rename selected variables. -#' - as above but, but params wrapped in a list +#' - as above but with params wrapped in a list #' @return DataFrame #' @examples -#' pl$DataFrame(mtcars)$ -#' rename(miles_per_gallon = "mpg", horsepower = "hp") +#' df = pl$DataFrame(mtcars) +#' +#' df$rename(miles_per_gallon = "mpg", horsepower = "hp") +#' +#' replacements <- list(miles_per_gallon = "mpg", horsepower = "hp") +#' df$rename(replacements) + DataFrame_rename = function(...) { self$lazy()$rename(...)$collect() } #' @title Summary statistics for a DataFrame +#' +#' @description This returns the total number of rows, the number of missing +#' values, the mean, standard deviation, min, max, median and the percentiles +#' specified in the argument `percentiles`. +#' #' @param percentiles One or more percentiles to include in the summary statistics. #' All values must be in the range `[0; 1]`. 
#' @keywords DataFrame #' @return DataFrame #' @examples #' pl$DataFrame(iris)$describe() + DataFrame_describe = function(percentiles = c(.25, .75)) { perc = percentiles @@ -1406,10 +1556,12 @@ DataFrame_describe = function(percentiles = c(.25, .75)) { #' @title Glimpse values in a DataFrame #' @keywords DataFrame #' @param ... not used -#' @param return_as_string Boolean (default `FALSE`). If `TRUE`, return the output as a string. +#' @param return_as_string Boolean (default `FALSE`). If `TRUE`, return the +#' output as a string. #' @return DataFrame #' @examples #' pl$DataFrame(iris)$glimpse() + DataFrame_glimpse = function(..., return_as_string = FALSE) { # guard input if (!is_bool(return_as_string)) { @@ -1426,7 +1578,7 @@ DataFrame_glimpse = function(..., return_as_string = FALSE) { max_col_name_trunc = 50 parse_column_ = \(col_name, dtype) { dtype_str = dtype_str_repr(dtype) |> unwrap_or(paste0("??", str_string(dtype))) - if (inherits(dtype, "RPolarsDataType")) dtype_str <- paste0("<", dtype_str, ">") + if (inherits(dtype, "RPolarsDataType")) dtype_str <- paste0(" <", dtype_str, ">") val = self$select(pl$col(col_name)$slice(0, max_num_value))$to_list()[[1]] val_str = paste(val, collapse = ", ") if (nchar(col_name) > max_col_name_trunc) { @@ -1480,6 +1632,7 @@ DataFrame_glimpse = function(..., return_as_string = FALSE) { #' df #' #' df$explode("numbers") + DataFrame_explode = function(...) 
{ self$lazy()$explode(...)$collect() } diff --git a/R/expr__expr.R b/R/expr__expr.R index 64f162db8..cb89a6fe3 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -921,7 +921,7 @@ Expr_apply = function(f, return_type = NULL, strict_return_type = TRUE, allow_fa #' # vectors to literal implicitly #' (pl$lit(2) + 1:4) / 4:1 Expr_lit = function(x) { - .Call(wrap__Expr__lit, x) |> #use .call reduces eval from 22us to 15us, not a bottle-next anyways + .Call(wrap__Expr__lit, x) |> # use .call reduces eval from 22us to 15us, not a bottle-next anyways unwrap("in $lit()") } @@ -4252,13 +4252,32 @@ Expr_shrink_dtype = "use_extendr_wrapper" -#' arr: list related methods +#' arr: list related methods DEPRECATED +#' @description +#' Deprecated since 0.8.1, will be removed in 0.9.0. +#' USE `$list$...` instead. Subnamespace is simply renamed. +#' @keywords Expr +#' @return Expr +#' @seealso \code{\link[=Expr_list]{$list$...}} +Expr_arr = method_as_property(function() { + if (!isTRUE(runtime_state$warned_deprecate_sns_arr_expr)) { + warning( + "in $arr$: `$arr$...` is deprecated since 0.8.1 and removed from polars 0.9.0.", + "Use `$list$` instead. It is only a renaming to match py-polars renaming.", + call. = FALSE + ) + runtime_state$warned_deprecate_sns_arr_expr = TRUE + } + expr_list_make_sub_ns(self) +}) + +#' list: list related methods #' @description #' Create an object namespace of all list related methods. 
#' See the individual method pages for full details #' @keywords Expr #' @return Expr -#' @aliases arr_ns +#' @aliases list_ns #' @examples #' df_with_list = pl$DataFrame( #' group = c(1, 1, 2, 2, 3), @@ -4270,10 +4289,10 @@ Expr_shrink_dtype = "use_extendr_wrapper" #' pl$col("value") * 3L #' ) #' df_with_list$with_columns( -#' pl$col("value")$arr$lengths()$alias("group_size") +#' pl$col("value")$list$lengths()$alias("group_size") #' ) -Expr_arr = method_as_property(function() { - expr_arr_make_sub_ns(self) +Expr_list = method_as_property(function() { + expr_list_make_sub_ns(self) }) @@ -4394,7 +4413,7 @@ Expr_to_struct = function() { #' pl$Series(list(1:1, 1:2, 1:3, 1:4)) #' $print() #' $to_lit() -#' $arr$lengths() +#' $list$lengths() #' $sum() #' $cast(pl$dtypes$Int8) #' $lit_to_s() @@ -4414,7 +4433,7 @@ Expr_lit_to_s = function() { #' pl$Series(list(1:1, 1:2, 1:3, 1:4)) #' $print() #' $to_lit() -#' $arr$lengths() +#' $list$lengths() #' $sum() #' $cast(pl$dtypes$Int8) #' $lit_to_df() diff --git a/R/expr__list.R b/R/expr__list.R index 883f899b8..68f183c71 100644 --- a/R/expr__list.R +++ b/R/expr__list.R @@ -1,135 +1,135 @@ -# this file sources list-expression functions to be bundled in the 'expr$arr' sub namespace +# this file sources list-expression functions to be bundled in the 'expr$list' sub namespace # the sub name space is instantiated from Expr_arr- function # bundling these functions into an environment, depends on a macro call in zzz.R -# expr_arr_make_sub_ns = macro_new_subnamespace("^ExprArr_", "ExprArrNameSpace") +# expr_list_make_sub_ns = macro_new_subnamespace("^ExprList_", "ExprListNameSpace") ## TODO revisit array, list terminology and pick one way, e.g list of sublists or list of elements #' Lengths arrays in list -#' @rdname arr_lengths -#' @name arr_lengths +#' @rdname list_lengths +#' @name ExprList_lengths #' @description #' Get the length of the arrays as UInt32 -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr 
-#' @aliases lengths arr.lengths arr_lengths +#' @aliases lengths list_lengths #' @examples #' df = pl$DataFrame(list_of_strs = pl$Series(list(c("a", "b"), "c"))) -#' df$with_columns(pl$col("list_of_strs")$arr$lengths()$alias("list_of_strs_lengths")) -ExprArr_lengths = function() .pr$Expr$arr_lengths(self) +#' df$with_columns(pl$col("list_of_strs")$list$lengths()$alias("list_of_strs_lengths")) +ExprList_lengths = function() .pr$Expr$list_lengths(self) #' Sum lists -#' @name arr_sum +#' @name ExprList_sum #' @description #' Sum all the lists in the array. -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_sum arr.sum +#' @aliases list_sum #' @examples #' df = pl$DataFrame(values = pl$Series(list(1L, 2:3))) -#' df$select(pl$col("values")$arr$sum()) -ExprArr_sum = function() .pr$Expr$lst_sum(self) +#' df$select(pl$col("values")$list$sum()) +ExprList_sum = function() .pr$Expr$list_sum(self) #' Max lists -#' @name arr_max +#' @name ExprList_max #' @description #' Compute the max value of the lists in the array. -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases Expr_arr_max Expr_arr.max +#' @aliases Expr_list_max #' @examples #' df = pl$DataFrame(values = pl$Series(list(1L, 2:3))) -#' df$select(pl$col("values")$arr$max()) -ExprArr_max = function() .pr$Expr$lst_max(self) +#' df$select(pl$col("values")$list$max()) +ExprList_max = function() .pr$Expr$list_max(self) #' #' Min lists -#' @name arr_min +#' @name ExprList_min #' @description #' Compute the min value of the lists in the array. 
-#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases Expr_arr_min Expr_arr.min +#' @aliases Expr_list_min #' @examples #' df = pl$DataFrame(values = pl$Series(list(1L, 2:3))) -#' df$select(pl$col("values")$arr$min()) -ExprArr_min = function() .pr$Expr$lst_min(self) +#' df$select(pl$col("values")$list$min()) +ExprList_min = function() .pr$Expr$list_min(self) #' Mean of lists -#' @name arr_mean +#' @name ExprList_mean #' @description #' Compute the mean value of the lists in the array. -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_mean arr.mean +#' @aliases list_mean #' @examples #' df = pl$DataFrame(values = pl$Series(list(1L, 2:3))) -#' df$select(pl$col("values")$arr$mean()) -ExprArr_mean = function() .pr$Expr$lst_mean(self) +#' df$select(pl$col("values")$list$mean()) +ExprList_mean = function() .pr$Expr$list_mean(self) #' @inherit Expr_sort title description return #' @param descending Sort values in descending order -#' @name arr_sort -ExprArr_sort = function(descending = FALSE) .pr$Expr$lst_sort(self, descending) +#' @name ExprList_sort +ExprList_sort = function(descending = FALSE) .pr$Expr$list_sort(self, descending) #' Reverse list -#' @name arr_reverse +#' @name ExprList_reverse #' @description #' Reverse the arrays in the list. -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_reverse arr.reverse +#' @aliases list_reverse #' @examples #' df = pl$DataFrame(list( #' values = list(3:1, c(9L, 1:2)) #' )) -#' df$select(pl$col("values")$arr$reverse()) -ExprArr_reverse = function() .pr$Expr$lst_reverse(self) +#' df$select(pl$col("values")$list$reverse()) +ExprList_reverse = function() .pr$Expr$list_reverse(self) #' Unique list -#' @name arr_unique +#' @name ExprList_unique #' @description #' Get the unique/distinct values in the list. 
-#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_unique arr.unique +#' @aliases list_unique #' @examples #' df = pl$DataFrame(list(a = list(1, 1, 2))) -#' df$select(pl$col("a")$arr$unique()) -ExprArr_unique = function() .pr$Expr$lst_unique(self) +#' df$select(pl$col("a")$list$unique()) +ExprList_unique = function() .pr$Expr$list_unique(self) #' concat another list #' @description Concat the arrays in a Series dtype List in linear time. #' @param other Rlist, Expr or column of same type as self. -#' @name arr_concat -#' @keywords ExprArr +#' @name ExprList_concat +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_concat arr.concat +#' @aliases list_concat #' @examples #' df = pl$DataFrame( #' a = list("a", "x"), #' b = list(c("b", "c"), c("y", "z")) #' ) -#' df$select(pl$col("a")$arr$concat(pl$col("b"))) +#' df$select(pl$col("a")$list$concat(pl$col("b"))) #' -#' df$select(pl$col("a")$arr$concat("hello from R")) +#' df$select(pl$col("a")$list$concat("hello from R")) #' -#' df$select(pl$col("a")$arr$concat(list("hello", c("hello", "world")))) -ExprArr_concat = function(other) { +#' df$select(pl$col("a")$list$concat(list("hello", c("hello", "world")))) +ExprList_concat = function(other) { pl$concat_list(list(self, other)) } #' Get list -#' @name arr_get +#' @name ExprList_get #' @description Get the value by index in the sublists. #' @param index numeric vector or Expr of length 1 or same length of Series. #' if length 1 pick same value from each sublist, if length as Series/column, @@ -138,220 +138,220 @@ ExprArr_concat = function(other) { #' So index `0` would return the first item of every sublist #' and index `-1` would return the last item of every sublist #' if an index is out of bounds, it will return a `None`. 
-#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases Expr_arr_get Expr_arr.get +#' @aliases Expr_list_get #' @examples #' df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() -#' df$select(pl$col("a")$arr$get(0)) -#' df$select(pl$col("a")$arr$get(c(2, 0, -1))) -ExprArr_get = function(index) .pr$Expr$lst_get(self, wrap_e(index, str_to_lit = FALSE)) +#' df$select(pl$col("a")$list$get(0)) +#' df$select(pl$col("a")$list$get(c(2, 0, -1))) +ExprList_get = function(index) .pr$Expr$list_get(self, wrap_e(index, str_to_lit = FALSE)) #' Get list -#' @rdname arr_get +#' @rdname list_get #' @export -#' @param x ExprArrNameSpace +#' @param x ExprListNameSpace #' @param index value to get #' @details -#' `[.ExprArrNameSpace` used as e.g. `pl$col("a")$arr[0]` same as `pl$col("a")$get(0)` +#' `[.ExprListNameSpace` used as e.g. `pl$col("a")$list[0]` same as `pl$col("a")$list$get(0)` #' @examples #' df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() -#' df$select(pl$col("a")$arr[0]) -#' df$select(pl$col("a")$arr[c(2, 0, -1)]) -`[.ExprArrNameSpace` = function(x, index) { # S3 sub class-name set in zzz.R +#' df$select(pl$col("a")$list[0]) +#' df$select(pl$col("a")$list[c(2, 0, -1)]) +`[.ExprListNameSpace` = function(x, index) { # S3 sub class-name set in zzz.R x$get(index) } #' take in sublists -#' @name arr_take +#' @name ExprList_take #' @description Get the take value of the sublists. 
-#' @keywords ExprArr +#' @keywords ExprList #' @param index R list of integers for each sub-element or Expr or Series of type `List[usize]` #' @param null_on_oob boolean #' @format function #' @return Expr -#' @aliases arr_take arr.take +#' @aliases list_take #' @examples #' df = pl$DataFrame(list(a = list(c(3, 2, 1), 1, c(1, 2)))) # #' idx = pl$Series(list(0:1, integer(), c(1L, 999L))) -#' df$select(pl$col("a")$arr$take(pl$lit(idx), null_on_oob = TRUE)) +#' df$select(pl$col("a")$list$take(pl$lit(idx), null_on_oob = TRUE)) #' #' # with implicit conversion to Expr -#' df$select(pl$col("a")$arr$take(list(0:1, integer(), c(1L, 999L)), null_on_oob = TRUE)) +#' df$select(pl$col("a")$list$take(list(0:1, integer(), c(1L, 999L)), null_on_oob = TRUE)) #' #' # by some column name, must cast to an Int/Uint type to work -#' df$select(pl$col("a")$arr$take(pl$col("a")$cast(pl$List(pl$UInt64)), null_on_oob = TRUE)) -ExprArr_take = function(index, null_on_oob = FALSE) { +#' df$select(pl$col("a")$list$take(pl$col("a")$cast(pl$List(pl$UInt64)), null_on_oob = TRUE)) +ExprList_take = function(index, null_on_oob = FALSE) { expr = wrap_e(index, str_to_lit = FALSE) - .pr$Expr$lst_take(self, expr, null_on_oob) |> + .pr$Expr$list_take(self, expr, null_on_oob) |> unwrap("in $take()") } #' First in sublists -#' @name arr_first +#' @name ExprList_first #' @description Get the first value of the sublists. 
-#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_first arr.first +#' @aliases list_first #' @examples #' df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() -#' df$select(pl$col("a")$arr$first()) -ExprArr_first = function(index) .pr$Expr$lst_get(self, wrap_e(0L, str_to_lit = FALSE)) +#' df$select(pl$col("a")$list$first()) +ExprList_first = function(index) .pr$Expr$list_get(self, wrap_e(0L, str_to_lit = FALSE)) #' Last in sublists -#' @name arr_last +#' @name ExprList_last #' @description Get the last value of the sublists. -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_last arr.last +#' @aliases list_last #' @examples #' df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() -#' df$select(pl$col("a")$arr$last()) -ExprArr_last = function(index) .pr$Expr$lst_get(self, wrap_e(-1L, str_to_lit = FALSE)) +#' df$select(pl$col("a")$list$last()) +ExprList_last = function(index) .pr$Expr$list_get(self, wrap_e(-1L, str_to_lit = FALSE)) #' Sublists contains -#' @name arr_contains +#' @name ExprList_contains #' @description Check if sublists contain the given item. #' @param item any into Expr/literal -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr of a boolean mask -#' @aliases arr_contains arr.contains +#' @aliases list_contains #' @examples #' df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() -#' df$select(pl$col("a")$arr$contains(1L)) -ExprArr_contains = function(other) .pr$Expr$arr_contains(self, wrap_e(other)) +#' df$select(pl$col("a")$list$contains(1L)) +ExprList_contains = function(other) .pr$Expr$list_contains(self, wrap_e(other)) #' Join sublists -#' @name arr_join +#' @name ExprList_join #' @description #' Join all string items in a sublist and place a separator between them. #' This errors if inner type of list `!= Utf8`. 
#' @param separator string to separate the items with -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Series of dtype Utf8 -#' @aliases arr_join arr.join +#' @aliases list_join #' @examples #' df = pl$DataFrame(list(s = list(c("a", "b", "c"), c("x", "y")))) -#' df$select(pl$col("s")$arr$join(" ")) -ExprArr_join = function(separator) .pr$Expr$lst_join(self, separator) +#' df$select(pl$col("s")$list$join(" ")) +ExprList_join = function(separator) .pr$Expr$list_join(self, separator) #' Arg min sublists -#' @name arr_arg_min +#' @name ExprList_arg_min #' @description Retrieve the index of the minimal value in every sublist. -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_arg_min arr.arg_min +#' @aliases list_arg_min #' @examples #' df = pl$DataFrame(list(s = list(1:2, 2:1))) -#' df$select(pl$col("s")$arr$arg_min()) -ExprArr_arg_min = function() .pr$Expr$lst_arg_min(self) +#' df$select(pl$col("s")$list$arg_min()) +ExprList_arg_min = function() .pr$Expr$list_arg_min(self) #' Arg max sublists -#' @name arr_arg_max +#' @name ExprList_arg_max #' @description Retrieve the index of the maximum value in every sublist. -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases Expr_arr_arg_max Expr_arr.arg_max +#' @aliases Expr_list_arg_max #' @examples #' df = pl$DataFrame(list(s = list(1:2, 2:1))) -#' df$select(pl$col("s")$arr$arg_max()) -ExprArr_arg_max = function() .pr$Expr$lst_arg_max(self) +#' df$select(pl$col("s")$list$arg_max()) +ExprList_arg_max = function() .pr$Expr$list_arg_max(self) ## TODO contribute polars support negative n values for Diff sublist #' Diff sublists -#' @name arr_diff +#' @name ExprList_diff #' @description Calculate the n-th discrete difference of every sublist. 
#' @param n Number of slots to shift #' @param null_behavior choice "ignore"(default) "drop" -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases Expr_arr_diff Expr_arr.diff +#' @aliases Expr_list_diff #' @examples #' df = pl$DataFrame(list(s = list(1:4, c(10L, 2L, 1L)))) -#' df$select(pl$col("s")$arr$diff()) -ExprArr_diff = function(n = 1, null_behavior = "ignore") { - unwrap(.pr$Expr$lst_diff(self, n, null_behavior)) +#' df$select(pl$col("s")$list$diff()) +ExprList_diff = function(n = 1, null_behavior = "ignore") { + unwrap(.pr$Expr$list_diff(self, n, null_behavior)) } #' Shift sublists -#' @name arr_shift +#' @name ExprList_shift #' @description Shift values by the given period. #' @param periods Value. Number of places to shift (may be negative). -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_shift arr.shift +#' @aliases list_shift #' @examples #' df = pl$DataFrame(list(s = list(1:4, c(10L, 2L, 1L)))) -#' df$select(pl$col("s")$arr$shift()) -ExprArr_shift = function(periods = 1) unwrap(.pr$Expr$lst_shift(self, periods)) +#' df$select(pl$col("s")$list$shift()) +ExprList_shift = function(periods = 1) unwrap(.pr$Expr$list_shift(self, periods)) #' Slice sublists -#' @name arr_slice +#' @name ExprList_slice #' @description Slice every sublist. #' @param offset value or Expr. Start index. Negative indexing is supported. #' @param length value or Expr. #' Length of the slice. If set to ``None`` (default), the slice is taken to the #' end of the list. 
-#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_slice arr.slice +#' @aliases list_slice #' @examples #' df = pl$DataFrame(list(s = list(1:4, c(10L, 2L, 1L)))) -#' df$select(pl$col("s")$arr$slice(2)) -ExprArr_slice = function(offset, length = NULL) { +#' df$select(pl$col("s")$list$slice(2)) +ExprList_slice = function(offset, length = NULL) { offset = wrap_e(offset, str_to_lit = FALSE) if (!is.null(length)) { length = wrap_e(length, str_to_lit = FALSE) } - .pr$Expr$lst_slice(self, offset, length) + .pr$Expr$list_slice(self, offset, length) } # TODO contribute polars let head and tail support negative indicies also regular head tail #' Heads of sublists -#' @name arr_head +#' @name ExprList_head #' @description head the first `n` values of every sublist. #' @param n Numeric or Expr, number of values to return for each sublist. -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_head arr.head +#' @aliases list_head #' @examples #' df = pl$DataFrame(list(a = list(1:4, c(10L, 2L, 1L)))) -#' df$select(pl$col("a")$arr$head(2)) -ExprArr_head = function(n = 5L) { - self$arr$slice(0L, n) +#' df$select(pl$col("a")$list$head(2)) +ExprList_head = function(n = 5L) { + self$list$slice(0L, n) } #' Tails of sublists -#' @name arr_tail +#' @name ExprList_tail #' @description tail the first `n` values of every sublist. #' @param n Numeric or Expr, number of values to return for each sublist. 
-#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_tail arr.tail +#' @aliases list_tail #' @examples #' df = pl$DataFrame(list(a = list(1:4, c(10L, 2L, 1L)))) -#' df$select(pl$col("a")$arr$tail(2)) -ExprArr_tail = function(n = 5L) { +#' df$select(pl$col("a")$list$tail(2)) +ExprList_tail = function(n = 5L) { offset = -wrap_e(n, str_to_lit = FALSE) - self$arr$slice(offset, n) + self$list$slice(offset, n) } @@ -374,20 +374,20 @@ ExprArr_tail = function(n = 5L) { #' the current schema to determine which columns to select. #' It is advised to set this value in a lazy query. #' -#' @name arr_to_struct -#' @keywords ExprArr +#' @name ExprList_to_struct +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_to_struct arr.to_struct +#' @aliases list_to_struct #' @examples #' df = pl$DataFrame(list(a = list(1:3, 1:2))) -#' df2 = df$select(pl$col("a")$arr$to_struct( +#' df2 = df$select(pl$col("a")$list$to_struct( #' name_generator = \(idx) paste0("hello_you_", idx) #' )) #' df2$unnest() #' #' df2$to_list() -ExprArr_to_struct = function( +ExprList_to_struct = function( n_field_strategy = "first_non_null", name_generator = NULL, upper_bound = 0) { # extendr_concurrent now only supports series communication, wrap out of series # wrapped into series on rust side @@ -403,13 +403,13 @@ ExprArr_to_struct = function( name_generator_wrapped = NULL } - unwrap(.pr$Expr$lst_to_struct( + unwrap(.pr$Expr$list_to_struct( self, n_field_strategy, name_generator_wrapped, upper_bound )) } #' eval sublists (kinda like lapply) -#' @name arr_eval +#' @name ExprList_eval #' @description Run any polars expression against the lists' elements. #' @param Expr Expression to run. Note that you can select an element with `pl$first()`, or #' `pl$col()` @@ -418,15 +418,15 @@ ExprArr_to_struct = function( #' Parallelism is worth it if there is enough work to do per thread. 
#' This likely should not be use in the groupby context, because we already #' parallel execution per group -#' @keywords ExprArr +#' @keywords ExprList #' @format function #' @return Expr -#' @aliases arr_eval arr.eval +#' @aliases list_eval #' @examples #' df = pl$DataFrame(a = list(c(1, 8, 3), b = c(4, 5, 2))) #' df$select(pl$all()$cast(pl$dtypes$Int64))$with_columns( -#' pl$concat_list(c("a", "b"))$arr$eval(pl$element()$rank())$alias("rank") +#' pl$concat_list(c("a", "b"))$list$eval(pl$element()$rank())$alias("rank") #' ) -ExprArr_eval = function(expr, parallel = FALSE) { - .pr$Expr$lst_eval(self, expr, parallel) +ExprList_eval = function(expr, parallel = FALSE) { + .pr$Expr$list_eval(self, expr, parallel) } diff --git a/R/expr__string.R b/R/expr__string.R index 67f5f5894..76443a39e 100644 --- a/R/expr__string.R +++ b/R/expr__string.R @@ -151,7 +151,7 @@ ExprStr_n_chars = function() { #' #' # Series list of strings to Series of concatenated strings #' df = pl$DataFrame(list(bar = list(c("a", "b", "c"), c("1", "2", NA)))) -#' df$select(pl$col("bar")$arr$eval(pl$col()$str$concat())$arr$first()) +#' df$select(pl$col("bar")$list$eval(pl$col()$str$concat())$list$first()) ExprStr_concat = function(delimiter = "-") { .pr$Expr$str_concat(self, delimiter) } diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 84ce9b42b..2152b2f7d 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -587,43 +587,43 @@ Expr$implode <- function() .Call(wrap__Expr__implode, self) Expr$shrink_dtype <- function() .Call(wrap__Expr__shrink_dtype, self) -Expr$arr_lengths <- function() .Call(wrap__Expr__arr_lengths, self) +Expr$list_lengths <- function() .Call(wrap__Expr__list_lengths, self) -Expr$arr_contains <- function(other) .Call(wrap__Expr__arr_contains, self, other) +Expr$list_contains <- function(other) .Call(wrap__Expr__list_contains, self, other) -Expr$lst_max <- function() .Call(wrap__Expr__lst_max, self) +Expr$list_max <- function() .Call(wrap__Expr__list_max, 
self) -Expr$lst_min <- function() .Call(wrap__Expr__lst_min, self) +Expr$list_min <- function() .Call(wrap__Expr__list_min, self) -Expr$lst_sum <- function() .Call(wrap__Expr__lst_sum, self) +Expr$list_sum <- function() .Call(wrap__Expr__list_sum, self) -Expr$lst_mean <- function() .Call(wrap__Expr__lst_mean, self) +Expr$list_mean <- function() .Call(wrap__Expr__list_mean, self) -Expr$lst_sort <- function(descending) .Call(wrap__Expr__lst_sort, self, descending) +Expr$list_sort <- function(descending) .Call(wrap__Expr__list_sort, self, descending) -Expr$lst_reverse <- function() .Call(wrap__Expr__lst_reverse, self) +Expr$list_reverse <- function() .Call(wrap__Expr__list_reverse, self) -Expr$lst_unique <- function() .Call(wrap__Expr__lst_unique, self) +Expr$list_unique <- function() .Call(wrap__Expr__list_unique, self) -Expr$lst_take <- function(index, null_on_oob) .Call(wrap__Expr__lst_take, self, index, null_on_oob) +Expr$list_take <- function(index, null_on_oob) .Call(wrap__Expr__list_take, self, index, null_on_oob) -Expr$lst_get <- function(index) .Call(wrap__Expr__lst_get, self, index) +Expr$list_get <- function(index) .Call(wrap__Expr__list_get, self, index) -Expr$lst_join <- function(separator) .Call(wrap__Expr__lst_join, self, separator) +Expr$list_join <- function(separator) .Call(wrap__Expr__list_join, self, separator) -Expr$lst_arg_min <- function() .Call(wrap__Expr__lst_arg_min, self) +Expr$list_arg_min <- function() .Call(wrap__Expr__list_arg_min, self) -Expr$lst_arg_max <- function() .Call(wrap__Expr__lst_arg_max, self) +Expr$list_arg_max <- function() .Call(wrap__Expr__list_arg_max, self) -Expr$lst_diff <- function(n, null_behavior) .Call(wrap__Expr__lst_diff, self, n, null_behavior) +Expr$list_diff <- function(n, null_behavior) .Call(wrap__Expr__list_diff, self, n, null_behavior) -Expr$lst_shift <- function(periods) .Call(wrap__Expr__lst_shift, self, periods) +Expr$list_shift <- function(periods) .Call(wrap__Expr__list_shift, self, periods) 
-Expr$lst_slice <- function(offset, length) .Call(wrap__Expr__lst_slice, self, offset, length) +Expr$list_slice <- function(offset, length) .Call(wrap__Expr__list_slice, self, offset, length) -Expr$lst_eval <- function(expr, parallel) .Call(wrap__Expr__lst_eval, self, expr, parallel) +Expr$list_eval <- function(expr, parallel) .Call(wrap__Expr__list_eval, self, expr, parallel) -Expr$lst_to_struct <- function(width_strat, name_gen, upper_bound) .Call(wrap__Expr__lst_to_struct, self, width_strat, name_gen, upper_bound) +Expr$list_to_struct <- function(width_strat, name_gen, upper_bound) .Call(wrap__Expr__list_to_struct, self, width_strat, name_gen, upper_bound) Expr$str_to_date <- function(format, strict, exact, cache, use_earliest) .Call(wrap__Expr__str_to_date, self, format, strict, exact, cache, use_earliest) diff --git a/R/series__series.R b/R/series__series.R index 5b1523e7b..a1beade97 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -960,20 +960,49 @@ Series_is_numeric = function() { } -#' arr: list related methods on Series of dtype List +#' arr: list related methods on Series of dtype List DEPRECATED #' @description -#' Create an object namespace of all list related methods. -#' See the individual method pages for full details +#' DEPRECATED AND REMOVED FROM polars 0.9.0 use `$list$` instead #' @keywords Series -#' @return Expr +#' @return Series #' @aliases Series_arr +#' @seealso \code{\link[=Series_list]{$list$...}} #' @examples #' s = pl$Series(list(1:3, 1:2, NULL)) #' s #' s$arr$first() Series_arr = method_as_property(function() { + if (!isTRUE(runtime_state$warned_deprecate_sns_arr_series)) { + warning( + "in $arr$: ", + "`$arr$...` is deprecated since 0.8.1 and removed from polars 0.9.0. ", + "Use `$list$` instead. It is only a renaming to match py-polars renaming.", + call. 
= FALSE + ) + runtime_state$warned_deprecate_sns_arr_series = TRUE + } + df = pl$DataFrame(self) + arr = expr_list_make_sub_ns(pl$col(self$name)) + lapply(arr, \(f) { + \(...) df$select(f(...)) + }) +}) + + +#' list: list related methods on Series of dtype List +#' @description +#' Create an object namespace of all list related methods. +#' See the individual method pages for full details +#' @keywords Series +#' @return Series +#' @aliases Series_list +#' @examples +#' s = pl$Series(list(1:3, 1:2, NULL)) +#' s +#' s$list$first() +Series_list = method_as_property(function() { df = pl$DataFrame(self) - arr = expr_arr_make_sub_ns(pl$col(self$name)) + arr = expr_list_make_sub_ns(pl$col(self$name)) lapply(arr, \(f) { \(...) df$select(f(...)) }) @@ -1044,7 +1073,7 @@ Series_expr = method_as_property(function() { #' pl$Series(list(1:1, 1:2, 1:3, 1:4)) #' $print() #' $to_lit() -#' $arr$lengths() +#' $list$lengths() #' $sum() #' $cast(pl$dtypes$Int8) #' $lit_to_s() diff --git a/R/zzz.R b/R/zzz.R index e8967a913..92162aa9a 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -28,8 +28,8 @@ replace_private_with_pub_methods(Expr, "^Expr_") # configure subnames spaces of Expr #' @export -`$.ExprArrNameSpace` = sub_name_space_accessor_function -expr_arr_make_sub_ns = macro_new_subnamespace("^ExprArr_", "ExprArrNameSpace") +`$.ExprListNameSpace` = sub_name_space_accessor_function +expr_list_make_sub_ns = macro_new_subnamespace("^ExprList_", "ExprListNameSpace") #' @export `$.ExprStrNameSpace` = sub_name_space_accessor_function diff --git a/docs/make-docs.R b/docs/make-docs.R index ac19396c0..d9818f811 100644 --- a/docs/make-docs.R +++ b/docs/make-docs.R @@ -134,7 +134,7 @@ make_doc_hierarchy = function() { # order determines order in sidebar classes = c( "pl", "Series", "DataFrame", "LazyFrame", "GroupBy", - "LazyGroupBy", "arr", "ExprBin", "ExprDT", "ExprMeta", "ExprStr", "ExprStruct", + "LazyGroupBy", "ExprList", "ExprBin", "ExprDT", "ExprMeta", "ExprStr", "ExprStruct", "Expr", 
"RThreadHandle" ) for (cl in classes) { @@ -147,7 +147,7 @@ make_doc_hierarchy = function() { # expr: nested nam = c( "Expr" = "All others", - "arr" = "Array", + "ExprList" = "List", "ExprBin" = "Binary", "ExprDT" = "DateTime", "ExprMeta" = "Meta", diff --git a/man/DataFrame_clone.Rd b/man/DataFrame_clone.Rd index 5014dc64b..3ce4c224d 100644 --- a/man/DataFrame_clone.Rd +++ b/man/DataFrame_clone.Rd @@ -19,6 +19,5 @@ df2 = df1$clone() df3 = df1 pl$mem_address(df1) != pl$mem_address(df2) pl$mem_address(df1) == pl$mem_address(df3) - } \keyword{DataFrame} diff --git a/man/DataFrame_describe.Rd b/man/DataFrame_describe.Rd index 1cabeda53..421171690 100644 --- a/man/DataFrame_describe.Rd +++ b/man/DataFrame_describe.Rd @@ -14,7 +14,9 @@ All values must be in the range \verb{[0; 1]}.} DataFrame } \description{ -Summary statistics for a DataFrame +This returns the total number of rows, the number of missing +values, the mean, standard deviation, min, max, median and the percentiles +specified in the argument \code{percentiles}. } \examples{ pl$DataFrame(iris)$describe() diff --git a/man/DataFrame_drop.Rd b/man/DataFrame_drop.Rd index a5ccce0b1..b33481ab5 100644 --- a/man/DataFrame_drop.Rd +++ b/man/DataFrame_drop.Rd @@ -7,8 +7,8 @@ DataFrame_drop(columns) } \arguments{ -\item{columns}{A character vector containing the names of the column(s) to -remove from the DataFrame.} +\item{columns}{A character vector with the names of the column(s) to remove +from the DataFrame.} } \value{ DataFrame diff --git a/man/DataFrame_drop_nulls.Rd b/man/DataFrame_drop_nulls.Rd index aef196e4c..4cdde8770 100644 --- a/man/DataFrame_drop_nulls.Rd +++ b/man/DataFrame_drop_nulls.Rd @@ -2,25 +2,31 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_drop_nulls} \alias{DataFrame_drop_nulls} -\title{Drop nulls} +\title{Drop nulls (missing values)} \usage{ DataFrame_drop_nulls(subset = NULL) } \arguments{ -\item{subset}{string or vector of strings. 
Column name(s) for which null values are considered. If set to NULL (default), use all columns.} +\item{subset}{A character vector with the names of the column(s) for which +nulls are considered. If \code{NULL} (default), use all columns.} } \value{ DataFrame } \description{ -Drop all rows that contain null values. +Drop all rows that contain nulls (which correspond to \code{NA} in R). } \examples{ tmp = mtcars tmp[1:3, "mpg"] = NA tmp[4, "hp"] = NA -pl$DataFrame(tmp)$drop_nulls()$height -pl$DataFrame(tmp)$drop_nulls("mpg")$height -pl$DataFrame(tmp)$drop_nulls(c("mpg", "hp"))$height +tmp = pl$DataFrame(tmp) + +# number of rows in `tmp` before dropping nulls +tmp$height + +tmp$drop_nulls()$height +tmp$drop_nulls("mpg")$height +tmp$drop_nulls(c("mpg", "hp"))$height } \keyword{DataFrame} diff --git a/man/DataFrame_dtype_strings.Rd b/man/DataFrame_dtype_strings.Rd index c6a264ea7..9a356aa3c 100644 --- a/man/DataFrame_dtype_strings.Rd +++ b/man/DataFrame_dtype_strings.Rd @@ -2,15 +2,17 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_dtype_strings} \alias{DataFrame_dtype_strings} -\title{DataFrame dtype strings} +\title{Data types information} \usage{ DataFrame_dtype_strings } \value{ -string vector +A character vector with the data type of each column } \description{ -Get column types as strings. +Get the data type of all columns as strings. You can see all +available types with \code{names(pl$dtypes)}. The data type of each column is also +shown when printing the DataFrame. 
} \examples{ pl$DataFrame(iris)$dtype_strings() diff --git a/man/DataFrame_dtypes.Rd b/man/DataFrame_dtypes.Rd index 1903f8784..c43403bb9 100644 --- a/man/DataFrame_dtypes.Rd +++ b/man/DataFrame_dtypes.Rd @@ -3,28 +3,25 @@ \name{DataFrame_dtypes} \alias{DataFrame_dtypes} \alias{DataFrame_schema} -\title{DataFrame dtypes} +\title{Data types information} \usage{ DataFrame_dtypes() DataFrame_schema() } \value{ -width as numeric scalar - -width as numeric scalar +\verb{$dtypes} returns an unnamed list with the data type of each column. +\verb{$schema} returns a named list with the column names and the data type of +each column. } \description{ -Get the data types of columns in DataFrame. -Data types can also be found in column headers when printing the DataFrame. - -Get dtypes of columns in DataFrame. -Dtypes can also be found in column headers when printing the DataFrame. +Get the data type of all columns. You can see all available +types with \code{names(pl$dtypes)}. The data type of each column is also shown +when printing the DataFrame. } \examples{ pl$DataFrame(iris)$dtypes pl$DataFrame(iris)$schema - } \keyword{DataFrame} diff --git a/man/DataFrame_estimated_size.Rd b/man/DataFrame_estimated_size.Rd index 3b8a7d483..0cd256025 100644 --- a/man/DataFrame_estimated_size.Rd +++ b/man/DataFrame_estimated_size.Rd @@ -10,10 +10,11 @@ function DataFrame_estimated_size } \value{ -Bytes +Estimated size in bytes } \description{ -Return an estimation of the total (heap) allocated size of the DataFrame. +Return an estimation of the total (heap) allocated size of the +DataFrame. 
} \examples{ pl$DataFrame(mtcars)$estimated_size() diff --git a/man/DataFrame_fill_nan.Rd b/man/DataFrame_fill_nan.Rd index 4aeeaec18..ea0f87094 100644 --- a/man/DataFrame_fill_nan.Rd +++ b/man/DataFrame_fill_nan.Rd @@ -2,18 +2,18 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_fill_nan} \alias{DataFrame_fill_nan} -\title{Fill NaN} +\title{Fill \code{NaN}} \usage{ DataFrame_fill_nan(fill_value) } \arguments{ -\item{fill_value}{Value to fill NaN with.} +\item{fill_value}{Value to fill \code{NaN} with.} } \value{ DataFrame } \description{ -Fill floating point NaN values by an Expression evaluation. +Fill \code{NaN} values by an Expression evaluation. } \examples{ df = pl$DataFrame( diff --git a/man/DataFrame_fill_null.Rd b/man/DataFrame_fill_null.Rd index 234211d46..8e6dc818d 100644 --- a/man/DataFrame_fill_null.Rd +++ b/man/DataFrame_fill_null.Rd @@ -2,23 +2,28 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_fill_null} \alias{DataFrame_fill_null} -\title{Fill null} +\title{Fill nulls} \usage{ DataFrame_fill_null(fill_value) } \arguments{ -\item{fill_value}{Value to fill \code{NA} with.} +\item{fill_value}{Value to fill nulls with.} } \value{ DataFrame } \description{ -Fill null values using the specified value or strategy. +Fill null values (which correspond to \code{NA} in R) using the +specified value or strategy. 
} \examples{ -pl$DataFrame( +df = pl$DataFrame( a = c(1.5, 2, NA, 4), b = c(1.5, NA, NA, 4) -)$fill_null(99) +) + +df$fill_null(99) + +df$fill_null(pl$col("a")$mean()) } \keyword{DataFrame} diff --git a/man/DataFrame_filter.Rd b/man/DataFrame_filter.Rd index 23071ad1a..feb895424 100644 --- a/man/DataFrame_filter.Rd +++ b/man/DataFrame_filter.Rd @@ -2,20 +2,30 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_filter} \alias{DataFrame_filter} -\title{filter DataFrame} +\title{Filter rows of a DataFrame} \usage{ DataFrame_filter(bool_expr) } \arguments{ -\item{bool_expr}{Polars expression which will evaluate to a bool pl$Series} +\item{bool_expr}{Polars expression which will evaluate to a boolean.} } \value{ -filtered DataFrame +A DataFrame with only the rows where the conditions are \code{TRUE}. } \description{ -DataFrame$filter(bool_expr) +This is equivalent to \code{dplyr::filter()}. Note that rows where +the condition returns \code{NA} are dropped, unlike base subsetting with \code{[}. } \examples{ -pl$DataFrame(iris)$lazy()$filter(pl$col("Sepal.Length") > 5)$collect() +df = pl$DataFrame(iris) + +df$filter(pl$col("Sepal.Length") > 5) + +# rows where condition is NA are dropped +iris2 = iris +iris2[c(1, 3, 5), "Species"] = NA +df = pl$DataFrame(iris2) + +df$filter(pl$col("Species") == "setosa") } \keyword{DataFrame} diff --git a/man/DataFrame_first.Rd b/man/DataFrame_first.Rd index 7f063e765..d28997d03 100644 --- a/man/DataFrame_first.Rd +++ b/man/DataFrame_first.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_first} \alias{DataFrame_first} -\title{First} +\title{Get the first row of the DataFrame.} \usage{ DataFrame_first() } \value{ -A new \code{DataFrame} object with applied filter. +A DataFrame with one row. } \description{ Get the first row of the DataFrame. 
diff --git a/man/DataFrame_frame_equal.Rd b/man/DataFrame_frame_equal.Rd index e49ecc517..b24505530 100644 --- a/man/DataFrame_frame_equal.Rd +++ b/man/DataFrame_frame_equal.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_frame_equal} \alias{DataFrame_frame_equal} -\title{Drop in place} +\title{Compare two DataFrames} \usage{ DataFrame_frame_equal(other) } @@ -10,10 +10,10 @@ DataFrame_frame_equal(other) \item{other}{DataFrame to compare with.} } \value{ -bool +A boolean. } \description{ -Check if DataFrame is equal to other. +Check if two DataFrames are equal. } \examples{ dat1 = pl$DataFrame(iris) diff --git a/man/DataFrame_get_column.Rd b/man/DataFrame_get_column.Rd index 4cf57a4fe..9418a7648 100644 --- a/man/DataFrame_get_column.Rd +++ b/man/DataFrame_get_column.Rd @@ -2,21 +2,21 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_get_column} \alias{DataFrame_get_column} -\title{Get Column (as one Series)} +\title{Get column (as one Series)} \usage{ DataFrame_get_column(name) } \arguments{ -\item{name}{name of column to extract as Series} +\item{name}{Name of the column to extract.} } \value{ Series } \description{ -get one column by name as series +Extract a DataFrame column as a Polars series. } \examples{ -df = pl$DataFrame(iris[1, ]) +df = pl$DataFrame(iris[1:2, ]) df$get_column("Species") } \keyword{DataFrame} diff --git a/man/DataFrame_get_columns.Rd b/man/DataFrame_get_columns.Rd index aa7a3151b..c4f054ff8 100644 --- a/man/DataFrame_get_columns.Rd +++ b/man/DataFrame_get_columns.Rd @@ -7,13 +7,13 @@ DataFrame_get_columns } \value{ -list of series +A list of series } \description{ -get columns as list of series +Extract all DataFrame columns as a list of Polars series. 
} \examples{ -df = pl$DataFrame(iris[1, ]) +df = pl$DataFrame(iris[1:2, ]) df$get_columns() } \keyword{DataFrame} diff --git a/man/DataFrame_glimpse.Rd b/man/DataFrame_glimpse.Rd index 44f83673e..aff837d39 100644 --- a/man/DataFrame_glimpse.Rd +++ b/man/DataFrame_glimpse.Rd @@ -9,7 +9,8 @@ DataFrame_glimpse(..., return_as_string = FALSE) \arguments{ \item{...}{not used} -\item{return_as_string}{Boolean (default \code{FALSE}). If \code{TRUE}, return the output as a string.} +\item{return_as_string}{Boolean (default \code{FALSE}). If \code{TRUE}, return the +output as a string.} } \value{ DataFrame diff --git a/man/DataFrame_groupby.Rd b/man/DataFrame_groupby.Rd index b243b7f64..f00809fd2 100644 --- a/man/DataFrame_groupby.Rd +++ b/man/DataFrame_groupby.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_groupby} \alias{DataFrame_groupby} -\title{groupby a DataFrame} +\title{Group a DataFrame} \usage{ DataFrame_groupby(..., maintain_order = pl$options$default_maintain_order()) } @@ -18,14 +18,17 @@ FALSE = slightly faster, but not deterministic order. Default is FALSE, can be c GroupBy (a DataFrame with special groupby methods like \verb{$agg()}) } \description{ -create GroupBy from DataFrame +This doesn't modify the data but only stores information about +the group structure. This structure can then be used by several functions +(\verb{$agg()}, \verb{$filter()}, etc.). 
} \examples{ gb = pl$DataFrame( foo = c("one", "two", "two", "one", "two"), bar = c(5, 3, 2, 4, 1) )$groupby("foo", maintain_order = TRUE) -print(gb) + +gb gb$agg( pl$col("bar")$sum()$suffix("_sum"), diff --git a/man/DataFrame_head.Rd b/man/DataFrame_head.Rd index ed474a0fe..757ad91dd 100644 --- a/man/DataFrame_head.Rd +++ b/man/DataFrame_head.Rd @@ -7,15 +7,15 @@ DataFrame_head(n) } \arguments{ -\item{n}{positive numeric or integer number not larger than 2^32} +\item{n}{Positive number not larger than 2^32.} } \value{ DataFrame } \description{ -Get the first n rows of the query. +Get the first \code{n} rows of the query. } \details{ -any number will converted to u32. Negative raises error +Any number will be converted to u32. Negative raises error. } \keyword{DataFrame} diff --git a/man/DataFrame_height.Rd b/man/DataFrame_height.Rd index 913eb426d..756d405a1 100644 --- a/man/DataFrame_height.Rd +++ b/man/DataFrame_height.Rd @@ -4,18 +4,17 @@ \alias{DataFrame_height} \alias{height} \alias{nrow} -\title{Height of DataFrame} +\title{Number of rows of a DataFrame} \usage{ DataFrame_height() } \value{ -height as numeric +The number of rows of the DataFrame } \description{ -Get height(nrow) of DataFrame +Get the number of rows (height) of a DataFrame } \examples{ pl$DataFrame(iris)$height - } \keyword{DataFrame} diff --git a/man/DataFrame_join.Rd b/man/DataFrame_join.Rd index 151aaf708..71b12fb78 100644 --- a/man/DataFrame_join.Rd +++ b/man/DataFrame_join.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_join} \alias{DataFrame_join} -\title{join DataFrame with other DataFrame} +\title{Join DataFrames} \usage{ DataFrame_join( other, @@ -18,25 +18,29 @@ DataFrame_join( \arguments{ \item{other}{DataFrame} -\item{left_on}{names of columns in self LazyFrame, order should match. Type, see on param.} +\item{left_on, right_on}{Same as \code{on} but only for the left or the right +DataFrame. 
They must have the same length.} -\item{right_on}{names of columns in other LazyFrame, order should match. Type, see on param.} +\item{on}{Either a vector of column names or a list of expressions and/or +strings. Use \code{left_on} and \code{right_on} if the column names to match on are +different between the two DataFrames.} -\item{on}{named columns as char vector of named columns, or list of expressions and/or strings.} +\item{how}{One of the following methods: "inner", "left", "outer", "semi", +"anti", "cross".} -\item{how}{a string selecting one of the following methods: inner, left, outer, semi, anti, cross} +\item{suffix}{Suffix to add to duplicated column names.} -\item{suffix}{name to added right table} +\item{allow_parallel}{Boolean.} -\item{allow_parallel}{bool} - -\item{force_parallel}{bool} +\item{force_parallel}{Boolean.} } \value{ DataFrame } \description{ -join DataFrame with other DataFrame +This function can do both mutating joins (adding columns based on matching +observations, for example with \code{how = "left"}) and filtering joins (keeping +observations based on matching observations, for example with \code{how = "semi"}). } \examples{ # inner join by default @@ -48,6 +52,5 @@ df1$join(other = df2, on = "key") df1 = pl$DataFrame(x = letters[1:3]) df2 = pl$DataFrame(y = 1:4) df1$join(other = df2, how = "cross") - } \keyword{DataFrame} diff --git a/man/DataFrame_join_asof.Rd b/man/DataFrame_join_asof.Rd index 36fff327d..364ca0233 100644 --- a/man/DataFrame_join_asof.Rd +++ b/man/DataFrame_join_asof.Rd @@ -74,7 +74,7 @@ computation of both DataFrames up to the join in parallel.} DataFrames up to the join in parallel.} } \value{ -new joined DataFrame +New joined DataFrame } \description{ Perform an asof join. @@ -95,7 +95,7 @@ For each row in the left DataFrame: The default is "backward". 
} \examples{ -# create two DataFrame to join asof +# create two DataFrames to join asof gdp = pl$DataFrame( date = as.Date(c("2015-1-1", "2016-1-1", "2017-5-1", "2018-1-1", "2019-1-1")), gdp = c(4321, 4164, 4411, 4566, 4696), diff --git a/man/DataFrame_last.Rd b/man/DataFrame_last.Rd index 6b2e0503a..6057763db 100644 --- a/man/DataFrame_last.Rd +++ b/man/DataFrame_last.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_last} \alias{DataFrame_last} -\title{Last} +\title{Get the last row of the DataFrame.} \usage{ DataFrame_last() } \value{ -A new \code{DataFrame} object with applied filter. +A DataFrame with one row. } \description{ Get the last row of the DataFrame. diff --git a/man/DataFrame_lazy.Rd b/man/DataFrame_lazy.Rd index 8702adb39..debe9af80 100644 --- a/man/DataFrame_lazy.Rd +++ b/man/DataFrame_lazy.Rd @@ -15,7 +15,6 @@ Start a new lazy query from a DataFrame. } \examples{ pl$DataFrame(iris)$lazy() - } \keyword{DataFrame} \keyword{LazyFrame_new} diff --git a/man/DataFrame_limit.Rd b/man/DataFrame_limit.Rd index ba6dc9b1f..a0916903f 100644 --- a/man/DataFrame_limit.Rd +++ b/man/DataFrame_limit.Rd @@ -7,7 +7,7 @@ DataFrame_limit(n) } \arguments{ -\item{n}{Positive numeric or integer number not larger than 2^32} +\item{n}{Positive number not larger than 2^32.} } \value{ DataFrame @@ -16,10 +16,9 @@ DataFrame Take some maximum number of rows. } \details{ -Any number will converted to u32. +Any number will be converted to u32. Negative raises error. } \examples{ pl$DataFrame(iris)$limit(6) - } \keyword{DataFrame} diff --git a/man/DataFrame_max.Rd b/man/DataFrame_max.Rd index 5e9145ee0..9f38ab0bf 100644 --- a/man/DataFrame_max.Rd +++ b/man/DataFrame_max.Rd @@ -7,7 +7,7 @@ DataFrame_max() } \value{ -A new \code{DataFrame} object with applied aggregation. +A DataFrame with one row. } \description{ Aggregate the columns in the DataFrame to their maximum value. 
diff --git a/man/DataFrame_mean.Rd b/man/DataFrame_mean.Rd index 8361ffb78..ff21ca28d 100644 --- a/man/DataFrame_mean.Rd +++ b/man/DataFrame_mean.Rd @@ -7,7 +7,7 @@ DataFrame_mean() } \value{ -A new \code{DataFrame} object with applied aggregation. +A DataFrame with one row. } \description{ Aggregate the columns in the DataFrame to their mean value. diff --git a/man/DataFrame_median.Rd b/man/DataFrame_median.Rd index 5dc01f63f..7a9336056 100644 --- a/man/DataFrame_median.Rd +++ b/man/DataFrame_median.Rd @@ -7,7 +7,7 @@ DataFrame_median() } \value{ -A new \code{DataFrame} object with applied aggregation. +A DataFrame with one row. } \description{ Aggregate the columns in the DataFrame to their median value. diff --git a/man/DataFrame_melt.Rd b/man/DataFrame_melt.Rd index ba27aae92..13d101fae 100644 --- a/man/DataFrame_melt.Rd +++ b/man/DataFrame_melt.Rd @@ -39,8 +39,9 @@ two non-identifier columns, 'variable' and 'value'. df = pl$DataFrame( a = c("x", "y", "z"), b = c(1, 3, 5), - c = c(2, 4, 6) + c = c(2, 4, 6), + d = c(7, 8, 9) ) -df$melt(id_vars = "a", value_vars = c("b", "c")) +df$melt(id_vars = "a", value_vars = c("b", "c", "d")) } \keyword{DataFrame} diff --git a/man/DataFrame_min.Rd b/man/DataFrame_min.Rd index 0c0b42319..a23ab0871 100644 --- a/man/DataFrame_min.Rd +++ b/man/DataFrame_min.Rd @@ -7,7 +7,7 @@ DataFrame_min() } \value{ -A new \code{DataFrame} object with applied aggregation. +A DataFrame with one row. } \description{ Aggregate the columns in the DataFrame to their minimum value. 
diff --git a/man/DataFrame_null_count.Rd b/man/DataFrame_null_count.Rd index e3c124396..2bf5f838b 100644 --- a/man/DataFrame_null_count.Rd +++ b/man/DataFrame_null_count.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_null_count} \alias{DataFrame_null_count} -\title{Null count} +\title{Count null values} \format{ function } @@ -13,7 +13,8 @@ DataFrame_null_count DataFrame } \description{ -Create a new DataFrame that shows the null counts per column. +Create a new DataFrame that shows the null (which correspond +to \code{NA} in R) counts per column. } \examples{ x = mtcars diff --git a/man/DataFrame_pivot.Rd b/man/DataFrame_pivot.Rd index 4cf5f3c28..8fe7a1c02 100644 --- a/man/DataFrame_pivot.Rd +++ b/man/DataFrame_pivot.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_pivot} \alias{DataFrame_pivot} -\title{Create a spreadsheet-style pivot table as a DataFrame.} +\title{Pivot data from long to wide} \usage{ DataFrame_pivot( values, @@ -15,20 +15,27 @@ DataFrame_pivot( ) } \arguments{ -\item{values}{Column values to aggregate. Can be multiple columns if the \code{columns} -arguments contains multiple columns as well.} +\item{values}{Column values to aggregate. Can be multiple columns if the +\code{columns} arguments contains multiple columns as well.} \item{index}{One or multiple keys to group by.} -\item{columns}{Name of the column(s) whose values will be used as the header of the output -DataFrame.} +\item{columns}{Name of the column(s) whose values will be used as the header +of the output DataFrame.} -\item{aggregate_function}{String naming Expr to aggregate with, or an Expr e.g. \code{pl$element()$sum()}, -examples of strings:'first', 'sum', 'max', 'min', 'mean', 'median', 'last', 'count'} +\item{aggregate_function}{One of: +\itemize{ +\item string indicating the expressions to aggregate with, such as 'first', +'sum', 'max', 'min', 'mean', 'median', 'last', 'count'), +\item an Expr e.g. 
\code{pl$element()$sum()} +}} -\item{maintain_order}{Sort the grouped keys so that the output order is predictable.} +\item{maintain_order}{Keep the same order as the original \code{DataFrame}. Setting +this to \code{TRUE} makes it more expensive to compute and blocks the possibility +to run on the streaming engine.} -\item{sort_columns}{Sort the transposed columns by name. Default is by order of discovery.} +\item{sort_columns}{Sort the transposed columns by name. Default is by order +of discovery.} \item{separator}{Used as separator/delimiter in generated column names.} } @@ -36,7 +43,7 @@ examples of strings:'first', 'sum', 'max', 'min', 'mean', 'median', 'last', 'cou DataFrame } \description{ -Create a spreadsheet-style pivot table as a DataFrame. +Pivot data from long to wide } \examples{ df = pl$DataFrame( @@ -44,17 +51,20 @@ df = pl$DataFrame( bar = c("A", "B", "C", "A", "B", "C"), baz = c(1, 2, 3, 4, 5, 6) ) +df + df$pivot( - values = "baz", index = "foo", columns = "bar", aggregate_function = "first" + values = "baz", index = "foo", columns = "bar" ) - # Run an expression as aggregation function df = pl$DataFrame( col1 = c("a", "a", "a", "b", "b", "b"), col2 = c("x", "x", "x", "x", "y", "y"), col3 = c(6, 7, 3, 2, 5, 7) ) +df + df$pivot( index = "col1", columns = "col2", diff --git a/man/DataFrame_quantile.Rd b/man/DataFrame_quantile.Rd index 69e98e2fb..5b54b537a 100644 --- a/man/DataFrame_quantile.Rd +++ b/man/DataFrame_quantile.Rd @@ -7,15 +7,17 @@ DataFrame_quantile(quantile, interpolation = "nearest") } \arguments{ -\item{quantile}{numeric Quantile between 0.0 and 1.0.} +\item{quantile}{Numeric of length 1 between 0 and 1.} -\item{interpolation}{string Interpolation method: "nearest", "higher", "lower", "midpoint", or "linear".} +\item{interpolation}{Interpolation method: "nearest", "higher", "lower", +"midpoint", or "linear".} } \value{ DataFrame } \description{ -Aggregate the columns in the DataFrame to their quantile value. 
+Aggregate the columns in the DataFrame to a unique quantile +value. Use \verb{$describe()} to specify several quantiles. } \examples{ pl$DataFrame(mtcars)$quantile(.4) diff --git a/man/DataFrame_rename.Rd b/man/DataFrame_rename.Rd index f3d31d8bc..7f5345e0a 100644 --- a/man/DataFrame_rename.Rd +++ b/man/DataFrame_rename.Rd @@ -10,7 +10,7 @@ DataFrame_rename(...) \item{...}{One of the following: \itemize{ \item params like \code{new_name = "old_name"} to rename selected variables. -\item as above but, but params wrapped in a list +\item as above but with params wrapped in a list }} } \value{ @@ -20,7 +20,11 @@ DataFrame Rename columns of a DataFrame } \examples{ -pl$DataFrame(mtcars)$ - rename(miles_per_gallon = "mpg", horsepower = "hp") +df = pl$DataFrame(mtcars) + +df$rename(miles_per_gallon = "mpg", horsepower = "hp") + +replacements <- list(miles_per_gallon = "mpg", horsepower = "hp") +df$rename(replacements) } \keyword{DataFrame} diff --git a/man/DataFrame_reverse.Rd b/man/DataFrame_reverse.Rd index b4e8a7277..36d44b271 100644 --- a/man/DataFrame_reverse.Rd +++ b/man/DataFrame_reverse.Rd @@ -10,9 +10,8 @@ DataFrame_reverse() DataFrame } \description{ -Reverse the DataFrame. +Reverse the DataFrame (the last row becomes the first one, etc.). } \examples{ pl$DataFrame(mtcars)$reverse() } -\keyword{LazyFrame} diff --git a/man/DataFrame_select.Rd b/man/DataFrame_select.Rd index 4e7a90848..57b6f8345 100644 --- a/man/DataFrame_select.Rd +++ b/man/DataFrame_select.Rd @@ -18,7 +18,7 @@ DataFrame DataFrame } \description{ -Related to dplyr \code{mutate()}. However, it discards unmentioned +Similar to \code{dplyr::mutate()}. However, it discards unmentioned columns (like \code{.()} in \code{data.table}). 
} \examples{ diff --git a/man/DataFrame_shape.Rd b/man/DataFrame_shape.Rd index 295b63dbc..bfe6a710f 100644 --- a/man/DataFrame_shape.Rd +++ b/man/DataFrame_shape.Rd @@ -2,18 +2,18 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_shape} \alias{DataFrame_shape} -\title{Shape of DataFrame} +\title{Dimensions of a DataFrame} \usage{ DataFrame_shape() } \value{ -two length numeric vector of c(nrows,ncols) +Numeric vector of length two with the number of rows and the number +of columns. } \description{ Get shape/dimensions of DataFrame } \examples{ -df = pl$DataFrame(iris)$shape - +pl$DataFrame(iris)$shape } \keyword{DataFrame} diff --git a/man/DataFrame_shift.Rd b/man/DataFrame_shift.Rd index e43790ebe..7b7eee7c8 100644 --- a/man/DataFrame_shift.Rd +++ b/man/DataFrame_shift.Rd @@ -2,20 +2,25 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_shift} \alias{DataFrame_shift} -\title{Shift} +\title{Shift a DataFrame} \usage{ DataFrame_shift(periods = 1) } \arguments{ -\item{periods}{integer Number of periods to shift (may be negative).} +\item{periods}{Number of periods to shift (can be negative).} } \value{ DataFrame } \description{ -Shift the values by a given period. +Shift the values by a given period. If the period (\code{n}) is positive, +then \code{n} rows will be inserted at the top of the DataFrame and the last \code{n} +rows will be discarded. Vice-versa if the period is negative. In the end, +the total number of rows of the DataFrame doesn't change. 
} \examples{ pl$DataFrame(mtcars)$shift(2) + +pl$DataFrame(mtcars)$shift(-2) } \keyword{DataFrame} diff --git a/man/DataFrame_shift_and_fill.Rd b/man/DataFrame_shift_and_fill.Rd index 42365cc72..5453eb5ac 100644 --- a/man/DataFrame_shift_and_fill.Rd +++ b/man/DataFrame_shift_and_fill.Rd @@ -7,18 +7,25 @@ DataFrame_shift_and_fill(fill_value, periods = 1) } \arguments{ -\item{fill_value}{Fill values with the result of this expression.} +\item{fill_value}{Fill new \code{NULL} values with this value. Must be of length 1. +A logical value will be converted to numeric.} -\item{periods}{Integer indicating the number of periods to shift (may be -negative).} +\item{periods}{Number of periods to shift (can be negative).} } \value{ DataFrame } \description{ -Shift the values by a given period and fill the resulting null values. +Shift the values by a given period and fill the resulting null +values. See the docs of \verb{$shift()} for more details on shifting. } \examples{ -pl$DataFrame(mtcars)$shift_and_fill(0, 2) +df = pl$DataFrame(mtcars) + +# insert two rows filled with 0 at the top of the DataFrame +df$shift_and_fill(0, 2) + +# automatic conversion of logical value to numeric +df$shift_and_fill(TRUE, 2) } \keyword{DataFrame} diff --git a/man/DataFrame_slice.Rd b/man/DataFrame_slice.Rd index 44cdd44a1..adbafd073 100644 --- a/man/DataFrame_slice.Rd +++ b/man/DataFrame_slice.Rd @@ -7,18 +7,22 @@ DataFrame_slice(offset, length = NULL) } \arguments{ -\item{offset}{integer} +\item{offset}{Start index, can be a negative value. This is 0-indexed, so +\code{offset = 1} doesn't include the first row.} -\item{length}{integer or NULL} +\item{length}{Length of the slice. If \code{NULL} (default), all rows starting at +the offset will be selected.} } \value{ DataFrame } \description{ -Get a slice of this DataFrame. +Get a slice of the DataFrame. 
} \examples{ +# skip the first 2 rows and take the 4 following rows pl$DataFrame(mtcars)$slice(2, 4) -mtcars[2:6, ] + +# this is equivalent to: +mtcars[3:6, ] } -\keyword{LazyFrame} diff --git a/man/DataFrame_sort.Rd b/man/DataFrame_sort.Rd index adf99acbc..a5af4ec30 100644 --- a/man/DataFrame_sort.Rd +++ b/man/DataFrame_sort.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_sort} \alias{DataFrame_sort} -\title{DataFrame Sort} +\title{Sort a DataFrame} \usage{ DataFrame_sort( by, diff --git a/man/DataFrame_std.Rd b/man/DataFrame_std.Rd index 0d5605edb..816e83210 100644 --- a/man/DataFrame_std.Rd +++ b/man/DataFrame_std.Rd @@ -7,13 +7,15 @@ DataFrame_std(ddof = 1) } \arguments{ -\item{ddof}{integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.} +\item{ddof}{Delta Degrees of Freedom: the divisor used in the calculation is +N - ddof, where N represents the number of elements. By default ddof is 1.} } \value{ -A new \code{DataFrame} object with applied aggregation. +A DataFrame with one row. } \description{ -Aggregate the columns of this DataFrame to their standard deviation values. +Aggregate the columns of this DataFrame to their standard +deviation values. } \examples{ pl$DataFrame(mtcars)$std() diff --git a/man/DataFrame_sum.Rd b/man/DataFrame_sum.Rd index 7b2f85780..d6018b971 100644 --- a/man/DataFrame_sum.Rd +++ b/man/DataFrame_sum.Rd @@ -7,7 +7,7 @@ DataFrame_sum() } \value{ -A new \code{DataFrame} object with applied aggregation. +A DataFrame with one row. } \description{ Aggregate the columns of this DataFrame to their sum values. 
diff --git a/man/DataFrame_tail.Rd b/man/DataFrame_tail.Rd index 1567ba507..1463c0d2b 100644 --- a/man/DataFrame_tail.Rd +++ b/man/DataFrame_tail.Rd @@ -7,15 +7,15 @@ DataFrame_tail(n) } \arguments{ -\item{n}{positive numeric of integer number not larger than 2^32} +\item{n}{Positive number not larger than 2^32.} } \value{ DataFrame } \description{ -Get the last n rows. +Get the last \code{n} rows. } \details{ -any number will converted to u32. Negative raises error +Any number will be converted to u32. Negative raises error. } \keyword{DataFrame} diff --git a/man/DataFrame_to_data_frame.Rd b/man/DataFrame_to_data_frame.Rd index f779fe4df..c79c5bd07 100644 --- a/man/DataFrame_to_data_frame.Rd +++ b/man/DataFrame_to_data_frame.Rd @@ -10,9 +10,9 @@ DataFrame_to_data_frame(...) \method{as.data.frame}{DataFrame}(x, ...) } \arguments{ -\item{...}{any params passed to as.data.frame} +\item{...}{Any args passed to \code{as.data.frame()}.} -\item{x}{DataFrame} +\item{x}{A DataFrame} } \value{ An R data.frame diff --git a/man/DataFrame_to_series.Rd b/man/DataFrame_to_series.Rd index 6de609c7e..4a6f269f0 100644 --- a/man/DataFrame_to_series.Rd +++ b/man/DataFrame_to_series.Rd @@ -2,22 +2,33 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_to_series} \alias{DataFrame_to_series} -\title{Get Series by idx, if there} +\title{Get column by index} \usage{ DataFrame_to_series(idx = 0) } \arguments{ -\item{idx}{numeric default 0, zero-index of what column to return as Series} +\item{idx}{Index of the column to return as Series. Defaults to 0, which is +the first column.} } \value{ Series or NULL } \description{ -get one column by idx as series from DataFrame. -Unlike get_column this method will not fail if no series found at idx but -return a NULL, idx is zero idx. +Extract a DataFrame column (by index) as a Polars series. Unlike +\code{get_column()}, this method will not fail but will return a \code{NULL} if the +index doesn't exist in the DataFrame. 
Keep in mind that Polars is 0-indexed +so "0" is the first column. } \examples{ -pl$DataFrame(a = 1:4)$to_series() +df = pl$DataFrame(iris[1:10, ]) + +# default is to extract the first column +df$to_series() + +# Polars is 0-indexed, so we use idx = 1 to extract the *2nd* column +df$to_series(idx = 1) + +# doesn't error if the column isn't there +df$to_series(idx = 8) } \keyword{DataFrame} diff --git a/man/DataFrame_to_struct.Rd b/man/DataFrame_to_struct.Rd index f166b4c88..ed2fd26f2 100644 --- a/man/DataFrame_to_struct.Rd +++ b/man/DataFrame_to_struct.Rd @@ -3,26 +3,30 @@ \name{DataFrame_to_struct} \alias{DataFrame_to_struct} \alias{to_struct} -\title{to_struct} +\title{Convert DataFrame to a Series of type "struct"} \usage{ DataFrame_to_struct(name = "") } \arguments{ -\item{name}{name of new Series} +\item{name}{Name given to the new Series} } \value{ -to_struct() returns a Series +A Series of type "struct" } \description{ -to_struct +Convert DataFrame to a Series of type "struct" } \examples{ # round-trip conversion from DataFrame with two columns df = pl$DataFrame(a = 1:5, b = c("one", "two", "three", "four", "five")) s = df$to_struct() s -s$to_r() # to r list -df_s = s$to_frame() # place series in a new DataFrame -df_s$unnest() # back to starting df + +# convert to an R list +s$to_r() + +# Convert back to a DataFrame +df_s = s$to_frame() +df_s } \keyword{DataFrame} diff --git a/man/DataFrame_unique.Rd b/man/DataFrame_unique.Rd index cc6578602..976190e34 100644 --- a/man/DataFrame_unique.Rd +++ b/man/DataFrame_unique.Rd @@ -2,15 +2,15 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_unique} \alias{DataFrame_unique} -\title{DataFrame_unique} +\title{Drop duplicated rows} \usage{ DataFrame_unique(subset = NULL, keep = "first", maintain_order = FALSE) } \arguments{ -\item{subset}{string or vector of strings. Column name(s) to consider when -identifying duplicates. 
If set to NULL (default), use all columns.} +\item{subset}{A character vector with the names of the column(s) to use to +identify duplicates. If \code{NULL} (default), use all columns.} -\item{keep}{string. Which of the duplicate rows to keep: +\item{keep}{Which of the duplicate rows to keep: \itemize{ \item "first": Keep first unique row. \item "last": Keep last unique row. @@ -25,15 +25,21 @@ to run on the streaming engine.} DataFrame } \description{ -Drop duplicate rows from this dataframe. +Drop duplicated rows } \examples{ df = pl$DataFrame( - x = as.numeric(c(1, 1:5)), - y = as.numeric(c(1, 1:5)), - z = as.numeric(c(1, 1, 1:4)) + x = sample(10, 100, rep = TRUE), + y = sample(10, 100, rep = TRUE) ) +df$height + df$unique()$height -df$unique(subset = c("x", "z"), keep = "last")$height +df$unique(subset = "x")$height + +df$unique(keep = "last") + +# only keep unique rows +df$unique(keep = "none") } \keyword{DataFrame} diff --git a/man/DataFrame_unnest.Rd b/man/DataFrame_unnest.Rd index f981e5479..1ed924b32 100644 --- a/man/DataFrame_unnest.Rd +++ b/man/DataFrame_unnest.Rd @@ -7,12 +7,21 @@ DataFrame_unnest(names = NULL) } \arguments{ -\item{names}{names of struct columns to unnest, default NULL unnest any struct column} +\item{names}{Names of the struct columns to unnest. If \code{NULL} (default), then +all "struct" columns are unnested.} } \value{ -$unnest() returns a DataFrame with all column including any that has been unnested +A DataFrame where all "struct" columns are unnested. Non-struct +columns are not modified. } \description{ Unnest a DataFrame struct columns. 
} +\examples{ +df = pl$DataFrame(a = 1:5, b = c("one", "two", "three", "four", "five")) +df = df$to_struct()$to_frame() +df + +df$unnest() +} \keyword{DataFrame} diff --git a/man/DataFrame_var.Rd b/man/DataFrame_var.Rd index a351c54c4..f9d9320d1 100644 --- a/man/DataFrame_var.Rd +++ b/man/DataFrame_var.Rd @@ -7,10 +7,11 @@ DataFrame_var(ddof = 1) } \arguments{ -\item{ddof}{integer Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.} +\item{ddof}{Delta Degrees of Freedom: the divisor used in the calculation is +N - ddof, where N represents the number of elements. By default ddof is 1.} } \value{ -A new \code{DataFrame} object with applied aggregation. +A DataFrame with one row. } \description{ Aggregate the columns of this DataFrame to their variance values. diff --git a/man/DataFrame_width.Rd b/man/DataFrame_width.Rd index 33def4cec..dc8888b6e 100644 --- a/man/DataFrame_width.Rd +++ b/man/DataFrame_width.Rd @@ -2,18 +2,17 @@ % Please edit documentation in R/dataframe__frame.R \name{DataFrame_width} \alias{DataFrame_width} -\title{Width of DataFrame} +\title{Number of columns of a DataFrame} \usage{ DataFrame_width() } \value{ -width as numeric scalar +The number of columns of a DataFrame } \description{ -Get width(ncol) of DataFrame +Get the number of columns (width) of a DataFrame } \examples{ pl$DataFrame(iris)$width - } \keyword{DataFrame} diff --git a/man/DataFrame_with_columns.Rd b/man/DataFrame_with_columns.Rd index 8ed51e7c4..cfab7b9d2 100644 --- a/man/DataFrame_with_columns.Rd +++ b/man/DataFrame_with_columns.Rd @@ -12,23 +12,19 @@ DataFrame_with_columns(...) DataFrame_with_column(expr) } \arguments{ -\item{...}{any expressions or string column name, or same wrapped in a list. If first and only -element is a list, it is unwrap as a list of args.} +\item{...}{Any expressions or string column name, or same wrapped in a list. 
+If first and only element is a list, it is unwrapped as a list of args.} \item{expr}{a single expression or string} } \value{ -DataFrame - -DataFrame +A DataFrame } \description{ -Add or modify columns with expressions -} -\details{ -Like dplyr \code{mutate()} as it keeps unmentioned columns unlike $select(). - -with_column is derived from with_columns but takes only one expression argument +Add columns or modify existing ones with expressions. This is +the equivalent of \code{dplyr::mutate()} as it keeps unmentioned columns (unlike +\verb{$select()}). +\strong{\verb{$with_column()} function is deprecated, use \verb{$with_columns()} instead.} } \examples{ pl$DataFrame(iris)$with_columns( diff --git a/man/DataFrame_with_row_count.Rd b/man/DataFrame_with_row_count.Rd index 1f9860a51..4cba92f9a 100644 --- a/man/DataFrame_with_row_count.Rd +++ b/man/DataFrame_with_row_count.Rd @@ -17,4 +17,13 @@ A new \code{DataFrame} object with a counter column in front \description{ Add a new column at index 0 that counts the rows } +\examples{ +df = pl$DataFrame(mtcars) + +# by default, the index starts at 0 (to mimic the behavior of Python Polars) +df$with_row_count("idx") + +# but in R, we use a 1-index +df$with_row_count("idx", offset = 1) +} \keyword{DataFrame} diff --git a/man/arr_arg_max.Rd b/man/ExprList_arg_max.Rd similarity index 66% rename from man/arr_arg_max.Rd rename to man/ExprList_arg_max.Rd index f9c895056..c32005aaa 100644 --- a/man/arr_arg_max.Rd +++ b/man/ExprList_arg_max.Rd @@ -1,9 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_arg_max} -\alias{arr_arg_max} -\alias{Expr_arr_arg_max} -\alias{Expr_arr.arg_max} +\name{ExprList_arg_max} +\alias{ExprList_arg_max} +\alias{Expr_list_arg_max} \title{Arg max sublists} \format{ function @@ -16,6 +15,6 @@ Retrieve the index of the maximum value in every sublist. 
} \examples{ df = pl$DataFrame(list(s = list(1:2, 2:1))) -df$select(pl$col("s")$arr$arg_max()) +df$select(pl$col("s")$list$arg_max()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_arg_min.Rd b/man/ExprList_arg_min.Rd similarity index 68% rename from man/arr_arg_min.Rd rename to man/ExprList_arg_min.Rd index 177112ab1..160d5e33c 100644 --- a/man/arr_arg_min.Rd +++ b/man/ExprList_arg_min.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_arg_min} -\alias{arr_arg_min} -\alias{arr.arg_min} +\name{ExprList_arg_min} +\alias{ExprList_arg_min} +\alias{list_arg_min} \title{Arg min sublists} \format{ function @@ -15,6 +15,6 @@ Retrieve the index of the minimal value in every sublist. } \examples{ df = pl$DataFrame(list(s = list(1:2, 2:1))) -df$select(pl$col("s")$arr$arg_min()) +df$select(pl$col("s")$list$arg_min()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_concat.Rd b/man/ExprList_concat.Rd similarity index 60% rename from man/arr_concat.Rd rename to man/ExprList_concat.Rd index 2c74cc4a1..812bbbbf4 100644 --- a/man/arr_concat.Rd +++ b/man/ExprList_concat.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_concat} -\alias{arr_concat} -\alias{arr.concat} +\name{ExprList_concat} +\alias{ExprList_concat} +\alias{list_concat} \title{concat another list} \format{ function @@ -21,10 +21,10 @@ df = pl$DataFrame( a = list("a", "x"), b = list(c("b", "c"), c("y", "z")) ) -df$select(pl$col("a")$arr$concat(pl$col("b"))) +df$select(pl$col("a")$list$concat(pl$col("b"))) -df$select(pl$col("a")$arr$concat("hello from R")) +df$select(pl$col("a")$list$concat("hello from R")) -df$select(pl$col("a")$arr$concat(list("hello", c("hello", "world")))) +df$select(pl$col("a")$list$concat(list("hello", c("hello", "world")))) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_contains.Rd b/man/ExprList_contains.Rd 
similarity index 73% rename from man/arr_contains.Rd rename to man/ExprList_contains.Rd index f20060bbc..5e27c65cd 100644 --- a/man/arr_contains.Rd +++ b/man/ExprList_contains.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_contains} -\alias{arr_contains} -\alias{arr.contains} +\name{ExprList_contains} +\alias{ExprList_contains} +\alias{list_contains} \title{Sublists contains} \format{ function @@ -18,6 +18,6 @@ Check if sublists contain the given item. } \examples{ df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() -df$select(pl$col("a")$arr$contains(1L)) +df$select(pl$col("a")$list$contains(1L)) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_diff.Rd b/man/ExprList_diff.Rd similarity index 75% rename from man/arr_diff.Rd rename to man/ExprList_diff.Rd index 2ae3d6124..ae84f27b0 100644 --- a/man/arr_diff.Rd +++ b/man/ExprList_diff.Rd @@ -1,9 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_diff} -\alias{arr_diff} -\alias{Expr_arr_diff} -\alias{Expr_arr.diff} +\name{ExprList_diff} +\alias{ExprList_diff} +\alias{Expr_list_diff} \title{Diff sublists} \format{ function @@ -21,6 +20,6 @@ Calculate the n-th discrete difference of every sublist. 
} \examples{ df = pl$DataFrame(list(s = list(1:4, c(10L, 2L, 1L)))) -df$select(pl$col("s")$arr$diff()) +df$select(pl$col("s")$list$diff()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_eval.Rd b/man/ExprList_eval.Rd similarity index 82% rename from man/arr_eval.Rd rename to man/ExprList_eval.Rd index 14d43edae..63da56b6e 100644 --- a/man/arr_eval.Rd +++ b/man/ExprList_eval.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_eval} -\alias{arr_eval} -\alias{arr.eval} +\name{ExprList_eval} +\alias{ExprList_eval} +\alias{list_eval} \title{eval sublists (kinda like lapply)} \format{ function @@ -26,7 +26,7 @@ Run any polars expression against the lists' elements. \examples{ df = pl$DataFrame(a = list(c(1, 8, 3), b = c(4, 5, 2))) df$select(pl$all()$cast(pl$dtypes$Int64))$with_columns( - pl$concat_list(c("a", "b"))$arr$eval(pl$element()$rank())$alias("rank") + pl$concat_list(c("a", "b"))$list$eval(pl$element()$rank())$alias("rank") ) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_first.Rd b/man/ExprList_first.Rd similarity index 71% rename from man/arr_first.Rd rename to man/ExprList_first.Rd index 7ac367e6f..15cefc656 100644 --- a/man/arr_first.Rd +++ b/man/ExprList_first.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_first} -\alias{arr_first} -\alias{arr.first} +\name{ExprList_first} +\alias{ExprList_first} +\alias{list_first} \title{First in sublists} \format{ function @@ -15,6 +15,6 @@ Get the first value of the sublists. 
} \examples{ df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() -df$select(pl$col("a")$arr$first()) +df$select(pl$col("a")$list$first()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/ExprList_get.Rd b/man/ExprList_get.Rd new file mode 100644 index 000000000..0d0cb9f3f --- /dev/null +++ b/man/ExprList_get.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__list.R +\name{ExprList_get} +\alias{ExprList_get} +\alias{Expr_list_get} +\title{Get list} +\format{ +function +} +\arguments{ +\item{index}{numeric vector or Expr of length 1 or same length of Series. +if length 1 pick same value from each sublist, if length as Series/column, +pick by individual index across sublists. + +So index \code{0} would return the first item of every sublist +and index \code{-1} would return the last item of every sublist +if an index is out of bounds, it will return a \code{None}.} +} +\value{ +Expr +} +\description{ +Get the value by index in the sublists. +} +\examples{ +df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() +df$select(pl$col("a")$list$get(0)) +df$select(pl$col("a")$list$get(c(2, 0, -1))) +} +\keyword{ExprList} diff --git a/man/arr_head.Rd b/man/ExprList_head.Rd similarity index 76% rename from man/arr_head.Rd rename to man/ExprList_head.Rd index 8edc70e47..761a8a1d8 100644 --- a/man/arr_head.Rd +++ b/man/ExprList_head.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_head} -\alias{arr_head} -\alias{arr.head} +\name{ExprList_head} +\alias{ExprList_head} +\alias{list_head} \title{Heads of sublists} \format{ function @@ -18,6 +18,6 @@ head the first \code{n} values of every sublist. 
} \examples{ df = pl$DataFrame(list(a = list(1:4, c(10L, 2L, 1L)))) -df$select(pl$col("a")$arr$head(2)) +df$select(pl$col("a")$list$head(2)) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_join.Rd b/man/ExprList_join.Rd similarity index 79% rename from man/arr_join.Rd rename to man/ExprList_join.Rd index e1e3957f8..77bf7e9ed 100644 --- a/man/arr_join.Rd +++ b/man/ExprList_join.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_join} -\alias{arr_join} -\alias{arr.join} +\name{ExprList_join} +\alias{ExprList_join} +\alias{list_join} \title{Join sublists} \format{ function @@ -19,6 +19,6 @@ This errors if inner type of list \verb{!= Utf8}. } \examples{ df = pl$DataFrame(list(s = list(c("a", "b", "c"), c("x", "y")))) -df$select(pl$col("s")$arr$join(" ")) +df$select(pl$col("s")$list$join(" ")) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_last.Rd b/man/ExprList_last.Rd similarity index 72% rename from man/arr_last.Rd rename to man/ExprList_last.Rd index 7312549b6..fd3d3a919 100644 --- a/man/arr_last.Rd +++ b/man/ExprList_last.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_last} -\alias{arr_last} -\alias{arr.last} +\name{ExprList_last} +\alias{ExprList_last} +\alias{list_last} \title{Last in sublists} \format{ function @@ -15,6 +15,6 @@ Get the last value of the sublists. 
} \examples{ df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() -df$select(pl$col("a")$arr$last()) +df$select(pl$col("a")$list$last()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_max.Rd b/man/ExprList_max.Rd similarity index 68% rename from man/arr_max.Rd rename to man/ExprList_max.Rd index d23c5f405..05ddc9325 100644 --- a/man/arr_max.Rd +++ b/man/ExprList_max.Rd @@ -1,9 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_max} -\alias{arr_max} -\alias{Expr_arr_max} -\alias{Expr_arr.max} +\name{ExprList_max} +\alias{ExprList_max} +\alias{Expr_list_max} \title{Max lists} \format{ function @@ -16,6 +15,6 @@ Compute the max value of the lists in the array. } \examples{ df = pl$DataFrame(values = pl$Series(list(1L, 2:3))) -df$select(pl$col("values")$arr$max()) +df$select(pl$col("values")$list$max()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_mean.Rd b/man/ExprList_mean.Rd similarity index 70% rename from man/arr_mean.Rd rename to man/ExprList_mean.Rd index c41d33330..49b980690 100644 --- a/man/arr_mean.Rd +++ b/man/ExprList_mean.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_mean} -\alias{arr_mean} -\alias{arr.mean} +\name{ExprList_mean} +\alias{ExprList_mean} +\alias{list_mean} \title{Mean of lists} \format{ function @@ -15,6 +15,6 @@ Compute the mean value of the lists in the array. 
} \examples{ df = pl$DataFrame(values = pl$Series(list(1L, 2:3))) -df$select(pl$col("values")$arr$mean()) +df$select(pl$col("values")$list$mean()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_min.Rd b/man/ExprList_min.Rd similarity index 68% rename from man/arr_min.Rd rename to man/ExprList_min.Rd index 0fc1d0894..f717c5bfd 100644 --- a/man/arr_min.Rd +++ b/man/ExprList_min.Rd @@ -1,9 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_min} -\alias{arr_min} -\alias{Expr_arr_min} -\alias{Expr_arr.min} +\name{ExprList_min} +\alias{ExprList_min} +\alias{Expr_list_min} \title{#' Min lists} \format{ function @@ -16,6 +15,6 @@ Compute the min value of the lists in the array. } \examples{ df = pl$DataFrame(values = pl$Series(list(1L, 2:3))) -df$select(pl$col("values")$arr$min()) +df$select(pl$col("values")$list$min()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_reverse.Rd b/man/ExprList_reverse.Rd similarity index 67% rename from man/arr_reverse.Rd rename to man/ExprList_reverse.Rd index 24bda3b0b..e183740d9 100644 --- a/man/arr_reverse.Rd +++ b/man/ExprList_reverse.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_reverse} -\alias{arr_reverse} -\alias{arr.reverse} +\name{ExprList_reverse} +\alias{ExprList_reverse} +\alias{list_reverse} \title{Reverse list} \format{ function @@ -17,6 +17,6 @@ Reverse the arrays in the list. 
df = pl$DataFrame(list( values = list(3:1, c(9L, 1:2)) )) -df$select(pl$col("values")$arr$reverse()) +df$select(pl$col("values")$list$reverse()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_shift.Rd b/man/ExprList_shift.Rd similarity index 74% rename from man/arr_shift.Rd rename to man/ExprList_shift.Rd index 6dc971b67..4a5364ecb 100644 --- a/man/arr_shift.Rd +++ b/man/ExprList_shift.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_shift} -\alias{arr_shift} -\alias{arr.shift} +\name{ExprList_shift} +\alias{ExprList_shift} +\alias{list_shift} \title{Shift sublists} \format{ function @@ -18,6 +18,6 @@ Shift values by the given period. } \examples{ df = pl$DataFrame(list(s = list(1:4, c(10L, 2L, 1L)))) -df$select(pl$col("s")$arr$shift()) +df$select(pl$col("s")$list$shift()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_slice.Rd b/man/ExprList_slice.Rd similarity index 79% rename from man/arr_slice.Rd rename to man/ExprList_slice.Rd index 07be649d9..596360177 100644 --- a/man/arr_slice.Rd +++ b/man/ExprList_slice.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_slice} -\alias{arr_slice} -\alias{arr.slice} +\name{ExprList_slice} +\alias{ExprList_slice} +\alias{list_slice} \title{Slice sublists} \format{ function @@ -22,6 +22,6 @@ Slice every sublist. 
} \examples{ df = pl$DataFrame(list(s = list(1:4, c(10L, 2L, 1L)))) -df$select(pl$col("s")$arr$slice(2)) +df$select(pl$col("s")$list$slice(2)) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_sort.Rd b/man/ExprList_sort.Rd similarity index 88% rename from man/arr_sort.Rd rename to man/ExprList_sort.Rd index 7552bc5a4..719401909 100644 --- a/man/arr_sort.Rd +++ b/man/ExprList_sort.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_sort} -\alias{arr_sort} +\name{ExprList_sort} +\alias{ExprList_sort} \title{Expr_sort} \arguments{ \item{descending}{Sort values in descending order} diff --git a/man/arr_sum.Rd b/man/ExprList_sum.Rd similarity index 68% rename from man/arr_sum.Rd rename to man/ExprList_sum.Rd index 706fc56e5..a9841646e 100644 --- a/man/arr_sum.Rd +++ b/man/ExprList_sum.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_sum} -\alias{arr_sum} -\alias{arr.sum} +\name{ExprList_sum} +\alias{ExprList_sum} +\alias{list_sum} \title{Sum lists} \format{ function @@ -15,6 +15,6 @@ Sum all the lists in the array. } \examples{ df = pl$DataFrame(values = pl$Series(list(1L, 2:3))) -df$select(pl$col("values")$arr$sum()) +df$select(pl$col("values")$list$sum()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_tail.Rd b/man/ExprList_tail.Rd similarity index 76% rename from man/arr_tail.Rd rename to man/ExprList_tail.Rd index 9d247ad16..85428ce99 100644 --- a/man/arr_tail.Rd +++ b/man/ExprList_tail.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_tail} -\alias{arr_tail} -\alias{arr.tail} +\name{ExprList_tail} +\alias{ExprList_tail} +\alias{list_tail} \title{Tails of sublists} \format{ function @@ -18,6 +18,6 @@ tail the first \code{n} values of every sublist. 
} \examples{ df = pl$DataFrame(list(a = list(1:4, c(10L, 2L, 1L)))) -df$select(pl$col("a")$arr$tail(2)) +df$select(pl$col("a")$list$tail(2)) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_take.Rd b/man/ExprList_take.Rd similarity index 63% rename from man/arr_take.Rd rename to man/ExprList_take.Rd index 448689d2c..5466918dd 100644 --- a/man/arr_take.Rd +++ b/man/ExprList_take.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_take} -\alias{arr_take} -\alias{arr.take} +\name{ExprList_take} +\alias{ExprList_take} +\alias{list_take} \title{take in sublists} \format{ function @@ -21,12 +21,12 @@ Get the take value of the sublists. \examples{ df = pl$DataFrame(list(a = list(c(3, 2, 1), 1, c(1, 2)))) # idx = pl$Series(list(0:1, integer(), c(1L, 999L))) -df$select(pl$col("a")$arr$take(pl$lit(idx), null_on_oob = TRUE)) +df$select(pl$col("a")$list$take(pl$lit(idx), null_on_oob = TRUE)) # with implicit conversion to Expr -df$select(pl$col("a")$arr$take(list(0:1, integer(), c(1L, 999L)), null_on_oob = TRUE)) +df$select(pl$col("a")$list$take(list(0:1, integer(), c(1L, 999L)), null_on_oob = TRUE)) # by some column name, must cast to an Int/Uint type to work -df$select(pl$col("a")$arr$take(pl$col("a")$cast(pl$List(pl$UInt64)), null_on_oob = TRUE)) +df$select(pl$col("a")$list$take(pl$col("a")$cast(pl$List(pl$UInt64)), null_on_oob = TRUE)) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_to_struct.Rd b/man/ExprList_to_struct.Rd similarity index 88% rename from man/arr_to_struct.Rd rename to man/ExprList_to_struct.Rd index 4f83a014a..22e3592cb 100644 --- a/man/arr_to_struct.Rd +++ b/man/ExprList_to_struct.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_to_struct} -\alias{arr_to_struct} -\alias{arr.to_struct} +\name{ExprList_to_struct} +\alias{ExprList_to_struct} +\alias{list_to_struct} \title{List to Struct} 
\format{ function @@ -32,11 +32,11 @@ List to Struct } \examples{ df = pl$DataFrame(list(a = list(1:3, 1:2))) -df2 = df$select(pl$col("a")$arr$to_struct( +df2 = df$select(pl$col("a")$list$to_struct( name_generator = \(idx) paste0("hello_you_", idx) )) df2$unnest() df2$to_list() } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/arr_unique.Rd b/man/ExprList_unique.Rd similarity index 68% rename from man/arr_unique.Rd rename to man/ExprList_unique.Rd index 7e4f857da..dcdbea6b3 100644 --- a/man/arr_unique.Rd +++ b/man/ExprList_unique.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_unique} -\alias{arr_unique} -\alias{arr.unique} +\name{ExprList_unique} +\alias{ExprList_unique} +\alias{list_unique} \title{Unique list} \format{ function @@ -15,6 +15,6 @@ Get the unique/distinct values in the list. } \examples{ df = pl$DataFrame(list(a = list(1, 1, 2))) -df$select(pl$col("a")$arr$unique()) +df$select(pl$col("a")$list$unique()) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/ExprStr_concat.Rd b/man/ExprStr_concat.Rd index 6c04f2113..5343f279f 100644 --- a/man/ExprStr_concat.Rd +++ b/man/ExprStr_concat.Rd @@ -20,6 +20,6 @@ df$select(pl$col("foo")$str$concat("-")) # Series list of strings to Series of concatenated strings df = pl$DataFrame(list(bar = list(c("a", "b", "c"), c("1", "2", NA)))) -df$select(pl$col("bar")$arr$eval(pl$col()$str$concat())$arr$first()) +df$select(pl$col("bar")$list$eval(pl$col()$str$concat())$list$first()) } \keyword{ExprStr} diff --git a/man/Expr_arr.Rd b/man/Expr_arr.Rd index 027fedd15..2d50a731a 100644 --- a/man/Expr_arr.Rd +++ b/man/Expr_arr.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_arr} \alias{Expr_arr} -\alias{arr_ns} -\title{arr: list related methods} +\title{arr: list related methods DEPRECATED} \usage{ Expr_arr() } @@ -11,21 +10,10 @@ Expr_arr() Expr } \description{ -Create an object namespace of all list related 
methods. -See the individual method pages for full details +Deprecated since 0.8.1, will be removed in 0.9.0. +USE \verb{$list$...} instead. Subnamespace is simply renamed. } -\examples{ -df_with_list = pl$DataFrame( - group = c(1, 1, 2, 2, 3), - value = c(1:5) -)$groupby( - "group", - maintain_order = TRUE -)$agg( - pl$col("value") * 3L -) -df_with_list$with_columns( - pl$col("value")$arr$lengths()$alias("group_size") -) +\seealso{ +\code{\link[=Expr_list]{$list$...}} } \keyword{Expr} diff --git a/man/Expr_list.Rd b/man/Expr_list.Rd index 0cb586ba8..c17eb6fa3 100644 --- a/man/Expr_list.Rd +++ b/man/Expr_list.Rd @@ -4,15 +4,23 @@ \alias{Expr_list} \alias{Expr_implode} \alias{list} +\alias{list_ns} \title{Wrap column in list} \usage{ Expr_implode + +Expr_list() } \value{ +Expr + Expr } \description{ Aggregate values into a list. + +Create an object namespace of all list related methods. +See the individual method pages for full details } \details{ use to_struct to wrap a DataFrame. Notice implode() is sometimes referred to @@ -24,5 +32,17 @@ df = pl$DataFrame( b = 4:6 ) df$select(pl$all()$implode()) +df_with_list = pl$DataFrame( + group = c(1, 1, 2, 2, 3), + value = c(1:5) +)$groupby( + "group", + maintain_order = TRUE +)$agg( + pl$col("value") * 3L +) +df_with_list$with_columns( + pl$col("value")$list$lengths()$alias("group_size") +) } \keyword{Expr} diff --git a/man/Expr_lit_to_df.Rd b/man/Expr_lit_to_df.Rd index bf5247ae7..65e8e8d14 100644 --- a/man/Expr_lit_to_df.Rd +++ b/man/Expr_lit_to_df.Rd @@ -18,7 +18,7 @@ collect an expression based on literals into a DataFrame pl$Series(list(1:1, 1:2, 1:3, 1:4)) $print() $to_lit() - $arr$lengths() + $list$lengths() $sum() $cast(pl$dtypes$Int8) $lit_to_df() diff --git a/man/Expr_lit_to_s.Rd b/man/Expr_lit_to_s.Rd index 53cfa29ec..fab228980 100644 --- a/man/Expr_lit_to_s.Rd +++ b/man/Expr_lit_to_s.Rd @@ -18,7 +18,7 @@ collect an expression based on literals into a Series pl$Series(list(1:1, 1:2, 1:3, 1:4)) $print() 
$to_lit() - $arr$lengths() + $list$lengths() $sum() $cast(pl$dtypes$Int8) $lit_to_s() diff --git a/man/Series_arr.Rd b/man/Series_arr.Rd index 8994c4fce..1ec7c80e0 100644 --- a/man/Series_arr.Rd +++ b/man/Series_arr.Rd @@ -2,20 +2,22 @@ % Please edit documentation in R/series__series.R \name{Series_arr} \alias{Series_arr} -\title{arr: list related methods on Series of dtype List} +\title{arr: list related methods on Series of dtype List DEPRECATED} \usage{ Series_arr() } \value{ -Expr +Series } \description{ -Create an object namespace of all list related methods. -See the individual method pages for full details +DEPRECATED AND REMOVED FROM polars 0.9.0 use \verb{$list$} instead } \examples{ s = pl$Series(list(1:3, 1:2, NULL)) s s$arr$first() } +\seealso{ +\code{\link[=Series_list]{$list$...}} +} \keyword{Series} diff --git a/man/Series_list.Rd b/man/Series_list.Rd new file mode 100644 index 000000000..b72be22f4 --- /dev/null +++ b/man/Series_list.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/series__series.R +\name{Series_list} +\alias{Series_list} +\title{list: list related methods on Series of dtype List} +\usage{ +Series_list() +} +\value{ +Series +} +\description{ +Create an object namespace of all list related methods. 
+See the individual method pages for full details +} +\examples{ +s = pl$Series(list(1:3, 1:2, NULL)) +s +s$list$first() +} +\keyword{Series} diff --git a/man/Series_to_lit.Rd b/man/Series_to_lit.Rd index 36aa0e491..a9b0f238a 100644 --- a/man/Series_to_lit.Rd +++ b/man/Series_to_lit.Rd @@ -18,7 +18,7 @@ convert Series to literal to perform modification and return pl$Series(list(1:1, 1:2, 1:3, 1:4)) $print() $to_lit() - $arr$lengths() + $list$lengths() $sum() $cast(pl$dtypes$Int8) $lit_to_s() diff --git a/man/arr_get.Rd b/man/arr_get.Rd deleted file mode 100644 index 19a33df89..000000000 --- a/man/arr_get.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__list.R -\name{arr_get} -\alias{arr_get} -\alias{Expr_arr_get} -\alias{Expr_arr.get} -\alias{[.ExprArrNameSpace} -\title{Get list} -\format{ -function -} -\usage{ -\method{[}{ExprArrNameSpace}(x, index) -} -\arguments{ -\item{x}{ExprArrNameSpace} - -\item{index}{value to get} -} -\value{ -Expr -} -\description{ -Get the value by index in the sublists. -} -\details{ -\verb{[.ExprArrNameSpace} used as e.g. 
\code{pl$col("a")$arr[0]} same as \code{pl$col("a")$get(0)} -} -\examples{ -df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() -df$select(pl$col("a")$arr$get(0)) -df$select(pl$col("a")$arr$get(c(2, 0, -1))) -df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() -df$select(pl$col("a")$arr[0]) -df$select(pl$col("a")$arr[c(2, 0, -1)]) -} -\keyword{ExprArr} diff --git a/man/list_get.Rd b/man/list_get.Rd new file mode 100644 index 000000000..8afbe1ad0 --- /dev/null +++ b/man/list_get.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__list.R +\name{[.ExprListNameSpace} +\alias{[.ExprListNameSpace} +\title{Get list} +\usage{ +\method{[}{ExprListNameSpace}(x, index) +} +\arguments{ +\item{x}{ExprListNameSpace} + +\item{index}{value to get} +} +\description{ +Get list +} +\details{ +\verb{[.ExprListNameSpace} used as e.g. \code{pl$col("a")$list[0]} same as \code{pl$col("a")$list$get(0)} +} +\examples{ +df = pl$DataFrame(list(a = list(3:1, NULL, 1:2))) # NULL or integer() or list() +df$select(pl$col("a")$list[0]) +df$select(pl$col("a")$list[c(2, 0, -1)]) +} diff --git a/man/arr_lengths.Rd b/man/list_lengths.Rd similarity index 63% rename from man/arr_lengths.Rd rename to man/list_lengths.Rd index fcc138646..204c77832 100644 --- a/man/arr_lengths.Rd +++ b/man/list_lengths.Rd @@ -1,9 +1,9 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__list.R -\name{arr_lengths} -\alias{arr_lengths} +\name{ExprList_lengths} +\alias{ExprList_lengths} \alias{lengths} -\alias{arr.lengths} +\alias{list_lengths} \title{Lengths arrays in list} \format{ function @@ -16,6 +16,6 @@ Get the length of the arrays as UInt32 } \examples{ df = pl$DataFrame(list_of_strs = pl$Series(list(c("a", "b"), "c"))) -df$with_columns(pl$col("list_of_strs")$arr$lengths()$alias("list_of_strs_lengths"))
+df$with_columns(pl$col("list_of_strs")$list$lengths()$alias("list_of_strs_lengths")) } -\keyword{ExprArr} +\keyword{ExprList} diff --git a/man/pl_DataFrame.Rd b/man/pl_DataFrame.Rd index 758b25fed..9efcc0d94 100644 --- a/man/pl_DataFrame.Rd +++ b/man/pl_DataFrame.Rd @@ -30,7 +30,7 @@ pl$DataFrame( d = list(1:1, 1:2, 1:3, 1:4, 1:5) ) # directly from vectors -# from a list of vectors or data.frame +# from a list of vectors pl$DataFrame(list( a = c(1, 2, 3, 4, 5), b = 1:5, @@ -38,5 +38,7 @@ pl$DataFrame(list( d = list(1L, 1:2, 1:3, 1:4, 1:5) )) +# from a data.frame +pl$DataFrame(mtcars) } \keyword{DataFrame_new} diff --git a/man/to_list.Rd b/man/to_list.Rd index f5e8c6cc7..b7d756b9d 100644 --- a/man/to_list.Rd +++ b/man/to_list.Rd @@ -3,24 +3,25 @@ \name{to_list} \alias{to_list} \alias{DataFrame_to_list} -\title{return polars DataFrame as R lit of vectors} +\title{Return Polars DataFrame as a list of vectors} \usage{ DataFrame_to_list(unnest_structs = TRUE) } \arguments{ -\item{unnest_structs}{bool default true, as calling $unnest() on any struct column} +\item{unnest_structs}{Boolean. If \code{TRUE} (default), then \verb{$unnest()} is applied +on any struct column.} } \value{ R list of vectors } \description{ -return polars DataFrame as R lit of vectors +Return Polars DataFrame as a list of vectors } \details{ -This implementation for simplicity reasons relies on unnesting all structs before -exporting to R. unnest_structs = FALSE, the previous struct columns will be re- -nested. A struct in a R is a lists of lists, where each row is a list of values. -Such a structure is not very typical or efficient in R. +For simplicity reasons, this implementation relies on unnesting all structs +before exporting to R. If \code{unnest_structs = FALSE}, then \code{struct} columns +will be returned as nested lists, where each row is a list of values. Such a +structure is not very typical or efficient in R. 
} \examples{ pl$DataFrame(iris)$to_list() diff --git a/src/rust/src/arrow_interop/to_rust.rs b/src/rust/src/arrow_interop/to_rust.rs index 4ec71243a..186d2a0df 100644 --- a/src/rust/src/arrow_interop/to_rust.rs +++ b/src/rust/src/arrow_interop/to_rust.rs @@ -99,8 +99,8 @@ pub fn to_rust_df(rb: Robj) -> Result { arr.data_type(), ArrowDataType::Utf8 | ArrowDataType::Dictionary(_, _, _) ); - let arr_res: Result<_, String> = Ok(arr); - arr_res + let list_res: Result<_, String> = Ok(arr); + list_res }); let arrays_vec = crate::utils::collect_hinted_result(n_columns, array_iter)?; diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 1b6b305d5..773d85941 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -1044,31 +1044,31 @@ impl Expr { //arr/list methods - fn arr_lengths(&self) -> Self { + fn list_lengths(&self) -> Self { self.0.clone().list().lengths().into() } - pub fn arr_contains(&self, other: &Expr) -> Expr { + pub fn list_contains(&self, other: &Expr) -> Expr { self.0.clone().list().contains(other.0.clone()).into() } - fn lst_max(&self) -> Self { + fn list_max(&self) -> Self { self.0.clone().list().max().into() } - fn lst_min(&self) -> Self { + fn list_min(&self) -> Self { self.0.clone().list().min().into() } - fn lst_sum(&self) -> Self { - self.0.clone().list().sum().with_fmt("arr.sum").into() + fn list_sum(&self) -> Self { + self.0.clone().list().sum().with_fmt("list.sum").into() } - fn lst_mean(&self) -> Self { - self.0.clone().list().mean().with_fmt("arr.mean").into() + fn list_mean(&self) -> Self { + self.0.clone().list().mean().with_fmt("list.mean").into() } - fn lst_sort(&self, descending: bool) -> Self { + fn list_sort(&self, descending: bool) -> Self { self.0 .clone() .list() @@ -1076,24 +1076,24 @@ impl Expr { descending: descending, ..Default::default() }) - .with_fmt("arr.sort") + .with_fmt("list.sort") .into() } - fn lst_reverse(&self) -> Self { + fn list_reverse(&self) -> Self { self.0 .clone() .list() .reverse() 
- .with_fmt("arr.reverse") + .with_fmt("list.reverse") .into() } - fn lst_unique(&self) -> Self { - self.0.clone().list().unique().with_fmt("arr.unique").into() + fn list_unique(&self) -> Self { + self.0.clone().list().unique().with_fmt("list.unique").into() } - fn lst_take(&self, index: Robj, null_on_oob: Robj) -> RResult { + fn list_take(&self, index: Robj, null_on_oob: Robj) -> RResult { Ok(self .0 .clone() @@ -1102,44 +1102,44 @@ impl Expr { .into()) } - fn lst_get(&self, index: &Expr) -> Self { + fn list_get(&self, index: &Expr) -> Self { self.0.clone().list().get(index.clone().0).into() } - fn lst_join(&self, separator: &str) -> Self { + fn list_join(&self, separator: &str) -> Self { self.0.clone().list().join(separator).into() } - fn lst_arg_min(&self) -> Self { + fn list_arg_min(&self) -> Self { self.0.clone().list().arg_min().into() } - fn lst_arg_max(&self) -> Self { + fn list_arg_max(&self) -> Self { self.0.clone().list().arg_max().into() } - fn lst_diff(&self, n: f64, null_behavior: &str) -> List { + fn list_diff(&self, n: f64, null_behavior: &str) -> List { let expr_res = || -> Result { Ok(Expr(self.0.clone().list().diff( try_f64_into_i64(n)?, new_null_behavior(null_behavior)?, ))) }() - .map_err(|err| format!("arr.diff: {}", err)); + .map_err(|err| format!("list.diff: {}", err)); r_result_list(expr_res) } - fn lst_shift(&self, periods: f64) -> List { + fn list_shift(&self, periods: f64) -> List { let expr_res = || -> Result { Ok(Expr( self.0.clone().list().shift(try_f64_into_i64(periods)?), )) }() - .map_err(|err| format!("arr.shift: {}", err)); + .map_err(|err| format!("list.shift: {}", err)); r_result_list(expr_res) } - fn lst_slice(&self, offset: &Expr, length: Nullable<&Expr>) -> Self { + fn list_slice(&self, offset: &Expr, length: Nullable<&Expr>) -> Self { let length = match null_to_opt(length) { Some(i) => i.0.clone(), None => dsl::lit(i64::MAX), @@ -1147,12 +1147,17 @@ impl Expr { self.0.clone().list().slice(offset.0.clone(), length).into() } 
- fn lst_eval(&self, expr: &Expr, parallel: bool) -> Self { + fn list_eval(&self, expr: &Expr, parallel: bool) -> Self { use pl::*; self.0.clone().list().eval(expr.0.clone(), parallel).into() } - fn lst_to_struct(&self, width_strat: &str, name_gen: Nullable, upper_bound: f64) -> List { + fn list_to_struct( + &self, + width_strat: &str, + name_gen: Nullable, + upper_bound: f64, + ) -> List { use crate::rdatatype::new_width_strategy; use crate::utils::extendr_concurrent::ParRObj; use pl::NamedFrom; diff --git a/tests/testthat/_snaps/dataframe.md b/tests/testthat/_snaps/dataframe.md index 957c9a39a..47884b371 100644 --- a/tests/testthat/_snaps/dataframe.md +++ b/tests/testthat/_snaps/dataframe.md @@ -25,16 +25,16 @@ Code pl$DataFrame(mtcars)$with_columns(pl$lit(42)$cast(pl$Int8))$glimpse() Output - & mpg 21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2 - & cyl 6, 6, 4, 6, 8, 6, 8, 4, 4, 6 - & disp 160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, 167.6 - & hp 110, 110, 93, 110, 175, 105, 245, 62, 95, 123 - & drat 3.9, 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92 - & wt 2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19, 3.15, 3.44 - & qsec 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3 - & vs 0, 0, 1, 1, 0, 1, 0, 1, 1, 1 - & am 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 - & gear 4, 4, 4, 3, 3, 3, 3, 4, 4, 4 - & carb 4, 4, 1, 1, 2, 1, 4, 2, 2, 4 - & literal 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 + & mpg 21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2 + & cyl 6, 6, 4, 6, 8, 6, 8, 4, 4, 6 + & disp 160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, 167.6 + & hp 110, 110, 93, 110, 175, 105, 245, 62, 95, 123 + & drat 3.9, 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92 + & wt 2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19, 3.15, 3.44 + & qsec 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3 + & vs 0, 0, 1, 1, 0, 1, 0, 1, 1, 1 + & am 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 + & gear 4, 4, 4, 3, 3, 3, 3, 4, 4, 4 + & carb 4, 4, 1, 1, 2, 1, 4, 2, 2, 4 
+ & literal 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 diff --git a/tests/testthat/test-expr_arr.R b/tests/testthat/test-expr_arr.R index d52b1d3b8..e021447ea 100644 --- a/tests/testthat/test-expr_arr.R +++ b/tests/testthat/test-expr_arr.R @@ -1,6 +1,6 @@ -test_that("arr$lengths", { +test_that("list$lengths", { df = pl$DataFrame(list_of_strs = pl$Series(list(c("a", "b"), "c", character(), list(), NULL))) - l = df$with_columns(pl$col("list_of_strs")$arr$lengths()$alias("list_of_strs_lengths"))$to_list() + l = df$with_columns(pl$col("list_of_strs")$list$lengths()$alias("list_of_strs_lengths"))$to_list() expect_identical( l |> lapply(\(x) if (inherits(x, "integer64")) as.numeric(x) else x), @@ -12,7 +12,7 @@ test_that("arr$lengths", { }) -test_that("arr$sum max min mean", { +test_that("list$sum max min mean", { # outcommented ones have different behavior in R and polars ints = list( @@ -46,10 +46,10 @@ test_that("arr$sum max min mean", { df = pl$DataFrame(list(x = ints)) p_res = df$select( - pl$col("x")$arr$sum()$alias("sum"), - pl$col("x")$arr$max()$alias("max"), - pl$col("x")$arr$min()$alias("min"), - pl$col("x")$arr$mean()$alias("mean") + pl$col("x")$list$sum()$alias("sum"), + pl$col("x")$list$max()$alias("max"), + pl$col("x")$list$min()$alias("min"), + pl$col("x")$list$mean()$alias("mean") )$to_list() r_res = list( @@ -65,10 +65,10 @@ test_that("arr$sum max min mean", { df = pl$DataFrame(list(x = floats)) p_res = df$select( - pl$col("x")$arr$sum()$alias("sum"), - pl$col("x")$arr$max()$alias("max"), - pl$col("x")$arr$min()$alias("min"), - pl$col("x")$arr$mean()$alias("mean") + pl$col("x")$list$sum()$alias("sum"), + pl$col("x")$list$max()$alias("max"), + pl$col("x")$list$min()$alias("min"), + pl$col("x")$list$mean()$alias("mean") )$to_list() r_res = list( @@ -84,14 +84,14 @@ test_that("arr$sum max min mean", { ) }) -test_that("arr$reverse", { +test_that("list$reverse", { l = list( l_i32 = list(1:5, c(NA_integer_, 3:1)), l_f64 = list(c(1, 3, 2, 4, NA, Inf), (3:1) * 1), 
l_char = list(letters, LETTERS) ) df = pl$DataFrame(l) - p_res = df$select(pl$all()$arr$reverse())$to_list() + p_res = df$select(pl$all()$list$reverse())$to_list() r_res = lapply(l, lapply, rev) expect_identical(p_res, r_res) }) @@ -99,26 +99,26 @@ test_that("arr$reverse", { -test_that("arr$unique arr$sort", { +test_that("list$unique list$sort", { l = list( l_i32 = list(c(1:2, 1:2), c(NA_integer_, NA_integer_, 3L, 1:2)), l_f64 = list(c(1, 1, 2, 3, NA, Inf, NA, Inf), c(1)), l_char = list(c(letters, letters), c("a", "a", "b")) ) df = pl$DataFrame(l) - p_res = df$select(pl$all()$arr$unique()$arr$sort())$to_list() + p_res = df$select(pl$all()$list$unique()$list$sort())$to_list() r_res = lapply(l, lapply, \(x) sort(unique(x), na.last = FALSE)) expect_equal(p_res, r_res) df = pl$DataFrame(l) - p_res = df$select(pl$all()$arr$unique()$arr$sort(descending = TRUE))$to_list() + p_res = df$select(pl$all()$list$unique()$list$sort(descending = TRUE))$to_list() r_res = lapply(l, lapply, \(x) sort(unique(x), na.last = FALSE, decr = TRUE)) expect_equal(p_res, r_res) }) -test_that("arr$get", { +test_that("list$get", { l = list( l_i32 = list(c(1:2, 1:2), c(NA_integer_, NA_integer_, 3L, 1:2), integer()), l_f64 = list(c(1, 1, 2, 3, NA, Inf, NA, Inf), c(1), numeric()), @@ -127,7 +127,7 @@ test_that("arr$get", { for (i in -5:5) { df = pl$DataFrame(l) - p_res = df$select(pl$all()$arr$get(i))$to_list() + p_res = df$select(pl$all()$list$get(i))$to_list() r_res = lapply(l, sapply, \(x) pcase( i >= 0, x[i + 1], i < 0, rev(x)[-i], @@ -139,19 +139,19 @@ test_that("arr$get", { test_that("take", { l = list(1:3, 1:2, 1:1) - l_roundtrip = pl$lit(l)$arr$take(lapply(l, "-", 1L))$to_r() + l_roundtrip = pl$lit(l)$list$take(lapply(l, "-", 1L))$to_r() expect_identical(l_roundtrip, l) l = list(1:3, 4:5, 6L) expect_identical( - pl$lit(l)$arr$take(list(c(0:3), 0L, 0L), null_on_oob = TRUE)$to_r(), + pl$lit(l)$list$take(list(c(0:3), 0L, 0L), null_on_oob = TRUE)$to_r(), list(c(1:3, NA), 4L, 6L) ) expected_err = 
"Take indices are out of bounds." - expect_grepl_error(pl$lit(l)$arr$take(list(c(0:3), 0L, 0L))$to_r(), expected_err) + expect_grepl_error(pl$lit(l)$list$take(list(c(0:3), 0L, 0L))$to_r(), expected_err) }) test_that("first last head tail", { @@ -164,17 +164,17 @@ test_that("first last head tail", { # first - p_res = df$select(pl$all()$arr$first())$to_list() + p_res = df$select(pl$all()$list$first())$to_list() r_res = lapply(l, sapply, function(x) x[1]) expect_equal(p_res, r_res) # last - p_res = df$select(pl$all()$arr$last())$to_list() + p_res = df$select(pl$all()$list$last())$to_list() r_res = lapply(l, sapply, function(x) rev(x)[1]) expect_equal(p_res, r_res) for (i in 0:5) { - p_res = df$select(pl$all()$arr$head(i))$to_list() + p_res = df$select(pl$all()$list$head(i))$to_list() r_res = lapply(l, lapply, \(x) pcase( i >= 0, head(x, i), i < 0, head(x, i), @@ -184,7 +184,7 @@ test_that("first last head tail", { } for (i in 0:5) { - p_res = df$select(pl$all()$arr$tail(i))$to_list() + p_res = df$select(pl$all()$list$tail(i))$to_list() r_res = lapply(l, lapply, \(x) pcase( i >= 0, tail(x, i), i < 0, tail(x, i), @@ -198,7 +198,7 @@ test_that("first last head tail", { test_that("join", { l = list(letters, as.character(1:5)) s = pl$Series(l) - l_act = s$to_lit()$arr$join("-")$lit_to_df()$to_list() + l_act = s$to_lit()$list$join("-")$lit_to_df()$to_list() l_exp = list(sapply(l, paste, collapse = "-")) names(l_exp) = "" expect_identical(l_act, l_exp) @@ -212,8 +212,8 @@ test_that("arg_min arg_max", { ) df = pl$DataFrame(l) - l_act_arg_min = df$select(pl$all()$arr$arg_min())$to_list() - l_act_arg_max = df$select(pl$all()$arr$arg_max())$to_list() + l_act_arg_min = df$select(pl$all()$list$arg_min())$to_list() + l_act_arg_max = df$select(pl$all()$list$arg_max())$to_list() # not the same as R NA is min l_exp_arg_min = list( @@ -245,15 +245,15 @@ test_that("diff", { x - data.table::shift(x, n) } - l_act_diff_1 = df$select(pl$all()$arr$diff())$to_list() + l_act_diff_1 = 
df$select(pl$all()$list$diff())$to_list() l_exp_diff_1 = lapply(l, sapply, r_diff) expect_identical(l_act_diff_1, l_exp_diff_1) - l_act_diff_2 = df$select(pl$all()$arr$diff(n = 2))$to_list() + l_act_diff_2 = df$select(pl$all()$list$diff(n = 2))$to_list() l_exp_diff_2 = lapply(l, sapply, r_diff, n = 2) expect_identical(l_act_diff_2, l_exp_diff_2) - l_act_diff_0 = df$select(pl$all()$arr$diff(n = 0))$to_list() + l_act_diff_0 = df$select(pl$all()$list$diff(n = 0))$to_list() l_exp_diff_0 = lapply(l, sapply, r_diff, n = 0) expect_identical(l_act_diff_0, l_exp_diff_0) }) @@ -272,19 +272,19 @@ test_that("shift", { data.table::shift(x, n) # <3 data.table } - l_act_diff_1 = df$select(pl$all()$arr$shift())$to_list() + l_act_diff_1 = df$select(pl$all()$list$shift())$to_list() l_exp_diff_1 = lapply(l, sapply, r_shift) expect_identical(l_act_diff_1, l_exp_diff_1) - l_act_diff_2 = df$select(pl$all()$arr$shift(2))$to_list() + l_act_diff_2 = df$select(pl$all()$list$shift(2))$to_list() l_exp_diff_2 = lapply(l, sapply, r_shift, 2) expect_identical(l_act_diff_2, l_exp_diff_2) - l_act_diff_0 = df$select(pl$all()$arr$shift(0))$to_list() + l_act_diff_0 = df$select(pl$all()$list$shift(0))$to_list() l_exp_diff_0 = lapply(l, sapply, r_shift, 0) expect_identical(l_act_diff_0, l_exp_diff_0) - l_act_diff_m1 = df$select(pl$all()$arr$shift(-1))$to_list() + l_act_diff_m1 = df$select(pl$all()$list$shift(-1))$to_list() l_exp_diff_m1 = lapply(l, sapply, r_shift, -1) expect_identical(l_act_diff_m1, l_exp_diff_m1) }) @@ -310,31 +310,31 @@ test_that("slice", { x[s] } - l_act_slice = df$select(pl$all()$arr$slice(0, 3))$to_list() + l_act_slice = df$select(pl$all()$list$slice(0, 3))$to_list() l_exp_slice = lapply(l, lapply, r_slice, 0, 3) expect_identical(l_act_slice, l_exp_slice) - l_act_slice = df$select(pl$all()$arr$slice(1, 3))$to_list() + l_act_slice = df$select(pl$all()$list$slice(1, 3))$to_list() l_exp_slice = lapply(l, lapply, r_slice, 1, 3) expect_identical(l_act_slice, l_exp_slice) - l_act_slice 
= df$select(pl$all()$arr$slice(1, 5))$to_list() + l_act_slice = df$select(pl$all()$list$slice(1, 5))$to_list() l_exp_slice = lapply(l, lapply, r_slice, 1, 5) expect_identical(l_act_slice, l_exp_slice) - l_act_slice = df$select(pl$all()$arr$slice(-1, 1))$to_list() + l_act_slice = df$select(pl$all()$list$slice(-1, 1))$to_list() l_exp_slice = lapply(l, lapply, r_slice, -1, 1) expect_identical(l_act_slice, l_exp_slice) l2 = list(a = list(1:3, 1:2, 1:1, integer())) df2 = pl$DataFrame(l2) - l_act_slice = df2$select(pl$all()$arr$slice(-2, 2))$to_list() + l_act_slice = df2$select(pl$all()$list$slice(-2, 2))$to_list() l_exp_slice = lapply(l2, lapply, r_slice, -2, 2) expect_identical(l_act_slice, l_exp_slice) - l_act_slice = df2$select(pl$all()$arr$slice(1, ))$to_list() + l_act_slice = df2$select(pl$all()$list$slice(1, ))$to_list() l_exp_slice = lapply(l2, lapply, r_slice, 1) expect_identical(l_act_slice, l_exp_slice) }) @@ -348,9 +348,9 @@ test_that("contains", { df = pl$DataFrame(l) l_act = df$select( - pl$col("i32")$arr$contains(2L), - pl$col("f64")$arr$contains(Inf), - pl$col("utf")$arr$contains("a") + pl$col("i32")$list$contains(2L), + pl$col("f64")$list$contains(Inf), + pl$col("utf")$list$contains("a") )$to_list() l_exp = list( @@ -370,17 +370,17 @@ test_that("concat", { ) expect_identical( - df$select(pl$col("a")$arr$concat(pl$col("b")))$to_list(), + df$select(pl$col("a")$list$concat(pl$col("b")))$to_list(), list(a = list(c("a", "b", "c"), c("x", "y", "z"))) ) expect_identical( - df$select(pl$col("a")$arr$concat("hello from R"))$to_list(), + df$select(pl$col("a")$list$concat("hello from R"))$to_list(), list(a = list(c("a", "hello from R"), c("x", "hello from R"))) ) expect_identical( - df$select(pl$col("a")$arr$concat(c("hello", "world")))$to_list(), + df$select(pl$col("a")$list$concat(c("hello", "world")))$to_list(), list(a = list(c("a", "hello"), c("x", "world"))) ) }) @@ -390,12 +390,12 @@ test_that("concat", { test_that("to_struct", { l = list(integer(), 1:2, 1:3, 
1:2) df = pl$DataFrame(list(a = l)) - act_1 = df$select(pl$col("a")$arr$to_struct( + act_1 = df$select(pl$col("a")$list$to_struct( n_field_strategy = "first_non_null", name_generator = \(idx) paste0("hello_you_", idx) ))$to_list() - act_2 = df$select(pl$col("a")$arr$to_struct( + act_2 = df$select(pl$col("a")$list$to_struct( n_field_strategy = "max_width", name_generator = \(idx) paste0("hello_you_", idx) ))$to_list() @@ -425,7 +425,7 @@ test_that("to_struct", { test_that("eval", { df = pl$DataFrame(a = list(a = c(1, 8, 3), b = c(4, 5, 2))) l_act = df$select(pl$all()$cast(pl$dtypes$Float64))$with_columns( - pl$concat_list(c("a", "b"))$arr$eval(pl$element()$rank())$alias("rank") + pl$concat_list(c("a", "b"))$list$eval(pl$element()$rank())$alias("rank") )$to_list() expect_identical( l_act, @@ -436,3 +436,12 @@ test_that("eval", { ) ) }) + + +test_that("Expr$arr$ warn once but give same ns as $list$", { + runtime_state$warned_deprecate_sns_arr_expr = FALSE + expect_warning(pl$lit(42)$arr) + expect_no_warning(pl$lit(42)$arr) + expect_no_warning(pl$lit(42)$list) + expect_identical(ls(pl$lit(42)$arr), ls(pl$lit(42)$list)) +}) diff --git a/tests/testthat/test-expr_string.R b/tests/testthat/test-expr_string.R index 9e9fd31ea..bd8bb1c44 100644 --- a/tests/testthat/test-expr_string.R +++ b/tests/testthat/test-expr_string.R @@ -120,7 +120,7 @@ test_that("str$concat", { # Series list of strings to Series of concatenated strings df = pl$DataFrame(list(bar = list(c("a", "b", "c"), c("1", "2", "æ")))) expect_identical( - df$select(pl$col("bar")$arr$eval(pl$col()$str$concat())$arr$first())$to_list()$bar, + df$select(pl$col("bar")$list$eval(pl$col()$str$concat())$list$first())$to_list()$bar, sapply(df$to_list()[[1]], paste, collapse = "-") ) }) diff --git a/tests/testthat/test-series.R b/tests/testthat/test-series.R index 0d5ba774c..ec2f33623 100644 --- a/tests/testthat/test-series.R +++ b/tests/testthat/test-series.R @@ -534,3 +534,12 @@ test_that("n_unique", { 
expect_identical(pl$Series(x)$n_unique(), 6) expect_grepl_error(pl$Series(c())$n_unique(), "operation not supported for dtype") }) + + +test_that("Series$arr$ warn once but give same ns as $list$", { + runtime_state$warned_deprecate_sns_arr_series = FALSE + expect_warning(pl$Series(42)$arr) + expect_no_warning(pl$Series(42)$arr) + expect_no_warning(pl$Series(42)$list) + expect_identical(ls(pl$Series(42)$arr), ls(pl$Series(42)$list)) +}) diff --git a/vignettes/userguide.Rmd b/vignettes/userguide.Rmd index e909c7180..cbe77b89a 100755 --- a/vignettes/userguide.Rmd +++ b/vignettes/userguide.Rmd @@ -470,7 +470,7 @@ grades$with_columns( # select all columns except the intermediate list pl$all()$exclude("all_grades"), # compute the rank by calling `arr$eval` - pl$col("all_grades")$arr$eval(rank_pct, parallel = TRUE)$alias("grades_rank") + pl$col("all_grades")$list$eval(rank_pct, parallel = TRUE)$alias("grades_rank") )) ```