From 4ad895093f0bffa2366f89f991e1404845d2d7ed Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Mon, 1 Jul 2024 13:26:44 +0200 Subject: [PATCH] use ".r" instead of "comment" as chunk engine --- vignettes/userguide.Rmd | 154 ++++++++++++++++++++-------------------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/vignettes/userguide.Rmd b/vignettes/userguide.Rmd index 3537305ae..e2663d4b8 100755 --- a/vignettes/userguide.Rmd +++ b/vignettes/userguide.Rmd @@ -16,7 +16,7 @@ options(rmarkdown.html_vignette.check_title = FALSE) ``` -````{comment} +```{.r} These functions/methods are either missing, broken, or Vincent can't figure out how to use them. * `Series_shift` @@ -33,7 +33,7 @@ Requires new Polars version: * `df$sample()` * `df$describe()` -```` +``` [The Polars User Guide](https://pola-rs.github.io/polars-book/user-guide/) is a detailed tutorial about the Polars DataFrame library. Its goal is to introduce you to Polars by going through examples and comparing it to other solutions. Some design choices are introduced there. The guide also introduces you to optimal usage of Polars. The Polars User Guide is available at this link: @@ -271,82 +271,82 @@ dataset$ collect() ``` - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +```{.r} +compute_age = function() 2021 - pl$col("birthday")$dt$year() + +avg_birthday = function(gender) { + compute_age()$filter(pl$col("gender") == gender)$mean()$alias(sprintf("avg %s birthday", gender)) +} + +q = ( + dataset$lazy()$ + group_by("state")$ + agg( + avg_birthday("M"), + avg_birthday("F"), + (pl$col("gender") == "M")$sum()$alias("# male"), + (pl$col("gender") == "F")$sum()$alias("# female") + )$ + limit(5) +) +q$collect() + +# +# get_person <- function() pl$col("first_name") + pl$lit(" ") + pl$col("last_name") +# q = ( +# dataset$lazy() +# $sort("birthday", descending=True) +# $group_by(["state"]) +# $agg( +# [ +# get_person()$first()$alias("youngest"), +# get_person()$last()$alias("oldest"), +# ] +# ) +# $limit(5) +# ) +# q$collect() +# +# get_person <- function() pl$col("first_name") + pl$lit(" ") + pl$col("last_name") +# q = ( +# dataset$lazy() +# $sort("birthday", descending=True) +# $group_by(["state"]) +# $agg( +# [ +# get_person()$first()$alias("youngest"), +# get_person()$last()$alias("oldest"), +# get_person()$sort()$first()$alias("alphabetical_first"), +# ] +# ) +# $limit(5) +# ) +# q$collect() +# +# q = ( +# dataset$lazy() +# $sort("birthday", descending=True) +# $group_by(["state"]) +# $agg( +# [ +# get_person()$first()$alias("youngest"), +# get_person()$last()$alias("oldest"), +# get_person()$sort()$first()$alias("alphabetical_first"), +# pl$col("gender")$sort_by("first_name")$first()$alias("gender"), +# ] +# ) +# $sort("state") +# $limit(5) +# ) +# q$collect() +``` ## Folds -```{comment} +```{.r} df = pl$DataFrame( "a" = c(1, 2, 3), "b" = c(10, 20, 30) @@ -396,7 +396,7 @@ df = pl$read_csv( ) ``` -```{comment} +```{.r} df$select( "Type 1", "Type 2", @@ -407,7 +407,7 @@ df$select( ``` -```{comment} +```{.r} filtered = df$ filter(pl$col("Type 2") == "Psychic")$ select(c("Name", "Type 1", "Speed")) @@ -449,7 +449,7 @@ df$sort("Type 1")$select( # List context and row wise computations -```{comment} +```{.r} grades = pl$DataFrame( "student" = c("bas", "laura", "tim", "jenny"), "arithmetic" = c(10, 5, 6, 8), @@ -477,7 +477,7 @@ grades$with_columns( # Custom functions -```{comment} +```{.r} df = pl$DataFrame( "keys" = c("a", "a", "b"), "values" = c(10, 7, 1) @@ -621,7 +621,7 @@ df$group_by("fruits")$ ``` -```{comment} +```{.r} # We can explode the list column "cars" to a new row for each element in the list df$ # sort("cars")$