From fa23ecac0e04eb31e36259aac209d6daee4bd3f0 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Mon, 15 Apr 2024 13:13:40 +0200 Subject: [PATCH] init --- NEWS.md | 1 + R/expr__string.R | 42 +++++++++---------- R/extendr-wrappers.R | 2 +- man/ExprStr_len_bytes.Rd | 2 +- man/ExprStr_len_chars.Rd | 2 +- ...Str_parse_int.Rd => ExprStr_to_integer.Rd} | 12 +++--- src/rust/src/lazy/dsl.rs | 3 +- tests/testthat/_snaps/after-wrappers.md | 16 +++---- tests/testthat/test-expr_string.R | 12 +++--- 9 files changed, 46 insertions(+), 46 deletions(-) rename man/{ExprStr_parse_int.Rd => ExprStr_to_integer.Rd} (74%) diff --git a/NEWS.md b/NEWS.md index b19a2d9c7..04010074b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -154,6 +154,7 @@ - The following deprecated functions are now removed: `pl$threadpool_size()`, `$with_row_count()`, `$with_row_count()` (#965). - In `$group_by_dynamic()`, the first datapoint is always preserved (#1034). +- `$str$parse_int()` is renamed `$str$to_integer()` (#1037). 
### New features diff --git a/R/expr__string.R b/R/expr__string.R index 38a5e3e74..5e96f78bb 100644 --- a/R/expr__string.R +++ b/R/expr__string.R @@ -391,7 +391,7 @@ ExprStr_strip_chars_end = function(matches = NULL) { #' some_floats_expr$cast(pl$Int64)$cast(pl$String)$str$zfill(5)$to_r() ExprStr_zfill = function(alignment) { .pr$Expr$str_zfill(self, alignment) |> - unwrap("in str$zfill():") + unwrap("in $str$zfill():") } @@ -409,7 +409,7 @@ ExprStr_zfill = function(alignment) { #' df$select(pl$col("a")$str$pad_end(8, "*")) ExprStr_pad_end = function(width, fillchar = " ") { .pr$Expr$str_pad_end(self, width, fillchar) |> - unwrap("in str$pad_end(): ") + unwrap("in $str$pad_end(): ") } @@ -425,7 +425,7 @@ ExprStr_pad_end = function(width, fillchar = " ") { #' df$select(pl$col("a")$str$pad_start(8, "*")) ExprStr_pad_start = function(width, fillchar = " ") { .pr$Expr$str_pad_start(self, width, fillchar) |> - unwrap("in str$pad_start(): ") + unwrap("in $str$pad_start(): ") } @@ -528,7 +528,7 @@ ExprStr_starts_with = function(sub) { #' df$select(pl$col("json_val")$str$json_decode(dtype)) ExprStr_json_decode = function(dtype, infer_schema_length = 100) { .pr$Expr$str_json_decode(self, dtype, infer_schema_length) |> - unwrap("in str$json_decode():") + unwrap("in $str$json_decode():") } #' Extract the first match of JSON string with the provided JSONPath expression @@ -549,7 +549,7 @@ ExprStr_json_decode = function(dtype, infer_schema_length = 100) { #' df$select(pl$col("json_val")$str$json_path_match("$.a")) ExprStr_json_path_match = function(json_path) { .pr$Expr$str_json_path_match(self, json_path) |> - unwrap("in str$json_path_match(): ") + unwrap("in $str$json_path_match(): ") } @@ -636,7 +636,7 @@ ExprStr_encode = function(encoding) { #' ) ExprStr_extract = function(pattern, group_index) { .pr$Expr$str_extract(self, pattern, group_index) |> - unwrap("in str$extract(): ") + unwrap("in $str$extract(): ") } @@ -699,7 +699,7 @@ ExprStr_count_matches = function(pattern, 
..., literal = FALSE) { ExprStr_split = function(by, inclusive = FALSE) { unwrap( .pr$Expr$str_split(self, result(by), result(inclusive)), - context = "in str$split():" + context = "in $str$split():" ) } @@ -723,7 +723,7 @@ ExprStr_split = function(by, inclusive = FALSE) { ExprStr_split_exact = function(by, n, inclusive = FALSE) { unwrap( .pr$Expr$str_split_exact(self, by, result(n), result(inclusive)), - context = "in str$split_exact():" + context = "in $str$split_exact():" ) } @@ -749,7 +749,7 @@ ExprStr_split_exact = function(by, n, inclusive = FALSE) { #' s3 = pl$col("s")$str$splitn(by = "_", 3) #' ) ExprStr_splitn = function(by, n) { - .pr$Expr$str_splitn(self, result(by), result(n)) |> unwrap("in str$splitn():") + .pr$Expr$str_splitn(self, result(by), result(n)) |> unwrap("in $str$splitn():") } @@ -850,7 +850,7 @@ ExprStr_replace_all = function(pattern, value, ..., literal = FALSE) { #' ) ExprStr_slice = function(offset, length = NULL) { .pr$Expr$str_slice(self, result(offset), result(length)) |> - unwrap("in str$slice():") + unwrap("in $str$slice():") } #' Returns a column with a separate row for every string character @@ -862,7 +862,7 @@ ExprStr_slice = function(offset, length = NULL) { #' df$select(pl$col("a")$str$explode()) ExprStr_explode = function() { .pr$Expr$str_explode(self) |> - unwrap("in str$explode():") + unwrap("in $str$explode():") } # TODO: rename to `to_integer` @@ -877,14 +877,14 @@ ExprStr_explode = function() { #' @return Expr: Series of dtype i32. 
#' @examples #' df = pl$DataFrame(bin = c("110", "101", "010")) -#' df$select(pl$col("bin")$str$parse_int()) -#' df$select(pl$col("bin")$str$parse_int(10)) +#' df$select(pl$col("bin")$str$to_integer()) +#' df$select(pl$col("bin")$str$to_integer(10)) #' #' # Convert to null if the string is not a valid integer when `strict = FALSE` #' df = pl$DataFrame(x = c("1", "2", "foo")) -#' df$select(pl$col("x")$str$parse_int(10, FALSE)) -ExprStr_parse_int = function(base = 2, strict = TRUE) { - .pr$Expr$str_parse_int(self, base, strict) |> unwrap("in str$parse_int():") +#' df$select(pl$col("x")$str$to_integer(10, FALSE)) +ExprStr_to_integer = function(base = 2, strict = TRUE) { + .pr$Expr$str_to_integer(self, base, strict) |> unwrap("in $str$to_integer():") } #' Returns string values in reversed order @@ -896,7 +896,7 @@ ExprStr_parse_int = function(base = 2, strict = TRUE) { #' df$with_columns(reversed = pl$col("text")$str$reverse()) ExprStr_reverse = function() { .pr$Expr$str_reverse(self) |> - unwrap("in str$reverse():") + unwrap("in $str$reverse():") } #' Use the aho-corasick algorithm to find matches @@ -924,7 +924,7 @@ ExprStr_reverse = function() { #' ) ExprStr_contains_any = function(patterns, ..., ascii_case_insensitive = FALSE) { .pr$Expr$str_contains_any(self, patterns, ascii_case_insensitive) |> - unwrap("in str$contains_any():") + unwrap("in $str$contains_any():") } #' Use the aho-corasick algorithm to replace many matches @@ -962,7 +962,7 @@ ExprStr_contains_any = function(patterns, ..., ascii_case_insensitive = FALSE) { #' ) ExprStr_replace_many = function(patterns, replace_with, ascii_case_insensitive = FALSE) { .pr$Expr$str_replace_many(self, patterns, replace_with, ascii_case_insensitive) |> - unwrap("in str$replace_many():") + unwrap("in $str$replace_many():") } @@ -1000,7 +1000,7 @@ ExprStr_replace_many = function(patterns, replace_with, ascii_case_insensitive = #' )$unnest("captures") ExprStr_extract_groups = function(pattern) { 
.pr$Expr$str_extract_groups(self, pattern) |> - unwrap("in str$extract_groups():") + unwrap("in $str$extract_groups():") } #' Return the index position of the first substring matching a pattern @@ -1024,5 +1024,5 @@ ExprStr_extract_groups = function(pattern) { #' ) ExprStr_find = function(pattern, ..., literal = FALSE, strict = TRUE) { .pr$Expr$str_find(self, pattern, literal, strict) |> - unwrap("in str$find():") + unwrap("in $str$find():") } diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index f45195176..b6120a72e 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -1054,7 +1054,7 @@ RPolarsExpr$str_slice <- function(offset, length) .Call(wrap__RPolarsExpr__str_s RPolarsExpr$str_explode <- function() .Call(wrap__RPolarsExpr__str_explode, self) -RPolarsExpr$str_parse_int <- function(base, strict) .Call(wrap__RPolarsExpr__str_parse_int, self, base, strict) +RPolarsExpr$str_to_integer <- function(base, strict) .Call(wrap__RPolarsExpr__str_to_integer, self, base, strict) RPolarsExpr$str_reverse <- function() .Call(wrap__RPolarsExpr__str_reverse, self) diff --git a/man/ExprStr_len_bytes.Rd b/man/ExprStr_len_bytes.Rd index 6baecca54..29c2db918 100644 --- a/man/ExprStr_len_bytes.Rd +++ b/man/ExprStr_len_bytes.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__string.R \name{ExprStr_len_bytes} \alias{ExprStr_len_bytes} -\title{Get the number of bytes in strings} +\title{Get the number of bytes in strings} \usage{ ExprStr_len_bytes() } diff --git a/man/ExprStr_len_chars.Rd b/man/ExprStr_len_chars.Rd index 8ff228e15..0befaf9cd 100644 --- a/man/ExprStr_len_chars.Rd +++ b/man/ExprStr_len_chars.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__string.R \name{ExprStr_len_chars} \alias{ExprStr_len_chars} -\title{Get the number of characters in strings} +\title{Get the number of characters in strings} \usage{ ExprStr_len_chars() } diff --git a/man/ExprStr_parse_int.Rd b/man/ExprStr_to_integer.Rd similarity index 74% rename from 
man/ExprStr_parse_int.Rd rename to man/ExprStr_to_integer.Rd index fdf599e16..e805f00b3 100644 --- a/man/ExprStr_parse_int.Rd +++ b/man/ExprStr_to_integer.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__string.R -\name{ExprStr_parse_int} -\alias{ExprStr_parse_int} +\name{ExprStr_to_integer} +\alias{ExprStr_to_integer} \title{Parse integers with base radix from strings} \usage{ -ExprStr_parse_int(base = 2, strict = TRUE) +ExprStr_to_integer(base = 2, strict = TRUE) } \arguments{ \item{base}{Positive integer which is the base of the string we are parsing. @@ -21,11 +21,11 @@ Parse integers with base 2 by default. } \examples{ df = pl$DataFrame(bin = c("110", "101", "010")) -df$select(pl$col("bin")$str$parse_int()) -df$select(pl$col("bin")$str$parse_int(10)) +df$select(pl$col("bin")$str$to_integer()) +df$select(pl$col("bin")$str$to_integer(10)) # Convert to null if the string is not a valid integer when `strict = FALSE` df = pl$DataFrame(x = c("1", "2", "foo")) -df$select(pl$col("x")$str$parse_int(10, FALSE)) +df$select(pl$col("x")$str$to_integer(10, FALSE)) } \keyword{ExprStr} diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index fee626b35..64fc4721b 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -2308,13 +2308,12 @@ impl RPolarsExpr { } // TODO: rename to `str_to_integer` - pub fn str_parse_int(&self, base: Robj, strict: Robj) -> RResult { + pub fn str_to_integer(&self, base: Robj, strict: Robj) -> RResult { Ok(self .0 .clone() .str() .to_integer(robj_to!(PLExprCol, base)?, robj_to!(bool, strict)?) 
- .with_fmt("str.parse_int") .into()) } diff --git a/tests/testthat/_snaps/after-wrappers.md b/tests/testthat/_snaps/after-wrappers.md index e38cb3ee5..3941605f6 100644 --- a/tests/testthat/_snaps/after-wrappers.md +++ b/tests/testthat/_snaps/after-wrappers.md @@ -415,14 +415,14 @@ [277] "str_json_decode" "str_json_path_match" [279] "str_len_bytes" "str_len_chars" [281] "str_pad_end" "str_pad_start" - [283] "str_parse_int" "str_replace" - [285] "str_replace_all" "str_replace_many" - [287] "str_reverse" "str_slice" - [289] "str_split" "str_split_exact" - [291] "str_splitn" "str_starts_with" - [293] "str_strip_chars" "str_strip_chars_end" - [295] "str_strip_chars_start" "str_to_date" - [297] "str_to_datetime" "str_to_lowercase" + [283] "str_replace" "str_replace_all" + [285] "str_replace_many" "str_reverse" + [287] "str_slice" "str_split" + [289] "str_split_exact" "str_splitn" + [291] "str_starts_with" "str_strip_chars" + [293] "str_strip_chars_end" "str_strip_chars_start" + [295] "str_to_date" "str_to_datetime" + [297] "str_to_integer" "str_to_lowercase" [299] "str_to_time" "str_to_titlecase" [301] "str_to_uppercase" "str_zfill" [303] "struct_field_by_name" "struct_rename_fields" diff --git a/tests/testthat/test-expr_string.R b/tests/testthat/test-expr_string.R index 4c7c45900..70dfadc1b 100644 --- a/tests/testthat/test-expr_string.R +++ b/tests/testthat/test-expr_string.R @@ -685,28 +685,28 @@ test_that("str$str_explode", { }) -test_that("str$parse_int", { +test_that("str$to_integer", { expect_identical( - pl$lit(c("110", "101", "010"))$str$parse_int(2)$to_r(), + pl$lit(c("110", "101", "010"))$str$to_integer(2)$to_r(), c(6, 5, 2) ) expect_identical( - pl$lit(c("110", "101", "010"))$str$parse_int()$to_r(), + pl$lit(c("110", "101", "010"))$str$to_integer()$to_r(), c(6, 5, 2) ) expect_identical( - pl$lit(c("110", "101", "010"))$str$parse_int(10)$to_r(), + pl$lit(c("110", "101", "010"))$str$to_integer(10)$to_r(), c(110, 101, 10) ) expect_identical( - pl$lit(c("110", 
"101", "hej"))$str$parse_int(10, FALSE)$to_r(), + pl$lit(c("110", "101", "hej"))$str$to_integer(10, FALSE)$to_r(), c(110, 101, NA) ) - expect_grepl_error(pl$lit("foo")$str$parse_int()$to_r(), "strict integer parsing failed for 1 value") + expect_grepl_error(pl$lit("foo")$str$to_integer()$to_r(), "strict integer parsing failed for 1 value") }) test_that("str$reverse()", {