Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor!: $str$parse_int() -> $str$to_integer() #1038

Merged
merged 12 commits into from
Apr 15, 2024
9 changes: 7 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@
- In `$dt$convert_time_zone()` and `$dt$replace_time_zone()`, the `tz`
argument is renamed to `time_zone` (#944).
- In `$str$strptime()`, the argument `datatype` is renamed to `dtype` (#939).
- In `$str$parse_int()`, argument `radix` is renamed to `base` (#1034).
- In `$str$to_integer()` (renamed from `$str$parse_int()`), argument `radix` is
renamed to `base` (#1038).

2. Change in the way arguments are passed:

Expand All @@ -84,6 +85,8 @@
the `literal` argument must be named (#987).
- In `$str$strptime()`, `$str$to_date()`, `$str$to_datetime()`, and
`$str$to_time()`, all arguments (except the first one) must be named (#939).
- In `$str$to_integer()` (renamed from `$str$parse_int()`), all arguments
should be named (#1038).
eitsupi marked this conversation as resolved.
Show resolved Hide resolved
- In `pl$date_range()`, the arguments `closed`, `time_unit`, and `time_zone`
must be named (#950).
- In `$set_sorted()` and `$sort_by()`, argument `descending` must be named
Expand Down Expand Up @@ -136,7 +139,8 @@
only accepts `"auto"`, `"columns"`, `"row_groups"`, and `"none"`.
Previously, it also accepted upper-case notation of `"auto"`, `"columns"`,
`"none"`, and `"RowGroups"` instead of `"row_groups"` (#1033).

- In `$str$to_integer()` (renamed from `$str$parse_int()`), the default
value of `base` is changed from `2` to `10L` (#1038).
eitsupi marked this conversation as resolved.
Show resolved Hide resolved

- The usage of `pl$date_range()` to create a range of `Datetime` data type is
deprecated. `pl$date_range()` will always create a range of `Date` data type
Expand All @@ -154,6 +158,7 @@
- The following deprecated functions are now removed: `pl$threadpool_size()`,
`<DataFrame>$with_row_count()`, `<LazyFrame>$with_row_count()` (#965).
- In `$group_by_dynamic()`, the first datapoint is always preserved (#1034).
- `$str$parse_int()` is renamed to `$str$to_integer()` (and completely rewritten) (#1038).
eitsupi marked this conversation as resolved.
Show resolved Hide resolved


### New features
Expand Down
70 changes: 36 additions & 34 deletions R/expr__string.R
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ ExprStr_strip_chars_end = function(matches = NULL) {
#' some_floats_expr$cast(pl$Int64)$cast(pl$String)$str$zfill(5)$to_r()
ExprStr_zfill = function(alignment) {
.pr$Expr$str_zfill(self, alignment) |>
unwrap("in str$zfill():")
unwrap("in $str$zfill():")
}


Expand All @@ -409,7 +409,7 @@ ExprStr_zfill = function(alignment) {
#' df$select(pl$col("a")$str$pad_end(8, "*"))
ExprStr_pad_end = function(width, fillchar = " ") {
.pr$Expr$str_pad_end(self, width, fillchar) |>
unwrap("in str$pad_end(): ")
unwrap("in $str$pad_end(): ")
}


Expand All @@ -425,7 +425,7 @@ ExprStr_pad_end = function(width, fillchar = " ") {
#' df$select(pl$col("a")$str$pad_start(8, "*"))
ExprStr_pad_start = function(width, fillchar = " ") {
.pr$Expr$str_pad_start(self, width, fillchar) |>
unwrap("in str$pad_start(): ")
unwrap("in $str$pad_start(): ")
}


Expand Down Expand Up @@ -528,7 +528,7 @@ ExprStr_starts_with = function(sub) {
#' df$select(pl$col("json_val")$str$json_decode(dtype))
ExprStr_json_decode = function(dtype, infer_schema_length = 100) {
.pr$Expr$str_json_decode(self, dtype, infer_schema_length) |>
unwrap("in str$json_decode():")
unwrap("in $str$json_decode():")
}

#' Extract the first match of JSON string with the provided JSONPath expression
Expand All @@ -549,7 +549,7 @@ ExprStr_json_decode = function(dtype, infer_schema_length = 100) {
#' df$select(pl$col("json_val")$str$json_path_match("$.a"))
ExprStr_json_path_match = function(json_path) {
.pr$Expr$str_json_path_match(self, json_path) |>
unwrap("in str$json_path_match(): ")
unwrap("in $str$json_path_match(): ")
}


Expand Down Expand Up @@ -636,7 +636,7 @@ ExprStr_encode = function(encoding) {
#' )
ExprStr_extract = function(pattern, group_index) {
.pr$Expr$str_extract(self, pattern, group_index) |>
unwrap("in str$extract(): ")
unwrap("in $str$extract(): ")
}


Expand Down Expand Up @@ -699,7 +699,7 @@ ExprStr_count_matches = function(pattern, ..., literal = FALSE) {
ExprStr_split = function(by, inclusive = FALSE) {
unwrap(
.pr$Expr$str_split(self, result(by), result(inclusive)),
context = "in str$split():"
context = "in $str$split():"
)
}

Expand All @@ -723,7 +723,7 @@ ExprStr_split = function(by, inclusive = FALSE) {
ExprStr_split_exact = function(by, n, inclusive = FALSE) {
unwrap(
.pr$Expr$str_split_exact(self, by, result(n), result(inclusive)),
context = "in str$split_exact():"
context = "in $str$split_exact():"
)
}

Expand All @@ -749,7 +749,7 @@ ExprStr_split_exact = function(by, n, inclusive = FALSE) {
#' s3 = pl$col("s")$str$splitn(by = "_", 3)
#' )
ExprStr_splitn = function(by, n) {
.pr$Expr$str_splitn(self, result(by), result(n)) |> unwrap("in str$splitn():")
.pr$Expr$str_splitn(self, result(by), result(n)) |> unwrap("in $str$splitn():")
}


Expand Down Expand Up @@ -850,7 +850,7 @@ ExprStr_replace_all = function(pattern, value, ..., literal = FALSE) {
#' )
ExprStr_slice = function(offset, length = NULL) {
.pr$Expr$str_slice(self, result(offset), result(length)) |>
unwrap("in str$slice():")
unwrap("in $str$slice():")
}

#' Returns a column with a separate row for every string character
Expand All @@ -862,29 +862,31 @@ ExprStr_slice = function(offset, length = NULL) {
#' df$select(pl$col("a")$str$explode())
ExprStr_explode = function() {
.pr$Expr$str_explode(self) |>
unwrap("in str$explode():")
unwrap("in $str$explode():")
}

# TODO: rename to `to_integer`
#' Parse integers with base radix from strings

#' Convert a String column into an Int64 column with base radix
#'
#' @description Parse integers with base 2 by default.
#' @keywords ExprStr
#' @param base Positive integer which is the base of the string we are parsing.
#' Default is 2.
#' @param strict If `TRUE` (default), integer overflow will raise an error.
#' Otherwise, they will be converted to `null`.
#' @return Expr: Series of dtype i32.
#' @param ... Ignored.
#' @param base A positive integer or expression which is the base of the string
#' we are parsing. Characters are parsed as column names. Default: `10L`.
#' @param strict A logical. If `TRUE` (default), raise any ParseError or overflow
#' as ComputeError. If `FALSE`, silently convert to `null`.
eitsupi marked this conversation as resolved.
Show resolved Hide resolved
#' @return [Expression][Expr_class] of data type `Int64`.
#' @examples
#' df = pl$DataFrame(bin = c("110", "101", "010"))
#' df$select(pl$col("bin")$str$parse_int())
#' df$select(pl$col("bin")$str$parse_int(10))
#'
#' # Convert to null if the string is not a valid integer when `strict = FALSE`
#' df = pl$DataFrame(x = c("1", "2", "foo"))
#' df$select(pl$col("x")$str$parse_int(10, FALSE))
ExprStr_parse_int = function(base = 2, strict = TRUE) {
.pr$Expr$str_parse_int(self, base, strict) |> unwrap("in str$parse_int():")
#' df = pl$DataFrame(bin = c("110", "101", "010", "invalid"))
#' df$with_columns(
#' parsed = pl$col("bin")$str$to_integer(base = 2, strict = FALSE)
#' )
#'
#' df = pl$DataFrame(hex = c("fa1e", "ff00", "cafe", NA))
#' df$with_columns(
#' parsed = pl$col("hex")$str$to_integer(base = 16, strict = TRUE)
#' )
ExprStr_to_integer = function(..., base = 10L, strict = TRUE) {
.pr$Expr$str_to_integer(self, base, strict) |>
unwrap("in $str$to_integer():")
}

#' Returns string values in reversed order
Expand All @@ -896,7 +898,7 @@ ExprStr_parse_int = function(base = 2, strict = TRUE) {
#' df$with_columns(reversed = pl$col("text")$str$reverse())
ExprStr_reverse = function() {
.pr$Expr$str_reverse(self) |>
unwrap("in str$reverse():")
unwrap("in $str$reverse():")
}

#' Use the aho-corasick algorithm to find matches
Expand Down Expand Up @@ -924,7 +926,7 @@ ExprStr_reverse = function() {
#' )
ExprStr_contains_any = function(patterns, ..., ascii_case_insensitive = FALSE) {
.pr$Expr$str_contains_any(self, patterns, ascii_case_insensitive) |>
unwrap("in str$contains_any():")
unwrap("in $str$contains_any():")
}

#' Use the aho-corasick algorithm to replace many matches
Expand Down Expand Up @@ -962,7 +964,7 @@ ExprStr_contains_any = function(patterns, ..., ascii_case_insensitive = FALSE) {
#' )
ExprStr_replace_many = function(patterns, replace_with, ascii_case_insensitive = FALSE) {
.pr$Expr$str_replace_many(self, patterns, replace_with, ascii_case_insensitive) |>
unwrap("in str$replace_many():")
unwrap("in $str$replace_many():")
}


Expand Down Expand Up @@ -1000,7 +1002,7 @@ ExprStr_replace_many = function(patterns, replace_with, ascii_case_insensitive =
#' )$unnest("captures")
ExprStr_extract_groups = function(pattern) {
.pr$Expr$str_extract_groups(self, pattern) |>
unwrap("in str$extract_groups():")
unwrap("in $str$extract_groups():")
}

#' Return the index position of the first substring matching a pattern
Expand All @@ -1024,5 +1026,5 @@ ExprStr_extract_groups = function(pattern) {
#' )
ExprStr_find = function(pattern, ..., literal = FALSE, strict = TRUE) {
.pr$Expr$str_find(self, pattern, literal, strict) |>
unwrap("in str$find():")
unwrap("in $str$find():")
}
2 changes: 1 addition & 1 deletion R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1054,7 +1054,7 @@ RPolarsExpr$str_slice <- function(offset, length) .Call(wrap__RPolarsExpr__str_s

RPolarsExpr$str_explode <- function() .Call(wrap__RPolarsExpr__str_explode, self)

RPolarsExpr$str_parse_int <- function(base, strict) .Call(wrap__RPolarsExpr__str_parse_int, self, base, strict)
RPolarsExpr$str_to_integer <- function(base, strict) .Call(wrap__RPolarsExpr__str_to_integer, self, base, strict)

RPolarsExpr$str_reverse <- function() .Call(wrap__RPolarsExpr__str_reverse, self)

Expand Down
2 changes: 1 addition & 1 deletion man/ExprStr_len_bytes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/ExprStr_len_chars.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 0 additions & 31 deletions man/ExprStr_parse_int.Rd

This file was deleted.

34 changes: 34 additions & 0 deletions man/ExprStr_to_integer.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions src/rust/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2307,14 +2307,15 @@ impl RPolarsExpr {
Ok(self.0.clone().str().explode().into())
}

// TODO: rename to `str_to_integer`
pub fn str_parse_int(&self, base: Robj, strict: Robj) -> RResult<Self> {
pub fn str_to_integer(&self, base: Robj, strict: Robj) -> RResult<Self> {
let base = robj_to!(PLExprCol, base)?;
let strict = robj_to!(bool, strict)?;
Ok(self
.0
.clone()
.str()
.to_integer(robj_to!(PLExprCol, base)?, robj_to!(bool, strict)?)
.with_fmt("str.parse_int")
.to_integer(base, strict)
.with_fmt("str.to_integer")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this can be removed (especially since it's the python syntax)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it might be the case too, but I cannot be sure because there are a large number of the same items in other places.

.into())
}

Expand Down
16 changes: 8 additions & 8 deletions tests/testthat/_snaps/after-wrappers.md
Original file line number Diff line number Diff line change
Expand Up @@ -415,14 +415,14 @@
[277] "str_json_decode" "str_json_path_match"
[279] "str_len_bytes" "str_len_chars"
[281] "str_pad_end" "str_pad_start"
[283] "str_parse_int" "str_replace"
[285] "str_replace_all" "str_replace_many"
[287] "str_reverse" "str_slice"
[289] "str_split" "str_split_exact"
[291] "str_splitn" "str_starts_with"
[293] "str_strip_chars" "str_strip_chars_end"
[295] "str_strip_chars_start" "str_to_date"
[297] "str_to_datetime" "str_to_lowercase"
[283] "str_replace" "str_replace_all"
[285] "str_replace_many" "str_reverse"
[287] "str_slice" "str_split"
[289] "str_split_exact" "str_splitn"
[291] "str_starts_with" "str_strip_chars"
[293] "str_strip_chars_end" "str_strip_chars_start"
[295] "str_to_date" "str_to_datetime"
[297] "str_to_integer" "str_to_lowercase"
[299] "str_to_time" "str_to_titlecase"
[301] "str_to_uppercase" "str_zfill"
[303] "struct_field_by_name" "struct_rename_fields"
Expand Down
12 changes: 6 additions & 6 deletions tests/testthat/test-expr_string.R
Original file line number Diff line number Diff line change
Expand Up @@ -685,28 +685,28 @@ test_that("str$str_explode", {
})


test_that("str$parse_int", {
test_that("str$to_integer", {
expect_identical(
pl$lit(c("110", "101", "010"))$str$parse_int(2)$to_r(),
pl$lit(c("110", "101", "010"))$str$to_integer(2)$to_r(),
c(6, 5, 2)
)

expect_identical(
pl$lit(c("110", "101", "010"))$str$parse_int()$to_r(),
pl$lit(c("110", "101", "010"))$str$to_integer()$to_r(),
eitsupi marked this conversation as resolved.
Show resolved Hide resolved
c(6, 5, 2)
)

expect_identical(
pl$lit(c("110", "101", "010"))$str$parse_int(10)$to_r(),
pl$lit(c("110", "101", "010"))$str$to_integer(10)$to_r(),
c(110, 101, 10)
)

expect_identical(
pl$lit(c("110", "101", "hej"))$str$parse_int(10, FALSE)$to_r(),
pl$lit(c("110", "101", "hej"))$str$to_integer(10, FALSE)$to_r(),
c(110, 101, NA)
)

expect_grepl_error(pl$lit("foo")$str$parse_int()$to_r(), "strict integer parsing failed for 1 value")
expect_grepl_error(pl$lit("foo")$str$to_integer()$to_r(), "strict integer parsing failed for 1 value")
})

test_that("str$reverse()", {
Expand Down
Loading