Skip to content

Commit

Permalink
refactor!: rewrite $str$count_matches(), $str$replace(), `$str$rep…
Browse files Browse the repository at this point in the history
…lace_all()`
  • Loading branch information
eitsupi committed Mar 30, 2024
1 parent 1f7c927 commit 20aaf90
Show file tree
Hide file tree
Showing 7 changed files with 199 additions and 107 deletions.
115 changes: 70 additions & 45 deletions R/expr__string.R
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ ExprStr_pad_start = function(width, fillchar = " ") {
#' )
ExprStr_contains = function(pattern, ..., literal = FALSE, strict = TRUE) {
.pr$Expr$str_contains(self, pattern, literal, strict) |>
unwrap("in str$contains():")
unwrap("in $str$contains():")
}


Expand Down Expand Up @@ -655,27 +655,18 @@ ExprStr_extract_all = function(pattern) {

#' Count all successive non-overlapping regex matches
#'
#' @keywords ExprStr
#' @param pattern A valid regex pattern
#' @param literal Treat pattern as a literal string.
#'
#' @return
#' UInt32 array. Contains null if original value is null or regex capture nothing.
#'
#' @inheritParams ExprStr_contains
#' @return Expression of data type `UInt32`.
#' Returns `null` if the original value is `null`.
#' @examples
#' df = pl$DataFrame(foo = c("123 bla 45 asd", "xyz 678 910t"))
#' df$select(
#' pl$col("foo")$str$count_matches(r"{(\d)}")$alias("count digits")
#' )
#' df = pl$DataFrame(foo = c("12 dbc 3xy", "cat\\w", "1zy3\\d\\d", NA))
#'
#' # we can use Polars expressions as pattern so that it's not necessarily the
#' # same for all rows
#' df2 = pl$DataFrame(foo = c("hello", "hi there"), pat = c("ell", "e"))
#' df2$with_columns(
#' pl$col("foo")$str$count_matches(pl$col("pat"))$alias("reg_count")
#' df$with_columns(
#' count_digits = pl$col("foo")$str$count_matches(r"(\d)"),
#' count_slash_d = pl$col("foo")$str$count_matches(r"(\d)", literal = TRUE)
#' )
ExprStr_count_matches = function(pattern, literal = FALSE) {
.pr$Expr$str_count_matches(self, wrap_e(pattern), literal) |>
ExprStr_count_matches = function(pattern, ..., literal = FALSE) {
.pr$Expr$str_count_matches(self, pattern, literal) |>
unwrap("in $str$count_matches():")
}

Expand Down Expand Up @@ -757,46 +748,80 @@ ExprStr_splitn = function(by, n) {

#' Replace first matching regex/literal substring with a new string value
#'
#' @keywords ExprStr
#' @param pattern Regex pattern, can be an Expr.
#' @param value Replacement, can be an Expr.
#' @param literal Treat pattern as a literal string.
#'
#' @return Expr of String Series
#' @inherit ExprStr_contains details
#' @section Capture groups:
#' The dollar sign (`$`) is a special character related to capture groups.
#' To refer to a literal dollar sign, use `$$` instead or set `literal` to `TRUE`.
#' @inheritParams ExprStr_contains
#' @param value A character or an [Expr][Expr_class] of string
#' that will replace the matched substring.
#' @param n The number of matches to replace.
#' @return [Expr][Expr_class] of String type
#' @seealso
#' - [`<Expr>$str$replace_all()`][ExprStr_replace_all]
#' @examples
#' df = pl$DataFrame(id = 1L:2L, text = c("123abc", "abc456"))
#' df$with_columns(pl$col("text")$str$replace(r"(abc\b)", "ABC"))
#'
#' @seealso `$str$replace_all()`: Replace all matching regex/literal substrings.
#' # Capture groups are supported.
#' # Use `${1}` in the value string to refer to the first capture group in the pattern,
#' # `${2}` to refer to the second capture group, and so on.
#' # You can also use named capture groups.
#' df = pl$DataFrame(word = c("hat", "hut"))
#' df$with_columns(
#' positional = pl$col("word")$str$replace("h(.)t", "b${1}d"),
#' named = pl$col("word")$str$replace("h(?<vowel>.)t", "b${vowel}d")
#' )
#'
#' @examples
#' df = pl$DataFrame(id = c(1, 2), text = c("123abc", "abc456"))
#' # Apply case-insensitive string replacement using the `(?i)` flag.
#' df = pl$DataFrame(
#' city = "Philadelphia",
#' season = c("Spring", "Summer", "Autumn", "Winter"),
#' weather = c("Rainy", "Sunny", "Cloudy", "Snowy")
#' )
#' df$with_columns(
#' pl$col("text")$str$replace(r"{abc\b}", "ABC")
#' pl$col("weather")$str$replace("(?i)foggy|rainy|cloudy|snowy", "Sunny")
#' )
ExprStr_replace = function(pattern, value, literal = FALSE) {
.pr$Expr$str_replace(self, wrap_e_result(pattern), wrap_e_result(value), result(literal)) |>
unwrap("in str$replace():")
ExprStr_replace = function(pattern, value, ..., literal = FALSE, n = 1L) {
.pr$Expr$str_replace(self, pattern, value, literal, n) |>
unwrap("in $str$replace():")
}



#' Replace all matching regex/literal substrings with a new string value
#'
#' @keywords ExprStr
#' @param pattern Regex pattern, can be an Expr.
#' @param value Replacement, can be an Expr.
#' @param literal Treat pattern as a literal string.
#'
#' @return Expr of String Series
#' @inherit ExprStr_replace details sections params return
#' @seealso
#' - [`<Expr>$str$replace()`][ExprStr_replace]
#' @examples
#' df = pl$DataFrame(id = 1L:2L, text = c("abcabc", "123a123"))
#' df$with_columns(pl$col("text")$str$replace_all("a", "-"))
#'
#' @seealso `$str$replace()`: Replace first matching regex/literal substring.
#' # Capture groups are supported.
#' # Use `${1}` in the value string to refer to the first capture group in the pattern,
#' # `${2}` to refer to the second capture group, and so on.
#' # You can also use named capture groups.
#' df = pl$DataFrame(word = c("hat", "hut"))
#' df$with_columns(
#' positional = pl$col("word")$str$replace_all("h(.)t", "b${1}d"),
#' named = pl$col("word")$str$replace_all("h(?<vowel>.)t", "b${vowel}d")
#' )
#'
#' @examples
#' df = pl$DataFrame(id = c(1, 2), text = c("abcabc", "123a123"))
#' # Apply case-insensitive string replacement using the `(?i)` flag.
#' df = pl$DataFrame(
#' city = "Philadelphia",
#' season = c("Spring", "Summer", "Autumn", "Winter"),
#' weather = c("Rainy", "Sunny", "Cloudy", "Snowy")
#' )
#' df$with_columns(
#' pl$col("text")$str$replace_all("a", "-")
#' pl$col("weather")$str$replace_all(
#' "(?i)foggy|rainy|cloudy|snowy", "Sunny"
#' )
#' )
ExprStr_replace_all = function(pattern, value, literal = FALSE) {
.pr$Expr$str_replace_all(self, wrap_e_result(pattern), wrap_e_result(value), result(literal)) |>
unwrap("in str$replace_all():")
ExprStr_replace_all = function(pattern, value, ..., literal = FALSE) {
.pr$Expr$str_replace_all(self, pattern, value, literal) |>
unwrap("in $str$replace_all():")
}


Expand Down
6 changes: 3 additions & 3 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -994,7 +994,7 @@ RPolarsExpr$str_extract_all <- function(pattern) .Call(wrap__RPolarsExpr__str_ex

RPolarsExpr$str_extract_groups <- function(pattern) .Call(wrap__RPolarsExpr__str_extract_groups, self, pattern)

RPolarsExpr$str_count_matches <- function(pattern, literal) .Call(wrap__RPolarsExpr__str_count_matches, self, pattern, literal)
RPolarsExpr$str_count_matches <- function(pat, literal) .Call(wrap__RPolarsExpr__str_count_matches, self, pat, literal)

RPolarsExpr$str_to_date <- function(format, strict, exact, cache) .Call(wrap__RPolarsExpr__str_to_date, self, format, strict, exact, cache)

Expand All @@ -1008,9 +1008,9 @@ RPolarsExpr$str_split_exact <- function(by, n, inclusive) .Call(wrap__RPolarsExp

RPolarsExpr$str_splitn <- function(by, n) .Call(wrap__RPolarsExpr__str_splitn, self, by, n)

RPolarsExpr$str_replace <- function(pattern, value, literal) .Call(wrap__RPolarsExpr__str_replace, self, pattern, value, literal)
RPolarsExpr$str_replace <- function(pat, value, literal, n) .Call(wrap__RPolarsExpr__str_replace, self, pat, value, literal, n)

RPolarsExpr$str_replace_all <- function(pattern, value, literal) .Call(wrap__RPolarsExpr__str_replace_all, self, pattern, value, literal)
RPolarsExpr$str_replace_all <- function(pat, value, literal) .Call(wrap__RPolarsExpr__str_replace_all, self, pat, value, literal)

RPolarsExpr$str_slice <- function(offset, length) .Call(wrap__RPolarsExpr__str_slice, self, offset, length)

Expand Down
27 changes: 13 additions & 14 deletions man/ExprStr_count_matches.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 47 additions & 9 deletions man/ExprStr_replace.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 47 additions & 9 deletions man/ExprStr_replace_all.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 20aaf90

Please sign in to comment.