From e2aa38b2725cddda2cf280e68843d4bcee99a5e4 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sat, 30 Mar 2024 09:47:52 +0000 Subject: [PATCH 1/5] refactor!: rewrite `$str$count_matches()`, `$str$replace()`, `$str$replace_all()` --- R/expr__string.R | 119 ++++++++++++++++++------------ R/extendr-wrappers.R | 6 +- man/ExprStr_count_matches.Rd | 27 ++++--- man/ExprStr_replace.Rd | 56 +++++++++++--- man/ExprStr_replace_all.Rd | 56 +++++++++++--- src/rust/src/lazy/dsl.rs | 40 ++++------ tests/testthat/test-expr_string.R | 6 +- 7 files changed, 201 insertions(+), 109 deletions(-) diff --git a/R/expr__string.R b/R/expr__string.R index e4c678bc6..42cc041df 100644 --- a/R/expr__string.R +++ b/R/expr__string.R @@ -468,7 +468,7 @@ ExprStr_pad_start = function(width, fillchar = " ") { #' ) ExprStr_contains = function(pattern, ..., literal = FALSE, strict = TRUE) { .pr$Expr$str_contains(self, pattern, literal, strict) |> - unwrap("in str$contains():") + unwrap("in $str$contains():") } @@ -662,27 +662,18 @@ ExprStr_extract_all = function(pattern) { #' Count all successive non-overlapping regex matches #' -#' @keywords ExprStr -#' @param pattern A valid regex pattern -#' @param literal Treat pattern as a literal string. -#' -#' @return -#' UInt32 array. Contains null if original value is null or regex capture nothing. -#' +#' @inheritParams ExprStr_contains +#' @return Expression of data type `UInt32`. +#' Returns `null` if the original value is `null`. 
#' @examples -#' df = pl$DataFrame(foo = c("123 bla 45 asd", "xyz 678 910t")) -#' df$select( -#' pl$col("foo")$str$count_matches(r"{(\d)}")$alias("count digits") -#' ) +#' df = pl$DataFrame(foo = c("12 dbc 3xy", "cat\\w", "1zy3\\d\\d", NA)) #' -#' # we can use Polars expressions as pattern so that it's not necessarily the -#' # same for all rows -#' df2 = pl$DataFrame(foo = c("hello", "hi there"), pat = c("ell", "e")) -#' df2$with_columns( -#' pl$col("foo")$str$count_matches(pl$col("pat"))$alias("reg_count") +#' df$with_columns( +#' count_digits = pl$col("foo")$str$count_matches(r"(\d)"), +#' count_slash_d = pl$col("foo")$str$count_matches(r"(\d)", literal = TRUE) #' ) -ExprStr_count_matches = function(pattern, literal = FALSE) { - .pr$Expr$str_count_matches(self, wrap_e(pattern), literal) |> +ExprStr_count_matches = function(pattern, ..., literal = FALSE) { + .pr$Expr$str_count_matches(self, pattern, literal) |> unwrap("in $str$count_matches():") } @@ -764,46 +755,80 @@ ExprStr_splitn = function(by, n) { #' Replace first matching regex/literal substring with a new string value #' -#' @keywords ExprStr -#' @param pattern Regex pattern, can be an Expr. -#' @param value Replacement, can be an Expr. -#' @param literal Treat pattern as a literal string. -#' -#' @return Expr of String Series -#' -#' @seealso `$str$replace_all()`: Replace all matching regex/literal substrings. -#' +#' @inherit ExprStr_contains details +#' @section Capture groups: +#' The dollar sign (`$`) is a special character related to capture groups. +#' To refer to a literal dollar sign, use `$$` instead or set `literal` to `TRUE`. +#' @inheritParams ExprStr_contains +#' @param value A character or an [Expr][Expr_class] of string +#' that will replace the matched substring. +#' @param n A number of matches to replace. 
+#' @return [Expr][Expr_class] of String type +#' @seealso +#' - [`$str$replace_all()`][ExprStr_replace_all] #' @examples -#' df = pl$DataFrame(id = c(1, 2), text = c("123abc", "abc456")) +#' df = pl$DataFrame(id = 1L:2L, text = c("123abc", "abc456")) +#' df$with_columns(pl$col("text")$str$replace(r"(abc\b)", "ABC")) +#' +#' # Capture groups are supported. +#' # Use `${1}` in the value string to refer to the first capture group in the pattern, +#' # `${2}` to refer to the second capture group, and so on. +#' # You can also use named capture groups. +#' df = pl$DataFrame(word = c("hat", "hut")) +#' df$with_columns( +#' positional = pl$col("word")$str$replace("h(.)t", "b${1}d"), +#' named = pl$col("word")$str$replace("h(?<vowel>.)t", "b${vowel}d") +#' ) +#' +#' # Apply case-insensitive string replacement using the `(?i)` flag. +#' df = pl$DataFrame( +#' city = "Philadelphia", +#' season = c("Spring", "Summer", "Autumn", "Winter"), +#' weather = c("Rainy", "Sunny", "Cloudy", "Snowy") +#' ) #' df$with_columns( -#' pl$col("text")$str$replace(r"{abc\b}", "ABC") +#' pl$col("weather")$str$replace("(?i)foggy|rainy|cloudy|snowy", "Sunny") #' ) -ExprStr_replace = function(pattern, value, literal = FALSE) { - .pr$Expr$str_replace(self, wrap_e_result(pattern), wrap_e_result(value), result(literal)) |> - unwrap("in str$replace():") +ExprStr_replace = function(pattern, value, ..., literal = FALSE, n = 1L) { + .pr$Expr$str_replace(self, pattern, value, literal, n) |> + unwrap("in $str$replace():") } #' Replace all matching regex/literal substrings with a new string value #' -#' @keywords ExprStr -#' @param pattern Regex pattern, can be an Expr. -#' @param value Replacement, can be an Expr. -#' @param literal Treat pattern as a literal string. -#' -#' @return Expr of String Series -#' -#' @seealso `$str$replace()`: Replace first matching regex/literal substring. 
-#' +#' @inherit ExprStr_replace details sections params return +#' @seealso +#' - [`$str$replace()`][ExprStr_replace] #' @examples -#' df = pl$DataFrame(id = c(1, 2), text = c("abcabc", "123a123")) +#' df = pl$DataFrame(id = 1L:2L, text = c("abcabc", "123a123")) +#' df$with_columns(pl$col("text")$str$replace_all("a", "-")) +#' +#' # Capture groups are supported. +#' # Use `${1}` in the value string to refer to the first capture group in the pattern, +#' # `${2}` to refer to the second capture group, and so on. +#' # You can also use named capture groups. +#' df = pl$DataFrame(word = c("hat", "hut")) #' df$with_columns( -#' pl$col("text")$str$replace_all("a", "-") +#' positional = pl$col("word")$str$replace_all("h(.)t", "b${1}d"), +#' named = pl$col("word")$str$replace_all("h(?<vowel>.)t", "b${vowel}d") +#' ) +#' +#' # Apply case-insensitive string replacement using the `(?i)` flag. +#' df = pl$DataFrame( +#' city = "Philadelphia", +#' season = c("Spring", "Summer", "Autumn", "Winter"), +#' weather = c("Rainy", "Sunny", "Cloudy", "Snowy") +#' ) +#' df$with_columns( +#' pl$col("weather")$str$replace_all( +#' "(?i)foggy|rainy|cloudy|snowy", "Sunny" +#' ) #' ) -ExprStr_replace_all = function(pattern, value, literal = FALSE) { - .pr$Expr$str_replace_all(self, wrap_e_result(pattern), wrap_e_result(value), result(literal)) |> - unwrap("in str$replace_all():") +ExprStr_replace_all = function(pattern, value, ..., literal = FALSE) { + .pr$Expr$str_replace_all(self, pattern, value, literal) |> + unwrap("in $str$replace_all():") } diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 999975724..ff8ea13af 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -994,7 +994,7 @@ RPolarsExpr$str_extract_all <- function(pattern) .Call(wrap__RPolarsExpr__str_ex RPolarsExpr$str_extract_groups <- function(pattern) .Call(wrap__RPolarsExpr__str_extract_groups, self, pattern) -RPolarsExpr$str_count_matches <- function(pattern, literal) 
.Call(wrap__RPolarsExpr__str_count_matches, self, pattern, literal) +RPolarsExpr$str_count_matches <- function(pat, literal) .Call(wrap__RPolarsExpr__str_count_matches, self, pat, literal) RPolarsExpr$str_to_date <- function(format, strict, exact, cache) .Call(wrap__RPolarsExpr__str_to_date, self, format, strict, exact, cache) @@ -1008,9 +1008,9 @@ RPolarsExpr$str_split_exact <- function(by, n, inclusive) .Call(wrap__RPolarsExp RPolarsExpr$str_splitn <- function(by, n) .Call(wrap__RPolarsExpr__str_splitn, self, by, n) -RPolarsExpr$str_replace <- function(pattern, value, literal) .Call(wrap__RPolarsExpr__str_replace, self, pattern, value, literal) +RPolarsExpr$str_replace <- function(pat, value, literal, n) .Call(wrap__RPolarsExpr__str_replace, self, pat, value, literal, n) -RPolarsExpr$str_replace_all <- function(pattern, value, literal) .Call(wrap__RPolarsExpr__str_replace_all, self, pattern, value, literal) +RPolarsExpr$str_replace_all <- function(pat, value, literal) .Call(wrap__RPolarsExpr__str_replace_all, self, pat, value, literal) RPolarsExpr$str_slice <- function(offset, length) .Call(wrap__RPolarsExpr__str_slice, self, offset, length) diff --git a/man/ExprStr_count_matches.Rd b/man/ExprStr_count_matches.Rd index bedf0ed79..885c475aa 100644 --- a/man/ExprStr_count_matches.Rd +++ b/man/ExprStr_count_matches.Rd @@ -4,30 +4,29 @@ \alias{ExprStr_count_matches} \title{Count all successive non-overlapping regex matches} \usage{ -ExprStr_count_matches(pattern, literal = FALSE) +ExprStr_count_matches(pattern, ..., literal = FALSE) } \arguments{ -\item{pattern}{A valid regex pattern} +\item{pattern}{A character or something can be coerced to a string \link[=Expr_class]{Expr} +of a valid regex pattern, compatible with the \href{https://docs.rs/regex/latest/regex/}{regex crate}.} -\item{literal}{Treat pattern as a literal string.} +\item{...}{Ignored.} + +\item{literal}{Logical. 
If \code{TRUE} (default), treat \code{pattern} as a literal string, +not as a regular expression.} } \value{ -UInt32 array. Contains null if original value is null or regex capture nothing. +Expression of data type \code{UInt32}. +Returns \code{null} if the original value is \code{null}. } \description{ Count all successive non-overlapping regex matches } \examples{ -df = pl$DataFrame(foo = c("123 bla 45 asd", "xyz 678 910t")) -df$select( - pl$col("foo")$str$count_matches(r"{(\d)}")$alias("count digits") -) +df = pl$DataFrame(foo = c("12 dbc 3xy", "cat\\\\w", "1zy3\\\\d\\\\d", NA)) -# we can use Polars expressions as pattern so that it's not necessarily the -# same for all rows -df2 = pl$DataFrame(foo = c("hello", "hi there"), pat = c("ell", "e")) -df2$with_columns( - pl$col("foo")$str$count_matches(pl$col("pat"))$alias("reg_count") +df$with_columns( + count_digits = pl$col("foo")$str$count_matches(r"(\d)"), + count_slash_d = pl$col("foo")$str$count_matches(r"(\d)", literal = TRUE) ) } -\keyword{ExprStr} diff --git a/man/ExprStr_replace.Rd b/man/ExprStr_replace.Rd index d458f2ae6..98237c307 100644 --- a/man/ExprStr_replace.Rd +++ b/man/ExprStr_replace.Rd @@ -4,28 +4,66 @@ \alias{ExprStr_replace} \title{Replace first matching regex/literal substring with a new string value} \usage{ -ExprStr_replace(pattern, value, literal = FALSE) +ExprStr_replace(pattern, value, ..., literal = FALSE, n = 1L) } \arguments{ -\item{pattern}{Regex pattern, can be an Expr.} +\item{pattern}{A character or something can be coerced to a string \link[=Expr_class]{Expr} +of a valid regex pattern, compatible with the \href{https://docs.rs/regex/latest/regex/}{regex crate}.} -\item{value}{Replacement, can be an Expr.} +\item{value}{A character or an \link[=Expr_class]{Expr} of string +that will replace the matched substring.} -\item{literal}{Treat pattern as a literal string.} +\item{...}{Ignored.} + +\item{literal}{Logical. 
If \code{TRUE} (default), treat \code{pattern} as a literal string, +not as a regular expression.} + +\item{n}{A number of matches to replace.} } \value{ -Expr of String Series +\link[=Expr_class]{Expr} of String type } \description{ Replace first matching regex/literal substring with a new string value } +\details{ +To modify regular expression behaviour (such as case-sensitivity) with flags, +use the inline \code{(?iLmsuxU)} syntax. See the regex crate’s section on +\href{https://docs.rs/regex/latest/regex/#grouping-and-flags}{grouping and flags} +for additional information about the use of inline expression modifiers. +} +\section{Capture groups}{ + +The dollar sign (\code{$}) is a special character related to capture groups. +To refer to a literal dollar sign, use \verb{$$} instead or set \code{literal} to \code{TRUE}. +} + \examples{ -df = pl$DataFrame(id = c(1, 2), text = c("123abc", "abc456")) +df = pl$DataFrame(id = 1L:2L, text = c("123abc", "abc456")) +df$with_columns(pl$col("text")$str$replace(r"(abc\b)", "ABC")) + +# Capture groups are supported. +# Use `${1}` in the value string to refer to the first capture group in the pattern, +# `${2}` to refer to the second capture group, and so on. +# You can also use named capture groups. +df = pl$DataFrame(word = c("hat", "hut")) +df$with_columns( + positional = pl$col("word")$str$replace("h(.)t", "b${1}d"), + named = pl$col("word")$str$replace("h(?<vowel>.)t", "b${vowel}d") +) + +# Apply case-insensitive string replacement using the `(?i)` flag. +df = pl$DataFrame( + city = "Philadelphia", + season = c("Spring", "Summer", "Autumn", "Winter"), + weather = c("Rainy", "Sunny", "Cloudy", "Snowy") +) df$with_columns( - pl$col("text")$str$replace(r"{abc\b}", "ABC") + pl$col("weather")$str$replace("(?i)foggy|rainy|cloudy|snowy", "Sunny") ) } \seealso{ -\verb{$str$replace_all()}: Replace all matching regex/literal substrings. 
+\itemize{ +\item \code{\link[=ExprStr_replace_all]{$str$replace_all()}} +} } -\keyword{ExprStr} diff --git a/man/ExprStr_replace_all.Rd b/man/ExprStr_replace_all.Rd index 36e48d7e0..2a19992bb 100644 --- a/man/ExprStr_replace_all.Rd +++ b/man/ExprStr_replace_all.Rd @@ -4,28 +4,66 @@ \alias{ExprStr_replace_all} \title{Replace all matching regex/literal substrings with a new string value} \usage{ -ExprStr_replace_all(pattern, value, literal = FALSE) +ExprStr_replace_all(pattern, value, ..., literal = FALSE) } \arguments{ -\item{pattern}{Regex pattern, can be an Expr.} +\item{pattern}{A character or something can be coerced to a string \link[=Expr_class]{Expr} +of a valid regex pattern, compatible with the \href{https://docs.rs/regex/latest/regex/}{regex crate}.} -\item{value}{Replacement, can be an Expr.} +\item{value}{A character or an \link[=Expr_class]{Expr} of string +that will replace the matched substring.} -\item{literal}{Treat pattern as a literal string.} +\item{...}{Ignored.} + +\item{literal}{Logical. If \code{TRUE} (default), treat \code{pattern} as a literal string, +not as a regular expression.} } \value{ -Expr of String Series +\link[=Expr_class]{Expr} of String type } \description{ Replace all matching regex/literal substrings with a new string value } +\details{ +To modify regular expression behaviour (such as case-sensitivity) with flags, +use the inline \code{(?iLmsuxU)} syntax. See the regex crate’s section on +\href{https://docs.rs/regex/latest/regex/#grouping-and-flags}{grouping and flags} +for additional information about the use of inline expression modifiers. +} +\section{Capture groups}{ + +The dollar sign (\code{$}) is a special character related to capture groups. +To refer to a literal dollar sign, use \verb{$$} instead or set \code{literal} to \code{TRUE}. 
+} + \examples{ -df = pl$DataFrame(id = c(1, 2), text = c("abcabc", "123a123")) +df = pl$DataFrame(id = 1L:2L, text = c("abcabc", "123a123")) +df$with_columns(pl$col("text")$str$replace_all("a", "-")) + +# Capture groups are supported. +# Use `${1}` in the value string to refer to the first capture group in the pattern, +# `${2}` to refer to the second capture group, and so on. +# You can also use named capture groups. +df = pl$DataFrame(word = c("hat", "hut")) df$with_columns( - pl$col("text")$str$replace_all("a", "-") + positional = pl$col("word")$str$replace_all("h(.)t", "b${1}d"), + named = pl$col("word")$str$replace_all("h(?<vowel>.)t", "b${vowel}d") +) + +# Apply case-insensitive string replacement using the `(?i)` flag. +df = pl$DataFrame( + city = "Philadelphia", + season = c("Spring", "Summer", "Autumn", "Winter"), + weather = c("Rainy", "Sunny", "Cloudy", "Snowy") +) +df$with_columns( + pl$col("weather")$str$replace_all( + "(?i)foggy|rainy|cloudy|snowy", "Sunny" + ) ) } \seealso{ -\verb{$str$replace()}: Replace first matching regex/literal substring. +\itemize{ +\item \code{\link[=ExprStr_replace]{$str$replace()}} +} } -\keyword{ExprStr} diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 47df3d6ac..5d6d33d0a 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -2155,13 +2155,10 @@ impl RPolarsExpr { Ok(self.0.clone().str().extract_groups(pattern)?.into()) } - pub fn str_count_matches(&self, pattern: Robj, literal: Robj) -> RResult { - Ok(self - .0 - .clone() - .str() - .count_matches(robj_to!(PLExpr, pattern)?, robj_to!(bool, literal)?) 
- .into()) + pub fn str_count_matches(&self, pat: Robj, literal: Robj) -> RResult { + let pat = robj_to!(PLExpr, pat)?; + let literal = robj_to!(bool, literal)?; + Ok(self.0.clone().str().count_matches(pat, literal).into()) } pub fn str_to_date( @@ -2265,38 +2262,33 @@ impl RPolarsExpr { pub fn str_replace( &self, - pattern: Robj, + pat: Robj, value: Robj, literal: Robj, + n: Robj, ) -> Result { + let pat = robj_to!(PLExpr, pat)?; + let value = robj_to!(PLExpr, value)?; + let literal = robj_to!(bool, literal)?; + let n = robj_to!(i64, n)?; Ok(self .0 .clone() .str() - .replace( - robj_to!(Expr, pattern)?.0, - robj_to!(Expr, value)?.0, - robj_to!(bool, literal)?, - ) + .replace_n(pat, value, literal, n) .into()) } pub fn str_replace_all( &self, - pattern: Robj, + pat: Robj, value: Robj, literal: Robj, ) -> Result { - Ok(self - .0 - .clone() - .str() - .replace_all( - robj_to!(Expr, pattern)?.0, - robj_to!(Expr, value)?.0, - robj_to!(bool, literal)?, - ) - .into()) + let pat = robj_to!(PLExpr, pat)?; + let value = robj_to!(PLExpr, value)?; + let literal = robj_to!(bool, literal)?; + Ok(self.0.clone().str().replace_all(pat, value, literal).into()) } pub fn str_slice(&self, offset: Robj, length: Robj) -> Result { diff --git a/tests/testthat/test-expr_string.R b/tests/testthat/test-expr_string.R index 8d470a5dd..3559ccf2c 100644 --- a/tests/testthat/test-expr_string.R +++ b/tests/testthat/test-expr_string.R @@ -604,13 +604,13 @@ test_that("str$replace", { ) expect_identical( - pl$lit(c("123abc", "abc456"))$str$replace(r"{abc\b}", "ABC", TRUE)$to_r(), + pl$lit(c("123abc", "abc456"))$str$replace(r"{abc\b}", "ABC", literal = TRUE)$to_r(), c("123abc", "abc456") ) e = pl$lit(r"{(abc\b)}") expect_identical( - pl$lit(c("123abc", "abc456"))$str$replace(e, "ABC", FALSE)$to_r(), + pl$lit(c("123abc", "abc456"))$str$replace(e, "ABC", literal = FALSE)$to_r(), c("123ABC", "abc456") ) @@ -637,7 +637,7 @@ test_that("str$replace_all", { ) expect_identical( - pl$lit(c("abcabc", 
"123a123"))$str$replace_all("^12", "-", TRUE)$to_r(), + pl$lit(c("abcabc", "123a123"))$str$replace_all("^12", "-", literal = TRUE)$to_r(), c("abcabc", "123a123") ) }) From f8f088d6fc2528bdd105f7f145275b6d68f5c224 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sat, 30 Mar 2024 09:47:52 +0000 Subject: [PATCH 2/5] test: add more test and update docs about error --- R/expr__string.R | 3 +++ man/ExprStr_replace.Rd | 5 ++++- tests/testthat/test-expr_string.R | 10 ++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/R/expr__string.R b/R/expr__string.R index 42cc041df..b7d11bf36 100644 --- a/R/expr__string.R +++ b/R/expr__string.R @@ -763,6 +763,9 @@ ExprStr_splitn = function(by, n) { #' @param value A character or an [Expr][Expr_class] of string #' that will replace the matched substring. #' @param n A number of matches to replace. +#' Note that regex replacement with `n > 1` not yet supported, +#' so raise an error if `n > 1` and `pattern` includes regex pattern +#' and `literal = FALSE`. #' @return [Expr][Expr_class] of String type #' @seealso #' - [`$str$replace_all()`][ExprStr_replace_all] diff --git a/man/ExprStr_replace.Rd b/man/ExprStr_replace.Rd index 98237c307..c68058a37 100644 --- a/man/ExprStr_replace.Rd +++ b/man/ExprStr_replace.Rd @@ -18,7 +18,10 @@ that will replace the matched substring.} \item{literal}{Logical. If \code{TRUE} (default), treat \code{pattern} as a literal string, not as a regular expression.} -\item{n}{A number of matches to replace.} +\item{n}{A number of matches to replace. 
+Note that regex replacement with \code{n > 1} not yet supported, +so raise an error if \code{n > 1} and \code{pattern} includes regex pattern +and \code{literal = FALSE}.} } \value{ \link[=Expr_class]{Expr} of String type diff --git a/tests/testthat/test-expr_string.R b/tests/testthat/test-expr_string.R index 3559ccf2c..068202cea 100644 --- a/tests/testthat/test-expr_string.R +++ b/tests/testthat/test-expr_string.R @@ -618,6 +618,16 @@ test_that("str$replace", { pl$lit(c("abcabc", "123a123"))$str$replace("ab", "__")$to_r(), c("__cabc", "123a123") ) + + expect_identical( + pl$lit(c("ababab", "123a123"))$str$replace("a", "_", n = 2)$to_r(), + c("_b_bab", "123_123") + ) + + expect_error( + pl$lit("1234")$str$replace(r"{\d}", "foo", n = 2)$to_r(), + "regex replacement with 'n > 1' not yet supported" + ) }) test_that("str$replace_all", { From 5866626aaff3acf7c94f24269123a91a750aacc3 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sat, 30 Mar 2024 09:51:09 +0000 Subject: [PATCH 3/5] docs(news): add items --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index c0372a51d..bc461d4f0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -76,6 +76,8 @@ `$to_vector()` (#938). - In `$str$contains()`, the arguments `literal` and `strict` should be named arguments (#982). - In `$str$contains_any()`, the `ascii_case_insensitive` argument must be named (#986). +- In `$str$count_matches()`, `$str$replace()` and `$str$replace_all()`, + the `literal` argument must be named (#987). - In `$str$strptime()`, `$str$to_date()`, `$str$to_datetime()`, and `$str$to_time()`, all arguments (except the first one) must be named (#939). - In `$str$strptime()`, the argument `datatype` is renamed `dtype` (#939). @@ -104,6 +106,7 @@ - New functions `pl$int_range()` and `pl$int_ranges()` (#968). - New string method `$str$extract_groups()` (#979). - New string method `$str$find()` (#985). +- New argument `n` in `$str$replace()` (#987). 
### Bug fixes From 829e3f0cb8a8a926b45582bb7b516c04d28b97e1 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sat, 30 Mar 2024 10:23:14 +0000 Subject: [PATCH 4/5] docs: tweak --- R/expr__string.R | 5 ++--- man/ExprStr_count_matches.Rd | 2 +- man/ExprStr_replace.Rd | 6 +++--- man/ExprStr_replace_all.Rd | 6 +++--- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/R/expr__string.R b/R/expr__string.R index b7d11bf36..e823b3021 100644 --- a/R/expr__string.R +++ b/R/expr__string.R @@ -663,7 +663,7 @@ ExprStr_extract_all = function(pattern) { #' Count all successive non-overlapping regex matches #' #' @inheritParams ExprStr_contains -#' @return Expression of data type `UInt32`. +#' @return [Expr][Expr_class] of data type `UInt32`. #' Returns `null` if the original value is `null`. #' @examples #' df = pl$DataFrame(foo = c("12 dbc 3xy", "cat\\w", "1zy3\\d\\d", NA)) @@ -755,11 +755,10 @@ ExprStr_splitn = function(by, n) { #' Replace first matching regex/literal substring with a new string value #' -#' @inherit ExprStr_contains details +#' @inherit ExprStr_contains details params #' @section Capture groups: #' The dollar sign (`$`) is a special character related to capture groups. #' To refer to a literal dollar sign, use `$$` instead or set `literal` to `TRUE`. -#' @inheritParams ExprStr_contains #' @param value A character or an [Expr][Expr_class] of string #' that will replace the matched substring. #' @param n A number of matches to replace. diff --git a/man/ExprStr_count_matches.Rd b/man/ExprStr_count_matches.Rd index 885c475aa..55c3266f5 100644 --- a/man/ExprStr_count_matches.Rd +++ b/man/ExprStr_count_matches.Rd @@ -16,7 +16,7 @@ of a valid regex pattern, compatible with the \href{https://docs.rs/regex/latest not as a regular expression.} } \value{ -Expression of data type \code{UInt32}. +\link[=Expr_class]{Expr} of data type \code{UInt32}. Returns \code{null} if the original value is \code{null}. 
} \description{ diff --git a/man/ExprStr_replace.Rd b/man/ExprStr_replace.Rd index c68058a37..f9c7ead8d 100644 --- a/man/ExprStr_replace.Rd +++ b/man/ExprStr_replace.Rd @@ -30,9 +30,9 @@ and \code{literal = FALSE}.} Replace first matching regex/literal substring with a new string value } \details{ -To modify regular expression behaviour (such as case-sensitivity) with flags, -use the inline \code{(?iLmsuxU)} syntax. See the regex crate’s section on -\href{https://docs.rs/regex/latest/regex/#grouping-and-flags}{grouping and flags} +To modify regular expression behaviour (such as case-sensitivity) +with flags, use the inline \code{(?iLmsuxU)} syntax. See the regex crate’s section +on \href{https://docs.rs/regex/latest/regex/#grouping-and-flags}{grouping and flags} for additional information about the use of inline expression modifiers. } \section{Capture groups}{ diff --git a/man/ExprStr_replace_all.Rd b/man/ExprStr_replace_all.Rd index 2a19992bb..2c73ae341 100644 --- a/man/ExprStr_replace_all.Rd +++ b/man/ExprStr_replace_all.Rd @@ -25,9 +25,9 @@ not as a regular expression.} Replace all matching regex/literal substrings with a new string value } \details{ -To modify regular expression behaviour (such as case-sensitivity) with flags, -use the inline \code{(?iLmsuxU)} syntax. See the regex crate’s section on -\href{https://docs.rs/regex/latest/regex/#grouping-and-flags}{grouping and flags} +To modify regular expression behaviour (such as case-sensitivity) +with flags, use the inline \code{(?iLmsuxU)} syntax. See the regex crate’s section +on \href{https://docs.rs/regex/latest/regex/#grouping-and-flags}{grouping and flags} for additional information about the use of inline expression modifiers. 
} \section{Capture groups}{ From 255077faf4aa8686c7143e5be2390c5d468d14bc Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sat, 30 Mar 2024 10:23:33 +0000 Subject: [PATCH 5/5] tests: fix capturing error message --- tests/testthat/test-expr_string.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-expr_string.R b/tests/testthat/test-expr_string.R index 068202cea..753dfb011 100644 --- a/tests/testthat/test-expr_string.R +++ b/tests/testthat/test-expr_string.R @@ -624,7 +624,7 @@ test_that("str$replace", { c("_b_bab", "123_123") ) - expect_error( + expect_grepl_error( pl$lit("1234")$str$replace(r"{\d}", "foo", n = 2)$to_r(), "regex replacement with 'n > 1' not yet supported" )