Skip to content

Commit

Permalink
fix: Capture groups should be ignored in replace when literal=True (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
corwinjoy authored Oct 25, 2024
1 parent 53cdb1c commit a38dea6
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 5 deletions.
14 changes: 10 additions & 4 deletions crates/polars-plan/src/dsl/function_expr/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -844,20 +844,26 @@ fn replace_n<'a>(
"replacement value length ({}) does not match string column length ({})",
len_val, ca.len(),
);
let literal = literal || is_literal_pat(&pat);
let lit = is_literal_pat(&pat);
let literal_pat = literal || lit;

if literal {
if literal_pat {
pat = escape(&pat)
}

let reg = Regex::new(&pat)?;
let lit = pat.chars().all(|c| !c.is_ascii_punctuation());

let f = |s: &'a str, val: &'a str| {
if lit && (s.len() <= 32) {
Cow::Owned(s.replacen(&pat, val, 1))
} else {
reg.replace(s, val)
// According to the docs for replace
// when literal = True then capture groups are ignored.
if literal {
reg.replace(s, NoExpand(val))
} else {
reg.replace(s, val)
}
}
};
Ok(iter_and_replace(ca, val, f))
Expand Down
34 changes: 33 additions & 1 deletion py-polars/tests/unit/operations/namespaces/string/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ def test_replace_all() -> None:
)


def test_replace_literal_no_caputures() -> None:
def test_replace_all_literal_no_caputures() -> None:
# When using literal = True, capture groups should be disabled

# Single row code path in Rust
Expand Down Expand Up @@ -1034,6 +1034,38 @@ def test_replace_literal_no_caputures() -> None:
assert df2.get_column("text2")[1] == "I lost $2 yesterday."


def test_replace_literal_no_caputures() -> None:
    """Check that `str.replace` with `literal=True` keeps `$N` in the value as-is.

    When using literal = True, capture groups should be disabled, so a
    replacement value such as "$1" must end up verbatim in the output.
    """
    # The same replacement expression is applied in both scenarios below.
    fill_amount = (
        pl.col("text").str.replace("<amt>", pl.col("amt"), literal=True).alias("text2")
    )

    # Scenario 1: single row code path in Rust
    one_row = pl.DataFrame({"text": ["I found <amt> yesterday."], "amt": ["$1"]})
    one_row_result = one_row.with_columns(fill_amount).get_column("text2")
    assert one_row_result[0] == "I found $1 yesterday."

    # Scenario 2: multi-row code path in Rust.
    # One string is shorter than 32 chars and one is longer than 32 chars,
    # so that both sub-paths are tested.
    texts = [
        "I found <amt> yesterday.",
        "I lost <amt> yesterday and this string is longer than 32 characters.",
    ]
    amounts = ["$1", "$2"]
    expected = [
        "I found $1 yesterday.",
        "I lost $2 yesterday and this string is longer than 32 characters.",
    ]

    two_rows = pl.DataFrame({"text": texts, "amt": amounts})
    two_rows_result = two_rows.with_columns(fill_amount).get_column("text2")
    for row_idx, want in enumerate(expected):
        assert two_rows_result[row_idx] == want


def test_replace_expressions() -> None:
df = pl.DataFrame({"foo": ["123 bla 45 asd", "xyz 678 910t"], "value": ["A", "B"]})
out = df.select([pl.col("foo").str.replace(pl.col("foo").first(), pl.col("value"))])
Expand Down

0 comments on commit a38dea6

Please sign in to comment.