Skip to content

Commit

Permalink
depr(python, rust!): Rename str.concat to str.join (#16790)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Jun 6, 2024
1 parent c6b2834 commit fd4c71e
Show file tree
Hide file tree
Showing 10 changed files with 207 additions and 123 deletions.
4 changes: 2 additions & 2 deletions crates/polars-ops/src/chunked_array/strings/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use arrow::compute::cast::utf8_to_utf8view;
use polars_core::prelude::*;

// Vertically concatenate all strings in a StringChunked.
pub fn str_concat(ca: &StringChunked, delimiter: &str, ignore_nulls: bool) -> StringChunked {
pub fn str_join(ca: &StringChunked, delimiter: &str, ignore_nulls: bool) -> StringChunked {
if ca.is_empty() {
return StringChunked::new(ca.name(), &[""]);
}
Expand Down Expand Up @@ -142,7 +142,7 @@ mod test {
fn test_str_concat() {
let ca = Int32Chunked::new("foo", &[Some(1), None, Some(3)]);
let ca_str = ca.cast(&DataType::String).unwrap();
let out = str_concat(ca_str.str().unwrap(), "-", true);
let out = str_join(ca_str.str().unwrap(), "-", true);

let out = out.get(0);
assert_eq!(out, Some("1-3"));
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-plan/src/dsl/function_expr/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ impl From<StringFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
ConcatVertical {
delimiter,
ignore_nulls,
} => map!(strings::concat, &delimiter, ignore_nulls),
} => map!(strings::join, &delimiter, ignore_nulls),
#[cfg(feature = "concat_str")]
ConcatHorizontal {
delimiter,
Expand Down Expand Up @@ -709,10 +709,10 @@ fn to_time(s: &Series, options: &StrptimeOptions) -> PolarsResult<Series> {
}

#[cfg(feature = "concat_str")]
pub(super) fn concat(s: &Series, delimiter: &str, ignore_nulls: bool) -> PolarsResult<Series> {
pub(super) fn join(s: &Series, delimiter: &str, ignore_nulls: bool) -> PolarsResult<Series> {
let str_s = s.cast(&DataType::String)?;
let concat = polars_ops::chunked_array::str_concat(str_s.str()?, delimiter, ignore_nulls);
Ok(concat.into_series())
let joined = polars_ops::chunked_array::str_join(str_s.str()?, delimiter, ignore_nulls);
Ok(joined.into_series())
}

#[cfg(feature = "concat_str")]
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ impl StringNameSpace {
///
/// * `delimiter` - A string that will act as delimiter between values.
#[cfg(feature = "concat_str")]
pub fn concat(self, delimiter: &str, ignore_nulls: bool) -> Expr {
pub fn join(self, delimiter: &str, ignore_nulls: bool) -> Expr {
self.0
.apply_private(
StringFunction::ConcatVertical {
Expand Down
146 changes: 95 additions & 51 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,57 +434,6 @@ def len_chars(self) -> Expr:
"""
return wrap_expr(self._pyexpr.str_len_chars())

def concat(
self, delimiter: str | None = None, *, ignore_nulls: bool = True
) -> Expr:
"""
Vertically concatenate the string values in the column to a single string value.
Parameters
----------
delimiter
The delimiter to insert between consecutive string values.
ignore_nulls
Ignore null values (default).
If set to `False`, null values will be propagated. This means that
if the column contains any null values, the output is null.
Returns
-------
Expr
Expression of data type :class:`String`.
Examples
--------
>>> df = pl.DataFrame({"foo": [1, None, 2]})
>>> df.select(pl.col("foo").str.concat("-"))
shape: (1, 1)
┌─────┐
│ foo │
│ --- │
│ str │
╞═════╡
│ 1-2 │
└─────┘
>>> df.select(pl.col("foo").str.concat("-", ignore_nulls=False))
shape: (1, 1)
┌──────┐
│ foo │
│ --- │
│ str │
╞══════╡
│ null │
└──────┘
"""
if delimiter is None:
issue_deprecation_warning(
"The default `delimiter` for `str.concat` will change from '-' to an empty string."
" Pass a delimiter to silence this warning.",
version="0.20.5",
)
delimiter = "-"
return wrap_expr(self._pyexpr.str_concat(delimiter, ignore_nulls))

def to_uppercase(self) -> Expr:
"""
Transform to uppercase variant.
Expand Down Expand Up @@ -2554,6 +2503,101 @@ def replace_many(
)
)

def join(self, delimiter: str = "", *, ignore_nulls: bool = True) -> Expr:
    """
    Vertically concatenate the string values in the column to a single string value.

    Parameters
    ----------
    delimiter
        The delimiter to insert between consecutive string values.
        Defaults to an empty string.
    ignore_nulls
        Ignore null values (default).
        If set to `False`, null values will be propagated. This means that
        if the column contains any null values, the output is null.

    Returns
    -------
    Expr
        Expression of data type :class:`String`.

    Examples
    --------
    >>> df = pl.DataFrame({"foo": [1, None, 3]})
    >>> df.select(pl.col("foo").str.join("-"))
    shape: (1, 1)
    ┌─────┐
    │ foo │
    │ --- │
    │ str │
    ╞═════╡
    │ 1-3 │
    └─────┘
    >>> df.select(pl.col("foo").str.join(ignore_nulls=False))
    shape: (1, 1)
    ┌──────┐
    │ foo  │
    │ ---  │
    │ str  │
    ╞══════╡
    │ null │
    └──────┘
    """
    # Build the underlying expression first, then wrap it for the Python API.
    joined = self._pyexpr.str_join(delimiter, ignore_nulls=ignore_nulls)
    return wrap_expr(joined)

def concat(
    self, delimiter: str | None = None, *, ignore_nulls: bool = True
) -> Expr:
    """
    Vertically concatenate the string values in the column to a single string value.

    .. deprecated:: 1.0.0
        Use :meth:`join` instead. Note that the default `delimiter` for :meth:`join`
        is an empty string instead of a hyphen.

    Parameters
    ----------
    delimiter
        The delimiter to insert between consecutive string values.
        If omitted, a hyphen (`"-"`) is used.
    ignore_nulls
        Ignore null values (default).
        If set to `False`, null values will be propagated. This means that
        if the column contains any null values, the output is null.

    Returns
    -------
    Expr
        Expression of data type :class:`String`.

    Examples
    --------
    >>> df = pl.DataFrame({"foo": [1, None, 2]})
    >>> df.select(pl.col("foo").str.concat("-"))  # doctest: +SKIP
    shape: (1, 1)
    ┌─────┐
    │ foo │
    │ --- │
    │ str │
    ╞═════╡
    │ 1-2 │
    └─────┘
    >>> df.select(
    ...     pl.col("foo").str.concat("-", ignore_nulls=False)
    ... )  # doctest: +SKIP
    shape: (1, 1)
    ┌──────┐
    │ foo  │
    │ ---  │
    │ str  │
    ╞══════╡
    │ null │
    └──────┘
    """
    # Warn on every call, not only when `delimiter` is omitted: the method as a
    # whole is deprecated in favour of `str.join`, matching `Series.str.concat`.
    issue_deprecation_warning(
        "`str.concat` is deprecated. Use `str.join` instead. Note that the default"
        " `delimiter` for `str.join` is an empty string instead of a hyphen.",
        version="1.0.0",
    )
    if delimiter is None:
        # Keep the historical hyphen default so existing callers see no change
        # in output; only `str.join` uses the new empty-string default.
        delimiter = "-"
    return self.join(delimiter, ignore_nulls=ignore_nulls)


def _validate_format_argument(format: str | None) -> None:
if format is not None and ".%f" in format:
Expand Down
112 changes: 76 additions & 36 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,42 +361,6 @@ def len_chars(self) -> Series:
]
"""

def concat(
self, delimiter: str | None = None, *, ignore_nulls: bool = True
) -> Series:
"""
Vertically concatenate the string values in the column to a single string value.
Parameters
----------
delimiter
The delimiter to insert between consecutive string values.
ignore_nulls
Ignore null values (default).
If set to `False`, null values will be propagated. This means that
if the column contains any null values, the output is null.
Returns
-------
Series
Series of data type :class:`String`.
Examples
--------
>>> pl.Series([1, None, 2]).str.concat("-")
shape: (1,)
Series: '' [str]
[
"1-2"
]
>>> pl.Series([1, None, 2]).str.concat("-", ignore_nulls=False)
shape: (1,)
Series: '' [str]
[
null
]
"""

def contains(
self, pattern: str | Expr, *, literal: bool = False, strict: bool = True
) -> Series:
Expand Down Expand Up @@ -1904,3 +1868,79 @@ def replace_many(
"Can me feel the love tonight"
]
"""

def join(self, delimiter: str = "", *, ignore_nulls: bool = True) -> Series:
    """
    Vertically concatenate the string values in the column to a single string value.

    Parameters
    ----------
    delimiter
        The delimiter to insert between consecutive string values.
        Defaults to an empty string.
    ignore_nulls
        Ignore null values (default).
        If set to `False`, null values will be propagated. This means that
        if the column contains any null values, the output is null.

    Returns
    -------
    Series
        Series of data type :class:`String`.

    Examples
    --------
    >>> s = pl.Series([1, None, 3])
    >>> s.str.join("-")
    shape: (1,)
    Series: '' [str]
    [
        "1-3"
    ]
    >>> s.str.join(ignore_nulls=False)
    shape: (1,)
    Series: '' [str]
    [
        null
    ]
    """

@deprecate_function(
    "Use `str.join` instead. Note that the default `delimiter` for `str.join`"
    " is an empty string instead of a hyphen.",
    version="1.0.0",
)
def concat(
    self, delimiter: str | None = None, *, ignore_nulls: bool = True
) -> Series:
    """
    Vertically concatenate the string values in the column to a single string value.

    .. deprecated:: 1.0.0
        Use :meth:`join` instead. Note that the default `delimiter` for :meth:`join`
        is an empty string instead of a hyphen.

    Parameters
    ----------
    delimiter
        The delimiter to insert between consecutive string values.
    ignore_nulls
        Ignore null values (default).
        If set to `False`, null values will be propagated. This means that
        if the column contains any null values, the output is null.

    Returns
    -------
    Series
        Series of data type :class:`String`.

    Examples
    --------
    >>> pl.Series([1, None, 2]).str.concat("-")  # doctest: +SKIP
    shape: (1,)
    Series: '' [str]
    [
        "1-2"
    ]
    >>> pl.Series([1, None, 2]).str.concat(ignore_nulls=False)  # doctest: +SKIP
    shape: (1,)
    Series: '' [str]
    [
        null
    ]
    """
4 changes: 2 additions & 2 deletions py-polars/src/expr/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ use crate::PyExpr;

#[pymethods]
impl PyExpr {
fn str_concat(&self, delimiter: &str, ignore_nulls: bool) -> Self {
fn str_join(&self, delimiter: &str, ignore_nulls: bool) -> Self {
self.inner
.clone()
.str()
.concat(delimiter, ignore_nulls)
.join(delimiter, ignore_nulls)
.into()
}

Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/expr/test_expr_apply_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def test_expression_15183() -> None:
{"a": [1, 2, 3, 4, 5, 2, 3, 5, 1], "b": [1, 2, 3, 1, 2, 3, 1, 2, 3]}
)
.group_by("a")
.agg(pl.col.b.unique().sort().str.concat("-").str.split("-"))
.agg(pl.col.b.unique().sort().str.join("-").str.split("-"))
.sort("a")
).to_dict(as_series=False) == {
"a": [1, 2, 3, 4, 5],
Expand Down
Loading

0 comments on commit fd4c71e

Please sign in to comment.