diff --git a/crates/polars-ops/src/chunked_array/strings/concat.rs b/crates/polars-ops/src/chunked_array/strings/concat.rs index c0dff309e446..ea557267a1d2 100644 --- a/crates/polars-ops/src/chunked_array/strings/concat.rs +++ b/crates/polars-ops/src/chunked_array/strings/concat.rs @@ -3,7 +3,7 @@ use arrow::compute::cast::utf8_to_utf8view; use polars_core::prelude::*; // Vertically concatenate all strings in a StringChunked. -pub fn str_concat(ca: &StringChunked, delimiter: &str, ignore_nulls: bool) -> StringChunked { +pub fn str_join(ca: &StringChunked, delimiter: &str, ignore_nulls: bool) -> StringChunked { if ca.is_empty() { return StringChunked::new(ca.name(), &[""]); } @@ -142,7 +142,7 @@ mod test { fn test_str_concat() { let ca = Int32Chunked::new("foo", &[Some(1), None, Some(3)]); let ca_str = ca.cast(&DataType::String).unwrap(); - let out = str_concat(ca_str.str().unwrap(), "-", true); + let out = str_join(ca_str.str().unwrap(), "-", true); let out = out.get(0); assert_eq!(out, Some("1-3")); diff --git a/crates/polars-plan/src/dsl/function_expr/strings.rs b/crates/polars-plan/src/dsl/function_expr/strings.rs index 2f2f80e2e6d5..2ca0b93771b5 100644 --- a/crates/polars-plan/src/dsl/function_expr/strings.rs +++ b/crates/polars-plan/src/dsl/function_expr/strings.rs @@ -330,7 +330,7 @@ impl From for SpecialEq> { ConcatVertical { delimiter, ignore_nulls, - } => map!(strings::concat, &delimiter, ignore_nulls), + } => map!(strings::join, &delimiter, ignore_nulls), #[cfg(feature = "concat_str")] ConcatHorizontal { delimiter, @@ -709,10 +709,10 @@ fn to_time(s: &Series, options: &StrptimeOptions) -> PolarsResult { } #[cfg(feature = "concat_str")] -pub(super) fn concat(s: &Series, delimiter: &str, ignore_nulls: bool) -> PolarsResult { +pub(super) fn join(s: &Series, delimiter: &str, ignore_nulls: bool) -> PolarsResult { let str_s = s.cast(&DataType::String)?; - let concat = polars_ops::chunked_array::str_concat(str_s.str()?, delimiter, ignore_nulls); - Ok(concat.into_series()) + let joined = polars_ops::chunked_array::str_join(str_s.str()?, delimiter, ignore_nulls); + Ok(joined.into_series()) } #[cfg(feature = "concat_str")] diff --git a/crates/polars-plan/src/dsl/string.rs b/crates/polars-plan/src/dsl/string.rs index abcc56e148e4..a7dab14686db 100644 --- a/crates/polars-plan/src/dsl/string.rs +++ b/crates/polars-plan/src/dsl/string.rs @@ -304,7 +304,7 @@ impl StringNameSpace { /// /// * `delimiter` - A string that will act as delimiter between values. #[cfg(feature = "concat_str")] - pub fn concat(self, delimiter: &str, ignore_nulls: bool) -> Expr { + pub fn join(self, delimiter: &str, ignore_nulls: bool) -> Expr { self.0 .apply_private( StringFunction::ConcatVertical { diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index f1360e449af6..3e24c299a6ed 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -434,57 +434,6 @@ def len_chars(self) -> Expr: """ return wrap_expr(self._pyexpr.str_len_chars()) - def concat( - self, delimiter: str | None = None, *, ignore_nulls: bool = True - ) -> Expr: - """ - Vertically concatenate the string values in the column to a single string value. - - Parameters - ---------- - delimiter - The delimiter to insert between consecutive string values. - ignore_nulls - Ignore null values (default). - If set to `False`, null values will be propagated. This means that - if the column contains any null values, the output is null. - - Returns - ------- - Expr - Expression of data type :class:`String`. - - Examples - -------- - >>> df = pl.DataFrame({"foo": [1, None, 2]}) - >>> df.select(pl.col("foo").str.concat("-")) - shape: (1, 1) - ┌─────┐ - │ foo │ - │ --- │ - │ str │ - ╞═════╡ - │ 1-2 │ - └─────┘ - >>> df.select(pl.col("foo").str.concat("-", ignore_nulls=False)) - shape: (1, 1) - ┌──────┐ - │ foo │ - │ --- │ - │ str │ - ╞══════╡ - │ null │ - └──────┘ - """ - if delimiter is None: - issue_deprecation_warning( - "The default `delimiter` for `str.concat` will change from '-' to an empty string." - " Pass a delimiter to silence this warning.", - version="0.20.5", - ) - delimiter = "-" - return wrap_expr(self._pyexpr.str_concat(delimiter, ignore_nulls)) - def to_uppercase(self) -> Expr: """ Transform to uppercase variant. @@ -2554,6 +2503,101 @@ def replace_many( ) ) + def join(self, delimiter: str = "", *, ignore_nulls: bool = True) -> Expr: + """ + Vertically concatenate the string values in the column to a single string value. + + Parameters + ---------- + delimiter + The delimiter to insert between consecutive string values. + ignore_nulls + Ignore null values (default). + If set to `False`, null values will be propagated. This means that + if the column contains any null values, the output is null. + + Returns + ------- + Expr + Expression of data type :class:`String`. + + Examples + -------- + >>> df = pl.DataFrame({"foo": [1, None, 3]}) + >>> df.select(pl.col("foo").str.join("-")) + shape: (1, 1) + ┌─────┐ + │ foo │ + │ --- │ + │ str │ + ╞═════╡ + │ 1-3 │ + └─────┘ + >>> df.select(pl.col("foo").str.join(ignore_nulls=False)) + shape: (1, 1) + ┌──────┐ + │ foo │ + │ --- │ + │ str │ + ╞══════╡ + │ null │ + └──────┘ + """ + return wrap_expr(self._pyexpr.str_join(delimiter, ignore_nulls=ignore_nulls)) + + def concat( + self, delimiter: str | None = None, *, ignore_nulls: bool = True + ) -> Expr: + """ + Vertically concatenate the string values in the column to a single string value. + + Parameters + ---------- + delimiter + The delimiter to insert between consecutive string values. + ignore_nulls + Ignore null values (default). + If set to `False`, null values will be propagated. This means that + if the column contains any null values, the output is null. + + Returns + ------- + Expr + Expression of data type :class:`String`. + + Examples + -------- + >>> df = pl.DataFrame({"foo": [1, None, 2]}) + >>> df.select(pl.col("foo").str.concat("-")) # doctest: +SKIP + shape: (1, 1) + ┌─────┐ + │ foo │ + │ --- │ + │ str │ + ╞═════╡ + │ 1-2 │ + └─────┘ + >>> df.select( + ... pl.col("foo").str.concat("-", ignore_nulls=False) + ... ) # doctest: +SKIP + shape: (1, 1) + ┌──────┐ + │ foo │ + │ --- │ + │ str │ + ╞══════╡ + │ null │ + └──────┘ + """ + if delimiter is None: + issue_deprecation_warning( + "The default `delimiter` for `str.concat` will change from '-' to an empty string." + " Pass a delimiter to silence this warning.", + version="0.20.5", + ) + delimiter = "-" + return self.join(delimiter, ignore_nulls=ignore_nulls) + def _validate_format_argument(format: str | None) -> None: if format is not None and ".%f" in format: diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index 0868754e5dc5..0788886cd871 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -361,42 +361,6 @@ def len_chars(self) -> Series: ] """ - def concat( - self, delimiter: str | None = None, *, ignore_nulls: bool = True - ) -> Series: - """ - Vertically concatenate the string values in the column to a single string value. - - Parameters - ---------- - delimiter - The delimiter to insert between consecutive string values. - ignore_nulls - Ignore null values (default). - If set to `False`, null values will be propagated. This means that - if the column contains any null values, the output is null. - - Returns - ------- - Series - Series of data type :class:`String`. - - Examples - -------- - >>> pl.Series([1, None, 2]).str.concat("-") - shape: (1,) - Series: '' [str] - [ - "1-2" - ] - >>> pl.Series([1, None, 2]).str.concat("-", ignore_nulls=False) - shape: (1,) - Series: '' [str] - [ - null - ] - """ - def contains( self, pattern: str | Expr, *, literal: bool = False, strict: bool = True ) -> Series: @@ -1904,3 +1868,79 @@ def replace_many( "Can me feel the love tonight" ] """ + + def join(self, delimiter: str = "", *, ignore_nulls: bool = True) -> Series: + """ + Vertically concatenate the string values in the column to a single string value. + + Parameters + ---------- + delimiter + The delimiter to insert between consecutive string values. + ignore_nulls + Ignore null values (default). + If set to `False`, null values will be propagated. This means that + if the column contains any null values, the output is null. + + Returns + ------- + Series + Series of data type :class:`String`. + + Examples + -------- + >>> s = pl.Series([1, None, 3]) + >>> s.str.join("-") + shape: (1,) + Series: '' [str] + [ + "1-3" + ] + >>> s.str.join(ignore_nulls=False) + shape: (1,) + Series: '' [str] + [ + null + ] + """ + + @deprecate_function( + "Use `str.join` instead. Note that the default `delimiter` for `str.join`" + " is an empty string instead of a hyphen.", + version="1.0.0", + ) + def concat( + self, delimiter: str | None = None, *, ignore_nulls: bool = True + ) -> Series: + """ + Vertically concatenate the string values in the column to a single string value. + + Parameters + ---------- + delimiter + The delimiter to insert between consecutive string values. + ignore_nulls + Ignore null values (default). + If set to `False`, null values will be propagated. This means that + if the column contains any null values, the output is null. + + Returns + ------- + Series + Series of data type :class:`String`. + + Examples + -------- + >>> pl.Series([1, None, 2]).str.concat("-") # doctest: +SKIP + shape: (1,) + Series: '' [str] + [ + "1-2" + ] + >>> pl.Series([1, None, 2]).str.concat(ignore_nulls=False) # doctest: +SKIP + shape: (1,) + Series: '' [str] + [ + null + ] + """ diff --git a/py-polars/src/expr/string.rs b/py-polars/src/expr/string.rs index 4903413d604c..25c1830de69e 100644 --- a/py-polars/src/expr/string.rs +++ b/py-polars/src/expr/string.rs @@ -7,11 +7,11 @@ use crate::PyExpr; #[pymethods] impl PyExpr { - fn str_concat(&self, delimiter: &str, ignore_nulls: bool) -> Self { + fn str_join(&self, delimiter: &str, ignore_nulls: bool) -> Self { self.inner .clone() .str() - .concat(delimiter, ignore_nulls) + .join(delimiter, ignore_nulls) .into() } diff --git a/py-polars/tests/unit/expr/test_expr_apply_eval.py b/py-polars/tests/unit/expr/test_expr_apply_eval.py index 9e8fa0881947..73e501f705bd 100644 --- a/py-polars/tests/unit/expr/test_expr_apply_eval.py +++ b/py-polars/tests/unit/expr/test_expr_apply_eval.py @@ -7,7 +7,7 @@ def test_expression_15183() -> None: {"a": [1, 2, 3, 4, 5, 2, 3, 5, 1], "b": [1, 2, 3, 1, 2, 3, 1, 2, 3]} ) .group_by("a") - .agg(pl.col.b.unique().sort().str.concat("-").str.split("-")) + .agg(pl.col.b.unique().sort().str.join("-").str.split("-")) .sort("a") ).to_dict(as_series=False) == { "a": [1, 2, 3, 4, 5], diff --git a/py-polars/tests/unit/operations/namespaces/string/test_concat.py b/py-polars/tests/unit/operations/namespaces/string/test_concat.py index 78fdc038da3e..13ee591cd3a8 100644 --- a/py-polars/tests/unit/operations/namespaces/string/test_concat.py +++ b/py-polars/tests/unit/operations/namespaces/string/test_concat.py @@ -6,71 +6,71 @@ from polars.testing import assert_series_equal -def test_str_concat() -> None: +def test_str_join() -> None: s = pl.Series(["1", None, "2", None]) # propagate null assert_series_equal( - s.str.concat("-", ignore_nulls=False), pl.Series([None], dtype=pl.String) + s.str.join("-", ignore_nulls=False), pl.Series([None], dtype=pl.String) ) # ignore null - assert_series_equal(s.str.concat("-"), pl.Series(["1-2"])) + assert_series_equal(s.str.join(), pl.Series(["12"])) # str None/null is ok s = pl.Series(["1", "None", "2", "null"]) assert_series_equal( - s.str.concat("-", ignore_nulls=False), pl.Series(["1-None-2-null"]) + s.str.join("-", ignore_nulls=False), pl.Series(["1-None-2-null"]) ) - assert_series_equal(s.str.concat("-"), pl.Series(["1-None-2-null"])) + assert_series_equal(s.str.join("-"), pl.Series(["1-None-2-null"])) -def test_str_concat2() -> None: +def test_str_join2() -> None: df = pl.DataFrame({"foo": [1, None, 2, None]}) - out = df.select(pl.col("foo").str.concat("-", ignore_nulls=False)) + out = df.select(pl.col("foo").str.join(ignore_nulls=False)) assert out.item() is None - out = df.select(pl.col("foo").str.concat("-")) - assert out.item() == "1-2" + out = df.select(pl.col("foo").str.join()) + assert out.item() == "12" -def test_str_concat_all_null() -> None: +def test_str_join_all_null() -> None: s = pl.Series([None, None, None], dtype=pl.String) assert_series_equal( - s.str.concat("-", ignore_nulls=False), pl.Series([None], dtype=pl.String) + s.str.join(ignore_nulls=False), pl.Series([None], dtype=pl.String) ) - assert_series_equal(s.str.concat("-", ignore_nulls=True), pl.Series([""])) + assert_series_equal(s.str.join(ignore_nulls=True), pl.Series([""])) -def test_str_concat_empty_list() -> None: +def test_str_join_empty_list() -> None: s = pl.Series([], dtype=pl.String) - assert_series_equal(s.str.concat("-", ignore_nulls=False), pl.Series([""])) - assert_series_equal(s.str.concat("-", ignore_nulls=True), pl.Series([""])) + assert_series_equal(s.str.join(ignore_nulls=False), pl.Series([""])) + assert_series_equal(s.str.join(ignore_nulls=True), pl.Series([""])) -def test_str_concat_empty_list2() -> None: +def test_str_join_empty_list2() -> None: s = pl.Series([], dtype=pl.String) df = pl.DataFrame({"foo": s}) - result = df.select(pl.col("foo").str.concat("-")).item() + result = df.select(pl.col("foo").str.join()).item() expected = "" assert result == expected -def test_str_concat_empty_list_agg_context() -> None: +def test_str_join_empty_list_agg_context() -> None: df = pl.DataFrame(data={"i": [1], "v": [None]}, schema_overrides={"v": pl.String}) - result = df.group_by("i").agg(pl.col("v").drop_nulls().str.concat("-"))["v"].item() + result = df.group_by("i").agg(pl.col("v").drop_nulls().str.join())["v"].item() expected = "" assert result == expected -def test_str_concat_datetime() -> None: +def test_str_join_datetime() -> None: df = pl.DataFrame({"d": [datetime(2020, 1, 1), None, datetime(2022, 1, 1)]}) - out = df.select(pl.col("d").str.concat("|", ignore_nulls=True)) + out = df.select(pl.col("d").str.join("|", ignore_nulls=True)) assert out.item() == "2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000" - out = df.select(pl.col("d").str.concat("|", ignore_nulls=False)) + out = df.select(pl.col("d").str.join("|", ignore_nulls=False)) assert out.item() is None -def test_str_concat_delimiter_deprecated() -> None: +def test_str_concat_deprecated() -> None: s = pl.Series(["1", None, "2", None]) with pytest.deprecated_call(): result = s.str.concat() diff --git a/py-polars/tests/unit/operations/namespaces/string/test_string.py b/py-polars/tests/unit/operations/namespaces/string/test_string.py index b19920a958a5..a760a36bfcc0 100644 --- a/py-polars/tests/unit/operations/namespaces/string/test_string.py +++ b/py-polars/tests/unit/operations/namespaces/string/test_string.py @@ -766,13 +766,13 @@ def test_extract_binary() -> None: assert out[0] == "aron" -def test_str_concat_returns_scalar() -> None: +def test_str_join_returns_scalar() -> None: df = pl.DataFrame( [pl.Series("val", ["A", "B", "C", "D"]), pl.Series("id", [1, 1, 2, 2])] ) grouped = ( df.group_by("id") - .agg(pl.col("val").str.concat(delimiter=",").alias("grouped")) + .agg(pl.col("val").str.join(delimiter=",").alias("grouped")) .get_column("grouped") ) assert grouped.dtype == pl.String diff --git a/py-polars/tests/unit/sql/test_table_operations.py b/py-polars/tests/unit/sql/test_table_operations.py index 5992dd71e701..3056105a239b 100644 --- a/py-polars/tests/unit/sql/test_table_operations.py +++ b/py-polars/tests/unit/sql/test_table_operations.py @@ -39,7 +39,7 @@ def test_explain_query(test_frame: pl.LazyFrame) -> None: with pl.SQLContext(frame=test_frame) as ctx: plan = ( ctx.execute("EXPLAIN SELECT * FROM frame") - .select(pl.col("Logical Plan").str.concat("")) + .select(pl.col("Logical Plan").str.join()) .collect() .item() )