From 62ecdd3e14e485baa84750b92a6615bc8d680b54 Mon Sep 17 00:00:00 2001
From: Wainberg
Date: Fri, 19 Jan 2024 12:56:55 -0500
Subject: [PATCH] feat(python): allow df.rename and lf.rename to take a renaming function (#13708)

Co-authored-by: Wainberg
---
 py-polars/polars/dataframe/frame.py       | 16 +++++++++++++--
 py-polars/polars/lazyframe/frame.py       | 25 ++++++++++++++++++-----
 py-polars/tests/unit/dataframe/test_df.py |  6 ++++++
 py-polars/tests/unit/test_lazy.py         |  8 ++++++++
 4 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
index 141718c9b9ce..cfdc3f096301 100644
--- a/py-polars/polars/dataframe/frame.py
+++ b/py-polars/polars/dataframe/frame.py
@@ -4059,14 +4059,15 @@ def reverse(self) -> DataFrame:
         """
         return self.select(F.col("*").reverse())
 
-    def rename(self, mapping: dict[str, str]) -> DataFrame:
+    def rename(self, mapping: dict[str, str] | Callable[[str], str]) -> DataFrame:
         """
         Rename column names.
 
         Parameters
         ----------
         mapping
-            Key value pairs that map from old name to new name.
+            Key value pairs that map from old name to new name, or a function
+            that takes the old name as input and returns the new name.
 
         Examples
         --------
@@ -4084,6 +4085,17 @@ def rename(self, mapping: dict[str, str]) -> DataFrame:
         │ 2     ┆ 7   ┆ b   │
         │ 3     ┆ 8   ┆ c   │
         └───────┴─────┴─────┘
+        >>> df.rename(lambda column_name: "c" + column_name[1:])
+        shape: (3, 3)
+        ┌─────┬─────┬─────┐
+        │ coo ┆ car ┆ cam │
+        │ --- ┆ --- ┆ --- │
+        │ i64 ┆ i64 ┆ str │
+        ╞═════╪═════╪═════╡
+        │ 1   ┆ 6   ┆ a   │
+        │ 2   ┆ 7   ┆ b   │
+        │ 3   ┆ 8   ┆ c   │
+        └─────┴─────┴─────┘
         """
         return self.lazy().rename(mapping).collect(_eager=True)
 
diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py
index 8fd866e05b20..63d5df0c80fe 100644
--- a/py-polars/polars/lazyframe/frame.py
+++ b/py-polars/polars/lazyframe/frame.py
@@ -4165,14 +4165,15 @@ def drop(
         drop_cols = _expand_selectors(self, *columns)
         return self._from_pyldf(self._ldf.drop(drop_cols))
 
-    def rename(self, mapping: dict[str, str]) -> Self:
+    def rename(self, mapping: dict[str, str] | Callable[[str], str]) -> Self:
         """
         Rename column names.
 
         Parameters
         ----------
         mapping
-            Key value pairs that map from old name to new name.
+            Key value pairs that map from old name to new name, or a function
+            that takes the old name as input and returns the new name.
 
         Notes
         -----
@@ -4199,10 +4200,24 @@ def rename(self, mapping: dict[str, str]) -> Self:
         │ 2     ┆ 7   ┆ b   │
         │ 3     ┆ 8   ┆ c   │
         └───────┴─────┴─────┘
+        >>> lf.rename(lambda column_name: "c" + column_name[1:]).collect()
+        shape: (3, 3)
+        ┌─────┬─────┬─────┐
+        │ coo ┆ car ┆ cam │
+        │ --- ┆ --- ┆ --- │
+        │ i64 ┆ i64 ┆ str │
+        ╞═════╪═════╪═════╡
+        │ 1   ┆ 6   ┆ a   │
+        │ 2   ┆ 7   ┆ b   │
+        │ 3   ┆ 8   ┆ c   │
+        └─────┴─────┴─────┘
         """
-        existing = list(mapping.keys())
-        new = list(mapping.values())
-        return self._from_pyldf(self._ldf.rename(existing, new))
+        if callable(mapping):
+            return self.select(F.all().name.map(mapping))
+        else:
+            existing = list(mapping.keys())
+            new = list(mapping.values())
+            return self._from_pyldf(self._ldf.rename(existing, new))
 
     def reverse(self) -> Self:
         """
diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py
index 9577f8c92cc1..483b04dc8edf 100644
--- a/py-polars/tests/unit/dataframe/test_df.py
+++ b/py-polars/tests/unit/dataframe/test_df.py
@@ -1156,6 +1156,12 @@ def test_rename(df: pl.DataFrame) -> None:
     _ = out[["foos", "bars"]]
 
 
+def test_rename_lambda() -> None:
+    df = pl.DataFrame({"a": [1], "b": [2], "c": [3]})
+    out = df.rename(lambda col: "foo" if col == "a" else "bar" if col == "b" else col)
+    assert out.columns == ["foo", "bar", "c"]
+
+
 def test_write_csv() -> None:
     df = pl.DataFrame(
         {
diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py
index 85330e1d4f17..043cd0144fed 100644
--- a/py-polars/tests/unit/test_lazy.py
+++ b/py-polars/tests/unit/test_lazy.py
@@ -923,6 +923,14 @@ def test_with_column_renamed(fruits_cars: pl.DataFrame) -> None:
     assert res.columns[0] == "C"
 
 
+def test_rename_lambda() -> None:
+    ldf = pl.LazyFrame({"a": [1], "b": [2], "c": [3]})
+    out = ldf.rename(
+        lambda col: "foo" if col == "a" else "bar" if col == "b" else col
+    ).collect()
+    assert out.columns == ["foo", "bar", "c"]
+
+
 def test_reverse() -> None:
     out = pl.LazyFrame({"a": [1, 2], "b": [3, 4]}).reverse()
     expected = pl.DataFrame({"a": [2, 1], "b": [4, 3]})