chore(python): Update docstring/test/etc usage of select and `with_columns` to idiomatic form (pola-rs#16801)
Wouittone · Jun 22, 2024 · f5d0a64 · f5d0a64
1 parent da9256a
commit f5d0a64
Show file tree

Hide file tree

Showing 49 changed files with 478 additions and 699 deletions.
diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
@@ -7808,10 +7808,8 @@ def unstack(
                 fill_values = [fill_values for _ in range(df.width)]
 
             df = df.select(
-                [
-                    s.extend_constant(next_fill, n_fill)
-                    for s, next_fill in zip(df, fill_values)
-                ]
+                s.extend_constant(next_fill, n_fill)
+                for s, next_fill in zip(df, fill_values)
             )
 
         if how == "horizontal":
@@ -8403,14 +8401,12 @@ def with_columns(
         │ 4.0 ┆ 13.0 ┆ true  │
         └─────┴──────┴───────┘
 
-        Multiple columns can be added by passing a list of expressions.
+        Multiple columns can be added using positional arguments.
 
         >>> df.with_columns(
-        ...     [
-        ...         (pl.col("a") ** 2).alias("a^2"),
-        ...         (pl.col("b") / 2).alias("b/2"),
-        ...         (pl.col("c").not_()).alias("not c"),
-        ...     ]
+        ...     (pl.col("a") ** 2).alias("a^2"),
+        ...     (pl.col("b") / 2).alias("b/2"),
+        ...     (pl.col("c").not_()).alias("not c"),
         ... )
         shape: (4, 6)
         ┌─────┬──────┬───────┬─────┬──────┬───────┐
@@ -8424,12 +8420,14 @@ def with_columns(
         │ 4   ┆ 13.0 ┆ true  ┆ 16  ┆ 6.5  ┆ false │
         └─────┴──────┴───────┴─────┴──────┴───────┘
 
-        Multiple columns also can be added using positional arguments instead of a list.
+        Multiple columns can also be added by passing a list of expressions.
 
         >>> df.with_columns(
-        ...     (pl.col("a") ** 2).alias("a^2"),
-        ...     (pl.col("b") / 2).alias("b/2"),
-        ...     (pl.col("c").not_()).alias("not c"),
+        ...     [
+        ...         (pl.col("a") ** 2).alias("a^2"),
+        ...         (pl.col("b") / 2).alias("b/2"),
+        ...         (pl.col("c").not_()).alias("not c"),
+        ...     ]
         ... )
         shape: (4, 6)
         ┌─────┬──────┬───────┬─────┬──────┬───────┐

diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py
@@ -389,13 +389,11 @@ def to_physical(self) -> Self:
         function.
 
         >>> pl.DataFrame({"vals": ["a", "x", None, "a"]}).with_columns(
-        ...     [
-        ...         pl.col("vals").cast(pl.Categorical),
-        ...         pl.col("vals")
-        ...         .cast(pl.Categorical)
-        ...         .to_physical()
-        ...         .alias("vals_physical"),
-        ...     ]
+        ...     pl.col("vals").cast(pl.Categorical),
+        ...     pl.col("vals")
+        ...     .cast(pl.Categorical)
+        ...     .to_physical()
+        ...     .alias("vals_physical"),
         ... )
         shape: (4, 2)
         ┌──────┬───────────────┐
@@ -1726,10 +1724,8 @@ def cast(self, dtype: PolarsDataType | type[Any], *, strict: bool = True) -> Sel
         ...     }
         ... )
         >>> df.with_columns(
-        ...     [
-        ...         pl.col("a").cast(pl.Float64),
-        ...         pl.col("b").cast(pl.Int32),
-        ...     ]
+        ...     pl.col("a").cast(pl.Float64),
+        ...     pl.col("b").cast(pl.Int32),
         ... )
         shape: (3, 2)
         ┌─────┬─────┐

diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py
@@ -1767,12 +1767,10 @@ def split_exact(self, by: IntoExpr, n: int, *, inclusive: bool = False) -> Expr:
         each part to a new column.
 
         >>> df.with_columns(
-        ...     [
-        ...         pl.col("x")
-        ...         .str.split_exact("_", 1)
-        ...         .struct.rename_fields(["first_part", "second_part"])
-        ...         .alias("fields"),
-        ...     ]
+        ...     pl.col("x")
+        ...     .str.split_exact("_", 1)
+        ...     .struct.rename_fields(["first_part", "second_part"])
+        ...     .alias("fields")
         ... ).unnest("fields")
         shape: (4, 3)
         ┌──────┬────────────┬─────────────┐
@@ -1832,12 +1830,10 @@ def splitn(self, by: IntoExpr, n: int) -> Expr:
         each part to a new column.
 
         >>> df.with_columns(
-        ...     [
-        ...         pl.col("s")
-        ...         .str.splitn(" ", 2)
-        ...         .struct.rename_fields(["first_part", "second_part"])
-        ...         .alias("fields"),
-        ...     ]
+        ...     pl.col("s")
+        ...     .str.splitn(" ", 2)
+        ...     .struct.rename_fields(["first_part", "second_part"])
+        ...     .alias("fields")
         ... ).unnest("fields")
         shape: (4, 3)
         ┌─────────────┬────────────┬─────────────┐

diff --git a/py-polars/polars/functions/eager.py b/py-polars/polars/functions/eager.py
@@ -163,10 +163,8 @@ def concat(
                 x.join(y, how="full", on=common_cols, suffix="_PL_CONCAT_RIGHT")
                 # Coalesce full outer join columns
                 .with_columns(
-                    [
-                        F.coalesce([name, f"{name}_PL_CONCAT_RIGHT"])
-                        for name in common_cols
-                    ]
+                    F.coalesce([name, f"{name}_PL_CONCAT_RIGHT"])
+                    for name in common_cols
                 )
                 .drop([f"{name}_PL_CONCAT_RIGHT" for name in common_cols])
             ),

diff --git a/py-polars/polars/io/spreadsheet/_write_utils.py b/py-polars/polars/io/spreadsheet/_write_utils.py
@@ -250,20 +250,18 @@ def _xl_inject_dummy_table_columns(
                 df_select_cols.insert(insert_idx, col)
 
     df = df.select(
-        [
-            (
-                col
-                if col in df_original_columns
-                else (
-                    F.lit(None).cast(
-                        cast_lookup.get(col, dtype)  # type:ignore[arg-type]
-                    )
-                    if dtype or (col in cast_lookup and cast_lookup[col] is not None)
-                    else F.lit(None)
-                ).alias(col)
-            )
-            for col in df_select_cols
-        ]
+        (
+            col
+            if col in df_original_columns
+            else (
+                F.lit(None).cast(
+                    cast_lookup.get(col, dtype)  # type:ignore[arg-type]
+                )
+                if dtype or (col in cast_lookup and cast_lookup[col] is not None)
+                else F.lit(None)
+            ).alias(col)
+        )
+        for col in df_select_cols
     )
     return df
 

diff --git a/py-polars/polars/io/spreadsheet/functions.py b/py-polars/polars/io/spreadsheet/functions.py
@@ -853,9 +853,7 @@ def _read_spreadsheet_calamine(
                 type_checks.append(check_cast)
 
     if type_checks:
-        apply_cast = df.select(
-            [d[0].all(ignore_nulls=True) for d in type_checks],
-        ).row(0)
+        apply_cast = df.select(d[0].all(ignore_nulls=True) for d in type_checks).row(0)
         if downcast := [
             cast for apply, (_, cast) in zip(apply_cast, type_checks) if apply
         ]:

diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py
@@ -4084,14 +4084,12 @@ def with_columns(
         │ 4.0 ┆ 13.0 ┆ true  │
         └─────┴──────┴───────┘
 
-        Multiple columns can be added by passing a list of expressions.
+        Multiple columns can be added using positional arguments.
 
         >>> lf.with_columns(
-        ...     [
-        ...         (pl.col("a") ** 2).alias("a^2"),
-        ...         (pl.col("b") / 2).alias("b/2"),
-        ...         (pl.col("c").not_()).alias("not c"),
-        ...     ]
+        ...     (pl.col("a") ** 2).alias("a^2"),
+        ...     (pl.col("b") / 2).alias("b/2"),
+        ...     (pl.col("c").not_()).alias("not c"),
         ... ).collect()
         shape: (4, 6)
         ┌─────┬──────┬───────┬─────┬──────┬───────┐
@@ -4105,12 +4103,14 @@ def with_columns(
         │ 4   ┆ 13.0 ┆ true  ┆ 16  ┆ 6.5  ┆ false │
         └─────┴──────┴───────┴─────┴──────┴───────┘
 
-        Multiple columns also can be added using positional arguments instead of a list.
+        Multiple columns can also be added by passing a list of expressions.
 
         >>> lf.with_columns(
-        ...     (pl.col("a") ** 2).alias("a^2"),
-        ...     (pl.col("b") / 2).alias("b/2"),
-        ...     (pl.col("c").not_()).alias("not c"),
+        ...     [
+        ...         (pl.col("a") ** 2).alias("a^2"),
+        ...         (pl.col("b") / 2).alias("b/2"),
+        ...         (pl.col("c").not_()).alias("not c"),
+        ...     ]
         ... ).collect()
         shape: (4, 6)
         ┌─────┬──────┬───────┬─────┬──────┬───────┐
@@ -4142,8 +4142,8 @@ def with_columns(
         │ 4   ┆ 13.0 ┆ true  ┆ 52.0 ┆ false │
         └─────┴──────┴───────┴──────┴───────┘
 
-        Expressions with multiple outputs can be automatically instantiated as Structs
-        by enabling the setting `Config.set_auto_structify(True)`:
+        Expressions with multiple outputs can automatically be instantiated as Structs
+        by enabling the experimental setting `Config.set_auto_structify(True)`:
 
         >>> with pl.Config(auto_structify=True):
         ...     lf.drop("c").with_columns(
@@ -5914,7 +5914,7 @@ def set_sorted(
         columns = parse_as_list_of_expressions(column, *more_columns)
 
         return self.with_columns(
-            [wrap_expr(e).set_sorted(descending=descending) for e in columns]
+            wrap_expr(e).set_sorted(descending=descending) for e in columns
         )
 
     @unstable()

diff --git a/py-polars/tests/unit/conftest.py b/py-polars/tests/unit/conftest.py
@@ -47,13 +47,11 @@ def df() -> pl.DataFrame:
         }
     )
     return df.with_columns(
-        [
-            pl.col("date").cast(pl.Date),
-            pl.col("datetime").cast(pl.Datetime),
-            pl.col("strings").cast(pl.Categorical).alias("cat"),
-            pl.col("strings").cast(pl.Enum(["foo", "ham", "bar"])).alias("enum"),
-            pl.col("time").cast(pl.Time),
-        ]
+        pl.col("date").cast(pl.Date),
+        pl.col("datetime").cast(pl.Datetime),
+        pl.col("strings").cast(pl.Categorical).alias("cat"),
+        pl.col("strings").cast(pl.Enum(["foo", "ham", "bar"])).alias("enum"),
+        pl.col("time").cast(pl.Time),
     )
 
 

diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py
@@ -1063,11 +1063,9 @@ def test_init_only_columns() -> None:
             ],
         )
         expected = pl.DataFrame({"a": [], "b": [], "c": []}).with_columns(
-            [
-                pl.col("a").cast(pl.Date),
-                pl.col("b").cast(pl.UInt64),
-                pl.col("c").cast(pl.Int8),
-            ]
+            pl.col("a").cast(pl.Date),
+            pl.col("b").cast(pl.UInt64),
+            pl.col("c").cast(pl.Int8),
         )
         expected.insert_column(3, pl.Series("d", [], pl.List(pl.UInt8)))
 

diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py
@@ -401,20 +401,19 @@ def test_take_misc(fruits_cars: pl.DataFrame) -> None:
 
     for index in [[0, 1], pl.Series([0, 1]), np.array([0, 1])]:
         out = df.sort("fruits").select(
-            [
-                pl.col("B")
-                .reverse()
-                .gather(index)  # type: ignore[arg-type]
-                .over("fruits", mapping_strategy="join"),
-                "fruits",
-            ]
+            pl.col("B")
+            .reverse()
+            .gather(index)  # type: ignore[arg-type]
+            .over("fruits", mapping_strategy="join"),
+            "fruits",
         )
 
         assert out[0, "B"].to_list() == [2, 3]
         assert out[4, "B"].to_list() == [1, 4]
 
     out = df.sort("fruits").select(
-        [pl.col("B").reverse().get(pl.lit(1)).over("fruits"), "fruits"]
+        pl.col("B").reverse().get(pl.lit(1)).over("fruits"),
+        "fruits",
     )
     assert out[0, "B"] == 3
     assert out[4, "B"] == 4
@@ -1755,10 +1754,8 @@ def test_fill_null() -> None:
     )
 
     assert df.select(
-        [
-            pl.all().forward_fill().name.suffix("_forward"),
-            pl.all().backward_fill().name.suffix("_backward"),
-        ]
+        pl.all().forward_fill().name.suffix("_forward"),
+        pl.all().backward_fill().name.suffix("_backward"),
     ).to_dict(as_series=False) == {
         "c_forward": [
             ["Apple", "Orange"],
@@ -2082,10 +2079,8 @@ def test_fill_null_limits() -> None:
             "c": [True, None, None, None, False, True, None, None, None, False],
         }
     ).select(
-        [
-            pl.all().fill_null(strategy="forward", limit=2),
-            pl.all().fill_null(strategy="backward", limit=2).name.suffix("_backward"),
-        ]
+        pl.all().fill_null(strategy="forward", limit=2),
+        pl.all().fill_null(strategy="backward", limit=2).name.suffix("_backward"),
     ).to_dict(as_series=False) == {
         "a": [1, 1, 1, None, 5, 6, 6, 6, None, 10],
         "b": ["a", "a", "a", None, "b", "c", "c", "c", None, "d"],
@@ -2147,11 +2142,9 @@ def test_selection_regex_and_multicol() -> None:
 
     # Selection only
     test_df.select(
-        [
-            pl.col(["a", "b", "c"]).name.suffix("_list"),
-            pl.all().exclude("foo").name.suffix("_wild"),
-            pl.col("^\\w$").name.suffix("_regex"),
-        ]
+        pl.col(["a", "b", "c"]).name.suffix("_list"),
+        pl.all().exclude("foo").name.suffix("_wild"),
+        pl.col("^\\w$").name.suffix("_regex"),
     )
 
     # Multi * Single
@@ -2615,9 +2608,7 @@ def test_format_empty_df() -> None:
             pl.Series("val2", [], dtype=pl.Categorical),
         ]
     ).select(
-        [
-            pl.format("{}:{}", pl.col("val1"), pl.col("val2")).alias("cat"),
-        ]
+        pl.format("{}:{}", pl.col("val1"), pl.col("val2")).alias("cat"),
     )
     assert df.shape == (0, 1)
     assert df.dtypes == [pl.String]
@@ -2637,7 +2628,7 @@ def test_deadlocks_3409() -> None:
     assert (
         pl.DataFrame({"col1": [1, 2, 3]})
         .with_columns(
-            [pl.col("col1").cumulative_eval(pl.element().map_batches(lambda x: 0))]
+            pl.col("col1").cumulative_eval(pl.element().map_batches(lambda x: 0))
         )
         .to_dict(as_series=False)
     ) == {"col1": [0, 0, 0]}
@@ -2728,12 +2719,10 @@ def test_window_deadlock() -> None:
         }
     )
 
-    df = df.select(
-        [
-            pl.col("*"),  # select all
-            pl.col("random").sum().over("groups").alias("sum[random]/groups"),
-            pl.col("random").implode().over("names").alias("random/name"),
-        ]
+    _df = df.select(
+        pl.col("*"),  # select all
+        pl.col("random").sum().over("groups").alias("sum[random]/groups"),
+        pl.col("random").implode().over("names").alias("random/name"),
     )
 
 

diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py
@@ -401,7 +401,7 @@ def test_categorical_error_on_local_cmp() -> None:
 
 def test_cast_null_to_categorical() -> None:
     assert pl.DataFrame().with_columns(
-        [pl.lit(None).cast(pl.Categorical).alias("nullable_enum")]
+        pl.lit(None).cast(pl.Categorical).alias("nullable_enum")
     ).dtypes == [pl.Categorical]
 
 
@@ -485,7 +485,7 @@ def test_stringcache() -> None:
     with pl.StringCache():
         # create a large enough column that the categorical map is reallocated
         df = pl.DataFrame({"cats": pl.arange(0, N, eager=True)}).select(
-            [pl.col("cats").cast(pl.String).cast(pl.Categorical)]
+            pl.col("cats").cast(pl.String).cast(pl.Categorical)
         )
         assert df.filter(pl.col("cats").is_in(["1", "2"])).to_dict(as_series=False) == {
             "cats": ["1", "2"]