diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 25c4a3cdfea7a..9fc727ab71ca7 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7808,10 +7808,8 @@ def unstack( fill_values = [fill_values for _ in range(df.width)] df = df.select( - [ - s.extend_constant(next_fill, n_fill) - for s, next_fill in zip(df, fill_values) - ] + s.extend_constant(next_fill, n_fill) + for s, next_fill in zip(df, fill_values) ) if how == "horizontal": @@ -8403,14 +8401,12 @@ def with_columns( │ 4.0 ┆ 13.0 ┆ true │ └─────┴──────┴───────┘ - Multiple columns can be added by passing a list of expressions. + Multiple columns can be added using positional arguments. >>> df.with_columns( - ... [ - ... (pl.col("a") ** 2).alias("a^2"), - ... (pl.col("b") / 2).alias("b/2"), - ... (pl.col("c").not_()).alias("not c"), - ... ] + ... (pl.col("a") ** 2).alias("a^2"), + ... (pl.col("b") / 2).alias("b/2"), + ... (pl.col("c").not_()).alias("not c"), ... ) shape: (4, 6) ┌─────┬──────┬───────┬─────┬──────┬───────┐ @@ -8424,12 +8420,14 @@ def with_columns( │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │ └─────┴──────┴───────┴─────┴──────┴───────┘ - Multiple columns also can be added using positional arguments instead of a list. + Multiple columns can also be added by passing a list of expressions. >>> df.with_columns( - ... (pl.col("a") ** 2).alias("a^2"), - ... (pl.col("b") / 2).alias("b/2"), - ... (pl.col("c").not_()).alias("not c"), + ... [ + ... (pl.col("a") ** 2).alias("a^2"), + ... (pl.col("b") / 2).alias("b/2"), + ... (pl.col("c").not_()).alias("not c"), + ... ] ... ) shape: (4, 6) ┌─────┬──────┬───────┬─────┬──────┬───────┐ diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 89a7404fad470..4b74959af9f9d 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -389,13 +389,11 @@ def to_physical(self) -> Self: function. >>> pl.DataFrame({"vals": ["a", "x", None, "a"]}).with_columns( - ... [ - ... pl.col("vals").cast(pl.Categorical), - ... pl.col("vals") - ... .cast(pl.Categorical) - ... .to_physical() - ... .alias("vals_physical"), - ... ] + ... pl.col("vals").cast(pl.Categorical), + ... pl.col("vals") + ... .cast(pl.Categorical) + ... .to_physical() + ... .alias("vals_physical"), ... ) shape: (4, 2) ┌──────┬───────────────┐ @@ -1726,10 +1724,8 @@ def cast(self, dtype: PolarsDataType | type[Any], *, strict: bool = True) -> Sel ... } ... ) >>> df.with_columns( - ... [ - ... pl.col("a").cast(pl.Float64), - ... pl.col("b").cast(pl.Int32), - ... ] + ... pl.col("a").cast(pl.Float64), + ... pl.col("b").cast(pl.Int32), ... ) shape: (3, 2) ┌─────┬─────┐ diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index 3e24c299a6ed3..16c3ce349346b 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -1767,12 +1767,10 @@ def split_exact(self, by: IntoExpr, n: int, *, inclusive: bool = False) -> Expr: each part to a new column. >>> df.with_columns( - ... [ - ... pl.col("x") - ... .str.split_exact("_", 1) - ... .struct.rename_fields(["first_part", "second_part"]) - ... .alias("fields"), - ... ] + ... pl.col("x") + ... .str.split_exact("_", 1) + ... .struct.rename_fields(["first_part", "second_part"]) + ... .alias("fields") ... ).unnest("fields") shape: (4, 3) ┌──────┬────────────┬─────────────┐ @@ -1832,12 +1830,10 @@ def splitn(self, by: IntoExpr, n: int) -> Expr: each part to a new column. >>> df.with_columns( - ... [ - ... pl.col("s") - ... 
.str.splitn(" ", 2) - ... .struct.rename_fields(["first_part", "second_part"]) - ... .alias("fields"), - ... ] + ... pl.col("s") + ... .str.splitn(" ", 2) + ... .struct.rename_fields(["first_part", "second_part"]) + ... .alias("fields") ... ).unnest("fields") shape: (4, 3) ┌─────────────┬────────────┬─────────────┐ diff --git a/py-polars/polars/functions/eager.py b/py-polars/polars/functions/eager.py index 9230d985c89c4..0eb271f424c40 100644 --- a/py-polars/polars/functions/eager.py +++ b/py-polars/polars/functions/eager.py @@ -163,10 +163,8 @@ def concat( x.join(y, how="full", on=common_cols, suffix="_PL_CONCAT_RIGHT") # Coalesce full outer join columns .with_columns( - [ - F.coalesce([name, f"{name}_PL_CONCAT_RIGHT"]) - for name in common_cols - ] + F.coalesce([name, f"{name}_PL_CONCAT_RIGHT"]) + for name in common_cols ) .drop([f"{name}_PL_CONCAT_RIGHT" for name in common_cols]) ), diff --git a/py-polars/polars/io/spreadsheet/_write_utils.py b/py-polars/polars/io/spreadsheet/_write_utils.py index 08dad777e6e9e..a12839b15d25a 100644 --- a/py-polars/polars/io/spreadsheet/_write_utils.py +++ b/py-polars/polars/io/spreadsheet/_write_utils.py @@ -250,20 +250,18 @@ def _xl_inject_dummy_table_columns( df_select_cols.insert(insert_idx, col) df = df.select( - [ - ( - col - if col in df_original_columns - else ( - F.lit(None).cast( - cast_lookup.get(col, dtype) # type:ignore[arg-type] - ) - if dtype or (col in cast_lookup and cast_lookup[col] is not None) - else F.lit(None) - ).alias(col) - ) - for col in df_select_cols - ] + ( + col + if col in df_original_columns + else ( + F.lit(None).cast( + cast_lookup.get(col, dtype) # type:ignore[arg-type] + ) + if dtype or (col in cast_lookup and cast_lookup[col] is not None) + else F.lit(None) + ).alias(col) + ) + for col in df_select_cols ) return df diff --git a/py-polars/polars/io/spreadsheet/functions.py b/py-polars/polars/io/spreadsheet/functions.py index 2b122dc5798c3..131276b61da7f 100644 --- a/py-polars/polars/io/spreadsheet/functions.py +++ b/py-polars/polars/io/spreadsheet/functions.py @@ -853,9 +853,7 @@ def _read_spreadsheet_calamine( type_checks.append(check_cast) if type_checks: - apply_cast = df.select( - [d[0].all(ignore_nulls=True) for d in type_checks], - ).row(0) + apply_cast = df.select(d[0].all(ignore_nulls=True) for d in type_checks).row(0) if downcast := [ cast for apply, (_, cast) in zip(apply_cast, type_checks) if apply ]: diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index bcdc961154279..a1ea726351ea4 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -4084,14 +4084,12 @@ def with_columns( │ 4.0 ┆ 13.0 ┆ true │ └─────┴──────┴───────┘ - Multiple columns can be added by passing a list of expressions. + Multiple columns can be added using positional arguments. >>> lf.with_columns( - ... [ - ... (pl.col("a") ** 2).alias("a^2"), - ... (pl.col("b") / 2).alias("b/2"), - ... (pl.col("c").not_()).alias("not c"), - ... ] + ... (pl.col("a") ** 2).alias("a^2"), + ... (pl.col("b") / 2).alias("b/2"), + ... (pl.col("c").not_()).alias("not c"), ... ).collect() shape: (4, 6) ┌─────┬──────┬───────┬─────┬──────┬───────┐ @@ -4105,12 +4103,14 @@ def with_columns( │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │ └─────┴──────┴───────┴─────┴──────┴───────┘ - Multiple columns also can be added using positional arguments instead of a list. + Multiple columns can also be added by passing a list of expressions. >>> lf.with_columns( - ... (pl.col("a") ** 2).alias("a^2"), - ... 
(pl.col("b") / 2).alias("b/2"), - ... (pl.col("c").not_()).alias("not c"), + ... [ + ... (pl.col("a") ** 2).alias("a^2"), + ... (pl.col("b") / 2).alias("b/2"), + ... (pl.col("c").not_()).alias("not c"), + ... ] ... ).collect() shape: (4, 6) ┌─────┬──────┬───────┬─────┬──────┬───────┐ @@ -4142,8 +4142,8 @@ def with_columns( │ 4 ┆ 13.0 ┆ true ┆ 52.0 ┆ false │ └─────┴──────┴───────┴──────┴───────┘ - Expressions with multiple outputs can be automatically instantiated as Structs - by enabling the setting `Config.set_auto_structify(True)`: + Expressions with multiple outputs can automatically be instantiated as Structs + by enabling the experimental setting `Config.set_auto_structify(True)`: >>> with pl.Config(auto_structify=True): ... lf.drop("c").with_columns( @@ -5914,7 +5914,7 @@ def set_sorted( columns = parse_as_list_of_expressions(column, *more_columns) return self.with_columns( - [wrap_expr(e).set_sorted(descending=descending) for e in columns] + wrap_expr(e).set_sorted(descending=descending) for e in columns ) @unstable() diff --git a/py-polars/tests/unit/conftest.py b/py-polars/tests/unit/conftest.py index fb0661230d56d..c073699ce9486 100644 --- a/py-polars/tests/unit/conftest.py +++ b/py-polars/tests/unit/conftest.py @@ -47,13 +47,11 @@ def df() -> pl.DataFrame: } ) return df.with_columns( - [ - pl.col("date").cast(pl.Date), - pl.col("datetime").cast(pl.Datetime), - pl.col("strings").cast(pl.Categorical).alias("cat"), - pl.col("strings").cast(pl.Enum(["foo", "ham", "bar"])).alias("enum"), - pl.col("time").cast(pl.Time), - ] + pl.col("date").cast(pl.Date), + pl.col("datetime").cast(pl.Datetime), + pl.col("strings").cast(pl.Categorical).alias("cat"), + pl.col("strings").cast(pl.Enum(["foo", "ham", "bar"])).alias("enum"), + pl.col("time").cast(pl.Time), ) diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py index a617c035c87bd..0e0f626beb2a2 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -1063,11 +1063,9 @@ def test_init_only_columns() -> None: ], ) expected = pl.DataFrame({"a": [], "b": [], "c": []}).with_columns( - [ - pl.col("a").cast(pl.Date), - pl.col("b").cast(pl.UInt64), - pl.col("c").cast(pl.Int8), - ] + pl.col("a").cast(pl.Date), + pl.col("b").cast(pl.UInt64), + pl.col("c").cast(pl.Int8), ) expected.insert_column(3, pl.Series("d", [], pl.List(pl.UInt8))) diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 77c8d57ecd5e4..f9f78d1a9bc14 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -401,20 +401,19 @@ def test_take_misc(fruits_cars: pl.DataFrame) -> None: for index in [[0, 1], pl.Series([0, 1]), np.array([0, 1])]: out = df.sort("fruits").select( - [ - pl.col("B") - .reverse() - .gather(index) # type: ignore[arg-type] - .over("fruits", mapping_strategy="join"), - "fruits", - ] + pl.col("B") + .reverse() + .gather(index) # type: ignore[arg-type] + .over("fruits", mapping_strategy="join"), + "fruits", ) assert out[0, "B"].to_list() == [2, 3] assert out[4, "B"].to_list() == [1, 4] out = df.sort("fruits").select( - [pl.col("B").reverse().get(pl.lit(1)).over("fruits"), "fruits"] + pl.col("B").reverse().get(pl.lit(1)).over("fruits"), + "fruits", ) assert out[0, "B"] == 3 assert out[4, "B"] == 4 @@ -1755,10 +1754,8 @@ def test_fill_null() -> None: ) assert df.select( - [ - pl.all().forward_fill().name.suffix("_forward"), - 
pl.all().backward_fill().name.suffix("_backward"), - ] + pl.all().forward_fill().name.suffix("_forward"), + pl.all().backward_fill().name.suffix("_backward"), ).to_dict(as_series=False) == { "c_forward": [ ["Apple", "Orange"], @@ -2082,10 +2079,8 @@ def test_fill_null_limits() -> None: "c": [True, None, None, None, False, True, None, None, None, False], } ).select( - [ - pl.all().fill_null(strategy="forward", limit=2), - pl.all().fill_null(strategy="backward", limit=2).name.suffix("_backward"), - ] + pl.all().fill_null(strategy="forward", limit=2), + pl.all().fill_null(strategy="backward", limit=2).name.suffix("_backward"), ).to_dict(as_series=False) == { "a": [1, 1, 1, None, 5, 6, 6, 6, None, 10], "b": ["a", "a", "a", None, "b", "c", "c", "c", None, "d"], @@ -2147,11 +2142,9 @@ def test_selection_regex_and_multicol() -> None: # Selection only test_df.select( - [ - pl.col(["a", "b", "c"]).name.suffix("_list"), - pl.all().exclude("foo").name.suffix("_wild"), - pl.col("^\\w$").name.suffix("_regex"), - ] + pl.col(["a", "b", "c"]).name.suffix("_list"), + pl.all().exclude("foo").name.suffix("_wild"), + pl.col("^\\w$").name.suffix("_regex"), ) # Multi * Single @@ -2615,9 +2608,7 @@ def test_format_empty_df() -> None: pl.Series("val2", [], dtype=pl.Categorical), ] ).select( - [ - pl.format("{}:{}", pl.col("val1"), pl.col("val2")).alias("cat"), - ] + pl.format("{}:{}", pl.col("val1"), pl.col("val2")).alias("cat"), ) assert df.shape == (0, 1) assert df.dtypes == [pl.String] @@ -2637,7 +2628,7 @@ def test_deadlocks_3409() -> None: assert ( pl.DataFrame({"col1": [1, 2, 3]}) .with_columns( - [pl.col("col1").cumulative_eval(pl.element().map_batches(lambda x: 0))] + pl.col("col1").cumulative_eval(pl.element().map_batches(lambda x: 0)) ) .to_dict(as_series=False) ) == {"col1": [0, 0, 0]} @@ -2728,12 +2719,10 @@ def test_window_deadlock() -> None: } ) - df = df.select( - [ - pl.col("*"), # select all - pl.col("random").sum().over("groups").alias("sum[random]/groups"), - pl.col("random").implode().over("names").alias("random/name"), - ] + _df = df.select( + pl.col("*"), # select all + pl.col("random").sum().over("groups").alias("sum[random]/groups"), + pl.col("random").implode().over("names").alias("random/name"), ) diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py index 27da3ef92e546..a3ac6d90d74e0 100644 --- a/py-polars/tests/unit/datatypes/test_categorical.py +++ b/py-polars/tests/unit/datatypes/test_categorical.py @@ -401,7 +401,7 @@ def test_categorical_error_on_local_cmp() -> None: def test_cast_null_to_categorical() -> None: assert pl.DataFrame().with_columns( - [pl.lit(None).cast(pl.Categorical).alias("nullable_enum")] + pl.lit(None).cast(pl.Categorical).alias("nullable_enum") ).dtypes == [pl.Categorical] @@ -485,7 +485,7 @@ def test_stringcache() -> None: with pl.StringCache(): # create a large enough column that the categorical map is reallocated df = pl.DataFrame({"cats": pl.arange(0, N, eager=True)}).select( - [pl.col("cats").cast(pl.String).cast(pl.Categorical)] + pl.col("cats").cast(pl.String).cast(pl.Categorical) ) assert df.filter(pl.col("cats").is_in(["1", "2"])).to_dict(as_series=False) == { "cats": ["1", "2"] diff --git a/py-polars/tests/unit/datatypes/test_list.py b/py-polars/tests/unit/datatypes/test_list.py index 48b5e0a0b6e21..a2c0ea1eace93 100644 --- a/py-polars/tests/unit/datatypes/test_list.py +++ b/py-polars/tests/unit/datatypes/test_list.py @@ -115,18 +115,14 @@ def test_cast_inner() -> None: def 
test_list_empty_group_by_result_3521() -> None: # Create a left relation where the join column contains a null value left = pl.DataFrame().with_columns( - [ - pl.lit(1).alias("group_by_column"), - pl.lit(None).cast(pl.Int32).alias("join_column"), - ] + pl.lit(1).alias("group_by_column"), + pl.lit(None).cast(pl.Int32).alias("join_column"), ) # Create a right relation where there is a column to count distinct on right = pl.DataFrame().with_columns( - [ - pl.lit(1).alias("join_column"), - pl.lit(1).alias("n_unique_column"), - ] + pl.lit(1).alias("join_column"), + pl.lit(1).alias("n_unique_column"), ) # Calculate n_unique after dropping nulls @@ -143,23 +139,19 @@ def test_list_empty_group_by_result_3521() -> None: def test_list_fill_null() -> None: df = pl.DataFrame({"C": [["a", "b", "c"], [], [], ["d", "e"]]}) assert df.with_columns( - [ - pl.when(pl.col("C").list.len() == 0) - .then(None) - .otherwise(pl.col("C")) - .alias("C") - ] + pl.when(pl.col("C").list.len() == 0) + .then(None) + .otherwise(pl.col("C")) + .alias("C") ).to_series().to_list() == [["a", "b", "c"], None, None, ["d", "e"]] def test_list_fill_list() -> None: assert pl.DataFrame({"a": [[1, 2, 3], []]}).select( - [ - pl.when(pl.col("a").list.len() == 0) - .then([5]) - .otherwise(pl.col("a")) - .alias("filled") - ] + pl.when(pl.col("a").list.len() == 0) + .then([5]) + .otherwise(pl.col("a")) + .alias("filled") ).to_dict(as_series=False) == {"filled": [[1, 2, 3], [5]]} diff --git a/py-polars/tests/unit/datatypes/test_struct.py b/py-polars/tests/unit/datatypes/test_struct.py index 285f29a3babb7..5da36647fb27d 100644 --- a/py-polars/tests/unit/datatypes/test_struct.py +++ b/py-polars/tests/unit/datatypes/test_struct.py @@ -104,13 +104,11 @@ def test_struct_hashes() -> None: def test_struct_unnesting() -> None: df_base = pl.DataFrame({"a": [1, 2]}) df = df_base.select( - [ - pl.all().alias("a_original"), - pl.col("a") - .map_elements(lambda x: {"a": x, "b": x * 2, "c": x % 2 == 0}) - .struct.rename_fields(["a", "a_squared", "mod2eq0"]) - .alias("foo"), - ] + pl.all().alias("a_original"), + pl.col("a") + .map_elements(lambda x: {"a": x, "b": x * 2, "c": x % 2 == 0}) + .struct.rename_fields(["a", "a_squared", "mod2eq0"]) + .alias("foo"), ) expected = pl.DataFrame( { @@ -130,13 +128,11 @@ def test_struct_unnesting() -> None: out = ( df_base.lazy() .select( - [ - pl.all().alias("a_original"), - pl.col("a") - .map_elements(lambda x: {"a": x, "b": x * 2, "c": x % 2 == 0}) - .struct.rename_fields(["a", "a_squared", "mod2eq0"]) - .alias("foo"), - ] + pl.all().alias("a_original"), + pl.col("a") + .map_elements(lambda x: {"a": x, "b": x * 2, "c": x % 2 == 0}) + .struct.rename_fields(["a", "a_squared", "mod2eq0"]) + .alias("foo"), ) .unnest("foo") .collect() @@ -278,7 +274,7 @@ def test_list_to_struct() -> None: df = pl.DataFrame({"a": [[1, 2], [1, 2, 3]]}) assert df.select( - [pl.col("a").list.to_struct(fields=lambda idx: f"col_name_{idx}")] + pl.col("a").list.to_struct(fields=lambda idx: f"col_name_{idx}") ).to_series().to_list() == [ {"col_name_0": 1, "col_name_1": 2}, {"col_name_0": 1, "col_name_1": 2}, @@ -286,7 +282,7 @@ def test_list_to_struct() -> None: df = pl.DataFrame({"a": [[1, 2], [1, 2, 3]]}) assert df.select( - [pl.col("a").list.to_struct(n_field_strategy="max_width")] + pl.col("a").list.to_struct(n_field_strategy="max_width") ).to_series().to_list() == [ {"field_0": 1, "field_1": 2, "field_2": None}, {"field_0": 1, "field_1": 2, "field_2": 3}, @@ -317,10 +313,8 @@ def test_struct_list_head_tail() -> None: ] } ).with_columns( 
- [ - pl.col("list_of_struct").list.head(1).alias("head"), - pl.col("list_of_struct").list.tail(1).alias("tail"), - ] + pl.col("list_of_struct").list.head(1).alias("head"), + pl.col("list_of_struct").list.tail(1).alias("tail"), ).to_dict(as_series=False) == { "list_of_struct": [ [{"a": 1, "b": 4}, {"a": 3, "b": 6}], @@ -401,9 +395,9 @@ def test_struct_concat_list() -> None: [{"a": 6, "b": 7}], ], } - ).with_columns( - [pl.col("list_struct1").list.concat("list_struct2").alias("result")] - )["result"].to_list() == [ + ).with_columns(pl.col("list_struct1").list.concat("list_struct2").alias("result"))[ + "result" + ].to_list() == [ [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 6, "b": 7}, {"a": 8, "b": 9}], [{"a": 1, "b": 2}, {"a": 6, "b": 7}], ] @@ -589,7 +583,7 @@ def test_struct_getitem() -> None: assert pl.Series([{"a": 1, "b": 2}]).struct[1].name == "b" assert pl.Series([{"a": 1, "b": 2}]).struct[-1].name == "b" assert pl.Series([{"a": 1, "b": 2}]).to_frame().select( - [pl.col("").struct[0]] + pl.col("").struct[0] ).to_dict(as_series=False) == {"a": [1]} diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py index 6c2b82c44fddf..85762f2164388 100644 --- a/py-polars/tests/unit/datatypes/test_temporal.py +++ b/py-polars/tests/unit/datatypes/test_temporal.py @@ -214,12 +214,10 @@ def test_from_pydatetime() -> None: def test_int_to_python_datetime() -> None: df = pl.DataFrame({"a": [100_000_000, 200_000_000]}).with_columns( - [ - pl.col("a").cast(pl.Datetime).alias("b"), - pl.col("a").cast(pl.Datetime("ms")).alias("c"), - pl.col("a").cast(pl.Datetime("us")).alias("d"), - pl.col("a").cast(pl.Datetime("ns")).alias("e"), - ] + pl.col("a").cast(pl.Datetime).alias("b"), + pl.col("a").cast(pl.Datetime("ms")).alias("c"), + pl.col("a").cast(pl.Datetime("us")).alias("d"), + pl.col("a").cast(pl.Datetime("ns")).alias("e"), ) assert df.rows() == [ ( @@ -244,12 +242,8 @@ def test_int_to_python_datetime() -> None: ] assert df.select( - [ - getattr(pl.col("a").cast(pl.Duration).dt, f"total_{unit}")().alias( - f"u[{unit}]" - ) - for unit in ("milliseconds", "microseconds", "nanoseconds") - ] + getattr(pl.col("a").cast(pl.Duration).dt, f"total_{unit}")().alias(f"u[{unit}]") + for unit in ("milliseconds", "microseconds", "nanoseconds") ).rows() == [ (100000, 100000000, 100000000000), (200000, 200000000, 200000000000), @@ -258,12 +252,10 @@ def test_int_to_python_datetime() -> None: def test_int_to_python_timedelta() -> None: df = pl.DataFrame({"a": [100_001, 200_002]}).with_columns( - [ - pl.col("a").cast(pl.Duration).alias("b"), - pl.col("a").cast(pl.Duration("ms")).alias("c"), - pl.col("a").cast(pl.Duration("us")).alias("d"), - pl.col("a").cast(pl.Duration("ns")).alias("e"), - ] + pl.col("a").cast(pl.Duration).alias("b"), + pl.col("a").cast(pl.Duration("ms")).alias("c"), + pl.col("a").cast(pl.Duration("us")).alias("d"), + pl.col("a").cast(pl.Duration("ns")).alias("e"), ) assert df.rows() == [ ( @@ -282,9 +274,10 @@ def test_int_to_python_timedelta() -> None: ), ] - assert df.select( - [pl.col(col).cast(pl.Int64) for col in ("c", "d", "e")] - ).rows() == [(100001, 100001, 100001), (200002, 200002, 200002)] + assert df.select(pl.col(col).cast(pl.Int64) for col in ("c", "d", "e")).rows() == [ + (100001, 100001, 100001), + (200002, 200002, 200002), + ] def test_datetime_consistency() -> None: @@ -302,13 +295,11 @@ def test_datetime_consistency() -> None: assert df.filter(pl.col("date") == date_literal).rows() == [(dt,)] ddf = df.select( - [ - pl.col("date"), - 
pl.lit(dt).alias("dt"), - pl.lit(dt).cast(pl.Datetime("ms")).alias("dt_ms"), - pl.lit(dt).cast(pl.Datetime("us")).alias("dt_us"), - pl.lit(dt).cast(pl.Datetime("ns")).alias("dt_ns"), - ] + pl.col("date"), + pl.lit(dt).alias("dt"), + pl.lit(dt).cast(pl.Datetime("ms")).alias("dt_ms"), + pl.lit(dt).cast(pl.Datetime("us")).alias("dt_us"), + pl.lit(dt).cast(pl.Datetime("ns")).alias("dt_ns"), ) assert ddf.schema == { "date": pl.Datetime("us"), @@ -776,31 +767,29 @@ def test_temporal_dtypes_map_elements( ): assert_frame_equal( df.with_columns( - [ - # don't actually do this; native expressions are MUCH faster ;) - pl.col("timestamp") - .map_elements( - lambda x: const_dtm, - skip_nulls=skip_nulls, - return_dtype=pl.Datetime, - ) - .alias("const_dtm"), - # note: the below now trigger a PolarsInefficientMapWarning - pl.col("timestamp") - .map_elements( - lambda x: x and x.date(), - skip_nulls=skip_nulls, - return_dtype=pl.Date, - ) - .alias("date"), - pl.col("timestamp") - .map_elements( - lambda x: x and x.time(), - skip_nulls=skip_nulls, - return_dtype=pl.Time, - ) - .alias("time"), - ] + # don't actually do this; native expressions are MUCH faster ;) + pl.col("timestamp") + .map_elements( + lambda x: const_dtm, + skip_nulls=skip_nulls, + return_dtype=pl.Datetime, + ) + .alias("const_dtm"), + # note: the below now trigger a PolarsInefficientMapWarning + pl.col("timestamp") + .map_elements( + lambda x: x and x.date(), + skip_nulls=skip_nulls, + return_dtype=pl.Date, + ) + .alias("date"), + pl.col("timestamp") + .map_elements( + lambda x: x and x.time(), + skip_nulls=skip_nulls, + return_dtype=pl.Time, + ) + .alias("time"), ), pl.DataFrame( [ @@ -1024,10 +1013,8 @@ def test_sum_duration() -> None: {"name": "Jen", "duration": timedelta(seconds=60)}, ] ).select( - [ - pl.col("duration").sum(), - pl.col("duration").dt.total_seconds().alias("sec").sum(), - ] + pl.col("duration").sum(), + pl.col("duration").dt.total_seconds().alias("sec").sum(), ).to_dict(as_series=False) == { "duration": [timedelta(seconds=150)], "sec": [150], @@ -1082,10 +1069,8 @@ def test_date_timedelta() -> None: {"date": pl.date_range(date(2001, 1, 1), date(2001, 1, 3), "1d", eager=True)} ) assert df.with_columns( - [ - (pl.col("date") + timedelta(days=1)).alias("date_plus_one"), - (pl.col("date") - timedelta(days=1)).alias("date_min_one"), - ] + (pl.col("date") + timedelta(days=1)).alias("date_plus_one"), + (pl.col("date") - timedelta(days=1)).alias("date_min_one"), ).to_dict(as_series=False) == { "date": [date(2001, 1, 1), date(2001, 1, 2), date(2001, 1, 3)], "date_plus_one": [date(2001, 1, 2), date(2001, 1, 3), date(2001, 1, 4)], @@ -1963,13 +1948,11 @@ def test_truncate_by_multiple_weeks() -> None: assert ( df.select( - [ - pl.col("date").dt.truncate("2w").alias("2w"), - pl.col("date").dt.truncate("3w").alias("3w"), - pl.col("date").dt.truncate("4w").alias("4w"), - pl.col("date").dt.truncate("5w").alias("5w"), - pl.col("date").dt.truncate("17w").alias("17w"), - ] + pl.col("date").dt.truncate("2w").alias("2w"), + pl.col("date").dt.truncate("3w").alias("3w"), + pl.col("date").dt.truncate("4w").alias("4w"), + pl.col("date").dt.truncate("5w").alias("5w"), + pl.col("date").dt.truncate("17w").alias("17w"), ) ).to_dict(as_series=False) == { "2w": [date(2022, 4, 18), date(2022, 11, 28)], @@ -2140,10 +2123,8 @@ def test_round_by_week() -> None: assert ( df.select( - [ - pl.col("date").dt.round("7d").alias("7d"), - pl.col("date").dt.round("1w").alias("1w"), - ] + pl.col("date").dt.round("7d").alias("7d"), + 
pl.col("date").dt.round("1w").alias("1w"), ) ).to_dict(as_series=False) == { "7d": [date(1998, 4, 9), date(2022, 12, 1)], @@ -2177,21 +2158,17 @@ def test_tz_aware_day_weekday() -> None: ) df = df.with_columns( - [ - pl.col("date").dt.convert_time_zone("Asia/Tokyo").alias("tk_date"), - pl.col("date").dt.convert_time_zone("America/New_York").alias("ny_date"), - ] + pl.col("date").dt.convert_time_zone("Asia/Tokyo").alias("tk_date"), + pl.col("date").dt.convert_time_zone("America/New_York").alias("ny_date"), ) assert df.select( - [ - pl.col("date").dt.day().alias("day"), - pl.col("tk_date").dt.day().alias("tk_day"), - pl.col("ny_date").dt.day().alias("ny_day"), - pl.col("date").dt.weekday().alias("weekday"), - pl.col("tk_date").dt.weekday().alias("tk_weekday"), - pl.col("ny_date").dt.weekday().alias("ny_weekday"), - ] + pl.col("date").dt.day().alias("day"), + pl.col("tk_date").dt.day().alias("tk_day"), + pl.col("ny_date").dt.day().alias("ny_day"), + pl.col("date").dt.weekday().alias("weekday"), + pl.col("tk_date").dt.weekday().alias("tk_weekday"), + pl.col("ny_date").dt.weekday().alias("ny_weekday"), ).to_dict(as_series=False) == { "day": [1, 4, 7], "tk_day": [1, 4, 7], diff --git a/py-polars/tests/unit/expr/test_exprs.py b/py-polars/tests/unit/expr/test_exprs.py index e51231490e70c..39485d6dc4291 100644 --- a/py-polars/tests/unit/expr/test_exprs.py +++ b/py-polars/tests/unit/expr/test_exprs.py @@ -278,11 +278,9 @@ def test_power_by_expression() -> None: out = pl.DataFrame( {"a": [1, None, None, 4, 5, 6], "b": [1, 2, None, 4, None, 6]} ).select( - [ - pl.col("a").pow(pl.col("b")).alias("pow_expr"), - (pl.col("a") ** pl.col("b")).alias("pow_op"), - (2 ** pl.col("b")).alias("pow_op_left"), - ] + pl.col("a").pow(pl.col("b")).alias("pow_expr"), + (pl.col("a") ** pl.col("b")).alias("pow_op"), + (2 ** pl.col("b")).alias("pow_op_left"), ) for pow_col in ("pow_expr", "pow_op"): diff --git a/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py b/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py index 472ee64493cfe..22a87dadcfcbe 100644 --- a/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py +++ b/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py @@ -201,10 +201,8 @@ def test_list_concat_rolling_window() -> None: } ) out = df.with_columns( - [pl.col("A").shift(i).alias(f"A_lag_{i}") for i in range(3)] - ).select( - [pl.concat_list([f"A_lag_{i}" for i in range(3)][::-1]).alias("A_rolling")] - ) + pl.col("A").shift(i).alias(f"A_lag_{i}") for i in range(3) + ).select(pl.concat_list([f"A_lag_{i}" for i in range(3)][::-1]).alias("A_rolling")) assert out.shape == (5, 1) s = out.to_series() @@ -221,18 +219,12 @@ def test_list_concat_rolling_window() -> None: out = ( df.with_columns(pl.col("A").reshape((-1, 1))) # first turn into a list .with_columns( - [ - pl.col("A").shift(i).alias(f"A_lag_{i}") - for i in range(3) # slice the lists to a lag - ] + pl.col("A").shift(i).alias(f"A_lag_{i}") + for i in range(3) # slice the lists to a lag ) .select( - [ - pl.all(), - pl.concat_list([f"A_lag_{i}" for i in range(3)][::-1]).alias( - "A_rolling" - ), - ] + pl.all(), + pl.concat_list([f"A_lag_{i}" for i in range(3)][::-1]).alias("A_rolling"), ) ) assert out.shape == (5, 5) @@ -249,11 +241,9 @@ def test_list_concat_rolling_window() -> None: def test_concat_list_reverse_struct_fields() -> None: df = pl.DataFrame({"nums": [1, 2, 3, 4], "letters": ["a", "b", "c", "d"]}).select( - [ - pl.col("nums"), - pl.struct(["letters", "nums"]).alias("combo"), - pl.struct(["nums", 
"letters"]).alias("reverse_combo"), - ] + pl.col("nums"), + pl.struct(["letters", "nums"]).alias("combo"), + pl.struct(["nums", "letters"]).alias("reverse_combo"), ) result1 = df.select(pl.concat_list(["combo", "reverse_combo"])) result2 = df.select(pl.concat_list(["combo", "combo"])) diff --git a/py-polars/tests/unit/functions/test_functions.py b/py-polars/tests/unit/functions/test_functions.py index 6a7c01b625b6f..5acb17857bea9 100644 --- a/py-polars/tests/unit/functions/test_functions.py +++ b/py-polars/tests/unit/functions/test_functions.py @@ -187,10 +187,8 @@ def test_null_handling_correlation() -> None: df = pl.DataFrame({"a": [1, 2, 3, None, 4], "b": [1, 2, 3, 10, 4]}) out = df.select( - [ - pl.corr("a", "b").alias("pearson"), - pl.corr("a", "b", method="spearman").alias("spearman"), - ] + pl.corr("a", "b").alias("pearson"), + pl.corr("a", "b", method="spearman").alias("spearman"), ) assert out["pearson"][0] == pytest.approx(1.0) assert out["spearman"][0] == pytest.approx(1.0) @@ -489,11 +487,9 @@ def test_lazy_functions() -> None: # regex selection out = df.select( - [ - pl.struct(pl.max("^a|b$")).alias("x"), - pl.struct(pl.min("^.*[bc]$")).alias("y"), - pl.struct(pl.sum("^[^a]$")).alias("z"), - ] + pl.struct(pl.max("^a|b$")).alias("x"), + pl.struct(pl.min("^.*[bc]$")).alias("y"), + pl.struct(pl.sum("^[^a]$")).alias("z"), ) assert out.rows() == [ ({"a": "foo", "b": 3}, {"b": 1, "c": -1.0}, {"b": 6, "c": 5.0}) diff --git a/py-polars/tests/unit/functions/test_when_then.py b/py-polars/tests/unit/functions/test_when_then.py index 8315b08015979..3914999cdc873 100644 --- a/py-polars/tests/unit/functions/test_when_then.py +++ b/py-polars/tests/unit/functions/test_when_then.py @@ -166,12 +166,10 @@ def test_type_coercion_when_then_otherwise_2806() -> None: out = ( pl.DataFrame({"names": ["foo", "spam", "spam"], "nrs": [1, 2, 3]}) .select( - [ - pl.when(pl.col("names") == "spam") - .then(pl.col("nrs") * 2) - .otherwise(pl.lit("other")) - .alias("new_col"), - ] + pl.when(pl.col("names") == "spam") + .then(pl.col("nrs") * 2) + .otherwise(pl.lit("other")) + .alias("new_col"), ) .to_series() ) diff --git a/py-polars/tests/unit/interop/numpy/test_ufunc_expr.py b/py-polars/tests/unit/interop/numpy/test_ufunc_expr.py index ad288c03593d5..e3516bb58cc16 100644 --- a/py-polars/tests/unit/interop/numpy/test_ufunc_expr.py +++ b/py-polars/tests/unit/interop/numpy/test_ufunc_expr.py @@ -12,11 +12,9 @@ def test_ufunc() -> None: df = pl.DataFrame([pl.Series("a", [1, 2, 3, 4], dtype=pl.UInt8)]) out = df.select( - [ - np.power(pl.col("a"), 2).alias("power_uint8"), # type: ignore[call-overload] - np.power(pl.col("a"), 2.0).alias("power_float64"), # type: ignore[call-overload] - np.power(pl.col("a"), 2, dtype=np.uint16).alias("power_uint16"), # type: ignore[call-overload] - ] + np.power(pl.col("a"), 2).alias("power_uint8"), # type: ignore[call-overload] + np.power(pl.col("a"), 2.0).alias("power_float64"), # type: ignore[call-overload] + np.power(pl.col("a"), 2, dtype=np.uint16).alias("power_uint16"), # type: ignore[call-overload] ) expected = pl.DataFrame( [ @@ -33,11 +31,9 @@ def test_ufunc_expr_not_first() -> None: """Check numpy ufunc expressions also work if expression not the first argument.""" df = pl.DataFrame([pl.Series("a", [1, 2, 3], dtype=pl.Float64)]) out = df.select( - [ - np.power(2.0, cast(Any, pl.col("a"))).alias("power"), - (2.0 / cast(Any, pl.col("a"))).alias("divide_scalar"), - (np.array([2, 2, 2]) / cast(Any, pl.col("a"))).alias("divide_array"), - ] + np.power(2.0, cast(Any, 
pl.col("a"))).alias("power"), + (2.0 / cast(Any, pl.col("a"))).alias("divide_scalar"), + (np.array([2, 2, 2]) / cast(Any, pl.col("a"))).alias("divide_array"), ) expected = pl.DataFrame( [ @@ -52,11 +48,9 @@ def test_ufunc_expr_not_first() -> None: def test_lazy_ufunc() -> None: ldf = pl.LazyFrame([pl.Series("a", [1, 2, 3, 4], dtype=pl.UInt8)]) out = ldf.select( - [ - np.power(cast(Any, pl.col("a")), 2).alias("power_uint8"), - np.power(cast(Any, pl.col("a")), 2.0).alias("power_float64"), - np.power(cast(Any, pl.col("a")), 2, dtype=np.uint16).alias("power_uint16"), - ] + np.power(cast(Any, pl.col("a")), 2).alias("power_uint8"), + np.power(cast(Any, pl.col("a")), 2.0).alias("power_float64"), + np.power(cast(Any, pl.col("a")), 2, dtype=np.uint16).alias("power_uint16"), ) expected = pl.DataFrame( [ @@ -72,11 +66,9 @@ def test_lazy_ufunc_expr_not_first() -> None: """Check numpy ufunc expressions also work if expression not the first argument.""" ldf = pl.LazyFrame([pl.Series("a", [1, 2, 3], dtype=pl.Float64)]) out = ldf.select( - [ - np.power(2.0, cast(Any, pl.col("a"))).alias("power"), - (2.0 / cast(Any, pl.col("a"))).alias("divide_scalar"), - (np.array([2, 2, 2]) / cast(Any, pl.col("a"))).alias("divide_array"), - ] + np.power(2.0, cast(Any, pl.col("a"))).alias("power"), + (2.0 / cast(Any, pl.col("a"))).alias("divide_scalar"), + (np.array([2, 2, 2]) / cast(Any, pl.col("a"))).alias("divide_array"), ) expected = pl.DataFrame( [ diff --git a/py-polars/tests/unit/interop/test_to_pandas.py b/py-polars/tests/unit/interop/test_to_pandas.py index 44cb310520610..6e059683459e0 100644 --- a/py-polars/tests/unit/interop/test_to_pandas.py +++ b/py-polars/tests/unit/interop/test_to_pandas.py @@ -33,10 +33,8 @@ def test_to_pandas() -> None: }, schema_overrides={"a": pl.UInt8}, ).with_columns( - [ - pl.col("e").cast(pl.Categorical).alias("h"), - pl.col("f").cast(pl.Categorical).alias("i"), - ] + pl.col("e").cast(pl.Categorical).alias("h"), + pl.col("f").cast(pl.Categorical).alias("i"), ) pd_out = df.to_pandas() diff --git a/py-polars/tests/unit/io/test_csv.py b/py-polars/tests/unit/io/test_csv.py index d6a2c8d549cb7..8bb270c02288e 100644 --- a/py-polars/tests/unit/io/test_csv.py +++ b/py-polars/tests/unit/io/test_csv.py @@ -76,11 +76,9 @@ def test_to_from_buffer(df_no_lists: pl.DataFrame) -> None: read_df = pl.read_csv(buf, try_parse_dates=True) read_df = read_df.with_columns( - [ - pl.col("cat").cast(pl.Categorical), - pl.col("enum").cast(pl.Enum(["foo", "ham", "bar"])), - pl.col("time").cast(pl.Time), - ] + pl.col("cat").cast(pl.Categorical), + pl.col("enum").cast(pl.Enum(["foo", "ham", "bar"])), + pl.col("time").cast(pl.Time), ) assert_frame_equal(df, read_df, categorical_as_str=True) with pytest.raises(AssertionError): @@ -98,11 +96,9 @@ def test_to_from_file(df_no_lists: pl.DataFrame, tmp_path: Path) -> None: read_df = pl.read_csv(file_path, try_parse_dates=True) read_df = read_df.with_columns( - [ - pl.col("cat").cast(pl.Categorical), - pl.col("enum").cast(pl.Enum(["foo", "ham", "bar"])), - pl.col("time").cast(pl.Time), - ] + pl.col("cat").cast(pl.Categorical), + pl.col("enum").cast(pl.Enum(["foo", "ham", "bar"])), + pl.col("time").cast(pl.Time), ) assert_frame_equal(df, read_df, categorical_as_str=True) diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index 6642a35f4aa14..e4f341b18566e 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ 
b/py-polars/tests/unit/operations/aggregation/test_aggregations.py @@ -295,7 +295,7 @@ def test_sum_empty_and_null_set() -> None: def test_horizontal_sum_null_to_identity() -> None: assert pl.DataFrame({"a": [1, 5], "b": [10, None]}).select( - [pl.sum_horizontal(["a", "b"])] + pl.sum_horizontal(["a", "b"]) ).to_series().to_list() == [11, 5] @@ -566,7 +566,7 @@ def test_min_max_2850() -> None: for _ in range(10): permuted = df.sample(fraction=1.0, seed=0) computed = permuted.select( - [pl.col("id").min().alias("min"), pl.col("id").max().alias("max")] + pl.col("id").min().alias("min"), pl.col("id").max().alias("max") ) assert cast(int, computed[0, "min"]) == minimum assert cast(float, computed[0, "max"]) == maximum diff --git a/py-polars/tests/unit/operations/map/test_map_elements.py b/py-polars/tests/unit/operations/map/test_map_elements.py index 19016ce462b48..98bf9cf996e19 100644 --- a/py-polars/tests/unit/operations/map/test_map_elements.py +++ b/py-polars/tests/unit/operations/map/test_map_elements.py @@ -201,19 +201,17 @@ def test_map_elements_object_dtypes() -> None: assert pl.DataFrame( {"a": pl.Series([1, 2, "a", 4, 5], dtype=pl.Object)} ).with_columns( - [ - pl.col("a").map_elements(lambda x: x * 2, return_dtype=pl.Object), - pl.col("a") - .map_elements( - lambda x: isinstance(x, (int, float)), return_dtype=pl.Boolean - ) - .alias("is_numeric1"), - pl.col("a") - .map_elements( - lambda x: isinstance(x, (int, float)), return_dtype=pl.Boolean - ) - .alias("is_numeric_infer"), - ] + pl.col("a").map_elements(lambda x: x * 2, return_dtype=pl.Object), + pl.col("a") + .map_elements( + lambda x: isinstance(x, (int, float)), return_dtype=pl.Boolean + ) + .alias("is_numeric1"), + pl.col("a") + .map_elements( + lambda x: isinstance(x, (int, float)), return_dtype=pl.Boolean + ) + .alias("is_numeric_infer"), ).to_dict(as_series=False) == { "a": [2, 4, "aa", 8, 10], "is_numeric1": [True, True, False, True, True], @@ -223,11 +221,9 @@ def test_map_elements_object_dtypes() -> None: def test_map_elements_explicit_list_output_type() -> None: out = pl.DataFrame({"str": ["a", "b"]}).with_columns( - [ - pl.col("str").map_elements( - lambda _: pl.Series([1, 2, 3]), return_dtype=pl.List(pl.Int64) - ) - ] + pl.col("str").map_elements( + lambda _: pl.Series([1, 2, 3]), return_dtype=pl.List(pl.Int64) + ) ) assert out.dtypes == [pl.List(pl.Int64)] diff --git a/py-polars/tests/unit/operations/namespaces/array/test_array.py b/py-polars/tests/unit/operations/namespaces/array/test_array.py index 80405b24ba08f..57f8cb7e597c1 100644 --- a/py-polars/tests/unit/operations/namespaces/array/test_array.py +++ b/py-polars/tests/unit/operations/namespaces/array/test_array.py @@ -406,7 +406,7 @@ def test_array_to_struct() -> None: {"a": [[1, 2, None], [1, 2, 3]]}, schema={"a": pl.Array(pl.Int8, 3)} ) assert df.select( - [pl.col("a").arr.to_struct(fields=lambda idx: f"col_name_{idx}")] + pl.col("a").arr.to_struct(fields=lambda idx: f"col_name_{idx}") ).to_series().to_list() == [ {"col_name_0": 1, "col_name_1": 2, "col_name_2": None}, {"col_name_0": 1, "col_name_1": 2, "col_name_2": 3}, diff --git a/py-polars/tests/unit/operations/namespaces/list/test_list.py b/py-polars/tests/unit/operations/namespaces/list/test_list.py index 8365269ce1992..59404ee9322d6 100644 --- a/py-polars/tests/unit/operations/namespaces/list/test_list.py +++ b/py-polars/tests/unit/operations/namespaces/list/test_list.py @@ -47,10 +47,8 @@ def test_list_arr_get() -> None: pl.DataFrame( {"a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]]} 
).with_columns( - [ - pl.col("a").list.get(i, null_on_oob=False).alias(f"get_{i}") - for i in range(4) - ] + pl.col("a").list.get(i, null_on_oob=False).alias(f"get_{i}") + for i in range(4) ) # get by indexes where some are out of bounds @@ -115,7 +113,7 @@ def test_list_arr_get_null_on_oob() -> None: assert pl.DataFrame( {"a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]]} ).with_columns( - [pl.col("a").list.get(i, null_on_oob=True).alias(f"get_{i}") for i in range(4)] + pl.col("a").list.get(i, null_on_oob=True).alias(f"get_{i}") for i in range(4) ).to_dict(as_series=False) == { "a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]], "get_0": [1, 2, 3, 4, 7, None], @@ -224,15 +222,13 @@ def test_list_arr_empty() -> None: df = pl.DataFrame({"cars": [[1, 2, 3], [2, 3], [4], []]}) out = df.select( - [ - pl.col("cars").list.first().alias("cars_first"), - pl.when(pl.col("cars").list.first() == 2) - .then(1) - .when(pl.col("cars").list.contains(2)) - .then(2) - .otherwise(3) - .alias("cars_literal"), - ] + pl.col("cars").list.first().alias("cars_first"), + pl.when(pl.col("cars").list.first() == 2) + .then(1) + .when(pl.col("cars").list.contains(2)) + .then(2) + .otherwise(3) + .alias("cars_literal"), ) expected = pl.DataFrame( {"cars_first": [1, 2, 4, None], "cars_literal": [2, 1, 3, 3]}, @@ -356,12 +352,10 @@ def test_list_eval_dtype_inference() -> None: assert grades.with_columns( pl.concat_list(pl.all().exclude("student")).alias("all_grades") ).select( - [ - pl.col("all_grades") - .list.eval(rank_pct, parallel=True) - .alias("grades_rank") - .list.first() - ] + pl.col("all_grades") + .list.eval(rank_pct, parallel=True) + .alias("grades_rank") + .list.first() ).to_series().to_list() == [ 0.3333333333333333, 0.6666666666666666, @@ -425,18 +419,12 @@ def test_arr_contains_categorical() -> None: def test_list_eval_type_coercion() -> None: last_non_null_value = pl.element().fill_null(3).last() - df = pl.DataFrame( - { - "array_cols": [[1, None]], - } - ) + df = pl.DataFrame({"array_cols": [[1, None]]}) assert df.select( - [ - pl.col("array_cols") - .list.eval(last_non_null_value, parallel=False) - .alias("col_last") - ] + pl.col("array_cols") + .list.eval(last_non_null_value, parallel=False) + .alias("col_last") ).to_dict(as_series=False) == {"col_last": [[3]]} diff --git a/py-polars/tests/unit/operations/namespaces/string/test_string.py b/py-polars/tests/unit/operations/namespaces/string/test_string.py index a760a36bfcc06..f19bd913d63fc 100644 --- a/py-polars/tests/unit/operations/namespaces/string/test_string.py +++ b/py-polars/tests/unit/operations/namespaces/string/test_string.py @@ -448,10 +448,8 @@ def test_str_to_integer_base_literal() -> None: with pytest.raises(pl.ComputeError): df.with_columns( - [ - pl.col("bin").str.to_integer(base=2), - pl.col("hex").str.to_integer(base=16), - ] + pl.col("bin").str.to_integer(base=2), + pl.col("hex").str.to_integer(base=16), ) @@ -464,11 +462,9 @@ def test_str_strip_chars_expr() -> None: ) all_expr = df.select( - [ - pl.col("s").str.strip_chars(pl.col("pat")).alias("strip_chars"), - pl.col("s").str.strip_chars_start(pl.col("pat")).alias("strip_chars_start"), - pl.col("s").str.strip_chars_end(pl.col("pat")).alias("strip_chars_end"), - ] + pl.col("s").str.strip_chars(pl.col("pat")).alias("strip_chars"), + pl.col("s").str.strip_chars_start(pl.col("pat")).alias("strip_chars_start"), + pl.col("s").str.strip_chars_end(pl.col("pat")).alias("strip_chars_end"), ) expected = pl.DataFrame( @@ -841,14 +837,12 @@ def test_contains_expr() -> None: ) assert 
df.select( - [ - pl.col("text") - .str.contains(pl.col("pattern"), literal=False, strict=False) - .alias("contains"), - pl.col("text") - .str.contains(pl.col("pattern"), literal=True) - .alias("contains_lit"), - ] + pl.col("text") + .str.contains(pl.col("pattern"), literal=False, strict=False) + .alias("contains"), + pl.col("text") + .str.contains(pl.col("pattern"), literal=True) + .alias("contains_lit"), ).to_dict(as_series=False) == { "contains": [True, True, False, None, None, None], "contains_lit": [False, True, False, None, None, False], @@ -1107,14 +1101,12 @@ def test_starts_ends_with() -> None: ) assert df.select( - [ - pl.col("a").str.ends_with("pop").alias("ends_pop"), - pl.col("a").str.ends_with(pl.lit(None)).alias("ends_None"), - pl.col("a").str.ends_with(pl.col("sub")).alias("ends_sub"), - pl.col("a").str.starts_with("ham").alias("starts_ham"), - pl.col("a").str.starts_with(pl.lit(None)).alias("starts_None"), - pl.col("a").str.starts_with(pl.col("sub")).alias("starts_sub"), - ] + pl.col("a").str.ends_with("pop").alias("ends_pop"), + pl.col("a").str.ends_with(pl.lit(None)).alias("ends_None"), + pl.col("a").str.ends_with(pl.col("sub")).alias("ends_sub"), + pl.col("a").str.starts_with("ham").alias("starts_ham"), + pl.col("a").str.starts_with(pl.lit(None)).alias("starts_None"), + pl.col("a").str.starts_with(pl.col("sub")).alias("starts_sub"), ).to_dict(as_series=False) == { "ends_pop": [False, False, True, None], "ends_None": [None, None, None, None], diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index f2eed19af83e6..9fb3a12d1030a 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -677,11 +677,9 @@ def test_date_time_combine(tzinfo: ZoneInfo | None, time_zone: str | None) -> No # Combine datetime/date with time df = df.select( - [ - pl.col("dtm").dt.combine(pl.col("tm")).alias("d1"), # datetime & time - pl.col("dt").dt.combine(pl.col("tm")).alias("d2"), # date & time - pl.col("dt").dt.combine(time(4, 5, 6)).alias("d3"), # date & specified time - ] + pl.col("dtm").dt.combine(pl.col("tm")).alias("d1"), # datetime & time + pl.col("dt").dt.combine(pl.col("tm")).alias("d2"), # date & time + pl.col("dt").dt.combine(time(4, 5, 6)).alias("d3"), # date & specified time ) # Assert that the new columns have the expected values and datatypes @@ -782,10 +780,8 @@ def test_date_offset() -> None: # Add two new columns to the DataFrame using the offset_by() method df = df.with_columns( - [ - df["dates"].dt.offset_by("1y").alias("date_plus_1y"), - df["dates"].dt.offset_by("-1y2mo").alias("date_min"), - ] + df["dates"].dt.offset_by("1y").alias("date_plus_1y"), + df["dates"].dt.offset_by("-1y2mo").alias("date_min"), ) # Assert that the day of the month for all the dates in new columns is 1 diff --git a/py-polars/tests/unit/operations/namespaces/test_binary.py b/py-polars/tests/unit/operations/namespaces/test_binary.py index 79dfd6c7e1ca0..f5e3eeba99196 100644 --- a/py-polars/tests/unit/operations/namespaces/test_binary.py +++ b/py-polars/tests/unit/operations/namespaces/test_binary.py @@ -58,11 +58,9 @@ def test_contains_with_expr() -> None: ) assert df.select( - [ - pl.col("bin").bin.contains(pl.col("lit1")).alias("contains_1"), - pl.col("bin").bin.contains(pl.col("lit2")).alias("contains_2"), - pl.col("bin").bin.contains(pl.lit(None)).alias("contains_3"), - ] + 
pl.col("bin").bin.contains(pl.col("lit1")).alias("contains_1"), - pl.col("bin").bin.contains(pl.col("lit2")).alias("contains_2"), - pl.col("bin").bin.contains(pl.lit(None)).alias("contains_3"), - ] +
pl.col("bin").bin.contains(pl.col("lit1")).alias("contains_1"), + pl.col("bin").bin.contains(pl.col("lit2")).alias("contains_2"), + pl.col("bin").bin.contains(pl.lit(None)).alias("contains_3"), ).to_dict(as_series=False) == { "contains_1": [True, True, False, None], "contains_2": [None, True, False, None], @@ -78,14 +76,12 @@ def test_starts_ends_with() -> None: "start": [b"ha", b"nga", None, b"anything"], } ).select( - [ - pl.col("a").bin.ends_with(b"pop").alias("end_lit"), - pl.col("a").bin.ends_with(pl.lit(None)).alias("end_none"), - pl.col("a").bin.ends_with(pl.col("end")).alias("end_expr"), - pl.col("a").bin.starts_with(b"ham").alias("start_lit"), - pl.col("a").bin.ends_with(pl.lit(None)).alias("start_none"), - pl.col("a").bin.starts_with(pl.col("start")).alias("start_expr"), - ] + pl.col("a").bin.ends_with(b"pop").alias("end_lit"), + pl.col("a").bin.ends_with(pl.lit(None)).alias("end_none"), + pl.col("a").bin.ends_with(pl.col("end")).alias("end_expr"), + pl.col("a").bin.starts_with(b"ham").alias("start_lit"), + pl.col("a").bin.ends_with(pl.lit(None)).alias("start_none"), + pl.col("a").bin.starts_with(pl.col("start")).alias("start_expr"), ).to_dict(as_series=False) == { "end_lit": [False, False, True, None], "end_none": [None, None, None, None], diff --git a/py-polars/tests/unit/operations/namespaces/test_categorical.py b/py-polars/tests/unit/operations/namespaces/test_categorical.py index 03ca6497eb0dd..708abf7eed4d8 100644 --- a/py-polars/tests/unit/operations/namespaces/test_categorical.py +++ b/py-polars/tests/unit/operations/namespaces/test_categorical.py @@ -6,9 +6,7 @@ def test_categorical_lexical_sort() -> None: df = pl.DataFrame( {"cats": ["z", "z", "k", "a", "b"], "vals": [3, 1, 2, 2, 3]} ).with_columns( - [ - pl.col("cats").cast(pl.Categorical("lexical")), - ] + pl.col("cats").cast(pl.Categorical("lexical")), ) out = df.sort(["cats"]) diff --git a/py-polars/tests/unit/operations/namespaces/test_name.py b/py-polars/tests/unit/operations/namespaces/test_name.py index cf08b7154c22d..eac08e537a888 100644 --- a/py-polars/tests/unit/operations/namespaces/test_name.py +++ b/py-polars/tests/unit/operations/namespaces/test_name.py @@ -53,12 +53,10 @@ def test_name_update_all() -> None: ) assert ( df.select( - [ - pl.col("col2").append(pl.col("other")), - pl.col("col1").append(pl.col("other")).name.keep(), - pl.col("col1").append(pl.col("other")).name.prefix("prefix_"), - pl.col("col1").append(pl.col("other")).name.suffix("_suffix"), - ] + pl.col("col2").append(pl.col("other")), + pl.col("col1").append(pl.col("other")).name.keep(), + pl.col("col1").append(pl.col("other")).name.prefix("prefix_"), + pl.col("col1").append(pl.col("other")).name.suffix("_suffix"), ) ).schema == OrderedDict( [ diff --git a/py-polars/tests/unit/operations/namespaces/test_strptime.py b/py-polars/tests/unit/operations/namespaces/test_strptime.py index 32eb357e1813f..ab49e64dbb883 100644 --- a/py-polars/tests/unit/operations/namespaces/test_strptime.py +++ b/py-polars/tests/unit/operations/namespaces/test_strptime.py @@ -349,9 +349,7 @@ def test_datetime_strptime_patterns_consistent() -> None: ], ).to_frame() s = df.with_columns( - [ - pl.col("date").str.to_datetime(strict=False).alias("parsed"), - ] + pl.col("date").str.to_datetime(strict=False).alias("parsed"), )["parsed"] assert s.null_count() == 1 assert s[5] is None diff --git a/py-polars/tests/unit/operations/rolling/test_rolling.py b/py-polars/tests/unit/operations/rolling/test_rolling.py index 90e50d7e479d8..59443a13857af 100644 --- 
a/py-polars/tests/unit/operations/rolling/test_rolling.py +++ b/py-polars/tests/unit/operations/rolling/test_rolling.py @@ -47,22 +47,20 @@ def test_rolling_kernels_and_rolling( example_df: pl.DataFrame, period: str | timedelta, closed: ClosedInterval ) -> None: out1 = example_df.set_sorted("dt").select( - [ - pl.col("dt"), - # this differs from group_by aggregation because the empty window is - # null here - # where the sum aggregation of an empty set is 0 - pl.col("values") - .rolling_sum_by("dt", period, closed=closed) - .fill_null(0) - .alias("sum"), - pl.col("values").rolling_var_by("dt", period, closed=closed).alias("var"), - pl.col("values").rolling_mean_by("dt", period, closed=closed).alias("mean"), - pl.col("values").rolling_std_by("dt", period, closed=closed).alias("std"), - pl.col("values") - .rolling_quantile_by("dt", period, quantile=0.2, closed=closed) - .alias("quantile"), - ] + pl.col("dt"), + # this differs from group_by aggregation because the empty window is + # null here + # where the sum aggregation of an empty set is 0 + pl.col("values") + .rolling_sum_by("dt", period, closed=closed) + .fill_null(0) + .alias("sum"), + pl.col("values").rolling_var_by("dt", period, closed=closed).alias("var"), + pl.col("values").rolling_mean_by("dt", period, closed=closed).alias("mean"), + pl.col("values").rolling_std_by("dt", period, closed=closed).alias("std"), + pl.col("values") + .rolling_quantile_by("dt", period, quantile=0.2, closed=closed) + .alias("quantile"), ) out2 = ( example_df.set_sorted("dt") @@ -267,12 +265,10 @@ def test_rolling_extrema() -> None: } ) ).with_columns( - [ - pl.when(pl.int_range(0, pl.len(), eager=False) < 2) - .then(None) - .otherwise(pl.all()) - .name.suffix("_nulls") - ] + pl.when(pl.int_range(0, pl.len(), eager=False) < 2) + .then(None) + .otherwise(pl.all()) + .name.suffix("_nulls") ) assert df.select([pl.all().rolling_min(3)]).to_dict(as_series=False) == { @@ -584,10 +580,8 @@ def test_rolling_cov_corr() -> None: df = pl.DataFrame({"x": [3, 3, 3, 5, 8], "y": [3, 4, 4, 4, 8]}) res = df.select( - [ - pl.rolling_cov("x", "y", window_size=3).alias("cov"), - pl.rolling_corr("x", "y", window_size=3).alias("corr"), - ] + pl.rolling_cov("x", "y", window_size=3).alias("cov"), + pl.rolling_corr("x", "y", window_size=3).alias("corr"), ).to_dict(as_series=False) assert res["cov"][2:] == pytest.approx([0.0, 0.0, 5.333333333333336]) assert res["corr"][2:] == pytest.approx([nan, nan, 0.9176629354822473], nan_ok=True) @@ -610,19 +604,15 @@ def test_rolling_empty_window_9406(time_unit: TimeUnit) -> None: assert_frame_equal( pl.DataFrame([datecol, rmax]), df.select( - [ - pl.col("d"), - pl.col("x").rolling_max_by("d", window_size="3d", closed="left"), - ] + pl.col("d"), + pl.col("x").rolling_max_by("d", window_size="3d", closed="left"), ), ) assert_frame_equal( pl.DataFrame([datecol, rmin]), df.select( - [ - pl.col("d"), - pl.col("x").rolling_min_by("d", window_size="3d", closed="left"), - ] + pl.col("d"), + pl.col("x").rolling_min_by("d", window_size="3d", closed="left"), ), ) diff --git a/py-polars/tests/unit/operations/test_ewm.py b/py-polars/tests/unit/operations/test_ewm.py index 6818df8b8cd80..05b7a07ca09cd 100644 --- a/py-polars/tests/unit/operations/test_ewm.py +++ b/py-polars/tests/unit/operations/test_ewm.py @@ -211,9 +211,7 @@ def test_ewm_with_multiple_chunks() -> None: ], schema=["a", "b", "c"], ).with_columns( - [ - pl.col(pl.Float64).log().diff().name.prefix("ld_"), - ] + pl.col(pl.Float64).log().diff().name.prefix("ld_"), ) assert df0.n_chunks() == 1 
diff --git a/py-polars/tests/unit/operations/test_group_by.py b/py-polars/tests/unit/operations/test_group_by.py
index 8e01666fad358..6bba541e0613a 100644
--- a/py-polars/tests/unit/operations/test_group_by.py
+++ b/py-polars/tests/unit/operations/test_group_by.py
@@ -460,16 +460,11 @@ def test_arg_sort_sort_by_groups_update__4360() -> None:
     out = df.with_columns(
         pl.col("col2").arg_sort().over("group").alias("col2_arg_sort")
     ).with_columns(
-        [
-            pl.col("col1")
-            .sort_by(pl.col("col2_arg_sort"))
-            .over("group")
-            .alias("result_a"),
-            pl.col("col1")
-            .sort_by(pl.col("col2").arg_sort())
-            .over("group")
-            .alias("result_b"),
-        ]
+        pl.col("col1").sort_by(pl.col("col2_arg_sort")).over("group").alias("result_a"),
+        pl.col("col1")
+        .sort_by(pl.col("col2").arg_sort())
+        .over("group")
+        .alias("result_b"),
     )
 
     assert_series_equal(out["result_a"], out["result_b"], check_names=False)
diff --git a/py-polars/tests/unit/operations/test_join.py b/py-polars/tests/unit/operations/test_join.py
index 8d3a7ccfaa1bd..41dd7eaa0cdde 100644
--- a/py-polars/tests/unit/operations/test_join.py
+++ b/py-polars/tests/unit/operations/test_join.py
@@ -218,7 +218,7 @@ def test_joins_dispatch() -> None:
             "datetime": [13241324, 12341256, 12341234, 13241324],
         }
     ).with_columns(
-        [pl.col("date").str.strptime(pl.Date), pl.col("datetime").cast(pl.Datetime)]
+        pl.col("date").str.strptime(pl.Date), pl.col("datetime").cast(pl.Datetime)
     )
 
     join_strategies: list[JoinStrategy] = ["left", "inner", "full"]
diff --git a/py-polars/tests/unit/operations/test_pivot.py b/py-polars/tests/unit/operations/test_pivot.py
index cf71597fe30c7..8fb683fad1346 100644
--- a/py-polars/tests/unit/operations/test_pivot.py
+++ b/py-polars/tests/unit/operations/test_pivot.py
@@ -397,7 +397,7 @@ def test_pivot_negative_duration() -> None:
     df2 = pl.DataFrame({"delta": [timedelta(days=i) for i in (-2, -1, 0, 1)]})
 
     df = df1.join(df2, how="cross").with_columns(
-        [pl.Series(name="value", values=range(len(df1) * len(df2)))]
+        pl.Series(name="value", values=range(len(df1) * len(df2)))
     )
     assert df.pivot(
         index="delta", columns="root", values="value", aggregate_function=None
diff --git a/py-polars/tests/unit/operations/test_sort.py b/py-polars/tests/unit/operations/test_sort.py
index 35650d2228b9d..b115ccb2e6c24 100644
--- a/py-polars/tests/unit/operations/test_sort.py
+++ b/py-polars/tests/unit/operations/test_sort.py
@@ -175,10 +175,8 @@ def test_expr_arg_sort_nulls_last(
 def test_arg_sort_window_functions() -> None:
     df = pl.DataFrame({"Id": [1, 1, 2, 2, 3, 3], "Age": [1, 2, 3, 4, 5, 6]})
     out = df.select(
-        [
-            pl.col("Age").arg_sort().over("Id").alias("arg_sort"),
-            pl.arg_sort_by("Age").over("Id").alias("arg_sort_by"),
-        ]
+        pl.col("Age").arg_sort().over("Id").alias("arg_sort"),
+        pl.arg_sort_by("Age").over("Id").alias("arg_sort_by"),
     )
     assert (
         out["arg_sort"].to_list() == out["arg_sort_by"].to_list() == [0, 1, 0, 1, 0, 1]
@@ -216,10 +214,8 @@ def test_sort_aggregation_fast_paths() -> None:
     )
 
     expected = df.select(
-        [
-            pl.all().max().name.suffix("_max"),
-            pl.all().min().name.suffix("_min"),
-        ]
+        pl.all().max().name.suffix("_max"),
+        pl.all().min().name.suffix("_min"),
     )
 
     assert expected.to_dict(as_series=False) == {
@@ -238,16 +234,14 @@ def test_sort_aggregation_fast_paths() -> None:
     for descending in [True, False]:
         for null_last in [True, False]:
             out = df.select(
-                [
-                    pl.all()
-                    .sort(descending=descending, nulls_last=null_last)
-                    .max()
-                    .name.suffix("_max"),
-                    pl.all()
-                    .sort(descending=descending, nulls_last=null_last)
-                    .min()
-                    .name.suffix("_min"),
-                ]
+                pl.all()
+                .sort(descending=descending, nulls_last=null_last)
+                .max()
+                .name.suffix("_max"),
+                pl.all()
+                .sort(descending=descending, nulls_last=null_last)
+                .min()
+                .name.suffix("_min"),
             )
 
             assert_frame_equal(out, expected)
@@ -331,10 +325,8 @@ def test_arg_sort_rank_nans() -> None:
             }
         )
         .with_columns(
-            [
-                pl.col("val").rank().alias("rank"),
-                pl.col("val").arg_sort().alias("arg_sort"),
-            ]
+            pl.col("val").rank().alias("rank"),
+            pl.col("val").arg_sort().alias("arg_sort"),
         )
         .select(["rank", "arg_sort"])
    ).to_dict(as_series=False) == {"rank": [1.0, 2.0], "arg_sort": [0, 1]}
@@ -443,10 +435,8 @@ def test_sort_by_in_over_5499() -> None:
         }
     )
     assert df.select(
-        [
-            pl.col("idx").sort_by("a").over("group").alias("sorted_1"),
-            pl.col("idx").shift(1).sort_by("a").over("group").alias("sorted_2"),
-        ]
+        pl.col("idx").sort_by("a").over("group").alias("sorted_1"),
+        pl.col("idx").shift(1).sort_by("a").over("group").alias("sorted_2"),
     ).to_dict(as_series=False) == {
         "sorted_1": [0, 2, 1, 4, 5, 3],
         "sorted_2": [None, 1, 0, 3, 4, None],
diff --git a/py-polars/tests/unit/operations/test_window.py b/py-polars/tests/unit/operations/test_window.py
index 959def0dc45a6..5bb8eb320b283 100644
--- a/py-polars/tests/unit/operations/test_window.py
+++ b/py-polars/tests/unit/operations/test_window.py
@@ -72,11 +72,9 @@ def stdize_out(value: str, control_for: str) -> pl.Expr:
     )
 
     out = df.select(
-        [
-            "*",
-            stdize_out("val1", "cat").alias("out1"),
-            stdize_out("val2", "cat").alias("out2"),
-        ]
+        "*",
+        stdize_out("val1", "cat").alias("out1"),
+        stdize_out("val2", "cat").alias("out2"),
     )
 
     assert out["out1"].to_list() == out["out2"].to_list()
@@ -91,18 +89,16 @@ def test_window_function_cache() -> None:
             "values": range(5),
         }
     ).with_columns(
-        [
-            pl.col("values")
-            .over("groups", mapping_strategy="join")
-            .alias("values_list"),  # aggregation to list + join
-            pl.col("values")
-            .over("groups", mapping_strategy="explode")
-            .alias("values_flat"),  # aggregation to list + explode and concat back
-            pl.col("values")
-            .reverse()
-            .over("groups", mapping_strategy="explode")
-            .alias("values_rev"),  # use flatten to reverse within a group
-        ]
+        pl.col("values")
+        .over("groups", mapping_strategy="join")
+        .alias("values_list"),  # aggregation to list + join
+        pl.col("values")
+        .over("groups", mapping_strategy="explode")
+        .alias("values_flat"),  # aggregation to list + explode and concat back
+        pl.col("values")
+        .reverse()
+        .over("groups", mapping_strategy="explode")
+        .alias("values_rev"),  # use flatten to reverse within a group
     )
 
     assert out["values_list"].to_list() == [
@@ -213,13 +209,8 @@ def test_window_cached_keys_sorted_update_4183() -> None:
         }
     )
     result = df.sort(by=["customer_ID", "date"]).select(
-        [
-            pl.count("date").over(pl.col("customer_ID")).alias("count"),
-            pl.col("date")
-            .rank(method="ordinal")
-            .over(pl.col("customer_ID"))
-            .alias("rank"),
-        ]
+        pl.count("date").over(pl.col("customer_ID")).alias("count"),
+        pl.col("date").rank(method="ordinal").over(pl.col("customer_ID")).alias("rank"),
     )
     expected = pl.DataFrame(
         {"count": [2, 2, 1], "rank": [1, 2, 1]},
@@ -334,14 +325,8 @@ def test_window_function_implode_contention_8536() -> None:
     )
 
     assert df.select(
-        [
-            (pl.lit("LE").is_in(pl.col("memo").over("policy", mapping_strategy="join")))
-            | (
-                pl.lit("RM").is_in(
-                    pl.col("memo").over("policy", mapping_strategy="join")
-                )
-            )
-        ]
+        (pl.lit("LE").is_in(pl.col("memo").over("policy", mapping_strategy="join")))
+        | (pl.lit("RM").is_in(pl.col("memo").over("policy", mapping_strategy="join")))
     ).to_series().to_list() == [
         True,
         True,
@@ -439,10 +424,8 @@ def test_window_10417() -> None:
     df = pl.DataFrame({"a": [1], "b": [1.2], "c": [2.1]})
 
     assert df.lazy().with_columns(
-        [
-            pl.col("b") - pl.col("b").mean().over("a"),
-            pl.col("c") - pl.col("c").mean().over("a"),
-        ]
+        pl.col("b") - pl.col("b").mean().over("a"),
+        pl.col("c") - pl.col("c").mean().over("a"),
     ).collect().to_dict(as_series=False) == {"a": [1], "b": [0.0], "c": [0.0]}
 
 
diff --git a/py-polars/tests/unit/operations/test_with_columns.py b/py-polars/tests/unit/operations/test_with_columns.py
index 89c3b4eb700b4..7ab27e5820994 100644
--- a/py-polars/tests/unit/operations/test_with_columns.py
+++ b/py-polars/tests/unit/operations/test_with_columns.py
@@ -36,17 +36,15 @@ def test_with_columns() -> None:
 
     # as exprs list
     dx = df.with_columns(
-        [
-            (pl.col("a") * pl.col("b")).alias("d"),
-            ~pl.col("c").alias("e"),
-            srs_named,
-            pl.lit(True).alias("g"),
-            pl.lit(1).alias("h"),
-            pl.lit(3.2).alias("i"),
-            pl.col("a").alias("j"),
-            pl.lit(None).alias("k"),
-            pl.lit(datetime.datetime(2001, 1, 1, 0, 0)).alias("l"),
-        ]
+        (pl.col("a") * pl.col("b")).alias("d"),
+        ~pl.col("c").alias("e"),
+        srs_named,
+        pl.lit(True).alias("g"),
+        pl.lit(1).alias("h"),
+        pl.lit(3.2).alias("i"),
+        pl.col("a").alias("j"),
+        pl.lit(None).alias("k"),
+        pl.lit(datetime.datetime(2001, 1, 1, 0, 0)).alias("l"),
     )
 
     assert_frame_equal(dx, expected)
diff --git a/py-polars/tests/unit/operations/unique/test_unique_counts.py b/py-polars/tests/unit/operations/unique/test_unique_counts.py
index 143d9548ac74b..0ca4fe82091e7 100644
--- a/py-polars/tests/unit/operations/unique/test_unique_counts.py
+++ b/py-polars/tests/unit/operations/unique/test_unique_counts.py
@@ -18,11 +18,9 @@ def test_unique_counts_on_dates() -> None:
             ),
         }
     ).with_columns(
-        [
-            pl.col("dt_ns").dt.cast_time_unit("us").alias("dt_us"),
-            pl.col("dt_ns").dt.cast_time_unit("ms").alias("dt_ms"),
-            pl.col("dt_ns").cast(pl.Date).alias("date"),
-        ]
+        pl.col("dt_ns").dt.cast_time_unit("us").alias("dt_us"),
+        pl.col("dt_ns").dt.cast_time_unit("ms").alias("dt_ms"),
+        pl.col("dt_ns").cast(pl.Date).alias("date"),
     ).select(pl.all().unique_counts().sum()).to_dict(as_series=False) == {
         "dt_ns": [3],
         "dt_us": [3],
diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py
index 484100d9d8e74..87409793d7fb8 100644
--- a/py-polars/tests/unit/series/test_series.py
+++ b/py-polars/tests/unit/series/test_series.py
@@ -818,12 +818,10 @@ def test_fill_null() -> None:
     df = pl.DataFrame({"a": [1, None, 2, None]})
 
     out = df.with_columns(
-        [
-            pl.col("a").cast(pl.UInt8).alias("u8"),
-            pl.col("a").cast(pl.UInt16).alias("u16"),
-            pl.col("a").cast(pl.UInt32).alias("u32"),
-            pl.col("a").cast(pl.UInt64).alias("u64"),
-        ]
+        pl.col("a").cast(pl.UInt8).alias("u8"),
+        pl.col("a").cast(pl.UInt16).alias("u16"),
+        pl.col("a").cast(pl.UInt32).alias("u32"),
+        pl.col("a").cast(pl.UInt64).alias("u64"),
     ).fill_null(3)
 
     assert out.to_dict(as_series=False) == {
@@ -1153,11 +1151,9 @@ def test_bitwise() -> None:
     df = pl.DataFrame([a, b])
 
     out = df.select(
-        [
-            (pl.col("a") & pl.col("b")).alias("and"),
-            (pl.col("a") | pl.col("b")).alias("or"),
-            (pl.col("a") ^ pl.col("b")).alias("xor"),
-        ]
+        (pl.col("a") & pl.col("b")).alias("and"),
+        (pl.col("a") | pl.col("b")).alias("or"),
+        (pl.col("a") ^ pl.col("b")).alias("xor"),
     )
     assert_series_equal(out["and"], pl.Series("and", [1, 0, 1]))
     assert_series_equal(out["or"], pl.Series("or", [3, 6, 7]))
diff --git a/py-polars/tests/unit/streaming/test_streaming.py b/py-polars/tests/unit/streaming/test_streaming.py
index 17303c6bc5e30..46d968902a9c1 100644
--- a/py-polars/tests/unit/streaming/test_streaming.py
+++ b/py-polars/tests/unit/streaming/test_streaming.py
@@ -245,7 +245,7 @@ def test_streaming_empty_df() -> None:
 
 def test_streaming_duplicate_cols_5537() -> None:
     assert pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}).lazy().with_columns(
-        [(pl.col("a") * 2).alias("foo"), (pl.col("a") * 3)]
+        (pl.col("a") * 2).alias("foo"), (pl.col("a") * 3)
     ).collect(streaming=True).to_dict(as_series=False) == {
         "a": [3, 6, 9],
         "b": [1, 2, 3],
diff --git a/py-polars/tests/unit/test_api.py b/py-polars/tests/unit/test_api.py
index 5b83b53d61fc4..ba391f040e5cb 100644
--- a/py-polars/tests/unit/test_api.py
+++ b/py-polars/tests/unit/test_api.py
@@ -61,12 +61,10 @@ def nearest(self, p: int) -> pl.Expr:
 
     df = pl.DataFrame([1.4, 24.3, 55.0, 64.001], schema=["n"])
     assert df.select(
-        [
-            pl.col("n"),
-            pl.col("n").power.next(p=2).alias("next_pow2"),  # type: ignore[attr-defined]
-            pl.col("n").power.previous(p=2).alias("prev_pow2"),  # type: ignore[attr-defined]
-            pl.col("n").power.nearest(p=2).alias("nearest_pow2"),  # type: ignore[attr-defined]
-        ]
+        pl.col("n"),
+        pl.col("n").power.next(p=2).alias("next_pow2"),  # type: ignore[attr-defined]
+        pl.col("n").power.previous(p=2).alias("prev_pow2"),  # type: ignore[attr-defined]
+        pl.col("n").power.nearest(p=2).alias("nearest_pow2"),  # type: ignore[attr-defined]
     ).rows() == [
         (1.4, 2, 1, 1),
         (24.3, 32, 16, 32),
diff --git a/py-polars/tests/unit/test_cse.py b/py-polars/tests/unit/test_cse.py
index 167c43cfa8dc4..0081bc15ca435 100644
--- a/py-polars/tests/unit/test_cse.py
+++ b/py-polars/tests/unit/test_cse.py
@@ -764,9 +764,7 @@ def test_cse_series_collision_16138() -> None:
     ]
 
     factor_holdings = holdings.lazy().with_columns(
-        [
-            pl.coalesce(currency_factor_query_dict).alias("currency_factor"),
-        ]
+        pl.coalesce(currency_factor_query_dict).alias("currency_factor"),
     )
 
     assert factor_holdings.collect(comm_subexpr_elim=True).to_dict(as_series=False) == {
diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py
index fc00fdd574c8d..861caa19e4b5d 100644
--- a/py-polars/tests/unit/test_errors.py
+++ b/py-polars/tests/unit/test_errors.py
@@ -237,7 +237,7 @@ def test_is_nan_on_non_boolean() -> None:
 def test_window_expression_different_group_length() -> None:
     try:
         pl.DataFrame({"groups": ["a", "a", "b", "a", "b"]}).select(
-            [pl.col("groups").map_elements(lambda _: pl.Series([1, 2])).over("groups")]
+            pl.col("groups").map_elements(lambda _: pl.Series([1, 2])).over("groups")
         )
     except pl.ComputeError as exc:
         msg = str(exc)
diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py
index 6d7380320c710..74cc9987a8499 100644
--- a/py-polars/tests/unit/test_lazy.py
+++ b/py-polars/tests/unit/test_lazy.py
@@ -308,11 +308,9 @@ def test_window_function() -> None:
     assert ldf.width == 4
 
     q = ldf.with_columns(
-        [
-            pl.sum("A").over("fruits").alias("fruit_sum_A"),
-            pl.first("B").over("fruits").alias("fruit_first_B"),
-            pl.max("B").over("cars").alias("cars_max_B"),
-        ]
+        pl.sum("A").over("fruits").alias("fruit_sum_A"),
+        pl.first("B").over("fruits").alias("fruit_first_B"),
+        pl.max("B").over("cars").alias("cars_max_B"),
     )
     assert q.width == 7
 
@@ -674,19 +672,17 @@ def test_backward_fill() -> None:
 def test_rolling(fruits_cars: pl.DataFrame) -> None:
     ldf = fruits_cars.lazy()
     out = ldf.select(
-        [
-            pl.col("A").rolling_min(3, min_periods=1).alias("1"),
-            pl.col("A").rolling_min(3).alias("1b"),
-            pl.col("A").rolling_mean(3, min_periods=1).alias("2"),
-            pl.col("A").rolling_mean(3).alias("2b"),
-            pl.col("A").rolling_max(3, min_periods=1).alias("3"),
-            pl.col("A").rolling_max(3).alias("3b"),
-            pl.col("A").rolling_sum(3, min_periods=1).alias("4"),
-            pl.col("A").rolling_sum(3).alias("4b"),
-            # below we use .round purely for the ability to do assert frame equality
-            pl.col("A").rolling_std(3).round(1).alias("std"),
-            pl.col("A").rolling_var(3).round(1).alias("var"),
-        ]
+        pl.col("A").rolling_min(3, min_periods=1).alias("1"),
+        pl.col("A").rolling_min(3).alias("1b"),
+        pl.col("A").rolling_mean(3, min_periods=1).alias("2"),
+        pl.col("A").rolling_mean(3).alias("2b"),
+        pl.col("A").rolling_max(3, min_periods=1).alias("3"),
+        pl.col("A").rolling_max(3).alias("3b"),
+        pl.col("A").rolling_sum(3, min_periods=1).alias("4"),
+        pl.col("A").rolling_sum(3).alias("4b"),
+        # below we use .round purely for the ability to do assert frame equality
+        pl.col("A").rolling_std(3).round(1).alias("std"),
+        pl.col("A").rolling_var(3).round(1).alias("var"),
     )
 
     assert_frame_equal(
@@ -708,10 +704,8 @@ def test_rolling(fruits_cars: pl.DataFrame) -> None:
     )
 
     out_single_val_variance = ldf.select(
-        [
-            pl.col("A").rolling_std(3, min_periods=1).round(decimals=4).alias("std"),
-            pl.col("A").rolling_var(3, min_periods=1).round(decimals=1).alias("var"),
-        ]
+        pl.col("A").rolling_std(3, min_periods=1).round(decimals=4).alias("std"),
+        pl.col("A").rolling_var(3, min_periods=1).round(decimals=1).alias("var"),
     ).collect()
 
     assert cast(float, out_single_val_variance[0, "std"]) is None
@@ -721,41 +715,39 @@ def test_rolling(fruits_cars: pl.DataFrame) -> None:
 def test_arr_namespace(fruits_cars: pl.DataFrame) -> None:
     ldf = fruits_cars.lazy()
     out = ldf.select(
-        [
-            "fruits",
-            pl.col("B")
-            .over("fruits", mapping_strategy="join")
-            .list.min()
-            .alias("B_by_fruits_min1"),
-            pl.col("B")
-            .min()
-            .over("fruits", mapping_strategy="join")
-            .alias("B_by_fruits_min2"),
-            pl.col("B")
-            .over("fruits", mapping_strategy="join")
-            .list.max()
-            .alias("B_by_fruits_max1"),
-            pl.col("B")
-            .max()
-            .over("fruits", mapping_strategy="join")
-            .alias("B_by_fruits_max2"),
-            pl.col("B")
-            .over("fruits", mapping_strategy="join")
-            .list.sum()
-            .alias("B_by_fruits_sum1"),
-            pl.col("B")
-            .sum()
-            .over("fruits", mapping_strategy="join")
-            .alias("B_by_fruits_sum2"),
-            pl.col("B")
-            .over("fruits", mapping_strategy="join")
-            .list.mean()
-            .alias("B_by_fruits_mean1"),
-            pl.col("B")
-            .mean()
-            .over("fruits", mapping_strategy="join")
-            .alias("B_by_fruits_mean2"),
-        ]
+        "fruits",
+        pl.col("B")
+        .over("fruits", mapping_strategy="join")
+        .list.min()
+        .alias("B_by_fruits_min1"),
+        pl.col("B")
+        .min()
+        .over("fruits", mapping_strategy="join")
+        .alias("B_by_fruits_min2"),
+        pl.col("B")
+        .over("fruits", mapping_strategy="join")
+        .list.max()
+        .alias("B_by_fruits_max1"),
+        pl.col("B")
+        .max()
+        .over("fruits", mapping_strategy="join")
+        .alias("B_by_fruits_max2"),
+        pl.col("B")
+        .over("fruits", mapping_strategy="join")
+        .list.sum()
+        .alias("B_by_fruits_sum1"),
+        pl.col("B")
+        .sum()
+        .over("fruits", mapping_strategy="join")
+        .alias("B_by_fruits_sum2"),
+        pl.col("B")
+        .over("fruits", mapping_strategy="join")
+        .list.mean()
+        .alias("B_by_fruits_mean1"),
+        pl.col("B")
+        .mean()
+        .over("fruits", mapping_strategy="join")
+        .alias("B_by_fruits_mean2"),
     )
     expected = pl.DataFrame(
         {
@@ -789,19 +781,17 @@ def test_arithmetic() -> None:
     ldf = pl.LazyFrame({"a": [1, 2, 3]})
 
     out = ldf.select(
-        [
-            (pl.col("a") % 2).alias("1"),
-            (2 % pl.col("a")).alias("2"),
-            (1 // pl.col("a")).alias("3"),
-            (1 * pl.col("a")).alias("4"),
-            (1 + pl.col("a")).alias("5"),
-            (1 - pl.col("a")).alias("6"),
-            (pl.col("a") // 2).alias("7"),
-            (pl.col("a") * 2).alias("8"),
-            (pl.col("a") + 2).alias("9"),
-            (pl.col("a") - 2).alias("10"),
-            (-pl.col("a")).alias("11"),
-        ]
+        (pl.col("a") % 2).alias("1"),
+        (2 % pl.col("a")).alias("2"),
+        (1 // pl.col("a")).alias("3"),
+        (1 * pl.col("a")).alias("4"),
+        (1 + pl.col("a")).alias("5"),
+        (1 - pl.col("a")).alias("6"),
+        (pl.col("a") // 2).alias("7"),
+        (pl.col("a") * 2).alias("8"),
+        (pl.col("a") + 2).alias("9"),
+        (pl.col("a") - 2).alias("10"),
+        (-pl.col("a")).alias("11"),
     )
     expected = pl.DataFrame(
         {
@@ -832,10 +822,8 @@ def test_float_floor_divide() -> None:
 def test_argminmax() -> None:
     ldf = pl.LazyFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 2, 2]})
     out = ldf.select(
-        [
-            pl.col("a").arg_min().alias("min"),
-            pl.col("a").arg_max().alias("max"),
-        ]
+        pl.col("a").arg_min().alias("min"),
+        pl.col("a").arg_max().alias("max"),
     ).collect()
     assert out["max"][0] == 4
     assert out["min"][0] == 0
@@ -1009,11 +997,9 @@ def test_self_join() -> None:
     out = (
         ldf.join(other=ldf, left_on="manager_id", right_on="employee_id", how="left")
         .select(
-            [
-                pl.col("employee_id"),
-                pl.col("employee_name"),
-                pl.col("employee_name_right").alias("manager_name"),
-            ]
+            pl.col("employee_id"),
+            pl.col("employee_name"),
+            pl.col("employee_name_right").alias("manager_name"),
         )
         .collect()
     )
@@ -1111,7 +1097,7 @@ def test_update_schema_after_projection_pd_t4157() -> None:
 def test_type_coercion_unknown_4190() -> None:
     df = (
         pl.LazyFrame({"a": [1, 2, 3], "b": [1, 2, 3]}).with_columns(
-            [pl.col("a") & pl.col("a").fill_null(True)]
+            pl.col("a") & pl.col("a").fill_null(True)
         )
     ).collect()
     assert df.shape == (3, 2)
@@ -1123,12 +1109,10 @@ def test_lazy_cache_same_key() -> None:
 
     # these have the same schema, but should not be used by cache as they are different
     add_node = ldf.select([(pl.col("a") + pl.col("b")).alias("a"), pl.col("c")]).cache()
-    mult_node = ldf.select(
-        [(pl.col("a") * pl.col("b")).alias("a"), pl.col("c")]
-    ).cache()
+    mult_node = ldf.select((pl.col("a") * pl.col("b")).alias("a"), pl.col("c")).cache()
 
     result = mult_node.join(add_node, on="c", suffix="_mult").select(
-        [(pl.col("a") - pl.col("a_mult")).alias("a"), pl.col("c")]
+        (pl.col("a") - pl.col("a_mult")).alias("a"), pl.col("c")
     )
     expected = pl.LazyFrame({"a": [-1, 2, 7], "c": ["x", "y", "z"]})
     assert_frame_equal(result, expected)
@@ -1141,7 +1125,7 @@ def test_lazy_cache_hit(monkeypatch: Any, capfd: Any) -> None:
 
     add_node = ldf.select([(pl.col("a") + pl.col("b")).alias("a"), pl.col("c")]).cache()
     result = add_node.join(add_node, on="c", suffix="_mult").select(
-        [(pl.col("a") - pl.col("a_mult")).alias("a"), pl.col("c")]
+        (pl.col("a") - pl.col("a_mult")).alias("a"), pl.col("c")
    )
     expected = pl.LazyFrame({"a": [0, 0, 0], "c": ["x", "y", "z"]})
     assert_frame_equal(result, expected)
@@ -1238,13 +1222,11 @@ def test_from_epoch(input_dtype: pl.PolarsDataType) -> None:
     )
 
     ldf_result = ldf.select(
-        [
-            pl.from_epoch(pl.col("timestamp_d"), time_unit="d"),
-            pl.from_epoch(pl.col("timestamp_s"), time_unit="s"),
-            pl.from_epoch(pl.col("timestamp_ms"), time_unit="ms"),
-            pl.from_epoch(pl.col("timestamp_us"), time_unit="us"),
-            pl.from_epoch(pl.col("timestamp_ns"), time_unit="ns"),
-        ]
+        pl.from_epoch(pl.col("timestamp_d"), time_unit="d"),
+        pl.from_epoch(pl.col("timestamp_s"), time_unit="s"),
+        pl.from_epoch(pl.col("timestamp_ms"), time_unit="ms"),
+        pl.from_epoch(pl.col("timestamp_us"), time_unit="us"),
+        pl.from_epoch(pl.col("timestamp_ns"), time_unit="ns"),
     ).collect()
 
     assert_frame_equal(ldf_result, expected)
@@ -1264,10 +1246,8 @@ def test_from_epoch_str() -> None:
 
     with pytest.raises(ComputeError):
         ldf.select(
-            [
-                pl.from_epoch(pl.col("timestamp_ms"), time_unit="ms"),
-                pl.from_epoch(pl.col("timestamp_us"), time_unit="us"),
-            ]
+            pl.from_epoch(pl.col("timestamp_ms"), time_unit="ms"),
+            pl.from_epoch(pl.col("timestamp_us"), time_unit="us"),
        ).collect()
 
 
diff --git a/py-polars/tests/unit/test_projections.py b/py-polars/tests/unit/test_projections.py
index 638d7e6f08ba6..4ee7e1ad99a61 100644
--- a/py-polars/tests/unit/test_projections.py
+++ b/py-polars/tests/unit/test_projections.py
@@ -74,11 +74,9 @@ def test_unnest_projection_pushdown() -> None:
         .unnest("variable")
     )
     mlf = mlf.select(
-        [
-            pl.col("field_1").cast(pl.Categorical).alias("row"),
-            pl.col("field_2").cast(pl.Categorical).alias("col"),
-            pl.col("value"),
-        ]
+        pl.col("field_1").cast(pl.Categorical).alias("row"),
+        pl.col("field_2").cast(pl.Categorical).alias("col"),
+        pl.col("value"),
     )
     out = mlf.collect().to_dict(as_series=False)
     assert out == {
diff --git a/py-polars/tests/unit/test_queries.py b/py-polars/tests/unit/test_queries.py
index 7b9531accd252..5e8c935f865fa 100644
--- a/py-polars/tests/unit/test_queries.py
+++ b/py-polars/tests/unit/test_queries.py
@@ -65,11 +65,7 @@ def test_overflow_uint16_agg_mean() -> None:
                 "col3": [64 for _ in range(1025)],
             }
         )
-        .with_columns(
-            [
-                pl.col("col3").cast(pl.UInt16),
-            ]
-        )
+        .with_columns(pl.col("col3").cast(pl.UInt16))
        .group_by(["col1"])
        .agg(pl.col("col3").mean())
        .to_dict(as_series=False)
diff --git a/py-polars/tests/unit/test_schema.py b/py-polars/tests/unit/test_schema.py
index 409f44833e318..d777af51a5249 100644
--- a/py-polars/tests/unit/test_schema.py
+++ b/py-polars/tests/unit/test_schema.py
@@ -108,18 +108,16 @@ def test_pow_dtype() -> None:
     df = (
         df.with_columns([pl.col("foo").cast(pl.UInt32)])
         .with_columns(
-            [
-                (pl.col("foo") * 2**2).alias("scaled_foo"),
-                (pl.col("foo") * 2**2.1).alias("scaled_foo2"),
-                (pl.col("a") ** pl.col("h")).alias("a_pow_h"),
-                (pl.col("b") ** pl.col("h")).alias("b_pow_h"),
-                (pl.col("c") ** pl.col("h")).alias("c_pow_h"),
-                (pl.col("d") ** pl.col("h")).alias("d_pow_h"),
-                (pl.col("e") ** pl.col("h")).alias("e_pow_h"),
-                (pl.col("f") ** pl.col("h")).alias("f_pow_h"),
-                (pl.col("g") ** pl.col("h")).alias("g_pow_h"),
-                (pl.col("h") ** pl.col("h")).alias("h_pow_h"),
-            ]
+            (pl.col("foo") * 2**2).alias("scaled_foo"),
+            (pl.col("foo") * 2**2.1).alias("scaled_foo2"),
+            (pl.col("a") ** pl.col("h")).alias("a_pow_h"),
+            (pl.col("b") ** pl.col("h")).alias("b_pow_h"),
+            (pl.col("c") ** pl.col("h")).alias("c_pow_h"),
+            (pl.col("d") ** pl.col("h")).alias("d_pow_h"),
+            (pl.col("e") ** pl.col("h")).alias("e_pow_h"),
+            (pl.col("f") ** pl.col("h")).alias("f_pow_h"),
+            (pl.col("g") ** pl.col("h")).alias("g_pow_h"),
+            (pl.col("h") ** pl.col("h")).alias("h_pow_h"),
         )
         .drop(["a", "b", "c", "d", "e", "f", "g", "h"])
     )
@@ -238,14 +236,15 @@ def test_join_as_of_by_schema() -> None:
 
 def test_unknown_map_elements() -> None:
     df = pl.DataFrame(
-        {"Amount": [10, 1, 1, 5], "Flour": ["1000g", "100g", "50g", "75g"]}
+        {
+            "Amount": [10, 1, 1, 5],
+            "Flour": ["1000g", "100g", "50g", "75g"],
+        }
     )
 
     q = df.lazy().select(
-        [
-            pl.col("Amount"),
-            pl.col("Flour").map_elements(lambda x: 100.0) / pl.col("Amount"),
-        ]
+        pl.col("Amount"),
+        pl.col("Flour").map_elements(lambda x: 100.0) / pl.col("Amount"),
     )
 
     assert q.collect().to_dict(as_series=False) == {