Skip to content

Commit

Permalink
chore(python): Update docstring/test/etc usage of select and `with_columns` to idiomatic form (pola-rs#16801)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored and Wouittone committed Jun 22, 2024
1 parent da9256a commit f5d0a64
Show file tree
Hide file tree
Showing 49 changed files with 478 additions and 699 deletions.
26 changes: 12 additions & 14 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7808,10 +7808,8 @@ def unstack(
fill_values = [fill_values for _ in range(df.width)]

df = df.select(
[
s.extend_constant(next_fill, n_fill)
for s, next_fill in zip(df, fill_values)
]
s.extend_constant(next_fill, n_fill)
for s, next_fill in zip(df, fill_values)
)

if how == "horizontal":
Expand Down Expand Up @@ -8403,14 +8401,12 @@ def with_columns(
│ 4.0 ┆ 13.0 ┆ true │
└─────┴──────┴───────┘
Multiple columns can be added by passing a list of expressions.
Multiple columns can be added using positional arguments.
>>> df.with_columns(
... [
... (pl.col("a") ** 2).alias("a^2"),
... (pl.col("b") / 2).alias("b/2"),
... (pl.col("c").not_()).alias("not c"),
... ]
... (pl.col("a") ** 2).alias("a^2"),
... (pl.col("b") / 2).alias("b/2"),
... (pl.col("c").not_()).alias("not c"),
... )
shape: (4, 6)
┌─────┬──────┬───────┬─────┬──────┬───────┐
Expand All @@ -8424,12 +8420,14 @@ def with_columns(
│ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
└─────┴──────┴───────┴─────┴──────┴───────┘
Multiple columns also can be added using positional arguments instead of a list.
Multiple columns can also be added by passing a list of expressions.
>>> df.with_columns(
... (pl.col("a") ** 2).alias("a^2"),
... (pl.col("b") / 2).alias("b/2"),
... (pl.col("c").not_()).alias("not c"),
... [
... (pl.col("a") ** 2).alias("a^2"),
... (pl.col("b") / 2).alias("b/2"),
... (pl.col("c").not_()).alias("not c"),
... ]
... )
shape: (4, 6)
┌─────┬──────┬───────┬─────┬──────┬───────┐
Expand Down
18 changes: 7 additions & 11 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,13 +389,11 @@ def to_physical(self) -> Self:
function.
>>> pl.DataFrame({"vals": ["a", "x", None, "a"]}).with_columns(
... [
... pl.col("vals").cast(pl.Categorical),
... pl.col("vals")
... .cast(pl.Categorical)
... .to_physical()
... .alias("vals_physical"),
... ]
... pl.col("vals").cast(pl.Categorical),
... pl.col("vals")
... .cast(pl.Categorical)
... .to_physical()
... .alias("vals_physical"),
... )
shape: (4, 2)
┌──────┬───────────────┐
Expand Down Expand Up @@ -1726,10 +1724,8 @@ def cast(self, dtype: PolarsDataType | type[Any], *, strict: bool = True) -> Sel
... }
... )
>>> df.with_columns(
... [
... pl.col("a").cast(pl.Float64),
... pl.col("b").cast(pl.Int32),
... ]
... pl.col("a").cast(pl.Float64),
... pl.col("b").cast(pl.Int32),
... )
shape: (3, 2)
┌─────┬─────┐
Expand Down
20 changes: 8 additions & 12 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1767,12 +1767,10 @@ def split_exact(self, by: IntoExpr, n: int, *, inclusive: bool = False) -> Expr:
each part to a new column.
>>> df.with_columns(
... [
... pl.col("x")
... .str.split_exact("_", 1)
... .struct.rename_fields(["first_part", "second_part"])
... .alias("fields"),
... ]
... pl.col("x")
... .str.split_exact("_", 1)
... .struct.rename_fields(["first_part", "second_part"])
... .alias("fields")
... ).unnest("fields")
shape: (4, 3)
┌──────┬────────────┬─────────────┐
Expand Down Expand Up @@ -1832,12 +1830,10 @@ def splitn(self, by: IntoExpr, n: int) -> Expr:
each part to a new column.
>>> df.with_columns(
... [
... pl.col("s")
... .str.splitn(" ", 2)
... .struct.rename_fields(["first_part", "second_part"])
... .alias("fields"),
... ]
... pl.col("s")
... .str.splitn(" ", 2)
... .struct.rename_fields(["first_part", "second_part"])
... .alias("fields")
... ).unnest("fields")
shape: (4, 3)
┌─────────────┬────────────┬─────────────┐
Expand Down
6 changes: 2 additions & 4 deletions py-polars/polars/functions/eager.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,8 @@ def concat(
x.join(y, how="full", on=common_cols, suffix="_PL_CONCAT_RIGHT")
# Coalesce full outer join columns
.with_columns(
[
F.coalesce([name, f"{name}_PL_CONCAT_RIGHT"])
for name in common_cols
]
F.coalesce([name, f"{name}_PL_CONCAT_RIGHT"])
for name in common_cols
)
.drop([f"{name}_PL_CONCAT_RIGHT" for name in common_cols])
),
Expand Down
26 changes: 12 additions & 14 deletions py-polars/polars/io/spreadsheet/_write_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,20 +250,18 @@ def _xl_inject_dummy_table_columns(
df_select_cols.insert(insert_idx, col)

df = df.select(
[
(
col
if col in df_original_columns
else (
F.lit(None).cast(
cast_lookup.get(col, dtype) # type:ignore[arg-type]
)
if dtype or (col in cast_lookup and cast_lookup[col] is not None)
else F.lit(None)
).alias(col)
)
for col in df_select_cols
]
(
col
if col in df_original_columns
else (
F.lit(None).cast(
cast_lookup.get(col, dtype) # type:ignore[arg-type]
)
if dtype or (col in cast_lookup and cast_lookup[col] is not None)
else F.lit(None)
).alias(col)
)
for col in df_select_cols
)
return df

Expand Down
4 changes: 1 addition & 3 deletions py-polars/polars/io/spreadsheet/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -853,9 +853,7 @@ def _read_spreadsheet_calamine(
type_checks.append(check_cast)

if type_checks:
apply_cast = df.select(
[d[0].all(ignore_nulls=True) for d in type_checks],
).row(0)
apply_cast = df.select(d[0].all(ignore_nulls=True) for d in type_checks).row(0)
if downcast := [
cast for apply, (_, cast) in zip(apply_cast, type_checks) if apply
]:
Expand Down
26 changes: 13 additions & 13 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4084,14 +4084,12 @@ def with_columns(
│ 4.0 ┆ 13.0 ┆ true │
└─────┴──────┴───────┘
Multiple columns can be added by passing a list of expressions.
Multiple columns can be added using positional arguments.
>>> lf.with_columns(
... [
... (pl.col("a") ** 2).alias("a^2"),
... (pl.col("b") / 2).alias("b/2"),
... (pl.col("c").not_()).alias("not c"),
... ]
... (pl.col("a") ** 2).alias("a^2"),
... (pl.col("b") / 2).alias("b/2"),
... (pl.col("c").not_()).alias("not c"),
... ).collect()
shape: (4, 6)
┌─────┬──────┬───────┬─────┬──────┬───────┐
Expand All @@ -4105,12 +4103,14 @@ def with_columns(
│ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
└─────┴──────┴───────┴─────┴──────┴───────┘
Multiple columns also can be added using positional arguments instead of a list.
Multiple columns can also be added by passing a list of expressions.
>>> lf.with_columns(
... (pl.col("a") ** 2).alias("a^2"),
... (pl.col("b") / 2).alias("b/2"),
... (pl.col("c").not_()).alias("not c"),
... [
... (pl.col("a") ** 2).alias("a^2"),
... (pl.col("b") / 2).alias("b/2"),
... (pl.col("c").not_()).alias("not c"),
... ]
... ).collect()
shape: (4, 6)
┌─────┬──────┬───────┬─────┬──────┬───────┐
Expand Down Expand Up @@ -4142,8 +4142,8 @@ def with_columns(
│ 4 ┆ 13.0 ┆ true ┆ 52.0 ┆ false │
└─────┴──────┴───────┴──────┴───────┘
Expressions with multiple outputs can be automatically instantiated as Structs
by enabling the setting `Config.set_auto_structify(True)`:
Expressions with multiple outputs can automatically be instantiated as Structs
by enabling the experimental setting `Config.set_auto_structify(True)`:
>>> with pl.Config(auto_structify=True):
... lf.drop("c").with_columns(
Expand Down Expand Up @@ -5914,7 +5914,7 @@ def set_sorted(
columns = parse_as_list_of_expressions(column, *more_columns)

return self.with_columns(
[wrap_expr(e).set_sorted(descending=descending) for e in columns]
wrap_expr(e).set_sorted(descending=descending) for e in columns
)

@unstable()
Expand Down
12 changes: 5 additions & 7 deletions py-polars/tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,11 @@ def df() -> pl.DataFrame:
}
)
return df.with_columns(
[
pl.col("date").cast(pl.Date),
pl.col("datetime").cast(pl.Datetime),
pl.col("strings").cast(pl.Categorical).alias("cat"),
pl.col("strings").cast(pl.Enum(["foo", "ham", "bar"])).alias("enum"),
pl.col("time").cast(pl.Time),
]
pl.col("date").cast(pl.Date),
pl.col("datetime").cast(pl.Datetime),
pl.col("strings").cast(pl.Categorical).alias("cat"),
pl.col("strings").cast(pl.Enum(["foo", "ham", "bar"])).alias("enum"),
pl.col("time").cast(pl.Time),
)


Expand Down
8 changes: 3 additions & 5 deletions py-polars/tests/unit/constructors/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,11 +1063,9 @@ def test_init_only_columns() -> None:
],
)
expected = pl.DataFrame({"a": [], "b": [], "c": []}).with_columns(
[
pl.col("a").cast(pl.Date),
pl.col("b").cast(pl.UInt64),
pl.col("c").cast(pl.Int8),
]
pl.col("a").cast(pl.Date),
pl.col("b").cast(pl.UInt64),
pl.col("c").cast(pl.Int8),
)
expected.insert_column(3, pl.Series("d", [], pl.List(pl.UInt8)))

Expand Down
51 changes: 20 additions & 31 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,20 +401,19 @@ def test_take_misc(fruits_cars: pl.DataFrame) -> None:

for index in [[0, 1], pl.Series([0, 1]), np.array([0, 1])]:
out = df.sort("fruits").select(
[
pl.col("B")
.reverse()
.gather(index) # type: ignore[arg-type]
.over("fruits", mapping_strategy="join"),
"fruits",
]
pl.col("B")
.reverse()
.gather(index) # type: ignore[arg-type]
.over("fruits", mapping_strategy="join"),
"fruits",
)

assert out[0, "B"].to_list() == [2, 3]
assert out[4, "B"].to_list() == [1, 4]

out = df.sort("fruits").select(
[pl.col("B").reverse().get(pl.lit(1)).over("fruits"), "fruits"]
pl.col("B").reverse().get(pl.lit(1)).over("fruits"),
"fruits",
)
assert out[0, "B"] == 3
assert out[4, "B"] == 4
Expand Down Expand Up @@ -1755,10 +1754,8 @@ def test_fill_null() -> None:
)

assert df.select(
[
pl.all().forward_fill().name.suffix("_forward"),
pl.all().backward_fill().name.suffix("_backward"),
]
pl.all().forward_fill().name.suffix("_forward"),
pl.all().backward_fill().name.suffix("_backward"),
).to_dict(as_series=False) == {
"c_forward": [
["Apple", "Orange"],
Expand Down Expand Up @@ -2082,10 +2079,8 @@ def test_fill_null_limits() -> None:
"c": [True, None, None, None, False, True, None, None, None, False],
}
).select(
[
pl.all().fill_null(strategy="forward", limit=2),
pl.all().fill_null(strategy="backward", limit=2).name.suffix("_backward"),
]
pl.all().fill_null(strategy="forward", limit=2),
pl.all().fill_null(strategy="backward", limit=2).name.suffix("_backward"),
).to_dict(as_series=False) == {
"a": [1, 1, 1, None, 5, 6, 6, 6, None, 10],
"b": ["a", "a", "a", None, "b", "c", "c", "c", None, "d"],
Expand Down Expand Up @@ -2147,11 +2142,9 @@ def test_selection_regex_and_multicol() -> None:

# Selection only
test_df.select(
[
pl.col(["a", "b", "c"]).name.suffix("_list"),
pl.all().exclude("foo").name.suffix("_wild"),
pl.col("^\\w$").name.suffix("_regex"),
]
pl.col(["a", "b", "c"]).name.suffix("_list"),
pl.all().exclude("foo").name.suffix("_wild"),
pl.col("^\\w$").name.suffix("_regex"),
)

# Multi * Single
Expand Down Expand Up @@ -2615,9 +2608,7 @@ def test_format_empty_df() -> None:
pl.Series("val2", [], dtype=pl.Categorical),
]
).select(
[
pl.format("{}:{}", pl.col("val1"), pl.col("val2")).alias("cat"),
]
pl.format("{}:{}", pl.col("val1"), pl.col("val2")).alias("cat"),
)
assert df.shape == (0, 1)
assert df.dtypes == [pl.String]
Expand All @@ -2637,7 +2628,7 @@ def test_deadlocks_3409() -> None:
assert (
pl.DataFrame({"col1": [1, 2, 3]})
.with_columns(
[pl.col("col1").cumulative_eval(pl.element().map_batches(lambda x: 0))]
pl.col("col1").cumulative_eval(pl.element().map_batches(lambda x: 0))
)
.to_dict(as_series=False)
) == {"col1": [0, 0, 0]}
Expand Down Expand Up @@ -2728,12 +2719,10 @@ def test_window_deadlock() -> None:
}
)

df = df.select(
[
pl.col("*"), # select all
pl.col("random").sum().over("groups").alias("sum[random]/groups"),
pl.col("random").implode().over("names").alias("random/name"),
]
_df = df.select(
pl.col("*"), # select all
pl.col("random").sum().over("groups").alias("sum[random]/groups"),
pl.col("random").implode().over("names").alias("random/name"),
)


Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/unit/datatypes/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ def test_categorical_error_on_local_cmp() -> None:

def test_cast_null_to_categorical() -> None:
assert pl.DataFrame().with_columns(
[pl.lit(None).cast(pl.Categorical).alias("nullable_enum")]
pl.lit(None).cast(pl.Categorical).alias("nullable_enum")
).dtypes == [pl.Categorical]


Expand Down Expand Up @@ -485,7 +485,7 @@ def test_stringcache() -> None:
with pl.StringCache():
# create a large enough column that the categorical map is reallocated
df = pl.DataFrame({"cats": pl.arange(0, N, eager=True)}).select(
[pl.col("cats").cast(pl.String).cast(pl.Categorical)]
pl.col("cats").cast(pl.String).cast(pl.Categorical)
)
assert df.filter(pl.col("cats").is_in(["1", "2"])).to_dict(as_series=False) == {
"cats": ["1", "2"]
Expand Down
Loading

0 comments on commit f5d0a64

Please sign in to comment.