From b17f51f2ec2d22bc127d63d94a90bbb10e55c713 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sat, 8 Jun 2024 01:20:06 +0200 Subject: [PATCH] depr(python): Rename parameter `descending` to `reverse` in `top_k` methods --- py-polars/polars/dataframe/frame.py | 18 ++++--- py-polars/polars/expr/expr.py | 30 ++++++----- py-polars/polars/lazyframe/frame.py | 22 ++++---- py-polars/src/expr/general.rs | 11 ++-- py-polars/src/lazyframe/mod.rs | 8 +-- py-polars/tests/unit/operations/test_top_k.py | 51 ++++++++++--------- 6 files changed, 75 insertions(+), 65 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index c5fa988c5a14b..20bbef0813491 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -4720,18 +4720,19 @@ def sql(self, query: str, *, table_name: str = "self") -> Self: ctx.register(name=name, frame=self) return ctx.execute(query) # type: ignore[return-value] + @deprecate_renamed_parameter("descending", "reverse", version="1.0.0") def top_k( self, k: int, *, by: IntoExpr | Iterable[IntoExpr], - descending: bool | Sequence[bool] = False, + reverse: bool | Sequence[bool] = False, ) -> DataFrame: """ Return the `k` largest rows. Non-null elements are always preferred over null elements, regardless of - the value of `descending`. The output is not guaranteed to be in any + the value of `reverse`. The output is not guaranteed to be in any particular order, call :func:`sort` after this function if you wish the output to be sorted. @@ -4742,7 +4743,7 @@ def top_k( by Column(s) used to determine the top rows. Accepts expression input. Strings are parsed as column names. - descending + reverse Consider the `k` smallest elements of the `by` column(s) (instead of the `k` largest). This can be specified per column by passing a sequence of booleans. @@ -4792,7 +4793,7 @@ def top_k( """ return ( self.lazy() - .top_k(k, by=by, descending=descending) + .top_k(k, by=by, reverse=reverse) .collect( projection_pushdown=False, predicate_pushdown=False, @@ -4801,18 +4802,19 @@ def top_k( ) ) + @deprecate_renamed_parameter("descending", "reverse", version="1.0.0") def bottom_k( self, k: int, *, by: IntoExpr | Iterable[IntoExpr], - descending: bool | Sequence[bool] = False, + reverse: bool | Sequence[bool] = False, ) -> DataFrame: """ Return the `k` smallest rows. Non-null elements are always preferred over null elements, regardless of - the value of `descending`. The output is not guaranteed to be in any + the value of `reverse`. The output is not guaranteed to be in any particular order, call :func:`sort` after this function if you wish the output to be sorted. @@ -4823,7 +4825,7 @@ def bottom_k( by Column(s) used to determine the bottom rows. Accepts expression input. Strings are parsed as column names. - descending + reverse Consider the `k` largest elements of the `by` column(s) (instead of the `k` smallest). This can be specified per column by passing a sequence of booleans. @@ -4873,7 +4875,7 @@ def bottom_k( """ return ( self.lazy() - .bottom_k(k, by=by, descending=descending) + .bottom_k(k, by=by, reverse=reverse) .collect( projection_pushdown=False, predicate_pushdown=False, diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index d306736c205ca..657654ce6a8db 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -26,7 +26,11 @@ import polars._reexport as pl from polars import functions as F from polars._utils.convert import negate_duration_string, parse_as_duration_string -from polars._utils.deprecation import deprecate_function, issue_deprecation_warning +from polars._utils.deprecation import ( + deprecate_function, + deprecate_renamed_parameter, + issue_deprecation_warning, +) from polars._utils.parse_expr_input import ( parse_as_expression, parse_as_list_of_expressions, @@ -1876,18 +1880,19 @@ def top_k(self, k: int | IntoExprColumn = 5) -> Self: k = parse_as_expression(k) return self._from_pyexpr(self._pyexpr.top_k(k)) + @deprecate_renamed_parameter("descending", "reverse", version="1.0.0") def top_k_by( self, by: IntoExpr | Iterable[IntoExpr], k: int | IntoExprColumn = 5, *, - descending: bool | Sequence[bool] = False, + reverse: bool | Sequence[bool] = False, ) -> Self: r""" Return the elements corresponding to the `k` largest elements of the `by` column(s). Non-null elements are always preferred over null elements, regardless of - the value of `descending`. The output is not guaranteed to be in any + the value of `reverse`. The output is not guaranteed to be in any particular order, call :func:`sort` after this function if you wish the output to be sorted. @@ -1902,7 +1907,7 @@ def top_k_by( Accepts expression input. Strings are parsed as column names. k Number of elements to return. - descending + reverse Consider the `k` smallest elements of the `by` column(s) (instead of the `k` largest). This can be specified per column by passing a sequence of booleans. @@ -1995,8 +2000,8 @@ def top_k_by( """ # noqa: W505 k = parse_as_expression(k) by = parse_as_list_of_expressions(by) - descending = extend_bool(descending, len(by), "descending", "by") - return self._from_pyexpr(self._pyexpr.top_k_by(by, k=k, descending=descending)) + reverse = extend_bool(reverse, len(by), "reverse", "by") + return self._from_pyexpr(self._pyexpr.top_k_by(by, k=k, reverse=reverse)) def bottom_k(self, k: int | IntoExprColumn = 5) -> Self: r""" @@ -2048,18 +2053,19 @@ def bottom_k(self, k: int | IntoExprColumn = 5) -> Self: k = parse_as_expression(k) return self._from_pyexpr(self._pyexpr.bottom_k(k)) + @deprecate_renamed_parameter("descending", "reverse", version="1.0.0") def bottom_k_by( self, by: IntoExpr | Iterable[IntoExpr], k: int | IntoExprColumn = 5, *, - descending: bool | Sequence[bool] = False, + reverse: bool | Sequence[bool] = False, ) -> Self: r""" Return the elements corresponding to the `k` smallest elements of the `by` column(s). Non-null elements are always preferred over null elements, regardless of - the value of `descending`. The output is not guaranteed to be in any + the value of `reverse`. The output is not guaranteed to be in any particular order, call :func:`sort` after this function if you wish the output to be sorted. @@ -2074,7 +2080,7 @@ def bottom_k_by( Accepts expression input. Strings are parsed as column names. k Number of elements to return. - descending + reverse Consider the `k` largest elements of the `by` column(s) (instead of the `k` smallest). This can be specified per column by passing a sequence of booleans. @@ -2167,10 +2173,8 @@ def bottom_k_by( """ # noqa: W505 k = parse_as_expression(k) by = parse_as_list_of_expressions(by) - descending = extend_bool(descending, len(by), "descending", "by") - return self._from_pyexpr( - self._pyexpr.bottom_k_by(by, k=k, descending=descending) - ) + reverse = extend_bool(reverse, len(by), "reverse", "by") + return self._from_pyexpr(self._pyexpr.bottom_k_by(by, k=k, reverse=reverse)) def arg_sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: """ diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 93b4b78533f69..49be56ea44234 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -1371,18 +1371,19 @@ def sql(self, query: str, *, table_name: str = "self") -> Self: ctx.register(name=name, frame=self) return ctx.execute(query) # type: ignore[return-value] + @deprecate_renamed_parameter("descending", "reverse", version="1.0.0") def top_k( self, k: int, *, by: IntoExpr | Iterable[IntoExpr], - descending: bool | Sequence[bool] = False, + reverse: bool | Sequence[bool] = False, ) -> Self: """ Return the `k` largest rows. Non-null elements are always preferred over null elements, regardless of - the value of `descending`. The output is not guaranteed to be in any + the value of `reverse`. The output is not guaranteed to be in any particular order, call :func:`sort` after this function if you wish the output to be sorted. @@ -1393,7 +1394,7 @@ def top_k( by Column(s) used to determine the top rows. Accepts expression input. Strings are parsed as column names. - descending + reverse Consider the `k` smallest elements of the `by` column(s) (instead of the `k` largest). This can be specified per column by passing a sequence of booleans. @@ -1442,21 +1443,22 @@ def top_k( └─────┴─────┘ """ by = parse_as_list_of_expressions(by) - descending = extend_bool(descending, len(by), "descending", "by") - return self._from_pyldf(self._ldf.top_k(k, by=by, descending=descending)) + reverse = extend_bool(reverse, len(by), "reverse", "by") + return self._from_pyldf(self._ldf.top_k(k, by=by, reverse=reverse)) + @deprecate_renamed_parameter("descending", "reverse", version="1.0.0") def bottom_k( self, k: int, *, by: IntoExpr | Iterable[IntoExpr], - descending: bool | Sequence[bool] = False, + reverse: bool | Sequence[bool] = False, ) -> Self: """ Return the `k` smallest rows. Non-null elements are always preferred over null elements, regardless of - the value of `descending`. The output is not guaranteed to be in any + the value of `reverse`. The output is not guaranteed to be in any particular order, call :func:`sort` after this function if you wish the output to be sorted. @@ -1467,7 +1469,7 @@ def bottom_k( by Column(s) used to determine the bottom rows. Accepts expression input. Strings are parsed as column names. - descending + reverse Consider the `k` largest elements of the `by` column(s) (instead of the `k` smallest). This can be specified per column by passing a sequence of booleans. @@ -1516,8 +1518,8 @@ def bottom_k( └─────┴─────┘ """ by = parse_as_list_of_expressions(by) - descending = extend_bool(descending, len(by), "descending", "by") - return self._from_pyldf(self._ldf.bottom_k(k, by=by, descending=descending)) + reverse = extend_bool(reverse, len(by), "reverse", "by") + return self._from_pyldf(self._ldf.bottom_k(k, by=by, reverse=reverse)) def profile( self, diff --git a/py-polars/src/expr/general.rs b/py-polars/src/expr/general.rs index 059df272829b9..2d22577d7e403 100644 --- a/py-polars/src/expr/general.rs +++ b/py-polars/src/expr/general.rs @@ -304,9 +304,9 @@ impl PyExpr { } #[cfg(feature = "top_k")] - fn top_k_by(&self, by: Vec, k: Self, descending: Vec) -> Self { + fn top_k_by(&self, by: Vec, k: Self, reverse: Vec) -> Self { let by = by.into_iter().map(|e| e.inner).collect::>(); - self.inner.clone().top_k_by(k.inner, by, descending).into() + self.inner.clone().top_k_by(k.inner, by, reverse).into() } #[cfg(feature = "top_k")] @@ -315,12 +315,9 @@ impl PyExpr { } #[cfg(feature = "top_k")] - fn bottom_k_by(&self, by: Vec, k: Self, descending: Vec) -> Self { + fn bottom_k_by(&self, by: Vec, k: Self, reverse: Vec) -> Self { let by = by.into_iter().map(|e| e.inner).collect::>(); - self.inner - .clone() - .bottom_k_by(k.inner, by, descending) - .into() + self.inner.clone().bottom_k_by(k.inner, by, reverse).into() } #[cfg(feature = "peaks")] diff --git a/py-polars/src/lazyframe/mod.rs b/py-polars/src/lazyframe/mod.rs index 73dbc4ccdb464..575ed868985d0 100644 --- a/py-polars/src/lazyframe/mod.rs +++ b/py-polars/src/lazyframe/mod.rs @@ -555,24 +555,24 @@ impl PyLazyFrame { .into() } - fn top_k(&self, k: IdxSize, by: Vec, descending: Vec) -> Self { + fn top_k(&self, k: IdxSize, by: Vec, reverse: Vec) -> Self { let ldf = self.ldf.clone(); let exprs = by.to_exprs(); ldf.top_k( k, exprs, - SortMultipleOptions::new().with_order_descending_multi(descending), + SortMultipleOptions::new().with_order_descending_multi(reverse), ) .into() } - fn bottom_k(&self, k: IdxSize, by: Vec, descending: Vec) -> Self { + fn bottom_k(&self, k: IdxSize, by: Vec, reverse: Vec) -> Self { let ldf = self.ldf.clone(); let exprs = by.to_exprs(); ldf.bottom_k( k, exprs, - SortMultipleOptions::new().with_order_descending_multi(descending), + SortMultipleOptions::new().with_order_descending_multi(reverse), ) .into() } diff --git a/py-polars/tests/unit/operations/test_top_k.py b/py-polars/tests/unit/operations/test_top_k.py index 4c3578ed252a7..7ba182457136a 100644 --- a/py-polars/tests/unit/operations/test_top_k.py +++ b/py-polars/tests/unit/operations/test_top_k.py @@ -81,12 +81,12 @@ def test_top_k() -> None: ) assert_frame_equal( - df.top_k(3, by=["a", "b"], descending=True), + df.top_k(3, by=["a", "b"], reverse=True), pl.DataFrame({"a": [1, 2, 2], "b": [3, 2, 2]}), check_row_order=False, ) assert_frame_equal( - df.bottom_k(4, by=["a", "b"], descending=True), + df.bottom_k(4, by=["a", "b"], reverse=True), pl.DataFrame({"a": [4, 3, 2, 2], "b": [4, 1, 3, 2]}), check_row_order=False, ) @@ -117,8 +117,8 @@ def test_top_k() -> None: assert_frame_equal( df2.select( - pl.col("a", "b").top_k_by("a", 2, descending=True).name.suffix("_top_by_a"), - pl.col("a", "b").top_k_by("b", 2, descending=True).name.suffix("_top_by_b"), + pl.col("a", "b").top_k_by("a", 2, reverse=True).name.suffix("_top_by_a"), + pl.col("a", "b").top_k_by("b", 2, reverse=True).name.suffix("_top_by_b"), ), pl.DataFrame( { @@ -150,10 +150,10 @@ def test_top_k() -> None: assert_frame_equal( df2.select( pl.col("a", "b") - .bottom_k_by("a", 2, descending=True) + .bottom_k_by("a", 2, reverse=True) .name.suffix("_bottom_by_a"), pl.col("a", "b") - .bottom_k_by("b", 2, descending=True) + .bottom_k_by("b", 2, reverse=True) .name.suffix("_bottom_by_b"), ), pl.DataFrame( @@ -238,10 +238,10 @@ def test_top_k() -> None: assert_frame_equal( df2.select( pl.col("a", "b", "c") - .top_k_by(["c", "a"], 2, descending=[True, False]) + .top_k_by(["c", "a"], 2, reverse=[True, False]) .name.suffix("_top_by_ca"), pl.col("a", "b", "c") - .top_k_by(["c", "b"], 2, descending=[True, False]) + .top_k_by(["c", "b"], 2, reverse=[True, False]) .name.suffix("_top_by_cb"), ), pl.DataFrame( @@ -260,10 +260,10 @@ def test_top_k() -> None: assert_frame_equal( df2.select( pl.col("a", "b", "c") - .bottom_k_by(["c", "a"], 2, descending=[True, False]) + .bottom_k_by(["c", "a"], 2, reverse=[True, False]) .name.suffix("_bottom_by_ca"), pl.col("a", "b", "c") - .bottom_k_by(["c", "b"], 2, descending=[True, False]) + .bottom_k_by(["c", "b"], 2, reverse=[True, False]) .name.suffix("_bottom_by_cb"), ), pl.DataFrame( @@ -282,10 +282,10 @@ def test_top_k() -> None: assert_frame_equal( df2.select( pl.col("a", "b", "c") - .top_k_by(["c", "a"], 2, descending=[False, True]) + .top_k_by(["c", "a"], 2, reverse=[False, True]) .name.suffix("_top_by_ca"), pl.col("a", "b", "c") - .top_k_by(["c", "b"], 2, descending=[False, True]) + .top_k_by(["c", "b"], 2, reverse=[False, True]) .name.suffix("_top_by_cb"), ), pl.DataFrame( @@ -304,10 +304,10 @@ def test_top_k() -> None: assert_frame_equal( df2.select( pl.col("a", "b", "c") - .top_k_by(["c", "a"], 2, descending=[False, True]) + .top_k_by(["c", "a"], 2, reverse=[False, True]) .name.suffix("_bottom_by_ca"), pl.col("a", "b", "c") - .top_k_by(["c", "b"], 2, descending=[False, True]) + .top_k_by(["c", "b"], 2, reverse=[False, True]) .name.suffix("_bottom_by_cb"), ), pl.DataFrame( @@ -325,29 +325,29 @@ def test_top_k() -> None: with pytest.raises( ValueError, - match=r"the length of `descending` \(2\) does not match the length of `by` \(1\)", + match=r"the length of `reverse` \(2\) does not match the length of `by` \(1\)", ): - df2.select(pl.all().top_k_by("a", 2, descending=[True, False])) + df2.select(pl.all().top_k_by("a", 2, reverse=[True, False])) with pytest.raises( ValueError, - match=r"the length of `descending` \(2\) does not match the length of `by` \(1\)", + match=r"the length of `reverse` \(2\) does not match the length of `by` \(1\)", ): - df2.select(pl.all().bottom_k_by("a", 2, descending=[True, False])) + df2.select(pl.all().bottom_k_by("a", 2, reverse=[True, False])) -def test_top_k_descending() -> None: +def test_top_k_reverse() -> None: df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - result = df.top_k(1, by=["a", "b"], descending=True) + result = df.top_k(1, by=["a", "b"], reverse=True) expected = pl.DataFrame({"a": [1], "b": [4]}) assert_frame_equal(result, expected, check_row_order=False) - result = df.top_k(1, by=["a", "b"], descending=[True, True]) + result = df.top_k(1, by=["a", "b"], reverse=[True, True]) assert_frame_equal(result, expected, check_row_order=False) with pytest.raises( ValueError, - match=r"the length of `descending` \(1\) does not match the length of `by` \(2\)", + match=r"the length of `reverse` \(1\) does not match the length of `by` \(2\)", ): - df.top_k(1, by=["a", "b"], descending=[True]) + df.top_k(1, by=["a", "b"], reverse=[True]) def test_top_k_9385() -> None: @@ -393,3 +393,8 @@ def test_bottom_k_nulls(s: pl.Series, should_sort: bool) -> None: result = s.bottom_k(s.len() * 2) assert_series_equal(result, s, check_order=False) + + +def test_top_k_descending_deprecated() -> None: + with pytest.deprecated_call(): + pl.col("a").top_k_by("b", descending=True)