From 12e60578aeeb3a3809ef9822c1316f8a2490c6ac Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Sat, 26 Oct 2024 04:11:35 -0800 Subject: [PATCH] feat(api): add `name` kwarg to `Table.value_counts()` (#10361) Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> --- ibis/expr/types/relations.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 9c33d9dd89f9..50f56ddeeda3 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -4566,9 +4566,15 @@ def window_by( return WindowedTable(self, time_col) - def value_counts(self) -> ir.Table: + def value_counts(self, *, name: str | None = None) -> ir.Table: """Compute a frequency table of this table's values. + Parameters + ---------- + name + The name to use for the frequency column. A suitable name will be + automatically generated if not provided. + Returns ------- Table @@ -4591,16 +4597,16 @@ def value_counts(self) -> ir.Table: │ Adelie │ Torgersen │ NULL │ NULL │ NULL │ … │ │ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ … │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ - >>> t.year.value_counts().order_by("year") - ┏━━━━━━━┳━━━━━━━━━━━━┓ - ┃ year ┃ year_count ┃ - ┡━━━━━━━╇━━━━━━━━━━━━┩ - │ int64 │ int64 │ - ├───────┼────────────┤ - │ 2007 │ 110 │ - │ 2008 │ 114 │ - │ 2009 │ 120 │ - └───────┴────────────┘ + >>> t.year.value_counts(name="n").order_by("year") + ┏━━━━━━━┳━━━━━━━┓ + ┃ year ┃ n ┃ + ┡━━━━━━━╇━━━━━━━┩ + │ int64 │ int64 │ + ├───────┼───────┤ + │ 2007 │ 110 │ + │ 2008 │ 114 │ + │ 2009 │ 120 │ + └───────┴───────┘ >>> t[["year", "island"]].value_counts().order_by("year", "island") ┏━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓ ┃ year ┃ island ┃ year_island_count ┃ @@ -4619,9 +4625,9 @@ def value_counts(self) -> ir.Table: └───────┴───────────┴───────────────────┘ """ columns = self.columns - return self.group_by(columns).agg( - lambda t: t.count().name("_".join(columns) + "_count") - ) + if name is None: + name = "_".join(columns) + "_count" + return self.group_by(columns).agg(lambda t: t.count().name(name)) def unnest( self, column, offset: str | None = None, keep_empty: bool = False