From 095291b80f3a2a852c55a43c725d970e2f0dfff4 Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Thu, 6 Jun 2024 18:51:22 -0400 Subject: [PATCH 01/12] feat(python): add style namespace (defers to Great Tables) --- py-polars/polars/dataframe/frame.py | 52 +++++++++++++++++++++++++++++ py-polars/polars/dependencies.py | 4 +++ py-polars/polars/meta/versions.py | 1 + py-polars/pyproject.toml | 1 + 4 files changed, 58 insertions(+) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 25c4a3cdfea7..192975dbf2f0 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -76,12 +76,14 @@ UInt64, ) from polars.dependencies import ( + _GREAT_TABLES_AVAILABLE, _HVPLOT_AVAILABLE, _PANDAS_AVAILABLE, _PYARROW_AVAILABLE, _check_for_numpy, _check_for_pandas, _check_for_pyarrow, + great_tables, hvplot, import_optional, ) @@ -112,6 +114,7 @@ import jax import numpy.typing as npt import torch + from great_tables import GT from hvplot.plotting.core import hvPlotTabularPolars from xlsxwriter import Workbook @@ -607,6 +610,55 @@ def plot(self) -> hvPlotTabularPolars: hvplot.post_patch() return hvplot.plotting.core.hvPlotTabularPolars(self) + @property + def style(self) -> GT: + """ + Create a Great Table for styling. + + Polars does not implement styling logic itself, but instead defers to + the Great Tables package. Please see the `Great Tables reference `_ + for more information and documentation. + + Examples + -------- + + Import some styling helpers, and create example data: + + >>> import polars.selectors as cs + >>> from great_tables import loc, style + >>> df = pl.DataFrame( + ... { + ... "site_id": [0, 1, 2], + ... "measure_a": [5, 4, 6], + ... "measure_b": [7, 3, 3], + ... } + ... ) + + Emphasize the site_id as row names: + + >>> df.style.tab_stub(rowname_col="site_id") # doctest: +SKIP + + Fill the background for the highest measure_a value row: + + >>> df.style.tab_style( + ... 
style.fill("yellow"), + ... loc.body(rows=pl.col("measure_a") == pl.col("measure_a").max()) + ... ) # doctest: +SKIP + + Put a spanner (high-level label) over measure columns: + + >>> df.style.tab_spanner("Measures", cs.starts_with("measure")) # doctest: +SKIP + + Format measure_b values to two decimal places: + + >>> df.style.fmt_number("measure_b", decimals=2) # doctest: +SKIP + + """ + if not _GREAT_TABLES_AVAILABLE: + raise ModuleNotFoundError("great_tables is required for `.style`") + + return great_tables.GT(self) + @property def shape(self) -> tuple[int, int]: """ diff --git a/py-polars/polars/dependencies.py b/py-polars/polars/dependencies.py index d970fb5673ff..0917cb179c6a 100644 --- a/py-polars/polars/dependencies.py +++ b/py-polars/polars/dependencies.py @@ -11,6 +11,7 @@ _DELTALAKE_AVAILABLE = True _FSSPEC_AVAILABLE = True _GEVENT_AVAILABLE = True +_GREAT_TABLES_AVAILABLE = True _HVPLOT_AVAILABLE = True _HYPOTHESIS_AVAILABLE = True _NUMPY_AVAILABLE = True @@ -152,6 +153,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]: import deltalake import fsspec import gevent + import great_tables import hvplot import hypothesis import numpy @@ -175,6 +177,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]: # heavy/optional third party libs deltalake, _DELTALAKE_AVAILABLE = _lazy_import("deltalake") fsspec, _FSSPEC_AVAILABLE = _lazy_import("fsspec") + great_tables, _GREAT_TABLES_AVAILABLE = _lazy_import("great_tables") hvplot, _HVPLOT_AVAILABLE = _lazy_import("hvplot") hypothesis, _HYPOTHESIS_AVAILABLE = _lazy_import("hypothesis") numpy, _NUMPY_AVAILABLE = _lazy_import("numpy") @@ -301,6 +304,7 @@ def import_optional( "deltalake", "fsspec", "gevent", + "great_tables", "hvplot", "numpy", "pandas", diff --git a/py-polars/polars/meta/versions.py b/py-polars/polars/meta/versions.py index 6a5eb8a788f1..02b71c6a92bb 100644 --- a/py-polars/polars/meta/versions.py +++ b/py-polars/polars/meta/versions.py @@ -69,6 +69,7 @@ def 
_get_dependency_info() -> dict[str, str]: "fastexcel", "fsspec", "gevent", + "great_tables", "hvplot", "matplotlib", "nest_asyncio", diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index bd6cc2954a26..8e7afa8abd2e 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -56,6 +56,7 @@ plot = ["hvplot >= 0.9.1"] pyarrow = ["pyarrow >= 7.0.0"] pydantic = ["pydantic"] sqlalchemy = ["sqlalchemy", "pandas"] +style = ["great-tables >= 0.8.0"] timezone = ["backports.zoneinfo; python_version < '3.9'", "tzdata; platform_system == 'Windows'"] xlsx2csv = ["xlsx2csv >= 0.8.0"] xlsxwriter = ["xlsxwriter"] From 23aa164f3351dd4976930e7ef0ac0aa3bdf72ae0 Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 7 Jun 2024 12:29:58 -0400 Subject: [PATCH 02/12] docs(python): document DataFrame.style property --- py-polars/docs/source/reference/dataframe/index.rst | 1 + py-polars/docs/source/reference/dataframe/style.rst | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 py-polars/docs/source/reference/dataframe/style.rst diff --git a/py-polars/docs/source/reference/dataframe/index.rst b/py-polars/docs/source/reference/dataframe/index.rst index 509b1e3f5ede..21f7cac4c046 100644 --- a/py-polars/docs/source/reference/dataframe/index.rst +++ b/py-polars/docs/source/reference/dataframe/index.rst @@ -17,6 +17,7 @@ This page gives an overview of all public DataFrame methods. modify_select miscellaneous plot + style .. currentmodule:: polars diff --git a/py-polars/docs/source/reference/dataframe/style.rst b/py-polars/docs/source/reference/dataframe/style.rst new file mode 100644 index 000000000000..ebe9a3fabf2e --- /dev/null +++ b/py-polars/docs/source/reference/dataframe/style.rst @@ -0,0 +1,7 @@ +===== +Style +===== + +.. currentmodule:: polars + +.. 
autoproperty:: DataFrame.style \ No newline at end of file From ebf3036a8a9487ecd137bd836c4b7f1a784a1ad5 Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 7 Jun 2024 12:31:57 -0400 Subject: [PATCH 03/12] docs(python): add misc styling page to user guide --- docs/src/python/user-guide/misc/styling.py | 151 +++++++++++++++++++++ docs/user-guide/misc/styling.md | 65 +++++++++ mkdocs.yml | 1 + 3 files changed, 217 insertions(+) create mode 100644 docs/src/python/user-guide/misc/styling.py create mode 100644 docs/user-guide/misc/styling.md diff --git a/docs/src/python/user-guide/misc/styling.py b/docs/src/python/user-guide/misc/styling.py new file mode 100644 index 000000000000..5b570e4f1e49 --- /dev/null +++ b/docs/src/python/user-guide/misc/styling.py @@ -0,0 +1,151 @@ +# --8<-- [start:dataframe] +import polars as pl +import polars.selectors as cs + +path = "docs/data/iris.csv" + +df = ( + pl.scan_csv(path) + .group_by("species") + .agg(cs.starts_with("petal").mean().round(3)) + .collect() +) +print(df) +# --8<-- [end:dataframe] + +# --8<-- [start:structure-header] +df.style.tab_header(title="Iris Data", subtitle="Mean measurement values per species") +# --8<-- [end:structure-header] + +# --8<-- [start:structure-header-out] +print( + df.style.tab_header( + title="Iris Data", subtitle="Mean measurement values per species" + ).as_raw_html() +) +# --8<-- [end:structure-header-out] + + +# --8<-- [start:structure-stub] +df.style.tab_stub(rowname_col="species") +# --8<-- [end:structure-stub] + +# --8<-- [start:structure-stub-out] +print(df.style.tab_stub(rowname_col="species").as_raw_html()) +# --8<-- [end:structure-stub-out] + +# --8<-- [start:structure-spanner] +( + df.style.tab_spanner("Petal", cs.starts_with("petal")).cols_label( + petal_length="Length", petal_width="Width" + ) +) +# --8<-- [end:structure-spanner] + +# --8<-- [start:structure-spanner-out] +print( + df.style.tab_spanner("Petal", cs.starts_with("petal")) + .cols_label(petal_length="Length", 
petal_width="Width") + .as_raw_html() +) +# --8<-- [end:structure-spanner-out] + +# --8<-- [start:format-number] +df.style.fmt_number("petal_width", decimals=1) +# --8<-- [end:format-number] + + +# --8<-- [start:format-number-out] +print(df.style.fmt_number("petal_width", decimals=1).as_raw_html()) +# --8<-- [end:format-number-out] + + +# --8<-- [start:style-simple] +from great_tables import loc, style + +df.style.tab_style( + style.fill("yellow"), + loc.body( + rows=pl.col("petal_length") == pl.col("petal_length").max(), + ), +) +# --8<-- [end:style-simple] + +# --8<-- [start:style-simple-out] +from great_tables import loc, style + +print( + df.style.tab_style( + style.fill("yellow"), + loc.body( + rows=pl.col("petal_length") == pl.col("petal_length").max(), + ), + ).as_raw_html() +) +# --8<-- [end:style-simple-out] + + +# --8<-- [start:style-bold-column] +from great_tables import loc, style + +df.style.tab_style( + style.text(weight="bold"), + loc.body(columns="species"), +) +# --8<-- [end:style-bold-column] + +# --8<-- [start:style-bold-column-out] +from great_tables import loc, style + +print( + df.style.tab_style( + style.text(weight="bold"), + loc.body(columns="species"), + ).as_raw_html() +) +# --8<-- [end:style-bold-column-out] + +# --8<-- [start:full-example] +from great_tables import loc, style + +( + df.style.tab_header( + title="Iris Data", subtitle="Mean measurement values per species" + ) + .tab_stub(rowname_col="species") + .cols_label(petal_length="Length", petal_width="Width") + .tab_spanner("Petal", cs.starts_with("petal")) + .fmt_number("petal_width", decimals=2) + .tab_style( + style.fill("yellow"), + loc.body( + rows=pl.col("petal_length") == pl.col("petal_length").max(), + ), + ) +) +# --8<-- [end:full-example] + +# --8<-- [start:full-example-out] +from great_tables import loc, style + +print( + df.style.tab_header( + title="Iris Data", subtitle="Mean measurement values per species" + ) + .tab_stub(rowname_col="species") + 
.cols_label(petal_length="Length", petal_width="Width") + .tab_spanner("Petal", cs.starts_with("petal")) + .fmt_number("petal_width", decimals=2) + .tab_style( + style.fill("yellow"), + loc.body( + rows=pl.col("petal_length") == pl.col("petal_length").max(), + ), + ) + .tab_style( + style.text(weight="bold"), + loc.body(columns="species"), + ) + .as_raw_html() +) +# --8<-- [end:full-example-out] diff --git a/docs/user-guide/misc/styling.md b/docs/user-guide/misc/styling.md new file mode 100644 index 000000000000..57a8d44d75e1 --- /dev/null +++ b/docs/user-guide/misc/styling.md @@ -0,0 +1,65 @@ +# Styling + +Data in a Polars `DataFrame` can be styled for presentation using the `DataFrame.style` property. This returns a `GT` object from [Great Tables](https://posit-dev.github.io/great-tables/articles/intro.html), which enables structuring, formatting, and styling for table display. + +{{code_block('user-guide/misc/styling','dataframe',[])}} + +```python exec="on" result="text" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:dataframe" +``` + +## Structure: add header title + +{{code_block('user-guide/misc/styling','structure-header',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:structure-header-out" +``` + +## Structure: add row stub + +{{code_block('user-guide/misc/styling','structure-stub',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:structure-stub-out" +``` + +## Structure: add column spanner + +{{code_block('user-guide/misc/styling','structure-spanner',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:structure-spanner-out" +``` + +## Format: limit decimal places + +{{code_block('user-guide/misc/styling','format-number',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:format-number-out" +``` + +## Style: 
highlight max row + +{{code_block('user-guide/misc/styling','style-simple',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:style-simple-out" +``` + +## Style: bold species column + +{{code_block('user-guide/misc/styling','style-bold-column',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:style-bold-column-out" +``` + +## Full example + +{{code_block('user-guide/misc/styling','full-example',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:full-example-out" +``` diff --git a/mkdocs.yml b/mkdocs.yml index 6673d17741ce..c0394149aa11 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -81,6 +81,7 @@ nav: - Misc: - user-guide/misc/multiprocessing.md - user-guide/misc/visualization.md + - user-guide/misc/styling.md - user-guide/misc/comparison.md - API reference: api/index.md From 4aff8f17219495ccdbe13ad2d34e1a3a1c69451b Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 7 Jun 2024 12:55:28 -0400 Subject: [PATCH 04/12] chore(python): make linter happy --- py-polars/polars/dataframe/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 192975dbf2f0..f67322d4cd17 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -621,7 +621,6 @@ def style(self) -> GT: Examples -------- - Import some styling helpers, and create example data: >>> import polars.selectors as cs @@ -647,7 +646,7 @@ def style(self) -> GT: Put a spanner (high-level label) over measure columns: - >>> df.style.tab_spanner("Measures", cs.starts_with("measure")) # doctest: +SKIP + >>> df.style.tab_spanner("Measures", cs.starts_with("measure")) # doctest: +SKIP Format measure_b values to two decimal places: @@ -655,7 +654,8 @@ def style(self) -> GT: """ if not _GREAT_TABLES_AVAILABLE: - raise 
ModuleNotFoundError("great_tables is required for `.style`") + msg = "great_tables is required for `.style`" + raise ModuleNotFoundError(msg) return great_tables.GT(self) From 96da0b6eec65f8f00619b484d76b3abc970cd302 Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 7 Jun 2024 12:58:41 -0400 Subject: [PATCH 05/12] chore(python): fix lint errors on style docstring --- py-polars/polars/dataframe/frame.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index f67322d4cd17..5172ecc1dbb8 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -635,22 +635,24 @@ def style(self) -> GT: Emphasize the site_id as row names: - >>> df.style.tab_stub(rowname_col="site_id") # doctest: +SKIP + >>> df.style.tab_stub(rowname_col="site_id") # doctest: +SKIP Fill the background for the highest measure_a value row: >>> df.style.tab_style( ... style.fill("yellow"), ... loc.body(rows=pl.col("measure_a") == pl.col("measure_a").max()) - ... ) # doctest: +SKIP + ... ) # doctest: +SKIP Put a spanner (high-level label) over measure columns: - >>> df.style.tab_spanner("Measures", cs.starts_with("measure")) # doctest: +SKIP + >>> df.style.tab_spanner( + ... "Measures", cs.starts_with("measure") + ... 
) # doctest: +SKIP Format measure_b values to two decimal places: - >>> df.style.fmt_number("measure_b", decimals=2) # doctest: +SKIP + >>> df.style.fmt_number("measure_b", decimals=2) # doctest: +SKIP """ if not _GREAT_TABLES_AVAILABLE: From d4bcdcd291ce4501e15e23ab79f76a28195bdd2f Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 7 Jun 2024 13:00:01 -0400 Subject: [PATCH 06/12] chore(python): fix lint errors on style docstring --- py-polars/polars/dataframe/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 5172ecc1dbb8..5ad74bb609da 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -641,7 +641,7 @@ def style(self) -> GT: >>> df.style.tab_style( ... style.fill("yellow"), - ... loc.body(rows=pl.col("measure_a") == pl.col("measure_a").max()) + ... loc.body(rows=pl.col("measure_a") == pl.col("measure_a").max()), ... ) # doctest: +SKIP Put a spanner (high-level label) over measure columns: From a0dfdf15ad63fa58d706dc4ab51d5323a724916d Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 7 Jun 2024 13:15:59 -0400 Subject: [PATCH 07/12] dev: include style optional dependency in all --- py-polars/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index 8e7afa8abd2e..8d4a138a77bf 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -61,7 +61,7 @@ timezone = ["backports.zoneinfo; python_version < '3.9'", "tzdata; platform_syst xlsx2csv = ["xlsx2csv >= 0.8.0"] xlsxwriter = ["xlsxwriter"] all = [ - "polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,iceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]", + 
"polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,style,numpy,pandas,plot,pyarrow,pydantic,iceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]", ] [tool.maturin] From 82373fdc5bbbea6ba078cf1db6d28fe5921f4eb3 Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 7 Jun 2024 13:16:23 -0400 Subject: [PATCH 08/12] dev: mypy ignore missing import for great_tables --- py-polars/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index 8d4a138a77bf..7a377e566f79 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -90,6 +90,7 @@ module = [ "deltalake.*", "fsspec.*", "gevent", + "great_tables", "hvplot.*", "jax.*", "kuzu", From 23472427ea3bf526c1ef962dd256e15716a2bd6f Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 7 Jun 2024 13:28:28 -0400 Subject: [PATCH 09/12] docs: mark style property as unstable --- py-polars/polars/dataframe/frame.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 5ad74bb609da..daf69b373b2f 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -611,10 +611,15 @@ def plot(self) -> hvPlotTabularPolars: return hvplot.plotting.core.hvPlotTabularPolars(self) @property + @unstable() def style(self) -> GT: """ Create a Great Table for styling. + .. warning:: + This functionality is currently considered **unstable**. It may be + changed at any point without it being considered a breaking change. + Polars does not implement styling logic itself, but instead defers to the Great Tables package. Please see the `Great Tables reference `_ for more information and documentation. 
From b4f180f25ef8c25653e8945c630f2910c9934aca Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 7 Jun 2024 13:29:34 -0400 Subject: [PATCH 10/12] ci: add great-tables to requirements-dev --- py-polars/requirements-dev.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/py-polars/requirements-dev.txt b/py-polars/requirements-dev.txt index eaf3ea2c2804..aa930920683e 100644 --- a/py-polars/requirements-dev.txt +++ b/py-polars/requirements-dev.txt @@ -47,6 +47,8 @@ zstandard # Plotting hvplot>=0.9.1 matplotlib +# Styling +great-tables>=0.8.0 # Other gevent nest_asyncio From 15d20e4077a7c1255f8876829db492f6c7b4e22e Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 7 Jun 2024 16:55:48 -0400 Subject: [PATCH 11/12] ci: great tables contingent on py3.9 Co-authored-by: Marco Edward Gorelli --- py-polars/requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/requirements-dev.txt b/py-polars/requirements-dev.txt index aa930920683e..aae62fe07ede 100644 --- a/py-polars/requirements-dev.txt +++ b/py-polars/requirements-dev.txt @@ -48,7 +48,7 @@ zstandard hvplot>=0.9.1 matplotlib # Styling -great-tables>=0.8.0 +great-tables>=0.8.0; python_version >= '3.9' # Other gevent nest_asyncio From 96d94eaf1efeabaca4db9dbc4ef45737f31f2e0a Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Tue, 11 Jun 2024 10:58:18 -0400 Subject: [PATCH 12/12] docs: add style install note to guide --- docs/user-guide/installation.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/user-guide/installation.md b/docs/user-guide/installation.md index 03ac7f534bfc..5795f7905fce 100644 --- a/docs/user-guide/installation.md +++ b/docs/user-guide/installation.md @@ -89,6 +89,7 @@ pip install 'polars[numpy,fsspec]' | xlsx2csv | Support for reading from Excel files | | deltalake | Support for reading from Delta Lake Tables | | plot | Support for plotting Dataframes | +| style | Support for styling Dataframes | | timezone | Timezone support, only needed if 1. 
you are on Python < 3.9 and/or 2. you are on Windows, otherwise no dependencies will be installed | ### Rust