tconbeer · tconbeer · Nov 8, 2023 · Nov 8, 2023 · Nov 8, 2023 · Nov 8, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,14 +7,19 @@ All notable changes to this project will be documented in this file.
 ### Features
 
 -   Adds a `null_rep: str` argument when initializing the data table; this string will be used to replace missing data.
+-   Adds a `NumpyBackend` that uses NumPy record arrays; this backend is marginally slower than the `ArrowBackend` in most scenarios ([#23](https://github.com/tconbeer/textual-fastdatatable/issues/23)).
+
+### Bug Fixes
+
+-   Fixes a crash when using `ArrowBackend.from_records(has_header=False)`.
 
 ### Performance
 
 -   Drastically improves performance for tables that are much wider than the viewport ([#12](https://github.com/tconbeer/textual-fastdatatable/issues/12)). 
 
 ### Benchmarks
 
--   Improves benchmarks to exclude data load times and include more information about first paint and scroll performance.
+-   Improves benchmarks to exclude data load times, disable garbage collection, and include more information about first paint and scroll performance.
 
 ## [0.1.4] - 2023-11-06
 

diff --git a/Makefile b/Makefile
@@ -17,4 +17,8 @@ serve:
 
 .PHONY: profile
 profile:
-	pyinstrument -r html -o profile.html "src/scripts/run.py"
+	pyinstrument -r html -o profile.html "src/scripts/run_numpy_wide.py"
+
+.PHONY: benchmark
+benchmark:
+	python src/scripts/benchmark.py > /dev/null
diff --git a/README.md b/README.md
@@ -5,21 +5,23 @@ Textual's built-in DataTable widget is beautiful and powerful, but it can be slo
 
 Here are some benchmarks on my relatively weak laptop. For each benchmark, we initialize a Textual App that
 loads a dataset from a parquet file and mounts a data table; it then scrolls around the table
-(10 pagedowns and 15 right arrows). For the built-in table, the data is loaded into memory before the timer
-is started; the Arrow back-end reads directly from parquet, so the timer is started immediately.
+(10 pagedowns and 15 right arrows). 
+
+For the built-in table and the others marked "from Records", the data is loaded into memory before the timer
+is started; the "Arrow from Parquet" back-end reads directly from parquet, so its timer is started immediately.
 
 The times in each column represent the time to the first paint of the table, and the time after scrolling
 is completed (we wait until the table is fully rendered after each scroll):
 
-Records |Built-In DataTable | FastDataTable (Arrow)
---------|--------|--------
-lap_times_100.parquet |   0.024s /   1.741s |   0.020s /   1.751s
-lap_times_1000.parquet |   0.107s /   1.997s |   0.022s /   1.913s
-lap_times_10000.parquet |   1.071s /   3.016s |   0.022s /   1.956s
-lap_times_100000.parquet |  10.803s /  13.086s |   0.038s /   2.162s
-lap_times_538121.parquet |  60.795s /  64.837s |   0.085s /   1.928s
-wide_10000.parquet |   4.655s /   9.987s |   0.025s /   3.205s
-wide_100000.parquet |  49.764s /  55.258s |   0.062s /   3.209s
+Records | Built-In DataTable | FastDataTable (Arrow from Parquet) | FastDataTable (Arrow from Records) | FastDataTable (Numpy from Records) 
+--------|--------|--------|--------|--------
+lap_times_100.parquet |   0.019s /   1.716s |   0.012s /   1.724s |    0.011s /   1.700s |   0.011s /   1.688s
+lap_times_1000.parquet |   0.103s /   1.931s |   0.011s /   1.859s |    0.011s /   1.799s |   0.015s /   1.848s
+lap_times_10000.parquet |   0.977s /   2.824s |   0.013s /   1.834s |    0.016s /   1.812s |   0.078s /   1.869s
+lap_times_100000.parquet |  11.773s /  13.770s |   0.025s /   1.790s |    0.156s /   1.824s |   0.567s /   2.347s
+lap_times_538121.parquet |  62.960s /  65.760s |   0.077s /   1.803s |    0.379s /   2.234s |   3.324s /   5.031s
+wide_10000.parquet |   5.110s /  10.539s |   0.024s /   3.373s |    0.042s /   3.278s |   0.369s /   3.461s
+wide_100000.parquet |  51.144s /  56.604s |   0.054s /   3.294s |    0.429s /   3.642s |   3.628s /   6.732s
 
 
 **NB:** FastDataTable currently does not support rows with a height of more than one line. See below for

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -16,7 +16,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry.dependencies]
 python = "^3.8"
-textual = ">=0.38.0"
+textual = ">=0.41.0"
 pyarrow = ">=7.0.0"
 
 [tool.poetry.group.dev.dependencies]

diff --git a/src/scripts/benchmark.py b/src/scripts/benchmark.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import gc
 from pathlib import Path
 from time import perf_counter
 
@@ -9,6 +10,7 @@
 from textual.pilot import Pilot
 from textual.types import CSSPathType
 from textual.widgets import DataTable as BuiltinDataTable
+from textual_fastdatatable import ArrowBackend, NumpyBackend
 from textual_fastdatatable import DataTable as FastDataTable
 
 BENCHMARK_DATA = Path(__file__).parent.parent.parent / "tests" / "data"
@@ -68,8 +70,56 @@ def compose(self) -> ComposeResult:
         yield FastDataTable(data=self.data_path)
 
 
+class ArrowBackendAppFromRecords(App):
+    TITLE = "FastDataTable (Arrow from Records)"
+
+    def __init__(
+        self,
+        data_path: Path,
+        driver_class: type[Driver] | None = None,
+        css_path: CSSPathType | None = None,
+        watch_css: bool = False,
+    ):
+        super().__init__(driver_class, css_path, watch_css)
+        self.data_path = data_path
+
+    def compose(self) -> ComposeResult:
+        df = pd.read_parquet(self.data_path)
+        rows = [tuple(row) for row in df.itertuples(index=False)]
+        self.start = perf_counter()
+        backend = ArrowBackend.from_records(rows, has_header=False)
+        table = FastDataTable(
+            backend=backend, column_labels=[str(col) for col in df.columns]
+        )
+        yield table
+
+
+class NumpyApp(App):
+    TITLE = "FastDataTable (Numpy from Records)"
+
+    def __init__(
+        self,
+        data_path: Path,
+        driver_class: type[Driver] | None = None,
+        css_path: CSSPathType | None = None,
+        watch_css: bool = False,
+    ):
+        super().__init__(driver_class, css_path, watch_css)
+        self.data_path = data_path
+
+    def compose(self) -> ComposeResult:
+        df = pd.read_parquet(self.data_path)
+        rows = [tuple(row) for row in df.itertuples(index=False)]
+        self.start = perf_counter()
+        backend = NumpyBackend(rows)
+        table = FastDataTable(
+            backend=backend, column_labels=[str(col) for col in df.columns]
+        )
+        yield table
+
+
 if __name__ == "__main__":
-    app_defs = [BuiltinApp, ArrowBackendApp]
+    app_defs = [ArrowBackendAppFromRecords, BuiltinApp, ArrowBackendApp, NumpyApp]
     bench = [
         (f"lap_times_{n}.parquet", 3 if n <= 10000 else 1)
         for n in [100, 1000, 10000, 100000, 538121]
@@ -89,9 +139,12 @@ def compose(self) -> ComposeResult:
             for i, app_cls in enumerate(app_defs):
                 for _ in range(tries):
                     app = app_cls(BENCHMARK_DATA / p)
+                    gc.disable()
                     fp, el = app.run(headless=True, auto_pilot=scroller)  # type: ignore
+                    gc.collect()
                     first_paint[i].append(fp)
                     elapsed[i].append(el)
+            gc.enable()
             avg_first_paint = [sum(app_times) / tries for app_times in first_paint]
             avg_elapsed = [sum(app_times) / tries for app_times in elapsed]
             formatted = [

diff --git a/src/scripts/run_arrow_wide.py b/src/scripts/run_arrow_wide.py
@@ -5,12 +5,14 @@
 from textual.app import App, ComposeResult
 from textual.driver import Driver
 from textual.types import CSSPathType
-from textual_fastdatatable import DataTable as FastDataTable
+from textual_fastdatatable import DataTable
 
 BENCHMARK_DATA = Path(__file__).parent.parent.parent / "tests" / "data"
 
 
 class ArrowBackendApp(App):
+    TITLE = "FastDataTable (Arrow)"
+
     def __init__(
         self,
         data_path: Path,
@@ -22,7 +24,7 @@ def __init__(
         self.data_path = data_path
 
     def compose(self) -> ComposeResult:
-        yield FastDataTable(data=self.data_path)
+        yield DataTable(data=self.data_path)
 
 
 if __name__ == "__main__":

diff --git a/src/scripts/run_builtin_wide.py b/src/scripts/run_builtin_wide.py
@@ -6,7 +6,7 @@
 from textual.app import App, ComposeResult
 from textual.driver import Driver
 from textual.types import CSSPathType
-from textual.widgets import DataTable as BuiltinDataTable
+from textual.widgets import DataTable
 
 BENCHMARK_DATA = Path(__file__).parent.parent.parent / "tests" / "data"
 
@@ -27,7 +27,7 @@ def __init__(
     def compose(self) -> ComposeResult:
         df = pd.read_parquet(self.data_path)
         rows = [tuple(row) for row in df.itertuples(index=False)]
-        table: BuiltinDataTable = BuiltinDataTable()
+        table: DataTable = DataTable()
         table.add_columns(*[str(col) for col in df.columns])
         for row in rows:
             table.add_row(*row, height=1, label=None)

diff --git a/src/scripts/run_numpy_wide.py b/src/scripts/run_numpy_wide.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import pandas as pd
+from textual.app import App, ComposeResult
+from textual.driver import Driver
+from textual.types import CSSPathType
+from textual_fastdatatable import DataTable, NumpyBackend
+
+BENCHMARK_DATA = Path(__file__).parent.parent.parent / "tests" / "data"
+
+
+class NumpyApp(App):
+    TITLE = "FastDataTable (Numpy)"
+
+    def __init__(
+        self,
+        data_path: Path,
+        driver_class: type[Driver] | None = None,
+        css_path: CSSPathType | None = None,
+        watch_css: bool = False,
+    ):
+        super().__init__(driver_class, css_path, watch_css)
+        self.data_path = data_path
+
+    def compose(self) -> ComposeResult:
+        df = pd.read_parquet(self.data_path)
+        rows = [tuple(row) for row in df.itertuples(index=False)]
+        backend = NumpyBackend(rows)
+        table = DataTable(
+            backend=backend, column_labels=[str(col) for col in df.columns]
+        )
+        yield table
+
+
+if __name__ == "__main__":
+    app = NumpyApp(data_path=BENCHMARK_DATA / "wide_10000.parquet")
+    app.run()
diff --git a/src/textual_fastdatatable/__init__.py b/src/textual_fastdatatable/__init__.py
@@ -1,13 +1,15 @@
 from textual_fastdatatable.backend import (
     ArrowBackend,
     DataTableBackend,
+    NumpyBackend,
     create_backend,
 )
 from textual_fastdatatable.data_table import DataTable
 
 __all__ = [
     "DataTable",
     "ArrowBackend",
+    "NumpyBackend",
     "DataTableBackend",
     "create_backend",
 ]