Skip to content

Commit

Permalink
fix: do not crash if table is initialized with no data
Browse files Browse the repository at this point in the history
  • Loading branch information
tconbeer committed Nov 6, 2023
1 parent d7b7786 commit b679902
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 10 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

- Fixes a crash when computing the widths of columns with no rows ([#19](https://github.com/tconbeer/textual-fastdatatable/issues/19)).

## [0.1.3] - 2023-10-09

- Fixes a crash when creating a column from a null or complex type.
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ target-version = "py38"
python_version = "3.8"
files = [
"src/**/*.py",
"tests/unit_tests/**/*.py"
]
mypy_path = "src:stubs"

Expand Down
11 changes: 8 additions & 3 deletions src/textual_fastdatatable/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,14 @@ def column_content_widths(self) -> list[int]:
],
names=self.data.column_names,
)
self._column_content_widths = [
content_widths = [
pc.max(pc.utf8_length(arr).fill_null(0)).as_py()
for arr in self._string_data.itercolumns()
]
# pc.max returns None for each column without rows; we need to return 0
# instead.
self._column_content_widths = [cw or 0 for cw in content_widths]

return self._column_content_widths

def get_row_at(self, index: int) -> Sequence[Any]:
Expand Down Expand Up @@ -279,11 +283,12 @@ def _safe_cast_arr_to_str(arr: pa._PandasConvertible) -> pa._PandasConvertible:
and other nested types), we fall back to Python.
"""
try:
return arr.cast(
arr = arr.cast(
pa.string(),
safe=False,
)
except pl.ArrowNotImplementedError:
# todo: vectorize this with a pyarrow udf
native_list = arr.to_pylist()
return pa.array([str(i) for i in native_list], type=pa.string())
arr = pa.array([str(i) for i in native_list], type=pa.string())
return arr.fill_null("")
14 changes: 8 additions & 6 deletions stubs/pyarrow/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Any, Iterable, Iterator, Literal, Mapping, Type, TypeVar
from typing import Any, Iterable, Iterator, Literal, Mapping, Sequence, Type, TypeVar

from .compute import CastOptions
from .types import DataType as DataType
Expand Down Expand Up @@ -69,20 +69,22 @@ class _Tabular:
self: T, field_: str | Field, column: Array | ChunkedArray
) -> T: ...
def column(self, i: int | str) -> _PandasConvertible: ...
def equals(self: T, other: T, check_metadata: bool = False) -> bool: ...
def itercolumns(self) -> Iterator[_PandasConvertible]: ...
def select(self: T, columns: Sequence[str | int]) -> T: ...
def set_column(
self: T, i: int, field_: str | Field, column: Array | ChunkedArray
) -> T: ...
def sort_by(
self: T,
sorting: str | list[tuple[str, Literal["ascending", "descending"]]],
**kwargs: Any,
) -> T: ...
def slice( # noqa: A003
self: T,
offset: int = 0,
length: int | None = None,
) -> T: ...
def sort_by(
self: T,
sorting: str | list[tuple[str, Literal["ascending", "descending"]]],
**kwargs: Any,
) -> T: ...
def to_pylist(self) -> list[dict[str, Any]]: ...

class RecordBatch(_Tabular): ...
Expand Down
29 changes: 28 additions & 1 deletion stubs/pyarrow/parquet.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Any, BinaryIO
from typing import Any, BinaryIO, Literal

from . import NativeFile, Schema, Table
from .compute import Expression
Expand Down Expand Up @@ -31,3 +31,30 @@ def read_table(
thrift_string_size_limit: int | None = None,
thrift_container_size_limit: int | None = None,
) -> Table: ...
# Type stub for pyarrow.parquet.write_table. Mirrors the upstream keyword
# signature so call sites type-check; defaults are presumably copied from the
# pyarrow docs — TODO(review): confirm against the pinned pyarrow version.
def write_table(
    table: Table,
    where: str | NativeFile,
    row_group_size: int | None = None,
    version: Literal["1.0", "2.4", "2.6"] = "2.6",
    use_dictionary: bool | list = True,
    compression: Literal["none", "snappy", "gzip", "brotli", "lz4", "zstd"]
    | dict[str, Literal["none", "snappy", "gzip", "brotli", "lz4", "zstd"]] = "snappy",
    write_statistics: bool | list = True,
    use_deprecated_int96_timestamps: bool | None = None,
    coerce_timestamps: str | None = None,
    allow_truncated_timestamps: bool = False,
    data_page_size: int | None = None,
    flavor: Literal["spark"] | None = None,
    filesystem: FileSystem | None = None,
    compression_level: int | dict | None = None,
    use_byte_stream_split: bool | list = False,
    column_encoding: str | dict | None = None,
    data_page_version: Literal["1.0", "2.0"] = "1.0",
    use_compliant_nested_type: bool = True,
    encryption_properties: Any | None = None,
    write_batch_size: int | None = None,
    dictionary_pagesize_limit: int | None = None,
    store_schema: bool = True,
    write_page_index: bool = False,
    **kwargs: Any,
) -> None: ...
6 changes: 6 additions & 0 deletions tests/unit_tests/test_arrow_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,9 @@ def test_sort(backend: ArrowBackend) -> None:

backend.sort(by=[("first column", "ascending")])
assert backend.data.equals(original_table)


def test_empty_query() -> None:
    """Regression test: a backend built from a column with zero rows must not
    crash when computing content widths, and should report width 0."""
    rowless: dict[str, list] = {"a": []}
    backend = ArrowBackend.from_pydict(rowless)
    assert backend.column_content_widths == [0]

0 comments on commit b679902

Please sign in to comment.