Skip to content

Commit

Permalink
fix: handle dupe column labels (#27)
Browse files Browse the repository at this point in the history
  • Loading branch information
tconbeer authored Nov 10, 2023
1 parent 18c0232 commit ae61b87
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ repos:
hooks:
- id: black
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.291
rev: v0.1.4
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Bug Fixes

- Tables with the ArrowBackend no longer display incorrect output when column labels are duplicated ([#26](https://github.com/tconbeer/textual-fastdatatable/issues/26)).


## [0.2.0] - 2023-11-08

### Features
Expand Down
16 changes: 15 additions & 1 deletion src/textual_fastdatatable/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,21 @@ def sort(

class ArrowBackend(DataTableBackend):
def __init__(self, data: pa.Table) -> None:
    """
    Store an Arrow table, first making every column name unique.

    Arrow allows duplicate field names, but a table's to_pylist() and
    to_pydict() methods will drop duplicate-named fields. Any repeated
    name is therefore given a numeric suffix before the table is stored.

    Args:
        data: The Arrow table to wrap. It is stored as-is when its column
            names are already unique; otherwise the table returned by
            rename_columns() with the de-duplicated names is stored.
    """
    field_names: list[str] = []
    # Mirror of field_names used only for membership tests, so the
    # duplicate check stays O(1) per lookup instead of scanning the list.
    seen: set[str] = set()
    renamed = False
    for original_name in data.column_names:
        candidate = original_name
        n = 0
        # Each retry appends the counter to the *already suffixed* name,
        # so three columns named "a" become "a", "a0" and "a01".
        while candidate in seen:
            candidate = f"{candidate}{n}"
            renamed = True
            n += 1
        seen.add(candidate)
        field_names.append(candidate)

    # Only build a renamed table when at least one name actually changed.
    self.data: pa.Table = data.rename_columns(field_names) if renamed else data
    self._string_data: pa.Table | None = None
    self._column_content_widths: list[int] = []

Expand Down
18 changes: 15 additions & 3 deletions stubs/pyarrow/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ from __future__ import annotations

from typing import Any, Iterable, Iterator, Literal, Mapping, Sequence, Type, TypeVar

import pandas as pd

from .compute import CastOptions
from .types import DataType as DataType
from .types import string as string
Expand Down Expand Up @@ -71,6 +73,7 @@ class _Tabular:
# Select a single column, addressed either by position (int) or by name (str).
def column(self, i: int | str) -> _PandasConvertible: ...
# Compare with another object of the same type; check_metadata defaults to False.
def equals(self: T, other: T, check_metadata: bool = False) -> bool: ...
# Iterate over the columns one at a time.
def itercolumns(self) -> Iterator[_PandasConvertible]: ...
# Takes one list of names and is typed as returning an object of the same
# type as self.
def rename_columns(self: T, names: list[str]) -> T: ...
# Takes a sequence of column names and/or positions and is typed as
# returning an object of the same type as self.
def select(self: T, columns: Sequence[str | int]) -> T: ...
def set_column(
self: T, i: int, field_: str | Field, column: Array | ChunkedArray
Expand Down Expand Up @@ -107,11 +110,20 @@ def array(
safe: bool = True,
memory_pool: MemoryPool | None = None,
) -> Array | ChunkedArray: ...
# Type stub for pyarrow.concat_arrays: accepts an iterable of Array objects
# and an optional MemoryPool, and is typed as returning a single Array.
def concat_arrays(
arrays: Iterable[Array], memory_pool: MemoryPool | None = None
) -> Array: ...
# Type stub for pyarrow.nulls: takes an integer size, an optional DataType
# and an optional MemoryPool, and is typed as returning an Array.
# The parameter name `type` shadows the builtin, which is why the A002
# lint is suppressed on that line.
def nulls(
size: int,
type: DataType | None = None,  # noqa: A002
memory_pool: MemoryPool | None = None,
) -> Array: ...
# Type stub for pyarrow.concat_arrays: accepts an iterable of Array objects
# and an optional MemoryPool, and is typed as returning a single Array.
def concat_arrays(
arrays: Iterable[Array], memory_pool: MemoryPool | None = None
) -> Array: ...
# Type stub for the pyarrow.table factory, typed as returning a Table.
# `data` may be a pandas DataFrame, a mapping from column name to column
# values, or a list of columns; with the list form, column labels can be
# passed separately through `names`.
# `names`, `schema`, `metadata` and `nthreads` are all optional (default None).
def table(
data: pd.DataFrame
| Mapping[str, _PandasConvertible | list]
| list[_PandasConvertible],
names: list[str] | None = None,
schema: Schema | None = None,
metadata: Mapping | None = None,
nthreads: int | None = None,
) -> Table: ...
9 changes: 9 additions & 0 deletions tests/unit_tests/test_arrow_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,12 @@ def test_empty_query() -> None:
data: dict[str, list] = {"a": []}
backend = ArrowBackend.from_pydict(data)
assert backend.column_content_widths == [0]


def test_dupe_column_labels() -> None:
    """Columns that all share one label must each survive in the backend."""
    labels = ["a", "a", "a"]
    values = pa.array([0, 1, 2, 3])
    # Reuse the same array for every column so each row holds one repeated value.
    duplicated = pa.table([values for _ in labels], names=labels)

    backend = ArrowBackend(data=duplicated)

    assert backend.column_count == 3
    assert backend.row_count == 4
    assert backend.get_row_at(2) == [2, 2, 2]

0 comments on commit ae61b87

Please sign in to comment.