Skip to content

Commit

Permalink
Enforce int32 for offsets
Browse files Browse the repository at this point in the history
  • Loading branch information
hombit committed May 29, 2024
1 parent 41cce93 commit 4dcd87a
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
7 changes: 3 additions & 4 deletions src/nested_pandas/series/ext_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,12 +616,11 @@ def list_offsets(self) -> pa.Array:

chunks = []
# The offset of the current chunk in the flat array.
# It is 0 for the first chunk, and the last offset of the previous chunk for the next chunks,
# as a pa.Scalar.
chunk_offset: pa.Scalar | int = 0
# Arrow list offset arrays use the int32 type, so the running offset is kept as an int32 scalar
chunk_offset = pa.scalar(0, type=pa.int32())
for chunk in self._pa_array.iterchunks():
list_array = cast(pa.ListArray, chunk.field(0))
if chunk_offset == 0:
if chunk_offset.equals(pa.scalar(0, type=pa.int32())):
offsets = list_array.offsets
else:
offsets = pa.compute.add(list_array.offsets[1:], chunk_offset)
Expand Down
3 changes: 3 additions & 0 deletions src/nested_pandas/series/packer.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,4 +297,7 @@ def calculate_sorted_index_offsets(index: pd.Index) -> np.ndarray:
offset_but_last = np.nonzero(~index.duplicated(keep="first"))[0]
offset = np.append(offset_but_last, len(index))

# Arrow list arrays store their offsets as int32, so cast the NumPy offsets to match
offset = offset.astype(np.int32)

return offset

0 comments on commit 4dcd87a

Please sign in to comment.