Skip to content

Commit

Permalink
Backport PR pandas-dev#60222 on branch 2.3.x (ENH (string dtype): acc…
Browse files Browse the repository at this point in the history
…ept string_view in addition to string/large_string for ArrowStringArray input) (pandas-dev#60223)

Backport PR pandas-dev#60222: ENH (string dtype): accept string_view in addition to string/large_string for ArrowStringArray input

Co-authored-by: Joris Van den Bossche <[email protected]>
  • Loading branch information
meeseeksmachine and jorisvandenbossche authored Nov 6, 2024
1 parent 678266c commit b5d0615
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
7 changes: 7 additions & 0 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pandas.compat import (
pa_version_under10p1,
pa_version_under13p0,
pa_version_under16p0,
)
from pandas.util._exceptions import find_stack_level

Expand Down Expand Up @@ -65,6 +66,10 @@ def _chk_pyarrow_available() -> None:
raise ImportError(msg)


def _is_string_view(typ):
    """
    Return whether ``typ`` is a pyarrow ``string_view`` type.

    Always returns False when the ``pa_version_under16p0`` compat flag is
    set; ``pa.types.is_string_view`` is not called in that case (presumably
    because it is unavailable in those pyarrow versions -- TODO confirm).
    """
    if pa_version_under16p0:
        return False
    return pa.types.is_string_view(typ)


# TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from
# ObjectStringArrayMixin because we want to have the object-dtype based methods as
# fallback for the ones that pyarrow doesn't yet support
Expand Down Expand Up @@ -122,11 +127,13 @@ def __init__(self, values) -> None:
_chk_pyarrow_available()
if isinstance(values, (pa.Array, pa.ChunkedArray)) and (
pa.types.is_string(values.type)
or _is_string_view(values.type)
or (
pa.types.is_dictionary(values.type)
and (
pa.types.is_string(values.type.value_type)
or pa.types.is_large_string(values.type.value_type)
or _is_string_view(values.type.value_type)
)
)
):
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/arrays/string_/test_string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,20 @@ def test_constructor_valid_string_type_value_dictionary(string_type, chunked):
assert pa.types.is_large_string(arr._pa_array.type)


@pytest.mark.parametrize("chunked", [True, False])
def test_constructor_valid_string_view(chunked):
    """ArrowStringArray accepts string_view input, as a plain or chunked array."""
    # requires pyarrow>=18 for casting string_view to string
    pa = pytest.importorskip("pyarrow", minversion="18")

    values = pa.array(["1", "2", "3"], pa.string_view())
    pa_input = pa.chunked_array(values) if chunked else values

    result = ArrowStringArray(pa_input)
    # the string_view input is expected to be stored as a large_string array
    assert pa.types.is_large_string(result._pa_array.type)


def test_constructor_from_list():
# GH#27673
pytest.importorskip("pyarrow")
Expand Down

0 comments on commit b5d0615

Please sign in to comment.