Skip to content

Commit

Permalink
Backport PR #56013 on branch 2.3.x (BUG: get_indexer round-tripping thr…
Browse files Browse the repository at this point in the history
…ough string dtype) (#60339)

Backport PR #56013: BUG: get_indexer round-tripping through string dtype

Co-authored-by: Patrick Hoefler <[email protected]>
  • Loading branch information
meeseeksmachine and phofl authored Nov 17, 2024
1 parent 38565aa commit 7958d6c
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 2 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ Interval

Indexing
^^^^^^^^
-
- Fixed bug in :meth:`Index.get_indexer` round-tripping through string dtype when ``infer_string`` is enabled (:issue:`55834`)
-

Missing
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6695,7 +6695,16 @@ def _maybe_cast_listlike_indexer(self, target) -> Index:
"""
Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
"""
-return ensure_index(target)
+target_index = ensure_index(target)
+if (
+    not hasattr(target, "dtype")
+    and self.dtype == object
+    and target_index.dtype == "string"
+):
+    # If we started with a list-like, avoid inference to string dtype if self
+    # is object dtype (coercing to string dtype will alter the missing values)
+    target_index = Index(target, dtype=self.dtype)
+return target_index

@final
def _validate_indexer(
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/indexes/object/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,15 @@ def test_get_indexer_with_NA_values(
expected = np.array([0, 1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)

def test_get_indexer_infer_string_missing_values(self):
    """
    A plain list passed to ``get_indexer`` on an object-dtype Index must be
    treated as object rather than cast to string, so that the ``None``
    entry is still matched in the index.
    """
    # Regression test for
    # https://github.com/pandas-dev/pandas/issues/55834
    expected = np.array([2, -1], dtype=np.intp)
    object_index = Index(["a", "b", None], dtype="object")
    indexer = object_index.get_indexer([None, "x"])
    tm.assert_numpy_array_equal(indexer, expected)


class TestGetIndexerNonUnique:
def test_get_indexer_non_unique_nas(self, nulls_fixture):
Expand Down

0 comments on commit 7958d6c

Please sign in to comment.