Skip to content

Commit

Permalink
Backport PR pandas-dev#57233: REGR: Fix to_numpy conversion for arrow ea with float dtype given
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored and meeseeksmachine committed Feb 7, 2024
1 parent 45fc954 commit e4357b1
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
- Fixed regression in :meth:`Index.join` raising ``TypeError`` when joining an empty index to a non-empty index containing mixed dtype values (:issue:`57048`)
- Fixed regression in :meth:`Series.pct_change` raising a ``ValueError`` for an empty :class:`Series` (:issue:`57056`)
- Fixed regression in :meth:`Series.to_numpy` when dtype is given as float and the data contains NaNs (:issue:`57121`)

.. ---------------------------------------------------------------------------
.. _whatsnew_221.bug_fixes:
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arrays/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def to_numpy_dtype_inference(
dtype = arr.dtype.numpy_dtype # type: ignore[union-attr]
elif dtype is not None:
dtype = np.dtype(dtype)
if na_value is lib.no_default and hasna and dtype.kind == "f":
na_value = np.nan
dtype_given = True
else:
dtype_given = True
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/arrays/boolean/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,6 @@ def test_to_numpy(box):
# converting to int or float without specifying na_value raises
with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
arr.to_numpy(dtype="int64")
with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"):
arr.to_numpy(dtype="float64")


def test_to_numpy_copy():
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/arrays/floating/test_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ def test_to_numpy_float(box):
tm.assert_numpy_array_equal(result, expected)

arr = con([0.1, 0.2, None], dtype="Float64")
with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"):
result = arr.to_numpy(dtype="float64")
result = arr.to_numpy(dtype="float64")
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
tm.assert_numpy_array_equal(result, expected)

# explicitly passing na_value=np.nan gives the same result
result = arr.to_numpy(dtype="float64", na_value=np.nan)
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
tm.assert_numpy_array_equal(result, expected)
Expand Down Expand Up @@ -100,7 +100,7 @@ def test_to_numpy_dtype(box, dtype):
tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"])
@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_na_raises(box, dtype):
con = pd.Series if box else pd.array
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/integer/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def test_to_numpy_dtype(dtype, in_series):
tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize("dtype", ["float64", "int64", "bool"])
@pytest.mark.parametrize("dtype", ["int64", "bool"])
def test_to_numpy_na_raises(dtype):
a = pd.array([0, 1, None], dtype="Int64")
with pytest.raises(ValueError, match=dtype):
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/series/methods/test_to_numpy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import (
NA,
Series,
Expand All @@ -23,3 +25,12 @@ def test_to_numpy_cast_before_setting_na():
result = ser.to_numpy(dtype=np.float64, na_value=np.nan)
expected = np.array([1.0])
tm.assert_numpy_array_equal(result, expected)


@td.skip_if_no("pyarrow")
def test_to_numpy_arrow_dtype_given():
# GH#57121
ser = Series([1, NA], dtype="int64[pyarrow]")
result = ser.to_numpy(dtype="float64")
expected = np.array([1.0, np.nan])
tm.assert_numpy_array_equal(result, expected)

0 comments on commit e4357b1

Please sign in to comment.