Skip to content

Commit

Permalink
Backport PR pandas-dev#55817: COMPAT: Numpy int64 Windows default for…
Browse files (browse the repository at this point in the history)
… Numpy 2.0
  • Loading branch information
mroeschke authored and meeseeksmachine committed Nov 6, 2023
1 parent bf91684 commit 34d8715
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 16 deletions.
3 changes: 2 additions & 1 deletion pandas/compat/numpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
np_version_gte1p24 = _nlv >= Version("1.24")
np_version_gte1p24p3 = _nlv >= Version("1.24.3")
np_version_gte1p25 = _nlv >= Version("1.25")
np_version_gt2 = _nlv >= Version("2.0.0.dev0")
is_numpy_dev = _nlv.dev is not None
_min_numpy_ver = "1.22.4"

Expand All @@ -26,7 +27,7 @@
np_long: type
np_ulong: type

if _nlv >= Version("2.0.0.dev0"):
if np_version_gt2:
try:
with warnings.catch_warnings():
warnings.filterwarnings(
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/base/reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def check_reduce(self, s, op_name, skipna):
expected = exp_op(skipna=skipna)
tm.assert_almost_equal(result, expected)

def _get_expected_reduction_dtype(self, arr, op_name: str):
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
# Find the expected dtype when the given reduction is done on a DataFrame
# column with this array. The default assumes float64-like behavior,
# i.e. retains the dtype.
Expand All @@ -58,7 +58,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):

kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}

cmp_dtype = self._get_expected_reduction_dtype(arr, op_name)
cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna)

# The DataFrame method just calls arr._reduce with keepdims=True,
# so this first check is perfunctory.
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ def test_reduce_series_boolean(

return super().test_reduce_series_boolean(data, all_boolean_reductions, skipna)

def _get_expected_reduction_dtype(self, arr, op_name: str):
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
if op_name in ["max", "min"]:
cmp_dtype = arr.dtype
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
Expand Down
29 changes: 23 additions & 6 deletions pandas/tests/extension/test_masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
IS64,
is_platform_windows,
)
from pandas.compat.numpy import np_version_gt2

import pandas as pd
import pandas._testing as tm
Expand All @@ -40,7 +41,7 @@
)
from pandas.tests.extension import base

is_windows_or_32bit = is_platform_windows() or not IS64
is_windows_or_32bit = (is_platform_windows() and not np_version_gt2) or not IS64

pytestmark = [
pytest.mark.filterwarnings(
Expand Down Expand Up @@ -325,7 +326,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
expected = pd.NA
tm.assert_almost_equal(result, expected)

def _get_expected_reduction_dtype(self, arr, op_name: str):
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
if tm.is_float_dtype(arr.dtype):
cmp_dtype = arr.dtype.name
elif op_name in ["mean", "median", "var", "std", "skew"]:
Expand All @@ -335,16 +336,32 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
elif arr.dtype in ["Int64", "UInt64"]:
cmp_dtype = arr.dtype.name
elif tm.is_signed_integer_dtype(arr.dtype):
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
# TODO: Why does the Windows NumPy 2.0 dtype depend on skipna?
cmp_dtype = (
"Int32"
if (is_platform_windows() and (not np_version_gt2 or not skipna))
or not IS64
else "Int64"
)
elif tm.is_unsigned_integer_dtype(arr.dtype):
cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64"
cmp_dtype = (
"UInt32"
if (is_platform_windows() and (not np_version_gt2 or not skipna))
or not IS64
else "UInt64"
)
elif arr.dtype.kind == "b":
if op_name in ["mean", "median", "var", "std", "skew"]:
cmp_dtype = "Float64"
elif op_name in ["min", "max"]:
cmp_dtype = "boolean"
elif op_name in ["sum", "prod"]:
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
cmp_dtype = (
"Int32"
if (is_platform_windows() and (not np_version_gt2 or not skipna))
or not IS64
else "Int64"
)
else:
raise TypeError("not supposed to reach this")
else:
Expand All @@ -360,7 +377,7 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
# overwrite to ensure pd.NA is tested instead of np.nan
# https://github.com/pandas-dev/pandas/issues/30958
length = 64
if not IS64 or is_platform_windows():
if is_windows_or_32bit:
# Item "ExtensionDtype" of "Union[dtype[Any], ExtensionDtype]" has
# no attribute "itemsize"
if not ser.dtype.itemsize == 8: # type: ignore[union-attr]
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
IS64,
is_platform_windows,
)
from pandas.compat.numpy import np_version_gt2
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -131,7 +132,7 @@ class TestDataFrameSelectReindex:
# test_indexing

@pytest.mark.xfail(
not IS64 or is_platform_windows(),
not IS64 or (is_platform_windows() and not np_version_gt2),
reason="Passes int32 values to DatetimeArray in make_na_array on "
"windows, 32bit linux builds",
)
Expand Down
14 changes: 9 additions & 5 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
IS64,
is_platform_windows,
)
from pandas.compat.numpy import np_version_gt2
import pandas.util._test_decorators as td

import pandas as pd
Expand All @@ -32,6 +33,7 @@
nanops,
)

is_windows_np2_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64
is_windows_or_is32 = is_platform_windows() or not IS64


Expand Down Expand Up @@ -1766,13 +1768,13 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype):
@pytest.mark.parametrize(
"opname, dtype, exp_value, exp_dtype",
[
("sum", "Int8", 0, ("Int32" if is_windows_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
("sum", "Int8", 0, ("Int32" if is_windows_np2_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")),
("sum", "Int64", 0, "Int64"),
("prod", "Int64", 1, "Int64"),
("sum", "UInt8", 0, ("UInt32" if is_windows_or_is32 else "UInt64")),
("prod", "UInt8", 1, ("UInt32" if is_windows_or_is32 else "UInt64")),
("sum", "UInt8", 0, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
("prod", "UInt8", 1, ("UInt32" if is_windows_np2_or_is32 else "UInt64")),
("sum", "UInt64", 0, "UInt64"),
("prod", "UInt64", 1, "UInt64"),
("sum", "Float32", 0, "Float32"),
Expand All @@ -1787,6 +1789,8 @@ def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype
expected = Series([exp_value, exp_value], dtype=exp_dtype)
tm.assert_series_equal(result, expected)

# TODO: why does min_count=1 impact the resulting Windows dtype
# differently than min_count=0?
@pytest.mark.parametrize(
"opname, dtype, exp_dtype",
[
Expand Down

0 comments on commit 34d8715

Please sign in to comment.