Skip to content

Commit

Permalink
TEST-#7049: Add some sanity tests with pyarrow-backed pandas datafram…
Browse files Browse the repository at this point in the history
…es (#7199)

Co-authored-by: Iaroslav Igoshev <[email protected]>
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev and YarShev authored Apr 22, 2024
1 parent 5eb3a1d commit 3abd961
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 0 deletions.
2 changes: 2 additions & 0 deletions modin/core/dataframe/algebra/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ def try_compute_new_dtypes(

try:
if infer_dtypes == "bool" or is_bool_dtype(result_dtype):
# FIXME: https://github.com/modin-project/modin/issues/7203
# the resulting dtype can be `pandas.api.types.pandas_dtype("bool[pyarrow]")` depending on the data
dtypes = maybe_build_dtypes_series(
first, second, dtype=pandas.api.types.pandas_dtype(bool)
)
Expand Down
9 changes: 9 additions & 0 deletions modin/tests/pandas/dataframe/test_map_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from decimal import Decimal

import matplotlib
import numpy as np
import pandas
Expand Down Expand Up @@ -1797,6 +1799,13 @@ def test_constructor(data):
df_equals(pandas_df, modin_df)


def test_pyarrow_constructor():
    """Compare the frames built from Decimal data with a pyarrow decimal128 dtype.

    The test is skipped when pyarrow cannot be imported.
    """
    pyarrow = pytest.importorskip("pyarrow")

    # Each row pairs one decimal value with one missing value.
    rows = [[Decimal("3.19"), None], [None, Decimal("-1.23")]]
    decimal_dtype = pd.ArrowDtype(pyarrow.decimal128(3, scale=2))
    frames = create_test_dfs(rows, dtype=decimal_dtype)
    df_equals(*frames)


@pytest.mark.parametrize(
"data",
[
Expand Down
64 changes: 64 additions & 0 deletions modin/tests/pandas/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1387,6 +1387,70 @@ def test_constructor_arrow_extension_array():
df_equals(md_ser.dtypes, pd_ser.dtypes)


def test_pyarrow_backed_constructor():
    """Compare series built with pyarrow-backed string and list-of-string dtypes.

    The test is skipped when pyarrow cannot be imported.
    """
    pyarrow = pytest.importorskip("pyarrow")

    letters = list("abcd")

    # Same data, first via the string alias, then via an explicit ArrowDtype.
    alias_pair = create_test_series(letters, dtype="string[pyarrow]")
    df_equals(*alias_pair)
    arrow_string_dtype = pd.ArrowDtype(pyarrow.string())
    df_equals(*create_test_series(letters, dtype=arrow_string_dtype))

    # Nested data: every element is a list of strings.
    nested = [["hello"], ["there"]]
    nested_dtype = pd.ArrowDtype(pyarrow.list_(pyarrow.string()))
    df_equals(*create_test_series(nested, dtype=nested_dtype))


def test_pyarrow_backed_functions():
    """Run a handful of operations on a ``float32[pyarrow]`` series pair.

    Covers ``mean``, self-addition, ``dropna``, ``isna`` and ``fillna``;
    every non-scalar result is compared by value and by dtype. The test is
    skipped when pyarrow cannot be imported.
    """
    pytest.importorskip("pyarrow")
    values = [-1.545, 0.211, None]
    modin_series, pandas_series = create_test_series(values, dtype="float32[pyarrow]")
    df_equals(modin_series.mean(), pandas_series.mean())

    def comparator(df1, df2):
        # Results have to agree both in content and in dtypes.
        df_equals(df1, df2)
        df_equals(df1.dtypes, df2.dtypes)

    def check(operation):
        # Apply `operation` to both series and compare with the strict comparator.
        eval_general(modin_series, pandas_series, operation, comparator=comparator)

    def add_matching_series(ser):
        # Add the series of the same flavour (`pd.Series` instance or not) to `ser`.
        other = modin_series if isinstance(ser, pd.Series) else pandas_series
        return ser + other

    if StorageFormat.get() != "Hdk":
        # FIXME: HDK should also work in this case
        check(add_matching_series)

    # FIXME: https://github.com/modin-project/modin/issues/7203
    # check(lambda ser: ser > (ser + 1))

    check(lambda ser: ser.dropna())
    check(lambda ser: ser.isna())

    if StorageFormat.get() != "Hdk":
        # FIXME: HDK should also work in this case
        check(lambda ser: ser.fillna(0))


def test_pyarrow_array_retrieve():
pa = pytest.importorskip("pyarrow")
modin_series, pandas_series = create_test_series(
Expand Down

0 comments on commit 3abd961

Please sign in to comment.