TEST-#7049: Add some sanity tests with pyarrow-backed pandas dataframes (#7199)

Co-authored-by: Iaroslav Igoshev <[email protected]>
Signed-off-by: Anatoly Myachev <[email protected]>
modin-project · Apr 22, 2024 · 3abd961 · 3abd961
1 parent 5eb3a1d
commit 3abd961
Show file tree

Hide file tree

Showing 3 changed files with 75 additions and 0 deletions.
diff --git a/modin/core/dataframe/algebra/binary.py b/modin/core/dataframe/algebra/binary.py
@@ -250,6 +250,8 @@ def try_compute_new_dtypes(
 
     try:
         if infer_dtypes == "bool" or is_bool_dtype(result_dtype):
+            # FIXME: https://github.com/modin-project/modin/issues/7203
+            # can be `pandas.api.types.pandas_dtype("bool[pyarrow]")` depending on the data
             dtypes = maybe_build_dtypes_series(
                 first, second, dtype=pandas.api.types.pandas_dtype(bool)
             )

diff --git a/modin/tests/pandas/dataframe/test_map_metadata.py b/modin/tests/pandas/dataframe/test_map_metadata.py
@@ -11,6 +11,8 @@
 # ANY KIND, either express or implied. See the License for the specific language
 # governing permissions and limitations under the License.
 
+from decimal import Decimal
+
 import matplotlib
 import numpy as np
 import pandas
@@ -1797,6 +1799,13 @@ def test_constructor(data):
     df_equals(pandas_df, modin_df)
 
 
+def test_pyarrow_constructor():
+    pa = pytest.importorskip("pyarrow")
+
+    data = [[Decimal("3.19"), None], [None, Decimal("-1.23")]]
+    df_equals(*create_test_dfs(data, dtype=pd.ArrowDtype(pa.decimal128(3, scale=2))))
+
+
 @pytest.mark.parametrize(
     "data",
     [

diff --git a/modin/tests/pandas/test_series.py b/modin/tests/pandas/test_series.py
@@ -1387,6 +1387,70 @@ def test_constructor_arrow_extension_array():
     df_equals(md_ser.dtypes, pd_ser.dtypes)
 
 
+def test_pyarrow_backed_constructor():
+    pa = pytest.importorskip("pyarrow")
+    data = list("abcd")
+    df_equals(*create_test_series(data, dtype="string[pyarrow]"))
+    df_equals(*create_test_series(data, dtype=pd.ArrowDtype(pa.string())))
+
+    data = [["hello"], ["there"]]
+    list_str_type = pa.list_(pa.string())
+    df_equals(*create_test_series(data, dtype=pd.ArrowDtype(list_str_type)))
+
+
+def test_pyarrow_backed_functions():
+    pytest.importorskip("pyarrow")
+    modin_series, pandas_series = create_test_series(
+        [-1.545, 0.211, None], dtype="float32[pyarrow]"
+    )
+    df_equals(modin_series.mean(), pandas_series.mean())
+
+    def comparator(df1, df2):
+        df_equals(df1, df2)
+        df_equals(df1.dtypes, df2.dtypes)
+
+    if StorageFormat.get() != "Hdk":
+        # FIXME: HDK should also work in this case
+        eval_general(
+            modin_series,
+            pandas_series,
+            lambda ser: ser
+            + (modin_series if isinstance(ser, pd.Series) else pandas_series),
+            comparator=comparator,
+        )
+
+    # FIXME: https://github.com/modin-project/modin/issues/7203
+    # eval_general(
+    #    modin_series,
+    #    pandas_series,
+    #    lambda ser: ser > (ser + 1),
+    #    comparator=comparator,
+    # )
+
+    eval_general(
+        modin_series,
+        pandas_series,
+        lambda ser: ser.dropna(),
+        comparator=comparator,
+    )
+
+    eval_general(
+        modin_series,
+        pandas_series,
+        lambda ser: ser.isna(),
+        comparator=comparator,
+    )
+
+    if StorageFormat.get() != "Hdk":
+        # FIXME: HDK should also work in this case
+        eval_general(
+            modin_series,
+            pandas_series,
+            lambda ser: ser.fillna(0),
+            comparator=comparator,
+        )
+
+
 def test_pyarrow_array_retrieve():
     pa = pytest.importorskip("pyarrow")
     modin_series, pandas_series = create_test_series(