diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ab5746eca1b18..9427d410e700a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -709,6 +709,7 @@ I/O - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`) - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`) - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`) +- Bug in :meth:`read_parquet` raising ``ValueError`` when reading back, with the ``pyarrow`` engine, a file written by the ``pyarrow`` engine from a :class:`DataFrame` whose columns are a :class:`MultiIndex` containing a level of bools (:issue:`60508`) Period ^^^^^^ diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 086f7d2da6640..2a1b3a16c9d67 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -125,6 +125,14 @@ def _astype_nansafe( ) raise ValueError(msg) + if arr.dtype == object and dtype == bool: + # If the dtype is bool and the array is object, we need to replace + # the False and True of the object type in the ndarray with the + # bool type to ensure that the type conversion is correct + arr[arr == "False"] = np.False_ + arr[arr == "True"] = np.True_ + return arr.astype(dtype, copy=copy) + if copy or arr.dtype == object or dtype == object: # Explicit copy, or required since NumPy can't view from / to object. 
return arr.astype(dtype, copy=True) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 7919bb956dc7a..04e9b9906c204 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1468,3 +1468,15 @@ def test_invalid_dtype_backend(self, engine): df.to_parquet(path) with pytest.raises(ValueError, match=msg): read_parquet(path, dtype_backend="numpy") + + def test_bool_multiIndex_roundtrip_through_parquet(self, pa): + # GH 60508 + df = pd.DataFrame( + [[1, 2], [4, 5]], + columns=pd.MultiIndex.from_tuples([(True, 'B'), (False, 'C')]), + ) + with tm.ensure_clean("test.parquet") as path: + df.to_parquet(path, engine=pa) + + result = read_parquet(path, engine=pa) + tm.assert_frame_equal(result, df)