Commit

Merge remote-tracking branch 'upstream/main' into string-dtype-index-engine
jorisvandenbossche committed Nov 17, 2024
2 parents 2a4aed2 + 34c080c commit 13fa689
Showing 5 changed files with 42 additions and 15 deletions.
2 changes: 2 additions & 0 deletions pandas/core/reshape/concat.py
@@ -17,6 +17,7 @@
 import numpy as np
 
 from pandas._libs import lib
+from pandas.util._decorators import set_module
 from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
@@ -149,6 +150,7 @@ def concat(
 ) -> DataFrame | Series: ...
 
 
+@set_module("pandas")
 def concat(
     objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
     *,
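
Note: the set_module decorator used here overrides the decorated function's __module__, so pd.concat reports "pandas" as its module rather than pandas.core.reshape.concat. A minimal sketch of how such a decorator can work (an illustration of the idea, not the actual pandas implementation, and the names below are placeholders):

def set_module(module):
    # Return a decorator that rewrites where the object claims to be defined,
    # so introspection (help(), __module__ checks, doc tooling) reports the
    # public import path instead of the private submodule.
    def decorator(obj):
        obj.__module__ = module
        return obj

    return decorator


@set_module("mypackage")
def concat_like(objs):
    """Toy stand-in for a public function defined in a private submodule."""
    return list(objs)


print(concat_like.__module__)  # -> "mypackage"

After this change, pd.concat.__module__ evaluates to "pandas", which is exactly what the new assertion in test_api.py below checks.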
1 change: 1 addition & 0 deletions pandas/tests/api/test_api.py
@@ -417,6 +417,7 @@ def test_set_module():
     assert pd.Period.__module__ == "pandas"
     assert pd.Timestamp.__module__ == "pandas"
     assert pd.Timedelta.__module__ == "pandas"
+    assert pd.concat.__module__ == "pandas"
     assert pd.isna.__module__ == "pandas"
     assert pd.notna.__module__ == "pandas"
     assert pd.merge.__module__ == "pandas"
6 changes: 5 additions & 1 deletion pandas/tests/io/test_fsspec.py
@@ -5,6 +5,8 @@
 
 from pandas._config import using_string_dtype
 
+from pandas.compat import HAS_PYARROW
+
 from pandas import (
     DataFrame,
     date_range,
@@ -176,7 +178,9 @@ def test_excel_options(fsspectest):
     assert fsspectest.test[0] == "read"
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet")
+@pytest.mark.xfail(
+    using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string) fastparquet"
+)
 def test_to_parquet_new_file(cleared_fs, df1):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""
     pytest.importorskip("fastparquet")
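
Note: pytest.mark.xfail(condition, reason=...) only marks a test as an expected failure when its first (condition) argument is truthy, so this change narrows the xfail to runs where the string dtype is enabled and pyarrow is installed. A small self-contained sketch of the pattern, using made-up flags rather than the pandas ones:

import pytest

FEATURE_ENABLED = True          # stand-in for using_string_dtype()
OPTIONAL_DEP_INSTALLED = False  # stand-in for HAS_PYARROW


@pytest.mark.xfail(
    FEATURE_ENABLED and OPTIONAL_DEP_INSTALLED,
    reason="known failure only when both conditions hold",
)
def test_roundtrip():
    # Expected to pass (and counted normally) whenever the condition is False.
    assert 1 + 1 == 2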
3 changes: 0 additions & 3 deletions pandas/tests/io/test_gcs.py
@@ -7,8 +7,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.compat.pyarrow import pa_version_under17p0
 
 from pandas import (
@@ -207,7 +205,6 @@ def test_to_csv_compression_encoding_gcs(
     tm.assert_frame_equal(df, read_df)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet")
 def test_to_parquet_gcs_new_file(monkeypatch, tmpdir):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""
     pytest.importorskip("fastparquet")
45 changes: 34 additions & 11 deletions pandas/tests/io/test_parquet.py
@@ -1174,9 +1174,17 @@ def test_non_nanosecond_timestamps(self, temp_file):
 
 
 class TestParquetFastParquet(Base):
-    @pytest.mark.xfail(reason="datetime_with_nat gets incorrect values")
-    def test_basic(self, fp, df_full):
+    def test_basic(self, fp, df_full, request):
         pytz = pytest.importorskip("pytz")
+        import fastparquet
+
+        if Version(fastparquet.__version__) < Version("2024.11.0"):
+            request.applymarker(
+                pytest.mark.xfail(
+                    reason=("datetime_with_nat gets incorrect values"),
+                )
+            )
 
         tz = pytz.timezone("US/Eastern")
         df = df_full
@@ -1213,11 +1221,17 @@ def test_duplicate_columns(self, fp):
         msg = "Cannot create parquet dataset with duplicate column names"
         self.check_error_on_write(df, fp, ValueError, msg)
 
-    @pytest.mark.xfail(
-        Version(np.__version__) >= Version("2.0.0"),
-        reason="fastparquet uses np.float_ in numpy2",
-    )
-    def test_bool_with_none(self, fp):
+    def test_bool_with_none(self, fp, request):
+        import fastparquet
+
+        if Version(fastparquet.__version__) < Version("2024.11.0") and Version(
+            np.__version__
+        ) >= Version("2.0.0"):
+            request.applymarker(
+                pytest.mark.xfail(
+                    reason=("fastparquet uses np.float_ in numpy2"),
+                )
+            )
         df = pd.DataFrame({"a": [True, None, False]})
         expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16")
         # Fastparquet bug in 0.7.1 makes it so that this dtype becomes
@@ -1331,10 +1345,19 @@ def test_empty_dataframe(self, fp):
         expected = df.copy()
         check_round_trip(df, fp, expected=expected)
 
-    @pytest.mark.xfail(
-        reason="fastparquet bug, see https://github.com/dask/fastparquet/issues/929"
-    )
-    def test_timezone_aware_index(self, fp, timezone_aware_date_list):
+    def test_timezone_aware_index(self, fp, timezone_aware_date_list, request):
+        import fastparquet
+
+        if Version(fastparquet.__version__) < Version("2024.11.0"):
+            request.applymarker(
+                pytest.mark.xfail(
+                    reason=(
+                        "fastparquet bug, see "
+                        "https://github.com/dask/fastparquet/issues/929"
+                    ),
+                )
+            )
+
         idx = 5 * [timezone_aware_date_list]
 
         df = pd.DataFrame(index=idx, data={"index_as_col": idx})
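
Note: instead of static, import-time xfail decorators, these three tests now apply the marker at runtime with request.applymarker, gated on the installed fastparquet version, so they are expected to pass once fastparquet 2024.11.0 or newer is present. A generic sketch of this pattern with a placeholder package name, version threshold, and API call (not the actual pandas/fastparquet details):

import pytest
from packaging.version import Version


def test_new_behavior(request):
    somepkg = pytest.importorskip("somepkg")  # placeholder dependency

    if Version(somepkg.__version__) < Version("1.2.0"):
        # Only this test invocation becomes an expected failure on old
        # versions; newer versions must genuinely pass.
        request.applymarker(
            pytest.mark.xfail(reason="bug fixed upstream in somepkg 1.2.0")
        )

    assert somepkg.do_something() == "ok"  # hypothetical API call

Compared with a module-level decorator, the version check can use the actually installed dependency, so the expectation stays accurate as environments upgrade.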
