Skip to content

Commit

Permalink
Backport PR pandas-dev#57089: BUG: wide_to_long with string columns
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored and meeseeksmachine committed Jan 28, 2024
1 parent 1550858 commit a67defa
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Fixed regressions
- Fixed memory leak in :func:`read_csv` (:issue:`57039`)
- Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
- Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
- Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
- Fixed regression in :meth:`Series.pct_change` raising a ``ValueError`` for an empty :class:`Series` (:issue:`57056`)
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,8 +458,7 @@ def wide_to_long(

def get_var_names(df, stub: str, sep: str, suffix: str):
    """
    Return the column labels of ``df`` shaped like ``<stub><sep><suffix>``.

    Parameters
    ----------
    df : DataFrame
        Frame whose column labels are searched.
    stub : str
        Stub name; escaped, so it is matched literally.
    sep : str
        Separator between stub and suffix; escaped, so it is matched
        literally.
    suffix : str
        Regular expression describing the suffix; inserted verbatim.

    Returns
    -------
    Index
        The subset of ``df.columns`` whose entire label matches the pattern.
    """
    # Anchor both ends so that only labels matching in full are selected.
    regex = rf"^{re.escape(stub)}{re.escape(sep)}{suffix}$"
    # Hand ``.str.match`` the pattern as a plain string rather than a
    # compiled ``re.Pattern``: ``match`` is typed to take ``pat: str``, and a
    # compiled pattern raised AttributeError for string columns (GH 57066).
    return df.columns[df.columns.str.match(regex)]

def melt_stub(df, stub: str, i, j, value_vars, sep: str):
newdf = melt(
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1336,14 +1336,14 @@ def contains(
return self._wrap_result(result, fill_value=na, returns_string=False)

@forbid_nonstring_types(["bytes"])
def match(self, pat, case: bool = True, flags: int = 0, na=None):
def match(self, pat: str, case: bool = True, flags: int = 0, na=None):
"""
Determine if each string starts with a match of a regular expression.
Parameters
----------
pat : str
Character sequence or regular expression.
Character sequence.
case : bool, default True
If True, case sensitive.
flags : int, default 0 (no flags)
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/reshape/test_melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1220,3 +1220,33 @@ def test_missing_stubname(self, dtype):
new_level = expected.index.levels[0].astype(dtype)
expected.index = expected.index.set_levels(new_level, level=0)
tm.assert_frame_equal(result, expected)


def test_wide_to_long_pyarrow_string_columns():
    """Check wide_to_long on a frame whose column labels are pyarrow-backed strings."""
    # GH 57066
    pytest.importorskip("pyarrow")

    # Expected long-format frame: one row per "R_<suffix>" column, with the
    # suffix level keeping the string dtype of the original column labels.
    suffix_level = Index(["test1", "test2", "test3"], dtype="string[pyarrow_numpy]")
    expected_index = pd.MultiIndex.from_arrays(
        [[1, 1, 1], suffix_level],
        names=["ID", "UNPIVOTED"],
    )
    expected = DataFrame(
        [[1, 1], [1, 1], [1, 2]],
        columns=Index(["D", "R"], dtype=object),
        index=expected_index,
    )

    # Single-row wide frame whose labels are then cast to the arrow string dtype.
    wide = DataFrame(
        {
            "ID": {0: 1},
            "R_test1": {0: 1},
            "R_test2": {0: 1},
            "R_test3": {0: 2},
            "D": {0: 1},
        }
    )
    wide.columns = wide.columns.astype("string[pyarrow_numpy]")

    result = wide_to_long(
        wide, stubnames="R", i="ID", j="UNPIVOTED", sep="_", suffix=".*"
    )
    tm.assert_frame_equal(result, expected)

0 comments on commit a67defa

Please sign in to comment.