Skip to content

Commit

Permalink
OrangeDataFrame: Fix patched constructor
Browse files Browse the repository at this point in the history
This is a proper solution that would have worked before and would continue
to work in pandas>=3 -- if it were not for a bug in pandas
(pandas-dev/pandas#59913).

Hence, this commit also (dynamically) patches the bug in pandas.
  • Loading branch information
janezd committed Oct 4, 2024
1 parent 140aef4 commit 5b3b6bb
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 16 deletions.
38 changes: 22 additions & 16 deletions Orange/data/pandas_compat.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Pandas DataFrame↔Table conversion helpers"""
from unittest.mock import patch
from functools import partial

import numpy as np
from scipy import sparse as sp
from scipy.sparse import csr_matrix
import pandas as pd
from pandas.core.arrays import SparseArray
import pandas.core.arrays.sparse.accessor
from pandas.api.types import (
is_object_dtype,
is_datetime64_any_dtype,
Expand All @@ -22,6 +23,19 @@
__all__ = ['table_from_frame', 'table_to_frame']


# Patch a bug in pandas SparseFrameAccessor.to_dense
# As of pandas=3.0.0.dev0+1524.g23c497bb2f, to_dense ignores _constructor
# and always returns DataFrame.
# Remove this if https://github.com/pandas-dev/pandas/issues/59913 is fixed
def to_dense(self):
    """Return a dense copy of the accessor's parent frame.

    Every column of ``self._parent`` is densified through its array's
    ``to_dense()``. The result is built with the parent's ``_constructor``
    rather than a hard-coded ``DataFrame``, and keeps the parent's index
    and columns.
    """
    # pylint: disable=protected-access
    parent = self._parent
    dense_columns = {}
    for name, column in parent.items():
        dense_columns[name] = column.array.to_dense()
    make_frame = parent._constructor
    return make_frame(
        dense_columns, index=parent.index, columns=parent.columns)

# Monkey-patch pandas: replace SparseFrameAccessor.to_dense with the
# module-level to_dense defined in this file.
pandas.core.arrays.sparse.accessor.SparseFrameAccessor.to_dense = to_dense


class OrangeDataFrame(pd.DataFrame):
_metadata = ["orange_variables", "orange_weights",
"orange_attributes", "orange_role"]
Expand Down Expand Up @@ -74,8 +88,6 @@ def __init__(self, *args, **kwargs):
data = dict(enumerate(sparrays))
super().__init__(data, index=index, **kwargs)
self.columns = columns
# a hack to keep Orange df _metadata in sparse->dense conversion
self.sparse.to_dense = self.__patch_constructor(self.sparse.to_dense)
else:
copy = kwargs.pop("copy", False)
super().__init__(
Expand All @@ -88,21 +100,15 @@ def __init__(self, *args, **kwargs):
if table.W.size > 0 else {})
self.orange_attributes = table.attributes

def __patch_constructor(self, method):
def new_method(*args, **kwargs):
with patch(
'pandas.DataFrame',
OrangeDataFrame
):
df = method(*args, **kwargs)
df.__finalize__(self)
return df

return new_method

@property
def _constructor(self):
return OrangeDataFrame
return partial(self.from_existing, self)

@staticmethod
def from_existing(existing, *args, **kwargs):
    """Create a new object of the same type as *existing*.

    The remaining positional and keyword arguments are passed to the
    constructor of ``type(existing)``. The new object is then finalized
    from *existing* with ``__finalize__`` and returned.
    """
    frame_type = type(existing)
    new_frame = frame_type(*args, **kwargs)
    new_frame.__finalize__(existing)
    return new_frame

def to_orange_table(self):
    """Return the result of ``table_from_frame`` applied to this frame."""
    table = table_from_frame(self)
    return table
Expand Down
5 changes: 5 additions & 0 deletions Orange/data/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
pytz = None

class TestPandasCompat(unittest.TestCase):
def test_patch_for_to_dense(self):
if pd.__version__ >= "3" and "dev" not in pd.__version__:
self.fail("Try removing the patch for to_dense in pandas_compat.\n"
"If successful, remove this test.")

def test_table_from_frame(self):
nan = np.nan
df = pd.DataFrame([['a', 1, pd.Timestamp('2017-12-19')],
Expand Down

0 comments on commit 5b3b6bb

Please sign in to comment.