From 13550dcb620ddd8f76cb2292c6f2b6855344f6e9 Mon Sep 17 00:00:00 2001 From: bixbyr Date: Wed, 31 Jan 2024 20:55:00 -0800 Subject: [PATCH 1/2] Fix several deprecation warnings in pandas 2.1 which became actual errors in 2.2. Also updated several test cases to use iloc when comparing results. --- AUTHORS.rst | 1 + django_pandas/io.py | 2 +- django_pandas/tests/test_manager.py | 15 +++++++-------- django_pandas/utils.py | 3 +-- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index a8590bf..350c50e 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -29,3 +29,4 @@ Contributions - `Anton Ian Sipos `_ - `Chuan-Jhe Hwong `_ - `Thomas Grainger `_ +- `Ryan Smith `_ diff --git a/django_pandas/io.py b/django_pandas/io.py index 1859bfa..1bca871 100644 --- a/django_pandas/io.py +++ b/django_pandas/io.py @@ -84,7 +84,7 @@ def read_frame(qs, fieldnames=(), index_col=None, coerce_float=False, """ if fieldnames: - fieldnames = pd.unique(fieldnames) + fieldnames = pd.unique(pd.Series(fieldnames)) if index_col is not None and index_col not in fieldnames: # Add it to the field names if not already there fieldnames = tuple(fieldnames) + (index_col,) diff --git a/django_pandas/tests/test_manager.py b/django_pandas/tests/test_manager.py index ce070ae..f867209 100644 --- a/django_pandas/tests/test_manager.py +++ b/django_pandas/tests/test_manager.py @@ -87,9 +87,9 @@ def setUp(self): col4=cols['col4'])) WideTimeSeriesDateField.objects.bulk_create(create_list) - create_list = [LongTimeSeries(date_ix=r[0], series_name=r[1][0], - value=r[1][1]) - for r in self.ts2.iterrows()] + create_list = [LongTimeSeries(date_ix=timestamp, series_name=s.iloc[0], + value=s.iloc[1]) + for timestamp, s in self.ts2.iterrows()] LongTimeSeries.objects.bulk_create(create_list) @@ -222,11 +222,10 @@ def setUp(self): 'value_col_d': np.random.randn(11), 'value_col_e': np.random.randn(11), 'value_col_f': np.random.randn(11)}) - - create_list = [PivotData(row_col_a=r[1][0], 
row_col_b=r[1][1], - row_col_c=r[1][2], value_col_d=r[1][3], - value_col_e=r[1][4], value_col_f=r[1][5]) - for r in self.data.iterrows()] + create_list = [PivotData(row_col_a=r.iloc[0], row_col_b=r.iloc[1], + row_col_c=r.iloc[2], value_col_d=r.iloc[3], + value_col_e=r.iloc[4], value_col_f=r.iloc[5]) + for _, r in self.data.iterrows()] PivotData.objects.bulk_create(create_list) diff --git a/django_pandas/utils.py b/django_pandas/utils.py index 935ca24..0778f9d 100644 --- a/django_pandas/utils.py +++ b/django_pandas/utils.py @@ -48,8 +48,7 @@ def get_cache_key_from_pk(pk): def inner(pk_series): pk_series = pk_series.astype(object).where(pk_series.notnull(), None) - cache_keys = pk_series.apply( - get_cache_key_from_pk, convert_dtype=False) + cache_keys = pk_series.apply(get_cache_key_from_pk) unique_cache_keys = list(filter(None, cache_keys.unique())) if not unique_cache_keys: From 9caf00cfc449fc9a7a61b601933977e31393438a Mon Sep 17 00:00:00 2001 From: bixbyr Date: Wed, 6 Mar 2024 20:23:52 -0800 Subject: [PATCH 2/2] Fix tests which broke in pandas 2.2.0 because makeTimeDataFrame was a non-public method which was removed. This was done by just taking the parts we needed. Also fixed a couple of formatting issues that violate Python style guides. 
--- django_pandas/io.py | 2 +- django_pandas/managers.py | 6 ++-- django_pandas/tests/test_manager.py | 43 +++++++++++++++++++++++++---- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/django_pandas/io.py b/django_pandas/io.py index 1bca871..35a67ab 100644 --- a/django_pandas/io.py +++ b/django_pandas/io.py @@ -151,7 +151,7 @@ def read_frame(qs, fieldnames=(), index_col=None, coerce_float=False, df.set_index(index_col, inplace=True) if datetime_index: - df.index = pd.to_datetime(df.index, errors="ignore") + df.index = pd.to_datetime(df.index) return df diff --git a/django_pandas/managers.py b/django_pandas/managers.py index f910439..48738ed 100644 --- a/django_pandas/managers.py +++ b/django_pandas/managers.py @@ -241,9 +241,9 @@ def to_timeseries(self, fieldnames=(), verbose=True, if freq is not None: if agg_kwargs is None: - agg_kwargs=dict() + agg_kwargs = dict() if agg_args is None: - agg_args=[] + agg_args = [] df = df.resample(freq, **rs_kwargs).agg(*agg_args, **agg_kwargs) return df @@ -253,7 +253,7 @@ def to_dataframe(self, fieldnames=(), verbose=True, index=None, """ Returns a DataFrame from the queryset - Paramaters + Parameters ----------- fieldnames: The model field names(columns) to utilise in creating diff --git a/django_pandas/tests/test_manager.py b/django_pandas/tests/test_manager.py index f867209..ba7fb2d 100644 --- a/django_pandas/tests/test_manager.py +++ b/django_pandas/tests/test_manager.py @@ -1,8 +1,12 @@ +from datetime import datetime + from django.test import TestCase import pandas as pd import numpy as np import pickle import django +from pandas.core.indexes.datetimes import bdate_range + from .models import ( DataFrame, WideTimeSeries, WideTimeSeriesDateField, LongTimeSeries, PivotData, Dude, Car, Spot @@ -68,8 +72,28 @@ def unpivot(self, frame): 'date': np.tile(np.array(frame.index), K)} return pd.DataFrame(data, columns=['date', 'variable', 'value']) + def _makeTimeDataFrame(self, n_rows: int) -> pd.DataFrame: + # 
Beginning in 2.2 pandas._testing.makeTimeDataFrame was removed, however all that is required for the tests + # in this module is a dataframe with columns A, B, C, D of random values indexed by a DatetimeIndex. + data = {} + for c in ['A', 'B', 'C', 'D']: + dt = datetime(2000, 1, 1) + dr = bdate_range(dt, periods=n_rows, freq='B', name=c) + pd.DatetimeIndex(dr, name=c) + + data[c] = pd.Series( + np.random.default_rng(2).standard_normal(n_rows), + index=pd.DatetimeIndex(dr, name=c), + name=c, + ) + return pd.DataFrame(data) + def setUp(self): - self.ts = tm.makeTimeDataFrame(100) + if PANDAS_VERSIONINFO >= '2.2.0': + self.ts = self._makeTimeDataFrame(100) + else: + self.ts = tm.makeTimeDataFrame(100) + self.ts2 = self.unpivot(self.ts).set_index('date') self.ts.columns = ['col1', 'col2', 'col3', 'col4'] create_list = [] @@ -125,18 +149,24 @@ def test_longstorage(self): def test_resampling(self): qs = LongTimeSeries.objects.all() - rs_kwargs = {'kind': 'period'} agg_args = None agg_kwargs = None if PANDAS_VERSIONINFO >= '0.25.0': agg_kwargs = {'func': 'sum'} else: - agg_args= ['sum'] + agg_args = ['sum'] + + if PANDAS_VERSIONINFO >= '2.2.0': + freq = 'ME' + else: + freq = 'M' + df = qs.to_timeseries(index='date_ix', pivot_columns='series_name', values='value', storage='long', - freq='M', rs_kwargs=rs_kwargs, + freq=freq, agg_args=agg_args, agg_kwargs=agg_kwargs) + df.index = pd.PeriodIndex(df.index) self.assertEqual([d.month for d in qs.dates('date_ix', 'month')], df.index.month.tolist()) @@ -147,9 +177,10 @@ def test_resampling(self): qs2 = WideTimeSeries.objects.all() df1 = qs2.to_timeseries(index='date_ix', storage='wide', - freq='M', rs_kwargs=rs_kwargs, + freq=freq, agg_args=agg_args, - agg_kwargs = agg_kwargs) + agg_kwargs=agg_kwargs) + df1.index = pd.PeriodIndex(df1.index) self.assertEqual([d.month for d in qs.dates('date_ix', 'month')], df1.index.month.tolist())