From ca919c7ba9ecfb40b84bf0eb820f559714970f9a Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 13 Feb 2024 14:32:54 -0500 Subject: [PATCH 1/7] rolling statistics --- tests/test_generic.py | 35 +++++++++++++++++++++++++++ xclim/indices/generic.py | 52 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/tests/test_generic.py b/tests/test_generic.py index c471ccab9..292a097ba 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -31,6 +31,41 @@ def test_season(self, q_series): assert o[0] == 31 + 29 +class TestSelectRollingResampleOp: + def test_rollingmax(self, q_series): + q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap + o = generic.select_rolling_resample_op( + q, "max", window=14, window_center=False, window_op="mean" + ) + np.testing.assert_array_equal( + [ + np.mean(np.arange(353, 366 + 1)), + np.mean(np.arange(353 + 365, 366 + 365 + 1)), + np.mean(np.arange(353 + 365 * 2, 366 + 365 * 2 + 1)), + ], + o.values, + ) + + def test_rollingmaxindexer(self, q_series): + q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap + o = generic.select_rolling_resample_op( + q, "min", window=14, window_center=False, window_op="max", season="DJF" + ) + np.testing.assert_array_equal( + [14, 367, 367 + 365], o.values + ) # 14th day for 1st year, then Jan 1st for the next two + + def test_freq(self, q_series): + q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap + o = generic.select_rolling_resample_op( + q, "max", window=3, window_center=True, window_op="sum", freq="MS" + ) + np.testing.assert_array_equal( + [np.sum([30, 31, 32]), np.sum([30 + 29, 31 + 29, 32 + 29])], + o.isel(time=slice(0, 2)).values, + ) + + class TestThresholdCount: def test_simple(self, tas_series): ts = tas_series(np.arange(365)) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 1190e935a..257a7dabb 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -104,6 +104,58 @@ def select_resample_op( return to_agg_units(out, da, op) +def select_rolling_resample_op( + da: xr.DataArray, + op: str, + window: int, + window_center: bool = True, + window_op: str = "mean", + freq: str = "YS", + out_units=None, + **indexer, +) -> xr.DataArray: + """Apply operation over each period that is part of the index selection, using a rolling window before the operation. + + Parameters + ---------- + da : xr.DataArray + Input data. + op : str {'min', 'max', 'mean', 'std', 'var', 'count', 'sum', 'integral', 'argmax', 'argmin'} or func + Reduce operation. Can either be a DataArray method or a function that can be applied to a DataArray. + window : int + Size of the rolling window (centered). + window_center : bool + If True, the window is centered on the date. If False, the window is right-aligned. + window_op : str {'min', 'max', 'mean', 'std', 'var', 'count', 'sum', 'integral'} + Operation to apply to the rolling window. Default: 'mean'. + freq : str + Resampling frequency defining the periods as defined in :ref:`timeseries.resampling`. Applied after the rolling window. + out_units : str, optional + Output units to assign. Only necessary if `op` is function not supported by :py:func:`xclim.core.units.to_agg_units`. + indexer : {dim: indexer, }, optional + Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values, + month=1 to select January, or month=[6,7,8] to select summer months. If not indexer is given, all values are + considered. + + Returns + ------- + xr.DataArray + The array for which the operation has been applied over each period. + """ + rolled = getattr(da.rolling(time=window, center=window_center), window_op)() + rolled = select_time(rolled, **indexer) + r = rolled.resample(time=freq) + if isinstance(op, str): + out = getattr(r, op.replace("integral", "sum"))(dim="time", keep_attrs=True) + else: + with xr.set_options(keep_attrs=True): + out = r.map(op) + op = op.__name__ + if out_units is not None: + return out.assign_attrs(units=out_units) + return to_agg_units(out, da, op) + + def doymax(da: xr.DataArray) -> xr.DataArray: """Return the day of year of the maximum value.""" i = da.argmax(dim="time") From d2b06644574ec765af312e5cbed52015d62267c9 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 13 Feb 2024 14:37:08 -0500 Subject: [PATCH 2/7] changes.rst --- CHANGES.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index f86f97581..3fb0565d3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changelog v0.48 (unreleased) ------------------ -Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`), Dante Castro (:user:`profesorpaiche`). +Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`), Dante Castro (:user:`profesorpaiche`), Gabriel Rondeau-Genesse (:user:`RondeauG`). Announcements ^^^^^^^^^^^^^ @@ -22,6 +22,7 @@ New features and enhancements * New ``xclim.core.calendar.stack_periods`` and ``unstack_periods`` for performing ``rolling(time=...).construct(..., stride=...)`` but with non-uniform temporal periods like years or months. They replace ``xclim.sdba.processing.construct_moving_yearly_window`` and ``unpack_moving_yearly_window`` which are deprecated and will be removed in a future release. * New ``as_dataset`` options for ``xclim.set_options``. When True, indicators will output Datasets instead of DataArrays. (:issue:`1257`, :pull:`1625`). * Added new option for UTCI calculation to cap low wind velocities to a minimum of 0.5 m/s following Bröde (2012) guidelines. (:issue:`1634`, :pull:`1635`). +* Added a new `xclim.indices.generic.select_rolling_resample_op` function to allow for computing rolling statistics. (:issue:`1480`, :pull:`1643`). Breaking changes ^^^^^^^^^^^^^^^^ From d68b6421dc8b520b139c22c951106a900ee97b88 Mon Sep 17 00:00:00 2001 From: RondeauG <38501935+RondeauG@users.noreply.github.com> Date: Tue, 13 Feb 2024 15:55:46 -0500 Subject: [PATCH 3/7] Update xclim/indices/generic.py Co-authored-by: Pascal Bourgault --- xclim/indices/generic.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 257a7dabb..eb5b922da 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -143,17 +143,8 @@ def select_rolling_resample_op( The array for which the operation has been applied over each period. """ rolled = getattr(da.rolling(time=window, center=window_center), window_op)() - rolled = select_time(rolled, **indexer) - r = rolled.resample(time=freq) - if isinstance(op, str): - out = getattr(r, op.replace("integral", "sum"))(dim="time", keep_attrs=True) - else: - with xr.set_options(keep_attrs=True): - out = r.map(op) - op = op.__name__ - if out_units is not None: - return out.assign_attrs(units=out_units) - return to_agg_units(out, da, op) + rolled = to_agg_units(rolled, da, window_op) + return select_resample_op(rolled, op=op, freq=freq, out_units=out_units, **indexer) def doymax(da: xr.DataArray) -> xr.DataArray: From 506162260f0ad9c406b91fcc4c6b0cad8739da5c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 Feb 2024 20:56:40 +0000 Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xclim/indices/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index eb5b922da..ee3c7cd69 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -143,7 +143,7 @@ def select_rolling_resample_op( The array for which the operation has been applied over each period. """ rolled = getattr(da.rolling(time=window, center=window_center), window_op)() - rolled = to_agg_units(rolled, da, window_op) + rolled = to_agg_units(rolled, da, window_op) return select_resample_op(rolled, op=op, freq=freq, out_units=out_units, **indexer) From 63654a5f96286b08dd0503f6356f19dff8c34623 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 13 Feb 2024 16:48:54 -0500 Subject: [PATCH 5/7] tests some units --- tests/test_generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_generic.py b/tests/test_generic.py index 292a097ba..86c225202 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -45,6 +45,7 @@ def test_rollingmax(self, q_series): ], o.values, ) + assert o.attrs["units"] == "m3 s-1" def test_rollingmaxindexer(self, q_series): q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap @@ -54,6 +55,7 @@ def test_rollingmaxindexer(self, q_series): np.testing.assert_array_equal( [14, 367, 367 + 365], o.values ) # 14th day for 1st year, then Jan 1st for the next two + assert o.attrs["units"] == "m3 s-1" def test_freq(self, q_series): q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap From 1d359f4e1a9570584552acd971644cfb55db97f2 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 15 Feb 2024 15:34:35 -0500 Subject: [PATCH 6/7] Fix tests --- tests/test_generic.py | 3 ++- xclim/indices/generic.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 86c225202..777e80d75 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -60,12 +60,13 @@ def test_rollingmaxindexer(self, q_series): def test_freq(self, q_series): q = q_series(np.arange(1, 366 + 365 + 365 + 1)) # 1st year is leap o = generic.select_rolling_resample_op( - q, "max", window=3, window_center=True, window_op="sum", freq="MS" + q, "max", window=3, window_center=True, window_op="integral", freq="MS" ) np.testing.assert_array_equal( [np.sum([30, 31, 32]), np.sum([30 + 29, 31 + 29, 32 + 29])], o.isel(time=slice(0, 2)).values, ) + assert o.attrs["units"] == "m3" class TestThresholdCount: diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index ee3c7cd69..8d92d08c1 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -142,7 +142,10 @@ def select_rolling_resample_op( xr.DataArray The array for which the operation has been applied over each period. """ - rolled = getattr(da.rolling(time=window, center=window_center), window_op)() + rolled = getattr( + da.rolling(time=window, center=window_center), + window_op.replace("integral", "sum"), + )() rolled = to_agg_units(rolled, da, window_op) return select_resample_op(rolled, op=op, freq=freq, out_units=out_units, **indexer) From 1f8b76f56d8b37c413bf7978584328dd9bc93239 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Fri, 16 Feb 2024 13:10:49 -0500 Subject: [PATCH 7/7] fix test --- tests/test_generic.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 777e80d75..7ec0771cd 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -63,7 +63,10 @@ def test_freq(self, q_series): q, "max", window=3, window_center=True, window_op="integral", freq="MS" ) np.testing.assert_array_equal( - [np.sum([30, 31, 32]), np.sum([30 + 29, 31 + 29, 32 + 29])], + [ + np.sum([30, 31, 32]) * 86400, + np.sum([30 + 29, 31 + 29, 32 + 29]) * 86400, + ], # m3 s-1 being summed by the frequency of the data o.isel(time=slice(0, 2)).values, ) assert o.attrs["units"] == "m3"