From 48efef8b506f95894f5ccbfbfccb86bd5346b799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Wed, 12 Jun 2024 21:39:52 -0400 Subject: [PATCH 1/5] black ice events --- CHANGES.rst | 11 ++++- tests/test_indices.py | 28 +++++++++++- xclim/core/units.py | 1 + xclim/indices/_threshold.py | 89 ++++++++++++++++++++++++++++++++++++- xclim/indices/run_length.py | 34 +++++++++++--- 5 files changed, 154 insertions(+), 9 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 60120c214..722a7767a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,16 @@ Changelog v0.50.0 (unreleased) -------------------- -Contributors to this version: Trevor James Smith (:user:`Zeitsperre`). +Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Éric Dupuis (:user:`coxipi`). + +New indicators +^^^^^^^^^^^^^^ +* New indicator ``black_ice_events`` gives statistics about black ice sequences. + + +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* ``xclim.indices.run_length.runs_with_holes`` allows to input a condition that must be met for a run to start and a second condition that must be met for the run to stop. Internal changes ^^^^^^^^^^^^^^^^ diff --git a/tests/test_indices.py b/tests/test_indices.py index e2a8ccc47..4e802ac0b 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -23,7 +23,7 @@ from xclim import indices as xci from xclim.core.calendar import convert_calendar, date_range, percentile_doy from xclim.core.options import set_options -from xclim.core.units import ValidationError, convert_units_to, units +from xclim.core.units import ValidationError, convert_units_to, rate2amount, units K2C = 273.15 @@ -338,6 +338,32 @@ def test_bedd(self, method, end_date, deg_days, max_deg_days): if method == "icclim": np.testing.assert_array_equal(bedd, bedd_high_lat) + def test_black_ice_events(self, open_dataset): + times = pd.date_range("1950-01-01", "1950-01-31", freq="D") + da = xr.DataArray( + np.zeros(len(times)), + dims={"time"}, + coords={"time": times}, + attrs={"units": "kg m-2 s-1"}, + ) + + # Two sequences separated by 3 days, which will be split with window_stop = 3 + da[0:10] = 1 + da[13:20] = 1 + out = xci.black_ice_events( + da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 + ) + assert (out.run_lengths == [10, 7]).all() + + # Two sequences separated by 2 days, which will form a single large event + da[10] = 1 + out = xci.black_ice_events( + da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 + ) + pram = rate2amount(da) + assert out.run_lengths == 20 + assert out.cumulative_precipitation == pram.sum() + def test_cool_night_index(self, open_dataset): ds = open_dataset("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200701-200712.nc") ds = ds.rename(dict(tas="tasmin")) diff --git a/xclim/core/units.py b/xclim/core/units.py index 7f432eab7..12de3ac7a 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -409,6 +409,7 @@ def cf_conversion( FREQ_UNITS = { "D": "d", "W": "week", + "h": "h", } """ Resampling frequency units for :py:func:`xclim.core.units.infer_sampling_units`. diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 150c55b04..16b3104fb 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -6,11 +6,12 @@ import numpy as np import xarray -from xclim.core.calendar import doy_from_string, get_calendar, select_time +from xclim.core.calendar import doy_from_string, get_calendar, parse_offset, select_time from xclim.core.missing import at_least_n_valid from xclim.core.units import ( convert_units_to, declare_units, + ensure_cf_units, pint2cfunits, rate2amount, str2pint, @@ -36,6 +37,7 @@ # -------------------------------------------------- # __all__ = [ + "black_ice_events", "calm_days", "cold_spell_days", "cold_spell_frequency", @@ -105,6 +107,91 @@ ] +@declare_units(pr="[precipitation]", thresh="[precipitation]") +def black_ice_events( + pr: xarray.DataArray, + thresh: Quantified = "1 kg m-2 d-1", + window_start: int = 3, + window_stop: int = 3, + freq: str | None = None, +) -> xarray.Dataset: + r"""Black ice events. + + Parameters + ---------- + pr : xarray.DataArray + Black ice precipitation (`prfr`) + thresh : Quantified + Threshold that must be exceeded to be considered an event + window_start: int + Number of time steps above the threshold required to start an event + window_stop : int + Number of time steps below the threshold required to stop an event + freq : str + Resampling frequency. + + Returns + ------- + xarray.DataArray, [time] + Number of days with average near-surface wind speed below threshold. + """ + freq = xarray.infer_freq(pr.time) + mag, units, _, _ = parse_offset(freq) + # condition to respect for `window_start` time steps to start a run + thresh = convert_units_to(thresh, pr) + da_start = pr >= thresh + da_stop = not da_start + + # Get basic blocks to work with, our runs with holes and the lengths of those runs + # Series of ones indicating where we have continuous runs of black ice with pauses + # not exceeding `window_stop` + runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop) + + # Compute the length of black ice events + # I think int16 is safe enough + ds = rl.rle(runs).to_dataset(name="run_lengths") + ds["run_lengths"] = ds.run_lengths.astype(np.int16) + ds.run_lengths.attrs["units"] = "" + + # Time duration where the precipitation threshold is exceeded during an event + # (duration of complete run - duration of holes in the run ) + ds["precipitation_duration"] = ( + rl._cumsum_reset( + da_start.where(runs == 1), index="first", reset_on_zero=False + ).astype(np.int16) + * mag + ) + ds["precipitation_duration"].attrs["units"] = units + + # Cumulated precipitation in a given black ice event + pram = rate2amount(pr) + ds["cumulative_precipitation"] = rl._cumsum_reset( + pram.where(runs == 1), index="first", reset_on_zero=False + ) + ds["cumulative_precipitation"].attrs["units"] = pram.units + + # Reduce time dim to event dimension + mask = (ds.run_lengths > 0).any(dim=[d for d in ds.dims if d != "time"]) + ds = ds.where(mask).dropna(dim="time").rename({"time": "event"}) + + # start time : with current implementation of time reduction above, + # this is not necessary, but it could be if we choose another way. + # ds["start"] = ds["time"].broadcast_like(ds) + + # Other indices that could be completely done outside of the function, no input needed anymore + ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) + ds.number_of_events.attrs["units"] = "" + + ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] + units = ( + f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" + ) + ds["rate"].attrs["units"] = ensure_cf_units(units) + + ds.attrs["units"] = "" + return ds + + @declare_units(sfcWind="[speed]", thresh="[speed]") def calm_days( sfcWind: xarray.DataArray, thresh: Quantified = "2 m s-1", freq: str = "MS" diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index b3015230a..2917d5150 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -118,10 +118,11 @@ def resample_and_rl( return out -def _cumsum_reset_on_zero( +def _cumsum_reset( da: xr.DataArray, dim: str = "time", index: str = "last", + reset_on_zero: bool = True, ) -> xr.DataArray: """Compute the cumulative sum for each series of numbers separated by zero. @@ -134,6 +135,9 @@ def _cumsum_reset_on_zero( index : {'first', 'last'} If 'first', the largest value of the cumulative sum is indexed with the first element in the run. If 'last'(default), with the last element in the run. + reset_on_zero : bool + If True, the cumulative sum is reset on each zero value of `da`. Otherwise, the cumulative sum resets + on NaNs. Default is True. Returns ------- @@ -145,7 +149,10 @@ def _cumsum_reset_on_zero( # Example: da == 100110111 -> cs_s == 100120123 cs = da.cumsum(dim=dim) # cumulative sum e.g. 111233456 - cs2 = cs.where(da == 0) # keep only numbers at positions of zeroes e.g. N11NN3NNN + cond = da == 0 if reset_on_zero else da.isnull() # reset condition + cs2 = cs.where( + cond + ) # keep only numbers at positions of zeroes e.g. N11NN3NNN (default) cs2[{dim: 0}] = 0 # put a zero in front e.g. 011NN3NNN cs2 = cs2.ffill(dim=dim) # e.g. 011113333 out = cs - cs2 @@ -186,7 +193,7 @@ def rle( da = da[{dim: slice(None, None, -1)}] # Get cumulative sum for each series of 1, e.g. da == 100110111 -> cs_s == 100120123 - cs_s = _cumsum_reset_on_zero(da, dim) + cs_s = _cumsum_reset(da, dim) # Keep total length of each series (and also keep 0's), e.g. 100120123 -> 100N20NN3 # Keep numbers with a 0 to the right and also the last number @@ -495,7 +502,7 @@ def find_boundary_run(runs, position): else: # _cusum_reset_on_zero() is an intermediate step in rle, which is sufficient here - d = _cumsum_reset_on_zero(da, dim=dim, index=position) + d = _cumsum_reset(da, dim=dim, index=position) d = xr.where(d >= window, 1, 0) # for "first" run, return "first" element in the run (and conversely for "last" run) if freq is not None: @@ -744,8 +751,8 @@ def extract_events( da_start = da_start.astype(int).fillna(0) da_stop = da_stop.astype(int).fillna(0) - start_runs = _cumsum_reset_on_zero(da_start, dim=dim, index="first") - stop_runs = _cumsum_reset_on_zero(da_stop, dim=dim, index="first") + start_runs = _cumsum_reset(da_start, dim=dim, index="first") + stop_runs = _cumsum_reset(da_stop, dim=dim, index="first") start_positions = xr.where(start_runs >= window_start, 1, np.NaN) stop_positions = xr.where(stop_runs >= window_stop, 0, np.NaN) @@ -755,6 +762,21 @@ def extract_events( return runs +def runs_with_holes(da_start, window_start, da_stop, window_stop, dim="time"): + """Runs with holes""" + da_start = da_start.astype(int).fillna(0) + da_stop = da_stop.astype(int).fillna(0) + + start_runs = _cumsum_reset(da_start, dim=dim, index="first") + stop_runs = _cumsum_reset(da_stop, dim=dim, index="first") + start_positions = xr.where(start_runs >= window_start, 1, np.NaN) + stop_positions = xr.where(stop_runs >= window_stop, 0, np.NaN) + + # start positions (1) are f-filled until a stop position (0) is met + runs = stop_positions.combine_first(start_positions).ffill(dim=dim).fillna(0) + return runs + + def season( da: xr.DataArray, window: int, From 35b0f75754dddb9f693d75c596fcce515a430bae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Wed, 12 Jun 2024 21:47:10 -0400 Subject: [PATCH 2/5] black ice -> freezing rain --- CHANGES.rst | 2 +- tests/test_indices.py | 6 +- xclim/indices/_threshold.py | 172 ++++++++++++++++++------------------ 3 files changed, 90 insertions(+), 90 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 722a7767a..e791b36c4 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,7 +8,7 @@ Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Éric Dup New indicators ^^^^^^^^^^^^^^ -* New indicator ``black_ice_events`` gives statistics about black ice sequences. +* New indicator ``freezing_rain_events`` gives statistics about freezing rain sequences. New features and enhancements diff --git a/tests/test_indices.py b/tests/test_indices.py index 4e802ac0b..48e43929e 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -338,7 +338,7 @@ def test_bedd(self, method, end_date, deg_days, max_deg_days): if method == "icclim": np.testing.assert_array_equal(bedd, bedd_high_lat) - def test_black_ice_events(self, open_dataset): + def test_freezing_rain_events(self, open_dataset): times = pd.date_range("1950-01-01", "1950-01-31", freq="D") da = xr.DataArray( np.zeros(len(times)), @@ -350,14 +350,14 @@ def test_black_ice_events(self, open_dataset): # Two sequences separated by 3 days, which will be split with window_stop = 3 da[0:10] = 1 da[13:20] = 1 - out = xci.black_ice_events( + out = xci.freezing_rain_events( da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 ) assert (out.run_lengths == [10, 7]).all() # Two sequences separated by 2 days, which will form a single large event da[10] = 1 - out = xci.black_ice_events( + out = xci.freezing_rain_events( da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 ) pram = rate2amount(da) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 16b3104fb..3a7bd3cf1 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -37,7 +37,6 @@ # -------------------------------------------------- # __all__ = [ - "black_ice_events", "calm_days", "cold_spell_days", "cold_spell_frequency", @@ -54,6 +53,7 @@ "first_day_temperature_above", "first_day_temperature_below", "first_snowfall", + "freezing_rain_events", "frost_free_season_end", "frost_free_season_length", "frost_free_season_start", @@ -107,91 +107,6 @@ ] -@declare_units(pr="[precipitation]", thresh="[precipitation]") -def black_ice_events( - pr: xarray.DataArray, - thresh: Quantified = "1 kg m-2 d-1", - window_start: int = 3, - window_stop: int = 3, - freq: str | None = None, -) -> xarray.Dataset: - r"""Black ice events. - - Parameters - ---------- - pr : xarray.DataArray - Black ice precipitation (`prfr`) - thresh : Quantified - Threshold that must be exceeded to be considered an event - window_start: int - Number of time steps above the threshold required to start an event - window_stop : int - Number of time steps below the threshold required to stop an event - freq : str - Resampling frequency. - - Returns - ------- - xarray.DataArray, [time] - Number of days with average near-surface wind speed below threshold. - """ - freq = xarray.infer_freq(pr.time) - mag, units, _, _ = parse_offset(freq) - # condition to respect for `window_start` time steps to start a run - thresh = convert_units_to(thresh, pr) - da_start = pr >= thresh - da_stop = not da_start - - # Get basic blocks to work with, our runs with holes and the lengths of those runs - # Series of ones indicating where we have continuous runs of black ice with pauses - # not exceeding `window_stop` - runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop) - - # Compute the length of black ice events - # I think int16 is safe enough - ds = rl.rle(runs).to_dataset(name="run_lengths") - ds["run_lengths"] = ds.run_lengths.astype(np.int16) - ds.run_lengths.attrs["units"] = "" - - # Time duration where the precipitation threshold is exceeded during an event - # (duration of complete run - duration of holes in the run ) - ds["precipitation_duration"] = ( - rl._cumsum_reset( - da_start.where(runs == 1), index="first", reset_on_zero=False - ).astype(np.int16) - * mag - ) - ds["precipitation_duration"].attrs["units"] = units - - # Cumulated precipitation in a given black ice event - pram = rate2amount(pr) - ds["cumulative_precipitation"] = rl._cumsum_reset( - pram.where(runs == 1), index="first", reset_on_zero=False - ) - ds["cumulative_precipitation"].attrs["units"] = pram.units - - # Reduce time dim to event dimension - mask = (ds.run_lengths > 0).any(dim=[d for d in ds.dims if d != "time"]) - ds = ds.where(mask).dropna(dim="time").rename({"time": "event"}) - - # start time : with current implementation of time reduction above, - # this is not necessary, but it could be if we choose another way. - # ds["start"] = ds["time"].broadcast_like(ds) - - # Other indices that could be completely done outside of the function, no input needed anymore - ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) - ds.number_of_events.attrs["units"] = "" - - ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] - units = ( - f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" - ) - ds["rate"].attrs["units"] = ensure_cf_units(units) - - ds.attrs["units"] = "" - return ds - - @declare_units(sfcWind="[speed]", thresh="[speed]") def calm_days( sfcWind: xarray.DataArray, thresh: Quantified = "2 m s-1", freq: str = "MS" @@ -2743,6 +2658,91 @@ def wetdays_prop( return fwd +@declare_units(pr="[precipitation]", thresh="[precipitation]") +def freezing_rain_events( + pr: xarray.DataArray, + thresh: Quantified = "1 kg m-2 d-1", + window_start: int = 3, + window_stop: int = 3, + freq: str | None = None, +) -> xarray.Dataset: + r"""Black ice events. + + Parameters + ---------- + pr : xarray.DataArray + Black ice precipitation (`prfr`) + thresh : Quantified + Threshold that must be exceeded to be considered an event + window_start: int + Number of time steps above the threshold required to start an event + window_stop : int + Number of time steps below the threshold required to stop an event + freq : str + Resampling frequency. + + Returns + ------- + xarray.DataArray, [time] + Number of days with average near-surface wind speed below threshold. + """ + freq = xarray.infer_freq(pr.time) + mag, units, _, _ = parse_offset(freq) + # condition to respect for `window_start` time steps to start a run + thresh = convert_units_to(thresh, pr) + da_start = pr >= thresh + da_stop = (1 - da_start).astype(bool) + + # Get basic blocks to work with, our runs with holes and the lengths of those runs + # Series of ones indicating where we have continuous runs of freezing rain with pauses + # not exceeding `window_stop` + runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop) + + # Compute the length of freezing rain events + # I think int16 is safe enough + ds = rl.rle(runs).to_dataset(name="run_lengths") + ds["run_lengths"] = ds.run_lengths.astype(np.int16) + ds.run_lengths.attrs["units"] = "" + + # Time duration where the precipitation threshold is exceeded during an event + # (duration of complete run - duration of holes in the run ) + ds["precipitation_duration"] = ( + rl._cumsum_reset( + da_start.where(runs == 1), index="first", reset_on_zero=False + ).astype(np.int16) + * mag + ) + ds["precipitation_duration"].attrs["units"] = units + + # Cumulated precipitation in a given freezing rain event + pram = rate2amount(pr) + ds["cumulative_precipitation"] = rl._cumsum_reset( + pram.where(runs == 1), index="first", reset_on_zero=False + ) + ds["cumulative_precipitation"].attrs["units"] = pram.units + + # Reduce time dim to event dimension + mask = (ds.run_lengths > 0).any(dim=[d for d in ds.dims if d != "time"]) + ds = ds.where(mask).dropna(dim="time").rename({"time": "event"}) + + # start time : with current implementation of time reduction above, + # this is not necessary, but it could be if we choose another way. + # ds["start"] = ds["time"].broadcast_like(ds) + + # Other indices that could be completely done outside of the function, no input needed anymore + ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) + ds.number_of_events.attrs["units"] = "" + + ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] + units = ( + f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" + ) + ds["rate"].attrs["units"] = ensure_cf_units(units) + + ds.attrs["units"] = "" + return ds + + @declare_units(tasmin="[temperature]", thresh="[temperature]") def maximum_consecutive_frost_days( tasmin: xarray.DataArray, From 44d868112529ef8c6fd38de578eac4a1a1205a1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Wed, 12 Jun 2024 21:50:12 -0400 Subject: [PATCH 3/5] update CHANGES --- CHANGES.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index bdedc28b1..d4cac4682 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,14 +8,14 @@ Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Éric Dup New indicators ^^^^^^^^^^^^^^ -* New indicator ``freezing_rain_events`` gives statistics about freezing rain sequences. +* New indicator ``freezing_rain_events`` gives statistics about freezing rain sequences. (:pull:`1778`). New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * New properties: Bivariate Spell Length (``xclim.sdba.properties.bivariate_spell_length``), generalized spell lengths with an argument for `window`, and specific spell lengths with `window` fixed to 1 (``xclim.sdba.propertiies.threshold_count``, ``xclim.sdba.propertiies.bivariate_threshold_count``). (:pull:`1758`). * New option `normalize` in ``sdba.measures.taylordiagram`` to obtain normalized Taylor diagrams (divide standard deviations by standard deviation of the reference). (:pull:`1764`). -* ``xclim.indices.run_length.runs_with_holes`` allows to input a condition that must be met for a run to start and a second condition that must be met for the run to stop. +* ``xclim.indices.run_length.runs_with_holes`` allows to input a condition that must be met for a run to start and a second condition that must be met for the run to stop. (:pull:`1778`). Breaking changes ^^^^^^^^^^^^^^^^ From d036823472e0aa6057346480f5d62c2d562fe289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Fri, 14 Jun 2024 19:57:51 -0400 Subject: [PATCH 4/5] event dimension with padding --- tests/test_indices.py | 8 +++--- xclim/indices/_threshold.py | 56 +++++++++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/tests/test_indices.py b/tests/test_indices.py index 8305ba727..297175f51 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -353,16 +353,16 @@ def test_freezing_rain_events(self, open_dataset): out = xci.freezing_rain_events( da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 ) - assert (out.run_lengths == [10, 7]).all() + assert (out.run_lengths.values[0:2] == [10, 7]).all() # Two sequences separated by 2 days, which will form a single large event - da[10] = 1 + da[12] = 1 out = xci.freezing_rain_events( da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 ) pram = rate2amount(da) - assert out.run_lengths == 20 - assert out.cumulative_precipitation == pram.sum() + assert out.run_lengths.values[0] == 20 + assert (out.cumulative_precipitation - pram.sum()).sum() == 0 def test_cool_night_index(self, open_dataset): ds = open_dataset("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200701-200712.nc") diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 6b7a29561..6ed7601f9 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -2664,12 +2664,12 @@ def freezing_rain_events( window_stop: int = 3, freq: str | None = None, ) -> xarray.Dataset: - r"""Black ice events. + r"""Freezing rain events. Parameters ---------- pr : xarray.DataArray - Black ice precipitation (`prfr`) + Freezing precipitation (`prfr`) thresh : Quantified Threshold that must be exceeded to be considered an event window_start: int @@ -2681,8 +2681,7 @@ def freezing_rain_events( Returns ------- - xarray.DataArray, [time] - Number of days with average near-surface wind speed below threshold. + xarray.Dataset """ freq = xarray.infer_freq(pr.time) mag, units, _, _ = parse_offset(freq) @@ -2712,25 +2711,58 @@ def freezing_rain_events( ) ds["precipitation_duration"].attrs["units"] = units - # Cumulated precipitation in a given freezing rain event + # # Cumulated precipitation in a given freezing rain event pram = rate2amount(pr) ds["cumulative_precipitation"] = rl._cumsum_reset( pram.where(runs == 1), index="first", reset_on_zero=False ) ds["cumulative_precipitation"].attrs["units"] = pram.units - # Reduce time dim to event dimension - mask = (ds.run_lengths > 0).any(dim=[d for d in ds.dims if d != "time"]) - ds = ds.where(mask).dropna(dim="time").rename({"time": "event"}) - - # start time : with current implementation of time reduction above, - # this is not necessary, but it could be if we choose another way. - # ds["start"] = ds["time"].broadcast_like(ds) + # Keep time as a variable, it will be used to keep start of events + ds["start"] = ds["time"].broadcast_like(ds) # .astype(int) + # I have to convert it to an integer for the filtering, time object won't do + # Since there are conversion needing a time object earlier, I think it's ok + # to assume this here? + time_min = ds.start.min() + ds["start"] = (ds.start - time_min).astype("timedelta64[s]").astype(int) + + # Filter events: Reduce time dimension + def _filter_events(da, rl, max_event_number): + out = np.full(max_event_number, np.NaN) + events_start = da[rl > 0] + out[: len(events_start)] = events_start + return out + + max_event_number = int(np.ceil(pr.time.size / (window_start + window_stop))) + v_attrs = {v: ds[v].attrs for v in ds.data_vars} + ds = xarray.apply_ufunc( + _filter_events, + ds, + ds.run_lengths, + input_core_dims=[["time"], ["time"]], + output_core_dims=[["event"]], + kwargs=dict(max_event_number=max_event_number), + output_sizes={"event": max_event_number}, + dask="parallelized", + vectorize=True, + ).assign_attrs(ds.attrs) + + ds["event"] = np.arange(1, ds.event.size + 1) + for v in ds.data_vars: + ds[v].attrs = v_attrs[v] + + # convert back start to a time + ds["start"] = time_min.astype("datetime64[ns]") + ds["start"].astype( + "timedelta64[ns]" + ) # Other indices that could be completely done outside of the function, no input needed anymore + + # number of events ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) ds.number_of_events.attrs["units"] = "" + # mean rate of precipitation during event ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] units = ( f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" From d556d95a560a156bc8c705a9ecb57accf8676df5 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 6 Sep 2024 17:44:39 -0400 Subject: [PATCH 5/5] first pass to generalize --- xclim/indices/_threshold.py | 121 +-------------------------------- xclim/indices/generic.py | 132 +++++++++++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 121 deletions(-) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index a6989aaca..2e7d9ea8d 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -8,12 +8,11 @@ import xarray from xclim.core import DayOfYearStr, Quantified -from xclim.core.calendar import doy_from_string, get_calendar, parse_offset +from xclim.core.calendar import doy_from_string, get_calendar from xclim.core.missing import at_least_n_valid from xclim.core.units import ( convert_units_to, declare_units, - ensure_cf_units, pint2cfunits, rate2amount, str2pint, @@ -55,7 +54,6 @@ "first_day_temperature_above", "first_day_temperature_below", "first_snowfall", - "freezing_rain_events", "frost_free_season_end", "frost_free_season_length", "frost_free_season_start", @@ -2624,123 +2622,6 @@ def wetdays_prop( return fwd -@declare_units(pr="[precipitation]", thresh="[precipitation]") -def freezing_rain_events( - pr: xarray.DataArray, - thresh: Quantified = "1 kg m-2 d-1", - window_start: int = 3, - window_stop: int = 3, - freq: str | None = None, -) -> xarray.Dataset: - r"""Freezing rain events. - - Parameters - ---------- - pr : xarray.DataArray - Freezing precipitation (`prfr`) - thresh : Quantified - Threshold that must be exceeded to be considered an event - window_start: int - Number of time steps above the threshold required to start an event - window_stop : int - Number of time steps below the threshold required to stop an event - freq : str - Resampling frequency. - - Returns - ------- - xarray.Dataset - """ - freq = xarray.infer_freq(pr.time) - mag, units, _, _ = parse_offset(freq) - # condition to respect for `window_start` time steps to start a run - thresh = convert_units_to(thresh, pr) - da_start = pr >= thresh - da_stop = (1 - da_start).astype(bool) - - # Get basic blocks to work with, our runs with holes and the lengths of those runs - # Series of ones indicating where we have continuous runs of freezing rain with pauses - # not exceeding `window_stop` - runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop) - - # Compute the length of freezing rain events - # I think int16 is safe enough - ds = rl.rle(runs).to_dataset(name="run_lengths") - ds["run_lengths"] = ds.run_lengths.astype(np.int16) - ds.run_lengths.attrs["units"] = "" - - # Time duration where the precipitation threshold is exceeded during an event - # (duration of complete run - duration of holes in the run ) - ds["precipitation_duration"] = ( - rl._cumsum_reset( - da_start.where(runs == 1), index="first", reset_on_zero=False - ).astype(np.int16) - * mag - ) - ds["precipitation_duration"].attrs["units"] = units - - # # Cumulated precipitation in a given freezing rain event - pram = rate2amount(pr) - ds["cumulative_precipitation"] = rl._cumsum_reset( - pram.where(runs == 1), index="first", reset_on_zero=False - ) - ds["cumulative_precipitation"].attrs["units"] = pram.units - - # Keep time as a variable, it will be used to keep start of events - ds["start"] = ds["time"].broadcast_like(ds) # .astype(int) - # I have to convert it to an integer for the filtering, time object won't do - # Since there are conversion needing a time object earlier, I think it's ok - # to assume this here? - time_min = ds.start.min() - ds["start"] = (ds.start - time_min).astype("timedelta64[s]").astype(int) - - # Filter events: Reduce time dimension - def _filter_events(da, rl, max_event_number): - out = np.full(max_event_number, np.NaN) - events_start = da[rl > 0] - out[: len(events_start)] = events_start - return out - - max_event_number = int(np.ceil(pr.time.size / (window_start + window_stop))) - v_attrs = {v: ds[v].attrs for v in ds.data_vars} - ds = xarray.apply_ufunc( - _filter_events, - ds, - ds.run_lengths, - input_core_dims=[["time"], ["time"]], - output_core_dims=[["event"]], - kwargs=dict(max_event_number=max_event_number), - output_sizes={"event": max_event_number}, - dask="parallelized", - vectorize=True, - ).assign_attrs(ds.attrs) - - ds["event"] = np.arange(1, ds.event.size + 1) - for v in ds.data_vars: - ds[v].attrs = v_attrs[v] - - # convert back start to a time - ds["start"] = time_min.astype("datetime64[ns]") + ds["start"].astype( - "timedelta64[ns]" - ) - - # Other indices that could be completely done outside of the function, no input needed anymore - - # number of events - ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) - ds.number_of_events.attrs["units"] = "" - - # mean rate of precipitation during event - ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] - units = ( - f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" - ) - ds["rate"].attrs["units"] = ensure_cf_units(units) - - ds.attrs["units"] = "" - return ds - - @declare_units(tasmin="[temperature]", thresh="[temperature]") def maximum_consecutive_frost_days( tasmin: xarray.DataArray, diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 43bc85720..42080a21d 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -18,12 +18,18 @@ from xarray.coding.cftime_offsets import _MONTH_ABBREVIATIONS # noqa from xclim.core import DayOfYearStr, Quantified -from xclim.core.calendar import doy_to_days_since, get_calendar, select_time +from xclim.core.calendar import ( + doy_to_days_since, + get_calendar, + parse_offset, + select_time, +) from xclim.core.units import ( convert_units_to, declare_relative_units, infer_context, pint2cfunits, + rate2amount, str2pint, to_agg_units, ) @@ -1474,3 +1480,127 @@ def detrend( trend = xr.polyval(ds[dim], coeff.polyfit_coefficients) with xr.set_options(keep_attrs=True): return ds - trend + + +@declare_relative_units(thresh="") +def thresholded_events( + data: xr.DataArray, + thresh: Quantified = "1 kg m-2 d-1", + window_start: int = 3, + window_stop: int = 3, + freq: str | None = None, + data_is_rate: bool = False, +) -> xr.Dataset: + r"""Thresholded events. + + Parameters + ---------- + data : xr.DataArray + Variable. + thresh : Quantified + Threshold that must be exceeded to be considered an event + window_start: int + Number of time steps above the threshold required to start an event + window_stop : int + Number of time steps below the threshold required to stop an event + freq : str + Resampling frequency. + data_is_rate : bool + True if the data is a rate that needs to be converted to an amount + when computing an accumulation. + + Returns + ------- + xr.Dataset + """ + freq = xr.infer_freq(data.time) + mag, units, _, _ = parse_offset(freq) + # condition to respect for `window_start` time steps to start a run + thresh = convert_units_to(thresh, data) + da_start = data >= thresh + da_stop = (1 - da_start).astype(bool) + + # Get basic blocks to work with, our runs with holes and the lengths of those runs + # Series of ones indicating where we have continuous runs of freezing rain with pauses + # not exceeding `window_stop` + runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop) + + # Compute the length of freezing rain events + # I think int16 is safe enough + ds = rl.rle(runs).astype(np.int16).to_dataset(name="run_lengths") + ds.run_lengths.attrs["units"] = "" + + # Time duration where the precipitation threshold is exceeded during an event + # (duration of complete run - duration of holes in the run ) + ds["effective_duration"] = ( + rl._cumsum_reset( + da_start.where(runs == 1), index="first", reset_on_zero=False + ).astype(np.int16) + * mag + ) + ds["effective_duration"].attrs["units"] = units + + # # Cumulated precipitation in a given freezing rain event + if data_is_rate: + dataam = rate2amount(data) + else: + dataam = data + ds["event_accumulation"] = rl._cumsum_reset( + dataam.where(runs == 1), index="first", reset_on_zero=False + ) + ds["event_accumulation"].attrs["units"] = dataam.units + + # Keep time as a variable, it will be used to keep start of events + ds["start"] = ds["time"].broadcast_like(ds) # .astype(int) + # I have to convert it to an integer for the filtering, time object won't do + # Since there are conversion needing a time object earlier, I think it's ok + # to assume this here? + time_min = ds.start.min() + ds["start"] = (ds.start - time_min).astype("timedelta64[s]").astype(int) + + # Filter events: Reduce time dimension + def _filter_events(da, rl, max_event_number): + out = np.full(max_event_number, np.NaN) + events_start = da[rl > 0] + out[: len(events_start)] = events_start + return out + + max_event_number = int(np.ceil(data.time.size / (window_start + window_stop))) + v_attrs = {v: ds[v].attrs for v in ds.data_vars} + ds = xr.apply_ufunc( + _filter_events, + ds, + ds.run_lengths, + input_core_dims=[["time"], ["time"]], + output_core_dims=[["event"]], + kwargs=dict(max_event_number=max_event_number), + output_sizes={"event": max_event_number}, + dask="parallelized", + vectorize=True, + ).assign_attrs(ds.attrs) + + ds["event"] = np.arange(1, ds.event.size + 1) + for v in ds.data_vars: + ds[v].attrs = v_attrs[v] + + # convert back start to a time + # TODO fix for calendars + ds["start"] = time_min.astype("datetime64[ns]") + ds["start"].astype( + "timedelta64[ns]" + ) + return ds + + # # Other indices that could be completely done outside of the function, no input needed anymore + # # number of events + # ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) + # ds.number_of_events.attrs["units"] = "" + + # # mean rate of precipitation during event + # ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] + # units = ( + # f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" + # ) + # ds["rate"].attrs["units"] = ensure_cf_units(units) + + # ds.attrs["units"] = "" + # return ds