Thresholded events - Quantified rate2amount #1778

Merged: 26 commits, Oct 10, 2024
Commits
48efef8
black ice events
coxipi Jun 13, 2024
35b0f75
black ice -> freezing rain
coxipi Jun 13, 2024
dab33b9
Merge branch 'main' of github.com:Ouranosinc/xclim into black_ice
coxipi Jun 13, 2024
44d8681
update CHANGES
coxipi Jun 13, 2024
d036823
event dimension with padding
coxipi Jun 14, 2024
d61182c
merge main
aulemahal Sep 6, 2024
d556d95
first pass to generalize
aulemahal Sep 6, 2024
8ab9442
Merge branch 'main' into black_ice
aulemahal Oct 2, 2024
d27ba5d
cleaner?
aulemahal Oct 2, 2024
c5ff6d3
merge resample-map
aulemahal Oct 3, 2024
3a4ebbb
Working find events
aulemahal Oct 3, 2024
74b8cc9
Add quantified support to rate2amount
aulemahal Oct 3, 2024
9d80208
Fix bad merge...
aulemahal Oct 3, 2024
29a8271
Merge branch 'resample-map' into black_ice
aulemahal Oct 3, 2024
cb6d2cc
Merge branch 'resample-map' into black_ice
aulemahal Oct 7, 2024
f411144
fix fmt, fix tests
aulemahal Oct 7, 2024
cc02b19
Add min_gap to spell length stats
aulemahal Oct 7, 2024
385d59b
Merge branch 'resample-map' into black_ice
aulemahal Oct 7, 2024
1241551
Merge branch 'resample-map' into black_ice
aulemahal Oct 8, 2024
6080aa3
fix test
aulemahal Oct 8, 2024
4457d11
Merge branch 'resample-map' into black_ice
aulemahal Oct 8, 2024
12a4738
Merge branch 'resample-map' into black_ice
aulemahal Oct 9, 2024
d32cf58
Apply suggestions from code review
aulemahal Oct 9, 2024
24e2f7a
Merge branch 'black_ice' of github.com:Ouranosinc/xclim into black_ice
aulemahal Oct 9, 2024
fb6cf7d
Suggestions from review
aulemahal Oct 9, 2024
0f03cdd
Merge branch 'main' into black_ice
aulemahal Oct 10, 2024
7 changes: 5 additions & 2 deletions CHANGELOG.rst
@@ -13,8 +13,8 @@ Announcements

New indicators
^^^^^^^^^^^^^^
* New ``heat_spell_frequency``, ``heat_spell_max_length`` and ``heat_spell_total_length`` : spell length statistics on a bivariate condition that uses the average over a window by default. (:pull:`1885`).
* New ``hot_spell_max_magnitude``: yields the magnitude of the most intensive heat wave. (:pull:`1926`).
* New ``heat_spell_frequency``, ``heat_spell_max_length`` and ``heat_spell_total_length`` : spell length statistics on a bivariate condition that uses the average over a window by default. (:pull:`1885`, :pull:`1778`).
* New ``hot_spell_max_magnitude`` : yields the magnitude of the most intensive heat wave. (:pull:`1926`).
* New ``chill_portion`` and ``chill_unit``: chill portion based on the Dynamic Model and chill unit based on the Utah model indicators. (:issue:`1753`, :pull:`1909`).
* New ``water_cycle_intensity``: yields the sum of precipitation and actual evapotranspiration. (:issue:`410`, :pull:`1947`).

@@ -25,6 +25,9 @@ New features and enhancements
* ``xclim.indices.run_length.windowed_max_run_sum`` accumulates positive values across runs and yields the maximum-valued run. (:pull:`1926`).
* Helper function ``xclim.indices.helpers.make_hourly_temperature`` to estimate hourly temperatures from daily min and max temperatures. (:pull:`1909`).
* New global option ``resample_map_blocks`` to wrap all ``resample().map()`` code inside a ``xr.map_blocks`` to lower the number of dask tasks. Uses utility ``xclim.indices.helpers.resample_map`` and requires ``flox`` to ensure the chunking allows such block-mapping. Defaults to False. (:pull:`1848`).
* ``xclim.indices.run_length.runs_with_holes`` accepts a condition that must be met for a run to start and a second condition that must be met for the run to stop. (:pull:`1778`).
* New generic compute function ``xclim.indices.generic.thresholded_events`` that finds events based on a threshold condition and returns basic stats for each. See also ``xclim.indices.run_length.find_events``. (:pull:`1778`).
* ``xclim.core.units.rate2amount`` and ``xclim.core.units.amount2rate`` can now also accept quantities (pint objects or strings), in which case the ``dim`` argument must be the ``time`` coordinate through which we can find the sampling rate. (:pull:`1778`).
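The start/stop semantics of ``runs_with_holes`` can be sketched in plain Python. This is an illustrative re-implementation of the behaviour exercised by the tests in this PR, not xclim's array-based code; the real function operates on boolean DataArrays and is vectorized.

```python
def runs_with_holes(start_cond, window_start, stop_cond, window_stop):
    """Return a 0/1 event mask: an event begins once `start_cond` holds
    for `window_start` consecutive steps and ends once `stop_cond` holds
    for `window_stop` consecutive steps (the stop window is excluded)."""
    out = []
    active = False
    run = 0  # length of the current qualifying run
    for s, e in zip(start_cond, stop_cond):
        if not active:
            run = run + 1 if s else 0
            if run >= window_start:
                active = True
                run = 0
                # retroactively mark the steps that formed the start window
                for i in range(1, window_start):
                    out[-i] = 1
                out.append(1)
            else:
                out.append(0)
        else:
            run = run + 1 if e else 0
            out.append(1)
            if run >= window_stop:
                active = False
                run = 0
                # the stop window itself is not part of the event
                for i in range(window_stop):
                    out[-1 - i] = 0
    return out
```

With ``window_start=1`` and ``window_stop=1`` this reduces to the identity on a 0/1 series, which is the property checked by ``test_runs_with_holes_identity`` below.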

Bug fixes
^^^^^^^^^
124 changes: 124 additions & 0 deletions tests/test_generic.py
@@ -8,6 +8,7 @@

from xclim.core.calendar import doy_to_days_since, select_time
from xclim.indices import generic
from xclim.testing.helpers import assert_lazy

K2C = 273.15

@@ -768,3 +769,126 @@ def test_spell_length_statistics_multi(tasmin_series, tasmax_series):
)
xr.testing.assert_equal(outs, outm)
np.testing.assert_allclose(outc, 1)


class TestThresholdedEvents:

@pytest.mark.parametrize("use_dask", [True, False])
def test_simple(self, pr_series, use_dask):
arr = np.array([0, 0, 0, 1, 2, 3, 0, 3, 3, 10, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 3, 3, 2, 0, 0, 0, 2, 0, 0, 0, 0]) # fmt: skip
pr = pr_series(arr, start="2000-01-01", units="mm")
if use_dask:
pr = pr.chunk(-1)

with assert_lazy:
out = generic.thresholded_events(
pr,
thresh="1 mm",
op=">=",
window=3,
)

assert out.event.size == np.ceil(arr.size / (3 + 1))
out = out.load().dropna("event", how="all")

np.testing.assert_array_equal(out.event_length, [3, 3, 4, 4])
np.testing.assert_array_equal(out.event_effective_length, [3, 3, 4, 4])
np.testing.assert_array_equal(out.event_sum, [6, 16, 7, 9])
np.testing.assert_array_equal(
out.event_start,
np.array(
["2000-01-04", "2000-01-08", "2000-01-16", "2000-01-26"],
dtype="datetime64[ns]",
),
)

@pytest.mark.parametrize("use_dask", [True, False])
def test_diff_windows(self, pr_series, use_dask):
arr = np.array([0, 0, 0, 1, 2, 3, 0, 3, 3, 10, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 3, 3, 2, 0, 0, 0, 2, 0, 0, 0, 0]) # fmt: skip
pr = pr_series(arr, start="2000-01-01", units="mm")
if use_dask:
pr = pr.chunk(-1)

# different window stop
out = (
generic.thresholded_events(
pr, thresh="2 mm", op=">=", window=3, window_stop=4
)
.load()
.dropna("event", how="all")
)

np.testing.assert_array_equal(out.event_length, [3, 3, 7])
np.testing.assert_array_equal(out.event_effective_length, [3, 3, 4])
np.testing.assert_array_equal(out.event_sum, [16, 6, 10])
np.testing.assert_array_equal(
out.event_start,
np.array(
["2000-01-08", "2000-01-17", "2000-01-27"], dtype="datetime64[ns]"
),
)

@pytest.mark.parametrize("use_dask", [True, False])
def test_cftime(self, pr_series, use_dask):
arr = np.array([0, 0, 0, 1, 2, 3, 0, 3, 3, 10, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 3, 3, 2, 0, 0, 0, 2, 0, 0, 0, 0]) # fmt: skip
pr = pr_series(arr, start="2000-01-01", units="mm").convert_calendar("noleap")
if use_dask:
pr = pr.chunk(-1)

# cftime
with assert_lazy:
out = generic.thresholded_events(
pr,
thresh="1 mm",
op=">=",
window=3,
window_stop=3,
)
out = out.load().dropna("event", how="all")

np.testing.assert_array_equal(out.event_length, [7, 4, 4])
np.testing.assert_array_equal(out.event_effective_length, [6, 4, 4])
np.testing.assert_array_equal(out.event_sum, [22, 7, 9])
exp = xr.DataArray(
[1, 2, 3],
dims=("time",),
coords={
"time": np.array(
["2000-01-04", "2000-01-16", "2000-01-26"], dtype="datetime64[ns]"
)
},
)
np.testing.assert_array_equal(
out.event_start, exp.convert_calendar("noleap").time
)

@pytest.mark.parametrize("use_dask", [True, False])
def test_freq(self, pr_series, use_dask):
jan = [0, 0, 0, 1, 2, 3, 0, 3, 3, 10, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 3, 2, 3, 2] # fmt: skip
fev = [2, 2, 1, 0, 0, 0, 3, 3, 4, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # fmt: skip
pr = pr_series(np.array(jan + fev), start="2000-01-01", units="mm")
if use_dask:
pr = pr.chunk(-1)

with assert_lazy:
out = generic.thresholded_events(
pr, thresh="1 mm", op=">=", window=3, freq="MS", window_stop=3
)
assert out.event_length.shape == (2, 6)
out = out.load().dropna("event", how="all")

np.testing.assert_array_equal(out.event_length, [[7, 6, 4], [3, 5, np.nan]])
np.testing.assert_array_equal(
out.event_effective_length, [[6, 6, 4], [3, 5, np.nan]]
)
np.testing.assert_array_equal(out.event_sum, [[22, 12, 10], [5, 17, np.nan]])
np.testing.assert_array_equal(
out.event_start,
np.array(
[
["2000-01-04", "2000-01-17", "2000-01-28"],
["2000-02-01", "2000-02-07", "NaT"],
],
dtype="datetime64[ns]",
),
)
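The expected lengths, sums and start indices in the ``test_simple`` case above follow from a plain run search. A pure-Python sketch with a hypothetical `find_events` helper (ignoring `window_stop` gap handling and all the xarray/dask machinery of the real `thresholded_events`):

```python
def find_events(values, thresh, window):
    """Locate runs where `values` >= `thresh` for at least `window`
    consecutive steps; return (start_index, length, sum) per event."""
    events = []
    i, n = 0, len(values)
    while i < n:
        if values[i] >= thresh:
            j = i
            while j < n and values[j] >= thresh:
                j += 1
            if j - i >= window:
                events.append((i, j - i, sum(values[i:j])))
            i = j
        else:
            i += 1
    return events
```

On the test array, the events start at indices 3, 7, 15 and 25 (i.e. Jan 4, 8, 16 and 26), with lengths [3, 3, 4, 4] and sums [6, 16, 7, 9], matching the assertions above; the lone value at index 32 is too short to qualify.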
11 changes: 5 additions & 6 deletions tests/test_run_length.py
@@ -126,9 +126,8 @@ def test_rle(ufunc, use_dask, index):

@pytest.mark.parametrize("use_dask", [True, False])
@pytest.mark.parametrize("index", ["first", "last"])
def test_extract_events_identity(use_dask, index):
# implement more tests, this is just to show that this reproduces the behaviour
# of rle
def test_runs_with_holes_identity(use_dask, index):
    # This test reproduces the behaviour of `rle`
values = np.zeros((10, 365, 4, 4))
time = pd.date_range("2000-01-01", periods=365, freq="D")
values[:, 1:11, ...] = 1
@@ -137,19 +136,19 @@ def test_extract_events_identity(use_dask, index):
if use_dask:
da = da.chunk({"a": 1, "b": 2})

events = rl.extract_events(da != 0, 1, da == 0, 1)
events = rl.runs_with_holes(da != 0, 1, da == 0, 1)
expected = da
np.testing.assert_array_equal(events, expected)


def test_extract_events():
def test_runs_with_holes():
values = np.zeros(365)
time = pd.date_range("2000-01-01", periods=365, freq="D")
a = [0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0]
values[0 : len(a)] = a
da = xr.DataArray(values, coords={"time": time}, dims=("time"))

events = rl.extract_events(da == 1, 1, da == 0, 3)
events = rl.runs_with_holes(da == 1, 1, da == 0, 3)

expected = values * 0
expected[1:11] = 1
14 changes: 14 additions & 0 deletions tests/test_temperature.py
@@ -640,6 +640,20 @@ def test_1d(self, tasmax_series, tasmin_series):
)
np.testing.assert_allclose(hsf.values[:1], 0)

def test_gap(self, tasmax_series, tasmin_series):
tn1 = np.zeros(366)
tx1 = np.zeros(366)
tn1[:10] = np.array([20, 23, 23, 23, 20, 20, 23, 23, 23, 23])
tx1[:10] = np.array([29, 31, 31, 31, 28, 28, 31, 31, 31, 31])

tn = tasmin_series(tn1 + K2C, start="1/1/2000")
tx = tasmax_series(tx1 + K2C, start="1/1/2000")

hsf = atmos.heat_spell_frequency(
tn, tx, thresh_tasmin="22.1 C", thresh_tasmax="30.1 C", freq="YS", min_gap=3
)
np.testing.assert_allclose(hsf.values[:1], 1)
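The `min_gap` behaviour tested here (two hot spells separated by a 2-step gap count as a single event when `min_gap=3`) can be sketched with a hypothetical pure-Python helper, assuming gaps strictly shorter than `min_gap` are merged before spells are counted:

```python
def count_spells(mask, window, min_gap):
    """Count runs of at least `window` consecutive True values, after
    merging runs separated by interior gaps shorter than `min_gap`."""
    filled = list(mask)
    n = len(filled)
    i = 0
    while i < n:
        if not filled[i]:
            j = i
            while j < n and not filled[j]:
                j += 1
            # only interior gaps (bounded by True on both sides) merge
            if 0 < i and j < n and (j - i) < min_gap:
                for k in range(i, j):
                    filled[k] = True
            i = j
        else:
            i += 1
    # count runs of length >= window (trailing False flushes the last run)
    count = run = 0
    for v in filled + [False]:
        if v:
            run += 1
        else:
            if run >= window:
                count += 1
            run = 0
    return count
```

On the test data above, the threshold mask is True on days 2-4 and 7-10; with `min_gap=3` the 2-day gap is closed and one spell is counted, whereas without gap merging there would be two.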


class TestHeatSpellMaxLength:
def test_1d(self, tasmax_series, tasmin_series):
66 changes: 45 additions & 21 deletions xclim/core/units.py
@@ -413,6 +413,7 @@ def cf_conversion(
FREQ_UNITS = {
"D": "d",
"W": "week",
"h": "h",
}
"""
Resampling frequency units for :py:func:`xclim.core.units.infer_sampling_units`.
@@ -622,8 +623,8 @@ def to_agg_units(


def _rate_and_amount_converter(
da: xr.DataArray,
dim: str = "time",
da: Quantified,
dim: str | xr.DataArray = "time",
to: str = "amount",
sampling_rate_from_coord: bool = False,
out_units: str | None = None,
@@ -632,10 +633,27 @@
m = 1
u = None # Default to assume a non-uniform axis
label: Literal["lower", "upper"] = "lower" # Default to "lower" label for diff
time = da[dim]
if isinstance(dim, str):
if not isinstance(da, xr.DataArray):
raise ValueError(
"If `dim` is a string, the data to convert must be a DataArray."
)
time = da[dim]
else:
time = dim
dim = time.name

# We accept str, Quantity or DataArray
    # Ensure the code below has a DataArray, so it's simpler
# We convert back at the end
orig_da = da
if isinstance(da, str):
da = str2pint(da)
if isinstance(da, units.Quantity):
da = xr.DataArray(da.magnitude, attrs={"units": f"{da.units:~cf}"})

try:
freq = xr.infer_freq(da[dim])
freq = xr.infer_freq(time)
except ValueError as err:
if sampling_rate_from_coord:
freq = None
@@ -669,7 +687,7 @@
),
dims=(dim,),
name=dim,
attrs=da[dim].attrs,
attrs=time.attrs,
)
else:
m, u = multi, FREQ_UNITS[base]
@@ -683,7 +701,7 @@
# and `label` has been updated accordingly.
dt = (
time.diff(dim, label=label)
.reindex({dim: da[dim]}, method="ffill")
.reindex({dim: time}, method="ffill")
.astype(float)
)
dt = dt / 1e9 # Convert to seconds
@@ -716,15 +734,17 @@
out = out.assign_attrs(standard_name=new_name)

if out_units:
out = cast(xr.DataArray, convert_units_to(out, out_units))
out = convert_units_to(out, out_units)

if not isinstance(orig_da, xr.DataArray):
out = units.Quantity(out.item(), out.attrs["units"])
return out


@_register_conversion("amount2rate", "from")
def rate2amount(
rate: xr.DataArray,
dim: str = "time",
rate: Quantified,
dim: str | xr.DataArray = "time",
sampling_rate_from_coord: bool = False,
out_units: str | None = None,
) -> xr.DataArray:
@@ -738,10 +758,10 @@

Parameters
----------
rate : xr.DataArray
rate : xr.DataArray, pint.Quantity or string
"Rate" variable, with units of "amount" per time. Ex: Precipitation in "mm / d".
dim : str
The time dimension.
dim : str or DataArray
    The name of the time dimension or the coordinate itself.
sampling_rate_from_coord : boolean
For data with irregular time coordinates. If True, the diff of the time coordinate will be used as the sampling rate,
meaning each data point will be assumed to apply for the interval ending at the next point. See notes.
@@ -756,7 +776,7 @@

Returns
-------
xr.DataArray
xr.DataArray or Quantity

Examples
--------
@@ -804,8 +824,8 @@

@_register_conversion("amount2rate", "to")
def amount2rate(
amount: xr.DataArray,
dim: str = "time",
amount: Quantified,
dim: str | xr.DataArray = "time",
sampling_rate_from_coord: bool = False,
out_units: str | None = None,
) -> xr.DataArray:
@@ -819,10 +839,10 @@

Parameters
----------
amount : xr.DataArray
amount : xr.DataArray, pint.Quantity or string
"amount" variable. Ex: Precipitation amount in "mm".
dim : str
The time dimension.
dim : str or xr.DataArray
The name of the time dimension or the time coordinate itself.
sampling_rate_from_coord : boolean
For data with irregular time coordinates.
If True, the diff of the time coordinate will be used as the sampling rate,
@@ -839,7 +859,7 @@

Returns
-------
xr.DataArray
xr.DataArray or Quantity

See Also
--------
@@ -1157,12 +1177,16 @@ def check_units(
)


def _check_output_has_units(out: xr.DataArray | tuple[xr.DataArray]) -> None:
def _check_output_has_units(
out: xr.DataArray | tuple[xr.DataArray] | xr.Dataset,
) -> None:
"""Perform very basic sanity check on the output.

Indices are responsible for unit management. If this fails, it's a developer's error.
"""
if not isinstance(out, tuple):
if isinstance(out, xr.Dataset):
out = out.data_vars.values()
elif not isinstance(out, tuple):
out = (out,)

for outd in out:
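The new Quantified path in `rate2amount`/`amount2rate` boils down to inferring the sampling step from the supplied time coordinate and multiplying (or dividing) by it. A minimal scalar sketch of that idea, with a hypothetical helper name, assuming a uniform time axis and a rate expressed per day (the real code handles pint units, irregular axes and DataArrays):

```python
from datetime import datetime, timedelta


def rate2amount_scalar(rate_per_day: float, times: list) -> float:
    """Convert a scalar rate (per day) to an amount over one sampling
    step, with the step inferred from the time coordinate."""
    step = times[1] - times[0]
    # xclim infers the frequency via xr.infer_freq; here we just
    # require a uniform axis
    if any(b - a != step for a, b in zip(times, times[1:])):
        raise ValueError("non-uniform time axis")
    return rate_per_day * (step / timedelta(days=1))
```

For example, a rate of 1 mm/d sampled weekly corresponds to an amount of 7 mm per step, which mirrors passing `rate2amount("1 mm/d", dim=time_coord)` in the new API.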