Thresholded events - Quantified rate2amount #1778

Merged: 26 commits, merged on Oct 10, 2024
Changes from 7 commits

Commits (26)
48efef8
black ice events
coxipi Jun 13, 2024
35b0f75
black ice -> freezing rain
coxipi Jun 13, 2024
dab33b9
Merge branch 'main' of github.com:Ouranosinc/xclim into black_ice
coxipi Jun 13, 2024
44d8681
update CHANGES
coxipi Jun 13, 2024
d036823
event dimension with padding
coxipi Jun 14, 2024
d61182c
merge main
aulemahal Sep 6, 2024
d556d95
first pass to generalize
aulemahal Sep 6, 2024
8ab9442
Merge branch 'main' into black_ice
aulemahal Oct 2, 2024
d27ba5d
cleaner?
aulemahal Oct 2, 2024
c5ff6d3
merge resample-map
aulemahal Oct 3, 2024
3a4ebbb
Working find events
aulemahal Oct 3, 2024
74b8cc9
Add quantified support to rate2amount
aulemahal Oct 3, 2024
9d80208
Fix bad merge...
aulemahal Oct 3, 2024
29a8271
Merge branch 'resample-map' into black_ice
aulemahal Oct 3, 2024
cb6d2cc
Merge branch 'resample-map' into black_ice
aulemahal Oct 7, 2024
f411144
fix fmt, fix tests
aulemahal Oct 7, 2024
cc02b19
Add min_gap to spell length stats
aulemahal Oct 7, 2024
385d59b
Merge branch 'resample-map' into black_ice
aulemahal Oct 7, 2024
1241551
Merge branch 'resample-map' into black_ice
aulemahal Oct 8, 2024
6080aa3
fix test
aulemahal Oct 8, 2024
4457d11
Merge branch 'resample-map' into black_ice
aulemahal Oct 8, 2024
12a4738
Merge branch 'resample-map' into black_ice
aulemahal Oct 9, 2024
d32cf58
Apply suggestions from code review
aulemahal Oct 9, 2024
24e2f7a
Merge branch 'black_ice' of github.com:Ouranosinc/xclim into black_ice
aulemahal Oct 9, 2024
fb6cf7d
Suggestions from review
aulemahal Oct 9, 2024
0f03cdd
Merge branch 'main' into black_ice
aulemahal Oct 10, 2024
2 changes: 2 additions & 0 deletions CHANGELOG.rst
@@ -9,11 +9,13 @@ Contributors to this version: Adrien Lamarche (:user:`LamAdr`), Trevor James Smith …
New indicators
^^^^^^^^^^^^^^
* New ``heat_spell_frequency``, ``heat_spell_max_length`` and ``heat_spell_total_length`` : spell length statistics on a bivariate condition that uses the average over a window by default. (:pull:`1885`).
* New indicator ``freezing_rain_events`` gives statistics about freezing rain sequences. (:pull:`1778`).

New features and enhancements
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* New generic ``xclim.indices.generic.spell_mask`` that returns a mask of which days are part of a spell. Supports multivariate conditions and weights. Used in new generic index ``xclim.indices.generic.bivariate_spell_length_statistics`` that extends ``spell_length_statistics`` to two variables. (:pull:`1885`).
* Indicator parameters can now be assigned a new name, different from the argument name in the compute function. (:pull:`1885`).
* ``xclim.indices.run_length.runs_with_holes`` allows inputting a condition that must be met for a run to start and a second condition that must be met for the run to stop. (:pull:`1778`).

Bug fixes
^^^^^^^^^
28 changes: 27 additions & 1 deletion tests/test_indices.py
@@ -27,7 +27,7 @@
from xclim.core import ValidationError
from xclim.core.calendar import percentile_doy
from xclim.core.options import set_options
from xclim.core.units import convert_units_to, units
from xclim.core.units import convert_units_to, rate2amount, units

K2C = 273.15

@@ -346,6 +346,32 @@ def test_bedd(self, method, end_date, deg_days, max_deg_days):
if method == "icclim":
np.testing.assert_array_equal(bedd, bedd_high_lat)

def test_freezing_rain_events(self, open_dataset):
times = pd.date_range("1950-01-01", "1950-01-31", freq="D")
da = xr.DataArray(
np.zeros(len(times)),
dims={"time"},
coords={"time": times},
attrs={"units": "kg m-2 s-1"},
)

# Two sequences separated by 3 days, which will be split with window_stop = 3
da[0:10] = 1
da[13:20] = 1
out = xci.freezing_rain_events(
da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3
)
assert (out.run_lengths.values[0:2] == [10, 7]).all()

# Two sequences separated by 2 days, which will form a single large event
da[12] = 1
out = xci.freezing_rain_events(
da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3
)
pram = rate2amount(da)
assert out.run_lengths.values[0] == 20
assert (out.cumulative_precipitation - pram.sum()).sum() == 0

def test_cool_night_index(self, open_dataset):
ds = open_dataset("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200701-200712.nc")
ds = ds.rename(dict(tas="tasmin"))
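The new test above compares the event accumulation against ``rate2amount``, which this PR extends with Quantified support. As a hedged sketch of that conversion on hypothetical data (not part of the PR), a constant daily rate integrates to an amount per time step:

import numpy as np
import pandas as pd
import xarray as xr

from xclim.core.units import rate2amount

# Hypothetical constant precipitation rate of 1 kg m-2 s-1, sampled daily.
times = pd.date_range("2000-01-01", periods=3, freq="D")
rate = xr.DataArray(
    np.ones(3), dims="time", coords={"time": times}, attrs={"units": "kg m-2 s-1"}
)

# Each daily step integrates over 86400 s, giving an amount per step.
amount = rate2amount(rate)
print(amount.attrs["units"], float(amount[0]))  # expected: kg m-2 86400.0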
1 change: 1 addition & 0 deletions xclim/core/units.py
@@ -411,6 +411,7 @@ def cf_conversion(
FREQ_UNITS = {
"D": "d",
"W": "week",
"h": "h",
}
"""
Resampling frequency units for :py:func:`xclim.core.units.infer_sampling_units`.
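With ``"h"`` added to ``FREQ_UNITS``, hourly sampling can be mapped to a pint-compatible unit. A minimal sketch of the behaviour this enables, assuming hypothetical hourly data and the ``(multiplier, units)`` return of ``xclim.core.units.infer_sampling_units``:

import numpy as np
import pandas as pd
import xarray as xr

from xclim.core.units import infer_sampling_units

# Hypothetical hourly series; the inferred frequency "h" now maps to the unit "h".
da = xr.DataArray(
    np.zeros(48),
    dims="time",
    coords={"time": pd.date_range("2000-01-01", periods=48, freq="h")},
)
m, u = infer_sampling_units(da)
print(m, u)  # expected: 1 h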
132 changes: 131 additions & 1 deletion xclim/indices/generic.py
@@ -18,12 +18,18 @@
from xarray.coding.cftime_offsets import _MONTH_ABBREVIATIONS # noqa

from xclim.core import DayOfYearStr, Quantified
from xclim.core.calendar import doy_to_days_since, get_calendar, select_time
from xclim.core.calendar import (
doy_to_days_since,
get_calendar,
parse_offset,
select_time,
)
from xclim.core.units import (
convert_units_to,
declare_relative_units,
infer_context,
pint2cfunits,
rate2amount,
str2pint,
to_agg_units,
)
@@ -1474,3 +1480,127 @@ def detrend(
trend = xr.polyval(ds[dim], coeff.polyfit_coefficients)
with xr.set_options(keep_attrs=True):
return ds - trend


@declare_relative_units(thresh="<data>")
def thresholded_events(
data: xr.DataArray,
thresh: Quantified = "1 kg m-2 d-1",
window_start: int = 3,
window_stop: int = 3,
freq: str | None = None,
data_is_rate: bool = False,
) -> xr.Dataset:
r"""Thresholded events.

Parameters
----------
data : xr.DataArray
Input variable.
thresh : Quantified
Threshold that must be exceeded for a time step to count towards an event.
window_start : int
Number of time steps above the threshold required to start an event.
window_stop : int
Number of time steps below the threshold required to stop an event.
freq : str, optional
Resampling frequency. Currently unused; the sampling frequency is inferred from the data.
data_is_rate : bool
True if the data is a rate that needs to be converted to an amount
when computing an accumulation.

Returns
-------
xr.Dataset
Dataset with variables `run_lengths`, `effective_duration`, `event_accumulation`
and `start`, along a new `event` dimension.
"""
# NOTE: the `freq` argument is not used yet; duration units below are derived
# from the sampling frequency of the data.
samp_freq = xr.infer_freq(data.time)
mag, units, _, _ = parse_offset(samp_freq)
# Condition that must hold for `window_start` time steps to start a run
thresh = convert_units_to(thresh, data)
da_start = data >= thresh
da_stop = ~da_start

# Get the basic blocks to work with: runs with holes and the lengths of those runs.
# `runs` is a series of ones indicating continuous runs where the condition holds,
# with pauses not exceeding `window_stop`
runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop)

# Compute the length of events
# int16 should be safe enough for run lengths
ds = rl.rle(runs).astype(np.int16).to_dataset(name="run_lengths")
ds.run_lengths.attrs["units"] = ""

# Time duration where the threshold is exceeded during an event
# (duration of the complete run minus the duration of holes in the run)
ds["effective_duration"] = (
rl._cumsum_reset(
da_start.where(runs == 1), index="first", reset_on_zero=False
).astype(np.int16)
* mag
)
ds["effective_duration"].attrs["units"] = units

# Accumulated amount within a given event
if data_is_rate:
dataam = rate2amount(data)
else:
dataam = data
ds["event_accumulation"] = rl._cumsum_reset(
dataam.where(runs == 1), index="first", reset_on_zero=False
)
ds["event_accumulation"].attrs["units"] = dataam.units

# Keep time as a variable, it will be used to keep start of events
ds["start"] = ds["time"].broadcast_like(ds) # .astype(int)
# Convert to an integer for the filtering; a time object won't do.
# Earlier conversions already require a time object, so it should be safe
# to assume one here.
time_min = ds.start.min()
ds["start"] = (ds.start - time_min).astype("timedelta64[s]").astype(int)

# Filter events: Reduce time dimension
def _filter_events(da, rl, max_event_number):
out = np.full(max_event_number, np.NaN)
events_start = da[rl > 0]
out[: len(events_start)] = events_start
return out

max_event_number = int(np.ceil(data.time.size / (window_start + window_stop)))
v_attrs = {v: ds[v].attrs for v in ds.data_vars}
ds = xr.apply_ufunc(
_filter_events,
ds,
ds.run_lengths,
input_core_dims=[["time"], ["time"]],
output_core_dims=[["event"]],
kwargs=dict(max_event_number=max_event_number),
output_sizes={"event": max_event_number},
dask="parallelized",
vectorize=True,
).assign_attrs(ds.attrs)

ds["event"] = np.arange(1, ds.event.size + 1)
for v in ds.data_vars:
ds[v].attrs = v_attrs[v]

# Convert `start` back to a time
# TODO fix for calendars
ds["start"] = time_min.astype("datetime64[ns]") + ds["start"].astype(
"timedelta64[ns]"
)
return ds

# # Other indices that could be computed entirely outside of this function; no further input needed
# # number of events
# ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16)
# ds.number_of_events.attrs["units"] = ""

# # mean rate of precipitation during event
# ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"]
# units = (
# f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}"
# )
# ds["rate"].attrs["units"] = ensure_cf_units(units)

# ds.attrs["units"] = ""
# return ds
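A minimal usage sketch of ``thresholded_events`` as defined above, on hypothetical data mirroring the new test (the expected values follow from the start/stop logic, not from additional tests in the PR):

import numpy as np
import pandas as pd
import xarray as xr

from xclim.indices.generic import thresholded_events

times = pd.date_range("1950-01-01", periods=31, freq="D")
pr = xr.DataArray(
    np.zeros(len(times)),
    dims="time",
    coords={"time": times},
    attrs={"units": "kg m-2 d-1"},
)
pr[0:10] = 5.0  # a 10-day wet spell
pr[13:20] = 5.0  # a second spell, after a 3-day dry gap

# With window_stop=3, the 3-day gap ends the first event:
# two events are found, with lengths 10 and 7.
events = thresholded_events(pr, thresh="1 kg m-2 d-1", window_start=3, window_stop=3)
print(events.run_lengths.values[:2])  # expected: [10. 7.]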
38 changes: 30 additions & 8 deletions xclim/indices/run_length.py
@@ -119,10 +119,11 @@ def resample_and_rl(
return out


def _cumsum_reset_on_zero(
def _cumsum_reset(
da: xr.DataArray,
dim: str = "time",
index: str = "last",
reset_on_zero: bool = True,
) -> xr.DataArray:
"""Compute the cumulative sum for each series of numbers separated by zero.

@@ -135,6 +136,9 @@ def _cumsum_reset_on_zero(
index : {'first', 'last'}
If 'first', the largest value of the cumulative sum is indexed with the first element in the run.
If 'last'(default), with the last element in the run.
reset_on_zero : bool
If True, the cumulative sum is reset on each zero value of `da`. Otherwise, the cumulative sum resets
on NaNs. Default is True.

Returns
-------
Expand All @@ -146,7 +150,10 @@ def _cumsum_reset_on_zero(

# Example: da == 100110111 -> cs_s == 100120123
cs = da.cumsum(dim=dim) # cumulative sum e.g. 111233456
cs2 = cs.where(da == 0)  # keep only numbers at positions of zeroes e.g. N11NN3NNN
cond = da == 0 if reset_on_zero else da.isnull()  # reset condition
cs2 = cs.where(cond)  # keep only numbers at positions of zeroes e.g. N11NN3NNN (default)
cs2[{dim: 0}] = 0 # put a zero in front e.g. 011NN3NNN
cs2 = cs2.ffill(dim=dim) # e.g. 011113333
out = cs - cs2
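# With reset_on_zero=False the reset happens on NaNs instead,
# e.g. da == [1, 1, NaN, 1, 1, 1] -> cs == [1, 2, 2, 3, 4, 5] (cumsum skips NaN)
# -> out == [1, 2, 0, 1, 2, 3]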
@@ -187,7 +194,7 @@ def rle(
da = da[{dim: slice(None, None, -1)}]

# Get cumulative sum for each series of 1, e.g. da == 100110111 -> cs_s == 100120123
cs_s = _cumsum_reset_on_zero(da, dim)
cs_s = _cumsum_reset(da, dim)

# Keep total length of each series (and also keep 0's), e.g. 100120123 -> 100N20NN3
# Keep numbers with a 0 to the right and also the last number
@@ -496,7 +503,7 @@ def find_boundary_run(runs, position):

else:
# _cumsum_reset() is an intermediate step in rle, which is sufficient here
d = _cumsum_reset_on_zero(da, dim=dim, index=position)
d = _cumsum_reset(da, dim=dim, index=position)
d = xr.where(d >= window, 1, 0)
# for "first" run, return "first" element in the run (and conversely for "last" run)
if freq is not None:
@@ -745,17 +752,32 @@ def extract_events(
da_start = da_start.astype(int).fillna(0)
da_stop = da_stop.astype(int).fillna(0)

start_runs = _cumsum_reset_on_zero(da_start, dim=dim, index="first")
stop_runs = _cumsum_reset_on_zero(da_stop, dim=dim, index="first")
start_positions = xr.where(start_runs >= window_start, 1, np.nan)
stop_positions = xr.where(stop_runs >= window_stop, 0, np.nan)
start_runs = _cumsum_reset(da_start, dim=dim, index="first")
stop_runs = _cumsum_reset(da_stop, dim=dim, index="first")
start_positions = xr.where(start_runs >= window_start, 1, np.NaN)
stop_positions = xr.where(stop_runs >= window_stop, 0, np.NaN)

# start positions (1) are f-filled until a stop position (0) is met
runs = stop_positions.combine_first(start_positions).ffill(dim=dim).fillna(0)

return runs


def runs_with_holes(da_start, window_start, da_stop, window_stop, dim="time"):
"""Runs with holes"""
da_start = da_start.astype(int).fillna(0)
da_stop = da_stop.astype(int).fillna(0)

start_runs = _cumsum_reset(da_start, dim=dim, index="first")
stop_runs = _cumsum_reset(da_stop, dim=dim, index="first")
start_positions = xr.where(start_runs >= window_start, 1, np.NaN)
stop_positions = xr.where(stop_runs >= window_stop, 0, np.NaN)

# start positions (1) are f-filled until a stop position (0) is met
runs = stop_positions.combine_first(start_positions).ffill(dim=dim).fillna(0)
return runs


def season_start(
da: xr.DataArray,
window: int,
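Finally, a hedged sketch of the new ``runs_with_holes`` helper on a toy series (hypothetical data; the expected output is traced from the start/stop logic above):

import pandas as pd
import xarray as xr

from xclim.indices import run_length as rl

time = pd.date_range("2000-01-01", periods=12, freq="D")
x = xr.DataArray(
    [0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0], dims="time", coords={"time": time}
)

# A run starts after 3 consecutive steps where `da_start` holds and stops after
# 3 consecutive steps where `da_stop` holds; the 2-step dip at positions 4-5 is
# absorbed as a hole, and the stop sequence starting at position 7 ends the run.
runs = rl.runs_with_holes(x == 1, 3, x == 0, 3)
print(runs.values)  # expected: [0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0.]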