From d8e2415c6fbfb7b64d8ba442312991a1ac01f2c6 Mon Sep 17 00:00:00 2001 From: Adrien Lamarche Date: Thu, 8 Aug 2024 09:58:44 -0400 Subject: [PATCH 01/12] SSI --- xclim/indicators/land/_streamflow.py | 28 +++++- xclim/indices/_hydrology.py | 129 +++++++++++++++++++++++++++ xclim/indices/stats.py | 6 +- 3 files changed, 161 insertions(+), 2 deletions(-) diff --git a/xclim/indicators/land/_streamflow.py b/xclim/indicators/land/_streamflow.py index 4823da005..51dc05872 100644 --- a/xclim/indicators/land/_streamflow.py +++ b/xclim/indicators/land/_streamflow.py @@ -5,13 +5,19 @@ from xclim.core.cfchecks import check_valid from xclim.core.indicator import ResamplingIndicator from xclim.core.units import declare_units -from xclim.indices import base_flow_index, generic, rb_flashiness_index +from xclim.indices import ( + base_flow_index, + generic, + rb_flashiness_index, + standardized_streamflow_index, +) __all__ = [ "base_flow_index", "doy_qmax", "doy_qmin", "rb_flashiness_index", + "standardized_streamflow_index", ] @@ -25,6 +31,13 @@ def cfcheck(q): check_valid(q, "standard_name", "water_volume_transport_in_river_channel") +class StandardizedIndexes(ResamplingIndicator): + """Resampling but flexible inputs indicators.""" + + src_freq = ["D", "MS"] + context = "hydro" + + base_flow_index = Streamflow( title="Base flow index", identifier="base_flow_index", @@ -71,3 +84,16 @@ def cfcheck(q): compute=declare_units(da="[discharge]")(generic.select_resample_op), parameters=dict(op=generic.doymin, out_units=None), ) + + +standardized_streamflow_index = StandardizedIndexes( + title="Standardized Streamflow Index (SSI)", + identifier="ssi", + units="", + standard_name="ssi", + long_name="Standardized Streamflow Index (SSI)", + description="FILLME", + abstract="FILLME", + cell_methods="", + compute=standardized_streamflow_index, +) diff --git a/xclim/indices/_hydrology.py b/xclim/indices/_hydrology.py index 3785157af..b247f32a4 100644 --- a/xclim/indices/_hydrology.py +++ b/xclim/indices/_hydrology.py @@ -7,6 +7,8 @@ from xclim.core.calendar import get_calendar from xclim.core.missing import at_least_n_valid from xclim.core.units import declare_units, rate2amount +from xclim.core.utils import DateStr, Quantified +from xclim.indices.stats import standardized_index from . import generic @@ -19,6 +21,7 @@ "snow_melt_we_max", "snw_max", "snw_max_doy", + "standardized_streamflow_index", ] @@ -104,6 +107,132 @@ def rb_flashiness_index(q: xarray.DataArray, freq: str = "YS") -> xarray.DataArr return out +@declare_units( + q="[discharge]", + params="[]", +) +def standardized_streamflow_index( + q: xarray.DataArray, + freq: str | None = "MS", + window: int = 1, + dist: str = "genextreme", + method: str = "ML", + fitkwargs: dict | None = None, + cal_start: DateStr | None = None, + cal_end: DateStr | None = None, + params: Quantified | None = None, + **indexer, +) -> xarray.DataArray: + r"""Standardized Streamflow Index (SSI). + + Parameters + ---------- + q : xarray.DataArray + Rate of river discharge. + freq : str, optional + Resampling frequency. A monthly or daily frequency is expected. Option `None` assumes that desired resampling + has already been applied input dataset and will skip the resampling step. + window : int + Averaging window length relative to the resampling frequency. For example, if `freq="MS"`, + i.e. a monthly resampling, the window is an integer number of months. + dist : {"genextreme", "fisk"} + Name of the univariate distribution. (see :py:mod:`scipy.stats`). + method : {'APP', 'ML', 'PWM'} + Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate). The approximate method + uses a deterministic function that doesn't involve any optimization. + fitkwargs : dict, optional + Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certains parameters (`floc`, `fscale`). + cal_start : DateStr, optional + Start date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. + Default option `None` means that the calibration period begins at the start of the input dataset. + cal_end : DateStr, optional + End date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. + Default option `None` means that the calibration period finishes at the end of the input dataset. + params : xarray.DataArray + Fit parameters. + The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance. + The output can be given here as input, and it overrides other options. + \*\*indexer + Indexing parameters to compute the indicator on a temporal subset of the data. + It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. + + Returns + ------- + xarray.DataArray, [unitless] + Standardized Streamflow Index. + + Notes + ----- + * N-month SSI / N-day SSI is determined by choosing the `window = N` and the appropriate frequency `freq`. + * Supported statistical distributions are: ["genextreme", "fisk"], where "fisk" is scipy's implementation of + a log-logistic distribution + * If `params` is given as input, it overrides the `cal_start`, `cal_end`, `freq` and `window`, `dist` and `method` options. + * "APP" method only supports two-parameter distributions. Parameter `loc` needs to be fixed to use method `APP`. + * The standardized index is bounded by ±8.21. 8.21 is the largest standardized index as constrained by the float64 precision in + the inversion to the normal distribution. + * The results from `climate_indices` library can be reproduced with `method = "APP"` and `fitwkargs = {"floc": 0}` + + Example + ------- + >>> from datetime import datetime + >>> from xclim.indices import standardized_streamflow_index + >>> ds = xr.open_dataset(path_to_q_file) + >>> q = ds.q + >>> cal_start, cal_end = "1990-05-01", "1990-08-31" + >>> ssi_3 = standardized_streamflow_index( + ... q, + ... freq="MS", + ... window=3, + ... dist="genextreme", + ... method="ML", + ... cal_start=cal_start, + ... cal_end=cal_end, + ... ) # Computing SSI-3 months using a GEV distribution for the fit + >>> # Fitting parameters can also be obtained first, then re-used as input. + >>> from xclim.indices.stats import standardized_index_fit_params + >>> params = standardized_index_fit_params( + ... q.sel(time=slice(cal_start, cal_end)), + ... freq="MS", + ... window=3, + ... dist="genextreme", + ... method="ML", + ... ) # First getting params + >>> ssi_3 = standardized_streamflow_index(q, params=params) + + References + ---------- + CHANGEME :cite:cts:`mckee_relationship_1993` + """ + fitkwargs = fitkwargs or {} + dist_methods = {"genextreme": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"]} + if dist in dist_methods.keys(): + if method not in dist_methods[dist]: + raise NotImplementedError( + f"{method} method is not implemented for {dist} distribution" + ) + else: + raise NotImplementedError(f"{dist} distribution is not yet implemented.") + + # Precipitation is expected to be zero-inflated + # zero_inflated = True + zero_inflated = False + ssi = standardized_index( + q, + freq=freq, + window=window, + dist=dist, + method=method, + zero_inflated=zero_inflated, + fitkwargs=fitkwargs, + cal_start=cal_start, + cal_end=cal_end, + params=params, + **indexer, + ) + + return ssi + + @declare_units(snd="[length]") def snd_max(snd: xarray.DataArray, freq: str = "YS-JUL") -> xarray.DataArray: """Maximum snow depth. diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index 93363f8ab..6acb4abfb 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -767,7 +767,11 @@ def standardized_index_fit_params( da = da + convert_units_to(offset, da, context="hydro") # "WPM" method doesn't seem to work for gamma or pearson3 - dist_and_methods = {"gamma": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"]} + dist_and_methods = { + "gamma": ["ML", "APP", "PWM"], + "fisk": ["ML", "APP"], + "genextreme": ["ML", "APP", "PWM"], + } dist = get_dist(dist) if dist.name not in dist_and_methods: raise NotImplementedError(f"The distribution `{dist.name}` is not supported.") From 51f01ed52b1f82b8fd29496fac82eb652e5ab7fa Mon Sep 17 00:00:00 2001 From: Adrien Lamarche Date: Mon, 12 Aug 2024 15:46:23 -0400 Subject: [PATCH 02/12] test streamflow --- tests/test_hydrology.py | 37 ++++++++++++++++++++++++++++ tests/test_indices.py | 48 +++++++++++++++++++++++++++++++++++++ xclim/indices/_hydrology.py | 1 - 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/tests/test_hydrology.py b/tests/test_hydrology.py index 0a967a42a..fd06556c0 100644 --- a/tests/test_hydrology.py +++ b/tests/test_hydrology.py @@ -1,8 +1,13 @@ from __future__ import annotations +from pathlib import Path + import numpy as np +import pytest from xclim import indices as xci +from xclim import land +from xclim.core.units import convert_units_to class TestBaseFlowIndex: @@ -23,6 +28,38 @@ def test_simple(self, q_series): np.testing.assert_array_equal(out, 2) +class TestStandardizedStreamflow: + nc_ds = Path("Raven", "q_sim.nc") + + @pytest.mark.slow + def test_3d_data_with_nans(self, open_dataset): + # test with data + ds = open_dataset(self.nc_ds) + q = ds.q_obs.sel(time=slice("2008")).rename("q") + qMM = convert_units_to(q, "mm**3/s", context="hydro") # noqa + # put a nan somewhere + qMM.values[10] = np.nan + q.values[10] = np.nan + + out1 = land.standardized_streamflow_index( + q, + freq="MS", + window=1, + dist="genextreme", + method="APP", + fitkwargs={"floc": 0}, + ) + out2 = land.standardized_streamflow_index( + qMM, + freq="MS", + window=1, + dist="genextreme", + method="APP", + fitkwargs={"floc": 0}, + ) + np.testing.assert_array_almost_equal(out1, out2, 3) + + class TestSnwMax: def test_simple(self, snw_series): a = np.zeros(366) diff --git a/tests/test_indices.py b/tests/test_indices.py index a9386087f..83241e939 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -751,6 +751,54 @@ def test_standardized_precipitation_evapotranspiration_index( np.testing.assert_allclose(spei.values, values, rtol=0, atol=diff_tol) + # reference results: Obtained with R package `standaRdized` + @pytest.mark.slow + @pytest.mark.parametrize( + "freq, window, dist, method, values, diff_tol", + [ + ( + "D", + 1, + "genextreme", + "ML", + [0.5331, 0.5338, 0.5098, 0.4656, 0.4937], + 9e-2, + ), + ( + "D", + 12, + "genextreme", + "ML", + [0.4414, 0.4695, 0.4861, 0.4838, 0.4877], + 9e-2, + ), + ], + ) + def test_standardized_streamflow_index( + self, open_dataset, freq, window, dist, method, values, diff_tol + ): + ds = open_dataset("Raven/q_sim.nc") + q = ds.q_obs.rename("q") + q_cal = ds.q_sim.rename("q").fillna(ds.q_sim.mean()) + if freq == "D": + q = q.sel(time=slice("2008-01-01", "2008-01-30")).fillna(ds.q_obs.mean()) + else: + q = q.sel(time=slice("2008-01-01", "2008-12-31")).fillna(ds.q_obs.mean()) + fitkwargs = {} + params = xci.stats.standardized_index_fit_params( + q_cal, + freq=freq, + window=window, + dist=dist, + method=method, + fitkwargs=fitkwargs, + zero_inflated=True, + ) + ssi = xci.standardized_streamflow_index(q, params=params) + ssi = ssi.isel(time=slice(-11, -1, 2)).values.flatten() + + np.testing.assert_allclose(ssi, values, rtol=0, atol=diff_tol) + @pytest.mark.parametrize( "indexer", [ diff --git a/xclim/indices/_hydrology.py b/xclim/indices/_hydrology.py index b247f32a4..f05274600 100644 --- a/xclim/indices/_hydrology.py +++ b/xclim/indices/_hydrology.py @@ -170,7 +170,6 @@ def standardized_streamflow_index( * "APP" method only supports two-parameter distributions. Parameter `loc` needs to be fixed to use method `APP`. * The standardized index is bounded by ±8.21. 8.21 is the largest standardized index as constrained by the float64 precision in the inversion to the normal distribution. - * The results from `climate_indices` library can be reproduced with `method = "APP"` and `fitwkargs = {"floc": 0}` Example ------- From 5c9a11d16341c0a91df0166e1625eff091a8c374 Mon Sep 17 00:00:00 2001 From: Adrien Lamarche Date: Mon, 12 Aug 2024 16:21:43 -0400 Subject: [PATCH 03/12] metadata, translation --- xclim/data/fr.json | 6 ++++++ xclim/indicators/land/_streamflow.py | 11 +++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/xclim/data/fr.json b/xclim/data/fr.json index 5c395e112..128c22006 100644 --- a/xclim/data/fr.json +++ b/xclim/data/fr.json @@ -287,6 +287,12 @@ "title": "Indice de précipitation évapotranspiration standardisé (SPEI)", "abstract": "Budget d'eau (précipitation - évapotranspiration) sur une fenêtre mobile, normalisé tel que la moyenne du SPEI est 0 pour les données de calibration. L'unité de la fenêtre est l'unité de temps déterminée par la fréquence de rééchantillonnage." }, + "SSI": { + "long_name": "Indice d'écoulement standardisé (SSI)", + "description": "Écoulement sur une fenêtre mobile de {window} {freq:nom}, normalisée telle que la moyenne du SSI est 0 pour les données de calibration. L'unité de la fenêtre 'X' est l'unité de temps déterminée par la fréquence de rééchantillonage,", + "title": "Indice d'écoulement standardisé (SSI)", + "abstract": "Écoulement sur une fenêtre mobile, normalisée telle que la moyenne du SSI est 0 pour les données de calibration. L'unité de la fenêtre est l'unité de temps déterminée par la fréquence de rééchantillonnage." + }, "DC": { "long_name": "Indice de sécheresse", "description": "Code numérique estimant la teneur en humidité moyenne des couches organiques.", diff --git a/xclim/indicators/land/_streamflow.py b/xclim/indicators/land/_streamflow.py index 51dc05872..6cc54c787 100644 --- a/xclim/indicators/land/_streamflow.py +++ b/xclim/indicators/land/_streamflow.py @@ -31,7 +31,7 @@ def cfcheck(q): check_valid(q, "standard_name", "water_volume_transport_in_river_channel") -class StandardizedIndexes(ResamplingIndicator): +class StandardizedStreamflowIndexes(ResamplingIndicator): """Resampling but flexible inputs indicators.""" src_freq = ["D", "MS"] @@ -86,14 +86,17 @@ class StandardizedIndexes(ResamplingIndicator): ) -standardized_streamflow_index = StandardizedIndexes( +standardized_streamflow_index = StandardizedStreamflowIndexes( title="Standardized Streamflow Index (SSI)", identifier="ssi", units="", standard_name="ssi", long_name="Standardized Streamflow Index (SSI)", - description="FILLME", - abstract="FILLME", + description="Streamflow over a moving {window}-X window, normalized such that SSI averages to 0 for " + "calibration data. The window unit `X` is the minimal time period defined by resampling frequency {freq}.", + abstract="Streamflow over a moving window, normalized such that SSI averages to 0 for the calibration data. " + "The window unit `X` is the minimal time period defined by the resampling frequency.", cell_methods="", + keywords="streamflow", compute=standardized_streamflow_index, ) From 3e2e840b7a47b8ed0bd1ad692d881dcc68db7910 Mon Sep 17 00:00:00 2001 From: Adrien Lamarche Date: Tue, 13 Aug 2024 08:59:53 -0400 Subject: [PATCH 04/12] ssi reference --- docs/references.bib | 16 +++++++++++++++- xclim/indices/_agro.py | 2 +- xclim/indices/_hydrology.py | 8 ++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/docs/references.bib b/docs/references.bib index 9f2419dd9..8f3a87535 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -2153,7 +2153,7 @@ @article{droogers2002 type = {Article} } -@Article{robin_2019, +@article{robin_2019, author = {Robin, Y. and Vrac, M. and Naveau, P. and Yiou, P.}, title = {Multivariate stochastic bias corrections with optimal transport}, journal = {Hydrology and Earth System Sciences}, @@ -2203,3 +2203,17 @@ @article{knol_1989 publisher = "Springer", number = "1", } + +@article{vicente-serrano_2012, + author = {Sergio M. Vicente-Serrano and Juan I. López-Moreno and Santiago Beguería and Jorge Lorenzo-Lacruz and Cesar Azorin-Molina and Enrique Morán-Tejeda}, + title = {Accurate Computation of a Streamflow Drought Index}, + journal = {Journal of Hydrologic Engineering}, + volume = {17}, + number = {2}, + pages = {318-332}, + year = {2012}, + doi = {10.1061/(ASCE)HE.1943-5584.0000433}, + URL = {https://ascelibrary.org/doi/abs/10.1061/%28ASCE%29HE.1943-5584.0000433}, + eprint = {https://ascelibrary.org/doi/pdf/10.1061/%28ASCE%29HE.1943-5584.0000433}, + abstract = "In this study, the authors investigated an approach to calculate the standardized streamflow index (SSI), which allows accurate spatial and temporal comparison of the hydrological conditions of a stream or set of streams. For this purpose, the capability of six three-parameter distributions (lognormal, Pearson Type III, log-logistic, general extreme value, generalized Pareto, and Weibull) and two different approaches to select the most suitable distribution the best monthly fit (BMF) and the minimum orthogonal distance (MD), were tested by using a monthly streamflow data set for the Ebro Basin (Spain). This large Mediterranean basin is characterized by high variability in the magnitude of streamflows and in seasonal regimes. The results show that the most commonly used probability distributions for flow frequency analysis provided good fits to the streamflow series. Thus, the visual inspection of the L-moment diagrams and the results of the Kolmogorov-Smirnov test did not enable the selection of a single optimum probability distribution. However, no single probability distribution for all the series was suitable for obtaining a robust standardized streamflow series because each of the distributions had one or more limitations. The BMF and MD approaches improved the results in the expected average, standard deviation, and the frequencies of extreme events of the SSI series in relation to the selection of a unique distribution for each station. The BMF and MD approaches involved using different probability distributions for each gauging station and month of the year to calculate the SSI. Both approaches are easy to apply and they provide very similar results in the quality of the obtained hydrological drought indexes. The proposed procedures are very flexible for analyses involving contrasting hydrological regimes and flow characteristics." +} diff --git a/xclim/indices/_agro.py b/xclim/indices/_agro.py index 6cb6c277e..43839c05d 100644 --- a/xclim/indices/_agro.py +++ b/xclim/indices/_agro.py @@ -1172,7 +1172,7 @@ def standardized_precipitation_index( >>> from xclim.indices import standardized_precipitation_index >>> ds = xr.open_dataset(path_to_pr_file) >>> pr = ds.pr - >>> cal_start, cal_end = "1990-05-01", "1990-08-31" + >>> cal_start, cal_end = "1990-05-01", "1992-06-01" >>> spi_3 = standardized_precipitation_index( ... pr, ... freq="MS", diff --git a/xclim/indices/_hydrology.py b/xclim/indices/_hydrology.py index f05274600..3371322e9 100644 --- a/xclim/indices/_hydrology.py +++ b/xclim/indices/_hydrology.py @@ -177,7 +177,7 @@ def standardized_streamflow_index( >>> from xclim.indices import standardized_streamflow_index >>> ds = xr.open_dataset(path_to_q_file) >>> q = ds.q - >>> cal_start, cal_end = "1990-05-01", "1990-08-31" + >>> cal_start, cal_end = "2006-05-01", "2008-06-01" >>> ssi_3 = standardized_streamflow_index( ... q, ... freq="MS", @@ -198,9 +198,13 @@ def standardized_streamflow_index( ... ) # First getting params >>> ssi_3 = standardized_streamflow_index(q, params=params) + See Also + -------- + standardized_precipitation_index + References ---------- - CHANGEME :cite:cts:`mckee_relationship_1993` + :cite:cts:`vicente-serrano_2012` """ fitkwargs = fitkwargs or {} dist_methods = {"genextreme": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"]} From 59bfcfbbffa84d7f90ed54add0d84f554a2bb268 Mon Sep 17 00:00:00 2001 From: Adrien Lamarche Date: Tue, 13 Aug 2024 09:55:51 -0400 Subject: [PATCH 05/12] ssi test against self --- tests/test_indices.py | 93 ++++++++++++++++++++++++++++++++++++++++-- xclim/indices/stats.py | 2 +- 2 files changed, 91 insertions(+), 4 deletions(-) diff --git a/tests/test_indices.py b/tests/test_indices.py index 83241e939..f6e38cf25 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -751,11 +751,11 @@ def test_standardized_precipitation_evapotranspiration_index( np.testing.assert_allclose(spei.values, values, rtol=0, atol=diff_tol) - # reference results: Obtained with R package `standaRdized` @pytest.mark.slow @pytest.mark.parametrize( "freq, window, dist, method, values, diff_tol", [ + # reference results: Obtained with R package `standaRdized` ( "D", 1, @@ -772,6 +772,93 @@ def test_standardized_precipitation_evapotranspiration_index( [0.4414, 0.4695, 0.4861, 0.4838, 0.4877], 9e-2, ), + # reference results : xclim version where the test was implemented + ( + "D", + 1, + "genextreme", + "ML", + [0.6105, 0.6167, 0.5957, 0.5520, 0.5794], + 2e-2, + ), + ( + "D", + 1, + "genextreme", + "APP", + [-0.0259, -0.0141, -0.0080, -0.0098, 0.0089], + 2e-2, + ), + ("D", 1, "fisk", "ML", [0.3514, 0.3741, 0.1349, 0.4332, 0.1724], 2e-2), + ("D", 1, "fisk", "APP", [0.3321, 0.3477, 0.3536, 0.3468, 0.3723], 2e-2), + ( + "D", + 12, + "genextreme", + "ML", + [0.5131, 0.5442, 0.5645, 0.5660, 0.5720], + 2e-2, + ), + ( + "D", + 12, + "genextreme", + "APP", + [-0.0697, -0.0550, -0.0416, -0.0308, -0.0194], + 2e-2, + ), + ("D", 12, "fisk", "ML", [0.2096, 0.2728, 0.3259, 0.3466, 0.2836], 2e-2), + ("D", 12, "fisk", "APP", [0.2667, 0.2893, 0.3088, 0.3233, 0.3385], 2e-2), + ( + "MS", + 1, + "genextreme", + "ML", + [0.7315, -1.4919, -0.5405, 0.9965, -0.7449], + 2e-2, + ), + ( + "MS", + 1, + "genextreme", + "APP", + [0.0979, -1.6806, -0.5345, 0.7355, -0.7583], + 2e-2, + ), + ("MS", 1, "fisk", "ML", [0.5331, -1.5777, -0.4363, 0.2525, -0.8149], 2e-2), + ("MS", 1, "fisk", "APP", [0.4663, -1.9076, -0.5362, 0.8070, -0.8035], 2e-2), + ( + "MS", + 12, + "genextreme", + "ML", + [-0.9795, -1.0398, -1.9019, -1.6970, -1.4761], + 2e-2, + ), + ( + "MS", + 12, + "genextreme", + "APP", + [-0.9095, -1.0996, -1.9207, -2.2665, -2.1746], + 2e-2, + ), + ( + "MS", + 12, + "fisk", + "ML", + [-1.0776, -1.0827, -1.9333, -1.7764, -1.8391], + 2e-2, + ), + ( + "MS", + 12, + "fisk", + "APP", + [-0.9607, -1.1265, -1.7004, -1.8747, -1.8132], + 2e-2, + ), ], ) def test_standardized_streamflow_index( @@ -783,8 +870,8 @@ def test_standardized_streamflow_index( if freq == "D": q = q.sel(time=slice("2008-01-01", "2008-01-30")).fillna(ds.q_obs.mean()) else: - q = q.sel(time=slice("2008-01-01", "2008-12-31")).fillna(ds.q_obs.mean()) - fitkwargs = {} + q = q.sel(time=slice("2008-01-01", "2009-12-31")).fillna(ds.q_obs.mean()) + fitkwargs = {"floc": 0} if method == "APP" else {} params = xci.stats.standardized_index_fit_params( q_cal, freq=freq, diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index 6acb4abfb..14e72b808 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -756,7 +756,7 @@ def standardized_index_fit_params( if method == "APP": if "floc" not in fitkwargs.keys(): raise ValueError( - "The APP method is only supported for two-parameter distributions with `gamma` or `fisk` with `loc` being fixed." + "The APP method is only supported for two-parameter distributions with `gamma`, `fisk` or `genextreme` with `loc` being fixed." "Pass a value for `floc` in `fitkwargs`." ) if offset is not None: From ee91fcca3673d3e17cb5a6b8ccbaa290f09b8ff5 Mon Sep 17 00:00:00 2001 From: LamAdr <102169875+LamAdr@users.noreply.github.com> Date: Tue, 13 Aug 2024 11:55:46 -0400 Subject: [PATCH 06/12] Update xclim/indices/_hydrology.py Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- xclim/indices/_hydrology.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/indices/_hydrology.py b/xclim/indices/_hydrology.py index 3371322e9..455d42062 100644 --- a/xclim/indices/_hydrology.py +++ b/xclim/indices/_hydrology.py @@ -148,7 +148,7 @@ def standardized_streamflow_index( cal_end : DateStr, optional End date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. Default option `None` means that the calibration period finishes at the end of the input dataset. - params : xarray.DataArray + params : xarray.DataArray, optional Fit parameters. The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance. The output can be given here as input, and it overrides other options. From dad3889c9d3cdb7fb7ea6e7a34cb317e0764103a Mon Sep 17 00:00:00 2001 From: LamAdr <102169875+LamAdr@users.noreply.github.com> Date: Tue, 13 Aug 2024 11:55:57 -0400 Subject: [PATCH 07/12] Update xclim/indices/_hydrology.py Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- xclim/indices/_hydrology.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/indices/_hydrology.py b/xclim/indices/_hydrology.py index 455d42062..132d44c4d 100644 --- a/xclim/indices/_hydrology.py +++ b/xclim/indices/_hydrology.py @@ -152,7 +152,7 @@ def standardized_streamflow_index( Fit parameters. The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance. The output can be given here as input, and it overrides other options. - \*\*indexer + \*\*indexer : dict Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. From fe4f552b49e8ca93b70dd386d9dc312a2c7ead92 Mon Sep 17 00:00:00 2001 From: LamAdr <102169875+LamAdr@users.noreply.github.com> Date: Tue, 13 Aug 2024 11:56:06 -0400 Subject: [PATCH 08/12] Update xclim/indices/_hydrology.py Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- xclim/indices/_hydrology.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/indices/_hydrology.py b/xclim/indices/_hydrology.py index 132d44c4d..29220f792 100644 --- a/xclim/indices/_hydrology.py +++ b/xclim/indices/_hydrology.py @@ -211,7 +211,7 @@ def standardized_streamflow_index( if dist in dist_methods.keys(): if method not in dist_methods[dist]: raise NotImplementedError( - f"{method} method is not implemented for {dist} distribution" + f"{method} method is not implemented for {dist} distribution." ) else: raise NotImplementedError(f"{dist} distribution is not yet implemented.") From fe501a8f5191444608d5211ec2fb32e5945e0deb Mon Sep 17 00:00:00 2001 From: LamAdr <102169875+LamAdr@users.noreply.github.com> Date: Tue, 13 Aug 2024 11:56:16 -0400 Subject: [PATCH 09/12] Update xclim/indices/stats.py Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- xclim/indices/stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index 14e72b808..18e919b70 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -756,7 +756,7 @@ def standardized_index_fit_params( if method == "APP": if "floc" not in fitkwargs.keys(): raise ValueError( - "The APP method is only supported for two-parameter distributions with `gamma`, `fisk` or `genextreme` with `loc` being fixed." + "The APP method is only supported for two-parameter distributions with `gamma`, `fisk`, or `genextreme` with `loc` being fixed." "Pass a value for `floc` in `fitkwargs`." ) if offset is not None: From fc180f6427d8277edcb7a44c1245b725c86bfae4 Mon Sep 17 00:00:00 2001 From: Adrien Lamarche Date: Fri, 23 Aug 2024 15:25:54 -0400 Subject: [PATCH 10/12] sgi --- docs/references.bib | 12 +++ tests/test_hydrology.py | 3 +- xclim/data/fr.json | 6 ++ xclim/indicators/land/_streamflow.py | 18 ++++ xclim/indices/_hydrology.py | 135 ++++++++++++++++++++++++++- xclim/indices/stats.py | 3 +- 6 files changed, 171 insertions(+), 6 deletions(-) diff --git a/docs/references.bib b/docs/references.bib index 8f3a87535..85589cadb 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -2217,3 +2217,15 @@ @article{vicente-serrano_2012 eprint = {https://ascelibrary.org/doi/pdf/10.1061/%28ASCE%29HE.1943-5584.0000433}, abstract = "In this study, the authors investigated an approach to calculate the standardized streamflow index (SSI), which allows accurate spatial and temporal comparison of the hydrological conditions of a stream or set of streams. For this purpose, the capability of six three-parameter distributions (lognormal, Pearson Type III, log-logistic, general extreme value, generalized Pareto, and Weibull) and two different approaches to select the most suitable distribution the best monthly fit (BMF) and the minimum orthogonal distance (MD), were tested by using a monthly streamflow data set for the Ebro Basin (Spain). This large Mediterranean basin is characterized by high variability in the magnitude of streamflows and in seasonal regimes. The results show that the most commonly used probability distributions for flow frequency analysis provided good fits to the streamflow series. Thus, the visual inspection of the L-moment diagrams and the results of the Kolmogorov-Smirnov test did not enable the selection of a single optimum probability distribution. However, no single probability distribution for all the series was suitable for obtaining a robust standardized streamflow series because each of the distributions had one or more limitations. The BMF and MD approaches improved the results in the expected average, standard deviation, and the frequencies of extreme events of the SSI series in relation to the selection of a unique distribution for each station. The BMF and MD approaches involved using different probability distributions for each gauging station and month of the year to calculate the SSI. Both approaches are easy to apply and they provide very similar results in the quality of the obtained hydrological drought indexes. The proposed procedures are very flexible for analyses involving contrasting hydrological regimes and flow characteristics." } + +@article{bloomfield_2013, + AUTHOR = {Bloomfield, J. P. and Marchant, B. P.}, + TITLE = {Analysis of groundwater drought building on the standardised precipitation index approach}, + JOURNAL = {Hydrology and Earth System Sciences}, + VOLUME = {17}, + YEAR = {2013}, + NUMBER = {12}, + PAGES = {4769--4787}, + URL = {https://hess.copernicus.org/articles/17/4769/2013/}, + DOI = {10.5194/hess-17-4769-2013} +} diff --git a/tests/test_hydrology.py b/tests/test_hydrology.py index fd06556c0..9d567031d 100644 --- a/tests/test_hydrology.py +++ b/tests/test_hydrology.py @@ -29,10 +29,9 @@ def test_simple(self, q_series): class TestStandardizedStreamflow: - nc_ds = Path("Raven", "q_sim.nc") - @pytest.mark.slow def test_3d_data_with_nans(self, open_dataset): + nc_ds = Path("Raven", "q_sim.nc") # test with data ds = open_dataset(self.nc_ds) q = ds.q_obs.sel(time=slice("2008")).rename("q") diff --git a/xclim/data/fr.json b/xclim/data/fr.json index 128c22006..611de48fb 100644 --- a/xclim/data/fr.json +++ b/xclim/data/fr.json @@ -269,6 +269,12 @@ "title": "Précipitation liquide accumulée totale", "abstract": "Précipitation liquide accumulée totale. Les précipitations sont considérées liquides lorsque la température quotidienne moyenne est au-dessus de 0°C." }, + "SGI": { + "long_name": "Indice d'eau souterraine standardisé (SGI)", + "description": "Niveau de l'eau souterraine sur une fenêtre mobile de {window} {freq:nom}, normalisé telle que la moyenne du SGI est 0 pour les données de calibration. L'unité de la fenêtre 'X' est l'unité de temps déterminée par la fréquence de rééchantillonage,", + "title": "Indice d'eau souterraine standardisé (SGI)", + "abstract": "Niveau de l'eau souterraine sur une fenêtre mobile, normalisée telle que la moyenne du SGI est 0 pour les données de calibration. L'unité de la fenêtre est l'unité de temps déterminée par la fréquence de rééchantillonnage." + }, "SOLIDPRCPTOT": { "long_name": "Précipitation totale lorsque la température moyenne est en dessous ou égale à {thresh}", "description": "Précipitation solide totale {freq:f}, estimée comme la précipitation lorsque la température moyenne est en dessous ou égale à {thresh}.", diff --git a/xclim/indicators/land/_streamflow.py b/xclim/indicators/land/_streamflow.py index 6cc54c787..e7a269c6a 100644 --- a/xclim/indicators/land/_streamflow.py +++ b/xclim/indicators/land/_streamflow.py @@ -9,6 +9,7 @@ base_flow_index, generic, rb_flashiness_index, + standardized_groundwater_index, standardized_streamflow_index, ) @@ -17,6 +18,7 @@ "doy_qmax", "doy_qmin", "rb_flashiness_index", + "standardized_groundwater_index", "standardized_streamflow_index", ] @@ -100,3 +102,19 @@ class StandardizedStreamflowIndexes(ResamplingIndicator): keywords="streamflow", compute=standardized_streamflow_index, ) + + +standardized_groundwater_index = StandardizedStreamflowIndexes( + title="Standardized Groundwater Index (SGI)", + identifier="sgi", + units="", + standard_name="sgi", + long_name="Standardized Groundwater Index (SGI)", + description="Groundwater over a moving {window}-X window, normalized such that SGI averages to 0 for " + "calibration data. The window unit `X` is the minimal time period defined by resampling frequency {freq}.", + abstract="Groundwater over a moving window, normalized such that SGI averages to 0 for the calibration data. " + "The window unit `X` is the minimal time period defined by the resampling frequency.", + cell_methods="", + keywords="groundwater", + compute=standardized_groundwater_index, +) diff --git a/xclim/indices/_hydrology.py b/xclim/indices/_hydrology.py index 29220f792..1bc4034a8 100644 --- a/xclim/indices/_hydrology.py +++ b/xclim/indices/_hydrology.py @@ -21,6 +21,7 @@ "snow_melt_we_max", "snw_max", "snw_max_doy", + "standardized_groundwater_index", "standardized_streamflow_index", ] @@ -152,7 +153,7 @@ def standardized_streamflow_index( Fit parameters. The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance. The output can be given here as input, and it overrides other options. - \*\*indexer : dict + \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. @@ -216,8 +217,6 @@ def standardized_streamflow_index( else: raise NotImplementedError(f"{dist} distribution is not yet implemented.") - # Precipitation is expected to be zero-inflated - # zero_inflated = True zero_inflated = False ssi = standardized_index( q, @@ -411,3 +410,133 @@ def melt_and_precip_max( out = agg.resample(time=freq).max(dim="time") out.attrs["units"] = snw.units return out + + +@declare_units( + head="[length]", + params="[]", +) +def standardized_groundwater_index( + head: xarray.DataArray, + freq: str | None = "MS", + window: int = 1, + dist: str = "genextreme", + method: str = "ML", + fitkwargs: dict | None = None, + cal_start: DateStr | None = None, + cal_end: DateStr | None = None, + params: Quantified | None = None, + **indexer, +) -> xarray.DataArray: + r"""Standardized Groundwater Index (SGI). + + Parameters + ---------- + head : xarray.DataArray + Groundwater head level. + freq : str, optional + Resampling frequency. A monthly or daily frequency is expected. Option `None` assumes that desired resampling + has already been applied input dataset and will skip the resampling step. + window : int + Averaging window length relative to the resampling frequency. For example, if `freq="MS"`, + i.e. a monthly resampling, the window is an integer number of months. + dist : {"gamma", "genextreme", "lognorm"} + Name of the univariate distribution. (see :py:mod:`scipy.stats`). + method : {'APP', 'ML', 'PWM'} + Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate). The approximate method + uses a deterministic function that doesn't involve any optimization. + fitkwargs : dict, optional + Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certains parameters (`floc`, `fscale`). + cal_start : DateStr, optional + Start date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. + Default option `None` means that the calibration period begins at the start of the input dataset. + cal_end : DateStr, optional + End date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. + Default option `None` means that the calibration period finishes at the end of the input dataset. + params : xarray.DataArray, optional + Fit parameters. + The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance. + The output can be given here as input, and it overrides other options. + \*\*indexer + Indexing parameters to compute the indicator on a temporal subset of the data. + It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. + + Returns + ------- + xarray.DataArray, [unitless] + Standardized Groundwater Index. + + Notes + ----- + * N-month SGI / N-day SGI is determined by choosing the `window = N` and the appropriate frequency `freq`. + * Supported statistical distributions are: ["gamma", "genextreme", "lognorm"] + * If `params` is given as input, it overrides the `cal_start`, `cal_end`, `freq` and `window`, `dist` and `method` options. + * "APP" method only supports two-parameter distributions. Parameter `loc` needs to be fixed to use method `APP`. + * The standardized index is bounded by ±8.21. 8.21 is the largest standardized index as constrained by the float64 precision in + the inversion to the normal distribution. + + Example + ------- + >>> from datetime import datetime + >>> from xclim.indices import standardized_groundwater_index + >>> ds = xr.open_dataset(path_to_head_file) + >>> head = ds.head + >>> cal_start, cal_end = "2006-05-01", "2008-06-01" + >>> sgi_3 = standardized_groundwater_index( + ... head, + ... freq="MS", + ... window=3, + ... dist="gamma", + ... method="ML", + ... cal_start=cal_start, + ... cal_end=cal_end, + ... ) # Computing SGI-3 months using a Gamma distribution for the fit + >>> # Fitting parameters can also be obtained first, then re-used as input. + >>> from xclim.indices.stats import standardized_index_fit_params + >>> params = standardized_index_fit_params( + ... head.sel(time=slice(cal_start, cal_end)), + ... freq="MS", + ... window=3, + ... dist="gamma", + ... method="ML", + ... ) # First getting params + >>> sgi_3 = standardized_streamflow_index(head, params=params) + + See Also + -------- + standardized_precipitation_index + + References + ---------- + :cite:cts:`bloomfield_2013` + """ + fitkwargs = fitkwargs or {} + dist_methods = { + "gamma": ["ML", "APP", "PWM"], + "genextreme": ["ML", "APP", "PWM"], + "lognorm": ["ML", "APP"], + } + if dist in dist_methods.keys(): + if method not in dist_methods[dist]: + raise NotImplementedError( + f"{method} method is not implemented for {dist} distribution." + ) + else: + raise NotImplementedError(f"{dist} distribution is not yet implemented.") + + zero_inflated = False + sgi = standardized_index( + head, + freq=freq, + window=window, + dist=dist, + method=method, + zero_inflated=zero_inflated, + fitkwargs=fitkwargs, + cal_start=cal_start, + cal_end=cal_end, + params=params, + **indexer, + ) + + return sgi diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index 18e919b70..a05803454 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -756,7 +756,7 @@ def standardized_index_fit_params( if method == "APP": if "floc" not in fitkwargs.keys(): raise ValueError( - "The APP method is only supported for two-parameter distributions with `gamma`, `fisk`, or `genextreme` with `loc` being fixed." + "The APP method is only supported for two-parameter distributions with `gamma`, `fisk`, `lognorm`, or `genextreme` with `loc` being fixed." "Pass a value for `floc` in `fitkwargs`." ) if offset is not None: @@ -771,6 +771,7 @@ def standardized_index_fit_params( "gamma": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"], "genextreme": ["ML", "APP", "PWM"], + "lognorm": ["ML", "APP"], } dist = get_dist(dist) if dist.name not in dist_and_methods: From 610962b8c5c591a4509097873957516aee3fd3d8 Mon Sep 17 00:00:00 2001 From: Adrien Lamarche Date: Fri, 23 Aug 2024 16:17:54 -0400 Subject: [PATCH 11/12] lognorm APP --- xclim/indices/stats.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index a05803454..63a1770e3 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -537,6 +537,23 @@ def _fit_start(x, dist: str, **fitkwargs: Any) -> tuple[tuple, dict]: c0 = np.pi * m / np.sqrt(3) / np.sqrt(m2 - m**2) kwargs = {"scale": scale0, "loc": loc0} return (c0,), kwargs + + if dist in ["lognorm"]: + if "floc" in fitkwargs: + loc0 = fitkwargs["floc"] + else: + # muralidhar_1992 + xs = sorted(x) + x1, xn, xp = xs[0], xs[-1], xs[int(len(x) / 2)] + loc0 = (x1 * xn - xp**2) / (x1 + xn - 2 * xp) + x_pos = x - loc0 + x_pos = x_pos[x_pos > 0] + log_x_pos = np.log(x_pos) + shape0 = log_x_pos.std() + scale0 = np.exp(log_x_pos.mean()) + kwargs = {"scale": scale0, "loc": loc0} + return (shape0,), kwargs + return (), {} @@ -756,7 +773,8 @@ def standardized_index_fit_params( if method == "APP": if "floc" not in fitkwargs.keys(): raise ValueError( - "The APP method is only supported for two-parameter distributions with `gamma`, `fisk`, `lognorm`, or `genextreme` with `loc` being fixed." + "The APP method is only supported for two-parameter distributions with `gamma`, `fisk`, `lognorm`, or `genextreme`" + "with `loc` being fixed." "Pass a value for `floc` in `fitkwargs`." ) if offset is not None: From 97ee37ae05947b9b401f289d6827a03c578c1d84 Mon Sep 17 00:00:00 2001 From: Adrien Lamarche Date: Fri, 23 Aug 2024 16:33:38 -0400 Subject: [PATCH 12/12] pre-commit --- tests/test_hydrology.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_hydrology.py b/tests/test_hydrology.py index 9d567031d..6132405cc 100644 --- a/tests/test_hydrology.py +++ b/tests/test_hydrology.py @@ -33,7 +33,7 @@ class TestStandardizedStreamflow: def test_3d_data_with_nans(self, open_dataset): nc_ds = Path("Raven", "q_sim.nc") # test with data - ds = open_dataset(self.nc_ds) + ds = open_dataset(nc_ds) q = ds.q_obs.sel(time=slice("2008")).rename("q") qMM = convert_units_to(q, "mm**3/s", context="hydro") # noqa # put a nan somewhere