diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 119b7a6a2..b19408d70 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -20,7 +20,7 @@ on:
     - submitted

 env:
-  XCLIM_TESTDATA_BRANCH: v2023.9.12
+  XCLIM_TESTDATA_BRANCH: v2023.12.14

 concurrency:
   # For a given workflow, if we push to the same branch, cancel all previous builds on that branch except on master.
diff --git a/CHANGES.rst b/CHANGES.rst
index f69c45d54..8b62586f9 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -2,6 +2,16 @@
 Changelog
 =========
+
+v0.48 (unreleased)
+------------------
+Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`).
+
+New features and enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* Added uncertainty partitioning method `lafferty_sriver` from Lafferty and Sriver (2023), which can partition uncertainty related to the downscaling method. (:issue:`1497`, :pull:`1529`).
+
+
 v0.47.0 (2023-12-01)
 --------------------
 Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`).
diff --git a/docs/api.rst b/docs/api.rst
index 65631ee85..ab78a2716 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -65,6 +65,9 @@ Ensembles Module
 .. autofunction:: xclim.ensembles.hawkins_sutton
    :noindex:

+.. autofunction:: xclim.ensembles.lafferty_sriver
+   :noindex:
+
 Units Handling Submodule
 ========================
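For orientation (not part of the patch): a minimal, self-contained sketch of how the newly documented function is called. The random DataArray below is purely illustrative; only the dimension names and the annual frequency are requirements stated by the function's docstring further down in this diff.

```python
import numpy as np
import xarray as xr

from xclim.ensembles import lafferty_sriver

# Hypothetical annual ensemble: the dims must be named exactly like this.
time = xr.date_range("2015-01-01", periods=86, freq="YS")
da = xr.DataArray(
    np.random.default_rng(42).standard_normal((4, 3, 2, 86)),
    dims=("scenario", "model", "downscaling", "time"),
    coords={"time": time},
)

g, u = lafferty_sriver(da)  # mean projection, variance components
print(u.uncertainty.values)  # ['model' 'scenario' 'downscaling' 'variability' 'total']
```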
\n", + "Note that the [xscen library](https://xscen.readthedocs.io/en/latest/index.html) provides a helper function `xscen.ensembles.get_partition_input` to build partition ensembles.\n", + "
" ] }, { @@ -137,7 +141,11 @@ "id": "41af418d-9e92-433c-800c-6ba28ff7684c", "metadata": {}, "source": [ - "From there, it's relatively straightforward to compute the relative strength of uncertainties, and create graphics similar to those found in scientific papers. " + "From there, it's relatively straightforward to compute the relative strength of uncertainties, and create graphics similar to those found in scientific papers. \n", + "\n", + "
\n", + "Note that the [figanos library](https://figanos.readthedocs.io/en/latest/) provides a function `fg.partition` to plot the graph below.\n", + "
" ] }, { @@ -238,7 +246,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/docs/references.bib b/docs/references.bib index 4d9afb8ce..9b11742f2 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -2086,3 +2086,20 @@ @inbook{ year={2023}, pages={1927–2058} } + +@article{Lafferty2023, + abstract = {Efforts to diagnose the risks of a changing climate often rely on downscaled and bias-corrected climate information, making it important to understand the uncertainties and potential biases of this approach. Here, we perform a variance decomposition to partition uncertainty in global climate projections and quantify the relative importance of downscaling and bias-correction. We analyze simple climate metrics such as annual temperature and precipitation averages, as well as several indices of climate extremes. We find that downscaling and bias-correction often contribute substantial uncertainty to local decision-relevant climate outcomes, though our results are strongly heterogeneous across space, time, and climate metrics. Our results can provide guidance to impact modelers and decision-makers regarding the uncertainties associated with downscaling and bias-correction when performing local-scale analyses, as neglecting to account for these uncertainties may risk overconfidence relative to the full range of possible climate futures.}, + author = {David C. Lafferty and Ryan L. Sriver}, + doi = {10.1038/s41612-023-00486-0}, + issn = {2397-3722}, + issue = {1}, + journal = {npj Climate and Atmospheric Science 2023 6:1}, + keywords = {Atmospheric science,Climate,Climate and Earth system modelling,Projection and prediction,change impacts}, + month = {9}, + pages = {1-13}, + publisher = {Nature Publishing Group}, + title = {Downscaling and bias-correction contribute considerable uncertainty to local climate projections in CMIP6}, + volume = {6}, + url = {https://www.nature.com/articles/s41612-023-00486-0}, + year = {2023}, +} diff --git a/tests/conftest.py b/tests/conftest.py index ffb64c11e..b3c7339ea 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,6 +24,7 @@ from xclim.testing import helpers from xclim.testing.helpers import test_timeseries from xclim.testing.utils import _default_cache_dir # noqa +from xclim.testing.utils import get_file from xclim.testing.utils import open_dataset as _open_dataset if not __xclim_version__.endswith("-beta") and helpers.TESTDATA_BRANCH == "main": @@ -429,6 +430,30 @@ def ensemble_dataset_objects() -> dict: return edo +@pytest.fixture(scope="session") +def lafferty_sriver_ds() -> xr.Dataset: + """Get data from Lafferty & Sriver unit test. + + Notes + ----- + https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/tree/main/unit_test + """ + fn = get_file( + "uncertainty_partitioning/seattle_avg_tas.csv", + cache_dir=_default_cache_dir, + branch=helpers.TESTDATA_BRANCH, + ) + + df = pd.read_csv(fn, parse_dates=["time"]).rename( + columns={"ssp": "scenario", "ensemble": "downscaling"} + ) + + # Make xarray dataset + return xr.Dataset.from_dataframe( + df.set_index(["scenario", "model", "downscaling", "time"]) + ) + + @pytest.fixture(scope="session", autouse=True) def gather_session_data(threadsafe_data_dir, worker_id, xdoctest_namespace): """Gather testing data on pytest run. 
diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py
index cce0a3961..070a49d2e 100644
--- a/tests/test_partitioning.py
+++ b/tests/test_partitioning.py
@@ -3,7 +3,7 @@
 import numpy as np
 import xarray as xr

-from xclim.ensembles import hawkins_sutton
+from xclim.ensembles import fractional_uncertainty, hawkins_sutton, lafferty_sriver
 from xclim.ensembles._filters import _concat_hist, _model_in_all_scens, _single_member
@@ -67,3 +67,92 @@ def test_hawkins_sutton_synthetic(random):
         su.sel(time=slice("2020", None)).mean()
         > su.sel(time=slice("2000", "2010")).mean()
     )
+
+
+def test_lafferty_sriver_synthetic(random):
+    """Test logic of Lafferty & Sriver's implementation using synthetic data."""
+    # Time, scenario, model, downscaling
+    # Here the scenarios don't change over time, so there should be no model variability (since it's relative
+    # to the reference period).
+    sm = np.arange(10, 41, 10)  # Scenario mean (4)
+    mm = np.arange(-6, 7, 1)  # Model mean (13)
+    dm = np.arange(-2, 3, 1)  # Downscaling mean (5)
+    mean = (
+        dm[np.newaxis, np.newaxis, :]
+        + mm[np.newaxis, :, np.newaxis]
+        + sm[:, np.newaxis, np.newaxis]
+    )
+
+    # Natural variability
+    r = random.standard_normal((4, 13, 5, 60))
+
+    x = r + mean[:, :, :, np.newaxis]
+    time = xr.date_range("1970-01-01", periods=60, freq="Y")
+    da = xr.DataArray(
+        x, dims=("scenario", "model", "downscaling", "time"), coords={"time": time}
+    )
+    m, v = lafferty_sriver(da)
+    # Mean uncertainty over time
+    vm = v.mean(dim="time")
+
+    # Check the mean projection: the scenario means average to 25, while the model and downscaling
+    # offsets average to zero.
+    np.testing.assert_array_almost_equal(m.mean(dim="time"), 25, decimal=1)
+
+    # Check that model uncertainty > variability
+    assert vm.sel(uncertainty="model") > vm.sel(uncertainty="variability")
+
+    # Smoke test with polynomial of order 2
+    fit = da.polyfit(dim="time", deg=2, skipna=True)
+    sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where(da.notnull())
+    lafferty_sriver(da, sm=sm)
+
+
+def test_lafferty_sriver(lafferty_sriver_ds):
+    g, u = lafferty_sriver(lafferty_sriver_ds.tas)
+
+    fu = fractional_uncertainty(u)
+
+    # Assertions based on expected results from
+    # https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/blob/main/unit_test/unit_test_check.ipynb
+    assert fu.sel(time="2020", uncertainty="downscaling") > fu.sel(
+        time="2020", uncertainty="model"
+    )
+    assert fu.sel(time="2020", uncertainty="variability") > fu.sel(
+        time="2020", uncertainty="scenario"
+    )
+    assert (
+        fu.sel(time="2090", uncertainty="scenario").data
+        > fu.sel(time="2020", uncertainty="scenario").data
+    )
+    assert (
+        fu.sel(time="2090", uncertainty="downscaling").data
+        < fu.sel(time="2020", uncertainty="downscaling").data
+    )
+
+    def graph():
+        """Plot a graphic like the one in
+        https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/blob/main/unit_test/unit_test_check.ipynb
+        """
+        from matplotlib import pyplot as plt
+
+        udict = {
+            "Scenario": fu.sel(uncertainty="scenario").to_numpy().flatten(),
+            "Model": fu.sel(uncertainty="model").to_numpy().flatten(),
+            "Downscaling": fu.sel(uncertainty="downscaling").to_numpy().flatten(),
+            "Variability": fu.sel(uncertainty="variability").to_numpy().flatten(),
+        }
+
+        fig, ax = plt.subplots()
+        ax.stackplot(
+            np.arange(2015, 2101),
+            udict.values(),
+            labels=udict.keys(),
+            alpha=1,
+            colors=["#00CC89", "#6869B3", "#CC883C", "#FFFF99"],
+            edgecolor="white",
+            lw=1.5,
+        )
+        ax.set_xlim([2020, 2095])
+        ax.set_ylim([0, 100])
+        ax.legend(loc="upper left")
+        plt.show()
+
+    # graph()
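A quick numeric restatement of the synthetic setup above (same arrays as the test, no new behaviour): the scenario means average to 25 while the model and downscaling offsets are centred on zero, which is why `m.mean(dim="time")` is asserted to be approximately 25.

```python
import numpy as np

sm = np.arange(10, 41, 10)  # scenario means 10, 20, 30, 40 -> average 25
mm = np.arange(-6, 7, 1)    # model offsets -6..6 -> average 0
dm = np.arange(-2, 3, 1)    # downscaling offsets -2..2 -> average 0

# Same broadcasting as in the test: (scenario, model, downscaling).
mean = dm[None, None, :] + mm[None, :, None] + sm[:, None, None]
print(mean.shape)   # (4, 13, 5)
print(mean.mean())  # 25.0
```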
diff --git a/xclim/ensembles/__init__.py b/xclim/ensembles/__init__.py
index 27deb868b..9144811cb 100644
--- a/xclim/ensembles/__init__.py
+++ b/xclim/ensembles/__init__.py
@@ -10,7 +10,7 @@
 from __future__ import annotations

 from ._base import create_ensemble, ensemble_mean_std_max_min, ensemble_percentiles
-from ._partitioning import hawkins_sutton
+from ._partitioning import fractional_uncertainty, hawkins_sutton, lafferty_sriver
 from ._reduce import (
     kkz_reduce_ensemble,
     kmeans_reduce_ensemble,
diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py
index 6b7f93aef..5daeb034c 100644
--- a/xclim/ensembles/_partitioning.py
+++ b/xclim/ensembles/_partitioning.py
@@ -3,8 +3,7 @@
 Uncertainty Partitioning
 ========================

-This module implements methods and tools meant to partition climate projection uncertainties into different components:
-natural variability, GHG scenario and climate models.
+This module implements methods and tools meant to partition climate projection uncertainties into different components.

 """
@@ -18,6 +17,7 @@
 Implemented partitioning algorithms:

  - `hawkins_sutton`
+ - `lafferty_sriver`

 # References for other more recent algorithms that could be added here.
@@ -50,6 +50,8 @@
 - evin_2019
 """

+# TODO: Add ref for Brekke and Barsugli (2013)
+

 def hawkins_sutton(
     da: xr.DataArray,
@@ -62,18 +64,18 @@ def hawkins_sutton(

     Parameters
     ----------
-    da : xr.DataArray
-        Time series with dimensions 'time', 'scenario' and 'model'.
-    sm : xr.DataArray, optional
-        Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a
-        4th-order polynomial. Results are sensitive to the choice of smoothing function, use this to set another
-        polynomial order, or a LOESS curve.
-    weights : xr.DataArray, optional
-        Weights to be applied to individual models. Should have `model` dimension.
-    baseline : [str, str]
-        Start and end year of the reference period.
-    kind : {'+', '*'}
-        Whether the mean over the reference period should be subtracted (+) or divided by (*).
+    da: xr.DataArray
+        Time series with dimensions 'time', 'scenario' and 'model'.
+    sm: xr.DataArray, optional
+        Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a 4th order
+        polynomial. Results are sensitive to the choice of smoothing function, use this to set another polynomial
+        order, or a LOESS curve.
+    weights: xr.DataArray, optional
+        Weights to be applied to individual models. Should have `model` dimension.
+    baseline: (str, str)
+        Start and end year of the reference period.
+    kind: {'+', '*'}
+        Whether the mean over the reference period should be subtracted (+) or divided by (*).

     Returns
     -------
@@ -90,6 +92,9 @@ def hawkins_sutton(
     - annual time series starting in 1950 and ending in 2100;
     - the same models are available for all scenarios.

+    To get the fraction of the total variance instead of the variance itself, call `fractional_uncertainty` on the
+    output.
+
     References
     ----------
     :cite:cts:`hawkins_2009,hawkins_2011`
@@ -154,6 +159,10 @@ def hawkins_sutton(
     u = pd.Index(["variability", "model", "scenario", "total"], name="uncertainty")
     uncertainty = xr.concat([nv_u, model_u, scenario_u, total], dim=u)

+    # Keep a trace of the elements for each uncertainty component
+    for d in ["model", "scenario"]:
+        uncertainty.attrs[d] = da[d].values
+
     # Mean projection: G(t)
     g = sm.weighted(weights).mean(dim="model").mean(dim="scenario")
@@ -184,3 +193,126 @@ def hawkins_sutton_09_weighting(da, obs, baseline=("1971", "2000")):
     xm = da.sel(time=baseline[1]) - mm
     xm = xm.drop("time").squeeze()
     return 1 / (obs + np.abs(xm - obs))
+
+
+def lafferty_sriver(
+    da: xr.DataArray,
+    sm: xr.DataArray = None,
+    bb13: bool = False,
+):
+    """Return the mean and partitioned variance of an ensemble based on method from Lafferty and Sriver (2023).
+
+    Parameters
+    ----------
+    da: xr.DataArray
+        Time series with dimensions 'time', 'scenario', 'downscaling' and 'model'.
+    sm: xr.DataArray, optional
+        Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a 4th order
+        polynomial. Results are sensitive to the choice of smoothing function, use this to set another polynomial
+        order, or a LOESS curve.
+    bb13: bool
+        Whether to apply the Brekke and Barsugli (2013) method to estimate scenario uncertainty, where the variance
+        over scenarios is computed before taking the mean over models and downscaling methods.
+
+    Returns
+    -------
+    xr.DataArray, xr.DataArray
+        The mean projection, and the components of variance of the ensemble. These components are
+        coordinates along the `uncertainty` dimension: `variability`, `model`, `scenario`, `downscaling` and `total`.
+
+    Notes
+    -----
+    To prepare input data, make sure `da` has dimensions `time`, `scenario`, `downscaling` and `model`,
+    e.g. `da.rename({"experiment": "scenario"})`.
+
+    To get the fraction of the total variance instead of the variance itself, call `fractional_uncertainty` on the
+    output.
+
+    References
+    ----------
+    :cite:cts:`Lafferty2023`
+    """
+    if xr.infer_freq(da.time)[0] not in ["A", "Y"]:
+        raise ValueError("This algorithm expects annual time series.")
+
+    if not {"time", "scenario", "model", "downscaling"}.issubset(da.dims):
+        raise ValueError(
+            "DataArray dimensions should include 'time', 'scenario', 'downscaling' and 'model'."
+        )
+
+    if sm is None:
+        # Fit a 4th order polynomial
+        fit = da.polyfit(dim="time", deg=4, skipna=True)
+        sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where(
+            da.notnull()
+        )
+
+    # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference
+    # between the extracted forced response and the raw outputs, averaged over all outputs."
+    nv_u = (
+        (da - sm)
+        .rolling(time=11, center=True)
+        .var()
+        .mean(dim=["scenario", "model", "downscaling"])
+    )
+
+    # Scenario uncertainty: U_s(t)
+    if bb13:
+        scenario_u = sm.var(dim="scenario").mean(dim=["model", "downscaling"])
+    else:
+        scenario_u = sm.mean(dim=["model", "downscaling"]).var(dim="scenario")
+
+    # Model uncertainty: U_m(t)
+    # Count the number of parent models that have been downscaled using method $d$ for scenario $s$.
+    # In the paper, weights are constant; here they may vary across time if there are missing values.
+    mw = sm.count("model")
+    # In https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/blob/main/unit_test/lafferty_sriver.py
+    # weights are set to zero when there is only one model, but the var for a single element is 0 anyway.
+    model_u = sm.var(dim="model").weighted(mw).mean(dim=["scenario", "downscaling"])
+
+    # Downscaling uncertainty: U_d(t)
+    dw = sm.count("downscaling")
+    downscaling_u = (
+        sm.var(dim="downscaling").weighted(dw).mean(dim=["scenario", "model"])
+    )
+
+    # Total uncertainty: T(t)
+    total = nv_u + scenario_u + model_u + downscaling_u
+
+    # Create output array with the uncertainty components
+    u = pd.Index(
+        ["model", "scenario", "downscaling", "variability", "total"], name="uncertainty"
+    )
+    uncertainty = xr.concat([model_u, scenario_u, downscaling_u, nv_u, total], dim=u)
+
+    # Keep a trace of the elements for each uncertainty component
+    for d in ["model", "scenario", "downscaling"]:
+        uncertainty.attrs[d] = da[d].values
+
+    # Mean projection:
+    # This is not part of the original algorithm, but we want all partition algos to have similar outputs.
+    g = sm.mean(dim="model").mean(dim="scenario").mean(dim="downscaling")
+
+    return g, uncertainty
+
+
+def fractional_uncertainty(u: xr.DataArray):
+    """
+    Return the fractional uncertainty.
+
+    Parameters
+    ----------
+    u: xr.DataArray
+        Array with uncertainty components along the `uncertainty` dimension.
+
+    Returns
+    -------
+    xr.DataArray
+        Fractional, or relative uncertainty with respect to the total uncertainty.
+    """
+    uncertainty = u / u.sel(uncertainty="total") * 100
+    uncertainty.attrs.update(u.attrs)
+    uncertainty.attrs["long_name"] = "Fraction of total variance"
+    uncertainty.attrs["units"] = "%"
+    return uncertainty
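Since `total` is defined above as the sum of the four components, `fractional_uncertainty` yields components that add up to 100% wherever the rolling variance is defined (the centred 11-year window leaves NaNs at both ends of the record). A hedged sanity-check sketch on random data:

```python
import numpy as np
import xarray as xr

from xclim.ensembles import fractional_uncertainty, lafferty_sriver

# Hypothetical annual ensemble with the required dimension names.
time = xr.date_range("2015-01-01", periods=86, freq="YS")
da = xr.DataArray(
    np.random.default_rng(0).standard_normal((4, 5, 3, 86)) + 15.0,
    dims=("scenario", "model", "downscaling", "time"),
    coords={"time": time},
)

_, u = lafferty_sriver(da)
fu = fractional_uncertainty(u)

# Trim the edge years where the rolling 11-year variance is NaN.
inner = fu.isel(time=slice(5, -5))
parts = inner.sel(uncertainty=["variability", "model", "scenario", "downscaling"])
np.testing.assert_allclose(parts.sum("uncertainty"), 100.0)
```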
diff --git a/xclim/testing/helpers.py b/xclim/testing/helpers.py
index 85d1f953a..b19ad498d 100644
--- a/xclim/testing/helpers.py
+++ b/xclim/testing/helpers.py
@@ -140,6 +140,7 @@ def populate_testing_data(
         "sdba/ahccd_1950-2013.nc",
         "sdba/nrcan_1950-2013.nc",
         "uncertainty_partitioning/cmip5_pr_global_mon.nc",
+        "uncertainty_partitioning/seattle_avg_tas.csv",
     ]

     data = dict()
diff --git a/xclim/testing/utils.py b/xclim/testing/utils.py
index 1084d197e..c15a92054 100644
--- a/xclim/testing/utils.py
+++ b/xclim/testing/utils.py
@@ -80,7 +80,7 @@ def file_md5_checksum(f_name):
 def get_file(
     name: str | os.PathLike | Sequence[str | os.PathLike],
     github_url: str = "https://github.com/Ouranosinc/xclim-testdata",
-    branch: str = "master",
+    branch: str = "main",
     cache_dir: Path = _default_cache_dir,
 ) -> Path | list[Path]:
     """Return a file from an online GitHub-like repository.
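With the default branch switched to "main", fetching the new CSV needs no extra arguments; a small sketch (assumes network access to the Ouranosinc/xclim-testdata repository):

```python
from xclim.testing.utils import get_file

# Downloads and caches the file used by the `lafferty_sriver_ds` fixture.
fn = get_file("uncertainty_partitioning/seattle_avg_tas.csv")
print(fn)  # resolved path inside the default cache directory
```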