diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 119b7a6a2..b19408d70 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -20,7 +20,7 @@ on:
- submitted
env:
- XCLIM_TESTDATA_BRANCH: v2023.9.12
+ XCLIM_TESTDATA_BRANCH: v2023.12.14
concurrency:
# For a given workflow, if we push to the same branch, cancel all previous builds on that branch except on master.
diff --git a/CHANGES.rst b/CHANGES.rst
index f69c45d54..8b62586f9 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -2,6 +2,16 @@
Changelog
=========
+
+v0.48 (unreleased)
+------------------
+Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`).
+
+New features and enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* Added uncertainty partitioning method `lafferty_sriver` from Lafferty and Sriver (2023), which can partition uncertainty related to the downscaling method. (:issue:`1497`, :pull:`1529`).
+
+
v0.47.0 (2023-12-01)
--------------------
Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`).
diff --git a/docs/api.rst b/docs/api.rst
index 65631ee85..ab78a2716 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -65,6 +65,9 @@ Ensembles Module
.. autofunction:: xclim.ensembles.hawkins_sutton
:noindex:
+.. autofunction:: xclim.ensembles.lafferty_sriver
+ :noindex:
+
Units Handling Submodule
========================
diff --git a/docs/notebooks/partitioning.ipynb b/docs/notebooks/partitioning.ipynb
index 76ccf61f6..01bd60004 100644
--- a/docs/notebooks/partitioning.ipynb
+++ b/docs/notebooks/partitioning.ipynb
@@ -79,7 +79,11 @@
"source": [
"## Create an ensemble \n",
"\n",
- "Here we combine the different models and scenarios into a single DataArray with dimensions `model` and `scenario`. Note that the names of those dimensions are important for the uncertainty partitioning algorithm to work. "
+ "Here we combine the different models and scenarios into a single DataArray with dimensions `model` and `scenario`. Note that the names of those dimensions are important for the uncertainty partitioning algorithm to work. \n",
+ "\n",
+ "
\n",
+ "Note that the [xscen library](https://xscen.readthedocs.io/en/latest/index.html) provides a helper function `xscen.ensembles.get_partition_input` to build partition ensembles.\n",
+ "
"
]
},
{
@@ -137,7 +141,11 @@
"id": "41af418d-9e92-433c-800c-6ba28ff7684c",
"metadata": {},
"source": [
- "From there, it's relatively straightforward to compute the relative strength of uncertainties, and create graphics similar to those found in scientific papers. "
+ "From there, it's relatively straightforward to compute the relative strength of uncertainties, and create graphics similar to those found in scientific papers. \n",
+ "\n",
+ "\n",
+ "Note that the [figanos library](https://figanos.readthedocs.io/en/latest/) provides a function `fg.partition` to plot the graph below.\n",
+ "
"
]
},
{
@@ -238,7 +246,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.8"
+ "version": "3.9.13"
}
},
"nbformat": 4,
diff --git a/docs/references.bib b/docs/references.bib
index 4d9afb8ce..9b11742f2 100644
--- a/docs/references.bib
+++ b/docs/references.bib
@@ -2086,3 +2086,20 @@ @inbook{
year={2023},
pages={1927–2058}
}
+
+@article{Lafferty2023,
+ abstract = {Efforts to diagnose the risks of a changing climate often rely on downscaled and bias-corrected climate information, making it important to understand the uncertainties and potential biases of this approach. Here, we perform a variance decomposition to partition uncertainty in global climate projections and quantify the relative importance of downscaling and bias-correction. We analyze simple climate metrics such as annual temperature and precipitation averages, as well as several indices of climate extremes. We find that downscaling and bias-correction often contribute substantial uncertainty to local decision-relevant climate outcomes, though our results are strongly heterogeneous across space, time, and climate metrics. Our results can provide guidance to impact modelers and decision-makers regarding the uncertainties associated with downscaling and bias-correction when performing local-scale analyses, as neglecting to account for these uncertainties may risk overconfidence relative to the full range of possible climate futures.},
+ author = {David C. Lafferty and Ryan L. Sriver},
+ doi = {10.1038/s41612-023-00486-0},
+ issn = {2397-3722},
+ issue = {1},
+    journal = {npj Climate and Atmospheric Science},
+    keywords = {Atmospheric science, Climate and Earth system modelling, Climate-change impacts, Projection and prediction},
+ month = {9},
+ pages = {1-13},
+ publisher = {Nature Publishing Group},
+ title = {Downscaling and bias-correction contribute considerable uncertainty to local climate projections in CMIP6},
+ volume = {6},
+ url = {https://www.nature.com/articles/s41612-023-00486-0},
+ year = {2023},
+}
diff --git a/tests/conftest.py b/tests/conftest.py
index ffb64c11e..b3c7339ea 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -24,6 +24,7 @@
from xclim.testing import helpers
from xclim.testing.helpers import test_timeseries
from xclim.testing.utils import _default_cache_dir # noqa
+from xclim.testing.utils import get_file
from xclim.testing.utils import open_dataset as _open_dataset
if not __xclim_version__.endswith("-beta") and helpers.TESTDATA_BRANCH == "main":
@@ -429,6 +430,30 @@ def ensemble_dataset_objects() -> dict:
return edo
+@pytest.fixture(scope="session")
+def lafferty_sriver_ds() -> xr.Dataset:
+ """Get data from Lafferty & Sriver unit test.
+
+ Notes
+ -----
+ https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/tree/main/unit_test
+ """
+ fn = get_file(
+ "uncertainty_partitioning/seattle_avg_tas.csv",
+ cache_dir=_default_cache_dir,
+ branch=helpers.TESTDATA_BRANCH,
+ )
+
+ df = pd.read_csv(fn, parse_dates=["time"]).rename(
+ columns={"ssp": "scenario", "ensemble": "downscaling"}
+ )
+
+ # Make xarray dataset
+ return xr.Dataset.from_dataframe(
+ df.set_index(["scenario", "model", "downscaling", "time"])
+ )
+
+
@pytest.fixture(scope="session", autouse=True)
def gather_session_data(threadsafe_data_dir, worker_id, xdoctest_namespace):
"""Gather testing data on pytest run.
diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py
index cce0a3961..070a49d2e 100644
--- a/tests/test_partitioning.py
+++ b/tests/test_partitioning.py
@@ -3,7 +3,7 @@
import numpy as np
import xarray as xr
-from xclim.ensembles import hawkins_sutton
+from xclim.ensembles import fractional_uncertainty, hawkins_sutton, lafferty_sriver
from xclim.ensembles._filters import _concat_hist, _model_in_all_scens, _single_member
@@ -67,3 +67,92 @@ def test_hawkins_sutton_synthetic(random):
su.sel(time=slice("2020", None)).mean()
> su.sel(time=slice("2000", "2010")).mean()
)
+
+
+def test_lafferty_sriver_synthetic(random):
+ """Test logic of Lafferty & Sriver's implementation using synthetic data."""
+    # Dimensions: scenario, model, downscaling, time.
+    # The scenario, model and downscaling offsets are constant in time, so each variance component should simply
+    # reflect the spread of the corresponding offsets.
+ sm = np.arange(10, 41, 10) # Scenario mean (4)
+ mm = np.arange(-6, 7, 1) # Model mean (13)
+ dm = np.arange(-2, 3, 1) # Downscaling mean (5)
+ mean = (
+ dm[np.newaxis, np.newaxis, :]
+ + mm[np.newaxis, :, np.newaxis]
+ + sm[:, np.newaxis, np.newaxis]
+ )
+
+ # Natural variability
+ r = random.standard_normal((4, 13, 5, 60))
+
+ x = r + mean[:, :, :, np.newaxis]
+ time = xr.date_range("1970-01-01", periods=60, freq="Y")
+ da = xr.DataArray(
+ x, dims=("scenario", "model", "downscaling", "time"), coords={"time": time}
+ )
+ m, v = lafferty_sriver(da)
+ # Mean uncertainty over time
+ vm = v.mean(dim="time")
+
+    # Check the mean projection: the scenario means average to 25
+ np.testing.assert_array_almost_equal(m.mean(dim="time"), 25, decimal=1)
+
+ # Check that model uncertainty > variability
+ assert vm.sel(uncertainty="model") > vm.sel(uncertainty="variability")
+
+ # Smoke test with polynomial of order 2
+ fit = da.polyfit(dim="time", deg=2, skipna=True)
+    smooth = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where(da.notnull())
+    lafferty_sriver(da, sm=smooth)
+
+
+def test_lafferty_sriver(lafferty_sriver_ds):
+ g, u = lafferty_sriver(lafferty_sriver_ds.tas)
+
+ fu = fractional_uncertainty(u)
+
+ # Assertions based on expected results from
+ # https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/blob/main/unit_test/unit_test_check.ipynb
+ assert fu.sel(time="2020", uncertainty="downscaling") > fu.sel(
+ time="2020", uncertainty="model"
+ )
+ assert fu.sel(time="2020", uncertainty="variability") > fu.sel(
+ time="2020", uncertainty="scenario"
+ )
+ assert (
+ fu.sel(time="2090", uncertainty="scenario").data
+ > fu.sel(time="2020", uncertainty="scenario").data
+ )
+ assert (
+ fu.sel(time="2090", uncertainty="downscaling").data
+ < fu.sel(time="2020", uncertainty="downscaling").data
+ )
+
+ def graph():
+ """Return graphic like in https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/blob/main/unit_test/unit_test_check.ipynb"""
+ from matplotlib import pyplot as plt
+
+ udict = {
+ "Scenario": fu.sel(uncertainty="scenario").to_numpy().flatten(),
+ "Model": fu.sel(uncertainty="model").to_numpy().flatten(),
+ "Downscaling": fu.sel(uncertainty="downscaling").to_numpy().flatten(),
+ "Variability": fu.sel(uncertainty="variability").to_numpy().flatten(),
+ }
+
+ fig, ax = plt.subplots()
+ ax.stackplot(
+ np.arange(2015, 2101),
+ udict.values(),
+ labels=udict.keys(),
+ alpha=1,
+ colors=["#00CC89", "#6869B3", "#CC883C", "#FFFF99"],
+ edgecolor="white",
+ lw=1.5,
+ )
+ ax.set_xlim([2020, 2095])
+ ax.set_ylim([0, 100])
+ ax.legend(loc="upper left")
+ plt.show()
+
+ # graph()
diff --git a/xclim/ensembles/__init__.py b/xclim/ensembles/__init__.py
index 27deb868b..9144811cb 100644
--- a/xclim/ensembles/__init__.py
+++ b/xclim/ensembles/__init__.py
@@ -10,7 +10,7 @@
from __future__ import annotations
from ._base import create_ensemble, ensemble_mean_std_max_min, ensemble_percentiles
-from ._partitioning import hawkins_sutton
+from ._partitioning import fractional_uncertainty, hawkins_sutton, lafferty_sriver
from ._reduce import (
kkz_reduce_ensemble,
kmeans_reduce_ensemble,
diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py
index 6b7f93aef..5daeb034c 100644
--- a/xclim/ensembles/_partitioning.py
+++ b/xclim/ensembles/_partitioning.py
@@ -3,8 +3,7 @@
Uncertainty Partitioning
========================
-This module implements methods and tools meant to partition climate projection uncertainties into different components:
-natural variability, GHG scenario and climate models.
+This module implements methods and tools meant to partition climate projection uncertainties into different components.
"""
@@ -18,6 +17,7 @@
Implemented partitioning algorithms:
- `hawkins_sutton`
+ - `lafferty_sriver`
# References for other more recent algorithms that could be added here.
@@ -50,6 +50,8 @@
- evin_2019
"""
+# TODO: Add ref for Brekke and Barsugli (2013)
+
def hawkins_sutton(
da: xr.DataArray,
@@ -62,18 +64,18 @@ def hawkins_sutton(
Parameters
----------
- da : xr.DataArray
- Time series with dimensions 'time', 'scenario' and 'model'.
- sm : xr.DataArray, optional
- Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a
- 4th-order polynomial. Results are sensitive to the choice of smoothing function, use this to set another
- polynomial order, or a LOESS curve.
- weights : xr.DataArray, optional
- Weights to be applied to individual models. Should have `model` dimension.
- baseline : [str, str]
- Start and end year of the reference period.
- kind : {'+', '*'}
- Whether the mean over the reference period should be subtracted (+) or divided by (*).
+ da: xr.DataArray
+ Time series with dimensions 'time', 'scenario' and 'model'.
+ sm: xr.DataArray, optional
+ Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a 4th order
+ polynomial. Results are sensitive to the choice of smoothing function, use this to set another polynomial
+ order, or a LOESS curve.
+ weights: xr.DataArray, optional
+ Weights to be applied to individual models. Should have `model` dimension.
+ baseline: (str, str)
+ Start and end year of the reference period.
+ kind: {'+', '*'}
+ Whether the mean over the reference period should be subtracted (+) or divided by (*).
Returns
-------
@@ -90,6 +92,9 @@ def hawkins_sutton(
- annual time series starting in 1950 and ending in 2100;
- the same models are available for all scenarios.
+ To get the fraction of the total variance instead of the variance itself, call `fractional_uncertainty` on the
+ output.
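+
+    For example, a schematic call (`da` holding annual series with `time`, `scenario` and `model` dimensions):
+
+    >>> g, u = hawkins_sutton(da)  # doctest: +SKIP
+    >>> frac = fractional_uncertainty(u)  # doctest: +SKIP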
+
References
----------
:cite:cts:`hawkins_2009,hawkins_2011`
@@ -154,6 +159,10 @@ def hawkins_sutton(
u = pd.Index(["variability", "model", "scenario", "total"], name="uncertainty")
uncertainty = xr.concat([nv_u, model_u, scenario_u, total], dim=u)
+    # Keep track of the members of each uncertainty component
+ for d in ["model", "scenario"]:
+ uncertainty.attrs[d] = da[d].values
+
# Mean projection: G(t)
g = sm.weighted(weights).mean(dim="model").mean(dim="scenario")
@@ -184,3 +193,126 @@ def hawkins_sutton_09_weighting(da, obs, baseline=("1971", "2000")):
xm = da.sel(time=baseline[1]) - mm
xm = xm.drop("time").squeeze()
return 1 / (obs + np.abs(xm - obs))
+
+
+def lafferty_sriver(
+ da: xr.DataArray,
+    sm: xr.DataArray | None = None,
+ bb13: bool = False,
+):
+ """Return the mean and partitioned variance of an ensemble based on method from Lafferty and Sriver (2023).
+
+ Parameters
+ ----------
+ da: xr.DataArray
+ Time series with dimensions 'time', 'scenario', 'downscaling' and 'model'.
+    sm: xr.DataArray, optional
+ Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a 4th order
+ polynomial. Results are sensitive to the choice of smoothing function, use this to set another polynomial
+ order, or a LOESS curve.
+ bb13: bool
+ Whether to apply the Brekke and Barsugli (2013) method to estimate scenario uncertainty, where the variance
+ over scenarios is computed before taking the mean over models and downscaling methods.
+
+ Returns
+ -------
+ xr.DataArray, xr.DataArray
+        The mean projection of the smoothed ensemble, and the components of variance of the ensemble. These components
+        are coordinates along the `uncertainty` dimension: `variability`, `model`, `scenario`, `downscaling` and `total`.
+
+ Notes
+ -----
+ To prepare input data, make sure `da` has dimensions `time`, `scenario`, `downscaling` and `model`,
+ e.g. `da.rename({"experiment": "scenario"})`.
+
+ To get the fraction of the total variance instead of the variance itself, call `fractional_uncertainty` on the
+ output.
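+
+    A schematic call (dimension names as described above):
+
+    >>> da = da.rename({"experiment": "scenario"})  # doctest: +SKIP
+    >>> g, u = lafferty_sriver(da)  # doctest: +SKIP
+    >>> frac = fractional_uncertainty(u)  # doctest: +SKIP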
+
+ References
+ ----------
+ :cite:cts:`Lafferty2023`
+ """
+ if xr.infer_freq(da.time)[0] not in ["A", "Y"]:
+ raise ValueError("This algorithm expects annual time series.")
+
+ if not {"time", "scenario", "model", "downscaling"}.issubset(da.dims):
+ raise ValueError(
+ "DataArray dimensions should include 'time', 'scenario', 'downscaling' and 'model'."
+ )
+
+ if sm is None:
+ # Fit a 4th order polynomial
+ fit = da.polyfit(dim="time", deg=4, skipna=True)
+ sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where(
+ da.notnull()
+ )
+
+ # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference
+ # between the extracted forced response and the raw outputs, averaged over all outputs."
+ nv_u = (
+ (da - sm)
+ .rolling(time=11, center=True)
+        .var()
+ .mean(dim=["scenario", "model", "downscaling"])
+ )
+
+ # Scenario uncertainty: U_s(t)
+ if bb13:
+ scenario_u = sm.var(dim="scenario").mean(dim=["model", "downscaling"])
+ else:
+ scenario_u = sm.mean(dim=["model", "downscaling"]).var(dim="scenario")
+
+ # Model uncertainty: U_m(t)
+
+    # Count the number of parent models that have been downscaled using method `d` for scenario `s`.
+    # In the paper, weights are constant; here they may vary across time if there are missing values.
+ mw = sm.count("model")
+ # In https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/blob/main/unit_test/lafferty_sriver.py
+ # weights are set to zero when there is only one model, but the var for a single element is 0 anyway.
+ model_u = sm.var(dim="model").weighted(mw).mean(dim=["scenario", "downscaling"])
+
+ # Downscaling uncertainty: U_d(t)
+ dw = sm.count("downscaling")
+ downscaling_u = (
+ sm.var(dim="downscaling").weighted(dw).mean(dim=["scenario", "model"])
+ )
+
+ # Total uncertainty: T(t)
+ total = nv_u + scenario_u + model_u + downscaling_u
+
+ # Create output array with the uncertainty components
+ u = pd.Index(
+ ["model", "scenario", "downscaling", "variability", "total"], name="uncertainty"
+ )
+ uncertainty = xr.concat([model_u, scenario_u, downscaling_u, nv_u, total], dim=u)
+
+    # Keep track of the members of each uncertainty component
+ for d in ["model", "scenario", "downscaling"]:
+ uncertainty.attrs[d] = da[d].values
+
+    # Mean projection: G(t)
+    # This is not part of the original algorithm, but we want all partitioning algorithms to return similar outputs.
+ g = sm.mean(dim="model").mean(dim="scenario").mean(dim="downscaling")
+
+ return g, uncertainty
+
+
+def fractional_uncertainty(u: xr.DataArray) -> xr.DataArray:
+    """Return the fractional uncertainty.
+
+ Parameters
+ ----------
+ u: xr.DataArray
+ Array with uncertainty components along the `uncertainty` dimension.
+
+ Returns
+ -------
+ xr.DataArray
+        Fractional, or relative, uncertainty with respect to the total uncertainty, in percent.
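+
+    Examples
+    --------
+    A schematic call, assuming `u` comes from one of the partitioning functions in this module:
+
+    >>> _, u = lafferty_sriver(da)  # doctest: +SKIP
+    >>> fractional_uncertainty(u).sel(uncertainty="scenario")  # doctest: +SKIP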
+ """
+ uncertainty = u / u.sel(uncertainty="total") * 100
+ uncertainty.attrs.update(u.attrs)
+ uncertainty.attrs["long_name"] = "Fraction of total variance"
+ uncertainty.attrs["units"] = "%"
+ return uncertainty
diff --git a/xclim/testing/helpers.py b/xclim/testing/helpers.py
index 85d1f953a..b19ad498d 100644
--- a/xclim/testing/helpers.py
+++ b/xclim/testing/helpers.py
@@ -140,6 +140,7 @@ def populate_testing_data(
"sdba/ahccd_1950-2013.nc",
"sdba/nrcan_1950-2013.nc",
"uncertainty_partitioning/cmip5_pr_global_mon.nc",
+ "uncertainty_partitioning/seattle_avg_tas.csv",
]
data = dict()
diff --git a/xclim/testing/utils.py b/xclim/testing/utils.py
index 1084d197e..c15a92054 100644
--- a/xclim/testing/utils.py
+++ b/xclim/testing/utils.py
@@ -80,7 +80,7 @@ def file_md5_checksum(f_name):
def get_file(
name: str | os.PathLike | Sequence[str | os.PathLike],
github_url: str = "https://github.com/Ouranosinc/xclim-testdata",
- branch: str = "master",
+ branch: str = "main",
cache_dir: Path = _default_cache_dir,
) -> Path | list[Path]:
"""Return a file from an online GitHub-like repository.