From b800a1c0105ecc8bc855fa0fbfdcec0107a8a5a0 Mon Sep 17 00:00:00 2001
From: Robbi Bishop-Taylor <Robbi.BishopTaylor@ga.gov.au>
Date: Mon, 17 Jun 2024 13:57:48 +1000
Subject: [PATCH] Add ensemble tide modelling functionality to `model_tides`
 (#1231)

* Add ensemble tide modelling functionality to model_tides

* Update test_coastal.py

* Remove test

* Updates to IDW, xr_interpolate and ensemble tide modelling code"

* Doco updates

* Switch ensemble rankings from high to low = good

* Update docstring

* Fix doco

* Add interpolation notebook
---
 Tests/dea_tools/test_coastal.py | 231 +++++++++++++++++++++++++
 Tools/dea_tools/coastal.py      | 291 +++++++++++++++++++++++++++-----
 2 files changed, 483 insertions(+), 39 deletions(-)

diff --git a/Tests/dea_tools/test_coastal.py b/Tests/dea_tools/test_coastal.py
index f3ece21db..d58e4a99e 100644
--- a/Tests/dea_tools/test_coastal.py
+++ b/Tests/dea_tools/test_coastal.py
@@ -17,6 +17,7 @@
 
 GAUGE_X = 122.2183
 GAUGE_Y = -18.0008
+ENSEMBLE_MODELS = ["FES2014", "HAMTIDE11"]  # simplified for tests
 
 
 @pytest.fixture()
@@ -301,6 +302,183 @@ def test_model_tides_mode(mode, models, output_format):
             )
 
 
+# Test ensemble modelling functionality
+def test_model_tides_ensemble():
+    # Input params
+    x = [122.14, 144.910368]
+    y = [-17.91, -37.919491]
+    times = pd.date_range("2020", "2021", periods=2)
+
+    # Default, only ensemble requested
+    modelled_tides_df = model_tides(
+        x=x,
+        y=y,
+        time=times,
+        model="ensemble",
+        ensemble_models=ENSEMBLE_MODELS,
+    )
+
+    assert modelled_tides_df.index.names == ["time", "x", "y"]
+    assert modelled_tides_df.columns.tolist() == ["tide_model", "tide_m"]
+    assert all(modelled_tides_df.tide_model == "ensemble")
+
+    # Default, ensemble + other models requested
+    models = ["FES2014", "HAMTIDE11", "ensemble"]
+    modelled_tides_df = model_tides(
+        x=x,
+        y=y,
+        time=times,
+        model=models,
+        ensemble_models=ENSEMBLE_MODELS,
+    )
+
+    assert modelled_tides_df.index.names == ["time", "x", "y"]
+    assert modelled_tides_df.columns.tolist() == ["tide_model", "tide_m"]
+    assert set(modelled_tides_df.tide_model) == set(models)
+    assert np.allclose(
+        modelled_tides_df.tide_m,
+        [
+            -2.819,
+            -1.850,
+            -0.215,
+            0.037,
+            -2.623,
+            -1.803,
+            0.073,
+            -0.069,
+            -2.721,
+            -1.826,
+            -0.071,
+            -0.0158,
+        ],
+        rtol=0.02,
+    )
+
+    # One-to-one mode
+    modelled_tides_df = model_tides(
+        x=x,
+        y=y,
+        time=times,
+        model=models,
+        mode="one-to-one",
+        ensemble_models=ENSEMBLE_MODELS,
+    )
+
+    assert modelled_tides_df.index.names == ["time", "x", "y"]
+    assert modelled_tides_df.columns.tolist() == ["tide_model", "tide_m"]
+    assert set(modelled_tides_df.tide_model) == set(models)
+
+    # Wide mode, default
+    modelled_tides_df = model_tides(
+        x=x,
+        y=y,
+        time=times,
+        model=models,
+        output_format="wide",
+        ensemble_models=ENSEMBLE_MODELS,
+    )
+
+    # Check that expected models exist, and that ensemble is approx average
+    # of other two models
+    assert set(modelled_tides_df.columns) == set(models)
+    assert np.allclose(
+        0.5 * (modelled_tides_df.FES2014 + modelled_tides_df.HAMTIDE11),
+        modelled_tides_df.ensemble,
+    )
+
+    # Wide mode, top n == 1
+    modelled_tides_df = model_tides(
+        x=x,
+        y=y,
+        time=times,
+        model=models,
+        output_format="wide",
+        ensemble_top_n=1,
+        ensemble_models=ENSEMBLE_MODELS,
+    )
+
+    # Check that expected models exist, and that ensemble is equal to at
+    # least one of the other models
+    assert set(modelled_tides_df.columns) == set(models)
+    assert all(
+        (modelled_tides_df.FES2014 == modelled_tides_df.ensemble)
+        | (modelled_tides_df.HAMTIDE11 == modelled_tides_df.ensemble)
+    )
+
+    # Check that correct model is the closest at each row
+    closer_model = modelled_tides_df.apply(
+        lambda row: (
+            "FES2014"
+            if abs(row["ensemble"] - row["FES2014"])
+            < abs(row["ensemble"] - row["HAMTIDE11"])
+            else "HAMTIDE11"
+        ),
+        axis=1,
+    ).tolist()
+    assert closer_model == ["FES2014", "HAMTIDE11", "FES2014", "HAMTIDE11"]
+
+    # Check values are expected
+    assert np.allclose(
+        modelled_tides_df.ensemble, [-2.819, 0.0730, -1.850, -0.069], rtol=0.01
+    )
+
+    # Wide mode, custom functions
+    ensemble_funcs = {
+        "ensemble-best": lambda x: x["rank"] == 1,
+        "ensemble-worst": lambda x: x["rank"] == 2,
+        "ensemble-mean-top2": lambda x: x["rank"].isin([1, 2]),
+        "ensemble-mean-weighted": lambda x: 3 - x["rank"],
+        "ensemble-mean": lambda x: x["rank"] <= 2,
+    }
+    modelled_tides_df = model_tides(
+        x=x,
+        y=y,
+        time=times,
+        model=models,
+        output_format="wide",
+        ensemble_func=ensemble_funcs,
+        ensemble_models=ENSEMBLE_MODELS,
+    )
+
+    # Check that expected models exist, and that valid data is produced
+    assert set(modelled_tides_df.columns) == set(
+        [
+            "FES2014",
+            "HAMTIDE11",
+            "ensemble-best",
+            "ensemble-worst",
+            "ensemble-mean-top2",
+            "ensemble-mean-weighted",
+            "ensemble-mean",
+        ]
+    )
+    assert all(modelled_tides_df.notnull())
+
+    # Long mode, custom functions
+    modelled_tides_df = model_tides(
+        x=x,
+        y=y,
+        time=times,
+        model=models,
+        output_format="long",
+        ensemble_func=ensemble_funcs,
+        ensemble_models=ENSEMBLE_MODELS,
+    )
+
+    # Check that expected models exist in "tide_model" column
+    assert set(modelled_tides_df.tide_model) == set(
+        [
+            "FES2014",
+            "HAMTIDE11",
+            "ensemble-best",
+            "ensemble-worst",
+            "ensemble-mean-top2",
+            "ensemble-mean-weighted",
+            "ensemble-mean",
+        ]
+    )
+
+
 # Run tests for default and custom resolutions
 @pytest.mark.parametrize("resolution", [None, "custom"])
 def test_pixel_tides(satellite_ds, measured_tides_ds, resolution):
@@ -488,6 +666,59 @@ def test_pixel_tides_dask(satellite_ds, dask_chunks):
         assert output_chunks == dask_chunks
 
 
+# Run test pixel tides and ensemble modelling
+def test_pixel_tides_ensemble(satellite_ds):
+    # Model tides using `pixel_tides` and default ensemble model
+    modelled_tides_ds, _ = pixel_tides(
+        satellite_ds,
+        model="ensemble",
+        ensemble_models=ENSEMBLE_MODELS,
+    )
+
+    assert modelled_tides_ds.tide_model == "ensemble"
+
+    # Model tides using `pixel_tides` and multiple models including
+    # ensemble and custom IDW params
+    models = ["FES2014", "HAMTIDE11", "ensemble"]
+    modelled_tides_ds, _ = pixel_tides(
+        satellite_ds,
+        model=models,
+        ensemble_models=ENSEMBLE_MODELS,
+        k=10, 
+        max_dist=20000,
+    )
+
+    assert "tide_model" in modelled_tides_ds.dims
+    assert set(modelled_tides_ds.tide_model.values) == set(models)
+
+    # Model tides using `pixel_tides` and custom ensemble funcs
+    ensemble_funcs = {
+        "ensemble-best": lambda x: x["rank"] == 1,
+        "ensemble-worst": lambda x: x["rank"] == 2,
+        "ensemble-mean-top2": lambda x: x["rank"].isin([1, 2]),
+        "ensemble-mean-weighted": lambda x: 3 - x["rank"],
+        "ensemble-mean": lambda x: x["rank"] <= 2,
+    }
+    modelled_tides_ds, _ = pixel_tides(
+        satellite_ds,
+        model=models,
+        ensemble_func=ensemble_funcs,
+        ensemble_models=ENSEMBLE_MODELS,
+    )
+
+    assert set(modelled_tides_ds.tide_model.values) == set(
+        [
+            "FES2014",
+            "HAMTIDE11",
+            "ensemble-best",
+            "ensemble-worst",
+            "ensemble-mean-top2",
+            "ensemble-mean-weighted",
+            "ensemble-mean",
+        ]
+    )
+
+
 @pytest.mark.parametrize(
     "ebb_flow, swap_dims, tidepost_lat, tidepost_lon",
     [
diff --git a/Tools/dea_tools/coastal.py b/Tools/dea_tools/coastal.py
index 479f2b56f..e5b7fc9e0 100644
--- a/Tools/dea_tools/coastal.py
+++ b/Tools/dea_tools/coastal.py
@@ -16,7 +16,7 @@
 If you would like to report an issue with this script, you can file one 
 on GitHub (https://github.com/GeoscienceAustralia/dea-notebooks/issues/new).
 
-Last modified: November 2023
+Last modified: June 2024
 
 """
 
@@ -40,6 +40,7 @@
 
 from datacube.utils.geometry import CRS
 from dea_tools.datahandling import parallel_apply
+from dea_tools.spatial import idw
 
 # Fix converters for tidal plot
 from pandas.plotting import register_matplotlib_converters
@@ -118,9 +119,11 @@ def _intersect_dist(transect_gdf, lines_gdf, mode=mode):
         if mode == "distance":
             start_point = Point(transect_gdf.geometry.coords[0])
             point_df = intersect_points.apply(
-                lambda x: pd.Series({"start": start_point, "end": x})
-                if x.type == "Point"
-                else pd.Series({"start": None, "end": None})
+                lambda x: (
+                    pd.Series({"start": start_point, "end": x})
+                    if x.type == "Point"
+                    else pd.Series({"start": None, "end": None})
+                )
             )
 
         # In width mode, identify transects with multiple intersections, and
@@ -128,9 +131,11 @@ def _intersect_dist(transect_gdf, lines_gdf, mode=mode):
         # intersection for the end point when measuring distances
         if mode == "width":
             point_df = intersect_points.apply(
-                lambda x: pd.Series({"start": x.geoms[0], "end": x.geoms[-1]})
-                if x.type == "MultiPoint"
-                else pd.Series({"start": None, "end": None})
+                lambda x: (
+                    pd.Series({"start": x.geoms[0], "end": x.geoms[-1]})
+                    if x.type == "MultiPoint"
+                    else pd.Series({"start": None, "end": None})
+                )
             )
 
         # Calculate distances between valid start and end points
@@ -407,6 +412,172 @@ def _model_tides(
     return tide_df
 
 
+def _ensemble_model(
+    x,
+    y,
+    crs,
+    tide_df,
+    ensemble_models,
+    ensemble_func=None,
+    ensemble_top_n=3,
+    ranking_points="https://dea-public-data-dev.s3-ap-southeast-2.amazonaws.com/derivative/dea_intertidal/supplementary/rankings_ensemble_2017-2019.geojson",
+    ranking_valid_perc=0.02,
+    **idw_kwargs,
+):
+    """
+    Combine multiple tide models into a single locally optimised
+    ensemble tide model using external model ranking data (e.g.
+    satellite altimetry or NDWI-tide correlations along the coastline)
+    to inform the selection of the best local models.
+
+    This function performs the following steps:
+    1. Loads model ranking points from a GeoJSON file, filters them
+       based on the valid data percentage, and retains relevant columns
+    2. Interpolates the model rankings into the requested x and y
+       coordinates using Inverse Weighted Interpolation (IDW)
+    3. Uses rankings to combine multiple tide models into a single
+       optimised ensemble model (by default, by taking the mean of the
+       top 3 ranked models)
+    4. Returns a DataFrame with the combined ensemble model predictions
+
+    Parameters
+    ----------
+    x : array-like
+        Array of x-coordinates where the ensemble model predictions are
+        required.
+    y : array-like
+        Array of y-coordinates where the ensemble model predictions are
+        required.
+    crs : string
+        Input coordinate reference system for x and y coordinates. Used
+        to ensure that interpolations are performed in the correct CRS.
+    tide_df : pandas.DataFrame
+        DataFrame containing tide model predictions with columns
+        `["time", "x", "y", "tide_m", "tide_model"]`.
+    ensemble_models : list
+        A list of models to include in the ensemble modelling process.
+        All values must exist as columns with the prefix "rank_" in
+        `ranking_points`.
+    ensemble_func : dict, optional
+        By default, a simple ensemble model will be calculated by taking
+        the mean of the `ensemble_top_n` tide models at each location.
+        However, a dictionary containing more complex ensemble
+        calculations can also be provided. Dictionary keys are used
+        to name output ensemble models; functions should take a column
+        named "rank" and convert it to a weighting, e.g.:
+        `ensemble_func = {"ensemble-custom": lambda x: x["rank"] <= 3}`
+    ensemble_top_n : int, optional
+        If `ensemble_func` is None, this sets the number of top models
+        to include in the mean ensemble calculation. Defaults to 3.
+    ranking_points : str, optional
+        Path to the GeoJSON file containing model ranking points. This
+        dataset should include columns containing rankings for each tide
+        model, named with the prefix "rank_". e.g. "rank_FES2014".
+        Low values should represent high rankings (e.g. 1 = top ranked).
+    ranking_valid_perc : float, optional
+        Minimum percentage of valid data required to include a model
+        rank point in the analysis, as defined in a column named
+        "valid_perc". Defaults to 0.02.
+    **idw_kwargs
+        Optional keyword arguments to pass to the `idw` function used
+        for interpolation. Useful values include `k` (number of nearest
+        neighbours to use in interpolation), `max_dist` (maximum
+        distance to nearest neighbours), and `k_min` (minimum number of
+        neighbours required after `max_dist` is applied).
+
+    Returns
+    -------
+    pandas.DataFrame
+        DataFrame containing the ensemble model predictions, matching
+        the format of the input `tide_df` (e.g. columns `["time", "x",
+        "y", "tide_m", "tide_model"]`. By default the 'tide_model'
+        column will be labeled "ensemble" for the combined model
+        predictions (but if a custom dictionary of ensemble functions is
+        provided via `ensemble_func`, each ensemble will be named using
+        the provided dictionary keys).
+    """
+
+    # Load model ranks points and reproject to same CRS as x and y
+    model_ranking_cols = [f"rank_{m}" for m in ensemble_models]
+    model_ranks_gdf = (
+        gpd.read_file(ranking_points)
+        .to_crs(crs)
+        .query(f"valid_perc > {ranking_valid_perc}")
+        .dropna()[model_ranking_cols + ["geometry"]]
+    )
+
+    # Use points to interpolate model rankings into requested x and y
+    id_kwargs_str = "" if idw_kwargs == {} else idw_kwargs
+    print(f"Interpolating model rankings using IDW interpolation {id_kwargs_str}")
+    ensemble_ranks_df = (
+        # Run IDW interpolation on subset of ranking columns
+        pd.DataFrame(
+            idw(
+                input_z=model_ranks_gdf[model_ranking_cols],
+                input_x=model_ranks_gdf.geometry.x,
+                input_y=model_ranks_gdf.geometry.y,
+                output_x=x,
+                output_y=y,
+                **idw_kwargs,
+            ),
+            columns=model_ranking_cols,
+        )
+        .assign(x=x, y=y)
+        # Drop any duplicates then melt columns into long format
+        .drop_duplicates()
+        .melt(id_vars=["x", "y"], var_name="tide_model", value_name="rank")
+        # Remore "rank_" prefix to get plain model names
+        .replace({"^rank_": ""}, regex=True)
+        # Set index columns and rank across groups
+        .set_index(["tide_model", "x", "y"])
+        .groupby(["x", "y"])
+        .rank()
+    )
+
+    # If no custom ensemble funcs are provided, use a default ensemble
+    # calculation that takes the mean of the top N tide models
+    if ensemble_func is None:
+        ensemble_func = {"ensemble": lambda x: x["rank"] <= ensemble_top_n}
+
+    # Create output list to hold computed ensemble model outputs
+    ensemble_list = []
+
+    # Loop through all provided ensemble generation functions
+    for ensemble_n, ensemble_f in ensemble_func.items():
+
+        print(f"Combining models into single {ensemble_n} model")
+
+        # Join ranks to input tide data, compute weightings and group
+        grouped = (
+            # Add tide model as an index so we can join with model ranks
+            tide_df.set_index("tide_model", append=True).join(ensemble_ranks_df)
+            # Add temp columns containing weightings and weighted values
+            .assign(
+                weights=ensemble_f,  # use custom func to compute weights
+                weighted=lambda i: i.tide_m * i.weights,
+            )
+            # Groupby is specified in a weird order here as this seems
+            # to be the easiest way to preserve correct index sorting
+            .groupby(["x", "y", "time"])
+        )
+
+        # Use weightings to combine multiple models into single ensemble
+        ensemble_df = (
+            # Calculate weighted mean and convert back to dataframe
+            grouped.weighted.sum()
+            .div(grouped.weights.sum())
+            .to_frame("tide_m")
+            # Label ensemble model and ensure indexes are in expected order
+            .assign(tide_model=ensemble_n)
+            .reorder_levels(["time", "x", "y"], axis=0)
+        )
+
+        ensemble_list.append(ensemble_df)
+
+    # Combine all ensemble models and return as a single dataframe
+    return pd.concat(ensemble_list)
+
+
 def model_tides(
     x,
     y,
@@ -422,15 +593,17 @@ def model_tides(
     parallel_splits=5,
     output_units="m",
     output_format="long",
-    epsg=None,
+    ensemble_models=None,
+    **ensemble_kwargs,
 ):
     """
     Compute tides at multiple points and times using tidal harmonics.
 
-    This function supports any tidal model supported by
-    `pyTMD`, including the FES2014 Finite Element Solution
-    tide model, and the TPXO8-atlas and TPXO9-atlas-v5
-    TOPEX/POSEIDON global tide models.
+    This function supports all tidal models supported by `pyTMD`,
+    including FES Finite Element Solution models, TPXO TOPEX/POSEIDON
+    models, EOT Empirical Ocean Tide models, GOT Global Ocean Tide
+    models, and HAMTIDE Hamburg direct data Assimilation Methods for
+    Tides models.
 
     This function requires access to tide model data files.
     These should be placed in a folder with subfolders matching
@@ -474,12 +647,14 @@ def model_tides(
         model tides in UTC time.
     model : string, optional
         The tide model used to model tides. Options include:
-        - "FES2014" (only pre-configured option on DEA Sandbox)
+        - "FES2014" (pre-configured on DEA Sandbox)
         - "TPXO9-atlas-v5"
         - "TPXO8-atlas"
         - "EOT20"
         - "HAMTIDE11"
         - "GOT4.10"
+        - "ensemble" (advanced ensemble tide model functionality;
+          combining multiple models based on external model rankings)
     directory : string, optional
         The directory containing tide model data files. If no path is
         provided, this will default to the environment variable
@@ -496,7 +671,7 @@ def model_tides(
         Input coordinate reference system for x and y coordinates.
         Defaults to "EPSG:4326" (WGS84; degrees latitude, longitude).
     method : string, optional
-        Method used to interpolate tidal contsituents
+        Method used to interpolate tidal constituents
         from model files. Options include:
         - "spline": scipy bivariate spline interpolation (default)
         - "bilinear": quick bilinear interpolation
@@ -542,8 +717,19 @@ def model_tides(
         results stacked vertically along "tide_model" and "tide_m"
         columns), or wide format (with a column for each tide model).
         Defaults to "long".
-    epsg : int, DEPRECATED
-        Deprecated; use "crs" instead.
+    ensemble_models : list, optional
+        An optional list of models used to generate the ensemble tide
+        model if "ensemble" tide modelling is requested. Defaults to 
+        ["FES2014", "TPXO9-atlas-v5", "EOT20", "HAMTIDE11", "GOT4.10",
+        "FES2012", "TPXO8-atlas-v1"].
+    **ensemble_kwargs :
+        Keyword arguments used to customise the generation of optional
+        ensemble tide models if "ensemble" modelling are requested.
+        These are passed to the underlying `_ensemble_model` function.
+        Useful parameters include `ranking_points` (path to model
+        rankings data), `k` (for controlling how model rankings are
+        interpolated), and `ensemble_top_n` (how many top models to use
+        in the ensemble calculation).
 
     Returns
     -------
@@ -551,16 +737,6 @@ def model_tides(
     combination of time and point coordinates.
 
     """
-
-    # Deprecate `epsg` param
-    if epsg is not None:
-        warn(
-            "The `epsg` parameter is deprecated; please use `crs` to "
-            "provide CRS information in the form 'EPSG:XXXX'",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-
     # Set tide modelling files directory. If no custom path is provided,
     # first try global environmental var, then "/var/share/tide_models"
     if directory is None:
@@ -579,7 +755,7 @@ def model_tides(
         time = time.to_datetime64()
 
     # Turn inputs into arrays for consistent handling
-    model = np.atleast_1d(model)
+    models_requested = np.atleast_1d(model)
     x = np.atleast_1d(x)
     y = np.atleast_1d(y)
     time = np.atleast_1d(time)
@@ -603,23 +779,47 @@ def model_tides(
             "you intended to model multiple timesteps at each point."
         )
 
-    # Verify that all provided models are in list of supported models
+    # Verify that all provided models are supported
     valid_models = [
         "FES2014",
         "TPXO9-atlas-v5",
-        "TPXO8-atlas",
         "EOT20",
         "HAMTIDE11",
         "GOT4.10",
-        "FES2012",  # Requires custom tide model definition file
+        "TPXO8-atlas",
         "TPXO8-atlas-v1",  # Requires custom tide model definition file
+        "FES2012",  # Requires custom tide model definition file
+        "ensemble",  # Advanced ensemble model functionality
     ]
-    if not all(m in valid_models for m in model):
+    if not all(m in valid_models for m in models_requested):
         raise ValueError(
-            f"One or more of the models requested {model} is not valid. "
-            f"The following models are currently supported: {valid_models}"
+            f"One or more of the models requested {models_requested} is "
+            f"not valid. The following models are currently supported: "
+            f"{valid_models}"
+        )
+
+    # If ensemble modelling is requested, use a custom list of models
+    # for subsequent processing
+    if "ensemble" in models_requested:
+        print("Running ensemble tide modelling")
+        models_to_process = (
+            ensemble_models
+            if ensemble_models is not None
+            else [
+                "FES2014",
+                "TPXO9-atlas-v5",
+                "EOT20",
+                "HAMTIDE11",
+                "GOT4.10",
+                "FES2012",
+                "TPXO8-atlas-v1",
+            ]
         )
 
+    # Otherwise, models to process are the same as those requested
+    else:
+        models_to_process = models_requested
+
     # Update tide modelling func to add default keyword arguments that
     # are used for every iteration during parallel processing
     iter_func = partial(
@@ -637,12 +837,12 @@ def model_tides(
     parallel_splits = min(parallel_splits, len(x))
 
     # Parallelise if either multiple models or multiple splits requested
-    if parallel & ((len(model) > 1) | (parallel_splits > 1)):
+    if parallel & ((len(models_to_process) > 1) | (parallel_splits > 1)):
         from concurrent.futures import ProcessPoolExecutor
         from tqdm import tqdm
 
         with ProcessPoolExecutor() as executor:
-            print(f"Modelling tides using {', '.join(model)} in parallel")
+            print(f"Modelling tides using {', '.join(models_to_process)} in parallel")
 
             # Optionally split lon/lat points into `splits_n` chunks
             # that will be applied in parallel
@@ -659,7 +859,7 @@ def model_tides(
                 model_iters, x_iters, y_iters = zip(
                     *[
                         (m, x_split[i], y_split[i])
-                        for m in model
+                        for m in models_to_process
                         for i in range(parallel_splits)
                     ]
                 )
@@ -669,7 +869,7 @@ def model_tides(
                 model_iters, x_iters, y_iters, time_iters = zip(
                     *[
                         (m, x_split[i], y_split[i], time_split[i])
-                        for m in model
+                        for m in models_to_process
                         for i in range(parallel_splits)
                     ]
                 )
@@ -686,7 +886,7 @@ def model_tides(
     else:
         model_outputs = []
 
-        for model_i in model:
+        for model_i in models_to_process:
             print(f"Modelling tides using {model_i}")
             tide_df = iter_func(model_i, x, y, time)
             model_outputs.append(tide_df)
@@ -694,6 +894,19 @@ def model_tides(
     # Combine outputs into a single dataframe
     tide_df = pd.concat(model_outputs, axis=0)
 
+    # Optionally compute ensemble model and add to dataframe
+    if "ensemble" in models_requested:
+        ensemble_df = _ensemble_model(
+            x, y, crs, tide_df, models_to_process, **ensemble_kwargs
+        )
+
+        # Update requested models with any custom ensemble models, then
+        # filter the dataframe to keep only models originally requested
+        models_requested = np.union1d(models_requested, ensemble_df.tide_model.unique())
+        tide_df = pd.concat([tide_df, ensemble_df]).query(
+            "tide_model in @models_requested"
+        )
+
     # Optionally convert to a wide format dataframe with a tide model in
     # each dataframe column
     if output_format == "wide":
@@ -755,7 +968,7 @@ def _pixel_tides_resample(
     """
     # Determine spatial dimensions
     y_dim, x_dim = ds.odc.spatial_dims
-    
+
     # Convert array to Dask, using no chunking along y and x dims,
     # and a single chunk for each timestep/quantile and tide model
     tides_lowres_dask = tides_lowres.chunk(