Lazy rectilinear interpolator #6084

Status: Open. Wants to merge 25 commits into base: main.

Changes shown from 18 of 25 commits.

Commits:
b77af1d  lazy interpolation using map_complete_blocks (fnattino, Jul 25, 2024)
0021cb0  pre-commit fixes (fnattino, Jul 25, 2024)
18908ae  replace test on interpolation with lazy data (fnattino, Aug 27, 2024)
47a8599  Merge branch 'main' into lazy-rectilinearinterpolator-2 (fnattino, Sep 11, 2024)
555f3c7  Update lib/iris/analysis/_interpolation.py (fnattino, Sep 13, 2024)
7a08108  Update lib/iris/analysis/_interpolation.py (fnattino, Sep 13, 2024)
c453e01  Merge branch 'lazy-rectilinearinterpolator-2' of github.com:fnattino/… (fnattino, Sep 13, 2024)
3814383  resume local import (fnattino, Sep 17, 2024)
0c5dc9a  add entry to latest.rst (fnattino, Sep 20, 2024)
3481e46  add author name to list (fnattino, Sep 20, 2024)
63714f0  drop duplicated method (fnattino, Nov 28, 2024)
7aaefd8  Merge branch 'main' into lazy-rectilinearinterpolator-2 (fnattino, Nov 28, 2024)
98143f2  new signature of map_complete_blocks (fnattino, Nov 28, 2024)
948c75e  update docstrings on lazy data (fnattino, Nov 28, 2024)
09974f3  Merge branch 'main' into lazy-rectilinearinterpolator-2 (fnattino, Nov 28, 2024)
18c6e7a  Merge branch 'main' into lazy-rectilinearinterpolator-2 (fnattino, Dec 6, 2024)
d190a8b  update userguide with lazy interpolator (fnattino, Dec 6, 2024)
3b210e4  Merge branch 'main' into lazy-rectilinearinterpolator-2 (trexfeathers, Dec 18, 2024)
47bce0e  the unstructured NN regridder does not support lazy data (fnattino, Dec 22, 2024)
609c75a  remove caching an interpolator (fnattino, Dec 22, 2024)
dd14caa  update what's new entry (fnattino, Dec 22, 2024)
48f47eb  Merge branch 'main' into lazy-rectilinearinterpolator-2 (fnattino, Dec 22, 2024)
2fb8fc3  remove links to docs section about caching interpolators (fnattino, Dec 22, 2024)
a4774b8  Merge branch 'main' into lazy-rectilinearinterpolator-2 (fnattino, Jan 7, 2025)
fb8cffd  Merge branch 'main' into lazy-rectilinearinterpolator-2 (fnattino, Jan 17, 2025)
26 changes: 13 additions & 13 deletions docs/src/userguide/interpolation_and_regridding.rst
Contributor:
Just wanted to double check, but I think we should also remove the subsections on "caching an interpolator" and "caching a regridder": as discussed in the thread below #6084 (comment), almost all the work is done when calling the interpolator (within RectilinearInterpolator._interpolate), so there seems to be no actual benefit in constructing and reusing the interpolator objects? And I think this is also true for the regridder, for which all the work seems to be done within RectilinearRegridder._regrid?

Re-using regridders

Please do NOT remove the section about re-using a regridder. This receives much more attention from the team and we know that it provides important performance benefits - the preparation step is more expensive for regridding.

Re-using interpolators

Thanks for the heads-up: I was not aware that interpolator re-use was an advertised feature (we talk a lot more about regridder re-use).

Since it's an advertised feature, I raised this with @SciTools/peloton to make sure we are happy with losing this functionality:

  • We confirmed that re-use code does not break, which is essential. The only change for the user is that re-use no longer provides a benefit.
  • It is believable that the performance benefits of re-use are not as great as with regridding - we are not caching weights here, but a SciPy object.
  • The ideal situation would be parallelisation AND re-use, but if there is a choice then parallelisation wins.
  • There are future plans (perhaps @stephenworsley can provide cross-references here) to use modern matrix-multiplication for all regridding and interpolation, which makes us more comfortable with the possibility that performance still isn't the best for all cases - it will get even better in the long-term!

So we're still 👍 on removing the re-use functionality. Outstanding work for you:

  • Remove this documentation section as you suggest.
  • Share in the What's New entry that this is no longer a performance feature (although the suggested code will still work).
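
For reference, the re-use pattern in question is sketched below. This is illustrative only: it uses the public scheme API, and the sample data and points are placeholders.

import iris
from iris.analysis import Linear

cube = iris.load_cube(iris.sample_data_path("A1B_north_america.nc"))

# Build the interpolator once and call it repeatedly. This pattern keeps
# working after this PR; it simply no longer caches the underlying SciPy
# interpolator, so repeated calls no longer gain a performance benefit.
interpolator = Linear().interpolator(cube, ["latitude", "longitude"])
result_a = interpolator([[40.0], [250.0]])
result_b = interpolator([[45.0], [260.0]])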

Contributor (Author):
Thanks a lot for checking the documentation updates and for sharing the future plans for interpolation and regridding! I have added the two fixes; please let me know if there is anything left to do on this!

@@ -29,9 +29,9 @@ The following are the regridding schemes that are currently available in Iris:
* point in cell regridding (:class:`iris.analysis.PointInCell`) and
* area-weighted regridding (:class:`iris.analysis.AreaWeighted`, first-order conservative).

The linear, nearest-neighbor, and area-weighted regridding schemes support
lazy regridding, i.e. if the source cube has lazy data, the resulting cube
will also have lazy data.
The linear and nearest-neighbour interpolation schemes, and the linear, nearest-neighbour,
and area-weighted regridding schemes support lazy regridding, i.e. if the source cube has lazy data,
the resulting cube will also have lazy data.
See :doc:`real_and_lazy_data` for an introduction to lazy data.
See :doc:`../further_topics/which_regridder_to_use` for a more in depth overview of the different regridders.
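
As a minimal sketch of what lazy interpolation means in practice (sample data and points are illustrative):

import iris
from iris.analysis import Linear

air_temp = iris.load_cube(iris.sample_data_path('A1B_north_america.nc'))
assert air_temp.has_lazy_data()

# Interpolating a cube with lazy data yields a lazy result; nothing is
# computed until the result's data is actually accessed.
result = air_temp.interpolate([('latitude', [40.0, 45.0])], Linear())
assert result.has_lazy_data()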

@@ -417,22 +417,22 @@ In each case ``result`` will be the input cube regridded to the grid defined by
the target grid cube (in this case ``rotated_psl``) that we used to define the
cached regridder.

Regridding Lazy Data
^^^^^^^^^^^^^^^^^^^^
Interpolating and Regridding Lazy Data
--------------------------------------

If you are working with large cubes, especially when you are regridding to a
high resolution target grid, you may run out of memory when trying to
regrid a cube. When this happens, make sure the input cube has lazy data
If you are working with large cubes, you may run out of memory when trying to
interpolate or regrid a cube. For instance, this might happen when regridding to a
high resolution target grid. When this happens, make sure the input cube has lazy data

>>> air_temp = iris.load_cube(iris.sample_data_path('A1B_north_america.nc'))
>>> air_temp
<iris 'Cube' of air_temperature / (K) (time: 240; latitude: 37; longitude: 49)>
>>> air_temp.has_lazy_data()
True

and the regridding scheme supports lazy data. All regridding schemes described
here support lazy data. If you still run out of memory even while using lazy
data, inspect the
and the interpolation or regridding scheme supports lazy data. All interpolation and
regridding schemes described here, with the exception of the point-in-cell regridder,
Contributor:
Also the UnstructuredNearest regridder does not support lazy data (here is a good summary of regridders: Cube Interpolation and Regridding — Iris 3.11.0 documentation).

support lazy data. If you still run out of memory even while using lazy data, inspect the
`chunks <https://docs.dask.org/en/latest/array-chunks.html>`__:

@@ -455,6 +455,6 @@ dimension, to regrid it in 8 chunks of 30 timesteps at a time:
Assuming that Dask is configured such that it processes only a few chunks of
the data array at a time, this will further reduce memory use.
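
The collapsed snippet above presumably resembles the following sketch (assuming time is the leading dimension; ``lazy_data`` and Dask's ``rechunk`` are the standard APIs):

air_temp = iris.load_cube(iris.sample_data_path('A1B_north_america.nc'))
# Split the 240-step time dimension into 8 chunks of 30 timesteps each,
# leaving the horizontal (latitude/longitude) dimensions unchunked.
air_temp.data = air_temp.lazy_data().rechunk([30, None, None])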

Note that chunking in the horizontal dimensions is not supported by the
regridding schemes. Chunks in these dimensions will automatically be combined
Note that chunking in the horizontal dimensions is not supported by the interpolation
and regridding schemes. Chunks in these dimensions will automatically be combined
before regridding.
6 changes: 5 additions & 1 deletion docs/src/whatsnew/latest.rst
@@ -61,6 +61,10 @@ This document explains the changes made to Iris for this release
:doc:`/whatsnew/3.11` will only be available when
``iris.FUTURE.date_microseconds == True``.

#. `@fnattino`_ enabled lazy cube interpolation using the linear and
nearest-neighbour interpolators (:class:`iris.analysis.Linear` and
:class:`iris.analysis.Nearest`). (:pull:`6084`)


🔥 Deprecations
===============
@@ -102,7 +106,7 @@ This document explains the changes made to Iris for this release
Whatsnew author names (@github name) in alphabetical order. Note that,
core dev names are automatically included by the common_links.inc:


.. _@fnattino: https://github.com/fnattino


.. comment
158 changes: 95 additions & 63 deletions lib/iris/analysis/_interpolation.py
@@ -12,6 +12,7 @@
from numpy.lib.stride_tricks import as_strided
import numpy.ma as ma

from iris._lazy_data import map_complete_blocks
from iris.coords import AuxCoord, DimCoord
import iris.util

@@ -163,6 +164,15 @@ def snapshot_grid(cube):
return x.copy(), y.copy()


def _interpolated_dtype(dtype, method):
"""Determine the minimum base dtype required by the underlying interpolator."""
if method == "nearest":
result = dtype
else:
result = np.result_type(_DEFAULT_DTYPE, dtype)
return result
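
# Illustrative behaviour (assuming _DEFAULT_DTYPE is np.float64, as defined
# in this module): linear interpolation promotes integer data to a float
# dtype, while nearest-neighbour keeps the source dtype, e.g.
#   _interpolated_dtype(np.dtype("int16"), "linear")   -> dtype('float64')
#   _interpolated_dtype(np.dtype("int16"), "nearest")  -> dtype('int16')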


class RectilinearInterpolator:
"""Provide support for performing nearest-neighbour or linear interpolation.

@@ -200,13 +210,8 @@ def __init__(self, src_cube, coords, method, extrapolation_mode):
set to NaN.

"""
# Trigger any deferred loading of the source cube's data and snapshot
# its state to ensure that the interpolator is impervious to external
# changes to the original source cube. The data is loaded to prevent
# the snapshot having lazy data, avoiding the potential for the
# same data to be loaded again and again.
if src_cube.has_lazy_data():
src_cube.data
# Snapshot the cube state to ensure that the interpolator is impervious
# to external changes to the original source cube.
self._src_cube = src_cube.copy()
# Coordinates defining the dimensions to be interpolated.
self._src_coords = [self._src_cube.coord(coord) for coord in coords]
@@ -277,17 +282,27 @@ def _account_for_inverted(self, data):
data = data[tuple(dim_slices)]
return data

def _interpolate(self, data, interp_points):
@staticmethod
def _interpolate(
data,
src_points,
interp_points,
interp_shape,
method="linear",
extrapolation_mode="nanmask",
):
"""Interpolate a data array over N dimensions.

Create and cache the underlying interpolator instance before invoking
it to perform interpolation over the data at the given coordinate point
values.
Create the interpolator instance before invoking it to perform
interpolation over the data at the given coordinate point values.

Parameters
----------
data : ndarray
A data array, to be interpolated in its first 'N' dimensions.
src_points :
The point values defining the dimensions to be interpolated.
(len(src_points) should be N).
interp_points : ndarray
An array of interpolation coordinate values.
Its shape is (..., N) where N is the number of interpolation
@@ -296,44 +311,53 @@ def _interpolate(self, data, interp_points):
coordinate, which is mapped to the i'th data dimension.
The other (leading) dimensions index over the different required
sample points.
interp_shape :
The shape of the interpolated array in its first 'N' dimensions
(len(interp_shape) should be N).
method : str
Interpolation method (see :class:`iris.analysis._interpolation.RectilinearInterpolator`).
extrapolation_mode : str
Extrapolation mode (see :class:`iris.analysis._interpolation.RectilinearInterpolator`).

Returns
-------
:class:`np.ndarray`.
Its shape is "points_shape + extra_shape",
Its shape is "interp_shape + extra_shape",
where "extra_shape" is the remaining non-interpolated dimensions of
the data array (i.e. 'data.shape[N:]'), and "points_shape" is the
leading dimensions of interp_points,
(i.e. 'interp_points.shape[:-1]').

the data array (i.e. 'data.shape[N:]').
"""
from iris.analysis._scipy_interpolate import _RegularGridInterpolator

dtype = self._interpolated_dtype(data.dtype)
dtype = _interpolated_dtype(data.dtype, method)
if data.dtype != dtype:
# Perform dtype promotion.
data = data.astype(dtype)

mode = EXTRAPOLATION_MODES[self._mode]
if self._interpolator is None:
# Cache the interpolator instance.
# NB. The constructor of the _RegularGridInterpolator class does
# some unnecessary checks on the fill_value parameter,
# so we set it afterwards instead. Sneaky. ;-)
self._interpolator = _RegularGridInterpolator(
self._src_points,
data,
method=self.method,
bounds_error=mode.bounds_error,
fill_value=None,
)
else:
self._interpolator.values = data
# Determine the shape of the interpolated result.
ndims_interp = len(interp_shape)
extra_shape = data.shape[ndims_interp:]
final_shape = [*interp_shape, *extra_shape]

mode = EXTRAPOLATION_MODES[extrapolation_mode]
_data = np.ma.getdata(data)
trexfeathers marked this conversation as resolved.
Show resolved Hide resolved
# NB. The constructor of the _RegularGridInterpolator class does
# some unnecessary checks on the fill_value parameter,
# so we set it afterwards instead. Sneaky. ;-)
interpolator = _RegularGridInterpolator(
src_points,
_data,
method=method,
bounds_error=mode.bounds_error,
fill_value=None,
)
interpolator.fill_value = mode.fill_value
result = interpolator(interp_points)

# We may be re-using a cached interpolator, so ensure the fill
# value is set appropriately for extrapolating data values.
self._interpolator.fill_value = mode.fill_value
result = self._interpolator(interp_points)
# The interpolated result has now shape "points_shape + extra_shape"
# where "points_shape" is the leading dimension of "interp_points"
# (i.e. 'interp_points.shape[:-1]'). We reshape it to match the shape
# of the interpolated dimensions.
result = result.reshape(final_shape)

if result.dtype != data.dtype:
# Cast the data dtype to be as expected. Note that, the dtype
@@ -346,13 +370,11 @@ def _points(self, sample_points, data, data_dims=None):
# `data` is not a masked array.
src_mask = np.ma.getmaskarray(data)
# Switch the extrapolation to work with mask values.
self._interpolator.fill_value = mode.mask_fill_value
self._interpolator.values = src_mask
mask_fraction = self._interpolator(interp_points)
interpolator.fill_value = mode.mask_fill_value
interpolator.values = src_mask
mask_fraction = interpolator(interp_points)
new_mask = mask_fraction > 0
if ma.isMaskedArray(data) or np.any(new_mask):
result = np.ma.MaskedArray(result, new_mask)

result = np.ma.MaskedArray(result, new_mask)
return result

def _resample_coord(self, sample_points, coord, coord_dims):
@@ -458,14 +480,6 @@ def _validate(self):
msg = "Cannot interpolate over the non-monotonic coordinate {}."
raise ValueError(msg.format(coord.name()))

def _interpolated_dtype(self, dtype):
"""Determine the minimum base dtype required by the underlying interpolator."""
if self._method == "nearest":
result = dtype
else:
result = np.result_type(_DEFAULT_DTYPE, dtype)
return result

def _points(self, sample_points, data, data_dims=None):
"""Interpolate at the specified points.

@@ -490,9 +504,8 @@ def _points(self, sample_points, data, data_dims=None):

Returns
-------
:class:`~numpy.ndarray` or :class:`~numpy.ma.MaskedArray`
An :class:`~numpy.ndarray` or :class:`~numpy.ma.MaskedArray`
instance of the interpolated data.
ndarray
The interpolated data array.

"""
dims = list(range(self._src_cube.ndim))
@@ -530,19 +543,15 @@ def _points(self, sample_points, data, data_dims=None):
_, src_order = zip(*sorted(dmap.items(), key=operator.itemgetter(0)))

# Prepare the sample points for interpolation and calculate the
# shape of the interpolated result.
# shape of the interpolated dimensions.
interp_points = []
interp_shape = []
for index, points in enumerate(sample_points):
dtype = self._interpolated_dtype(self._src_points[index].dtype)
dtype = _interpolated_dtype(self._src_points[index].dtype, self._method)
points = np.array(points, dtype=dtype, ndmin=1)
interp_points.append(points)
interp_shape.append(points.size)

interp_shape.extend(
length for dim, length in enumerate(data.shape) if dim not in di
)

# Convert the interpolation points into a cross-product array
# with shape (n_cross_points, n_dims)
interp_points = np.asarray([pts for pts in product(*interp_points)])
Expand All @@ -554,9 +563,21 @@ def _points(self, sample_points, data, data_dims=None):
# Transpose data in preparation for interpolation.
data = np.transpose(data, interp_order)

# Interpolate and reshape the data ...
result = self._interpolate(data, interp_points)
result = result.reshape(interp_shape)
# Interpolate the data, ensuring the interpolated dimensions
# are not chunked.
dims_not_chunked = [dmap[d] for d in di]
result = map_complete_blocks(
data,
self._interpolate,
dims=dims_not_chunked,
out_sizes=interp_shape,
dtype=_interpolated_dtype(data.dtype, self._method),
src_points=self._src_points,
interp_points=interp_points,
interp_shape=interp_shape,
method=self._method,
extrapolation_mode=self._mode,
)
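# Note (illustrative summary of the helper's role): map_complete_blocks
# rechunks `data` so that each interpolated dimension (dims_not_chunked)
# spans a single chunk, maps self._interpolate over the resulting blocks,
# and uses out_sizes/dtype to declare each output block's shape and dtype
# without computing anything eagerly.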

if src_order != dims:
# Restore the interpolated result to the original
Expand All @@ -568,6 +589,9 @@ def _points(self, sample_points, data, data_dims=None):
def __call__(self, sample_points, collapse_scalar=True):
"""Construct a cube from the specified orthogonal interpolation points.

If the source cube has lazy data, the returned cube will also
have lazy data.

Parameters
----------
sample_points :
Expand All @@ -585,14 +609,22 @@ def __call__(self, sample_points, collapse_scalar=True):
of the cube will be the number of original cube dimensions minus
the number of scalar coordinates, if collapse_scalar is True.

Notes
-----
.. note::

If the source cube has lazy data,
`chunks <https://docs.dask.org/en/latest/array-chunks.html>`__
in the interpolated dimensions will be combined before interpolation.

"""
if len(sample_points) != len(self._src_coords):
msg = "Expected sample points for {} coordinates, got {}."
raise ValueError(msg.format(len(self._src_coords), len(sample_points)))

sample_points = _canonical_sample_points(self._src_coords, sample_points)

data = self._src_cube.data
data = self._src_cube.core_data()
# Interpolate the cube payload.
interpolated_data = self._points(sample_points, data)

Expand Down
@@ -499,24 +499,37 @@ def test_orthogonal_cube_squash(self):
self.assertEqual(result_cube, non_collapsed_cube[0, ...])


class Test___call___real_data(ThreeDimCube):
def test_src_cube_data_loaded(self):
# If the source cube has real data when the interpolator is
# instantiated, then the interpolated result should also have
# real data.
self.assertFalse(self.cube.has_lazy_data())

# Perform interpolation and check the data is real.
interpolator = RectilinearInterpolator(
self.cube, ["latitude"], LINEAR, EXTRAPOLATE
)
res = interpolator([[1.5]])
self.assertFalse(res.has_lazy_data())


class Test___call___lazy_data(ThreeDimCube):
def test_src_cube_data_loaded(self):
# RectilinearInterpolator operates using a snapshot of the source cube.
# If the source cube has lazy data when the interpolator is
# instantiated we want to make sure the source cube's data is
# loaded as a consequence of interpolation to avoid the risk
# of loading it again and again.
# instantiated, then the interpolated result should also have
# lazy data.

# Modify self.cube to have lazy data.
self.cube.data = as_lazy_data(self.data)
self.assertTrue(self.cube.has_lazy_data())

# Perform interpolation and check the data has been loaded.
# Perform interpolation and check the data is lazy.
interpolator = RectilinearInterpolator(
self.cube, ["latitude"], LINEAR, EXTRAPOLATE
)
interpolator([[1.5]])
self.assertFalse(self.cube.has_lazy_data())
res = interpolator([[1.5]])
self.assertTrue(res.has_lazy_data())


class Test___call___time(tests.IrisTest):