From e616dc696a09d92ad969eca07448ce99994a6bde Mon Sep 17 00:00:00 2001
From: Jack Lovell <jacklovell1990@gmail.com>
Date: Fri, 4 Mar 2022 14:17:45 +0000
Subject: [PATCH 1/2] Re-write 1D caching object

Avoid inverting the spline matrix every time, and use some raysect
library functions for doing the interpolation between points.
---
 cherab/core/math/caching/caching1d.pxd        |  30 +-
 cherab/core/math/caching/caching1d.pyx        | 269 ++++++------------
 .../core/math/caching/tests/test_caching1d.py |   2 +-
 3 files changed, 101 insertions(+), 200 deletions(-)

diff --git a/cherab/core/math/caching/caching1d.pxd b/cherab/core/math/caching/caching1d.pxd
index b37246b4..7bf18fda 100644
--- a/cherab/core/math/caching/caching1d.pxd
+++ b/cherab/core/math/caching/caching1d.pxd
@@ -1,3 +1,5 @@
+# cython: language_level=3
+
 # Copyright 2016-2018 Euratom
 # Copyright 2016-2018 United Kingdom Atomic Energy Authority
 # Copyright 2016-2018 Centro de Investigaciones Energéticas, Medioambientales y Tecnológicas
@@ -17,25 +19,15 @@
 # under the Licence.
 
 from cherab.core.math.function cimport Function1D
-from numpy cimport ndarray, int8_t
-
-cdef class Caching1D(Function1D):
 
-    cdef readonly:
-        Function1D function
-        int no_boundary_error
-        ndarray x_np
-        double[::1] x_domain_view
-        int top_index_x
-        double x_min, x_delta_inv
-        double data_min, data_max, data_delta, data_delta_inv
-        double[::1] x_view, x2_view, x3_view
-        double[::1] data_view
-        double[:,::1] coeffs_view
-        int8_t[::1] calculated_view
 
-    cdef double evaluate(self, double px) except? -1e999
-
-    cdef double _evaluate(self, double px, int i_x)
+cdef class Caching1D(Function1D):
+    cdef:
+        Function1D _function
+        bint _no_boundary_error
+        double _xmin, _xmax, _resolution
+        unsigned char[::1] _sampled
+        double[::1] _xsamples, _fsamples
+        int _nsamples
 
-    cdef double _evaluate_polynomial_derivative(self, int i_x, double px, int der_x)
+    cdef double _get_and_cache(self, int index) except? -1e999
diff --git a/cherab/core/math/caching/caching1d.pyx b/cherab/core/math/caching/caching1d.pyx
index 586753c7..62501d44 100644
--- a/cherab/core/math/caching/caching1d.pyx
+++ b/cherab/core/math/caching/caching1d.pyx
@@ -18,19 +18,13 @@
 # See the Licence for the specific language governing permissions and limitations
 # under the Licence.
 
-from numpy import array, empty, int8, float64, concatenate, linspace
-from numpy.linalg import solve
+import numpy as np
 
 cimport cython
-from libc.math cimport isnan
-from numpy cimport ndarray, PyArray_ZEROS, NPY_FLOAT64, npy_intp, import_array
+from raysect.core.math.cython.utility cimport find_index
+from raysect.core.math.cython.interpolation.cubic cimport calc_coefficients_1d, evaluate_cubic_1d
 from cherab.core.math.function cimport autowrap_function1d
-from cherab.core.math.interpolators.utility cimport find_index, derivatives_array, factorial
 
-# required by numpy c-api
-import_array()
-
-EPSILON = 1.e-7
 
 cdef class Caching1D(Function1D):
     """
@@ -78,181 +72,96 @@ cdef class Caching1D(Function1D):
        1.5968720
     """
 
-    def __init__(self, object function1d, tuple space_area, double resolution, no_boundary_error=False, function_boundaries=None):
-
-        cdef:
-            double minx, maxx, deltax
-
-        self.function = autowrap_function1d(function1d)
-        self.no_boundary_error = no_boundary_error
-
-        minx, maxx = space_area
-        deltax = resolution
-
-        if minx >= maxx:
-            raise ValueError('Coordinate range is not consistent, minimum must be less than maximum ({} >= {})!'.format(minx, maxx))
-        if deltax <= EPSILON:
-            raise ValueError('Resolution must be bigger ({} <= {})!'.format(deltax, EPSILON))
-
-        self.x_np = concatenate((array([minx - deltax]), linspace(minx - EPSILON, maxx + EPSILON, max(int((maxx - minx) / deltax) + 1, 2)), array([maxx + deltax])))
-
-        self.x_domain_view = self.x_np
-
-        self.top_index_x = len(self.x_np) - 1
-
-        # Initialise the caching array
-        self.coeffs_view = empty((self.top_index_x - 2, 4), dtype=float64)
-        self.coeffs_view[:,::1] = float('NaN')
-        self.calculated_view = empty((self.top_index_x - 2,), dtype=int8)
-        self.calculated_view[:] = False
-
-        # Normalise coordinates and data
-        self.x_delta_inv = 1 / (self.x_np.max() - self.x_np.min())
-        self.x_min = self.x_np.min()
-        self.x_np = (self.x_np - self.x_min) * self.x_delta_inv
-        if function_boundaries is not None:
-            self.data_min, self.data_max = function_boundaries
-            self.data_delta = self.data_max - self.data_min
-            # If data contains only one value (not filtered before) cancel the
-            # normalisation scaling by setting data_delta to 1:
-            if self.data_delta == 0:
-                self.data_delta = 1
-        else:
-            # no normalisation of data values
-            self.data_delta = 1
-            self.data_min = 0
-            self.data_max = float('NaN')
-        self.data_delta_inv = 1 / self.data_delta
-
-        self.data_view = empty((self.top_index_x+1,), dtype=float64)
-        self.data_view[::1] = float('NaN')
-
-        # obtain coordinates memory views
-        self.x_view = self.x_np
-        self.x2_view = self.x_np*self.x_np
-        self.x3_view = self.x_np*self.x_np*self.x_np
-
+    # The implementation works as follows:
+    # - From space_area and resolution, generate an x grid of sampled points.
+    # - Store a list of which points have already been evaluated, and their values.
+    # - For each new value x to evaluate, check if the sample points either side
+    #   have already been evaluated.
+    #   - If not, evaluate and store them; update the list of already-evaluated points.
+    # - Calculate the derivative of the function at these points using the 2nd order
+    #   approximation. This requires additionally evaluating the points either side
+    #   of the nearest 2 points to x.
+    # - Use raysect.core.math.cython.interpolation.cubic's utilities to calculate
+    #   the polynomial coefficients for the 1D cubic and then evaluate the cubic
+    #   interpolation of the function at the position x.
+
+    def __init__(self, function1d, space_area, resolution, no_boundary_error=False, function_boundaries=None):
+        self._function = autowrap_function1d(function1d)
+        if len(space_area) != 2:
+            raise ValueError("Space area should be a tuple (xmin, xmax).")
+        self._xmin, self._xmax = space_area
+        if resolution <= 0:
+            raise ValueError("Resolution must be greater than zero.")
+        self._resolution = resolution
+        nsamples = int((self._xmax - self._xmin) / resolution + 1)
+        xsamples = np.linspace(self._xmin, self._xmax, nsamples)
+        fsamples = np.empty_like(xsamples)
+        sampled = np.full_like(fsamples, False, dtype='uint8')
+        self._nsamples = nsamples
+        self._xsamples = xsamples
+        self._fsamples = fsamples
+        self._sampled = sampled
+        self._no_boundary_error = no_boundary_error
+        # TODO: normalise using function_boundaries to avoid numerical issues.
+
+    @cython.initializedcheck(False)
     @cython.boundscheck(False)
     @cython.wraparound(False)
-    cdef double evaluate(self, double px) except? -1e999:
-        """
-        Evaluate the cached 1D function.
-
-        The function is cached in the vicinity of px if not already done.
-
-        :param double px: coordinate
-        :return: The evaluated value
-        """
-
-        cdef int i_x
-
-        i_x = find_index(self.x_domain_view, px)
-
-        if 1 <= i_x <= self.top_index_x - 2:
-            return self._evaluate(px, i_x)
-
-        # value is outside of permitted limits
-        if self.no_boundary_error:
-            return self.function.evaluate(px)
+    cdef double _get_and_cache(self, int index) except? -1e999:
+        cdef double x, fsamp
+
+        if not self._sampled[index]:
+            x = self._xsamples[index]
+            fsamp = self._function.evaluate(x)
+            self._sampled[index] = True
+            self._fsamples[index] = fsamp
         else:
-            min_range_x = self.x_domain_view[1]
-            max_range_x = self.x_domain_view[self.top_index_x - 1]
-
-            raise ValueError("The specified value (x={}) is outside the range of the supplied data: "
-                             "x bounds=({}, {})".format(px, min_range_x, max_range_x))
+            fsamp = self._fsamples[index]
+        return fsamp
 
-    @cython.cdivision(True)
+    # TODO: investigate whether to cache the cubic spline coefficients instead
+    # of re-calculating them every time. Performance/memory trade-off.
+    # Raysect caches the coefficients.
+    @cython.initializedcheck(False)
     @cython.boundscheck(False)
     @cython.wraparound(False)
-    cdef double _evaluate(self, double px, int i_x):
-        """
-        Calculate if not already done then evaluate the polynomial valid in the
-        area given by i_x at position px.
-        Calculation of the polynomial includes sampling the function where not
-        already done and interpolating.
-
-        :param double px: coordinate
-        :param int i_x: index of the area of interest
-        :return: The evaluated value
-        """
-
+    @cython.cdivision(True)
+    cdef double evaluate(self, double x) except? -1e999:
         cdef:
-            int u, l, i, i_x_p
-            double value
-            double delta_x
-            npy_intp cv_size
-            npy_intp cm_size[2]
-            double[::1] cv_view, coeffs_view
-            double[:, ::1] cm_view
-
-        # If the concerned polynomial has not yet been calculated:
-        i_x_p = i_x - 1  # polynomial index
-        if not self.calculated_view[i_x_p]:
-
-            # sample the data needed
-            for u in range(i_x-1, i_x+3):
-                if isnan(self.data_view[u]):
-                    value = self.function.evaluate(self.x_domain_view[u])
-                    if not isnan(value):
-                        # data values are normalised here
-                        self.data_view[u] = (value - self.data_min) * self.data_delta_inv
-
-            # Create constraint matrix (un-optimised)
-            # cv_view = zeros((4,), dtype=float64)  # constraints vector
-            # cm_view = zeros((4, 4), dtype=float64)  # constraints matrix
-
-            # Create constraint matrix (optimised using numpy c-api)
-            cv_size = 4
-            cv_view = PyArray_ZEROS(1, &cv_size, NPY_FLOAT64, 0)
-            cm_size[:] = [4, 4]
-            cm_view = PyArray_ZEROS(2, cm_size, NPY_FLOAT64, 0)
-
-            # Fill the constraints matrix
-            l = 0
-            for u in range(i_x, i_x+2):
-
-                # knot values
-                cm_view[l, 0] = 1.
-                cm_view[l, 1] = self.x_view[u]
-                cm_view[l, 2] = self.x2_view[u]
-                cm_view[l, 3] = self.x3_view[u]
-                cv_view[l] = self.data_view[u]
-                l += 1
-
-                # derivative
-                cm_view[l, 1] = 1.
-                cm_view[l, 2] = 2.*self.x_view[u]
-                cm_view[l, 3] = 3.*self.x2_view[u]
-                delta_x = self.x_view[u+1] - self.x_view[u-1]
-                cv_view[l] = (self.data_view[u+1] - self.data_view[u-1])/delta_x
-                l += 1
-
-            # Solve the linear system and fill the caching coefficients array
-            coeffs_view = solve(cm_view, cv_view)
-            self.coeffs_view[i_x_p, :] = coeffs_view
-
-            # Denormalisation
-            for i in range(4):
-                coeffs_view[i] = self.data_delta * (self.x_delta_inv ** i / factorial(i) * self._evaluate_polynomial_derivative(i_x_p, -self.x_delta_inv * self.x_min, i))
-            coeffs_view[0] = coeffs_view[0] + self.data_min
-            self.coeffs_view[i_x_p, :] = coeffs_view
-
-            self.calculated_view[i_x_p] = True
-
-        return self.coeffs_view[i_x_p, 0] + self.coeffs_view[i_x_p,  1] * px + self.coeffs_view[i_x_p,  2] * px * px + self.coeffs_view[i_x_p, 3] * px * px * px
-
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    cdef double _evaluate_polynomial_derivative(self, int i_x, double px, int der_x):
-        """
-        Evaluate the derivatives of the polynomial valid in the area given by
-        'i_x' at position 'px'. The order of derivative along each axis is
-        given by 'der_x'.
-        """
-
-        cdef double[::1] x_values
-
-        x_values = derivatives_array(px, der_x)
-
-        return x_values[0]*self.coeffs_view[i_x, 0] + x_values[1]*self.coeffs_view[i_x, 1] + x_values[2]*self.coeffs_view[i_x, 2] + x_values[3]*self.coeffs_view[i_x, 3]
-
+            int lower_index
+            double fprev, fnext, feval, xnorm
+            double a[4]  # Cubic coefficients
+            double f[2]  # Function evaluations
+            double dfdx[2]  # Function derivative evaluations
+
+        if self._xmin <= x < self._xmax:
+            lower_index = find_index(self._xsamples, x)
+            f[0] = self._get_and_cache(lower_index)
+            f[1] = self._get_and_cache(lower_index + 1)
+            # Calculate derivatives. calc_coefficiets_1d requires derivatives
+            # normalised to the unit interval, i.e. for dx = 1.
+            if lower_index == 0:
+                dfdx[0] = (f[1] - f[0])
+            else:
+                fprev = self._get_and_cache(lower_index - 1)
+                dfdx[0] = (f[1] - fprev) / 2
+            if lower_index == self._nsamples - 2:
+                dfdx[1] = (f[1] - f[0])
+            else:
+                fnext = self._get_and_cache(lower_index + 2)
+                dfdx[1] = (fnext - f[0]) / 2
+            calc_coefficients_1d(f, dfdx, a)
+            xnorm = (x - self._xsamples[lower_index]) / self._resolution
+            feval = evaluate_cubic_1d(a, xnorm)
+            return feval
+
+        # Special case if x is exactly at the maximum allowed value.
+        if x == self._xmax:
+            return self._get_and_cache(self._nsamples - 1)
+
+        if self._no_boundary_error:
+            return self._function.evaluate(x)
+
+        raise ValueError(
+            "x is outside the permitted range ({}, {})".format(self._xmin, self._xmax)
+        )
diff --git a/cherab/core/math/caching/tests/test_caching1d.py b/cherab/core/math/caching/tests/test_caching1d.py
index af16e24e..15af54ed 100644
--- a/cherab/core/math/caching/tests/test_caching1d.py
+++ b/cherab/core/math/caching/tests/test_caching1d.py
@@ -36,4 +36,4 @@ def test_values(self):
 
 
 if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+    unittest.main()

From 93ff2ac4dee7e4751e441fbfec5e458bd2399787 Mon Sep 17 00:00:00 2001
From: Jack Lovell <jacklovell1990@gmail.com>
Date: Fri, 4 Mar 2022 16:12:34 +0000
Subject: [PATCH 2/2] Small optimisations to 1D caching

Exploit uniform spacing of samples to:
* Avoid bisecting search for nearest index in sample array
* Drop the array of sampled x points and calculate them analytically
  instead. This will reduce memory usage (useful for higher dimension
  caching objects).
---
 cherab/core/math/caching/caching1d.pxd |  2 +-
 cherab/core/math/caching/caching1d.pyx | 25 ++++++++++++-------------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/cherab/core/math/caching/caching1d.pxd b/cherab/core/math/caching/caching1d.pxd
index 7bf18fda..6f5bd446 100644
--- a/cherab/core/math/caching/caching1d.pxd
+++ b/cherab/core/math/caching/caching1d.pxd
@@ -27,7 +27,7 @@ cdef class Caching1D(Function1D):
         bint _no_boundary_error
         double _xmin, _xmax, _resolution
         unsigned char[::1] _sampled
-        double[::1] _xsamples, _fsamples
+        double[::1] _fsamples
         int _nsamples
 
     cdef double _get_and_cache(self, int index) except? -1e999
diff --git a/cherab/core/math/caching/caching1d.pyx b/cherab/core/math/caching/caching1d.pyx
index 62501d44..81ce6ca3 100644
--- a/cherab/core/math/caching/caching1d.pyx
+++ b/cherab/core/math/caching/caching1d.pyx
@@ -21,7 +21,6 @@
 import numpy as np
 
 cimport cython
-from raysect.core.math.cython.utility cimport find_index
 from raysect.core.math.cython.interpolation.cubic cimport calc_coefficients_1d, evaluate_cubic_1d
 from cherab.core.math.function cimport autowrap_function1d
 
@@ -73,7 +72,6 @@ cdef class Caching1D(Function1D):
     """
 
     # The implementation works as follows:
-    # - From space_area and resolution, generate an x grid of sampled points.
     # - Store a list of which points have already been evaluated, and their values.
     # - For each new value x to evaluate, check if the sample points either side
     #   have already been evaluated.
@@ -84,6 +82,9 @@ cdef class Caching1D(Function1D):
     # - Use raysect.core.math.cython.interpolation.cubic's utilities to calculate
     #   the polynomial coefficients for the 1D cubic and then evaluate the cubic
     #   interpolation of the function at the position x.
+    # - Note that the sampled points have uniform spacing: this enables a few
+    #   optimisations such as analytic calculations of the array index and x
+    #   values for the nearest samples to the requested x.
 
     def __init__(self, function1d, space_area, resolution, no_boundary_error=False, function_boundaries=None):
         self._function = autowrap_function1d(function1d)
@@ -93,14 +94,9 @@ cdef class Caching1D(Function1D):
         if resolution <= 0:
             raise ValueError("Resolution must be greater than zero.")
         self._resolution = resolution
-        nsamples = int((self._xmax - self._xmin) / resolution + 1)
-        xsamples = np.linspace(self._xmin, self._xmax, nsamples)
-        fsamples = np.empty_like(xsamples)
-        sampled = np.full_like(fsamples, False, dtype='uint8')
-        self._nsamples = nsamples
-        self._xsamples = xsamples
-        self._fsamples = fsamples
-        self._sampled = sampled
+        self._nsamples = int((self._xmax - self._xmin) / resolution + 1)
+        self._fsamples = np.empty(self._nsamples)
+        self._sampled = np.full(self._nsamples, False, dtype='uint8')
         self._no_boundary_error = no_boundary_error
         # TODO: normalise using function_boundaries to avoid numerical issues.
 
@@ -111,7 +107,7 @@ cdef class Caching1D(Function1D):
         cdef double x, fsamp
 
         if not self._sampled[index]:
-            x = self._xsamples[index]
+            x = self._xmin + index * self._resolution
             fsamp = self._function.evaluate(x)
             self._sampled[index] = True
             self._fsamples[index] = fsamp
@@ -135,7 +131,8 @@ cdef class Caching1D(Function1D):
             double dfdx[2]  # Function derivative evaluations
 
         if self._xmin <= x < self._xmax:
-            lower_index = find_index(self._xsamples, x)
+            # Sampling is uniform, so lower_index is determined analytically.
+            lower_index = int((x - self._xmin) // self._resolution)
             f[0] = self._get_and_cache(lower_index)
             f[1] = self._get_and_cache(lower_index + 1)
             # Calculate derivatives. calc_coefficiets_1d requires derivatives
@@ -151,7 +148,9 @@ cdef class Caching1D(Function1D):
                 fnext = self._get_and_cache(lower_index + 2)
                 dfdx[1] = (fnext - f[0]) / 2
             calc_coefficients_1d(f, dfdx, a)
-            xnorm = (x - self._xsamples[lower_index]) / self._resolution
+            # lower_x = xmin + resolution * index
+            # xnorm = (x - lower_x) / res = (x - xmin) / resolution - index
+            xnorm = (x - self._xmin) / self._resolution - lower_index
             feval = evaluate_cubic_1d(a, xnorm)
             return feval