From e616dc696a09d92ad969eca07448ce99994a6bde Mon Sep 17 00:00:00 2001 From: Jack Lovell Date: Fri, 4 Mar 2022 14:17:45 +0000 Subject: [PATCH 1/2] Re-write 1D caching object Avoid inverting the spline matrix every time, and use some raysect library functions for doing the interpolation between points. --- cherab/core/math/caching/caching1d.pxd | 30 +- cherab/core/math/caching/caching1d.pyx | 269 ++++++------------ .../core/math/caching/tests/test_caching1d.py | 2 +- 3 files changed, 101 insertions(+), 200 deletions(-) diff --git a/cherab/core/math/caching/caching1d.pxd b/cherab/core/math/caching/caching1d.pxd index b37246b4..7bf18fda 100644 --- a/cherab/core/math/caching/caching1d.pxd +++ b/cherab/core/math/caching/caching1d.pxd @@ -1,3 +1,5 @@ +# cython: language_level=3 + # Copyright 2016-2018 Euratom # Copyright 2016-2018 United Kingdom Atomic Energy Authority # Copyright 2016-2018 Centro de Investigaciones Energéticas, Medioambientales y Tecnológicas @@ -17,25 +19,15 @@ # under the Licence. from cherab.core.math.function cimport Function1D -from numpy cimport ndarray, int8_t - -cdef class Caching1D(Function1D): - cdef readonly: - Function1D function - int no_boundary_error - ndarray x_np - double[::1] x_domain_view - int top_index_x - double x_min, x_delta_inv - double data_min, data_max, data_delta, data_delta_inv - double[::1] x_view, x2_view, x3_view - double[::1] data_view - double[:,::1] coeffs_view - int8_t[::1] calculated_view - cdef double evaluate(self, double px) except? -1e999 - - cdef double _evaluate(self, double px, int i_x) +cdef class Caching1D(Function1D): + cdef: + Function1D _function + bint _no_boundary_error + double _xmin, _xmax, _resolution + unsigned char[::1] _sampled + double[::1] _xsamples, _fsamples + int _nsamples - cdef double _evaluate_polynomial_derivative(self, int i_x, double px, int der_x) + cdef double _get_and_cache(self, int index) except? -1e999 diff --git a/cherab/core/math/caching/caching1d.pyx b/cherab/core/math/caching/caching1d.pyx index 586753c7..62501d44 100644 --- a/cherab/core/math/caching/caching1d.pyx +++ b/cherab/core/math/caching/caching1d.pyx @@ -18,19 +18,13 @@ # See the Licence for the specific language governing permissions and limitations # under the Licence. -from numpy import array, empty, int8, float64, concatenate, linspace -from numpy.linalg import solve +import numpy as np cimport cython -from libc.math cimport isnan -from numpy cimport ndarray, PyArray_ZEROS, NPY_FLOAT64, npy_intp, import_array +from raysect.core.math.cython.utility cimport find_index +from raysect.core.math.cython.interpolation.cubic cimport calc_coefficients_1d, evaluate_cubic_1d from cherab.core.math.function cimport autowrap_function1d -from cherab.core.math.interpolators.utility cimport find_index, derivatives_array, factorial -# required by numpy c-api -import_array() - -EPSILON = 1.e-7 cdef class Caching1D(Function1D): """ @@ -78,181 +72,96 @@ cdef class Caching1D(Function1D): 1.5968720 """ - def __init__(self, object function1d, tuple space_area, double resolution, no_boundary_error=False, function_boundaries=None): - - cdef: - double minx, maxx, deltax - - self.function = autowrap_function1d(function1d) - self.no_boundary_error = no_boundary_error - - minx, maxx = space_area - deltax = resolution - - if minx >= maxx: - raise ValueError('Coordinate range is not consistent, minimum must be less than maximum ({} >= {})!'.format(minx, maxx)) - if deltax <= EPSILON: - raise ValueError('Resolution must be bigger ({} <= {})!'.format(deltax, EPSILON)) - - self.x_np = concatenate((array([minx - deltax]), linspace(minx - EPSILON, maxx + EPSILON, max(int((maxx - minx) / deltax) + 1, 2)), array([maxx + deltax]))) - - self.x_domain_view = self.x_np - - self.top_index_x = len(self.x_np) - 1 - - # Initialise the caching array - self.coeffs_view = empty((self.top_index_x - 2, 4), dtype=float64) - self.coeffs_view[:,::1] = float('NaN') - self.calculated_view = empty((self.top_index_x - 2,), dtype=int8) - self.calculated_view[:] = False - - # Normalise coordinates and data - self.x_delta_inv = 1 / (self.x_np.max() - self.x_np.min()) - self.x_min = self.x_np.min() - self.x_np = (self.x_np - self.x_min) * self.x_delta_inv - if function_boundaries is not None: - self.data_min, self.data_max = function_boundaries - self.data_delta = self.data_max - self.data_min - # If data contains only one value (not filtered before) cancel the - # normalisation scaling by setting data_delta to 1: - if self.data_delta == 0: - self.data_delta = 1 - else: - # no normalisation of data values - self.data_delta = 1 - self.data_min = 0 - self.data_max = float('NaN') - self.data_delta_inv = 1 / self.data_delta - - self.data_view = empty((self.top_index_x+1,), dtype=float64) - self.data_view[::1] = float('NaN') - - # obtain coordinates memory views - self.x_view = self.x_np - self.x2_view = self.x_np*self.x_np - self.x3_view = self.x_np*self.x_np*self.x_np - + # The implementation works as follows: + # - From space_area and resolution, generate an x grid of sampled points. + # - Store a list of which points have already been evaluated, and their values. + # - For each new value x to evaluate, check if the sample points either side + # have already been evaluated. + # - If not, evaluate and store them; update the list of already-evaluated points. + # - Calculate the derivative of the function at these points using the 2nd order + # approximation. This requires additionally evaluating the points either side + # of the nearest 2 points to x. + # - Use raysect.core.math.cython.interpolation.cubic's utilities to calculate + # the polynomial coefficients for the 1D cubic and then evaluate the cubic + # interpolation of the function at the position x. + + def __init__(self, function1d, space_area, resolution, no_boundary_error=False, function_boundaries=None): + self._function = autowrap_function1d(function1d) + if len(space_area) != 2: + raise ValueError("Space area should be a tuple (xmin, xmax).") + self._xmin, self._xmax = space_area + if resolution <= 0: + raise ValueError("Resolution must be greater than zero.") + self._resolution = resolution + nsamples = int((self._xmax - self._xmin) / resolution + 1) + xsamples = np.linspace(self._xmin, self._xmax, nsamples) + fsamples = np.empty_like(xsamples) + sampled = np.full_like(fsamples, False, dtype='uint8') + self._nsamples = nsamples + self._xsamples = xsamples + self._fsamples = fsamples + self._sampled = sampled + self._no_boundary_error = no_boundary_error + # TODO: normalise using function_boundaries to avoid numerical issues. + + @cython.initializedcheck(False) @cython.boundscheck(False) @cython.wraparound(False) - cdef double evaluate(self, double px) except? -1e999: - """ - Evaluate the cached 1D function. - - The function is cached in the vicinity of px if not already done. - - :param double px: coordinate - :return: The evaluated value - """ - - cdef int i_x - - i_x = find_index(self.x_domain_view, px) - - if 1 <= i_x <= self.top_index_x - 2: - return self._evaluate(px, i_x) - - # value is outside of permitted limits - if self.no_boundary_error: - return self.function.evaluate(px) + cdef double _get_and_cache(self, int index) except? -1e999: + cdef double x, fsamp + + if not self._sampled[index]: + x = self._xsamples[index] + fsamp = self._function.evaluate(x) + self._sampled[index] = True + self._fsamples[index] = fsamp else: - min_range_x = self.x_domain_view[1] - max_range_x = self.x_domain_view[self.top_index_x - 1] - - raise ValueError("The specified value (x={}) is outside the range of the supplied data: " - "x bounds=({}, {})".format(px, min_range_x, max_range_x)) + fsamp = self._fsamples[index] + return fsamp - @cython.cdivision(True) + # TODO: investigate whether to cache the cubic spline coefficients instead + # of re-calculating them every time. Performance/memory trade-off. + # Raysect caches the coefficients. + @cython.initializedcheck(False) @cython.boundscheck(False) @cython.wraparound(False) - cdef double _evaluate(self, double px, int i_x): - """ - Calculate if not already done then evaluate the polynomial valid in the - area given by i_x at position px. - Calculation of the polynomial includes sampling the function where not - already done and interpolating. - - :param double px: coordinate - :param int i_x: index of the area of interest - :return: The evaluated value - """ - + @cython.cdivision(True) + cdef double evaluate(self, double x) except? -1e999: cdef: - int u, l, i, i_x_p - double value - double delta_x - npy_intp cv_size - npy_intp cm_size[2] - double[::1] cv_view, coeffs_view - double[:, ::1] cm_view - - # If the concerned polynomial has not yet been calculated: - i_x_p = i_x - 1 # polynomial index - if not self.calculated_view[i_x_p]: - - # sample the data needed - for u in range(i_x-1, i_x+3): - if isnan(self.data_view[u]): - value = self.function.evaluate(self.x_domain_view[u]) - if not isnan(value): - # data values are normalised here - self.data_view[u] = (value - self.data_min) * self.data_delta_inv - - # Create constraint matrix (un-optimised) - # cv_view = zeros((4,), dtype=float64) # constraints vector - # cm_view = zeros((4, 4), dtype=float64) # constraints matrix - - # Create constraint matrix (optimised using numpy c-api) - cv_size = 4 - cv_view = PyArray_ZEROS(1, &cv_size, NPY_FLOAT64, 0) - cm_size[:] = [4, 4] - cm_view = PyArray_ZEROS(2, cm_size, NPY_FLOAT64, 0) - - # Fill the constraints matrix - l = 0 - for u in range(i_x, i_x+2): - - # knot values - cm_view[l, 0] = 1. - cm_view[l, 1] = self.x_view[u] - cm_view[l, 2] = self.x2_view[u] - cm_view[l, 3] = self.x3_view[u] - cv_view[l] = self.data_view[u] - l += 1 - - # derivative - cm_view[l, 1] = 1. - cm_view[l, 2] = 2.*self.x_view[u] - cm_view[l, 3] = 3.*self.x2_view[u] - delta_x = self.x_view[u+1] - self.x_view[u-1] - cv_view[l] = (self.data_view[u+1] - self.data_view[u-1])/delta_x - l += 1 - - # Solve the linear system and fill the caching coefficients array - coeffs_view = solve(cm_view, cv_view) - self.coeffs_view[i_x_p, :] = coeffs_view - - # Denormalisation - for i in range(4): - coeffs_view[i] = self.data_delta * (self.x_delta_inv ** i / factorial(i) * self._evaluate_polynomial_derivative(i_x_p, -self.x_delta_inv * self.x_min, i)) - coeffs_view[0] = coeffs_view[0] + self.data_min - self.coeffs_view[i_x_p, :] = coeffs_view - - self.calculated_view[i_x_p] = True - - return self.coeffs_view[i_x_p, 0] + self.coeffs_view[i_x_p, 1] * px + self.coeffs_view[i_x_p, 2] * px * px + self.coeffs_view[i_x_p, 3] * px * px * px - - @cython.boundscheck(False) - @cython.wraparound(False) - cdef double _evaluate_polynomial_derivative(self, int i_x, double px, int der_x): - """ - Evaluate the derivatives of the polynomial valid in the area given by - 'i_x' at position 'px'. The order of derivative along each axis is - given by 'der_x'. - """ - - cdef double[::1] x_values - - x_values = derivatives_array(px, der_x) - - return x_values[0]*self.coeffs_view[i_x, 0] + x_values[1]*self.coeffs_view[i_x, 1] + x_values[2]*self.coeffs_view[i_x, 2] + x_values[3]*self.coeffs_view[i_x, 3] - + int lower_index + double fprev, fnext, feval, xnorm + double a[4] # Cubic coefficients + double f[2] # Function evaluations + double dfdx[2] # Function derivative evaluations + + if self._xmin <= x < self._xmax: + lower_index = find_index(self._xsamples, x) + f[0] = self._get_and_cache(lower_index) + f[1] = self._get_and_cache(lower_index + 1) + # Calculate derivatives. calc_coefficiets_1d requires derivatives + # normalised to the unit interval, i.e. for dx = 1. + if lower_index == 0: + dfdx[0] = (f[1] - f[0]) + else: + fprev = self._get_and_cache(lower_index - 1) + dfdx[0] = (f[1] - fprev) / 2 + if lower_index == self._nsamples - 2: + dfdx[1] = (f[1] - f[0]) + else: + fnext = self._get_and_cache(lower_index + 2) + dfdx[1] = (fnext - f[0]) / 2 + calc_coefficients_1d(f, dfdx, a) + xnorm = (x - self._xsamples[lower_index]) / self._resolution + feval = evaluate_cubic_1d(a, xnorm) + return feval + + # Special case if x is exactly at the maximum allowed value. + if x == self._xmax: + return self._get_and_cache(self._nsamples - 1) + + if self._no_boundary_error: + return self._function.evaluate(x) + + raise ValueError( + "x is outside the permitted range ({}, {})".format(self._xmin, self._xmax) + ) diff --git a/cherab/core/math/caching/tests/test_caching1d.py b/cherab/core/math/caching/tests/test_caching1d.py index af16e24e..15af54ed 100644 --- a/cherab/core/math/caching/tests/test_caching1d.py +++ b/cherab/core/math/caching/tests/test_caching1d.py @@ -36,4 +36,4 @@ def test_values(self): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() From 93ff2ac4dee7e4751e441fbfec5e458bd2399787 Mon Sep 17 00:00:00 2001 From: Jack Lovell Date: Fri, 4 Mar 2022 16:12:34 +0000 Subject: [PATCH 2/2] Small optimisations to 1D caching Exploit uniform spacing of samples to: * Avoid bisecting search for nearest index in sample array * Drop the array of sampled x points and calculate them analytically instead. This will reduce memory usage (useful for higher dimension caching objects). --- cherab/core/math/caching/caching1d.pxd | 2 +- cherab/core/math/caching/caching1d.pyx | 25 ++++++++++++------------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/cherab/core/math/caching/caching1d.pxd b/cherab/core/math/caching/caching1d.pxd index 7bf18fda..6f5bd446 100644 --- a/cherab/core/math/caching/caching1d.pxd +++ b/cherab/core/math/caching/caching1d.pxd @@ -27,7 +27,7 @@ cdef class Caching1D(Function1D): bint _no_boundary_error double _xmin, _xmax, _resolution unsigned char[::1] _sampled - double[::1] _xsamples, _fsamples + double[::1] _fsamples int _nsamples cdef double _get_and_cache(self, int index) except? -1e999 diff --git a/cherab/core/math/caching/caching1d.pyx b/cherab/core/math/caching/caching1d.pyx index 62501d44..81ce6ca3 100644 --- a/cherab/core/math/caching/caching1d.pyx +++ b/cherab/core/math/caching/caching1d.pyx @@ -21,7 +21,6 @@ import numpy as np cimport cython -from raysect.core.math.cython.utility cimport find_index from raysect.core.math.cython.interpolation.cubic cimport calc_coefficients_1d, evaluate_cubic_1d from cherab.core.math.function cimport autowrap_function1d @@ -73,7 +72,6 @@ cdef class Caching1D(Function1D): """ # The implementation works as follows: - # - From space_area and resolution, generate an x grid of sampled points. # - Store a list of which points have already been evaluated, and their values. # - For each new value x to evaluate, check if the sample points either side # have already been evaluated. @@ -84,6 +82,9 @@ cdef class Caching1D(Function1D): # - Use raysect.core.math.cython.interpolation.cubic's utilities to calculate # the polynomial coefficients for the 1D cubic and then evaluate the cubic # interpolation of the function at the position x. + # - Note that the sampled points have uniform spacing: this enables a few + # optimisations such as analytic calculations of the array index and x + # values for the nearest samples to the requested x. def __init__(self, function1d, space_area, resolution, no_boundary_error=False, function_boundaries=None): self._function = autowrap_function1d(function1d) @@ -93,14 +94,9 @@ cdef class Caching1D(Function1D): if resolution <= 0: raise ValueError("Resolution must be greater than zero.") self._resolution = resolution - nsamples = int((self._xmax - self._xmin) / resolution + 1) - xsamples = np.linspace(self._xmin, self._xmax, nsamples) - fsamples = np.empty_like(xsamples) - sampled = np.full_like(fsamples, False, dtype='uint8') - self._nsamples = nsamples - self._xsamples = xsamples - self._fsamples = fsamples - self._sampled = sampled + self._nsamples = int((self._xmax - self._xmin) / resolution + 1) + self._fsamples = np.empty(self._nsamples) + self._sampled = np.full(self._nsamples, False, dtype='uint8') self._no_boundary_error = no_boundary_error # TODO: normalise using function_boundaries to avoid numerical issues. @@ -111,7 +107,7 @@ cdef class Caching1D(Function1D): cdef double x, fsamp if not self._sampled[index]: - x = self._xsamples[index] + x = self._xmin + index * self._resolution fsamp = self._function.evaluate(x) self._sampled[index] = True self._fsamples[index] = fsamp @@ -135,7 +131,8 @@ cdef class Caching1D(Function1D): double dfdx[2] # Function derivative evaluations if self._xmin <= x < self._xmax: - lower_index = find_index(self._xsamples, x) + # Sampling is uniform, so lower_index is determined analytically. + lower_index = int((x - self._xmin) // self._resolution) f[0] = self._get_and_cache(lower_index) f[1] = self._get_and_cache(lower_index + 1) # Calculate derivatives. calc_coefficiets_1d requires derivatives @@ -151,7 +148,9 @@ cdef class Caching1D(Function1D): fnext = self._get_and_cache(lower_index + 2) dfdx[1] = (fnext - f[0]) / 2 calc_coefficients_1d(f, dfdx, a) - xnorm = (x - self._xsamples[lower_index]) / self._resolution + # lower_x = xmin + resolution * index + # xnorm = (x - lower_x) / res = (x - xmin) / resolution - index + xnorm = (x - self._xmin) / self._resolution - lower_index feval = evaluate_cubic_1d(a, xnorm) return feval