OTHER: Allow inputting assymetric_coef for aspls and pspline_aspls
Also added tests for the aspls weighting.
derb12 committed Dec 19, 2024
1 parent 23c6c5b commit fcf4cfd
Showing 8 changed files with 267 additions and 24 deletions.
28 changes: 23 additions & 5 deletions pybaselines/_weighting.py
@@ -301,7 +301,7 @@ def _iarpls(y, baseline, iteration):
return weights, exit_early


def _aspls(y, baseline):
def _aspls(y, baseline, assymetric_coef):
"""
Weighting for the adaptive smoothness penalized least squares smoothing (aspls).
@@ -311,17 +311,23 @@ def _aspls(y, baseline):
The measured data.
baseline : numpy.ndarray, shape (N,)
The calculated baseline.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like).
Returns
-------
weights : numpy.ndarray, shape (N,)
The calculated weights.
residual : numpy.ndarray, shape (N,)
The residual, ``y - baseline``.
exit_early : bool
Designates if there is a potential error with the calculation such that no further
iterations should be performed.
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
@@ -333,10 +339,22 @@ def _aspls(y, baseline):
"""
residual = y - baseline
std = _safe_std(residual[residual < 0], ddof=1) # use dof=1 since sampling a subset
neg_residual = residual[residual < 0]
if neg_residual.size < 2:
exit_early = True
warnings.warn(
('almost all baseline points are below the data, indicating that "tol"'
' is too low and/or "max_iter" is too high'), ParameterWarning,
stacklevel=2
)
return np.zeros_like(y), residual, exit_early
else:
exit_early = False
std = _safe_std(neg_residual, ddof=1) # use dof=1 since sampling subset

# add a negative sign since expit performs 1/(1+exp(-input))
weights = expit(-(0.5 / std) * (residual - std))
return weights, residual
weights = expit(-(assymetric_coef / std) * (residual - std))
return weights, residual, exit_early


def _psalsa(y, baseline, p, k, shape_y):
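For illustration only (not part of the diff), a minimal sketch of the weighting formula above, showing how the new `assymetric_coef` argument controls the steepness of the logistic curve: larger values make the weights drop from ~1 to ~0 more abruptly once the residual exceeds the estimated noise level. The residual values here are made up.

import numpy as np
from scipy.special import expit

# synthetic residual (y - baseline); its negative part estimates the noise level
residual = np.linspace(-3, 3, 7)
std = np.std(residual[residual < 0], ddof=1)

for assymetric_coef in (0.5, 2.0):
    # same form as _aspls: expit(-(k / std) * (residual - std))
    weights = expit(-(assymetric_coef / std) * (residual - std))
    print(assymetric_coef, np.round(weights, 3))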
35 changes: 29 additions & 6 deletions pybaselines/spline.py
@@ -16,7 +16,7 @@
from ._banded_utils import _add_diagonals, _shift_rows, diff_penalty_diagonals
from ._compat import dia_object, jit, trapezoid
from ._spline_utils import _basis_midpoints
from ._validation import _check_lam, _check_optional_array
from ._validation import _check_lam, _check_optional_array, _check_scalar_variable
from .utils import (
ParameterWarning, _mollifier_kernel, _sort_array, gaussian, pad_edges, padded_convolve,
relative_difference
@@ -907,7 +907,7 @@ def pspline_iarpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_ord

@_Algorithm._register(sort_keys=('weights', 'alpha'), dtype=float, order='C')
def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2,
max_iter=100, tol=1e-3, weights=None, alpha=None):
max_iter=100, tol=1e-3, weights=None, alpha=None, assymetric_coef=0.5):
"""
A penalized spline version of the asPLS algorithm.
Expand Down Expand Up @@ -937,6 +937,9 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde
An array of values that control the local value of `lam` to better
fit peak and non-peak regions. If None (default), then the initial values
will be an array with size equal to N and all values set to 1.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like). Default is 0.5.
Returns
-------
@@ -955,13 +958,19 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde
completed. If the last value in the array is greater than the input
`tol` value, then the function did not converge.
Raises
------
ValueError
Raised if `alpha` and `data` do not have the same shape. Also raised if
`assymetric_coef` is not greater than 0.
See Also
--------
Baseline.aspls
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
@@ -985,6 +994,7 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde
)
if self._sort_order is not None and alpha is not None:
alpha_array = alpha_array[self._sort_order]
assymetric_coef = _check_scalar_variable(assymetric_coef, variable_name='assymetric_coef')

interp_pts = _basis_midpoints(self.pspline.knots, self.pspline.spline_degree)
tol_history = np.empty(max_iter + 1)
@@ -995,7 +1005,10 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde
self.pspline.num_bands, self.pspline.num_bands
)
baseline = self.pspline.solve_pspline(y, weight_array, alpha_penalty)
new_weights, residual = _weighting._aspls(y, baseline)
new_weights, residual, exit_early = _weighting._aspls(y, baseline, assymetric_coef)
if exit_early:
i -= 1 # reduce i so that output tol_history indexing is correct
break
calc_difference = relative_difference(weight_array, new_weights)
tol_history[i] = calc_difference
if calc_difference < tol:
@@ -2091,7 +2104,8 @@ def pspline_iarpls(data, lam=1e3, num_knots=100, spline_degree=3, diff_order=2,

@_spline_wrapper
def pspline_aspls(data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2,
max_iter=100, tol=1e-3, weights=None, alpha=None, x_data=None):
max_iter=100, tol=1e-3, weights=None, alpha=None, x_data=None,
assymetric_coef=0.5):
"""
A penalized spline version of the asPLS algorithm.
@@ -2124,6 +2138,9 @@ def pspline_aspls(data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2,
x_data : array-like, shape (N,), optional
The x-values of the measured data. Default is None, which will create an
array from -1 to 1 with N points.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like). Default is 0.5.
Returns
-------
@@ -2142,13 +2159,19 @@ def pspline_aspls(data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2,
completed. If the last value in the array is greater than the input
`tol` value, then the function did not converge.
Raises
------
ValueError
Raised if `alpha` and `data` do not have the same shape. Also raised if
`assymetric_coef` is not greater than 0.
See Also
--------
pybaselines.whittaker.aspls
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
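As a usage sketch (assuming a pybaselines build that includes this commit), the new keyword is simply forwarded from `pspline_aspls` to the shared weighting; the synthetic signal below is only for demonstration.

import numpy as np
from pybaselines import Baseline

x = np.linspace(0, 1000, 1000)
rng = np.random.default_rng(0)
y = (np.exp(-0.5 * ((x - 500) / 30) ** 2)    # a single Gaussian peak
     + 0.001 * x + 2                         # sloped background
     + rng.normal(0, 0.01, x.size))          # noise

fitter = Baseline(x_data=x)
# larger assymetric_coef -> more step-like weighting
fit_a, params_a = fitter.pspline_aspls(y, lam=1e4, assymetric_coef=0.5)
fit_b, params_b = fitter.pspline_aspls(y, lam=1e4, assymetric_coef=2.0)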
21 changes: 17 additions & 4 deletions pybaselines/two_d/whittaker.py
@@ -10,7 +10,7 @@

from .. import _weighting
from .._compat import diags
from .._validation import _check_optional_array
from .._validation import _check_optional_array, _check_scalar_variable
from ..utils import _MIN_FLOAT, relative_difference
from ._algorithm_setup import _Algorithm2D
from ._whittaker_utils import PenalizedSystem2D
@@ -585,7 +585,7 @@ def iarpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non
sort_keys=('weights', 'alpha'), reshape_keys=('weights', 'alpha'), reshape_baseline=True
)
def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
weights=None, alpha=None):
weights=None, alpha=None, assymetric_coef=0.5):
"""
Adaptive smoothness penalized least squares smoothing (asPLS).
@@ -612,6 +612,9 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
An array of values that control the local value of `lam` to better
fit peak and non-peak regions. If None (default), then the initial values
will be an array with shape equal to (M, N) and all values set to 1.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like). Default is 0.5.
Returns
-------
@@ -630,9 +633,15 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
completed. If the last value in the array is greater than the input
`tol` value, then the function did not converge.
Raises
------
ValueError
Raised if `alpha` and `data` do not have the same shape. Also raised if
`assymetric_coef` is not greater than 0.
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
@@ -651,6 +660,7 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
)
if self._sort_order is not None and alpha is not None:
alpha_array = alpha_array[self._sort_order]
assymetric_coef = _check_scalar_variable(assymetric_coef, variable_name='assymetric_coef')

# use a sparse matrix to maintain sparsity after multiplication; implementation note:
# could skip making an alpha matrix and just use alpha_array[:, None] * penalty once
@@ -660,7 +670,10 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
for i in range(max_iter + 1):
penalty = alpha_matrix @ self.whittaker_system.penalty
baseline = self.whittaker_system.solve(y, weight_array, penalty=penalty)
new_weights, residual = _weighting._aspls(y, baseline)
new_weights, residual, exit_early = _weighting._aspls(y, baseline, assymetric_coef)
if exit_early:
i -= 1 # reduce i so that output tol_history indexing is correct
break
calc_difference = relative_difference(weight_array, new_weights)
tol_history[i] = calc_difference
if calc_difference < tol:
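The two-dimensional method takes the same keyword. A rough sketch, assuming the `Baseline2D` interface (rows along x, columns along z) behaves like its 1D counterpart; the data here are synthetic.

import numpy as np
from pybaselines import Baseline2D

x = np.linspace(-20, 20, 80)
z = np.linspace(-20, 20, 60)
X, Z = np.meshgrid(x, z, indexing='ij')
data = np.exp(-(X**2 + Z**2) / 50) + 0.01 * (X + Z) + 1  # peak on a tilted plane

fitter = Baseline2D(x_data=x, z_data=z)
# assymetric_coef is validated and then passed to _weighting._aspls
baseline, params = fitter.aspls(data, lam=1e3, assymetric_coef=1.0)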
31 changes: 24 additions & 7 deletions pybaselines/whittaker.py
@@ -11,7 +11,7 @@
from . import _weighting
from ._algorithm_setup import _Algorithm, _class_wrapper
from ._banded_utils import _shift_rows, diff_penalty_diagonals
from ._validation import _check_lam, _check_optional_array
from ._validation import _check_lam, _check_optional_array, _check_scalar_variable
from .utils import _mollifier_kernel, pad_edges, padded_convolve, relative_difference


@@ -494,7 +494,7 @@ def iarpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non

@_Algorithm._register(sort_keys=('weights', 'alpha'))
def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
weights=None, alpha=None):
weights=None, alpha=None, assymetric_coef=0.5):
"""
Adaptive smoothness penalized least squares smoothing (asPLS).
@@ -520,6 +520,9 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
An array of values that control the local value of `lam` to better
fit peak and non-peak regions. If None (default), then the initial values
will be an array with size equal to N and all values set to 1.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like). Default is 0.5.
Returns
-------
@@ -538,9 +541,15 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
completed. If the last value in the array is greater than the input
`tol` value, then the function did not converge.
Raises
------
ValueError
Raised if `alpha` and `data` do not have the same shape. Also raised if
`assymetric_coef` is not greater than 0.
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
@@ -560,6 +569,7 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
)
if self._sort_order is not None and alpha is not None:
alpha_array = alpha_array[self._sort_order]
assymetric_coef = _check_scalar_variable(assymetric_coef, variable_name='assymetric_coef')

main_diag_idx = self.whittaker_system.main_diagonal_index
lower_upper_bands = (diff_order, diff_order)
@@ -573,7 +583,10 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
lhs, weight_array * y, overwrite_ab=True, overwrite_b=True,
l_and_u=lower_upper_bands
)
new_weights, residual = _weighting._aspls(y, baseline)
new_weights, residual, exit_early = _weighting._aspls(y, baseline, assymetric_coef)
if exit_early:
i -= 1 # reduce i so that output tol_history indexing is correct
break
calc_difference = relative_difference(weight_array, new_weights)
tol_history[i] = calc_difference
if calc_difference < tol:
@@ -1138,7 +1151,7 @@ def iarpls(data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=None, x_d

@_whittaker_wrapper
def aspls(data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, weights=None,
alpha=None, x_data=None):
alpha=None, x_data=None, assymetric_coef=0.5):
"""
Adaptive smoothness penalized least squares smoothing (asPLS).
@@ -1167,6 +1180,9 @@ def aspls(data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, weights=None,
x_data : array-like, optional
The x-values. Not used by this function, but input is allowed for consistency
with other functions.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like). Default is 0.5.
Returns
-------
@@ -1188,11 +1204,12 @@ def aspls(data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, weights=None,
Raises
------
ValueError
Raised if `alpha` and `data` do not have the same shape.
Raised if `alpha` and `data` do not have the same shape. Also raised if `assymetric_coef`
is not greater than 0.
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
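For the 1D Whittaker version, a short usage sketch (synthetic data) showing why the early-exit branch decrements `i`: the returned `tol_history` then covers only the iterations actually recorded, and a final value above `tol` indicates the fit did not converge.

import numpy as np
from pybaselines import Baseline

x = np.linspace(0, 1000, 1000)
rng = np.random.default_rng(1)
y = (np.exp(-0.5 * ((x - 600) / 20) ** 2)
     + 0.002 * x + 1
     + rng.normal(0, 0.01, x.size))

fit, params = Baseline(x_data=x).aspls(y, lam=1e5, tol=1e-3, assymetric_coef=0.5)
converged = params['tol_history'][-1] < 1e-3  # False if the loop stopped early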
6 changes: 6 additions & 0 deletions tests/test_spline.py
@@ -437,6 +437,12 @@ def test_whittaker_comparison(self, lam, diff_order):
self, whittaker.aspls, self.y, lam=lam, diff_order=diff_order, test_rtol=rtol
)

@pytest.mark.parametrize('assymetric_coef', (0, -1))
def test_outside_assymetric_coef_fails(self, assymetric_coef):
"""Ensures assymetric_coef values not greater than 0 raise an exception."""
with pytest.raises(ValueError):
self.class_func(self.y, assymetric_coef=assymetric_coef)


class TestPsplinePsalsa(IterativeSplineTester):
"""Class for testing pspline_psalsa baseline."""
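A complementary check in the same pytest style could exercise the weighting directly; this is only an illustrative sketch (the standalone test function and import path are assumptions, not part of the commit).

import numpy as np
import pytest
from scipy.special import expit

from pybaselines import _weighting


@pytest.mark.parametrize('assymetric_coef', (0.5, 2.0))
def test_aspls_weighting_formula(assymetric_coef):
    """Ensures _aspls reproduces expit(-(k / std) * (residual - std))."""
    y = np.linspace(-1, 1, 100)
    baseline = np.zeros_like(y)
    weights, residual, exit_early = _weighting._aspls(y, baseline, assymetric_coef)

    std = np.std(residual[residual < 0], ddof=1)
    expected = expit(-(assymetric_coef / std) * (residual - std))

    assert not exit_early
    assert np.allclose(weights, expected)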