OTHER: Allow inputting assymetric_coef for aspls and pspline_aspls
Also added tests for the aspls weighting.
derb12 committed Dec 19, 2024
1 parent 23c6c5b commit fcf4cfd
Showing 8 changed files with 267 additions and 24 deletions.
28 changes: 23 additions & 5 deletions pybaselines/_weighting.py
@@ -301,7 +301,7 @@ def _iarpls(y, baseline, iteration):
return weights, exit_early


def _aspls(y, baseline):
def _aspls(y, baseline, assymetric_coef):
"""
Weighting for the adaptive smoothness penalized least squares smoothing (aspls).
@@ -311,17 +311,23 @@ def _aspls(y, baseline):
The measured data.
baseline : numpy.ndarray, shape (N,)
The calculated baseline.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like).
Returns
-------
weights : numpy.ndarray, shape (N,)
The calculated weights.
residual : numpy.ndarray, shape (N,)
The residual, ``y - baseline``.
exit_early : bool
Designates if there is a potential error with the calculation such that no further
iterations should be performed.
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
@@ -333,10 +339,22 @@ def _aspls(y, baseline):
"""
residual = y - baseline
std = _safe_std(residual[residual < 0], ddof=1) # use dof=1 since sampling a subset
neg_residual = residual[residual < 0]
if neg_residual.size < 2:
exit_early = True
warnings.warn(
('almost all baseline points are below the data, indicating that "tol"'
' is too low and/or "max_iter" is too high'), ParameterWarning,
stacklevel=2
)
return np.zeros_like(y), residual, exit_early
else:
exit_early = False
std = _safe_std(neg_residual, ddof=1) # use dof=1 since sampling subset

# add a negative sign since expit performs 1/(1+exp(-input))
weights = expit(-(0.5 / std) * (residual - std))
return weights, residual
weights = expit(-(assymetric_coef / std) * (residual - std))
return weights, residual, exit_early


def _psalsa(y, baseline, p, k, shape_y):
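For illustration only (not part of the diff), a minimal sketch of the weighting formula above, showing how the new `assymetric_coef` argument controls the steepness of the logistic curve: larger values make the weights drop from ~1 to ~0 more abruptly once the residual exceeds the estimated noise level. The residual values here are made up.

import numpy as np
from scipy.special import expit

# synthetic residual (y - baseline); its negative part estimates the noise level
residual = np.linspace(-3, 3, 7)
std = np.std(residual[residual < 0], ddof=1)

for assymetric_coef in (0.5, 2.0):
    # same form as _aspls: expit(-(k / std) * (residual - std))
    weights = expit(-(assymetric_coef / std) * (residual - std))
    print(assymetric_coef, np.round(weights, 3))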
35 changes: 29 additions & 6 deletions pybaselines/spline.py
@@ -16,7 +16,7 @@
from ._banded_utils import _add_diagonals, _shift_rows, diff_penalty_diagonals
from ._compat import dia_object, jit, trapezoid
from ._spline_utils import _basis_midpoints
from ._validation import _check_lam, _check_optional_array
from ._validation import _check_lam, _check_optional_array, _check_scalar_variable
from .utils import (
ParameterWarning, _mollifier_kernel, _sort_array, gaussian, pad_edges, padded_convolve,
relative_difference
@@ -907,7 +907,7 @@ def pspline_iarpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_ord

@_Algorithm._register(sort_keys=('weights', 'alpha'), dtype=float, order='C')
def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2,
max_iter=100, tol=1e-3, weights=None, alpha=None):
max_iter=100, tol=1e-3, weights=None, alpha=None, assymetric_coef=0.5):
"""
A penalized spline version of the asPLS algorithm.
Expand Down Expand Up @@ -937,6 +937,9 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde
An array of values that control the local value of `lam` to better
fit peak and non-peak regions. If None (default), then the initial values
will be an array with size equal to N and all values set to 1.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like). Default is 0.5.
Returns
-------
@@ -955,13 +958,19 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde
completed. If the last value in the array is greater than the input
`tol` value, then the function did not converge.
Raises
------
ValueError
Raised if `alpha` and `data` do not have the same shape. Also raised if
`assymetric_coef` is not greater than 0.
See Also
--------
Baseline.aspls
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
@@ -985,6 +994,7 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde
)
if self._sort_order is not None and alpha is not None:
alpha_array = alpha_array[self._sort_order]
assymetric_coef = _check_scalar_variable(assymetric_coef, variable_name='assymetric_coef')

interp_pts = _basis_midpoints(self.pspline.knots, self.pspline.spline_degree)
tol_history = np.empty(max_iter + 1)
@@ -995,7 +1005,10 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde
self.pspline.num_bands, self.pspline.num_bands
)
baseline = self.pspline.solve_pspline(y, weight_array, alpha_penalty)
new_weights, residual = _weighting._aspls(y, baseline)
new_weights, residual, exit_early = _weighting._aspls(y, baseline, assymetric_coef)
if exit_early:
i -= 1 # reduce i so that output tol_history indexing is correct
break
calc_difference = relative_difference(weight_array, new_weights)
tol_history[i] = calc_difference
if calc_difference < tol:
@@ -2091,7 +2104,8 @@ def pspline_iarpls(data, lam=1e3, num_knots=100, spline_degree=3, diff_order=2,

@_spline_wrapper
def pspline_aspls(data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2,
max_iter=100, tol=1e-3, weights=None, alpha=None, x_data=None):
max_iter=100, tol=1e-3, weights=None, alpha=None, x_data=None,
assymetric_coef=0.5):
"""
A penalized spline version of the asPLS algorithm.
@@ -2124,6 +2138,9 @@ def pspline_aspls(data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2,
x_data : array-like, shape (N,), optional
The x-values of the measured data. Default is None, which will create an
array from -1 to 1 with N points.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like). Default is 0.5.
Returns
-------
@@ -2142,13 +2159,19 @@ def pspline_aspls(data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2,
completed. If the last value in the array is greater than the input
`tol` value, then the function did not converge.
Raises
------
ValueError
Raised if `alpha` and `data` do not have the same shape. Also raised if
`assymetric_coef` is not greater than 0.
See Also
--------
pybaselines.whittaker.aspls
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
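As a usage sketch (assuming a pybaselines build that includes this commit), the new keyword is simply forwarded from `pspline_aspls` to the shared weighting; the synthetic signal below is only for demonstration.

import numpy as np
from pybaselines import Baseline

x = np.linspace(0, 1000, 1000)
rng = np.random.default_rng(0)
y = (np.exp(-0.5 * ((x - 500) / 30) ** 2)    # a single Gaussian peak
     + 0.001 * x + 2                         # sloped background
     + rng.normal(0, 0.01, x.size))          # noise

fitter = Baseline(x_data=x)
# larger assymetric_coef -> more step-like weighting
fit_a, params_a = fitter.pspline_aspls(y, lam=1e4, assymetric_coef=0.5)
fit_b, params_b = fitter.pspline_aspls(y, lam=1e4, assymetric_coef=2.0)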
21 changes: 17 additions & 4 deletions pybaselines/two_d/whittaker.py
@@ -10,7 +10,7 @@

from .. import _weighting
from .._compat import diags
from .._validation import _check_optional_array
from .._validation import _check_optional_array, _check_scalar_variable
from ..utils import _MIN_FLOAT, relative_difference
from ._algorithm_setup import _Algorithm2D
from ._whittaker_utils import PenalizedSystem2D
@@ -585,7 +585,7 @@ def iarpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non
sort_keys=('weights', 'alpha'), reshape_keys=('weights', 'alpha'), reshape_baseline=True
)
def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
weights=None, alpha=None):
weights=None, alpha=None, assymetric_coef=0.5):
"""
Adaptive smoothness penalized least squares smoothing (asPLS).
@@ -612,6 +612,9 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
An array of values that control the local value of `lam` to better
fit peak and non-peak regions. If None (default), then the initial values
will be an array with shape equal to (M, N) and all values set to 1.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like). Default is 0.5.
Returns
-------
@@ -630,9 +633,15 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
completed. If the last value in the array is greater than the input
`tol` value, then the function did not converge.
Raises
------
ValueError
Raised if `alpha` and `data` do not have the same shape. Also raised if
`assymetric_coef` is not greater than 0.
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
@@ -651,6 +660,7 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
)
if self._sort_order is not None and alpha is not None:
alpha_array = alpha_array[self._sort_order]
assymetric_coef = _check_scalar_variable(assymetric_coef, variable_name='assymetric_coef')

# use a sparse matrix to maintain sparsity after multiplication; implementation note:
# could skip making an alpha matrix and just use alpha_array[:, None] * penalty once
@@ -660,7 +670,10 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
for i in range(max_iter + 1):
penalty = alpha_matrix @ self.whittaker_system.penalty
baseline = self.whittaker_system.solve(y, weight_array, penalty=penalty)
new_weights, residual = _weighting._aspls(y, baseline)
new_weights, residual, exit_early = _weighting._aspls(y, baseline, assymetric_coef)
if exit_early:
i -= 1 # reduce i so that output tol_history indexing is correct
break
calc_difference = relative_difference(weight_array, new_weights)
tol_history[i] = calc_difference
if calc_difference < tol:
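The two-dimensional method takes the same keyword. A rough sketch, assuming the `Baseline2D` interface (rows along x, columns along z) behaves like its 1D counterpart; the data here are synthetic.

import numpy as np
from pybaselines import Baseline2D

x = np.linspace(-20, 20, 80)
z = np.linspace(-20, 20, 60)
X, Z = np.meshgrid(x, z, indexing='ij')
data = np.exp(-(X**2 + Z**2) / 50) + 0.01 * (X + Z) + 1  # peak on a tilted plane

fitter = Baseline2D(x_data=x, z_data=z)
# assymetric_coef is validated and then passed to _weighting._aspls
baseline, params = fitter.aspls(data, lam=1e3, assymetric_coef=1.0)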
31 changes: 24 additions & 7 deletions pybaselines/whittaker.py
@@ -11,7 +11,7 @@
from . import _weighting
from ._algorithm_setup import _Algorithm, _class_wrapper
from ._banded_utils import _shift_rows, diff_penalty_diagonals
from ._validation import _check_lam, _check_optional_array
from ._validation import _check_lam, _check_optional_array, _check_scalar_variable
from .utils import _mollifier_kernel, pad_edges, padded_convolve, relative_difference


@@ -494,7 +494,7 @@ def iarpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non

@_Algorithm._register(sort_keys=('weights', 'alpha'))
def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
weights=None, alpha=None):
weights=None, alpha=None, assymetric_coef=0.5):
"""
Adaptive smoothness penalized least squares smoothing (asPLS).
@@ -520,6 +520,9 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
An array of values that control the local value of `lam` to better
fit peak and non-peak regions. If None (default), then the initial values
will be an array with size equal to N and all values set to 1.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like). Default is 0.5.
Returns
-------
@@ -538,9 +541,15 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
completed. If the last value in the array is greater than the input
`tol` value, then the function did not converge.
Raises
------
ValueError
Raised if `alpha` and `data` do not have the same shape. Also raised if
`assymetric_coef` is not greater than 0.
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
@@ -560,6 +569,7 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
)
if self._sort_order is not None and alpha is not None:
alpha_array = alpha_array[self._sort_order]
assymetric_coef = _check_scalar_variable(assymetric_coef, variable_name='assymetric_coef')

main_diag_idx = self.whittaker_system.main_diagonal_index
lower_upper_bands = (diff_order, diff_order)
@@ -573,7 +583,10 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3,
lhs, weight_array * y, overwrite_ab=True, overwrite_b=True,
l_and_u=lower_upper_bands
)
new_weights, residual = _weighting._aspls(y, baseline)
new_weights, residual, exit_early = _weighting._aspls(y, baseline, assymetric_coef)
if exit_early:
i -= 1 # reduce i so that output tol_history indexing is correct
break
calc_difference = relative_difference(weight_array, new_weights)
tol_history[i] = calc_difference
if calc_difference < tol:
@@ -1138,7 +1151,7 @@ def iarpls(data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=None, x_d

@_whittaker_wrapper
def aspls(data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, weights=None,
alpha=None, x_data=None):
alpha=None, x_data=None, assymetric_coef=0.5):
"""
Adaptive smoothness penalized least squares smoothing (asPLS).
@@ -1167,6 +1180,9 @@ def aspls(data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, weights=None,
x_data : array-like, optional
The x-values. Not used by this function, but input is allowed for consistency
with other functions.
assymetric_coef : float
The asymmetric coefficient for the weighting. Higher values lead to a steeper
weighting curve (i.e. more step-like). Default is 0.5.
Returns
-------
@@ -1188,11 +1204,12 @@ def aspls(data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, weights=None,
Raises
------
ValueError
Raised if `alpha` and `data` do not have the same shape.
Raised if `alpha` and `data` do not have the same shape. Also raised if `assymetric_coef`
is not greater than 0.
Notes
-----
The weighting uses an asymmetric coefficient (`k` in the asPLS paper) of 0.5 instead
The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead
of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it
matches the results in Table 2 and Figure 5 of the asPLS paper closer than the
factor of 2 and fits noisy data much better.
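For the 1D Whittaker version, a short usage sketch (synthetic data) showing why the early-exit branch decrements `i`: the returned `tol_history` then covers only the iterations actually recorded, and a final value above `tol` indicates the fit did not converge.

import numpy as np
from pybaselines import Baseline

x = np.linspace(0, 1000, 1000)
rng = np.random.default_rng(1)
y = (np.exp(-0.5 * ((x - 600) / 20) ** 2)
     + 0.002 * x + 1
     + rng.normal(0, 0.01, x.size))

fit, params = Baseline(x_data=x).aspls(y, lam=1e5, tol=1e-3, assymetric_coef=0.5)
converged = params['tol_history'][-1] < 1e-3  # False if the loop stopped early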
6 changes: 6 additions & 0 deletions tests/test_spline.py
@@ -437,6 +437,12 @@ def test_whittaker_comparison(self, lam, diff_order):
self, whittaker.aspls, self.y, lam=lam, diff_order=diff_order, test_rtol=rtol
)

@pytest.mark.parametrize('assymetric_coef', (0, -1))
def test_outside_assymetric_coef_fails(self, assymetric_coef):
"""Ensures assymetric_coef values not greater than 0 raise an exception."""
with pytest.raises(ValueError):
self.class_func(self.y, assymetric_coef=assymetric_coef)


class TestPsplinePsalsa(IterativeSplineTester):
"""Class for testing pspline_psalsa baseline."""
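A complementary check in the same pytest style could exercise the weighting directly; this is only an illustrative sketch (the standalone test function and import path are assumptions, not part of the commit).

import numpy as np
import pytest
from scipy.special import expit

from pybaselines import _weighting


@pytest.mark.parametrize('assymetric_coef', (0.5, 2.0))
def test_aspls_weighting_formula(assymetric_coef):
    """Ensures _aspls reproduces expit(-(k / std) * (residual - std))."""
    y = np.linspace(-1, 1, 100)
    baseline = np.zeros_like(y)
    weights, residual, exit_early = _weighting._aspls(y, baseline, assymetric_coef)

    std = np.std(residual[residual < 0], ddof=1)
    expected = expit(-(assymetric_coef / std) * (residual - std))

    assert not exit_early
    assert np.allclose(weights, expected)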