From d677d719b577b86a4d4aa84f1faa5e1518f6e7a3 Mon Sep 17 00:00:00 2001 From: Donnie Erb <55961724+derb12@users.noreply.github.com> Date: Mon, 12 Feb 2024 18:56:53 -0500 Subject: [PATCH] DOCS: Add docs and example for custom_bc Also updated the logo_plot helper script to use the Baseline class. --- docs/algorithms/optimizers.rst | 12 ++ docs/conf.py | 3 +- docs/index.rst | 1 + docs/introduction.rst | 1 + docs/parameter_selection.rst | 53 +++++++++ examples/optimizers/README.rst | 2 + .../optimizers/plot_custom_bc_1_whittaker.py | 104 ++++++++++++++++++ pybaselines/__init__.py | 1 + tools/logo_plot.py | 12 +- 9 files changed, 183 insertions(+), 6 deletions(-) create mode 100644 docs/parameter_selection.rst create mode 100644 examples/optimizers/README.rst create mode 100644 examples/optimizers/plot_custom_bc_1_whittaker.py diff --git a/docs/algorithms/optimizers.rst b/docs/algorithms/optimizers.rst index c67182c..c5b1eef 100644 --- a/docs/algorithms/optimizers.rst +++ b/docs/algorithms/optimizers.rst @@ -284,3 +284,15 @@ the name). poly_order = 1 baseline, params = baseline_fitter.adaptive_minmax(y, poly_order=poly_order, method='imodpoly') ax.plot(baseline, 'g--') + + +custom_bc (Customized Baseline Correction) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~.Baseline.custom_bc` allows fine-tuning the stiffness of the +baseline within different regions of the fit data, which is helpful when +experimental data has drastically different baselines within it. This is done by +reducing the number of data points in regions where higher stiffness +is required. There is no figure showing the fits for various baseline types for +this method since it is more suited for hard-to-fit data; however, :ref:`an +example <sphx_glr_examples_optimizers_plot_custom_bc_1_whittaker.py>` showcases its use. 
diff --git a/docs/conf.py b/docs/conf.py index 3803137..16a31f7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -158,7 +158,8 @@ '../examples/morphological', '../examples/spline', '../examples/classification', - '../examples/misc' + '../examples/misc', + '../examples/optimizers', ] sphinx_gallery_conf = { diff --git a/docs/index.rst b/docs/index.rst index 80c3737..c9d3ae0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -21,6 +21,7 @@ pybaselines is a library of algorithms for the baseline correction of experiment introduction installation quickstart + parameter_selection algorithms/index examples/index api/index diff --git a/docs/introduction.rst b/docs/introduction.rst index 0403a88..f1ea966 100644 --- a/docs/introduction.rst +++ b/docs/introduction.rst @@ -83,6 +83,7 @@ names of the algorithms were used. The algorithms are grouped accordingly: * collab_pls (Collaborative Penalized Least Squares) * optimize_extended_range * adaptive_minmax (Adaptive MinMax) + * custom_bc (Customized Baseline Correction) * Miscellaneous methods (:mod:`pybaselines.misc`) diff --git a/docs/parameter_selection.rst b/docs/parameter_selection.rst new file mode 100644 index 0000000..128c005 --- /dev/null +++ b/docs/parameter_selection.rst @@ -0,0 +1,53 @@ +=================== +Parameter Selection +=================== + +Most baseline algorithms in pybaselines have several parameters that can be adjusted. +While this allows for fine-tuning each algorithm to work in a wide array of cases, +it can also present a difficulty for new users. It is suggested to start by adjusting only +one or two main parameters, and then change other parameters as needed. **Due to the +variable nature of baselines, it is highly recommended to not assume the default +parameters will work for your data!** Below are the suggested parameters to begin +adjusting for each family of algorithms within pybaselines: + +* Polynomial methods + + * ``poly_order`` controls the curvature of the baseline. 
+ +* Whittaker-smoothing-based methods + + * ``lam`` controls the curvature of the baseline. See + :ref:`this example ` + to get an idea of how ``lam`` affects the baseline. The optimal ``lam`` + value for each algorithm is not typically the same. + +* Morphological methods + + * ``half_window`` controls the general fit of the baseline. See + :ref:`this example ` + to get an idea of how ``half_window`` affects the baseline. The optimal + ``half_window`` value for each algorithm is not typically the same. + +* Spline methods + + * ``lam`` controls the curvature of the baseline. The + :ref:`Whittaker example ` + also generally applies to spline methods. + +* Smoothing-based methods + + * ``half_window`` controls the general fit of the baseline. The + :ref:`Morphological example ` + also generally applies to smoothing methods. + +* Baseline/Peak Classification methods + + * Algorithm dependent + +* Optimizers + + * Algorithm dependent + +* Miscellaneous methods + + * Algorithm dependent diff --git a/examples/optimizers/README.rst b/examples/optimizers/README.rst new file mode 100644 index 0000000..1ca4a8f --- /dev/null +++ b/examples/optimizers/README.rst @@ -0,0 +1,2 @@ +Optimizer Baseline Examples +--------------------------- diff --git a/examples/optimizers/plot_custom_bc_1_whittaker.py b/examples/optimizers/plot_custom_bc_1_whittaker.py new file mode 100644 index 0000000..4630be7 --- /dev/null +++ b/examples/optimizers/plot_custom_bc_1_whittaker.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +""" +Customized Baseline Correction +------------------------------ + +This example looks at the ingenious baseline correction method created +by Liland et al., :meth:`~.Baseline.custom_bc`. 
+ +The :meth:`.custom_bc` method works exceedingly well for morphological +and smoothing baselines, since those methods typically depend directly +on the number of data points, and for Whittaker-smoothing-based methods, +since the `lam` value is :ref:`heavily dependent on the number of data +points `. + +This example will examine the use of the optimizer method +:meth:`~.Baseline.custom_bc` paired with the Whittaker-smoothing-based +method :meth:`~.Baseline.arpls`. + +""" +# sphinx_gallery_thumbnail_number = 1 + +import matplotlib.pyplot as plt +import numpy as np + +from pybaselines import Baseline +from pybaselines.utils import gaussian + + +x = np.linspace(20, 1000, 1000) +signal = ( + + gaussian(x, 6, 240, 5) + + gaussian(x, 8, 350, 11) + + gaussian(x, 15, 400, 18) + + gaussian(x, 6, 550, 6) + + gaussian(x, 13, 700, 8) + + gaussian(x, 9, 800, 9) + + gaussian(x, 9, 880, 7) +) +baseline = 5 + 6 * np.exp(-(x - 40) / 30) + gaussian(x, 5, 1000, 300) +noise = np.random.default_rng(0).normal(0, 0.1, len(x)) +y = signal + baseline + noise + +baseline_fitter = Baseline(x_data=x) +# %% +# For certain types of data, there can often be a sharp change in the +# baseline within a small region, such as in Raman spectroscopy +# near a wavenumber of 0 or in XRD at low two-theta. This presents a +# significant challenge to baseline algorithms that fit a single "global" +# baseline such as Whittaker-smoothing-based methods. The majority of +# the data can be fit using a "stiff" baseline, but the anomalous region +# requires a more flexible baseline. +# +# Plotting each of these two cases separately, it is apparent each fits +# its target region well, but combining the two into a single baseline +# is difficult. 
+lam_flexible = 1e2 +lam_stiff = 5e5 + +flexible_baseline = baseline_fitter.arpls(y, lam=lam_flexible)[0] +stiff_baseline = baseline_fitter.arpls(y, lam=lam_stiff)[0] + +plt.figure() +plt.plot(x, y) +plt.plot(x, flexible_baseline, label='Flexible baseline') +plt.plot(x, stiff_baseline, label='Stiff baseline') +plt.legend() + +# %% +# The beauty of Liland's customized baseline correction method is that +# it allows fitting baselines that are stiff in some regions and flexible +# in others by simply truncating the data to increase the stiffness. The input +# ``lam`` value within `method_kwargs` should correspond to the most flexible region, and the +# truncation should begin close to where the stiff and flexible baselines +# overlap, which is at approximately x=160 from the above figure. A small `lam` value +# of 1e1 is used to then smooth the calculated baseline using Whittaker +# smoothing so that the two regions connect without any significant discontinuity. + +crossover_index = np.argmin(abs(x - 160)) +fit_baseline, params = baseline_fitter.custom_bc( + y, 'arpls', + regions=([crossover_index, None],), + sampling=15, + method_kwargs={'lam': lam_flexible}, + lam=1e1 +) + +plt.figure() +plt.plot(x, y) +plt.plot(x, fit_baseline, label='Fit baseline') +plt.plot(x, baseline, '--', label='True baseline') +plt.legend() + +# %% +# Looking at the results, this method is able to accurately recreate the +# true data even though the two baselines have significantly different +# requirements for stiffness. 
+ +plt.figure() +plt.plot(x, y - baseline, label='True data') +plt.plot(x, y - fit_baseline, label='Baseline corrected') +plt.legend() + + +plt.show() diff --git a/pybaselines/__init__.py b/pybaselines/__init__.py index 0e4de78..854aed3 100644 --- a/pybaselines/__init__.py +++ b/pybaselines/__init__.py @@ -78,6 +78,7 @@ * collab_pls (Collaborative Penalized Least Squares) * optimize_extended_range * adaptive_minmax (Adaptive MinMax) + * custom_bc (Customized Baseline Correction) * Miscellaneous methods (:mod:`pybaselines.misc`) diff --git a/tools/logo_plot.py b/tools/logo_plot.py index 01f59a1..672ca0e 100644 --- a/tools/logo_plot.py +++ b/tools/logo_plot.py @@ -21,8 +21,7 @@ raise import numpy as np - import pybaselines - from pybaselines import utils + from pybaselines import Baseline, utils # assumes file is in pybaselines/tools image_directory = Path(__file__).parent @@ -46,10 +45,12 @@ noise = np.random.default_rng(1).normal(0, 0.05, x.size) y = signal + true_baseline + noise - baseline = pybaselines.whittaker.arpls(y, lam=1e7)[0] + baseline = Baseline().arpls(y, lam=1e7)[0] - ax.plot(x, y, lw=1.5) - ax.plot(x, baseline) + blue = '#0952ff' + pink = '#ff5255' + ax.plot(x, y, lw=1.5, color=blue) + ax.plot(x, baseline, lw=4, color=pink) ax.set_yticks([]) ax.set_xticks([]) @@ -57,6 +58,7 @@ ax.spines['bottom'].set_visible(False) ax.spines['left'].set_visible(False) ax.spines['right'].set_visible(False) + # save as an svg so that it can be edited/scaled in inkskape without # losing image quality fig.savefig(image_directory.joinpath('logo_new.svg'), transparent=True)