-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbaselines.py
124 lines (112 loc) · 4.95 KB
/
baselines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve
import pyqtgraph as pg
def savitzky_golay(y, window_size, order, deriv=0, rate=1):
    """Smooth (and optionally differentiate) data with a Savitzky-Golay filter.

    Adapted from the SciPy cookbook:
    https://scipy-cookbook.readthedocs.io/items/SavitzkyGolay.html
    Not particularly useful for undersampled data!!

    The Savitzky-Golay filter removes high frequency noise from data.
    It has the advantage of preserving the original shape and
    features of the signal better than other types of filtering
    approaches, such as moving-average techniques.

    Parameters
    ----------
    y : array_like, shape (N,)
        The values of the time history of the signal.
    window_size : int
        The length of the window. Must be a positive odd integer.
    order : int
        The order of the polynomial used in the filtering.
        Must be less than `window_size` - 1.
    deriv : int
        The order of the derivative to compute (default = 0 means only
        smoothing).
    rate : number
        The filter coefficients are scaled by ``rate**deriv``
        (presumably the sampling rate, so derivatives come out per unit
        of the independent variable -- confirm against callers).

    Returns
    -------
    ys : ndarray, shape (N,)
        The smoothed signal (or its n-th derivative).

    Raises
    ------
    ValueError
        If `window_size` or `order` cannot be converted to ``int``.
    TypeError
        If `window_size` is not a positive odd number, or is too small
        for the requested polynomial order. (The exception type is kept
        from the original cookbook recipe for backward compatibility.)

    Notes
    -----
    The Savitzky-Golay is a type of low-pass filter, particularly
    suited for smoothing noisy data. The main idea behind this
    approach is to make for each point a least-squares fit with a
    polynomial of high order over an odd-sized window centered at
    the point.

    Examples
    --------
    t = np.linspace(-4, 4, 500)
    y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape)
    ysg = savitzky_golay(y, window_size=31, order=4)
    import matplotlib.pyplot as plt
    plt.plot(t, y, label='Noisy signal')
    plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal')
    plt.plot(t, ysg, 'r', label='Filtered signal')
    plt.legend()
    plt.show()

    References
    ----------
    .. [1] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of
       Data by Simplified Least Squares Procedures. Analytical
       Chemistry, 1964, 36 (8), pp 1627-1639.
    .. [2] Numerical Recipes 3rd Edition: The Art of Scientific Computing
       W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery
       Cambridge University Press ISBN-13: 9780521880688
    """
    from math import factorial

    # Use the builtin int(): the np.int alias was removed from NumPy.
    try:
        window_size = abs(int(window_size))
        order = abs(int(order))
    except (TypeError, ValueError) as msg:
        raise ValueError("window_size and order have to be of type int") from msg

    if window_size % 2 != 1 or window_size < 1:
        raise TypeError("window_size size must be a positive odd number")
    if window_size < order + 2:
        raise TypeError("window_size is too small for the polynomials order")

    y = np.asarray(y)
    order_range = range(order + 1)
    half_window = (window_size - 1) // 2

    # Precompute the filter coefficients: row `deriv` of the pseudo-inverse
    # of the Vandermonde-style design matrix over the window offsets.
    # A plain ndarray is used because np.mat was removed from NumPy.
    b = np.array(
        [[k ** i for i in order_range]
         for k in range(-half_window, half_window + 1)],
        dtype=float,
    )
    m = np.linalg.pinv(b)[deriv] * rate ** deriv * factorial(deriv)

    # Pad the signal at both ends with values mirrored about the end points,
    # so the 'valid' convolution returns as many samples as the input.
    firstvals = y[0] - np.abs(y[1:half_window + 1][::-1] - y[0])
    lastvals = y[-1] + np.abs(y[-half_window - 1:-1][::-1] - y[-1])
    y = np.concatenate((firstvals, y, lastvals))
    return np.convolve(m[::-1], y, mode='valid')
def baseline_als(y, lam, p, niter=20, quiet=False):
    """Estimate a baseline by asymmetric least squares (ALS) smoothing.

    From https://stackoverflow.com/questions/29156532
    Reference: "Asymmetric Least Squares Smoothing" by P. Eilers and
    H. Boelens in 2005.

    There are two parameters: p for asymmetry and λ for smoothness. Both have
    to be tuned to the data at hand. We found that generally
    0.001 ≤ p ≤ 0.1 is a good choice (for a signal with positive peaks) and
    10^2 ≤ λ ≤ 10^9, but exceptions may occur. In any case one should vary λ
    on a grid that is approximately linear for log λ.

    Parameters
    ----------
    y : array_like, shape (L,)
        The signal whose baseline is to be estimated.
    lam : number
        Smoothness weight (λ) on the second-difference penalty.
    p : number
        Asymmetry: weight for points above the current baseline; points at
        or below it get weight ``1 - p``.
    niter : int
        Number of re-weighting iterations (must be at least 1).
    quiet : bool
        If false, print the parameters before running.

    Returns
    -------
    z : ndarray, shape (L,)
        The estimated baseline.

    Raises
    ------
    ValueError
        If `niter` is smaller than 1.
    """
    if not quiet: print('Asymmetric Baseline subtraction with lambda {0:.3f} and p {1:.3f}.'.format(lam, p))
    if niter < 1:
        raise ValueError("niter must be at least 1")

    y = np.asarray(y)
    L = len(y)
    # Second-order difference operator; D.dot(D.T) is the L x L roughness
    # penalty. It does not depend on the weights, so build it once.
    D = sparse.diags([1, -2, 1], [0, -1, -2], shape=(L, L - 2))
    penalty = lam * D.dot(D.transpose())

    w = np.ones(L)
    for _ in range(niter):
        WW = sparse.spdiags(w, 0, L, L)
        z = spsolve(WW + penalty, w * y)
        # Eilers & Boelens weighting: p above the baseline, 1 - p otherwise.
        # Points with y == z must keep a non-zero weight; otherwise an exact
        # fit (e.g. constant or linear data) zeroes every weight and the
        # system becomes singular.
        w = np.where(y > z, p, 1 - p)
    return z
def baselineIterator(data, lam, p, niter=20):
    """Subtract an ALS baseline from every trace in a dictionary of dataframes.

    Parameters
    ----------
    data : dict
        Maps a set name to a dataframe of ROI traces, one trace per column.
    lam : number
        Smoothness parameter passed on to `baseline_als`.
    p : number
        Asymmetry parameter passed on to `baseline_als`.
    niter : int
        Number of ALS iterations passed on to `baseline_als`.

    Returns
    -------
    bdata : dict
        The same keys as `data`, mapped to the baseline-subtracted dataframes.

    Notes
    -----
    The subtraction is done in place: the dataframes in `data` are modified,
    and the returned dictionary refers to those same objects.
    A progress dialog is shown for each set while its columns are processed.
    """
    # Is there a problem with running it twice? No, the problem was peaks
    # were being chucked out.
    bdata = {}
    for _set, df in data.items():
        print ("Auto baseline for {0} set. lambda: {1:.3f} and p: {2:.3f}".format(_set, lam, p))
        maxVal = len(df.columns)
        progMsg = "Auto baseline for {0} traces".format(maxVal)
        with pg.ProgressDialog(progMsg, 0, maxVal) as dlg:
            dlg.setMinimumWidth(300)
            for col in df:
                dlg += 1
                y = np.asarray(df[col])
                # Subtract the fitted baseline from this column, honouring
                # the caller's niter (it was previously fixed at 20).
                df[col] -= baseline_als(y, lam, p, niter=niter, quiet=True)
        bdata[_set] = df
    return bdata