forked from shirtsgroup/BCC_membrane
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patherror_analysis.py
57 lines (45 loc) · 2.27 KB
/
error_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Script to run bootstrapping error on the geometric analysis distributions
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.neighbors import KernelDensity
def histogram_KDE(data, X, bandwidth=None):
# K-fold cross validation for bandwidth choice with sklearn
reshaped = data.reshape(-1,1)
if bandwidth is None:
bandwidths = 10 ** np.linspace(-1, 1, 100)
grid = GridSearchCV(KernelDensity(kernel='gaussian'),
{'bandwidth': bandwidths},
cv=KFold(n_splits=5))
grid.fit(reshaped)
bandwidth = grid.best_params_['bandwidth']
print('Best bandwidth: ', bandwidth)
kde = KernelDensity(kernel="gaussian", bandwidth=bandwidth).fit(reshaped)
log_dens = kde.score_samples(X.reshape(-1,1))
return bandwidth, np.exp(log_dens)
# Load in data
data = np.loadtxt('data/pore2pore_schwarz_raw.txt')
idx = [i for i in range(data.shape[0])]
# Generate bootstrapped data sets of the raw pore-to-pore distances
n_bootstraps = 1000
boot = np.zeros((n_bootstraps, data.shape[0]))
for b in range(n_bootstraps):
boot_idx = np.random.choice(idx,replace=True,size=data.shape[0])
boot[b,:] = data[boot_idx]
# Calculate KDE histogram for each bootstrapped data sets
X_plot = np.linspace(0,10,1000)
histograms = np.zeros((n_bootstraps, X_plot.shape[0]))
bandwidths = np.zeros(n_bootstraps)
for b, b_data in enumerate(boot):
bandwidths[b], histograms[b, :] = histogram_KDE(b_data, X_plot, bandwidth=0.1)
print('Completed {} out of {} bootstraps'.format(b+1, n_bootstraps))
print('Average best bandwidth: {:.4f}'.format(bandwidths.mean()))
# Save the average KDE and CI from bootstrapping
ci = st.norm.interval(alpha=0.95, loc=histograms.mean(axis=0), scale=st.sem(histograms))
save_data = np.vstack([X_plot, histograms.mean(axis=0), ci[0], ci[1]])
np.savetxt('data/schwarz_with_error.txt', save_data.transpose(), header='Bootstrapped average pore-to-pore distance distribution (2nd column) with 95% confidence intervals (3rd and 4th columns) -- (1st column is x axis (nm))')
plt.plot(X_plot, histograms.mean(axis=0), c='k')
plt.plot(X_plot, ci[0], ls='--', c='r')
plt.plot(X_plot, ci[1], ls='--', c='r')
plt.show()