forked from cnmusco/recursive-nystrom
-
Notifications
You must be signed in to change notification settings - Fork 0
/
exampleApplication.m
83 lines (75 loc) · 2.88 KB
/
exampleApplication.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
%% Sample code
% Compares the performance of recursiveNystrom to uniform Nystrom
% on a forest covertype dataset.
% Download the forest covertype data from the UCI repository.
covtype = websave('covtype.csv.gz','https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz')
covtype = gunzip(covtype)
% Read in the data matrix and select a small random subset to experiment with.
A = csvread('covtype.csv');
n = 6000;
% Keep n random rows; trim off the last column, which holds class labels.
X = A(randperm(size(A,1),n), 1:end-1);
% Mean center and normalize the numerical columns (1:10); the remaining
% binary columns are scaled by 1/sqrt(n).
d = size(X,2);
means = zeros(1,d);
means(1:10) = mean(X(:,1:10));
X = bsxfun(@minus, X, means);
cnorms = ones(1,d)/sqrt(n);
cnorms(1:10) = 1./sqrt(sum(X(:,1:10).^2,1));
% An all-zero column produces an infinite scale factor; zero it out instead.
cnorms(cnorms == Inf) = 0;
X = bsxfun(@times, X, cnorms);
% Set the Gaussian kernel variance parameter.
gamma = 256;
% Explicitly construct the full kernel matrix so approximation error can be
% evaluated against it below.
K = gaussianKernel(X,1:n,1:n,gamma);
%% Compare methods for approximating K
% Sample sizes to try, and the number of repeated trials per size.
svals = 200:200:2000;
trials = 3;
%% Recursive Ridge Leverage Score Nystrom
% For each sample size s, build a recursive ridge-leverage-score Nystrom
% approximation C*W*C' of K and record its spectral-norm error.
errorsRN = zeros(trials,length(svals));
% Kernel callback is loop-invariant, so define it once up front.
kFunc = @(X,rowInd,colInd) gaussianKernel(X,rowInd,colInd,gamma);
for j = 1:trials
    fprintf('Beginning Trial %d for Recursive Nystrom\n',j);
    for i = 1:length(svals)
        s = svals(i);
        fprintf('Constructing Nystrom approximation with %d samples\n',s);
        [C,W] = recursiveNystrom(X,s,kFunc);
        % Error is the top eigenvalue of K - C*W*C', applied implicitly.
        % When K - C*W*C' ~ 0, eigs can fail to converge; treat that as 0.
        try
            errorsRN(j,i) = eigs(@(x) K*x - C*(W*(C'*x)), n, 1);
        catch
            errorsRN(j,i) = 0;
        end
    end
end
%% Uniform Nystrom
% For each sample size s, build a Nystrom approximation from s uniformly
% sampled landmark columns and record its spectral-norm error against K.
errorsUniform = zeros(trials,length(svals));
for j = 1:trials
    fprintf('Beginning Trial %d for Uniform Nystrom\n',j);
    for i = 1:length(svals)
        s = svals(i);
        fprintf('Constructing Nystrom approximation with %d samples\n',s);
        kFunc = @(X,rowInd,colInd) gaussianKernel(X,rowInd,colInd,gamma);
        samp = randperm(n,s);
        C = kFunc(X,1:n,samp);
        SKS = C(samp,:);
        % Small ridge term (10e-6 = 1e-5) regularizes the inverted block.
        W = inv(SKS+(10e-6)*eye(s,s));
        % Error is the top eigenvalue of K - C*W*C', applied implicitly.
        % When K - C*W*C' ~ 0, eigs can fail to converge; treat that as 0.
        try
            errorsUniform(j,i) = eigs(@(x) K*x - C*(W*(C'*x)), n, 1);
        catch
            % BUG FIX: the original wrote errorsRN(j,i) = 0 here, silently
            % clobbering the Recursive Nystrom results instead of recording
            % this trial's Uniform Nystrom result.
            errorsUniform(j,i) = 0;
        end
    end
end
%% compare errors to number of samples
% Plot the mean (over trials) spectral-norm error vs. sample size for both
% methods on a log-scale y axis.
figure();
meanRN = mean(abs(errorsRN),1);
meanUniform = mean(abs(errorsUniform),1);
plot(svals,meanRN,svals,meanUniform);
legend('Recursive Nystrom','Uniform Nystrom')
xlabel('number of samples')
ylabel('spectral norm error')
set(gca,'yscale','log');
% BUG FIX: the original upper bound used max(max(abs(errorsUniform))) -- the
% max over raw per-trial errors -- instead of the max over the trial means
% actually plotted, inconsistent with the Recursive Nystrom term.
ylim([min([meanRN meanUniform])/2, max([meanRN meanUniform])*2])