Contents
knncv.m
From A First Course in Machine Learning, Chapter 4. Simon Rogers, 01/11/11 [simon.rogers@glasgow.ac.uk]. Cross-validation over K in KNN.
clear all; close all;
Generate some data
N1 = 100; N2 = 20; % Class sizes
x = [randn(N1,2);randn(N2,2)+2]; % Two Gaussian clusters, the second shifted by (2,2)
t = [zeros(N1,1);ones(N2,1)]; % Class labels
N = size(x,1);
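% Note the classes are imbalanced (100 vs 20 points); with large K the minority class tends to be outvoted.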
Plot the data
ma = {'ko','ks'}; % Markers: circles for class 0, squares for class 1
fc = {[0 0 0],[1 1 1]}; % Face colours: black and white
tv = unique(t); % The set of class labels
figure(1); hold off
for i = 1:length(tv)
    pos = find(t==tv(i));
    plot(x(pos,1),x(pos,2),ma{i},'markerfacecolor',fc{i});
    hold on
end
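A legend is not part of the original plotting code; if labels are wanted, one can be added with MATLAB's standard legend call:
legend('Class 0','Class 1');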
Loop over values of K
Nfold = 10; % Number of cross-validation folds
Kvals = 1:2:30; % Odd values of K (1,3,...,29); odd K avoids voting ties between the two classes
Nrep = 100; % Repeat the whole CV procedure Nrep times with different random splits
Errors = zeros(length(Kvals),Nfold,Nrep); % Misclassification counts, indexed by (K value, fold, repetition)
for rep = 1:Nrep
    % Permute the data and split into folds
    order = randperm(N);
    sizes = repmat(floor(N/Nfold),1,Nfold);
    sizes(end) = sizes(end) + N - sum(sizes); % Last fold absorbs any remainder
    csizes = [0 cumsum(sizes)]; % Fold f holds order(csizes(f)+1:csizes(f+1))
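    % Example: N = 120 and Nfold = 10 here, so every fold holds exactly 12 points.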
    for kv = 1:length(Kvals)
        K = Kvals(kv);
        % Loop over folds
        for fold = 1:Nfold
            % Training data: everything except the current fold
            trainX = x;
            traint = t;
            foldindex = order(csizes(fold)+1:csizes(fold+1));
            trainX(foldindex,:) = [];
            traint(foldindex) = [];
            % Test data: the current fold
            testX = x(foldindex,:);
            testt = t(foldindex);
            % Do the KNN
            classes = zeros(size(testX,1),1);
            for i = 1:size(testX,1)
                this = testX(i,:);
                % Squared Euclidean distances to all training points
                dists = sum((trainX - repmat(this,size(trainX,1),1)).^2,2);
                [~,I] = sort(dists,'ascend');
                % Vote amongst the K nearest neighbours
                [a,b] = hist(traint(I(1:K)),unique(t));
                pos = find(a==max(a));
                if length(pos)>1
                    % Break ties at random
                    temp_order = randperm(length(pos));
                    pos = pos(temp_order(1));
                end
                classes(i) = b(pos);
            end
            % Count the misclassifications on this fold
            Errors(kv,fold,rep) = sum(classes~=testt);
        end
    end
end
Plot the results
figure(2); hold off % New figure, so the data plot in figure 1 is preserved
s = sum(sum(Errors,3),2)./(N*Nrep); % Mean misclassification rate per K (each repetition tests all N points once)
plot(Kvals,s);
xlabel('K');
ylabel('Error');
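Not in the original script: assuming the aim is to choose K, the value with the smallest cross-validated error can be read off s (min returns the first, i.e. smallest, such K in case of ties):
[bestErr,best] = min(s);
fprintf('Best K = %d (mean CV error = %.3f)\n',Kvals(best),bestErr);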