-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathkMeans.m
More file actions
76 lines (55 loc) · 2.13 KB
/
kMeans.m
File metadata and controls
76 lines (55 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
function [ cluster, centr ] = kMeans( k, P )
%kMeans Clusters data points into k clusters (Lloyd's k-means iteration).
%   [cluster, centr] = kMeans(k, P)
%
%   Input args:
%     k: number of clusters; a positive integer not larger than the
%        number of data points.
%     P: m-by-n matrix of n m-dimensional data points (one point per column).
%
%   Output args:
%     cluster: 1-by-n array with values in 1,...,k; cluster(i) is the index
%              of the centroid (column of centr) closest to P(:,i).
%     centr:   m-by-k matrix whose columns are the centroids of the k clusters.
%
%   The initial centroids are k distinct, randomly chosen columns of P, so
%   the result depends on the state of the random number generator.
%   Iteration stops as soon as the cluster assignment no longer changes.
%   The number of iterations is printed to the command window.
%
%   Errors:
%     kMeans:invalidK         if k is not a positive integer scalar.
%     kMeans:tooManyClusters  if k exceeds the number of data points.

    numP = size(P,2); % number of points

    %% Validate input
    if ~isscalar(k) || k < 1 || k ~= floor(k)
        error('kMeans:invalidK', 'k must be a positive integer scalar.');
    end
    if k > numP
        error('kMeans:tooManyClusters', ...
            'k (%d) must not exceed the number of data points (%d).', k, numP);
    end

    %% Choose k data points as initial centroids
    % k unique random column indices between 1 and numP
    randIdx = randperm(numP,k);
    centr = P(:,randIdx);

    %% Repeat until stopping criterion is met
    cluster = zeros(1,numP);   % current assignment (0 = not yet assigned)
    clusterPrev = cluster;     % assignment of the previous iteration
    iterations = 0;            % for reference: count the iterations
    dist = zeros(1,k);         % scratch: distances of one point to all centroids
    stop = false;              % becomes true once the assignment is stable

    while ~stop
        % Assignment step: attach every point to its closest centroid.
        for idxP = 1:numP
            for idxC = 1:k
                dist(idxC) = norm(P(:,idxP)-centr(:,idxC));
            end
            % min returns the first minimum, so ties go to the lowest index
            [~, cluster(idxP)] = min(dist);
        end

        % Update step: move every centroid to the mean of its members.
        for idxC = 1:k
            members = (cluster == idxC);
            if any(members)
                centr(:,idxC) = mean(P(:,members),2);
            end
            % An empty cluster keeps its previous centroid. Taking the mean
            % of an empty selection would yield NaN, which can never win the
            % min() above again and would be returned to the caller.
        end

        % Stopping criterion: clusters do not change anymore.
        % isequal yields a scalar logical for any array size.
        stop = isequal(clusterPrev, cluster);

        clusterPrev = cluster;
        iterations = iterations + 1;
    end

    % for reference: print number of iterations
    fprintf('kMeans.m used %d iterations of changing centroids.\n',iterations);
end