- Real Time Signals India

# KNN with K MEANS ALGORITHM

% MATLAB CODE FOR KNN with K MEANS ALGORITHM

% AUTHOR : Real Time Signals

clc; % clear the command window

% To cluster data from a spreadsheet instead of the synthetic set below,
% uncomment the following line and the xlsread line further down.
%filename = 'C:\Users\user\Desktop\RTS\test.xlsx'; % load the data into the file

% Synthetic 2-D data set: 200 samples of a cosine over [0, 3*pi] with
% uniform noise from rand added to the second coordinate.
a = linspace(0, 3*pi, 200);
b = rand(size(a)) + cos(a);
x = [a(:), b(:)]; % one data point per row: [abscissa, noisy cosine]

%x= xlsread(filename);

K = 3; % Number of clusters

% Show the raw, unclustered points
scatter(x(:,1), x(:,2));

M   = size(x, 1); % number of data points
dim = size(x, 2); % number of coordinates per point

map = zeros(1, M);   % map(i) holds the cluster number assigned to point i
C   = zeros(K, dim); % row j holds the centroid of cluster j

%------Initialization--------------%

% Choose K distinct data points at random and use them as the initial
% cluster centroids.

% Drawing K distinct rows is impossible when there are fewer than K points;
% fail with a clear message instead of searching for a free index forever.
if K > M
    error('kmeans:tooManyClusters', ...
          'K (%d) cannot exceed the number of data points (%d).', K, M);
end

% A random permutation yields unique indices directly, so no retry loop is
% needed. The one-argument form is used for compatibility with old releases.
shuffled = randperm(M);
temp_index_array = shuffled(1:K); % indices of the points used as seeds

C = x(temp_index_array, :); % row j is the initial centroid of cluster j

%-----------K-Means Algorithm-------------%

% Each pass: (1) assign every point to its nearest centroid, (2) evaluate
% the cost J (sum of squared point-to-centroid distances), (3) move each
% centroid to the mean of the points assigned to it. Iteration stops when
% the cost improves by no more than 0.001 between two passes.

% Sentinel costs. Using Inf/realmax rather than fixed numbers guarantees
% that the loop is entered and that the first real cost always counts as an
% improvement, no matter how large the data set makes J.
Jpre = Inf;
J = realmax;

while((Jpre - J) > 0.001)

    aggr = zeros(K,dim); % per-cluster sum of the points assigned to it
    occ = zeros(1,K);    % per-cluster count of assigned points

    % Assign every element of x to a cluster
    for i=1:M
        % Start with cluster 1 as the best candidate, then search the
        % remaining centroids for a closer one.
        min_dist = norm(x(i,:)-C(1,:));
        map_index = 1;
        for j=2:K
            cand_dist = norm(x(i,:)-C(j,:));
            % ">=" means an exact tie goes to the higher-numbered cluster
            if(min_dist >= cand_dist)
                min_dist = cand_dist;
                map_index = j;
            end
        end

        map(i) = map_index;                             % map x(i,:) to that cluster
        aggr(map_index,:) = aggr(map_index,:) + x(i,:); % sum up the points of that cluster
        occ(map_index) = occ(map_index) + 1;            % count the points of that cluster
    end

    % Cost function calculation (uses the centroids the points were just
    % assigned against, i.e. before they are moved below)
    Jpre = J;
    J = 0;
    for i = 1:M
        J = J + norm(x(i,:) - C(map(i),:)).^2;
    end

    % Update the centroids once all the points have been mapped
    for j = 1:K
        % A cluster that received no points keeps its previous centroid;
        % dividing by occ(j) == 0 would turn it into NaN for good.
        if occ(j) > 0
            C(j,:) = aggr(j,:)/occ(j);
        end
    end

end

% Plot every cluster in its own colour on a new figure.
% Clusters 1, 2 and 3 are drawn in red, green and blue; any further clusters
% cycle through the remaining colours, so the plot follows K instead of
% being fixed to exactly three clusters.
cluster_colours = 'rgbcmy';

figure;

for cl = 1:K
    members = x(map == cl, :); % rows of x assigned to cluster cl
    colour = cluster_colours(mod(cl - 1, numel(cluster_colours)) + 1);
    scatter(members(:,1), members(:,2), colour);
    hold on; % keep the clusters already drawn when adding the next one
end

%----------------------------------------------------------------------------------------------
% KNN Algorithm
%----------------------------------------------------------------------------------------------

k = 18; % Number of nearest neighbours used to determine the class of the input vector

xin = [5.5 0.5]; % Input point whose class is to be found

% Mark the query point in black on the current axes
hold on;
scatter(xin(1), xin(2), 'k');

% There cannot be more neighbours than data points; without this clamp the
% selection below would index past the end of the data set.
k = min(k, M);

% Euclidean distance from every data point to xin (bsxfun subtracts xin
% from each row and also works on releases without implicit expansion).
all_dist = sqrt(sum(bsxfun(@minus, x, xin).^2, 2));

% Sort ascending and keep the k closest points. sort is stable, so points
% at exactly the same distance are taken in order of their row index.
[sorted_dist, order] = sort(all_dist);

min_index = order(1:k).';      % row indices into x of the k nearest neighbours
min_dist = sorted_dist(1:k).'; % their distances to xin, nearest first

% Echo the final centroids (deliberately no semicolon, so the matrix is displayed)
C

% The class of the input is the most frequent cluster label among its
% nearest neighbours.
neighbour_labels = map(min_index);
cluster_no = mode(neighbour_labels);

% Print the colour of the cluster to which the input is mapped
switch cluster_no
    case 1
        disp('red');
    case 2
        disp('green');
    case 3
        disp('blue');
end