import math
def kmeans(points, cl_num, max_iters=101):
    """Cluster ``points`` into ``cl_num`` groups with Lloyd's k-means algorithm.

    The first ``cl_num`` points are used as the initial cluster centers.
    Each round assigns every point to its nearest center (Euclidean
    distance, the lowest index wins ties) and then moves each center to the
    mean of the points assigned to it.

    Args:
        points: Sequence of equal-length numeric coordinate sequences.
        cl_num: Number of clusters; must satisfy
            ``1 <= cl_num <= len(points)`` when ``points`` is non-empty.
        max_iters: Upper bound on the number of assign/update rounds. The
            default of 101 matches the previously hard-coded round count.

    Returns:
        A list holding one cluster index in ``range(cl_num)`` per point, in
        input order. An empty list is returned for empty ``points``.

    Raises:
        ValueError: If ``cl_num`` is outside ``1..len(points)`` or
            ``max_iters`` is smaller than 1.
    """
    n = len(points)
    if n == 0:
        return []
    if not 1 <= cl_num <= n:
        raise ValueError(
            "cl_num must be between 1 and len(points) (%d), got %r" % (n, cl_num)
        )
    if max_iters < 1:
        raise ValueError("max_iters must be at least 1, got %r" % (max_iters,))

    dim = len(points[0])
    centers = [points[i] for i in range(cl_num)]
    clusters = [0] * n
    previous = None  # assignment produced by the preceding round

    for _round in range(max_iters):
        # Per-cluster coordinate sums and member counts for this round.
        sums = [[0] * dim for _ in range(cl_num)]
        counts = [0] * cl_num

        for i, point in enumerate(points):
            nearest = _nearest_center(point, centers)
            clusters[i] = nearest
            counts[nearest] += 1
            acc = sums[nearest]
            for axis in range(dim):
                acc[axis] += point[axis]

        # An unchanged assignment yields unchanged centers, so every later
        # round would reproduce the same result: stop early.
        if clusters == previous:
            break
        previous = list(clusters)

        # Move each center to the mean of its members. A cluster that
        # attracted no points keeps its old center instead of dividing by 0.
        centers = [
            [total / counts[c] for total in sums[c]] if counts[c] else centers[c]
            for c in range(cl_num)
        ]

    return clusters


def _nearest_center(point, centers):
    """Return the index of the center closest to ``point`` (first wins ties)."""
    best_index = 0
    best_dist = None
    for index, center in enumerate(centers):
        # Squared distance orders candidates the same way as the true
        # Euclidean distance, so the square root is unnecessary here.
        dist = sum((a - b) ** 2 for a, b in zip(point, center))
        if best_dist is None or dist < best_dist:
            best_dist = dist
            best_index = index
    return best_index
def calc_dist(p1, p2):
    """Return the Euclidean distance between points ``p1`` and ``p2``.

    One term is summed per coordinate of ``p1``; ``p2`` must provide at
    least as many coordinates, and any extra ones are ignored.
    """
    squared_total = sum(pow(coord - p2[idx], 2) for idx, coord in enumerate(p1))
    return math.sqrt(squared_total)
def sum_to_point(p, p2):
    """Add ``p2`` to ``p`` coordinate by coordinate, modifying ``p`` in place.

    Only the first ``len(p)`` coordinates of ``p2`` are used. Nothing is
    returned; the result is left in ``p``.
    """
    for idx, _ in enumerate(p):
        p[idx] += p2[idx]
def div_point(p, k):
    """Return a new list with every coordinate of ``p`` divided by ``k``.

    ``p`` itself is left untouched.
    """
    return [coord / k for coord in p]