import math def kmeans points cl_num len points len points clusters cl

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import math
def kmeans(points, cl_num, max_iters=101):
    """Cluster ``points`` into ``cl_num`` groups with k-means.

    The first ``cl_num`` points seed the cluster centers.  Every round
    assigns each point to its nearest center (distance from ``calc_dist``;
    ties go to the lowest cluster index) and then moves each center to the
    mean of the points assigned to it.  Rounds stop as soon as the centers
    no longer move, or after ``max_iters`` rounds.

    Args:
        points: Non-empty sequence of numeric coordinate sequences.  The
            dimensionality is taken from ``points[0]``.
        cl_num: Number of clusters; must satisfy
            ``1 <= cl_num <= len(points)``.
        max_iters: Upper bound on the number of assignment/update rounds.
            Defaults to 101, the number of rounds this function always ran
            before the bound became a parameter.

    Returns:
        A list with one entry per point holding the index of its cluster.

    Raises:
        IndexError: If ``points`` is empty or ``cl_num`` is out of range.
    """
    n = len(points)
    # IndexError (not ValueError) is raised on purpose: these inputs have
    # always ended in an IndexError, just from an obscure line further down.
    if not 1 <= cl_num <= n:
        raise IndexError(
            "cl_num must be between 1 and len(points); "
            "got cl_num=%r for %d point(s)" % (cl_num, n)
        )
    k = len(points[0])
    clusters = [0] * n
    # Copy the seeds so the centers never alias the caller's point objects.
    cl_centers = [list(points[i]) for i in range(cl_num)]

    for _round in range(max_iters):
        new_cl_acc = [[0] * k for _unused in range(cl_num)]
        new_cl_cnt = [0] * cl_num

        # Rearranging points into clusters, adding each point to the
        # coordinate accumulator of the cluster it lands in.
        for i, point in enumerate(points):
            # Start from the first center instead of a magic "large"
            # distance, so far-apart data can never end up with index -1.
            min_ind = 0
            min_dist = calc_dist(point, cl_centers[0])
            for j in range(1, cl_num):
                dist = calc_dist(point, cl_centers[j])
                if dist < min_dist:
                    min_dist = dist
                    min_ind = j
            clusters[i] = min_ind
            sum_to_point(new_cl_acc[min_ind], point)
            new_cl_cnt[min_ind] += 1

        # Recalculate cluster centers.
        new_centers = []
        for j in range(cl_num):
            if new_cl_cnt[j]:
                new_centers.append(div_point(new_cl_acc[j], new_cl_cnt[j]))
            else:
                # An empty cluster has no mean; keep its previous center
                # rather than dividing by zero.
                new_centers.append(cl_centers[j])

        # Unchanged centers mean the next round would reproduce exactly the
        # same assignment, so the remaining rounds can be skipped.
        if new_centers == cl_centers:
            break
        cl_centers = new_centers

    return clusters
def calc_dist(p1, p2):
    """Return the Euclidean distance between points ``p1`` and ``p2``.

    Coordinates are paired by index over the length of ``p1``; ``p2`` must
    provide at least that many coordinates, and any extra ones are ignored.
    """
    squared_sum = 0
    for axis, coord in enumerate(p1):
        squared_sum += (coord - p2[axis]) ** 2
    return math.sqrt(squared_sum)
def sum_to_point(p, p2):
    """Add the coordinates of ``p2`` onto ``p`` in place.

    ``p`` is mutated and nothing is returned.  Only the first ``len(p)``
    coordinates of ``p2`` are read.
    """
    dims = len(p)
    for axis in range(dims):
        p[axis] += p2[axis]
def div_point(p, k):
    """Return a new list holding every coordinate of ``p`` divided by ``k``.

    ``p`` itself is left unchanged.
    """
    return [coord / k for coord in p]