import json from kmeans import kmeans from matplotlib import pyplot PY

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import json
from kmeans import kmeans
from matplotlib import pyplot
# Matplotlib single-letter colour codes; the entry at index i is used to draw
# the points assigned to cluster i.
PYPLOT_COLORS = [
    "r",  # red
    "b",  # blue
    "g",  # green
    "y",  # yellow
    "c",  # cyan
]

# Number of clusters requested from k-means. main() only accepts values in
# the range 2..5 (one colour per cluster).
CLUSTER_NUM = 5
def _parse_percent(raw):
    """Convert a decimal-comma string such as ``"87,5"`` to a float.

    Raises:
        ValueError: if the text is not a valid number.
        AttributeError: if ``raw`` is not a string (has no ``replace``).
    """
    return float(raw.replace(",", "."))


def _extract_scores(records):
    """Collect ``[math, geom]`` float pairs from the parsed JSON records.

    Each record may carry a ``"data"`` list of rows. A row is used only if
    it has more than 12 cells and its first cell has a 6-character
    ``"text"`` value (presumably an identifier column -- TODO confirm).
    The math score is read from cell 9 and the geometry score from cell 12.
    Rows whose scores cannot be parsed as numbers are skipped.
    """
    points = []
    for record in records:
        if "data" not in record:
            continue
        for row in record["data"]:
            # len(row) > 12 also guarantees row[0], row[9] and row[12] exist.
            if not (len(row) > 12
                    and "text" in row[0]
                    and len(row[0]["text"]) == 6):
                continue
            raw_math = row[9]["text"]
            raw_geom = row[12]["text"]
            try:
                math_score = _parse_percent(raw_math)
                geom_score = _parse_percent(raw_geom)
            except (AttributeError, ValueError):
                # Best-effort filtering: non-numeric or non-string cells
                # simply disqualify the row.
                continue
            points.append([math_score, geom_score])
    return points


def main():
    """Cluster (math, geometry) scores from ``data.json`` and plot them.

    Reads the first line of ``data.json`` as a JSON document, filters it
    down to numeric ``[math, geom]`` pairs, runs ``kmeans`` with
    ``CLUSTER_NUM`` clusters, prints the returned labels and shows a scatter
    plot with one colour per cluster.

    Raises:
        ValueError: if ``CLUSTER_NUM`` is below 2 or exceeds the number of
            available plot colours.
    """
    # One colour per cluster is required, so the palette size is the limit.
    if not 2 <= CLUSTER_NUM <= len(PYPLOT_COLORS):
        raise ValueError("%d clusters feature is not supported" % CLUSTER_NUM)

    # Only the first line is parsed: the file is expected to hold the whole
    # JSON document on a single line.
    with open("data.json", "r", encoding="utf-8") as f:
        json_line = f.readline()
    json_filtered = _extract_scores(json.loads(json_line))

    # K-means algorithm.
    # NOTE(review): assumes kmeans returns one cluster index per input point,
    # in input order -- verify against the kmeans module.
    clusters = kmeans(json_filtered, CLUSTER_NUM)
    print(clusters)

    # Split the coordinates into one x-list and one y-list per cluster.
    maths = [[] for _ in range(CLUSTER_NUM)]
    geoms = [[] for _ in range(CLUSTER_NUM)]
    for label, (math_score, geom_score) in zip(clusters, json_filtered):
        maths[label].append(math_score)
        geoms[label].append(geom_score)

    # pyplot.plot accepts repeated (x, y, fmt) triples; "<colour>o" draws
    # circle markers in the given colour.
    args = []
    for xs, ys, color in zip(maths, geoms, PYPLOT_COLORS):
        args.extend((xs, ys, color + "o"))

    pyplot.plot(*args)
    pyplot.xlabel("Математика, %")
    pyplot.ylabel("Геометрия, %")
    pyplot.show()
# Run the pipeline only when this file is executed as a script, so that
# importing the module does not read data.json or open a plot window.
if __name__ == "__main__":
    main()