-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathclustering.py
More file actions
95 lines (84 loc) · 2.98 KB
/
clustering.py
File metadata and controls
95 lines (84 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import numpy as np
# Module-level clustering state. The functions in this file read and update
# these names directly; reset_clustering() rebinds them to fresh containers.

# Distance below which (strict `<` in cluster()) a point joins its nearest
# existing cluster instead of opening a new one.
# NOTE(review): the name is misspelled ("treshold"); cluster() refers to it by
# this exact name, so renaming must be done in both places at once.
distance_treshold = 1
# Cluster index given to every point processed by cluster(), in arrival order.
assignments = []
# One centroid per cluster; the position in the list is the cluster index.
centroids = []
# One {label: count} dict per cluster, appended by create_centroids_label_mapping().
cluster_label_counts = []
# label -> (cluster_id, count), filled by create_label_mapping().
label_mapping = {}
def reset_clustering():
    """Drop all clustering state.

    Each module-level container is rebound to a brand-new empty object (not
    cleared in place), so objects handed out earlier keep their contents.
    """
    global assignments, centroids, cluster_label_counts, label_mapping
    assignments, centroids, cluster_label_counts = [], [], []
    label_mapping = {}
def distance(x, y):
    """Return the norm of ``x - y`` as computed by ``np.linalg.norm``.

    For one-dimensional points this is the Euclidean distance.

    Args:
        x: first point; any array-like (ndarray, list, tuple, scalar).
        y: second point; any array-like whose shape broadcasts with ``x``.

    Returns:
        The distance as a NumPy floating-point scalar.
    """
    # Convert first so that plain lists/tuples can be subtracted; ndarray
    # inputs pass through np.asarray unchanged.
    return np.linalg.norm(np.asarray(x) - np.asarray(y))
def min_distance_from_centroids(x):
    """Locate the centroid closest to ``x``.

    Args:
        x: the point to compare against every entry of ``centroids``.

    Returns:
        A pair ``(index, dist)``: the position in ``centroids`` of the nearest
        centroid and the distance to it. On ties the lowest index is returned
        (``np.argmin`` picks the first minimum).

    Note:
        ``centroids`` must be non-empty; ``np.argmin`` rejects an empty
        sequence.
    """
    gaps = [distance(x, center) for center in centroids]
    return np.argmin(gaps), np.min(gaps)
def cluster(input_data):
    """Assign each point of ``input_data`` to a cluster, one point at a time.

    A point joins the nearest existing cluster when its distance to that
    cluster's centroid is strictly below ``distance_treshold``; otherwise it
    opens a new cluster and becomes that cluster's centroid. The module-level
    ``assignments`` and ``centroids`` lists are updated in place, so
    successive calls continue the same clustering until reset_clustering()
    is called.

    Args:
        input_data: iterable of points. Each point must support ``+`` with a
            centroid and ``/ 2``, and be accepted by
            min_distance_from_centroids().

    Returns:
        A list with the cluster index of every input point, in input order.
    """
    res = []
    for x in input_data:
        if centroids:
            index, min_distance = min_distance_from_centroids(x)
            if min_distance < distance_treshold:
                assignments.append(index)
                # The centroid moves to the midpoint of its old position and
                # the new point (recent points weigh more than a true mean).
                centroids[index] = (centroids[index] + x) / 2
                res.append(index)
                continue
        # No centroid yet, or none close enough: open a new cluster.
        # A cluster's id is its position in `centroids`, so the next id is
        # simply the current length -- no need to sort and de-duplicate the
        # whole assignment history with np.unique for every new cluster.
        new_id = len(centroids)
        assignments.append(new_id)
        centroids.append(x)
        res.append(new_id)
    return res
def get_nearest_same_label_centeroid(labels):
    """Return the centroid mapped to each label.

    For every label, ``label_mapping[label]`` yields ``(cluster_id, count)``
    and the centroid stored at ``centroids[cluster_id]`` is returned.

    Args:
        labels: iterable of labels that are keys of ``label_mapping``.

    Returns:
        A list of centroids, one per input label, in input order.

    Raises:
        KeyError: if a label is not present in ``label_mapping``.
    """
    return [centroids[label_mapping[label][0]] for label in labels]
def create_centroids_label_mapping(data_mapping, dataController):
    """Count, for every cluster, how many of its members carry each label.

    One ``{label: count}`` dict per entry of ``centroids`` is appended to the
    module-level ``cluster_label_counts``, in cluster-index order. A cluster
    with no members contributes an empty dict.

    Args:
        data_mapping: dict whose values are compared (``==``) with the
            cluster index; its keys are passed to
            ``dataController.get_label_from_name``.
        dataController: object providing ``get_label_from_name(name)``.
    """
    print("Assign labels to clusters")
    for cluster_label, _ in enumerate(centroids):
        # Names of all items that were assigned to this cluster.
        members = [
            name for name, owner in data_mapping.items() if owner == cluster_label
        ]
        tally = {}
        for name in members:
            label = dataController.get_label_from_name(name)
            tally[label] = tally.get(label, 0) + 1
        cluster_label_counts.append(tally)
def create_label_mapping():
    """Map every label to the cluster holding the most points of that label.

    Walks ``cluster_label_counts`` in cluster order and stores
    ``label -> (cluster_id, count)`` in the module-level ``label_mapping``.
    An existing entry is replaced only by a strictly larger count, so on a
    tie the earlier cluster wins.
    """
    for cluster_id, label_counts in enumerate(cluster_label_counts):
        for label, count in label_counts.items():
            best = label_mapping.get(label)
            if best is None or best[1] < count:
                label_mapping[label] = (cluster_id, count)
def run_clustering(model, dataController, validation_mode):
    """Run a full clustering pass and derive the label-to-cluster mapping.

    Steps: reset the module state, ask ``model.create_mapping`` for the data
    mapping, count labels per cluster, then pick the best cluster per label.
    NOTE(review): ``model.create_mapping`` is presumably what fills
    ``centroids`` (e.g. by calling cluster()) -- confirm against the model.

    Args:
        model: object providing ``create_mapping(dataController, validation_mode)``.
        dataController: passed to ``model.create_mapping`` and used to look up
            labels by name.
        validation_mode: forwarded unchanged to ``model.create_mapping``.

    Returns:
        The pair ``(label_mapping, cluster_label_counts)`` -- the module-level
        objects themselves, not copies.
    """
    reset_clustering()
    create_centroids_label_mapping(
        model.create_mapping(dataController, validation_mode),
        dataController,
    )
    create_label_mapping()
    return label_mapping, cluster_label_counts