-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
46 lines (33 loc) · 1.29 KB
/
main.py
File metadata and controls
46 lines (33 loc) · 1.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from collections import defaultdict
from sys import argv
from dataloader import DataLoader
from stemmer import Stemmer
from data import DataClusterer
from description import DescriptionClusterer
def test_clusterer(clusterer, lines, num_of_clusters=32):
    """Train ``clusterer``, classify every entry of ``lines`` and print the groups.

    Progress markers and the resulting grouping are written to standard
    output: one ``Category <label>`` header per distinct label returned by
    ``clusterer.classify``, followed by the tab-indented keys assigned to it.

    Args:
        clusterer: Object providing ``train(num_of_clusters=...)`` and
            ``classify(value)``. The value returned by ``classify`` is used
            as a dictionary key, so it must be hashable.
        lines: Mapping of entry key -> value handed to ``clusterer.classify``.
        num_of_clusters: Cluster count forwarded to ``clusterer.train``.
            Defaults to 32, the value that used to be hard-coded here.

    Returns:
        None. The grouping is only reported on standard output.
    """
    print('Training')
    clusterer.train(num_of_clusters=num_of_clusters)

    print('Classifying')
    # Group entry keys by the label the clusterer assigns to their value.
    results = defaultdict(list)
    for key, value in lines.items():
        results[clusterer.classify(value)].append(key)

    print('Results')
    for category, members in results.items():
        print('Category {}'.format(category))
        for member in members:
            print('\t{}'.format(member))


# Despite its ``test_`` prefix this is a reporting helper, not a pytest test.
# Opt out of collection so importing it into a test module does not make
# pytest look for ``clusterer``/``lines`` fixtures.
test_clusterer.__test__ = False
def main(args):
    """Load, stem and cluster every input file named in ``args``.

    All files are loaded and stemmed first; afterwards each file is run
    through ``test_clusterer`` twice, once with a ``DataClusterer`` and once
    with a ``DescriptionClusterer``, with the reports printed to standard
    output.

    Args:
        args: Iterable of file paths (the command-line arguments without the
            program name). Each path is passed to ``DataLoader.load_data``.

    Returns:
        None.
    """
    dl = DataLoader()
    stem = Stemmer('porter')

    # ``files`` holds one dict per input file, mapping element[0] of each
    # loaded record to the stemmed form of element[1].
    # NOTE(review): presumably element[0] is an identifier and element[1] its
    # text -- confirm against DataLoader.load_data.
    files = []
    for path in args:
        stemmed_by_key = {}
        for element in dl.load_data(path):
            # Stem once and reuse the result for both the emptiness filter
            # and the stored value instead of stemming every record twice.
            stemmed = stem.stem(element[1])
            if stemmed:
                stemmed_by_key[element[0]] = stemmed
        files.append(stemmed_by_key)

    for file, arg in zip(files, args):
        print('Processing file {}...'.format(arg))
        # Materialise every stemmed value as a list so it can be handed to
        # both clusterers and to the classification pass.
        file = {k: list(v) for k, v in file.items()}
        print('Data Clusterer')
        # NOTE(review): 'euclidean' / 'cosine' look like distance-metric
        # names -- confirm against the clusterer constructors.
        test_clusterer(DataClusterer(list(file.values()), 'euclidean'), file)
        print('-'*64)
        print('Description Clusterer')
        test_clusterer(DescriptionClusterer(list(file.values()), 'cosine'), file)
# Script entry point: forward the command-line arguments (without the program
# name) to main() only when this file is executed directly.
if __name__ == '__main__':
    cli_paths = argv[1:]
    main(cli_paths)