-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
154 lines (134 loc) · 5.75 KB
/
main.py
File metadata and controls
154 lines (134 loc) · 5.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os
import tkinter as tk
from tkinter import messagebox
from perceptron import Perceptron as Per1
from perceptron2 import Perceptron as Per2
def get_text_weights(text: str):
    """Return the percentage share of each ASCII letter a-z in *text*.

    Letters are counted case-insensitively. Every other character (digits,
    punctuation, whitespace, non-ASCII letters) is ignored.

    Args:
        text: Text to analyse.

    Returns:
        A list of 26 floats. Index 0 holds the share of 'a' and index 25 the
        share of 'z', in percent of all counted letters. When *text* contains
        no ASCII letters, all 26 values are 0.0.
    """
    counts = [0] * 26
    # Lowercase the whole string once: lowering a single character can yield
    # more than one code point (e.g. 'İ' -> 'i' + combining dot), and ord()
    # raises TypeError for such a multi-character result.
    for char in text.lower():
        if 'a' <= char <= 'z':
            counts[ord(char) - ord('a')] += 1

    total = sum(counts)
    if total == 0:
        # No letters at all: return zeros rather than dividing by zero.
        return [0.0] * 26
    return [float(count / total * 100) for count in counts]
def read_all_files(data_dir):
    """Read every sample file under *data_dir* and compute letter statistics.

    The label of a sample is the name of the first-level sub-directory of
    *data_dir* that contains it (files in deeper sub-directories inherit that
    first-level name). Files located directly in *data_dir* have no label and
    are skipped.

    Args:
        data_dir: Root directory holding one sub-directory per label.

    Returns:
        A list of ``[label, weights]`` pairs, one per file, where ``weights``
        is a list of 26 floats with the percentage share of the letters a-z
        (case-insensitive) among all ASCII letters of the file. A file without
        any ASCII letter yields 26 zeros.
    """
    data = []
    for root, _dirs, filenames in os.walk(data_dir):
        # Derive the label from the path relative to data_dir using the
        # platform's separator instead of a hard-coded backslash.
        relative = os.path.relpath(root, data_dir)
        if relative == os.curdir:
            continue
        label = relative.split(os.sep)[0]

        for filename in filenames:
            with open(os.path.join(root, filename), "r", encoding="utf8") as file:
                text = file.read()

            counts = [0] * 26
            # Lowercase the whole text at once; lowering single characters can
            # produce several code points, which ord() cannot handle.
            for char in text.lower():
                if 'a' <= char <= 'z':
                    counts[ord(char) - ord('a')] += 1

            total = sum(counts)
            if total:
                weights = [float(count / total * 100) for count in counts]
            else:
                # No letters in the file: avoid dividing by zero.
                weights = [0.0] * 26
            data.append([label, weights])
    return data
def check_answer(perceptron, data):
    """Report whether *perceptron* still misclassifies any sample in *data*.

    Args:
        perceptron: Object exposing a ``language`` attribute and a
            ``calculate(inputs)`` method; an output equal to 1 is treated as
            "this sample is in my language".
        data: Iterable of ``(language, inputs)`` pairs.

    Returns:
        True when at least one sample is misclassified (more training is
        needed), False when every sample is classified correctly.
    """
    # Compare sample by sample. Merely comparing the number of activations
    # with the number of expected positives would accept a perceptron that
    # fires on the wrong samples the right number of times.
    return any(
        (perceptron.calculate(inputs) == 1) != (language == perceptron.language)
        for language, inputs in data
    )
def main():
    """Train both perceptron variants, print test results, and open the GUI.

    Samples are loaded with ``read_all_files`` from the ``training_languages``
    and ``test_languages`` directories. One perceptron of each variant is
    created per language sub-directory of ``training_languages``. After
    training, the test samples are classified with both variants and the
    results are printed. Finally a Tk window is opened in which typed text
    can be classified with either variant.
    """
    training_dir = "training_languages"
    # One perceptron per language directory; plain files lying next to the
    # language directories are not languages and are ignored.
    languages = [
        name for name in os.listdir(training_dir)
        if os.path.isdir(os.path.join(training_dir, name))
    ]
    first_perceptron_list = [Per1(26, 0.001, 0.001, name) for name in languages]
    second_perceptron_list = [Per2(26, 0.001, name) for name in languages]
    training_data = read_all_files(training_dir)
    test_data = read_all_files("test_languages")
    error_margin = 0.001
    max_epoque = 100

    def accuracy(hits):
        # Percentage of correctly classified test samples; 0.0 when there is
        # no test data, instead of raising ZeroDivisionError.
        return hits / len(test_data) * 100 if test_data else 0.0

    print("Uczenie perceptronów pierwszej metody:\n")
    for perceptron in first_perceptron_list:
        epoque = 0
        # Keep training until check_answer reports no remaining problem or
        # the epoch limit is reached.
        while check_answer(perceptron, training_data) and epoque < max_epoque:
            epoque += 1
            for language, inputs in training_data:
                perceptron.learn(inputs, language == perceptron.language)
        print(f"Perceptron metody 1 języka {perceptron.language} uczył się {epoque} razy")

    print("\nUczenie perceptronu drugiej metody:\n")
    for perceptron in second_perceptron_list:
        epoque = 0
        keep_training = True
        while keep_training and epoque < max_epoque:
            epoque += 1
            for language, inputs in training_data:
                # Only samples of the perceptron's own language are used, so
                # the desired output is always 1.
                if language != perceptron.language:
                    continue
                decision = 1
                e = 0.5 * (decision - perceptron.calculate(inputs)) ** 2
                perceptron.learn(inputs, e)
                # NOTE(review): training stops after the first epoch in which
                # any single sample is below the margin - confirm whether all
                # samples were meant to satisfy it.
                if e < error_margin:
                    keep_training = False
        print(f"Perceptron metody 2 języka {perceptron.language} uczył się {epoque} razy")

    print("\n\n")
    print("Wyniki dla pierwszego perceptronu z danych testowych")
    classified = 0
    for language, inputs in test_data:
        active_perceptrons = [
            perceptron for perceptron in first_perceptron_list
            if perceptron.calculate(inputs) == 1
        ]
        if len(active_perceptrons) == 1:
            chosen = active_perceptrons[0]
            print(f"Tekst: {language}\tPercpetron: {chosen.language}\t{chosen.calculate(inputs)}")
            # A unique activation counts towards accuracy only when it names
            # the right language.
            if chosen.language == language:
                classified += 1
        else:
            print(f"Tekst: {language} nie został poprawnie zakwalifikowany")
    print(f"Dokładność klasyfikacji wynosi: {accuracy(classified)}%\n")

    print("Wyniki dla drugiego perceptronu z danych testowych")
    classified = 0
    for language, inputs in test_data:
        output = [
            [perceptron.language, perceptron.calculate(inputs)]
            for perceptron in second_perceptron_list
        ]
        # The perceptron with the highest output decides the language.
        winner = max(output, key=lambda x: x[1])
        if language == winner[0]:
            classified += 1
        print(f"Text: {language} -> Predicted: {winner[0]} (score: {winner[1]:.6f})")
    print()
    print(f"Dokładność klasyfikacji wynosi: {accuracy(classified)}%\n")
    print("\n\n")

    # Tk()'s first positional parameter is screenName (an X11 display name),
    # not the window title, so the title is set explicitly.
    root = tk.Tk()
    root.title("Klasyfikator")
    root.geometry("400x250")
    label = tk.Label(root, text="Wpisz tekst do klasyfikacji")
    label.pack(pady=10)
    entry = tk.Text(root, width=50, height=7)
    entry.pack()
    result = tk.Label(root, text="", font=('Arial', 12))
    result.pack()

    def classify_text1():
        # Classify the entered text with the first perceptron variant.
        user_string = entry.get("1.0", tk.END).strip()
        if not user_string:
            messagebox.showwarning("Błąd", "Wpisz coś do pola tekstowego")
            return
        weights = get_text_weights(user_string)
        classified = False
        # When several perceptrons fire, the label ends up showing the last one.
        for per in first_perceptron_list:
            if per.calculate(weights) == 1:
                classified = True
                result.config(text=f"Metoda 1 zakwalifikowano jako: {per.language} ")
        if not classified:
            result.config(text="Nie udało się zakwalifikować tekstu")

    def classify_text2():
        # Classify the entered text with the second perceptron variant.
        user_string = entry.get("1.0", tk.END).strip()
        if not user_string:
            messagebox.showwarning("Błąd", "Wpisz coś do pola tekstowego")
            return
        # Compute the letter statistics once rather than once per perceptron.
        weights = get_text_weights(user_string)
        output = [[per.language, per.calculate(weights)] for per in second_perceptron_list]
        winner = max(output, key=lambda x: x[1])
        result.config(text=f"Metoda 2 zakwalifikowano jako: {winner[0]} (score: {winner[1]:.3f})")

    button1 = tk.Button(root, text="Klasyfikuj metodą 1", command=classify_text1)
    button1.pack(pady=1)
    button2 = tk.Button(root, text="Klasyfikuj metodą 2", command=classify_text2)
    button2.pack(pady=1)
    root.mainloop()
# Start training, evaluation and the GUI only when run as a script.
if __name__ == "__main__":
    main()