-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmodel_visualization.py
More file actions
57 lines (48 loc) · 1.72 KB
/
model_visualization.py
File metadata and controls
57 lines (48 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib as mpl
from matplotlib import rc
import matplotlib.pyplot as plt
import pandas as pd
from gensim.models import KeyedVectors
# Matplotlib text setup for the plots below.
# Use a font family that can render Korean (Hangul) labels.
mpl.rcParams['font.family'] = 'AppleGothic'
# Workaround for minus signs rendering as broken glyphs in the figures.
mpl.rc('axes', unicode_minus=False)
# Alternative font, kept for reference: plt.rc('font', family='D2Coding')
def show_tsne(vectors=None, labels=None):
    """Show a labelled scatter plot of a 2-D t-SNE projection of word vectors.

    Args:
        vectors: 2-D array-like of embeddings, one row per word. When omitted,
            the module-level ``X_show`` is used.
        labels: Sequence of words aligned with the rows of ``vectors``. When
            omitted, the module-level ``vocab_show`` is used.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    if vectors is None:
        vectors = X_show
    if labels is None:
        labels = vocab_show

    # scikit-learn requires perplexity < n_samples. Keep the library default
    # of 30 for normal inputs and only shrink it for very small vocabularies.
    n_samples = len(vectors)
    perplexity = float(min(30, max(n_samples - 1, 1)))

    tsne = TSNE(n_components=2, perplexity=perplexity)
    embedded = tsne.fit_transform(vectors)
    df = pd.DataFrame(embedded, index=labels, columns=['x', 'y'])

    fig = plt.figure()
    fig.set_size_inches(30, 20)
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(df['x'], df['y'])

    # itertuples(name=None) yields plain (index, x, y) tuples, so annotate
    # receives an explicit coordinate pair instead of a pandas Series.
    for word, x, y in df.itertuples(name=None):
        ax.annotate(word, (x, y), fontsize=10)

    ax.set_xlabel("t-SNE feature 0")
    ax.set_ylabel("t-SNE feature 1")
    plt.show()
def show_pca(vectors=None, labels=None):
    """Show word labels positioned by a 2-D PCA projection of their vectors.

    Args:
        vectors: 2-D array-like of embeddings, one row per word. When omitted,
            the module-level ``X_show`` is used.
        labels: Sequence of words aligned with the rows of ``vectors``. When
            omitted, the module-level ``vocab_show`` is used.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    if vectors is None:
        vectors = X_show
    if labels is None:
        labels = vocab_show

    # Fit the PCA model and project the data onto the first two principal
    # components in a single step.
    pca = PCA(n_components=2)
    projected = pca.fit_transform(vectors)
    xs = projected[:, 0]
    ys = projected[:, 1]

    plt.figure(figsize=(30, 20))
    # Only text is drawn (no scatter), so the axis limits are set explicitly
    # to span the projected coordinates.
    plt.xlim(xs.min(), xs.max())
    plt.ylim(ys.min(), ys.max())

    for x, y, word in zip(xs, ys, labels):
        plt.text(x, y, str(word), fontdict={'weight': 'bold', 'size': 9})

    plt.xlabel("first label")
    plt.ylabel("second label")
    plt.show()
# Load the word vectors stored in word2vec format.
model_name = './training_data/vector_clean_data_final_ver2_iter1000'
model = KeyedVectors.load_word2vec_format(model_name)

# gensim 4 removed ``KeyedVectors.wv`` and ``.vocab``; the ordered word list
# is exposed as ``index_to_key`` there. Fall back to ``.vocab`` on gensim 3.x.
if hasattr(model, 'index_to_key'):
    vocab = list(model.index_to_key)
else:
    vocab = list(model.vocab)
X = model[vocab]

# Visualize only the first ``sz`` words.
sz = 1500
X_show = X[:sz, :]
vocab_show = vocab[:sz]

# show_tsne()
show_pca()