-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpca.py
More file actions
61 lines (43 loc) · 1.72 KB
/
pca.py
File metadata and controls
61 lines (43 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.decomposition import IncrementalPCA
from sklearn.decomposition import KernelPCA
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
def _projection_score(reducer, X_train, X_test, Y_train, Y_test):
    """Return the test accuracy of a logistic regression on reduced features.

    ``reducer`` must already be fitted; it is only used to ``transform`` the
    train and test matrices. A fresh ``LogisticRegression`` is trained on the
    projected training data and scored on the projected test data.
    """
    classifier = LogisticRegression(solver='lbfgs')
    classifier.fit(reducer.transform(X_train), Y_train)
    return classifier.score(reducer.transform(X_test), Y_test)


if __name__ == '__main__':
    # Data preparation
    dt_heart = pd.read_csv('db/heart.csv')
    print(dt_heart.head(5))

    dt_features = dt_heart.drop(['target'], axis=1)
    dt_target = dt_heart['target']

    # Split BEFORE scaling so the scaler never sees the held-out rows;
    # fitting it on the full dataset would leak test-set statistics into
    # the training pipeline and bias the reported scores.
    X_train, X_test, Y_train, Y_test = train_test_split(
        dt_features, dt_target, test_size=0.3, random_state=42
    )
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    print(X_train.shape)
    print(Y_train.shape)

    # PCA and IPCA implementation
    pca = PCA(n_components=3)
    pca.fit(X_train)

    ipca = IncrementalPCA(n_components=3, batch_size=10)
    ipca.fit(X_train)

    # Share of variance explained by each retained principal component.
    plt.plot(range(len(pca.explained_variance_)), pca.explained_variance_ratio_)
    plt.show()

    print("SCORE PCA: ", _projection_score(pca, X_train, X_test, Y_train, Y_test))
    print("SCORE IPCA: ", _projection_score(ipca, X_train, X_test, Y_train, Y_test))

    # KPCA
    kpca = KernelPCA(n_components=4, kernel='poly')
    kpca.fit(X_train)
    print("SCORE KPCA: ", _projection_score(kpca, X_train, X_test, Y_train, Y_test))