-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPCA.py
More file actions
42 lines (33 loc) · 1.45 KB
/
PCA.py
File metadata and controls
42 lines (33 loc) · 1.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import numpy as np
def PCA(data, n_component=None) -> tuple:
    """Principal component analysis via eigendecomposition of the covariance matrix.

    Parameters
    ----------
    data : np.ndarray, shape (n_samples, n_features)
        Input sample matrix (rows are samples).
    n_component : int, optional
        Number of principal components to keep. Defaults to
        min(n_samples, n_features); explicit values larger than
        n_features are clamped (eigh yields at most n_features axes).

    Returns
    -------
    transformed : np.ndarray, shape (n_samples, n_component)
        The centered data projected onto the kept principal axes.
    components_ : np.ndarray, shape (n_component, n_features)
        Principal axes, one per row, sorted by decreasing variance.
    explained_variance_ratio_ : np.ndarray, shape (n_component,)
        Fraction of total variance explained by each kept component.
    """
    # 1. Center the data: PCA is defined on mean-removed samples.
    X = data - data.mean(axis=0)
    # 2. Covariance with features as variables: (n_features, n_features).
    C = np.cov(X, rowvar=False)
    # 3. eigh (for symmetric matrices) returns eigenvalues in ascending
    #    order; reverse both so components sort by decreasing variance.
    eig_vals, eig_vecs = np.linalg.eigh(C)
    eig_vals = eig_vals[::-1]
    eig_vecs = eig_vecs[:, ::-1]
    # 4. Decide how many components to keep.
    max_component = min(data.shape)
    if n_component is None:
        n_component_ = max_component
    else:
        # Clamp to n_features: slicing past it would silently shrink anyway.
        n_component_ = min(n_component, data.shape[1])
    # 5. Principal axes as rows: (n_component, n_features).
    components_ = eig_vecs[:, :n_component_].T
    # 6. Explained-variance ratio of the kept components
    #    (NaN if the data has zero total variance, e.g. constant input).
    total_var = eig_vals.sum()
    explained_variance_ratio_ = eig_vals[:n_component_] / total_var
    # 7. Project the centered data onto the principal axes.
    return X @ components_.T, components_, explained_variance_ratio_
if __name__ == "__main__":
    # Demo run on random data.
    data = np.random.randn(100, 8)  # 100 samples, 8 features
    # n_component defaults to None, so all min(100, 8) = 8 components are
    # kept and embedding_pca has shape (100, 8) — not a reduced dimension.
    embedding_pca, pca_composition, explained_variance_ratio_ = PCA(data)
    print("components:\n", pca_composition.shape)
    print("explained variance ratio:\n", explained_variance_ratio_)