forked from Ionic-Polymers-and-Energy-Devices-Lab/V_MCES
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpca.py
More file actions
59 lines (41 loc) · 1.84 KB
/
pca.py
File metadata and controls
59 lines (41 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import pandas as pd
from sklearn.decomposition import PCA
import numpy as np
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
from sklearn.metrics import silhouette_samples, silhouette_score, davies_bouldin_score, calinski_harabasz_score
import matplotlib.pyplot as plt
class PCAModel:
    """
    Wrapper for the sklearn.PCA algorithm that extracts relevant information.

    The analysis is executed on construction. Afterwards the results are available as attributes:

    * ``model``   - the fitted ``PCA`` instance.
    * ``pcs``     - the transformed data; rows carry the index of ``X``, columns are named PC1, PC2, ...
    * ``load``    - the transposed ``components_`` of the model; rows carry the columns of ``X``.
    * ``summary`` - explained variance ratio, its cumulative sum and the singular value of every PC.
    """

    def __init__(self, X, n=4, rs=1):
        """
        Store the parameters and run the analysis right away.

        :param X: (pd.DataFrame) The (processed) input data for the model. Needs to be provided as a pandas.DataFrame.
        Ideally, the index of the DataFrame refers to the identifiers within the data set.
        :param n: (int) Number of PCs. Default: 4
        The value is passed on unchanged as ``n_components`` of the PCA, so other values supported by
        sklearn there (e.g. a variance fraction) can be used as well.
        :param rs: (int) Random state to be used for the model. Default: 1
        """
        self.X = X
        self.n = n
        self.rs = rs
        # Storage for model
        self.model = None
        # Storage for resulting PCs
        self.pcs = pd.DataFrame()
        # Storage for PC Loadings
        self.load = pd.DataFrame()
        # Storage for Summary
        self.summary = pd.DataFrame()
        self.run()

    def run(self):
        """
        Run the principal component analysis with the provided parameters.

        Creates a new PCA model from ``self.n`` and ``self.rs``, fits it on ``self.X`` and overwrites
        ``self.model``, ``self.pcs``, ``self.load`` and ``self.summary`` with the results.
        """
        self.model = PCA(n_components=self.n, random_state=self.rs)
        scores = self.model.fit_transform(self.X)
        # Name the PCs after the number of components the fitted model reports rather than
        # after self.n: the result is the same for an integer n, but range(self.n) would fail
        # for a non-integer n even though the PCA itself accepts it.
        names = [f"PC{i + 1}" for i in range(self.model.n_components_)]
        self.pcs = pd.DataFrame(scores, columns=names, index=self.X.index)
        self.load = pd.DataFrame(self.model.components_.T, columns=names, index=self.X.columns)
        variance_ratio = self.model.explained_variance_ratio_
        self.summary = pd.DataFrame({
            "Variance": variance_ratio,
            "Cumulative Variance": variance_ratio.cumsum(),
            "Singular Value": self.model.singular_values_,
        }, index=names)