cnc-mill-prognostics-software/machineLearning.py at main · TheRandomFox/cnc-mill-prognostics-software · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
"""
title: Milling machine prognostics program
author: Abdul Halim bin Slamat
std no.: 9664005
cohort: FT/CS119

Original dataset & report can be acquired at: https://ti.arc.nasa.gov/c/4/
Credit: K.Goebel & A.Agogino
"""

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

def prepData(milldat):
    '''
    Takes the raw data and prepares it for use.
    Find and get rid of corrupt or unusable indexes.
    Apply StandardScaler to each dataframe
    df_x1 = feed, DOC, material : input 1
    df_x2 = signal data vals    : input 2
    df_y1 = VB labels            : output

    Parameters
    ----------
    milldat : ndarray

    Returns
    ----------
    milldat : ndarray
    dfs_x1, dfs_x2, df_y : DataFrame
    '''

    #remove corrupt/unusable indexes
    milldat = np.delete(milldat,[17,94,105],0)
    lenmill = len(milldat)
    #replace NaN indexes with mean values
    xnan = [] #store indexes with nan values
    vbmean = []
    for x in range(lenmill):
        if np.isnan(milldat[x][2][0][0]) == True:
            xnan.append(x)
        else:
            vbmean.append(milldat[x][2][0][0])
    vbmean = round(np.mean(vbmean),2)
    for i in range(len(xnan)):
        milldat[xnan[i]][2][0][0] = vbmean

    #populate df_x1, df_x2, df_y1, df_y2
    #y1 (164,)
    dat = []
    for x in range(lenmill):
        yclass = classifyWearState(milldat[x][2][0][0])
        dat.append(yclass)
    df_y1 = pd.DataFrame(data=dat)

    #x1 (164,3)
    df_x1 = []
    for x in range(lenmill):
        dat = []
        for y in range(4,7):
            dat.append(milldat[x][y][0][0])
        df_x1.append(dat)
    df_x1 = pd.DataFrame(data=df_x1)

    def vbt(dvb, dt):
        '''
        Approximate Vb gradient for each cut

        Parameters
        ----------
        dvb : float
        dt : float

        Returns
        -------
        vb_grad : vb
        '''
        vb_grad = dvb/dt     #gradient btw min/max per cut
        return vb_grad

    '''
    #y2 (164,)
    #   VB per cut.
    #initial dvb/dt from t,vb=0
    df_y2 = []
    curr_case = 0
    dvb = 0.0
    dt = 0
    for x in range(len(milldat)):
        if milldat[x][0][0][0] > curr_case:  #if new case

            if x != 0:  #if not start of list, submit curr dvb and dt
                df_y2.append(vbt(dvb, dt))

            #increment case, reset dvb and dt
            dvb = milldat[x][2][0][0]
            dt = milldat[x][3][0][0]
            curr_case+1

        elif milldat[x][0][0][0] == curr_case: #if same case

            if milldat[x][2][0][0] >= dvb:  #if x.vb is greater than current dvb
                dvb = milldat[x][2][0][0]

            dt = milldat[x][3][0][0]

            if x == len(milldat)-1: #if end of list
                df_y2.append(vbt(dvb, dt))
                break
    df_y2 = np.array(df_y2)
    np.delete(df_y2, range(0,4))    #del first 4 secs
    '''

    #x2 (164,6(8))
    #divide into chunks of 1000, pertaining to roughly 4s of readings each
    #ignore the first 1000 readings as the milling machine had not started yet
    #T(total) = (9000-1000)/(250Hz*4) = 8s
    df_x2 = []
    for x in range(lenmill):
        daty = []
        for y in range(7,13):
            datz = []
            for z in range(1000,9000,1000):  #z=time value
                datz = (milldat[x][y][z:z+1000][0])
            daty.append(datz)
        df_x2.append(daty)
    df_x2 = pd.DataFrame(data=df_x2)

    #Perform scaling by Standardization on X feats
    scaler = StandardScaler()
    df_x1 = scaler.fit_transform(df_x1)
    df_x2 = scaler.fit_transform(df_x2)
    #ensure Y is 1-D
    df_y1 = np.ravel(df_y1)

    #visualise dataframe table
    #print('Visualise dataset labels:\n\n')
    #print('X1:\n', df_x1,'\n')
    #print('X2:\n', df_x2,'\n')
    #print('Y:\n', df_y1,'\n')

    return milldat, df_x1, df_x2, df_y1


def classifyWearState(vb):
    '''
    Assigns labels to VB values based on degree of wear.
    The thresholds chosen for VB are just dummy values for the purpose of this project.
    Classification:
        VB value		| Label
        VB < 0.2 		: 'Good' (1)
        0.2 >= VB < 0.5 : 'Fair' (2)
        0.5 <= VB < 0.8 : 'Degraded' (3)
        VB >= 0.8		: 'Failed' (4)

    Parameters
    ----------
    vb : float

    Returns
    ----------
    yclass : int
    '''
    if vb < 0.2:
        return 1
    elif vb < 0.5:
        return 2
    elif vb < 0.8:
        return 3
    else:
        return 4

def train(df_x1, df_x2, df_y1):
    '''
    Feature selection and training the algorithm.

    Parameters
    ----------
    df_x1 : DataFrame (164,3)
    df_x2 : DataFrame (164,6)
    df_y1 : Dataframe (164,)

    Returns
    -------
    None.
    '''
    print('Training algorithms...\n')

    #divide data sets into training & testing groups
    X1_train, X1_test, y1_train, y1_test = train_test_split(df_x1, df_y1, test_size=0.1)
    X2_train, X2_test, y1_train, y1_test = train_test_split(df_x2, df_y1, test_size=0.1)

    #prediction X1
    cls1 = SVC(C=0.5, kernel='poly')
    cls1.fit(X1_train, y1_train)
    pred1 = cls1.predict(X1_test)

    #prediction X2
    cls2 = AdaBoostClassifier(n_estimators=100)
    cls2.fit(X2_train, y1_train)
    pred2 = cls2.predict(X2_test)

    #prediction combined
    #pred_comb = np.stack((pred1,pred2), axis=1)

    #determine accuracy rate
    acc1 = accuracy_score(pred1, y1_test)
    acc2 = accuracy_score(pred2, y1_test)
    #acc_comb = accuracy_score(pred_comb, y1_test)

    print('X1 accuracy: ', round(acc1*100, 4), '%')
    print('X2 accuracy: ', round(acc2*100, 4), '%')
    #print('Combined accuracy: ', round(acc_comb, 4), ' X_comb shape: ', pred_comb.shape)
    print('Predict X1: ', pred1, '\nPredict X2: ', pred2)
    print('True: ', y1_test)