cesar 16 hours ago
parent
commit
d2bd2050c6
1 changed file with 307 additions and 0 deletions
v1_multifailure_importance_analysis.py

@@ -0,0 +1,307 @@
+# Csar Fdez, UdL, 2025
+import pandas as pd
+import matplotlib.pyplot as plt
+import datetime
+import numpy as np
+import keras
+import os.path
+import pickle
+from keras import layers
+import copy
+
+
+# Data file arrays. Index:
+# 0.  No failure
+# 1.  Blocked evaporator
+# 2.  Fully blocked condenser
+# 3.  Partially blocked condenser
+# 4.  Condenser fan not working
+# 5.  Open door
+
+
+NumberOfFailures=5
+NumberOfFailures=4  # So far, we have only data for the first 4 types of failures
+datafiles=[]
+for i in range(NumberOfFailures+1):
+    datafiles.append([])
+
+# Next set of data corresponds to Freezer, SP=-26
+datafiles[0]=['2024-08-07_5_','2024-08-08_5_']
+datafiles[1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
+datafiles[2]=['2024-12-18_5_','2024-12-19_5_']
+datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
+datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
+#datafiles[4]=[]
+
+# Features suggested by Xavier
+features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
+features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
+NumFeatures=len(features)
+
+df_list=[]
+for i in range(NumberOfFailures+1):
+    df_list.append([])
+
+for i in range(NumberOfFailures+1):
+    dftemp=[]
+    for f in datafiles[i]:
+        print("                 ", f)
+        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
+        df1 = pd.read_csv('./data/'+f+'.csv')
+        dftemp.append(df1)
+    df_list[i]=pd.concat(dftemp)
+
+
+# Subsampled to 5'  =  30 * 10"
+# We consider samples every 5' because, in production, we will only have data at this frequency
+subsamplingrate=30
+
+dataframe=[]
+for i in range(NumberOfFailures+1):
+    dataframe.append([])
+
+for i in range(NumberOfFailures+1):
+    datalength=df_list[i].shape[0]
+    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
+    dataframe[i].reset_index(inplace=True,drop=True)
+    dataframe[i].dropna(inplace=True)
+
+
+# Train data is the first 2/3 of the data
+# Test data is the last 1/3 of the data
+dataTrain=[]
+dataTest=[]
+for i in range(NumberOfFailures+1):
+    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
+    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
+
+
+def normalize2(train,test):
+    # Standardizes train and test using the per-feature mean and std computed on the train set
+    means=[]
+    stdevs=[]
+    for i in range(NumFeatures):
+        means.append(train[:,i].mean())
+        stdevs.append(train[:,i].std())
+    return( (train-means)/stdevs, (test-means)/stdevs )
+
+dataTrainNorm=[]
+dataTestNorm=[]
+for i in range(NumberOfFailures+1):
+    dataTrainNorm.append([])
+    dataTestNorm.append([])
+
+for i in range(NumberOfFailures+1):
+    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
+
+def plotData():
+    fig, axes = plt.subplots(
+        nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
+    )
+    for i in range(NumberOfFailures+1):
+        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+",  feature 0")
+        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+",  feature 1")
+    #axes[1].legend()
+    #axes[0].set_ylabel(features[0])
+    #axes[1].set_ylabel(features[1])
+    plt.show()
+
+#plotData()
+
+
+TIME_STEPS = 12
+# Builds overlapping sliding windows of time_steps consecutive samples
+def create_sequences(values, time_steps=TIME_STEPS):
+    output = []
+    for i in range(len(values) - time_steps + 1):
+        output.append(values[i : (i + time_steps)])
+    return np.stack(output)
+
+x_train=[]
+for i in range(NumberOfFailures+1):
+    x_train.append(create_sequences(dataTrainNorm[i]))
+
+
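+# One Conv1D autoencoder is built per data class (no failure + each failure type);
+# each reconstructs its input windows, and the reconstruction error drives the anomaly detection below.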
+model=[]
+modelckpt_callback=[]
+es_callback=[]
+path_checkpoint=[]
+for i in range(NumberOfFailures+1):
+    model.append([])
+    model[i] = keras.Sequential(
+        [
+            layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])),
+            layers.Conv1D(
+                filters=64,
+                kernel_size=7,
+                padding="same",
+                strides=2,
+                activation="relu",
+            ),
+            layers.Dropout(rate=0.2),
+            layers.Conv1D(
+                filters=32,
+                kernel_size=7,
+                padding="same",
+                strides=2,
+                activation="relu",
+            ),
+            layers.Conv1DTranspose(
+                filters=32,
+                kernel_size=7,
+                padding="same",
+                strides=2,
+                activation="relu",
+            ),
+            layers.Dropout(rate=0.2),
+            layers.Conv1DTranspose(
+                filters=64,
+                kernel_size=7,
+                padding="same",
+                strides=2,
+                activation="relu",
+            ),
+            layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=7, padding="same"),
+        ]
+    )
+    model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
+    model[i].summary()
+    path_checkpoint.append("model_v1_"+str(i)+"._checkpoint.weights.h5")
+    es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15))
+    modelckpt_callback.append(keras.callbacks.ModelCheckpoint( monitor="val_loss", filepath=path_checkpoint[i], verbose=1, save_weights_only=True, save_best_only=True,))
+
+
+
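+# For reference only: a minimal sketch of how the checkpoints loaded below are assumed to have been
+# produced by a separate training run (epochs and batch_size are illustrative, not taken from this repo):
+#   history = model[i].fit(x_train[i], x_train[i], epochs=400, batch_size=128,
+#                          validation_split=0.1, callbacks=[es_callback[i], modelckpt_callback[i]])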
+# Load models (weights are assumed to have been trained and checkpointed previously)
+for i in range(NumberOfFailures+1):
+    model[i].load_weights(path_checkpoint[i])
+
+
+x_train_pred=[]
+train_mae_loss=[]
+threshold=[]
+for i in range(NumberOfFailures+1):
+    x_train_pred.append(model[i].predict(x_train[i]))
+    train_mae_loss.append(np.mean(np.abs(x_train_pred[i] - x_train[i]), axis=1))
+    threshold.append(np.max(train_mae_loss[i],axis=0))
+
+print("Threshold : ",threshold)
+for i in range(NumberOfFailures+1):
+    threshold[i]=threshold[i]*1.3
+# The threshold is enlarged because, otherwise, the data subsampled at 5' produces many false positives
+
+
+#  Anomaly metrics:
+#   False positives / true negatives: evaluated on dataTestNorm[0] (no-failure data)
+#   True positives / false negatives: evaluated on dataTestNorm[i], i>0 (failure data)
+
+# Returns True if any per-feature anomaly flag in the window is set
+def AtLeastOneTrue(x):
+    for i in range(NumFeatures):
+        if x[i]:
+            return True
+    return False
+
+def anomalyMetric(testList):  # first element of the list is the non-failure data
+    # FP, TP: false/true positives
+    # TN, FN: true/false negatives
+    # Sensitivity: probability of detecting a failure when the data is faulty
+    # Specificity: probability of not detecting a failure when the data is healthy
+    x_test = create_sequences(testList[0])
+    x_test_pred = model[0].predict(x_test)
+    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
+    anomalies = test_mae_loss > threshold[0]
+    count=0
+    for i in range(anomalies.shape[0]):
+        if AtLeastOneTrue(anomalies[i]):
+            count+=1
+    FP=count
+    TN=anomalies.shape[0]-count
+    count=0
+    TP=np.zeros((NumberOfFailures))
+    FN=np.zeros((NumberOfFailures))
+    for i in range(1,len(testList)):
+        x_test = create_sequences(testList[i])
+        x_test_pred = model[0].predict(x_test)
+        test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
+        anomalies = test_mae_loss > threshold[0]
+        count=0
+        for j in range(anomalies.shape[0]):
+            if AtLeastOneTrue(anomalies[j]):
+                count+=1
+        TP[i-1] = count
+        FN[i-1] = anomalies.shape[0]-count
+    Sensitivity=TP.sum()/(TP.sum()+FN.sum())
+    Specificity=TN/(TN+FP)
+    print("Sensitivity: ",Sensitivity)
+    print("Specificity: ",Specificity)
+    return Sensitivity+Specificity
+
+
+
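+# Baseline metric (Sensitivity+Specificity) on the unshuffled test data, with all features intact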
+MaxMetric=anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
+
+# Now iterate, permuting one feature at a time, and order the features by the resulting metric reduction
+
+
+metric=np.zeros(NumFeatures)
+for i in range(NumFeatures):
+    dataTestNormCopy=[]
+    # A deep copy is required because shuffle is an in-place operation
+    for j in range(NumberOfFailures+1):
+        dataTestNormCopy.append([])
+        dataTestNormCopy[j]=copy.deepcopy(dataTestNorm[j])
+    for j in range(NumberOfFailures+1):
+        np.random.shuffle(dataTestNormCopy[j][:,i])
+    metric[i]=anomalyMetric([dataTestNormCopy[0],dataTestNormCopy[1],dataTestNormCopy[2],dataTestNormCopy[3],dataTestNormCopy[4]])
+
+
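+# metric[i] holds Sensitivity+Specificity after randomly permuting feature i in every test set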
+# Features ordered from least to most important
+indexes_ordered=np.argsort(metric)
+
+
+# Now, let's eliminate features cumulatively, from least to most important
+metric=np.zeros(NumFeatures)
+# A deep copy is required because shuffle is an in-place operation
+dataTestNormCopy=[]
+for j in range(NumberOfFailures+1):
+    dataTestNormCopy.append([])
+    dataTestNormCopy[j]=copy.deepcopy(dataTestNorm[j])
+for i in range(NumFeatures):
+    for j in range(NumberOfFailures+1):
+        np.random.shuffle(dataTestNormCopy[j][:,i])
+    metric[i]=anomalyMetric([dataTestNormCopy[0],dataTestNormCopy[1],dataTestNormCopy[2],dataTestNormCopy[3],dataTestNormCopy[4]])
+
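+# Note: the shuffles accumulate across iterations, so metric[i] reflects features 0..i all permuted at once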
+# Print features to be used in v1_multifailure.py
+
+#features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2','tc s3']
+F=np.array(features)
+
+l=indexes_ordered.shape[0]
+for i in range(3,NumFeatures-5):
+    print(F[indexes_ordered[l-i:]])
+
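+# Each printed line is a candidate feature subset of increasing size, taken from the tail of
+# indexes_ordered (the features with the highest post-permutation metric)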
+'''
+
+['r1 s10' 'r1 s6' 'r2 s8']
+['tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
+['r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
+['r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
+['r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
+['pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
+['r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6'
+ 'r2 s8']
+['r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10'
+ 'r1 s6' 'r2 s8']
+['tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1'
+ 'r1 s10' 'r1 s6' 'r2 s8']
+['r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2'
+ 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
+['r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1'
+ 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
+['r2 s7' 'r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8'
+ 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
+['r2 s4' 'r2 s7' 'r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii'
+ 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
+['r1 s9' 'r2 s4' 'r2 s7' 'r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5'
+ 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
+'''
+
