cesar committed 1 day ago
Parent commit: 52e79d4116

BIN       Figure_1.png (View File)
+7851 -0  data/2024-12-16_5_.csv (View File; diff suppressed because the file is too large)
+7851 -0  data/2024-12-17_5_.csv (View File; diff suppressed because the file is too large)
+7826 -0  data/2024-12-27_5_.csv (View File; diff suppressed because the file is too large)
+7831 -0  data/2025-01-26_3_.csv (View File; diff suppressed because the file is too large)
+1200 -0  data/2025-01-26_5_.csv (View File; diff suppressed because the file is too large)
+7827 -0  data/2025-01-27_3_.csv (View File; diff suppressed because the file is too large)
+7828 -0  data/2025-01-27_5_.csv (View File; diff suppressed because the file is too large)
+7832 -0  data/2025-01-28_3_.csv (View File; diff suppressed because the file is too large)
+7832 -0  data/2025-01-28_5_.csv (View File; diff suppressed because the file is too large)
BIN       model_0._checkpoint.weights.h5 (View File)
BIN       model_1._checkpoint.weights.h5 (View File)
BIN       model_2._checkpoint.weights.h5 (View File)
BIN       model_3._checkpoint.weights.h5 (View File)
BIN       model_4._checkpoint.weights.h5 (View File)
BIN       model_noclass_v1_checkpoint.weights.h5 (View File)
BIN       model_v1_0._checkpoint.weights.h5 (View File)
BIN       model_v1_1._checkpoint.weights.h5 (View File)
BIN       model_v1_2._checkpoint.weights.h5 (View File)
BIN       model_v1_3._checkpoint.weights.h5 (View File)
BIN       model_v1_4._checkpoint.weights.h5 (View File)
BIN       paper/Adapt25_Paper_Template_updated_AKO_v2.docx (View File)
BIN       paper/Adapt25_Paper_Template_updated_AKO_v2_GZ.docx (View File)
BIN       paper/Adapt25_Paper_Template_updated_AKO_v3.docx (View File)
BIN       paper/fig2.png (View File)
BIN       paper/fig3.png (View File)
BIN       paper/fig4.png (View File)
BIN       paper/fig5.png (View File)
+73 -15   v1.py (View File)

@@ -96,6 +96,7 @@ def normalize2(train,test):
     for i in range(NumFeatures):
         means.append(train[:,i].mean())
         stdevs.append(train[:,i].std())
+    print(means)
     return( (train-means)/stdevs, (test-means)/stdevs )
 
 dataTrainNorm=[]
@@ -126,7 +127,7 @@ def plotData():
 NumFilters=64
 KernelSize=7
 DropOut=0.2
-ThresholdFactor=0.9
+ThresholdFactor=1
 TIME_STEPS = 48  # This is a trade-off between better performance (high) and better response delay (low)
 def create_sequences(values, time_steps=TIME_STEPS):
     output = []
@@ -236,9 +237,6 @@ for i in range(anomalies.shape[0]):
     #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
         anomalous_data_indices.append(i)
 
-#print(anomalous_data_indices)
-
-
 # Let's plot some features
 
 colorline=['violet','lightcoral','cyan','lime','grey']
@@ -264,7 +262,7 @@ def plotData3():
         init=end
         end+=(testRanges[1][1]-testRanges[1][0])
         for j in range(1,NumberOfFailures+1):
-            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]],label="fail type "+str(j), color=colorline[j-1])
+            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]],label="Fail type "+str(j), color=colorline[j-1])
             if j<NumberOfFailures:
                 init=end
                 end+=(testRanges[j+1][1]-testRanges[j+1][0])
@@ -274,7 +272,7 @@ def plotData3():
             if (k+TIME_STEPS)<x_test.shape[0]:
                 x.append(k+TIME_STEPS)
                 y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
-        axes[i].plot(x,y ,color='grey',marker='.',linewidth=0,label="fail detection" )
+        axes[i].plot(x,y,color='grey',marker='.',linewidth=0,label="Fail detection")
 
         if i==0:
             axes[i].legend(bbox_to_anchor=(0.9, 0.4))
@@ -291,7 +289,8 @@ def anomalyMetric(testList):  # first element of the list is non-failure data
     # Specificity: true negative ratio given data is OK: TN/(TN+FP)
     # Accuracy: rate of correct predictions: (TN+TP)/(TN+TP+FP+FN)
     # Precision: rate of positive results: TP/(TP+FP)
-    # F-score: predictive performance measure: 2*Precision*Sensitity/(Precision+Sensitity)
+    # F1-score: predictive performance measure: 2*Precision*Sensitivity/(Precision+Sensitivity)
+    # F2-score: predictive performance measure: 2*Specificity*Sensitivity/(Specificity+Sensitivity)
 
     x_test = create_sequences(testList[0])
     x_test_pred = model.predict(x_test)
@@ -326,7 +325,8 @@ def anomalyMetric(testList):  # first element of the list is non-failure data
     Specificity=TN/(TN+FP)
     Accuracy=(TN+TP.sum())/(TN+TP.sum()+FP+FN.sum())
     GlobalPrecision=TP.sum()/(TP.sum()+FP)
-    FScore= 2*GlobalPrecision*GlobalSensitivity/(GlobalPrecision+GlobalSensitivity)
+    F1Score= 2*GlobalPrecision*GlobalSensitivity/(GlobalPrecision+GlobalSensitivity)
+    F2Score= 2*Specificity*GlobalSensitivity/(Specificity+GlobalSensitivity)
 
     print("Sensitivity: ",Sensitivity)
     print("Global Sensitivity: ",GlobalSensitivity)
@@ -334,10 +334,11 @@ def anomalyMetric(testList):  # first element of the list is non-failure data
     print("Global Precision: ",GlobalPrecision)
     print("Specificity: ",Specificity)
     print("Accuracy: ",Accuracy)
-    print("FScore: ",FScore)
+    print("F1Score: ",F1Score)
+    print("F2Score: ",F2Score)
     print("FP: ",FP)
     #return Sensitivity+Specificity
-    return FScore
+    return (F1Score,F2Score)
 
 anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
 
@@ -350,19 +351,76 @@ def plotFScore():
     while tf<1.5:
         threshold=thresholdOrig*tf
         r=anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
-        res.append([tf,r])
+        res.append([tf,r[0],r[1]])
         tf+=0.05
 
     print(res)
     ar=np.array((res))
     plt.rcParams.update({'font.size': 16})
     fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 10), dpi=80, facecolor="w", edgecolor="k")
-    axes.plot(ar[:,0],ar[:,1],label="normal train",linewidth=4)
+    ln1=axes.plot(ar[:,0],ar[:,1],label="F1-Score",linewidth=4)
+    ax1=axes.twinx()
+    ln2=ax1.plot(ar[:,0],ar[:,2],label="F2-Score",linewidth=4,color='C3')
     axes.set_xlabel("Threshold factor")
-    axes.set_ylabel("F-Score")
-    plt.grid()
+    axes.set_ylabel("F1-Score")
+    ax1.set_ylabel("F2-Score")
+    lns = ln1+ln2
+    labs = [l.get_label() for l in lns]
+    axes.legend(lns, labs, loc=0)
+    axes.grid()
     plt.show()
 
 #plotFScore()
-plotData3()
+#plotData3()
+
+
+
+
+# 2nd scenario. Detect only the anomaly; we will classify it later
+# Test data = testnormal + testfail1 + testfail2 + testfail3 + testfail4 + testnormal
+#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
+num=100
+d=np.vstack((dataTestNorm[0][0:num,:],dataTestNorm[1][0:num,:],dataTestNorm[0][num:2*num,:],dataTestNorm[2][70:70+num,:],dataTestNorm[0][2*num-90:3*num-90,:],dataTestNorm[3][50:num+50,:],dataTestNorm[0][150:150+num,:],dataTestNorm[4][0:num+TIME_STEPS,:]))
+
+x_test = create_sequences(d)
+x_test_pred = model.predict(x_test)
+test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
+
+
+anomalies = test_mae_loss > threshold
+anomalous_data_indices = []
+for i in range(anomalies.shape[0]):
+    if AtLeastOneTrue(anomalies[i]):
+    #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
+        anomalous_data_indices.append(i)
+
+def plotData4():
+    NumFeaturesToPlot=len(indexesToPlot)
+    plt.rcParams.update({'font.size': 16})
+    fig, axes = plt.subplots(
+        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
+    )
+    for i in range(NumFeaturesToPlot):
+        for j in range(1,NumberOfFailures+1):
+            if j==1:
+                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]],label="No fail", color='C0')
+            else:
+                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]], color='C0')
+            axes[i].plot(range(j*2*num-num,j*2*num),x_test[j*2*num-num:j*2*num,0,indexesToPlot[i]],label="Fail type "+str(j),color=colorline[j-1])
+        x=[]
+        y=[]
+        for k in anomalous_data_indices:
+            if (k+TIME_STEPS)<x_test.shape[0]:
+                x.append(k+TIME_STEPS)
+                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
+        axes[i].plot(x,y,color='grey',marker='.',linewidth=0,label="Fail detection")
+
+        if i==0:
+            axes[i].legend(bbox_to_anchor=(0.9, 0.4))
+        axes[i].set_ylabel(features[indexesToPlot[i]])
+        axes[i].grid()
+    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
+    plt.show()
+
 
+plotData4()
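Note on the metric pair introduced above: F1Score is the harmonic mean of global precision and global sensitivity, while the quantity printed as F2Score is the harmonic mean of specificity and sensitivity; despite the name, it is not the conventional F-beta score with beta=2. A minimal, self-contained sketch of the arithmetic, using made-up confusion counts (all numbers below are illustrative, not from the repository's data):

    import numpy as np

    # Hypothetical confusion counts, one entry per failure type (illustrative only)
    TP = np.array([90., 80., 85., 70.])   # true positives for failure types 1..4
    FN = np.array([10., 20., 15., 30.])   # false negatives for failure types 1..4
    FP, TN = 12.0, 188.0                  # false positives / true negatives on the no-failure block

    GlobalSensitivity = TP.sum() / (TP.sum() + FN.sum())   # recall over all failure types
    Specificity = TN / (TN + FP)                           # true-negative rate on normal data
    GlobalPrecision = TP.sum() / (TP.sum() + FP)

    # Same formulas as in the commit
    F1Score = 2 * GlobalPrecision * GlobalSensitivity / (GlobalPrecision + GlobalSensitivity)
    F2Score = 2 * Specificity * GlobalSensitivity / (Specificity + GlobalSensitivity)
    print(F1Score, F2Score)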

+334 -102  v2.py (View File)

@@ -1,4 +1,7 @@
 # Csar Fdez, UdL, 2025
+# Changes from v1: normalization
+# In v1, each failure type has its own normalization parameters (means and stdevs)
+# In v2, the mean and stdev are the same for all data
 import pandas as pd
 import matplotlib.pyplot as plt
 import datetime
@@ -8,10 +11,8 @@ import os.path
 import pickle
 from keras import layers
 from optparse import OptionParser
+import copy
 
-#   facility type 5. Mural cerrado de congelación (closed freezer). Set point at -18  (we will have two possible setpoints, -18 and -26)
-# This code only deals with a given failure type
-# Data for abnormal functioning corresponds to Condenser Fan failure
 
 parser = OptionParser()
 parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
@@ -19,178 +20,409 @@ parser.add_option("-t", "--train", dest="train", help="Trains the models (false)
 (options, args) = parser.parse_args()
 
 
-normal_datafiles_list=['2025-01-09_5_','2025-01-10_5_','2025-01-11_5_']
-anormal_datafiles_list=['2025-01-04_5_','2025-01-05_5_','2025-01-06_5_','2025-01-07_5_']
+# Data file arrays. Index:
+# 0. No failure
+# 1. Blocked evaporator
+# 2. Fully blocked condenser
+# 3. Partially blocked condenser
+# 4. Condenser fan not working
+# 5. Open door
 
-# Features suggested by Xavier
-features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
-NumFeatures=len(features)
 
-df_list=[]
-for f in normal_datafiles_list:
-    #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
-    df1 = pd.read_csv('./data/'+f+'.csv')
-    df_list.append(df1)
+NumberOfFailures=4  # So far, we only have data for the first 4 failure types
+datafiles=[]
+for i in range(NumberOfFailures+1):
+    datafiles.append([])
 
-df=pd.concat(df_list)
-datalength=df.shape[0]
-# subsampled to 5'  =  30 * 10"
-# We consider smaples every 5' because in production, we will only have data at this frequency
-subsamplingrate=30
-subsamplingrate=30
+# The next set of data corresponds to the freezer, SP=-26
+datafiles[0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_','2025-01-27_5_']
+datafiles[1]=['2024-12-11_5_','2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
+datafiles[2]=['2024-12-18_5_','2024-12-19_5_']
+datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
+datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
+#datafiles[4]=[]
 
+# Features suggested by Xavier
+# Careful with 'tc s3': it is always null in datafiles[0]
+# It seems to be incorporated in the new tests
 
-normaldataframe=df.iloc[range(0,datalength,subsamplingrate)][features]
-normaldataframe.reset_index(inplace=True,drop=True)
+features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
+#features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
 
+#features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
+
+NumFeatures=len(features)
 
 df_list=[]
-for f in anormal_datafiles_list:
-    #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
-    df1 = pd.read_csv('./data/'+f+'.csv')
-    df_list.append(df1)
+for i in range(NumberOfFailures+1):
+    df_list.append([])
 
-df=pd.concat(df_list)
-datalength=df.shape[0]
-# subsampled to 5'  =  30 * 10"
-anormaldataframe=df.iloc[range(0,datalength,subsamplingrate)][features]
-anormaldataframe.reset_index(inplace=True,drop=True)
+for i in range(NumberOfFailures+1):
+    dftemp=[]
+    for f in datafiles[i]:
+        print("                 ", f)
+        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
+        df1 = pd.read_csv('./data/'+f+'.csv')
+        dftemp.append(df1)
+    df_list[i]=pd.concat(dftemp)
 
 
-# Train data is first 2/3 of normaldata
-# Test data is: last 1/3 of normaldata + anormaldata + last 1/3 of normaldata
-dataTrain=normaldataframe.values[0:int(normaldataframe.shape[0]*2/3),:]
-dataTest=np.vstack((normaldataframe.values[int(normaldataframe.shape[0]*2/3)+1:,:],anormaldataframe.values, normaldataframe.values[int(normaldataframe.shape[0]*2/3)+1:,:] ))
+# Subsampled to 5' = 30 * 10"
+# We take samples every 5' because in production we will only have data at this frequency
+subsamplingrate=30
 
+dataframe=[]
+for i in range(NumberOfFailures+1):
+    dataframe.append([])
+
+for i in range(NumberOfFailures+1):
+    datalength=df_list[i].shape[0]
+    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
+    dataframe[i].reset_index(inplace=True,drop=True)
+    dataframe[i].dropna(inplace=True)
+
+
+# Train data is the first 2/3 of the data
+# Test data is the last 1/3 of the data
+dataTrain=[]
+dataTest=[]
+for i in range(NumberOfFailures+1):
+    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
+    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
+
+# Calculate means and stdevs
+a=dataTrain[0]
+for i in range(1,NumberOfFailures+1):
+    a=np.vstack((a,dataTrain[i]))
+
+means=a.mean(axis=0)
+stdevs=a.std(axis=0)
+def normalize2(train,test):
+    return( (train-means)/stdevs, (test-means)/stdevs )
+
+dataTrainNorm=[]
+dataTestNorm=[]
+for i in range(NumberOfFailures+1):
+    dataTrainNorm.append([])
+    dataTestNorm.append([])
+
+for i in range(NumberOfFailures+1):
+    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
+
+def plotData():
+    fig, axes = plt.subplots(
+        nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
+    )
+    for i in range(NumberOfFailures+1):
+        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+",  feature 0")
+        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+",  feature 1")
+    #axes[1].legend()
+    #axes[0].set_ylabel(features[0])
+    #axes[1].set_ylabel(features[1])
+    plt.show()
 
-def normalize2():
-    # merges train and test
-    means=[]
-    stdevs=[]
-    for i in range(NumFeatures):
-        means.append(dataTrain[:,i].mean())
-        stdevs.append(dataTrain[:,i].std())
-    return( (dataTrain-means)/stdevs, (dataTest-means)/stdevs )
+#plotData()
+#exit(0)
 
-(dataTrainNorm,dataTestNorm)=normalize2()
 
-TIME_STEPS = 24
+NumFilters=64
+KernelSize=7
+DropOut=0.2
+ThresholdFactor=1.4
+TIME_STEPS = 12  # This is a trade-off between better performance (high) and better response delay (low)
 def create_sequences(values, time_steps=TIME_STEPS):
     output = []
     for i in range(len(values) - time_steps + 1):
         output.append(values[i : (i + time_steps)])
     return np.stack(output)
 
-x_train = create_sequences(dataTrainNorm)
+x_train=[]
+for i in range(NumberOfFailures+1):
+    x_train.append(create_sequences(dataTrainNorm[i]))
+
 
+# Reused code from v1_multifailure for only one model. No classification
+#for i in range(NumberOfFailures+1):
 model = keras.Sequential(
     [
-        layers.Input(shape=(x_train.shape[1], x_train.shape[2])),
+        layers.Input(shape=(x_train[0].shape[1], x_train[0].shape[2])),
         layers.Conv1D(
-            filters=64,
-            kernel_size=7,
+            filters=NumFilters,
+            kernel_size=KernelSize,
             padding="same",
             strides=2,
             activation="relu",
         ),
-        layers.Dropout(rate=0.2),
+        layers.Dropout(rate=DropOut),
         layers.Conv1D(
-            filters=32,
-            kernel_size=7,
+            filters=int(NumFilters/2),
+            kernel_size=KernelSize,
             padding="same",
             strides=2,
             activation="relu",
         ),
         layers.Conv1DTranspose(
-            filters=32,
-            kernel_size=7,
+            filters=int(NumFilters/2),
+            kernel_size=KernelSize,
            padding="same",
             strides=2,
             activation="relu",
         ),
-        layers.Dropout(rate=0.2),
+        layers.Dropout(rate=DropOut),
         layers.Conv1DTranspose(
-            filters=64,
-            kernel_size=7,
+            filters=NumFilters,
+            kernel_size=KernelSize,
             padding="same",
             strides=2,
             activation="relu",
         ),
-        layers.Conv1DTranspose(filters=x_train.shape[2], kernel_size=7, padding="same"),
+        layers.Conv1DTranspose(filters=x_train[0].shape[2], kernel_size=KernelSize, padding="same"),
     ]
 )
 model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
 model.summary()
-
-path_checkpoint = "model._checkpoint.weights.h5"
-es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
-
-modelckpt_callback = keras.callbacks.ModelCheckpoint(
-    monitor="val_loss",
-    filepath=path_checkpoint,
-    verbose=1,
-    save_weights_only=True,
-    save_best_only=True,
-)
+path_checkpoint="model_noclass_v2_checkpoint.weights.h5"
+es_callback=keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
+modelckpt_callback=keras.callbacks.ModelCheckpoint( monitor="val_loss", filepath=path_checkpoint, verbose=1, save_weights_only=True, save_best_only=True,)
 
 
 if options.train:
-    history = model.fit(
-        x_train,
-        x_train,
-        epochs=400,
-        batch_size=128,
-        validation_split=0.3,
-        callbacks=[  es_callback, modelckpt_callback      ],
-    )
-
-    plt.plot(history.history["loss"], label="Training Loss")
-    plt.plot(history.history["val_loss"], label="Validation Loss")
-    plt.legend()
-    plt.show()
+    history=model.fit( x_train[0], x_train[0], epochs=400, batch_size=128, validation_split=0.3, callbacks=[  es_callback, modelckpt_callback      ],)
 else:
     model.load_weights(path_checkpoint)
 
 
-x_train_pred = model.predict(x_train)
-train_mae_loss = np.mean(np.abs(x_train_pred - x_train), axis=1)
-threshold = np.max(train_mae_loss,axis=0)
+x_train_pred=model.predict(x_train[0])
+train_mae_loss=np.mean(np.abs(x_train_pred - x_train[0]), axis=1)
+threshold=np.max(train_mae_loss,axis=0)
+thresholdOrig=copy.deepcopy(threshold)
 
 print("Threshold : ",threshold)
-threshold=threshold*2
+threshold=threshold*ThresholdFactor
 # The threshold is enlarged because, otherwise, subsampling at 5' gives many false positives
 
-x_test = create_sequences(dataTestNorm)
+
+# 1st scenario. Detect only the anomaly; we will classify it later
+# Test data = testnormal + testfail1 + testfail2 + testfail3 + testfail4 + testnormal
+#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
+d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]))
+
+x_test = create_sequences(d)
 x_test_pred = model.predict(x_test)
 test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
 
+
+# Define ranges for plotting in different colors
+testRanges=[]
+r=dataTestNorm[0].shape[0]
+testRanges.append([0,r])
+for i in range(1,NumberOfFailures+1):
+    rnext=r+dataTestNorm[i].shape[0]
+    testRanges.append([r,rnext])
+    r=rnext
+
+# Drop the last TIME_STEPS for plotting
+testRanges[NumberOfFailures][1]=testRanges[NumberOfFailures][1]-TIME_STEPS
+
+
+def AtLeastOneTrue(x):
+    for i in range(NumFeatures):
+        if x[i]:
+            return True
+    return False
+
 anomalies = test_mae_loss > threshold
 anomalous_data_indices = []
 for i in range(anomalies.shape[0]):
-    if anomalies[i][0] or anomalies[i][1]:
+    if AtLeastOneTrue(anomalies[i]):
+    #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
         anomalous_data_indices.append(i)
 
-#print(anomalous_data_indices)
+# Let's plot some features
 
+colorline=['violet','lightcoral','cyan','lime','grey']
+colordot=['darkviolet','red','blue','green','black']
 
-# Let's plot only a couple of features
-def plotData2():    
+#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
+featuresToPlot=features
+
+indexesToPlot=[]
+for i in featuresToPlot:
+    indexesToPlot.append(features.index(i))
+
+def plotData3():
+    NumFeaturesToPlot=len(indexesToPlot)
+    plt.rcParams.update({'font.size': 16})
     fig, axes = plt.subplots(
-        nrows=2, ncols=1, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
+        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
     )
-    axes[0].plot(range(len(x_train)),x_train[:,0,0],label="normal")
-    axes[0].plot(range(len(x_train),len(x_train)+len(x_test)),x_test[:,0,0],label="abnormal")
-    axes[0].plot(len(x_train)+np.array(anomalous_data_indices),x_test[anomalous_data_indices,0,0],color='red',marker='.',linewidth=0,label="abnormal detection")
-    axes[0].legend()
-    axes[1].plot(range(len(x_train)),x_train[:,0,1],label="normal")
-    axes[1].plot(range(len(x_train),len(x_train)+len(x_test)),x_test[:,0,1],label="abnormal")
-    axes[1].plot(len(x_train)+np.array(anomalous_data_indices),x_test[anomalous_data_indices,0,1],color='red',marker='.',linewidth=0,label="abnormal detection")
-    axes[1].legend()
-    axes[0].set_ylabel(features[0])
-    axes[1].set_ylabel(features[1])
+    for i in range(NumFeaturesToPlot):
+        init=0
+        end=testRanges[0][1]
+        axes[i].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,indexesToPlot[i]],label="No fail")
+        init=end
+        end+=(testRanges[1][1]-testRanges[1][0])
+        for j in range(1,NumberOfFailures+1):
+            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]],label="Fail type "+str(j), color=colorline[j-1])
+            if j<NumberOfFailures:
+                init=end
+                end+=(testRanges[j+1][1]-testRanges[j+1][0])
+        x=[]
+        y=[]
+        for k in anomalous_data_indices:
+            if (k+TIME_STEPS)<x_test.shape[0]:
+                x.append(k+TIME_STEPS)
+                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
+        axes[i].plot(x,y,color='grey',marker='.',linewidth=0,label="Fail detection")
+
+        if i==0:
+            axes[i].legend(bbox_to_anchor=(0.9, 0.4))
+        axes[i].set_ylabel(features[indexesToPlot[i]])
+        axes[i].grid()
+    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
     plt.show()
 
-plotData2()
 
+def anomalyMetric(testList):  # first element of the list is non-failure data
+    # FP, TP: false/true positive
+    # TN, FN: true/false negative
+    # Sensitivity (recall): probability of failure detection given the data is a failure: TP/(TP+FN)
+    # Specificity: true negative ratio given data is OK: TN/(TN+FP)
+    # Accuracy: rate of correct predictions: (TN+TP)/(TN+TP+FP+FN)
+    # Precision: rate of positive results: TP/(TP+FP)
+    # F1-score: predictive performance measure: 2*Precision*Sensitivity/(Precision+Sensitivity)
+    # F2-score: predictive performance measure: 2*Specificity*Sensitivity/(Specificity+Sensitivity)
+
+    x_test = create_sequences(testList[0])
+    x_test_pred = model.predict(x_test)
+    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
+    anomalies = test_mae_loss > threshold
+    count=0
+    for i in range(anomalies.shape[0]):
+        if AtLeastOneTrue(anomalies[i]):
+            count+=1
+    FP=count
+    TN=anomalies.shape[0]-count
+    count=0
+    TP=np.zeros((NumberOfFailures))
+    FN=np.zeros((NumberOfFailures))
+    Sensitivity=np.zeros((NumberOfFailures))
+    Precision=np.zeros((NumberOfFailures))
+    for i in range(1,len(testList)):
+        x_test = create_sequences(testList[i])
+        x_test_pred = model.predict(x_test)
+        test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
+        anomalies = test_mae_loss > threshold
+        count=0
+        for j in range(anomalies.shape[0]):
+            if AtLeastOneTrue(anomalies[j]):
+                count+=1
+        TP[i-1] = count
+        FN[i-1] = anomalies.shape[0]-count
+        Sensitivity[i-1]=TP[i-1]/(TP[i-1]+FN[i-1])
+        Precision[i-1]=TP[i-1]/(TP[i-1]+FP)
+
+    GlobalSensitivity=TP.sum()/(TP.sum()+FN.sum())
+    Specificity=TN/(TN+FP)
+    Accuracy=(TN+TP.sum())/(TN+TP.sum()+FP+FN.sum())
+    GlobalPrecision=TP.sum()/(TP.sum()+FP)
+    F1Score= 2*GlobalPrecision*GlobalSensitivity/(GlobalPrecision+GlobalSensitivity)
+    F2Score= 2*Specificity*GlobalSensitivity/(Specificity+GlobalSensitivity)
+
+    print("Sensitivity: ",Sensitivity)
+    print("Global Sensitivity: ",GlobalSensitivity)
+    print("Precision: ",Precision)
+    print("Global Precision: ",GlobalPrecision)
+    print("Specificity: ",Specificity)
+    print("Accuracy: ",Accuracy)
+    print("F1Score: ",F1Score)
+    print("F2Score: ",F2Score)
+    print("FP: ",FP)
+    #return Sensitivity+Specificity
+    return (F1Score,F2Score)
+
+anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
+
+
+def plotFScore():
+    global threshold
+    res=[]
+    # Plots the F-scores as a function of the threshold factor
+    tf=0.3
+    while tf<1.5:
+        threshold=thresholdOrig*tf
+        r=anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
+        res.append([tf,r[0],r[1]])
+        tf+=0.05
+
+    print(res)
+    ar=np.array((res))
+    plt.rcParams.update({'font.size': 16})
+    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 10), dpi=80, facecolor="w", edgecolor="k")
+    ln1=axes.plot(ar[:,0],ar[:,1],label="F1-Score",linewidth=4)
+    ax1=axes.twinx()
+    ln2=ax1.plot(ar[:,0],ar[:,2],label="F2-Score",linewidth=4,color='C3')
+    axes.set_xlabel("Threshold factor")
+    axes.set_ylabel("F1-Score")
+    ax1.set_ylabel("F2-Score")
+    lns = ln1+ln2
+    labs = [l.get_label() for l in lns]
+    axes.legend(lns, labs, loc=0)
+    axes.grid()
+    plt.show()
+
+#plotFScore()
+plotData3()
+
+exit(0)
+
+
+
+# 2nd scenario. Detect only the anomaly; we will classify it later
+# Test data = testnormal + testfail1 + testfail2 + testfail3 + testfail4 + testnormal
+#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
+num=100
+d=np.vstack((dataTestNorm[0][0:num,:],dataTestNorm[1][0:num,:],dataTestNorm[0][num:2*num,:],dataTestNorm[2][70:70+num,:],dataTestNorm[0][2*num-90:3*num-90,:],dataTestNorm[3][50:num+50,:],dataTestNorm[0][150:150+num,:],dataTestNorm[4][0:num+TIME_STEPS,:]))
+
+x_test = create_sequences(d)
+x_test_pred = model.predict(x_test)
+test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
+
+
+anomalies = test_mae_loss > threshold
+anomalous_data_indices = []
+for i in range(anomalies.shape[0]):
+    if AtLeastOneTrue(anomalies[i]):
+    #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
+        anomalous_data_indices.append(i)
+
+def plotData4():
+    NumFeaturesToPlot=len(indexesToPlot)
+    plt.rcParams.update({'font.size': 16})
+    fig, axes = plt.subplots(
+        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
+    )
+    for i in range(NumFeaturesToPlot):
+        for j in range(1,NumberOfFailures+1):
+            if j==1:
+                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]],label="No fail", color='C0')
+            else:
+                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]], color='C0')
+            axes[i].plot(range(j*2*num-num,j*2*num),x_test[j*2*num-num:j*2*num,0,indexesToPlot[i]],label="Fail type "+str(j),color=colorline[j-1])
+        x=[]
+        y=[]
+        for k in anomalous_data_indices:
+            if (k+TIME_STEPS)<x_test.shape[0]:
+                x.append(k+TIME_STEPS)
+                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
+        axes[i].plot(x,y,color='grey',marker='.',linewidth=0,label="Fail detection")
+
+        if i==0:
+            axes[i].legend(bbox_to_anchor=(0.9, 0.4))
+        axes[i].set_ylabel(features[indexesToPlot[i]])
+        axes[i].grid()
+    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
+    plt.show()
 
 
+plotData4()
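The headline change in v2.py is the normalization described in its header comment: a single mean/stdev pair is computed over the training data of all classes stacked together, whereas v1.py normalized each failure type with its own statistics. Shared statistics keep the feature scales identical across classes, so the single detection threshold applies uniformly. A minimal sketch of the two approaches, using toy stand-in arrays (the random data below is illustrative only):

    import numpy as np

    # Toy stand-in for dataTrain: one (samples, features) array per class
    dataTrain = [np.random.randn(100, 4) + i for i in range(5)]

    # v1-style: per-class statistics, each class normalized with its own mean/stdev
    norm_v1 = [(d - d.mean(axis=0)) / d.std(axis=0) for d in dataTrain]

    # v2-style: one shared mean/stdev over all classes stacked together
    stacked = np.vstack(dataTrain)
    means, stdevs = stacked.mean(axis=0), stacked.std(axis=0)
    norm_v2 = [(d - means) / stdevs for d in dataTrain]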
