commit 52e79d4116 (cesar, 1 day ago)

Changed files:

  BIN        Figure_1.png
  +7851  -0  data/2024-12-16_5_.csv  (diff suppressed: file too large)
  +7851  -0  data/2024-12-17_5_.csv  (diff suppressed: file too large)
  +7826  -0  data/2024-12-27_5_.csv  (diff suppressed: file too large)
  +7831  -0  data/2025-01-26_3_.csv  (diff suppressed: file too large)
  +1200  -0  data/2025-01-26_5_.csv  (diff suppressed: file too large)
  +7827  -0  data/2025-01-27_3_.csv  (diff suppressed: file too large)
  +7828  -0  data/2025-01-27_5_.csv  (diff suppressed: file too large)
  +7832  -0  data/2025-01-28_3_.csv  (diff suppressed: file too large)
  +7832  -0  data/2025-01-28_5_.csv  (diff suppressed: file too large)
  BIN        model_0._checkpoint.weights.h5
  BIN        model_1._checkpoint.weights.h5
  BIN        model_2._checkpoint.weights.h5
  BIN        model_3._checkpoint.weights.h5
  BIN        model_4._checkpoint.weights.h5
  BIN        model_noclass_v1_checkpoint.weights.h5
  BIN        model_v1_0._checkpoint.weights.h5
  BIN        model_v1_1._checkpoint.weights.h5
  BIN        model_v1_2._checkpoint.weights.h5
  BIN        model_v1_3._checkpoint.weights.h5
  BIN        model_v1_4._checkpoint.weights.h5
  BIN        paper/Adapt25_Paper_Template_updated_AKO_v2.docx
  BIN        paper/Adapt25_Paper_Template_updated_AKO_v2_GZ.docx
  BIN        paper/Adapt25_Paper_Template_updated_AKO_v3.docx
  BIN        paper/fig2.png
  BIN        paper/fig3.png
  BIN        paper/fig4.png
  BIN        paper/fig5.png

v1.py  (+73 -15)

@@ -96,6 +96,7 @@
     for i in range(NumFeatures):
         means.append(train[:,i].mean())
         stdevs.append(train[:,i].std())
+    print(means)
     return( (train-means)/stdevs, (test-means)/stdevs )

 dataTrainNorm=[]
@@ -126,7 +127,7 @@
 NumFilters=64
 KernelSize=7
 DropOut=0.2
-ThresholdFactor=0.9
+ThresholdFactor=1
 TIME_STEPS = 48  # This is a trade off among better performance (high) and better response delay (low)
 def create_sequences(values, time_steps=TIME_STEPS):
     output = []
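
Note: the create_sequences shown in the context lines above builds overlapping sliding windows, one starting at every sample, so TIME_STEPS sets the window length while the number of windows stays close to the number of samples. A minimal standalone sketch (synthetic data; names as in the script):

    import numpy as np

    TIME_STEPS = 48

    def create_sequences(values, time_steps=TIME_STEPS):
        # One window per starting sample: len(values) - time_steps + 1 windows.
        output = []
        for i in range(len(values) - time_steps + 1):
            output.append(values[i : (i + time_steps)])
        return np.stack(output)

    dummy = np.random.rand(200, 4)          # 200 samples, 4 features
    print(create_sequences(dummy).shape)    # (153, 48, 4)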
@@ -236,9 +237,6 @@
     #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
         anomalous_data_indices.append(i)

-#print(anomalous_data_indices)
-
-
 # Let's plot some features

 colorline=['violet','lightcoral','cyan','lime','grey']
@@ -264,7 +262,7 @@
         init=end
         end+=(testRanges[1][1]-testRanges[1][0])
         for j in range(1,NumberOfFailures+1):
-            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]],label="fail type "+str(j), color=colorline[j-1])
+            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]],label="Fail type "+str(j), color=colorline[j-1])
             if j<NumberOfFailures:
                 init=end
                 end+=(testRanges[j+1][1]-testRanges[j+1][0])
@@ -274,7 +272,7 @@
             if (k+TIME_STEPS)<x_test.shape[0]:
                 x.append(k+TIME_STEPS)
                 y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
-        axes[i].plot(x,y ,color='grey',marker='.',linewidth=0,label="fail detection" )
+        axes[i].plot(x,y ,color='grey',marker='.',linewidth=0,label="Fail detection" )

         if i==0:
             axes[i].legend(bbox_to_anchor=(0.9, 0.4))
@@ -291,7 +289,8 @@
     # Specificity: true negative ratio given  data is OK: TN/(TN+FP)
     # Accuracy: Rate of correct predictions:  (TN+TP)/(TN+TP+FP+FN)
     # Precision: Rate of positive results:  TP/(TP+FP)
-    # F-score: predictive performance measure: 2*Precision*Sensitity/(Precision+Sensitity)
+    # F1-score: predictive performance measure: 2*Precision*Sensitity/(Precision+Sensitity)
+    # F2-score: predictive performance measure:  2*Specificity*Sensitity/(Specificity+Sensitity)

     x_test = create_sequences(testList[0])
     x_test_pred = model.predict(x_test)
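
Note: the F1 definition in these comments is the usual harmonic mean of precision and sensitivity; what the commit calls an "F2-score" is the harmonic mean of specificity and sensitivity, which is not the conventional F-beta score with beta=2. A worked example with assumed counts, for illustration only:

    # Assumed confusion counts (TP, FN, FP, TN), not from the data.
    TP, FN, FP, TN = 80, 20, 10, 90

    sensitivity = TP / (TP + FN)   # 0.8
    specificity = TN / (TN + FP)   # 0.9
    precision = TP / (TP + FP)     # ~0.889

    f1 = 2 * precision * sensitivity / (precision + sensitivity)      # ~0.842
    f2 = 2 * specificity * sensitivity / (specificity + sensitivity)  # ~0.847 (this commit's definition)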
@@ -326,7 +325,8 @@
     Specificity=TN/(TN+FP)
     Accuracy=(TN+TP.sum())/(TN+TP.sum()+FP+FN.sum())
     GlobalPrecision=TP.sum()/(TP.sum()+FP)
-    FScore= 2*GlobalPrecision*GlobalSensitivity/(GlobalPrecision+GlobalSensitivity)
+    F1Score= 2*GlobalPrecision*GlobalSensitivity/(GlobalPrecision+GlobalSensitivity)
+    F2Score = 2*Specificity*GlobalSensitivity/(Specificity+GlobalSensitivity)

     print("Sensitivity: ",Sensitivity)
     print("Global Sensitivity: ",GlobalSensitivity)
@@ -334,10 +334,11 @@
     print("Global Precision: ",GlobalPrecision)
     print("Specifity: ",Specificity)
     print("Accuracy: ",Accuracy)
-    print("FScore: ",FScore)
+    print("F1Score: ",F1Score)
+    print("F2Score: ",F2Score)
     print("FP: ",FP)
     #return Sensitivity+Specifity
-    return FScore
+    return (F1Score,F2Score)


 anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])

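
Note: since anomalyMetric now returns a pair instead of a scalar, call sites have to unpack it (plotFScore below indexes r[0] and r[1]); a sketch mirroring the call above:

    f1, f2 = anomalyMetric([dataTestNorm[0], dataTestNorm[1], dataTestNorm[2],
                            dataTestNorm[3], dataTestNorm[4]])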
@@ -350,19 +351,76 @@
     while tf<1.5:
         threshold=thresholdOrig*tf
         r=anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
-        res.append([tf,r])
+        res.append([tf,r[0],r[1]])
         tf+=0.05

     print(res)
     ar=np.array((res))
     plt.rcParams.update({'font.size': 16})
     fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 10), dpi=80, facecolor="w", edgecolor="k")
-    axes.plot(ar[:,0],ar[:,1],label="normal train",linewidth=4)
+    ln1=axes.plot(ar[:,0],ar[:,1],label="F1-Score",linewidth=4)
+    ax1=axes.twinx()
+    ln2=ax1.plot(ar[:,0],ar[:,2],label="F2-Score",linewidth=4,color='C3')
     axes.set_xlabel("Threshold factor")
-    axes.set_ylabel("F-Score")
-    plt.grid()
+    axes.set_ylabel("F1-Score")
+    ax1.set_ylabel("F2-Score")
+    lns = ln1+ln2
+    labs = [l.get_label() for l in lns]
+    axes.legend(lns, labs, loc=0)
+    axes.grid()
     plt.show()

 #plotFScore()
-plotData3()
+#plotData3()
+
+
+
+
+#  2nd scenario. Detect only anomaly.  Later, we will classiffy it
+# Test data=  testnormal + testfail1 + testtail2 + testfail3 + testfail4 + testnormal
+#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
+num=100
+d=np.vstack((dataTestNorm[0][0:num,:],dataTestNorm[1][0:num,:],dataTestNorm[0][num:2*num,:],dataTestNorm[2][70:70+num,:],dataTestNorm[0][2*num-90:3*num-90,:],dataTestNorm[3][50:num+50,:],dataTestNorm[0][150:150+num,:],dataTestNorm[4][0:num+TIME_STEPS,:]))
+
+x_test = create_sequences(d)
+x_test_pred = model.predict(x_test)
+test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
+
+
+anomalies = test_mae_loss > threshold
+anomalous_data_indices = []
+for i in range(anomalies.shape[0]):
+    if AtLeastOneTrue(anomalies[i]):
+    #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
+        anomalous_data_indices.append(i)
+
+def plotData4():
+    NumFeaturesToPlot=len(indexesToPlot)
+    plt.rcParams.update({'font.size': 16})
+    fig, axes = plt.subplots(
+        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
+    )
+    for i in range(NumFeaturesToPlot):
+        for j in range(1,NumberOfFailures+1):
+            if j==1:
+                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]],label="No fail", color='C0')
+            else:
+                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]], color='C0')
+            axes[i].plot(range(j*2*num-num,j*2*num),x_test[j*2*num-num:j*2*num,0,indexesToPlot[i]],label="File type "+str(j),color=colorline[j-1])
+        x=[]
+        y=[]
+        for k in anomalous_data_indices:
+            if (k+TIME_STEPS)<x_test.shape[0]:
+                x.append(k+TIME_STEPS)
+                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
+        axes[i].plot(x,y ,color='grey',marker='.',linewidth=0,label="Fail detection" )
+
+        if i==0:
+            axes[i].legend(bbox_to_anchor=(0.9, 0.4))
+        axes[i].set_ylabel(features[indexesToPlot[i]])
+        axes[i].grid()
+    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
+    plt.show()
+

+plotData4()
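
Note: the AtLeastOneTrue(anomalies[i]) test used in both scripts flags a window when any feature's reconstruction error exceeds its per-feature threshold, which is equivalent to NumPy's any() over the feature axis; a minimal sketch with made-up booleans:

    import numpy as np

    anomalies = np.array([[False, True, False, False],
                          [False, False, False, False]])    # (windows, features)
    flagged = anomalies.any(axis=1)                         # array([ True, False])
    anomalous_data_indices = np.where(flagged)[0].tolist()  # [0]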

v2.py  (+334 -102)

@@ -1,4 +1,7 @@
 # Csar Fdez, UdL, 2025
+# Changes from v1:   Normalization
+# IN v1, each failure type has its own normalization pars (mean and stdevs)
+# In v2, mean and stdev is the same for all data
 import pandas as pd
 import matplotlib.pyplot as plt
 import datetime
@@ -8,10 +11,8 @@
 import pickle
 from keras import layers
 from optparse import OptionParser
+import copy

-#   facility type 5. Mural cerrado de congelación (closed freezer). Set point at -18  (we will have two possible setpoints, -18 and -26)
-# This code only deals with a given failure type
-# Data for abnormal functioning corresponds to Condenser Fan failure

 parser = OptionParser()
 parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
@@ -19,47 +20,104 @@
 (options, args) = parser.parse_args()


-normal_datafiles_list=['2025-01-09_5_','2025-01-10_5_','2025-01-11_5_']
-anormal_datafiles_list=['2025-01-04_5_','2025-01-05_5_','2025-01-06_5_','2025-01-07_5_']
+# data files arrays. Index:
+# 0.  No failure
+# 1.  Blocked evaporator
+# 2.   Full Blocked condenser
+# 3.   Partial Blocked condenser
+# 4   Fan condenser not working
+# 5.  Open door

-# Features suggested by Xavier
-features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
-NumFeatures=len(features)

-df_list=[]
-for f in normal_datafiles_list:
-    #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
-    df1 = pd.read_csv('./data/'+f+'.csv')
-    df_list.append(df1)
+NumberOfFailures=4  # So far, we have only data for the first 4 types of failures
+datafiles=[]
+for i in range(NumberOfFailures+1):
+    datafiles.append([])

-df=pd.concat(df_list)
-datalength=df.shape[0]
-# subsampled to 5'  =  30 * 10"
-# We consider smaples every 5' because in production, we will only have data at this frequency
-subsamplingrate=30
-subsamplingrate=30
+# Next set of ddata corresponds to Freezer, SP=-26
+datafiles[0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_','2025-01-27_5_']
+datafiles[1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
+datafiles[2]=['2024-12-18_5_','2024-12-19_5_']
+datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
+datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
+#datafiles[4]=[]

+# Features suggested by Xavier
+# Care with 'tc s3' because on datafiles[0] is always nulll
+# Seems to be incoropored in new tests

-normaldataframe=df.iloc[range(0,datalength,subsamplingrate)][features]
-normaldataframe.reset_index(inplace=True,drop=True)
+features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
+#features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']

+#features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
+
+NumFeatures=len(features)

 df_list=[]
-for f in anormal_datafiles_list:
-    #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
-    df1 = pd.read_csv('./data/'+f+'.csv')
-    df_list.append(df1)
+for i in range(NumberOfFailures+1):
+    df_list.append([])

-df=pd.concat(df_list)
-datalength=df.shape[0]
-# subsampled to 5'  =  30 * 10"
-anormaldataframe=df.iloc[range(0,datalength,subsamplingrate)][features]
-anormaldataframe.reset_index(inplace=True,drop=True)
+for i in range(NumberOfFailures+1):
+    dftemp=[]
+    for f in datafiles[i]:
+        print("                 ", f)
+        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
+        df1 = pd.read_csv('./data/'+f+'.csv')
+        dftemp.append(df1)
+    df_list[i]=pd.concat(dftemp)


-# Train data is first 2/3 of normaldata
-# Test data is: last 1/3 of normaldata + anormaldata + last 1/3 of normaldata
-dataTrain=normaldataframe.values[0:int(normaldataframe.shape[0]*2/3),:]
-dataTest=np.vstack((normaldataframe.values[int(normaldataframe.shape[0]*2/3)+1:,:],anormaldataframe.values, normaldataframe.values[int(normaldataframe.shape[0]*2/3)+1:,:] ))
+# subsampled to 5'  =  30 * 10"
+# We consider smaples every 5' because in production, we will only have data at this frequency
+subsamplingrate=30

+dataframe=[]
+for i in range(NumberOfFailures+1):
+    dataframe.append([])
+
+for i in range(NumberOfFailures+1):
+    datalength=df_list[i].shape[0]
+    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
+    dataframe[i].reset_index(inplace=True,drop=True)
+    dataframe[i].dropna(inplace=True)
+
+
+# Train data is first 2/3 of data
+# Test data is: last 1/3 of data
+dataTrain=[]
+dataTest=[]
+for i in range(NumberOfFailures+1):
+    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
+    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
+
+# Calculate means and stdev
+a=dataTrain[0]
+for i in range(1,NumberOfFailures+1):
+    a=np.vstack((a,dataTrain[i]))
+
+means=a.mean(axis=0)
+stdevs=a.std(axis=0)
+def normalize2(train,test):
+    return( (train-means)/stdevs, (test-means)/stdevs )
+
+dataTrainNorm=[]
+dataTestNorm=[]
+for i in range(NumberOfFailures+1):
+    dataTrainNorm.append([])
+    dataTestNorm.append([])
+
+for i in range(NumberOfFailures+1):
+    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
+
+def plotData():
+    fig, axes = plt.subplots(
+        nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
+    )
+    for i in range(NumberOfFailures+1):
+        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+",  feature 0")
+        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+",  feature 1")
+    #axes[1].legend()
+    #axes[0].set_ylabel(features[0])
+    #axes[1].set_ylabel(features[1])
+    plt.show()

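
Note: this block is the core of the v1 -> v2 change announced in the header comments: v1 standardized each failure class with its own training statistics, while v2 stacks every class's training split and derives one global mean/stdev per feature. A minimal sketch of the two options (toy data, not the project's CSVs):

    import numpy as np

    rng = np.random.default_rng(0)
    dataTrain = [rng.normal(i, 1 + i, size=(50, 4)) for i in range(5)]  # toy per-class arrays

    # v1-style: per-class statistics (each class normalized by its own mean/stdev)
    perclass = [(d - d.mean(axis=0)) / d.std(axis=0) for d in dataTrain]

    # v2-style: one global mean/stdev over all classes stacked together
    a = np.vstack(dataTrain)
    means, stdevs = a.mean(axis=0), a.std(axis=0)
    globalnorm = [(d - means) / stdevs for d in dataTrain]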
@@ -66,58 +124,60 @@
-def normalize2():
-    # merges train and test
-    means=[]
-    stdevs=[]
-    for i in range(NumFeatures):
-        means.append(dataTrain[:,i].mean())
-        stdevs.append(dataTrain[:,i].std())
-    return( (dataTrain-means)/stdevs, (dataTest-means)/stdevs )
+#plotData()
+#exit(0)

-(dataTrainNorm,dataTestNorm)=normalize2()

-TIME_STEPS = 24
+NumFilters=64
+KernelSize=7
+DropOut=0.2
+ThresholdFactor=1.4
+TIME_STEPS = 12 # This is a trade off among better performance (high) and better response delay (low)
 def create_sequences(values, time_steps=TIME_STEPS):
     output = []
     for i in range(len(values) - time_steps + 1):
         output.append(values[i : (i + time_steps)])
     return np.stack(output)

-x_train = create_sequences(dataTrainNorm)
+x_train=[]
+for i in range(NumberOfFailures+1):
+    x_train.append(create_sequences(dataTrainNorm[i]))
+

+# Reused code from v1_multifailure for only one model. No classification
+#for i in range(NumberOfFailures+1):
 model = keras.Sequential(
     [
-        layers.Input(shape=(x_train.shape[1], x_train.shape[2])),
+        layers.Input(shape=(x_train[0].shape[1], x_train[0].shape[2])),
         layers.Conv1D(
-            filters=64,
-            kernel_size=7,
+            filters=NumFilters,
+            kernel_size=KernelSize,
             padding="same",
             strides=2,
             activation="relu",
         ),
-        layers.Dropout(rate=0.2),
+        layers.Dropout(rate=DropOut),
        layers.Conv1D(
-            filters=32,
-            kernel_size=7,
+            filters=int(NumFilters/2),
+            kernel_size=KernelSize,
             padding="same",
             strides=2,
             activation="relu",
         ),
         layers.Conv1DTranspose(
-            filters=32,
-            kernel_size=7,
+            filters=int(NumFilters/2),
+            kernel_size=KernelSize,
             padding="same",
             strides=2,
             activation="relu",
         ),
-        layers.Dropout(rate=0.2),
+        layers.Dropout(rate=DropOut),
         layers.Conv1DTranspose(
-            filters=64,
-            kernel_size=7,
+            filters=NumFilters,
+            kernel_size=KernelSize,
             padding="same",
             strides=2,
             activation="relu",
         ),
-        layers.Conv1DTranspose(filters=x_train.shape[2], kernel_size=7, padding="same"),
+        layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=KernelSize, padding="same"),
     ]
 )
 model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
 model.summary()
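
Note (an observation, not part of the commit): with padding="same" and strides=2, the encoder halves the 12-step window twice (12 -> 6 -> 3) and the two strided Conv1DTranspose layers mirror it back (3 -> 6 -> 12), so the network reconstructs its input window. Also, the last layer's filters=x_train[i].shape[2] leans on the loop variable i left over from the earlier data-loading loops; it works because every x_train[i] has NumFeatures channels, but indexing as the Input layer already does would be self-contained:

    # Hypothetical, more robust form of the final decoder layer:
    layers.Conv1DTranspose(filters=x_train[0].shape[2], kernel_size=KernelSize, padding="same"),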
@@ -124,39 +184,20 @@
-
-path_checkpoint = "model._checkpoint.weights.h5"
-es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
-
-modelckpt_callback = keras.callbacks.ModelCheckpoint(
-    monitor="val_loss",
-    filepath=path_checkpoint,
-    verbose=1,
-    save_weights_only=True,
-    save_best_only=True,
-)
+path_checkpoint="model_noclass_v2_checkpoint.weights.h5"
+es_callback=keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
+modelckpt_callback=keras.callbacks.ModelCheckpoint( monitor="val_loss", filepath=path_checkpoint, verbose=1, save_weights_only=True, save_best_only=True,)


 if options.train:
-    history = model.fit(
-        x_train,
-        x_train,
-        epochs=400,
-        batch_size=128,
-        validation_split=0.3,
-        callbacks=[  es_callback, modelckpt_callback      ],
-    )
-
-    plt.plot(history.history["loss"], label="Training Loss")
-    plt.plot(history.history["val_loss"], label="Validation Loss")
-    plt.legend()
-    plt.show()
+    history=model.fit( x_train[0], x_train[0], epochs=400, batch_size=128, validation_split=0.3, callbacks=[  es_callback, modelckpt_callback      ],)
 else:
     model.load_weights(path_checkpoint)


-x_train_pred = model.predict(x_train)
-train_mae_loss = np.mean(np.abs(x_train_pred - x_train), axis=1)
-threshold = np.max(train_mae_loss,axis=0)
+x_train_pred=model.predict(x_train[0])
+train_mae_loss=np.mean(np.abs(x_train_pred - x_train[0]), axis=1)
+threshold=np.max(train_mae_loss,axis=0)
+thresholdOrig=copy.deepcopy(threshold)

 print("Threshold : ",threshold)
-threshold=threshold*2
+threshold=threshold*ThresholdFactor
 # Threshold is enlarged because, otherwise, for subsamples at 5' have many false positives

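
Note: train_mae_loss has shape (num_windows, NumFeatures), so np.max(..., axis=0) yields one threshold per feature, and ThresholdFactor rescales that whole vector; thresholdOrig keeps the unscaled copy so plotFScore can sweep the factor later. A shape sketch with toy numbers:

    import numpy as np

    train_mae_loss = np.abs(np.random.randn(500, 4))  # toy (windows, features) errors
    threshold = np.max(train_mae_loss, axis=0)        # shape (4,): one threshold per feature
    thresholdOrig = threshold.copy()
    threshold = thresholdOrig * 1.4                   # ThresholdFactor applied per feature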
@@ -163,34 +204,225 @@
-x_test = create_sequences(dataTestNorm)
+
+#  1st scenario. Detect only anomaly.  Later, we will classiffy it
+# Test data=  testnormal + testfail1 + testtail2 + testfail3 + testfail4 + testnormal
+#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
+d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]))
+
+x_test = create_sequences(d)
 x_test_pred = model.predict(x_test)
 test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)

+
+# Define ranges for plotting in different colors
+testRanges=[]
+r=dataTestNorm[0].shape[0]
+testRanges.append([0,r])
+for i in range(1,NumberOfFailures+1):
+    rnext=r+dataTestNorm[i].shape[0]
+    testRanges.append([r,rnext] )
+    r=rnext
+
+# Drop the last TIME_STEPS for plotting
+testRanges[NumberOfFailures][1]=testRanges[NumberOfFailures][1]-TIME_STEPS
+
+
+def AtLeastOneTrue(x):
+    for i in range(NumFeatures):
+        if x[i]:
+            return True
+    return False
+
 anomalies = test_mae_loss > threshold
 anomalous_data_indices = []
 for i in range(anomalies.shape[0]):
-    if anomalies[i][0] or anomalies[i][1]:
+    if AtLeastOneTrue(anomalies[i]):
+    #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
         anomalous_data_indices.append(i)

-#print(anomalous_data_indices)
+# Let's plot some features

+colorline=['violet','lightcoral','cyan','lime','grey']
+colordot=['darkviolet','red','blue','green','black']

-# Let's plot only a couple of features
-def plotData2():
+#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
+featuresToPlot=features
+
+indexesToPlot=[]
+for i in featuresToPlot:
+    indexesToPlot.append(features.index(i))
+
+def plotData3():
+    NumFeaturesToPlot=len(indexesToPlot)
+    plt.rcParams.update({'font.size': 16})
     fig, axes = plt.subplots(
-        nrows=2, ncols=1, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
+        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
     )
-    axes[0].plot(range(len(x_train)),x_train[:,0,0],label="normal")
-    axes[0].plot(range(len(x_train),len(x_train)+len(x_test)),x_test[:,0,0],label="abnormal")
-    axes[0].plot(len(x_train)+np.array(anomalous_data_indices),x_test[anomalous_data_indices,0,0],color='red',marker='.',linewidth=0,label="abnormal detection")
-    axes[0].legend()
-    axes[1].plot(range(len(x_train)),x_train[:,0,1],label="normal")
-    axes[1].plot(range(len(x_train),len(x_train)+len(x_test)),x_test[:,0,1],label="abnormal")
-    axes[1].plot(len(x_train)+np.array(anomalous_data_indices),x_test[anomalous_data_indices,0,1],color='red',marker='.',linewidth=0,label="abnormal detection")
-    axes[1].legend()
-    axes[0].set_ylabel(features[0])
-    axes[1].set_ylabel(features[1])
+    for i in range(NumFeaturesToPlot):
+        init=0
+        end=testRanges[0][1]
+        axes[i].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,indexesToPlot[i]],label="No fail")
+        init=end
+        end+=(testRanges[1][1]-testRanges[1][0])
+        for j in range(1,NumberOfFailures+1):
+            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]],label="Fail type "+str(j), color=colorline[j-1])
+            if j<NumberOfFailures:
+                init=end
+                end+=(testRanges[j+1][1]-testRanges[j+1][0])
+        x=[]
+        y=[]
+        for k in anomalous_data_indices:
+            if (k+TIME_STEPS)<x_test.shape[0]:
+                x.append(k+TIME_STEPS)
+                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
+        axes[i].plot(x,y ,color='grey',marker='.',linewidth=0,label="Fail detection" )
+
+        if i==0:
+            axes[i].legend(bbox_to_anchor=(0.9, 0.4))
+        axes[i].set_ylabel(features[indexesToPlot[i]])
+        axes[i].grid()
+    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
     plt.show()

-plotData2()

+def anomalyMetric(testList):  # first of list is non failure data
+    # FP, TP: false/true positive
+    # TN, FN: true/false negative
+    # Sensitivity (recall): probab failure detection if data is fail: TP/(TP+FN)
+    # Specificity: true negative ratio given  data is OK: TN/(TN+FP)
+    # Accuracy: Rate of correct predictions:  (TN+TP)/(TN+TP+FP+FN)
+    # Precision: Rate of positive results:  TP/(TP+FP)
+    # F1-score: predictive performance measure: 2*Precision*Sensitity/(Precision+Sensitity)
+    # F2-score: predictive performance measure:  2*Specificity*Sensitity/(Specificity+Sensitity)
+
+    x_test = create_sequences(testList[0])
+    x_test_pred = model.predict(x_test)
+    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
+    anomalies = test_mae_loss > threshold
+    count=0
+    for i in range(anomalies.shape[0]):
+        if AtLeastOneTrue(anomalies[i]):
+            count+=1
+    FP=count
+    TN=anomalies.shape[0]-count
+    count=0
+    TP=np.zeros((NumberOfFailures))
+    FN=np.zeros((NumberOfFailures))
+    Sensitivity=np.zeros((NumberOfFailures))
+    Precision=np.zeros((NumberOfFailures))
+    for i in range(1,len(testList)):
+        x_test = create_sequences(testList[i])
+        x_test_pred = model.predict(x_test)
+        test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
+        anomalies = test_mae_loss > threshold
+        count=0
+        for j in range(anomalies.shape[0]):
+            if AtLeastOneTrue(anomalies[j]):
+                count+=1
+        TP[i-1] = count
+        FN[i-1] = anomalies.shape[0]-count
+        Sensitivity[i-1]=TP[i-1]/(TP[i-1]+FN[i-1])
+        Precision[i-1]=TP[i-1]/(TP[i-1]+FP)
+
+    GlobalSensitivity=TP.sum()/(TP.sum()+FN.sum())
+    Specificity=TN/(TN+FP)
+    Accuracy=(TN+TP.sum())/(TN+TP.sum()+FP+FN.sum())
+    GlobalPrecision=TP.sum()/(TP.sum()+FP)
+    F1Score= 2*GlobalPrecision*GlobalSensitivity/(GlobalPrecision+GlobalSensitivity)
+    F2Score = 2*Specificity*GlobalSensitivity/(Specificity+GlobalSensitivity)
+
+    print("Sensitivity: ",Sensitivity)
+    print("Global Sensitivity: ",GlobalSensitivity)
+    print("Precision: ",Precision)
+    print("Global Precision: ",GlobalPrecision)
+    print("Specifity: ",Specificity)
+    print("Accuracy: ",Accuracy)
+    print("F1Score: ",F1Score)
+    print("F2Score: ",F2Score)
+    print("FP: ",FP)
+    #return Sensitivity+Specifity
+    return (F1Score,F2Score)
+
+anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
+
+
+def plotFScore():
+    global threshold
+    res=[]
+    # plots FSCroe as a function of Threshold  Factor
+    tf=0.3
+    while tf<1.5:
+        threshold=thresholdOrig*tf
+        r=anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
+        res.append([tf,r[0],r[1]])
+        tf+=0.05
+
+    print(res)
+    ar=np.array((res))
+    plt.rcParams.update({'font.size': 16})
+    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 10), dpi=80, facecolor="w", edgecolor="k")
+    ln1=axes.plot(ar[:,0],ar[:,1],label="F1-Score",linewidth=4)
+    ax1=axes.twinx()
+    ln2=ax1.plot(ar[:,0],ar[:,2],label="F2-Score",linewidth=4,color='C3')
+    axes.set_xlabel("Threshold factor")
+    axes.set_ylabel("F1-Score")
+    ax1.set_ylabel("F2-Score")
+    lns = ln1+ln2
+    labs = [l.get_label() for l in lns]
+    axes.legend(lns, labs, loc=0)
+    axes.grid()
+    plt.show()
+
+#plotFScore()
+plotData3()
+
+exit(0)
+
+
+
+#  2nd scenario. Detect only anomaly.  Later, we will classiffy it
+# Test data=  testnormal + testfail1 + testtail2 + testfail3 + testfail4 + testnormal
+#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
+num=100
+d=np.vstack((dataTestNorm[0][0:num,:],dataTestNorm[1][0:num,:],dataTestNorm[0][num:2*num,:],dataTestNorm[2][70:70+num,:],dataTestNorm[0][2*num-90:3*num-90,:],dataTestNorm[3][50:num+50,:],dataTestNorm[0][150:150+num,:],dataTestNorm[4][0:num+TIME_STEPS,:]))
+
+x_test = create_sequences(d)
+x_test_pred = model.predict(x_test)
+test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
+
+
+anomalies = test_mae_loss > threshold
+anomalous_data_indices = []
+for i in range(anomalies.shape[0]):
+    if AtLeastOneTrue(anomalies[i]):
+    #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
+        anomalous_data_indices.append(i)
+
+def plotData4():
+    NumFeaturesToPlot=len(indexesToPlot)
+    plt.rcParams.update({'font.size': 16})
+    fig, axes = plt.subplots(
+        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
+    )
+    for i in range(NumFeaturesToPlot):
+        for j in range(1,NumberOfFailures+1):
+            if j==1:
+                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]],label="No fail", color='C0')
+            else:
+                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]], color='C0')
+            axes[i].plot(range(j*2*num-num,j*2*num),x_test[j*2*num-num:j*2*num,0,indexesToPlot[i]],label="File type "+str(j),color=colorline[j-1])
+        x=[]
+        y=[]
+        for k in anomalous_data_indices:
+            if (k+TIME_STEPS)<x_test.shape[0]:
+                x.append(k+TIME_STEPS)
+                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
+        axes[i].plot(x,y ,color='grey',marker='.',linewidth=0,label="Fail detection" )
+
+        if i==0:
+            axes[i].legend(bbox_to_anchor=(0.9, 0.4))
+        axes[i].set_ylabel(features[indexesToPlot[i]])
+        axes[i].grid()
+    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
+    plt.show()


+plotData4()
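
Usage note: training is opt-in via the OptionParser flag defined at the top of the script, so running python v2.py --train (or -t) fits the autoencoder and writes model_noclass_v2_checkpoint.weights.h5, while a plain python v2.py loads those weights and runs the evaluation and plots. Also worth noting: with the unconditional plotData3() and exit(0) in place, the 2nd-scenario block and plotData4() after them are currently unreachable.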
