
Add report file related to Review for explain line to line v5_class_V1.py

Masoud Hosseini 2 weeks ago
commit
2ff759a14f
1 changed file with 655 additions and 0 deletions
Review for explain line to line v5_class_V1.py

@@ -0,0 +1,655 @@
+# Csar Fdez, UdL, 2025
+# Changes from v1: normalization
+# In v1, each failure type has its own normalization parameters (means and stdevs)
+# In v2, the same mean and stdev are used for all data
+# v3.py trains the models looping over TIME_STEPS (4, 8, 12, 16, 20, 24, ...) to find the optimal threshold factor
+
+#  Derived from v3_class, which derives from v3.py with code from v1_multifailure.py
+#  This code does not train for multiple time steps!
+
+#  Partial and total blocked condenser are merged into one class.
+#  Construction of the train and test sets has changed; it is now done by days.
+
+
+# This script builds the foundation for time series prediction and classification with Keras.
+
+#####################################      Section 1: Libraries   ################################
+
+import pandas as pd # Data analysis, working with tabular data
+import matplotlib.pyplot as plt # Drawing and visualization
+import datetime # Working with dates and times; useful for time series data (although it is not used directly in this section)
+import numpy as np # Numerical calculations, especially with arrays
+import keras # Deep learning library for building and training neural networks
+import os.path # Working with file paths (checking for file existence, etc.)
+from keras import layers # Import the layers module, which includes the different neural network layers
+from optparse import OptionParser # Manage command line options, so the script can be run with different settings
+import copy # Create copies of objects (potentially for data or models)
+import pickle # Save and load Python objects (although it is not used directly in this section)
+
+
+parser = OptionParser() # Creates an OptionParser object to handle command line arguments.
+
+parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
+# Adds a -t or --train option. When this option is used (e.g., python your_script.py -t),
+# options.train is set to True; otherwise it defaults to False.
+# Used to control whether the script should train a new model or load a pre-trained one.
+
+parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS ", default=12)
+# Adds the -n or --timesteps option to specify the number of time steps.
+# The default is 12. This is very important for time series tasks, as it determines the length of the input sequence for the model.
+
+parser.add_option("-r", "--transition", dest="transition", help="Includes transition data (false)", default=False, action="store_true")
+# If specified, data from the transition period between normal operation and failure is included (default: False).
+
+parser.add_option("-p", "--plot", dest="plot", help="Only plot data (false)", default=False, action="store_true")
+# If specified, the script only plots the data and exits.
+
+#parser.add_option("-f", "--thresholdfactor", dest="TF", help="Threshold Factor ", default=1.4)
+# A threshold makes no sense when classifying, because we apply many models and decide the class by the lowest MSE.
+
+# In previous versions, a threshold factor was used.
+# Why is the threshold factor not used in this version?
+# Because the class is decided by the model with the lowest reconstruction error (MSE), a threshold is no longer needed.
+
+(options, args) = parser.parse_args() # Parse command line arguments and store them in the options object
+
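+# Usage sketch (hypothetical invocations; the script file name is an assumption here):
+#   python v5_class_V1.py --train -n 12     # train all per-class models with 12-step windows
+#   python v5_class_V1.py -n 12 -r          # load saved weights and evaluate, including transition data
+#   python v5_class_V1.py -p                # only plot the test data and exit
+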
+#####################################      Section 2: Defining Data and Files   ################################
+
+# Data file arrays. Index:
+# 0.  No failure
+# 1.  Blocked evaporator
+# 2.  Fully blocked condenser
+# 3.  Partially blocked condenser
+# 4.  Condenser fan not working
+# 5.  Open door
+
+
+NumberOfFailures=4  # So far, we only have data for the first 4 failure types
+# Determines the number of failure types; here 4 failure types are considered
+datafiles=[[],[]]   # 0 for train,  1 for test
+# Create a nested list to store the data file names. Index 0 is for training data and index 1 is for test data.
+
+for i in range(NumberOfFailures+1): # Initialize the inner lists for each class (class 0 for the no-failure mode and classes 1 to 4 for the failure types)
+    datafiles[0].append([])
+    datafiles[1].append([])
+
+# The next set of data corresponds to a freezer with set point SP=-26 °C
+datafiles[0][0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_'] # Data files for class 0 (no failure) in the training set. File names include the date.
+datafiles[0][1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_'] # Data files for class 1 (blocked evaporator) in the training set
+datafiles[0][2]=['2024-12-18_5_','2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_'] # Data files for class 2 (fully blocked condenser) in the training set
+datafiles[0][3]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_'] # Data files for class 3 (partially blocked condenser) in the training set
+datafiles[0][4]=['2025-02-13_5_','2025-02-14_5_'] # Data files for class 4 (condenser fan not working) in the training set
+
+if options.transition: # Checks whether the --transition option is enabled on the command line.
+    datafiles[1][0]=['2025-01-27_5_','2025-01-28_5_'] # Data files for class 0 in the test set (including transition data)
+    datafiles[1][1]=['2024-12-14_5_','2024-12-15_5_','2024-12-16_5_']  # with TRANSITION # Data files for class 1 in the test set (including transition data)
+    datafiles[1][2]=['2024-12-17_5_','2024-12-19_5_','2024-12-25_5_','2024-12-26_5_'] # with TRANSITION # Data files for class 2 in the test set (including transition data)
+    datafiles[1][3]=['2024-12-27_5_','2024-12-31_5_','2025-01-01_5_'] # with TRANSITION # Data files for class 3 in the test set (including transition data)
+    datafiles[1][4]=['2025-02-12_5_','2025-02-15_5_','2025-02-16_5_'] # Data files for class 4 in the test set (including transition data)
+
+else: # If --transition is not enabled
+    datafiles[1][0]=['2025-01-27_5_','2025-01-28_5_'] # Data files for class 0 in the test set (without transition data)
+    datafiles[1][1]=['2024-12-14_5_','2024-12-15_5_'] # Data files for class 1 in the test set (without transition data)
+    datafiles[1][2]=['2024-12-19_5_','2024-12-25_5_','2024-12-26_5_'] # Data files for class 2 in the test set (without transition data)
+    datafiles[1][3]=['2024-12-31_5_','2025-01-01_5_'] # Data files for class 3 in the test set (without transition data)
+    datafiles[1][4]=['2025-02-15_5_','2025-02-16_5_'] # Data files for class 4 in the test set (without transition data)
+
+
+#datafiles[0][4]=['2025-02-05_5_'] 
+#datafiles[1][4]=['2025-02-05_5_'] 
+
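+# Indexing sketch (illustrative only): datafiles[split][class] holds a list of file-name prefixes,
+# e.g. datafiles[0][2] is the list of training files for class 2 (fully blocked condenser):
+#   ['2024-12-18_5_', '2024-12-21_5_', ...]
+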
+#####################################      Section 3: Features   ################################
+
+
+#r1s5 supply air flow temperature
+#r1s1 inlet evaporator temperature
+#r1s4 condenser outlet
+
+# Variables r1s4 and pa1 apiii may not exist in cloud controllers
+
+
+features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
+features=['r1 s1','r1 s4','r1 s5']
+features=['r1 s5']
+# Feature combination suggested by AKO
+#features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
+features=['r1 s1','r1 s4','r1 s5'] # Redefinition of the feature list (this appears to be the final list of features used)
+#features=['r1 s1','r1 s5','pa1 apiii']
+#features=['r1 s5','pa1 apiii']
+#features=['r1 s1','r1 s5']
+#features=['r1 s5']
+
+
+featureNames={} # Dictionary mapping feature names to labels that can be displayed in figures
+featureNames['r1 s1']='$T_{evap}$' 
+featureNames['r1 s4']='$T_{cond}$'
+featureNames['r1 s5']='$T_{air}$'
+featureNames['pa1 apiii']='$P_{elec}$'
+
+unitNames={} # Dictionary mapping feature names to their units
+unitNames['r1 s1']='$(^{o}C)$'
+unitNames['r1 s4']='$(^{o}C)$'
+unitNames['r1 s5']='$(^{o}C)$'
+unitNames['pa1 apiii']='$(W)$'
+
+NumFeatures=len(features) # Number of features
+
+#####################################      Section 4: Data Loading and Preprocessing   ################################
+
+
+df_list=[[],[]] # Nested list to store the Pandas DataFrames. Index 0 is for training data and index 1 is for test data.
+for i in range(NumberOfFailures+1): # Initialize the inner lists for each class
+    df_list[0].append([])
+    df_list[1].append([])
+
+for i in range(NumberOfFailures+1): # Loop to process the data of each class in the training set
+    dftemp=[] # Temporary list to store the DataFrames read from the different files of one class
+    for f in datafiles[0][i]: # Loop over each file belonging to the current class in the training set
+        print("             ", f) # Print the name of the file being processed.
+
+        script_dir = os.path.dirname(os.path.abspath(__file__)) # Path of the directory containing the current script
+        data_dir = os.path.join(script_dir, 'data') # Path of the 'data' directory where the CSV files are located
+        file_path = os.path.join(data_dir, f + '.csv') # Full path of the CSV file
+
+        print(f"Currently attempting to open: {file_path}") # Print the path being opened (for debugging)
+
+        df1 = pd.read_csv(file_path) # Read the CSV file into a Pandas DataFrame
+        dftemp.append(df1) # Add the DataFrame to the temporary list dftemp
+    df_list[0][i]=pd.concat(dftemp) # Concatenate all DataFrames of the class and store the result in df_list[0][i]
+
+########## A similar block loads the test data
+
+for i in range(NumberOfFailures+1):
+    dftemp=[]
+    for f in datafiles[1][i]:
+        print("             ", f)
+
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        data_dir = os.path.join(script_dir, 'data')
+        file_path = os.path.join(data_dir, f + '.csv')
+
+        print(f"Currently attempting to open: {file_path}")
+
+        df1 = pd.read_csv(file_path)
+        dftemp.append(df1)
+    df_list[1][i]=pd.concat(dftemp)
+
+
+# The data is subsampled to 5-minute intervals (30 samples of 10 seconds each)
+# subsampled to 5'  =  30 * 10"
+# We consider samples every 5' because in production we will only have data at this frequency
+subsamplingrate=30 # Subsampling rate
+
+dataframe=[[],[]] # Nested list to store the subsampled DataFrames. Index 0 for training data and index 1 for test data.
+for i in range(NumberOfFailures+1): # Initialize the inner lists for each class
+    dataframe[0].append([])
+    dataframe[1].append([])
+
+for i in range(NumberOfFailures+1): # Loop to process the data of each class in the training set
+    datalength=df_list[0][i].shape[0] # Number of rows in the original DataFrame
+    dataframe[0][i]=df_list[0][i].iloc[range(0,datalength,subsamplingrate)][features] 
+    # Take every subsamplingrate-th row and keep only the selected feature columns
+    dataframe[0][i].reset_index(inplace=True,drop=True) # Reset the DataFrame index and drop the old index
+    dataframe[0][i].dropna(inplace=True) # Drop rows containing NaN values
+
+for i in range(NumberOfFailures+1): # Similar block for subsampling and preprocessing the test data
+    datalength=df_list[1][i].shape[0]
+    dataframe[1][i]=df_list[1][i].iloc[range(0,datalength,subsamplingrate)][features]
+    dataframe[1][i].reset_index(inplace=True,drop=True)
+    dataframe[1][i].dropna(inplace=True)
+
+
+# Train data is [0] and test data is [1]
+dataTrain=[] # List to store the training DataFrames
+dataTest=[] # List to store the test DataFrames
+for i in range(NumberOfFailures+1): # Copy the processed DataFrames into the dataTrain and dataTest lists
+    dataTrain.append(dataframe[0][i])
+    dataTest.append(dataframe[1][i])
+
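+# Arithmetic sketch: raw samples arrive every 10 s, so keeping every 30th row gives one sample
+# per 30 * 10 s = 300 s = 5 minutes; a full day of raw data (8640 rows) becomes 288 rows.
+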
+#####################################      Section 5: Data Normalization   ################################
+
+# Calculate means and stdevs
+a=dataTrain[0] # Initialize with the training DataFrame of class 0
+for i in range(1,NumberOfFailures+1): # Stack the training DataFrames of all classes vertically into one NumPy array to compute the mean and standard deviation
+    a=np.vstack((a,dataTrain[i]))
+
+means=a.mean(axis=0) # Mean of each feature over the entire training data
+stdevs=a.std(axis=0) # Standard deviation of each feature over the entire training data
+def normalize2(train,test): # Function that normalizes data using the training mean and standard deviation
+    return( (train-means)/stdevs, (test-means)/stdevs )
+
+dataTrainNorm=[] # List to store the normalized training data
+dataTestNorm=[] # List to store the normalized test data
+for i in range(NumberOfFailures+1): # Initialize the inner lists
+    dataTrainNorm.append([])
+    dataTestNorm.append([])
+
+for i in range(NumberOfFailures+1): # Normalize the training and test data of each class
+    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
+
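+# Numeric sketch of normalize2 (invented values, not from the real data): with means=[-10.0]
+# and stdevs=[2.0], a reading of -6.0 maps to (-6.0 - (-10.0)) / 2.0 = 2.0, i.e. two standard
+# deviations above the training mean. The same training statistics are applied to the test set.
+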
+#####################################      Section 6: Creating Time Sequences   ################################
+
+# Definition of the model hyperparameters and helper functions
+
+NumFilters=64 # Number of filters in the convolutional layers
+KernelSize=7 # Kernel size in the convolutional layers
+DropOut=0.2 # Dropout rate, to prevent overfitting
+def create_sequences(values, time_steps): # Function that converts time series data into input sequences for the model. It creates overlapping sequences of length time_steps
+    output = []
+    for i in range(len(values) - time_steps + 1):
+        output.append(values[i : (i + time_steps)])
+    return np.stack(output)
+
+def listToString(l): # Function that converts a list to a string without spaces (used for naming files)
+    r=''
+    for i in l:
+        r+=str(i)
+    return(r.replace(' ',''))
+
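+# Shape sketch for create_sequences (illustrative, not part of the original script):
+#   create_sequences(np.zeros((100, 3)), 12).shape == (89, 12, 3)
+# i.e. len(values) - time_steps + 1 overlapping windows, each spanning time_steps rows.
+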
+#####################################      Section 7: Model Definition   ################################
+
+model=[] # List to store the per-class models
+modelckpt_callback =[] # List of ModelCheckpoint callbacks, used to store the best model weights during training
+es_callback =[]
+# An empty list, later populated with EarlyStopping callbacks.
+# These callbacks act like a smart supervisor:
+# they monitor the model's performance on the validation data,
+# and if the performance (e.g., the error) stops improving,
+# they stop training to avoid the common problem of overfitting (learning
+# the training data too closely and performing poorly on new data).
+path_checkpoint=[] # List to store the checkpoint file path of each model
+timesteps=int(options.timesteps) # Number of time steps, taken from the command line argument (options.timesteps)
+x_train=[] # List to hold the training sequences of each class
+
+# A loop runs over each failure class:
+# The normalized data is converted to sequences using create_sequences.
+# A Keras Sequential model is created. This model is an autoencoder that uses Conv1D layers to encode into a latent space and Conv1DTranspose layers to decode back to the input space.
+# The model is compiled with the Adam optimizer and the MSE loss function.
+# A summary of the model is printed.
+# The checkpoint file path where the model weights will be stored is set.
+# The EarlyStopping and ModelCheckpoint callbacks are defined.
+
+for i in range(NumberOfFailures+1): # Loop that creates the model and prepares the training data for each class
+    x_train.append(create_sequences(dataTrainNorm[i],timesteps)) # Create input sequences from the normalized training data of the current class
+    model.append([]) # Add a placeholder to the model list
+
+    # Define a Keras Sequential model for the current class. The model stacks Conv1D and Conv1DTranspose layers to build an autoencoder
+    model[i] = keras.Sequential(
+        [
+            layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])), # Input layer with shape (time_steps, num_features)
+            layers.Conv1D( # 1D convolutional layer for feature extraction
+                filters=NumFilters,
+                kernel_size=KernelSize,
+                padding="same",
+                strides=2,
+                activation="relu",
+            ),
+            layers.Dropout(rate=DropOut), # Dropout layer to prevent overfitting
+            layers.Conv1D(
+                filters=int(NumFilters/2),
+                kernel_size=KernelSize,
+                padding="same",
+                strides=2,
+                activation="relu",
+            ),
+            layers.Conv1DTranspose( # Transposed convolution that starts reconstructing the input
+                filters=int(NumFilters/2),
+                kernel_size=KernelSize,
+                padding="same",
+                strides=2,
+                activation="relu",
+            ),
+            layers.Dropout(rate=DropOut),
+            layers.Conv1DTranspose(
+                filters=NumFilters,
+                kernel_size=KernelSize,
+                padding="same",
+                strides=2,
+                activation="relu",
+            ),
+            layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=KernelSize, padding="same"),
+        ]
+    )
+    model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse") # Compile the model with the Adam optimizer and the mean squared error (MSE) loss
+    model[i].summary() # Print the model summary
+    path_checkpoint.append("model_class_v5_"+str(i)+"_"+str(timesteps)+listToString(features)+"_checkpoint.weights.h5") # Build the file path for the model checkpoint
+    es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)) # Early-stopping callback: stop training if val_loss shows no improvement for 15 epochs
+    # Callback that stores the best model weights based on val_loss
+    modelckpt_callback.append(keras.callbacks.ModelCheckpoint( monitor="val_loss", filepath=path_checkpoint[i], verbose=1, save_weights_only=True, save_best_only=True,))
+
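+# Shape trace through the autoencoder for timesteps=12 and 3 features (illustrative; with
+# "same" padding, stride 2 halves the time axis on the way in and doubles it on the way out):
+#   Input                      (12, 3)
+#   Conv1D(64, stride 2)       (6, 64)
+#   Conv1D(32, stride 2)       (3, 32)   <- latent representation
+#   Conv1DTranspose(32, s=2)   (6, 32)
+#   Conv1DTranspose(64, s=2)   (12, 64)
+#   Conv1DTranspose(3)         (12, 3)   <- reconstruction of the input window
+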
+#####################################      Section 8: Training or Loading the Model   ################################
+
+# If options.train is True:
+# A history list is created to keep track of the training history of each model.
+# A loop runs over each class, and the corresponding model is trained on that class's training data. Callbacks handle early stopping and saving the best weights.
+# The model predictions are computed on the training data.
+# Otherwise (if options.train is False):
+# Pre-trained weights are loaded from the checkpoint files.
+
+if options.train: # If the --train option is enabled on the command line
+    history=[] # List to store the training history
+    for i in range(NumberOfFailures+1): # Loop to train the model of each class
+        # Train the model on the current class's training data (input and target are the same, as in any autoencoder)
+        history.append(model[i].fit( x_train[i], x_train[i], epochs=400, batch_size=128, validation_split=0.3, callbacks=[  es_callback[i], modelckpt_callback[i]      ],))
+
+        x_train_pred=model[i].predict(x_train[i]) # Predict the model output on the training data (to check performance)
+else: # If --train is not enabled
+    for i in range(NumberOfFailures+1): # Load the pre-trained weights from the checkpoint files
+        model[i].load_weights(path_checkpoint[i])
+
+#####################################      Section 9: Data Plotting Function (Primary)   ################################
+
+# Let's plot some features
+
+colorline=['black','violet','lightcoral','cyan','lime','grey'] # Colors for the lines
+colordot=['grey','darkviolet','red','blue','green','black'] # Colors for the dots
+
+#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
+featuresToPlot=features # The features to plot; here, the full feature list
+
+indexesToPlot=[] # List to store the indices of the features to plot
+for i in featuresToPlot: # Find the index of each feature in the original list
+    indexesToPlot.append(features.index(i))
+
+
+def plotData(): # Function that plots the test data of each class
+    NumFeaturesToPlot=len(indexesToPlot) # Number of features to plot
+    plt.rcParams.update({'font.size': 16}) # Font size for the plot
+    fig, axes = plt.subplots( # Create the figure and sub-axes
+        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
+    )
+    for i in range(NumFeaturesToPlot): # For each feature, plot the data of every class (denormalized back to physical units)
+        init=0
+        end=testRanges[0][1]
+        for j in range(NumberOfFailures+1):
+            if NumFeaturesToPlot==1:
+                axes.plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[i]+means[i],label="Class "+str(j), color=colorline[j],linewidth=1)
+            else:
+                axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[i]+means[i],label="Class "+str(j), color=colorline[j],linewidth=1)
+            if j<NumberOfFailures:
+                init=end
+                end+=(testRanges[j+1][1]-testRanges[j+1][0])
+
+        s='' # Build the axis label from the feature name and its unit
+        s+=featureNames[features[indexesToPlot[i]]]
+        s+=' '+unitNames[features[indexesToPlot[i]]]
+        if NumFeaturesToPlot==1:
+            axes.set_ylabel(s)
+            axes.grid()
+        else:
+            axes[i].set_ylabel(s)
+            axes[i].grid()
+
+    if NumFeaturesToPlot==1:
+        axes.legend(ncol=4,loc=(0.1,0.98))
+    else:
+        axes[0].legend(ncol=4,loc=(0.1,0.98))
+    plt.show()
+
+#####################################      Section 10: Preparing Test Data   ################################
+
+#   2nd scenario. Go over the anomalies and classify each one by the lowest error
+#datalist=[dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3]]
+
+
+# Anomaly classification:
+# datalist: A list of the normalized test data of each class.
+# x_test: The test data converted into sequences using create_sequences.
+# testRanges: A list of ranges of x_test indices corresponding to each class, used to plot each class in a different color.
+# If options.plot is True, the plotData function is called and the program exits.
+# testClasses: A list of the actual classes corresponding to each range in testRanges.
+# A check ensures that testClasses and testRanges have the same length.
+# x_test_predict: A list to hold the predictions of each autoencoder model on the test data.
+# A loop runs over each model and computes its predictions on x_test.
+# test_mae_loss: A list holding the mean absolute error (MAE) between the predictions and the actual data for each model.
+# test_mae_loss_average: The MAE averaged over the features.
+# classes: An array with the predicted class of each test sequence; the predicted class is the one whose model gives the lowest MAE.
+
+datalist=[dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]] # List of the test data of all classes
+x_test=create_sequences(datalist[0],int(options.timesteps)) # Create input sequences from the test data of class 0
+for i in range(1,len(datalist)): # Stack the input sequences of all classes into one array
+    x_test=np.vstack((x_test,create_sequences(datalist[i],int(options.timesteps))))
+
+# Define the ranges used for plotting in different colors
+testRanges=[] # List to store the index range of each class within the x_test array
+r=0 # Initialize the starting index
+for i in range(len(datalist)): # Compute the index interval of each class
+    testRanges.append([r,r+datalist[i].shape[0]-int(options.timesteps)+1])
+    r+=datalist[i].shape[0]-int(options.timesteps)+1
+
+if options.plot: # If the --plot option is enabled on the command line
+# Only plot the data and exit
+    plotData() # Call the plotting function
+    exit(0) # Exit the program
+
+testClasses=[0,1,2,3,4] # List of the actual classes of the test data
+
+if not len(testClasses)==len(testRanges): # Check that the class and range lists have the same length
+    print("ERROR:  testClasses and testRanges must have same length")
+    exit(0)
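+# Worked sketch of the testRanges bookkeeping (invented lengths, timesteps=12): if class 0
+# contributes 500 subsampled rows and class 1 contributes 400,
+#   class 0 -> 500 - 12 + 1 = 489 sequences, range [0, 489)
+#   class 1 -> 400 - 12 + 1 = 389 sequences, range [489, 878)
+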
+#####################################      Section 11: Prediction and Classification   ################################
+
+x_test_predict=[] # List to store each model's predictions on the test data
+for m in range(NumberOfFailures+1): # Predict the output of each model on the entire test data
+    x_test_predict.append(model[m].predict(x_test))
+
+x_test_predict=np.array((x_test_predict)) # Convert the list of predictions to a NumPy array
+test_mae_loss =[] # List to store the mean absolute error (MAE) of each model
+for m in range(NumberOfFailures+1): # Compute the MAE between each model's predictions and the actual data
+    test_mae_loss.append(np.mean(np.abs(x_test_predict[m,:,:,:] - x_test), axis=1))
+
+test_mae_loss=np.array((test_mae_loss)) # Convert the MAE list to a NumPy array
+test_mae_loss_average=np.mean(test_mae_loss,axis=2)  # average over features # Average MAE of each sample across the features
+classes=np.argmin(test_mae_loss_average,axis=0)    # Choose the minimum loss # Predicted class of each sample: the model with the lowest MAE
+
+# Plotting the classification results:
+# x and y: Lists holding the indices and feature values of the misclassified samples.
+# A loop runs over each class and then over each sample in that class's range. If the predicted class differs from the actual class, the corresponding index and feature value are added to the x and y lists.
+# plotData4: A function similar to plotData that also marks the misclassified points on the graph.
+
+x=[] # List for the indices of the misclassified samples
+y=[] # List for the feature values of the misclassified samples
+for j in range(NumberOfFailures+1): # Initialize one inner list per class
+    x.append([])
+    y.append([])
+for j in range(NumberOfFailures+1): # Find the misclassified samples and store their indices and feature values, grouped by predicted class
+    for k in range(testRanges[j][0],testRanges[j][1]):
+        if not  classes[k]==testClasses[j]:
+            x[classes[k]].append(k)
+            y[classes[k]].append(x_test[k,0,indexesToPlot[0]]*stdevs[0]+means[0])
+
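+# Shape sketch of the classification step (illustrative, N = number of test sequences):
+#   x_test_predict:        (5, N, timesteps, features)  - one reconstruction per class model
+#   test_mae_loss:         (5, N, features)             - MAE taken over the time axis (axis=1)
+#   test_mae_loss_average: (5, N)                       - mean over the features (axis=2)
+#   classes:               (N,)                         - argmin over the model axis (axis=0)
+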
+#####################################      Section 12: Data plotting function with anomaly detection   ################################
+
+# A function similar to plotData that additionally marks the misclassified samples
+# Define a function that plots the test data and shows the misclassified samples
+
+def plotData4():
+    NumFeaturesToPlot=len(indexesToPlot)
+    plt.rcParams.update({'font.size': 16})
+    fig, axes = plt.subplots(
+        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
+    )
+    for i in range(NumFeaturesToPlot):
+        init=0
+        end=testRanges[0][1]
+        for j in range(NumberOfFailures+1):
+            if NumFeaturesToPlot==1:
+                axes.plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[i]+means[i],label="Class "+str(j), color=colorline[j],linewidth=1)
+            else:
+                axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[i]+means[i],label="Class "+str(j), color=colorline[j],linewidth=1)
+            if j<NumberOfFailures:
+                init=end
+                end+=(testRanges[j+1][1]-testRanges[j+1][0])
+
+            #if i==0:
+            #    axes[0].plot(x[j],y[j] ,color=colordot[j],marker='.',markersize=10,linewidth=0,label="Fail detect  class "+str(j) )
+
+        s=''
+        s+=featureNames[features[indexesToPlot[i]]]
+        s+=' '+unitNames[features[indexesToPlot[i]]]
+        if NumFeaturesToPlot==1:
+            axes.set_ylabel(s)
+            axes.set_xlabel('Sample number')
+            axes.grid()
+        else:
+            axes[i].set_ylabel(s)
+            axes[NumFeaturesToPlot-1].set_xlabel('Sample number')
+            axes[i].grid()
+
+    for j in range(NumberOfFailures+1): # Overlay the misclassified samples as dots, colored by predicted class
+        if NumFeaturesToPlot==1:
+            axes.plot(x[j],y[j] ,color=colordot[j],marker='.',markersize=10,linewidth=0,label="Fail detect  class "+str(j) )
+        else:
+            axes[0].plot(x[j],y[j] ,color=colordot[j],marker='.',markersize=10,linewidth=0,label="Fail detect  class "+str(j) )
+
+    if NumFeaturesToPlot==1:
+        axes.legend(ncol=4,loc=(0.1,0.98))
+    else:
+        axes[0].legend(ncol=4,loc=(0.1,0.98))
+
+    #axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
+    plt.show()
+#####################################      Section 13: Helper function for determining class   ################################
+
+# Define a function that returns the actual class of a sample given its index
+
+def whichClass(k,ranges):
+    for i in range(NumberOfFailures+1):
+        if k in range(ranges[i][0],ranges[i][1]):
+            return(i)
+    print("Error:  Class not exists")
+    exit(0)
+
+#####################################      Section 14: Calculating the anomaly metric   ################################
+
+
+# Classification performance evaluation:
+# whichClass: A helper function that returns the true class of a sample given its index and the ranges defined in testRanges.
+# anomalyMetric: A function that computes classification performance measures (sensitivity, precision and F1-score) for each class.
+# TP, FP, FN: Arrays holding the number of true positives, false positives and false negatives of each class.
+# Loops run over the test data to accumulate TP, FP and FN.
+# Sensitivity, precision and F1-score are computed and printed, per class and averaged.
+# The anomalyMetric function is called with the classification results and the test ranges.
+# computeDelay: A function that computes the delay until a failure is correctly detected, for a list of classes (when there are transition periods).
+# d: An array holding the delay of each class.
+# NoFailsInARow: The number of consecutive samples that must be correctly classified for the failure to be considered detected.
+# It loops over the specified classes and computes the delay, defined as the number of samples from the start of the class range until NoFailsInARow consecutive samples are correctly classified.
+# The average delay is computed and printed.
+# The computeDelay function is called for classes 2, 3, and 4.
+# Finally, the plotData4 function is called to display the classification results.
+
+
+##   implement anomaly metrics for each failure class
+def anomalyMetric(classes,testranges,testclasses):
+########## Function computing the performance evaluation measures of the classification (sensitivity, precision, F1-score):
+# Calculate TP, FP, FN for each class.
+# Calculate sensitivity and precision for each class.
+# Calculate the average sensitivity and precision.
+# Calculate the F1-score.
+# Print the results.
+
+    # FP, TP: false/true positive
+    # TN, FN: true/false negative
+    # Sensitivity (recall): probability of detecting a failure when the data is faulty: TP/(TP+FN)
+    # Precision: rate of correct positive results: TP/(TP+FP)
+    # F1-score: predictive performance measure: 2*Precision*Sensitivity/(Precision+Sensitivity)
+    TP=np.zeros(NumberOfFailures+1)
+    FP=np.zeros(NumberOfFailures+1)
+    FN=np.zeros(NumberOfFailures+1)
+    Sensitivity=np.zeros(NumberOfFailures+1)
+    Precision=np.zeros(NumberOfFailures+1)
+    for i in range(len(testranges)): # Samples predicted as their true class count as TP; otherwise they count as FP of the true class
+        for k in range(testranges[i][0],testranges[i][1]):
+            if classes[k]==testclasses[i]:
+                TP[i]+=1
+            else:
+                FP[i]+=1
+    for k in range(testranges[NumberOfFailures][1]): # Accumulate the FN counts from samples whose predicted class differs from their true class
+        for i in range(len(testranges)):
+            classK=whichClass(k,testranges)
+            if not classK==testClasses[i]:
+                if not classes[k]==classK:
+                    FN[classes[k]]+=1
+
+    for i in range(NumberOfFailures+1):
+        if (TP[i]+FN[i])>0:
+            Sensitivity[i]=TP[i]/(TP[i]+FN[i])
+        else:
+            Sensitivity[i]=0
+        Precision[i]=TP[i]/(TP[i]+FP[i])
+    S=Sensitivity.mean()
+    P=Precision.mean()
+    F1=2*S*P/(S+P)
+    print("Sensitivity: ",Sensitivity) 
+    print("S: ",S) 
+    print("Precision: ",Precision) 
+    print("P: ",P) 
+    print("F1-Score: ",F1)
+
+anomalyMetric(classes,testRanges,testClasses) # Compute and print the evaluation measures
+
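+# Numeric sketch with invented counts: if for one class TP=90, FP=10, FN=30, then
+#   Sensitivity = 90/(90+30) = 0.75,  Precision = 90/(90+10) = 0.90,
+#   F1 = 2*0.75*0.90/(0.75+0.90) ~ 0.82
+# (note the script averages S and P over the classes first and computes F1 from those means).
+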
+#####################################      Section 15: Detection Delay   ################################
+
+# Compute the delay until correct detection for a list of ranges (when transition data exists)
+
+# Define a function that computes the delay until a failure is correctly detected (when there is transition data).
+# NoFailsInARow: the number of consecutive samples that must be correctly classified.
+# For each class, count samples until NoFailsInARow consecutive samples are correctly classified,
+# record that count as the delay, and finally print and return the average delay.
+
+def computeDelay(l,classes,testRanges,testClasses):
+    d=np.zeros(len(l))
+    NoFailsInARow=4
+    ind=0
+    for i in l:
+        start=testRanges[i][0]
+        count=0
+        while start<testRanges[i][1]:
+            if classes[start]==testClasses[i]:
+                count+=1
+            if count==NoFailsInARow or start==(testRanges[i][1]-1):
+                count=0
+                #print(start,start-testRanges[i][0]-NoFailsInARow+timesteps)
+                d[ind]=start-testRanges[i][0]-NoFailsInARow+timesteps
+                break
+            start+=1
+        ind+=1
+    print(d)
+    return(d.mean())
+
+d=computeDelay([2,3,4],classes,testRanges,testClasses) # Compute the delay for classes 2, 3, and 4
+print("Delay: ",d) 
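+# Worked sketch (invented numbers): with NoFailsInARow=4 and timesteps=12, if the 4th
+# consecutive correct prediction of a class occurs 10 samples into its range, the recorded
+# delay is 10 - 4 + 12 = 18 samples, i.e. 18 * 5 = 90 minutes of data at the 5-minute rate.
+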
+#####################################      Section 16: Plotting Data with Final Results   ################################
+
+plotData4() # Plot the classification results.
+
+# In short, this code is a fault detection and classification system based on autoencoders.
+# For each fault class (and the no-fault class), an autoencoder is trained.
+# In the testing phase, test samples are passed through all autoencoders, and the class whose
+# model yields the lowest reconstruction error is selected as the predicted class.
+# The performance is then evaluated using various metrics.
