# Csar Fdez, UdL, 2025
# Changes from v1: Normalization
# In v1, each failure type has its own normalization parameters (mean and stdev)
# In v2, the mean and stdev are the same for all data
# v3.py trains the models looping over TIME_STEPS (4,8,12,16,20,24,...) and finds the optimal threshold factor
# Derived from v3_class, derived from v3.py with code from v1_multifailure.py
# This code does not train for multiple time steps!!
# Partial and total blocked condenser merged into one class.
# Construction of train and test sets changed. It is now done by days.
# This script builds the foundation for time series prediction and classification using Keras.

#####################################  Section 1: Libraries  ################################

import pandas as pd                  # Data analysis, working with tabular data
import matplotlib.pyplot as plt      # Plotting and visualization
import datetime                      # Working with dates and times, useful for time series data (not used directly in this section)
import numpy as np                   # Numerical calculations, especially with arrays
import keras                         # Deep learning library for building and training neural networks
import os.path                       # Working with file paths (checking for file existence, etc.)
from keras import layers             # The layers module, which provides the different neural network layers
from optparse import OptionParser    # Command line option handling, so the script can be run with different settings
import copy                          # Creating copies of objects (potentially for data or models)
import pickle                        # Saving and loading Python objects (not used directly in this section)

parser = OptionParser()              # Create an OptionParser object to handle command line arguments

parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
# Adds a -t or --train option. When this option is used (e.g., python your_script.py -t),
# options.train is set to True; otherwise it defaults to False.
# Used to control whether the script trains new models or loads pre-trained ones.

parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS ", default=12)
# Adds the -n or --timesteps option to specify the number of time steps (default: 12).
# This is very important for time series tasks, as it determines the length of the input sequence for the model.

parser.add_option("-r", "--transition", dest="transition", help="Includes transition data (false)", default=False, action="store_true")
# If specified, data from the transition period between normal operation and failure is included (default: False).

parser.add_option("-p", "--plot", dest="plot", help="Only plot data (false)", default=False, action="store_true")
# If specified, the script only plots the data.

#parser.add_option("-f", "--thresholdfactor", dest="TF", help="Threshold Factor ", default=1.4)
# A threshold makes no sense when classifying, because we apply all the models and choose the class with the lowest MSE.
# In previous versions, a threshold factor was used.
# Why is the threshold factor not used in this version?
# Because the class is decided from the model with the lowest reconstruction error (MSE), a detection
# threshold is no longer needed. (An illustrative sketch of this decision rule is included just before
# the training code in Section 8.)

(options, args) = parser.parse_args()   # Parse command line arguments and store them in the options object

#####################################  Section 2: Defining Data and Files  ################################

# data files arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Full Blocked condenser
# 3. Partial Blocked condenser
# 4. Fan condenser not working
# 5. Open door

NumberOfFailures=4   # So far, we only have data for the first 4 failure types

datafiles=[[],[]]    # 0 for train, 1 for test
# Nested lists with the data file names. Index 0 is for training data and index 1 is for test data.
for i in range(NumberOfFailures+1):
    # Initialize an inner list for each class (class 0 is the no-failure mode, classes 1 to 4 are the failure types)
    datafiles[0].append([])
    datafiles[1].append([])

# The next set of data corresponds to the Freezer, SP=-26
# The data comes from a freezer with a set point (SP) of -26 C
datafiles[0][0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_']                   # Training files for class 0 (no failure). File names include the date.
datafiles[0][1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_']                                  # Training files for class 1 (blocked evaporator)
datafiles[0][2]=['2024-12-18_5_','2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_']   # Training files for class 2 (fully blocked condenser)
datafiles[0][3]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_']                                   # Training files for class 3 (partially blocked condenser)
datafiles[0][4]=['2025-02-13_5_','2025-02-14_5_']                                                   # Training files for class 4 (condenser fan not working)

if options.transition:
    # The --transition option is enabled: the test files include transition data
    datafiles[1][0]=['2025-01-27_5_','2025-01-28_5_']                                   # Test files for class 0
    datafiles[1][1]=['2024-12-14_5_','2024-12-15_5_','2024-12-16_5_']                   # with TRANSITION. Test files for class 1
    datafiles[1][2]=['2024-12-17_5_','2024-12-19_5_','2024-12-25_5_','2024-12-26_5_']   # with TRANSITION. Test files for class 2
    datafiles[1][3]=['2024-12-27_5_','2024-12-31_5_','2025-01-01_5_']                   # with TRANSITION. Test files for class 3
    datafiles[1][4]=['2025-02-12_5_','2025-02-15_5_','2025-02-16_5_']                   # Test files for class 4
else:
    # The --transition option is not specified: the test files exclude transition data
    datafiles[1][0]=['2025-01-27_5_','2025-01-28_5_']                   # Test files for class 0 (without transition data)
    datafiles[1][1]=['2024-12-14_5_','2024-12-15_5_']                   # Test files for class 1 (without transition data)
    datafiles[1][2]=['2024-12-19_5_','2024-12-25_5_','2024-12-26_5_']   # Test files for class 2 (without transition data)
    datafiles[1][3]=['2024-12-31_5_','2025-01-01_5_']                   # Test files for class 3 (without transition data)
    datafiles[1][4]=['2025-02-15_5_','2025-02-16_5_']                   # Test files for class 4 (without transition data)

#datafiles[0][4]=['2025-02-05_5_']
#datafiles[1][4]=['2025-02-05_5_']

#####################################  Section 3: Features  ################################

#r1s5 supply air flow temperature
#r1s1 inlet evaporator temperature
#r1s4 condenser outlet
# Variables r1s4 and pa1 apiii may not exist in cloud controllers

features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
features=['r1 s1','r1 s4','r1 s5']
features=['r1 s5']
# Feature combination suggested by AKO
#features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
features=['r1 s1','r1 s4','r1 s5']   # Redefining the feature list (this is the final list of features used)
#features=['r1 s1','r1 s5','pa1 apiii']
#features=['r1 s5','pa1 apiii']
#features=['r1 s1','r1 s5']
#features=['r1 s5']

featureNames={}   # Dictionary mapping feature names to the labels displayed in the figures
featureNames['r1 s1']='$T_{evap}$'
featureNames['r1 s4']='$T_{cond}$'
featureNames['r1 s5']='$T_{air}$'
featureNames['pa1 apiii']='$P_{elec}$'

unitNames={}      # Dictionary mapping feature names to their units
unitNames['r1 s1']='$(^{o}C)$'
unitNames['r1 s4']='$(^{o}C)$'
unitNames['r1 s5']='$(^{o}C)$'
unitNames['pa1 apiii']='$(W)$'

NumFeatures=len(features)   # Number of features

#####################################  Section 4: Data Loading and Preprocessing  ################################

df_list=[[],[]]   # Nested list of Pandas DataFrames. Index 0 is for training data and index 1 is for test data.
for i in range(NumberOfFailures+1):
    # Initialize an inner list for each class
    df_list[0].append([])
    df_list[1].append([])

for i in range(NumberOfFailures+1):
    # Load the training data of each class
    dftemp=[]   # Temporary list for the DataFrames read from the files of this class
    for f in datafiles[0][i]:
        # Read each file belonging to this class in the training set
        print(" ", f)                                               # Print the name of the file being processed
        script_dir = os.path.dirname(os.path.abspath(__file__))    # Path of the directory containing this script
        data_dir = os.path.join(script_dir, 'data')                 # Path to the 'data' directory where the CSV files are located
        file_path = os.path.join(data_dir, f + '.csv')              # Full path of the CSV file
        print(f"Currently attempting to open: {file_path}")         # Print the path being opened (for debugging)
        df1 = pd.read_csv(file_path)                                # Read the CSV file into a Pandas DataFrame
        dftemp.append(df1)               # Add the DataFrame just read to the temporary list dftemp
    df_list[0][i]=pd.concat(dftemp)      # Concatenate all DataFrames of this class and store the result in df_list[0][i]

##########  The test data is loaded in the same way  ##########
for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[1][i]:
        print(" ", f)
        script_dir = os.path.dirname(os.path.abspath(__file__))
        data_dir = os.path.join(script_dir, 'data')
        file_path = os.path.join(data_dir, f + '.csv')
        print(f"Currently attempting to open: {file_path}")
        df1 = pd.read_csv(file_path)
        dftemp.append(df1)
    df_list[1][i]=pd.concat(dftemp)

# The raw data is recorded every 10 seconds and is subsampled to 5 minutes (30 samples of 10 seconds each)
# subsampled to 5' =  30 * 10"
# We consider samples every 5' because in production we will only have data at this frequency
subsamplingrate=30   # Subsampling rate

dataframe=[[],[]]    # Nested list of subsampled DataFrames. Index 0 for training data and index 1 for test data.
for i in range(NumberOfFailures+1):
    # Initialize an inner list for each class
    dataframe[0].append([])
    dataframe[1].append([])

for i in range(NumberOfFailures+1):
    # Subsample and preprocess the training data of each class
    datalength=df_list[0][i].shape[0]   # Number of rows in the original DataFrame
    dataframe[0][i]=df_list[0][i].iloc[range(0,datalength,subsamplingrate)][features]   # Keep one row every subsamplingrate rows and select only the feature columns
    dataframe[0][i].reset_index(inplace=True,drop=True)   # Reset the DataFrame index and drop the old one
    dataframe[0][i].dropna(inplace=True)                  # Drop rows containing NaN values

for i in range(NumberOfFailures+1):
    # Same subsampling and preprocessing for the test data
    datalength=df_list[1][i].shape[0]
    dataframe[1][i]=df_list[1][i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[1][i].reset_index(inplace=True,drop=True)
    dataframe[1][i].dropna(inplace=True)

# Train data is [0] and test data is [1]
dataTrain=[]   # List of training DataFrames
dataTest=[]    # List of test DataFrames
for i in range(NumberOfFailures+1):
    # Copy the processed DataFrames into the dataTrain and dataTest lists
    dataTrain.append(dataframe[0][i])
    dataTest.append(dataframe[1][i])

#####################################  Section 5: Data Normalization  ################################

# Calculate means and stdevs
a=dataTrain[0]   # Start from the class 0 training DataFrame
for i in range(1,NumberOfFailures+1):
    # Stack the training DataFrames of all classes vertically into a single NumPy array
    a=np.vstack((a,dataTrain[i]))

means=a.mean(axis=0)   # Mean of each feature over the whole training data
stdevs=a.std(axis=0)   # Standard deviation of each feature over the whole training data

def normalize2(train,test):
    # Normalize data using the global mean and standard deviation
    return( (train-means)/stdevs, (test-means)/stdevs )

dataTrainNorm=[]   # List of normalized training data
dataTestNorm=[]    # List of normalized test data
for i in range(NumberOfFailures+1):
    # Initialize the inner lists
    dataTrainNorm.append([])
    dataTestNorm.append([])
for i in range(NumberOfFailures+1):
    # Normalize the training and test data of each class
    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])

#####################################  Section 6: Creating Time Sequences  ################################

# Definition and training of the per-class models
NumFilters=64    # Number of filters in the convolutional layers
KernelSize=7     # Kernel size of the convolutional layers
DropOut=0.2      # Dropout rate used to prevent overfitting

def create_sequences(values, time_steps):
    # Convert a time series into overlapping input sequences of length time_steps for the model
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)

def listToString(l):
    # Convert a list to a string without spaces (used to name files)
    r=''
    for i in l:
        r+=str(i)
    return(r.replace(' ',''))

#####################################  Section 7: Model Definition  ################################

model=[]                  # List of per-class models
modelckpt_callback =[]    # List of ModelCheckpoint callbacks used to store the best model weights during training
es_callback =[]           # List of EarlyStopping callbacks.
# EarlyStopping acts like a supervisor: it monitors the model's performance on the validation data and,
# if the monitored metric (the validation loss) stops improving, it stops training to avoid overfitting
# (learning the training data too closely and performing poorly on new data).
path_checkpoint=[]        # List of checkpoint file paths

timesteps=int(options.timesteps)   # Number of time steps, taken from the command line argument (options.timesteps)
x_train=[]                         # List holding the training sequences of each class

# For each failure class:
# - The normalized data is converted into sequences with create_sequences.
# - A Keras Sequential model is created. The model is an autoencoder that uses Conv1D layers to encode
#   the sequence into a latent space and Conv1DTranspose layers to decode it back.
# - The model is compiled with the Adam optimizer and the MSE loss function.
# - A summary of the model is printed.
# - The checkpoint file path used to store the model weights is set.
# - The EarlyStopping and ModelCheckpoint callbacks are defined.
for i in range(NumberOfFailures+1):
    # Create the model and prepare the training data of each class
    x_train.append(create_sequences(dataTrainNorm[i],timesteps))   # Input sequences built from the normalized training data of this class
    model.append([])   # Placeholder that will hold the model of this class
    # Define a Keras Sequential model for the current class.
    # The model consists of Conv1D and Conv1DTranspose layers and forms an autoencoder.
    model[i] = keras.Sequential(
        [
            layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])),   # Input layer with shape (time_steps, num_features)
            layers.Conv1D(                    # 1D convolutional layers (encoder)
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),     # Dropout layer to prevent overfitting
            layers.Conv1D(
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(           # Transposed convolutional layers (decoder)
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1DTranspose(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=KernelSize, padding="same"),
        ]
    )
    model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")   # Compile with the Adam optimizer and the mean squared error (MSE) loss
    model[i].summary()   # Print the model summary

    path_checkpoint.append("model_class_v5_"+str(i)+"_"+str(timesteps)+listToString(features)+"_checkpoint.weights.h5")
    # File path used to save the checkpointed weights of this class's model
    es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15))
    # Early stopping callback: stop training if val_loss shows no improvement for 15 epochs
    modelckpt_callback.append(keras.callbacks.ModelCheckpoint(
        monitor="val_loss",
        filepath=path_checkpoint[i],
        verbose=1,
        save_weights_only=True,
        save_best_only=True,))
    # Checkpoint callback: keep the weights with the best val_loss

#####################################  Section 8: Training or Loading the Model  ################################

# If options.train is True:
#   A history list is created to keep track of the training history of each model.
#   A loop runs over each class and the corresponding model is trained on that class's training data,
#   using the callbacks to stop early and to save the best weights.
#   Predictions on the training data are computed to check performance.
# Otherwise (if options.train is False):
#   The pre-trained weights are loaded from the checkpoint files.
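
# Note: the header states that classification is later done by applying every per-class autoencoder and
# choosing the class whose model yields the lowest reconstruction MSE. The helper below is only a minimal
# illustrative sketch of that decision rule, not the classification code used later in this script; the
# name classify_by_min_mse and the argument x_seq are hypothetical.
def classify_by_min_mse(models, x_seq):
    # Return the index of the model (i.e., the class) with the smallest mean squared reconstruction error.
    errors = []
    for m in models:
        x_pred = m.predict(x_seq, verbose=0)             # reconstruct the sequences with this class's autoencoder
        errors.append(np.mean((x_pred - x_seq) ** 2))    # mean squared reconstruction error over the batch
    return int(np.argmin(errors))
# Example (hypothetical usage): classify_by_min_mse(model, create_sequences(dataTestNorm[0], timesteps))
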
if options.train:
    # The --train option is enabled on the command line
    history=[]   # List of training histories
    for i in range(NumberOfFailures+1):
        # Train the model of each class on that class's training data
        history.append(model[i].fit(
            x_train[i],
            x_train[i],
            epochs=400,
            batch_size=128,
            validation_split=0.3,
            callbacks=[ es_callback[i], modelckpt_callback[i] ],
        ))
        x_train_pred=model[i].predict(x_train[i])   # Predict the model output on the training data (to check performance)
else:
    # The --train option is not specified: load the pre-trained weights from the checkpoint files
    for i in range(NumberOfFailures+1):
        model[i].load_weights(path_checkpoint[i])

#####################################  Section 9: Data Plotting Function (Primary)  ################################

# Let's plot some features
colorline=['black','violet','lightcoral','cyan','lime','grey']   # Line colors, one per class
colordot=['grey','darkviolet','red','blue','green','black']      # Point colors, one per class

#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
featuresToPlot=features   # Features to plot (the full feature list)
indexesToPlot=[]          # Indices of the features to plot
for i in featuresToPlot:
    # Find the index of each feature in the feature list
    indexesToPlot.append(features.index(i))

def plotData():
    # Plot the test data of each class, one subplot per feature
    NumFeaturesToPlot=len(indexesToPlot)      # Number of features to plot
    plt.rcParams.update({'font.size': 16})    # Font size used in the figure
    fig, axes = plt.subplots(                 # Create the figure and its subplots
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        # For each feature, plot the test data of every class (de-normalized back to physical units),
        # setting the axis labels and the legend
        init=0
        end=testRanges[0][1]
        for j in range(NumberOfFailures+1):
            if NumFeaturesToPlot==1:
                axes.plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[i]+means[i],label="Class "+str(j), color=colorline[j],linewidth=1)
            else:
                axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[i]+means[i],label="Class "+str(j), color=colorline[j],linewidth=1)
            if j<NumberOfFailures:
                # Advance the plotting window to the test range of the next class
                init=end
                end+=(testRanges[j+1][1]-testRanges[j+1][0])

# Evaluation metrics: per-class sensitivity and precision are computed from the per-class true positive (TP),
# false negative (FN) and false positive (FP) counts, and combined into an F1-score.
def anomalyMetric(classes,testRanges,testClasses):
    # TP, FP and FN hold the per-class counts obtained by comparing the predicted classes with testClasses over testRanges
    Sensitivity=np.zeros(NumberOfFailures+1)
    Precision=np.zeros(NumberOfFailures+1)
    for i in range(NumberOfFailures+1):
        if (TP[i]+FN[i])>0:
            Sensitivity[i]=TP[i]/(TP[i]+FN[i])   # sensitivity (recall) of class i
        else:
            Sensitivity[i]=0
        Precision[i]=TP[i]/(TP[i]+FP[i])         # precision of class i
    S=Sensitivity.mean()
    P=Precision.mean()
    F1=2*S*P/(S+P)                               # harmonic mean of the mean sensitivity and mean precision
    print("Sensitivity: ",Sensitivity)
    print("S: ",S)
    print("Precision: ",Precision)
    print("P: ",P)
    print("F1-Score: ",F1)

anomalyMetric(classes,testRanges,testClasses)    # Compute and print the evaluation metrics

#####################################  Section 15: Detection Delay  ################################

# Compute the delay until correct detection for a list of test ranges (when transition data exists).
# For each range, count the samples until the prediction is correct for NoFailsInARow consecutive samples,
# store that delay, and print the average.
def computeDelay(l,classes,testRanges,testClasses):
    d=np.zeros(len(l))
    NoFailsInARow=4
    ind=0
    for i in l:
        start=testRanges[i][0]
        count=0
        while start