# Csar Fdez, UdL, 2025
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
import pickle
from keras import layers
import copy

# Data file arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Fully blocked condenser
# 3. Partially blocked condenser
# 4. Condenser fan not working
# 5. Open door

NumberOfFailures=5
NumberOfFailures=4  # So far, we only have data for the first 4 types of failures
datafiles=[]
for i in range(NumberOfFailures+1):
    datafiles.append([])

# The next set of data corresponds to the freezer, SP=-26
datafiles[0]=['2024-08-07_5_','2024-08-08_5_']
datafiles[1]=['2024-12-11_5_','2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
datafiles[2]=['2024-12-18_5_','2024-12-19_5_']
datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
#datafiles[4]=[]

# Features suggested by Xavier
features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
NumFeatures=len(features)

df_list=[]
for i in range(NumberOfFailures+1):
    df_list.append([])

for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[i]:
        print(" ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[i]=pd.concat(dftemp)

# Subsample to 5' = 30 * 10"
# We use samples every 5' because, in production, we will only have data at this frequency
subsamplingrate=30

dataframe=[]
for i in range(NumberOfFailures+1):
    dataframe.append([])

for i in range(NumberOfFailures+1):
    datalength=df_list[i].shape[0]
    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True,drop=True)
    dataframe[i].dropna(inplace=True)

# Train data is the first 2/3 of the data
# Test data is the last 1/3 of the data
dataTrain=[]
dataTest=[]
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])

def normalize2(train,test):  # normalizes train and test with statistics computed on train
    means=[]
    stdevs=[]
    for i in range(NumFeatures):
        means.append(train[:,i].mean())
        stdevs.append(train[:,i].std())
    return( (train-means)/stdevs, (test-means)/stdevs )

dataTrainNorm=[]
dataTestNorm=[]
for i in range(NumberOfFailures+1):
    dataTrainNorm.append([])
    dataTestNorm.append([])

for i in range(NumberOfFailures+1):
    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])

def plotData():
    fig, axes = plt.subplots(nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True)
    for i in range(NumberOfFailures+1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+", feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+", feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    plt.show()

#plotData()

TIME_STEPS = 12
def create_sequences(values, time_steps=TIME_STEPS):
    # Builds overlapping windows of length time_steps from a (N, NumFeatures) array
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)

x_train=[]
for i in range(NumberOfFailures+1):
    x_train.append(create_sequences(dataTrainNorm[i]))
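# Shape sanity check (illustrative addition, not part of the original script): each element
# of x_train is a stack of overlapping windows of shape (N - TIME_STEPS + 1, TIME_STEPS, NumFeatures).
assert x_train[0].shape[1:] == (TIME_STEPS, NumFeatures)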
model=[]
modelckpt_callback=[]
es_callback=[]
path_checkpoint=[]
for i in range(NumberOfFailures+1):
    # One convolutional autoencoder per class: the encoder downsamples the window twice
    # (stride 2) and the Conv1DTranspose decoder reconstructs the original sequence.
    model.append([])
    model[i] = keras.Sequential(
        [
            layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])),
            layers.Conv1D(filters=64, kernel_size=7, padding="same", strides=2, activation="relu"),
            layers.Dropout(rate=0.2),
            layers.Conv1D(filters=32, kernel_size=7, padding="same", strides=2, activation="relu"),
            layers.Conv1DTranspose(filters=32, kernel_size=7, padding="same", strides=2, activation="relu"),
            layers.Dropout(rate=0.2),
            layers.Conv1DTranspose(filters=64, kernel_size=7, padding="same", strides=2, activation="relu"),
            layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=7, padding="same"),
        ]
    )
    model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model[i].summary()
    path_checkpoint.append("model_v1_"+str(i)+"._checkpoint.weights.h5")
    es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15))
    modelckpt_callback.append(keras.callbacks.ModelCheckpoint(
        monitor="val_loss",
        filepath=path_checkpoint[i],
        verbose=1,
        save_weights_only=True,
        save_best_only=True,
    ))
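# The weight files loaded in the next block are assumed to come from an earlier training run.
# Below is a minimal, hedged sketch of how such checkpoints could be produced with the
# callbacks defined above; it is disabled by default because this script only evaluates
# pre-trained models. The epochs/batch_size/validation_split values are illustrative and
# not taken from the original.
TRAIN_MODELS = False  # hypothetical switch, not part of the original script
if TRAIN_MODELS:
    for i in range(NumberOfFailures+1):
        model[i].fit(
            x_train[i], x_train[i],  # autoencoder: the target is the input itself
            epochs=400,
            batch_size=128,
            validation_split=0.1,
            callbacks=[es_callback[i], modelckpt_callback[i]],
        )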
# Load models
for i in range(NumberOfFailures+1):
    model[i].load_weights(path_checkpoint[i])

x_train_pred=[]
train_mae_loss=[]
threshold=[]
for i in range(NumberOfFailures+1):
    x_train_pred.append(model[i].predict(x_train[i]))
    train_mae_loss.append(np.mean(np.abs(x_train_pred[i] - x_train[i]), axis=1))
    threshold.append(np.max(train_mae_loss[i],axis=0))
print("Threshold : ",threshold)
for i in range(NumberOfFailures+1):
    threshold[i]=threshold[i]*1.3
# The threshold is enlarged because, otherwise, the data subsampled at 5' produces many false positives

# Anomaly metrics:
#   False positives / true negatives are measured on dataTestNorm[0] (no-failure data)
#   True positives / false negatives are measured on dataTestNorm[i], i>0 (failure data)

def AtLeastOneTrue(x):
    for i in range(NumFeatures):
        if x[i]:
            return True
    return False

def anomalyMetric(testList):  # the first element of the list is the no-failure data
    # FP, TP: false/true positives
    # TN, FN: true/false negatives
    # Sensitivity: probability of detecting a failure when the data contains a failure
    # Specificity: probability of not flagging a failure when the data is healthy
    x_test = create_sequences(testList[0])
    x_test_pred = model[0].predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
    anomalies = test_mae_loss > threshold[0]
    count=0
    for i in range(anomalies.shape[0]):
        if AtLeastOneTrue(anomalies[i]):
            count+=1
    FP=count
    TN=anomalies.shape[0]-count
    count=0
    TP=np.zeros((NumberOfFailures))
    FN=np.zeros((NumberOfFailures))
    for i in range(1,len(testList)):
        x_test = create_sequences(testList[i])
        x_test_pred = model[0].predict(x_test)
        test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
        anomalies = test_mae_loss > threshold[0]
        count=0
        for j in range(anomalies.shape[0]):
            if AtLeastOneTrue(anomalies[j]):
                count+=1
        TP[i-1] = count
        FN[i-1] = anomalies.shape[0]-count
    Sensitivity=TP.sum()/(TP.sum()+FN.sum())
    Specificity=TN/(TN+FP)
    print("Sensitivity: ",Sensitivity)
    print("Specificity: ",Specificity)
    return Sensitivity+Specificity

MaxMetric=anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])

# Now iterate, permuting one feature at a time, and order the features by the resulting metric reduction
metric=np.zeros(NumFeatures)
for i in range(NumFeatures):
    dataTestNormCopy=[]  # A deep copy is required because shuffle is an in-place operation
    for j in range(NumberOfFailures+1):
        dataTestNormCopy.append([])
        dataTestNormCopy[j]=copy.deepcopy(dataTestNorm[j])
    for j in range(NumberOfFailures+1):
        np.random.shuffle(dataTestNormCopy[j][:,i])
    metric[i]=anomalyMetric([dataTestNormCopy[0],dataTestNormCopy[1],dataTestNormCopy[2],dataTestNormCopy[3],dataTestNormCopy[4]])

# Features ordered from least to most important
indexes_ordered=np.argsort(metric)

# Now, let's eliminate features accumulatively, from least to most important
metric=np.zeros(NumFeatures)
dataTestNormCopy=[]
for j in range(NumberOfFailures+1):
    dataTestNormCopy.append([])
    dataTestNormCopy[j]=copy.deepcopy(dataTestNorm[j])  # A deep copy is required because shuffle is an in-place operation
for i in range(NumFeatures):
    for j in range(NumberOfFailures+1):
        np.random.shuffle(dataTestNormCopy[j][:,indexes_ordered[i]])
    metric[i]=anomalyMetric([dataTestNormCopy[0],dataTestNormCopy[1],dataTestNormCopy[2],dataTestNormCopy[3],dataTestNormCopy[4]])

# Print the feature sets to be used in v1_multifailure.py
#features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2','tc s3']
F=np.array(features)
l=indexes_ordered.shape[0]
for i in range(3,NumFeatures-5):
    print(F[indexes_ordered[l-i:]])

'''
['r1 s10' 'r1 s6' 'r2 s8']
['tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r2 s7' 'r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r2 s4' 'r2 s7' 'r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r1 s9' 'r2 s4' 'r2 s7' 'r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
'''
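# Optional sketch (assumption, not part of the original script): persist the permutation
# ranking so it can be reloaded elsewhere, e.g. by v1_multifailure.py, without re-running
# this study. The file name "feature_ranking.pk" is hypothetical.
with open("feature_ranking.pk", "wb") as handle:
    pickle.dump({"features": features, "indexes_ordered": indexes_ordered}, handle)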