# Csar Fdez, UdL, 2025
# Changes from v1: Normalization
# In v1, each failure type has its own normalization pars (mean and stdevs)
# In v2, mean and stdev are the same for all data
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
import pickle
from keras import layers
from optparse import OptionParser
import copy

parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")

(options, args) = parser.parse_args()

# Data file arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Fully blocked condenser
# 3. Partially blocked condenser
# 4. Fan condenser not working
# 5. Open door
NumberOfFailures=4  # So far, we only have data for the first 4 types of failures
datafiles=[]
for i in range(NumberOfFailures+1):
    datafiles.append([])

# Next set of data corresponds to Freezer, SP=-26
datafiles[0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_','2025-01-27_5_']
datafiles[1]=['2024-12-11_5_','2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
datafiles[2]=['2024-12-18_5_','2024-12-19_5_']
datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
#datafiles[4]=[]

# Features suggested by Xavier
# Careful with 'tc s3' because on datafiles[0] it is always null
# Seems to be incorporated in new tests
features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
#features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
#features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
NumFeatures=len(features)

df_list=[]
for i in range(NumberOfFailures+1):
    df_list.append([])

for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[i]:
        print("    ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[i]=pd.concat(dftemp)

# Subsampled to 5' = 30 * 10"
# We consider samples every 5' because in production we will only have data at this frequency
subsamplingrate=30

dataframe=[]
for i in range(NumberOfFailures+1):
    dataframe.append([])

for i in range(NumberOfFailures+1):
    datalength=df_list[i].shape[0]
    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True,drop=True)
    dataframe[i].dropna(inplace=True)

# Train data is the first 2/3 of the data
# Test data is the last 1/3 of the data
dataTrain=[]
dataTest=[]
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])

# Calculate means and stdevs over the training data of all classes pooled together
a=dataTrain[0]
for i in range(1,NumberOfFailures+1):
    a=np.vstack((a,dataTrain[i]))
means=a.mean(axis=0)
stdevs=a.std(axis=0)

def normalize2(train,test):
    return( (train-means)/stdevs, (test-means)/stdevs )

dataTrainNorm=[]
dataTestNorm=[]
for i in range(NumberOfFailures+1):
    dataTrainNorm.append([])
    dataTestNorm.append([])
for i in range(NumberOfFailures+1):
    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
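# Illustrative sanity check (our addition, commented out like the other optional checks):
# after pooling all classes, the normalized training data should be close to
# zero-mean, unit-variance per feature.
#print("pooled mean after normalization:", ((a-means)/stdevs).mean(axis=0))
#print("pooled std  after normalization:", ((a-means)/stdevs).std(axis=0))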
def plotData():
    fig, axes = plt.subplots(
        nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumberOfFailures+1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+", feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+", feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    plt.show()

#plotData()
#exit(0)

NumFilters=64
KernelSize=7
DropOut=0.2
ThresholdFactor=1.4
TIME_STEPS = 12   # Trade-off between detection performance (high) and response delay (low)

def create_sequences(values, time_steps=TIME_STEPS):
    # Slide a window of length time_steps over the data, one sample at a time
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)

x_train=[]
for i in range(NumberOfFailures+1):
    x_train.append(create_sequences(dataTrainNorm[i]))

# Reused code from v1_multifailure for only one model. No classification
#for i in range(NumberOfFailures+1):
model = keras.Sequential(
    [
        layers.Input(shape=(x_train[0].shape[1], x_train[0].shape[2])),
        layers.Conv1D(
            filters=NumFilters,
            kernel_size=KernelSize,
            padding="same",
            strides=2,
            activation="relu",
        ),
        layers.Dropout(rate=DropOut),
        layers.Conv1D(
            filters=int(NumFilters/2),
            kernel_size=KernelSize,
            padding="same",
            strides=2,
            activation="relu",
        ),
        layers.Conv1DTranspose(
            filters=int(NumFilters/2),
            kernel_size=KernelSize,
            padding="same",
            strides=2,
            activation="relu",
        ),
        layers.Dropout(rate=DropOut),
        layers.Conv1DTranspose(
            filters=NumFilters,
            kernel_size=KernelSize,
            padding="same",
            strides=2,
            activation="relu",
        ),
        # One output channel per feature
        layers.Conv1DTranspose(filters=x_train[0].shape[2], kernel_size=KernelSize, padding="same"),
    ]
)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
model.summary()

path_checkpoint="model_noclass_v2_checkpoint.weights.h5"
es_callback=keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
modelckpt_callback=keras.callbacks.ModelCheckpoint(
    monitor="val_loss",
    filepath=path_checkpoint,
    verbose=1,
    save_weights_only=True,
    save_best_only=True,
)

if options.train:
    # The autoencoder is trained to reconstruct no-failure data only
    history=model.fit(
        x_train[0],
        x_train[0],
        epochs=400,
        batch_size=128,
        validation_split=0.3,
        callbacks=[es_callback, modelckpt_callback],
    )
else:
    model.load_weights(path_checkpoint)

x_train_pred=model.predict(x_train[0])
train_mae_loss=np.mean(np.abs(x_train_pred - x_train[0]), axis=1)
threshold=np.max(train_mae_loss,axis=0)   # per-feature threshold
thresholdOrig=copy.deepcopy(threshold)
print("Threshold : ",threshold)
threshold=threshold*ThresholdFactor
# The threshold is enlarged because, otherwise, subsampling at 5' produces many false positives
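# Minimal sketch (our addition; 'window' and 'err' are our names, not the original's):
# how a single window gets flagged. 'threshold' holds one value per feature; a window
# counts as anomalous as soon as ANY feature's mean absolute reconstruction error
# exceeds its per-feature threshold.
#window = x_train[0][:1]                                        # shape (1, TIME_STEPS, NumFeatures)
#err = np.mean(np.abs(model.predict(window) - window), axis=1)  # per-feature MAE, shape (1, NumFeatures)
#print("anomalous window?", bool(np.any(err > threshold)))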
# 1st scenario. Detect only anomaly. Later, we will classify it
# Test data = testnormal + testfail1 + testfail2 + testfail3 + testfail4
#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]))

x_test = create_sequences(d)
x_test_pred = model.predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)

# Define ranges for plotting in different colors
testRanges=[]
r=dataTestNorm[0].shape[0]
testRanges.append([0,r])
for i in range(1,NumberOfFailures+1):
    rnext=r+dataTestNorm[i].shape[0]
    testRanges.append([r,rnext])
    r=rnext
# Drop the last TIME_STEPS for plotting (create_sequences yields TIME_STEPS-1 fewer windows than samples)
testRanges[NumberOfFailures][1]=testRanges[NumberOfFailures][1]-TIME_STEPS

def AtLeastOneTrue(x):
    for i in range(NumFeatures):
        if x[i]:
            return True
    return False

anomalies = test_mae_loss > threshold
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
    if AtLeastOneTrue(anomalies[i]):
        anomalous_data_indices.append(i)

# Let's plot some features
colorline=['violet','lightcoral','cyan','lime','grey']
colordot=['darkviolet','red','blue','green','black']

#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
featuresToPlot=features
indexesToPlot=[]
for i in featuresToPlot:
    indexesToPlot.append(features.index(i))

def plotData3():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        init=0
        end=testRanges[0][1]
        axes[i].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,indexesToPlot[i]],label="No fail")
        init=end
        end+=(testRanges[1][1]-testRanges[1][0])
        for j in range(1,NumberOfFailures+1):
            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]],label="Fail type "+str(j), color=colorline[j-1])
            if j<NumberOfFailures:
                init=end
                end+=(testRanges[j+1][1]-testRanges[j+1][0])
        # Overlay the detected anomalies as dots. This block was garbled in the source;
        # it is reconstructed under the assumption that it mirrors the overlay in plotData4 below.
        x=[]
        y=[]
        for k in anomalous_data_indices:
            if (k+TIME_STEPS)<x_test.shape[0]:
                x.append(k+TIME_STEPS)
                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
        axes[i].plot(x,y,'.',color='black',markersize=6,label="Anomaly")
        axes[i].set_ylabel(features[indexesToPlot[i]])
        axes[i].legend(loc='upper right')
    plt.show()

def anomalyMetric(testList):
    # First element of testList is the non-failure data; the rest are the failure classes.
    # FP/TN are counted on the non-failure windows, TP/FN on each failure class.
    # The head of this function was garbled in the source and is reconstructed from the
    # surviving body below.
    x_test = create_sequences(testList[0])
    x_test_pred = model.predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
    anomalies = test_mae_loss > threshold
    count=0
    for i in range(anomalies.shape[0]):
        if AtLeastOneTrue(anomalies[i]):
            count+=1
    FP=count
    TN=anomalies.shape[0]-count
    count=0
    TP=np.zeros((NumberOfFailures))
    FN=np.zeros((NumberOfFailures))
    Sensitivity=np.zeros((NumberOfFailures))
    Precision=np.zeros((NumberOfFailures))
    for i in range(1,len(testList)):
        x_test = create_sequences(testList[i])
        x_test_pred = model.predict(x_test)
        test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
        anomalies = test_mae_loss > threshold
        count=0
        for j in range(anomalies.shape[0]):
            if AtLeastOneTrue(anomalies[j]):
                count+=1
        TP[i-1] = count
        FN[i-1] = anomalies.shape[0]-count
        Sensitivity[i-1]=TP[i-1]/(TP[i-1]+FN[i-1])
        Precision[i-1]=TP[i-1]/(TP[i-1]+FP)
    GlobalSensitivity=TP.sum()/(TP.sum()+FN.sum())
    Specificity=TN/(TN+FP)
    Accuracy=(TN+TP.sum())/(TN+TP.sum()+FP+FN.sum())
    GlobalPrecision=TP.sum()/(TP.sum()+FP)
    F1Score= 2*GlobalPrecision*GlobalSensitivity/(GlobalPrecision+GlobalSensitivity)
    # Note: this "F2Score" is the harmonic mean of specificity and sensitivity,
    # not the standard F-beta score with beta=2
    F2Score = 2*Specificity*GlobalSensitivity/(Specificity+GlobalSensitivity)
    print("Sensitivity: ",Sensitivity)
    print("Global Sensitivity: ",GlobalSensitivity)
    print("Precision: ",Precision)
    print("Global Precision: ",GlobalPrecision)
    print("Specificity: ",Specificity)
    print("Accuracy: ",Accuracy)
    print("F1Score: ",F1Score)
    print("F2Score: ",F2Score)
    print("FP: ",FP)
    #return Sensitivity+Specificity
    return (F1Score,F2Score)

anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
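# Worked example of the metrics above (illustrative, made-up counts): if the normal test
# split has 500 windows of which 25 are flagged, then FP=25, TN=475 and
# Specificity=475/500=0.95. If failure type 1 yields 200 windows with 180 flagged,
# Sensitivity[0]=180/200=0.90 and Precision[0]=180/(180+25)=0.878.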
def plotFScore():
    global threshold
    res=[]
    # Plots F-Score as a function of the threshold factor
    tf=0.3
    while tf<1.5:
        threshold=thresholdOrig*tf
        r=anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
        res.append([tf,r[0],r[1]])
        tf+=0.05
    print(res)
    ar=np.array(res)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 10), dpi=80, facecolor="w", edgecolor="k")
    ln1=axes.plot(ar[:,0],ar[:,1],label="F1-Score",linewidth=4)
    ax1=axes.twinx()
    ln2=ax1.plot(ar[:,0],ar[:,2],label="F2-Score",linewidth=4,color='C3')
    axes.set_xlabel("Threshold factor")
    axes.set_ylabel("F1-Score")
    ax1.set_ylabel("F2-Score")
    lns = ln1+ln2
    labs = [l.get_label() for l in lns]
    axes.legend(lns, labs, loc=0)
    axes.grid()
    plt.show()

#plotFScore()

plotData3()
exit(0)

# 2nd scenario. Detect only anomaly. Later, we will classify it
# Test data alternates no-failure segments with each failure type:
# normal + fail1 + normal + fail2 + normal + fail3 + normal + fail4
#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
num=100
d=np.vstack((dataTestNorm[0][0:num,:],dataTestNorm[1][0:num,:],dataTestNorm[0][num:2*num,:],dataTestNorm[2][70:70+num,:],dataTestNorm[0][2*num-90:3*num-90,:],dataTestNorm[3][50:num+50,:],dataTestNorm[0][150:150+num,:],dataTestNorm[4][0:num+TIME_STEPS,:]))

x_test = create_sequences(d)
x_test_pred = model.predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)

anomalies = test_mae_loss > threshold
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
    if AtLeastOneTrue(anomalies[i]):
        anomalous_data_indices.append(i)

def plotData4():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        for j in range(1,NumberOfFailures+1):
            if j==1:
                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]],label="No fail", color='C0')
            else:
                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]], color='C0')
            axes[i].plot(range(j*2*num-num,j*2*num),x_test[j*2*num-num:j*2*num,0,indexesToPlot[i]],label="Fail type "+str(j),color=colorline[j-1])
        x=[]
        y=[]
        for k in anomalous_data_indices:
            # The source is truncated at this condition; the completion below is assumed,
            # marking each anomaly at the end of its window and guarding the array bound
            if (k+TIME_STEPS)<x_test.shape[0]:
                x.append(k+TIME_STEPS)
                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
        axes[i].plot(x,y,'.',color='black',markersize=6,label="Anomaly")
        axes[i].set_ylabel(features[indexesToPlot[i]])
        axes[i].legend(loc='upper right')
    plt.show()
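# Usage sketch (inferred from the OptionParser flag above; the script file name is hypothetical):
#   python v2_noclass.py --train   # train the autoencoder and save the best weights
#   python v2_noclass.py           # load model_noclass_v2_checkpoint.weights.h5 and evaluate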