# Csar Fdez, UdL, 2025
# Changes from v1: Normalization
# In v1, each failure type has its own normalization parameters (means and stdevs)
# In v2, the same mean and stdev are used for all data
# v3.py trains the models looping over TIME_STEPS (4,8,12,16,20,24,...) and finds the optimal threshold factor

import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
from keras import layers
from optparse import OptionParser
import copy
import pickle

parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
parser.add_option("-o", "--optimizetf", dest="optimizetf", help="Optimizes Threshold Factor (false)", default=False, action="store_true")
parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS ", default=12)
parser.add_option("-f", "--thresholdfactor", dest="TF", help="Threshold Factor ", default=1.4)

(options, args) = parser.parse_args()

# Data file arrays. Index:
#   0. No failure
#   1. Blocked evaporator
#   2. Fully blocked condenser
#   3. Partially blocked condenser
#   4. Condenser fan not working
#   5. Open door
NumberOfFailures=4   # So far, we only have data for the first 4 types of failures
datafiles=[]
for i in range(NumberOfFailures+1):
    datafiles.append([])

# The next set of data corresponds to the freezer, SP=-26
datafiles[0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_','2025-01-27_5_','2025-01-28_5_']
datafiles[1]=['2024-12-11_5_','2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
#datafiles[1]=['2024-12-17_5_','2024-12-16_5_','2024-12-11_5_','2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']   # These have transitions
datafiles[2]=['2024-12-18_5_','2024-12-19_5_']
datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
#datafiles[4]=['2024-12-27_5_','2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']   # These have transitions
#datafiles[4]=[]

# Features suggested by Xavier
# Careful with 'tc s3': on datafiles[0] it is always null
# Seems to be incorporated in new tests
#   r1 s5: supply air flow temperature
#   r1 s1: inlet evaporator temperature
#   r1 s4: condenser outlet
# Variables 'r1 s4' and 'pa1 apiii' may not exist in cloud controllers
features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
features=['r1 s1','r1 s4','r1 s5']

featureNames={}
featureNames['r1 s1']='$T_{evap}$'
featureNames['r1 s4']='$T_{cond}$'
featureNames['r1 s5']='$T_{air}$'
featureNames['pa1 apiii']='$P_{elec}$'

unitNames={}
unitNames['r1 s1']='$(^{o}C)$'
unitNames['r1 s4']='$(^{o}C)$'
unitNames['r1 s5']='$(^{o}C)$'
unitNames['pa1 apiii']='$(W)$'

#features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
#features=['r2 s2','tc s1','r1 s10','r1 s6','r2 s8']

NumFeatures=len(features)

df_list=[]
for i in range(NumberOfFailures+1):
    df_list.append([])

for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[i]:
        print(" ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[i]=pd.concat(dftemp)

# Subsampled to 5' = 30 * 10"
# We consider samples every 5' because in production we will only have data at this frequency
subsamplingrate=30
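# Sanity check of the subsampling arithmetic (a sketch; assumes the raw CSVs
# are logged every 10 seconds, as the 5' = 30 * 10" comment above implies):
#   30 rows * 10 s/row = 300 s = 5 minutes between kept samples, i.e.
#   df.iloc[range(0, len(df), 30)]   keeps rows 0, 30, 60, ...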
dataframe=[]
for i in range(NumberOfFailures+1):
    dataframe.append([])

for i in range(NumberOfFailures+1):
    datalength=df_list[i].shape[0]
    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True,drop=True)
    dataframe[i].dropna(inplace=True)

# Train data is the first 2/3 of the data
# Test data is the last 1/3 of the data
dataTrain=[]
dataTest=[]
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])

# Calculate means and stdevs over the training data of all classes
a=dataTrain[0]
for i in range(1,NumberOfFailures+1):
    a=np.vstack((a,dataTrain[i]))
means=a.mean(axis=0)
stdevs=a.std(axis=0)

def normalize2(train,test):
    return( (train-means)/stdevs, (test-means)/stdevs )

dataTrainNorm=[]
dataTestNorm=[]
for i in range(NumberOfFailures+1):
    dataTrainNorm.append([])
    dataTestNorm.append([])
for i in range(NumberOfFailures+1):
    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])

def plotData():
    fig, axes = plt.subplots(nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True)
    for i in range(NumberOfFailures+1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+", feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+", feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    plt.show()

#plotData()
#exit(0)

NumFilters=64
KernelSize=7
DropOut=0.2
ThresholdFactor=1.4

def create_sequences(values, time_steps):
    # Sliding windows of length time_steps over the samples
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)

def AtLeastOneTrue(x):
    for i in range(NumFeatures):
        if x[i]:
            return True
    return False

def anomalyMetric(th,ts,testList):
    # The first element of the list is the non-failure data
    # FP, TP: false/true positives
    # TN, FN: true/false negatives
    # Sensitivity (recall): probability of detecting a failure given the data is a failure: TP/(TP+FN)
    # Specificity: true negative rate given the data is OK: TN/(TN+FP)
    # Accuracy: rate of correct predictions: (TN+TP)/(TN+TP+FP+FN)
    # Precision: rate of positive results: TP/(TP+FP)
    # F1-score: predictive performance measure: 2*Precision*Sensitivity/(Precision+Sensitivity)
    # F2-score: predictive performance measure: 2*Specificity*Sensitivity/(Specificity+Sensitivity)
    x_test = create_sequences(testList[0],ts)
    x_test_pred = model.predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
    anomalies = test_mae_loss > th
    count=0
    for i in range(anomalies.shape[0]):
        if AtLeastOneTrue(anomalies[i]):
            count+=1
    FP=count
    TN=anomalies.shape[0]-count
    count=0
    TP=np.zeros((NumberOfFailures))
    FN=np.zeros((NumberOfFailures))
    Sensitivity=np.zeros((NumberOfFailures))
    Precision=np.zeros((NumberOfFailures))
    for i in range(1,len(testList)):
        x_test = create_sequences(testList[i],ts)
        x_test_pred = model.predict(x_test)
        test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
        anomalies = test_mae_loss > th
        count=0
        for j in range(anomalies.shape[0]):
            if AtLeastOneTrue(anomalies[j]):
                count+=1
        TP[i-1] = count
        FN[i-1] = anomalies.shape[0]-count
        Sensitivity[i-1]=TP[i-1]/(TP[i-1]+FN[i-1])
        Precision[i-1]=TP[i-1]/(TP[i-1]+FP)
    GlobalSensitivity=TP.sum()/(TP.sum()+FN.sum())
    Specificity=TN/(TN+FP)
    Accuracy=(TN+TP.sum())/(TN+TP.sum()+FP+FN.sum())
    GlobalPrecision=TP.sum()/(TP.sum()+FP)
    F1Score=2*GlobalPrecision*GlobalSensitivity/(GlobalPrecision+GlobalSensitivity)
    F2Score=2*Specificity*GlobalSensitivity/(Specificity+GlobalSensitivity)
    print("Sensitivity: ",Sensitivity)
    print("Global Sensitivity: ",GlobalSensitivity)
    #print("Precision: ",Precision)
    #print("Global Precision: ",GlobalPrecision)
    print("Specificity: ",Specificity)
    #print("Accuracy: ",Accuracy)
    #print("F1Score: ",F1Score)
    print("F2Score: ",F2Score)
    #print("FP: ",FP)
    #return Sensitivity+Specificity
    return F2Score
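# Worked example of the metrics above (hypothetical counts, not measured data):
# with FP=10, TN=90 on the no-failure windows and TP=[40,45,50,35],
# FN=[10,5,0,15] on the four failure sets:
#   GlobalSensitivity = 170/(170+30) = 0.85
#   Specificity       = 90/(90+10)  = 0.90
#   F2Score (as defined above) = 2*0.90*0.85/(0.90+0.85) ~= 0.874
# Note that this F2Score is a harmonic mean of specificity and sensitivity,
# not the standard F-beta score with beta=2.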
FScoreHash={}
threshold={}

def getFScore(timestep,datalist):
    # Collects FScore as a function of the threshold factor
    FScoreHash[timestep]=[]
    tf=0.3
    while tf<8:
        th=threshold[timestep]*tf
        r=anomalyMetric(th,timestep,datalist)
        FScoreHash[timestep].append([tf,r])
        if tf<2:
            tf+=0.1
        else:
            tf+=0.5

def plotFScore(FS):
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 10), dpi=80, facecolor="w", edgecolor="k")
    for k in FS.keys():
        ar=np.array((FS[k]))
        axes.plot(ar[:,0],ar[:,1],label="$ns=$"+str(k),linewidth=3)
    axes.set_xlabel("Threshold factor ($TF$)")
    axes.set_ylabel("FScore")
    axes.legend()
    axes.grid()
    s='['
    for i in range(len(features)):
        s+=featureNames[features[i]]
        if i < len(features)-1:
            s+=', '
    s+=']'
    plt.title(s)
    plt.show()

def listToString(l):
    r=''
    for i in l:
        r+=str(i)
    return(r.replace(' ',''))

if options.train:
    for timesteps in range(4,21,4):
        x_train=[]
        for i in range(NumberOfFailures+1):
            x_train.append(create_sequences(dataTrainNorm[i],timesteps))
        model = keras.Sequential(
            [
                layers.Input(shape=(x_train[0].shape[1], x_train[0].shape[2])),
                layers.Conv1D(filters=NumFilters, kernel_size=KernelSize, padding="same", strides=2, activation="relu"),
                layers.Dropout(rate=DropOut),
                layers.Conv1D(filters=int(NumFilters/2), kernel_size=KernelSize, padding="same", strides=2, activation="relu"),
                layers.Conv1DTranspose(filters=int(NumFilters/2), kernel_size=KernelSize, padding="same", strides=2, activation="relu"),
                layers.Dropout(rate=DropOut),
                layers.Conv1DTranspose(filters=NumFilters, kernel_size=KernelSize, padding="same", strides=2, activation="relu"),
                layers.Conv1DTranspose(filters=x_train[0].shape[2], kernel_size=KernelSize, padding="same"),
            ]
        )
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
        model.summary()
        path_checkpoint="model_noclass_v2_"+str(timesteps)+listToString(features)+"_checkpoint.weights.h5"
        es_callback=keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
        modelckpt_callback=keras.callbacks.ModelCheckpoint(monitor="val_loss", filepath=path_checkpoint, verbose=1, save_weights_only=True, save_best_only=True)
        history=model.fit(x_train[0], x_train[0], epochs=400, batch_size=128, validation_split=0.3, callbacks=[es_callback, modelckpt_callback])
        x_train_pred=model.predict(x_train[0])
        train_mae_loss=np.mean(np.abs(x_train_pred - x_train[0]), axis=1)
        # Per-feature threshold: the maximum training MAE per feature
        threshold[timesteps]=np.max(train_mae_loss,axis=0)
    file = open('threshold'+listToString(features)+'.pk', 'wb')
    pickle.dump(threshold, file)
    file.close()
    exit(0)
else:
    file = open('threshold'+listToString(features)+'.pk', 'rb')
    threshold=pickle.load(file)
    file.close()

x_train=[]
for i in range(NumberOfFailures+1):
    x_train.append(create_sequences(dataTrainNorm[i],int(options.timesteps)))

model = keras.Sequential(
    [
        layers.Input(shape=(x_train[0].shape[1], x_train[0].shape[2])),
        layers.Conv1D(filters=NumFilters, kernel_size=KernelSize, padding="same", strides=2, activation="relu"),
        layers.Dropout(rate=DropOut),
        layers.Conv1D(filters=int(NumFilters/2), kernel_size=KernelSize, padding="same", strides=2, activation="relu"),
        layers.Conv1DTranspose(filters=int(NumFilters/2), kernel_size=KernelSize, padding="same", strides=2, activation="relu"),
        layers.Dropout(rate=DropOut),
        layers.Conv1DTranspose(filters=NumFilters, kernel_size=KernelSize, padding="same", strides=2, activation="relu"),
        layers.Conv1DTranspose(filters=x_train[0].shape[2], kernel_size=KernelSize, padding="same"),
    ]
)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
model.summary()
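# Shape walk-through of the autoencoder above (a sketch for the default
# timesteps=12 and the 3 selected features; 'same' padding throughout):
#   input                            -> (12, 3)
#   Conv1D(64, stride 2)             -> (6, 64)
#   Conv1D(32, stride 2)             -> (3, 32)    bottleneck
#   Conv1DTranspose(32, stride 2)    -> (6, 32)
#   Conv1DTranspose(64, stride 2)    -> (12, 64)
#   Conv1DTranspose(3, stride 1)     -> (12, 3)    reconstruction
# The model is trained only on no-failure windows (x_train[0]), so a large
# reconstruction error flags operating regimes it has never seen.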
if options.optimizetf:
    for timesteps in range(4,21,4):
        path_checkpoint="model_noclass_v2_"+str(timesteps)+listToString(features)+"_checkpoint.weights.h5"
        es_callback=keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
        modelckpt_callback=keras.callbacks.ModelCheckpoint(monitor="val_loss", filepath=path_checkpoint, verbose=1, save_weights_only=True, save_best_only=True)
        model.load_weights(path_checkpoint)
        getFScore(timesteps,[dataTestNorm[0],dataTrainNorm[1],dataTrainNorm[2],dataTrainNorm[3],dataTrainNorm[4]])
    file = open('FScore'+listToString(features)+'.pk', 'wb')
    pickle.dump(FScoreHash, file)
    file.close()

path_checkpoint="model_noclass_v2_"+str(options.timesteps)+listToString(features)+"_checkpoint.weights.h5"
es_callback=keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
modelckpt_callback=keras.callbacks.ModelCheckpoint(monitor="val_loss", filepath=path_checkpoint, verbose=1, save_weights_only=True, save_best_only=True)
model.load_weights(path_checkpoint)

file = open('FScore'+listToString(features)+'.pk', 'rb')
FS=pickle.load(file)
file.close()
#plotFScore(FS)
#exit(0)

TIME_STEPS=int(options.timesteps)

# 1st scenario: detect only the anomaly. Later, we will classify it
# Test data = testnormal + testfail1 + testfail2 + testfail3 + testfail4 + testnormal
#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
# For failure data we can use the train data, because it is not used for training and includes the first samples
#datalist=[dataTestNorm[0],dataTrainNorm[1],dataTrainNorm[2],dataTrainNorm[3],dataTrainNorm[4]]
datalist=[dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]]
d=np.vstack(datalist)

x_test = create_sequences(d,int(options.timesteps))
x_test_pred = model.predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)

# Define ranges for plotting in different colors
testRanges=[]
r=0
for i in range(len(datalist)):
    testRanges.append([r,r+datalist[i].shape[0]])
    r+=datalist[i].shape[0]
#r=dataTestNorm[0].shape[0]
#testRanges.append([0,r])
#for i in range(1,NumberOfFailures+1):
#    rnext=r+dataTrainNorm[i].shape[0]
#    testRanges.append([r,rnext])
#    r=rnext

# Drop the last TIME_STEPS for plotting
testRanges[NumberOfFailures][1]=testRanges[NumberOfFailures][1]-TIME_STEPS

anomalies = test_mae_loss > threshold[int(options.timesteps)]*float(options.TF)
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if AtLeastOneTrue(anomalies[i]):
        #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
        anomalous_data_indices.append(i)

# Let's plot some features
colorline=['violet','lightcoral','cyan','lime','grey']
colordot=['darkviolet','red','blue','green','black']
#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
featuresToPlot=features
indexesToPlot=[]
for i in featuresToPlot:
    indexesToPlot.append(features.index(i))
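# Note on the detection rule used above: threshold[ns] is a per-feature vector
# (the max training MAE per feature), so a window is flagged anomalous when ANY
# feature exceeds its own scaled threshold. The index loop is equivalent to the
# vectorized form (a sketch; ns/TF stand for the chosen timesteps and factor):
#   anomalous_data_indices = np.where(np.any(test_mae_loss > threshold[ns]*TF, axis=1))[0]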
def plotData3():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True)
    for i in range(NumFeaturesToPlot):
        init=0
        end=testRanges[0][1]
        axes[i].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="No fail")
        init=end
        end+=(testRanges[1][1]-testRanges[1][0])
        for j in range(1,NumberOfFailures+1):
            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Fail type "+str(j), color=colorline[j-1])
            if j<NumberOfFailures:   # advance to the next segment (reconstructed; this block was eaten in the source)
                init=end
                end+=(testRanges[j+1][1]-testRanges[j+1][0])
    # The original tail of this function (anomaly markers, axis labels) did not
    # survive in the source; assumed minimal close so the figure still shows:
    axes[0].legend()
    plt.show()

# Two-panel comparison: anomalies flagged with the ns=4 threshold (top) and the
# ns=20 threshold (bottom). Assumption: the code that reloaded the matching
# checkpoints and recomputed test_mae_loss for each ns did not survive here,
# so the thresholds are applied to the current test_mae_loss.
anomalies = test_mae_loss > threshold[4]*float(options.TF)
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if AtLeastOneTrue(anomalies[i]):
        anomalous_data_indices.append(i)

plt.rcParams.update({'font.size': 16})
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(15, 7), dpi=80, facecolor="w", edgecolor="k", sharex=True)
for i in range(1):
    init=0
    end=testRanges[0][1]
    axes[i].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="No fail")
    init=end
    end+=(testRanges[1][1]-testRanges[1][0])
    for j in range(1,NumberOfFailures+1):
        axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Fail type "+str(j), color=colorline[j-1])
        if j<NumberOfFailures:   # reconstructed segment advance, as in plotData3
            init=end
            end+=(testRanges[j+1][1]-testRanges[j+1][0])

anomalies = test_mae_loss > threshold[20]*float(options.TF)
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if AtLeastOneTrue(anomalies[i]):
        anomalous_data_indices.append(i)
print(testRanges)
for i in range(1):
    init=0
    end=testRanges[0][1]
    axes[i+1].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="No fail")
    init=end
    end+=(testRanges[1][1]-testRanges[1][0])
    for j in range(1,NumberOfFailures+1):
        if j==1:
            axes[i+1].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Fail type 3", color=colorline[j-1])
        else:
            axes[i+1].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]], color=colorline[j-1])
        if j<NumberOfFailures:   # reconstructed segment advance, as in plotData3
            init=end
            end+=(testRanges[j+1][1]-testRanges[j+1][0])

anomalies = test_mae_loss > threshold[int(options.timesteps)]*float(options.TF)
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if AtLeastOneTrue(anomalies[i]):
        #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
        anomalous_data_indices.append(i)

def plotData4():
    # 'num' (number of samples per plotted segment) is referenced below but its
    # definition did not survive in this source
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True)
    for i in range(NumFeaturesToPlot):
        for j in range(1,NumberOfFailures+1):
            if j==1:
                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]],label="No fail", color='C0')
            else:
                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]], color='C0')
            axes[i].plot(range(j*2*num-num,j*2*num),x_test[j*2*num-num:j*2*num,0,indexesToPlot[i]],label="Fail type "+str(j),color=colorline[j-1])
        x=[]
        y=[]
        for k in anomalous_data_indices:
            if (k+TIME_STEPS) < x_test.shape[0]:   # assumed guard; the original file is truncated at this point
                x.append(k+TIME_STEPS)
                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
        axes[i].plot(x,y,color=colordot[0],marker='.',linewidth=0)   # assumed anomaly markers
    plt.show()
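# Typical invocations (a sketch; the flags are those defined by the
# OptionParser at the top of this file):
#   python v3.py -t            # train one model per TIME_STEPS in 4,8,12,16,20
#   python v3.py -o            # sweep threshold factors and pickle the FScores
#   python v3.py -n 12 -f 1.4  # evaluate with timesteps=12 and TF=1.4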