# Csar Fdez, UdL, 2025 import pandas as pd import matplotlib.pyplot as plt import datetime import numpy as np import keras import os.path import pickle from keras import layers from optparse import OptionParser parser = OptionParser() parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true") (options, args) = parser.parse_args() # data files arrays. Index: # 0. No failure # 1. Blocked evaporator # 2. Blocked condenser # 3 Fan condenser not working # 4. Open door NumberOfFailures=4 NumberOfFailures=3 # So far, we have only data for the first 3 types of failures datafiles=[] for i in range(NumberOfFailures+1): datafiles.append([]) # Next set of ddata corresponds to Freezer, SP=-26 datafiles[0]=['2024-08-07_5_','2024-08-08_5_'] datafiles[1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_'] datafiles[2]=['2024-12-18_5_','2024-12-19_5_','2024-12-20_5_'] datafiles[3]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_'] #datafiles[4]=[] # Features suggested by Xavier features=['r1 s1','r1 s4','r1 s5','pa1 apiii'] NumFeatures=len(features) df_list=[] for i in range(NumberOfFailures+1): df_list.append([]) for i in range(NumberOfFailures+1): dftemp=[] for f in datafiles[i]: print(" ", f) #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime') df1 = pd.read_csv('./data/'+f+'.csv') dftemp.append(df1) df_list[i]=pd.concat(dftemp) # subsampled to 5' = 30 * 10" # We consider smaples every 5' because in production, we will only have data at this frequency subsamplingrate=30 dataframe=[] for i in range(NumberOfFailures+1): dataframe.append([]) for i in range(NumberOfFailures+1): datalength=df_list[i].shape[0] dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features] dataframe[i].reset_index(inplace=True,drop=True) dataframe[i].dropna(inplace=True) # Train data is first 2/3 of data # Test data is: last 1/3 of data dataTrain=[] dataTest=[] for i in range(NumberOfFailures+1): dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:]) dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:]) def normalize2(train,test): # merges train and test means=[] stdevs=[] for i in range(NumFeatures): means.append(train[:,i].mean()) stdevs.append(train[:,i].std()) return( (train-means)/stdevs, (test-means)/stdevs ) dataTrainNorm=[] dataTestNorm=[] for i in range(NumberOfFailures+1): dataTrainNorm.append([]) dataTestNorm.append([]) for i in range(NumberOfFailures+1): (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i]) def plotData(): fig, axes = plt.subplots( nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True ) for i in range(NumberOfFailures+1): axes[i][0].plot(dataTrainNorm[i][:,0],label="Fail 0, feature 0") axes[i][1].plot(dataTrainNorm[i][:,1],label="Fail 0, feature 1") #axes[1].legend() #axes[0].set_ylabel(features[0]) #axes[1].set_ylabel(features[1]) plt.show() #plotData() TIME_STEPS = 24 def create_sequences(values, time_steps=TIME_STEPS): output = [] for i in range(len(values) - time_steps + 1): output.append(values[i : (i + time_steps)]) return np.stack(output) x_train=[] for i in range(NumberOfFailures+1): x_train.append(create_sequences(dataTrainNorm[i])) model=[] modelckpt_callback =[] es_callback =[] path_checkpoint=[] for i in range(NumberOfFailures+1): model.append([]) model[i] = keras.Sequential( [ layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])), layers.Conv1D( filters=64, kernel_size=7, padding="same", strides=2, activation="relu", ), layers.Dropout(rate=0.2), layers.Conv1D( filters=32, kernel_size=7, padding="same", strides=2, activation="relu", ), layers.Conv1DTranspose( filters=32, kernel_size=7, padding="same", strides=2, activation="relu", ), layers.Dropout(rate=0.2), layers.Conv1DTranspose( filters=64, kernel_size=7, padding="same", strides=2, activation="relu", ), layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=7, padding="same"), ] ) model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse") model[i].summary() path_checkpoint.append("model_"+str(i)+"._checkpoint.weights.h5") es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)) modelckpt_callback.append(keras.callbacks.ModelCheckpoint( monitor="val_loss", filepath=path_checkpoint[i], verbose=1, save_weights_only=True, save_best_only=True,)) if options.train: history=[] for i in range(NumberOfFailures+1): history.append(model[i].fit( x_train[i], x_train[i], epochs=400, batch_size=128, validation_split=0.3, callbacks=[ es_callback[i], modelckpt_callback[i] ],)) fig, axes = plt.subplots( nrows=int(np.ceil((NumberOfFailures+1)/2)), ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True ) for i in range(int(np.ceil((NumberOfFailures+1)/2))): for j in range(2): r=2*i+j if r < NumberOfFailures+1: axes[i][j].plot(history[r].history["loss"], label="Training Loss") axes[i][j].plot(history[r].history["val_loss"], label="Val Loss") axes[i][j].legend() plt.show() else: for i in range(NumberOfFailures+1): model[i].load_weights(path_checkpoint[i]) x_train_pred=[] train_mae_loss=[] threshold=[] for i in range(NumberOfFailures+1): x_train_pred.append(model[i].predict(x_train[i])) train_mae_loss.append(np.mean(np.abs(x_train_pred[i] - x_train[i]), axis=1)) threshold.append(np.max(train_mae_loss[i],axis=0)) print("Threshold : ",threshold) for i in range(NumberOfFailures+1): threshold[i]=threshold[i]*2 # Threshold is enlarged because, otherwise, for subsamples at 5' have many false positives # 1st scenario. Detect only anomaly. Later, we will classiffy it # Test data= testnormal + testfail1 + testtail2 + testfail3 + testfail4 + testnormal d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[0])) x_test = create_sequences(d) x_test_pred = model[0].predict(x_test) test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1) testRanges=[] testRanges.append([0,dataTestNorm[0].shape[0]]) testRanges.append([dataTestNorm[0].shape[0], dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0] ]) testRanges.append([dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0], dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0]+ dataTestNorm[2].shape[0] ]) testRanges.append([dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0]+ dataTestNorm[2].shape[0], dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0]+ dataTestNorm[2].shape[0]+ dataTestNorm[3].shape[0]]) testRanges.append([dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0]+ dataTestNorm[2].shape[0]+ dataTestNorm[3].shape[0] , x_test.shape[0] ]) def AtLeastOneTrue(x): for i in range(NumFeatures): if x[i]: return True return False anomalies = test_mae_loss > threshold[0] anomalous_data_indices = [] for i in range(anomalies.shape[0]): if AtLeastOneTrue(anomalies[i]): #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]: anomalous_data_indices.append(i) #print(anomalous_data_indices) # Let's plot only a couple of features def plotData2(): fig, axes = plt.subplots( nrows=2, ncols=1, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True ) axes[0].plot(range(len(x_train[0])),x_train[0][:,0,0],label="normal") axes[0].plot(range(len(x_train[0]),len(x_train[0])+len(x_test)),x_test[:,0,0],label="abnormal") axes[0].plot(len(x_train[0])+np.array(anomalous_data_indices),x_test[anomalous_data_indices,0,0],color='red',marker='.',linewidth=0,label="abnormal detection") axes[0].legend() axes[1].plot(range(len(x_train[0])),x_train[0][:,0,1],label="normal") axes[1].plot(range(len(x_train[0]),len(x_train[0])+len(x_test)),x_test[:,0,1],label="abnormal") axes[1].plot(len(x_train[0])+np.array(anomalous_data_indices),x_test[anomalous_data_indices,0,1],color='red',marker='.',linewidth=0,label="abnormal detection") axes[1].legend() axes[0].set_ylabel(features[0]) axes[1].set_ylabel(features[1]) plt.show() #plotData2() # 2nd scenario. Go over anomalies and classify it by less error ''' #This code works, but too slow anomalous_data_type=[] for i in anomalous_data_indices: error=[] for m in range(1,NumberOfFailures+1): error.append(np.mean(np.mean(np.abs(model[m].predict(x_test[i:i+1,:,:])-x_test[i:i+1,:,:]),axis=1))) anomalous_data_type.append(np.argmin(error)+1) ''' anomalous_data_type=[] x_test_predict=[] for m in range(NumberOfFailures+1): x_test_predict.append(model[m].predict(x_test)) for i in anomalous_data_indices: error=[] for m in range(1,NumberOfFailures+1): error.append(np.mean(np.mean(np.abs(x_test_predict[m][i:i+1,:,:]-x_test[i:i+1,:,:]),axis=1))) anomalous_data_type.append(np.argmin(error)+1) # For plotting purposes anomalous_data_indices_by_failure=[] for i in range(NumberOfFailures+1): anomalous_data_indices_by_failure.append([]) for i in range(len(anomalous_data_indices)): print(i," ",anomalous_data_type[i]) anomalous_data_indices_by_failure[anomalous_data_type[i]].append(anomalous_data_indices[i]) def plotData2(): fig, axes = plt.subplots( nrows=2, ncols=1, figsize=(25, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True ) init=0 end=len(x_train[0]) axes[0].plot(range(init,end),x_train[0][:,0,0],label="normal train") #axes.plot(range(len(x_train[0]),len(x_train[0])+len(x_test)),x_test[:,0,0],label="abnormal") init=end end+=testRanges[0][1] axes[0].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,0],label="normal test") init=end end+=(testRanges[1][1]-testRanges[1][0]) axes[0].plot(range(init,end),x_test[testRanges[1][0]:testRanges[1][1],0,0],label="fail type 1", color="lightcoral") init=end end+=(testRanges[2][1]-testRanges[2][0]) axes[0].plot(range(init,end),x_test[testRanges[2][0]:testRanges[2][1],0,0],label="fail type 2", color="cyan") init=end end+=(testRanges[3][1]-testRanges[3][0]) axes[0].plot(range(init,end),x_test[testRanges[3][0]:testRanges[3][1],0,0],label="fail type 3", color="lime") axes[0].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[1]),x_test[anomalous_data_indices_by_failure[1],0,0],color='red',marker='.',linewidth=0,label="abnormal detection type 1") axes[0].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[2]),x_test[anomalous_data_indices_by_failure[2],0,0],color='blue',marker='.',linewidth=0,label="abnormal detection type 2") axes[0].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[3]),x_test[anomalous_data_indices_by_failure[3],0,0],color='green',marker='.',linewidth=0,label="abnormal detection type 3") axes[0].legend() axes[0].set_ylabel(features[0]) init=0 end=len(x_train[0]) axes[1].plot(range(init,end),x_train[0][:,0,1],label="normal train") #axes.plot(range(len(x_train[0]),len(x_train[0])+len(x_test)),x_test[:,0,0],label="abnormal") init=end end+=testRanges[0][1] axes[1].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,1],label="normal test") init=end end+=(testRanges[1][1]-testRanges[1][0]) axes[1].plot(range(init,end),x_test[testRanges[1][0]:testRanges[1][1],0,1],label="fail type 1", color="lightcoral") init=end end+=(testRanges[2][1]-testRanges[2][0]) axes[1].plot(range(init,end),x_test[testRanges[2][0]:testRanges[2][1],0,1],label="fail type 2", color="cyan") init=end end+=(testRanges[3][1]-testRanges[3][0]) axes[1].plot(range(init,end),x_test[testRanges[3][0]:testRanges[3][1],0,1],label="fail type 3", color="lime") axes[1].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[1]),x_test[anomalous_data_indices_by_failure[1],0,1],color='red',marker='.',linewidth=0,label="abnormal detection type 1") axes[1].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[2]),x_test[anomalous_data_indices_by_failure[2],0,1],color='blue',marker='.',linewidth=0,label="abnormal detection type 2") axes[1].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[3]),x_test[anomalous_data_indices_by_failure[3],0,1],color='green',marker='.',linewidth=0,label="abnormal detection type 3") axes[1].legend() axes[1].set_ylabel(features[1]) plt.show() plotData2()