# Csar Fdez, UdL, 2025

import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
import pickle
from keras import layers
from optparse import OptionParser

# Facility type 5: "Mural cerrado de congelación" (closed freezer cabinet).
# Set point at -18 (we will have two possible setpoints: -18 and -26).
# This code only deals with a single failure type: the abnormal data
# corresponds to a condenser fan failure.

parser = OptionParser()
parser.add_option("-t", "--train", dest="train",
                  help="Trains the models (default: False)",
                  default=False, action="store_true")
(options, args) = parser.parse_args()

normal_datafiles_list = ['2025-01-09_5_', '2025-01-10_5_', '2025-01-11_5_']
anormal_datafiles_list = ['2025-01-04_5_', '2025-01-05_5_', '2025-01-06_5_', '2025-01-07_5_']

# Features suggested by Xavier
features = ['r1 s1', 'r1 s4', 'r1 s5', 'pa1 apiii']
NumFeatures = len(features)

df_list = []
for f in normal_datafiles_list:
    # df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
    df1 = pd.read_csv('./data/' + f + '.csv')
    df_list.append(df1)
df = pd.concat(df_list)
datalength = df.shape[0]

# Subsample to 5-minute resolution (5' = 30 * 10"): in production we will
# only have data at this frequency.
subsamplingrate = 30

normaldataframe = df.iloc[range(0, datalength, subsamplingrate)][features]
normaldataframe.reset_index(inplace=True, drop=True)

df_list = []
for f in anormal_datafiles_list:
    # df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
    df1 = pd.read_csv('./data/' + f + '.csv')
    df_list.append(df1)
df = pd.concat(df_list)
datalength = df.shape[0]

# Subsampled to 5' = 30 * 10"
anormaldataframe = df.iloc[range(0, datalength, subsamplingrate)][features]
anormaldataframe.reset_index(inplace=True, drop=True)

# Train data is the first 2/3 of the normal data.
# Test data is: last 1/3 of normal data + abnormal data + last 1/3 of normal data.
split = int(normaldataframe.shape[0] * 2 / 3)
dataTrain = normaldataframe.values[0:split, :]
dataTest = np.vstack((normaldataframe.values[split:, :],
                      anormaldataframe.values,
                      normaldataframe.values[split:, :]))


def normalize2():
    # Normalizes both train and test data using the statistics of the
    # training data only, so no test information leaks into the model.
    means = []
    stdevs = []
    for i in range(NumFeatures):
        means.append(dataTrain[:, i].mean())
        stdevs.append(dataTrain[:, i].std())
    return ((dataTrain - means) / stdevs, (dataTest - means) / stdevs)


(dataTrainNorm, dataTestNorm) = normalize2()

TIME_STEPS = 24


def create_sequences(values, time_steps=TIME_STEPS):
    # Builds overlapping windows of length time_steps with stride 1.
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i:(i + time_steps)])
    return np.stack(output)


x_train = create_sequences(dataTrainNorm)

# Convolutional autoencoder: two stride-2 Conv1D layers compress the window
# and two stride-2 Conv1DTranspose layers expand it back to the input shape.
model = keras.Sequential(
    [
        layers.Input(shape=(x_train.shape[1], x_train.shape[2])),
        layers.Conv1D(filters=64, kernel_size=7, padding="same", strides=2, activation="relu"),
        layers.Dropout(rate=0.2),
        layers.Conv1D(filters=32, kernel_size=7, padding="same", strides=2, activation="relu"),
        layers.Conv1DTranspose(filters=32, kernel_size=7, padding="same", strides=2, activation="relu"),
        layers.Dropout(rate=0.2),
        layers.Conv1DTranspose(filters=64, kernel_size=7, padding="same", strides=2, activation="relu"),
        layers.Conv1DTranspose(filters=x_train.shape[2], kernel_size=7, padding="same"),
    ]
)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
model.summary()

path_checkpoint = "model._checkpoint.weights.h5"
es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
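# Optional sanity check (an added sketch, not part of the original pipeline):
# with TIME_STEPS = 24, the two stride-2 encoder convolutions and the two
# stride-2 transposed convolutions round-trip the window length
# (24 -> 12 -> 6 -> 12 -> 24), so the model can be trained with x_train as
# both input and target.
assert model.output_shape[1:] == x_train.shape[1:], "autoencoder does not reproduce the input shape"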
modelckpt_callback = keras.callbacks.ModelCheckpoint(
    monitor="val_loss",
    filepath=path_checkpoint,
    verbose=1,
    save_weights_only=True,
    save_best_only=True,
)

if options.train:
    history = model.fit(
        x_train,
        x_train,
        epochs=400,
        batch_size=128,
        validation_split=0.3,
        callbacks=[es_callback, modelckpt_callback],
    )
    plt.plot(history.history["loss"], label="Training Loss")
    plt.plot(history.history["val_loss"], label="Validation Loss")
    plt.legend()
    plt.show()
else:
    model.load_weights(path_checkpoint)

# Per-feature reconstruction error on the training data; the maximum over all
# training windows is taken as the anomaly threshold for each feature.
x_train_pred = model.predict(x_train)
train_mae_loss = np.mean(np.abs(x_train_pred - x_train), axis=1)
threshold = np.max(train_mae_loss, axis=0)
print("Threshold : ", threshold)
# The threshold is doubled because, otherwise, the 5-minute subsampled data
# produces many false positives.
threshold = threshold * 2

x_test = create_sequences(dataTestNorm)
x_test_pred = model.predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
anomalies = test_mae_loss > threshold

# A window is flagged as anomalous when either of the first two features
# exceeds its threshold.
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if anomalies[i][0] or anomalies[i][1]:
        anomalous_data_indices.append(i)
# print(anomalous_data_indices)


# Let's plot only a couple of features.
def plotData2():
    fig, axes = plt.subplots(
        nrows=2, ncols=1, figsize=(15, 20), dpi=80,
        facecolor="w", edgecolor="k", sharex=True
    )
    axes[0].plot(range(len(x_train)), x_train[:, 0, 0], label="normal")
    axes[0].plot(range(len(x_train), len(x_train) + len(x_test)), x_test[:, 0, 0], label="abnormal")
    axes[0].plot(len(x_train) + np.array(anomalous_data_indices),
                 x_test[anomalous_data_indices, 0, 0],
                 color='red', marker='.', linewidth=0, label="abnormal detection")
    axes[0].legend()
    axes[1].plot(range(len(x_train)), x_train[:, 0, 1], label="normal")
    axes[1].plot(range(len(x_train), len(x_train) + len(x_test)), x_test[:, 0, 1], label="abnormal")
    axes[1].plot(len(x_train) + np.array(anomalous_data_indices),
                 x_test[anomalous_data_indices, 0, 1],
                 color='red', marker='.', linewidth=0, label="abnormal detection")
    axes[1].legend()
    axes[0].set_ylabel(features[0])
    axes[1].set_ylabel(features[1])
    plt.show()


plotData2()
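# Rough detection-quality summary (an added sketch, not part of the original
# pipeline): since the test trace is built above as
# [normal tail | abnormal | normal tail], the position of the abnormal segment
# is known, and a window is counted as truly abnormal when it overlaps it.
# The variable names below are illustrative, not from the original code.
n_tail = normaldataframe.shape[0] - int(normaldataframe.shape[0] * 2 / 3)
abnormal_start = n_tail
abnormal_end = n_tail + anormaldataframe.shape[0]
flagged = np.zeros(len(x_test), dtype=bool)
flagged[anomalous_data_indices] = True
window_starts = np.arange(len(x_test))
truth = (window_starts + TIME_STEPS - 1 >= abnormal_start) & (window_starts < abnormal_end)
tp = np.sum(flagged & truth)
fp = np.sum(flagged & ~truth)
fn = np.sum(~flagged & truth)
print("precision ~", tp / max(tp + fp, 1), " recall ~", tp / max(tp + fn, 1))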