cesar
/
2025-AKO


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
							# Csar Fdez, UdL, 2025
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
import pickle
from keras import layers
from optparse import OptionParser

#   facility type 5. Mural cerrado de congelación (closed freezer). Set point at -18  (we will have two possible setpoints, -18 and -26)
# This code only deals with a given failure type
# Data for abnormal functioning corresponds to Condenser Fan failure

parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")

(options, args) = parser.parse_args()


normal_datafiles_list=['2025-01-09_5_','2025-01-10_5_','2025-01-11_5_']
anormal_datafiles_list=['2025-01-04_5_','2025-01-05_5_','2025-01-06_5_','2025-01-07_5_']

# Features suggested by Xavier
features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
NumFeatures=len(features)

df_list=[]
for f in normal_datafiles_list:
    #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
    df1 = pd.read_csv('./data/'+f+'.csv')
    df_list.append(df1)

df=pd.concat(df_list)
datalength=df.shape[0]
# subsampled to 5'  =  30 * 10"
# We consider smaples every 5' because in production, we will only have data at this frequency
subsamplingrate=30
subsamplingrate=30


normaldataframe=df.iloc[range(0,datalength,subsamplingrate)][features]
normaldataframe.reset_index(inplace=True,drop=True)


df_list=[]
for f in anormal_datafiles_list:
    #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
    df1 = pd.read_csv('./data/'+f+'.csv')
    df_list.append(df1)

df=pd.concat(df_list)
datalength=df.shape[0]
# subsampled to 5'  =  30 * 10"
anormaldataframe=df.iloc[range(0,datalength,subsamplingrate)][features]
anormaldataframe.reset_index(inplace=True,drop=True)


# Train data is first 2/3 of normaldata
# Test data is: last 1/3 of normaldata + anormaldata + last 1/3 of normaldata
dataTrain=normaldataframe.values[0:int(normaldataframe.shape[0]*2/3),:]
dataTest=np.vstack((normaldataframe.values[int(normaldataframe.shape[0]*2/3)+1:,:],anormaldataframe.values, normaldataframe.values[int(normaldataframe.shape[0]*2/3)+1:,:] ))


def normalize2():
    # merges train and test
    means=[]
    stdevs=[]
    for i in range(NumFeatures):
        means.append(dataTrain[:,i].mean())
        stdevs.append(dataTrain[:,i].std())
    return( (dataTrain-means)/stdevs, (dataTest-means)/stdevs )

(dataTrainNorm,dataTestNorm)=normalize2()

TIME_STEPS = 24
def create_sequences(values, time_steps=TIME_STEPS):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)

x_train = create_sequences(dataTrainNorm)

model = keras.Sequential(
    [
        layers.Input(shape=(x_train.shape[1], x_train.shape[2])),
        layers.Conv1D(
            filters=64,
            kernel_size=7,
            padding="same",
            strides=2,
            activation="relu",
        ),
        layers.Dropout(rate=0.2),
        layers.Conv1D(
            filters=32,
            kernel_size=7,
            padding="same",
            strides=2,
            activation="relu",
        ),
        layers.Conv1DTranspose(
            filters=32,
            kernel_size=7,
            padding="same",
            strides=2,
            activation="relu",
        ),
        layers.Dropout(rate=0.2),
        layers.Conv1DTranspose(
            filters=64,
            kernel_size=7,
            padding="same",
            strides=2,
            activation="relu",
        ),
        layers.Conv1DTranspose(filters=x_train.shape[2], kernel_size=7, padding="same"),
    ]
)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
model.summary()

path_checkpoint = "model._checkpoint.weights.h5"
es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)

modelckpt_callback = keras.callbacks.ModelCheckpoint(
    monitor="val_loss",
    filepath=path_checkpoint,
    verbose=1,
    save_weights_only=True,
    save_best_only=True,
)


if options.train:
    history = model.fit(
        x_train,
        x_train,
        epochs=400,
        batch_size=128,
        validation_split=0.3,
        callbacks=[  es_callback, modelckpt_callback      ],
    )

    plt.plot(history.history["loss"], label="Training Loss")
    plt.plot(history.history["val_loss"], label="Validation Loss")
    plt.legend()
    plt.show()
else:
    model.load_weights(path_checkpoint)


x_train_pred = model.predict(x_train)
train_mae_loss = np.mean(np.abs(x_train_pred - x_train), axis=1)
threshold = np.max(train_mae_loss,axis=0)

print("Threshold : ",threshold)
threshold=threshold*2
# Threshold is enlarged because, otherwise, for subsamples at 5' have many false positives

x_test = create_sequences(dataTestNorm)
x_test_pred = model.predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)

anomalies = test_mae_loss > threshold
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if anomalies[i][0] or anomalies[i][1]:
        anomalous_data_indices.append(i)

#print(anomalous_data_indices)


# Let's plot only a couple of features
def plotData2():    
    fig, axes = plt.subplots(
        nrows=2, ncols=1, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
    )
    axes[0].plot(range(len(x_train)),x_train[:,0,0],label="normal")
    axes[0].plot(range(len(x_train),len(x_train)+len(x_test)),x_test[:,0,0],label="abnormal")
    axes[0].plot(len(x_train)+np.array(anomalous_data_indices),x_test[anomalous_data_indices,0,0],color='red',marker='.',linewidth=0,label="abnormal detection")
    axes[0].legend()
    axes[1].plot(range(len(x_train)),x_train[:,0,1],label="normal")
    axes[1].plot(range(len(x_train),len(x_train)+len(x_test)),x_test[:,0,1],label="abnormal")
    axes[1].plot(len(x_train)+np.array(anomalous_data_indices),x_test[anomalous_data_indices,0,1],color='red',marker='.',linewidth=0,label="abnormal detection")
    axes[1].legend()
    axes[0].set_ylabel(features[0])
    axes[1].set_ylabel(features[1])
    plt.show()

plotData2()