# Csar Fdez, UdL, 2025
# Changes from v1:   Normalization 
# In v1, each failure type has its own normalization parameters (means and stdevs)
# In v2, the mean and stdev are the same for all data
# v3.py trains the models looping over TIME_STEPS (4,8,12,16,20,24,...) finding the optimal threshold factor
# Changes in v4:  see comments on v1_unsupervised.py
# Be careful with v0_unsupervised and all the supervised versions,
# because dataTrain is not stacked before create_sequences, so
# the sets are not aligned in time

# Because in this case we don't look into transitories, we keep each train set independent

# The optimization of the threshold factor changed, because it is now based on the F1-score
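
# In short, the shared normalization used here (a sketch of the scheme described above):
#   x_norm = (x - mean_train) / std_train
# where mean_train and std_train are per-feature statistics computed over the
# vertically stacked train sets of all classes (see normalize2 below).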

import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
from keras import layers
from optparse import OptionParser
import copy
import pickle


parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
parser.add_option("-o", "--optimizetf", dest="optimizetf", help="Optimzes Threshold Factor (false)", default=False, action="store_true")
parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS ", default=12)
parser.add_option("-f", "--thresholdfactor", dest="TF", help="Threshold Factor ", default=1.4)

(options, args) = parser.parse_args()


# data files arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Fully blocked condenser
# 3. Partially blocked condenser
# 4. Condenser fan not working
# 5. Open door


NumberOfFailures=4  # So far, we have only data for the first 4 types of failures
datafiles=[]
for i in range(NumberOfFailures+1):
    datafiles.append([])

# The next set of data corresponds to a freezer, SP=-26
datafiles[0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_','2025-01-27_5_','2025-01-28_5_'] 
datafiles[1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_'] 
#datafiles[1]=['2024-12-17_5_','2024-12-16_5_','2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_'] #   This have transitions
datafiles[2]=['2024-12-18_5_','2024-12-19_5_'] 
datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_'] 
datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_'] 
#datafiles[4]=['2024-12-27_5_','2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']  #   This have transitions

#datafiles[4]=[] 

# Features suggested by Xavier
# Careful with 'tc s3' because in datafiles[0] it is always null
# It seems to be incorporated in the new tests

# r1 s5: supply air flow temperature
# r1 s1: inlet evaporator temperature
# r1 s4: condenser outlet temperature

# Variables 'r1 s4' and 'pa1 apiii' may not exist in cloud controllers


#features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
features=['r1 s1','r1 s4','r1 s5']
#features=['r1 s1','r1 s5']
featureNames={}
featureNames['r1 s1']='$T_{evap}$'
featureNames['r1 s4']='$T_{cond}$'
featureNames['r1 s5']='$T_{air}$'
featureNames['pa1 apiii']='$P_{elec}$'

unitNames={}
unitNames['r1 s1']='$(^{o}C)$'
unitNames['r1 s4']='$(^{o}C)$'
unitNames['r1 s5']='$(^{o}C)$'
unitNames['pa1 apiii']='$(W)$'


#features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']

#features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']

NumFeatures=len(features)

df_list=[]
for i in range(NumberOfFailures+1):
    df_list.append([])

for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[i]:
        print("                 ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[i]=pd.concat(dftemp)


# Subsample to 5 min = 30 * 10 s
# We consider samples every 5 min because in production we will only have data at this frequency
subsamplingrate=30

dataframe=[]
for i in range(NumberOfFailures+1):
    dataframe.append([])

for i in range(NumberOfFailures+1):
    datalength=df_list[i].shape[0]
    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True,drop=True)
    dataframe[i].dropna(inplace=True)


# Train data is first 2/3 of data
# Test data is: last 1/3 of data 
dataTrain=[]
dataTest=[]
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])

# Calculate means and stdev
a=dataTrain[0]
for i in range(1,NumberOfFailures+1):
    a=np.vstack((a,dataTrain[i]))

means=a.mean(axis=0) 
stdevs=a.std(axis=0)
def normalize2(train,test):
    return( (train-means)/stdevs, (test-means)/stdevs )
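
# Note: means and stdevs are per-feature vectors (computed with axis=0), so the
# expressions above broadcast over rows and normalize each feature column independently.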

dataTrainNorm=[]
dataTestNorm=[]
for i in range(NumberOfFailures+1):
    dataTrainNorm.append([])
    dataTestNorm.append([])

for i in range(NumberOfFailures+1):
    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])

def plotData():    
    fig, axes = plt.subplots(
        nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
    )
    for i in range(NumberOfFailures+1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+",  feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+",  feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    plt.show()

#plotData()
#exit(0)


NumFilters=64
KernelSize=7
DropOut=0.2
ThresholdFactor=1.4
def create_sequences(values, time_steps):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
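
# Shape example: an input of shape (N, NumFeatures) yields an array of shape
# (N - time_steps + 1, time_steps, NumFeatures); window i covers rows
# i..i+time_steps-1, so consecutive windows overlap by time_steps-1 samples.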

def AtLeastOneTrue(x):
    for i in range(NumFeatures):
        if x[i]:
            return True
    return False
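
# Equivalent vectorized form (a sketch; x is one boolean row of per-feature flags):
#   return bool(np.any(x[:NumFeatures]))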

def anomalyMetric(th,ts,testList):  # the first element of the list is non-failure data
    # FP, TP: false/true positives
    # TN, FN: true/false negatives
    # Sensitivity (recall): probability of detecting a failure given faulty data: TP/(TP+FN)
    # Specificity: true negative rate given the data is OK: TN/(TN+FP)
    # Accuracy: rate of correct predictions: (TN+TP)/(TN+TP+FP+FN)
    # Precision: rate of true positives among positive results: TP/(TP+FP)
    # F1-score: predictive performance measure: 2*Precision*Sensitivity/(Precision+Sensitivity)
    # F2-score: predictive performance measure: 2*Specificity*Sensitivity/(Specificity+Sensitivity)
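    # Worked example with hypothetical counts: TP=90, FN=10, FP=5, TN=95 gives
    # Sensitivity = 90/100 = 0.90, Precision = 90/95 ~ 0.947, and
    # F1 = 2*0.947*0.90/(0.947+0.90) ~ 0.923.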
    x_test = create_sequences(testList[0],ts)
    x_test_pred = model.predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
    anomalies = test_mae_loss > th
    count=0
    for i in range(anomalies.shape[0]):
        if AtLeastOneTrue(anomalies[i]):
            count+=1
    FP=count
    TN=anomalies.shape[0]-count
    count=0
    TP=np.zeros((NumberOfFailures))
    FN=np.zeros((NumberOfFailures))
    Sensitivity=np.zeros((NumberOfFailures))
    Precision=np.zeros((NumberOfFailures))
    for i in range(1,len(testList)):
        x_test = create_sequences(testList[i],ts)
        x_test_pred = model.predict(x_test)
        test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
        anomalies = test_mae_loss > th
        count=0
        for j in range(anomalies.shape[0]):
            if AtLeastOneTrue(anomalies[j]):
                count+=1
        TP[i-1] = count
        FN[i-1] = anomalies.shape[0]-count
        Sensitivity[i-1]=TP[i-1]/(TP[i-1]+FN[i-1])
        Precision[i-1]=TP[i-1]/(TP[i-1]+FP)
    GlobalSensitivity=TP.sum()/(TP.sum()+FN.sum())
    Specificity=TN/(TN+FP)
    Accuracy=(TN+TP.sum())/(TN+TP.sum()+FP+FN.sum())
    GlobalPrecision=TP.sum()/(TP.sum()+FP)
    F1Score= 2*GlobalPrecision*GlobalSensitivity/(GlobalPrecision+GlobalSensitivity)
    F2Score = 2*Specificity*GlobalSensitivity/(Specificity+GlobalSensitivity)
    print("Global Precision: ",GlobalPrecision)
    print("Precision: ",Precision)
    print("Global Sensitivity: ",GlobalSensitivity)
    print("Sensitivity: ",Sensitivity)
    #print("Specifity: ",Specificity)
    #print("Accuracy: ",Accuracy)
    print("F1Score: ",F1Score)
    #print("F2Score: ",F2Score)
    #print("FP: ",FP)
    #return Sensitivity+Specifity
    return F1Score

FScoreHash={}
threshold={}
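# getFScore sweeps the threshold factor tf for one timestep value: a finer grid
# (step 0.1) below tf=2 and a coarser one (step 0.5) up to tf=8, storing
# [tf, F1-score] pairs in FScoreHash[timestep].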
def getFScore(timestep,datalist):
    FScoreHash[timestep]=[]
    tf=0.3
    while tf<8:
        th=threshold[timestep]*tf
        r=anomalyMetric(th,timestep,datalist)
        FScoreHash[timestep].append([tf,r])
        if tf<2:
            tf+=0.1
        else:
            tf+=0.5


def plotFScore(FS):
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 10), dpi=80, facecolor="w", edgecolor="k")
    for k in FS.keys():
        ar=np.array((FS[k]))
        axes.plot(ar[:,0],ar[:,1],label="$ns=$"+str(k),linewidth=3)
    axes.set_xlabel("Threshold factor ($tf$)")
    axes.set_ylabel("F1-Score")
    axes.legend()
    axes.grid()
    s='['
    for i in range(len(features)):
        s+=featureNames[features[i]]
        if i < len(features)-1:
            s+=', '
    s+=']'
    plt.title(s)
    plt.show()

def listToString(l):
    r=''
    for i in l:
        r+=str(i)
    return(r.replace(' ',''))

if options.train:   # The training branch does not need to be changed
    for timesteps in range(4,21,4):
        x_train=[]
        for i in range(NumberOfFailures+1):
            x_train.append(create_sequences(dataTrainNorm[i],timesteps))

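        # Architecture sketch: a 1D convolutional autoencoder. Two strided
        # Conv1D layers halve the time dimension twice (encoder); two strided
        # Conv1DTranspose layers mirror them back (decoder), and a final
        # Conv1DTranspose restores the original feature count.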
        model = keras.Sequential(
            [
                layers.Input(shape=(x_train[0].shape[1], x_train[0].shape[2])),
                layers.Conv1D(
                    filters=NumFilters,
                    kernel_size=KernelSize,
                    padding="same",
                    strides=2,
                    activation="relu",
                ),
                layers.Dropout(rate=DropOut),
                layers.Conv1D(
                    filters=int(NumFilters/2),
                    kernel_size=KernelSize,
                    padding="same",
                    strides=2,
                    activation="relu",
                ),
                layers.Conv1DTranspose(
                    filters=int(NumFilters/2),
                    kernel_size=KernelSize,
                    padding="same",
                    strides=2,
                    activation="relu",
                ),
                layers.Dropout(rate=DropOut),
                layers.Conv1DTranspose(
                    filters=NumFilters,
                    kernel_size=KernelSize,
                    padding="same",
                    strides=2,
                    activation="relu",
                ),
                layers.Conv1DTranspose(filters=x_train[0].shape[2], kernel_size=KernelSize, padding="same"),
            ]
        )
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
        model.summary()
        path_checkpoint="model_noclass_v4_"+str(timesteps)+listToString(features)+"_checkpoint.weights.h5"
        es_callback=keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
        modelckpt_callback=keras.callbacks.ModelCheckpoint( monitor="val_loss", filepath=path_checkpoint, verbose=1, save_weights_only=True, save_best_only=True,)

        history=model.fit( x_train[0], x_train[0], epochs=400, batch_size=128, validation_split=0.3, callbacks=[  es_callback, modelckpt_callback      ],)

        x_train_pred=model.predict(x_train[0])
        train_mae_loss=np.mean(np.abs(x_train_pred - x_train[0]), axis=1)
        threshold[timesteps]=np.max(train_mae_loss,axis=0)
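        # threshold[timesteps] is a per-feature vector: the maximum training
        # reconstruction error (MAE averaged over the window axis) observed for
        # each feature. It is later scaled by the threshold factor tf.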
    file = open('threshold_v4_'+listToString(features)+'.pk', 'wb')
    pickle.dump(threshold, file)
    file.close()
    exit(0)
else:
    file = open('threshold_v4_'+listToString(features)+'.pk', 'rb')
    threshold=pickle.load(file)
    file.close()


    x_train=[]
    for i in range(NumberOfFailures+1):
        x_train.append(create_sequences(dataTrainNorm[i],int(options.timesteps)))

    model = keras.Sequential(
        [
            layers.Input(shape=(x_train[0].shape[1], x_train[0].shape[2])),
            layers.Conv1D(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1D(
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1DTranspose(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(filters=x_train[0].shape[2], kernel_size=KernelSize, padding="same"),
        ]
    )
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model.summary()

    
    if options.optimizetf:
        for timesteps in range(4,21,4):
            path_checkpoint="model_noclass_v4_"+str(timesteps)+listToString(features)+"_checkpoint.weights.h5"
            es_callback=keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
            modelckpt_callback=keras.callbacks.ModelCheckpoint( monitor="val_loss", filepath=path_checkpoint, verbose=1, save_weights_only=True, save_best_only=True,)
            model.load_weights(path_checkpoint)
            getFScore(timesteps,[dataTestNorm[0],dataTrainNorm[1],dataTrainNorm[2],dataTrainNorm[3],dataTrainNorm[4]])
        file = open('FScore_v4_'+listToString(features)+'.pk', 'wb')
        pickle.dump(FScoreHash, file)
        file.close()


    path_checkpoint="model_noclass_v4_"+str(options.timesteps)+listToString(features)+"_checkpoint.weights.h5"
    es_callback=keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
    modelckpt_callback=keras.callbacks.ModelCheckpoint( monitor="val_loss", filepath=path_checkpoint, verbose=1, save_weights_only=True, save_best_only=True,)
    model.load_weights(path_checkpoint)


    file = open('FScore_v4_'+listToString(features)+'.pk', 'rb')
    FS=pickle.load(file)
    file.close()


    #plotFScore(FS)
    #exit(0)

TIME_STEPS=int(options.timesteps)
#  1st scenario. Detect only the anomaly.  Later, we will classify it
# Test data =  testnormal + testfail1 + testfail2 + testfail3 + testfail4
#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
# For the failure data, we could use train data because it is not used for training and includes the first samples
#datalist=[dataTestNorm[0],dataTrainNorm[1],dataTrainNorm[2],dataTrainNorm[3],dataTrainNorm[4]]
datalist=[dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]]


x_test=create_sequences(datalist[0],int(options.timesteps))
for i in range(1,len(datalist)):
    x_test=np.vstack((x_test,create_sequences(datalist[i],int(options.timesteps))))
x_test_pred = model.predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)


# Define ranges for plotting in different colors
# (each segment of length L yields L - timesteps + 1 windows in create_sequences)
testRanges=[]
r=0
for i in range(len(datalist)):
    nw=datalist[i].shape[0]-int(options.timesteps)+1
    testRanges.append([r,r+nw])
    r+=nw
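
# Example: two segments of lengths 100 and 60 with timesteps=12 yield 89 and 49
# windows respectively, so testRanges=[[0,89],[89,138]] and x_test has 138 rows.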

#r=dataTestNorm[0].shape[0]
#testRanges.append([0,r])
#for i in range(1,NumberOfFailures+1):
#    rnext=r+dataTrainNorm[i].shape[0]
#    testRanges.append([r,rnext] )
#    r=rnext



anomalies = test_mae_loss > threshold[int(options.timesteps)]*float(options.TF)
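# anomalies has shape (num_windows, NumFeatures): a window is flagged anomalous
# when ANY feature's reconstruction error exceeds its scaled per-feature threshold.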
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if AtLeastOneTrue(anomalies[i]):
    #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
        anomalous_data_indices.append(i)

# Let's plot some features

colorline=['violet','lightcoral','cyan','lime','grey']
colordot=['darkviolet','red','blue','green','black']

#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
featuresToPlot=features

indexesToPlot=[]
for i in featuresToPlot:
    indexesToPlot.append(features.index(i))

def plotData3():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
    )
    for i in range(NumFeaturesToPlot):
        x=[]
        y=[]
        for k in anomalous_data_indices:
            if (k)<x_test.shape[0]:
                x.append(k)
                y.append(x_test[k,0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]])
        axes[i].plot(x,y ,color='black',marker='.',linewidth=0,label="Fail detection" )

        init=0
        end=testRanges[0][1]
        axes[i].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="No fail")
        init=end
        end+=(testRanges[1][1]-testRanges[1][0])
        for j in range(1,NumberOfFailures+1):
            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Fail type "+str(j), color=colorline[j-1],linewidth=1)
            if j<NumberOfFailures:
                init=end
                end+=(testRanges[j+1][1]-testRanges[j+1][0])

        if i==(NumFeaturesToPlot-1):
            axes[i].legend(loc='right')
        s=''
        s+=featureNames[features[indexesToPlot[i]]]
        s+=' '+unitNames[features[indexesToPlot[i]]]
        axes[i].set_ylabel(s)
        axes[i].grid()
    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()


anomalyMetric(threshold[int(options.timesteps)]*float(options.TF), int(options.timesteps),datalist)


plotData3()


exit(0)



#  2nd scenario. Detect the anomaly on a synthetic sequence that alternates normal and failure segments.  Later, we will classify it
# Test data =  normal + fail1 + normal + fail2 + normal + fail3 + normal + fail4
#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
num=100
d=np.vstack((dataTestNorm[0][0:num,:],dataTestNorm[1][0:num,:],dataTestNorm[0][num:2*num,:],dataTestNorm[2][70:70+num,:],dataTestNorm[0][2*num-90:3*num-90,:],dataTestNorm[3][50:num+50,:],dataTestNorm[0][150:150+num,:],dataTestNorm[4][0:num+TIME_STEPS,:]))
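
# d alternates blocks of roughly num samples: normal, fail1, normal, fail2,
# normal, fail3, normal, fail4. Some failure blocks start at a nonzero offset
# to skip transitory samples; the last block adds TIME_STEPS extra samples.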

x_test = create_sequences(d,int(options.timesteps))
x_test_pred = model.predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)


anomalies = test_mae_loss > threshold[int(options.timesteps)]*float(options.TF)
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if AtLeastOneTrue(anomalies[i]):
    #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
        anomalous_data_indices.append(i)

def plotData4():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
    )
    for i in range(NumFeaturesToPlot):
        for j in range(1,NumberOfFailures+1):
            if j==1:
                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]],label="No fail", color='C0')
            else:
                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]], color='C0')
            axes[i].plot(range(j*2*num-num,j*2*num),x_test[j*2*num-num:j*2*num,0,indexesToPlot[i]],label="Fail type "+str(j),color=colorline[j-1])
        x=[]
        y=[]
        for k in anomalous_data_indices:
            if (k+TIME_STEPS)<x_test.shape[0]:
                x.append(k+TIME_STEPS)
                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
        axes[i].plot(x,y ,color='black',marker='.',linewidth=0,label="Fail detection" )

        if i==0:
            axes[i].legend(bbox_to_anchor=(0.9, 0.4))

        s=''
        s+=featureNames[features[indexesToPlot[i]]]
        axes[i].set_ylabel(s)
        axes[i].grid()
    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()


plotData4()