
v5_class.py

# Csar Fdez, UdL, 2025
# Changes from v1: normalization
# In v1, each failure type has its own normalization parameters (means and stdevs)
# In v2, mean and stdev are the same for all data
# v3.py trains the models looping over TIME_STEPS (4, 8, 12, 16, 20, 24, ...) finding the optimal threshold factor
# Derived from v3_class, derived from v3.py with code from v1_multifailure.py
# This code does not train for multiple time steps!
# Partial and total blocked condenser merged into one class.
# Construction of train and test sets changed: it is now done by days.
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
from keras import layers
from optparse import OptionParser
import copy
import pickle

parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS", default=12)
parser.add_option("-r", "--transition", dest="transition", help="Includes transition data (false)", default=False, action="store_true")
#parser.add_option("-f", "--thresholdfactor", dest="TF", help="Threshold Factor", default=1.4)
# A threshold makes no sense when classifying, because we apply many models and pick the class with the lowest MSE
(options, args) = parser.parse_args()
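# Example invocations (illustrative; flag names come from the options defined above):
#   python v5_class.py --train --timesteps 12
#   python v5_class.py --timesteps 12 --transition   # evaluate including transition data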
# Data file arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Full blocked condenser
# 3. Partial blocked condenser
# 4. Fan condenser not working
# 5. Open door
NumberOfFailures=3 # So far, we only have data for the first 3 failure types (4 classes including no-failure)
datafiles=[[],[]] # 0 for train, 1 for test
for i in range(NumberOfFailures+1):
    datafiles[0].append([])
    datafiles[1].append([])
# The next set of data corresponds to the freezer, SP=-26
datafiles[0][0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_']
datafiles[0][1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_']
datafiles[0][2]=['2024-12-18_5_','2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_']
datafiles[0][3]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_']
if options.transition:
    datafiles[1][0]=['2025-01-27_5_','2025-01-28_5_']
    datafiles[1][1]=['2024-12-14_5_','2024-12-15_5_','2024-12-16_5_'] # with TRANSITION
    datafiles[1][2]=['2024-12-17_5_','2024-12-19_5_','2024-12-25_5_','2024-12-26_5_'] # with TRANSITION
    datafiles[1][3]=['2024-12-27_5_','2024-12-31_5_','2025-01-01_5_'] # with TRANSITION
else:
    datafiles[1][0]=['2025-01-27_5_','2025-01-28_5_']
    datafiles[1][1]=['2024-12-14_5_','2024-12-15_5_']
    datafiles[1][2]=['2024-12-19_5_','2024-12-25_5_','2024-12-26_5_']
    datafiles[1][3]=['2024-12-31_5_','2025-01-01_5_']
#datafiles[0][4]=['2025-02-05_5_']
#datafiles[1][4]=['2025-02-05_5_']
# r1 s5: supply air flow temperature
# r1 s1: inlet evaporator temperature
# r1 s4: condenser outlet
# Variables r1 s4 and pa1 apiii may not exist in cloud controllers
# Feature combination suggested by AKO; earlier alternatives kept for reference:
#features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
#features=['r1 s1','r1 s5','pa1 apiii']
#features=['r1 s5','pa1 apiii']
#features=['r1 s1','r1 s5']
#features=['r1 s5']
features=['r1 s1','r1 s4','r1 s5']
featureNames={}
featureNames['r1 s1']='$T_{evap}$'
featureNames['r1 s4']='$T_{cond}$'
featureNames['r1 s5']='$T_{air}$'
featureNames['pa1 apiii']='$P_{elec}$'
unitNames={}
unitNames['r1 s1']='$(^{o}C)$'
unitNames['r1 s4']='$(^{o}C)$'
unitNames['r1 s5']='$(^{o}C)$'
unitNames['pa1 apiii']='$(W)$'
NumFeatures=len(features)
df_list=[[],[]]
for i in range(NumberOfFailures+1):
    df_list[0].append([])
    df_list[1].append([])
for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[0][i]:
        print(" ", f)
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[0][i]=pd.concat(dftemp)
for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[1][i]:
        print(" ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[1][i]=pd.concat(dftemp)
# Subsampled to 5 min = 30 * 10 s
# We consider samples every 5 minutes because in production we will only have data at this frequency
subsamplingrate=30
dataframe=[[],[]]
for i in range(NumberOfFailures+1):
    dataframe[0].append([])
    dataframe[1].append([])
for i in range(NumberOfFailures+1):
    datalength=df_list[0][i].shape[0]
    dataframe[0][i]=df_list[0][i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[0][i].reset_index(inplace=True,drop=True)
    dataframe[0][i].dropna(inplace=True)
for i in range(NumberOfFailures+1):
    datalength=df_list[1][i].shape[0]
    dataframe[1][i]=df_list[1][i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[1][i].reset_index(inplace=True,drop=True)
    dataframe[1][i].dropna(inplace=True)
# Train data is roughly the first 2/3 of the data and test data the last 1/3;
# not exactly, since the split is now done by days (see header note)
dataTrain=[]
dataTest=[]
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[0][i])
    dataTest.append(dataframe[1][i])
# Calculate means and stdevs over the training data of all classes
a=dataTrain[0]
for i in range(1,NumberOfFailures+1):
    a=np.vstack((a,dataTrain[i]))
means=a.mean(axis=0)
stdevs=a.std(axis=0)
def normalize2(train,test):
    return( (train-means)/stdevs, (test-means)/stdevs )
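# normalize2 applies a shared z-score normalization (the v2 change: one set of
# means/stdevs for all classes). The inverse transform x*stdevs + means is used
# further below to recover physical units for plotting.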
dataTrainNorm=[]
dataTestNorm=[]
for i in range(NumberOfFailures+1):
    dataTrainNorm.append([])
    dataTestNorm.append([])
for i in range(NumberOfFailures+1):
    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
NumFilters=64
KernelSize=7
DropOut=0.2
def create_sequences(values, time_steps):
    # Build overlapping sliding windows of length time_steps
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
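# Shape sanity check (illustrative): an input of shape (N, NumFeatures) with
# time_steps=T yields windows of shape (N-T+1, T, NumFeatures), e.g.
#   create_sequences(np.zeros((100, 3)), 12).shape == (89, 12, 3)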
def listToString(l):
    # Concatenate list items into one string without spaces (used in checkpoint filenames)
    r=''
    for i in l:
        r+=str(i)
    return(r.replace(' ',''))
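# One Conv1D autoencoder per class (no-failure plus each failure type): a
# strided two-layer convolutional encoder followed by a mirrored
# Conv1DTranspose decoder, each trained to reconstruct its own class's
# sequences. At test time a window is assigned to the class whose model
# reconstructs it with the lowest error.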
model=[]
modelckpt_callback =[]
es_callback =[]
path_checkpoint=[]
timesteps=int(options.timesteps)
x_train=[]
for i in range(NumberOfFailures+1):
    x_train.append(create_sequences(dataTrainNorm[i],timesteps))
    model.append([])
    model[i] = keras.Sequential(
        [
            layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])),
            layers.Conv1D(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1D(
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1DTranspose(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=KernelSize, padding="same"),
        ]
    )
    model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model[i].summary()
    path_checkpoint.append("model_class_v5_"+str(i)+"_"+str(timesteps)+listToString(features)+"_checkpoint.weights.h5")
    es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15))
    modelckpt_callback.append(keras.callbacks.ModelCheckpoint( monitor="val_loss", filepath=path_checkpoint[i], verbose=1, save_weights_only=True, save_best_only=True,))
if options.train:
    history=[]
    for i in range(NumberOfFailures+1):
        history.append(model[i].fit( x_train[i], x_train[i], epochs=400, batch_size=128, validation_split=0.3, callbacks=[ es_callback[i], modelckpt_callback[i] ],))
        x_train_pred=model[i].predict(x_train[i])
else:
    for i in range(NumberOfFailures+1):
        model[i].load_weights(path_checkpoint[i])
# Let's plot some features
colorline=['black','violet','lightcoral','cyan','lime','grey']
colordot=['grey','darkviolet','red','blue','green','black']
#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
featuresToPlot=features
indexesToPlot=[]
for i in featuresToPlot:
    indexesToPlot.append(features.index(i))
# 2nd scenario: go over anomalies and classify each by lowest reconstruction error
datalist=[dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3]]
x_test=create_sequences(datalist[0],int(options.timesteps))
for i in range(1,len(datalist)):
    x_test=np.vstack((x_test,create_sequences(datalist[i],int(options.timesteps))))
# Define ranges for plotting in different colors
testRanges=[]
r=0
for i in range(len(datalist)):
    testRanges.append([r,r+datalist[i].shape[0]-int(options.timesteps)+1])
    r+=datalist[i].shape[0]-int(options.timesteps)+1
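# Each class contributes len(datalist[i]) - timesteps + 1 windows, so
# testRanges[i] holds the [start, end) window indices of class i inside x_test
# (e.g., with 300 samples and timesteps=12, a class spans 289 windows).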
testClasses=[0,1,2,3]
if not len(testClasses)==len(testRanges):
    print("ERROR: testClasses and testRanges must have the same length")
    exit(0)
x_test_predict=[]
for m in range(NumberOfFailures+1):
    x_test_predict.append(model[m].predict(x_test))
x_test_predict=np.array((x_test_predict))
test_mae_loss =[]
for m in range(NumberOfFailures+1):
    test_mae_loss.append(np.mean(np.abs(x_test_predict[m,:,:,:] - x_test), axis=1))
test_mae_loss=np.array((test_mae_loss))
test_mae_loss_average=np.mean(test_mae_loss,axis=2) # average over features
classes=np.argmin(test_mae_loss_average,axis=0)
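# test_mae_loss has shape (NumberOfFailures+1, num_windows, NumFeatures);
# averaging over axis 2 leaves one reconstruction error per (model, window),
# and argmin over axis 0 assigns each window to the best-reconstructing model.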
x=[]
y=[]
for j in range(NumberOfFailures+1):
    x.append([])
    y.append([])
# Collect misclassified windows for plotting: x holds window indices, y the
# denormalized value of the first plotted feature
for j in range(NumberOfFailures+1):
    for k in range(testRanges[j][0],testRanges[j][1]):
        if not classes[k]==testClasses[j]:
            x[classes[k]].append(k)
            y[classes[k]].append(x_test[k,0,indexesToPlot[0]]*stdevs[0]+means[0])
def plotData4():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        init=0
        end=testRanges[0][1]
        for j in range(NumberOfFailures+1):
            if NumFeaturesToPlot==1:
                axes.plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[i]+means[i],label="Class "+str(j), color=colorline[j],linewidth=1)
            else:
                axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[i]+means[i],label="Class "+str(j), color=colorline[j],linewidth=1)
            if j<NumberOfFailures:
                init=end
                end+=(testRanges[j+1][1]-testRanges[j+1][0])
            #if i==0:
            #    axes[0].plot(x[j],y[j] ,color=colordot[j],marker='.',markersize=10,linewidth=0,label="Fail detect class "+str(j) )
        s=''
        s+=featureNames[features[indexesToPlot[i]]]
        s+=' '+unitNames[features[indexesToPlot[i]]]
        if NumFeaturesToPlot==1:
            axes.set_ylabel(s)
            axes.grid()
        else:
            axes[i].set_ylabel(s)
            axes[i].grid()
    for j in range(NumberOfFailures+1):
        if NumFeaturesToPlot==1:
            axes.plot(x[j],y[j] ,color=colordot[j],marker='.',markersize=10,linewidth=0,label="Fail detect class "+str(j) )
        else:
            axes[0].plot(x[j],y[j] ,color=colordot[j],marker='.',markersize=10,linewidth=0,label="Fail detect class "+str(j) )
    if NumFeaturesToPlot==1:
        axes.legend(ncol=4,loc=(0.1,0.98))
    else:
        axes[0].legend(ncol=4,loc=(0.1,0.98))
    #axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()
def whichClass(k,ranges):
    # Return the true class of window k according to the test ranges
    for i in range(NumberOfFailures+1):
        if k in range(ranges[i][0],ranges[i][1]):
            return(i)
    print("Error: class does not exist")
    exit(0)
## Implement anomaly metrics for each failure class
def anomalyMetric(classes,testranges,testclasses):
    # FP, TP: false/true positives
    # TN, FN: true/false negatives
    # Sensitivity (recall): probability of detecting a failure when the data is faulty: TP/(TP+FN)
    # Precision: rate of correct positive results: TP/(TP+FP)
    # F1-score: predictive performance measure: 2*Precision*Sensitivity/(Precision+Sensitivity)
    TP=np.zeros(NumberOfFailures+1)
    FP=np.zeros(NumberOfFailures+1)
    FN=np.zeros(NumberOfFailures+1)
    Sensitivity=np.zeros(NumberOfFailures+1)
    Precision=np.zeros(NumberOfFailures+1)
    for i in range(len(testranges)):
        for k in range(testranges[i][0],testranges[i][1]):
            if classes[k]==testclasses[i]:
                TP[i]+=1
            else:
                FP[i]+=1
    for k in range(testranges[NumberOfFailures][1]):
        for i in range(len(testranges)):
            classK=whichClass(k,testranges)
            if not classK==testclasses[i]:
                if not classes[k]==classK:
                    FN[classes[k]]+=1
    for i in range(NumberOfFailures+1):
        Sensitivity[i]=TP[i]/(TP[i]+FN[i])
        Precision[i]=TP[i]/(TP[i]+FP[i])
    S=Sensitivity.mean()
    P=Precision.mean()
    F1=2*S*P/(S+P)
    print("Sensitivity: ",Sensitivity)
    print("S: ",S)
    print("Precision: ",Precision)
    print("P: ",P)
    print("F1-Score: ",F1)

anomalyMetric(classes,testRanges,testClasses)
plotData4()
