# v5_class.py
# Csar Fdez, UdL, 2025
# Changes from v1: normalization
# In v1, each failure type has its own normalization parameters (mean and stdevs)
# In v2, the mean and stdev are the same for all data
# v3.py trains the models looping over TIME_STEPS (4, 8, 12, 16, 20, 24, ...) to find the optimal threshold factor
# Derived from v3_class, which derives from v3.py with code from v1_multifailure.py
# This code does not train for multiple time steps!
# Partial and total blocked condenser are merged into one class.
# Construction of the train and test sets changed: it is now done by days.
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
from keras import layers
from optparse import OptionParser
import copy
import pickle
parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS", default=12)
parser.add_option("-r", "--transition", dest="transition", help="Includes transition data (false)", default=False, action="store_true")
parser.add_option("-p", "--plot", dest="plot", help="Only plot data (false)", default=False, action="store_true")
#parser.add_option("-f", "--thresholdfactor", dest="TF", help="Threshold Factor", default=1.4)
# A threshold factor makes no sense when classifying, because we apply every model and decide the class by the lowest MSE
(options, args) = parser.parse_args()
# Data file arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Fully blocked condenser
# 3. Partially blocked condenser
# 4. Condenser fan not working
# 5. Open door
NumberOfFailures=4  # So far, we only have data for the first 4 failure types
datafiles=[[],[]]  # 0 for train, 1 for test
for i in range(NumberOfFailures+1):
    datafiles[0].append([])
    datafiles[1].append([])
# The next set of data corresponds to the Freezer, SP=-26
datafiles[0][0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_']
datafiles[0][1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_']
datafiles[0][2]=['2024-12-18_5_','2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_']
datafiles[0][3]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_']
datafiles[0][4]=['2025-02-13_5_','2025-02-14_5_']
if options.transition:
    datafiles[1][0]=['2025-01-27_5_','2025-01-28_5_']
    datafiles[1][1]=['2024-12-14_5_','2024-12-15_5_','2024-12-16_5_'] # with TRANSITION
    datafiles[1][2]=['2024-12-17_5_','2024-12-19_5_','2024-12-25_5_','2024-12-26_5_'] # with TRANSITION
    datafiles[1][3]=['2024-12-27_5_','2024-12-31_5_','2025-01-01_5_'] # with TRANSITION
    datafiles[1][4]=['2025-02-12_5_','2025-02-15_5_','2025-02-16_5_']
else:
    datafiles[1][0]=['2025-01-27_5_','2025-01-28_5_']
    datafiles[1][1]=['2024-12-14_5_','2024-12-15_5_']
    datafiles[1][2]=['2024-12-19_5_','2024-12-25_5_','2024-12-26_5_']
    datafiles[1][3]=['2024-12-31_5_','2025-01-01_5_']
    datafiles[1][4]=['2025-02-15_5_','2025-02-16_5_']
#datafiles[0][4]=['2025-02-05_5_']
#datafiles[1][4]=['2025-02-05_5_']
# r1 s5: supply air flow temperature
# r1 s1: inlet evaporator temperature
# r1 s4: condenser outlet
# Variables r1 s4 and pa1 apiii may not exist in cloud controllers
# Earlier trial selections, superseded by the active assignment below:
#features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
#features=['r1 s1','r1 s4','r1 s5']
#features=['r1 s5']
# Feature combination suggested by AKO
#features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
features=['r1 s1','r1 s4','r1 s5']
#features=['r1 s1','r1 s5','pa1 apiii']
#features=['r1 s5','pa1 apiii']
#features=['r1 s1','r1 s5']
#features=['r1 s5']
featureNames={}
featureNames['r1 s1']='$T_{evap}$'
featureNames['r1 s4']='$T_{cond}$'
featureNames['r1 s5']='$T_{air}$'
featureNames['pa1 apiii']='$P_{elec}$'
unitNames={}
unitNames['r1 s1']='$(^{o}C)$'
unitNames['r1 s4']='$(^{o}C)$'
unitNames['r1 s5']='$(^{o}C)$'
unitNames['pa1 apiii']='$(W)$'
NumFeatures=len(features)
df_list=[[],[]]
for i in range(NumberOfFailures+1):
    df_list[0].append([])
    df_list[1].append([])
for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[0][i]:
        print(" ", f)
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[0][i]=pd.concat(dftemp)
for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[1][i]:
        print(" ", f)
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[1][i]=pd.concat(dftemp)
# Subsample to 5' (= 30 samples * 10")
# We take samples every 5' because in production we will only have data at this frequency
subsamplingrate=30
dataframe=[[],[]]
for i in range(NumberOfFailures+1):
    dataframe[0].append([])
    dataframe[1].append([])
for i in range(NumberOfFailures+1):
    datalength=df_list[0][i].shape[0]
    dataframe[0][i]=df_list[0][i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[0][i].reset_index(inplace=True,drop=True)
    dataframe[0][i].dropna(inplace=True)
for i in range(NumberOfFailures+1):
    datalength=df_list[1][i].shape[0]
    dataframe[1][i]=df_list[1][i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[1][i].reset_index(inplace=True,drop=True)
    dataframe[1][i].dropna(inplace=True)
# Train data is [0] and test data is [1]
dataTrain=[]
dataTest=[]
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[0][i])
    dataTest.append(dataframe[1][i])
# Calculate means and stdevs over the whole training set
a=dataTrain[0]
for i in range(1,NumberOfFailures+1):
    a=np.vstack((a,dataTrain[i]))
means=a.mean(axis=0)
stdevs=a.std(axis=0)
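# Note: a single set of pooled statistics is used for every class (the v2
# behaviour described in the header), which also keeps the reconstruction
# errors of the different class models on a comparable scale.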
def normalize2(train,test):
    return( (train-means)/stdevs, (test-means)/stdevs )
dataTrainNorm=[]
dataTestNorm=[]
for i in range(NumberOfFailures+1):
    dataTrainNorm.append([])
    dataTestNorm.append([])
for i in range(NumberOfFailures+1):
    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
NumFilters=64
KernelSize=7
DropOut=0.2
def create_sequences(values, time_steps):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
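# For example, an input of shape (100, NumFeatures) with time_steps=12 yields
# an array of shape (89, 12, NumFeatures): the overlapping windows
# values[0:12], values[1:13], ..., values[88:100].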
def listToString(l):
    r=''
    for i in l:
        r+=str(i)
    return(r.replace(' ',''))
model=[]
modelckpt_callback =[]
es_callback =[]
path_checkpoint=[]
timesteps=int(options.timesteps)
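# One convolutional autoencoder is built per class (no-failure + the 4 failure
# types). Each model is trained to reconstruct windows of its own class only,
# so at test time the model with the smallest reconstruction error identifies
# the class. The two stride-2 Conv1D layers compress the time axis from T to
# T/4 (e.g. 12 -> 6 -> 3); the two stride-2 Conv1DTranspose layers expand it
# back, and the final Conv1DTranspose restores the input feature count.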
x_train=[]
for i in range(NumberOfFailures+1):
    x_train.append(create_sequences(dataTrainNorm[i],timesteps))
    model.append([])
    model[i] = keras.Sequential(
        [
            layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])),
            layers.Conv1D(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1D(
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1DTranspose(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=KernelSize, padding="same"),
        ]
    )
    model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model[i].summary()
    path_checkpoint.append("model_class_v5_"+str(i)+"_"+str(timesteps)+listToString(features)+"_checkpoint.weights.h5")
    es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15))
    modelckpt_callback.append(keras.callbacks.ModelCheckpoint( monitor="val_loss", filepath=path_checkpoint[i], verbose=1, save_weights_only=True, save_best_only=True,))
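# EarlyStopping halts training after 15 epochs without improvement in
# val_loss; ModelCheckpoint keeps only the best weights at path_checkpoint[i],
# which the no-train branch below reloads via load_weights().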
if options.train:
    history=[]
    for i in range(NumberOfFailures+1):
        history.append(model[i].fit( x_train[i], x_train[i], epochs=400, batch_size=128, validation_split=0.3, callbacks=[ es_callback[i], modelckpt_callback[i] ],))
        x_train_pred=model[i].predict(x_train[i])  # training reconstruction (currently unused)
else:
    for i in range(NumberOfFailures+1):
        model[i].load_weights(path_checkpoint[i])
# Let's plot some features
colorline=['black','violet','lightcoral','cyan','lime','grey']
colordot=['grey','darkviolet','red','blue','green','black']
#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
featuresToPlot=features
indexesToPlot=[]
for i in featuresToPlot:
    indexesToPlot.append(features.index(i))
def plotData():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        init=0
        end=testRanges[0][1]
        for j in range(NumberOfFailures+1):
            # De-normalize before plotting (index stats by the feature index, not the plot index)
            if NumFeaturesToPlot==1:
                axes.plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Class "+str(j), color=colorline[j],linewidth=1)
            else:
                axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Class "+str(j), color=colorline[j],linewidth=1)
            if j<NumberOfFailures:
                init=end
                end+=(testRanges[j+1][1]-testRanges[j+1][0])
        s=''
        s+=featureNames[features[indexesToPlot[i]]]
        s+=' '+unitNames[features[indexesToPlot[i]]]
        if NumFeaturesToPlot==1:
            axes.set_ylabel(s)
            axes.grid()
        else:
            axes[i].set_ylabel(s)
            axes[i].grid()
    if NumFeaturesToPlot==1:
        axes.legend(ncol=4,loc=(0.1,0.98))
    else:
        axes[0].legend(ncol=4,loc=(0.1,0.98))
    plt.show()
# 2nd scenario: go over the anomalies and classify each one by the smallest reconstruction error
#datalist=[dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3]]
datalist=[dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]]
x_test=create_sequences(datalist[0],int(options.timesteps))
for i in range(1,len(datalist)):
    x_test=np.vstack((x_test,create_sequences(datalist[i],int(options.timesteps))))
# Define ranges for plotting in different colors
testRanges=[]
r=0
for i in range(len(datalist)):
    testRanges.append([r,r+datalist[i].shape[0]-int(options.timesteps)+1])
    r+=datalist[i].shape[0]-int(options.timesteps)+1
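# Each class contributes len(datalist[i]) - timesteps + 1 windows, so
# testRanges holds consecutive half-open intervals [start, end) over the
# stacked x_test; e.g. with 500 and 300 windows for the first two classes,
# testRanges would start [[0,500],[500,800],...].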
if options.plot:
    # Only plot data and exit
    plotData()
    exit(0)
testClasses=[0,1,2,3,4]
if not len(testClasses)==len(testRanges):
    print("ERROR: testClasses and testRanges must have the same length")
    exit(0)
x_test_predict=[]
for m in range(NumberOfFailures+1):
    x_test_predict.append(model[m].predict(x_test))
x_test_predict=np.array(x_test_predict)
test_mae_loss =[]
for m in range(NumberOfFailures+1):
    test_mae_loss.append(np.mean(np.abs(x_test_predict[m,:,:,:] - x_test), axis=1))
test_mae_loss=np.array(test_mae_loss)
test_mae_loss_average=np.mean(test_mae_loss,axis=2) # average over features
classes=np.argmin(test_mae_loss_average,axis=0) # choose the class with the minimum loss
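# Shapes at this point, with C = NumberOfFailures+1 models, N test windows,
# T = timesteps and F = NumFeatures:
#   x_test_predict:        (C, N, T, F)  reconstructions by each model
#   test_mae_loss:         (C, N, F)     MAE over the time axis
#   test_mae_loss_average: (C, N)        averaged over features
#   classes:               (N,)          index of the best-reconstructing model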
# Collect misclassified windows, grouped by predicted class, for plotting
x=[]
y=[]
for j in range(NumberOfFailures+1):
    x.append([])
    y.append([])
for j in range(NumberOfFailures+1):
    for k in range(testRanges[j][0],testRanges[j][1]):
        if not classes[k]==testClasses[j]:
            x[classes[k]].append(k)
            y[classes[k]].append(x_test[k,0,indexesToPlot[0]]*stdevs[indexesToPlot[0]]+means[indexesToPlot[0]])
def plotData4():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        init=0
        end=testRanges[0][1]
        for j in range(NumberOfFailures+1):
            # De-normalize before plotting (index stats by the feature index, not the plot index)
            if NumFeaturesToPlot==1:
                axes.plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Class "+str(j), color=colorline[j],linewidth=1)
            else:
                axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Class "+str(j), color=colorline[j],linewidth=1)
            if j<NumberOfFailures:
                init=end
                end+=(testRanges[j+1][1]-testRanges[j+1][0])
            #if i==0:
            #    axes[0].plot(x[j],y[j] ,color=colordot[j],marker='.',markersize=10,linewidth=0,label="Fail detect class "+str(j) )
        s=''
        s+=featureNames[features[indexesToPlot[i]]]
        s+=' '+unitNames[features[indexesToPlot[i]]]
        if NumFeaturesToPlot==1:
            axes.set_ylabel(s)
            axes.set_xlabel('Sample number')
            axes.grid()
        else:
            axes[i].set_ylabel(s)
            axes[NumFeaturesToPlot-1].set_xlabel('Sample number')
            axes[i].grid()
    # Overlay the misclassified windows as dots, colored by predicted class
    for j in range(NumberOfFailures+1):
        if NumFeaturesToPlot==1:
            axes.plot(x[j],y[j] ,color=colordot[j],marker='.',markersize=10,linewidth=0,label="Fail detect class "+str(j) )
        else:
            axes[0].plot(x[j],y[j] ,color=colordot[j],marker='.',markersize=10,linewidth=0,label="Fail detect class "+str(j) )
    if NumFeaturesToPlot==1:
        axes.legend(ncol=4,loc=(0.1,0.98))
    else:
        axes[0].legend(ncol=4,loc=(0.1,0.98))
    #axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()
def whichClass(k,ranges):
    for i in range(NumberOfFailures+1):
        if k in range(ranges[i][0],ranges[i][1]):
            return(i)
    print("Error: class does not exist")
    exit(0)
## Implement anomaly metrics for each failure class
def anomalyMetric(classes,testranges,testclasses):
    # FP, TP: false/true positives
    # TN, FN: true/false negatives
    # Sensitivity (recall): probability of detecting a failure when present: TP/(TP+FN)
    # Precision: rate of correct positive results: TP/(TP+FP)
    # F1-score: predictive performance measure: 2*Precision*Sensitivity/(Precision+Sensitivity)
    TP=np.zeros(NumberOfFailures+1)
    FP=np.zeros(NumberOfFailures+1)
    FN=np.zeros(NumberOfFailures+1)
    Sensitivity=np.zeros(NumberOfFailures+1)
    Precision=np.zeros(NumberOfFailures+1)
    for i in range(len(testranges)):
        for k in range(testranges[i][0],testranges[i][1]):
            if classes[k]==testclasses[i]:
                TP[i]+=1
            else:
                # A mismatch is a false negative for the true class i and a
                # false positive for the predicted class
                FN[i]+=1
                FP[classes[k]]+=1
    for i in range(NumberOfFailures+1):
        if (TP[i]+FN[i])>0:
            Sensitivity[i]=TP[i]/(TP[i]+FN[i])
        else:
            Sensitivity[i]=0
        if (TP[i]+FP[i])>0:
            Precision[i]=TP[i]/(TP[i]+FP[i])
        else:
            Precision[i]=0
    S=Sensitivity.mean()
    P=Precision.mean()
    F1=2*S*P/(S+P)
    print("Sensitivity: ",Sensitivity)
    print("S: ",S)
    print("Precision: ",Precision)
    print("P: ",P)
    print("F1-Score: ",F1)
anomalyMetric(classes,testRanges,testClasses)
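# A confusion matrix would give the same information in more detail; a minimal
# sketch (left commented out so the script's output is unchanged):
#   cm = np.zeros((NumberOfFailures+1, NumberOfFailures+1), dtype=int)
#   for j in range(NumberOfFailures+1):
#       for k in range(testRanges[j][0], testRanges[j][1]):
#           cm[testClasses[j], classes[k]] += 1
#   print(cm)  # rows: true class, columns: predicted class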
# Compute the delay until correct detection for a list of class ranges (useful when transition data exists)
def computeDelay(l,classes,testRanges,testClasses):
    d=np.zeros(len(l))
    NoFailsInARow=4
    ind=0
    for i in l:
        start=testRanges[i][0]
        count=0
        while start<testRanges[i][1]:
            if classes[start]==testClasses[i]:
                count+=1
            # Note: count is cumulative; it is not reset on a misclassification
            if count==NoFailsInARow or start==(testRanges[i][1]-1):
                count=0
                #print(start,start-testRanges[i][0]-NoFailsInARow+timesteps)
                d[ind]=start-testRanges[i][0]-NoFailsInARow+timesteps
                break
            start+=1
        ind+=1
    print(d)
    return(d.mean())
d=computeDelay([2,3,4],classes,testRanges,testClasses)
print("Delay: ",d)
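# Example: with NoFailsInARow=4 and timesteps=12, a class that is classified
# correctly from its very first window reaches count==4 at
# start = testRanges[i][0]+3, giving d = 3 - 4 + 12 = 11 samples
# (55 minutes at the 5-minute sampling rate).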
plotData4()
