# Csar Fdez, UdL, 2025
# Changes from v1: Normalization
# In v1, each failure type has its own normalization parameters (means and stdevs)
# In v2, the same mean and stdev are used for all data
# v3.py trains the models looping over TIME_STEPS (4,8,12,16,20,24,...) to find the optimal threshold factor
# Derived from v3_class, derived from v3.py with code from v1_multifailure.py
# This code does not train for multiple time steps!
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
from keras import layers
from optparse import OptionParser
import copy
import pickle
parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (default: False)", default=False, action="store_true")
parser.add_option("-n", "--timesteps", dest="timesteps", help="Time steps (window length)", default=12)
#parser.add_option("-f", "--thresholdfactor", dest="TF", help="Threshold factor", default=1.4)
# A threshold makes no sense when classifying, because we apply all models and pick the class with the lowest MSE
(options, args) = parser.parse_args()
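# Example usage (hypothetical file name):
#   python v4_class.py --train -n 12   # train the five models with 12 time steps
#   python v4_class.py -n 12           # load saved checkpoints and evaluate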
# Data file arrays. Index:
#   0. No failure
#   1. Blocked evaporator
#   2. Fully blocked condenser
#   3. Partially blocked condenser
#   4. Condenser fan not working
#   5. Open door
NumberOfFailures=4  # So far, we only have data for the first 4 types of failures
datafiles=[]
for i in range(NumberOfFailures+1):
    datafiles.append([])
# The next set of data corresponds to the freezer, SP=-26
datafiles[0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_','2025-01-27_5_','2025-01-28_5_']
datafiles[1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
#datafiles[1]=['2024-12-17_5_','2024-12-16_5_','2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_'] # These contain transitions
datafiles[2]=['2024-12-18_5_','2024-12-19_5_']
datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
#datafiles[4]=['2024-12-27_5_','2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_'] # These contain transitions
#datafiles[4]=[]
# Features suggested by Xavier
# Careful with 'tc s3', because in datafiles[0] it is always null
# It seems to be incorporated in the new tests
# r1 s5: supply air flow temperature
# r1 s1: inlet evaporator temperature
# r1 s4: condenser outlet temperature
# Variables 'r1 s4' and 'pa1 apiii' may not exist in cloud controllers
features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
features=['r1 s1','r1 s4','r1 s5']
features=['r1 s5']
# Feature combinations suggested by AKO
features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
features=['r1 s1','r1 s4','r1 s5']
features=['r1 s1','r1 s5','pa1 apiii']
features=['r1 s5','pa1 apiii']
features=['r1 s1','r1 s5']
features=['r1 s5']
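# Note: the assignments above are kept as a record of the combinations that were
# tried; only the last one ('r1 s5') takes effect.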
featureNames={}
featureNames['r1 s1']='$T_{evap}$'
featureNames['r1 s4']='$T_{cond}$'
featureNames['r1 s5']='$T_{air}$'
featureNames['pa1 apiii']='$P_{elec}$'
unitNames={}
unitNames['r1 s1']='$(^{o}C)$'
unitNames['r1 s4']='$(^{o}C)$'
unitNames['r1 s5']='$(^{o}C)$'
unitNames['pa1 apiii']='$(W)$'
#features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
#features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
NumFeatures=len(features)
df_list=[]
for i in range(NumberOfFailures+1):
    df_list.append([])
for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[i]:
        print(" ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[i]=pd.concat(dftemp)
# Subsampled to 5' = 30 * 10" (raw data arrives every 10 seconds)
# We take samples every 5' because in production we will only have data at this frequency
subsamplingrate=30
dataframe=[]
for i in range(NumberOfFailures+1):
    dataframe.append([])
for i in range(NumberOfFailures+1):
    datalength=df_list[i].shape[0]
    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True,drop=True)
    dataframe[i].dropna(inplace=True)
# Train data is the first 2/3 of the data
# Test data is the last 1/3 of the data
dataTrain=[]
dataTest=[]
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
# Calculate means and stdevs over the training data of all classes pooled together
a=dataTrain[0]
for i in range(1,NumberOfFailures+1):
    a=np.vstack((a,dataTrain[i]))
means=a.mean(axis=0)
stdevs=a.std(axis=0)
def normalize2(train,test):
    return( (train-means)/stdevs, (test-means)/stdevs )
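# Illustrative example: with means=[10.] and stdevs=[2.],
# normalize2(np.array([[12.],[8.]]), np.array([[10.]])) returns
# (array([[ 1.],[-1.]]), array([[0.]])); the same global statistics are
# applied to every failure class, unlike in v1.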
dataTrainNorm=[]
dataTestNorm=[]
for i in range(NumberOfFailures+1):
    dataTrainNorm.append([])
    dataTestNorm.append([])
for i in range(NumberOfFailures+1):
    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
def plotData():
    fig, axes = plt.subplots(
        nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumberOfFailures+1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+", feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+", feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    plt.show()
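# Note: plotData() assumes at least two features (it indexes columns 0 and 1),
# so it will fail with the current single-feature selection; it is kept here,
# commented out below, for multi-feature runs.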
#plotData()
#exit(0)
NumFilters=64
KernelSize=7
DropOut=0.2
ThresholdFactor=1.4
def create_sequences(values, time_steps):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
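# Example: an input of shape (N, NumFeatures) yields an output of shape
# (N - time_steps + 1, time_steps, NumFeatures); consecutive windows are
# shifted by one sample.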
def listToString(l):
    r=''
    for i in l:
        r+=str(i)
    return(r.replace(' ',''))
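# listToString builds the feature part of the checkpoint file names,
# e.g. listToString(['r1 s1','r1 s5']) -> 'r1s1r1s5'.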
model=[]
modelckpt_callback=[]
es_callback=[]
path_checkpoint=[]
timesteps=int(options.timesteps)
x_train=[]
for i in range(NumberOfFailures+1):
    x_train.append(create_sequences(dataTrainNorm[i],timesteps))
    model.append([])
    model[i] = keras.Sequential(
        [
            layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])),
            layers.Conv1D(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1D(
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1DTranspose(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=KernelSize, padding="same"),
        ]
    )
    model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model[i].summary()
    path_checkpoint.append("model_class_v4_"+str(i)+"_"+str(timesteps)+listToString(features)+"_checkpoint.weights.h5")
    es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15))
    modelckpt_callback.append(keras.callbacks.ModelCheckpoint(monitor="val_loss", filepath=path_checkpoint[i], verbose=1, save_weights_only=True, save_best_only=True,))
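# One convolutional autoencoder per class. The two strided Conv1D layers halve
# the time dimension twice and the two strided Conv1DTranspose layers restore
# it, so for timesteps=12 the shapes are (12,F) -> (6,64) -> (3,32) -> (6,32)
# -> (12,64) -> (12,F). timesteps should be a multiple of 4 so that the decoder
# output matches the input length exactly.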
if options.train:
    history=[]
    for i in range(NumberOfFailures+1):
        history.append(model[i].fit(x_train[i], x_train[i], epochs=400, batch_size=128, validation_split=0.3, callbacks=[es_callback[i], modelckpt_callback[i]],))
        x_train_pred=model[i].predict(x_train[i])
else:
    for i in range(NumberOfFailures+1):
        model[i].load_weights(path_checkpoint[i])
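# Weights are stored per class in files such as
# model_class_v4_0_12r1s5_checkpoint.weights.h5 (class 0, 12 timesteps,
# feature 'r1 s5'), so evaluation runs must use the same timesteps and
# feature list as training.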
# Let's plot some features
colorline=['black','violet','lightcoral','cyan','lime','grey']
colordot=['grey','darkviolet','red','blue','green','black']
#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
featuresToPlot=features
indexesToPlot=[]
for i in featuresToPlot:
    indexesToPlot.append(features.index(i))
# 2nd scenario. Go over the anomalies and classify each one by lowest error
datalist=[dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]]
x_test=create_sequences(datalist[0],int(options.timesteps))
for i in range(1,len(datalist)):
    x_test=np.vstack((x_test,create_sequences(datalist[i],int(options.timesteps))))
# Define the index ranges for plotting in different colors
# (create_sequences yields shape[0]-timesteps+1 windows per class)
testRanges=[]
r=0
for i in range(len(datalist)):
    testRanges.append([r,r+datalist[i].shape[0]-int(options.timesteps)+1])
    r+=datalist[i].shape[0]-int(options.timesteps)+1
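# Example: with two classes of 100 and 80 windows, testRanges would be
# [[0,100],[100,180]] -- contiguous, non-overlapping index ranges into x_test.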
testClasses=[0,1,2,3,4]
if not len(testClasses)==len(testRanges):
    print("ERROR: testClasses and testRanges must have the same length")
    exit(0)
x_test_predict=[]
for m in range(NumberOfFailures+1):
    x_test_predict.append(model[m].predict(x_test))
x_test_predict=np.array((x_test_predict))
test_mae_loss=[]
for m in range(NumberOfFailures+1):
    test_mae_loss.append(np.mean(np.abs(x_test_predict[m,:,:,:] - x_test), axis=1))
test_mae_loss=np.array((test_mae_loss))
test_mae_loss_average=np.mean(test_mae_loss,axis=2)  # average over features
classes=np.argmin(test_mae_loss_average,axis=0)
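# Each of the five models reconstructs every test window; a window is assigned
# to the class whose model gives the lowest average MAE. Example: if the
# per-model average errors for window k are [0.02, 0.15, 0.40, 0.33, 0.25],
# argmin yields 0 and the window is classified as "no failure".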
x=[]
y=[]
for j in range(NumberOfFailures+1):
    x.append([])
    y.append([])
for j in range(NumberOfFailures+1):
    for k in range(testRanges[j][0],testRanges[j][1]):
        if not classes[k]==testClasses[j]:
            x[classes[k]].append(k)
            y[classes[k]].append(x_test[k,0,indexesToPlot[0]]*stdevs[indexesToPlot[0]]+means[indexesToPlot[0]])
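# x[c] and y[c] collect the window index and the de-normalized feature value of
# every misclassified window that was (wrongly) assigned to class c; plotData4
# overlays them as dots on the signals.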
def plotData4():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        init=0
        end=testRanges[0][1]
        for j in range(NumberOfFailures+1):
            if NumFeaturesToPlot==1:
                axes.plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Class "+str(j), color=colorline[j],linewidth=1)
            else:
                axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Class "+str(j), color=colorline[j],linewidth=1)
            if j<NumberOfFailures:
                init=end
                end+=(testRanges[j+1][1]-testRanges[j+1][0])
            #if i==0:
            #    axes[0].plot(x[j],y[j], color=colordot[j], marker='.', markersize=10, linewidth=0, label="Fail detect type "+str(j))
        s=''
        s+=featureNames[features[indexesToPlot[i]]]
        s+=' '+unitNames[features[indexesToPlot[i]]]
        if NumFeaturesToPlot==1:
            axes.set_ylabel(s)
            axes.grid()
        else:
            axes[i].set_ylabel(s)
            axes[i].grid()
    for j in range(NumberOfFailures+1):
        if NumFeaturesToPlot==1:
            axes.plot(x[j],y[j], color=colordot[j], marker='.', markersize=10, linewidth=0, label="Fail detect type "+str(j))
        else:
            axes[0].plot(x[j],y[j], color=colordot[j], marker='.', markersize=10, linewidth=0, label="Fail detect type "+str(j))
    if NumFeaturesToPlot==1:
        axes.legend(ncol=4, loc=(0.1,0.98))
    else:
        axes[0].legend(ncol=4, loc=(0.1,0.98))
    #axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()
def whichClass(k,ranges):
    for i in range(NumberOfFailures+1):
        if k in range(ranges[i][0],ranges[i][1]):
            return(i)
    print("Error: class does not exist")
    exit(0)
## It remains to implement anomaly metrics for each failure type
def anomalyMetric(classes,testranges,testclasses):
    # TP, FP: true/false positives;  FN: false negatives
    # Sensitivity (recall): probability of detecting a failure when it is present: TP/(TP+FN)
    # Precision: rate of correct positive results: TP/(TP+FP)
    # F1-score: predictive performance measure: 2*Precision*Sensitivity/(Precision+Sensitivity)
    TP=np.zeros(NumberOfFailures+1)
    FP=np.zeros(NumberOfFailures+1)
    FN=np.zeros(NumberOfFailures+1)
    Sensitivity=np.zeros(NumberOfFailures+1)
    Precision=np.zeros(NumberOfFailures+1)
    for i in range(len(testranges)):
        for k in range(testranges[i][0],testranges[i][1]):
            if classes[k]==testclasses[i]:
                TP[i]+=1
            else:
                # A window of true class i assigned elsewhere is a false negative
                # for class i and a false positive for the predicted class
                FN[i]+=1
                FP[classes[k]]+=1
    for i in range(NumberOfFailures+1):
        Sensitivity[i]=TP[i]/(TP[i]+FN[i])
        Precision[i]=TP[i]/(TP[i]+FP[i])
    S=Sensitivity.mean()
    P=Precision.mean()
    F1=2*S*P/(S+P)
    print("Sensitivity: ",Sensitivity)
    print("S: ",S)
    print("Precision: ",Precision)
    print("P: ",P)
    print("F1-Score: ",F1)
anomalyMetric(classes,testRanges,testClasses)
plotData4()
exit(0)
