# Csar Fdez, UdL, 2025
# Changes from v1: Normalization
# In v1, each failure type has its own normalization parameters (mean and stdev)
# In v2, the mean and stdev are the same for all data
# v3.py trains the models looping over TIME_STEPS (4,8,12,16,20,24,...) and finds the optimal threshold factor
# Derived from v3.py with code from v1_multifailure.py
# This code doesn't train for multiple time steps!
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
from keras import layers
from optparse import OptionParser
import copy
import pickle

parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS", default=12)
parser.add_option("-f", "--thresholdfactor", dest="TF", help="Threshold Factor", default=1.4)
(options, args) = parser.parse_args()
# Data file arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Fully blocked condenser
# 3. Partially blocked condenser
# 4. Condenser fan not working
# 5. Open door
NumberOfFailures=4  # So far, we only have data for the first 4 failure types
datafiles=[]
for i in range(NumberOfFailures+1):
    datafiles.append([])
# The next set of data corresponds to the freezer, SP=-26
datafiles[0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_','2025-01-27_5_','2025-01-28_5_']
datafiles[1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
#datafiles[1]=['2024-12-17_5_','2024-12-16_5_','2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_'] # These have transitions
datafiles[2]=['2024-12-18_5_','2024-12-19_5_']
datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
#datafiles[4]=['2024-12-27_5_','2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_'] # These have transitions
#datafiles[4]=[]
# Features suggested by Xavier
# Careful with 'tc s3': in datafiles[0] it is always null
# Seems to be incorporated in the new tests
# r1 s5: supply air flow temperature
# r1 s1: inlet evaporator temperature
# r1 s4: condenser outlet
# Variables 'r1 s4' and 'pa1 apiii' may not exist in cloud controllers
features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
features=['r1 s1','r1 s4','r1 s5']  # overrides the previous list: 'pa1 apiii' dropped
featureNames={}
featureNames['r1 s1']='$T_{evap}$'
featureNames['r1 s4']='$T_{cond}$'
featureNames['r1 s5']='$T_{air}$'
featureNames['pa1 apiii']='$P_{elec}$'
unitNames={}
unitNames['r1 s1']='$(^{o}C)$'
unitNames['r1 s4']='$(^{o}C)$'
unitNames['r1 s5']='$(^{o}C)$'
unitNames['pa1 apiii']='$(W)$'
#features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
#features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
NumFeatures=len(features)
df_list=[]
for i in range(NumberOfFailures+1):
    df_list.append([])
for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[i]:
        print(" ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[i]=pd.concat(dftemp)
# Subsampled to 5' = 30 * 10"
# We take samples every 5' because in production we will only have data at this frequency
subsamplingrate=30
dataframe=[]
for i in range(NumberOfFailures+1):
    dataframe.append([])
for i in range(NumberOfFailures+1):
    datalength=df_list[i].shape[0]
    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True,drop=True)
    dataframe[i].dropna(inplace=True)
# Train data is the first 2/3 of the data
# Test data is the last 1/3 of the data
dataTrain=[]
dataTest=[]
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
# Calculate means and stdevs over the training data of all classes
a=dataTrain[0]
for i in range(1,NumberOfFailures+1):
    a=np.vstack((a,dataTrain[i]))
means=a.mean(axis=0)
stdevs=a.std(axis=0)
def normalize2(train,test):
    return( (train-means)/stdevs, (test-means)/stdevs )
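# Illustration (assumed values, not from the data): if one feature has a training
# mean of 5.0 and stdev of 2.0, normalize2 maps a reading of 9.0 to
# (9.0-5.0)/2.0 = 2.0; the same training statistics are reused for the test split.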
dataTrainNorm=[]
dataTestNorm=[]
for i in range(NumberOfFailures+1):
    dataTrainNorm.append([])
    dataTestNorm.append([])
for i in range(NumberOfFailures+1):
    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
def plotData():
    fig, axes = plt.subplots(
        nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumberOfFailures+1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+", feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+", feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    plt.show()
#plotData()
#exit(0)
NumFilters=64
KernelSize=7
DropOut=0.2
ThresholdFactor=1.4
def create_sequences(values, time_steps):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
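# For reference (illustration, not part of the original code): given an array of
# shape (N, NumFeatures), create_sequences returns overlapping windows of shape
# (N - time_steps + 1, time_steps, NumFeatures); e.g.
#   create_sequences(np.zeros((100, 3)), 12).shape == (89, 12, 3)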
def AtLeastOneTrue(x):
    for i in range(NumFeatures):
        if x[i]:
            return True
    return False
def anomalyMetric(th,ts,testList):  # first element of the list is non-failure data
    # FP, TP: false/true positives
    # TN, FN: true/false negatives
    # Sensitivity (recall): probability of detecting a failure given the data is failing: TP/(TP+FN)
    # Specificity: true negative ratio given the data is OK: TN/(TN+FP)
    # Accuracy: rate of correct predictions: (TN+TP)/(TN+TP+FP+FN)
    # Precision: rate of true positive results: TP/(TP+FP)
    # F1-score: predictive performance measure: 2*Precision*Sensitivity/(Precision+Sensitivity)
    # "F2-score" as used here: harmonic mean of Specificity and Sensitivity: 2*Specificity*Sensitivity/(Specificity+Sensitivity)
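    # Worked example with illustrative numbers (not from the datasets): if TP=45,
    # FN=5, FP=2, TN=98, then Sensitivity=45/50=0.90, Specificity=98/100=0.98,
    # and F2Score = 2*0.98*0.90/(0.98+0.90) ≈ 0.938.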
    # NOTE: relies on a single global `model`; this function is kept from v3.py
    # and is not called in this script (here `model` later becomes a list of models).
    x_test = create_sequences(testList[0],ts)
    x_test_pred = model.predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
    anomalies = test_mae_loss > th
    count=0
    for i in range(anomalies.shape[0]):
        if AtLeastOneTrue(anomalies[i]):
            count+=1
    FP=count
    TN=anomalies.shape[0]-count
    count=0
    TP=np.zeros((NumberOfFailures))
    FN=np.zeros((NumberOfFailures))
    Sensitivity=np.zeros((NumberOfFailures))
    Precision=np.zeros((NumberOfFailures))
    for i in range(1,len(testList)):
        x_test = create_sequences(testList[i],ts)
        x_test_pred = model.predict(x_test)
        test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
        anomalies = test_mae_loss > th
        count=0
        for j in range(anomalies.shape[0]):
            if AtLeastOneTrue(anomalies[j]):
                count+=1
        TP[i-1] = count
        FN[i-1] = anomalies.shape[0]-count
        Sensitivity[i-1]=TP[i-1]/(TP[i-1]+FN[i-1])
        Precision[i-1]=TP[i-1]/(TP[i-1]+FP)
    GlobalSensitivity=TP.sum()/(TP.sum()+FN.sum())
    Specificity=TN/(TN+FP)
    Accuracy=(TN+TP.sum())/(TN+TP.sum()+FP+FN.sum())
    GlobalPrecision=TP.sum()/(TP.sum()+FP)
    F1Score= 2*GlobalPrecision*GlobalSensitivity/(GlobalPrecision+GlobalSensitivity)
    F2Score = 2*Specificity*GlobalSensitivity/(Specificity+GlobalSensitivity)
    print("Sensitivity: ",Sensitivity)
    print("Global Sensitivity: ",GlobalSensitivity)
    #print("Precision: ",Precision)
    #print("Global Precision: ",GlobalPrecision)
    print("Specificity: ",Specificity)
    #print("Accuracy: ",Accuracy)
    #print("F1Score: ",F1Score)
    print("F2Score: ",F2Score)
    #print("FP: ",FP)
    #return Sensitivity+Specificity
    return F2Score
def listToString(l):
    r=''
    for i in l:
        r+=str(i)
    return(r.replace(' ',''))
threshold={}
fname='threshold_class'+listToString(features)+'.pk'
if os.path.isfile(fname):  # it exists and is a file
    print("File ",fname," exists. Loading it!")
    file = open(fname, 'rb')
    threshold=pickle.load(file)
    file.close()
    if not int(options.timesteps) in threshold.keys():
        threshold[int(options.timesteps)]=[]
        for i in range(NumberOfFailures+1):
            threshold[int(options.timesteps)].append(0)  # initialize
else:
    threshold[int(options.timesteps)]=[]
    for i in range(NumberOfFailures+1):
        threshold[int(options.timesteps)].append(0)  # initialize
model=[]
modelckpt_callback=[]
es_callback=[]
path_checkpoint=[]
timesteps=int(options.timesteps)
x_train=[]
for i in range(NumberOfFailures+1):
    x_train.append(create_sequences(dataTrainNorm[i],timesteps))
    model.append([])
    model[i] = keras.Sequential(
        [
            layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])),
            layers.Conv1D(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1D(
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(
                filters=int(NumFilters/2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1DTranspose(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=KernelSize, padding="same"),
        ]
    )
    model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model[i].summary()
    path_checkpoint.append("model_class_v3_"+str(i)+"_"+str(timesteps)+listToString(features)+"_checkpoint.weights.h5")
    es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15))
    modelckpt_callback.append(keras.callbacks.ModelCheckpoint(monitor="val_loss", filepath=path_checkpoint[i], verbose=1, save_weights_only=True, save_best_only=True,))
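# Shape sketch of the autoencoder above (assuming the default timesteps=12 and the
# 3 selected features): input (12, 3) -> Conv1D stride 2 -> (6, 64) -> Conv1D
# stride 2 -> (3, 32) -> Conv1DTranspose stride 2 -> (6, 32) -> Conv1DTranspose
# stride 2 -> (12, 64) -> final Conv1DTranspose -> (12, 3), i.e. each model
# learns to reconstruct its own class's input windows.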
if options.train:
    history=[]
    for i in range(NumberOfFailures+1):
        history.append(model[i].fit(x_train[i], x_train[i], epochs=400, batch_size=128, validation_split=0.3, callbacks=[es_callback[i], modelckpt_callback[i]],))
        x_train_pred=model[i].predict(x_train[i])
        train_mae_loss=np.mean(np.abs(x_train_pred - x_train[i]), axis=1)
        threshold[timesteps][i]=np.max(train_mae_loss,axis=0)
    file = open('threshold_class'+listToString(features)+'.pk', 'wb')
    pickle.dump(threshold, file)
    file.close()
else:
    for i in range(NumberOfFailures+1):
        model[i].load_weights(path_checkpoint[i])
    file = open('threshold_class'+listToString(features)+'.pk', 'rb')
    threshold=pickle.load(file)
    file.close()
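# At this point threshold[timesteps][i] holds the per-feature maximum training MAE
# of model i: train_mae_loss has shape (num_windows, NumFeatures), so
# np.max(..., axis=0) yields an array of shape (NumFeatures,). Below, a test
# window is flagged anomalous when its MAE exceeds threshold*TF on at least one feature.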
#print(threshold)
# 1st scenario. Detect only the anomaly. Later, we will classify it
# Test data = testnormal + testfail1 + testfail2 + testfail3 + testfail4
datalist=[dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]]
d=np.vstack((datalist))
x_test = create_sequences(d,int(options.timesteps))
x_test_pred = model[0].predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
anomalies = test_mae_loss > threshold[int(options.timesteps)][0]*float(options.TF)
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if AtLeastOneTrue(anomalies[i]):
        #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
        anomalous_data_indices.append(i)
# Define ranges for plotting in different colors
testRanges=[]
r=0
for i in range(len(datalist)):
    testRanges.append([r,r+datalist[i].shape[0]])
    r+=datalist[i].shape[0]
# Drop the last TIME_STEPS for plotting
testRanges[NumberOfFailures][1]=testRanges[NumberOfFailures][1]-int(options.timesteps)
# Let's plot some features
colorline=['violet','lightcoral','cyan','lime','grey']
colordot=['darkviolet','red','blue','green','black']
#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
featuresToPlot=features
indexesToPlot=[]
for i in featuresToPlot:
    indexesToPlot.append(features.index(i))
def plotData3():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        x=[]
        y=[]
        for k in anomalous_data_indices:
            if (k+int(options.timesteps))<x_test.shape[0]:
                x.append(k+int(options.timesteps))
                # de-normalize back to physical units (was x_train/undefined index in the original)
                y.append(x_test[k+int(options.timesteps),0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]])
        axes[i].plot(x, y, color='black', marker='.', linewidth=0, label="Fail detection")
        init=0
        end=testRanges[0][1]
        axes[i].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="No fail")
        init=end
        end+=(testRanges[1][1]-testRanges[1][0])
        for j in range(1,NumberOfFailures+1):
            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Fail type "+str(j),color=colorline[j-1],linewidth=1)
            if j<NumberOfFailures:
                init=end
                end+=(testRanges[j+1][1]-testRanges[j+1][0])
        if i==(NumFeaturesToPlot-1):
            axes[i].legend(loc='right')
        s=''
        s+=featureNames[features[indexesToPlot[i]]]
        s+=' '+unitNames[features[indexesToPlot[i]]]
        axes[i].set_ylabel(s)
        axes[i].grid()
    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()
#plotData3()
# 2nd scenario. Go over the anomalies and classify each one by the smallest reconstruction error
x_test_predict=[]
for m in range(1,NumberOfFailures+1):
    x_test_predict.append(model[m].predict(x_test))
anomalous_data_type={}
for i in range(1,NumberOfFailures+1):
    anomalous_data_type[i-1]=[]
for i in anomalous_data_indices:
    error=[]
    for m in range(1,NumberOfFailures+1):
        error.append(np.mean(np.mean(np.abs(x_test_predict[m-1][i:i+1,:,:]-x_test[i:i+1,:,:]),axis=1)))
    anomalous_data_type[np.argmin(error)].append(i)
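# Sketch of the decision rule above (illustrative numbers, not from the data):
# if the four failure autoencoders give reconstruction errors
# [0.41, 0.12, 0.55, 0.30] for a flagged window, np.argmin(error) is 1,
# so the window is assigned failure type 2.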
def plotData4():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        init=0
        end=testRanges[0][1]
        axes[i].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="No fail",color='black')
        init=end
        end+=(testRanges[1][1]-testRanges[1][0])
        for j in range(1,NumberOfFailures+1):
            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]],label="Fail type "+str(j),color=colorline[j-1],linewidth=1)
            if j<NumberOfFailures:
                init=end
                end+=(testRanges[j+1][1]-testRanges[j+1][0])
        for j in range(NumberOfFailures):
            x=[]
            y=[]
            for k in anomalous_data_type[j]:
                if (k+int(options.timesteps))<x_test.shape[0]:
                    x.append(k+int(options.timesteps))
                    y.append(x_test[k+int(options.timesteps),0,indexesToPlot[i]]*stdevs[indexesToPlot[i]]+means[indexesToPlot[i]])
            axes[i].plot(x, y, color=colordot[j], marker='.', linewidth=0, label="Fail detect type "+str(j+1))
        if i==(NumFeaturesToPlot-1):
            axes[i].legend(loc='right')
        s=''
        s+=featureNames[features[indexesToPlot[i]]]
        s+=' '+unitNames[features[indexesToPlot[i]]]
        axes[i].set_ylabel(s)
        axes[i].grid()
    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()
plotData4()
## It remains to implement anomaly metrics for each failure type
exit(0)
