v1_multifailure.py
# Csar Fdez, UdL, 2025
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
import pickle
from keras import layers
from optparse import OptionParser

parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
(options, args) = parser.parse_args()
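# Usage: run with -t / --train to train the autoencoders from scratch;
# without it, the script loads the best checkpointed weights saved during
# a previous training run.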
# Data file arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Fully blocked condenser
# 3. Partially blocked condenser
# 4. Condenser fan not working
# 5. Open door
NumberOfFailures = 5
NumberOfFailures = 4  # So far, we only have data for the first 4 types of failures
datafiles = []
for i in range(NumberOfFailures + 1):
    datafiles.append([])

# The next set of data corresponds to the freezer, SP=-26
datafiles[0] = ['2024-08-07_5_', '2024-08-08_5_']
datafiles[1] = ['2024-12-11_5_', '2024-12-12_5_', '2024-12-13_5_', '2024-12-14_5_', '2024-12-15_5_']
datafiles[2] = ['2024-12-18_5_', '2024-12-19_5_']
datafiles[3] = ['2024-12-21_5_', '2024-12-22_5_', '2024-12-23_5_', '2024-12-24_5_', '2024-12-25_5_', '2024-12-26_5_']
datafiles[4] = ['2024-12-28_5_', '2024-12-29_5_', '2024-12-30_5_', '2024-12-31_5_', '2025-01-01_5_']
#datafiles[4] = []
# Features suggested by Xavier
# Careful with 'tc s3': it is always null in datafiles[0]
# It seems to be incorporated in the newer tests
features = ['r1 s1', 'r1 s4', 'r1 s5', 'pa1 apiii']
features = ['r1 s1', 'r1 s2', 'r1 s3', 'r1 s4', 'r1 s5', 'r1 s6', 'r1 s7', 'r1 s8', 'r1 s9', 'r1 s10', 'r2 s1', 'r2 s2', 'r2 s3', 'r2 s4', 'r2 s5', 'r2 s6', 'r2 s7', 'r2 s8', 'r2 s9', 'pa1 apiii', 'tc s1', 'tc s2']
#features = ['r2 s2', 'tc s1', 'r1 s10', 'r1 s6', 'r2 s8']
NumFeatures = len(features)
df_list = []
for i in range(NumberOfFailures + 1):
    df_list.append([])

for i in range(NumberOfFailures + 1):
    dftemp = []
    for f in datafiles[i]:
        print(" ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/' + f + '.csv')
        dftemp.append(df1)
    df_list[i] = pd.concat(dftemp)
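# At this point df_list[i] holds the concatenated raw time series for
# failure class i (one row every 10 seconds, per the subsampling note below).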
# Subsample to 5' = 30 * 10"
# We take samples every 5' because, in production, we will only have data at this frequency
subsamplingrate = 30

dataframe = []
for i in range(NumberOfFailures + 1):
    dataframe.append([])

for i in range(NumberOfFailures + 1):
    datalength = df_list[i].shape[0]
    dataframe[i] = df_list[i].iloc[range(0, datalength, subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True, drop=True)
    dataframe[i].dropna(inplace=True)
# Train data is the first 2/3 of the data
# Test data is the last 1/3 of the data
dataTrain = []
dataTest = []
for i in range(NumberOfFailures + 1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0] * 2 / 3), :])
    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0] * 2 / 3):, :])
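# The split is chronological (no shuffling), as is appropriate for time-series
# data; Keras's validation_split later carves its validation set from the end
# of this training portion.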
def normalize2(train, test):
    # Standardizes both sets using the training statistics only
    means = []
    stdevs = []
    for i in range(NumFeatures):
        means.append(train[:, i].mean())
        stdevs.append(train[:, i].std())
    return ((train - means) / stdevs, (test - means) / stdevs)

dataTrainNorm = []
dataTestNorm = []
for i in range(NumberOfFailures + 1):
    dataTrainNorm.append([])
    dataTestNorm.append([])

for i in range(NumberOfFailures + 1):
    (dataTrainNorm[i], dataTestNorm[i]) = normalize2(dataTrain[i], dataTest[i])
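# Normalizing the test set with the train means/stdevs avoids leaking test
# statistics; each class is normalized independently, matching the
# per-class models built below.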
def plotData():
    fig, axes = plt.subplots(
        nrows=NumberOfFailures + 1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumberOfFailures + 1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:, 0], dataTestNorm[i][:, 0])), label="Fail " + str(i) + ", feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:, 1], dataTestNorm[i][:, 1])), label="Fail " + str(i) + ", feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    #plt.show()
#plotData()
NumFilters = 64
KernelSize = 7
DropOut = 0.2
ThresholdFactor = 2
TIME_STEPS = 48

def create_sequences(values, time_steps=TIME_STEPS):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
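# create_sequences turns a (N, NumFeatures) series into overlapping windows of
# shape (N - TIME_STEPS + 1, TIME_STEPS, NumFeatures); with 5' samples,
# 48 steps = 4 hours. E.g. 1000 samples, 22 features -> shape (953, 48, 22).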
x_train = []
for i in range(NumberOfFailures + 1):
    x_train.append(create_sequences(dataTrainNorm[i]))
model = []
modelckpt_callback = []
es_callback = []
path_checkpoint = []
for i in range(NumberOfFailures + 1):
    model.append([])
    model[i] = keras.Sequential(
        [
            layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])),
            layers.Conv1D(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1D(
                filters=int(NumFilters / 2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(
                filters=int(NumFilters / 2),
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=DropOut),
            layers.Conv1DTranspose(
                filters=NumFilters,
                kernel_size=KernelSize,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=KernelSize, padding="same"),
        ]
    )
    model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model[i].summary()
    path_checkpoint.append("model_v1_" + str(i) + "._checkpoint.weights.h5")
    es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15))
    modelckpt_callback.append(keras.callbacks.ModelCheckpoint(monitor="val_loss", filepath=path_checkpoint[i], verbose=1, save_weights_only=True, save_best_only=True,))
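# One convolutional autoencoder per class: two stride-2 Conv1D layers compress
# each 48-step window to 12 steps, and the Conv1DTranspose stack mirrors them
# back to the original (48, NumFeatures) shape; each model is trained to
# reconstruct its own input.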
if options.train:
    history = []
    for i in range(NumberOfFailures + 1):
        history.append(model[i].fit(x_train[i], x_train[i], epochs=400, batch_size=128, validation_split=0.3, callbacks=[es_callback[i], modelckpt_callback[i]],))

    fig, axes = plt.subplots(
        nrows=int(np.ceil((NumberOfFailures + 1) / 2)), ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(int(np.ceil((NumberOfFailures + 1) / 2))):
        for j in range(2):
            r = 2 * i + j
            if r < NumberOfFailures + 1:
                axes[i][j].plot(history[r].history["loss"], label="Training Loss")
                axes[i][j].plot(history[r].history["val_loss"], label="Val Loss")
                axes[i][j].legend()
    #plt.show()
else:
    for i in range(NumberOfFailures + 1):
        model[i].load_weights(path_checkpoint[i])
x_train_pred = []
train_mae_loss = []
threshold = []
for i in range(NumberOfFailures + 1):
    x_train_pred.append(model[i].predict(x_train[i]))
    train_mae_loss.append(np.mean(np.abs(x_train_pred[i] - x_train[i]), axis=1))
    threshold.append(np.max(train_mae_loss[i], axis=0))
print("Threshold : ", threshold)
for i in range(NumberOfFailures + 1):
    threshold[i] = threshold[i] * ThresholdFactor
# The threshold is enlarged because, otherwise, data subsampled at 5' produces many false positives
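# The detection threshold is per feature: the maximum training reconstruction
# MAE of each feature, scaled by ThresholdFactor (2x here).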
# 1st scenario. Detect only the anomaly. Later, we will classify it
# Test data = testnormal + testfail1 + testfail2 + testfail3 + testfail4 + testnormal
d = np.vstack((dataTestNorm[0], dataTestNorm[1], dataTestNorm[2], dataTestNorm[3], dataTestNorm[4], dataTestNorm[0]))
x_test = create_sequences(d)
x_test_pred = model[0].predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)

# Define ranges for plotting in different colors
testRanges = []
r = dataTestNorm[0].shape[0]
testRanges.append([0, r])
for i in range(1, NumberOfFailures + 1):
    rnext = r + dataTestNorm[i].shape[0]
    testRanges.append([r, rnext])
    r = rnext
testRanges.append([r, x_test.shape[0] + TIME_STEPS])
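# testRanges[k] holds the [start, end) indices of segment k in the concatenated
# test stream: segment 0 is normal data, segments 1..4 are the failure types,
# and the final entry is the trailing normal segment.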
def AtLeastOneTrue(x):
    for i in range(NumFeatures):
        if x[i]:
            return True
    return False

anomalies = test_mae_loss > threshold[0]
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if AtLeastOneTrue(anomalies[i]):
        #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
        anomalous_data_indices.append(i)
#print(anomalous_data_indices)
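# A window is flagged as anomalous when at least one feature's reconstruction
# error exceeds its threshold under the no-failure model (model[0]).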
# Let's plot some features
colorline = ['violet', 'lightcoral', 'cyan', 'lime', 'grey']
colordot = ['darkviolet', 'red', 'blue', 'green', 'black']
featuresToPlot = ['r1 s1', 'r1 s2', 'r1 s3', 'pa1 apiii']
#featuresToPlot = features
indexesToPlot = []
for i in featuresToPlot:
    indexesToPlot.append(features.index(i))
def plotData2():
    NumFeaturesToPlot = len(indexesToPlot)
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(25, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        init = 0
        end = len(x_train[0])
        axes[i].plot(range(init, end), x_train[0][:, 0, indexesToPlot[i]], label="normal train")
        init = end
        end += testRanges[0][1]
        axes[i].plot(range(init, end), x_test[testRanges[0][0]:testRanges[0][1], 0, indexesToPlot[i]], label="normal test")
        init = end
        end += (testRanges[1][1] - testRanges[1][0])
        for j in range(1, NumberOfFailures + 1):
            axes[i].plot(range(init, end), x_test[testRanges[j][0]:testRanges[j][1], 0, indexesToPlot[i]], label="fail type " + str(j), color=colorline[j - 1])
            init = end
            end += (testRanges[j + 1][1] - testRanges[j + 1][0])
        # Shift by TIME_STEPS because detection is performed at the end of the time series
        trail = np.hstack((x_test[:, 0, indexesToPlot[i]], x_test[-1:, 1:TIME_STEPS, indexesToPlot[i]].reshape(TIME_STEPS - 1)))
        axes[i].plot(len(x_train[0]) + np.array(anomalous_data_indices) + TIME_STEPS, trail[np.array(anomalous_data_indices) + TIME_STEPS], color='grey', marker='.', linewidth=0, label="abnormal detection")
        init = end - (testRanges[NumberOfFailures + 1][1] - testRanges[NumberOfFailures + 1][0])
        end = init + (testRanges[0][1] - testRanges[0][0])
        axes[i].plot(range(init, end), x_test[testRanges[0][0]:testRanges[0][1], 0, indexesToPlot[i]], color='orange')
        if i == 0:
            axes[i].legend(bbox_to_anchor=(1, 0.5))
        axes[i].set_ylabel(features[indexesToPlot[i]])
        axes[i].grid()
    plt.show()
def anomalyMetric(testList):  # the first element of the list is non-failure data
    # FP, TP: false/true positives
    # TN, FN: true/false negatives
    # Sensitivity: probability of detecting a failure given the data is failed: TP/(TP+FN)
    # Specificity: true negative ratio given the data is OK: TN/(TN+FP)
    x_test = create_sequences(testList[0])
    x_test_pred = model[0].predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
    anomalies = test_mae_loss > threshold[0]
    count = 0
    for i in range(anomalies.shape[0]):
        if AtLeastOneTrue(anomalies[i]):
            count += 1
    FP = count
    TN = anomalies.shape[0] - count

    TP = np.zeros((NumberOfFailures))
    FN = np.zeros((NumberOfFailures))
    for i in range(1, len(testList)):
        x_test = create_sequences(testList[i])
        x_test_pred = model[0].predict(x_test)
        test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
        anomalies = test_mae_loss > threshold[0]
        count = 0
        for j in range(anomalies.shape[0]):
            if AtLeastOneTrue(anomalies[j]):
                count += 1
        TP[i - 1] = count
        FN[i - 1] = anomalies.shape[0] - count
    Sensitivity = TP.sum() / (TP.sum() + FN.sum())
    Specificity = TN / (TN + FP)
    print("Sensitivity: ", Sensitivity)
    print("Specificity: ", Specificity)
    print("FP: ", FP)
    return Sensitivity + Specificity

anomalyMetric([dataTestNorm[0], dataTestNorm[1], dataTestNorm[2], dataTestNorm[3], dataTestNorm[4]])
plotData2()
# 2nd scenario. Go over the anomalies and classify each one by the least error
'''
# This code works, but is too slow
anomalous_data_type = []
for i in anomalous_data_indices:
    error = []
    for m in range(1, NumberOfFailures + 1):
        error.append(np.mean(np.mean(np.abs(model[m].predict(x_test[i:i+1, :, :]) - x_test[i:i+1, :, :]), axis=1)))
    anomalous_data_type.append(np.argmin(error) + 1)
'''
anomalous_data_type = []
x_test_predict = []
for m in range(NumberOfFailures + 1):
    x_test_predict.append(model[m].predict(x_test))

for i in anomalous_data_indices:
    error = []
    for m in range(1, NumberOfFailures + 1):
        error.append(np.mean(np.mean(np.abs(x_test_predict[m][i:i+1, :, :] - x_test[i:i+1, :, :]), axis=1)))
    anomalous_data_type.append(np.argmin(error) + 1)
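# Each detected window is assigned the failure class whose dedicated
# autoencoder reconstructs it with the least mean absolute error; predicting
# all windows with every model up front avoids the slow per-window predict()
# calls of the commented-out version above.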
anomalous_data_indices_by_failure = []
for i in range(NumberOfFailures + 1):
    anomalous_data_indices_by_failure.append([])

for i in range(len(anomalous_data_indices)):
    #print(i, " ", anomalous_data_type[i])
    anomalous_data_indices_by_failure[anomalous_data_type[i]].append(anomalous_data_indices[i])
MaxIndex = 0
for i in range(1, NumberOfFailures + 1):
    if anomalous_data_indices_by_failure[i]:  # max() would fail on an empty list
        MaxIndex = max(MaxIndex, max(anomalous_data_indices_by_failure[i]))

# Enlarge x_test to plot the failure points
XTest = np.vstack((x_test[:, 0, :], x_test[-1, 1:TIME_STEPS, :]))
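# XTest flattens the overlapping windows back into a single
# (N + TIME_STEPS - 1, NumFeatures) series: the first value of every window
# plus the tail of the last window, so detection dots can be drawn on the
# original timeline.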
def plotData3():
    NumFeaturesToPlot = len(indexesToPlot)
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(25, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        init = 0
        end = len(x_train[0])
        axes[i].plot(range(init, end), x_train[0][:, 0, indexesToPlot[i]], label="normal train")
        init = end
        end += testRanges[0][1]
        axes[i].plot(range(init, end), XTest[testRanges[0][0]:testRanges[0][1], indexesToPlot[i]], label="normal test")
        init = end
        end += (testRanges[1][1] - testRanges[1][0])
        for j in range(1, NumberOfFailures + 1):
            axes[i].plot(range(init, end), XTest[testRanges[j][0]:testRanges[j][1], indexesToPlot[i]], label="fail type " + str(j), color=colorline[j - 1])
            init = end
            end += (testRanges[j + 1][1] - testRanges[j + 1][0])
            ## TODO: modify here as in the previous plot
            if anomalous_data_indices_by_failure[j]:  # skip classes with no detections
                axes[i].plot(len(x_train[0]) + np.array(anomalous_data_indices_by_failure[j]) + TIME_STEPS, XTest[np.array(anomalous_data_indices_by_failure[j]) + TIME_STEPS, indexesToPlot[i]], color=colordot[j - 1], marker='.', linewidth=0, label="abnormal detection type " + str(j))
        init = end - (testRanges[NumberOfFailures + 1][1] - testRanges[NumberOfFailures + 1][0])
        end = init + (testRanges[0][1] - testRanges[0][0])
        axes[i].plot(range(init, end), XTest[testRanges[0][0]:testRanges[0][1], indexesToPlot[i]], color='orange')
        if i == 0:
            axes[i].legend(bbox_to_anchor=(1, 0.5))
        axes[i].set_ylabel(features[indexesToPlot[i]])
        axes[i].grid()
    plt.show()
anomalyMetric([dataTestNorm[0], dataTestNorm[1], dataTestNorm[2], dataTestNorm[3], dataTestNorm[4]])
plotData3()

# A new anomalyMetric for the multiclass case must be defined:
# - look at the anomalies detected in the first stage
# - look at the classification of the second stage
# - then determine whether each one is correctly classified, misclassified, or unclassified
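# A possible sketch of that multiclass metric (a hypothetical helper, not part
# of the original pipeline): it reuses the stage-1 detector (model[0] plus
# threshold) and the stage-2 argmin-of-reconstruction-error classifier defined
# above, and reports per failure class how many detected windows were
# correctly classified, misclassified, or never detected.
def anomalyMetricMulticlass(testList):  # testList[0] is non-failure data, as in anomalyMetric
    correct = np.zeros(NumberOfFailures)
    misclassified = np.zeros(NumberOfFailures)
    undetected = np.zeros(NumberOfFailures)
    for k in range(1, NumberOfFailures + 1):
        x = create_sequences(testList[k])
        # Stage 1: detect anomalous windows with the no-failure model
        mae0 = np.mean(np.abs(model[0].predict(x) - x), axis=1)
        detected = [i for i in range(mae0.shape[0]) if AtLeastOneTrue(mae0[i] > threshold[0])]
        undetected[k - 1] = mae0.shape[0] - len(detected)
        # Stage 2: classify each detected window by the least reconstruction error
        preds = [model[m].predict(x) for m in range(1, NumberOfFailures + 1)]
        for i in detected:
            error = [np.mean(np.abs(preds[m - 1][i] - x[i])) for m in range(1, NumberOfFailures + 1)]
            if np.argmin(error) + 1 == k:
                correct[k - 1] += 1
            else:
                misclassified[k - 1] += 1
    print("Correctly classified: ", correct)
    print("Misclassified: ", misclassified)
    print("Undetected: ", undetected)
#anomalyMetricMulticlass([dataTestNorm[0], dataTestNorm[1], dataTestNorm[2], dataTestNorm[3], dataTestNorm[4]])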
