
v1_multifailure_importance_analysis.py

# Csar Fdez, UdL, 2025
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
import pickle
from keras import layers
from optparse import OptionParser

parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (default: False)", default=False, action="store_true")
(options, args) = parser.parse_args()
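# Example invocations (a sketch; assumes the script is run from the repo root with ./data/ present):
#   python v1_multifailure_importance_analysis.py --train   # train all models and save checkpoints
#   python v1_multifailure_importance_analysis.py           # load previously saved checkpoint weights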
# Data file arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Fully blocked condenser
# 3. Partially blocked condenser
# 4. Condenser fan not working
# 5. Open door
NumberOfFailures = 5
NumberOfFailures = 4  # So far, we only have data for the first 4 types of failures
datafiles = []
for i in range(NumberOfFailures+1):
    datafiles.append([])

# The next set of data corresponds to the freezer, SP=-26
datafiles[0] = ['2024-08-07_5_', '2024-08-08_5_']
datafiles[1] = ['2024-12-11_5_', '2024-12-12_5_', '2024-12-13_5_', '2024-12-14_5_', '2024-12-15_5_']
datafiles[2] = ['2024-12-18_5_', '2024-12-19_5_']
datafiles[3] = ['2024-12-21_5_', '2024-12-22_5_', '2024-12-23_5_', '2024-12-24_5_', '2024-12-25_5_', '2024-12-26_5_']
datafiles[4] = ['2024-12-28_5_', '2024-12-29_5_', '2024-12-30_5_', '2024-12-31_5_', '2025-01-01_5_']
#datafiles[4]=[]
# Features suggested by Xavier; the full list below overrides the short one above
features = ['r1 s1', 'r1 s4', 'r1 s5', 'pa1 apiii']
features = ['r1 s1', 'r1 s2', 'r1 s3', 'r1 s4', 'r1 s5', 'r1 s6', 'r1 s7', 'r1 s8', 'r1 s9', 'r1 s10', 'r2 s1', 'r2 s2', 'r2 s3', 'r2 s4', 'r2 s5', 'r2 s6', 'r2 s7', 'r2 s8', 'r2 s9', 'pa1 apiii', 'tc s1', 'tc s2', 'tc s3']
NumFeatures = len(features)
df_list = []
for i in range(NumberOfFailures+1):
    df_list.append([])

for i in range(NumberOfFailures+1):
    dftemp = []
    for f in datafiles[i]:
        print("   ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[i] = pd.concat(dftemp)
# Subsample to 5' = 30 * 10"
# We take samples every 5' because, in production, data will only be available at this frequency
subsamplingrate = 30

dataframe = []
for i in range(NumberOfFailures+1):
    dataframe.append([])

for i in range(NumberOfFailures+1):
    datalength = df_list[i].shape[0]
    dataframe[i] = df_list[i].iloc[range(0, datalength, subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True, drop=True)
    dataframe[i].dropna(inplace=True)
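# Worked arithmetic for the rate above: the raw log has one row every 10", so keeping
# 1 row out of every 30 gives 30 * 10" = 300" = 5' between consecutive samples.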
# Train data is the first 2/3 of the data
# Test data is the last 1/3 of the data
dataTrain = []
dataTest = []
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3), :])
    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):, :])
def normalize2(train, test):
    # Per-feature z-score: both sets are scaled with the means/stdevs of the train set
    means = []
    stdevs = []
    for i in range(NumFeatures):
        means.append(train[:, i].mean())
        stdevs.append(train[:, i].std())
    return ((train-means)/stdevs, (test-means)/stdevs)

dataTrainNorm = []
dataTestNorm = []
for i in range(NumberOfFailures+1):
    dataTrainNorm.append([])
    dataTestNorm.append([])

for i in range(NumberOfFailures+1):
    (dataTrainNorm[i], dataTestNorm[i]) = normalize2(dataTrain[i], dataTest[i])
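# Worked example of normalize2 (toy 2-feature arrays, values assumed for illustration):
#   train = np.array([[0., 10.], [2., 30.]])  ->  means = [1., 20.], stdevs = [1., 10.]
# so the normalized train is [[-1., -1.], [1., 1.]], and test is scaled with the same statistics.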
def plotData():
    fig, axes = plt.subplots(
        nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumberOfFailures+1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:, 0], dataTestNorm[i][:, 0])), label="Fail "+str(i)+", feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:, 1], dataTestNorm[i][:, 1])), label="Fail "+str(i)+", feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    plt.show()

#plotData()
TIME_STEPS = 12

def create_sequences(values, time_steps=TIME_STEPS):
    # Sliding window: one sequence per starting row; consecutive windows overlap by time_steps-1 rows
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
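# Worked example: with TIME_STEPS=12, an input of shape (100, NumFeatures) yields
# 100 - 12 + 1 = 89 overlapping windows, i.e. an array of shape (89, 12, NumFeatures);
# window k covers rows k..k+11.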
x_train = []
for i in range(NumberOfFailures+1):
    x_train.append(create_sequences(dataTrainNorm[i]))
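# x_train[i] now has shape (num_windows_i, TIME_STEPS, NumFeatures); one autoencoder
# is built per class below (no failure plus each of the four failure types).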
model = []
modelckpt_callback = []
es_callback = []
path_checkpoint = []
for i in range(NumberOfFailures+1):
    model.append([])
    # Convolutional autoencoder: Conv1D encoder, Conv1DTranspose decoder
    model[i] = keras.Sequential(
        [
            layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])),
            layers.Conv1D(
                filters=64,
                kernel_size=7,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=0.2),
            layers.Conv1D(
                filters=32,
                kernel_size=7,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(
                filters=32,
                kernel_size=7,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=0.2),
            layers.Conv1DTranspose(
                filters=64,
                kernel_size=7,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=7, padding="same"),
        ]
    )
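    # Shape arithmetic (a sketch, for TIME_STEPS=12): the two stride-2 Conv1D layers
    # downsample 12 -> 6 -> 3 time steps, the two stride-2 Conv1DTranspose layers
    # upsample 3 -> 6 -> 12, and the last layer restores NumFeatures channels, so the
    # output matches the (12, NumFeatures) input for the MSE reconstruction loss below.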
    model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model[i].summary()
    path_checkpoint.append("model_v1_"+str(i)+"._checkpoint.weights.h5")
    es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15))
    modelckpt_callback.append(keras.callbacks.ModelCheckpoint(monitor="val_loss", filepath=path_checkpoint[i], verbose=1, save_weights_only=True, save_best_only=True,))
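# Note: when run without --train, the load_weights call below assumes the checkpoint files
# model_v1_0._checkpoint.weights.h5 .. model_v1_4._checkpoint.weights.h5 already exist in
# the working directory from a previous --train run.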
if options.train:
    history = []
    for i in range(NumberOfFailures+1):
        # Autoencoder training: the input is its own reconstruction target
        history.append(model[i].fit(x_train[i], x_train[i], epochs=400, batch_size=128, validation_split=0.3, callbacks=[es_callback[i], modelckpt_callback[i]],))

    fig, axes = plt.subplots(
        nrows=int(np.ceil((NumberOfFailures+1)/2)), ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(int(np.ceil((NumberOfFailures+1)/2))):
        for j in range(2):
            r = 2*i + j
            if r < NumberOfFailures+1:
                axes[i][j].plot(history[r].history["loss"], label="Training Loss")
                axes[i][j].plot(history[r].history["val_loss"], label="Val Loss")
                axes[i][j].legend()
    plt.show()
else:
    for i in range(NumberOfFailures+1):
        model[i].load_weights(path_checkpoint[i])
x_train_pred = []
train_mae_loss = []
threshold = []
for i in range(NumberOfFailures+1):
    x_train_pred.append(model[i].predict(x_train[i]))
    train_mae_loss.append(np.mean(np.abs(x_train_pred[i] - x_train[i]), axis=1))
    threshold.append(np.max(train_mae_loss[i], axis=0))
print("Threshold : ", threshold)

# The threshold is enlarged because, otherwise, the 5' subsampled data produces many false positives
for i in range(NumberOfFailures+1):
    threshold[i] = threshold[i]*1.7
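# Note: train_mae_loss[i] has shape (num_windows, NumFeatures), so threshold[i] is a
# per-feature vector (one MAE threshold per sensor), not a single scalar.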
# 1st scenario: detect the anomaly only; it is classified later
# Test data = testnormal + testfail1 + testfail2 + testfail3 + testfail4 + testnormal
d = np.vstack((dataTestNorm[0], dataTestNorm[1], dataTestNorm[2], dataTestNorm[3], dataTestNorm[4], dataTestNorm[0]))
x_test = create_sequences(d)
x_test_pred = model[0].predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)

# Define ranges for plotting in different colors
testRanges = []
r = dataTestNorm[0].shape[0]
testRanges.append([0, r])
for i in range(1, NumberOfFailures+1):
    rnext = r + dataTestNorm[i].shape[0]
    testRanges.append([r, rnext])
    r = rnext
testRanges.append([r, x_test.shape[0]])
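# Caveat worth keeping in mind: the ranges are row counts of the concatenated array d,
# but they are used to index windows of x_test; since create_sequences was applied after
# concatenation, windows near a segment boundary mix samples from two adjacent segments.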
def AtLeastOneTrue(x):
    for i in range(NumFeatures):
        if x[i]:
            return True
    return False

# Detection uses only the no-failure model (index 0): a window is anomalous
# if the MAE of at least one feature exceeds its per-feature threshold
anomalies = test_mae_loss > threshold[0]
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if AtLeastOneTrue(anomalies[i]):
        #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
        anomalous_data_indices.append(i)
#print(anomalous_data_indices)
# Let's plot only a couple of features
def plotData2():
    fig, axes = plt.subplots(
        nrows=2, ncols=1, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    axes[0].plot(range(len(x_train[0])), x_train[0][:, 0, 0], label="normal")
    axes[0].plot(range(len(x_train[0]), len(x_train[0])+len(x_test)), x_test[:, 0, 0], label="abnormal")
    axes[0].plot(len(x_train[0])+np.array(anomalous_data_indices), x_test[anomalous_data_indices, 0, 0], color='red', marker='.', linewidth=0, label="abnormal detection")
    axes[0].legend()
    axes[1].plot(range(len(x_train[0])), x_train[0][:, 0, 1], label="normal")
    axes[1].plot(range(len(x_train[0]), len(x_train[0])+len(x_test)), x_test[:, 0, 1], label="abnormal")
    axes[1].plot(len(x_train[0])+np.array(anomalous_data_indices), x_test[anomalous_data_indices, 0, 1], color='red', marker='.', linewidth=0, label="abnormal detection")
    axes[1].legend()
    axes[0].set_ylabel(features[0])
    axes[1].set_ylabel(features[1])
    plt.show()

#plotData2()
# 2nd scenario: go over the anomalies and classify each one by the smallest reconstruction error
'''
# This code works, but it is too slow (one predict call per anomalous window)
anomalous_data_type = []
for i in anomalous_data_indices:
    error = []
    for m in range(1, NumberOfFailures+1):
        error.append(np.mean(np.mean(np.abs(model[m].predict(x_test[i:i+1, :, :])-x_test[i:i+1, :, :]), axis=1)))
    anomalous_data_type.append(np.argmin(error)+1)
'''
# Faster version: run every model over the whole test set once, then compare errors per window
anomalous_data_type = []
x_test_predict = []
for m in range(NumberOfFailures+1):
    x_test_predict.append(model[m].predict(x_test))

for i in anomalous_data_indices:
    error = []
    for m in range(1, NumberOfFailures+1):
        error.append(np.mean(np.mean(np.abs(x_test_predict[m][i:i+1, :, :]-x_test[i:i+1, :, :]), axis=1)))
    anomalous_data_type.append(np.argmin(error)+1)
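# Each anomalous window gets the failure type whose autoencoder reconstructs it with the
# smallest mean absolute error (argmin over models 1..NumberOfFailures, hence the +1 offset);
# model 0 is excluded because the window has already been flagged as not-normal.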
# For plotting purposes, group the anomalous windows by the failure type assigned to them
anomalous_data_indices_by_failure = []
for i in range(NumberOfFailures+1):
    anomalous_data_indices_by_failure.append([])

for i in range(len(anomalous_data_indices)):
    print(i, " ", anomalous_data_type[i])
    anomalous_data_indices_by_failure[anomalous_data_type[i]].append(anomalous_data_indices[i])

colorline = ['violet', 'lightcoral', 'cyan', 'lime', 'grey']
colordot = ['darkviolet', 'red', 'blue', 'green', 'black']
featuresToPlot = ['r1 s1', 'r1 s3', 'r1 s5', 'r2 s3', 'r2 s4', 'pa1 apiii', 'tc s1', 'tc s2', 'tc s3']
indexesToPlot = []
for i in featuresToPlot:
    indexesToPlot.append(features.index(i))
def plotData3():
    NumFeaturesToPlot = len(indexesToPlot)
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(25, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        # The no-failure training data comes first on the x-axis...
        init = 0
        end = len(x_train[0])
        axes[i].plot(range(init, end), x_train[0][:, 0, indexesToPlot[i]], label="normal train")
        #axes.plot(range(len(x_train[0]),len(x_train[0])+len(x_test)),x_test[:,0,0],label="abnormal")
        # ...followed by the normal test segment...
        init = end
        end += testRanges[0][1]
        axes[i].plot(range(init, end), x_test[testRanges[0][0]:testRanges[0][1], 0, indexesToPlot[i]], label="normal test")
        init = end
        end += (testRanges[1][1]-testRanges[1][0])
        # ...then one colored segment per failure type, with its detections overlaid as dots
        for j in range(1, NumberOfFailures+1):
            axes[i].plot(range(init, end), x_test[testRanges[j][0]:testRanges[j][1], 0, indexesToPlot[i]], label="fail type "+str(j), color=colorline[j-1])
            init = end
            end += (testRanges[j+1][1]-testRanges[j+1][0])
            axes[i].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[j]), x_test[anomalous_data_indices_by_failure[j], 0, indexesToPlot[i]], color=colordot[j-1], marker='.', linewidth=0, label="abnormal detection type "+str(j))
        # The trailing normal segment (the second copy of dataTestNorm[0]) is drawn in orange
        init = end-(testRanges[NumberOfFailures+1][1]-testRanges[NumberOfFailures+1][0])
        end = init+(testRanges[0][1]-testRanges[0][0])
        axes[i].plot(range(init, end), x_test[testRanges[0][0]:testRanges[0][1], 0, indexesToPlot[i]], color='orange')
        if i == 0:
            axes[i].legend(bbox_to_anchor=(1, 0.5))
        axes[i].set_ylabel(features[indexesToPlot[i]])
        axes[i].grid()
    plt.show()

plotData3()
