Geen omschrijving

v0_multifailure.py 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. # Csar Fdez, UdL, 2025
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. import datetime
  5. import numpy as np
  6. import keras
  7. import os.path
  8. import pickle
  9. from keras import layers
  10. from optparse import OptionParser
  11. parser = OptionParser()
  12. parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
  13. (options, args) = parser.parse_args()
  14. # data files arrays. Index:
  15. # 0. No failure
  16. # 1. Blocked evaporator
  17. # 2. Blocked condenser
  18. # 3 Fan condenser not working
  19. # 4. Open door
  20. NumberOfFailures=4
  21. NumberOfFailures=3 # So far, we have only data for the first 3 types of failures
  22. datafiles=[]
  23. for i in range(NumberOfFailures+1):
  24. datafiles.append([])
  25. # Next set of ddata corresponds to Freezer, SP=-26
  26. datafiles[0]=['2024-08-07_5_','2024-08-08_5_']
  27. datafiles[1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
  28. datafiles[2]=['2024-12-18_5_','2024-12-19_5_','2024-12-20_5_']
  29. datafiles[3]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
  30. #datafiles[4]=[]
  31. # Features suggested by Xavier
  32. features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
  33. NumFeatures=len(features)
  34. df_list=[]
  35. for i in range(NumberOfFailures+1):
  36. df_list.append([])
  37. for i in range(NumberOfFailures+1):
  38. dftemp=[]
  39. for f in datafiles[i]:
  40. print(" ", f)
  41. #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
  42. df1 = pd.read_csv('./data/'+f+'.csv')
  43. dftemp.append(df1)
  44. df_list[i]=pd.concat(dftemp)
  45. # subsampled to 5' = 30 * 10"
  46. # We consider smaples every 5' because in production, we will only have data at this frequency
  47. subsamplingrate=30
  48. dataframe=[]
  49. for i in range(NumberOfFailures+1):
  50. dataframe.append([])
  51. for i in range(NumberOfFailures+1):
  52. datalength=df_list[i].shape[0]
  53. dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
  54. dataframe[i].reset_index(inplace=True,drop=True)
  55. dataframe[i].dropna(inplace=True)
  56. # Train data is first 2/3 of data
  57. # Test data is: last 1/3 of data
  58. dataTrain=[]
  59. dataTest=[]
  60. for i in range(NumberOfFailures+1):
  61. dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
  62. dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
  63. def normalize2(train,test):
  64. # merges train and test
  65. means=[]
  66. stdevs=[]
  67. for i in range(NumFeatures):
  68. means.append(train[:,i].mean())
  69. stdevs.append(train[:,i].std())
  70. return( (train-means)/stdevs, (test-means)/stdevs )
  71. dataTrainNorm=[]
  72. dataTestNorm=[]
  73. for i in range(NumberOfFailures+1):
  74. dataTrainNorm.append([])
  75. dataTestNorm.append([])
  76. for i in range(NumberOfFailures+1):
  77. (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
  78. def plotData():
  79. fig, axes = plt.subplots(
  80. nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
  81. )
  82. for i in range(NumberOfFailures+1):
  83. axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+", feature 0")
  84. axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+", feature 1")
  85. #axes[1].legend()
  86. #axes[0].set_ylabel(features[0])
  87. #axes[1].set_ylabel(features[1])
  88. plt.show()
  89. #plotData()
  90. TIME_STEPS = 24
  91. def create_sequences(values, time_steps=TIME_STEPS):
  92. output = []
  93. for i in range(len(values) - time_steps + 1):
  94. output.append(values[i : (i + time_steps)])
  95. return np.stack(output)
  96. x_train=[]
  97. for i in range(NumberOfFailures+1):
  98. x_train.append(create_sequences(dataTrainNorm[i]))
  99. model=[]
  100. modelckpt_callback =[]
  101. es_callback =[]
  102. path_checkpoint=[]
  103. for i in range(NumberOfFailures+1):
  104. model.append([])
  105. model[i] = keras.Sequential(
  106. [
  107. layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])),
  108. layers.Conv1D(
  109. filters=64,
  110. kernel_size=7,
  111. padding="same",
  112. strides=2,
  113. activation="relu",
  114. ),
  115. layers.Dropout(rate=0.2),
  116. layers.Conv1D(
  117. filters=32,
  118. kernel_size=7,
  119. padding="same",
  120. strides=2,
  121. activation="relu",
  122. ),
  123. layers.Conv1DTranspose(
  124. filters=32,
  125. kernel_size=7,
  126. padding="same",
  127. strides=2,
  128. activation="relu",
  129. ),
  130. layers.Dropout(rate=0.2),
  131. layers.Conv1DTranspose(
  132. filters=64,
  133. kernel_size=7,
  134. padding="same",
  135. strides=2,
  136. activation="relu",
  137. ),
  138. layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=7, padding="same"),
  139. ]
  140. )
  141. model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
  142. model[i].summary()
  143. path_checkpoint.append("model_"+str(i)+"._checkpoint.weights.h5")
  144. es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15))
  145. modelckpt_callback.append(keras.callbacks.ModelCheckpoint( monitor="val_loss", filepath=path_checkpoint[i], verbose=1, save_weights_only=True, save_best_only=True,))
  146. if options.train:
  147. history=[]
  148. for i in range(NumberOfFailures+1):
  149. history.append(model[i].fit( x_train[i], x_train[i], epochs=400, batch_size=128, validation_split=0.3, callbacks=[ es_callback[i], modelckpt_callback[i] ],))
  150. fig, axes = plt.subplots(
  151. nrows=int(np.ceil((NumberOfFailures+1)/2)), ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
  152. )
  153. for i in range(int(np.ceil((NumberOfFailures+1)/2))):
  154. for j in range(2):
  155. r=2*i+j
  156. if r < NumberOfFailures+1:
  157. axes[i][j].plot(history[r].history["loss"], label="Training Loss")
  158. axes[i][j].plot(history[r].history["val_loss"], label="Val Loss")
  159. axes[i][j].legend()
  160. plt.show()
  161. else:
  162. for i in range(NumberOfFailures+1):
  163. model[i].load_weights(path_checkpoint[i])
  164. x_train_pred=[]
  165. train_mae_loss=[]
  166. threshold=[]
  167. for i in range(NumberOfFailures+1):
  168. x_train_pred.append(model[i].predict(x_train[i]))
  169. train_mae_loss.append(np.mean(np.abs(x_train_pred[i] - x_train[i]), axis=1))
  170. threshold.append(np.max(train_mae_loss[i],axis=0))
  171. print("Threshold : ",threshold)
  172. for i in range(NumberOfFailures+1):
  173. threshold[i]=threshold[i]*2
  174. # Threshold is enlarged because, otherwise, for subsamples at 5' have many false positives
  175. # 1st scenario. Detect only anomaly. Later, we will classiffy it
  176. # Test data= testnormal + testfail1 + testtail2 + testfail3 + testfail4 + testnormal
  177. d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[0]))
  178. x_test = create_sequences(d)
  179. x_test_pred = model[0].predict(x_test)
  180. test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
  181. testRanges=[]
  182. testRanges.append([0,dataTestNorm[0].shape[0]])
  183. testRanges.append([dataTestNorm[0].shape[0], dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0] ])
  184. testRanges.append([dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0], dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0]+ dataTestNorm[2].shape[0] ])
  185. testRanges.append([dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0]+ dataTestNorm[2].shape[0], dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0]+ dataTestNorm[2].shape[0]+ dataTestNorm[3].shape[0]])
  186. testRanges.append([dataTestNorm[0].shape[0]+dataTestNorm[1].shape[0]+ dataTestNorm[2].shape[0]+ dataTestNorm[3].shape[0] , x_test.shape[0] ])
  187. def AtLeastOneTrue(x):
  188. for i in range(NumFeatures):
  189. if x[i]:
  190. return True
  191. return False
  192. anomalies = test_mae_loss > threshold[0]
  193. anomalous_data_indices = []
  194. for i in range(anomalies.shape[0]):
  195. if AtLeastOneTrue(anomalies[i]):
  196. #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
  197. anomalous_data_indices.append(i)
  198. #print(anomalous_data_indices)
  199. # Let's plot only a couple of features
  200. def plotData2():
  201. fig, axes = plt.subplots(
  202. nrows=2, ncols=1, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
  203. )
  204. axes[0].plot(range(len(x_train[0])),x_train[0][:,0,0],label="normal")
  205. axes[0].plot(range(len(x_train[0]),len(x_train[0])+len(x_test)),x_test[:,0,0],label="abnormal")
  206. axes[0].plot(len(x_train[0])+np.array(anomalous_data_indices),x_test[anomalous_data_indices,0,0],color='red',marker='.',linewidth=0,label="abnormal detection")
  207. axes[0].legend()
  208. axes[1].plot(range(len(x_train[0])),x_train[0][:,0,1],label="normal")
  209. axes[1].plot(range(len(x_train[0]),len(x_train[0])+len(x_test)),x_test[:,0,1],label="abnormal")
  210. axes[1].plot(len(x_train[0])+np.array(anomalous_data_indices),x_test[anomalous_data_indices,0,1],color='red',marker='.',linewidth=0,label="abnormal detection")
  211. axes[1].legend()
  212. axes[0].set_ylabel(features[0])
  213. axes[1].set_ylabel(features[1])
  214. plt.show()
  215. #plotData2()
  216. # 2nd scenario. Go over anomalies and classify it by less error
  217. '''
  218. #This code works, but too slow
  219. anomalous_data_type=[]
  220. for i in anomalous_data_indices:
  221. error=[]
  222. for m in range(1,NumberOfFailures+1):
  223. error.append(np.mean(np.mean(np.abs(model[m].predict(x_test[i:i+1,:,:])-x_test[i:i+1,:,:]),axis=1)))
  224. anomalous_data_type.append(np.argmin(error)+1)
  225. '''
  226. anomalous_data_type=[]
  227. x_test_predict=[]
  228. for m in range(NumberOfFailures+1):
  229. x_test_predict.append(model[m].predict(x_test))
  230. for i in anomalous_data_indices:
  231. error=[]
  232. for m in range(1,NumberOfFailures+1):
  233. error.append(np.mean(np.mean(np.abs(x_test_predict[m][i:i+1,:,:]-x_test[i:i+1,:,:]),axis=1)))
  234. anomalous_data_type.append(np.argmin(error)+1)
  235. # For plotting purposes
  236. anomalous_data_indices_by_failure=[]
  237. for i in range(NumberOfFailures+1):
  238. anomalous_data_indices_by_failure.append([])
  239. for i in range(len(anomalous_data_indices)):
  240. print(i," ",anomalous_data_type[i])
  241. anomalous_data_indices_by_failure[anomalous_data_type[i]].append(anomalous_data_indices[i])
  242. def plotData2():
  243. fig, axes = plt.subplots(
  244. nrows=2, ncols=1, figsize=(25, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
  245. )
  246. init=0
  247. end=len(x_train[0])
  248. axes[0].plot(range(init,end),x_train[0][:,0,0],label="normal train")
  249. #axes.plot(range(len(x_train[0]),len(x_train[0])+len(x_test)),x_test[:,0,0],label="abnormal")
  250. init=end
  251. end+=testRanges[0][1]
  252. axes[0].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,0],label="normal test")
  253. init=end
  254. end+=(testRanges[1][1]-testRanges[1][0])
  255. axes[0].plot(range(init,end),x_test[testRanges[1][0]:testRanges[1][1],0,0],label="fail type 1", color="lightcoral")
  256. init=end
  257. end+=(testRanges[2][1]-testRanges[2][0])
  258. axes[0].plot(range(init,end),x_test[testRanges[2][0]:testRanges[2][1],0,0],label="fail type 2", color="cyan")
  259. init=end
  260. end+=(testRanges[3][1]-testRanges[3][0])
  261. axes[0].plot(range(init,end),x_test[testRanges[3][0]:testRanges[3][1],0,0],label="fail type 3", color="lime")
  262. axes[0].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[1]),x_test[anomalous_data_indices_by_failure[1],0,0],color='red',marker='.',linewidth=0,label="abnormal detection type 1")
  263. axes[0].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[2]),x_test[anomalous_data_indices_by_failure[2],0,0],color='blue',marker='.',linewidth=0,label="abnormal detection type 2")
  264. axes[0].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[3]),x_test[anomalous_data_indices_by_failure[3],0,0],color='green',marker='.',linewidth=0,label="abnormal detection type 3")
  265. axes[0].legend()
  266. axes[0].set_ylabel(features[0])
  267. init=0
  268. end=len(x_train[0])
  269. axes[1].plot(range(init,end),x_train[0][:,0,1],label="normal train")
  270. #axes.plot(range(len(x_train[0]),len(x_train[0])+len(x_test)),x_test[:,0,0],label="abnormal")
  271. init=end
  272. end+=testRanges[0][1]
  273. axes[1].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,1],label="normal test")
  274. init=end
  275. end+=(testRanges[1][1]-testRanges[1][0])
  276. axes[1].plot(range(init,end),x_test[testRanges[1][0]:testRanges[1][1],0,1],label="fail type 1", color="lightcoral")
  277. init=end
  278. end+=(testRanges[2][1]-testRanges[2][0])
  279. axes[1].plot(range(init,end),x_test[testRanges[2][0]:testRanges[2][1],0,1],label="fail type 2", color="cyan")
  280. init=end
  281. end+=(testRanges[3][1]-testRanges[3][0])
  282. axes[1].plot(range(init,end),x_test[testRanges[3][0]:testRanges[3][1],0,1],label="fail type 3", color="lime")
  283. axes[1].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[1]),x_test[anomalous_data_indices_by_failure[1],0,1],color='red',marker='.',linewidth=0,label="abnormal detection type 1")
  284. axes[1].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[2]),x_test[anomalous_data_indices_by_failure[2],0,1],color='blue',marker='.',linewidth=0,label="abnormal detection type 2")
  285. axes[1].plot(len(x_train[0])+np.array(anomalous_data_indices_by_failure[3]),x_test[anomalous_data_indices_by_failure[3],0,1],color='green',marker='.',linewidth=0,label="abnormal detection type 3")
  286. axes[1].legend()
  287. axes[1].set_ylabel(features[1])
  288. plt.show()
  289. plotData2()

Powered by TurnKey Linux.