
v1_multifailure_importance_analysis.py

# Csar Fdez, UdL, 2025
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
import pickle
from keras import layers
import copy

# Data file arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Fully blocked condenser
# 3. Partially blocked condenser
# 4. Condenser fan not working
# 5. Open door
NumberOfFailures = 5
NumberOfFailures = 4  # So far, we only have data for the first 4 failure types
datafiles = []
for i in range(NumberOfFailures + 1):
    datafiles.append([])

# The next set of data corresponds to the freezer, SP = -26
datafiles[0] = ['2024-08-07_5_', '2024-08-08_5_']
datafiles[1] = ['2024-12-11_5_', '2024-12-12_5_', '2024-12-13_5_', '2024-12-14_5_', '2024-12-15_5_']
datafiles[2] = ['2024-12-18_5_', '2024-12-19_5_']
datafiles[3] = ['2024-12-21_5_', '2024-12-22_5_', '2024-12-23_5_', '2024-12-24_5_', '2024-12-25_5_', '2024-12-26_5_']
datafiles[4] = ['2024-12-28_5_', '2024-12-29_5_', '2024-12-30_5_', '2024-12-31_5_', '2025-01-01_5_']
#datafiles[4]=[]

# Features suggested by Xavier
features = ['r1 s1', 'r1 s4', 'r1 s5', 'pa1 apiii']
features = ['r1 s1', 'r1 s2', 'r1 s3', 'r1 s4', 'r1 s5', 'r1 s6', 'r1 s7', 'r1 s8', 'r1 s9', 'r1 s10', 'r2 s1', 'r2 s2', 'r2 s3', 'r2 s4', 'r2 s5', 'r2 s6', 'r2 s7', 'r2 s8', 'r2 s9', 'pa1 apiii', 'tc s1', 'tc s2']
NumFeatures = len(features)
df_list = []
for i in range(NumberOfFailures + 1):
    df_list.append([])
for i in range(NumberOfFailures + 1):
    dftemp = []
    for f in datafiles[i]:
        print("  ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/' + f + '.csv')
        dftemp.append(df1)
    df_list[i] = pd.concat(dftemp)
# Subsample to 5' (= 30 samples x 10")
# We only consider samples every 5' because, in production, data will only be available at this frequency
subsamplingrate = 30

dataframe = []
for i in range(NumberOfFailures + 1):
    dataframe.append([])
for i in range(NumberOfFailures + 1):
    datalength = df_list[i].shape[0]
    dataframe[i] = df_list[i].iloc[range(0, datalength, subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True, drop=True)
    dataframe[i].dropna(inplace=True)

# Training data: first 2/3 of the data
# Test data: last 1/3 of the data
dataTrain = []
dataTest = []
for i in range(NumberOfFailures + 1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0] * 2 / 3), :])
    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0] * 2 / 3):, :])
def normalize2(train, test):
    # Normalizes both train and test with the per-feature mean and stdev computed on train
    means = []
    stdevs = []
    for i in range(NumFeatures):
        means.append(train[:, i].mean())
        stdevs.append(train[:, i].std())
    return ((train - means) / stdevs, (test - means) / stdevs)

dataTrainNorm = []
dataTestNorm = []
for i in range(NumberOfFailures + 1):
    dataTrainNorm.append([])
    dataTestNorm.append([])
for i in range(NumberOfFailures + 1):
    (dataTrainNorm[i], dataTestNorm[i]) = normalize2(dataTrain[i], dataTest[i])
def plotData():
    fig, axes = plt.subplots(
        nrows=NumberOfFailures + 1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumberOfFailures + 1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:, 0], dataTestNorm[i][:, 0])), label="Fail " + str(i) + ", feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:, 1], dataTestNorm[i][:, 1])), label="Fail " + str(i) + ", feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    plt.show()

#plotData()
TIME_STEPS = 12
def create_sequences(values, time_steps=TIME_STEPS):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
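# create_sequences builds overlapping sliding windows over the time axis.
# Illustrative example only (the shapes below are hypothetical, not taken from the data):
# with TIME_STEPS = 12, an input of shape (100, NumFeatures) yields
#   create_sequences(np.zeros((100, NumFeatures))).shape  # -> (89, 12, NumFeatures)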
x_train = []
for i in range(NumberOfFailures + 1):
    x_train.append(create_sequences(dataTrainNorm[i]))

# One 1D-convolutional autoencoder per data set (no failure + each failure type)
model = []
modelckpt_callback = []
es_callback = []
path_checkpoint = []
for i in range(NumberOfFailures + 1):
    model.append([])
    model[i] = keras.Sequential(
        [
            layers.Input(shape=(x_train[i].shape[1], x_train[i].shape[2])),
            layers.Conv1D(
                filters=64,
                kernel_size=7,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=0.2),
            layers.Conv1D(
                filters=32,
                kernel_size=7,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(
                filters=32,
                kernel_size=7,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Dropout(rate=0.2),
            layers.Conv1DTranspose(
                filters=64,
                kernel_size=7,
                padding="same",
                strides=2,
                activation="relu",
            ),
            layers.Conv1DTranspose(filters=x_train[i].shape[2], kernel_size=7, padding="same"),
        ]
    )
    model[i].compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    model[i].summary()
    path_checkpoint.append("model_v1_" + str(i) + "._checkpoint.weights.h5")
    es_callback.append(keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15))
    modelckpt_callback.append(keras.callbacks.ModelCheckpoint(monitor="val_loss", filepath=path_checkpoint[i], verbose=1, save_weights_only=True, save_best_only=True))
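    # Training is not run in this script; the weights are loaded from the
    # checkpoints below. A minimal training sketch (an assumption based on the
    # callbacks defined above, not necessarily the author's exact call) would be:
    #
    #   history = model[i].fit(
    #       x_train[i], x_train[i],
    #       epochs=400, batch_size=128, validation_split=0.1,
    #       callbacks=[es_callback[i], modelckpt_callback[i]],
    #   )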
# Load the trained models
for i in range(NumberOfFailures + 1):
    model[i].load_weights(path_checkpoint[i])

x_train_pred = []
train_mae_loss = []
threshold = []
for i in range(NumberOfFailures + 1):
    x_train_pred.append(model[i].predict(x_train[i]))
    train_mae_loss.append(np.mean(np.abs(x_train_pred[i] - x_train[i]), axis=1))
    threshold.append(np.max(train_mae_loss[i], axis=0))
print("Threshold : ", threshold)
for i in range(NumberOfFailures + 1):
    threshold[i] = threshold[i] * 1.3
# The threshold is enlarged because, otherwise, the 5' subsampling produces many false positives
# Anomaly metrics:
# False positives and true negatives are computed on dataTestNorm[0] (no-failure data)
# True positives and false negatives are computed on dataTestNorm[i], i > 0 (failure data)
def AtLeastOneTrue(x):
    for i in range(NumFeatures):
        if x[i]:
            return True
    return False

def anomalyMetric(testList):  # The first element of the list is the no-failure data
    # FP, TP: false/true positives
    # TN, FN: true/false negatives
    # Sensitivity: probability of detecting a failure when the data comes from a failure
    # Specificity: probability of not flagging a failure when the data is healthy
    x_test = create_sequences(testList[0])
    x_test_pred = model[0].predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
    anomalies = test_mae_loss > threshold[0]
    count = 0
    for i in range(anomalies.shape[0]):
        if AtLeastOneTrue(anomalies[i]):
            count += 1
    FP = count
    TN = anomalies.shape[0] - count  # healthy sequences not flagged as anomalous
    count = 0
    TP = np.zeros((NumberOfFailures))
    FN = np.zeros((NumberOfFailures))
    for i in range(1, len(testList)):
        x_test = create_sequences(testList[i])
        x_test_pred = model[0].predict(x_test)
        test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
        anomalies = test_mae_loss > threshold[0]
        count = 0
        for j in range(anomalies.shape[0]):
            if AtLeastOneTrue(anomalies[j]):
                count += 1
        TP[i - 1] = count
        FN[i - 1] = anomalies.shape[0] - count
    Sensitivity = TP.sum() / (TP.sum() + FN.sum())
    Specificity = TN / (TN + FP)
    print("Sensitivity: ", Sensitivity)
    print("Specificity: ", Specificity)
    return Sensitivity + Specificity

MaxMetric = anomalyMetric([dataTestNorm[0], dataTestNorm[1], dataTestNorm[2], dataTestNorm[3], dataTestNorm[4]])
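# Permutation feature importance: MaxMetric is the baseline score
# (Sensitivity + Specificity) obtained with intact test data. Shuffling a single
# feature column destroys its information; the more the score changes with
# respect to MaxMetric, the more the detector relies on that feature.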
# Now iterate, permuting one feature at a time, and rank the features by how much the metric drops
metric = np.zeros(NumFeatures)
for i in range(NumFeatures):
    dataTestNormCopy = []
    # A deep copy is required because shuffle is an in-place operation
    for j in range(NumberOfFailures + 1):
        dataTestNormCopy.append([])
        dataTestNormCopy[j] = copy.deepcopy(dataTestNorm[j])
    for j in range(NumberOfFailures + 1):
        np.random.shuffle(dataTestNormCopy[j][:, i])
    metric[i] = anomalyMetric([dataTestNormCopy[0], dataTestNormCopy[1], dataTestNormCopy[2], dataTestNormCopy[3], dataTestNormCopy[4]])

# Features ordered from least to most important
indexes_ordered = np.argsort(metric)
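# Optional inspection (an illustrative sketch, not part of the original run):
# list each feature next to the score obtained when only that feature is shuffled.
#
#   for idx in indexes_ordered:
#       print(features[idx], metric[idx])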
# Now, let's eliminate features cumulatively, from least to most important
metric = np.zeros(NumFeatures)
dataTestNormCopy = []
for j in range(NumberOfFailures + 1):
    dataTestNormCopy.append([])
    dataTestNormCopy[j] = copy.deepcopy(dataTestNorm[j])
# A deep copy is required because shuffle is an in-place operation; here the shuffles accumulate across iterations
for i in range(NumFeatures):
    for j in range(NumberOfFailures + 1):
        np.random.shuffle(dataTestNormCopy[j][:, indexes_ordered[i]])  # shuffle columns following the importance ordering
    metric[i] = anomalyMetric([dataTestNormCopy[0], dataTestNormCopy[1], dataTestNormCopy[2], dataTestNormCopy[3], dataTestNormCopy[4]])
# Print the feature subsets to be used in v1_multifailure.py
#features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2','tc s3']
F = np.array(features)
l = indexes_ordered.shape[0]
for i in range(3, NumFeatures - 5):
    print(F[indexes_ordered[l - i:]])

'''
['r1 s10' 'r1 s6' 'r2 s8']
['tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6'
 'r2 s8']
['r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10'
 'r1 s6' 'r2 s8']
['tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1'
 'r1 s10' 'r1 s6' 'r2 s8']
['r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2'
 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8' 'r2 s1'
 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r2 s7' 'r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii' 'r1 s8'
 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r2 s4' 'r2 s7' 'r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5' 'pa1 apiii'
 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
['r1 s9' 'r2 s4' 'r2 s7' 'r2 s5' 'r1 s7' 'tc s2' 'r2 s3' 'r1 s5'
 'pa1 apiii' 'r1 s8' 'r2 s1' 'r2 s2' 'tc s1' 'r1 s10' 'r1 s6' 'r2 s8']
'''
