# Csar Fdez, UdL, 2025
# Changes from v1: Normalization
# In v1, each failure type had its own normalization parameters (means and stdevs)
# In v2, the same means and stdevs are used for all data
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import keras
import os.path
import pickle
from keras import layers
from optparse import OptionParser
import copy

parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (default: False)", default=False, action="store_true")
(options, args) = parser.parse_args()
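# Usage: run with -t/--train to train the model from scratch; without the flag,
# previously saved weights are loaded from the checkpoint file defined below.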
# Data file arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Fully blocked condenser
# 3. Partially blocked condenser
# 4. Condenser fan not working
# 5. Open door
NumberOfFailures=4  # So far, we only have data for the first 4 failure types
datafiles=[]
for i in range(NumberOfFailures+1):
    datafiles.append([])
# The next set of data corresponds to the freezer, SP=-26
datafiles[0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_','2025-01-27_5_']
datafiles[1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
datafiles[2]=['2024-12-18_5_','2024-12-19_5_']
datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
#datafiles[4]=[]
# Features suggested by Xavier
# Careful with 'tc s3' because in datafiles[0] it is always null
# It seems to be incorporated in the new tests
features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
#features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
#features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
NumFeatures=len(features)
df_list=[]
for i in range(NumberOfFailures+1):
    df_list.append([])
for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[i]:
        print(" ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[i]=pd.concat(dftemp)
# Subsample to 5' = 30 * 10"
# We consider samples every 5 minutes because, in production, we will only have data at this frequency
subsamplingrate=30
dataframe=[]
for i in range(NumberOfFailures+1):
    dataframe.append([])
for i in range(NumberOfFailures+1):
    datalength=df_list[i].shape[0]
    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True,drop=True)
    dataframe[i].dropna(inplace=True)
# Train data is the first 2/3 of the data
# Test data is the last 1/3 of the data
dataTrain=[]
dataTest=[]
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
    dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
# Calculate means and stdevs
a=dataTrain[0]
for i in range(1,NumberOfFailures+1):
    a=np.vstack((a,dataTrain[i]))
means=a.mean(axis=0)
stdevs=a.std(axis=0)
def normalize2(train,test):
    return( (train-means)/stdevs, (test-means)/stdevs )
dataTrainNorm=[]
dataTestNorm=[]
for i in range(NumberOfFailures+1):
    dataTrainNorm.append([])
    dataTestNorm.append([])
for i in range(NumberOfFailures+1):
    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
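# Note: unlike v1, a single set of means/stdevs (computed over the training data of
# all classes stacked together) is applied to every class, so reconstruction errors
# are measured on a common scale across failure types.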
def plotData():
    fig, axes = plt.subplots(
        nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumberOfFailures+1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+", feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+", feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    plt.show()
#plotData()
#exit(0)
NumFilters=64
KernelSize=7
DropOut=0.2
ThresholdFactor=1.4
TIME_STEPS = 12  # This is a trade-off between better performance (high) and better response delay (low)
def create_sequences(values, time_steps=TIME_STEPS):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
x_train=[]
for i in range(NumberOfFailures+1):
    x_train.append(create_sequences(dataTrainNorm[i]))
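# Shape check: given N subsampled rows and NumFeatures columns, create_sequences
# returns an array of shape (N - TIME_STEPS + 1, TIME_STEPS, NumFeatures);
# e.g. a hypothetical N=1000 with TIME_STEPS=12 and 4 features gives (989, 12, 4).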
# Reused code from v1_multifailure, but for only one model. No classification
#for i in range(NumberOfFailures+1):
model = keras.Sequential(
    [
        layers.Input(shape=(x_train[0].shape[1], x_train[0].shape[2])),
        layers.Conv1D(
            filters=NumFilters,
            kernel_size=KernelSize,
            padding="same",
            strides=2,
            activation="relu",
        ),
        layers.Dropout(rate=DropOut),
        layers.Conv1D(
            filters=int(NumFilters/2),
            kernel_size=KernelSize,
            padding="same",
            strides=2,
            activation="relu",
        ),
        layers.Conv1DTranspose(
            filters=int(NumFilters/2),
            kernel_size=KernelSize,
            padding="same",
            strides=2,
            activation="relu",
        ),
        layers.Dropout(rate=DropOut),
        layers.Conv1DTranspose(
            filters=NumFilters,
            kernel_size=KernelSize,
            padding="same",
            strides=2,
            activation="relu",
        ),
        layers.Conv1DTranspose(filters=x_train[0].shape[2], kernel_size=KernelSize, padding="same"),
    ]
)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
model.summary()
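# Tensor flow through the autoencoder for one window (padding="same", strides=2,
# with the current NumFeatures=4):
# (TIME_STEPS, NumFeatures) = (12, 4)
#   -> Conv1D(64)          -> (6, 64)
#   -> Conv1D(32)          -> (3, 32)
#   -> Conv1DTranspose(32) -> (6, 32)
#   -> Conv1DTranspose(64) -> (12, 64)
#   -> Conv1DTranspose(4)  -> (12, 4)   # strides=1: reconstructs the input window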
path_checkpoint="model_noclass_v2_checkpoint.weights.h5"
es_callback=keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
modelckpt_callback=keras.callbacks.ModelCheckpoint(monitor="val_loss", filepath=path_checkpoint, verbose=1, save_weights_only=True, save_best_only=True,)
if options.train:
    history=model.fit(x_train[0], x_train[0], epochs=400, batch_size=128, validation_split=0.3, callbacks=[es_callback, modelckpt_callback],)
else:
    model.load_weights(path_checkpoint)
x_train_pred=model.predict(x_train[0])
train_mae_loss=np.mean(np.abs(x_train_pred - x_train[0]), axis=1)
threshold=np.max(train_mae_loss,axis=0)
thresholdOrig=copy.deepcopy(threshold)
print("Threshold : ",threshold)
threshold=threshold*ThresholdFactor
# The threshold is enlarged because, otherwise, data subsampled at 5' produces many false positives
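# train_mae_loss averages the absolute error over the time axis only, so it and
# threshold are per-feature vectors of shape (NumFeatures,): each feature gets its
# own detection threshold, scaled by ThresholdFactor.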
# 1st scenario. Detect only the anomaly. Later, we will classify it
# Test data = testnormal + testfail1 + testfail2 + testfail3 + testfail4
#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]))
x_test = create_sequences(d)
x_test_pred = model.predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
# Define ranges for plotting in different colors
testRanges=[]
r=dataTestNorm[0].shape[0]
testRanges.append([0,r])
for i in range(1,NumberOfFailures+1):
    rnext=r+dataTestNorm[i].shape[0]
    testRanges.append([r,rnext])
    r=rnext
# Drop the last TIME_STEPS samples for plotting
testRanges[NumberOfFailures][1]=testRanges[NumberOfFailures][1]-TIME_STEPS
def AtLeastOneTrue(x):
    for i in range(NumFeatures):
        if x[i]:
            return True
    return False
anomalies = test_mae_loss > threshold
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if AtLeastOneTrue(anomalies[i]):
        #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
        anomalous_data_indices.append(i)
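# A window is flagged as anomalous when at least one feature exceeds its threshold.
# Equivalent vectorized sketch (not used above, to keep the original loop explicit):
#   anomalous_data_indices = np.where(anomalies.any(axis=1))[0].tolist()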
# Let's plot some features
colorline=['violet','lightcoral','cyan','lime','grey']
colordot=['darkviolet','red','blue','green','black']
#featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
featuresToPlot=features
indexesToPlot=[]
for i in featuresToPlot:
    indexesToPlot.append(features.index(i))
def plotData3():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        init=0
        end=testRanges[0][1]
        axes[i].plot(range(init,end),x_test[testRanges[0][0]:testRanges[0][1],0,indexesToPlot[i]],label="No fail")
        init=end
        end+=(testRanges[1][1]-testRanges[1][0])
        for j in range(1,NumberOfFailures+1):
            axes[i].plot(range(init,end),x_test[testRanges[j][0]:testRanges[j][1],0,indexesToPlot[i]],label="Fail type "+str(j), color=colorline[j-1])
            if j<NumberOfFailures:
                init=end
                end+=(testRanges[j+1][1]-testRanges[j+1][0])
        x=[]
        y=[]
        for k in anomalous_data_indices:
            if (k+TIME_STEPS)<x_test.shape[0]:
                x.append(k+TIME_STEPS)
                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
        axes[i].plot(x,y,color='grey',marker='.',linewidth=0,label="Fail detection")
        if i==0:
            axes[i].legend(bbox_to_anchor=(0.9, 0.4))
        axes[i].set_ylabel(features[indexesToPlot[i]])
        axes[i].grid()
    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()
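# Note on plotData3: a detection at window index k is drawn at sample k+TIME_STEPS,
# i.e. just after the window d[k:k+TIME_STEPS] that triggered it, so markers line up
# with the end of the offending window rather than its start.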
def anomalyMetric(testList):  # the first element of the list is non-failure data
    # FP, TP: false/true positives
    # TN, FN: true/false negatives
    # Sensitivity (recall): probability of detecting a failure given failing data: TP/(TP+FN)
    # Specificity: true negative rate given the data is OK: TN/(TN+FP)
    # Accuracy: rate of correct predictions: (TN+TP)/(TN+TP+FP+FN)
    # Precision: rate of true positives among positive results: TP/(TP+FP)
    # F1-score: predictive performance measure: 2*Precision*Sensitivity/(Precision+Sensitivity)
    # "F2-score" (as used here): harmonic mean of Specificity and Sensitivity:
    # 2*Specificity*Sensitivity/(Specificity+Sensitivity)
    # (note: not the standard F-beta score with beta=2)
    x_test = create_sequences(testList[0])
    x_test_pred = model.predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
    anomalies = test_mae_loss > threshold
    count=0
    for i in range(anomalies.shape[0]):
        if AtLeastOneTrue(anomalies[i]):
            count+=1
    FP=count
    TN=anomalies.shape[0]-count
    count=0
    TP=np.zeros((NumberOfFailures))
    FN=np.zeros((NumberOfFailures))
    Sensitivity=np.zeros((NumberOfFailures))
    Precision=np.zeros((NumberOfFailures))
    for i in range(1,len(testList)):
        x_test = create_sequences(testList[i])
        x_test_pred = model.predict(x_test)
        test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
        anomalies = test_mae_loss > threshold
        count=0
        for j in range(anomalies.shape[0]):
            if AtLeastOneTrue(anomalies[j]):
                count+=1
        TP[i-1] = count
        FN[i-1] = anomalies.shape[0]-count
        Sensitivity[i-1]=TP[i-1]/(TP[i-1]+FN[i-1])
        Precision[i-1]=TP[i-1]/(TP[i-1]+FP)
    GlobalSensitivity=TP.sum()/(TP.sum()+FN.sum())
    Specificity=TN/(TN+FP)
    Accuracy=(TN+TP.sum())/(TN+TP.sum()+FP+FN.sum())
    GlobalPrecision=TP.sum()/(TP.sum()+FP)
    F1Score=2*GlobalPrecision*GlobalSensitivity/(GlobalPrecision+GlobalSensitivity)
    F2Score=2*Specificity*GlobalSensitivity/(Specificity+GlobalSensitivity)
    print("Sensitivity: ",Sensitivity)
    print("Global Sensitivity: ",GlobalSensitivity)
    print("Precision: ",Precision)
    print("Global Precision: ",GlobalPrecision)
    print("Specificity: ",Specificity)
    print("Accuracy: ",Accuracy)
    print("F1Score: ",F1Score)
    print("F2Score: ",F2Score)
    print("FP: ",FP)
    #return Sensitivity+Specificity
    return (F1Score,F2Score)
anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
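# In anomalyMetric, FP/TN are counted on the no-failure set (every flagged window
# there is a false positive), while TP/FN are counted per failure set (every flagged
# window there is a true positive); the global scores aggregate over all failure types.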
def plotFScore():
    global threshold
    res=[]
    # Plots the F-scores as a function of the threshold factor
    tf=0.3
    while tf<1.5:
        threshold=thresholdOrig*tf
        r=anomalyMetric([dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4]])
        res.append([tf,r[0],r[1]])
        tf+=0.05
    print(res)
    ar=np.array((res))
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 10), dpi=80, facecolor="w", edgecolor="k")
    ln1=axes.plot(ar[:,0],ar[:,1],label="F1-Score",linewidth=4)
    ax1=axes.twinx()
    ln2=ax1.plot(ar[:,0],ar[:,2],label="F2-Score",linewidth=4,color='C3')
    axes.set_xlabel("Threshold factor")
    axes.set_ylabel("F1-Score")
    ax1.set_ylabel("F2-Score")
    lns=ln1+ln2
    labs=[l.get_label() for l in lns]
    axes.legend(lns, labs, loc=0)
    axes.grid()
    plt.show()
#plotFScore()
plotData3()
exit(0)
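# The 2nd-scenario code below is only reached if the exit(0) call above is removed
# or commented out.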
# 2nd scenario. Detect only the anomaly. Later, we will classify it
# Test data alternates normal and failure segments: normal + fail1 + normal + fail2 + normal + fail3 + normal + fail4
#d=np.vstack((dataTestNorm[0],dataTestNorm[1],dataTestNorm[2],dataTestNorm[3],dataTestNorm[4],dataTestNorm[0]))
num=100
d=np.vstack((dataTestNorm[0][0:num,:],dataTestNorm[1][0:num,:],dataTestNorm[0][num:2*num,:],dataTestNorm[2][70:70+num,:],dataTestNorm[0][2*num-90:3*num-90,:],dataTestNorm[3][50:num+50,:],dataTestNorm[0][150:150+num,:],dataTestNorm[4][0:num+TIME_STEPS,:]))
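# d interleaves normal and failure segments of num samples each; the last failure
# segment is extended by TIME_STEPS samples so that, after windowing, each plotted
# segment in plotData4 spans exactly num samples.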
x_test = create_sequences(d)
x_test_pred = model.predict(x_test)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
anomalies = test_mae_loss > threshold
anomalous_data_indices = []
for i in range(anomalies.shape[0]):
    if AtLeastOneTrue(anomalies[i]):
        #if anomalies[i][0] or anomalies[i][1] or anomalies[i][2] or anomalies[i][3]:
        anomalous_data_indices.append(i)
def plotData4():
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True
    )
    for i in range(NumFeaturesToPlot):
        for j in range(1,NumberOfFailures+1):
            if j==1:
                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]],label="No fail", color='C0')
            else:
                axes[i].plot(range((j-1)*2*num,(j-1)*2*num+num),x_test[(j-1)*2*num:(j-1)*2*num+num,0,indexesToPlot[i]], color='C0')
            axes[i].plot(range(j*2*num-num,j*2*num),x_test[j*2*num-num:j*2*num,0,indexesToPlot[i]],label="Fail type "+str(j),color=colorline[j-1])
        x=[]
        y=[]
        for k in anomalous_data_indices:
            if (k+TIME_STEPS)<x_test.shape[0]:
                x.append(k+TIME_STEPS)
                y.append(x_test[k+TIME_STEPS,0,indexesToPlot[i]])
        axes[i].plot(x,y,color='grey',marker='.',linewidth=0,label="Fail detection")
        if i==0:
            axes[i].legend(bbox_to_anchor=(0.9, 0.4))
        axes[i].set_ylabel(features[indexesToPlot[i]])
        axes[i].grid()
    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()
plotData4()
