Bez popisu

v1.py 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. # Csar Fdez, UdL, 2025
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. import datetime
  5. import numpy as np
  6. import keras
  7. import os.path
  8. import pickle
  9. from keras import layers
  10. from optparse import OptionParser
  11. import copy
  # Command-line interface: pass -t/--train to train the model; without it the
  # script expects previously saved weights.
  parser = OptionParser()
  parser.add_option(
      "-t",
      "--train",
      dest="train",
      help="Trains the models (false)",
      default=False,
      action="store_true",
  )
  (options, args) = parser.parse_args()
  # Data file groups. Index:
  # 0. No failure
  # 1. Blocked evaporator
  # 2. Fully blocked condenser
  # 3. Partially blocked condenser
  # 4. Condenser fan not working
  # 5. Open door
  # So far there is data only for the first 4 types of failure, so the
  # "open door" case (index 5) is not used.
  NumberOfFailures = 4

  # The following data sets correspond to the freezer, SP=-26.
  # One list of CSV base names per condition (directory and extension are
  # added when the files are read).
  datafiles = [
      ['2024-08-07_5_', '2024-08-08_5_', '2025-01-25_5_', '2025-01-26_5_'],
      ['2024-12-11_5_', '2024-12-12_5_', '2024-12-13_5_', '2024-12-14_5_', '2024-12-15_5_'],
      ['2024-12-18_5_', '2024-12-19_5_'],
      ['2024-12-21_5_', '2024-12-22_5_', '2024-12-23_5_', '2024-12-24_5_', '2024-12-25_5_', '2024-12-26_5_'],
      ['2024-12-28_5_', '2024-12-29_5_', '2024-12-30_5_', '2024-12-31_5_', '2025-01-01_5_'],
  ]

  # Features suggested by Xavier.
  # Take care with 'tc s3': in datafiles[0] it is always null.
  # It seems to be incorporated in newer tests.
  features = ['r1 s1', 'r1 s4', 'r1 s5', 'pa1 apiii']
  # Alternative feature sets that were tried:
  #features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
  #features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
  NumFeatures = len(features)
  # Read every CSV file of each condition and concatenate them, so that
  # df_list[i] holds the raw data of condition i.
  df_list = []
  for i in range(NumberOfFailures + 1):
      dftemp = []
      for f in datafiles[i]:
          print(" ", f)
          #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
          df1 = pd.read_csv('./data/' + f + '.csv')
          dftemp.append(df1)
      df_list.append(pd.concat(dftemp))
  # Subsampled to 5' = 30 * 10"
  # Samples are taken every 5' because in production data will only be
  # available at this frequency.
  subsamplingrate = 30
  dataframe = []
  for i in range(NumberOfFailures + 1):
      # Keep every subsamplingrate-th row and only the selected feature columns.
      subsampled = df_list[i].iloc[::subsamplingrate][features]
      # Renumber the rows first and drop incomplete rows afterwards (same order
      # as before), building a new frame instead of mutating a derived one.
      dataframe.append(subsampled.reset_index(drop=True).dropna())
  # Train data is the first 2/3 of each condition's data.
  # Test data is the last 1/3.
  dataTrain = []
  dataTest = []
  for i in range(NumberOfFailures + 1):
      values = dataframe[i].values
      split = int(values.shape[0] * 2 / 3)
      dataTrain.append(values[:split, :])
      dataTest.append(values[split:, :])
  def normalize2(train, test):
      """Standardize train and test data with the statistics of the train data.

      Args:
          train: 2-D array (samples x features). Its per-column mean and
              standard deviation define the scaling.
          test: 2-D array with the same columns, scaled with the train
              statistics.

      Returns:
          Tuple ``(train_normalized, test_normalized)``.
      """
      means = train.mean(axis=0)
      stdevs = train.std(axis=0)
      # A constant column has zero deviation; divide by 1 instead so that it is
      # mapped to 0 rather than to NaN/inf.
      stdevs = np.where(stdevs == 0, 1.0, stdevs)
      print(list(means))
      return ((train - means) / stdevs, (test - means) / stdevs)
  # Normalize each condition with the statistics of its own training split.
  # NOTE(review): the model is fitted on condition 0 only, yet every failure
  # condition is scaled with its own mean/std here - confirm this is intended.
  dataTrainNorm = []
  dataTestNorm = []
  for i in range(NumberOfFailures + 1):
      (trainNorm, testNorm) = normalize2(dataTrain[i], dataTest[i])
      dataTrainNorm.append(trainNorm)
      dataTestNorm.append(testNorm)
  def plotData():
      """Plot features 0 and 1 of the normalized data, one row per condition.

      Each curve is the training split followed by the test split of that
      condition. Opens a blocking matplotlib window.
      """
      fig, axes = plt.subplots(
          nrows=NumberOfFailures + 1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k", sharex=True
      )
      for i in range(NumberOfFailures + 1):
          for column in (0, 1):
              series = np.concatenate((dataTrainNorm[i][:, column], dataTestNorm[i][:, column]))
              axes[i][column].plot(series, label="Fail " + str(i) + ", feature " + str(column))
      plt.show()
  # Uncomment to inspect the normalized data and stop:
  #plotData()
  #exit(0)

  # Autoencoder hyperparameters.
  NumFilters = 64
  KernelSize = 7
  DropOut = 0.2
  # Multiplier applied to the detection threshold obtained from training data.
  ThresholdFactor = 1
  # Window length, in samples. This is a trade-off between better performance
  # (high) and better response delay (low).
  TIME_STEPS = 48


  def create_sequences(values, time_steps=TIME_STEPS):
      """Cut ``values`` into overlapping windows of ``time_steps`` consecutive rows.

      Args:
          values: Array-like indexed by sample along its first axis.
          time_steps: Number of consecutive samples per window.

      Returns:
          Array of shape ``(len(values) - time_steps + 1, time_steps, ...)``
          whose element ``k`` is ``values[k:k + time_steps]``.

      Raises:
          ValueError: If ``values`` has fewer than ``time_steps`` samples.
      """
      num_windows = len(values) - time_steps + 1
      if num_windows < 1:
          raise ValueError(
              "create_sequences needs at least %d samples, got %d" % (time_steps, len(values))
          )
      return np.stack([values[start:start + time_steps] for start in range(num_windows)])
  # Sliding-window training tensors, one per condition.
  # The explicit loop (and its variable i) is kept on purpose: code further
  # down the script reads i after this loop has finished.
  x_train = []
  for i in range(NumberOfFailures + 1):
      x_train.append(create_sequences(dataTrainNorm[i]))
  # Code reused from v1_multifailure, for a single model only. No classification.
  # Convolutional autoencoder: two strided Conv1D layers followed by
  # Conv1DTranspose layers; the last layer has one filter per input feature.
  # Input and output sizes are taken from x_train[0] (the no-failure data)
  # instead of from a leftover loop variable.
  model = keras.Sequential(
      [
          layers.Input(shape=(x_train[0].shape[1], x_train[0].shape[2])),
          layers.Conv1D(
              filters=NumFilters,
              kernel_size=KernelSize,
              padding="same",
              strides=2,
              activation="relu",
          ),
          layers.Dropout(rate=DropOut),
          layers.Conv1D(
              filters=int(NumFilters / 2),
              kernel_size=KernelSize,
              padding="same",
              strides=2,
              activation="relu",
          ),
          layers.Conv1DTranspose(
              filters=int(NumFilters / 2),
              kernel_size=KernelSize,
              padding="same",
              strides=2,
              activation="relu",
          ),
          layers.Dropout(rate=DropOut),
          layers.Conv1DTranspose(
              filters=NumFilters,
              kernel_size=KernelSize,
              padding="same",
              strides=2,
              activation="relu",
          ),
          layers.Conv1DTranspose(filters=x_train[0].shape[2], kernel_size=KernelSize, padding="same"),
      ]
  )
  model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
  model.summary()
  path_checkpoint = "model_noclass_v1_checkpoint.weights.h5"
  es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=15)
  modelckpt_callback = keras.callbacks.ModelCheckpoint(
      monitor="val_loss",
      filepath=path_checkpoint,
      verbose=1,
      save_weights_only=True,
      save_best_only=True,
  )
  if options.train:
      # The autoencoder learns to reconstruct the no-failure data only.
      history = model.fit(
          x_train[0],
          x_train[0],
          epochs=400,
          batch_size=128,
          validation_split=0.3,
          callbacks=[es_callback, modelckpt_callback],
      )
  elif not os.path.exists(path_checkpoint):
      raise FileNotFoundError(
          "Weights file '" + path_checkpoint + "' not found; run with --train first"
      )
  # Always continue with the best checkpoint. After fit() the model holds the
  # weights of the last epoch, which early stopping leaves `patience` epochs
  # past the best one, so a training run and a later non-training run would
  # otherwise use different weights.
  model.load_weights(path_checkpoint)
  # Reconstruction error of the no-failure training windows, averaged over the
  # time axis of each window.
  x_train_pred = model.predict(x_train[0])
  train_mae_loss = np.mean(np.abs(x_train_pred - x_train[0]), axis=1)
  # Detection threshold: the largest error observed over all training windows.
  threshold = np.max(train_mae_loss, axis=0)
  thresholdOrig = copy.deepcopy(threshold)
  print("Threshold : ", threshold)
  # The threshold can be enlarged through ThresholdFactor because, otherwise,
  # data subsampled at 5' produces many false positives.
  threshold = threshold * ThresholdFactor
  # 1st scenario. Detect only the anomaly. Later, we will classify it.
  # Test data = testnormal + testfail1 + testfail2 + testfail3 + testfail4
  d = np.vstack(dataTestNorm)
  x_test = create_sequences(d)
  x_test_pred = model.predict(x_test)
  test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)

  # Row ranges [start, end] of each condition inside d, used to plot every
  # condition in a different color.
  testRanges = []
  r = 0
  for i in range(NumberOfFailures + 1):
      rnext = r + dataTestNorm[i].shape[0]
      testRanges.append([r, rnext])
      r = rnext
  # Drop the last TIME_STEPS rows for plotting
  testRanges[NumberOfFailures][1] = testRanges[NumberOfFailures][1] - TIME_STEPS
  def AtLeastOneTrue(x):
      """Return True when at least one element of the 1-D sequence ``x`` is truthy.

      Used on one row of per-feature anomaly flags; an empty ``x`` gives False.
      """
      return any(x)
  # A window is anomalous when the error of at least one feature exceeds the
  # threshold.
  anomalies = test_mae_loss > threshold
  anomalous_data_indices = [
      i for i in range(anomalies.shape[0]) if AtLeastOneTrue(anomalies[i])
  ]

  # Let's plot some features
  colorline = ['violet', 'lightcoral', 'cyan', 'lime', 'grey']
  colordot = ['darkviolet', 'red', 'blue', 'green', 'black']
  #featuresToPlot=['r1 s1','r1 s2','r1 s3','pa1 apiii']
  featuresToPlot = features
  # Column position, within `features`, of every feature to plot.
  indexesToPlot = [features.index(name) for name in featuresToPlot]
  def plotData3():
      """Plot the first-scenario test data, one subplot per feature.

      Every condition is drawn in its own color using the row ranges in
      ``testRanges``, and the windows listed in ``anomalous_data_indices`` are
      marked with grey dots. Reads the module-level ``x_test``, ``testRanges``
      and ``anomalous_data_indices`` as they are at call time. Opens a blocking
      matplotlib window.
      """
      NumFeaturesToPlot = len(indexesToPlot)
      plt.rcParams.update({'font.size': 16})
      # squeeze=False keeps `axes` two-dimensional, so indexing also works when
      # only one feature is plotted.
      fig, axes = plt.subplots(
          nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True,
          squeeze=False,
      )
      axes = axes[:, 0]
      for i in range(NumFeaturesToPlot):
          column = indexesToPlot[i]
          # The ranges are contiguous, so each one gives both the x positions
          # and the rows of x_test to draw.
          for j in range(NumberOfFailures + 1):
              start, end = testRanges[j]
              if j == 0:
                  axes[i].plot(range(start, end), x_test[start:end, 0, column], label="No fail")
              else:
                  axes[i].plot(range(start, end), x_test[start:end, 0, column], label="Fail type " + str(j), color=colorline[j - 1])
          # A detection is drawn at the end of its window (TIME_STEPS later).
          x = [k + TIME_STEPS for k in anomalous_data_indices if (k + TIME_STEPS) < x_test.shape[0]]
          y = [x_test[position, 0, column] for position in x]
          axes[i].plot(x, y, color='grey', marker='.', linewidth=0, label="Fail detection")
          if i == 0:
              axes[i].legend(bbox_to_anchor=(0.9, 0.4))
          axes[i].set_ylabel(features[column])
          axes[i].grid()
      axes[NumFeaturesToPlot - 1].set_xlabel("Sample number")
      plt.show()
  def _count_flagged_windows(data):
      """Return (windows flagged as anomalous, total windows) for ``data``."""
      windows = create_sequences(data)
      reconstruction = model.predict(windows)
      mae_loss = np.mean(np.abs(reconstruction - windows), axis=1)
      # A window is flagged when any of its features exceeds the threshold.
      flagged = np.any(mae_loss > threshold, axis=1)
      return int(flagged.sum()), flagged.shape[0]


  def _safe_ratio(numerator, denominator):
      """Return numerator/denominator, or 0.0 when the denominator is zero."""
      return numerator / denominator if denominator else 0.0


  def anomalyMetric(testList):  # first of list is non failure data
      """Print detection metrics for the current threshold and return two scores.

      FP, TP: false/true positive
      TN, FN: true/false negative
      Sensitivity (recall): probability of detecting a failure given failure data: TP/(TP+FN)
      Specificity: true negative ratio given the data is OK: TN/(TN+FP)
      Accuracy: rate of correct predictions: (TN+TP)/(TN+TP+FP+FN)
      Precision: rate of positive results: TP/(TP+FP)
      F1-score: 2*Precision*Sensitivity/(Precision+Sensitivity)
      F2-score: 2*Specificity*Sensitivity/(Specificity+Sensitivity)
      NOTE: "F2-score" here is the harmonic mean of specificity and sensitivity,
      not the F-beta score with beta=2.

      Args:
          testList: List of normalized data arrays; element 0 is no-failure
              data and every further element is data of one failure type.

      Returns:
          Tuple ``(F1Score, F2Score)``. Ratios with a zero denominator are
          reported as 0.0.
      """
      FP, normal_windows = _count_flagged_windows(testList[0])
      TN = normal_windows - FP

      # Sized from the input, so any number of failure sets can be evaluated.
      failure_sets = len(testList) - 1
      TP = np.zeros((failure_sets))
      FN = np.zeros((failure_sets))
      Sensitivity = np.zeros((failure_sets))
      Precision = np.zeros((failure_sets))
      for i in range(1, len(testList)):
          flagged, total = _count_flagged_windows(testList[i])
          TP[i - 1] = flagged
          FN[i - 1] = total - flagged
          Sensitivity[i - 1] = _safe_ratio(TP[i - 1], TP[i - 1] + FN[i - 1])
          Precision[i - 1] = _safe_ratio(TP[i - 1], TP[i - 1] + FP)

      GlobalSensitivity = _safe_ratio(TP.sum(), TP.sum() + FN.sum())
      Specificity = _safe_ratio(TN, TN + FP)
      Accuracy = _safe_ratio(TN + TP.sum(), TN + TP.sum() + FP + FN.sum())
      GlobalPrecision = _safe_ratio(TP.sum(), TP.sum() + FP)
      F1Score = _safe_ratio(2 * GlobalPrecision * GlobalSensitivity, GlobalPrecision + GlobalSensitivity)
      F2Score = _safe_ratio(2 * Specificity * GlobalSensitivity, Specificity + GlobalSensitivity)

      print("Sensitivity: ", Sensitivity)
      print("Global Sensitivity: ", GlobalSensitivity)
      print("Precision: ", Precision)
      print("Global Precision: ", GlobalPrecision)
      print("Specifity: ", Specificity)
      print("Accuracy: ", Accuracy)
      print("F1Score: ", F1Score)
      print("F2Score: ", F2Score)
      print("FP: ", FP)
      return (F1Score, F2Score)
  # Report the metrics for the no-failure test data (index 0) against the test
  # data of every failure type.
  anomalyMetric(dataTestNorm[0:NumberOfFailures + 1])
  def plotFScore():
      """Plot F1-Score and F2-Score as a function of the threshold factor.

      The factor sweeps 0.30, 0.35, ..., 1.45; for each value the module-level
      ``threshold`` is set to ``thresholdOrig * factor`` and ``anomalyMetric``
      is evaluated. The previous ``threshold`` is restored afterwards, so code
      that runs later is not affected by the sweep. Opens a blocking matplotlib
      window.
      """
      global threshold
      previous_threshold = threshold
      res = []
      try:
          # Factors are derived from an integer counter: accumulating 0.05 in a
          # float drifts and can add or drop the last step.
          for step in range(24):
              tf = round(0.3 + 0.05 * step, 2)
              threshold = thresholdOrig * tf
              r = anomalyMetric(dataTestNorm[0:NumberOfFailures + 1])
              res.append([tf, r[0], r[1]])
      finally:
          threshold = previous_threshold
      print(res)
      ar = np.array((res))
      plt.rcParams.update({'font.size': 16})
      fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 10), dpi=80, facecolor="w", edgecolor="k")
      ln1 = axes.plot(ar[:, 0], ar[:, 1], label="F1-Score", linewidth=4)
      # The second score gets its own y axis on the right-hand side.
      ax1 = axes.twinx()
      ln2 = ax1.plot(ar[:, 0], ar[:, 2], label="F2-Score", linewidth=4, color='C3')
      axes.set_xlabel("Threshold factor")
      axes.set_ylabel("F1-Score")
      ax1.set_ylabel("F2-Score")
      # One legend for the lines of both axes.
      lns = ln1 + ln2
      labs = [l.get_label() for l in lns]
      axes.legend(lns, labs, loc=0)
      axes.grid()
      plt.show()
  #plotFScore()
  #plotData3()

  # 2nd scenario. Segments of no-failure data alternate with segments of each
  # failure type:
  # normal + fail1 + normal + fail2 + normal + fail3 + normal + fail4
  num = 100
  # NOTE: a slice is silently shortened when a test set has fewer rows than
  # requested, which would shift the segment positions assumed when plotting.
  segments = (
      dataTestNorm[0][0:num, :],
      dataTestNorm[1][0:num, :],
      dataTestNorm[0][num:2 * num, :],
      dataTestNorm[2][70:70 + num, :],
      dataTestNorm[0][2 * num - 90:3 * num - 90, :],
      dataTestNorm[3][50:num + 50, :],
      dataTestNorm[0][150:150 + num, :],
      # TIME_STEPS extra rows at the end of the last segment
      dataTestNorm[4][0:num + TIME_STEPS, :],
  )
  d = np.vstack(segments)
  x_test = create_sequences(d)
  x_test_pred = model.predict(x_test)
  test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
  anomalies = test_mae_loss > threshold
  anomalous_data_indices = [
      i for i in range(anomalies.shape[0]) if AtLeastOneTrue(anomalies[i])
  ]
  def plotData4():
      """Plot the second-scenario test data, one subplot per feature.

      The data is drawn as alternating segments of ``num`` samples: a
      no-failure segment followed by a segment of failure type j, for every
      failure type. Windows listed in ``anomalous_data_indices`` are marked with
      grey dots. Reads the module-level ``x_test``, ``num`` and
      ``anomalous_data_indices``. Opens a blocking matplotlib window.
      """
      NumFeaturesToPlot = len(indexesToPlot)
      plt.rcParams.update({'font.size': 16})
      # squeeze=False keeps `axes` two-dimensional, so indexing also works when
      # only one feature is plotted.
      fig, axes = plt.subplots(
          nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k", sharex=True,
          squeeze=False,
      )
      axes = axes[:, 0]
      for i in range(NumFeaturesToPlot):
          column = indexesToPlot[i]
          for j in range(1, NumberOfFailures + 1):
              normal_start = (j - 1) * 2 * num
              fail_start = normal_start + num
              fail_end = fail_start + num
              # Only the first no-failure segment gets a legend entry.
              normal_style = {"color": 'C0'}
              if j == 1:
                  normal_style["label"] = "No fail"
              axes[i].plot(range(normal_start, fail_start), x_test[normal_start:fail_start, 0, column], **normal_style)
              axes[i].plot(range(fail_start, fail_end), x_test[fail_start:fail_end, 0, column], label="Fail type " + str(j), color=colorline[j - 1])
          # A detection is drawn at the end of its window (TIME_STEPS later).
          x = [k + TIME_STEPS for k in anomalous_data_indices if (k + TIME_STEPS) < x_test.shape[0]]
          y = [x_test[position, 0, column] for position in x]
          axes[i].plot(x, y, color='grey', marker='.', linewidth=0, label="Fail detection")
          if i == 0:
              axes[i].legend(bbox_to_anchor=(0.9, 0.4))
          axes[i].set_ylabel(features[column])
          axes[i].grid()
      axes[NumFeaturesToPlot - 1].set_xlabel("Sample number")
      plt.show()
  # Show the second-scenario plot.
  plotData4()

Powered by TurnKey Linux.