Açıklama Yok

v1_unsupervised.py 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. # Csar Fdez, UdL, 2025
  2. # Unsupervised classification. Uses tslearn
  3. # https://tslearn.readthedocs.io/en/stable/index.html
  4. # Be careful with v0_unsupervised and all versions for supervised.
  5. # because dataTrain is not stacke before create_sequences, so,
  6. # the sets are not aligned in time
  7. import pandas as pd
  8. import matplotlib.pyplot as plt
  9. import datetime
  10. import numpy as np
  11. import os.path
  12. from optparse import OptionParser
  13. import copy
  14. import pickle
  15. from tslearn.clustering import TimeSeriesKMeans
  16. from tslearn.neighbors import KNeighborsTimeSeries
  17. from collections import Counter
# Command-line interface:
#   -t / --train      retrain the clustering model (default: load from pickle)
#   -n / --timesteps  sliding-window length in samples (default: 12)
parser = OptionParser()
parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS ", default=12)
(options, args) = parser.parse_args()
# data files arrays. Index:
# 0. No failure
# 1. Blocked evaporator
# 2. Full Blocked condenser
# 3. Partial Blocked condenser
# 4 Fan condenser not working
# 5. Open door
NumberOfFailures=4 # So far, we have only data for the first 4 types of failures
# datafiles[i] lists the CSV basename prefixes (date + unit id) for class i.
datafiles=[]
for i in range(NumberOfFailures+1):
    datafiles.append([])
# Next set of data corresponds to Freezer, SP=-26
datafiles[0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_','2025-01-27_5_','2025-01-28_5_']
datafiles[1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
#datafiles[1]=['2024-12-17_5_','2024-12-16_5_','2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_'] # This have transitions
datafiles[2]=['2024-12-18_5_','2024-12-19_5_']
datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
#datafiles[4]=['2024-12-27_5_','2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_'] # This have transitions
#datafiles[4]=[]
  42. # Features suggested by Xavier
  43. # Care with 'tc s3' because on datafiles[0] is always nulll
  44. # Seems to be incoropored in new tests
  45. #r1s5 supply air flow temperature
  46. #r1s1 inlet evaporator temperature
  47. #r1s4 condenser outlet
  48. # VAriables r1s4 and pa1 apiii may not exists in cloud controlers
  49. features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
  50. features=['r1 s1','r1 s4','r1 s5']
  51. featureNames={}
  52. featureNames['r1 s1']='$T_{evap}$'
  53. featureNames['r1 s4']='$T_{cond}$'
  54. featureNames['r1 s5']='$T_{air}$'
  55. featureNames['pa1 apiii']='$P_{elec}$'
  56. unitNames={}
  57. unitNames['r1 s1']='$(^{o}C)$'
  58. unitNames['r1 s4']='$(^{o}C)$'
  59. unitNames['r1 s5']='$(^{o}C)$'
  60. unitNames['pa1 apiii']='$(W)$'
  61. #features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
  62. #features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
  63. NumFeatures=len(features)
# Load every CSV of every class and concatenate the files of a class into
# one DataFrame, preserving file (i.e. time) order.
df_list=[]
for i in range(NumberOfFailures+1):
    df_list.append([])
for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[i]:
        print(" ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    df_list[i]=pd.concat(dftemp)
# subsampled to 5' = 30 * 10"
# We consider samples every 5' because in production, we will only have data
# at this frequency.
subsamplingrate=30
# dataframe[i]: every 30th row of class i, restricted to the selected
# features, re-indexed and with NaN rows dropped.
dataframe=[]
for i in range(NumberOfFailures+1):
    dataframe.append([])
for i in range(NumberOfFailures+1):
    datalength=df_list[i].shape[0]
    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True,drop=True)
    dataframe[i].dropna(inplace=True)
# Train data is first 2/3 of data
# Test data is: last 1/3 of data (capped at NumberOfSamplesForTest samples;
# set NumberOfSamplesForTest to 0 to keep the whole last third).
dataTrain=[]
dataTest=[]
NumberOfSamplesForTest=300
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
    if NumberOfSamplesForTest==0: # Take all
        dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
    else:
        dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):int(dataframe[i].shape[0]*2/3)+NumberOfSamplesForTest,:])
# Calculate means and stdev over the training data of ALL classes stacked,
# so every class is normalised with the same statistics.
a=dataTrain[0]
for i in range(1,NumberOfFailures+1):
    a=np.vstack((a,dataTrain[i]))
means=a.mean(axis=0)
stdevs=a.std(axis=0)
  103. def normalize2(train,test):
  104. return( (train-means)/stdevs, (test-means)/stdevs )
# Normalise every class' train/test split with the global statistics.
dataTrainNorm=[]
dataTestNorm=[]
for i in range(NumberOfFailures+1):
    dataTrainNorm.append([])
    dataTestNorm.append([])
for i in range(NumberOfFailures+1):
    (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
def plotData():
    """Quick visual check: plot the first two normalised features of each
    class, train and test data concatenated, one row of subplots per class."""
    fig, axes = plt.subplots(
        nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
    )
    for i in range(NumberOfFailures+1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+", feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+", feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    plt.show()
#plotData()
  124. def create_sequences(values, time_steps):
  125. output = []
  126. for i in range(len(values) - time_steps ):
  127. output.append(values[i : (i + time_steps)])
  128. return np.stack(output)
  129. def listToString(l):
  130. r=''
  131. for i in l:
  132. r+=str(i)
  133. return(r.replace(' ',''))
  134. timesteps=int(options.timesteps)
  135. X=dataTrainNorm[0]
  136. for i in range(1,NumberOfFailures+1):
  137. X=np.vstack((X,dataTrainNorm[i]))
  138. xtrain=create_sequences(X,timesteps)
  139. km = TimeSeriesKMeans(n_clusters=NumberOfFailures+1, metric="dtw", random_state=0)
  140. modelpath="model_v1_unsupervised_"+str(timesteps)+listToString(features)+".pk"
  141. if options.train:
  142. km.fit(xtrain)
  143. km.to_pickle(modelpath)
  144. else:
  145. km.from_pickle(modelpath)
  146. #km.fit_predict(xtrain)
  147. colorline=['violet','lightcoral','cyan','lime','grey']
  148. colordot=['darkviolet','red','blue','green','black']
  149. featuresToPlot=features
  150. indexesToPlot=[]
  151. for i in featuresToPlot:
  152. indexesToPlot.append(features.index(i))
def plot(data,ranges):
    """Cluster the windows in *data* and plot each feature, colouring every
    segment by its majority cluster and overlaying per-window dots.

    data:   array of windows, shape (n_windows, timesteps, n_features).
    ranges: list of [start, end) window-index pairs, one per segment; the
            segments are assumed to correspond, in order, to the classes
            (no-fail, fail 1, ..., plus a trailing repeat segment).
    """
    # Re-clusters in place; km.labels_ is overwritten for the plots below.
    km.fit_predict(data)
    # Expand data to plot with the timesteps samples of last sample:
    # take the first sample of every window, then append one full window so
    # the tail of the last segment is shown.
    datatoplot=data[:,0,:]
    datatoplot=np.vstack((datatoplot,data[ranges[len(ranges)-1][1],:,:]))
    # Cluster ids are assigned arbitrarily by the classifier, so map each
    # segment to the majority cluster label seen inside it.
    labels=[] # Labels are assigned randomly by classifer
    for i in range(len(ranges)):
        b=Counter(km.labels_[ranges[i][0]:ranges[i][1]])
        labels.append(b.most_common(1)[0][0])
    print("\n\n\n\LABELS: ",labels,"\n\n")
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
    )
    for i in range(NumFeaturesToPlot):
        # init/end track the x-axis position of the current segment.
        init=0
        end=ranges[0][1]
        labelsplotted=[]
        for j in range(len(ranges)):
            # NOTE(review): labels.index(labels[j]) collapses duplicate
            # cluster labels to the first segment that produced them.
            if j==(len(ranges)-1): # Plot the last timesteps
                classtype=labels.index(labels[j])
                if classtype in labelsplotted:
                    axes[i].plot(range(init,end+timesteps),datatoplot[ranges[j][0]:ranges[j][1]+timesteps,indexesToPlot[i]]*stdevs[i]+means[i], color=colorline[classtype],linewidth=1)
                else:
                    axes[i].plot(range(init,end+timesteps),datatoplot[ranges[j][0]:ranges[j][1]+timesteps,indexesToPlot[i]]*stdevs[i]+means[i], label="Class: "+str(classtype), color=colorline[classtype],linewidth=1)
                    labelsplotted.append(classtype)
            else:
                classtype=labels.index(labels[j])
                if classtype in labelsplotted:
                    axes[i].plot(range(init,end),datatoplot[ranges[j][0]:ranges[j][1],indexesToPlot[i]]*stdevs[i]+means[i], color=colorline[classtype],linewidth=1)
                else:
                    axes[i].plot(range(init,end),datatoplot[ranges[j][0]:ranges[j][1],indexesToPlot[i]]*stdevs[i]+means[i], label="Class: "+str(classtype), color=colorline[classtype],linewidth=1)
                    labelsplotted.append(classtype)
            init=end
            if j<(len(ranges)-1):
                end+=(ranges[j+1][1]-ranges[j+1][0])
        # Scatter every window at its predicted cluster, de-normalised back
        # to physical units (x/y bucketed per mapped class id).
        x=[]
        y=[]
        for j in range(len(ranges)):
            x.append([])
            y.append([])
        for j in range(len(ranges)):
            for k in range(ranges[j][0],ranges[j][1]):
                try: # I dont know why sometimes fails index !!!!
                    # NOTE(review): raises ValueError when km.labels_[k] is a
                    # cluster that was never a segment majority; such windows
                    # are dumped into bucket 0 — confirm this is intended.
                    x[labels.index(km.labels_[k])].append(k+timesteps)
                    y[labels.index(km.labels_[k])].append(datatoplot[k+timesteps,indexesToPlot[i]]*stdevs[i]+means[i])
                except:
                    x[0].append(k+timesteps)
                    y[0].append(datatoplot[k+timesteps,indexesToPlot[i]]*stdevs[i]+means[i])
        labelsplotted=[]
        for j in range(len(ranges)):
            classtype=labels.index(labels[j])
            if classtype in labelsplotted:
                axes[i].plot(x[j],y[j] ,color=colordot[labels.index(labels[j])],marker='.',linewidth=0)
            else:
                axes[i].plot(x[j],y[j] ,color=colordot[labels.index(labels[j])],marker='.',linewidth=0,label="Class type "+str(j) )
                labelsplotted.append(classtype)
        if i==(NumFeatures-1):
            axes[i].legend(loc='right')
        s=''
        s+=featureNames[features[indexesToPlot[i]]]
        s+=' '+unitNames[features[indexesToPlot[i]]]
        axes[i].set_ylabel(s)
        axes[i].grid()
    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()
# Disabled experiment: evaluate/plot on the TRAINING windows instead of test.
'''
Ranges=[]
r=0
for i in range(NumberOfFailures+1):
Ranges.append([r,r+dataTrainNorm[i].shape[0]])
r+=dataTrainNorm[i].shape[0]
# Drop the last TIME_STEPS for plotting
Ranges[NumberOfFailures][1]=Ranges[NumberOfFailures][1]-timesteps-1
plot(xtrain,Ranges)
'''
# Try with test data: stack the test segments of every class and record the
# [start, end) window range of each segment.
X=dataTestNorm[0]
Ranges=[[0,dataTestNorm[0].shape[0]]]
r=dataTestNorm[0].shape[0]
for i in range(1,NumberOfFailures+1):
    X=np.vstack((X,dataTestNorm[i]))
    Ranges.append([r,r+dataTestNorm[i].shape[0]])
    r+=dataTestNorm[i].shape[0]
X=np.vstack((X,dataTestNorm[0])) # We add a last segment of no fail data
Ranges.append([r,r+dataTestNorm[0].shape[0]])
# Trim the last range so plotting can append a full trailing window.
Ranges[len(Ranges)-1][1]=Ranges[len(Ranges)-1][1]-timesteps-1
xtest=create_sequences(X,timesteps)
def anomalyMetric(labels,ranges):
    """Print per-class detection metrics for the clustering result.

    labels: km.labels_, one cluster id per window.
    ranges: [start, end) window ranges; only the first NumberOfFailures+1
            segments are scored (the trailing repeat segment is ignored).
    """
    # Takes ONLY the firsts segments of Ranges
    # FP, TP: false/true positive
    # TN, FN: true/false negative
    # Sensitivity (recall): probab failure detection if data is fail: TP/(TP+FN)
    # Precision: Rate of positive results: TP/(TP+FP)
    # F1-score: predictive performance measure: 2*Precision*Sensitity/(Precision+Sensitity)
    # Map each segment to its majority cluster id (cluster ids are arbitrary).
    lab=[] # Labels are assigned randomly by classifer
    TP=[]
    FN=[]
    TPFP=[]
    for i in range(NumberOfFailures+1):
        TP.append([])
        FN.append([])
        TPFP.append([])
        b=Counter(labels[ranges[i][0]:ranges[i][1]])
        lab.append(b.most_common(1)[0][0])
    # TP[i]/FN[i]: windows of segment i predicted as class i / as another class.
    # NOTE(review): lab.index(labels[j]) raises ValueError if a window's
    # cluster never became a segment majority — confirm that cannot happen.
    for i in range(NumberOfFailures+1):
        counttp=0
        countfn=0
        for j in range(ranges[i][0],ranges[i][1]-timesteps):
            if lab.index(labels[j])==i:
                counttp+=1
            else:
                countfn+=1
        TP[i]=counttp
        FN[i]=countfn
    # TPFP[i]: total windows (over all segments) predicted as class i.
    for i in range(NumberOfFailures+1):
        count=0
        for ii in range(NumberOfFailures+1):
            for j in range(ranges[ii][0],ranges[ii][1]-timesteps):
                if lab.index(labels[j])==i:
                    count+=1
        TPFP[i]=count
    # Segment lengths (minus the trailing timesteps not scored above) are
    # used to weight the per-class metrics into a single average.
    segmentLength=[]
    for i in range(NumberOfFailures+1):
        segmentLength.append(ranges[i][1]-timesteps-ranges[i][0])
    totalSegmentLength=0
    for i in range(NumberOfFailures+1):
        totalSegmentLength+=segmentLength[i]
    Sensitivity=0
    Precision=0
    for i in range(NumberOfFailures+1):
        Sensitivity+=TP[i]/(TP[i]+FN[i])*segmentLength[i]/totalSegmentLength
        Precision+=TP[i]/(TPFP[i])*segmentLength[i]/totalSegmentLength
    print(lab)
    print("TP: ",TP)
    print("FN: ",FN)
    print("TPFP: ",TPFP)
    print("Sensitivity: ",Sensitivity)
    print("Precision: ",Precision)
    print("F1-Score: ",2*Precision*Sensitivity/(Sensitivity+Precision))
# Final evaluation: cluster the test windows, score them, and plot.
km.fit_predict(xtest)
anomalyMetric(km.labels_,Ranges)
plot(xtest,Ranges)

Powered by TurnKey Linux.