Нема описа

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. # Csar Fdez, UdL, 2025
  2. # Unsupervised classification. Uses tslearn
  3. # https://tslearn.readthedocs.io/en/stable/index.html
  4. # Be careful with v0_unsupervised and all versions for supervised.
  5. # because dataTrain is not stacke before create_sequences, so,
  6. # the sets are not aligned in time
  7. # Same as v2 but including multiple KM metrics
  8. # No hi ha color entre DTW i Euclidea. Amb Euclidea, al test "32" no distingueix entre Class0 i class 2
  9. import pandas as pd
  10. import matplotlib.pyplot as plt
  11. import datetime
  12. import numpy as np
  13. import os.path
  14. from optparse import OptionParser
  15. import copy
  16. import pickle
  17. from tslearn.clustering import TimeSeriesKMeans
  18. from tslearn.neighbors import KNeighborsTimeSeries
  19. from collections import Counter
  20. parser = OptionParser()
  21. parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
  22. parser.add_option("-p", "--plot", dest="plot", help="Plot Data (false)", default=False, action="store_true")
  23. parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS ", default=12)
  24. parser.add_option("-m", "--metric", dest="metric", help="KM metric (0:euclidean, 1:dtw, 2: softdtw)", default=1)
  25. (options, args) = parser.parse_args()
  26. # data files arrays. Index:
  27. # 0. No failure
  28. # 1. Blocked evaporator
  29. # 2. Full Blocked condenser
  30. # 3 Fan condenser not working
  31. # 4. Open door
  32. NumberOfFailures=4
  33. datafiles={}
  34. listofFacilitySetpoint=['5-26','5-18','5-22','30','32','34']
  35. for j in listofFacilitySetpoint:
  36. datafiles[j]=[]
  37. for i in range(NumberOfFailures+1):
  38. datafiles[j].append([])
  39. # Freezer, SP=-26
  40. datafiles['5-26'][0]=['2025-01-25_5_','2025-01-26_5_','2025-01-29_5_','2025-01-30_5_','2025-01-31_5_','2025-02-01_5_','2025-02-02_5_','2025-02-03_5_','2025-02-04_5_']
  41. datafiles['5-26'][1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
  42. datafiles['5-26'][2]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
  43. datafiles['5-26'][3]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
  44. datafiles['5-26'][4]=['2025-02-13_5_','2025-02-14_5_','2025-02-15_5_','2025-02-16_5_','2025-02-17_5_','2025-02-18_5_','2025-02-19_5_']
  45. # Freezer, SP=-18
  46. datafiles['5-18'][0]=['2025-01-21_5_','2025-01-22_5_','2025-01-23_5_'] #
  47. datafiles['5-18'][1]=['2025-02-17_5_','2025-02-18_5_','2025-02-19_5_'] #
  48. datafiles['5-18'][2]=['2025-03-10_5_','2025-03-11_5_','2025-03-12_5_'] #
  49. datafiles['5-18'][3]=['2025-01-04_5_','2025-01-05_5_','2025-01-06_5_','2025-01-07_5_'] #
  50. datafiles['5-18'][4]=['2025-04-30_5_','2025-05-01_5_','2025-05-02_5_','2025-05-03_5_','2025-05-04_5_','2025-05-05_5_'] # Faltens aquestes dades
  51. # Freezer, SP=-22
  52. datafiles['5-22'][0]=['2025-03-13_5_','2025-03-14_5_','2025-03-15_5_','2025-03-16_5_']
  53. datafiles['5-22'][1]=['2025-03-21_5_','2025-03-22_5_','2025-03-23_5_','2025-03-24_5_','2025-03-25_5_'] #
  54. datafiles['5-22'][2]=['2025-03-26_5_','2025-03-27_5_','2025-03-28_5_']
  55. datafiles['5-22'][3]=['2025-03-31_5_','2025-04-01_5_','2025-04-02_5_','2025-04-03_5_']
  56. datafiles['5-22'][4]=['2025-03-17_5_']
  57. # Refrigerator 0
  58. datafiles['30'][0]=['2025-01-21_3_','2025-01-22_3_','2025-01-23_3_','2025-01-24_3_','2025-01-25_3_','2025-01-26_3_']
  59. datafiles['30'][1]=['2024-12-11_3_','2024-12-12_3_','2024-12-13_3_','2024-12-14_3_','2024-12-15_3_']
  60. datafiles['30'][2]=['2024-12-18_3_','2024-12-19_3_','2024-12-20_3_']
  61. datafiles['30'][3]=['2024-12-28_3_','2024-12-29_3_','2024-12-30_3_','2024-12-31_3_','2025-01-01_3_']
  62. datafiles['30'][4]=['2025-02-12_3_','2025-02-13_3_','2025-02-14_3_','2025-02-15_3_','2025-02-16_3_','2025-02-17_3_','2025-02-18_3_','2025-02-19_3_'] # es solapa amb ventilador no funcionant. i els dies 20 i 21 no hi son
  63. # Refrigerator 2
  64. datafiles['32'][0]=['2025-03-13_3_','2025-03-14_3_','2025-03-15_3_','2025-03-16_3_']
  65. datafiles['32'][1]=['2025-03-10_3_']
  66. datafiles['32'][2]=['2025-03-17_3_']
  67. datafiles['32'][3]=['2025-03-24_3_','2025-03-25_3_','2025-03-26_3_']
  68. datafiles['32'][4]=['2025-03-27_3_','2025-03-28_3_']
  69. # Refrigerator 4
  70. datafiles['34'][0]=['2025-03-31_3_','2025-04-01_3_','2025-04-02_3_','2025-04-03_3_']
  71. datafiles['34'][1]=['2025-04-25_3_','2025-04-26_3_','2025-04-27_3_','2025-04-28_3_'] # aquestes dades no hi son
  72. datafiles['34'][2]=['2025-04-11_3_','2025-04-12_3_','2025-04-13_3_','2025-04-14_3_']
  73. datafiles['34'][3]=['2025-04-30_3_','2025-05-01_3_','2025-05-02_3_','2025-05-03_3_','2025-05-04_3_','2025-05-05_3_']
  74. datafiles['34'][4]=['2025-04-23_3_','2025-04-24_3_','2025-04-25_3_']
  75. # data files arrays. Index:
  76. # 0. No failure
  77. # 1. Blocked evaporator
  78. # 2. Full Blocked condenser
  79. # 3 Fan condenser not working
  80. # 4. Open door
  81. facilitySetpoint='32'
  82. # Features suggested by Xavier
  83. # Care with 'tc s3' because on datafiles[0] is always nulll
  84. # Seems to be incoropored in new tests
  85. #r1s5 supply air flow temperature
  86. #r1s1 inlet evaporator temperature
  87. #r1s4 condenser outlet
  88. # VAriables r1s4 and pa1 apiii may not exists in cloud controlers
  89. features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
  90. features=['r1 s1','r1 s4','r1 s5']
  91. featureNames={}
  92. featureNames['r1 s1']='$T_{evap}$'
  93. featureNames['r1 s4']='$T_{cond}$'
  94. featureNames['r1 s5']='$T_{air}$'
  95. featureNames['pa1 apiii']='$P_{elec}$'
  96. unitNames={}
  97. unitNames['r1 s1']='$(^{o}C)$'
  98. unitNames['r1 s4']='$(^{o}C)$'
  99. unitNames['r1 s5']='$(^{o}C)$'
  100. unitNames['pa1 apiii']='$(W)$'
  101. #features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
  102. #features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
  103. NumFeatures=len(features)
  104. df_list=[]
  105. for i in range(NumberOfFailures+1):
  106. df_list.append([])
  107. for i in range(NumberOfFailures+1):
  108. dftemp=[]
  109. for f in datafiles[facilitySetpoint][i]:
  110. print(" ", f)
  111. #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
  112. df1 = pd.read_csv('./data/'+f+'.csv')
  113. dftemp.append(df1)
  114. df_list[i]=pd.concat(dftemp)
  115. # subsampled to 5' = 30 * 10"
  116. # We consider smaples every 5' because in production, we will only have data at this frequency
  117. subsamplingrate=30
  118. dataframe=[]
  119. for i in range(NumberOfFailures+1):
  120. dataframe.append([])
  121. for i in range(NumberOfFailures+1):
  122. datalength=df_list[i].shape[0]
  123. dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
  124. dataframe[i].reset_index(inplace=True,drop=True)
  125. dataframe[i].dropna(inplace=True)
  126. # Train data is first 2/3 of data
  127. # Test data is: last 1/3 of data
  128. dataTrain=[]
  129. dataTest=[]
  130. NumberOfSamplesForTest=0
  131. for i in range(NumberOfFailures+1):
  132. dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
  133. if NumberOfSamplesForTest==0: # Take all
  134. dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
  135. else:
  136. dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):int(dataframe[i].shape[0]*2/3)+NumberOfSamplesForTest,:])
  137. # Calculate means and stdev
  138. a=dataTrain[0]
  139. for i in range(1,NumberOfFailures+1):
  140. a=np.vstack((a,dataTrain[i]))
  141. means=a.mean(axis=0)
  142. stdevs=a.std(axis=0)
  143. def normalize2(train,test):
  144. return( (train-means)/stdevs, (test-means)/stdevs )
  145. dataTrainNorm=[]
  146. dataTestNorm=[]
  147. for i in range(NumberOfFailures+1):
  148. dataTrainNorm.append([])
  149. dataTestNorm.append([])
  150. for i in range(NumberOfFailures+1):
  151. (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
  152. def plotData():
  153. fig, axes = plt.subplots(
  154. nrows=NumberOfFailures+1, ncols=NumFeatures, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
  155. )
  156. for i in range(NumberOfFailures+1):
  157. for j in range(NumFeatures):
  158. axes[i][j].plot(np.concatenate((dataTrainNorm[i][:,j],dataTestNorm[i][:,j])),label="Fail "+str(i)+", feature 0")
  159. #axes[1].legend()
  160. #axes[0].set_ylabel(features[0])
  161. #axes[1].set_ylabel(features[1])
  162. plt.show()
  163. if options.plot:
  164. plotData()
  165. exit()
  166. def create_sequences(values, time_steps):
  167. output = []
  168. for i in range(len(values) - time_steps ):
  169. output.append(values[i : (i + time_steps)])
  170. return np.stack(output)
  171. def listToString(l):
  172. r=''
  173. for i in l:
  174. r+=str(i)
  175. return(r.replace(' ',''))
  176. timesteps=int(options.timesteps)
  177. X=dataTrainNorm[0]
  178. for i in range(1,NumberOfFailures+1):
  179. X=np.vstack((X,dataTrainNorm[i]))
  180. xtrain=create_sequences(X,timesteps)
  181. # set metric
  182. metric=int(options.metric)
  183. metricstring="euclidean"
  184. if metric==0:
  185. metrictring="euclidean"
  186. elif metric==1:
  187. metricstring="dtw"
  188. elif metric==2:
  189. metricstring="softdtw"
  190. else:
  191. print("Error in metric: ",metric)
  192. exit(0)
  193. km = TimeSeriesKMeans(n_clusters=NumberOfFailures+1, metric=metricstring, random_state=0)
  194. modelpath="model_v3_unsupervised_metric_"+metricstring+"_"+facilitySetpoint+"_"+str(timesteps)+listToString(features)+".pk"
  195. if options.train:
  196. km.fit(xtrain)
  197. km.to_pickle(modelpath)
  198. else:
  199. km.from_pickle(modelpath)
  200. #km.fit_predict(xtrain)
  201. colorline=['violet','lightcoral','cyan','lime','grey']
  202. colordot=['darkviolet','red','blue','green','black']
  203. featuresToPlot=features
  204. indexesToPlot=[]
  205. for i in featuresToPlot:
  206. indexesToPlot.append(features.index(i))
  207. def plot(data,ranges):
  208. km.fit_predict(data)
  209. # Expand data to plot with the timesteps samples of last sample
  210. datatoplot=data[:,0,:]
  211. datatoplot=np.vstack((datatoplot,data[ranges[len(ranges)-1][1],:,:]))
  212. labels=[] # Labels are assigned randomly by classifer
  213. for i in range(len(ranges)):
  214. b=Counter(km.labels_[ranges[i][0]:ranges[i][1]])
  215. labels.append(b.most_common(1)[0][0])
  216. print("\n\n\n LABELS: ",labels,"\n\n")
  217. NumFeaturesToPlot=len(indexesToPlot)
  218. plt.rcParams.update({'font.size': 16})
  219. fig, axes = plt.subplots(
  220. nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
  221. )
  222. for i in range(NumFeaturesToPlot):
  223. init=0
  224. end=ranges[0][1]
  225. labelsplotted=[]
  226. for j in range(len(ranges)):
  227. if j==(len(ranges)-1): # Plot the last timesteps
  228. classtype=labels.index(labels[j])
  229. if classtype in labelsplotted:
  230. axes[i].plot(range(init,end+timesteps),datatoplot[ranges[j][0]:ranges[j][1]+timesteps,indexesToPlot[i]]*stdevs[i]+means[i], color=colorline[classtype],linewidth=1)
  231. else:
  232. axes[i].plot(range(init,end+timesteps),datatoplot[ranges[j][0]:ranges[j][1]+timesteps,indexesToPlot[i]]*stdevs[i]+means[i], label="Class: "+str(classtype), color=colorline[classtype],linewidth=1)
  233. labelsplotted.append(classtype)
  234. else:
  235. classtype=labels.index(labels[j])
  236. if classtype in labelsplotted:
  237. axes[i].plot(range(init,end),datatoplot[ranges[j][0]:ranges[j][1],indexesToPlot[i]]*stdevs[i]+means[i], color=colorline[classtype],linewidth=1)
  238. else:
  239. axes[i].plot(range(init,end),datatoplot[ranges[j][0]:ranges[j][1],indexesToPlot[i]]*stdevs[i]+means[i], label="Class: "+str(classtype), color=colorline[classtype],linewidth=1)
  240. labelsplotted.append(classtype)
  241. init=end
  242. if j<(len(ranges)-1):
  243. end+=(ranges[j+1][1]-ranges[j+1][0])
  244. x=[]
  245. y=[]
  246. for j in range(len(ranges)):
  247. x.append([])
  248. y.append([])
  249. for j in range(len(ranges)):
  250. for k in range(ranges[j][0],ranges[j][1]):
  251. try: # Idont know why sometimes fails index !!!!
  252. x[labels.index(km.labels_[k])].append(k+timesteps)
  253. y[labels.index(km.labels_[k])].append(datatoplot[k+timesteps,indexesToPlot[i]]*stdevs[i]+means[i])
  254. except:
  255. x[0].append(k+timesteps)
  256. y[0].append(datatoplot[k+timesteps,indexesToPlot[i]]*stdevs[i]+means[i])
  257. labelsplotted=[]
  258. for j in range(len(ranges)):
  259. classtype=labels.index(labels[j])
  260. if classtype in labelsplotted:
  261. axes[i].plot(x[j],y[j] ,color=colordot[labels.index(labels[j])],marker='.',linewidth=0)
  262. else:
  263. axes[i].plot(x[j],y[j] ,color=colordot[labels.index(labels[j])],marker='.',linewidth=0,label="Class type "+str(j) )
  264. labelsplotted.append(classtype)
  265. if i==(NumFeatures-1):
  266. axes[i].legend(loc='right')
  267. s=''
  268. s+=featureNames[features[indexesToPlot[i]]]
  269. s+=' '+unitNames[features[indexesToPlot[i]]]
  270. axes[i].set_ylabel(s)
  271. axes[i].grid()
  272. axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
  273. plt.show()
  274. '''
  275. Ranges=[]
  276. r=0
  277. for i in range(NumberOfFailures+1):
  278. Ranges.append([r,r+dataTrainNorm[i].shape[0]])
  279. r+=dataTrainNorm[i].shape[0]
  280. # Drop the last TIME_STEPS for plotting
  281. Ranges[NumberOfFailures][1]=Ranges[NumberOfFailures][1]-timesteps-1
  282. plot(xtrain,Ranges)
  283. '''
  284. # Try with test data
  285. X=dataTestNorm[0]
  286. Ranges=[[0,dataTestNorm[0].shape[0]]]
  287. r=dataTestNorm[0].shape[0]
  288. for i in range(1,NumberOfFailures+1):
  289. X=np.vstack((X,dataTestNorm[i]))
  290. Ranges.append([r,r+dataTestNorm[i].shape[0]])
  291. r+=dataTestNorm[i].shape[0]
  292. #X=np.vstack((X,dataTestNorm[0])) # We add a last segment of no fail data
  293. #Ranges.append([r,r+dataTestNorm[0].shape[0]])
  294. Ranges[len(Ranges)-1][1]=Ranges[len(Ranges)-1][1]-timesteps-1
  295. xtest=create_sequences(X,timesteps)
  296. def anomalyMetric(labels,ranges):
  297. # Takes ONLY the firsts segments of Ranges
  298. # FP, TP: false/true positive
  299. # TN, FN: true/false negative
  300. # Sensitivity (recall): probab failure detection if data is fail: TP/(TP+FN)
  301. # Precision: Rate of positive results: TP/(TP+FP)
  302. # F1-score: predictive performance measure: 2*Precision*Sensitity/(Precision+Sensitity)
  303. lab=[] # Labels are assigned randomly by classifer
  304. TP=[]
  305. FN=[]
  306. TPFP=[]
  307. for i in range(NumberOfFailures+1):
  308. TP.append([])
  309. FN.append([])
  310. TPFP.append([])
  311. b=Counter(labels[ranges[i][0]:ranges[i][1]])
  312. lab.append(b.most_common(1)[0][0])
  313. for i in range(NumberOfFailures+1):
  314. counttp=0
  315. countfn=0
  316. for j in range(ranges[i][0],ranges[i][1]-timesteps):
  317. if lab.index(labels[j])==i:
  318. counttp+=1
  319. else:
  320. countfn+=1
  321. TP[i]=counttp
  322. FN[i]=countfn
  323. for i in range(NumberOfFailures+1):
  324. count=0
  325. for ii in range(NumberOfFailures+1):
  326. for j in range(ranges[ii][0],ranges[ii][1]-timesteps):
  327. if lab.index(labels[j])==i:
  328. count+=1
  329. TPFP[i]=count
  330. segmentLength=[]
  331. for i in range(NumberOfFailures+1):
  332. segmentLength.append(ranges[i][1]-timesteps-ranges[i][0])
  333. totalSegmentLength=0
  334. for i in range(NumberOfFailures+1):
  335. totalSegmentLength+=segmentLength[i]
  336. Sensitivity=0
  337. Precision=0
  338. for i in range(NumberOfFailures+1):
  339. Sensitivity+=TP[i]/(TP[i]+FN[i])*segmentLength[i]/totalSegmentLength
  340. Precision+=TP[i]/(TPFP[i])*segmentLength[i]/totalSegmentLength
  341. print(lab)
  342. print("TP: ",TP)
  343. print("FN: ",FN)
  344. print("TPFP: ",TPFP)
  345. print("Sensitivity: ",Sensitivity)
  346. print("Precision: ",Precision)
  347. print("F1-Score: ",2*Precision*Sensitivity/(Sensitivity+Precision))
  348. km.fit_predict(xtest)
  349. anomalyMetric(km.labels_,Ranges)
  350. plot(xtest,Ranges)

Powered by TurnKey Linux.