Nav apraksta

v2_unsupervised.py 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. # Csar Fdez, UdL, 2025
  2. # Unsupervised classification. Uses tslearn
  3. # https://tslearn.readthedocs.io/en/stable/index.html
  4. # Be careful with v0_unsupervised and all versions for supervised.
  5. # because dataTrain is not stacke before create_sequences, so,
  6. # the sets are not aligned in time
  7. # Same as v1 but changing data
  8. import pandas as pd
  9. import matplotlib.pyplot as plt
  10. import datetime
  11. import numpy as np
  12. import os.path
  13. from optparse import OptionParser
  14. import copy
  15. import pickle
  16. from tslearn.clustering import TimeSeriesKMeans
  17. from tslearn.neighbors import KNeighborsTimeSeries
  18. from collections import Counter
  19. parser = OptionParser()
  20. parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
  21. parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS ", default=12)
  22. (options, args) = parser.parse_args()
  23. # data files arrays. Index:
  24. # 0. No failure
  25. # 1. Blocked evaporator
  26. # 2. Full Blocked condenser
  27. # 3 Fan condenser not working
  28. # 4. Open door
  29. NumberOfFailures=4
  30. datafiles={}
  31. listofFacilitySetpoint=['5-26','5-18','5-22','30','32','34']
  32. for j in listofFacilitySetpoint:
  33. datafiles[j]=[]
  34. for i in range(NumberOfFailures+1):
  35. datafiles[j].append([])
  36. # Freezer, SP=-26
  37. datafiles['5-26'][0]=['2025-01-25_5_','2025-01-26_5_','2025-01-29_5_','2025-01-30_5_','2025-01-31_5_','2025-02-01_5_','2025-02-02_5_']
  38. datafiles['5-26'][1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
  39. datafiles['5-26'][2]=['2024-12-18_5_','2024-12-19_5_']
  40. datafiles['5-26'][3]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
  41. datafiles['5-26'][4]=['2025-02-13_5_','2025-02-14_5_','2025-02-15_5_','2025-02-16_5_','2025-02-17_5_','2025-02-18_5_','2025-02-19_5_']
  42. # Freezer, SP=-18
  43. datafiles['5-18'][0]=['2025-01-21_5_','2025-01-22_5_','2025-01-23_5_',] # no hi son aquestx arxius
  44. # Freezer, SP=-22
  45. datafiles['5-22'][0]=['2025-03-13_5_','2025-03-14_5_','2025-03-15_5_','2025-03-16_5_']
  46. datafiles['5-22'][1]=['2025-03-23_5_','2025-03-24_5_','2025-03-25_5_'] # es solapa amb el seguent test
  47. datafiles['5-22'][2]=[]
  48. # Refrigerator 0
  49. datafiles['30'][0]=['2025-01-21_3_','2025-01-22_3_','2025-01-23_3_','2025-01-24_3_','2025-01-25_3_','2025-01-26_3_']
  50. datafiles['30'][1]=['2024-12-11_3_','2024-12-12_3_','2024-12-13_3_','2024-12-14_3_','2024-12-15_3_']
  51. datafiles['30'][2]=['2024-12-18_3_','2024-12-19_3_','2024-12-20_3_']
  52. datafiles['30'][3]=['2024-12-28_3_','2024-12-29_3_','2024-12-30_3_','2024-12-31_3_']
  53. datafiles['30'][4]=['2025-02-12_3_','2025-02-13_3_','2025-02-14_3_','2025-02-15_3_','2025-02-16_3_','2025-02-17_3_','2025-02-18_3_','2025-02-19_3_'] # es solapa amb ventilador no funcionant. i els dies 20 i 21 no hi son
  54. # Refrigerator 2
  55. datafiles['32'][0]=['2025-03-13_3_','2025-03-14_3_','2025-03-15_3_','2025-03-16_3_']
  56. datafiles['32'][1]=['2025-03-10_3_']
  57. datafiles['32'][2]=['2025-03-17_3_']
  58. datafiles['32'][3]=['2025-03-22_3_','2025-03-23_3_']
  59. datafiles['32'][4]=['2025-03-27_3_','2025-03-28_3_']
  60. # Refrigerator 4
  61. datafiles['34'][0]=['2025-03-31_3_','2025-04-01_3_','2025-04-02_3_','2025-04-03_3_']
  62. datafiles['34'][1]=['2025-04-25_3_','2025-04-26_3_','2025-04-27_3_','2025-04-28_3_'] # aquestes dades no hi son
  63. datafiles['34'][2]=['2025-04-11_3_','2025-04-12_3_','2025-04-13_3_','2025-04-14_3_']
  64. datafiles['34'][3]=['2025-04-30_3_','2025-05-01_3_','2025-05-02_3_','2025-05-03_3_','2025-05-04_3_','2025-05-05_3_']
  65. datafiles['34'][4]=['2025-04-23_3_','2025-04-24_3_','2025-04-25_3_']
  66. # data files arrays. Index:
  67. # 0. No failure
  68. # 1. Blocked evaporator
  69. # 2. Full Blocked condenser
  70. # 3 Fan condenser not working
  71. # 4. Open door
  72. facilitySetpoint='32'
  73. # Features suggested by Xavier
  74. # Care with 'tc s3' because on datafiles[0] is always nulll
  75. # Seems to be incoropored in new tests
  76. #r1s5 supply air flow temperature
  77. #r1s1 inlet evaporator temperature
  78. #r1s4 condenser outlet
  79. # VAriables r1s4 and pa1 apiii may not exists in cloud controlers
  80. features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
  81. features=['r1 s1','r1 s4','r1 s5']
  82. featureNames={}
  83. featureNames['r1 s1']='$T_{evap}$'
  84. featureNames['r1 s4']='$T_{cond}$'
  85. featureNames['r1 s5']='$T_{air}$'
  86. featureNames['pa1 apiii']='$P_{elec}$'
  87. unitNames={}
  88. unitNames['r1 s1']='$(^{o}C)$'
  89. unitNames['r1 s4']='$(^{o}C)$'
  90. unitNames['r1 s5']='$(^{o}C)$'
  91. unitNames['pa1 apiii']='$(W)$'
  92. #features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
  93. #features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
  94. NumFeatures=len(features)
# Load and concatenate the raw CSV files for the selected facility/setpoint.
# df_list[i] ends up as one DataFrame per condition class i (0 = no failure).
df_list=[]
for i in range(NumberOfFailures+1):
    df_list.append([])
for i in range(NumberOfFailures+1):
    dftemp=[]
    for f in datafiles[facilitySetpoint][i]:
        print(" ", f)
        #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
        df1 = pd.read_csv('./data/'+f+'.csv')
        dftemp.append(df1)
    # NOTE(review): pd.concat([]) raises for a class with no files
    # (e.g. datafiles['5-22'][2]) — confirm such setpoints are never selected.
    df_list[i]=pd.concat(dftemp)
# Subsampled to 5' = 30 * 10" (raw samples presumably every 10 s — TODO confirm).
# We consider samples every 5' because in production we will only have data at this frequency.
subsamplingrate=30
dataframe=[]
for i in range(NumberOfFailures+1):
    dataframe.append([])
for i in range(NumberOfFailures+1):
    datalength=df_list[i].shape[0]
    # Keep every 30th row and only the selected feature columns.
    dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
    dataframe[i].reset_index(inplace=True,drop=True)
    dataframe[i].dropna(inplace=True)
# Train data is the first 2/3 of each class; test data comes from the last 1/3.
dataTrain=[]
dataTest=[]
# Cap on test samples per class; 0 means "take the whole last third".
NumberOfSamplesForTest=300
for i in range(NumberOfFailures+1):
    dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
    if NumberOfSamplesForTest==0: # Take all
        dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
    else:
        dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):int(dataframe[i].shape[0]*2/3)+NumberOfSamplesForTest,:])
# Calculate means and stdevs over the stacked training data of ALL classes.
# These module-level arrays are reused by normalize2() and for
# de-normalizing values when plotting.
a=dataTrain[0]
for i in range(1,NumberOfFailures+1):
    a=np.vstack((a,dataTrain[i]))
means=a.mean(axis=0)
stdevs=a.std(axis=0)
  134. def normalize2(train,test):
  135. return( (train-means)/stdevs, (test-means)/stdevs )
  136. dataTrainNorm=[]
  137. dataTestNorm=[]
  138. for i in range(NumberOfFailures+1):
  139. dataTrainNorm.append([])
  140. dataTestNorm.append([])
  141. for i in range(NumberOfFailures+1):
  142. (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
def plotData():
    """Quick visual sanity check: for every condition class, plot the first
    two normalized features (train followed by test) in a 2-column grid."""
    fig, axes = plt.subplots(
        nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
    )
    for i in range(NumberOfFailures+1):
        axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+", feature 0")
        axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+", feature 1")
    #axes[1].legend()
    #axes[0].set_ylabel(features[0])
    #axes[1].set_ylabel(features[1])
    plt.show()
#plotData()
#exit()
  156. def create_sequences(values, time_steps):
  157. output = []
  158. for i in range(len(values) - time_steps ):
  159. output.append(values[i : (i + time_steps)])
  160. return np.stack(output)
  161. def listToString(l):
  162. r=''
  163. for i in l:
  164. r+=str(i)
  165. return(r.replace(' ',''))
  166. timesteps=int(options.timesteps)
  167. X=dataTrainNorm[0]
  168. for i in range(1,NumberOfFailures+1):
  169. X=np.vstack((X,dataTrainNorm[i]))
  170. xtrain=create_sequences(X,timesteps)
  171. km = TimeSeriesKMeans(n_clusters=NumberOfFailures+1, metric="dtw", random_state=0)
  172. modelpath="model_v2_unsupervised_"+facilitySetpoint+"_"+str(timesteps)+listToString(features)+".pk"
  173. if options.train:
  174. km.fit(xtrain)
  175. km.to_pickle(modelpath)
  176. else:
  177. km.from_pickle(modelpath)
  178. #km.fit_predict(xtrain)
  179. colorline=['violet','lightcoral','cyan','lime','grey']
  180. colordot=['darkviolet','red','blue','green','black']
  181. featuresToPlot=features
  182. indexesToPlot=[]
  183. for i in featuresToPlot:
  184. indexesToPlot.append(features.index(i))
def plot(data,ranges):
    """Cluster `data` and plot every selected feature, coloring each segment
    of `ranges` by its majority cluster label and overlaying per-sample
    cluster-assignment dots.

    data:   windowed array from create_sequences().
    ranges: list of [start, end) index pairs, one per condition segment
            (the last one is the extra repeated no-failure segment).
    """
    # NOTE(review): fit_predict re-FITS the k-means on `data` instead of only
    # predicting with the previously trained model — confirm intended.
    km.fit_predict(data)
    # Expand data to plot with the timesteps samples of the last sample:
    # take the first element of every window, then append one full window.
    datatoplot=data[:,0,:]
    datatoplot=np.vstack((datatoplot,data[ranges[len(ranges)-1][1],:,:]))
    labels=[] # Majority cluster label per segment (cluster ids are assigned arbitrarily by the classifier)
    for i in range(len(ranges)):
        b=Counter(km.labels_[ranges[i][0]:ranges[i][1]])
        labels.append(b.most_common(1)[0][0])
    # NOTE(review): '\L' below is an invalid escape sequence (kept verbatim;
    # emits a SyntaxWarning on modern Python and prints a literal backslash).
    print("\n\n\n\LABELS: ",labels,"\n\n")
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
    )
    for i in range(NumFeaturesToPlot):
        # First pass: draw each segment as a de-normalized line, colored by
        # its majority class. labels.index(labels[j]) maps a cluster id to
        # the first segment where it dominates, i.e. a canonical class index.
        # NOTE(review): de-normalization uses stdevs[i]/means[i] (plot row
        # index) rather than stdevs[indexesToPlot[i]] — equivalent only
        # while featuresToPlot == features; confirm if that ever changes.
        init=0
        end=ranges[0][1]
        labelsplotted=[]
        for j in range(len(ranges)):
            if j==(len(ranges)-1): # Plot the last timesteps
                classtype=labels.index(labels[j])
                if classtype in labelsplotted:
                    axes[i].plot(range(init,end+timesteps),datatoplot[ranges[j][0]:ranges[j][1]+timesteps,indexesToPlot[i]]*stdevs[i]+means[i], color=colorline[classtype],linewidth=1)
                else:
                    axes[i].plot(range(init,end+timesteps),datatoplot[ranges[j][0]:ranges[j][1]+timesteps,indexesToPlot[i]]*stdevs[i]+means[i], label="Class: "+str(classtype), color=colorline[classtype],linewidth=1)
                    labelsplotted.append(classtype)
            else:
                classtype=labels.index(labels[j])
                if classtype in labelsplotted:
                    axes[i].plot(range(init,end),datatoplot[ranges[j][0]:ranges[j][1],indexesToPlot[i]]*stdevs[i]+means[i], color=colorline[classtype],linewidth=1)
                else:
                    axes[i].plot(range(init,end),datatoplot[ranges[j][0]:ranges[j][1],indexesToPlot[i]]*stdevs[i]+means[i], label="Class: "+str(classtype), color=colorline[classtype],linewidth=1)
                    labelsplotted.append(classtype)
            init=end
            if j<(len(ranges)-1):
                end+=(ranges[j+1][1]-ranges[j+1][0])
        # Second pass: bucket every sample's x/y by its per-sample cluster
        # assignment so misclassified points stand out as dots.
        x=[]
        y=[]
        for j in range(len(ranges)):
            x.append([])
            y.append([])
        for j in range(len(ranges)):
            for k in range(ranges[j][0],ranges[j][1]):
                # Original author note: "I don't know why the index sometimes
                # fails" — labels.index() raises ValueError when a cluster id
                # is not the majority in any segment; the bare except then
                # dumps the point into bucket 0. NOTE(review): the bare
                # except also hides unrelated errors — confirm acceptable.
                try:
                    x[labels.index(km.labels_[k])].append(k+timesteps)
                    y[labels.index(km.labels_[k])].append(datatoplot[k+timesteps,indexesToPlot[i]]*stdevs[i]+means[i])
                except:
                    x[0].append(k+timesteps)
                    y[0].append(datatoplot[k+timesteps,indexesToPlot[i]]*stdevs[i]+means[i])
        labelsplotted=[]
        for j in range(len(ranges)):
            classtype=labels.index(labels[j])
            if classtype in labelsplotted:
                axes[i].plot(x[j],y[j] ,color=colordot[labels.index(labels[j])],marker='.',linewidth=0)
            else:
                axes[i].plot(x[j],y[j] ,color=colordot[labels.index(labels[j])],marker='.',linewidth=0,label="Class type "+str(j) )
                labelsplotted.append(classtype)
        if i==(NumFeatures-1):
            axes[i].legend(loc='right')
        s=''
        s+=featureNames[features[indexesToPlot[i]]]
        s+=' '+unitNames[features[indexesToPlot[i]]]
        axes[i].set_ylabel(s)
        axes[i].grid()
    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()
# Dead code kept from the author (disabled via triple-quoted string):
# builds per-class Ranges over the TRAINING data and plots it.
'''
Ranges=[]
r=0
for i in range(NumberOfFailures+1):
Ranges.append([r,r+dataTrainNorm[i].shape[0]])
r+=dataTrainNorm[i].shape[0]
# Drop the last TIME_STEPS for plotting
Ranges[NumberOfFailures][1]=Ranges[NumberOfFailures][1]-timesteps-1
plot(xtrain,Ranges)
'''
# Try with test data: stack the normalized test segments of every class and
# record the [start, end) row range of each segment for later labeling.
X=dataTestNorm[0]
Ranges=[[0,dataTestNorm[0].shape[0]]]
r=dataTestNorm[0].shape[0]
for i in range(1,NumberOfFailures+1):
    X=np.vstack((X,dataTestNorm[i]))
    Ranges.append([r,r+dataTestNorm[i].shape[0]])
    r+=dataTestNorm[i].shape[0]
X=np.vstack((X,dataTestNorm[0])) # We add a last segment of no fail data
Ranges.append([r,r+dataTestNorm[0].shape[0]])
# Trim the final segment so window indexes stay in bounds after windowing.
Ranges[len(Ranges)-1][1]=Ranges[len(Ranges)-1][1]-timesteps-1
xtest=create_sequences(X,timesteps)
def anomalyMetric(labels,ranges):
    """Compute length-weighted detection metrics from cluster labels.

    labels: per-sequence cluster label (e.g. km.labels_).
    ranges: [start, end) pairs per condition segment. Takes ONLY the first
    NumberOfFailures+1 segments of ranges (the trailing repeated
    no-failure segment is ignored).

    FP, TP: false/true positive; TN, FN: true/false negative.
    Sensitivity (recall): TP/(TP+FN); Precision: TP/(TP+FP);
    F1-score: 2*Precision*Sensitivity/(Precision+Sensitivity).
    Prints the metrics; returns None.
    """
    lab=[] # Majority cluster label per segment (cluster ids are assigned arbitrarily by the classifier)
    TP=[]
    FN=[]
    TPFP=[]
    for i in range(NumberOfFailures+1):
        TP.append([])
        FN.append([])
        TPFP.append([])
        b=Counter(labels[ranges[i][0]:ranges[i][1]])
        lab.append(b.most_common(1)[0][0])
    # TP/FN per class: a sample is a true positive when its cluster maps
    # (via lab, first-occurrence lookup) back to its own segment's class.
    for i in range(NumberOfFailures+1):
        counttp=0
        countfn=0
        for j in range(ranges[i][0],ranges[i][1]-timesteps):
            # NOTE(review): lab.index() raises ValueError when a cluster id
            # is not the majority of any segment — unhandled here; confirm.
            if lab.index(labels[j])==i:
                counttp+=1
            else:
                countfn+=1
        TP[i]=counttp
        FN[i]=countfn
    # TP+FP per class i: total samples (from any segment) assigned to i.
    for i in range(NumberOfFailures+1):
        count=0
        for ii in range(NumberOfFailures+1):
            for j in range(ranges[ii][0],ranges[ii][1]-timesteps):
                if lab.index(labels[j])==i:
                    count+=1
        TPFP[i]=count
    # Effective (timesteps-trimmed) segment lengths, used as weights.
    segmentLength=[]
    for i in range(NumberOfFailures+1):
        segmentLength.append(ranges[i][1]-timesteps-ranges[i][0])
    totalSegmentLength=0
    for i in range(NumberOfFailures+1):
        totalSegmentLength+=segmentLength[i]
    # Length-weighted averages across classes.
    Sensitivity=0
    Precision=0
    for i in range(NumberOfFailures+1):
        Sensitivity+=TP[i]/(TP[i]+FN[i])*segmentLength[i]/totalSegmentLength
        Precision+=TP[i]/(TPFP[i])*segmentLength[i]/totalSegmentLength
    print(lab)
    print("TP: ",TP)
    print("FN: ",FN)
    print("TPFP: ",TPFP)
    print("Sensitivity: ",Sensitivity)
    print("Precision: ",Precision)
    print("F1-Score: ",2*Precision*Sensitivity/(Sensitivity+Precision))
# Cluster the test sequences and plot the result.
# NOTE(review): fit_predict re-FITS the k-means on xtest rather than only
# predicting with the trained/loaded model — confirm this is intended.
km.fit_predict(xtest)
#anomalyMetric(km.labels_,Ranges)
plot(xtest,Ranges)

Powered by TurnKey Linux.