설명 없음

v2_unsupervised.py 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. # Csar Fdez, UdL, 2025
  2. # Unsupervised classification. Uses tslearn
  3. # https://tslearn.readthedocs.io/en/stable/index.html
  4. # Be careful with v0_unsupervised and all versions for supervised.
  5. # because dataTrain is not stacke before create_sequences, so,
  6. # the sets are not aligned in time
  7. # Same as v1 but changing data
  8. import pandas as pd
  9. import matplotlib.pyplot as plt
  10. import datetime
  11. import numpy as np
  12. import os.path
  13. from optparse import OptionParser
  14. import copy
  15. import pickle
  16. from tslearn.clustering import TimeSeriesKMeans
  17. from tslearn.neighbors import KNeighborsTimeSeries
  18. from collections import Counter
  19. parser = OptionParser()
  20. parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
  21. parser.add_option("-p", "--plot", dest="plot", help="Plot Data (false)", default=False, action="store_true")
  22. parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS (default 12)", default=12)
  23. parser.add_option("-f", "--facilitysetpoint", dest="fsp", help="Facility and Set Point (default 5-26)", default="5-26")
  24. (options, args) = parser.parse_args()
  25. # data files arrays. Index:
  26. # 0. No failure
  27. # 1. Blocked evaporator
  28. # 2. Full Blocked condenser
  29. # 3 Fan condenser not working
  30. # 4. Open door
  31. FailureName=['No Fail','Block Evap', 'Block Cond', 'Not Fan Cond', 'Open Door']
  32. NumberOfFailures=4
  33. datafiles={}
  34. listofFacilitySetpoint=['5-26','5-18','5-22','30','32','34']
  35. for j in listofFacilitySetpoint:
  36. datafiles[j]=[]
  37. for i in range(NumberOfFailures+1):
  38. datafiles[j].append([])
  39. # Freezer, SP=-26
  40. datafiles['5-26'][0]=['2025-01-25_5_','2025-01-26_5_','2025-01-29_5_','2025-01-30_5_','2025-01-31_5_','2025-02-01_5_','2025-02-02_5_','2025-02-03_5_','2025-02-04_5_']
  41. datafiles['5-26'][1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
  42. datafiles['5-26'][2]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
  43. datafiles['5-26'][3]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
  44. datafiles['5-26'][4]=['2025-02-13_5_','2025-02-14_5_','2025-02-15_5_','2025-02-16_5_','2025-02-17_5_','2025-02-18_5_','2025-02-19_5_']
  45. # Freezer, SP=-18
  46. datafiles['5-18'][0]=['2025-01-21_5_','2025-01-22_5_','2025-01-23_5_'] #
  47. datafiles['5-18'][1]=['2025-02-17_5_','2025-02-18_5_','2025-02-19_5_'] #
  48. datafiles['5-18'][2]=['2025-03-10_5_','2025-03-11_5_','2025-03-12_5_'] #
  49. datafiles['5-18'][3]=['2025-01-04_5_','2025-01-05_5_','2025-01-06_5_','2025-01-07_5_'] #
  50. datafiles['5-18'][4]=['2025-04-30_5_','2025-05-01_5_','2025-05-02_5_','2025-05-03_5_','2025-05-04_5_','2025-05-05_5_'] #
  51. # Freezer, SP=-22
  52. datafiles['5-22'][0]=['2025-03-13_5_','2025-03-14_5_','2025-03-15_5_','2025-03-16_5_']
  53. datafiles['5-22'][1]=['2025-03-21_5_','2025-03-22_5_','2025-03-23_5_','2025-03-24_5_','2025-03-25_5_'] #
  54. datafiles['5-22'][2]=['2025-03-26_5_','2025-03-27_5_','2025-03-28_5_']
  55. datafiles['5-22'][3]=['2025-03-31_5_','2025-04-01_5_','2025-04-02_5_','2025-04-03_5_']
  56. datafiles['5-22'][3]=['2025-03-31_5_','2025-04-01_5_','2025-04-02_5_'] # day 2025-04-03 seems transition
  57. #datafiles['5-22'][4]=['2025-03-17_5_']
  58. datafiles['5-22'][4]=['2025-05-29_5_','2025-05-30_5_','2025-05-31_5_', '2025-06-01_5_', '2025-06-02_5_'] # new data
  59. # Refrigerator 0
  60. datafiles['30'][0]=['2025-01-21_3_','2025-01-22_3_','2025-01-23_3_','2025-01-24_3_','2025-01-25_3_','2025-01-26_3_']
  61. datafiles['30'][1]=['2024-12-11_3_','2024-12-12_3_','2024-12-13_3_','2024-12-14_3_','2024-12-15_3_']
  62. datafiles['30'][2]=['2024-12-18_3_','2024-12-19_3_','2024-12-20_3_']
  63. datafiles['30'][3]=['2024-12-28_3_','2024-12-29_3_','2024-12-30_3_','2024-12-31_3_','2025-01-01_3_']
  64. #datafiles['30'][4]=['2025-02-12_3_','2025-02-13_3_','2025-02-14_3_','2025-02-15_3_','2025-02-16_3_','2025-02-17_3_','2025-02-18_3_','2025-02-19_3_'] # es solapa amb ventilador no funcionant. i els dies 20 i 21 no hi son
  65. datafiles['30'][4]=['2025-05-08_3_','2025-05-09_3_','2025-05-10_3_','2025-05-11_3_','2025-05-12_3_']
  66. # Refrigerator 2
  67. datafiles['32'][0]=['2025-03-13_3_','2025-03-14_3_','2025-03-15_3_','2025-03-16_3_']
  68. datafiles['32'][1]=['2025-03-10_3_']
  69. #datafiles['32'][1]=['2025-05-28_3_','2025-05-29_3_','2025-05-30_3_','2025-05-31_3_','2025-06-01_3_'] # new data . read the label. it is like open door
  70. datafiles['32'][2]=['2025-03-17_3_']
  71. datafiles['32'][3]=['2025-03-24_3_','2025-03-25_3_','2025-03-26_3_']
  72. datafiles['32'][4]=['2025-03-27_3_','2025-03-28_3_']
  73. # Refrigerator 4
  74. datafiles['34'][0]=['2025-03-31_3_','2025-04-01_3_','2025-04-02_3_','2025-04-03_3_']
  75. # diuen: que amb aquest set no obtenen gel i aquest experiment es pot descartar ????
  76. datafiles['34'][2]=['2025-04-11_3_','2025-04-12_3_','2025-04-13_3_','2025-04-14_3_']
  77. datafiles['34'][3]=['2025-04-30_3_','2025-05-01_3_','2025-05-02_3_','2025-05-03_3_','2025-05-04_3_','2025-05-05_3_']
  78. datafiles['34'][4]=['2025-04-23_3_','2025-04-24_3_','2025-04-25_3_']
  79. # data files arrays. Index:
  80. # 0. No failure
  81. # 1. Blocked evaporator
  82. # 2. Full Blocked condenser
  83. # 3 Fan condenser not working
  84. # 4. Open door
  85. facilitySetpoint=options.fsp
  86. # Features suggested by Xavier
  87. # Care with 'tc s3' because on datafiles[0] is always nulll
  88. # Seems to be incoropored in new tests
  89. #r1s5 supply air flow temperature
  90. #r1s1 inlet evaporator temperature
  91. #r1s4 condenser outlet
  92. # VAriables r1s4 and pa1 apiii may not exists in cloud controlers
  93. #features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
  94. features=['r1 s1','r1 s4','r1 s5']
  95. featureNames={}
  96. featureNames['r1 s1']='$T_{evap}$'
  97. featureNames['r1 s4']='$T_{cond}$'
  98. featureNames['r1 s5']='$T_{air}$'
  99. featureNames['pa1 apiii']='$P_{elec}$'
  100. unitNames={}
  101. unitNames['r1 s1']='$(^{o}C)$'
  102. unitNames['r1 s4']='$(^{o}C)$'
  103. unitNames['r1 s5']='$(^{o}C)$'
  104. unitNames['pa1 apiii']='$(W)$'
  105. #features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
  106. #features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
  107. NumFeatures=len(features)
  108. df_list=[]
  109. for i in range(NumberOfFailures+1):
  110. df_list.append([])
  111. for i in range(NumberOfFailures+1):
  112. dftemp=[]
  113. for f in datafiles[facilitySetpoint][i]:
  114. print(" ", f)
  115. #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
  116. df1 = pd.read_csv('./data/'+f+'.csv')
  117. dftemp.append(df1)
  118. df_list[i]=pd.concat(dftemp)
  119. # subsampled to 5' = 30 * 10"
  120. # We consider smaples every 5' because in production, we will only have data at this frequency
  121. subsamplingrate=30
  122. dataframe=[]
  123. for i in range(NumberOfFailures+1):
  124. dataframe.append([])
  125. for i in range(NumberOfFailures+1):
  126. datalength=df_list[i].shape[0]
  127. dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
  128. dataframe[i].reset_index(inplace=True,drop=True)
  129. dataframe[i].dropna(inplace=True)
  130. # Train data is first 2/3 of data
  131. # Test data is: last 1/3 of data
  132. dataTrain=[]
  133. dataTest=[]
  134. NumberOfSamplesForTest=0
  135. for i in range(NumberOfFailures+1):
  136. dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
  137. if NumberOfSamplesForTest==0: # Take all
  138. dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
  139. else:
  140. dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):int(dataframe[i].shape[0]*2/3)+NumberOfSamplesForTest,:])
  141. # Calculate means and stdev
  142. a=dataTrain[0]
  143. for i in range(1,NumberOfFailures+1):
  144. a=np.vstack((a,dataTrain[i]))
  145. means=a.mean(axis=0)
  146. stdevs=a.std(axis=0)
  147. def normalize2(train,test):
  148. return( (train-means)/stdevs, (test-means)/stdevs )
  149. dataTrainNorm=[]
  150. dataTestNorm=[]
  151. for i in range(NumberOfFailures+1):
  152. dataTrainNorm.append([])
  153. dataTestNorm.append([])
  154. for i in range(NumberOfFailures+1):
  155. (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
  156. def plotData():
  157. fig, axes = plt.subplots(
  158. nrows=NumberOfFailures+1, ncols=NumFeatures, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
  159. )
  160. for i in range(NumberOfFailures+1):
  161. for j in range(NumFeatures):
  162. #axes[i][j].plot(np.concatenate((dataTrainNorm[i][:,j],dataTestNorm[i][:,j])),label="Fail "+str(i)+", feature 0")
  163. axes[i][j].plot(np.concatenate((dataTrain[i][:,j],dataTest[i][:,j])),label="Fail "+str(i)+", feature 0")
  164. if i==(NumberOfFailures):
  165. axes[i][j].set_xlabel(features[j])
  166. if j==0:
  167. axes[i][j].set_ylabel(FailureName[i])
  168. #axes[1].legend()
  169. #axes[0].set_ylabel(features[0])
  170. #axes[1].set_ylabel(features[1])
  171. plt.show()
  172. if options.plot:
  173. plotData()
  174. exit()
  175. def create_sequences(values, time_steps):
  176. output = []
  177. for i in range(len(values) - time_steps ):
  178. output.append(values[i : (i + time_steps)])
  179. return np.stack(output)
  180. def listToString(l):
  181. r=''
  182. for i in l:
  183. r+=str(i)
  184. return(r.replace(' ',''))
  185. timesteps=int(options.timesteps)
  186. X=dataTrainNorm[0]
  187. for i in range(1,NumberOfFailures+1):
  188. X=np.vstack((X,dataTrainNorm[i]))
  189. xtrain=create_sequences(X,timesteps)
  190. km = TimeSeriesKMeans(n_clusters=NumberOfFailures+1, metric="dtw", random_state=0)
  191. modelpath="model_v2_unsupervised_"+facilitySetpoint+"_"+str(timesteps)+listToString(features)+".pk"
  192. if options.train:
  193. km.fit(xtrain)
  194. km.to_pickle(modelpath)
  195. else:
  196. km.from_pickle(modelpath)
  197. colorline=['violet','lightcoral','cyan','lime','grey']
  198. colordot=['darkviolet','red','blue','green','black']
  199. featuresToPlot=features
  200. indexesToPlot=[]
  201. for i in featuresToPlot:
  202. indexesToPlot.append(features.index(i))
def plot(data,ranges):
    """Cluster *data* with the global km model and plot, for each selected
    feature, the de-normalised signal drawn per true segment plus one dot
    per window coloured by its assigned cluster.

    data: windowed array of shape (windows, timesteps, features).
    ranges: list of [start, end) window-index pairs, one per true class,
        in FailureName order.
    Uses module globals: km, timesteps, means, stdevs, indexesToPlot,
    colorline, colordot, features, featureNames, unitNames, NumFeatures.
    """
    km.fit_predict(data)
    # Expand data to plot with the timesteps samples of last sample
    datatoplot=data[:,0,:]
    datatoplot=np.vstack((datatoplot,data[ranges[len(ranges)-1][1],:,:]))
    labels=[] # Labels are assigned randomly by classifer
    # Majority vote inside each true segment decides which cluster id
    # represents that class.
    for i in range(len(ranges)):
        b=Counter(km.labels_[ranges[i][0]:ranges[i][1]])
        labels.append(b.most_common(1)[0][0])
    print("\n\n\n LABELS: ",labels,"\n\n")
    NumFeaturesToPlot=len(indexesToPlot)
    plt.rcParams.update({'font.size': 16})
    fig, axes = plt.subplots(
        nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
    )
    # NOTE(review): the de-normalisation below uses stdevs[i]/means[i]
    # (the plot-row index) rather than stdevs[indexesToPlot[i]]; the two
    # coincide only while featuresToPlot == features — confirm.
    for i in range(NumFeaturesToPlot):
        init=0
        end=ranges[0][1]
        labelsplotted=[]  # classes already given a legend entry
        for j in range(len(ranges)):
            if j==(len(ranges)-1): # Plot the last timesteps
                classtype=labels.index(labels[j])
                if classtype in labelsplotted:
                    axes[i].plot(range(init,end+timesteps),datatoplot[ranges[j][0]:ranges[j][1]+timesteps,indexesToPlot[i]]*stdevs[i]+means[i], color=colorline[classtype],linewidth=1)
                else:
                    axes[i].plot(range(init,end+timesteps),datatoplot[ranges[j][0]:ranges[j][1]+timesteps,indexesToPlot[i]]*stdevs[i]+means[i], label="Class: "+str(classtype), color=colorline[classtype],linewidth=1)
                    labelsplotted.append(classtype)
            else:
                classtype=labels.index(labels[j])
                if classtype in labelsplotted:
                    axes[i].plot(range(init,end),datatoplot[ranges[j][0]:ranges[j][1],indexesToPlot[i]]*stdevs[i]+means[i], color=colorline[classtype],linewidth=1)
                else:
                    axes[i].plot(range(init,end),datatoplot[ranges[j][0]:ranges[j][1],indexesToPlot[i]]*stdevs[i]+means[i], label="Class: "+str(classtype), color=colorline[classtype],linewidth=1)
                    labelsplotted.append(classtype)
            # Advance the x-axis origin to the start of the next segment.
            init=end
            if j<(len(ranges)-1):
                end+=(ranges[j+1][1]-ranges[j+1][0])
        # Bucket every window's x/y by the class its cluster label maps to,
        # so each class' dots can be drawn in its own colour.
        x=[]
        y=[]
        for j in range(len(ranges)):
            x.append([])
            y.append([])
        for j in range(len(ranges)):
            for k in range(ranges[j][0],ranges[j][1]):
                try: # Idont know why sometimes fails index !!!!
                    x[labels.index(km.labels_[k])].append(k+timesteps)
                    y[labels.index(km.labels_[k])].append(datatoplot[k+timesteps,indexesToPlot[i]]*stdevs[i]+means[i])
                except:
                    # Fallback: labels.index raises when a cluster id was
                    # never voted for any segment; dump such windows in
                    # class 0's bucket.
                    x[0].append(k+timesteps)
                    y[0].append(datatoplot[k+timesteps,indexesToPlot[i]]*stdevs[i]+means[i])
        labelsplotted=[]
        for j in range(len(ranges)):
            classtype=labels.index(labels[j])
            if classtype in labelsplotted:
                axes[i].plot(x[j],y[j] ,color=colordot[labels.index(labels[j])],marker='.',linewidth=0)
            else:
                axes[i].plot(x[j],y[j] ,color=colordot[labels.index(labels[j])],marker='.',linewidth=0,label="Class type "+str(j) )
                labelsplotted.append(classtype)
        if i==(NumFeatures-1):
            axes[i].legend(loc='right')
        s=''
        s+=featureNames[features[indexesToPlot[i]]]
        s+=' '+unitNames[features[indexesToPlot[i]]]
        axes[i].set_ylabel(s)
        axes[i].grid()
    axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
    plt.show()
  270. '''
  271. Ranges=[]
  272. r=0
  273. for i in range(NumberOfFailures+1):
  274. Ranges.append([r,r+dataTrainNorm[i].shape[0]])
  275. r+=dataTrainNorm[i].shape[0]
  276. # Drop the last TIME_STEPS for plotting
  277. Ranges[NumberOfFailures][1]=Ranges[NumberOfFailures][1]-timesteps-1
  278. plot(xtrain,Ranges)
  279. '''
  280. # Try with test data
  281. X=dataTestNorm[0]
  282. Ranges=[[0,dataTestNorm[0].shape[0]]]
  283. r=dataTestNorm[0].shape[0]
  284. for i in range(1,NumberOfFailures+1):
  285. X=np.vstack((X,dataTestNorm[i]))
  286. Ranges.append([r,r+dataTestNorm[i].shape[0]])
  287. r+=dataTestNorm[i].shape[0]
  288. #X=np.vstack((X,dataTestNorm[0])) # We add a last segment of no fail data
  289. #Ranges.append([r,r+dataTestNorm[0].shape[0]])
  290. Ranges[len(Ranges)-1][1]=Ranges[len(Ranges)-1][1]-timesteps-1
  291. xtest=create_sequences(X,timesteps)
def anomalyMetric(labels,ranges):
    """Map cluster ids to true classes and print per-class TP/FN/TP+FP plus
    length-weighted Sensitivity, Precision and F1.

    labels: cluster id per test window (km.labels_).
    ranges: [start, end) window ranges, one per true class, FailureName order.
    Uses module globals NumberOfFailures and timesteps.
    """
    # Takes ONLY the firsts segments of Ranges
    # FP, TP: false/true positive
    # TN, FN: true/false negative
    # Sensitivity (recall): probab failure detection if data is fail: TP/(TP+FN)
    # Precision: Rate of positive results: TP/(TP+FP)
    # F1-score: predictive performance measure: 2*Precision*Sensitity/(Precision+Sensitity)
    print("Labels on test: ",set(labels))
    lab=[] # Labels are assigned randomly by classifer
    TP=[]
    FN=[]
    TPFP=[]
    ASSIGNED_CLASS=[]
    for i in range(NumberOfFailures+1):
        ASSIGNED_CLASS.append(-1)
    UNASSIGNED_CLASS=[]
    # Majority cluster id of each true segment claims that segment; a cluster
    # already claimed by an earlier segment cannot be claimed twice (-1 kept).
    for i in range(NumberOfFailures+1):
        TP.append([])
        FN.append([])
        TPFP.append([])
        Class=Counter(labels[ranges[i][0]:ranges[i][1]]).most_common(1)[0][0]
        if not Class in ASSIGNED_CLASS:
            ASSIGNED_CLASS[i]=Class
    # Cluster ids that no segment claimed...
    for i in range(NumberOfFailures+1):
        if not i in ASSIGNED_CLASS:
            UNASSIGNED_CLASS.append(i)
    # ...are handed out, in order, to the still-unassigned segments.
    for i in range(NumberOfFailures+1):
        if ASSIGNED_CLASS[i]==-1:
            ASSIGNED_CLASS[i]=UNASSIGNED_CLASS.pop(0)
    for i in range(NumberOfFailures+1):
        lab.append(ASSIGNED_CLASS[i])
    # lab[class] == cluster id, so lab.index(cluster id) recovers the class.
    # TP[i]/FN[i]: windows of segment i whose cluster maps back to class i /
    # to some other class. The last `timesteps` windows are skipped.
    for i in range(NumberOfFailures+1):
        counttp=0
        countfn=0
        for j in range(ranges[i][0],ranges[i][1]-timesteps):
            if lab.index(labels[j])==i:
                counttp+=1
            else:
                countfn+=1
        TP[i]=counttp
        FN[i]=countfn
    # TPFP[i]: total windows (over all segments) assigned to class i = TP+FP.
    for i in range(NumberOfFailures+1):
        count=0
        for ii in range(NumberOfFailures+1):
            for j in range(ranges[ii][0],ranges[ii][1]-timesteps):
                if lab.index(labels[j])==i:
                    count+=1
        TPFP[i]=count
    # Per-class metrics weighted by the (trimmed) segment lengths.
    segmentLength=[]
    for i in range(NumberOfFailures+1):
        segmentLength.append(ranges[i][1]-timesteps-ranges[i][0])
    totalSegmentLength=0
    for i in range(NumberOfFailures+1):
        totalSegmentLength+=segmentLength[i]
    Sensitivity=0
    Precision=0
    for i in range(NumberOfFailures+1):
        Sensitivity+=TP[i]/(TP[i]+FN[i])*segmentLength[i]/totalSegmentLength
        Precision+=TP[i]/(TPFP[i])*segmentLength[i]/totalSegmentLength
    print(lab)
    print("TP: ",TP)
    print("FN: ",FN)
    print("TPFP: ",TPFP)
    print("Sensitivity: ",Sensitivity)
    print("Precision: ",Precision)
    print("F1-Score: ",2*Precision*Sensitivity/(Sensitivity+Precision))
# Cluster the stacked test windows, print detection metrics against the
# known class segments, then plot the signals with per-window assignments.
km.fit_predict(xtest)
anomalyMetric(km.labels_,Ranges)
plot(xtest,Ranges)

Powered by TurnKey Linux.