No Description

v0_unsupervised.py 8.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. # Csar Fdez, UdL, 2025
  2. # Unsupervised classification. Uses tslearn
  3. # https://tslearn.readthedocs.io/en/stable/index.html
  4. import pandas as pd
  5. import matplotlib.pyplot as plt
  6. import datetime
  7. import numpy as np
  8. import keras
  9. import os.path
  10. from optparse import OptionParser
  11. import copy
  12. import pickle
  13. from tslearn.clustering import TimeSeriesKMeans
  14. from collections import Counter
  15. parser = OptionParser()
  16. parser.add_option("-t", "--train", dest="train", help="Trains the models (false)", default=False, action="store_true")
  17. parser.add_option("-n", "--timesteps", dest="timesteps", help="TIME STEPS ", default=12)
  18. (options, args) = parser.parse_args()
  19. # data files arrays. Index:
  20. # 0. No failure
  21. # 1. Blocked evaporator
  22. # 2. Full Blocked condenser
  23. # 3. Partial Blocked condenser
  24. # 4 Fan condenser not working
  25. # 5. Open door
  26. NumberOfFailures=4 # So far, we have only data for the first 4 types of failures
  27. datafiles=[]
  28. for i in range(NumberOfFailures+1):
  29. datafiles.append([])
  30. # Next set of ddata corresponds to Freezer, SP=-26
  31. datafiles[0]=['2024-08-07_5_','2024-08-08_5_','2025-01-25_5_','2025-01-26_5_','2025-01-27_5_','2025-01-28_5_']
  32. datafiles[1]=['2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_']
  33. #datafiles[1]=['2024-12-17_5_','2024-12-16_5_','2024-12-11_5_', '2024-12-12_5_','2024-12-13_5_','2024-12-14_5_','2024-12-15_5_'] # This have transitions
  34. datafiles[2]=['2024-12-18_5_','2024-12-19_5_']
  35. datafiles[3]=['2024-12-21_5_','2024-12-22_5_','2024-12-23_5_','2024-12-24_5_','2024-12-25_5_','2024-12-26_5_']
  36. datafiles[4]=['2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_']
  37. #datafiles[4]=['2024-12-27_5_','2024-12-28_5_','2024-12-29_5_','2024-12-30_5_','2024-12-31_5_','2025-01-01_5_'] # This have transitions
  38. #datafiles[4]=[]
  39. # Features suggested by Xavier
  40. # Care with 'tc s3' because on datafiles[0] is always nulll
  41. # Seems to be incoropored in new tests
  42. #r1s5 supply air flow temperature
  43. #r1s1 inlet evaporator temperature
  44. #r1s4 condenser outlet
  45. # VAriables r1s4 and pa1 apiii may not exists in cloud controlers
  46. features=['r1 s1','r1 s4','r1 s5','pa1 apiii']
  47. features=['r1 s1','r1 s4','r1 s5']
  48. featureNames={}
  49. featureNames['r1 s1']='$T_{evap}$'
  50. featureNames['r1 s4']='$T_{cond}$'
  51. featureNames['r1 s5']='$T_{air}$'
  52. featureNames['pa1 apiii']='$P_{elec}$'
  53. unitNames={}
  54. unitNames['r1 s1']='$(^{o}C)$'
  55. unitNames['r1 s4']='$(^{o}C)$'
  56. unitNames['r1 s5']='$(^{o}C)$'
  57. unitNames['pa1 apiii']='$(W)$'
  58. #features=['r1 s1','r1 s2','r1 s3','r1 s4','r1 s5','r1 s6','r1 s7','r1 s8','r1 s9','r1 s10','r2 s1','r2 s2','r2 s3','r2 s4','r2 s5','r2 s6','r2 s7','r2 s8','r2 s9','pa1 apiii','tc s1','tc s2']
  59. #features=['r2 s2', 'tc s1','r1 s10','r1 s6','r2 s8']
  60. NumFeatures=len(features)
  61. df_list=[]
  62. for i in range(NumberOfFailures+1):
  63. df_list.append([])
  64. for i in range(NumberOfFailures+1):
  65. dftemp=[]
  66. for f in datafiles[i]:
  67. print(" ", f)
  68. #df1 = pd.read_csv('./data/'+f+'.csv', parse_dates=['datetime'], dayfirst=True, index_col='datetime')
  69. df1 = pd.read_csv('./data/'+f+'.csv')
  70. dftemp.append(df1)
  71. df_list[i]=pd.concat(dftemp)
  72. # subsampled to 5' = 30 * 10"
  73. # We consider smaples every 5' because in production, we will only have data at this frequency
  74. subsamplingrate=30
  75. dataframe=[]
  76. for i in range(NumberOfFailures+1):
  77. dataframe.append([])
  78. for i in range(NumberOfFailures+1):
  79. datalength=df_list[i].shape[0]
  80. dataframe[i]=df_list[i].iloc[range(0,datalength,subsamplingrate)][features]
  81. dataframe[i].reset_index(inplace=True,drop=True)
  82. dataframe[i].dropna(inplace=True)
  83. # Train data is first 2/3 of data
  84. # Test data is: last 1/3 of data
  85. dataTrain=[]
  86. dataTest=[]
  87. for i in range(NumberOfFailures+1):
  88. dataTrain.append(dataframe[i].values[0:int(dataframe[i].shape[0]*2/3),:])
  89. dataTest.append(dataframe[i].values[int(dataframe[i].shape[0]*2/3):,:])
  90. # Calculate means and stdev
  91. a=dataTrain[0]
  92. for i in range(1,NumberOfFailures+1):
  93. a=np.vstack((a,dataTrain[i]))
  94. means=a.mean(axis=0)
  95. stdevs=a.std(axis=0)
  96. def normalize2(train,test):
  97. return( (train-means)/stdevs, (test-means)/stdevs )
  98. dataTrainNorm=[]
  99. dataTestNorm=[]
  100. for i in range(NumberOfFailures+1):
  101. dataTrainNorm.append([])
  102. dataTestNorm.append([])
  103. for i in range(NumberOfFailures+1):
  104. (dataTrainNorm[i],dataTestNorm[i])=normalize2(dataTrain[i],dataTest[i])
  105. def plotData():
  106. fig, axes = plt.subplots(
  107. nrows=NumberOfFailures+1, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k",sharex=True
  108. )
  109. for i in range(NumberOfFailures+1):
  110. axes[i][0].plot(np.concatenate((dataTrainNorm[i][:,0],dataTestNorm[i][:,0])),label="Fail "+str(i)+", feature 0")
  111. axes[i][1].plot(np.concatenate((dataTrainNorm[i][:,1],dataTestNorm[i][:,1])),label="Fail "+str(i)+", feature 1")
  112. #axes[1].legend()
  113. #axes[0].set_ylabel(features[0])
  114. #axes[1].set_ylabel(features[1])
  115. plt.show()
  116. #plotData()
  117. def create_sequences(values, time_steps):
  118. output = []
  119. for i in range(len(values) - time_steps + 1):
  120. output.append(values[i : (i + time_steps)])
  121. return np.stack(output)
  122. def listToString(l):
  123. r=''
  124. for i in l:
  125. r+=str(i)
  126. return(r.replace(' ',''))
  127. timesteps=int(options.timesteps)
  128. x_train=[]
  129. for i in range(NumberOfFailures+1):
  130. x_train.append(create_sequences(dataTrainNorm[i],timesteps))
  131. xtrain=x_train[0]
  132. for i in range(1,NumberOfFailures+1):
  133. xtrain=np.vstack((xtrain,x_train[i]))
  134. km = TimeSeriesKMeans(n_clusters=NumberOfFailures+1, metric="dtw")
  135. modelpath="model_co_unsupervised_"+str(timesteps)+listToString(features)+".pk"
  136. if options.train:
  137. km.fit(xtrain)
  138. km.to_pickle(modelpath)
  139. else:
  140. km.from_pickle(modelpath)
  141. km.fit_predict(xtrain)
  142. colorline=['violet','lightcoral','cyan','lime','grey']
  143. colordot=['darkviolet','red','blue','green','black']
  144. Ranges=[]
  145. r=0
  146. for i in range(NumberOfFailures+1):
  147. Ranges.append([r,r+x_train[i].shape[0]])
  148. r+=x_train[i].shape[0]
  149. # Drop the last TIME_STEPS for plotting
  150. Ranges[NumberOfFailures][1]=Ranges[NumberOfFailures][1]
  151. featuresToPlot=features
  152. indexesToPlot=[]
  153. for i in featuresToPlot:
  154. indexesToPlot.append(features.index(i))
  155. labels=[] # Labels are assigned randomly by classifer
  156. for i in range(NumberOfFailures+1):
  157. b=Counter(km.labels_[Ranges[i][0]:Ranges[i][1]])
  158. labels.append(b.most_common(1)[0][0])
  159. def plot(data,ranges,labels):
  160. NumFeaturesToPlot=len(indexesToPlot)
  161. plt.rcParams.update({'font.size': 16})
  162. fig, axes = plt.subplots(
  163. nrows=NumFeaturesToPlot, ncols=1, figsize=(15, 10), dpi=80, facecolor="w", edgecolor="k",sharex=True
  164. )
  165. for i in range(NumFeaturesToPlot):
  166. init=0
  167. end=ranges[0][1]
  168. for j in range(NumberOfFailures+1):
  169. axes[i].plot(range(init,end),data[ranges[j][0]:ranges[j][1],0,indexesToPlot[i]]*stdevs[i]+means[i],label="Class "+str(j), color=colorline[j],linewidth=1)
  170. if j<NumberOfFailures:
  171. init=end
  172. end+=(ranges[j+1][1]-ranges[j+1][0])
  173. x=[]
  174. y=[]
  175. for j in range(NumberOfFailures+1):
  176. x.append([])
  177. y.append([])
  178. for j in range(NumberOfFailures+1):
  179. for k in range(ranges[j][0],ranges[j][1]):
  180. print(j,k)
  181. x[labels.index(km.labels_[k])].append(k)
  182. y[labels.index(km.labels_[k])].append(data[k,0,indexesToPlot[i]]*stdevs[i]+means[i])
  183. for j in range(NumberOfFailures+1):
  184. axes[i].plot(x[j],y[j] ,color=colordot[j],marker='.',linewidth=0,label="Class type "+str(j) )
  185. if i==(NumFeatures-1):
  186. axes[i].legend(loc='right')
  187. s=''
  188. s+=featureNames[features[indexesToPlot[i]]]
  189. s+=' '+unitNames[features[indexesToPlot[i]]]
  190. axes[i].set_ylabel(s)
  191. axes[i].grid()
  192. axes[NumFeaturesToPlot-1].set_xlabel("Sample number")
  193. plt.show()
  194. #plot(xtrain,Ranges,labels)
  195. # Try with test data
  196. x_test=[]
  197. for i in range(NumberOfFailures+1):
  198. x_test.append(create_sequences(dataTestNorm[i],timesteps))
  199. xtest=x_test[0]
  200. for i in range(1,NumberOfFailures+1):
  201. xtest=np.vstack((xtest,x_test[i]))
  202. Ranges=[]
  203. r=0
  204. for i in range(NumberOfFailures+1):
  205. Ranges.append([r,r+x_test[i].shape[0]])
  206. r+=x_test[i].shape[0]
  207. # Drop the last TIME_STEPS for plotting
  208. Ranges[NumberOfFailures][1]=Ranges[NumberOfFailures][1]
  209. km.fit_predict(xtest)
  210. labels=[] # Labels are assigned randomly by classifer
  211. for i in range(NumberOfFailures+1):
  212. b=Counter(km.labels_[Ranges[i][0]:Ranges[i][1]])
  213. labels.append(b.most_common(1)[0][0])
  214. plot(xtest,Ranges,labels)

Powered by TurnKey Linux.