WARD_script

09ec93b4 · Yoshiaki Sakagami · 67211178 · 09ec93b4
Commit 09ec93b4 authored 3 years ago by Yoshiaki Sakagami
--- a/Fail_ward_T07_hyd.py
+++ b/Fail_ward_T07_hyd.py
+# Code: detecting wind turbine failures
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn.cluster import AgglomerativeClustering
+from sklearn.neighbors import kneighbors_graph
+
+#import susi
+
+# Load the training tables (semicolon-separated CSV files)
+df_sig = pd.read_csv('../data/wind-farm-1-signals-training.csv', sep=';')    # turbine signals
+df_met = pd.read_csv('../data/wind-farm-1-metmast-training.csv', sep=';')    # met-mast measurements
+df_log = pd.read_csv('../data/wind-farm-1-logs-training.csv', sep=';')       # logs
+df_fail = pd.read_csv('../data/wind-farm-1-failures-training.csv', sep=';')  # failures
+
+#%% Put the met-mast and failure tables on one complete, timezone-naive 10-minute time axis
+
+# Complete 10-minute axis; timestamps missing from a table become all-NaN rows after reindex().
+dt = pd.date_range(start='2016-01-01 00:00:00', end='2017-09-01 00:10:00', freq='10min')
+idx = pd.DatetimeIndex(dt)
+
+# Parse the timestamps and drop the timezone so they compare equal to the naive axis above.
+df_met.index = pd.to_datetime(df_met['Timestamp']).dt.tz_localize(None)
+
+# Snap each failure time to the nearest 10-minute slot so that it lands on the axis.
+# `.dt.round(freq)` is the datetime rounding API; plain `Series.round` is the numeric
+# (decimals) method and does not reliably round timestamps.
+# NOTE(review): two failures that round to the same slot would give duplicate labels and
+# make the reindex() below raise -- confirm this cannot happen in the failure table.
+df_fail.index = pd.to_datetime(df_fail['Timestamp']).dt.round('10min').dt.tz_localize(None)
+
+df_met = df_met.reindex(idx)
+df_fail = df_fail.reindex(idx)
+
+
+#%% Split the signals per turbine, index each part by time, and drop repeated timestamps
+turbines=['T01','T06','T07','T09','T11']
+dfs=[]
+
+for wt in turbines:
+    sig=df_sig[df_sig['Turbine_ID']==wt] # rows of this turbine only
+    sig.index=pd.to_datetime(sig['Timestamp']).dt.tz_localize(None) # timezone-naive datetime index
+    sig=sig.loc[~sig.index.duplicated(keep='last')] # for a repeated timestamp keep the last row
+    dfs.append(sig.reindex(idx)) # align to the complete 10-minute axis
+
+# Concatenate the five turbines side by side; `keys` adds an outer column level named 'Turbine' (MultiIndex)
+df=pd.concat(dfs,keys=turbines,names=['Turbine'],axis=1)
+
+
+#%% Select the variable to analyse (one column per turbine)
+#df['T01'].columns # view the columns you want to plot
+# Candidate signals: 'Nac_Temp_Avg', 'Gen_RPM_Avg', 'Grd_Prod_Pwr_Avg', 'Gen_Bear_Temp_Avg',
+# 'Hyd_Oil_Temp_Avg'
+
+# Cross-section of one signal over the inner column level -> one column per turbine.
+# .copy() makes dfx an independent frame, so adding the 'average' column below never
+# writes into (or warns about) a view of df.
+dfx=df.xs('Hyd_Oil_Temp_Avg',axis=1,level=1).copy()
+
+# Alternative inputs kept for reference (disabled):
+#dfr1=df.xs('Gen_RPM_Avg',axis=1,level=1) # cross section variable of each turbine
+#dfr2=df.xs('Grd_Prod_Pwr_Avg',axis=1,level=1) # cross section variable of each turbine
+
+#dfv=df.xs('Amb_WindSpeed_Avg',axis=1,level=1) # cross section variable of each turbine
+#dfg=df.xs('Grd_Prod_Pwr_Avg',axis=1,level=1) # cross section variable of each turbine
+
+#dfx=dfr2/dfr1
+
+#dfx=dfx.resample('H').mean()
+
+dfx['average']=dfx.mean(axis=1) # row-wise mean over the turbine columns
+
+# Treat +inf/-inf as missing. Done after the mean on purpose: a row whose mean is infinite
+# ends up with a NaN average as well.
+dfx=dfx.replace([np.inf,-np.inf],np.nan)
+
+dfwa=dfx.iloc[:,0:5].dropna() # the five turbine columns only; keep rows where all are present
+
+
+
+#%% self organizing map -- disabled: the block below is a bare string literal and is never executed; it needs the commented-out `susi` import and reads `dfa`, which is only assigned in the later clustering cell
+'''
+X=np.array(dfwa) # remove nan rows
+som = susi.SOMClustering(n_rows=3, n_columns=4,learning_rate_start=1,random_state=50,n_iter_unsupervised=1000,verbose=1)
+som.fit(X)
+clusters = np.array(som.get_clusters(X))
+
+ncol=3
+df1 = pd.DataFrame(clusters)
+df1.columns=['linha','coluna']
+df1['cluster']=df1['linha']+df1['coluna']*ncol+1
+df1.index=dfwa.index
+
+dfa=pd.concat([dfa,df1['cluster']],axis=1)
+
+'''
+#%% Ward clustering of the complete rows (dfwa)
+
+# Optional min-max scaling of the inputs (disabled):
+#from sklearn.preprocessing import MinMaxScaler
+#scaler = MinMaxScaler()
+#scaler.fit(dfwa)
+#dfwaN=scaler.transform(dfwa)
+
+# Ward-linkage agglomerative clustering into 12 clusters, using a 20-nearest-neighbour
+# graph of the samples as the connectivity structure.
+connectivity = kneighbors_graph(dfwa, n_neighbors=20, include_self=False)
+km = AgglomerativeClustering(n_clusters=12, linkage='ward', connectivity=connectivity)
+
+# Shift the predicted labels by one (label + 1) and key them by the timestamps of dfwa.
+labels = km.fit_predict(dfwa) + 1
+dfc = pd.DataFrame({'cluster': labels}, index=dfwa.index)
+
+# Attach the label to the full table; timestamps that are not in dfwa get NaN as cluster.
+dfa = pd.concat([dfx, dfc['cluster']], axis=1)
+
+dfa.to_csv('input_hyd.csv')
+
+#%% Mean profile of every cluster: one panel per cluster, ordered by increasing mean
+import matplotlib.cm as cm
+cmap = cm.jet
+n_clusters=13 # number of colours sampled from the colormap
+evenly_spaced_interval = np.linspace(0, 1, n_clusters)
+colors = [cmap(x) for x in evenly_spaced_interval]
+
+colors[8]=(0.0,0.0,0.0,1.0) # replace the 9th colour by opaque black
+
+#colors[11]=(1.0,0.0,0.0,1.0)
+
+# Start from an empty figure. Creating a full-size Axes here (plt.subplots) would leave it
+# behind the 3x4 panels, because plt.subplot() does not remove overlapping Axes any more.
+fig=plt.figure(figsize=(8,6))
+
+# Per-cluster mean of the five turbine columns, sorted by the mean over the turbines.
+# After reset_index() the row position is the rank and the columns are
+# ['cluster', <five turbines>, 'average'].
+clusters=dfa.iloc[:,0:5].groupby(dfa['cluster']).mean()
+clusters['average']=clusters.mean(axis=1)
+clusters=clusters.sort_values(by=['average'])
+clusters=clusters.reset_index()
+
+for i in range(1, 13):
+    plt.subplot(3, 4, i)
+    # row i-1 without its first ('cluster') and last ('average') entry = the turbine means
+    plt.plot(clusters.iloc[i-1,1:-1],marker='o',c=colors[i-1])
+    plt.ylim(20,60)
+    plt.grid()
+    plt.title('C'+str(i),fontsize=10)
+
+# Shared axis captions for the whole grid
+fig.text(0.01, 0.3, 'Hyd_Oil_Temp_Avg [C]',rotation=90, fontsize=14)
+fig.text(0.45, 0.02, 'Wind Turbines',rotation=0, fontsize=14)
+
+plt.tight_layout()
+fig.subplots_adjust(left=0.08,bottom=0.1,top=0.9) # leave room for the shared captions
+
+            
+#%% Weekly share of every cluster (stacked bars) with the recorded failures marked above
+
+
+# Number of samples of each cluster per week. The rows are grouped on the same weekly bins
+# that resample('W') uses, so every week keeps its own date label -- also around New Year,
+# where pairing the calendar year with the ISO week number puts days into the wrong row.
+# size() counts samples; summing the label values instead would weight every cluster by
+# its arbitrary id. Rows without a cluster label are left out by groupby.
+dfM=dfa.groupby([pd.Grouper(freq='W'),'cluster']).size().unstack('cluster')
+dfM=dfM.reindex(dfa.resample('W').size().index) # one row per week of the period, even if a week has no labelled sample
+dfT=dfM.div(dfM.sum(axis=1),axis=0) # counts -> fraction of the labelled samples of that week
+
+# Rename the columns from raw cluster label to the rank used for the panels C1..C12
+# (`clusters` is sorted by mean value, so its row position + 1 is that rank).
+rank_of_label=dict(zip(clusters['cluster'],clusters.index+1))
+dfT=dfT.rename(columns=rank_of_label)
+dfT=dfT.sort_index(axis=1)
+
+import matplotlib.gridspec as gridspec
+plt.figure(figsize=(12,6)) # own figure, so nothing is drawn over a figure that is still open
+gs = gridspec.GridSpec(4, 1)
+
+ax1 = plt.subplot(gs[0,:]) # top quarter: failure markers
+ax2 = plt.subplot(gs[1:,:]) # lower three quarters: stacked weekly shares
+
+turbines=['T01','T06','T07','T09','T11']
+cores=['r', 'm', 'g', 'c', 'b'] # one colour per turbine
+mk=['o','d','P','s','^'] # one marker per component
+status=['GENERATOR', 'HYDRAULIC_GROUP', 'GENERATOR_BEARING', 'TRANSFORMER','GEARBOX']
+status_name= [x+'_'+y for x in turbines for y in status]
+
+for i,wt in enumerate(turbines):
+#    plt.plot(dfa[turbines].iloc[:,i],c=cores[i],label=turbines[i])
+    for j,comp in enumerate(status):
+        # timestamps at which this component of this turbine has a failure record
+        hit=(df_fail['Component']==comp) & (df_fail['Turbine_ID']==wt)
+        ax1.plot(dfx['average'][hit],c=cores[i],marker=mk[j],markersize=7,linestyle='None',label=status_name[j+i*len(status)])
+ax1.set_xlim(dfT.index[0], dfT.index[-2])
+
+
+ax=dfT.plot(kind="bar",ax=ax2, stacked=True, width=0.9,edgecolor='w',lw=0.3,color=colors,figsize=(12,6))
+
+# One tick every 4th week; derived from the table so ticks and labels always match in number
+ax.xaxis.set_ticks(np.arange(0,len(dfT), 4))
+ax.set_xticklabels( [x.strftime("%y-%m-%d") for x in dfT.index[::4]], rotation=90)
+ax.legend(bbox_to_anchor=(1.1,1.7),ncol=1)
+ax1.set_ylabel('tp [C]')
+
+plt.subplots_adjust(left=0.05,right=0.9,bottom=0.17,top=0.78,hspace=0.5)
+plt.xlabel('UTC Time',fontsize=14)
+plt.ylabel('Occurrence [-]',fontsize=14)
+
+ax1.legend(loc='center left',bbox_to_anchor=(0.01,2.0),ncol=5,fontsize=8)
+
+#%%
+