Skip to content
Snippets Groups Projects
Commit 09ec93b4 authored by Yoshiaki Sakagami's avatar Yoshiaki Sakagami
Browse files

WARD_script

parent 67211178
Branches main
No related tags found
No related merge requests found
# Code: failure detection for wind turbines
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.neighbors import kneighbors_graph
#import susi
# Read the four training tables; every file is a ';'-separated CSV.
df_sig, df_met, df_log, df_fail = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in (
        '../data/wind-farm-1-signals-training.csv',   # signals
        '../data/wind-farm-1-metmast-training.csv',   # metmast
        '../data/wind-farm-1-logs-training.csv',      # logs
        '../data/wind-farm-1-failures-training.csv',  # failures
    )
)
#%% Index by time: build the complete 10-minute grid, drop the timezone, reindex
# Complete 10-minute time series covering the training period.
dt = pd.date_range(start='2016-01-01 00:00:00', end='2017-09-01 00:10:00', freq='10min')
idx = pd.DatetimeIndex(dt)

# Met-mast rows are indexed by their timestamp with the timezone removed.
df_met.index = pd.to_datetime(df_met['Timestamp']).dt.tz_localize(None)

# Failure timestamps are snapped to the nearest 10 minutes so they land on the
# grid. Use the datetime accessor (.dt.round): Series.round() is the numeric
# "decimals" API and must not be relied on to round datetimes.
# Caution: rounding moves the event time by up to 5 minutes.
# NOTE(review): reindex() below fails on duplicate index labels, so two
# failures rounding to the same timestamp would raise - confirm against data.
df_fail.index = (
    pd.to_datetime(df_fail['Timestamp']).dt.round('10min').dt.tz_localize(None)
)

# Align both tables on the full grid; timestamps without data become NaN rows.
df_met = df_met.reindex(idx)
df_fail = df_fail.reindex(idx)
#%% One time series per wind turbine: index by time, de-duplicate, align
turbines = ['T01', 'T06', 'T07', 'T09', 'T11']
dfs = []
for wt in turbines:
    wt_sig = df_sig[df_sig['Turbine_ID'] == wt]
    # Index by timestamp with the timezone removed.
    wt_sig.index = pd.to_datetime(wt_sig['Timestamp']).dt.tz_localize(None)
    # When a timestamp occurs more than once, keep only its last row.
    is_repeated = wt_sig.index.duplicated(keep='last')
    wt_sig = wt_sig.loc[~is_repeated]
    # Align on the complete 10-minute grid.
    dfs.append(wt_sig.reindex(idx))

# Side-by-side concatenation of the 5 turbines: the columns become a
# MultiIndex whose outer level ('Turbine') is the turbine ID.
df = pd.concat(dfs, keys=turbines, names=['Turbine'], axis=1)
#%% Select one signal across all wind turbines
#df['T01'].columns # view the columns you want to analyse
#'Nac_Temp_Avg' , 'Gen_RPM_Avg','Grd_Prod_Pwr_Avg', 'Gen_Bear_Temp_Avg'
#Hyd_Oil_Temp_Avg
# Cross-section of the chosen variable: one column per turbine. Take an
# explicit copy because the frame is modified below and xs() may hand back a
# view of `df`.
dfx = df.xs('Hyd_Oil_Temp_Avg', axis=1, level=1).copy()
#dfr1=df.xs('Gen_RPM_Avg',axis=1,level=1) # cross section variable of each turbine
#dfr2=df.xs('Grd_Prod_Pwr_Avg',axis=1,level=1) # cross section variable of each turbine
#dfv=df.xs('Amb_WindSpeed_Avg',axis=1,level=1) # cross section variable of each turbine
#dfg=df.xs('Grd_Prod_Pwr_Avg',axis=1,level=1) # cross section variable of each turbine
#dfx=dfr2/dfr1
#dfx=dfx.resample('H').mean()

# Row-wise mean over the turbines. It is computed before the infinities are
# masked, so a row containing +/-inf ends up with a NaN average.
dfx['average'] = dfx.mean(axis=1)
dfx = dfx.replace([np.inf, -np.inf], np.nan)

# Clustering input: the first five columns (the turbines, without 'average'),
# keeping only rows where every one of them has a value.
dfwa = dfx.iloc[:, 0:5].dropna()
#%% self organizing map
# Disabled alternative: everything between the triple quotes is a bare string
# literal, so none of it is executed. It sketches clustering `dfwa` with
# susi.SOMClustering on a 3 x 4 grid and turning the (row, column) result into
# a single 'cluster' number.
# NOTE(review): re-enabling it needs the commented-out `import susi`, and its
# last line concatenates onto `dfa`, which is only assigned further down in
# this script - confirm before use.
'''
X=np.array(dfwa) # remove nan rows
som = susi.SOMClustering(n_rows=3, n_columns=4,learning_rate_start=1,random_state=50,n_iter_unsupervised=1000,verbose=1)
som.fit(X)
clusters = np.array(som.get_clusters(X))
ncol=3
df1 = pd.DataFrame(clusters)
df1.columns=['linha','coluna']
df1['cluster']=df1['linha']+df1['coluna']*ncol+1
df1.index=dfwa.index
dfa=pd.concat([dfa,df1['cluster']],axis=1)
'''
#%% Ward clustering of the per-turbine signal
# Optional min-max normalisation of the input (currently disabled):
#from sklearn.preprocessing import MinMaxScaler
#scaler = MinMaxScaler()
#scaler.fit(dfwa)
#dfwaN=scaler.transform(dfwa)

# Agglomerative clustering with Ward linkage into 12 clusters; merges are
# constrained by a 20-nearest-neighbour connectivity graph of the samples.
connectivity = kneighbors_graph(dfwa, n_neighbors=20, include_self=False)
km = AgglomerativeClustering(
    n_clusters=12,
    linkage='ward',
    connectivity=connectivity,
)

# Labels are shifted by one so that cluster numbering starts at 1.
ward_labels = km.fit_predict(dfwa) + 1
dfc = pd.DataFrame({'cluster': ward_labels}, index=dfwa.index)

# Attach the label to the full signal table (rows that were not clustered get
# NaN) and save the result.
dfa = pd.concat([dfx, dfc['cluster']], axis=1)
dfa.to_csv('input_hyd.csv')
#%% Mean profile of each cluster across the wind turbines
import matplotlib.cm as cm

# Colour palette: 13 evenly spaced samples of the 'jet' colormap, with entry 8
# overridden to opaque black.
# NOTE(review): 13 colours are drawn although only 12 panels are plotted
# below; the value is kept because changing it would shift every colour.
cmap = cm.jet
n_clusters = 13
evenly_spaced_interval = np.linspace(0, 1, n_clusters)
colors = [cmap(x) for x in evenly_spaced_interval]
colors[8] = (0.0, 0.0, 0.0, 1.0)
#colors[11]=(1.0,0.0,0.0,1.0)

# Mean of the first five columns (the turbines) per cluster, ordered by the
# overall average; after reset_index() row k is the cluster with the k-th
# lowest average and the original label sits in the 'cluster' column.
clusters = dfa.iloc[:, 0:5].groupby(dfa['cluster']).mean()
clusters['average'] = clusters.mean(axis=1)
clusters = clusters.sort_values(by=['average'])
clusters = clusters.reset_index()

# Create the 3 x 4 grid in one call. The previous version made a single
# full-figure Axes first and then added the panels with plt.subplot(), which
# leaves an empty frame behind the grid now that Matplotlib no longer removes
# overlapped Axes automatically.
fig, axes = plt.subplots(3, 4, figsize=(8, 6))
for i, ax in enumerate(axes.flat, start=1):
    # Columns 1..-1 skip the 'cluster' label and the trailing 'average'.
    ax.plot(clusters.iloc[i - 1, 1:-1], marker='o', c=colors[i - 1])
    ax.set_ylim(20, 60)
    ax.grid()
    ax.set_title('C' + str(i), fontsize=10)

# Shared axis captions for the whole figure.
fig.text(0.01, 0.3, 'Hyd_Oil_Temp_Avg [C]', rotation=90, fontsize=14)
fig.text(0.45, 0.02, 'Wind Turbines', rotation=0, fontsize=14)
plt.tight_layout()
fig.subplots_adjust(left=0.08, bottom=0.1, top=0.9)
#%% Weekly share of each cluster, with the recorded failures plotted on top
import matplotlib.gridspec as gridspec

# Number of 10-minute samples per cluster in each weekly bin. One indicator
# column per cluster label is summed over resample('W') bins, so that:
#   * the index really is the weekly bin each row was computed from (grouping
#     by year / ISO week and relabelling by position does not line up with
#     those bins around New Year),
#   * samples are counted, not their label values summed (which would weight
#     every cluster by its own number),
#   * the removed DatetimeIndex.week attribute is not needed.
# Rows without a cluster label contribute to no column.
dfM = pd.get_dummies(dfa['cluster'], dtype=float).resample('W').sum()

# Fraction of each week spent in each cluster (weeks without data give NaN).
dfT = dfM.div(dfM.sum(axis=1), axis=0)
# Rename the columns from the original label to the rank by average value
# (1 = lowest), using the position of each label in the sorted `clusters`.
dfT.columns = clusters.sort_values(by=['cluster']).index + 1
dfT = dfT.sort_index(axis=1)

# Dedicated figure: a narrow top panel for the failures and a tall bottom
# panel for the stacked bars.
plt.figure(figsize=(12, 6))
gs = gridspec.GridSpec(4, 1)
ax1 = plt.subplot(gs[0, :])
ax2 = plt.subplot(gs[1:, :])

turbines = ['T01', 'T06', 'T07', 'T09', 'T11']
cores = ['r', 'm', 'g', 'c', 'b']        # one colour per turbine
mk = ['o', 'd', 'P', 's', '^']           # one marker per component
status = ['GENERATOR', 'HYDRAULIC_GROUP', 'GENERATOR_BEARING', 'TRANSFORMER', 'GEARBOX']
status_name = [x + '_' + y for x in turbines for y in status]

# Top panel: the fleet-average signal at every timestamp where a failure of
# the given component is recorded for the given turbine.
for i, wt in enumerate(turbines):
    # plt.plot(dfa[turbines].iloc[:,i],c=cores[i],label=turbines[i])
    for j, component in enumerate(status):
        is_failure = (df_fail['Component'] == component) & (df_fail['Turbine_ID'] == wt)
        ax1.plot(
            dfx['average'][is_failure],
            c=cores[i],
            marker=mk[j],
            markersize=7,
            linestyle='None',
            label=status_name[i * len(status) + j],
        )
ax1.set_xlim(dfT.index[0], dfT.index[-2])

# Bottom panel: stacked weekly shares, one colour per ranked cluster.
ax = dfT.plot(kind="bar", ax=ax2, stacked=True, width=0.9, edgecolor='w', lw=0.3, color=colors)
# Label every fourth week; the tick positions follow the number of rows so
# they always match the labels.
ax.xaxis.set_ticks(np.arange(0, len(dfT), 4))
ax.set_xticklabels([x.strftime("%y-%m-%d") for x in dfT.index[::4]], rotation=90)
ax.legend(bbox_to_anchor=(1.1, 1.7), ncol=1)
ax1.set_ylabel('tp [C]')
plt.subplots_adjust(left=0.05, right=0.9, bottom=0.17, top=0.78, hspace=0.5)
ax2.set_xlabel('UTC Time', fontsize=14)
ax2.set_ylabel('Occurrence [-]', fontsize=14)
ax1.legend(loc='center left', bbox_to_anchor=(0.01, 2.0), ncol=5, fontsize=8)
#%%
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment