Skip to content
Snippets Groups Projects
Commit 657b35ef authored by Effi Latiffianti's avatar Effi Latiffianti
Browse files

Upload New File

parents
No related branches found
No related tags found
No related merge requests found
##-------------------------------------------------------------------------------##
## This code produces the cumulative score for each turbine as .csv files ##
## Running the whole script takes about 4 hours (on a laptop with 16 GB memory) ##
##-------------------------------------------------------------------------------##
setwd("~/01 - WIND Research/2022 Energies Paper")
library(fossil)
library(dbscan)
library(dplyr)
library(BBmisc)
library(stringr)
library(DDoutlier)
source('LoMSTEDP.R') ##LoMST function (must be copied in the directory)
#----------------------------------------------------------------#
## Prepare the data and downsample it to hourly averages
#----------------------------------------------------------------#
# Load the training and testing signal files (semicolon-separated) and stack
# them into a single table. `TRUE` is spelled out because `T` is an ordinary,
# reassignable variable.
signals.train <- read.csv("Data/wind-farm-1-signals-training.csv",
                          header = TRUE, as.is = TRUE, sep = ";")
signals.test <- read.csv("Data/wind-farm-1-signals-testing.csv",
                         header = TRUE, as.is = TRUE, sep = ";")
signals <- rbind.data.frame(signals.train, signals.test)

# Failure log, restricted to turbine T07.
fail <- read.csv("Data/wind-farm-1-failures-training.csv",
                 header = TRUE, as.is = TRUE, sep = ";")
fail <- fail[fail$Turbine_ID == "T07", ]

# Drop the last 12 characters of every timestamp so that all records of the
# same hour share one label (presumably "YYYY-MM-DDTHH" -- verify against the
# raw files). The hourly averaging that follows takes a while (around 6 minutes).
signals$Timestamp <- substr(signals$Timestamp, 1, nchar(signals$Timestamp) - 12)

# Distinct hour labels over all turbines, in order of first appearance.
All.Timestamp <- unique(signals$Timestamp)

# Column positions kept for the analysis: 1 and 2 are used below as
# Turbine_ID and Timestamp; the rest are presumably the gearbox-related
# signals ("gbx") -- confirm against the file header.
col.gbx <- c(1, 2, 12:14, 17, 22, 51)

# Rows with a missing value in ANY column of the full table are removed
# before the subset of columns is taken.
signals <- signals[complete.cases(signals), ]
signals.gbx <- signals[, col.gbx]
# Hourly mean of every selected signal, one row per (turbine, hour).
#
# Result: `Hour.avg`, a data frame with the same column names as
# `signals.gbx` (column 1 = turbine id, column 2 = hour label, columns 3-8 =
# hourly means), turbines stacked in the order given by `turbine`, hours in
# sorted order within each turbine, row names 1..N.
#
# A single factor supplies both the hour labels and the grouping passed to
# tapply(). tapply() returns its groups in factor-level (sorted) order, so
# taking the labels from the same factor guarantees that each mean is stored
# next to the hour it belongs to, even if the input rows are not time-sorted.
turbine <- c("T01", "T06", "T07", "T09", "T11")
hourly.list <- lapply(turbine, function(id) {
  turb.data <- signals.gbx[signals.gbx$Turbine_ID == id, ]
  if (nrow(turb.data) == 0) {
    # No records for this turbine: contribute no rows.
    return(signals.gbx[0, ])
  }
  hour.f <- factor(turb.data$Timestamp)
  out <- data.frame(rep(id, nlevels(hour.f)), levels(hour.f),
                    stringsAsFactors = FALSE)
  for (j in 3:8) {
    out[[j]] <- as.vector(tapply(turb.data[, j], hour.f, mean))
  }
  names(out) <- names(signals.gbx)
  out
})
Hour.avg <- do.call(rbind, hourly.list)
# Later steps address rows of Hour.avg by position / row name, so keep the
# row names as the plain sequence 1..N.
rownames(Hour.avg) <- NULL
rm(hourly.list)
#--------------------------------------------------------------------------------------------#
## Perform LoMST (takes about 3-4 hours depending on the computer)
#--------------------------------------------------------------------------------------------#
# Prepare the inputs for the LoMST step and run it.
# `dat`: the hourly averages with column 2 renamed to "timestamp" and the
# first column (turbine id) dropped, so it starts with the timestamp.
dat = Hour.avg
names(dat)[2]="timestamp"
dat <- dat[,-1]
# `data`: `dat` without its first column (the timestamp), i.e. signals only.
data = dat[,-1]
# Force every column to numeric (going through character first).
data[] <- lapply(data, function(x) as.numeric(as.character(x)))
# Rescale to the range [0, 1] (presumably BBmisc::normalize -- verify).
# NOTE(review): margin = 1L is passed; confirm how normalize() treats it for
# a data frame input.
data=normalize(data, method = "range", range = c(0, 1), margin = 1L,on.constant = "quiet")
# Drop rows with missing values and keep `dat` aligned with `data` by row name.
data=data[complete.cases(data),]
dat <- dat[rownames(dat) %in% rownames(data), ]
# start.time is not read again in this script; presumably kept for timing or
# used inside LoMSTEDP -- verify.
start.time= Sys.time()
# LoMSTEDP comes from LoMSTEDP.R and is called with the single argument 25.
# NOTE(review): it is not passed `data`/`dat`, so it presumably reads them
# from the global environment -- do not rename them without checking.
result=as.data.frame(LoMSTEDP (25))
# Save the raw LoMST output.
write.csv(result,"LoMST_Gbx.csv")
#--------------------------------------------------------------------------------------------#
# Accumulating the scores for pre-defined accumulation windows
#--------------------------------------------------------------------------------------------#
# For each turbine, keep the observations whose outlier score reaches
# `threshold`, order them by time and record the gap (in hours) between each
# kept observation and the previous one.
#
# Result: `Cluster.result`, a list with one data frame per entry of `turbine`,
# holding columns 1, 3 and 4 of the LoMST result plus `diff.hrs` as the
# fourth column (0 for the first row).

# Attach the turbine id to every scored observation.
# NOTE(review): assumes result$obs is the row position in Hour.avg -- confirm
# against LoMSTEDP.R.
result$Turbine_ID <- Hour.avg$Turbine_ID[result$obs]
threshold <- 0.3
turbine <- c("T01", "T06", "T07", "T09", "T11")
Cluster.result <- list()
for (turb in seq_along(turbine)) {
  mydata <- result[result$Turbine_ID == turbine[turb], ]
  mydata <- mydata[mydata$Outlier_Score >= threshold, ]
  sort.data <- as.data.frame(arrange(mydata, timestamp))

  # Parse the hour labels in a fixed, DST-free zone. Parsing in the local
  # time zone turns hours skipped by a daylight-saving change into NA and
  # distorts gaps across such a change.
  # NOTE(review): assumes the labels are UTC hours -- confirm with the raw data.
  stamp <- as.POSIXct(str_replace(sort.data$timestamp, "T", " "),
                      format = "%Y-%m-%d %H", tz = "UTC")

  # Gap to the previous kept observation; the first row has no predecessor.
  gap.hrs <- numeric(nrow(sort.data))
  if (nrow(sort.data) > 1) {
    gap.hrs[-1] <- as.numeric(diff(stamp), units = "hours")
  }
  # Fallback kept from the original script: an unparseable timestamp is
  # treated as a 70-hour gap.
  gap.hrs[is.na(gap.hrs)] <- 70
  sort.data$diff.hrs <- gap.hrs

  # Select the gap column by name so it is always the fourth output column,
  # which is where the later steps expect it.
  keep <- c(1, 3, 4, which(names(sort.data) == "diff.hrs"))
  Cluster.result[[turb]] <- sort.data[, keep]
}
rm(sort.data, mydata, stamp, gap.hrs, keep)
# Number the accumulation windows of every turbine.
# Column 4 of each table holds the gap (hours) to the previous observation.
# The first row opens window 1; a row opens a new window when its gap exceeds
# `prior.fail`, otherwise it stays in the window of the row before it.
# The ids are appended as a new last column named "7Days".
prior.fail <- 24 * 7 # change the day as required
for (turb in seq_along(Cluster.result)) {
  # A local name is used here so the normalized `data` table is not overwritten.
  gaps <- Cluster.result[[turb]][, 4]
  column <- ncol(Cluster.result[[turb]]) + 1
  # TRUE marks the first row of a window; the running count of those marks is
  # the window id. Indexing with seq_len() keeps tables with 0 or 1 rows valid.
  opens.window <- c(TRUE, gaps[-1] > prior.fail)[seq_len(length(gaps))]
  Cluster.result[[turb]][[column]] <- as.numeric(cumsum(opens.window))
  names(Cluster.result[[turb]])[column] <- "7Days"
}
rm(gaps, opens.window)
# Cumulating scores
# Within every accumulation window (column `col` of each table) the outlier
# scores are summed row by row; the running total restarts from the row's own
# score whenever the window id differs from the previous row's id.
# The totals are stored in a new column `Cum.Score`.
col <- 5
for (turb in seq_along(Cluster.result)) {
  window.id <- Cluster.result[[turb]][, col]
  score <- Cluster.result[[turb]]$Outlier_Score
  # Work on plain vectors: starting from the scores themselves already covers
  # the first row and every row that opens a new window.
  cum.score <- score
  # seq_along(...)[-1] is empty for tables with 0 or 1 rows, so short tables
  # are handled without a special case.
  for (i in seq_along(score)[-1]) {
    if (window.id[i] == window.id[i - 1]) {
      cum.score[i] <- cum.score[i - 1] + score[i]
    }
  }
  Cluster.result[[turb]]$Cum.Score <- cum.score
}
rm(window.id, score, cum.score)
## Write the cumulative score
# Entry 3 of `turbine` is "T07".
write.csv(Cluster.result[[3]], "cusum_T07_Gbx.csv")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment