Skip to content
Snippets Groups Projects
Commit 48cb9b00 authored by Effi Latiffianti's avatar Effi Latiffianti
Browse files

Update CUSUM_LoMST.R

parent 17ea48f6
No related branches found
No related tags found
No related merge requests found
##-------------------------------------------------------------------------------##
## This code produce cumulative score for each turbine in .csv files ##
## Running the whole lines of code requires about 4 hours (16 GB memory laptop) ##
##-------------------------------------------------------------------------------##
##--------------------------------------------------------------------------##
## READ ME BEFORE RUNNING THE CODE. ##
##--------------------------------------------------------------------------##
# This code is set for gearbox. To obtain all detection, this code should be run
# for each of the component by changing the col in line 75 with the desired component
# column. Then changed the parameter offset to 0.3 for all components, and 0.35 for
# hydraulic, 0.2 for transformer. Then change the file name in line 139.
# When plotting, the threshold for alarms are:
# Gbx = 20, Gen = 13.5, Gen.bearing = 8, Hyd.group = 13.5, Transformer = 5
##--------------------------------------------------------------------------##
setwd("~/01 - WIND Research/2022 Energies Paper")
setwd("~/01 - WIND Research/EDP Challenge")
library(fossil)
library(dbscan)
library(dplyr)
......@@ -17,22 +23,23 @@ source('LoMSTEDP.R') ##LoMST function (must be copied in the directory)
#----------------------------------------------------------------#
signals.train = read.csv("Data/wind-farm-1-signals-training.csv",header = T, as.is = T, sep = ";")
signals.test = read.csv("Data/wind-farm-1-signals-testing.csv",header = T, as.is = T, sep = ";")
signals = rbind.data.frame(signals.train,signals.test)
fail = read.csv("Data/wind-farm-1-failures-training.csv",header = T, as.is = T, sep = ";")
fail = fail[fail$Turbine_ID=="T07",]
## add failure in the test set for T07
fail[24,]=c("T07","HYDRAULIC_GROUP","2017-10-19T10:11:00+00:00","Oil leakage in Hub")
signals = rbind.data.frame(signals.train,signals.test)
#--------------------------------------------------------#
## Make one-hour average
#--------------------------------------------------------#
# Averaging hourly data (it takes awhile, around 6 minutes)
signals$Timestamp=substr(signals$Timestamp,1,nchar(signals$Timestamp)-12)
All.Timestamp = signals$Timestamp
All.Timestamp=All.Timestamp[!duplicated(All.Timestamp)]
col.gbx = c(1,2,12:14,17,22,51)
signals <- signals[complete.cases(signals), ]
signals.gbx = signals[,col.gbx]
data.list = list()
turbine = c("T01","T06","T07","T09","T11")
for (turb in 1:5){
data.list[[turb]]=signals.gbx[signals.gbx$Turbine_ID==turbine[turb],]
data.list[[turb]]=signals[signals$Turbine_ID==turbine[turb],]
}
Hour.avg = data.frame()
......@@ -43,37 +50,48 @@ for (i in 1:5){
row=nrow(Hour.avg)
Hour.avg[(row+1):(row+n),1]=turbine[i]
Hour.avg[(row+1):(row+n),2]=date.time
for (j in 3:8){
for (j in 3:ncol(signals)){
Hour.avg[(row+1):(row+n),j]=tapply(data[,j],data$Timestamp,mean)
}
}
rm(date.time,i,j,n,row,data.list,data)
names(Hour.avg)=names(signals.gbx)
names(Hour.avg)=names(signals)
#--------------------------------------------------------------------------------------------#
## Perform LoMST (takes about 3-4 hours depending on the computer)
#--------------------------------------------------------------------------------------------#
dat = Hour.avg
## Selected signals for each of the failures component
col.gbx = c(1,2,12:14,17,22,51)
col.hyd =c(1,2,11,14,24,38) #Only detect oil leakage in hydraulic group
col.Gen = c(1:6,8:10,20,14,22,40,51)
col.GenB = c(1:10,20,14,22,40,51)
col.Trf = c(1,2,14,22,40,33:35,51)
## Obtaining anomaly score (only one type of component failure at once)
col = col.gbx #change this based on the failure type to model
dat = Hour.Avg
names(dat)[2]="timestamp"
dat <- dat[,-1]
dat <- dat[,col[-1]]
dat <- dat[complete.cases(dat), ]
data = dat[,-1]
data[] <- lapply(data, function(x) as.numeric(as.character(x)))
data=normalize(data, method = "range", range = c(0, 1), margin = 1L,on.constant = "quiet")
data=data[complete.cases(data),]
data <- data[complete.cases(data), ]
dat <- dat[rownames(dat) %in% rownames(data), ]
start.time= Sys.time()
result=as.data.frame(LoMSTEDP (25))
write.csv(result,"LoMST_Gbx.csv")
#--------------------------------------------------------------------------------------------#
# Accumulating the scores for pre-defined accumulation windows
#--------------------------------------------------------------------------------------------#
result$Turbine_ID = Hour.avg$Turbine_ID[result$obs]
threshold = 0.3
result$Turbine_ID = Hour.Avg$Turbine_ID[result$obs]
offset = 0.3
turbine = c("T01","T06","T07","T09","T11")
Cluster.result = list()
for (turb in 1:5){
mydata = result[result$Turbine_ID==turbine[turb],]
mydata= mydata[mydata$Outlier_Score>=threshold,]
mydata= mydata[mydata$Outlier_Score>=offset,]
sort.data = as.data.frame(arrange(mydata,timestamp))
sort.data$diff.hrs = 0
for (i in 2:nrow(mydata)){
......@@ -110,4 +128,39 @@ for (turb in 1:5){
}
}
##Write the Cumulative score
write.csv(Cluster.result[[3]],"cusum_T07_Gbx.csv")
write.csv(Cluster.result[[3]],"T07CUSUM_gbx.csv")
## Repeat for other components
#write.csv(Cluster.result[[3]],"T07CUSUM_gen.csv")
#write.csv(Cluster.result[[3]],"T07CUSUM_genb.csv")
#write.csv(Cluster.result[[3]],"T07CUSUM_hyd.csv")
#write.csv(Cluster.result[[3]],"T07CUSUM_trf.csv")
## Plotting the results (only after all 5 components were run)
Cluster=list()
Cluster[[1]]=read.csv("T07CUSUM_gen.csv")[,-1]
Cluster[[2]]=read.csv("T07CUSUM_hyd.csv")[,-1]
Cluster[[3]]=read.csv("T07CUSUM_genb.csv")[,-1]
Cluster[[4]]=read.csv("T07CUSUM_trf.csv")[,-1]
Cluster[[5]]=read.csv("T07CUSUM_gbx.csv")[,-1]
border1 = "2017-07-03T00"
border = "2017-09-01T00"
component = fail$Component[!duplicated(fail$Component)]
thrs = c(13.5,13.5,8,5,20)
path_results = NULL
pdf(paste0(path_results,'Result_T07.pdf'), height = 8, width =10)
par(mfrow=c(3,2))
for (comp in 1:5){
mydata = Cluster[[comp]]
x2 = as.Date(mydata$timestamp)
par(mai = c(0.3,0.7,0.5,0.2))
matplot(x2,mydata$Cum.Score,pch=20,col="gray30",ylab="Cumulative Score",xaxt="n",
xlim = c(x2[1],x2[length(x2)]),main=turbine[turb.number], xlab="")
axis.Date(1, at=seq(x2[1], x2[length(x2)], by="month"), format="%m-%Y")
points(x2,mydata$Cum.Score,type="l")
abline(v=as.Date(c(border,border1)), lty=2,lwd=1,col="red")
idx = which(fail$COMPONENT==component[comp])
abline(v=as.Date(fail$Timestamp[idx]),col = "deepskyblue3",lwd=2)
abline(h = thrs[comp], col="red")
}
dev.off()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment