From f19350bf40493228d5a842e024a13b8302e828dd Mon Sep 17 00:00:00 2001
From: Thanasis Naskos
Date: Thu, 21 Mar 2019 14:07:02 +0200
Subject: [PATCH] first commit

---
 supervised/Supervised_Predictor_v4.r                | 703 ++++++++++++++
 unsupervised/pom.xml                                |  53 +
 .../atl/afaultdetection/utils/InfluxDBHandler.java  | 203 ++++
 .../com/atl/afaultdetection/utils/Measurement.java  |  46 +
 .../java/com/atl/afaultdetection/utils/Utils.java   | 105 ++
 .../java/com/atl/mcod/ComposedSplitFunction.java    |  40 +
 .../main/java/com/atl/mcod/DistanceFunction.java    |  12 +
 .../main/java/com/atl/mcod/DistanceFunctions.java   | 193 ++++
 unsupervised/src/main/java/com/atl/mcod/MCOD.java   | 598 ++++++++++++
 unsupervised/src/main/java/com/atl/mcod/MTree.java  | 1009 ++++++++++++++++++++
 .../src/main/java/com/atl/mcod/MTreeClass.java      |  51 +
 .../main/java/com/atl/mcod/PartitionFunction.java   |  27 +
 .../main/java/com/atl/mcod/PartitionFunctions.java  | 107 +++
 .../main/java/com/atl/mcod/PromotionFunction.java   |  25 +
 .../main/java/com/atl/mcod/PromotionFunctions.java  |  37 +
 .../src/main/java/com/atl/mcod/SplitFunction.java   |  62 ++
 .../main/java/com/atl/mcod/utils/Constants.java     |  98 ++
 .../src/main/java/com/atl/mcod/utils/Data.java      | 134 +++
 .../src/main/java/com/atl/mcod/utils/Pair.java      |  53 +
 .../src/main/java/com/atl/mcod/utils/Utils.java     |  84 ++
 .../com/atl/smartmaintenance/AFaultDetection.java   | 204 ++++
 .../atl/smartmaintenance/faultdetection/FD.java     | 185 ++++
 .../faultdetection/FDKnowledge.java                 | 207 ++++
 .../faultdetection/FaultDetectionManager.java       | 193 ++++
 24 files changed, 4429 insertions(+)
 create mode 100644 supervised/Supervised_Predictor_v4.r
 create mode 100644 unsupervised/pom.xml
 create mode 100644 unsupervised/src/main/java/com/atl/afaultdetection/utils/InfluxDBHandler.java
 create mode 100644 unsupervised/src/main/java/com/atl/afaultdetection/utils/Measurement.java
 create mode 100644 unsupervised/src/main/java/com/atl/afaultdetection/utils/Utils.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/ComposedSplitFunction.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/DistanceFunction.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/DistanceFunctions.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/MCOD.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/MTree.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/MTreeClass.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/PartitionFunction.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/PartitionFunctions.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/PromotionFunction.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/PromotionFunctions.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/SplitFunction.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/utils/Constants.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/utils/Data.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/utils/Pair.java
 create mode 100644 unsupervised/src/main/java/com/atl/mcod/utils/Utils.java
 create mode 100644 unsupervised/src/main/java/com/atl/smartmaintenance/AFaultDetection.java
 create mode 100644 unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FD.java
 create mode 100644 unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FDKnowledge.java
 create mode 100644 unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FaultDetectionManager.java

diff --git
a/supervised/Supervised_Predictor_v4.r b/supervised/Supervised_Predictor_v4.r new file mode 100644 index 0000000..c47872e --- /dev/null +++ b/supervised/Supervised_Predictor_v4.r @@ -0,0 +1,703 @@ +suppressMessages(library(CORElearn)) +suppressMessages(library(dplyr)) +suppressMessages(library(plyr)) +suppressMessages(library(data.table)) +suppressMessages(library(randomForest)) +suppressMessages(library(ggplot2)) +suppressMessages(library(grid)) +suppressMessages(library(argparser)) +suppressMessages(library(stringr)) + +export_ds_for_spm <- function(target_event,episodes_list,output){ + if (file.exists(output)) { + file.remove(output) + } + #output for HirateYamana + for(ep_index in (1:length(episodes_list))){ + ep = episodes_list[[ep_index]][ , !(names(episodes_list[[ep_index]]) %in% c("Timestamps"))] + ep_list = list() + for(i in (1:nrow(ep))){ + matches = which(ep[i,] %in% c(1)) + if(length(matches) == 0){ + next + } + line=paste(matches,collapse=" ") + ep_list[i] = line + } + if(length(ep_list) == 0){ + next + } + ep_list[length(ep_list)+1] = target_event + episode = "" + for(ep_lli in (1:length(ep_list))){ + if(length(ep_list[[ep_lli]]) > 0){ + index = paste(paste("<",ep_lli,sep=""),">",sep="") + if(episode == ""){ + episode = paste(index,ep_list[[ep_lli]],sep=" ") + } else { + episode = paste(episode,paste(index,ep_list[[ep_lli]],sep=" "),sep=" -1 ") + } + } + } + write(paste(episode,"-1 -2"),file=output,append=TRUE) + } +} + +remove_rare_events <- function(ds,target_event_frequency_proportion_rare){ + if(!csv){ + print("~~~~~~~APPLYING PREPROCESSING REMOVE RARE EVENTS~~~~~~~") + } + a = table(ds$Event_id) + target_event_frequency = a[names(a)==target_event][[1]] + rare_events = as.integer(names(a[a < target_event_frequency*target_event_frequency_proportion_rare])) + return(ds[!(ds$Event_id %in% rare_events),]) +} + +remove_frequent_events <- function(ds,max_event_frequency_proportion_frequent){ + if(!csv){ + print("~~~~~~~APPLYING PREPROCESSING REMOVE FREQUENT EVENTS~~~~~~~") + } + a = table(ds[!(ds$Event_id == target_event),]$Event_id) + max_freq = sort(a,decreasing = TRUE)[[1]] + frequent_events = as.integer(names(a[a > max_freq*max_event_frequency_proportion_frequent])) + #print(frequent_events) + return(ds[!(ds$Event_id %in% frequent_events),]) +} + +keep_only_first_occureness <- function(episodes_list){ + if(!csv){ + print("~~~~~~~APPLYING PREPROCESSING KEEP ONLY FIRST OCCURENESS~~~~~~~") + } + #for every episode in the episodes_list + for(ep_index in (1:length(episodes_list))){ + ep = episodes_list[[ep_index]] + #For every segment of each episode starting from the end up to the second segment. + #We need to keep only the 1st occurness of consequtive events, hence starting from the end is the easy way. 
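+    #Example: if the same event is set to 1 in segments t, t+1 and t+2 of an episode,
+    #after this step it remains only in segment t and is zeroed in the two later segments.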
+ if(nrow(ep) < 2){ + next + } + for(i in (nrow(ep):2)){ + #as we deal with binary vectors, to find the indeces that both vectors have "1" we sum them and check for "2"s in the result + matches = which((ep[i,]+ep[i-1,]) %in% c(2)) + #replace the 1s with 0s in the matching positions of the segment that is closer to the end of the episode + ep[i,][c(matches)] = 0 + } + episodes_list[[ep_index]] = ep + } + return(episodes_list) +} + +mil_text <- function(milw,F_thres,episodes_list,b_length){ + if(!csv){ + print("~~~~~~~APPLYING PREPROCESSING MULTI INSTANCE LEARNING~~~~~~~") + } + window_df = data.frame(matrix(ncol = b_length+1, nrow = 0)) + + #for every episode in the episodes_list + for(ep_index in (1:length(episodes_list))){ + ep = episodes_list[[ep_index]] + if(nrow(ep) < 1){ + next + } + new_ep = data.frame(matrix(ncol = b_length+1, nrow = 0)) + i = 1 + while(i <= nrow(ep)){ + new_ep = rbind(new_ep,ep[i,]) + + if(ep[i,][b_length+1] >= F_thres && nrow(window_df) < milw){ + window_df = rbind(window_df,ep[i,]) + } + if((nrow(window_df) == milw || i == nrow(ep)) && nrow(window_df) > 0){ + mean = colMeans(window_df) + mean[mean > 0] = 1 + mf <- data.frame(as.list(mean)) + mf[1] = ep[i,][1] + mf[b_length+1] = ep[i,][b_length+1] + colnames(mf) = colnames(new_ep) + new_ep = rbind(new_ep,mf) + if(nrow(window_df) > 1){ + i = i - (nrow(window_df)-2) + } + window_df = data.frame(matrix(ncol = b_length+1, nrow = 0)) + } + i = i + 1 + } + episodes_list[[ep_index]] = new_ep + } + return(episodes_list) +} + +mil_image <- function(milw,F_thres,episodes_list,b_length){ + if(!csv){ + print("~~~~~~~APPLYING PREPROCESSING MULTI INSTANCE LEARNING~~~~~~~") + } + + #for every episode in the episodes_list + for(ep_index in (1:length(episodes_list))){ + ep = episodes_list[[ep_index]] + if(nrow(ep) < 1){ + next + } + new_ep = data.frame(matrix(ncol = b_length+1, nrow = 0)) + #a data.frame with the vectors that need to be averaged + window_df = data.frame(matrix(ncol = b_length+1, nrow = 0)) + i = 1 + while(i <= nrow(ep)){ + #new_ep = rbind(new_ep,ep[i,]) + if(nrow(window_df) < milw){ + window_df = rbind(window_df,ep[i,]) + } + if((nrow(window_df) == milw || i == nrow(ep)) && nrow(window_df) > 0){ + mean = colMeans(window_df) + mean[mean > 0] = 1 + mf = data.frame(as.list(mean)) + mf[1] = ep[i,][1] + mf[b_length+1] = ep[i,][b_length+1] + #colnames(mf) = colnames(new_ep) + new_ep = rbind(new_ep,mf) + if(window_df[1,][b_length+1] >= F_thres && nrow(window_df) > 1){ + i = i - (nrow(window_df)-1) + } + window_df = data.frame(matrix(ncol = b_length+1, nrow = 0)) + } + i = i + 1 + } + episodes_list[[ep_index]] = new_ep + } + return(episodes_list) +} + +#the Risk function +compute_F <- function(s,midpoint,t,ep_length){ + #s affects the steepness + # s <- 0.9 + return(1/(1+exp(s*(ep_length-midpoint-t)))) +} + +#convert event vectors to binary vectors +compute_frequency_vectors <- function(aggr_episode_df,b_length,s,midpoint){ + freq_aggr_episode_df <- data.frame(matrix(ncol = b_length+2, nrow = 0)) + x <- c(c("Timestamps"), c(paste("e_",c(1:b_length),sep = "")), c("Risk_F")) + # colnames(bin_aggr_episode_df) <- x + + for(i in 1:nrow(aggr_episode_df)) { + #init a vector with 3405 0s + freq_vector = as.vector(integer(b_length)) + seg <- aggr_episode_df[i,] + #if segment contains the j number, replace the 0 in the bin_vector with 1 + for(value in seg$x[[1]]){ + freq_vector[[value]] = length(which(seg$x[[1]] == value)) + } + #add a new line to the bin_aggr_epissode_df + #we use a matrix holding the elements of the 
new_data.frame as matrix is able to store variable of different data types + F = compute_F(s,midpoint,i-1,nrow(aggr_episode_df)) + if(midpoint >= nrow(aggr_episode_df)){ + F = 0 + } + date = seg$Timeframe[[1]] + new_df = data.frame(matrix(c(date, freq_vector,F),nrow=1,ncol=b_length+2)) + freq_aggr_episode_df <- rbind(freq_aggr_episode_df,new_df) + } + # x <- c(c("Timestamps"), c(paste("e_",c(1:3405))), c("Risk_F")) + colnames(freq_aggr_episode_df) <- x + return(freq_aggr_episode_df) +} + +create_episodes_list <- function(ds,target_event,b_length,s,midpoint,test_mode){ + if(!csv){ + print("~~~~~~~CREATING FREQUENCY VECTORS AND BINARIZE THEM~~~~~~~") + } + #devide in episodes + target_event_spotted = FALSE + #a list with data.frames for the episodes (each episode one data.frame) + episodes_list = list() + #data.frame for episodes + episode_df <- data.frame(Timestamps=as.POSIXct(character()),Event_id=integer()) + #iterate over every line of the original dataset + for(i in 1:nrow(ds)) { + #get the current row of the ds + meas <- ds[i,] + #If it is the target event enable the appropriate flag + if((meas$Event_id == target_event)){ + target_event_spotted = TRUE + } + #fill the episode data.frame with the events that are between two target events + if(meas$Event_id != target_event && target_event_spotted){ + episode_df <- rbind(episode_df,data.frame(Timestamps=meas$Timestamps, Event_id=meas$Event_id)) + } else if(meas$Event_id == target_event && target_event_spotted && is.data.frame(episode_df) && nrow(episode_df) != 0){ + #a second occurness of the target event is spotted, close the episode + #target_event_spotted = FALSE + #aggregate by day all the events to form the segments inside the episodes + aggr_episode_df = aggregate(episode_df[ ,2], FUN=function(x){return(x)}, by=list(Timeframe=cut(as.POSIXct(episode_df$Timestamps, format="%Y-%m-%dT%H:%M:%OSZ"),"hour"))) #%Y-%m-%dT%H:%M:%OSZ + #binarize the frequncy vector + bin_aggr_episode_df = compute_frequency_vectors(aggr_episode_df,b_length,s,midpoint) + + #Remove event 0, which does not provide any info KOUGKA + bin_aggr_episode_df = bin_aggr_episode_df[ , !(names(bin_aggr_episode_df) %in% c("e_1"))] + + #add the episode to the episodes_list + episodes_list[[length(episodes_list)+1]] = bin_aggr_episode_df + #reset episode_df to en empty data.frame + episode_df <- data.frame(Timestamps=as.POSIXct(character()),Event_id=integer()) + } else if(meas$Event_id == target_event && target_event_spotted && is.data.frame(episode_df) && nrow(episode_df) == 0 && test_mode){ + freq_vector = as.vector(integer(b_length)) + new_df = data.frame(matrix(c(0, freq_vector,0),nrow=1,ncol=b_length+2)) + episode_df <- rbind(episode_df,new_df) + + x <- c(c("Timestamps"), c(paste("e_",c(1:b_length),sep = "")), c("Risk_F")) + colnames(episode_df) <- x + episode_df = episode_df[ , !(names(episode_df) %in% c("e_1"))] + + #add the episode to the episodes_list + episodes_list[[length(episodes_list)+1]] = episode_df + #reset episode_df to en empty data.frame + episode_df <- data.frame(Timestamps=as.POSIXct(character()),Event_id=integer()) + } + } + return(episodes_list) +} + +preprocess <- function(ds,TEST_DATA,REMOVE_RARE_EVENTS,REMOVE_FREQUENT_EVENTS,KEEP_ONLY_FIRST_OCCURENESS,MULTI_INSTANCE_LEARNING_TEXT,MULTI_INSTANCE_LEARNING_IMAGE,FEATURE_SELECTION,top_features,s,midpoint,b_length,target_event,target_event_frequency_proportion_rare,max_event_frequency_proportion_frequent,w,F_thres,test_mode){ + + #Remove events that appear < n times. 
We consider n = (target event frequency)/2 + if(REMOVE_RARE_EVENTS){ + ds<-remove_rare_events(ds,target_event_frequency_proportion_rare) + } + + #Remove events that appear < n times. We consider n = (target event frequency)/2 + if(REMOVE_FREQUENT_EVENTS){ + ds<-remove_frequent_events(ds,max_event_frequency_proportion_frequent) + } + + episodes_list = create_episodes_list(ds,target_event,b_length,s,midpoint,test_mode) + #if(length(episodes_list) == 0){ + # return() + #} + + #binarize the vector + for(ep_index in (1:length(episodes_list))){ + ep = episodes_list[[ep_index]] + ep[2:(ncol(ep)-1)][ep[2:(ncol(ep)-1)] > 0] = 1 + episodes_list[[ep_index]] = ep + } + + # keep only the first occurness of event in consecutive segments + if(KEEP_ONLY_FIRST_OCCURENESS){ + episodes_list <- keep_only_first_occureness(episodes_list) + } + + # Multi-instance learning to increase the pattern frequency + if(MULTI_INSTANCE_LEARNING_TEXT){ + episodes_list <- mil_text(w,F_thres,episodes_list,b_length) + } else if(MULTI_INSTANCE_LEARNING_IMAGE){ + episodes_list <- mil_image(w,F_thres,episodes_list,b_length) + } + return(episodes_list) +} + +feature_selection <- function(merged_episodes,top_features){ + estReliefF <- attrEval(Risk_F ~ ., merged_episodes, estimator="RReliefFexpRank", ReliefIterations=50) + sorted_indeces = order(estReliefF, decreasing = TRUE) + merged_episodes = merged_episodes %>% select(sorted_indeces[1:top_features],ncol(merged_episodes)) + return(merged_episodes) +} + +read_dataset <- function(path){ + dataset = read.table(path, header = TRUE, sep = ",", dec = ".", comment.char = "#") + dataset[, 2] <- as.numeric(dataset[, 2]) + return(dataset) +} + +read_dataset_cross <- function(path){ + dataset = read.table(path, header = TRUE, sep = ",", dec = ".", comment.char = "#") + dataset[, 2] <- as.numeric(dataset[, 2]) + + aggr_episode_df = aggregate(dataset, FUN=function(x){return(x)}, by=list(cut(as.POSIXct(dataset$Timestamps, format="%Y-%m-%dT%H:%M:%OSZ"),"day"))) #%Y-%m-%dT%H:%M:%OSZ + + training = sample_n(aggr_episode_df,(2*nrow(aggr_episode_df)/3)) + + testing = setDT(aggr_episode_df)[!training, on="Group.1"] + + testing = testing[, !"Group.1"] + testing_df = data.frame(Timestamps=unlist(testing[,1][1][[1]]),Event_id=unlist(testing[,2][1][[1]])) + for(i in 2:nrow(testing)){ + testing_df = rbind(testing_df,data.frame(Timestamps=unlist(testing[,1][i][[1]]),Event_id=unlist(testing[,2][i][[1]]))) + } + + training = as.data.table(training)[, !"Group.1"] + training_df = data.frame(Timestamps=unlist(training[,1][1][[1]]),Event_id=unlist(training[,2][1][[1]])) + for(i in 2:nrow(training)){ + training_df = rbind(training_df,data.frame(Timestamps=unlist(training[,1][i][[1]]),Event_id=unlist(training[,2][i][[1]]))) + } + + return(list("training" = training_df, "testing"=testing_df)) +} + +read_dataset_cross2 <- function(path,form){ + dataset = read.table(path, header = TRUE, sep = ",", dec = ".", comment.char = "#") + dataset[, 2] <- as.numeric(dataset[, 2]) + if(form == 1){ + training_df = dataset[1:(2*nrow(dataset)/3),] + testing_df = dataset[(2*nrow(dataset)/3):nrow(dataset),] + } else if (form == 2){ + training_df = dataset[(nrow(dataset)/3):nrow(dataset),] + testing_df = dataset[1:(nrow(dataset)/3),] + } else { + training_df = rbind(dataset[1:(nrow(dataset)/3),],dataset[(2*nrow(dataset)/3):nrow(dataset),]) + testing_df = dataset[(nrow(dataset)/3+1):(2*nrow(dataset)/3),] + } + return(list("training" = training_df, "testing"=testing_df)) +} + +find_next_fake_episodes <- 
function(ep_index,test_episodes_list){ + fake_episodes_vec = vector() + cnt = 0 + if(ep_index < length(test_episodes_list)){ + for(i in ((ep_index+1):length(test_episodes_list))){ + n_ep = test_episodes_list[i] + if(nrow(n_ep[[1]]) == 1 && n_ep[[1]]$Timestamps == 0){ + cnt = cnt + 1 + fake_episodes_vec <- c(fake_episodes_vec, i) + } else { + return(fake_episodes_vec) + } + } + } + return(fake_episodes_vec) +} + +find_next_close_episodes <- function(ep_index,test_episodes_list,pred_index){ + ep_length = nrow(test_episodes_list[ep_index][[1]]) + remaining_hours = max_warning_interval-(ep_length-pred_index) + + nc_episodes_vec = vector() + cnt = 0 + while (remaining_hours > 0 && ep_index < length(test_episodes_list)) { + ep_index = ep_index + 1 + n_ep = test_episodes_list[ep_index] + if(nrow(n_ep[[1]]) == 1 && n_ep[[1]]$Timestamps == 0){ + next + } + n_ep_length = nrow(n_ep[[1]]) + if(n_ep_length <= remaining_hours){ + remaining_hours = remaining_hours-n_ep_length + if(remaining_hours >= 0){ + cnt = cnt + 1 + nc_episodes_vec <- c(nc_episodes_vec, ep_index) + } + } else{ + return(nc_episodes_vec) + } + } + return(nc_episodes_vec) +} + +eval <- function(train_episodes,test_episodes_list,seed){ + set.seed(seed) + my.rf = randomForest(Risk_F ~ .,data=train_episodes,importance=TRUE) + #varImpPlot(my.rf) + false_positives = 0 + tp_vec = vector() + fp_vec = vector() + fn_vec = vector() + true_positives = 0 + false_negatives = 0 + ep_index = 1 + while (ep_index <= length(test_episodes_list)) { + ep = test_episodes_list[[ep_index]] + fake_episodes_vec = find_next_fake_episodes(ep_index,test_episodes_list) + fake_ep_cnt = length(fake_episodes_vec) + + ep = ep[ , !(names(ep) %in% c("Timestamps"))] + Prediction <- predict(my.rf, ep) + ep_legth = length(Prediction) + pred_indeces = as.numeric(names(Prediction[Prediction >= acceptance_threshold])) + predicted_next_episodes = 0 + if(length(pred_indeces) > 0){ + pred_index = tail(pred_indeces, n=1) + nc_episodes_vec = find_next_close_episodes(ep_index,test_episodes_list,pred_index) + + predicted_next_episodes = length(nc_episodes_vec) + } + + + if(length(pred_indeces[pred_indeces < (ep_legth-(max_warning_interval))]) > 0){ + fp_reps = length(pred_indeces[pred_indeces < (ep_legth-(max_warning_interval))]) + false_positives = false_positives + fp_reps + (fake_ep_cnt*fp_reps) + fp_vec = c(fp_vec,rep(ep_index,fp_reps),rep(fake_episodes_vec,fp_reps)) + } + if(length(pred_indeces[pred_indeces >= (ep_legth-(max_warning_interval)) & pred_indeces <= (ep_legth-min_warning_interval)]) > 0){ + true_positives = true_positives + 1 + fake_ep_cnt + predicted_next_episodes + tp_vec = c(tp_vec,ep_index,fake_episodes_vec,nc_episodes_vec) + } else { + false_negatives = false_negatives + 1 + fake_ep_cnt + fn_vec = c(fn_vec,ep_index,fake_episodes_vec) + } + ep_index = ep_index + 1 + fake_ep_cnt + predicted_next_episodes + } + + precision = true_positives/(true_positives+false_positives) + if((true_positives+false_positives) == 0){ + precision = 0 + } + recall = true_positives/length(test_episodes_list) + + F1 = 2*((precision*recall)/(precision+recall)) + if((precision+recall) == 0){ + F1 = 0 + } + if(!csv){ + cat(paste("dataset:",argv$test,"\ntrue_positives:", true_positives,"\nfalse_positives:", false_positives,"\nfalse_negatives:", false_negatives,"\nprecision:", precision,"\nrecall:", recall,"\nF1:", F1, "\ntp:",paste(as.character(tp_vec), sep="", collapse=","), "\nfp:",paste(as.character(fp_vec), sep="", collapse=","), "\nfn:",paste(as.character(fn_vec), sep="", 
collapse=","))) + } else { + cat(paste(argv$test,",",true_positives,",",false_positives,",",false_negatives,",",precision,",",recall,",",F1,",",argv$fet,",",argv$tet,",",argv$rre,",",argv$rfe,",",argv$kofe,",",argv$mili,",",argv$milt,",",argv$fs,",",argv$top,",",argv$rer,",",argv$fer,",",argv$seed,",",argv$steepness,",",argv$pthres,",",argv$milw,",",argv$milthres,",",argv$midpoint,",",argv$minwint,",",argv$maxwint,",",argv$cross,",tp,",paste(as.character(tp_vec), sep="", collapse=","),",fp,",paste(as.character(fp_vec), sep="", collapse=","),",fn,",paste(as.character(fn_vec), sep="", collapse=","),"\n",sep="")) + } + return(my.rf) +} + +plot <- function(test_episodes_list, episode_index, my.rf){ + test_episodes = test_episodes_list[[episode_index]][ , !(names(test_episodes_list[[episode_index]]) %in% c("Timestamps"))] + Prediction <- predict(my.rf, test_episodes) + results = data.frame(Risk_F=test_episodes$Risk_F,num_Prediction=as.numeric(Prediction)) + mse = mean((Prediction-test_episodes$Risk_F)^2) + + chart =ggplot(results,aes((1:nrow(results)))) + + # geom_rect(aes(xmin = ceiling(nrow(df_test)/2), xmax = nrow(df_test), ymin = -Inf, ymax = Inf), + # fill = "yellow", alpha = 0.003) + + geom_line(aes(y = Risk_F, colour = "Actual")) + + geom_line(aes(y = num_Prediction, colour="Predicted")) + + labs(colour="Lines") + + xlab("Segments") + + ylab('Risk (F)') + + ggtitle("Risk Prediction") + # (RR_KF_2YEARS_PAT08) + theme(plot.title = element_text(hjust = 0.5)) + + geom_text(aes(label = paste("MSE=",round(mse,3)), x = 20, y = 1), hjust = -2, vjust = 6, color="black", size=4) #add MSE label + + # Disable clip-area so that the MSE is shown in the plot + gt <- ggplot_gtable(ggplot_build(chart)) + gt$layout$clip[gt$layout$name == "panel"] <- "off" + grid.draw(gt) +} + + +p <- arg_parser("Implementation of the AIRBUS Predictor") +# Add a positional argument +p <- add_argument(p, "id", help="experiment ID") +p <- add_argument(p, "train", help="training dataset") +p <- add_argument(p, "test", help="test dataset") +p <- add_argument(p, "fet", help="different types of the fault events",default=151) +p <- add_argument(p, "tet", help="type of the target fault events",default=151) +p <- add_argument(p, "--rre", help="remove rare events", default=FALSE) +p <- add_argument(p, "--rfe", help="remove frequent events", default=FALSE) +p <- add_argument(p, "--kofe", help="keep only first event", default=FALSE) +p <- add_argument(p, "--milt", help="MIL as written in the text of the paper", default=FALSE) +p <- add_argument(p, "--mili", help="MIL as shonw in the Figure of the paper", default=FALSE) +p <- add_argument(p, "--milthres", help="MIL threshold to the sigmoid function for over-sampling", default=0.6) +p <- add_argument(p, "--steepness", help="steepness of the sigmoid function", default=0.8) +p <- add_argument(p, "--midpoint", help="midpoint of the sigmoid function (in days)", default=4) +p <- add_argument(p, "--fs", help="apply feature selection", default=FALSE) +p <- add_argument(p, "--top", help="# of features to keep in feature selection", default=200) +p <- add_argument(p, "--rer", help="rare events ratio of the target event frequency", default=0.2) +p <- add_argument(p, "--fer", help="frequent events ratio of the frequency of the most frequent event", default=0.8) +p <- add_argument(p, "--milw", help="MIL window size (in days)", default=4) +p <- add_argument(p, "--pthres", help="prediction threshold to the Risk value for a true positive episode", default=0.4) +p <- add_argument(p, "--seed", 
help="seed for RF", default=500) +p <- add_argument(p, "--csv", help="output for csv", default=FALSE) + + +p <- add_argument(p, "--spme", help="export datasets for sequential pattern minning", default=FALSE) +p <- add_argument(p, "--java", help="the java path", default="/usr/bin/java") +p <- add_argument(p, "--python", help="the java path", default="/usr/bin/python") +p <- add_argument(p, "--cep", help="the java path", default="/media/thanasis/Storage/ATLANTIS/0_Ensembled_Predictive_Solution_EPS/spm_rules.py") +p <- add_argument(p, "--spmf", help="the spmf path", default="/media/thanasis/Storage/ATLANTIS/0_Ensembled_Predictive_Solution_EPS/spmf.jar") +p <- add_argument(p, "--conf", help="minimum support (minsup)", default="20%") +p <- add_argument(p, "--minti", help="minimum time interval allowed between two succesive itemsets of a sequential pattern", default=1) +p <- add_argument(p, "--maxti", help="maximum time interval allowed between two succesive itemsets of a sequential pattern", default=5) +p <- add_argument(p, "--minwi", help="minimum time interval allowed between the first itemset and the last itemset of a sequential pattern", default=1) +p <- add_argument(p, "--maxwi", help="maximum time interval allowed between the first itemset and the last itemset of a sequential pattern", default=11) +p <- add_argument(p, "--minwint", help="min # of days before failure to expect a warning for true positive decision", default=1) +p <- add_argument(p, "--maxwint", help="max # of days before failure to expect a warning for true positive decision", default=8) + +p <- add_argument(p, "--cross", help="cross validation", default=3) +p <- add_argument(p, "--form", help="form", default=1) + + + +argv = data.frame() +if( length(commandArgs(trailingOnly = TRUE)) != 0){ + argv <- parse_args(p) +} else { + argv <- parse_args(p,c(1,"/home/thanasis/Desktop/Atlantis/zBreak/Philips_AE_prediction/full_stops/all_channels/training/events1P5_LouvainPhilips_ch1_6405te_training.csv","/home/thanasis/Desktop/Atlantis/zBreak/Philips_AE_prediction/full_stops/all_channels/training/events1P5_LouvainPhilips_ch1_6405te_training.csv",6405,6405)) +} + + +TEST_DATA = FALSE +id = argv$id +REMOVE_RARE_EVENTS = argv$rre +REMOVE_FREQUENT_EVENTS = argv$rfe +KEEP_ONLY_FIRST_OCCURENESS = argv$kofe +MULTI_INSTANCE_LEARNING_TEXT = argv$milt #MIL as explained in the text +MULTI_INSTANCE_LEARNING_IMAGE = argv$mili #MIL as presented in the figure +FEATURE_SELECTION = argv$fs +top_features = argv$top +target_event_frequency_proportion_rare = argv$rer +max_event_frequency_proportion_frequent = argv$fer +milw = argv$milw +F_thres = argv$milthres +s = argv$steepness +midpoint = argv$midpoint +target_event = argv$tet +b_length = argv$fet +acceptance_threshold = argv$pthres +export_spm = argv$spme +seed = argv$seed +csv = argv$csv +max_warning_interval = argv$maxwint +min_warning_interval = argv$minwint + +CROSS = argv$cross + +if(CROSS == 1){ + #set.seed(argv$seed) + d = read_dataset_cross2(argv$train,argv$form) + training_set = d$training + test_set = d$testing +} else if(CROSS == 2) { + set.seed(argv$seed) + d = read_dataset_cross(argv$train,argv$form) + training_set = d$training + test_set = d$testing +} else { + training_set = read_dataset(argv$train) + test_set = read_dataset(argv$test) +} + +episodes_list <- 
preprocess(training_set,TEST_DATA,REMOVE_RARE_EVENTS,REMOVE_FREQUENT_EVENTS,KEEP_ONLY_FIRST_OCCURENESS,MULTI_INSTANCE_LEARNING_TEXT,MULTI_INSTANCE_LEARNING_IMAGE,FEATURE_SELECTION,top_features,s,midpoint,b_length,target_event,target_event_frequency_proportion_rare,max_event_frequency_proportion_frequent,milw,F_thres,FALSE) + +#keep only 2/3 of the list +if(CROSS == 3){ + episodes_list = episodes_list[1:(2*length(episodes_list)/3)] +} else if(CROSS == 4){ + episodes_list = episodes_list[((length(episodes_list)/3)+1):length(episodes_list)] +} else if(CROSS == 5) { + episodes_list = episodes_list[-(((length(episodes_list)/3)+1):((2*length(episodes_list)/3)))] +} + +#merge episodes +merged_episodes = ldply(episodes_list, data.frame) +merged_episodes = merged_episodes[ , !(names(merged_episodes) %in% c("Timestamps"))] + + +if(FEATURE_SELECTION){ + #remove columns with all values equal to zero + merged_episodes = merged_episodes[, colSums(merged_episodes != 0) > 0] + merged_episodes = feature_selection(merged_episodes,top_features) +} + +TEST_DATA = TRUE +REMOVE_RARE_EVENTS = FALSE +REMOVE_FREQUENT_EVENTS = FALSE +KEEP_ONLY_FIRST_OCCURENESS = FALSE +MULTI_INSTANCE_LEARNING_TEXT = FALSE #MIL as explained in the text +MULTI_INSTANCE_LEARNING_IMAGE = FALSE #MIL as presented in the figure +FEATURE_SELECTION = FALSE +test_episodes_list <- preprocess(test_set,TEST_DATA,REMOVE_RARE_EVENTS,REMOVE_FREQUENT_EVENTS,KEEP_ONLY_FIRST_OCCURENESS,MULTI_INSTANCE_LEARNING_TEXT,MULTI_INSTANCE_LEARNING_IMAGE,FEATURE_SELECTION,top_features,s,midpoint,b_length,target_event,target_event_frequency_proportion_rare,max_event_frequency_proportion_frequent,milw,F_thres,TRUE) + +#keep last 1/3 of episodes +if(CROSS == 3){ + test_episodes_list = test_episodes_list[((2*length(test_episodes_list)/3)+1):length(test_episodes_list)] +} else if(CROSS == 4){ + test_episodes_list = test_episodes_list[1:(length(test_episodes_list)/3)] +} else if(CROSS == 5) { + test_episodes_list = test_episodes_list[(((length(test_episodes_list)/3)+1):((2*length(test_episodes_list)/3)))] +} + +my.rf = eval(merged_episodes,test_episodes_list,seed) + +# for(s in (0:6)){ +# my.rf = eval(merged_episodes,test_episodes_list,seed) +# seed = seed + 1 +# } + +# for(ep in 1:length(test_episodes_list)){ +# jpeg(paste(ep,'_rplot.jpg')) +# plot(test_episodes_list,ep,my.rf) +# dev.off() +# } + +if(export_spm){ + if(!csv){ + print("~~~~~~~SEQUENTIAL PATTERN MINING~~~~~~~") + } + spm_train_path = gsub(".csv",paste("_spm_",id,".csv",sep=""),argv$train) + spm_test_path = gsub(".csv",paste("_spm_",id,".csv",sep=""),argv$test) + spm_results_path = gsub(".csv",paste("_results_",id,".csv",sep=""),argv$train) + confidence = argv$conf + min_dist_seq = argv$minti + max_dist_seq = argv$maxti + min_dist_first_last = argv$minwi + max_dist_first_last = argv$maxwi + java_path = argv$java + jspmf_path = argv$spmf + python_path = argv$python + cep_path = argv$cep + max_warning_interval = argv$maxwint + min_warning_interval = argv$minwint + export_ds_for_spm(target_event,episodes_list,spm_train_path) + export_ds_for_spm(target_event,test_episodes_list,spm_test_path) + + if (file.exists(spm_results_path)) { + invisible(file.remove(spm_results_path)) + } + + javaOutput <- system(paste(java_path,"-jar",jspmf_path,"run HirateYamana",spm_train_path,spm_results_path,confidence,min_dist_seq,max_dist_seq,min_dist_first_last,max_dist_first_last), intern = TRUE) + #print(javaOutput) + + pythonOutput <- system(paste(python_path,cep_path,spm_results_path,spm_test_path,target_event), intern = 
TRUE) + #print(pythonOutput) + true_positives = 0 + false_positives = 0 + false_negatives = 0 + total_failures = 0 + d = 0 + + warnings = list() + for(w in pythonOutput){ + d = as.integer(str_extract(w, "\\-*\\d+\\.*\\d*")) + if(!grepl("Failure",w,fixed=TRUE)){ + warnings = c(warnings,d) + } else { + total_failures = total_failures + 1 + if(length(warnings) == 0){ + false_negatives = false_negatives + 1 + } else { + if(length(warnings[warnings < d-max_warning_interval]) > 0){ + false_positives = false_positives + length(warnings[warnings < d-max_warning_interval]) + } + if(length(warnings[warnings >= (d-max_warning_interval)]) > 0 & length(warnings[warnings <= (d-min_warning_interval)]) > 0){ + true_positives = true_positives + 1 + } else { + false_negatives = false_negatives + 1 + } + } + warnings = list() + } + } + + precision = true_positives/(true_positives+false_positives) + if((true_positives+false_positives) == 0){ + precision = 0 + } + + recall = true_positives/total_failures + + F1 = 2*((precision*recall)/(precision+recall)) + if((precision+recall) == 0){ + F1 = 0 + } + + if(!csv){ + cat(paste("dataset:",argv$test,"\ntrue_positives:", true_positives,"\nfalse_positives:", false_positives,"\nfalse_negatives:", false_negatives,"\nprecision:", precision,"\nrecall:", recall,"\nF1:", F1, "\n")) + } else { + cat(paste(argv$test,",", true_positives,",", false_positives,",", false_negatives,",", precision,",", recall,",", F1,",",argv$conf,",",argv$minti,",",argv$maxti,",",argv$minwi,",",argv$maxwi,",",argv$minwint,",",argv$maxwint, "\n",sep="")) + } +} diff --git a/unsupervised/pom.xml b/unsupervised/pom.xml new file mode 100644 index 0000000..8045d1e --- /dev/null +++ b/unsupervised/pom.xml @@ -0,0 +1,53 @@ + + + 4.0.0 + com + aFaultDetection + 5.0 + jar + + UTF-8 + 1.8 + 1.8 + com.atl.smartmaintenance.AFaultDetection + + + + org.influxdb + influxdb-java + 2.10 + jar + + + + + + + + maven-assembly-plugin + + false + + + ${mainClass} + + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + + + \ No newline at end of file diff --git a/unsupervised/src/main/java/com/atl/afaultdetection/utils/InfluxDBHandler.java b/unsupervised/src/main/java/com/atl/afaultdetection/utils/InfluxDBHandler.java new file mode 100644 index 0000000..ce5e869 --- /dev/null +++ b/unsupervised/src/main/java/com/atl/afaultdetection/utils/InfluxDBHandler.java @@ -0,0 +1,203 @@ +package com.atl.afaultdetection.utils; + +import com.atl.mcod.utils.Data; +import com.atl.mcod.utils.Constants; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; +import java.security.cert.CertificateException; +import java.time.Duration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.net.ssl.HostnameVerifier; +import javax.net.ssl.SSLContext; +import javax.net.ssl.SSLSession; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; +import okhttp3.OkHttpClient; +import org.influxdb.InfluxDB; +import org.influxdb.InfluxDBFactory; +import org.influxdb.dto.Point; +import org.influxdb.dto.Query; +import org.influxdb.dto.QueryResult; + +/** + * + * @author thanasis + */ +public class InfluxDBHandler { + + private InfluxDB influxDB; + private String database; + private String resultsDatabase; + private Map lastReportedMap; //to avoid multiple reports + +// public InfluxDBHandler(String host, String port, String 
username, String password, String database) { +// OkHttpClient.Builder builder = new OkHttpClient.Builder() +// .readTimeout(100, TimeUnit.SECONDS).connectTimeout(1000, TimeUnit.SECONDS); +// influxDB = InfluxDBFactory.connect("https://" + host + ":" + port, username, password, builder); +// influxDB.setDatabase(database); +// this.database = database; +// this.lastReportedMap = new HashMap<>(); +// } + + public InfluxDBHandler(String host, String port, String database, String resutlsDatabase, String username, String password, boolean ssl) { + try { + OkHttpClient.Builder builder = null; + String http = "http://"; + if(ssl){ + // Create a trust manager that does not validate certificate chains + final TrustManager[] trustAllCerts = new TrustManager[]{ + new X509TrustManager() { + @Override + public void checkClientTrusted(java.security.cert.X509Certificate[] chain, String authType) throws CertificateException { + } + + @Override + public void checkServerTrusted(java.security.cert.X509Certificate[] chain, String authType) throws CertificateException { + } + + @Override + public java.security.cert.X509Certificate[] getAcceptedIssuers() { + return new java.security.cert.X509Certificate[]{}; + } + } + }; + + // Install the all-trusting trust manager + SSLContext sc = SSLContext.getInstance("SSL"); + sc.init(null, trustAllCerts, new java.security.SecureRandom()); + + builder = new OkHttpClient.Builder() + .sslSocketFactory(sc.getSocketFactory(), (X509TrustManager) trustAllCerts[0]).readTimeout(100, TimeUnit.SECONDS).connectTimeout(1000, TimeUnit.SECONDS); + builder.hostnameVerifier(new HostnameVerifier() { + @Override + public boolean verify(String hostname, SSLSession session) { + return true; + } + }); + http = "https://"; + } else { + builder = new OkHttpClient.Builder() + .readTimeout(100, TimeUnit.SECONDS).connectTimeout(1000, TimeUnit.SECONDS); + http = "http://"; + } + if(username == null || username.equals("")){ + influxDB = InfluxDBFactory.connect(http + host + ":" + port, builder); + } else { + influxDB = InfluxDBFactory.connect(http + host + ":" + port, username, password, builder); + } + influxDB.setDatabase(database); + this.database = database; + this.resultsDatabase = resutlsDatabase; + + if(!dbExists(this.resultsDatabase)){ + influxDB.query(new Query("create database " + this.resultsDatabase + " with name \"autogen\"",resutlsDatabase)); + } + + this.lastReportedMap = new HashMap<>(); + } catch (NoSuchAlgorithmException ex) { + Logger.getLogger(InfluxDBHandler.class.getName()).log(Level.SEVERE, null, ex); + } catch (KeyManagementException ex) { + Logger.getLogger(InfluxDBHandler.class.getName()).log(Level.SEVERE, null, ex); + } + } + + public InfluxDB getInfluxDBHandler() { + return influxDB; + } + + public QueryResult getMeasurements(String measurementName, long seconds, boolean excludeZeroValues){ + if(influxDB != null){ + influxDB.setDatabase(this.database); + Query query = new Query("SELECT mean(value) FROM "+measurementName+" WHERE time > now() - "+seconds+"s and time <= now() "+(excludeZeroValues?"and value != 0":"")+" group by time(1s) fill(linear)", database); + QueryResult results = influxDB.query(query); + return results; + } else { + System.err.print("No DB connection"); + } + return new QueryResult(); + } + //for debugging + public QueryResult getMeasurementsDebug(String measurementName, long seconds, boolean excludeZeroValues) { + if (influxDB != null) { + influxDB.setDatabase(this.database); +// Query query = new Query("SELECT mean(value) FROM " + measurementName + 
" WHERE time > '2018-01-25T00:00:01.004Z' + " + (seconds - Constants.slide) + "s and time <= '2018-01-25T00:00:01.004Z' + " + seconds + "s " + (excludeZeroValues ? "and value != 0" : "") + " group by time(1s) fill(linear)", database); + //Query query = new Query("SELECT mean(value) FROM " + measurementName + " WHERE time > '2018-07-05T00:00:01.004Z' + " + (seconds - Constants.slide) + "s and time <= '2018-07-05T00:00:01.004Z' + " + seconds + "s " + (excludeZeroValues ? "and value != 0" : "") + " group by time(1s) fill(linear)", database); + //FOR AE TESTING + Query query = new Query("SELECT value AS \"mean\" FROM " + measurementName + " WHERE time > '2018-08-19T00:00:01.004Z' + " + (seconds - Constants.slide) + "s and time <= '2018-08-19T00:00:01.004Z' + " + seconds + "s " /*+ (excludeZeroValues ? "and value != 0" : "") + ""*/, database); + //FOR THICK TESTING + //Query query = new Query("SELECT value AS \"mean\" FROM " + measurementName + " WHERE time > '2018-10-14T00:00:01.004Z' + " + (seconds - Constants.slide) + "s and time <= '2018-10-14T00:00:01.004Z' + " + seconds + "s " /*+ (excludeZeroValues ? "and value != 0" : "") + ""*/, database); + QueryResult results = influxDB.query(query); + return results; + } else { + System.err.print("No DB connection"); + } + return new QueryResult(); + } + + public Double getMinValue(String measurementName, boolean excludeZeroValues, boolean excludeNegativeValues) { + if (influxDB != null) { + influxDB.setDatabase(this.database); + Query query = new Query("SELECT min(value) FROM " + measurementName + (excludeNegativeValues && excludeZeroValues ? " where value > 0" : excludeZeroValues ? " where value != 0" : excludeNegativeValues ? " where value >= 0" : ""), database); + QueryResult results = influxDB.query(query); + return (Double) results.getResults().get(0).getSeries().get(0).getValues().get(0).get(1); + } else { + System.err.print("No DB connection"); + } + return null; + } + + public Double getMaxValue(String measurementName) { + if (influxDB != null) { + influxDB.setDatabase(this.database); + Query query = new Query("SELECT max(value) FROM " + measurementName, database); + QueryResult results = influxDB.query(query); + return (Double) results.getResults().get(0).getSeries().get(0).getValues().get(0).get(1); + } else { + System.err.print("No DB connection"); + } + return null; + } + + public void reportOutlier(Data outlier, String label, String ruleMeas) { + influxDB.setDatabase(this.resultsDatabase); + //don't report outliers with distance lower than 5mins +// if (lastReportedMap.containsKey(label)) { +// if (Duration.between(lastReportedMap.get(label).getActualTime(), outlier.getActualTime()).toMinutes() < 5 ) { +// return; +// } +// } + if(Constants.debug || Constants.verbose){ + //System.out.println(label + " " + outlier.getActualTime().toString()); + } + + influxDB.setRetentionPolicy("autogen"); + influxDB.write(Point.measurement(Constants.resutlsDBTable) + .time(outlier.getActualTime().toEpochMilli(), TimeUnit.MILLISECONDS) + //.addField("report_time", System.currentTimeMillis()) + .addField("report_time", outlier.getOutlier_start_time()) + .addField("reason", "") + .tag("sensor", ruleMeas.isEmpty()?Constants.sensors:ruleMeas) + .tag("seen", "") + .tag("submitted", "") + .tag("machine", "A-ERWAERM1-PRESSE") + .tag("label", label) + .build()); + lastReportedMap.put(label, outlier); + } + + private boolean dbExists(String database){ + boolean exists = false; + QueryResult query = influxDB.query(new Query("show databases",database)); + for 
(List v : query.getResults().get(0).getSeries().get(0).getValues()){ + if(v.get(0).equals(database)){ + return true; + } + } + return exists; + } +} diff --git a/unsupervised/src/main/java/com/atl/afaultdetection/utils/Measurement.java b/unsupervised/src/main/java/com/atl/afaultdetection/utils/Measurement.java new file mode 100644 index 0000000..6eb8cfb --- /dev/null +++ b/unsupervised/src/main/java/com/atl/afaultdetection/utils/Measurement.java @@ -0,0 +1,46 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package com.atl.afaultdetection.utils; + +/** + * + * @author thanasis + */ +public class Measurement { + private String measurementDBName; + private Double minValue; + private Double maxValue; + + public Measurement(String measurementDBName) { + this.measurementDBName = measurementDBName; + } + + public String getMeasurementDBName() { + return measurementDBName; + } + + public void setMeasurementDBName(String measurementDBName) { + this.measurementDBName = measurementDBName; + } + + public Double getMinValue() { + return minValue; + } + + public void setMinValue(Double minValue) { + this.minValue = minValue; + } + + public Double getMaxValue() { + return maxValue; + } + + public void setMaxValue(Double maxValue) { + this.maxValue = maxValue; + } + + +} diff --git a/unsupervised/src/main/java/com/atl/afaultdetection/utils/Utils.java b/unsupervised/src/main/java/com/atl/afaultdetection/utils/Utils.java new file mode 100644 index 0000000..c34c87f --- /dev/null +++ b/unsupervised/src/main/java/com/atl/afaultdetection/utils/Utils.java @@ -0,0 +1,105 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package com.atl.afaultdetection.utils; + +import com.atl.mcod.utils.Constants; +import java.time.Instant; +import java.util.Iterator; +import java.util.Map; + +/** + * + * @author thanasis + */ +public class Utils { + public static double normalize(double value, double minValue, double maxValue){ + return (value - minValue) / (maxValue - minValue); + } + + public static boolean allIteratorsHaveNext(Map>> slideDataIteratorsMap) { + for(Map.Entry>> entry : slideDataIteratorsMap.entrySet()){ + if(!entry.getValue().hasNext()){ + return false; + } + } + return true; + } + + public static String checkRuleBasedParameters(Map measMap) { + String ruleMeas = ""; + if (Constants.algorithms.contains("mcod") && Constants.maxRule == null && Constants.minRule == null) { + System.err.println("No rule set for the rule based"); + System.exit(5); + } + if (Constants.algorithms.contains("mcodKnowledge") && Constants.maxRule_k == null && Constants.minRule_k == null) { + System.err.println("No rule set for the rule based knowledge"); + System.exit(5); + } + if (Constants.maxRule != null) { + String ruleQuickMeas = Constants.maxRule.split("-")[0]; + if (!measMap.containsKey(Constants.measShortNameMap.get(ruleQuickMeas))) { + System.err.println("The measurement of the max rule should be passed using the --meas flag"); + System.exit(6); + } + ruleMeas = Constants.measShortNameMap.get(ruleQuickMeas); + } + if (Constants.minRule != null) { + String ruleQuickMeas = Constants.minRule.split("-")[0]; + if (!measMap.containsKey(Constants.measShortNameMap.get(ruleQuickMeas))) { + System.err.println("The measurement of the min rule should be passed using the --meas flag"); + System.exit(7); + } + ruleMeas = Constants.measShortNameMap.get(ruleQuickMeas); + } + if (Constants.maxRule_k != null) { + String ruleQuickMeas = Constants.maxRule_k.split("-")[0]; + if (!measMap.containsKey(Constants.measShortNameMap.get(ruleQuickMeas))) { + System.err.println("The measurement of the max rule should be passed using the --meas flag"); + System.exit(6); + } + ruleMeas = Constants.measShortNameMap.get(ruleQuickMeas); + } + if (Constants.minRule_k != null) { + String ruleQuickMeas = Constants.minRule_k.split("-")[0]; + if (!measMap.containsKey(Constants.measShortNameMap.get(ruleQuickMeas))) { + System.err.println("The measurement of the min rule should be passed using the --meas flag"); + System.exit(7); + } + ruleMeas = Constants.measShortNameMap.get(ruleQuickMeas); + } + return ruleMeas; + } + + public static boolean checkRule(String ruleType, String key, double value, String ruleMeas) { + if (key.equals(ruleMeas)) { + if(ruleType.equals("ruleBased")){ + if (Constants.maxRule != null) { + if (value > Double.valueOf(Constants.maxRule.split("-")[1])) { + return true; + } + } + if (Constants.minRule != null) { + if (value < Double.valueOf(Constants.minRule.split("-")[1])) { + return true; + } + } + } else { + if (Constants.maxRule_k != null) { + if (value > Double.valueOf(Constants.maxRule_k.split("-")[1])) { + return true; + } + } + if (Constants.minRule_k != null) { + if (value < Double.valueOf(Constants.minRule_k.split("-")[1])) { + return true; + } + } + } + + } + return false; + } +} diff --git a/unsupervised/src/main/java/com/atl/mcod/ComposedSplitFunction.java b/unsupervised/src/main/java/com/atl/mcod/ComposedSplitFunction.java new file mode 100644 index 0000000..5cf5a6e --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/ComposedSplitFunction.java @@ -0,0 +1,40 @@ +package com.atl.mcod; + +import java.util.Set; + 
+import com.atl.mcod.utils.Pair; + +/** + * A {@linkplain SplitFunction split function} that is defined by composing + * a {@linkplain PromotionFunction promotion function} and a + * {@linkplain PartitionFunction partition function}. + * + * @param The type of the data objects. + */ +public class ComposedSplitFunction implements SplitFunction { + + private PromotionFunction promotionFunction; + private PartitionFunction partitionFunction; + + /** + * The constructor of a {@link SplitFunction} composed by a + * {@link PromotionFunction} and a {@link PartitionFunction}. + */ + public ComposedSplitFunction( + PromotionFunction promotionFunction, + PartitionFunction partitionFunction + ) + { + this.promotionFunction = promotionFunction; + this.partitionFunction = partitionFunction; + } + + + @Override + public SplitResult process(Set dataSet, DistanceFunction distanceFunction) { + Pair promoted = promotionFunction.process(dataSet, distanceFunction); + Pair> partitions = partitionFunction.process(promoted, dataSet, distanceFunction); + return new SplitResult(promoted, partitions); + } + +} diff --git a/unsupervised/src/main/java/com/atl/mcod/DistanceFunction.java b/unsupervised/src/main/java/com/atl/mcod/DistanceFunction.java new file mode 100644 index 0000000..e83c7f4 --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/DistanceFunction.java @@ -0,0 +1,12 @@ +package com.atl.mcod; + +/** + * An object that can calculate the distance between two data objects. + * + * @param The type of the data objects. + */ +public interface DistanceFunction { + + double calculate(DATA data1, DATA data2); + +} diff --git a/unsupervised/src/main/java/com/atl/mcod/DistanceFunctions.java b/unsupervised/src/main/java/com/atl/mcod/DistanceFunctions.java new file mode 100644 index 0000000..678d5c3 --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/DistanceFunctions.java @@ -0,0 +1,193 @@ +package com.atl.mcod; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Some pre-defined implementations of {@linkplain DistanceFunction distance + * functions}. + */ +public final class DistanceFunctions { + + /** + * Don't let anyone instantiate this class. + */ + private DistanceFunctions() {} + + + /** + * Creates a cached version of a {@linkplain DistanceFunction distance + * function}. This method is used internally by {@link MTree} to create + * a cached distance function to pass to the {@linkplain SplitFunction split + * function}. + * @param distanceFunction The distance function to create a cached version + * of. + * @return The cached distance function. 
+ */ + public static DistanceFunction cached(final DistanceFunction distanceFunction) { + return new DistanceFunction() { + class Pair { + Data data1; + Data data2; + + public Pair(Data data1, Data data2) { + this.data1 = data1; + this.data2 = data2; + } + + @Override + public int hashCode() { + return data1.hashCode() ^ data2.hashCode(); + } + + @Override + public boolean equals(Object arg0) { + if(arg0 instanceof Pair) { + Pair that = (Pair) arg0; + return this.data1.equals(that.data1) + && this.data2.equals(that.data2); + } else { + return false; + } + } + } + + private final Map cache = new HashMap(); + + @Override + public double calculate(Data data1, Data data2) { + Pair pair1 = new Pair(data1, data2); + Double distance = cache.get(pair1); + if(distance != null) { + return distance; + } + + Pair pair2 = new Pair(data2, data1); + distance = cache.get(pair2); + if(distance != null) { + return distance; + } + + distance = distanceFunction.calculate(data1, data2); + cache.put(pair1, distance); + cache.put(pair2, distance); + return distance; + } + }; + } + + + + /** + * An interface to represent coordinates in Euclidean spaces. + * @see "Euclidean + * Space" article at Wikipedia + */ + public interface EuclideanCoordinate { + /** + * The number of dimensions. + */ + int dimensions(); + + /** + * A method to access the {@code index}-th component of the coordinate. + * + * @param index The index of the component. Must be less than {@link + * #dimensions()}. + */ + double get(int index); + } + + + /** + * Calculates the distance between two {@linkplain EuclideanCoordinate + * euclidean coordinates}. + */ + public static double euclidean(EuclideanCoordinate coord1, EuclideanCoordinate coord2) { + int size = Math.min(coord1.dimensions(), coord2.dimensions()); + double distance = 0; + for(int i = 0; i < size; i++) { + double diff = coord1.get(i) - coord2.get(i); + distance += diff * diff; + } + distance = Math.sqrt(distance); + return distance; + } + + + /** + * A {@linkplain DistanceFunction distance function} object that calculates + * the distance between two {@linkplain EuclideanCoordinate euclidean + * coordinates}. + */ + public static final DistanceFunction EUCLIDEAN = new DistanceFunction() { + @Override + public double calculate(EuclideanCoordinate coord1, EuclideanCoordinate coord2) { + return DistanceFunctions.euclidean(coord1, coord2); + } + }; + + + /** + * A {@linkplain DistanceFunction distance function} object that calculates + * the distance between two coordinates represented by {@linkplain + * java.util.List lists} of {@link java.lang.Integer}s. + */ + public static final DistanceFunction> EUCLIDEAN_INTEGER_LIST = new DistanceFunction>() { + @Override + public double calculate(List data1, List data2) { + class IntegerListEuclideanCoordinate implements EuclideanCoordinate { + List list; + public IntegerListEuclideanCoordinate(List list) { this.list = list; } + @Override public int dimensions() { return list.size(); } + @Override public double get(int index) { return list.get(index); } + }; + IntegerListEuclideanCoordinate coord1 = new IntegerListEuclideanCoordinate(data1); + IntegerListEuclideanCoordinate coord2 = new IntegerListEuclideanCoordinate(data2); + return DistanceFunctions.euclidean(coord1, coord2); + } + }; + + /** + * A {@linkplain DistanceFunction distance function} object that calculates + * the distance between two coordinates represented by {@linkplain + * java.util.List lists} of {@link java.lang.Double}s. 
+ */ + public static final DistanceFunction> EUCLIDEAN_DOUBLE_LIST = new DistanceFunction>() { + @Override + public double calculate(List data1, List data2) { + class DoubleListEuclideanCoordinate implements EuclideanCoordinate { + List list; + public DoubleListEuclideanCoordinate(List list) { this.list = list; } + @Override public int dimensions() { return list.size(); } + @Override public double get(int index) { return list.get(index); } + }; + DoubleListEuclideanCoordinate coord1 = new DoubleListEuclideanCoordinate(data1); + DoubleListEuclideanCoordinate coord2 = new DoubleListEuclideanCoordinate(data2); + return DistanceFunctions.euclidean(coord1, coord2); + } + }; + + public static void main(String[] args){ + class DoubleListEuclideanCoordinate implements EuclideanCoordinate { + List list; + public DoubleListEuclideanCoordinate(List list) { this.list = list; } + @Override public int dimensions() { return list.size(); } + @Override public double get(int index) { return list.get(index); } + }; + ArrayList data1 = new ArrayList(); + ArrayList data2 = new ArrayList(); + data1.add(9728.715); + data1.add(46.84968185424805); + data2.add(8691.598); + data2.add(25.426794052124023); + DoubleListEuclideanCoordinate coord1 = new DoubleListEuclideanCoordinate(data1); + DoubleListEuclideanCoordinate coord2 = new DoubleListEuclideanCoordinate(data2); + System.err.println(DistanceFunctions.euclidean(coord1, coord2)); + } + + + +} diff --git a/unsupervised/src/main/java/com/atl/mcod/MCOD.java b/unsupervised/src/main/java/com/atl/mcod/MCOD.java new file mode 100644 index 0000000..9c55a05 --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/MCOD.java @@ -0,0 +1,598 @@ +package com.atl.mcod; + +import java.io.BufferedWriter; +import java.io.FileNotFoundException; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.PriorityQueue; +import com.atl.mcod.utils.Data; +import java.util.List; + +/** + * + * @author Luan + */ +public class MCOD { + + private HashMap dataList_set = new HashMap<>(); + private HashMap> micro_clusters = new HashMap<>(); + private ArrayList PD = new ArrayList<>(); + // store list ob in increasing time arrival order + private ArrayList dataList = new ArrayList<>(); + private MTreeClass mtree = new MTreeClass(); + private ArrayList outlierList = new ArrayList<>(); + private PriorityQueue eventQueue = new PriorityQueue<>(new MCComparator()); + + private double R; + private int k; + private int W; + private int slide; + + public long currentTime = 0; // Naskos change + + public MCOD(double R, int k, int W, int slide) { + this.R = R; + this.k = k; + this.W = W; + this.slide = slide; + } + + public ArrayList detectOutlier(List data, long currentTime, int W, int slide) { + this.currentTime = currentTime;//Naskos change + ArrayList result = new ArrayList<>(); + + if (slide != W) { + //purge expire object + for (int i = dataList.size() - 1; i >= 0; i--) { + MCO d = dataList.get(i); + if (d.getArrivalTime() <= currentTime - W) { + //remove d from data List + dataList.remove(i); + + //if d is in cluster + if (d.isInCluster) { + removeFromCluster(d); + } + //if d is PD + + removeFromPD(d); + //process event queue + process_event_queue(currentTime); + + } + } + } else { + micro_clusters.clear(); + dataList.clear(); + dataList_set.clear(); + eventQueue.clear(); + mtree = null; + 
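+            //when slide == W the windows do not overlap (tumbling windows), so the
+            //whole per-window state is simply cleared and rebuilt here rather than
+            //expiring points one by one as in the sliding case above.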
mtree = new MTreeClass(); + PD.clear(); + outlierList.clear(); + } + //process new data + data.stream().forEach((d) -> { + processNewData(d); + }); + + //add result + outlierList.stream().forEach((o) -> { + result.add(o); + }); + return result; + } + + public double computeAvgNeighborList (){ + double result = 0; + for(MCO point: PD){ + result += point.exps.size(); + } + return result/PD.size(); + } + + public int computeNumberOfPointsInCluster() { + int count = 0; + for (ArrayList points : micro_clusters.values()) { + count += points.size(); + } + return count; + } + +// public void addToHashMap(Integer o1, Integer o2) { +// ArrayList values = checkedPoints.get(o1); +// if (values != null) { +// values.add(o2); +// checkedPoints.put(o1, values); +// } else { +// values = new ArrayList<>(); +// values.add(o2); +// checkedPoints.put(o1, values); +// } +// } +// public boolean checkInHashMap(Integer key, Integer v) { +// ArrayList values = checkedPoints.get(key); +// return values != null && values.contains(v); +// } + private void removeFromCluster(MCO d) { + + //get the cluster + ArrayList cluster = micro_clusters.get(d.center); + if (cluster != null) { + cluster.remove(d); + micro_clusters.put(d.center, cluster); + + //cluster is shrinked + if (cluster.size() < k + 1) { + //remove this cluster from micro cluster list + micro_clusters.remove(d.center); + dataList_set.remove(d.center); + Collections.sort(cluster, new MCComparatorArrivalTime()); + //process the objects in clusters + for (int i = 0; i < cluster.size(); i++) { + MCO o = cluster.get(i); + //reset all objects + resetObject(o); + //put into PD + + o.numberOfSucceeding = o.numberOfSucceeding + cluster.size() - 1 - i; + addToPD(o, true); + + } + + } + } + + } + + private void removeFromPD(MCO d) { + //remove from pd + PD.remove(d); +// mtree.remove(d); + + //if d is in outlier list + if (d.numberOfSucceeding + d.exps.size() < k) { + if(outlierList.contains(d)){ + d.setOutlier_end_time(currentTime); //Naskos change + } + outlierList.remove(d); + } + + outlierList.stream().forEach((data) -> { + while (data.exps.size() > 0 && data.exps.get(0) <= d.getArrivalTime() + W) { + data.exps.remove(0); + if (data.exps.isEmpty()) { + data.ev = 0; + } else { + data.ev = data.exps.get(0); + } + } + }); + } + + private void resetObject(MCO o) { + o.exps.clear(); + o.Rmc.clear(); + o.isCenter = false; + o.isInCluster = false; + o.ev = 0; + o.center = -1; + o.numberOfSucceeding = 0; + + } + + public void appendToFile(String filename, String str) throws FileNotFoundException, UnsupportedEncodingException { + try (PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(filename, true)))) { + out.println(str); + } catch (IOException e) { + //exception handling left as an exercise for the reader + } + } + + private void addToPD(MCO o, boolean fromCluster) { + + PD.stream().forEach((inPD) -> { + //compute distance + double distance = mtree.getDistanceFunction().calculate(o, inPD); + if (distance <= R) { + //check inPD is succeeding or preceding neighbor + if (isSameSlide(inPD, o) == -1) { + //is preceeding neighbor + o.exps.add(inPD.getArrivalTime() + W); + if (!fromCluster) { + inPD.numberOfSucceeding++; + } + } else if (isSameSlide(inPD, o) == 0) { + o.numberOfSucceeding++; + if (!fromCluster) { + inPD.numberOfSucceeding++; + } + } else { + o.numberOfSucceeding++; + if (!fromCluster) { + inPD.exps.add(o.getArrivalTime() + W); + } + + } + //just keep k-numberofSucceedingNeighbor + if (!fromCluster) { + checkInlier(inPD); + } + + } + }); + + 
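+        //Why 3R/2: every micro-cluster has radius R/2, so by the triangle inequality a
+        //cluster whose center lies farther than 3R/2 from o cannot contain any neighbor
+        //of o within distance R; only the clusters returned below need to be scanned.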
//find neighbors in clusters (3R/2) + ArrayList clusters = findClusterIn3_2Range(o); + clusters.stream().map((center_id) -> micro_clusters.get(center_id)).forEach((points) -> { + points.stream().filter((p) -> (isNeighbor(p, o))).forEach((p) -> { + if (isSameSlide(o, p) <= 0) { + //o is preceeding neighbor + o.numberOfSucceeding++; + } else { + //p is preceeding neighbor + o.exps.add(p.getArrivalTime() + W); + } + }); + }); + + //keep k-numberofSucceedingNeighbor of o + checkInlier(o); + + PD.add(o); +// mtree.add(o); + + } + + public int isSameSlide(MCO o1, MCO o2) { + if ((o1.getArrivalTime() - 1) / slide == (o2.getArrivalTime() - 1) / slide) { + return 0; + } else if ((o1.getArrivalTime() - 1) / slide < (o2.getArrivalTime() - 1) / slide) { + return -1; + } else { + return 1; + } + } + + public long findNearestCenter(MCO d) { + + double min_distance = Double.MAX_VALUE; + long min_center_id = -1; + for (Long center_id : micro_clusters.keySet()) { + //get the center object + MCO center = dataList_set.get(center_id); + //compute the distance + double distance = mtree.getDistanceFunction().calculate(center, d); + + if (distance < min_distance) { + min_distance = distance; + min_center_id = center_id; + } + } + return min_center_id; + + } + + public ArrayList findClusterIn3_2Range(MCO d) { + ArrayList result = new ArrayList<>(); + micro_clusters.keySet().stream().forEach((center_id) -> { + //get the center object + MCO center = dataList_set.get(center_id); + //compute the distance + double distance = mtree.getDistanceFunction().calculate(center, d); + if (distance <= R * 3.0 / 2) { + result.add(center_id); + } + }); + return result; + } + + private void processNewData(Data data) { + + MCO d = new MCO(data); + + //add to datalist + dataList.add(d); + + long nearest_center_id = findNearestCenter(d); + double min_distance = Double.MAX_VALUE; + if (nearest_center_id > -1) { //found neareast cluster + min_distance = mtree.getDistanceFunction(). + calculate(dataList_set.get(nearest_center_id), d); + } + //assign to cluster if min distance <= R/2 + if (min_distance <= R / 2) { + addToCluster(nearest_center_id, d); + } else { + //find all neighbors for d in PD that can form a cluster + ArrayList neighborsInR2Distance = findNeighborR2InPD(d); + if (neighborsInR2Distance.size() >= k * 1.1) { + //form new cluster + formNewCluster(d, neighborsInR2Distance); + + } else { + //cannot form a new cluster + addToPD(d, false); + } + } + if (d.isCenter) { + dataList_set.put(d.getArrivalTime(), d); + } + + } + + private void addToCluster(long nearest_center_id, MCO d) { + + //update for points in cluster + d.isCenter = false; + d.isInCluster = true; + d.center = nearest_center_id; + ArrayList cluster = micro_clusters.get(nearest_center_id); + cluster.add(d); + micro_clusters.put(nearest_center_id, cluster); + + //update for points in PD that has Rmc list contains center + PD.stream().filter((inPD) -> (inPD.Rmc.contains(nearest_center_id))).forEach((inPD) -> { + //check if inPD is neighbor of d + double distance = mtree.getDistanceFunction(). 
+ calculate(d, inPD); + if (distance <= R) { + if (isSameSlide(d, inPD) == -1) { + inPD.exps.add(d.getArrivalTime() + W); + + } else if (isSameSlide(d, inPD) >= 0) { + inPD.numberOfSucceeding++; + } + //mark inPD has checked with d +// addToHashMap(inPD.arrivalTime,d.arrivalTime); + //check if inPD become inlier + checkInlier(inPD); + } + }); + + } + + public ArrayList findNeighborR2InPD(MCO d) { + ArrayList results = new ArrayList<>(); + PD.stream().filter((o) -> (mtree.getDistanceFunction().calculate(o, d) <= R * 1.0 / 2)).forEach((o) -> { + results.add(o); + }); + return results; + } + + public boolean isOutlier(MCO d) { + return d.numberOfSucceeding + d.exps.size() < k; + } + + private void formNewCluster(MCO d, ArrayList neighborsInR2Distance) { + + d.isCenter = true; + d.isInCluster = true; + d.center = d.getArrivalTime(); + neighborsInR2Distance.stream().map((data) -> { + PD.remove(data); + return data; + }).map((data) -> { + if (isOutlier(data)) { + if(outlierList.contains(data)){ + data.setOutlier_end_time(currentTime); //Naskos change + } + outlierList.remove(data); + } + return data; + }).map((data) -> { + if (!isOutlier(data)) { + eventQueue.remove(data); + } + return data; + }).map((data) -> { + resetObject(data); + return data; + }).map((data) -> { + data.isInCluster = true; + return data; + }).map((data) -> { + data.center = d.getArrivalTime(); + return data; + }).forEach((data) -> { + data.isCenter = false; + }); + + //add center to neighbor list + Collections.sort(neighborsInR2Distance, new MCComparatorArrivalTime()); + neighborsInR2Distance.add(d); + micro_clusters.put(d.getArrivalTime(), neighborsInR2Distance); + + //update Rmc list + ArrayList list_rmc = findNeighborInR3_2InPD(d); + list_rmc.stream().map((o) -> { + if (isNeighbor(o, d)) { + if (isSameSlide(o, d) <= 0) { + o.numberOfSucceeding++; + } else { + o.exps.add(d.getArrivalTime() + W); + } +// addToHashMap(o.arrivalTime,d.arrivalTime); + checkInlier(o); + + } + return o; + }).forEach((o) -> { + o.Rmc.add(d.getArrivalTime()); + }); + + } + + private ArrayList findNeighborInRInPD(MCO d) { + + ArrayList result = new ArrayList<>(); + + PD.stream().filter((o) -> (mtree.getDistanceFunction().calculate(o, d) <= R)).forEach((o) -> { + result.add(o); + }); + return result; + } + + private ArrayList findNeighborInR3_2InPD(MCO d) { + + ArrayList result = new ArrayList<>(); + + PD.stream().forEach((p) -> { + double distance = mtree.getDistanceFunction().calculate(p, d); + if (distance <= R * 3.0 / 2) { + result.add(p); + } + }); + return result; + } + + private void checkInlier(MCO inPD) { + Collections.sort(inPD.exps); + + while (inPD.exps.size() > k - inPD.numberOfSucceeding && inPD.exps.size() > 0) { + inPD.exps.remove(0); + } + if (inPD.exps.size() > 0) { + inPD.ev = inPD.exps.get(0); + } else { + inPD.ev = 0; + } + + if (inPD.exps.size() + inPD.numberOfSucceeding >= k) { + if (inPD.numberOfSucceeding >= k) { + + eventQueue.remove(inPD); + + if(outlierList.contains(inPD)){ + inPD.setOutlier_end_time(currentTime); //Naskos change + } + outlierList.remove(inPD); + } else { + if(outlierList.contains(inPD)){ + inPD.setOutlier_end_time(currentTime); //Naskos change + } + outlierList.remove(inPD); + if (!eventQueue.contains(inPD)) { + eventQueue.add(inPD); + } + } + + } else { + eventQueue.remove(inPD); + if (!outlierList.contains(inPD)) { + outlierList.add(inPD); + if(inPD.getOutlier_start_time() == 0){ + inPD.setOutlier_start_time(currentTime); //Naskos change + } + } + } + } + + private boolean isNeighbor(MCO p, MCO o) { + 
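+        //neighborhood test used throughout MCOD: two points are neighbors iff their
+        //distance does not exceed the radius R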
double d = mtree.getDistanceFunction().calculate(p, o); + return d <= R; + } + + private void process_event_queue(long currentTime) { + MCO x = eventQueue.peek(); + + while (x != null && x.ev <= currentTime) { + + x = eventQueue.poll(); + while (x.exps.get(0) <= currentTime) { + x.exps.remove(0); + if (x.exps.isEmpty()) { + x.ev = 0; + break; + } else { + x.ev = x.exps.get(0); + } + } + if (x.exps.size() + x.numberOfSucceeding < k) { + + outlierList.add(x); + if(x.getOutlier_start_time() == 0){ + x.setOutlier_start_time(currentTime); //Naskos change + } + + } else if (x.numberOfSucceeding < k + && x.exps.size() + x.numberOfSucceeding >= k) { + eventQueue.add(x); + } + + x = eventQueue.peek(); + + } + } + + static class MCComparator implements Comparator { + + @Override + public int compare(MCO o1, MCO o2) { + if (o1.ev < o2.ev) { + return -1; + } else if (o1.ev == o2.ev) { + return 0; + } else { + return 1; + } + + } + + } + + static class MCComparatorArrivalTime implements Comparator { + + @Override + public int compare(MCO o1, MCO o2) { + if (o1.getArrivalTime() < o2.getArrivalTime()) { + return -1; + } else if (o1.getArrivalTime() == o2.getArrivalTime()) { + return 0; + } else { + return 1; + } + + } + + } + + static class MCO extends Data { + + public long center; + public ArrayList exps; + public ArrayList Rmc; + + public long ev; + public boolean isInCluster; + public boolean isCenter; + + public int numberOfSucceeding; + + public MCO(Data d) { + super(); + this.setArrivalTime(d.getArrivalTime()); + this.setActualTime(d.getActualTime()); + this.setValues(d.getValues()); + + exps = new ArrayList<>(); + Rmc = new ArrayList<>(); + isCenter = false; + isInCluster = false; + } + + } + + public int getDataListSize() { + return dataList.size(); + } + + + +} diff --git a/unsupervised/src/main/java/com/atl/mcod/MTree.java b/unsupervised/src/main/java/com/atl/mcod/MTree.java new file mode 100644 index 0000000..3a48a9d --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/MTree.java @@ -0,0 +1,1009 @@ +package com.atl.mcod; + +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.PriorityQueue; +import java.util.Set; + +import com.atl.mcod.SplitFunction.SplitResult; +import com.atl.mcod.utils.Pair; + + +/** + * The main class that implements the M-Tree. + * + * @param The type of data that will be indexed by the M-Tree. Objects of + * this type are stored in HashMaps and HashSets, so their + * {@code hashCode()} and {@code equals()} methods must be consistent. + */ +public class MTree { + + /** + * The type of the results for nearest-neighbor queries. + */ + public class ResultItem { + private ResultItem(DATA data, double distance) { + this.data = data; + this.distance = distance; + } + + /** A nearest-neighbor. */ + public DATA data; + + /** + * The distance from the nearest-neighbor to the query data object + * parameter. + */ + public double distance; + } + + + // Exception classes + private static class SplitNodeReplacement extends Exception { + // A subclass of Throwable cannot be generic. :-( + // So, we have newNodes declared as Object[] instead of Node[]. + private Object newNodes[]; + + private SplitNodeReplacement(Object... newNodes) { + this.newNodes = newNodes; + } + } + + private static class RootNodeReplacement extends Exception { + // A subclass of Throwable cannot be generic. 
:-( + // So, we have newRoot declared as Object instead of Node. + private Object newRoot; + + private RootNodeReplacement(Object newRoot) { + this.newRoot = newRoot; + } + } + + + private static class NodeUnderCapacity extends Exception { } + + + private static class DataNotFound extends Exception { } + + /** + * An {@link Iterable} class which can be iterated to fetch the results of a + * nearest-neighbors query. + * + *

The neighbors are presented in non-decreasing order of distance from the {@code + * queryData} argument to the {@link MTree#getNearest(Object, double, int) + * getNearest*()} + * call. + * + *

The query on the M-Tree is executed during the iteration, as the + * results are fetched. It means that, by the time when the n-th + * result is fetched, the next result may still not be known, and the + * resources allocated were only the necessary to identify the n + * first results. + */ + public class Query implements Iterable { + + private class ResultsIterator implements Iterator { + + private class ItemWithDistances implements Comparable> { + private U item; + private double distance; + private double minDistance; + + public ItemWithDistances(U item, double distance, double minDistance) { + this.item = item; + this.distance = distance; + this.minDistance = minDistance; + } + + @Override + public int compareTo(ItemWithDistances that) { + if(this.minDistance < that.minDistance) { + return -1; + } else if(this.minDistance > that.minDistance) { + return +1; + } else { + return 0; + } + } + } + + + private ResultItem nextResultItem = null; + private boolean finished = false; + private PriorityQueue> pendingQueue = new PriorityQueue>(); + private double nextPendingMinDistance; + private PriorityQueue> nearestQueue = new PriorityQueue>(); + private int yieldedCount; + + private ResultsIterator() { + if(MTree.this.root == null) { + finished = true; + return; + } + + double distance = MTree.this.distanceFunction.calculate(Query.this.data, MTree.this.root.data); + double minDistance = Math.max(distance - MTree.this.root.radius, 0.0); + + pendingQueue.add(new ItemWithDistances(MTree.this.root, distance, minDistance)); + nextPendingMinDistance = minDistance; + } + + + @Override + public boolean hasNext() { + if(finished) { + return false; + } + + if(nextResultItem == null) { + fetchNext(); + } + + if(nextResultItem == null) { + finished = true; + return false; + } else { + return true; + } + } + + @Override + public ResultItem next() { + if(hasNext()) { + ResultItem next = nextResultItem; + nextResultItem = null; + return next; + } else { + throw new NoSuchElementException(); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + + private void fetchNext() { + assert !finished; + + if(finished || yieldedCount >= Query.this.limit) { + finished = true; + return; + } + + while(!pendingQueue.isEmpty() || !nearestQueue.isEmpty()) { + if(prepareNextNearest()) { + return; + } + + assert !pendingQueue.isEmpty(); + + ItemWithDistances pending = pendingQueue.poll(); + Node node = pending.item; + + for(IndexItem child : node.children.values()) { + if(Math.abs(pending.distance - child.distanceToParent) - child.radius <= Query.this.range) { + double childDistance = MTree.this.distanceFunction.calculate(Query.this.data, child.data); + double childMinDistance = Math.max(childDistance - child.radius, 0.0); + if(childMinDistance <= Query.this.range) { + if(child instanceof MTree.Entry) { + @SuppressWarnings("unchecked") + Entry entry = (Entry)child; + nearestQueue.add(new ItemWithDistances(entry, childDistance, childMinDistance)); + } else { + @SuppressWarnings("unchecked") + Node childNode = (Node)child; + pendingQueue.add(new ItemWithDistances(childNode, childDistance, childMinDistance)); + } + } + } + } + + if(pendingQueue.isEmpty()) { + nextPendingMinDistance = Double.POSITIVE_INFINITY; + } else { + nextPendingMinDistance = pendingQueue.peek().minDistance; + } + } + + finished = true; + } + + + private boolean prepareNextNearest() { + if(!nearestQueue.isEmpty()) { + ItemWithDistances nextNearest = nearestQueue.peek(); + if(nextNearest.distance <= 
nextPendingMinDistance) { + nearestQueue.poll(); + nextResultItem = new ResultItem(nextNearest.item.data, nextNearest.distance); + ++yieldedCount; + return true; + } + } + + return false; + } + + } + + + private Query(DATA data, double range, int limit) { + this.data = data; + this.range = range; + this.limit = limit; + } + + + @Override + public Iterator iterator() { + return new ResultsIterator(); + } + + + private DATA data; + private double range; + private int limit; + } + + + + /** + * The default minimum capacity of nodes in an M-Tree, when not specified in + * the constructor call. + */ + public static final int DEFAULT_MIN_NODE_CAPACITY = 50; + + + protected int minNodeCapacity; + protected int maxNodeCapacity; + protected DistanceFunction distanceFunction; + protected SplitFunction splitFunction; + protected Node root; + + + /** + * Constructs an M-Tree with the specified distance function. + * @param distanceFunction The object used to calculate the distance between + * two data objects. + */ + public MTree(DistanceFunction distanceFunction, + SplitFunction splitFunction) { + this(DEFAULT_MIN_NODE_CAPACITY, distanceFunction, splitFunction); + } + + /** + * Constructs an M-Tree with the specified minimum node capacity and + * distance function. + * @param minNodeCapacity The minimum capacity for the nodes of the tree. + * @param distanceFunction The object used to calculate the distance between + * two data objects. + * @param splitFunction The object used to process the split of nodes if + * they are full when a new child must be added. + */ + public MTree(int minNodeCapacity, + DistanceFunction distanceFunction, + SplitFunction splitFunction) { + this(minNodeCapacity, 2 * minNodeCapacity - 1, distanceFunction, splitFunction); + } + + /** + * Constructs an M-Tree with the specified minimum and maximum node + * capacities and distance function. + * @param minNodeCapacity The minimum capacity for the nodes of the tree. + * @param maxNodeCapacity The maximum capacity for the nodes of the tree. + * @param distanceFunction The object used to calculate the distance between + * two data objects. + * @param splitFunction The object used to process the split of nodes if + * they are full when a new child must be added. + */ + public MTree(int minNodeCapacity, int maxNodeCapacity, + DistanceFunction distanceFunction, + SplitFunction splitFunction) + { + if(minNodeCapacity < 2 || maxNodeCapacity <= minNodeCapacity || + distanceFunction == null) { + throw new IllegalArgumentException(); + } + + if(splitFunction == null) { + splitFunction = new ComposedSplitFunction( + new PromotionFunctions.RandomPromotion(), + new PartitionFunctions.BalancedPartition() + ); + } + + this.minNodeCapacity = minNodeCapacity; + this.maxNodeCapacity = maxNodeCapacity; + this.distanceFunction = distanceFunction; + this.splitFunction = splitFunction; + this.root = null; + } + + + /** + * Adds and indexes a data object. + * + *

An object that is already indexed should not be added. There is no + * validation regarding this, and the behavior is undefined if done. + * + * @param data The data object to index. + */ + public void add(DATA data) { + if(root == null) { + root = new RootLeafNode(data); + try { + root.addData(data, 0); + } catch (SplitNodeReplacement e) { + throw new RuntimeException("Should never happen!"); + } + } else { + double distance = distanceFunction.calculate(data, root.data); + try { + root.addData(data, distance); + } catch(SplitNodeReplacement e) { + Node newRoot = new RootNode(data); + root = newRoot; + for(int i = 0; i < e.newNodes.length; i++) { + @SuppressWarnings("unchecked") + Node newNode = (Node) e.newNodes[i]; + distance = distanceFunction.calculate(root.data, newNode.data); + root.addChild(newNode, distance); + } + } + } + } + + + /** + * Removes a data object from the M-Tree. + * @param data The data object to be removed. + * @return {@code true} if and only if the object was found. + */ + public boolean remove(DATA data) { + if(root == null) { + return false; + } + + double distanceToRoot = distanceFunction.calculate(data, root.data); + try { + root.removeData(data, distanceToRoot); + } catch(RootNodeReplacement e) { + @SuppressWarnings("unchecked") + Node newRoot = (Node) e.newRoot; + root = newRoot; + } catch(DataNotFound e) { + return false; + } catch (NodeUnderCapacity e) { + throw new RuntimeException("Should have never happened", e); + } + return true; + } + + /** + * Performs a nearest-neighbors query on the M-Tree, constrained by distance. + * @param queryData The query data object. + * @param range The maximum distance from {@code queryData} to fetched + * neighbors. + * @return A {@link Query} object used to iterate on the results. + */ + public Query getNearestByRange(DATA queryData, double range) { + return getNearest(queryData, range, Integer.MAX_VALUE); + } + + + /** + * Performs a nearest-neighbors query on the M-Tree, constrained by the + * number of neighbors. + * @param queryData The query data object. + * @param limit The maximum number of neighbors to fetch. + * @return A {@link Query} object used to iterate on the results. + */ + public Query getNearestByLimit(DATA queryData, int limit) { + return getNearest(queryData, Double.POSITIVE_INFINITY, limit); + } + + /** + * Performs a nearest-neighbor query on the M-Tree, constrained by distance + * and/or the number of neighbors. + * @param queryData The query data object. + * @param range The maximum distance from {@code queryData} to fetched + * neighbors. + * @param limit The maximum number of neighbors to fetch. + * @return A {@link Query} object used to iterate on the results. + */ + public Query getNearest(DATA queryData, double range, int limit) { + return new Query(queryData, range, limit); + } + + /** + * Performs a nearest-neighbor query on the M-Tree, without constraints. + * @param queryData The query data object. + * @return A {@link Query} object used to iterate on the results. 
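+	 *
+	 * An illustrative usage sketch (not part of the original sources; it assumes a
+	 * tree already populated with {@code add()} and iterates the returned
+	 * {@link Query}, which yields neighbors in non-decreasing order of distance):
+	 *
+	 *     MTree tree = ...; // e.g. the MTreeClass defined in this package
+	 *     for (ResultItem ri : tree.getNearest(queryData)) {
+	 *         // ri.data is the neighbor, ri.distance its distance to queryData
+	 *     }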
+ */ + public Query getNearest(DATA queryData) { + return new Query(queryData, Double.POSITIVE_INFINITY, Integer.MAX_VALUE); + } + + + protected void _check() { + if(root != null) { + root._check(); + } + } + + + private class IndexItem { + DATA data; + protected double radius; + double distanceToParent; + + private IndexItem(DATA data) { + this.data = data; + this.radius = 0; + this.distanceToParent = -1; + } + + int _check() { + _checkRadius(); + _checkDistanceToParent(); + return 1; + } + + private void _checkRadius() { + assert radius >= 0; + } + + protected void _checkDistanceToParent() { + assert !(this instanceof MTree.RootLeafNode); + assert !(this instanceof MTree.RootNode); + assert distanceToParent >= 0; + } + } + + + + private abstract class Node extends IndexItem { + + protected Map children = new HashMap(); + protected Rootness rootness; + protected Leafness leafness; + + private + > + Node(DATA data, R rootness, L leafness) { + super(data); + + rootness.thisNode = this; + this.rootness = rootness; + + leafness.thisNode = this; + this.leafness = leafness; + } + + private final void addData(DATA data, double distance) throws SplitNodeReplacement { + doAddData(data, distance); + checkMaxCapacity(); + } + + int _check() { + super._check(); + _checkMinCapacity(); + _checkMaxCapacity(); + + int childHeight = -1; + for(Map.Entry e : children.entrySet()) { + DATA data = e.getKey(); + IndexItem child = e.getValue(); + assert child.data.equals(data); + + _checkChildClass(child); + _checkChildMetrics(child); + + int height = child._check(); + if(childHeight < 0) { + childHeight = height; + } else { + assert childHeight == height; + } + } + + return childHeight + 1; + } + + protected void doAddData(DATA data, double distance) { + leafness.doAddData(data, distance); + } + + protected void doRemoveData(DATA data, double distance) throws DataNotFound { + leafness.doRemoveData(data, distance); + } + + private final void checkMaxCapacity() throws SplitNodeReplacement { + if(children.size() > MTree.this.maxNodeCapacity) { + DistanceFunction cachedDistanceFunction = DistanceFunctions.cached(MTree.this.distanceFunction); + SplitResult splitResult = MTree.this.splitFunction.process(children.keySet(), cachedDistanceFunction); + + Node newNode0 = null; + Node newNode1 = null; + for(int i = 0; i < 2; ++i) { + DATA promotedData = splitResult.promoted.get(i); + Set partition = splitResult.partitions.get(i); + + Node newNode = newSplitNodeReplacement(promotedData); + for(DATA data : partition) { + IndexItem child = children.get(data); + children.remove(data); + double distance = cachedDistanceFunction.calculate(promotedData, data); + newNode.addChild(child, distance); + } + + if(i == 0) { + newNode0 = newNode; + } else { + newNode1 = newNode; + } + } + assert children.isEmpty(); + + throw new SplitNodeReplacement(newNode0, newNode1); + } + + } + + protected Node newSplitNodeReplacement(DATA data) { + return leafness.newSplitNodeReplacement(data); + } + + protected void addChild(IndexItem child, double distance) { + leafness.addChild(child, distance); + } + + void removeData(DATA data, double distance) throws RootNodeReplacement, NodeUnderCapacity, DataNotFound { + doRemoveData(data, distance); + if(children.size() < getMinCapacity()) { + throw new NodeUnderCapacity(); + } + } + + protected int getMinCapacity() { + return rootness.getMinCapacity(); + } + + private void updateMetrics(IndexItem child, double distance) { + child.distanceToParent = distance; + updateRadius(child); + } + + private void 
updateRadius(IndexItem child) { + this.radius = Math.max(this.radius, child.distanceToParent + child.radius); + } + + void _checkMinCapacity() { + rootness._checkMinCapacity(); + } + + private void _checkMaxCapacity() { + assert children.size() <= MTree.this.maxNodeCapacity; + } + + private void _checkChildClass(IndexItem child) { + leafness._checkChildClass(child); + } + + private void _checkChildMetrics(IndexItem child) { + double dist = MTree.this.distanceFunction.calculate(child.data, this.data); + assert child.distanceToParent == dist; + + double sum = child.distanceToParent + child.radius; + assert sum <= this.radius; + } + + protected void _checkDistanceToParent() { + rootness._checkDistanceToParent(); + } + + private MTree mtree() { + return MTree.this; + } + } + + + + + private abstract class NodeTrait { + protected Node thisNode; + } + + private interface Leafness { + void doAddData(DATA data, double distance); + void addChild(MTree.IndexItem child, double distance); + void doRemoveData(DATA data, double distance) throws DataNotFound; + MTree.Node newSplitNodeReplacement(DATA data); + void _checkChildClass(MTree.IndexItem child); + } + + private interface Rootness { + int getMinCapacity(); + void _checkDistanceToParent(); + void _checkMinCapacity(); + } + + + + private class RootNodeTrait extends NodeTrait implements Rootness { + + @Override + public int getMinCapacity() { + throw new RuntimeException("Should not be called!"); + } + + @Override + public void _checkDistanceToParent() { + assert thisNode.distanceToParent == -1; + } + + @Override + public void _checkMinCapacity() { + thisNode._checkMinCapacity(); + } + + }; + + + private class NonRootNodeTrait extends NodeTrait implements Rootness { + + @Override + public int getMinCapacity() { + return MTree.this.minNodeCapacity; + } + + @Override + public void _checkMinCapacity() { + assert thisNode.children.size() >= thisNode.mtree().minNodeCapacity; + } + + @Override + public void _checkDistanceToParent() { + assert thisNode.distanceToParent >= 0; + } + }; + + + private class LeafNodeTrait extends NodeTrait implements Leafness { + + public void doAddData(DATA data, double distance) { + Entry entry = thisNode.mtree().new Entry(data); + assert !thisNode.children.containsKey(data); + thisNode.children.put(data, entry); + assert thisNode.children.containsKey(data); + thisNode.updateMetrics(entry, distance); + } + + public void addChild(IndexItem child, double distance) { + assert !thisNode.children.containsKey(child.data); + thisNode.children.put(child.data, child); + assert thisNode.children.containsKey(child.data); + thisNode.updateMetrics(child, distance); + } + + public Node newSplitNodeReplacement(DATA data) { + return thisNode.mtree().new LeafNode(data); + } + + + @Override + public void doRemoveData(DATA data, double distance) throws DataNotFound { + if(thisNode.children.remove(data) == null) { + throw new DataNotFound(); + } + } + + public void _checkChildClass(IndexItem child) { + assert child instanceof MTree.Entry; + } + } + + + class NonLeafNodeTrait extends NodeTrait implements Leafness { + + public void doAddData(DATA data, double distance) { + class CandidateChild { + Node node; + double distance; + double metric; + private CandidateChild(Node node, double distance, double metric) { + this.node = node; + this.distance = distance; + this.metric = metric; + } + } + + CandidateChild minRadiusIncreaseNeeded = new CandidateChild(null, -1.0, Double.POSITIVE_INFINITY); + CandidateChild nearestDistance = new 
CandidateChild(null, -1.0, Double.POSITIVE_INFINITY); + + for(IndexItem item : thisNode.children.values()) { + @SuppressWarnings("unchecked") + Node child = (Node)item; + double childDistance = thisNode.mtree().distanceFunction.calculate(child.data, data); + if(childDistance > child.radius) { + double radiusIncrease = childDistance - child.radius; + if(radiusIncrease < minRadiusIncreaseNeeded.metric) { + minRadiusIncreaseNeeded = new CandidateChild(child, childDistance, radiusIncrease); + } + } else { + if(childDistance < nearestDistance.metric) { + nearestDistance = new CandidateChild(child, childDistance, childDistance); + } + } + } + + CandidateChild chosen = (nearestDistance.node != null) + ? nearestDistance + : minRadiusIncreaseNeeded; + + Node child = chosen.node; + try { + child.addData(data, chosen.distance); + thisNode.updateRadius(child); + } catch(SplitNodeReplacement e) { + // Replace current child with new nodes + IndexItem _ = thisNode.children.remove(child.data); + assert _ != null; + + for(int i = 0; i < e.newNodes.length; ++i) { + @SuppressWarnings("unchecked") + Node newChild = (Node) e.newNodes[i]; + distance = thisNode.mtree().distanceFunction.calculate(thisNode.data, newChild.data); + thisNode.addChild(newChild, distance); + } + } + } + + + public void addChild(IndexItem newChild_, double distance) { + @SuppressWarnings("unchecked") + Node newChild = (Node) newChild_; + + class ChildWithDistance { + Node child; + double distance; + private ChildWithDistance(Node child, double distance) { + this.child = child; + this.distance = distance; + } + } + + Deque newChildren = new ArrayDeque(); + newChildren.addFirst(new ChildWithDistance(newChild, distance)); + + while(!newChildren.isEmpty()) { + ChildWithDistance cwd = newChildren.removeFirst(); + + newChild = cwd.child; + distance = cwd.distance; + if(thisNode.children.containsKey(newChild.data)) { + @SuppressWarnings("unchecked") + Node existingChild = (Node) thisNode.children.get(newChild.data); + assert existingChild.data.equals(newChild.data); + + // Transfer the _children_ of the newChild to the existingChild + for(IndexItem grandchild : newChild.children.values()) { + existingChild.addChild(grandchild, grandchild.distanceToParent); + } + newChild.children.clear(); + + try { + existingChild.checkMaxCapacity(); + } catch(SplitNodeReplacement e) { + IndexItem _ = thisNode.children.remove(existingChild.data); + assert _ != null; + + for(int i = 0; i < e.newNodes.length; ++i) { + @SuppressWarnings("unchecked") + Node newNode = (Node) e.newNodes[i]; + distance = thisNode.mtree().distanceFunction.calculate(thisNode.data, newNode.data); + newChildren.addFirst(new ChildWithDistance(newNode, distance)); + } + } + } else { + thisNode.children.put(newChild.data, newChild); + thisNode.updateMetrics(newChild, distance); + } + } + } + + + public Node newSplitNodeReplacement(DATA data) { + return new InternalNode(data); + } + + + public void doRemoveData(DATA data, double distance) throws DataNotFound { + for(IndexItem childItem : thisNode.children.values()) { + @SuppressWarnings("unchecked") + Node child = (Node)childItem; + if(Math.abs(distance - child.distanceToParent) <= child.radius) { + double distanceToChild = thisNode.mtree().distanceFunction.calculate(data, child.data); + if(distanceToChild <= child.radius) { + try { + child.removeData(data, distanceToChild); + thisNode.updateRadius(child); + return; + } catch(DataNotFound e) { + // If DataNotFound was thrown, then the data was not found in the child + } 
catch(NodeUnderCapacity e) { + Node expandedChild = balanceChildren(child); + thisNode.updateRadius(expandedChild); + return; + } catch (RootNodeReplacement e) { + throw new RuntimeException("Should never happen!"); + } + } + } + } + + throw new DataNotFound(); + } + + + private Node balanceChildren(Node theChild) { + // Tries to find anotherChild which can donate a grand-child to theChild. + + Node nearestDonor = null; + double distanceNearestDonor = Double.POSITIVE_INFINITY; + + Node nearestMergeCandidate = null; + double distanceNearestMergeCandidate = Double.POSITIVE_INFINITY; + + for(IndexItem child : thisNode.children.values()) { + @SuppressWarnings("unchecked") + Node anotherChild = (Node)child; + if(anotherChild == theChild) continue; + + double distance = thisNode.mtree().distanceFunction.calculate(theChild.data, anotherChild.data); + if(anotherChild.children.size() > anotherChild.getMinCapacity()) { + if(distance < distanceNearestDonor) { + distanceNearestDonor = distance; + nearestDonor = anotherChild; + } + } else { + if(distance < distanceNearestMergeCandidate) { + distanceNearestMergeCandidate = distance; + nearestMergeCandidate = anotherChild; + } + } + } + + if(nearestDonor == null) { + // Merge + for(IndexItem grandchild : theChild.children.values()) { + double distance = thisNode.mtree().distanceFunction.calculate(grandchild.data, nearestMergeCandidate.data); + nearestMergeCandidate.addChild(grandchild, distance); + } + + IndexItem removed = thisNode.children.remove(theChild.data); + assert removed != null; + return nearestMergeCandidate; + } else { + // Donate + // Look for the nearest grandchild + IndexItem nearestGrandchild = null; + double nearestGrandchildDistance = Double.POSITIVE_INFINITY; + for(IndexItem grandchild : nearestDonor.children.values()) { + double distance = thisNode.mtree().distanceFunction.calculate(grandchild.data, theChild.data); + if(distance < nearestGrandchildDistance) { + nearestGrandchildDistance = distance; + nearestGrandchild = grandchild; + } + } + + IndexItem _ = nearestDonor.children.remove(nearestGrandchild.data); + assert _ != null; + theChild.addChild(nearestGrandchild, nearestGrandchildDistance); + return theChild; + } + } + + + public void _checkChildClass(IndexItem child) { + assert child instanceof MTree.InternalNode + || child instanceof MTree.LeafNode; + } + } + + + private class RootLeafNode extends Node { + + private RootLeafNode(DATA data) { + super(data, new RootNodeTrait(), new LeafNodeTrait()); + } + + void removeData(DATA data, double distance) throws RootNodeReplacement, DataNotFound { + try { + super.removeData(data, distance); + } catch (NodeUnderCapacity e) { + assert children.isEmpty(); + throw new RootNodeReplacement(null); + } + } + + protected int getMinCapacity() { + return 1; + } + + void _checkMinCapacity() { + assert children.size() >= 1; + } + } + + private class RootNode extends Node { + + private RootNode(DATA data) { + super(data, new RootNodeTrait(), new NonLeafNodeTrait()); + } + + void removeData(DATA data, double distance) throws RootNodeReplacement, NodeUnderCapacity, DataNotFound { + try { + super.removeData(data, distance); + } catch(NodeUnderCapacity e) { + // Promote the only child to root + @SuppressWarnings("unchecked") + Node theChild = (Node)(children.values().iterator().next()); + Node newRoot; + if(theChild instanceof MTree.InternalNode) { + newRoot = new RootNode(theChild.data); + } else { + assert theChild instanceof MTree.LeafNode; + newRoot = new RootLeafNode(theChild.data); + } + + 
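+                    //re-attach the grandchildren directly under the promoted root,
+                    //recomputing each one's distance to the new root's routing object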
for(IndexItem grandchild : theChild.children.values()) { + distance = MTree.this.distanceFunction.calculate(newRoot.data, grandchild.data); + newRoot.addChild(grandchild, distance); + } + theChild.children.clear(); + + throw new RootNodeReplacement(newRoot); + } + } + + + @Override + protected int getMinCapacity() { + return 2; + } + + @Override + void _checkMinCapacity() { + assert children.size() >= 2; + } + } + + + private class InternalNode extends Node { + private InternalNode(DATA data) { + super(data, new NonRootNodeTrait(), new NonLeafNodeTrait()); + } + }; + + + private class LeafNode extends Node { + + public LeafNode(DATA data) { + super(data, new NonRootNodeTrait(), new LeafNodeTrait()); + } + } + + + private class Entry extends IndexItem { + private Entry(DATA data) { + super(data); + } + } +} diff --git a/unsupervised/src/main/java/com/atl/mcod/MTreeClass.java b/unsupervised/src/main/java/com/atl/mcod/MTreeClass.java new file mode 100644 index 0000000..4cfd517 --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/MTreeClass.java @@ -0,0 +1,51 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package com.atl.mcod; + +import java.util.Set; +import com.atl.mcod.ComposedSplitFunction; +import com.atl.mcod.DistanceFunction; +import com.atl.mcod.DistanceFunctions; +import com.atl.mcod.MTree; +import com.atl.mcod.PartitionFunctions; +import com.atl.mcod.PromotionFunction; +import com.atl.mcod.utils.Data; +import com.atl.mcod.utils.Pair; +import com.atl.mcod.utils.Utils; + +/** + * + * @author thanasis + */ +class MTreeClass extends MTree { + + private static final PromotionFunction nonRandomPromotion = new PromotionFunction() { + @Override + public Pair process(Set dataSet, DistanceFunction distanceFunction) { + return Utils.minMax(dataSet); + } + }; + + MTreeClass() { + super(25, DistanceFunctions.EUCLIDEAN, new ComposedSplitFunction(nonRandomPromotion, + new PartitionFunctions.BalancedPartition())); + } + + public void add(Data data) { + super.add(data); + _check(); + } + + public boolean remove(Data data) { + boolean result = super.remove(data); + _check(); + return result; + } + + DistanceFunction getDistanceFunction() { + return distanceFunction; + } +}; \ No newline at end of file diff --git a/unsupervised/src/main/java/com/atl/mcod/PartitionFunction.java b/unsupervised/src/main/java/com/atl/mcod/PartitionFunction.java new file mode 100644 index 0000000..fc94ceb --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/PartitionFunction.java @@ -0,0 +1,27 @@ +package com.atl.mcod; + +import java.util.Set; + +import com.atl.mcod.utils.Pair; + +/** + * An object with partitions a set of data into two sub-sets. + * + * @param The type of the data on the sets. + */ +public interface PartitionFunction { + + /** + * Executes the partitioning. + * + * @param promoted The pair of data objects that will guide the partition + * process. + * @param dataSet The original set of data objects to be partitioned. + * @param distanceFunction A {@linkplain DistanceFunction distance function} + * to be used on the partitioning. + * @return A pair of partition sub-sets. Each sub-set must correspond to one + * of the {@code promoted} data objects. 
+ */ + Pair> process(Pair promoted, Set dataSet, DistanceFunction distanceFunction); + +} diff --git a/unsupervised/src/main/java/com/atl/mcod/PartitionFunctions.java b/unsupervised/src/main/java/com/atl/mcod/PartitionFunctions.java new file mode 100644 index 0000000..416d5aa --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/PartitionFunctions.java @@ -0,0 +1,107 @@ +package com.atl.mcod; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import com.atl.mcod.utils.Pair; + +/** + * Some pre-defined implementations of {@linkplain PartitionFunction partition + * functions}. + */ +public final class PartitionFunctions { + + /** + * Don't let anyone instantiate this class. + */ + private PartitionFunctions() {} + + + /** + * A {@linkplain PartitionFunction partition function} that tries to + * distribute the data objects equally between the promoted data objects, + * associating to each promoted data objects the nearest data objects. + * + * @param The type of the data objects. + */ + public static class BalancedPartition implements PartitionFunction { + + /** + * Processes the balanced partition. + * + *

The algorithm is roughly equivalent to this: + *

+		 *     While dataSet is not Empty:
+		 *         X := The object in dataSet which is nearest to promoted.first
+		 *         Remove X from dataSet
+		 *         Add X to result.first
+		 *         
+		 *         Y := The object in dataSet which is nearest to promoted.second
+		 *         Remove Y from dataSet
+		 *         Add Y to result.second
+		 *         
+		 *     Return result
+		 * 
+ * + * @see mtree.PartitionFunction#process(mtree.utils.Pair, java.util.Set, mtree.DistanceFunction) + */ + @Override + public Pair> process( + final Pair promoted, + Set dataSet, + final DistanceFunction distanceFunction + ) + { + List queue1 = new ArrayList(dataSet); + // Sort by distance to the first promoted data + Collections.sort(queue1, new Comparator() { + @Override + public int compare(DATA data1, DATA data2) { + double distance1 = distanceFunction.calculate(data1, promoted.first); + double distance2 = distanceFunction.calculate(data2, promoted.first); + return Double.compare(distance1, distance2); + } + }); + + List queue2 = new ArrayList(dataSet); + // Sort by distance to the second promoted data + Collections.sort(queue2, new Comparator() { + @Override + public int compare(DATA data1, DATA data2) { + double distance1 = distanceFunction.calculate(data1, promoted.second); + double distance2 = distanceFunction.calculate(data2, promoted.second); + return Double.compare(distance1, distance2); + } + }); + + Pair> partitions = new Pair>(new HashSet(), new HashSet()); + + int index1 = 0; + int index2 = 0; + + while(index1 < queue1.size() || index2 != queue2.size()) { + while(index1 < queue1.size()) { + DATA data = queue1.get(index1++); + if(!partitions.second.contains(data)) { + partitions.first.add(data); + break; + } + } + + while(index2 < queue2.size()) { + DATA data = queue2.get(index2++); + if(!partitions.first.contains(data)) { + partitions.second.add(data); + break; + } + } + } + + return partitions; + } + } +} diff --git a/unsupervised/src/main/java/com/atl/mcod/PromotionFunction.java b/unsupervised/src/main/java/com/atl/mcod/PromotionFunction.java new file mode 100644 index 0000000..6c5e0d3 --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/PromotionFunction.java @@ -0,0 +1,25 @@ +package com.atl.mcod; + +import java.util.Set; + +import com.atl.mcod.utils.Pair; + +/** + * An object that chooses a pair from a set of data objects. + * + * @param The type of the data objects. + */ +public interface PromotionFunction { + + /** + * Chooses (promotes) a pair of objects according to some criteria that is + * suitable for the application using the M-Tree. + * + * @param dataSet The set of objects to choose a pair from. + * @param distanceFunction A function that can be used for choosing the + * promoted objects. + * @return A pair of chosen objects. + */ + Pair process(Set dataSet, DistanceFunction distanceFunction); + +} diff --git a/unsupervised/src/main/java/com/atl/mcod/PromotionFunctions.java b/unsupervised/src/main/java/com/atl/mcod/PromotionFunctions.java new file mode 100644 index 0000000..01cf00f --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/PromotionFunctions.java @@ -0,0 +1,37 @@ +package com.atl.mcod; + +import java.util.List; +import java.util.Set; + +import com.atl.mcod.utils.Pair; +import com.atl.mcod.utils.Utils; + +/** + * Some pre-defined implementations of {@linkplain PromotionFunction promotion + * functions}. + */ +public final class PromotionFunctions { + + /** + * Don't let anyone instantiate this class. + */ + private PromotionFunctions() {} + + + /** + * A {@linkplain PromotionFunction promotion function} object that randomly + * chooses ("promotes") two data objects. + * + * @param The type of the data objects. 
+ */ + public static class RandomPromotion implements PromotionFunction { + @Override + public Pair process(Set dataSet, + DistanceFunction distanceFunction) + { + List promotedList = Utils.randomSample(dataSet, 2); + return new Pair(promotedList.get(0), promotedList.get(1)); + } + } + +} diff --git a/unsupervised/src/main/java/com/atl/mcod/SplitFunction.java b/unsupervised/src/main/java/com/atl/mcod/SplitFunction.java new file mode 100644 index 0000000..7399f07 --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/SplitFunction.java @@ -0,0 +1,62 @@ +package com.atl.mcod; + +import java.util.Set; + +import com.atl.mcod.utils.Pair; + +/** + * Defines an object to be used to split a node in an M-Tree. A node must be + * split when it has reached its maximum capacity and a new child node would be + * added to it. + * + *

The splitting consists in choosing a pair of "promoted" data objects from + * the children and then partition the set of children in two partitions + * corresponding to the two promoted data objects. + * + * @param The type of the data objects. + */ +public interface SplitFunction { + + /** + * Processes the splitting of a node. + * + * @param dataSet A set of data that are keys to the children of the node + * to be split. + * @param distanceFunction A {@linkplain DistanceFunction distance function} + * that can be used to help splitting the node. + * @return A {@link SplitResult} object with a pair of promoted data objects + * and a pair of corresponding partitions of the data objects. + */ + SplitResult process(Set dataSet, DistanceFunction distanceFunction); + + + /** + * An object used as the result for the + * {@link SplitFunction#process(Set, DistanceFunction)} method. + * + * @param The type of the data objects. + */ + public static class SplitResult { + + /** + * A pair of promoted data objects. + */ + public Pair promoted; + + /** + * A pair of partitions corresponding to the {@code promoted} data + * objects. + */ + public Pair> partitions; + + /** + * The constructor for a {@link SplitResult} object. + */ + public SplitResult(Pair promoted, Pair> partitions) { + this.promoted = promoted; + this.partitions = partitions; + } + + } + +} diff --git a/unsupervised/src/main/java/com/atl/mcod/utils/Constants.java b/unsupervised/src/main/java/com/atl/mcod/utils/Constants.java new file mode 100644 index 0000000..47206f5 --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/utils/Constants.java @@ -0,0 +1,98 @@ +package com.atl.mcod.utils; + +import com.atl.afaultdetection.utils.Measurement; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +public class Constants { + + public static boolean debug = false; + + public static double stamperLockedPosition = 2198; + public static int stabilizationPeriod = 10; + public static String resutlsDB = "BoostAnmlyDtctn"; + public static String resutlsDBTable = "anomalies2"; + + public static String maxRule; + public static String minRule; + public static String maxRule_k; + public static String minRule_k; + public static Set algorithms; + + public static double R = 0.05; + public static int k = 50; + public static int W = 3600; + public static int slide = 360; + public static double outlierLife = 0.1; //the life of the outlier expressed in percentage of total slides per window (i.e. how many slides should the outlier be active to be reported) + + public static double R_k = 0.05; + public static int k_k = 720; + public static double outlierLife_k = 0.1; + + public static boolean ssl = false; + public static boolean verbose = false; + +// public static double R = 0.15; +// public static int k = 400; +// public static int W = 3600; +// public static int slide = 360; +// public static double outlierLife = 0.7; //the life of the outlier expressed in percentage of total slides per window (i.e. how many slides should the outlier be active to be reported) +// +// public static double R_k = 0.08; +// public static int k_k = 720; +// public static double outlierLife_k = 0.1; //the life of the outlier expressed in percentage of total slides per window (i.e. 
how many slides should the outlier be active to be reported) + + public static String host; + public static String port; + public static String username; + public static String password; + public static String database; + + public static String sensors = ""; + + + public static Map measMap = new HashMap<>(); + public static Map measShortNameMap = new HashMap<>(); + static{ + measShortNameMap.put("oilLevel","A-ERWAERM1-PRESSE/BL:XXXX:OIL:G01O91M0A011:Oelstand Hintenoil level rear"); + measShortNameMap.put("stamperPosition","A-ERWAERM1-PRESSE/BG:MM:LENGTH:Positionmittel:Stoesselposition gemittelt mm"); + measShortNameMap.put("pressForce","A-ERWAERM1-PRESSE/CF:XXXX:XXXX:IF_CtrlToPlc_TCPIP_Slide_ActVal_TotalForce_F:Stoeselkraft ges amt total slide force kN"); + measShortNameMap.put("lowPressurePumps","A-ERWAERM1-PRESSE/BP:BAR:OIL:G01O91M0B02:Niederdruck Druckueberwachunglow pressur"); + measShortNameMap.put("highPressurePumps","A-ERWAERM1-PRESSE/BP:BAR:OIL:G01O91M0B01:Hochdruck Druckueberwachunghigh pressure"); + measShortNameMap.put("oilTemperature","A-ERWAERM1-PRESSE/BT:CELSIUS:OIL:G01O91M0A012:Oeltemperatur Hintenoil temperatur rear"); + measShortNameMap.put("productNumber","A-ERWAERM1-PRESSE/CF:XXXX:OTHER:PRODNR:Produktnummer"); + measShortNameMap.put("cylinderPressure1","A-ERWAERM1-PRESSE/BP:BAR:OIL:G09O91M0B111:Zylinder 1 Druck Acylinder 1 pressure A"); + measShortNameMap.put("cylinderPressure2","A-ERWAERM1-PRESSE/BP:BAR:OIL:G09O91M0B111:Zylinder 2 Druck Acylinder 2 pressure A"); + measShortNameMap.put("cylinderPressure3","A-ERWAERM1-PRESSE/BP:BAR:OIL:G09O91M0B111:Zylinder 3 Druck Acylinder 3 pressure A"); + measShortNameMap.put("cylinderPressure4","A-ERWAERM1-PRESSE/BP:BAR:OIL:G09O91M0B111:Zylinder 4 Druck Acylinder 4 pressure A"); + measShortNameMap.put("1_mo","Channel_1_MO"); + measShortNameMap.put("2_mo","Channel_2_MO"); + measShortNameMap.put("3_mo","Channel_3_MO"); + measShortNameMap.put("4_mo","Channel_4_MO"); + measShortNameMap.put("5_mo","Channel_5_MO"); + measShortNameMap.put("6_mo","Channel_6_MO"); + measShortNameMap.put("1_max","Channel_1_MAX"); + measShortNameMap.put("2_max","Channel_2_MAX"); + measShortNameMap.put("3_max","Channel_3_MAX"); + measShortNameMap.put("4_max","Channel_4_MAX"); + measShortNameMap.put("5_max","Channel_5_MAX"); + measShortNameMap.put("6_max","Channel_6_MAX"); + measShortNameMap.put("thick","Thickness"); + measShortNameMap.put("in_tmp","Temperature_in_casing"); + measShortNameMap.put("out_tmp","Temperature_out_casing"); + } + + public static void formSensors(){ + if(measMap.isEmpty()){ + System.err.println("No measurements provided!"); + System.exit(9); + } + for(String meas : measMap.keySet()){ + sensors += "," + meas; + } + //trim the first ',' + sensors = sensors.substring(1); + } + +} diff --git a/unsupervised/src/main/java/com/atl/mcod/utils/Data.java b/unsupervised/src/main/java/com/atl/mcod/utils/Data.java new file mode 100644 index 0000000..bfb8dcd --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/utils/Data.java @@ -0,0 +1,134 @@ +package com.atl.mcod.utils; + +import java.util.Random; + +import com.atl.mcod.DistanceFunctions.EuclideanCoordinate; +import java.time.Instant; + +public class Data implements EuclideanCoordinate, Comparable { + + private double[] values; + public final int hashCode; + + + //arrival time + private long arrivalTime; + private Instant actualTime; + private long outlier_start_time; + private long outlier_end_time; + + + + public Data(double... 
values) { + this.values = values; + + int hashCode2 = 1; + for(double value : values) { + hashCode2 = 31*hashCode2 + (int)value + (new Random()).nextInt(100000); + } + this.hashCode = hashCode2; + } + + @Override + public int dimensions() { + return values.length; + } + + @Override + public double get(int index) { + return values[index]; + } + + @Override + public int hashCode() { + return hashCode; + } + + @Override + public boolean equals(Object obj) { + if(obj instanceof Data) { + Data that = (Data) obj; + if(this.arrivalTime != that.arrivalTime) return false; + if(this.dimensions() != that.dimensions()) { + return false; + } + for(int i = 0; i < this.dimensions(); i++) { + if(this.values[i] != that.values[i]) { + return false; + } + } + return true; + } else { + return false; + } + } + + @Override + public int compareTo(Data that) { + int dimensions = Math.min(this.dimensions(), that.dimensions()); + for(int i = 0; i < dimensions; i++) { + double v1 = this.values[i]; + double v2 = that.values[i]; + if(v1 > v2) { + return +1; + } + if(v1 < v2) { + return -1; + } + } + + if(this.dimensions() > dimensions) { + return +1; + } + + if(that.dimensions() > dimensions) { + return -1; + } + + return 0; + } + + public double[] getValues() { + return values; + } + + public void setValues(double[] values) { + this.values = values; + } + + public long getArrivalTime() { + return arrivalTime; + } + + public void setArrivalTime(long arrivalTime) { + this.arrivalTime = arrivalTime; + } + + public long getOutlier_start_time() { + return outlier_start_time; + } + + public void setOutlier_start_time(long outlier_start_time) { + this.outlier_start_time = outlier_start_time; + } + + public long getOutlier_end_time() { + return outlier_end_time; + } + + public void setOutlier_end_time(long outlier_end_time) { + this.outlier_end_time = outlier_end_time; + } + + public Instant getActualTime() { + return actualTime; + } + + public void setActualTime(Object actualTime) { + this.actualTime = Instant.parse( actualTime.toString() ); + } + + + + +} \ No newline at end of file diff --git a/unsupervised/src/main/java/com/atl/mcod/utils/Pair.java b/unsupervised/src/main/java/com/atl/mcod/utils/Pair.java new file mode 100644 index 0000000..5e69f38 --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/utils/Pair.java @@ -0,0 +1,53 @@ +package com.atl.mcod.utils; + +/** + * A pair of objects of the same type. + * + * @param The type of the objects. + */ +public class Pair { + + /** + * The first object. + */ + public T first; + + + /** + * The second object. + */ + public T second; + + /** + * Creates a pair of {@code null} objects. + */ + public Pair() {} + + /** + * Creates a pair with the objects specified in the arguments. + * @param first The first object. + * @param second The second object. + */ + public Pair(T first, T second) { + this.first = first; + this.second = second; + } + + /** + * Accesses an object by its index. The {@link #first} object has index + * {@code 0} and the {@link #second} object has index {@code 1}. + * @param index The index of the object to be accessed. + * @return The {@link #first} object if {@code index} is {@code 0}; the + * {@link #second} object if {@code index} is {@code 1}. + * @throws IllegalArgumentException If {@code index} is neither {@code 0} + * or {@code 1}. 
+ */ + public T get(int index) throws IllegalArgumentException { + switch(index) { + case 0: return first; + case 1: return second; + default: throw new IllegalArgumentException(); + } + } + +} diff --git a/unsupervised/src/main/java/com/atl/mcod/utils/Utils.java b/unsupervised/src/main/java/com/atl/mcod/utils/Utils.java new file mode 100644 index 0000000..e4034ca --- /dev/null +++ b/unsupervised/src/main/java/com/atl/mcod/utils/Utils.java @@ -0,0 +1,84 @@ +package com.atl.mcod.utils; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Random; + +/** + * Some utilities. + */ +public final class Utils { + + /** + * Don't let anyone instantiate this class. + */ + private Utils() {} + + + /** + * Identifies the minimum and maximum elements from an iterable, according + * to the natural ordering of the elements. + * @param items An {@link Iterable} object with the elements + * @param The type of the elements. + * @return A pair with the minimum and maximum elements. + */ + public static > Pair minMax(Iterable items) { + Iterator iterator = items.iterator(); + if(!iterator.hasNext()) { + return null; + } + + T min = iterator.next(); + T max = min; + + while(iterator.hasNext()) { + T item = iterator.next(); + if(item.compareTo(min) < 0) { + min = item; + } + if(item.compareTo(max) > 0) { + max = item; + } + } + + return new Pair(min, max); + } + + + /** + * Randomly chooses elements from the collection. + * @param collection The collection. + * @param n The number of elements to choose. + * @param The type of the elements. + * @return A list with the chosen elements. + */ + public static List randomSample(Collection collection, int n) { + List list = new ArrayList(collection); + List sample = new ArrayList(n); + Random random = new Random(); + while(n > 0 && !list.isEmpty()) { + int index = random.nextInt(list.size()); + sample.add(list.get(index)); + int indexLast = list.size() - 1; + T last = list.remove(indexLast); + if(index < indexLast) { + list.set(index, last); + } + n--; + } + return sample; + } + + + public static Integer[] removeFirstElement(Integer[] x){ + + Integer[] r= new Integer[x.length-1]; + for(int i = 1; i < x.length; i++){ + r[i-1] = x[i]; + } + return r; + } + +} diff --git a/unsupervised/src/main/java/com/atl/smartmaintenance/AFaultDetection.java b/unsupervised/src/main/java/com/atl/smartmaintenance/AFaultDetection.java new file mode 100644 index 0000000..875344a --- /dev/null +++ b/unsupervised/src/main/java/com/atl/smartmaintenance/AFaultDetection.java @@ -0,0 +1,204 @@ +package com.atl.smartmaintenance; + +import com.atl.smartmaintenance.faultdetection.FaultDetectionManager; +import com.atl.afaultdetection.utils.Measurement; +import com.atl.mcod.utils.Constants; +import java.util.HashSet; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * + * @author thanasis + */ +public class AFaultDetection { + + public static void main(String[] args) { + try { + readArguments(args); + FaultDetectionManager fdm = new FaultDetectionManager(); + Constants.formSensors(); + fdm.detectOutliers(Constants.measMap); + } catch (InterruptedException ex) { + Logger.getLogger(AFaultDetection.class.getName()).log(Level.SEVERE, null, ex); + } + } + + public static void readArguments(String[] args) { + for (int i = 0; i < args.length; i++) { + + //check if arg starts with -- + String arg = args[i]; + if (arg.indexOf("--") == 0) { + switch (arg) { + case "--R": + 
Constants.R = Double.valueOf(args[i + 1]); + break; + case "--W": + Constants.W = Integer.valueOf(args[i + 1]); + break; + case "--k": + Constants.k = Integer.valueOf(args[i + 1]); + break; + case "--slide": + Constants.slide = Integer.valueOf(args[i + 1]); + break; + case "--R_k": + Constants.R_k = Double.valueOf(args[i + 1]); + break; + case "--k_k": + Constants.k_k = Integer.valueOf(args[i + 1]); + break; + case "--meas": + for(String meas : args[i+1].split(",")){ + Constants.measMap.put(Constants.measShortNameMap.get(meas), new Measurement("\""+Constants.measShortNameMap.get(meas)+"\"")); + } + break; + case "--algorithms": + Set algorithms = new HashSet<>(); + for (String algo : args[i + 1].split(",")) { + algorithms.add(algo); + } + Constants.algorithms = algorithms; + break; + case "--stamperLockedPosition": + Constants.stamperLockedPosition = Double.valueOf(args[i + 1]); + break; + case "--stabilizationPeriod": + Constants.stabilizationPeriod = Integer.valueOf(args[i + 1]); + break; + case "--resultsDB": + Constants.resutlsDB = args[i + 1]; + break; + case "--resultsDBTable": + Constants.resutlsDBTable = args[i + 1]; + break; + case "--maxRule": + Constants.maxRule = args[i + 1]; + break; + case "--minRule": + Constants.minRule = args[i + 1]; + break; + case "--maxRule_k": + Constants.maxRule_k = args[i + 1]; + break; + case "--minRule_k": + Constants.minRule_k = args[i + 1]; + break; + case "--outlierLife": + Constants.outlierLife = Double.valueOf(args[i + 1]); + break; + case "--outlierLife_k": + Constants.outlierLife_k = Double.valueOf(args[i + 1]); + break; + case "--host": + Constants.host = args[i + 1]; + break; + case "--port": + Constants.port = args[i + 1]; + break; + case "--username": + Constants.username = args[i + 1]; + break; + case "--password": + Constants.password = args[i + 1]; + break; + case "--database": + Constants.database = args[i + 1]; + break; + case "--debug": + Constants.debug = Boolean.parseBoolean(args[i + 1]); + break; + case "--verbose": + Constants.verbose = Boolean.parseBoolean(args[i + 1]); + break; + case "--ssl": + Constants.ssl = Boolean.parseBoolean(args[i + 1]); + break; + case "--help": + System.out.println("Usage of aFaultDetection:"); + System.out.println("\t--host\n" + + "\t\tSpecify the influxDB host (hostname or ip address).\n" + + "\t--port\n" + + "\t\tSpecify the influxDB port.\n" + + "\t--username\n" + + "\t\tSpecify the influxDB username (optional). You will be prompt for a password if not specified \n" + + "\t\twith --passowrd.\n" + + "\t--password\n" + + "\t\tSpecify the influxDB password (optional).\n" + + "\t--database\n" + + "\t\tSpecify the influxDB database.\n" + + "\t--ssl\n" + + "\t\tSpecify whether to use ssl certification (https://) or not. 
ssl is used without verification.\n" + + "\t\t(default: false).\n" + + "\t--algorithms mcod,mcodKnowledge,ruleBased,ruleBasedKnowledge\n" + + "\t\tSpecify the used algorithms for the fault detection (multiple selections allowed separated with \n" + + "\t\tcomma (no spaces)): mcod, mcodKnowledge, ruleBased, ruleBasedKnowledge\n" + + "\t--meas oilLevel,lowPressurePumps,highPressurePumps,pressForce,stamperPosition,productNumber,oilTemperature,\n" + + "\t cylinderPressure1,cylinderPressure2,cylinderPressure3,cylinderPressure4\n" + + "\t\tSpecify the measurments to monitor (multiple selections allowed separated with comma (no spaces)): \n" + + "\t\toilLevel, lowPressurePumps, highPressurePumps, pressForce, stamperPosition, productNumber, \n" + + "\t\toilTemperature, cylinderPressure1, cylinderPressure2, cylinderPressure3, cylinderPressure4\n" + + "\t--R, --R_k\n" + + "\t\tSet the R radius of the mcod and mcodKnowledge algorithms, respectively. The higher the R the less \n" + + "\t\tthe outliers (default: R=0.15, R_k=0.08).\n" + + "\t--k, --k_k\n" + + "\t\tSet the minimum number of points inside radius R (R_k resp.) to form a cluster of inlier points for \n" + + "\t\tmcod and mcodKnowledge, respectively. The higher the k the more the outliers (default: k=400, k_k=720).\n" + + "\t--W\n" + + "\t\tSet the window size for both mcod and mcodKnowledge. Higher window sizes demand more \n" + + "\t\tprocessing power (default: 3600).\n" + + "\t--slide\n" + + "\t\tSet the slide size for both mcod and mcodKnowledge. Defines the movement of the window \n" + + "\t\t(i.e. the frequency of data fetching from influxDB) (default: 360).\n" + + "\t--resultsDB\n" + + "\t\tSpecify the database that the outliers are going to be reported \n" + + "\t\t(default: BoostAnmlyDtctn).\n" + + "\t--resultsDBTable\n" + + "\t\tSpecify the table name of the influxDB that the outliers are going to be reported \n" + + "\t\t(default: anomalies2).\n" + + "\t--verbose\n" + + "\t\tReport the detected outliers also in the stdout \n" + + "\t\t(default: false).\n" + + "\t--stamperLockedPoisition\n" + + "\t\tSet the position of the stamper in meters, where the stamper is considered locked. \n" + + "\t\tUsed only in the mcodKnowledge algorithm (default: 2198).\n" + + "\t--stabilizationPeriod\n" + + "\t\tSet the stabilization period in seconds where the oil is considered stabilized after the stamper is locked. \n" + + "\t\tUsed only in the mcodKnowledge algorithm (default: 10).\n" + + "\t--outlierLife, --outlierLife_k\n" + + "\t\tSet the life of an outlier for mcod and mcodKnowledge, respectively, expressed as percentage of total slides \n" + + "\t\tper window (i.e. how many slides should the outlier be active to be reported). The higher the outlierLife the \n" + + "\t\tless the reported outliers. (default: outlierLife=0.7, outlierLife_k=0.1).\n" + + "\t--minRule --minRule_k oilLevel-|lowPressurePumps-|highPressurePumps-|pressForce-\n" + + "\t |stamperPosition-|productNumber-|oilTemperature-|cylinderPressure1-\n" + + "\t |cylinderPressure2-|cylinderPressure3-|cylinderPressure4-\n" + + "\t\tSpecify a rule with minimum threshold e.g. --minRule oilLevel-8330 reports a fault if oilLevel<8330. \n" + + "\t\tIt can be used with --maxRule, OR condition is considered between them. 
--minRule_k is for the ruleBasedKnowledge\n" + + "\t--maxRule --maxRule_k oilLevel-|lowPressurePumps-|highPressurePumps-|pressForce-\n" + + "\t |stamperPosition-|productNumber-|oilTemperature-|cylinderPressure1-\n" + + "\t |cylinderPressure2-|cylinderPressure3-|cylinderPressure4-\n" + + "\t\tSpecify a rule with maximum threshold e.g. --maxRule oilLevel-9500 reports a fault if oilLevel>9500. \n" + + "\t\tIt can be used with --minRule, OR condition is considered between them. --maxRule_k is for the ruleBasedKnowledge"); + System.out.println("\nExamples:\n"); + System.out.println("\t# Monitor the oil level with mcodKnowledge and ruleBasedKnoledge with default parameters for \n" + + "\tmcodKnowledge and oilLevel<8400 rule for the ruleBasedKnowledge:\n" + + "\tjava -jar aFaultDetection-.jar --host localhost --port 8086 --database Axoom1 \n" + + "\t--algorithms mcodKnowledge,ruleBasedKnoledge --minRule_k oilLevel-8400\n\n" + + "\t# Monitor the oil level and low pressure pumps with mcod seting R=0.5 euclidean distance, k=600 points, \n" + + "\tW=5mins (360secs), slide=5secs:\n" + + "\tjava -jar aFaultDetection-.jar --host localhost --port 8086 --database Axoom1 --algorithms mcod --R 0.5 --k 600 --W 360 --slide 5\n\n" + + "\t# Monitor the oilLevel and lowPressurePums, connect to the influxDB using ssl, write the results to the BoostAnmlyDtctn database,\n " + + "\tanomalies2 Measurements/Table, use all the avalable monitoring approaches, providing all the possible parameters:\n" + + "\tjava -jar aFaultDetection-.jar --host localhost --port 8086 --database Axoom1 --ssl true --resultsDB BoostAnmlyDtctn\n" + + "\t--resultsDBTable anomalies2 --meas oilLevel,lowPressurePumps --algorithms mcod,ruleBased,mcodKnowledge,ruleBasedKnowledge \n" + + "\t--minRule oilLevel-9130 --minRule_k oilLevel-9330 --W 3600 --slide 360 --R 0.5 --k 22 --oulierLife 1 --R_k 0.12 --k_k 720 \n" + + "\t--outlierLife_k 1 --stamperLockedPosition 2198 --stabilizationPeriod 10 --verbose true"); + System.exit(0); + } + } + } + } + +} diff --git a/unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FD.java b/unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FD.java new file mode 100644 index 0000000..d5445f6 --- /dev/null +++ b/unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FD.java @@ -0,0 +1,185 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package com.atl.smartmaintenance.faultdetection; + +import com.atl.afaultdetection.utils.InfluxDBHandler; +import com.atl.mcod.MCOD; +import com.atl.afaultdetection.utils.Measurement; +import com.atl.afaultdetection.utils.Utils; +import com.atl.mcod.utils.Constants; +import com.atl.mcod.utils.Data; +import java.time.Instant; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * + * @author thanasis + */ +public class FD { + + private MCOD mcod; + private InfluxDBHandler infDBH; + private double R; + private int k; + private int W; + private int slide; + private double outlierLife; + private String ruleMeas; + private Map measMap; + private boolean stamperPositionExplicitlyAdded; //MCOD with Knowledge needs stamper position to define if stamper is locked + private boolean productNumberExplicitlyAdded; //MCOD with Knowledge needs product number as for some products (like 3 and 12) the stamper is not clear where it is locked + private long currentTime; + + //Holds the data of each slide + //MCOD with knowledge is count-based not time based, hence the slideDataList should be processed only if it's size is equal to the slide size + private List slideDataList; + private List notProcessedDataList; + + public FD(Map measMap, InfluxDBHandler infDBH, double R, int k, int W, int slide, double outlierLife, boolean stamperPositionExplicitlyAdded, boolean productNumberExplicitlyAdded) { + this.R = R; + this.k = k; + this.W = W; + this.slide = slide; + this.outlierLife = outlierLife; + this.mcod = new MCOD(R, k, W, slide); + this.infDBH = infDBH; + this.stamperPositionExplicitlyAdded = stamperPositionExplicitlyAdded; + this.productNumberExplicitlyAdded = productNumberExplicitlyAdded; + this.measMap = measMap; + if (Constants.algorithms.contains("ruleBased")) { + this.ruleMeas = Utils.checkRuleBasedParameters(measMap); + } + this.slideDataList = new ArrayList(); + this.notProcessedDataList = new ArrayList(); + this.currentTime = 0; + } + + public void mcodNoKnowledge(Map>> slideDataIteratorsMap) { + + if(!notProcessedDataList.isEmpty()){ + if(notProcessedDataList.size() <= Constants.slide){ + slideDataList.addAll(notProcessedDataList); + notProcessedDataList.clear(); + currentTime += slideDataList.size(); + } else { + currentTime += Constants.slide; + mcodOutliers(mcod, notProcessedDataList.subList(0, Constants.slide-1), currentTime); + notProcessedDataList.subList(0, Constants.slide).clear(); + slideDataList.addAll(notProcessedDataList); + notProcessedDataList.clear(); + currentTime += slideDataList.size(); + } + } + + //loop until one of the iterator runs out of data. + //Since we have used group by and fill(linear) in the query it is expected all the iterators to have the same amount of data. + iteratorsLoop: + while (Utils.allIteratorsHaveNext(slideDataIteratorsMap)) { + Instant date = null;//it is used to mark the actual arrival date of the current data point. We have used precision of a second in the query so it is expected all the current points from different measurements to have the same timestamp + + boolean skipValues = false; + Map> valuesMap = new HashMap<>(); //holds the actual pair for each different measurment + for (Map.Entry>> entry : slideDataIteratorsMap.entrySet()) { + String key = entry.getKey(); + Map.Entry value = entry.getValue().next(); + + date = value.getKey(); + + //There might be a bug in the InfluxDB java library, as the last returned value is always null. + //Skip the null values. 
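+                //Note: the "continue iteratorsLoop" below abandons the inner for-loop early, so the
+                //remaining series iterators are not advanced for this timestamp and the point is
+                //dropped without ever reaching slideDataList.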
+ if (value.getValue() == null) { + skipValues = true; //continue the outer loop + continue iteratorsLoop; + } + + //InfluxDB does not accept constraints in specific values of multi-measurement queries + //Hence the exclusion of zero oil level values (which is possibly a sensor fault) should be done manually. + if (value.getValue() == 0 && key.contains(Constants.measShortNameMap.get("oilLevel"))) { + currentTime++; + continue iteratorsLoop; + } + + //Ignore the values that are less than zero which is possibly caused by a sensor problem + if (value.getValue() < 0) { + currentTime++; + continue iteratorsLoop; + } + valuesMap.put(key, value); + } + if(skipValues){ + continue; + } + + //a flag to report data that violate a rule based threshold + boolean reportDataOutlierRuleBased = false; + + //The implementation of the MCOD expects all the data point values in an array format + //subtract the explicitly added fields + double[] d = new double[valuesMap.size()-(stamperPositionExplicitlyAdded && productNumberExplicitlyAdded?2:stamperPositionExplicitlyAdded?1:productNumberExplicitlyAdded?1:0)]; + int i = 0; + for (String key : valuesMap.keySet()) { + double value = valuesMap.get(key).getValue(); + if((key.contains(Constants.measShortNameMap.get("stamperPosition")) && stamperPositionExplicitlyAdded) || (key.contains(Constants.measShortNameMap.get("productNumber")) && productNumberExplicitlyAdded)){ + continue; + } + + if(Constants.algorithms.contains("ruleBased") && key.equals(ruleMeas)){ + reportDataOutlierRuleBased = Utils.checkRule("ruleBased",key,value,ruleMeas); + } + + Measurement meas = measMap.get(key); + d[i] = Utils.normalize(value, meas.getMinValue(), meas.getMaxValue()); + i++; + } + + //Data class is needed by the current MCOD implementation + Data data = new Data(d); + data.setActualTime(date); + data.setArrivalTime(++currentTime); //The sampling rate is 1 sec, hence to compute the currentTime we increment by 1 the time for each processed data point + slideDataList.add(data); + + if(reportDataOutlierRuleBased){ + infDBH.reportOutlier(data, "ruleBased",ruleMeas); + } + } + + if(slideDataList.size() > Constants.slide){ + notProcessedDataList.addAll(new ArrayList(slideDataList.subList(Constants.slide, slideDataList.size()))); + currentTime -= slideDataList.subList(Constants.slide, slideDataList.size()).size(); + slideDataList.subList(Constants.slide, slideDataList.size()).clear(); + } + + if(slideDataList.size() == Constants.slide){ + //Execute the MCOD algorithm + mcodOutliers(mcod, slideDataList, currentTime); + slideDataList.clear(); + } + } + + private void mcodOutliers(MCOD mcnew, List slideDataList, long currentTime) { + ArrayList outliers9 = mcnew.detectOutlier(slideDataList, currentTime, W, slide); + + //if (((double) mcnew.getDataListSize()) > k * 1.2) { //k*1.2 is a buffer for cold start + for (Data outlier : outliers9) { + double outlierLifeInSlides = 0; + if (outlier.getOutlier_end_time() != 0) { + outlierLifeInSlides = (outlier.getOutlier_end_time() / slide) - (outlier.getOutlier_start_time() / slide) + 1; + } else { + outlierLifeInSlides = (currentTime / slide) - (outlier.getOutlier_start_time() / slide) + 1; + } + if (outlierLifeInSlides >= ((double) W / slide) * outlierLife) { + if (outlier.getArrivalTime() > currentTime - W) { + infDBH.reportOutlier(outlier, "mcod", ""); + } + } + } + //} + } +} diff --git a/unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FDKnowledge.java 
b/unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FDKnowledge.java new file mode 100644 index 0000000..2dfe81c --- /dev/null +++ b/unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FDKnowledge.java @@ -0,0 +1,207 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package com.atl.smartmaintenance.faultdetection; + +import com.atl.afaultdetection.utils.InfluxDBHandler; +import com.atl.mcod.MCOD; +import com.atl.afaultdetection.utils.Measurement; +import com.atl.afaultdetection.utils.Utils; +import com.atl.mcod.utils.Constants; +import com.atl.mcod.utils.Data; +import java.time.Instant; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * + * @author thanasis + */ +public class FDKnowledge { + + private MCOD mcod; + private InfluxDBHandler infDBH; + private double R; + private int k; + private int W; + private int slide; + private double outlierLife; + private String ruleMeas; + private Map measMap; + long currentTime; + + //Holds the data of each slide + //MCOD with knowledge is count-based not time based, hence the slideDataList should be processed only if it's size is equal to the slide size + private List slideDataList; + private List notProcessedDataList; + + public FDKnowledge(Map measMap, InfluxDBHandler infDBH, double R, int k, int W, int slide, double outlierLife) { + this.R = R; + this.k = k; + this.W = W; + this.slide = slide; + this.outlierLife = outlierLife; + this.mcod = new MCOD(R, k, W, slide); + this.infDBH = infDBH; + this.measMap = measMap; + if (Constants.algorithms.contains("ruleBasedKnowledge")) { + this.ruleMeas = Utils.checkRuleBasedParameters(measMap); + } + this.slideDataList = new ArrayList(); + this.notProcessedDataList = new ArrayList(); + this.currentTime = 0; + } + + public void mcodWithKnowledge(Map>> slideDataIteratorsMap) { + if(slideDataIteratorsMap.size() < 2){ + return; + } + + if(!notProcessedDataList.isEmpty()){ + if(notProcessedDataList.size() <= Constants.slide){ + slideDataList.addAll(notProcessedDataList); + notProcessedDataList.clear(); + currentTime += slideDataList.size(); + } else { + currentTime += Constants.slide; + mcodOutliers(mcod, notProcessedDataList.subList(0, Constants.slide-1), currentTime); + notProcessedDataList.subList(0, Constants.slide).clear(); + slideDataList.addAll(notProcessedDataList); + notProcessedDataList.clear(); + currentTime += slideDataList.size(); + } + } + + double previousStamperPosition = -1; + int stabilizationOffset = Constants.stabilizationPeriod; + + //loop until one of the iterator runs out of data. + //Since we have used group by and fill(linear) in the query it is expected all the iterators to have the same amount of data. + iteratorsLoop: + while (Utils.allIteratorsHaveNext(slideDataIteratorsMap)) { + Instant date = null;//it is used to mark the actual arrival date of the current data point. 
We have used precision of a second in the query so it is expected all the current points from different measurements to have the same timestamp + + boolean skipValues = false; + Map> valuesMap = new HashMap<>(); //holds the actual pair for each different measurment + for (Map.Entry>> entry : slideDataIteratorsMap.entrySet()) { + String key = entry.getKey(); + Map.Entry value = entry.getValue().next(); + + date = value.getKey(); +// System.out.println(date); + //There might be a bug in the InfluxDB java library, as there are some null values returned that actually don't exist. + //Skip the null values. + if (value.getValue() == null) { + skipValues = true; //continue the outer loop + } + + //If the stamper is locked (above a specific value) discard some values to be sure that the oil level is stabilized + if (key.contains(Constants.measShortNameMap.get("stamperPosition"))) { + if((value.getValue() >= Constants.stamperLockedPosition && previousStamperPosition < Constants.stamperLockedPosition) || stabilizationOffset < Constants.stabilizationPeriod){ + stabilizationOffset -= 1; + if(stabilizationOffset == 0){ + stabilizationOffset = Constants.stabilizationPeriod; + } else { + skipValues = true; //continue the outer loop + } + } + previousStamperPosition = value.getValue(); + } + + //InfluxDB does not accept constraints in specific values of multi-measurement queries + //Hence the exclusion of zero oil level values (which is possibly a sensor fault) should be done manually. + if (value.getValue() == 0 && key.contains(Constants.measShortNameMap.get("oilLevel"))) { + skipValues = true; //break the outer loop + } + + //Ignore the values that are less than zero possibly due to a sensor problem + if (value.getValue() < 0) { + skipValues = true; //continue the outer loop + } + + //Ignore products 12 and 3 as there is no clear stamper locked position for those + if (key.contains(Constants.measShortNameMap.get("productNumber")) && (value.getValue() == 12 || value.getValue() == 3)) { + skipValues = true; //continue the outer loop + } + + //Ignore values if the stamper is not locked (above a specific high) + if (key.contains(Constants.measShortNameMap.get("stamperPosition")) && value.getValue() < Constants.stamperLockedPosition) { + skipValues = true; //continue the outer loop + } + + valuesMap.put(key, value); + } + if(skipValues){ + continue; + } + + //a flag to report data that violate a rule based threshold + boolean reportDataOutlierRuleBased = false; + + //The implementation of the MCOD expects all the data point values in an array format + //Exclude stamperPosition and ProductNumber + double[] d = new double[valuesMap.size()-2]; + int i = 0; + for (String key : valuesMap.keySet()) { + double value = valuesMap.get(key).getValue(); + if(key.contains(Constants.measShortNameMap.get("stamperPosition")) || key.contains(Constants.measShortNameMap.get("productNumber"))){ + continue; + } + + if(Constants.algorithms.contains("ruleBasedKnowledge") && key.equals(ruleMeas)){ + reportDataOutlierRuleBased = Utils.checkRule("ruleBasedKnowledge", key, value, ruleMeas); + } + + Measurement meas = measMap.get(key); + d[i] = Utils.normalize(value, meas.getMinValue(), meas.getMaxValue()); + i++; + } + + + //Data class is needed by the current MCOD implementation + Data data = new Data(d); + data.setActualTime(date); + data.setArrivalTime(++currentTime); //The sampling rate is 1 sec, hence to compute the currentTime we increment by 1 the time for each processed data point. 
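+            //For reference, Utils.normalize(value, min, max) used above is assumed to be the plain
+            //min-max scaling referred to in FaultDetectionManager ("min-max normalization process"),
+            //i.e. roughly:
+            //    double normalize(double value, double min, double max) {
+            //        return (value - min) / (max - min);   // maps the raw reading into [0, 1]
+            //    }
+            //so that all measurements contribute to the MCOD distance on a comparable scale; the
+            //actual helper lives in com.atl.afaultdetection.utils.Utils and may differ in detail.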
+ slideDataList.add(data); + if(reportDataOutlierRuleBased){ + infDBH.reportOutlier(data, "ruleBasedKnowledge",ruleMeas); + } + } + + if(slideDataList.size() > Constants.slide){ + notProcessedDataList.addAll(new ArrayList(slideDataList.subList(Constants.slide, slideDataList.size()))); + currentTime -= slideDataList.subList(Constants.slide, slideDataList.size()).size(); + slideDataList.subList(Constants.slide, slideDataList.size()).clear(); + } + + if(slideDataList.size() == Constants.slide){ + //Execute the MCOD algorithm + mcodOutliers(mcod, slideDataList, currentTime); + slideDataList.clear(); + } + } + + private void mcodOutliers(MCOD mcnew, List slideDataList, long currentTime) { + ArrayList outliers9 = mcnew.detectOutlier(slideDataList, currentTime, W, slide); + //if (((double) mcnew.getDataListSize()) > k * 1.2) { //k*1.2 is a buffer for cold start + for (Data outlier : outliers9) { + double outlierLifeInSlides = 0; + if (outlier.getOutlier_end_time() != 0) { + outlierLifeInSlides = (outlier.getOutlier_end_time() / slide) - (outlier.getOutlier_start_time() / slide) + 1; + } else { + outlierLifeInSlides = (currentTime / slide) - (outlier.getOutlier_start_time() / slide) + 1; + } + if (outlierLifeInSlides >= ((double) W / slide) * outlierLife) { + if (outlier.getArrivalTime() > currentTime - W) { + infDBH.reportOutlier(outlier, "mcodKnowledge", ""); + } + } + } + //} + } +} diff --git a/unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FaultDetectionManager.java b/unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FaultDetectionManager.java new file mode 100644 index 0000000..ec6e318 --- /dev/null +++ b/unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FaultDetectionManager.java @@ -0,0 +1,193 @@ +package com.atl.smartmaintenance.faultdetection; + +import com.atl.afaultdetection.utils.InfluxDBHandler; +import com.atl.afaultdetection.utils.Measurement; +import com.atl.mcod.utils.Constants; +import java.io.Console; +import java.time.Instant; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Scanner; +import java.util.TreeMap; +import org.influxdb.dto.QueryResult; +import org.influxdb.dto.QueryResult.Series; + +/** + * + * @author thanasis + */ +public class FaultDetectionManager { + + /** + * @param args the command line arguments + */ + + public void detectOutliers(Map measMap) throws InterruptedException { + + FD fd = null; + FDKnowledge fdKnowledge = null; + boolean stamperPositionExplicitlyAdded = false; //MCOD with Knowledge needs stamper position to define if stamper is locked + boolean productNumberExplicitlyAdded = false; //MCOD with Knowledge needs product number as for some products (like 3 and 12) the stamper is not clear where it is locked + //Connect to the Influx DB creating a new InfluxDB handler + InfluxDBHandler infDBH = null; + String password = ""; + if(Constants.username != null){ + if(Constants.password == null){ + Scanner scanner = null; + Console console = System.console(); + if(console == null){ + scanner = new Scanner(System.in); + System.out.println("\nInfluxDB Password for "+Constants.username+": "); + password = scanner.next(); + } else { + password = new String(console.readPassword("\nInfluxDB Password for "+Constants.username+": ")); + } + } else { + password = Constants.password; + } + + } + infDBH = new InfluxDBHandler(Constants.host, Constants.port, Constants.database, Constants.resutlsDB, Constants.username, 
password, Constants.ssl); + + + if(Constants.algorithms.contains("mcodKnowledge")){ + //stamperPosition and productNumber are needed in the MCOD with knowledge algorithm, hence if not provided using the --meas flag the are added here + fdKnowledge = new FDKnowledge(measMap, infDBH, Constants.R_k, Constants.k_k, Constants.W, Constants.slide, Constants.outlierLife_k); + if(!measMap.containsKey("stamperPosition")){ + measMap.put(Constants.measShortNameMap.get("stamperPosition"), new Measurement("\""+Constants.measShortNameMap.get("stamperPosition")+"\"")); + stamperPositionExplicitlyAdded = true; + } + if(!measMap.containsKey("productNumber")){ + measMap.put(Constants.measShortNameMap.get("productNumber"), new Measurement("\""+Constants.measShortNameMap.get("productNumber")+"\"")); + productNumberExplicitlyAdded = true; + } + } + + if(Constants.algorithms.contains("mcod")){ + fd = new FD(measMap, infDBH, Constants.R, Constants.k, Constants.W, Constants.slide, Constants.outlierLife, stamperPositionExplicitlyAdded, productNumberExplicitlyAdded); + } + + //instatiate the minimum and maximum values for each measurement needed for the min-max normalization process + for(String measName : measMap.keySet()){ + boolean excludeZeroValues = measName.contains(Constants.measShortNameMap.get("oilLevel")) || measName.contains(Constants.measShortNameMap.get("stamperPosition")) || measName.contains(Constants.measShortNameMap.get("1_max")) + || measName.contains(Constants.measShortNameMap.get("2_max")) || measName.contains(Constants.measShortNameMap.get("3_max")) || measName.contains(Constants.measShortNameMap.get("4_max")) + || measName.contains(Constants.measShortNameMap.get("5_max")) || measName.contains(Constants.measShortNameMap.get("6_max")) + || measName.contains(Constants.measShortNameMap.get("in_tmp")) || measName.contains(Constants.measShortNameMap.get("thick")) + || measName.contains(Constants.measShortNameMap.get("out_tmp")); + boolean excludeNegativeValues = true; // we don't want negative values in any of the measurements + measMap.get(measName).setMinValue(infDBH.getMinValue(measMap.get(measName).getMeasurementDBName(),excludeZeroValues, excludeNegativeValues)); + measMap.get(measName).setMaxValue(infDBH.getMaxValue(measMap.get(measName).getMeasurementDBName())); + if(Constants.debug){ + System.out.println(measMap.get(measName).getMinValue()); + System.out.println(measMap.get(measName).getMaxValue()); + } + } + + //The offset used to fetch measurments from the database. It is instatiated to the slide size + long secs = Constants.slide; + + + + //form the query concatanating all the participating measurments + StringBuilder strb = new StringBuilder(); + int i = 0; + for(String measName : measMap.keySet()){ + strb.append(measMap.get(measName).getMeasurementDBName()); + if(i < measMap.size()-1){ + strb.append(","); + } + i++; + } + + while(true){ + QueryResult measurements = null; + if(Constants.debug){ + measurements = infDBH.getMeasurementsDebug(strb.toString(), secs, true); + } else { + measurements = infDBH.getMeasurements(strb.toString(), secs, false); + } + //influx DB can't syncronize by time measurments from different tables. 
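+            //The alignment is therefore done client-side: each returned series is first copied into
+            //a TreeMap keyed by Instant, and every other series is then filtered down to the
+            //timestamps that also appear in the smallest series (see findSmallestMap below) before
+            //the per-series iterators are handed to the detectors.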
+ //rawFetchedMeasurements is used to transform the returned results to an easy to process form + Map> rawFetchedMeasurements = new HashMap<>(); + //processedFetchedMeasurements holds the final synced measurements + Map> processedFetchedMeasurements = new HashMap<>(); + + if(!measurements.hasError() && measurements.getResults().get(0).getSeries() != null){ + //Store all the value iterators to a HashMap + + for(Series series : measurements.getResults().get(0).getSeries()){ + for(List pair : series.getValues()){ + if(!rawFetchedMeasurements.containsKey(series.getName())){ + rawFetchedMeasurements.put(series.getName(), new TreeMap()); // use TreeMap to keep the measurements sorted by their time + } + if(pair.get(1) != null){ + //the first item of the pair is always the date (transformed into Instant), the second one is the value (transformed into double) + rawFetchedMeasurements.get(series.getName()).put(Instant.parse(pair.get(0).toString()), (Double) pair.get(1)); + } + } + } + + //filter the measurements based on the dates of the smallest series. + String smallerSeriesName = findSmallestMap(rawFetchedMeasurements); + for(String seriesName : rawFetchedMeasurements.keySet()){ + if(!seriesName.equals(smallerSeriesName)){ + for(Instant key : rawFetchedMeasurements.get(smallerSeriesName).keySet()){ + if(rawFetchedMeasurements.get(seriesName).containsKey(key)){ + if(!processedFetchedMeasurements.containsKey(seriesName)){ + processedFetchedMeasurements.put(seriesName, new TreeMap<>()); + } + processedFetchedMeasurements.get(seriesName).put(key, rawFetchedMeasurements.get(seriesName).get(key)); + } + } + } + } + + if(!processedFetchedMeasurements.isEmpty() || rawFetchedMeasurements.size() == 1){ + processedFetchedMeasurements.put(smallerSeriesName, rawFetchedMeasurements.get(smallerSeriesName)); + rawFetchedMeasurements.clear(); + + + Map>> slideDataIteratorsMap = new HashMap<>(); + Map>> slideDataIteratorsMapKnowledge = new HashMap<>(); + for(String seriesName : processedFetchedMeasurements.keySet()){ + slideDataIteratorsMap.put(seriesName,processedFetchedMeasurements.get(seriesName).entrySet().iterator()); + slideDataIteratorsMapKnowledge.put(seriesName,processedFetchedMeasurements.get(seriesName).entrySet().iterator()); + } + if(Constants.algorithms.contains("mcod")){ + fd.mcodNoKnowledge(slideDataIteratorsMap); + } + if(Constants.algorithms.contains("mcodKnowledge")){ + fdKnowledge.mcodWithKnowledge(slideDataIteratorsMapKnowledge); + } + } + } + if(Constants.debug){ + //Increment the secs for the query by a slide + secs += Constants.slide; +// if(secs > 6000000){ + if(secs > 2332800){ + System.out.println(Constants.resutlsDBTable + " finished"); + break; + } + } + if(!Constants.debug){ + Thread.sleep(Constants.slide*1000); + } + } + } + + private String findSmallestMap(Map> rawFetchedMeasurments) { + String smallestSeriesName = ""; + int min = Integer.MAX_VALUE; + for(String seriesName : rawFetchedMeasurments.keySet()){ + if(rawFetchedMeasurments.get(seriesName).size() < min){ + smallestSeriesName = seriesName; + min = rawFetchedMeasurments.get(seriesName).size(); + } + } + return smallestSeriesName; + } +} -- 2.2.2
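The reporting condition in FD.mcodOutliers and FDKnowledge.mcodOutliers can be read as: an outlier is written to InfluxDB only once it has stayed active for at least outlierLife of the slides that make up one window, and only if it arrived inside the current window. The following self-contained sketch spells out that arithmetic using the defaults quoted in the --help text (W=3600, slide=360, outlierLife=0.7); the class name and the concrete start/end times are illustrative only.

public class OutlierLifeSketch {
    public static void main(String[] args) {
        int W = 3600;              // window size, default from --help
        int slide = 360;           // slide size, default from --help
        double outlierLife = 0.7;  // required fraction of slides per window (mcod default)

        long start = 1080;         // hypothetical outlier_start_time
        long end = 3960;           // hypothetical outlier_end_time (0 would mean "still active")

        // life in slides, computed as in mcodOutliers(): integer division per slide boundary
        long lifeInSlides = (end / slide) - (start / slide) + 1;   // (11 - 3) + 1 = 9

        // reported once it has survived outlierLife of the W/slide slides per window
        double threshold = ((double) W / slide) * outlierLife;     // 10 * 0.7 = 7 slides
        System.out.println(lifeInSlides >= threshold);             // true -> would be reported
    }
}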