Commit f19350bf40493228d5a842e024a13b8302e828dd
0 parents
Exists in: master
first commit
Showing 24 changed files with 4429 additions and 0 deletions
- supervised/Supervised_Predictor_v4.r
- unsupervised/pom.xml
- unsupervised/src/main/java/com/atl/afaultdetection/utils/InfluxDBHandler.java
- unsupervised/src/main/java/com/atl/afaultdetection/utils/Measurement.java
- unsupervised/src/main/java/com/atl/afaultdetection/utils/Utils.java
- unsupervised/src/main/java/com/atl/mcod/ComposedSplitFunction.java
- unsupervised/src/main/java/com/atl/mcod/DistanceFunction.java
- unsupervised/src/main/java/com/atl/mcod/DistanceFunctions.java
- unsupervised/src/main/java/com/atl/mcod/MCOD.java
- unsupervised/src/main/java/com/atl/mcod/MTree.java
- unsupervised/src/main/java/com/atl/mcod/MTreeClass.java
- unsupervised/src/main/java/com/atl/mcod/PartitionFunction.java
- unsupervised/src/main/java/com/atl/mcod/PartitionFunctions.java
- unsupervised/src/main/java/com/atl/mcod/PromotionFunction.java
- unsupervised/src/main/java/com/atl/mcod/PromotionFunctions.java
- unsupervised/src/main/java/com/atl/mcod/SplitFunction.java
- unsupervised/src/main/java/com/atl/mcod/utils/Constants.java
- unsupervised/src/main/java/com/atl/mcod/utils/Data.java
- unsupervised/src/main/java/com/atl/mcod/utils/Pair.java
- unsupervised/src/main/java/com/atl/mcod/utils/Utils.java
- unsupervised/src/main/java/com/atl/smartmaintenance/AFaultDetection.java
- unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FD.java
- unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FDKnowledge.java
- unsupervised/src/main/java/com/atl/smartmaintenance/faultdetection/FaultDetectionManager.java
supervised/Supervised_Predictor_v4.r
@@ -0,0 +1,703 @@
1 | +suppressMessages(library(CORElearn)) | |
2 | +suppressMessages(library(dplyr)) | |
3 | +suppressMessages(library(plyr)) | |
4 | +suppressMessages(library(data.table)) | |
5 | +suppressMessages(library(randomForest)) | |
6 | +suppressMessages(library(ggplot2)) | |
7 | +suppressMessages(library(grid)) | |
8 | +suppressMessages(library(argparser)) | |
9 | +suppressMessages(library(stringr)) | |
10 | + | |
11 | +export_ds_for_spm <- function(target_event,episodes_list,output){ | |
12 | + if (file.exists(output)) { | |
13 | + file.remove(output) | |
14 | + } | |
15 | + #output for HirateYamana | |
16 | + for(ep_index in (1:length(episodes_list))){ | |
17 | + ep = episodes_list[[ep_index]][ , !(names(episodes_list[[ep_index]]) %in% c("Timestamps"))] | |
18 | + ep_list = list() | |
19 | + for(i in (1:nrow(ep))){ | |
20 | + matches = which(ep[i,] %in% c(1)) | |
21 | + if(length(matches) == 0){ | |
22 | + next | |
23 | + } | |
24 | + line=paste(matches,collapse=" ") | |
25 | + ep_list[i] = line | |
26 | + } | |
27 | + if(length(ep_list) == 0){ | |
28 | + next | |
29 | + } | |
30 | + ep_list[length(ep_list)+1] = target_event | |
31 | + episode = "" | |
32 | + for(ep_lli in (1:length(ep_list))){ | |
33 | + if(length(ep_list[[ep_lli]]) > 0){ | |
34 | + index = paste(paste("<",ep_lli,sep=""),">",sep="") | |
35 | + if(episode == ""){ | |
36 | + episode = paste(index,ep_list[[ep_lli]],sep=" ") | |
37 | + } else { | |
38 | + episode = paste(episode,paste(index,ep_list[[ep_lli]],sep=" "),sep=" -1 ") | |
39 | + } | |
40 | + } | |
41 | + } | |
42 | + write(paste(episode,"-1 -2"),file=output,append=TRUE) | |
43 | + } | |
44 | +} | |
45 | + | |
46 | +remove_rare_events <- function(ds,target_event_frequency_proportion_rare){ | |
47 | + if(!csv){ | |
48 | + print("~~~~~~~APPLYING PREPROCESSING REMOVE RARE EVENTS~~~~~~~") | |
49 | + } | |
50 | + a = table(ds$Event_id) | |
51 | + target_event_frequency = a[names(a)==target_event][[1]] | |
52 | + rare_events = as.integer(names(a[a < target_event_frequency*target_event_frequency_proportion_rare])) | |
53 | + return(ds[!(ds$Event_id %in% rare_events),]) | |
54 | +} | |
55 | + | |
56 | +remove_frequent_events <- function(ds,max_event_frequency_proportion_frequent){ | |
57 | + if(!csv){ | |
58 | + print("~~~~~~~APPLYING PREPROCESSING REMOVE FREQUENT EVENTS~~~~~~~") | |
59 | + } | |
60 | + a = table(ds[!(ds$Event_id == target_event),]$Event_id) | |
61 | + max_freq = sort(a,decreasing = TRUE)[[1]] | |
62 | + frequent_events = as.integer(names(a[a > max_freq*max_event_frequency_proportion_frequent])) | |
63 | + #print(frequent_events) | |
64 | + return(ds[!(ds$Event_id %in% frequent_events),]) | |
65 | +} | |
66 | + | |
67 | +keep_only_first_occureness <- function(episodes_list){ | |
68 | + if(!csv){ | |
69 | + print("~~~~~~~APPLYING PREPROCESSING KEEP ONLY FIRST OCCURENESS~~~~~~~") | |
70 | + } | |
71 | + #for every episode in the episodes_list | |
72 | + for(ep_index in (1:length(episodes_list))){ | |
73 | + ep = episodes_list[[ep_index]] | |
74 | + #For every segment of each episode starting from the end up to the second segment. | |
75 | + #We need to keep only the 1st occurrence of consecutive events, hence starting from the end is the easy way. | |
76 | + if(nrow(ep) < 2){ | |
77 | + next | |
78 | + } | |
79 | + for(i in (nrow(ep):2)){ | |
80 | + #as we deal with binary vectors, to find the indices at which both vectors have "1" we sum them and check for "2"s in the result | |
81 | + matches = which((ep[i,]+ep[i-1,]) %in% c(2)) | |
82 | + #replace the 1s with 0s in the matching positions of the segment that is closer to the end of the episode | |
83 | + ep[i,][c(matches)] = 0 | |
84 | + } | |
85 | + episodes_list[[ep_index]] = ep | |
86 | + } | |
87 | + return(episodes_list) | |
88 | +} | |
89 | + | |
90 | +mil_text <- function(milw,F_thres,episodes_list,b_length){ | |
91 | + if(!csv){ | |
92 | + print("~~~~~~~APPLYING PREPROCESSING MULTI INSTANCE LEARNING~~~~~~~") | |
93 | + } | |
94 | + window_df = data.frame(matrix(ncol = b_length+1, nrow = 0)) | |
95 | + | |
96 | + #for every episode in the episodes_list | |
97 | + for(ep_index in (1:length(episodes_list))){ | |
98 | + ep = episodes_list[[ep_index]] | |
99 | + if(nrow(ep) < 1){ | |
100 | + next | |
101 | + } | |
102 | + new_ep = data.frame(matrix(ncol = b_length+1, nrow = 0)) | |
103 | + i = 1 | |
104 | + while(i <= nrow(ep)){ | |
105 | + new_ep = rbind(new_ep,ep[i,]) | |
106 | + | |
107 | + if(ep[i,][b_length+1] >= F_thres && nrow(window_df) < milw){ | |
108 | + window_df = rbind(window_df,ep[i,]) | |
109 | + } | |
110 | + if((nrow(window_df) == milw || i == nrow(ep)) && nrow(window_df) > 0){ | |
111 | + mean = colMeans(window_df) | |
112 | + mean[mean > 0] = 1 | |
113 | + mf <- data.frame(as.list(mean)) | |
114 | + mf[1] = ep[i,][1] | |
115 | + mf[b_length+1] = ep[i,][b_length+1] | |
116 | + colnames(mf) = colnames(new_ep) | |
117 | + new_ep = rbind(new_ep,mf) | |
118 | + if(nrow(window_df) > 1){ | |
119 | + i = i - (nrow(window_df)-2) | |
120 | + } | |
121 | + window_df = data.frame(matrix(ncol = b_length+1, nrow = 0)) | |
122 | + } | |
123 | + i = i + 1 | |
124 | + } | |
125 | + episodes_list[[ep_index]] = new_ep | |
126 | + } | |
127 | + return(episodes_list) | |
128 | +} | |
129 | + | |
130 | +mil_image <- function(milw,F_thres,episodes_list,b_length){ | |
131 | + if(!csv){ | |
132 | + print("~~~~~~~APPLYING PREPROCESSING MULTI INSTANCE LEARNING~~~~~~~") | |
133 | + } | |
134 | + | |
135 | + #for every episode in the episodes_list | |
136 | + for(ep_index in (1:length(episodes_list))){ | |
137 | + ep = episodes_list[[ep_index]] | |
138 | + if(nrow(ep) < 1){ | |
139 | + next | |
140 | + } | |
141 | + new_ep = data.frame(matrix(ncol = b_length+1, nrow = 0)) | |
142 | + #a data.frame with the vectors that need to be averaged | |
143 | + window_df = data.frame(matrix(ncol = b_length+1, nrow = 0)) | |
144 | + i = 1 | |
145 | + while(i <= nrow(ep)){ | |
146 | + #new_ep = rbind(new_ep,ep[i,]) | |
147 | + if(nrow(window_df) < milw){ | |
148 | + window_df = rbind(window_df,ep[i,]) | |
149 | + } | |
150 | + if((nrow(window_df) == milw || i == nrow(ep)) && nrow(window_df) > 0){ | |
151 | + mean = colMeans(window_df) | |
152 | + mean[mean > 0] = 1 | |
153 | + mf = data.frame(as.list(mean)) | |
154 | + mf[1] = ep[i,][1] | |
155 | + mf[b_length+1] = ep[i,][b_length+1] | |
156 | + #colnames(mf) = colnames(new_ep) | |
157 | + new_ep = rbind(new_ep,mf) | |
158 | + if(window_df[1,][b_length+1] >= F_thres && nrow(window_df) > 1){ | |
159 | + i = i - (nrow(window_df)-1) | |
160 | + } | |
161 | + window_df = data.frame(matrix(ncol = b_length+1, nrow = 0)) | |
162 | + } | |
163 | + i = i + 1 | |
164 | + } | |
165 | + episodes_list[[ep_index]] = new_ep | |
166 | + } | |
167 | + return(episodes_list) | |
168 | +} | |
169 | + | |
170 | +#the Risk function | |
171 | +compute_F <- function(s,midpoint,t,ep_length){ | |
172 | + #s affects the steepness | |
173 | + # s <- 0.9 | |
174 | + return(1/(1+exp(s*(ep_length-midpoint-t)))) | |
175 | +} | |
176 | + | |
177 | +#convert the event list of each segment into a per-segment frequency vector | |
178 | +compute_frequency_vectors <- function(aggr_episode_df,b_length,s,midpoint){ | |
179 | + freq_aggr_episode_df <- data.frame(matrix(ncol = b_length+2, nrow = 0)) | |
180 | + x <- c(c("Timestamps"), c(paste("e_",c(1:b_length),sep = "")), c("Risk_F")) | |
181 | + # colnames(bin_aggr_episode_df) <- x | |
182 | + | |
183 | + for(i in 1:nrow(aggr_episode_df)) { | |
184 | + #init a vector of b_length zeros | |
185 | + freq_vector = as.vector(integer(b_length)) | |
186 | + seg <- aggr_episode_df[i,] | |
187 | + #for every event id in the segment, store its number of occurrences at the corresponding position of freq_vector | |
188 | + for(value in seg$x[[1]]){ | |
189 | + freq_vector[[value]] = length(which(seg$x[[1]] == value)) | |
190 | + } | |
191 | + #add a new line to the freq_aggr_episode_df | |
192 | + #we use a matrix holding the elements of the new data.frame as a matrix is able to store variables of different data types | |
193 | + F = compute_F(s,midpoint,i-1,nrow(aggr_episode_df)) | |
194 | + if(midpoint >= nrow(aggr_episode_df)){ | |
195 | + F = 0 | |
196 | + } | |
197 | + date = seg$Timeframe[[1]] | |
198 | + new_df = data.frame(matrix(c(date, freq_vector,F),nrow=1,ncol=b_length+2)) | |
199 | + freq_aggr_episode_df <- rbind(freq_aggr_episode_df,new_df) | |
200 | + } | |
201 | + # x <- c(c("Timestamps"), c(paste("e_",c(1:3405))), c("Risk_F")) | |
202 | + colnames(freq_aggr_episode_df) <- x | |
203 | + return(freq_aggr_episode_df) | |
204 | +} | |
205 | + | |
206 | +create_episodes_list <- function(ds,target_event,b_length,s,midpoint,test_mode){ | |
207 | + if(!csv){ | |
208 | + print("~~~~~~~CREATING FREQUENCY VECTORS AND BINARIZE THEM~~~~~~~") | |
209 | + } | |
210 | + #divide into episodes | |
211 | + target_event_spotted = FALSE | |
212 | + #a list with data.frames for the episodes (each episode one data.frame) | |
213 | + episodes_list = list() | |
214 | + #data.frame for episodes | |
215 | + episode_df <- data.frame(Timestamps=as.POSIXct(character()),Event_id=integer()) | |
216 | + #iterate over every line of the original dataset | |
217 | + for(i in 1:nrow(ds)) { | |
218 | + #get the current row of the ds | |
219 | + meas <- ds[i,] | |
220 | + #If it is the target event enable the appropriate flag | |
221 | + if((meas$Event_id == target_event)){ | |
222 | + target_event_spotted = TRUE | |
223 | + } | |
224 | + #fill the episode data.frame with the events that are between two target events | |
225 | + if(meas$Event_id != target_event && target_event_spotted){ | |
226 | + episode_df <- rbind(episode_df,data.frame(Timestamps=meas$Timestamps, Event_id=meas$Event_id)) | |
227 | + } else if(meas$Event_id == target_event && target_event_spotted && is.data.frame(episode_df) && nrow(episode_df) != 0){ | |
228 | + #a second occurrence of the target event is spotted, close the episode | |
229 | + #target_event_spotted = FALSE | |
230 | + #aggregate all the events by hour to form the segments inside the episodes | |
231 | + aggr_episode_df = aggregate(episode_df[ ,2], FUN=function(x){return(x)}, by=list(Timeframe=cut(as.POSIXct(episode_df$Timestamps, format="%Y-%m-%dT%H:%M:%OSZ"),"hour"))) #%Y-%m-%dT%H:%M:%OSZ | |
232 | + #build the per-segment frequency vectors | |
233 | + bin_aggr_episode_df = compute_frequency_vectors(aggr_episode_df,b_length,s,midpoint) | |
234 | + | |
235 | + #Remove event 0, which does not provide any info KOUGKA | |
236 | + bin_aggr_episode_df = bin_aggr_episode_df[ , !(names(bin_aggr_episode_df) %in% c("e_1"))] | |
237 | + | |
238 | + #add the episode to the episodes_list | |
239 | + episodes_list[[length(episodes_list)+1]] = bin_aggr_episode_df | |
240 | + #reset episode_df to an empty data.frame | |
241 | + episode_df <- data.frame(Timestamps=as.POSIXct(character()),Event_id=integer()) | |
242 | + } else if(meas$Event_id == target_event && target_event_spotted && is.data.frame(episode_df) && nrow(episode_df) == 0 && test_mode){ | |
243 | + freq_vector = as.vector(integer(b_length)) | |
244 | + new_df = data.frame(matrix(c(0, freq_vector,0),nrow=1,ncol=b_length+2)) | |
245 | + episode_df <- rbind(episode_df,new_df) | |
246 | + | |
247 | + x <- c(c("Timestamps"), c(paste("e_",c(1:b_length),sep = "")), c("Risk_F")) | |
248 | + colnames(episode_df) <- x | |
249 | + episode_df = episode_df[ , !(names(episode_df) %in% c("e_1"))] | |
250 | + | |
251 | + #add the episode to the episodes_list | |
252 | + episodes_list[[length(episodes_list)+1]] = episode_df | |
254 | + #reset episode_df to an empty data.frame | |
254 | + episode_df <- data.frame(Timestamps=as.POSIXct(character()),Event_id=integer()) | |
255 | + } | |
256 | + } | |
257 | + return(episodes_list) | |
258 | +} | |
259 | + | |
260 | +preprocess <- function(ds,TEST_DATA,REMOVE_RARE_EVENTS,REMOVE_FREQUENT_EVENTS,KEEP_ONLY_FIRST_OCCURENESS,MULTI_INSTANCE_LEARNING_TEXT,MULTI_INSTANCE_LEARNING_IMAGE,FEATURE_SELECTION,top_features,s,midpoint,b_length,target_event,target_event_frequency_proportion_rare,max_event_frequency_proportion_frequent,w,F_thres,test_mode){ | |
261 | + | |
262 | + #Remove events that appear < n times, where n = (target event frequency) * rare-events ratio | |
263 | + if(REMOVE_RARE_EVENTS){ | |
264 | + ds<-remove_rare_events(ds,target_event_frequency_proportion_rare) | |
265 | + } | |
266 | + | |
267 | + #Remove events that appear > n times, where n = (frequency of the most frequent event) * frequent-events ratio | |
268 | + if(REMOVE_FREQUENT_EVENTS){ | |
269 | + ds<-remove_frequent_events(ds,max_event_frequency_proportion_frequent) | |
270 | + } | |
271 | + | |
272 | + episodes_list = create_episodes_list(ds,target_event,b_length,s,midpoint,test_mode) | |
273 | + #if(length(episodes_list) == 0){ | |
274 | + # return() | |
275 | + #} | |
276 | + | |
277 | + #binarize the vector | |
278 | + for(ep_index in (1:length(episodes_list))){ | |
279 | + ep = episodes_list[[ep_index]] | |
280 | + ep[2:(ncol(ep)-1)][ep[2:(ncol(ep)-1)] > 0] = 1 | |
281 | + episodes_list[[ep_index]] = ep | |
282 | + } | |
283 | + | |
284 | + # keep only the first occurrence of each event in consecutive segments | |
285 | + if(KEEP_ONLY_FIRST_OCCURENESS){ | |
286 | + episodes_list <- keep_only_first_occureness(episodes_list) | |
287 | + } | |
288 | + | |
289 | + # Multi-instance learning to increase the pattern frequency | |
290 | + if(MULTI_INSTANCE_LEARNING_TEXT){ | |
291 | + episodes_list <- mil_text(w,F_thres,episodes_list,b_length) | |
292 | + } else if(MULTI_INSTANCE_LEARNING_IMAGE){ | |
293 | + episodes_list <- mil_image(w,F_thres,episodes_list,b_length) | |
294 | + } | |
295 | + return(episodes_list) | |
296 | +} | |
297 | + | |
298 | +feature_selection <- function(merged_episodes,top_features){ | |
299 | + estReliefF <- attrEval(Risk_F ~ ., merged_episodes, estimator="RReliefFexpRank", ReliefIterations=50) | |
300 | + sorted_indeces = order(estReliefF, decreasing = TRUE) | |
301 | + merged_episodes = merged_episodes %>% select(sorted_indeces[1:top_features],ncol(merged_episodes)) | |
302 | + return(merged_episodes) | |
303 | +} | |
304 | + | |
305 | +read_dataset <- function(path){ | |
306 | + dataset = read.table(path, header = TRUE, sep = ",", dec = ".", comment.char = "#") | |
307 | + dataset[, 2] <- as.numeric(dataset[, 2]) | |
308 | + return(dataset) | |
309 | +} | |
310 | + | |
311 | +read_dataset_cross <- function(path){ | |
312 | + dataset = read.table(path, header = TRUE, sep = ",", dec = ".", comment.char = "#") | |
313 | + dataset[, 2] <- as.numeric(dataset[, 2]) | |
314 | + | |
315 | + aggr_episode_df = aggregate(dataset, FUN=function(x){return(x)}, by=list(cut(as.POSIXct(dataset$Timestamps, format="%Y-%m-%dT%H:%M:%OSZ"),"day"))) #%Y-%m-%dT%H:%M:%OSZ | |
316 | + | |
317 | + training = sample_n(aggr_episode_df,(2*nrow(aggr_episode_df)/3)) | |
318 | + | |
319 | + testing = setDT(aggr_episode_df)[!training, on="Group.1"] | |
320 | + | |
321 | + testing = testing[, !"Group.1"] | |
322 | + testing_df = data.frame(Timestamps=unlist(testing[,1][1][[1]]),Event_id=unlist(testing[,2][1][[1]])) | |
323 | + for(i in 2:nrow(testing)){ | |
324 | + testing_df = rbind(testing_df,data.frame(Timestamps=unlist(testing[,1][i][[1]]),Event_id=unlist(testing[,2][i][[1]]))) | |
325 | + } | |
326 | + | |
327 | + training = as.data.table(training)[, !"Group.1"] | |
328 | + training_df = data.frame(Timestamps=unlist(training[,1][1][[1]]),Event_id=unlist(training[,2][1][[1]])) | |
329 | + for(i in 2:nrow(training)){ | |
330 | + training_df = rbind(training_df,data.frame(Timestamps=unlist(training[,1][i][[1]]),Event_id=unlist(training[,2][i][[1]]))) | |
331 | + } | |
332 | + | |
333 | + return(list("training" = training_df, "testing"=testing_df)) | |
334 | +} | |
335 | + | |
336 | +read_dataset_cross2 <- function(path,form){ | |
337 | + dataset = read.table(path, header = TRUE, sep = ",", dec = ".", comment.char = "#") | |
338 | + dataset[, 2] <- as.numeric(dataset[, 2]) | |
339 | + if(form == 1){ | |
340 | + training_df = dataset[1:(2*nrow(dataset)/3),] | |
341 | + testing_df = dataset[(2*nrow(dataset)/3):nrow(dataset),] | |
342 | + } else if (form == 2){ | |
343 | + training_df = dataset[(nrow(dataset)/3):nrow(dataset),] | |
344 | + testing_df = dataset[1:(nrow(dataset)/3),] | |
345 | + } else { | |
346 | + training_df = rbind(dataset[1:(nrow(dataset)/3),],dataset[(2*nrow(dataset)/3):nrow(dataset),]) | |
347 | + testing_df = dataset[(nrow(dataset)/3+1):(2*nrow(dataset)/3),] | |
348 | + } | |
349 | + return(list("training" = training_df, "testing"=testing_df)) | |
350 | +} | |
351 | + | |
352 | +find_next_fake_episodes <- function(ep_index,test_episodes_list){ | |
353 | + fake_episodes_vec = vector() | |
354 | + cnt = 0 | |
355 | + if(ep_index < length(test_episodes_list)){ | |
356 | + for(i in ((ep_index+1):length(test_episodes_list))){ | |
357 | + n_ep = test_episodes_list[i] | |
358 | + if(nrow(n_ep[[1]]) == 1 && n_ep[[1]]$Timestamps == 0){ | |
359 | + cnt = cnt + 1 | |
360 | + fake_episodes_vec <- c(fake_episodes_vec, i) | |
361 | + } else { | |
362 | + return(fake_episodes_vec) | |
363 | + } | |
364 | + } | |
365 | + } | |
366 | + return(fake_episodes_vec) | |
367 | +} | |
368 | + | |
369 | +find_next_close_episodes <- function(ep_index,test_episodes_list,pred_index){ | |
370 | + ep_length = nrow(test_episodes_list[ep_index][[1]]) | |
371 | + remaining_hours = max_warning_interval-(ep_length-pred_index) | |
372 | + | |
373 | + nc_episodes_vec = vector() | |
374 | + cnt = 0 | |
375 | + while (remaining_hours > 0 && ep_index < length(test_episodes_list)) { | |
376 | + ep_index = ep_index + 1 | |
377 | + n_ep = test_episodes_list[ep_index] | |
378 | + if(nrow(n_ep[[1]]) == 1 && n_ep[[1]]$Timestamps == 0){ | |
379 | + next | |
380 | + } | |
381 | + n_ep_length = nrow(n_ep[[1]]) | |
382 | + if(n_ep_length <= remaining_hours){ | |
383 | + remaining_hours = remaining_hours-n_ep_length | |
384 | + if(remaining_hours >= 0){ | |
385 | + cnt = cnt + 1 | |
386 | + nc_episodes_vec <- c(nc_episodes_vec, ep_index) | |
387 | + } | |
388 | + } else{ | |
389 | + return(nc_episodes_vec) | |
390 | + } | |
391 | + } | |
392 | + return(nc_episodes_vec) | |
393 | +} | |
394 | + | |
395 | +eval <- function(train_episodes,test_episodes_list,seed){ | |
396 | + set.seed(seed) | |
397 | + my.rf = randomForest(Risk_F ~ .,data=train_episodes,importance=TRUE) | |
398 | + #varImpPlot(my.rf) | |
399 | + false_positives = 0 | |
400 | + tp_vec = vector() | |
401 | + fp_vec = vector() | |
402 | + fn_vec = vector() | |
403 | + true_positives = 0 | |
404 | + false_negatives = 0 | |
405 | + ep_index = 1 | |
406 | + while (ep_index <= length(test_episodes_list)) { | |
407 | + ep = test_episodes_list[[ep_index]] | |
408 | + fake_episodes_vec = find_next_fake_episodes(ep_index,test_episodes_list) | |
409 | + fake_ep_cnt = length(fake_episodes_vec) | |
410 | + | |
411 | + ep = ep[ , !(names(ep) %in% c("Timestamps"))] | |
412 | + Prediction <- predict(my.rf, ep) | |
413 | + ep_legth = length(Prediction) | |
414 | + pred_indeces = as.numeric(names(Prediction[Prediction >= acceptance_threshold])) | |
415 | + predicted_next_episodes = 0 | |
416 | + if(length(pred_indeces) > 0){ | |
417 | + pred_index = tail(pred_indeces, n=1) | |
418 | + nc_episodes_vec = find_next_close_episodes(ep_index,test_episodes_list,pred_index) | |
419 | + | |
420 | + predicted_next_episodes = length(nc_episodes_vec) | |
421 | + } | |
422 | + | |
423 | + | |
424 | + if(length(pred_indeces[pred_indeces < (ep_legth-(max_warning_interval))]) > 0){ | |
425 | + fp_reps = length(pred_indeces[pred_indeces < (ep_legth-(max_warning_interval))]) | |
426 | + false_positives = false_positives + fp_reps + (fake_ep_cnt*fp_reps) | |
427 | + fp_vec = c(fp_vec,rep(ep_index,fp_reps),rep(fake_episodes_vec,fp_reps)) | |
428 | + } | |
429 | + if(length(pred_indeces[pred_indeces >= (ep_legth-(max_warning_interval)) & pred_indeces <= (ep_legth-min_warning_interval)]) > 0){ | |
430 | + true_positives = true_positives + 1 + fake_ep_cnt + predicted_next_episodes | |
431 | + tp_vec = c(tp_vec,ep_index,fake_episodes_vec,nc_episodes_vec) | |
432 | + } else { | |
433 | + false_negatives = false_negatives + 1 + fake_ep_cnt | |
434 | + fn_vec = c(fn_vec,ep_index,fake_episodes_vec) | |
435 | + } | |
436 | + ep_index = ep_index + 1 + fake_ep_cnt + predicted_next_episodes | |
437 | + } | |
438 | + | |
439 | + precision = true_positives/(true_positives+false_positives) | |
440 | + if((true_positives+false_positives) == 0){ | |
441 | + precision = 0 | |
442 | + } | |
443 | + recall = true_positives/length(test_episodes_list) | |
444 | + | |
445 | + F1 = 2*((precision*recall)/(precision+recall)) | |
446 | + if((precision+recall) == 0){ | |
447 | + F1 = 0 | |
448 | + } | |
449 | + if(!csv){ | |
450 | + cat(paste("dataset:",argv$test,"\ntrue_positives:", true_positives,"\nfalse_positives:", false_positives,"\nfalse_negatives:", false_negatives,"\nprecision:", precision,"\nrecall:", recall,"\nF1:", F1, "\ntp:",paste(as.character(tp_vec), sep="", collapse=","), "\nfp:",paste(as.character(fp_vec), sep="", collapse=","), "\nfn:",paste(as.character(fn_vec), sep="", collapse=","))) | |
451 | + } else { | |
452 | + cat(paste(argv$test,",",true_positives,",",false_positives,",",false_negatives,",",precision,",",recall,",",F1,",",argv$fet,",",argv$tet,",",argv$rre,",",argv$rfe,",",argv$kofe,",",argv$mili,",",argv$milt,",",argv$fs,",",argv$top,",",argv$rer,",",argv$fer,",",argv$seed,",",argv$steepness,",",argv$pthres,",",argv$milw,",",argv$milthres,",",argv$midpoint,",",argv$minwint,",",argv$maxwint,",",argv$cross,",tp,",paste(as.character(tp_vec), sep="", collapse=","),",fp,",paste(as.character(fp_vec), sep="", collapse=","),",fn,",paste(as.character(fn_vec), sep="", collapse=","),"\n",sep="")) | |
453 | + } | |
454 | + return(my.rf) | |
455 | +} | |
456 | + | |
457 | +plot <- function(test_episodes_list, episode_index, my.rf){ | |
458 | + test_episodes = test_episodes_list[[episode_index]][ , !(names(test_episodes_list[[episode_index]]) %in% c("Timestamps"))] | |
459 | + Prediction <- predict(my.rf, test_episodes) | |
460 | + results = data.frame(Risk_F=test_episodes$Risk_F,num_Prediction=as.numeric(Prediction)) | |
461 | + mse = mean((Prediction-test_episodes$Risk_F)^2) | |
462 | + | |
463 | + chart =ggplot(results,aes((1:nrow(results)))) + | |
464 | + # geom_rect(aes(xmin = ceiling(nrow(df_test)/2), xmax = nrow(df_test), ymin = -Inf, ymax = Inf), | |
465 | + # fill = "yellow", alpha = 0.003) + | |
466 | + geom_line(aes(y = Risk_F, colour = "Actual")) + | |
467 | + geom_line(aes(y = num_Prediction, colour="Predicted")) + | |
468 | + labs(colour="Lines") + | |
469 | + xlab("Segments") + | |
470 | + ylab('Risk (F)') + | |
471 | + ggtitle("Risk Prediction") + # (RR_KF_2YEARS_PAT08) | |
472 | + theme(plot.title = element_text(hjust = 0.5)) + | |
473 | + geom_text(aes(label = paste("MSE=",round(mse,3)), x = 20, y = 1), hjust = -2, vjust = 6, color="black", size=4) #add MSE label | |
474 | + | |
475 | + # Disable clip-area so that the MSE is shown in the plot | |
476 | + gt <- ggplot_gtable(ggplot_build(chart)) | |
477 | + gt$layout$clip[gt$layout$name == "panel"] <- "off" | |
478 | + grid.draw(gt) | |
479 | +} | |
480 | + | |
481 | + | |
482 | +p <- arg_parser("Implementation of the AIRBUS Predictor") | |
483 | +# Add a positional argument | |
484 | +p <- add_argument(p, "id", help="experiment ID") | |
485 | +p <- add_argument(p, "train", help="training dataset") | |
486 | +p <- add_argument(p, "test", help="test dataset") | |
487 | +p <- add_argument(p, "fet", help="different types of the fault events",default=151) | |
488 | +p <- add_argument(p, "tet", help="type of the target fault events",default=151) | |
489 | +p <- add_argument(p, "--rre", help="remove rare events", default=FALSE) | |
490 | +p <- add_argument(p, "--rfe", help="remove frequent events", default=FALSE) | |
491 | +p <- add_argument(p, "--kofe", help="keep only first event", default=FALSE) | |
492 | +p <- add_argument(p, "--milt", help="MIL as written in the text of the paper", default=FALSE) | |
493 | +p <- add_argument(p, "--mili", help="MIL as shown in the Figure of the paper", default=FALSE) | |
494 | +p <- add_argument(p, "--milthres", help="MIL threshold to the sigmoid function for over-sampling", default=0.6) | |
495 | +p <- add_argument(p, "--steepness", help="steepness of the sigmoid function", default=0.8) | |
496 | +p <- add_argument(p, "--midpoint", help="midpoint of the sigmoid function (in days)", default=4) | |
497 | +p <- add_argument(p, "--fs", help="apply feature selection", default=FALSE) | |
498 | +p <- add_argument(p, "--top", help="# of features to keep in feature selection", default=200) | |
499 | +p <- add_argument(p, "--rer", help="rare events ratio of the target event frequency", default=0.2) | |
500 | +p <- add_argument(p, "--fer", help="frequent events ratio of the frequency of the most frequent event", default=0.8) | |
501 | +p <- add_argument(p, "--milw", help="MIL window size (in days)", default=4) | |
502 | +p <- add_argument(p, "--pthres", help="prediction threshold to the Risk value for a true positive episode", default=0.4) | |
503 | +p <- add_argument(p, "--seed", help="seed for RF", default=500) | |
504 | +p <- add_argument(p, "--csv", help="output for csv", default=FALSE) | |
505 | + | |
506 | + | |
507 | +p <- add_argument(p, "--spme", help="export datasets for sequential pattern mining", default=FALSE) | |
508 | +p <- add_argument(p, "--java", help="the java path", default="/usr/bin/java") | |
509 | +p <- add_argument(p, "--python", help="the python path", default="/usr/bin/python") | |
510 | +p <- add_argument(p, "--cep", help="the spm_rules.py (CEP rules) script path", default="/media/thanasis/Storage/ATLANTIS/0_Ensembled_Predictive_Solution_EPS/spm_rules.py") | |
511 | +p <- add_argument(p, "--spmf", help="the spmf path", default="/media/thanasis/Storage/ATLANTIS/0_Ensembled_Predictive_Solution_EPS/spmf.jar") | |
512 | +p <- add_argument(p, "--conf", help="minimum support (minsup)", default="20%") | |
513 | +p <- add_argument(p, "--minti", help="minimum time interval allowed between two successive itemsets of a sequential pattern", default=1) | |
514 | +p <- add_argument(p, "--maxti", help="maximum time interval allowed between two successive itemsets of a sequential pattern", default=5) | |
515 | +p <- add_argument(p, "--minwi", help="minimum time interval allowed between the first itemset and the last itemset of a sequential pattern", default=1) | |
516 | +p <- add_argument(p, "--maxwi", help="maximum time interval allowed between the first itemset and the last itemset of a sequential pattern", default=11) | |
517 | +p <- add_argument(p, "--minwint", help="min # of days before failure to expect a warning for true positive decision", default=1) | |
518 | +p <- add_argument(p, "--maxwint", help="max # of days before failure to expect a warning for true positive decision", default=8) | |
519 | + | |
520 | +p <- add_argument(p, "--cross", help="cross validation", default=3) | |
521 | +p <- add_argument(p, "--form", help="which chronological train/test split to use (1, 2 or 3)", default=1) | |
522 | + | |
523 | + | |
524 | + | |
525 | +argv = data.frame() | |
526 | +if( length(commandArgs(trailingOnly = TRUE)) != 0){ | |
527 | + argv <- parse_args(p) | |
528 | +} else { | |
529 | + argv <- parse_args(p,c(1,"/home/thanasis/Desktop/Atlantis/zBreak/Philips_AE_prediction/full_stops/all_channels/training/events1P5_LouvainPhilips_ch1_6405te_training.csv","/home/thanasis/Desktop/Atlantis/zBreak/Philips_AE_prediction/full_stops/all_channels/training/events1P5_LouvainPhilips_ch1_6405te_training.csv",6405,6405)) | |
530 | +} | |
531 | + | |
532 | + | |
533 | +TEST_DATA = FALSE | |
534 | +id = argv$id | |
535 | +REMOVE_RARE_EVENTS = argv$rre | |
536 | +REMOVE_FREQUENT_EVENTS = argv$rfe | |
537 | +KEEP_ONLY_FIRST_OCCURENESS = argv$kofe | |
538 | +MULTI_INSTANCE_LEARNING_TEXT = argv$milt #MIL as explained in the text | |
539 | +MULTI_INSTANCE_LEARNING_IMAGE = argv$mili #MIL as presented in the figure | |
540 | +FEATURE_SELECTION = argv$fs | |
541 | +top_features = argv$top | |
542 | +target_event_frequency_proportion_rare = argv$rer | |
543 | +max_event_frequency_proportion_frequent = argv$fer | |
544 | +milw = argv$milw | |
545 | +F_thres = argv$milthres | |
546 | +s = argv$steepness | |
547 | +midpoint = argv$midpoint | |
548 | +target_event = argv$tet | |
549 | +b_length = argv$fet | |
550 | +acceptance_threshold = argv$pthres | |
551 | +export_spm = argv$spme | |
552 | +seed = argv$seed | |
553 | +csv = argv$csv | |
554 | +max_warning_interval = argv$maxwint | |
555 | +min_warning_interval = argv$minwint | |
556 | + | |
557 | +CROSS = argv$cross | |
558 | + | |
559 | +if(CROSS == 1){ | |
560 | + #set.seed(argv$seed) | |
561 | + d = read_dataset_cross2(argv$train,argv$form) | |
562 | + training_set = d$training | |
563 | + test_set = d$testing | |
564 | +} else if(CROSS == 2) { | |
565 | + set.seed(argv$seed) | |
566 | + d = read_dataset_cross(argv$train) | |
567 | + training_set = d$training | |
568 | + test_set = d$testing | |
569 | +} else { | |
570 | + training_set = read_dataset(argv$train) | |
571 | + test_set = read_dataset(argv$test) | |
572 | +} | |
573 | + | |
574 | +episodes_list <- preprocess(training_set,TEST_DATA,REMOVE_RARE_EVENTS,REMOVE_FREQUENT_EVENTS,KEEP_ONLY_FIRST_OCCURENESS,MULTI_INSTANCE_LEARNING_TEXT,MULTI_INSTANCE_LEARNING_IMAGE,FEATURE_SELECTION,top_features,s,midpoint,b_length,target_event,target_event_frequency_proportion_rare,max_event_frequency_proportion_frequent,milw,F_thres,FALSE) | |
575 | + | |
576 | +#keep only the 2/3 of the episodes used for training | |
577 | +if(CROSS == 3){ | |
578 | + episodes_list = episodes_list[1:(2*length(episodes_list)/3)] | |
579 | +} else if(CROSS == 4){ | |
580 | + episodes_list = episodes_list[((length(episodes_list)/3)+1):length(episodes_list)] | |
581 | +} else if(CROSS == 5) { | |
582 | + episodes_list = episodes_list[-(((length(episodes_list)/3)+1):((2*length(episodes_list)/3)))] | |
583 | +} | |
584 | + | |
585 | +#merge episodes | |
586 | +merged_episodes = ldply(episodes_list, data.frame) | |
587 | +merged_episodes = merged_episodes[ , !(names(merged_episodes) %in% c("Timestamps"))] | |
588 | + | |
589 | + | |
590 | +if(FEATURE_SELECTION){ | |
591 | + #remove columns with all values equal to zero | |
592 | + merged_episodes = merged_episodes[, colSums(merged_episodes != 0) > 0] | |
593 | + merged_episodes = feature_selection(merged_episodes,top_features) | |
594 | +} | |
595 | + | |
596 | +TEST_DATA = TRUE | |
597 | +REMOVE_RARE_EVENTS = FALSE | |
598 | +REMOVE_FREQUENT_EVENTS = FALSE | |
599 | +KEEP_ONLY_FIRST_OCCURENESS = FALSE | |
600 | +MULTI_INSTANCE_LEARNING_TEXT = FALSE #MIL as explained in the text | |
601 | +MULTI_INSTANCE_LEARNING_IMAGE = FALSE #MIL as presented in the figure | |
602 | +FEATURE_SELECTION = FALSE | |
603 | +test_episodes_list <- preprocess(test_set,TEST_DATA,REMOVE_RARE_EVENTS,REMOVE_FREQUENT_EVENTS,KEEP_ONLY_FIRST_OCCURENESS,MULTI_INSTANCE_LEARNING_TEXT,MULTI_INSTANCE_LEARNING_IMAGE,FEATURE_SELECTION,top_features,s,midpoint,b_length,target_event,target_event_frequency_proportion_rare,max_event_frequency_proportion_frequent,milw,F_thres,TRUE) | |
604 | + | |
605 | +#keep the held-out 1/3 of episodes (the fold not used for training) | |
606 | +if(CROSS == 3){ | |
607 | + test_episodes_list = test_episodes_list[((2*length(test_episodes_list)/3)+1):length(test_episodes_list)] | |
608 | +} else if(CROSS == 4){ | |
609 | + test_episodes_list = test_episodes_list[1:(length(test_episodes_list)/3)] | |
610 | +} else if(CROSS == 5) { | |
611 | + test_episodes_list = test_episodes_list[(((length(test_episodes_list)/3)+1):((2*length(test_episodes_list)/3)))] | |
612 | +} | |
613 | + | |
614 | +my.rf = eval(merged_episodes,test_episodes_list,seed) | |
615 | + | |
616 | +# for(s in (0:6)){ | |
617 | +# my.rf = eval(merged_episodes,test_episodes_list,seed) | |
618 | +# seed = seed + 1 | |
619 | +# } | |
620 | + | |
621 | +# for(ep in 1:length(test_episodes_list)){ | |
622 | +# jpeg(paste(ep,'_rplot.jpg')) | |
623 | +# plot(test_episodes_list,ep,my.rf) | |
624 | +# dev.off() | |
625 | +# } | |
626 | + | |
627 | +if(export_spm){ | |
628 | + if(!csv){ | |
629 | + print("~~~~~~~SEQUENTIAL PATTERN MINING~~~~~~~") | |
630 | + } | |
631 | + spm_train_path = gsub(".csv",paste("_spm_",id,".csv",sep=""),argv$train) | |
632 | + spm_test_path = gsub(".csv",paste("_spm_",id,".csv",sep=""),argv$test) | |
633 | + spm_results_path = gsub(".csv",paste("_results_",id,".csv",sep=""),argv$train) | |
634 | + confidence = argv$conf | |
635 | + min_dist_seq = argv$minti | |
636 | + max_dist_seq = argv$maxti | |
637 | + min_dist_first_last = argv$minwi | |
638 | + max_dist_first_last = argv$maxwi | |
639 | + java_path = argv$java | |
640 | + jspmf_path = argv$spmf | |
641 | + python_path = argv$python | |
642 | + cep_path = argv$cep | |
643 | + max_warning_interval = argv$maxwint | |
644 | + min_warning_interval = argv$minwint | |
645 | + export_ds_for_spm(target_event,episodes_list,spm_train_path) | |
646 | + export_ds_for_spm(target_event,test_episodes_list,spm_test_path) | |
647 | + | |
648 | + if (file.exists(spm_results_path)) { | |
649 | + invisible(file.remove(spm_results_path)) | |
650 | + } | |
651 | + | |
652 | + javaOutput <- system(paste(java_path,"-jar",jspmf_path,"run HirateYamana",spm_train_path,spm_results_path,confidence,min_dist_seq,max_dist_seq,min_dist_first_last,max_dist_first_last), intern = TRUE) | |
653 | + #print(javaOutput) | |
654 | + | |
655 | + pythonOutput <- system(paste(python_path,cep_path,spm_results_path,spm_test_path,target_event), intern = TRUE) | |
656 | + #print(pythonOutput) | |
657 | + true_positives = 0 | |
658 | + false_positives = 0 | |
659 | + false_negatives = 0 | |
660 | + total_failures = 0 | |
661 | + d = 0 | |
662 | + | |
663 | + warnings = list() | |
664 | + for(w in pythonOutput){ | |
665 | + d = as.integer(str_extract(w, "\\-*\\d+\\.*\\d*")) | |
666 | + if(!grepl("Failure",w,fixed=TRUE)){ | |
667 | + warnings = c(warnings,d) | |
668 | + } else { | |
669 | + total_failures = total_failures + 1 | |
670 | + if(length(warnings) == 0){ | |
671 | + false_negatives = false_negatives + 1 | |
672 | + } else { | |
673 | + if(length(warnings[warnings < d-max_warning_interval]) > 0){ | |
674 | + false_positives = false_positives + length(warnings[warnings < d-max_warning_interval]) | |
675 | + } | |
676 | + if(length(warnings[warnings >= (d-max_warning_interval)]) > 0 & length(warnings[warnings <= (d-min_warning_interval)]) > 0){ | |
677 | + true_positives = true_positives + 1 | |
678 | + } else { | |
679 | + false_negatives = false_negatives + 1 | |
680 | + } | |
681 | + } | |
682 | + warnings = list() | |
683 | + } | |
684 | + } | |
685 | + | |
686 | + precision = true_positives/(true_positives+false_positives) | |
687 | + if((true_positives+false_positives) == 0){ | |
688 | + precision = 0 | |
689 | + } | |
690 | + | |
691 | + recall = true_positives/total_failures | |
692 | + | |
693 | + F1 = 2*((precision*recall)/(precision+recall)) | |
694 | + if((precision+recall) == 0){ | |
695 | + F1 = 0 | |
696 | + } | |
697 | + | |
698 | + if(!csv){ | |
699 | + cat(paste("dataset:",argv$test,"\ntrue_positives:", true_positives,"\nfalse_positives:", false_positives,"\nfalse_negatives:", false_negatives,"\nprecision:", precision,"\nrecall:", recall,"\nF1:", F1, "\n")) | |
700 | + } else { | |
701 | + cat(paste(argv$test,",", true_positives,",", false_positives,",", false_negatives,",", precision,",", recall,",", F1,",",argv$conf,",",argv$minti,",",argv$maxti,",",argv$minwi,",",argv$maxwi,",",argv$minwint,",",argv$maxwint, "\n",sep="")) | |
702 | + } | |
703 | +} |
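A quick sketch of the risk labelling used above (the parameter values and episode length below are illustrative assumptions, not taken from the commit): compute_F() assigns to the 0-based segment index t of an episode with ep_length segments a reversed sigmoid

    F(t) = 1 / (1 + exp(s * (ep_length - midpoint - t)))

which compute_frequency_vectors() forces to 0 whenever midpoint >= ep_length, so the label climbs towards 1 as a segment approaches the failure that closes the episode. A minimal, self-contained R snippet to inspect that curve:

    # Hypothetical standalone check of the label curve; s and midpoint follow the
    # script's defaults (steepness 0.8, midpoint 4), the 10-segment episode is made up.
    compute_F <- function(s, midpoint, t, ep_length) {
      1 / (1 + exp(s * (ep_length - midpoint - t)))
    }
    round(sapply(0:9, function(t) compute_F(0.8, 4, t, 10)), 3)
    # labels stay near 0 early in the episode and approach 1 near its end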
unsupervised/pom.xml
@@ -0,0 +1,53 @@
1 | +<?xml version="1.0" encoding="UTF-8"?> | |
2 | +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | |
3 | + <modelVersion>4.0.0</modelVersion> | |
4 | + <groupId>com</groupId> | |
5 | + <artifactId>aFaultDetection</artifactId> | |
6 | + <version>5.0</version> | |
7 | + <packaging>jar</packaging> | |
8 | + <properties> | |
9 | + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | |
10 | + <maven.compiler.source>1.8</maven.compiler.source> | |
11 | + <maven.compiler.target>1.8</maven.compiler.target> | |
12 | + <mainClass>com.atl.smartmaintenance.AFaultDetection</mainClass> | |
13 | + </properties> | |
14 | + <dependencies> | |
15 | + <dependency> | |
16 | + <groupId>org.influxdb</groupId> | |
17 | + <artifactId>influxdb-java</artifactId> | |
18 | + <version>2.10</version> | |
19 | + <type>jar</type> | |
20 | + </dependency> | |
21 | + | |
22 | + </dependencies> | |
23 | + | |
24 | + <build> | |
25 | + <plugins> | |
26 | + <plugin> | |
27 | + <artifactId>maven-assembly-plugin</artifactId> | |
28 | + <configuration> | |
29 | + <appendAssemblyId>false</appendAssemblyId> | |
30 | + <archive> | |
31 | + <manifest> | |
32 | + <mainClass>${mainClass}</mainClass> | |
33 | + </manifest> | |
34 | + </archive> | |
35 | + <descriptorRefs> | |
36 | + <descriptorRef>jar-with-dependencies</descriptorRef> | |
37 | + </descriptorRefs> | |
38 | + </configuration> | |
39 | + <executions> | |
40 | + <execution> | |
41 | + <id>make-assembly</id> <!-- this is used for inheritance merges --> | |
42 | + <phase>package</phase> <!-- bind to the packaging phase --> | |
43 | + <goals> | |
44 | + <goal>single</goal> | |
45 | + </goals> | |
46 | + </execution> | |
47 | + </executions> | |
48 | + </plugin> | |
49 | + </plugins> | |
50 | + </build> | |
51 | + | |
52 | + | |
53 | +</project> | |
\ No newline at end of file
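With the maven-assembly-plugin bound to the package phase and appendAssemblyId set to false as above, a standard Maven package run should leave a single self-contained jar (presumably target/aFaultDetection-5.0.jar) whose manifest main class is com.atl.smartmaintenance.AFaultDetection.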
unsupervised/src/main/java/com/atl/afaultdetection/utils/InfluxDBHandler.java
@@ -0,0 +1,203 @@
1 | +package com.atl.afaultdetection.utils; | |
2 | + | |
3 | +import com.atl.mcod.utils.Data; | |
4 | +import com.atl.mcod.utils.Constants; | |
5 | +import java.security.KeyManagementException; | |
6 | +import java.security.NoSuchAlgorithmException; | |
7 | +import java.security.cert.CertificateException; | |
8 | +import java.time.Duration; | |
9 | +import java.util.HashMap; | |
10 | +import java.util.List; | |
11 | +import java.util.Map; | |
12 | +import java.util.concurrent.TimeUnit; | |
13 | +import java.util.logging.Level; | |
14 | +import java.util.logging.Logger; | |
15 | +import javax.net.ssl.HostnameVerifier; | |
16 | +import javax.net.ssl.SSLContext; | |
17 | +import javax.net.ssl.SSLSession; | |
18 | +import javax.net.ssl.TrustManager; | |
19 | +import javax.net.ssl.X509TrustManager; | |
20 | +import okhttp3.OkHttpClient; | |
21 | +import org.influxdb.InfluxDB; | |
22 | +import org.influxdb.InfluxDBFactory; | |
23 | +import org.influxdb.dto.Point; | |
24 | +import org.influxdb.dto.Query; | |
25 | +import org.influxdb.dto.QueryResult; | |
26 | + | |
27 | +/** | |
28 | + * | |
29 | + * @author thanasis | |
30 | + */ | |
31 | +public class InfluxDBHandler { | |
32 | + | |
33 | + private InfluxDB influxDB; | |
34 | + private String database; | |
35 | + private String resultsDatabase; | |
36 | + private Map<String, Data> lastReportedMap; //to avoid multiple reports | |
37 | + | |
38 | +// public InfluxDBHandler(String host, String port, String username, String password, String database) { | |
39 | +// OkHttpClient.Builder builder = new OkHttpClient.Builder() | |
40 | +// .readTimeout(100, TimeUnit.SECONDS).connectTimeout(1000, TimeUnit.SECONDS); | |
41 | +// influxDB = InfluxDBFactory.connect("https://" + host + ":" + port, username, password, builder); | |
42 | +// influxDB.setDatabase(database); | |
43 | +// this.database = database; | |
44 | +// this.lastReportedMap = new HashMap<>(); | |
45 | +// } | |
46 | + | |
47 | + public InfluxDBHandler(String host, String port, String database, String resutlsDatabase, String username, String password, boolean ssl) { | |
48 | + try { | |
49 | + OkHttpClient.Builder builder = null; | |
50 | + String http = "http://"; | |
51 | + if(ssl){ | |
52 | + // Create a trust manager that does not validate certificate chains | |
53 | + final TrustManager[] trustAllCerts = new TrustManager[]{ | |
54 | + new X509TrustManager() { | |
55 | + @Override | |
56 | + public void checkClientTrusted(java.security.cert.X509Certificate[] chain, String authType) throws CertificateException { | |
57 | + } | |
58 | + | |
59 | + @Override | |
60 | + public void checkServerTrusted(java.security.cert.X509Certificate[] chain, String authType) throws CertificateException { | |
61 | + } | |
62 | + | |
63 | + @Override | |
64 | + public java.security.cert.X509Certificate[] getAcceptedIssuers() { | |
65 | + return new java.security.cert.X509Certificate[]{}; | |
66 | + } | |
67 | + } | |
68 | + }; | |
69 | + | |
70 | + // Install the all-trusting trust manager | |
71 | + SSLContext sc = SSLContext.getInstance("SSL"); | |
72 | + sc.init(null, trustAllCerts, new java.security.SecureRandom()); | |
73 | + | |
74 | + builder = new OkHttpClient.Builder() | |
75 | + .sslSocketFactory(sc.getSocketFactory(), (X509TrustManager) trustAllCerts[0]).readTimeout(100, TimeUnit.SECONDS).connectTimeout(1000, TimeUnit.SECONDS); | |
76 | + builder.hostnameVerifier(new HostnameVerifier() { | |
77 | + @Override | |
78 | + public boolean verify(String hostname, SSLSession session) { | |
79 | + return true; | |
80 | + } | |
81 | + }); | |
82 | + http = "https://"; | |
83 | + } else { | |
84 | + builder = new OkHttpClient.Builder() | |
85 | + .readTimeout(100, TimeUnit.SECONDS).connectTimeout(1000, TimeUnit.SECONDS); | |
86 | + http = "http://"; | |
87 | + } | |
88 | + if(username == null || username.equals("")){ | |
89 | + influxDB = InfluxDBFactory.connect(http + host + ":" + port, builder); | |
90 | + } else { | |
91 | + influxDB = InfluxDBFactory.connect(http + host + ":" + port, username, password, builder); | |
92 | + } | |
93 | + influxDB.setDatabase(database); | |
94 | + this.database = database; | |
95 | + this.resultsDatabase = resutlsDatabase; | |
96 | + | |
97 | + if(!dbExists(this.resultsDatabase)){ | |
98 | + influxDB.query(new Query("create database " + this.resultsDatabase + " with name \"autogen\"",resutlsDatabase)); | |
99 | + } | |
100 | + | |
101 | + this.lastReportedMap = new HashMap<>(); | |
102 | + } catch (NoSuchAlgorithmException ex) { | |
103 | + Logger.getLogger(InfluxDBHandler.class.getName()).log(Level.SEVERE, null, ex); | |
104 | + } catch (KeyManagementException ex) { | |
105 | + Logger.getLogger(InfluxDBHandler.class.getName()).log(Level.SEVERE, null, ex); | |
106 | + } | |
107 | + } | |
108 | + | |
109 | + public InfluxDB getInfluxDBHandler() { | |
110 | + return influxDB; | |
111 | + } | |
112 | + | |
113 | + public QueryResult getMeasurements(String measurementName, long seconds, boolean excludeZeroValues){ | |
114 | + if(influxDB != null){ | |
115 | + influxDB.setDatabase(this.database); | |
116 | + Query query = new Query("SELECT mean(value) FROM "+measurementName+" WHERE time > now() - "+seconds+"s and time <= now() "+(excludeZeroValues?"and value != 0":"")+" group by time(1s) fill(linear)", database); | |
117 | + QueryResult results = influxDB.query(query); | |
118 | + return results; | |
119 | + } else { | |
120 | + System.err.print("No DB connection"); | |
121 | + } | |
122 | + return new QueryResult(); | |
123 | + } | |
124 | + //for debugging | |
125 | + public QueryResult getMeasurementsDebug(String measurementName, long seconds, boolean excludeZeroValues) { | |
126 | + if (influxDB != null) { | |
127 | + influxDB.setDatabase(this.database); | |
128 | +// Query query = new Query("SELECT mean(value) FROM " + measurementName + " WHERE time > '2018-01-25T00:00:01.004Z' + " + (seconds - Constants.slide) + "s and time <= '2018-01-25T00:00:01.004Z' + " + seconds + "s " + (excludeZeroValues ? "and value != 0" : "") + " group by time(1s) fill(linear)", database); | |
129 | + //Query query = new Query("SELECT mean(value) FROM " + measurementName + " WHERE time > '2018-07-05T00:00:01.004Z' + " + (seconds - Constants.slide) + "s and time <= '2018-07-05T00:00:01.004Z' + " + seconds + "s " + (excludeZeroValues ? "and value != 0" : "") + " group by time(1s) fill(linear)", database); | |
130 | + //FOR AE TESTING | |
131 | + Query query = new Query("SELECT value AS \"mean\" FROM " + measurementName + " WHERE time > '2018-08-19T00:00:01.004Z' + " + (seconds - Constants.slide) + "s and time <= '2018-08-19T00:00:01.004Z' + " + seconds + "s " /*+ (excludeZeroValues ? "and value != 0" : "") + ""*/, database); | |
132 | + //FOR THICK TESTING | |
133 | + //Query query = new Query("SELECT value AS \"mean\" FROM " + measurementName + " WHERE time > '2018-10-14T00:00:01.004Z' + " + (seconds - Constants.slide) + "s and time <= '2018-10-14T00:00:01.004Z' + " + seconds + "s " /*+ (excludeZeroValues ? "and value != 0" : "") + ""*/, database); | |
134 | + QueryResult results = influxDB.query(query); | |
135 | + return results; | |
136 | + } else { | |
137 | + System.err.print("No DB connection"); | |
138 | + } | |
139 | + return new QueryResult(); | |
140 | + } | |
141 | + | |
142 | + public Double getMinValue(String measurementName, boolean excludeZeroValues, boolean excludeNegativeValues) { | |
143 | + if (influxDB != null) { | |
144 | + influxDB.setDatabase(this.database); | |
145 | + Query query = new Query("SELECT min(value) FROM " + measurementName + (excludeNegativeValues && excludeZeroValues ? " where value > 0" : excludeZeroValues ? " where value != 0" : excludeNegativeValues ? " where value >= 0" : ""), database); | |
146 | + QueryResult results = influxDB.query(query); | |
147 | + return (Double) results.getResults().get(0).getSeries().get(0).getValues().get(0).get(1); | |
148 | + } else { | |
149 | + System.err.print("No DB connection"); | |
150 | + } | |
151 | + return null; | |
152 | + } | |
153 | + | |
154 | + public Double getMaxValue(String measurementName) { | |
155 | + if (influxDB != null) { | |
156 | + influxDB.setDatabase(this.database); | |
157 | + Query query = new Query("SELECT max(value) FROM " + measurementName, database); | |
158 | + QueryResult results = influxDB.query(query); | |
159 | + return (Double) results.getResults().get(0).getSeries().get(0).getValues().get(0).get(1); | |
160 | + } else { | |
161 | + System.err.print("No DB connection"); | |
162 | + } | |
163 | + return null; | |
164 | + } | |
165 | + | |
166 | + public void reportOutlier(Data outlier, String label, String ruleMeas) { | |
167 | + influxDB.setDatabase(this.resultsDatabase); | |
168 | + //don't report outliers with distance lower than 5mins | |
169 | +// if (lastReportedMap.containsKey(label)) { | |
170 | +// if (Duration.between(lastReportedMap.get(label).getActualTime(), outlier.getActualTime()).toMinutes() < 5 ) { | |
171 | +// return; | |
172 | +// } | |
173 | +// } | |
174 | + if(Constants.debug || Constants.verbose){ | |
175 | + //System.out.println(label + " " + outlier.getActualTime().toString()); | |
176 | + } | |
177 | + | |
178 | + influxDB.setRetentionPolicy("autogen"); | |
179 | + influxDB.write(Point.measurement(Constants.resutlsDBTable) | |
180 | + .time(outlier.getActualTime().toEpochMilli(), TimeUnit.MILLISECONDS) | |
181 | + //.addField("report_time", System.currentTimeMillis()) | |
182 | + .addField("report_time", outlier.getOutlier_start_time()) | |
183 | + .addField("reason", "") | |
184 | + .tag("sensor", ruleMeas.isEmpty()?Constants.sensors:ruleMeas) | |
185 | + .tag("seen", "") | |
186 | + .tag("submitted", "") | |
187 | + .tag("machine", "A-ERWAERM1-PRESSE") | |
188 | + .tag("label", label) | |
189 | + .build()); | |
190 | + lastReportedMap.put(label, outlier); | |
191 | + } | |
192 | + | |
193 | + private boolean dbExists(String database){ | |
194 | + boolean exists = false; | |
195 | + QueryResult query = influxDB.query(new Query("show databases",database)); | |
196 | + for (List<Object> v : query.getResults().get(0).getSeries().get(0).getValues()){ | |
197 | + if(v.get(0).equals(database)){ | |
198 | + return true; | |
199 | + } | |
200 | + } | |
201 | + return exists; | |
202 | + } | |
203 | +} |
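For orientation (illustrative, not from the commit): with a hypothetical measurement name press_force and a 60-second window, the InfluxQL string that getMeasurements() assembles is roughly SELECT mean(value) FROM press_force WHERE time > now() - 60s and time <= now() group by time(1s) fill(linear), i.e. the handler resamples the raw sensor stream into 1-second means with linear gap filling before handing it to the detection code, and reportOutlier() writes its findings back to the separate results database under the autogen retention policy.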
unsupervised/src/main/java/com/atl/afaultdetection/utils/Measurement.java
@@ -0,0 +1,46 @@
1 | +/* | |
2 | + * To change this license header, choose License Headers in Project Properties. | |
3 | + * To change this template file, choose Tools | Templates | |
4 | + * and open the template in the editor. | |
5 | + */ | |
6 | +package com.atl.afaultdetection.utils; | |
7 | + | |
8 | +/** | |
9 | + * | |
10 | + * @author thanasis | |
11 | + */ | |
12 | +public class Measurement { | |
13 | + private String measurementDBName; | |
14 | + private Double minValue; | |
15 | + private Double maxValue; | |
16 | + | |
17 | + public Measurement(String measurementDBName) { | |
18 | + this.measurementDBName = measurementDBName; | |
19 | + } | |
20 | + | |
21 | + public String getMeasurementDBName() { | |
22 | + return measurementDBName; | |
23 | + } | |
24 | + | |
25 | + public void setMeasurementDBName(String measurementDBName) { | |
26 | + this.measurementDBName = measurementDBName; | |
27 | + } | |
28 | + | |
29 | + public Double getMinValue() { | |
30 | + return minValue; | |
31 | + } | |
32 | + | |
33 | + public void setMinValue(Double minValue) { | |
34 | + this.minValue = minValue; | |
35 | + } | |
36 | + | |
37 | + public Double getMaxValue() { | |
38 | + return maxValue; | |
39 | + } | |
40 | + | |
41 | + public void setMaxValue(Double maxValue) { | |
42 | + this.maxValue = maxValue; | |
43 | + } | |
44 | + | |
45 | + | |
46 | +} |
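Measurement itself is a plain value holder pairing an InfluxDB measurement name with its min/max values, which the getMinValue/getMaxValue queries in InfluxDBHandler can populate and which Utils.normalize() in the next file presumably uses to scale raw readings into [0,1].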
unsupervised/src/main/java/com/atl/afaultdetection/utils/Utils.java
@@ -0,0 +1,105 @@
1 | +/* | |
2 | + * To change this license header, choose License Headers in Project Properties. | |
3 | + * To change this template file, choose Tools | Templates | |
4 | + * and open the template in the editor. | |
5 | + */ | |
6 | +package com.atl.afaultdetection.utils; | |
7 | + | |
8 | +import com.atl.mcod.utils.Constants; | |
9 | +import java.time.Instant; | |
10 | +import java.util.Iterator; | |
11 | +import java.util.Map; | |
12 | + | |
13 | +/** | |
14 | + * | |
15 | + * @author thanasis | |
16 | + */ | |
17 | +public class Utils { | |
18 | + public static double normalize(double value, double minValue, double maxValue){ | |
19 | + return (value - minValue) / (maxValue - minValue); | |
20 | + } | |
21 | + | |
22 | + public static boolean allIteratorsHaveNext(Map<String,Iterator<Map.Entry<Instant,Double>>> slideDataIteratorsMap) { | |
23 | + for(Map.Entry<String, Iterator<Map.Entry<Instant,Double>>> entry : slideDataIteratorsMap.entrySet()){ | |
24 | + if(!entry.getValue().hasNext()){ | |
25 | + return false; | |
26 | + } | |
27 | + } | |
28 | + return true; | |
29 | + } | |
30 | + | |
31 | + public static String checkRuleBasedParameters(Map<String, Measurement> measMap) { | |
32 | + String ruleMeas = ""; | |
33 | + if (Constants.algorithms.contains("mcod") && Constants.maxRule == null && Constants.minRule == null) { | |
34 | + System.err.println("No rule set for the rule based"); | |
35 | + System.exit(5); | |
36 | + } | |
37 | + if (Constants.algorithms.contains("mcodKnowledge") && Constants.maxRule_k == null && Constants.minRule_k == null) { | |
38 | + System.err.println("No rule set for the rule based knowledge"); | |
39 | + System.exit(5); | |
40 | + } | |
41 | + if (Constants.maxRule != null) { | |
42 | + String ruleQuickMeas = Constants.maxRule.split("-")[0]; | |
43 | + if (!measMap.containsKey(Constants.measShortNameMap.get(ruleQuickMeas))) { | |
44 | + System.err.println("The measurement of the max rule should be passed using the --meas flag"); | |