# -*- coding: utf-8 -*-
import sqlite3
import re
import math
import logging
import os
try:
    import cPickle as pickle
except ImportError:
    import pickle
from sqlite3 import Error
logger = logging.getLogger(__name__)
def create_sqlite_connection(db_file):
    """Open a connection to the SQLite database at db_file; return None on failure."""
    conn = None
    try:
        conn = sqlite3.connect(db_file)
    except Error:
        logger.exception("Can't connect to SQLite")
    return conn
def select_model_attribute(conn, trainID, attribute):
    """Return a single attribute of the model row with the given id.

    Note: ``attribute`` is interpolated into the SQL as a column name and
    cannot be bound as a parameter, so it must come from a trusted source.
    """
    time_seg = ""
    cur = conn.cursor()
    cur.execute(
        "SELECT " + attribute + " FROM models WHERE model_id=?", (int(trainID),)
    )
    row = cur.fetchone()
    if row is not None and len(row) > 0:
        time_seg = row[0]
    return time_seg
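# Example usage (the database path and attribute name below are hypothetical,
# chosen only to illustrate the call):
#
#   conn = create_sqlite_connection("models.db")
#   time_seg = select_model_attribute(conn, 42, "time_segments")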
def check_pid(pid):
""" Check For the existence of a unix pid. """
try:
os.kill(pid, 0)
except OSError:
return False
else:
return True
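# Quick sanity check (illustrative): the current process always exists, so
# check_pid(os.getpid()) returns True, while a pid whose process has exited
# returns False.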
# def load_data(train_id, train_dir=DEFAULT_TRAIN_DIR, only_aem=False):
# filename = "train_" + str(train_id) + ".dat"
# train_path = os.path.join(train_dir, filename)
# infile = open(train_path, "rb")
# weak_bins_mapping = pickle.load(infile)
# profile_index = pickle.load(infile)
# mp = pickle.load(infile)
# train_dataset_values = np.array(pickle.load(infile))
# art_events_dataset = pickle.load(infile)
# processed_art_events_dataset = ""
# regr = ""
# feature_importance = ""
# if not only_aem:
# processed_art_events_dataset = pickle.load(infile)
# regr = pickle.load(infile)
# feature_importance = pickle.load(infile)
# infile.close()
# return (
# weak_bins_mapping,
# profile_index,
# mp,
# train_dataset_values,
# art_events_dataset,
# processed_art_events_dataset,
# regr,
# feature_importance,
# )
def create_table(conn, create_table_sql):
    """Create a table from the given CREATE TABLE statement."""
    try:
        c = conn.cursor()
        c.execute(create_table_sql)
    except Error:
        logger.exception("Can't create table")
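# Example usage (the schema below is hypothetical; only model_id and a
# time-segment attribute are implied by the queries in this module):
#
#   conn = create_sqlite_connection("models.db")
#   create_table(
#       conn,
#       "CREATE TABLE IF NOT EXISTS models ("
#       "model_id INTEGER PRIMARY KEY, time_segments TEXT)",
#   )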
def sigmoid_risk(s, midpoint, t):
    """Map a time delta ``t`` (in minutes) to a risk value in [0, 1] via a sigmoid."""
    try:
        return 1 / (1 + math.exp(s * (t - midpoint)))
    except OverflowError:
        logger.warning(f"Timedelta too big: {t}. Returning 0 risk")
        return 0
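# Illustrative example (values chosen here, not taken from the pipeline):
# with s = 0.05 and midpoint = 120 minutes, t = 120 gives exactly
# 1 / (1 + exp(0)) == 0.5; larger t drives the risk towards 0 and smaller t
# towards 1.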
def sigmoid_mins(v, s, midpoint, hours_before):
    """Invert sigmoid_risk: return the time delta (in minutes) that yields risk ``v``."""
    if v > 1:
        logger.error(f"SIGMOID Risk can't be more than one {v}")
        raise ValueError(f"Sigmoid risk can't be more than one: {v}")
    elif v == 1:
        return 0
    elif v == 0:
        return convert_hours_to_mins(strtime_to_hours(hours_before))
    return (math.log(1 / v - 1) / s) + midpoint
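# sigmoid_mins is the algebraic inverse of sigmoid_risk: solving
# v = 1 / (1 + exp(s * (t - midpoint))) for t gives
# t = ln(1/v - 1) / s + midpoint, which is what the final return computes.
# Illustrative example (values chosen here): with s = 0.05 and midpoint = 120,
# a risk of v = 0.5 maps back to t = 120 minutes, since ln(1) = 0.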
def convert_hours_to_mins(hours):
return hours * 60
def strtime_to_hours(time_segments):
    """Convert a frequency string such as "2H", "30T"/"30min" or "90S" to hours."""
    time_segments_digits = float(re.findall(r"\d+\.\d+|\d+", time_segments)[0])
time_segments_hours = 0.0
if "H" in time_segments:
time_segments_hours = time_segments_digits
elif "T" in time_segments or "min" in time_segments:
time_segments_hours = time_segments_digits / 60.0
elif "S" in time_segments:
time_segments_hours = time_segments_digits / 60.0 / 60.0
return time_segments_hours
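# Examples of the supported frequency strings (illustrative):
#
#   strtime_to_hours("2H")     # -> 2.0
#   strtime_to_hours("30T")    # -> 0.5
#   strtime_to_hours("30min")  # -> 0.5
#   strtime_to_hours("90S")    # -> 0.025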
def inBufferWindow(actual_hours, buffer_time):
    """
    Checks whether the elapsed time falls inside the buffer window.

    Parameters
    ----------
    actual_hours : float
        the elapsed time, in hours
    buffer_time : str
        the size of the buffer window as a frequency string (e.g. "2H")

    Returns
    -------
    bool
        True if the elapsed time is inside the buffer window
    """
    if buffer_time is not None:
        buffer_time_mins = convert_hours_to_mins(strtime_to_hours(buffer_time))
        return convert_hours_to_mins(actual_hours) <= buffer_time_mins
    return False
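# Illustrative example (values chosen here): with buffer_time = "2H" the
# buffer window is 120 minutes, so inBufferWindow(1.5, "2H") is True
# (90 minutes <= 120) while inBufferWindow(3, "2H") is False.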
def time_segments_to_hours(time_segments):
    """Like strtime_to_hours, but only parses integer-valued frequency strings."""
    time_segments_digits = float(re.findall(r"\d+", time_segments)[0])
time_segments_hours = 0.0
if "H" in time_segments:
time_segments_hours = time_segments_digits
elif "T" in time_segments or "min" in time_segments:
time_segments_hours = time_segments_digits / 60.0
elif "S" in time_segments:
time_segments_hours = time_segments_digits / 60.0 / 60.0
return time_segments_hours
def fetch_influxdb_data(
    inf_client, start_date, end_date, dbname, measurement, multidimensional_data
):
    """Fetch points of ``measurement`` between ``start_date`` and ``end_date``."""
    dataset_dates = []
    dataset_values = []
    # The query is identical for both data layouts; only the parsing of the
    # returned field keys and the shape of the stored values differ.
    query = (
        "SELECT * FROM " + measurement
        + " WHERE time >= '" + start_date + "' AND time <= '" + end_date + "'"
    )
    result = inf_client.query(query, database=dbname)
    if len(result.items()) > 0:
        for c in result.items()[0][1]:
            ae = [0] * (len(c.keys()) - 1)
            time = None
            for key in c.keys():
                if key == "time":
                    time = c["time"]
                elif multidimensional_data:
                    # Field keys are plain numeric indices, e.g. "0", "1", ...
                    ae[int(key)] = c[key]
                else:
                    # Use the numeric suffix of the field name as the index.
                    ae[int(re.findall(r"\d+\.\d+|\d+", key)[0])] = c[key]
            if len(ae) == 1 or sum(ae) == 0:
                continue
            dataset_dates.append(time)
            # Multidimensional data keeps the whole vector; otherwise only
            # the maximum value of the point is stored.
            dataset_values.append(ae if multidimensional_data else max(ae))
    return dataset_dates, dataset_values
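# Example usage (host, database and measurement names are hypothetical;
# assumes the influxdb client library):
#
#   from influxdb import InfluxDBClient
#   client = InfluxDBClient(host="localhost", port=8086)
#   dates, values = fetch_influxdb_data(
#       client, "2021-01-01T00:00:00Z", "2021-01-02T00:00:00Z",
#       "metrics_db", "anomaly_scores", multidimensional_data=False,
#   )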
def compute_thresholds(
    warning_hours_thres, prediction_threshold, s, midpoint, hours_before, buffer_time
):
    """Derive whichever of warning_hours_thres / prediction_threshold is missing
    from the other via the sigmoid risk model; buffer_time, if given, is
    subtracted from the warning horizon first."""
    buffer_time_mins = 0
if buffer_time is not None:
buffer_time_mins = convert_hours_to_mins(strtime_to_hours(buffer_time))
if warning_hours_thres is not None:
prediction_threshold = sigmoid_risk(
s,
convert_hours_to_mins(strtime_to_hours(midpoint)),
convert_hours_to_mins(strtime_to_hours(warning_hours_thres))
- buffer_time_mins,
)
else:
warning_hours_thres = (
sigmoid_mins(
prediction_threshold,
s,
convert_hours_to_mins(strtime_to_hours(midpoint)),
hours_before,
)
/ 60.0
)
return warning_hours_thres, prediction_threshold
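# Example usage (values are illustrative): when a warning-hours threshold is
# given, the prediction threshold is derived from it; when it is None, the
# warning horizon is derived from the prediction threshold instead. For
# instance, a risk threshold of 0.5 with a "1H" midpoint maps back to a
# 1.0-hour warning horizon.
#
#   hrs, thres = compute_thresholds("2H", None, 0.05, "1H", "6H", None)
#   hrs2, thres2 = compute_thresholds(None, 0.5, 0.05, "1H", "6H", None)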