Thanasis Naskos / CUREX - Knowledge Extraction Analytics - KEA

Blame view

CEPTD/docker/suricata/dist/process_suricata_log.py 3.78 KB
  #!/usr/bin/env python3
  # -*- coding: utf-8 -*-
  """
  @author: chribell
  
  The purpose of this script is to process Suricata's output json file (eve.json)
  and find possible connections between Suricata rules -> CVEs -> CWEs -> CAPECs.
  
  Arguments
  ---------
  1. Path of CAPEC sqlite db
  2. Path of eve.json
  3. Path of output json file
  
  
  Example
  ---------
  ./process_suricata_log.py ./capecdb.sqlite /var/log/suricata/eve.json  /var/log/suricata/capec.json
  """
  
  
  import time
  from sys import argv
  from os import SEEK_END, path, linesep
  import json
  from sqlitedict import SqliteDict
  
  
  def suricata_event(line):
      """Parse one eve.json line into a trimmed-down event dict.

      The returned dict always carries the record's 'timestamp'.  When the
      record has an 'alert' object, its 'signature_id', 'signature',
      'category' and 'severity' values are copied over as well.

      Raises whatever json.loads raises for invalid JSON, and KeyError when
      'timestamp' or one of the expected alert fields is missing.
      """
      record = json.loads(line)
      event = {'timestamp': record['timestamp']}
      if 'alert' in record:
          alert = record['alert']
          # only these alert fields are carried over to the output event
          wanted = ('signature_id', 'signature', 'category', 'severity')
          event.update((name, alert[name]) for name in wanted)
      return event
  
  
  def tail(file):
      """Yield each complete line appended to *file* from now on.

      The file position is first moved to the end, so existing content is
      skipped.  The generator never finishes: while no new data is available
      it sleeps for 0.1 seconds and polls again.

      A line is only yielded once its trailing newline has been read.  When
      the writer is in the middle of a line, readline() returns just the part
      written so far; that fragment is buffered and joined with what follows
      instead of being handed to the consumer as a truncated line.
      """
      file.seek(0, SEEK_END)
      pending = None  # fragment of a line whose newline has not arrived yet
      while True:
          chunk = file.readline()
          if not chunk:
              time.sleep(0.1)
              continue
          pending = chunk if pending is None else pending + chunk
          # works for files opened in text mode as well as binary mode
          newline = b'\n' if isinstance(chunk, bytes) else '\n'
          if not chunk.endswith(newline):
              continue
          # using a generator is preferred, just to consume every new line
          yield pending
          pending = None
  
  def extract_capec_paths(db, capec):
      """Build the nested tree of ancestors for one CAPEC entry.

      Each id listed in db['capec'][capec]['parents'] becomes a
      'capec-<id>' key.  Its value holds the parent's 'name' merged with the
      parent's own ancestor tree, which is built by recursing.  An entry
      without parents yields an empty dict.
      """
      return {
          'capec-' + parent: {
              'name': db['capec'][parent]['name'],
              **extract_capec_paths(db, parent)
          }
          for parent in db['capec'][capec]['parents']
      }
  
  
  def extract_cwe_paths(db, cwe):
      """Build the nested tree reachable from one CWE entry.

      The result first lists the CAPECs related to *cwe* through
      db['cwe-capec'] (if any), each as a 'capec-<id>' key with its 'name'
      and ancestor CAPEC tree.  It then lists every parent of *cwe* from
      db['cwe'][cwe]['parents'] as a 'cwe-<id>' key with its 'name' and its
      own tree, built by recursing.
      """
      # a CWE does not necessarily have a related CAPEC
      related_capecs = db['cwe-capec'][cwe] if cwe in db['cwe-capec'] else ()
      tree = {
          'capec-' + capec_id: {
              'name': db['capec'][capec_id]['name'],
              **extract_capec_paths(db, capec_id)
          }
          for capec_id in related_capecs
      }
      for parent in db['cwe'][cwe]['parents']:
          node = {'name': db['cwe'][parent]['name']}
          node.update(extract_cwe_paths(db, parent))
          tree['cwe-' + parent] = node
      return tree
  
  
  def extract_paths(db, cve):
      """Build the CWE/CAPEC tree for one CVE.

      Returns an empty dict when *cve* has no entry in db['cve-cwe'].
      Otherwise every CWE listed for the CVE becomes a 'cwe-<id>' key whose
      value holds the CWE's 'name' merged with the tree produced by
      extract_cwe_paths for that CWE.
      """
      # a CVE that is not related to any CWE has no tree
      if cve not in db['cve-cwe']:
          return {}
      return {
          'cwe-' + weakness: {
              'name': db['cwe'][weakness]['name'],
              **extract_cwe_paths(db, weakness)
          }
          for weakness in db['cve-cwe'][cve]
      }
  
  def format_paths(key, value):
      """Flatten a nested tree into a list of root-to-leaf key paths.

      *value* is the subtree stored under *key*.  A falsy subtree makes *key*
      a leaf, giving the single path [key].  Otherwise each path found below
      every child is returned with *key* prepended.
      """
      if not value:
          return [[key]]  # nothing below this key, so the path ends here
      return [
          [key] + suffix
          for child, subtree in value.items()
          for suffix in format_paths(child, subtree)
      ]
  
  def write_to_file(output_path, evt):
      """Append *evt* to *output_path* as one JSON document on its own line.

      The file is opened in append mode as UTF-8 and non-ASCII characters are
      written unescaped.

      The line is terminated with '\\n' rather than os.linesep: a text-mode
      file already translates '\\n' to the platform line ending, so writing
      os.linesep would come out as '\\r\\r\\n' on Windows.  The record and its
      terminator go out in a single write call so that a reader following the
      file is less likely to observe a half-written line.
      """
      with open(output_path, 'a', encoding='utf-8') as out:
          out.write(json.dumps(evt, ensure_ascii=False) + '\n')
  
  def start(db_path, log_path, output_path):
      """Follow the Suricata log and write enriched alerts to the output file.

      Parameters
      ----------
      db_path : path of the sqlite database opened through SqliteDict
      log_path : path of the eve.json file to follow
      output_path : path of the file that enriched events are appended to

      For every new log line that is an alert whose signature id has an entry
      in db['sid-cve'], the event is extended with the related 'cve' and the
      'tree' built by extract_paths, then appended to *output_path*.

      The loop runs until the process is interrupted, because tail() never
      finishes.  The database and the log file are closed when the loop is
      left for any reason.
      """
      # local import: at module level only argv is imported from sys
      from sys import stderr

      # The log is read as UTF-8 to match the encoding used for the output.
      with SqliteDict(db_path) as db, \
              open(log_path, encoding='utf-8') as log_file:
          # tail() is a generator that watches for new lines
          for line in tail(log_file):
              try:
                  evt = suricata_event(line)
              except ValueError as err:
                  # One unparsable line must not stop a long-running follower;
                  # report it and move on to the next line.
                  print('Skipping malformed log line:', err, file=stderr)
                  continue

              # Suricata rule 2200007 is used for padding, so we ignore it
              if 'signature_id' not in evt or evt['signature_id'] == 2200007:
                  continue

              sid = str(evt['signature_id'])
              # Item access on a SqliteDict goes to the database, so fetch the
              # mapping once per event instead of once per use.
              sid_to_cve = db['sid-cve']
              if sid not in sid_to_cve:
                  continue

              cve = sid_to_cve[sid]
              evt['tree'] = extract_paths(db, cve)
              evt['cve'] = cve
              write_to_file(output_path, evt)
  
  
  
  # A valid invocation has exactly three arguments (argv also holds the script
  # name) and both input files, the database and the log, must already exist.
  if len(argv) != 4 or not path.exists(argv[1]) or not path.exists(argv[2]):
      print('Error, please provide the paths of capecdb.sqlite, eve.json and output')
  else:
      start(argv[1], argv[2], argv[3])