Skip to content

Instantly share code, notes, and snippets.

@Annihil
Created April 4, 2016 18:25
Show Gist options
  • Save Annihil/3d929d067603cc581d109ac9598663ed to your computer and use it in GitHub Desktop.
Save Annihil/3d929d067603cc581d109ac9598663ed to your computer and use it in GitHub Desktop.
import json
import socket
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
class HTTPClassifier:
    """Load logged HTTP requests, label them interactively and encode them.

    Each line of the dump file is one JSON object describing a request.
    ``mapRequest`` reads the keys ``proto``, ``method``, ``host``, ``args``
    and ``sane`` from every request; ``sane`` is the label written by
    ``checkRequests``.
    """

    def __init__(self, dumpPath):
        """Read every request from the JSON-lines file at ``dumpPath``.

        Args:
            dumpPath: Path of a text file holding one JSON document per line.

        Raises:
            IOError/OSError: If the file cannot be opened.
            ValueError: If a non-blank line is not valid JSON.
        """
        self.protoMap = ['HTTP/0.9', 'HTTP/1.0', 'HTTP/1.1', 'HTTP/2']
        self.methodMap = ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'TRACE',
                          'OPTIONS', 'CONNECT', 'PATCH']
        self.dumpPath = dumpPath
        self.requests = []
        self.mappedReqs = []
        with open(dumpPath) as f:
            for line in f:
                line = line.strip()
                # Blank lines (e.g. a trailing newline) are not JSON
                # documents; json.loads would raise on them.
                if line:
                    self.requests.append(json.loads(line))

    def checkRequests(self):
        """Show each request and store the operator's verdict in ``request['sane']``."""
        for request in self.requests:
            print(request)
            request['sane'] = self.askSanity()
            # Single pre-formatted argument so the output is identical under
            # Python 2 (print statement) and Python 3 (print function).
            print("sane: %s" % (request['sane'],))

    def askSanity(self):
        """Prompt until the operator gives a recognisable yes/no answer.

        Returns:
            1 for an empty answer, ``y`` or ``yes`` (the default is "sane"),
            0 for ``n`` or ``no``. Matching ignores case and surrounding
            whitespace; any other answer triggers a new prompt.
        """
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3
        while True:
            answer = read_line("Is this HTTP request sane [Y/n]: ").strip().lower()
            if answer in ('', 'y', 'yes'):
                return 1
            if answer in ('n', 'no'):
                return 0

    def mapRequest(self):
        """Encode every loaded request as an integer vector.

        The vector layout is ``[proto index, method index, ip0, ip1, ip2,
        ip3, sane]``. A protocol or method missing from ``protoMap`` /
        ``methodMap`` is encoded as ``-1``. The vectors are printed and
        stored in ``self.mappedReqs``, which is rebuilt on every call.

        Raises:
            KeyError: If a request lacks one of the keys read here, e.g.
                ``sane`` when ``checkRequests`` has not been run.
        """
        self.mappedReqs = []
        for request in self.requests:
            argslist = list(request['args'].keys())
            argslist.extend(request['args'].values())
            self._printArgCounts(argslist)
            mappedReq = np.concatenate([
                [self._indexOrUnknown(self.protoMap, request['proto'])],
                [self._indexOrUnknown(self.methodMap, request['method'])],
                self.getIpForHost(request['host']),
                [request['sane']],
            ])
            self.mappedReqs.append(mappedReq)
            print(mappedReq)

    def getIpForHost(self, host):
        """Resolve ``host`` to its IPv4 address as four integer octets.

        Args:
            host: Host name or dotted-quad string.

        Returns:
            A numpy integer array of length 4; all zeros when the host
            cannot be resolved or is not a well-formed name.
        """
        try:
            dotted = socket.gethostbyname(host)
        except (socket.gaierror, UnicodeError):
            # UnicodeError: the host string could not be encoded as a valid
            # name (e.g. an over-long label) before any lookup was attempted.
            return np.array([0, 0, 0, 0])
        # Convert to int so concatenating with the other integer features
        # does not promote the whole vector to a string dtype.
        return np.array([int(octet) for octet in dotted.split('.')])

    @staticmethod
    def _indexOrUnknown(table, value):
        """Return the position of ``value`` in ``table``, or -1 if absent."""
        try:
            return table.index(value)
        except ValueError:
            return -1

    @staticmethod
    def _printArgCounts(argslist):
        """Print the bag-of-words count matrix for the request arguments."""
        if not argslist:
            return
        try:
            counts = CountVectorizer().fit_transform(argslist).toarray()
        except ValueError:
            # CountVectorizer raises when it ends up with an empty vocabulary,
            # e.g. when every argument is a single character.
            return
        print(counts)
if __name__ == '__main__':
    # Label first, then encode: mapRequest reads the 'sane' flag that
    # checkRequests stores on each request.
    labeller = HTTPClassifier('logs/jsondump.log')
    labeller.checkRequests()
    labeller.mapRequest()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment