Created
April 4, 2016 18:25
-
-
Save Annihil/3d929d067603cc581d109ac9598663ed to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import socket | |
import numpy as np | |
from sklearn.feature_extraction.text import CountVectorizer | |
class HTTPClassifier:
    """Label dumped HTTP requests by hand and map them to numeric vectors.

    The dump file holds one JSON document per line.  Each request is read
    through the keys ``proto``, ``method``, ``host`` and ``args``; the
    manual label is stored back on the request under ``sane`` (1 = sane,
    0 = not sane).

    The code is written so that it runs unchanged on Python 2 and Python 3.
    """

    def __init__(self, dumpPath):
        """Load every request found in the JSON-lines file *dumpPath*.

        Raises whatever ``open`` or ``json.loads`` raise for an unreadable
        file or a malformed line.
        """
        # Position in these lists is the numeric code used in the vectors.
        self.protoMap = ['HTTP/0.9', 'HTTP/1.0', 'HTTP/1.1', 'HTTP/2']
        self.methodMap = ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'TRACE',
                          'OPTIONS', 'CONNECT', 'PATCH']
        self.dumpPath = dumpPath
        self.requests = []
        self.mappedReqs = []
        with open(dumpPath) as f:
            for line in f:
                # A blank (e.g. trailing) line is not a JSON document and
                # would make json.loads raise; skip it.
                if line.strip():
                    self.requests.append(json.loads(line))

    def checkRequests(self):
        """Show each request and record the operator's verdict in ``sane``."""
        for request in self.requests:
            print(request)
            request['sane'] = self.askSanity()
            print("sane: %s" % (request['sane'],))

    def askSanity(self):
        """Prompt until a yes/no answer is given; return 1 for yes, 0 for no.

        An empty answer counts as yes (the default shown in the prompt).
        Answers are matched case-insensitively, ignoring surrounding
        whitespace; anything unrecognised triggers another prompt.
        """
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3: input() no longer evaluates
        while True:
            answer = read_line("Is this HTTP request sane [Y/n]: ")
            answer = answer.strip().lower()
            if answer in ('', 'y', 'yes'):
                return 1
            if answer in ('n', 'no'):
                return 0

    def mapRequest(self):
        """Rebuild ``self.mappedReqs`` with one numeric vector per request.

        Each vector is ``[proto, method, ip0, ip1, ip2, ip3, sane]``.  The
        token-count matrix of the request's args and the vector itself are
        printed for inspection.  Requests must already carry a ``sane``
        label (see ``checkRequests``), otherwise ``KeyError`` is raised.
        """
        # Start from scratch so calling this twice does not duplicate rows.
        self.mappedReqs = []
        for request in self.requests:
            argCounts = self._countArgTokens(request['args'])
            if argCounts is not None:
                print(argCounts)
            mappedReq = self._toFeatureVector(request)
            self.mappedReqs.append(mappedReq)
            print(mappedReq)

    def _countArgTokens(self, args):
        """Return the token-count matrix of args' keys and values, or None."""
        argslist = list(args.keys())
        argslist.extend(args.values())
        if not argslist:
            return None
        try:
            return CountVectorizer().fit_transform(argslist).toarray()
        except ValueError:
            # CountVectorizer raises ValueError when no usable token is
            # found (empty vocabulary); there is nothing to count then.
            return None

    def _toFeatureVector(self, request):
        """Return the numeric vector for one labelled request."""
        return np.concatenate([
            [self._indexOrUnknown(self.protoMap, request['proto'])],
            [self._indexOrUnknown(self.methodMap, request['method'])],
            self.getIpForHost(request['host']),
            [request['sane']],
        ])

    def _indexOrUnknown(self, known, value):
        """Return the position of *value* in *known*, or -1 if absent."""
        try:
            return known.index(value)
        except ValueError:
            return -1

    def getIpForHost(self, host):
        """Resolve *host* and return its IPv4 octets as an integer array.

        Returns ``[0, 0, 0, 0]`` when the name cannot be resolved or is not
        a well-formed host name.
        """
        try:
            octets = socket.gethostbyname(host).split('.')
        except (socket.gaierror, UnicodeError):
            # UnicodeError: Python 3 rejects malformed labels (empty or
            # too long) while encoding the name, before any lookup.
            return np.array([0, 0, 0, 0])
        # Convert to int so the octets concatenate into a numeric vector
        # rather than turning the whole vector into strings.
        return np.array([int(octet) for octet in octets])
if __name__ == '__main__':
    # Script entry point: load the request dump, ask the operator to label
    # every request, then print the numeric mapping of each one.
    classifier = HTTPClassifier(dumpPath='logs/jsondump.log')
    classifier.checkRequests()
    classifier.mapRequest()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment