#!/usr/bin/env python
#
# Usage:
#   ./dump.py
#   - Stops after the specified period (seconds) or once max_size is reached.
#   - The log is uploaded to an S3 bucket.
#   - Requirements:
#     > The EC2 instance needs an instance profile allowing s3:PutObject.
#     > You also need to edit the INFRA S3 bucket policy.
#
# Packages
#   yum install python2-boto3 pcapy python2-impacket python-netaddr python-netifaces pytz
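#
# Example invocation (a sketch; packet capture usually needs root):
#   on EC2 : sudo ./dump.py        # region and tags are auto-detected
#   off EC2: export AWS_ACCESS_KEY_ID=... AWS_SECRET_ACCESS_KEY=... AWS_REGION=ap-northeast-1
#            sudo -E ./dump.py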
#
# General log format
# - ${timestamp}\t${src_port}\t${dst_addr}\t${query}
#----- example -----
#171026 11:14:00\t34567\t10.1.2.3\tselect 1 from db1
#\t34567\t10.1.2.3\tselect 2 from db1
#171026 11:14:01\t45678\t10.1.2.4\tselect 3 from db2
#-------------------
#
# Query packets
# * Every MySQL packet starts with a 4-byte header (3-byte payload length +
#   1-byte sequence id); for COM_QUERY the payload is a 0x03 command byte
#   followed by the query text.
#
# SQL normalization
# * \n and \t are removed so each query fits on one line.
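#
# Wire layout sketch (per the MySQL client/server protocol):
#   offset 0-2 : payload length, little-endian
#   offset 3   : sequence id
#   offset 4   : command byte (0x03 = COM_QUERY)
#   offset 5-  : query text
# e.g. "select 1" arrives on the wire as: \x09\x00\x00\x00\x03select 1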
import boto3
import os
import sys
import re
import bz2
import json
import urllib2
import socket
import time
from datetime import datetime
from pytz import timezone
import pcapy
from impacket.ImpactDecoder import *
import netifaces
from netaddr import *
#
# Functions
# - Linux iface  : eth0
#   Windows iface: \\Device\\NPF_{5D21FB4F-D9BF-47AC-AD3E-CE2E172F27A0}
#   iname        : {5D21FB4F-D9BF-47AC-AD3E-CE2E172F27A0}
def get_private_interface():
    # Return the first capture device that carries a private IPv4 address.
    for iface in pcapy.findalldevs():
        iname = iface
        # On Windows, netifaces expects the bare GUID rather than the NPF device path.
        search = re.search(r'.+({.+})', iface)
        if search: iname = search.group(1)
        for ipv4 in netifaces.ifaddresses(iname).get(netifaces.AF_INET, []):
            if IPAddress(ipv4.get('addr')).is_private():
                return iface
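# e.g. on a typical Linux EC2 instance this returns 'eth0' (assuming the
# private address sits on eth0); it returns None if nothing matches.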
def tags2dict(tags):
    # Flatten the EC2 [{'Key': ..., 'Value': ...}] tag list into a plain dict.
    d = {}
    for v in tags:
        d[v['Key']] = v['Value']
    return d
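# e.g. (hypothetical tag values)
#   tags2dict([{'Key': 'service', 'Value': 'blog'}, {'Key': 'roles', 'Value': 'db,cache'}])
#   -> {'service': 'blog', 'roles': 'db,cache'}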
#
# Config
#
now = datetime.now(timezone('Asia/Tokyo'))
max_size = 1024 * 1024 * 100   # stop once the collected log exceeds 100 MiB
period = 60                    # capture duration in seconds
dev = get_private_interface()
snaplen = 1024 * 1024 * 1      # max bytes captured per packet
promiscuous = False
timeout = 100                  # pcap read timeout in milliseconds
log_bucket = 'gedow-general-log-test'
log_prefix = 'generallog'
log_region = 'ap-northeast-1'
#
# AWS credentials / region
#
key_id = os.environ.get('AWS_ACCESS_KEY_ID')
secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
region = os.environ.get('AWS_REGION')
on_ec2 = False
instance_id = None
meta_data_url = 'http://169.254.169.254/latest/dynamic/instance-identity/document/'
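# The instance identity document fetched below is JSON like (values hypothetical):
#   {"region": "ap-northeast-1", "instanceId": "i-0123456789abcdef0", ...}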
if region is None:
    try:
        # Fall back to the EC2 instance metadata service.
        meta_data = json.loads(urllib2.urlopen(meta_data_url, timeout=1).read())
        region = meta_data['region']
        instance_id = meta_data['instanceId']
        on_ec2 = True
    except Exception as e:
        raise Exception("Export AWS_REGION.")
if not on_ec2 and (key_id is None or secret_key is None):
    raise Exception("Export AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.")
# Clients
ec2 = boto3.client('ec2', region_name=region)
s3 = boto3.resource('s3', region_name=log_region)
# Tags
service = None
role = None
if not on_ec2:
    service = "test"
    role = "dev"
else:
    res = ec2.describe_tags(
        Filters=[{'Name': 'resource-id', 'Values': [instance_id]}]
    )['Tags']
    tags = tags2dict(res)
    if 'service' not in tags or ('role' not in tags and 'roles' not in tags):
        raise Exception("service and role/roles tags not found.")
    service = tags['service']
    if 'role' in tags:
        role = tags['role']
    else:
        # 'roles' can be a comma-separated list; use the first entry.
        role = tags['roles'].split(",")[0]
#
# Packet dump
#
p = pcapy.open_live(dev, snaplen, promiscuous, timeout)
p.setfilter("tcp dst port 3306")   # client-to-server MySQL traffic only
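# The BPF filter assumes the default MySQL port; adjust it if mysqld listens
# elsewhere, e.g. p.setfilter("tcp dst port 3307").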
start_utime = int(time.time())
before_utime = start_utime - 1
log = ''
log_len = 0
while True:
    current_utime = int(time.time())
    if current_utime - start_utime > period: break
    try:
        (header, payload) = p.next()
    except pcapy.PcapError:
        continue
    if not header: continue
    eth = EthDecoder().decode(payload)
    ip = eth.child()
    tcp = ip.child()
    # Skip the 4-byte MySQL packet header, then drop the leading command byte
    # (and any other non-alphabetic bytes) to reach the query text.
    search = re.search(r'^[^a-z]*([a-z].+)$', tcp.get_data_as_string()[4:], re.DOTALL | re.IGNORECASE)
    if not search: continue
    sql = re.sub(r'[\n\t]', '', search.group(1))
    dst = ip.get_ip_dst()
    sport = tcp.get_th_sport()
    # Print the timestamp only once per second, as the MySQL general log does.
    timestamp = "\t"
    if current_utime != before_utime:
        timestamp = datetime.fromtimestamp(current_utime).strftime("%y%m%d %H:%M:%S")
    row = "%s\t%s\t%s\t%s\n" % (timestamp, str(sport).rjust(5), dst, sql)
    log += row
    log_len += len(row)
    if log_len > max_size: break
    before_utime = current_utime
#
# Compress & upload
#
date = now.strftime("%Y%m%d")
hostname = socket.gethostname()
file_name = now.strftime("%Y%m%d_%H%M") + "-" + hostname + ".log.bz2"
obj_key = "%s/%s/%s/%s/%s" % (log_prefix, service, role, date, file_name)
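# e.g. (hypothetical tag/host values)
#   generallog/blog/db/20180302/20180302_0151-host01.log.bz2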
bucket = s3.Bucket(log_bucket)
obj = bucket.Object(obj_key)
obj.put(Body=bz2.compress(log), ACL="bucket-owner-full-control")
print "Saved SQL log to s3://%s/%s" % (log_bucket, obj_key)