Created
January 9, 2014 16:38
-
-
Save slayercat/8337339 to your computer and use it in GitHub Desktop.
pcap http get or post analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# via http://wiki.wireshark.org/Development/LibpcapFileFormat
import os
import sys
def get_int(li):
    """Decode a little-endian unsigned integer from a sequence of bytes.

    ``li`` may be a byte string (``str`` on Python 2, ``bytes`` or
    ``bytearray`` on Python 3) or a text string of single characters.
    Element 0 is the least-significant byte.  An empty sequence decodes
    to 0.
    """
    result = 0
    for item in reversed(li):
        # Iterating a byte string yields ints on Python 3 but one-character
        # strings on Python 2; normalise both to an integer byte value.
        value = item if isinstance(item, int) else ord(item)
        result = (result << 8) | value
    return result
def petty_mac(input):
    """Format raw address bytes as two-digit lowercase hex joined by '-'.

    ``input`` may be a byte string (``str`` on Python 2, ``bytes`` or
    ``bytearray`` on Python 3) or a text string of single characters.
    An empty input yields an empty string.
    """
    # NOTE: the parameter name shadows the ``input`` builtin; it is kept so
    # that existing keyword callers continue to work.
    octets = [b if isinstance(b, int) else ord(b) for b in input]
    return '-'.join('%02x' % octet for octet in octets)
def main():
    """Summarise HTTP GET/POST requests found in a libpcap capture file.

    Reads the capture named by ``sys.argv[1]``, prints the global header and
    every record header to stdout, and prints the source/destination MAC of
    each captured frame.  Every Ethernet/IPv4/TCP frame whose TCP payload
    starts with ``GET `` or ``POST `` is counted against the value of its
    ``Host`` header, or against the destination IPv4 address when no usable
    Host header is present.  The per-host request counts are finally written
    to stderr in ascending order of count.

    The requested URLs are collected per host in ``all_uri_path`` but are
    not printed.  All multi-byte pcap fields are decoded little-endian via
    ``get_int``, so captures written in the opposite byte order are not
    handled.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: %s <capture.pcap>' % sys.argv[0])

    eth_header_len = 14       # dst MAC (6) + src MAC (6) + EtherType (2)
    min_ip_header_len = 20
    min_tcp_header_len = 20
    ethertype_ipv4 = 0x0800
    ip_proto_tcp = 6
    record_header_len = 16    # ts_sec, ts_usec, incl_len, orig_len
    max_rows = 4001           # upper bound on the number of hosts reported

    def to_text(raw):
        # Packet data is ``bytes`` on Python 3; convert to the native string
        # type so header parsing and the printed host names look the same on
        # both Python 2 and Python 3.  latin-1 maps every byte value.
        return raw if isinstance(raw, str) else raw.decode('latin-1')

    all_hosts = {}
    all_uri_path = {}

    # pcap is a binary format: open in binary mode and close deterministically.
    with open(sys.argv[1], 'rb') as target:
        # Global file header.
        magic_number = get_int(target.read(4))
        version_major = get_int(target.read(2))
        version_minor = get_int(target.read(2))
        thiszone = get_int(target.read(4))
        sigfigs = get_int(target.read(4))
        snaplen = get_int(target.read(4))
        network = get_int(target.read(4))
        print('magic %s' % hex(magic_number))
        print('v %s . %s' % (version_major, version_minor))
        print('zone %s' % thiszone)
        print('sigfigs %s' % sigfigs)
        print('snaplen %s' % snaplen)
        print('network %s' % network)

        # One iteration per packet record.
        while True:
            record_header = target.read(record_header_len)
            if len(record_header) < record_header_len:
                # End of file (or a truncated trailing record header).
                break
            ts_sec = get_int(record_header[0:4])
            ts_usec = get_int(record_header[4:8])
            incl_len = get_int(record_header[8:12])
            orig_len = get_int(record_header[12:16])
            print('ts_sec %s' % ts_sec)
            print('ts_usec %s' % ts_usec)
            print('incl_len %s' % incl_len)
            print('orig_len %s' % orig_len)

            frame = target.read(incl_len)
            if len(frame) < incl_len:
                break

            # The frame starts with the destination and source MAC addresses.
            to_mac = frame[0:6]
            from_mac = frame[6:12]
            print('%s -> %s' % (petty_mac(from_mac), petty_mac(to_mac)))

            # Too short to hold an Ethernet header plus a minimal IPv4 header.
            if len(frame) < eth_header_len + min_ip_header_len:
                continue

            # bytearray gives integer byte values on both Python 2 and 3.
            octets = bytearray(frame)
            eth_type = (octets[12] << 8) | octets[13]
            if eth_type != ethertype_ipv4:
                continue

            # IHL (low nibble of the first IP byte) counts 32-bit words.
            ip_header_len = (octets[eth_header_len] & 0x0F) * 4
            if ip_header_len < min_ip_header_len:
                continue
            if octets[eth_header_len + 9] != ip_proto_tcp:
                continue
            ipv4_dip = octets[eth_header_len + 16:eth_header_len + 20]

            tcp_start = eth_header_len + ip_header_len
            if len(frame) < tcp_start + min_tcp_header_len:
                continue
            # Data offset (high nibble of TCP byte 12) counts 32-bit words.
            tcp_header_len = (octets[tcp_start + 12] >> 4) * 4
            if tcp_header_len < min_tcp_header_len:
                continue
            tcp_data = frame[tcp_start + tcp_header_len:]

            if not tcp_data.startswith((b'GET ', b'POST ')):
                continue

            # Everything before the first blank line is the HTTP header
            # section; when there is no blank line the whole payload is used.
            head = to_text(tcp_data.split(b'\r\n\r\n', 1)[0])
            heads = head.split('\r\n')

            # Request line: "<method> <url> HTTP/<version>".
            request_line = heads[0]
            version_at = request_line.rfind('HTTP')
            if version_at >= 0:
                request_line = request_line[:version_at]
            parts = request_line.split(None, 1)
            # A request line without a URL still counts towards its host.
            url = parts[1].strip() if len(parts) == 2 else ''

            hostname = None
            for line in heads[1:]:
                if not line.strip():
                    continue
                name, sep, value = line.partition(':')
                if not sep:
                    # Not a "name: value" header line; report it and move on.
                    sys.stderr.write('%s\n' % (heads,))
                    continue
                if name.strip().lower() == 'host':
                    hostname = value.strip()

            if not hostname:
                # No usable Host header: fall back to the destination IP.
                hostname = '.'.join(str(octet) for octet in ipv4_dip)

            all_hosts[hostname] = all_hosts.get(hostname, 0) + 1
            all_uri_path.setdefault(hostname, []).append(hostname + url)

    # Report hosts in ascending order of request count.
    sorteditems = sorted(all_hosts.items(), key=lambda item: item[1])
    for row in sorteditems[:max_rows]:
        sys.stderr.write('%s\n' % (row,))
# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment