Created
June 19, 2014 12:24
-
-
Save zakx/0ac8ac66bf2def3dda5d to your computer and use it in GitHub Desktop.
irssi log format to CSV converter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8
"""
irssi log format to CSV converter.

Not handled:
* nick changes
* topic changes
* mode changes
"""
import csv | |
import datetime | |
import glob | |
import logging | |
import re | |
import sys | |
logging.basicConfig(format='[%(levelname)s] %(message)s', level=logging.DEBUG)

# All line patterns below are applied to the part of a log line that follows
# the timestamp (parse_line passes line[6:]).

# "<nick> message"
RE_PRIVMSG = re.compile(r'^<(?P<nick>[^>]+)> (?P<message>.*)$')

# " nick (user@host) has joined|left|quit [#nodrama.de] [(message)]"
# One or more leading spaces are accepted so the pattern does not depend on
# the exact padding after the timestamp. The channel name is matched
# literally (dot escaped).
RE_JPQ = re.compile(
    r'^ +(?P<nick>[^>]+) \((?P<host>.+@.+)\) has (?P<action>joined|left|quit) '
    r'(?:#nodrama\.de\s?)?(?:\(?(?P<message>.+?)?\)?)$'
)

# Maps the verb captured by RE_JPQ's "action" group to the event name.
JPQ_EVENTS = {
    'joined': 'JOIN',
    'left': 'PART',
    'quit': 'QUIT',
}

# " * nick message" ("/me" lines); leading padding may be one or more spaces.
RE_ACTION = re.compile(r'^ +\* (?P<nick>\S+) (?P<message>.*)$')

# "-nick- message" or "-nick|suffix- message". The part between the nick and
# the closing dash is optional; requiring at least one character there would
# steal the last letter of the nick in the plain "-nick- message" form.
RE_NOTICE = re.compile(r'^-(?P<nick>[^|-]*)[^-]*?- (?P<message>.*)$')

# "logs/YYYY-MM-DD.txt"; a backslash separator is accepted too because glob
# returns OS-native separators on Windows.
RE_FILE = re.compile(r'^logs[/\\](?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})\.txt$')
def parse_line(line):
    """Parse one raw log line into an event dict.

    The hour is read from line[0:2], the minute from line[3:5], and the event
    text from line[6:]. The first character of the event text selects the
    event type: "<" is a PRIVMSG, " " is either an ACTION (first non-space
    character is "*") or a JOIN/PART/QUIT, and "-" is a NOTICE.

    Returns a dict with the keys 'hour', 'minute', 'event', 'nick', 'host'
    and 'message', or None when the line is a client comment ("--- ...") or
    cannot be parsed. Unparseable lines are logged at error level and
    skipped; they never raise.
    """
    # Skip client comments
    if line.startswith('--- '):
        return None

    # Parse time
    try:
        hour = int(line[0:2])
        minute = int(line[3:5])
    except ValueError:
        logging.error("Faulty time parsing for line: [%s]", line)
        return None

    # remove time and newline from line
    line = line.strip('\n').strip('\r')
    content = line[6:]

    # Pick the pattern from the first character(s) of the content.
    # startswith() is used instead of indexing so that short lines fall
    # through to the generic error below rather than raising IndexError.
    if content.startswith('<'):
        label, event, pattern = 'PRIVMSG', 'PRIVMSG', RE_PRIVMSG
    elif content.startswith(' '):
        # "/me" lines have "*" as their first non-space character; the
        # amount of padding in front of it is not relied upon.
        if content.lstrip(' ').startswith('*'):
            label, event, pattern = 'ACTION', 'ACTION', RE_ACTION
        else:
            # JOIN/PART/QUIT: the event name is derived from the match below
            label, event, pattern = 'JPQ', None, RE_JPQ
    elif content.startswith('-'):
        label, event, pattern = 'NOTICE', 'NOTICE', RE_NOTICE
    else:
        logging.error("Failed parsing [%s]", line)
        return None

    match = pattern.match(content)
    if match is None:
        # Every event type is handled the same way: report and skip the line.
        logging.error("Failed to match in %s: [%s]", label, line)
        return None

    data = match.groupdict()
    if event is None:
        event = JPQ_EVENTS[data['action']]

    return {
        'hour': hour,
        'minute': minute,
        'event': event,
        'nick': data['nick'],
        # Only the JOIN/PART/QUIT pattern captures a host; None otherwise.
        'host': data.get('host'),
        'message': data['message'],
    }
def build_csv_line(year, month, day, hour, minute, event, nick, host=None, message=None):
    """Assemble one CSV row tuple for a parsed log event.

    year, month and day are passed through unchanged in the row and are
    converted with int() only to build the timestamp; hour and minute are
    used as given. A host or message of None is written as an empty string.

    Returns the tuple (year, month, day, hour, minute, ts, event, nick, host,
    message), where ts is the "YYYY-MM-DD HH:MM:SS" timestamp.

    Raises ValueError if the date/time components do not form a valid
    datetime.
    """
    if host is None:
        host = ""
    if message is None:
        message = ""
    timestamp = datetime.datetime(int(year), int(month), int(day), hour, minute)
    return (year, month, day, hour, minute, timestamp.isoformat(' '), event, nick, host, message)
# Rows collected from every log file, written out in one go at the end.
csv_data = []

# glob returns files in arbitrary order; sort so that the date-named files
# (and therefore the CSV rows) are processed chronologically.
for logfile in sorted(glob.glob("logs/*.txt")):
    print(logfile)
    match = RE_FILE.match(logfile)
    if match is None:
        # The date of every row comes from the file name, so a file that is
        # not named like a daily log cannot be converted.
        logging.warning("Skipping file with unexpected name: [%s]", logfile)
        continue
    ymd = match.groupdict()
    logging.info("Parsing %(year)s-%(month)s-%(day)s" % ymd)
    with open(logfile, "r") as f:
        for line in f:
            transfer = parse_line(line)
            if transfer is None:
                continue
            # Merge the date fields from the file name with the per-line
            # fields (works on both Python 2 and 3).
            parsed = build_csv_line(**dict(ymd, **transfer))
            csv_data.append(parsed)

with open("parsed.csv", "w") as o:
    writer = csv.writer(o, delimiter=";")
    writer.writerow(("year", "month", "day", "hour", "minute", "ts", "event", "nick", "host", "message"))
    writer.writerows(csv_data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment