Skip to content

Instantly share code, notes, and snippets.

@informationsea
Created August 21, 2013 11:31
Show Gist options
  • Save informationsea/6293363 to your computer and use it in GitHub Desktop.
Save informationsea/6293363 to your computer and use it in GitHub Desktop.
Import Apache combined-format access logs into SQLite3
#!/usr/bin/env python
import argparse
import sqlite3
import csv
import gzip
import bz2
import datetime
def prepare_database(conn):
    """Ensure the ``access`` table and its per-column indexes exist.

    Every statement uses ``IF NOT EXISTS``, so calling this on an already
    prepared database changes nothing.

    Arguments:
    - `conn`: object with an ``execute`` method accepting SQL text, such as
      a ``sqlite3`` connection.
    """
    conn.execute('CREATE TABLE IF NOT EXISTS access (remote TEXT, user TEXT, date DATETIME, request TEXT, status TEXT, size INTEGER, referrer TEXT, useragent TEXT)')

    # One single-column index per table column, named access__<column>.
    indexed_columns = (
        'remote',
        'user',
        'date',
        'request',
        'status',
        'size',
        'referrer',
        'useragent',
    )
    for column in indexed_columns:
        conn.execute(
            'CREATE INDEX IF NOT EXISTS access__{0} ON access({0})'.format(column)
        )
def import_log(conn, fileobj):
    """Insert every entry of an Apache access log into the ``access`` table.

    Each line is split on spaces with double-quoted fields kept together.
    The fields used are: 0 = remote host, 2 = user, 3 = timestamp,
    5..9 = request, status, size, referrer, user agent.  Field 1 (ident)
    and field 4 (UTC offset) are not stored, so the date column holds the
    time exactly as logged, without its offset.

    Arguments:
    - `conn`: sqlite3 connection (or cursor) whose ``access`` table has
      eight columns.
    - `fileobj`: iterable of text lines, e.g. a file opened in text mode.

    Raises ``IndexError`` / ``ValueError`` for a non-blank line that has no
    parsable timestamp in field 3.
    """
    # Apache writes a literal quote inside a quoted field as \" ; declaring
    # the backslash as escape character keeps such a field in one piece
    # instead of splitting it at the embedded quote.  Side effect: any other
    # backslash sequence (e.g. \x22) loses its backslash.
    reader = csv.reader(fileobj, quotechar='"', delimiter=' ',
                        escapechar='\\')
    insert_sql = 'INSERT INTO access VALUES(?, ?, ?, ?, ?, ?, ?, ?)'
    tail_width = 5  # request, status, size, referrer, useragent

    for row in reader:
        if not row:
            # csv yields an empty list for a blank line; nothing to import.
            continue
        date = datetime.datetime.strptime(row[3], '[%d/%b/%Y:%H:%M:%S')

        # Take exactly five trailing values: extra fields appended by custom
        # log formats are ignored, and shorter lines (Common Log Format has
        # no referrer/user agent) are padded with NULL.  An unbounded
        # row[5:] made sqlite3 reject the statement with
        # "Incorrect number of bindings supplied".
        tail = row[5:5 + tail_width]
        tail += [None] * (tail_width - len(tail))

        # Bind the timestamp as "YYYY-MM-DD HH:MM:SS" text, which is what
        # sqlite3's default datetime adapter stored, without relying on that
        # adapter (deprecated since Python 3.12).
        conn.execute(insert_sql, [row[0], row[2], date.isoformat(' ')] + tail)
def open_compressed(path):
    """Open a log file for reading as text, decompressing by file extension.

    Arguments:
    - `path`: file name.  A ``.gz`` suffix selects gzip, ``.bz2`` selects
      bzip2, anything else is opened as a plain file.

    Returns a text-mode file object; the caller is responsible for closing
    it (it can be used as a context manager).
    """
    # Mode 'rt' is required for the compressed openers: with plain 'r' they
    # return bytes, which csv.reader refuses.  newline='' leaves line endings
    # untouched, as the csv module asks of the files it reads.
    if path.endswith('.gz'):
        return gzip.open(path, 'rt', newline='')
    if path.endswith('.bz2'):
        return bz2.open(path, 'rt', newline='')
    return open(path, 'r', newline='')
def _main(argv=None):
    """Command-line entry point: import the given logs into a SQLite file.

    Arguments:
    - `argv`: optional list of command-line arguments; when None, argparse
      reads them from ``sys.argv``.

    The inserted rows are committed once, after every log file has been
    imported; if any import raises, the connection is closed without
    committing them.
    """
    parser = argparse.ArgumentParser(description="Apache log to sqlite3")
    parser.add_argument('apachelog', nargs='+')
    parser.add_argument('-d', '--sqlitedb', required=True)
    options = parser.parse_args(argv)

    conn = sqlite3.connect(options.sqlitedb)
    try:
        prepare_database(conn)
        for path in options.apachelog:
            # The with-block closes the log file even when import_log raises.
            with open_compressed(path) as logfile:
                import_log(conn, logfile)
        conn.commit()
    finally:
        # Always release the database handle, on success and on failure.
        conn.close()


if __name__ == '__main__':
    _main()
@MichalLeonBorsuk
Copy link

Hi, thanks for the script. Sadly I have received an error, although I have not changed the format of the logs:

yxxx@sun:~/$ ./apachelogdb.py -d access-201805.db access.log 
Traceback (most recent call last):
  File "./apachelogdb.py", line 69, in <module>
    _main()
  File "./apachelogdb.py", line 64, in _main
    import_log(conn, f)
  File "./apachelogdb.py", line 38, in import_log
    conn.execute('INSERT INTO access VALUES(?, ?, ?, ?, ?, ?, ?, ?)', [row[0]]+[row[2]]+[date] + row[5:])
sqlite3.ProgrammingError: Incorrect number of bindings supplied. The current statement uses 8, and there are 9 supplied.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment