-
-
Save hastyeagle/9a01f549535a167071cfbc0e61d57927 to your computer and use it in GitHub Desktop.
Export iOS/iMessage chat logs to HTML or text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import argparse | |
import urllib | |
import urlparse | |
import base64 | |
import mimetypes | |
import cgi | |
import sqlite3 | |
import os | |
import errno | |
import plistlib | |
from os import path | |
from shutil import copy2 | |
# TODO: | |
# - Add auto-linking | |
# - Export video media | |
# - Match chat IDs up to names using Contacts.app SQLite db | |
# Messages database of the current user (the default export source).
CHAT_DB = os.path.expanduser("~/Library/Messages/chat.db")

# Directory that holds one sub-directory per device backup.
CHAT_DB_BACKUP_PATH = os.path.expanduser(
    "~/Library/Application Support/MobileSync/Backup"
)

# File name of the chat database inside a backup directory.
CHAT_DB_BACKUP = "3d0d7e5fb2ce288813306e4d4636395e047a3d28"

# Directory (relative to the working directory) that receives copied
# attachments.
ATTACHDIR = "export-Attachments"

# Offset added to message dates before SQLite interprets them as Unix time:
# Apple's epoch starts on January 1st, 2001.
# cf. http://apple.stackexchange.com/questions/114168
EPOCH = 978307200
def list_backups():
    """Print each backup directory under CHAT_DB_BACKUP_PATH with its device name.

    The device name is read from the "Device Name" key of the directory's
    ``Info.plist``. When the plist is absent, unreadable, or lacks that key,
    the name is reported as "missing" instead of aborting the listing.
    Entries are printed in sorted order.
    """
    print("Below is a list of possible backups to choose from:")
    try:
        entries = sorted(os.listdir(CHAT_DB_BACKUP_PATH))
    except OSError:
        # No backup root at all (e.g. the machine has never made a backup).
        print("Error accessing '" + CHAT_DB_BACKUP_PATH + "'!")
        return

    for entry in entries:
        backup_dir = os.path.join(CHAT_DB_BACKUP_PATH, entry)
        if not os.path.isdir(backup_dir):
            continue

        # Get the device name
        try:
            info = plistlib.readPlist(os.path.join(backup_dir, "Info.plist"))
            dev_name = info.get("Device Name", "missing")
        except Exception:
            # plistlib raises several unrelated exception types for missing
            # or malformed files; one bad directory must not stop the list.
            dev_name = "missing"

        # Device names frequently contain non-ASCII characters, so encode
        # explicitly instead of relying on the stdout encoding.
        line = u" %s (%s)" % (entry, dev_name)
        print(line.encode("utf8"))
def list_chats(chatFile):
    """Print every non-archived chat identifier with its message statistics.

    For each distinct ``chat_identifier`` of a non-archived chat, the number
    of joined messages and the dates of the oldest and newest message are
    looked up. Chats without messages are skipped. The result is printed
    sorted by the date of the newest message, most recent first.

    Args:
        chatFile: path of the SQLite chat database to read.

    Exits the process with status 1 when the database cannot be read.
    """
    # One aggregate query per chat replaces a count query plus two
    # "ORDER BY ... LIMIT 1" queries. All values are bound as parameters;
    # nothing is interpolated into the SQL text.
    stats_sql = """
        SELECT COUNT(*),
               substr(datetime(MIN(m.date)/1000000000 + :epoch, 'unixepoch', 'localtime'), 0, 11),
               substr(datetime(MAX(m.date)/1000000000 + :epoch, 'unixepoch', 'localtime'), 0, 11)
        FROM chat AS c
        INNER JOIN chat_message_join AS cm
            ON cm.chat_id = c.ROWID
        INNER JOIN message AS m
            ON m.ROWID = cm.message_id
        WHERE c.chat_identifier = :chat_id;
    """

    print("Below is a list of IDs and their associated message counts (most recent first):")

    chatIDs = {}
    db = None
    try:
        db = sqlite3.connect(chatFile)
        cursor = db.cursor()
        identifiers = [row[0] for row in cursor.execute("""
            SELECT DISTINCT chat_identifier
            FROM chat WHERE is_archived = 0 ORDER BY chat_identifier;
        """)]

        # Collect the ID, message count and date range for each chat.
        for chat_id in identifiers:
            numRows, firstDate, lastDate = cursor.execute(
                stats_sql, {"epoch": EPOCH, "chat_id": chat_id}).fetchone()
            if numRows == 0:
                continue
            chatIDs[chat_id] = [numRows, firstDate, lastDate]
    except sqlite3.Error:
        # sqlite3.connect() is lazy; an unreadable or non-database file
        # only fails on the first query, so the whole section is guarded.
        print("Error accessing '" + chatFile + "'!")
        sys.exit(1)
    finally:
        if db is not None:
            db.close()

    # Sort by lastDate, newest first
    for key, value in sorted(chatIDs.items(), key=lambda e: e[1][2], reverse=True):
        line = u" %s (%s messages, %s to %s)" % (key, value[0], value[1], value[2])
        print(line.encode("utf8"))
def _as_text(value):
    """Return ``value`` as text, decoding UTF-8 byte strings (e.g. argv values)."""
    if isinstance(value, bytes):
        return value.decode("utf8", "replace")
    return value


def _emit(text):
    """Print ``text`` encoded as UTF-8."""
    print(text.encode("utf8"))


def _boundary_row(cursor, chat_id, newest):
    """Return the row holding the date of the oldest/newest message, or None."""
    order = "DESC" if newest else "ASC"
    return cursor.execute("""
        SELECT substr(datetime(m.date/1000000000 + ?, 'unixepoch', 'localtime'), 0, 11)
        FROM chat AS c
        INNER JOIN chat_message_join AS cm
            ON cm.chat_id = c.ROWID
        INNER JOIN message AS m
            ON m.ROWID = cm.message_id
        WHERE c.chat_identifier = ?
        ORDER BY m.date %s LIMIT 1;
    """ % order, (EPOCH, chat_id)).fetchone()


def _group_handles(cursor, chat_id):
    """Return the sorted, distinct handle IDs that sent messages in the chat."""
    return [row[0] for row in cursor.execute("""
        SELECT DISTINCT h.id
        FROM chat AS c
        INNER JOIN chat_message_join AS cm
            ON cm.chat_id = c.ROWID
        INNER JOIN message AS m
            ON m.ROWID = cm.message_id
        INNER JOIN handle AS h
            ON h.ROWID = m.handle_id
        WHERE c.chat_identifier = ?
        ORDER BY h.id;
    """, (chat_id,))]


def _sender(row, groupChat, prettyID):
    """Return the display name of the sender of ``row``."""
    if row["is_from_me"] == 1:
        return "me"
    if groupChat:
        # The handle comes from a LEFT JOIN and may therefore be NULL.
        return row["handle"] or "unknown"
    return prettyID


def _removed_note(row):
    """Return the placeholder text used instead of an attachment."""
    size = format(row["total_bytes"] or 0, ',d')
    return "<Attachment removed: " + row["filename"] + " (Bytes: " + size + " KB)>"


def _ensure_attachment_dir():
    """Create ATTACHDIR unless it already exists."""
    try:
        os.makedirs(ATTACHDIR)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise


def _attachment_html(row):
    """Copy the attachment of ``row`` into ATTACHDIR and return HTML for it.

    Returns an escaped "not found" note when the source file is missing.
    """
    filename = row["filename"]
    source = path.expanduser(filename)
    if not os.path.exists(source):
        return cgi.escape("<Attachment not found: " + filename + ">")

    name = row["transfer_name"] or ""
    target = ATTACHDIR + "/" + (row["guid"] or "") + "-" + name
    target = target.replace(" ", "_")
    copy2(source, target)

    # Both values end up inside HTML attributes, so quotes are escaped too.
    safe_name = cgi.escape(name, True)
    safe_target = cgi.escape(target, True)
    if "video/" in (row["mime_type"] or ""):
        return ("Video: " + safe_name + "<BR><DIV ALIGN=\"center\"><video id=\""
                + safe_name + "\" src=\"" + safe_target
                + "\" width=\"800\" height=\"540\" controls></video></DIV>")
    return ("<a href=\"" + safe_target + "\" target=\"_blank\" border=\"0\"><img src=\""
            + safe_target + "\" width=\"150\" align=\"top\"></a><br>")


def _print_text_transcript(rows, chatTitle, chatEnd, groupChat, prettyID, chatPadding):
    """Print the chat as plain text framed by starred title and end banners."""
    stars = "*" * (len(chatTitle) + 4)
    _emit(stars + "\n* " + chatTitle + " *\n" + stars)

    for row in rows:
        # Skip this message if the text is blank (happens occasionally)
        if row["text"] is None:
            continue
        text = row["text"].strip()
        if row["filename"]:
            text = text + _removed_note(row)
        # Pad the sender so the columns line up with the longest name
        who = _sender(row, groupChat, prettyID).rjust(chatPadding, ' ')
        _emit("%s @ %s: %s" % (who, row["fmtdate"], text))

    stars = "*" * (len(chatEnd) + 4)
    _emit(stars + "\n* " + chatEnd + " *\n" + stars + "\n")


def _print_html_transcript(rows, chatTitle, chatEnd, groupChat, prettyID,
                           chatPadding, keepAttachment):
    """Print the chat as HTML, starting a new "bubble" for every new date."""
    printHTMLHeader()
    print("<div class=\"message date1\">")
    _emit("<div align=\"center\"><b>" + cgi.escape(chatTitle) + "</b></div>")

    if keepAttachment:
        # Created once up front instead of once per message.
        _ensure_attachment_dir()

    prevDate = ""
    for row in rows:
        # Skip this message if the text is blank (happens occasionally)
        if row["text"] is None:
            continue

        stamp = row["fmtdate"] or ""
        date = stamp[:11]
        time = stamp[11:]
        text = row["text"].strip()

        # Attachment columns are only touched when the LEFT JOIN found one.
        if not row["filename"]:
            text = cgi.escape(text)
        elif keepAttachment:
            text = cgi.escape(text) + _attachment_html(row)
        else:
            text = cgi.escape(text + _removed_note(row))

        who = cgi.escape(_sender(row, groupChat, prettyID).rjust(chatPadding, ' '))

        if date == prevDate:
            # Same date as the previous message: just the user, time and text
            line = "<b>%s @ %s:</b> %s " % (who, time, text)
        else:
            # New date: close the previous bubble and start a new one
            line = "</div><div class=\"message date\"><b>%s</b><hr><b>%s @ %s:</b> %s" % (date, who, time, text)

        _emit(line)
        prevDate = date

    # End of the current chat transcript
    _emit("</div><div class=\"message\" align=\"center\"><b>" + cgi.escape(chatEnd) + "</b></div>")


def exportID(chatFile, chat_id, prettyID, HTML, separate, keepAttachment):
    """Export one chat as an HTML or plain-text transcript.

    Args:
        chatFile: path of the SQLite chat database.
        chat_id: ``chat_identifier`` of the chat to export. Identifiers that
            start with "chat" are treated as group chats.
        prettyID: name shown for the other party of a one-to-one chat; when
            empty, ``chat_id`` is used.
        HTML: truthy for HTML output, falsy for plain text.
        separate: truthy to write to ``Export-<chat_id><ext>`` in the working
            directory (with "+" removed from the ID) instead of stdout.
        keepAttachment: truthy to copy attachments into ATTACHDIR and embed
            them (HTML output only); otherwise a placeholder note is written.

    Prints a notice and returns when the chat has no messages. Exits the
    process with status 1 when the database cannot be opened. ``sys.stdout``
    is always restored and the output file closed before returning.
    """
    chat_id = _as_text(chat_id)
    prettyID = _as_text(prettyID) or chat_id

    try:
        db = sqlite3.connect(chatFile)
    except sqlite3.Error:
        print("Error accessing '" + chatFile + "'!")
        sys.exit(1)
    db.row_factory = sqlite3.Row

    oldstdout = sys.stdout
    outFile = None
    try:
        cursor = db.cursor()

        # First make sure data actually exists for this chat_id
        first = _boundary_row(cursor, chat_id, newest=False)
        if first is None:
            _emit("No chat logs found for '%s'!" % (chat_id,))
            return
        firstDate = first[0]
        lastDate = _boundary_row(cursor, chat_id, newest=True)[0]

        # See if this is a group chat instead of an individual conversation.
        # The real identifier is checked, not the display name.
        groupChat = chat_id[:4] == "chat"
        if groupChat:
            handles = _group_handles(cursor, chat_id)
            chat_handles = ", ".join(handles) or "NOBODY"
            # Pad to the longest handle
            chatPadding = max(len(handle) for handle in handles) if handles else 0
            chatTitle = "Group conversation with %s from %s to %s" % (chat_handles, firstDate, lastDate)
            chatEnd = "End of group conversation with %s" % (chat_handles)
        else:
            chatTitle = "Chat transcript with %s from %s to %s" % (prettyID, firstDate, lastDate)
            chatEnd = "* End of chat transcript with %s *" % (prettyID)
            chatPadding = len(prettyID)

        if separate:
            # Output this chat to its own file
            logExt = ".html" if HTML else ".txt"
            outFile = open("Export-" + chat_id.replace("+", "") + logExt, 'w')
            sys.stdout = outFile

        # Grab all the rows for this chat_id. A message with an attachment
        # carries that attachment's columns; otherwise they are NULL.
        rows = cursor.execute("""
            SELECT datetime(m.date/1000000000 + ?, 'unixepoch', 'localtime') AS fmtdate,
                   m.is_from_me    AS is_from_me,
                   m.text          AS text,
                   h.id            AS handle,
                   a.filename      AS filename,
                   a.mime_type     AS mime_type,
                   a.total_bytes   AS total_bytes,
                   a.guid          AS guid,
                   a.transfer_name AS transfer_name
            FROM chat AS c
            INNER JOIN chat_message_join AS cm
                ON cm.chat_id = c.ROWID
            INNER JOIN message AS m
                ON m.ROWID = cm.message_id
            LEFT JOIN handle AS h
                ON h.ROWID = m.handle_ID
            LEFT JOIN message_attachment_join AS ma
                ON ma.message_id = m.ROWID
            LEFT JOIN attachment AS a
                ON a.ROWID = ma.attachment_id
            WHERE c.chat_identifier = ?
            ORDER BY m.date;
        """, (EPOCH, chat_id))

        if HTML:
            _print_html_transcript(rows, chatTitle, chatEnd, groupChat,
                                   prettyID, chatPadding, keepAttachment)
        else:
            _print_text_transcript(rows, chatTitle, chatEnd, groupChat,
                                   prettyID, chatPadding)
    finally:
        if outFile is not None:
            # Restore stdout so later messages do not land in this chat's file.
            sys.stdout = oldstdout
            outFile.close()
        db.close()
# | |
# exportAll strictly loops through all the chat conversations and calls | |
# exportID accordingly | |
# | |
def exportAll(chatFile, HTML, separate, keepAttachment):
    """Export every non-archived chat in ``chatFile`` by calling exportID.

    Args:
        chatFile: path of the SQLite chat database.
        HTML: truthy for HTML output, falsy for plain text.
        separate: truthy to write each chat to its own file.
        keepAttachment: truthy to copy attachments next to the export.

    Exits the process with status 1 when the chat list cannot be read.
    ``sys.stdout`` is restored to its value at entry even if an export fails.
    """
    oldstdout = sys.stdout

    # Read the complete list first so the connection can be closed before
    # exportID opens its own.
    db = None
    try:
        db = sqlite3.connect(chatFile)
        chat_ids = [row[0] for row in db.execute("""
            SELECT DISTINCT chat_identifier
            FROM chat WHERE is_archived = 0 ORDER BY chat_identifier;
        """)]
    except sqlite3.Error:
        print("Error accessing '" + chatFile + "'!")
        sys.exit(1)
    finally:
        if db is not None:
            db.close()

    # Loop through each ID and export to a file/stdout
    try:
        for chat_id in chat_ids:
            exportID(chatFile, chat_id, "", HTML, separate, keepAttachment)
    finally:
        sys.stdout = oldstdout
def printHTMLHeader():
    """Print the charset declaration and the stylesheet shared by all HTML exports."""
    header_lines = [
        "",
        "<meta charset=\"utf-8\">",
        "<style>",
        "body { margin: 0; padding: 0; }",
        ".message {",
        "white-space: pre-wrap;",
        "max-width: 800px;",
        "padding: 10px;",
        "margin: 10px;",
        "font-family: \"Courier\", Calibri, Tahoma;",
        "font-size: 14px;",
        "}",
        ".date { background-color: #EEE; }",
        ".date1 { background-color: #A6DBFF; }",
        "</style>",
        "",
    ]
    # The empty first and last entries reproduce the leading and trailing
    # newline of the header text.
    print("\n".join(header_lines))
def _build_parser():
    """Return the command-line parser shared by usage() and main()."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--type", help="Specify the source. Either [m]essages (default), or [b]ackup. When specifying [b], you must also pass the -b option to specify which backup to use.")
    parser.add_argument("-b", "--backupID", nargs='?', help="Specifies which backup to use. If blank, a list of backups will be shown.")
    parser.add_argument("-f", "--file", help="Alternate SQLite DB to use")
    parser.add_argument("-l", "--list", action="store_true", help="List all non-archived chats")
    parser.add_argument("-s", "--separate", action="store_true", help="During export, write chat(s) to a separate file")
    parser.add_argument("-c", "--id", help="Specify the ID to export. Specify 'a' for all.")
    parser.add_argument("-p", "--prettyID", help="Specify the pretty ID to use when exporting a single chat")
    parser.add_argument("-t", action="store_true", help="Output chat in TXT format instead of HTML")
    parser.add_argument("-k", action="store_true", help="Keep attachments (stored in ./" + ATTACHDIR + ")")
    return parser


def usage():
    """Print the command-line help."""
    _build_parser().print_help()


def main():
    """Parse the command line and list or export chats accordingly.

    Error conditions (unknown export type, missing backup directory, missing
    database file, missing -c) print a message and exit with status 1.
    Listing backups or chats exits with status 0.
    """
    parser = _build_parser()
    args = parser.parse_args()

    exportType = args.type or "m"
    backupID = args.backupID or ""
    chatFile = args.file or CHAT_DB
    prettyID = args.prettyID or ""
    HTML = 0 if args.t else 1
    separate = 1 if args.separate else 0
    keepAttachment = 1 if args.k else 0

    if exportType != "b" and exportType != "m":
        print("The export type must be either [b]ackups or [m]essages.")
        parser.print_help()
        sys.exit(1)

    # Set chatFile accordingly if we're using a backup instead of the (default)
    # Messages DB. If so, see if an argument was passed for -b, and if not print
    # out the current backups available.
    if exportType == "b":
        if len(backupID) == 0:
            # Print out a list of backups available.
            list_backups()
            sys.exit()

        # Verify the backup directory exists.
        backupDir = os.path.join(CHAT_DB_BACKUP_PATH, backupID)
        if not os.path.isdir(backupDir):
            print("The backup directory '" + backupDir + "' was not found!")
            sys.exit(1)

        # Backups keep the file in a sub-directory named after the first two
        # characters of its name; fall back to the backup root for backups
        # that store all files flat.
        chatFile = os.path.join(backupDir, CHAT_DB_BACKUP[:2], CHAT_DB_BACKUP)
        flatFile = os.path.join(backupDir, CHAT_DB_BACKUP)
        if not os.path.isfile(chatFile) and os.path.isfile(flatFile):
            chatFile = flatFile

    # Checked only after the source has been resolved, so using a backup does
    # not require the live Messages database to exist.
    if not os.path.isfile(chatFile):
        print("'" + chatFile + "' not found!")
        sys.exit(1)

    # See if we want to list all chats.
    if args.list:
        list_chats(chatFile)
        sys.exit()

    # Make sure a required argument was used.
    if not args.id:
        print("You need to specify [-c].")
        parser.print_help()
        sys.exit(1)

    # See if we're exporting all, or just one conversation.
    if args.id == "a":
        exportAll(chatFile, HTML, separate, keepAttachment)
    else:
        exportID(chatFile, args.id, prettyID, HTML, separate, keepAttachment)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment