-
-
Save phord/8c90c7458f7795859562360af26997e3 to your computer and use it in GitHub Desktop.
This script processes the JSON dump from Google Takeout to extract all messages sent between two people. Before extracting a conversation, you need to know its conversation_id.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# example:
# python hangout-filter.py Hangouts.json Ugw3axd-nDFKCvFsDi94AaABAQ | sort | tr '\n' ' ' > words.txt
#!/usr/bin/env python
import json | |
import argparse | |
import dateutil.parser | |
import re | |
import time | |
def participant_names(state):
    """Map gaia_id -> fallback_name for the participants of one conversation.

    Reads ``state['conversation']['conversation']['participant_data']``.
    Returns an empty dict when that path is missing, and skips participants
    that lack an id or a fallback name, so callers can fall back to the raw
    sender id instead of failing.
    """
    try:
        participants = state['conversation']['conversation']['participant_data']
    except (KeyError, TypeError):
        return {}
    names = {}
    for participant in participants:
        try:
            # setdefault keeps the first entry for an id, like the original
            # "first match wins" lookup.
            names.setdefault(participant['id']['gaia_id'],
                             participant['fallback_name'])
        except (KeyError, TypeError):
            continue
    return names


def segment_text(segment):
    """Return the printable text of one message segment.

    A segment with a 'text' key yields that text; a LINE_BREAK segment
    without text yields ''.  Any other segment is rendered as JSON so it can
    still be appended to the output line (the raw dict could not be
    concatenated to a string).
    """
    if 'text' in segment:
        return segment['text']
    if segment.get('type') == 'LINE_BREAK':
        return ''
    return json.dumps(segment, sort_keys=True)


def format_messages(data, conversation_id):
    """Yield one formatted line per message segment of a conversation.

    data: the parsed dump; ``data['conversations']`` is iterated.
    conversation_id: only events whose ``conversation_id.id`` equals this
        value are emitted.

    Each yielded line has the form ``'<name padded to 12> @ <asctime>: <text>'``.
    The sender name falls back to the gaia_id when no participant entry
    matches.
    """
    for state in data['conversations']:
        names = participant_names(state)
        for event in state.get('events', ()):
            if 'chat_message' not in event:
                continue
            content = event['chat_message'].get('message_content', {})
            if 'segment' not in content:
                continue
            if event.get('conversation_id', {}).get('id') != conversation_id:
                continue
            # The timestamp is divided by 1e6 before conversion, i.e. it is
            # treated as microseconds since the epoch.
            stamp = time.asctime(
                time.localtime(int(event['timestamp']) / 1000000))
            user_id = event['sender_id']['gaia_id']
            prefix = ('{:<12}'.format(names.get(user_id, user_id))
                      + ' @ ' + stamp + ': ')
            for segment in content['segment']:
                yield prefix + segment_text(segment)


def main():
    """Parse the command line, load the dump and print the conversation."""
    parser = argparse.ArgumentParser(
        description='Print conversations in readable format.')
    parser.add_argument('filename')
    parser.add_argument('conversation_id')
    args = parser.parse_args()
    # Binary mode lets json detect the UTF encoding itself rather than
    # relying on the locale default; the with-block closes the file.
    with open(args.filename, 'rb') as dump:
        data = json.load(dump)
    for out in format_messages(data, args.conversation_id):
        # Drop non-ASCII characters as before, but decode back to text so
        # Python 3 does not print a b'...' repr.
        print(out.encode('ascii', 'ignore').decode('ascii'))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment