Skip to content

Instantly share code, notes, and snippets.

@moinologics
Created January 26, 2025 12:26
Show Gist options
  • Save moinologics/1e68cdb2626a98178eee49df22183766 to your computer and use it in GitHub Desktop.
Save moinologics/1e68cdb2626a98178eee49df22183766 to your computer and use it in GitHub Desktop.
Parse an exported WhatsApp chat
import re
# Timestamp as it appears at the start of a message line,
# e.g. "26/01/25, 12:26 pm" (two-digit date parts, 12-hour clock, lowercase am/pm).
datetime_pattern = r'\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2}\s?[ap]m'
# Sender label: everything up to the first colon. Restricting this to ASCII
# letters would reject senders shown as phone numbers ("+91 98765 43210") or
# names containing digits, accents, or non-Latin characters, and their
# messages would then be mistaken for continuation lines.
name_pattern = r'[^:]*'
# Message body: the rest of the line (may itself contain colons).
message_pattern = r'.*'
# Full line layout: "<datetime> - <sender>: <message>", one capture group each.
pattern = re.compile(f'({datetime_pattern}) - ({name_pattern}): ({message_pattern})')
def get_chats(path):
    """Parse an exported WhatsApp chat file into a list of messages.

    Each line that starts with ``<datetime> - <sender>: <text>`` (as described
    by the module-level ``pattern``) begins a new message. Any other line is
    treated as a continuation of the previous message and is appended to its
    text, separated by a newline. Lines that appear before the first message
    are ignored.

    Args:
        path: Path of the exported chat text file.

    Returns:
        A list of ``[datetime, sender, message]`` lists of strings, in file
        order.
    """
    chats = []
    # Decode explicitly instead of relying on the platform default;
    # "utf-8-sig" also discards a leading byte-order mark if one is present.
    # NOTE(review): assumes the export is UTF-8 encoded -- confirm for your source.
    with open(path, 'r', encoding='utf-8-sig') as f:
        for raw_line in f:
            # Strip only the line terminator so a trailing newline at the end
            # of the file does not add an empty continuation line.
            line = raw_line.rstrip('\n')
            # Narrow no-break spaces (U+202F) can appear inside the timestamp;
            # normalise them so the pattern's plain whitespace matches.
            # Anchor at the start of the line: a timestamp quoted in the middle
            # of a continuation line must not start a new message.
            match = re.match(pattern, line.replace('\u202f', ' '))
            if match:
                chats.append(list(match.groups()))
            elif chats:
                # Attach the line to the previous chat message.
                chats[-1][-1] += '\n' + line
    return chats
# Script driver: parse "chat.txt" from the current working directory and
# print every parsed [datetime, sender, message] record on its own line.
chats = get_chats('chat.txt')
for entry in chats:
    print(entry)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment