Created
January 26, 2025 12:26
-
-
Save moinologics/1e68cdb2626a98178eee49df22183766 to your computer and use it in GitHub Desktop.
parse WhatsApp exported chat
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Timestamp as written at the start of each exported message, e.g.
# "26/01/25, 12:26 pm": two-digit day/month/year, a 12-hour clock time and a
# lowercase am/pm marker optionally preceded by one whitespace character.
datetime_pattern = r'\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2}\s?[ap]m'
# Sender label: everything up to the first colon. Accepting any character
# except ':' (rather than only ASCII letters and spaces) lets senders that
# appear as phone numbers ("+91 98765 43210") or that use non-ASCII names be
# recognised; the label still cannot run past the colon that ends it.
name_pattern = r'[^:]*'
# Message text: the remainder of the line ('.' does not match a newline).
message_pattern = r'.*'
# Full message header: "<datetime> - <sender>: <message>".
# Capture groups, in order: datetime, sender, message.
pattern = re.compile(f'({datetime_pattern}) - ({name_pattern}): ({message_pattern})')
def get_chats(path):
    """Parse a WhatsApp exported chat file into a list of messages.

    Each line of the file is tested against the module-level ``pattern``.
    A line that matches starts a new message; a line that does not match is
    treated as a continuation of the previous message and appended to its
    text after a newline. Non-matching lines that appear before the first
    message are dropped.

    Args:
        path: Path of the exported chat text file.

    Returns:
        A list of ``[datetime, name, message]`` lists (all strings), in file
        order.
    """
    # The export contains non-ASCII text (e.g. U+202F, emoji), so decode it
    # explicitly as UTF-8 instead of relying on the platform default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        chat_lines = f.read().split('\n')

    # A file that ends with a newline yields one empty trailing element; it is
    # not a real line and would otherwise add a stray '\n' to the last message.
    if chat_lines[-1] == '':
        chat_lines.pop()

    chats = []
    for line in chat_lines:
        # U+202F (narrow no-break space) may appear inside the timestamp;
        # normalise it to a plain space so the pattern can match.
        match = pattern.search(line.replace('\u202f', ' '))
        if match:
            chats.append(list(match.groups()))
        elif chats:
            # attach line to previous chat message
            chats[-1][-1] += '\n' + line
    return chats
# Script body: parse the export named 'chat.txt' in the current working
# directory and print every parsed message, one [datetime, name, message]
# list per output line.
chats = get_chats('chat.txt')

for chat in chats:
    print(chat)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment