Created
January 23, 2024 21:05
-
-
Save paulclip/c8cbb96a6d4aa2b8b16fe76f0484fe83 to your computer and use it in GitHub Desktop.
Python script to convert ChatGPT exports into markdown files, suitable for importing into Obsidian et al
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert ChatGPT conversations into markdown | |
# source: https://dev.to/gavi/convert-chatgpt-conversations-to-obsidian-markdown-format-p61 | |
# Improvements (tried to match original coding conventions) | |
# 2023.08.10: Fix bug accessing 'parts' in node['message']['content']['parts'] | |
# 2024.01.06: Updated get_conversation to handle non-string content parts, i.e. images (which are not currently included in the ChatGPT exports) | |
# 2024.01.20: Added link to conversation in ChatGPT using conversation_id, and also set time stamps correctly, esp. on macOS
import json | |
import os | |
import sys | |
import time | |
import subprocess | |
import platform | |
def get_conversation(node_id, mapping, list):
    """Collect the messages of a conversation tree as markdown sections.

    Visits the tree rooted at ``node_id`` in depth-first pre-order (a node
    first, then each of its children in the order they are listed). For every
    node whose message has a non-empty ``content['parts']``, one entry is
    appended to ``list``: a ``## <author role>`` heading, a newline and a
    space, then the parts joined together.

    String parts are used verbatim. Dict parts carrying an ``asset_pointer``
    key become a Markdown image reference to that pointer.

    Args:
        node_id: Key in ``mapping`` of the node to start from.
        mapping: Dict of node id -> node dict (each node may have
            ``message`` and ``children`` keys).
        list: Output list, appended to in place. The name shadows the
            builtin; it is kept so existing callers keep working.

    Raises:
        KeyError: If a visited node id is missing from ``mapping``.
        SystemExit: If a part is neither a string nor a dict with an
            ``asset_pointer`` key (the unknown parts are printed first).
    """
    # An explicit stack replaces recursion so that very long conversations
    # (one tree level per message) cannot hit Python's recursion limit.
    pending = [node_id]
    while pending:
        node = mapping[pending.pop()]

        message = node.get('message')
        content = message.get('content') if message is not None else None
        content_parts = content.get('parts') if content is not None else None

        if content_parts:
            text_parts = []
            for part in content_parts:
                if isinstance(part, str):
                    text_parts.append(part)
                elif isinstance(part, dict) and 'asset_pointer' in part:
                    # Markdown image placeholder pointing at the asset
                    text_parts.append(f"![Image]({part['asset_pointer']})")
                # Add more conditions if there are other types of content
                else:
                    print("Non-string content_parts found:", content_parts)
                    sys.exit(1)  # Exit the script

            author_role = message['author']['role']
            list.append(f"## {author_role}\n {''.join(text_parts)}")

        # Push children reversed so the first child is popped (visited) first,
        # which keeps the same order the recursive version produced.
        pending.extend(reversed(node.get('children') or []))
def set_timestamps(file_path, create_time, update_time):
    """Apply a conversation's timestamps to an exported file (best effort).

    Args:
        file_path: Path of the file to update.
        create_time: Creation time as seconds since the epoch, or None to
            leave it alone. Only applied on macOS, via the external
            ``SetFile`` command.
        update_time: Modification time as seconds since the epoch, or None
            to leave it alone. Applied as both access and modification time.

    A missing ``SetFile`` executable is reported on stderr and otherwise
    ignored, so a cosmetic step cannot abort the export.
    """
    # Setting the creation time (macOS specific)
    if create_time is not None and platform.system() == 'Darwin':
        stamp = time.strftime("%m/%d/%Y %H:%M:%S", time.localtime(create_time))
        try:
            subprocess.run(["SetFile", "-d", stamp, file_path])
        except FileNotFoundError:
            # SetFile is not installed on this machine
            print(f"SetFile not found; creation time not set for {file_path}",
                  file=sys.stderr)

    # Updating the modification time; os.utime rejects None, so skip it then
    if update_time is not None:
        os.utime(file_path, (update_time, update_time))
if __name__ == '__main__':
    # Convert every conversation in ./conversations.json into a markdown file
    # ./output/<title>.md that starts with a "Source:" link to the conversation
    # and carries the conversation's timestamps.
    folder_path = "output"
    # Create the output directory if it does not exist yet
    os.makedirs(folder_path, exist_ok=True)

    # Read and write as UTF-8 explicitly; the platform default encoding may
    # not be able to represent the text in the conversations.
    with open('conversations.json', encoding='utf-8') as f:
        data = json.load(f)

    # Titles already written in this run, case-folded because some file
    # systems treat names that differ only by case as the same file.
    used_titles = set()

    for item in data:
        title = item.get("title") or "New chat"
        title = title.replace("/", "_").replace('"', '')
        # Untitled chats and repeated titles would map to the same file and
        # overwrite each other, so make them unique with the creation time.
        if title == "New chat" or title.casefold() in used_titles:
            title = f"{title} {int(item['create_time'])}"
        used_titles.add(title.casefold())

        conversation_id = item.get("conversation_id")
        if conversation_id is not None:
            url = "https://chat.openai.com/c/" + conversation_id
        else:
            url = "None"

        # The root of the message tree is the node without a parent
        mapping = item['mapping']
        root_node_id = next(
            (node_id for node_id, node in mapping.items()
             if node.get('parent') is None),
            None,
        )
        if root_node_id is None:
            print(f"Skipping '{title}': no root node found", file=sys.stderr)
            continue

        messages = []
        get_conversation(root_node_id, mapping, messages)

        file_path = f'{folder_path}/{title}.md'
        with open(file_path, 'w', encoding='utf-8') as outfile:
            outfile.write(f'Source: {url}\n')
            outfile.write('\n'.join(messages))
        set_timestamps(file_path, item["create_time"], item["update_time"])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment