giladbarnea · September 1, 2025 13:07
diff --git a/export_chatgpt_chat_to_md.py b/export_chatgpt_chat_to_md.py
 #! /usr/bin/env python3.12
 from __future__ import annotations

 import argparse
 import json
 import random
 import re
 import sys
 from pathlib import Path
 from typing import Any, Literal, TypedDict


 class Node(TypedDict):
    """One entry of a conversation's ``mapping``: a tree node that may carry a message."""

    # Identifier of this node; the tree helpers in this file look nodes up by
    # such identifiers in `data["mapping"]`.
    id: str
    # Identifier of the parent node. `convert` treats the first node whose
    # parent is None as the root.
    parent: str | None
    # Identifiers of the child nodes. A node without children ends a branch.
    children: list[str]
    # The message carried by this node. Nodes without one are skipped when rendering.
    message: Message | None


 class Message(TypedDict):
    """A single chat message as read by `_write_branch_to_file`."""

    id: str
    # Who produced the message; rendering dispatches on `author["role"]`.
    author: Author
    # NOTE(review): the renderer also handles "thoughts", "code",
    # "execution_output", "reasoning_recap" and "multimodal_text" content, so
    # this union looks narrower than the data actually processed — confirm.
    content: UserEditableContext | TextContent
    # The renderer only reads `reasoning_status` from the metadata.
    metadata: dict | ReasoningMetadata | ExecutionOutputMetadata
    # NOTE(review): only one status value is declared here; real exports may
    # contain others — confirm.
    status: Literal["finished_successfully"]


 class Author(TypedDict):
    """The producer of a message."""

    # "system" messages are skipped by the renderer, "user" and "assistant"
    # messages are rendered, and a "tool" message is rendered only as the
    # output that follows an assistant code message.
    role: Literal["user", "assistant", "tool", "system"]
    # Not read by the code in this file; presumably the tool's name for
    # "tool" messages — TODO confirm.
    name: Literal["python"] | None


 class UserEditableContext(TypedDict):
    """Message content of type "user_editable_context".

    The renderer only iterates a content's ``parts``; this shape declares
    none, so such a message contributes no text to the output.
    """

    content_type: Literal["user_editable_context"]
    # Neither field is read by the code in this file; presumably they hold the
    # user's profile / custom-instruction text — TODO confirm.
    user_profile: str
    user_instructions: str


 class TextContent(TypedDict):
    """Plain-text message content."""

    content_type: Literal["text"]
    # Text fragments. For user messages every part is written to the output;
    # for assistant messages only the first part is.
    parts: list[str]


 class ThoughtsContent(TypedDict):
    """Assistant reasoning content, rendered inside ``<thoughts>`` tags."""

    content_type: Literal["thoughts"]
    # Each thought becomes one bullet line in the output.
    thoughts: list[Thought]
    # Not read by the code in this file.
    source_analysis_msg_id: str


 class Thought(TypedDict):
    """One reasoning step; rendered as ``*   **summary**: content``."""

    # Written in bold at the start of the bullet.
    summary: str
    # Written after the summary on the same line.
    content: str


 class ReasoningMetadata(TypedDict):
    """Message metadata describing the assistant's reasoning phase."""

    # "is_reasoning" messages open a <thoughts> block (or are dropped when
    # thoughts are excluded); "reasoning_ended" closes an open block.
    reasoning_status: Literal["is_reasoning", "reasoning_ended"]


 class CodeContent(TypedDict):
    """Code written by the assistant."""

    content_type: Literal["code"]
    # Not read by the renderer, which always emits a ```python fence.
    language: Literal["unknown"]
    # The source code placed inside the fence.
    text: str


 class ExecutionOutputContent(TypedDict):
    """Output of running a code message, carried by a "tool" message."""

    content_type: Literal["execution_output"]
    # Written directly below the code block of the preceding assistant message.
    text: str


 class ExecutionOutputMetadata(TypedDict):
    """Metadata attached to an execution-output message."""

    # Not read by the code in this file. Presumably shaped like
    # `ExecutionOutputAggregateResult` — TODO confirm and tighten the type.
    aggregate_result: dict


 class ExecutionOutputAggregateResult(TypedDict):
    """Summary of one code execution.

    Not referenced by any other code in this file; presumably the shape of
    ``ExecutionOutputMetadata.aggregate_result`` — TODO confirm.
    """

    # NOTE(review): only "success" is declared; failed runs presumably carry
    # another value — confirm.
    status: Literal["success"]
    run_id: str
    # Presumably the code that was executed — TODO confirm.
    code: str
    # Presumably the value of the last expression of the run — TODO confirm.
    final_expression_output: str


 # region ---[ Common Helpers ]---
 def _write_branch_to_file(
    branch: list[dict], filename: str, title: str, no_thoughts: bool = False
 ):
    with open(filename, "w") as f:
        f.write(f"# {title}\n" + "=" * len(title) + "\n\n")
        current_author = None
        in_thoughts = False
        skip_next = False
        for i, node in enumerate(branch):
            if skip_next:
                skip_next = False
                continue

            message = node.get("message")
            if not message:
                continue

            author = message.get("author", {}).get("role")
            if author == "system":
                continue  # Skip hidden system messages

            content = message.get("content", {})
            content_type = content.get("content_type")
            metadata = message.get("metadata", {})
            reasoning_status = metadata.get("reasoning_status")

            if author != current_author:
                if author == "user":
                    f.write("---\n\n# User\n\n")
                elif author == "assistant":
                    f.write("---\n\n# Assistant\n\n")
                current_author = author

            if author == "user":
                for part in content.get("parts", []):
                    match content_type:
                        case "text":
                            assert isinstance(part, str), (
                                f"Expected 'part' to be a str because content.content_type is 'text', got {type(part)}"
                            )
                            f.write(f"{part}\n\n")
                            continue

                    match part.get("content_type"):
                        case "real_time_user_audio_video_asset_pointer":
                            duration = float(
                                part.get("audio_asset_pointer", {})
                                .get("metadata", {})
                                .get("end")
                            ) - float(
                                part.get("audio_asset_pointer", {})
                                .get("metadata", {})
                                .get("start")
                            )
                            f.write(f"🎙️ ({duration:.1f}s)\n\n")
                        case "audio_transcription":
                            f.write(f"{part.get('text')}\n\n")
                        case _:
                            print(
                                f"⚠️ Unknown user part type: {part.get('content_type')}"
                            )
            elif author == "assistant":
                if no_thoughts and reasoning_status == "is_reasoning":
                    continue
                if (
                    reasoning_status == "is_reasoning"
                    and content_type == "thoughts"
                    and not in_thoughts
                ):
                    f.write("<thoughts>\n")
                    in_thoughts = True

                if content_type == "thoughts":
                    for thought in content.get("thoughts", []):
                        f.write(
                            f"*   **{thought.get('summary')}**: {thought.get('content')}\n"
                        )
                    f.write("\n")
                elif content_type == "code":
                    f.write(f"```python\n{content.get('text', '')}\n```\n")
                    if i + 1 < len(branch):
                        next_node = branch[i + 1]
                        next_message = next_node.get("message", {})
                        next_author = next_message.get("author", {}).get("role")
                        next_content = next_message.get("content", {})
                        if (
                            next_author == "tool"
                            and next_content.get("content_type") == "execution_output"
                        ):
                            f.write(f"{next_content.get('text', '')}\n\n\n")
                            skip_next = True
                elif content_type == "text":
                    parts = content.get("parts", [])
                    if parts:
                        f.write(f"{parts[0]}\n\n")
                elif content_type == "reasoning_recap":
                    f.write(f"{content.get('content')}\n\n")
                elif content_type == "multimodal_text":
                    for part in content.get("parts", []):
                        match part.get("content_type"):
                            case "real_time_user_audio_video_asset_pointer":
                                duration = float(
                                    part.get("audio_asset_pointer", {})
                                    .get("metadata", {})
                                    .get("end")
                                ) - float(
                                    part.get("audio_asset_pointer", {})
                                    .get("metadata", {})
                                    .get("start")
                                )
                                f.write(f"🎙️ ({duration:.1f}s)\n\n")
                            case "audio_transcription":
                                f.write(f"{part.get('text')}\n\n")
                            case "audio_asset_pointer":
                                pass
                            case _:
                                print(
                                    f"⚠️ Unknown multimodal_text part type: {part.get('content_type')}"
                                )
                else:
                    non_empty_keys = {
                        k for k, v in content.items() if v and k != "content_type"
                    }
                    if non_empty_keys:
                        print(
                            f"⚠️ Unknown content type: {content_type}. Non-empty keys: {non_empty_keys}"
                        )

                if in_thoughts and (
                    reasoning_status == "reasoning_ended" or content_type != "thoughts"
                ):
                    f.write("</thoughts>\n\n")
                    in_thoughts = False
            else:
                print(f"⚠️ Unknown author: {author}")

    print(f"Wrote {len(branch)} nodes to {filename}")


 def _traverse_branches(
    node_id: str, data: dict, path: list[dict], all_branches: list[list[dict]]
 ) -> None:
    """Collect every node list from ``node_id`` down to a leaf into ``all_branches``.

    The walk is depth-first and visits children in their stored order, so
    branches are appended in that order. An explicit stack is used instead of
    recursion so that very deep conversations cannot exceed Python's
    recursion limit.

    Args:
        node_id: Key in ``data["mapping"]`` of the node to start from.
        data: Conversation object; only ``data["mapping"]`` is read.
        path: Nodes that prefix every collected branch. It is used as a
            scratch buffer during the walk and restored before returning.
        all_branches: Output list; one new list of nodes is appended per leaf.

    Raises:
        KeyError: If ``node_id`` or a referenced child is not in the mapping.
    """
    mapping = data["mapping"]
    base = len(path)
    # Each entry is (node id, depth below the starting node).
    pending: list[tuple[str, int]] = [(node_id, 0)]
    while pending:
        current_id, depth = pending.pop()
        node = mapping[current_id]
        # Drop whatever the previously visited branch left at or below this depth.
        del path[base + depth :]
        path.append(node)

        children = node.get("children") or []
        if not children:
            all_branches.append(list(path))
        else:
            # Reversed so that the first child is popped, and so visited, first.
            pending.extend((child_id, depth + 1) for child_id in reversed(children))

    del path[base:]


 def _collect_path_to_root(data: dict, current_node_id: str) -> list[dict]:
    """Return the nodes from the top of the tree down to ``current_node_id``.

    Starting at ``current_node_id``, parent links are followed until a node
    has no parent or refers to an ID missing from ``data["mapping"]``.

    Args:
        data: Conversation object; only ``data["mapping"]`` is read.
        current_node_id: ID of the bottom-most node of the wanted path. A
            falsy or unknown ID yields an empty list.

    Returns:
        The visited nodes, top-most first and the starting node last.

    Raises:
        ValueError: If the parent links form a cycle (previously this looped forever).
    """
    path: list[dict] = []
    seen: set[str] = set()
    # Climb up the tree to the root node and collect the nodes in the path.
    while current_node_id:
        node = data["mapping"].get(current_node_id)
        if not node:
            break
        if current_node_id in seen:
            raise ValueError(
                f"Cycle in parent links detected at node {current_node_id!r}"
            )
        seen.add(current_node_id)
        path.append(node)
        current_node_id = node.get("parent")
    path.reverse()
    return path


 def _replace_uuids(input_path: Path | str) -> str:
    WORDS = [
        "Abscond",
        "Absurdist",
        "Adventure",
        "Alacrity",
        "Algorithm",
        "Allegory",
        "Altruism",
        "Ambivalent",
        "Ameliorate",
        "Amethyst",
        "Anthropological",
        "Archeological",
        "Artificial",
        "Astronomical",
        "Auburn",
        "Augmented",
        "Baleen",
        "Ballad",
        "Ballet",
        "Baroque",
        "Benevolent",
        "Bicycle",
        "Bilk",
        "Biological",
        "Breeze",
        "Bubble",
        "Burlesque",
        "Cacophony",
        "Cadence",
        "Cajole",
        "Capricious",
        "Carousel",
        "Cascade",
        "Catalyst",
        "Cavalier",
        "Chastise",
        "Chiaroscuro",
        "Chocolate",
        "Chrysalis",
        "Classicism",
        "Cobalt",
        "Colonial",
        "Comedy",
        "Concerto",
        "Conundrum",
        "Convivial",
        "Copious",
        "Cosmological",
        "Courage",
        "Crimson",
        "Cubism",
        "Curiosity",
        "Dadaism",
        "Dazzle",
        "Deleterious",
        "Delineate",
        "Dewdrop",
        "Digital",
        "Discombobulate",
        "Dolphin",
        "Dragonfly",
        "Drama",
        "Drift",
        "Ebullient",
        "Echo",
        "Ecological",
        "Effusive",
        "Egalitarian",
        "Egotistical",
        "Egregious",
        "Elegy",
        "Elephant",
        "Ember",
        "Emerald",
        "Enigma",
        "Enlightenment",
        "Environmental",
        "Ephemeral",
        "Epic",
        "Epicurean",
        "Epiphany",
        "Euphemism",
        "Existential",
        "Expressionism",
        "Fable",
        "Facetious",
        "Farce",
        "Fathom",
        "Firefly",
        "Flicker",
        "Flourish",
        "Flummox",
        "Folklore",
        "Fossil",
        "Frivolous",
        "Futurism",
        "Garnet",
        "Garrulous",
        "Geological",
        "Giggle",
        "Glimmer",
        "Glimpse",
        "Gossamer",
        "Gothic",
        "Grandiloquent",
        "Gregarious",
        "Gusto",
        "Hackneyed",
        "Haiku",
        "Halcyon",
        "Hapless",
        "Harangue",
        "Harmony",
        "Hedonistic",
        "Horizon",
        "Humming",
        "Hush",
        "Hyperbole",
        "Iconoclast",
        "Idiom",
        "Idiosyncrasy",
        "Imbibe",
        "Impecunious",
        "Impressionism",
        "Incandescent",
        "Indigo",
        "Industrial",
        "Ineffable",
        "Innovation",
        "Insidious",
        "Integrity",
        "Irony",
        "Ivory",
        "Jasmine",
        "Jigsaw",
        "Jocular",
        "Jocund",
        "Jubilant",
        "Jubilation",
        "Jubilee",
        "Juxtapose",
        "Juxtaposition",
        "Kaleidoscope",
        "Keen",
        "Kintsugi",
        "Kite",
        "Kiwi",
        "Knoll",
        "Labyrinth",
        "Lackadaisical",
        "Laconic",
        "Lark",
        "Lavender",
        "Legend",
        "Lighthouse",
        "Limerick",
        "Liminal",
        "Lissom",
        "Lugubrious",
        "Lullaby",
        "Luminous",
        "Majestic",
        "Malevolent",
        "Malign",
        "Masticate",
        "Maximalism",
        "Meadow",
        "Melancholy",
        "Mellifluous",
        "Melodrama",
        "Metaphor",
        "Metaphysical",
        "Minimalism",
        "Mirage",
        "Mitigate",
        "Modernism",
        "Moonglade",
        "Mountain",
        "Mystery",
        "Myth",
        "Nary",
        "Natural",
        "Nebula",
        "Nectar",
        "Nefarious",
        "Nihilism",
        "Nimbus",
        "Noxious",
        "Nymph",
        "Obfuscate",
        "Obsequious",
        "Ode",
        "Onerous",
        "Onomatopoeia",
        "Opal",
        "Opaline",
        "Opera",
        "Orchid",
        "Organic",
        "Ostentatious",
        "Oxymoron",
        "Paradigm",
        "Paradox",
        "Paranormal",
        "Parody",
        "Parsimonious",
        "Pastiche",
        "Paucity",
        "Pebble",
        "Perfunctory",
        "Pernicious",
        "Petrichor",
        "Philosophical",
        "Pillow",
        "Plethora",
        "Ponder",
        "Poppy",
        "Postmodernism",
        "Prism",
        "Proverb",
        "Psychological",
        "Quagmire",
        "Quaint",
        "Quantize",
        "Quantum",
        "Quasar",
        "Querulous",
        "Quibble",
        "Quill",
        "Quixotic",
        "Radiant",
        "Rainbow",
        "Rancor",
        "Recalcitrant",
        "Renaissance",
        "Repudiate",
        "Resilience",
        "Rhapsody",
        "Ripple",
        "Rococo",
        "Romanticism",
        "Rustic",
        "Sagacious",
        "Salient",
        "Sapphire",
        "Sarcasm",
        "Sardonic",
        "Satire",
        "Serendipity",
        "Serene",
        "Simile",
        "Sociological",
        "Solipsism",
        "Solstice",
        "Sonnet",
        "Sparkle",
        "Starlight",
        "Stoic",
        "Stymie",
        "Sunshine",
        "Supernatural",
        "Surrealism",
        "Sway",
        "Sycophant",
        "Symphony",
        "Synthetic",
        "Taciturn",
        "Tapestry",
        "Tautology",
        "Tender",
        "Theological",
        "Toady",
        "Tragedy",
        "Tranquil",
        "Transcendent",
        "Trepidation",
        "Twilight",
        "Ubiquitous",
        "Umbrella",
        "Unctuous",
        "Utopia",
        "Velvet",
        "Vexatious",
        "Vicarious",
        "Vicissitude",
        "Victorian",
        "Virtual",
        "Vivid",
        "Vortex",
        "Wander",
        "Wanderlust",
        "Wanton",
        "Watermelon",
        "Whisker",
        "Whisper",
        "Willow",
        "Wily",
        "Xenodochial",
        "Xenon",
        "Xenophobia",
        "Xylophone",
        "Yacht",
        "Yawn",
        "Yearn",
        "Yield",
        "Zealous",
        "Zenith",
        "Zephyr",
        "Zest",
        "Zigzag",
        "Zinnia",
    ]
    try:
        input_content = Path(input_path).read_text()
    except FileNotFoundError:
        json.loads(input_path)  # If it's JSON parsable, then its a raw data string
        input_content = input_path

    uuid_re = re.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")
    all_uuids_in_input = re.findall(uuid_re, input_content)
    unique_uuids = list(set(all_uuids_in_input))

    # Ensure there are enough unique words for all the unique UUIDs
    if len(WORDS) < len(unique_uuids):
        error = (
            f"Error: Not enough unique words in '{WORDS}'. "
            f"Need {len(unique_uuids)}, but only {len(WORDS)} words are available."
        )
        raise IndexError(error)

    # Assign a unique random word to each UUID by shuffling the word list
    # and creating a mapping dictionary for efficient lookups.
    random.shuffle(WORDS)
    uuid_to_word_map = dict(zip(unique_uuids, WORDS))

    # For efficiency, build a single regex that matches any of the UUIDs.
    # re.escape is used to safely handle any special regex characters in UUIDs.
    pattern = re.compile("|".join(re.escape(uuid) for uuid in uuid_to_word_map))

    # Perform the replacement in a single pass. The lambda function looks up
    # the matched UUID and returns its corresponding word.
    modified_content = pattern.sub(
        lambda match: uuid_to_word_map[match.group(0)], input_content
    )

    return modified_content


 # region ---[ Convert ]---


 def convert(
    json_file: Path | str,
    output_markdown_file: str,
    convert_each_branch: bool,
    replace_uuids: bool,
    no_thoughts: bool = False,
 ) -> None:
    """Convert a single-conversation JSON file to Markdown.

    Args:
        json_file: Path of the conversation JSON file.
        output_markdown_file: Output path. With ``convert_each_branch`` it is
            the name the per-branch files are derived from:
            ``<name without .md>_branch_<n>.md``.
        convert_each_branch: When True, write every branch from the root to a
            leaf to its own file. Otherwise write only the branch that ends
            at the conversation's ``current_node``.
        replace_uuids: When True, replace UUIDs with random words before
            converting.
        no_thoughts: When True, leave assistant reasoning out of the output.
    """
    json_file = Path(json_file)
    if replace_uuids:
        raw = _replace_uuids(json_file)
    else:
        raw = json_file.read_text(encoding="utf-8")
    data = json.loads(raw)
    title = data.get("title", "Conversation")

    if not convert_each_branch:
        # Render from the data loaded above instead of re-reading the file,
        # so that UUID replacement also takes effect in this mode.
        path = _collect_path_to_root(data, data.get("current_node"))
        _write_branch_to_file(
            path, output_markdown_file, title, no_thoughts=no_thoughts
        )
        return

    # The root is the first node that has no parent.
    root_node_id = next(
        (
            node_id
            for node_id, node in data["mapping"].items()
            if node.get("parent") is None
        ),
        None,
    )
    if not root_node_id:
        print("⚠️ No root node (a node without a parent) found; nothing written.")
        return

    all_branches: list[list[dict]] = []
    _traverse_branches(root_node_id, data, [], all_branches)

    # Strip only a trailing ".md"; ".md" elsewhere in the path must survive.
    output_stem = output_markdown_file.removesuffix(".md")
    for branch_number, branch in enumerate(all_branches, start=1):
        _write_branch_to_file(
            branch,
            f"{output_stem}_branch_{branch_number}.md",
            title,
            no_thoughts=no_thoughts,
        )


 def _convert_conversation_to_markdown(
    json_file: Path | str, markdown_file: str, no_thoughts: bool = False
 ) -> None:
    """Write the conversation's current branch to a Markdown file.

    Args:
        json_file: Path of a single-conversation JSON file.
        markdown_file: Path of the Markdown file to write.
        no_thoughts: When True, leave assistant reasoning out of the output.
    """
    # JSON documents are UTF-8; the platform default encoding may differ and
    # would then garble or reject non-ASCII text.
    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    # current_node is the bottom-most node in the conversation.
    current_node_id = data.get("current_node")

    # Climb up the tree to the root node and collect the nodes in the path.
    path = _collect_path_to_root(data, current_node_id)

    _write_branch_to_file(
        path, markdown_file, data.get("title", "Conversation"), no_thoughts=no_thoughts
    )


 # region ---[ Pick Conversation ]---


 def pick_conversation_tree(
    json_file: Path | str,
    output_markdown_file: str,
    message_id_in_conversation: str,
    convert_each_branch: bool,
    replace_uuids: bool,
    no_thoughts: bool = False,
 ) -> None:
    """Find the conversation containing a message and write its path to Markdown.

    The input is a JSON *list* of conversation objects. The conversation whose
    ``mapping`` contains ``message_id_in_conversation`` is selected, and the
    nodes from its root down to that message are written.

    Args:
        json_file: Path of the JSON file holding the list of conversations.
        output_markdown_file: Path of the Markdown file to write.
        message_id_in_conversation: ID of a node in the wanted conversation.
        convert_each_branch: Accepted for symmetry with ``convert``; it is
            not used by this function.
        replace_uuids: When True, replace the UUIDs in the selected nodes
            with random words before writing.
        no_thoughts: When True, leave assistant reasoning out of the output.

    Raises:
        ValueError: If no message ID is given, or if the ID is found in no
            conversation or in more than one.
    """
    if not message_id_in_conversation:
        raise ValueError("A message ID is required to pick a conversation tree.")

    # Search the data with its real IDs: replacing UUIDs first would rewrite
    # the very ID being searched for, and it could never be found.
    conversations: list[dict] = json.loads(
        Path(json_file).read_text(encoding="utf-8")
    )
    matches = [
        conversation
        for conversation in conversations
        if message_id_in_conversation in (conversation.get("mapping") or {})
    ]
    if not matches:
        raise ValueError(
            f"Message {message_id_in_conversation!r} was not found in any conversation."
        )
    if len(matches) > 1:
        raise ValueError(
            f"Message {message_id_in_conversation!r} was found in "
            f"{len(matches)} conversations; don't know which one to pick."
        )

    conversation = matches[0]
    tree: list[dict] = _collect_path_to_root(
        conversation, message_id_in_conversation
    )

    if replace_uuids:
        import tempfile

        # Hand the selected nodes to `_replace_uuids` as a file, the input
        # form it reads regardless of the document's size.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_file = Path(tmp_dir) / "tree.json"
            tmp_file.write_text(json.dumps(tree), encoding="utf-8")
            tree = json.loads(_replace_uuids(tmp_file))

    _write_branch_to_file(
        tree,
        output_markdown_file,
        conversation.get("title", "Conversation"),
        no_thoughts=no_thoughts,
    )


 # region ---[ Pick Branch ]---
 def pick_branch(
    json_file: Path | str,
    output_file: str,
    message_id_in_branch: str | None = None,
    replace_uuids: bool = False,
    no_thoughts: bool = False,
 ) -> None:
    """Write one branch of a single-conversation JSON file to Markdown.

    Args:
        json_file: Path of the conversation JSON file.
        output_file: Path of the Markdown file to write.
        message_id_in_branch: ID of a node the branch must contain. The
            branch runs from the root to that node and on to a leaf, taking
            the first child at every fork. When None, the branch ending at
            the conversation's ``current_node`` is used.
        replace_uuids: When True, replace the UUIDs in the selected nodes
            with random words before writing.
        no_thoughts: When True, leave assistant reasoning out of the output.

    Raises:
        KeyError: If ``message_id_in_branch`` is not a node of the conversation.
    """
    # Load the data with its real IDs: replacing UUIDs first would rewrite the
    # mapping keys, and `message_id_in_branch` could then never be found.
    data = json.loads(Path(json_file).read_text(encoding="utf-8"))
    title = data.get("title", "Conversation")

    if message_id_in_branch is None:
        # current_node is the bottom-most node, so there are no children to collect.
        branch = _collect_path_to_root(data, data.get("current_node"))
    else:
        up_to_root = _collect_path_to_root(data, message_id_in_branch)
        branches_below: list[list[dict]] = []
        _traverse_branches(message_id_in_branch, data, [], branches_below)
        # The first collected branch follows the first child at every fork.
        down_to_bottom = branches_below[0]
        # Both halves contain the picked node; it must not be written twice.
        assert down_to_bottom[0].get("id") == up_to_root[-1].get("id")
        branch = [*up_to_root, *down_to_bottom[1:]]

    if replace_uuids:
        import tempfile

        # Hand the selected nodes to `_replace_uuids` as a file, the input
        # form it reads regardless of the document's size.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_file = Path(tmp_dir) / "branch.json"
            tmp_file.write_text(json.dumps(branch), encoding="utf-8")
            branch = json.loads(_replace_uuids(tmp_file))

    _write_branch_to_file(branch, output_file, title, no_thoughts=no_thoughts)


 # region ---[ CLI ]---
 def main_cli():
    CONVERT_COMMAND = "convert"
    PICK_BRANCH_COMMAND = "pick"
    PICK_CONVERSATION_COMMAND = "pick-conversation"
    """Only relevant when the input is a *list* of conversation objects."""
    if not any(
        arg in [CONVERT_COMMAND, PICK_BRANCH_COMMAND, PICK_CONVERSATION_COMMAND]
        for arg in sys.argv[1:]
    ):
        sys.argv.insert(1, CONVERT_COMMAND)

    parser = argparse.ArgumentParser(
        description="Convert conversation JSON to Markdown or pick node."
    )

    subparsers = parser.add_subparsers(dest="command", required=True)

    # Default (convert) subparser
    convert_parser = subparsers.add_parser(
        CONVERT_COMMAND, help="Convert JSON to Markdown (default)"
    )
    convert_parser.add_argument("json_file", help="The input JSON file.")
    convert_parser.add_argument(
        "output_markdown_file", help="The output Markdown file."
    )
    convert_parser.add_argument(
        "-b",
        "--each-branch",
        action="store_true",
        help="Export each conversation branch to an individual file.",
    )

    # Pick subparser
    pick_branch_parser = subparsers.add_parser(
        PICK_BRANCH_COMMAND, help="Pick a branch to Markdown file."
    )
    pick_branch_parser.add_argument("json_file", help="The input JSON file.")
    pick_branch_parser.add_argument(
        "output_markdown_file", help="The output Markdown file."
    )
    pick_branch_parser.add_argument(
        "-m",
        "--message-id",
        type=str,
        default=None,
        help="Optional message ID which the target branch contains.",
    )
    pick_conversation_parser = subparsers.add_parser(
        PICK_CONVERSATION_COMMAND,
        help="Pick a conversation tree from a list of conversation objects to Markdown file.",
    )
    pick_conversation_parser.add_argument("json_file", help="The input JSON file.")
    pick_conversation_parser.add_argument(
        "output_markdown_file", help="The output Markdown file."
    )
    pick_conversation_parser.add_argument(
        "-m",
        "--message-id",
        type=str,
        help="Required message ID which the target conversation tree contains.",
    )
    pick_conversation_parser.add_argument(
        "-b",
        "--each-branch",
        action="store_true",
        help="Export each conversation branch to an individual file.",
    )

    for subparser in subparsers.choices.values():
        subparser.add_argument(
            "--replace-uuids",
            action="store_true",
            help="Replace UUIDs in the output with random words.",
        )
        subparser.add_argument(
            "--no-thoughts",
            action="store_true",
            help="Exclude thoughts in the output.",
        )

    args = parser.parse_args()
    replace_uuids: bool = args.replace_uuids
    no_thoughts: bool = args.no_thoughts
    json_file = args.json_file
    output_markdown_file = args.output_markdown_file

    if args.command == CONVERT_COMMAND:
        convert_each_branch = args.each_branch

        convert(
            json_file,
            output_markdown_file,
            convert_each_branch,
            replace_uuids,
            no_thoughts=no_thoughts,
        )
    elif args.command == PICK_BRANCH_COMMAND:
        pick_branch(
            json_file,
            output_markdown_file,
            args.message_id,
            replace_uuids=replace_uuids,
            no_thoughts=no_thoughts,
        )
    elif args.command == PICK_CONVERSATION_COMMAND:
        convert_each_branch = args.each_branch
        pick_conversation_tree(
            json_file,
            output_markdown_file,
            args.message_id,
            convert_each_branch,
            replace_uuids=replace_uuids,
            no_thoughts=no_thoughts,
        )
    else:
        raise ValueError(f"Invalid command: {args.command}")


 # Run the command-line interface only when executed as a script, not on import.
 if __name__ == "__main__":
    main_cli()
	#! /usr/bin/env python3.12
	from __future__ import annotations

	import argparse
	import json
	import random
	import re
	import sys
	from pathlib import Path
	from typing import Any, Literal, TypedDict


	class Node(TypedDict):
	    """One entry of a conversation's ``mapping``: a tree node that may carry a message."""

	    # Identifier of this node; the tree helpers in this file look nodes up by
	    # such identifiers in `data["mapping"]`.
	    id: str
	    # Identifier of the parent node; walking up the tree stops at a node
	    # without a parent.
	    parent: str | None
	    # Identifiers of the child nodes. A node without children ends a branch.
	    children: list[str]
	    # The message carried by this node. Nodes without one are skipped when rendering.
	    message: Message | None


	class Message(TypedDict):
	    """A single chat message as read by `_write_branch_to_file`."""

	    id: str
	    # Who produced the message; rendering dispatches on `author["role"]`.
	    author: Author
	    # NOTE(review): the renderer also handles "thoughts", "code",
	    # "execution_output", "reasoning_recap" and "multimodal_text" content, so
	    # this union looks narrower than the data actually processed — confirm.
	    content: UserEditableContext | TextContent
	    # The renderer only reads `reasoning_status` from the metadata.
	    metadata: dict | ReasoningMetadata | ExecutionOutputMetadata
	    # NOTE(review): only one status value is declared here; real exports may
	    # contain others — confirm.
	    status: Literal["finished_successfully"]


	class Author(TypedDict):
	role: Literal["user", "assistant", "tool", "system"]
	name: Literal["python"] \| None


	class UserEditableContext(TypedDict):
	    """Message content of type "user_editable_context".

	    The renderer only iterates a content's ``parts``; this shape declares
	    none, so such a message contributes no text to the output.
	    """

	    content_type: Literal["user_editable_context"]
	    # Neither field is read by the code in this file; presumably they hold the
	    # user's profile / custom-instruction text — TODO confirm.
	    user_profile: str
	    user_instructions: str


	class TextContent(TypedDict):
	    """Plain-text message content."""

	    content_type: Literal["text"]
	    # Text fragments. For user messages every part is written to the output;
	    # for assistant messages only the first part is.
	    parts: list[str]


	class ThoughtsContent(TypedDict):
	    """Assistant reasoning content, rendered inside ``<thoughts>`` tags."""

	    content_type: Literal["thoughts"]
	    # Each thought becomes one bullet line in the output.
	    thoughts: list[Thought]
	    # Not read by the code in this file.
	    source_analysis_msg_id: str


	class Thought(TypedDict):
	    """One reasoning step; rendered as a bullet with a summary and its content."""

	    # Written at the start of the bullet.
	    summary: str
	    # Written after the summary on the same line.
	    content: str


	class ReasoningMetadata(TypedDict):
	    """Message metadata describing the assistant's reasoning phase."""

	    # "is_reasoning" messages open a <thoughts> block (or are dropped when
	    # thoughts are excluded); "reasoning_ended" closes an open block.
	    reasoning_status: Literal["is_reasoning", "reasoning_ended"]


	class CodeContent(TypedDict):
	    """Code written by the assistant."""

	    content_type: Literal["code"]
	    # Not read by the renderer, which always emits a ```python fence.
	    language: Literal["unknown"]
	    # The source code placed inside the fence.
	    text: str


	class ExecutionOutputContent(TypedDict):
	    """Output of running a code message, carried by a "tool" message."""

	    content_type: Literal["execution_output"]
	    # Written directly below the code block of the preceding assistant message.
	    text: str


	class ExecutionOutputMetadata(TypedDict):
	    """Metadata attached to an execution-output message."""

	    # Not read by the code in this file. Presumably shaped like
	    # `ExecutionOutputAggregateResult` — TODO confirm and tighten the type.
	    aggregate_result: dict


	class ExecutionOutputAggregateResult(TypedDict):
	    """Summary of one code execution.

	    Not referenced by the other code visible in this file; presumably the
	    shape of ``ExecutionOutputMetadata.aggregate_result`` — TODO confirm.
	    """

	    # NOTE(review): only "success" is declared; failed runs presumably carry
	    # another value — confirm.
	    status: Literal["success"]
	    run_id: str
	    # Presumably the code that was executed — TODO confirm.
	    code: str
	    # Presumably the value of the last expression of the run — TODO confirm.
	    final_expression_output: str


	# region ---[ Common Helpers ]---
	def _write_branch_to_file(
	branch: list[dict], filename: str, title: str, no_thoughts: bool = False
	):
	with open(filename, "w") as f:
	f.write(f"# {title}\n" + "=" * len(title) + "\n\n")
	current_author = None
	in_thoughts = False
	skip_next = False
	for i, node in enumerate(branch):
	if skip_next:
	skip_next = False
	continue

	message = node.get("message")
	if not message:
	continue

	author = message.get("author", {}).get("role")
	if author == "system":
	continue # Skip hidden system messages

	content = message.get("content", {})
	content_type = content.get("content_type")
	metadata = message.get("metadata", {})
	reasoning_status = metadata.get("reasoning_status")

	if author != current_author:
	if author == "user":
	f.write("---\n\n# User\n\n")
	elif author == "assistant":
	f.write("---\n\n# Assistant\n\n")
	current_author = author

	if author == "user":
	for part in content.get("parts", []):
	match content_type:
	case "text":
	assert isinstance(part, str), (
	f"Expected 'part' to be a str because content.content_type is 'text', got {type(part)}"
	)
	f.write(f"{part}\n\n")
	continue

	match part.get("content_type"):
	case "real_time_user_audio_video_asset_pointer":
	duration = float(
	part.get("audio_asset_pointer", {})
	.get("metadata", {})
	.get("end")
	) - float(
	part.get("audio_asset_pointer", {})
	.get("metadata", {})
	.get("start")
	)
	f.write(f"🎙️ ({duration:.1f}s)\n\n")
	case "audio_transcription":
	f.write(f"{part.get('text')}\n\n")
	case _:
	print(
	f"⚠️ Unknown user part type: {part.get('content_type')}"
	)
	elif author == "assistant":
	if no_thoughts and reasoning_status == "is_reasoning":
	continue
	if (
	reasoning_status == "is_reasoning"
	and content_type == "thoughts"
	and not in_thoughts
	):
	f.write("<thoughts>\n")
	in_thoughts = True

	if content_type == "thoughts":
	for thought in content.get("thoughts", []):
	f.write(
	f"* {thought.get('summary')}: {thought.get('content')}\n"
	)
	f.write("\n")
	elif content_type == "code":
	f.write(f"```python\n{content.get('text', '')}\n```\n")
	if i + 1 < len(branch):
	next_node = branch[i + 1]
	next_message = next_node.get("message", {})
	next_author = next_message.get("author", {}).get("role")
	next_content = next_message.get("content", {})
	if (
	next_author == "tool"
	and next_content.get("content_type") == "execution_output"
	):
	f.write(f"{next_content.get('text', '')}\n\n\n")
	skip_next = True
	elif content_type == "text":
	parts = content.get("parts", [])
	if parts:
	f.write(f"{parts[0]}\n\n")
	elif content_type == "reasoning_recap":
	f.write(f"{content.get('content')}\n\n")
	elif content_type == "multimodal_text":
	for part in content.get("parts", []):
	match part.get("content_type"):
	case "real_time_user_audio_video_asset_pointer":
	duration = float(
	part.get("audio_asset_pointer", {})
	.get("metadata", {})
	.get("end")
	) - float(
	part.get("audio_asset_pointer", {})
	.get("metadata", {})
	.get("start")
	)
	f.write(f"🎙️ ({duration:.1f}s)\n\n")
	case "audio_transcription":
	f.write(f"{part.get('text')}\n\n")
	case "audio_asset_pointer":
	pass
	case _:
	print(
	f"⚠️ Unknown multimodal_text part type: {part.get('content_type')}"
	)
	else:
	non_empty_keys = {
	k for k, v in content.items() if v and k != "content_type"
	}
	if non_empty_keys:
	print(
	f"⚠️ Unknown content type: {content_type}. Non-empty keys: {non_empty_keys}"
	)

	if in_thoughts and (
	reasoning_status == "reasoning_ended" or content_type != "thoughts"
	):
	f.write("</thoughts>\n\n")
	in_thoughts = False
	else:
	print(f"⚠️ Unknown author: {author}")

	print(f"Wrote {len(branch)} nodes to {filename}")


	def _traverse_branches(
	    node_id: str, data: dict, path: list[dict], all_branches: list[list[dict]]
	) -> None:
	    """Collect every node list from ``node_id`` down to a leaf into ``all_branches``.

	    The walk is depth-first and visits children in their stored order, so
	    branches are appended in that order. An explicit stack is used instead of
	    recursion so that very deep conversations cannot exceed Python's
	    recursion limit.

	    Args:
	        node_id: Key in ``data["mapping"]`` of the node to start from.
	        data: Conversation object; only ``data["mapping"]`` is read.
	        path: Nodes that prefix every collected branch. It is used as a
	            scratch buffer during the walk and restored before returning.
	        all_branches: Output list; one new list of nodes is appended per leaf.

	    Raises:
	        KeyError: If ``node_id`` or a referenced child is not in the mapping.
	    """
	    mapping = data["mapping"]
	    base = len(path)
	    # Each entry is (node id, depth below the starting node).
	    pending: list[tuple[str, int]] = [(node_id, 0)]
	    while pending:
	        current_id, depth = pending.pop()
	        node = mapping[current_id]
	        # Drop whatever the previously visited branch left at or below this depth.
	        del path[base + depth :]
	        path.append(node)

	        children = node.get("children") or []
	        if not children:
	            all_branches.append(list(path))
	        else:
	            # Reversed so that the first child is popped, and so visited, first.
	            pending.extend((child_id, depth + 1) for child_id in reversed(children))

	    del path[base:]


	def _collect_path_to_root(data: dict, current_node_id: str) -> list[dict]:
	    """Return the nodes from the top of the tree down to ``current_node_id``.

	    Starting at ``current_node_id``, parent links are followed until a node
	    has no parent or refers to an ID missing from ``data["mapping"]``.

	    Args:
	        data: Conversation object; only ``data["mapping"]`` is read.
	        current_node_id: ID of the bottom-most node of the wanted path. A
	            falsy or unknown ID yields an empty list.

	    Returns:
	        The visited nodes, top-most first and the starting node last.

	    Raises:
	        ValueError: If the parent links form a cycle.
	    """
	    path: list[dict] = []
	    seen: set[str] = set()
	    # Climb up the tree to the root node and collect the nodes in the path.
	    while current_node_id:
	        node = data["mapping"].get(current_node_id)
	        if not node:
	            break
	        if current_node_id in seen:
	            raise ValueError(
	                f"Cycle in parent links detected at node {current_node_id!r}"
	            )
	        seen.add(current_node_id)
	        path.append(node)
	        current_node_id = node.get("parent")
	    path.reverse()
	    return path


	def _replace_uuids(input_path: Path \| str) -> str:
	WORDS = [
	"Abscond",
	"Absurdist",
	"Adventure",
	"Alacrity",
	"Algorithm",
	"Allegory",
	"Altruism",
	"Ambivalent",
	"Ameliorate",
	"Amethyst",
	"Anthropological",
	"Archeological",
	"Artificial",
	"Astronomical",
	"Auburn",
	"Augmented",
	"Baleen",
	"Ballad",
	"Ballet",
	"Baroque",
	"Benevolent",
	"Bicycle",
	"Bilk",
	"Biological",
	"Breeze",
	"Bubble",
	"Burlesque",
	"Cacophony",
	"Cadence",
	"Cajole",
	"Capricious",
	"Carousel",
	"Cascade",
	"Catalyst",
	"Cavalier",
	"Chastise",
	"Chiaroscuro",
	"Chocolate",
	"Chrysalis",
	"Classicism",
	"Cobalt",
	"Colonial",
	"Comedy",
	"Concerto",
	"Conundrum",
	"Convivial",
	"Copious",
	"Cosmological",
	"Courage",
	"Crimson",
	"Cubism",
	"Curiosity",
	"Dadaism",
	"Dazzle",
	"Deleterious",
	"Delineate",
	"Dewdrop",
	"Digital",
	"Discombobulate",
	"Dolphin",
	"Dragonfly",
	"Drama",
	"Drift",
	"Ebullient",
	"Echo",
	"Ecological",
	"Effusive",
	"Egalitarian",
	"Egotistical",
	"Egregious",
	"Elegy",
	"Elephant",
	"Ember",
	"Emerald",
	"Enigma",
	"Enlightenment",
	"Environmental",
	"Ephemeral",
	"Epic",
	"Epicurean",
	"Epiphany",
	"Euphemism",
	"Existential",
	"Expressionism",
	"Fable",
	"Facetious",
	"Farce",
	"Fathom",
	"Firefly",
	"Flicker",
	"Flourish",
	"Flummox",
	"Folklore",
	"Fossil",
	"Frivolous",
	"Futurism",
	"Garnet",
	"Garrulous",
	"Geological",
	"Giggle",
	"Glimmer",
	"Glimpse",
	"Gossamer",
	"Gothic",
	"Grandiloquent",
	"Gregarious",
	"Gusto",
	"Hackneyed",
	"Haiku",
	"Halcyon",
	"Hapless",
	"Harangue",
	"Harmony",
	"Hedonistic",
	"Horizon",
	"Humming",
	"Hush",
	"Hyperbole",
	"Iconoclast",
	"Idiom",
	"Idiosyncrasy",
	"Imbibe",
	"Impecunious",
	"Impressionism",
	"Incandescent",
	"Indigo",
	"Industrial",
	"Ineffable",
	"Innovation",
	"Insidious",
	"Integrity",
	"Irony",
	"Ivory",
	"Jasmine",
	"Jigsaw",
	"Jocular",
	"Jocund",
	"Jubilant",
	"Jubilation",
	"Jubilee",
	"Juxtapose",
	"Juxtaposition",
	"Kaleidoscope",
	"Keen",
	"Kintsugi",
	"Kite",
	"Kiwi",
	"Knoll",
	"Labyrinth",
	"Lackadaisical",
	"Laconic",
	"Lark",
	"Lavender",
	"Legend",
	"Lighthouse",
	"Limerick",
	"Liminal",
	"Lissom",
	"Lugubrious",
	"Lullaby",
	"Luminous",
	"Majestic",
	"Malevolent",
	"Malign",
	"Masticate",
	"Maximalism",
	"Meadow",
	"Melancholy",
	"Mellifluous",
	"Melodrama",
	"Metaphor",
	"Metaphysical",
	"Minimalism",
	"Mirage",
	"Mitigate",
	"Modernism",
	"Moonglade",
	"Mountain",
	"Mystery",
	"Myth",
	"Nary",
	"Natural",
	"Nebula",
	"Nectar",
	"Nefarious",
	"Nihilism",
	"Nimbus",
	"Noxious",
	"Nymph",
	"Obfuscate",
	"Obsequious",
	"Ode",
	"Onerous",
	"Onomatopoeia",
	"Opal",
	"Opaline",
	"Opera",
	"Orchid",
	"Organic",
	"Ostentatious",
	"Oxymoron",
	"Paradigm",
	"Paradox",
	"Paranormal",
	"Parody",
	"Parsimonious",
	"Pastiche",
	"Paucity",
	"Pebble",
	"Perfunctory",
	"Pernicious",
	"Petrichor",
	"Philosophical",
	"Pillow",
	"Plethora",
	"Ponder",
	"Poppy",
	"Postmodernism",
	"Prism",
	"Proverb",
	"Psychological",
	"Quagmire",
	"Quaint",
	"Quantize",
	"Quantum",
	"Quasar",
	"Querulous",
	"Quibble",
	"Quill",
	"Quixotic",
	"Radiant",
	"Rainbow",
	"Rancor",
	"Recalcitrant",
	"Renaissance",
	"Repudiate",
	"Resilience",
	"Rhapsody",
	"Ripple",
	"Rococo",
	"Romanticism",
	"Rustic",
	"Sagacious",
	"Salient",
	"Sapphire",
	"Sarcasm",
	"Sardonic",
	"Satire",
	"Serendipity",
	"Serene",
	"Simile",
	"Sociological",
	"Solipsism",
	"Solstice",
	"Sonnet",
	"Sparkle",
	"Starlight",
	"Stoic",
	"Stymie",
	"Sunshine",
	"Supernatural",
	"Surrealism",
	"Sway",
	"Sycophant",
	"Symphony",
	"Synthetic",
	"Taciturn",
	"Tapestry",
	"Tautology",
	"Tender",
	"Theological",
	"Toady",
	"Tragedy",
	"Tranquil",
	"Transcendent",
	"Trepidation",
	"Twilight",
	"Ubiquitous",
	"Umbrella",
	"Unctuous",
	"Utopia",
	"Velvet",
	"Vexatious",
	"Vicarious",
	"Vicissitude",
	"Victorian",
	"Virtual",
	"Vivid",
	"Vortex",
	"Wander",
	"Wanderlust",
	"Wanton",
	"Watermelon",
	"Whisker",
	"Whisper",
	"Willow",
	"Wily",
	"Xenodochial",
	"Xenon",
	"Xenophobia",
	"Xylophone",
	"Yacht",
	"Yawn",
	"Yearn",
	"Yield",
	"Zealous",
	"Zenith",
	"Zephyr",
	"Zest",
	"Zigzag",
	"Zinnia",
	]
	try:
	input_content = Path(input_path).read_text()
	except FileNotFoundError:
	json.loads(input_path) # If it's JSON parsable, then its a raw data string
	input_content = input_path

	uuid_re = re.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")
	all_uuids_in_input = re.findall(uuid_re, input_content)
	unique_uuids = list(set(all_uuids_in_input))

	# Ensure there are enough unique words for all the unique UUIDs
	if len(WORDS) < len(unique_uuids):
	error = (
	f"Error: Not enough unique words in '{WORDS}'. "
	f"Need {len(unique_uuids)}, but only {len(WORDS)} words are available."
	)
	raise IndexError(error)

	# Assign a unique random word to each UUID by shuffling the word list
	# and creating a mapping dictionary for efficient lookups.
	random.shuffle(WORDS)
	uuid_to_word_map = dict(zip(unique_uuids, WORDS))

	# For efficiency, build a single regex that matches any of the UUIDs.
	# re.escape is used to safely handle any special regex characters in UUIDs.
	pattern = re.compile("\|".join(re.escape(uuid) for uuid in uuid_to_word_map))

	# Perform the replacement in a single pass. The lambda function looks up
	# the matched UUID and returns its corresponding word.
	modified_content = pattern.sub(
	lambda match: uuid_to_word_map[match.group(0)], input_content
	)

	return modified_content


	# region ---[ Convert ]---


	def convert(
	    json_file: Path | str,
	    output_markdown_file: str,
	    convert_each_branch: bool,
	    replace_uuids: bool,
	    no_thoughts: bool = False,
	) -> None:
	    """Convert a single-conversation JSON file to Markdown.

	    Args:
	        json_file: Path of the conversation JSON file.
	        output_markdown_file: Output path. In per-branch mode it is the base
	            name: a trailing ``.md`` is dropped and ``_branch_<n>.md`` is
	            appended for each branch.
	        convert_each_branch: When true, write one file per root-to-leaf
	            branch; otherwise write only the branch ending at the
	            conversation's ``current_node``.
	        replace_uuids: When true, replace UUIDs with random words before
	            converting.
	        no_thoughts: Forwarded to ``_write_branch_to_file``.
	    """
	    json_file = Path(json_file)
	    raw_text = _replace_uuids(json_file) if replace_uuids else json_file.read_text()
	    data = json.loads(raw_text)
	    title = data.get("title", "Conversation")

	    if not convert_each_branch:
	        # Work from the data loaded above. Re-reading the file here would
	        # silently drop the UUID replacement that was just applied.
	        main_branch = _collect_path_to_root(data, data.get("current_node"))
	        _write_branch_to_file(
	            main_branch, output_markdown_file, title, no_thoughts=no_thoughts
	        )
	        return

	    # The root is the first node in the mapping that has no parent.
	    root_node_id = next(
	        (
	            node_id
	            for node_id, node in data["mapping"].items()
	            if node.get("parent") is None
	        ),
	        None,
	    )
	    if not root_node_id:
	        print("⚠️ No root node (a node without a parent) found; nothing was written.")
	        return

	    all_branches: list[list[dict]] = []
	    _traverse_branches(root_node_id, data, [], all_branches)

	    # Strip only a trailing ".md": a blanket replace would also mangle
	    # directory names such as "notes.md/out.md".
	    base_name = str(output_markdown_file).removesuffix(".md")
	    for branch_number, branch in enumerate(all_branches, start=1):
	        _write_branch_to_file(
	            branch,
	            f"{base_name}_branch_{branch_number}.md",
	            title,
	            no_thoughts=no_thoughts,
	        )


	def _convert_conversation_to_markdown(
	    json_file: str, markdown_file: str, no_thoughts: bool = False
	) -> None:
	    """Write the conversation's current branch from ``json_file`` as Markdown.

	    Loads the conversation JSON, takes its ``current_node`` (the bottom-most
	    node of the active branch), collects the nodes from the root down to it
	    and writes them to ``markdown_file`` under the conversation's title
	    (``"Conversation"`` when the file has none).
	    """
	    conversation = json.loads(Path(json_file).read_text())
	    title = conversation.get("title", "Conversation")
	    leaf_id = conversation.get("current_node")
	    _write_branch_to_file(
	        _collect_path_to_root(conversation, leaf_id),
	        markdown_file,
	        title,
	        no_thoughts=no_thoughts,
	    )


	# region ---[ Pick Conversation ]---


	def pick_conversation_tree(
	    json_file: Path | str,
	    output_markdown_file: str,
	    message_id_in_conversation: str,
	    convert_each_branch: bool,
	    replace_uuids: bool,
	    no_thoughts: bool = False,
	) -> None:
	    """Export the branch leading to one message out of a list of conversations.

	    ``json_file`` holds a JSON list of conversation objects. The conversation
	    whose ``mapping`` has ``message_id_in_conversation`` as a node id is
	    selected, and the nodes from its root down to that message are written to
	    ``output_markdown_file`` under the conversation's title.

	    Args:
	        json_file: Path of the JSON file with the list of conversations.
	        output_markdown_file: Path of the Markdown file to write.
	        message_id_in_conversation: Node id, as it appears in the input file,
	            that identifies the conversation and the end of the branch.
	        convert_each_branch: Accepted for parity with ``convert``; not used.
	        replace_uuids: When true, UUIDs are replaced with random words in the
	            exported content.
	        no_thoughts: Forwarded to ``_write_branch_to_file``.

	    Raises:
	        ValueError: If no message id is given, or if the id is not found in
	            exactly one conversation.
	    """
	    if not message_id_in_conversation:
	        raise ValueError("A message ID is required to pick a conversation tree.")

	    json_file = Path(json_file)
	    conversations: list[dict] = json.loads(json_file.read_text())

	    # Match on the mapping's node ids rather than on the conversation's text,
	    # so an id that is merely mentioned inside some message does not count.
	    matching_indices = [
	        index
	        for index, candidate in enumerate(conversations)
	        if message_id_in_conversation in (candidate.get("mapping") or {})
	    ]
	    if len(matching_indices) != 1:
	        raise ValueError(
	            "Expected exactly one conversation containing message "
	            f"{message_id_in_conversation!r}, found {len(matching_indices)}."
	        )
	    conversation_index = matching_indices[0]
	    conversation = conversations[conversation_index]
	    target_id = message_id_in_conversation

	    if replace_uuids:
	        # The id passed by the caller only exists in the raw data, so it is
	        # located there first. _replace_uuids rewrites text only; the replaced
	        # document keeps the raw one's structure and key order, so the target
	        # node sits at the same position of the same conversation's mapping.
	        key_position = list(conversation["mapping"]).index(target_id)
	        conversation = json.loads(_replace_uuids(json_file))[conversation_index]
	        target_id = list(conversation["mapping"])[key_position]

	    tree: list[dict] = _collect_path_to_root(conversation, target_id)
	    _write_branch_to_file(
	        tree,
	        output_markdown_file,
	        conversation.get("title", "Conversation"),
	        no_thoughts=no_thoughts,
	    )


	# region ---[ Pick ]---
	def pick_branch(
	    json_file: Path | str,
	    output_file: str,
	    message_id_in_branch: str | None = None,
	    replace_uuids: bool = False,
	    no_thoughts: bool = False,
	) -> None:
	    """Export one branch of a single-conversation JSON file to Markdown.

	    Without ``message_id_in_branch`` the branch ending at the conversation's
	    ``current_node`` is written. With it, the branch is the path from the
	    root down to that message, continued along the first child at every
	    level until a leaf is reached.

	    Args:
	        json_file: Path of the conversation JSON file.
	        output_file: Path of the Markdown file to write.
	        message_id_in_branch: Optional node id, as it appears in the input
	            file, that the exported branch must contain.
	        replace_uuids: When true, replace UUIDs with random words before
	            exporting.
	        no_thoughts: Forwarded to ``_write_branch_to_file``.

	    Raises:
	        KeyError: If ``message_id_in_branch`` is not a node id of the
	            conversation's mapping.
	    """
	    json_file = Path(json_file)
	    if replace_uuids:
	        data = json.loads(_replace_uuids(json_file))
	        if message_id_in_branch is not None:
	            # The caller's id only exists in the raw file. _replace_uuids
	            # rewrites text only, so the mapping keeps its key order and the
	            # node can be found again by its position.
	            raw_node_ids = list(json.loads(json_file.read_text())["mapping"])
	            if message_id_in_branch not in raw_node_ids:
	                raise KeyError(message_id_in_branch)
	            position = raw_node_ids.index(message_id_in_branch)
	            message_id_in_branch = list(data["mapping"])[position]
	    else:
	        data = json.loads(json_file.read_text())

	    title = data.get("title", "Conversation")
	    if message_id_in_branch is None:
	        # Pluck main branch from current_node. No need to collect children,
	        # because current_node is the bottom-most node.
	        branch = _collect_path_to_root(data, data.get("current_node"))
	    else:
	        up_to_root = _collect_path_to_root(data, message_id_in_branch)
	        leaf_paths: list[list[dict]] = []
	        _traverse_branches(message_id_in_branch, data, [], leaf_paths)
	        # The first collected path follows the first child at every level.
	        down_to_bottom = leaf_paths[0]
	        # Both halves meet at the picked message; it must not appear twice.
	        assert down_to_bottom[0].get("id") == up_to_root[-1].get("id")
	        # Concatenate into one flat list of nodes. A two-element list of
	        # lists is not a branch and cannot be written.
	        branch = up_to_root + down_to_bottom[1:]

	    _write_branch_to_file(branch, output_file, title, no_thoughts=no_thoughts)


	# region ---[ CLI ]---
	def main_cli():
	    """Parse the command line and run the selected sub-command.

	    Sub-commands are ``convert`` (the default when none is named), ``pick``
	    and ``pick-conversation``. Every sub-command takes the input JSON file
	    and the output Markdown file as positional arguments and accepts
	    ``--replace-uuids`` and ``--no-thoughts``.

	    Raises:
	        SystemExit: Raised by argparse on invalid arguments or ``--help``.
	        ValueError: If the parsed command is not one of the known ones.
	    """
	    CONVERT_COMMAND = "convert"
	    PICK_BRANCH_COMMAND = "pick"
	    # Only relevant when the input is a list of conversation objects.
	    PICK_CONVERSATION_COMMAND = "pick-conversation"
	    commands = (CONVERT_COMMAND, PICK_BRANCH_COMMAND, PICK_CONVERSATION_COMMAND)

	    # Work on a copy of the arguments so the process-wide sys.argv stays
	    # untouched when the default command is injected.
	    argv = sys.argv[1:]
	    if not any(arg in commands for arg in argv):
	        argv.insert(0, CONVERT_COMMAND)

	    parser = argparse.ArgumentParser(
	        description="Convert conversation JSON to Markdown or pick node."
	    )

	    subparsers = parser.add_subparsers(dest="command", required=True)

	    # Default (convert) subparser
	    convert_parser = subparsers.add_parser(
	        CONVERT_COMMAND, help="Convert JSON to Markdown (default)"
	    )
	    convert_parser.add_argument("json_file", help="The input JSON file.")
	    convert_parser.add_argument(
	        "output_markdown_file", help="The output Markdown file."
	    )
	    convert_parser.add_argument(
	        "-b",
	        "--each-branch",
	        action="store_true",
	        help="Export each conversation branch to an individual file.",
	    )

	    # Pick subparser
	    pick_branch_parser = subparsers.add_parser(
	        PICK_BRANCH_COMMAND, help="Pick a branch to Markdown file."
	    )
	    pick_branch_parser.add_argument("json_file", help="The input JSON file.")
	    pick_branch_parser.add_argument(
	        "output_markdown_file", help="The output Markdown file."
	    )
	    pick_branch_parser.add_argument(
	        "-m",
	        "--message-id",
	        type=str,
	        default=None,
	        help="Optional message ID which the target branch contains.",
	    )

	    # Pick-conversation subparser
	    pick_conversation_parser = subparsers.add_parser(
	        PICK_CONVERSATION_COMMAND,
	        help="Pick a conversation tree from a list of conversation objects to Markdown file.",
	    )
	    pick_conversation_parser.add_argument("json_file", help="The input JSON file.")
	    pick_conversation_parser.add_argument(
	        "output_markdown_file", help="The output Markdown file."
	    )
	    pick_conversation_parser.add_argument(
	        "-m",
	        "--message-id",
	        type=str,
	        # Enforce what the help text promises, so a missing id is reported by
	        # the parser instead of failing later inside the lookup.
	        required=True,
	        help="Required message ID which the target conversation tree contains.",
	    )
	    pick_conversation_parser.add_argument(
	        "-b",
	        "--each-branch",
	        action="store_true",
	        help="Export each conversation branch to an individual file.",
	    )

	    # Options shared by every sub-command.
	    for subparser in subparsers.choices.values():
	        subparser.add_argument(
	            "--replace-uuids",
	            action="store_true",
	            help="Replace UUIDs in the output with random words.",
	        )
	        subparser.add_argument(
	            "--no-thoughts",
	            action="store_true",
	            help="Exclude thoughts in the output.",
	        )

	    args = parser.parse_args(argv)
	    replace_uuids: bool = args.replace_uuids
	    no_thoughts: bool = args.no_thoughts
	    json_file = args.json_file
	    output_markdown_file = args.output_markdown_file

	    if args.command == CONVERT_COMMAND:
	        convert(
	            json_file,
	            output_markdown_file,
	            args.each_branch,
	            replace_uuids,
	            no_thoughts=no_thoughts,
	        )
	    elif args.command == PICK_BRANCH_COMMAND:
	        pick_branch(
	            json_file,
	            output_markdown_file,
	            args.message_id,
	            replace_uuids=replace_uuids,
	            no_thoughts=no_thoughts,
	        )
	    elif args.command == PICK_CONVERSATION_COMMAND:
	        pick_conversation_tree(
	            json_file,
	            output_markdown_file,
	            args.message_id,
	            args.each_branch,
	            replace_uuids=replace_uuids,
	            no_thoughts=no_thoughts,
	        )
	    else:
	        raise ValueError(f"Invalid command: {args.command}")


	# Run the command-line interface only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == "__main__":
	main_cli()
No results found