hathibelagal-dev · April 23, 2025 23:25
diff --git a/diatest.ipynb b/diatest.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4",
      "authorship_tag": "ABX9TyP1P6DdP1Z61VZVoHuBry/z",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/hathibelagal-dev/392ce0a11e34876185c959838d3be9d9/diatest.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "0eo3pO-IDI50",
        "outputId": "8c9e6ad7-8fb7-4a11-cd6d-d2879c802c16",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting str2speech\n",
            "  Downloading str2speech-0.4.3-py3-none-any.whl.metadata (6.3 kB)\n",
            "Collecting transformers==4.50.3 (from str2speech)\n",
            "  Downloading transformers-4.50.3-py3-none-any.whl.metadata (39 kB)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (from str2speech) (2.6.0+cu124)\n",
            "Requirement already satisfied: torchvision in /usr/local/lib/python3.11/dist-packages (from str2speech) (0.21.0+cu124)\n",
            "Requirement already satisfied: torchaudio in /usr/local/lib/python3.11/dist-packages (from str2speech) (2.6.0+cu124)\n",
            "Requirement already satisfied: tokenizers in /usr/local/lib/python3.11/dist-packages (from str2speech) (0.21.1)\n",
            "Requirement already satisfied: scipy>=1.13.1 in /usr/local/lib/python3.11/dist-packages (from str2speech) (1.14.1)\n",
            "Requirement already satisfied: accelerate in /usr/local/lib/python3.11/dist-packages (from str2speech) (1.5.2)\n",
            "Requirement already satisfied: numpy==2.0.2 in /usr/local/lib/python3.11/dist-packages (from str2speech) (2.0.2)\n",
            "Collecting kokoro==0.9.4 (from str2speech)\n",
            "  Downloading kokoro-0.9.4-py3-none-any.whl.metadata (21 kB)\n",
            "Requirement already satisfied: soundfile in /usr/local/lib/python3.11/dist-packages (from str2speech) (0.13.1)\n",
            "Requirement already satisfied: gitpython in /usr/local/lib/python3.11/dist-packages (from str2speech) (3.1.44)\n",
            "Collecting moshi==0.2.4 (from str2speech)\n",
            "  Downloading moshi-0.2.4.tar.gz (148 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m148.8/148.8 kB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting torchtune (from str2speech)\n",
            "  Downloading torchtune-0.6.1-py3-none-any.whl.metadata (24 kB)\n",
            "Collecting torchao (from str2speech)\n",
            "  Downloading torchao-0.10.0-cp39-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (15 kB)\n",
            "Collecting huggingface_hub==0.28.1 (from str2speech)\n",
            "  Downloading huggingface_hub-0.28.1-py3-none-any.whl.metadata (13 kB)\n",
            "Requirement already satisfied: soxr==0.5.0.post1 in /usr/local/lib/python3.11/dist-packages (from str2speech) (0.5.0.post1)\n",
            "Requirement already satisfied: einops==0.8.1 in /usr/local/lib/python3.11/dist-packages (from str2speech) (0.8.1)\n",
            "Collecting einx==0.3.0 (from str2speech)\n",
            "  Downloading einx-0.3.0-py3-none-any.whl.metadata (6.9 kB)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from str2speech) (2.32.3)\n",
            "Collecting snac>=1.2.1 (from str2speech)\n",
            "  Downloading snac-1.2.1-py3-none-any.whl.metadata (3.5 kB)\n",
            "Collecting attrdict (from str2speech)\n",
            "  Downloading attrdict-2.0.1-py2.py3-none-any.whl.metadata (6.7 kB)\n",
            "Collecting librosa==0.10.2.post1 (from str2speech)\n",
            "  Downloading librosa-0.10.2.post1-py3-none-any.whl.metadata (8.6 kB)\n",
            "Collecting pydub==0.25.1 (from str2speech)\n",
            "  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
            "Collecting pyloudnorm==0.1.1 (from str2speech)\n",
            "  Downloading pyloudnorm-0.1.1-py3-none-any.whl.metadata (5.6 kB)\n",
            "Collecting x-transformers==2.1.37 (from str2speech)\n",
            "  Downloading x_transformers-2.1.37-py3-none-any.whl.metadata (88 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m88.2/88.2 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting openai-whisper==20240930 (from str2speech)\n",
            "  Downloading openai-whisper-20240930.tar.gz (800 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m800.5/800.5 kB\u001b[0m \u001b[31m32.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: inflect==7.5.0 in /usr/local/lib/python3.11/dist-packages (from str2speech) (7.5.0)\n",
            "Collecting argbind (from str2speech)\n",
            "  Downloading argbind-0.3.9.tar.gz (17 kB)\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.11/dist-packages (from einx==0.3.0->str2speech) (1.13.1)\n",
            "Requirement already satisfied: frozendict in /usr/local/lib/python3.11/dist-packages (from einx==0.3.0->str2speech) (2.4.6)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (3.18.0)\n",
            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (2025.3.2)\n",
            "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (24.2)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (6.0.2)\n",
            "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (4.67.1)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (4.13.2)\n",
            "Requirement already satisfied: more_itertools>=8.5.0 in /usr/local/lib/python3.11/dist-packages (from inflect==7.5.0->str2speech) (10.6.0)\n",
            "Requirement already satisfied: typeguard>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from inflect==7.5.0->str2speech) (4.4.2)\n",
            "Collecting loguru (from kokoro==0.9.4->str2speech)\n",
            "  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n",
            "Collecting misaki>=0.9.4 (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading misaki-0.9.4-py3-none-any.whl.metadata (19 kB)\n",
            "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (3.0.1)\n",
            "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (1.6.1)\n",
            "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (1.4.2)\n",
            "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (4.4.2)\n",
            "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (0.60.0)\n",
            "Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (1.8.2)\n",
            "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (0.4)\n",
            "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (1.1.0)\n",
            "Requirement already satisfied: safetensors<0.6,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from moshi==0.2.4->str2speech) (0.5.3)\n",
            "Collecting bitsandbytes<0.46,>=0.45 (from moshi==0.2.4->str2speech)\n",
            "  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)\n",
            "Requirement already satisfied: sentencepiece==0.2 in /usr/local/lib/python3.11/dist-packages (from moshi==0.2.4->str2speech) (0.2.0)\n",
            "Collecting sounddevice==0.5 (from moshi==0.2.4->str2speech)\n",
            "  Downloading sounddevice-0.5.0-py3-none-any.whl.metadata (1.4 kB)\n",
            "Collecting sphn>=0.1.4 (from moshi==0.2.4->str2speech)\n",
            "  Downloading sphn-0.1.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.2 kB)\n",
            "Requirement already satisfied: aiohttp<3.12,>=3.10.5 in /usr/local/lib/python3.11/dist-packages (from moshi==0.2.4->str2speech) (3.11.15)\n",
            "Requirement already satisfied: pytest>=8.3.3 in /usr/local/lib/python3.11/dist-packages (from moshi==0.2.4->str2speech) (8.3.5)\n",
            "Collecting tiktoken (from openai-whisper==20240930->str2speech)\n",
            "  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
            "Requirement already satisfied: triton>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from openai-whisper==20240930->str2speech) (3.2.0)\n",
            "Requirement already satisfied: future>=0.16.0 in /usr/local/lib/python3.11/dist-packages (from pyloudnorm==0.1.1->str2speech) (1.0.0)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers==4.50.3->str2speech) (2024.11.6)\n",
            "Requirement already satisfied: CFFI>=1.0 in /usr/local/lib/python3.11/dist-packages (from sounddevice==0.5->moshi==0.2.4->str2speech) (1.17.1)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch->str2speech) (3.4.2)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch->str2speech) (3.1.6)\n",
            "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->str2speech)\n",
            "  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->str2speech)\n",
            "  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->str2speech)\n",
            "  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
            "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->str2speech)\n",
            "  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
            "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->str2speech)\n",
            "  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->str2speech)\n",
            "  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-curand-cu12==10.3.5.147 (from torch->str2speech)\n",
            "  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch->str2speech)\n",
            "  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
            "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch->str2speech)\n",
            "  Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
            "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch->str2speech) (0.6.2)\n",
            "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch->str2speech) (2.21.5)\n",
            "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->str2speech) (12.4.127)\n",
            "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch->str2speech)\n",
            "  Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy->einx==0.3.0->str2speech) (1.3.0)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from accelerate->str2speech) (5.9.5)\n",
            "Requirement already satisfied: docstring-parser in /usr/local/lib/python3.11/dist-packages (from argbind->str2speech) (0.16)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.11/dist-packages (from attrdict->str2speech) (1.17.0)\n",
            "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython->str2speech) (4.0.12)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->str2speech) (3.4.1)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->str2speech) (3.10)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->str2speech) (2.3.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->str2speech) (2025.1.31)\n",
            "Collecting torchdata==0.11.0 (from torchtune->str2speech)\n",
            "  Downloading torchdata-0.11.0-py3-none-any.whl.metadata (6.3 kB)\n",
            "Collecting datasets (from torchtune->str2speech)\n",
            "  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)\n",
            "Requirement already satisfied: kagglehub in /usr/local/lib/python3.11/dist-packages (from torchtune->str2speech) (0.3.11)\n",
            "Collecting blobfile>=2 (from torchtune->str2speech)\n",
            "  Downloading blobfile-3.0.0-py3-none-any.whl.metadata (15 kB)\n",
            "Collecting omegaconf (from torchtune->str2speech)\n",
            "  Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)\n",
            "Requirement already satisfied: Pillow>=9.4.0 in /usr/local/lib/python3.11/dist-packages (from torchtune->str2speech) (11.1.0)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (2.6.1)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (1.3.2)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (25.3.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (1.5.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (6.4.3)\n",
            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (0.3.1)\n",
            "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (1.19.0)\n",
            "Collecting pycryptodomex>=3.8 (from blobfile>=2->torchtune->str2speech)\n",
            "  Downloading pycryptodomex-3.22.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n",
            "Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.11/dist-packages (from blobfile>=2->torchtune->str2speech) (5.3.2)\n",
            "Requirement already satisfied: pycparser in /usr/local/lib/python3.11/dist-packages (from CFFI>=1.0->sounddevice==0.5->moshi==0.2.4->str2speech) (2.22)\n",
            "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython->str2speech) (5.0.2)\n",
            "Collecting addict (from misaki>=0.9.4->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)\n",
            "Collecting espeakng-loader (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading espeakng_loader-0.2.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)\n",
            "Collecting num2words (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading num2words-0.5.14-py3-none-any.whl.metadata (13 kB)\n",
            "Collecting phonemizer-fork (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading phonemizer_fork-3.3.2-py3-none-any.whl.metadata (48 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.3/48.3 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: spacy in /usr/local/lib/python3.11/dist-packages (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.8.5)\n",
            "Collecting spacy-curated-transformers (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading spacy_curated_transformers-2.1.2-py2.py3-none-any.whl.metadata (2.8 kB)\n",
            "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.11/dist-packages (from numba>=0.51.0->librosa==0.10.2.post1->str2speech) (0.43.0)\n",
            "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.11/dist-packages (from pooch>=1.1->librosa==0.10.2.post1->str2speech) (4.3.7)\n",
            "Requirement already satisfied: iniconfig in /usr/local/lib/python3.11/dist-packages (from pytest>=8.3.3->moshi==0.2.4->str2speech) (2.1.0)\n",
            "Requirement already satisfied: pluggy<2,>=1.5 in /usr/local/lib/python3.11/dist-packages (from pytest>=8.3.3->moshi==0.2.4->str2speech) (1.5.0)\n",
            "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=0.20.0->librosa==0.10.2.post1->str2speech) (3.6.0)\n",
            "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune->str2speech) (18.1.0)\n",
            "Collecting dill<0.3.9,>=0.3.0 (from datasets->torchtune->str2speech)\n",
            "  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune->str2speech) (2.2.2)\n",
            "Collecting xxhash (from datasets->torchtune->str2speech)\n",
            "  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
            "Collecting multiprocess<0.70.17 (from datasets->torchtune->str2speech)\n",
            "  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n",
            "Collecting fsspec>=2023.5.0 (from huggingface_hub==0.28.1->str2speech)\n",
            "  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)\n",
            "Collecting hf-transfer>=0.1.4 (from huggingface_hub[hf_transfer]->torchtune->str2speech)\n",
            "  Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch->str2speech) (3.0.2)\n",
            "Collecting antlr4-python3-runtime==4.9.* (from omegaconf->torchtune->str2speech)\n",
            "  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting docopt>=0.6.2 (from num2words->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading docopt-0.6.2.tar.gz (25 kB)\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->torchtune->str2speech) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->torchtune->str2speech) (2025.2)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->torchtune->str2speech) (2025.2)\n",
            "Collecting dlinfo (from phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading dlinfo-2.0.0-py3-none-any.whl.metadata (1.1 kB)\n",
            "Collecting segments (from phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading segments-2.3.0-py2.py3-none-any.whl.metadata (3.5 kB)\n",
            "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.0.12)\n",
            "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.0.5)\n",
            "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.0.12)\n",
            "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.0.11)\n",
            "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.0.9)\n",
            "Requirement already satisfied: thinc<8.4.0,>=8.3.4 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (8.3.6)\n",
            "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.1.3)\n",
            "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.5.1)\n",
            "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.0.10)\n",
            "Requirement already satisfied: weasel<0.5.0,>=0.1.0 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.4.1)\n",
            "Requirement already satisfied: typer<1.0.0,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.15.2)\n",
            "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.11.3)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (75.2.0)\n",
            "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.5.0)\n",
            "Collecting curated-transformers<3.0.0,>=2.0.0 (from spacy-curated-transformers->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading curated_transformers-2.0.1-py2.py3-none-any.whl.metadata (5.3 kB)\n",
            "Collecting curated-tokenizers<3.0.0,>=2.0.0 (from spacy-curated-transformers->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading curated_tokenizers-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 kB)\n",
            "INFO: pip is looking at multiple versions of spacy-curated-transformers to determine which version is compatible with other requirements. This could take a while.\n",
            "Collecting spacy-curated-transformers (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading spacy_curated_transformers-2.1.1-py2.py3-none-any.whl.metadata (2.8 kB)\n",
            "Collecting spacy (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading spacy-4.0.0.dev3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (26 kB)\n",
            "Collecting spacy-curated-transformers (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading spacy_curated_transformers-2.0.0-py2.py3-none-any.whl.metadata (2.8 kB)\n",
            "  Downloading spacy_curated_transformers-0.3.0-py2.py3-none-any.whl.metadata (2.7 kB)\n",
            "Collecting curated-transformers<0.2.0,>=0.1.0 (from spacy-curated-transformers->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading curated_transformers-0.1.1-py2.py3-none-any.whl.metadata (965 bytes)\n",
            "Collecting curated-tokenizers<0.1.0,>=0.0.9 (from spacy-curated-transformers->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading curated_tokenizers-0.0.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 kB)\n",
            "Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.11/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.3.0)\n",
            "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.7.0)\n",
            "Requirement already satisfied: pydantic-core==2.33.1 in /usr/local/lib/python3.11/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.33.1)\n",
            "Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.4.0)\n",
            "Requirement already satisfied: blis<1.4.0,>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from thinc<8.4.0,>=8.3.4->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.3.0)\n",
            "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from thinc<8.4.0,>=8.3.4->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.1.5)\n",
            "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (8.1.8)\n",
            "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.5.4)\n",
            "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (13.9.4)\n",
            "Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from weasel<0.5.0,>=0.1.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.21.0)\n",
            "Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in /usr/local/lib/python3.11/dist-packages (from weasel<0.5.0,>=0.1.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (7.1.0)\n",
            "Collecting csvw>=1.5.6 (from segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading csvw-3.5.1-py2.py3-none-any.whl.metadata (10 kB)\n",
            "Collecting isodate (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)\n",
            "Collecting rfc3986<2 (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)\n",
            "Requirement already satisfied: uritemplate>=3.0.0 in /usr/local/lib/python3.11/dist-packages (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (4.1.1)\n",
            "Requirement already satisfied: babel in /usr/local/lib/python3.11/dist-packages (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.17.0)\n",
            "Collecting language-tags (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading language_tags-1.2.0-py3-none-any.whl.metadata (2.1 kB)\n",
            "Collecting rdflib (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading rdflib-7.1.4-py3-none-any.whl.metadata (11 kB)\n",
            "Collecting colorama (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n",
            "  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)\n",
            "Requirement already satisfied: jsonschema in /usr/local/lib/python3.11/dist-packages (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (4.23.0)\n",
            "Requirement already satisfied: marisa-trie>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.2.1)\n",
            "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.0.0)\n",
            "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.18.0)\n",
            "Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.17.2)\n",
            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.1.2)\n",
            "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2024.10.1)\n",
            "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.36.2)\n",
            "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.24.0)\n",
            "Requirement already satisfied: pyparsing<4,>=2.1.0 in /usr/local/lib/python3.11/dist-packages (from rdflib->csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.2.3)\n",
            "Downloading str2speech-0.4.3-py3-none-any.whl (145 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m145.2/145.2 kB\u001b[0m \u001b[31m16.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading einx-0.3.0-py3-none-any.whl (102 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.0/103.0 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading huggingface_hub-0.28.1-py3-none-any.whl (464 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m464.1/464.1 kB\u001b[0m \u001b[31m47.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading kokoro-0.9.4-py3-none-any.whl (32 kB)\n",
            "Downloading librosa-0.10.2.post1-py3-none-any.whl (260 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m260.1/260.1 kB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
            "Downloading pyloudnorm-0.1.1-py3-none-any.whl (9.6 kB)\n",
            "Downloading transformers-4.50.3-py3-none-any.whl (10.2 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.2/10.2 MB\u001b[0m \u001b[31m91.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading x_transformers-2.1.37-py3-none-any.whl (80 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.6/80.6 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading sounddevice-0.5.0-py3-none-any.whl (32 kB)\n",
            "Downloading snac-1.2.1-py3-none-any.whl (8.4 kB)\n",
            "Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m128.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m98.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m66.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m98.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)\n",
            "Downloading torchao-0.10.0-cp39-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (5.5 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m114.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading torchtune-0.6.1-py3-none-any.whl (910 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m910.7/910.7 kB\u001b[0m \u001b[31m64.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading torchdata-0.11.0-py3-none-any.whl (61 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.0/62.0 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl (76.1 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.1/76.1 MB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading blobfile-3.0.0-py3-none-any.whl (75 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading misaki-0.9.4-py3-none-any.whl (3.6 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m43.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading sphn-0.1.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.7/12.7 MB\u001b[0m \u001b[31m122.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading datasets-3.5.0-py3-none-any.whl (491 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m491.2/491.2 kB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading fsspec-2024.12.0-py3-none-any.whl (183 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.9/183.9 kB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading loguru-0.7.3-py3-none-any.whl (61 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m79.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading pycryptodomex-3.22.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m103.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
            "Downloading espeakng_loader-0.2.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.1 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.1/10.1 MB\u001b[0m \u001b[31m31.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading num2words-0.5.14-py3-none-any.whl (163 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m163.5/163.5 kB\u001b[0m \u001b[31m16.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading phonemizer_fork-3.3.2-py3-none-any.whl (82 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.7/82.7 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading spacy_curated_transformers-0.3.0-py2.py3-none-any.whl (236 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.3/236.3 kB\u001b[0m \u001b[31m27.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m21.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading curated_tokenizers-0.0.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (735 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m735.6/735.6 kB\u001b[0m \u001b[31m58.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading curated_transformers-0.1.1-py2.py3-none-any.whl (25 kB)\n",
            "Downloading dlinfo-2.0.0-py3-none-any.whl (3.7 kB)\n",
            "Downloading segments-2.3.0-py2.py3-none-any.whl (15 kB)\n",
            "Downloading csvw-3.5.1-py2.py3-none-any.whl (59 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.9/59.9 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)\n",
            "Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
            "Downloading isodate-0.7.2-py3-none-any.whl (22 kB)\n",
            "Downloading language_tags-1.2.0-py3-none-any.whl (213 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m213.4/213.4 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading rdflib-7.1.4-py3-none-any.whl (565 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m565.1/565.1 kB\u001b[0m \u001b[31m47.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hBuilding wheels for collected packages: moshi, openai-whisper, argbind, antlr4-python3-runtime, docopt\n",
            "  Building wheel for moshi (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for moshi: filename=moshi-0.2.4-py3-none-any.whl size=92809 sha256=09f1182180d8b50bb3a961a7fddcf0a42e0a23553f721a95ffc46631dbc2e8b5\n",
            "  Stored in directory: /root/.cache/pip/wheels/79/cc/1d/8e732c108b9cd47b99c85df24166ca9048f5b136682812dd21\n",
            "  Building wheel for openai-whisper (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for openai-whisper: filename=openai_whisper-20240930-py3-none-any.whl size=803406 sha256=0ada31cffc8f60682da3ca448c344798a61e5c2f6ab535a8dc6766beb75f6225\n",
            "  Stored in directory: /root/.cache/pip/wheels/2f/f2/ce/6eb23db4091d026238ce76703bd66da60b969d70bcc81d5d3a\n",
            "  Building wheel for argbind (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for argbind: filename=argbind-0.3.9-py2.py3-none-any.whl size=11730 sha256=abfd39825fe32697413aa7840d20bebed27102f315446867f682b1869a4e2c32\n",
            "  Stored in directory: /root/.cache/pip/wheels/36/3a/34/e858fa3cf5f8c33a040734efcc17e95cb5cfd99c256a7fcecf\n",
            "  Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=d5fc3e1d7e7b79e173e2480a4c4f10f42f0771d66ff3c18a3285f4346a54a88d\n",
            "  Stored in directory: /root/.cache/pip/wheels/1a/97/32/461f837398029ad76911109f07047fde1d7b661a147c7c56d1\n",
            "  Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=e3b63adb9fc7cd8e181c8c056d62d5254062c1002f3acaca8b925f8d51ac2983\n",
            "  Stored in directory: /root/.cache/pip/wheels/1a/b0/8c/4b75c4116c31f83c8f9f047231251e13cc74481cca4a78a9ce\n",
            "Successfully built moshi openai-whisper argbind antlr4-python3-runtime docopt\n",
            "Installing collected packages: torchao, rfc3986, pydub, language-tags, docopt, antlr4-python3-runtime, addict, xxhash, sphn, rdflib, pycryptodomex, omegaconf, nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, num2words, misaki, loguru, isodate, hf-transfer, fsspec, espeakng-loader, dlinfo, dill, curated-tokenizers, colorama, attrdict, argbind, tiktoken, sounddevice, pyloudnorm, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, huggingface_hub, einx, blobfile, nvidia-cusolver-cu12, librosa, transformers, datasets, csvw, x-transformers, torchdata, snac, segments, openai-whisper, curated-transformers, bitsandbytes, torchtune, spacy-curated-transformers, phonemizer-fork, moshi, kokoro, str2speech\n",
            "  Attempting uninstall: nvidia-nvjitlink-cu12\n",
            "    Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n",
            "    Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n",
            "      Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n",
            "  Attempting uninstall: nvidia-curand-cu12\n",
            "    Found existing installation: nvidia-curand-cu12 10.3.6.82\n",
            "    Uninstalling nvidia-curand-cu12-10.3.6.82:\n",
            "      Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n",
            "  Attempting uninstall: nvidia-cufft-cu12\n",
            "    Found existing installation: nvidia-cufft-cu12 11.2.3.61\n",
            "    Uninstalling nvidia-cufft-cu12-11.2.3.61:\n",
            "      Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n",
            "  Attempting uninstall: nvidia-cuda-runtime-cu12\n",
            "    Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n",
            "    Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n",
            "      Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n",
            "  Attempting uninstall: nvidia-cuda-nvrtc-cu12\n",
            "    Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n",
            "    Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n",
            "      Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n",
            "  Attempting uninstall: nvidia-cuda-cupti-cu12\n",
            "    Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n",
            "    Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n",
            "      Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n",
            "  Attempting uninstall: nvidia-cublas-cu12\n",
            "    Found existing installation: nvidia-cublas-cu12 12.5.3.2\n",
            "    Uninstalling nvidia-cublas-cu12-12.5.3.2:\n",
            "      Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n",
            "  Attempting uninstall: fsspec\n",
            "    Found existing installation: fsspec 2025.3.2\n",
            "    Uninstalling fsspec-2025.3.2:\n",
            "      Successfully uninstalled fsspec-2025.3.2\n",
            "  Attempting uninstall: nvidia-cusparse-cu12\n",
            "    Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n",
            "    Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n",
            "      Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n",
            "  Attempting uninstall: nvidia-cudnn-cu12\n",
            "    Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n",
            "    Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n",
            "      Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n",
            "  Attempting uninstall: huggingface_hub\n",
            "    Found existing installation: huggingface-hub 0.30.2\n",
            "    Uninstalling huggingface-hub-0.30.2:\n",
            "      Successfully uninstalled huggingface-hub-0.30.2\n",
            "  Attempting uninstall: nvidia-cusolver-cu12\n",
            "    Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n",
            "    Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n",
            "      Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n",
            "  Attempting uninstall: librosa\n",
            "    Found existing installation: librosa 0.11.0\n",
            "    Uninstalling librosa-0.11.0:\n",
            "      Successfully uninstalled librosa-0.11.0\n",
            "  Attempting uninstall: transformers\n",
            "    Found existing installation: transformers 4.51.3\n",
            "    Uninstalling transformers-4.51.3:\n",
            "      Successfully uninstalled transformers-4.51.3\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2024.12.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed addict-2.4.0 antlr4-python3-runtime-4.9.3 argbind-0.3.9 attrdict-2.0.1 bitsandbytes-0.45.5 blobfile-3.0.0 colorama-0.4.6 csvw-3.5.1 curated-tokenizers-0.0.9 curated-transformers-0.1.1 datasets-3.5.0 dill-0.3.8 dlinfo-2.0.0 docopt-0.6.2 einx-0.3.0 espeakng-loader-0.2.4 fsspec-2024.12.0 hf-transfer-0.1.9 huggingface_hub-0.28.1 isodate-0.7.2 kokoro-0.9.4 language-tags-1.2.0 librosa-0.10.2.post1 loguru-0.7.3 misaki-0.9.4 moshi-0.2.4 multiprocess-0.70.16 num2words-0.5.14 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 omegaconf-2.3.0 openai-whisper-20240930 phonemizer-fork-3.3.2 pycryptodomex-3.22.0 pydub-0.25.1 pyloudnorm-0.1.1 rdflib-7.1.4 rfc3986-1.5.0 segments-2.3.0 snac-1.2.1 sounddevice-0.5.0 spacy-curated-transformers-0.3.0 sphn-0.1.12 str2speech-0.4.3 tiktoken-0.9.0 torchao-0.10.0 torchdata-0.11.0 torchtune-0.6.1 transformers-4.50.3 x-transformers-2.1.37 xxhash-3.5.0\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.colab-display-data+json": {
              "pip_warning": {
                "packages": [
                  "pydevd_plugins"
                ]
              },
              "id": "c2ff769005c345748aa9fceb4d58cda1"
            }
          },
          "metadata": {}
        }
      ],
      "source": [
        "!pip3 install str2speech"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!str2speech -l"
      ],
      "metadata": {
        "id": "QLmroXmBDNGC",
        "outputId": "dfc28f76-1f55-4e89-9511-ad0a271823fc",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Now running str2speech 0.4.3\n",
            "Available TTS models:\n",
            "0. suno/bark-small\n",
            "1. suno/bark\n",
            "2. megatts3\n",
            "3. facebook/mms-tts-eng\n",
            "4. facebook/mms-tts-deu\n",
            "5. facebook/mms-tts-fra\n",
            "6. facebook/mms-tts-spa\n",
            "7. facebook/mms-tts-swe\n",
            "8. kokoro\n",
            "9. sesame/csm-1b\n",
            "10. zyphra/zonos-v0.1-transformer\n",
            "11. sparkaudio/spark-tts-0.5b\n",
            "12. microsoft/speecht5_tts\n",
            "13. nari-labs/dia-1.6b\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "dia_test = \"\"\"[S1] So, what's happening here?\n",
        "[S2] My cat just swallowed an egg. An entire boiled egg!\n",
        "[S1] Wow, must have been really hungry.\n",
        "[S2] Are you kidding me. He's always hungry, no matter how much I feed him.\n",
        "[S1] Do you feed him boiled eggs?\n",
        "[S2] No, just cat food.\n",
        "[S1] Well, there you go. Looks like it got bored of it.\n",
        "\"\"\"\n",
        "\n",
        "with open(\"dia_test.txt\", \"w\") as f:\n",
        "  f.write(dia_test)"
      ],
      "metadata": {
        "id": "dzdaxJTvDQti"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!str2speech -f dia_test.txt --model \"nari-labs/dia-1.6b\" --output dia.wav"
      ],
      "metadata": {
        "id": "FjV2SXiHDTzd",
        "outputId": "24c911ba-e8a1-4594-f080-e0bb9a3c356f",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Now running str2speech 0.4.3\n",
            "Model provided: nari-labs/dia-1.6b\n",
            "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
            "E0000 00:00:1745450413.922681    2817 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
            "E0000 00:00:1745450413.931012    2817 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
            "Audiotools found\n",
            "Codec found\n",
            "Using default voices.\n",
            "Audio saved.\n",
            "Generated speech in 81.64 seconds.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from IPython.display import Audio, display\n",
        "display(Audio(\"dia.wav\"))"
      ],
      "metadata": {
        "id": "2MhgIrJADWjO",
        "outputId": "da3d37a4-cab0-460e-f886-0c296f220194",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 62
        }
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.lib.display.Audio object>"
            ],
            "text/html": [
              "\n",
              "                <audio  controls=\"controls\" >\n",
No results found