Last active
April 23, 2025 23:25
-
-
Save hathibelagal-dev/392ce0a11e34876185c959838d3be9d9 to your computer and use it in GitHub Desktop.
diatest.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "gpuType": "T4", | |
| "authorship_tag": "ABX9TyP1P6DdP1Z61VZVoHuBry/z", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| }, | |
| "accelerator": "GPU" | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/hathibelagal-dev/392ce0a11e34876185c959838d3be9d9/diatest.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "id": "0eo3pO-IDI50", | |
| "outputId": "8c9e6ad7-8fb7-4a11-cd6d-d2879c802c16", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 1000 | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Collecting str2speech\n", | |
| " Downloading str2speech-0.4.3-py3-none-any.whl.metadata (6.3 kB)\n", | |
| "Collecting transformers==4.50.3 (from str2speech)\n", | |
| " Downloading transformers-4.50.3-py3-none-any.whl.metadata (39 kB)\n", | |
| "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (from str2speech) (2.6.0+cu124)\n", | |
| "Requirement already satisfied: torchvision in /usr/local/lib/python3.11/dist-packages (from str2speech) (0.21.0+cu124)\n", | |
| "Requirement already satisfied: torchaudio in /usr/local/lib/python3.11/dist-packages (from str2speech) (2.6.0+cu124)\n", | |
| "Requirement already satisfied: tokenizers in /usr/local/lib/python3.11/dist-packages (from str2speech) (0.21.1)\n", | |
| "Requirement already satisfied: scipy>=1.13.1 in /usr/local/lib/python3.11/dist-packages (from str2speech) (1.14.1)\n", | |
| "Requirement already satisfied: accelerate in /usr/local/lib/python3.11/dist-packages (from str2speech) (1.5.2)\n", | |
| "Requirement already satisfied: numpy==2.0.2 in /usr/local/lib/python3.11/dist-packages (from str2speech) (2.0.2)\n", | |
| "Collecting kokoro==0.9.4 (from str2speech)\n", | |
| " Downloading kokoro-0.9.4-py3-none-any.whl.metadata (21 kB)\n", | |
| "Requirement already satisfied: soundfile in /usr/local/lib/python3.11/dist-packages (from str2speech) (0.13.1)\n", | |
| "Requirement already satisfied: gitpython in /usr/local/lib/python3.11/dist-packages (from str2speech) (3.1.44)\n", | |
| "Collecting moshi==0.2.4 (from str2speech)\n", | |
| " Downloading moshi-0.2.4.tar.gz (148 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m148.8/148.8 kB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", | |
| " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", | |
| " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", | |
| "Collecting torchtune (from str2speech)\n", | |
| " Downloading torchtune-0.6.1-py3-none-any.whl.metadata (24 kB)\n", | |
| "Collecting torchao (from str2speech)\n", | |
| " Downloading torchao-0.10.0-cp39-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (15 kB)\n", | |
| "Collecting huggingface_hub==0.28.1 (from str2speech)\n", | |
| " Downloading huggingface_hub-0.28.1-py3-none-any.whl.metadata (13 kB)\n", | |
| "Requirement already satisfied: soxr==0.5.0.post1 in /usr/local/lib/python3.11/dist-packages (from str2speech) (0.5.0.post1)\n", | |
| "Requirement already satisfied: einops==0.8.1 in /usr/local/lib/python3.11/dist-packages (from str2speech) (0.8.1)\n", | |
| "Collecting einx==0.3.0 (from str2speech)\n", | |
| " Downloading einx-0.3.0-py3-none-any.whl.metadata (6.9 kB)\n", | |
| "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from str2speech) (2.32.3)\n", | |
| "Collecting snac>=1.2.1 (from str2speech)\n", | |
| " Downloading snac-1.2.1-py3-none-any.whl.metadata (3.5 kB)\n", | |
| "Collecting attrdict (from str2speech)\n", | |
| " Downloading attrdict-2.0.1-py2.py3-none-any.whl.metadata (6.7 kB)\n", | |
| "Collecting librosa==0.10.2.post1 (from str2speech)\n", | |
| " Downloading librosa-0.10.2.post1-py3-none-any.whl.metadata (8.6 kB)\n", | |
| "Collecting pydub==0.25.1 (from str2speech)\n", | |
| " Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", | |
| "Collecting pyloudnorm==0.1.1 (from str2speech)\n", | |
| " Downloading pyloudnorm-0.1.1-py3-none-any.whl.metadata (5.6 kB)\n", | |
| "Collecting x-transformers==2.1.37 (from str2speech)\n", | |
| " Downloading x_transformers-2.1.37-py3-none-any.whl.metadata (88 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m88.2/88.2 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hCollecting openai-whisper==20240930 (from str2speech)\n", | |
| " Downloading openai-whisper-20240930.tar.gz (800 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m800.5/800.5 kB\u001b[0m \u001b[31m32.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", | |
| " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", | |
| " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", | |
| "Requirement already satisfied: inflect==7.5.0 in /usr/local/lib/python3.11/dist-packages (from str2speech) (7.5.0)\n", | |
| "Collecting argbind (from str2speech)\n", | |
| " Downloading argbind-0.3.9.tar.gz (17 kB)\n", | |
| " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
| "Requirement already satisfied: sympy in /usr/local/lib/python3.11/dist-packages (from einx==0.3.0->str2speech) (1.13.1)\n", | |
| "Requirement already satisfied: frozendict in /usr/local/lib/python3.11/dist-packages (from einx==0.3.0->str2speech) (2.4.6)\n", | |
| "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (3.18.0)\n", | |
| "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (2025.3.2)\n", | |
| "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (24.2)\n", | |
| "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (6.0.2)\n", | |
| "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (4.67.1)\n", | |
| "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub==0.28.1->str2speech) (4.13.2)\n", | |
| "Requirement already satisfied: more_itertools>=8.5.0 in /usr/local/lib/python3.11/dist-packages (from inflect==7.5.0->str2speech) (10.6.0)\n", | |
| "Requirement already satisfied: typeguard>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from inflect==7.5.0->str2speech) (4.4.2)\n", | |
| "Collecting loguru (from kokoro==0.9.4->str2speech)\n", | |
| " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", | |
| "Collecting misaki>=0.9.4 (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading misaki-0.9.4-py3-none-any.whl.metadata (19 kB)\n", | |
| "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (3.0.1)\n", | |
| "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (1.6.1)\n", | |
| "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (1.4.2)\n", | |
| "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (4.4.2)\n", | |
| "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (0.60.0)\n", | |
| "Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (1.8.2)\n", | |
| "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (0.4)\n", | |
| "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.11/dist-packages (from librosa==0.10.2.post1->str2speech) (1.1.0)\n", | |
| "Requirement already satisfied: safetensors<0.6,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from moshi==0.2.4->str2speech) (0.5.3)\n", | |
| "Collecting bitsandbytes<0.46,>=0.45 (from moshi==0.2.4->str2speech)\n", | |
| " Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)\n", | |
| "Requirement already satisfied: sentencepiece==0.2 in /usr/local/lib/python3.11/dist-packages (from moshi==0.2.4->str2speech) (0.2.0)\n", | |
| "Collecting sounddevice==0.5 (from moshi==0.2.4->str2speech)\n", | |
| " Downloading sounddevice-0.5.0-py3-none-any.whl.metadata (1.4 kB)\n", | |
| "Collecting sphn>=0.1.4 (from moshi==0.2.4->str2speech)\n", | |
| " Downloading sphn-0.1.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.2 kB)\n", | |
| "Requirement already satisfied: aiohttp<3.12,>=3.10.5 in /usr/local/lib/python3.11/dist-packages (from moshi==0.2.4->str2speech) (3.11.15)\n", | |
| "Requirement already satisfied: pytest>=8.3.3 in /usr/local/lib/python3.11/dist-packages (from moshi==0.2.4->str2speech) (8.3.5)\n", | |
| "Collecting tiktoken (from openai-whisper==20240930->str2speech)\n", | |
| " Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", | |
| "Requirement already satisfied: triton>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from openai-whisper==20240930->str2speech) (3.2.0)\n", | |
| "Requirement already satisfied: future>=0.16.0 in /usr/local/lib/python3.11/dist-packages (from pyloudnorm==0.1.1->str2speech) (1.0.0)\n", | |
| "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers==4.50.3->str2speech) (2024.11.6)\n", | |
| "Requirement already satisfied: CFFI>=1.0 in /usr/local/lib/python3.11/dist-packages (from sounddevice==0.5->moshi==0.2.4->str2speech) (1.17.1)\n", | |
| "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch->str2speech) (3.4.2)\n", | |
| "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch->str2speech) (3.1.6)\n", | |
| "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->str2speech)\n", | |
| " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", | |
| "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->str2speech)\n", | |
| " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", | |
| "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->str2speech)\n", | |
| " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", | |
| "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->str2speech)\n", | |
| " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", | |
| "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->str2speech)\n", | |
| " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", | |
| "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->str2speech)\n", | |
| " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", | |
| "Collecting nvidia-curand-cu12==10.3.5.147 (from torch->str2speech)\n", | |
| " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", | |
| "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch->str2speech)\n", | |
| " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", | |
| "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch->str2speech)\n", | |
| " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", | |
| "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch->str2speech) (0.6.2)\n", | |
| "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch->str2speech) (2.21.5)\n", | |
| "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->str2speech) (12.4.127)\n", | |
| "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch->str2speech)\n", | |
| " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", | |
| "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy->einx==0.3.0->str2speech) (1.3.0)\n", | |
| "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from accelerate->str2speech) (5.9.5)\n", | |
| "Requirement already satisfied: docstring-parser in /usr/local/lib/python3.11/dist-packages (from argbind->str2speech) (0.16)\n", | |
| "Requirement already satisfied: six in /usr/local/lib/python3.11/dist-packages (from attrdict->str2speech) (1.17.0)\n", | |
| "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython->str2speech) (4.0.12)\n", | |
| "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->str2speech) (3.4.1)\n", | |
| "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->str2speech) (3.10)\n", | |
| "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->str2speech) (2.3.0)\n", | |
| "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->str2speech) (2025.1.31)\n", | |
| "Collecting torchdata==0.11.0 (from torchtune->str2speech)\n", | |
| " Downloading torchdata-0.11.0-py3-none-any.whl.metadata (6.3 kB)\n", | |
| "Collecting datasets (from torchtune->str2speech)\n", | |
| " Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)\n", | |
| "Requirement already satisfied: kagglehub in /usr/local/lib/python3.11/dist-packages (from torchtune->str2speech) (0.3.11)\n", | |
| "Collecting blobfile>=2 (from torchtune->str2speech)\n", | |
| " Downloading blobfile-3.0.0-py3-none-any.whl.metadata (15 kB)\n", | |
| "Collecting omegaconf (from torchtune->str2speech)\n", | |
| " Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)\n", | |
| "Requirement already satisfied: Pillow>=9.4.0 in /usr/local/lib/python3.11/dist-packages (from torchtune->str2speech) (11.1.0)\n", | |
| "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (2.6.1)\n", | |
| "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (1.3.2)\n", | |
| "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (25.3.0)\n", | |
| "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (1.5.0)\n", | |
| "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (6.4.3)\n", | |
| "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (0.3.1)\n", | |
| "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<3.12,>=3.10.5->moshi==0.2.4->str2speech) (1.19.0)\n", | |
| "Collecting pycryptodomex>=3.8 (from blobfile>=2->torchtune->str2speech)\n", | |
| " Downloading pycryptodomex-3.22.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n", | |
| "Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.11/dist-packages (from blobfile>=2->torchtune->str2speech) (5.3.2)\n", | |
| "Requirement already satisfied: pycparser in /usr/local/lib/python3.11/dist-packages (from CFFI>=1.0->sounddevice==0.5->moshi==0.2.4->str2speech) (2.22)\n", | |
| "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython->str2speech) (5.0.2)\n", | |
| "Collecting addict (from misaki>=0.9.4->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)\n", | |
| "Collecting espeakng-loader (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading espeakng_loader-0.2.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)\n", | |
| "Collecting num2words (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading num2words-0.5.14-py3-none-any.whl.metadata (13 kB)\n", | |
| "Collecting phonemizer-fork (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading phonemizer_fork-3.3.2-py3-none-any.whl.metadata (48 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.3/48.3 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hRequirement already satisfied: spacy in /usr/local/lib/python3.11/dist-packages (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.8.5)\n", | |
| "Collecting spacy-curated-transformers (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading spacy_curated_transformers-2.1.2-py2.py3-none-any.whl.metadata (2.8 kB)\n", | |
| "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.11/dist-packages (from numba>=0.51.0->librosa==0.10.2.post1->str2speech) (0.43.0)\n", | |
| "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.11/dist-packages (from pooch>=1.1->librosa==0.10.2.post1->str2speech) (4.3.7)\n", | |
| "Requirement already satisfied: iniconfig in /usr/local/lib/python3.11/dist-packages (from pytest>=8.3.3->moshi==0.2.4->str2speech) (2.1.0)\n", | |
| "Requirement already satisfied: pluggy<2,>=1.5 in /usr/local/lib/python3.11/dist-packages (from pytest>=8.3.3->moshi==0.2.4->str2speech) (1.5.0)\n", | |
| "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=0.20.0->librosa==0.10.2.post1->str2speech) (3.6.0)\n", | |
| "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune->str2speech) (18.1.0)\n", | |
| "Collecting dill<0.3.9,>=0.3.0 (from datasets->torchtune->str2speech)\n", | |
| " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", | |
| "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune->str2speech) (2.2.2)\n", | |
| "Collecting xxhash (from datasets->torchtune->str2speech)\n", | |
| " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", | |
| "Collecting multiprocess<0.70.17 (from datasets->torchtune->str2speech)\n", | |
| " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", | |
| "Collecting fsspec>=2023.5.0 (from huggingface_hub==0.28.1->str2speech)\n", | |
| " Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)\n", | |
| "Collecting hf-transfer>=0.1.4 (from huggingface_hub[hf_transfer]->torchtune->str2speech)\n", | |
| " Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n", | |
| "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch->str2speech) (3.0.2)\n", | |
| "Collecting antlr4-python3-runtime==4.9.* (from omegaconf->torchtune->str2speech)\n", | |
| " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
| "Collecting docopt>=0.6.2 (from num2words->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading docopt-0.6.2.tar.gz (25 kB)\n", | |
| " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
| "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->torchtune->str2speech) (2.8.2)\n", | |
| "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->torchtune->str2speech) (2025.2)\n", | |
| "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->torchtune->str2speech) (2025.2)\n", | |
| "Collecting dlinfo (from phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading dlinfo-2.0.0-py3-none-any.whl.metadata (1.1 kB)\n", | |
| "Collecting segments (from phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading segments-2.3.0-py2.py3-none-any.whl.metadata (3.5 kB)\n", | |
| "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.0.12)\n", | |
| "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.0.5)\n", | |
| "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.0.12)\n", | |
| "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.0.11)\n", | |
| "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.0.9)\n", | |
| "Requirement already satisfied: thinc<8.4.0,>=8.3.4 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (8.3.6)\n", | |
| "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.1.3)\n", | |
| "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.5.1)\n", | |
| "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.0.10)\n", | |
| "Requirement already satisfied: weasel<0.5.0,>=0.1.0 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.4.1)\n", | |
| "Requirement already satisfied: typer<1.0.0,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.15.2)\n", | |
| "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.11.3)\n", | |
| "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (75.2.0)\n", | |
| "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.11/dist-packages (from spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.5.0)\n", | |
| "Collecting curated-transformers<3.0.0,>=2.0.0 (from spacy-curated-transformers->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading curated_transformers-2.0.1-py2.py3-none-any.whl.metadata (5.3 kB)\n", | |
| "Collecting curated-tokenizers<3.0.0,>=2.0.0 (from spacy-curated-transformers->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading curated_tokenizers-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 kB)\n", | |
| "INFO: pip is looking at multiple versions of spacy-curated-transformers to determine which version is compatible with other requirements. This could take a while.\n", | |
| "Collecting spacy-curated-transformers (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading spacy_curated_transformers-2.1.1-py2.py3-none-any.whl.metadata (2.8 kB)\n", | |
| "Collecting spacy (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading spacy-4.0.0.dev3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (26 kB)\n", | |
| "Collecting spacy-curated-transformers (from misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading spacy_curated_transformers-2.0.0-py2.py3-none-any.whl.metadata (2.8 kB)\n", | |
| " Downloading spacy_curated_transformers-0.3.0-py2.py3-none-any.whl.metadata (2.7 kB)\n", | |
| "Collecting curated-transformers<0.2.0,>=0.1.0 (from spacy-curated-transformers->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading curated_transformers-0.1.1-py2.py3-none-any.whl.metadata (965 bytes)\n", | |
| "Collecting curated-tokenizers<0.1.0,>=0.0.9 (from spacy-curated-transformers->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading curated_tokenizers-0.0.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 kB)\n", | |
| "Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.11/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.3.0)\n", | |
| "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.7.0)\n", | |
| "Requirement already satisfied: pydantic-core==2.33.1 in /usr/local/lib/python3.11/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.33.1)\n", | |
| "Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.4.0)\n", | |
| "Requirement already satisfied: blis<1.4.0,>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from thinc<8.4.0,>=8.3.4->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.3.0)\n", | |
| "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from thinc<8.4.0,>=8.3.4->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.1.5)\n", | |
| "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (8.1.8)\n", | |
| "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.5.4)\n", | |
| "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (13.9.4)\n", | |
| "Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from weasel<0.5.0,>=0.1.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.21.0)\n", | |
| "Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in /usr/local/lib/python3.11/dist-packages (from weasel<0.5.0,>=0.1.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (7.1.0)\n", | |
| "Collecting csvw>=1.5.6 (from segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading csvw-3.5.1-py2.py3-none-any.whl.metadata (10 kB)\n", | |
| "Collecting isodate (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)\n", | |
| "Collecting rfc3986<2 (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)\n", | |
| "Requirement already satisfied: uritemplate>=3.0.0 in /usr/local/lib/python3.11/dist-packages (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (4.1.1)\n", | |
| "Requirement already satisfied: babel in /usr/local/lib/python3.11/dist-packages (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.17.0)\n", | |
| "Collecting language-tags (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading language_tags-1.2.0-py3-none-any.whl.metadata (2.1 kB)\n", | |
| "Collecting rdflib (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading rdflib-7.1.4-py3-none-any.whl.metadata (11 kB)\n", | |
| "Collecting colorama (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech)\n", | |
| " Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)\n", | |
| "Requirement already satisfied: jsonschema in /usr/local/lib/python3.11/dist-packages (from csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (4.23.0)\n", | |
| "Requirement already satisfied: marisa-trie>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.2.1)\n", | |
| "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.0.0)\n", | |
| "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2.18.0)\n", | |
| "Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (1.17.2)\n", | |
| "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.1.2)\n", | |
| "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (2024.10.1)\n", | |
| "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.36.2)\n", | |
| "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (0.24.0)\n", | |
| "Requirement already satisfied: pyparsing<4,>=2.1.0 in /usr/local/lib/python3.11/dist-packages (from rdflib->csvw>=1.5.6->segments->phonemizer-fork->misaki[en]>=0.9.4->kokoro==0.9.4->str2speech) (3.2.3)\n", | |
| "Downloading str2speech-0.4.3-py3-none-any.whl (145 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m145.2/145.2 kB\u001b[0m \u001b[31m16.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading einx-0.3.0-py3-none-any.whl (102 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.0/103.0 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading huggingface_hub-0.28.1-py3-none-any.whl (464 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m464.1/464.1 kB\u001b[0m \u001b[31m47.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading kokoro-0.9.4-py3-none-any.whl (32 kB)\n", | |
| "Downloading librosa-0.10.2.post1-py3-none-any.whl (260 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m260.1/260.1 kB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", | |
| "Downloading pyloudnorm-0.1.1-py3-none-any.whl (9.6 kB)\n", | |
| "Downloading transformers-4.50.3-py3-none-any.whl (10.2 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.2/10.2 MB\u001b[0m \u001b[31m91.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading x_transformers-2.1.37-py3-none-any.whl (80 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.6/80.6 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading sounddevice-0.5.0-py3-none-any.whl (32 kB)\n", | |
| "Downloading snac-1.2.1-py3-none-any.whl (8.4 kB)\n", | |
| "Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m128.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m98.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m66.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m98.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)\n", | |
| "Downloading torchao-0.10.0-cp39-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (5.5 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m114.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading torchtune-0.6.1-py3-none-any.whl (910 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m910.7/910.7 kB\u001b[0m \u001b[31m64.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading torchdata-0.11.0-py3-none-any.whl (61 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.0/62.0 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl (76.1 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.1/76.1 MB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading blobfile-3.0.0-py3-none-any.whl (75 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading misaki-0.9.4-py3-none-any.whl (3.6 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m43.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading sphn-0.1.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.7/12.7 MB\u001b[0m \u001b[31m122.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading datasets-3.5.0-py3-none-any.whl (491 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m491.2/491.2 kB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading fsspec-2024.12.0-py3-none-any.whl (183 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.9/183.9 kB\u001b[0m \u001b[31m21.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m79.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading pycryptodomex-3.22.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m103.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n", | |
| "Downloading espeakng_loader-0.2.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.1 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.1/10.1 MB\u001b[0m \u001b[31m31.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading num2words-0.5.14-py3-none-any.whl (163 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m163.5/163.5 kB\u001b[0m \u001b[31m16.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading phonemizer_fork-3.3.2-py3-none-any.whl (82 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.7/82.7 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading spacy_curated_transformers-0.3.0-py2.py3-none-any.whl (236 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.3/236.3 kB\u001b[0m \u001b[31m27.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m21.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading curated_tokenizers-0.0.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (735 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m735.6/735.6 kB\u001b[0m \u001b[31m58.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading curated_transformers-0.1.1-py2.py3-none-any.whl (25 kB)\n", | |
| "Downloading dlinfo-2.0.0-py3-none-any.whl (3.7 kB)\n", | |
| "Downloading segments-2.3.0-py2.py3-none-any.whl (15 kB)\n", | |
| "Downloading csvw-3.5.1-py2.py3-none-any.whl (59 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.9/59.9 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)\n", | |
| "Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n", | |
| "Downloading isodate-0.7.2-py3-none-any.whl (22 kB)\n", | |
| "Downloading language_tags-1.2.0-py3-none-any.whl (213 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m213.4/213.4 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading rdflib-7.1.4-py3-none-any.whl (565 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m565.1/565.1 kB\u001b[0m \u001b[31m47.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hBuilding wheels for collected packages: moshi, openai-whisper, argbind, antlr4-python3-runtime, docopt\n", | |
| " Building wheel for moshi (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", | |
| " Created wheel for moshi: filename=moshi-0.2.4-py3-none-any.whl size=92809 sha256=09f1182180d8b50bb3a961a7fddcf0a42e0a23553f721a95ffc46631dbc2e8b5\n", | |
| " Stored in directory: /root/.cache/pip/wheels/79/cc/1d/8e732c108b9cd47b99c85df24166ca9048f5b136682812dd21\n", | |
| " Building wheel for openai-whisper (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", | |
| " Created wheel for openai-whisper: filename=openai_whisper-20240930-py3-none-any.whl size=803406 sha256=0ada31cffc8f60682da3ca448c344798a61e5c2f6ab535a8dc6766beb75f6225\n", | |
| " Stored in directory: /root/.cache/pip/wheels/2f/f2/ce/6eb23db4091d026238ce76703bd66da60b969d70bcc81d5d3a\n", | |
| " Building wheel for argbind (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
| " Created wheel for argbind: filename=argbind-0.3.9-py2.py3-none-any.whl size=11730 sha256=abfd39825fe32697413aa7840d20bebed27102f315446867f682b1869a4e2c32\n", | |
| " Stored in directory: /root/.cache/pip/wheels/36/3a/34/e858fa3cf5f8c33a040734efcc17e95cb5cfd99c256a7fcecf\n", | |
| " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
| " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=d5fc3e1d7e7b79e173e2480a4c4f10f42f0771d66ff3c18a3285f4346a54a88d\n", | |
| " Stored in directory: /root/.cache/pip/wheels/1a/97/32/461f837398029ad76911109f07047fde1d7b661a147c7c56d1\n", | |
| " Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
| " Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=e3b63adb9fc7cd8e181c8c056d62d5254062c1002f3acaca8b925f8d51ac2983\n", | |
| " Stored in directory: /root/.cache/pip/wheels/1a/b0/8c/4b75c4116c31f83c8f9f047231251e13cc74481cca4a78a9ce\n", | |
| "Successfully built moshi openai-whisper argbind antlr4-python3-runtime docopt\n", | |
| "Installing collected packages: torchao, rfc3986, pydub, language-tags, docopt, antlr4-python3-runtime, addict, xxhash, sphn, rdflib, pycryptodomex, omegaconf, nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, num2words, misaki, loguru, isodate, hf-transfer, fsspec, espeakng-loader, dlinfo, dill, curated-tokenizers, colorama, attrdict, argbind, tiktoken, sounddevice, pyloudnorm, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, huggingface_hub, einx, blobfile, nvidia-cusolver-cu12, librosa, transformers, datasets, csvw, x-transformers, torchdata, snac, segments, openai-whisper, curated-transformers, bitsandbytes, torchtune, spacy-curated-transformers, phonemizer-fork, moshi, kokoro, str2speech\n", | |
| " Attempting uninstall: nvidia-nvjitlink-cu12\n", | |
| " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", | |
| " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", | |
| " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", | |
| " Attempting uninstall: nvidia-curand-cu12\n", | |
| " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", | |
| " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", | |
| " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", | |
| " Attempting uninstall: nvidia-cufft-cu12\n", | |
| " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", | |
| " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", | |
| " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", | |
| " Attempting uninstall: nvidia-cuda-runtime-cu12\n", | |
| " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", | |
| " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", | |
| " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", | |
| " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", | |
| " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", | |
| " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", | |
| " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", | |
| " Attempting uninstall: nvidia-cuda-cupti-cu12\n", | |
| " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", | |
| " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", | |
| " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", | |
| " Attempting uninstall: nvidia-cublas-cu12\n", | |
| " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", | |
| " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", | |
| " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", | |
| " Attempting uninstall: fsspec\n", | |
| " Found existing installation: fsspec 2025.3.2\n", | |
| " Uninstalling fsspec-2025.3.2:\n", | |
| " Successfully uninstalled fsspec-2025.3.2\n", | |
| " Attempting uninstall: nvidia-cusparse-cu12\n", | |
| " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", | |
| " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", | |
| " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", | |
| " Attempting uninstall: nvidia-cudnn-cu12\n", | |
| " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", | |
| " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", | |
| " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", | |
| " Attempting uninstall: huggingface_hub\n", | |
| " Found existing installation: huggingface-hub 0.30.2\n", | |
| " Uninstalling huggingface-hub-0.30.2:\n", | |
| " Successfully uninstalled huggingface-hub-0.30.2\n", | |
| " Attempting uninstall: nvidia-cusolver-cu12\n", | |
| " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", | |
| " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", | |
| " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", | |
| " Attempting uninstall: librosa\n", | |
| " Found existing installation: librosa 0.11.0\n", | |
| " Uninstalling librosa-0.11.0:\n", | |
| " Successfully uninstalled librosa-0.11.0\n", | |
| " Attempting uninstall: transformers\n", | |
| " Found existing installation: transformers 4.51.3\n", | |
| " Uninstalling transformers-4.51.3:\n", | |
| " Successfully uninstalled transformers-4.51.3\n", | |
| "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", | |
| "gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2024.12.0 which is incompatible.\u001b[0m\u001b[31m\n", | |
| "\u001b[0mSuccessfully installed addict-2.4.0 antlr4-python3-runtime-4.9.3 argbind-0.3.9 attrdict-2.0.1 bitsandbytes-0.45.5 blobfile-3.0.0 colorama-0.4.6 csvw-3.5.1 curated-tokenizers-0.0.9 curated-transformers-0.1.1 datasets-3.5.0 dill-0.3.8 dlinfo-2.0.0 docopt-0.6.2 einx-0.3.0 espeakng-loader-0.2.4 fsspec-2024.12.0 hf-transfer-0.1.9 huggingface_hub-0.28.1 isodate-0.7.2 kokoro-0.9.4 language-tags-1.2.0 librosa-0.10.2.post1 loguru-0.7.3 misaki-0.9.4 moshi-0.2.4 multiprocess-0.70.16 num2words-0.5.14 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 omegaconf-2.3.0 openai-whisper-20240930 phonemizer-fork-3.3.2 pycryptodomex-3.22.0 pydub-0.25.1 pyloudnorm-0.1.1 rdflib-7.1.4 rfc3986-1.5.0 segments-2.3.0 snac-1.2.1 sounddevice-0.5.0 spacy-curated-transformers-0.3.0 sphn-0.1.12 str2speech-0.4.3 tiktoken-0.9.0 torchao-0.10.0 torchdata-0.11.0 torchtune-0.6.1 transformers-4.50.3 x-transformers-2.1.37 xxhash-3.5.0\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "application/vnd.colab-display-data+json": { | |
| "pip_warning": { | |
| "packages": [ | |
| "pydevd_plugins" | |
| ] | |
| }, | |
| "id": "c2ff769005c345748aa9fceb4d58cda1" | |
| } | |
| }, | |
| "metadata": {} | |
| } | |
| ], | |
| "source": [ | |
| "# Install into this kernel's environment (%pip, not a shell pip); pinned to the str2speech release this notebook was run with.\n", | |
| "%pip install str2speech==0.4.3" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "!str2speech -l" | |
| ], | |
| "metadata": { | |
| "id": "QLmroXmBDNGC", | |
| "outputId": "dfc28f76-1f55-4e89-9511-ad0a271823fc", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| } | |
| }, | |
| "execution_count": 2, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Now running str2speech 0.4.3\n", | |
| "Available TTS models:\n", | |
| "0. suno/bark-small\n", | |
| "1. suno/bark\n", | |
| "2. megatts3\n", | |
| "3. facebook/mms-tts-eng\n", | |
| "4. facebook/mms-tts-deu\n", | |
| "5. facebook/mms-tts-fra\n", | |
| "6. facebook/mms-tts-spa\n", | |
| "7. facebook/mms-tts-swe\n", | |
| "8. kokoro\n", | |
| "9. sesame/csm-1b\n", | |
| "10. zyphra/zonos-v0.1-transformer\n", | |
| "11. sparkaudio/spark-tts-0.5b\n", | |
| "12. microsoft/speecht5_tts\n", | |
| "13. nari-labs/dia-1.6b\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Sample two-speaker dialogue; every line carries an [S1] or [S2] speaker tag.\n", | |
| "dia_test = \"\"\"[S1] So, what's happening here?\n", | |
| "[S2] My cat just swallowed an egg. An entire boiled egg!\n", | |
| "[S1] Wow, must have been really hungry.\n", | |
| "[S2] Are you kidding me. He's always hungry, no matter how much I feed him.\n", | |
| "[S1] Do you feed him boiled eggs?\n", | |
| "[S2] No, just cat food.\n", | |
| "[S1] Well, there you go. Looks like it got bored of it.\n", | |
| "\"\"\"\n", | |
| "\n", | |
| "# Write the script to disk with an explicit encoding so the bytes do not depend on the platform default.\n", | |
| "with open(\"dia_test.txt\", \"w\", encoding=\"utf-8\") as f:\n", | |
| "    f.write(dia_test)" | |
| ], | |
| "metadata": { | |
| "id": "dzdaxJTvDQti" | |
| }, | |
| "execution_count": 6, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "!str2speech -f dia_test.txt --model \"nari-labs/dia-1.6b\" --output dia.wav" | |
| ], | |
| "metadata": { | |
| "id": "FjV2SXiHDTzd", | |
| "outputId": "24c911ba-e8a1-4594-f080-e0bb9a3c356f", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| } | |
| }, | |
| "execution_count": 7, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Now running str2speech 0.4.3\n", | |
| "Model provided: nari-labs/dia-1.6b\n", | |
| "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", | |
| "E0000 00:00:1745450413.922681 2817 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", | |
| "E0000 00:00:1745450413.931012 2817 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", | |
| "Audiotools found\n", | |
| "Codec found\n", | |
| "Using default voices.\n", | |
| "Audio saved.\n", | |
| "Generated speech in 81.64 seconds.\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from IPython.display import Audio, display\n", | |
| "\n", | |
| "# Pass the path via filename= so a missing dia.wav fails as a file error rather than being interpreted as raw audio data.\n", | |
| "display(Audio(filename=\"dia.wav\"))" | |
| ], | |
| "metadata": { | |
| "id": "2MhgIrJADWjO", | |
| "outputId": "da3d37a4-cab0-460e-f886-0c296f220194", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 62 | |
| } | |
| }, | |
| "execution_count": 8, | |
| "outputs": [ | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<IPython.lib.display.Audio object>" | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <audio controls=\"controls\" >\n", |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment