erogol · December 19, 2018 11:50 · vitaly-zdanevich · Apr 25, 2019
diff --git a/tts_example.ipynb b/tts_example.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "TTS_example.ipynb",
      "version": "0.3.2",
      "provenance": [],
      "collapsed_sections": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/erogol/8f39174c3f0475221c8978aeb10d4fdc/tts_example.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "metadata": {
        "id": "6uMCom74Ft81",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive/')b"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "9wqjz3lIGXZd",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "# get TTS to your local\n",
        "!git clone https://github.com/mozilla/TTS"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "TzjnO4pjGePs",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "# download LJSpeech dataset\n",
        "!wget http://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2d\n",
        "# decompress \n",
        "!tar -xvjf LJSpeech-1.1.tar.bz2"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "__k0BrbfLQ-F",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "# create train-val splits\n",
        "!shuf LJSpeech-1.1/metadata.csv > LJSpeech-1.1/metadata_shuf.csv\n",
        "!head -n 12000 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_train.csv\n",
        "!tail -n 11000 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_val.csv"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "G1OnsNyJJtem",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "# install TTS requirements\n",
        "!cd TTS\n",
        "!pip install -r requirements.txt"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "J1XOWu_oKfdv",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "# check the default TTS config.json. It is necessary for all your training \n",
        "# settings\n",
        "!cat config.json"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "y7_Xao7uNOvX",
        "colab_type": "code",
        "outputId": "6400488b-c1f4-45d1-dc40-a9a72543b1f8",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 36
        }
      },
      "cell_type": "code",
      "source": [
        "\n",
        "# set data fields for LJSpeech\n",
        "%%writefile config.json\n",
        "\n",
        "{\n",
        "    \"model_name\": \"TTS-master\",\n",
        "    \"model_description\": \"Higher dropout rate for stopnet and disabled custom initialization, pull current mel prediction to stopnet.\",\n",
        "\n",
        "    \"audio\":{\n",
        "        \"audio_processor\": \"audio\",     // to use dictate different audio processors, if available.\n",
        "        // Audio processing parameters\n",
        "        \"num_mels\": 80,         // size of the mel spec frame. \n",
        "        \"num_freq\": 1025,       // number of stft frequency levels. Size of the linear spectogram frame.\n",
        "        \"sample_rate\": 22050,   // wav sample-rate. If different than the original data, it is resampled.\n",
        "        \"frame_length_ms\": 50,  // stft window length in ms.\n",
        "        \"frame_shift_ms\": 12.5, // stft window hop-lengh in ms.\n",
        "        \"preemphasis\": 0.97,    // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis.\n",
        "        \"min_level_db\": -100,   // normalization range\n",
        "        \"ref_level_db\": 20,     // reference level db, theoretically 20db is the sound of air.\n",
        "        \"power\": 1.5,           // value to sharpen wav signals after GL algorithm.\n",
        "        \"griffin_lim_iters\": 60,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation.\n",
        "        // Normalization parameters\n",
        "        \"signal_norm\": true,    // normalize the spec values in range [0, 1]\n",
        "        \"symmetric_norm\": false, // move normalization to range [-1, 1]\n",
        "        \"max_norm\": 1,          // scale normalization to range [-max_norm, max_norm] or [0, max_norm]\n",
        "        \"clip_norm\": true,      // clip normalized values into the range.\n",
        "        \"mel_fmin\": null,         // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!!\n",
        "        \"mel_fmax\": null,        // maximum freq level for mel-spec. Tune for dataset!!\n",
        "        \"do_trim_silence\": true  // enable trimming of slience of audio as you load it.\n",
        "    },\n",
        "\n",
        "    \"embedding_size\": 256,    \n",
        "    \"text_cleaner\": \"english_cleaners\",\n",
        "    \"epochs\": 1000,\n",
        "    \n",
        "    \"lr\": 0.0001,\n",
        "    \"lr_decay\": false,\n",
        "    \"warmup_steps\": 4000,\n",
        "\n",
        "    \"batch_size\": 32,\n",
        "    \"eval_batch_size\":32,\n",
        "    \"r\": 5,\n",
        "    \"wd\": 0.000001,\n",
        "    \"checkpoint\": true,\n",
        "    \"save_step\": 5000,\n",
        "    \"print_step\": 10,\n",
        "\n",
        "    \"run_eval\": true,\n",
        "    \"data_path\": \"../../Data/LJSpeech-1.1/\",  // can overwritten from command argument\n",
        "    \"meta_file_train\": \"metadata_train.csv\",      // metafile for training dataloader\n",
        "    \"meta_file_val\": \"metadata_val.csv\",    // metafile for validation dataloader\n",
        "    \"data_loader\": \"TTSDataset\",      // dataloader, [\"TTSDataset\", \"TTSDatasetCached\", \"TTSDatasetMemory\"]\n",
        "    \"dataset\": \"ljspeech\",     // one of TTS.dataset.preprocessors, only valid id dataloader == \"TTSDataset\", rest uses \"tts_cache\" by default.\n",
        "    \"min_seq_len\": 0,\n",
        "    \"output_path\": \"../keep/\",\n",
        "    \"num_loader_workers\": 2,\n",
        "    \"num_val_loader_workers\": 2\n",
        "}"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Overwriting config.json\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "1Z7PR2pBLHxq",
        "colab_type": "code",
        "outputId": "e285d179-469c-445a-948f-dc706e51e1f4",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1123
        }
      },
      "cell_type": "code",
      "source": [
        "# pull the trigger\n",
        "!python train.py --config_path config.json --data_path ../LJSpeech-1.1/ | tee training.log"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            " > Using CUDA:  True\n",
            " > Number of GPUs:  1\n",
            " > Git Hash: 20ee7c0\n",
            " > Experiment folder: /content/TTS/../keep/December-19-2018_11+32AM-TTS-master-20ee7c0\n",
            " > Setting up Audio Processor...\n",
            " | > fft size: 2048, hop length: 275, win length: 1102\n",
            " | > Audio Processor attributes.\n",
            "   | > bits:None\n",
            "   | > sample_rate:22050\n",
            "   | > num_mels:80\n",
            "   | > min_level_db:-100\n",
            "   | > frame_shift_ms:12.5\n",
            "   | > frame_length_ms:50\n",
            "   | > ref_level_db:20\n",
            "   | > num_freq:1025\n",
            "   | > power:1.5\n",
            "   | > preemphasis:0.97\n",
            "   | > griffin_lim_iters:60\n",
            "   | > signal_norm:True\n",
            "   | > symmetric_norm:False\n",
            "   | > mel_fmin:0\n",
            "   | > mel_fmax:None\n",
            "   | > max_norm:1.0\n",
            "   | > clip_norm:True\n",
            "   | > do_trim_silence:True\n",
            "   | > n_fft:2048\n",
            "   | > hop_length:275\n",
            "   | > win_length:1102\n",
            " | > Number of characters : 149\n",
            " | > Num output units : 1025\n",
            "\n",
            " > Starting a new training\n",
            " | > Model has 7083650 parameters\n",
            " > Reading LJSpeech from - ../LJSpeech-1.1/\n",
            " | > Number of instances : 12000\n",
            " | > Max length sequence 187\n",
            " | > Min length sequence 5\n",
            " | > Avg length sequence 98.28775\n",
            " | > 0 instances are ignored by min_seq_len (0)\n",
            " | > Batch group shuffling is active.\n",
            " | > Epoch 0/1000\n",
            "   | > Step:9/375  GlobalStep:10  TotalLoss:0.33402  LinearLoss:0.14762  MelLoss:0.18641  StopLoss:0.73977  GradNorm:0.04653  GradNormST:1.01440  AvgTextLen:36.2  AvgSpecLen:172.2  StepTime:2.21  LR:0.000100\n",
            "   | > Step:19/375  GlobalStep:20  TotalLoss:0.29765  LinearLoss:0.14310  MelLoss:0.15455  StopLoss:0.66492  GradNorm:0.06941  GradNormST:0.48356  AvgTextLen:44.8  AvgSpecLen:222.0  StepTime:2.41  LR:0.000100\n",
            "   | > Step:29/375  GlobalStep:30  TotalLoss:0.28474  LinearLoss:0.14459  MelLoss:0.14015  StopLoss:0.72310  GradNorm:0.06081  GradNormST:0.92077  AvgTextLen:48.8  AvgSpecLen:246.3  StepTime:3.07  LR:0.000100\n",
            "   | > Step:39/375  GlobalStep:40  TotalLoss:0.27690  LinearLoss:0.14139  MelLoss:0.13551  StopLoss:0.73683  GradNorm:0.05845  GradNormST:2.58913  AvgTextLen:51.6  AvgSpecLen:254.2  StepTime:3.22  LR:0.000100\n",
            "   | > Step:49/375  GlobalStep:50  TotalLoss:0.26876  LinearLoss:0.13891  MelLoss:0.12986  StopLoss:0.72490  GradNorm:0.03796  GradNormST:3.11640  AvgTextLen:60.6  AvgSpecLen:313.0  StepTime:3.56  LR:0.000100\n",
            "   | > Step:59/375  GlobalStep:60  TotalLoss:0.25832  LinearLoss:0.13163  MelLoss:0.12669  StopLoss:0.74020  GradNorm:0.05046  GradNormST:1.47773  AvgTextLen:65.6  AvgSpecLen:326.4  StepTime:3.71  LR:0.000100\n",
            "   | > Step:69/375  GlobalStep:70  TotalLoss:0.25436  LinearLoss:0.12792  MelLoss:0.12644  StopLoss:0.67741  GradNorm:0.06263  GradNormST:2.02652  AvgTextLen:66.6  AvgSpecLen:321.7  StepTime:3.73  LR:0.000100\n",
            "   | > Step:79/375  GlobalStep:80  TotalLoss:0.25033  LinearLoss:0.12247  MelLoss:0.12785  StopLoss:0.77042  GradNorm:0.05292  GradNormST:0.97249  AvgTextLen:72.3  AvgSpecLen:363.9  StepTime:4.67  LR:0.000100\n",
            "   | > Step:89/375  GlobalStep:90  TotalLoss:0.24469  LinearLoss:0.11755  MelLoss:0.12714  StopLoss:0.67230  GradNorm:0.08123  GradNormST:0.90303  AvgTextLen:79.5  AvgSpecLen:389.7  StepTime:4.24  LR:0.000100\n",
            "   | > Step:99/375  GlobalStep:100  TotalLoss:0.24387  LinearLoss:0.11588  MelLoss:0.12799  StopLoss:0.56327  GradNorm:0.05469  GradNormST:2.77339  AvgTextLen:78.1  AvgSpecLen:393.7  StepTime:3.59  LR:0.000100\n",
            "   | > Step:109/375  GlobalStep:110  TotalLoss:0.23841  LinearLoss:0.11188  MelLoss:0.12652  StopLoss:0.62725  GradNorm:0.11853  GradNormST:1.29791  AvgTextLen:81.9  AvgSpecLen:400.1  StepTime:4.18  LR:0.000100\n",
            "   | > Step:119/375  GlobalStep:120  TotalLoss:0.23700  LinearLoss:0.10967  MelLoss:0.12733  StopLoss:0.60695  GradNorm:0.02969  GradNormST:1.44895  AvgTextLen:82.7  AvgSpecLen:423.1  StepTime:4.22  LR:0.000100\n",
            "   | > Step:129/375  GlobalStep:130  TotalLoss:0.23633  LinearLoss:0.10984  MelLoss:0.12650  StopLoss:0.73151  GradNorm:0.10447  GradNormST:1.45647  AvgTextLen:91.1  AvgSpecLen:462.0  StepTime:5.03  LR:0.000100\n",
            "   | > Step:139/375  GlobalStep:140  TotalLoss:0.23326  LinearLoss:0.10804  MelLoss:0.12522  StopLoss:0.56982  GradNorm:0.04163  GradNormST:1.32453  AvgTextLen:95.1  AvgSpecLen:470.1  StepTime:4.71  LR:0.000100\n",
            "   | > Step:149/375  GlobalStep:150  TotalLoss:0.23332  LinearLoss:0.10898  MelLoss:0.12434  StopLoss:0.64671  GradNorm:0.05904  GradNormST:0.94457  AvgTextLen:93.3  AvgSpecLen:470.4  StepTime:4.87  LR:0.000100\n",
            "   | > Step:159/375  GlobalStep:160  TotalLoss:0.23035  LinearLoss:0.10733  MelLoss:0.12302  StopLoss:0.70120  GradNorm:0.03810  GradNormST:1.59297  AvgTextLen:97.2  AvgSpecLen:487.0  StepTime:5.40  LR:0.000100\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "TTS_example.ipynb",
	"version": "0.3.2",
	"provenance": [],
	"collapsed_sections": [],
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"accelerator": "GPU"
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/erogol/8f39174c3f0475221c8978aeb10d4fdc/tts_example.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"metadata": {
	"id": "6uMCom74Ft81",
	"colab_type": "code",
	"colab": {}
	},
	"cell_type": "code",
	"source": [
	"from google.colab import drive\n",
	"drive.mount('/content/drive/')b"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"metadata": {
	"id": "9wqjz3lIGXZd",
	"colab_type": "code",
	"colab": {}
	},
	"cell_type": "code",
	"source": [
	"# get TTS to your local\n",
	"!git clone https://github.com/mozilla/TTS"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"metadata": {
	"id": "TzjnO4pjGePs",
	"colab_type": "code",
	"colab": {}
	},
	"cell_type": "code",
	"source": [
	"# download LJSpeech dataset\n",
	"!wget http://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2d\n",
	"# decompress \n",
	"!tar -xvjf LJSpeech-1.1.tar.bz2"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"metadata": {
	"id": "__k0BrbfLQ-F",
	"colab_type": "code",
	"colab": {}
	},
	"cell_type": "code",
	"source": [
	"# create train-val splits\n",
	"!shuf LJSpeech-1.1/metadata.csv > LJSpeech-1.1/metadata_shuf.csv\n",
	"!head -n 12000 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_train.csv\n",
	"!tail -n 11000 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_val.csv"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"metadata": {
	"id": "G1OnsNyJJtem",
	"colab_type": "code",
	"colab": {}
	},
	"cell_type": "code",
	"source": [
	"# install TTS requirements\n",
	"!cd TTS\n",
	"!pip install -r requirements.txt"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"metadata": {
	"id": "J1XOWu_oKfdv",
	"colab_type": "code",
	"colab": {}
	},
	"cell_type": "code",
	"source": [
	"# check the default TTS config.json. It is necessary for all your training \n",
	"# settings\n",
	"!cat config.json"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"metadata": {
	"id": "y7_Xao7uNOvX",
	"colab_type": "code",
	"outputId": "6400488b-c1f4-45d1-dc40-a9a72543b1f8",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 36
	}
	},
	"cell_type": "code",
	"source": [
	"\n",
	"# set data fields for LJSpeech\n",
	"%%writefile config.json\n",
	"\n",
	"{\n",
	" \"model_name\": \"TTS-master\",\n",
	" \"model_description\": \"Higher dropout rate for stopnet and disabled custom initialization, pull current mel prediction to stopnet.\",\n",
	"\n",
	" \"audio\":{\n",
	" \"audio_processor\": \"audio\", // to use dictate different audio processors, if available.\n",
	" // Audio processing parameters\n",
	" \"num_mels\": 80, // size of the mel spec frame. \n",
	" \"num_freq\": 1025, // number of stft frequency levels. Size of the linear spectogram frame.\n",
	" \"sample_rate\": 22050, // wav sample-rate. If different than the original data, it is resampled.\n",
	" \"frame_length_ms\": 50, // stft window length in ms.\n",
	" \"frame_shift_ms\": 12.5, // stft window hop-lengh in ms.\n",
	" \"preemphasis\": 0.97, // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis.\n",
	" \"min_level_db\": -100, // normalization range\n",
	" \"ref_level_db\": 20, // reference level db, theoretically 20db is the sound of air.\n",
	" \"power\": 1.5, // value to sharpen wav signals after GL algorithm.\n",
	" \"griffin_lim_iters\": 60,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation.\n",
	" // Normalization parameters\n",
	" \"signal_norm\": true, // normalize the spec values in range [0, 1]\n",
	" \"symmetric_norm\": false, // move normalization to range [-1, 1]\n",
	" \"max_norm\": 1, // scale normalization to range [-max_norm, max_norm] or [0, max_norm]\n",
	" \"clip_norm\": true, // clip normalized values into the range.\n",
	" \"mel_fmin\": null, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!!\n",
	" \"mel_fmax\": null, // maximum freq level for mel-spec. Tune for dataset!!\n",
	" \"do_trim_silence\": true // enable trimming of slience of audio as you load it.\n",
	" },\n",
	"\n",
	" \"embedding_size\": 256, \n",
	" \"text_cleaner\": \"english_cleaners\",\n",
	" \"epochs\": 1000,\n",
	" \n",
	" \"lr\": 0.0001,\n",
	" \"lr_decay\": false,\n",
	" \"warmup_steps\": 4000,\n",
	"\n",
	" \"batch_size\": 32,\n",
	" \"eval_batch_size\":32,\n",
	" \"r\": 5,\n",
	" \"wd\": 0.000001,\n",
	" \"checkpoint\": true,\n",
	" \"save_step\": 5000,\n",
	" \"print_step\": 10,\n",
	"\n",
	" \"run_eval\": true,\n",
	" \"data_path\": \"../../Data/LJSpeech-1.1/\", // can overwritten from command argument\n",
	" \"meta_file_train\": \"metadata_train.csv\", // metafile for training dataloader\n",
	" \"meta_file_val\": \"metadata_val.csv\", // metafile for validation dataloader\n",
	" \"data_loader\": \"TTSDataset\", // dataloader, [\"TTSDataset\", \"TTSDatasetCached\", \"TTSDatasetMemory\"]\n",
	" \"dataset\": \"ljspeech\", // one of TTS.dataset.preprocessors, only valid id dataloader == \"TTSDataset\", rest uses \"tts_cache\" by default.\n",
	" \"min_seq_len\": 0,\n",
	" \"output_path\": \"../keep/\",\n",
	" \"num_loader_workers\": 2,\n",
	" \"num_val_loader_workers\": 2\n",
	"}"
	],
	"execution_count": 0,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Overwriting config.json\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"id": "1Z7PR2pBLHxq",
	"colab_type": "code",
	"outputId": "e285d179-469c-445a-948f-dc706e51e1f4",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 1123
	}
	},
	"cell_type": "code",
	"source": [
	"# pull the trigger\n",
	"!python train.py --config_path config.json --data_path ../LJSpeech-1.1/ \| tee training.log"
	],
	"execution_count": 0,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	" > Using CUDA: True\n",
	" > Number of GPUs: 1\n",
	" > Git Hash: 20ee7c0\n",
	" > Experiment folder: /content/TTS/../keep/December-19-2018_11+32AM-TTS-master-20ee7c0\n",
	" > Setting up Audio Processor...\n",
	" \| > fft size: 2048, hop length: 275, win length: 1102\n",
	" \| > Audio Processor attributes.\n",
	" \| > bits:None\n",
	" \| > sample_rate:22050\n",
	" \| > num_mels:80\n",
	" \| > min_level_db:-100\n",
	" \| > frame_shift_ms:12.5\n",
	" \| > frame_length_ms:50\n",
	" \| > ref_level_db:20\n",
	" \| > num_freq:1025\n",
	" \| > power:1.5\n",
	" \| > preemphasis:0.97\n",
	" \| > griffin_lim_iters:60\n",
	" \| > signal_norm:True\n",
	" \| > symmetric_norm:False\n",
	" \| > mel_fmin:0\n",
	" \| > mel_fmax:None\n",
	" \| > max_norm:1.0\n",
	" \| > clip_norm:True\n",
	" \| > do_trim_silence:True\n",
	" \| > n_fft:2048\n",
	" \| > hop_length:275\n",
	" \| > win_length:1102\n",
	" \| > Number of characters : 149\n",
	" \| > Num output units : 1025\n",
	"\n",
	" > Starting a new training\n",
	" \| > Model has 7083650 parameters\n",
	" > Reading LJSpeech from - ../LJSpeech-1.1/\n",
	" \| > Number of instances : 12000\n",
	" \| > Max length sequence 187\n",
	" \| > Min length sequence 5\n",
	" \| > Avg length sequence 98.28775\n",
	" \| > 0 instances are ignored by min_seq_len (0)\n",
	" \| > Batch group shuffling is active.\n",
	" \| > Epoch 0/1000\n",
	" \| > Step:9/375 GlobalStep:10 TotalLoss:0.33402 LinearLoss:0.14762 MelLoss:0.18641 StopLoss:0.73977 GradNorm:0.04653 GradNormST:1.01440 AvgTextLen:36.2 AvgSpecLen:172.2 StepTime:2.21 LR:0.000100\n",
	" \| > Step:19/375 GlobalStep:20 TotalLoss:0.29765 LinearLoss:0.14310 MelLoss:0.15455 StopLoss:0.66492 GradNorm:0.06941 GradNormST:0.48356 AvgTextLen:44.8 AvgSpecLen:222.0 StepTime:2.41 LR:0.000100\n",
	" \| > Step:29/375 GlobalStep:30 TotalLoss:0.28474 LinearLoss:0.14459 MelLoss:0.14015 StopLoss:0.72310 GradNorm:0.06081 GradNormST:0.92077 AvgTextLen:48.8 AvgSpecLen:246.3 StepTime:3.07 LR:0.000100\n",
	" \| > Step:39/375 GlobalStep:40 TotalLoss:0.27690 LinearLoss:0.14139 MelLoss:0.13551 StopLoss:0.73683 GradNorm:0.05845 GradNormST:2.58913 AvgTextLen:51.6 AvgSpecLen:254.2 StepTime:3.22 LR:0.000100\n",
	" \| > Step:49/375 GlobalStep:50 TotalLoss:0.26876 LinearLoss:0.13891 MelLoss:0.12986 StopLoss:0.72490 GradNorm:0.03796 GradNormST:3.11640 AvgTextLen:60.6 AvgSpecLen:313.0 StepTime:3.56 LR:0.000100\n",
	" \| > Step:59/375 GlobalStep:60 TotalLoss:0.25832 LinearLoss:0.13163 MelLoss:0.12669 StopLoss:0.74020 GradNorm:0.05046 GradNormST:1.47773 AvgTextLen:65.6 AvgSpecLen:326.4 StepTime:3.71 LR:0.000100\n",
	" \| > Step:69/375 GlobalStep:70 TotalLoss:0.25436 LinearLoss:0.12792 MelLoss:0.12644 StopLoss:0.67741 GradNorm:0.06263 GradNormST:2.02652 AvgTextLen:66.6 AvgSpecLen:321.7 StepTime:3.73 LR:0.000100\n",
	" \| > Step:79/375 GlobalStep:80 TotalLoss:0.25033 LinearLoss:0.12247 MelLoss:0.12785 StopLoss:0.77042 GradNorm:0.05292 GradNormST:0.97249 AvgTextLen:72.3 AvgSpecLen:363.9 StepTime:4.67 LR:0.000100\n",
	" \| > Step:89/375 GlobalStep:90 TotalLoss:0.24469 LinearLoss:0.11755 MelLoss:0.12714 StopLoss:0.67230 GradNorm:0.08123 GradNormST:0.90303 AvgTextLen:79.5 AvgSpecLen:389.7 StepTime:4.24 LR:0.000100\n",
	" \| > Step:99/375 GlobalStep:100 TotalLoss:0.24387 LinearLoss:0.11588 MelLoss:0.12799 StopLoss:0.56327 GradNorm:0.05469 GradNormST:2.77339 AvgTextLen:78.1 AvgSpecLen:393.7 StepTime:3.59 LR:0.000100\n",
	" \| > Step:109/375 GlobalStep:110 TotalLoss:0.23841 LinearLoss:0.11188 MelLoss:0.12652 StopLoss:0.62725 GradNorm:0.11853 GradNormST:1.29791 AvgTextLen:81.9 AvgSpecLen:400.1 StepTime:4.18 LR:0.000100\n",
	" \| > Step:119/375 GlobalStep:120 TotalLoss:0.23700 LinearLoss:0.10967 MelLoss:0.12733 StopLoss:0.60695 GradNorm:0.02969 GradNormST:1.44895 AvgTextLen:82.7 AvgSpecLen:423.1 StepTime:4.22 LR:0.000100\n",
	" \| > Step:129/375 GlobalStep:130 TotalLoss:0.23633 LinearLoss:0.10984 MelLoss:0.12650 StopLoss:0.73151 GradNorm:0.10447 GradNormST:1.45647 AvgTextLen:91.1 AvgSpecLen:462.0 StepTime:5.03 LR:0.000100\n",
	" \| > Step:139/375 GlobalStep:140 TotalLoss:0.23326 LinearLoss:0.10804 MelLoss:0.12522 StopLoss:0.56982 GradNorm:0.04163 GradNormST:1.32453 AvgTextLen:95.1 AvgSpecLen:470.1 StepTime:4.71 LR:0.000100\n",
	" \| > Step:149/375 GlobalStep:150 TotalLoss:0.23332 LinearLoss:0.10898 MelLoss:0.12434 StopLoss:0.64671 GradNorm:0.05904 GradNormST:0.94457 AvgTextLen:93.3 AvgSpecLen:470.4 StepTime:4.87 LR:0.000100\n",
	" \| > Step:159/375 GlobalStep:160 TotalLoss:0.23035 LinearLoss:0.10733 MelLoss:0.12302 StopLoss:0.70120 GradNorm:0.03810 GradNormST:1.59297 AvgTextLen:97.2 AvgSpecLen:487.0 StepTime:5.40 LR:0.000100\n"
	],
	"name": "stdout"
	}
	]
	}
	]
	}