-
-
Save netpi/1fc8c5bceb25a4bbac08dbf0756a1734 to your computer and use it in GitHub Desktop.
MFA LJSpeech.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "MFA LJSpeech.ipynb", | |
"private_outputs": true, | |
"provenance": [], | |
"collapsed_sections": [], | |
"machine_shape": "hm", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/NTT123/12264d15afad861cb897f7a20a01762e/mfa-ljspeech.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "bhYTF6XCOYAh" | |
}, | |
"source": [ | |
"### Force-align the LJSpeech dataset using the Montreal Forced Aligner (MFA)\n", | |
"\n", | |
"\n", | |
"**Note**: This notebook takes about 20 minutes to run.\n", | |
"\n", | |
"Expected results:\n", | |
"\n", | |
"<img src=\"https://i.imgur.com/5uehkba.png\" alt=\"Expected alignment results\">\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "_twSwprjG_M9" | |
}, | |
"source": [ | |
"%%writefile install_mfa.sh\n", | |
"#!/bin/bash\n", | |
"\n", | |
"## Install Montreal Forced Aligner (MFA) into a private Miniconda prefix.\n", | |
"## Usage: bash install_mfa.sh [root_dir]   (root_dir defaults to /tmp/mfa)\n", | |
"\n", | |
"# Abort on the first failing command, unset variable, or failed pipeline stage,\n", | |
"# so a broken download or install is never followed by the DONE banner.\n", | |
"set -euo pipefail\n", | |
"\n", | |
"root_dir=\"${1:-/tmp/mfa}\"\n", | |
"mkdir -p \"$root_dir\"\n", | |
"# Resolve to an absolute path: the script cd's into root_dir, so a relative\n", | |
"# argument would otherwise make \"$root_dir/miniconda3\" point to the wrong place.\n", | |
"root_dir=\"$(cd \"$root_dir\" && pwd)\"\n", | |
"cd \"$root_dir\"\n", | |
"\n", | |
"# download miniconda3\n", | |
"wget -q --show-progress https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh\n", | |
"bash Miniconda3-latest-Linux-x86_64.sh -b -p \"$root_dir/miniconda3\" -f\n", | |
"\n", | |
"# install MFA from conda-forge into a dedicated environment named \"aligner\"\n", | |
"\"$root_dir/miniconda3/bin/conda\" create -n aligner -c conda-forge montreal-forced-aligner -y\n", | |
"\n", | |
"echo -e \"\\n======== DONE ==========\"\n", | |
"echo -e \"\\nTo activate MFA, run: source $root_dir/miniconda3/bin/activate aligner\"\n", | |
"echo -e \"\\nTo delete MFA, run: rm -rf $root_dir\"\n", | |
"echo -e \"\\nSee: https://montreal-forced-aligner.readthedocs.io/en/latest/aligning.html to know how to use MFA\"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "molbePbO8mlv" | |
}, | |
"source": [ | |
"# Install MFA with the install_mfa.sh script written by the previous cell, then\n", | |
"# print the `mfa align` help text as a smoke test of the new environment.\n", | |
"INSTALL_DIR=\"/tmp/mfa\"  # path to install directory\n", | |
"\n", | |
"!bash ./install_mfa.sh {INSTALL_DIR}\n", | |
"# `&&` so that mfa is only invoked when the environment was activated successfully\n", | |
"!source {INSTALL_DIR}/miniconda3/bin/activate aligner && mfa align --help" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ppEcCzZ2MZSp" | |
}, | |
"source": [ | |
"# Download the LJSpeech-1.1 archive into ./ljs and unpack it there.\n", | |
"!echo \"download and unpack ljs dataset\"\n", | |
"# `&&` stops the chain if a step fails; `wget -c` resumes a partial download and\n", | |
"# avoids saving a duplicate `.1` copy of the archive when the cell is re-run.\n", | |
"!mkdir -p ./ljs && cd ./ljs && wget -c -q --show-progress https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2\n", | |
"!cd ./ljs && tar xjf LJSpeech-1.1.tar.bz2" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "j6XLgf6aMbgo" | |
}, | |
"source": [ | |
"# install sox tool (apt-get is the script-stable front end; `apt` warns about its CLI)\n", | |
"!sudo apt-get install -q -y sox\n", | |
"# Convert every clip found under ./ljs to a 16k sample rate, single channel,\n", | |
"# writing the results flat into ./wav.\n", | |
"# mkdir -p: re-running the cell must not fail because ./wav already exists.\n", | |
"!mkdir -p ./wav\n", | |
"!echo \"normalize audio clips to sample rate of 16k\"\n", | |
"!find ./ljs -name \"*.wav\" -type f -execdir sox --norm=-3 {} -r 16k -c 1 `pwd`/wav/{} \\;\n", | |
"!echo \"Number of clips\" $(ls ./wav/ | wc -l)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ZbNfEn5pMdOf" | |
}, | |
"source": [ | |
"# Create one transcript file per clip from metadata.csv.\n", | |
"# Each metadata row has three '|'-separated fields; the first names the clip and\n", | |
"# the third is written to ./wav/<clip>.txt next to the converted audio.\n", | |
"from tqdm.auto import tqdm\n", | |
"\n", | |
"# Pin the encoding instead of relying on the platform default, and close the\n", | |
"# file handles deterministically with context managers.\n", | |
"with open('./ljs/LJSpeech-1.1/metadata.csv', 'r', encoding='utf-8') as meta:\n", | |
"    lines = meta.readlines()\n", | |
"\n", | |
"for line in tqdm(lines):\n", | |
"    line = line.strip()\n", | |
"    if not line:  # tolerate blank lines instead of failing the 3-way unpack\n", | |
"        continue\n", | |
"    fn, _, transcript = line.split('|')\n", | |
"    with open(f'./wav/{fn}.txt', 'w', encoding='utf-8') as out:\n", | |
"        out.write(transcript)\n", | |
"\n", | |
"# this is an example transcript for LJ001-0001.wav\n", | |
"!cat ./wav/LJ001-0001.txt" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "obtWj9_wMghH" | |
}, | |
"source": [ | |
"# Download a pretrained English acoustic model and an English lexicon.\n", | |
"# -nc (no-clobber): skip the download when the file is already present, so a\n", | |
"# re-run does not leave `english.zip.1` / `librispeech-lexicon.txt.1` duplicates.\n", | |
"!wget -nc -q --show-progress https://github.com/MontrealCorpusTools/mfa-models/raw/main/acoustic/english.zip\n", | |
"!wget -nc -q --show-progress http://www.openslr.org/resources/11/librispeech-lexicon.txt" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Rewrite the lexicon as: word, a TAB, then the phonemes joined by single spaces.\n", | |
"# see: https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/pull/480\n", | |
"import re\n", | |
"\n", | |
"# Raw string: a backslash-s inside a plain literal is an invalid escape sequence.\n", | |
"whitespace = re.compile(r\"\\s+\")\n", | |
"\n", | |
"# Stream the input line by line and close both files deterministically.\n", | |
"with open(\"librispeech-lexicon.txt\") as src, open(\"modified_librispeech-lexicon.txt\", \"w\") as dst:\n", | |
"    for line in src:\n", | |
"        line = line.strip()\n", | |
"        if not line:  # skip blank lines instead of writing an empty entry\n", | |
"            continue\n", | |
"        word, *phonemes = whitespace.split(line)\n", | |
"        dst.write(f\"{word}\\t{' '.join(phonemes)}\\n\")" | |
], | |
"metadata": { | |
"id": "zf_ssMA8cbHw" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "HcZE4uxyMhXg" | |
}, | |
"source": [ | |
"# FINALLY, align phonemes and speech.\n", | |
"# Inputs: the ./wav corpus (audio + transcripts), the rewritten lexicon and the\n", | |
"# downloaded acoustic model; requires INSTALL_DIR from the install cell.\n", | |
"# `&&` so that mfa only runs when the environment was activated successfully.\n", | |
"!source {INSTALL_DIR}/miniconda3/bin/activate aligner && \\\n", | |
"mfa align -t ./temp -j 4 ./wav modified_librispeech-lexicon.txt ./english.zip ./ljs_aligned\n", | |
"# output files are at ./ljs_aligned\n", | |
"!echo \"See output files at ./ljs_aligned\"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [], | |
"metadata": { | |
"id": "0bWYNXRlLZ84" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment