Created
July 13, 2025 14:21
-
-
Save greg-randall/8b727a46d85aaab1fed24e962b21c406 to your computer and use it in GitHub Desktop.
This Python script converts a large text file into a series of audio files. You can run it from the command line by passing in your text file and choosing a voice with the --voice flag (e.g., --voice bm_lewis or the default --voice bf_emma).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| A script to convert a large text file into multiple speech audio files | |
| by splitting the text into chunks and processing them sequentially. | |
| """ | |
| import argparse | |
| import datetime | |
| import re | |
| import warnings | |
| from pathlib import Path | |
| from typing import List | |
| import nltk | |
| import soundfile as sf | |
| from kokoro import KPipeline | |
| from tqdm import tqdm | |
# --- Suppress specific library warnings ---
# Kokoro's torch backend triggers these on import/use; they are noise for
# end users of this script, so silence just the two known emitters.
warnings.filterwarnings("ignore", category=UserWarning, module="torch.nn.modules.rnn")
warnings.filterwarnings("ignore", category=FutureWarning, module="torch.nn.utils.weight_norm")
# --- Constants ---
# Default Kokoro voice id; override with --voice (e.g. bm_lewis).
DEFAULT_VOICE = 'bf_emma'
# Maximum characters per text chunk sent to the TTS pipeline.
DEFAULT_CHUNK_SIZE = 600
# Sample rate used when writing WAV files — presumably matches Kokoro's
# native output rate; confirm against the Kokoro docs if audio sounds off.
AUDIO_SAMPLE_RATE = 24000
# Download the sentence tokenizer model from NLTK.
# NOTE(review): recent NLTK versions look up 'punkt_tab' instead of 'punkt'
# for sent_tokenize — confirm against the installed NLTK version.
nltk.download('punkt', quiet=True)
def generate_audio_files(pipeline: KPipeline, filename_prefix: Path, text: str, voice: str, speed: float = 1.0) -> None:
    """
    Synthesize `text` into one or more WAV files with a pre-built Kokoro pipeline.

    Each segment the pipeline yields is written to a numbered file named
    `<filename_prefix>_<index>.wav` at AUDIO_SAMPLE_RATE.
    """
    segments = pipeline(text, voice=voice, speed=speed, split_pattern=r'')
    for index, (_graphemes, _phonemes, audio) in enumerate(segments):
        target = filename_prefix.with_name(f"{filename_prefix.name}_{index}.wav")
        sf.write(target, audio, AUDIO_SAMPLE_RATE)
def split_text(text: str, max_length: int) -> List[str]:
    """
    Split `text` into chunks no longer than `max_length` characters.

    Sentence boundaries (via NLTK's punkt tokenizer) are respected where
    possible; a sentence longer than `max_length` is packed greedily at word
    boundaries instead.

    Args:
        text: The full input text.
        max_length: Maximum character length of each chunk. A single word
            longer than this still becomes its own, oversized chunk.

    Returns:
        A list of non-empty text chunks in original order.
    """
    chunks: List[str] = []
    current_chunk = ""
    for sentence in nltk.sent_tokenize(text):
        sentence = sentence.strip()
        if not sentence:
            continue
        if len(sentence) > max_length:
            # Oversized sentence: flush the pending chunk, then pack its
            # words greedily into sub-chunks.
            if current_chunk:
                chunks.append(current_chunk)
                current_chunk = ""
            sub_chunk = ""
            for word in sentence.split():
                if not sub_chunk:
                    # Bug fix: seed the empty buffer directly — the old
                    # `len("") + len(word) + 1` check counted a phantom
                    # space and could append an empty string to `chunks`.
                    sub_chunk = word
                elif len(sub_chunk) + len(word) + 1 <= max_length:
                    sub_chunk += f" {word}"
                else:
                    chunks.append(sub_chunk)
                    sub_chunk = word
            if sub_chunk:
                chunks.append(sub_chunk)
            continue
        if not current_chunk:
            # Same fix at the sentence level: previously a sentence of
            # exactly `max_length` with an empty buffer pushed "" onto
            # `chunks` before becoming the new buffer.
            current_chunk = sentence
        elif len(current_chunk) + len(sentence) + 1 <= max_length:
            current_chunk += f" {sentence}"
        else:
            chunks.append(current_chunk)
            current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
def create_output_folder(input_file: Path) -> Path:
    """
    Create (if needed) and return a timestamped output directory.

    The directory is created in the current working directory and named
    `<input stem>_audio_<YYYYMMDD_HHMMSS>` so repeated runs don't collide.
    """
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = Path(f"{input_file.stem}_audio_{stamp}")
    output_dir.mkdir(exist_ok=True)
    return output_dir
def verify_chunks(original_text: str, chunks: List[str]) -> bool:
    """
    Sanity-check that chunking lost no content.

    Both the original text and the joined chunks are normalized by stripping
    every non-word character (including underscores) and lowercasing; the
    chunking is considered lossless when the two normal forms match.
    """
    def normalize(value: str) -> str:
        return re.sub(r'[\W_]+', '', value).lower()

    return normalize(original_text) == normalize("".join(chunks))
def main():
    """Parse CLI arguments, chunk the input text, and synthesize audio per chunk."""
    parser = argparse.ArgumentParser(
        description="Reads a text file, splits it into chunks, and converts each chunk to speech.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("input_file", type=Path, help="The text file to be processed.")
    parser.add_argument("--voice", type=str, default=DEFAULT_VOICE, help="The voice to use for text-to-speech.")
    parser.add_argument("--chunk-size", type=int, default=DEFAULT_CHUNK_SIZE, help="The maximum character length of each text chunk.")
    args = parser.parse_args()

    # Guard clause: bail out early on a missing input file.
    if not args.input_file.is_file():
        print(f"Error: Input file not found at {args.input_file}")
        return

    # --- Step 1: Read and Split Text ---
    print(f"Reading '{args.input_file.name}' and splitting into chunks of ~{args.chunk_size} characters...")
    try:
        original_text = args.input_file.read_text(encoding='utf-8')
        chunks = split_text(original_text, args.chunk_size)
    except Exception as e:
        print(f"Error reading or splitting file: {e}")
        return
    print(f"Successfully split text into {len(chunks)} chunks.")

    # --- Step 2: Verify Integrity ---
    print("\nVerifying that no content was lost during chunking...")
    if not verify_chunks(original_text, chunks):
        print("Error: Verification failed. The chunked text does not match the original. Aborting.")
        return
    print("Verification successful: Chunked text matches original.")

    # --- Step 3: Create Output Directory ---
    output_folder = create_output_folder(args.input_file)
    print(f"\nCreated output folder: {output_folder}")

    # --- Step 4: Process Chunks Sequentially ---
    print(f"\nProcessing {len(chunks)} chunks...")
    # Initialize the TTS pipeline once, before the loop starts.
    pipeline = KPipeline(lang_code='a')
    # Process each chunk one by one with a progress bar.
    for index, chunk in enumerate(tqdm(chunks, desc="Generating Audio"), start=1):
        try:
            generate_audio_files(pipeline, output_folder / f'{index:06d}', chunk, voice=args.voice)
        except Exception as e:
            # tqdm.write keeps the error from mangling the progress bar.
            tqdm.write(f"Error processing chunk {index}: {e}")
    print("\nProcessing complete.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment