{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "13e39ceb",
"metadata": {},
"outputs": [],
"source": [
"import boto3\n",
"import wave\n",
"\n",
"# boto3.setup_default_session(profile_name=\"...\")\n",
"polly = boto3.client(\"polly\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a670d769",
"metadata": {},
"outputs": [],
"source": [
"lines = [\n",
"    \"CATER is designed using infrastructure-as-code, and is deployed using a git-ops workflow. We're using ArgoCD, which is an open-source git-ops tool designed for Kubernetes. This means all infrastructure is fully reproducible, and all approved code updates are automatically deployed by ArgoCD.\",\n",
"    \"CATER consists of a set of applications (or components). By default we install everything required to run a T.R.E., but all components can be disabled or replaced by another implementation.\",\n",
"    \"For the first three months, work focussed on the backend task of writing and deploying components, but we've now started integrating those components so they can be used through a frontend.\",\n",
"\n",
"    \"You log in to CATER using Keycloak. A username and password are used in this demo, but multi-factor authentication can be easily added, and Keycloak can federate with other identity providers using SAML or O.I.D.C.\",\n",
"    \"In this demo we're using JupyterHub as a control plane for researcher workspaces.\",\n",
"    \"You can see a list of projects and workspace types. This demo only has Ubuntu Mate desktops.\",\n",
"    \"When you launch a workspace a new Kubernetes pod is created, project storage is mounted, and users are given access via Apache Guacamole, which is an open-source remote desktop gateway.\",\n",
"    \"As you can see, you have a full desktop via a web browser.\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf70124e",
"metadata": {},
"outputs": [],
"source": [
"voices = polly.describe_voices()[\"Voices\"]"
]
},
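{
"cell_type": "code",
"execution_count": null,
"id": "a1f0c3d2",
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch (not in the original flow): list the British English voices\n",
"# that support the neural engine, to sanity-check the VoiceId chosen below.\n",
"# The field names (Id, LanguageCode, SupportedEngines) come from the Polly\n",
"# DescribeVoices API response.\n",
"[v[\"Id\"] for v in voices if v[\"LanguageCode\"] == \"en-GB\" and \"neural\" in v[\"SupportedEngines\"]]"
]
},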
{
"cell_type": "code",
"execution_count": null,
"id": "47c5c74e",
"metadata": {},
"outputs": [],
"source": [
"# voice = \"Brian\"\n",
"voice = \"Amy\"\n",
"# voice = \"Emma\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d166646",
"metadata": {},
"outputs": [],
"source": [
"def speak(text, outfile):\n",
"    \"\"\"Synthesize `text` with Polly and save it as a WAV file.\"\"\"\n",
"    r = polly.synthesize_speech(\n",
"        Engine=\"neural\",\n",
"        LanguageCode=\"en-GB\",\n",
"        OutputFormat=\"pcm\",  # raw 16-bit signed little-endian samples\n",
"        Text=text,\n",
"        VoiceId=voice,\n",
"    )\n",
"    s = r[\"AudioStream\"]\n",
"\n",
"    # Polly's pcm output has no container, so wrap it in a WAV header:\n",
"    # mono, 2 bytes per sample, 16 kHz (Polly's default pcm sample rate).\n",
"    with wave.open(outfile, \"wb\") as wav:\n",
"        wav.setparams((1, 2, 16000, 0, \"NONE\", \"NONE\"))\n",
"        wav.writeframes(s.read())\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f62929a0",
"metadata": {},
"outputs": [],
"source": [
"for i, text in enumerate(lines):\n",
"    print(text)\n",
"    speak(text, f\"{i:02d}.wav\")"
]
},
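{
"cell_type": "code",
"execution_count": null,
"id": "b2e4d6f8",
"metadata": {},
"outputs": [],
"source": [
"# Optional check: play one of the generated clips inline to verify the\n",
"# synthesis before concatenating. IPython.display.Audio ships with Jupyter.\n",
"from IPython.display import Audio\n",
"\n",
"Audio(\"00.wav\")"
]
},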
{
"cell_type": "code",
"execution_count": null,
"id": "5784e676-37aa-4e92-99bb-c34727329b32",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "07110973",
"metadata": {},
"outputs": [],
"source": [
"# ffmpeg-python bindings (pip install ffmpeg-python); also requires the\n",
"# ffmpeg and ffprobe binaries to be on the PATH.\n",
"import ffmpeg\n",
"import os\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9afc0e1e-34d8-4928-bba8-4ceb89f5f57d",
"metadata": {},
"outputs": [],
"source": [
"# Define input file paths\n",
"audio_files = [\"00.wav\", \"01.wav\", \"02.wav\", \"03.wav\", \"04.wav\", \"05.wav\", \"06.wav\", \"07.wav\"]\n",
"output_file = \"concatenated_with_gaps.wav\"\n",
"# audio_codec = \"libmp3lame\"  # Or \"aac\", \"pcm_s16le\", etc. based on desired output\n",
"audio_codec = \"pcm_s16le\"\n",
"\n",
"# Define the gap durations in seconds\n",
"START_DURATION = 2.0\n",
"GAP_DURATION = 2.0\n",
"END_DURATION = 2.0"
]
},
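{
"cell_type": "code",
"execution_count": null,
"id": "c3d5e7f9",
"metadata": {},
"outputs": [],
"source": [
"# Sanity-check sketch (assumes the files were generated by the Polly loop\n",
"# above): fail early if any expected input is missing, rather than deep\n",
"# inside ffmpeg with a less obvious error.\n",
"missing = [f for f in audio_files if not os.path.exists(f)]\n",
"if missing:\n",
"    raise FileNotFoundError(f\"Missing input files: {missing}\")"
]
},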
{
"cell_type": "code",
"execution_count": null,
"id": "752cf3cc-645a-4777-a535-89b2b6d761c4",
"metadata": {},
"outputs": [],
"source": [
"# --- Step 1: Get audio file properties for silence generation ---\n",
"# The generated silence must match the sample rate and channel layout of\n",
"# the real audio files, so probe each file with ffprobe.\n",
"def get_audio_properties(file_path):\n",
"    \"\"\"\n",
"    Gets sample rate, channel layout and duration of an audio file using ffprobe.\n",
"    \"\"\"\n",
"    try:\n",
"        probe = ffmpeg.probe(file_path)\n",
"        audio_stream = next((s for s in probe[\"streams\"] if s[\"codec_type\"] == \"audio\"), None)\n",
"        if audio_stream:\n",
"            return {\n",
"                \"sample_rate\": int(audio_stream[\"sample_rate\"]),\n",
"                \"channel_layout\": audio_stream.get(\"channel_layout\", \"mono\"),  # Default to mono if not found\n",
"                \"duration\": float(audio_stream[\"duration\"]),\n",
"            }\n",
"        else:\n",
"            raise ValueError(f\"No audio stream found for: {file_path}\")\n",
"    except ffmpeg.Error as e:\n",
"        print(f\"Error probing {file_path}: {e.stderr.decode()}\")\n",
"        raise\n",
"    except Exception as e:\n",
"        print(f\"An unexpected error occurred while probing {file_path}: {e}\")\n",
"        raise"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0149e5cd-a562-458a-a2d7-4b9ff4a24144",
"metadata": {},
"outputs": [],
"source": [
"# Properties for all audio files must match, otherwise the generated\n",
"# silence (and the concat filter) would be inconsistent\n",
"audio_props = [get_audio_properties(f) for f in audio_files]\n",
"sample_rates = set(p[\"sample_rate\"] for p in audio_props)\n",
"if len(sample_rates) != 1:\n",
"    raise ValueError(f\"Multiple sample rates found: {sample_rates}\")\n",
"channel_layouts = set(p[\"channel_layout\"] for p in audio_props)\n",
"if len(channel_layouts) != 1:\n",
"    raise ValueError(f\"Multiple channel layouts found: {channel_layouts}\")\n",
"\n",
"SAMPLE_RATE = next(iter(sample_rates))\n",
"CHANNEL_LAYOUT = next(iter(channel_layouts))\n",
"\n",
"print(f\"Using sample rate: {SAMPLE_RATE} Hz, channel layout: {CHANNEL_LAYOUT} for silence.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff6db6c4-e646-4e3b-8bcc-adf1699431f1",
"metadata": {},
"outputs": [],
"source": [
"# --- Step 2: Build the filtergraph including silence ---\n",
"def make_silence(duration):\n",
"    \"\"\"Generate a silent segment matching the input files' format.\"\"\"\n",
"    return ffmpeg.input(\n",
"        f\"anullsrc=r={SAMPLE_RATE}:cl={CHANNEL_LAYOUT}\",\n",
"        f=\"lavfi\",  # \"lavfi\" is for libavfilter inputs like anullsrc\n",
"        t=duration,\n",
"    ).audio\n",
"\n",
"graph_inputs = []\n",
"\n",
"# Insert gap at start\n",
"graph_inputs.append(make_silence(START_DURATION))\n",
"\n",
"for i, audio_file in enumerate(audio_files):\n",
"    graph_inputs.append(ffmpeg.input(audio_file).audio)\n",
"\n",
"    # Add silence between tracks\n",
"    if i < len(audio_files) - 1:\n",
"        graph_inputs.append(make_silence(GAP_DURATION))\n",
"\n",
"# Insert gap at end\n",
"graph_inputs.append(make_silence(END_DURATION))\n",
"\n",
"# Apply the concat filter; it must be told how many inputs it has in total\n",
"concatenated_audio = ffmpeg.filter(\n",
"    graph_inputs,\n",
"    \"concat\",\n",
"    n=len(graph_inputs),\n",
"    v=0,  # No video streams\n",
"    a=1,  # One audio stream per segment\n",
")"
]
},
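{
"cell_type": "code",
"execution_count": null,
"id": "d4e6f8a0",
"metadata": {},
"outputs": [],
"source": [
"# Optional inspection sketch: print the ffmpeg command line that\n",
"# ffmpeg-python would build from this graph. \"preview.wav\" is a throwaway\n",
"# name used only for compiling the command; the real output is defined in\n",
"# the next cell.\n",
"print(\" \".join(ffmpeg.compile(ffmpeg.output(concatenated_audio, \"preview.wav\"))))"
]
},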
{
"cell_type": "code",
"execution_count": null,
"id": "360882f9-5fba-476f-a741-539e3df749cb",
"metadata": {},
"outputs": [],
"source": [
"# --- Step 3: Define the output and run ---\n",
"output_stream = ffmpeg.output(\n",
"    concatenated_audio,\n",
"    output_file,\n",
"    acodec=audio_codec,\n",
")\n",
"\n",
"try:\n",
"    ffmpeg.run(output_stream, overwrite_output=True)\n",
"    print(f\"Audio files concatenated to {output_file} with {GAP_DURATION}-second gaps successfully!\")\n",
"\n",
"except ffmpeg.Error as e:\n",
"    print(e)\n",
"    raise"
]
},
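{
"cell_type": "code",
"execution_count": null,
"id": "e5f7a9b1",
"metadata": {},
"outputs": [],
"source": [
"# Rough verification sketch: the output duration should be close to the sum\n",
"# of the input durations plus all inserted gaps (exactness depends on how\n",
"# the encoder pads frames, so expect a small difference).\n",
"expected = (\n",
"    START_DURATION\n",
"    + sum(p[\"duration\"] for p in audio_props)\n",
"    + GAP_DURATION * (len(audio_files) - 1)\n",
"    + END_DURATION\n",
")\n",
"actual = get_audio_properties(output_file)[\"duration\"]\n",
"print(f\"Expected ~{expected:.2f}s, got {actual:.2f}s\")"
]
},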
{
"cell_type": "code",
"execution_count": null,
"id": "2e8f960a-afa5-49c0-b423-c9eabb6d1b51",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
} |