Skip to content

Instantly share code, notes, and snippets.

@manics
Last active July 13, 2025 22:14
Show Gist options
  • Save manics/73b37a9f4a0b60aee07187520f6afce0 to your computer and use it in GitHub Desktop.
Save manics/73b37a9f4a0b60aee07187520f6afce0 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "13e39ceb",
"metadata": {},
"outputs": [],
"source": [
"import boto3\n",
"import wave\n",
"\n",
"# boto3.setup_default_session(profile_name=\"...\")\n",
"polly = boto3.client(\"polly\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a670d769",
"metadata": {},
"outputs": [],
"source": [
"lines = [\n",
" \"CATER is designed using infrastructure-as-code, and is deployed using a git-ops workflow. We're using ArgoCD which is an open-source git-ops tool designed for Kubernetes. This means all infrastructure is fully reproducible, and all approved code updates are automatically deployed by ArgoCD.\",\n",
" \"CATER consists of a set of applications (or components). By default we install everything required to run a T.R.E., but all components can be disabled or replaced by another implementation.\",\n",
" \"For the first three months work has focussed on this backend work of writing and deploying components, but we've now started integrating those components so they can be used through a frontend.\",\n",
"\n",
" \"You login to CATER using Keycloak. A username and password are used in this demo, but multi-factor authentication can be easily added, and Keycloak can federate with other identity providers using SAML or O.I.D.C.\",\n",
" \"In this demo we're using JupyterHub as a control plane for researcher workspaces.\",\n",
" \"You can see a list of projects and workspace types. This demo only has Ubuntu Mate desktops.\",\n",
" \"When you launch a workspace a new Kubernetes pod is created, project storage is mounted, and users are given access via Apache Guacamole which is an open-source remote desktop gateway.\",\n",
" \"As you can see you have a full desktop via a web browser\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf70124e",
"metadata": {},
"outputs": [],
"source": [
"voices = polly.describe_voices()[\"Voices\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "47c5c74e",
"metadata": {},
"outputs": [],
"source": [
"# voice = \"Brian\"\n",
"voice = \"Amy\"\n",
"# voice = \"Emma\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d166646",
"metadata": {},
"outputs": [],
"source": [
"def speak(text, outfile):\n",
" r = polly.synthesize_speech(\n",
" Engine=\"neural\",\n",
" LanguageCode=\"en-GB\",\n",
" OutputFormat=\"pcm\",\n",
" Text=text,\n",
" VoiceId=voice,\n",
" )\n",
" s = r[\"AudioStream\"]\n",
"\n",
" with wave.open(outfile, \"wb\") as wav:\n",
" wav.setparams((1, 2, 16000, 0, \"NONE\", \"NONE\"))\n",
" wav.writeframes(s.read())\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f62929a0",
"metadata": {},
"outputs": [],
"source": [
"for i, text in enumerate(lines):\n",
" print(text)\n",
" speak(text, f\"{i:02d}.wav\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5784e676-37aa-4e92-99bb-c34727329b32",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "07110973",
"metadata": {},
"outputs": [],
"source": [
"import ffmpeg\n",
"import os\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9afc0e1e-34d8-4928-bba8-4ceb89f5f57d",
"metadata": {},
"outputs": [],
"source": [
"# Define input file paths\n",
"audio_files = [\"00.wav\", \"01.wav\", \"02.wav\", \"03.wav\", \"04.wav\", \"05.wav\", \"06.wav\", \"07.wav\"]\n",
"output_file = \"concatenated_with_gaps.wav\"\n",
"# audio_codec = \"libmp3lame\" # Or \"aac\", \"pcm_s16le\", etc. based on desired output\n",
"audio_codec = \"pcm_s16le\"\n",
"\n",
"# Define the gap duration in seconds\n",
"START_DURATION = 2.0\n",
"GAP_DURATION = 2.0\n",
"END_DURATION = 2.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "752cf3cc-645a-4777-a535-89b2b6d761c4",
"metadata": {},
"outputs": [],
"source": [
"# --- Step 1: Get audio file properties for silence generation ---\n",
"# We need to match the sample rate and channel layout of the real audio files\n",
"# to ensure consistent silence. We'll just probe the first audio file.\n",
"def get_audio_properties(file_path):\n",
" \"\"\"\n",
" Gets sample rate and channel layout of an audio file using ffprobe.\n",
" \"\"\"\n",
" try:\n",
" probe = ffmpeg.probe(file_path)\n",
" audio_stream = next((s for s in probe[\"streams\"] if s[\"codec_type\"] == \"audio\"), None)\n",
" if audio_stream:\n",
" return {\n",
" \"sample_rate\": int(audio_stream[\"sample_rate\"]),\n",
" \"channel_layout\": audio_stream.get(\"channel_layout\", \"mono\"), # Default to mono if not found\n",
" \"duration\": float(audio_stream[\"duration\"]),\n",
" }\n",
" else:\n",
" raise ValueError(f\"No audio stream found for: {file_path}\")\n",
" except ffmpeg.Error as e:\n",
" print(f\"Error probing {file_path}: {e.stderr.decode()}\")\n",
" raise\n",
" except Exception as e:\n",
" print(f\"An unexpected error occurred while probing {file_path}: {e}\")\n",
" raise"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0149e5cd-a562-458a-a2d7-4b9ff4a24144",
"metadata": {},
"outputs": [],
"source": [
"# Properties for all audio files must match\n",
"audio_props = [get_audio_properties(f) for f in audio_files]\n",
"sample_rates = set(p[\"sample_rate\"] for p in audio_props)\n",
"if len(sample_rates) != 1:\n",
" print(f\"Multiple sample rates found: {sample_rates}\")\n",
"channel_layouts = set(p[\"channel_layout\"] for p in audio_props)\n",
"if len(channel_layouts) != 1:\n",
" print(f\"Multiple channel layouts found: {channel_layouts}\")\n",
"\n",
"first_audio_props = audio_props[0]\n",
"SAMPLE_RATE = next(iter(sample_rates))\n",
"CHANNEL_LAYOUT = next(iter(channel_layouts))\n",
"\n",
"print(f\"Using sample rate: {SAMPLE_RATE} Hz, channel layout: {CHANNEL_LAYOUT} for silence.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff6db6c4-e646-4e3b-8bcc-adf1699431f1",
"metadata": {},
"outputs": [],
"source": [
"# --- Step 2: Build the filtergraph including silence ---\n",
"graph_inputs = []\n",
"\n",
"# Insert gap at start\n",
"silence_input = ffmpeg.input(\n",
" f\"anullsrc=r={SAMPLE_RATE}:cl={CHANNEL_LAYOUT}\",\n",
" f=\"lavfi\",\n",
" t=START_DURATION,\n",
").audio\n",
"graph_inputs.append(silence_input)\n",
"\n",
"for i, audio_file in enumerate(audio_files):\n",
" graph_inputs.append(ffmpeg.input(audio_file).audio)\n",
"\n",
" # Add silence between tracks\n",
" if i < len(audio_files) - 1:\n",
" silence_input = ffmpeg.input(\n",
" f\"anullsrc=r={SAMPLE_RATE}:cl={CHANNEL_LAYOUT}\",\n",
" f=\"lavfi\", # \"lavfi\" is for libavfilter inputs like anullsrc\n",
" t=GAP_DURATION,\n",
" ).audio\n",
" graph_inputs.append(silence_input)\n",
"\n",
"# Insert gap at end\n",
"silence_input = ffmpeg.input(\n",
" f\"anullsrc=r={SAMPLE_RATE}:cl={CHANNEL_LAYOUT}\",\n",
" f=\"lavfi\",\n",
" t=END_DURATION,\n",
").audio\n",
"graph_inputs.append(silence_input)\n",
"\n",
"\n",
"# Need to tell the concat filter how many inputs it has in total.\n",
"total_inputs_for_concat = len(graph_inputs)\n",
"\n",
"# Apply the concat filter\n",
"concatenated_audio = ffmpeg.filter(\n",
" graph_inputs,\n",
" \"concat\",\n",
" n=total_inputs_for_concat,\n",
" v=0, # No video streams\n",
" a=1 # One audio stream per input\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "360882f9-5fba-476f-a741-539e3df749cb",
"metadata": {},
"outputs": [],
"source": [
"# --- Step 3: Define the output and run ---\n",
"output_stream = ffmpeg.output(\n",
" concatenated_audio,\n",
" output_file,\n",
" acodec=audio_codec,\n",
")\n",
"\n",
"try:\n",
" ffmpeg.run(output_stream, overwrite_output=True)\n",
" print(f\"Audio files concatenated to {output_file} with {GAP_DURATION}-second gaps successfully!\")\n",
"\n",
"except ffmpeg.Error as e:\n",
" print(e)\n",
" raise"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e8f960a-afa5-49c0-b423-c9eabb6d1b51",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment