Skip to content

Instantly share code, notes, and snippets.

@jarmitage
Created November 25, 2025 19:50
Show Gist options
  • Select an option

  • Save jarmitage/72605173bc8f330f8ccafd798ee7bef1 to your computer and use it in GitHub Desktop.

Select an option

Save jarmitage/72605173bc8f330f8ccafd798ee7bef1 to your computer and use it in GitHub Desktop.
Annoying podcast host remover
#!/usr/bin/env python3
"""
Extract audio segments for Speaker 1 from an SRT file and M4A audio.
"""
import re
from pathlib import Path
from pydub import AudioSegment
def parse_timestamp(ts: str) -> int:
    """Convert an SRT timestamp (``HH:MM:SS,mmm``) to milliseconds.

    Surrounding whitespace is stripped first. The pattern is anchored at the
    start only, so any text following the timestamp on the same string is
    ignored rather than rejected. Besides the standard comma, a period is
    also accepted before the millisecond field (``HH:MM:SS.mmm``).

    The last field is read as an integer number of milliseconds, so
    ``"00:00:01,5"`` means 1005 ms, not 1500 ms.

    Args:
        ts: Timestamp text to convert.

    Returns:
        The timestamp as a whole number of milliseconds.

    Raises:
        ValueError: If ``ts`` does not start with a recognisable timestamp.
    """
    match = re.match(r"(\d+):(\d+):(\d+)[,.](\d+)", ts.strip())
    if not match:
        raise ValueError(f"Invalid timestamp format: {ts}")
    hours, minutes, seconds, millis = map(int, match.groups())
    return (hours * 3600 + minutes * 60 + seconds) * 1000 + millis
def parse_srt(srt_path: Path) -> list[tuple[int, int, str, str]]:
    """Read an SRT file into ``(start_ms, end_ms, speaker, text)`` tuples.

    Each cue is expected to hold a sequence number, a ``start --> end``
    timing line, and one or more lines of text. Cues with fewer than three
    lines, or whose second line has no ``-->`` arrow, are skipped. Text
    lines are joined with single spaces; a leading ``Speaker N:`` label is
    split off as the speaker, otherwise the speaker is ``"Unknown"``.

    Args:
        srt_path: Path of the UTF-8 encoded SRT file.

    Returns:
        One tuple per accepted cue, in file order.

    Raises:
        ValueError: Propagated from ``parse_timestamp`` when a timing line
            contains an unparseable timestamp.
    """
    raw = srt_path.read_text(encoding="utf-8")
    parsed = []
    # Cues are separated by one or more blank (or whitespace-only) lines.
    for cue in re.split(r"\n\s*\n", raw.strip()):
        cue_lines = cue.strip().split("\n")
        # Need at least: sequence number, timing line, one line of text.
        if len(cue_lines) < 3:
            continue
        _sequence, timing, *body = cue_lines

        arrow = re.match(r"(.+?)\s*-->\s*(.+)", timing)
        if arrow is None:
            continue
        start_ms = parse_timestamp(arrow.group(1))
        end_ms = parse_timestamp(arrow.group(2))

        # Collapse multi-line text, then peel off an optional speaker label.
        joined = " ".join(body)
        labelled = re.match(r"(Speaker \d+):\s*(.*)", joined)
        if labelled is None:
            speaker, text = "Unknown", joined
        else:
            speaker, text = labelled.groups()
        parsed.append((start_ms, end_ms, speaker, text))
    return parsed
def extract_speaker_audio(
    audio_path: Path,
    srt_path: Path,
    output_path: Path,
    target_speaker: str = "Speaker 1",
) -> None:
    """Write one speaker's subtitle-timed audio slices, concatenated, to a file.

    The audio is loaded with pydub, the SRT is parsed with ``parse_srt``,
    and every cue whose speaker equals ``target_speaker`` is sliced out of
    the audio and appended in order. Progress is reported on stdout. If no
    cue matches, a message is printed and nothing is written.

    Args:
        audio_path: Source audio file.
        srt_path: SRT file whose cue text carries ``Speaker N:`` labels.
        output_path: Destination file; its suffix selects the export format.
        target_speaker: Speaker label to keep.
    """
    print(f"Loading audio from {audio_path}...")
    audio = AudioSegment.from_file(audio_path)

    print(f"Parsing SRT from {srt_path}...")
    # Only the time range of each matching cue is needed from here on.
    spans = [
        (start_ms, end_ms)
        for start_ms, end_ms, speaker, _text in parse_srt(srt_path)
        if speaker == target_speaker
    ]
    total = len(spans)
    print(f"Found {total} segments for {target_speaker}")
    if total == 0:
        print(f"No segments found for {target_speaker}")
        return

    print("Extracting audio segments...")
    combined = AudioSegment.empty()
    for done, (start_ms, end_ms) in enumerate(spans, start=1):
        combined += audio[start_ms:end_ms]
        # Report progress every 50 segments.
        if done % 50 == 0:
            print(f" Processed {done}/{total} segments...")

    print(f"Exporting to {output_path}...")
    suffix = output_path.suffix.lstrip(".").lower()
    # pydub uses "ipod" as the format name for m4a output.
    export_format = "ipod" if suffix == "m4a" else suffix
    combined.export(output_path, format=export_format)

    seconds = len(combined) / 1000
    print(f"Done! Output duration: {seconds:.1f} seconds ({seconds/60:.1f} minutes)")
def main() -> None:
    """Extract Speaker 1 from ``podcast.m4a`` using ``podcast.srt``.

    Both input files are looked up in the directory containing this script.
    The result is written alongside them as ``podcast_speaker1.m4a``.
    """
    base_dir = Path(__file__).parent
    audio_path = base_dir / "podcast.m4a"
    srt_path = base_dir / "podcast.srt"
    # Write to a separate file. Exporting to audio_path would overwrite the
    # source recording, and a later run would then cut the already-trimmed
    # audio using the original SRT timings.
    output_path = base_dir / "podcast_speaker1.m4a"
    extract_speaker_audio(audio_path, srt_path, output_path, target_speaker="Speaker 1")


# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment