Skip to content

Instantly share code, notes, and snippets.

@pplantinga
Created April 4, 2025 19:43
Show Gist options
  • Save pplantinga/945839c24d22cb59835aa4a3ebbf235e to your computer and use it in GitHub Desktop.
Save pplantinga/945839c24d22cb59835aa4a3ebbf235e to your computer and use it in GitHub Desktop.
Segment File from PRAAT TextGrid
"""Segments an audio file based on a TextGrid file from PRAAT, loosely based on the following repo:
https://github.com/ThiagoCF05/PraatSegmentation
Assumes you want a new file for every "named" (non-empty) interval, i.e. every
TextGrid entry of the form:
    xmin = {start time}
    xmax = {end time}
    text = "{name}"
Author: Peter Plantinga
"""
from textgrid_py3 import TextGrid
from scipy.io import wavfile
from dataclasses import dataclass
import argparse
import pathlib
import re
# Matches one labelled TextGrid entry and captures, in order, the xmin value,
# the xmax value and the quoted text (which must contain at least one
# character, so empty labels are not matched).
interval_matcher = re.compile(
    r'xmin = (\d+\.?\d*)\s+'
    r'xmax = (\d+\.?\d*)\s+'
    r'text = "(.+)"',
    re.MULTILINE,
)
def load_intervals(text):
    """Return one ``Interval`` per match of ``interval_matcher`` in ``text``.

    Args:
        text: Full contents of a TextGrid file as a string.

    Returns:
        A list of ``Interval`` objects, in the order the matches occur.
    """
    return list(map(Interval.from_match, interval_matcher.findall(text)))
@dataclass
class Interval:
    """A labelled time span: a name plus start and stop times."""

    name: str
    start: float
    stop: float

    @classmethod
    def from_match(cls, match):
        """Build an interval from a ``(start, stop, name)`` sequence.

        The first two items are converted with ``float``; the third is
        stored unchanged as the name.
        """
        label = match[2]
        begin = float(match[0])
        end = float(match[1])
        return cls(name=label, start=begin, stop=end)
@dataclass
class TextGrid:
    """Named intervals parsed from one PRAAT TextGrid file.

    Attributes:
        name: Stem of the file the intervals were read from.
        intervals: Intervals found in the file, in order of occurrence.
    """

    name: str
    intervals: list[Interval]

    @classmethod
    def from_file(cls, filepath):
        """Build a grid from the TextGrid file at ``filepath``.

        Args:
            filepath: Path of the TextGrid file (``str`` or ``pathlib.Path``);
                its stem becomes the grid's name.

        Returns:
            A new instance holding every interval that ``load_intervals``
            finds in the file's text.
        """
        filepath = pathlib.Path(filepath)
        name = filepath.stem
        # NOTE(review): the file is decoded with the platform default
        # encoding; confirm this matches how the TextGrid files were saved.
        with open(filepath) as f:
            filetext = f.read()
        intervals = load_intervals(filetext)
        return cls(name, intervals)

    def write_segments(self, write_dir, audio, name):
        """Write one ``.wav`` file per interval into ``write_dir``.

        Each output file is named ``{name}_{interval name}.wav``. Intervals
        that share a name map to the same path, so the later one overwrites
        the earlier one.

        Args:
            write_dir: Output directory (``str`` or ``pathlib.Path``). It is
                created, including parents, if it does not exist.
            audio: ``(sample_rate, samples)`` pair, as returned by
                ``scipy.io.wavfile.read``.
            name: Prefix used for every output file name.
        """
        fs, samples = audio
        write_dir = pathlib.Path(write_dir)
        write_dir.mkdir(parents=True, exist_ok=True)
        for interval in self.intervals:
            # Convert times in seconds to sample indices.
            s1 = int(interval.start * fs)
            s2 = int(interval.stop * fs)
            # Append ".wav" explicitly: Path.with_suffix would replace
            # whatever follows the last dot, truncating interval names that
            # contain one (e.g. "Mr. Smith" would become "..._Mr.wav").
            fname = write_dir / f"{name}_{interval.name}.wav"
            wavfile.write(fname, fs, samples[s1:s2])
def main(read_dir, write_dir):
    """Segment every ``.wav`` file found directly in ``read_dir``.

    For each audio file, the ``.TextGrid`` file with the same stem in the
    same directory is parsed, and its intervals are written as separate
    audio files into ``write_dir``, prefixed with the audio file's stem.

    Args:
        read_dir: Directory containing the ``.wav`` / ``.TextGrid`` pairs.
        write_dir: Directory the segment files are written to.
    """
    source = pathlib.Path(read_dir)
    target = pathlib.Path(write_dir)
    for wav_path in source.glob("*.wav"):
        grid_path = wav_path.with_suffix('.TextGrid')

        print('Reading file', wav_path)
        rate_and_samples = wavfile.read(wav_path)

        print('Reading file', grid_path)
        annotation = TextGrid.from_file(grid_path)

        print('Writing segments for', len(annotation.intervals), 'intervals')
        annotation.write_segments(target, rate_and_samples, wav_path.stem)
if __name__ == '__main__':
    # Command-line entry point: two positional arguments, the source and
    # destination directories, are passed straight to main().
    cli = argparse.ArgumentParser(
        description='Segmentation of a .wav file based on its .TextGrid annotation.',
    )
    cli.add_argument("src", type=str, help="directory with the read files")
    cli.add_argument("dest", type=str, help="directory where the chunks should be written")
    options = cli.parse_args()
    main(options.src, options.dest)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment