Created
July 25, 2023 15:02
-
-
Save hrishioa/0aa300edbf262116a7ea1bb550ebff3d to your computer and use it in GitHub Desktop.
Script for combining the output of diart and whisper into a single Speaker-labelled SRT File
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Hrishi Olickel
// https://olickel.com
// 25 July 2023
const fs = require('fs'); | |
const readline = require('readline'); | |
// Shared state: maps a diarization segment's start time (seconds, formatted
// with three decimals, e.g. '12.345') to the display name of its speaker.
const speakers = {};

// Display names indexed by the number in the RTTM speaker label
// ('speaker0' -> speakerNames[0], 'speaker1' -> speakerNames[1], ...).
const speakerNames = ['Hrishi', 'Nick'];

// Input/output paths. They can optionally be overridden on the command line:
//   node <script> [rttmFile] [srtFile] [outputSrtFile]
// When an argument is omitted the original hard-coded file name is used.
const [rttmArg, srtArg, outputSrtArg] = process.argv.slice(2);
const rttmFilename = rttmArg || 'podcastPart1.rttm';
const srtFilename = srtArg || 'podcastPart1.srt';
const outputSrtFilename = outputSrtArg || 'podcastPart1Annotated.srt';

// Line-by-line reader over the RTTM diarization file.
const readRTTM = readline.createInterface({
  input: fs.createReadStream(rttmFilename),
  output: process.stdout,
  terminal: false,
});
// Parse one RTTM line and record which speaker starts talking at its onset.
// Fields used (0-based, whitespace-separated):
//   [3] segment start in seconds
//   [7] speaker label such as 'speaker1'
// Field [4] (segment duration) is not needed: only start times are stored.
readRTTM.on('line', (line) => {
  console.log('Reading RTTM line: ', line);

  // Split on any run of whitespace so tabs or repeated spaces do not shift
  // the field positions. Blank or truncated lines are skipped rather than
  // crashing on a missing field.
  const lineParts = line.trim().split(/\s+/);
  if (lineParts.length < 8) {
    console.log('Skipping malformed RTTM line: ', line);
    return;
  }

  const timeStart = parseFloat(lineParts[3]);
  if (Number.isNaN(timeStart)) {
    // A non-numeric start would otherwise be stored under the key 'NaN'.
    console.log('Skipping malformed RTTM line: ', line);
    return;
  }

  const speakerLabel = lineParts[7];
  const speakerIndex = parseInt(speakerLabel.replace('speaker', ''), 10);
  // Fall back to the raw label when it has no entry in speakerNames, so an
  // unexpected extra speaker is still labelled instead of stored as undefined.
  const speaker = speakerNames[speakerIndex] || speakerLabel;

  speakers[timeStart.toFixed(3)] = speaker;
  console.log('Mapped speaker: ', speaker, ' to time start: ', timeStart.toFixed(3));
});
// Once the whole RTTM file has been read, stream the SRT file line by line and
// write an annotated copy in which the first text line of each cue is prefixed
// with '<speaker>: '.
readRTTM.on('close', () => {
  console.log('Finished reading RTTM file');

  const readSRT = readline.createInterface({
    input: fs.createReadStream(srtFilename),
    output: process.stdout,
    terminal: false,
  });
  const writeStream = fs.createWriteStream(outputSrtFilename);

  // Matches an SRT cue timing line, e.g. '00:00:01,000 --> 00:00:04,000'.
  const timestampPattern = /(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})/;

  // Speaker waiting to be attached to the next non-empty text line. It is
  // cleared after use so only the first text line of a cue is prefixed.
  let currentSpeaker = '';

  readSRT.on('line', (line) => {
    console.log('Reading SRT line: ', line);
    let outputLine = line;

    const match = line.match(timestampPattern);
    if (match) {
      const timeStart = timeToSeconds(match[1]);
      console.log('Matched SRT timestamp, time start: ', timeStart);
      currentSpeaker = findSpeaker(timeStart);
      console.log('Found speaker: ', currentSpeaker, ' for time start: ', timeStart);
    } else if (currentSpeaker && line.trim().length > 0) {
      console.log('Inserting speaker name into dialogue: ', currentSpeaker);
      outputLine = `${currentSpeaker}: ${line}`;
      currentSpeaker = '';
    }

    console.log('Writing line to merged file: ', outputLine);
    writeStream.write(`${outputLine}\n`);
  });

  readSRT.on('close', () => {
    console.log('Finished reading SRT file');
    // Flush pending writes and close the output file explicitly.
    writeStream.end();
  });
});
/**
 * Convert an SRT-style timestamp to a number of seconds.
 *
 * Accepts 'HH:MM:SS,mmm' as well as the shorter 'MM:SS,mmm' and 'SS,mmm'
 * forms. The decimal separator of the seconds field may be ',' or '.'.
 *
 * @param {string} time - Colon-separated timestamp, e.g. '01:02:03,250'.
 * @returns {number} The time in seconds; NaN when a field cannot be parsed
 *   by parseFloat.
 */
function timeToSeconds(time) {
  console.log('Converting time to seconds: ', time);
  // Fold the fields left to right in base 60: ((HH * 60) + MM) * 60 + SS.
  // This gives the same result as HH * 3600 + MM * 60 + SS for three fields
  // and also handles timestamps that omit the leading fields.
  return time
    .split(':')
    .reduce((total, field) => total * 60 + parseFloat(field.replace(',', '.')), 0);
}
/**
 * Look up who is speaking at a given time, using the module-level `speakers`
 * map (segment start time in seconds, as a string key -> speaker name).
 *
 * The speaker of the latest segment that starts at or before `timeStart` is
 * returned, including when `timeStart` falls after the last segment start.
 * A time that precedes every segment is attributed to the earliest segment's
 * speaker.
 *
 * @param {number} timeStart - Time in seconds to look up.
 * @returns {string|undefined} The speaker name, or undefined when no segments
 *   are known or `timeStart` is NaN.
 */
function findSpeaker(timeStart) {
  console.log('Searching for speaker for time start: ', timeStart);

  // Keep each start time paired with its speaker so no string round-trip is
  // needed for the lookup, and sort numerically so the result does not depend
  // on the order in which segments were inserted.
  const segments = Object.keys(speakers)
    .map((key) => ({ start: parseFloat(key), name: speakers[key] }))
    .sort((a, b) => a.start - b.start);

  if (segments.length === 0 || Number.isNaN(timeStart)) {
    console.log('Speaker not found for time start: ', timeStart);
    return undefined;
  }

  // Walk forward while segments start at or before timeStart; the last one
  // reached is the active segment. Starting from segments[0] covers times
  // that precede the first segment.
  let active = segments[0];
  for (const segment of segments) {
    if (segment.start > timeStart) {
      break;
    }
    active = segment;
  }

  console.log('Found speaker: ', active.name, ' for time start: ', timeStart);
  return active.name;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment