Created
May 8, 2025 14:55
-
-
Save UltiRequiem/121d77473914ba28a8bd640e88154186 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { DeepgramClient, type PrerecordedSchema } from "@deepgram/sdk"; | |
import type { SpeakerSegment, Utterance } from "../utils/types"; | |
import { AppError } from "../utils/error"; | |
import type { S3Client } from "bun"; | |
import { FFmpeg } from "@ffmpeg/ffmpeg"; | |
import { randomUUID } from "crypto"; | |
/**
 * Transcribes an audio file with Deepgram, selects the longest stretches of
 * speech for each detected speaker, cuts those stretches out with ffmpeg.wasm
 * and uploads the resulting clips to S3.
 */
export class AudioService {
  /** Options sent with every Deepgram pre-recorded transcription request. */
  transcribingConfig: Partial<PrerecordedSchema>;
  /** ffmpeg.wasm instance; loaded lazily by `splitAndUploadSegments`. */
  ffmpeg: FFmpeg;

  constructor(
    private deepgramClient: DeepgramClient,
    private s3Client: S3Client
  ) {
    this.transcribingConfig = {
      utterances: true,
      detect_language: ["en", "es", "pt"],
      diarize: true,
      model: "nova-3",
    };
    this.ffmpeg = new FFmpeg();
  }

  /**
   * Split an audio file by speaker segments and upload each segment to S3.
   *
   * The input is written to ffmpeg's virtual filesystem under a per-call
   * unique name, each segment is extracted one after another, and the
   * extracted clips are then uploaded in parallel. Temporary files are removed
   * whether or not processing succeeds.
   *
   * @param file Audio file contents
   * @returns `speakerSegments` (as returned by `getSpeakerSegments`) and
   *   `uploadedSegments`, one entry per segment with its timing and signed URL
   * @throws AppError `NO_UTTERANCES` when transcription yields no utterances
   * @throws AppError `SEGMENT_PROCESSING_ERROR` when extracting or uploading
   *   a segment fails
   */
  public async splitAndUploadSegments(file: Buffer) {
    const speakerSegments = await this.getSpeakerSegments(file);

    if (!this.ffmpeg.loaded) {
      await this.ffmpeg.load();
    }

    // Unique names keep overlapping calls on the shared FFmpeg instance from
    // overwriting each other's files.
    const jobId = randomUUID();
    const inputFileName = `input_${jobId}.wav`;

    // Must be awaited: exec() may otherwise run before the input exists.
    await this.ffmpeg.writeFile(inputFileName, file);

    const clips: {
      speaker: SpeakerSegment["speaker"];
      index: number;
      segment: SpeakerSegment["segments"][number];
      data: Buffer;
    }[] = [];

    try {
      // NOTE(review): commands are issued one at a time on purpose; a single
      // FFmpeg instance is assumed not to benefit from (or be safe under)
      // interleaved exec() calls — confirm against the ffmpeg.wasm docs.
      for (const speakerData of speakerSegments) {
        for (const [index, segment] of speakerData.segments.entries()) {
          const clipFileName = `${jobId}_speaker_${speakerData.speaker}_segment_${index}.wav`;
          try {
            const data = await this.extractClip(
              inputFileName,
              clipFileName,
              segment.start,
              segment.duration
            );
            clips.push({ speaker: speakerData.speaker, index, segment, data });
          } catch (error) {
            throw this.segmentError(index, speakerData.speaker, error);
          }
        }
      }
    } finally {
      // The input is no longer needed once extraction ends, successfully or not.
      await this.removeTempFile(inputFileName);
    }

    const uploadedSegments = await Promise.all(
      clips.map(async ({ speaker, index, segment, data }) => {
        try {
          const s3Key = `segments/${randomUUID()}_speaker${speaker}_segment${index}.wav`;
          const url = await this.uploadToS3(data, s3Key);
          return {
            speaker,
            segmentIndex: index,
            start: segment.start,
            end: segment.end,
            duration: segment.duration,
            url,
          };
        } catch (error) {
          throw this.segmentError(index, speaker, error);
        }
      })
    );

    return {
      speakerSegments,
      uploadedSegments,
    };
  }

  /**
   * Transcribe the file and return, for every speaker that appears in the
   * utterances, up to three of that speaker's longest continuous segments.
   *
   * @param file Audio file contents
   * @throws AppError `NO_UTTERANCES` when the response contains no utterances
   */
  public async getSpeakerSegments(file: Buffer) {
    const { result, error } =
      await this.deepgramClient.listen.prerecorded.transcribeFile(
        file,
        this.transcribingConfig
      );

    if (error) {
      // Surface the underlying failure; without this the only visible symptom
      // is the "no utterances" error below.
      console.error("Deepgram transcription failed:", error);
    }

    const utterances = result?.results.utterances ?? [];
    if (utterances.length === 0) {
      throw new AppError("No utterances found in the audio", "NO_UTTERANCES");
    }

    // Set preserves first-seen order, so speakers come out in order of appearance.
    const speakers = new Set<number>();
    for (const utterance of utterances) {
      if (typeof utterance.speaker === "number") {
        speakers.add(utterance.speaker);
      }
    }

    const speakerSegments: SpeakerSegment[] = Array.from(
      speakers,
      (speaker) => ({
        speaker,
        segments: this.findTimestamps(speaker, utterances, 3),
      })
    );
    return speakerSegments;
  }

  /**
   * Merge consecutive utterances of one speaker into segments and return the
   * longest ones.
   *
   * A segment starts at the first utterance of the target speaker and is
   * extended until an utterance from a different speaker is encountered.
   *
   * @param targetSpeakerNumber Speaker whose segments are collected
   * @param utterances Utterances in the order they are to be scanned
   * @param wantedSegments Maximum number of segments to return
   * @returns Segments sorted by duration, longest first
   */
  private findTimestamps(
    targetSpeakerNumber: number,
    utterances: Utterance[],
    wantedSegments = 3
  ) {
    const segments: { start: number; end: number }[] = [];
    let current: { start: number; end: number } | null = null;

    for (const utterance of utterances) {
      if (utterance.speaker !== targetSpeakerNumber) {
        // Another speaker interrupts: close the open segment, if any.
        if (current !== null) {
          segments.push(current);
          current = null;
        }
        continue;
      }

      if (current === null) {
        current = { start: utterance.start, end: utterance.end };
      } else {
        current.end = utterance.end;
      }
    }

    if (current !== null) {
      segments.push(current);
    }

    return segments
      .map(({ start, end }) => ({ start, end, duration: end - start }))
      .sort((a, b) => b.duration - a.duration)
      .slice(0, Math.max(0, wantedSegments));
  }

  /**
   * Cut `duration` seconds starting at `start` out of `inputFileName` into
   * `clipFileName` and return the clip's bytes. The clip file is removed from
   * ffmpeg's filesystem before returning, on success and on failure.
   */
  private async extractClip(
    inputFileName: string,
    clipFileName: string,
    start: number,
    duration: number
  ): Promise<Buffer> {
    try {
      const exitCode = await this.ffmpeg.exec([
        "-i",
        inputFileName,
        "-ss",
        start.toString(),
        "-t",
        duration.toString(),
        "-acodec",
        "pcm_s16le",
        "-ar",
        "44100",
        clipFileName,
      ]);
      if (exitCode !== 0) {
        throw new Error(`ffmpeg exited with code ${exitCode}`);
      }

      const clipData = await this.ffmpeg.readFile(clipFileName);
      // Buffer.from copies the bytes, so deleting the file afterwards is safe.
      return Buffer.from(clipData);
    } finally {
      await this.removeTempFile(clipFileName);
    }
  }

  /**
   * Best-effort removal of a temporary file from ffmpeg's filesystem. Failures
   * are logged and ignored so cleanup never masks the original error.
   */
  private async removeTempFile(path: string): Promise<void> {
    try {
      await this.ffmpeg.deleteFile(path);
    } catch (error) {
      console.warn(`Failed to delete temporary file ${path}:`, error);
    }
  }

  /** Log a segment failure and build the AppError reported to callers. */
  private segmentError(
    index: number,
    speaker: SpeakerSegment["speaker"],
    error: unknown
  ): AppError {
    console.error(
      `Failed to process segment ${index} for speaker ${speaker}:`,
      error
    );
    return new AppError(
      `Failed to process segment ${index} for speaker ${speaker}`,
      "SEGMENT_PROCESSING_ERROR"
    );
  }

  /**
   * Upload a buffer to S3 under `key` and return a presigned URL for it.
   *
   * @throws AppError `S3_UPLOAD_ERROR` when writing or presigning fails
   */
  private async uploadToS3(buffer: Buffer, key: string) {
    try {
      const file = this.s3Client.file(key);
      await file.write(buffer);
      // NOTE(review): 43200 is presumably seconds (12 hours) — confirm.
      const signedUrl = file.presign({ expiresIn: 43200 });
      return signedUrl;
    } catch (error) {
      console.error(`Failed to upload ${key} to S3:`, error);
      throw new AppError(
        `Failed to upload audio segment to S3: ${
          error instanceof Error ? error.message : "Unknown error"
        }`,
        "S3_UPLOAD_ERROR"
      );
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment