Skip to content

Instantly share code, notes, and snippets.

@UltiRequiem
Created May 8, 2025 14:55
Show Gist options
  • Save UltiRequiem/121d77473914ba28a8bd640e88154186 to your computer and use it in GitHub Desktop.
Save UltiRequiem/121d77473914ba28a8bd640e88154186 to your computer and use it in GitHub Desktop.
import { DeepgramClient, type PrerecordedSchema } from "@deepgram/sdk";
import type { SpeakerSegment, Utterance } from "../utils/types";
import { AppError } from "../utils/error";
import type { S3Client } from "bun";
import { FFmpeg } from "@ffmpeg/ffmpeg";
import { randomUUID } from "crypto";
/**
 * Transcribes a recording with Deepgram (diarization enabled), picks the
 * longest continuous stretches of speech for each detected speaker, cuts those
 * stretches out of the recording with ffmpeg and uploads each clip to S3.
 */
export class AudioService {
  /** Options passed to Deepgram on every `transcribeFile` call. */
  transcribingConfig: Partial<PrerecordedSchema>;
  /** ffmpeg instance used for cutting clips; loaded on first use. */
  ffmpeg: FFmpeg;

  constructor(
    private deepgramClient: DeepgramClient,
    private s3Client: S3Client
  ) {
    this.transcribingConfig = {
      utterances: true,
      detect_language: ["en", "es", "pt"],
      diarize: true,
      model: "nova-3",
    };
    this.ffmpeg = new FFmpeg();
  }

  /**
   * Split an audio file by speaker segments and upload every segment to S3.
   *
   * @param file Raw audio data of the recording.
   * @returns `speakerSegments` (the selected time ranges per speaker) and
   *   `uploadedSegments` (one entry per uploaded clip, ordered by speaker and
   *   then by segment index, each with its presigned URL).
   * @throws AppError `NO_UTTERANCES` when the transcription has no utterances,
   *   `SEGMENT_PROCESSING_ERROR` when cutting or uploading a clip fails.
   */
  public async splitAndUploadSegments(file: Buffer) {
    const speakerSegments = await this.getSpeakerSegments(file);

    if (!this.ffmpeg.loaded) {
      await this.ffmpeg.load();
    }

    // Every call works under its own file-name prefix so that overlapping
    // calls on the same service instance cannot overwrite each other's files.
    const jobId = randomUUID();
    const inputFileName = `${jobId}_input.wav`;

    try {
      // NOTE(review): @ffmpeg/ffmpeg appears to transfer a Uint8Array's
      // underlying ArrayBuffer to its worker, which would detach the caller's
      // Buffer. A copy is handed over defensively; confirm and drop the copy
      // if memory matters more.
      await this.ffmpeg.writeFile(inputFileName, new Uint8Array(file));

      // allSettled (rather than all) keeps the input file alive until every
      // segment job has finished, even when one of them fails early.
      const outcomes = await Promise.allSettled(
        speakerSegments.flatMap((speakerData) =>
          speakerData.segments.map((segment, index) =>
            this.extractAndUploadSegment({
              jobId,
              inputFileName,
              speaker: speakerData.speaker,
              segment,
              index,
            })
          )
        )
      );

      const uploadedSegments = outcomes.map((outcome) => {
        if (outcome.status === "rejected") {
          // Surface the first failure (in speaker/segment order).
          throw outcome.reason;
        }
        return outcome.value;
      });

      return {
        speakerSegments,
        uploadedSegments,
      };
    } finally {
      await this.removeFfmpegFile(inputFileName);
    }
  }

  /**
   * Transcribe the audio and compute, for every speaker Deepgram identified,
   * that speaker's longest continuous segments (at most three per speaker).
   *
   * @param file Raw audio data of the recording.
   * @returns One entry per distinct speaker, in order of first appearance.
   * @throws AppError `NO_UTTERANCES` when the response contains no utterances.
   */
  public async getSpeakerSegments(file: Buffer) {
    const { result, error } =
      await this.deepgramClient.listen.prerecorded.transcribeFile(
        file,
        this.transcribingConfig
      );

    if (error) {
      // Keep the failure visible; without this it would only show up as the
      // generic "no utterances" error below.
      console.error("Deepgram transcription failed:", error);
    }

    const utterances = result?.results.utterances ?? [];
    if (utterances.length === 0) {
      throw new AppError("No utterances found in the audio", "NO_UTTERANCES");
    }

    // A Set keeps insertion order, so speakers stay in order of appearance.
    const speakers = new Set<number>();
    for (const utterance of utterances) {
      if (typeof utterance.speaker === "number") {
        speakers.add(utterance.speaker);
      }
    }

    const speakerSegments: SpeakerSegment[] = Array.from(
      speakers,
      (speaker) => ({
        speaker,
        segments: this.findTimestamps(speaker, utterances, 3),
      })
    );
    return speakerSegments;
  }

  /**
   * Cut one segment out of the input file, upload it to S3 and describe it.
   * The temporary output file is removed from ffmpeg storage in all cases.
   *
   * @throws AppError `SEGMENT_PROCESSING_ERROR` on any failure.
   */
  private async extractAndUploadSegment(job: {
    jobId: string;
    inputFileName: string;
    speaker: SpeakerSegment["speaker"];
    segment: SpeakerSegment["segments"][number];
    index: number;
  }) {
    const { jobId, inputFileName, speaker, segment, index } = job;
    const segmentFileName = `${jobId}_speaker_${speaker}_segment_${index}.wav`;

    try {
      const exitCode = await this.ffmpeg.exec([
        "-i",
        inputFileName,
        // Start offset and length of the clip.
        "-ss",
        segment.start.toString(),
        "-t",
        segment.duration.toString(),
        // Output as 16-bit PCM at 44.1 kHz.
        "-acodec",
        "pcm_s16le",
        "-ar",
        "44100",
        segmentFileName,
      ]);
      if (exitCode !== 0) {
        throw new Error(`ffmpeg exited with code ${exitCode}`);
      }

      const segmentData = await this.ffmpeg.readFile(segmentFileName);
      const s3Key = `segments/${randomUUID()}_speaker${speaker}_segment${index}.wav`;
      const url = await this.uploadToS3(Buffer.from(segmentData), s3Key);

      return {
        speaker,
        segmentIndex: index,
        start: segment.start,
        end: segment.end,
        duration: segment.duration,
        url,
      };
    } catch (error) {
      console.error(
        `Failed to process segment ${index} for speaker ${speaker}:`,
        error
      );
      throw new AppError(
        `Failed to process segment ${index} for speaker ${speaker}`,
        "SEGMENT_PROCESSING_ERROR"
      );
    } finally {
      await this.removeFfmpegFile(segmentFileName);
    }
  }

  /**
   * Best-effort removal of a file from ffmpeg storage. Failures are logged
   * and not rethrown so that cleanup never masks the error that caused it.
   */
  private async removeFfmpegFile(path: string): Promise<void> {
    try {
      await this.ffmpeg.deleteFile(path);
    } catch (error) {
      console.error(`Failed to remove ${path} from ffmpeg storage:`, error);
    }
  }

  /**
   * Find the longest continuous segments spoken by one speaker.
   *
   * Consecutive utterances of the target speaker are merged into a single
   * segment; an utterance by any other speaker ends the current segment.
   *
   * @param targetSpeakerNumber Speaker whose segments are collected.
   * @param utterances Utterances in the order they should be scanned.
   * @param wantedSegments Maximum number of segments to return; values below
   *   one yield an empty list, fractional values are rounded down.
   * @returns Segments sorted by duration, longest first.
   */
  private findTimestamps(
    targetSpeakerNumber: number,
    utterances: Utterance[],
    wantedSegments = 3
  ) {
    const segments: {
      start: number;
      end: number;
    }[] = [];
    let current: (typeof segments)[0] | null = null;

    for (const utterance of utterances) {
      if (utterance.speaker !== targetSpeakerNumber) {
        // Another speaker interrupts: close the segment in progress, if any.
        if (current !== null) {
          segments.push(current);
          current = null;
        }
        continue;
      }
      if (current === null) {
        current = { start: utterance.start, end: utterance.end };
      } else {
        current.end = utterance.end;
      }
    }
    if (current !== null) {
      segments.push(current);
    }

    // slice() with a clamped limit instead of assigning `.length`, which
    // throws a RangeError for negative or fractional values.
    const limit = Math.max(0, Math.floor(wantedSegments));
    return segments
      .map((s) => ({
        start: s.start,
        end: s.end,
        duration: s.end - s.start,
      }))
      .sort((a, b) => b.duration - a.duration)
      .slice(0, limit);
  }

  /**
   * Upload a buffer to S3 under `key` and return a presigned URL for it.
   *
   * @throws AppError `S3_UPLOAD_ERROR` when writing or presigning fails.
   */
  private async uploadToS3(buffer: Buffer, key: string) {
    try {
      const file = this.s3Client.file(key);
      await file.write(buffer);
      // 43200 = 12 hours, assuming expiresIn is in seconds (TODO confirm).
      const signedUrl = file.presign({ expiresIn: 43200 });
      return signedUrl;
    } catch (error) {
      console.error(`Failed to upload ${key} to S3:`, error);
      throw new AppError(
        `Failed to upload audio segment to S3: ${
          error instanceof Error ? error.message : "Unknown error"
        }`,
        "S3_UPLOAD_ERROR"
      );
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment