Last active
July 29, 2021 21:17
-
-
Save lmcarreiro/89fb4b2a2fb6385dc73e88aa3d737f72 to your computer and use it in GitHub Desktop.
STT+VAD article - useSpeechToText.diff
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ...
- const mutedRef = React.useRef<boolean>(muted);
-
- // We use a ref to check if the microphone is muted or not, to avoid recreating
- // the Azure's SpeechRecognizer instance every time we mute/unmute.
- React.useEffect(() => {
-   mutedRef.current = muted;
- }, [muted]);
+ const speakerActive = useAudioActive(stream);
+ const streamingFlagRef = React.useRef<boolean>(false);
+ const shouldStream = !muted && speakerActive && running.current;
+
+ // Control the streaming flag, based on the voice activity detection (that uses hark) and the mute/unmute flag
+ React.useEffect(() => {
+   if (shouldStream) {
+     console.log("Voice activity detected, starting streaming current buffer + live streaming...");
+     streamingFlagRef.current = true;
+   } else {
+     console.log("No voice activity detected, stopped streaming.");
+     streamingFlagRef.current = false;
+   }
+ }, [shouldStream]);
// ...
// Initialize the Azure Speech to Text instance and bind the necessary events
React.useEffect(() => {
  // ...
  const onAudioProcess = (ev: AudioProcessingEvent) => {
    const block = {
      duration: ev.inputBuffer.duration,
      bytes: convertFloat32ToInt16(ev.inputBuffer.getChannelData(0)),
    };
-   if (!mutedRef.current) {
+   if (streamingFlagRef.current) {
      pushStream.write(block.bytes);
    }
  };
}, [speechToTextEnabled, newMessage, stream]);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment