Last active
July 29, 2021 21:17
-
-
Save lmcarreiro/89fb4b2a2fb6385dc73e88aa3d737f72 to your computer and use it in GitHub Desktop.
STT+VAD article - useSpeechToText.diff
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ...
- const mutedRef = React.useRef<boolean>(muted);
-
- // We use a ref to check if the microphone is muted or not, to avoid recreating
- // the Azure's SpeechRecognizer instance every time we mute/unmute.
- React.useEffect(() => {
-   mutedRef.current = muted;
- }, [muted]);
+ const speakerActive = useAudioActive(stream);
+ const streamingFlagRef = React.useRef<boolean>(false);
+ const shouldStream = !muted && speakerActive && running.current;
+
+ // Control the streaming flag, based on the voice activity detection (that uses hark) and the mute/unmute flag
+ React.useEffect(() => {
+   if (shouldStream) {
+     console.log("Voice activity detected, starting streaming current buffer + live streaming...");
+     streamingFlagRef.current = true;
+   } else {
+     console.log("No voice activity detected, stopped streaming.");
+     streamingFlagRef.current = false;
+   }
+ }, [shouldStream]);
// ...
// Initialize the Azure Speech to Text instance and bind the necessary events
React.useEffect(() => {
  // ...
  const onAudioProcess = (ev: AudioProcessingEvent) => {
    const block = {
      duration: ev.inputBuffer.duration,
      bytes: convertFloat32ToInt16(ev.inputBuffer.getChannelData(0)),
    };
-   if (!mutedRef.current) {
+   if (streamingFlagRef.current) {
      pushStream.write(block.bytes);
    }
  };
}, [speechToTextEnabled, newMessage, stream]);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment