🎯 fix: Prevent UI De-sync By Removing Redundant States (#5333)
* fix: remove local state from Dropdown causing de-sync
* refactor: clean up STT code, avoid redundant states to prevent de-sync and side effects
* fix: reset transcript after sending final text to prevent data loss
* fix: clear timeout on component unmount to prevent memory leaks
This commit is contained in:
parent b55e695541
commit e309c6abef
8 changed files with 149 additions and 145 deletions (the three STT hook files are excerpted below)
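The common thread across these fixes is replacing duplicated state with a single source of truth: instead of mirroring a library value (such as react-speech-recognition's listening flag) into a separate useState that can fall out of step, the hooks now derive it. A minimal TypeScript sketch of the pattern (the hook names here are illustrative; the Dropdown change itself is not excerpted on this page):

import { useMemo, useState } from 'react';
import { useSpeechRecognition } from 'react-speech-recognition';

// Anti-pattern: a local copy of the library's listening flag. Every code
// path must remember to call setIsListening, and any missed call leaves
// the UI showing a stale state.
const useListeningCopied = () => {
  const { listening } = useSpeechRecognition();
  const [isListening, setIsListening] = useState(false);
  // ...setIsListening(listening) must be kept in sync manually
  return isListening;
};

// The commit's approach: derive the value, so it can never diverge.
const useListeningDerived = () => {
  const { listening } = useSpeechRecognition();
  return useMemo(() => listening, [listening]);
};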
useSpeechToText.ts

@@ -1,83 +1,48 @@
-import { useState, useEffect } from 'react';
 import useSpeechToTextBrowser from './useSpeechToTextBrowser';
 import useSpeechToTextExternal from './useSpeechToTextExternal';
 import useGetAudioSettings from './useGetAudioSettings';

-const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) => {
+const useSpeechToText = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+): {
+  isLoading?: boolean;
+  isListening?: boolean;
+  stopRecording: () => void | (() => Promise<void>);
+  startRecording: () => void | (() => Promise<void>);
+} => {
   const { speechToTextEndpoint } = useGetAudioSettings();
-  const [animatedText, setAnimatedText] = useState('');
   const externalSpeechToText = speechToTextEndpoint === 'external';

   const {
     isListening: speechIsListeningBrowser,
     isLoading: speechIsLoadingBrowser,
-    interimTranscript: interimTranscriptBrowser,
-    text: speechTextBrowser,
     startRecording: startSpeechRecordingBrowser,
     stopRecording: stopSpeechRecordingBrowser,
-  } = useSpeechToTextBrowser();
+  } = useSpeechToTextBrowser(setText, onTranscriptionComplete);

   const {
     isListening: speechIsListeningExternal,
     isLoading: speechIsLoadingExternal,
-    text: speechTextExternal,
     externalStartRecording: startSpeechRecordingExternal,
     externalStopRecording: stopSpeechRecordingExternal,
-    clearText,
-  } = useSpeechToTextExternal(handleTranscriptionComplete);
+  } = useSpeechToTextExternal(setText, onTranscriptionComplete);

   const isListening = externalSpeechToText ? speechIsListeningExternal : speechIsListeningBrowser;
   const isLoading = externalSpeechToText ? speechIsLoadingExternal : speechIsLoadingBrowser;
-  const speechTextForm = externalSpeechToText ? speechTextExternal : speechTextBrowser;

   const startRecording = externalSpeechToText
     ? startSpeechRecordingExternal
     : startSpeechRecordingBrowser;
   const stopRecording = externalSpeechToText
     ? stopSpeechRecordingExternal
     : stopSpeechRecordingBrowser;
-  const speechText =
-    isListening || (speechTextExternal && speechTextExternal.length > 0)
-      ? speechTextExternal
-      : speechTextForm || '';
-  // for a future real-time STT external
-  const interimTranscript = externalSpeechToText ? '' : interimTranscriptBrowser;
-
-  const animateTextTyping = (text: string) => {
-    const totalDuration = 2000;
-    const frameRate = 60;
-    const totalFrames = totalDuration / (1000 / frameRate);
-    const charsPerFrame = Math.ceil(text.length / totalFrames);
-    let currentIndex = 0;
-
-    const animate = () => {
-      currentIndex += charsPerFrame;
-      const currentText = text.substring(0, currentIndex);
-      setAnimatedText(currentText);
-
-      if (currentIndex < text.length) {
-        requestAnimationFrame(animate);
-      } else {
-        setAnimatedText(text);
-      }
-    };
-
-    requestAnimationFrame(animate);
-  };
-
-  useEffect(() => {
-    if (speechText && externalSpeechToText) {
-      animateTextTyping(speechText);
-    }
-  }, [speechText, externalSpeechToText]);

   return {
-    isListening,
     isLoading,
-    startRecording,
+    isListening,
     stopRecording,
-    interimTranscript,
-    speechText: externalSpeechToText ? animatedText : speechText,
-    clearText,
+    startRecording,
   };
 };
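The new signature moves ownership of the transcript text to the caller: the hook writes into the caller's setter and signals completion through a callback. A sketch of how a consumer might wire this up (the consumer-side names and the import path are assumptions, not code from the PR):

import { useState } from 'react';
import useSpeechToText from '~/hooks/Input/useSpeechToText';

// Hypothetical consumer: the chat form owns the text state; the hook writes
// interim and final transcripts into it via setText and fires the callback
// when a final transcript should be submitted.
const useChatFormSTT = (submitMessage: (text: string) => void) => {
  const [text, setText] = useState('');

  const { isLoading, isListening, startRecording, stopRecording } = useSpeechToText(
    setText,
    (finalText: string) => {
      submitMessage(finalText);
      setText(''); // clear the composer after auto-send
    },
  );

  return { text, isLoading, isListening, startRecording, stopRecording };
};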
useSpeechToTextBrowser.ts

@@ -1,25 +1,72 @@
-import { useEffect, useState } from 'react';
+import { useEffect, useRef, useMemo } from 'react';
 import { useRecoilState } from 'recoil';
-import { useToastContext } from '~/Providers';
-import store from '~/store';
 import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
 import useGetAudioSettings from './useGetAudioSettings';
+import { useToastContext } from '~/Providers';
+import store from '~/store';

-const useSpeechToTextBrowser = () => {
+const useSpeechToTextBrowser = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+) => {
   const { showToast } = useToastContext();
-  const [languageSTT] = useRecoilState<string>(store.languageSTT);
-  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
   const { speechToTextEndpoint } = useGetAudioSettings();
   const isBrowserSTTEnabled = speechToTextEndpoint === 'browser';
-  const [isListening, setIsListening] = useState(false);
+  const lastTranscript = useRef<string | null>(null);
+  const lastInterim = useRef<string | null>(null);
+  const timeoutRef = useRef<NodeJS.Timeout | null>();
+  const [autoSendText] = useRecoilState(store.autoSendText);
+  const [languageSTT] = useRecoilState<string>(store.languageSTT);
+  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);

   const {
+    interimTranscript,
+    finalTranscript,
     listening,
-    browserSupportsSpeechRecognition,
-    finalTranscript,
-    interimTranscript,
+    resetTranscript,
     isMicrophoneAvailable,
+    browserSupportsSpeechRecognition,
   } = useSpeechRecognition();
+  const isListening = useMemo(() => listening, [listening]);

+  useEffect(() => {
+    if (interimTranscript == null || interimTranscript === '') {
+      return;
+    }
+
+    if (lastInterim.current === interimTranscript) {
+      return;
+    }
+
+    setText(interimTranscript);
+    lastInterim.current = interimTranscript;
+  }, [setText, interimTranscript]);
+
+  useEffect(() => {
+    if (finalTranscript == null || finalTranscript === '') {
+      return;
+    }
+
+    if (lastTranscript.current === finalTranscript) {
+      return;
+    }
+
+    setText(finalTranscript);
+    lastTranscript.current = finalTranscript;
+    if (autoSendText > -1 && finalTranscript.length > 0) {
+      timeoutRef.current = setTimeout(() => {
+        onTranscriptionComplete(finalTranscript);
+        resetTranscript();
+      }, autoSendText * 1000);
+    }
+
+    return () => {
+      if (timeoutRef.current) {
+        clearTimeout(timeoutRef.current);
+      }
+    };
+  }, [setText, onTranscriptionComplete, resetTranscript, finalTranscript, autoSendText]);
+
   const toggleListening = () => {
     if (!browserSupportsSpeechRecognition) {
@@ -38,11 +85,9 @@ const useSpeechToTextBrowser = () => {
       return;
     }

-    if (listening) {
-      setIsListening(false);
+    if (isListening === true) {
       SpeechRecognition.stopListening();
     } else {
-      setIsListening(true);
       SpeechRecognition.startListening({
         language: languageSTT,
         continuous: autoTranscribeAudio,
@@ -61,17 +106,9 @@ const useSpeechToTextBrowser = () => {
     return () => window.removeEventListener('keydown', handleKeyDown);
   }, []);

-  useEffect(() => {
-    if (!listening) {
-      setIsListening(false);
-    }
-  }, [listening]);
-
   return {
     isListening,
     isLoading: false,
-    interimTranscript,
-    text: finalTranscript,
     startRecording: toggleListening,
     stopRecording: toggleListening,
   };
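The final-transcript effect above carries two of the fixes from the commit message: resetTranscript() runs after the text is handed to onTranscriptionComplete, so the next utterance starts clean without losing the one just sent, and the auto-send timer is cleared in the effect's cleanup function, which React runs on unmount and before each re-run. The cleanup pattern in isolation (generic names, not this hook's API):

import { useEffect, useRef } from 'react';

// Generic sketch of the cleanup pattern: schedule a delayed action when a
// value settles, and cancel it if the component unmounts or the value
// changes before the delay elapses.
const useDelayedAction = (value: string, delayMs: number, action: (v: string) => void) => {
  const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  useEffect(() => {
    if (value === '') {
      return;
    }
    timeoutRef.current = setTimeout(() => action(value), delayMs);

    // Cleanup runs on unmount and before the next effect invocation,
    // which is what prevents the leaked timer.
    return () => {
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, [value, delayMs, action]);
};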
useSpeechToTextExternal.ts

@@ -1,27 +1,31 @@
 import { useState, useEffect, useRef } from 'react';
 import { useRecoilState } from 'recoil';
 import { useSpeechToTextMutation } from '~/data-provider';
-import useGetAudioSettings from './useGetAudioSettings';
 import { useToastContext } from '~/Providers';
 import store from '~/store';
+import useGetAudioSettings from './useGetAudioSettings';

-const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void) => {
+const useSpeechToTextExternal = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+) => {
   const { showToast } = useToastContext();
   const { speechToTextEndpoint } = useGetAudioSettings();
   const isExternalSTTEnabled = speechToTextEndpoint === 'external';
-  const [text, setText] = useState<string>('');
+  const [speechToText] = useRecoilState<boolean>(store.speechToText);
+  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
+  const [autoSendText] = useRecoilState(store.autoSendText);
+  const [isListening, setIsListening] = useState(false);
+  const audioStream = useRef<MediaStream | null>(null);
+  const animationFrameIdRef = useRef<number | null>(null);
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);

   const [permission, setPermission] = useState(false);
-  const [isListening, setIsListening] = useState(false);
   const [audioChunks, setAudioChunks] = useState<Blob[]>([]);
   const [isRequestBeingMade, setIsRequestBeingMade] = useState(false);

   const [minDecibels] = useRecoilState(store.decibelValue);
-  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
-  const audioStream = useRef<MediaStream | null>(null);
-  const audioContextRef = useRef<AudioContext | null>(null);
-  const animationFrameIdRef = useRef<number | null>(null);
-  const [autoSendText] = useRecoilState(store.autoSendText);
-  const [speechToText] = useRecoilState<boolean>(store.speechToText);
-  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);

   const { mutate: processAudio, isLoading: isProcessing } = useSpeechToTextMutation({
     onSuccess: (data) => {
@@ -54,10 +58,6 @@ const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void
     }
   };

-  const clearText = () => {
-    setText('');
-  };
-
   const getMicrophonePermission = async () => {
     try {
       const streamData = await navigator.mediaDevices.getUserMedia({
@@ -226,11 +226,9 @@ const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void

   return {
     isListening,
-    isLoading: isProcessing,
-    text,
-    externalStartRecording,
     externalStopRecording,
-    clearText,
+    externalStartRecording,
+    isLoading: isProcessing,
   };
 };
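Both backends now share the same contract: the hook no longer owns a useState copy of the transcript or exposes text/clearText; it writes through the setText it receives, so the composer's text is the only copy. In miniature (names illustrative, not the hooks' real internals):

// Before, each hook kept its own transcript copy:
//   const [text, setText] = useState<string>('');
//   return { text, clearText: () => setText('') };
// After, the caller injects the setter, so only one copy of the text exists:
const useLiftedSTT = (
  setText: (text: string) => void,
  onTranscriptionComplete: (text: string) => void,
) => {
  const handleTranscript = (transcript: string, isFinal: boolean) => {
    setText(transcript); // write straight into the caller's state
    if (isFinal) {
      onTranscriptionComplete(transcript); // e.g. trigger auto-send
    }
  };

  return { handleTranscript };
};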