From e309c6abef5757ee6b1d4a9c94e52145ce068b72 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Thu, 16 Jan 2025 17:38:59 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=AF=20fix:=20Prevent=20UI=20De-sync=20?= =?UTF-8?q?By=20Removing=20Redundant=20States=20(#5333)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: remove local state from Dropdown causing de-sync * refactor: cleanup STT code, avoid redundant states to prevent de-sync and side effects * fix: reset transcript after sending final text to prevent data loss * fix: clear timeout on component unmount to prevent memory leaks --- .../components/Chat/Input/AudioRecorder.tsx | 98 ++++++++++--------- client/src/components/Chat/Input/ChatForm.tsx | 7 +- .../SettingsTabs/Chat/FontSizeSelector.tsx | 1 - client/src/components/ui/Dropdown.tsx | 9 +- client/src/hooks/Input/useSpeechToText.ts | 63 +++--------- .../src/hooks/Input/useSpeechToTextBrowser.ts | 79 +++++++++++---- .../hooks/Input/useSpeechToTextExternal.ts | 36 ++++--- client/src/localization/languages/Eng.ts | 1 + 8 files changed, 149 insertions(+), 145 deletions(-) diff --git a/client/src/components/Chat/Input/AudioRecorder.tsx b/client/src/components/Chat/Input/AudioRecorder.tsx index a8754749ad..96e29ec502 100644 --- a/client/src/components/Chat/Input/AudioRecorder.tsx +++ b/client/src/components/Chat/Input/AudioRecorder.tsx @@ -1,73 +1,79 @@ -import { useEffect } from 'react'; +import { useCallback } from 'react'; +import { useChatFormContext, useToastContext } from '~/Providers'; import { ListeningIcon, Spinner } from '~/components/svg'; import { useLocalize, useSpeechToText } from '~/hooks'; -import { useChatFormContext } from '~/Providers'; import { TooltipAnchor } from '~/components/ui'; import { globalAudioId } from '~/common'; import { cn } from '~/utils'; export default function AudioRecorder({ - textAreaRef, - methods, - ask, isRTL, disabled, + ask, + methods, + textAreaRef, + isSubmitting, }: { - textAreaRef: React.RefObject; - methods: ReturnType; - ask: (data: { text: string }) => void; isRTL: boolean; disabled: boolean; + ask: (data: { text: string }) => void; + methods: ReturnType; + textAreaRef: React.RefObject; + isSubmitting: boolean; }) { + const { setValue, reset } = methods; const localize = useLocalize(); + const { showToast } = useToastContext(); - const handleTranscriptionComplete = (text: string) => { - if (text) { - const globalAudio = document.getElementById(globalAudioId) as HTMLAudioElement; - if (globalAudio) { - console.log('Unmuting global audio'); - globalAudio.muted = false; + const onTranscriptionComplete = useCallback( + (text: string) => { + if (isSubmitting) { + showToast({ + message: localize('com_ui_speech_while_submitting'), + status: 'error', + }); + return; } - ask({ text }); - methods.reset({ text: '' }); - clearText(); - } - }; + if (text) { + const globalAudio = document.getElementById(globalAudioId) as HTMLAudioElement | null; + if (globalAudio) { + console.log('Unmuting global audio'); + globalAudio.muted = false; + } + ask({ text }); + reset({ text: '' }); + } + }, + [ask, reset, showToast, localize, isSubmitting], + ); - const { - isListening, - isLoading, - startRecording, - stopRecording, - interimTranscript, - speechText, - clearText, - } = useSpeechToText(handleTranscriptionComplete); - - useEffect(() => { - if (isListening && textAreaRef.current) { - methods.setValue('text', interimTranscript, { + const setText = useCallback( + (text: string) => { + setValue('text', text, { shouldValidate: true, }); - } else if (textAreaRef.current) { - textAreaRef.current.value = speechText; - methods.setValue('text', speechText, { shouldValidate: true }); - } - }, [interimTranscript, speechText, methods, textAreaRef]); + }, + [setValue], + ); - const handleStartRecording = async () => { - await startRecording(); - }; + const { isListening, isLoading, startRecording, stopRecording } = useSpeechToText( + setText, + onTranscriptionComplete, + ); - const handleStopRecording = async () => { - await stopRecording(); - }; + if (!textAreaRef.current) { + return null; + } + + const handleStartRecording = async () => startRecording(); + + const handleStopRecording = async () => stopRecording(); const renderIcon = () => { - if (isListening) { + if (isListening === true) { return ; } - if (isLoading) { + if (isLoading === true) { return ; } return ; @@ -77,7 +83,7 @@ export default function AudioRecorder({ { {SpeechToText && ( )} {TextToSpeech && automaticPlayback && } diff --git a/client/src/components/Nav/SettingsTabs/Chat/FontSizeSelector.tsx b/client/src/components/Nav/SettingsTabs/Chat/FontSizeSelector.tsx index e140c8a4d7..b442c86cdc 100644 --- a/client/src/components/Nav/SettingsTabs/Chat/FontSizeSelector.tsx +++ b/client/src/components/Nav/SettingsTabs/Chat/FontSizeSelector.tsx @@ -30,7 +30,6 @@ export default function FontSizeSelector() { onChange={handleChange} testId="font-size-selector" sizeClasses="w-[150px]" - anchor="bottom start" /> ); diff --git a/client/src/components/ui/Dropdown.tsx b/client/src/components/ui/Dropdown.tsx index 785cef36de..f89ae7dc78 100644 --- a/client/src/components/ui/Dropdown.tsx +++ b/client/src/components/ui/Dropdown.tsx @@ -1,10 +1,10 @@ -import React, { useState } from 'react'; +import React from 'react'; import * as Select from '@ariakit/react/select'; import type { Option } from '~/common'; import { cn } from '~/utils/'; interface DropdownProps { - value: string; + value?: string; label?: string; onChange: (value: string) => void; options: string[] | Option[]; @@ -14,7 +14,7 @@ interface DropdownProps { } const Dropdown: React.FC = ({ - value: initialValue, + value: selectedValue, label = '', onChange, options, @@ -22,10 +22,7 @@ const Dropdown: React.FC = ({ sizeClasses, testId = 'dropdown-menu', }) => { - const [selectedValue, setSelectedValue] = useState(initialValue); - const handleChange = (value: string) => { - setSelectedValue(value); onChange(value); }; diff --git a/client/src/hooks/Input/useSpeechToText.ts b/client/src/hooks/Input/useSpeechToText.ts index da09926b4e..705b870dc6 100644 --- a/client/src/hooks/Input/useSpeechToText.ts +++ b/client/src/hooks/Input/useSpeechToText.ts @@ -1,83 +1,48 @@ -import { useState, useEffect } from 'react'; import useSpeechToTextBrowser from './useSpeechToTextBrowser'; import useSpeechToTextExternal from './useSpeechToTextExternal'; import useGetAudioSettings from './useGetAudioSettings'; -const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) => { +const useSpeechToText = ( + setText: (text: string) => void, + onTranscriptionComplete: (text: string) => void, +): { + isLoading?: boolean; + isListening?: boolean; + stopRecording: () => void | (() => Promise); + startRecording: () => void | (() => Promise); +} => { const { speechToTextEndpoint } = useGetAudioSettings(); - const [animatedText, setAnimatedText] = useState(''); const externalSpeechToText = speechToTextEndpoint === 'external'; const { isListening: speechIsListeningBrowser, isLoading: speechIsLoadingBrowser, - interimTranscript: interimTranscriptBrowser, - text: speechTextBrowser, startRecording: startSpeechRecordingBrowser, stopRecording: stopSpeechRecordingBrowser, - } = useSpeechToTextBrowser(); + } = useSpeechToTextBrowser(setText, onTranscriptionComplete); const { isListening: speechIsListeningExternal, isLoading: speechIsLoadingExternal, - text: speechTextExternal, externalStartRecording: startSpeechRecordingExternal, externalStopRecording: stopSpeechRecordingExternal, - clearText, - } = useSpeechToTextExternal(handleTranscriptionComplete); + } = useSpeechToTextExternal(setText, onTranscriptionComplete); const isListening = externalSpeechToText ? speechIsListeningExternal : speechIsListeningBrowser; const isLoading = externalSpeechToText ? speechIsLoadingExternal : speechIsLoadingBrowser; - const speechTextForm = externalSpeechToText ? speechTextExternal : speechTextBrowser; + const startRecording = externalSpeechToText ? startSpeechRecordingExternal : startSpeechRecordingBrowser; const stopRecording = externalSpeechToText ? stopSpeechRecordingExternal : stopSpeechRecordingBrowser; - const speechText = - isListening || (speechTextExternal && speechTextExternal.length > 0) - ? speechTextExternal - : speechTextForm || ''; - // for a future real-time STT external - const interimTranscript = externalSpeechToText ? '' : interimTranscriptBrowser; - - const animateTextTyping = (text: string) => { - const totalDuration = 2000; - const frameRate = 60; - const totalFrames = totalDuration / (1000 / frameRate); - const charsPerFrame = Math.ceil(text.length / totalFrames); - let currentIndex = 0; - - const animate = () => { - currentIndex += charsPerFrame; - const currentText = text.substring(0, currentIndex); - setAnimatedText(currentText); - - if (currentIndex < text.length) { - requestAnimationFrame(animate); - } else { - setAnimatedText(text); - } - }; - - requestAnimationFrame(animate); - }; - - useEffect(() => { - if (speechText && externalSpeechToText) { - animateTextTyping(speechText); - } - }, [speechText, externalSpeechToText]); return { - isListening, isLoading, - startRecording, + isListening, stopRecording, - interimTranscript, - speechText: externalSpeechToText ? animatedText : speechText, - clearText, + startRecording, }; }; diff --git a/client/src/hooks/Input/useSpeechToTextBrowser.ts b/client/src/hooks/Input/useSpeechToTextBrowser.ts index 75393efc72..1d31c3348d 100644 --- a/client/src/hooks/Input/useSpeechToTextBrowser.ts +++ b/client/src/hooks/Input/useSpeechToTextBrowser.ts @@ -1,25 +1,72 @@ -import { useEffect, useState } from 'react'; +import { useEffect, useRef, useMemo } from 'react'; import { useRecoilState } from 'recoil'; -import { useToastContext } from '~/Providers'; -import store from '~/store'; import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'; import useGetAudioSettings from './useGetAudioSettings'; +import { useToastContext } from '~/Providers'; +import store from '~/store'; -const useSpeechToTextBrowser = () => { +const useSpeechToTextBrowser = ( + setText: (text: string) => void, + onTranscriptionComplete: (text: string) => void, +) => { const { showToast } = useToastContext(); - const [languageSTT] = useRecoilState(store.languageSTT); - const [autoTranscribeAudio] = useRecoilState(store.autoTranscribeAudio); const { speechToTextEndpoint } = useGetAudioSettings(); const isBrowserSTTEnabled = speechToTextEndpoint === 'browser'; - const [isListening, setIsListening] = useState(false); + + const lastTranscript = useRef(null); + const lastInterim = useRef(null); + const timeoutRef = useRef(); + const [autoSendText] = useRecoilState(store.autoSendText); + const [languageSTT] = useRecoilState(store.languageSTT); + const [autoTranscribeAudio] = useRecoilState(store.autoTranscribeAudio); const { - interimTranscript, - finalTranscript, listening, - browserSupportsSpeechRecognition, + finalTranscript, + resetTranscript, + interimTranscript, isMicrophoneAvailable, + browserSupportsSpeechRecognition, } = useSpeechRecognition(); + const isListening = useMemo(() => listening, [listening]); + + useEffect(() => { + if (interimTranscript == null || interimTranscript === '') { + return; + } + + if (lastInterim.current === interimTranscript) { + return; + } + + setText(interimTranscript); + lastInterim.current = interimTranscript; + }, [setText, interimTranscript]); + + useEffect(() => { + if (finalTranscript == null || finalTranscript === '') { + return; + } + + if (lastTranscript.current === finalTranscript) { + return; + } + + setText(finalTranscript); + lastTranscript.current = finalTranscript; + if (autoSendText > -1 && finalTranscript.length > 0) { + timeoutRef.current = setTimeout(() => { + onTranscriptionComplete(finalTranscript); + resetTranscript(); + }, autoSendText * 1000); + } + + return () => { + if (timeoutRef.current) { + clearTimeout(timeoutRef.current); + } + }; + }, [setText, onTranscriptionComplete, resetTranscript, finalTranscript, autoSendText]); const toggleListening = () => { if (!browserSupportsSpeechRecognition) { @@ -38,11 +85,9 @@ const useSpeechToTextBrowser = () => { return; } - if (listening) { - setIsListening(false); + if (isListening === true) { SpeechRecognition.stopListening(); } else { - setIsListening(true); SpeechRecognition.startListening({ language: languageSTT, continuous: autoTranscribeAudio, @@ -61,17 +106,9 @@ const useSpeechToTextBrowser = () => { return () => window.removeEventListener('keydown', handleKeyDown); }, []); - useEffect(() => { - if (!listening) { - setIsListening(false); - } - }, [listening]); - return { isListening, isLoading: false, - interimTranscript, - text: finalTranscript, startRecording: toggleListening, stopRecording: toggleListening, }; diff --git a/client/src/hooks/Input/useSpeechToTextExternal.ts b/client/src/hooks/Input/useSpeechToTextExternal.ts index ea96f31f51..b9f0ee94d8 100644 --- a/client/src/hooks/Input/useSpeechToTextExternal.ts +++ b/client/src/hooks/Input/useSpeechToTextExternal.ts @@ -1,27 +1,31 @@ import { useState, useEffect, useRef } from 'react'; import { useRecoilState } from 'recoil'; import { useSpeechToTextMutation } from '~/data-provider'; +import useGetAudioSettings from './useGetAudioSettings'; import { useToastContext } from '~/Providers'; import store from '~/store'; -import useGetAudioSettings from './useGetAudioSettings'; -const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void) => { +const useSpeechToTextExternal = ( + setText: (text: string) => void, + onTranscriptionComplete: (text: string) => void, +) => { const { showToast } = useToastContext(); const { speechToTextEndpoint } = useGetAudioSettings(); const isExternalSTTEnabled = speechToTextEndpoint === 'external'; - const [speechToText] = useRecoilState(store.speechToText); - const [autoTranscribeAudio] = useRecoilState(store.autoTranscribeAudio); - const [autoSendText] = useRecoilState(store.autoSendText); - const [text, setText] = useState(''); - const [isListening, setIsListening] = useState(false); + const audioStream = useRef(null); + const animationFrameIdRef = useRef(null); + const audioContextRef = useRef(null); + const mediaRecorderRef = useRef(null); + const [permission, setPermission] = useState(false); + const [isListening, setIsListening] = useState(false); const [audioChunks, setAudioChunks] = useState([]); const [isRequestBeingMade, setIsRequestBeingMade] = useState(false); + const [minDecibels] = useRecoilState(store.decibelValue); - const mediaRecorderRef = useRef(null); - const audioStream = useRef(null); - const audioContextRef = useRef(null); - const animationFrameIdRef = useRef(null); + const [autoSendText] = useRecoilState(store.autoSendText); + const [speechToText] = useRecoilState(store.speechToText); + const [autoTranscribeAudio] = useRecoilState(store.autoTranscribeAudio); const { mutate: processAudio, isLoading: isProcessing } = useSpeechToTextMutation({ onSuccess: (data) => { @@ -54,10 +58,6 @@ const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void } }; - const clearText = () => { - setText(''); - }; - const getMicrophonePermission = async () => { try { const streamData = await navigator.mediaDevices.getUserMedia({ @@ -226,11 +226,9 @@ const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void return { isListening, - isLoading: isProcessing, - text, - externalStartRecording, externalStopRecording, - clearText, + externalStartRecording, + isLoading: isProcessing, }; }; diff --git a/client/src/localization/languages/Eng.ts b/client/src/localization/languages/Eng.ts index 64aa137140..a33c49b78e 100644 --- a/client/src/localization/languages/Eng.ts +++ b/client/src/localization/languages/Eng.ts @@ -895,4 +895,5 @@ export default { com_ui_decline: 'I do not accept', com_ui_terms_and_conditions: 'Terms and Conditions', com_ui_no_terms_content: 'No terms and conditions content to display', + com_ui_speech_while_submitting: 'Can\'t submit speech while a response is being generated', };