🎯 fix: Prevent UI De-sync By Removing Redundant States (#5333)

* fix: remove local state from Dropdown causing de-sync

* refactor: cleanup STT code, avoid redundant states to prevent de-sync and side effects

* fix: reset transcript after sending final text to prevent data loss

* fix: clear timeout on component unmount to prevent memory leaks
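
For context, the de-sync these changes remove comes from copying a prop into local state, which snapshots the value on first render. A minimal sketch of the anti-pattern and its fix (hypothetical component names, not from this commit):

import React, { useState } from 'react';

// Anti-pattern: `useState(value)` only reads the prop on the first render,
// so later updates from the parent never reach the UI (de-sync).
function StaleLabel({ value }: { value: string }) {
  const [local] = useState(value);
  return <span>{local}</span>;
}

// Fix: render straight from the prop. The parent owns the state, so there
// is a single source of truth and nothing to fall out of sync.
function ControlledLabel({ value }: { value: string }) {
  return <span>{value}</span>;
}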
Danny Avila, 2025-01-16 17:38:59 -05:00, committed by GitHub
parent b55e695541
commit e309c6abef
8 changed files with 149 additions and 145 deletions

AudioRecorder.tsx

@@ -1,73 +1,79 @@
-import { useEffect } from 'react';
+import { useCallback } from 'react';
+import { useChatFormContext, useToastContext } from '~/Providers';
 import { ListeningIcon, Spinner } from '~/components/svg';
 import { useLocalize, useSpeechToText } from '~/hooks';
-import { useChatFormContext } from '~/Providers';
 import { TooltipAnchor } from '~/components/ui';
 import { globalAudioId } from '~/common';
 import { cn } from '~/utils';
 
 export default function AudioRecorder({
-  textAreaRef,
-  methods,
-  ask,
   isRTL,
   disabled,
+  ask,
+  methods,
+  textAreaRef,
+  isSubmitting,
 }: {
-  textAreaRef: React.RefObject<HTMLTextAreaElement>;
-  methods: ReturnType<typeof useChatFormContext>;
-  ask: (data: { text: string }) => void;
   isRTL: boolean;
   disabled: boolean;
+  ask: (data: { text: string }) => void;
+  methods: ReturnType<typeof useChatFormContext>;
+  textAreaRef: React.RefObject<HTMLTextAreaElement>;
+  isSubmitting: boolean;
 }) {
+  const { setValue, reset } = methods;
   const localize = useLocalize();
+  const { showToast } = useToastContext();
 
-  const handleTranscriptionComplete = (text: string) => {
-    if (text) {
-      const globalAudio = document.getElementById(globalAudioId) as HTMLAudioElement;
-      if (globalAudio) {
-        console.log('Unmuting global audio');
-        globalAudio.muted = false;
+  const onTranscriptionComplete = useCallback(
+    (text: string) => {
+      if (isSubmitting) {
+        showToast({
+          message: localize('com_ui_speech_while_submitting'),
+          status: 'error',
+        });
+        return;
       }
-      ask({ text });
-      methods.reset({ text: '' });
-      clearText();
-    }
-  };
+      if (text) {
+        const globalAudio = document.getElementById(globalAudioId) as HTMLAudioElement | null;
+        if (globalAudio) {
+          console.log('Unmuting global audio');
+          globalAudio.muted = false;
+        }
+        ask({ text });
+        reset({ text: '' });
+      }
+    },
+    [ask, reset, showToast, localize, isSubmitting],
+  );
 
-  const {
-    isListening,
-    isLoading,
-    startRecording,
-    stopRecording,
-    interimTranscript,
-    speechText,
-    clearText,
-  } = useSpeechToText(handleTranscriptionComplete);
-
-  useEffect(() => {
-    if (isListening && textAreaRef.current) {
-      methods.setValue('text', interimTranscript, {
+  const setText = useCallback(
+    (text: string) => {
+      setValue('text', text, {
        shouldValidate: true,
      });
-    } else if (textAreaRef.current) {
-      textAreaRef.current.value = speechText;
-      methods.setValue('text', speechText, { shouldValidate: true });
-    }
-  }, [interimTranscript, speechText, methods, textAreaRef]);
+    },
+    [setValue],
+  );
 
-  const handleStartRecording = async () => {
-    await startRecording();
-  };
+  const { isListening, isLoading, startRecording, stopRecording } = useSpeechToText(
+    setText,
+    onTranscriptionComplete,
+  );
 
-  const handleStopRecording = async () => {
-    await stopRecording();
-  };
-
   if (!textAreaRef.current) {
     return null;
   }
 
+  const handleStartRecording = async () => startRecording();
+  const handleStopRecording = async () => stopRecording();
+
   const renderIcon = () => {
-    if (isListening) {
+    if (isListening === true) {
       return <ListeningIcon className="stroke-red-500" />;
     }
-    if (isLoading) {
+    if (isLoading === true) {
       return <Spinner className="stroke-gray-700 dark:stroke-gray-300" />;
     }
     return <ListeningIcon className="stroke-gray-700 dark:stroke-gray-300" />;
@@ -77,7 +83,7 @@ export default function AudioRecorder({
     <TooltipAnchor
       id="audio-recorder"
       aria-label={localize('com_ui_use_micrphone')}
-      onClick={isListening ? handleStopRecording : handleStartRecording}
+      onClick={isListening === true ? handleStopRecording : handleStartRecording}
       disabled={disabled}
       className={cn(
         'absolute flex size-[35px] items-center justify-center rounded-full p-1 transition-colors hover:bg-surface-hover',
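
Condensed from the hunk above: AudioRecorder now pushes text into the form through two callbacks instead of pulling transcript state out of the hook and mirroring it in an effect, so no intermediate copy can drift. The essential wiring, minus JSX (a sketch of the diff's own flow, not a drop-in file):

// Interim transcripts stream straight into the form field...
const setText = useCallback(
  (text: string) => setValue('text', text, { shouldValidate: true }),
  [setValue],
);

// ...and the final transcript is submitted exactly once, rejected while a
// response is still streaming.
const onTranscriptionComplete = useCallback(
  (text: string) => {
    if (isSubmitting) {
      showToast({ message: localize('com_ui_speech_while_submitting'), status: 'error' });
      return;
    }
    if (text) {
      ask({ text });
      reset({ text: '' });
    }
  },
  [ask, reset, showToast, localize, isSubmitting],
);

const { isListening, isLoading, startRecording, stopRecording } = useSpeechToText(
  setText,
  onTranscriptionComplete,
);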

ChatForm.tsx

@@ -228,11 +228,12 @@ const ChatForm = ({ index = 0 }) => {
           </FileFormWrapper>
           {SpeechToText && (
             <AudioRecorder
-              disabled={!!disableInputs}
-              textAreaRef={textAreaRef}
-              ask={submitMessage}
               isRTL={isRTL}
               methods={methods}
+              ask={submitMessage}
+              textAreaRef={textAreaRef}
+              disabled={!!disableInputs}
+              isSubmitting={isSubmitting}
             />
           )}
           {TextToSpeech && automaticPlayback && <StreamAudio index={index} />}

FontSizeSelector.tsx

@@ -30,7 +30,6 @@ export default function FontSizeSelector() {
         onChange={handleChange}
         testId="font-size-selector"
         sizeClasses="w-[150px]"
-        anchor="bottom start"
       />
     </div>
   );

Dropdown.tsx

@@ -1,10 +1,10 @@
-import React, { useState } from 'react';
+import React from 'react';
 import * as Select from '@ariakit/react/select';
 import type { Option } from '~/common';
 import { cn } from '~/utils/';
 
 interface DropdownProps {
-  value: string;
+  value?: string;
   label?: string;
   onChange: (value: string) => void;
   options: string[] | Option[];
@@ -14,7 +14,7 @@ interface DropdownProps {
 }
 
 const Dropdown: React.FC<DropdownProps> = ({
-  value: initialValue,
+  value: selectedValue,
   label = '',
   onChange,
   options,
@@ -22,10 +22,7 @@ const Dropdown: React.FC<DropdownProps> = ({
   sizeClasses,
   testId = 'dropdown-menu',
 }) => {
-  const [selectedValue, setSelectedValue] = useState(initialValue);
-
   const handleChange = (value: string) => {
-    setSelectedValue(value);
     onChange(value);
   };
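
With the local state gone, Dropdown is fully controlled: it renders whatever `value` the parent passes, and `onChange` is the only path by which the selection moves. A minimal parent, sketched against the props above (the component, state names, and import path here are hypothetical):

import React, { useState } from 'react';
import Dropdown from './Dropdown'; // co-located import, assumed for the sketch

function FontSizeSetting() {
  // The parent owns the selection; Dropdown just renders it.
  const [fontSize, setFontSize] = useState('medium');
  return (
    <Dropdown
      value={fontSize}
      onChange={setFontSize}
      options={['small', 'medium', 'large']}
      testId="font-size-selector"
    />
  );
}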

useSpeechToText.ts

@@ -1,83 +1,48 @@
-import { useState, useEffect } from 'react';
 import useSpeechToTextBrowser from './useSpeechToTextBrowser';
 import useSpeechToTextExternal from './useSpeechToTextExternal';
 import useGetAudioSettings from './useGetAudioSettings';
 
-const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) => {
+const useSpeechToText = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+): {
+  isLoading?: boolean;
+  isListening?: boolean;
+  stopRecording: () => void | (() => Promise<void>);
+  startRecording: () => void | (() => Promise<void>);
+} => {
   const { speechToTextEndpoint } = useGetAudioSettings();
-  const [animatedText, setAnimatedText] = useState('');
   const externalSpeechToText = speechToTextEndpoint === 'external';
 
   const {
     isListening: speechIsListeningBrowser,
     isLoading: speechIsLoadingBrowser,
-    interimTranscript: interimTranscriptBrowser,
-    text: speechTextBrowser,
     startRecording: startSpeechRecordingBrowser,
     stopRecording: stopSpeechRecordingBrowser,
-  } = useSpeechToTextBrowser();
+  } = useSpeechToTextBrowser(setText, onTranscriptionComplete);
 
   const {
     isListening: speechIsListeningExternal,
     isLoading: speechIsLoadingExternal,
-    text: speechTextExternal,
     externalStartRecording: startSpeechRecordingExternal,
     externalStopRecording: stopSpeechRecordingExternal,
-    clearText,
-  } = useSpeechToTextExternal(handleTranscriptionComplete);
+  } = useSpeechToTextExternal(setText, onTranscriptionComplete);
 
   const isListening = externalSpeechToText ? speechIsListeningExternal : speechIsListeningBrowser;
   const isLoading = externalSpeechToText ? speechIsLoadingExternal : speechIsLoadingBrowser;
-  const speechTextForm = externalSpeechToText ? speechTextExternal : speechTextBrowser;
   const startRecording = externalSpeechToText
     ? startSpeechRecordingExternal
     : startSpeechRecordingBrowser;
   const stopRecording = externalSpeechToText
     ? stopSpeechRecordingExternal
     : stopSpeechRecordingBrowser;
-  const speechText =
-    isListening || (speechTextExternal && speechTextExternal.length > 0)
-      ? speechTextExternal
-      : speechTextForm || '';
-  // for a future real-time STT external
-  const interimTranscript = externalSpeechToText ? '' : interimTranscriptBrowser;
-
-  const animateTextTyping = (text: string) => {
-    const totalDuration = 2000;
-    const frameRate = 60;
-    const totalFrames = totalDuration / (1000 / frameRate);
-    const charsPerFrame = Math.ceil(text.length / totalFrames);
-    let currentIndex = 0;
-
-    const animate = () => {
-      currentIndex += charsPerFrame;
-      const currentText = text.substring(0, currentIndex);
-      setAnimatedText(currentText);
-
-      if (currentIndex < text.length) {
-        requestAnimationFrame(animate);
-      } else {
-        setAnimatedText(text);
-      }
-    };
-
-    requestAnimationFrame(animate);
-  };
-
-  useEffect(() => {
-    if (speechText && externalSpeechToText) {
-      animateTextTyping(speechText);
-    }
-  }, [speechText, externalSpeechToText]);
-
   return {
-    isListening,
     isLoading,
-    startRecording,
+    isListening,
     stopRecording,
-    interimTranscript,
-    speechText: externalSpeechToText ? animatedText : speechText,
-    clearText,
+    startRecording,
   };
 };

useSpeechToTextBrowser.ts

@@ -1,25 +1,72 @@
-import { useState, useEffect } from 'react';
+import { useEffect, useRef, useMemo } from 'react';
 import { useRecoilState } from 'recoil';
+import { useToastContext } from '~/Providers';
+import store from '~/store';
 import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
 import useGetAudioSettings from './useGetAudioSettings';
-import { useToastContext } from '~/Providers';
-import store from '~/store';
 
-const useSpeechToTextBrowser = () => {
+const useSpeechToTextBrowser = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+) => {
   const { showToast } = useToastContext();
-  const [languageSTT] = useRecoilState<string>(store.languageSTT);
-  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
   const { speechToTextEndpoint } = useGetAudioSettings();
   const isBrowserSTTEnabled = speechToTextEndpoint === 'browser';
-  const [isListening, setIsListening] = useState(false);
+  const lastTranscript = useRef<string | null>(null);
+  const lastInterim = useRef<string | null>(null);
+  const timeoutRef = useRef<NodeJS.Timeout | null>();
+  const [autoSendText] = useRecoilState(store.autoSendText);
+  const [languageSTT] = useRecoilState<string>(store.languageSTT);
+  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
 
   const {
-    interimTranscript,
-    finalTranscript,
     listening,
-    browserSupportsSpeechRecognition,
+    finalTranscript,
+    resetTranscript,
+    interimTranscript,
     isMicrophoneAvailable,
+    browserSupportsSpeechRecognition,
   } = useSpeechRecognition();
 
+  const isListening = useMemo(() => listening, [listening]);
+
+  useEffect(() => {
+    if (interimTranscript == null || interimTranscript === '') {
+      return;
+    }
+    if (lastInterim.current === interimTranscript) {
+      return;
+    }
+    setText(interimTranscript);
+    lastInterim.current = interimTranscript;
+  }, [setText, interimTranscript]);
+
+  useEffect(() => {
+    if (finalTranscript == null || finalTranscript === '') {
+      return;
+    }
+    if (lastTranscript.current === finalTranscript) {
+      return;
+    }
+    setText(finalTranscript);
+    lastTranscript.current = finalTranscript;
+    if (autoSendText > -1 && finalTranscript.length > 0) {
+      timeoutRef.current = setTimeout(() => {
+        onTranscriptionComplete(finalTranscript);
+        resetTranscript();
+      }, autoSendText * 1000);
+    }
+
+    return () => {
+      if (timeoutRef.current) {
+        clearTimeout(timeoutRef.current);
+      }
+    };
+  }, [setText, onTranscriptionComplete, resetTranscript, finalTranscript, autoSendText]);
+
   const toggleListening = () => {
     if (!browserSupportsSpeechRecognition) {
@@ -38,11 +85,9 @@ const useSpeechToTextBrowser = (
       return;
     }
 
-    if (listening) {
-      setIsListening(false);
+    if (isListening === true) {
       SpeechRecognition.stopListening();
     } else {
-      setIsListening(true);
       SpeechRecognition.startListening({
         language: languageSTT,
         continuous: autoTranscribeAudio,
@@ -61,17 +106,9 @@ const useSpeechToTextBrowser = (
     return () => window.removeEventListener('keydown', handleKeyDown);
   }, []);
 
-  useEffect(() => {
-    if (!listening) {
-      setIsListening(false);
-    }
-  }, [listening]);
-
   return {
     isListening,
     isLoading: false,
-    interimTranscript,
-    text: finalTranscript,
     startRecording: toggleListening,
     stopRecording: toggleListening,
   };
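
The auto-send logic above is a debounce whose timer must be cancelled when the effect re-runs or the component unmounts; that is the memory-leak fix named in the commit message. The same pattern in isolation (a minimal sketch; the hook name and default delay are hypothetical, not from the diff):

import { useEffect, useRef } from 'react';

function useAutoSend(text: string, onSend: (text: string) => void, delayMs = 3000) {
  const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  useEffect(() => {
    if (text === '') {
      return;
    }
    timeoutRef.current = setTimeout(() => onSend(text), delayMs);

    // Cleanup runs before the next effect and on unmount; without it a
    // pending timer would fire against an unmounted component and leak.
    return () => {
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, [text, onSend, delayMs]);
}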

useSpeechToTextExternal.ts

@@ -1,27 +1,31 @@
 import { useState, useEffect, useRef } from 'react';
 import { useRecoilState } from 'recoil';
 import { useSpeechToTextMutation } from '~/data-provider';
+import useGetAudioSettings from './useGetAudioSettings';
 import { useToastContext } from '~/Providers';
 import store from '~/store';
-import useGetAudioSettings from './useGetAudioSettings';
 
-const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void) => {
+const useSpeechToTextExternal = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+) => {
   const { showToast } = useToastContext();
   const { speechToTextEndpoint } = useGetAudioSettings();
   const isExternalSTTEnabled = speechToTextEndpoint === 'external';
-  const [speechToText] = useRecoilState<boolean>(store.speechToText);
-  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
-  const [autoSendText] = useRecoilState(store.autoSendText);
-  const [text, setText] = useState<string>('');
-  const [isListening, setIsListening] = useState(false);
-  const audioStream = useRef<MediaStream | null>(null);
-  const animationFrameIdRef = useRef<number | null>(null);
-  const audioContextRef = useRef<AudioContext | null>(null);
-  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
   const [permission, setPermission] = useState(false);
+  const [isListening, setIsListening] = useState(false);
   const [audioChunks, setAudioChunks] = useState<Blob[]>([]);
   const [isRequestBeingMade, setIsRequestBeingMade] = useState(false);
   const [minDecibels] = useRecoilState(store.decibelValue);
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+  const audioStream = useRef<MediaStream | null>(null);
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const animationFrameIdRef = useRef<number | null>(null);
+  const [autoSendText] = useRecoilState(store.autoSendText);
+  const [speechToText] = useRecoilState<boolean>(store.speechToText);
+  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
 
   const { mutate: processAudio, isLoading: isProcessing } = useSpeechToTextMutation({
     onSuccess: (data) => {
@@ -54,10 +58,6 @@ const useSpeechToTextExternal = (
     }
   };
 
-  const clearText = () => {
-    setText('');
-  };
-
   const getMicrophonePermission = async () => {
     try {
       const streamData = await navigator.mediaDevices.getUserMedia({
@@ -226,11 +226,9 @@ const useSpeechToTextExternal = (
   return {
     isListening,
-    isLoading: isProcessing,
-    text,
-    externalStartRecording,
     externalStopRecording,
-    clearText,
+    externalStartRecording,
+    isLoading: isProcessing,
   };
 };

Eng.ts (English localization)

@@ -895,4 +895,5 @@ export default {
   com_ui_decline: 'I do not accept',
   com_ui_terms_and_conditions: 'Terms and Conditions',
   com_ui_no_terms_content: 'No terms and conditions content to display',
+  com_ui_speech_while_submitting: 'Can\'t submit speech while a response is being generated',
 };