Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-16 16:30:15 +01:00)
🎯 fix: Prevent UI De-sync By Removing Redundant States (#5333)
* fix: remove local state from Dropdown causing de-sync
* refactor: cleanup STT code, avoid redundant states to prevent de-sync and side effects
* fix: reset transcript after sending final text to prevent data loss
* fix: clear timeout on component unmount to prevent memory leaks
parent b55e695541
commit e309c6abef
8 changed files with 149 additions and 145 deletions
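Background for the fix: when a component copies a prop into its own useState, the copy and the prop can diverge, which is the de-sync the title refers to. A minimal TypeScript sketch of the problem and the controlled alternative (component and prop names here are illustrative, not taken from the diff):

    import React, { useState } from 'react';

    type Props = { value: string; onChange: (value: string) => void };

    // Anti-pattern: the prop is copied into local state once, so later prop
    // updates from the parent never reach the UI and the two copies drift apart.
    export function MirroredSelect({ value, onChange }: Props) {
      const [selected, setSelected] = useState(value);
      const handle = (e: React.ChangeEvent<HTMLSelectElement>) => {
        setSelected(e.target.value);
        onChange(e.target.value);
      };
      return (
        <select value={selected} onChange={handle}>
          <option value="small">small</option>
          <option value="large">large</option>
        </select>
      );
    }

    // Controlled pattern: the parent owns the value and the component only
    // reports changes, so there is a single source of truth.
    export function ControlledSelect({ value, onChange }: Props) {
      return (
        <select value={value} onChange={(e) => onChange(e.target.value)}>
          <option value="small">small</option>
          <option value="large">large</option>
        </select>
      );
    }

The Dropdown change below follows the second shape; the STT hooks apply the same idea to transcript state.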
@@ -1,73 +1,79 @@
-import { useEffect } from 'react';
+import { useCallback } from 'react';
+import { useChatFormContext, useToastContext } from '~/Providers';
 import { ListeningIcon, Spinner } from '~/components/svg';
 import { useLocalize, useSpeechToText } from '~/hooks';
-import { useChatFormContext } from '~/Providers';
 import { TooltipAnchor } from '~/components/ui';
 import { globalAudioId } from '~/common';
 import { cn } from '~/utils';
 
 export default function AudioRecorder({
-  textAreaRef,
-  methods,
-  ask,
   isRTL,
   disabled,
+  ask,
+  methods,
+  textAreaRef,
+  isSubmitting,
 }: {
-  textAreaRef: React.RefObject<HTMLTextAreaElement>;
-  methods: ReturnType<typeof useChatFormContext>;
-  ask: (data: { text: string }) => void;
   isRTL: boolean;
   disabled: boolean;
+  ask: (data: { text: string }) => void;
+  methods: ReturnType<typeof useChatFormContext>;
+  textAreaRef: React.RefObject<HTMLTextAreaElement>;
+  isSubmitting: boolean;
 }) {
+  const { setValue, reset } = methods;
   const localize = useLocalize();
+  const { showToast } = useToastContext();
 
-  const handleTranscriptionComplete = (text: string) => {
-    if (text) {
-      const globalAudio = document.getElementById(globalAudioId) as HTMLAudioElement;
-      if (globalAudio) {
-        console.log('Unmuting global audio');
-        globalAudio.muted = false;
+  const onTranscriptionComplete = useCallback(
+    (text: string) => {
+      if (isSubmitting) {
+        showToast({
+          message: localize('com_ui_speech_while_submitting'),
+          status: 'error',
+        });
+        return;
       }
-      ask({ text });
-      methods.reset({ text: '' });
-      clearText();
-    }
-  };
+      if (text) {
+        const globalAudio = document.getElementById(globalAudioId) as HTMLAudioElement | null;
+        if (globalAudio) {
+          console.log('Unmuting global audio');
+          globalAudio.muted = false;
+        }
+        ask({ text });
+        reset({ text: '' });
+      }
+    },
+    [ask, reset, showToast, localize, isSubmitting],
+  );
 
-  const {
-    isListening,
-    isLoading,
-    startRecording,
-    stopRecording,
-    interimTranscript,
-    speechText,
-    clearText,
-  } = useSpeechToText(handleTranscriptionComplete);
-
-  useEffect(() => {
-    if (isListening && textAreaRef.current) {
-      methods.setValue('text', interimTranscript, {
+  const setText = useCallback(
+    (text: string) => {
+      setValue('text', text, {
         shouldValidate: true,
       });
-    } else if (textAreaRef.current) {
-      textAreaRef.current.value = speechText;
-      methods.setValue('text', speechText, { shouldValidate: true });
-    }
-  }, [interimTranscript, speechText, methods, textAreaRef]);
+    },
+    [setValue],
+  );
 
-  const handleStartRecording = async () => {
-    await startRecording();
-  };
+  const { isListening, isLoading, startRecording, stopRecording } = useSpeechToText(
+    setText,
+    onTranscriptionComplete,
+  );
 
-  const handleStopRecording = async () => {
-    await stopRecording();
-  };
+  if (!textAreaRef.current) {
+    return null;
+  }
+
+  const handleStartRecording = async () => startRecording();
+
+  const handleStopRecording = async () => stopRecording();
 
   const renderIcon = () => {
-    if (isListening) {
+    if (isListening === true) {
       return <ListeningIcon className="stroke-red-500" />;
     }
-    if (isLoading) {
+    if (isLoading === true) {
       return <Spinner className="stroke-gray-700 dark:stroke-gray-300" />;
     }
     return <ListeningIcon className="stroke-gray-700 dark:stroke-gray-300" />;

@@ -77,7 +83,7 @@ export default function AudioRecorder({
     <TooltipAnchor
       id="audio-recorder"
       aria-label={localize('com_ui_use_micrphone')}
-      onClick={isListening ? handleStopRecording : handleStartRecording}
+      onClick={isListening === true ? handleStopRecording : handleStartRecording}
       disabled={disabled}
       className={cn(
         'absolute flex size-[35px] items-center justify-center rounded-full p-1 transition-colors hover:bg-surface-hover',
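A side note on the useCallback wrappers introduced above: the speech hook runs effects that list these callbacks as dependencies, so giving them a stable identity keeps those effects from re-firing on every render. A small, self-contained sketch of that interaction (hook and parameter names are illustrative):

    import { useCallback, useEffect } from 'react';

    // A consumer hook that reacts to new transcript text.
    function useTranscriptSync(onText: (text: string) => void, transcript: string) {
      useEffect(() => {
        if (transcript !== '') {
          onText(transcript);
        }
        // If `onText` were recreated on every render, this effect would re-run
        // far more often than the transcript actually changes.
      }, [onText, transcript]);
    }

    export function useStableHandler(transcript: string, save: (text: string) => void) {
      // useCallback pins the handler's identity to `save`, not to each render.
      const onText = useCallback((text: string) => save(text), [save]);
      useTranscriptSync(onText, transcript);
    }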
@@ -228,11 +228,12 @@ const ChatForm = ({ index = 0 }) => {
           </FileFormWrapper>
           {SpeechToText && (
             <AudioRecorder
-              disabled={!!disableInputs}
-              textAreaRef={textAreaRef}
-              ask={submitMessage}
               isRTL={isRTL}
               methods={methods}
+              ask={submitMessage}
+              textAreaRef={textAreaRef}
+              disabled={!!disableInputs}
+              isSubmitting={isSubmitting}
             />
           )}
           {TextToSpeech && automaticPlayback && <StreamAudio index={index} />}
@@ -30,7 +30,6 @@ export default function FontSizeSelector() {
         onChange={handleChange}
         testId="font-size-selector"
         sizeClasses="w-[150px]"
-        anchor="bottom start"
       />
     </div>
   );
@@ -1,10 +1,10 @@
-import React, { useState } from 'react';
+import React from 'react';
 import * as Select from '@ariakit/react/select';
 import type { Option } from '~/common';
 import { cn } from '~/utils/';
 
 interface DropdownProps {
-  value: string;
+  value?: string;
   label?: string;
   onChange: (value: string) => void;
   options: string[] | Option[];

@@ -14,7 +14,7 @@ interface DropdownProps {
 }
 
 const Dropdown: React.FC<DropdownProps> = ({
-  value: initialValue,
+  value: selectedValue,
   label = '',
   onChange,
   options,

@@ -22,10 +22,7 @@ const Dropdown: React.FC<DropdownProps> = ({
   sizeClasses,
   testId = 'dropdown-menu',
 }) => {
-  const [selectedValue, setSelectedValue] = useState(initialValue);
-
   const handleChange = (value: string) => {
-    setSelectedValue(value);
     onChange(value);
   };
 
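With the internal useState gone, Dropdown simply renders whatever `value` its parent passes and reports changes through `onChange`, so the parent remains the single owner of the selection. A hedged usage sketch (the import path, option strings, and the plain-useState parent are assumptions for illustration; real callers may read the value from settings state instead):

    import React, { useState } from 'react';
    import Dropdown from '~/components/ui/Dropdown'; // assumed path

    export default function FontSizeExample() {
      // The parent owns the selected value; Dropdown never caches its own copy.
      const [fontSize, setFontSize] = useState('text-base');

      return (
        <Dropdown
          value={fontSize}
          onChange={setFontSize}
          options={['text-sm', 'text-base', 'text-lg']}
          testId="font-size-example"
        />
      );
    }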
@@ -1,83 +1,48 @@
-import { useState, useEffect } from 'react';
 import useSpeechToTextBrowser from './useSpeechToTextBrowser';
 import useSpeechToTextExternal from './useSpeechToTextExternal';
 import useGetAudioSettings from './useGetAudioSettings';
 
-const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) => {
+const useSpeechToText = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+): {
+  isLoading?: boolean;
+  isListening?: boolean;
+  stopRecording: () => void | (() => Promise<void>);
+  startRecording: () => void | (() => Promise<void>);
+} => {
   const { speechToTextEndpoint } = useGetAudioSettings();
-  const [animatedText, setAnimatedText] = useState('');
   const externalSpeechToText = speechToTextEndpoint === 'external';
 
   const {
     isListening: speechIsListeningBrowser,
     isLoading: speechIsLoadingBrowser,
-    interimTranscript: interimTranscriptBrowser,
-    text: speechTextBrowser,
     startRecording: startSpeechRecordingBrowser,
     stopRecording: stopSpeechRecordingBrowser,
-  } = useSpeechToTextBrowser();
+  } = useSpeechToTextBrowser(setText, onTranscriptionComplete);
 
   const {
     isListening: speechIsListeningExternal,
     isLoading: speechIsLoadingExternal,
-    text: speechTextExternal,
     externalStartRecording: startSpeechRecordingExternal,
     externalStopRecording: stopSpeechRecordingExternal,
-    clearText,
-  } = useSpeechToTextExternal(handleTranscriptionComplete);
+  } = useSpeechToTextExternal(setText, onTranscriptionComplete);
 
   const isListening = externalSpeechToText ? speechIsListeningExternal : speechIsListeningBrowser;
   const isLoading = externalSpeechToText ? speechIsLoadingExternal : speechIsLoadingBrowser;
-  const speechTextForm = externalSpeechToText ? speechTextExternal : speechTextBrowser;
 
   const startRecording = externalSpeechToText
     ? startSpeechRecordingExternal
     : startSpeechRecordingBrowser;
   const stopRecording = externalSpeechToText
    ? stopSpeechRecordingExternal
    : stopSpeechRecordingBrowser;
-  const speechText =
-    isListening || (speechTextExternal && speechTextExternal.length > 0)
-      ? speechTextExternal
-      : speechTextForm || '';
-  // for a future real-time STT external
-  const interimTranscript = externalSpeechToText ? '' : interimTranscriptBrowser;
-
-  const animateTextTyping = (text: string) => {
-    const totalDuration = 2000;
-    const frameRate = 60;
-    const totalFrames = totalDuration / (1000 / frameRate);
-    const charsPerFrame = Math.ceil(text.length / totalFrames);
-    let currentIndex = 0;
-
-    const animate = () => {
-      currentIndex += charsPerFrame;
-      const currentText = text.substring(0, currentIndex);
-      setAnimatedText(currentText);
-
-      if (currentIndex < text.length) {
-        requestAnimationFrame(animate);
-      } else {
-        setAnimatedText(text);
-      }
-    };
-
-    requestAnimationFrame(animate);
-  };
-
-  useEffect(() => {
-    if (speechText && externalSpeechToText) {
-      animateTextTyping(speechText);
-    }
-  }, [speechText, externalSpeechToText]);
-
   return {
-    isListening,
     isLoading,
-    startRecording,
+    isListening,
     stopRecording,
-    interimTranscript,
-    speechText: externalSpeechToText ? animatedText : speechText,
-    clearText,
+    startRecording,
   };
 };
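The slimmed-down useSpeechToText above acts as a facade: both engine hooks are always called (hook call order must stay stable across renders), and only the results of the configured engine are exposed. A generic sketch of that selection step (type and function names are illustrative):

    // Both engines' results exist on every render; the facade surfaces one set.
    type Engine = {
      isListening?: boolean;
      isLoading?: boolean;
      startRecording: () => void;
      stopRecording: () => void;
    };

    export function selectEngine(
      endpoint: 'browser' | 'external',
      browser: Engine,
      external: Engine,
    ): Engine {
      const useExternal = endpoint === 'external';
      return {
        isListening: useExternal ? external.isListening : browser.isListening,
        isLoading: useExternal ? external.isLoading : browser.isLoading,
        startRecording: useExternal ? external.startRecording : browser.startRecording,
        stopRecording: useExternal ? external.stopRecording : browser.stopRecording,
      };
    }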
@@ -1,25 +1,72 @@
-import { useEffect, useState } from 'react';
+import { useEffect, useRef, useMemo } from 'react';
 import { useRecoilState } from 'recoil';
-import { useToastContext } from '~/Providers';
-import store from '~/store';
 import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
 import useGetAudioSettings from './useGetAudioSettings';
+import { useToastContext } from '~/Providers';
+import store from '~/store';
 
-const useSpeechToTextBrowser = () => {
+const useSpeechToTextBrowser = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+) => {
   const { showToast } = useToastContext();
-  const [languageSTT] = useRecoilState<string>(store.languageSTT);
-  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
   const { speechToTextEndpoint } = useGetAudioSettings();
   const isBrowserSTTEnabled = speechToTextEndpoint === 'browser';
-  const [isListening, setIsListening] = useState(false);
+
+  const lastTranscript = useRef<string | null>(null);
+  const lastInterim = useRef<string | null>(null);
+  const timeoutRef = useRef<NodeJS.Timeout | null>();
+  const [autoSendText] = useRecoilState(store.autoSendText);
+  const [languageSTT] = useRecoilState<string>(store.languageSTT);
+  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
 
   const {
-    interimTranscript,
-    finalTranscript,
     listening,
-    browserSupportsSpeechRecognition,
+    finalTranscript,
+    resetTranscript,
+    interimTranscript,
     isMicrophoneAvailable,
+    browserSupportsSpeechRecognition,
   } = useSpeechRecognition();
+  const isListening = useMemo(() => listening, [listening]);
+
+  useEffect(() => {
+    if (interimTranscript == null || interimTranscript === '') {
+      return;
+    }
+
+    if (lastInterim.current === interimTranscript) {
+      return;
+    }
+
+    setText(interimTranscript);
+    lastInterim.current = interimTranscript;
+  }, [setText, interimTranscript]);
+
+  useEffect(() => {
+    if (finalTranscript == null || finalTranscript === '') {
+      return;
+    }
+
+    if (lastTranscript.current === finalTranscript) {
+      return;
+    }
+
+    setText(finalTranscript);
+    lastTranscript.current = finalTranscript;
+    if (autoSendText > -1 && finalTranscript.length > 0) {
+      timeoutRef.current = setTimeout(() => {
+        onTranscriptionComplete(finalTranscript);
+        resetTranscript();
+      }, autoSendText * 1000);
+    }
+
+    return () => {
+      if (timeoutRef.current) {
+        clearTimeout(timeoutRef.current);
+      }
+    };
+  }, [setText, onTranscriptionComplete, resetTranscript, finalTranscript, autoSendText]);
 
   const toggleListening = () => {
     if (!browserSupportsSpeechRecognition) {

@@ -38,11 +85,9 @@ const useSpeechToTextBrowser = () => {
       return;
     }
 
-    if (listening) {
-      setIsListening(false);
+    if (isListening === true) {
       SpeechRecognition.stopListening();
     } else {
-      setIsListening(true);
       SpeechRecognition.startListening({
         language: languageSTT,
         continuous: autoTranscribeAudio,

@@ -61,17 +106,9 @@ const useSpeechToTextBrowser = () => {
     return () => window.removeEventListener('keydown', handleKeyDown);
   }, []);
 
-  useEffect(() => {
-    if (!listening) {
-      setIsListening(false);
-    }
-  }, [listening]);
-
   return {
     isListening,
     isLoading: false,
-    interimTranscript,
-    text: finalTranscript,
     startRecording: toggleListening,
     stopRecording: toggleListening,
   };
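The auto-send effect above arms a setTimeout and clears it in the effect's cleanup, which is the memory-leak fix named in the commit message: a pending timer can no longer fire after the component unmounts. A self-contained sketch of the same pattern (hook name, delay semantics, and the send callback are illustrative):

    import { useEffect, useRef } from 'react';

    export function useAutoSend(finalText: string, delaySec: number, send: (text: string) => void) {
      const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

      useEffect(() => {
        if (delaySec > -1 && finalText.length > 0) {
          timeoutRef.current = setTimeout(() => send(finalText), delaySec * 1000);
        }

        // Cleanup runs before the next effect and on unmount, so a timer armed
        // for an old transcript (or an unmounted component) is always cleared.
        return () => {
          if (timeoutRef.current) {
            clearTimeout(timeoutRef.current);
          }
        };
      }, [finalText, delaySec, send]);
    }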
@@ -1,27 +1,31 @@
 import { useState, useEffect, useRef } from 'react';
 import { useRecoilState } from 'recoil';
 import { useSpeechToTextMutation } from '~/data-provider';
-import useGetAudioSettings from './useGetAudioSettings';
 import { useToastContext } from '~/Providers';
 import store from '~/store';
+import useGetAudioSettings from './useGetAudioSettings';
 
-const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void) => {
+const useSpeechToTextExternal = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+) => {
   const { showToast } = useToastContext();
   const { speechToTextEndpoint } = useGetAudioSettings();
   const isExternalSTTEnabled = speechToTextEndpoint === 'external';
-  const [speechToText] = useRecoilState<boolean>(store.speechToText);
-  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
-  const [autoSendText] = useRecoilState(store.autoSendText);
-  const [text, setText] = useState<string>('');
-  const [isListening, setIsListening] = useState(false);
-  const audioStream = useRef<MediaStream | null>(null);
-  const animationFrameIdRef = useRef<number | null>(null);
-  const audioContextRef = useRef<AudioContext | null>(null);
-  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
 
   const [permission, setPermission] = useState(false);
+  const [isListening, setIsListening] = useState(false);
   const [audioChunks, setAudioChunks] = useState<Blob[]>([]);
   const [isRequestBeingMade, setIsRequestBeingMade] = useState(false);
 
   const [minDecibels] = useRecoilState(store.decibelValue);
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+  const audioStream = useRef<MediaStream | null>(null);
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const animationFrameIdRef = useRef<number | null>(null);
+  const [autoSendText] = useRecoilState(store.autoSendText);
+  const [speechToText] = useRecoilState<boolean>(store.speechToText);
+  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
 
   const { mutate: processAudio, isLoading: isProcessing } = useSpeechToTextMutation({
     onSuccess: (data) => {

@@ -54,10 +58,6 @@ const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void
     }
   };
 
-  const clearText = () => {
-    setText('');
-  };
-
   const getMicrophonePermission = async () => {
     try {
       const streamData = await navigator.mediaDevices.getUserMedia({

@@ -226,11 +226,9 @@ const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void
 
   return {
     isListening,
-    isLoading: isProcessing,
-    text,
-    externalStartRecording,
     externalStopRecording,
-    clearText,
+    externalStartRecording,
+    isLoading: isProcessing,
   };
 };
@@ -895,4 +895,5 @@ export default {
   com_ui_decline: 'I do not accept',
   com_ui_terms_and_conditions: 'Terms and Conditions',
   com_ui_no_terms_content: 'No terms and conditions content to display',
+  com_ui_speech_while_submitting: 'Can\'t submit speech while a response is being generated',
 };