🎯 fix: Prevent UI De-sync By Removing Redundant States (#5333)
* fix: remove local state from Dropdown causing de-sync
* refactor: clean up STT code, avoid redundant states to prevent de-sync and side effects
* fix: reset transcript after sending final text to prevent data loss
* fix: clear timeout on component unmount to prevent memory leaks
This commit is contained in:
parent b55e695541
commit e309c6abef
8 changed files with 149 additions and 145 deletions (the three STT hook files are excerpted below)
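The common thread across these fixes is replacing duplicated state with a single source of truth: instead of mirroring a library value (such as react-speech-recognition's listening flag) into a separate useState that can fall out of step, the hooks now derive it. A minimal TypeScript sketch of the pattern (the hook names here are illustrative; the Dropdown change itself is not excerpted on this page):

import { useMemo, useState } from 'react';
import { useSpeechRecognition } from 'react-speech-recognition';

// Anti-pattern: a local copy of the library's listening flag. Every code
// path must remember to call setIsListening, and any missed call leaves
// the UI showing a stale state.
const useListeningCopied = () => {
  const { listening } = useSpeechRecognition();
  const [isListening, setIsListening] = useState(false);
  // ...setIsListening(listening) must be kept in sync manually
  return isListening;
};

// The commit's approach: derive the value, so it can never diverge.
const useListeningDerived = () => {
  const { listening } = useSpeechRecognition();
  return useMemo(() => listening, [listening]);
};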
useSpeechToText.ts

@@ -1,83 +1,48 @@
-import { useState, useEffect } from 'react';
 import useSpeechToTextBrowser from './useSpeechToTextBrowser';
 import useSpeechToTextExternal from './useSpeechToTextExternal';
 import useGetAudioSettings from './useGetAudioSettings';

-const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) => {
+const useSpeechToText = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+): {
+  isLoading?: boolean;
+  isListening?: boolean;
+  stopRecording: () => void | (() => Promise<void>);
+  startRecording: () => void | (() => Promise<void>);
+} => {
   const { speechToTextEndpoint } = useGetAudioSettings();
-  const [animatedText, setAnimatedText] = useState('');
   const externalSpeechToText = speechToTextEndpoint === 'external';

   const {
     isListening: speechIsListeningBrowser,
     isLoading: speechIsLoadingBrowser,
-    interimTranscript: interimTranscriptBrowser,
-    text: speechTextBrowser,
     startRecording: startSpeechRecordingBrowser,
     stopRecording: stopSpeechRecordingBrowser,
-  } = useSpeechToTextBrowser();
+  } = useSpeechToTextBrowser(setText, onTranscriptionComplete);

   const {
     isListening: speechIsListeningExternal,
     isLoading: speechIsLoadingExternal,
-    text: speechTextExternal,
     externalStartRecording: startSpeechRecordingExternal,
     externalStopRecording: stopSpeechRecordingExternal,
-    clearText,
-  } = useSpeechToTextExternal(handleTranscriptionComplete);
+  } = useSpeechToTextExternal(setText, onTranscriptionComplete);

   const isListening = externalSpeechToText ? speechIsListeningExternal : speechIsListeningBrowser;
   const isLoading = externalSpeechToText ? speechIsLoadingExternal : speechIsLoadingBrowser;
-  const speechTextForm = externalSpeechToText ? speechTextExternal : speechTextBrowser;

   const startRecording = externalSpeechToText
     ? startSpeechRecordingExternal
     : startSpeechRecordingBrowser;
   const stopRecording = externalSpeechToText
     ? stopSpeechRecordingExternal
     : stopSpeechRecordingBrowser;
-  const speechText =
-    isListening || (speechTextExternal && speechTextExternal.length > 0)
-      ? speechTextExternal
-      : speechTextForm || '';
-  // for a future real-time STT external
-  const interimTranscript = externalSpeechToText ? '' : interimTranscriptBrowser;
-
-  const animateTextTyping = (text: string) => {
-    const totalDuration = 2000;
-    const frameRate = 60;
-    const totalFrames = totalDuration / (1000 / frameRate);
-    const charsPerFrame = Math.ceil(text.length / totalFrames);
-    let currentIndex = 0;
-
-    const animate = () => {
-      currentIndex += charsPerFrame;
-      const currentText = text.substring(0, currentIndex);
-      setAnimatedText(currentText);
-
-      if (currentIndex < text.length) {
-        requestAnimationFrame(animate);
-      } else {
-        setAnimatedText(text);
-      }
-    };
-
-    requestAnimationFrame(animate);
-  };
-
-  useEffect(() => {
-    if (speechText && externalSpeechToText) {
-      animateTextTyping(speechText);
-    }
-  }, [speechText, externalSpeechToText]);

   return {
-    isListening,
     isLoading,
-    startRecording,
+    isListening,
     stopRecording,
-    interimTranscript,
-    speechText: externalSpeechToText ? animatedText : speechText,
-    clearText,
+    startRecording,
   };
 };
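The new signature moves ownership of the transcript text to the caller: the hook writes into the caller's setter and signals completion through a callback. A sketch of how a consumer might wire this up (the consumer-side names and the import path are assumptions, not code from the PR):

import { useState } from 'react';
import useSpeechToText from '~/hooks/Input/useSpeechToText';

// Hypothetical consumer: the chat form owns the text state; the hook writes
// interim and final transcripts into it via setText and fires the callback
// when a final transcript should be submitted.
const useChatFormSTT = (submitMessage: (text: string) => void) => {
  const [text, setText] = useState('');

  const { isLoading, isListening, startRecording, stopRecording } = useSpeechToText(
    setText,
    (finalText: string) => {
      submitMessage(finalText);
      setText(''); // clear the composer after auto-send
    },
  );

  return { text, isLoading, isListening, startRecording, stopRecording };
};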
useSpeechToTextBrowser.ts

@@ -1,25 +1,72 @@
-import { useEffect, useState } from 'react';
+import { useEffect, useRef, useMemo } from 'react';
 import { useRecoilState } from 'recoil';
-import { useToastContext } from '~/Providers';
-import store from '~/store';
 import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
 import useGetAudioSettings from './useGetAudioSettings';
+import { useToastContext } from '~/Providers';
+import store from '~/store';

-const useSpeechToTextBrowser = () => {
+const useSpeechToTextBrowser = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+) => {
   const { showToast } = useToastContext();
-  const [languageSTT] = useRecoilState<string>(store.languageSTT);
-  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
   const { speechToTextEndpoint } = useGetAudioSettings();
   const isBrowserSTTEnabled = speechToTextEndpoint === 'browser';
-  const [isListening, setIsListening] = useState(false);
+  const lastTranscript = useRef<string | null>(null);
+  const lastInterim = useRef<string | null>(null);
+  const timeoutRef = useRef<NodeJS.Timeout | null>();
+  const [autoSendText] = useRecoilState(store.autoSendText);
+  const [languageSTT] = useRecoilState<string>(store.languageSTT);
+  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);

   const {
+    interimTranscript,
+    finalTranscript,
     listening,
-    browserSupportsSpeechRecognition,
-    finalTranscript,
-    interimTranscript,
+    resetTranscript,
     isMicrophoneAvailable,
+    browserSupportsSpeechRecognition,
   } = useSpeechRecognition();
+  const isListening = useMemo(() => listening, [listening]);

+  useEffect(() => {
+    if (interimTranscript == null || interimTranscript === '') {
+      return;
+    }
+
+    if (lastInterim.current === interimTranscript) {
+      return;
+    }
+
+    setText(interimTranscript);
+    lastInterim.current = interimTranscript;
+  }, [setText, interimTranscript]);
+
+  useEffect(() => {
+    if (finalTranscript == null || finalTranscript === '') {
+      return;
+    }
+
+    if (lastTranscript.current === finalTranscript) {
+      return;
+    }
+
+    setText(finalTranscript);
+    lastTranscript.current = finalTranscript;
+    if (autoSendText > -1 && finalTranscript.length > 0) {
+      timeoutRef.current = setTimeout(() => {
+        onTranscriptionComplete(finalTranscript);
+        resetTranscript();
+      }, autoSendText * 1000);
+    }
+
+    return () => {
+      if (timeoutRef.current) {
+        clearTimeout(timeoutRef.current);
+      }
+    };
+  }, [setText, onTranscriptionComplete, resetTranscript, finalTranscript, autoSendText]);
+
   const toggleListening = () => {
     if (!browserSupportsSpeechRecognition) {
@@ -38,11 +85,9 @@ const useSpeechToTextBrowser = () => {
       return;
     }

-    if (listening) {
-      setIsListening(false);
+    if (isListening === true) {
       SpeechRecognition.stopListening();
     } else {
-      setIsListening(true);
       SpeechRecognition.startListening({
         language: languageSTT,
         continuous: autoTranscribeAudio,
@@ -61,17 +106,9 @@ const useSpeechToTextBrowser = () => {
     return () => window.removeEventListener('keydown', handleKeyDown);
   }, []);

-  useEffect(() => {
-    if (!listening) {
-      setIsListening(false);
-    }
-  }, [listening]);
-
   return {
     isListening,
     isLoading: false,
-    interimTranscript,
-    text: finalTranscript,
     startRecording: toggleListening,
     stopRecording: toggleListening,
   };
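The final-transcript effect above carries two of the fixes from the commit message: resetTranscript() runs after the text is handed to onTranscriptionComplete, so the next utterance starts clean without losing the one just sent, and the auto-send timer is cleared in the effect's cleanup function, which React runs on unmount and before each re-run. The cleanup pattern in isolation (generic names, not this hook's API):

import { useEffect, useRef } from 'react';

// Generic sketch of the cleanup pattern: schedule a delayed action when a
// value settles, and cancel it if the component unmounts or the value
// changes before the delay elapses.
const useDelayedAction = (value: string, delayMs: number, action: (v: string) => void) => {
  const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  useEffect(() => {
    if (value === '') {
      return;
    }
    timeoutRef.current = setTimeout(() => action(value), delayMs);

    // Cleanup runs on unmount and before the next effect invocation,
    // which is what prevents the leaked timer.
    return () => {
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, [value, delayMs, action]);
};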
useSpeechToTextExternal.ts

@@ -1,27 +1,31 @@
 import { useState, useEffect, useRef } from 'react';
 import { useRecoilState } from 'recoil';
 import { useSpeechToTextMutation } from '~/data-provider';
-import useGetAudioSettings from './useGetAudioSettings';
 import { useToastContext } from '~/Providers';
 import store from '~/store';
+import useGetAudioSettings from './useGetAudioSettings';

-const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void) => {
+const useSpeechToTextExternal = (
+  setText: (text: string) => void,
+  onTranscriptionComplete: (text: string) => void,
+) => {
   const { showToast } = useToastContext();
   const { speechToTextEndpoint } = useGetAudioSettings();
   const isExternalSTTEnabled = speechToTextEndpoint === 'external';
-  const [text, setText] = useState<string>('');
+  const [speechToText] = useRecoilState<boolean>(store.speechToText);
+  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
+  const [autoSendText] = useRecoilState(store.autoSendText);
+  const [isListening, setIsListening] = useState(false);
+  const audioStream = useRef<MediaStream | null>(null);
+  const animationFrameIdRef = useRef<number | null>(null);
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);

   const [permission, setPermission] = useState(false);
-  const [isListening, setIsListening] = useState(false);
   const [audioChunks, setAudioChunks] = useState<Blob[]>([]);
   const [isRequestBeingMade, setIsRequestBeingMade] = useState(false);

   const [minDecibels] = useRecoilState(store.decibelValue);
-  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
-  const audioStream = useRef<MediaStream | null>(null);
-  const audioContextRef = useRef<AudioContext | null>(null);
-  const animationFrameIdRef = useRef<number | null>(null);
-  const [autoSendText] = useRecoilState(store.autoSendText);
-  const [speechToText] = useRecoilState<boolean>(store.speechToText);
-  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);

   const { mutate: processAudio, isLoading: isProcessing } = useSpeechToTextMutation({
     onSuccess: (data) => {
@@ -54,10 +58,6 @@ const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void
     }
   };

-  const clearText = () => {
-    setText('');
-  };
-
   const getMicrophonePermission = async () => {
     try {
       const streamData = await navigator.mediaDevices.getUserMedia({
@@ -226,11 +226,9 @@ const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void

   return {
     isListening,
-    isLoading: isProcessing,
-    text,
-    externalStartRecording,
     externalStopRecording,
-    clearText,
+    externalStartRecording,
+    isLoading: isProcessing,
   };
 };
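Both backends now share the same contract: the hook no longer owns a useState copy of the transcript or exposes text/clearText; it writes through the setText it receives, so the composer's text is the only copy. In miniature (names illustrative, not the hooks' real internals):

// Before, each hook kept its own transcript copy:
//   const [text, setText] = useState<string>('');
//   return { text, clearText: () => setText('') };
// After, the caller injects the setter, so only one copy of the text exists:
const useLiftedSTT = (
  setText: (text: string) => void,
  onTranscriptionComplete: (text: string) => void,
) => {
  const handleTranscript = (transcript: string, isFinal: boolean) => {
    setText(transcript); // write straight into the caller's state
    if (isFinal) {
      onTranscriptionComplete(transcript); // e.g. trigger auto-send
    }
  };

  return { handleTranscript };
};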