LibreChat/client/src/hooks/Input/useSpeechToTextExternal.ts
Danny Avila e309c6abef
🎯 fix: Prevent UI De-sync By Removing Redundant States (#5333)
* fix: remove local state from Dropdown causing de-sync

* refactor: cleanup STT code, avoid redundant states to prevent de-sync and side effects

* fix: reset transcript after sending final text to prevent data loss

* fix: clear timeout on component unmount to prevent memory leaks
2025-01-16 17:38:59 -05:00

235 lines
7 KiB
TypeScript

import { useState, useEffect, useRef } from 'react';
import { useRecoilState } from 'recoil';
import { useSpeechToTextMutation } from '~/data-provider';
import useGetAudioSettings from './useGetAudioSettings';
import { useToastContext } from '~/Providers';
import store from '~/store';
/**
 * Speech-to-text via an external transcription endpoint.
 *
 * Records microphone audio with MediaRecorder, posts the captured WAV blob to
 * the STT mutation, and pushes the transcribed text into the composer via
 * `setText`. When auto-send is enabled, `onTranscriptionComplete` fires after
 * the configured delay.
 *
 * @param setText - setter for the message input text
 * @param onTranscriptionComplete - called with the final transcript when auto-send triggers
 */
const useSpeechToTextExternal = (
  setText: (text: string) => void,
  onTranscriptionComplete: (text: string) => void,
) => {
  const { showToast } = useToastContext();
  const { speechToTextEndpoint } = useGetAudioSettings();
  const isExternalSTTEnabled = speechToTextEndpoint === 'external';
  const audioStream = useRef<MediaStream | null>(null);
  const animationFrameIdRef = useRef<number | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  // Fix: track the pending auto-send timer so a newer transcription cancels a
  // previous, not-yet-fired auto-send instead of stacking duplicate sends.
  // NOTE(review): the timer is still not cleared on unmount — that teardown
  // belongs in a useEffect cleanup; confirm desired behavior before adding it.
  const autoSendTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const [permission, setPermission] = useState(false);
  const [isListening, setIsListening] = useState(false);
  const [audioChunks, setAudioChunks] = useState<Blob[]>([]);
  const [isRequestBeingMade, setIsRequestBeingMade] = useState(false);
  const [minDecibels] = useRecoilState(store.decibelValue);
  const [autoSendText] = useRecoilState(store.autoSendText);
  const [speechToText] = useRecoilState<boolean>(store.speechToText);
  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
  const { mutate: processAudio, isLoading: isProcessing } = useSpeechToTextMutation({
    onSuccess: (data) => {
      const extractedText = data.text;
      setText(extractedText);
      setIsRequestBeingMade(false);
      // autoSendText of -1 disables auto-send; otherwise it is the delay in seconds.
      if (autoSendText > -1 && speechToText && extractedText.length > 0) {
        if (autoSendTimeoutRef.current != null) {
          clearTimeout(autoSendTimeoutRef.current);
        }
        autoSendTimeoutRef.current = setTimeout(() => {
          autoSendTimeoutRef.current = null;
          onTranscriptionComplete(extractedText);
        }, autoSendText * 1000);
      }
    },
    onError: () => {
      showToast({
        message: 'An error occurred while processing the audio, maybe the audio was too short',
        status: 'error',
      });
      setIsRequestBeingMade(false);
    },
  });
/**
 * Discards the current MediaRecorder once a recording session has finished.
 *
 * Fix: the previous code also called `removeEventListener('dataavailable', ...)`
 * with a freshly created inline arrow function. `removeEventListener` requires
 * the exact listener identity that was registered, so that call was a
 * guaranteed no-op and has been removed. Dropping the ref is sufficient: the
 * recorder (and its listeners) becomes unreachable once nothing references it.
 */
const cleanup = () => {
  if (mediaRecorderRef.current) {
    // `handleStop` is the same function identity registered in startRecording
    // within a single recording session, so this removal is effective.
    mediaRecorderRef.current.removeEventListener('stop', handleStop);
    mediaRecorderRef.current = null;
  }
};
/**
 * Requests microphone access and caches the resulting MediaStream in
 * `audioStream`. Updates `permission` to reflect whether access was granted.
 */
const getMicrophonePermission = async () => {
  try {
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: true,
      video: false,
    });
    audioStream.current = stream ?? null;
    setPermission(true);
  } catch {
    // Denied or unavailable — callers surface their own error toast.
    setPermission(false);
  }
};
/**
 * 'stop' listener for the MediaRecorder: bundles the captured chunks into a
 * WAV blob and submits it to the external STT endpoint.
 */
const handleStop = () => {
  if (audioChunks.length === 0) {
    showToast({ message: 'The audio was too short', status: 'warning' });
    return;
  }
  const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
  setAudioChunks([]);
  const formData = new FormData();
  formData.append('audio', audioBlob, 'audio.wav');
  setIsRequestBeingMade(true);
  cleanup();
  processAudio(formData);
};
/**
 * Watches the live audio stream and stops the recording automatically after
 * 3 seconds of continuous silence (no frequency bin above `minDecibels`).
 *
 * @param stream - the active microphone stream
 * @param stopRecording - callback invoked once the silence threshold is hit
 */
const monitorSilence = (stream: MediaStream, stopRecording: () => void) => {
  const audioContext = new AudioContext();
  const audioStreamSource = audioContext.createMediaStreamSource(stream);
  const analyser = audioContext.createAnalyser();
  analyser.minDecibels = minDecibels;
  audioStreamSource.connect(analyser);
  const bufferLength = analyser.frequencyBinCount;
  const domainData = new Uint8Array(bufferLength);
  let lastSoundTime = Date.now();
  const detectSound = () => {
    analyser.getByteFrequencyData(domainData);
    // Any non-zero bin means the signal exceeded the analyser's decibel floor.
    const isSoundDetected = domainData.some((value) => value > 0);
    if (isSoundDetected) {
      lastSoundTime = Date.now();
    }
    const timeSinceLastSound = Date.now() - lastSoundTime;
    const isOverSilenceThreshold = timeSinceLastSound > 3000;
    if (isOverSilenceThreshold) {
      stopRecording();
      // Fix: release the AudioContext created for this session; previously it
      // was leaked on every silence-triggered stop. A manual stop still leaves
      // the context open — NOTE(review): `audioContextRef` is never assigned
      // anywhere in this hook, so the guard in startRecording is moot; confirm
      // intent before wiring the ref up (doing so naively would disable
      // monitoring for all sessions after the first).
      void audioContext.close();
      return;
    }
    animationFrameIdRef.current = window.requestAnimationFrame(detectSound);
  };
  animationFrameIdRef.current = window.requestAnimationFrame(detectSound);
};
/**
 * Starts a new recording session on the cached microphone stream, requesting
 * permission first when no stream is available. Starts the silence monitor
 * when auto-transcribe is enabled.
 */
const startRecording = async () => {
  if (isRequestBeingMade) {
    showToast({ message: 'A request is already being made. Please wait.', status: 'warning' });
    return;
  }
  if (!audioStream.current) {
    await getMicrophonePermission();
  }
  if (!audioStream.current) {
    showToast({ message: 'Microphone permission not granted', status: 'error' });
    return;
  }
  try {
    setAudioChunks([]);
    const recorder = new MediaRecorder(audioStream.current);
    // Chunks accumulate in this closure's array; handleStop reads the same array.
    recorder.addEventListener('dataavailable', (event: BlobEvent) => {
      audioChunks.push(event.data);
    });
    recorder.addEventListener('stop', handleStop);
    mediaRecorderRef.current = recorder;
    recorder.start(100); // emit a chunk every 100 ms
    if (!audioContextRef.current && autoTranscribeAudio && speechToText) {
      monitorSilence(audioStream.current, stopRecording);
    }
    setIsListening(true);
  } catch (error) {
    showToast({ message: `Error starting recording: ${error}`, status: 'error' });
  }
};
/**
 * Stops an active recording: halts the MediaRecorder (its 'stop' event then
 * fires handleStop), releases the microphone tracks, and cancels the
 * silence-monitor animation frame.
 */
const stopRecording = () => {
  const recorder = mediaRecorderRef.current;
  if (!recorder) {
    return;
  }
  if (recorder.state !== 'recording') {
    showToast({ message: 'MediaRecorder is not recording', status: 'error' });
    return;
  }
  recorder.stop();
  audioStream.current?.getTracks().forEach((track) => track.stop());
  audioStream.current = null;
  const frameId = animationFrameIdRef.current;
  if (frameId !== null) {
    window.cancelAnimationFrame(frameId);
    animationFrameIdRef.current = null;
  }
  setIsListening(false);
};
/** Public entry point: begin recording unless a session is already active. */
const externalStartRecording = () => {
  if (!isListening) {
    startRecording();
    return;
  }
  showToast({ message: 'Already listening. Please stop recording first.', status: 'warning' });
};
/** Public entry point: stop recording, warning when no session is active. */
const externalStopRecording = () => {
  if (isListening) {
    stopRecording();
    return;
  }
  showToast({
    message: 'Not currently recording. Please start recording first.',
    status: 'warning',
  });
};
/**
 * Global Shift+Alt+L shortcut that toggles external STT recording.
 *
 * Fix: `e.preventDefault()` is now called synchronously, before any `await`.
 * Calling it after an `await` is too late — by then the event dispatch has
 * completed and the browser's default action has already run. As a side
 * effect the shortcut is now also claimed when MediaRecorder is unsupported,
 * which is the intended "this key combo belongs to us" behavior.
 */
const handleKeyDown = async (e: KeyboardEvent) => {
  if (!(e.shiftKey && e.altKey && e.code === 'KeyL' && isExternalSTTEnabled)) {
    return;
  }
  e.preventDefault();
  if (!window.MediaRecorder) {
    showToast({ message: 'MediaRecorder is not supported in this browser', status: 'error' });
    return;
  }
  if (permission === false) {
    // NOTE(review): the `isListening` read below uses this render's closure;
    // the state update inside getMicrophonePermission is not visible until
    // the next render — confirm this is the intended flow.
    await getMicrophonePermission();
  }
  if (isListening) {
    stopRecording();
  } else {
    startRecording();
  }
};
// Installs the global shortcut listener; re-subscribes whenever `isListening`
// flips so the handler sees the current listening state.
// NOTE(review): because only `isListening` is a dependency, the installed
// handler closes over stale values of other state (e.g. `permission`,
// `isRequestBeingMade`) until `isListening` next changes — confirm this is
// acceptable before widening the dependency list.
useEffect(() => {
window.addEventListener('keydown', handleKeyDown);
return () => {
window.removeEventListener('keydown', handleKeyDown);
};
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [isListening]);
// Public surface of the hook.
return {
isListening, // true while the MediaRecorder is actively capturing
externalStopRecording, // guarded stop (warns when not recording)
externalStartRecording, // guarded start (warns when already recording)
isLoading: isProcessing, // true while the STT request is in flight
};
};
export default useSpeechToTextExternal;