Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-20 02:10:15 +01:00)
* WIP: message audio refactor
* WIP: use MessageAudio by provider
* fix: Update MessageAudio component to use TTSEndpoints enum
* feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging
* feat: Add voice dropdown components for different TTS engines
* docs: update incorrect `voices` example; changed `voice: ''` to `voices: ['alloy']`
* feat: Add browser support check for the Edge TTS engine component, with an error toast if not supported

---------

Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
200 lines
6 KiB
TypeScript
import { useRecoilValue } from 'recoil';
import { useState, useMemo, useRef, useCallback, useEffect } from 'react';
import { useTextToSpeechMutation, useVoicesQuery } from '~/data-provider';
import { useToastContext } from '~/Providers/ToastContext';
import useLocalize from '~/hooks/useLocalize';
import store from '~/store';
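/** Builds the form body sent to the TTS endpoint: the text to synthesize and the selected voice. */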
const createFormData = (text: string, voice: string) => {
  const formData = new FormData();
  formData.append('input', text);
  formData.append('voice', voice);
  return formData;
};
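/**
 * Props for the hook: a speaking-state setter and an audio element ref owned by the
 * caller, the id of the message being read, whether it is the last message, and the
 * message index used to look up global audio state.
 */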
type TUseTTSExternal = {
  setIsSpeaking: React.Dispatch<React.SetStateAction<boolean>>;
  audioRef: React.MutableRefObject<HTMLAudioElement | null>;
  messageId?: string;
  isLast: boolean;
  index?: number;
};
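/**
 * Drives text-to-speech through the external TTS endpoint. Generated audio can be
 * played back, downloaded as an MP3, and, when the cacheTTS setting is enabled,
 * cached via the browser's Cache API so repeated requests for the same text skip
 * the network.
 */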
function useTextToSpeechExternal({
  setIsSpeaking,
  audioRef,
  messageId,
  isLast,
  index = 0,
}: TUseTTSExternal) {
  const localize = useLocalize();
  const { showToast } = useToastContext();
  const voice = useRecoilValue(store.voice);
  const cacheTTS = useRecoilValue(store.cacheTTS);
  const playbackRate = useRecoilValue(store.playbackRate);

  const [downloadFile, setDownloadFile] = useState(false);

  const promiseAudioRef = useRef<HTMLAudioElement | null>(null);

  /* Global Audio Variables */
  const globalIsFetching = useRecoilValue(store.globalAudioFetchingFamily(index));
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));
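  // Hands a fresh Audio element for the blob to the caller-owned ref; playback is
  // presumably triggered by whichever component renders or controls that element.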
  const autoPlayAudio = (blobUrl: string) => {
    const newAudio = new Audio(blobUrl);
    audioRef.current = newAudio;
  };
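  // Plays a cached blob URL directly: applies the configured playback rate, retries
  // once if play() is interrupted by a pause() call, and revokes the blob URL when
  // playback ends.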
  const playAudioPromise = (blobUrl: string) => {
    const newAudio = new Audio(blobUrl);
    const initializeAudio = () => {
      if (playbackRate != null && playbackRate !== 1 && playbackRate > 0) {
        newAudio.playbackRate = playbackRate;
      }
    };

    initializeAudio();
    const playPromise = () => newAudio.play().then(() => setIsSpeaking(true));

    playPromise().catch((error: Error) => {
      if (
        error.message &&
        error.message.includes('The play() request was interrupted by a call to pause()')
      ) {
        console.log('Play request was interrupted by a call to pause()');
        initializeAudio();
        return playPromise().catch(console.error);
      }
      console.error(error);
      showToast({ message: localize('com_nav_audio_play_error', error.message), status: 'error' });
    });

    newAudio.onended = () => {
      console.log('Cached message audio ended');
      URL.revokeObjectURL(blobUrl);
      setIsSpeaking(false);
    };

    promiseAudioRef.current = newAudio;
  };
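  // Downloads the audio via a temporary anchor element and clears the download flag.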
  const downloadAudio = (blobUrl: string) => {
    const a = document.createElement('a');
    a.href = blobUrl;
    a.download = 'audio.mp3';
    a.click();
    setDownloadFile(false);
  };
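  // TTS request lifecycle: warn when the input is long (>= 4096 chars); on success,
  // cache the MP3 response keyed by the input text (when cacheTTS is on), optionally
  // download it, and hand the blob URL off for auto-play; show a toast on errors.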
  const { mutate: processAudio, isLoading: isProcessing } = useTextToSpeechMutation({
    onMutate: (variables) => {
      const inputText = (variables.get('input') ?? '') as string;
      if (inputText.length >= 4096) {
        showToast({
          message: localize('com_nav_long_audio_warning'),
          status: 'warning',
        });
      }
    },
    onSuccess: async (data: ArrayBuffer, variables) => {
      try {
        const inputText = (variables.get('input') ?? '') as string;
        const audioBlob = new Blob([data], { type: 'audio/mpeg' });

        if (cacheTTS && inputText) {
          const cache = await caches.open('tts-responses');
          const request = new Request(inputText);
          const response = new Response(audioBlob);
          cache.put(request, response);
        }

        const blobUrl = URL.createObjectURL(audioBlob);
        if (downloadFile) {
          downloadAudio(blobUrl);
        }
        autoPlayAudio(blobUrl);
      } catch (error) {
        showToast({
          message: `Error processing audio: ${(error as Error).message}`,
          status: 'error',
        });
      }
    },
    onError: (error: unknown) => {
      showToast({
        message: localize('com_nav_audio_process_error', (error as Error).message),
        status: 'error',
      });
    },
  });
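  // Kicks off a fresh TTS request with the currently selected voice, remembering
  // whether the result should be downloaded once it arrives.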
  const startMutation = (text: string, download: boolean) => {
    const formData = createFormData(text, voice ?? '');
    setDownloadFile(download);
    processAudio(formData);
  };
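  // Entry point returned to callers: serve from the cache when caching is enabled,
  // otherwise request fresh audio.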
  const generateSpeechExternal = (text: string, download: boolean) => {
    if (cacheTTS) {
      handleCachedResponse(text, download);
    } else {
      startMutation(text, download);
    }
  };
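  // Looks the text up in the Cache API; on a miss, falls back to a fresh request;
  // on a hit, either downloads or plays the cached audio blob.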
  const handleCachedResponse = async (text: string, download: boolean) => {
    const cachedResponse = await caches.match(text);
    if (!cachedResponse) {
      return startMutation(text, download);
    }
    const audioBlob = await cachedResponse.blob();
    const blobUrl = URL.createObjectURL(audioBlob);
    if (download) {
      downloadAudio(blobUrl);
    } else {
      playAudioPromise(blobUrl);
    }
  };
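  // Stops both the rendered `audio-${messageId}` element and any promise-driven
  // audio, revoking their blob URLs and clearing the speaking flag.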
  const cancelSpeech = () => {
    const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
    const pauseAudio = (currentElement: HTMLAudioElement | null) => {
      if (currentElement) {
        currentElement.pause();
        currentElement.src && URL.revokeObjectURL(currentElement.src);
        audioRef.current = null;
      }
    };
    pauseAudio(messageAudio);
    pauseAudio(promiseAudioRef.current);
    setIsSpeaking(false);
  };
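  // Stable cleanup used as the effect's teardown so promise-driven audio does not
  // keep playing after the component unmounts.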
  const cancelPromiseSpeech = useCallback(() => {
    if (promiseAudioRef.current) {
      promiseAudioRef.current.pause();
      promiseAudioRef.current.src && URL.revokeObjectURL(promiseAudioRef.current.src);
      promiseAudioRef.current = null;
      setIsSpeaking(false);
    }
  }, []);

  useEffect(() => cancelPromiseSpeech, [cancelPromiseSpeech]);
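  // Loading while the TTS request is in flight, or (for the last message) while
  // global audio is still being fetched and has not started playing.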
  const isLoading = useMemo(() => {
    return isProcessing || (isLast && globalIsFetching && !globalIsPlaying);
  }, [isProcessing, globalIsFetching, globalIsPlaying, isLast]);

  const { data: voicesData = [] } = useVoicesQuery();

  return {
    generateSpeechExternal,
    cancelSpeech,
    isLoading,
    audioRef,
    voices: voicesData,
  };
}

export default useTextToSpeechExternal;
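// Usage sketch (not part of the hook; assumptions labelled): a hypothetical message
// component wiring the hook's return values to a play/stop button. The component
// name, markup, and `message` prop shape are illustrative only.
//
// function MessageAudioExample({ message, isLast, index }: {
//   message: { messageId: string; text: string };
//   isLast: boolean;
//   index: number;
// }) {
//   const [isSpeaking, setIsSpeaking] = useState(false);
//   const audioRef = useRef<HTMLAudioElement | null>(null);
//   const { generateSpeechExternal, cancelSpeech, isLoading } = useTextToSpeechExternal({
//     setIsSpeaking,
//     audioRef,
//     messageId: message.messageId,
//     isLast,
//     index,
//   });
//
//   return (
//     <button
//       disabled={isLoading}
//       onClick={() => (isSpeaking ? cancelSpeech() : generateSpeechExternal(message.text, false))}
//     >
//       {isSpeaking ? 'Stop' : 'Read aloud'}
//     </button>
//   );
// }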