LibreChat/client/src/hooks/Input/useTextToSpeechExternal.ts
Danny Avila dba704079c
🔀 refactor: Modularize TTS Logic for Improved Browser support (#3657)
* WIP: message audio refactor

* WIP: use MessageAudio by provider

* fix: Update MessageAudio component to use TTSEndpoints enum

* feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging

* feat: Add voice dropdown components for different TTS engines

* docs: update incorrect `voices` example

changed `voice: ''` to `voices: ['alloy']`

* feat: Add browser support check for Edge TTS engine component with error toast if not supported (sketched below)

---------

Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
2024-08-15 11:34:25 -04:00
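
The last commit note describes a capability check for the Edge TTS engine component. Below is a minimal sketch of that kind of guard, assuming the capability being tested is `MediaSource` availability (the actual component may check something else) and reusing the project's `useToastContext`; the component name and toast message are illustrative, not taken from the PR. The hook source follows after it.

```tsx
import { useEffect, type ReactNode } from 'react';
import { useToastContext } from '~/Providers/ToastContext';

/**
 * Illustrative guard for an Edge TTS control: if the browser lacks the
 * capability the engine relies on (MediaSource is assumed here; the real
 * component may test something else), show an error toast and render nothing.
 */
export default function EdgeTTSGuard({ children }: { children: ReactNode }) {
  const { showToast } = useToastContext();
  const isSupported = typeof window !== 'undefined' && 'MediaSource' in window;

  useEffect(() => {
    if (!isSupported) {
      showToast({ message: 'Edge TTS is not supported in this browser', status: 'error' });
    }
  }, [isSupported, showToast]);

  return isSupported ? <>{children}</> : null;
}
```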

import { useRecoilValue } from 'recoil';
import { useState, useMemo, useRef, useCallback, useEffect } from 'react';
import { useTextToSpeechMutation, useVoicesQuery } from '~/data-provider';
import { useToastContext } from '~/Providers/ToastContext';
import useLocalize from '~/hooks/useLocalize';
import store from '~/store';

// Build the multipart payload expected by the external TTS endpoint
const createFormData = (text: string, voice: string) => {
  const formData = new FormData();
  formData.append('input', text);
  formData.append('voice', voice);
  return formData;
};

type TUseTTSExternal = {
  setIsSpeaking: React.Dispatch<React.SetStateAction<boolean>>;
  audioRef: React.MutableRefObject<HTMLAudioElement | null>;
  messageId?: string;
  isLast: boolean;
  index?: number;
};

function useTextToSpeechExternal({
  setIsSpeaking,
  audioRef,
  messageId,
  isLast,
  index = 0,
}: TUseTTSExternal) {
  const localize = useLocalize();
  const { showToast } = useToastContext();

  const voice = useRecoilValue(store.voice);
  const cacheTTS = useRecoilValue(store.cacheTTS);
  const playbackRate = useRecoilValue(store.playbackRate);

  const [downloadFile, setDownloadFile] = useState(false);
  const promiseAudioRef = useRef<HTMLAudioElement | null>(null);

  /* Global Audio Variables */
  const globalIsFetching = useRecoilValue(store.globalAudioFetchingFamily(index));
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));

  // Expose a fresh Audio element for the blob via the shared ref; the consuming
  // component decides when it actually plays
  const autoPlayAudio = (blobUrl: string) => {
    const newAudio = new Audio(blobUrl);
    audioRef.current = newAudio;
  };

  // Play a blob URL directly (used for cached responses), honoring the configured playback rate
  const playAudioPromise = (blobUrl: string) => {
    const newAudio = new Audio(blobUrl);
    const initializeAudio = () => {
      if (playbackRate != null && playbackRate !== 1 && playbackRate > 0) {
        newAudio.playbackRate = playbackRate;
      }
    };

    initializeAudio();
    const playPromise = () => newAudio.play().then(() => setIsSpeaking(true));

    playPromise().catch((error: Error) => {
      if (
        error.message &&
        error.message.includes('The play() request was interrupted by a call to pause()')
      ) {
        console.log('Play request was interrupted by a call to pause()');
        initializeAudio();
        return playPromise().catch(console.error);
      }
      console.error(error);
      showToast({ message: localize('com_nav_audio_play_error', error.message), status: 'error' });
    });

    newAudio.onended = () => {
      console.log('Cached message audio ended');
      URL.revokeObjectURL(blobUrl);
      setIsSpeaking(false);
    };

    promiseAudioRef.current = newAudio;
  };

  // Trigger a browser download of the generated audio
  const downloadAudio = (blobUrl: string) => {
    const a = document.createElement('a');
    a.href = blobUrl;
    a.download = 'audio.mp3';
    a.click();
    setDownloadFile(false);
  };

  const { mutate: processAudio, isLoading: isProcessing } = useTextToSpeechMutation({
    onMutate: (variables) => {
      const inputText = (variables.get('input') ?? '') as string;
      if (inputText.length >= 4096) {
        showToast({
          message: localize('com_nav_long_audio_warning'),
          status: 'warning',
        });
      }
    },
    onSuccess: async (data: ArrayBuffer, variables) => {
      try {
        const inputText = (variables.get('input') ?? '') as string;
        const audioBlob = new Blob([data], { type: 'audio/mpeg' });

        if (cacheTTS && inputText) {
          const cache = await caches.open('tts-responses');
          const request = new Request(inputText);
          const response = new Response(audioBlob);
          cache.put(request, response);
        }

        const blobUrl = URL.createObjectURL(audioBlob);
        if (downloadFile) {
          downloadAudio(blobUrl);
        }
        autoPlayAudio(blobUrl);
      } catch (error) {
        showToast({
          message: `Error processing audio: ${(error as Error).message}`,
          status: 'error',
        });
      }
    },
    onError: (error: unknown) => {
      showToast({
        message: localize('com_nav_audio_process_error', (error as Error).message),
        status: 'error',
      });
    },
  });

  const startMutation = (text: string, download: boolean) => {
    const formData = createFormData(text, voice ?? '');
    setDownloadFile(download);
    processAudio(formData);
  };

  const generateSpeechExternal = (text: string, download: boolean) => {
    if (cacheTTS) {
      handleCachedResponse(text, download);
    } else {
      startMutation(text, download);
    }
  };

  // Serve audio from the Cache API when a matching entry exists; otherwise fall back to the mutation
  const handleCachedResponse = async (text: string, download: boolean) => {
    const cachedResponse = await caches.match(text);
    if (!cachedResponse) {
      return startMutation(text, download);
    }
    const audioBlob = await cachedResponse.blob();
    const blobUrl = URL.createObjectURL(audioBlob);
    if (download) {
      downloadAudio(blobUrl);
    } else {
      playAudioPromise(blobUrl);
    }
  };

  // Stop both the message-level <audio> element and any promise-driven playback
  const cancelSpeech = () => {
    const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
    const pauseAudio = (currentElement: HTMLAudioElement | null) => {
      if (currentElement) {
        currentElement.pause();
        currentElement.src && URL.revokeObjectURL(currentElement.src);
        audioRef.current = null;
      }
    };
    pauseAudio(messageAudio);
    pauseAudio(promiseAudioRef.current);
    setIsSpeaking(false);
  };

  const cancelPromiseSpeech = useCallback(() => {
    if (promiseAudioRef.current) {
      promiseAudioRef.current.pause();
      promiseAudioRef.current.src && URL.revokeObjectURL(promiseAudioRef.current.src);
      promiseAudioRef.current = null;
      setIsSpeaking(false);
    }
  }, []);

  // Clean up promise-driven playback when the component unmounts
  useEffect(() => cancelPromiseSpeech, [cancelPromiseSpeech]);

  const isLoading = useMemo(() => {
    return isProcessing || (isLast && globalIsFetching && !globalIsPlaying);
  }, [isProcessing, globalIsFetching, globalIsPlaying, isLast]);

  const { data: voicesData = [] } = useVoicesQuery();

  return {
    generateSpeechExternal,
    cancelSpeech,
    isLoading,
    audioRef,
    voices: voicesData,
  };
}

export default useTextToSpeechExternal;
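
For reference, a minimal sketch of how a message component might consume this hook. The hook's parameters and return values match the source above; the component name, props, and button markup are illustrative and not part of the codebase.

```tsx
import { useRef, useState } from 'react';
import useTextToSpeechExternal from '~/hooks/Input/useTextToSpeechExternal';

/**
 * Illustrative consumer: requests or cancels external TTS for one message.
 * In LibreChat itself, a separate MessageAudio component renders the <audio>
 * element that plays freshly fetched speech; this sketch only wires up the hook.
 */
function SpeakButton({
  text,
  messageId,
  isLast,
}: {
  text: string;
  messageId: string;
  isLast: boolean;
}) {
  const [isSpeaking, setIsSpeaking] = useState(false);
  const audioRef = useRef<HTMLAudioElement | null>(null);

  const { generateSpeechExternal, cancelSpeech, isLoading } = useTextToSpeechExternal({
    setIsSpeaking,
    audioRef,
    messageId,
    isLast,
  });

  return (
    <button
      type="button"
      disabled={isLoading}
      onClick={() => (isSpeaking ? cancelSpeech() : generateSpeechExternal(text, false))}
    >
      {isSpeaking ? 'Stop' : 'Read aloud'}
    </button>
  );
}

export default SpeakButton;
```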