mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-19 18:00:15 +01:00
⏯️ fix(tts): Resolve Voice Selection and Manual Playback Issues (#2845)
* fix: voice setting for autoplayback TTS
* fix(useTextToSpeechExternal): resolve stateful playback issues and consolidate state logic
* refactor: initialize tts voice and provider schema once per request
* fix(tts): edge case, longer text inputs. TODO: use continuous stream for longer text inputs
* fix(tts): pause global audio on conversation change
* refactor: keyvMongo ban cache to allow db updates for unbanning, to prevent server restart
* chore: eslint fix
* refactor: make ban cache exclusively keyvMongo
This commit is contained in:
parent
8e66683577
commit
514a502b9c
10 changed files with 332 additions and 178 deletions
|
|
@ -1,6 +1,7 @@
|
|||
import { useRecoilValue } from 'recoil';
|
||||
import { useCallback, useEffect, useState, useMemo } from 'react';
|
||||
import { useCallback, useEffect, useState, useMemo, useRef } from 'react';
|
||||
import { useTextToSpeechMutation } from '~/data-provider';
|
||||
import useLocalize from '~/hooks/useLocalize';
|
||||
import { useToastContext } from '~/Providers';
|
||||
import store from '~/store';
|
||||
|
||||
|
|
@ -12,16 +13,16 @@ const createFormData = (text: string, voice: string) => {
|
|||
};
|
||||
|
||||
function useTextToSpeechExternal(isLast: boolean, index = 0) {
|
||||
const localize = useLocalize();
|
||||
const { showToast } = useToastContext();
|
||||
const voice = useRecoilValue(store.voice);
|
||||
const cacheTTS = useRecoilValue(store.cacheTTS);
|
||||
const playbackRate = useRecoilValue(store.playbackRate);
|
||||
|
||||
const [text, setText] = useState<string | null>(null);
|
||||
const audioRef = useRef<HTMLAudioElement | null>(null);
|
||||
|
||||
const [downloadFile, setDownloadFile] = useState(false);
|
||||
const [isLocalSpeaking, setIsSpeaking] = useState(false);
|
||||
const [blobUrl, setBlobUrl] = useState<string | null>(null);
|
||||
const [audio, setAudio] = useState<HTMLAudioElement | null>(null);
|
||||
|
||||
/* Global Audio Variables */
|
||||
const globalIsFetching = useRecoilValue(store.globalAudioFetchingFamily(index));
|
||||
|
|
@ -29,10 +30,13 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
|
|||
|
||||
const playAudio = (blobUrl: string) => {
|
||||
const newAudio = new Audio(blobUrl);
|
||||
if (playbackRate && playbackRate !== 1) {
|
||||
newAudio.playbackRate = playbackRate;
|
||||
}
|
||||
const initializeAudio = () => {
|
||||
if (playbackRate && playbackRate !== 1) {
|
||||
newAudio.playbackRate = playbackRate;
|
||||
}
|
||||
};
|
||||
|
||||
initializeAudio();
|
||||
const playPromise = () => newAudio.play().then(() => setIsSpeaking(true));
|
||||
|
||||
playPromise().catch((error: Error) => {
|
||||
|
|
@ -40,10 +44,12 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
|
|||
error?.message &&
|
||||
error.message.includes('The play() request was interrupted by a call to pause()')
|
||||
) {
|
||||
console.log('Play request was interrupted by a call to pause()');
|
||||
initializeAudio();
|
||||
return playPromise().catch(console.error);
|
||||
}
|
||||
console.error(error);
|
||||
showToast({ message: `Error playing audio: ${error.message}`, status: 'error' });
|
||||
showToast({ message: localize('com_nav_audio_play_error', error.message), status: 'error' });
|
||||
});
|
||||
|
||||
newAudio.onended = () => {
|
||||
|
|
@ -52,8 +58,7 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
|
|||
setIsSpeaking(false);
|
||||
};
|
||||
|
||||
setAudio(newAudio);
|
||||
setBlobUrl(blobUrl);
|
||||
audioRef.current = newAudio;
|
||||
};
|
||||
|
||||
const downloadAudio = (blobUrl: string) => {
|
||||
|
|
@ -65,35 +70,32 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
|
|||
};
|
||||
|
||||
const { mutate: processAudio, isLoading: isProcessing } = useTextToSpeechMutation({
|
||||
onSuccess: async (data: ArrayBuffer) => {
|
||||
onMutate: (variables) => {
|
||||
const inputText = (variables.get('input') ?? '') as string;
|
||||
if (inputText.length >= 4096) {
|
||||
showToast({
|
||||
message: localize('com_nav_long_audio_warning'),
|
||||
status: 'warning',
|
||||
});
|
||||
}
|
||||
},
|
||||
onSuccess: async (data: ArrayBuffer, variables) => {
|
||||
try {
|
||||
const mediaSource = new MediaSource();
|
||||
const audio = new Audio();
|
||||
audio.src = URL.createObjectURL(mediaSource);
|
||||
audio.autoplay = true;
|
||||
const inputText = (variables.get('input') ?? '') as string;
|
||||
const audioBlob = new Blob([data], { type: 'audio/mpeg' });
|
||||
|
||||
mediaSource.onsourceopen = () => {
|
||||
const sourceBuffer = mediaSource.addSourceBuffer('audio/mpeg');
|
||||
sourceBuffer.appendBuffer(data);
|
||||
};
|
||||
|
||||
audio.onended = () => {
|
||||
URL.revokeObjectURL(audio.src);
|
||||
setIsSpeaking(false);
|
||||
};
|
||||
|
||||
setAudio(audio);
|
||||
|
||||
if (cacheTTS) {
|
||||
if (cacheTTS && inputText) {
|
||||
const cache = await caches.open('tts-responses');
|
||||
const request = new Request(text!);
|
||||
const response = new Response(new Blob([data], { type: 'audio/mpeg' }));
|
||||
const request = new Request(inputText!);
|
||||
const response = new Response(audioBlob);
|
||||
cache.put(request, response);
|
||||
}
|
||||
|
||||
const blobUrl = URL.createObjectURL(audioBlob);
|
||||
if (downloadFile) {
|
||||
downloadAudio(audio.src);
|
||||
downloadAudio(blobUrl);
|
||||
}
|
||||
playAudio(blobUrl);
|
||||
} catch (error) {
|
||||
showToast({
|
||||
message: `Error processing audio: ${(error as Error).message}`,
|
||||
|
|
@ -102,13 +104,15 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
|
|||
}
|
||||
},
|
||||
onError: (error: unknown) => {
|
||||
showToast({ message: `Error: ${(error as Error).message}`, status: 'error' });
|
||||
showToast({
|
||||
message: localize('com_nav_audio_process_error', (error as Error).message),
|
||||
status: 'error',
|
||||
});
|
||||
},
|
||||
});
|
||||
|
||||
const generateSpeechExternal = async (text: string, download: boolean) => {
|
||||
setText(text);
|
||||
const cachedResponse = await getCachedResponse(text);
|
||||
const cachedResponse = await caches.match(text);
|
||||
|
||||
if (cachedResponse && cacheTTS) {
|
||||
handleCachedResponse(cachedResponse, download);
|
||||
|
|
@ -119,8 +123,6 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
|
|||
}
|
||||
};
|
||||
|
||||
const getCachedResponse = async (text: string) => await caches.match(text);
|
||||
|
||||
const handleCachedResponse = async (cachedResponse: Response, download: boolean) => {
|
||||
const audioBlob = await cachedResponse.blob();
|
||||
const blobUrl = URL.createObjectURL(audioBlob);
|
||||
|
|
@ -132,12 +134,13 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
|
|||
};
|
||||
|
||||
const cancelSpeech = useCallback(() => {
|
||||
if (audio) {
|
||||
audio.pause();
|
||||
blobUrl && URL.revokeObjectURL(blobUrl);
|
||||
if (audioRef.current) {
|
||||
audioRef.current.pause();
|
||||
audioRef.current.src && URL.revokeObjectURL(audioRef.current.src);
|
||||
audioRef.current = null;
|
||||
setIsSpeaking(false);
|
||||
}
|
||||
}, [audio, blobUrl]);
|
||||
}, []);
|
||||
|
||||
useEffect(() => cancelSpeech, [cancelSpeech]);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue