⏯️ fix(tts): Resolve Voice Selection and Manual Playback Issues (#2845)

* fix: voice setting for autoplayback TTS

* fix(useTextToSpeechExternal): resolve stateful playback issues and consolidate state logic

* refactor: initialize tts voice and provider schema once per request

* fix(tts): edge case, longer text inputs. TODO: use continuous stream for longer text inputs

* fix(tts): pause global audio on conversation change

* refactor: keyvMongo ban cache to allow db updates for unbanning, to prevent server restart

* chore: eslint fix

* refactor: make ban cache exclusively keyvMongo
This commit is contained in:
Danny Avila 2024-05-23 16:27:36 -04:00 committed by GitHub
parent 8e66683577
commit 514a502b9c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 332 additions and 178 deletions

View file

@ -1,6 +1,7 @@
import { useRecoilValue } from 'recoil';
import { useCallback, useEffect, useState, useMemo } from 'react';
import { useCallback, useEffect, useState, useMemo, useRef } from 'react';
import { useTextToSpeechMutation } from '~/data-provider';
import useLocalize from '~/hooks/useLocalize';
import { useToastContext } from '~/Providers';
import store from '~/store';
@ -12,16 +13,16 @@ const createFormData = (text: string, voice: string) => {
};
function useTextToSpeechExternal(isLast: boolean, index = 0) {
const localize = useLocalize();
const { showToast } = useToastContext();
const voice = useRecoilValue(store.voice);
const cacheTTS = useRecoilValue(store.cacheTTS);
const playbackRate = useRecoilValue(store.playbackRate);
const [text, setText] = useState<string | null>(null);
const audioRef = useRef<HTMLAudioElement | null>(null);
const [downloadFile, setDownloadFile] = useState(false);
const [isLocalSpeaking, setIsSpeaking] = useState(false);
const [blobUrl, setBlobUrl] = useState<string | null>(null);
const [audio, setAudio] = useState<HTMLAudioElement | null>(null);
/* Global Audio Variables */
const globalIsFetching = useRecoilValue(store.globalAudioFetchingFamily(index));
@ -29,10 +30,13 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
const playAudio = (blobUrl: string) => {
const newAudio = new Audio(blobUrl);
if (playbackRate && playbackRate !== 1) {
newAudio.playbackRate = playbackRate;
}
const initializeAudio = () => {
if (playbackRate && playbackRate !== 1) {
newAudio.playbackRate = playbackRate;
}
};
initializeAudio();
const playPromise = () => newAudio.play().then(() => setIsSpeaking(true));
playPromise().catch((error: Error) => {
@ -40,10 +44,12 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
error?.message &&
error.message.includes('The play() request was interrupted by a call to pause()')
) {
console.log('Play request was interrupted by a call to pause()');
initializeAudio();
return playPromise().catch(console.error);
}
console.error(error);
showToast({ message: `Error playing audio: ${error.message}`, status: 'error' });
showToast({ message: localize('com_nav_audio_play_error', error.message), status: 'error' });
});
newAudio.onended = () => {
@ -52,8 +58,7 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
setIsSpeaking(false);
};
setAudio(newAudio);
setBlobUrl(blobUrl);
audioRef.current = newAudio;
};
const downloadAudio = (blobUrl: string) => {
@ -65,35 +70,32 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
};
const { mutate: processAudio, isLoading: isProcessing } = useTextToSpeechMutation({
onSuccess: async (data: ArrayBuffer) => {
onMutate: (variables) => {
const inputText = (variables.get('input') ?? '') as string;
if (inputText.length >= 4096) {
showToast({
message: localize('com_nav_long_audio_warning'),
status: 'warning',
});
}
},
onSuccess: async (data: ArrayBuffer, variables) => {
try {
const mediaSource = new MediaSource();
const audio = new Audio();
audio.src = URL.createObjectURL(mediaSource);
audio.autoplay = true;
const inputText = (variables.get('input') ?? '') as string;
const audioBlob = new Blob([data], { type: 'audio/mpeg' });
mediaSource.onsourceopen = () => {
const sourceBuffer = mediaSource.addSourceBuffer('audio/mpeg');
sourceBuffer.appendBuffer(data);
};
audio.onended = () => {
URL.revokeObjectURL(audio.src);
setIsSpeaking(false);
};
setAudio(audio);
if (cacheTTS) {
if (cacheTTS && inputText) {
const cache = await caches.open('tts-responses');
const request = new Request(text!);
const response = new Response(new Blob([data], { type: 'audio/mpeg' }));
const request = new Request(inputText!);
const response = new Response(audioBlob);
cache.put(request, response);
}
const blobUrl = URL.createObjectURL(audioBlob);
if (downloadFile) {
downloadAudio(audio.src);
downloadAudio(blobUrl);
}
playAudio(blobUrl);
} catch (error) {
showToast({
message: `Error processing audio: ${(error as Error).message}`,
@ -102,13 +104,15 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
}
},
onError: (error: unknown) => {
showToast({ message: `Error: ${(error as Error).message}`, status: 'error' });
showToast({
message: localize('com_nav_audio_process_error', (error as Error).message),
status: 'error',
});
},
});
const generateSpeechExternal = async (text: string, download: boolean) => {
setText(text);
const cachedResponse = await getCachedResponse(text);
const cachedResponse = await caches.match(text);
if (cachedResponse && cacheTTS) {
handleCachedResponse(cachedResponse, download);
@ -119,8 +123,6 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
}
};
const getCachedResponse = async (text: string) => await caches.match(text);
const handleCachedResponse = async (cachedResponse: Response, download: boolean) => {
const audioBlob = await cachedResponse.blob();
const blobUrl = URL.createObjectURL(audioBlob);
@ -132,12 +134,13 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
};
const cancelSpeech = useCallback(() => {
if (audio) {
audio.pause();
blobUrl && URL.revokeObjectURL(blobUrl);
if (audioRef.current) {
audioRef.current.pause();
audioRef.current.src && URL.revokeObjectURL(audioRef.current.src);
audioRef.current = null;
setIsSpeaking(false);
}
}, [audio, blobUrl]);
}, []);
useEffect(() => cancelSpeech, [cancelSpeech]);