🔊 fix(tts): NotAllowedError (mobile/safari), Unsupported MediaSource type (firefox), Hide Audio Element (#2854)

* fix: hide audio element on mobile

* chore: add tts docs link

* fix: select voice option on first render

* fix: NotAllowedError, prevent async playback for mobile triggers, consolidate MessageAudio code, use user-triggered unmutes

* fix: Firefox/unsupported type for MediaSource hack

* refactor(STT): make icon red when recording; consolidate logic into AudioRecorder component

* fix: revert Redis changes to use separate client for sessions
Danny Avila 2024-05-24 12:18:11 -04:00 committed by GitHub
parent dcd2e3e62d
commit 35ba4ba1a4
14 changed files with 421 additions and 130 deletions
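
A note on the Firefox item in the message above: the MediaSource hunk is not among the excerpts below, so the following is only a sketch of the kind of feature check such a fix implies. Chrome accepts `audio/mpeg` for Media Source Extensions while Firefox reports it unsupported, which matches the error named in the title. The MIME string, function names, and blob fallback here are assumptions, not code from this commit:

```ts
// Sketch only — not from this diff. Guard the MSE streaming path behind a
// support check and fall back to plain blob playback where MSE says no.
const STREAM_TYPE = 'audio/mpeg'; // assumed stream MIME type

function canUseMediaSource(): boolean {
  return typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported(STREAM_TYPE);
}

async function playResponse(audio: HTMLAudioElement, response: Response) {
  if (canUseMediaSource()) {
    const mediaSource = new MediaSource();
    audio.src = URL.createObjectURL(mediaSource);
    mediaSource.addEventListener('sourceopen', async () => {
      const sourceBuffer = mediaSource.addSourceBuffer(STREAM_TYPE);
      const reader = response.body!.getReader();
      // Append chunks as they arrive so playback can begin before the
      // whole file is downloaded
      for (;;) {
        const { done, value } = await reader.read();
        if (done) {
          break;
        }
        sourceBuffer.appendBuffer(value);
        await new Promise((r) => sourceBuffer.addEventListener('updateend', r, { once: true }));
      }
      mediaSource.endOfStream();
    });
  } else {
    // Firefox fallback: wait for the full blob, then play it directly
    audio.src = URL.createObjectURL(await response.blob());
  }
  await audio.play();
}
```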


@@ -0,0 +1,48 @@
+import { useEffect, useRef } from 'react';
+
+export default function useCustomAudioRef({
+  setIsPlaying,
+}: {
+  setIsPlaying: (isPlaying: boolean) => void;
+}) {
+  const audioRef = useRef<HTMLAudioElement | null>(null);
+  useEffect(() => {
+    const handleEnded = () => {
+      setIsPlaying(false);
+      console.log('message audio ended');
+      if (audioRef.current) {
+        URL.revokeObjectURL(audioRef.current.src);
+      }
+    };
+
+    const handleStart = () => {
+      setIsPlaying(true);
+      console.log('message audio started');
+    };
+
+    const handlePause = () => {
+      setIsPlaying(false);
+      console.log('message audio paused');
+    };
+
+    const audioElement = audioRef.current;
+
+    if (audioRef.current) {
+      audioRef.current.muted = true;
+      audioRef.current.addEventListener('ended', handleEnded);
+      audioRef.current.addEventListener('play', handleStart);
+      audioRef.current.addEventListener('pause', handlePause);
+    }
+
+    return () => {
+      if (audioElement) {
+        audioElement.removeEventListener('ended', handleEnded);
+        audioElement.removeEventListener('play', handleStart);
+        audioElement.removeEventListener('pause', handlePause);
+        URL.revokeObjectURL(audioElement.src);
+      }
+    };
+  }, [setIsPlaying]);
+
+  return { audioRef };
+}
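
The hook above mutes the element on mount — muted autoplay is permitted on mobile Safari, so this sidesteps NotAllowedError until a user gesture unmutes. A minimal sketch of a consumer follows; the component name, `src` wiring, and hiding classes are assumptions, while the `audio-${messageId}` id mirrors the `cancelSpeech` lookup later in this commit:

```tsx
import { useState } from 'react';
import useCustomAudioRef from '~/hooks/Audio/useCustomAudioRef'; // path assumed

function MessageAudio({ messageId, src }: { messageId: string; src?: string }) {
  const [isPlaying, setIsPlaying] = useState(false);
  const { audioRef } = useCustomAudioRef({ setIsPlaying });
  // Visually hidden rather than display:none, so playback keeps working;
  // the hook mutes on mount and a later user gesture unmutes.
  return (
    <audio
      ref={audioRef}
      id={`audio-${messageId}`}
      src={src}
      autoPlay
      data-playing={isPlaying}
      className="absolute h-0 w-0 overflow-hidden"
    />
  );
}

export default MessageAudio;
```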


@@ -1,13 +1,13 @@
 import { useRef } from 'react';
 import { parseTextParts } from 'librechat-data-provider';
-import type { TMessageContentParts } from 'librechat-data-provider';
+import type { TMessage } from 'librechat-data-provider';
 import useTextToSpeechExternal from './useTextToSpeechExternal';
 import useTextToSpeechBrowser from './useTextToSpeechBrowser';
 import { usePauseGlobalAudio } from '../Audio';
 import { useRecoilState } from 'recoil';
 import store from '~/store';
 
-const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boolean, index = 0) => {
+const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
   const [endpointTTS] = useRecoilState<string>(store.endpointTTS);
   const useExternalTextToSpeech = endpointTTS === 'external';
@@ -22,7 +22,8 @@ const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boolean, index = 0) => {
     cancelSpeech: cancelSpeechExternal,
     isSpeaking: isSpeakingExternal,
     isLoading: isLoading,
-  } = useTextToSpeechExternal(isLast, index);
+    audioRef,
+  } = useTextToSpeechExternal(message.messageId, isLast, index);
 
   const { pauseGlobalAudio } = usePauseGlobalAudio(index);
   const generateSpeech = useExternalTextToSpeech ? generateSpeechExternal : generateSpeechLocal;
@@ -36,8 +37,10 @@ const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boolean, index = 0) => {
     isMouseDownRef.current = true;
     timerRef.current = window.setTimeout(() => {
       if (isMouseDownRef.current) {
-        const parsedMessage = typeof message === 'string' ? message : parseTextParts(message);
-        generateSpeech(parsedMessage, true);
+        const messageContent = message?.content ?? message?.text ?? '';
+        const parsedMessage =
+          typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
+        generateSpeech(parsedMessage, false);
       }
     }, 1000);
   };
@@ -51,10 +54,13 @@ const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boolean, index = 0) => {
   const toggleSpeech = () => {
     if (isSpeaking) {
+      console.log('canceling message audio speech');
       cancelSpeech();
       pauseGlobalAudio();
     } else {
-      const parsedMessage = typeof message === 'string' ? message : parseTextParts(message);
+      const messageContent = message?.content ?? message?.text ?? '';
+      const parsedMessage =
+        typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
       generateSpeech(parsedMessage, false);
     }
   };
@@ -65,6 +71,7 @@ const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boolean, index = 0) => {
     toggleSpeech,
     isSpeaking,
     isLoading,
+    audioRef,
   };
 };
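
For orientation, a TTS control consuming this hook might look like the sketch below. Only the hook's signature and return shape come from the hunks above; the import path, component, and markup are assumptions:

```tsx
import type { TMessage } from 'librechat-data-provider';
import useTextToSpeech from '~/hooks/Input/useTextToSpeech'; // path assumed

function TTSButton({ message, isLast }: { message: TMessage; isLast: boolean }) {
  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTextToSpeech(message, isLast);
  return (
    <>
      <button type="button" onClick={toggleSpeech} disabled={isLoading}>
        {isSpeaking ? 'Stop' : 'Read aloud'}
      </button>
      {/* audioRef is populated by the external-TTS path; attaching it here
          gives cancelSpeech a live element to pause */}
      <audio ref={audioRef} id={`audio-${message.messageId}`} className="hidden" />
    </>
  );
}
```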


@@ -1,6 +1,7 @@
 import { useRecoilValue } from 'recoil';
-import { useCallback, useEffect, useState, useMemo, useRef } from 'react';
+import { useState, useMemo, useRef, useCallback, useEffect } from 'react';
 import { useTextToSpeechMutation } from '~/data-provider';
+import useAudioRef from '~/hooks/Audio/useAudioRef';
 import useLocalize from '~/hooks/useLocalize';
 import { useToastContext } from '~/Providers';
 import store from '~/store';
@@ -12,23 +13,28 @@ const createFormData = (text: string, voice: string) => {
   return formData;
 };
 
-function useTextToSpeechExternal(isLast: boolean, index = 0) {
+function useTextToSpeechExternal(messageId: string, isLast: boolean, index = 0) {
   const localize = useLocalize();
   const { showToast } = useToastContext();
   const voice = useRecoilValue(store.voice);
   const cacheTTS = useRecoilValue(store.cacheTTS);
   const playbackRate = useRecoilValue(store.playbackRate);
 
-  const audioRef = useRef<HTMLAudioElement | null>(null);
   const [downloadFile, setDownloadFile] = useState(false);
   const [isLocalSpeaking, setIsSpeaking] = useState(false);
+  const { audioRef } = useAudioRef({ setIsPlaying: setIsSpeaking });
+  const promiseAudioRef = useRef<HTMLAudioElement | null>(null);
 
   /* Global Audio Variables */
   const globalIsFetching = useRecoilValue(store.globalAudioFetchingFamily(index));
   const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));
 
-  const playAudio = (blobUrl: string) => {
+  const autoPlayAudio = (blobUrl: string) => {
     const newAudio = new Audio(blobUrl);
+    audioRef.current = newAudio;
+  };
+
+  const playAudioPromise = (blobUrl: string) => {
+    const newAudio = new Audio(blobUrl);
     const initializeAudio = () => {
       if (playbackRate && playbackRate !== 1) {
@@ -53,12 +59,12 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
     });
 
     newAudio.onended = () => {
-      console.log('Target message audio ended');
+      console.log('Cached message audio ended');
       URL.revokeObjectURL(blobUrl);
       setIsSpeaking(false);
     };
 
-    audioRef.current = newAudio;
+    promiseAudioRef.current = newAudio;
   };
 
   const downloadAudio = (blobUrl: string) => {
@@ -95,7 +101,7 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
       if (downloadFile) {
         downloadAudio(blobUrl);
       }
-      playAudio(blobUrl);
+      autoPlayAudio(blobUrl);
     } catch (error) {
       showToast({
         message: `Error processing audio: ${(error as Error).message}`,
@@ -111,38 +117,58 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
     },
   });
 
-  const generateSpeechExternal = async (text: string, download: boolean) => {
-    const cachedResponse = await caches.match(text);
+  const startMutation = (text: string, download: boolean) => {
+    const formData = createFormData(text, voice);
+    setDownloadFile(download);
+    processAudio(formData);
+  };
 
-    if (cachedResponse && cacheTTS) {
-      handleCachedResponse(cachedResponse, download);
+  const generateSpeechExternal = (text: string, download: boolean) => {
+    if (cacheTTS) {
+      handleCachedResponse(text, download);
     } else {
-      const formData = createFormData(text, voice);
-      setDownloadFile(download);
-      processAudio(formData);
+      startMutation(text, download);
     }
   };
 
-  const handleCachedResponse = async (cachedResponse: Response, download: boolean) => {
+  const handleCachedResponse = async (text: string, download: boolean) => {
+    const cachedResponse = await caches.match(text);
+    if (!cachedResponse) {
+      return startMutation(text, download);
+    }
     const audioBlob = await cachedResponse.blob();
     const blobUrl = URL.createObjectURL(audioBlob);
     if (download) {
       downloadAudio(blobUrl);
     } else {
-      playAudio(blobUrl);
+      playAudioPromise(blobUrl);
    }
   };
 
-  const cancelSpeech = useCallback(() => {
-    if (audioRef.current) {
-      audioRef.current.pause();
-      audioRef.current.src && URL.revokeObjectURL(audioRef.current.src);
-      audioRef.current = null;
+  const cancelSpeech = () => {
+    const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
+    const pauseAudio = (currentElement: HTMLAudioElement | null) => {
+      if (currentElement) {
+        currentElement.pause();
+        currentElement.src && URL.revokeObjectURL(currentElement.src);
+        audioRef.current = null;
+      }
+    };
+    pauseAudio(messageAudio);
+    pauseAudio(promiseAudioRef.current);
+    setIsSpeaking(false);
+  };
+
+  const cancelPromiseSpeech = useCallback(() => {
+    if (promiseAudioRef.current) {
+      promiseAudioRef.current.pause();
+      promiseAudioRef.current.src && URL.revokeObjectURL(promiseAudioRef.current.src);
+      promiseAudioRef.current = null;
       setIsSpeaking(false);
     }
   }, []);
 
-  useEffect(() => cancelSpeech, [cancelSpeech]);
+  useEffect(() => cancelPromiseSpeech, [cancelPromiseSpeech]);
 
   const isLoading = useMemo(() => {
     return isProcessing || (isLast && globalIsFetching && !globalIsPlaying);
@@ -152,7 +178,7 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
     return isLocalSpeaking || (isLast && globalIsPlaying);
   }, [isLocalSpeaking, globalIsPlaying, isLast]);
 
-  return { generateSpeechExternal, cancelSpeech, isLoading, isSpeaking };
+  return { generateSpeechExternal, cancelSpeech, isLoading, isSpeaking, audioRef };
 }
 
 export default useTextToSpeechExternal;
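
`handleCachedResponse` reads from the Cache Storage API with the raw message text as the key. The write side isn't shown in these hunks; it would be roughly the following sketch, where the cache name and MIME type are assumptions:

```ts
// Sketch of the write side implied by caches.match(text) above.
async function cacheTTSResponse(text: string, audioBlob: Blob) {
  const cache = await caches.open('tts-responses'); // cache name assumed
  const response = new Response(audioBlob, {
    headers: { 'Content-Type': 'audio/mpeg' }, // MIME type assumed
  });
  // The text doubles as the request key, mirroring caches.match(text)
  await cache.put(text, response);
}
```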


@@ -10,6 +10,7 @@ import useGetSender from '~/hooks/Conversations/useGetSender';
 import useFileHandling from '~/hooks/Files/useFileHandling';
 import { useChatContext } from '~/Providers/ChatContext';
 import useLocalize from '~/hooks/useLocalize';
+import { globalAudioId } from '~/common';
 import store from '~/store';
 
 type KeyEvent = KeyboardEvent<HTMLTextAreaElement>;
@@ -178,6 +179,11 @@ export default function useTextarea({
       }
 
       if (isNonShiftEnter && !isComposing?.current) {
+        const globalAudio = document.getElementById(globalAudioId) as HTMLAudioElement;
+        if (globalAudio) {
+          console.log('Unmuting global audio');
+          globalAudio.muted = false;
+        }
         submitButtonRef.current?.click();
       }
     },
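
The Enter-to-send unmute above presumes a single global `<audio>` element rendered elsewhere with `id={globalAudioId}`. A minimal sketch, where the component, props, and hiding class are assumptions:

```tsx
import { globalAudioId } from '~/common';

function GlobalAudio({ src }: { src?: string }) {
  // Starts muted so autoplay is allowed on mobile; the keydown handler
  // above flips `muted` to false inside a genuine user gesture.
  return <audio id={globalAudioId} src={src} autoPlay muted className="hidden" />;
}

export default GlobalAudio;
```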