🗣️ feat: Edge TTS engine (#3358)

* feat: MS Edge TTS

* feat: Edge TTS; fix: STT hook
This commit is contained in:
Marco Beretta 2024-08-07 20:15:41 +02:00 committed by GitHub
parent 01a88991ab
commit b390ba781f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 379 additions and 129 deletions

View file

@ -3,30 +3,67 @@ import { parseTextParts } from 'librechat-data-provider';
import type { TMessage } from 'librechat-data-provider';
import useTextToSpeechExternal from './useTextToSpeechExternal';
import useTextToSpeechBrowser from './useTextToSpeechBrowser';
import { usePauseGlobalAudio } from '../Audio';
import useGetAudioSettings from './useGetAudioSettings';
import useTextToSpeechEdge from './useTextToSpeechEdge';
import { usePauseGlobalAudio } from '../Audio';
const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
const { externalTextToSpeech } = useGetAudioSettings();
const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
const { textToSpeechEndpoint } = useGetAudioSettings();
const { pauseGlobalAudio } = usePauseGlobalAudio(index);
const audioRef = useRef<HTMLAudioElement | null>(null);
const {
generateSpeechLocal: generateSpeechLocal,
cancelSpeechLocal: cancelSpeechLocal,
generateSpeechLocal,
cancelSpeechLocal,
isSpeaking: isSpeakingLocal,
voices: voicesLocal,
} = useTextToSpeechBrowser();
const {
generateSpeechExternal: generateSpeechExternal,
generateSpeechEdge,
cancelSpeechEdge,
isSpeaking: isSpeakingEdge,
voices: voicesEdge,
} = useTextToSpeechEdge();
const {
generateSpeechExternal,
cancelSpeech: cancelSpeechExternal,
isSpeaking: isSpeakingExternal,
isLoading: isLoading,
audioRef,
} = useTextToSpeechExternal(message.messageId, isLast, index);
const { pauseGlobalAudio } = usePauseGlobalAudio(index);
isLoading: isLoadingExternal,
audioRef: audioRefExternal,
voices: voicesExternal,
} = useTextToSpeechExternal(message?.messageId || '', isLast, index);
const generateSpeech = externalTextToSpeech ? generateSpeechExternal : generateSpeechLocal;
const cancelSpeech = externalTextToSpeech ? cancelSpeechExternal : cancelSpeechLocal;
const isSpeaking = externalTextToSpeech ? isSpeakingExternal : isSpeakingLocal;
// Pick which text-to-speech implementation backs the values this hook exposes,
// based on `textToSpeechEndpoint` from the audio settings.
let generateSpeech, cancelSpeech, isSpeaking, isLoading, voices;
switch (textToSpeechEndpoint) {
case 'external':
generateSpeech = generateSpeechExternal;
cancelSpeech = cancelSpeechExternal;
isSpeaking = isSpeakingExternal;
isLoading = isLoadingExternal;
// Point this hook's own `audioRef` at whatever the external hook's ref currently holds.
// NOTE(review): this assignment appears to run in the hook body rather than inside an
// effect — confirm that writing `ref.current` at this point is intended.
if (audioRefExternal) {
audioRef.current = audioRefExternal.current;
}
voices = voicesExternal;
break;
case 'edge':
generateSpeech = generateSpeechEdge;
cancelSpeech = cancelSpeechEdge;
isSpeaking = isSpeakingEdge;
// No loading flag is taken from the Edge hook here; `isLoading` is fixed to false.
isLoading = false;
voices = voicesEdge;
break;
// 'browser' shares its body with `default`, so any other endpoint value also
// falls back to the browser implementation.
case 'browser':
default:
generateSpeech = generateSpeechLocal;
cancelSpeech = cancelSpeechLocal;
isSpeaking = isSpeakingLocal;
// As with 'edge', `isLoading` is fixed to false for the browser implementation.
isLoading = false;
voices = voicesLocal;
break;
}
const isMouseDownRef = useRef(false);
const timerRef = useRef<number | undefined>(undefined);
@ -52,7 +89,6 @@ const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
const toggleSpeech = () => {
if (isSpeaking) {
console.log('canceling message audio speech');
cancelSpeech();
pauseGlobalAudio();
} else {
@ -69,6 +105,7 @@ const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
toggleSpeech,
isSpeaking,
isLoading,
voices,
audioRef,
};
};