mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-20 18:30:15 +01:00
🔀 refactor: Modularize TTS Logic for Improved Browser support (#3657)
* WIP: message audio refactor * WIP: use MessageAudio by provider * fix: Update MessageAudio component to use TTSEndpoints enum * feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging * feat: Add voice dropdown components for different TTS engines * docs: update incorrect `voices` example changed `voice: ''` to `voices: ['alloy']` * feat: Add browser support check for Edge TTS engine component with error toast if not supported --------- Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
This commit is contained in:
parent
bcde0beb47
commit
dba704079c
18 changed files with 784 additions and 187 deletions
100
client/src/hooks/Audio/useTTSBrowser.ts
Normal file
100
client/src/hooks/Audio/useTTSBrowser.ts
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
// client/src/hooks/Audio/useTTSBrowser.ts
|
||||
import { useRef, useEffect, useState } from 'react';
|
||||
import { useRecoilState, useRecoilValue } from 'recoil';
|
||||
import { parseTextParts } from 'librechat-data-provider';
|
||||
import type { TMessageContentParts } from 'librechat-data-provider';
|
||||
import useTextToSpeechBrowser from '~/hooks/Input/useTextToSpeechBrowser';
|
||||
import usePauseGlobalAudio from '~/hooks/Audio/usePauseGlobalAudio';
|
||||
import useAudioRef from '~/hooks/Audio/useAudioRef';
|
||||
import { logger } from '~/utils';
|
||||
import store from '~/store';
|
||||
|
||||
/** Optional inputs accepted by the `useTTSBrowser` hook. */
type TUseTextToSpeech = {
  /** Identifier of the message; declared here but not read by `useTTSBrowser`. */
  messageId?: string;
  /** Message body: either a plain string or an array of content parts. */
  content?: TMessageContentParts[] | string;
  /** Flag for the last message; `useTTSBrowser` defaults it to `false`. */
  isLast?: boolean;
  /** Index used to select the per-index audio state; `useTTSBrowser` defaults it to `0`. */
  index?: number;
};
|
||||
|
||||
/** How long (in milliseconds) the mouse must stay pressed before speech is generated. */
const LONG_PRESS_DELAY_MS = 1000;

/**
 * Text-to-speech controls for a single message, backed by `useTextToSpeechBrowser`.
 *
 * Besides wiring the speak/cancel callbacks, the hook keeps the selected voice
 * (Recoil `store.voice`) in sync with the list of voices that is currently
 * available: if the stored voice is not in the list, the first voice is selected.
 *
 * @param props - Optional message data; `isLast` defaults to `false` and `index` to `0`.
 * @returns Handlers for long-press (`handleMouseDown` / `handleMouseUp`) and click
 *   (`toggleSpeech`) interaction, the combined `isSpeaking` flag, a constant
 *   `isLoading: false`, the `audioRef` from `useAudioRef`, and the available `voices`.
 */
const useTTSBrowser = (props?: TUseTextToSpeech) => {
  const { content, isLast = false, index = 0 } = props ?? {};

  const isMouseDownRef = useRef(false);
  const timerRef = useRef<number | undefined>(undefined);
  const [isSpeakingState, setIsSpeaking] = useState(false);
  const { audioRef } = useAudioRef({ setIsPlaying: setIsSpeaking });

  const { pauseGlobalAudio } = usePauseGlobalAudio(index);
  const [voice, setVoice] = useRecoilState(store.voice);
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));

  // The last message also counts as speaking while the global audio for this index plays.
  const isSpeaking = isSpeakingState || (isLast && globalIsPlaying);

  const {
    generateSpeechLocal: generateSpeech,
    cancelSpeechLocal: cancelSpeech,
    voices,
  } = useTextToSpeechBrowser({ setIsSpeaking });

  useEffect(() => {
    const firstVoice = voices[0];
    // Only act once voices are available and are given in object form.
    if (!voices.length || typeof firstVoice !== 'object') {
      return;
    }

    const lastSelectedVoice = voices.find((v) =>
      typeof v === 'object' ? v.value === voice : v === voice,
    );

    // Keep the previously selected voice when it is still offered; otherwise
    // fall back to the first available voice.
    let nextVoice = firstVoice.value;
    if (lastSelectedVoice != null) {
      nextVoice =
        typeof lastSelectedVoice === 'object' ? lastSelectedVoice.value : lastSelectedVoice;
    }

    logger.log('useTTSBrowser.ts - Effect:', { voices, voice: nextVoice });
    setVoice(nextVoice);
  }, [setVoice, voice, voices]);

  /** Cancels a pending long-press timer, if any, and forgets its id. */
  const clearLongPressTimer = () => {
    if (timerRef.current != null) {
      window.clearTimeout(timerRef.current);
      timerRef.current = undefined;
    }
  };

  // On unmount, drop any pending long-press so speech is never generated for a
  // component that no longer exists.
  useEffect(
    () => () => {
      isMouseDownRef.current = false;
      if (timerRef.current != null) {
        window.clearTimeout(timerRef.current);
        timerRef.current = undefined;
      }
    },
    [],
  );

  /** Converts the message content to plain text and hands it to the speech generator. */
  const speakContent = () => {
    const messageContent = content ?? '';
    const parsedMessage =
      typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
    generateSpeech(parsedMessage);
  };

  /** Starts the long-press timer; speech is generated if the press outlasts the delay. */
  const handleMouseDown = () => {
    // A repeated mousedown without a mouseup must not leave an orphaned timer behind.
    clearLongPressTimer();
    isMouseDownRef.current = true;
    timerRef.current = window.setTimeout(() => {
      timerRef.current = undefined;
      if (isMouseDownRef.current) {
        speakContent();
      }
    }, LONG_PRESS_DELAY_MS);
  };

  /** Ends the press and cancels the long-press timer if it has not fired yet. */
  const handleMouseUp = () => {
    isMouseDownRef.current = false;
    clearLongPressTimer();
  };

  /** Stops speech when currently speaking; otherwise speaks the message content. */
  const toggleSpeech = () => {
    if (isSpeaking === true) {
      cancelSpeech();
      pauseGlobalAudio();
      return;
    }
    speakContent();
  };

  return {
    handleMouseDown,
    handleMouseUp,
    toggleSpeech,
    isSpeaking,
    isLoading: false,
    audioRef,
    voices,
  };
};
|
||||
|
||||
export default useTTSBrowser;
|
||||
Loading…
Add table
Add a link
Reference in a new issue