mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-15 15:08:10 +01:00
🔀 refactor: Modularize TTS Logic for Improved Browser support (#3657)
* WIP: message audio refactor * WIP: use MessageAudio by provider * fix: Update MessageAudio component to use TTSEndpoints enum * feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging * feat: Add voice dropdown components for different TTS engines * docs: update incorrect `voices` example changed `voice: ''` to `voices: ['alloy']` * feat: Add browser support check for Edge TTS engine component with error toast if not supported --------- Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
This commit is contained in:
parent
bcde0beb47
commit
dba704079c
18 changed files with 784 additions and 187 deletions
|
|
@ -1,3 +1,6 @@
|
|||
export * from './MediaSourceAppender';
|
||||
export { default as useCustomAudioRef } from './useCustomAudioRef';
|
||||
export { default as usePauseGlobalAudio } from './usePauseGlobalAudio';
|
||||
export { default as useTTSExternal } from './useTTSExternal';
|
||||
export { default as useTTSBrowser } from './useTTSBrowser';
|
||||
export { default as useTTSEdge } from './useTTSEdge';
|
||||
|
|
|
|||
100
client/src/hooks/Audio/useTTSBrowser.ts
Normal file
100
client/src/hooks/Audio/useTTSBrowser.ts
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
// client/src/hooks/Audio/useTTSBrowser.ts
|
||||
import { useRef, useEffect, useState } from 'react';
|
||||
import { useRecoilState, useRecoilValue } from 'recoil';
|
||||
import { parseTextParts } from 'librechat-data-provider';
|
||||
import type { TMessageContentParts } from 'librechat-data-provider';
|
||||
import useTextToSpeechBrowser from '~/hooks/Input/useTextToSpeechBrowser';
|
||||
import usePauseGlobalAudio from '~/hooks/Audio/usePauseGlobalAudio';
|
||||
import useAudioRef from '~/hooks/Audio/useAudioRef';
|
||||
import { logger } from '~/utils';
|
||||
import store from '~/store';
|
||||
|
||||
/** Options accepted by `useTTSBrowser`. Every field is optional. */
type TUseTextToSpeech = {
  /** Not read by this hook. */
  messageId?: string;
  /** Message to speak: a plain string, or content parts flattened with `parseTextParts`. */
  content?: TMessageContentParts[] | string;
  /** When true, the global playing flag for `index` also counts as speaking. Defaults to `false`. */
  isLast?: boolean;
  /** Key passed to `usePauseGlobalAudio` and `store.globalAudioPlayingFamily`. Defaults to `0`. */
  index?: number;
};

/** How long (ms) the pointer must stay down before a long press starts speech. */
const LONG_PRESS_DELAY_MS = 1000;

/**
 * Text-to-speech controls backed by `useTextToSpeechBrowser`.
 *
 * @param props - Optional message content and playback index; see `TUseTextToSpeech`.
 * @returns Long-press handlers (`handleMouseDown` / `handleMouseUp`), a `toggleSpeech`
 *   action, the derived `isSpeaking` flag, a constant `isLoading: false`, the `audioRef`
 *   obtained from `useAudioRef`, and the `voices` list reported by the speech hook.
 */
const useTTSBrowser = (props?: TUseTextToSpeech) => {
  const { content, isLast = false, index = 0 } = props ?? {};

  const isMouseDownRef = useRef(false);
  const timerRef = useRef<number | undefined>(undefined);
  const [isSpeakingState, setIsSpeaking] = useState(false);
  const { audioRef } = useAudioRef({ setIsPlaying: setIsSpeaking });

  const { pauseGlobalAudio } = usePauseGlobalAudio(index);
  const [voice, setVoice] = useRecoilState(store.voice);
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));

  // Speaking if this hook says so, or if this is the last message and the
  // global flag for `index` is set.
  const isSpeaking = isSpeakingState || (isLast && globalIsPlaying);

  const {
    generateSpeechLocal: generateSpeech,
    cancelSpeechLocal: cancelSpeech,
    voices,
  } = useTextToSpeechBrowser({ setIsSpeaking });

  // Keep the stored voice valid: when it is not among the available voices,
  // fall back to the first one. Only runs when the first entry is an object.
  useEffect(() => {
    const firstVoice = voices[0];
    if (!voices.length || typeof firstVoice !== 'object') {
      return;
    }

    const isStoredVoiceAvailable = voices.some((v) =>
      typeof v === 'object' ? v.value === voice : v === voice,
    );
    if (isStoredVoiceAvailable) {
      // The stored value is already correct; re-writing it would be a no-op.
      return;
    }

    logger.log('useTTSBrowser.ts - Effect:', { voices, voice: firstVoice.value });
    setVoice(firstVoice.value);
  }, [setVoice, voice, voices]);

  /** Cancels a pending long-press timer, if any, and forgets its handle. */
  const clearLongPressTimer = () => {
    if (timerRef.current != null) {
      window.clearTimeout(timerRef.current);
      timerRef.current = undefined;
    }
  };

  // Make sure a pending long press cannot fire after the component unmounts.
  useEffect(() => {
    return () => {
      isMouseDownRef.current = false;
      if (timerRef.current != null) {
        window.clearTimeout(timerRef.current);
        timerRef.current = undefined;
      }
    };
  }, []);

  /** Flattens `content` to text and hands it to the speech generator. */
  const speakContent = () => {
    const messageContent = content ?? '';
    const text =
      typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
    generateSpeech(text);
  };

  const handleMouseDown = () => {
    // A second mouse-down without a mouse-up must not leave an orphaned timer.
    clearLongPressTimer();
    isMouseDownRef.current = true;
    timerRef.current = window.setTimeout(() => {
      timerRef.current = undefined;
      if (isMouseDownRef.current) {
        speakContent();
      }
    }, LONG_PRESS_DELAY_MS);
  };

  const handleMouseUp = () => {
    isMouseDownRef.current = false;
    clearLongPressTimer();
  };

  const toggleSpeech = () => {
    if (isSpeaking === true) {
      cancelSpeech();
      pauseGlobalAudio();
      return;
    }
    speakContent();
  };

  return {
    handleMouseDown,
    handleMouseUp,
    toggleSpeech,
    isSpeaking,
    isLoading: false,
    audioRef,
    voices,
  };
};
|
||||
|
||||
export default useTTSBrowser;
|
||||
100
client/src/hooks/Audio/useTTSEdge.ts
Normal file
100
client/src/hooks/Audio/useTTSEdge.ts
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
// client/src/hooks/Audio/useTTSEdge.ts
|
||||
import { useRef, useEffect, useState } from 'react';
|
||||
import { useRecoilState, useRecoilValue } from 'recoil';
|
||||
import { parseTextParts } from 'librechat-data-provider';
|
||||
import type { TMessageContentParts } from 'librechat-data-provider';
|
||||
import usePauseGlobalAudio from '~/hooks/Audio/usePauseGlobalAudio';
|
||||
import useTextToSpeechEdge from '~/hooks/Input/useTextToSpeechEdge';
|
||||
import useAudioRef from '~/hooks/Audio/useAudioRef';
|
||||
import { logger } from '~/utils';
|
||||
import store from '~/store';
|
||||
|
||||
/** Options accepted by `useTTSEdge`. Every field is optional. */
type TUseTextToSpeech = {
  /** Not read by this hook. */
  messageId?: string;
  /** Message to speak: a plain string, or content parts flattened with `parseTextParts`. */
  content?: TMessageContentParts[] | string;
  /** When true, the global playing flag for `index` also counts as speaking. Defaults to `false`. */
  isLast?: boolean;
  /** Key passed to `usePauseGlobalAudio` and `store.globalAudioPlayingFamily`. Defaults to `0`. */
  index?: number;
};

/** How long (ms) the pointer must stay down before a long press starts speech. */
const LONG_PRESS_DELAY_MS = 1000;

/**
 * Text-to-speech controls backed by `useTextToSpeechEdge`.
 *
 * @param props - Optional message content and playback index; see `TUseTextToSpeech`.
 * @returns Long-press handlers (`handleMouseDown` / `handleMouseUp`), a `toggleSpeech`
 *   action, the derived `isSpeaking` flag, a constant `isLoading: false`, the `audioRef`
 *   obtained from `useAudioRef`, and the `voices` list reported by the speech hook.
 */
const useTTSEdge = (props?: TUseTextToSpeech) => {
  const { content, isLast = false, index = 0 } = props ?? {};

  const isMouseDownRef = useRef(false);
  const timerRef = useRef<number | undefined>(undefined);
  const [isSpeakingState, setIsSpeaking] = useState(false);
  const { audioRef } = useAudioRef({ setIsPlaying: setIsSpeaking });

  const { pauseGlobalAudio } = usePauseGlobalAudio(index);
  const [voice, setVoice] = useRecoilState(store.voice);
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));

  // Speaking if this hook says so, or if this is the last message and the
  // global flag for `index` is set.
  const isSpeaking = isSpeakingState || (isLast && globalIsPlaying);

  const {
    generateSpeechEdge: generateSpeech,
    cancelSpeechEdge: cancelSpeech,
    voices,
  } = useTextToSpeechEdge({ setIsSpeaking });

  // Keep the stored voice valid: when it is not among the available voices,
  // fall back to the first one. Only runs when the first entry is an object.
  useEffect(() => {
    const firstVoice = voices[0];
    if (!voices.length || typeof firstVoice !== 'object') {
      return;
    }

    const isStoredVoiceAvailable = voices.some((v) =>
      typeof v === 'object' ? v.value === voice : v === voice,
    );
    if (isStoredVoiceAvailable) {
      // The stored value is already correct; re-writing it would be a no-op.
      return;
    }

    logger.log('useTTSEdge.ts - Effect:', { voices, voice: firstVoice.value });
    setVoice(firstVoice.value);
  }, [setVoice, voice, voices]);

  /** Cancels a pending long-press timer, if any, and forgets its handle. */
  const clearLongPressTimer = () => {
    if (timerRef.current != null) {
      window.clearTimeout(timerRef.current);
      timerRef.current = undefined;
    }
  };

  // Make sure a pending long press cannot fire after the component unmounts.
  useEffect(() => {
    return () => {
      isMouseDownRef.current = false;
      if (timerRef.current != null) {
        window.clearTimeout(timerRef.current);
        timerRef.current = undefined;
      }
    };
  }, []);

  /** Flattens `content` to text and hands it to the speech generator. */
  const speakContent = () => {
    const messageContent = content ?? '';
    const text =
      typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
    generateSpeech(text);
  };

  const handleMouseDown = () => {
    // A second mouse-down without a mouse-up must not leave an orphaned timer.
    clearLongPressTimer();
    isMouseDownRef.current = true;
    timerRef.current = window.setTimeout(() => {
      timerRef.current = undefined;
      if (isMouseDownRef.current) {
        speakContent();
      }
    }, LONG_PRESS_DELAY_MS);
  };

  const handleMouseUp = () => {
    isMouseDownRef.current = false;
    clearLongPressTimer();
  };

  const toggleSpeech = () => {
    if (isSpeaking === true) {
      cancelSpeech();
      pauseGlobalAudio();
      return;
    }
    speakContent();
  };

  return {
    handleMouseDown,
    handleMouseUp,
    toggleSpeech,
    isSpeaking,
    isLoading: false,
    audioRef,
    voices,
  };
};
|
||||
|
||||
export default useTTSEdge;
|
||||
101
client/src/hooks/Audio/useTTSExternal.ts
Normal file
101
client/src/hooks/Audio/useTTSExternal.ts
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
// client/src/hooks/Audio/useTTSExternal.ts
|
||||
import { useRef, useEffect, useState } from 'react';
|
||||
import { useRecoilState, useRecoilValue } from 'recoil';
|
||||
import { parseTextParts } from 'librechat-data-provider';
|
||||
import type { TMessageContentParts } from 'librechat-data-provider';
|
||||
import useTextToSpeechExternal from '~/hooks/Input/useTextToSpeechExternal';
|
||||
import usePauseGlobalAudio from '~/hooks/Audio/usePauseGlobalAudio';
|
||||
import useAudioRef from '~/hooks/Audio/useAudioRef';
|
||||
import { logger } from '~/utils';
|
||||
import store from '~/store';
|
||||
|
||||
/** Options accepted by `useTTSExternal`. Every field is optional. */
type TUseTextToSpeech = {
  /** Forwarded unchanged to `useTextToSpeechExternal`. */
  messageId?: string;
  /** Message to speak: a plain string, or content parts flattened with `parseTextParts`. */
  content?: TMessageContentParts[] | string;
  /**
   * When true, the global playing flag for `index` also counts as speaking.
   * Also forwarded to `useTextToSpeechExternal`. Defaults to `false`.
   */
  isLast?: boolean;
  /**
   * Key passed to `usePauseGlobalAudio`, `store.globalAudioPlayingFamily` and
   * `useTextToSpeechExternal`. Defaults to `0`.
   */
  index?: number;
};

/** How long (ms) the pointer must stay down before a long press starts speech. */
const LONG_PRESS_DELAY_MS = 1000;

/**
 * Text-to-speech controls backed by `useTextToSpeechExternal`.
 *
 * @param props - Optional message id, content and playback index; see `TUseTextToSpeech`.
 * @returns Long-press handlers (`handleMouseDown` / `handleMouseUp`), a `toggleSpeech`
 *   action, the derived `isSpeaking` flag, the `isLoading` flag and `voices` list reported
 *   by the speech hook, and the `audioRef` obtained from `useAudioRef`.
 */
const useTTSExternal = (props?: TUseTextToSpeech) => {
  const { messageId, content, isLast = false, index = 0 } = props ?? {};

  const isMouseDownRef = useRef(false);
  const timerRef = useRef<number | undefined>(undefined);
  const [isSpeakingState, setIsSpeaking] = useState(false);
  const { audioRef } = useAudioRef({ setIsPlaying: setIsSpeaking });

  const { pauseGlobalAudio } = usePauseGlobalAudio(index);
  const [voice, setVoice] = useRecoilState(store.voice);
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));

  // Speaking if this hook says so, or if this is the last message and the
  // global flag for `index` is set.
  const isSpeaking = isSpeakingState || (isLast && globalIsPlaying);

  const {
    cancelSpeech,
    generateSpeechExternal: generateSpeech,
    isLoading,
    voices,
  } = useTextToSpeechExternal({
    setIsSpeaking,
    audioRef,
    messageId,
    isLast,
    index,
  });

  // Keep the stored voice valid: when it is not among the available voices,
  // fall back to the first one.
  useEffect(() => {
    if (!voices.length) {
      return;
    }

    const isStoredVoiceAvailable = voices.some((v) => v === voice);
    if (isStoredVoiceAvailable) {
      // The stored value is already correct; re-writing it would be a no-op.
      return;
    }

    const firstVoice = voices[0];
    logger.log('useTTSExternal.ts - Effect:', { voices, voice: firstVoice });
    setVoice(firstVoice.toString());
  }, [setVoice, voice, voices]);

  /** Cancels a pending long-press timer, if any, and forgets its handle. */
  const clearLongPressTimer = () => {
    if (timerRef.current != null) {
      window.clearTimeout(timerRef.current);
      timerRef.current = undefined;
    }
  };

  // Make sure a pending long press cannot fire after the component unmounts.
  useEffect(() => {
    return () => {
      isMouseDownRef.current = false;
      if (timerRef.current != null) {
        window.clearTimeout(timerRef.current);
        timerRef.current = undefined;
      }
    };
  }, []);

  /** Flattens `content` to text and hands it to the speech generator. */
  const speakContent = () => {
    const messageContent = content ?? '';
    const text =
      typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
    // Second argument is kept as `false`, exactly as both original call sites passed it.
    generateSpeech(text, false);
  };

  const handleMouseDown = () => {
    // A second mouse-down without a mouse-up must not leave an orphaned timer.
    clearLongPressTimer();
    isMouseDownRef.current = true;
    timerRef.current = window.setTimeout(() => {
      timerRef.current = undefined;
      if (isMouseDownRef.current) {
        speakContent();
      }
    }, LONG_PRESS_DELAY_MS);
  };

  const handleMouseUp = () => {
    isMouseDownRef.current = false;
    clearLongPressTimer();
  };

  const toggleSpeech = () => {
    if (isSpeaking === true) {
      cancelSpeech();
      pauseGlobalAudio();
      return;
    }
    speakContent();
  };

  return {
    handleMouseDown,
    handleMouseUp,
    toggleSpeech,
    isSpeaking,
    isLoading,
    audioRef,
    voices,
  };
};
|
||||
|
||||
export default useTTSExternal;
|
||||
Loading…
Add table
Add a link
Reference in a new issue