mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-09-21 21:50:49 +02:00

* WIP: message audio refactor * WIP: use MessageAudio by provider * fix: Update MessageAudio component to use TTSEndpoints enum * feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging * feat: Add voice dropdown components for different TTS engines * docs: update incorrect `voices` example changed `voice: ''` to `voices: ['alloy']` * feat: Add browser support check for Edge TTS engine component with error toast if not supported --------- Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
100 lines
3.1 KiB
TypeScript
100 lines
3.1 KiB
TypeScript
// client/src/hooks/Audio/useTTSBrowser.ts
|
|
import { useRef, useEffect, useState } from 'react';
|
|
import { useRecoilState, useRecoilValue } from 'recoil';
|
|
import { parseTextParts } from 'librechat-data-provider';
|
|
import type { TMessageContentParts } from 'librechat-data-provider';
|
|
import useTextToSpeechBrowser from '~/hooks/Input/useTextToSpeechBrowser';
|
|
import usePauseGlobalAudio from '~/hooks/Audio/usePauseGlobalAudio';
|
|
import useAudioRef from '~/hooks/Audio/useAudioRef';
|
|
import { logger } from '~/utils';
|
|
import store from '~/store';
|
|
|
|
/**
 * Options accepted by `useTTSBrowser`. Every field is optional; the hook
 * also tolerates being called with no argument at all.
 */
type TUseTextToSpeech = {
  /** Identifier of the message. Not read by `useTTSBrowser` in this file. */
  messageId?: string;
  /**
   * Message body to speak: either a plain string, or content parts that the
   * hook flattens with `parseTextParts` before handing them to the speech engine.
   */
  content?: TMessageContentParts[] | string;
  /**
   * When true, the hook also reports `isSpeaking` while the global audio
   * state for `index` is playing. The hook defaults this to `false`.
   */
  isLast?: boolean;
  /**
   * Key passed to `usePauseGlobalAudio` and `store.globalAudioPlayingFamily`.
   * The hook defaults this to `0`.
   */
  index?: number;
};
|
|
|
|
/**
 * Text-to-speech hook backed by `useTextToSpeechBrowser`.
 *
 * Exposes press-and-hold handlers (`handleMouseDown` / `handleMouseUp`) that
 * start speech after the pointer has been held for one second, and a
 * `toggleSpeech` action that either starts speaking the message content or
 * cancels speech and pauses the global audio for `index`.
 *
 * It also keeps the `store.voice` Recoil atom in sync with the voices reported
 * by `useTextToSpeechBrowser`: the stored voice is kept when it is present in
 * the list, otherwise the first listed voice is selected.
 *
 * @param props - Optional message data; see `TUseTextToSpeech`.
 * @returns Handlers, the combined `isSpeaking` flag, a constant
 *   `isLoading: false`, the `audioRef` from `useAudioRef`, and the voice list.
 */
const useTTSBrowser = (props?: TUseTextToSpeech) => {
  const { content, isLast = false, index = 0 } = props ?? {};

  const isMouseDownRef = useRef(false);
  const timerRef = useRef<number | undefined>(undefined);
  const [isSpeakingState, setIsSpeaking] = useState(false);
  const { audioRef } = useAudioRef({ setIsPlaying: setIsSpeaking });

  const { pauseGlobalAudio } = usePauseGlobalAudio(index);
  const [voice, setVoice] = useRecoilState(store.voice);
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));

  // Speaking if this hook started speech, or if this is the last message and
  // the global audio state for `index` reports playback.
  const isSpeaking = isSpeakingState || (isLast && globalIsPlaying);

  const {
    generateSpeechLocal: generateSpeech,
    cancelSpeechLocal: cancelSpeech,
    voices,
  } = useTextToSpeechBrowser({ setIsSpeaking });

  useEffect(() => {
    const firstVoice = voices[0];
    // Only act once voices are available and are described as objects.
    if (voices.length && typeof firstVoice === 'object') {
      const lastSelectedVoice = voices.find((v) =>
        typeof v === 'object' ? v.value === voice : v === voice,
      );
      if (lastSelectedVoice != null) {
        const currentVoice =
          typeof lastSelectedVoice === 'object' ? lastSelectedVoice.value : lastSelectedVoice;
        logger.log('useTTSBrowser.ts - Effect:', { voices, voice: currentVoice });
        setVoice(currentVoice);
        return;
      }

      // The stored voice is not in the list: fall back to the first one.
      logger.log('useTTSBrowser.ts - Effect:', { voices, voice: firstVoice.value });
      setVoice(firstVoice.value);
    }
  }, [setVoice, voice, voices]);

  /** Cancels a pending long-press timeout, if any, and forgets its id. */
  const clearLongPressTimer = () => {
    if (timerRef.current != null) {
      window.clearTimeout(timerRef.current);
      timerRef.current = undefined;
    }
  };

  // Ensure a pending long-press cannot fire `generateSpeech` after unmount.
  useEffect(() => {
    return () => {
      isMouseDownRef.current = false;
      if (timerRef.current != null) {
        window.clearTimeout(timerRef.current);
        timerRef.current = undefined;
      }
    };
  }, []);

  /** Flattens the message content into the plain string handed to the speech engine. */
  const getSpeakableText = () => {
    const messageContent = content ?? '';
    return typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
  };

  /** Starts the one-second long-press countdown that triggers speech. */
  const handleMouseDown = () => {
    isMouseDownRef.current = true;
    // Drop any earlier countdown so repeated presses never leave an orphaned timer.
    clearLongPressTimer();
    timerRef.current = window.setTimeout(() => {
      timerRef.current = undefined;
      if (isMouseDownRef.current) {
        generateSpeech(getSpeakableText());
      }
    }, 1000);
  };

  /** Ends the press and cancels the countdown if it has not fired yet. */
  const handleMouseUp = () => {
    isMouseDownRef.current = false;
    clearLongPressTimer();
  };

  /** Stops speech (and global audio) when speaking; otherwise speaks the content. */
  const toggleSpeech = () => {
    if (isSpeaking === true) {
      cancelSpeech();
      pauseGlobalAudio();
    } else {
      generateSpeech(getSpeakableText());
    }
  };

  return {
    handleMouseDown,
    handleMouseUp,
    toggleSpeech,
    isSpeaking,
    isLoading: false,
    audioRef,
    voices,
  };
};

export default useTTSBrowser;
|