From e3ebcfd2b1b73a0f64cddc46327e81e37558ac55 Mon Sep 17 00:00:00 2001
From: Danny Avila
Date: Tue, 13 Aug 2024 04:14:37 -0400
Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=99=EF=B8=8F=20fix:=20Optimize=20and?=
 =?UTF-8?q?=20Fix=20Browser=20TTS=20Incompatibility=20(firefox)=20(#3627)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: 'disable' MsEdgeTTS on unsupported browser (firefox)

* refactor: only pass necessary props to HoverButton MessageAudio

* refactor: Fix conditional comparison operators in MessageAudio component

* refactor: Remove console.log statement in MessageAudio component
---
 .../components/Chat/Messages/HoverButtons.tsx | 11 +++++-
 .../components/Chat/Messages/MessageAudio.tsx | 37 +++++++++----------
 client/src/hooks/Input/useTextToSpeech.ts     | 20 +++++++---
 client/src/hooks/Input/useTextToSpeechEdge.ts | 22 ++++++++++-
 4 files changed, 62 insertions(+), 28 deletions(-)

diff --git a/client/src/components/Chat/Messages/HoverButtons.tsx b/client/src/components/Chat/Messages/HoverButtons.tsx
index 7da848b849..938503acdb 100644
--- a/client/src/components/Chat/Messages/HoverButtons.tsx
+++ b/client/src/components/Chat/Messages/HoverButtons.tsx
@@ -73,7 +73,14 @@ export default function HoverButtons({
   return (
-      {TextToSpeech && <MessageAudio index={index} message={message} isLast={isLast} />}
+      {TextToSpeech && (
+        <MessageAudio
+          index={index}
+          messageId={message.messageId}
+          content={message.content ?? message.text ?? ''}
+          isLast={isLast}
+        />
+      )}
       {isEditableEndpoint && (
diff --git a/client/src/components/Chat/Messages/MessageAudio.tsx b/client/src/components/Chat/Messages/MessageAudio.tsx
--- a/client/src/components/Chat/Messages/MessageAudio.tsx
+++ b/client/src/components/Chat/Messages/MessageAudio.tsx
@@ -84,8 +83,8 @@ export default function MessageAudio({ index, message, isLast }: THoverButtons)
           height: '0px',
           width: '0px',
         }}
-        src={audioRef.current?.src || undefined}
-        id={`audio-${message.messageId}`}
+        src={audioRef.current?.src ?? undefined}
+        id={`audio-${messageId}`}
         muted
         autoPlay
       />
diff --git a/client/src/hooks/Input/useTextToSpeech.ts b/client/src/hooks/Input/useTextToSpeech.ts
index 95ac5a6f84..26d4ef9528 100644
--- a/client/src/hooks/Input/useTextToSpeech.ts
+++ b/client/src/hooks/Input/useTextToSpeech.ts
@@ -1,7 +1,7 @@
 import { useRecoilState } from 'recoil';
 import { useRef, useMemo, useEffect } from 'react';
 import { parseTextParts } from 'librechat-data-provider';
-import type { TMessage } from 'librechat-data-provider';
+import type { TMessageContentParts } from 'librechat-data-provider';
 import type { Option } from '~/common';
 import useTextToSpeechExternal from './useTextToSpeechExternal';
 import useTextToSpeechBrowser from './useTextToSpeechBrowser';
@@ -11,7 +11,15 @@ import { usePauseGlobalAudio } from '../Audio';
 import { logger } from '~/utils';
 import store from '~/store';
 
-const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
+type TUseTextToSpeech = {
+  messageId?: string;
+  content?: TMessageContentParts[] | string;
+  isLast?: boolean;
+  index?: number;
+};
+
+const useTextToSpeech = (props?: TUseTextToSpeech) => {
+  const { messageId, content, isLast = false, index = 0 } = props ?? {};
   const [voice, setVoice] = useRecoilState(store.voice);
   const { textToSpeechEndpoint } = useGetAudioSettings();
   const { pauseGlobalAudio } = usePauseGlobalAudio(index);
@@ -38,7 +46,7 @@
     isLoading: isLoadingExternal,
     audioRef: audioRefExternal,
     voices: voicesExternal,
-  } = useTextToSpeechExternal(message?.messageId ?? '', isLast, index);
+  } = useTextToSpeechExternal(messageId ?? '', isLast, index);
 
   let generateSpeech, cancelSpeech, isSpeaking, isLoading;
 
@@ -112,7 +120,7 @@
     isMouseDownRef.current = true;
     timerRef.current = window.setTimeout(() => {
       if (isMouseDownRef.current) {
-        const messageContent = message?.content ?? message?.text ?? '';
+        const messageContent = content ?? '';
         const parsedMessage =
           typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
         generateSpeech(parsedMessage, false);
@@ -128,11 +136,11 @@
   };
 
   const toggleSpeech = () => {
-    if (isSpeaking) {
+    if (isSpeaking === true) {
       cancelSpeech();
       pauseGlobalAudio();
     } else {
-      const messageContent = message?.content ?? message?.text ?? '';
+      const messageContent = content ?? '';
       const parsedMessage =
         typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
       generateSpeech(parsedMessage, false);
diff --git a/client/src/hooks/Input/useTextToSpeechEdge.ts b/client/src/hooks/Input/useTextToSpeechEdge.ts
index fd969cd2b0..bc6f8bea02 100644
--- a/client/src/hooks/Input/useTextToSpeechEdge.ts
+++ b/client/src/hooks/Input/useTextToSpeechEdge.ts
@@ -1,5 +1,5 @@
 import { useRecoilValue } from 'recoil';
-import { useState, useCallback, useRef, useEffect } from 'react';
+import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
 import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
 import { useToastContext } from '~/Providers';
 import useLocalize from '~/hooks/useLocalize';
@@ -29,6 +29,8 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
   const pendingBuffers = useRef([]);
   const { showToast } = useToastContext();
 
+  const isBrowserSupported = useMemo(() => MediaSource.isTypeSupported('audio/mpeg'), []);
+
   const fetchVoices = useCallback(() => {
     if (!ttsRef.current) {
       ttsRef.current = new MsEdgeTTS();
@@ -198,14 +200,23 @@
   }, [showToast, localize]);
 
   useEffect(() => {
+    if (!MediaSource.isTypeSupported('audio/mpeg')) {
+      return;
+    }
     fetchVoices();
   }, [fetchVoices]);
 
   useEffect(() => {
+    if (!MediaSource.isTypeSupported('audio/mpeg')) {
+      return;
+    }
     initializeTTS();
   }, [voiceName, initializeTTS]);
 
   useEffect(() => {
+    if (!MediaSource.isTypeSupported('audio/mpeg')) {
+      return;
+    }
     initializeMediaSource();
     return () => {
       if (mediaSourceRef.current) {
       }
     };
   }, [initializeMediaSource]);
 
+  if (!isBrowserSupported) {
+    return {
+      generateSpeechEdge: () => ({}),
+      cancelSpeechEdge: () => ({}),
+      isSpeaking: false,
+      voices: [],
+    };
+  }
+
   return { generateSpeechEdge, cancelSpeechEdge, isSpeaking, voices };
 }
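What the diff amounts to, in plain terms: HoverButtons now hands MessageAudio only the primitive fields it needs (messageId, content, index, isLast), useTextToSpeech accepts those same fields as an optional props object instead of a whole TMessage, and useTextToSpeechEdge disables itself when the browser cannot stream MP3 through MediaSource. A usage sketch of the new hook signature follows; the import path and the destructured return values (toggleSpeech, isSpeaking) are assumptions, since the diff shows neither the hook's export nor its return statement.

```tsx
// Hypothetical consumer of the refactored hook; the import path and default
// export are assumed from the file layout shown in the diff.
import useTextToSpeech from '~/hooks/Input/useTextToSpeech';

type SpeakButtonProps = { messageId: string; text: string };

export default function SpeakButton({ messageId, text }: SpeakButtonProps) {
  // The props object mirrors TUseTextToSpeech from the patch: messageId,
  // content, isLast, index. The destructured return values are assumptions;
  // the diff does not show the hook's return statement.
  const { toggleSpeech, isSpeaking } = useTextToSpeech({
    messageId,
    content: text,
    isLast: true,
    index: 0,
  });

  return (
    <button type="button" onClick={toggleSpeech}>
      {isSpeaking === true ? 'Stop' : 'Listen'}
    </button>
  );
}
```

Passing primitives instead of the whole TMessage keeps the hook decoupled from the message shape; the call site decides how to derive a content value.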
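Two of the commit bullets are about operator strictness: the audio element's src fallback moved from || to ??, and the isSpeaking check became an explicit === true. A standalone illustration of why the two pairs behave differently, with no project code involved:

```ts
// '||' falls back on any falsy value (including '' and 0),
// while '??' falls back only on null or undefined.
const emptySrc: string = '';
console.log(emptySrc || undefined); // undefined -- the empty string is discarded
console.log(emptySrc ?? undefined); // ''        -- the empty string is preserved

// For a boolean that can be undefined, an explicit comparison only passes on a
// literal true, and it documents intent better than a bare truthiness check.
function speakingLabel(isSpeaking?: boolean): string {
  return isSpeaking === true ? 'Stop' : 'Listen';
}

console.log(speakingLabel());     // 'Listen'
console.log(speakingLabel(true)); // 'Stop'
```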
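The Firefox fix itself is a capability check. MediaSource.isTypeSupported('audio/mpeg') reports false in Firefox, so the patch guards each effect in useTextToSpeechEdge and returns inert controls rather than trying to stream MsEdgeTTS audio through a MediaSource it cannot play. A minimal sketch of that guard pattern, assuming an illustrative hook name and control type rather than the project's actual API:

```ts
import { useMemo } from 'react';

type EdgeSpeechControls = {
  generateSpeechEdge: (text: string) => void;
  cancelSpeechEdge: () => void;
  isSpeaking: boolean;
  voices: string[];
};

// Inert controls, analogous to the stub object the patch returns when MP3
// cannot be streamed through MediaSource (the exact shape here is illustrative).
const noopControls: EdgeSpeechControls = {
  generateSpeechEdge: () => undefined,
  cancelSpeechEdge: () => undefined,
  isSpeaking: false,
  voices: [],
};

// Hypothetical guard hook: detect support once per mount, then hand back
// either the real controls or the no-op fallback.
export function useEdgeSpeechGuard(realControls: EdgeSpeechControls): EdgeSpeechControls {
  const isBrowserSupported = useMemo(
    // MediaSource.isTypeSupported is a standard static method; Firefox reports
    // false for 'audio/mpeg', which is the case this patch works around.
    () => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'),
    [],
  );

  return isBrowserSupported ? realControls : noopControls;
}
```

In the real hook the early return sits after every useCallback/useEffect call, so the number and order of hooks is identical on supported and unsupported browsers, which keeps the rules of hooks satisfied.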