diff --git a/client/src/components/Chat/Messages/HoverButtons.tsx b/client/src/components/Chat/Messages/HoverButtons.tsx
index 7da848b849..938503acdb 100644
--- a/client/src/components/Chat/Messages/HoverButtons.tsx
+++ b/client/src/components/Chat/Messages/HoverButtons.tsx
@@ -73,7 +73,14 @@ export default function HoverButtons({
return (
- {TextToSpeech && }
+ {TextToSpeech && (
+
+ )}
{isEditableEndpoint && (
- {continueSupported ? (
+ {continueSupported === true ? (
@@ -84,8 +83,8 @@ export default function MessageAudio({ index, message, isLast }: THoverButtons)
height: '0px',
width: '0px',
}}
- src={audioRef.current?.src || undefined}
- id={`audio-${message.messageId}`}
+ src={audioRef.current?.src ?? undefined}
+ id={`audio-${messageId}`}
muted
autoPlay
/>
diff --git a/client/src/hooks/Input/useTextToSpeech.ts b/client/src/hooks/Input/useTextToSpeech.ts
index 95ac5a6f84..26d4ef9528 100644
--- a/client/src/hooks/Input/useTextToSpeech.ts
+++ b/client/src/hooks/Input/useTextToSpeech.ts
@@ -1,7 +1,7 @@
import { useRecoilState } from 'recoil';
import { useRef, useMemo, useEffect } from 'react';
import { parseTextParts } from 'librechat-data-provider';
-import type { TMessage } from 'librechat-data-provider';
+import type { TMessageContentParts } from 'librechat-data-provider';
import type { Option } from '~/common';
import useTextToSpeechExternal from './useTextToSpeechExternal';
import useTextToSpeechBrowser from './useTextToSpeechBrowser';
@@ -11,7 +11,15 @@ import { usePauseGlobalAudio } from '../Audio';
import { logger } from '~/utils';
import store from '~/store';
-const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
+type TUseTextToSpeech = { // options object for useTextToSpeech; every field is optional
+ messageId?: string; // passed to useTextToSpeechExternal ('' when omitted)
+ content?: TMessageContentParts[] | string; // text to speak; non-string content goes through parseTextParts
+ isLast?: boolean; // defaults to false; passed to useTextToSpeechExternal
+ index?: number; // defaults to 0; passed to usePauseGlobalAudio and useTextToSpeechExternal
+};
+
+const useTextToSpeech = (props?: TUseTextToSpeech) => {
+ const { messageId, content, isLast = false, index = 0 } = props ?? {};
const [voice, setVoice] = useRecoilState(store.voice);
const { textToSpeechEndpoint } = useGetAudioSettings();
const { pauseGlobalAudio } = usePauseGlobalAudio(index);
@@ -38,7 +46,7 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
isLoading: isLoadingExternal,
audioRef: audioRefExternal,
voices: voicesExternal,
- } = useTextToSpeechExternal(message?.messageId ?? '', isLast, index);
+ } = useTextToSpeechExternal(messageId ?? '', isLast, index);
let generateSpeech, cancelSpeech, isSpeaking, isLoading;
@@ -112,7 +120,7 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
isMouseDownRef.current = true;
timerRef.current = window.setTimeout(() => {
if (isMouseDownRef.current) {
- const messageContent = message?.content ?? message?.text ?? '';
+ const messageContent = content ?? '';
const parsedMessage =
typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
generateSpeech(parsedMessage, false);
@@ -128,11 +136,11 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
};
const toggleSpeech = () => {
- if (isSpeaking) {
+ if (isSpeaking === true) {
cancelSpeech();
pauseGlobalAudio();
} else {
- const messageContent = message?.content ?? message?.text ?? '';
+ const messageContent = content ?? '';
const parsedMessage =
typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
generateSpeech(parsedMessage, false);
diff --git a/client/src/hooks/Input/useTextToSpeechEdge.ts b/client/src/hooks/Input/useTextToSpeechEdge.ts
index fd969cd2b0..bc6f8bea02 100644
--- a/client/src/hooks/Input/useTextToSpeechEdge.ts
+++ b/client/src/hooks/Input/useTextToSpeechEdge.ts
@@ -1,5 +1,5 @@
import { useRecoilValue } from 'recoil';
-import { useState, useCallback, useRef, useEffect } from 'react';
+import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
import { useToastContext } from '~/Providers';
import useLocalize from '~/hooks/useLocalize';
@@ -29,6 +29,8 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
const pendingBuffers = useRef([]);
const { showToast } = useToastContext();
+ // `MediaSource` is not defined in every browser. Check that the global exists
+ // before calling it: a bare reference would throw a ReferenceError during
+ // render instead of reporting the browser as unsupported.
+ const isBrowserSupported = useMemo(
+   () => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'),
+   [],
+ );
+
const fetchVoices = useCallback(() => {
if (!ttsRef.current) {
ttsRef.current = new MsEdgeTTS();
@@ -198,14 +200,23 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
}, [showToast, localize]);
useEffect(() => {
+ // Skip voice loading when MediaSource is missing or cannot handle audio/mpeg.
+ // The typeof check avoids a ReferenceError where the global is undefined.
+ if (typeof MediaSource === 'undefined' || !MediaSource.isTypeSupported('audio/mpeg')) {
+ return;
+ }
fetchVoices();
}, [fetchVoices]);
useEffect(() => {
+ // Skip TTS initialization when MediaSource is missing or cannot handle audio/mpeg.
+ // The typeof check avoids a ReferenceError where the global is undefined.
+ if (typeof MediaSource === 'undefined' || !MediaSource.isTypeSupported('audio/mpeg')) {
+ return;
+ }
initializeTTS();
}, [voiceName, initializeTTS]);
useEffect(() => {
+ // Skip media-source setup (and its cleanup) when MediaSource is missing or cannot
+ // handle audio/mpeg. The typeof check avoids a ReferenceError where the global is undefined.
+ if (typeof MediaSource === 'undefined' || !MediaSource.isTypeSupported('audio/mpeg')) {
+ return;
+ }
initializeMediaSource();
return () => {
if (mediaSourceRef.current) {
@@ -214,6 +225,15 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
};
}, [initializeMediaSource]);
+ if (!isBrowserSupported) { // placed after all hook calls, so hook order is the same on every render
+ return { // inert fallback: callbacks do nothing, never speaking, no voices
+ generateSpeechEdge: () => ({}),
+ cancelSpeechEdge: () => ({}),
+ isSpeaking: false,
+ voices: [],
+ };
+ }
+
return { generateSpeechEdge, cancelSpeechEdge, isSpeaking, voices };
}