🎙️ fix: Optimize and Fix Browser TTS Incompatibility (firefox) (#3627)

* fix: 'disable' MsEdgeTTS on unsupported browser (firefox)
* refactor: only pass necessary props to HoverButton MessageAudio
* refactor: Fix conditional comparison operators in MessageAudio component
* refactor: Remove console.log statement in MessageAudio component
2026-02-03 08:11:50 +01:00 · 2024-08-13 04:14:37 -04:00 · 2024-08-13 04:14:37 -04:00 · e3ebcfd2b1
commit e3ebcfd2b1
parent 6655304753
4 changed files with 62 additions and 28 deletions
--- a/client/src/hooks/Input/useTextToSpeech.ts
+++ b/client/src/hooks/Input/useTextToSpeech.ts
@ -1,7 +1,7 @@
 import { useRecoilState } from 'recoil';
 import { useRef, useMemo, useEffect } from 'react';
 import { parseTextParts } from 'librechat-data-provider';
-import type { TMessage } from 'librechat-data-provider';
+import type { TMessageContentParts } from 'librechat-data-provider';
 import type { Option } from '~/common';
 import useTextToSpeechExternal from './useTextToSpeechExternal';
 import useTextToSpeechBrowser from './useTextToSpeechBrowser';
@ -11,7 +11,15 @@ import { usePauseGlobalAudio } from '../Audio';
 import { logger } from '~/utils';
 import store from '~/store';

-const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
+type TUseTextToSpeech = {
+  messageId?: string;
+  content?: TMessageContentParts[] | string;
+  isLast?: boolean;
+  index?: number;
+};
+
+const useTextToSpeech = (props?: TUseTextToSpeech) => {
+  const { messageId, content, isLast = false, index = 0 } = props ?? {};
  const [voice, setVoice] = useRecoilState(store.voice);
  const { textToSpeechEndpoint } = useGetAudioSettings();
  const { pauseGlobalAudio } = usePauseGlobalAudio(index);
@ -38,7 +46,7 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
    isLoading: isLoadingExternal,
    audioRef: audioRefExternal,
    voices: voicesExternal,
-  } = useTextToSpeechExternal(message?.messageId ?? '', isLast, index);
+  } = useTextToSpeechExternal(messageId ?? '', isLast, index);

  let generateSpeech, cancelSpeech, isSpeaking, isLoading;

@ -112,7 +120,7 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
    isMouseDownRef.current = true;
    timerRef.current = window.setTimeout(() => {
      if (isMouseDownRef.current) {
-        const messageContent = message?.content ?? message?.text ?? '';
+        const messageContent = content ?? '';
        const parsedMessage =
          typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
        generateSpeech(parsedMessage, false);
@ -128,11 +136,11 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
  };

  const toggleSpeech = () => {
-    if (isSpeaking) {
+    if (isSpeaking === true) {
      cancelSpeech();
      pauseGlobalAudio();
    } else {
-      const messageContent = message?.content ?? message?.text ?? '';
+      const messageContent = content ?? '';
      const parsedMessage =
        typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
      generateSpeech(parsedMessage, false);
--- a/client/src/hooks/Input/useTextToSpeechEdge.ts
+++ b/client/src/hooks/Input/useTextToSpeechEdge.ts
@ -1,5 +1,5 @@
 import { useRecoilValue } from 'recoil';
-import { useState, useCallback, useRef, useEffect } from 'react';
+import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
 import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
 import { useToastContext } from '~/Providers';
 import useLocalize from '~/hooks/useLocalize';
@ -29,6 +29,8 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
  const pendingBuffers = useRef<Uint8Array[]>([]);
  const { showToast } = useToastContext();

+  const isBrowserSupported = useMemo(() => MediaSource.isTypeSupported('audio/mpeg'), []);
+
  const fetchVoices = useCallback(() => {
    if (!ttsRef.current) {
      ttsRef.current = new MsEdgeTTS();
@ -198,14 +200,23 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
  }, [showToast, localize]);

  useEffect(() => {
+    if (!MediaSource.isTypeSupported('audio/mpeg')) {
+      return;
+    }
    fetchVoices();
  }, [fetchVoices]);

  useEffect(() => {
+    if (!MediaSource.isTypeSupported('audio/mpeg')) {
+      return;
+    }
    initializeTTS();
  }, [voiceName, initializeTTS]);

  useEffect(() => {
+    if (!MediaSource.isTypeSupported('audio/mpeg')) {
+      return;
+    }
    initializeMediaSource();
    return () => {
      if (mediaSourceRef.current) {
@ -214,6 +225,15 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
    };
  }, [initializeMediaSource]);

+  if (!isBrowserSupported) {
+    return {
+      generateSpeechEdge: () => ({}),
+      cancelSpeechEdge: () => ({}),
+      isSpeaking: false,
+      voices: [],
+    };
+  }
+
  return { generateSpeechEdge, cancelSpeechEdge, isSpeaking, voices };
 }