🔊 fix(tts): NotAllowedError (mobile/safari), Unsupported MediaSource type (firefox), Hide Audio Element (#2854)

* fix: hide audio element on mobile * chore: add tts docs link * fix: select voice option on first render * fix: NotAllowedError, prevent async playback for mobile triggers, consolidate MessageAudio code, use user-triggered unmutes * fix: Firefox/unsupported type for MediaSource hack * refactor(STT): make icon red when recording. consolidate logic to AudioRecorder component * fix: revert Redis changes to use separate client for sessions
2025-12-18 09:20:15 +01:00 · 2024-05-24 12:18:11 -04:00 · 2024-05-24 12:18:11 -04:00 · 35ba4ba1a4
commit 35ba4ba1a4
parent dcd2e3e62d
14 changed files with 421 additions and 130 deletions
--- a/client/src/hooks/Input/useTextToSpeech.ts
+++ b/client/src/hooks/Input/useTextToSpeech.ts
@@ -1,13 +1,13 @@
 import { useRef } from 'react';
 import { parseTextParts } from 'librechat-data-provider';
-import type { TMessageContentParts } from 'librechat-data-provider';
+import type { TMessage } from 'librechat-data-provider';
 import useTextToSpeechExternal from './useTextToSpeechExternal';
 import useTextToSpeechBrowser from './useTextToSpeechBrowser';
 import { usePauseGlobalAudio } from '../Audio';
 import { useRecoilState } from 'recoil';
 import store from '~/store';

-const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boolean, index = 0) => {
+const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
  const [endpointTTS] = useRecoilState<string>(store.endpointTTS);
  const useExternalTextToSpeech = endpointTTS === 'external';

@@ -22,7 +22,8 @@ const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boole
    cancelSpeech: cancelSpeechExternal,
    isSpeaking: isSpeakingExternal,
    isLoading: isLoading,
-  } = useTextToSpeechExternal(isLast, index);
+    audioRef,
+  } = useTextToSpeechExternal(message.messageId, isLast, index);
  const { pauseGlobalAudio } = usePauseGlobalAudio(index);

  const generateSpeech = useExternalTextToSpeech ? generateSpeechExternal : generateSpeechLocal;
@@ -36,8 +37,10 @@ const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boole
    isMouseDownRef.current = true;
    timerRef.current = window.setTimeout(() => {
      if (isMouseDownRef.current) {
-        const parsedMessage = typeof message === 'string' ? message : parseTextParts(message);
-        generateSpeech(parsedMessage, true);
+        const messageContent = message?.content ?? message?.text ?? '';
+        const parsedMessage =
+          typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
+        generateSpeech(parsedMessage, false);
      }
    }, 1000);
  };
@@ -51,10 +54,13 @@ const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boole

  const toggleSpeech = () => {
    if (isSpeaking) {
+      console.log('canceling message audio speech');
      cancelSpeech();
      pauseGlobalAudio();
    } else {
-      const parsedMessage = typeof message === 'string' ? message : parseTextParts(message);
+      const messageContent = message?.content ?? message?.text ?? '';
+      const parsedMessage =
+        typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
      generateSpeech(parsedMessage, false);
    }
  };
@@ -65,6 +71,7 @@ const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boole
    toggleSpeech,
    isSpeaking,
    isLoading,
+    audioRef,
  };
 };