🔀 refactor: Modularize TTS Logic for Improved Browser support (#3657)

* WIP: message audio refactor * WIP: use MessageAudio by provider * fix: Update MessageAudio component to use TTSEndpoints enum * feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging * feat: Add voice dropdown components for different TTS engines * docs: update incorrect `voices` example; changed `voice: ''` to `voices: ['alloy']` * feat: Add browser support check for Edge TTS engine component with error toast if not supported --------- Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
2025-12-19 01:40:15 +01:00 · 2024-08-15 11:34:25 -04:00 · 2024-08-15 11:34:25 -04:00 · dba704079c
commit dba704079c
parent bcde0beb47
18 changed files with 784 additions and 187 deletions
--- a/client/src/hooks/Input/useTextToSpeechEdge.ts
+++ b/client/src/hooks/Input/useTextToSpeechEdge.ts
@ -1,28 +1,24 @@
 import { useRecoilValue } from 'recoil';
-import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
 import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
-import { useToastContext } from '~/Providers';
+import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
+import type { VoiceOption } from '~/common';
+import { useToastContext } from '~/Providers/ToastContext';
 import useLocalize from '~/hooks/useLocalize';
 import store from '~/store';

-interface Voice {
-  value: string;
-  label: string;
-}
-
 interface UseTextToSpeechEdgeReturn {
  generateSpeechEdge: (text: string) => void;
  cancelSpeechEdge: () => void;
-  voices: Voice[];
+  voices: VoiceOption[];
 }

 function useTextToSpeechEdge({
  setIsSpeaking,
 }: {
-  setIsSpeaking: (isSpeaking: boolean) => void;
+  setIsSpeaking: React.Dispatch<React.SetStateAction<boolean>>;
 }): UseTextToSpeechEdgeReturn {
  const localize = useLocalize();
-  const [voices, setVoices] = useState<Voice[]>([]);
+  const [voices, setVoices] = useState<VoiceOption[]>([]);
  const voiceName = useRecoilValue(store.voice);
  const ttsRef = useRef<MsEdgeTTS | null>(null);
  const audioElementRef = useRef<HTMLAudioElement | null>(null);
@ -63,7 +59,7 @@ function useTextToSpeechEdge({
    if (!ttsRef.current) {
      ttsRef.current = new MsEdgeTTS();
    }
-    const availableVoice: Voice | undefined = voices.find((v) => v.value === voiceName);
+    const availableVoice: VoiceOption | undefined = voices.find((v) => v.value === voiceName);

    if (availableVoice) {
      ttsRef.current
@ -181,7 +177,7 @@ function useTextToSpeechEdge({

      generate();
    },
-    [appendNextBuffer, showToast, localize],
+    [setIsSpeaking, appendNextBuffer, showToast, localize],
  );

  const cancelSpeechEdge = useCallback(() => {
@ -202,7 +198,7 @@ function useTextToSpeechEdge({
        status: 'error',
      });
    }
-  }, [showToast, localize]);
+  }, [setIsSpeaking, showToast, localize]);

  useEffect(() => {
    if (!isBrowserSupported) {