🔀 refactor: Modularize TTS Logic for Improved Browser support (#3657)

* WIP: message audio refactor * WIP: use MessageAudio by provider * fix: Update MessageAudio component to use TTSEndpoints enum * feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging * feat: Add voice dropdown components for different TTS engines * docs: update incorrect `voices` example; changed `voice: ''` to `voices: ['alloy']` * feat: Add browser support check for Edge TTS engine component with error toast if not supported --------- Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
2025-12-20 10:20:15 +01:00 · 2024-08-15 11:34:25 -04:00 · 2024-08-15 11:34:25 -04:00 · dba704079c
commit dba704079c
parent bcde0beb47
18 changed files with 784 additions and 187 deletions
--- a/client/src/hooks/Input/useTextToSpeechBrowser.ts
+++ b/client/src/hooks/Input/useTextToSpeechBrowser.ts
@@ -1,43 +1,54 @@
 import { useRecoilState } from 'recoil';
 import { useState, useEffect, useCallback } from 'react';
+import type { VoiceOption } from '~/common';
 import store from '~/store';

-interface VoiceOption {
-  value: string;
-  label: string;
-}
-
 function useTextToSpeechBrowser({
  setIsSpeaking,
 }: {
-  setIsSpeaking: (isSpeaking: boolean) => void;
+  setIsSpeaking: React.Dispatch<React.SetStateAction<boolean>>;
 }) {
  const [cloudBrowserVoices] = useRecoilState(store.cloudBrowserVoices);
  const [voiceName] = useRecoilState(store.voice);
  const [voices, setVoices] = useState<VoiceOption[]>([]);

  const updateVoices = useCallback(() => {
-    const availableVoices = window.speechSynthesis
-      .getVoices()
-      .filter((v) => cloudBrowserVoices || v.localService === true);
+    try {
+      const availableVoices = window.speechSynthesis.getVoices();
+      if (!Array.isArray(availableVoices)) {
+        console.error('getVoices() did not return an array');
+        return;
+      }

-    const voiceOptions: VoiceOption[] = availableVoices.map((v) => ({
-      value: v.name,
-      label: v.name,
-    }));
+      const filteredVoices = availableVoices.filter(
+        (v) => cloudBrowserVoices || v.localService === true,
+      );
+      const voiceOptions: VoiceOption[] = filteredVoices.map((v) => ({
+        value: v.name,
+        label: v.name,
+      }));

-    setVoices(voiceOptions);
+      setVoices(voiceOptions);
+    } catch (error) {
+      console.error('Error updating voices:', error);
+    }
  }, [cloudBrowserVoices]);

  useEffect(() => {
-    if (window.speechSynthesis.getVoices().length) {
-      updateVoices();
-    } else {
-      window.speechSynthesis.onvoiceschanged = updateVoices;
+    const synth = window.speechSynthesis;
+
+    try {
+      if (synth.getVoices().length) {
+        updateVoices();
+      } else {
+        synth.onvoiceschanged = updateVoices;
+      }
+    } catch (error) {
+      console.error('Error in useEffect:', error);
    }

    return () => {
-      window.speechSynthesis.onvoiceschanged = null;
+      synth.onvoiceschanged = null;
    };
  }, [updateVoices]);

@@ -46,22 +57,37 @@ function useTextToSpeechBrowser({
    const voice = voices.find((v) => v.value === voiceName);

    if (!voice) {
+      console.warn('Selected voice not found');
      return;
    }

-    synth.cancel();
-    const utterance = new SpeechSynthesisUtterance(text);
-    utterance.voice = synth.getVoices().find((v) => v.name === voice.value) || null;
-    utterance.onend = () => {
+    try {
+      synth.cancel();
+      const utterance = new SpeechSynthesisUtterance(text);
+      utterance.voice = synth.getVoices().find((v) => v.name === voice.value) || null;
+      utterance.onend = () => {
+        setIsSpeaking(false);
+      };
+      utterance.onerror = (event) => {
+        console.error('Speech synthesis error:', event);
+        setIsSpeaking(false);
+      };
+      setIsSpeaking(true);
+      synth.speak(utterance);
+    } catch (error) {
+      console.error('Error generating speech:', error);
      setIsSpeaking(false);
-    };
-    setIsSpeaking(true);
-    synth.speak(utterance);
+    }
  };

  const cancelSpeechLocal = () => {
-    window.speechSynthesis.cancel();
-    setIsSpeaking(false);
+    try {
+      window.speechSynthesis.cancel();
+    } catch (error) {
+      console.error('Error cancelling speech:', error);
+    } finally {
+      setIsSpeaking(false);
+    }
  };

  return { generateSpeechLocal, cancelSpeechLocal, voices };