🔧 fix: Consolidate Text Parsing and TTS Edge Initialization (#6582)

* 🔧 fix: Update useTextToSpeechExternal to include loading state and improve text parsing logic
* fix: update msedge-tts and prevent excessive initialization attempts
* fix: Refactor text parsing logic in mongoMeili model to use parseTextParts function
2026-02-06 01:31:49 +01:00 · 2025-03-27 17:09:46 -04:00 · 2025-03-27 17:09:46 -04:00 · b9ebdd4aa5
commit b9ebdd4aa5
parent a6f062e468
6 changed files with 57 additions and 29 deletions
--- a/client/src/hooks/Input/useTextToSpeechEdge.ts
+++ b/client/src/hooks/Input/useTextToSpeechEdge.ts
@@ -26,6 +26,7 @@ function useTextToSpeechEdge({
  const sourceBufferRef = useRef<SourceBuffer | null>(null);
  const pendingBuffers = useRef<Uint8Array[]>([]);
  const { showToast } = useToastContext();
+  const initAttempts = useRef(0);

  const isBrowserSupported = useMemo(
    () => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'),
@@ -57,14 +58,20 @@ function useTextToSpeechEdge({

  const initializeTTS = useCallback(() => {
    if (!ttsRef.current) {
-      ttsRef.current = new MsEdgeTTS();
+      ttsRef.current = new MsEdgeTTS({
+        enableLogger: true,
+      });
    }
    const availableVoice: VoiceOption | undefined = voices.find((v) => v.value === voiceName);

    if (availableVoice) {
+      if (initAttempts.current > 3) {
+        return;
+      }
      ttsRef.current
-        .setMetadata(availableVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3)
+        .setMetadata(availableVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3, {})
        .catch((error) => {
+          initAttempts.current += 1;
          console.error('Error initializing TTS:', error);
          showToast({
            message: localize('com_nav_tts_init_error', { 0: (error as Error).message }),
@@ -73,8 +80,9 @@ function useTextToSpeechEdge({
        });
    } else if (voices.length > 0) {
      ttsRef.current
-        .setMetadata(voices[0].value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3)
+        .setMetadata(voices[0].value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3, {})
        .catch((error) => {
+          initAttempts.current += 1;
          console.error('Error initializing TTS:', error);
          showToast({
            message: localize('com_nav_tts_init_error', { 0: (error as Error).message }),
@@ -147,7 +155,8 @@ function useTextToSpeechEdge({
          setIsSpeaking(true);
          pendingBuffers.current = [];

-          const readable = ttsRef.current.toStream(text);
+          const result = await ttsRef.current.toStream(text);
+          const readable = result.audioStream;

          readable.on('data', (chunk: Buffer) => {
            pendingBuffers.current.push(new Uint8Array(chunk));
--- a/client/src/hooks/Input/useTextToSpeechExternal.ts
+++ b/client/src/hooks/Input/useTextToSpeechExternal.ts
@@ -67,7 +67,10 @@ function useTextToSpeechExternal({
        return playPromise().catch(console.error);
      }
      console.error(error);
-      showToast({ message: localize('com_nav_audio_play_error', { 0: error.message }), status: 'error' });
+      showToast({
+        message: localize('com_nav_audio_play_error', { 0: error.message }),
+        status: 'error',
+      });
    });

    newAudio.onended = () => {
@@ -87,7 +90,7 @@ function useTextToSpeechExternal({
    setDownloadFile(false);
  };

-  const { mutate: processAudio } = useTextToSpeechMutation({
+  const { mutate: processAudio, isLoading } = useTextToSpeechMutation({
    onMutate: (variables) => {
      const inputText = (variables.get('input') ?? '') as string;
      if (inputText.length >= 4096) {
@@ -182,7 +185,7 @@ function useTextToSpeechExternal({

  useEffect(() => cancelPromiseSpeech, [cancelPromiseSpeech]);

-  const isLoading = useMemo(
+  const isFetching = useMemo(
    () => isLast && globalIsFetching && !globalIsPlaying,
    [globalIsFetching, globalIsPlaying, isLast],
  );
@@ -192,7 +195,7 @@ function useTextToSpeechExternal({
  return {
    generateSpeechExternal,
    cancelSpeech,
-    isLoading,
+    isLoading: isFetching || isLoading,
    audioRef,
    voices: voicesData,
  };