⏯️ fix(tts): Resolve Voice Selection and Manual Playback Issues (#2845)

* fix: voice setting for autoplayback TTS
* fix(useTextToSpeechExternal): resolve stateful playback issues and consolidate state logic
* refactor: initialize tts voice and provider schema once per request
* fix(tts): edge case, longer text inputs. TODO: use continuous stream for longer text inputs
* fix(tts): pause global audio on conversation change
* refactor: keyvMongo ban cache to allow db updates for unbanning, to prevent server restart
* chore: eslint fix
* refactor: make ban cache exclusively keyvMongo
2025-12-18 01:10:14 +01:00 · 2024-05-23 16:27:36 -04:00 · 2024-05-23 16:27:36 -04:00 · 514a502b9c
commit 514a502b9c
parent 8e66683577
10 changed files with 332 additions and 178 deletions
--- a/client/src/components/Chat/Input/StreamAudio.tsx
+++ b/client/src/components/Chat/Input/StreamAudio.tsx
@ -1,10 +1,10 @@
 import { useParams } from 'react-router-dom';
+import { useEffect, useCallback } from 'react';
 import { QueryKeys } from 'librechat-data-provider';
 import { useQueryClient } from '@tanstack/react-query';
-import { useEffect, useCallback } from 'react';
 import { useRecoilState, useRecoilValue, useSetRecoilState } from 'recoil';
 import type { TMessage } from 'librechat-data-provider';
-import { useCustomAudioRef, MediaSourceAppender } from '~/hooks/Audio';
+import { useCustomAudioRef, MediaSourceAppender, usePauseGlobalAudio } from '~/hooks/Audio';
 import { useAuthContext } from '~/hooks';
 import { globalAudioId } from '~/common';
 import store from '~/store';
@ -24,6 +24,7 @@ export default function StreamAudio({ index = 0 }) {
  const cacheTTS = useRecoilValue(store.cacheTTS);
  const playbackRate = useRecoilValue(store.playbackRate);

+  const voice = useRecoilValue(store.voice);
  const activeRunId = useRecoilValue(store.activeRunFamily(index));
  const automaticPlayback = useRecoilValue(store.automaticPlayback);
  const isSubmitting = useRecoilValue(store.isSubmittingFamily(index));
@ -34,6 +35,7 @@ export default function StreamAudio({ index = 0 }) {
  const [globalAudioURL, setGlobalAudioURL] = useRecoilState(store.globalAudioURLFamily(index));

  const { audioRef } = useCustomAudioRef({ setIsPlaying });
+  const { pauseGlobalAudio } = usePauseGlobalAudio();

  const { conversationId: paramId } = useParams();
  const queryParam = paramId === 'new' ? paramId : latestMessage?.conversationId ?? paramId ?? '';
@ -90,7 +92,7 @@ export default function StreamAudio({ index = 0 }) {
        const response = await fetch('/api/files/tts', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
-          body: JSON.stringify({ messageId: latestMessage?.messageId, runId: activeRunId }),
+          body: JSON.stringify({ messageId: latestMessage?.messageId, runId: activeRunId, voice }),
        });

        if (!response.ok) {
@ -166,6 +168,7 @@ export default function StreamAudio({ index = 0 }) {
    audioRunId,
    cacheTTS,
    audioRef,
+    voice,
    token,
  ]);

@ -180,6 +183,12 @@ export default function StreamAudio({ index = 0 }) {
    }
  }, [audioRef, globalAudioURL, playbackRate]);

+  useEffect(() => {
+    pauseGlobalAudio();
+    // We only want the effect to run when the paramId changes
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [paramId]);
+
  return (
    <audio
      ref={audioRef}