⏯️ fix(tts): Resolve Voice Selection and Manual Playback Issues (#2845)

* fix: voice setting for autoplayback TTS
* fix(useTextToSpeechExternal): resolve stateful playback issues and consolidate state logic
* refactor: initialize tts voice and provider schema once per request
* fix(tts): edge case, longer text inputs. TODO: use continuous stream for longer text inputs
* fix(tts): pause global audio on conversation change
* refactor: keyvMongo ban cache to allow db updates for unbanning, to prevent server restart
* chore: eslint fix
* refactor: make ban cache exclusively keyvMongo
2026-03-16 20:56:35 +01:00 · 2024-05-23 16:27:36 -04:00 · 2024-05-23 16:27:36 -04:00 · 514a502b9c
commit 514a502b9c
parent 8e66683577
10 changed files with 332 additions and 178 deletions
--- a/api/server/services/Files/Audio/streamAudio.js
+++ b/api/server/services/Files/Audio/streamAudio.js
@ -90,7 +90,7 @@ function findLastSeparatorIndex(text, separators = SEPARATORS) {
 }

 const MAX_NOT_FOUND_COUNT = 6;
-const MAX_NO_CHANGE_COUNT = 12;
+const MAX_NO_CHANGE_COUNT = 10;

 /**
 * @param {string} messageId
@ -152,6 +152,64 @@ function createChunkProcessor(messageId) {
  return processChunks;
 }

+/**
+ * @param {string} text
+ * @param {number} [chunkSize=4000]
+ * @returns {{ text: string, isFinished: boolean }[]}
+ */
+function splitTextIntoChunks(text, chunkSize = 4000) {
+  if (!text) {
+    throw new Error('Text is required');
+  }
+
+  const chunks = [];
+  let startIndex = 0;
+  const textLength = text.length;
+
+  while (startIndex < textLength) {
+    let endIndex = Math.min(startIndex + chunkSize, textLength);
+    let chunkText = text.slice(startIndex, endIndex);
+
+    if (endIndex < textLength) {
+      let lastSeparatorIndex = -1;
+      for (const separator of SEPARATORS) {
+        const index = chunkText.lastIndexOf(separator);
+        if (index !== -1) {
+          lastSeparatorIndex = Math.max(lastSeparatorIndex, index);
+        }
+      }
+
+      if (lastSeparatorIndex !== -1) {
+        endIndex = startIndex + lastSeparatorIndex + 1;
+        chunkText = text.slice(startIndex, endIndex);
+      } else {
+        const nextSeparatorIndex = text.slice(endIndex).search(/\S/);
+        if (nextSeparatorIndex !== -1) {
+          endIndex += nextSeparatorIndex;
+          chunkText = text.slice(startIndex, endIndex);
+        }
+      }
+    }
+
+    chunkText = chunkText.trim();
+    if (chunkText) {
+      chunks.push({
+        text: chunkText,
+        isFinished: endIndex >= textLength,
+      });
+    } else if (chunks.length > 0) {
+      chunks[chunks.length - 1].isFinished = true;
+    }
+
+    startIndex = endIndex;
+    while (startIndex < textLength && text[startIndex].trim() === '') {
+      startIndex++;
+    }
+  }
+
+  return chunks;
+}
+
 /**
 * Input stream text to speech
 * @param {Express.Response} res
@ -307,6 +365,7 @@ module.exports = {
  inputStreamTextToSpeech,
  findLastSeparatorIndex,
  createChunkProcessor,
+  splitTextIntoChunks,
  llmMessageSource,
  getRandomVoiceId,
 };