From b9ebdd4aa5b8d84b711653d26752c442f1633316 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Thu, 27 Mar 2025 17:09:46 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20fix:=20Consolidate=20Text=20Pars?= =?UTF-8?q?ing=20and=20TTS=20Edge=20Initialization=20(#6582)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🔧 fix: Update useTextToSpeechExternal to include loading state and improve text parsing logic * fix: update msedge-tts and prevent excessive initialization attempts * fix: Refactor text parsing logic in mongoMeili model to use parseTextParts function --- api/models/plugins/mongoMeili.js | 6 ++-- client/package.json | 2 +- client/src/hooks/Input/useTextToSpeechEdge.ts | 17 ++++++--- .../hooks/Input/useTextToSpeechExternal.ts | 11 +++--- package-lock.json | 36 ++++++++++--------- packages/data-provider/src/parsers.ts | 14 ++++++++ 6 files changed, 57 insertions(+), 29 deletions(-) diff --git a/api/models/plugins/mongoMeili.js b/api/models/plugins/mongoMeili.js index 6577370b1e..75e3738e5d 100644 --- a/api/models/plugins/mongoMeili.js +++ b/api/models/plugins/mongoMeili.js @@ -1,6 +1,7 @@ const _ = require('lodash'); const mongoose = require('mongoose'); const { MeiliSearch } = require('meilisearch'); +const { parseTextParts, ContentTypes } = require('librechat-data-provider'); const { cleanUpPrimaryKeyValue } = require('~/lib/utils/misc'); const logger = require('~/config/meiliLogger'); @@ -238,10 +239,7 @@ const createMeiliMongooseModel = function ({ index, attributesToIndex }) { } if (object.content && Array.isArray(object.content)) { - object.text = object.content - .filter((item) => item.type === 'text' && item.text && item.text.value) - .map((item) => item.text.value) - .join(' '); + object.text = parseTextParts(object.content); delete object.content; } diff --git a/client/package.json b/client/package.json index 5da707a2a1..c8b6c5192d 100644 --- a/client/package.json +++ b/client/package.json @@ -73,7 +73,7 @@ "lodash": "^4.17.21", "lucide-react": "^0.394.0", "match-sorter": "^6.3.4", - "msedge-tts": "^1.3.4", + "msedge-tts": "^2.0.0", "qrcode.react": "^4.2.0", "rc-input-number": "^7.4.2", "react": "^18.2.0", diff --git a/client/src/hooks/Input/useTextToSpeechEdge.ts b/client/src/hooks/Input/useTextToSpeechEdge.ts index 18ea00827f..65bfadd30d 100644 --- a/client/src/hooks/Input/useTextToSpeechEdge.ts +++ b/client/src/hooks/Input/useTextToSpeechEdge.ts @@ -26,6 +26,7 @@ function useTextToSpeechEdge({ const sourceBufferRef = useRef(null); const pendingBuffers = useRef([]); const { showToast } = useToastContext(); + const initAttempts = useRef(0); const isBrowserSupported = useMemo( () => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'), @@ -57,14 +58,20 @@ function useTextToSpeechEdge({ const initializeTTS = useCallback(() => { if (!ttsRef.current) { - ttsRef.current = new MsEdgeTTS(); + ttsRef.current = new MsEdgeTTS({ + enableLogger: true, + }); } const availableVoice: VoiceOption | undefined = voices.find((v) => v.value === voiceName); if (availableVoice) { + if (initAttempts.current > 3) { + return; + } ttsRef.current - .setMetadata(availableVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3) + .setMetadata(availableVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3, {}) .catch((error) => { + initAttempts.current += 1; console.error('Error initializing TTS:', error); showToast({ message: localize('com_nav_tts_init_error', { 0: (error as Error).message }), @@ -73,8 +80,9 @@ function useTextToSpeechEdge({ }); } else if (voices.length > 0) { ttsRef.current - .setMetadata(voices[0].value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3) + .setMetadata(voices[0].value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3, {}) .catch((error) => { + initAttempts.current += 1; console.error('Error initializing TTS:', error); showToast({ message: localize('com_nav_tts_init_error', { 0: (error as Error).message }), @@ -147,7 +155,8 @@ function useTextToSpeechEdge({ setIsSpeaking(true); pendingBuffers.current = []; - const readable = ttsRef.current.toStream(text); + const result = await ttsRef.current.toStream(text); + const readable = result.audioStream; readable.on('data', (chunk: Buffer) => { pendingBuffers.current.push(new Uint8Array(chunk)); diff --git a/client/src/hooks/Input/useTextToSpeechExternal.ts b/client/src/hooks/Input/useTextToSpeechExternal.ts index 4076d913fb..cf8edb0382 100644 --- a/client/src/hooks/Input/useTextToSpeechExternal.ts +++ b/client/src/hooks/Input/useTextToSpeechExternal.ts @@ -67,7 +67,10 @@ function useTextToSpeechExternal({ return playPromise().catch(console.error); } console.error(error); - showToast({ message: localize('com_nav_audio_play_error', { 0: error.message }), status: 'error' }); + showToast({ + message: localize('com_nav_audio_play_error', { 0: error.message }), + status: 'error', + }); }); newAudio.onended = () => { @@ -87,7 +90,7 @@ function useTextToSpeechExternal({ setDownloadFile(false); }; - const { mutate: processAudio } = useTextToSpeechMutation({ + const { mutate: processAudio, isLoading } = useTextToSpeechMutation({ onMutate: (variables) => { const inputText = (variables.get('input') ?? '') as string; if (inputText.length >= 4096) { @@ -182,7 +185,7 @@ function useTextToSpeechExternal({ useEffect(() => cancelPromiseSpeech, [cancelPromiseSpeech]); - const isLoading = useMemo( + const isFetching = useMemo( () => isLast && globalIsFetching && !globalIsPlaying, [globalIsFetching, globalIsPlaying, isLast], ); @@ -192,7 +195,7 @@ function useTextToSpeechExternal({ return { generateSpeechExternal, cancelSpeech, - isLoading, + isLoading: isFetching || isLoading, audioRef, voices: voicesData, }; diff --git a/package-lock.json b/package-lock.json index cb5a3027bd..91948f252d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1667,7 +1667,7 @@ "lodash": "^4.17.21", "lucide-react": "^0.394.0", "match-sorter": "^6.3.4", - "msedge-tts": "^1.3.4", + "msedge-tts": "^2.0.0", "qrcode.react": "^4.2.0", "rc-input-number": "^7.4.2", "react": "^18.2.0", @@ -3303,6 +3303,25 @@ "node": ">=4" } }, + "client/node_modules/msedge-tts": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/msedge-tts/-/msedge-tts-2.0.0.tgz", + "integrity": "sha512-9qmAh80/rvEFCWDlfqHvrZzf9zioEqksiwpNKSy8MuBud27D6FNPVTHNDc1c37dX0u6w7iYe++Dg/V0a9fAFSw==", + "hasInstallScript": true, + "dependencies": { + "axios": "^1.5.0", + "buffer": "^6.0.3", + "crypto-browserify": "^3.12.0", + "isomorphic-ws": "^5.0.0", + "process": "^0.11.10", + "randombytes": "^2.1.0", + "stream-browserify": "^3.0.0", + "ws": "^8.14.1" + }, + "engines": { + "node": ">=16.0.0" + } + }, "client/node_modules/node-releases": { "version": "2.0.19", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz", @@ -34288,21 +34307,6 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" }, - "node_modules/msedge-tts": { - "version": "1.3.4", - "resolved": "https://registry.npmjs.org/msedge-tts/-/msedge-tts-1.3.4.tgz", - "integrity": "sha512-0dj86Gg9VzdOJZVCkSSK/O5Eg0NM9W5p8LsXAEPe7qUmsvdAugPUTcPwt9tyz4GThAzAFBBu554kevH8StLEHQ==", - "license": "MIT", - "dependencies": { - "axios": "^1.5.0", - "buffer": "^6.0.3", - "crypto-browserify": "^3.12.0", - "isomorphic-ws": "^5.0.0", - "process": "^0.11.10", - "stream-browserify": "^3.0.0", - "ws": "^8.14.1" - } - }, "node_modules/multer": { "version": "1.4.5-lts.1", "resolved": "https://registry.npmjs.org/multer/-/multer-1.4.5-lts.1.tgz", diff --git a/packages/data-provider/src/parsers.ts b/packages/data-provider/src/parsers.ts index 10a23a542b..21040a70d5 100644 --- a/packages/data-provider/src/parsers.ts +++ b/packages/data-provider/src/parsers.ts @@ -375,9 +375,23 @@ export function parseTextParts(contentParts: a.TMessageContentParts[]): string { let result = ''; for (const part of contentParts) { + if (!part.type) { + continue; + } if (part.type === ContentTypes.TEXT) { const textValue = typeof part.text === 'string' ? part.text : part.text.value; + if ( + result.length > 0 && + textValue.length > 0 && + result[result.length - 1] !== ' ' && + textValue[0] !== ' ' + ) { + result += ' '; + } + result += textValue; + } else if (part.type === ContentTypes.THINK) { + const textValue = typeof part.think === 'string' ? part.think : ''; if ( result.length > 0 && textValue.length > 0 &&