mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 17:00:15 +01:00
🔧 fix: Consolidate Text Parsing and TTS Edge Initialization (#6582)
* 🔧 fix: Update useTextToSpeechExternal to include loading state and improve text parsing logic
* fix: Update msedge-tts to v2 and stop re-initializing Edge TTS after repeated failures
* fix: Refactor text parsing logic in mongoMeili model to use parseTextParts function
This commit is contained in:
parent
a6f062e468
commit
b9ebdd4aa5
6 changed files with 57 additions and 29 deletions
|
|
@ -1,6 +1,7 @@
|
||||||
const _ = require('lodash');
|
const _ = require('lodash');
|
||||||
const mongoose = require('mongoose');
|
const mongoose = require('mongoose');
|
||||||
const { MeiliSearch } = require('meilisearch');
|
const { MeiliSearch } = require('meilisearch');
|
||||||
|
const { parseTextParts, ContentTypes } = require('librechat-data-provider');
|
||||||
const { cleanUpPrimaryKeyValue } = require('~/lib/utils/misc');
|
const { cleanUpPrimaryKeyValue } = require('~/lib/utils/misc');
|
||||||
const logger = require('~/config/meiliLogger');
|
const logger = require('~/config/meiliLogger');
|
||||||
|
|
||||||
|
|
@ -238,10 +239,7 @@ const createMeiliMongooseModel = function ({ index, attributesToIndex }) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (object.content && Array.isArray(object.content)) {
|
if (object.content && Array.isArray(object.content)) {
|
||||||
object.text = object.content
|
object.text = parseTextParts(object.content);
|
||||||
.filter((item) => item.type === 'text' && item.text && item.text.value)
|
|
||||||
.map((item) => item.text.value)
|
|
||||||
.join(' ');
|
|
||||||
delete object.content;
|
delete object.content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,7 @@
|
||||||
"lodash": "^4.17.21",
|
"lodash": "^4.17.21",
|
||||||
"lucide-react": "^0.394.0",
|
"lucide-react": "^0.394.0",
|
||||||
"match-sorter": "^6.3.4",
|
"match-sorter": "^6.3.4",
|
||||||
"msedge-tts": "^1.3.4",
|
"msedge-tts": "^2.0.0",
|
||||||
"qrcode.react": "^4.2.0",
|
"qrcode.react": "^4.2.0",
|
||||||
"rc-input-number": "^7.4.2",
|
"rc-input-number": "^7.4.2",
|
||||||
"react": "^18.2.0",
|
"react": "^18.2.0",
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ function useTextToSpeechEdge({
|
||||||
const sourceBufferRef = useRef<SourceBuffer | null>(null);
|
const sourceBufferRef = useRef<SourceBuffer | null>(null);
|
||||||
const pendingBuffers = useRef<Uint8Array[]>([]);
|
const pendingBuffers = useRef<Uint8Array[]>([]);
|
||||||
const { showToast } = useToastContext();
|
const { showToast } = useToastContext();
|
||||||
|
const initAttempts = useRef(0);
|
||||||
|
|
||||||
const isBrowserSupported = useMemo(
|
const isBrowserSupported = useMemo(
|
||||||
() => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'),
|
() => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'),
|
||||||
|
|
@ -57,14 +58,20 @@ function useTextToSpeechEdge({
|
||||||
|
|
||||||
const initializeTTS = useCallback(() => {
|
const initializeTTS = useCallback(() => {
|
||||||
if (!ttsRef.current) {
|
if (!ttsRef.current) {
|
||||||
ttsRef.current = new MsEdgeTTS();
|
ttsRef.current = new MsEdgeTTS({
|
||||||
|
enableLogger: true,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
const availableVoice: VoiceOption | undefined = voices.find((v) => v.value === voiceName);
|
const availableVoice: VoiceOption | undefined = voices.find((v) => v.value === voiceName);
|
||||||
|
|
||||||
if (availableVoice) {
|
if (availableVoice) {
|
||||||
|
if (initAttempts.current > 3) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
ttsRef.current
|
ttsRef.current
|
||||||
.setMetadata(availableVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3)
|
.setMetadata(availableVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3, {})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
|
initAttempts.current += 1;
|
||||||
console.error('Error initializing TTS:', error);
|
console.error('Error initializing TTS:', error);
|
||||||
showToast({
|
showToast({
|
||||||
message: localize('com_nav_tts_init_error', { 0: (error as Error).message }),
|
message: localize('com_nav_tts_init_error', { 0: (error as Error).message }),
|
||||||
|
|
@ -73,8 +80,9 @@ function useTextToSpeechEdge({
|
||||||
});
|
});
|
||||||
} else if (voices.length > 0) {
|
} else if (voices.length > 0) {
|
||||||
ttsRef.current
|
ttsRef.current
|
||||||
.setMetadata(voices[0].value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3)
|
.setMetadata(voices[0].value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3, {})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
|
initAttempts.current += 1;
|
||||||
console.error('Error initializing TTS:', error);
|
console.error('Error initializing TTS:', error);
|
||||||
showToast({
|
showToast({
|
||||||
message: localize('com_nav_tts_init_error', { 0: (error as Error).message }),
|
message: localize('com_nav_tts_init_error', { 0: (error as Error).message }),
|
||||||
|
|
@ -147,7 +155,8 @@ function useTextToSpeechEdge({
|
||||||
setIsSpeaking(true);
|
setIsSpeaking(true);
|
||||||
pendingBuffers.current = [];
|
pendingBuffers.current = [];
|
||||||
|
|
||||||
const readable = ttsRef.current.toStream(text);
|
const result = await ttsRef.current.toStream(text);
|
||||||
|
const readable = result.audioStream;
|
||||||
|
|
||||||
readable.on('data', (chunk: Buffer) => {
|
readable.on('data', (chunk: Buffer) => {
|
||||||
pendingBuffers.current.push(new Uint8Array(chunk));
|
pendingBuffers.current.push(new Uint8Array(chunk));
|
||||||
|
|
|
||||||
|
|
@ -67,7 +67,10 @@ function useTextToSpeechExternal({
|
||||||
return playPromise().catch(console.error);
|
return playPromise().catch(console.error);
|
||||||
}
|
}
|
||||||
console.error(error);
|
console.error(error);
|
||||||
showToast({ message: localize('com_nav_audio_play_error', { 0: error.message }), status: 'error' });
|
showToast({
|
||||||
|
message: localize('com_nav_audio_play_error', { 0: error.message }),
|
||||||
|
status: 'error',
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
newAudio.onended = () => {
|
newAudio.onended = () => {
|
||||||
|
|
@ -87,7 +90,7 @@ function useTextToSpeechExternal({
|
||||||
setDownloadFile(false);
|
setDownloadFile(false);
|
||||||
};
|
};
|
||||||
|
|
||||||
const { mutate: processAudio } = useTextToSpeechMutation({
|
const { mutate: processAudio, isLoading } = useTextToSpeechMutation({
|
||||||
onMutate: (variables) => {
|
onMutate: (variables) => {
|
||||||
const inputText = (variables.get('input') ?? '') as string;
|
const inputText = (variables.get('input') ?? '') as string;
|
||||||
if (inputText.length >= 4096) {
|
if (inputText.length >= 4096) {
|
||||||
|
|
@ -182,7 +185,7 @@ function useTextToSpeechExternal({
|
||||||
|
|
||||||
useEffect(() => cancelPromiseSpeech, [cancelPromiseSpeech]);
|
useEffect(() => cancelPromiseSpeech, [cancelPromiseSpeech]);
|
||||||
|
|
||||||
const isLoading = useMemo(
|
const isFetching = useMemo(
|
||||||
() => isLast && globalIsFetching && !globalIsPlaying,
|
() => isLast && globalIsFetching && !globalIsPlaying,
|
||||||
[globalIsFetching, globalIsPlaying, isLast],
|
[globalIsFetching, globalIsPlaying, isLast],
|
||||||
);
|
);
|
||||||
|
|
@ -192,7 +195,7 @@ function useTextToSpeechExternal({
|
||||||
return {
|
return {
|
||||||
generateSpeechExternal,
|
generateSpeechExternal,
|
||||||
cancelSpeech,
|
cancelSpeech,
|
||||||
isLoading,
|
isLoading: isFetching || isLoading,
|
||||||
audioRef,
|
audioRef,
|
||||||
voices: voicesData,
|
voices: voicesData,
|
||||||
};
|
};
|
||||||
|
|
|
||||||
36
package-lock.json
generated
36
package-lock.json
generated
|
|
@ -1667,7 +1667,7 @@
|
||||||
"lodash": "^4.17.21",
|
"lodash": "^4.17.21",
|
||||||
"lucide-react": "^0.394.0",
|
"lucide-react": "^0.394.0",
|
||||||
"match-sorter": "^6.3.4",
|
"match-sorter": "^6.3.4",
|
||||||
"msedge-tts": "^1.3.4",
|
"msedge-tts": "^2.0.0",
|
||||||
"qrcode.react": "^4.2.0",
|
"qrcode.react": "^4.2.0",
|
||||||
"rc-input-number": "^7.4.2",
|
"rc-input-number": "^7.4.2",
|
||||||
"react": "^18.2.0",
|
"react": "^18.2.0",
|
||||||
|
|
@ -3303,6 +3303,25 @@
|
||||||
"node": ">=4"
|
"node": ">=4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"client/node_modules/msedge-tts": {
|
||||||
|
"version": "2.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/msedge-tts/-/msedge-tts-2.0.0.tgz",
|
||||||
|
"integrity": "sha512-9qmAh80/rvEFCWDlfqHvrZzf9zioEqksiwpNKSy8MuBud27D6FNPVTHNDc1c37dX0u6w7iYe++Dg/V0a9fAFSw==",
|
||||||
|
"hasInstallScript": true,
|
||||||
|
"dependencies": {
|
||||||
|
"axios": "^1.5.0",
|
||||||
|
"buffer": "^6.0.3",
|
||||||
|
"crypto-browserify": "^3.12.0",
|
||||||
|
"isomorphic-ws": "^5.0.0",
|
||||||
|
"process": "^0.11.10",
|
||||||
|
"randombytes": "^2.1.0",
|
||||||
|
"stream-browserify": "^3.0.0",
|
||||||
|
"ws": "^8.14.1"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=16.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"client/node_modules/node-releases": {
|
"client/node_modules/node-releases": {
|
||||||
"version": "2.0.19",
|
"version": "2.0.19",
|
||||||
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz",
|
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz",
|
||||||
|
|
@ -34288,21 +34307,6 @@
|
||||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
||||||
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
|
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
|
||||||
},
|
},
|
||||||
"node_modules/msedge-tts": {
|
|
||||||
"version": "1.3.4",
|
|
||||||
"resolved": "https://registry.npmjs.org/msedge-tts/-/msedge-tts-1.3.4.tgz",
|
|
||||||
"integrity": "sha512-0dj86Gg9VzdOJZVCkSSK/O5Eg0NM9W5p8LsXAEPe7qUmsvdAugPUTcPwt9tyz4GThAzAFBBu554kevH8StLEHQ==",
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"axios": "^1.5.0",
|
|
||||||
"buffer": "^6.0.3",
|
|
||||||
"crypto-browserify": "^3.12.0",
|
|
||||||
"isomorphic-ws": "^5.0.0",
|
|
||||||
"process": "^0.11.10",
|
|
||||||
"stream-browserify": "^3.0.0",
|
|
||||||
"ws": "^8.14.1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/multer": {
|
"node_modules/multer": {
|
||||||
"version": "1.4.5-lts.1",
|
"version": "1.4.5-lts.1",
|
||||||
"resolved": "https://registry.npmjs.org/multer/-/multer-1.4.5-lts.1.tgz",
|
"resolved": "https://registry.npmjs.org/multer/-/multer-1.4.5-lts.1.tgz",
|
||||||
|
|
|
||||||
|
|
@ -375,9 +375,23 @@ export function parseTextParts(contentParts: a.TMessageContentParts[]): string {
|
||||||
let result = '';
|
let result = '';
|
||||||
|
|
||||||
for (const part of contentParts) {
|
for (const part of contentParts) {
|
||||||
|
if (!part.type) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (part.type === ContentTypes.TEXT) {
|
if (part.type === ContentTypes.TEXT) {
|
||||||
const textValue = typeof part.text === 'string' ? part.text : part.text.value;
|
const textValue = typeof part.text === 'string' ? part.text : part.text.value;
|
||||||
|
|
||||||
|
if (
|
||||||
|
result.length > 0 &&
|
||||||
|
textValue.length > 0 &&
|
||||||
|
result[result.length - 1] !== ' ' &&
|
||||||
|
textValue[0] !== ' '
|
||||||
|
) {
|
||||||
|
result += ' ';
|
||||||
|
}
|
||||||
|
result += textValue;
|
||||||
|
} else if (part.type === ContentTypes.THINK) {
|
||||||
|
const textValue = typeof part.think === 'string' ? part.think : '';
|
||||||
if (
|
if (
|
||||||
result.length > 0 &&
|
result.length > 0 &&
|
||||||
textValue.length > 0 &&
|
textValue.length > 0 &&
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue