mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 08:50:15 +01:00
🗣️ fix(tts): Add Text Parser for Message Content Parts (#2840)
* fix: manual TTS trigger for message content parts * ci(streamAudio): processChunks test
This commit is contained in:
parent
dc1778b11f
commit
8e66683577
4 changed files with 52 additions and 27 deletions
|
|
@ -13,7 +13,7 @@ describe('processChunks', () => {
|
||||||
let processChunks;
|
let processChunks;
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
processChunks = createChunkProcessor();
|
processChunks = createChunkProcessor('message-id');
|
||||||
Message.findOne.mockClear();
|
Message.findOne.mockClear();
|
||||||
Message.findOne().lean.mockClear();
|
Message.findOne().lean.mockClear();
|
||||||
});
|
});
|
||||||
|
|
@ -21,20 +21,17 @@ describe('processChunks', () => {
|
||||||
it('should return an empty array when the message is not found', async () => {
|
it('should return an empty array when the message is not found', async () => {
|
||||||
Message.findOne().lean.mockResolvedValueOnce(null);
|
Message.findOne().lean.mockResolvedValueOnce(null);
|
||||||
|
|
||||||
const result = await processChunks('non-existent-id');
|
const result = await processChunks();
|
||||||
|
|
||||||
expect(result).toEqual([]);
|
expect(result).toEqual([]);
|
||||||
expect(Message.findOne).toHaveBeenCalledWith(
|
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
|
||||||
{ messageId: 'non-existent-id' },
|
|
||||||
'text unfinished',
|
|
||||||
);
|
|
||||||
expect(Message.findOne().lean).toHaveBeenCalled();
|
expect(Message.findOne().lean).toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return an empty array when the message does not have a text property', async () => {
|
it('should return an empty array when the message does not have a text property', async () => {
|
||||||
Message.findOne().lean.mockResolvedValueOnce({ unfinished: true });
|
Message.findOne().lean.mockResolvedValueOnce({ unfinished: true });
|
||||||
|
|
||||||
const result = await processChunks('message-id');
|
const result = await processChunks();
|
||||||
|
|
||||||
expect(result).toEqual([]);
|
expect(result).toEqual([]);
|
||||||
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
|
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
|
||||||
|
|
@ -45,7 +42,7 @@ describe('processChunks', () => {
|
||||||
const messageText = 'This is a long message. It should be split into chunks. Lol hi mom';
|
const messageText = 'This is a long message. It should be split into chunks. Lol hi mom';
|
||||||
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
|
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
|
||||||
|
|
||||||
const result = await processChunks('message-id');
|
const result = await processChunks();
|
||||||
|
|
||||||
expect(result).toEqual([
|
expect(result).toEqual([
|
||||||
{ text: 'This is a long message. It should be split into chunks.', isFinished: false },
|
{ text: 'This is a long message. It should be split into chunks.', isFinished: false },
|
||||||
|
|
@ -58,7 +55,7 @@ describe('processChunks', () => {
|
||||||
const messageText = 'This is a long message without separators hello there my friend';
|
const messageText = 'This is a long message without separators hello there my friend';
|
||||||
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
|
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
|
||||||
|
|
||||||
const result = await processChunks('message-id');
|
const result = await processChunks();
|
||||||
|
|
||||||
expect(result).toEqual([{ text: messageText, isFinished: false }]);
|
expect(result).toEqual([{ text: messageText, isFinished: false }]);
|
||||||
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
|
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
|
||||||
|
|
@ -69,7 +66,7 @@ describe('processChunks', () => {
|
||||||
const messageText = 'This is a finished message.';
|
const messageText = 'This is a finished message.';
|
||||||
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
|
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
|
||||||
|
|
||||||
const result = await processChunks('message-id');
|
const result = await processChunks();
|
||||||
|
|
||||||
expect(result).toEqual([{ text: messageText, isFinished: true }]);
|
expect(result).toEqual([{ text: messageText, isFinished: true }]);
|
||||||
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
|
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
|
||||||
|
|
@ -80,9 +77,9 @@ describe('processChunks', () => {
|
||||||
const messageText = 'This is a finished message.';
|
const messageText = 'This is a finished message.';
|
||||||
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
|
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
|
||||||
|
|
||||||
await processChunks('message-id');
|
await processChunks();
|
||||||
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
|
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
|
||||||
const result = await processChunks('message-id');
|
const result = await processChunks();
|
||||||
|
|
||||||
expect(result).toEqual([]);
|
expect(result).toEqual([]);
|
||||||
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
|
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
|
||||||
|
|
@ -50,7 +50,7 @@ export default function HoverButtons({
|
||||||
const [TextToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
|
const [TextToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
|
||||||
|
|
||||||
const { handleMouseDown, handleMouseUp, toggleSpeech, isSpeaking, isLoading } = useTextToSpeech(
|
const { handleMouseDown, handleMouseUp, toggleSpeech, isSpeaking, isLoading } = useTextToSpeech(
|
||||||
message?.text ?? '',
|
message?.content ?? message?.text ?? '',
|
||||||
isLast,
|
isLast,
|
||||||
index,
|
index,
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,13 @@
|
||||||
import { useRef } from 'react';
|
import { useRef } from 'react';
|
||||||
import useTextToSpeechBrowser from './useTextToSpeechBrowser';
|
import { parseTextParts } from 'librechat-data-provider';
|
||||||
|
import type { TMessageContentParts } from 'librechat-data-provider';
|
||||||
import useTextToSpeechExternal from './useTextToSpeechExternal';
|
import useTextToSpeechExternal from './useTextToSpeechExternal';
|
||||||
|
import useTextToSpeechBrowser from './useTextToSpeechBrowser';
|
||||||
import { usePauseGlobalAudio } from '../Audio';
|
import { usePauseGlobalAudio } from '../Audio';
|
||||||
import { useRecoilState } from 'recoil';
|
import { useRecoilState } from 'recoil';
|
||||||
import store from '~/store';
|
import store from '~/store';
|
||||||
|
|
||||||
const useTextToSpeech = (message: string, isLast: boolean, index = 0) => {
|
const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boolean, index = 0) => {
|
||||||
const [endpointTTS] = useRecoilState<string>(store.endpointTTS);
|
const [endpointTTS] = useRecoilState<string>(store.endpointTTS);
|
||||||
const useExternalTextToSpeech = endpointTTS === 'external';
|
const useExternalTextToSpeech = endpointTTS === 'external';
|
||||||
|
|
||||||
|
|
@ -34,7 +36,8 @@ const useTextToSpeech = (message: string, isLast: boolean, index = 0) => {
|
||||||
isMouseDownRef.current = true;
|
isMouseDownRef.current = true;
|
||||||
timerRef.current = window.setTimeout(() => {
|
timerRef.current = window.setTimeout(() => {
|
||||||
if (isMouseDownRef.current) {
|
if (isMouseDownRef.current) {
|
||||||
generateSpeech(message, true);
|
const parsedMessage = typeof message === 'string' ? message : parseTextParts(message);
|
||||||
|
generateSpeech(parsedMessage, true);
|
||||||
}
|
}
|
||||||
}, 1000);
|
}, 1000);
|
||||||
};
|
};
|
||||||
|
|
@ -51,7 +54,8 @@ const useTextToSpeech = (message: string, isLast: boolean, index = 0) => {
|
||||||
cancelSpeech();
|
cancelSpeech();
|
||||||
pauseGlobalAudio();
|
pauseGlobalAudio();
|
||||||
} else {
|
} else {
|
||||||
generateSpeech(message, false);
|
const parsedMessage = typeof message === 'string' ? message : parseTextParts(message);
|
||||||
|
generateSpeech(parsedMessage, false);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
import type { ZodIssue } from 'zod';
|
import type { ZodIssue } from 'zod';
|
||||||
import type { TConversation, TPreset } from './schemas';
|
import type * as a from './types/assistants';
|
||||||
import type { TConfig, TEndpointOption, TEndpointsConfig } from './types';
|
import type * as s from './schemas';
|
||||||
|
import type * as t from './types';
|
||||||
|
import { ContentTypes } from './types/assistants';
|
||||||
import {
|
import {
|
||||||
EModelEndpoint,
|
EModelEndpoint,
|
||||||
openAISchema,
|
openAISchema,
|
||||||
|
|
@ -71,7 +73,7 @@ export function getEnabledEndpoints() {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Orders an existing EndpointsConfig object based on enabled endpoint/custom ordering */
|
/** Orders an existing EndpointsConfig object based on enabled endpoint/custom ordering */
|
||||||
export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) {
|
export function orderEndpointsConfig(endpointsConfig: t.TEndpointsConfig) {
|
||||||
if (!endpointsConfig) {
|
if (!endpointsConfig) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
@ -79,7 +81,7 @@ export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) {
|
||||||
const endpointKeys = Object.keys(endpointsConfig);
|
const endpointKeys = Object.keys(endpointsConfig);
|
||||||
const defaultCustomIndex = enabledEndpoints.indexOf(EModelEndpoint.custom);
|
const defaultCustomIndex = enabledEndpoints.indexOf(EModelEndpoint.custom);
|
||||||
return endpointKeys.reduce(
|
return endpointKeys.reduce(
|
||||||
(accumulatedConfig: Record<string, TConfig | null | undefined>, currentEndpointKey) => {
|
(accumulatedConfig: Record<string, t.TConfig | null | undefined>, currentEndpointKey) => {
|
||||||
const isCustom = !(currentEndpointKey in EModelEndpoint);
|
const isCustom = !(currentEndpointKey in EModelEndpoint);
|
||||||
const isEnabled = enabledEndpoints.includes(currentEndpointKey);
|
const isEnabled = enabledEndpoints.includes(currentEndpointKey);
|
||||||
if (!isEnabled && !isCustom) {
|
if (!isEnabled && !isCustom) {
|
||||||
|
|
@ -91,7 +93,7 @@ export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) {
|
||||||
if (isCustom) {
|
if (isCustom) {
|
||||||
accumulatedConfig[currentEndpointKey] = {
|
accumulatedConfig[currentEndpointKey] = {
|
||||||
order: defaultCustomIndex >= 0 ? defaultCustomIndex : 9999,
|
order: defaultCustomIndex >= 0 ? defaultCustomIndex : 9999,
|
||||||
...(endpointsConfig[currentEndpointKey] as Omit<TConfig, 'order'> & { order?: number }),
|
...(endpointsConfig[currentEndpointKey] as Omit<t.TConfig, 'order'> & { order?: number }),
|
||||||
};
|
};
|
||||||
} else if (endpointsConfig[currentEndpointKey]) {
|
} else if (endpointsConfig[currentEndpointKey]) {
|
||||||
accumulatedConfig[currentEndpointKey] = {
|
accumulatedConfig[currentEndpointKey] = {
|
||||||
|
|
@ -165,7 +167,7 @@ export const parseConvo = ({
|
||||||
}: {
|
}: {
|
||||||
endpoint: EModelEndpoint;
|
endpoint: EModelEndpoint;
|
||||||
endpointType?: EModelEndpoint;
|
endpointType?: EModelEndpoint;
|
||||||
conversation: Partial<TConversation | TPreset>;
|
conversation: Partial<s.TConversation | s.TPreset>;
|
||||||
possibleValues?: TPossibleValues;
|
possibleValues?: TPossibleValues;
|
||||||
// TODO: POC for default schema
|
// TODO: POC for default schema
|
||||||
// defaultSchema?: Partial<EndpointSchema>,
|
// defaultSchema?: Partial<EndpointSchema>,
|
||||||
|
|
@ -182,7 +184,7 @@ export const parseConvo = ({
|
||||||
// schema = schemaCreators[endpoint](defaultSchema);
|
// schema = schemaCreators[endpoint](defaultSchema);
|
||||||
// }
|
// }
|
||||||
|
|
||||||
const convo = schema.parse(conversation) as TConversation;
|
const convo = schema.parse(conversation) as s.TConversation;
|
||||||
const { models, secondaryModels } = possibleValues ?? {};
|
const { models, secondaryModels } = possibleValues ?? {};
|
||||||
|
|
||||||
if (models && convo) {
|
if (models && convo) {
|
||||||
|
|
@ -196,7 +198,7 @@ export const parseConvo = ({
|
||||||
return convo;
|
return convo;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const getResponseSender = (endpointOption: TEndpointOption): string => {
|
export const getResponseSender = (endpointOption: t.TEndpointOption): string => {
|
||||||
const { model, endpoint, endpointType, modelDisplayLabel, chatGptLabel, modelLabel, jailbreak } =
|
const { model, endpoint, endpointType, modelDisplayLabel, chatGptLabel, modelLabel, jailbreak } =
|
||||||
endpointOption;
|
endpointOption;
|
||||||
|
|
||||||
|
|
@ -292,7 +294,7 @@ export const parseCompactConvo = ({
|
||||||
}: {
|
}: {
|
||||||
endpoint?: EModelEndpoint;
|
endpoint?: EModelEndpoint;
|
||||||
endpointType?: EModelEndpoint;
|
endpointType?: EModelEndpoint;
|
||||||
conversation: Partial<TConversation | TPreset>;
|
conversation: Partial<s.TConversation | s.TPreset>;
|
||||||
possibleValues?: TPossibleValues;
|
possibleValues?: TPossibleValues;
|
||||||
// TODO: POC for default schema
|
// TODO: POC for default schema
|
||||||
// defaultSchema?: Partial<EndpointSchema>,
|
// defaultSchema?: Partial<EndpointSchema>,
|
||||||
|
|
@ -309,7 +311,7 @@ export const parseCompactConvo = ({
|
||||||
schema = compactEndpointSchemas[endpointType];
|
schema = compactEndpointSchemas[endpointType];
|
||||||
}
|
}
|
||||||
|
|
||||||
const convo = schema.parse(conversation) as TConversation;
|
const convo = schema.parse(conversation) as s.TConversation;
|
||||||
// const { models, secondaryModels } = possibleValues ?? {};
|
// const { models, secondaryModels } = possibleValues ?? {};
|
||||||
const { models } = possibleValues ?? {};
|
const { models } = possibleValues ?? {};
|
||||||
|
|
||||||
|
|
@ -323,3 +325,25 @@ export const parseCompactConvo = ({
|
||||||
|
|
||||||
return convo;
|
return convo;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Flattens the text-type entries of a message's content parts into one string.
 *
 * Parts whose `type` is not `ContentTypes.TEXT` are ignored. Consecutive text
 * fragments are joined with a single space, unless the accumulated text already
 * ends with a space or the next fragment already starts with one.
 *
 * Malformed entries are skipped instead of throwing: `null`/`undefined` slots
 * in the array and text parts that carry no usable string. A text part's string
 * is read from `text.value`, or from `text` itself when it is a plain string.
 *
 * @param contentParts - The content parts to flatten.
 * @returns The concatenated text, or an empty string when there is no text.
 */
export function parseTextParts(contentParts: a.TMessageContentParts[]): string {
  let result = '';

  for (const part of contentParts ?? []) {
    // Skip empty slots and anything that is not a text part.
    if (!part || part.type !== ContentTypes.TEXT) {
      continue;
    }

    // Accept both `{ text: { value } }` and a bare string `text`.
    const rawText: unknown = part.text;
    const textValue =
      typeof rawText === 'string'
        ? rawText
        : (rawText as { value?: unknown } | null | undefined)?.value;

    // Nothing to append; an empty fragment must not trigger a separator either.
    if (typeof textValue !== 'string' || textValue.length === 0) {
      continue;
    }

    const needsSeparator =
      result.length > 0 && result[result.length - 1] !== ' ' && textValue[0] !== ' ';

    result += needsSeparator ? ` ${textValue}` : textValue;
  }

  return result;
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue