🗣️ fix(tts): Add Text Parser for Message Content Parts (#2840)

* fix: manual TTS trigger for message content parts

* ci(streamAudio): processChunks test
This commit is contained in:
Danny Avila 2024-05-22 23:27:37 -04:00 committed by GitHub
parent dc1778b11f
commit 8e66683577
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 52 additions and 27 deletions

View file

@ -13,7 +13,7 @@ describe('processChunks', () => {
let processChunks; let processChunks;
beforeEach(() => { beforeEach(() => {
processChunks = createChunkProcessor(); processChunks = createChunkProcessor('message-id');
Message.findOne.mockClear(); Message.findOne.mockClear();
Message.findOne().lean.mockClear(); Message.findOne().lean.mockClear();
}); });
@ -21,20 +21,17 @@ describe('processChunks', () => {
it('should return an empty array when the message is not found', async () => { it('should return an empty array when the message is not found', async () => {
Message.findOne().lean.mockResolvedValueOnce(null); Message.findOne().lean.mockResolvedValueOnce(null);
const result = await processChunks('non-existent-id'); const result = await processChunks();
expect(result).toEqual([]); expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith( expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
{ messageId: 'non-existent-id' },
'text unfinished',
);
expect(Message.findOne().lean).toHaveBeenCalled(); expect(Message.findOne().lean).toHaveBeenCalled();
}); });
it('should return an empty array when the message does not have a text property', async () => { it('should return an empty array when the message does not have a text property', async () => {
Message.findOne().lean.mockResolvedValueOnce({ unfinished: true }); Message.findOne().lean.mockResolvedValueOnce({ unfinished: true });
const result = await processChunks('message-id'); const result = await processChunks();
expect(result).toEqual([]); expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished'); expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
@ -45,7 +42,7 @@ describe('processChunks', () => {
const messageText = 'This is a long message. It should be split into chunks. Lol hi mom'; const messageText = 'This is a long message. It should be split into chunks. Lol hi mom';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true }); Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
const result = await processChunks('message-id'); const result = await processChunks();
expect(result).toEqual([ expect(result).toEqual([
{ text: 'This is a long message. It should be split into chunks.', isFinished: false }, { text: 'This is a long message. It should be split into chunks.', isFinished: false },
@ -58,7 +55,7 @@ describe('processChunks', () => {
const messageText = 'This is a long message without separators hello there my friend'; const messageText = 'This is a long message without separators hello there my friend';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true }); Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
const result = await processChunks('message-id'); const result = await processChunks();
expect(result).toEqual([{ text: messageText, isFinished: false }]); expect(result).toEqual([{ text: messageText, isFinished: false }]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished'); expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
@ -69,7 +66,7 @@ describe('processChunks', () => {
const messageText = 'This is a finished message.'; const messageText = 'This is a finished message.';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false }); Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
const result = await processChunks('message-id'); const result = await processChunks();
expect(result).toEqual([{ text: messageText, isFinished: true }]); expect(result).toEqual([{ text: messageText, isFinished: true }]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished'); expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
@ -80,9 +77,9 @@ describe('processChunks', () => {
const messageText = 'This is a finished message.'; const messageText = 'This is a finished message.';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false }); Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
await processChunks('message-id'); await processChunks();
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false }); Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
const result = await processChunks('message-id'); const result = await processChunks();
expect(result).toEqual([]); expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished'); expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');

View file

@ -50,7 +50,7 @@ export default function HoverButtons({
const [TextToSpeech] = useRecoilState<boolean>(store.TextToSpeech); const [TextToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
const { handleMouseDown, handleMouseUp, toggleSpeech, isSpeaking, isLoading } = useTextToSpeech( const { handleMouseDown, handleMouseUp, toggleSpeech, isSpeaking, isLoading } = useTextToSpeech(
message?.text ?? '', message?.content ?? message?.text ?? '',
isLast, isLast,
index, index,
); );

View file

@ -1,11 +1,13 @@
import { useRef } from 'react'; import { useRef } from 'react';
import useTextToSpeechBrowser from './useTextToSpeechBrowser'; import { parseTextParts } from 'librechat-data-provider';
import type { TMessageContentParts } from 'librechat-data-provider';
import useTextToSpeechExternal from './useTextToSpeechExternal'; import useTextToSpeechExternal from './useTextToSpeechExternal';
import useTextToSpeechBrowser from './useTextToSpeechBrowser';
import { usePauseGlobalAudio } from '../Audio'; import { usePauseGlobalAudio } from '../Audio';
import { useRecoilState } from 'recoil'; import { useRecoilState } from 'recoil';
import store from '~/store'; import store from '~/store';
const useTextToSpeech = (message: string, isLast: boolean, index = 0) => { const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boolean, index = 0) => {
const [endpointTTS] = useRecoilState<string>(store.endpointTTS); const [endpointTTS] = useRecoilState<string>(store.endpointTTS);
const useExternalTextToSpeech = endpointTTS === 'external'; const useExternalTextToSpeech = endpointTTS === 'external';
@ -34,7 +36,8 @@ const useTextToSpeech = (message: string, isLast: boolean, index = 0) => {
isMouseDownRef.current = true; isMouseDownRef.current = true;
timerRef.current = window.setTimeout(() => { timerRef.current = window.setTimeout(() => {
if (isMouseDownRef.current) { if (isMouseDownRef.current) {
generateSpeech(message, true); const parsedMessage = typeof message === 'string' ? message : parseTextParts(message);
generateSpeech(parsedMessage, true);
} }
}, 1000); }, 1000);
}; };
@ -51,7 +54,8 @@ const useTextToSpeech = (message: string, isLast: boolean, index = 0) => {
cancelSpeech(); cancelSpeech();
pauseGlobalAudio(); pauseGlobalAudio();
} else { } else {
generateSpeech(message, false); const parsedMessage = typeof message === 'string' ? message : parseTextParts(message);
generateSpeech(parsedMessage, false);
} }
}; };

View file

@ -1,6 +1,8 @@
import type { ZodIssue } from 'zod'; import type { ZodIssue } from 'zod';
import type { TConversation, TPreset } from './schemas'; import type * as a from './types/assistants';
import type { TConfig, TEndpointOption, TEndpointsConfig } from './types'; import type * as s from './schemas';
import type * as t from './types';
import { ContentTypes } from './types/assistants';
import { import {
EModelEndpoint, EModelEndpoint,
openAISchema, openAISchema,
@ -71,7 +73,7 @@ export function getEnabledEndpoints() {
} }
/** Orders an existing EndpointsConfig object based on enabled endpoint/custom ordering */ /** Orders an existing EndpointsConfig object based on enabled endpoint/custom ordering */
export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) { export function orderEndpointsConfig(endpointsConfig: t.TEndpointsConfig) {
if (!endpointsConfig) { if (!endpointsConfig) {
return {}; return {};
} }
@ -79,7 +81,7 @@ export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) {
const endpointKeys = Object.keys(endpointsConfig); const endpointKeys = Object.keys(endpointsConfig);
const defaultCustomIndex = enabledEndpoints.indexOf(EModelEndpoint.custom); const defaultCustomIndex = enabledEndpoints.indexOf(EModelEndpoint.custom);
return endpointKeys.reduce( return endpointKeys.reduce(
(accumulatedConfig: Record<string, TConfig | null | undefined>, currentEndpointKey) => { (accumulatedConfig: Record<string, t.TConfig | null | undefined>, currentEndpointKey) => {
const isCustom = !(currentEndpointKey in EModelEndpoint); const isCustom = !(currentEndpointKey in EModelEndpoint);
const isEnabled = enabledEndpoints.includes(currentEndpointKey); const isEnabled = enabledEndpoints.includes(currentEndpointKey);
if (!isEnabled && !isCustom) { if (!isEnabled && !isCustom) {
@ -91,7 +93,7 @@ export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) {
if (isCustom) { if (isCustom) {
accumulatedConfig[currentEndpointKey] = { accumulatedConfig[currentEndpointKey] = {
order: defaultCustomIndex >= 0 ? defaultCustomIndex : 9999, order: defaultCustomIndex >= 0 ? defaultCustomIndex : 9999,
...(endpointsConfig[currentEndpointKey] as Omit<TConfig, 'order'> & { order?: number }), ...(endpointsConfig[currentEndpointKey] as Omit<t.TConfig, 'order'> & { order?: number }),
}; };
} else if (endpointsConfig[currentEndpointKey]) { } else if (endpointsConfig[currentEndpointKey]) {
accumulatedConfig[currentEndpointKey] = { accumulatedConfig[currentEndpointKey] = {
@ -165,7 +167,7 @@ export const parseConvo = ({
}: { }: {
endpoint: EModelEndpoint; endpoint: EModelEndpoint;
endpointType?: EModelEndpoint; endpointType?: EModelEndpoint;
conversation: Partial<TConversation | TPreset>; conversation: Partial<s.TConversation | s.TPreset>;
possibleValues?: TPossibleValues; possibleValues?: TPossibleValues;
// TODO: POC for default schema // TODO: POC for default schema
// defaultSchema?: Partial<EndpointSchema>, // defaultSchema?: Partial<EndpointSchema>,
@ -182,7 +184,7 @@ export const parseConvo = ({
// schema = schemaCreators[endpoint](defaultSchema); // schema = schemaCreators[endpoint](defaultSchema);
// } // }
const convo = schema.parse(conversation) as TConversation; const convo = schema.parse(conversation) as s.TConversation;
const { models, secondaryModels } = possibleValues ?? {}; const { models, secondaryModels } = possibleValues ?? {};
if (models && convo) { if (models && convo) {
@ -196,7 +198,7 @@ export const parseConvo = ({
return convo; return convo;
}; };
export const getResponseSender = (endpointOption: TEndpointOption): string => { export const getResponseSender = (endpointOption: t.TEndpointOption): string => {
const { model, endpoint, endpointType, modelDisplayLabel, chatGptLabel, modelLabel, jailbreak } = const { model, endpoint, endpointType, modelDisplayLabel, chatGptLabel, modelLabel, jailbreak } =
endpointOption; endpointOption;
@ -292,7 +294,7 @@ export const parseCompactConvo = ({
}: { }: {
endpoint?: EModelEndpoint; endpoint?: EModelEndpoint;
endpointType?: EModelEndpoint; endpointType?: EModelEndpoint;
conversation: Partial<TConversation | TPreset>; conversation: Partial<s.TConversation | s.TPreset>;
possibleValues?: TPossibleValues; possibleValues?: TPossibleValues;
// TODO: POC for default schema // TODO: POC for default schema
// defaultSchema?: Partial<EndpointSchema>, // defaultSchema?: Partial<EndpointSchema>,
@ -309,7 +311,7 @@ export const parseCompactConvo = ({
schema = compactEndpointSchemas[endpointType]; schema = compactEndpointSchemas[endpointType];
} }
const convo = schema.parse(conversation) as TConversation; const convo = schema.parse(conversation) as s.TConversation;
// const { models, secondaryModels } = possibleValues ?? {}; // const { models, secondaryModels } = possibleValues ?? {};
const { models } = possibleValues ?? {}; const { models } = possibleValues ?? {};
@ -323,3 +325,25 @@ export const parseCompactConvo = ({
return convo; return convo;
}; };
/**
 * Flattens a list of message content parts into one plain-text string.
 *
 * Only parts whose `type` equals `ContentTypes.TEXT` contribute; every other
 * part is ignored. Consecutive text pieces are joined with a single space
 * unless the accumulated text already ends with a space or the next piece
 * already starts with one, so adjacent words never run together and no
 * double spaces are introduced at the seam.
 *
 * @param contentParts - Message content parts, in display order.
 * @returns The concatenated text of all text parts; `''` when there are none.
 */
export function parseTextParts(contentParts: a.TMessageContentParts[]): string {
  let result = '';

  for (const part of contentParts) {
    // Defensive: skip empty slots and anything that is not a text part.
    // NOTE(review): nullish entries are not expected per the declared type,
    // but one would otherwise crash the whole parse — confirm against callers.
    if (part == null || part.type !== ContentTypes.TEXT) {
      continue;
    }

    // The text payload is read as `{ value: string }`; a bare string is also
    // tolerated so a differently-shaped text part degrades gracefully instead
    // of throwing on `.value`.
    const rawText: unknown = part.text;
    const textValue =
      typeof rawText === 'string'
        ? rawText
        : (rawText as { value?: unknown } | null | undefined)?.value;

    // Missing or empty text adds nothing and must not trigger a separator.
    if (typeof textValue !== 'string' || textValue.length === 0) {
      continue;
    }

    // Insert exactly one space at the seam when neither side provides one.
    if (result.length > 0 && !result.endsWith(' ') && !textValue.startsWith(' ')) {
      result += ' ';
    }

    result += textValue;
  }

  return result;
}