🗣️ fix(tts): Add Text Parser for Message Content Parts (#2840)

* fix: manual TTS trigger for message content parts

* ci(streamAudio): processChunks test
This commit is contained in:
Danny Avila 2024-05-22 23:27:37 -04:00 committed by GitHub
parent dc1778b11f
commit 8e66683577
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 52 additions and 27 deletions

View file

@ -13,7 +13,7 @@ describe('processChunks', () => {
let processChunks;
beforeEach(() => {
processChunks = createChunkProcessor();
processChunks = createChunkProcessor('message-id');
Message.findOne.mockClear();
Message.findOne().lean.mockClear();
});
@ -21,20 +21,17 @@ describe('processChunks', () => {
it('should return an empty array when the message is not found', async () => {
Message.findOne().lean.mockResolvedValueOnce(null);
const result = await processChunks('non-existent-id');
const result = await processChunks();
expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith(
{ messageId: 'non-existent-id' },
'text unfinished',
);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
expect(Message.findOne().lean).toHaveBeenCalled();
});
it('should return an empty array when the message does not have a text property', async () => {
Message.findOne().lean.mockResolvedValueOnce({ unfinished: true });
const result = await processChunks('message-id');
const result = await processChunks();
expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
@ -45,7 +42,7 @@ describe('processChunks', () => {
const messageText = 'This is a long message. It should be split into chunks. Lol hi mom';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
const result = await processChunks('message-id');
const result = await processChunks();
expect(result).toEqual([
{ text: 'This is a long message. It should be split into chunks.', isFinished: false },
@ -58,7 +55,7 @@ describe('processChunks', () => {
const messageText = 'This is a long message without separators hello there my friend';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true });
const result = await processChunks('message-id');
const result = await processChunks();
expect(result).toEqual([{ text: messageText, isFinished: false }]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
@ -69,7 +66,7 @@ describe('processChunks', () => {
const messageText = 'This is a finished message.';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
const result = await processChunks('message-id');
const result = await processChunks();
expect(result).toEqual([{ text: messageText, isFinished: true }]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');
@ -80,9 +77,9 @@ describe('processChunks', () => {
const messageText = 'This is a finished message.';
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
await processChunks('message-id');
await processChunks();
Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false });
const result = await processChunks('message-id');
const result = await processChunks();
expect(result).toEqual([]);
expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished');

View file

@ -50,7 +50,7 @@ export default function HoverButtons({
const [TextToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
const { handleMouseDown, handleMouseUp, toggleSpeech, isSpeaking, isLoading } = useTextToSpeech(
message?.text ?? '',
message?.content ?? message?.text ?? '',
isLast,
index,
);

View file

@ -1,11 +1,13 @@
import { useRef } from 'react';
import useTextToSpeechBrowser from './useTextToSpeechBrowser';
import { parseTextParts } from 'librechat-data-provider';
import type { TMessageContentParts } from 'librechat-data-provider';
import useTextToSpeechExternal from './useTextToSpeechExternal';
import useTextToSpeechBrowser from './useTextToSpeechBrowser';
import { usePauseGlobalAudio } from '../Audio';
import { useRecoilState } from 'recoil';
import store from '~/store';
const useTextToSpeech = (message: string, isLast: boolean, index = 0) => {
const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boolean, index = 0) => {
const [endpointTTS] = useRecoilState<string>(store.endpointTTS);
const useExternalTextToSpeech = endpointTTS === 'external';
@ -34,7 +36,8 @@ const useTextToSpeech = (message: string, isLast: boolean, index = 0) => {
isMouseDownRef.current = true;
timerRef.current = window.setTimeout(() => {
if (isMouseDownRef.current) {
generateSpeech(message, true);
const parsedMessage = typeof message === 'string' ? message : parseTextParts(message);
generateSpeech(parsedMessage, true);
}
}, 1000);
};
@ -51,7 +54,8 @@ const useTextToSpeech = (message: string, isLast: boolean, index = 0) => {
cancelSpeech();
pauseGlobalAudio();
} else {
generateSpeech(message, false);
const parsedMessage = typeof message === 'string' ? message : parseTextParts(message);
generateSpeech(parsedMessage, false);
}
};

View file

@ -1,6 +1,8 @@
import type { ZodIssue } from 'zod';
import type { TConversation, TPreset } from './schemas';
import type { TConfig, TEndpointOption, TEndpointsConfig } from './types';
import type * as a from './types/assistants';
import type * as s from './schemas';
import type * as t from './types';
import { ContentTypes } from './types/assistants';
import {
EModelEndpoint,
openAISchema,
@ -71,7 +73,7 @@ export function getEnabledEndpoints() {
}
/** Orders an existing EndpointsConfig object based on enabled endpoint/custom ordering */
export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) {
export function orderEndpointsConfig(endpointsConfig: t.TEndpointsConfig) {
if (!endpointsConfig) {
return {};
}
@ -79,7 +81,7 @@ export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) {
const endpointKeys = Object.keys(endpointsConfig);
const defaultCustomIndex = enabledEndpoints.indexOf(EModelEndpoint.custom);
return endpointKeys.reduce(
(accumulatedConfig: Record<string, TConfig | null | undefined>, currentEndpointKey) => {
(accumulatedConfig: Record<string, t.TConfig | null | undefined>, currentEndpointKey) => {
const isCustom = !(currentEndpointKey in EModelEndpoint);
const isEnabled = enabledEndpoints.includes(currentEndpointKey);
if (!isEnabled && !isCustom) {
@ -91,7 +93,7 @@ export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) {
if (isCustom) {
accumulatedConfig[currentEndpointKey] = {
order: defaultCustomIndex >= 0 ? defaultCustomIndex : 9999,
...(endpointsConfig[currentEndpointKey] as Omit<TConfig, 'order'> & { order?: number }),
...(endpointsConfig[currentEndpointKey] as Omit<t.TConfig, 'order'> & { order?: number }),
};
} else if (endpointsConfig[currentEndpointKey]) {
accumulatedConfig[currentEndpointKey] = {
@ -165,7 +167,7 @@ export const parseConvo = ({
}: {
endpoint: EModelEndpoint;
endpointType?: EModelEndpoint;
conversation: Partial<TConversation | TPreset>;
conversation: Partial<s.TConversation | s.TPreset>;
possibleValues?: TPossibleValues;
// TODO: POC for default schema
// defaultSchema?: Partial<EndpointSchema>,
@ -182,7 +184,7 @@ export const parseConvo = ({
// schema = schemaCreators[endpoint](defaultSchema);
// }
const convo = schema.parse(conversation) as TConversation;
const convo = schema.parse(conversation) as s.TConversation;
const { models, secondaryModels } = possibleValues ?? {};
if (models && convo) {
@ -196,7 +198,7 @@ export const parseConvo = ({
return convo;
};
export const getResponseSender = (endpointOption: TEndpointOption): string => {
export const getResponseSender = (endpointOption: t.TEndpointOption): string => {
const { model, endpoint, endpointType, modelDisplayLabel, chatGptLabel, modelLabel, jailbreak } =
endpointOption;
@ -292,7 +294,7 @@ export const parseCompactConvo = ({
}: {
endpoint?: EModelEndpoint;
endpointType?: EModelEndpoint;
conversation: Partial<TConversation | TPreset>;
conversation: Partial<s.TConversation | s.TPreset>;
possibleValues?: TPossibleValues;
// TODO: POC for default schema
// defaultSchema?: Partial<EndpointSchema>,
@ -309,7 +311,7 @@ export const parseCompactConvo = ({
schema = compactEndpointSchemas[endpointType];
}
const convo = schema.parse(conversation) as TConversation;
const convo = schema.parse(conversation) as s.TConversation;
// const { models, secondaryModels } = possibleValues ?? {};
const { models } = possibleValues ?? {};
@ -323,3 +325,25 @@ export const parseCompactConvo = ({
return convo;
};
/**
 * Flattens the text parts of a message's content array into one string.
 *
 * Only parts whose `type` equals `ContentTypes.TEXT` contribute; all other
 * parts are ignored. Consecutive text values are joined with a single space,
 * unless the accumulated result already ends with a space or the next value
 * already starts with one.
 *
 * Null/undefined entries and text parts without a usable payload are skipped
 * rather than throwing. A payload may be `{ value: string }` or a bare string.
 *
 * @param contentParts - Message content parts to scan.
 * @returns The concatenated text, or `''` when no text part carries text.
 */
export function parseTextParts(contentParts: a.TMessageContentParts[]): string {
  let result = '';

  for (const part of contentParts ?? []) {
    if (part?.type !== ContentTypes.TEXT) {
      continue;
    }

    // Read the payload defensively: an unguarded `part.text.value` throws
    // when `text` is absent on a text-typed part.
    const payload: unknown = part.text;
    const textValue =
      typeof payload === 'string'
        ? payload
        : (payload as { value?: unknown } | null | undefined)?.value;

    // Empty values never altered the result, so skipping them is equivalent.
    if (typeof textValue !== 'string' || textValue.length === 0) {
      continue;
    }

    const needsSeparator =
      result.length > 0 && !result.endsWith(' ') && !textValue.startsWith(' ');

    result += needsSeparator ? ` ${textValue}` : textValue;
  }

  return result;
}