diff --git a/api/server/services/Files/Audio/streamAudio-wip.js b/api/server/services/Files/Audio/streamAudio.spec.js similarity index 85% rename from api/server/services/Files/Audio/streamAudio-wip.js rename to api/server/services/Files/Audio/streamAudio.spec.js index f472309496..6aee27c7b8 100644 --- a/api/server/services/Files/Audio/streamAudio-wip.js +++ b/api/server/services/Files/Audio/streamAudio.spec.js @@ -13,7 +13,7 @@ describe('processChunks', () => { let processChunks; beforeEach(() => { - processChunks = createChunkProcessor(); + processChunks = createChunkProcessor('message-id'); Message.findOne.mockClear(); Message.findOne().lean.mockClear(); }); @@ -21,20 +21,17 @@ describe('processChunks', () => { it('should return an empty array when the message is not found', async () => { Message.findOne().lean.mockResolvedValueOnce(null); - const result = await processChunks('non-existent-id'); + const result = await processChunks(); expect(result).toEqual([]); - expect(Message.findOne).toHaveBeenCalledWith( - { messageId: 'non-existent-id' }, - 'text unfinished', - ); + expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished'); expect(Message.findOne().lean).toHaveBeenCalled(); }); it('should return an empty array when the message does not have a text property', async () => { Message.findOne().lean.mockResolvedValueOnce({ unfinished: true }); - const result = await processChunks('message-id'); + const result = await processChunks(); expect(result).toEqual([]); expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished'); @@ -45,7 +42,7 @@ describe('processChunks', () => { const messageText = 'This is a long message. It should be split into chunks. Lol hi mom'; Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true }); - const result = await processChunks('message-id'); + const result = await processChunks(); expect(result).toEqual([ { text: 'This is a long message. 
It should be split into chunks.', isFinished: false }, @@ -58,7 +55,7 @@ describe('processChunks', () => { const messageText = 'This is a long message without separators hello there my friend'; Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: true }); - const result = await processChunks('message-id'); + const result = await processChunks(); expect(result).toEqual([{ text: messageText, isFinished: false }]); expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished'); @@ -69,7 +66,7 @@ describe('processChunks', () => { const messageText = 'This is a finished message.'; Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false }); - const result = await processChunks('message-id'); + const result = await processChunks(); expect(result).toEqual([{ text: messageText, isFinished: true }]); expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished'); @@ -80,9 +77,9 @@ describe('processChunks', () => { const messageText = 'This is a finished message.'; Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false }); - await processChunks('message-id'); + await processChunks(); Message.findOne().lean.mockResolvedValueOnce({ text: messageText, unfinished: false }); - const result = await processChunks('message-id'); + const result = await processChunks(); expect(result).toEqual([]); expect(Message.findOne).toHaveBeenCalledWith({ messageId: 'message-id' }, 'text unfinished'); diff --git a/client/src/components/Chat/Messages/HoverButtons.tsx b/client/src/components/Chat/Messages/HoverButtons.tsx index 0a7c3e1d8d..ab9052dc77 100644 --- a/client/src/components/Chat/Messages/HoverButtons.tsx +++ b/client/src/components/Chat/Messages/HoverButtons.tsx @@ -50,7 +50,7 @@ export default function HoverButtons({ const [TextToSpeech] = useRecoilState(store.TextToSpeech); const { handleMouseDown, handleMouseUp, toggleSpeech, isSpeaking, 
isLoading } = useTextToSpeech( - message?.text ?? '', + message?.content ?? message?.text ?? '', isLast, index, ); diff --git a/client/src/hooks/Input/useTextToSpeech.ts b/client/src/hooks/Input/useTextToSpeech.ts index 0c1680ee3f..40378d24ba 100644 --- a/client/src/hooks/Input/useTextToSpeech.ts +++ b/client/src/hooks/Input/useTextToSpeech.ts @@ -1,11 +1,13 @@ import { useRef } from 'react'; -import useTextToSpeechBrowser from './useTextToSpeechBrowser'; +import { parseTextParts } from 'librechat-data-provider'; +import type { TMessageContentParts } from 'librechat-data-provider'; import useTextToSpeechExternal from './useTextToSpeechExternal'; +import useTextToSpeechBrowser from './useTextToSpeechBrowser'; import { usePauseGlobalAudio } from '../Audio'; import { useRecoilState } from 'recoil'; import store from '~/store'; -const useTextToSpeech = (message: string, isLast: boolean, index = 0) => { +const useTextToSpeech = (message: string | TMessageContentParts[], isLast: boolean, index = 0) => { const [endpointTTS] = useRecoilState(store.endpointTTS); const useExternalTextToSpeech = endpointTTS === 'external'; @@ -34,7 +36,8 @@ const useTextToSpeech = (message: string, isLast: boolean, index = 0) => { isMouseDownRef.current = true; timerRef.current = window.setTimeout(() => { if (isMouseDownRef.current) { - generateSpeech(message, true); + const parsedMessage = typeof message === 'string' ? message : parseTextParts(message); + generateSpeech(parsedMessage, true); } }, 1000); }; @@ -51,7 +54,8 @@ const useTextToSpeech = (message: string, isLast: boolean, index = 0) => { cancelSpeech(); pauseGlobalAudio(); } else { - generateSpeech(message, false); + const parsedMessage = typeof message === 'string' ? 
message : parseTextParts(message); + generateSpeech(parsedMessage, false); } }; diff --git a/packages/data-provider/src/parsers.ts b/packages/data-provider/src/parsers.ts index 5bf27cc1dc..3406a33f65 100644 --- a/packages/data-provider/src/parsers.ts +++ b/packages/data-provider/src/parsers.ts @@ -1,6 +1,8 @@ import type { ZodIssue } from 'zod'; -import type { TConversation, TPreset } from './schemas'; -import type { TConfig, TEndpointOption, TEndpointsConfig } from './types'; +import type * as a from './types/assistants'; +import type * as s from './schemas'; +import type * as t from './types'; +import { ContentTypes } from './types/assistants'; import { EModelEndpoint, openAISchema, @@ -71,7 +73,7 @@ export function getEnabledEndpoints() { } /** Orders an existing EndpointsConfig object based on enabled endpoint/custom ordering */ -export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) { +export function orderEndpointsConfig(endpointsConfig: t.TEndpointsConfig) { if (!endpointsConfig) { return {}; } @@ -79,7 +81,7 @@ export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) { const endpointKeys = Object.keys(endpointsConfig); const defaultCustomIndex = enabledEndpoints.indexOf(EModelEndpoint.custom); return endpointKeys.reduce( - (accumulatedConfig: Record, currentEndpointKey) => { + (accumulatedConfig: Record, currentEndpointKey) => { const isCustom = !(currentEndpointKey in EModelEndpoint); const isEnabled = enabledEndpoints.includes(currentEndpointKey); if (!isEnabled && !isCustom) { @@ -91,7 +93,7 @@ export function orderEndpointsConfig(endpointsConfig: TEndpointsConfig) { if (isCustom) { accumulatedConfig[currentEndpointKey] = { order: defaultCustomIndex >= 0 ? 
defaultCustomIndex : 9999, - ...(endpointsConfig[currentEndpointKey] as Omit & { order?: number }), + ...(endpointsConfig[currentEndpointKey] as Omit & { order?: number }), }; } else if (endpointsConfig[currentEndpointKey]) { accumulatedConfig[currentEndpointKey] = { @@ -165,7 +167,7 @@ export const parseConvo = ({ }: { endpoint: EModelEndpoint; endpointType?: EModelEndpoint; - conversation: Partial; + conversation: Partial; possibleValues?: TPossibleValues; // TODO: POC for default schema // defaultSchema?: Partial, @@ -182,7 +184,7 @@ export const parseConvo = ({ // schema = schemaCreators[endpoint](defaultSchema); // } - const convo = schema.parse(conversation) as TConversation; + const convo = schema.parse(conversation) as s.TConversation; const { models, secondaryModels } = possibleValues ?? {}; if (models && convo) { @@ -196,7 +198,7 @@ export const parseConvo = ({ return convo; }; -export const getResponseSender = (endpointOption: TEndpointOption): string => { +export const getResponseSender = (endpointOption: t.TEndpointOption): string => { const { model, endpoint, endpointType, modelDisplayLabel, chatGptLabel, modelLabel, jailbreak } = endpointOption; @@ -292,7 +294,7 @@ export const parseCompactConvo = ({ }: { endpoint?: EModelEndpoint; endpointType?: EModelEndpoint; - conversation: Partial; + conversation: Partial; possibleValues?: TPossibleValues; // TODO: POC for default schema // defaultSchema?: Partial, @@ -309,7 +311,7 @@ export const parseCompactConvo = ({ schema = compactEndpointSchemas[endpointType]; } - const convo = schema.parse(conversation) as TConversation; + const convo = schema.parse(conversation) as s.TConversation; // const { models, secondaryModels } = possibleValues ?? {}; const { models } = possibleValues ?? 
/**
 * Joins the text of every text-typed content part into one string.
 *
 * Parts whose `type` is not `ContentTypes.TEXT` are ignored. Between two
 * consecutive text values a single space is inserted, but only when the
 * accumulated result does not already end with a space and the incoming value
 * does not start with one, so existing whitespace is never doubled.
 *
 * Defensive behaviour: nullish entries in the array and text parts without a
 * string `text.value` are skipped instead of raising a TypeError.
 *
 * @param contentParts - Message content parts to flatten.
 * @returns The concatenated text, or an empty string when no text is present.
 */
export function parseTextParts(contentParts: a.TMessageContentParts[]): string {
  let result = '';

  for (const part of contentParts) {
    // Optional chaining lets a nullish entry fall through to `continue`
    // rather than throwing on the property access.
    if (part?.type !== ContentTypes.TEXT) {
      continue;
    }

    const textValue = part.text?.value;
    // An empty value contributes nothing (and must not trigger a separator),
    // so it is treated the same as a missing one.
    if (typeof textValue !== 'string' || textValue.length === 0) {
      continue;
    }

    if (result.length > 0 && !result.endsWith(' ') && !textValue.startsWith(' ')) {
      result += ' ';
    }
    result += textValue;
  }

  return result;
}