diff --git a/.env.example b/.env.example index ae3537038a..db09bb471f 100644 --- a/.env.example +++ b/.env.example @@ -64,6 +64,8 @@ CONSOLE_JSON=false DEBUG_LOGGING=true DEBUG_CONSOLE=false +# Set to true to enable agent debug logging +AGENT_DEBUG_LOGGING=false # Enable memory diagnostics (logs heap/RSS snapshots every 60s, auto-enabled with --inspect) # MEM_DIAG=true diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js index a7ad089d20..ae2d362773 100644 --- a/api/app/clients/BaseClient.js +++ b/api/app/clients/BaseClient.js @@ -13,7 +13,6 @@ const { } = require('@librechat/api'); const { Constants, - ErrorTypes, FileSources, ContentTypes, excludedKeys, @@ -25,7 +24,6 @@ const { isBedrockDocumentType, } = require('librechat-data-provider'); const { getStrategyFunctions } = require('~/server/services/Files/strategies'); -const { truncateToolCallOutputs } = require('./prompts'); const { logViolation } = require('~/cache'); const TextStream = require('./TextStream'); const db = require('~/models'); @@ -333,45 +331,6 @@ class BaseClient { return payload; } - async handleTokenCountMap(tokenCountMap) { - if (this.clientName === EModelEndpoint.agents) { - return; - } - if (this.currentMessages.length === 0) { - return; - } - - for (let i = 0; i < this.currentMessages.length; i++) { - // Skip the last message, which is the user message. - if (i === this.currentMessages.length - 1) { - break; - } - - const message = this.currentMessages[i]; - const { messageId } = message; - const update = {}; - - if (messageId === tokenCountMap.summaryMessage?.messageId) { - logger.debug(`[BaseClient] Adding summary props to ${messageId}.`); - - update.summary = tokenCountMap.summaryMessage.content; - update.summaryTokenCount = tokenCountMap.summaryMessage.tokenCount; - } - - if (message.tokenCount && !update.summaryTokenCount) { - logger.debug(`[BaseClient] Skipping ${messageId}: already had a token count.`); - continue; - } - - const tokenCount = tokenCountMap[messageId]; - if (tokenCount) { - message.tokenCount = tokenCount; - update.tokenCount = tokenCount; - await this.updateMessageInDatabase({ messageId, ...update }); - } - } - } - concatenateMessages(messages) { return messages.reduce((acc, message) => { const nameOrRole = message.name ?? message.role; @@ -442,154 +401,6 @@ class BaseClient { }; } - async handleContextStrategy({ - instructions, - orderedMessages, - formattedMessages, - buildTokenMap = true, - }) { - let _instructions; - let tokenCount; - - if (instructions) { - ({ tokenCount, ..._instructions } = instructions); - } - - _instructions && logger.debug('[BaseClient] instructions tokenCount: ' + tokenCount); - if (tokenCount && tokenCount > this.maxContextTokens) { - const info = `${tokenCount} / ${this.maxContextTokens}`; - const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`; - logger.warn(`Instructions token count exceeds max token count (${info}).`); - throw new Error(errorMessage); - } - - if (this.clientName === EModelEndpoint.agents) { - const { dbMessages, editedIndices } = truncateToolCallOutputs( - orderedMessages, - this.maxContextTokens, - this.getTokenCountForMessage.bind(this), - ); - - if (editedIndices.length > 0) { - logger.debug('[BaseClient] Truncated tool call outputs:', editedIndices); - for (const index of editedIndices) { - formattedMessages[index].content = dbMessages[index].content; - } - orderedMessages = dbMessages; - } - } - - let orderedWithInstructions = this.addInstructions(orderedMessages, instructions); - - let { context, remainingContextTokens, messagesToRefine } = - await this.getMessagesWithinTokenLimit({ - messages: orderedWithInstructions, - instructions, - }); - - logger.debug('[BaseClient] Context Count (1/2)', { - remainingContextTokens, - maxContextTokens: this.maxContextTokens, - }); - - let summaryMessage; - let summaryTokenCount; - let { shouldSummarize } = this; - - // Calculate the difference in length to determine how many messages were discarded if any - let payload; - let { length } = formattedMessages; - length += instructions != null ? 1 : 0; - const diff = length - context.length; - const firstMessage = orderedWithInstructions[0]; - const usePrevSummary = - shouldSummarize && - diff === 1 && - firstMessage?.summary && - this.previous_summary.messageId === firstMessage.messageId; - - if (diff > 0) { - payload = formattedMessages.slice(diff); - logger.debug( - `[BaseClient] Difference between original payload (${length}) and context (${context.length}): ${diff}`, - ); - } - - payload = this.addInstructions(payload ?? formattedMessages, _instructions); - - const latestMessage = orderedWithInstructions[orderedWithInstructions.length - 1]; - if (payload.length === 0 && !shouldSummarize && latestMessage) { - const info = `${latestMessage.tokenCount} / ${this.maxContextTokens}`; - const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`; - logger.warn(`Prompt token count exceeds max token count (${info}).`); - throw new Error(errorMessage); - } else if ( - _instructions && - payload.length === 1 && - payload[0].content === _instructions.content - ) { - const info = `${tokenCount + 3} / ${this.maxContextTokens}`; - const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`; - logger.warn( - `Including instructions, the prompt token count exceeds remaining max token count (${info}).`, - ); - throw new Error(errorMessage); - } - - if (usePrevSummary) { - summaryMessage = { role: 'system', content: firstMessage.summary }; - summaryTokenCount = firstMessage.summaryTokenCount; - payload.unshift(summaryMessage); - remainingContextTokens -= summaryTokenCount; - } else if (shouldSummarize && messagesToRefine.length > 0) { - ({ summaryMessage, summaryTokenCount } = await this.summarizeMessages({ - messagesToRefine, - remainingContextTokens, - })); - summaryMessage && payload.unshift(summaryMessage); - remainingContextTokens -= summaryTokenCount; - } - - // Make sure to only continue summarization logic if the summary message was generated - shouldSummarize = summaryMessage != null && shouldSummarize === true; - - logger.debug('[BaseClient] Context Count (2/2)', { - remainingContextTokens, - maxContextTokens: this.maxContextTokens, - }); - - /** @type {Record | undefined} */ - let tokenCountMap; - if (buildTokenMap) { - const currentPayload = shouldSummarize ? orderedWithInstructions : context; - tokenCountMap = currentPayload.reduce((map, message, index) => { - const { messageId } = message; - if (!messageId) { - return map; - } - - if (shouldSummarize && index === messagesToRefine.length - 1 && !usePrevSummary) { - map.summaryMessage = { ...summaryMessage, messageId, tokenCount: summaryTokenCount }; - } - - map[messageId] = currentPayload[index].tokenCount; - return map; - }, {}); - } - - const promptTokens = this.maxContextTokens - remainingContextTokens; - - logger.debug('[BaseClient] tokenCountMap:', tokenCountMap); - logger.debug('[BaseClient]', { - promptTokens, - remainingContextTokens, - payloadSize: payload.length, - maxContextTokens: this.maxContextTokens, - }); - - return { payload, tokenCountMap, promptTokens, messages: orderedWithInstructions }; - } - async sendMessage(message, opts = {}) { const appConfig = this.options.req?.config; /** @type {Promise} */ @@ -658,17 +469,13 @@ class BaseClient { opts, ); - if (tokenCountMap) { - if (tokenCountMap[userMessage.messageId]) { - userMessage.tokenCount = tokenCountMap[userMessage.messageId]; - logger.debug('[BaseClient] userMessage', { - messageId: userMessage.messageId, - tokenCount: userMessage.tokenCount, - conversationId: userMessage.conversationId, - }); - } - - this.handleTokenCountMap(tokenCountMap); + if (tokenCountMap && tokenCountMap[userMessage.messageId]) { + userMessage.tokenCount = tokenCountMap[userMessage.messageId]; + logger.debug('[BaseClient] userMessage', { + messageId: userMessage.messageId, + tokenCount: userMessage.tokenCount, + conversationId: userMessage.conversationId, + }); } if (!isEdited && !this.skipSaveUserMessage) { @@ -766,12 +573,7 @@ class BaseClient { responseMessage.text = completion.join(''); } - if ( - tokenCountMap && - this.recordTokenUsage && - this.getTokenCountForResponse && - this.getTokenCount - ) { + if (tokenCountMap && this.recordTokenUsage && this.getTokenCountForResponse) { let completionTokens; /** @@ -784,13 +586,6 @@ class BaseClient { if (usage != null && Number(usage[this.outputTokensKey]) > 0) { responseMessage.tokenCount = usage[this.outputTokensKey]; completionTokens = responseMessage.tokenCount; - await this.updateUserMessageTokenCount({ - usage, - tokenCountMap, - userMessage, - userMessagePromise, - opts, - }); } else { responseMessage.tokenCount = this.getTokenCountForResponse(responseMessage); completionTokens = responseMessage.tokenCount; @@ -817,6 +612,27 @@ class BaseClient { await userMessagePromise; } + if ( + this.contextMeta?.calibrationRatio > 0 && + this.contextMeta.calibrationRatio !== 1 && + userMessage.tokenCount > 0 + ) { + const calibrated = Math.round(userMessage.tokenCount * this.contextMeta.calibrationRatio); + if (calibrated !== userMessage.tokenCount) { + logger.debug('[BaseClient] Calibrated user message tokenCount', { + messageId: userMessage.messageId, + raw: userMessage.tokenCount, + calibrated, + ratio: this.contextMeta.calibrationRatio, + }); + userMessage.tokenCount = calibrated; + await this.updateMessageInDatabase({ + messageId: userMessage.messageId, + tokenCount: calibrated, + }); + } + } + if (this.artifactPromises) { responseMessage.attachments = (await Promise.all(this.artifactPromises)).filter((a) => a); } @@ -829,6 +645,10 @@ class BaseClient { } } + if (this.contextMeta) { + responseMessage.contextMeta = this.contextMeta; + } + responseMessage.databasePromise = this.saveMessageToDatabase( responseMessage, saveOptions, @@ -839,75 +659,6 @@ class BaseClient { return responseMessage; } - /** - * Stream usage should only be used for user message token count re-calculation if: - * - The stream usage is available, with input tokens greater than 0, - * - the client provides a function to calculate the current token count, - * - files are being resent with every message (default behavior; or if `false`, with no attachments), - * - the `promptPrefix` (custom instructions) is not set. - * - * In these cases, the legacy token estimations would be more accurate. - * - * TODO: included system messages in the `orderedMessages` accounting, potentially as a - * separate message in the UI. ChatGPT does this through "hidden" system messages. - * @param {object} params - * @param {StreamUsage} params.usage - * @param {Record} params.tokenCountMap - * @param {TMessage} params.userMessage - * @param {Promise} params.userMessagePromise - * @param {object} params.opts - */ - async updateUserMessageTokenCount({ - usage, - tokenCountMap, - userMessage, - userMessagePromise, - opts, - }) { - /** @type {boolean} */ - const shouldUpdateCount = - this.calculateCurrentTokenCount != null && - Number(usage[this.inputTokensKey]) > 0 && - (this.options.resendFiles || - (!this.options.resendFiles && !this.options.attachments?.length)) && - !this.options.promptPrefix; - - if (!shouldUpdateCount) { - return; - } - - const userMessageTokenCount = this.calculateCurrentTokenCount({ - currentMessageId: userMessage.messageId, - tokenCountMap, - usage, - }); - - if (userMessageTokenCount === userMessage.tokenCount) { - return; - } - - userMessage.tokenCount = userMessageTokenCount; - /* - Note: `AgentController` saves the user message if not saved here - (noted by `savedMessageIds`), so we update the count of its `userMessage` reference - */ - if (typeof opts?.getReqData === 'function') { - opts.getReqData({ - userMessage, - }); - } - /* - Note: we update the user message to be sure it gets the calculated token count; - though `AgentController` saves the user message if not saved here - (noted by `savedMessageIds`), EditController does not - */ - await userMessagePromise; - await this.updateMessageInDatabase({ - messageId: userMessage.messageId, - tokenCount: userMessageTokenCount, - }); - } - async loadHistory(conversationId, parentMessageId = null) { logger.debug('[BaseClient] Loading history:', { conversationId, parentMessageId }); @@ -934,10 +685,24 @@ class BaseClient { return _messages; } - // Find the latest message with a 'summary' property for (let i = _messages.length - 1; i >= 0; i--) { - if (_messages[i]?.summary) { - this.previous_summary = _messages[i]; + const msg = _messages[i]; + if (!msg) { + continue; + } + + const summaryBlock = BaseClient.findSummaryContentBlock(msg); + if (summaryBlock) { + this.previous_summary = { + ...msg, + summary: BaseClient.getSummaryText(summaryBlock), + summaryTokenCount: summaryBlock.tokenCount, + }; + break; + } + + if (msg.summary) { + this.previous_summary = msg; break; } } @@ -1041,6 +806,34 @@ class BaseClient { await db.updateMessage(this.options?.req?.user?.id, message); } + /** Extracts text from a summary block (handles both legacy `text` field and new `content` array format). */ + static getSummaryText(summaryBlock) { + if (Array.isArray(summaryBlock.content)) { + return summaryBlock.content.map((b) => b.text ?? '').join(''); + } + if (typeof summaryBlock.content === 'string') { + return summaryBlock.content; + } + return summaryBlock.text ?? ''; + } + + /** Finds the last summary content block in a message's content array (last-summary-wins). */ + static findSummaryContentBlock(message) { + if (!Array.isArray(message?.content)) { + return null; + } + let lastSummary = null; + for (const part of message.content) { + if ( + part?.type === ContentTypes.SUMMARY && + BaseClient.getSummaryText(part).trim().length > 0 + ) { + lastSummary = part; + } + } + return lastSummary; + } + /** * Iterate through messages, building an array based on the parentMessageId. * @@ -1095,20 +888,35 @@ class BaseClient { break; } - if (summary && message.summary) { - message.role = 'system'; - message.text = message.summary; + let resolved = message; + let hasSummary = false; + if (summary) { + const summaryBlock = BaseClient.findSummaryContentBlock(message); + if (summaryBlock) { + const summaryText = BaseClient.getSummaryText(summaryBlock); + resolved = { + ...message, + role: 'system', + content: [{ type: ContentTypes.TEXT, text: summaryText }], + tokenCount: summaryBlock.tokenCount, + }; + hasSummary = true; + } else if (message.summary) { + resolved = { + ...message, + role: 'system', + content: [{ type: ContentTypes.TEXT, text: message.summary }], + tokenCount: message.summaryTokenCount ?? message.tokenCount, + }; + hasSummary = true; + } } - if (summary && message.summaryTokenCount) { - message.tokenCount = message.summaryTokenCount; - } - - const shouldMap = mapMethod != null && (mapCondition != null ? mapCondition(message) : true); - const processedMessage = shouldMap ? mapMethod(message) : message; + const shouldMap = mapMethod != null && (mapCondition != null ? mapCondition(resolved) : true); + const processedMessage = shouldMap ? mapMethod(resolved) : resolved; orderedMessages.push(processedMessage); - if (summary && message.summary) { + if (hasSummary) { break; } diff --git a/api/app/clients/prompts/truncate.js b/api/app/clients/prompts/truncate.js index 564b39efeb..e744b40daa 100644 --- a/api/app/clients/prompts/truncate.js +++ b/api/app/clients/prompts/truncate.js @@ -37,79 +37,4 @@ function smartTruncateText(text, maxLength = MAX_CHAR) { return text; } -/** - * @param {TMessage[]} _messages - * @param {number} maxContextTokens - * @param {function({role: string, content: TMessageContent[]}): number} getTokenCountForMessage - * - * @returns {{ - * dbMessages: TMessage[], - * editedIndices: number[] - * }} - */ -function truncateToolCallOutputs(_messages, maxContextTokens, getTokenCountForMessage) { - const THRESHOLD_PERCENTAGE = 0.5; - const targetTokenLimit = maxContextTokens * THRESHOLD_PERCENTAGE; - - let currentTokenCount = 3; - const messages = [..._messages]; - const processedMessages = []; - let currentIndex = messages.length; - const editedIndices = new Set(); - while (messages.length > 0) { - currentIndex--; - const message = messages.pop(); - currentTokenCount += message.tokenCount; - if (currentTokenCount < targetTokenLimit) { - processedMessages.push(message); - continue; - } - - if (!message.content || !Array.isArray(message.content)) { - processedMessages.push(message); - continue; - } - - const toolCallIndices = message.content - .map((item, index) => (item.type === 'tool_call' ? index : -1)) - .filter((index) => index !== -1) - .reverse(); - - if (toolCallIndices.length === 0) { - processedMessages.push(message); - continue; - } - - const newContent = [...message.content]; - - // Truncate all tool outputs since we're over threshold - for (const index of toolCallIndices) { - const toolCall = newContent[index].tool_call; - if (!toolCall || !toolCall.output) { - continue; - } - - editedIndices.add(currentIndex); - - newContent[index] = { - ...newContent[index], - tool_call: { - ...toolCall, - output: '[OUTPUT_OMITTED_FOR_BREVITY]', - }, - }; - } - - const truncatedMessage = { - ...message, - content: newContent, - tokenCount: getTokenCountForMessage({ role: 'assistant', content: newContent }), - }; - - processedMessages.push(truncatedMessage); - } - - return { dbMessages: processedMessages.reverse(), editedIndices: Array.from(editedIndices) }; -} - -module.exports = { truncateText, smartTruncateText, truncateToolCallOutputs }; +module.exports = { truncateText, smartTruncateText }; diff --git a/api/app/clients/specs/BaseClient.test.js b/api/app/clients/specs/BaseClient.test.js index f13c9979ac..edbbcaa87b 100644 --- a/api/app/clients/specs/BaseClient.test.js +++ b/api/app/clients/specs/BaseClient.test.js @@ -355,7 +355,8 @@ describe('BaseClient', () => { id: '3', parentMessageId: '2', role: 'system', - text: 'Summary for Message 3', + text: 'Message 3', + content: [{ type: 'text', text: 'Summary for Message 3' }], summary: 'Summary for Message 3', }, { id: '4', parentMessageId: '3', text: 'Message 4' }, @@ -380,7 +381,8 @@ describe('BaseClient', () => { id: '4', parentMessageId: '3', role: 'system', - text: 'Summary for Message 4', + text: 'Message 4', + content: [{ type: 'text', text: 'Summary for Message 4' }], summary: 'Summary for Message 4', }, { id: '5', parentMessageId: '4', text: 'Message 5' }, @@ -405,12 +407,123 @@ describe('BaseClient', () => { id: '4', parentMessageId: '3', role: 'system', - text: 'Summary for Message 4', + text: 'Message 4', + content: [{ type: 'text', text: 'Summary for Message 4' }], summary: 'Summary for Message 4', }, { id: '5', parentMessageId: '4', text: 'Message 5' }, ]); }); + + it('should detect summary content block and use it over legacy fields (summary mode)', () => { + const messagesWithContentBlock = [ + { id: '3', parentMessageId: '2', text: 'Message 3' }, + { + id: '2', + parentMessageId: '1', + text: 'Message 2', + content: [ + { type: 'text', text: 'Original text' }, + { type: 'summary', text: 'Content block summary', tokenCount: 42 }, + ], + }, + { id: '1', parentMessageId: null, text: 'Message 1' }, + ]; + const result = TestClient.constructor.getMessagesForConversation({ + messages: messagesWithContentBlock, + parentMessageId: '3', + summary: true, + }); + expect(result).toHaveLength(2); + expect(result[0].role).toBe('system'); + expect(result[0].content).toEqual([{ type: 'text', text: 'Content block summary' }]); + expect(result[0].tokenCount).toBe(42); + }); + + it('should prefer content block summary over legacy summary field', () => { + const messagesWithBoth = [ + { id: '2', parentMessageId: '1', text: 'Message 2' }, + { + id: '1', + parentMessageId: null, + text: 'Message 1', + summary: 'Legacy summary', + summaryTokenCount: 10, + content: [{ type: 'summary', text: 'Content block summary', tokenCount: 20 }], + }, + ]; + const result = TestClient.constructor.getMessagesForConversation({ + messages: messagesWithBoth, + parentMessageId: '2', + summary: true, + }); + expect(result).toHaveLength(2); + expect(result[0].content).toEqual([{ type: 'text', text: 'Content block summary' }]); + expect(result[0].tokenCount).toBe(20); + }); + + it('should fallback to legacy summary when no content block exists', () => { + const messagesWithLegacy = [ + { id: '2', parentMessageId: '1', text: 'Message 2' }, + { + id: '1', + parentMessageId: null, + text: 'Message 1', + summary: 'Legacy summary only', + summaryTokenCount: 15, + }, + ]; + const result = TestClient.constructor.getMessagesForConversation({ + messages: messagesWithLegacy, + parentMessageId: '2', + summary: true, + }); + expect(result).toHaveLength(2); + expect(result[0].content).toEqual([{ type: 'text', text: 'Legacy summary only' }]); + expect(result[0].tokenCount).toBe(15); + }); + }); + + describe('findSummaryContentBlock', () => { + it('should find a summary block in the content array', () => { + const message = { + content: [ + { type: 'text', text: 'some text' }, + { type: 'summary', text: 'Summary of conversation', tokenCount: 50 }, + ], + }; + const result = TestClient.constructor.findSummaryContentBlock(message); + expect(result).toBeTruthy(); + expect(result.text).toBe('Summary of conversation'); + expect(result.tokenCount).toBe(50); + }); + + it('should return null when no summary block exists', () => { + const message = { + content: [ + { type: 'text', text: 'some text' }, + { type: 'tool_call', tool_call: {} }, + ], + }; + expect(TestClient.constructor.findSummaryContentBlock(message)).toBeNull(); + }); + + it('should return null for string content', () => { + const message = { content: 'just a string' }; + expect(TestClient.constructor.findSummaryContentBlock(message)).toBeNull(); + }); + + it('should return null for missing content', () => { + expect(TestClient.constructor.findSummaryContentBlock({})).toBeNull(); + expect(TestClient.constructor.findSummaryContentBlock(null)).toBeNull(); + }); + + it('should skip summary blocks with no text', () => { + const message = { + content: [{ type: 'summary', tokenCount: 10 }], + }; + expect(TestClient.constructor.findSummaryContentBlock(message)).toBeNull(); + }); }); describe('sendMessage', () => { diff --git a/api/package.json b/api/package.json index aea98b3f8d..8b2f156cd3 100644 --- a/api/package.json +++ b/api/package.json @@ -44,7 +44,7 @@ "@google/genai": "^1.19.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.80", - "@librechat/agents": "^3.1.57", + "@librechat/agents": "^3.1.62", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", diff --git a/api/server/controllers/agents/__tests__/openai.spec.js b/api/server/controllers/agents/__tests__/openai.spec.js index deeb2ec51d..c2f13f7837 100644 --- a/api/server/controllers/agents/__tests__/openai.spec.js +++ b/api/server/controllers/agents/__tests__/openai.spec.js @@ -82,6 +82,9 @@ const mockGetCacheMultiplier = jest.fn().mockReturnValue(null); jest.mock('~/server/controllers/agents/callbacks', () => ({ createToolEndCallback: jest.fn().mockReturnValue(jest.fn()), + buildSummarizationHandlers: jest.fn().mockReturnValue({}), + markSummarizationUsage: jest.fn().mockImplementation((usage) => usage), + agentLogHandlerObj: { handle: jest.fn() }, })); jest.mock('~/server/services/PermissionService', () => ({ @@ -108,6 +111,7 @@ jest.mock('~/models', () => ({ getMultiplier: mockGetMultiplier, getCacheMultiplier: mockGetCacheMultiplier, getConvoFiles: jest.fn().mockResolvedValue([]), + getConvo: jest.fn().mockResolvedValue(null), })); describe('OpenAIChatCompletionController', () => { @@ -147,7 +151,7 @@ describe('OpenAIChatCompletionController', () => { describe('conversation ownership validation', () => { it('should skip ownership check when conversation_id is not provided', async () => { - const { getConvo } = require('~/models/Conversation'); + const { getConvo } = require('~/models'); await OpenAIChatCompletionController(req, res); expect(getConvo).not.toHaveBeenCalled(); }); @@ -164,7 +168,7 @@ describe('OpenAIChatCompletionController', () => { it('should return 404 when conversation is not owned by user', async () => { const { validateRequest } = require('@librechat/api'); - const { getConvo } = require('~/models/Conversation'); + const { getConvo } = require('~/models'); validateRequest.mockReturnValueOnce({ request: { model: 'agent-123', @@ -182,7 +186,7 @@ describe('OpenAIChatCompletionController', () => { it('should proceed when conversation is owned by user', async () => { const { validateRequest } = require('@librechat/api'); - const { getConvo } = require('~/models/Conversation'); + const { getConvo } = require('~/models'); validateRequest.mockReturnValueOnce({ request: { model: 'agent-123', @@ -200,7 +204,7 @@ describe('OpenAIChatCompletionController', () => { it('should return 500 when getConvo throws a DB error', async () => { const { validateRequest } = require('@librechat/api'); - const { getConvo } = require('~/models/Conversation'); + const { getConvo } = require('~/models'); validateRequest.mockReturnValueOnce({ request: { model: 'agent-123', diff --git a/api/server/controllers/agents/__tests__/responses.unit.spec.js b/api/server/controllers/agents/__tests__/responses.unit.spec.js index 0a63445f24..26f5f5d30b 100644 --- a/api/server/controllers/agents/__tests__/responses.unit.spec.js +++ b/api/server/controllers/agents/__tests__/responses.unit.spec.js @@ -104,10 +104,20 @@ jest.mock('~/server/services/ToolService', () => ({ const mockGetMultiplier = jest.fn().mockReturnValue(1); const mockGetCacheMultiplier = jest.fn().mockReturnValue(null); -jest.mock('~/server/controllers/agents/callbacks', () => ({ - createToolEndCallback: jest.fn().mockReturnValue(jest.fn()), - createResponsesToolEndCallback: jest.fn().mockReturnValue(jest.fn()), -})); +jest.mock('~/server/controllers/agents/callbacks', () => { + const noop = { handle: jest.fn() }; + return { + createToolEndCallback: jest.fn().mockReturnValue(jest.fn()), + createResponsesToolEndCallback: jest.fn().mockReturnValue(jest.fn()), + markSummarizationUsage: jest.fn().mockImplementation((usage) => usage), + agentLogHandlerObj: noop, + buildSummarizationHandlers: jest.fn().mockReturnValue({ + on_summarize_start: noop, + on_summarize_delta: noop, + on_summarize_complete: noop, + }), + }; +}); jest.mock('~/server/services/PermissionService', () => ({ findAccessibleResources: jest.fn().mockResolvedValue([]), @@ -175,7 +185,7 @@ describe('createResponse controller', () => { describe('conversation ownership validation', () => { it('should skip ownership check when previous_response_id is not provided', async () => { - const { getConvo } = require('~/models/Conversation'); + const { getConvo } = require('~/models'); await createResponse(req, res); expect(getConvo).not.toHaveBeenCalled(); }); @@ -202,7 +212,7 @@ describe('createResponse controller', () => { it('should return 404 when conversation is not owned by user', async () => { const { validateResponseRequest, sendResponsesErrorResponse } = require('@librechat/api'); - const { getConvo } = require('~/models/Conversation'); + const { getConvo } = require('~/models'); validateResponseRequest.mockReturnValueOnce({ request: { model: 'agent-123', @@ -225,7 +235,7 @@ describe('createResponse controller', () => { it('should proceed when conversation is owned by user', async () => { const { validateResponseRequest, sendResponsesErrorResponse } = require('@librechat/api'); - const { getConvo } = require('~/models/Conversation'); + const { getConvo } = require('~/models'); validateResponseRequest.mockReturnValueOnce({ request: { model: 'agent-123', @@ -248,7 +258,7 @@ describe('createResponse controller', () => { it('should return 500 when getConvo throws a DB error', async () => { const { validateResponseRequest, sendResponsesErrorResponse } = require('@librechat/api'); - const { getConvo } = require('~/models/Conversation'); + const { getConvo } = require('~/models'); validateResponseRequest.mockReturnValueOnce({ request: { model: 'agent-123', @@ -370,28 +380,7 @@ describe('createResponse controller', () => { it('should collect usage from on_chat_model_end events', async () => { const api = require('@librechat/api'); - let capturedOnChatModelEnd; - api.createAggregatorEventHandlers.mockImplementation(() => { - return { - on_message_delta: { handle: jest.fn() }, - on_reasoning_delta: { handle: jest.fn() }, - on_run_step: { handle: jest.fn() }, - on_run_step_delta: { handle: jest.fn() }, - on_chat_model_end: { - handle: jest.fn((event, data) => { - if (capturedOnChatModelEnd) { - capturedOnChatModelEnd(event, data); - } - }), - }, - }; - }); - api.createRun.mockImplementation(async ({ customHandlers }) => { - capturedOnChatModelEnd = (event, data) => { - customHandlers.on_chat_model_end.handle(event, data); - }; - return { processStream: jest.fn().mockImplementation(async () => { customHandlers.on_chat_model_end.handle('on_chat_model_end', { @@ -408,7 +397,6 @@ describe('createResponse controller', () => { }); await createResponse(req, res); - expect(mockRecordCollectedUsage).toHaveBeenCalledWith( expect.any(Object), expect.objectContaining({ diff --git a/api/server/controllers/agents/callbacks.js b/api/server/controllers/agents/callbacks.js index 0bb935795d..40fdf74212 100644 --- a/api/server/controllers/agents/callbacks.js +++ b/api/server/controllers/agents/callbacks.js @@ -1,7 +1,13 @@ const { nanoid } = require('nanoid'); const { logger } = require('@librechat/data-schemas'); -const { Constants, EnvVar, GraphEvents, ToolEndHandler } = require('@librechat/agents'); const { Tools, StepTypes, FileContext, ErrorTypes } = require('librechat-data-provider'); +const { + EnvVar, + Constants, + GraphEvents, + GraphNodeKeys, + ToolEndHandler, +} = require('@librechat/agents'); const { sendEvent, GenerationJobManager, @@ -71,7 +77,9 @@ class ModelEndHandler { usage.model = modelName; } - this.collectedUsage.push(usage); + const taggedUsage = markSummarizationUsage(usage, metadata); + + this.collectedUsage.push(taggedUsage); } catch (error) { logger.error('Error handling model end event:', error); return this.finalize(errorMessage); @@ -133,6 +141,7 @@ function getDefaultHandlers({ collectedUsage, streamId = null, toolExecuteOptions = null, + summarizationOptions = null, }) { if (!res || !aggregateContent) { throw new Error( @@ -245,6 +254,37 @@ function getDefaultHandlers({ handlers[GraphEvents.ON_TOOL_EXECUTE] = createToolExecuteHandler(toolExecuteOptions); } + if (summarizationOptions?.enabled !== false) { + handlers[GraphEvents.ON_SUMMARIZE_START] = { + handle: async (_event, data) => { + await emitEvent(res, streamId, { + event: GraphEvents.ON_SUMMARIZE_START, + data, + }); + }, + }; + handlers[GraphEvents.ON_SUMMARIZE_DELTA] = { + handle: async (_event, data) => { + aggregateContent({ event: GraphEvents.ON_SUMMARIZE_DELTA, data }); + await emitEvent(res, streamId, { + event: GraphEvents.ON_SUMMARIZE_DELTA, + data, + }); + }, + }; + handlers[GraphEvents.ON_SUMMARIZE_COMPLETE] = { + handle: async (_event, data) => { + aggregateContent({ event: GraphEvents.ON_SUMMARIZE_COMPLETE, data }); + await emitEvent(res, streamId, { + event: GraphEvents.ON_SUMMARIZE_COMPLETE, + data, + }); + }, + }; + } + + handlers[GraphEvents.ON_AGENT_LOG] = { handle: agentLogHandler }; + return handlers; } @@ -668,8 +708,62 @@ function createResponsesToolEndCallback({ req, res, tracker, artifactPromises }) }; } +const ALLOWED_LOG_LEVELS = new Set(['debug', 'info', 'warn', 'error']); + +function agentLogHandler(_event, data) { + if (!data) { + return; + } + const logFn = ALLOWED_LOG_LEVELS.has(data.level) ? logger[data.level] : logger.debug; + const meta = typeof data.data === 'object' && data.data != null ? data.data : {}; + logFn(`[agents:${data.scope ?? 'unknown'}] ${data.message ?? ''}`, { + ...meta, + runId: data.runId, + agentId: data.agentId, + }); +} + +function markSummarizationUsage(usage, metadata) { + const node = metadata?.langgraph_node; + if (typeof node === 'string' && node.startsWith(GraphNodeKeys.SUMMARIZE)) { + return { ...usage, usage_type: 'summarization' }; + } + return usage; +} + +const agentLogHandlerObj = { handle: agentLogHandler }; + +/** + * Builds the three summarization SSE event handlers. + * In streaming mode, each event is forwarded to the client via `res.write`. + * In non-streaming mode, the handlers are no-ops. + * @param {{ isStreaming: boolean, res: import('express').Response }} opts + */ +function buildSummarizationHandlers({ isStreaming, res }) { + if (!isStreaming) { + const noop = { handle: () => {} }; + return { on_summarize_start: noop, on_summarize_delta: noop, on_summarize_complete: noop }; + } + const writeEvent = (name) => ({ + handle: async (_event, data) => { + if (!res.writableEnded) { + res.write(`event: ${name}\ndata: ${JSON.stringify(data)}\n\n`); + } + }, + }); + return { + on_summarize_start: writeEvent('on_summarize_start'), + on_summarize_delta: writeEvent('on_summarize_delta'), + on_summarize_complete: writeEvent('on_summarize_complete'), + }; +} + module.exports = { + agentLogHandler, + agentLogHandlerObj, getDefaultHandlers, createToolEndCallback, + markSummarizationUsage, + buildSummarizationHandlers, createResponsesToolEndCallback, }; diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index bf75838a87..47a10165e3 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -3,11 +3,11 @@ const { logger } = require('@librechat/data-schemas'); const { getBufferString, HumanMessage } = require('@langchain/core/messages'); const { createRun, - Tokenizer, + isEnabled, checkAccess, buildToolSet, - sanitizeTitle, logToolError, + sanitizeTitle, payloadParser, resolveHeaders, createSafeUser, @@ -25,6 +25,8 @@ const { loadAgent: loadAgentFn, createMultiAgentMapper, filterMalformedContentParts, + countFormattedMessageTokens, + hydrateMissingIndexTokenCounts, } = require('@librechat/api'); const { Callback, @@ -62,9 +64,6 @@ class AgentClient extends BaseClient { * @type {string} */ this.clientName = EModelEndpoint.agents; - /** @type {'discard' | 'summarize'} */ - this.contextStrategy = 'discard'; - /** @deprecated @type {true} - Is a Chat Completion Request */ this.isChatCompletion = true; @@ -216,7 +215,6 @@ class AgentClient extends BaseClient { })) : []), ]; - if (this.options.attachments) { const attachments = await this.options.attachments; const latestMessage = orderedMessages[orderedMessages.length - 1]; @@ -243,6 +241,11 @@ class AgentClient extends BaseClient { ); } + /** @type {Record} */ + const canonicalTokenCountMap = {}; + /** @type {Record} */ + const tokenCountMap = {}; + let promptTokenTotal = 0; const formattedMessages = orderedMessages.map((message, i) => { const formattedMessage = formatMessage({ message, @@ -262,12 +265,14 @@ class AgentClient extends BaseClient { } } - const needsTokenCount = - (this.contextStrategy && !orderedMessages[i].tokenCount) || message.fileContext; + const dbTokenCount = orderedMessages[i].tokenCount; + const needsTokenCount = !dbTokenCount || message.fileContext; - /* If tokens were never counted, or, is a Vision request and the message has files, count again */ if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) { - orderedMessages[i].tokenCount = this.getTokenCountForMessage(formattedMessage); + orderedMessages[i].tokenCount = countFormattedMessageTokens( + formattedMessage, + this.getEncoding(), + ); } /* If message has files, calculate image token cost */ @@ -281,17 +286,37 @@ class AgentClient extends BaseClient { if (file.metadata?.fileIdentifier) { continue; } - // orderedMessages[i].tokenCount += this.calculateImageTokenCost({ - // width: file.width, - // height: file.height, - // detail: this.options.imageDetail ?? ImageDetail.auto, - // }); } } + const tokenCount = Number(orderedMessages[i].tokenCount); + const normalizedTokenCount = Number.isFinite(tokenCount) && tokenCount > 0 ? tokenCount : 0; + canonicalTokenCountMap[i] = normalizedTokenCount; + promptTokenTotal += normalizedTokenCount; + + if (message.messageId) { + tokenCountMap[message.messageId] = normalizedTokenCount; + } + + if (isEnabled(process.env.AGENT_DEBUG_LOGGING)) { + const role = message.isCreatedByUser ? 'user' : 'assistant'; + const hasSummary = + Array.isArray(message.content) && message.content.some((p) => p && p.type === 'summary'); + const suffix = hasSummary ? '[S]' : ''; + const id = (message.messageId ?? message.id ?? '').slice(-8); + const recalced = needsTokenCount ? orderedMessages[i].tokenCount : null; + logger.debug( + `[AgentClient] msg[${i}] ${role}${suffix} id=…${id} db=${dbTokenCount} needsRecount=${needsTokenCount} recalced=${recalced} tokens=${normalizedTokenCount}`, + ); + } + return formattedMessage; }); + payload = formattedMessages; + messages = orderedMessages; + promptTokens = promptTokenTotal; + /** * Build shared run context - applies to ALL agents in the run. * This includes: file context (latest message), augmented prompt (RAG), memory context. @@ -321,23 +346,20 @@ class AgentClient extends BaseClient { const sharedRunContext = sharedRunContextParts.join('\n\n'); - /** @type {Record | undefined} */ - let tokenCountMap; + /** Preserve canonical pre-format token counts for all history entering graph formatting */ + this.indexTokenCountMap = canonicalTokenCountMap; - if (this.contextStrategy) { - ({ payload, promptTokens, tokenCountMap, messages } = await this.handleContextStrategy({ - orderedMessages, - formattedMessages, - })); - } - - for (let i = 0; i < messages.length; i++) { - this.indexTokenCountMap[i] = messages[i].tokenCount; + /** Extract contextMeta from the parent response (second-to-last in ordered chain; + * last is the current user message). Seeds the pruner's calibration EMA for this run. */ + const parentResponse = + orderedMessages.length >= 2 ? orderedMessages[orderedMessages.length - 2] : undefined; + if (parentResponse?.contextMeta && !parentResponse.isCreatedByUser) { + this.contextMeta = parentResponse.contextMeta; } const result = { - tokenCountMap, prompt: payload, + tokenCountMap, promptTokens, messages, }; @@ -665,39 +687,7 @@ class AgentClient extends BaseClient { * @returns {number} */ getTokenCountForResponse({ content }) { - return this.getTokenCountForMessage({ - role: 'assistant', - content, - }); - } - - /** - * Calculates the correct token count for the current user message based on the token count map and API usage. - * Edge case: If the calculation results in a negative value, it returns the original estimate. - * If revisiting a conversation with a chat history entirely composed of token estimates, - * the cumulative token count going forward should become more accurate as the conversation progresses. - * @param {Object} params - The parameters for the calculation. - * @param {Record} params.tokenCountMap - A map of message IDs to their token counts. - * @param {string} params.currentMessageId - The ID of the current message to calculate. - * @param {OpenAIUsageMetadata} params.usage - The usage object returned by the API. - * @returns {number} The correct token count for the current user message. - */ - calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) { - const originalEstimate = tokenCountMap[currentMessageId] || 0; - - if (!usage || typeof usage[this.inputTokensKey] !== 'number') { - return originalEstimate; - } - - tokenCountMap[currentMessageId] = 0; - const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => { - const numCount = Number(count); - return sum + (isNaN(numCount) ? 0 : numCount); - }, 0); - const totalInputTokens = usage[this.inputTokensKey] ?? 0; - - const currentMessageTokens = totalInputTokens - totalTokensFromMap; - return currentMessageTokens > 0 ? currentMessageTokens : originalEstimate; + return countFormattedMessageTokens({ role: 'assistant', content }, this.getEncoding()); } /** @@ -745,11 +735,34 @@ class AgentClient extends BaseClient { }; const toolSet = buildToolSet(this.options.agent); - let { messages: initialMessages, indexTokenCountMap } = formatAgentMessages( - payload, - this.indexTokenCountMap, - toolSet, - ); + const tokenCounter = createTokenCounter(this.getEncoding()); + let { + messages: initialMessages, + indexTokenCountMap, + summary: initialSummary, + boundaryTokenAdjustment, + } = formatAgentMessages(payload, this.indexTokenCountMap, toolSet); + if (boundaryTokenAdjustment) { + logger.debug( + `[AgentClient] Boundary token adjustment: ${boundaryTokenAdjustment.original} → ${boundaryTokenAdjustment.adjusted} (${boundaryTokenAdjustment.remainingChars}/${boundaryTokenAdjustment.totalChars} chars)`, + ); + } + if (indexTokenCountMap && isEnabled(process.env.AGENT_DEBUG_LOGGING)) { + const entries = Object.entries(indexTokenCountMap); + const perMsg = entries.map(([idx, count]) => { + const msg = initialMessages[Number(idx)]; + const type = msg ? msg._getType() : '?'; + return `${idx}:${type}=${count}`; + }); + logger.debug( + `[AgentClient] Token map after format: [${perMsg.join(', ')}] (payload=${payload.length}, formatted=${initialMessages.length})`, + ); + } + indexTokenCountMap = hydrateMissingIndexTokenCounts({ + messages: initialMessages, + indexTokenCountMap, + tokenCounter, + }); /** * @param {BaseMessage[]} messages @@ -803,16 +816,32 @@ class AgentClient extends BaseClient { memoryPromise = this.runMemory(messages); + /** Seed calibration state from previous run if encoding matches */ + const currentEncoding = this.getEncoding(); + const prevMeta = this.contextMeta; + const encodingMatch = prevMeta?.encoding === currentEncoding; + const calibrationRatio = + encodingMatch && prevMeta?.calibrationRatio > 0 ? prevMeta.calibrationRatio : undefined; + + if (prevMeta) { + logger.debug( + `[AgentClient] contextMeta from parent: ratio=${prevMeta.calibrationRatio}, encoding=${prevMeta.encoding}, current=${currentEncoding}, seeded=${calibrationRatio ?? 'none'}`, + ); + } + run = await createRun({ agents, messages, indexTokenCountMap, + initialSummary, + calibrationRatio, runId: this.responseMessageId, signal: abortController.signal, customHandlers: this.options.eventHandlers, requestBody: config.configurable.requestBody, user: createSafeUser(this.options.req?.user), - tokenCounter: createTokenCounter(this.getEncoding()), + summarizationConfig: appConfig?.summarization, + tokenCounter, }); if (!run) { @@ -843,6 +872,7 @@ class AgentClient extends BaseClient { const hideSequentialOutputs = config.configurable.hide_sequential_outputs; await runAgents(initialMessages); + /** @deprecated Agent Chain */ if (hideSequentialOutputs) { this.contentParts = this.contentParts.filter((part, index) => { @@ -873,6 +903,18 @@ class AgentClient extends BaseClient { }); } } finally { + /** Capture calibration state from the run for persistence on the response message. + * Runs in finally so values are captured even on abort. */ + const ratio = this.run?.getCalibrationRatio() ?? 0; + if (ratio > 0 && ratio !== 1) { + this.contextMeta = { + calibrationRatio: Math.round(ratio * 1000) / 1000, + encoding: this.getEncoding(), + }; + } else { + this.contextMeta = undefined; + } + try { const attachments = await this.awaitMemoryWithTimeout(memoryPromise); if (attachments && attachments.length > 0) { @@ -1058,6 +1100,7 @@ class AgentClient extends BaseClient { titlePrompt: endpointConfig?.titlePrompt, titlePromptTemplate: endpointConfig?.titlePromptTemplate, chainOptions: { + runName: 'TitleRun', signal: abortController.signal, callbacks: [ { @@ -1179,16 +1222,6 @@ class AgentClient extends BaseClient { } return 'o200k_base'; } - - /** - * Returns the token count of a given text. It also checks and resets the tokenizers if necessary. - * @param {string} text - The text to get the token count for. - * @returns {number} The token count of the given text. - */ - getTokenCount(text) { - const encoding = this.getEncoding(); - return Tokenizer.getTokenCount(text, encoding); - } } module.exports = AgentClient; diff --git a/api/server/controllers/agents/client.test.js b/api/server/controllers/agents/client.test.js index 4e3d10e8e6..41a806f66d 100644 --- a/api/server/controllers/agents/client.test.js +++ b/api/server/controllers/agents/client.test.js @@ -1818,7 +1818,7 @@ describe('AgentClient - titleConvo', () => { /** Traversal stops at msg-2 (has summary), so we get msg-4 -> msg-3 -> msg-2 */ expect(result).toHaveLength(3); - expect(result[0].text).toBe('Summary of conversation'); + expect(result[0].content).toEqual([{ type: 'text', text: 'Summary of conversation' }]); expect(result[0].role).toBe('system'); expect(result[0].mapped).toBe(true); expect(result[1].mapped).toBe(true); diff --git a/api/server/controllers/agents/openai.js b/api/server/controllers/agents/openai.js index ae2e462103..b649058806 100644 --- a/api/server/controllers/agents/openai.js +++ b/api/server/controllers/agents/openai.js @@ -21,8 +21,13 @@ const { createOpenAIContentAggregator, isChatCompletionValidationFailure, } = require('@librechat/api'); +const { + buildSummarizationHandlers, + markSummarizationUsage, + createToolEndCallback, + agentLogHandlerObj, +} = require('~/server/controllers/agents/callbacks'); const { loadAgentTools, loadToolsForExecution } = require('~/server/services/ToolService'); -const { createToolEndCallback } = require('~/server/controllers/agents/callbacks'); const { findAccessibleResources } = require('~/server/services/PermissionService'); const db = require('~/models'); @@ -181,7 +186,7 @@ const OpenAIChatCompletionController = async (req, res) => { 'invalid_request_error', ); } - if (!(await getConvo(req.user?.id, request.conversation_id))) { + if (!(await db.getConvo(req.user?.id, request.conversation_id))) { return sendErrorResponse(res, 404, 'Conversation not found', 'invalid_request_error'); } } @@ -282,14 +287,16 @@ const OpenAIChatCompletionController = async (req, res) => { toolEndCallback, }; + const summarizationConfig = appConfig?.summarization; + const openaiMessages = convertMessages(request.messages); const toolSet = buildToolSet(primaryConfig); - const { messages: formattedMessages, indexTokenCountMap } = formatAgentMessages( - openaiMessages, - {}, - toolSet, - ); + const { + messages: formattedMessages, + indexTokenCountMap, + summary: initialSummary, + } = formatAgentMessages(openaiMessages, {}, toolSet); /** * Create a simple handler that processes data @@ -432,24 +439,30 @@ const OpenAIChatCompletionController = async (req, res) => { }), // Usage tracking - on_chat_model_end: createHandler((data) => { - const usage = data?.output?.usage_metadata; - if (usage) { - collectedUsage.push(usage); - const target = isStreaming ? tracker : aggregator; - target.usage.promptTokens += usage.input_tokens ?? 0; - target.usage.completionTokens += usage.output_tokens ?? 0; - } - }), + on_chat_model_end: { + handle: (_event, data, metadata) => { + const usage = data?.output?.usage_metadata; + if (usage) { + const taggedUsage = markSummarizationUsage(usage, metadata); + collectedUsage.push(taggedUsage); + const target = isStreaming ? tracker : aggregator; + target.usage.promptTokens += taggedUsage.input_tokens ?? 0; + target.usage.completionTokens += taggedUsage.output_tokens ?? 0; + } + }, + }, on_run_step_completed: createHandler(), // Use proper ToolEndHandler for processing artifacts (images, file citations, code output) on_tool_end: new ToolEndHandler(toolEndCallback, logger), on_chain_stream: createHandler(), on_chain_end: createHandler(), on_agent_update: createHandler(), + on_agent_log: agentLogHandlerObj, on_custom_event: createHandler(), - // Event-driven tool execution handler on_tool_execute: createToolExecuteHandler(toolExecuteOptions), + ...(summarizationConfig?.enabled !== false + ? buildSummarizationHandlers({ isStreaming, res }) + : {}), }; // Create and run the agent @@ -462,7 +475,9 @@ const OpenAIChatCompletionController = async (req, res) => { agents: [primaryConfig], messages: formattedMessages, indexTokenCountMap, + initialSummary, runId: responseId, + summarizationConfig, signal: abortController.signal, customHandlers: handlers, requestBody: { diff --git a/api/server/controllers/agents/responses.js b/api/server/controllers/agents/responses.js index 62cedb14fd..7abddf5e2f 100644 --- a/api/server/controllers/agents/responses.js +++ b/api/server/controllers/agents/responses.js @@ -32,7 +32,10 @@ const { } = require('@librechat/api'); const { createResponsesToolEndCallback, + buildSummarizationHandlers, + markSummarizationUsage, createToolEndCallback, + agentLogHandlerObj, } = require('~/server/controllers/agents/callbacks'); const { loadAgentTools, loadToolsForExecution } = require('~/server/services/ToolService'); const { findAccessibleResources } = require('~/server/services/PermissionService'); @@ -277,6 +280,7 @@ const createResponse = async (req, res) => { const request = validation.request; const agentId = request.model; const isStreaming = request.stream === true; + const summarizationConfig = req.config?.summarization; // Look up the agent const agent = await db.getAgent({ id: agentId }); @@ -319,7 +323,7 @@ const createResponse = async (req, res) => { 'invalid_request', ); } - if (!(await getConvo(req.user?.id, request.previous_response_id))) { + if (!(await db.getConvo(req.user?.id, request.previous_response_id))) { return sendResponsesErrorResponse(res, 404, 'Conversation not found', 'not_found'); } } @@ -387,11 +391,11 @@ const createResponse = async (req, res) => { const allMessages = [...previousMessages, ...inputMessages]; const toolSet = buildToolSet(primaryConfig); - const { messages: formattedMessages, indexTokenCountMap } = formatAgentMessages( - allMessages, - {}, - toolSet, - ); + const { + messages: formattedMessages, + indexTokenCountMap, + summary: initialSummary, + } = formatAgentMessages(allMessages, {}, toolSet); // Create tracker for streaming or aggregator for non-streaming const tracker = actuallyStreaming ? createResponseTracker() : null; @@ -455,11 +459,12 @@ const createResponse = async (req, res) => { on_run_step: responsesHandlers.on_run_step, on_run_step_delta: responsesHandlers.on_run_step_delta, on_chat_model_end: { - handle: (event, data) => { + handle: (event, data, metadata) => { responsesHandlers.on_chat_model_end.handle(event, data); const usage = data?.output?.usage_metadata; if (usage) { - collectedUsage.push(usage); + const taggedUsage = markSummarizationUsage(usage, metadata); + collectedUsage.push(taggedUsage); } }, }, @@ -470,6 +475,10 @@ const createResponse = async (req, res) => { on_agent_update: { handle: () => {} }, on_custom_event: { handle: () => {} }, on_tool_execute: createToolExecuteHandler(toolExecuteOptions), + on_agent_log: agentLogHandlerObj, + ...(summarizationConfig?.enabled !== false + ? buildSummarizationHandlers({ isStreaming: actuallyStreaming, res }) + : {}), }; // Create and run the agent @@ -480,7 +489,9 @@ const createResponse = async (req, res) => { agents: [primaryConfig], messages: formattedMessages, indexTokenCountMap, + initialSummary, runId: responseId, + summarizationConfig, signal: abortController.signal, customHandlers: handlers, requestBody: { @@ -612,11 +623,12 @@ const createResponse = async (req, res) => { on_run_step: aggregatorHandlers.on_run_step, on_run_step_delta: aggregatorHandlers.on_run_step_delta, on_chat_model_end: { - handle: (event, data) => { + handle: (event, data, metadata) => { aggregatorHandlers.on_chat_model_end.handle(event, data); const usage = data?.output?.usage_metadata; if (usage) { - collectedUsage.push(usage); + const taggedUsage = markSummarizationUsage(usage, metadata); + collectedUsage.push(taggedUsage); } }, }, @@ -627,6 +639,10 @@ const createResponse = async (req, res) => { on_agent_update: { handle: () => {} }, on_custom_event: { handle: () => {} }, on_tool_execute: createToolExecuteHandler(toolExecuteOptions), + on_agent_log: agentLogHandlerObj, + ...(summarizationConfig?.enabled !== false + ? buildSummarizationHandlers({ isStreaming: false, res }) + : {}), }; const userId = req.user?.id ?? 'api-user'; @@ -636,7 +652,9 @@ const createResponse = async (req, res) => { agents: [primaryConfig], messages: formattedMessages, indexTokenCountMap, + initialSummary, runId: responseId, + summarizationConfig, signal: abortController.signal, customHandlers: handlers, requestBody: { diff --git a/api/server/controllers/assistants/helpers.js b/api/server/controllers/assistants/helpers.js index 6309268770..4630bfe7ef 100644 --- a/api/server/controllers/assistants/helpers.js +++ b/api/server/controllers/assistants/helpers.js @@ -8,8 +8,8 @@ const { initializeClient: initAzureClient, } = require('~/server/services/Endpoints/azureAssistants'); const { initializeClient } = require('~/server/services/Endpoints/assistants'); +const { hasCapability } = require('~/server/middleware/roles/capabilities'); const { getEndpointsConfig } = require('~/server/services/Config'); -const { hasCapability } = require('~/server/middleware'); /** * @param {ServerRequest} req diff --git a/api/server/middleware/assistants/validateAuthor.js b/api/server/middleware/assistants/validateAuthor.js index 3be1642a71..024d6abbe3 100644 --- a/api/server/middleware/assistants/validateAuthor.js +++ b/api/server/middleware/assistants/validateAuthor.js @@ -1,5 +1,5 @@ const { logger, SystemCapabilities } = require('@librechat/data-schemas'); -const { hasCapability } = require('~/server/middleware'); +const { hasCapability } = require('~/server/middleware/roles/capabilities'); const { getAssistant } = require('~/models'); /** diff --git a/api/server/middleware/roles/index.js b/api/server/middleware/roles/index.js index e6c315d007..f97d4b72b4 100644 --- a/api/server/middleware/roles/index.js +++ b/api/server/middleware/roles/index.js @@ -1,15 +1,17 @@ -const { - hasCapability, - requireCapability, - hasConfigCapability, - capabilityContextMiddleware, -} = require('./capabilities'); +/** + * NOTE: hasCapability, requireCapability, hasConfigCapability, and + * capabilityContextMiddleware are intentionally NOT re-exported here. + * + * capabilities.js depends on ~/models, and the middleware barrel + * (middleware/index.js) is frequently required by modules that are + * themselves loaded while the barrel is still initialising — creating + * a circular-require that silently returns an empty exports object. + * + * Always import capability helpers directly: + * require('~/server/middleware/roles/capabilities') + */ const checkAdmin = require('./admin'); module.exports = { checkAdmin, - hasCapability, - requireCapability, - hasConfigCapability, - capabilityContextMiddleware, }; diff --git a/api/server/routes/admin/auth.js b/api/server/routes/admin/auth.js index e19adf54a9..530764852b 100644 --- a/api/server/routes/admin/auth.js +++ b/api/server/routes/admin/auth.js @@ -6,10 +6,10 @@ const { CacheKeys } = require('librechat-data-provider'); const { SystemCapabilities } = require('@librechat/data-schemas'); const { getAdminPanelUrl, exchangeAdminCode, createSetBalanceConfig } = require('@librechat/api'); const { loginController } = require('~/server/controllers/auth/LoginController'); +const { requireCapability } = require('~/server/middleware/roles/capabilities'); const { createOAuthHandler } = require('~/server/controllers/auth/oauth'); const { findBalanceByUser, upsertBalanceFields } = require('~/models'); const { getAppConfig } = require('~/server/services/Config'); -const { requireCapability } = require('~/server/middleware'); const getLogStores = require('~/cache/getLogStores'); const { getOpenIdConfig } = require('~/strategies'); const middleware = require('~/server/middleware'); diff --git a/api/server/routes/files/files.agents.test.js b/api/server/routes/files/files.agents.test.js index 5a01df022d..cb0e4ff3d2 100644 --- a/api/server/routes/files/files.agents.test.js +++ b/api/server/routes/files/files.agents.test.js @@ -2,15 +2,14 @@ const express = require('express'); const request = require('supertest'); const mongoose = require('mongoose'); const { v4: uuidv4 } = require('uuid'); -const { createMethods } = require('@librechat/data-schemas'); const { MongoMemoryServer } = require('mongodb-memory-server'); +const { createMethods, SystemCapabilities } = require('@librechat/data-schemas'); const { SystemRoles, AccessRoleIds, ResourceType, PrincipalType, } = require('librechat-data-provider'); -const { SystemCapabilities } = require('@librechat/data-schemas'); const { createAgent, createFile } = require('~/models'); // Only mock the external dependencies that we don't want to test diff --git a/api/server/routes/files/files.js b/api/server/routes/files/files.js index e1b420fb5d..eb13ecdc31 100644 --- a/api/server/routes/files/files.js +++ b/api/server/routes/files/files.js @@ -27,11 +27,11 @@ const { const { fileAccess } = require('~/server/middleware/accessResources/fileAccess'); const { getStrategyFunctions } = require('~/server/services/Files/strategies'); const { getOpenAIClient } = require('~/server/controllers/assistants/helpers'); +const { hasCapability } = require('~/server/middleware/roles/capabilities'); const { checkPermission } = require('~/server/services/PermissionService'); const { loadAuthValues } = require('~/server/services/Tools/credentials'); const { hasAccessToFilesViaAgent } = require('~/server/services/Files'); const { cleanFileName } = require('~/server/utils/files'); -const { hasCapability } = require('~/server/middleware'); const { getLogStores } = require('~/cache'); const { Readable } = require('stream'); const db = require('~/models'); diff --git a/api/server/routes/prompts.js b/api/server/routes/prompts.js index c2e15ac6c0..60165d367b 100644 --- a/api/server/routes/prompts.js +++ b/api/server/routes/prompts.js @@ -32,7 +32,6 @@ const { getPrompt, } = require('~/models'); const { - hasCapability, canAccessPromptGroupResource, canAccessPromptViaGroup, requireJwtAuth, @@ -43,6 +42,7 @@ const { findAccessibleResources, grantPermission, } = require('~/server/services/PermissionService'); +const { hasCapability } = require('~/server/middleware/roles/capabilities'); const router = express.Router(); diff --git a/api/server/routes/prompts.test.js b/api/server/routes/prompts.test.js index ec162ac1fb..a3b868f022 100644 --- a/api/server/routes/prompts.test.js +++ b/api/server/routes/prompts.test.js @@ -36,7 +36,6 @@ jest.mock('~/models', () => { jest.mock('~/server/middleware', () => ({ requireJwtAuth: (req, res, next) => next(), - hasCapability: jest.requireActual('~/server/middleware').hasCapability, canAccessPromptViaGroup: jest.requireActual('~/server/middleware').canAccessPromptViaGroup, canAccessPromptGroupResource: jest.requireActual('~/server/middleware').canAccessPromptGroupResource, diff --git a/api/server/routes/roles.js b/api/server/routes/roles.js index 1b7e4632e3..25ee47854d 100644 --- a/api/server/routes/roles.js +++ b/api/server/routes/roles.js @@ -12,8 +12,9 @@ const { peoplePickerPermissionsSchema, remoteAgentsPermissionsSchema, } = require('librechat-data-provider'); -const { hasCapability, requireCapability, requireJwtAuth } = require('~/server/middleware'); +const { hasCapability, requireCapability } = require('~/server/middleware/roles/capabilities'); const { updateRoleByName, getRoleByName } = require('~/models'); +const { requireJwtAuth } = require('~/server/middleware'); const router = express.Router(); router.use(requireJwtAuth); diff --git a/api/server/services/ActionService.spec.js b/api/server/services/ActionService.spec.js index 42def44b4f..52419975f7 100644 --- a/api/server/services/ActionService.spec.js +++ b/api/server/services/ActionService.spec.js @@ -3,12 +3,12 @@ const { domainParser, legacyDomainEncode, validateAndUpdateTool } = require('./A jest.mock('keyv'); -jest.mock('~/models/Action', () => ({ +jest.mock('~/models', () => ({ getActions: jest.fn(), deleteActions: jest.fn(), })); -const { getActions } = require('~/models/Action'); +const { getActions } = require('~/models'); let mockDomainCache = {}; jest.mock('~/cache/getLogStores', () => { diff --git a/api/server/services/Endpoints/agents/initialize.js b/api/server/services/Endpoints/agents/initialize.js index 28282e68ea..69767e191c 100644 --- a/api/server/services/Endpoints/agents/initialize.js +++ b/api/server/services/Endpoints/agents/initialize.js @@ -82,6 +82,14 @@ function createToolLoader(signal, streamId = null, definitionsOnly = false) { }; } +/** + * Initializes the AgentClient for a given request/response cycle. + * @param {Object} params + * @param {Express.Request} params.req + * @param {Express.Response} params.res + * @param {AbortSignal} params.signal + * @param {Object} params.endpointOption + */ const initializeClient = async ({ req, res, signal, endpointOption }) => { if (!endpointOption) { throw new Error('Endpoint option not provided'); @@ -136,9 +144,13 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { toolEndCallback, }; + const summarizationOptions = + appConfig?.summarization?.enabled === false ? { enabled: false } : { enabled: true }; + const eventHandlers = getDefaultHandlers({ res, toolExecuteOptions, + summarizationOptions, aggregateContent, toolEndCallback, collectedUsage, diff --git a/api/server/services/Endpoints/index.js b/api/server/services/Endpoints/index.js deleted file mode 100644 index 3cabfe1c58..0000000000 --- a/api/server/services/Endpoints/index.js +++ /dev/null @@ -1,77 +0,0 @@ -const { Providers } = require('@librechat/agents'); -const { EModelEndpoint } = require('librechat-data-provider'); -const { getCustomEndpointConfig } = require('@librechat/api'); -const initAnthropic = require('~/server/services/Endpoints/anthropic/initialize'); -const getBedrockOptions = require('~/server/services/Endpoints/bedrock/options'); -const initOpenAI = require('~/server/services/Endpoints/openAI/initialize'); -const initCustom = require('~/server/services/Endpoints/custom/initialize'); -const initGoogle = require('~/server/services/Endpoints/google/initialize'); - -/** Check if the provider is a known custom provider - * @param {string | undefined} [provider] - The provider string - * @returns {boolean} - True if the provider is a known custom provider, false otherwise - */ -function isKnownCustomProvider(provider) { - return [Providers.XAI, Providers.DEEPSEEK, Providers.OPENROUTER, Providers.MOONSHOT].includes( - provider?.toLowerCase() || '', - ); -} - -const providerConfigMap = { - [Providers.XAI]: initCustom, - [Providers.DEEPSEEK]: initCustom, - [Providers.MOONSHOT]: initCustom, - [Providers.OPENROUTER]: initCustom, - [EModelEndpoint.openAI]: initOpenAI, - [EModelEndpoint.google]: initGoogle, - [EModelEndpoint.azureOpenAI]: initOpenAI, - [EModelEndpoint.anthropic]: initAnthropic, - [EModelEndpoint.bedrock]: getBedrockOptions, -}; - -/** - * Get the provider configuration and override endpoint based on the provider string - * @param {Object} params - * @param {string} params.provider - The provider string - * @param {AppConfig} params.appConfig - The application configuration - * @returns {{ - * getOptions: (typeof providerConfigMap)[keyof typeof providerConfigMap], - * overrideProvider: string, - * customEndpointConfig?: TEndpoint - * }} - */ -function getProviderConfig({ provider, appConfig }) { - let getOptions = providerConfigMap[provider]; - let overrideProvider = provider; - /** @type {TEndpoint | undefined} */ - let customEndpointConfig; - - if (!getOptions && providerConfigMap[provider.toLowerCase()] != null) { - overrideProvider = provider.toLowerCase(); - getOptions = providerConfigMap[overrideProvider]; - } else if (!getOptions) { - customEndpointConfig = getCustomEndpointConfig({ endpoint: provider, appConfig }); - if (!customEndpointConfig) { - throw new Error(`Provider ${provider} not supported`); - } - getOptions = initCustom; - overrideProvider = Providers.OPENAI; - } - - if (isKnownCustomProvider(overrideProvider) && !customEndpointConfig) { - customEndpointConfig = getCustomEndpointConfig({ endpoint: provider, appConfig }); - if (!customEndpointConfig) { - throw new Error(`Provider ${provider} not supported`); - } - } - - return { - getOptions, - overrideProvider, - customEndpointConfig, - }; -} - -module.exports = { - getProviderConfig, -}; diff --git a/client/src/a11y/LiveAnnouncer.tsx b/client/src/a11y/LiveAnnouncer.tsx index 0eac8089bc..ac83ff2962 100644 --- a/client/src/a11y/LiveAnnouncer.tsx +++ b/client/src/a11y/LiveAnnouncer.tsx @@ -21,6 +21,9 @@ const LiveAnnouncer: React.FC = ({ children }) => { start: localize('com_a11y_start'), end: localize('com_a11y_end'), composing: localize('com_a11y_ai_composing'), + summarize_started: localize('com_a11y_summarize_started'), + summarize_completed: localize('com_a11y_summarize_completed'), + summarize_failed: localize('com_a11y_summarize_failed'), }), [localize], ); diff --git a/client/src/components/Chat/Messages/Content/Part.tsx b/client/src/components/Chat/Messages/Content/Part.tsx index 7bce7ac11d..d0c7d2af37 100644 --- a/client/src/components/Chat/Messages/Content/Part.tsx +++ b/client/src/components/Chat/Messages/Content/Part.tsx @@ -8,7 +8,15 @@ import { } from 'librechat-data-provider'; import { memo } from 'react'; import type { TMessageContentParts, TAttachment } from 'librechat-data-provider'; -import { OpenAIImageGen, EmptyText, Reasoning, ExecuteCode, AgentUpdate, Text } from './Parts'; +import { + OpenAIImageGen, + ExecuteCode, + AgentUpdate, + EmptyText, + Reasoning, + Summary, + Text, +} from './Parts'; import { ErrorMessage } from './MessageContent'; import RetrievalCall from './RetrievalCall'; import { getCachedPreview } from '~/utils'; @@ -100,6 +108,16 @@ const Part = memo(function Part({ return null; } return ; + } else if (part.type === ContentTypes.SUMMARY) { + return ( + + ); } else if (part.type === ContentTypes.TOOL_CALL) { const toolCall = part[ContentTypes.TOOL_CALL]; diff --git a/client/src/components/Chat/Messages/Content/Parts/Summary.tsx b/client/src/components/Chat/Messages/Content/Parts/Summary.tsx new file mode 100644 index 0000000000..77973f0c06 --- /dev/null +++ b/client/src/components/Chat/Messages/Content/Parts/Summary.tsx @@ -0,0 +1,327 @@ +import { memo, useMemo, useState, useCallback, useRef, useId, useEffect } from 'react'; +import { useAtomValue } from 'jotai'; +import { Clipboard, CheckMark, TooltipAnchor } from '@librechat/client'; +import { ScrollText, ChevronDown, ChevronUp } from 'lucide-react'; +import type { MouseEvent, FocusEvent } from 'react'; +import type { SummaryContentPart } from 'librechat-data-provider'; +import { fontSizeAtom } from '~/store/fontSize'; +import { useMessageContext } from '~/Providers'; +import { useLocalize } from '~/hooks'; +import { cn } from '~/utils'; + +type SummaryProps = Pick< + SummaryContentPart, + 'content' | 'model' | 'provider' | 'tokenCount' | 'summarizing' +>; + +function useCopyToClipboard(content?: string) { + const [isCopied, setIsCopied] = useState(false); + const timerRef = useRef>(); + useEffect(() => () => clearTimeout(timerRef.current), []); + const handleCopy = useCallback( + (e: MouseEvent) => { + e.stopPropagation(); + if (content) { + navigator.clipboard.writeText(content).then( + () => { + clearTimeout(timerRef.current); + setIsCopied(true); + timerRef.current = setTimeout(() => setIsCopied(false), 2000); + }, + () => { + /* clipboard permission denied — leave icon unchanged */ + }, + ); + } + }, + [content], + ); + return { isCopied, handleCopy }; +} + +const SummaryContent = memo(({ children, meta }: { children: React.ReactNode; meta?: string }) => { + const fontSize = useAtomValue(fontSizeAtom); + + return ( +
+ {meta && {meta}} +

{children}

+
+ ); +}); + +const SummaryButton = memo( + ({ + isExpanded, + onClick, + label, + content, + contentId, + showCopyButton = true, + isCopied, + onCopy, + }: { + isExpanded: boolean; + onClick: (e: MouseEvent) => void; + label: string; + content?: string; + contentId: string; + showCopyButton?: boolean; + isCopied: boolean; + onCopy: (e: MouseEvent) => void; + }) => { + const localize = useLocalize(); + const fontSize = useAtomValue(fontSizeAtom); + + return ( +
+ + {content && showCopyButton && ( + + )} +
+ ); + }, +); + +const FloatingSummaryBar = memo( + ({ + isVisible, + onClick, + content, + contentId, + isCopied, + onCopy, + }: { + isVisible: boolean; + onClick: (e: MouseEvent) => void; + content?: string; + contentId: string; + isCopied: boolean; + onCopy: (e: MouseEvent) => void; + }) => { + const localize = useLocalize(); + + const collapseTooltip = localize('com_ui_collapse_summary'); + const copyTooltip = isCopied + ? localize('com_ui_copied_to_clipboard') + : localize('com_ui_copy_summary'); + + return ( +
+ +
+ ); + }, +); + +const Summary = memo(({ content, model, provider, tokenCount, summarizing }: SummaryProps) => { + const contentId = useId(); + const localize = useLocalize(); + const [isExpanded, setIsExpanded] = useState(false); + const [isBarVisible, setIsBarVisible] = useState(false); + const containerRef = useRef(null); + const { isSubmitting, isLatestMessage } = useMessageContext(); + + const text = useMemo( + () => + (content ?? []) + .map((block) => ('text' in block && typeof block.text === 'string' ? block.text : '')) + .join(''), + [content], + ); + const { isCopied, handleCopy } = useCopyToClipboard(text); + + const handleClick = useCallback((e: MouseEvent) => { + e.preventDefault(); + setIsExpanded((prev) => !prev); + }, []); + + const handleFocus = useCallback(() => setIsBarVisible(true), []); + const handleBlur = useCallback((e: FocusEvent) => { + if (!containerRef.current?.contains(e.relatedTarget as Node)) { + setIsBarVisible(false); + } + }, []); + const handleMouseEnter = useCallback(() => setIsBarVisible(true), []); + const handleMouseLeave = useCallback(() => { + if (!containerRef.current?.contains(document.activeElement)) { + setIsBarVisible(false); + } + }, []); + + const effectiveIsSubmitting = isLatestMessage ? isSubmitting : false; + const isActivelyStreaming = !!summarizing && !!effectiveIsSubmitting; + + const meta = useMemo(() => { + const parts: string[] = []; + if (provider || model) { + parts.push([provider, model].filter(Boolean).join('/')); + } + if (tokenCount != null && tokenCount > 0) { + parts.push(`${tokenCount} ${localize('com_ui_tokens')}`); + } + return parts.length > 0 ? parts.join(' \u00b7 ') : undefined; + }, [model, provider, tokenCount, localize]); + + const label = useMemo( + () => + isActivelyStreaming + ? localize('com_ui_summarizing') + : localize('com_ui_conversation_summarized'), + [isActivelyStreaming, localize], + ); + + if (!summarizing && !text) { + return null; + } + + return ( +
+
+
+ +
+
+
+ {text} + +
+
+
+
+ ); +}); + +SummaryContent.displayName = 'SummaryContent'; +SummaryButton.displayName = 'SummaryButton'; +FloatingSummaryBar.displayName = 'FloatingSummaryBar'; +Summary.displayName = 'Summary'; + +export default Summary; diff --git a/client/src/components/Chat/Messages/Content/Parts/index.ts b/client/src/components/Chat/Messages/Content/Parts/index.ts index 8788201e65..b0a418c819 100644 --- a/client/src/components/Chat/Messages/Content/Parts/index.ts +++ b/client/src/components/Chat/Messages/Content/Parts/index.ts @@ -6,5 +6,6 @@ export { default as Reasoning } from './Reasoning'; export { default as EmptyText } from './EmptyText'; export { default as LogContent } from './LogContent'; export { default as ExecuteCode } from './ExecuteCode'; +export { default as Summary } from './Summary'; export { default as AgentUpdate } from './AgentUpdate'; export { default as EditTextPart } from './EditTextPart'; diff --git a/client/src/hooks/SSE/__tests__/useStepHandler.spec.ts b/client/src/hooks/SSE/__tests__/useStepHandler.spec.ts index cbe13f3910..220d55704d 100644 --- a/client/src/hooks/SSE/__tests__/useStepHandler.spec.ts +++ b/client/src/hooks/SSE/__tests__/useStepHandler.spec.ts @@ -1,7 +1,8 @@ import { renderHook, act } from '@testing-library/react'; -import { StepTypes, ContentTypes, ToolCallTypes } from 'librechat-data-provider'; +import { StepTypes, StepEvents, ContentTypes, ToolCallTypes } from 'librechat-data-provider'; import type { TMessageContentParts, + SummaryContentPart, EventSubmission, TEndpointOption, TConversation, @@ -155,7 +156,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -174,7 +175,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(consoleSpy).toHaveBeenCalledWith('No message id found in run step event'); @@ -194,7 +195,7 @@ describe('useStepHandler', () => { }); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -210,7 +211,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -235,7 +236,7 @@ describe('useStepHandler', () => { act(() => { result.current.stepHandler( - { event: 'on_message_delta', data: createMessageDelta(stepId, 'Hello') }, + { event: StepEvents.ON_MESSAGE_DELTA, data: createMessageDelta(stepId, 'Hello') }, submission, ); }); @@ -245,7 +246,7 @@ describe('useStepHandler', () => { const runStep = createRunStep({ id: stepId }); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -266,7 +267,7 @@ describe('useStepHandler', () => { const submission = createSubmission({ userMessage: userMsg }); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -289,7 +290,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); const lastCall = mockSetMessages.mock.calls[mockSetMessages.mock.calls.length - 1][0]; @@ -315,7 +316,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); mockSetMessages.mockClear(); @@ -330,7 +331,10 @@ describe('useStepHandler', () => { }; act(() => { - result.current.stepHandler({ event: 'on_agent_update', data: agentUpdate }, submission); + result.current.stepHandler( + { event: StepEvents.ON_AGENT_UPDATE, data: agentUpdate }, + submission, + ); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -352,7 +356,10 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_agent_update', data: agentUpdate }, submission); + result.current.stepHandler( + { event: StepEvents.ON_AGENT_UPDATE, data: agentUpdate }, + submission, + ); }); expect(consoleSpy).toHaveBeenCalledWith('No message id found in agent update event'); @@ -371,7 +378,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); mockSetMessages.mockClear(); @@ -379,7 +386,10 @@ describe('useStepHandler', () => { const messageDelta = createMessageDelta('step-1', 'Hello'); act(() => { - result.current.stepHandler({ event: 'on_message_delta', data: messageDelta }, submission); + result.current.stepHandler( + { event: StepEvents.ON_MESSAGE_DELTA, data: messageDelta }, + submission, + ); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -397,7 +407,10 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_message_delta', data: messageDelta }, submission); + result.current.stepHandler( + { event: StepEvents.ON_MESSAGE_DELTA, data: messageDelta }, + submission, + ); }); expect(mockSetMessages).not.toHaveBeenCalled(); @@ -413,19 +426,19 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); act(() => { result.current.stepHandler( - { event: 'on_message_delta', data: createMessageDelta('step-1', 'Hello ') }, + { event: StepEvents.ON_MESSAGE_DELTA, data: createMessageDelta('step-1', 'Hello ') }, submission, ); }); act(() => { result.current.stepHandler( - { event: 'on_message_delta', data: createMessageDelta('step-1', 'World') }, + { event: StepEvents.ON_MESSAGE_DELTA, data: createMessageDelta('step-1', 'World') }, submission, ); }); @@ -447,7 +460,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); mockSetMessages.mockClear(); @@ -458,7 +471,10 @@ describe('useStepHandler', () => { }; act(() => { - result.current.stepHandler({ event: 'on_message_delta', data: messageDelta }, submission); + result.current.stepHandler( + { event: StepEvents.ON_MESSAGE_DELTA, data: messageDelta }, + submission, + ); }); expect(mockSetMessages).not.toHaveBeenCalled(); @@ -476,7 +492,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); mockSetMessages.mockClear(); @@ -485,7 +501,7 @@ describe('useStepHandler', () => { act(() => { result.current.stepHandler( - { event: 'on_reasoning_delta', data: reasoningDelta }, + { event: StepEvents.ON_REASONING_DELTA, data: reasoningDelta }, submission, ); }); @@ -506,7 +522,7 @@ describe('useStepHandler', () => { act(() => { result.current.stepHandler( - { event: 'on_reasoning_delta', data: reasoningDelta }, + { event: StepEvents.ON_REASONING_DELTA, data: reasoningDelta }, submission, ); }); @@ -524,19 +540,19 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); act(() => { result.current.stepHandler( - { event: 'on_reasoning_delta', data: createReasoningDelta('step-1', 'First ') }, + { event: StepEvents.ON_REASONING_DELTA, data: createReasoningDelta('step-1', 'First ') }, submission, ); }); act(() => { result.current.stepHandler( - { event: 'on_reasoning_delta', data: createReasoningDelta('step-1', 'thought') }, + { event: StepEvents.ON_REASONING_DELTA, data: createReasoningDelta('step-1', 'thought') }, submission, ); }); @@ -560,7 +576,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); mockSetMessages.mockClear(); @@ -574,7 +590,10 @@ describe('useStepHandler', () => { }; act(() => { - result.current.stepHandler({ event: 'on_run_step_delta', data: runStepDelta }, submission); + result.current.stepHandler( + { event: StepEvents.ON_RUN_STEP_DELTA, data: runStepDelta }, + submission, + ); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -593,7 +612,10 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step_delta', data: runStepDelta }, submission); + result.current.stepHandler( + { event: StepEvents.ON_RUN_STEP_DELTA, data: runStepDelta }, + submission, + ); }); expect(mockSetMessages).not.toHaveBeenCalled(); @@ -609,7 +631,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); mockSetMessages.mockClear(); @@ -625,7 +647,10 @@ describe('useStepHandler', () => { }; act(() => { - result.current.stepHandler({ event: 'on_run_step_delta', data: runStepDelta }, submission); + result.current.stepHandler( + { event: StepEvents.ON_RUN_STEP_DELTA, data: runStepDelta }, + submission, + ); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -649,7 +674,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); mockSetMessages.mockClear(); @@ -671,8 +696,8 @@ describe('useStepHandler', () => { act(() => { result.current.stepHandler( { - event: 'on_run_step_completed', - data: completedEvent as unknown as Agents.ToolEndEvent, + event: StepEvents.ON_RUN_STEP_COMPLETED, + data: completedEvent as { result: Agents.ToolEndEvent }, }, submission, ); @@ -710,8 +735,8 @@ describe('useStepHandler', () => { act(() => { result.current.stepHandler( { - event: 'on_run_step_completed', - data: completedEvent as unknown as Agents.ToolEndEvent, + event: StepEvents.ON_RUN_STEP_COMPLETED, + data: completedEvent as { result: Agents.ToolEndEvent }, }, submission, ); @@ -735,7 +760,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); act(() => { @@ -746,7 +771,7 @@ describe('useStepHandler', () => { act(() => { result.current.stepHandler( - { event: 'on_message_delta', data: createMessageDelta('step-1', 'Test') }, + { event: StepEvents.ON_MESSAGE_DELTA, data: createMessageDelta('step-1', 'Test') }, submission, ); }); @@ -772,12 +797,12 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); act(() => { result.current.stepHandler( - { event: 'on_message_delta', data: createMessageDelta('step-1', ' more') }, + { event: StepEvents.ON_MESSAGE_DELTA, data: createMessageDelta('step-1', ' more') }, submission, ); }); @@ -824,7 +849,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockAnnouncePolite).toHaveBeenCalledWith({ message: 'composing', isStatus: true }); @@ -842,7 +867,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockAnnouncePolite).not.toHaveBeenCalled(); @@ -872,7 +897,7 @@ describe('useStepHandler', () => { }); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -891,15 +916,15 @@ describe('useStepHandler', () => { act(() => { result.current.stepHandler( - { event: 'on_message_delta', data: createMessageDelta(stepId, 'First ') }, + { event: StepEvents.ON_MESSAGE_DELTA, data: createMessageDelta(stepId, 'First ') }, submission, ); result.current.stepHandler( - { event: 'on_message_delta', data: createMessageDelta(stepId, 'Second ') }, + { event: StepEvents.ON_MESSAGE_DELTA, data: createMessageDelta(stepId, 'Second ') }, submission, ); result.current.stepHandler( - { event: 'on_message_delta', data: createMessageDelta(stepId, 'Third') }, + { event: StepEvents.ON_MESSAGE_DELTA, data: createMessageDelta(stepId, 'Third') }, submission, ); }); @@ -909,7 +934,7 @@ describe('useStepHandler', () => { const runStep = createRunStep({ id: stepId }); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -931,11 +956,14 @@ describe('useStepHandler', () => { act(() => { result.current.stepHandler( - { event: 'on_reasoning_delta', data: createReasoningDelta(stepId, 'Thinking...') }, + { + event: StepEvents.ON_REASONING_DELTA, + data: createReasoningDelta(stepId, 'Thinking...'), + }, submission, ); result.current.stepHandler( - { event: 'on_message_delta', data: createMessageDelta(stepId, 'Response') }, + { event: StepEvents.ON_MESSAGE_DELTA, data: createMessageDelta(stepId, 'Response') }, submission, ); }); @@ -945,7 +973,7 @@ describe('useStepHandler', () => { const runStep = createRunStep({ id: stepId }); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -971,7 +999,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); const textDelta: Agents.MessageDeltaEvent = { @@ -980,7 +1008,10 @@ describe('useStepHandler', () => { }; act(() => { - result.current.stepHandler({ event: 'on_message_delta', data: textDelta }, submission); + result.current.stepHandler( + { event: StepEvents.ON_MESSAGE_DELTA, data: textDelta }, + submission, + ); }); expect(consoleSpy).toHaveBeenCalledWith( @@ -994,6 +1025,395 @@ describe('useStepHandler', () => { }); }); + describe('summarization events', () => { + it('ON_SUMMARIZE_START calls announcePolite', () => { + mockLastAnnouncementTimeRef.current = Date.now(); + const responseMessage = createResponseMessage(); + mockGetMessages.mockReturnValue([responseMessage]); + + const { result } = renderHook(() => useStepHandler(createHookParams())); + const submission = createSubmission(); + + act(() => { + result.current.stepHandler( + { + event: StepEvents.ON_SUMMARIZE_START, + data: { + agentId: 'agent-1', + provider: 'test-provider', + model: 'test-model', + messagesToRefineCount: 5, + summaryVersion: 1, + }, + }, + submission, + ); + }); + + expect(mockAnnouncePolite).toHaveBeenCalledWith({ + message: 'summarize_started', + isStatus: true, + }); + }); + + it('ON_SUMMARIZE_DELTA accumulates content on known run step', async () => { + mockLastAnnouncementTimeRef.current = Date.now(); + const responseMessage = createResponseMessage(); + mockGetMessages.mockReturnValue([responseMessage]); + + const { result } = renderHook(() => useStepHandler(createHookParams())); + const submission = createSubmission(); + + const runStep = createRunStep({ + summary: { + type: ContentTypes.SUMMARY, + model: 'test-model', + provider: 'test-provider', + } as TMessageContentParts & { type: typeof ContentTypes.SUMMARY }, + }); + + act(() => { + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); + }); + + mockSetMessages.mockClear(); + + act(() => { + result.current.stepHandler( + { + event: StepEvents.ON_SUMMARIZE_DELTA, + data: { + id: 'step-1', + delta: { + summary: { + type: ContentTypes.SUMMARY, + content: [{ type: ContentTypes.TEXT, text: 'chunk1' }], + provider: 'test-provider', + model: 'test-model', + summarizing: true, + }, + }, + }, + }, + submission, + ); + }); + + await act(async () => { + await new Promise((r) => requestAnimationFrame(r)); + }); + + expect(mockSetMessages).toHaveBeenCalled(); + const lastCall = mockSetMessages.mock.calls[mockSetMessages.mock.calls.length - 1][0]; + const responseMsg = lastCall[lastCall.length - 1]; + const summaryPart = responseMsg.content?.find( + (c: TMessageContentParts) => c.type === ContentTypes.SUMMARY, + ); + expect(summaryPart).toBeDefined(); + expect(summaryPart.content).toContainEqual( + expect.objectContaining({ type: ContentTypes.TEXT, text: 'chunk1' }), + ); + }); + + it('ON_SUMMARIZE_DELTA buffers when run step is not yet known', () => { + mockLastAnnouncementTimeRef.current = Date.now(); + const responseMessage = createResponseMessage(); + mockGetMessages.mockReturnValue([responseMessage]); + + const { result } = renderHook(() => useStepHandler(createHookParams())); + const submission = createSubmission(); + + act(() => { + result.current.stepHandler( + { + event: StepEvents.ON_SUMMARIZE_DELTA, + data: { + id: 'step-1', + delta: { + summary: { + type: ContentTypes.SUMMARY, + content: [{ type: ContentTypes.TEXT, text: 'buffered chunk' }], + provider: 'test-provider', + model: 'test-model', + summarizing: true, + }, + }, + }, + }, + submission, + ); + }); + + expect(mockSetMessages).not.toHaveBeenCalled(); + }); + + it('ON_SUMMARIZE_COMPLETE success replaces summarizing part with finalized summary', () => { + mockLastAnnouncementTimeRef.current = Date.now(); + const responseMessage = createResponseMessage(); + mockGetMessages.mockReturnValue([responseMessage]); + + const { result } = renderHook(() => useStepHandler(createHookParams())); + const submission = createSubmission(); + + const runStep = createRunStep({ + summary: { + type: ContentTypes.SUMMARY, + model: 'test-model', + provider: 'test-provider', + } as TMessageContentParts & { type: typeof ContentTypes.SUMMARY }, + }); + + act(() => { + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); + }); + + act(() => { + result.current.stepHandler( + { + event: StepEvents.ON_SUMMARIZE_DELTA, + data: { + id: 'step-1', + delta: { + summary: { + type: ContentTypes.SUMMARY, + content: [{ type: ContentTypes.TEXT, text: 'partial' }], + provider: 'test-provider', + model: 'test-model', + summarizing: true, + }, + }, + }, + }, + submission, + ); + }); + + mockSetMessages.mockClear(); + mockAnnouncePolite.mockClear(); + + const lastSetCall = mockGetMessages.mock.results[mockGetMessages.mock.results.length - 1]; + const latestMessages = lastSetCall?.value ?? []; + mockGetMessages.mockReturnValue( + latestMessages.length > 0 ? latestMessages : [responseMessage], + ); + + act(() => { + result.current.stepHandler( + { + event: StepEvents.ON_SUMMARIZE_COMPLETE, + data: { + id: 'step-1', + agentId: 'agent-1', + summary: { + type: ContentTypes.SUMMARY, + content: [{ type: ContentTypes.TEXT, text: 'Final summary' }], + tokenCount: 100, + summarizing: false, + }, + }, + }, + submission, + ); + }); + + expect(mockAnnouncePolite).toHaveBeenCalledWith({ + message: 'summarize_completed', + isStatus: true, + }); + expect(mockSetMessages).toHaveBeenCalled(); + const lastCall = mockSetMessages.mock.calls[mockSetMessages.mock.calls.length - 1][0]; + const responseMsg = lastCall.find((m: TMessage) => m.messageId === 'response-msg-1'); + const summaryPart = responseMsg?.content?.find( + (c: TMessageContentParts) => c.type === ContentTypes.SUMMARY, + ); + expect(summaryPart).toMatchObject({ summarizing: false }); + }); + + it('ON_SUMMARIZE_COMPLETE error removes summarizing parts', () => { + mockLastAnnouncementTimeRef.current = Date.now(); + const responseMessage = createResponseMessage(); + mockGetMessages.mockReturnValue([responseMessage]); + + const { result } = renderHook(() => useStepHandler(createHookParams())); + const submission = createSubmission(); + + const runStep = createRunStep({ + summary: { + type: ContentTypes.SUMMARY, + model: 'test-model', + provider: 'test-provider', + } as TMessageContentParts & { type: typeof ContentTypes.SUMMARY }, + }); + + act(() => { + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); + }); + + act(() => { + result.current.stepHandler( + { + event: StepEvents.ON_SUMMARIZE_DELTA, + data: { + id: 'step-1', + delta: { + summary: { + type: ContentTypes.SUMMARY, + content: [{ type: ContentTypes.TEXT, text: 'partial' }], + provider: 'test-provider', + model: 'test-model', + summarizing: true, + }, + }, + }, + }, + submission, + ); + }); + + mockSetMessages.mockClear(); + mockAnnouncePolite.mockClear(); + + const lastSetCall = mockGetMessages.mock.results[mockGetMessages.mock.results.length - 1]; + const latestMessages = lastSetCall?.value ?? []; + mockGetMessages.mockReturnValue( + latestMessages.length > 0 ? latestMessages : [responseMessage], + ); + + act(() => { + result.current.stepHandler( + { + event: StepEvents.ON_SUMMARIZE_COMPLETE, + data: { + id: 'step-1', + agentId: 'agent-1', + error: 'LLM failed', + }, + }, + submission, + ); + }); + + expect(mockAnnouncePolite).toHaveBeenCalledWith({ + message: 'summarize_failed', + isStatus: true, + }); + expect(mockSetMessages).toHaveBeenCalled(); + const lastCall = mockSetMessages.mock.calls[mockSetMessages.mock.calls.length - 1][0]; + const responseMsg = lastCall.find((m: TMessage) => m.messageId === 'response-msg-1'); + const summaryParts = + responseMsg?.content?.filter( + (c: TMessageContentParts) => c.type === ContentTypes.SUMMARY, + ) ?? []; + expect(summaryParts).toHaveLength(0); + }); + + it('ON_SUMMARIZE_COMPLETE returns early when target message not in messageMap', () => { + mockLastAnnouncementTimeRef.current = Date.now(); + const responseMessage = createResponseMessage(); + mockGetMessages.mockReturnValue([responseMessage]); + + const { result } = renderHook(() => useStepHandler(createHookParams())); + const submission = createSubmission(); + + act(() => { + result.current.stepHandler( + { + event: StepEvents.ON_SUMMARIZE_COMPLETE, + data: { + id: 'step-1', + agentId: 'agent-1', + summary: { + type: ContentTypes.SUMMARY, + content: [{ type: ContentTypes.TEXT, text: 'Final summary' }], + tokenCount: 100, + summarizing: false, + }, + }, + }, + submission, + ); + }); + + expect(mockSetMessages).not.toHaveBeenCalled(); + expect(mockAnnouncePolite).not.toHaveBeenCalled(); + }); + + it('ON_SUMMARIZE_COMPLETE with undefined summary finalizes existing part with summarizing=false', () => { + mockLastAnnouncementTimeRef.current = Date.now(); + const responseMessage = createResponseMessage(); + mockGetMessages.mockReturnValue([responseMessage]); + + const { result } = renderHook(() => useStepHandler(createHookParams())); + const submission = createSubmission(); + + const runStep = createRunStep({ + summary: { + type: ContentTypes.SUMMARY, + model: 'test-model', + provider: 'test-provider', + } as TMessageContentParts & { type: typeof ContentTypes.SUMMARY }, + }); + + act(() => { + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); + }); + + act(() => { + result.current.stepHandler( + { + event: StepEvents.ON_SUMMARIZE_DELTA, + data: { + id: 'step-1', + delta: { + summary: { + type: ContentTypes.SUMMARY, + content: [{ type: ContentTypes.TEXT, text: 'partial' }], + provider: 'test-provider', + model: 'test-model', + summarizing: true, + }, + }, + }, + }, + submission, + ); + }); + + mockSetMessages.mockClear(); + mockAnnouncePolite.mockClear(); + + const lastSetCall = mockGetMessages.mock.results[mockGetMessages.mock.results.length - 1]; + const latestMessages = lastSetCall?.value ?? []; + mockGetMessages.mockReturnValue( + latestMessages.length > 0 ? latestMessages : [responseMessage], + ); + + act(() => { + result.current.stepHandler( + { + event: StepEvents.ON_SUMMARIZE_COMPLETE, + data: { + id: 'step-1', + agentId: 'agent-1', + }, + }, + submission, + ); + }); + + expect(mockAnnouncePolite).toHaveBeenCalledWith({ + message: 'summarize_completed', + isStatus: true, + }); + expect(mockSetMessages).toHaveBeenCalledTimes(1); + const updatedMessages = mockSetMessages.mock.calls[0][0] as TMessage[]; + const summaryPart = updatedMessages[0]?.content?.find( + (p: TMessageContentParts) => p?.type === ContentTypes.SUMMARY, + ) as SummaryContentPart | undefined; + expect(summaryPart?.summarizing).toBe(false); + }); + }); + describe('edge cases', () => { it('should handle empty messages array', () => { mockGetMessages.mockReturnValue([]); @@ -1004,7 +1424,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -1019,7 +1439,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -1035,7 +1455,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); const messageDelta: Agents.MessageDeltaEvent = { @@ -1049,7 +1469,10 @@ describe('useStepHandler', () => { }; act(() => { - result.current.stepHandler({ event: 'on_message_delta', data: messageDelta }, submission); + result.current.stepHandler( + { event: StepEvents.ON_MESSAGE_DELTA, data: messageDelta }, + submission, + ); }); expect(mockSetMessages).toHaveBeenCalled(); @@ -1065,7 +1488,7 @@ describe('useStepHandler', () => { const submission = createSubmission(); act(() => { - result.current.stepHandler({ event: 'on_run_step', data: runStep }, submission); + result.current.stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, submission); }); mockSetMessages.mockClear(); @@ -1076,7 +1499,10 @@ describe('useStepHandler', () => { }; act(() => { - result.current.stepHandler({ event: 'on_message_delta', data: messageDelta }, submission); + result.current.stepHandler( + { event: StepEvents.ON_MESSAGE_DELTA, data: messageDelta }, + submission, + ); }); expect(mockSetMessages).not.toHaveBeenCalled(); diff --git a/client/src/hooks/SSE/useResumableSSE.ts b/client/src/hooks/SSE/useResumableSSE.ts index ddfee30120..32820f8392 100644 --- a/client/src/hooks/SSE/useResumableSSE.ts +++ b/client/src/hooks/SSE/useResumableSSE.ts @@ -8,6 +8,7 @@ import { Constants, QueryKeys, ErrorTypes, + StepEvents, apiBaseUrl, createPayload, ViolationTypes, @@ -224,7 +225,7 @@ export default function useResumableSSE( if (data.resumeState?.runSteps) { for (const runStep of data.resumeState.runSteps) { - stepHandler({ event: 'on_run_step', data: runStep }, { + stepHandler({ event: StepEvents.ON_RUN_STEP, data: runStep }, { ...currentSubmission, userMessage, } as EventSubmission); diff --git a/client/src/hooks/SSE/useStepHandler.ts b/client/src/hooks/SSE/useStepHandler.ts index c3b48cb107..1f28d97433 100644 --- a/client/src/hooks/SSE/useStepHandler.ts +++ b/client/src/hooks/SSE/useStepHandler.ts @@ -2,6 +2,7 @@ import { useCallback, useRef } from 'react'; import { Constants, StepTypes, + StepEvents, ContentTypes, ToolCallTypes, getNonEmptyValue, @@ -12,6 +13,7 @@ import type { PartMetadata, ContentMetadata, EventSubmission, + SummaryContentPart, TMessageContentParts, } from 'librechat-data-provider'; import type { SetterOrUpdater } from 'recoil'; @@ -27,20 +29,16 @@ type TUseStepHandler = { lastAnnouncementTimeRef: React.MutableRefObject; }; -type TStepEvent = { - event: string; - data: - | Agents.MessageDeltaEvent - | Agents.ReasoningDeltaEvent - | Agents.RunStepDeltaEvent - | Agents.AgentUpdate - | Agents.RunStep - | Agents.ToolEndEvent - | { - runId?: string; - message: string; - }; -}; +type TStepEvent = + | { event: StepEvents.ON_RUN_STEP; data: Agents.RunStep } + | { event: StepEvents.ON_AGENT_UPDATE; data: Agents.AgentUpdate } + | { event: StepEvents.ON_MESSAGE_DELTA; data: Agents.MessageDeltaEvent } + | { event: StepEvents.ON_REASONING_DELTA; data: Agents.ReasoningDeltaEvent } + | { event: StepEvents.ON_RUN_STEP_DELTA; data: Agents.RunStepDeltaEvent } + | { event: StepEvents.ON_RUN_STEP_COMPLETED; data: { result: Agents.ToolEndEvent } } + | { event: StepEvents.ON_SUMMARIZE_START; data: Agents.SummarizeStartEvent } + | { event: StepEvents.ON_SUMMARIZE_DELTA; data: Agents.SummarizeDeltaEvent } + | { event: StepEvents.ON_SUMMARIZE_COMPLETE; data: Agents.SummarizeCompleteEvent }; type MessageDeltaUpdate = { type: ContentTypes.TEXT; text: string; tool_call_ids?: string[] }; @@ -52,6 +50,7 @@ type AllContentTypes = | ContentTypes.TOOL_CALL | ContentTypes.IMAGE_FILE | ContentTypes.IMAGE_URL + | ContentTypes.SUMMARY | ContentTypes.ERROR; export default function useStepHandler({ @@ -65,6 +64,8 @@ export default function useStepHandler({ const stepMap = useRef(new Map()); /** Buffer for deltas that arrive before their corresponding run step */ const pendingDeltaBuffer = useRef(new Map()); + /** Coalesces rapid-fire summarize delta renders into a single rAF frame */ + const summarizeDeltaRaf = useRef(null); /** * Calculate content index for a run step. @@ -138,7 +139,7 @@ export default function useStepHandler({ text: (currentContent.text || '') + contentPart.text, }; - if (contentPart.tool_call_ids != null) { + if ('tool_call_ids' in contentPart && contentPart.tool_call_ids != null) { update.tool_call_ids = contentPart.tool_call_ids; } updatedContent[index] = update; @@ -173,6 +174,13 @@ export default function useStepHandler({ updatedContent[index] = { ...currentContent, }; + } else if (contentType === ContentTypes.SUMMARY) { + const currentSummary = updatedContent[index] as SummaryContentPart | undefined; + const incoming = contentPart as SummaryContentPart; + updatedContent[index] = { + ...incoming, + content: [...(currentSummary?.content ?? []), ...(incoming.content ?? [])], + }; } else if (contentType === ContentTypes.TOOL_CALL && 'tool_call' in contentPart) { const existingContent = updatedContent[index] as Agents.ToolCallContent | undefined; const existingToolCall = existingContent?.tool_call; @@ -243,7 +251,7 @@ export default function useStepHandler({ }; const stepHandler = useCallback( - ({ event, data }: TStepEvent, submission: EventSubmission) => { + (stepEvent: TStepEvent, submission: EventSubmission) => { const messages = getMessages() || []; const { userMessage } = submission; let parentMessageId = userMessage.messageId; @@ -260,8 +268,8 @@ export default function useStepHandler({ initialContent = submission?.initialResponse?.content ?? initialContent; } - if (event === 'on_run_step') { - const runStep = data as Agents.RunStep; + if (stepEvent.event === StepEvents.ON_RUN_STEP) { + const runStep = stepEvent.data; let responseMessageId = runStep.runId ?? ''; if (responseMessageId === Constants.USE_PRELIM_RESPONSE_MESSAGE_ID) { responseMessageId = submission?.initialResponse?.messageId ?? ''; @@ -355,15 +363,38 @@ export default function useStepHandler({ setMessages(updatedMessages); } + if (runStep.summary != null) { + const summaryPart: SummaryContentPart = { + type: ContentTypes.SUMMARY, + content: [], + summarizing: true, + model: runStep.summary.model, + provider: runStep.summary.provider, + }; + + let updatedResponse = { ...(messageMap.current.get(responseMessageId) ?? response) }; + updatedResponse = updateContent( + updatedResponse, + contentIndex, + summaryPart, + false, + getStepMetadata(runStep), + ); + + messageMap.current.set(responseMessageId, updatedResponse); + const currentMessages = getMessages() || []; + setMessages([...currentMessages.slice(0, -1), updatedResponse]); + } + const bufferedDeltas = pendingDeltaBuffer.current.get(runStep.id); if (bufferedDeltas && bufferedDeltas.length > 0) { pendingDeltaBuffer.current.delete(runStep.id); for (const bufferedDelta of bufferedDeltas) { - stepHandler({ event: bufferedDelta.event, data: bufferedDelta.data }, submission); + stepHandler(bufferedDelta, submission); } } - } else if (event === 'on_agent_update') { - const { agent_update } = data as Agents.AgentUpdate; + } else if (stepEvent.event === StepEvents.ON_AGENT_UPDATE) { + const { agent_update } = stepEvent.data; let responseMessageId = agent_update.runId || ''; if (responseMessageId === Constants.USE_PRELIM_RESPONSE_MESSAGE_ID) { responseMessageId = submission?.initialResponse?.messageId ?? ''; @@ -385,7 +416,7 @@ export default function useStepHandler({ const updatedResponse = updateContent( response, currentIndex, - data, + stepEvent.data, false, agentUpdateMeta, ); @@ -393,8 +424,8 @@ export default function useStepHandler({ const currentMessages = getMessages() || []; setMessages([...currentMessages.slice(0, -1), updatedResponse]); } - } else if (event === 'on_message_delta') { - const messageDelta = data as Agents.MessageDeltaEvent; + } else if (stepEvent.event === StepEvents.ON_MESSAGE_DELTA) { + const messageDelta = stepEvent.data; const runStep = stepMap.current.get(messageDelta.id); let responseMessageId = runStep?.runId ?? ''; if (responseMessageId === Constants.USE_PRELIM_RESPONSE_MESSAGE_ID) { @@ -404,7 +435,7 @@ export default function useStepHandler({ if (!runStep || !responseMessageId) { const buffer = pendingDeltaBuffer.current.get(messageDelta.id) ?? []; - buffer.push({ event: 'on_message_delta', data: messageDelta }); + buffer.push({ event: StepEvents.ON_MESSAGE_DELTA, data: messageDelta }); pendingDeltaBuffer.current.set(messageDelta.id, buffer); return; } @@ -436,8 +467,8 @@ export default function useStepHandler({ const currentMessages = getMessages() || []; setMessages([...currentMessages.slice(0, -1), updatedResponse]); } - } else if (event === 'on_reasoning_delta') { - const reasoningDelta = data as Agents.ReasoningDeltaEvent; + } else if (stepEvent.event === StepEvents.ON_REASONING_DELTA) { + const reasoningDelta = stepEvent.data; const runStep = stepMap.current.get(reasoningDelta.id); let responseMessageId = runStep?.runId ?? ''; if (responseMessageId === Constants.USE_PRELIM_RESPONSE_MESSAGE_ID) { @@ -447,7 +478,7 @@ export default function useStepHandler({ if (!runStep || !responseMessageId) { const buffer = pendingDeltaBuffer.current.get(reasoningDelta.id) ?? []; - buffer.push({ event: 'on_reasoning_delta', data: reasoningDelta }); + buffer.push({ event: StepEvents.ON_REASONING_DELTA, data: reasoningDelta }); pendingDeltaBuffer.current.set(reasoningDelta.id, buffer); return; } @@ -479,8 +510,8 @@ export default function useStepHandler({ const currentMessages = getMessages() || []; setMessages([...currentMessages.slice(0, -1), updatedResponse]); } - } else if (event === 'on_run_step_delta') { - const runStepDelta = data as Agents.RunStepDeltaEvent; + } else if (stepEvent.event === StepEvents.ON_RUN_STEP_DELTA) { + const runStepDelta = stepEvent.data; const runStep = stepMap.current.get(runStepDelta.id); let responseMessageId = runStep?.runId ?? ''; if (responseMessageId === Constants.USE_PRELIM_RESPONSE_MESSAGE_ID) { @@ -490,7 +521,7 @@ export default function useStepHandler({ if (!runStep || !responseMessageId) { const buffer = pendingDeltaBuffer.current.get(runStepDelta.id) ?? []; - buffer.push({ event: 'on_run_step_delta', data: runStepDelta }); + buffer.push({ event: StepEvents.ON_RUN_STEP_DELTA, data: runStepDelta }); pendingDeltaBuffer.current.set(runStepDelta.id, buffer); return; } @@ -538,8 +569,8 @@ export default function useStepHandler({ setMessages(updatedMessages); } - } else if (event === 'on_run_step_completed') { - const { result } = data as unknown as { result: Agents.ToolEndEvent }; + } else if (stepEvent.event === StepEvents.ON_RUN_STEP_COMPLETED) { + const { result } = stepEvent.data; const { id: stepId } = result; @@ -581,18 +612,116 @@ export default function useStepHandler({ setMessages(updatedMessages); } - } + } else if (stepEvent.event === StepEvents.ON_SUMMARIZE_START) { + announcePolite({ message: 'summarize_started', isStatus: true }); + } else if (stepEvent.event === StepEvents.ON_SUMMARIZE_DELTA) { + const deltaData = stepEvent.data; + const runStep = stepMap.current.get(deltaData.id); + let responseMessageId = runStep?.runId ?? ''; + if (responseMessageId === Constants.USE_PRELIM_RESPONSE_MESSAGE_ID) { + responseMessageId = submission?.initialResponse?.messageId ?? ''; + parentMessageId = submission?.initialResponse?.parentMessageId ?? ''; + } - return () => { - toolCallIdMap.current.clear(); - messageMap.current.clear(); - stepMap.current.clear(); - }; + if (!runStep || !responseMessageId) { + const buffer = pendingDeltaBuffer.current.get(deltaData.id) ?? []; + buffer.push({ event: StepEvents.ON_SUMMARIZE_DELTA, data: deltaData }); + pendingDeltaBuffer.current.set(deltaData.id, buffer); + return; + } + + const response = messageMap.current.get(responseMessageId); + if (response) { + const contentPart: SummaryContentPart = { + ...deltaData.delta.summary, + summarizing: true, + }; + + const contentIndex = runStep.index + initialContent.length; + const updatedResponse = updateContent( + response, + contentIndex, + contentPart, + false, + getStepMetadata(runStep), + ); + messageMap.current.set(responseMessageId, updatedResponse); + if (summarizeDeltaRaf.current == null) { + summarizeDeltaRaf.current = requestAnimationFrame(() => { + summarizeDeltaRaf.current = null; + const latest = messageMap.current.get(responseMessageId); + if (latest) { + const msgs = getMessages() || []; + setMessages([...msgs.slice(0, -1), latest]); + } + }); + } + } + } else if (stepEvent.event === StepEvents.ON_SUMMARIZE_COMPLETE) { + const completeData = stepEvent.data; + const completeRunStep = stepMap.current.get(completeData.id); + let completeMessageId = completeRunStep?.runId ?? ''; + if (completeMessageId === Constants.USE_PRELIM_RESPONSE_MESSAGE_ID) { + completeMessageId = submission?.initialResponse?.messageId ?? ''; + } + + const targetMessage = messageMap.current.get(completeMessageId); + if (!targetMessage || !Array.isArray(targetMessage.content)) { + return; + } + + const currentMessages = getMessages() || []; + const targetIndex = currentMessages.findIndex((m) => m.messageId === completeMessageId); + + if (completeData.error) { + const filtered = targetMessage.content.filter( + (part) => + part?.type !== ContentTypes.SUMMARY || !(part as SummaryContentPart).summarizing, + ); + if (filtered.length !== targetMessage.content.length) { + announcePolite({ message: 'summarize_failed', isStatus: true }); + const cleaned = { ...targetMessage, content: filtered }; + messageMap.current.set(completeMessageId, cleaned); + if (targetIndex >= 0) { + const updated = [...currentMessages]; + updated[targetIndex] = cleaned; + setMessages(updated); + } + } + } else { + let didFinalize = false; + const updatedContent = targetMessage.content.map((part) => { + if (part?.type === ContentTypes.SUMMARY && (part as SummaryContentPart).summarizing) { + didFinalize = true; + if (!completeData.summary) { + return { ...part, summarizing: false } as SummaryContentPart; + } + return { ...completeData.summary, summarizing: false } as SummaryContentPart; + } + return part; + }); + if (didFinalize && targetIndex >= 0) { + announcePolite({ message: 'summarize_completed', isStatus: true }); + const finalized = { ...targetMessage, content: updatedContent }; + messageMap.current.set(completeMessageId, finalized); + const updated = [...currentMessages]; + updated[targetIndex] = finalized; + setMessages(updated); + } + } + } else { + const _exhaustive: never = stepEvent; + console.warn('Unhandled step event', (_exhaustive as TStepEvent).event); + } }, [getMessages, lastAnnouncementTimeRef, announcePolite, setMessages, calculateContentIndex], ); const clearStepMaps = useCallback(() => { + if (summarizeDeltaRaf.current != null) { + cancelAnimationFrame(summarizeDeltaRaf.current); + summarizeDeltaRaf.current = null; + } toolCallIdMap.current.clear(); messageMap.current.clear(); stepMap.current.clear(); diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index 67111586ff..987ac314a9 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -5,6 +5,9 @@ "com_a11y_chats_date_section": "Chats from {{date}}", "com_a11y_end": "The AI has finished their reply.", "com_a11y_selected": "selected", + "com_a11y_summarize_completed": "Context summarized.", + "com_a11y_summarize_failed": "Summarization failed, continuing with available context.", + "com_a11y_summarize_started": "Summarizing context.", "com_a11y_start": "The AI has started their reply.", "com_agents_agent_card_label": "{{name}} agent. {{description}}", "com_agents_all": "All Agents", @@ -830,6 +833,7 @@ "com_ui_code": "Code", "com_ui_collapse": "Collapse", "com_ui_collapse_chat": "Collapse Chat", + "com_ui_collapse_summary": "Collapse Summary", "com_ui_collapse_thoughts": "Collapse Thoughts", "com_ui_command_placeholder": "Optional: Enter a command for the prompt or name will be used", "com_ui_command_usage_placeholder": "Select a Prompt by command or name", @@ -852,6 +856,7 @@ "com_ui_conversation": "conversation", "com_ui_conversation_label": "{{title}} conversation", "com_ui_conversation_not_found": "Conversation not found", + "com_ui_conversation_summarized": "Conversation summarized", "com_ui_conversations": "conversations", "com_ui_convo_archived": "Conversation archived", "com_ui_convo_delete_error": "Failed to delete conversation", @@ -862,6 +867,7 @@ "com_ui_copy_code": "Copy code", "com_ui_copy_link": "Copy link", "com_ui_copy_stack_trace": "Copy stack trace", + "com_ui_copy_summary": "Copy summary to clipboard", "com_ui_copy_thoughts_to_clipboard": "Copy thoughts to clipboard", "com_ui_copy_to_clipboard": "Copy to clipboard", "com_ui_copy_url_to_clipboard": "Copy URL to clipboard", @@ -1414,6 +1420,7 @@ "com_ui_storage": "Storage", "com_ui_storage_filter_sort": "Filter and Sort by Storage", "com_ui_submit": "Submit", + "com_ui_summarizing": "Summarizing...", "com_ui_support_contact": "Support Contact", "com_ui_support_contact_email": "Email", "com_ui_support_contact_email_invalid": "Please enter a valid email address", diff --git a/package-lock.json b/package-lock.json index aad4e24fda..b7e09a628e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -59,7 +59,7 @@ "@google/genai": "^1.19.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.80", - "@librechat/agents": "^3.1.57", + "@librechat/agents": "^3.1.62", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", @@ -3162,30 +3162,30 @@ } }, "node_modules/@aws-sdk/client-bedrock-runtime": { - "version": "3.1011.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-bedrock-runtime/-/client-bedrock-runtime-3.1011.0.tgz", - "integrity": "sha512-yn5oRLLP1TsGLZqlnyqBjAVmiexYR8/rPG8D+rI5f5+UIvb3zHOmHLXA1m41H/sKXI4embmXfUjvArmjTmfsIw==", + "version": "3.1014.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-bedrock-runtime/-/client-bedrock-runtime-3.1014.0.tgz", + "integrity": "sha512-K0TmX1D6dIh4J2QtqUuEXxbyMmtHD+kwHvUg1JwDXaLXC7zJJlR0p1692YBh/eze9tHbuKqP/VWzUy6XX9IPGw==", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", - "@aws-sdk/core": "^3.973.20", - "@aws-sdk/credential-provider-node": "^3.972.21", + "@aws-sdk/core": "^3.973.23", + "@aws-sdk/credential-provider-node": "^3.972.24", "@aws-sdk/eventstream-handler-node": "^3.972.11", "@aws-sdk/middleware-eventstream": "^3.972.8", "@aws-sdk/middleware-host-header": "^3.972.8", "@aws-sdk/middleware-logger": "^3.972.8", "@aws-sdk/middleware-recursion-detection": "^3.972.8", - "@aws-sdk/middleware-user-agent": "^3.972.21", + "@aws-sdk/middleware-user-agent": "^3.972.24", "@aws-sdk/middleware-websocket": "^3.972.13", - "@aws-sdk/region-config-resolver": "^3.972.8", - "@aws-sdk/token-providers": "3.1011.0", + "@aws-sdk/region-config-resolver": "^3.972.9", + "@aws-sdk/token-providers": "3.1014.0", "@aws-sdk/types": "^3.973.6", "@aws-sdk/util-endpoints": "^3.996.5", "@aws-sdk/util-user-agent-browser": "^3.972.8", - "@aws-sdk/util-user-agent-node": "^3.973.7", - "@smithy/config-resolver": "^4.4.11", - "@smithy/core": "^3.23.11", + "@aws-sdk/util-user-agent-node": "^3.973.10", + "@smithy/config-resolver": "^4.4.13", + "@smithy/core": "^3.23.12", "@smithy/eventstream-serde-browser": "^4.2.12", "@smithy/eventstream-serde-config-resolver": "^4.3.12", "@smithy/eventstream-serde-node": "^4.2.12", @@ -3193,25 +3193,25 @@ "@smithy/hash-node": "^4.2.12", "@smithy/invalid-dependency": "^4.2.12", "@smithy/middleware-content-length": "^4.2.12", - "@smithy/middleware-endpoint": "^4.4.25", - "@smithy/middleware-retry": "^4.4.42", - "@smithy/middleware-serde": "^4.2.14", + "@smithy/middleware-endpoint": "^4.4.27", + "@smithy/middleware-retry": "^4.4.44", + "@smithy/middleware-serde": "^4.2.15", "@smithy/middleware-stack": "^4.2.12", "@smithy/node-config-provider": "^4.3.12", - "@smithy/node-http-handler": "^4.4.16", + "@smithy/node-http-handler": "^4.5.0", "@smithy/protocol-http": "^5.3.12", - "@smithy/smithy-client": "^4.12.5", + "@smithy/smithy-client": "^4.12.7", "@smithy/types": "^4.13.1", "@smithy/url-parser": "^4.2.12", "@smithy/util-base64": "^4.3.2", "@smithy/util-body-length-browser": "^4.2.2", "@smithy/util-body-length-node": "^4.2.3", - "@smithy/util-defaults-mode-browser": "^4.3.41", - "@smithy/util-defaults-mode-node": "^4.2.44", + "@smithy/util-defaults-mode-browser": "^4.3.43", + "@smithy/util-defaults-mode-node": "^4.2.47", "@smithy/util-endpoints": "^3.3.3", "@smithy/util-middleware": "^4.2.12", "@smithy/util-retry": "^4.2.12", - "@smithy/util-stream": "^4.5.19", + "@smithy/util-stream": "^4.5.20", "@smithy/util-utf8": "^4.2.2", "tslib": "^2.6.2" }, @@ -3536,19 +3536,19 @@ } }, "node_modules/@aws-sdk/core": { - "version": "3.973.20", - "resolved": "https://registry.npmjs.org/@aws-sdk/core/-/core-3.973.20.tgz", - "integrity": "sha512-i3GuX+lowD892F3IuJf8o6AbyDupMTdyTxQrCJGcn71ni5hTZ82L4nQhcdumxZ7XPJRJJVHS/CR3uYOIIs0PVA==", + "version": "3.973.23", + "resolved": "https://registry.npmjs.org/@aws-sdk/core/-/core-3.973.23.tgz", + "integrity": "sha512-aoJncvD1XvloZ9JLnKqTRL9dBy+Szkryoag9VT+V1TqsuUgIxV9cnBVM/hrDi2vE8bDqLiDR8nirdRcCdtJu0w==", "license": "Apache-2.0", "dependencies": { "@aws-sdk/types": "^3.973.6", - "@aws-sdk/xml-builder": "^3.972.11", - "@smithy/core": "^3.23.11", + "@aws-sdk/xml-builder": "^3.972.15", + "@smithy/core": "^3.23.12", "@smithy/node-config-provider": "^4.3.12", "@smithy/property-provider": "^4.2.12", "@smithy/protocol-http": "^5.3.12", "@smithy/signature-v4": "^5.3.12", - "@smithy/smithy-client": "^4.12.5", + "@smithy/smithy-client": "^4.12.7", "@smithy/types": "^4.13.1", "@smithy/util-base64": "^4.3.2", "@smithy/util-middleware": "^4.2.12", @@ -3611,12 +3611,12 @@ } }, "node_modules/@aws-sdk/credential-provider-env": { - "version": "3.972.18", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.972.18.tgz", - "integrity": "sha512-X0B8AlQY507i5DwjLByeU2Af4ARsl9Vr84koDcXCbAkplmU+1xBFWxEPrWRAoh56waBne/yJqEloSwvRf4x6XA==", + "version": "3.972.21", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.972.21.tgz", + "integrity": "sha512-BkAfKq8Bd4shCtec1usNz//urPJF/SZy14qJyxkSaRJQ/Vv1gVh0VZSTmS7aE6aLMELkFV5wHHrS9ZcdG8Kxsg==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.973.20", + "@aws-sdk/core": "^3.973.23", "@aws-sdk/types": "^3.973.6", "@smithy/property-provider": "^4.2.12", "@smithy/types": "^4.13.1", @@ -3627,20 +3627,20 @@ } }, "node_modules/@aws-sdk/credential-provider-http": { - "version": "3.972.20", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-http/-/credential-provider-http-3.972.20.tgz", - "integrity": "sha512-ey9Lelj001+oOfrbKmS6R2CJAiXX7QKY4Vj9VJv6L2eE6/VjD8DocHIoYqztTm70xDLR4E1jYPTKfIui+eRNDA==", + "version": "3.972.23", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-http/-/credential-provider-http-3.972.23.tgz", + "integrity": "sha512-4XZ3+Gu5DY8/n8zQFHBgcKTF7hWQl42G6CY9xfXVo2d25FM/lYkpmuzhYopYoPL1ITWkJ2OSBQfYEu5JRfHOhA==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.973.20", + "@aws-sdk/core": "^3.973.23", "@aws-sdk/types": "^3.973.6", "@smithy/fetch-http-handler": "^5.3.15", - "@smithy/node-http-handler": "^4.4.16", + "@smithy/node-http-handler": "^4.5.0", "@smithy/property-provider": "^4.2.12", "@smithy/protocol-http": "^5.3.12", - "@smithy/smithy-client": "^4.12.5", + "@smithy/smithy-client": "^4.12.7", "@smithy/types": "^4.13.1", - "@smithy/util-stream": "^4.5.19", + "@smithy/util-stream": "^4.5.20", "tslib": "^2.6.2" }, "engines": { @@ -3648,19 +3648,19 @@ } }, "node_modules/@aws-sdk/credential-provider-ini": { - "version": "3.972.20", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.972.20.tgz", - "integrity": "sha512-5flXSnKHMloObNF+9N0cupKegnH1Z37cdVlpETVgx8/rAhCe+VNlkcZH3HDg2SDn9bI765S+rhNPXGDJJPfbtA==", + "version": "3.972.23", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.972.23.tgz", + "integrity": "sha512-PZLSmU0JFpNCDFReidBezsgL5ji9jOBry8CnZdw4Jj6d0K2z3Ftnp44NXgADqYx5BLMu/ZHujfeJReaDoV+IwQ==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.973.20", - "@aws-sdk/credential-provider-env": "^3.972.18", - "@aws-sdk/credential-provider-http": "^3.972.20", - "@aws-sdk/credential-provider-login": "^3.972.20", - "@aws-sdk/credential-provider-process": "^3.972.18", - "@aws-sdk/credential-provider-sso": "^3.972.20", - "@aws-sdk/credential-provider-web-identity": "^3.972.20", - "@aws-sdk/nested-clients": "^3.996.10", + "@aws-sdk/core": "^3.973.23", + "@aws-sdk/credential-provider-env": "^3.972.21", + "@aws-sdk/credential-provider-http": "^3.972.23", + "@aws-sdk/credential-provider-login": "^3.972.23", + "@aws-sdk/credential-provider-process": "^3.972.21", + "@aws-sdk/credential-provider-sso": "^3.972.23", + "@aws-sdk/credential-provider-web-identity": "^3.972.23", + "@aws-sdk/nested-clients": "^3.996.13", "@aws-sdk/types": "^3.973.6", "@smithy/credential-provider-imds": "^4.2.12", "@smithy/property-provider": "^4.2.12", @@ -3673,13 +3673,13 @@ } }, "node_modules/@aws-sdk/credential-provider-login": { - "version": "3.972.20", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-login/-/credential-provider-login-3.972.20.tgz", - "integrity": "sha512-gEWo54nfqp2jABMu6HNsjVC4hDLpg9HC8IKSJnp0kqWtxIJYHTmiLSsIfI4ScQjxEwpB+jOOH8dOLax1+hy/Hw==", + "version": "3.972.23", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-login/-/credential-provider-login-3.972.23.tgz", + "integrity": "sha512-OmE/pSkbMM3dCj1HdOnZ5kXnKK+R/Yz+kbBugraBecp0pGAs21eEURfQRz+1N2gzIHLVyGIP1MEjk/uSrFsngg==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.973.20", - "@aws-sdk/nested-clients": "^3.996.10", + "@aws-sdk/core": "^3.973.23", + "@aws-sdk/nested-clients": "^3.996.13", "@aws-sdk/types": "^3.973.6", "@smithy/property-provider": "^4.2.12", "@smithy/protocol-http": "^5.3.12", @@ -3692,17 +3692,17 @@ } }, "node_modules/@aws-sdk/credential-provider-node": { - "version": "3.972.21", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.972.21.tgz", - "integrity": "sha512-hah8if3/B/Q+LBYN5FukyQ1Mym6PLPDsBOBsIgNEYD6wLyZg0UmUF/OKIVC3nX9XH8TfTPuITK+7N/jenVACWA==", + "version": "3.972.24", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.972.24.tgz", + "integrity": "sha512-9Jwi7aps3AfUicJyF5udYadPypPpCwUZ6BSKr/QjRbVCpRVS1wc+1Q6AEZ/qz8J4JraeRd247pSzyMQSIHVebw==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/credential-provider-env": "^3.972.18", - "@aws-sdk/credential-provider-http": "^3.972.20", - "@aws-sdk/credential-provider-ini": "^3.972.20", - "@aws-sdk/credential-provider-process": "^3.972.18", - "@aws-sdk/credential-provider-sso": "^3.972.20", - "@aws-sdk/credential-provider-web-identity": "^3.972.20", + "@aws-sdk/credential-provider-env": "^3.972.21", + "@aws-sdk/credential-provider-http": "^3.972.23", + "@aws-sdk/credential-provider-ini": "^3.972.23", + "@aws-sdk/credential-provider-process": "^3.972.21", + "@aws-sdk/credential-provider-sso": "^3.972.23", + "@aws-sdk/credential-provider-web-identity": "^3.972.23", "@aws-sdk/types": "^3.973.6", "@smithy/credential-provider-imds": "^4.2.12", "@smithy/property-provider": "^4.2.12", @@ -3715,12 +3715,12 @@ } }, "node_modules/@aws-sdk/credential-provider-process": { - "version": "3.972.18", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.972.18.tgz", - "integrity": "sha512-Tpl7SRaPoOLT32jbTWchPsn52hYYgJ0kpiFgnwk8pxTANQdUymVSZkzFvv1+oOgZm1CrbQUP9MBeoMZ9IzLZjA==", + "version": "3.972.21", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.972.21.tgz", + "integrity": "sha512-nRxbeOJ1E1gVA0lNQezuMVndx+ZcuyaW/RB05pUsznN5BxykSlH6KkZ/7Ca/ubJf3i5N3p0gwNO5zgPSCzj+ww==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.973.20", + "@aws-sdk/core": "^3.973.23", "@aws-sdk/types": "^3.973.6", "@smithy/property-provider": "^4.2.12", "@smithy/shared-ini-file-loader": "^4.4.7", @@ -3732,32 +3732,14 @@ } }, "node_modules/@aws-sdk/credential-provider-sso": { - "version": "3.972.20", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.972.20.tgz", - "integrity": "sha512-p+R+PYR5Z7Gjqf/6pvbCnzEHcqPCpLzR7Yf127HjJ6EAb4hUcD+qsNRnuww1sB/RmSeCLxyay8FMyqREw4p1RA==", + "version": "3.972.23", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.972.23.tgz", + "integrity": "sha512-APUccADuYPLL0f2htpM8Z4czabSmHOdo4r41W6lKEZdy++cNJ42Radqy6x4TopENzr3hR6WYMyhiuiqtbf/nAA==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.973.20", - "@aws-sdk/nested-clients": "^3.996.10", - "@aws-sdk/token-providers": "3.1009.0", - "@aws-sdk/types": "^3.973.6", - "@smithy/property-provider": "^4.2.12", - "@smithy/shared-ini-file-loader": "^4.4.7", - "@smithy/types": "^4.13.1", - "tslib": "^2.6.2" - }, - "engines": { - "node": ">=20.0.0" - } - }, - "node_modules/@aws-sdk/credential-provider-sso/node_modules/@aws-sdk/token-providers": { - "version": "3.1009.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.1009.0.tgz", - "integrity": "sha512-KCPLuTqN9u0Rr38Arln78fRG9KXpzsPWmof+PZzfAHMMQq2QED6YjQrkrfiH7PDefLWEposY1o4/eGwrmKA4JA==", - "license": "Apache-2.0", - "dependencies": { - "@aws-sdk/core": "^3.973.20", - "@aws-sdk/nested-clients": "^3.996.10", + "@aws-sdk/core": "^3.973.23", + "@aws-sdk/nested-clients": "^3.996.13", + "@aws-sdk/token-providers": "3.1014.0", "@aws-sdk/types": "^3.973.6", "@smithy/property-provider": "^4.2.12", "@smithy/shared-ini-file-loader": "^4.4.7", @@ -3769,13 +3751,13 @@ } }, "node_modules/@aws-sdk/credential-provider-web-identity": { - "version": "3.972.20", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.972.20.tgz", - "integrity": "sha512-rWCmh8o7QY4CsUj63qopzMzkDq/yPpkrpb+CnjBEFSOg/02T/we7sSTVg4QsDiVS9uwZ8VyONhq98qt+pIh3KA==", + "version": "3.972.23", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.972.23.tgz", + "integrity": "sha512-H5JNqtIwOu/feInmMMWcK0dL5r897ReEn7n2m16Dd0DPD9gA2Hg8Cq4UDzZ/9OzaLh/uqBM6seixz0U6Fi2Eag==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.973.20", - "@aws-sdk/nested-clients": "^3.996.10", + "@aws-sdk/core": "^3.973.23", + "@aws-sdk/nested-clients": "^3.996.13", "@aws-sdk/types": "^3.973.6", "@smithy/property-provider": "^4.2.12", "@smithy/shared-ini-file-loader": "^4.4.7", @@ -4096,15 +4078,15 @@ } }, "node_modules/@aws-sdk/middleware-user-agent": { - "version": "3.972.21", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.972.21.tgz", - "integrity": "sha512-62XRl1GDYPpkt7cx1AX1SPy9wgNE9Iw/NPuurJu4lmhCWS7sGKO+kS53TQ8eRmIxy3skmvNInnk0ZbWrU5Dpyg==", + "version": "3.972.24", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.972.24.tgz", + "integrity": "sha512-dLTWy6IfAMhNiSEvMr07g/qZ54be6pLqlxVblbF6AzafmmGAzMMj8qMoY9B4+YgT+gY9IcuxZslNh03L6PyMCQ==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.973.20", + "@aws-sdk/core": "^3.973.23", "@aws-sdk/types": "^3.973.6", "@aws-sdk/util-endpoints": "^3.996.5", - "@smithy/core": "^3.23.11", + "@smithy/core": "^3.23.12", "@smithy/protocol-http": "^5.3.12", "@smithy/types": "^4.13.1", "@smithy/util-retry": "^4.2.12", @@ -4207,44 +4189,44 @@ } }, "node_modules/@aws-sdk/nested-clients": { - "version": "3.996.10", - "resolved": "https://registry.npmjs.org/@aws-sdk/nested-clients/-/nested-clients-3.996.10.tgz", - "integrity": "sha512-SlDol5Z+C7Ivnc2rKGqiqfSUmUZzY1qHfVs9myt/nxVwswgfpjdKahyTzLTx802Zfq0NFRs7AejwKzzzl5Co2w==", + "version": "3.996.13", + "resolved": "https://registry.npmjs.org/@aws-sdk/nested-clients/-/nested-clients-3.996.13.tgz", + "integrity": "sha512-ptZ1HF4yYHNJX8cgFF+8NdYO69XJKZn7ft0/ynV3c0hCbN+89fAbrLS+fqniU2tW8o9Kfqhj8FUh+IPXb2Qsuw==", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", - "@aws-sdk/core": "^3.973.20", + "@aws-sdk/core": "^3.973.23", "@aws-sdk/middleware-host-header": "^3.972.8", "@aws-sdk/middleware-logger": "^3.972.8", "@aws-sdk/middleware-recursion-detection": "^3.972.8", - "@aws-sdk/middleware-user-agent": "^3.972.21", - "@aws-sdk/region-config-resolver": "^3.972.8", + "@aws-sdk/middleware-user-agent": "^3.972.24", + "@aws-sdk/region-config-resolver": "^3.972.9", "@aws-sdk/types": "^3.973.6", "@aws-sdk/util-endpoints": "^3.996.5", "@aws-sdk/util-user-agent-browser": "^3.972.8", - "@aws-sdk/util-user-agent-node": "^3.973.7", - "@smithy/config-resolver": "^4.4.11", - "@smithy/core": "^3.23.11", + "@aws-sdk/util-user-agent-node": "^3.973.10", + "@smithy/config-resolver": "^4.4.13", + "@smithy/core": "^3.23.12", "@smithy/fetch-http-handler": "^5.3.15", "@smithy/hash-node": "^4.2.12", "@smithy/invalid-dependency": "^4.2.12", "@smithy/middleware-content-length": "^4.2.12", - "@smithy/middleware-endpoint": "^4.4.25", - "@smithy/middleware-retry": "^4.4.42", - "@smithy/middleware-serde": "^4.2.14", + "@smithy/middleware-endpoint": "^4.4.27", + "@smithy/middleware-retry": "^4.4.44", + "@smithy/middleware-serde": "^4.2.15", "@smithy/middleware-stack": "^4.2.12", "@smithy/node-config-provider": "^4.3.12", - "@smithy/node-http-handler": "^4.4.16", + "@smithy/node-http-handler": "^4.5.0", "@smithy/protocol-http": "^5.3.12", - "@smithy/smithy-client": "^4.12.5", + "@smithy/smithy-client": "^4.12.7", "@smithy/types": "^4.13.1", "@smithy/url-parser": "^4.2.12", "@smithy/util-base64": "^4.3.2", "@smithy/util-body-length-browser": "^4.2.2", "@smithy/util-body-length-node": "^4.2.3", - "@smithy/util-defaults-mode-browser": "^4.3.41", - "@smithy/util-defaults-mode-node": "^4.2.44", + "@smithy/util-defaults-mode-browser": "^4.3.43", + "@smithy/util-defaults-mode-node": "^4.2.47", "@smithy/util-endpoints": "^3.3.3", "@smithy/util-middleware": "^4.2.12", "@smithy/util-retry": "^4.2.12", @@ -4310,13 +4292,13 @@ } }, "node_modules/@aws-sdk/region-config-resolver": { - "version": "3.972.8", - "resolved": "https://registry.npmjs.org/@aws-sdk/region-config-resolver/-/region-config-resolver-3.972.8.tgz", - "integrity": "sha512-1eD4uhTDeambO/PNIDVG19A6+v4NdD7xzwLHDutHsUqz0B+i661MwQB2eYO4/crcCvCiQG4SRm1k81k54FEIvw==", + "version": "3.972.9", + "resolved": "https://registry.npmjs.org/@aws-sdk/region-config-resolver/-/region-config-resolver-3.972.9.tgz", + "integrity": "sha512-eQ+dFU05ZRC/lC2XpYlYSPlXtX3VT8sn5toxN2Fv7EXlMoA2p9V7vUBKqHunfD4TRLpxUq8Y8Ol/nCqiv327Ng==", "license": "Apache-2.0", "dependencies": { "@aws-sdk/types": "^3.973.6", - "@smithy/config-resolver": "^4.4.11", + "@smithy/config-resolver": "^4.4.13", "@smithy/node-config-provider": "^4.3.12", "@smithy/types": "^4.13.1", "tslib": "^2.6.2" @@ -4388,13 +4370,13 @@ } }, "node_modules/@aws-sdk/token-providers": { - "version": "3.1011.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.1011.0.tgz", - "integrity": "sha512-WSfBVDQ9uyh1GCR+DxxgHEvAKv+beMIlSeJ2pMAG1HTci340+xbtz1VFwnTJ5qCxrMi+E4dyDMiSAhDvHnq73A==", + "version": "3.1014.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.1014.0.tgz", + "integrity": "sha512-gHTHNUoaOGNrSWkl32A7wFsU78jlNTlqMccLu0byUk5CysYYXaxNMIonIVr4YcykC7vgtDS5ABuz83giy6fzJA==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.973.20", - "@aws-sdk/nested-clients": "^3.996.10", + "@aws-sdk/core": "^3.973.23", + "@aws-sdk/nested-clients": "^3.996.13", "@aws-sdk/types": "^3.973.6", "@smithy/property-provider": "^4.2.12", "@smithy/shared-ini-file-loader": "^4.4.7", @@ -4498,12 +4480,12 @@ } }, "node_modules/@aws-sdk/util-user-agent-node": { - "version": "3.973.7", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-node/-/util-user-agent-node-3.973.7.tgz", - "integrity": "sha512-Hz6EZMUAEzqUd7e+vZ9LE7mn+5gMbxltXy18v+YSFY+9LBJz15wkNZvw5JqfX3z0FS9n3bgUtz3L5rAsfh4YlA==", + "version": "3.973.10", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-node/-/util-user-agent-node-3.973.10.tgz", + "integrity": "sha512-E99zeTscCc+pTMfsvnfi6foPpKmdD1cZfOC7/P8UUrjsoQdg9VEWPRD+xdFduKnfPXwcvby58AlO9jwwF6U96g==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/middleware-user-agent": "^3.972.21", + "@aws-sdk/middleware-user-agent": "^3.972.24", "@aws-sdk/types": "^3.973.6", "@smithy/node-config-provider": "^4.3.12", "@smithy/types": "^4.13.1", @@ -4523,19 +4505,39 @@ } }, "node_modules/@aws-sdk/xml-builder": { - "version": "3.972.11", - "resolved": "https://registry.npmjs.org/@aws-sdk/xml-builder/-/xml-builder-3.972.11.tgz", - "integrity": "sha512-iitV/gZKQMvY9d7ovmyFnFuTHbBAtrmLnvaSb/3X8vOKyevwtpmEtyc8AdhVWZe0pI/1GsHxlEvQeOePFzy7KQ==", + "version": "3.972.15", + "resolved": "https://registry.npmjs.org/@aws-sdk/xml-builder/-/xml-builder-3.972.15.tgz", + "integrity": "sha512-PxMRlCFNiQnke9YR29vjFQwz4jq+6Q04rOVFeTDR2K7Qpv9h9FOWOxG+zJjageimYbWqE3bTuLjmryWHAWbvaA==", "license": "Apache-2.0", "dependencies": { "@smithy/types": "^4.13.1", - "fast-xml-parser": "5.4.1", + "fast-xml-parser": "5.5.8", "tslib": "^2.6.2" }, "engines": { "node": ">=20.0.0" } }, + "node_modules/@aws-sdk/xml-builder/node_modules/fast-xml-parser": { + "version": "5.5.8", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-5.5.8.tgz", + "integrity": "sha512-Z7Fh2nVQSb2d+poDViM063ix2ZGt9jmY1nWhPfHBOK2Hgnb/OW3P4Et3P/81SEej0J7QbWtJqxO05h8QYfK7LQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT", + "dependencies": { + "fast-xml-builder": "^1.1.4", + "path-expression-matcher": "^1.2.0", + "strnum": "^2.2.0" + }, + "bin": { + "fxparser": "src/cli/cli.js" + } + }, "node_modules/@aws/lambda-invoke-store": { "version": "0.2.3", "resolved": "https://registry.npmjs.org/@aws/lambda-invoke-store/-/lambda-invoke-store-0.2.3.tgz", @@ -11859,13 +11861,13 @@ } }, "node_modules/@librechat/agents": { - "version": "3.1.57", - "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.1.57.tgz", - "integrity": "sha512-fP/ZF7a7QL/MhXTfdzpG3cpOai9LSiKMiFX1X23o3t67Bqj9r5FuSVgu+UHDfO7o4Np82ZWw2nQJjcMJQbArLA==", + "version": "3.1.62", + "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.1.62.tgz", + "integrity": "sha512-QBZlJ4C89GmBg9w2qoWOWl1Y1xiRypUtIMBsL6eLPIsdbKHJ+GYO+076rfSD+tMqZB5ZbrxqPWOh+gxEXK1coQ==", "license": "MIT", "dependencies": { "@anthropic-ai/sdk": "^0.73.0", - "@aws-sdk/client-bedrock-runtime": "^3.980.0", + "@aws-sdk/client-bedrock-runtime": "^3.1013.0", "@langchain/anthropic": "^0.3.26", "@langchain/aws": "^0.1.15", "@langchain/core": "^0.3.80", @@ -19185,9 +19187,9 @@ } }, "node_modules/@smithy/config-resolver": { - "version": "4.4.11", - "resolved": "https://registry.npmjs.org/@smithy/config-resolver/-/config-resolver-4.4.11.tgz", - "integrity": "sha512-YxFiiG4YDAtX7WMN7RuhHZLeTmRRAOyCbr+zB8e3AQzHPnUhS8zXjB1+cniPVQI3xbWsQPM0X2aaIkO/ME0ymw==", + "version": "4.4.13", + "resolved": "https://registry.npmjs.org/@smithy/config-resolver/-/config-resolver-4.4.13.tgz", + "integrity": "sha512-iIzMC5NmOUP6WL6o8iPBjFhUhBZ9pPjpUpQYWMUFQqKyXXzOftbfK8zcQCz/jFV1Psmf05BK5ypx4K2r4Tnwdg==", "license": "Apache-2.0", "dependencies": { "@smithy/node-config-provider": "^4.3.12", @@ -19573,9 +19575,9 @@ } }, "node_modules/@smithy/middleware-endpoint": { - "version": "4.4.26", - "resolved": "https://registry.npmjs.org/@smithy/middleware-endpoint/-/middleware-endpoint-4.4.26.tgz", - "integrity": "sha512-8Qfikvd2GVKSm8S6IbjfwFlRY9VlMrj0Dp4vTwAuhqbX7NhJKE5DQc2bnfJIcY0B+2YKMDBWfvexbSZeejDgeg==", + "version": "4.4.27", + "resolved": "https://registry.npmjs.org/@smithy/middleware-endpoint/-/middleware-endpoint-4.4.27.tgz", + "integrity": "sha512-T3TFfUgXQlpcg+UdzcAISdZpj4Z+XECZ/cefgA6wLBd6V4lRi0svN2hBouN/be9dXQ31X4sLWz3fAQDf+nt6BA==", "license": "Apache-2.0", "dependencies": { "@smithy/core": "^3.23.12", @@ -19592,15 +19594,15 @@ } }, "node_modules/@smithy/middleware-retry": { - "version": "4.4.43", - "resolved": "https://registry.npmjs.org/@smithy/middleware-retry/-/middleware-retry-4.4.43.tgz", - "integrity": "sha512-ZwsifBdyuNHrFGmbc7bAfP2b54+kt9J2rhFd18ilQGAB+GDiP4SrawqyExbB7v455QVR7Psyhb2kjULvBPIhvA==", + "version": "4.4.44", + "resolved": "https://registry.npmjs.org/@smithy/middleware-retry/-/middleware-retry-4.4.44.tgz", + "integrity": "sha512-Y1Rav7m5CFRPQyM4CI0koD/bXjyjJu3EQxZZhtLGD88WIrBrQ7kqXM96ncd6rYnojwOo/u9MXu57JrEvu/nLrA==", "license": "Apache-2.0", "dependencies": { "@smithy/node-config-provider": "^4.3.12", "@smithy/protocol-http": "^5.3.12", "@smithy/service-error-classification": "^4.2.12", - "@smithy/smithy-client": "^4.12.6", + "@smithy/smithy-client": "^4.12.7", "@smithy/types": "^4.13.1", "@smithy/util-middleware": "^4.2.12", "@smithy/util-retry": "^4.2.12", @@ -19806,13 +19808,13 @@ } }, "node_modules/@smithy/smithy-client": { - "version": "4.12.6", - "resolved": "https://registry.npmjs.org/@smithy/smithy-client/-/smithy-client-4.12.6.tgz", - "integrity": "sha512-aib3f0jiMsJ6+cvDnXipBsGDL7ztknYSVqJs1FdN9P+u9tr/VzOR7iygSh6EUOdaBeMCMSh3N0VdyYsG4o91DQ==", + "version": "4.12.7", + "resolved": "https://registry.npmjs.org/@smithy/smithy-client/-/smithy-client-4.12.7.tgz", + "integrity": "sha512-q3gqnwml60G44FECaEEsdQMplYhDMZYCtYhMCzadCnRnnHIobZJjegmdoUo6ieLQlPUzvrMdIJUpx6DoPmzANQ==", "license": "Apache-2.0", "dependencies": { "@smithy/core": "^3.23.12", - "@smithy/middleware-endpoint": "^4.4.26", + "@smithy/middleware-endpoint": "^4.4.27", "@smithy/middleware-stack": "^4.2.12", "@smithy/protocol-http": "^5.3.12", "@smithy/types": "^4.13.1", @@ -19950,13 +19952,13 @@ } }, "node_modules/@smithy/util-defaults-mode-browser": { - "version": "4.3.42", - "resolved": "https://registry.npmjs.org/@smithy/util-defaults-mode-browser/-/util-defaults-mode-browser-4.3.42.tgz", - "integrity": "sha512-0vjwmcvkWAUtikXnWIUOyV6IFHTEeQUYh3JUZcDgcszF+hD/StAsQ3rCZNZEPHgI9kVNcbnyc8P2CBHnwgmcwg==", + "version": "4.3.43", + "resolved": "https://registry.npmjs.org/@smithy/util-defaults-mode-browser/-/util-defaults-mode-browser-4.3.43.tgz", + "integrity": "sha512-Qd/0wCKMaXxev/z00TvNzGCH2jlKKKxXP1aDxB6oKwSQthe3Og2dMhSayGCnsma1bK/kQX1+X7SMP99t6FgiiQ==", "license": "Apache-2.0", "dependencies": { "@smithy/property-provider": "^4.2.12", - "@smithy/smithy-client": "^4.12.6", + "@smithy/smithy-client": "^4.12.7", "@smithy/types": "^4.13.1", "tslib": "^2.6.2" }, @@ -19965,16 +19967,16 @@ } }, "node_modules/@smithy/util-defaults-mode-node": { - "version": "4.2.45", - "resolved": "https://registry.npmjs.org/@smithy/util-defaults-mode-node/-/util-defaults-mode-node-4.2.45.tgz", - "integrity": "sha512-q5dOqqfTgUcLe38TAGiFn9srToKj2YCHJ34QGOLzM+xYLLA+qRZv7N+33kl1MERVusue36ZHnlNaNEvY/PzSrw==", + "version": "4.2.47", + "resolved": "https://registry.npmjs.org/@smithy/util-defaults-mode-node/-/util-defaults-mode-node-4.2.47.tgz", + "integrity": "sha512-qSRbYp1EQ7th+sPFuVcVO05AE0QH635hycdEXlpzIahqHHf2Fyd/Zl+8v0XYMJ3cgDVPa0lkMefU7oNUjAP+DQ==", "license": "Apache-2.0", "dependencies": { - "@smithy/config-resolver": "^4.4.11", + "@smithy/config-resolver": "^4.4.13", "@smithy/credential-provider-imds": "^4.2.12", "@smithy/node-config-provider": "^4.3.12", "@smithy/property-provider": "^4.2.12", - "@smithy/smithy-client": "^4.12.6", + "@smithy/smithy-client": "^4.12.7", "@smithy/types": "^4.13.1", "tslib": "^2.6.2" }, @@ -34347,16 +34349,6 @@ "path-to-regexp": "^8.1.0" } }, - "node_modules/nise/node_modules/@sinonjs/fake-timers": { - "version": "13.0.5", - "resolved": "https://registry.npmjs.org/@sinonjs/fake-timers/-/fake-timers-13.0.5.tgz", - "integrity": "sha512-36/hTbH2uaWuGVERyC6da9YwGWnzUZXuPro/F2LfsdOsLnCojz/iSH8MxUt/FD2S5XBSVPhmArFUXcpCQ2Hkiw==", - "dev": true, - "license": "BSD-3-Clause", - "dependencies": { - "@sinonjs/commons": "^3.0.1" - } - }, "node_modules/node-domexception": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", @@ -35340,9 +35332,9 @@ } }, "node_modules/path-expression-matcher": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/path-expression-matcher/-/path-expression-matcher-1.1.3.tgz", - "integrity": "sha512-qdVgY8KXmVdJZRSS1JdEPOKPdTiEK/pi0RkcT2sw1RhXxohdujUlJFPuS1TSkevZ9vzd3ZlL7ULl1MHGTApKzQ==", + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/path-expression-matcher/-/path-expression-matcher-1.2.0.tgz", + "integrity": "sha512-DwmPWeFn+tq7TiyJ2CxezCAirXjFxvaiD03npak3cRjlP9+OjTmSy1EpIrEbh+l6JgUundniloMLDQ/6VTdhLQ==", "funding": [ { "type": "github", @@ -40706,9 +40698,9 @@ } }, "node_modules/strnum": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/strnum/-/strnum-2.1.2.tgz", - "integrity": "sha512-l63NF9y/cLROq/yqKXSLtcMeeyOfnSQlfMSlzFt/K73oIaD8DGaQWd7Z34X9GPiKqP5rbSh84Hl4bOlLcjiSrQ==", + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-2.2.1.tgz", + "integrity": "sha512-BwRvNd5/QoAtyW1na1y1LsJGQNvRlkde6Q/ipqqEaivoMdV+B1OMOTVdwR+N/cwVUcIt9PYyHmV8HyexCZSupg==", "funding": [ { "type": "github", @@ -43985,7 +43977,7 @@ "@google/genai": "^1.19.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.80", - "@librechat/agents": "^3.1.57", + "@librechat/agents": "^3.1.62", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.27.1", "@smithy/node-http-handler": "^4.4.5", diff --git a/packages/api/package.json b/packages/api/package.json index 71bb27a3c4..a4e74a7a3c 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -95,7 +95,7 @@ "@google/genai": "^1.19.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.80", - "@librechat/agents": "^3.1.57", + "@librechat/agents": "^3.1.62", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.27.1", "@smithy/node-http-handler": "^4.4.5", diff --git a/packages/api/src/agents/__tests__/estimateMediaTokensForMessage.spec.ts b/packages/api/src/agents/__tests__/estimateMediaTokensForMessage.spec.ts new file mode 100644 index 0000000000..370168ea5d --- /dev/null +++ b/packages/api/src/agents/__tests__/estimateMediaTokensForMessage.spec.ts @@ -0,0 +1,282 @@ +import { estimateMediaTokensForMessage } from '../client'; + +jest.mock('@librechat/agents', () => ({ + ...jest.requireActual('@librechat/agents'), + extractImageDimensions: jest.fn((data: string) => { + if (data.includes('VALID_PNG')) { + return { width: 800, height: 600 }; + } + return null; + }), + estimateAnthropicImageTokens: jest.fn( + (w: number, h: number) => Math.ceil((w * h) / 750), + ), + estimateOpenAIImageTokens: jest.fn( + (w: number, h: number) => Math.ceil((w * h) / 512) + 85, + ), +})); + +const fakeTokenCount = (text: string) => Math.ceil(text.length / 4); + +describe('estimateMediaTokensForMessage', () => { + describe('non-array content', () => { + it('returns 0 for string content', () => { + expect(estimateMediaTokensForMessage('hello', false)).toBe(0); + }); + + it('returns 0 for null', () => { + expect(estimateMediaTokensForMessage(null, false)).toBe(0); + }); + + it('returns 0 for undefined', () => { + expect(estimateMediaTokensForMessage(undefined, true)).toBe(0); + }); + + it('returns 0 for a number', () => { + expect(estimateMediaTokensForMessage(42, false)).toBe(0); + }); + }); + + describe('empty and malformed arrays', () => { + it('returns 0 for an empty array', () => { + expect(estimateMediaTokensForMessage([], false)).toBe(0); + }); + + it('skips null entries', () => { + expect(estimateMediaTokensForMessage([null, undefined], false)).toBe(0); + }); + + it('skips entries without a string type', () => { + expect(estimateMediaTokensForMessage([{ type: 123 }, { text: 'hi' }], false)).toBe(0); + }); + + it('skips text-only blocks (not media)', () => { + expect(estimateMediaTokensForMessage([{ type: 'text', text: 'hi' }], false)).toBe(0); + }); + }); + + describe('image_url blocks', () => { + it('falls back to 1024 for a remote URL (non-data)', () => { + const content = [{ type: 'image_url', image_url: 'https://example.com/img.png' }]; + expect(estimateMediaTokensForMessage(content, false)).toBe(1024); + }); + + it('falls back to 1024 when image_url is an object with non-data URL', () => { + const content = [{ type: 'image_url', image_url: { url: 'https://example.com/img.png' } }]; + expect(estimateMediaTokensForMessage(content, true)).toBe(1024); + }); + + it('falls back to 1024 when base64 data cannot be decoded', () => { + const content = [{ type: 'image_url', image_url: 'data:image/png;base64,SHORT' }]; + expect(estimateMediaTokensForMessage(content, false)).toBe(1024); + }); + + it('estimates tokens from decoded dimensions (OpenAI path)', () => { + const content = [{ type: 'image_url', image_url: 'data:image/png;base64,VALID_PNG_LONG_DATA' }]; + const result = estimateMediaTokensForMessage(content, false); + expect(result).toBeGreaterThan(0); + expect(result).not.toBe(1024); + }); + + it('estimates tokens from decoded dimensions (Claude path)', () => { + const content = [{ type: 'image_url', image_url: { url: 'data:image/png;base64,VALID_PNG_LONG_DATA' } }]; + const result = estimateMediaTokensForMessage(content, true); + expect(result).toBeGreaterThan(0); + expect(result).not.toBe(1024); + }); + }); + + describe('image blocks (Anthropic format)', () => { + it('falls back to 1024 when source is not base64', () => { + const content = [{ type: 'image', source: { type: 'url', data: 'https://example.com' } }]; + expect(estimateMediaTokensForMessage(content, true)).toBe(1024); + }); + + it('falls back to 1024 when dimensions cannot be extracted', () => { + const content = [{ type: 'image', source: { type: 'base64', data: 'INVALID' } }]; + expect(estimateMediaTokensForMessage(content, true)).toBe(1024); + }); + + it('estimates tokens from valid base64 image data', () => { + const content = [{ type: 'image', source: { type: 'base64', data: 'VALID_PNG' } }]; + const result = estimateMediaTokensForMessage(content, true); + expect(result).toBeGreaterThan(0); + expect(result).not.toBe(1024); + }); + }); + + describe('image_file blocks', () => { + it('falls back to 1024 (no base64 extraction path)', () => { + const content = [{ type: 'image_file', file_id: 'file-abc' }]; + expect(estimateMediaTokensForMessage(content, false)).toBe(1024); + }); + }); + + describe('document blocks - LangChain format (source_type)', () => { + it('counts tokens for text source_type with getTokenCount', () => { + const content = [{ + type: 'document', + source_type: 'text', + text: 'a'.repeat(400), + }]; + expect(estimateMediaTokensForMessage(content, false, fakeTokenCount)).toBe(100); + }); + + it('falls back to length/4 without getTokenCount', () => { + const content = [{ + type: 'document', + source_type: 'text', + text: 'a'.repeat(400), + }]; + expect(estimateMediaTokensForMessage(content, false)).toBe(100); + }); + + it('estimates PDF pages for base64 source_type with application/pdf mime', () => { + const pdfData = 'x'.repeat(150_000); + const content = [{ + type: 'document', + source_type: 'base64', + data: pdfData, + mime_type: 'application/pdf', + }]; + const result = estimateMediaTokensForMessage(content, false); + expect(result).toBe(2 * 1500); + }); + + it('uses Claude PDF rate when isClaude is true', () => { + const pdfData = 'x'.repeat(150_000); + const content = [{ + type: 'document', + source_type: 'base64', + data: pdfData, + mime_type: 'application/pdf', + }]; + const result = estimateMediaTokensForMessage(content, true); + expect(result).toBe(2 * 2000); + }); + + it('defaults to PDF estimation for empty mime_type', () => { + const pdfData = 'x'.repeat(10); + const content = [{ + type: 'document', + source_type: 'base64', + data: pdfData, + mime_type: '', + }]; + const result = estimateMediaTokensForMessage(content, false); + expect(result).toBe(1 * 1500); + }); + + it('handles image/* mime inside base64 source_type', () => { + const content = [{ + type: 'document', + source_type: 'base64', + data: 'VALID_PNG', + mime_type: 'image/png', + }]; + const result = estimateMediaTokensForMessage(content, true); + expect(result).toBeGreaterThan(0); + expect(result).not.toBe(1024); + }); + + it('falls back to 1024 for undecodable image in base64 source_type', () => { + const content = [{ + type: 'document', + source_type: 'base64', + data: 'BAD_DATA', + mime_type: 'image/jpeg', + }]; + expect(estimateMediaTokensForMessage(content, false)).toBe(1024); + }); + + it('falls back to URL_DOCUMENT_FALLBACK_TOKENS for unrecognized source_type', () => { + const content = [{ type: 'document', source_type: 'url' }]; + expect(estimateMediaTokensForMessage(content, false)).toBe(2000); + }); + }); + + describe('document blocks - Anthropic format (source object)', () => { + it('counts tokens for text source type with getTokenCount', () => { + const content = [{ + type: 'document', + source: { type: 'text', data: 'a'.repeat(800) }, + }]; + expect(estimateMediaTokensForMessage(content, true, fakeTokenCount)).toBe(200); + }); + + it('falls back to length/4 for text source without getTokenCount', () => { + const content = [{ + type: 'document', + source: { type: 'text', data: 'a'.repeat(800) }, + }]; + expect(estimateMediaTokensForMessage(content, true)).toBe(200); + }); + + it('estimates PDF pages for base64 source with application/pdf', () => { + const pdfData = 'x'.repeat(225_000); + const content = [{ + type: 'document', + source: { type: 'base64', data: pdfData, media_type: 'application/pdf' }, + }]; + const result = estimateMediaTokensForMessage(content, true); + expect(result).toBe(3 * 2000); + }); + + it('returns URL fallback for url source type', () => { + const content = [{ + type: 'document', + source: { type: 'url' }, + }]; + expect(estimateMediaTokensForMessage(content, false)).toBe(2000); + }); + + it('handles content source type with nested images', () => { + const content = [{ + type: 'document', + source: { + type: 'content', + content: [ + { type: 'image', source: { type: 'base64', data: 'VALID_PNG' } }, + { type: 'image', source: { type: 'base64', data: 'UNDECODABLE' } }, + ], + }, + }]; + const result = estimateMediaTokensForMessage(content, true); + expect(result).toBeGreaterThan(1024); + }); + + it('falls back to URL_DOCUMENT_FALLBACK_TOKENS when source has unknown type', () => { + const content = [{ type: 'document', source: { type: 'unknown_format' } }]; + expect(estimateMediaTokensForMessage(content, false)).toBe(2000); + }); + }); + + describe('file blocks', () => { + it('uses same logic as document for file type blocks', () => { + const content = [{ + type: 'file', + source_type: 'text', + text: 'a'.repeat(120), + }]; + expect(estimateMediaTokensForMessage(content, false, fakeTokenCount)).toBe(30); + }); + + it('falls back to URL_DOCUMENT_FALLBACK_TOKENS for file without source info', () => { + const content = [{ type: 'file' }]; + expect(estimateMediaTokensForMessage(content, false)).toBe(2000); + }); + }); + + describe('mixed content arrays', () => { + it('sums tokens across multiple media blocks', () => { + const content = [ + { type: 'text', text: 'hello' }, + { type: 'image_url', image_url: 'https://example.com/img.png' }, + { type: 'image_file', file_id: 'f1' }, + { type: 'document', source: { type: 'url' } }, + ]; + const result = estimateMediaTokensForMessage(content, false); + expect(result).toBe(1024 + 1024 + 2000); + }); + }); +}); diff --git a/packages/api/src/agents/__tests__/initialize.test.ts b/packages/api/src/agents/__tests__/initialize.test.ts index 01310a09c4..f9982a6e46 100644 --- a/packages/api/src/agents/__tests__/initialize.test.ts +++ b/packages/api/src/agents/__tests__/initialize.test.ts @@ -190,7 +190,7 @@ describe('initializeAgent — maxContextTokens', () => { db, ); - const expected = Math.round((modelDefault - maxOutputTokens) * 0.9); + const expected = Math.round((modelDefault - maxOutputTokens) * 0.95); expect(result.maxContextTokens).toBe(expected); }); @@ -222,7 +222,7 @@ describe('initializeAgent — maxContextTokens', () => { // optionalChainWithEmptyCheck(0, 200000, 18000) returns 0 (not null/undefined), // then Number(0) || 18000 = 18000 (the fallback default). expect(result.maxContextTokens).not.toBe(0); - const expected = Math.round((18000 - maxOutputTokens) * 0.9); + const expected = Math.round((18000 - maxOutputTokens) * 0.95); expect(result.maxContextTokens).toBe(expected); }); @@ -278,7 +278,59 @@ describe('initializeAgent — maxContextTokens', () => { db, ); - // Should NOT be overridden to Math.round((128000 - 4096) * 0.9) = 111,514 + // Should NOT be overridden to Math.round((128000 - 4096) * 0.95) = 117,709 expect(result.maxContextTokens).toBe(userValue); }); + + it('sets baseContextTokens to agentMaxContextNum minus maxOutputTokensNum', async () => { + const modelDefault = 200000; + const maxOutputTokens = 4096; + const { agent, req, res, loadTools, db } = createMocks({ + maxContextTokens: undefined, + modelDefault, + maxOutputTokens, + }); + + const result = await initializeAgent( + { + req, + res, + agent, + loadTools, + endpointOption: { endpoint: EModelEndpoint.agents }, + allowedProviders: new Set([Providers.OPENAI]), + isInitialAgent: true, + }, + db, + ); + + expect(result.baseContextTokens).toBe(modelDefault - maxOutputTokens); + }); + + it('clamps maxContextTokens to at least 1024 for tiny models', async () => { + const modelDefault = 1100; + const maxOutputTokens = 1050; + const { agent, req, res, loadTools, db } = createMocks({ + maxContextTokens: undefined, + modelDefault, + maxOutputTokens, + }); + + const result = await initializeAgent( + { + req, + res, + agent, + loadTools, + endpointOption: { endpoint: EModelEndpoint.agents }, + allowedProviders: new Set([Providers.OPENAI]), + isInitialAgent: true, + }, + db, + ); + + // baseContextTokens = 1100 - 1050 = 50, formula would give ~47.5 rounded + // but Math.max(1024, ...) clamps it + expect(result.maxContextTokens).toBe(1024); + }); }); diff --git a/packages/api/src/agents/__tests__/run-summarization.test.ts b/packages/api/src/agents/__tests__/run-summarization.test.ts new file mode 100644 index 0000000000..2bc0da253a --- /dev/null +++ b/packages/api/src/agents/__tests__/run-summarization.test.ts @@ -0,0 +1,299 @@ +import type { SummarizationConfig } from 'librechat-data-provider'; +import { createRun } from '~/agents/run'; + +// Mock winston logger +jest.mock('winston', () => ({ + createLogger: jest.fn(() => ({ + debug: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + info: jest.fn(), + })), + format: { combine: jest.fn(), colorize: jest.fn(), simple: jest.fn() }, + transports: { Console: jest.fn() }, +})); + +// Mock env utilities so header resolution doesn't fail +jest.mock('~/utils/env', () => ({ + resolveHeaders: jest.fn((opts: { headers: unknown }) => opts?.headers ?? {}), + createSafeUser: jest.fn(() => ({})), +})); + +// Mock Run.create to capture the graphConfig it receives +jest.mock('@librechat/agents', () => { + const actual = jest.requireActual('@librechat/agents'); + return { + ...actual, + Run: { + create: jest.fn().mockResolvedValue({ + processStream: jest.fn().mockResolvedValue(undefined), + }), + }, + }; +}); + +import { Run } from '@librechat/agents'; + +/** Minimal RunAgent factory */ +function makeAgent( + overrides?: Record, +): Record & { id: string; provider: string; model: string } { + return { + id: 'agent_1', + provider: 'openAI', + endpoint: 'openAI', + model: 'gpt-4o', + tools: [], + model_parameters: { model: 'gpt-4o' }, + maxContextTokens: 100_000, + toolContextMap: {}, + ...overrides, + }; +} + +/** Helper: call createRun and return the captured agentInputs array */ +async function callAndCapture( + opts: { + agents?: ReturnType[]; + summarizationConfig?: SummarizationConfig; + initialSummary?: { text: string; tokenCount: number }; + } = {}, +) { + const agents = opts.agents ?? [makeAgent()]; + const signal = new AbortController().signal; + + await createRun({ + agents: agents as never, + signal, + summarizationConfig: opts.summarizationConfig, + initialSummary: opts.initialSummary, + streaming: true, + streamUsage: true, + }); + + const createMock = Run.create as jest.Mock; + expect(createMock).toHaveBeenCalledTimes(1); + const callArgs = createMock.mock.calls[0][0]; + return callArgs.graphConfig.agents as Array>; +} + +beforeEach(() => { + jest.clearAllMocks(); +}); + +// --------------------------------------------------------------------------- +// Suite 1: reserveRatio +// --------------------------------------------------------------------------- +describe('reserveRatio', () => { + it('applies ratio from config using baseContextTokens, capped at maxContextTokens', async () => { + const agents = await callAndCapture({ + agents: [makeAgent({ baseContextTokens: 200_000, maxContextTokens: 200_000 })], + summarizationConfig: { reserveRatio: 0.03, provider: 'anthropic', model: 'claude' }, + }); + // Math.round(200000 * 0.97) = 194000, min(200000, 194000) = 194000 + expect(agents[0].maxContextTokens).toBe(194_000); + }); + + it('never exceeds user-configured maxContextTokens even when ratio computes higher', async () => { + const agents = await callAndCapture({ + agents: [makeAgent({ baseContextTokens: 200_000, maxContextTokens: 50_000 })], + summarizationConfig: { reserveRatio: 0.03, provider: 'anthropic', model: 'claude' }, + }); + // Math.round(200000 * 0.97) = 194000, but min(50000, 194000) = 50000 + expect(agents[0].maxContextTokens).toBe(50_000); + }); + + it('falls back to maxContextTokens when ratio is not set', async () => { + const agents = await callAndCapture({ + agents: [makeAgent({ maxContextTokens: 100_000, baseContextTokens: 200_000 })], + summarizationConfig: { provider: 'anthropic', model: 'claude' }, + }); + expect(agents[0].maxContextTokens).toBe(100_000); + }); + + it('falls back to maxContextTokens when ratio is 0', async () => { + const agents = await callAndCapture({ + agents: [makeAgent({ maxContextTokens: 100_000, baseContextTokens: 200_000 })], + summarizationConfig: { reserveRatio: 0, provider: 'anthropic', model: 'claude' }, + }); + expect(agents[0].maxContextTokens).toBe(100_000); + }); + + it('falls back to maxContextTokens when ratio is 1', async () => { + const agents = await callAndCapture({ + agents: [makeAgent({ maxContextTokens: 100_000, baseContextTokens: 200_000 })], + summarizationConfig: { reserveRatio: 1, provider: 'anthropic', model: 'claude' }, + }); + expect(agents[0].maxContextTokens).toBe(100_000); + }); + + it('falls back to maxContextTokens when baseContextTokens is undefined', async () => { + const agents = await callAndCapture({ + agents: [makeAgent({ maxContextTokens: 100_000 })], + summarizationConfig: { reserveRatio: 0.05, provider: 'anthropic', model: 'claude' }, + }); + expect(agents[0].maxContextTokens).toBe(100_000); + }); + + it('clamps to 1024 minimum but still capped at maxContextTokens', async () => { + const agents = await callAndCapture({ + agents: [makeAgent({ baseContextTokens: 500, maxContextTokens: 2000 })], + summarizationConfig: { reserveRatio: 0.99, provider: 'anthropic', model: 'claude' }, + }); + // Math.round(500 * 0.01) = 5 → clamped to 1024, min(2000, 1024) = 1024 + expect(agents[0].maxContextTokens).toBe(1024); + }); +}); + +// --------------------------------------------------------------------------- +// Suite 2: maxSummaryTokens passthrough +// --------------------------------------------------------------------------- +describe('maxSummaryTokens passthrough', () => { + it('forwards global maxSummaryTokens value', async () => { + const agents = await callAndCapture({ + summarizationConfig: { + provider: 'anthropic', + model: 'claude', + maxSummaryTokens: 4096, + }, + }); + const config = agents[0].summarizationConfig as Record; + expect(config.maxSummaryTokens).toBe(4096); + }); +}); + +// --------------------------------------------------------------------------- +// Suite 3: summarizationEnabled resolution +// --------------------------------------------------------------------------- +describe('summarizationEnabled resolution', () => { + it('true with provider + model + enabled', async () => { + const agents = await callAndCapture({ + summarizationConfig: { + enabled: true, + provider: 'anthropic', + model: 'claude-3-haiku', + }, + }); + expect(agents[0].summarizationEnabled).toBe(true); + }); + + it('false when provider is empty string', async () => { + const agents = await callAndCapture({ + summarizationConfig: { + enabled: true, + provider: '', + model: 'claude-3-haiku', + }, + }); + expect(agents[0].summarizationEnabled).toBe(false); + }); + + it('false when enabled is explicitly false', async () => { + const agents = await callAndCapture({ + summarizationConfig: { + enabled: false, + provider: 'anthropic', + model: 'claude-3-haiku', + }, + }); + expect(agents[0].summarizationEnabled).toBe(false); + }); + + it('true with self-summarize default when summarizationConfig is undefined', async () => { + const agents = await callAndCapture({ + summarizationConfig: undefined, + }); + expect(agents[0].summarizationEnabled).toBe(true); + const config = agents[0].summarizationConfig as Record; + expect(config.provider).toBe('openAI'); + expect(config.model).toBe('gpt-4o'); + }); +}); + +// --------------------------------------------------------------------------- +// Suite 4: summarizationConfig field passthrough +// --------------------------------------------------------------------------- +describe('summarizationConfig field passthrough', () => { + it('all fields pass through to agentInputs', async () => { + const agents = await callAndCapture({ + summarizationConfig: { + enabled: true, + trigger: { type: 'token_count', value: 8000 }, + provider: 'anthropic', + model: 'claude-3-haiku', + parameters: { temperature: 0.2 }, + prompt: 'Summarize this conversation', + updatePrompt: 'Update the existing summary with new messages', + reserveRatio: 0.1, + maxSummaryTokens: 4096, + }, + }); + const config = agents[0].summarizationConfig as Record; + expect(config).toBeDefined(); + // `enabled` is not forwarded to the agent-level config — it is resolved + // into the separate `summarizationEnabled` boolean on the agent input. + expect(agents[0].summarizationEnabled).toBe(true); + expect(config.trigger).toEqual({ type: 'token_count', value: 8000 }); + expect(config.provider).toBe('anthropic'); + expect(config.model).toBe('claude-3-haiku'); + expect(config.parameters).toEqual({ temperature: 0.2 }); + expect(config.prompt).toBe('Summarize this conversation'); + expect(config.updatePrompt).toBe('Update the existing summary with new messages'); + expect(config.reserveRatio).toBe(0.1); + expect(config.maxSummaryTokens).toBe(4096); + }); + + it('uses self-summarize default when no config provided', async () => { + const agents = await callAndCapture({ + summarizationConfig: undefined, + }); + const config = agents[0].summarizationConfig as Record; + expect(config).toBeDefined(); + // `enabled` is resolved into `summarizationEnabled`, not forwarded on config + expect(agents[0].summarizationEnabled).toBe(true); + expect(config.provider).toBe('openAI'); + expect(config.model).toBe('gpt-4o'); + }); +}); + +// --------------------------------------------------------------------------- +// Suite 5: Multi-agent + per-agent overrides +// --------------------------------------------------------------------------- +describe('multi-agent + per-agent overrides', () => { + it('different agents get different effectiveMaxContextTokens', async () => { + const agents = await callAndCapture({ + agents: [ + makeAgent({ id: 'agent_1', baseContextTokens: 200_000, maxContextTokens: 100_000 }), + makeAgent({ id: 'agent_2', baseContextTokens: 100_000, maxContextTokens: 50_000 }), + ], + summarizationConfig: { + reserveRatio: 0.1, + provider: 'anthropic', + model: 'claude', + }, + }); + // agent_1: Math.round(200000 * 0.9) = 180000, but capped at user's maxContextTokens (100000) + expect(agents[0].maxContextTokens).toBe(100_000); + // agent_2: Math.round(100000 * 0.9) = 90000, but capped at user's maxContextTokens (50000) + expect(agents[1].maxContextTokens).toBe(50_000); + }); +}); + +// --------------------------------------------------------------------------- +// Suite 6: initialSummary passthrough +// --------------------------------------------------------------------------- +describe('initialSummary passthrough', () => { + it('forwarded to agent inputs', async () => { + const summary = { text: 'Previous conversation summary', tokenCount: 500 }; + const agents = await callAndCapture({ + initialSummary: summary, + summarizationConfig: { provider: 'anthropic', model: 'claude' }, + }); + expect(agents[0].initialSummary).toEqual(summary); + }); + + it('undefined when not provided', async () => { + const agents = await callAndCapture({}); + expect(agents[0].initialSummary).toBeUndefined(); + }); +}); diff --git a/packages/api/src/agents/__tests__/summarization.e2e.test.ts b/packages/api/src/agents/__tests__/summarization.e2e.test.ts new file mode 100644 index 0000000000..03ef2ca6d4 --- /dev/null +++ b/packages/api/src/agents/__tests__/summarization.e2e.test.ts @@ -0,0 +1,595 @@ +/** + * E2E Backend Integration Tests for Summarization + * + * Exercises the FULL LibreChat -> agents pipeline: + * LibreChat's createRun (@librechat/api) + * -> agents package Run.create (@librechat/agents) + * -> graph execution -> summarization node -> events + * + * Uses real AI providers, real formatAgentMessages, real token accounting. + * Tracks summaries both mid-run and between runs. + * + * Run from packages/api: + * npx jest summarization.e2e --no-coverage --testTimeout=180000 + * + * Requires real API keys in the environment (ANTHROPIC_API_KEY, OPENAI_API_KEY). + */ +import { + Providers, + Calculator, + GraphEvents, + ToolEndHandler, + ModelEndHandler, + createTokenCounter, + formatAgentMessages, + ChatModelStreamHandler, + createContentAggregator, +} from '@librechat/agents'; +import type { + SummarizeCompleteEvent, + MessageContentComplex, + SummaryContentBlock, + SummarizeStartEvent, + TokenCounter, + EventHandler, +} from '@librechat/agents'; +import { hydrateMissingIndexTokenCounts } from '~/utils'; +import { ioredisClient, keyvRedisClient } from '~/cache'; +import { createRun } from '~/agents'; + +afterAll(async () => { + await ioredisClient?.quit().catch(() => {}); + await keyvRedisClient?.disconnect().catch(() => {}); +}); + +// --------------------------------------------------------------------------- +// Shared test infrastructure +// --------------------------------------------------------------------------- + +interface Spies { + onMessageDelta: jest.Mock; + onRunStep: jest.Mock; + onSummarizeStart: jest.Mock; + onSummarizeDelta: jest.Mock; + onSummarizeComplete: jest.Mock; +} + +type PayloadMessage = { + role: string; + content: string | Array>; +}; + +function getSummaryText(summary: SummaryContentBlock): string { + if (Array.isArray(summary.content)) { + return summary.content + .map((b: MessageContentComplex) => ('text' in b ? (b as { text: string }).text : '')) + .join(''); + } + return ''; +} + +function createSpies(): Spies { + return { + onMessageDelta: jest.fn(), + onRunStep: jest.fn(), + onSummarizeStart: jest.fn(), + onSummarizeDelta: jest.fn(), + onSummarizeComplete: jest.fn(), + }; +} + +function buildHandlers( + collectedUsage: ConstructorParameters[0], + aggregateContent: (params: { event: string; data: unknown }) => void, + spies: Spies, +): Record { + return { + [GraphEvents.TOOL_END]: new ToolEndHandler(), + [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage), + [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(), + [GraphEvents.ON_RUN_STEP]: { + handle: (event: string, data: unknown) => { + spies.onRunStep(event, data); + aggregateContent({ event, data }); + }, + }, + [GraphEvents.ON_RUN_STEP_COMPLETED]: { + handle: (event: string, data: unknown) => { + aggregateContent({ event, data }); + }, + }, + [GraphEvents.ON_RUN_STEP_DELTA]: { + handle: (event: string, data: unknown) => { + aggregateContent({ event, data }); + }, + }, + [GraphEvents.ON_MESSAGE_DELTA]: { + handle: (event: string, data: unknown, metadata?: Record) => { + spies.onMessageDelta(event, data, metadata); + aggregateContent({ event, data }); + }, + }, + [GraphEvents.TOOL_START]: { + handle: () => {}, + }, + [GraphEvents.ON_SUMMARIZE_START]: { + handle: (_event: string, data: unknown) => { + spies.onSummarizeStart(data); + }, + }, + [GraphEvents.ON_SUMMARIZE_DELTA]: { + handle: (_event: string, data: unknown) => { + spies.onSummarizeDelta(data); + aggregateContent({ event: GraphEvents.ON_SUMMARIZE_DELTA, data }); + }, + }, + [GraphEvents.ON_SUMMARIZE_COMPLETE]: { + handle: (_event: string, data: unknown) => { + spies.onSummarizeComplete(data); + }, + }, + }; +} + +function getDefaultModel(provider: string): string { + switch (provider) { + case Providers.ANTHROPIC: + return 'claude-haiku-4-5-20251001'; + case Providers.OPENAI: + return 'gpt-4.1-mini'; + default: + return 'gpt-4.1-mini'; + } +} + +// --------------------------------------------------------------------------- +// Turn runner — mirrors AgentClient.chatCompletion() message flow +// --------------------------------------------------------------------------- + +interface RunFullTurnParams { + payload: PayloadMessage[]; + agentProvider: string; + summarizationProvider: string; + summarizationModel?: string; + maxContextTokens: number; + instructions: string; + spies: Spies; + tokenCounter: TokenCounter; + model?: string; +} + +async function runFullTurn({ + payload, + agentProvider, + summarizationProvider, + summarizationModel, + maxContextTokens, + instructions, + spies, + tokenCounter, + model, +}: RunFullTurnParams) { + const collectedUsage: ConstructorParameters[0] = []; + const { contentParts, aggregateContent } = createContentAggregator(); + + const formatted = formatAgentMessages(payload as never, {}); + const { messages: initialMessages, summary: initialSummary } = formatted; + let { indexTokenCountMap } = formatted; + + indexTokenCountMap = hydrateMissingIndexTokenCounts({ + messages: initialMessages, + indexTokenCountMap: indexTokenCountMap as Record, + tokenCounter, + }); + + const abortController = new AbortController(); + const agent = { + id: `test-agent-${agentProvider}`, + name: 'Test Agent', + provider: agentProvider, + instructions, + tools: [new Calculator()], + maxContextTokens, + model_parameters: { + model: model || getDefaultModel(agentProvider), + streaming: true, + streamUsage: true, + }, + }; + + const summarizationConfig = { + enabled: true, + provider: summarizationProvider, + model: summarizationModel || getDefaultModel(summarizationProvider), + prompt: + 'You are a summarization assistant. Summarize the following conversation messages concisely, preserving key facts, decisions, and context needed to continue the conversation. Do not include preamble -- output only the summary.', + }; + + const run = await createRun({ + agents: [agent] as never, + messages: initialMessages, + indexTokenCountMap, + initialSummary, + runId: `e2e-${Date.now()}`, + signal: abortController.signal, + customHandlers: buildHandlers(collectedUsage, aggregateContent, spies) as never, + summarizationConfig, + tokenCounter, + }); + + const streamConfig = { + configurable: { thread_id: `e2e-${Date.now()}` }, + recursionLimit: 100, + streamMode: 'values' as const, + version: 'v2' as const, + }; + let result: unknown; + let processError: Error | undefined; + try { + result = await run.processStream({ messages: initialMessages }, streamConfig); + } catch (err) { + processError = err as Error; + } + const runMessages = run.getRunMessages() || []; + + return { + result, + processError, + runMessages, + collectedUsage, + contentParts, + indexTokenCountMap, + }; +} + +function getLastContent(runMessages: Array<{ content: string | unknown }>): string { + const last = runMessages[runMessages.length - 1]; + if (!last) { + return ''; + } + return typeof last.content === 'string' ? last.content : JSON.stringify(last.content); +} + +// --------------------------------------------------------------------------- +// Anthropic Tests +// --------------------------------------------------------------------------- + +const hasAnthropic = + process.env.ANTHROPIC_API_KEY != null && process.env.ANTHROPIC_API_KEY !== 'test'; +(hasAnthropic ? describe : describe.skip)('Anthropic Summarization E2E (LibreChat)', () => { + jest.setTimeout(180_000); + + const instructions = + 'You are an expert math tutor. You MUST use the calculator tool for ALL computations. Keep answers to 1-2 sentences.'; + + test('multi-turn triggers summarization, summary persists across runs', async () => { + const spies = createSpies(); + const tokenCounter = await createTokenCounter(); + const conversationPayload: PayloadMessage[] = []; + + const addTurn = async (userMsg: string, maxTokens: number) => { + conversationPayload.push({ role: 'user', content: userMsg }); + const result = await runFullTurn({ + payload: conversationPayload, + agentProvider: Providers.ANTHROPIC, + summarizationProvider: Providers.ANTHROPIC, + summarizationModel: 'claude-haiku-4-5-20251001', + maxContextTokens: maxTokens, + instructions, + spies, + tokenCounter, + }); + conversationPayload.push({ role: 'assistant', content: getLastContent(result.runMessages) }); + return result; + }; + + await addTurn('What is 12345 * 6789? Use the calculator.', 2000); + await addTurn( + 'Now divide that result by 137. Then multiply by 42. Calculator for each step.', + 2000, + ); + await addTurn( + 'Compute step by step: 1) 9876543 - 1234567 2) sqrt of result 3) Add 100. Calculator for each.', + 1500, + ); + await addTurn('What is 2^20? Calculator. Then list everything we calculated so far.', 800); + + if (spies.onSummarizeStart.mock.calls.length === 0) { + await addTurn('Calculate 355 / 113. Calculator.', 600); + } + if (spies.onSummarizeStart.mock.calls.length === 0) { + await addTurn('What is 999 * 999? Calculator.', 400); + } + + const startCalls = spies.onSummarizeStart.mock.calls.length; + const completeCalls = spies.onSummarizeComplete.mock.calls.length; + + expect(startCalls).toBeGreaterThanOrEqual(1); + expect(completeCalls).toBeGreaterThanOrEqual(1); + + const startPayload = spies.onSummarizeStart.mock.calls[0][0] as SummarizeStartEvent; + expect(startPayload.agentId).toBeDefined(); + expect(startPayload.provider).toBeDefined(); + expect(startPayload.messagesToRefineCount).toBeGreaterThan(0); + expect(startPayload.summaryVersion).toBeGreaterThanOrEqual(1); + + const completePayload = spies.onSummarizeComplete.mock.calls[0][0] as SummarizeCompleteEvent; + expect(completePayload.summary).toBeDefined(); + expect(getSummaryText(completePayload.summary!).length).toBeGreaterThan(10); + expect(completePayload.summary!.tokenCount).toBeGreaterThan(0); + expect(completePayload.summary!.tokenCount!).toBeLessThan(2000); + expect(completePayload.summary!.provider).toBeDefined(); + expect(completePayload.summary!.createdAt).toBeDefined(); + expect(completePayload.summary!.summaryVersion).toBeGreaterThanOrEqual(1); + + // --- Cross-run: persist summary -> formatAgentMessages -> new run --- + const summaryBlock = completePayload.summary!; + const crossRunPayload: PayloadMessage[] = [ + { + role: 'assistant', + content: [ + { + type: 'summary', + content: [{ type: 'text', text: getSummaryText(summaryBlock) }], + tokenCount: summaryBlock.tokenCount, + }, + ], + }, + conversationPayload[conversationPayload.length - 2], + conversationPayload[conversationPayload.length - 1], + { + role: 'user', + content: 'What was the first calculation we did? Verify with calculator.', + }, + ]; + + spies.onSummarizeStart.mockClear(); + spies.onSummarizeComplete.mockClear(); + + const crossRun = await runFullTurn({ + payload: crossRunPayload, + agentProvider: Providers.ANTHROPIC, + summarizationProvider: Providers.ANTHROPIC, + summarizationModel: 'claude-haiku-4-5-20251001', + maxContextTokens: 2000, + instructions, + spies, + tokenCounter, + }); + + console.log( + ` Cross-run: messages=${crossRun.runMessages.length}, content=${crossRun.contentParts.length}, deltas=${spies.onMessageDelta.mock.calls.length}`, + ); + // Content aggregator should have received response deltas even if getRunMessages is empty + expect(crossRun.contentParts.length + spies.onMessageDelta.mock.calls.length).toBeGreaterThan( + 0, + ); + }); + + test('tight context (maxContextTokens=200) does not infinite-loop', async () => { + const spies = createSpies(); + const tokenCounter = await createTokenCounter(); + const conversationPayload: PayloadMessage[] = []; + + conversationPayload.push({ role: 'user', content: 'What is 42 * 58? Calculator.' }); + const t1 = await runFullTurn({ + payload: conversationPayload, + agentProvider: Providers.ANTHROPIC, + summarizationProvider: Providers.ANTHROPIC, + summarizationModel: 'claude-haiku-4-5-20251001', + maxContextTokens: 2000, + instructions, + spies, + tokenCounter, + }); + conversationPayload.push({ role: 'assistant', content: getLastContent(t1.runMessages) }); + + conversationPayload.push({ role: 'user', content: 'Now compute 2436 + 1337. Calculator.' }); + const t2 = await runFullTurn({ + payload: conversationPayload, + agentProvider: Providers.ANTHROPIC, + summarizationProvider: Providers.ANTHROPIC, + summarizationModel: 'claude-haiku-4-5-20251001', + maxContextTokens: 2000, + instructions, + spies, + tokenCounter, + }); + conversationPayload.push({ role: 'assistant', content: getLastContent(t2.runMessages) }); + + conversationPayload.push({ role: 'user', content: 'What is 100 / 4? Calculator.' }); + + let error: Error | undefined; + try { + await runFullTurn({ + payload: conversationPayload, + agentProvider: Providers.ANTHROPIC, + summarizationProvider: Providers.ANTHROPIC, + summarizationModel: 'claude-haiku-4-5-20251001', + maxContextTokens: 200, + instructions, + spies, + tokenCounter, + }); + } catch (err) { + error = err as Error; + } + + // The key guarantee: the system terminates — no true infinite loop. + // With very tight context, the graph may either: + // 1. Complete normally (model responds within budget) + // 2. Hit recursion limit (bounded tool-call cycles) + // 3. Error with empty_messages (context too small for any message) + // All are valid termination modes. + if (error) { + const isCleanTermination = + error.message.includes('Recursion limit') || error.message.includes('empty_messages'); + + expect(isCleanTermination).toBe(true); + } + + // Summarization may or may not fire depending on whether the budget + // allows any messages before the graph terminates. With 200 tokens + // and instructions at ~100 tokens, there may be no room for history, + // which correctly skips summarization. + + console.log( + ` Tight context: summarize=${spies.onSummarizeStart.mock.calls.length}, error=${error?.message?.substring(0, 80) ?? 'none'}`, + ); + }); +}); + +// --------------------------------------------------------------------------- +// OpenAI Tests +// --------------------------------------------------------------------------- + +const hasOpenAI = process.env.OPENAI_API_KEY != null && process.env.OPENAI_API_KEY !== 'test'; +(hasOpenAI ? describe : describe.skip)('OpenAI Summarization E2E (LibreChat)', () => { + jest.setTimeout(180_000); + + const instructions = + 'You are a helpful math tutor. Use the calculator tool for ALL computations. Keep responses concise.'; + + test('multi-turn with cross-run summary continuity', async () => { + const spies = createSpies(); + const tokenCounter = await createTokenCounter(); + const conversationPayload: PayloadMessage[] = []; + + const addTurn = async (userMsg: string, maxTokens: number) => { + conversationPayload.push({ role: 'user', content: userMsg }); + const result = await runFullTurn({ + payload: conversationPayload, + agentProvider: Providers.OPENAI, + summarizationProvider: Providers.OPENAI, + summarizationModel: 'gpt-4.1-mini', + maxContextTokens: maxTokens, + instructions, + spies, + tokenCounter, + }); + conversationPayload.push({ role: 'assistant', content: getLastContent(result.runMessages) }); + return result; + }; + + await addTurn('What is 1234 * 5678? Calculator.', 2000); + await addTurn('Compute sqrt(7006652) with calculator.', 1500); + await addTurn('Calculate 99*101 and 2^15. Calculator for each.', 1200); + await addTurn('What is 314159 * 271828? Calculator. Remind me of all prior results.', 800); + + if (spies.onSummarizeStart.mock.calls.length === 0) { + await addTurn('Calculate 999999 / 7. Calculator.', 600); + } + if (spies.onSummarizeStart.mock.calls.length === 0) { + await addTurn('What is 42 + 58? Calculator.', 400); + } + if (spies.onSummarizeStart.mock.calls.length === 0) { + await addTurn('Calculate 7 * 13. Calculator.', 300); + } + if (spies.onSummarizeStart.mock.calls.length === 0) { + await addTurn('What is 100 - 37? Calculator.', 200); + } + + expect(spies.onSummarizeStart.mock.calls.length).toBeGreaterThanOrEqual(1); + expect(spies.onSummarizeComplete.mock.calls.length).toBeGreaterThanOrEqual(1); + + const complete = spies.onSummarizeComplete.mock.calls[0][0] as SummarizeCompleteEvent; + expect(getSummaryText(complete.summary!).length).toBeGreaterThan(10); + expect(complete.summary!.tokenCount).toBeGreaterThan(0); + expect(complete.summary!.summaryVersion).toBeGreaterThanOrEqual(1); + expect(complete.summary!.provider).toBe(Providers.OPENAI); + + const summaryBlock = complete.summary!; + const crossRunPayload: PayloadMessage[] = [ + { + role: 'assistant', + content: [ + { + type: 'summary', + content: [{ type: 'text', text: getSummaryText(summaryBlock) }], + tokenCount: summaryBlock.tokenCount, + }, + ], + }, + conversationPayload[conversationPayload.length - 2], + conversationPayload[conversationPayload.length - 1], + { role: 'user', content: 'What was the first number we calculated? Verify with calculator.' }, + ]; + + spies.onSummarizeStart.mockClear(); + spies.onSummarizeComplete.mockClear(); + + const crossRun = await runFullTurn({ + payload: crossRunPayload, + agentProvider: Providers.OPENAI, + summarizationProvider: Providers.OPENAI, + summarizationModel: 'gpt-4.1-mini', + maxContextTokens: 2000, + instructions, + spies, + tokenCounter, + }); + + console.log( + ` Cross-run: messages=${crossRun.runMessages.length}, content=${crossRun.contentParts.length}, deltas=${spies.onMessageDelta.mock.calls.length}`, + ); + expect(crossRun.contentParts.length + spies.onMessageDelta.mock.calls.length).toBeGreaterThan( + 0, + ); + }); +}); + +// --------------------------------------------------------------------------- +// Cross-provider: Anthropic agent, OpenAI summarizer +// --------------------------------------------------------------------------- + +const hasBothProviders = hasAnthropic && hasOpenAI; +(hasBothProviders ? describe : describe.skip)( + 'Cross-provider Summarization E2E (LibreChat)', + () => { + jest.setTimeout(180_000); + + const instructions = + 'You are a math assistant. Use the calculator for every computation. Be brief.'; + + test('Anthropic agent with OpenAI summarizer', async () => { + const spies = createSpies(); + const tokenCounter = await createTokenCounter(); + const conversationPayload: PayloadMessage[] = []; + + const addTurn = async (userMsg: string, maxTokens: number) => { + conversationPayload.push({ role: 'user', content: userMsg }); + const result = await runFullTurn({ + payload: conversationPayload, + agentProvider: Providers.ANTHROPIC, + summarizationProvider: Providers.OPENAI, + summarizationModel: 'gpt-4.1-mini', + maxContextTokens: maxTokens, + instructions, + spies, + tokenCounter, + }); + conversationPayload.push({ + role: 'assistant', + content: getLastContent(result.runMessages), + }); + return result; + }; + + await addTurn('Compute 54321 * 12345 using calculator.', 2000); + await addTurn('Now calculate 670592745 / 99991. Calculator.', 1500); + await addTurn('What is sqrt(670592745)? Calculator.', 1000); + await addTurn('Compute 2^32 with calculator. List all prior results.', 600); + + if (spies.onSummarizeStart.mock.calls.length === 0) { + await addTurn('13 * 17 * 19 = ? Calculator.', 400); + } + + expect(spies.onSummarizeComplete.mock.calls.length).toBeGreaterThanOrEqual(1); + const complete = spies.onSummarizeComplete.mock.calls[0][0] as SummarizeCompleteEvent; + + expect(complete.summary!.provider).toBe(Providers.OPENAI); + expect(complete.summary!.model).toBe('gpt-4.1-mini'); + expect(getSummaryText(complete.summary!).length).toBeGreaterThan(10); + }); + }, +); diff --git a/packages/api/src/agents/client.ts b/packages/api/src/agents/client.ts index fd5d50f211..c84230572f 100644 --- a/packages/api/src/agents/client.ts +++ b/packages/api/src/agents/client.ts @@ -1,6 +1,12 @@ import { logger } from '@librechat/data-schemas'; -import { isAgentsEndpoint } from 'librechat-data-provider'; -import { labelContentByAgent, getTokenCountForMessage } from '@librechat/agents'; +import { ContentTypes, isAgentsEndpoint } from 'librechat-data-provider'; +import { + labelContentByAgent, + extractImageDimensions, + getTokenCountForMessage, + estimateOpenAIImageTokens, + estimateAnthropicImageTokens, +} from '@librechat/agents'; import type { MessageContentComplex } from '@librechat/agents'; import type { Agent, TMessage } from 'librechat-data-provider'; import type { BaseMessage } from '@langchain/core/messages'; @@ -27,10 +33,247 @@ export function payloadParser({ req, endpoint }: { req: ServerRequest; endpoint: return req.body?.endpointOption?.model_parameters; } +/** + * Anthropic's API consistently reports ~10% more tokens than the local + * claude tokenizer due to internal message framing and content encoding. + * Verified empirically across content types via the count_tokens endpoint. + */ +export const CLAUDE_TOKEN_CORRECTION = 1.1; +const IMAGE_TOKEN_SAFETY_MARGIN = 1.05; +const BASE64_BYTES_PER_PDF_PAGE = 75_000; +const PDF_TOKENS_PER_PAGE_CLAUDE = 2000; +const PDF_TOKENS_PER_PAGE_OPENAI = 1500; +const URL_DOCUMENT_FALLBACK_TOKENS = 2000; + +type ContentBlock = { + type?: string; + image_url?: string | { url?: string }; + source?: { type?: string; data?: string; media_type?: string; content?: unknown[] }; + source_type?: string; + mime_type?: string; + data?: string; + text?: string; + tool_call?: { name?: string; args?: string; output?: string }; +}; + +function estimateImageDataTokens(data: string, isClaude: boolean): number { + const dims = extractImageDimensions(data); + if (dims == null) { + return 1024; + } + const raw = isClaude + ? estimateAnthropicImageTokens(dims.width, dims.height) + : estimateOpenAIImageTokens(dims.width, dims.height); + return Math.ceil(raw * IMAGE_TOKEN_SAFETY_MARGIN); +} + +function estimateImageBlockTokens(block: ContentBlock, isClaude: boolean): number { + let base64Data: string | undefined; + if (block.type === 'image_url') { + const url = typeof block.image_url === 'string' ? block.image_url : block.image_url?.url; + if (typeof url === 'string' && url.startsWith('data:')) { + base64Data = url; + } + } else if (block.type === 'image') { + if (block.source?.type === 'base64' && typeof block.source.data === 'string') { + base64Data = block.source.data; + } + } + if (base64Data == null) { + return 1024; + } + return estimateImageDataTokens(base64Data, isClaude); +} + +function estimateDocumentBlockTokens( + block: ContentBlock, + isClaude: boolean, + countTokens?: (text: string) => number, +): number { + const pdfPerPage = isClaude ? PDF_TOKENS_PER_PAGE_CLAUDE : PDF_TOKENS_PER_PAGE_OPENAI; + + if (typeof block.source_type === 'string') { + if (block.source_type === 'text' && typeof block.text === 'string') { + return countTokens != null ? countTokens(block.text) : Math.ceil(block.text.length / 4); + } + if (block.source_type === 'base64' && typeof block.data === 'string') { + const mime = (block.mime_type ?? '').split(';')[0]; + if (mime === 'application/pdf' || mime === '') { + return Math.max(1, Math.ceil(block.data.length / BASE64_BYTES_PER_PDF_PAGE)) * pdfPerPage; + } + if (mime.startsWith('image/')) { + return estimateImageDataTokens(block.data, isClaude); + } + return countTokens != null ? countTokens(block.data) : Math.ceil(block.data.length / 4); + } + return URL_DOCUMENT_FALLBACK_TOKENS; + } + + if (block.source != null) { + if (block.source.type === 'text' && typeof block.source.data === 'string') { + return countTokens != null + ? countTokens(block.source.data) + : Math.ceil(block.source.data.length / 4); + } + if (block.source.type === 'base64' && typeof block.source.data === 'string') { + const mime = (block.source.media_type ?? '').split(';')[0]; + if (mime === 'application/pdf' || mime === '') { + const pages = Math.max(1, Math.ceil(block.source.data.length / BASE64_BYTES_PER_PDF_PAGE)); + return pages * pdfPerPage; + } + if (mime.startsWith('image/')) { + return estimateImageDataTokens(block.source.data, isClaude); + } + return countTokens != null + ? countTokens(block.source.data) + : Math.ceil(block.source.data.length / 4); + } + if (block.source.type === 'url') { + return URL_DOCUMENT_FALLBACK_TOKENS; + } + if (block.source.type === 'content' && Array.isArray(block.source.content)) { + let tokens = 0; + for (const inner of block.source.content) { + const innerBlock = inner as ContentBlock | null; + if ( + innerBlock?.type === 'image' && + innerBlock.source?.type === 'base64' && + typeof innerBlock.source.data === 'string' + ) { + tokens += estimateImageDataTokens(innerBlock.source.data, isClaude); + } + } + return tokens; + } + } + + return URL_DOCUMENT_FALLBACK_TOKENS; +} + +/** + * Estimates token cost for image and document blocks in a message's + * content array. Covers: image_url, image, image_file, document, file. + */ +export function estimateMediaTokensForMessage( + content: unknown, + isClaude: boolean, + getTokenCount?: (text: string) => number, +): number { + if (!Array.isArray(content)) { + return 0; + } + let tokens = 0; + for (const block of content as ContentBlock[]) { + if (block == null || typeof block !== 'object' || typeof block.type !== 'string') { + continue; + } + const type = block.type; + if (type === 'image_url' || type === 'image' || type === 'image_file') { + tokens += estimateImageBlockTokens(block, isClaude); + continue; + } + if (type === 'document' || type === 'file') { + tokens += estimateDocumentBlockTokens(block, isClaude, getTokenCount); + } + } + return tokens; +} + +/** + * Single-pass token counter for formatted messages (plain objects with role/content/name). + * Handles text, tool_call, image, and document content types in one iteration, + * then applies Claude correction when applicable. + */ +export function countFormattedMessageTokens( + message: Partial>, + encoding: Parameters[1], +): number { + const countTokens = (text: string) => Tokenizer.getTokenCount(text, encoding); + const isClaude = encoding === 'claude'; + + let numTokens = 3; + + const processValue = (value: unknown): void => { + if (Array.isArray(value)) { + for (const item of value) { + if (item == null || typeof item !== 'object') { + continue; + } + const block = item as ContentBlock; + const type = block.type; + if (typeof type !== 'string') { + continue; + } + + if (type === ContentTypes.THINK || type === ContentTypes.ERROR) { + continue; + } + + if ( + type === ContentTypes.IMAGE_URL || + type === 'image' || + type === ContentTypes.IMAGE_FILE + ) { + numTokens += estimateImageBlockTokens(block, isClaude); + continue; + } + + if (type === 'document' || type === 'file') { + numTokens += estimateDocumentBlockTokens(block, isClaude, countTokens); + continue; + } + + if (type === ContentTypes.TOOL_CALL && block.tool_call != null) { + const { name, args, output } = block.tool_call; + if (typeof name === 'string' && name) { + numTokens += countTokens(name); + } + if (typeof args === 'string' && args) { + numTokens += countTokens(args); + } + if (typeof output === 'string' && output) { + numTokens += countTokens(output); + } + continue; + } + + const nestedValue = (item as Record)[type]; + if (nestedValue != null) { + processValue(nestedValue); + } + } + return; + } + + if (typeof value === 'string') { + numTokens += countTokens(value); + } else if (typeof value === 'number') { + numTokens += countTokens(value.toString()); + } else if (typeof value === 'boolean') { + numTokens += countTokens(value.toString()); + } + }; + + for (const [key, value] of Object.entries(message)) { + processValue(value); + if (key === 'name') { + numTokens += 1; + } + } + + return isClaude ? Math.ceil(numTokens * CLAUDE_TOKEN_CORRECTION) : numTokens; +} + export function createTokenCounter(encoding: Parameters[1]) { + const isClaude = encoding === 'claude'; + const countTokens = (text: string) => Tokenizer.getTokenCount(text, encoding); return function (message: BaseMessage) { - const countTokens = (text: string) => Tokenizer.getTokenCount(text, encoding); - return getTokenCountForMessage(message, countTokens); + const count = getTokenCountForMessage( + message, + countTokens, + encoding as 'claude' | 'o200k_base', + ); + return isClaude ? Math.ceil(count * CLAUDE_TOKEN_CORRECTION) : count; }; } diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts index d5bfca5aba..81bc89cac4 100644 --- a/packages/api/src/agents/initialize.ts +++ b/packages/api/src/agents/initialize.ts @@ -33,6 +33,13 @@ import { getProviderConfig } from '~/endpoints'; import { primeResources } from './resources'; import type { TFilterFilesByAgentAccess } from './resources'; +/** + * Fraction of context budget reserved as headroom when no explicit maxContextTokens is set. + * Reduced from 0.10 to 0.05 alongside the introduction of summarization, which actively + * manages overflow. `createRun` can further override this via `SummarizationConfig.reserveRatio`. + */ +const DEFAULT_RESERVE_RATIO = 0.05; + /** * Extended agent type with additional fields needed after initialization */ @@ -41,6 +48,8 @@ export type InitializedAgent = Agent & { attachments: IMongoFile[]; toolContextMap: Record; maxContextTokens: number; + /** Pre-ratio context budget (agentMaxContextNum - maxOutputTokensNum). Used by createRun to apply a configurable reserve ratio. */ + baseContextTokens?: number; useLegacyContent: boolean; resendFiles: boolean; tool_resources?: AgentToolResources; @@ -55,6 +64,8 @@ export type InitializedAgent = Agent & { hasDeferredTools?: boolean; /** Whether the actions capability is enabled (resolved during tool loading) */ actionsEnabled?: boolean; + /** Maximum characters allowed in a single tool result before truncation. */ + maxToolResultChars?: number; }; /** @@ -311,7 +322,7 @@ export async function initializeAgent( actionsEnabled: undefined, }; - const { getOptions, overrideProvider } = getProviderConfig({ + const { getOptions, overrideProvider, customEndpointConfig } = getProviderConfig({ provider, appConfig: req.config, }); @@ -405,11 +416,25 @@ export async function initializeAgent( const agentMaxContextNum = Number(agentMaxContextTokens) || 18000; const maxOutputTokensNum = Number(maxOutputTokens) || 0; + const baseContextTokens = Math.max(0, agentMaxContextNum - maxOutputTokensNum); const finalAttachments: IMongoFile[] = (primedAttachments ?? []) .filter((a): a is TFile => a != null) .map((a) => a as unknown as IMongoFile); + const endpointConfigs = req.config?.endpoints; + const providerConfig = + customEndpointConfig ?? endpointConfigs?.[agent.provider as keyof typeof endpointConfigs]; + const providerMaxToolResultChars = + providerConfig != null && + typeof providerConfig === 'object' && + !Array.isArray(providerConfig) && + 'maxToolResultChars' in providerConfig + ? (providerConfig.maxToolResultChars as number | undefined) + : undefined; + const maxToolResultCharsResolved = + providerMaxToolResultChars ?? endpointConfigs?.all?.maxToolResultChars; + const initializedAgent: InitializedAgent = { ...agent, resendFiles, @@ -419,14 +444,16 @@ export async function initializeAgent( toolDefinitions, hasDeferredTools, actionsEnabled, + baseContextTokens, attachments: finalAttachments, toolContextMap: toolContextMap ?? {}, useLegacyContent: !!options.useLegacyContent, tools: (tools ?? []) as GenericTool[] & string[], + maxToolResultChars: maxToolResultCharsResolved, maxContextTokens: maxContextTokens != null && maxContextTokens > 0 ? maxContextTokens - : Math.round((agentMaxContextNum - maxOutputTokensNum) * 0.9), + : Math.max(1024, Math.round(baseContextTokens * (1 - DEFAULT_RESERVE_RATIO))), }; return initializedAgent; diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 189ef59469..b6b5e6a14d 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -1,8 +1,9 @@ import { Run, Providers, Constants } from '@librechat/agents'; import { providerEndpointMap, KnownEndpoints } from 'librechat-data-provider'; -import type { BaseMessage } from '@langchain/core/messages'; import type { + SummarizationConfig as AgentSummarizationConfig, MultiAgentGraphConfig, + ContextPruningConfig, OpenAIClientOptions, StandardGraphConfig, LCToolRegistry, @@ -12,8 +13,9 @@ import type { IState, LCTool, } from '@librechat/agents'; +import type { Agent, SummarizationConfig } from 'librechat-data-provider'; +import type { BaseMessage } from '@langchain/core/messages'; import type { IUser } from '@librechat/data-schemas'; -import type { Agent } from 'librechat-data-provider'; import type * as t from '~/types'; import { resolveHeaders, createSafeUser } from '~/utils/env'; @@ -162,6 +164,8 @@ export function getReasoningKey( type RunAgent = Omit & { tools?: GenericTool[]; maxContextTokens?: number; + /** Pre-ratio context budget from initializeAgent. */ + baseContextTokens?: number; useLegacyContent?: boolean; toolContextMap?: Record; toolRegistry?: LCToolRegistry; @@ -169,8 +173,65 @@ type RunAgent = Omit & { toolDefinitions?: LCTool[]; /** Precomputed flag indicating if any tools have defer_loading enabled */ hasDeferredTools?: boolean; + /** Optional per-agent summarization overrides */ + summarization?: SummarizationConfig; + /** + * Maximum characters allowed in a single tool result before truncation. + * Overrides the default computed from maxContextTokens. + */ + maxToolResultChars?: number; }; +function isNonEmptyString(value: unknown): value is string { + return typeof value === 'string' && value.trim().length > 0; +} + +/** Shapes a SummarizationConfig into the format expected by AgentInputs. */ +function shapeSummarizationConfig( + config: SummarizationConfig | undefined, + fallbackProvider: string, + fallbackModel: string | undefined, +) { + const provider = config?.provider ?? fallbackProvider; + const model = config?.model ?? fallbackModel; + const trigger = + config?.trigger?.type && config?.trigger?.value + ? { type: config.trigger.type, value: config.trigger.value } + : undefined; + + return { + enabled: config?.enabled !== false && isNonEmptyString(provider) && isNonEmptyString(model), + config: { + trigger, + provider, + model, + parameters: config?.parameters, + prompt: config?.prompt, + updatePrompt: config?.updatePrompt, + reserveRatio: config?.reserveRatio, + maxSummaryTokens: config?.maxSummaryTokens, + } satisfies AgentSummarizationConfig, + contextPruning: config?.contextPruning as ContextPruningConfig | undefined, + reserveRatio: config?.reserveRatio, + }; +} + +/** + * Applies `reserveRatio` against the pre-ratio base context budget, falling + * back to the pre-computed `maxContextTokens` from initializeAgent. + */ +function computeEffectiveMaxContextTokens( + reserveRatio: number | undefined, + baseContextTokens: number | undefined, + maxContextTokens: number | undefined, +): number | undefined { + if (reserveRatio == null || reserveRatio <= 0 || reserveRatio >= 1 || baseContextTokens == null) { + return maxContextTokens; + } + const ratioComputed = Math.max(1024, Math.round(baseContextTokens * (1 - reserveRatio))); + return Math.min(maxContextTokens ?? ratioComputed, ratioComputed); +} + /** * Creates a new Run instance with custom handlers and configuration. * @@ -196,6 +257,9 @@ export async function createRun({ tokenCounter, customHandlers, indexTokenCountMap, + summarizationConfig, + initialSummary, + calibrationRatio, streaming = true, streamUsage = true, }: { @@ -208,6 +272,11 @@ export async function createRun({ user?: IUser; /** Message history for extracting previously discovered tools */ messages?: BaseMessage[]; + summarizationConfig?: SummarizationConfig; + /** Cross-run summary from formatAgentMessages, forwarded to AgentContext */ + initialSummary?: { text: string; tokenCount: number }; + /** Calibration ratio from previous run's contextMeta, seeds the pruner EMA */ + calibrationRatio?: number; } & Pick): Promise< Run > { @@ -232,6 +301,13 @@ export async function createRun({ (providerEndpointMap[ agent.provider as keyof typeof providerEndpointMap ] as unknown as Providers) ?? agent.provider; + const selfModel = agent.model_parameters?.model ?? (agent.model as string | undefined); + + const summarization = shapeSummarizationConfig( + agent.summarization ?? summarizationConfig, + provider as string, + selfModel, + ); const llmConfig: t.RunLLMConfig = Object.assign( { @@ -299,6 +375,12 @@ export async function createRun({ } } + const effectiveMaxContextTokens = computeEffectiveMaxContextTokens( + summarization.reserveRatio, + agent.baseContextTokens, + agent.maxContextTokens, + ); + const reasoningKey = getReasoningKey(provider, llmConfig, agent.endpoint); const agentInput: AgentInputs = { provider, @@ -310,9 +392,14 @@ export async function createRun({ instructions: systemContent, name: agent.name ?? undefined, toolRegistry: agent.toolRegistry, - maxContextTokens: agent.maxContextTokens, + maxContextTokens: effectiveMaxContextTokens, useLegacyContent: agent.useLegacyContent ?? false, discoveredTools: discoveredTools.size > 0 ? Array.from(discoveredTools) : undefined, + summarizationEnabled: summarization.enabled, + summarizationConfig: summarization.config, + initialSummary, + contextPruningConfig: summarization.contextPruning, + maxToolResultChars: agent.maxToolResultChars, }; agentInputs.push(agentInput); }; @@ -339,5 +426,6 @@ export async function createRun({ tokenCounter, customHandlers, indexTokenCountMap, + calibrationRatio, }); } diff --git a/packages/api/src/agents/usage.spec.ts b/packages/api/src/agents/usage.spec.ts index d0b065b8ff..b75baf69a8 100644 --- a/packages/api/src/agents/usage.spec.ts +++ b/packages/api/src/agents/usage.spec.ts @@ -108,6 +108,46 @@ describe('recordCollectedUsage', () => { }); }); + describe('summarization usage segregation', () => { + it('includes summarization output tokens in total while billing under separate context', async () => { + const collectedUsage: UsageMetadata[] = [ + { + usage_type: 'message', + input_tokens: 120, + output_tokens: 40, + model: 'gpt-4', + }, + { + usage_type: 'summarization', + input_tokens: 30, + output_tokens: 12, + model: 'gpt-4.1-mini', + }, + ]; + + const result = await recordCollectedUsage(deps, { + ...baseParams, + collectedUsage, + }); + + expect(result).toEqual({ input_tokens: 120, output_tokens: 52 }); + expect(mockSpendTokens).toHaveBeenCalledTimes(2); + expect(mockSpendTokens).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ context: 'message', model: 'gpt-4' }), + expect.any(Object), + ); + expect(mockSpendTokens).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + context: 'summarization', + model: 'gpt-4.1-mini', + }), + expect.any(Object), + ); + }); + }); + describe('parallel execution (multiple agents)', () => { it('should handle parallel agents with independent input tokens', async () => { const collectedUsage: UsageMetadata[] = [ @@ -718,4 +758,130 @@ describe('recordCollectedUsage', () => { expect(result).toEqual({ input_tokens: 100, output_tokens: 50 }); }); }); + + describe('bulk write with summarization usage', () => { + let mockInsertMany: jest.Mock; + let mockUpdateBalance: jest.Mock; + let mockPricing: PricingFns; + let mockBulkWriteOps: BulkWriteDeps; + let bulkDeps: RecordUsageDeps; + + beforeEach(() => { + mockInsertMany = jest.fn().mockResolvedValue(undefined); + mockUpdateBalance = jest.fn().mockResolvedValue({}); + mockPricing = { + getMultiplier: jest.fn().mockReturnValue(1), + getCacheMultiplier: jest.fn().mockReturnValue(null), + }; + mockBulkWriteOps = { + insertMany: mockInsertMany, + updateBalance: mockUpdateBalance, + }; + bulkDeps = { + spendTokens: mockSpendTokens, + spendStructuredTokens: mockSpendStructuredTokens, + pricing: mockPricing, + bulkWriteOps: mockBulkWriteOps, + }; + }); + + it('combines message and summarization docs into a single bulk write', async () => { + const collectedUsage: UsageMetadata[] = [ + { + usage_type: 'message', + input_tokens: 200, + output_tokens: 80, + model: 'gpt-4', + }, + { + usage_type: 'summarization', + input_tokens: 50, + output_tokens: 20, + model: 'gpt-4.1-mini', + }, + ]; + + const result = await recordCollectedUsage(bulkDeps, { + ...baseParams, + collectedUsage, + }); + + expect(mockInsertMany).toHaveBeenCalledTimes(1); + expect(mockUpdateBalance).toHaveBeenCalledTimes(1); + expect(mockSpendTokens).not.toHaveBeenCalled(); + expect(mockSpendStructuredTokens).not.toHaveBeenCalled(); + + const insertedDocs = mockInsertMany.mock.calls[0][0]; + // 2 docs per entry (prompt + completion) x 2 entries = 4 docs + expect(insertedDocs).toHaveLength(4); + + const messageContextDocs = insertedDocs.filter( + (d: Record) => d.context === 'message', + ); + const summarizationContextDocs = insertedDocs.filter( + (d: Record) => d.context === 'summarization', + ); + expect(messageContextDocs).toHaveLength(2); + expect(summarizationContextDocs).toHaveLength(2); + + expect(result).toEqual({ input_tokens: 200, output_tokens: 100 }); + }); + + it('handles summarization-only usage in bulk mode', async () => { + const collectedUsage: UsageMetadata[] = [ + { + usage_type: 'summarization', + input_tokens: 60, + output_tokens: 25, + model: 'gpt-4.1-mini', + }, + ]; + + const result = await recordCollectedUsage(bulkDeps, { + ...baseParams, + collectedUsage, + }); + + expect(mockInsertMany).toHaveBeenCalledTimes(1); + expect(mockSpendTokens).not.toHaveBeenCalled(); + expect(mockSpendStructuredTokens).not.toHaveBeenCalled(); + + const insertedDocs = mockInsertMany.mock.calls[0][0]; + expect(insertedDocs).toHaveLength(2); + + const summarizationContextDocs = insertedDocs.filter( + (d: Record) => d.context === 'summarization', + ); + expect(summarizationContextDocs).toHaveLength(2); + + expect(result).toEqual({ input_tokens: 0, output_tokens: 25 }); + }); + + it('handles message-only usage in bulk mode', async () => { + const collectedUsage: UsageMetadata[] = [ + { input_tokens: 100, output_tokens: 50, model: 'gpt-4' }, + { input_tokens: 200, output_tokens: 60, model: 'gpt-4' }, + ]; + + const result = await recordCollectedUsage(bulkDeps, { + ...baseParams, + collectedUsage, + }); + + expect(mockInsertMany).toHaveBeenCalledTimes(1); + expect(mockSpendTokens).not.toHaveBeenCalled(); + expect(mockSpendStructuredTokens).not.toHaveBeenCalled(); + + const insertedDocs = mockInsertMany.mock.calls[0][0]; + // 2 docs per entry x 2 entries = 4 docs + expect(insertedDocs).toHaveLength(4); + + const messageContextDocs = insertedDocs.filter( + (d: Record) => d.context === 'message', + ); + expect(messageContextDocs).toHaveLength(4); + + expect(result).toEqual({ input_tokens: 100, output_tokens: 110 }); + }); + }); }); diff --git a/packages/api/src/agents/usage.ts b/packages/api/src/agents/usage.ts index c092702730..3b2497c947 100644 --- a/packages/api/src/agents/usage.ts +++ b/packages/api/src/agents/usage.ts @@ -73,104 +73,125 @@ export async function recordCollectedUsage( return; } - const firstUsage = collectedUsage[0]; + const messageUsages: UsageMetadata[] = []; + const summarizationUsages: UsageMetadata[] = []; + for (const usage of collectedUsage) { + if (usage == null) { + continue; + } + (usage.usage_type === 'summarization' ? summarizationUsages : messageUsages).push(usage); + } + + const firstUsage = messageUsages[0]; const input_tokens = - (firstUsage?.input_tokens || 0) + - (Number(firstUsage?.input_token_details?.cache_creation) || - Number(firstUsage?.cache_creation_input_tokens) || - 0) + - (Number(firstUsage?.input_token_details?.cache_read) || - Number(firstUsage?.cache_read_input_tokens) || - 0); + firstUsage == null + ? 0 + : (firstUsage.input_tokens || 0) + + (Number(firstUsage.input_token_details?.cache_creation) || + Number(firstUsage.cache_creation_input_tokens) || + 0) + + (Number(firstUsage.input_token_details?.cache_read) || + Number(firstUsage.cache_read_input_tokens) || + 0); let total_output_tokens = 0; const { pricing, bulkWriteOps } = deps; const useBulk = pricing && bulkWriteOps; - const allDocs: PreparedEntry[] = []; + const processUsageGroup = ( + usages: UsageMetadata[], + usageContext: string, + docs: PreparedEntry[], + ): void => { + for (const usage of usages) { + if (!usage) { + continue; + } - for (const usage of collectedUsage) { - if (!usage) { - continue; - } + const cache_creation = + Number(usage.input_token_details?.cache_creation) || + Number(usage.cache_creation_input_tokens) || + 0; + const cache_read = + Number(usage.input_token_details?.cache_read) || Number(usage.cache_read_input_tokens) || 0; - const cache_creation = - Number(usage.input_token_details?.cache_creation) || - Number(usage.cache_creation_input_tokens) || - 0; - const cache_read = - Number(usage.input_token_details?.cache_read) || Number(usage.cache_read_input_tokens) || 0; + total_output_tokens += Number(usage.output_tokens) || 0; - total_output_tokens += Number(usage.output_tokens) || 0; + const txMetadata: TxMetadata = { + user, + balance, + messageId, + transactions, + conversationId, + endpointTokenConfig, + context: usageContext, + model: usage.model ?? model, + }; - const txMetadata: TxMetadata = { - user, - context, - balance, - messageId, - transactions, - conversationId, - endpointTokenConfig, - model: usage.model ?? model, - }; - - if (useBulk) { - const entries = - cache_creation > 0 || cache_read > 0 - ? prepareStructuredTokenSpend( - txMetadata, - { - promptTokens: { - input: usage.input_tokens, - write: cache_creation, - read: cache_read, + if (useBulk) { + const entries = + cache_creation > 0 || cache_read > 0 + ? prepareStructuredTokenSpend( + txMetadata, + { + promptTokens: { + input: usage.input_tokens, + write: cache_creation, + read: cache_read, + }, + completionTokens: usage.output_tokens, }, - completionTokens: usage.output_tokens, - }, - pricing, - ) - : prepareTokenSpend( - txMetadata, - { - promptTokens: usage.input_tokens, - completionTokens: usage.output_tokens, - }, - pricing, - ); - allDocs.push(...entries); - continue; - } + pricing, + ) + : prepareTokenSpend( + txMetadata, + { + promptTokens: usage.input_tokens, + completionTokens: usage.output_tokens, + }, + pricing, + ); + docs.push(...entries); + continue; + } + + if (cache_creation > 0 || cache_read > 0) { + deps + .spendStructuredTokens(txMetadata, { + promptTokens: { + input: usage.input_tokens, + write: cache_creation, + read: cache_read, + }, + completionTokens: usage.output_tokens, + }) + .catch((err) => { + logger.error( + `[packages/api #recordCollectedUsage] Error spending structured ${usageContext} tokens`, + err, + ); + }); + continue; + } - if (cache_creation > 0 || cache_read > 0) { deps - .spendStructuredTokens(txMetadata, { - promptTokens: { - input: usage.input_tokens, - write: cache_creation, - read: cache_read, - }, + .spendTokens(txMetadata, { + promptTokens: usage.input_tokens, completionTokens: usage.output_tokens, }) .catch((err) => { logger.error( - '[packages/api #recordCollectedUsage] Error spending structured tokens', + `[packages/api #recordCollectedUsage] Error spending ${usageContext} tokens`, err, ); }); - continue; } + }; - deps - .spendTokens(txMetadata, { - promptTokens: usage.input_tokens, - completionTokens: usage.output_tokens, - }) - .catch((err) => { - logger.error('[packages/api #recordCollectedUsage] Error spending tokens', err); - }); - } - + const allDocs: PreparedEntry[] = []; + processUsageGroup(messageUsages, context, allDocs); + processUsageGroup(summarizationUsages, 'summarization', allDocs); if (useBulk && allDocs.length > 0) { try { await bulkWriteTransactions({ user, docs: allDocs }, bulkWriteOps); diff --git a/packages/api/src/app/AppService.spec.ts b/packages/api/src/app/AppService.spec.ts index a7b5a46054..df607d612b 100644 --- a/packages/api/src/app/AppService.spec.ts +++ b/packages/api/src/app/AppService.spec.ts @@ -181,6 +181,43 @@ describe('AppService', () => { ); }); + it('should enable summarization when it is configured without enabled flag', async () => { + const config = { + summarization: { + prompt: 'Summarize with emphasis on next actions', + }, + } as Partial & { summarization: Record }; + + const result = await AppService({ config }); + expect(result).toEqual( + expect.objectContaining({ + summarization: expect.objectContaining({ + enabled: true, + prompt: 'Summarize with emphasis on next actions', + }), + }), + ); + }); + + it('should preserve explicit summarization disable flag', async () => { + const config = { + summarization: { + enabled: false, + prompt: 'Ignored while disabled', + }, + } as Partial & { summarization: Record }; + + const result = await AppService({ config }); + expect(result).toEqual( + expect.objectContaining({ + summarization: expect.objectContaining({ + enabled: false, + prompt: 'Ignored while disabled', + }), + }), + ); + }); + it('should load and format tools accurately with defined structure', async () => { const config = {}; diff --git a/packages/api/src/stream/interfaces/IJobStore.ts b/packages/api/src/stream/interfaces/IJobStore.ts index 5486b941eb..fadddb840d 100644 --- a/packages/api/src/stream/interfaces/IJobStore.ts +++ b/packages/api/src/stream/interfaces/IJobStore.ts @@ -65,12 +65,18 @@ export interface SerializableJobData { * ``` */ export interface UsageMetadata { + /** Logical usage bucket for accounting/reporting. Defaults to model response usage. */ + usage_type?: 'message' | 'summarization'; /** Total input tokens (prompt tokens) */ input_tokens?: number; /** Total output tokens (completion tokens) */ output_tokens?: number; + /** Total billed tokens when provided by the model/runtime */ + total_tokens?: number; /** Model identifier that generated this usage */ model?: string; + /** Provider identifier that generated this usage */ + provider?: string; /** * OpenAI-style cache token details. * Present for OpenAI models (GPT-4, o1, etc.) diff --git a/packages/api/src/utils/index.ts b/packages/api/src/utils/index.ts index 50582832c0..2b4ac88245 100644 --- a/packages/api/src/utils/index.ts +++ b/packages/api/src/utils/index.ts @@ -24,6 +24,7 @@ export * from './text'; export * from './yaml'; export * from './http'; export * from './tokens'; +export * from './tokenMap'; export * from './url'; export * from './message'; export * from './tracing'; diff --git a/packages/api/src/utils/tokenMap.ts b/packages/api/src/utils/tokenMap.ts new file mode 100644 index 0000000000..71e2f65af6 --- /dev/null +++ b/packages/api/src/utils/tokenMap.ts @@ -0,0 +1,45 @@ +import type { BaseMessage } from '@langchain/core/messages'; + +/** Signature for a function that counts tokens in a LangChain message. */ +export type TokenCounter = (message: BaseMessage) => number; + +/** + * Lazily fills missing token counts for formatted LangChain messages. + * Preserves precomputed counts and only computes undefined indices. + * + * This is used after `formatAgentMessages` to ensure every message index + * has a token count before passing `indexTokenCountMap` to the agent run. + */ +export function hydrateMissingIndexTokenCounts({ + messages, + indexTokenCountMap, + tokenCounter, +}: { + messages: BaseMessage[]; + indexTokenCountMap: Record | undefined; + tokenCounter: TokenCounter; +}): Record { + const hydratedMap: Record = {}; + + if (indexTokenCountMap) { + for (const key in indexTokenCountMap) { + const tokenCount = indexTokenCountMap[Number(key)]; + if (typeof tokenCount === 'number' && Number.isFinite(tokenCount) && tokenCount > 0) { + hydratedMap[Number(key)] = tokenCount; + } + } + } + + for (let i = 0; i < messages.length; i++) { + if ( + typeof hydratedMap[i] === 'number' && + Number.isFinite(hydratedMap[i]) && + hydratedMap[i] > 0 + ) { + continue; + } + hydratedMap[i] = tokenCounter(messages[i]); + } + + return hydratedMap; +} diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 35411a1c9c..9bc3822c4b 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -205,6 +205,8 @@ export const baseEndpointSchema = z.object({ .optional(), titleEndpoint: z.string().optional(), titlePromptTemplate: z.string().optional(), + /** Maximum characters allowed in a single tool result before truncation. */ + maxToolResultChars: z.number().positive().optional(), }); export type TBaseEndpoint = z.infer; @@ -948,6 +950,34 @@ export const memorySchema = z.object({ export type TMemoryConfig = DeepPartial>; +export const summarizationTriggerSchema = z.object({ + type: z.enum(['token_count']), + value: z.number().positive(), +}); + +export const contextPruningSchema = z.object({ + enabled: z.boolean().optional(), + keepLastAssistants: z.number().min(0).max(10).optional(), + softTrimRatio: z.number().min(0).max(1).optional(), + hardClearRatio: z.number().min(0).max(1).optional(), + minPrunableToolChars: z.number().min(0).optional(), +}); + +export const summarizationConfigSchema = z.object({ + enabled: z.boolean().optional(), + provider: z.string().optional(), + model: z.string().optional(), + parameters: z.record(z.union([z.string(), z.number(), z.boolean(), z.null()])).optional(), + trigger: summarizationTriggerSchema.optional(), + prompt: z.string().optional(), + updatePrompt: z.string().optional(), + reserveRatio: z.number().min(0).max(1).optional(), + maxSummaryTokens: z.number().positive().optional(), + contextPruning: contextPruningSchema.optional(), +}); + +export type SummarizationConfig = z.infer; + const customEndpointsSchema = z.array(endpointSchema.partial()).optional(); export const configSchema = z.object({ @@ -956,6 +986,7 @@ export const configSchema = z.object({ ocr: ocrSchema.optional(), webSearch: webSearchSchema.optional(), memory: memorySchema.optional(), + summarization: summarizationConfigSchema.optional(), secureImageLinks: z.boolean().optional(), imageOutputType: z.nativeEnum(EImageOutputType).default(EImageOutputType.PNG), includedTools: z.array(z.string()).optional(), diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index 7eb0482e9f..19ba804556 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -630,6 +630,18 @@ export const tMessageSchema = z.object({ feedback: feedbackSchema.optional(), /** metadata */ metadata: z.record(z.unknown()).optional(), + contextMeta: z + .object({ + calibrationRatio: z + .number() + .optional() + .describe('EMA ratio of provider-reported vs local token estimates; seeds the pruner on subsequent runs'), + encoding: z + .string() + .optional() + .describe('Tokenizer encoding used when this ratio was computed (e.g. "claude", "o200k_base")'), + }) + .optional(), }); export type MemoryArtifact = { diff --git a/packages/data-provider/src/types/agents.ts b/packages/data-provider/src/types/agents.ts index ac3f464019..db70de8c9d 100644 --- a/packages/data-provider/src/types/agents.ts +++ b/packages/data-provider/src/types/agents.ts @@ -1,7 +1,7 @@ /* eslint-disable @typescript-eslint/no-namespace */ import { StepTypes, ContentTypes, ToolCallTypes } from './runs'; +import type { FunctionToolCall, SummaryContentPart } from './assistants'; import type { TAttachment, TPlugin } from 'src/schemas'; -import type { FunctionToolCall } from './assistants'; export namespace Agents { export type MessageType = 'human' | 'ai' | 'generic' | 'system' | 'function' | 'tool' | 'remove'; @@ -53,6 +53,8 @@ export namespace Agents { | MessageContentImageUrl | MessageContentVideoUrl | MessageContentInputAudio + | SummaryContentPart + | ToolCallContent // eslint-disable-next-line @typescript-eslint/no-explicit-any | (Record & { type?: ContentTypes | string }) // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -187,6 +189,7 @@ export namespace Agents { /** Group ID for parallel content - parts with same groupId are displayed in columns */ groupId?: number; // #new stepDetails: StepDetails; + summary?: SummaryContentPart; usage: null | object; }; @@ -313,6 +316,28 @@ export namespace Agents { | ContentTypes.VIDEO_URL | ContentTypes.INPUT_AUDIO | string; + + export interface SummarizeStartEvent { + agentId: string; + provider: string; + model?: string; + messagesToRefineCount: number; + summaryVersion: number; + } + + export interface SummarizeDeltaEvent { + id: string; + delta: { + summary: SummaryContentPart; + }; + } + + export interface SummarizeCompleteEvent { + id: string; + agentId: string; + summary?: SummaryContentPart; + error?: string; + } } export type ToolCallResult = { diff --git a/packages/data-provider/src/types/assistants.ts b/packages/data-provider/src/types/assistants.ts index 22072403d3..690b2e06d2 100644 --- a/packages/data-provider/src/types/assistants.ts +++ b/packages/data-provider/src/types/assistants.ts @@ -521,6 +521,21 @@ export type ContentPart = ( export type TextData = (Text & PartMetadata) | undefined; +export type SummaryContentPart = { + type: ContentTypes.SUMMARY; + content?: Array<{ type: ContentTypes.TEXT; text: string }>; + tokenCount?: number; + summarizing?: boolean; + summaryVersion?: number; + model?: string; + provider?: string; + createdAt?: string; + boundary?: { + messageId: string; + contentIndex: number; + }; +}; + export type TMessageContentParts = | ({ type: ContentTypes.ERROR; @@ -545,6 +560,7 @@ export type TMessageContentParts = PartMetadata; } & ContentMetadata) | ({ type: ContentTypes.IMAGE_FILE; image_file: ImageFile & PartMetadata } & ContentMetadata) + | (SummaryContentPart & ContentMetadata) | (Agents.AgentUpdate & ContentMetadata) | (Agents.MessageContentImageUrl & ContentMetadata) | (Agents.MessageContentVideoUrl & ContentMetadata) diff --git a/packages/data-provider/src/types/runs.ts b/packages/data-provider/src/types/runs.ts index de61357b92..b159f99daf 100644 --- a/packages/data-provider/src/types/runs.ts +++ b/packages/data-provider/src/types/runs.ts @@ -8,6 +8,7 @@ export enum ContentTypes { VIDEO_URL = 'video_url', INPUT_AUDIO = 'input_audio', AGENT_UPDATE = 'agent_update', + SUMMARY = 'summary', ERROR = 'error', } @@ -24,3 +25,16 @@ export enum ToolCallTypes { /* Agents Tool Call */ TOOL_CALL = 'tool_call', } + +/** Event names dispatched by the agent graph and consumed by step handlers. */ +export enum StepEvents { + ON_RUN_STEP = 'on_run_step', + ON_AGENT_UPDATE = 'on_agent_update', + ON_MESSAGE_DELTA = 'on_message_delta', + ON_REASONING_DELTA = 'on_reasoning_delta', + ON_RUN_STEP_DELTA = 'on_run_step_delta', + ON_RUN_STEP_COMPLETED = 'on_run_step_completed', + ON_SUMMARIZE_START = 'on_summarize_start', + ON_SUMMARIZE_DELTA = 'on_summarize_delta', + ON_SUMMARIZE_COMPLETE = 'on_summarize_complete', +} diff --git a/packages/data-schemas/src/app/service.ts b/packages/data-schemas/src/app/service.ts index 9f9f521f59..91407b06c4 100644 --- a/packages/data-schemas/src/app/service.ts +++ b/packages/data-schemas/src/app/service.ts @@ -1,4 +1,8 @@ -import { EModelEndpoint, getConfigDefaults } from 'librechat-data-provider'; +import { + EModelEndpoint, + getConfigDefaults, + summarizationConfigSchema, +} from 'librechat-data-provider'; import type { TCustomConfig, FileSources, DeepPartial } from 'librechat-data-provider'; import type { AppConfig, FunctionTool } from '~/types/app'; import { loadDefaultInterface } from './interface'; @@ -9,6 +13,25 @@ import { processModelSpecs } from './specs'; import { loadMemoryConfig } from './memory'; import { loadEndpoints } from './endpoints'; import { loadOCRConfig } from './ocr'; +import logger from '~/config/winston'; + +function loadSummarizationConfig(config: DeepPartial): AppConfig['summarization'] { + const raw = config.summarization; + if (!raw || typeof raw !== 'object') { + return undefined; + } + + const parsed = summarizationConfigSchema.safeParse(raw); + if (!parsed.success) { + logger.warn('[AppService] Invalid summarization config', parsed.error.flatten()); + return undefined; + } + + return { + ...parsed.data, + enabled: parsed.data.enabled !== false, + }; +} export type Paths = { root: string; @@ -41,6 +64,7 @@ export const AppService = async (params?: { const ocr = loadOCRConfig(config.ocr); const webSearch = loadWebSearchConfig(config.webSearch); const memory = loadMemoryConfig(config.memory); + const summarization = loadSummarizationConfig(config); const filteredTools = config.filteredTools; const includedTools = config.includedTools; const fileStrategy = (config.fileStrategy ?? configDefaults.fileStrategy) as @@ -76,18 +100,19 @@ export const AppService = async (params?: { speech, balance, actions, - transactions, - mcpConfig: mcpServersConfig, - mcpSettings, webSearch, + mcpSettings, + transactions, fileStrategy, registration, filteredTools, includedTools, + summarization, availableTools, imageOutputType, interfaceConfig, turnstileConfig, + mcpConfig: mcpServersConfig, fileStrategies: config.fileStrategies, }; diff --git a/packages/data-schemas/src/schema/message.ts b/packages/data-schemas/src/schema/message.ts index 610251443d..ff3468918e 100644 --- a/packages/data-schemas/src/schema/message.ts +++ b/packages/data-schemas/src/schema/message.ts @@ -114,6 +114,14 @@ const messageSchema: Schema = new Schema( type: String, }, metadata: { type: mongoose.Schema.Types.Mixed }, + contextMeta: { + type: { + calibrationRatio: { type: Number }, + encoding: { type: String }, + }, + _id: false, + default: undefined, + }, attachments: { type: [{ type: mongoose.Schema.Types.Mixed }], default: undefined }, /* attachments: { diff --git a/packages/data-schemas/src/types/app.ts b/packages/data-schemas/src/types/app.ts index 36891bfaec..73d65611b0 100644 --- a/packages/data-schemas/src/types/app.ts +++ b/packages/data-schemas/src/types/app.ts @@ -11,6 +11,7 @@ import type { TCustomEndpoints, TAssistantEndpoint, TAnthropicEndpoint, + SummarizationConfig, } from 'librechat-data-provider'; export type JsonSchemaType = { @@ -56,6 +57,8 @@ export interface AppConfig { }; /** Memory configuration */ memory?: TMemoryConfig; + /** Summarization configuration */ + summarization?: SummarizationConfig; /** Web search configuration */ webSearch?: TCustomConfig['webSearch']; /** File storage strategy ('local', 's3', 'firebase', 'azure_blob') */ diff --git a/packages/data-schemas/src/types/message.ts b/packages/data-schemas/src/types/message.ts index c3f465e711..201e5650ef 100644 --- a/packages/data-schemas/src/types/message.ts +++ b/packages/data-schemas/src/types/message.ts @@ -39,6 +39,10 @@ export interface IMessage extends Document { iconURL?: string; addedConvo?: boolean; metadata?: Record; + contextMeta?: { + calibrationRatio?: number; + encoding?: string; + }; attachments?: unknown[]; expiredAt?: Date | null; createdAt?: Date;