diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js index 85963aec58..2d008b9991 100644 --- a/api/app/clients/BaseClient.js +++ b/api/app/clients/BaseClient.js @@ -137,12 +137,14 @@ class BaseClient { * @param {AppConfig['balance']} [balance] * @param {number} promptTokens * @param {number} completionTokens + * @param {string} [messageId] * @returns {Promise} */ - async recordTokenUsage({ model, balance, promptTokens, completionTokens }) { + async recordTokenUsage({ model, balance, promptTokens, completionTokens, messageId }) { logger.debug('[BaseClient] `recordTokenUsage` not implemented.', { model, balance, + messageId, promptTokens, completionTokens, }); @@ -796,6 +798,7 @@ class BaseClient { completionTokens, balance: balanceConfig, model: responseMessage.model, + messageId: this.responseMessageId, }); } diff --git a/api/app/clients/tools/structured/GeminiImageGen.js b/api/app/clients/tools/structured/GeminiImageGen.js index b201db019d..0bd1e302ed 100644 --- a/api/app/clients/tools/structured/GeminiImageGen.js +++ b/api/app/clients/tools/structured/GeminiImageGen.js @@ -251,8 +251,9 @@ function checkForSafetyBlock(response) { * @param {string} params.userId - The user ID * @param {string} params.conversationId - The conversation ID * @param {string} params.model - The model name + * @param {string} [params.messageId] - The response message ID for transaction correlation */ -async function recordTokenUsage({ usageMetadata, req, userId, conversationId, model }) { +async function recordTokenUsage({ usageMetadata, req, userId, conversationId, model, messageId }) { if (!usageMetadata) { logger.debug('[GeminiImageGen] No usage metadata available for balance tracking'); return; @@ -288,6 +289,7 @@ async function recordTokenUsage({ usageMetadata, req, userId, conversationId, mo { user: userId, model, + messageId, conversationId, context: 'image_generation', balance, @@ -445,10 +447,14 @@ function createGeminiImageTool(fields = {}) { ]; const conversationId = runnableConfig?.configurable?.thread_id; + const messageId = + runnableConfig?.configurable?.run_id ?? + runnableConfig?.configurable?.requestBody?.messageId; recordTokenUsage({ usageMetadata: apiResponse.usageMetadata, req, userId, + messageId, conversationId, model: geminiModel, }).catch((error) => { diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index 7aea6d1e8f..d69281d49c 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -663,6 +663,7 @@ class AgentClient extends BaseClient { context, balance, transactions, + messageId: this.responseMessageId, conversationId: this.conversationId, user: this.user ?? this.options.req.user?.id, endpointTokenConfig: this.options.endpointTokenConfig, @@ -1148,6 +1149,7 @@ class AgentClient extends BaseClient { model: clientOptions.model, balance: balanceConfig, transactions: transactionsConfig, + messageId: this.responseMessageId, }).catch((err) => { logger.error( '[api/server/controllers/agents/client.js #titleConvo] Error recording collected usage', @@ -1186,6 +1188,7 @@ class AgentClient extends BaseClient { model, context, balance, + messageId: this.responseMessageId, conversationId: this.conversationId, user: this.user ?? this.options.req.user?.id, endpointTokenConfig: this.options.endpointTokenConfig, @@ -1204,6 +1207,7 @@ class AgentClient extends BaseClient { model, balance, context: 'reasoning', + messageId: this.responseMessageId, conversationId: this.conversationId, user: this.user ?? this.options.req.user?.id, endpointTokenConfig: this.options.endpointTokenConfig, diff --git a/api/server/controllers/agents/client.test.js b/api/server/controllers/agents/client.test.js index 9dd3567047..42481e1644 100644 --- a/api/server/controllers/agents/client.test.js +++ b/api/server/controllers/agents/client.test.js @@ -263,6 +263,7 @@ describe('AgentClient - titleConvo', () => { transactions: { enabled: true, }, + messageId: 'response-123', }); }); diff --git a/api/server/controllers/agents/openai.js b/api/server/controllers/agents/openai.js index b334580eb1..a083bd9291 100644 --- a/api/server/controllers/agents/openai.js +++ b/api/server/controllers/agents/openai.js @@ -129,7 +129,6 @@ const OpenAIChatCompletionController = async (req, res) => { const appConfig = req.config; const requestStartTime = Date.now(); - // Validate request const validation = validateRequest(req.body); if (isChatCompletionValidationFailure(validation)) { return sendErrorResponse(res, 400, validation.error); @@ -150,20 +149,20 @@ const OpenAIChatCompletionController = async (req, res) => { ); } - // Generate IDs - const requestId = `chatcmpl-${nanoid()}`; + const responseId = `chatcmpl-${nanoid()}`; const conversationId = request.conversation_id ?? nanoid(); const parentMessageId = request.parent_message_id ?? null; const created = Math.floor(Date.now() / 1000); + /** @type {import('@librechat/api').OpenAIResponseContext} — key must be `requestId` to match the type used by createChunk/buildNonStreamingResponse */ const context = { created, - requestId, + requestId: responseId, model: agentId, }; logger.debug( - `[OpenAI API] Request ${requestId} started for agent ${agentId}, stream: ${request.stream}`, + `[OpenAI API] Response ${responseId} started for agent ${agentId}, stream: ${request.stream}`, ); // Set up abort controller @@ -450,11 +449,11 @@ const OpenAIChatCompletionController = async (req, res) => { agents: [primaryConfig], messages: formattedMessages, indexTokenCountMap, - runId: requestId, + runId: responseId, signal: abortController.signal, customHandlers: handlers, requestBody: { - messageId: requestId, + messageId: responseId, conversationId, }, user: { id: userId }, @@ -471,6 +470,10 @@ const OpenAIChatCompletionController = async (req, res) => { thread_id: conversationId, user_id: userId, user: createSafeUser(req.user), + requestBody: { + messageId: responseId, + conversationId, + }, ...(userMCPAuthMap != null && { userMCPAuthMap }), }, signal: abortController.signal, @@ -496,6 +499,7 @@ const OpenAIChatCompletionController = async (req, res) => { conversationId, collectedUsage, context: 'message', + messageId: responseId, balance: balanceConfig, transactions: transactionsConfig, model: primaryConfig.model || agent.model_parameters?.model, @@ -509,7 +513,7 @@ const OpenAIChatCompletionController = async (req, res) => { if (isStreaming) { sendFinalChunk(handlerConfig); res.end(); - logger.debug(`[OpenAI API] Request ${requestId} completed in ${duration}ms (streaming)`); + logger.debug(`[OpenAI API] Response ${responseId} completed in ${duration}ms (streaming)`); // Wait for artifact processing after response ends (non-blocking) if (artifactPromises.length > 0) { @@ -548,7 +552,9 @@ const OpenAIChatCompletionController = async (req, res) => { usage, ); res.json(response); - logger.debug(`[OpenAI API] Request ${requestId} completed in ${duration}ms (non-streaming)`); + logger.debug( + `[OpenAI API] Response ${responseId} completed in ${duration}ms (non-streaming)`, + ); } } catch (error) { const errorMessage = error instanceof Error ? error.message : 'An error occurred'; diff --git a/api/server/controllers/agents/responses.js b/api/server/controllers/agents/responses.js index afdb96be9f..8ce15766c7 100644 --- a/api/server/controllers/agents/responses.js +++ b/api/server/controllers/agents/responses.js @@ -486,6 +486,10 @@ const createResponse = async (req, res) => { thread_id: conversationId, user_id: userId, user: createSafeUser(req.user), + requestBody: { + messageId: responseId, + conversationId, + }, ...(userMCPAuthMap != null && { userMCPAuthMap }), }, signal: abortController.signal, @@ -511,6 +515,7 @@ const createResponse = async (req, res) => { conversationId, collectedUsage, context: 'message', + messageId: responseId, balance: balanceConfig, transactions: transactionsConfig, model: primaryConfig.model || agent.model_parameters?.model, @@ -630,6 +635,10 @@ const createResponse = async (req, res) => { thread_id: conversationId, user_id: userId, user: createSafeUser(req.user), + requestBody: { + messageId: responseId, + conversationId, + }, ...(userMCPAuthMap != null && { userMCPAuthMap }), }, signal: abortController.signal, @@ -655,6 +664,7 @@ const createResponse = async (req, res) => { conversationId, collectedUsage, context: 'message', + messageId: responseId, balance: balanceConfig, transactions: transactionsConfig, model: primaryConfig.model || agent.model_parameters?.model, diff --git a/api/server/middleware/abortMiddleware.js b/api/server/middleware/abortMiddleware.js index d07a09682d..acc9299b04 100644 --- a/api/server/middleware/abortMiddleware.js +++ b/api/server/middleware/abortMiddleware.js @@ -27,8 +27,15 @@ const { abortRun } = require('./abortRun'); * @param {string} params.conversationId - Conversation ID * @param {Array} params.collectedUsage - Usage metadata from all models * @param {string} [params.fallbackModel] - Fallback model name if not in usage + * @param {string} [params.messageId] - The response message ID for transaction correlation */ -async function spendCollectedUsage({ userId, conversationId, collectedUsage, fallbackModel }) { +async function spendCollectedUsage({ + userId, + conversationId, + collectedUsage, + fallbackModel, + messageId, +}) { if (!collectedUsage || collectedUsage.length === 0) { return; } @@ -50,6 +57,7 @@ async function spendCollectedUsage({ userId, conversationId, collectedUsage, fal const txMetadata = { context: 'abort', + messageId, conversationId, user: userId, model: usage.model ?? fallbackModel, @@ -144,6 +152,7 @@ async function abortMessage(req, res) { conversationId: jobData?.conversationId, collectedUsage, fallbackModel: jobData?.model, + messageId: jobData?.responseMessageId, }); } else { // Fallback: no collected usage, use text-based token counting for primary model only diff --git a/packages/api/src/agents/usage.spec.ts b/packages/api/src/agents/usage.spec.ts index 9c06567dc4..1937af5011 100644 --- a/packages/api/src/agents/usage.spec.ts +++ b/packages/api/src/agents/usage.spec.ts @@ -379,6 +379,7 @@ describe('recordCollectedUsage', () => { await recordCollectedUsage(deps, { ...baseParams, + messageId: 'msg-123', endpointTokenConfig, collectedUsage, }); @@ -389,6 +390,7 @@ describe('recordCollectedUsage', () => { conversationId: 'convo-123', model: 'gpt-4', context: 'message', + messageId: 'msg-123', balance: { enabled: true }, transactions: { enabled: true }, endpointTokenConfig, @@ -431,4 +433,93 @@ describe('recordCollectedUsage', () => { ); }); }); + + describe('messageId propagation', () => { + it('should pass messageId to spendTokens', async () => { + const collectedUsage: UsageMetadata[] = [ + { input_tokens: 10, output_tokens: 5, model: 'gpt-4' }, + ]; + + await recordCollectedUsage(deps, { + ...baseParams, + messageId: 'msg-1', + collectedUsage, + }); + + expect(mockSpendTokens).toHaveBeenCalledWith( + expect.objectContaining({ messageId: 'msg-1' }), + expect.any(Object), + ); + }); + + it('should pass messageId to spendStructuredTokens for cache paths', async () => { + const collectedUsage: UsageMetadata[] = [ + { + input_tokens: 100, + output_tokens: 50, + model: 'claude-3', + cache_creation_input_tokens: 25, + cache_read_input_tokens: 15, + }, + ]; + + await recordCollectedUsage(deps, { + ...baseParams, + messageId: 'msg-cache-1', + collectedUsage, + }); + + expect(mockSpendStructuredTokens).toHaveBeenCalledWith( + expect.objectContaining({ messageId: 'msg-cache-1' }), + expect.any(Object), + ); + expect(mockSpendTokens).not.toHaveBeenCalled(); + }); + + it('should pass undefined messageId when not provided', async () => { + const collectedUsage: UsageMetadata[] = [ + { input_tokens: 10, output_tokens: 5, model: 'gpt-4' }, + ]; + + await recordCollectedUsage(deps, { + user: 'user-123', + conversationId: 'convo-123', + collectedUsage, + }); + + expect(mockSpendTokens).toHaveBeenCalledWith( + expect.objectContaining({ messageId: undefined }), + expect.any(Object), + ); + }); + + it('should propagate messageId across multiple usage entries', async () => { + const collectedUsage: UsageMetadata[] = [ + { input_tokens: 100, output_tokens: 50, model: 'gpt-4' }, + { input_tokens: 200, output_tokens: 60, model: 'gpt-4' }, + { + input_tokens: 150, + output_tokens: 30, + model: 'gpt-4', + input_token_details: { cache_creation: 10, cache_read: 5 }, + }, + ]; + + await recordCollectedUsage(deps, { + ...baseParams, + messageId: 'msg-multi', + collectedUsage, + }); + + expect(mockSpendTokens).toHaveBeenCalledTimes(2); + expect(mockSpendStructuredTokens).toHaveBeenCalledTimes(1); + + for (const call of mockSpendTokens.mock.calls) { + expect(call[0]).toEqual(expect.objectContaining({ messageId: 'msg-multi' })); + } + expect(mockSpendStructuredTokens.mock.calls[0][0]).toEqual( + expect.objectContaining({ messageId: 'msg-multi' }), + ); + }); + }); }); diff --git a/packages/api/src/agents/usage.ts b/packages/api/src/agents/usage.ts index 545be9195d..351452d698 100644 --- a/packages/api/src/agents/usage.ts +++ b/packages/api/src/agents/usage.ts @@ -23,6 +23,7 @@ interface TxMetadata { user: string; model?: string; context: string; + messageId?: string; conversationId: string; balance?: Partial | null; transactions?: Partial; @@ -46,6 +47,7 @@ export interface RecordUsageParams { collectedUsage: UsageMetadata[]; model?: string; context?: string; + messageId?: string; balance?: Partial | null; transactions?: Partial; endpointTokenConfig?: EndpointTokenConfig; @@ -68,6 +70,7 @@ export async function recordCollectedUsage( user, model, balance, + messageId, transactions, conversationId, collectedUsage, @@ -108,11 +111,12 @@ export async function recordCollectedUsage( total_output_tokens += Number(usage.output_tokens) || 0; const txMetadata: TxMetadata = { + user, context, balance, + messageId, transactions, conversationId, - user, endpointTokenConfig, model: usage.model ?? model, }; diff --git a/packages/data-schemas/src/schema/transaction.ts b/packages/data-schemas/src/schema/transaction.ts index 6377db6d6c..6faf684b12 100644 --- a/packages/data-schemas/src/schema/transaction.ts +++ b/packages/data-schemas/src/schema/transaction.ts @@ -14,6 +14,7 @@ export interface ITransaction extends Document { inputTokens?: number; writeTokens?: number; readTokens?: number; + messageId?: string; createdAt?: Date; updatedAt?: Date; } @@ -52,6 +53,7 @@ const transactionSchema: Schema = new Schema( inputTokens: { type: Number }, writeTokens: { type: Number }, readTokens: { type: Number }, + messageId: { type: String }, }, { timestamps: true,