/**
 * Tests for AgentClient.recordCollectedUsage
 *
 * This is a critical function that handles token spending for agent LLM calls.
 * It must correctly handle:
 * - Sequential execution (single agent with tool calls)
 * - Parallel execution (multiple agents with independent inputs)
 * - Cache token handling (OpenAI and Anthropic formats)
 */
const { EModelEndpoint } = require('librechat-data-provider');

// Mock dependencies before requiring the module
const mockSpendTokens = jest.fn().mockResolvedValue();
const mockSpendStructuredTokens = jest.fn().mockResolvedValue();

jest.mock('~/models/spendTokens', () => ({
  spendTokens: (...args) => mockSpendTokens(...args),
  spendStructuredTokens: (...args) => mockSpendStructuredTokens(...args),
}));

jest.mock('~/config', () => ({
  logger: {
    debug: jest.fn(),
    error: jest.fn(),
    warn: jest.fn(),
    info: jest.fn(),
  },
  getMCPManager: jest.fn(() => ({
    formatInstructionsForContext: jest.fn(),
  })),
}));

jest.mock('@librechat/agents', () => ({
  ...jest.requireActual('@librechat/agents'),
  createMetadataAggregator: () => ({
    handleLLMEnd: jest.fn(),
    collected: [],
  }),
}));

const AgentClient = require('./client');
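/**
 * For orientation only: a minimal sketch of the aggregation these tests
 * assume, NOT the implementation in ./client (which also records the
 * per-call transactions via spendTokens / spendStructuredTokens). The
 * assertions below rely on two totals:
 *   input_tokens  = first entry's input + its cache write/read tokens
 *   output_tokens = sum of every entry's output_tokens
 */
// eslint-disable-next-line no-unused-vars
function expectedUsageTotals(collectedUsage) {
  const entries = (collectedUsage ?? []).filter(Boolean);
  if (entries.length === 0) {
    return undefined;
  }
  const first = entries[0];
  const details = first.input_token_details ?? {};
  // Cache tokens count toward the initial input context (see the cache token tests)
  const input_tokens =
    (first.input_tokens ?? 0) +
    (details.cache_creation ?? first.cache_creation_input_tokens ?? 0) +
    (details.cache_read ?? first.cache_read_input_tokens ?? 0);
  // Output is summed across all entries, for sequential and parallel runs alike
  const output_tokens = entries.reduce((sum, u) => sum + (u.output_tokens ?? 0), 0);
  return { input_tokens, output_tokens };
}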
describe('AgentClient - recordCollectedUsage', () => {
  let client;
  let mockAgent;
  let mockOptions;

  beforeEach(() => {
    jest.clearAllMocks();

    mockAgent = {
      id: 'agent-123',
      endpoint: EModelEndpoint.openAI,
      provider: EModelEndpoint.openAI,
      model_parameters: {
        model: 'gpt-4',
      },
    };

    mockOptions = {
      req: {
        user: { id: 'user-123' },
        body: { model: 'gpt-4', endpoint: EModelEndpoint.openAI },
      },
      res: {},
      agent: mockAgent,
      endpointTokenConfig: {},
    };

    client = new AgentClient(mockOptions);
    client.conversationId = 'convo-123';
    client.user = 'user-123';
  });

  describe('basic functionality', () => {
    it('should return early if collectedUsage is empty', async () => {
      await client.recordCollectedUsage({
        collectedUsage: [],
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).not.toHaveBeenCalled();
      expect(mockSpendStructuredTokens).not.toHaveBeenCalled();
      expect(client.usage).toBeUndefined();
    });

    it('should return early if collectedUsage is null', async () => {
      await client.recordCollectedUsage({
        collectedUsage: null,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).not.toHaveBeenCalled();
      expect(client.usage).toBeUndefined();
    });

    it('should handle single usage entry correctly', async () => {
      const collectedUsage = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' }];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).toHaveBeenCalledTimes(1);
      expect(mockSpendTokens).toHaveBeenCalledWith(
        expect.objectContaining({
          conversationId: 'convo-123',
          user: 'user-123',
          model: 'gpt-4',
        }),
        { promptTokens: 100, completionTokens: 50 },
      );
      expect(client.usage.input_tokens).toBe(100);
      expect(client.usage.output_tokens).toBe(50);
    });

    it('should skip null entries in collectedUsage', async () => {
      const collectedUsage = [
        { input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
        null,
        { input_tokens: 200, output_tokens: 60, model: 'gpt-4' },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).toHaveBeenCalledTimes(2);
    });
  });

  describe('sequential execution (single agent with tool calls)', () => {
    it('should calculate tokens correctly for sequential tool calls', async () => {
      // Sequential flow: output of call N becomes part of input for call N+1
      // Call 1: input=100, output=50
      // Call 2: input=150 (100+50), output=30
      // Call 3: input=180 (150+30), output=20
      const collectedUsage = [
        { input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
        { input_tokens: 150, output_tokens: 30, model: 'gpt-4' },
        { input_tokens: 180, output_tokens: 20, model: 'gpt-4' },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).toHaveBeenCalledTimes(3);
      // Total output should be the sum of all output_tokens: 50 + 30 + 20 = 100
      expect(client.usage.output_tokens).toBe(100);
      expect(client.usage.input_tokens).toBe(100); // First entry's input
    });
  });
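  // The two execution shapes differ in how input tokens relate across entries:
  // sequentially, the input of call N+1 already contains the output of call N,
  // while parallel agents run against independent contexts. As an illustration
  // (not a claim about the current implementation), any scheme that infers
  // output from input-token deltas breaks on parallel runs: for the entries
  // [input=200/output=100, input=50/output=30], the delta 50 - 200 = -150
  // would drive output_tokens negative, whereas summing output_tokens directly
  // (100 + 30 = 130) is correct for both shapes.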
  describe('parallel execution (multiple agents)', () => {
    it('should handle parallel agents with independent input tokens', async () => {
      // Parallel agents have INDEPENDENT input tokens (not cumulative)
      // Agent A: input=100, output=50
      // Agent B: input=80, output=40 (different context, not 100+50)
      const collectedUsage = [
        { input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
        { input_tokens: 80, output_tokens: 40, model: 'gpt-4' },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).toHaveBeenCalledTimes(2);
      // Expected total output: 50 + 40 = 90
      // output_tokens must be positive and should reflect total output
      expect(client.usage.output_tokens).toBeGreaterThan(0);
    });

    it('should NOT produce negative output_tokens for parallel execution', async () => {
      // Critical bug scenario: parallel agents where the second agent has LOWER input tokens
      const collectedUsage = [
        { input_tokens: 200, output_tokens: 100, model: 'gpt-4' },
        { input_tokens: 50, output_tokens: 30, model: 'gpt-4' },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      // output_tokens MUST be positive for proper token tracking
      expect(client.usage.output_tokens).toBeGreaterThan(0);
      // Correct value should be 100 + 30 = 130
    });

    it('should calculate correct total output for parallel agents', async () => {
      // Three parallel agents with independent contexts
      const collectedUsage = [
        { input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
        { input_tokens: 120, output_tokens: 60, model: 'gpt-4-turbo' },
        { input_tokens: 80, output_tokens: 40, model: 'claude-3' },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).toHaveBeenCalledTimes(3);
      // Total output should be 50 + 60 + 40 = 150
      expect(client.usage.output_tokens).toBe(150);
    });

    it('should handle worst-case parallel scenario without negative tokens', async () => {
      // Extreme case: the first agent has very high input, subsequent agents have low input
      const collectedUsage = [
        { input_tokens: 1000, output_tokens: 500, model: 'gpt-4' },
        { input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
        { input_tokens: 50, output_tokens: 25, model: 'gpt-4' },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      // Must be positive, should be 500 + 50 + 25 = 575
      expect(client.usage.output_tokens).toBeGreaterThan(0);
      expect(client.usage.output_tokens).toBe(575);
    });
  });

  describe('real-world scenarios', () => {
    it('should correctly sum output tokens for sequential tool calls with growing context', async () => {
      // Real production data: Claude Opus with multiple tool calls
      // Context grows as tool results are added, but output_tokens should only
      // count model generations
      const collectedUsage = [
        {
          input_tokens: 31596,
          output_tokens: 151,
          total_tokens: 31747,
          input_token_details: { cache_read: 0, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 35368,
          output_tokens: 150,
          total_tokens: 35518,
          input_token_details: { cache_read: 0, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 58362,
          output_tokens: 295,
          total_tokens: 58657,
          input_token_details: { cache_read: 0, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 112604,
          output_tokens: 193,
          total_tokens: 112797,
          input_token_details: { cache_read: 0, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 257440,
          output_tokens: 2217,
          total_tokens: 259657,
          input_token_details: { cache_read: 0, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      // input_tokens should be the first entry's input (initial context)
      expect(client.usage.input_tokens).toBe(31596);
      // output_tokens should be the sum of all model outputs: 151 + 150 + 295 + 193 + 2217 = 3006
      // NOT the inflated value from incremental calculation (338,559)
      expect(client.usage.output_tokens).toBe(3006);

      // Verify spendTokens was called for each entry with correct values
      expect(mockSpendTokens).toHaveBeenCalledTimes(5);
      expect(mockSpendTokens).toHaveBeenNthCalledWith(
        1,
        expect.objectContaining({ model: 'claude-opus-4-5-20251101' }),
        { promptTokens: 31596, completionTokens: 151 },
      );
      expect(mockSpendTokens).toHaveBeenNthCalledWith(
        5,
        expect.objectContaining({ model: 'claude-opus-4-5-20251101' }),
        { promptTokens: 257440, completionTokens: 2217 },
      );
    });

    it('should handle single followup message correctly', async () => {
      // Real production data: followup to the above conversation
      const collectedUsage = [
        {
          input_tokens: 263406,
          output_tokens: 257,
          total_tokens: 263663,
          input_token_details: { cache_read: 0, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(client.usage.input_tokens).toBe(263406);
      expect(client.usage.output_tokens).toBe(257);
      expect(mockSpendTokens).toHaveBeenCalledTimes(1);
      expect(mockSpendTokens).toHaveBeenCalledWith(
        expect.objectContaining({ model: 'claude-opus-4-5-20251101' }),
        { promptTokens: 263406, completionTokens: 257 },
      );
    });

    it('should ensure output_tokens > 0 check passes for BaseClient.sendMessage', async () => {
      // This verifies the fix for the duplicate token spending bug
      // BaseClient.sendMessage checks: if (usage != null && Number(usage[this.outputTokensKey]) > 0)
      const collectedUsage = [
        {
          input_tokens: 31596,
          output_tokens: 151,
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 35368,
          output_tokens: 150,
          model: 'claude-opus-4-5-20251101',
        },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      const usage = client.getStreamUsage();

      // The check that was failing before the fix
      expect(usage).not.toBeNull();
      expect(Number(usage.output_tokens)).toBeGreaterThan(0);
      // Verify correct value
      expect(usage.output_tokens).toBe(301); // 151 + 150
    });

    it('should correctly handle cache tokens with multiple tool calls', async () => {
      // Real production data: Claude Opus with cache tokens (prompt caching)
      // First entry has cache_creation, subsequent entries have cache_read
      const collectedUsage = [
        {
          input_tokens: 788,
          output_tokens: 163,
          total_tokens: 951,
          input_token_details: { cache_read: 0, cache_creation: 30808 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 3802,
          output_tokens: 149,
          total_tokens: 3951,
          input_token_details: { cache_read: 30808, cache_creation: 768 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 26808,
          output_tokens: 225,
          total_tokens: 27033,
          input_token_details: { cache_read: 31576, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 80912,
          output_tokens: 204,
          total_tokens: 81116,
          input_token_details: { cache_read: 31576, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 136454,
          output_tokens: 206,
          total_tokens: 136660,
          input_token_details: { cache_read: 31576, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 146316,
          output_tokens: 224,
          total_tokens: 146540,
          input_token_details: { cache_read: 31576, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 150402,
          output_tokens: 1248,
          total_tokens: 151650,
          input_token_details: { cache_read: 31576, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 156268,
          output_tokens: 139,
          total_tokens: 156407,
          input_token_details: { cache_read: 31576, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
        {
          input_tokens: 167126,
          output_tokens: 2961,
          total_tokens: 170087,
          input_token_details: { cache_read: 31576, cache_creation: 0 },
          model: 'claude-opus-4-5-20251101',
        },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      // input_tokens = first entry's input + cache_creation + cache_read
      //              = 788 + 30808 + 0 = 31596
      expect(client.usage.input_tokens).toBe(31596);
      // output_tokens = sum of all output_tokens
      //               = 163 + 149 + 225 + 204 + 206 + 224 + 1248 + 139 + 2961 = 5519
      expect(client.usage.output_tokens).toBe(5519);

      // First 2 entries have cache_creation tokens and should use spendStructuredTokens;
      // the remaining 7 entries have cache_read but no cache_creation, still structured
      expect(mockSpendStructuredTokens).toHaveBeenCalledTimes(9);
      expect(mockSpendTokens).toHaveBeenCalledTimes(0);

      // Verify the first entry uses structured tokens with cache_creation
      expect(mockSpendStructuredTokens).toHaveBeenNthCalledWith(
        1,
        expect.objectContaining({ model: 'claude-opus-4-5-20251101' }),
        {
          promptTokens: { input: 788, write: 30808, read: 0 },
          completionTokens: 163,
        },
      );
      // Verify the second entry uses structured tokens with both cache_creation and cache_read
      expect(mockSpendStructuredTokens).toHaveBeenNthCalledWith(
        2,
        expect.objectContaining({ model: 'claude-opus-4-5-20251101' }),
        {
          promptTokens: { input: 3802, write: 768, read: 30808 },
          completionTokens: 149,
        },
      );
    });
  });
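  // For reference: the per-entry routing the cache token tests assume. Entries
  // carrying cache token details are charged via spendStructuredTokens with
  // promptTokens split into { input, write, read }; plain entries go through
  // spendTokens. A sketch inferred from the mocked call assertions, not copied
  // from ./client:
  // eslint-disable-next-line no-unused-vars
  function sketchSpendForEntry(usage) {
    const details = usage.input_token_details ?? {};
    // OpenAI-style input_token_details and Anthropic-style cache_*_input_tokens
    // normalize to the same write (cache creation) / read (cache hit) pair
    const write = details.cache_creation ?? usage.cache_creation_input_tokens ?? 0;
    const read = details.cache_read ?? usage.cache_read_input_tokens ?? 0;
    if (write > 0 || read > 0) {
      return {
        fn: 'spendStructuredTokens',
        args: {
          promptTokens: { input: usage.input_tokens, write, read },
          completionTokens: usage.output_tokens,
        },
      };
    }
    return {
      fn: 'spendTokens',
      args: { promptTokens: usage.input_tokens, completionTokens: usage.output_tokens },
    };
  }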
  describe('cache token handling', () => {
    it('should handle OpenAI format cache tokens (input_token_details)', async () => {
      const collectedUsage = [
        {
          input_tokens: 100,
          output_tokens: 50,
          model: 'gpt-4',
          input_token_details: {
            cache_creation: 20,
            cache_read: 10,
          },
        },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendStructuredTokens).toHaveBeenCalledTimes(1);
      expect(mockSpendStructuredTokens).toHaveBeenCalledWith(
        expect.objectContaining({ model: 'gpt-4' }),
        {
          promptTokens: {
            input: 100,
            write: 20,
            read: 10,
          },
          completionTokens: 50,
        },
      );
    });

    it('should handle Anthropic format cache tokens (cache_*_input_tokens)', async () => {
      const collectedUsage = [
        {
          input_tokens: 100,
          output_tokens: 50,
          model: 'claude-3',
          cache_creation_input_tokens: 25,
          cache_read_input_tokens: 15,
        },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendStructuredTokens).toHaveBeenCalledTimes(1);
      expect(mockSpendStructuredTokens).toHaveBeenCalledWith(
        expect.objectContaining({ model: 'claude-3' }),
        {
          promptTokens: {
            input: 100,
            write: 25,
            read: 15,
          },
          completionTokens: 50,
        },
      );
    });

    it('should use spendTokens for entries without cache tokens', async () => {
      const collectedUsage = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' }];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).toHaveBeenCalledTimes(1);
      expect(mockSpendStructuredTokens).not.toHaveBeenCalled();
    });

    it('should handle mixed cache and non-cache entries', async () => {
      const collectedUsage = [
        { input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
        {
          input_tokens: 150,
          output_tokens: 30,
          model: 'gpt-4',
          input_token_details: { cache_creation: 10, cache_read: 5 },
        },
        { input_tokens: 200, output_tokens: 20, model: 'gpt-4' },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).toHaveBeenCalledTimes(2);
      expect(mockSpendStructuredTokens).toHaveBeenCalledTimes(1);
    });

    it('should include cache tokens in total input calculation', async () => {
      const collectedUsage = [
        {
          input_tokens: 100,
          output_tokens: 50,
          model: 'gpt-4',
          input_token_details: {
            cache_creation: 20,
            cache_read: 10,
          },
        },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      // Total input should include cache tokens: 100 + 20 + 10 = 130
      expect(client.usage.input_tokens).toBe(130);
    });
  });
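  // Model resolution in the tests below presumably follows this precedence
  // chain (inferred from the four assertions, not copied from ./client):
  //   usage entry model -> recordCollectedUsage model param -> client.model
  //   -> agent.model_parameters.model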
  describe('model fallback', () => {
    it('should use usage.model when available', async () => {
      const collectedUsage = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4-turbo' }];

      await client.recordCollectedUsage({
        model: 'fallback-model',
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).toHaveBeenCalledWith(
        expect.objectContaining({ model: 'gpt-4-turbo' }),
        expect.any(Object),
      );
    });

    it('should fall back to the param model when usage.model is missing', async () => {
      const collectedUsage = [{ input_tokens: 100, output_tokens: 50 }];

      await client.recordCollectedUsage({
        model: 'param-model',
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).toHaveBeenCalledWith(
        expect.objectContaining({ model: 'param-model' }),
        expect.any(Object),
      );
    });

    it('should fall back to client.model when the param model is missing', async () => {
      client.model = 'client-model';
      const collectedUsage = [{ input_tokens: 100, output_tokens: 50 }];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).toHaveBeenCalledWith(
        expect.objectContaining({ model: 'client-model' }),
        expect.any(Object),
      );
    });

    it('should fall back to agent model_parameters.model as a last resort', async () => {
      const collectedUsage = [{ input_tokens: 100, output_tokens: 50 }];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      expect(mockSpendTokens).toHaveBeenCalledWith(
        expect.objectContaining({ model: 'gpt-4' }),
        expect.any(Object),
      );
    });
  });
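  // getStreamUsage is how BaseClient.sendMessage reads these totals. The guard
  // quoted in the tests, if (usage != null && Number(usage[this.outputTokensKey]) > 0),
  // is why output_tokens must stay positive: presumably, a non-positive total
  // would make BaseClient fall back to its own token counting and spend tokens
  // a second time (the duplicate-spending bug referenced above).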
  describe('getStreamUsage integration', () => {
    it('should return the usage object set by recordCollectedUsage', async () => {
      const collectedUsage = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' }];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      const usage = client.getStreamUsage();
      expect(usage).toEqual({
        input_tokens: 100,
        output_tokens: 50,
      });
    });

    it('should return undefined before recordCollectedUsage is called', () => {
      const usage = client.getStreamUsage();
      expect(usage).toBeUndefined();
    });

    it('should have output_tokens > 0 for BaseClient.sendMessage check', async () => {
      // This test verifies the usage will pass the check in BaseClient.sendMessage:
      // if (usage != null && Number(usage[this.outputTokensKey]) > 0)
      const collectedUsage = [
        { input_tokens: 200, output_tokens: 100, model: 'gpt-4' },
        { input_tokens: 50, output_tokens: 30, model: 'gpt-4' },
      ];

      await client.recordCollectedUsage({
        collectedUsage,
        balance: { enabled: true },
        transactions: { enabled: true },
      });

      const usage = client.getStreamUsage();
      expect(usage).not.toBeNull();
      expect(Number(usage.output_tokens)).toBeGreaterThan(0);
    });
  });
});