💰 fix: Multi-Agent Token Spending & Prevent Double-Spend (#11433)

* fix: Token Spending Logic for Multi-Agents in Abort Scenarios

* Implemented logic to skip token spending if a conversation is aborted, preventing double-spending.
* Introduced `spendCollectedUsage` function to handle token spending for multiple models during aborts, ensuring accurate accounting for parallel agents.
* Updated `GenerationJobManager` to store and retrieve collected usage data for improved abort handling.
* Added comprehensive tests for the new functionality, covering various scenarios including cache token handling and parallel agent usage.
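The `GenerationJobManager` changes are not shown in the diff hunks below. A minimal sketch of the store-and-retrieve idea they describe — all class, method, and property names here are assumptions for illustration, not the actual API:

```js
// Hypothetical sketch only — names are assumptions, not the real GenerationJobManager.
class GenerationJobManager {
  constructor() {
    /** Maps a generation job id to the usage entries collected so far. */
    this.collectedUsageByJob = new Map();
  }

  /** Called during generation so collected usage survives an abort. */
  storeCollectedUsage(jobId, collectedUsage) {
    this.collectedUsageByJob.set(jobId, collectedUsage);
  }

  /** Called by the abort handler to spend whatever was collected. */
  getCollectedUsage(jobId) {
    return this.collectedUsageByJob.get(jobId) ?? [];
  }
}
```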

* fix: Memory Context Handling for Multi-Agents

* Refactored `buildMessages` method to pass memory context to parallel agents, ensuring they share the same user context.
* Improved handling of memory context when no existing instructions are present for parallel agents.
* Added comprehensive tests to verify memory context propagation and behavior under various scenarios, including cases with no memory available and empty agent configurations.
* Enhanced logging for better traceability of memory context additions to agents.

* chore: Memory Context Documentation for Parallel Agents

* Updated documentation in the `AgentClient` class to clarify the in-place mutation of `agentConfig` objects when passing memory context to parallel agents.
* Added notes on the implications of mutating objects directly to ensure all parallel agents receive the correct memory context before execution.
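To illustrate why the mutation must happen in place (hypothetical names, not the real config shape): a `Map` stores object references, so mutating an object is visible to every holder of the reference, while rebinding a local variable to a new object is not:

```js
// Illustrative only; real agentConfig objects carry more fields.
const agentConfigs = new Map([['agent-1', { instructions: 'Base instructions' }]]);
const config = agentConfigs.get('agent-1');

// Mutating in place: the Map (and the graph runtime reading from it) sees the change.
config.instructions += '\n\nMemory context';
console.log(agentConfigs.get('agent-1').instructions); // 'Base instructions\n\nMemory context'

// Rebinding to a new object would NOT work — the Map still holds the old reference.
const replaced = { ...config, instructions: 'Something else' };
console.log(agentConfigs.get('agent-1') === replaced); // false
```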

* chore: UsageMetadata Interface docs for Token Spending

* Expanded the `UsageMetadata` interface to support both OpenAI and Anthropic cache token formats.
* Added detailed documentation for cache token properties, including mutually exclusive fields for different model types.
* Improved clarity on how to access cache token details for accurate token spending tracking.
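A rough JSDoc sketch of the shape described above — the cache-field names follow the usual OpenAI and Anthropic conventions, but treat them as assumptions rather than the exact interface:

```js
/**
 * Hypothetical sketch of the expanded usage shape; exact property names may differ.
 * @typedef {Object} UsageMetadata
 * @property {number} input_tokens - Prompt tokens for the request.
 * @property {number} output_tokens - Completion tokens for the response.
 * @property {{ cache_read?: number, cache_creation?: number }} [input_token_details]
 *   OpenAI-format cache details; mutually exclusive with the Anthropic fields below.
 * @property {number} [cache_creation_input_tokens] - Anthropic-format cache-write tokens.
 * @property {number} [cache_read_input_tokens] - Anthropic-format cache-read tokens.
 */
```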

* fix: Enhance Token Spending Logic in Abort Middleware

* Refactored `spendCollectedUsage` to spend tokens concurrently via `Promise.all`, improving performance and ensuring all spend operations complete before the `collectedUsage` array is cleared.
* Added documentation clarifying why clearing the `collectedUsage` array is essential to preventing double-spending in abort scenarios.
* Updated tests to verify the spending logic and that the array is cleared after spending operations.
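The `abortMiddleware.js` hunk itself is not included below. A minimal sketch of the concurrent-spend-then-clear pattern described here, assuming a `spendTokens` helper and a simplified usage-entry shape:

```js
// Sketch only — the real function takes more context (user, conversation, balance config).
async function spendCollectedUsage({ collectedUsage, spendTokens, txMetadata }) {
  // Spend usage for every model concurrently; Promise.all ensures all
  // transactions settle before we touch the array.
  await Promise.all(
    collectedUsage.map((usage) =>
      spendTokens(
        { ...txMetadata, model: usage.model },
        { promptTokens: usage.input_tokens, completionTokens: usage.output_tokens },
      ),
    ),
  );

  // Clear in place (callers hold a reference to this same array) so a second
  // pass through the abort path cannot double-spend.
  collectedUsage.length = 0;
}
```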

```diff
@@ -522,14 +522,36 @@ class AgentClient extends BaseClient {
     }
 
     const withoutKeys = await this.useMemory();
-    if (withoutKeys) {
-      systemContent += `${memoryInstructions}\n\n# Existing memory about the user:\n${withoutKeys}`;
+    const memoryContext = withoutKeys
+      ? `${memoryInstructions}\n\n# Existing memory about the user:\n${withoutKeys}`
+      : '';
+    if (memoryContext) {
+      systemContent += memoryContext;
     }
 
     if (systemContent) {
       this.options.agent.instructions = systemContent;
     }
 
+    /**
+     * Pass memory context to parallel agents (addedConvo) so they have the same user context.
+     *
+     * NOTE: This intentionally mutates the agentConfig objects in place. The agentConfigs Map
+     * holds references to config objects that will be passed to the graph runtime. Mutating
+     * them here ensures all parallel agents receive the memory context before execution starts.
+     * Creating new objects would not work because the Map references would still point to the old objects.
+     */
+    if (memoryContext && this.agentConfigs?.size > 0) {
+      for (const [agentId, agentConfig] of this.agentConfigs.entries()) {
+        if (agentConfig.instructions) {
+          agentConfig.instructions = agentConfig.instructions + '\n\n' + memoryContext;
+        } else {
+          agentConfig.instructions = memoryContext;
+        }
+        logger.debug(`[AgentClient] Added memory context to parallel agent: ${agentId}`);
+      }
+    }
+
     return result;
   }
 
```
```diff
@@ -1084,11 +1106,20 @@ class AgentClient extends BaseClient {
         this.artifactPromises.push(...attachments);
       }
 
-      await this.recordCollectedUsage({
-        context: 'message',
-        balance: balanceConfig,
-        transactions: transactionsConfig,
-      });
+      /** Skip token spending if aborted - the abort handler (abortMiddleware.js) handles it
+       This prevents double-spending when user aborts via `/api/agents/chat/abort` */
+      const wasAborted = abortController?.signal?.aborted;
+      if (!wasAborted) {
+        await this.recordCollectedUsage({
+          context: 'message',
+          balance: balanceConfig,
+          transactions: transactionsConfig,
+        });
+      } else {
+        logger.debug(
+          '[api/server/controllers/agents/client.js #chatCompletion] Skipping token spending - handled by abort middleware',
+        );
+      }
     } catch (err) {
       logger.error(
         '[api/server/controllers/agents/client.js #chatCompletion] Error in cleanup phase',
```


```diff
@@ -1849,4 +1849,224 @@ describe('AgentClient - titleConvo', () => {
       });
     });
   });
+
+  describe('buildMessages - memory context for parallel agents', () => {
+    let client;
+    let mockReq;
+    let mockRes;
+    let mockAgent;
+    let mockOptions;
+
+    beforeEach(() => {
+      jest.clearAllMocks();
+
+      mockAgent = {
+        id: 'primary-agent',
+        name: 'Primary Agent',
+        endpoint: EModelEndpoint.openAI,
+        provider: EModelEndpoint.openAI,
+        instructions: 'Primary agent instructions',
+        model_parameters: {
+          model: 'gpt-4',
+        },
+        tools: [],
+      };
+
+      mockReq = {
+        user: {
+          id: 'user-123',
+          personalization: {
+            memories: true,
+          },
+        },
+        body: {
+          endpoint: EModelEndpoint.openAI,
+        },
+        config: {
+          memory: {
+            disabled: false,
+          },
+        },
+      };
+
+      mockRes = {};
+
+      mockOptions = {
+        req: mockReq,
+        res: mockRes,
+        agent: mockAgent,
+        endpoint: EModelEndpoint.agents,
+      };
+
+      client = new AgentClient(mockOptions);
+      client.conversationId = 'convo-123';
+      client.responseMessageId = 'response-123';
+      client.shouldSummarize = false;
+      client.maxContextTokens = 4096;
+    });
+
+    it('should pass memory context to parallel agents (addedConvo)', async () => {
+      const memoryContent = 'User prefers dark mode. User is a software developer.';
+      client.useMemory = jest.fn().mockResolvedValue(memoryContent);
+
+      const parallelAgent1 = {
+        id: 'parallel-agent-1',
+        name: 'Parallel Agent 1',
+        instructions: 'Parallel agent 1 instructions',
+        provider: EModelEndpoint.openAI,
+      };
+      const parallelAgent2 = {
+        id: 'parallel-agent-2',
+        name: 'Parallel Agent 2',
+        instructions: 'Parallel agent 2 instructions',
+        provider: EModelEndpoint.anthropic,
+      };
+
+      client.agentConfigs = new Map([
+        ['parallel-agent-1', parallelAgent1],
+        ['parallel-agent-2', parallelAgent2],
+      ]);
+
+      const messages = [
+        {
+          messageId: 'msg-1',
+          parentMessageId: null,
+          sender: 'User',
+          text: 'Hello',
+          isCreatedByUser: true,
+        },
+      ];
+
+      await client.buildMessages(messages, null, {
+        instructions: 'Base instructions',
+        additional_instructions: null,
+      });
+
+      expect(client.useMemory).toHaveBeenCalled();
+      expect(client.options.agent.instructions).toContain('Base instructions');
+      expect(client.options.agent.instructions).toContain(memoryContent);
+      expect(parallelAgent1.instructions).toContain('Parallel agent 1 instructions');
+      expect(parallelAgent1.instructions).toContain(memoryContent);
+      expect(parallelAgent2.instructions).toContain('Parallel agent 2 instructions');
+      expect(parallelAgent2.instructions).toContain(memoryContent);
+    });
+
+    it('should not modify parallel agents when no memory context is available', async () => {
+      client.useMemory = jest.fn().mockResolvedValue(undefined);
+
+      const parallelAgent = {
+        id: 'parallel-agent-1',
+        name: 'Parallel Agent 1',
+        instructions: 'Original parallel instructions',
+        provider: EModelEndpoint.openAI,
+      };
+      client.agentConfigs = new Map([['parallel-agent-1', parallelAgent]]);
+
+      const messages = [
+        {
+          messageId: 'msg-1',
+          parentMessageId: null,
+          sender: 'User',
+          text: 'Hello',
+          isCreatedByUser: true,
+        },
+      ];
+
+      await client.buildMessages(messages, null, {
+        instructions: 'Base instructions',
+        additional_instructions: null,
+      });
+
+      expect(parallelAgent.instructions).toBe('Original parallel instructions');
+    });
+
+    it('should handle parallel agents without existing instructions', async () => {
+      const memoryContent = 'User is a data scientist.';
+      client.useMemory = jest.fn().mockResolvedValue(memoryContent);
+
+      const parallelAgentNoInstructions = {
+        id: 'parallel-agent-no-instructions',
+        name: 'Parallel Agent No Instructions',
+        provider: EModelEndpoint.openAI,
+      };
+      client.agentConfigs = new Map([
+        ['parallel-agent-no-instructions', parallelAgentNoInstructions],
+      ]);
+
+      const messages = [
+        {
+          messageId: 'msg-1',
+          parentMessageId: null,
+          sender: 'User',
+          text: 'Hello',
+          isCreatedByUser: true,
+        },
+      ];
+
+      await client.buildMessages(messages, null, {
+        instructions: null,
+        additional_instructions: null,
+      });
+
+      expect(parallelAgentNoInstructions.instructions).toContain(memoryContent);
+    });
+
+    it('should not modify agentConfigs when none exist', async () => {
+      const memoryContent = 'User prefers concise responses.';
+      client.useMemory = jest.fn().mockResolvedValue(memoryContent);
+      client.agentConfigs = null;
+
+      const messages = [
+        {
+          messageId: 'msg-1',
+          parentMessageId: null,
+          sender: 'User',
+          text: 'Hello',
+          isCreatedByUser: true,
+        },
+      ];
+
+      await expect(
+        client.buildMessages(messages, null, {
+          instructions: 'Base instructions',
+          additional_instructions: null,
+        }),
+      ).resolves.not.toThrow();
+
+      expect(client.options.agent.instructions).toContain(memoryContent);
+    });
+
+    it('should handle empty agentConfigs map', async () => {
+      const memoryContent = 'User likes detailed explanations.';
+      client.useMemory = jest.fn().mockResolvedValue(memoryContent);
+      client.agentConfigs = new Map();
+
+      const messages = [
+        {
+          messageId: 'msg-1',
+          parentMessageId: null,
+          sender: 'User',
+          text: 'Hello',
+          isCreatedByUser: true,
+        },
+      ];
+
+      await expect(
+        client.buildMessages(messages, null, {
+          instructions: 'Base instructions',
+          additional_instructions: null,
+        }),
+      ).resolves.not.toThrow();
+
+      expect(client.options.agent.instructions).toContain(memoryContent);
+    });
+  });
 });
```