Mirror of https://github.com/danny-avila/LibreChat.git, synced 2026-01-20 09:16:13 +01:00
🪙 refactor: Collected Usage & Anthropic Prompt Caching (#11319)
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
* 🔧 refactor: Improve token calculation in AgentClient.recordCollectedUsage
  - Updated the token calculation logic to sum output tokens directly from all entries, addressing issues with negative values in parallel execution scenarios.
  - Added comments for clarity on the usage of input tokens and output tokens.
  - Introduced a new test file for comprehensive testing of the recordCollectedUsage function, covering various execution scenarios including sequential and parallel processing, cache token handling, and model fallback logic.

* 🔧 refactor: Anthropic `promptCache` handling in LLM configuration

* 🔧 test: Add comprehensive test for cache token handling in recordCollectedUsage
  - Introduced a new test case to validate the handling of cache tokens across multiple tool calls in the recordCollectedUsage function.
  - Ensured correct calculations for input and output tokens, including scenarios with cache creation and reading.
  - Verified the expected interactions with token spending methods to enhance the robustness of the token management logic.
parent 1329e16d3a
commit 2a50c372ef
8 changed files with 828 additions and 40 deletions
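The first bullet of the commit message is the behavioral core of the refactor: output tokens are now summed directly from every collected usage entry instead of being derived from a running total, which could go negative when parallel tool calls interleave. A minimal sketch of that idea in TypeScript follows; the entry shape and field names are assumptions for illustration, not the actual AgentClient.recordCollectedUsage code:

/**
 * Illustrative only: field names and this aggregation are assumptions,
 * not the actual AgentClient.recordCollectedUsage implementation.
 */
interface UsageEntry {
  input_tokens?: number;
  output_tokens?: number;
  cache_creation_input_tokens?: number;
  cache_read_input_tokens?: number;
}

/**
 * Sum output tokens directly from every entry; subtracting from a running
 * total can go negative when parallel tool calls report overlapping usage.
 */
function sumOutputTokens(collectedUsage: UsageEntry[]): number {
  return collectedUsage.reduce((sum, entry) => sum + (entry.output_tokens ?? 0), 0);
}

Cache creation and cache read tokens from Anthropic prompt caching ride along on the same entries, which is what the new cache-token test case in the commit message exercises.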
@@ -87,7 +87,7 @@ describe('getLLMConfig', () => {
     expect(result.llmConfig.thinking).toHaveProperty('budget_tokens', 2000);
   });

-  it('should add "context-1m" beta header for claude-sonnet-4 model', () => {
+  it('should add "context-1m" beta header and promptCache boolean for claude-sonnet-4 model', () => {
     const modelOptions = {
       model: 'claude-sonnet-4-20250514',
       promptCache: true,
@@ -98,9 +98,10 @@ describe('getLLMConfig', () => {
     expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
     const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
     expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
+    expect(result.llmConfig.promptCache).toBe(true);
   });

-  it('should add "context-1m" beta header for claude-sonnet-4 model formats', () => {
+  it('should add "context-1m" beta header and promptCache boolean for claude-sonnet-4 model formats', () => {
     const modelVariations = [
       'claude-sonnet-4-20250514',
       'claude-sonnet-4-latest',
@@ -115,10 +116,11 @@ describe('getLLMConfig', () => {
       expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
       const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
       expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
+      expect(result.llmConfig.promptCache).toBe(true);
     });
   });

-  it('should not add beta headers for claude-opus-4-5 model (prompt caching no longer needs header)', () => {
+  it('should pass promptCache boolean for claude-opus-4-5 model (no beta header needed)', () => {
     const modelOptions = {
       model: 'claude-opus-4-5',
       promptCache: true,
@@ -126,9 +128,10 @@ describe('getLLMConfig', () => {
     const result = getLLMConfig('test-key', { modelOptions });
     const clientOptions = result.llmConfig.clientOptions;
     expect(clientOptions?.defaultHeaders).toBeUndefined();
+    expect(result.llmConfig.promptCache).toBe(true);
   });

-  it('should not add beta headers for claude-opus-4-5 model formats (prompt caching no longer needs header)', () => {
+  it('should pass promptCache boolean for claude-opus-4-5 model formats (no beta header needed)', () => {
     const modelVariations = [
       'claude-opus-4-5',
       'claude-opus-4-5-20250420',
@@ -141,6 +144,7 @@ describe('getLLMConfig', () => {
       const result = getLLMConfig('test-key', { modelOptions });
       const clientOptions = result.llmConfig.clientOptions;
       expect(clientOptions?.defaultHeaders).toBeUndefined();
+      expect(result.llmConfig.promptCache).toBe(true);
     });
   });

@@ -299,10 +303,11 @@ describe('getLLMConfig', () => {
       },
     });

-    // claude-3-5-sonnet supports prompt caching and should get the max-tokens header
+    // claude-3-5-sonnet supports prompt caching and should get the max-tokens header and promptCache boolean
     expect(result.llmConfig.clientOptions?.defaultHeaders).toEqual({
       'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
     });
+    expect(result.llmConfig.promptCache).toBe(true);
   });

   it('should handle thinking and thinkingBudget options', () => {
@@ -512,6 +517,8 @@ describe('getLLMConfig', () => {
     expect(result.llmConfig.clientOptions?.defaultHeaders).toEqual({
       'anthropic-beta': 'token-efficient-tools-2025-02-19,output-128k-2025-02-19',
     });
+    // Should pass promptCache boolean
+    expect(result.llmConfig.promptCache).toBe(true);
   });

   it('should handle web search functionality like production', () => {
@@ -1160,21 +1167,66 @@ describe('getLLMConfig', () => {
   it('should handle prompt cache support logic for different models', () => {
     const testCases = [
       // Models that support prompt cache (and have other beta headers)
-      { model: 'claude-3-5-sonnet', promptCache: true, shouldHaveHeaders: true },
-      { model: 'claude-3.5-sonnet-20241022', promptCache: true, shouldHaveHeaders: true },
-      { model: 'claude-3-7-sonnet', promptCache: true, shouldHaveHeaders: true },
-      { model: 'claude-3.7-sonnet-20250109', promptCache: true, shouldHaveHeaders: true },
-      { model: 'claude-sonnet-4-20250514', promptCache: true, shouldHaveHeaders: true },
+      {
+        model: 'claude-3-5-sonnet',
+        promptCache: true,
+        shouldHaveHeaders: true,
+        shouldHavePromptCache: true,
+      },
+      {
+        model: 'claude-3.5-sonnet-20241022',
+        promptCache: true,
+        shouldHaveHeaders: true,
+        shouldHavePromptCache: true,
+      },
+      {
+        model: 'claude-3-7-sonnet',
+        promptCache: true,
+        shouldHaveHeaders: true,
+        shouldHavePromptCache: true,
+      },
+      {
+        model: 'claude-3.7-sonnet-20250109',
+        promptCache: true,
+        shouldHaveHeaders: true,
+        shouldHavePromptCache: true,
+      },
+      {
+        model: 'claude-sonnet-4-20250514',
+        promptCache: true,
+        shouldHaveHeaders: true,
+        shouldHavePromptCache: true,
+      },
       // Models that support prompt cache but have no additional beta headers needed
-      { model: 'claude-3-opus', promptCache: true, shouldHaveHeaders: false },
+      {
+        model: 'claude-3-opus',
+        promptCache: true,
+        shouldHaveHeaders: false,
+        shouldHavePromptCache: true,
+      },
       // Models that don't support prompt cache
-      { model: 'claude-3-5-sonnet-latest', promptCache: true, shouldHaveHeaders: false },
-      { model: 'claude-3.5-sonnet-latest', promptCache: true, shouldHaveHeaders: false },
+      {
+        model: 'claude-3-5-sonnet-latest',
+        promptCache: true,
+        shouldHaveHeaders: false,
+        shouldHavePromptCache: false,
+      },
+      {
+        model: 'claude-3.5-sonnet-latest',
+        promptCache: true,
+        shouldHaveHeaders: false,
+        shouldHavePromptCache: false,
+      },
       // Prompt cache disabled
-      { model: 'claude-3-5-sonnet', promptCache: false, shouldHaveHeaders: false },
+      {
+        model: 'claude-3-5-sonnet',
+        promptCache: false,
+        shouldHaveHeaders: false,
+        shouldHavePromptCache: false,
+      },
     ];

-    testCases.forEach(({ model, promptCache, shouldHaveHeaders }) => {
+    testCases.forEach(({ model, promptCache, shouldHaveHeaders, shouldHavePromptCache }) => {
       const result = getLLMConfig('test-key', {
         modelOptions: { model, promptCache },
       });
@@ -1187,6 +1239,12 @@ describe('getLLMConfig', () => {
       } else {
         expect(headers).toBeUndefined();
       }
+
+      if (shouldHavePromptCache) {
+        expect(result.llmConfig.promptCache).toBe(true);
+      } else {
+        expect(result.llmConfig.promptCache).toBeUndefined();
+      }
     });
   });
 });
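Taken together, the assertions above pin down the observable contract: with promptCache enabled on a supported Claude model, getLLMConfig sets any required anthropic-beta header and also passes a promptCache boolean through on llmConfig, while unsupported models (or promptCache: false) leave it undefined. A small usage sketch mirroring those expectations (the import path is an assumption; the call shape is the one used in the tests):

// Import path is assumed for illustration; the tests above call getLLMConfig directly.
import { getLLMConfig } from './llm';

const { llmConfig } = getLLMConfig('test-key', {
  modelOptions: { model: 'claude-sonnet-4-20250514', promptCache: true },
});

const headers = llmConfig.clientOptions?.defaultHeaders as Record<string, string> | undefined;
console.log(headers?.['anthropic-beta']); // 'context-1m-2025-08-07' per the tests above
console.log(llmConfig.promptCache); // true for cache-supporting models, otherwise undefined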
@@ -155,6 +155,12 @@ function getLLMConfig(

   const supportsCacheControl =
     systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model ?? '');
+
+  /** Pass promptCache boolean for downstream cache_control application */
+  if (supportsCacheControl) {
+    (requestOptions as Record<string, unknown>).promptCache = true;
+  }
+
   const headers = getClaudeHeaders(requestOptions.model ?? '', supportsCacheControl);
   if (headers && requestOptions.clientOptions) {
     requestOptions.clientOptions.defaultHeaders = headers;
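Note that the flag itself does not add any cache breakpoints; per the comment in the hunk above, it only records that downstream message formatting may attach Anthropic cache_control markers. One way a consumer could act on it, as a hypothetical sketch rather than the project's actual formatting code ({ type: 'ephemeral' } is Anthropic's standard cache_control value):

// Hypothetical consumer sketch: if llmConfig.promptCache is true, tag the
// final system prompt content block so Anthropic caches the prefix across turns.
type ContentBlock = { type: 'text'; text: string; cache_control?: { type: 'ephemeral' } };

function maybeCacheSystemPrompt(blocks: ContentBlock[], promptCache?: boolean): ContentBlock[] {
  if (!promptCache || blocks.length === 0) {
    return blocks;
  }
  const last = blocks[blocks.length - 1];
  // Marking the last block caches everything up to and including it.
  return [...blocks.slice(0, -1), { ...last, cache_control: { type: 'ephemeral' } }];
}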
@@ -39,6 +39,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
           type: 'enabled',
           budget_tokens: 2000,
         },
+        promptCache: true,
       },
     },
     configOptions: {
@@ -87,6 +88,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
           type: 'enabled',
           budget_tokens: 3000,
         },
+        promptCache: true,
       },
     },
     configOptions: {
@@ -134,6 +136,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
           user_id: 'user123',
         },
         topK: 50,
+        promptCache: true,
       },
     },
     configOptions: {
@@ -175,6 +178,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
         metadata: {
           user_id: 'user456',
         },
+        promptCache: true,
       },
     },
     configOptions: {
@@ -187,7 +191,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
     });
   });

-  it('should apply custom headers without anthropic-beta for models that dont need it', () => {
+  it('should apply custom headers and promptCache for models that support caching', () => {
     const apiKey = 'sk-custom';
     const endpoint = 'Anthropic (via LiteLLM)';
     const options = {
@@ -218,6 +222,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
         metadata: {
           user_id: undefined,
         },
+        promptCache: true,
       },
     },
     configOptions: {
@@ -300,6 +305,9 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
       stream: true,
       topP: 0.9,
       maxTokens: 2048,
+      modelKwargs: {
+        promptCache: true,
+      },
       // temperature is dropped
       // modelKwargs.topK is dropped
       // modelKwargs.metadata is dropped completely
@@ -379,6 +387,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
         metadata: {
           user_id: 'searchUser',
         },
+        promptCache: true,
       },
     },
     configOptions: {
@@ -425,6 +434,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
           user_id: 'testUser',
         },
         topK: 40,
+        promptCache: true,
       },
     },
     configOptions: {
@@ -470,6 +480,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
         metadata: {
           user_id: 'addUser',
         },
+        promptCache: true,
         customParam1: 'value1', // Unknown params added to modelKwargs
         customParam2: 42,
       },
@@ -519,6 +530,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
         metadata: {
           user_id: 'bothUser',
         },
+        promptCache: true,
         customParam: 'customValue',
         // topK is dropped
       },
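For the OpenAI-compatible path (e.g. Anthropic behind LiteLLM), the expectations above show promptCache being carried through alongside the other Anthropic-specific params, and in the known-params case explicitly under modelKwargs. A hedged helper for reading the flag back out of such a config result; the result shape is inferred from the assertions above, and the exact getOpenAIConfig call signature is not shown in this diff:

// Assumed result shape based on the expectations above; illustrative only.
interface OpenAILikeConfig {
  llmConfig: {
    promptCache?: boolean;
    modelKwargs?: Record<string, unknown>;
  };
}

/** The flag may sit at the top level or inside modelKwargs, per the tests above. */
function promptCacheEnabled(config: OpenAILikeConfig): boolean {
  return (
    config.llmConfig.promptCache === true ||
    config.llmConfig.modelKwargs?.promptCache === true
  );
}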