💾 feat: Anthropic Prompt Caching (#3670)

* wip: initial cache control implementation, add typing for transactions handling * feat: first pass of Anthropic Prompt Caching * feat: standardize stream usage as pass in when calculating token counts * feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes * chore: imports order * refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount * feat: more accurate token counting using stream usage data * feat: Improve token counting accuracy with stream usage data * refactor: ensure more accurate than not token estimations if custom instructions or files are not being resent with every request * refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts * ci: fix tests
2026-03-08 00:52:37 +01:00 · 2024-08-17 03:24:09 -04:00 · 2024-08-17 03:24:09 -04:00 · a45b384bbc
commit a45b384bbc
parent 9f4c516615
17 changed files with 973 additions and 34 deletions
--- a/api/app/clients/specs/AnthropicClient.test.js
+++ b/api/app/clients/specs/AnthropicClient.test.js
@ -211,7 +211,21 @@ describe('AnthropicClient', () => {
      expect(anthropicClient._options.defaultHeaders).toBeDefined();
      expect(anthropicClient._options.defaultHeaders).toHaveProperty('anthropic-beta');
      expect(anthropicClient._options.defaultHeaders['anthropic-beta']).toBe(
-        'max-tokens-3-5-sonnet-2024-07-15',
+        'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
+      );
+    });
+
+    it('should add beta header for claude-3-haiku model', () => {
+      const client = new AnthropicClient('test-api-key');
+      const modelOptions = {
+        model: 'claude-3-haiku-2028',
+      };
+      client.setOptions({ modelOptions });
+      const anthropicClient = client.getClient(modelOptions);
+      expect(anthropicClient._options.defaultHeaders).toBeDefined();
+      expect(anthropicClient._options.defaultHeaders).toHaveProperty('anthropic-beta');
+      expect(anthropicClient._options.defaultHeaders['anthropic-beta']).toBe(
+        'prompt-caching-2024-07-31',
      );
    });

@ -226,4 +240,145 @@ describe('AnthropicClient', () => {
      expect(anthropicClient.defaultHeaders).not.toHaveProperty('anthropic-beta');
    });
  });
+
+  describe('calculateCurrentTokenCount', () => {
+    let client;
+
+    beforeEach(() => {
+      client = new AnthropicClient('test-api-key');
+    });
+
+    it('should calculate correct token count when usage is provided', () => {
+      const tokenCountMap = {
+        msg1: 10,
+        msg2: 20,
+        currentMsg: 30,
+      };
+      const currentMessageId = 'currentMsg';
+      const usage = {
+        input_tokens: 70,
+        output_tokens: 50,
+      };
+
+      const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
+
+      expect(result).toBe(40); // 70 - (10 + 20) = 40
+    });
+
+    it('should return original estimate if calculation results in negative value', () => {
+      const tokenCountMap = {
+        msg1: 40,
+        msg2: 50,
+        currentMsg: 30,
+      };
+      const currentMessageId = 'currentMsg';
+      const usage = {
+        input_tokens: 80,
+        output_tokens: 50,
+      };
+
+      const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
+
+      expect(result).toBe(30); // Original estimate
+    });
+
+    it('should handle cache creation and read input tokens', () => {
+      const tokenCountMap = {
+        msg1: 10,
+        msg2: 20,
+        currentMsg: 30,
+      };
+      const currentMessageId = 'currentMsg';
+      const usage = {
+        input_tokens: 50,
+        cache_creation_input_tokens: 10,
+        cache_read_input_tokens: 20,
+        output_tokens: 40,
+      };
+
+      const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
+
+      expect(result).toBe(50); // (50 + 10 + 20) - (10 + 20) = 50
+    });
+
+    it('should handle missing usage properties', () => {
+      const tokenCountMap = {
+        msg1: 10,
+        msg2: 20,
+        currentMsg: 30,
+      };
+      const currentMessageId = 'currentMsg';
+      const usage = {
+        output_tokens: 40,
+      };
+
+      const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
+
+      expect(result).toBe(30); // Original estimate
+    });
+
+    it('should handle empty tokenCountMap', () => {
+      const tokenCountMap = {};
+      const currentMessageId = 'currentMsg';
+      const usage = {
+        input_tokens: 50,
+        output_tokens: 40,
+      };
+
+      const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
+
+      expect(result).toBe(50);
+      expect(Number.isNaN(result)).toBe(false);
+    });
+
+    it('should handle zero values in usage', () => {
+      const tokenCountMap = {
+        msg1: 10,
+        currentMsg: 20,
+      };
+      const currentMessageId = 'currentMsg';
+      const usage = {
+        input_tokens: 0,
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0,
+        output_tokens: 0,
+      };
+
+      const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
+
+      expect(result).toBe(20); // Should return original estimate
+      expect(Number.isNaN(result)).toBe(false);
+    });
+
+    it('should handle undefined usage', () => {
+      const tokenCountMap = {
+        msg1: 10,
+        currentMsg: 20,
+      };
+      const currentMessageId = 'currentMsg';
+      const usage = undefined;
+
+      const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
+
+      expect(result).toBe(20); // Should return original estimate
+      expect(Number.isNaN(result)).toBe(false);
+    });
+
+    it('should handle non-numeric values in tokenCountMap', () => {
+      const tokenCountMap = {
+        msg1: 'ten',
+        currentMsg: 20,
+      };
+      const currentMessageId = 'currentMsg';
+      const usage = {
+        input_tokens: 30,
+        output_tokens: 10,
+      };
+
+      const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
+
+      expect(result).toBe(30); // Should return 30 (input_tokens) - 0 (ignored 'ten') = 30
+      expect(Number.isNaN(result)).toBe(false);
+    });
+  });
 });