💾 feat: Anthropic Prompt Caching (#3670)

* wip: initial cache control implementation, add typing for transactions handling

* feat: first pass of Anthropic Prompt Caching

* feat: standardize stream usage as pass in when calculating token counts

* feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes

* chore: imports order

* refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount

* feat: more accurate token counting using stream usage data

* feat: Improve token counting accuracy with stream usage data

* refactor: ensure more accurate than not token estimations if custom instructions or files are not being resent with every request

* refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts

* ci: fix tests
This commit is contained in:
Danny Avila 2024-08-17 03:24:09 -04:00 committed by GitHub
parent 9f4c516615
commit a45b384bbc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 973 additions and 34 deletions

View file

@ -211,7 +211,21 @@ describe('AnthropicClient', () => {
expect(anthropicClient._options.defaultHeaders).toBeDefined();
expect(anthropicClient._options.defaultHeaders).toHaveProperty('anthropic-beta');
expect(anthropicClient._options.defaultHeaders['anthropic-beta']).toBe(
'max-tokens-3-5-sonnet-2024-07-15',
'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
);
});
it('should add beta header for claude-3-haiku model', () => {
const client = new AnthropicClient('test-api-key');
const modelOptions = {
model: 'claude-3-haiku-2028',
};
client.setOptions({ modelOptions });
const anthropicClient = client.getClient(modelOptions);
expect(anthropicClient._options.defaultHeaders).toBeDefined();
expect(anthropicClient._options.defaultHeaders).toHaveProperty('anthropic-beta');
expect(anthropicClient._options.defaultHeaders['anthropic-beta']).toBe(
'prompt-caching-2024-07-31',
);
});
@ -226,4 +240,145 @@ describe('AnthropicClient', () => {
expect(anthropicClient.defaultHeaders).not.toHaveProperty('anthropic-beta');
});
});
describe('calculateCurrentTokenCount', () => {
let client;
beforeEach(() => {
client = new AnthropicClient('test-api-key');
});
it('should calculate correct token count when usage is provided', () => {
const tokenCountMap = {
msg1: 10,
msg2: 20,
currentMsg: 30,
};
const currentMessageId = 'currentMsg';
const usage = {
input_tokens: 70,
output_tokens: 50,
};
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
expect(result).toBe(40); // 70 - (10 + 20) = 40
});
it('should return original estimate if calculation results in negative value', () => {
const tokenCountMap = {
msg1: 40,
msg2: 50,
currentMsg: 30,
};
const currentMessageId = 'currentMsg';
const usage = {
input_tokens: 80,
output_tokens: 50,
};
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
expect(result).toBe(30); // Original estimate
});
it('should handle cache creation and read input tokens', () => {
const tokenCountMap = {
msg1: 10,
msg2: 20,
currentMsg: 30,
};
const currentMessageId = 'currentMsg';
const usage = {
input_tokens: 50,
cache_creation_input_tokens: 10,
cache_read_input_tokens: 20,
output_tokens: 40,
};
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
expect(result).toBe(50); // (50 + 10 + 20) - (10 + 20) = 50
});
it('should handle missing usage properties', () => {
const tokenCountMap = {
msg1: 10,
msg2: 20,
currentMsg: 30,
};
const currentMessageId = 'currentMsg';
const usage = {
output_tokens: 40,
};
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
expect(result).toBe(30); // Original estimate
});
it('should handle empty tokenCountMap', () => {
const tokenCountMap = {};
const currentMessageId = 'currentMsg';
const usage = {
input_tokens: 50,
output_tokens: 40,
};
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
expect(result).toBe(50);
expect(Number.isNaN(result)).toBe(false);
});
it('should handle zero values in usage', () => {
const tokenCountMap = {
msg1: 10,
currentMsg: 20,
};
const currentMessageId = 'currentMsg';
const usage = {
input_tokens: 0,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
output_tokens: 0,
};
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
expect(result).toBe(20); // Should return original estimate
expect(Number.isNaN(result)).toBe(false);
});
it('should handle undefined usage', () => {
const tokenCountMap = {
msg1: 10,
currentMsg: 20,
};
const currentMessageId = 'currentMsg';
const usage = undefined;
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
expect(result).toBe(20); // Should return original estimate
expect(Number.isNaN(result)).toBe(false);
});
it('should handle non-numeric values in tokenCountMap', () => {
const tokenCountMap = {
msg1: 'ten',
currentMsg: 20,
};
const currentMessageId = 'currentMsg';
const usage = {
input_tokens: 30,
output_tokens: 10,
};
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
expect(result).toBe(30); // Should return 30 (input_tokens) - 0 (ignored 'ten') = 30
expect(Number.isNaN(result)).toBe(false);
});
});
});