mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-22 19:30:15 +01:00
💾 feat: Anthropic Prompt Caching (#3670)
* wip: initial cache control implementation, add typing for transactions handling * feat: first pass of Anthropic Prompt Caching * feat: standardize stream usage as pass in when calculating token counts * feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes * chore: imports order * refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount * feat: more accurate token counting using stream usage data * feat: Improve token counting accuracy with stream usage data * refactor: ensure more accurate than not token estimations if custom instructions or files are not being resent with every request * refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts * ci: fix tests
This commit is contained in:
parent
9f4c516615
commit
a45b384bbc
17 changed files with 973 additions and 34 deletions
|
|
@ -211,7 +211,21 @@ describe('AnthropicClient', () => {
|
|||
expect(anthropicClient._options.defaultHeaders).toBeDefined();
|
||||
expect(anthropicClient._options.defaultHeaders).toHaveProperty('anthropic-beta');
|
||||
expect(anthropicClient._options.defaultHeaders['anthropic-beta']).toBe(
|
||||
'max-tokens-3-5-sonnet-2024-07-15',
|
||||
'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
|
||||
);
|
||||
});
|
||||
|
||||
it('should add beta header for claude-3-haiku model', () => {
|
||||
const client = new AnthropicClient('test-api-key');
|
||||
const modelOptions = {
|
||||
model: 'claude-3-haiku-2028',
|
||||
};
|
||||
client.setOptions({ modelOptions });
|
||||
const anthropicClient = client.getClient(modelOptions);
|
||||
expect(anthropicClient._options.defaultHeaders).toBeDefined();
|
||||
expect(anthropicClient._options.defaultHeaders).toHaveProperty('anthropic-beta');
|
||||
expect(anthropicClient._options.defaultHeaders['anthropic-beta']).toBe(
|
||||
'prompt-caching-2024-07-31',
|
||||
);
|
||||
});
|
||||
|
||||
|
|
@ -226,4 +240,145 @@ describe('AnthropicClient', () => {
|
|||
expect(anthropicClient.defaultHeaders).not.toHaveProperty('anthropic-beta');
|
||||
});
|
||||
});
|
||||
|
||||
describe('calculateCurrentTokenCount', () => {
|
||||
let client;
|
||||
|
||||
beforeEach(() => {
|
||||
client = new AnthropicClient('test-api-key');
|
||||
});
|
||||
|
||||
it('should calculate correct token count when usage is provided', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 10,
|
||||
msg2: 20,
|
||||
currentMsg: 30,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 70,
|
||||
output_tokens: 50,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(40); // 70 - (10 + 20) = 40
|
||||
});
|
||||
|
||||
it('should return original estimate if calculation results in negative value', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 40,
|
||||
msg2: 50,
|
||||
currentMsg: 30,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 80,
|
||||
output_tokens: 50,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(30); // Original estimate
|
||||
});
|
||||
|
||||
it('should handle cache creation and read input tokens', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 10,
|
||||
msg2: 20,
|
||||
currentMsg: 30,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 50,
|
||||
cache_creation_input_tokens: 10,
|
||||
cache_read_input_tokens: 20,
|
||||
output_tokens: 40,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(50); // (50 + 10 + 20) - (10 + 20) = 50
|
||||
});
|
||||
|
||||
it('should handle missing usage properties', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 10,
|
||||
msg2: 20,
|
||||
currentMsg: 30,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
output_tokens: 40,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(30); // Original estimate
|
||||
});
|
||||
|
||||
it('should handle empty tokenCountMap', () => {
|
||||
const tokenCountMap = {};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 50,
|
||||
output_tokens: 40,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(50);
|
||||
expect(Number.isNaN(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('should handle zero values in usage', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 10,
|
||||
currentMsg: 20,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 0,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(20); // Should return original estimate
|
||||
expect(Number.isNaN(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('should handle undefined usage', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 10,
|
||||
currentMsg: 20,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = undefined;
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(20); // Should return original estimate
|
||||
expect(Number.isNaN(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('should handle non-numeric values in tokenCountMap', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 'ten',
|
||||
currentMsg: 20,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 30,
|
||||
output_tokens: 10,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(30); // Should return 30 (input_tokens) - 0 (ignored 'ten') = 30
|
||||
expect(Number.isNaN(result)).toBe(false);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue