mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-23 20:00:15 +01:00
💾 feat: Anthropic Prompt Caching (#3670)
* wip: initial cache control implementation, add typing for transactions handling
* feat: first pass of Anthropic Prompt Caching
* feat: standardize stream usage as pass in when calculating token counts
* feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes
* chore: imports order
* refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount
* feat: more accurate token counting using stream usage data
* feat: Improve token counting accuracy with stream usage data
* refactor: ensure more accurate than not token estimations if custom instructions or files are not being resent with every request
* refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts
* ci: fix tests
This commit is contained in:
parent
9f4c516615
commit
a45b384bbc
17 changed files with 973 additions and 34 deletions
43
api/app/clients/prompts/addCacheControl.js
Normal file
43
api/app/clients/prompts/addCacheControl.js
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
/**
 * Anthropic API: Adds cache control to the two most recent user messages in the payload.
 *
 * Messages are scanned from newest to oldest. Each of the first two messages whose
 * `role` is `'user'` receives a single `cache_control: { type: 'ephemeral' }` marker:
 *  - string content is wrapped in a one-element array holding a `text` block with the marker;
 *  - array content gets the marker on its final block only.
 *
 * Only one block per message is marked because a cache breakpoint covers the whole
 * prefix up to and including the marked block, and Anthropic rejects requests that
 * carry more than four breakpoints. Marking every block of a multi-part message
 * (e.g. several images plus text) could exceed that limit.
 *
 * The input array and its message objects are never mutated; modified messages are
 * replaced by shallow copies in the returned array. User messages whose content is
 * neither a string nor a non-empty array still count toward the limit of two but are
 * left unchanged.
 *
 * @param {Array<AnthropicMessage>} messages - The array of message objects.
 * @returns {Array<AnthropicMessage>} - The updated array of message objects with cache control added.
 *   The original `messages` value is returned as-is when it is not an array or has fewer than two entries.
 */
function addCacheControl(messages) {
  if (!Array.isArray(messages) || messages.length < 2) {
    return messages;
  }

  // Shallow copy so the caller's array is not modified.
  const updatedMessages = [...messages];
  let userMessagesFound = 0;

  for (let i = updatedMessages.length - 1; i >= 0 && userMessagesFound < 2; i--) {
    const message = updatedMessages[i];
    if (message.role !== 'user') {
      continue;
    }
    userMessagesFound++;

    const { content } = message;
    if (typeof content === 'string') {
      updatedMessages[i] = {
        ...message,
        content: [
          {
            type: 'text',
            text: content,
            cache_control: { type: 'ephemeral' },
          },
        ],
      };
    } else if (Array.isArray(content) && content.length > 0) {
      // One breakpoint on the final block caches the entire message prefix
      // while keeping the total breakpoint count at two or fewer.
      const lastIndex = content.length - 1;
      updatedMessages[i] = {
        ...message,
        content: content.map((item, index) =>
          index === lastIndex ? { ...item, cache_control: { type: 'ephemeral' } } : item,
        ),
      };
    }
  }

  return updatedMessages;
}
|
||||
|
||||
module.exports = addCacheControl;
|
||||
Loading…
Add table
Add a link
Reference in a new issue