mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
💾 feat: Anthropic Prompt Caching (#3670)
* wip: initial cache control implementation, add typing for transactions handling * feat: first pass of Anthropic Prompt Caching * feat: standardize stream usage as pass in when calculating token counts * feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes * chore: imports order * refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount * feat: more accurate token counting using stream usage data * feat: Improve token counting accuracy with stream usage data * refactor: ensure more accurate than not token estimations if custom instructions or files are not being resent with every request * refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts * ci: fix tests
This commit is contained in:
parent
9f4c516615
commit
a45b384bbc
17 changed files with 973 additions and 34 deletions
|
|
@ -70,6 +70,17 @@ const tokenValues = Object.assign(
|
|||
bedrockValues,
|
||||
);
|
||||
|
||||
/**
|
||||
* Mapping of model token sizes to their respective multipliers for cached input, read and write.
|
||||
* See Anthropic's documentation on this: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#pricing
|
||||
* The rates are 1 USD per 1M tokens.
|
||||
* @type {Object.<string, {write: number, read: number }>}
|
||||
*/
|
||||
const cacheTokenValues = {
|
||||
'claude-3-5-sonnet': { write: 3.75, read: 0.3 },
|
||||
'claude-3-haiku': { write: 0.3, read: 0.03 },
|
||||
};
|
||||
|
||||
/**
|
||||
* Retrieves the key associated with a given model name.
|
||||
*
|
||||
|
|
@ -122,7 +133,7 @@ const getValueKey = (model, endpoint) => {
|
|||
*
|
||||
* @param {Object} params - The parameters for the function.
|
||||
* @param {string} [params.valueKey] - The key corresponding to the model name.
|
||||
* @param {string} [params.tokenType] - The type of token (e.g., 'prompt' or 'completion').
|
||||
* @param {'prompt' | 'completion'} [params.tokenType] - The type of token (e.g., 'prompt' or 'completion').
|
||||
* @param {string} [params.model] - The model name to derive the value key from if not provided.
|
||||
* @param {string} [params.endpoint] - The endpoint name to derive the value key from if not provided.
|
||||
* @param {EndpointTokenConfig} [params.endpointTokenConfig] - The token configuration for the endpoint.
|
||||
|
|
@ -147,7 +158,41 @@ const getMultiplier = ({ valueKey, tokenType, model, endpoint, endpointTokenConf
|
|||
}
|
||||
|
||||
// If we got this far, and values[tokenType] is undefined somehow, return a rough average of default multipliers
|
||||
return tokenValues[valueKey][tokenType] ?? defaultRate;
|
||||
return tokenValues[valueKey]?.[tokenType] ?? defaultRate;
|
||||
};
|
||||
|
||||
module.exports = { tokenValues, getValueKey, getMultiplier, defaultRate };
|
||||
/**
|
||||
* Retrieves the cache multiplier for a given value key and token type. If no value key is provided,
|
||||
* it attempts to derive it from the model name.
|
||||
*
|
||||
* @param {Object} params - The parameters for the function.
|
||||
* @param {string} [params.valueKey] - The key corresponding to the model name.
|
||||
* @param {'write' | 'read'} [params.cacheType] - The type of token (e.g., 'write' or 'read').
|
||||
* @param {string} [params.model] - The model name to derive the value key from if not provided.
|
||||
* @param {string} [params.endpoint] - The endpoint name to derive the value key from if not provided.
|
||||
* @param {EndpointTokenConfig} [params.endpointTokenConfig] - The token configuration for the endpoint.
|
||||
* @returns {number | null} The multiplier for the given parameters, or `null` if not found.
|
||||
*/
|
||||
const getCacheMultiplier = ({ valueKey, cacheType, model, endpoint, endpointTokenConfig }) => {
|
||||
if (endpointTokenConfig) {
|
||||
return endpointTokenConfig?.[model]?.[cacheType] ?? null;
|
||||
}
|
||||
|
||||
if (valueKey && cacheType) {
|
||||
return cacheTokenValues[valueKey]?.[cacheType] ?? null;
|
||||
}
|
||||
|
||||
if (!cacheType || !model) {
|
||||
return null;
|
||||
}
|
||||
|
||||
valueKey = getValueKey(model, endpoint);
|
||||
if (!valueKey) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// If we got this far, and values[cacheType] is undefined somehow, return a rough average of default multipliers
|
||||
return cacheTokenValues[valueKey]?.[cacheType] ?? null;
|
||||
};
|
||||
|
||||
module.exports = { tokenValues, getValueKey, getMultiplier, getCacheMultiplier, defaultRate };
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue