mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-21 10:50:14 +01:00
💾 feat: Anthropic Prompt Caching (#3670)
* wip: initial cache control implementation, add typing for transactions handling
* feat: first pass of Anthropic Prompt Caching
* feat: standardize stream usage as pass in when calculating token counts
* feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes
* chore: imports order
* refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount
* feat: more accurate token counting using stream usage data
* feat: Improve token counting accuracy with stream usage data
* refactor: ensure more accurate than not token estimations if custom instructions or files are not being resent with every request
* refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts
* ci: fix tests
This commit is contained in:
parent
9f4c516615
commit
a45b384bbc
17 changed files with 973 additions and 34 deletions
|
|
@ -11,7 +11,7 @@ const { logger } = require('~/config');
|
|||
* @param {String} txData.conversationId - The ID of the conversation.
|
||||
* @param {String} txData.model - The model name.
|
||||
* @param {String} txData.context - The context in which the transaction is made.
|
||||
* @param {String} [txData.endpointTokenConfig] - The current endpoint token config.
|
||||
* @param {EndpointTokenConfig} [txData.endpointTokenConfig] - The current endpoint token config.
|
||||
* @param {String} [txData.valueKey] - The value key (optional).
|
||||
* @param {Object} tokenUsage - The number of tokens used.
|
||||
* @param {Number} tokenUsage.promptTokens - The number of prompt tokens used.
|
||||
|
|
@ -66,4 +66,74 @@ const spendTokens = async (txData, tokenUsage) => {
|
|||
}
|
||||
};
|
||||
|
||||
module.exports = spendTokens;
|
||||
/**
 * Creates transactions to record the spending of structured tokens.
 *
 * Up to two transactions are created, in order:
 *  1. a structured "prompt" transaction via `Transaction.createStructured`, when
 *     `tokenUsage.promptTokens` is truthy;
 *  2. a "completion" transaction via `Transaction.create`, when
 *     `tokenUsage.completionTokens` is truthy (so a count of 0 records nothing).
 *
 * Errors raised while creating either transaction are caught and logged through
 * `logger.error`; they are not rethrown to the caller.
 *
 * @function
 * @async
 * @param {Object} txData - Transaction data.
 * @param {mongoose.Schema.Types.ObjectId} txData.user - The user ID.
 * @param {String} txData.conversationId - The ID of the conversation.
 * @param {String} txData.model - The model name.
 * @param {String} txData.context - The context in which the transaction is made.
 * @param {EndpointTokenConfig} [txData.endpointTokenConfig] - The current endpoint token config.
 * @param {String} [txData.valueKey] - The value key (optional).
 * @param {Object} tokenUsage - The number of tokens used.
 * @param {Object} [tokenUsage.promptTokens] - The number of prompt tokens used.
 * @param {Number} [tokenUsage.promptTokens.input=0] - The number of input tokens.
 * @param {Number} [tokenUsage.promptTokens.write=0] - The number of write tokens.
 * @param {Number} [tokenUsage.promptTokens.read=0] - The number of read tokens.
 * @param {Number} [tokenUsage.completionTokens] - The number of completion tokens used.
 * @returns {Promise<void>} - Returns nothing.
 */
const spendStructuredTokens = async (txData, tokenUsage) => {
  const { promptTokens, completionTokens } = tokenUsage;
  logger.debug(
    `[spendStructuredTokens] conversationId: ${txData.conversationId}${
      txData.context ? ` | Context: ${txData.context}` : ''
    } | Token usage: `,
    {
      promptTokens,
      completionTokens,
    },
  );

  let prompt;
  let completion;
  try {
    if (promptTokens) {
      // Missing categories count as zero so a partial usage object is still recorded.
      const { input = 0, write = 0, read = 0 } = promptTokens;
      const promptAmount = input + write + read;
      prompt = await Transaction.createStructured({
        ...txData,
        tokenType: 'prompt',
        // The raw amount is negated: this transaction records spending.
        rawAmount: -promptAmount,
        inputTokens: input,
        writeTokens: write,
        readTokens: read,
      });
    }

    if (completionTokens) {
      completion = await Transaction.create({
        ...txData,
        tokenType: 'completion',
        rawAmount: -completionTokens,
      });
    }

    // The summary is only logged when both transactions were created, because
    // it reads fields from both of them.
    if (prompt && completion) {
      logger.debug('[spendStructuredTokens] Transaction data record against balance:', {
        user: txData.user,
        prompt: prompt.tokenValue,
        promptRate: prompt.rate,
        completion: completion.tokenValue,
        completionRate: completion.rate,
        balance: completion.balance,
      });
    }
  } catch (err) {
    // Best-effort: a failed transaction write is logged, never propagated.
    logger.error('[spendStructuredTokens]', err);
  }
};
|
||||
|
||||
module.exports = { spendTokens, spendStructuredTokens };
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue