mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 17:00:15 +01:00
* wip: initial cache control implementation, add typing for transactions handling * feat: first pass of Anthropic Prompt Caching * feat: standardize stream usage as pass in when calculating token counts * feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes * chore: imports order * refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount * feat: more accurate token counting using stream usage data * feat: Improve token counting accuracy with stream usage data * refactor: ensure more accurate than not token estimations if custom instructions or files are not being resent with every request * refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts * ci: fix tests
139 lines
4.7 KiB
JavaScript
139 lines
4.7 KiB
JavaScript
const { Transaction } = require('./Transaction');
|
|
const { logger } = require('~/config');
|
|
|
|
/**
 * Records token spending as up to two transactions: one for the prompt tokens and one for
 * the completion tokens.
 *
 * - A prompt transaction is created only when `promptTokens >= 0`.
 * - When `completionTokens` is `undefined` or `NaN`, no completion transaction is created
 *   and the function returns right after the (optional) prompt transaction.
 * - A negative `completionTokens` is clamped to zero, so the `rawAmount` handed to
 *   `Transaction.create` is never positive.
 * - Errors raised while creating transactions are caught and logged via `logger.error`;
 *   they are NOT rethrown to the caller.
 *
 * @function
 * @async
 * @param {Object} txData - Transaction data; spread as-is into every created transaction.
 * @param {mongoose.Schema.Types.ObjectId} txData.user - The user ID.
 * @param {String} txData.conversationId - The ID of the conversation.
 * @param {String} txData.model - The model name.
 * @param {String} txData.context - The context in which the transaction is made.
 * @param {EndpointTokenConfig} [txData.endpointTokenConfig] - The current endpoint token config.
 * @param {String} [txData.valueKey] - The value key (optional).
 * @param {Object} tokenUsage - The number of tokens used.
 * @param {Number} [tokenUsage.promptTokens] - The number of prompt tokens used.
 * @param {Number} [tokenUsage.completionTokens] - The number of completion tokens used.
 * @returns {Promise<void>} - Resolves with nothing, including when a transaction fails.
 */
const spendTokens = async (txData, tokenUsage) => {
  const { promptTokens, completionTokens } = tokenUsage;
  logger.debug(
    `[spendTokens] conversationId: ${txData.conversationId}${
      txData?.context ? ` | Context: ${txData?.context}` : ''
    } | Token usage: `,
    {
      promptTokens,
      completionTokens,
    },
  );
  let prompt, completion;
  try {
    // `undefined >= 0` and `NaN >= 0` are both false, so missing or negative counts are skipped.
    if (promptTokens >= 0) {
      prompt = await Transaction.create({
        ...txData,
        tokenType: 'prompt',
        rawAmount: -promptTokens,
      });
    }

    // A count of 0 is still recorded; only a missing or NaN count short-circuits here.
    if (completionTokens === undefined || Number.isNaN(completionTokens)) {
      logger.debug('[spendTokens] !completionTokens', { prompt, completion });
      return;
    }

    completion = await Transaction.create({
      ...txData,
      tokenType: 'completion',
      // Clamp at zero: a negative count must not flip the sign of the recorded amount.
      rawAmount: -Math.max(completionTokens, 0),
    });

    if (prompt && completion) {
      // NOTE(review): assumes each result exposes its value under a key named after the
      // token type (`prompt` / `completion`) — confirm against Transaction.create.
      logger.debug('[spendTokens] Transaction data record against balance:', {
        user: txData.user,
        prompt: prompt.prompt,
        promptRate: prompt.rate,
        completion: completion.completion,
        completionRate: completion.rate,
        balance: completion.balance,
      });
    }
  } catch (err) {
    // Deliberately best-effort: log and swallow the failure.
    logger.error('[spendTokens]', err);
  }
};
|
|
|
|
/**
 * Records structured token spending: one structured transaction for the prompt tokens
 * (split into input / write / read counts) and one regular transaction for the
 * completion tokens.
 *
 * - The prompt transaction is created only when `tokenUsage.promptTokens` is truthy; any
 *   of `input`, `write`, `read` that is missing defaults to 0.
 * - The completion transaction is created only when `completionTokens` is truthy, so a
 *   count of 0 records nothing.
 * - Negative counts are clamped to zero, so the `rawAmount` handed to `Transaction` is
 *   never positive.
 * - Errors raised while creating transactions are caught and logged via `logger.error`;
 *   they are NOT rethrown to the caller.
 *
 * @function
 * @async
 * @param {Object} txData - Transaction data; spread as-is into every created transaction.
 * @param {mongoose.Schema.Types.ObjectId} txData.user - The user ID.
 * @param {String} txData.conversationId - The ID of the conversation.
 * @param {String} txData.model - The model name.
 * @param {String} txData.context - The context in which the transaction is made.
 * @param {EndpointTokenConfig} [txData.endpointTokenConfig] - The current endpoint token config.
 * @param {String} [txData.valueKey] - The value key (optional).
 * @param {Object} tokenUsage - The number of tokens used.
 * @param {Object} [tokenUsage.promptTokens] - The number of prompt tokens used.
 * @param {Number} [tokenUsage.promptTokens.input] - The number of input tokens.
 * @param {Number} [tokenUsage.promptTokens.write] - The number of write tokens.
 * @param {Number} [tokenUsage.promptTokens.read] - The number of read tokens.
 * @param {Number} [tokenUsage.completionTokens] - The number of completion tokens used.
 * @returns {Promise<void>} - Resolves with nothing, including when a transaction fails.
 */
const spendStructuredTokens = async (txData, tokenUsage) => {
  const { promptTokens, completionTokens } = tokenUsage;
  logger.debug(
    `[spendStructuredTokens] conversationId: ${txData.conversationId}${
      txData?.context ? ` | Context: ${txData?.context}` : ''
    } | Token usage: `,
    {
      promptTokens,
      completionTokens,
    },
  );
  let prompt, completion;
  try {
    if (promptTokens) {
      const { input = 0, write = 0, read = 0 } = promptTokens;
      // Math.max coerces to a number and clamps at zero, which keeps the sum numeric and
      // prevents a negative component from flipping the sign of the recorded amount.
      const inputTokens = Math.max(input, 0);
      const writeTokens = Math.max(write, 0);
      const readTokens = Math.max(read, 0);
      prompt = await Transaction.createStructured({
        ...txData,
        tokenType: 'prompt',
        rawAmount: -(inputTokens + writeTokens + readTokens),
        inputTokens,
        writeTokens,
        readTokens,
      });
    }

    if (completionTokens) {
      completion = await Transaction.create({
        ...txData,
        tokenType: 'completion',
        // Clamp at zero for the same reason as the prompt components.
        rawAmount: -Math.max(completionTokens, 0),
      });
    }

    if (prompt && completion) {
      // NOTE(review): reads `tokenValue` from both results — confirm that the objects
      // returned by Transaction.createStructured / Transaction.create expose that key.
      logger.debug('[spendStructuredTokens] Transaction data record against balance:', {
        user: txData.user,
        prompt: prompt.tokenValue,
        promptRate: prompt.rate,
        completion: completion.tokenValue,
        completionRate: completion.rate,
        balance: completion.balance,
      });
    }
  } catch (err) {
    // Deliberately best-effort: log and swallow the failure.
    logger.error('[spendStructuredTokens]', err);
  }
};
|
|
|
|
module.exports = { spendTokens, spendStructuredTokens };
|