mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-09 12:08:50 +01:00
💾 feat: Anthropic Prompt Caching (#3670)
* wip: initial cache control implementation, add typing for transactions handling * feat: first pass of Anthropic Prompt Caching * feat: standardize stream usage as pass in when calculating token counts * feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes * chore: imports order * refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount * feat: more accurate token counting using stream usage data * feat: Improve token counting accuracy with stream usage data * refactor: ensure more accurate than not token estimations if custom instructions or files are not being resent with every request * refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts * ci: fix tests
This commit is contained in:
parent
9f4c516615
commit
a45b384bbc
17 changed files with 973 additions and 34 deletions
|
|
@ -212,8 +212,8 @@ async function updateMessageText(req, { messageId, text }) {
|
|||
*
|
||||
* @async
|
||||
* @function updateMessage
|
||||
* @param {Object} message - The message object containing update data.
|
||||
* @param {Object} req - The request object.
|
||||
* @param {Object} message - The message object containing update data.
|
||||
* @param {string} message.messageId - The unique identifier for the message.
|
||||
* @param {string} [message.text] - The new text content of the message.
|
||||
* @param {Object[]} [message.files] - The files associated with the message.
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
const mongoose = require('mongoose');
|
||||
const { isEnabled } = require('../server/utils/handleText');
|
||||
const transactionSchema = require('./schema/transaction');
|
||||
const { getMultiplier } = require('./tx');
|
||||
const { getMultiplier, getCacheMultiplier } = require('./tx');
|
||||
const { logger } = require('~/config');
|
||||
const Balance = require('./Balance');
|
||||
const cancelRate = 1.15;
|
||||
|
||||
// Method to calculate and set the tokenValue for a transaction
|
||||
/** Method to calculate and set the tokenValue for a transaction */
|
||||
transactionSchema.methods.calculateTokenValue = function () {
|
||||
if (!this.valueKey || !this.tokenType) {
|
||||
this.tokenValue = this.rawAmount;
|
||||
|
|
@ -21,15 +21,17 @@ transactionSchema.methods.calculateTokenValue = function () {
|
|||
}
|
||||
};
|
||||
|
||||
// Static method to create a transaction and update the balance
|
||||
transactionSchema.statics.create = async function (transactionData) {
|
||||
/**
|
||||
* Static method to create a transaction and update the balance
|
||||
* @param {txData} txData - Transaction data.
|
||||
*/
|
||||
transactionSchema.statics.create = async function (txData) {
|
||||
const Transaction = this;
|
||||
|
||||
const transaction = new Transaction(transactionData);
|
||||
transaction.endpointTokenConfig = transactionData.endpointTokenConfig;
|
||||
const transaction = new Transaction(txData);
|
||||
transaction.endpointTokenConfig = txData.endpointTokenConfig;
|
||||
transaction.calculateTokenValue();
|
||||
|
||||
// Save the transaction
|
||||
await transaction.save();
|
||||
|
||||
if (!isEnabled(process.env.CHECK_BALANCE)) {
|
||||
|
|
@ -57,6 +59,104 @@ transactionSchema.statics.create = async function (transactionData) {
|
|||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Static method to create a structured transaction and update the balance
|
||||
* @param {txData} txData - Transaction data.
|
||||
*/
|
||||
transactionSchema.statics.createStructured = async function (txData) {
|
||||
const Transaction = this;
|
||||
|
||||
const transaction = new Transaction({
|
||||
...txData,
|
||||
endpointTokenConfig: txData.endpointTokenConfig,
|
||||
});
|
||||
|
||||
transaction.calculateStructuredTokenValue();
|
||||
|
||||
await transaction.save();
|
||||
|
||||
if (!isEnabled(process.env.CHECK_BALANCE)) {
|
||||
return transaction;
|
||||
}
|
||||
|
||||
let balance = await Balance.findOne({ user: transaction.user }).lean();
|
||||
let incrementValue = transaction.tokenValue;
|
||||
|
||||
if (balance && balance?.tokenCredits + incrementValue < 0) {
|
||||
incrementValue = -balance.tokenCredits;
|
||||
}
|
||||
|
||||
balance = await Balance.findOneAndUpdate(
|
||||
{ user: transaction.user },
|
||||
{ $inc: { tokenCredits: incrementValue } },
|
||||
{ upsert: true, new: true },
|
||||
).lean();
|
||||
|
||||
return {
|
||||
rate: transaction.rate,
|
||||
user: transaction.user.toString(),
|
||||
balance: balance.tokenCredits,
|
||||
[transaction.tokenType]: incrementValue,
|
||||
};
|
||||
};
|
||||
|
||||
/** Method to calculate token value for structured tokens */
|
||||
transactionSchema.methods.calculateStructuredTokenValue = function () {
|
||||
if (!this.tokenType) {
|
||||
this.tokenValue = this.rawAmount;
|
||||
return;
|
||||
}
|
||||
|
||||
const { model, endpointTokenConfig } = this;
|
||||
|
||||
if (this.tokenType === 'prompt') {
|
||||
const inputMultiplier = getMultiplier({ tokenType: 'prompt', model, endpointTokenConfig });
|
||||
const writeMultiplier =
|
||||
getCacheMultiplier({ cacheType: 'write', model, endpointTokenConfig }) ?? inputMultiplier;
|
||||
const readMultiplier =
|
||||
getCacheMultiplier({ cacheType: 'read', model, endpointTokenConfig }) ?? inputMultiplier;
|
||||
|
||||
this.rateDetail = {
|
||||
input: inputMultiplier,
|
||||
write: writeMultiplier,
|
||||
read: readMultiplier,
|
||||
};
|
||||
|
||||
const totalTokens = (this.inputTokens || 0) + (this.writeTokens || 0) + (this.readTokens || 0);
|
||||
|
||||
if (totalTokens > 0) {
|
||||
this.rate =
|
||||
(inputMultiplier * (this.inputTokens || 0) +
|
||||
writeMultiplier * (this.writeTokens || 0) +
|
||||
readMultiplier * (this.readTokens || 0)) /
|
||||
totalTokens;
|
||||
} else {
|
||||
this.rate = inputMultiplier; // Default to input rate if no tokens
|
||||
}
|
||||
|
||||
this.tokenValue =
|
||||
this.inputTokens * inputMultiplier +
|
||||
(this.writeTokens || 0) * writeMultiplier +
|
||||
(this.readTokens || 0) * readMultiplier;
|
||||
} else {
|
||||
const multiplier = Math.abs(
|
||||
getMultiplier({ tokenType: this.tokenType, model, endpointTokenConfig }),
|
||||
);
|
||||
this.rate = multiplier;
|
||||
this.tokenValue = this.rawAmount * multiplier;
|
||||
}
|
||||
|
||||
if (this.context && this.tokenType === 'completion' && this.context === 'incomplete') {
|
||||
this.tokenValue = Math.ceil(this.tokenValue * cancelRate);
|
||||
this.rate *= cancelRate;
|
||||
if (this.rateDetail) {
|
||||
this.rateDetail = Object.fromEntries(
|
||||
Object.entries(this.rateDetail).map(([k, v]) => [k, v * cancelRate]),
|
||||
);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const Transaction = mongoose.model('Transaction', transactionSchema);
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -30,6 +30,9 @@ const transactionSchema = mongoose.Schema(
|
|||
rate: Number,
|
||||
rawAmount: Number,
|
||||
tokenValue: Number,
|
||||
inputTokens: { type: Number },
|
||||
writeTokens: { type: Number },
|
||||
readTokens: { type: Number },
|
||||
},
|
||||
{
|
||||
timestamps: true,
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ const { logger } = require('~/config');
|
|||
* @param {String} txData.conversationId - The ID of the conversation.
|
||||
* @param {String} txData.model - The model name.
|
||||
* @param {String} txData.context - The context in which the transaction is made.
|
||||
* @param {String} [txData.endpointTokenConfig] - The current endpoint token config.
|
||||
* @param {EndpointTokenConfig} [txData.endpointTokenConfig] - The current endpoint token config.
|
||||
* @param {String} [txData.valueKey] - The value key (optional).
|
||||
* @param {Object} tokenUsage - The number of tokens used.
|
||||
* @param {Number} tokenUsage.promptTokens - The number of prompt tokens used.
|
||||
|
|
@ -66,4 +66,74 @@ const spendTokens = async (txData, tokenUsage) => {
|
|||
}
|
||||
};
|
||||
|
||||
module.exports = spendTokens;
|
||||
/**
|
||||
* Creates transactions to record the spending of structured tokens.
|
||||
*
|
||||
* @function
|
||||
* @async
|
||||
* @param {Object} txData - Transaction data.
|
||||
* @param {mongoose.Schema.Types.ObjectId} txData.user - The user ID.
|
||||
* @param {String} txData.conversationId - The ID of the conversation.
|
||||
* @param {String} txData.model - The model name.
|
||||
* @param {String} txData.context - The context in which the transaction is made.
|
||||
* @param {EndpointTokenConfig} [txData.endpointTokenConfig] - The current endpoint token config.
|
||||
* @param {String} [txData.valueKey] - The value key (optional).
|
||||
* @param {Object} tokenUsage - The number of tokens used.
|
||||
* @param {Object} tokenUsage.promptTokens - The number of prompt tokens used.
|
||||
* @param {Number} tokenUsage.promptTokens.input - The number of input tokens.
|
||||
* @param {Number} tokenUsage.promptTokens.write - The number of write tokens.
|
||||
* @param {Number} tokenUsage.promptTokens.read - The number of read tokens.
|
||||
* @param {Number} tokenUsage.completionTokens - The number of completion tokens used.
|
||||
* @returns {Promise<void>} - Returns nothing.
|
||||
* @throws {Error} - Throws an error if there's an issue creating the transactions.
|
||||
*/
|
||||
const spendStructuredTokens = async (txData, tokenUsage) => {
|
||||
const { promptTokens, completionTokens } = tokenUsage;
|
||||
logger.debug(
|
||||
`[spendStructuredTokens] conversationId: ${txData.conversationId}${
|
||||
txData?.context ? ` | Context: ${txData?.context}` : ''
|
||||
} | Token usage: `,
|
||||
{
|
||||
promptTokens,
|
||||
completionTokens,
|
||||
},
|
||||
);
|
||||
let prompt, completion;
|
||||
try {
|
||||
if (promptTokens) {
|
||||
const { input = 0, write = 0, read = 0 } = promptTokens;
|
||||
const promptAmount = input + write + read;
|
||||
prompt = await Transaction.createStructured({
|
||||
...txData,
|
||||
tokenType: 'prompt',
|
||||
rawAmount: -promptAmount,
|
||||
inputTokens: input,
|
||||
writeTokens: write,
|
||||
readTokens: read,
|
||||
});
|
||||
}
|
||||
|
||||
if (completionTokens) {
|
||||
completion = await Transaction.create({
|
||||
...txData,
|
||||
tokenType: 'completion',
|
||||
rawAmount: -completionTokens,
|
||||
});
|
||||
}
|
||||
|
||||
prompt &&
|
||||
completion &&
|
||||
logger.debug('[spendStructuredTokens] Transaction data record against balance:', {
|
||||
user: txData.user,
|
||||
prompt: prompt.tokenValue,
|
||||
promptRate: prompt.rate,
|
||||
completion: completion.tokenValue,
|
||||
completionRate: completion.rate,
|
||||
balance: completion.balance,
|
||||
});
|
||||
} catch (err) {
|
||||
logger.error('[spendStructuredTokens]', err);
|
||||
}
|
||||
};
|
||||
|
||||
module.exports = { spendTokens, spendStructuredTokens };
|
||||
|
|
|
|||
|
|
@ -70,6 +70,17 @@ const tokenValues = Object.assign(
|
|||
bedrockValues,
|
||||
);
|
||||
|
||||
/**
|
||||
* Mapping of model token sizes to their respective multipliers for cached input, read and write.
|
||||
* See Anthropic's documentation on this: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#pricing
|
||||
* The rates are 1 USD per 1M tokens.
|
||||
* @type {Object.<string, {write: number, read: number }>}
|
||||
*/
|
||||
const cacheTokenValues = {
|
||||
'claude-3-5-sonnet': { write: 3.75, read: 0.3 },
|
||||
'claude-3-haiku': { write: 0.3, read: 0.03 },
|
||||
};
|
||||
|
||||
/**
|
||||
* Retrieves the key associated with a given model name.
|
||||
*
|
||||
|
|
@ -122,7 +133,7 @@ const getValueKey = (model, endpoint) => {
|
|||
*
|
||||
* @param {Object} params - The parameters for the function.
|
||||
* @param {string} [params.valueKey] - The key corresponding to the model name.
|
||||
* @param {string} [params.tokenType] - The type of token (e.g., 'prompt' or 'completion').
|
||||
* @param {'prompt' | 'completion'} [params.tokenType] - The type of token (e.g., 'prompt' or 'completion').
|
||||
* @param {string} [params.model] - The model name to derive the value key from if not provided.
|
||||
* @param {string} [params.endpoint] - The endpoint name to derive the value key from if not provided.
|
||||
* @param {EndpointTokenConfig} [params.endpointTokenConfig] - The token configuration for the endpoint.
|
||||
|
|
@ -147,7 +158,41 @@ const getMultiplier = ({ valueKey, tokenType, model, endpoint, endpointTokenConf
|
|||
}
|
||||
|
||||
// If we got this far, and values[tokenType] is undefined somehow, return a rough average of default multipliers
|
||||
return tokenValues[valueKey][tokenType] ?? defaultRate;
|
||||
return tokenValues[valueKey]?.[tokenType] ?? defaultRate;
|
||||
};
|
||||
|
||||
module.exports = { tokenValues, getValueKey, getMultiplier, defaultRate };
|
||||
/**
|
||||
* Retrieves the cache multiplier for a given value key and token type. If no value key is provided,
|
||||
* it attempts to derive it from the model name.
|
||||
*
|
||||
* @param {Object} params - The parameters for the function.
|
||||
* @param {string} [params.valueKey] - The key corresponding to the model name.
|
||||
* @param {'write' | 'read'} [params.cacheType] - The type of token (e.g., 'write' or 'read').
|
||||
* @param {string} [params.model] - The model name to derive the value key from if not provided.
|
||||
* @param {string} [params.endpoint] - The endpoint name to derive the value key from if not provided.
|
||||
* @param {EndpointTokenConfig} [params.endpointTokenConfig] - The token configuration for the endpoint.
|
||||
* @returns {number | null} The multiplier for the given parameters, or `null` if not found.
|
||||
*/
|
||||
const getCacheMultiplier = ({ valueKey, cacheType, model, endpoint, endpointTokenConfig }) => {
|
||||
if (endpointTokenConfig) {
|
||||
return endpointTokenConfig?.[model]?.[cacheType] ?? null;
|
||||
}
|
||||
|
||||
if (valueKey && cacheType) {
|
||||
return cacheTokenValues[valueKey]?.[cacheType] ?? null;
|
||||
}
|
||||
|
||||
if (!cacheType || !model) {
|
||||
return null;
|
||||
}
|
||||
|
||||
valueKey = getValueKey(model, endpoint);
|
||||
if (!valueKey) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// If we got this far, and values[cacheType] is undefined somehow, return a rough average of default multipliers
|
||||
return cacheTokenValues[valueKey]?.[cacheType] ?? null;
|
||||
};
|
||||
|
||||
module.exports = { tokenValues, getValueKey, getMultiplier, getCacheMultiplier, defaultRate };
|
||||
|
|
|
|||
|
|
@ -1,4 +1,10 @@
|
|||
const { getValueKey, getMultiplier, defaultRate, tokenValues } = require('./tx');
|
||||
const {
|
||||
defaultRate,
|
||||
tokenValues,
|
||||
getValueKey,
|
||||
getMultiplier,
|
||||
getCacheMultiplier,
|
||||
} = require('./tx');
|
||||
|
||||
describe('getValueKey', () => {
|
||||
it('should return "16k" for model name containing "gpt-3.5-turbo-16k"', () => {
|
||||
|
|
@ -243,3 +249,76 @@ describe('AWS Bedrock Model Tests', () => {
|
|||
expect(results.every(Boolean)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getCacheMultiplier', () => {
|
||||
it('should return the correct cache multiplier for a given valueKey and cacheType', () => {
|
||||
expect(getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'write' })).toBe(3.75);
|
||||
expect(getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'read' })).toBe(0.3);
|
||||
expect(getCacheMultiplier({ valueKey: 'claude-3-haiku', cacheType: 'write' })).toBe(0.3);
|
||||
expect(getCacheMultiplier({ valueKey: 'claude-3-haiku', cacheType: 'read' })).toBe(0.03);
|
||||
});
|
||||
|
||||
it('should return null if cacheType is provided but not found in cacheTokenValues', () => {
|
||||
expect(
|
||||
getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'unknownType' }),
|
||||
).toBeNull();
|
||||
});
|
||||
|
||||
it('should derive the valueKey from the model if not provided', () => {
|
||||
expect(getCacheMultiplier({ cacheType: 'write', model: 'claude-3-5-sonnet-20240620' })).toBe(
|
||||
3.75,
|
||||
);
|
||||
expect(getCacheMultiplier({ cacheType: 'read', model: 'claude-3-haiku-20240307' })).toBe(0.03);
|
||||
});
|
||||
|
||||
it('should return null if only model or cacheType is missing', () => {
|
||||
expect(getCacheMultiplier({ cacheType: 'write' })).toBeNull();
|
||||
expect(getCacheMultiplier({ model: 'claude-3-5-sonnet' })).toBeNull();
|
||||
});
|
||||
|
||||
it('should return null if derived valueKey does not match any known patterns', () => {
|
||||
expect(getCacheMultiplier({ cacheType: 'write', model: 'gpt-4-some-other-info' })).toBeNull();
|
||||
});
|
||||
|
||||
it('should handle endpointTokenConfig if provided', () => {
|
||||
const endpointTokenConfig = {
|
||||
'custom-model': {
|
||||
write: 5,
|
||||
read: 1,
|
||||
},
|
||||
};
|
||||
expect(
|
||||
getCacheMultiplier({ model: 'custom-model', cacheType: 'write', endpointTokenConfig }),
|
||||
).toBe(5);
|
||||
expect(
|
||||
getCacheMultiplier({ model: 'custom-model', cacheType: 'read', endpointTokenConfig }),
|
||||
).toBe(1);
|
||||
});
|
||||
|
||||
it('should return null if model is not found in endpointTokenConfig', () => {
|
||||
const endpointTokenConfig = {
|
||||
'custom-model': {
|
||||
write: 5,
|
||||
read: 1,
|
||||
},
|
||||
};
|
||||
expect(
|
||||
getCacheMultiplier({ model: 'unknown-model', cacheType: 'write', endpointTokenConfig }),
|
||||
).toBeNull();
|
||||
});
|
||||
|
||||
it('should handle models with "bedrock/" prefix', () => {
|
||||
expect(
|
||||
getCacheMultiplier({
|
||||
model: 'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0',
|
||||
cacheType: 'write',
|
||||
}),
|
||||
).toBe(3.75);
|
||||
expect(
|
||||
getCacheMultiplier({
|
||||
model: 'bedrock/anthropic.claude-3-haiku-20240307-v1:0',
|
||||
cacheType: 'read',
|
||||
}),
|
||||
).toBe(0.03);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue