💾 feat: Anthropic Prompt Caching (#3670)

* wip: initial cache control implementation, add typing for transactions handling

* feat: first pass of Anthropic Prompt Caching

* feat: standardize passing in stream usage when calculating token counts

* feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes

* chore: imports order

* refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount

* feat: more accurate token counting using stream usage data

* feat: Improve token counting accuracy with stream usage data

* refactor: keep token estimations as accurate as possible when custom instructions or files are not being resent with every request

* refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts

* ci: fix tests
This commit is contained in:
Danny Avila 2024-08-17 03:24:09 -04:00 committed by GitHub
parent 9f4c516615
commit a45b384bbc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 973 additions and 34 deletions

View file

@ -212,8 +212,8 @@ async function updateMessageText(req, { messageId, text }) {
*
* @async
* @function updateMessage
* @param {Object} message - The message object containing update data.
* @param {Object} req - The request object.
* @param {Object} message - The message object containing update data.
* @param {string} message.messageId - The unique identifier for the message.
* @param {string} [message.text] - The new text content of the message.
* @param {Object[]} [message.files] - The files associated with the message.

View file

@ -1,12 +1,12 @@
const mongoose = require('mongoose');
const { isEnabled } = require('../server/utils/handleText');
const transactionSchema = require('./schema/transaction');
const { getMultiplier } = require('./tx');
const { getMultiplier, getCacheMultiplier } = require('./tx');
const { logger } = require('~/config');
const Balance = require('./Balance');
const cancelRate = 1.15;
// Method to calculate and set the tokenValue for a transaction
/** Method to calculate and set the tokenValue for a transaction */
transactionSchema.methods.calculateTokenValue = function () {
if (!this.valueKey || !this.tokenType) {
this.tokenValue = this.rawAmount;
@ -21,15 +21,17 @@ transactionSchema.methods.calculateTokenValue = function () {
}
};
// Static method to create a transaction and update the balance
transactionSchema.statics.create = async function (transactionData) {
/**
* Static method to create a transaction and update the balance
* @param {txData} txData - Transaction data.
*/
transactionSchema.statics.create = async function (txData) {
const Transaction = this;
const transaction = new Transaction(transactionData);
transaction.endpointTokenConfig = transactionData.endpointTokenConfig;
const transaction = new Transaction(txData);
transaction.endpointTokenConfig = txData.endpointTokenConfig;
transaction.calculateTokenValue();
// Save the transaction
await transaction.save();
if (!isEnabled(process.env.CHECK_BALANCE)) {
@ -57,6 +59,104 @@ transactionSchema.statics.create = async function (transactionData) {
};
};
/**
 * Static method to create a structured transaction and update the balance.
 *
 * The transaction is priced with `calculateStructuredTokenValue` and saved. When the
 * `CHECK_BALANCE` environment flag is not enabled, the saved transaction is returned as-is.
 * Otherwise the user's balance is adjusted by the transaction's `tokenValue`, clamped so that
 * an existing balance never drops below zero.
 *
 * @param {txData} txData - Transaction data.
 * @returns {Promise<Object>} The saved transaction when balance checking is disabled; otherwise
 * `{ rate, user, balance, [tokenType]: appliedValue }`.
 */
transactionSchema.statics.createStructured = async function (txData) {
  const Transaction = this;
  const transaction = new Transaction(txData);
  // Attach the endpoint config as a plain property, exactly like `create` does, so the pricing
  // step below can always read it.
  // NOTE(review): presumably `endpointTokenConfig` is not a schema path and would otherwise be
  // dropped by the model constructor — confirm against the full schema.
  transaction.endpointTokenConfig = txData.endpointTokenConfig;

  transaction.calculateStructuredTokenValue();
  await transaction.save();

  if (!isEnabled(process.env.CHECK_BALANCE)) {
    return transaction;
  }

  const currentBalance = await Balance.findOne({ user: transaction.user }).lean();
  let incrementValue = transaction.tokenValue;

  // Never take an existing balance below zero: spend only what is left.
  if (currentBalance && currentBalance.tokenCredits + incrementValue < 0) {
    incrementValue = -currentBalance.tokenCredits;
  }

  const updatedBalance = await Balance.findOneAndUpdate(
    { user: transaction.user },
    { $inc: { tokenCredits: incrementValue } },
    { upsert: true, new: true },
  ).lean();

  return {
    rate: transaction.rate,
    user: transaction.user.toString(),
    balance: updatedBalance.tokenCredits,
    [transaction.tokenType]: incrementValue,
  };
};
/**
 * Method to calculate token value for structured tokens.
 *
 * Sets `rate` and `tokenValue` on the transaction (plus `rateDetail` for prompts):
 * - No `tokenType`: `tokenValue` is the unmodified `rawAmount`.
 * - `prompt`: input, cache-write and cache-read token counts are each priced with their own
 *   multiplier; a cache multiplier that cannot be resolved falls back to the input multiplier.
 *   `rate` is the token-weighted average of the three multipliers and `tokenValue` is stored
 *   as a negative amount, i.e. a spend.
 * - Any other type: `rawAmount` times the absolute multiplier for that token type.
 *
 * A completion whose `context` is `'incomplete'` is additionally scaled by `cancelRate`.
 */
transactionSchema.methods.calculateStructuredTokenValue = function () {
  if (!this.tokenType) {
    this.tokenValue = this.rawAmount;
    return;
  }

  const { model, endpointTokenConfig } = this;

  if (this.tokenType === 'prompt') {
    const inputMultiplier = getMultiplier({ tokenType: 'prompt', model, endpointTokenConfig });
    const writeMultiplier =
      getCacheMultiplier({ cacheType: 'write', model, endpointTokenConfig }) ?? inputMultiplier;
    const readMultiplier =
      getCacheMultiplier({ cacheType: 'read', model, endpointTokenConfig }) ?? inputMultiplier;

    this.rateDetail = {
      input: inputMultiplier,
      write: writeMultiplier,
      read: readMultiplier,
    };

    // Counts may be unset or arrive with either sign; only their magnitude is priced.
    // Defaulting every count to 0 also keeps `tokenValue` from becoming NaN.
    const inputTokens = Math.abs(this.inputTokens || 0);
    const writeTokens = Math.abs(this.writeTokens || 0);
    const readTokens = Math.abs(this.readTokens || 0);
    const totalTokens = inputTokens + writeTokens + readTokens;

    const totalCost =
      inputTokens * inputMultiplier + writeTokens * writeMultiplier + readTokens * readMultiplier;

    // Default to the input rate if there are no tokens to average over.
    this.rate = totalTokens > 0 ? totalCost / totalTokens : inputMultiplier;

    // `createStructured` adds `tokenValue` to the balance via `$inc`, so a prompt spend has to
    // be negative; a positive value here would credit the user instead of charging them.
    this.tokenValue = totalCost > 0 ? -totalCost : 0;
  } else {
    const multiplier = Math.abs(
      getMultiplier({ tokenType: this.tokenType, model, endpointTokenConfig }),
    );
    this.rate = multiplier;
    this.tokenValue = this.rawAmount * multiplier;
  }

  if (this.tokenType === 'completion' && this.context === 'incomplete') {
    this.tokenValue = Math.ceil(this.tokenValue * cancelRate);
    this.rate *= cancelRate;
    if (this.rateDetail) {
      this.rateDetail = Object.fromEntries(
        Object.entries(this.rateDetail).map(([k, v]) => [k, v * cancelRate]),
      );
    }
  }
};
const Transaction = mongoose.model('Transaction', transactionSchema);
/**

View file

@ -30,6 +30,9 @@ const transactionSchema = mongoose.Schema(
rate: Number,
rawAmount: Number,
tokenValue: Number,
inputTokens: { type: Number },
writeTokens: { type: Number },
readTokens: { type: Number },
},
{
timestamps: true,

View file

@ -11,7 +11,7 @@ const { logger } = require('~/config');
* @param {String} txData.conversationId - The ID of the conversation.
* @param {String} txData.model - The model name.
* @param {String} txData.context - The context in which the transaction is made.
* @param {String} [txData.endpointTokenConfig] - The current endpoint token config.
* @param {EndpointTokenConfig} [txData.endpointTokenConfig] - The current endpoint token config.
* @param {String} [txData.valueKey] - The value key (optional).
* @param {Object} tokenUsage - The number of tokens used.
* @param {Number} tokenUsage.promptTokens - The number of prompt tokens used.
@ -66,4 +66,74 @@ const spendTokens = async (txData, tokenUsage) => {
}
};
module.exports = spendTokens;
/**
 * Creates transactions to record the spending of structured tokens.
 *
 * Prompt usage is recorded through `Transaction.createStructured` with the input, cache-write
 * and cache-read counts kept separate; completion usage is recorded through
 * `Transaction.create`. Failures are logged and not rethrown.
 *
 * @function
 * @async
 * @param {Object} txData - Transaction data.
 * @param {mongoose.Schema.Types.ObjectId} txData.user - The user ID.
 * @param {String} txData.conversationId - The ID of the conversation.
 * @param {String} txData.model - The model name.
 * @param {String} txData.context - The context in which the transaction is made.
 * @param {EndpointTokenConfig} [txData.endpointTokenConfig] - The current endpoint token config.
 * @param {String} [txData.valueKey] - The value key (optional).
 * @param {Object} tokenUsage - The number of tokens used.
 * @param {Object} [tokenUsage.promptTokens] - The number of prompt tokens used.
 * @param {Number} [tokenUsage.promptTokens.input] - The number of input tokens (defaults to 0).
 * @param {Number} [tokenUsage.promptTokens.write] - The number of write tokens (defaults to 0).
 * @param {Number} [tokenUsage.promptTokens.read] - The number of read tokens (defaults to 0).
 * @param {Number} [tokenUsage.completionTokens] - The number of completion tokens used.
 * @returns {Promise<void>} - Returns nothing.
 */
const spendStructuredTokens = async (txData, tokenUsage) => {
  const { promptTokens, completionTokens } = tokenUsage;
  logger.debug(
    `[spendStructuredTokens] conversationId: ${txData.conversationId}${
      txData?.context ? ` | Context: ${txData?.context}` : ''
    } | Token usage: `,
    {
      promptTokens,
      completionTokens,
    },
  );

  let prompt, completion;
  try {
    if (promptTokens) {
      const { input = 0, write = 0, read = 0 } = promptTokens;
      const promptAmount = input + write + read;
      prompt = await Transaction.createStructured({
        ...txData,
        tokenType: 'prompt',
        rawAmount: -promptAmount,
        inputTokens: input,
        writeTokens: write,
        readTokens: read,
      });
    }

    if (completionTokens) {
      completion = await Transaction.create({
        ...txData,
        tokenType: 'completion',
        rawAmount: -completionTokens,
      });
    }

    if (prompt && completion) {
      // The create methods return the transaction document (which has `tokenValue`) when
      // balance checking is off, and a summary keyed by token type when it is on.
      // Read whichever is present so the log never reports `undefined`.
      logger.debug('[spendStructuredTokens] Transaction data record against balance:', {
        user: txData.user,
        prompt: prompt.tokenValue ?? prompt.prompt,
        promptRate: prompt.rate,
        completion: completion.tokenValue ?? completion.completion,
        completionRate: completion.rate,
        balance: completion.balance,
      });
    }
  } catch (err) {
    logger.error('[spendStructuredTokens]', err);
  }
};
module.exports = { spendTokens, spendStructuredTokens };

View file

@ -70,6 +70,17 @@ const tokenValues = Object.assign(
bedrockValues,
);
/**
 * Prompt-caching multipliers, keyed by model value key (the same kind of key `getValueKey`
 * resolves from a model name). Each entry holds the rate for writing tokens to the cache
 * (`write`) and for reading tokens back from it (`read`).
 * See Anthropic's documentation on this: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#pricing
 * Rates are expressed in USD per 1M tokens.
 * @type {Object.<string, {write: number, read: number }>}
 */
const cacheTokenValues = {
  'claude-3-5-sonnet': { write: 3.75, read: 0.3 },
  'claude-3-haiku': { write: 0.3, read: 0.03 },
};
/**
* Retrieves the key associated with a given model name.
*
@ -122,7 +133,7 @@ const getValueKey = (model, endpoint) => {
*
* @param {Object} params - The parameters for the function.
* @param {string} [params.valueKey] - The key corresponding to the model name.
* @param {string} [params.tokenType] - The type of token (e.g., 'prompt' or 'completion').
* @param {'prompt' | 'completion'} [params.tokenType] - The type of token (e.g., 'prompt' or 'completion').
* @param {string} [params.model] - The model name to derive the value key from if not provided.
* @param {string} [params.endpoint] - The endpoint name to derive the value key from if not provided.
* @param {EndpointTokenConfig} [params.endpointTokenConfig] - The token configuration for the endpoint.
@ -147,7 +158,41 @@ const getMultiplier = ({ valueKey, tokenType, model, endpoint, endpointTokenConf
}
// If we got this far, and values[tokenType] is undefined somehow, return a rough average of default multipliers
return tokenValues[valueKey][tokenType] ?? defaultRate;
return tokenValues[valueKey]?.[tokenType] ?? defaultRate;
};
module.exports = { tokenValues, getValueKey, getMultiplier, defaultRate };
/**
 * Looks up the prompt-cache multiplier for a model.
 *
 * Resolution order:
 * 1. When an `endpointTokenConfig` is supplied it is the only source consulted
 *    (`endpointTokenConfig[model][cacheType]`); the built-in table is never used as a fallback.
 * 2. Otherwise the built-in cache table is read using `valueKey`, or, when no `valueKey` is
 *    given, the key derived from `model` (and `endpoint`) via `getValueKey`.
 *
 * @param {Object} params - The parameters for the function.
 * @param {string} [params.valueKey] - The key corresponding to the model name.
 * @param {'write' | 'read'} [params.cacheType] - The kind of cache usage being priced.
 * @param {string} [params.model] - The model name to derive the value key from if not provided.
 * @param {string} [params.endpoint] - The endpoint name to derive the value key from if not provided.
 * @param {EndpointTokenConfig} [params.endpointTokenConfig] - The token configuration for the endpoint.
 * @returns {number | null} The multiplier for the given parameters, or `null` if not found.
 */
const getCacheMultiplier = ({ valueKey, cacheType, model, endpoint, endpointTokenConfig }) => {
  if (endpointTokenConfig) {
    const customRate = endpointTokenConfig?.[model]?.[cacheType];
    return customRate ?? null;
  }

  // Without a cache type there is nothing to look up, whichever key is available.
  if (!cacheType) {
    return null;
  }

  let resolvedKey = valueKey;
  if (!resolvedKey) {
    if (!model) {
      return null;
    }
    resolvedKey = getValueKey(model, endpoint);
    if (!resolvedKey) {
      return null;
    }
  }

  // Unknown keys or cache types have no cache pricing: report `null` so callers can fall back.
  return cacheTokenValues[resolvedKey]?.[cacheType] ?? null;
};
module.exports = { tokenValues, getValueKey, getMultiplier, getCacheMultiplier, defaultRate };

View file

@ -1,4 +1,10 @@
const { getValueKey, getMultiplier, defaultRate, tokenValues } = require('./tx');
const {
defaultRate,
tokenValues,
getValueKey,
getMultiplier,
getCacheMultiplier,
} = require('./tx');
describe('getValueKey', () => {
it('should return "16k" for model name containing "gpt-3.5-turbo-16k"', () => {
@ -243,3 +249,76 @@ describe('AWS Bedrock Model Tests', () => {
expect(results.every(Boolean)).toBe(true);
});
});
describe('getCacheMultiplier', () => {
  // Custom endpoint pricing shared by the endpointTokenConfig cases below.
  const customTokenConfig = {
    'custom-model': {
      write: 5,
      read: 1,
    },
  };

  it('should return the correct cache multiplier for a given valueKey and cacheType', () => {
    const sonnetWrite = getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'write' });
    const sonnetRead = getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'read' });
    const haikuWrite = getCacheMultiplier({ valueKey: 'claude-3-haiku', cacheType: 'write' });
    const haikuRead = getCacheMultiplier({ valueKey: 'claude-3-haiku', cacheType: 'read' });

    expect(sonnetWrite).toBe(3.75);
    expect(sonnetRead).toBe(0.3);
    expect(haikuWrite).toBe(0.3);
    expect(haikuRead).toBe(0.03);
  });

  it('should return null if cacheType is provided but not found in cacheTokenValues', () => {
    const result = getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'unknownType' });
    expect(result).toBeNull();
  });

  it('should derive the valueKey from the model if not provided', () => {
    const sonnetWrite = getCacheMultiplier({
      cacheType: 'write',
      model: 'claude-3-5-sonnet-20240620',
    });
    const haikuRead = getCacheMultiplier({ cacheType: 'read', model: 'claude-3-haiku-20240307' });

    expect(sonnetWrite).toBe(3.75);
    expect(haikuRead).toBe(0.03);
  });

  it('should return null if only model or cacheType is missing', () => {
    // Neither a cache type alone nor a model alone is enough for a lookup.
    const withoutModel = getCacheMultiplier({ cacheType: 'write' });
    const withoutCacheType = getCacheMultiplier({ model: 'claude-3-5-sonnet' });

    expect(withoutModel).toBeNull();
    expect(withoutCacheType).toBeNull();
  });

  it('should return null if derived valueKey does not match any known patterns', () => {
    const result = getCacheMultiplier({ cacheType: 'write', model: 'gpt-4-some-other-info' });
    expect(result).toBeNull();
  });

  it('should handle endpointTokenConfig if provided', () => {
    const writeRate = getCacheMultiplier({
      model: 'custom-model',
      cacheType: 'write',
      endpointTokenConfig: customTokenConfig,
    });
    const readRate = getCacheMultiplier({
      model: 'custom-model',
      cacheType: 'read',
      endpointTokenConfig: customTokenConfig,
    });

    expect(writeRate).toBe(5);
    expect(readRate).toBe(1);
  });

  it('should return null if model is not found in endpointTokenConfig', () => {
    const result = getCacheMultiplier({
      model: 'unknown-model',
      cacheType: 'write',
      endpointTokenConfig: customTokenConfig,
    });
    expect(result).toBeNull();
  });

  it('should handle models with "bedrock/" prefix', () => {
    const sonnetWrite = getCacheMultiplier({
      model: 'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0',
      cacheType: 'write',
    });
    const haikuRead = getCacheMultiplier({
      model: 'bedrock/anthropic.claude-3-haiku-20240307-v1:0',
      cacheType: 'read',
    });

    expect(sonnetWrite).toBe(3.75);
    expect(haikuRead).toBe(0.03);
  });
});