💾 feat: Anthropic Prompt Caching (#3670)

* wip: initial cache control implementation, add typing for transactions handling
* feat: first pass of Anthropic Prompt Caching
* feat: standardize stream usage as pass in when calculating token counts
* feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes
* chore: imports order
* refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount
* feat: more accurate token counting using stream usage data
* feat: Improve token counting accuracy with stream usage data
* refactor: ensure more accurate than not token estimations if custom instructions or files are not being resent with every request
* refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts
* ci: fix tests
2026-02-24 11:24:10 +01:00 · 2024-08-17 03:24:09 -04:00 · 2024-08-17 03:24:09 -04:00 · a45b384bbc
commit a45b384bbc
parent 9f4c516615
17 changed files with 973 additions and 34 deletions
--- a/api/models/Message.js
+++ b/api/models/Message.js
@ -212,8 +212,8 @@ async function updateMessageText(req, { messageId, text }) {
 *
 * @async
 * @function updateMessage
- * @param {Object} message - The message object containing update data.
 * @param {Object} req - The request object.
+ * @param {Object} message - The message object containing update data.
 * @param {string} message.messageId - The unique identifier for the message.
 * @param {string} [message.text] - The new text content of the message.
 * @param {Object[]} [message.files] - The files associated with the message.
--- a/api/models/Transaction.js
+++ b/api/models/Transaction.js
@ -1,12 +1,12 @@
 const mongoose = require('mongoose');
 const { isEnabled } = require('../server/utils/handleText');
 const transactionSchema = require('./schema/transaction');
-const { getMultiplier } = require('./tx');
+const { getMultiplier, getCacheMultiplier } = require('./tx');
 const { logger } = require('~/config');
 const Balance = require('./Balance');
 const cancelRate = 1.15;

-// Method to calculate and set the tokenValue for a transaction
+/** Method to calculate and set the tokenValue for a transaction */
 transactionSchema.methods.calculateTokenValue = function () {
  if (!this.valueKey || !this.tokenType) {
    this.tokenValue = this.rawAmount;
@ -21,15 +21,17 @@ transactionSchema.methods.calculateTokenValue = function () {
  }
 };

-// Static method to create a transaction and update the balance
-transactionSchema.statics.create = async function (transactionData) {
+/**
+ * Static method to create a transaction and update the balance
+ * @param {txData} txData - Transaction data.
+ */
+transactionSchema.statics.create = async function (txData) {
  const Transaction = this;

-  const transaction = new Transaction(transactionData);
-  transaction.endpointTokenConfig = transactionData.endpointTokenConfig;
+  const transaction = new Transaction(txData);
+  transaction.endpointTokenConfig = txData.endpointTokenConfig;
  transaction.calculateTokenValue();

-  // Save the transaction
  await transaction.save();

  if (!isEnabled(process.env.CHECK_BALANCE)) {
@ -57,6 +59,104 @@ transactionSchema.statics.create = async function (transactionData) {
  };
 };

+/**
+ * Static method to create a structured transaction and update the balance.
+ *
+ * Builds a Transaction from `txData`, computes its token value with
+ * `calculateStructuredTokenValue`, and saves it. When `CHECK_BALANCE` is not
+ * enabled, the saved transaction document is returned as-is. Otherwise the
+ * user's `tokenCredits` are incremented by the transaction's `tokenValue`
+ * and a summary of the change is returned.
+ *
+ * @param {txData} txData - Transaction data.
+ * @returns {Promise<Object>} The saved transaction document, or
+ *   `{ rate, user, balance, [tokenType]: incrementValue }` when balance checking is enabled.
+ */
+transactionSchema.statics.createStructured = async function (txData) {
+  const Transaction = this;
+
+  const transaction = new Transaction(txData);
+  // Attach the token config after construction, the same way `create` does, so it
+  // is readable by `calculateStructuredTokenValue` whether or not the schema
+  // declares an `endpointTokenConfig` path (undeclared paths passed to the
+  // constructor are dropped under Mongoose's default strict mode).
+  transaction.endpointTokenConfig = txData.endpointTokenConfig;
+
+  transaction.calculateStructuredTokenValue();
+
+  await transaction.save();
+
+  if (!isEnabled(process.env.CHECK_BALANCE)) {
+    return transaction;
+  }
+
+  let balance = await Balance.findOne({ user: transaction.user }).lean();
+  let incrementValue = transaction.tokenValue;
+
+  // Never drive the balance below zero: cap the deduction at the remaining credits.
+  if (balance && balance.tokenCredits + incrementValue < 0) {
+    incrementValue = -balance.tokenCredits;
+  }
+
+  balance = await Balance.findOneAndUpdate(
+    { user: transaction.user },
+    { $inc: { tokenCredits: incrementValue } },
+    { upsert: true, new: true },
+  ).lean();
+
+  return {
+    rate: transaction.rate,
+    user: transaction.user.toString(),
+    balance: balance.tokenCredits,
+    [transaction.tokenType]: incrementValue,
+  };
+};
+
+/**
+ * Method to calculate token value for structured tokens.
+ *
+ * Sets `rate` and `tokenValue` on the transaction (and `rateDetail` for prompt
+ * transactions):
+ * - no `tokenType`: `tokenValue` is the unmodified `rawAmount`.
+ * - `prompt`: input, cache-write and cache-read tokens are each priced with their
+ *   own multiplier; a missing cache multiplier falls back to the input multiplier.
+ *   `rate` is the token-weighted average multiplier.
+ * - any other type: `rawAmount` times the absolute multiplier for that type.
+ *
+ * Spending is recorded as a negative `tokenValue`, matching the negative
+ * `rawAmount` supplied for spend transactions.
+ */
+transactionSchema.methods.calculateStructuredTokenValue = function () {
+  if (!this.tokenType) {
+    this.tokenValue = this.rawAmount;
+    return;
+  }
+
+  const { model, endpointTokenConfig } = this;
+
+  if (this.tokenType === 'prompt') {
+    const inputMultiplier = getMultiplier({ tokenType: 'prompt', model, endpointTokenConfig });
+    const writeMultiplier =
+      getCacheMultiplier({ cacheType: 'write', model, endpointTokenConfig }) ?? inputMultiplier;
+    const readMultiplier =
+      getCacheMultiplier({ cacheType: 'read', model, endpointTokenConfig }) ?? inputMultiplier;
+
+    this.rateDetail = {
+      input: inputMultiplier,
+      write: writeMultiplier,
+      read: readMultiplier,
+    };
+
+    // Treat the counts as magnitudes and default missing ones to 0, so an unset
+    // `inputTokens` cannot turn the whole value into NaN.
+    const inputTokens = Math.abs(this.inputTokens || 0);
+    const writeTokens = Math.abs(this.writeTokens || 0);
+    const readTokens = Math.abs(this.readTokens || 0);
+    const totalTokens = inputTokens + writeTokens + readTokens;
+
+    const weightedCost =
+      inputTokens * inputMultiplier + writeTokens * writeMultiplier + readTokens * readMultiplier;
+
+    // Default to the input rate if there are no tokens to average over.
+    this.rate = totalTokens > 0 ? weightedCost / totalTokens : inputMultiplier;
+
+    // The balance is incremented by `tokenValue`, so a spend must be negative
+    // (as it is in the non-prompt branch, where `rawAmount` is negative).
+    // The explicit zero case avoids storing -0.
+    this.tokenValue = weightedCost === 0 ? 0 : -weightedCost;
+  } else {
+    const multiplier = Math.abs(
+      getMultiplier({ tokenType: this.tokenType, model, endpointTokenConfig }),
+    );
+    this.rate = multiplier;
+    this.tokenValue = this.rawAmount * multiplier;
+  }
+
+  // Cancelled ('incomplete') completions are billed at a premium.
+  if (this.tokenType === 'completion' && this.context === 'incomplete') {
+    this.tokenValue = Math.ceil(this.tokenValue * cancelRate);
+    this.rate *= cancelRate;
+    // `rateDetail` is only assigned in the prompt branch above, so this scaling
+    // only applies if it was set on the document before this method ran.
+    if (this.rateDetail) {
+      this.rateDetail = Object.fromEntries(
+        Object.entries(this.rateDetail).map(([k, v]) => [k, v * cancelRate]),
+      );
+    }
+  }
+};
+
 const Transaction = mongoose.model('Transaction', transactionSchema);

 /**
--- a/api/models/schema/transaction.js
+++ b/api/models/schema/transaction.js
@ -30,6 +30,9 @@ const transactionSchema = mongoose.Schema(
    rate: Number,
    rawAmount: Number,
    tokenValue: Number,
+    inputTokens: { type: Number },
+    writeTokens: { type: Number },
+    readTokens: { type: Number },
  },
  {
    timestamps: true,
--- a/api/models/spendTokens.js
+++ b/api/models/spendTokens.js
@ -11,7 +11,7 @@ const { logger } = require('~/config');
 * @param {String} txData.conversationId - The ID of the conversation.
 * @param {String} txData.model - The model name.
 * @param {String} txData.context - The context in which the transaction is made.
- * @param {String} [txData.endpointTokenConfig] - The current endpoint token config.
+ * @param {EndpointTokenConfig} [txData.endpointTokenConfig] - The current endpoint token config.
 * @param {String} [txData.valueKey] - The value key (optional).
 * @param {Object} tokenUsage - The number of tokens used.
 * @param {Number} tokenUsage.promptTokens - The number of prompt tokens used.
@ -66,4 +66,74 @@ const spendTokens = async (txData, tokenUsage) => {
  }
 };

-module.exports = spendTokens;
+/**
+ * Creates transactions to record the spending of structured tokens.
+ *
+ * Records one prompt transaction (input, cache-write and cache-read tokens priced
+ * separately via `Transaction.createStructured`) and one completion transaction
+ * (via `Transaction.create`). Amounts are stored as negative `rawAmount` values.
+ * Any error raised while creating the transactions is logged, not rethrown.
+ *
+ * @function
+ * @async
+ * @param {Object} txData - Transaction data.
+ * @param {mongoose.Schema.Types.ObjectId} txData.user - The user ID.
+ * @param {String} txData.conversationId - The ID of the conversation.
+ * @param {String} txData.model - The model name.
+ * @param {String} txData.context - The context in which the transaction is made.
+ * @param {EndpointTokenConfig} [txData.endpointTokenConfig] - The current endpoint token config.
+ * @param {String} [txData.valueKey] - The value key (optional).
+ * @param {Object} tokenUsage - The number of tokens used.
+ * @param {Object} tokenUsage.promptTokens - The number of prompt tokens used.
+ * @param {Number} tokenUsage.promptTokens.input - The number of input tokens.
+ * @param {Number} tokenUsage.promptTokens.write - The number of write tokens.
+ * @param {Number} tokenUsage.promptTokens.read - The number of read tokens.
+ * @param {Number} tokenUsage.completionTokens - The number of completion tokens used.
+ * @returns {Promise<void>} - Returns nothing.
+ */
+const spendStructuredTokens = async (txData, tokenUsage) => {
+  const { promptTokens, completionTokens } = tokenUsage;
+  logger.debug(
+    `[spendStructuredTokens] conversationId: ${txData.conversationId}${
+      txData.context ? ` | Context: ${txData.context}` : ''
+    } | Token usage: `,
+    {
+      promptTokens,
+      completionTokens,
+    },
+  );
+  let prompt, completion;
+  try {
+    if (promptTokens) {
+      const { input = 0, write = 0, read = 0 } = promptTokens;
+      const promptAmount = input + write + read;
+      prompt = await Transaction.createStructured({
+        ...txData,
+        tokenType: 'prompt',
+        rawAmount: -promptAmount,
+        inputTokens: input,
+        writeTokens: write,
+        readTokens: read,
+      });
+    }
+
+    if (completionTokens) {
+      completion = await Transaction.create({
+        ...txData,
+        tokenType: 'completion',
+        rawAmount: -completionTokens,
+      });
+    }
+
+    if (prompt && completion) {
+      // The result is either the saved transaction document (value under
+      // `tokenValue`) or, when balance checking is enabled, a summary object
+      // whose value is keyed by the token type. Read whichever is present.
+      logger.debug('[spendStructuredTokens] Transaction data record against balance:', {
+        user: txData.user,
+        prompt: prompt.tokenValue ?? prompt.prompt,
+        promptRate: prompt.rate,
+        completion: completion.tokenValue ?? completion.completion,
+        completionRate: completion.rate,
+        balance: completion.balance,
+      });
+    }
+  } catch (err) {
+    // Best-effort accounting: a failed transaction must not break the caller.
+    logger.error('[spendStructuredTokens]', err);
+  }
+};
+
+module.exports = { spendTokens, spendStructuredTokens };
--- a/api/models/tx.js
+++ b/api/models/tx.js
@ -70,6 +70,17 @@ const tokenValues = Object.assign(
  bedrockValues,
 );

+/**
+ * Mapping of model value keys to their multipliers for prompt-cache writes and reads.
+ * Looked up by `getCacheMultiplier`, either with an explicit `valueKey` or with the
+ * key that `getValueKey` derives from the model name.
+ * See Anthropic's documentation on this: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#pricing
+ * Rates are expressed in USD per 1M tokens.
+ * @type {Object.<string, {write: number, read: number }>}
+ */
+const cacheTokenValues = {
+  'claude-3-5-sonnet': { write: 3.75, read: 0.3 },
+  'claude-3-haiku': { write: 0.3, read: 0.03 },
+};
+
 /**
 * Retrieves the key associated with a given model name.
 *
@ -122,7 +133,7 @@ const getValueKey = (model, endpoint) => {
 *
 * @param {Object} params - The parameters for the function.
 * @param {string} [params.valueKey] - The key corresponding to the model name.
- * @param {string} [params.tokenType] - The type of token (e.g., 'prompt' or 'completion').
+ * @param {'prompt' | 'completion'} [params.tokenType] - The type of token (e.g., 'prompt' or 'completion').
 * @param {string} [params.model] - The model name to derive the value key from if not provided.
 * @param {string} [params.endpoint] - The endpoint name to derive the value key from if not provided.
 * @param {EndpointTokenConfig} [params.endpointTokenConfig] - The token configuration for the endpoint.
@ -147,7 +158,41 @@ const getMultiplier = ({ valueKey, tokenType, model, endpoint, endpointTokenConf
  }

  // If we got this far, and values[tokenType] is undefined somehow, return a rough average of default multipliers
-  return tokenValues[valueKey][tokenType] ?? defaultRate;
+  return tokenValues[valueKey]?.[tokenType] ?? defaultRate;
 };

-module.exports = { tokenValues, getValueKey, getMultiplier, defaultRate };
+/**
+ * Looks up the prompt-cache multiplier for a model.
+ *
+ * Resolution order:
+ * 1. When `endpointTokenConfig` is given, only that config is consulted
+ *    (`endpointTokenConfig[model][cacheType]`).
+ * 2. Otherwise the built-in cache rates are used, keyed by `valueKey`, or by the
+ *    key derived from `model` and `endpoint` when no `valueKey` is provided.
+ *
+ * Unlike `getMultiplier`, there is no default rate: every miss yields `null`.
+ *
+ * @param {Object} params - The parameters for the function.
+ * @param {string} [params.valueKey] - The key corresponding to the model name.
+ * @param {'write' | 'read'} [params.cacheType] - The type of cache token (e.g., 'write' or 'read').
+ * @param {string} [params.model] - The model name to derive the value key from if not provided.
+ * @param {string} [params.endpoint] - The endpoint name to derive the value key from if not provided.
+ * @param {EndpointTokenConfig} [params.endpointTokenConfig] - The token configuration for the endpoint.
+ * @returns {number | null} The multiplier for the given parameters, or `null` if not found.
+ */
+const getCacheMultiplier = ({ valueKey, cacheType, model, endpoint, endpointTokenConfig }) => {
+  if (endpointTokenConfig) {
+    return endpointTokenConfig?.[model]?.[cacheType] ?? null;
+  }
+
+  // Without a cache type there is nothing to look up.
+  if (!cacheType) {
+    return null;
+  }
+
+  // Prefer the explicit key; otherwise derive one from the model name.
+  let resolvedKey = valueKey;
+  if (!resolvedKey && model) {
+    resolvedKey = getValueKey(model, endpoint);
+  }
+
+  if (!resolvedKey) {
+    return null;
+  }
+
+  return cacheTokenValues[resolvedKey]?.[cacheType] ?? null;
+};
+
+module.exports = { tokenValues, getValueKey, getMultiplier, getCacheMultiplier, defaultRate };
--- a/api/models/tx.spec.js
+++ b/api/models/tx.spec.js
@ -1,4 +1,10 @@
-const { getValueKey, getMultiplier, defaultRate, tokenValues } = require('./tx');
+const {
+  defaultRate,
+  tokenValues,
+  getValueKey,
+  getMultiplier,
+  getCacheMultiplier,
+} = require('./tx');

 describe('getValueKey', () => {
  it('should return "16k" for model name containing "gpt-3.5-turbo-16k"', () => {
@ -243,3 +249,76 @@ describe('AWS Bedrock Model Tests', () => {
    expect(results.every(Boolean)).toBe(true);
  });
 });
+
+describe('getCacheMultiplier', () => {
+  /** Builds the custom endpoint pricing used by the endpointTokenConfig cases. */
+  const buildEndpointTokenConfig = () => ({
+    'custom-model': {
+      write: 5,
+      read: 1,
+    },
+  });
+
+  it('should return the correct cache multiplier for a given valueKey and cacheType', () => {
+    const sonnet = 'claude-3-5-sonnet';
+    const haiku = 'claude-3-haiku';
+
+    expect(getCacheMultiplier({ valueKey: sonnet, cacheType: 'write' })).toBe(3.75);
+    expect(getCacheMultiplier({ valueKey: sonnet, cacheType: 'read' })).toBe(0.3);
+    expect(getCacheMultiplier({ valueKey: haiku, cacheType: 'write' })).toBe(0.3);
+    expect(getCacheMultiplier({ valueKey: haiku, cacheType: 'read' })).toBe(0.03);
+  });
+
+  it('should return null if cacheType is provided but not found in cacheTokenValues', () => {
+    const multiplier = getCacheMultiplier({
+      valueKey: 'claude-3-5-sonnet',
+      cacheType: 'unknownType',
+    });
+    expect(multiplier).toBeNull();
+  });
+
+  it('should derive the valueKey from the model if not provided', () => {
+    const sonnetWrite = getCacheMultiplier({
+      cacheType: 'write',
+      model: 'claude-3-5-sonnet-20240620',
+    });
+    const haikuRead = getCacheMultiplier({ cacheType: 'read', model: 'claude-3-haiku-20240307' });
+
+    expect(sonnetWrite).toBe(3.75);
+    expect(haikuRead).toBe(0.03);
+  });
+
+  it('should return null if only model or cacheType is missing', () => {
+    const withoutModel = getCacheMultiplier({ cacheType: 'write' });
+    const withoutCacheType = getCacheMultiplier({ model: 'claude-3-5-sonnet' });
+
+    expect(withoutModel).toBeNull();
+    expect(withoutCacheType).toBeNull();
+  });
+
+  it('should return null if derived valueKey does not match any known patterns', () => {
+    const multiplier = getCacheMultiplier({ cacheType: 'write', model: 'gpt-4-some-other-info' });
+    expect(multiplier).toBeNull();
+  });
+
+  it('should handle endpointTokenConfig if provided', () => {
+    const endpointTokenConfig = buildEndpointTokenConfig();
+    const model = 'custom-model';
+
+    expect(getCacheMultiplier({ model, cacheType: 'write', endpointTokenConfig })).toBe(5);
+    expect(getCacheMultiplier({ model, cacheType: 'read', endpointTokenConfig })).toBe(1);
+  });
+
+  it('should return null if model is not found in endpointTokenConfig', () => {
+    const endpointTokenConfig = buildEndpointTokenConfig();
+    const multiplier = getCacheMultiplier({
+      model: 'unknown-model',
+      cacheType: 'write',
+      endpointTokenConfig,
+    });
+
+    expect(multiplier).toBeNull();
+  });
+
+  it('should handle models with "bedrock/" prefix', () => {
+    const bedrockSonnetWrite = getCacheMultiplier({
+      model: 'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0',
+      cacheType: 'write',
+    });
+    const bedrockHaikuRead = getCacheMultiplier({
+      model: 'bedrock/anthropic.claude-3-haiku-20240307-v1:0',
+      cacheType: 'read',
+    });
+
+    expect(bedrockSonnetWrite).toBe(3.75);
+    expect(bedrockHaikuRead).toBe(0.03);
+  });
+});