💾 feat: Anthropic Prompt Caching (#3670)

* wip: initial cache control implementation, add typing for transactions handling * feat: first pass of Anthropic Prompt Caching * feat: standardize stream usage as pass in when calculating token counts * feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes * chore: imports order * refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount * feat: more accurate token counting using stream usage data * feat: Improve token counting accuracy with stream usage data * refactor: ensure more accurate than not token estimations if custom instructions or files are not being resent with every request * refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts * ci: fix tests
2025-12-16 16:30:15 +01:00 · 2024-08-17 03:24:09 -04:00 · 2024-08-17 03:24:09 -04:00 · a45b384bbc
commit a45b384bbc
parent 9f4c516615
17 changed files with 973 additions and 34 deletions
--- a/api/typedefs.js
+++ b/api/typedefs.js
@ -26,6 +26,24 @@
 * @memberof typedefs
 */

+/**
+ * @exports AnthropicMessage
+ * @typedef {import('@anthropic-ai/sdk').default.MessageParam} AnthropicMessage
+ * @memberof typedefs
+ */
+
+/**
+ * @exports AnthropicMessageStartEvent
+ * @typedef {import('@anthropic-ai/sdk').default.MessageStartEvent} AnthropicMessageStartEvent
+ * @memberof typedefs
+ */
+
+/**
+ * @exports AnthropicMessageDeltaEvent
+ * @typedef {import('@anthropic-ai/sdk').default.MessageDeltaEvent} AnthropicMessageDeltaEvent
+ * @memberof typedefs
+ */
+
 /**
 * @exports GenerativeModel
 * @typedef {import('@google/generative-ai').GenerativeModel} GenerativeModel
@ -1311,6 +1329,33 @@
 * @method messageCompleted Handles the completion of a message processing.
 */

+/* TX Types */
+
+/**
+ * @typedef {object} txData - Transaction data.
+ * @property {mongoose.Schema.Types.ObjectId} user - The user ID.
+ * @property {String} conversationId - The ID of the conversation.
+ * @property {String} model - The model name.
+ * @property {String} context - The context in which the transaction is made.
+ * @property {EndpointTokenConfig} [endpointTokenConfig] - The current endpoint token config.
+ * @property {object} [cacheUsage] - Cache usage, if any.
+ * @property {String} [valueKey] - The value key (optional).
+ * @memberof typedefs
+ */
+
+/**
+ * https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#pricing
+ * @typedef {object} AnthropicStreamUsage - Stream usage for Anthropic
+ * @property {number} [input_tokens] - The number of input tokens used.
+ * @property {number} [cache_creation_input_tokens] - The number of cache creation input tokens used (write).
+ * @property {number} [cache_read_input_tokens] - The number of cache input tokens used (read).
+ * @property {number} [output_tokens] - The number of output tokens used.
+ */
+
+/**
+ * @typedef {AnthropicStreamUsage} StreamUsage - Stream usage for all providers (currently only Anthropic)
+ */
+
 /* Native app/client methods */

 /**