💾 feat: Anthropic Prompt Caching (#3670)

* wip: initial cache control implementation, add typing for transactions handling

* feat: first pass of Anthropic Prompt Caching

* feat: standardize stream usage as a passed-in value when calculating token counts

* feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes

* chore: imports order

* refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount

* feat: more accurate token counting using stream usage data

* feat: Improve token counting accuracy with stream usage data

* refactor: keep token estimations as accurate as possible when custom instructions or files are not being resent with every request

* refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts

* ci: fix tests
This commit is contained in:
Danny Avila 2024-08-17 03:24:09 -04:00 committed by GitHub
parent 9f4c516615
commit a45b384bbc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 973 additions and 34 deletions

View file

@ -26,6 +26,24 @@
* @memberof typedefs
*/
/**
* @exports AnthropicMessage
* @typedef {import('@anthropic-ai/sdk').default.MessageParam} AnthropicMessage
* @memberof typedefs
*/
/**
* @exports AnthropicMessageStartEvent
* @typedef {import('@anthropic-ai/sdk').default.MessageStartEvent} AnthropicMessageStartEvent
* @memberof typedefs
*/
/**
* @exports AnthropicMessageDeltaEvent
* @typedef {import('@anthropic-ai/sdk').default.MessageDeltaEvent} AnthropicMessageDeltaEvent
* @memberof typedefs
*/
/**
* @exports GenerativeModel
* @typedef {import('@google/generative-ai').GenerativeModel} GenerativeModel
@ -1311,6 +1329,33 @@
* @method messageCompleted Handles the completion of a message processing.
*/
/* TX Types */
/**
* @typedef {object} txData - Transaction data.
* @property {mongoose.Schema.Types.ObjectId} user - The user ID.
* @property {String} conversationId - The ID of the conversation.
* @property {String} model - The model name.
* @property {String} context - The context in which the transaction is made.
* @property {EndpointTokenConfig} [endpointTokenConfig] - The current endpoint token config.
* @property {object} [cacheUsage] - Cache usage, if any.
* @property {String} [valueKey] - The value key (optional).
* @memberof typedefs
*/
/**
* https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#pricing
* @typedef {object} AnthropicStreamUsage - Stream usage for Anthropic
* @property {number} [input_tokens] - The number of input tokens used.
* @property {number} [cache_creation_input_tokens] - The number of cache creation input tokens used (write).
* @property {number} [cache_read_input_tokens] - The number of input tokens read from the cache (read).
* @property {number} [output_tokens] - The number of output tokens used.
*/
/**
* @typedef {AnthropicStreamUsage} StreamUsage - Stream usage for all providers (currently only Anthropic)
*/
/* Native app/client methods */
/**