mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-06 10:38:50 +01:00
💾 feat: Anthropic Prompt Caching (#3670)
* wip: initial cache control implementation, add typing for transactions handling * feat: first pass of Anthropic Prompt Caching * feat: standardize stream usage as pass in when calculating token counts * feat: Add getCacheMultiplier function to calculate cache multiplier for different valueKeys and cacheTypes * chore: imports order * refactor: token usage recording in AnthropicClient, no need to "correct" as we have the correct amount * feat: more accurate token counting using stream usage data * feat: Improve token counting accuracy with stream usage data * refactor: ensure more accurate than not token estimations if custom instructions or files are not being resent with every request * refactor: cleanup updateUserMessageTokenCount to allow transactions to be as accurate as possible even if we shouldn't update user message token counts * ci: fix tests
This commit is contained in:
parent
9f4c516615
commit
a45b384bbc
17 changed files with 973 additions and 34 deletions
|
|
@ -12,12 +12,13 @@ const { encodeAndFormat } = require('~/server/services/Files/images/encode');
|
|||
const {
|
||||
truncateText,
|
||||
formatMessage,
|
||||
addCacheControl,
|
||||
titleFunctionPrompt,
|
||||
parseParamFromPrompt,
|
||||
createContextHandlers,
|
||||
} = require('./prompts');
|
||||
const spendTokens = require('~/models/spendTokens');
|
||||
const { getModelMaxTokens } = require('~/utils');
|
||||
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
|
||||
const { getModelMaxTokens, matchModelName } = require('~/utils');
|
||||
const { sleep } = require('~/server/utils');
|
||||
const BaseClient = require('./BaseClient');
|
||||
const { logger } = require('~/config');
|
||||
|
|
@ -32,6 +33,7 @@ function delayBeforeRetry(attempts, baseDelay = 1000) {
|
|||
return new Promise((resolve) => setTimeout(resolve, baseDelay * attempts));
|
||||
}
|
||||
|
||||
const tokenEventTypes = new Set(['message_start', 'message_delta']);
|
||||
const { legacy } = anthropicSettings;
|
||||
|
||||
class AnthropicClient extends BaseClient {
|
||||
|
|
@ -44,6 +46,24 @@ class AnthropicClient extends BaseClient {
|
|||
? options.contextStrategy.toLowerCase()
|
||||
: 'discard';
|
||||
this.setOptions(options);
|
||||
/** @type {string | undefined} */
|
||||
this.systemMessage;
|
||||
/** @type {AnthropicMessageStartEvent| undefined} */
|
||||
this.message_start;
|
||||
/** @type {AnthropicMessageDeltaEvent| undefined} */
|
||||
this.message_delta;
|
||||
/** Whether the model is part of the Claude 3 Family
|
||||
* @type {boolean} */
|
||||
this.isClaude3;
|
||||
/** Whether to use Messages API or Completions API
|
||||
* @type {boolean} */
|
||||
this.useMessages;
|
||||
/** Whether or not the model is limited to the legacy amount of output tokens
|
||||
* @type {boolean} */
|
||||
this.isLegacyOutput;
|
||||
/** Whether or not the model supports Prompt Caching
|
||||
* @type {boolean} */
|
||||
this.supportsCacheControl;
|
||||
}
|
||||
|
||||
setOptions(options) {
|
||||
|
|
@ -69,8 +89,10 @@ class AnthropicClient extends BaseClient {
|
|||
model: modelOptions.model || anthropicSettings.model.default,
|
||||
};
|
||||
|
||||
this.isClaude3 = this.modelOptions.model.includes('claude-3');
|
||||
this.isLegacyOutput = !this.modelOptions.model.includes('claude-3-5-sonnet');
|
||||
const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
|
||||
this.isClaude3 = modelMatch.startsWith('claude-3');
|
||||
this.isLegacyOutput = !modelMatch.startsWith('claude-3-5-sonnet');
|
||||
this.supportsCacheControl = this.checkPromptCacheSupport(modelMatch);
|
||||
|
||||
if (
|
||||
this.isLegacyOutput &&
|
||||
|
|
@ -147,19 +169,74 @@ class AnthropicClient extends BaseClient {
|
|||
options.baseURL = this.options.reverseProxyUrl;
|
||||
}
|
||||
|
||||
if (requestOptions?.model && requestOptions.model.includes('claude-3-5-sonnet')) {
|
||||
if (
|
||||
this.supportsCacheControl &&
|
||||
requestOptions?.model &&
|
||||
requestOptions.model.includes('claude-3-5-sonnet')
|
||||
) {
|
||||
options.defaultHeaders = {
|
||||
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
|
||||
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
|
||||
};
|
||||
} else if (this.supportsCacheControl) {
|
||||
options.defaultHeaders = {
|
||||
'anthropic-beta': 'prompt-caching-2024-07-31',
|
||||
};
|
||||
}
|
||||
|
||||
return new Anthropic(options);
|
||||
}
|
||||
|
||||
getTokenCountForResponse(response) {
|
||||
/**
|
||||
* Get stream usage as returned by this client's API response.
|
||||
* @returns {AnthropicStreamUsage} The stream usage object.
|
||||
*/
|
||||
getStreamUsage() {
|
||||
const inputUsage = this.message_start?.message?.usage ?? {};
|
||||
const outputUsage = this.message_delta?.usage ?? {};
|
||||
return Object.assign({}, inputUsage, outputUsage);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the correct token count for the current message based on the token count map and API usage.
|
||||
* Edge case: If the calculation results in a negative value, it returns the original estimate.
|
||||
* If revisiting a conversation with a chat history entirely composed of token estimates,
|
||||
* the cumulative token count going forward should become more accurate as the conversation progresses.
|
||||
* @param {Object} params - The parameters for the calculation.
|
||||
* @param {Record<string, number>} params.tokenCountMap - A map of message IDs to their token counts.
|
||||
* @param {string} params.currentMessageId - The ID of the current message to calculate.
|
||||
* @param {AnthropicStreamUsage} params.usage - The usage object returned by the API.
|
||||
* @returns {number} The correct token count for the current message.
|
||||
*/
|
||||
calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) {
|
||||
const originalEstimate = tokenCountMap[currentMessageId] || 0;
|
||||
|
||||
if (!usage || typeof usage.input_tokens !== 'number') {
|
||||
return originalEstimate;
|
||||
}
|
||||
|
||||
tokenCountMap[currentMessageId] = 0;
|
||||
const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => {
|
||||
const numCount = Number(count);
|
||||
return sum + (isNaN(numCount) ? 0 : numCount);
|
||||
}, 0);
|
||||
const totalInputTokens =
|
||||
(usage.input_tokens ?? 0) +
|
||||
(usage.cache_creation_input_tokens ?? 0) +
|
||||
(usage.cache_read_input_tokens ?? 0);
|
||||
|
||||
const currentMessageTokens = totalInputTokens - totalTokensFromMap;
|
||||
return currentMessageTokens > 0 ? currentMessageTokens : originalEstimate;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get Token Count for LibreChat Message
|
||||
* @param {TMessage} responseMessage
|
||||
* @returns {number}
|
||||
*/
|
||||
getTokenCountForResponse(responseMessage) {
|
||||
return this.getTokenCountForMessage({
|
||||
role: 'assistant',
|
||||
content: response.text,
|
||||
content: responseMessage.text,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -212,7 +289,38 @@ class AnthropicClient extends BaseClient {
|
|||
return files;
|
||||
}
|
||||
|
||||
async recordTokenUsage({ promptTokens, completionTokens, model, context = 'message' }) {
|
||||
/**
|
||||
* @param {object} params
|
||||
* @param {number} params.promptTokens
|
||||
* @param {number} params.completionTokens
|
||||
* @param {AnthropicStreamUsage} [params.usage]
|
||||
* @param {string} [params.model]
|
||||
* @param {string} [params.context='message']
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async recordTokenUsage({ promptTokens, completionTokens, usage, model, context = 'message' }) {
|
||||
if (usage != null && usage?.input_tokens != null) {
|
||||
const input = usage.input_tokens ?? 0;
|
||||
const write = usage.cache_creation_input_tokens ?? 0;
|
||||
const read = usage.cache_read_input_tokens ?? 0;
|
||||
|
||||
await spendStructuredTokens(
|
||||
{
|
||||
context,
|
||||
user: this.user,
|
||||
conversationId: this.conversationId,
|
||||
model: model ?? this.modelOptions.model,
|
||||
endpointTokenConfig: this.options.endpointTokenConfig,
|
||||
},
|
||||
{
|
||||
promptTokens: { input, write, read },
|
||||
completionTokens,
|
||||
},
|
||||
);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
await spendTokens(
|
||||
{
|
||||
context,
|
||||
|
|
@ -560,6 +668,18 @@ class AnthropicClient extends BaseClient {
|
|||
: await client.completions.create(options);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} modelName
|
||||
* @returns {boolean}
|
||||
*/
|
||||
checkPromptCacheSupport(modelName) {
|
||||
const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
|
||||
if (modelMatch === 'claude-3-5-sonnet' || modelMatch === 'claude-3-haiku') {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
async sendCompletion(payload, { onProgress, abortController }) {
|
||||
if (!abortController) {
|
||||
abortController = new AbortController();
|
||||
|
|
@ -606,10 +726,22 @@ class AnthropicClient extends BaseClient {
|
|||
requestOptions.max_tokens_to_sample = maxOutputTokens || 1500;
|
||||
}
|
||||
|
||||
if (this.systemMessage) {
|
||||
if (this.systemMessage && this.supportsCacheControl === true) {
|
||||
requestOptions.system = [
|
||||
{
|
||||
type: 'text',
|
||||
text: this.systemMessage,
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
];
|
||||
} else if (this.systemMessage) {
|
||||
requestOptions.system = this.systemMessage;
|
||||
}
|
||||
|
||||
if (this.supportsCacheControl === true && this.useMessages) {
|
||||
requestOptions.messages = addCacheControl(requestOptions.messages);
|
||||
}
|
||||
|
||||
logger.debug('[AnthropicClient]', { ...requestOptions });
|
||||
|
||||
const handleChunk = (currentChunk) => {
|
||||
|
|
@ -639,6 +771,11 @@ class AnthropicClient extends BaseClient {
|
|||
|
||||
for await (const completion of response) {
|
||||
// Handle each completion as before
|
||||
const type = completion?.type ?? '';
|
||||
if (tokenEventTypes.has(type)) {
|
||||
logger.debug(`[AnthropicClient] ${type}`, completion);
|
||||
this[type] = completion;
|
||||
}
|
||||
if (completion?.delta?.text) {
|
||||
handleChunk(completion.delta.text);
|
||||
} else if (completion.completion) {
|
||||
|
|
@ -727,6 +864,8 @@ class AnthropicClient extends BaseClient {
|
|||
*/
|
||||
async titleConvo({ text, responseText = '' }) {
|
||||
let title = 'New Chat';
|
||||
this.message_delta = undefined;
|
||||
this.message_start = undefined;
|
||||
const convo = `<initial_message>
|
||||
${truncateText(text)}
|
||||
</initial_message>
|
||||
|
|
|
|||
|
|
@ -54,10 +54,22 @@ class BaseClient {
|
|||
throw new Error('Subclasses attempted to call summarizeMessages without implementing it');
|
||||
}
|
||||
|
||||
async getTokenCountForResponse(response) {
|
||||
logger.debug('`[BaseClient] recordTokenUsage` not implemented.', response);
|
||||
/**
|
||||
* Abstract method to get the token count for a message. Subclasses must implement this method.
|
||||
* @param {TMessage} responseMessage
|
||||
* @returns {number}
|
||||
*/
|
||||
getTokenCountForResponse(responseMessage) {
|
||||
logger.debug('`[BaseClient] recordTokenUsage` not implemented.', responseMessage);
|
||||
}
|
||||
|
||||
/**
|
||||
* Abstract method to record token usage. Subclasses must implement this method.
|
||||
* If a correction to the token usage is needed, the method should return an object with the corrected token counts.
|
||||
* @param {number} promptTokens
|
||||
* @param {number} completionTokens
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async recordTokenUsage({ promptTokens, completionTokens }) {
|
||||
logger.debug('`[BaseClient] recordTokenUsage` not implemented.', {
|
||||
promptTokens,
|
||||
|
|
@ -536,13 +548,31 @@ class BaseClient {
|
|||
this.getTokenCountForResponse &&
|
||||
this.getTokenCount
|
||||
) {
|
||||
responseMessage.tokenCount = this.getTokenCountForResponse(responseMessage);
|
||||
const completionTokens = this.getTokenCount(completion);
|
||||
await this.recordTokenUsage({ promptTokens, completionTokens });
|
||||
let completionTokens;
|
||||
|
||||
/**
|
||||
* Metadata about input/output costs for the current message. The client
|
||||
* should provide a function to get the current stream usage metadata; if not,
|
||||
* use the legacy token estimations.
|
||||
* @type {StreamUsage | null} */
|
||||
const usage = this.getStreamUsage != null ? this.getStreamUsage() : null;
|
||||
|
||||
if (usage != null && Number(usage.output_tokens) > 0) {
|
||||
responseMessage.tokenCount = usage.output_tokens;
|
||||
completionTokens = responseMessage.tokenCount;
|
||||
await this.updateUserMessageTokenCount({ usage, tokenCountMap, userMessage, opts });
|
||||
} else {
|
||||
responseMessage.tokenCount = this.getTokenCountForResponse(responseMessage);
|
||||
completionTokens = this.getTokenCount(completion);
|
||||
}
|
||||
|
||||
await this.recordTokenUsage({ promptTokens, completionTokens, usage });
|
||||
}
|
||||
|
||||
if (this.userMessagePromise) {
|
||||
await this.userMessagePromise;
|
||||
}
|
||||
|
||||
this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
|
||||
const messageCache = getLogStores(CacheKeys.MESSAGES);
|
||||
messageCache.set(
|
||||
|
|
@ -557,6 +587,66 @@ class BaseClient {
|
|||
return responseMessage;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream usage should only be used for user message token count re-calculation if:
|
||||
* - The stream usage is available, with input tokens greater than 0,
|
||||
* - the client provides a function to calculate the current token count,
|
||||
* - files are being resent with every message (default behavior; or if `false`, with no attachments),
|
||||
* - the `promptPrefix` (custom instructions) is not set.
|
||||
*
|
||||
* In these cases, the legacy token estimations would be more accurate.
|
||||
*
|
||||
* TODO: included system messages in the `orderedMessages` accounting, potentially as a
|
||||
* separate message in the UI. ChatGPT does this through "hidden" system messages.
|
||||
* @param {object} params
|
||||
* @param {StreamUsage} params.usage
|
||||
* @param {Record<string, number>} params.tokenCountMap
|
||||
* @param {TMessage} params.userMessage
|
||||
* @param {object} params.opts
|
||||
*/
|
||||
async updateUserMessageTokenCount({ usage, tokenCountMap, userMessage, opts }) {
|
||||
/** @type {boolean} */
|
||||
const shouldUpdateCount =
|
||||
this.calculateCurrentTokenCount != null &&
|
||||
Number(usage.input_tokens) > 0 &&
|
||||
(this.options.resendFiles ||
|
||||
(!this.options.resendFiles && !this.options.attachments?.length)) &&
|
||||
!this.options.promptPrefix;
|
||||
|
||||
if (!shouldUpdateCount) {
|
||||
return;
|
||||
}
|
||||
|
||||
const userMessageTokenCount = this.calculateCurrentTokenCount({
|
||||
currentMessageId: userMessage.messageId,
|
||||
tokenCountMap,
|
||||
usage,
|
||||
});
|
||||
|
||||
if (userMessageTokenCount === userMessage.tokenCount) {
|
||||
return;
|
||||
}
|
||||
|
||||
userMessage.tokenCount = userMessageTokenCount;
|
||||
/*
|
||||
Note: `AskController` saves the user message, so we update the count of its `userMessage` reference
|
||||
*/
|
||||
if (typeof opts?.getReqData === 'function') {
|
||||
opts.getReqData({
|
||||
userMessage,
|
||||
});
|
||||
}
|
||||
/*
|
||||
Note: we update the user message to be sure it gets the calculated token count;
|
||||
though `AskController` saves the user message, EditController does not
|
||||
*/
|
||||
await this.userMessagePromise;
|
||||
await this.updateMessageInDatabase({
|
||||
messageId: userMessage.messageId,
|
||||
tokenCount: userMessageTokenCount,
|
||||
});
|
||||
}
|
||||
|
||||
async loadHistory(conversationId, parentMessageId = null) {
|
||||
logger.debug('[BaseClient] Loading history:', { conversationId, parentMessageId });
|
||||
|
||||
|
|
@ -644,6 +734,10 @@ class BaseClient {
|
|||
return { message: savedMessage, conversation };
|
||||
}
|
||||
|
||||
/**
|
||||
* Update a message in the database.
|
||||
* @param {Partial<TMessage>} message
|
||||
*/
|
||||
async updateMessageInDatabase(message) {
|
||||
await updateMessage(this.options.req, message);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,9 +27,9 @@ const {
|
|||
createContextHandlers,
|
||||
} = require('./prompts');
|
||||
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
|
||||
const { spendTokens } = require('~/models/spendTokens');
|
||||
const { isEnabled, sleep } = require('~/server/utils');
|
||||
const { handleOpenAIErrors } = require('./tools/util');
|
||||
const spendTokens = require('~/models/spendTokens');
|
||||
const { createLLM, RunManager } = require('./llm');
|
||||
const ChatGPTClient = require('./ChatGPTClient');
|
||||
const { summaryBuffer } = require('./memory');
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
const { createStartHandler } = require('~/app/clients/callbacks');
|
||||
const spendTokens = require('~/models/spendTokens');
|
||||
const { spendTokens } = require('~/models/spendTokens');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
class RunManager {
|
||||
|
|
|
|||
43
api/app/clients/prompts/addCacheControl.js
Normal file
43
api/app/clients/prompts/addCacheControl.js
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
/**
|
||||
* Anthropic API: Adds cache control to the appropriate user messages in the payload.
|
||||
* @param {Array<AnthropicMessage>} messages - The array of message objects.
|
||||
* @returns {Array<AnthropicMessage>} - The updated array of message objects with cache control added.
|
||||
*/
|
||||
function addCacheControl(messages) {
|
||||
if (!Array.isArray(messages) || messages.length < 2) {
|
||||
return messages;
|
||||
}
|
||||
|
||||
const updatedMessages = [...messages];
|
||||
let userMessagesFound = 0;
|
||||
|
||||
for (let i = updatedMessages.length - 1; i >= 0 && userMessagesFound < 2; i--) {
|
||||
if (updatedMessages[i].role === 'user') {
|
||||
if (typeof updatedMessages[i].content === 'string') {
|
||||
updatedMessages[i] = {
|
||||
...updatedMessages[i],
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: updatedMessages[i].content,
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
],
|
||||
};
|
||||
} else if (Array.isArray(updatedMessages[i].content)) {
|
||||
updatedMessages[i] = {
|
||||
...updatedMessages[i],
|
||||
content: updatedMessages[i].content.map((item) => ({
|
||||
...item,
|
||||
cache_control: { type: 'ephemeral' },
|
||||
})),
|
||||
};
|
||||
}
|
||||
userMessagesFound++;
|
||||
}
|
||||
}
|
||||
|
||||
return updatedMessages;
|
||||
}
|
||||
|
||||
module.exports = addCacheControl;
|
||||
164
api/app/clients/prompts/addCacheControl.spec.js
Normal file
164
api/app/clients/prompts/addCacheControl.spec.js
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
const addCacheControl = require('./addCacheControl');
|
||||
|
||||
describe('addCacheControl', () => {
|
||||
test('should add cache control to the last two user messages with array content', () => {
|
||||
const messages = [
|
||||
{ role: 'user', content: [{ type: 'text', text: 'Hello' }] },
|
||||
{ role: 'assistant', content: [{ type: 'text', text: 'Hi there' }] },
|
||||
{ role: 'user', content: [{ type: 'text', text: 'How are you?' }] },
|
||||
{ role: 'assistant', content: [{ type: 'text', text: 'I\'m doing well, thanks!' }] },
|
||||
{ role: 'user', content: [{ type: 'text', text: 'Great!' }] },
|
||||
];
|
||||
|
||||
const result = addCacheControl(messages);
|
||||
|
||||
expect(result[0].content[0]).not.toHaveProperty('cache_control');
|
||||
expect(result[2].content[0].cache_control).toEqual({ type: 'ephemeral' });
|
||||
expect(result[4].content[0].cache_control).toEqual({ type: 'ephemeral' });
|
||||
});
|
||||
|
||||
test('should add cache control to the last two user messages with string content', () => {
|
||||
const messages = [
|
||||
{ role: 'user', content: 'Hello' },
|
||||
{ role: 'assistant', content: 'Hi there' },
|
||||
{ role: 'user', content: 'How are you?' },
|
||||
{ role: 'assistant', content: 'I\'m doing well, thanks!' },
|
||||
{ role: 'user', content: 'Great!' },
|
||||
];
|
||||
|
||||
const result = addCacheControl(messages);
|
||||
|
||||
expect(result[0].content).toBe('Hello');
|
||||
expect(result[2].content[0]).toEqual({
|
||||
type: 'text',
|
||||
text: 'How are you?',
|
||||
cache_control: { type: 'ephemeral' },
|
||||
});
|
||||
expect(result[4].content[0]).toEqual({
|
||||
type: 'text',
|
||||
text: 'Great!',
|
||||
cache_control: { type: 'ephemeral' },
|
||||
});
|
||||
});
|
||||
|
||||
test('should handle mixed string and array content', () => {
|
||||
const messages = [
|
||||
{ role: 'user', content: 'Hello' },
|
||||
{ role: 'assistant', content: 'Hi there' },
|
||||
{ role: 'user', content: [{ type: 'text', text: 'How are you?' }] },
|
||||
];
|
||||
|
||||
const result = addCacheControl(messages);
|
||||
|
||||
expect(result[0].content[0]).toEqual({
|
||||
type: 'text',
|
||||
text: 'Hello',
|
||||
cache_control: { type: 'ephemeral' },
|
||||
});
|
||||
expect(result[2].content[0].cache_control).toEqual({ type: 'ephemeral' });
|
||||
});
|
||||
|
||||
test('should handle less than two user messages', () => {
|
||||
const messages = [
|
||||
{ role: 'user', content: 'Hello' },
|
||||
{ role: 'assistant', content: 'Hi there' },
|
||||
];
|
||||
|
||||
const result = addCacheControl(messages);
|
||||
|
||||
expect(result[0].content[0]).toEqual({
|
||||
type: 'text',
|
||||
text: 'Hello',
|
||||
cache_control: { type: 'ephemeral' },
|
||||
});
|
||||
expect(result[1].content).toBe('Hi there');
|
||||
});
|
||||
|
||||
test('should return original array if no user messages', () => {
|
||||
const messages = [
|
||||
{ role: 'assistant', content: 'Hi there' },
|
||||
{ role: 'assistant', content: 'How can I help?' },
|
||||
];
|
||||
|
||||
const result = addCacheControl(messages);
|
||||
|
||||
expect(result).toEqual(messages);
|
||||
});
|
||||
|
||||
test('should handle empty array', () => {
|
||||
const messages = [];
|
||||
const result = addCacheControl(messages);
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
test('should handle non-array input', () => {
|
||||
const messages = 'not an array';
|
||||
const result = addCacheControl(messages);
|
||||
expect(result).toBe('not an array');
|
||||
});
|
||||
|
||||
test('should not modify assistant messages', () => {
|
||||
const messages = [
|
||||
{ role: 'user', content: 'Hello' },
|
||||
{ role: 'assistant', content: 'Hi there' },
|
||||
{ role: 'user', content: 'How are you?' },
|
||||
];
|
||||
|
||||
const result = addCacheControl(messages);
|
||||
|
||||
expect(result[1].content).toBe('Hi there');
|
||||
});
|
||||
|
||||
test('should handle multiple content items in user messages', () => {
|
||||
const messages = [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: 'Hello' },
|
||||
{ type: 'image', url: 'http://example.com/image.jpg' },
|
||||
],
|
||||
},
|
||||
{ role: 'assistant', content: 'Hi there' },
|
||||
{ role: 'user', content: 'How are you?' },
|
||||
];
|
||||
|
||||
const result = addCacheControl(messages);
|
||||
|
||||
expect(result[0].content[0].cache_control).toEqual({ type: 'ephemeral' });
|
||||
expect(result[0].content[1].cache_control).toEqual({ type: 'ephemeral' });
|
||||
expect(result[2].content[0]).toEqual({
|
||||
type: 'text',
|
||||
text: 'How are you?',
|
||||
cache_control: { type: 'ephemeral' },
|
||||
});
|
||||
});
|
||||
|
||||
test('should handle an array with mixed content types', () => {
|
||||
const messages = [
|
||||
{ role: 'user', content: 'Hello' },
|
||||
{ role: 'assistant', content: 'Hi there' },
|
||||
{ role: 'user', content: [{ type: 'text', text: 'How are you?' }] },
|
||||
{ role: 'assistant', content: 'I\'m doing well, thanks!' },
|
||||
{ role: 'user', content: 'Great!' },
|
||||
];
|
||||
|
||||
const result = addCacheControl(messages);
|
||||
console.dir(result, { depth: null });
|
||||
|
||||
expect(result[0].content).toEqual('Hello');
|
||||
expect(result[2].content[0]).toEqual({
|
||||
type: 'text',
|
||||
text: 'How are you?',
|
||||
cache_control: { type: 'ephemeral' },
|
||||
});
|
||||
expect(result[4].content).toEqual([
|
||||
{
|
||||
type: 'text',
|
||||
text: 'Great!',
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
]);
|
||||
expect(result[1].content).toBe('Hi there');
|
||||
expect(result[3].content).toBe('I\'m doing well, thanks!');
|
||||
});
|
||||
});
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
const addCacheControl = require('./addCacheControl');
|
||||
const formatMessages = require('./formatMessages');
|
||||
const summaryPrompts = require('./summaryPrompts');
|
||||
const handleInputs = require('./handleInputs');
|
||||
|
|
@ -8,6 +9,7 @@ const createVisionPrompt = require('./createVisionPrompt');
|
|||
const createContextHandlers = require('./createContextHandlers');
|
||||
|
||||
module.exports = {
|
||||
addCacheControl,
|
||||
...formatMessages,
|
||||
...summaryPrompts,
|
||||
...handleInputs,
|
||||
|
|
|
|||
|
|
@ -211,7 +211,21 @@ describe('AnthropicClient', () => {
|
|||
expect(anthropicClient._options.defaultHeaders).toBeDefined();
|
||||
expect(anthropicClient._options.defaultHeaders).toHaveProperty('anthropic-beta');
|
||||
expect(anthropicClient._options.defaultHeaders['anthropic-beta']).toBe(
|
||||
'max-tokens-3-5-sonnet-2024-07-15',
|
||||
'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
|
||||
);
|
||||
});
|
||||
|
||||
it('should add beta header for claude-3-haiku model', () => {
|
||||
const client = new AnthropicClient('test-api-key');
|
||||
const modelOptions = {
|
||||
model: 'claude-3-haiku-2028',
|
||||
};
|
||||
client.setOptions({ modelOptions });
|
||||
const anthropicClient = client.getClient(modelOptions);
|
||||
expect(anthropicClient._options.defaultHeaders).toBeDefined();
|
||||
expect(anthropicClient._options.defaultHeaders).toHaveProperty('anthropic-beta');
|
||||
expect(anthropicClient._options.defaultHeaders['anthropic-beta']).toBe(
|
||||
'prompt-caching-2024-07-31',
|
||||
);
|
||||
});
|
||||
|
||||
|
|
@ -226,4 +240,145 @@ describe('AnthropicClient', () => {
|
|||
expect(anthropicClient.defaultHeaders).not.toHaveProperty('anthropic-beta');
|
||||
});
|
||||
});
|
||||
|
||||
describe('calculateCurrentTokenCount', () => {
|
||||
let client;
|
||||
|
||||
beforeEach(() => {
|
||||
client = new AnthropicClient('test-api-key');
|
||||
});
|
||||
|
||||
it('should calculate correct token count when usage is provided', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 10,
|
||||
msg2: 20,
|
||||
currentMsg: 30,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 70,
|
||||
output_tokens: 50,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(40); // 70 - (10 + 20) = 40
|
||||
});
|
||||
|
||||
it('should return original estimate if calculation results in negative value', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 40,
|
||||
msg2: 50,
|
||||
currentMsg: 30,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 80,
|
||||
output_tokens: 50,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(30); // Original estimate
|
||||
});
|
||||
|
||||
it('should handle cache creation and read input tokens', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 10,
|
||||
msg2: 20,
|
||||
currentMsg: 30,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 50,
|
||||
cache_creation_input_tokens: 10,
|
||||
cache_read_input_tokens: 20,
|
||||
output_tokens: 40,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(50); // (50 + 10 + 20) - (10 + 20) = 50
|
||||
});
|
||||
|
||||
it('should handle missing usage properties', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 10,
|
||||
msg2: 20,
|
||||
currentMsg: 30,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
output_tokens: 40,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(30); // Original estimate
|
||||
});
|
||||
|
||||
it('should handle empty tokenCountMap', () => {
|
||||
const tokenCountMap = {};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 50,
|
||||
output_tokens: 40,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(50);
|
||||
expect(Number.isNaN(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('should handle zero values in usage', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 10,
|
||||
currentMsg: 20,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 0,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(20); // Should return original estimate
|
||||
expect(Number.isNaN(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('should handle undefined usage', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 10,
|
||||
currentMsg: 20,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = undefined;
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(20); // Should return original estimate
|
||||
expect(Number.isNaN(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('should handle non-numeric values in tokenCountMap', () => {
|
||||
const tokenCountMap = {
|
||||
msg1: 'ten',
|
||||
currentMsg: 20,
|
||||
};
|
||||
const currentMessageId = 'currentMsg';
|
||||
const usage = {
|
||||
input_tokens: 30,
|
||||
output_tokens: 10,
|
||||
};
|
||||
|
||||
const result = client.calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage });
|
||||
|
||||
expect(result).toBe(30); // Should return 30 (input_tokens) - 0 (ignored 'ten') = 30
|
||||
expect(Number.isNaN(result)).toBe(false);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue