feat: Anthropic Agents Prompt Caching & UI Accessibility Enhancements (#6045)

* chore: remove auto-focus for now

* refactor: move react-hook-form Controller logic from AgentPanel to AgentSelect

* fix: a11y focus issue with AgentSelect by never replacing it in its component tree

* fix: maintain ComboBox focus and force re-render on agent ID change in AgentPanel

* chore: `gemini-2.0-flash-lite-preview-02-05` (deprecated)

* refactor: extract cache control logic and headers configuration to helper functions in AnthropicClient

* feat: anthropic agents prompt caching

* chore: bump @librechat/agents and related dependencies

* fix: typo
Author: Danny Avila · 2025-02-25 22:14:58 -05:00 · committed by GitHub
Parent: d3d7d11ea8 · Commit: e14df5956a
12 changed files with 4460 additions and 1477 deletions
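
The headline feature wires Anthropic prompt caching into agents. At the API level this takes two pieces: an `anthropic-beta` request header, and a `cache_control` breakpoint on the prompt blocks to be reused across turns. A minimal sketch with `@anthropic-ai/sdk` (illustrative only; the model name and prompt values are placeholders, not taken from this commit):

```js
// Minimal sketch of Anthropic prompt caching. The beta header string matches
// the one used in the diff below; the model and prompts are placeholders.
const Anthropic = require('@anthropic-ai/sdk');

const client = new Anthropic({
  apiKey: process.env.ANTHROPIC_API_KEY,
  defaultHeaders: { 'anthropic-beta': 'prompt-caching-2024-07-31' },
});

async function send(systemPrompt, userText) {
  return client.messages.create({
    model: 'claude-3-5-sonnet-20241022',
    max_tokens: 1024,
    // cache_control marks a breakpoint: the API caches the prompt prefix up to
    // and including this block, so repeated agent turns skip reprocessing it.
    system: [{ type: 'text', text: systemPrompt, cache_control: { type: 'ephemeral' } }],
    messages: [{ role: 'user', content: userText }],
  });
}
```

Cached prefix reads are billed at a reduced input-token rate, which is why support is gated per model via `checkPromptCacheSupport` in the hunks below.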

@@ -17,6 +17,11 @@ const {
   parseParamFromPrompt,
   createContextHandlers,
 } = require('./prompts');
+const {
+  getClaudeHeaders,
+  configureReasoning,
+  checkPromptCacheSupport,
+} = require('~/server/services/Endpoints/anthropic/helpers');
 const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const Tokenizer = require('~/server/services/Tokenizer');
@@ -101,8 +106,7 @@ class AnthropicClient extends BaseClient {
     const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
     this.isClaude3 = modelMatch.includes('claude-3');
     this.isLegacyOutput = !modelMatch.includes('claude-3-5-sonnet');
-    this.supportsCacheControl =
-      this.options.promptCache && this.checkPromptCacheSupport(modelMatch);
+    this.supportsCacheControl = this.options.promptCache && checkPromptCacheSupport(modelMatch);

     if (
       this.isLegacyOutput &&
@@ -174,26 +178,9 @@ class AnthropicClient extends BaseClient {
       options.baseURL = this.options.reverseProxyUrl;
     }

-    if (
-      this.supportsCacheControl &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-5-sonnet')
-    ) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
-      };
-    } else if (
-      this.supportsCacheControl &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-7')
-    ) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
-      };
-    } else if (this.supportsCacheControl) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'prompt-caching-2024-07-31',
-      };
-    }
+    const headers = getClaudeHeaders(requestOptions?.model, this.supportsCacheControl);
+    if (headers) {
+      options.defaultHeaders = headers;
+    }

     return new Anthropic(options);
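
The hunk above shows only the call site. Presumably the extracted helper reproduces the three deleted branches one-to-one; a sketch under that assumption (not the actual contents of `~/server/services/Endpoints/anthropic/helpers`):

```js
// Sketch of getClaudeHeaders, reconstructed from the branches deleted above.
function getClaudeHeaders(model, supportsCacheControl) {
  if (!supportsCacheControl) {
    return undefined;
  }
  if (model && model.includes('claude-3-5-sonnet')) {
    return {
      'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
    };
  }
  if (model && model.includes('claude-3-7')) {
    return {
      'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
    };
  }
  return { 'anthropic-beta': 'prompt-caching-2024-07-31' };
}
```

Returning `undefined` when caching is unsupported leaves `options.defaultHeaders` unset, so the SDK's defaults apply.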
@@ -684,27 +671,6 @@ class AnthropicClient extends BaseClient {
       : await client.completions.create(options);
   }

-  /**
-   * @param {string} modelName
-   * @returns {boolean}
-   */
-  checkPromptCacheSupport(modelName) {
-    const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
-    if (modelMatch.includes('claude-3-5-sonnet-latest')) {
-      return false;
-    }
-    if (
-      modelMatch === 'claude-3-7-sonnet' ||
-      modelMatch === 'claude-3-5-sonnet' ||
-      modelMatch === 'claude-3-5-haiku' ||
-      modelMatch === 'claude-3-haiku' ||
-      modelMatch === 'claude-3-opus'
-    ) {
-      return true;
-    }
-    return false;
-  }
-
   getMessageMapMethod() {
     /**
      * @param {TMessage} msg
@@ -761,7 +727,7 @@ class AnthropicClient extends BaseClient {
       topK: top_k,
     } = this.modelOptions;

-    const requestOptions = {
+    let requestOptions = {
       model,
       stream: stream || true,
       stop_sequences,
@@ -780,40 +746,10 @@ class AnthropicClient extends BaseClient {
       requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
     }

-    if (
-      this.options.thinking &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-7')
-    ) {
-      requestOptions.thinking = {
-        type: 'enabled',
-      };
-    }
-
-    if (requestOptions.thinking != null && this.options.thinkingBudget != null) {
-      requestOptions.thinking = {
-        ...requestOptions.thinking,
-        budget_tokens: this.options.thinkingBudget,
-      };
-    }
-
-    if (
-      requestOptions.thinking != null &&
-      (requestOptions.max_tokens == null ||
-        requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
-    ) {
-      const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
-      requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
-      logger.warn(
-        requestOptions.max_tokens === maxTokens
-          ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
-          : `[AnthropicClient] thinking budget_tokens (${requestOptions.thinking.budget_tokens}) exceeds max_tokens (${requestOptions.max_tokens}). Adjusting budget_tokens.`,
-      );
-      requestOptions.thinking.budget_tokens = Math.min(
-        requestOptions.thinking.budget_tokens,
-        Math.floor(requestOptions.max_tokens * 0.9),
-      );
-    }
+    requestOptions = configureReasoning(requestOptions, {
+      thinking: this.options.thinking,
+      thinkingBudget: this.options.thinkingBudget,
+    });

     if (this.systemMessage && this.supportsCacheControl === true) {
       requestOptions.system = [
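
Likewise, `configureReasoning` presumably absorbs the thinking logic deleted above; a sketch under that assumption, with the warning logs omitted (`anthropicSettings` is the same import the deleted code already used):

```js
// Sketch of configureReasoning, reconstructed from the deleted inline logic —
// an assumption about the helpers module, not a copy of the real file.
const { anthropicSettings } = require('librechat-data-provider');

function configureReasoning(requestOptions, { thinking, thinkingBudget } = {}) {
  // Extended thinking is only enabled for claude-3-7 models.
  if (thinking && requestOptions?.model && requestOptions.model.includes('claude-3-7')) {
    requestOptions.thinking = { type: 'enabled' };
  }

  if (requestOptions.thinking != null && thinkingBudget != null) {
    requestOptions.thinking = { ...requestOptions.thinking, budget_tokens: thinkingBudget };
  }

  // budget_tokens must fit inside max_tokens: default max_tokens if unset,
  // then clamp the budget to 90% of it, as the deleted code did.
  if (
    requestOptions.thinking != null &&
    (requestOptions.max_tokens == null ||
      requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
  ) {
    const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
    requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
    requestOptions.thinking.budget_tokens = Math.min(
      requestOptions.thinking.budget_tokens,
      Math.floor(requestOptions.max_tokens * 0.9),
    );
  }

  return requestOptions;
}
```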