feat: Anthropic Agents Prompt Caching & UI Accessibility Enhancements (#6045)

* chore: remove auto-focus for now

* refactor: move react-hook-form Controller Logic to AgentSelect from AgentPanel

* fix: a11y focus issue with AgentSelect by never replacing it in its component tree

* fix: maintain ComboBox focus and force re-render on agent ID change in AgentPanel

* chore: `gemini-2.0-flash-lite-preview-02-05` (deprecated)

* refactor: extract cache control logic and headers configuration to helper functions in AnthropicClient

* feat: anthropic agents prompt caching

* chore: bump @librechat/agents and related dependencies

* fix: typo
This commit is contained in:
Danny Avila 2025-02-25 22:14:58 -05:00 committed by GitHub
parent d3d7d11ea8
commit e14df5956a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 4460 additions and 1477 deletions

View file

@ -22,6 +22,7 @@ const {
} = require('librechat-data-provider');
const {
formatMessage,
addCacheControl,
formatAgentMessages,
formatContentStrings,
createContextHandlers,
@ -589,7 +590,7 @@ class AgentClient extends BaseClient {
* @param {number} [i]
* @param {TMessageContentParts[]} [contentData]
*/
const runAgent = async (agent, messages, i = 0, contentData = []) => {
const runAgent = async (agent, _messages, i = 0, contentData = []) => {
config.configurable.model = agent.model_parameters.model;
if (i > 0) {
this.model = agent.model_parameters.model;
@ -622,12 +623,21 @@ class AgentClient extends BaseClient {
}
if (noSystemMessages === true && systemContent?.length) {
let latestMessage = messages.pop().content;
let latestMessage = _messages.pop().content;
if (typeof latestMessage !== 'string') {
latestMessage = latestMessage[0].text;
}
latestMessage = [systemContent, latestMessage].join('\n');
messages.push(new HumanMessage(latestMessage));
_messages.push(new HumanMessage(latestMessage));
}
let messages = _messages;
if (
agent.model_parameters?.clientOptions?.defaultHeaders?.['anthropic-beta']?.includes(
'prompt-caching',
)
) {
messages = addCacheControl(messages);
}
run = await createRun({

View file

@ -0,0 +1,110 @@
const { EModelEndpoint, anthropicSettings } = require('librechat-data-provider');
const { matchModelName } = require('~/utils');
const { logger } = require('~/config');
/**
 * Reports whether prompt caching should be enabled for the given Anthropic model.
 *
 * The model name is passed through `matchModelName` for the Anthropic endpoint and
 * the resulting value is compared against a fixed list of Claude 3.x identifiers.
 * Any match containing the Claude 3.5 Sonnet `-latest` alias is rejected up front.
 *
 * @param {string} modelName The model name as supplied by the caller.
 * @returns {boolean} `true` only when the matched name is one of the listed models.
 */
function checkPromptCacheSupport(modelName) {
  const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);

  // NOTE(review): `matchModelName` is defined outside this file; this assumes it can
  // return a non-string (e.g. for a missing model name) — confirm against `~/utils`.
  // Without the guard, the `.includes` calls below throw a TypeError in that case.
  if (typeof modelMatch !== 'string') {
    return false;
  }

  // The `-latest` aliases are excluded before the exact-name lookup.
  if (
    modelMatch.includes('claude-3-5-sonnet-latest') ||
    modelMatch.includes('claude-3.5-sonnet-latest')
  ) {
    return false;
  }

  const cacheCapableModels = [
    'claude-3-7-sonnet',
    'claude-3-5-sonnet',
    'claude-3-5-haiku',
    'claude-3-haiku',
    'claude-3-opus',
    'claude-3.7-sonnet',
    'claude-3.5-sonnet',
    'claude-3.5-haiku',
  ];
  return cacheCapableModels.includes(modelMatch);
}
/**
 * Builds the `anthropic-beta` request header for a Claude model when cache control is in use.
 *
 * @param {string} model The model name, tested against the Claude 3.5 Sonnet and 3.7 patterns.
 * @param {boolean} supportsCacheControl Whether cache control applies; when falsy no headers are produced.
 * @returns {AnthropicClientOptions['extendedOptions']['defaultHeaders']|undefined} An object holding the
 * `anthropic-beta` header, or `undefined` when cache control is not supported.
 */
function getClaudeHeaders(model, supportsCacheControl) {
  if (!supportsCacheControl) {
    return undefined;
  }

  // Start from the plain prompt-caching value and widen it for the model families
  // that carry an additional beta flag. The 3.5 Sonnet check takes precedence.
  let betaFlags = 'prompt-caching-2024-07-31';
  if (/claude-3[-.]5-sonnet/.test(model)) {
    betaFlags = 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31';
  } else if (/claude-3[-.]7/.test(model)) {
    betaFlags = 'output-128k-2025-02-19,prompt-caching-2024-07-31';
  }

  return { 'anthropic-beta': betaFlags };
}
/**
 * Configures reasoning-related options for Claude models.
 *
 * Works on a shallow copy of `anthropicInput`; neither the input object nor a
 * `thinking` object it already carries is modified.
 *
 * Steps:
 * 1. Enables `thinking` when `extendedOptions.thinking` is truthy and the model
 *    name matches `claude-3-7` / `claude-3.7`.
 * 2. Applies `extendedOptions.thinkingBudget` as `thinking.budget_tokens` when
 *    thinking is active and a budget was supplied.
 * 3. When thinking is active and either no max-token limit is set or the budget
 *    exceeds it, sets `max_tokens` (falling back to the model default from
 *    `anthropicSettings.maxOutputTokens.reset`), logs a warning, and caps the
 *    budget at 90% of `max_tokens`.
 *
 * @param {AnthropicClientOptions & { max_tokens?: number }} anthropicInput The request options object
 * @param {Object} extendedOptions Additional client configuration options
 * @param {boolean} extendedOptions.thinking Whether thinking is enabled in client config
 * @param {number|null} extendedOptions.thinkingBudget The token budget for thinking
 * @returns {Object} Updated request options
 */
function configureReasoning(anthropicInput, extendedOptions = {}) {
  const updatedOptions = { ...anthropicInput };
  const currentMaxTokens = updatedOptions.max_tokens ?? updatedOptions.maxTokens;

  if (
    extendedOptions.thinking &&
    updatedOptions.model &&
    /claude-3[-.]7/.test(updatedOptions.model)
  ) {
    updatedOptions.thinking = {
      type: 'enabled',
    };
  }

  // Nothing below applies unless thinking is active (enabled above or passed in).
  if (updatedOptions.thinking == null) {
    return updatedOptions;
  }

  if (extendedOptions.thinkingBudget != null) {
    updatedOptions.thinking = {
      ...updatedOptions.thinking,
      budget_tokens: extendedOptions.thinkingBudget,
    };
  }

  const requestedBudget = updatedOptions.thinking.budget_tokens;
  const isMaxTokensMissing = currentMaxTokens == null;
  const isBudgetOverLimit = requestedBudget > currentMaxTokens;
  if (!isMaxTokensMissing && !isBudgetOverLimit) {
    return updatedOptions;
  }

  const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model);
  updatedOptions.max_tokens = currentMaxTokens ?? maxTokens;

  // Pick the message from the actual cause. Comparing `max_tokens` to the model
  // default misreports the case where the caller's own limit equals that default.
  logger.warn(
    isMaxTokensMissing
      ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
      : `[AnthropicClient] thinking budget_tokens (${updatedOptions.thinking.budget_tokens}) exceeds max_tokens (${updatedOptions.max_tokens}). Adjusting budget_tokens.`,
  );

  // Only clamp when there is a budget to clamp and a usable ceiling:
  // `Math.min(undefined, x)` and `Math.min(x, NaN)` both yield NaN.
  const budgetCeiling = Math.floor(updatedOptions.max_tokens * 0.9);
  if (requestedBudget != null && !Number.isNaN(budgetCeiling)) {
    // Replace rather than assign into `thinking`: after the shallow copy it may
    // still be the caller's own object.
    updatedOptions.thinking = {
      ...updatedOptions.thinking,
      budget_tokens: Math.min(requestedBudget, budgetCeiling),
    };
  }

  return updatedOptions;
}
module.exports = { checkPromptCacheSupport, getClaudeHeaders, configureReasoning };

View file

@ -1,5 +1,6 @@
const { HttpsProxyAgent } = require('https-proxy-agent');
const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
const { checkPromptCacheSupport, getClaudeHeaders } = require('./helpers');
/**
* Generates configuration options for creating an Anthropic language model (LLM) instance.
@ -20,6 +21,14 @@ const { anthropicSettings, removeNullishValues } = require('librechat-data-provi
* @returns {Object} Configuration options for creating an Anthropic LLM instance, with null and undefined values removed.
*/
function getLLMConfig(apiKey, options = {}) {
const systemOptions = {
thinking: options.modelOptions.thinking ?? anthropicSettings.thinking.default,
promptCache: options.modelOptions.promptCache ?? anthropicSettings.promptCache.default,
thinkingBudget: options.modelOptions.thinkingBudget ?? anthropicSettings.thinkingBudget.default,
};
for (let key in systemOptions) {
delete options.modelOptions[key];
}
const defaultOptions = {
model: anthropicSettings.model.default,
maxOutputTokens: anthropicSettings.maxOutputTokens.default,
@ -29,7 +38,7 @@ function getLLMConfig(apiKey, options = {}) {
const mergedOptions = Object.assign(defaultOptions, options.modelOptions);
/** @type {AnthropicClientOptions} */
const requestOptions = {
let requestOptions = {
apiKey,
model: mergedOptions.model,
stream: mergedOptions.stream,
@ -42,6 +51,13 @@ function getLLMConfig(apiKey, options = {}) {
clientOptions: {},
};
const supportsCacheControl =
systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model);
const headers = getClaudeHeaders(requestOptions.model, supportsCacheControl);
if (headers) {
requestOptions.clientOptions.defaultHeaders = headers;
}
if (options.proxy) {
requestOptions.clientOptions.httpAgent = new HttpsProxyAgent(options.proxy);
}