feat: Anthropic Agents Prompt Caching & UI Accessibility Enhancements (#6045)

* chore: remove auto-focus for now

* refactor: move react-hook-form Controller Logic to AgentSelect from AgentPanel

* fix: a11y focus issue with AgentSelect by never replacing it in its component tree

* fix: maintain ComboBox focus and force re-render on agent ID change in AgentPanel

* chore: `gemini-2.0-flash-lite-preview-02-05` (deprecated)

* refactor: extract cache control logic and headers configuration to helper functions in AnthropicClient

* feat: anthropic agents prompt caching

* chore: bump @librechat/agents and related dependencies

* fix: typo
Danny Avila, 2025-02-25 22:14:58 -05:00 (committed by GitHub)
parent d3d7d11ea8
commit e14df5956a
12 changed files with 4460 additions and 1477 deletions
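
Background (not part of the diff): Anthropic's prompt caching beta marks cacheable spans by attaching a cache_control block to content parts of the system prompt and recent user messages, alongside an 'anthropic-beta' header that opts into the feature. A minimal sketch of the resulting request shape:

// Sketch of an Anthropic payload once cache control has been applied.
// Requires the 'anthropic-beta: prompt-caching-2024-07-31' request header.
const payload = {
  model: 'claude-3-5-sonnet-20241022',
  max_tokens: 1024,
  system: [
    {
      type: 'text',
      text: 'You are a helpful assistant.',
      cache_control: { type: 'ephemeral' },
    },
  ],
  messages: [
    {
      role: 'user',
      content: [{ type: 'text', text: 'Hello', cache_control: { type: 'ephemeral' } }],
    },
  ],
};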

api/app/clients/AnthropicClient.js

@@ -17,6 +17,11 @@ const {
   parseParamFromPrompt,
   createContextHandlers,
 } = require('./prompts');
+const {
+  getClaudeHeaders,
+  configureReasoning,
+  checkPromptCacheSupport,
+} = require('~/server/services/Endpoints/anthropic/helpers');
 const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const Tokenizer = require('~/server/services/Tokenizer');
@@ -101,8 +106,7 @@ class AnthropicClient extends BaseClient {
     const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
     this.isClaude3 = modelMatch.includes('claude-3');
     this.isLegacyOutput = !modelMatch.includes('claude-3-5-sonnet');
-    this.supportsCacheControl =
-      this.options.promptCache && this.checkPromptCacheSupport(modelMatch);
+    this.supportsCacheControl = this.options.promptCache && checkPromptCacheSupport(modelMatch);

     if (
       this.isLegacyOutput &&
@@ -174,26 +178,9 @@ class AnthropicClient extends BaseClient {
       options.baseURL = this.options.reverseProxyUrl;
     }

-    if (
-      this.supportsCacheControl &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-5-sonnet')
-    ) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
-      };
-    } else if (
-      this.supportsCacheControl &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-7')
-    ) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
-      };
-    } else if (this.supportsCacheControl) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'prompt-caching-2024-07-31',
-      };
+    const headers = getClaudeHeaders(requestOptions?.model, this.supportsCacheControl);
+    if (headers) {
+      options.defaultHeaders = headers;
     }

     return new Anthropic(options);
@@ -684,27 +671,6 @@ class AnthropicClient extends BaseClient {
       : await client.completions.create(options);
   }

-  /**
-   * @param {string} modelName
-   * @returns {boolean}
-   */
-  checkPromptCacheSupport(modelName) {
-    const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
-    if (modelMatch.includes('claude-3-5-sonnet-latest')) {
-      return false;
-    }
-    if (
-      modelMatch === 'claude-3-7-sonnet' ||
-      modelMatch === 'claude-3-5-sonnet' ||
-      modelMatch === 'claude-3-5-haiku' ||
-      modelMatch === 'claude-3-haiku' ||
-      modelMatch === 'claude-3-opus'
-    ) {
-      return true;
-    }
-    return false;
-  }
-
   getMessageMapMethod() {
     /**
      * @param {TMessage} msg
@@ -761,7 +727,7 @@ class AnthropicClient extends BaseClient {
       topK: top_k,
     } = this.modelOptions;

-    const requestOptions = {
+    let requestOptions = {
       model,
       stream: stream || true,
       stop_sequences,
@@ -780,40 +746,10 @@ class AnthropicClient extends BaseClient {
       requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
     }

-    if (
-      this.options.thinking &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-7')
-    ) {
-      requestOptions.thinking = {
-        type: 'enabled',
-      };
-    }
-
-    if (requestOptions.thinking != null && this.options.thinkingBudget != null) {
-      requestOptions.thinking = {
-        ...requestOptions.thinking,
-        budget_tokens: this.options.thinkingBudget,
-      };
-    }
-
-    if (
-      requestOptions.thinking != null &&
-      (requestOptions.max_tokens == null ||
-        requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
-    ) {
-      const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
-      requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
-      logger.warn(
-        requestOptions.max_tokens === maxTokens
-          ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
-          : `[AnthropicClient] thinking budget_tokens (${requestOptions.thinking.budget_tokens}) exceeds max_tokens (${requestOptions.max_tokens}). Adjusting budget_tokens.`,
-      );
-      requestOptions.thinking.budget_tokens = Math.min(
-        requestOptions.thinking.budget_tokens,
-        Math.floor(requestOptions.max_tokens * 0.9),
-      );
-    }
+    requestOptions = configureReasoning(requestOptions, {
+      thinking: this.options.thinking,
+      thinkingBudget: this.options.thinkingBudget,
+    });

     if (this.systemMessage && this.supportsCacheControl === true) {
       requestOptions.system = [

api/app/clients/prompts/addCacheControl.js

@@ -1,7 +1,7 @@
 /**
  * Anthropic API: Adds cache control to the appropriate user messages in the payload.
- * @param {Array<AnthropicMessage>} messages - The array of message objects.
- * @returns {Array<AnthropicMessage>} - The updated array of message objects with cache control added.
+ * @param {Array<AnthropicMessage | BaseMessage>} messages - The array of message objects.
+ * @returns {Array<AnthropicMessage | BaseMessage>} - The updated array of message objects with cache control added.
  */
 function addCacheControl(messages) {
   if (!Array.isArray(messages) || messages.length < 2) {
@@ -13,7 +13,9 @@ function addCacheControl(messages) {
   for (let i = updatedMessages.length - 1; i >= 0 && userMessagesModified < 2; i--) {
     const message = updatedMessages[i];
-    if (message.role !== 'user') {
+    if (message.getType != null && message.getType() !== 'human') {
       continue;
+    } else if (message.getType == null && message.role !== 'user') {
+      continue;
     }
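
Why the dual check: the agents path now feeds LangChain BaseMessage instances through the same helper that previously only saw raw Anthropic payload messages. A minimal illustration (assuming @langchain/core's message classes; not part of the commit):

// LangChain messages expose getType() ('human', 'ai', ...), while raw
// Anthropic payload messages are plain objects with a `role` field.
const { HumanMessage, AIMessage } = require('@langchain/core/messages');

// Both shapes now pass through addCacheControl; cache_control is attached
// to (at most) the last two user/human messages in either representation.
const langchainMessages = [new HumanMessage('hi'), new AIMessage('hello!'), new HumanMessage('again')];
const anthropicMessages = [
  { role: 'user', content: 'hi' },
  { role: 'assistant', content: 'hello!' },
  { role: 'user', content: 'again' },
];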

api/package.json

@@ -41,11 +41,11 @@
     "@keyv/mongo": "^2.1.8",
     "@keyv/redis": "^2.8.1",
     "@langchain/community": "^0.3.14",
-    "@langchain/core": "^0.3.37",
-    "@langchain/google-genai": "^0.1.7",
-    "@langchain/google-vertexai": "^0.1.8",
+    "@langchain/core": "^0.3.40",
+    "@langchain/google-genai": "^0.1.9",
+    "@langchain/google-vertexai": "^0.2.0",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^2.1.2",
+    "@librechat/agents": "^2.1.3",
     "@waylaidwanderer/fetch-event-source": "^3.0.1",
     "axios": "1.7.8",
     "bcryptjs": "^2.4.3",

api/server/controllers/agents/client.js

@@ -22,6 +22,7 @@ const {
 } = require('librechat-data-provider');
 const {
   formatMessage,
+  addCacheControl,
   formatAgentMessages,
   formatContentStrings,
   createContextHandlers,
@@ -589,7 +590,7 @@ class AgentClient extends BaseClient {
      * @param {number} [i]
      * @param {TMessageContentParts[]} [contentData]
      */
-    const runAgent = async (agent, messages, i = 0, contentData = []) => {
+    const runAgent = async (agent, _messages, i = 0, contentData = []) => {
       config.configurable.model = agent.model_parameters.model;
       if (i > 0) {
         this.model = agent.model_parameters.model;
@@ -622,12 +623,21 @@ class AgentClient extends BaseClient {
       }

       if (noSystemMessages === true && systemContent?.length) {
-        let latestMessage = messages.pop().content;
+        let latestMessage = _messages.pop().content;
         if (typeof latestMessage !== 'string') {
           latestMessage = latestMessage[0].text;
         }
         latestMessage = [systemContent, latestMessage].join('\n');
-        messages.push(new HumanMessage(latestMessage));
+        _messages.push(new HumanMessage(latestMessage));
       }

+      let messages = _messages;
+      if (
+        agent.model_parameters?.clientOptions?.defaultHeaders?.['anthropic-beta']?.includes(
+          'prompt-caching',
+        )
+      ) {
+        messages = addCacheControl(messages);
+      }
+
       run = await createRun({
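
The header check above is what gates caching for agents: the Anthropic LLM config (see llm.js below) only sets an 'anthropic-beta' header containing 'prompt-caching' when the model supports it and promptCache is enabled. The same predicate, extracted into a hypothetical helper for clarity (name and extraction are illustrative only):

// True when the agent's provider headers opted in to Anthropic prompt caching.
function usesPromptCaching(agent) {
  const beta = agent.model_parameters?.clientOptions?.defaultHeaders?.['anthropic-beta'];
  return typeof beta === 'string' && beta.includes('prompt-caching');
}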

api/server/services/Endpoints/anthropic/helpers.js (new file)

@@ -0,0 +1,110 @@
+const { EModelEndpoint, anthropicSettings } = require('librechat-data-provider');
+const { matchModelName } = require('~/utils');
+const { logger } = require('~/config');
+
+/**
+ * @param {string} modelName
+ * @returns {boolean}
+ */
+function checkPromptCacheSupport(modelName) {
+  const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
+  if (
+    modelMatch.includes('claude-3-5-sonnet-latest') ||
+    modelMatch.includes('claude-3.5-sonnet-latest')
+  ) {
+    return false;
+  }
+
+  if (
+    modelMatch === 'claude-3-7-sonnet' ||
+    modelMatch === 'claude-3-5-sonnet' ||
+    modelMatch === 'claude-3-5-haiku' ||
+    modelMatch === 'claude-3-haiku' ||
+    modelMatch === 'claude-3-opus' ||
+    modelMatch === 'claude-3.7-sonnet' ||
+    modelMatch === 'claude-3.5-sonnet' ||
+    modelMatch === 'claude-3.5-haiku'
+  ) {
+    return true;
+  }
+
+  return false;
+}
+
+/**
+ * Gets the appropriate headers for Claude models with cache control
+ * @param {string} model The model name
+ * @param {boolean} supportsCacheControl Whether the model supports cache control
+ * @returns {AnthropicClientOptions['extendedOptions']['defaultHeaders']|undefined} The headers object or undefined if not applicable
+ */
+function getClaudeHeaders(model, supportsCacheControl) {
+  if (!supportsCacheControl) {
+    return undefined;
+  }
+
+  if (/claude-3[-.]5-sonnet/.test(model)) {
+    return {
+      'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
+    };
+  } else if (/claude-3[-.]7/.test(model)) {
+    return {
+      'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
+    };
+  } else {
+    return {
+      'anthropic-beta': 'prompt-caching-2024-07-31',
+    };
+  }
+}
+
+/**
+ * Configures reasoning-related options for Claude models
+ * @param {AnthropicClientOptions & { max_tokens?: number }} anthropicInput The request options object
+ * @param {Object} extendedOptions Additional client configuration options
+ * @param {boolean} extendedOptions.thinking Whether thinking is enabled in client config
+ * @param {number|null} extendedOptions.thinkingBudget The token budget for thinking
+ * @returns {Object} Updated request options
+ */
+function configureReasoning(anthropicInput, extendedOptions = {}) {
+  const updatedOptions = { ...anthropicInput };
+  const currentMaxTokens = updatedOptions.max_tokens ?? updatedOptions.maxTokens;
+  if (
+    extendedOptions.thinking &&
+    updatedOptions?.model &&
+    /claude-3[-.]7/.test(updatedOptions.model)
+  ) {
+    updatedOptions.thinking = {
+      type: 'enabled',
+    };
+  }
+
+  if (updatedOptions.thinking != null && extendedOptions.thinkingBudget != null) {
+    updatedOptions.thinking = {
+      ...updatedOptions.thinking,
+      budget_tokens: extendedOptions.thinkingBudget,
+    };
+  }
+
+  if (
+    updatedOptions.thinking != null &&
+    (currentMaxTokens == null || updatedOptions.thinking.budget_tokens > currentMaxTokens)
+  ) {
+    const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model);
+    updatedOptions.max_tokens = currentMaxTokens ?? maxTokens;
+    logger.warn(
+      updatedOptions.max_tokens === maxTokens
+        ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
+        : `[AnthropicClient] thinking budget_tokens (${updatedOptions.thinking.budget_tokens}) exceeds max_tokens (${updatedOptions.max_tokens}). Adjusting budget_tokens.`,
+    );
+    updatedOptions.thinking.budget_tokens = Math.min(
+      updatedOptions.thinking.budget_tokens,
+      Math.floor(updatedOptions.max_tokens * 0.9),
+    );
+  }
+  return updatedOptions;
+}
+
+module.exports = { checkPromptCacheSupport, getClaudeHeaders, configureReasoning };
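
Taken together, an illustrative trace of the three helpers (assumes matchModelName normalizes dated releases such as 'claude-3-7-sonnet-20250219' to their family key; not part of the commit):

const {
  checkPromptCacheSupport,
  getClaudeHeaders,
  configureReasoning,
} = require('~/server/services/Endpoints/anthropic/helpers');

const model = 'claude-3-7-sonnet-20250219';
const supportsCacheControl = checkPromptCacheSupport(model); // true ('claude-3-7-sonnet')
const headers = getClaudeHeaders(model, supportsCacheControl);
// { 'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31' }

const requestOptions = configureReasoning(
  { model, max_tokens: 2048 },
  { thinking: true, thinkingBudget: 4000 },
);
// thinking is enabled for claude-3-7 models; budget_tokens (4000) exceeds
// max_tokens (2048), so it is clamped to Math.floor(2048 * 0.9) === 1843
// and a warning is logged.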

api/server/services/Endpoints/anthropic/llm.js

@@ -1,5 +1,6 @@
 const { HttpsProxyAgent } = require('https-proxy-agent');
 const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
+const { checkPromptCacheSupport, getClaudeHeaders } = require('./helpers');

 /**
  * Generates configuration options for creating an Anthropic language model (LLM) instance.
@@ -20,6 +21,14 @@
  * @returns {Object} Configuration options for creating an Anthropic LLM instance, with null and undefined values removed.
  */
 function getLLMConfig(apiKey, options = {}) {
+  const systemOptions = {
+    thinking: options.modelOptions.thinking ?? anthropicSettings.thinking.default,
+    promptCache: options.modelOptions.promptCache ?? anthropicSettings.promptCache.default,
+    thinkingBudget: options.modelOptions.thinkingBudget ?? anthropicSettings.thinkingBudget.default,
+  };
+  for (let key in systemOptions) {
+    delete options.modelOptions[key];
+  }
   const defaultOptions = {
     model: anthropicSettings.model.default,
     maxOutputTokens: anthropicSettings.maxOutputTokens.default,
@@ -29,7 +38,7 @@ function getLLMConfig(apiKey, options = {}) {
   const mergedOptions = Object.assign(defaultOptions, options.modelOptions);

   /** @type {AnthropicClientOptions} */
-  const requestOptions = {
+  let requestOptions = {
     apiKey,
     model: mergedOptions.model,
     stream: mergedOptions.stream,
@@ -42,6 +51,13 @@ function getLLMConfig(apiKey, options = {}) {
     clientOptions: {},
   };

+  const supportsCacheControl =
+    systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model);
+  const headers = getClaudeHeaders(requestOptions.model, supportsCacheControl);
+  if (headers) {
+    requestOptions.clientOptions.defaultHeaders = headers;
+  }
+
   if (options.proxy) {
     requestOptions.clientOptions.httpAgent = new HttpsProxyAgent(options.proxy);
   }
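
End to end, the new systemOptions block means caching and reasoning flags are consumed inside getLLMConfig rather than forwarded in the Anthropic request body. A minimal sketch of a call (illustrative; assumes getLLMConfig ultimately returns the assembled requestOptions):

// promptCache/thinking/thinkingBudget are split out as systemOptions and
// deleted from modelOptions, so they never reach the API payload; caching
// support only surfaces as the negotiated beta headers on clientOptions.
const llmConfig = getLLMConfig(process.env.ANTHROPIC_API_KEY, {
  modelOptions: {
    model: 'claude-3-5-haiku-20241022',
    promptCache: true,
  },
});
// llmConfig.clientOptions.defaultHeaders
// => { 'anthropic-beta': 'prompt-caching-2024-07-31' }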