feat: Anthropic Agents Prompt Caching & UI Accessibility Enhancements (#6045)

* chore: remove auto-focus for now

* refactor: move react-hook-form Controller logic from AgentPanel to AgentSelect

* fix: a11y focus issue with AgentSelect by never replacing it in its component tree

* fix: maintain ComboBox focus and force re-render on agent ID change in AgentPanel

* chore: `gemini-2.0-flash-lite-preview-02-05` (deprecated)

* refactor: extract cache control logic and headers configuration to helper functions in AnthropicClient

* feat: anthropic agents prompt caching

* chore: bump @librechat/agents and related dependencies

* fix: typo
Author: Danny Avila · 2025-02-25 22:14:58 -05:00 · committed by GitHub
Parent: d3d7d11ea8 · Commit: e14df5956a
12 changed files with 4460 additions and 1477 deletions
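
The headline feature wires Anthropic prompt caching into agents. At the API level this takes two pieces: an `anthropic-beta` request header, and a `cache_control` breakpoint on the prompt blocks to be reused across turns. A minimal sketch with `@anthropic-ai/sdk` (illustrative only; the model name and prompt values are placeholders, not taken from this commit):

```js
// Minimal sketch of Anthropic prompt caching. The beta header string matches
// the one used in the diff below; the model and prompts are placeholders.
const Anthropic = require('@anthropic-ai/sdk');

const client = new Anthropic({
  apiKey: process.env.ANTHROPIC_API_KEY,
  defaultHeaders: { 'anthropic-beta': 'prompt-caching-2024-07-31' },
});

async function send(systemPrompt, userText) {
  return client.messages.create({
    model: 'claude-3-5-sonnet-20241022',
    max_tokens: 1024,
    // cache_control marks a breakpoint: the API caches the prompt prefix up to
    // and including this block, so repeated agent turns skip reprocessing it.
    system: [{ type: 'text', text: systemPrompt, cache_control: { type: 'ephemeral' } }],
    messages: [{ role: 'user', content: userText }],
  });
}
```

Cached prefix reads are billed at a reduced input-token rate, which is why support is gated per model via `checkPromptCacheSupport` in the hunks below.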

@@ -17,6 +17,11 @@ const {
   parseParamFromPrompt,
   createContextHandlers,
 } = require('./prompts');
+const {
+  getClaudeHeaders,
+  configureReasoning,
+  checkPromptCacheSupport,
+} = require('~/server/services/Endpoints/anthropic/helpers');
 const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const Tokenizer = require('~/server/services/Tokenizer');
@@ -101,8 +106,7 @@ class AnthropicClient extends BaseClient {
     const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
     this.isClaude3 = modelMatch.includes('claude-3');
     this.isLegacyOutput = !modelMatch.includes('claude-3-5-sonnet');
-    this.supportsCacheControl =
-      this.options.promptCache && this.checkPromptCacheSupport(modelMatch);
+    this.supportsCacheControl = this.options.promptCache && checkPromptCacheSupport(modelMatch);

     if (
       this.isLegacyOutput &&
@@ -174,26 +178,9 @@ class AnthropicClient extends BaseClient {
       options.baseURL = this.options.reverseProxyUrl;
     }

-    if (
-      this.supportsCacheControl &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-5-sonnet')
-    ) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
-      };
-    } else if (
-      this.supportsCacheControl &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-7')
-    ) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
-      };
-    } else if (this.supportsCacheControl) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'prompt-caching-2024-07-31',
-      };
-    }
+    const headers = getClaudeHeaders(requestOptions?.model, this.supportsCacheControl);
+    if (headers) {
+      options.defaultHeaders = headers;
+    }

     return new Anthropic(options);
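
The hunk above shows only the call site. Presumably the extracted helper reproduces the three deleted branches one-to-one; a sketch under that assumption (not the actual contents of `~/server/services/Endpoints/anthropic/helpers`):

```js
// Sketch of getClaudeHeaders, reconstructed from the branches deleted above.
function getClaudeHeaders(model, supportsCacheControl) {
  if (!supportsCacheControl) {
    return undefined;
  }
  if (model && model.includes('claude-3-5-sonnet')) {
    return {
      'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
    };
  }
  if (model && model.includes('claude-3-7')) {
    return {
      'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
    };
  }
  return { 'anthropic-beta': 'prompt-caching-2024-07-31' };
}
```

Returning `undefined` when caching is unsupported leaves `options.defaultHeaders` unset, so the SDK's defaults apply.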
@@ -684,27 +671,6 @@ class AnthropicClient extends BaseClient {
       : await client.completions.create(options);
   }

-  /**
-   * @param {string} modelName
-   * @returns {boolean}
-   */
-  checkPromptCacheSupport(modelName) {
-    const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
-    if (modelMatch.includes('claude-3-5-sonnet-latest')) {
-      return false;
-    }
-    if (
-      modelMatch === 'claude-3-7-sonnet' ||
-      modelMatch === 'claude-3-5-sonnet' ||
-      modelMatch === 'claude-3-5-haiku' ||
-      modelMatch === 'claude-3-haiku' ||
-      modelMatch === 'claude-3-opus'
-    ) {
-      return true;
-    }
-    return false;
-  }
-
   getMessageMapMethod() {
     /**
      * @param {TMessage} msg
@@ -761,7 +727,7 @@ class AnthropicClient extends BaseClient {
       topK: top_k,
     } = this.modelOptions;

-    const requestOptions = {
+    let requestOptions = {
       model,
       stream: stream || true,
       stop_sequences,
@@ -780,40 +746,10 @@ class AnthropicClient extends BaseClient {
       requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
     }

-    if (
-      this.options.thinking &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-7')
-    ) {
-      requestOptions.thinking = {
-        type: 'enabled',
-      };
-    }
-
-    if (requestOptions.thinking != null && this.options.thinkingBudget != null) {
-      requestOptions.thinking = {
-        ...requestOptions.thinking,
-        budget_tokens: this.options.thinkingBudget,
-      };
-    }
-
-    if (
-      requestOptions.thinking != null &&
-      (requestOptions.max_tokens == null ||
-        requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
-    ) {
-      const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
-      requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
-      logger.warn(
-        requestOptions.max_tokens === maxTokens
-          ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
-          : `[AnthropicClient] thinking budget_tokens (${requestOptions.thinking.budget_tokens}) exceeds max_tokens (${requestOptions.max_tokens}). Adjusting budget_tokens.`,
-      );
-      requestOptions.thinking.budget_tokens = Math.min(
-        requestOptions.thinking.budget_tokens,
-        Math.floor(requestOptions.max_tokens * 0.9),
-      );
-    }
+    requestOptions = configureReasoning(requestOptions, {
+      thinking: this.options.thinking,
+      thinkingBudget: this.options.thinkingBudget,
+    });

     if (this.systemMessage && this.supportsCacheControl === true) {
       requestOptions.system = [
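
Likewise, `configureReasoning` presumably absorbs the thinking logic deleted above; a sketch under that assumption, with the warning logs omitted (`anthropicSettings` is the same import the deleted code already used):

```js
// Sketch of configureReasoning, reconstructed from the deleted inline logic —
// an assumption about the helpers module, not a copy of the real file.
const { anthropicSettings } = require('librechat-data-provider');

function configureReasoning(requestOptions, { thinking, thinkingBudget } = {}) {
  // Extended thinking is only enabled for claude-3-7 models.
  if (thinking && requestOptions?.model && requestOptions.model.includes('claude-3-7')) {
    requestOptions.thinking = { type: 'enabled' };
  }

  if (requestOptions.thinking != null && thinkingBudget != null) {
    requestOptions.thinking = { ...requestOptions.thinking, budget_tokens: thinkingBudget };
  }

  // budget_tokens must fit inside max_tokens: default max_tokens if unset,
  // then clamp the budget to 90% of it, as the deleted code did.
  if (
    requestOptions.thinking != null &&
    (requestOptions.max_tokens == null ||
      requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
  ) {
    const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
    requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
    requestOptions.thinking.budget_tokens = Math.min(
      requestOptions.thinking.budget_tokens,
      Math.floor(requestOptions.max_tokens * 0.9),
    );
  }

  return requestOptions;
}
```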