✨ feat: Anthropic Agents Prompt Caching & UI Accessibility Enhancements (#6045)

* chore: remove auto-focus for now
* refactor: move react-hook-form Controller Logic to AgentSelect from AgentPanel
* fix: a11y focus issue with AgentSelect by never replacing it in its component tree
* fix: maintain ComboBox focus and force re-render on agent ID change in AgentPanel
* chore: `gemini-2.0-flash-lite-preview-02-05` (deprecated)
* refactor: extract cache control logic and headers configuration to helper functions in AnthropicClient
* feat: anthropic agents prompt caching
* chore: bump @librechat/agents and related dependencies
* fix: typo
2025-12-20 10:20:15 +01:00 · 2025-02-25 22:14:58 -05:00 · 2025-02-25 22:14:58 -05:00 · e14df5956a
commit e14df5956a
parent d3d7d11ea8
12 changed files with 4460 additions and 1477 deletions
--- a/api/server/services/Endpoints/anthropic/helpers.js
+++ b/api/server/services/Endpoints/anthropic/helpers.js
@ -0,0 +1,110 @@
+const { EModelEndpoint, anthropicSettings } = require('librechat-data-provider');
+const { matchModelName } = require('~/utils');
+const { logger } = require('~/config');
+
+/**
+ * @param {string} modelName
+ * @returns {boolean}
+ */
+function checkPromptCacheSupport(modelName) {
+  const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
+  if (
+    modelMatch.includes('claude-3-5-sonnet-latest') ||
+    modelMatch.includes('claude-3.5-sonnet-latest')
+  ) {
+    return false;
+  }
+
+  if (
+    modelMatch === 'claude-3-7-sonnet' ||
+    modelMatch === 'claude-3-5-sonnet' ||
+    modelMatch === 'claude-3-5-haiku' ||
+    modelMatch === 'claude-3-haiku' ||
+    modelMatch === 'claude-3-opus' ||
+    modelMatch === 'claude-3.7-sonnet' ||
+    modelMatch === 'claude-3.5-sonnet' ||
+    modelMatch === 'claude-3.5-haiku'
+  ) {
+    return true;
+  }
+
+  return false;
+}
+
+/**
+ * Gets the appropriate headers for Claude models with cache control
+ * @param {string} model The model name
+ * @param {boolean} supportsCacheControl Whether the model supports cache control
+ * @returns {AnthropicClientOptions['extendedOptions']['defaultHeaders']|undefined} The headers object or undefined if not applicable
+ */
+function getClaudeHeaders(model, supportsCacheControl) {
+  if (!supportsCacheControl) {
+    return undefined;
+  }
+
+  if (/claude-3[-.]5-sonnet/.test(model)) {
+    return {
+      'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
+    };
+  } else if (/claude-3[-.]7/.test(model)) {
+    return {
+      'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
+    };
+  } else {
+    return {
+      'anthropic-beta': 'prompt-caching-2024-07-31',
+    };
+  }
+}
+
+/**
+ * Configures reasoning-related options for Claude models
+ * @param {AnthropicClientOptions & { max_tokens?: number }} anthropicInput The request options object
+ * @param {Object} extendedOptions Additional client configuration options
+ * @param {boolean} extendedOptions.thinking Whether thinking is enabled in client config
+ * @param {number|null} extendedOptions.thinkingBudget The token budget for thinking
+ * @returns {Object} Updated request options
+ */
+function configureReasoning(anthropicInput, extendedOptions = {}) {
+  const updatedOptions = { ...anthropicInput };
+  const currentMaxTokens = updatedOptions.max_tokens ?? updatedOptions.maxTokens;
+  if (
+    extendedOptions.thinking &&
+    updatedOptions?.model &&
+    /claude-3[-.]7/.test(updatedOptions.model)
+  ) {
+    updatedOptions.thinking = {
+      type: 'enabled',
+    };
+  }
+
+  if (updatedOptions.thinking != null && extendedOptions.thinkingBudget != null) {
+    updatedOptions.thinking = {
+      ...updatedOptions.thinking,
+      budget_tokens: extendedOptions.thinkingBudget,
+    };
+  }
+
+  if (
+    updatedOptions.thinking != null &&
+    (currentMaxTokens == null || updatedOptions.thinking.budget_tokens > currentMaxTokens)
+  ) {
+    const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model);
+    updatedOptions.max_tokens = currentMaxTokens ?? maxTokens;
+
+    logger.warn(
+      updatedOptions.max_tokens === maxTokens
+        ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
+        : `[AnthropicClient] thinking budget_tokens (${updatedOptions.thinking.budget_tokens}) exceeds max_tokens (${updatedOptions.max_tokens}). Adjusting budget_tokens.`,
+    );
+
+    updatedOptions.thinking.budget_tokens = Math.min(
+      updatedOptions.thinking.budget_tokens,
+      Math.floor(updatedOptions.max_tokens * 0.9),
+    );
+  }
+
+  return updatedOptions;
+}
+
+// Export the three Anthropic helper functions defined in this module.
+module.exports = { checkPromptCacheSupport, getClaudeHeaders, configureReasoning };