Mirror of https://github.com/danny-avila/LibreChat.git
🚀 feat: Agent Cache Tokens & Anthropic Reasoning Support (#6098)
* fix: handling of top_k and top_p parameters for Claude-3.7 models (allowed without reasoning)
* feat: bump @librechat/agents for Anthropic Reasoning support
* fix: update reasoning handling for OpenRouter integration
* fix: enhance agent token spending logic to include cache creation and read details
* fix: update logic for thinking status in ContentParts component
* refactor: improve agent title handling
* chore: bump @librechat/agents to version 2.1.7 for parallel tool calling for Google models
This commit is contained in:
parent 34f967eff8
commit 9802629848
11 changed files with 187 additions and 40 deletions
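One of the listed changes, cache-aware agent token spending, is easiest to see with a small sketch. Anthropic reports prompt-cache activity separately from plain input tokens, and cache writes and cache reads are billed at different rates than regular input, so spend has to be computed per bucket. The function below is only an illustration of that idea, not the transaction code from this commit; the field names and multipliers are assumptions.

// Hypothetical sketch of cache-aware spend calculation (not LibreChat's actual code).
// Assumption: usage details expose separate counts for cache creation and cache reads.
function computePromptSpend({ input = 0, cacheCreation = 0, cacheRead = 0 }, ratePerToken) {
  const CACHE_WRITE_MULTIPLIER = 1.25; // assumed premium for writing to the prompt cache
  const CACHE_READ_MULTIPLIER = 0.1; // assumed discount for reading from the prompt cache
  return (
    input * ratePerToken +
    cacheCreation * ratePerToken * CACHE_WRITE_MULTIPLIER +
    cacheRead * ratePerToken * CACHE_READ_MULTIPLIER
  );
}

// Example: 1,000 fresh input tokens, 4,000 cache-creation tokens, 20,000 cache-read tokens.
// computePromptSpend({ input: 1000, cacheCreation: 4000, cacheRead: 20000 }, 0.000003)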
@@ -20,10 +20,19 @@ const addTitle = async (req, { text, response, client }) => {
   const titleCache = getLogStores(CacheKeys.GEN_TITLE);
   const key = `${req.user.id}-${response.conversationId}`;
+  const responseText =
+    response?.content && Array.isArray(response?.content)
+      ? response.content.reduce((acc, block) => {
+          if (block?.type === 'text') {
+            return acc + block.text;
+          }
+          return acc;
+        }, '')
+      : (response?.content ?? response?.text ?? '');
 
   const title = await client.titleConvo({
     text,
-    responseText: response?.text ?? '',
+    responseText,
     conversationId: response.conversationId,
   });
   await titleCache.set(key, title, 120000);
 
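The hunk above is the addTitle handler: agent responses can now arrive as an array of typed content blocks rather than a single string, so the title prompt is built by concatenating only the text blocks. A standalone sketch of the same flattening follows; the non-text block type in the sample data is hypothetical, and anything that is not a text block is simply skipped.

// Sketch: flatten a content-block array into plain text, mirroring the reduce in the hunk above.
const response = {
  content: [
    { type: 'reasoning', reasoning: 'thinking about the reply…' }, // hypothetical non-text block
    { type: 'text', text: 'Paris is the capital ' },
    { type: 'text', text: 'of France.' },
  ],
};

const responseText =
  response?.content && Array.isArray(response?.content)
    ? response.content.reduce((acc, block) => (block?.type === 'text' ? acc + block.text : acc), '')
    : (response?.content ?? response?.text ?? '');

console.log(responseText); // -> "Paris is the capital of France."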
@ -1,6 +1,6 @@
|
|||
const { HttpsProxyAgent } = require('https-proxy-agent');
|
||||
const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
|
||||
const { checkPromptCacheSupport, getClaudeHeaders } = require('./helpers');
|
||||
const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers');
|
||||
|
||||
/**
|
||||
* Generates configuration options for creating an Anthropic language model (LLM) instance.
|
||||
|
|
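This and the following hunk appear to be the Anthropic endpoint's getLLMConfig. The newly imported configureReasoning helper is what carries the Anthropic Reasoning support; its implementation lives in ./helpers and is not part of this diff. The sketch below is only a guess at its general shape, based on the call site visible in the next hunk and Anthropic's documented thinking request parameter.

// Hypothetical sketch of a configureReasoning-style helper (not the actual ./helpers code).
// Assumption: systemOptions carries a thinking flag plus an optional token budget.
function configureReasoning(requestOptions, systemOptions = {}) {
  if (systemOptions.thinking && /claude-3[-.]7/.test(requestOptions.model)) {
    requestOptions.thinking = {
      type: 'enabled', // Anthropic's extended thinking switch
      budget_tokens: systemOptions.thinkingBudget ?? 2000, // assumed default budget
    };
  }
  return requestOptions;
}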
@ -49,13 +49,14 @@ function getLLMConfig(apiKey, options = {}) {
|
|||
clientOptions: {},
|
||||
};
|
||||
|
||||
requestOptions = configureReasoning(requestOptions, systemOptions);
|
||||
|
||||
if (!/claude-3[-.]7/.test(mergedOptions.model)) {
|
||||
if (mergedOptions.topP !== undefined) {
|
||||
requestOptions.topP = mergedOptions.topP;
|
||||
}
|
||||
if (mergedOptions.topK !== undefined) {
|
||||
requestOptions.topK = mergedOptions.topK;
|
||||
}
|
||||
requestOptions.topP = mergedOptions.topP;
|
||||
requestOptions.topK = mergedOptions.topK;
|
||||
} else if (requestOptions.thinking == null) {
|
||||
requestOptions.topP = mergedOptions.topP;
|
||||
requestOptions.topK = mergedOptions.topK;
|
||||
}
|
||||
|
||||
const supportsCacheControl =
|
||||
|
|
|
|||
|
|
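This branch implements the first fix in the commit message: non-3.7 Claude models always receive the user's topP/topK, while Claude 3.7 receives them only when requestOptions.thinking was left unset by configureReasoning (Anthropic restricts sampling overrides while extended thinking is enabled). The regex covers both model-name spellings:

// The same model check in isolation:
console.log(/claude-3[-.]7/.test('claude-3-5-sonnet')); // false -> topP/topK always forwarded
console.log(/claude-3[-.]7/.test('claude-3-7-sonnet')); // true  -> forwarded only when thinking is unset
console.log(/claude-3[-.]7/.test('claude-3.7-sonnet')); // true  -> dotted spelling gets the same rule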
@ -109,4 +109,45 @@ describe('getLLMConfig', () => {
|
|||
// Just verifying that the promptCache setting is processed
|
||||
expect(result.llmConfig).toBeDefined();
|
||||
});
|
||||
|
||||
it('should include topK and topP for Claude-3.7 models when thinking is not enabled', () => {
|
||||
// Test with thinking explicitly set to null/undefined
|
||||
const result = getLLMConfig('test-api-key', {
|
||||
modelOptions: {
|
||||
model: 'claude-3-7-sonnet',
|
||||
topK: 10,
|
||||
topP: 0.9,
|
||||
thinking: false,
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.llmConfig).toHaveProperty('topK', 10);
|
||||
expect(result.llmConfig).toHaveProperty('topP', 0.9);
|
||||
|
||||
// Test with thinking explicitly set to false
|
||||
const result2 = getLLMConfig('test-api-key', {
|
||||
modelOptions: {
|
||||
model: 'claude-3-7-sonnet',
|
||||
topK: 10,
|
||||
topP: 0.9,
|
||||
thinking: false,
|
||||
},
|
||||
});
|
||||
|
||||
expect(result2.llmConfig).toHaveProperty('topK', 10);
|
||||
expect(result2.llmConfig).toHaveProperty('topP', 0.9);
|
||||
|
||||
// Test with decimal notation as well
|
||||
const result3 = getLLMConfig('test-api-key', {
|
||||
modelOptions: {
|
||||
model: 'claude-3.7-sonnet',
|
||||
topK: 10,
|
||||
topP: 0.9,
|
||||
thinking: false,
|
||||
},
|
||||
});
|
||||
|
||||
expect(result3.llmConfig).toHaveProperty('topK', 10);
|
||||
expect(result3.llmConfig).toHaveProperty('topP', 0.9);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@ function getLLMConfig(apiKey, options = {}) {
|
|||
const {
|
||||
modelOptions = {},
|
||||
reverseProxyUrl,
|
||||
useOpenRouter,
|
||||
defaultQuery,
|
||||
headers,
|
||||
proxy,
|
||||
|
|
@ -56,9 +55,11 @@ function getLLMConfig(apiKey, options = {}) {
|
|||
});
|
||||
}
|
||||
|
||||
let useOpenRouter;
|
||||
/** @type {OpenAIClientOptions['configuration']} */
|
||||
const configOptions = {};
|
||||
if (useOpenRouter || (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter))) {
|
||||
if (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter)) {
|
||||
useOpenRouter = true;
|
||||
llmConfig.include_reasoning = true;
|
||||
configOptions.baseURL = reverseProxyUrl;
|
||||
configOptions.defaultHeaders = Object.assign(
|
||||
|
|
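These hunks appear to be the OpenAI endpoint's getLLMConfig. With useOpenRouter dropped from the destructured options (previous hunk), OpenRouter is now detected solely from the reverse-proxy URL, and include_reasoning appears to ask OpenRouter to return reasoning content with responses. A minimal sketch of the new detection path, assuming KnownEndpoints.openrouter is the string 'openrouter' and using a hypothetical proxy URL:

// Sketch of the URL-based OpenRouter detection.
const reverseProxyUrl = 'https://openrouter.ai/api/v1';
const llmConfig = {};

let useOpenRouter;
if (reverseProxyUrl && reverseProxyUrl.includes('openrouter')) {
  useOpenRouter = true; // previously supplied by the caller as an option
  llmConfig.include_reasoning = true; // request reasoning content from OpenRouter
}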
@ -118,6 +119,13 @@ function getLLMConfig(apiKey, options = {}) {
|
|||
llmConfig.organization = process.env.OPENAI_ORGANIZATION;
|
||||
}
|
||||
|
||||
if (useOpenRouter && llmConfig.reasoning_effort != null) {
|
||||
llmConfig.reasoning = {
|
||||
effort: llmConfig.reasoning_effort,
|
||||
};
|
||||
delete llmConfig.reasoning_effort;
|
||||
}
|
||||
|
||||
return {
|
||||
/** @type {OpenAIClientOptions} */
|
||||
llmConfig,
|
||||
|
|
|
|||
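The final hunk adapts OpenAI's flat reasoning_effort field to OpenRouter's nested reasoning parameter before the config is returned. A before/after illustration of the outgoing config (the model name and effort value are hypothetical):

// Outgoing config before the rewrite (OpenAI-style field):
const before = { model: 'o3-mini', reasoning_effort: 'high' };

// Outgoing config after the rewrite, when useOpenRouter is true:
const after = { model: 'o3-mini', reasoning: { effort: 'high' } };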