Mirror of https://github.com/danny-avila/LibreChat.git
🚀 feat: Agent Cache Tokens & Anthropic Reasoning Support (#6098)
* fix: handling of top_k and top_p parameters for Claude-3.7 models (allowed without reasoning)
* feat: bump @librechat/agents for Anthropic Reasoning support
* fix: update reasoning handling for OpenRouter integration
* fix: enhance agent token spending logic to include cache creation and read details
* fix: update logic for thinking status in ContentParts component
* refactor: improve agent title handling
* chore: bump @librechat/agents to version 2.1.7 for parallel tool calling for Google models
Parent: 34f967eff8
Commit: 9802629848

11 changed files with 187 additions and 40 deletions
@@ -746,15 +746,6 @@ class AnthropicClient extends BaseClient {
       metadata,
     };

-    if (!/claude-3[-.]7/.test(model)) {
-      if (top_p !== undefined) {
-        requestOptions.top_p = top_p;
-      }
-      if (top_k !== undefined) {
-        requestOptions.top_k = top_k;
-      }
-    }
-
     if (this.useMessages) {
       requestOptions.messages = payload;
       requestOptions.max_tokens =
@@ -769,6 +760,14 @@ class AnthropicClient extends BaseClient {
       thinkingBudget: this.options.thinkingBudget,
     });

+    if (!/claude-3[-.]7/.test(model)) {
+      requestOptions.top_p = top_p;
+      requestOptions.top_k = top_k;
+    } else if (requestOptions.thinking == null) {
+      requestOptions.topP = top_p;
+      requestOptions.topK = top_k;
+    }
+
     if (this.systemMessage && this.supportsCacheControl === true) {
       requestOptions.system = [
         {
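Taken together, the two AnthropicClient hunks above move the sampling-parameter gating below the reasoning setup, so Claude 3.7 can still receive top_k/top_p when extended thinking is not configured. A condensed, standalone sketch of the resulting gate (the function name and inputs are illustrative, not from the codebase):

```js
// Illustrative distillation of the gate above; `applySamplingParams`
// is a hypothetical helper, not a function in LibreChat.
function applySamplingParams(requestOptions, model, top_p, top_k) {
  if (!/claude-3[-.]7/.test(model)) {
    // Non-3.7 Claude models: raw API field names.
    requestOptions.top_p = top_p;
    requestOptions.top_k = top_k;
  } else if (requestOptions.thinking == null) {
    // Claude 3.7 rejects top_p/top_k alongside extended thinking,
    // so they are forwarded only when no `thinking` block was configured.
    requestOptions.topP = top_p;
    requestOptions.topK = top_k;
  }
  return requestOptions;
}

// applySamplingParams({}, 'claude-3-7-sonnet', 0.9, 10)
//   -> { topP: 0.9, topK: 10 }
// applySamplingParams({ thinking: { budget_tokens: 2000 } }, 'claude-3-7-sonnet', 0.9, 10)
//   -> unchanged: sampling params are dropped while thinking is active
```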
@@ -1309,6 +1309,12 @@ ${convo}
       modelOptions.include_reasoning = true;
       reasoningKey = 'reasoning';
     }
+    if (this.useOpenRouter && modelOptions.reasoning_effort != null) {
+      modelOptions.reasoning = {
+        effort: modelOptions.reasoning_effort,
+      };
+      delete modelOptions.reasoning_effort;
+    }

     this.streamHandler = new SplitStreamHandler({
       reasoningKey,
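This hunk, like the matching change in the OpenAI LLM config further down, rewrites the flat OpenAI-style `reasoning_effort` into the nested `reasoning` object that OpenRouter expects before the stream handler is built. A minimal standalone sketch of the transform (field names come from the diff; the helper name is illustrative):

```js
// Convert OpenAI-style `reasoning_effort` into OpenRouter's nested shape.
function toOpenRouterReasoning(modelOptions) {
  if (modelOptions.reasoning_effort != null) {
    modelOptions.reasoning = { effort: modelOptions.reasoning_effort };
    delete modelOptions.reasoning_effort;
  }
  return modelOptions;
}

// toOpenRouterReasoning({ model: 'o3-mini', reasoning_effort: 'high' })
//   -> { model: 'o3-mini', reasoning: { effort: 'high' } }
```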
@@ -680,4 +680,53 @@ describe('AnthropicClient', () => {
       expect(capturedOptions).not.toHaveProperty('top_p');
     });
   });
+
+  it('should include top_k and top_p parameters for Claude-3.7 models when thinking is explicitly disabled', async () => {
+    const client = new AnthropicClient('test-api-key', {
+      modelOptions: {
+        model: 'claude-3-7-sonnet',
+        temperature: 0.7,
+        topK: 10,
+        topP: 0.9,
+      },
+      thinking: false,
+    });
+
+    async function* mockAsyncGenerator() {
+      yield { type: 'message_start', message: { usage: {} } };
+      yield { delta: { text: 'Test response' } };
+      yield { type: 'message_delta', usage: {} };
+    }
+
+    jest.spyOn(client, 'createResponse').mockImplementation(() => {
+      return mockAsyncGenerator();
+    });
+
+    let capturedOptions = null;
+    jest.spyOn(client, 'getClient').mockImplementation((options) => {
+      capturedOptions = options;
+      return {};
+    });
+
+    const payload = [{ role: 'user', content: 'Test message' }];
+    await client.sendCompletion(payload, {});
+
+    expect(capturedOptions).toHaveProperty('topK', 10);
+    expect(capturedOptions).toHaveProperty('topP', 0.9);
+
+    client.setOptions({
+      modelOptions: {
+        model: 'claude-3.7-sonnet',
+        temperature: 0.7,
+        topK: 10,
+        topP: 0.9,
+      },
+      thinking: false,
+    });
+
+    await client.sendCompletion(payload, {});
+
+    expect(capturedOptions).toHaveProperty('topK', 10);
+    expect(capturedOptions).toHaveProperty('topP', 0.9);
+  });
 });
@@ -45,7 +45,7 @@
     "@langchain/google-genai": "^0.1.9",
     "@langchain/google-vertexai": "^0.2.0",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^2.1.3",
+    "@librechat/agents": "^2.1.7",
     "@waylaidwanderer/fetch-event-source": "^3.0.1",
     "axios": "1.7.8",
     "bcryptjs": "^2.4.3",
@@ -27,10 +27,10 @@ const {
   formatContentStrings,
   createContextHandlers,
 } = require('~/app/clients/prompts');
-const { encodeAndFormat } = require('~/server/services/Files/images/encode');
+const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const { getBufferString, HumanMessage } = require('@langchain/core/messages');
+const { encodeAndFormat } = require('~/server/services/Files/images/encode');
 const Tokenizer = require('~/server/services/Tokenizer');
-const { spendTokens } = require('~/models/spendTokens');
 const BaseClient = require('~/app/clients/BaseClient');
 const { createRun } = require('./run');
 const { logger } = require('~/config');
@@ -380,15 +380,34 @@ class AgentClient extends BaseClient {
     if (!collectedUsage || !collectedUsage.length) {
       return;
     }
-    const input_tokens = collectedUsage[0]?.input_tokens || 0;
+    const input_tokens =
+      (collectedUsage[0]?.input_tokens || 0) +
+      (Number(collectedUsage[0]?.input_token_details?.cache_creation) || 0) +
+      (Number(collectedUsage[0]?.input_token_details?.cache_read) || 0);
+
     let output_tokens = 0;
     let previousTokens = input_tokens; // Start with original input
     for (let i = 0; i < collectedUsage.length; i++) {
       const usage = collectedUsage[i];
+      if (!usage) {
+        continue;
+      }
+
+      const cache_creation = Number(usage.input_token_details?.cache_creation) || 0;
+      const cache_read = Number(usage.input_token_details?.cache_read) || 0;
+
+      const txMetadata = {
+        context,
+        conversationId: this.conversationId,
+        user: this.user ?? this.options.req.user?.id,
+        endpointTokenConfig: this.options.endpointTokenConfig,
+        model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
+      };
+
       if (i > 0) {
         // Count new tokens generated (input_tokens minus previous accumulated tokens)
-        output_tokens += (Number(usage.input_tokens) || 0) - previousTokens;
+        output_tokens +=
+          (Number(usage.input_tokens) || 0) + cache_creation + cache_read - previousTokens;
       }

       // Add this message's output tokens
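In the agent loop each step re-sends the accumulated context, so `usage.input_tokens` is cumulative; the hunk above now also folds Anthropic's cache-creation and cache-read counts into both the base input total and the per-step delta. A worked sketch with hypothetical numbers (the per-step output addition is summarized from the `// Add this message's output tokens` comment in the diff):

```js
// Two hypothetical agent steps; the second reads the cache written by the first.
const collectedUsage = [
  { input_tokens: 100, output_tokens: 20, input_token_details: { cache_creation: 50, cache_read: 0 } },
  { input_tokens: 120, output_tokens: 30, input_token_details: { cache_read: 50 } },
];

const input_tokens =
  (collectedUsage[0]?.input_tokens || 0) +
  (Number(collectedUsage[0]?.input_token_details?.cache_creation) || 0) +
  (Number(collectedUsage[0]?.input_token_details?.cache_read) || 0); // 100 + 50 + 0 = 150

let output_tokens = 0;
let previousTokens = input_tokens; // 150
for (let i = 0; i < collectedUsage.length; i++) {
  const usage = collectedUsage[i];
  const cache_creation = Number(usage.input_token_details?.cache_creation) || 0;
  const cache_read = Number(usage.input_token_details?.cache_read) || 0;
  if (i > 0) {
    // Step 2: (120 + 0 + 50) - 170 = 0 extra context tokens beyond what
    // the previous step already accounted for.
    output_tokens += (Number(usage.input_tokens) || 0) + cache_creation + cache_read - previousTokens;
  }
  output_tokens += Number(usage.output_tokens) || 0; // add this message's output tokens
  previousTokens += Number(usage.output_tokens) || 0;
}
// input_tokens === 150, output_tokens === 50
```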
@@ -396,16 +415,26 @@ class AgentClient extends BaseClient {

       // Update previousTokens to include this message's output
       previousTokens += Number(usage.output_tokens) || 0;
-      spendTokens(
-        {
-          context,
-          conversationId: this.conversationId,
-          user: this.user ?? this.options.req.user?.id,
-          endpointTokenConfig: this.options.endpointTokenConfig,
-          model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
-        },
-        { promptTokens: usage.input_tokens, completionTokens: usage.output_tokens },
-      ).catch((err) => {
+      if (cache_creation > 0 || cache_read > 0) {
+        spendStructuredTokens(txMetadata, {
+          promptTokens: {
+            input: usage.input_tokens,
+            write: cache_creation,
+            read: cache_read,
+          },
+          completionTokens: usage.output_tokens,
+        }).catch((err) => {
+          logger.error(
+            '[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending structured tokens',
+            err,
+          );
+        });
+      }
+      spendTokens(txMetadata, {
+        promptTokens: usage.input_tokens,
+        completionTokens: usage.output_tokens,
+      }).catch((err) => {
         logger.error(
           '[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending tokens',
           err,
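When cache counts are present, the hunk records a structured transaction that splits prompt tokens into input/write/read alongside the flat `spendTokens` call; otherwise only the flat call runs. The two payload shapes, using the field names from the diff and the hypothetical numbers above:

```js
// Second argument to spendStructuredTokens(txMetadata, ...), recorded only
// when cache_creation > 0 || cache_read > 0:
const structuredSpend = {
  promptTokens: {
    input: 120, // usage.input_tokens
    write: 0,   // cache_creation
    read: 50,   // cache_read
  },
  completionTokens: 30,
};

// Second argument to spendTokens(txMetadata, ...), recorded for every entry:
const flatSpend = {
  promptTokens: 120,
  completionTokens: 30,
};
```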
@@ -792,7 +821,10 @@ class AgentClient extends BaseClient {
       throw new Error('Run not initialized');
     }
     const { handleLLMEnd, collected: collectedMetadata } = createMetadataAggregator();
-    const clientOptions = {};
+    /** @type {import('@librechat/agents').ClientOptions} */
+    const clientOptions = {
+      maxTokens: 75,
+    };
     const providerConfig = this.options.req.app.locals[this.options.agent.provider];
     if (
       providerConfig &&
@@ -20,10 +20,19 @@ const addTitle = async (req, { text, response, client }) => {

   const titleCache = getLogStores(CacheKeys.GEN_TITLE);
   const key = `${req.user.id}-${response.conversationId}`;
+  const responseText =
+    response?.content && Array.isArray(response?.content)
+      ? response.content.reduce((acc, block) => {
+          if (block?.type === 'text') {
+            return acc + block.text;
+          }
+          return acc;
+        }, '')
+      : (response?.content ?? response?.text ?? '');
+
   const title = await client.titleConvo({
     text,
-    responseText: response?.text ?? '',
+    responseText,
     conversationId: response.conversationId,
   });
   await titleCache.set(key, title, 120000);
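Agent responses may now carry an array of content blocks rather than a plain string, so the title hunk concatenates only the `text` blocks and falls back to the old string fields. A standalone sketch of the same reduction (the non-text block type below is a hypothetical example):

```js
// Pull the plain text out of a possibly block-based response.
function getResponseText(response) {
  return response?.content && Array.isArray(response?.content)
    ? response.content.reduce((acc, block) => {
        if (block?.type === 'text') {
          return acc + block.text;
        }
        return acc; // skip reasoning/tool-call blocks
      }, '')
    : (response?.content ?? response?.text ?? '');
}

// getResponseText({
//   content: [
//     { type: 'reasoning', reasoning: '...' }, // hypothetical non-text block
//     { type: 'text', text: 'Hello' },
//     { type: 'text', text: ' world' },
//   ],
// }) -> 'Hello world'
```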
@@ -1,6 +1,6 @@
 const { HttpsProxyAgent } = require('https-proxy-agent');
 const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
-const { checkPromptCacheSupport, getClaudeHeaders } = require('./helpers');
+const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers');

 /**
  * Generates configuration options for creating an Anthropic language model (LLM) instance.
@@ -49,13 +49,14 @@ function getLLMConfig(apiKey, options = {}) {
     clientOptions: {},
   };

+  requestOptions = configureReasoning(requestOptions, systemOptions);
+
   if (!/claude-3[-.]7/.test(mergedOptions.model)) {
-    if (mergedOptions.topP !== undefined) {
-      requestOptions.topP = mergedOptions.topP;
-    }
-    if (mergedOptions.topK !== undefined) {
-      requestOptions.topK = mergedOptions.topK;
-    }
+    requestOptions.topP = mergedOptions.topP;
+    requestOptions.topK = mergedOptions.topK;
+  } else if (requestOptions.thinking == null) {
+    requestOptions.topP = mergedOptions.topP;
+    requestOptions.topK = mergedOptions.topK;
   }

   const supportsCacheControl =
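The same gate now runs in `getLLMConfig` after `configureReasoning`, so `requestOptions.thinking == null` distinguishes "no thinking block was configured" from the user-facing `thinking: false` option. The observable shape, assuming `configureReasoning` leaves `thinking` unset when the user disables it (the disabled case is confirmed by the tests below; the enabled case is inferred from the commit message's "allowed without reasoning"):

```js
// Thinking disabled: Claude 3.7 keeps its sampling params.
getLLMConfig('test-api-key', {
  modelOptions: { model: 'claude-3-7-sonnet', topK: 10, topP: 0.9, thinking: false },
});
// -> result.llmConfig includes { topK: 10, topP: 0.9 }

// Thinking enabled: a `thinking` block is configured, so topK/topP
// are expected to be omitted (assumed, not shown in this diff).
getLLMConfig('test-api-key', {
  modelOptions: { model: 'claude-3-7-sonnet', topK: 10, topP: 0.9, thinking: true },
});
```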
@@ -109,4 +109,45 @@ describe('getLLMConfig', () => {
     // Just verifying that the promptCache setting is processed
     expect(result.llmConfig).toBeDefined();
   });
+
+  it('should include topK and topP for Claude-3.7 models when thinking is not enabled', () => {
+    // Test with thinking explicitly set to null/undefined
+    const result = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3-7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result.llmConfig).toHaveProperty('topK', 10);
+    expect(result.llmConfig).toHaveProperty('topP', 0.9);
+
+    // Test with thinking explicitly set to false
+    const result2 = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3-7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result2.llmConfig).toHaveProperty('topK', 10);
+    expect(result2.llmConfig).toHaveProperty('topP', 0.9);
+
+    // Test with decimal notation as well
+    const result3 = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3.7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result3.llmConfig).toHaveProperty('topK', 10);
+    expect(result3.llmConfig).toHaveProperty('topP', 0.9);
+  });
 });
@@ -29,7 +29,6 @@ function getLLMConfig(apiKey, options = {}) {
   const {
     modelOptions = {},
     reverseProxyUrl,
-    useOpenRouter,
     defaultQuery,
     headers,
     proxy,
@@ -56,9 +55,11 @@ function getLLMConfig(apiKey, options = {}) {
     });
   }

+  let useOpenRouter;
   /** @type {OpenAIClientOptions['configuration']} */
   const configOptions = {};
-  if (useOpenRouter || (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter))) {
+  if (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter)) {
+    useOpenRouter = true;
     llmConfig.include_reasoning = true;
     configOptions.baseURL = reverseProxyUrl;
     configOptions.defaultHeaders = Object.assign(
@@ -118,6 +119,13 @@ function getLLMConfig(apiKey, options = {}) {
     llmConfig.organization = process.env.OPENAI_ORGANIZATION;
   }

+  if (useOpenRouter && llmConfig.reasoning_effort != null) {
+    llmConfig.reasoning = {
+      effort: llmConfig.reasoning_effort,
+    };
+    delete llmConfig.reasoning_effort;
+  }
+
   return {
     /** @type {OpenAIClientOptions} */
     llmConfig,
@@ -109,7 +109,9 @@ const ContentParts = memo(
                 return val;
               })
             }
-            label={isSubmitting ? localize('com_ui_thinking') : localize('com_ui_thoughts')}
+            label={
+              isSubmitting && isLast ? localize('com_ui_thinking') : localize('com_ui_thoughts')
+            }
           />
         </div>
       )}
package-lock.json (generated; 8 lines changed)
@@ -61,7 +61,7 @@
     "@langchain/google-genai": "^0.1.9",
     "@langchain/google-vertexai": "^0.2.0",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^2.1.3",
+    "@librechat/agents": "^2.1.7",
     "@waylaidwanderer/fetch-event-source": "^3.0.1",
     "axios": "1.7.8",
     "bcryptjs": "^2.4.3",
@@ -15984,9 +15984,9 @@
       }
     },
     "node_modules/@librechat/agents": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.1.3.tgz",
-      "integrity": "sha512-4pPkLpjhA3DDiZQOULcrpbdQaOBC4JuUMdcVTUyYBHcA63SJT3olstmRQkGKNvoXLFLeQyJ0jkOqkEpzLJzk/g==",
+      "version": "2.1.7",
+      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.1.7.tgz",
+      "integrity": "sha512-/+AvxH75K0dSSUeHqT8jPZCcqcQUWdB56g9ls7ho0Nw9vdxfezBhF/hXnOk5oORHeEXlGEKNE6YPyjAhCmNIOg==",
       "dependencies": {
         "@aws-crypto/sha256-js": "^5.2.0",
         "@aws-sdk/credential-provider-node": "^3.613.0",