🚀 feat: Claude 3.7 Support + Reasoning (#6008)

* fix: missing console color methods for admin scripts

* feat: Anthropic Claude 3.7 Sonnet Support

* feat: update eventsource to version 3.0.2 and upgrade @modelcontextprotocol/sdk to 1.4.1

* fix: update DynamicInput to handle number type and improve initial value logic

* feat: first pass Anthropic Reasoning (Claude 3.7)

* feat: implement streaming support in AnthropicClient with reasoning UI handling

* feat: add missing xAI (grok) models
Danny Avila 2025-02-24 20:08:55 -05:00 committed by GitHub
parent 0e719592c6
commit 50e8769340
17 changed files with 421 additions and 77 deletions
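The headline change wires Anthropic's extended thinking into LibreChat's `AnthropicClient`. As a rough sketch of the request shape the diff below assembles in `sendCompletion`, assuming the `@anthropic-ai/sdk` Messages API (the model name and token values here are illustrative, not taken from the commit):

```js
const Anthropic = require('@anthropic-ai/sdk');

const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });

async function main() {
  // Extended thinking is only attached for claude-3-7 models in this commit;
  // budget_tokens comes from the new `thinkingBudget` option.
  const response = await client.messages.create({
    model: 'claude-3-7-sonnet-20250219',
    max_tokens: 8192,
    thinking: { type: 'enabled', budget_tokens: 2048 },
    messages: [{ role: 'user', content: 'Why is the sky blue?' }],
  });
  console.log(response.content);
}

main().catch(console.error);
```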


@@ -7,6 +7,7 @@ const {
getResponseSender,
validateVisionModel,
} = require('librechat-data-provider');
+const { SplitStreamHandler, GraphEvents } = require('@librechat/agents');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const {
truncateText,
@@ -19,9 +20,9 @@ const {
const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const Tokenizer = require('~/server/services/Tokenizer');
+const { logger, sendEvent } = require('~/config');
const { sleep } = require('~/server/utils');
const BaseClient = require('./BaseClient');
-const { logger } = require('~/config');
const HUMAN_PROMPT = '\n\nHuman:';
const AI_PROMPT = '\n\nAssistant:';
@@ -68,6 +69,8 @@ class AnthropicClient extends BaseClient {
/** The key for the usage object's output tokens
* @type {string} */
this.outputTokensKey = 'output_tokens';
+/** @type {SplitStreamHandler | undefined} */
+this.streamHandler;
}
setOptions(options) {
@@ -125,7 +128,7 @@ class AnthropicClient extends BaseClient {
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ??
-1500;
+anthropicSettings.maxOutputTokens.reset(this.modelOptions.model);
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
@@ -179,6 +182,14 @@ class AnthropicClient extends BaseClient {
options.defaultHeaders = {
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
};
+} else if (
+this.supportsCacheControl &&
+requestOptions?.model &&
+requestOptions.model.includes('claude-3-7')
+) {
+options.defaultHeaders = {
+'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
+};
} else if (this.supportsCacheControl) {
options.defaultHeaders = {
'anthropic-beta': 'prompt-caching-2024-07-31',
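For reference, the new `output-128k-2025-02-19` beta flag unlocks Claude 3.7's 128K output-token ceiling, joined with the existing prompt-caching flag into a single comma-separated `anthropic-beta` header. A sketch of the raw headers this produces, assuming the SDK forwards `defaultHeaders` unchanged:

```js
// Hypothetical wire-level view; the SDK normally adds x-api-key and
// anthropic-version itself. The beta value is taken from the diff above.
const headers = {
  'x-api-key': process.env.ANTHROPIC_API_KEY,
  'anthropic-version': '2023-06-01',
  'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
};
```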
@@ -668,7 +679,7 @@ class AnthropicClient extends BaseClient {
* @returns {Promise<Anthropic.default.Message | Anthropic.default.Completion>} The response from the Anthropic client.
*/
async createResponse(client, options, useMessages) {
-return useMessages ?? this.useMessages
+return (useMessages ?? this.useMessages)
? await client.messages.create(options)
: await client.completions.create(options);
}
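The added parentheses in `createResponse` are clarifying rather than behavioral: `??` binds tighter than the conditional operator, so both forms parse the same way. A quick check:

```js
// '??' has higher precedence than '?:', so these are equivalent:
const useMessages = undefined;
const fallback = true;
console.log(useMessages ?? fallback ? 'messages API' : 'completions API'); // 'messages API'
console.log((useMessages ?? fallback) ? 'messages API' : 'completions API'); // 'messages API'
```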
@@ -683,6 +694,7 @@ class AnthropicClient extends BaseClient {
return false;
}
if (
+modelMatch === 'claude-3-7-sonnet' ||
modelMatch === 'claude-3-5-sonnet' ||
modelMatch === 'claude-3-5-haiku' ||
modelMatch === 'claude-3-haiku' ||
@@ -693,6 +705,35 @@ class AnthropicClient extends BaseClient {
return false;
}
+getMessageMapMethod() {
+/**
+* @param {TMessage} msg
+*/
+return (msg) => {
+if (msg.text != null && msg.text && msg.text.startsWith(':::thinking')) {
+msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim();
+}
+return msg;
+};
+}
+/**
+* @param {string[]} [intermediateReply]
+* @returns {string}
+*/
+getStreamText(intermediateReply) {
+if (!this.streamHandler) {
+return intermediateReply?.join('') ?? '';
+}
+const reasoningText = this.streamHandler.reasoningTokens.join('');
+const reasoningBlock = reasoningText.length > 0 ? `:::thinking\n${reasoningText}\n:::\n` : '';
+return `${reasoningBlock}${this.streamHandler.tokens.join('')}`;
+}
async sendCompletion(payload, { onProgress, abortController }) {
if (!abortController) {
abortController = new AbortController();
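The `:::thinking` fence is the contract between these two new methods: `getStreamText` prepends accumulated reasoning wrapped in the fence for the UI, while `getMessageMapMethod` strips it before a message is replayed to the API. A minimal round-trip sketch using the same regex as the diff (strings are illustrative):

```js
// Compose what getStreamText would emit:
const reasoningText = 'User asked about X; weigh Y against Z.';
const answer = 'Here is the answer.';
const streamed = `:::thinking\n${reasoningText}\n:::\n${answer}`;

// Strip it again the way getMessageMapMethod does before resending:
const replayed = streamed.replace(/:::thinking.*?:::/gs, '').trim();
console.log(replayed); // 'Here is the answer.'
```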
@@ -710,7 +751,6 @@ class AnthropicClient extends BaseClient {
user_id: this.user,
};
-let text = '';
const {
stream,
model,
@@ -733,10 +773,46 @@ class AnthropicClient extends BaseClient {
if (this.useMessages) {
requestOptions.messages = payload;
-requestOptions.max_tokens = maxOutputTokens || legacy.maxOutputTokens.default;
+requestOptions.max_tokens =
+maxOutputTokens || anthropicSettings.maxOutputTokens.reset(requestOptions.model);
} else {
requestOptions.prompt = payload;
-requestOptions.max_tokens_to_sample = maxOutputTokens || 1500;
+requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
}
+if (
+this.options.thinking &&
+requestOptions?.model &&
+requestOptions.model.includes('claude-3-7')
+) {
+requestOptions.thinking = {
+type: 'enabled',
+};
+}
+if (requestOptions.thinking != null && this.options.thinkingBudget != null) {
+requestOptions.thinking = {
+...requestOptions.thinking,
+budget_tokens: this.options.thinkingBudget,
+};
+}
+if (
+requestOptions.thinking != null &&
+(requestOptions.max_tokens == null ||
+requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
+) {
+const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
+requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
+logger.warn(
+requestOptions.max_tokens === maxTokens
+? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
+: `[AnthropicClient] thinking budget_tokens (${requestOptions.thinking.budget_tokens}) exceeds max_tokens (${requestOptions.max_tokens}). Adjusting budget_tokens.`,
+);
+requestOptions.thinking.budget_tokens = Math.min(
+requestOptions.thinking.budget_tokens,
+Math.floor(requestOptions.max_tokens * 0.9),
+);
+}
if (this.systemMessage && this.supportsCacheControl === true) {
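The final block above guarantees a sane pairing of `budget_tokens` and `max_tokens`: if the budget exceeds the cap (or no cap is set), `max_tokens` falls back to the model default and the budget is clamped to 90% of it. Worked numbers, purely illustrative:

```js
// A user requests a 10,000-token thinking budget with max_tokens 8192:
const maxTokens = 8192;
const requestedBudget = 10000;
const budgetTokens = Math.min(requestedBudget, Math.floor(maxTokens * 0.9));
console.log(budgetTokens); // 7372 — clamped to 90% of max_tokens
```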
@@ -756,13 +832,17 @@ class AnthropicClient extends BaseClient {
}
logger.debug('[AnthropicClient]', { ...requestOptions });
+this.streamHandler = new SplitStreamHandler({
+accumulate: true,
+runId: this.responseMessageId,
+handlers: {
+[GraphEvents.ON_RUN_STEP]: (event) => sendEvent(this.options.res, event),
+[GraphEvents.ON_MESSAGE_DELTA]: (event) => sendEvent(this.options.res, event),
+[GraphEvents.ON_REASONING_DELTA]: (event) => sendEvent(this.options.res, event),
+},
+});
-const handleChunk = (currentChunk) => {
-if (currentChunk) {
-text += currentChunk;
-onProgress(currentChunk);
-}
-};
+let intermediateReply = this.streamHandler.tokens;
const maxRetries = 3;
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
@@ -782,6 +862,31 @@ class AnthropicClient extends BaseClient {
}
});
+/** @param {string} chunk */
+const handleChunk = (chunk) => {
+this.streamHandler.handle({
+choices: [
+{
+delta: {
+content: chunk,
+},
+},
+],
+});
+};
+/** @param {string} chunk */
+const handleReasoningChunk = (chunk) => {
+this.streamHandler.handle({
+choices: [
+{
+delta: {
+reasoning_content: chunk,
+},
+},
+],
+});
+};
for await (const completion of response) {
// Handle each completion as before
const type = completion?.type ?? '';
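Note how `handleChunk` and `handleReasoningChunk` wrap each raw Anthropic delta in an OpenAI-style `choices[].delta` envelope, which is what `SplitStreamHandler` consumes to keep text and reasoning streams separate. A stub standing in for the `@librechat/agents` class, to show the envelope shape only (the real handler also emits the run-step and delta events registered above):

```js
const streamHandler = {
  tokens: [],
  reasoningTokens: [],
  handle({ choices }) {
    const delta = choices[0].delta;
    if (delta.reasoning_content != null) {
      this.reasoningTokens.push(delta.reasoning_content);
    } else if (delta.content != null) {
      this.tokens.push(delta.content);
    }
  },
};

// Deltas as dispatched by the loop above (next hunk):
streamHandler.handle({ choices: [{ delta: { reasoning_content: 'thinking…' } }] });
streamHandler.handle({ choices: [{ delta: { content: 'Hello' } }] });

console.log(streamHandler.reasoningTokens.join('')); // 'thinking…'
console.log(streamHandler.tokens.join(''));          // 'Hello'
```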
@@ -789,7 +894,9 @@ class AnthropicClient extends BaseClient {
logger.debug(`[AnthropicClient] ${type}`, completion);
this[type] = completion;
}
-if (completion?.delta?.text) {
+if (completion?.delta?.thinking) {
+handleReasoningChunk(completion.delta.thinking);
+} else if (completion?.delta?.text) {
handleChunk(completion.delta.text);
} else if (completion.completion) {
handleChunk(completion.completion);
@@ -808,6 +915,10 @@ class AnthropicClient extends BaseClient {
if (attempts < maxRetries) {
await delayBeforeRetry(attempts, 350);
+} else if (this.streamHandler && this.streamHandler.reasoningTokens.length) {
+return this.getStreamText();
+} else if (intermediateReply.length > 0) {
+return this.getStreamText(intermediateReply);
} else {
throw new Error(`Operation failed after ${maxRetries} attempts: ${error.message}`);
}
@@ -823,8 +934,7 @@ class AnthropicClient extends BaseClient {
}
await processResponse.bind(this)();
-return text.trim();
+return this.getStreamText(intermediateReply);
}
getSaveOptions() {
@@ -834,6 +944,8 @@ class AnthropicClient extends BaseClient {
promptPrefix: this.options.promptPrefix,
modelLabel: this.options.modelLabel,
promptCache: this.options.promptCache,
+thinking: this.options.thinking,
+thinkingBudget: this.options.thinkingBudget,
resendFiles: this.options.resendFiles,
iconURL: this.options.iconURL,
greeting: this.options.greeting,