🚀 feat: Claude 3.7 Support + Reasoning (#6008)

* fix: missing console color methods for admin scripts

* feat: Anthropic Claude 3.7 Sonnet Support

* feat: update eventsource to version 3.0.2 and upgrade @modelcontextprotocol/sdk to 1.4.1

* fix: update DynamicInput to handle number type and improve initial value logic

* feat: first pass Anthropic Reasoning (Claude 3.7)

* feat: implement streaming support in AnthropicClient with reasoning UI handling

* feat: add missing xAI (grok) models
Danny Avila 2025-02-24 20:08:55 -05:00 committed by GitHub
parent 0e719592c6
commit 50e8769340
17 changed files with 421 additions and 77 deletions
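The headline change wires Anthropic's extended thinking into LibreChat's `AnthropicClient`. As a rough sketch of the request shape the diff below assembles in `sendCompletion`, assuming the `@anthropic-ai/sdk` Messages API (the model name and token values here are illustrative, not taken from the commit):

```js
const Anthropic = require('@anthropic-ai/sdk');

const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });

async function main() {
  // Extended thinking is only attached for claude-3-7 models in this commit;
  // budget_tokens comes from the new `thinkingBudget` option.
  const response = await client.messages.create({
    model: 'claude-3-7-sonnet-20250219',
    max_tokens: 8192,
    thinking: { type: 'enabled', budget_tokens: 2048 },
    messages: [{ role: 'user', content: 'Why is the sky blue?' }],
  });
  console.log(response.content);
}

main().catch(console.error);
```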


@@ -7,6 +7,7 @@ const {
getResponseSender,
validateVisionModel,
} = require('librechat-data-provider');
+const { SplitStreamHandler, GraphEvents } = require('@librechat/agents');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const {
truncateText,
@@ -19,9 +20,9 @@ const {
const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const Tokenizer = require('~/server/services/Tokenizer');
+const { logger, sendEvent } = require('~/config');
const { sleep } = require('~/server/utils');
const BaseClient = require('./BaseClient');
-const { logger } = require('~/config');
const HUMAN_PROMPT = '\n\nHuman:';
const AI_PROMPT = '\n\nAssistant:';
@@ -68,6 +69,8 @@ class AnthropicClient extends BaseClient {
/** The key for the usage object's output tokens
* @type {string} */
this.outputTokensKey = 'output_tokens';
+/** @type {SplitStreamHandler | undefined} */
+this.streamHandler;
}
setOptions(options) {
@@ -125,7 +128,7 @@ class AnthropicClient extends BaseClient {
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ??
-1500;
+anthropicSettings.maxOutputTokens.reset(this.modelOptions.model);
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
@@ -179,6 +182,14 @@ class AnthropicClient extends BaseClient {
options.defaultHeaders = {
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
};
+} else if (
+this.supportsCacheControl &&
+requestOptions?.model &&
+requestOptions.model.includes('claude-3-7')
+) {
+options.defaultHeaders = {
+'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
+};
} else if (this.supportsCacheControl) {
options.defaultHeaders = {
'anthropic-beta': 'prompt-caching-2024-07-31',
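For reference, the new `output-128k-2025-02-19` beta flag unlocks Claude 3.7's 128K output-token ceiling, joined with the existing prompt-caching flag into a single comma-separated `anthropic-beta` header. A sketch of the raw headers this produces, assuming the SDK forwards `defaultHeaders` unchanged:

```js
// Hypothetical wire-level view; the SDK normally adds x-api-key and
// anthropic-version itself. The beta value is taken from the diff above.
const headers = {
  'x-api-key': process.env.ANTHROPIC_API_KEY,
  'anthropic-version': '2023-06-01',
  'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
};
```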
@@ -668,7 +679,7 @@ class AnthropicClient extends BaseClient {
* @returns {Promise<Anthropic.default.Message | Anthropic.default.Completion>} The response from the Anthropic client.
*/
async createResponse(client, options, useMessages) {
-return useMessages ?? this.useMessages
+return (useMessages ?? this.useMessages)
? await client.messages.create(options)
: await client.completions.create(options);
}
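The added parentheses in `createResponse` are clarifying rather than behavioral: `??` binds tighter than the conditional operator, so both forms parse the same way. A quick check:

```js
// '??' has higher precedence than '?:', so these are equivalent:
const useMessages = undefined;
const fallback = true;
console.log(useMessages ?? fallback ? 'messages API' : 'completions API'); // 'messages API'
console.log((useMessages ?? fallback) ? 'messages API' : 'completions API'); // 'messages API'
```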
@@ -683,6 +694,7 @@ class AnthropicClient extends BaseClient {
return false;
}
if (
+modelMatch === 'claude-3-7-sonnet' ||
modelMatch === 'claude-3-5-sonnet' ||
modelMatch === 'claude-3-5-haiku' ||
modelMatch === 'claude-3-haiku' ||
@@ -693,6 +705,35 @@ class AnthropicClient extends BaseClient {
return false;
}
+getMessageMapMethod() {
+/**
+* @param {TMessage} msg
+*/
+return (msg) => {
+if (msg.text != null && msg.text && msg.text.startsWith(':::thinking')) {
+msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim();
+}
+return msg;
+};
+}
+/**
+* @param {string[]} [intermediateReply]
+* @returns {string}
+*/
+getStreamText(intermediateReply) {
+if (!this.streamHandler) {
+return intermediateReply?.join('') ?? '';
+}
+const reasoningText = this.streamHandler.reasoningTokens.join('');
+const reasoningBlock = reasoningText.length > 0 ? `:::thinking\n${reasoningText}\n:::\n` : '';
+return `${reasoningBlock}${this.streamHandler.tokens.join('')}`;
+}
async sendCompletion(payload, { onProgress, abortController }) {
if (!abortController) {
abortController = new AbortController();
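The `:::thinking` fence is the contract between these two new methods: `getStreamText` prepends accumulated reasoning wrapped in the fence for the UI, while `getMessageMapMethod` strips it before a message is replayed to the API. A minimal round-trip sketch using the same regex as the diff (strings are illustrative):

```js
// Compose what getStreamText would emit:
const reasoningText = 'User asked about X; weigh Y against Z.';
const answer = 'Here is the answer.';
const streamed = `:::thinking\n${reasoningText}\n:::\n${answer}`;

// Strip it again the way getMessageMapMethod does before resending:
const replayed = streamed.replace(/:::thinking.*?:::/gs, '').trim();
console.log(replayed); // 'Here is the answer.'
```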
@@ -710,7 +751,6 @@ class AnthropicClient extends BaseClient {
user_id: this.user,
};
-let text = '';
const {
stream,
model,
@@ -733,10 +773,46 @@ class AnthropicClient extends BaseClient {
if (this.useMessages) {
requestOptions.messages = payload;
-requestOptions.max_tokens = maxOutputTokens || legacy.maxOutputTokens.default;
+requestOptions.max_tokens =
+maxOutputTokens || anthropicSettings.maxOutputTokens.reset(requestOptions.model);
} else {
requestOptions.prompt = payload;
-requestOptions.max_tokens_to_sample = maxOutputTokens || 1500;
+requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
}
+if (
+this.options.thinking &&
+requestOptions?.model &&
+requestOptions.model.includes('claude-3-7')
+) {
+requestOptions.thinking = {
+type: 'enabled',
+};
+}
+if (requestOptions.thinking != null && this.options.thinkingBudget != null) {
+requestOptions.thinking = {
+...requestOptions.thinking,
+budget_tokens: this.options.thinkingBudget,
+};
+}
+if (
+requestOptions.thinking != null &&
+(requestOptions.max_tokens == null ||
+requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
+) {
+const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
+requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
+logger.warn(
+requestOptions.max_tokens === maxTokens
+? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
+: `[AnthropicClient] thinking budget_tokens (${requestOptions.thinking.budget_tokens}) exceeds max_tokens (${requestOptions.max_tokens}). Adjusting budget_tokens.`,
+);
+requestOptions.thinking.budget_tokens = Math.min(
+requestOptions.thinking.budget_tokens,
+Math.floor(requestOptions.max_tokens * 0.9),
+);
+}
if (this.systemMessage && this.supportsCacheControl === true) {
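The final block above guarantees a sane pairing of `budget_tokens` and `max_tokens`: if the budget exceeds the cap (or no cap is set), `max_tokens` falls back to the model default and the budget is clamped to 90% of it. Worked numbers, purely illustrative:

```js
// A user requests a 10,000-token thinking budget with max_tokens 8192:
const maxTokens = 8192;
const requestedBudget = 10000;
const budgetTokens = Math.min(requestedBudget, Math.floor(maxTokens * 0.9));
console.log(budgetTokens); // 7372 — clamped to 90% of max_tokens
```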
@@ -756,13 +832,17 @@ class AnthropicClient extends BaseClient {
}
logger.debug('[AnthropicClient]', { ...requestOptions });
+this.streamHandler = new SplitStreamHandler({
+accumulate: true,
+runId: this.responseMessageId,
+handlers: {
+[GraphEvents.ON_RUN_STEP]: (event) => sendEvent(this.options.res, event),
+[GraphEvents.ON_MESSAGE_DELTA]: (event) => sendEvent(this.options.res, event),
+[GraphEvents.ON_REASONING_DELTA]: (event) => sendEvent(this.options.res, event),
+},
+});
-const handleChunk = (currentChunk) => {
-if (currentChunk) {
-text += currentChunk;
-onProgress(currentChunk);
-}
-};
+let intermediateReply = this.streamHandler.tokens;
const maxRetries = 3;
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
@@ -782,6 +862,31 @@ class AnthropicClient extends BaseClient {
}
});
+/** @param {string} chunk */
+const handleChunk = (chunk) => {
+this.streamHandler.handle({
+choices: [
+{
+delta: {
+content: chunk,
+},
+},
+],
+});
+};
+/** @param {string} chunk */
+const handleReasoningChunk = (chunk) => {
+this.streamHandler.handle({
+choices: [
+{
+delta: {
+reasoning_content: chunk,
+},
+},
+],
+});
+};
for await (const completion of response) {
// Handle each completion as before
const type = completion?.type ?? '';
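Note how `handleChunk` and `handleReasoningChunk` wrap each raw Anthropic delta in an OpenAI-style `choices[].delta` envelope, which is what `SplitStreamHandler` consumes to keep text and reasoning streams separate. A stub standing in for the `@librechat/agents` class, to show the envelope shape only (the real handler also emits the run-step and delta events registered above):

```js
const streamHandler = {
  tokens: [],
  reasoningTokens: [],
  handle({ choices }) {
    const delta = choices[0].delta;
    if (delta.reasoning_content != null) {
      this.reasoningTokens.push(delta.reasoning_content);
    } else if (delta.content != null) {
      this.tokens.push(delta.content);
    }
  },
};

// Deltas as dispatched by the loop above (next hunk):
streamHandler.handle({ choices: [{ delta: { reasoning_content: 'thinking…' } }] });
streamHandler.handle({ choices: [{ delta: { content: 'Hello' } }] });

console.log(streamHandler.reasoningTokens.join('')); // 'thinking…'
console.log(streamHandler.tokens.join(''));          // 'Hello'
```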
@@ -789,7 +894,9 @@ class AnthropicClient extends BaseClient {
logger.debug(`[AnthropicClient] ${type}`, completion);
this[type] = completion;
}
-if (completion?.delta?.text) {
+if (completion?.delta?.thinking) {
+handleReasoningChunk(completion.delta.thinking);
+} else if (completion?.delta?.text) {
handleChunk(completion.delta.text);
} else if (completion.completion) {
handleChunk(completion.completion);
@@ -808,6 +915,10 @@ class AnthropicClient extends BaseClient {
if (attempts < maxRetries) {
await delayBeforeRetry(attempts, 350);
+} else if (this.streamHandler && this.streamHandler.reasoningTokens.length) {
+return this.getStreamText();
+} else if (intermediateReply.length > 0) {
+return this.getStreamText(intermediateReply);
} else {
throw new Error(`Operation failed after ${maxRetries} attempts: ${error.message}`);
}
@@ -823,8 +934,7 @@ class AnthropicClient extends BaseClient {
}
await processResponse.bind(this)();
-return text.trim();
+return this.getStreamText(intermediateReply);
}
getSaveOptions() {
@@ -834,6 +944,8 @@ class AnthropicClient extends BaseClient {
promptPrefix: this.options.promptPrefix,
modelLabel: this.options.modelLabel,
promptCache: this.options.promptCache,
+thinking: this.options.thinking,
+thinkingBudget: this.options.thinkingBudget,
resendFiles: this.options.resendFiles,
iconURL: this.options.iconURL,
greeting: this.options.greeting,