🚀 feat: Claude 3.7 Support + Reasoning (#6008)

* fix: missing console color methods for admin scripts

* feat: Anthropic Claude 3.7 Sonnet Support

* feat: update eventsource to version 3.0.2 and upgrade @modelcontextprotocol/sdk to 1.4.1

* fix: update DynamicInput to handle number type and improve initial value logic

* feat: first pass Anthropic Reasoning (Claude 3.7)

* feat: implement streaming support in AnthropicClient with reasoning UI handling

* feat: add missing xAI (grok) models
This commit is contained in:
Danny Avila 2025-02-24 20:08:55 -05:00 committed by GitHub
parent 0e719592c6
commit 50e8769340
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 421 additions and 77 deletions

View file

@ -88,7 +88,7 @@ PROXY=
#============#
ANTHROPIC_API_KEY=user_provided
# ANTHROPIC_MODELS=claude-3-5-haiku-20241022,claude-3-5-sonnet-20241022,claude-3-5-sonnet-latest,claude-3-5-sonnet-20240620,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307,claude-2.1,claude-2,claude-1.2,claude-1,claude-1-100k,claude-instant-1,claude-instant-1-100k
# ANTHROPIC_MODELS=claude-3-7-sonnet-latest,claude-3-7-sonnet-20250219,claude-3-5-haiku-20241022,claude-3-5-sonnet-20241022,claude-3-5-sonnet-latest,claude-3-5-sonnet-20240620,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307,claude-2.1,claude-2,claude-1.2,claude-1,claude-1-100k,claude-instant-1,claude-instant-1-100k
# ANTHROPIC_REVERSE_PROXY=
#============#

View file

@ -7,6 +7,7 @@ const {
getResponseSender,
validateVisionModel,
} = require('librechat-data-provider');
const { SplitStreamHandler, GraphEvents } = require('@librechat/agents');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const {
truncateText,
@ -19,9 +20,9 @@ const {
const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const Tokenizer = require('~/server/services/Tokenizer');
const { logger, sendEvent } = require('~/config');
const { sleep } = require('~/server/utils');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
const HUMAN_PROMPT = '\n\nHuman:';
const AI_PROMPT = '\n\nAssistant:';
@ -68,6 +69,8 @@ class AnthropicClient extends BaseClient {
/** The key for the usage object's output tokens
* @type {string} */
this.outputTokensKey = 'output_tokens';
/** @type {SplitStreamHandler | undefined} */
this.streamHandler;
}
setOptions(options) {
@ -125,7 +128,7 @@ class AnthropicClient extends BaseClient {
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ??
1500;
anthropicSettings.maxOutputTokens.reset(this.modelOptions.model);
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
@ -179,6 +182,14 @@ class AnthropicClient extends BaseClient {
options.defaultHeaders = {
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
};
} else if (
this.supportsCacheControl &&
requestOptions?.model &&
requestOptions.model.includes('claude-3-7')
) {
options.defaultHeaders = {
'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
};
} else if (this.supportsCacheControl) {
options.defaultHeaders = {
'anthropic-beta': 'prompt-caching-2024-07-31',
@ -668,7 +679,7 @@ class AnthropicClient extends BaseClient {
* @returns {Promise<Anthropic.default.Message | Anthropic.default.Completion>} The response from the Anthropic client.
*/
async createResponse(client, options, useMessages) {
return useMessages ?? this.useMessages
return (useMessages ?? this.useMessages)
? await client.messages.create(options)
: await client.completions.create(options);
}
@ -683,6 +694,7 @@ class AnthropicClient extends BaseClient {
return false;
}
if (
modelMatch === 'claude-3-7-sonnet' ||
modelMatch === 'claude-3-5-sonnet' ||
modelMatch === 'claude-3-5-haiku' ||
modelMatch === 'claude-3-haiku' ||
@ -693,6 +705,35 @@ class AnthropicClient extends BaseClient {
return false;
}
getMessageMapMethod() {
/**
* @param {TMessage} msg
*/
return (msg) => {
if (msg.text != null && msg.text && msg.text.startsWith(':::thinking')) {
msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim();
}
return msg;
};
}
/**
* @param {string[]} [intermediateReply]
* @returns {string}
*/
getStreamText(intermediateReply) {
if (!this.streamHandler) {
return intermediateReply?.join('') ?? '';
}
const reasoningText = this.streamHandler.reasoningTokens.join('');
const reasoningBlock = reasoningText.length > 0 ? `:::thinking\n${reasoningText}\n:::\n` : '';
return `${reasoningBlock}${this.streamHandler.tokens.join('')}`;
}
async sendCompletion(payload, { onProgress, abortController }) {
if (!abortController) {
abortController = new AbortController();
@ -710,7 +751,6 @@ class AnthropicClient extends BaseClient {
user_id: this.user,
};
let text = '';
const {
stream,
model,
@ -733,10 +773,46 @@ class AnthropicClient extends BaseClient {
if (this.useMessages) {
requestOptions.messages = payload;
requestOptions.max_tokens = maxOutputTokens || legacy.maxOutputTokens.default;
requestOptions.max_tokens =
maxOutputTokens || anthropicSettings.maxOutputTokens.reset(requestOptions.model);
} else {
requestOptions.prompt = payload;
requestOptions.max_tokens_to_sample = maxOutputTokens || 1500;
requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
}
if (
this.options.thinking &&
requestOptions?.model &&
requestOptions.model.includes('claude-3-7')
) {
requestOptions.thinking = {
type: 'enabled',
};
}
if (requestOptions.thinking != null && this.options.thinkingBudget != null) {
requestOptions.thinking = {
...requestOptions.thinking,
budget_tokens: this.options.thinkingBudget,
};
}
if (
requestOptions.thinking != null &&
(requestOptions.max_tokens == null ||
requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
) {
const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
logger.warn(
requestOptions.max_tokens === maxTokens
? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
: `[AnthropicClient] thinking budget_tokens (${requestOptions.thinking.budget_tokens}) exceeds max_tokens (${requestOptions.max_tokens}). Adjusting budget_tokens.`,
);
requestOptions.thinking.budget_tokens = Math.min(
requestOptions.thinking.budget_tokens,
Math.floor(requestOptions.max_tokens * 0.9),
);
}
if (this.systemMessage && this.supportsCacheControl === true) {
@ -756,13 +832,17 @@ class AnthropicClient extends BaseClient {
}
logger.debug('[AnthropicClient]', { ...requestOptions });
this.streamHandler = new SplitStreamHandler({
accumulate: true,
runId: this.responseMessageId,
handlers: {
[GraphEvents.ON_RUN_STEP]: (event) => sendEvent(this.options.res, event),
[GraphEvents.ON_MESSAGE_DELTA]: (event) => sendEvent(this.options.res, event),
[GraphEvents.ON_REASONING_DELTA]: (event) => sendEvent(this.options.res, event),
},
});
const handleChunk = (currentChunk) => {
if (currentChunk) {
text += currentChunk;
onProgress(currentChunk);
}
};
let intermediateReply = this.streamHandler.tokens;
const maxRetries = 3;
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
@ -782,6 +862,31 @@ class AnthropicClient extends BaseClient {
}
});
/**
 * Forwards a piece of answer text to the stream handler, wrapped as a
 * `choices[0].delta.content` payload.
 * @param {string} chunk
 */
const handleChunk = (chunk) => {
  const delta = { content: chunk };
  this.streamHandler.handle({ choices: [{ delta }] });
};
/**
 * Forwards a piece of reasoning ("thinking") text to the stream handler,
 * wrapped as a `choices[0].delta.reasoning_content` payload.
 * @param {string} chunk
 */
const handleReasoningChunk = (chunk) => {
  const delta = { reasoning_content: chunk };
  this.streamHandler.handle({ choices: [{ delta }] });
};
for await (const completion of response) {
// Handle each completion as before
const type = completion?.type ?? '';
@ -789,7 +894,9 @@ class AnthropicClient extends BaseClient {
logger.debug(`[AnthropicClient] ${type}`, completion);
this[type] = completion;
}
if (completion?.delta?.text) {
if (completion?.delta?.thinking) {
handleReasoningChunk(completion.delta.thinking);
} else if (completion?.delta?.text) {
handleChunk(completion.delta.text);
} else if (completion.completion) {
handleChunk(completion.completion);
@ -808,6 +915,10 @@ class AnthropicClient extends BaseClient {
if (attempts < maxRetries) {
await delayBeforeRetry(attempts, 350);
} else if (this.streamHandler && this.streamHandler.reasoningTokens.length) {
return this.getStreamText();
} else if (intermediateReply.length > 0) {
return this.getStreamText(intermediateReply);
} else {
throw new Error(`Operation failed after ${maxRetries} attempts: ${error.message}`);
}
@ -823,8 +934,7 @@ class AnthropicClient extends BaseClient {
}
await processResponse.bind(this)();
return text.trim();
return this.getStreamText(intermediateReply);
}
getSaveOptions() {
@ -834,6 +944,8 @@ class AnthropicClient extends BaseClient {
promptPrefix: this.options.promptPrefix,
modelLabel: this.options.modelLabel,
promptCache: this.options.promptCache,
thinking: this.options.thinking,
thinkingBudget: this.options.thinkingBudget,
resendFiles: this.options.resendFiles,
iconURL: this.options.iconURL,
greeting: this.options.greeting,

View file

@ -70,6 +70,12 @@ const conversationPreset = {
promptCache: {
type: Boolean,
},
thinking: {
type: Boolean,
},
thinkingBudget: {
type: Number,
},
system: {
type: String,
},

View file

@ -88,6 +88,8 @@ const tokenValues = Object.assign(
'claude-3-sonnet': { prompt: 3, completion: 15 },
'claude-3-5-sonnet': { prompt: 3, completion: 15 },
'claude-3.5-sonnet': { prompt: 3, completion: 15 },
'claude-3-7-sonnet': { prompt: 3, completion: 15 },
'claude-3.7-sonnet': { prompt: 3, completion: 15 },
'claude-3-5-haiku': { prompt: 0.8, completion: 4 },
'claude-3.5-haiku': { prompt: 0.8, completion: 4 },
'claude-3-haiku': { prompt: 0.25, completion: 1.25 },
@ -110,6 +112,14 @@ const tokenValues = Object.assign(
'gemini-1.5': { prompt: 2.5, completion: 10 },
'gemini-pro-vision': { prompt: 0.5, completion: 1.5 },
gemini: { prompt: 0.5, completion: 1.5 },
'grok-2-vision-1212': { prompt: 2.0, completion: 10.0 },
'grok-2-vision-latest': { prompt: 2.0, completion: 10.0 },
'grok-2-vision': { prompt: 2.0, completion: 10.0 },
'grok-vision-beta': { prompt: 5.0, completion: 15.0 },
'grok-2-1212': { prompt: 2.0, completion: 10.0 },
'grok-2-latest': { prompt: 2.0, completion: 10.0 },
'grok-2': { prompt: 2.0, completion: 10.0 },
'grok-beta': { prompt: 5.0, completion: 15.0 },
},
bedrockValues,
);
@ -121,6 +131,8 @@ const tokenValues = Object.assign(
* @type {Object.<string, {write: number, read: number }>}
*/
const cacheTokenValues = {
'claude-3.7-sonnet': { write: 3.75, read: 0.3 },
'claude-3-7-sonnet': { write: 3.75, read: 0.3 },
'claude-3.5-sonnet': { write: 3.75, read: 0.3 },
'claude-3-5-sonnet': { write: 3.75, read: 0.3 },
'claude-3.5-haiku': { write: 1, read: 0.08 },

View file

@ -80,6 +80,20 @@ describe('getValueKey', () => {
expect(getValueKey('chatgpt-4o-latest-0718')).toBe('gpt-4o');
});
// Dash-separated naming: every variant must resolve to the 'claude-3-7-sonnet' key.
it('should return "claude-3-7-sonnet" for model type of "claude-3-7-sonnet-"', () => {
  const variants = [
    'claude-3-7-sonnet-20240620',
    'anthropic/claude-3-7-sonnet',
    'claude-3-7-sonnet-turbo',
    'claude-3-7-sonnet-0125',
  ];
  for (const variant of variants) {
    expect(getValueKey(variant)).toBe('claude-3-7-sonnet');
  }
});
// Dot-separated naming: every variant must resolve to the 'claude-3.7-sonnet' key.
it('should return "claude-3.7-sonnet" for model type of "claude-3.7-sonnet-"', () => {
  const variants = [
    'claude-3.7-sonnet-20240620',
    'anthropic/claude-3.7-sonnet',
    'claude-3.7-sonnet-turbo',
    'claude-3.7-sonnet-0125',
  ];
  for (const variant of variants) {
    expect(getValueKey(variant)).toBe('claude-3.7-sonnet');
  }
});
it('should return "claude-3-5-sonnet" for model type of "claude-3-5-sonnet-"', () => {
expect(getValueKey('claude-3-5-sonnet-20240620')).toBe('claude-3-5-sonnet');
expect(getValueKey('anthropic/claude-3-5-sonnet')).toBe('claude-3-5-sonnet');
@ -458,3 +472,30 @@ describe('Google Model Tests', () => {
});
});
});
// Verifies the prompt/completion multipliers returned by getMultiplier for the Grok models.
describe('Grok Model Tests - Pricing', () => {
  describe('getMultiplier', () => {
    test('should return correct prompt and completion rates for Grok vision models', () => {
      for (const model of ['grok-2-vision-1212', 'grok-2-vision', 'grok-2-vision-latest']) {
        expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(2.0);
        expect(getMultiplier({ model, tokenType: 'completion' })).toBe(10.0);
      }
    });
    test('should return correct prompt and completion rates for Grok text models', () => {
      for (const model of ['grok-2-1212', 'grok-2', 'grok-2-latest']) {
        expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(2.0);
        expect(getMultiplier({ model, tokenType: 'completion' })).toBe(10.0);
      }
    });
    test('should return correct prompt and completion rates for Grok beta models', () => {
      // The beta models have their own rates, distinct from the grok-2 family.
      for (const model of ['grok-vision-beta', 'grok-beta']) {
        expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(5.0);
        expect(getMultiplier({ model, tokenType: 'completion' })).toBe(15.0);
      }
    });
  });
});

View file

@ -34,7 +34,7 @@
},
"homepage": "https://librechat.ai",
"dependencies": {
"@anthropic-ai/sdk": "^0.32.1",
"@anthropic-ai/sdk": "^0.37.0",
"@azure/search-documents": "^12.0.0",
"@google/generative-ai": "^0.21.0",
"@googleapis/youtube": "^20.0.0",
@ -57,6 +57,7 @@
"cors": "^2.8.5",
"dedent": "^1.5.3",
"dotenv": "^16.0.3",
"eventsource": "^3.0.2",
"express": "^4.21.2",
"express-mongo-sanitize": "^2.2.0",
"express-rate-limit": "^7.4.1",

View file

@ -1,4 +1,4 @@
const { removeNullishValues } = require('librechat-data-provider');
const { removeNullishValues, anthropicSettings } = require('librechat-data-provider');
const generateArtifactsPrompt = require('~/app/clients/prompts/artifacts');
const buildOptions = (endpoint, parsedBody) => {
@ -6,8 +6,10 @@ const buildOptions = (endpoint, parsedBody) => {
modelLabel,
promptPrefix,
maxContextTokens,
resendFiles = true,
promptCache = true,
resendFiles = anthropicSettings.resendFiles.default,
promptCache = anthropicSettings.promptCache.default,
thinking = anthropicSettings.thinking.default,
thinkingBudget = anthropicSettings.thinkingBudget.default,
iconURL,
greeting,
spec,
@ -21,6 +23,8 @@ const buildOptions = (endpoint, parsedBody) => {
promptPrefix,
resendFiles,
promptCache,
thinking,
thinkingBudget,
iconURL,
greeting,
spec,

View file

@ -74,6 +74,7 @@ const anthropicModels = {
'claude-instant': 100000,
'claude-2': 100000,
'claude-2.1': 200000,
'claude-3': 200000,
'claude-3-haiku': 200000,
'claude-3-sonnet': 200000,
'claude-3-opus': 200000,
@ -81,6 +82,8 @@ const anthropicModels = {
'claude-3-5-haiku': 200000,
'claude-3-5-sonnet': 200000,
'claude-3.5-sonnet': 200000,
'claude-3-7-sonnet': 200000,
'claude-3.7-sonnet': 200000,
'claude-3-5-sonnet-latest': 200000,
'claude-3.5-sonnet-latest': 200000,
};
@ -183,7 +186,18 @@ const bedrockModels = {
...amazonModels,
};
const aggregateModels = { ...openAIModels, ...googleModels, ...bedrockModels };
/**
 * Token limits for xAI (Grok) models, keyed by model name. These are the
 * values `getModelMaxTokens` is expected to return for each model in the
 * accompanying tests.
 * NOTE(review): presumably context-window sizes, like the other model maps
 * in this file — confirm against xAI's published limits.
 */
const xAIModels = {
'grok-beta': 131072,
'grok-vision-beta': 8192,
'grok-2': 131072,
'grok-2-latest': 131072,
'grok-2-1212': 131072,
'grok-2-vision': 32768,
'grok-2-vision-latest': 32768,
'grok-2-vision-1212': 32768,
};
const aggregateModels = { ...openAIModels, ...googleModels, ...bedrockModels, ...xAIModels };
const maxTokensMap = {
[EModelEndpoint.azureOpenAI]: openAIModels,

View file

@ -116,6 +116,7 @@ describe('getModelMaxTokens', () => {
'claude-3-sonnet',
'claude-3-opus',
'claude-3-5-sonnet',
'claude-3-7-sonnet',
];
const maxTokens = {
@ -483,3 +484,68 @@ describe('Meta Models Tests', () => {
});
});
});
// Verifies token limits and model-name matching for the Grok models,
// with and without a provider prefix such as `openai/`.
describe('Grok Model Tests - Tokens', () => {
  const visionModels = ['grok-2-vision-1212', 'grok-2-vision', 'grok-2-vision-latest'];
  const betaModels = ['grok-vision-beta', 'grok-beta'];
  const textModels = ['grok-2-1212', 'grok-2', 'grok-2-latest'];

  const visionLimits = visionModels.map((model) => [model, 32768]);
  const betaLimits = [
    ['grok-vision-beta', 8192],
    ['grok-beta', 131072],
  ];
  const textLimits = textModels.map((model) => [model, 131072]);

  describe('getModelMaxTokens', () => {
    /** Asserts the limit of every [model, limit] pair, optionally behind a name prefix. */
    const expectLimits = (pairs, prefix = '') => {
      for (const [model, limit] of pairs) {
        expect(getModelMaxTokens(`${prefix}${model}`)).toBe(limit);
      }
    };

    test('should return correct tokens for Grok vision models', () => {
      expectLimits(visionLimits);
    });
    test('should return correct tokens for Grok beta models', () => {
      expectLimits(betaLimits);
    });
    test('should return correct tokens for Grok text models', () => {
      expectLimits(textLimits);
    });
    test('should handle partial matches for Grok models with prefixes', () => {
      // Vision models should match before general models
      expectLimits(visionLimits, 'openai/');
      // Beta models
      expectLimits(betaLimits, 'openai/');
      // Text models
      expectLimits(textLimits, 'openai/');
    });
  });

  describe('matchModelName', () => {
    /** Asserts that each model name, optionally prefixed, resolves to the bare model name. */
    const expectMatches = (models, prefix = '') => {
      for (const model of models) {
        expect(matchModelName(`${prefix}${model}`)).toBe(model);
      }
    };

    test('should match exact Grok model names', () => {
      // Vision models
      expectMatches(visionModels);
      // Beta models
      expectMatches(betaModels);
      // Text models
      expectMatches(textModels);
    });
    test('should match Grok model variations with prefixes', () => {
      // Vision models should match before general models
      expectMatches(visionModels, 'openai/');
      // Beta models
      expectMatches(betaModels, 'openai/');
      // Text models
      expectMatches(textModels, 'openai/');
    });
  });
});

View file

@ -27,12 +27,9 @@ function DynamicInput({
const localize = useLocalize();
const { preset } = useChatContext();
const [setInputValue, inputValue, setLocalValue] = useDebouncedInput<string | null>({
const [setInputValue, inputValue, setLocalValue] = useDebouncedInput<string | number>({
optionKey: optionType !== OptionTypes.Custom ? settingKey : undefined,
initialValue:
optionType !== OptionTypes.Custom
? (conversation?.[settingKey] as string)
: (defaultValue as string),
initialValue: optionType !== OptionTypes.Custom ? conversation?.[settingKey] : defaultValue,
setter: () => ({}),
setOption,
});
@ -88,9 +85,13 @@ function DynamicInput({
<Input
id={`${settingKey}-dynamic-input`}
disabled={readonly}
value={inputValue ?? ''}
value={inputValue ?? defaultValue ?? ''}
onChange={handleInputChange}
placeholder={placeholderCode ? localize(placeholder as TranslationKeys) || placeholder : placeholder}
placeholder={
placeholderCode
? localize(placeholder as TranslationKeys) || placeholder
: placeholder
}
className={cn(
'flex h-10 max-h-10 w-full resize-none border-none bg-surface-secondary px-3 py-2',
)}
@ -98,7 +99,11 @@ function DynamicInput({
</HoverCardTrigger>
{description && (
<OptionHover
description={descriptionCode ? localize(description as TranslationKeys) || description : description}
description={
descriptionCode
? localize(description as TranslationKeys) || description
: description
}
side={ESide.Left}
/>
)}

View file

@ -278,12 +278,42 @@ const anthropic: Record<string, SettingDefinition> = {
description: 'com_endpoint_anthropic_prompt_cache',
descriptionCode: true,
type: 'boolean',
default: true,
default: anthropicSettings.promptCache.default,
component: 'switch',
optionType: 'conversation',
showDefault: false,
columnSpan: 2,
},
thinking: {
key: 'thinking',
label: 'com_endpoint_thinking',
labelCode: true,
description: 'com_endpoint_anthropic_thinking',
descriptionCode: true,
type: 'boolean',
default: anthropicSettings.thinking.default,
component: 'switch',
optionType: 'conversation',
showDefault: false,
columnSpan: 2,
},
thinkingBudget: {
key: 'thinkingBudget',
label: 'com_endpoint_thinking_budget',
labelCode: true,
description: 'com_endpoint_anthropic_thinking_budget',
descriptionCode: true,
type: 'number',
component: 'input',
default: anthropicSettings.thinkingBudget.default,
range: {
min: anthropicSettings.thinkingBudget.min,
max: anthropicSettings.thinkingBudget.max,
step: anthropicSettings.thinkingBudget.step,
},
optionType: 'conversation',
columnSpan: 2,
},
};
const bedrock: Record<string, SettingDefinition> = {
@ -492,6 +522,8 @@ const anthropicConfig: SettingsConfiguration = [
anthropic.topK,
librechat.resendFiles,
anthropic.promptCache,
anthropic.thinking,
anthropic.thinkingBudget,
];
const anthropicCol1: SettingsConfiguration = [
@ -508,6 +540,8 @@ const anthropicCol2: SettingsConfiguration = [
anthropic.topK,
librechat.resendFiles,
anthropic.promptCache,
anthropic.thinking,
anthropic.thinkingBudget,
];
const bedrockAnthropic: SettingsConfiguration = [

View file

@ -140,6 +140,10 @@
"com_endpoint_ai": "AI",
"com_endpoint_anthropic_maxoutputtokens": "Maximum number of tokens that can be generated in the response. Specify a lower value for shorter responses and a higher value for longer responses. Note: models may stop before reaching this maximum.",
"com_endpoint_anthropic_prompt_cache": "Prompt caching allows reusing large context or instructions across API calls, reducing costs and latency",
"com_endpoint_thinking": "Thinking",
"com_endpoint_thinking_budget": "Thinking Budget",
"com_endpoint_anthropic_thinking": "Enables internal reasoning for supported Claude models (3.7 Sonnet). Note: requires \"Thinking Budget\" to be set and lower than \"Max Output Tokens\"",
"com_endpoint_anthropic_thinking_budget": "Determines the max number of tokens Claude is allowed to use for its internal reasoning process. Larger budgets can improve response quality by enabling more thorough analysis for complex problems, although Claude may not use the entire budget allocated, especially at ranges above 32K. This setting must be lower than \"Max Output Tokens.\"",
"com_endpoint_anthropic_temp": "Ranges from 0 to 1. Use temp closer to 0 for analytical / multiple choice, and closer to 1 for creative and generative tasks. We recommend altering this or Top P but not both.",
"com_endpoint_anthropic_topk": "Top-k changes how the model selects tokens for output. A top-k of 1 means the selected token is the most probable among all tokens in the model's vocabulary (also called greedy decoding), while a top-k of 3 means that the next token is selected from among the 3 most probable tokens (using temperature).",
"com_endpoint_anthropic_topp": "Top-p changes how the model selects tokens for output. Tokens are selected from most K (see topK parameter) probable to least until the sum of their probabilities equals the top-p value.",
@ -829,4 +833,4 @@
"com_ui_zoom": "Zoom",
"com_user_message": "You",
"com_warning_resubmit_unsupported": "Resubmitting the AI message is not supported for this endpoint."
}
}

View file

@ -1,5 +1,4 @@
const path = require('path');
require('module-alias/register');
const moduleAlias = require('module-alias');
@ -7,6 +6,7 @@ const basePath = path.resolve(__dirname, '..', 'api');
moduleAlias.addAlias('~', basePath);
const connectDb = require('~/lib/db/connectDb');
require('./helpers');
async function connect() {
/**

110
package-lock.json generated
View file

@ -50,7 +50,7 @@
"version": "v0.7.7-rc1",
"license": "ISC",
"dependencies": {
"@anthropic-ai/sdk": "^0.32.1",
"@anthropic-ai/sdk": "^0.37.0",
"@azure/search-documents": "^12.0.0",
"@google/generative-ai": "^0.21.0",
"@googleapis/youtube": "^20.0.0",
@ -73,6 +73,7 @@
"cors": "^2.8.5",
"dedent": "^1.5.3",
"dotenv": "^16.0.3",
"eventsource": "^3.0.2",
"express": "^4.21.2",
"express-mongo-sanitize": "^2.2.0",
"express-rate-limit": "^7.4.1",
@ -130,6 +131,20 @@
"supertest": "^7.0.0"
}
},
"api/node_modules/@anthropic-ai/sdk": {
"version": "0.37.0",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.37.0.tgz",
"integrity": "sha512-tHjX2YbkUBwEgg0JZU3EFSSAQPoK4qQR/NFYa8Vtzd5UAyXzZksCw2In69Rml4R/TyHPBfRYaLK35XiOe33pjw==",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
}
},
"api/node_modules/@langchain/community": {
"version": "0.3.14",
"resolved": "https://registry.npmjs.org/@langchain/community/-/community-0.3.14.tgz",
@ -13718,41 +13733,6 @@
"node-fetch": "^2.6.7"
}
},
"node_modules/@modelcontextprotocol/sdk": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.0.3.tgz",
"integrity": "sha512-2as3cX/VJ0YBHGmdv3GFyTpoM8q2gqE98zh3Vf1NwnsSY0h3mvoO07MUzfygCKkWsFjcZm4otIiqD6Xh7kiSBQ==",
"dependencies": {
"content-type": "^1.0.5",
"raw-body": "^3.0.0",
"zod": "^3.23.8"
}
},
"node_modules/@modelcontextprotocol/sdk/node_modules/iconv-lite": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
"integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
"dependencies": {
"safer-buffer": ">= 2.1.2 < 3.0.0"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/@modelcontextprotocol/sdk/node_modules/raw-body": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.0.tgz",
"integrity": "sha512-RmkhL8CAyCRPXCE28MMH0z2PNWQBNk2Q09ZdxM9IOOXwxwZbN+qbWaatPkdkWIKL2ZVDImrN/pK5HTRz2PcS4g==",
"dependencies": {
"bytes": "3.1.2",
"http-errors": "2.0.0",
"iconv-lite": "0.6.3",
"unpipe": "1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/@mongodb-js/saslprep": {
"version": "1.1.9",
"resolved": "https://registry.npmjs.org/@mongodb-js/saslprep/-/saslprep-1.1.9.tgz",
@ -22807,9 +22787,9 @@
}
},
"node_modules/eventsource": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.1.tgz",
"integrity": "sha512-tyGtsrTc9fi+N5qFU6G2MLjcBbsdCOQ/QE9Cc96Mt6q02YkQrIJGOaNMg6qiXRJDzxecN7BntJYNRE/j0OIhMQ==",
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.2.tgz",
"integrity": "sha512-YolzkJNxsTL3tCJMWFxpxtG2sCjbZ4LQUBUrkdaJK0ub0p6lmJt+2+1SwhKjLc652lpH9L/79Ptez972H9tphw==",
"dependencies": {
"eventsource-parser": "^3.0.0"
},
@ -39025,9 +39005,9 @@
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.0.3",
"@modelcontextprotocol/sdk": "^1.4.1",
"diff": "^7.0.0",
"eventsource": "^3.0.1",
"eventsource": "^3.0.2",
"express": "^4.21.2"
},
"devDependencies": {
@ -39061,6 +39041,21 @@
"keyv": "^4.5.4"
}
},
"packages/mcp/node_modules/@modelcontextprotocol/sdk": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.4.1.tgz",
"integrity": "sha512-wS6YC4lkUZ9QpP+/7NBTlVNiEvsnyl0xF7rRusLF+RsG0xDPc/zWR7fEEyhKnnNutGsDAZh59l/AeoWGwIb1+g==",
"dependencies": {
"content-type": "^1.0.5",
"eventsource": "^3.0.2",
"raw-body": "^3.0.0",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.24.1"
},
"engines": {
"node": ">=18"
}
},
"packages/mcp/node_modules/brace-expansion": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
@ -39090,6 +39085,17 @@
"url": "https://github.com/sponsors/isaacs"
}
},
"packages/mcp/node_modules/iconv-lite": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
"integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
"dependencies": {
"safer-buffer": ">= 2.1.2 < 3.0.0"
},
"engines": {
"node": ">=0.10.0"
}
},
"packages/mcp/node_modules/jackspeak": {
"version": "3.4.3",
"resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz",
@ -39120,6 +39126,20 @@
"url": "https://github.com/sponsors/isaacs"
}
},
"packages/mcp/node_modules/raw-body": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.0.tgz",
"integrity": "sha512-RmkhL8CAyCRPXCE28MMH0z2PNWQBNk2Q09ZdxM9IOOXwxwZbN+qbWaatPkdkWIKL2ZVDImrN/pK5HTRz2PcS4g==",
"dependencies": {
"bytes": "3.1.2",
"http-errors": "2.0.0",
"iconv-lite": "0.6.3",
"unpipe": "1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"packages/mcp/node_modules/rimraf": {
"version": "5.0.10",
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz",
@ -39134,6 +39154,14 @@
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"packages/mcp/node_modules/zod-to-json-schema": {
"version": "3.24.3",
"resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.24.3.tgz",
"integrity": "sha512-HIAfWdYIt1sssHfYZFCXp4rU1w2r8hVVXYIlmoa0r0gABLs5di3RCqPU5DDROogVz1pAdYBaz7HK5n9pSUNs3A==",
"peerDependencies": {
"zod": "^3.24.1"
}
}
}
}

View file

@ -648,6 +648,8 @@ const sharedOpenAIModels = [
];
const sharedAnthropicModels = [
'claude-3-7-sonnet-latest',
'claude-3-7-sonnet-20250219',
'claude-3-5-haiku-20241022',
'claude-3-5-sonnet-20241022',
'claude-3-5-sonnet-20240620',

View file

@ -256,7 +256,7 @@ const ANTHROPIC_MAX_OUTPUT = 8192;
const LEGACY_ANTHROPIC_MAX_OUTPUT = 4096;
export const anthropicSettings = {
model: {
default: 'claude-3-5-sonnet-20241022',
default: 'claude-3-5-sonnet-latest',
},
temperature: {
min: 0,
@ -267,13 +267,22 @@ export const anthropicSettings = {
promptCache: {
default: true,
},
thinking: {
default: true,
},
thinkingBudget: {
min: 1024,
step: 100,
max: 200000,
default: 2000,
},
maxOutputTokens: {
min: 1,
max: ANTHROPIC_MAX_OUTPUT,
step: 1,
default: ANTHROPIC_MAX_OUTPUT,
reset: (modelName: string) => {
if (modelName.includes('claude-3-5-sonnet')) {
if (modelName.includes('claude-3-5-sonnet') || modelName.includes('claude-3-7-sonnet')) {
return ANTHROPIC_MAX_OUTPUT;
}
@ -556,6 +565,8 @@ export const tConversationSchema = z.object({
/* Anthropic */
promptCache: z.boolean().optional(),
system: z.string().optional(),
thinking: z.boolean().optional(),
thinkingBudget: coerceNumber.optional(),
/* artifacts */
artifacts: z.string().optional(),
/* google */
@ -672,6 +683,8 @@ export const tQueryParamsSchema = tConversationSchema
maxOutputTokens: true,
/** @endpoints anthropic */
promptCache: true,
thinking: true,
thinkingBudget: true,
/** @endpoints bedrock */
region: true,
/** @endpoints bedrock */
@ -1067,6 +1080,8 @@ export const anthropicSchema = tConversationSchema
topK: true,
resendFiles: true,
promptCache: true,
thinking: true,
thinkingBudget: true,
artifacts: true,
iconURL: true,
greeting: true,

View file

@ -69,9 +69,9 @@
"registry": "https://registry.npmjs.org/"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.0.3",
"@modelcontextprotocol/sdk": "^1.4.1",
"diff": "^7.0.0",
"eventsource": "^3.0.1",
"eventsource": "^3.0.2",
"express": "^4.21.2"
},
"peerDependencies": {