diff --git a/api/app/clients/specs/AnthropicClient.test.js b/api/app/clients/specs/AnthropicClient.test.js index fbcd2b75e..35477005f 100644 --- a/api/app/clients/specs/AnthropicClient.test.js +++ b/api/app/clients/specs/AnthropicClient.test.js @@ -245,7 +245,7 @@ describe('AnthropicClient', () => { }); describe('Claude 4 model headers', () => { - it('should add "prompt-caching" beta header for claude-sonnet-4 model', () => { + it('should add "prompt-caching" and "context-1m" beta headers for claude-sonnet-4 model', () => { const client = new AnthropicClient('test-api-key'); const modelOptions = { model: 'claude-sonnet-4-20250514', @@ -255,10 +255,30 @@ describe('AnthropicClient', () => { expect(anthropicClient._options.defaultHeaders).toBeDefined(); expect(anthropicClient._options.defaultHeaders).toHaveProperty('anthropic-beta'); expect(anthropicClient._options.defaultHeaders['anthropic-beta']).toBe( - 'prompt-caching-2024-07-31', + 'prompt-caching-2024-07-31,context-1m-2025-08-07', ); }); + it('should add "prompt-caching" and "context-1m" beta headers for claude-sonnet-4 model formats', () => { + const client = new AnthropicClient('test-api-key'); + const modelVariations = [ + 'claude-sonnet-4-20250514', + 'claude-sonnet-4-latest', + 'anthropic/claude-sonnet-4-20250514', + ]; + + modelVariations.forEach((model) => { + const modelOptions = { model }; + client.setOptions({ modelOptions, promptCache: true }); + const anthropicClient = client.getClient(modelOptions); + expect(anthropicClient._options.defaultHeaders).toBeDefined(); + expect(anthropicClient._options.defaultHeaders).toHaveProperty('anthropic-beta'); + expect(anthropicClient._options.defaultHeaders['anthropic-beta']).toBe( + 'prompt-caching-2024-07-31,context-1m-2025-08-07', + ); + }); + }); + it('should add "prompt-caching" beta header for claude-opus-4 model', () => { const client = new AnthropicClient('test-api-key'); const modelOptions = { @@ -273,20 +293,6 @@ describe('AnthropicClient', () => { ); }); - it('should add "prompt-caching" beta header for claude-4-sonnet model', () => { - const client = new AnthropicClient('test-api-key'); - const modelOptions = { - model: 'claude-4-sonnet-20250514', - }; - client.setOptions({ modelOptions, promptCache: true }); - const anthropicClient = client.getClient(modelOptions); - expect(anthropicClient._options.defaultHeaders).toBeDefined(); - expect(anthropicClient._options.defaultHeaders).toHaveProperty('anthropic-beta'); - expect(anthropicClient._options.defaultHeaders['anthropic-beta']).toBe( - 'prompt-caching-2024-07-31', - ); - }); - it('should add "prompt-caching" beta header for claude-4-opus model', () => { const client = new AnthropicClient('test-api-key'); const modelOptions = { diff --git a/api/server/services/Endpoints/anthropic/helpers.js b/api/server/services/Endpoints/anthropic/helpers.js index 60040ed98..e47e5abb4 100644 --- a/api/server/services/Endpoints/anthropic/helpers.js +++ b/api/server/services/Endpoints/anthropic/helpers.js @@ -45,6 +45,10 @@ function getClaudeHeaders(model, supportsCacheControl) { 'anthropic-beta': 'token-efficient-tools-2025-02-19,output-128k-2025-02-19,prompt-caching-2024-07-31', }; + } else if (/claude-sonnet-4/.test(model)) { + return { + 'anthropic-beta': 'prompt-caching-2024-07-31,context-1m-2025-08-07', + }; } else if ( /claude-(?:sonnet|opus|haiku)-[4-9]/.test(model) || /claude-[4-9]-(?:sonnet|opus|haiku)?/.test(model) || diff --git a/api/server/services/Endpoints/anthropic/llm.spec.js b/api/server/services/Endpoints/anthropic/llm.spec.js index cd29975e0..a1cdc160c 100644 --- a/api/server/services/Endpoints/anthropic/llm.spec.js +++ b/api/server/services/Endpoints/anthropic/llm.spec.js @@ -1,50 +1,19 @@ -const { anthropicSettings, removeNullishValues } = require('librechat-data-provider'); const { getLLMConfig } = require('~/server/services/Endpoints/anthropic/llm'); -const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers'); jest.mock('https-proxy-agent', () => ({ HttpsProxyAgent: jest.fn().mockImplementation((proxy) => ({ proxy })), })); -jest.mock('./helpers', () => ({ - checkPromptCacheSupport: jest.fn(), - getClaudeHeaders: jest.fn(), - configureReasoning: jest.fn((requestOptions) => requestOptions), -})); - -jest.mock('librechat-data-provider', () => ({ - anthropicSettings: { - model: { default: 'claude-3-opus-20240229' }, - maxOutputTokens: { default: 4096, reset: jest.fn(() => 4096) }, - thinking: { default: false }, - promptCache: { default: false }, - thinkingBudget: { default: null }, - }, - removeNullishValues: jest.fn((obj) => { - const result = {}; - for (const key in obj) { - if (obj[key] !== null && obj[key] !== undefined) { - result[key] = obj[key]; - } - } - return result; - }), -})); - describe('getLLMConfig', () => { beforeEach(() => { jest.clearAllMocks(); - checkPromptCacheSupport.mockReturnValue(false); - getClaudeHeaders.mockReturnValue(undefined); - configureReasoning.mockImplementation((requestOptions) => requestOptions); - anthropicSettings.maxOutputTokens.reset.mockReturnValue(4096); }); it('should create a basic configuration with default values', () => { const result = getLLMConfig('test-api-key', { modelOptions: {} }); expect(result.llmConfig).toHaveProperty('apiKey', 'test-api-key'); - expect(result.llmConfig).toHaveProperty('model', anthropicSettings.model.default); + expect(result.llmConfig).toHaveProperty('model', 'claude-3-5-sonnet-latest'); expect(result.llmConfig).toHaveProperty('stream', true); expect(result.llmConfig).toHaveProperty('maxTokens'); }); @@ -99,40 +68,73 @@ describe('getLLMConfig', () => { expect(result.llmConfig).toHaveProperty('topP', 0.9); }); - it('should NOT include topK and topP for Claude-3-7 models (hyphen notation)', () => { - configureReasoning.mockImplementation((requestOptions) => { - requestOptions.thinking = { type: 'enabled' }; - return requestOptions; - }); - + it('should NOT include topK and topP for Claude-3-7 models with thinking enabled (hyphen notation)', () => { const result = getLLMConfig('test-api-key', { modelOptions: { model: 'claude-3-7-sonnet', topK: 10, topP: 0.9, + thinking: true, }, }); expect(result.llmConfig).not.toHaveProperty('topK'); expect(result.llmConfig).not.toHaveProperty('topP'); + expect(result.llmConfig).toHaveProperty('thinking'); + expect(result.llmConfig.thinking).toHaveProperty('type', 'enabled'); + // When thinking is enabled, it uses the default thinkingBudget of 2000 + expect(result.llmConfig.thinking).toHaveProperty('budget_tokens', 2000); }); - it('should NOT include topK and topP for Claude-3.7 models (decimal notation)', () => { - configureReasoning.mockImplementation((requestOptions) => { - requestOptions.thinking = { type: 'enabled' }; - return requestOptions; - }); + it('should add "prompt-caching" and "context-1m" beta headers for claude-sonnet-4 model', () => { + const modelOptions = { + model: 'claude-sonnet-4-20250514', + promptCache: true, + }; + const result = getLLMConfig('test-key', { modelOptions }); + const clientOptions = result.llmConfig.clientOptions; + expect(clientOptions.defaultHeaders).toBeDefined(); + expect(clientOptions.defaultHeaders).toHaveProperty('anthropic-beta'); + expect(clientOptions.defaultHeaders['anthropic-beta']).toBe( + 'prompt-caching-2024-07-31,context-1m-2025-08-07', + ); + }); + it('should add "prompt-caching" and "context-1m" beta headers for claude-sonnet-4 model formats', () => { + const modelVariations = [ + 'claude-sonnet-4-20250514', + 'claude-sonnet-4-latest', + 'anthropic/claude-sonnet-4-20250514', + ]; + + modelVariations.forEach((model) => { + const modelOptions = { model, promptCache: true }; + const result = getLLMConfig('test-key', { modelOptions }); + const clientOptions = result.llmConfig.clientOptions; + expect(clientOptions.defaultHeaders).toBeDefined(); + expect(clientOptions.defaultHeaders).toHaveProperty('anthropic-beta'); + expect(clientOptions.defaultHeaders['anthropic-beta']).toBe( + 'prompt-caching-2024-07-31,context-1m-2025-08-07', + ); + }); + }); + + it('should NOT include topK and topP for Claude-3.7 models with thinking enabled (decimal notation)', () => { const result = getLLMConfig('test-api-key', { modelOptions: { model: 'claude-3.7-sonnet', topK: 10, topP: 0.9, + thinking: true, }, }); expect(result.llmConfig).not.toHaveProperty('topK'); expect(result.llmConfig).not.toHaveProperty('topP'); + expect(result.llmConfig).toHaveProperty('thinking'); + expect(result.llmConfig.thinking).toHaveProperty('type', 'enabled'); + // When thinking is enabled, it uses the default thinkingBudget of 2000 + expect(result.llmConfig.thinking).toHaveProperty('budget_tokens', 2000); }); it('should handle custom maxOutputTokens', () => { @@ -233,7 +235,6 @@ describe('getLLMConfig', () => { }); it('should handle maxOutputTokens when explicitly set to falsy value', () => { - anthropicSettings.maxOutputTokens.reset.mockReturnValue(8192); const result = getLLMConfig('test-api-key', { modelOptions: { model: 'claude-3-opus', @@ -241,8 +242,8 @@ describe('getLLMConfig', () => { }, }); - expect(anthropicSettings.maxOutputTokens.reset).toHaveBeenCalledWith('claude-3-opus'); - expect(result.llmConfig).toHaveProperty('maxTokens', 8192); + // The actual anthropicSettings.maxOutputTokens.reset('claude-3-opus') returns 4096 + expect(result.llmConfig).toHaveProperty('maxTokens', 4096); }); it('should handle both proxy and reverseProxyUrl', () => { @@ -263,9 +264,6 @@ describe('getLLMConfig', () => { }); it('should handle prompt cache with supported model', () => { - checkPromptCacheSupport.mockReturnValue(true); - getClaudeHeaders.mockReturnValue({ 'anthropic-beta': 'prompt-caching-2024-07-31' }); - const result = getLLMConfig('test-api-key', { modelOptions: { model: 'claude-3-5-sonnet', @@ -273,43 +271,38 @@ describe('getLLMConfig', () => { }, }); - expect(checkPromptCacheSupport).toHaveBeenCalledWith('claude-3-5-sonnet'); - expect(getClaudeHeaders).toHaveBeenCalledWith('claude-3-5-sonnet', true); + // claude-3-5-sonnet supports prompt caching and should get the appropriate headers expect(result.llmConfig.clientOptions.defaultHeaders).toEqual({ - 'anthropic-beta': 'prompt-caching-2024-07-31', + 'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31', }); }); it('should handle thinking and thinkingBudget options', () => { - configureReasoning.mockImplementation((requestOptions, systemOptions) => { - if (systemOptions.thinking) { - requestOptions.thinking = { type: 'enabled' }; - } - if (systemOptions.thinkingBudget) { - requestOptions.thinking = { - ...requestOptions.thinking, - budget_tokens: systemOptions.thinkingBudget, - }; - } - return requestOptions; - }); - - getLLMConfig('test-api-key', { + const result = getLLMConfig('test-api-key', { modelOptions: { model: 'claude-3-7-sonnet', thinking: true, - thinkingBudget: 5000, + thinkingBudget: 10000, // This exceeds the default max_tokens of 8192 }, }); - expect(configureReasoning).toHaveBeenCalledWith( - expect.any(Object), - expect.objectContaining({ + // The function should add thinking configuration for claude-3-7 models + expect(result.llmConfig).toHaveProperty('thinking'); + expect(result.llmConfig.thinking).toHaveProperty('type', 'enabled'); + // With claude-3-7-sonnet, the max_tokens default is 8192 + // Budget tokens gets adjusted to 90% of max_tokens (8192 * 0.9 = 7372) when it exceeds max_tokens + expect(result.llmConfig.thinking).toHaveProperty('budget_tokens', 7372); + + // Test with budget_tokens within max_tokens limit + const result2 = getLLMConfig('test-api-key', { + modelOptions: { + model: 'claude-3-7-sonnet', thinking: true, - promptCache: false, - thinkingBudget: 5000, - }), - ); + thinkingBudget: 2000, + }, + }); + + expect(result2.llmConfig.thinking).toHaveProperty('budget_tokens', 2000); }); it('should remove system options from modelOptions', () => { @@ -330,16 +323,6 @@ describe('getLLMConfig', () => { }); it('should handle all nullish values removal', () => { - removeNullishValues.mockImplementation((obj) => { - const cleaned = {}; - Object.entries(obj).forEach(([key, value]) => { - if (value !== null && value !== undefined) { - cleaned[key] = value; - } - }); - return cleaned; - }); - const result = getLLMConfig('test-api-key', { modelOptions: { temperature: null, diff --git a/api/utils/tokens.js b/api/utils/tokens.js index 0785dda01..c94c0ccdf 100644 --- a/api/utils/tokens.js +++ b/api/utils/tokens.js @@ -108,7 +108,7 @@ const anthropicModels = { 'claude-3.7-sonnet': 200000, 'claude-3-5-sonnet-latest': 200000, 'claude-3.5-sonnet-latest': 200000, - 'claude-sonnet-4': 200000, + 'claude-sonnet-4': 1000000, 'claude-opus-4': 200000, 'claude-4': 200000, }; @@ -487,5 +487,7 @@ module.exports = { matchModelName, processModelData, getModelMaxTokens, + getModelTokenValue, + findMatchingPattern, getModelMaxOutputTokens, }; diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index cc09bab31..6d09b012b 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -1,6 +1,7 @@ const { EModelEndpoint } = require('librechat-data-provider'); const { maxOutputTokensMap, + findMatchingPattern, getModelMaxTokens, processModelData, matchModelName, @@ -749,8 +750,12 @@ describe('Grok Model Tests - Tokens', () => { describe('Claude Model Tests', () => { it('should return correct context length for Claude 4 models', () => { - expect(getModelMaxTokens('claude-sonnet-4')).toBe(200000); - expect(getModelMaxTokens('claude-opus-4')).toBe(200000); + expect(getModelMaxTokens('claude-sonnet-4')).toBe( + maxTokensMap[EModelEndpoint.anthropic]['claude-sonnet-4'], + ); + expect(getModelMaxTokens('claude-opus-4')).toBe( + maxTokensMap[EModelEndpoint.anthropic]['claude-opus-4'], + ); }); it('should handle Claude 4 model name variations with different prefixes and suffixes', () => { @@ -772,7 +777,8 @@ describe('Claude Model Tests', () => { ]; modelVariations.forEach((model) => { - expect(getModelMaxTokens(model)).toBe(200000); + const modelKey = findMatchingPattern(model, maxTokensMap[EModelEndpoint.anthropic]); + expect(getModelMaxTokens(model)).toBe(maxTokensMap[EModelEndpoint.anthropic][modelKey]); }); });