diff --git a/packages/api/src/endpoints/anthropic/llm.spec.ts b/packages/api/src/endpoints/anthropic/llm.spec.ts index 447c10a073..a203f50533 100644 --- a/packages/api/src/endpoints/anthropic/llm.spec.ts +++ b/packages/api/src/endpoints/anthropic/llm.spec.ts @@ -245,8 +245,8 @@ describe('getLLMConfig', () => { }, }); - // The actual anthropicSettings.maxOutputTokens.reset('claude-3-opus') returns 4096 - expect(result.llmConfig).toHaveProperty('maxTokens', 4096); + // The actual anthropicSettings.maxOutputTokens.reset('claude-3-opus') returns 8192 + expect(result.llmConfig).toHaveProperty('maxTokens', 8192); }); it('should handle both proxy and reverseProxyUrl', () => { @@ -698,9 +698,17 @@ describe('getLLMConfig', () => { { model: 'claude-3.5-sonnet-20241022', expectedMaxTokens: 8192 }, { model: 'claude-3-7-sonnet', expectedMaxTokens: 8192 }, { model: 'claude-3.7-sonnet-20250109', expectedMaxTokens: 8192 }, - { model: 'claude-3-opus', expectedMaxTokens: 4096 }, - { model: 'claude-3-haiku', expectedMaxTokens: 4096 }, - { model: 'claude-2.1', expectedMaxTokens: 4096 }, + { model: 'claude-3-opus', expectedMaxTokens: 8192 }, + { model: 'claude-3-haiku', expectedMaxTokens: 8192 }, + { model: 'claude-2.1', expectedMaxTokens: 8192 }, + { model: 'claude-sonnet-4-5', expectedMaxTokens: 64000 }, + { model: 'claude-sonnet-4-5-20250929', expectedMaxTokens: 64000 }, + { model: 'claude-haiku-4-5', expectedMaxTokens: 64000 }, + { model: 'claude-haiku-4-5-20251001', expectedMaxTokens: 64000 }, + { model: 'claude-opus-4-1', expectedMaxTokens: 32000 }, + { model: 'claude-opus-4-1-20250805', expectedMaxTokens: 32000 }, + { model: 'claude-sonnet-4-20250514', expectedMaxTokens: 64000 }, + { model: 'claude-opus-4-0', expectedMaxTokens: 32000 }, ]; testCases.forEach(({ model, expectedMaxTokens }) => { @@ -729,6 +737,222 @@ describe('getLLMConfig', () => { }); }); + describe('Claude 4.x Model maxOutputTokens Defaults', () => { + it('should default Claude Sonnet 4.x models to 64K tokens', 
() => { + const testCases = ['claude-sonnet-4-5', 'claude-sonnet-4-5-20250929', 'claude-sonnet-4.5']; + + testCases.forEach((model) => { + const result = getLLMConfig('test-key', { + modelOptions: { model }, + }); + expect(result.llmConfig.maxTokens).toBe(64000); + }); + }); + + it('should default Claude Haiku 4.x models to 64K tokens', () => { + const testCases = ['claude-haiku-4-5', 'claude-haiku-4-5-20251001', 'claude-haiku-4.5']; + + testCases.forEach((model) => { + const result = getLLMConfig('test-key', { + modelOptions: { model }, + }); + expect(result.llmConfig.maxTokens).toBe(64000); + }); + }); + + it('should default Claude Opus 4.x models to 32K tokens', () => { + const testCases = ['claude-opus-4-1', 'claude-opus-4-1-20250805', 'claude-opus-4.1']; + + testCases.forEach((model) => { + const result = getLLMConfig('test-key', { + modelOptions: { model }, + }); + expect(result.llmConfig.maxTokens).toBe(32000); + }); + }); + + it('should default future Claude 4.x Sonnet/Haiku models to 64K (future-proofing)', () => { + const testCases = ['claude-sonnet-4-20250514', 'claude-sonnet-4-9', 'claude-haiku-4-8']; + + testCases.forEach((model) => { + const result = getLLMConfig('test-key', { + modelOptions: { model }, + }); + expect(result.llmConfig.maxTokens).toBe(64000); + }); + }); + + it('should default future Claude 4.x Opus models to 32K (future-proofing)', () => { + const testCases = ['claude-opus-4-0', 'claude-opus-4-7']; + + testCases.forEach((model) => { + const result = getLLMConfig('test-key', { + modelOptions: { model }, + }); + expect(result.llmConfig.maxTokens).toBe(32000); + }); + }); + + it('should handle explicit maxOutputTokens override for Claude 4.x models', () => { + const result = getLLMConfig('test-key', { + modelOptions: { + model: 'claude-sonnet-4-5', + maxOutputTokens: 32000, // Explicitly set below the 64K default so an ignored override fails + }, + }); + + expect(result.llmConfig.maxTokens).toBe(32000); + }); + + it('should handle undefined maxOutputTokens for Claude 4.x (use 
reset default)', () => { + const testCases = [ + { model: 'claude-sonnet-4-5', expected: 64000 }, + { model: 'claude-haiku-4-5', expected: 64000 }, + { model: 'claude-opus-4-1', expected: 32000 }, + ]; + + testCases.forEach(({ model, expected }) => { + const result = getLLMConfig('test-key', { + modelOptions: { + model, + maxOutputTokens: undefined, + }, + }); + expect(result.llmConfig.maxTokens).toBe(expected); + }); + }); + + it('should handle Claude 4 Sonnet/Haiku with thinking enabled', () => { + const testCases = ['claude-sonnet-4-5', 'claude-haiku-4-5']; + + testCases.forEach((model) => { + const result = getLLMConfig('test-key', { + modelOptions: { + model, + thinking: true, + thinkingBudget: 10000, + }, + }); + + expect(result.llmConfig.thinking).toMatchObject({ + type: 'enabled', + budget_tokens: 10000, + }); + expect(result.llmConfig.maxTokens).toBe(64000); + }); + }); + + it('should handle Claude 4 Opus with thinking enabled', () => { + const result = getLLMConfig('test-key', { + modelOptions: { + model: 'claude-opus-4-1', + thinking: true, + thinkingBudget: 10000, + }, + }); + + expect(result.llmConfig.thinking).toMatchObject({ + type: 'enabled', + budget_tokens: 10000, + }); + expect(result.llmConfig.maxTokens).toBe(32000); + }); + + it('should respect model-specific maxOutputTokens for Claude 4.x models', () => { + const testCases = [ + { model: 'claude-sonnet-4-5', maxOutputTokens: 50000, expected: 50000 }, + { model: 'claude-haiku-4-5', maxOutputTokens: 40000, expected: 40000 }, + { model: 'claude-opus-4-1', maxOutputTokens: 20000, expected: 20000 }, + ]; + + testCases.forEach(({ model, maxOutputTokens, expected }) => { + const result = getLLMConfig('test-key', { + modelOptions: { + model, + maxOutputTokens, + }, + }); + expect(result.llmConfig.maxTokens).toBe(expected); + }); + }); + + it('should future-proof Claude 5.x Sonnet models with 64K default', () => { + const testCases = [ + 'claude-sonnet-5', + 'claude-sonnet-5-0', + 
'claude-sonnet-5-2-20260101', + 'claude-sonnet-5.5', + ]; + + testCases.forEach((model) => { + const result = getLLMConfig('test-key', { + modelOptions: { model }, + }); + expect(result.llmConfig.maxTokens).toBe(64000); + }); + }); + + it('should future-proof Claude 5.x Haiku models with 64K default', () => { + const testCases = [ + 'claude-haiku-5', + 'claude-haiku-5-0', + 'claude-haiku-5-2-20260101', + 'claude-haiku-5.5', + ]; + + testCases.forEach((model) => { + const result = getLLMConfig('test-key', { + modelOptions: { model }, + }); + expect(result.llmConfig.maxTokens).toBe(64000); + }); + }); + + it('should future-proof Claude 5.x Opus models with 32K default', () => { + const testCases = [ + 'claude-opus-5', + 'claude-opus-5-0', + 'claude-opus-5-2-20260101', + 'claude-opus-5.5', + ]; + + testCases.forEach((model) => { + const result = getLLMConfig('test-key', { + modelOptions: { model }, + }); + expect(result.llmConfig.maxTokens).toBe(32000); + }); + }); + + it('should future-proof Claude 6-9.x models with correct defaults', () => { + const testCases = [ + // Claude 6.x + { model: 'claude-sonnet-6', expected: 64000 }, + { model: 'claude-haiku-6-0', expected: 64000 }, + { model: 'claude-opus-6-1', expected: 32000 }, + // Claude 7.x + { model: 'claude-sonnet-7-20270101', expected: 64000 }, + { model: 'claude-haiku-7.5', expected: 64000 }, + { model: 'claude-opus-7', expected: 32000 }, + // Claude 8.x + { model: 'claude-sonnet-8', expected: 64000 }, + { model: 'claude-haiku-8-2', expected: 64000 }, + { model: 'claude-opus-8-latest', expected: 32000 }, + // Claude 9.x + { model: 'claude-sonnet-9', expected: 64000 }, + { model: 'claude-haiku-9', expected: 64000 }, + { model: 'claude-opus-9', expected: 32000 }, + ]; + + testCases.forEach(({ model, expected }) => { + const result = getLLMConfig('test-key', { + modelOptions: { model }, + }); + expect(result.llmConfig.maxTokens).toBe(expected); + }); + }); + }); + describe('Parameter Boundary and Validation Logic', 
() => { it('should handle temperature boundary values', () => { const testCases = [ @@ -784,7 +1008,7 @@ describe('getLLMConfig', () => { it('should handle maxOutputTokens boundary values', () => { const testCases = [ { model: 'claude-3-opus', maxOutputTokens: 1, expected: 1 }, // min - { model: 'claude-3-opus', maxOutputTokens: 4096, expected: 4096 }, // max for legacy + { model: 'claude-3-opus', maxOutputTokens: 8192, expected: 8192 }, // default for claude-3 { model: 'claude-3-5-sonnet', maxOutputTokens: 1, expected: 1 }, // min { model: 'claude-3-5-sonnet', maxOutputTokens: 200000, expected: 200000 }, // max for new { model: 'claude-3-7-sonnet', maxOutputTokens: 8192, expected: 8192 }, // default diff --git a/packages/api/src/endpoints/anthropic/llm.ts b/packages/api/src/endpoints/anthropic/llm.ts index 1ba4090c70..11df21b6e9 100644 --- a/packages/api/src/endpoints/anthropic/llm.ts +++ b/packages/api/src/endpoints/anthropic/llm.ts @@ -34,7 +34,6 @@ function getLLMConfig( const defaultOptions = { model: anthropicSettings.model.default, - maxOutputTokens: anthropicSettings.maxOutputTokens.default, stream: true, }; diff --git a/packages/api/src/endpoints/openai/config.anthropic.spec.ts b/packages/api/src/endpoints/openai/config.anthropic.spec.ts index cf82efcaed..ed41082547 100644 --- a/packages/api/src/endpoints/openai/config.anthropic.spec.ts +++ b/packages/api/src/endpoints/openai/config.anthropic.spec.ts @@ -30,7 +30,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => { apiKey: 'sk-xxxx', model: 'claude-sonnet-4', stream: true, - maxTokens: 8192, + maxTokens: 64000, modelKwargs: { metadata: { user_id: 'some_user_id', diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index ab8b88ec0f..81bc90a9c7 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -992,6 +992,8 @@ const sharedOpenAIModels = [ const sharedAnthropicModels = [ 'claude-sonnet-4-5', 
'claude-sonnet-4-5-20250929', + 'claude-haiku-4-5', + 'claude-haiku-4-5-20251001', 'claude-opus-4-1', 'claude-opus-4-1-20250805', 'claude-sonnet-4-20250514', @@ -1017,6 +1019,9 @@ const sharedAnthropicModels = [ ]; export const bedrockModels = [ + 'anthropic.claude-sonnet-4-5-20250929-v1:0', + 'anthropic.claude-haiku-4-5-20251001-v1:0', + 'anthropic.claude-opus-4-1-20250805-v1:0', 'anthropic.claude-3-5-sonnet-20241022-v2:0', 'anthropic.claude-3-5-sonnet-20240620-v1:0', 'anthropic.claude-3-5-haiku-20241022-v1:0', diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index a279f4f84d..ca0e31ddbc 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -339,7 +339,7 @@ export const googleSettings = { }, thinkingBudget: { min: -1 as const, - max: 32768 as const, + max: 32000 as const, step: 1 as const, /** `-1` = Dynamic Thinking, meaning the model will adjust * the budget based on the complexity of the request. @@ -349,6 +349,8 @@ export const googleSettings = { }; const ANTHROPIC_MAX_OUTPUT = 128000 as const; +const CLAUDE_4_64K_MAX_OUTPUT = 64000 as const; +const CLAUDE_32K_MAX_OUTPUT = 32000 as const; const DEFAULT_MAX_OUTPUT = 8192 as const; const LEGACY_ANTHROPIC_MAX_OUTPUT = 4096 as const; export const anthropicSettings = { @@ -379,18 +381,27 @@ export const anthropicSettings = { step: 1 as const, default: DEFAULT_MAX_OUTPUT, reset: (modelName: string) => { - if (/claude-3[-.]5-sonnet/.test(modelName) || /claude-3[-.]7/.test(modelName)) { - return DEFAULT_MAX_OUTPUT; + if (/claude-(?:sonnet|haiku)[-.]?[4-9]/.test(modelName)) { + return CLAUDE_4_64K_MAX_OUTPUT; } - return 4096; + if (/claude-opus[-.]?[4-9]/.test(modelName)) { + return CLAUDE_32K_MAX_OUTPUT; + } + + return DEFAULT_MAX_OUTPUT; }, set: (value: number, modelName: string) => { - if ( - !(/claude-3[-.]5-sonnet/.test(modelName) || /claude-3[-.]7/.test(modelName)) && - value > LEGACY_ANTHROPIC_MAX_OUTPUT - ) { - return 
LEGACY_ANTHROPIC_MAX_OUTPUT; + if (/claude-(?:sonnet|haiku)[-.]?[4-9]/.test(modelName) && value > CLAUDE_4_64K_MAX_OUTPUT) { + return CLAUDE_4_64K_MAX_OUTPUT; + } + + if (/claude-opus[-.]?[4-9]/.test(modelName) && value > CLAUDE_32K_MAX_OUTPUT) { /* Opus only: Haiku 4+ is capped by the 64K branch above */ + return CLAUDE_32K_MAX_OUTPUT; + } + + if (value > ANTHROPIC_MAX_OUTPUT) { + return ANTHROPIC_MAX_OUTPUT; } return value;