mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-18 09:20:15 +01:00
🎚️ fix: Default Max Output Tokens for Claude 4+ Models (#10293)
This commit is contained in:
parent
70ff6e94f2
commit
8adef91cf5
5 changed files with 256 additions and 17 deletions
|
|
@ -245,8 +245,8 @@ describe('getLLMConfig', () => {
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
// The actual anthropicSettings.maxOutputTokens.reset('claude-3-opus') returns 4096
|
// The actual anthropicSettings.maxOutputTokens.reset('claude-3-opus') returns 8192
|
||||||
expect(result.llmConfig).toHaveProperty('maxTokens', 4096);
|
expect(result.llmConfig).toHaveProperty('maxTokens', 8192);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle both proxy and reverseProxyUrl', () => {
|
it('should handle both proxy and reverseProxyUrl', () => {
|
||||||
|
|
@ -698,9 +698,17 @@ describe('getLLMConfig', () => {
|
||||||
{ model: 'claude-3.5-sonnet-20241022', expectedMaxTokens: 8192 },
|
{ model: 'claude-3.5-sonnet-20241022', expectedMaxTokens: 8192 },
|
||||||
{ model: 'claude-3-7-sonnet', expectedMaxTokens: 8192 },
|
{ model: 'claude-3-7-sonnet', expectedMaxTokens: 8192 },
|
||||||
{ model: 'claude-3.7-sonnet-20250109', expectedMaxTokens: 8192 },
|
{ model: 'claude-3.7-sonnet-20250109', expectedMaxTokens: 8192 },
|
||||||
{ model: 'claude-3-opus', expectedMaxTokens: 4096 },
|
{ model: 'claude-3-opus', expectedMaxTokens: 8192 },
|
||||||
{ model: 'claude-3-haiku', expectedMaxTokens: 4096 },
|
{ model: 'claude-3-haiku', expectedMaxTokens: 8192 },
|
||||||
{ model: 'claude-2.1', expectedMaxTokens: 4096 },
|
{ model: 'claude-2.1', expectedMaxTokens: 8192 },
|
||||||
|
{ model: 'claude-sonnet-4-5', expectedMaxTokens: 64000 },
|
||||||
|
{ model: 'claude-sonnet-4-5-20250929', expectedMaxTokens: 64000 },
|
||||||
|
{ model: 'claude-haiku-4-5', expectedMaxTokens: 64000 },
|
||||||
|
{ model: 'claude-haiku-4-5-20251001', expectedMaxTokens: 64000 },
|
||||||
|
{ model: 'claude-opus-4-1', expectedMaxTokens: 32000 },
|
||||||
|
{ model: 'claude-opus-4-1-20250805', expectedMaxTokens: 32000 },
|
||||||
|
{ model: 'claude-sonnet-4-20250514', expectedMaxTokens: 64000 },
|
||||||
|
{ model: 'claude-opus-4-0', expectedMaxTokens: 32000 },
|
||||||
];
|
];
|
||||||
|
|
||||||
testCases.forEach(({ model, expectedMaxTokens }) => {
|
testCases.forEach(({ model, expectedMaxTokens }) => {
|
||||||
|
|
@ -729,6 +737,222 @@ describe('getLLMConfig', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('Claude 4.x Model maxOutputTokens Defaults', () => {
|
||||||
|
it('should default Claude Sonnet 4.x models to 64K tokens', () => {
|
||||||
|
const testCases = ['claude-sonnet-4-5', 'claude-sonnet-4-5-20250929', 'claude-sonnet-4.5'];
|
||||||
|
|
||||||
|
testCases.forEach((model) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: { model },
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(64000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should default Claude Haiku 4.x models to 64K tokens', () => {
|
||||||
|
const testCases = ['claude-haiku-4-5', 'claude-haiku-4-5-20251001', 'claude-haiku-4.5'];
|
||||||
|
|
||||||
|
testCases.forEach((model) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: { model },
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(64000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should default Claude Opus 4.x models to 32K tokens', () => {
|
||||||
|
const testCases = ['claude-opus-4-1', 'claude-opus-4-1-20250805', 'claude-opus-4.1'];
|
||||||
|
|
||||||
|
testCases.forEach((model) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: { model },
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(32000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should default future Claude 4.x Sonnet/Haiku models to 64K (future-proofing)', () => {
|
||||||
|
const testCases = ['claude-sonnet-4-20250514', 'claude-sonnet-4-9', 'claude-haiku-4-8'];
|
||||||
|
|
||||||
|
testCases.forEach((model) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: { model },
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(64000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should default future Claude 4.x Opus models to 32K (future-proofing)', () => {
|
||||||
|
const testCases = ['claude-opus-4-0', 'claude-opus-4-7'];
|
||||||
|
|
||||||
|
testCases.forEach((model) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: { model },
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(32000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle explicit maxOutputTokens override for Claude 4.x models', () => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: {
|
||||||
|
model: 'claude-sonnet-4-5',
|
||||||
|
maxOutputTokens: 64000, // Explicitly set to 64K
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(64000);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle undefined maxOutputTokens for Claude 4.x (use reset default)', () => {
|
||||||
|
const testCases = [
|
||||||
|
{ model: 'claude-sonnet-4-5', expected: 64000 },
|
||||||
|
{ model: 'claude-haiku-4-5', expected: 64000 },
|
||||||
|
{ model: 'claude-opus-4-1', expected: 32000 },
|
||||||
|
];
|
||||||
|
|
||||||
|
testCases.forEach(({ model, expected }) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: {
|
||||||
|
model,
|
||||||
|
maxOutputTokens: undefined,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(expected);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle Claude 4 Sonnet/Haiku with thinking enabled', () => {
|
||||||
|
const testCases = ['claude-sonnet-4-5', 'claude-haiku-4-5'];
|
||||||
|
|
||||||
|
testCases.forEach((model) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: {
|
||||||
|
model,
|
||||||
|
thinking: true,
|
||||||
|
thinkingBudget: 10000,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.llmConfig.thinking).toMatchObject({
|
||||||
|
type: 'enabled',
|
||||||
|
budget_tokens: 10000,
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(64000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle Claude 4 Opus with thinking enabled', () => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: {
|
||||||
|
model: 'claude-opus-4-1',
|
||||||
|
thinking: true,
|
||||||
|
thinkingBudget: 10000,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.llmConfig.thinking).toMatchObject({
|
||||||
|
type: 'enabled',
|
||||||
|
budget_tokens: 10000,
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(32000);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should respect model-specific maxOutputTokens for Claude 4.x models', () => {
|
||||||
|
const testCases = [
|
||||||
|
{ model: 'claude-sonnet-4-5', maxOutputTokens: 50000, expected: 50000 },
|
||||||
|
{ model: 'claude-haiku-4-5', maxOutputTokens: 40000, expected: 40000 },
|
||||||
|
{ model: 'claude-opus-4-1', maxOutputTokens: 20000, expected: 20000 },
|
||||||
|
];
|
||||||
|
|
||||||
|
testCases.forEach(({ model, maxOutputTokens, expected }) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: {
|
||||||
|
model,
|
||||||
|
maxOutputTokens,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(expected);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should future-proof Claude 5.x Sonnet models with 64K default', () => {
|
||||||
|
const testCases = [
|
||||||
|
'claude-sonnet-5',
|
||||||
|
'claude-sonnet-5-0',
|
||||||
|
'claude-sonnet-5-2-20260101',
|
||||||
|
'claude-sonnet-5.5',
|
||||||
|
];
|
||||||
|
|
||||||
|
testCases.forEach((model) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: { model },
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(64000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should future-proof Claude 5.x Haiku models with 64K default', () => {
|
||||||
|
const testCases = [
|
||||||
|
'claude-haiku-5',
|
||||||
|
'claude-haiku-5-0',
|
||||||
|
'claude-haiku-5-2-20260101',
|
||||||
|
'claude-haiku-5.5',
|
||||||
|
];
|
||||||
|
|
||||||
|
testCases.forEach((model) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: { model },
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(64000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should future-proof Claude 5.x Opus models with 32K default', () => {
|
||||||
|
const testCases = [
|
||||||
|
'claude-opus-5',
|
||||||
|
'claude-opus-5-0',
|
||||||
|
'claude-opus-5-2-20260101',
|
||||||
|
'claude-opus-5.5',
|
||||||
|
];
|
||||||
|
|
||||||
|
testCases.forEach((model) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: { model },
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(32000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should future-proof Claude 6-9.x models with correct defaults', () => {
|
||||||
|
const testCases = [
|
||||||
|
// Claude 6.x
|
||||||
|
{ model: 'claude-sonnet-6', expected: 64000 },
|
||||||
|
{ model: 'claude-haiku-6-0', expected: 64000 },
|
||||||
|
{ model: 'claude-opus-6-1', expected: 32000 },
|
||||||
|
// Claude 7.x
|
||||||
|
{ model: 'claude-sonnet-7-20270101', expected: 64000 },
|
||||||
|
{ model: 'claude-haiku-7.5', expected: 64000 },
|
||||||
|
{ model: 'claude-opus-7', expected: 32000 },
|
||||||
|
// Claude 8.x
|
||||||
|
{ model: 'claude-sonnet-8', expected: 64000 },
|
||||||
|
{ model: 'claude-haiku-8-2', expected: 64000 },
|
||||||
|
{ model: 'claude-opus-8-latest', expected: 32000 },
|
||||||
|
// Claude 9.x
|
||||||
|
{ model: 'claude-sonnet-9', expected: 64000 },
|
||||||
|
{ model: 'claude-haiku-9', expected: 64000 },
|
||||||
|
{ model: 'claude-opus-9', expected: 32000 },
|
||||||
|
];
|
||||||
|
|
||||||
|
testCases.forEach(({ model, expected }) => {
|
||||||
|
const result = getLLMConfig('test-key', {
|
||||||
|
modelOptions: { model },
|
||||||
|
});
|
||||||
|
expect(result.llmConfig.maxTokens).toBe(expected);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('Parameter Boundary and Validation Logic', () => {
|
describe('Parameter Boundary and Validation Logic', () => {
|
||||||
it('should handle temperature boundary values', () => {
|
it('should handle temperature boundary values', () => {
|
||||||
const testCases = [
|
const testCases = [
|
||||||
|
|
@ -784,7 +1008,7 @@ describe('getLLMConfig', () => {
|
||||||
it('should handle maxOutputTokens boundary values', () => {
|
it('should handle maxOutputTokens boundary values', () => {
|
||||||
const testCases = [
|
const testCases = [
|
||||||
{ model: 'claude-3-opus', maxOutputTokens: 1, expected: 1 }, // min
|
{ model: 'claude-3-opus', maxOutputTokens: 1, expected: 1 }, // min
|
||||||
{ model: 'claude-3-opus', maxOutputTokens: 4096, expected: 4096 }, // max for legacy
|
{ model: 'claude-3-opus', maxOutputTokens: 8192, expected: 8192 }, // default for claude-3
|
||||||
{ model: 'claude-3-5-sonnet', maxOutputTokens: 1, expected: 1 }, // min
|
{ model: 'claude-3-5-sonnet', maxOutputTokens: 1, expected: 1 }, // min
|
||||||
{ model: 'claude-3-5-sonnet', maxOutputTokens: 200000, expected: 200000 }, // max for new
|
{ model: 'claude-3-5-sonnet', maxOutputTokens: 200000, expected: 200000 }, // max for new
|
||||||
{ model: 'claude-3-7-sonnet', maxOutputTokens: 8192, expected: 8192 }, // default
|
{ model: 'claude-3-7-sonnet', maxOutputTokens: 8192, expected: 8192 }, // default
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,6 @@ function getLLMConfig(
|
||||||
|
|
||||||
const defaultOptions = {
|
const defaultOptions = {
|
||||||
model: anthropicSettings.model.default,
|
model: anthropicSettings.model.default,
|
||||||
maxOutputTokens: anthropicSettings.maxOutputTokens.default,
|
|
||||||
stream: true,
|
stream: true,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
||||||
apiKey: 'sk-xxxx',
|
apiKey: 'sk-xxxx',
|
||||||
model: 'claude-sonnet-4',
|
model: 'claude-sonnet-4',
|
||||||
stream: true,
|
stream: true,
|
||||||
maxTokens: 8192,
|
maxTokens: 64000,
|
||||||
modelKwargs: {
|
modelKwargs: {
|
||||||
metadata: {
|
metadata: {
|
||||||
user_id: 'some_user_id',
|
user_id: 'some_user_id',
|
||||||
|
|
|
||||||
|
|
@ -992,6 +992,8 @@ const sharedOpenAIModels = [
|
||||||
const sharedAnthropicModels = [
|
const sharedAnthropicModels = [
|
||||||
'claude-sonnet-4-5',
|
'claude-sonnet-4-5',
|
||||||
'claude-sonnet-4-5-20250929',
|
'claude-sonnet-4-5-20250929',
|
||||||
|
'claude-haiku-4-5',
|
||||||
|
'claude-haiku-4-5-20251001',
|
||||||
'claude-opus-4-1',
|
'claude-opus-4-1',
|
||||||
'claude-opus-4-1-20250805',
|
'claude-opus-4-1-20250805',
|
||||||
'claude-sonnet-4-20250514',
|
'claude-sonnet-4-20250514',
|
||||||
|
|
@ -1017,6 +1019,9 @@ const sharedAnthropicModels = [
|
||||||
];
|
];
|
||||||
|
|
||||||
export const bedrockModels = [
|
export const bedrockModels = [
|
||||||
|
'anthropic.claude-sonnet-4-5-20250929-v1:0',
|
||||||
|
'anthropic.claude-haiku-4-5-20251001-v1:0',
|
||||||
|
'anthropic.claude-opus-4-1-20250805-v1:0',
|
||||||
'anthropic.claude-3-5-sonnet-20241022-v2:0',
|
'anthropic.claude-3-5-sonnet-20241022-v2:0',
|
||||||
'anthropic.claude-3-5-sonnet-20240620-v1:0',
|
'anthropic.claude-3-5-sonnet-20240620-v1:0',
|
||||||
'anthropic.claude-3-5-haiku-20241022-v1:0',
|
'anthropic.claude-3-5-haiku-20241022-v1:0',
|
||||||
|
|
|
||||||
|
|
@ -339,7 +339,7 @@ export const googleSettings = {
|
||||||
},
|
},
|
||||||
thinkingBudget: {
|
thinkingBudget: {
|
||||||
min: -1 as const,
|
min: -1 as const,
|
||||||
max: 32768 as const,
|
max: 32000 as const,
|
||||||
step: 1 as const,
|
step: 1 as const,
|
||||||
/** `-1` = Dynamic Thinking, meaning the model will adjust
|
/** `-1` = Dynamic Thinking, meaning the model will adjust
|
||||||
* the budget based on the complexity of the request.
|
* the budget based on the complexity of the request.
|
||||||
|
|
@ -349,6 +349,8 @@ export const googleSettings = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const ANTHROPIC_MAX_OUTPUT = 128000 as const;
|
const ANTHROPIC_MAX_OUTPUT = 128000 as const;
|
||||||
|
const CLAUDE_4_64K_MAX_OUTPUT = 64000 as const;
|
||||||
|
const CLAUDE_32K_MAX_OUTPUT = 32000 as const;
|
||||||
const DEFAULT_MAX_OUTPUT = 8192 as const;
|
const DEFAULT_MAX_OUTPUT = 8192 as const;
|
||||||
const LEGACY_ANTHROPIC_MAX_OUTPUT = 4096 as const;
|
const LEGACY_ANTHROPIC_MAX_OUTPUT = 4096 as const;
|
||||||
export const anthropicSettings = {
|
export const anthropicSettings = {
|
||||||
|
|
@ -379,18 +381,27 @@ export const anthropicSettings = {
|
||||||
step: 1 as const,
|
step: 1 as const,
|
||||||
default: DEFAULT_MAX_OUTPUT,
|
default: DEFAULT_MAX_OUTPUT,
|
||||||
reset: (modelName: string) => {
|
reset: (modelName: string) => {
|
||||||
if (/claude-3[-.]5-sonnet/.test(modelName) || /claude-3[-.]7/.test(modelName)) {
|
if (/claude-(?:sonnet|haiku)[-.]?[4-9]/.test(modelName)) {
|
||||||
return DEFAULT_MAX_OUTPUT;
|
return CLAUDE_4_64K_MAX_OUTPUT;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 4096;
|
if (/claude-opus[-.]?[4-9]/.test(modelName)) {
|
||||||
|
return CLAUDE_32K_MAX_OUTPUT;
|
||||||
|
}
|
||||||
|
|
||||||
|
return DEFAULT_MAX_OUTPUT;
|
||||||
},
|
},
|
||||||
set: (value: number, modelName: string) => {
|
set: (value: number, modelName: string) => {
|
||||||
if (
|
if (/claude-(?:sonnet|haiku)[-.]?[4-9]/.test(modelName) && value > CLAUDE_4_64K_MAX_OUTPUT) {
|
||||||
!(/claude-3[-.]5-sonnet/.test(modelName) || /claude-3[-.]7/.test(modelName)) &&
|
return CLAUDE_4_64K_MAX_OUTPUT;
|
||||||
value > LEGACY_ANTHROPIC_MAX_OUTPUT
|
}
|
||||||
) {
|
|
||||||
return LEGACY_ANTHROPIC_MAX_OUTPUT;
|
if (/claude-(?:opus|haiku)[-.]?[4-9]/.test(modelName) && value > CLAUDE_32K_MAX_OUTPUT) {
|
||||||
|
return CLAUDE_32K_MAX_OUTPUT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (value > ANTHROPIC_MAX_OUTPUT) {
|
||||||
|
return ANTHROPIC_MAX_OUTPUT;
|
||||||
}
|
}
|
||||||
|
|
||||||
return value;
|
return value;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue