From 21e00168b107f15bcbbf9bc2e1afd00e5a479d50 Mon Sep 17 00:00:00 2001
From: Dustin Healy <54083382+dustinhealy@users.noreply.github.com>
Date: Sun, 10 Aug 2025 10:58:25 -0700
Subject: [PATCH] =?UTF-8?q?=F0=9F=AA=99=20fix:=20Max=20Output=20Tokens=20R?=
 =?UTF-8?q?efactor=20for=20Responses=20API=20(#8972)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🪙 fix: Max Output Tokens Refactor for Responses API (#8972)

chore: Remove `max_output_tokens` from model kwargs in `titleConvo` if provided
---
 api/app/clients/OpenAIClient.js               |  4 +-
 api/server/controllers/agents/client.js       |  2 +
 api/server/controllers/agents/client.test.js  | 61 +++++++++++++++++
 .../api/src/agents/__tests__/memory.test.ts   | 68 +++++++++++++++++++
 packages/api/src/agents/memory.ts             |  6 +-
 packages/api/src/endpoints/openai/llm.spec.ts |  2 +-
 packages/api/src/endpoints/openai/llm.ts      |  4 +-
 7 files changed, 143 insertions(+), 4 deletions(-)

diff --git a/api/app/clients/OpenAIClient.js b/api/app/clients/OpenAIClient.js
index 2eda322640..700fca6a7b 100644
--- a/api/app/clients/OpenAIClient.js
+++ b/api/app/clients/OpenAIClient.js
@@ -1222,7 +1222,9 @@ ${convo}
     }
 
     if (this.isOmni === true && modelOptions.max_tokens != null) {
-      modelOptions.max_completion_tokens = modelOptions.max_tokens;
+      const paramName =
+        modelOptions.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+      modelOptions[paramName] = modelOptions.max_tokens;
       delete modelOptions.max_tokens;
     }
     if (this.isOmni === true && modelOptions.temperature != null) {
diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js
index 25d98aa3fc..803475e2e3 100644
--- a/api/server/controllers/agents/client.js
+++ b/api/server/controllers/agents/client.js
@@ -1154,6 +1154,8 @@ class AgentClient extends BaseClient {
     }
     if (shouldRemoveMaxTokens && clientOptions?.modelKwargs?.max_completion_tokens != null) {
       delete clientOptions.modelKwargs.max_completion_tokens;
+    } else if (shouldRemoveMaxTokens && clientOptions?.modelKwargs?.max_output_tokens != null) {
+      delete clientOptions.modelKwargs.max_output_tokens;
     }
 
     clientOptions = Object.assign(
diff --git a/api/server/controllers/agents/client.test.js b/api/server/controllers/agents/client.test.js
index 5079bcebd8..27a54c9874 100644
--- a/api/server/controllers/agents/client.test.js
+++ b/api/server/controllers/agents/client.test.js
@@ -786,6 +786,28 @@ describe('AgentClient - titleConvo', () => {
       expect(clientOptions.temperature).toBe(0.7); // Other options should remain
     });
 
+    it('should move maxTokens to modelKwargs.max_output_tokens for GPT-5 models with useResponsesApi', () => {
+      const clientOptions = {
+        model: 'gpt-5',
+        maxTokens: 2048,
+        temperature: 0.7,
+        useResponsesApi: true,
+      };
+
+      if (/\bgpt-[5-9]\b/i.test(clientOptions.model) && clientOptions.maxTokens != null) {
+        clientOptions.modelKwargs = clientOptions.modelKwargs ?? {};
+        const paramName =
+          clientOptions.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+        clientOptions.modelKwargs[paramName] = clientOptions.maxTokens;
+        delete clientOptions.maxTokens;
+      }
+
+      expect(clientOptions.maxTokens).toBeUndefined();
+      expect(clientOptions.modelKwargs).toBeDefined();
+      expect(clientOptions.modelKwargs.max_output_tokens).toBe(2048);
+      expect(clientOptions.temperature).toBe(0.7); // Other options should remain
+    });
+
     it('should handle GPT-5+ models with existing modelKwargs', () => {
       const clientOptions = {
         model: 'gpt-6',
@@ -866,6 +888,45 @@ describe('AgentClient - titleConvo', () => {
       });
     });
 
+    it('should not swap max token param for older models when using useResponsesApi', () => {
+      const testCases = [
+        { model: 'gpt-5', shouldTransform: true },
+        { model: 'gpt-5-turbo', shouldTransform: true },
+        { model: 'gpt-6', shouldTransform: true },
+        { model: 'gpt-7-preview', shouldTransform: true },
+        { model: 'gpt-8', shouldTransform: true },
+        { model: 'gpt-9-mini', shouldTransform: true },
+        { model: 'gpt-4', shouldTransform: false },
+        { model: 'gpt-4o', shouldTransform: false },
+        { model: 'gpt-3.5-turbo', shouldTransform: false },
+        { model: 'claude-3', shouldTransform: false },
+      ];
+
+      testCases.forEach(({ model, shouldTransform }) => {
+        const clientOptions = {
+          model,
+          maxTokens: 1000,
+          useResponsesApi: true,
+        };
+
+        if (/\bgpt-[5-9]\b/i.test(clientOptions.model) && clientOptions.maxTokens != null) {
+          clientOptions.modelKwargs = clientOptions.modelKwargs ?? {};
+          const paramName =
+            clientOptions.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+          clientOptions.modelKwargs[paramName] = clientOptions.maxTokens;
+          delete clientOptions.maxTokens;
+        }
+
+        if (shouldTransform) {
+          expect(clientOptions.maxTokens).toBeUndefined();
+          expect(clientOptions.modelKwargs?.max_output_tokens).toBe(1000);
+        } else {
+          expect(clientOptions.maxTokens).toBe(1000);
+          expect(clientOptions.modelKwargs).toBeUndefined();
+        }
+      });
+    });
+
     it('should not transform if maxTokens is null or undefined', () => {
       const testCases = [
         { model: 'gpt-5', maxTokens: null },
diff --git a/packages/api/src/agents/__tests__/memory.test.ts b/packages/api/src/agents/__tests__/memory.test.ts
index 7b69992792..74cd0f4354 100644
--- a/packages/api/src/agents/__tests__/memory.test.ts
+++ b/packages/api/src/agents/__tests__/memory.test.ts
@@ -398,4 +398,72 @@ describe('processMemory - GPT-5+ handling', () => {
       }),
     );
   });
+
+  it('should use max_output_tokens when useResponsesApi is true', async () => {
+    await processMemory({
+      res: mockRes as Response,
+      userId: 'test-user',
+      setMemory: mockSetMemory,
+      deleteMemory: mockDeleteMemory,
+      messages: [],
+      memory: 'Test memory',
+      messageId: 'msg-123',
+      conversationId: 'conv-123',
+      instructions: 'Test instructions',
+      llmConfig: {
+        provider: Providers.OPENAI,
+        model: 'gpt-5',
+        maxTokens: 1000,
+        useResponsesApi: true,
+      },
+    });
+
+    const { Run } = jest.requireMock('@librechat/agents');
+    expect(Run.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        graphConfig: expect.objectContaining({
+          llmConfig: expect.objectContaining({
+            model: 'gpt-5',
+            modelKwargs: {
+              max_output_tokens: 1000,
+            },
+          }),
+        }),
+      }),
+    );
+  });
+
+  it('should use max_completion_tokens when useResponsesApi is false or undefined', async () => {
+    await processMemory({
+      res: mockRes as Response,
+      userId: 'test-user',
+      setMemory: mockSetMemory,
+      deleteMemory: mockDeleteMemory,
+      messages: [],
+      memory: 'Test memory',
+      messageId: 'msg-123',
+      conversationId: 'conv-123',
+      instructions: 'Test instructions',
+      llmConfig: {
+        provider: Providers.OPENAI,
+        model: 'gpt-5',
+        maxTokens: 1000,
+        useResponsesApi: false,
+      },
+    });
+
+    const { Run } = jest.requireMock('@librechat/agents');
+    expect(Run.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        graphConfig: expect.objectContaining({
+          llmConfig: expect.objectContaining({
+            model: 'gpt-5',
+            modelKwargs: {
+              max_completion_tokens: 1000,
+            },
+          }),
+        }),
+      }),
+    );
+  });
 });
diff --git a/packages/api/src/agents/memory.ts b/packages/api/src/agents/memory.ts
index 298ebc04f6..e5fb55ef43 100644
--- a/packages/api/src/agents/memory.ts
+++ b/packages/api/src/agents/memory.ts
@@ -352,7 +352,11 @@ ${memory ?? 'No existing memories'}`;
   // Move maxTokens to modelKwargs for GPT-5+ models
   if ('maxTokens' in finalLLMConfig && finalLLMConfig.maxTokens != null) {
     const modelKwargs = (finalLLMConfig as OpenAIClientOptions).modelKwargs ?? {};
-    modelKwargs.max_completion_tokens = finalLLMConfig.maxTokens;
+    const paramName =
+      (finalLLMConfig as OpenAIClientOptions).useResponsesApi === true
+        ? 'max_output_tokens'
+        : 'max_completion_tokens';
+    modelKwargs[paramName] = finalLLMConfig.maxTokens;
     delete finalLLMConfig.maxTokens;
     (finalLLMConfig as OpenAIClientOptions).modelKwargs = modelKwargs;
   }
diff --git a/packages/api/src/endpoints/openai/llm.spec.ts b/packages/api/src/endpoints/openai/llm.spec.ts
index 79d9e05627..90a96c118c 100644
--- a/packages/api/src/endpoints/openai/llm.spec.ts
+++ b/packages/api/src/endpoints/openai/llm.spec.ts
@@ -373,7 +373,7 @@ describe('getOpenAIConfig', () => {
       text: {
         verbosity: Verbosity.medium,
       },
-      max_completion_tokens: 1500,
+      max_output_tokens: 1500,
     });
   });
 
diff --git a/packages/api/src/endpoints/openai/llm.ts b/packages/api/src/endpoints/openai/llm.ts
index e5c1b49c53..dca5ff7f11 100644
--- a/packages/api/src/endpoints/openai/llm.ts
+++ b/packages/api/src/endpoints/openai/llm.ts
@@ -300,7 +300,9 @@ export function getOpenAIConfig(
   }
 
   if (llmConfig.model && /\bgpt-[5-9]\b/i.test(llmConfig.model) && llmConfig.maxTokens != null) {
-    modelKwargs.max_completion_tokens = llmConfig.maxTokens;
+    const paramName =
+      llmConfig.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+    modelKwargs[paramName] = llmConfig.maxTokens;
     delete llmConfig.maxTokens;
     hasModelKwargs = true;
   }
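
Reviewer note (not part of the patch): the same parameter swap now appears in
four places above (OpenAIClient.js, client.test.js, memory.ts, and llm.ts).
The sketch below distills that shared logic into a single TypeScript helper
for reference; `LLMConfig` and `relocateMaxTokens` are hypothetical names used
for illustration only, not actual LibreChat exports.

// Minimal sketch of the pattern this patch repeats; assumes a simplified
// config shape, whereas the real code mutates provider-specific option objects.
interface LLMConfig {
  model?: string;
  maxTokens?: number;
  useResponsesApi?: boolean;
  modelKwargs?: Record<string, unknown>;
}

// GPT-5+ models no longer accept the legacy `max_tokens`-style field, so the
// value moves into `modelKwargs` under the name each OpenAI API expects:
// `max_output_tokens` for the Responses API, `max_completion_tokens` for
// Chat Completions.
function relocateMaxTokens(config: LLMConfig): LLMConfig {
  if (config.model && /\bgpt-[5-9]\b/i.test(config.model) && config.maxTokens != null) {
    const paramName =
      config.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
    config.modelKwargs = { ...(config.modelKwargs ?? {}), [paramName]: config.maxTokens };
    delete config.maxTokens;
  }
  return config;
}

// Example:
// relocateMaxTokens({ model: 'gpt-5', maxTokens: 1000, useResponsesApi: true })
//   -> { model: 'gpt-5', useResponsesApi: true, modelKwargs: { max_output_tokens: 1000 } }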