🪙 fix: Max Output Tokens Refactor for Responses API (#8972)

chore: Remove `max_output_tokens` from model kwargs in `titleConvo` if provided
parent da3730b7d6
commit 21e00168b1
7 changed files with 143 additions and 4 deletions
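The gist of the refactor: when a GPT-5+ model is configured with a `maxTokens` value, that value is moved into `modelKwargs` under the parameter name the target API expects, instead of always landing in `max_completion_tokens`. A minimal standalone sketch of that routing, with `LLMConfig` as a simplified stand-in for the real `OpenAIClientOptions` type used in the diffs below:

    // Simplified stand-in for the client options type in the actual diffs.
    interface LLMConfig {
      model: string;
      maxTokens?: number;
      useResponsesApi?: boolean;
      modelKwargs?: Record<string, unknown>;
    }

    // Move `maxTokens` into `modelKwargs` under the API-appropriate key.
    function routeMaxTokens(llmConfig: LLMConfig): LLMConfig {
      if (llmConfig.maxTokens != null) {
        const modelKwargs = llmConfig.modelKwargs ?? {};
        // The Responses API takes `max_output_tokens`; Chat Completions
        // (for GPT-5+ models) takes `max_completion_tokens`.
        const paramName =
          llmConfig.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
        modelKwargs[paramName] = llmConfig.maxTokens;
        delete llmConfig.maxTokens;
        llmConfig.modelKwargs = modelKwargs;
      }
      return llmConfig;
    }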
@@ -398,4 +398,72 @@ describe('processMemory - GPT-5+ handling', () => {
       }),
     );
   });
+
+  it('should use max_output_tokens when useResponsesApi is true', async () => {
+    await processMemory({
+      res: mockRes as Response,
+      userId: 'test-user',
+      setMemory: mockSetMemory,
+      deleteMemory: mockDeleteMemory,
+      messages: [],
+      memory: 'Test memory',
+      messageId: 'msg-123',
+      conversationId: 'conv-123',
+      instructions: 'Test instructions',
+      llmConfig: {
+        provider: Providers.OPENAI,
+        model: 'gpt-5',
+        maxTokens: 1000,
+        useResponsesApi: true,
+      },
+    });
+
+    const { Run } = jest.requireMock('@librechat/agents');
+    expect(Run.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        graphConfig: expect.objectContaining({
+          llmConfig: expect.objectContaining({
+            model: 'gpt-5',
+            modelKwargs: {
+              max_output_tokens: 1000,
+            },
+          }),
+        }),
+      }),
+    );
+  });
+
+  it('should use max_completion_tokens when useResponsesApi is false or undefined', async () => {
+    await processMemory({
+      res: mockRes as Response,
+      userId: 'test-user',
+      setMemory: mockSetMemory,
+      deleteMemory: mockDeleteMemory,
+      messages: [],
+      memory: 'Test memory',
+      messageId: 'msg-123',
+      conversationId: 'conv-123',
+      instructions: 'Test instructions',
+      llmConfig: {
+        provider: Providers.OPENAI,
+        model: 'gpt-5',
+        maxTokens: 1000,
+        useResponsesApi: false,
+      },
+    });
+
+    const { Run } = jest.requireMock('@librechat/agents');
+    expect(Run.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        graphConfig: expect.objectContaining({
+          llmConfig: expect.objectContaining({
+            model: 'gpt-5',
+            modelKwargs: {
+              max_completion_tokens: 1000,
+            },
+          }),
+        }),
+      }),
+    );
+  });
 });
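The two new tests pin the same input (`maxTokens: 1000` on `gpt-5`) to different model kwargs depending on the flag. Condensed with the `routeMaxTokens` sketch above (illustrative only; the real tests go through `processMemory` and the mocked `Run.create`):

    routeMaxTokens({ model: 'gpt-5', maxTokens: 1000, useResponsesApi: true });
    // -> { model: 'gpt-5', useResponsesApi: true, modelKwargs: { max_output_tokens: 1000 } }

    routeMaxTokens({ model: 'gpt-5', maxTokens: 1000, useResponsesApi: false });
    // -> { model: 'gpt-5', useResponsesApi: false, modelKwargs: { max_completion_tokens: 1000 } }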
@@ -352,7 +352,11 @@ ${memory ?? 'No existing memories'}`;
   // Move maxTokens to modelKwargs for GPT-5+ models
   if ('maxTokens' in finalLLMConfig && finalLLMConfig.maxTokens != null) {
     const modelKwargs = (finalLLMConfig as OpenAIClientOptions).modelKwargs ?? {};
-    modelKwargs.max_completion_tokens = finalLLMConfig.maxTokens;
+    const paramName =
+      (finalLLMConfig as OpenAIClientOptions).useResponsesApi === true
+        ? 'max_output_tokens'
+        : 'max_completion_tokens';
+    modelKwargs[paramName] = finalLLMConfig.maxTokens;
     delete finalLLMConfig.maxTokens;
     (finalLLMConfig as OpenAIClientOptions).modelKwargs = modelKwargs;
   }
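For context, `modelKwargs` is forwarded to the underlying OpenAI client, so the two keys end up in different wire formats. A hedged sketch of the resulting request bodies (field names per OpenAI's public API docs; the exact pass-through behavior belongs to `@librechat/agents` and is an assumption here):

    // Responses API (useResponsesApi === true):
    // POST /v1/responses
    // { "model": "gpt-5", "input": [...], "max_output_tokens": 1000 }

    // Chat Completions (useResponsesApi falsy) — newer models reject the legacy
    // `max_tokens` field and expect `max_completion_tokens` instead:
    // POST /v1/chat/completions
    // { "model": "gpt-5", "messages": [...], "max_completion_tokens": 1000 }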
@@ -373,7 +373,7 @@ describe('getOpenAIConfig', () => {
       text: {
         verbosity: Verbosity.medium,
       },
-      max_completion_tokens: 1500,
+      max_output_tokens: 1500,
     });
   });
@@ -300,7 +300,9 @@ export function getOpenAIConfig(
   }
 
   if (llmConfig.model && /\bgpt-[5-9]\b/i.test(llmConfig.model) && llmConfig.maxTokens != null) {
-    modelKwargs.max_completion_tokens = llmConfig.maxTokens;
+    const paramName =
+      llmConfig.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+    modelKwargs[paramName] = llmConfig.maxTokens;
     delete llmConfig.maxTokens;
     hasModelKwargs = true;
   }
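The guard on this branch is unchanged by the commit: `/\bgpt-[5-9]\b/i` matches model ids that contain `gpt-5` through `gpt-9` as a whole token. A quick illustration (the model ids below are hypothetical, chosen to show the word-boundary behavior):

    const isGpt5Plus = (model: string) => /\bgpt-[5-9]\b/i.test(model);

    isGpt5Plus('gpt-5');      // true
    isGpt5Plus('GPT-5-mini'); // true  (case-insensitive; `-` counts as a boundary)
    isGpt5Plus('gpt-4o');     // false (digit outside [5-9])
    isGpt5Plus('gpt-50');     // false (trailing \b requires the digit to end the token)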