🪙 fix: Max Output Tokens Refactor for Responses API (#8972)

chore: Remove `max_output_tokens` from model kwargs in `titleConvo` if provided
Dustin Healy 2025-08-10 10:58:25 -07:00 committed by Danny Avila
parent da3730b7d6
commit 21e00168b1
7 changed files with 143 additions and 4 deletions
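
For reference, a minimal TypeScript sketch of the rule this commit applies in both `getOpenAIConfig` and `processMemory`: when the OpenAI Responses API is enabled, `maxTokens` is moved into `modelKwargs` as `max_output_tokens`, otherwise as `max_completion_tokens`. The helper names below (`pickMaxTokensParam`, `moveMaxTokens`) are illustrative only and are not part of the LibreChat codebase.

```ts
// Hypothetical standalone sketch of the parameter-selection rule in this commit;
// it mirrors the branch added to getOpenAIConfig/processMemory but is not their API.
type ModelKwargs = Record<string, unknown>;

interface SketchLLMConfig {
  maxTokens?: number;
  useResponsesApi?: boolean;
}

function pickMaxTokensParam(useResponsesApi?: boolean): string {
  // Responses API expects `max_output_tokens`; Chat Completions uses `max_completion_tokens`.
  return useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
}

function moveMaxTokens(llmConfig: SketchLLMConfig, modelKwargs: ModelKwargs): ModelKwargs {
  if (llmConfig.maxTokens != null) {
    modelKwargs[pickMaxTokensParam(llmConfig.useResponsesApi)] = llmConfig.maxTokens;
    delete llmConfig.maxTokens;
  }
  return modelKwargs;
}

// Mirrors the tests below: { max_output_tokens: 1000 } on the Responses API path,
// { max_completion_tokens: 1000 } otherwise.
console.log(moveMaxTokens({ maxTokens: 1000, useResponsesApi: true }, {}));
console.log(moveMaxTokens({ maxTokens: 1000, useResponsesApi: false }, {}));
```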

@@ -398,4 +398,72 @@ describe('processMemory - GPT-5+ handling', () => {
       }),
     );
   });
+
+  it('should use max_output_tokens when useResponsesApi is true', async () => {
+    await processMemory({
+      res: mockRes as Response,
+      userId: 'test-user',
+      setMemory: mockSetMemory,
+      deleteMemory: mockDeleteMemory,
+      messages: [],
+      memory: 'Test memory',
+      messageId: 'msg-123',
+      conversationId: 'conv-123',
+      instructions: 'Test instructions',
+      llmConfig: {
+        provider: Providers.OPENAI,
+        model: 'gpt-5',
+        maxTokens: 1000,
+        useResponsesApi: true,
+      },
+    });
+
+    const { Run } = jest.requireMock('@librechat/agents');
+    expect(Run.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        graphConfig: expect.objectContaining({
+          llmConfig: expect.objectContaining({
+            model: 'gpt-5',
+            modelKwargs: {
+              max_output_tokens: 1000,
+            },
+          }),
+        }),
+      }),
+    );
+  });
+
+  it('should use max_completion_tokens when useResponsesApi is false or undefined', async () => {
+    await processMemory({
+      res: mockRes as Response,
+      userId: 'test-user',
+      setMemory: mockSetMemory,
+      deleteMemory: mockDeleteMemory,
+      messages: [],
+      memory: 'Test memory',
+      messageId: 'msg-123',
+      conversationId: 'conv-123',
+      instructions: 'Test instructions',
+      llmConfig: {
+        provider: Providers.OPENAI,
+        model: 'gpt-5',
+        maxTokens: 1000,
+        useResponsesApi: false,
+      },
+    });
+
+    const { Run } = jest.requireMock('@librechat/agents');
+    expect(Run.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        graphConfig: expect.objectContaining({
+          llmConfig: expect.objectContaining({
+            model: 'gpt-5',
+            modelKwargs: {
+              max_completion_tokens: 1000,
+            },
+          }),
+        }),
+      }),
+    );
+  });
 });

@@ -352,7 +352,11 @@ ${memory ?? 'No existing memories'}`;
   // Move maxTokens to modelKwargs for GPT-5+ models
   if ('maxTokens' in finalLLMConfig && finalLLMConfig.maxTokens != null) {
     const modelKwargs = (finalLLMConfig as OpenAIClientOptions).modelKwargs ?? {};
-    modelKwargs.max_completion_tokens = finalLLMConfig.maxTokens;
+    const paramName =
+      (finalLLMConfig as OpenAIClientOptions).useResponsesApi === true
+        ? 'max_output_tokens'
+        : 'max_completion_tokens';
+    modelKwargs[paramName] = finalLLMConfig.maxTokens;
     delete finalLLMConfig.maxTokens;
     (finalLLMConfig as OpenAIClientOptions).modelKwargs = modelKwargs;
   }

@@ -373,7 +373,7 @@ describe('getOpenAIConfig', () => {
       text: {
         verbosity: Verbosity.medium,
       },
-      max_completion_tokens: 1500,
+      max_output_tokens: 1500,
     });
   });

@@ -300,7 +300,9 @@ export function getOpenAIConfig(
   }
   if (llmConfig.model && /\bgpt-[5-9]\b/i.test(llmConfig.model) && llmConfig.maxTokens != null) {
-    modelKwargs.max_completion_tokens = llmConfig.maxTokens;
+    const paramName =
+      llmConfig.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+    modelKwargs[paramName] = llmConfig.maxTokens;
     delete llmConfig.maxTokens;
     hasModelKwargs = true;
   }
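
As a quick sanity check on the model gate above, `/\bgpt-[5-9]\b/i` only triggers the kwargs move for GPT-5 through GPT-9 style names; the model strings below are illustrative examples, not an exhaustive list of what LibreChat supports.

```ts
// Illustrative check of the GPT-5+ gate used in getOpenAIConfig.
const isGpt5Plus = (model: string): boolean => /\bgpt-[5-9]\b/i.test(model);

console.log(isGpt5Plus('gpt-5'));      // true
console.log(isGpt5Plus('gpt-5-mini')); // true  (word boundary after the digit)
console.log(isGpt5Plus('GPT-6'));      // true  (case-insensitive flag)
console.log(isGpt5Plus('gpt-4o'));     // false (not GPT-5+, so this branch does not apply)
```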