Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-16 16:30:15 +01:00)
🪙 fix: Max Output Tokens Refactor for Responses API (#8972)

chore: Remove `max_output_tokens` from model kwargs in `titleConvo` if provided
This commit is contained in:
parent da3730b7d6
commit 21e00168b1
7 changed files with 143 additions and 4 deletions
|
|
@ -1222,7 +1222,9 @@ ${convo}
|
|||
}
|
||||
|
||||
if (this.isOmni === true && modelOptions.max_tokens != null) {
|
||||
modelOptions.max_completion_tokens = modelOptions.max_tokens;
|
||||
const paramName =
|
||||
modelOptions.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
|
||||
modelOptions[paramName] = modelOptions.max_tokens;
|
||||
delete modelOptions.max_tokens;
|
||||
}
|
||||
if (this.isOmni === true && modelOptions.temperature != null) {
|
||||
|
|
|
|||
|
|
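The hunk above is the core of the refactor: instead of always renaming `max_tokens` to `max_completion_tokens`, the client now picks whichever parameter the target API expects. A minimal standalone sketch of that selection, assuming a plain options object (the `ModelOptions` type and `normalizeMaxTokens` helper are illustrative, not LibreChat's actual API):

// Sketch only: type and helper name are hypothetical.
type ModelOptions = {
  max_tokens?: number;
  max_completion_tokens?: number;
  max_output_tokens?: number;
  useResponsesApi?: boolean;
};

function normalizeMaxTokens(modelOptions: ModelOptions, isOmni: boolean): ModelOptions {
  if (isOmni && modelOptions.max_tokens != null) {
    // The Responses API expects `max_output_tokens`; Chat Completions uses
    // `max_completion_tokens` for omni/reasoning models.
    const paramName =
      modelOptions.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
    modelOptions[paramName] = modelOptions.max_tokens;
    delete modelOptions.max_tokens;
  }
  return modelOptions;
}

// normalizeMaxTokens({ max_tokens: 512, useResponsesApi: true }, true)
//   -> { useResponsesApi: true, max_output_tokens: 512 }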
@@ -1154,6 +1154,8 @@ class AgentClient extends BaseClient {
     }
     if (shouldRemoveMaxTokens && clientOptions?.modelKwargs?.max_completion_tokens != null) {
       delete clientOptions.modelKwargs.max_completion_tokens;
+    } else if (shouldRemoveMaxTokens && clientOptions?.modelKwargs?.max_output_tokens != null) {
+      delete clientOptions.modelKwargs.max_output_tokens;
     }

     clientOptions = Object.assign(
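Per the commit message, title generation now also strips a `max_output_tokens` kwarg if one was provided, mirroring the existing `max_completion_tokens` cleanup. A condensed sketch, assuming `shouldRemoveMaxTokens` was computed earlier (the type and function name are hypothetical, not the real AgentClient internals):

type TitleClientOptions = {
  modelKwargs?: { max_completion_tokens?: number; max_output_tokens?: number };
};

function stripMaxTokensKwargs(
  clientOptions: TitleClientOptions,
  shouldRemoveMaxTokens: boolean,
): void {
  // Only one of the two kwargs can have been set by the earlier transform,
  // so an else-if chain suffices.
  if (shouldRemoveMaxTokens && clientOptions.modelKwargs?.max_completion_tokens != null) {
    delete clientOptions.modelKwargs.max_completion_tokens;
  } else if (shouldRemoveMaxTokens && clientOptions.modelKwargs?.max_output_tokens != null) {
    delete clientOptions.modelKwargs.max_output_tokens;
  }
}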
@@ -786,6 +786,28 @@ describe('AgentClient - titleConvo', () => {
       expect(clientOptions.temperature).toBe(0.7); // Other options should remain
     });

+    it('should move maxTokens to modelKwargs.max_output_tokens for GPT-5 models with useResponsesApi', () => {
+      const clientOptions = {
+        model: 'gpt-5',
+        maxTokens: 2048,
+        temperature: 0.7,
+        useResponsesApi: true,
+      };
+
+      if (/\bgpt-[5-9]\b/i.test(clientOptions.model) && clientOptions.maxTokens != null) {
+        clientOptions.modelKwargs = clientOptions.modelKwargs ?? {};
+        const paramName =
+          clientOptions.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+        clientOptions.modelKwargs[paramName] = clientOptions.maxTokens;
+        delete clientOptions.maxTokens;
+      }
+
+      expect(clientOptions.maxTokens).toBeUndefined();
+      expect(clientOptions.modelKwargs).toBeDefined();
+      expect(clientOptions.modelKwargs.max_output_tokens).toBe(2048);
+      expect(clientOptions.temperature).toBe(0.7); // Other options should remain
+    });
+
     it('should handle GPT-5+ models with existing modelKwargs', () => {
       const clientOptions = {
         model: 'gpt-6',
@@ -866,6 +888,45 @@ describe('AgentClient - titleConvo', () => {
       });
     });

+    it('should not swap max token param for older models when using useResponsesApi', () => {
+      const testCases = [
+        { model: 'gpt-5', shouldTransform: true },
+        { model: 'gpt-5-turbo', shouldTransform: true },
+        { model: 'gpt-6', shouldTransform: true },
+        { model: 'gpt-7-preview', shouldTransform: true },
+        { model: 'gpt-8', shouldTransform: true },
+        { model: 'gpt-9-mini', shouldTransform: true },
+        { model: 'gpt-4', shouldTransform: false },
+        { model: 'gpt-4o', shouldTransform: false },
+        { model: 'gpt-3.5-turbo', shouldTransform: false },
+        { model: 'claude-3', shouldTransform: false },
+      ];
+
+      testCases.forEach(({ model, shouldTransform }) => {
+        const clientOptions = {
+          model,
+          maxTokens: 1000,
+          useResponsesApi: true,
+        };
+
+        if (/\bgpt-[5-9]\b/i.test(clientOptions.model) && clientOptions.maxTokens != null) {
+          clientOptions.modelKwargs = clientOptions.modelKwargs ?? {};
+          const paramName =
+            clientOptions.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+          clientOptions.modelKwargs[paramName] = clientOptions.maxTokens;
+          delete clientOptions.maxTokens;
+        }
+
+        if (shouldTransform) {
+          expect(clientOptions.maxTokens).toBeUndefined();
+          expect(clientOptions.modelKwargs?.max_output_tokens).toBe(1000);
+        } else {
+          expect(clientOptions.maxTokens).toBe(1000);
+          expect(clientOptions.modelKwargs).toBeUndefined();
+        }
+      });
+    });
+
     it('should not transform if maxTokens is null or undefined', () => {
       const testCases = [
         { model: 'gpt-5', maxTokens: null },
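These test cases all turn on the gating regex `/\bgpt-[5-9]\b/i`: the trailing `\b` requires a non-word character (or end of string) right after the digit, so suffixed names like `gpt-5-turbo` match while a hypothetical `gpt-50` would not. A quick runnable check (the `isGpt5Plus` helper name is illustrative):

// The gating regex from the hunks above, wrapped in a hypothetical helper.
const isGpt5Plus = (model: string): boolean => /\bgpt-[5-9]\b/i.test(model);

console.log(isGpt5Plus('gpt-5-turbo'));   // true: '-' after the digit is a word boundary
console.log(isGpt5Plus('GPT-7-preview')); // true: /i makes the match case-insensitive
console.log(isGpt5Plus('gpt-4o'));        // false: 4 is outside [5-9]
console.log(isGpt5Plus('gpt-50'));        // false: '0' after '5' breaks the trailing \b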
@@ -398,4 +398,72 @@ describe('processMemory - GPT-5+ handling', () => {
       }),
     );
   });
+
+  it('should use max_output_tokens when useResponsesApi is true', async () => {
+    await processMemory({
+      res: mockRes as Response,
+      userId: 'test-user',
+      setMemory: mockSetMemory,
+      deleteMemory: mockDeleteMemory,
+      messages: [],
+      memory: 'Test memory',
+      messageId: 'msg-123',
+      conversationId: 'conv-123',
+      instructions: 'Test instructions',
+      llmConfig: {
+        provider: Providers.OPENAI,
+        model: 'gpt-5',
+        maxTokens: 1000,
+        useResponsesApi: true,
+      },
+    });
+
+    const { Run } = jest.requireMock('@librechat/agents');
+    expect(Run.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        graphConfig: expect.objectContaining({
+          llmConfig: expect.objectContaining({
+            model: 'gpt-5',
+            modelKwargs: {
+              max_output_tokens: 1000,
+            },
+          }),
+        }),
+      }),
+    );
+  });
+
+  it('should use max_completion_tokens when useResponsesApi is false or undefined', async () => {
+    await processMemory({
+      res: mockRes as Response,
+      userId: 'test-user',
+      setMemory: mockSetMemory,
+      deleteMemory: mockDeleteMemory,
+      messages: [],
+      memory: 'Test memory',
+      messageId: 'msg-123',
+      conversationId: 'conv-123',
+      instructions: 'Test instructions',
+      llmConfig: {
+        provider: Providers.OPENAI,
+        model: 'gpt-5',
+        maxTokens: 1000,
+        useResponsesApi: false,
+      },
+    });
+
+    const { Run } = jest.requireMock('@librechat/agents');
+    expect(Run.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        graphConfig: expect.objectContaining({
+          llmConfig: expect.objectContaining({
+            model: 'gpt-5',
+            modelKwargs: {
+              max_completion_tokens: 1000,
+            },
+          }),
+        }),
+      }),
+    );
+  });
 });
@@ -352,7 +352,11 @@ ${memory ?? 'No existing memories'}`;
     // Move maxTokens to modelKwargs for GPT-5+ models
     if ('maxTokens' in finalLLMConfig && finalLLMConfig.maxTokens != null) {
       const modelKwargs = (finalLLMConfig as OpenAIClientOptions).modelKwargs ?? {};
-      modelKwargs.max_completion_tokens = finalLLMConfig.maxTokens;
+      const paramName =
+        (finalLLMConfig as OpenAIClientOptions).useResponsesApi === true
+          ? 'max_output_tokens'
+          : 'max_completion_tokens';
+      modelKwargs[paramName] = finalLLMConfig.maxTokens;
       delete finalLLMConfig.maxTokens;
       (finalLLMConfig as OpenAIClientOptions).modelKwargs = modelKwargs;
     }
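The memory path applies the same switch before handing the config to the run, which is what the two `processMemory` tests above pin down via the mocked `Run.create`. A worked example with hypothetical values, using a pared-down type in place of `OpenAIClientOptions`:

type MemoryLLMConfig = {
  maxTokens?: number;
  useResponsesApi?: boolean;
  modelKwargs?: Record<string, number>;
};

const finalLLMConfig: MemoryLLMConfig = { maxTokens: 1000, useResponsesApi: true };

if ('maxTokens' in finalLLMConfig && finalLLMConfig.maxTokens != null) {
  const modelKwargs = finalLLMConfig.modelKwargs ?? {};
  const paramName =
    finalLLMConfig.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
  modelKwargs[paramName] = finalLLMConfig.maxTokens;
  delete finalLLMConfig.maxTokens;
  finalLLMConfig.modelKwargs = modelKwargs;
}

console.log(finalLLMConfig);
// -> { useResponsesApi: true, modelKwargs: { max_output_tokens: 1000 } }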
@@ -373,7 +373,7 @@ describe('getOpenAIConfig', () => {
         text: {
           verbosity: Verbosity.medium,
         },
-        max_completion_tokens: 1500,
+        max_output_tokens: 1500,
       });
     });

@@ -300,7 +300,9 @@ export function getOpenAIConfig(
   }

   if (llmConfig.model && /\bgpt-[5-9]\b/i.test(llmConfig.model) && llmConfig.maxTokens != null) {
-    modelKwargs.max_completion_tokens = llmConfig.maxTokens;
+    const paramName =
+      llmConfig.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+    modelKwargs[paramName] = llmConfig.maxTokens;
     delete llmConfig.maxTokens;
     hasModelKwargs = true;
   }
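End to end, `getOpenAIConfig` now routes `maxTokens` for GPT-5+ models into `modelKwargs` under the API-appropriate key, matching the updated `max_output_tokens: 1500` expectation earlier. A self-contained sketch under a pared-down config shape (the real function handles many more options):

interface SketchLLMConfig {
  model?: string;
  maxTokens?: number;
  useResponsesApi?: boolean;
}

// Returns the kwargs that would be merged into the outgoing request config.
function moveMaxTokensToKwargs(llmConfig: SketchLLMConfig): Record<string, number> {
  const modelKwargs: Record<string, number> = {};
  if (llmConfig.model && /\bgpt-[5-9]\b/i.test(llmConfig.model) && llmConfig.maxTokens != null) {
    const paramName =
      llmConfig.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
    modelKwargs[paramName] = llmConfig.maxTokens;
    delete llmConfig.maxTokens;
  }
  return modelKwargs;
}

// moveMaxTokensToKwargs({ model: 'gpt-5', maxTokens: 1500, useResponsesApi: true })
//   -> { max_output_tokens: 1500 }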