🪙 fix: Max Output Tokens Refactor for Responses API (#8972)

chore: Remove `max_output_tokens` from model kwargs in `titleConvo` if provided
Authored by Dustin Healy on 2025-08-10 10:58:25 -07:00, committed by Danny Avila
parent da3730b7d6
commit 21e00168b1
7 changed files with 143 additions and 4 deletions

@@ -1222,7 +1222,9 @@ ${convo}
     }
     if (this.isOmni === true && modelOptions.max_tokens != null) {
-      modelOptions.max_completion_tokens = modelOptions.max_tokens;
+      const paramName =
+        modelOptions.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+      modelOptions[paramName] = modelOptions.max_tokens;
       delete modelOptions.max_tokens;
     }
     if (this.isOmni === true && modelOptions.temperature != null) {
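Pulled out as a standalone sketch, the swap reads like this. `applyMaxTokensParam` is a hypothetical name (not a function in the codebase), and the `isOmni` gate from the diff is omitted for brevity:

// Hypothetical wrapper around the swap above: `max_tokens` becomes
// `max_output_tokens` for the Responses API, `max_completion_tokens` otherwise.
function applyMaxTokensParam(modelOptions) {
  if (modelOptions.max_tokens != null) {
    const paramName =
      modelOptions.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
    modelOptions[paramName] = modelOptions.max_tokens;
    delete modelOptions.max_tokens;
  }
  return modelOptions;
}

console.log(applyMaxTokensParam({ max_tokens: 1024 }));
// -> { max_completion_tokens: 1024 }
console.log(applyMaxTokensParam({ max_tokens: 1024, useResponsesApi: true }));
// -> { useResponsesApi: true, max_output_tokens: 1024 }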

@@ -1154,6 +1154,8 @@ class AgentClient extends BaseClient {
     }
     if (shouldRemoveMaxTokens && clientOptions?.modelKwargs?.max_completion_tokens != null) {
       delete clientOptions.modelKwargs.max_completion_tokens;
+    } else if (shouldRemoveMaxTokens && clientOptions?.modelKwargs?.max_output_tokens != null) {
+      delete clientOptions.modelKwargs.max_output_tokens;
     }
     clientOptions = Object.assign(
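With this branch added, `titleConvo` strips whichever max-token kwarg is present before building the title request. A minimal sketch of the cleanup, assuming hypothetical sample values (`shouldRemoveMaxTokens`, `clientOptions`, and `modelKwargs` are from the diff; `other_kwarg` is illustrative):

// Illustrative values only; in the client these come from the title model config.
const shouldRemoveMaxTokens = true;
const clientOptions = { modelKwargs: { max_output_tokens: 2048, other_kwarg: 'kept' } };

if (shouldRemoveMaxTokens && clientOptions?.modelKwargs?.max_completion_tokens != null) {
  delete clientOptions.modelKwargs.max_completion_tokens;
} else if (shouldRemoveMaxTokens && clientOptions?.modelKwargs?.max_output_tokens != null) {
  delete clientOptions.modelKwargs.max_output_tokens;
}

console.log(clientOptions.modelKwargs); // -> { other_kwarg: 'kept' }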

@@ -786,6 +786,28 @@ describe('AgentClient - titleConvo', () => {
     expect(clientOptions.temperature).toBe(0.7); // Other options should remain
   });
 
+  it('should move maxTokens to modelKwargs.max_output_tokens for GPT-5 models with useResponsesApi', () => {
+    const clientOptions = {
+      model: 'gpt-5',
+      maxTokens: 2048,
+      temperature: 0.7,
+      useResponsesApi: true,
+    };
+
+    if (/\bgpt-[5-9]\b/i.test(clientOptions.model) && clientOptions.maxTokens != null) {
+      clientOptions.modelKwargs = clientOptions.modelKwargs ?? {};
+      const paramName =
+        clientOptions.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+      clientOptions.modelKwargs[paramName] = clientOptions.maxTokens;
+      delete clientOptions.maxTokens;
+    }
+
+    expect(clientOptions.maxTokens).toBeUndefined();
+    expect(clientOptions.modelKwargs).toBeDefined();
+    expect(clientOptions.modelKwargs.max_output_tokens).toBe(2048);
+    expect(clientOptions.temperature).toBe(0.7); // Other options should remain
+  });
+
   it('should handle GPT-5+ models with existing modelKwargs', () => {
     const clientOptions = {
       model: 'gpt-6',
@@ -866,6 +888,45 @@ describe('AgentClient - titleConvo', () => {
     });
   });
 
+  it('should not swap max token param for older models when using useResponsesApi', () => {
+    const testCases = [
+      { model: 'gpt-5', shouldTransform: true },
+      { model: 'gpt-5-turbo', shouldTransform: true },
+      { model: 'gpt-6', shouldTransform: true },
+      { model: 'gpt-7-preview', shouldTransform: true },
+      { model: 'gpt-8', shouldTransform: true },
+      { model: 'gpt-9-mini', shouldTransform: true },
+      { model: 'gpt-4', shouldTransform: false },
+      { model: 'gpt-4o', shouldTransform: false },
+      { model: 'gpt-3.5-turbo', shouldTransform: false },
+      { model: 'claude-3', shouldTransform: false },
+    ];
+    testCases.forEach(({ model, shouldTransform }) => {
+      const clientOptions = {
+        model,
+        maxTokens: 1000,
+        useResponsesApi: true,
+      };
+      if (/\bgpt-[5-9]\b/i.test(clientOptions.model) && clientOptions.maxTokens != null) {
+        clientOptions.modelKwargs = clientOptions.modelKwargs ?? {};
+        const paramName =
+          clientOptions.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
+        clientOptions.modelKwargs[paramName] = clientOptions.maxTokens;
+        delete clientOptions.maxTokens;
+      }
+      if (shouldTransform) {
+        expect(clientOptions.maxTokens).toBeUndefined();
+        expect(clientOptions.modelKwargs?.max_output_tokens).toBe(1000);
+      } else {
+        expect(clientOptions.maxTokens).toBe(1000);
+        expect(clientOptions.modelKwargs).toBeUndefined();
+      }
+    });
+  });
+
+  it('should not transform if maxTokens is null or undefined', () => {
+    const testCases = [
+      { model: 'gpt-5', maxTokens: null },
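The gate in these tests is the `/\bgpt-[5-9]\b/i` pattern. A quick standalone check of how it classifies the model names from the table above (`matchesOmniGate` is a hypothetical helper name, not from the codebase):

// The word boundary after the digit lets suffixes like '-turbo' match,
// while 'gpt-4o' and 'gpt-3.5-turbo' fall outside the [5-9] range.
const matchesOmniGate = (model) => /\bgpt-[5-9]\b/i.test(model);

console.log(matchesOmniGate('gpt-5'));         // true
console.log(matchesOmniGate('gpt-5-turbo'));   // true
console.log(matchesOmniGate('gpt-9-mini'));    // true
console.log(matchesOmniGate('gpt-4o'));        // false
console.log(matchesOmniGate('gpt-3.5-turbo')); // false
console.log(matchesOmniGate('claude-3'));      // false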