From d3622844ad51945cde2d21d46b6626fd4f8df692 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Tue, 3 Mar 2026 20:44:05 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=B0=20feat:=20Add=20gpt-5.3=20context?= =?UTF-8?q?=20window=20and=20pricing=20(#12049)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 💰 feat: Add gpt-5.3 context window and pricing * 💰 feat: Add OpenAI cached input pricing and `gpt-5.2-pro` model - Add cached input pricing (write/read) for gpt-4o, gpt-4.1, gpt-5.x, o1, o3, o4-mini models with correct per-family discount tiers - Add gpt-5.2-pro pricing ($21/$168), context window, and max output - Pro models (gpt-5-pro, gpt-5.2-pro) correctly excluded from cache pricing as OpenAI does not support caching for these * 🔍 fix: Address review findings for OpenAI pricing - Add o1-preview to cacheTokenValues (50% discount, same as o1) - Fix comment to enumerate all models per discount tier - Add cache tests for dated variants (gpt-4o-2024-08-06, etc.) - Add gpt-5-mini/gpt-5-nano to 10% ratio invariant test - Replace forEach with for...of in new test code - Fix inconsistent test description phrasing - Add gpt-5.3-preview to context window tests --- api/models/tx.js | 24 ++++++++ api/models/tx.spec.js | 96 ++++++++++++++++++++++++++++++++ api/utils/tokens.spec.js | 34 ++++++++++- packages/api/src/utils/tokens.ts | 4 ++ 4 files changed, 156 insertions(+), 2 deletions(-) diff --git a/api/models/tx.js b/api/models/tx.js index 488ae7604e..b8790a8a75 100644 --- a/api/models/tx.js +++ b/api/models/tx.js @@ -150,9 +150,11 @@ const tokenValues = Object.assign( 'gpt-5': { prompt: 1.25, completion: 10 }, 'gpt-5.1': { prompt: 1.25, completion: 10 }, 'gpt-5.2': { prompt: 1.75, completion: 14 }, + 'gpt-5.3': { prompt: 1.75, completion: 14 }, 'gpt-5-nano': { prompt: 0.05, completion: 0.4 }, 'gpt-5-mini': { prompt: 0.25, completion: 2 }, 'gpt-5-pro': { prompt: 15, completion: 120 }, + 'gpt-5.2-pro': { prompt: 21, completion: 168 }, o1: { prompt: 15, completion: 60 }, 'o1-mini': { prompt: 1.1, completion: 4.4 }, 'o1-preview': { prompt: 15, completion: 60 }, @@ -316,6 +318,28 @@ const cacheTokenValues = { 'claude-opus-4': { write: 18.75, read: 1.5 }, 'claude-opus-4-5': { write: 6.25, read: 0.5 }, 'claude-opus-4-6': { write: 6.25, read: 0.5 }, + // OpenAI models — cached input discount varies by family: + // gpt-4o (incl. mini), o1 (incl. mini/preview): 50% off + // gpt-4.1 (incl. mini/nano), o3 (incl. mini), o4-mini: 75% off + // gpt-5.x (excl. pro variants): 90% off + // gpt-5-pro, gpt-5.2-pro: no caching + 'gpt-4o': { write: 2.5, read: 1.25 }, + 'gpt-4o-mini': { write: 0.15, read: 0.075 }, + 'gpt-4.1': { write: 2, read: 0.5 }, + 'gpt-4.1-mini': { write: 0.4, read: 0.1 }, + 'gpt-4.1-nano': { write: 0.1, read: 0.025 }, + 'gpt-5': { write: 1.25, read: 0.125 }, + 'gpt-5.1': { write: 1.25, read: 0.125 }, + 'gpt-5.2': { write: 1.75, read: 0.175 }, + 'gpt-5.3': { write: 1.75, read: 0.175 }, + 'gpt-5-mini': { write: 0.25, read: 0.025 }, + 'gpt-5-nano': { write: 0.05, read: 0.005 }, + o1: { write: 15, read: 7.5 }, + 'o1-mini': { write: 1.1, read: 0.55 }, + 'o1-preview': { write: 15, read: 7.5 }, + o3: { write: 2, read: 0.5 }, + 'o3-mini': { write: 1.1, read: 0.275 }, + 'o4-mini': { write: 1.1, read: 0.275 }, // DeepSeek models - cache hit: $0.028/1M, cache miss: $0.28/1M deepseek: { write: 0.28, read: 0.028 }, 'deepseek-chat': { write: 0.28, read: 0.028 }, diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js index b5c6d1714e..bf718fa07d 100644 --- a/api/models/tx.spec.js +++ b/api/models/tx.spec.js @@ -52,6 +52,13 @@ describe('getValueKey', () => { expect(getValueKey('openai/gpt-5.2')).toBe('gpt-5.2'); }); + it('should return "gpt-5.3" for model name containing "gpt-5.3"', () => { + expect(getValueKey('gpt-5.3')).toBe('gpt-5.3'); + expect(getValueKey('gpt-5.3-chat-latest')).toBe('gpt-5.3'); + expect(getValueKey('gpt-5.3-codex')).toBe('gpt-5.3'); + expect(getValueKey('openai/gpt-5.3')).toBe('gpt-5.3'); + }); + it('should return "gpt-3.5-turbo-1106" for model name containing "gpt-3.5-turbo-1106"', () => { expect(getValueKey('gpt-3.5-turbo-1106-some-other-info')).toBe('gpt-3.5-turbo-1106'); expect(getValueKey('openai/gpt-3.5-turbo-1106')).toBe('gpt-3.5-turbo-1106'); @@ -138,6 +145,12 @@ describe('getValueKey', () => { expect(getValueKey('gpt-5-pro-preview')).toBe('gpt-5-pro'); }); + it('should return "gpt-5.2-pro" for model name containing "gpt-5.2-pro"', () => { + expect(getValueKey('gpt-5.2-pro')).toBe('gpt-5.2-pro'); + expect(getValueKey('gpt-5.2-pro-2025-03-01')).toBe('gpt-5.2-pro'); + expect(getValueKey('openai/gpt-5.2-pro')).toBe('gpt-5.2-pro'); + }); + it('should return "gpt-4o" for model type of "gpt-4o"', () => { expect(getValueKey('gpt-4o-2024-08-06')).toBe('gpt-4o'); expect(getValueKey('gpt-4o-2024-08-06-0718')).toBe('gpt-4o'); @@ -336,6 +349,18 @@ describe('getMultiplier', () => { ); }); + it('should return the correct multiplier for gpt-5.2-pro', () => { + expect(getMultiplier({ model: 'gpt-5.2-pro', tokenType: 'prompt' })).toBe( + tokenValues['gpt-5.2-pro'].prompt, + ); + expect(getMultiplier({ model: 'gpt-5.2-pro', tokenType: 'completion' })).toBe( + tokenValues['gpt-5.2-pro'].completion, + ); + expect(getMultiplier({ model: 'openai/gpt-5.2-pro', tokenType: 'prompt' })).toBe( + tokenValues['gpt-5.2-pro'].prompt, + ); + }); + it('should return the correct multiplier for gpt-5.1', () => { expect(getMultiplier({ model: 'gpt-5.1', tokenType: 'prompt' })).toBe( tokenValues['gpt-5.1'].prompt, @@ -360,6 +385,21 @@ describe('getMultiplier', () => { ); }); + it('should return the correct multiplier for gpt-5.3', () => { + expect(getMultiplier({ model: 'gpt-5.3', tokenType: 'prompt' })).toBe( + tokenValues['gpt-5.3'].prompt, + ); + expect(getMultiplier({ model: 'gpt-5.3', tokenType: 'completion' })).toBe( + tokenValues['gpt-5.3'].completion, + ); + expect(getMultiplier({ model: 'gpt-5.3-codex', tokenType: 'prompt' })).toBe( + tokenValues['gpt-5.3'].prompt, + ); + expect(getMultiplier({ model: 'openai/gpt-5.3', tokenType: 'completion' })).toBe( + tokenValues['gpt-5.3'].completion, + ); + }); + it('should return the correct multiplier for gpt-4o', () => { const valueKey = getValueKey('gpt-4o-2024-08-06'); expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-4o'].prompt); @@ -1326,6 +1366,62 @@ describe('getCacheMultiplier', () => { ).toBeNull(); }); + it('should return correct cache multipliers for OpenAI models', () => { + const openaiCacheModels = [ + 'gpt-4o', + 'gpt-4o-mini', + 'gpt-4.1', + 'gpt-4.1-mini', + 'gpt-4.1-nano', + 'gpt-5', + 'gpt-5.1', + 'gpt-5.2', + 'gpt-5.3', + 'gpt-5-mini', + 'gpt-5-nano', + 'o1', + 'o1-mini', + 'o1-preview', + 'o3', + 'o3-mini', + 'o4-mini', + ]; + + for (const model of openaiCacheModels) { + expect(getCacheMultiplier({ model, cacheType: 'write' })).toBe(cacheTokenValues[model].write); + expect(getCacheMultiplier({ model, cacheType: 'read' })).toBe(cacheTokenValues[model].read); + } + }); + + it('should return correct cache multipliers for OpenAI dated variants', () => { + expect(getCacheMultiplier({ model: 'gpt-4o-2024-08-06', cacheType: 'read' })).toBe( + cacheTokenValues['gpt-4o'].read, + ); + expect(getCacheMultiplier({ model: 'gpt-4.1-2026-01-01', cacheType: 'read' })).toBe( + cacheTokenValues['gpt-4.1'].read, + ); + expect(getCacheMultiplier({ model: 'gpt-5.3-codex', cacheType: 'read' })).toBe( + cacheTokenValues['gpt-5.3'].read, + ); + expect(getCacheMultiplier({ model: 'openai/gpt-5.3', cacheType: 'write' })).toBe( + cacheTokenValues['gpt-5.3'].write, + ); + }); + + it('should return null for pro models that do not support caching', () => { + expect(getCacheMultiplier({ model: 'gpt-5-pro', cacheType: 'read' })).toBeNull(); + expect(getCacheMultiplier({ model: 'gpt-5-pro', cacheType: 'write' })).toBeNull(); + expect(getCacheMultiplier({ model: 'gpt-5.2-pro', cacheType: 'read' })).toBeNull(); + expect(getCacheMultiplier({ model: 'gpt-5.2-pro', cacheType: 'write' })).toBeNull(); + }); + + it('should have consistent 10% cache read pricing for gpt-5.x models', () => { + const gpt5CacheModels = ['gpt-5', 'gpt-5.1', 'gpt-5.2', 'gpt-5.3', 'gpt-5-mini', 'gpt-5-nano']; + for (const model of gpt5CacheModels) { + expect(cacheTokenValues[model].read).toBeCloseTo(cacheTokenValues[model].write * 0.1, 10); + } + }); + it('should handle models with "bedrock/" prefix', () => { expect( getCacheMultiplier({ diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index efbd962a8c..c19c7471d5 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -200,6 +200,20 @@ describe('getModelMaxTokens', () => { ); }); + test('should return correct tokens for gpt-5.3 matches', () => { + expect(getModelMaxTokens('gpt-5.3')).toBe(maxTokensMap[EModelEndpoint.openAI]['gpt-5.3']); + expect(getModelMaxTokens('gpt-5.3-codex')).toBe(maxTokensMap[EModelEndpoint.openAI]['gpt-5.3']); + expect(getModelMaxTokens('openai/gpt-5.3')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5.3'], + ); + expect(getModelMaxTokens('gpt-5.3-2025-03-01')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5.3'], + ); + expect(getModelMaxTokens('gpt-5.3-preview')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5.3'], + ); + }); + test('should return correct tokens for Anthropic models', () => { const models = [ 'claude-2.1', @@ -492,7 +506,17 @@ describe('getModelMaxTokens', () => { test('should return correct max output tokens for GPT-5 models', () => { const { getModelMaxOutputTokens } = require('@librechat/api'); - ['gpt-5', 'gpt-5-mini', 'gpt-5-nano', 'gpt-5-pro'].forEach((model) => { + const gpt5Models = [ + 'gpt-5', + 'gpt-5.1', + 'gpt-5.2', + 'gpt-5.3', + 'gpt-5-mini', + 'gpt-5-nano', + 'gpt-5-pro', + 'gpt-5.2-pro', + ]; + for (const model of gpt5Models) { expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]); expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe( maxOutputTokensMap[EModelEndpoint.openAI][model], @@ -500,7 +524,7 @@ describe('getModelMaxTokens', () => { expect(getModelMaxOutputTokens(model, EModelEndpoint.azureOpenAI)).toBe( maxOutputTokensMap[EModelEndpoint.azureOpenAI][model], ); - }); + } }); test('should return correct max output tokens for GPT-OSS models', () => { @@ -612,6 +636,12 @@ describe('matchModelName', () => { expect(matchModelName('gpt-5-pro-2025-01-30-0130')).toBe('gpt-5-pro'); }); + it('should return the closest matching key for gpt-5.3 matches', () => { + expect(matchModelName('openai/gpt-5.3')).toBe('gpt-5.3'); + expect(matchModelName('gpt-5.3-codex')).toBe('gpt-5.3'); + expect(matchModelName('gpt-5.3-2025-03-01')).toBe('gpt-5.3'); + }); + // Tests for Google models it('should return the exact model name if it exists in maxTokensMap - Google models', () => { expect(matchModelName('text-bison-32k', EModelEndpoint.google)).toBe('text-bison-32k'); diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index b515ed8f75..ad7cf1a8db 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -51,9 +51,11 @@ const openAIModels = { 'gpt-5': 400000, 'gpt-5.1': 400000, 'gpt-5.2': 400000, + 'gpt-5.3': 400000, 'gpt-5-mini': 400000, 'gpt-5-nano': 400000, 'gpt-5-pro': 400000, + 'gpt-5.2-pro': 400000, 'gpt-4o': 127500, // -500 from max 'gpt-4o-mini': 127500, // -500 from max 'gpt-4o-2024-05-13': 127500, // -500 from max @@ -389,9 +391,11 @@ export const modelMaxOutputs = { 'gpt-5': 128000, 'gpt-5.1': 128000, 'gpt-5.2': 128000, + 'gpt-5.3': 128000, 'gpt-5-mini': 128000, 'gpt-5-nano': 128000, 'gpt-5-pro': 128000, + 'gpt-5.2-pro': 128000, 'gpt-oss-20b': 131000, 'gpt-oss-120b': 131000, system_default: 32000,