From a79f7cebd545c2855072fad328938993a5280ef2 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Fri, 6 Mar 2026 02:11:01 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20feat:=20GPT-5.4=20and=20GPT-5.4-?= =?UTF-8?q?pro=20Context=20+=20Pricing=20(#12099)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ feat: Add support for new GPT-5.4 and GPT-5.4-pro models - Introduced new token values and cache settings for 'gpt-5.4' and 'gpt-5.4-pro' in the API model configurations. - Updated maximum output limits for the new models in the tokens utility. - Included 'gpt-5.4' and 'gpt-5.4-pro' in the shared OpenAI models list for consistent access across the application. * 🔧 update: Enhance GPT-5.4 and GPT-5.4-pro model configurations - Refined token pricing and cache settings for 'gpt-5.4' and 'gpt-5.4-pro' in the API model configurations. - Added tests for cache multipliers and maximum token limits for the new models. - Updated shared OpenAI models list to include 'gpt-5.4-thinking' and added a note for verifying pricing before release. * 🔧 update: Add clarification to token pricing for 'gpt-5.4-pro' - Added a comment to the 'gpt-5.4-pro' model configuration in tokens.ts to specify that it shares the same token window as 'gpt-5.4', enhancing clarity for future reference. --- api/models/tx.js | 37 ++++++++------------ api/models/tx.spec.js | 51 +++++++++++++++++++++++++++- api/utils/tokens.spec.js | 27 +++++++++++++++ packages/api/src/utils/tokens.ts | 4 +++ packages/data-provider/src/config.ts | 5 +++ 5 files changed, 100 insertions(+), 24 deletions(-) diff --git a/api/models/tx.js b/api/models/tx.js index b8790a8a75..ce14fad3a0 100644 --- a/api/models/tx.js +++ b/api/models/tx.js @@ -4,31 +4,18 @@ const defaultRate = 6; /** * Token Pricing Configuration * - * IMPORTANT: Key Ordering for Pattern Matching - * ============================================ - * The `findMatchingPattern` function iterates through object keys in REVERSE order - * (last-defined keys are checked first) and uses `modelName.includes(key)` for matching. + * Pattern Matching + * ================ + * `findMatchingPattern` (from @librechat/api) uses `modelName.includes(key)` and selects + * the LONGEST matching key. If a key's length equals the model name's length (exact match), + * it returns immediately. Definition order does NOT affect correctness. * - * This means: - * 1. BASE PATTERNS must be defined FIRST (e.g., "kimi", "moonshot") - * 2. SPECIFIC PATTERNS must be defined AFTER their base patterns (e.g., "kimi-k2", "kimi-k2.5") - * - * Example ordering for Kimi models: - * kimi: { prompt: 0.6, completion: 2.5 }, // Base pattern - checked last - * 'kimi-k2': { prompt: 0.6, completion: 2.5 }, // More specific - checked before "kimi" - * 'kimi-k2.5': { prompt: 0.6, completion: 3.0 }, // Most specific - checked first - * - * Why this matters: - * - Model name "kimi-k2.5" contains both "kimi" and "kimi-k2" as substrings - * - If "kimi" were checked first, it would incorrectly match and return wrong pricing - * - By defining specific patterns AFTER base patterns, they're checked first in reverse iteration + * Key ordering matters only for: + * 1. Performance: list older/less common models first so newer/common models + * are found earlier in the reverse scan. + * 2. Same-length tie-breaking: the last-defined key wins on equal-length matches. * * This applies to BOTH `tokenValues` and `cacheTokenValues` objects. - * - * When adding new model families: - * 1. Define the base/generic pattern first - * 2. Define increasingly specific patterns after - * 3. Ensure no pattern is a substring of another that should match differently */ /** @@ -151,6 +138,9 @@ const tokenValues = Object.assign( 'gpt-5.1': { prompt: 1.25, completion: 10 }, 'gpt-5.2': { prompt: 1.75, completion: 14 }, 'gpt-5.3': { prompt: 1.75, completion: 14 }, + 'gpt-5.4': { prompt: 2.5, completion: 15 }, + // TODO: gpt-5.4-pro pricing not yet officially published — verify before release + 'gpt-5.4-pro': { prompt: 5, completion: 30 }, 'gpt-5-nano': { prompt: 0.05, completion: 0.4 }, 'gpt-5-mini': { prompt: 0.25, completion: 2 }, 'gpt-5-pro': { prompt: 15, completion: 120 }, @@ -322,7 +312,7 @@ const cacheTokenValues = { // gpt-4o (incl. mini), o1 (incl. mini/preview): 50% off // gpt-4.1 (incl. mini/nano), o3 (incl. mini), o4-mini: 75% off // gpt-5.x (excl. pro variants): 90% off - // gpt-5-pro, gpt-5.2-pro: no caching + // gpt-5-pro, gpt-5.2-pro, gpt-5.4-pro: no caching 'gpt-4o': { write: 2.5, read: 1.25 }, 'gpt-4o-mini': { write: 0.15, read: 0.075 }, 'gpt-4.1': { write: 2, read: 0.5 }, @@ -332,6 +322,7 @@ const cacheTokenValues = { 'gpt-5.1': { write: 1.25, read: 0.125 }, 'gpt-5.2': { write: 1.75, read: 0.175 }, 'gpt-5.3': { write: 1.75, read: 0.175 }, + 'gpt-5.4': { write: 2.5, read: 0.25 }, 'gpt-5-mini': { write: 0.25, read: 0.025 }, 'gpt-5-nano': { write: 0.05, read: 0.005 }, o1: { write: 15, read: 7.5 }, diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js index bf718fa07d..666cd0a3b8 100644 --- a/api/models/tx.spec.js +++ b/api/models/tx.spec.js @@ -59,6 +59,17 @@ describe('getValueKey', () => { expect(getValueKey('openai/gpt-5.3')).toBe('gpt-5.3'); }); + it('should return "gpt-5.4" for model name containing "gpt-5.4"', () => { + expect(getValueKey('gpt-5.4')).toBe('gpt-5.4'); + expect(getValueKey('gpt-5.4-thinking')).toBe('gpt-5.4'); + expect(getValueKey('openai/gpt-5.4')).toBe('gpt-5.4'); + }); + + it('should return "gpt-5.4-pro" for model name containing "gpt-5.4-pro"', () => { + expect(getValueKey('gpt-5.4-pro')).toBe('gpt-5.4-pro'); + expect(getValueKey('openai/gpt-5.4-pro')).toBe('gpt-5.4-pro'); + }); + it('should return "gpt-3.5-turbo-1106" for model name containing "gpt-3.5-turbo-1106"', () => { expect(getValueKey('gpt-3.5-turbo-1106-some-other-info')).toBe('gpt-3.5-turbo-1106'); expect(getValueKey('openai/gpt-3.5-turbo-1106')).toBe('gpt-3.5-turbo-1106'); @@ -400,6 +411,33 @@ describe('getMultiplier', () => { ); }); + it('should return the correct multiplier for gpt-5.4', () => { + expect(getMultiplier({ model: 'gpt-5.4', tokenType: 'prompt' })).toBe( + tokenValues['gpt-5.4'].prompt, + ); + expect(getMultiplier({ model: 'gpt-5.4', tokenType: 'completion' })).toBe( + tokenValues['gpt-5.4'].completion, + ); + expect(getMultiplier({ model: 'gpt-5.4-thinking', tokenType: 'prompt' })).toBe( + tokenValues['gpt-5.4'].prompt, + ); + expect(getMultiplier({ model: 'openai/gpt-5.4', tokenType: 'completion' })).toBe( + tokenValues['gpt-5.4'].completion, + ); + }); + + it('should return the correct multiplier for gpt-5.4-pro', () => { + expect(getMultiplier({ model: 'gpt-5.4-pro', tokenType: 'prompt' })).toBe( + tokenValues['gpt-5.4-pro'].prompt, + ); + expect(getMultiplier({ model: 'gpt-5.4-pro', tokenType: 'completion' })).toBe( + tokenValues['gpt-5.4-pro'].completion, + ); + expect(getMultiplier({ model: 'openai/gpt-5.4-pro', tokenType: 'prompt' })).toBe( + tokenValues['gpt-5.4-pro'].prompt, + ); + }); + it('should return the correct multiplier for gpt-4o', () => { const valueKey = getValueKey('gpt-4o-2024-08-06'); expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-4o'].prompt); @@ -1377,6 +1415,7 @@ describe('getCacheMultiplier', () => { 'gpt-5.1', 'gpt-5.2', 'gpt-5.3', + 'gpt-5.4', 'gpt-5-mini', 'gpt-5-nano', 'o1', @@ -1413,10 +1452,20 @@ describe('getCacheMultiplier', () => { expect(getCacheMultiplier({ model: 'gpt-5-pro', cacheType: 'write' })).toBeNull(); expect(getCacheMultiplier({ model: 'gpt-5.2-pro', cacheType: 'read' })).toBeNull(); expect(getCacheMultiplier({ model: 'gpt-5.2-pro', cacheType: 'write' })).toBeNull(); + expect(getCacheMultiplier({ model: 'gpt-5.4-pro', cacheType: 'read' })).toBeNull(); + expect(getCacheMultiplier({ model: 'gpt-5.4-pro', cacheType: 'write' })).toBeNull(); }); it('should have consistent 10% cache read pricing for gpt-5.x models', () => { - const gpt5CacheModels = ['gpt-5', 'gpt-5.1', 'gpt-5.2', 'gpt-5.3', 'gpt-5-mini', 'gpt-5-nano']; + const gpt5CacheModels = [ + 'gpt-5', + 'gpt-5.1', + 'gpt-5.2', + 'gpt-5.3', + 'gpt-5.4', + 'gpt-5-mini', + 'gpt-5-nano', + ]; for (const model of gpt5CacheModels) { expect(cacheTokenValues[model].read).toBeCloseTo(cacheTokenValues[model].write * 0.1, 10); } diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index 50974022cd..6cecdb95c8 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -214,6 +214,25 @@ describe('getModelMaxTokens', () => { ); }); + test('should return correct tokens for gpt-5.4 matches', () => { + expect(getModelMaxTokens('gpt-5.4')).toBe(maxTokensMap[EModelEndpoint.openAI]['gpt-5.4']); + expect(getModelMaxTokens('gpt-5.4-thinking')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5.4'], + ); + expect(getModelMaxTokens('openai/gpt-5.4')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5.4'], + ); + }); + + test('should return correct tokens for gpt-5.4-pro matches', () => { + expect(getModelMaxTokens('gpt-5.4-pro')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5.4-pro'], + ); + expect(getModelMaxTokens('openai/gpt-5.4-pro')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5.4-pro'], + ); + }); + test('should return correct tokens for Anthropic models', () => { const models = [ 'claude-2.1', @@ -495,6 +514,8 @@ describe('getModelMaxTokens', () => { 'gpt-5.1', 'gpt-5.2', 'gpt-5.3', + 'gpt-5.4', + 'gpt-5.4-pro', 'gpt-5-mini', 'gpt-5-nano', 'gpt-5-pro', @@ -804,6 +825,12 @@ describe('matchModelName', () => { expect(matchModelName('gpt-5.3-2025-03-01')).toBe('gpt-5.3'); }); + it('should return the closest matching key for gpt-5.4 matches', () => { + expect(matchModelName('openai/gpt-5.4')).toBe('gpt-5.4'); + expect(matchModelName('gpt-5.4-thinking')).toBe('gpt-5.4'); + expect(matchModelName('gpt-5.4-pro')).toBe('gpt-5.4-pro'); + }); + it('should return the input model name if no match is found - Google models', () => { expect(matchModelName('unknown-google-model', EModelEndpoint.google)).toBe( 'unknown-google-model', diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index b07f94f946..32b2fc6036 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -55,6 +55,8 @@ const openAIModels = { 'gpt-5.1': 400000, 'gpt-5.2': 400000, 'gpt-5.3': 400000, + 'gpt-5.4': 272000, // standard context; 1M experimental available via API opt-in (2x rate) + 'gpt-5.4-pro': 272000, // same window as gpt-5.4 'gpt-5-mini': 400000, 'gpt-5-nano': 400000, 'gpt-5-pro': 400000, @@ -361,6 +363,8 @@ export const modelMaxOutputs = { 'gpt-5.1': 128000, 'gpt-5.2': 128000, 'gpt-5.3': 128000, + 'gpt-5.4': 128000, + 'gpt-5.4-pro': 128000, 'gpt-5-mini': 128000, 'gpt-5-nano': 128000, 'gpt-5-pro': 128000, diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index b89d27aac7..6a77508f59 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -1101,6 +1101,10 @@ export const alternateName = { }; const sharedOpenAIModels = [ + 'gpt-5.4', + // TODO: gpt-5.4-thinking may have separate reasoning token pricing — verify before release + 'gpt-5.4-thinking', + 'gpt-5.4-pro', 'gpt-5.1', 'gpt-5.1-chat-latest', 'gpt-5.1-codex', @@ -1276,6 +1280,7 @@ export const visionModels = [ 'o4-mini', 'o3', 'o1', + 'gpt-5', 'gpt-4.1', 'gpt-4.5', 'llava',