mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-03-07 08:40:19 +01:00
🤖 feat: GPT-5.4 and GPT-5.4-pro Context + Pricing (#12099)
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
* ✨ feat: Add support for new GPT-5.4 and GPT-5.4-pro models
  - Introduced new token values and cache settings for 'gpt-5.4' and 'gpt-5.4-pro' in the API model configurations.
  - Updated maximum output limits for the new models in the tokens utility.
  - Included 'gpt-5.4' and 'gpt-5.4-pro' in the shared OpenAI models list for consistent access across the application.

* 🔧 update: Enhance GPT-5.4 and GPT-5.4-pro model configurations
  - Refined token pricing and cache settings for 'gpt-5.4' and 'gpt-5.4-pro' in the API model configurations.
  - Added tests for cache multipliers and maximum token limits for the new models.
  - Updated shared OpenAI models list to include 'gpt-5.4-thinking' and added a note for verifying pricing before release.

* 🔧 update: Add clarification to token pricing for 'gpt-5.4-pro'
  - Added a comment to the 'gpt-5.4-pro' model configuration in tokens.ts to specify that it shares the same token window as 'gpt-5.4', enhancing clarity for future reference.
This commit is contained in:
parent
3b84cc048a
commit
a79f7cebd5
5 changed files with 100 additions and 24 deletions
|
|
@ -4,31 +4,18 @@ const defaultRate = 6;
|
|||
/**
|
||||
* Token Pricing Configuration
|
||||
*
|
||||
* IMPORTANT: Key Ordering for Pattern Matching
|
||||
* ============================================
|
||||
* The `findMatchingPattern` function iterates through object keys in REVERSE order
|
||||
* (last-defined keys are checked first) and uses `modelName.includes(key)` for matching.
|
||||
* Pattern Matching
|
||||
* ================
|
||||
* `findMatchingPattern` (from @librechat/api) uses `modelName.includes(key)` and selects
|
||||
* the LONGEST matching key. If a key's length equals the model name's length (exact match),
|
||||
* it returns immediately. Definition order does NOT affect correctness.
|
||||
*
|
||||
* This means:
|
||||
* 1. BASE PATTERNS must be defined FIRST (e.g., "kimi", "moonshot")
|
||||
* 2. SPECIFIC PATTERNS must be defined AFTER their base patterns (e.g., "kimi-k2", "kimi-k2.5")
|
||||
*
|
||||
* Example ordering for Kimi models:
|
||||
* kimi: { prompt: 0.6, completion: 2.5 }, // Base pattern - checked last
|
||||
* 'kimi-k2': { prompt: 0.6, completion: 2.5 }, // More specific - checked before "kimi"
|
||||
* 'kimi-k2.5': { prompt: 0.6, completion: 3.0 }, // Most specific - checked first
|
||||
*
|
||||
* Why this matters:
|
||||
* - Model name "kimi-k2.5" contains both "kimi" and "kimi-k2" as substrings
|
||||
* - If "kimi" were checked first, it would incorrectly match and return wrong pricing
|
||||
* - By defining specific patterns AFTER base patterns, they're checked first in reverse iteration
|
||||
* Key ordering matters only for:
|
||||
* 1. Performance: list older/less common models first so newer/common models
|
||||
* are found earlier in the reverse scan.
|
||||
* 2. Same-length tie-breaking: the last-defined key wins on equal-length matches.
|
||||
*
|
||||
* This applies to BOTH `tokenValues` and `cacheTokenValues` objects.
|
||||
*
|
||||
* When adding new model families:
|
||||
* 1. Define the base/generic pattern first
|
||||
* 2. Define increasingly specific patterns after
|
||||
* 3. Ensure no pattern is a substring of another that should match differently
|
||||
*/
|
||||
|
||||
/**
|
||||
|
|
@ -151,6 +138,9 @@ const tokenValues = Object.assign(
|
|||
'gpt-5.1': { prompt: 1.25, completion: 10 },
|
||||
'gpt-5.2': { prompt: 1.75, completion: 14 },
|
||||
'gpt-5.3': { prompt: 1.75, completion: 14 },
|
||||
'gpt-5.4': { prompt: 2.5, completion: 15 },
|
||||
// TODO: gpt-5.4-pro pricing not yet officially published — verify before release
|
||||
'gpt-5.4-pro': { prompt: 5, completion: 30 },
|
||||
'gpt-5-nano': { prompt: 0.05, completion: 0.4 },
|
||||
'gpt-5-mini': { prompt: 0.25, completion: 2 },
|
||||
'gpt-5-pro': { prompt: 15, completion: 120 },
|
||||
|
|
@ -322,7 +312,7 @@ const cacheTokenValues = {
|
|||
// gpt-4o (incl. mini), o1 (incl. mini/preview): 50% off
|
||||
// gpt-4.1 (incl. mini/nano), o3 (incl. mini), o4-mini: 75% off
|
||||
// gpt-5.x (excl. pro variants): 90% off
|
||||
// gpt-5-pro, gpt-5.2-pro: no caching
|
||||
// gpt-5-pro, gpt-5.2-pro, gpt-5.4-pro: no caching
|
||||
'gpt-4o': { write: 2.5, read: 1.25 },
|
||||
'gpt-4o-mini': { write: 0.15, read: 0.075 },
|
||||
'gpt-4.1': { write: 2, read: 0.5 },
|
||||
|
|
@ -332,6 +322,7 @@ const cacheTokenValues = {
|
|||
'gpt-5.1': { write: 1.25, read: 0.125 },
|
||||
'gpt-5.2': { write: 1.75, read: 0.175 },
|
||||
'gpt-5.3': { write: 1.75, read: 0.175 },
|
||||
'gpt-5.4': { write: 2.5, read: 0.25 },
|
||||
'gpt-5-mini': { write: 0.25, read: 0.025 },
|
||||
'gpt-5-nano': { write: 0.05, read: 0.005 },
|
||||
o1: { write: 15, read: 7.5 },
|
||||
|
|
|
|||
|
|
@ -59,6 +59,17 @@ describe('getValueKey', () => {
|
|||
expect(getValueKey('openai/gpt-5.3')).toBe('gpt-5.3');
|
||||
});
|
||||
|
||||
it('should return "gpt-5.4" for model name containing "gpt-5.4"', () => {
|
||||
expect(getValueKey('gpt-5.4')).toBe('gpt-5.4');
|
||||
expect(getValueKey('gpt-5.4-thinking')).toBe('gpt-5.4');
|
||||
expect(getValueKey('openai/gpt-5.4')).toBe('gpt-5.4');
|
||||
});
|
||||
|
||||
it('should return "gpt-5.4-pro" for model name containing "gpt-5.4-pro"', () => {
|
||||
expect(getValueKey('gpt-5.4-pro')).toBe('gpt-5.4-pro');
|
||||
expect(getValueKey('openai/gpt-5.4-pro')).toBe('gpt-5.4-pro');
|
||||
});
|
||||
|
||||
it('should return "gpt-3.5-turbo-1106" for model name containing "gpt-3.5-turbo-1106"', () => {
|
||||
expect(getValueKey('gpt-3.5-turbo-1106-some-other-info')).toBe('gpt-3.5-turbo-1106');
|
||||
expect(getValueKey('openai/gpt-3.5-turbo-1106')).toBe('gpt-3.5-turbo-1106');
|
||||
|
|
@ -400,6 +411,33 @@ describe('getMultiplier', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('should return the correct multiplier for gpt-5.4', () => {
|
||||
expect(getMultiplier({ model: 'gpt-5.4', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['gpt-5.4'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'gpt-5.4', tokenType: 'completion' })).toBe(
|
||||
tokenValues['gpt-5.4'].completion,
|
||||
);
|
||||
expect(getMultiplier({ model: 'gpt-5.4-thinking', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['gpt-5.4'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'openai/gpt-5.4', tokenType: 'completion' })).toBe(
|
||||
tokenValues['gpt-5.4'].completion,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return the correct multiplier for gpt-5.4-pro', () => {
|
||||
expect(getMultiplier({ model: 'gpt-5.4-pro', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['gpt-5.4-pro'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'gpt-5.4-pro', tokenType: 'completion' })).toBe(
|
||||
tokenValues['gpt-5.4-pro'].completion,
|
||||
);
|
||||
expect(getMultiplier({ model: 'openai/gpt-5.4-pro', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['gpt-5.4-pro'].prompt,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return the correct multiplier for gpt-4o', () => {
|
||||
const valueKey = getValueKey('gpt-4o-2024-08-06');
|
||||
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-4o'].prompt);
|
||||
|
|
@ -1377,6 +1415,7 @@ describe('getCacheMultiplier', () => {
|
|||
'gpt-5.1',
|
||||
'gpt-5.2',
|
||||
'gpt-5.3',
|
||||
'gpt-5.4',
|
||||
'gpt-5-mini',
|
||||
'gpt-5-nano',
|
||||
'o1',
|
||||
|
|
@ -1413,10 +1452,20 @@ describe('getCacheMultiplier', () => {
|
|||
expect(getCacheMultiplier({ model: 'gpt-5-pro', cacheType: 'write' })).toBeNull();
|
||||
expect(getCacheMultiplier({ model: 'gpt-5.2-pro', cacheType: 'read' })).toBeNull();
|
||||
expect(getCacheMultiplier({ model: 'gpt-5.2-pro', cacheType: 'write' })).toBeNull();
|
||||
expect(getCacheMultiplier({ model: 'gpt-5.4-pro', cacheType: 'read' })).toBeNull();
|
||||
expect(getCacheMultiplier({ model: 'gpt-5.4-pro', cacheType: 'write' })).toBeNull();
|
||||
});
|
||||
|
||||
it('should have consistent 10% cache read pricing for gpt-5.x models', () => {
|
||||
const gpt5CacheModels = ['gpt-5', 'gpt-5.1', 'gpt-5.2', 'gpt-5.3', 'gpt-5-mini', 'gpt-5-nano'];
|
||||
const gpt5CacheModels = [
|
||||
'gpt-5',
|
||||
'gpt-5.1',
|
||||
'gpt-5.2',
|
||||
'gpt-5.3',
|
||||
'gpt-5.4',
|
||||
'gpt-5-mini',
|
||||
'gpt-5-nano',
|
||||
];
|
||||
for (const model of gpt5CacheModels) {
|
||||
expect(cacheTokenValues[model].read).toBeCloseTo(cacheTokenValues[model].write * 0.1, 10);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -214,6 +214,25 @@ describe('getModelMaxTokens', () => {
|
|||
);
|
||||
});
|
||||
|
||||
test('should return correct tokens for gpt-5.4 matches', () => {
|
||||
expect(getModelMaxTokens('gpt-5.4')).toBe(maxTokensMap[EModelEndpoint.openAI]['gpt-5.4']);
|
||||
expect(getModelMaxTokens('gpt-5.4-thinking')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['gpt-5.4'],
|
||||
);
|
||||
expect(getModelMaxTokens('openai/gpt-5.4')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['gpt-5.4'],
|
||||
);
|
||||
});
|
||||
|
||||
test('should return correct tokens for gpt-5.4-pro matches', () => {
|
||||
expect(getModelMaxTokens('gpt-5.4-pro')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['gpt-5.4-pro'],
|
||||
);
|
||||
expect(getModelMaxTokens('openai/gpt-5.4-pro')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['gpt-5.4-pro'],
|
||||
);
|
||||
});
|
||||
|
||||
test('should return correct tokens for Anthropic models', () => {
|
||||
const models = [
|
||||
'claude-2.1',
|
||||
|
|
@ -495,6 +514,8 @@ describe('getModelMaxTokens', () => {
|
|||
'gpt-5.1',
|
||||
'gpt-5.2',
|
||||
'gpt-5.3',
|
||||
'gpt-5.4',
|
||||
'gpt-5.4-pro',
|
||||
'gpt-5-mini',
|
||||
'gpt-5-nano',
|
||||
'gpt-5-pro',
|
||||
|
|
@ -804,6 +825,12 @@ describe('matchModelName', () => {
|
|||
expect(matchModelName('gpt-5.3-2025-03-01')).toBe('gpt-5.3');
|
||||
});
|
||||
|
||||
it('should return the closest matching key for gpt-5.4 matches', () => {
|
||||
expect(matchModelName('openai/gpt-5.4')).toBe('gpt-5.4');
|
||||
expect(matchModelName('gpt-5.4-thinking')).toBe('gpt-5.4');
|
||||
expect(matchModelName('gpt-5.4-pro')).toBe('gpt-5.4-pro');
|
||||
});
|
||||
|
||||
it('should return the input model name if no match is found - Google models', () => {
|
||||
expect(matchModelName('unknown-google-model', EModelEndpoint.google)).toBe(
|
||||
'unknown-google-model',
|
||||
|
|
|
|||
|
|
@ -55,6 +55,8 @@ const openAIModels = {
|
|||
'gpt-5.1': 400000,
|
||||
'gpt-5.2': 400000,
|
||||
'gpt-5.3': 400000,
|
||||
'gpt-5.4': 272000, // standard context; 1M experimental available via API opt-in (2x rate)
|
||||
'gpt-5.4-pro': 272000, // same window as gpt-5.4
|
||||
'gpt-5-mini': 400000,
|
||||
'gpt-5-nano': 400000,
|
||||
'gpt-5-pro': 400000,
|
||||
|
|
@ -361,6 +363,8 @@ export const modelMaxOutputs = {
|
|||
'gpt-5.1': 128000,
|
||||
'gpt-5.2': 128000,
|
||||
'gpt-5.3': 128000,
|
||||
'gpt-5.4': 128000,
|
||||
'gpt-5.4-pro': 128000,
|
||||
'gpt-5-mini': 128000,
|
||||
'gpt-5-nano': 128000,
|
||||
'gpt-5-pro': 128000,
|
||||
|
|
|
|||
|
|
@ -1101,6 +1101,10 @@ export const alternateName = {
|
|||
};
|
||||
|
||||
const sharedOpenAIModels = [
|
||||
'gpt-5.4',
|
||||
// TODO: gpt-5.4-thinking may have separate reasoning token pricing — verify before release
|
||||
'gpt-5.4-thinking',
|
||||
'gpt-5.4-pro',
|
||||
'gpt-5.1',
|
||||
'gpt-5.1-chat-latest',
|
||||
'gpt-5.1-codex',
|
||||
|
|
@ -1276,6 +1280,7 @@ export const visionModels = [
|
|||
'o4-mini',
|
||||
'o3',
|
||||
'o1',
|
||||
'gpt-5',
|
||||
'gpt-4.1',
|
||||
'gpt-4.5',
|
||||
'llava',
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue