diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index c19c7471d5..50974022cd 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -251,16 +251,6 @@ describe('getModelMaxTokens', () => { }); }); - // Tests for Google models - test('should return correct tokens for exact match - Google models', () => { - expect(getModelMaxTokens('text-bison-32k', EModelEndpoint.google)).toBe( - maxTokensMap[EModelEndpoint.google]['text-bison-32k'], - ); - expect(getModelMaxTokens('codechat-bison-32k', EModelEndpoint.google)).toBe( - maxTokensMap[EModelEndpoint.google]['codechat-bison-32k'], - ); - }); - test('should return undefined for no match - Google models', () => { expect(getModelMaxTokens('unknown-google-model', EModelEndpoint.google)).toBeUndefined(); }); @@ -317,12 +307,6 @@ describe('getModelMaxTokens', () => { expect(getModelMaxTokens('gemini-pro', EModelEndpoint.google)).toBe( maxTokensMap[EModelEndpoint.google]['gemini'], ); - expect(getModelMaxTokens('code-', EModelEndpoint.google)).toBe( - maxTokensMap[EModelEndpoint.google]['code-'], - ); - expect(getModelMaxTokens('chat-', EModelEndpoint.google)).toBe( - maxTokensMap[EModelEndpoint.google]['chat-'], - ); }); test('should return correct tokens for partial match - Cohere models', () => { @@ -541,6 +525,184 @@ describe('getModelMaxTokens', () => { }); }); +describe('findMatchingPattern - longest match wins', () => { + test('should prefer longer matching key over shorter cross-provider pattern', () => { + const result = findMatchingPattern( + 'gpt-5.2-chat-2025-12-11', + maxTokensMap[EModelEndpoint.openAI], + ); + expect(result).toBe('gpt-5.2'); + }); + + test('should match gpt-5.2 tokens for date-suffixed chat variant', () => { + expect(getModelMaxTokens('gpt-5.2-chat-2025-12-11')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5.2'], + ); + }); + + test('should match gpt-5.2-pro over shorter patterns', () => { + expect(getModelMaxTokens('gpt-5.2-pro-chat-2025-12-11')).toBe( + 
maxTokensMap[EModelEndpoint.openAI]['gpt-5.2-pro'], + ); + }); + + test('should match gpt-5-mini over gpt-5 for mini variants', () => { + expect(getModelMaxTokens('gpt-5-mini-chat-2025-01-01')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5-mini'], + ); + }); + + test('should prefer gpt-4-1106 over gpt-4 for versioned model names', () => { + const result = findMatchingPattern('gpt-4-1106-preview', maxTokensMap[EModelEndpoint.openAI]); + expect(result).toBe('gpt-4-1106'); + }); + + test('should prefer gpt-4-32k-0613 over gpt-4-32k for exact versioned names', () => { + const result = findMatchingPattern('gpt-4-32k-0613', maxTokensMap[EModelEndpoint.openAI]); + expect(result).toBe('gpt-4-32k-0613'); + }); + + test('should prefer claude-3-5-sonnet over claude-3', () => { + const result = findMatchingPattern( + 'claude-3-5-sonnet-20241022', + maxTokensMap[EModelEndpoint.anthropic], + ); + expect(result).toBe('claude-3-5-sonnet'); + }); + + test('should prefer gemini-2.0-flash-lite over gemini-2.0-flash', () => { + const result = findMatchingPattern( + 'gemini-2.0-flash-lite-preview', + maxTokensMap[EModelEndpoint.google], + ); + expect(result).toBe('gemini-2.0-flash-lite'); + }); +}); + +describe('findMatchingPattern - bestLength selection', () => { + test('should return the longest matching key when multiple keys match', () => { + const tokensMap = { short: 100, 'short-med': 200, 'short-med-long': 300 }; + expect(findMatchingPattern('short-med-long-extra', tokensMap)).toBe('short-med-long'); + }); + + test('should return the longest match regardless of key insertion order', () => { + const tokensMap = { 'a-b-c': 300, a: 100, 'a-b': 200 }; + expect(findMatchingPattern('a-b-c-d', tokensMap)).toBe('a-b-c'); + }); + + test('should return null when no key matches', () => { + const tokensMap = { alpha: 100, beta: 200 }; + expect(findMatchingPattern('gamma-delta', tokensMap)).toBeNull(); + }); + + test('should return the single matching key when only one matches', () => { 
+ const tokensMap = { alpha: 100, beta: 200, gamma: 300 }; + expect(findMatchingPattern('beta-extended', tokensMap)).toBe('beta'); + }); + + test('should match case-insensitively against model name', () => { + const tokensMap = { 'gpt-5': 400000 }; + expect(findMatchingPattern('GPT-5-turbo', tokensMap)).toBe('gpt-5'); + }); + + test('should select the longest key among overlapping substring matches', () => { + const tokensMap = { 'gpt-': 100, 'gpt-5': 200, 'gpt-5.2': 300, 'gpt-5.2-pro': 400 }; + expect(findMatchingPattern('gpt-5.2-pro-2025-01-01', tokensMap)).toBe('gpt-5.2-pro'); + expect(findMatchingPattern('gpt-5.2-chat-2025-01-01', tokensMap)).toBe('gpt-5.2'); + expect(findMatchingPattern('gpt-5.1-preview', tokensMap)).toBe('gpt-5'); + expect(findMatchingPattern('gpt-unknown', tokensMap)).toBe('gpt-'); + }); + + test('should not be confused by a short key that appears later in the model name', () => { + const tokensMap = { 'model-v2': 200, v2: 100 }; + expect(findMatchingPattern('model-v2-extended', tokensMap)).toBe('model-v2'); + }); + + test('should handle exact-length match as the best match', () => { + const tokensMap = { 'exact-model': 500, exact: 100 }; + expect(findMatchingPattern('exact-model', tokensMap)).toBe('exact-model'); + }); + + test('should return null for empty model name', () => { + expect(findMatchingPattern('', { 'gpt-5': 400000 })).toBeNull(); + }); + + test('should prefer last-defined key on same-length ties', () => { + const tokensMap = { 'aa-bb': 100, 'cc-dd': 200 }; + // model name contains both 5-char keys; last-defined wins in reverse iteration + expect(findMatchingPattern('aa-bb-cc-dd', tokensMap)).toBe('cc-dd'); + }); + + test('longest match beats short cross-provider pattern even when both present', () => { + const tokensMap = { 'gpt-5.2': 400000, 'chat-': 8187 }; + expect(findMatchingPattern('gpt-5.2-chat-2025-12-11', tokensMap)).toBe('gpt-5.2'); + }); + + test('should match case-insensitively against keys', () => { + const 
tokensMap = { 'GPT-5': 400000 }; + expect(findMatchingPattern('gpt-5-turbo', tokensMap)).toBe('GPT-5'); + }); +}); + +describe('findMatchingPattern - iteration performance', () => { + let includesSpy; + + beforeEach(() => { + includesSpy = jest.spyOn(String.prototype, 'includes'); + }); + + afterEach(() => { + includesSpy.mockRestore(); + }); + + test('exact match early-exits with minimal includes() checks', () => { + const openAIMap = maxTokensMap[EModelEndpoint.openAI]; + const keys = Object.keys(openAIMap); + const lastKey = keys[keys.length - 1]; + includesSpy.mockClear(); + const result = findMatchingPattern(lastKey, openAIMap); + const exactCalls = includesSpy.mock.calls.length; + + expect(result).toBe(lastKey); + expect(exactCalls).toBe(1); + }); + + test('bestLength check skips includes() for shorter keys after a long match', () => { + const openAIMap = maxTokensMap[EModelEndpoint.openAI]; + includesSpy.mockClear(); + findMatchingPattern('gpt-5.2-pro-chat-2025-12-11', openAIMap); + const longKeyCalls = includesSpy.mock.calls.length; + + includesSpy.mockClear(); + findMatchingPattern('gpt-5.3-chat-latest', openAIMap); + const shortKeyCalls = includesSpy.mock.calls.length; + + // gpt-5.2-pro (11 chars) is the last-defined key, so it matches on the first reverse check and bestLength then prunes every key of 11 chars or fewer + // gpt-5.3 (7 chars) is short, so fewer keys are pruned by the length check + expect(longKeyCalls).toBeLessThan(shortKeyCalls); + }); + + test('last-defined keys are checked first in reverse iteration', () => { + const tokensMap = { first: 100, second: 200, third: 300 }; + includesSpy.mockClear(); + const result = findMatchingPattern('third', tokensMap); + const calls = includesSpy.mock.calls.length; + + // 'third' is last key, found on first reverse check, exact match exits immediately + expect(result).toBe('third'); + expect(calls).toBe(1); + }); +}); + +describe('deprecated PaLM2/Codey model removal', () => { + test('deprecated PaLM2/Codey models no longer have token entries', () => { +
expect(getModelMaxTokens('text-bison-32k', EModelEndpoint.google)).toBeUndefined(); + expect(getModelMaxTokens('codechat-bison-32k', EModelEndpoint.google)).toBeUndefined(); + expect(getModelMaxTokens('code-bison', EModelEndpoint.google)).toBeUndefined(); + expect(getModelMaxTokens('chat-bison', EModelEndpoint.google)).toBeUndefined(); + }); +}); + describe('matchModelName', () => { it('should return the exact model name if it exists in maxTokensMap', () => { expect(matchModelName('gpt-4-32k-0613')).toBe('gpt-4-32k-0613'); @@ -642,22 +804,11 @@ describe('matchModelName', () => { expect(matchModelName('gpt-5.3-2025-03-01')).toBe('gpt-5.3'); }); - // Tests for Google models - it('should return the exact model name if it exists in maxTokensMap - Google models', () => { - expect(matchModelName('text-bison-32k', EModelEndpoint.google)).toBe('text-bison-32k'); - expect(matchModelName('codechat-bison-32k', EModelEndpoint.google)).toBe('codechat-bison-32k'); - }); - it('should return the input model name if no match is found - Google models', () => { expect(matchModelName('unknown-google-model', EModelEndpoint.google)).toBe( 'unknown-google-model', ); }); - - it('should return the closest matching key for partial matches - Google models', () => { - expect(matchModelName('code-', EModelEndpoint.google)).toBe('code-'); - expect(matchModelName('chat-', EModelEndpoint.google)).toBe('chat-'); - }); }); describe('Meta Models Tests', () => { diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index ad7cf1a8db..b07f94f946 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -5,38 +5,30 @@ import type { EndpointTokenConfig, TokenConfig } from '~/types'; /** * Model Token Configuration Maps * - * IMPORTANT: Key Ordering for Pattern Matching - * ============================================ - * The `findMatchingPattern` function iterates through object keys in REVERSE order - * (last-defined keys are checked first) and 
uses `modelName.includes(key)` for matching. + * Pattern Matching + * ================ + * `findMatchingPattern` uses `modelName.includes(key)` and selects the **longest** + * matching key. If a key's length equals the model name's length (exact match), it + * returns immediately — no further keys are checked. * - * This means: - * 1. BASE PATTERNS must be defined FIRST (e.g., "kimi", "moonshot") - * 2. SPECIFIC PATTERNS must be defined AFTER their base patterns (e.g., "kimi-k2", "kimi-k2.5") + * For keys of different lengths, definition order does not affect the result — the + * longest match always wins. For **same-length ties**, the function iterates in + * reverse, so the last-defined key wins. Key ordering therefore matters for: * - * Example ordering for Kimi models: - * kimi: 262144, // Base pattern - checked last - * 'kimi-k2': 262144, // More specific - checked before "kimi" - * 'kimi-k2.5': 262144, // Most specific - checked first - * - * Why this matters: - * - Model name "kimi-k2.5" contains both "kimi" and "kimi-k2" as substrings - * - If "kimi" were checked first, it would incorrectly match "kimi-k2.5" - * - By defining specific patterns AFTER base patterns, they're checked first in reverse iteration - * - * When adding new model families: - * 1. Define the base/generic pattern first - * 2. Define increasingly specific patterns after - * 3. Ensure no pattern is a substring of another that should match differently + * 1. **Performance**: list older/legacy models first, newer models last — newer + * models are more commonly used and will match earlier in the reverse scan. + * 2. **Same-length tie-breaking**: in `aggregateModels`, OpenAI is spread last + * so its keys are preferred when two keys of equal length both match. 
*/ const openAIModels = { - 'o4-mini': 200000, - 'o3-mini': 195000, // -5000 from max - o3: 200000, - o1: 195000, // -5000 from max - 'o1-mini': 127500, // -500 from max - 'o1-preview': 127500, // -500 from max + 'gpt-3.5-turbo-0301': 4092, // -5 from max + 'gpt-3.5-turbo-0613': 4092, // -5 from max + 'gpt-3.5-turbo-16k': 16375, // -10 from max + 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max + 'gpt-3.5-turbo-1106': 16375, // -10 from max + 'gpt-3.5-turbo-0125': 16375, // -10 from max + 'gpt-3.5-turbo': 16375, // -10 from max 'gpt-4': 8187, // -5 from max 'gpt-4-0613': 8187, // -5 from max 'gpt-4-32k': 32758, // -10 from max @@ -44,7 +36,18 @@ const openAIModels = { 'gpt-4-32k-0613': 32758, // -10 from max 'gpt-4-1106': 127500, // -500 from max 'gpt-4-0125': 127500, // -500 from max + 'gpt-4-turbo': 127500, // -500 from max + 'gpt-4-vision': 127500, // -500 from max + 'gpt-4o-2024-05-13': 127500, // -500 from max + 'gpt-4o-mini': 127500, // -500 from max + 'gpt-4o': 127500, // -500 from max 'gpt-4.5': 127500, // -500 from max + 'o1-mini': 127500, // -500 from max + 'o1-preview': 127500, // -500 from max + o1: 195000, // -5000 from max + 'o3-mini': 195000, // -5000 from max + o3: 200000, + 'o4-mini': 200000, 'gpt-4.1': 1047576, 'gpt-4.1-mini': 1047576, 'gpt-4.1-nano': 1047576, @@ -56,18 +59,6 @@ const openAIModels = { 'gpt-5-nano': 400000, 'gpt-5-pro': 400000, 'gpt-5.2-pro': 400000, - 'gpt-4o': 127500, // -500 from max - 'gpt-4o-mini': 127500, // -500 from max - 'gpt-4o-2024-05-13': 127500, // -500 from max - 'gpt-4-turbo': 127500, // -500 from max - 'gpt-4-vision': 127500, // -500 from max - 'gpt-3.5-turbo': 16375, // -10 from max - 'gpt-3.5-turbo-0613': 4092, // -5 from max - 'gpt-3.5-turbo-0301': 4092, // -5 from max - 'gpt-3.5-turbo-16k': 16375, // -10 from max - 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max - 'gpt-3.5-turbo-1106': 16375, // -10 from max - 'gpt-3.5-turbo-0125': 16375, // -10 from max }; const mistralModels = { @@ -76,15 +67,15 @@ const 
mistralModels = { 'mistral-small': 31990, // -10 from max 'mixtral-8x7b': 31990, // -10 from max 'mixtral-8x22b': 65536, - 'mistral-large': 131000, 'mistral-large-2402': 127500, 'mistral-large-2407': 127500, + 'mistral-large': 131000, + 'mistral-saba': 32000, + 'ministral-3b': 131000, + 'ministral-8b': 131000, 'mistral-nemo': 131000, 'pixtral-large': 131000, - 'mistral-saba': 32000, codestral: 256000, - 'ministral-8b': 131000, - 'ministral-3b': 131000, }; const cohereModels = { @@ -105,32 +96,22 @@ const googleModels = { 'gemma-3-27b': 131072, gemini: 30720, // -2048 from max 'gemini-pro-vision': 12288, + 'gemini-1.5': 1000000, + 'gemini-1.5-flash': 1000000, + 'gemini-1.5-flash-8b': 1000000, + 'gemini-2.0': 2000000, + 'gemini-2.0-flash': 1000000, + 'gemini-2.0-flash-lite': 1000000, 'gemini-exp': 2000000, - 'gemini-3': 1000000, // 1M input tokens, 64k output tokens - 'gemini-3-pro-image': 1000000, - 'gemini-3.1': 1000000, // 1M input tokens, 64k output tokens - 'gemini-3.1-flash-lite': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens + 'gemini-2.5': 1000000, 'gemini-2.5-pro': 1000000, 'gemini-2.5-flash': 1000000, 'gemini-2.5-flash-image': 1000000, 'gemini-2.5-flash-lite': 1000000, - 'gemini-2.0': 2000000, - 'gemini-2.0-flash': 1000000, - 'gemini-2.0-flash-lite': 1000000, - 'gemini-1.5': 1000000, - 'gemini-1.5-flash': 1000000, - 'gemini-1.5-flash-8b': 1000000, - 'text-bison-32k': 32758, // -10 from max - 'chat-bison-32k': 32758, // -10 from max - 'code-bison-32k': 32758, // -10 from max - 'codechat-bison-32k': 32758, - /* Codey, -5 from max: 6144 */ - 'code-': 6139, - 'codechat-': 6139, - /* PaLM2, -5 from max: 8192 */ - 'text-': 8187, - 'chat-': 8187, + 'gemini-3': 1000000, + 'gemini-3-pro-image': 1000000, + 'gemini-3.1': 1000000, + 'gemini-3.1-flash-lite': 1000000, }; const anthropicModels = { @@ -142,49 +123,35 @@ const anthropicModels = { 'claude-3-haiku': 200000, 'claude-3-sonnet': 200000, 
'claude-3-opus': 200000, - 'claude-3.5-haiku': 200000, - 'claude-3-5-haiku': 200000, 'claude-3-5-sonnet': 200000, 'claude-3.5-sonnet': 200000, - 'claude-3-7-sonnet': 200000, - 'claude-3.7-sonnet': 200000, 'claude-3-5-sonnet-latest': 200000, 'claude-3.5-sonnet-latest': 200000, - 'claude-haiku-4-5': 200000, - 'claude-sonnet-4': 1000000, - 'claude-sonnet-4-6': 1000000, + 'claude-3-5-haiku': 200000, + 'claude-3.5-haiku': 200000, + 'claude-3-7-sonnet': 200000, + 'claude-3.7-sonnet': 200000, 'claude-4': 200000, + 'claude-haiku-4-5': 200000, 'claude-opus-4': 200000, 'claude-opus-4-5': 200000, + 'claude-sonnet-4': 1000000, + 'claude-sonnet-4-6': 1000000, 'claude-opus-4-6': 1000000, }; const deepseekModels = { deepseek: 128000, 'deepseek-chat': 128000, - 'deepseek-reasoner': 128000, - 'deepseek-r1': 128000, 'deepseek-v3': 128000, 'deepseek.r1': 128000, + 'deepseek-r1': 128000, + 'deepseek-reasoner': 128000, }; const moonshotModels = { - // Base patterns (check last due to reverse iteration) - kimi: 262144, + // moonshot-v1 series (older) moonshot: 131072, - // kimi-k2 series (specific patterns) - 'kimi-latest': 128000, - 'kimi-k2': 262144, - 'kimi-k2.5': 262144, - 'kimi-k2-turbo': 262144, - 'kimi-k2-turbo-preview': 262144, - 'kimi-k2-0905': 262144, - 'kimi-k2-0905-preview': 262144, - 'kimi-k2-0711': 131072, - 'kimi-k2-0711-preview': 131072, - 'kimi-k2-thinking': 262144, - 'kimi-k2-thinking-turbo': 262144, - // moonshot-v1 series (specific patterns) 'moonshot-v1': 131072, 'moonshot-v1-auto': 131072, 'moonshot-v1-8k': 8192, @@ -196,99 +163,100 @@ const moonshotModels = { 'moonshot-v1-128k': 131072, 'moonshot-v1-128k-vision': 131072, 'moonshot-v1-128k-vision-preview': 131072, + // kimi series + kimi: 262144, + 'kimi-latest': 128000, + 'kimi-k2-0711': 131072, + 'kimi-k2-0711-preview': 131072, + 'kimi-k2-0905': 262144, + 'kimi-k2-0905-preview': 262144, + 'kimi-k2': 262144, + 'kimi-k2-turbo': 262144, + 'kimi-k2-turbo-preview': 262144, + 'kimi-k2-thinking': 262144, + 
'kimi-k2-thinking-turbo': 262144, + 'kimi-k2.5': 262144, // Bedrock moonshot models + 'moonshot.kimi-k2-0711': 131072, 'moonshot.kimi': 262144, 'moonshot.kimi-k2': 262144, - 'moonshot.kimi-k2.5': 262144, 'moonshot.kimi-k2-thinking': 262144, - 'moonshot.kimi-k2-0711': 131072, 'moonshotai.kimi': 262144, + 'moonshot.kimi-k2.5': 262144, 'moonshotai.kimi-k2.5': 262144, }; const metaModels = { - // Basic patterns - llama3: 8000, + // Llama 2 (oldest) llama2: 4000, - 'llama-3': 8000, 'llama-2': 4000, - - // llama3.x pattern + 'llama2-13b': 4000, + 'llama2-70b': 4000, + 'llama2:70b': 4000, + // Llama 3 base + llama3: 8000, + 'llama-3': 8000, + 'llama3-8b': 8000, + 'llama3-70b': 8000, + 'llama3:8b': 8000, + 'llama3:70b': 8000, + // Llama 3.1 'llama3.1': 127500, - 'llama3.2': 127500, - 'llama3.3': 127500, - - // llama3-x pattern 'llama3-1': 127500, - 'llama3-2': 127500, - 'llama3-3': 127500, - - // llama-3.x pattern 'llama-3.1': 127500, - 'llama-3.2': 127500, - 'llama-3.3': 127500, - - // llama3.x:Nb pattern - 'llama3.1:405b': 127500, - 'llama3.1:70b': 127500, 'llama3.1:8b': 127500, + 'llama3.1:70b': 127500, + 'llama3.1:405b': 127500, + 'llama3-1-8b': 127500, + 'llama3-1-70b': 127500, + 'llama3-1-405b': 127500, + 'llama-3.1-8b': 127500, + 'llama-3.1-70b': 127500, + 'llama-3.1-405b': 127500, + // Llama 3.2 + 'llama3.2': 127500, + 'llama3-2': 127500, + 'llama-3.2': 127500, 'llama3.2:1b': 127500, 'llama3.2:3b': 127500, 'llama3.2:11b': 127500, 'llama3.2:90b': 127500, - 'llama3.3:70b': 127500, - - // llama3-x-Nb pattern - 'llama3-1-405b': 127500, - 'llama3-1-70b': 127500, - 'llama3-1-8b': 127500, 'llama3-2-1b': 127500, 'llama3-2-3b': 127500, 'llama3-2-11b': 127500, 'llama3-2-90b': 127500, - 'llama3-3-70b': 127500, - - // llama-3.x-Nb pattern - 'llama-3.1-405b': 127500, - 'llama-3.1-70b': 127500, - 'llama-3.1-8b': 127500, 'llama-3.2-1b': 127500, 'llama-3.2-3b': 127500, 'llama-3.2-11b': 127500, 'llama-3.2-90b': 127500, + // Llama 3.3 (newest) + 'llama3.3': 127500, + 'llama3-3': 
127500, + 'llama-3.3': 127500, + 'llama3.3:70b': 127500, + 'llama3-3-70b': 127500, 'llama-3.3-70b': 127500, - - // Original llama2/3 patterns - 'llama3-70b': 8000, - 'llama3-8b': 8000, - 'llama2-70b': 4000, - 'llama2-13b': 4000, - 'llama3:70b': 8000, - 'llama3:8b': 8000, - 'llama2:70b': 4000, }; const qwenModels = { qwen: 32000, 'qwen2.5': 32000, - 'qwen-turbo': 1000000, - 'qwen-plus': 131000, 'qwen-max': 32000, + 'qwen-plus': 131000, + 'qwen-turbo': 1000000, 'qwq-32b': 32000, - // Qwen3 models - qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) - 'qwen3-8b': 128000, + // Qwen3 models (newest) + qwen3: 40960, 'qwen3-14b': 40960, 'qwen3-30b-a3b': 40960, 'qwen3-32b': 40960, 'qwen3-235b-a22b': 40960, - // Qwen3 VL (Vision-Language) models + 'qwen3-8b': 128000, + 'qwen3-vl-235b-a22b': 131072, 'qwen3-vl-8b-thinking': 256000, + 'qwen3-max': 256000, 'qwen3-vl-8b-instruct': 262144, 'qwen3-vl-30b-a3b': 262144, - 'qwen3-vl-235b-a22b': 131072, - // Qwen3 specialized models - 'qwen3-max': 256000, 'qwen3-coder': 262144, 'qwen3-coder-30b-a3b': 262144, 'qwen3-coder-plus': 128000, @@ -321,7 +289,6 @@ const openAIBedrockModels = { }; const bedrockModels = { - ...anthropicModels, ...mistralModels, ...cohereModels, ...deepseekModels, @@ -330,6 +297,7 @@ const bedrockModels = { ...ai21Models, ...amazonModels, ...openAIBedrockModels, + ...anthropicModels, }; const xAIModels = { @@ -346,24 +314,13 @@ const xAIModels = { 'grok-3-fast': 131072, 'grok-3-mini': 131072, 'grok-3-mini-fast': 131072, + 'grok-code-fast': 256000, // 256K context 'grok-4': 256000, // 256K context 'grok-4-fast': 2000000, // 2M context 'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants) - 'grok-code-fast': 256000, // 256K context }; const aggregateModels = { - ...openAIModels, - ...googleModels, - ...bedrockModels, - ...xAIModels, - ...qwenModels, - // GPT-OSS - 'gpt-oss': 131000, - 'gpt-oss:20b': 131000, - 'gpt-oss-20b': 131000, - 'gpt-oss:120b': 131000, - 'gpt-oss-120b': 
131000, // GLM models (Zhipu AI) glm4: 128000, 'glm-4': 128000, @@ -372,6 +329,18 @@ const aggregateModels = { 'glm-4.5-air': 131000, 'glm-4.5v': 66000, 'glm-4.6': 200000, + // GPT-OSS + 'gpt-oss': 131000, + 'gpt-oss:20b': 131000, + 'gpt-oss-20b': 131000, + 'gpt-oss:120b': 131000, + 'gpt-oss-120b': 131000, + ...qwenModels, + ...xAIModels, + ...googleModels, + ...bedrockModels, + // OpenAI last — reverse iteration checks last-spread keys first for same-length ties + ...openAIModels, }; export const maxTokensMap = { @@ -435,26 +404,28 @@ export const maxOutputTokensMap = { [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, }; -/** - * Finds the first matching pattern in the tokens map. - * @param {string} modelName - * @param {Record | EndpointTokenConfig} tokensMap - * @returns {string|null} - */ +/** Finds the longest matching key in the tokens map via substring match. */ export function findMatchingPattern( modelName: string, tokensMap: Record | EndpointTokenConfig, ): string | null { const keys = Object.keys(tokensMap); const lowerModelName = modelName.toLowerCase(); + let bestMatch: string | null = null; + let bestLength = 0; for (let i = keys.length - 1; i >= 0; i--) { - const modelKey = keys[i]; - if (lowerModelName.includes(modelKey)) { - return modelKey; + const key = keys[i]; + const lowerKey = key.toLowerCase(); + if (lowerKey.length > bestLength && lowerModelName.includes(lowerKey)) { + if (lowerKey.length === lowerModelName.length) { + return key; + } + bestMatch = key; + bestLength = lowerKey.length; } } - return null; + return bestMatch; } /**