🏆 fix: Longest-or-Exact-Key Match in findMatchingPattern, Remove Deprecated Models (#12073)

* 🔧 fix: Use longest-match in findMatchingPattern, remove deprecated PaLM2/Codey models findMatchingPattern now selects the longest matching key instead of the first reverse-order match, preventing cross-provider substring collisions (e.g., "gpt-5.2-chat-2025-12-11" incorrectly matching Google's "chat-" pattern instead of OpenAI's "gpt-5.2"). Adds early exit when key length equals model name length. Reorders aggregateModels spreads so OpenAI is last (preferred on same-length ties). Removes deprecated PaLM2/Codey entries from googleModels. * refactor: re-order models based on more likely usage * refactor: Improve key matching logic in findMatchingPattern Updated the findMatchingPattern function to enhance key matching by ensuring case-insensitive comparisons and maintaining the longest match priority. Clarified comments regarding key ordering and performance implications, emphasizing the importance of defining older models first for efficiency and the handling of same-length ties. This refactor aims to improve code clarity and maintainability. * test: Enhance findMatchingPattern tests for edge cases and performance Added new test cases to the findMatchingPattern function, covering scenarios such as empty model names, case-insensitive matching, and performance optimizations. Included checks for longest match priority and ensured deprecated PaLM2/Codey models are no longer present in token entries. This update aims to improve test coverage and validate the function's behavior under various conditions. * test: Update findMatchingPattern test to use last key for exact match validation Modified the test for findMatchingPattern to utilize the last key from the openAIMap for exact match checks, ensuring the test accurately reflects the expected behavior of the function. This change enhances the clarity and reliability of the test case.
2026-03-09 17:42:38 +01:00 · 2026-03-04 19:34:13 -05:00 · 2026-03-04 19:34:13 -05:00 · 956f8fb6f0
commit 956f8fb6f0
parent c6dba9f0a1
2 changed files with 311 additions and 189 deletions
--- a/api/utils/tokens.spec.js
+++ b/api/utils/tokens.spec.js
@ -251,16 +251,6 @@ describe('getModelMaxTokens', () => {
    });
  });
  // Tests for Google models
  test('should return correct tokens for exact match - Google models', () => {
    expect(getModelMaxTokens('text-bison-32k', EModelEndpoint.google)).toBe(
      maxTokensMap[EModelEndpoint.google]['text-bison-32k'],
    );
    expect(getModelMaxTokens('codechat-bison-32k', EModelEndpoint.google)).toBe(
      maxTokensMap[EModelEndpoint.google]['codechat-bison-32k'],
    );
  });
  test('should return undefined for no match - Google models', () => {
    expect(getModelMaxTokens('unknown-google-model', EModelEndpoint.google)).toBeUndefined();
  });
@ -317,12 +307,6 @@ describe('getModelMaxTokens', () => {
    expect(getModelMaxTokens('gemini-pro', EModelEndpoint.google)).toBe(
      maxTokensMap[EModelEndpoint.google]['gemini'],
    );
    expect(getModelMaxTokens('code-', EModelEndpoint.google)).toBe(
      maxTokensMap[EModelEndpoint.google]['code-'],
    );
    expect(getModelMaxTokens('chat-', EModelEndpoint.google)).toBe(
      maxTokensMap[EModelEndpoint.google]['chat-'],
    );
  });
  test('should return correct tokens for partial match - Cohere models', () => {
@ -541,6 +525,184 @@ describe('getModelMaxTokens', () => {
  });
 });
 describe('findMatchingPattern - longest match wins', () => {
  test('should prefer longer matching key over shorter cross-provider pattern', () => {
    const result = findMatchingPattern(
      'gpt-5.2-chat-2025-12-11',
      maxTokensMap[EModelEndpoint.openAI],
    );
    expect(result).toBe('gpt-5.2');
  });
  test('should match gpt-5.2 tokens for date-suffixed chat variant', () => {
    expect(getModelMaxTokens('gpt-5.2-chat-2025-12-11')).toBe(
      maxTokensMap[EModelEndpoint.openAI]['gpt-5.2'],
    );
  });
  test('should match gpt-5.2-pro over shorter patterns', () => {
    expect(getModelMaxTokens('gpt-5.2-pro-chat-2025-12-11')).toBe(
      maxTokensMap[EModelEndpoint.openAI]['gpt-5.2-pro'],
    );
  });
  test('should match gpt-5-mini over gpt-5 for mini variants', () => {
    expect(getModelMaxTokens('gpt-5-mini-chat-2025-01-01')).toBe(
      maxTokensMap[EModelEndpoint.openAI]['gpt-5-mini'],
    );
  });
  test('should prefer gpt-4-1106 over gpt-4 for versioned model names', () => {
    const result = findMatchingPattern('gpt-4-1106-preview', maxTokensMap[EModelEndpoint.openAI]);
    expect(result).toBe('gpt-4-1106');
  });
  test('should prefer gpt-4-32k-0613 over gpt-4-32k for exact versioned names', () => {
    const result = findMatchingPattern('gpt-4-32k-0613', maxTokensMap[EModelEndpoint.openAI]);
    expect(result).toBe('gpt-4-32k-0613');
  });
  test('should prefer claude-3-5-sonnet over claude-3', () => {
    const result = findMatchingPattern(
      'claude-3-5-sonnet-20241022',
      maxTokensMap[EModelEndpoint.anthropic],
    );
    expect(result).toBe('claude-3-5-sonnet');
  });
  test('should prefer gemini-2.0-flash-lite over gemini-2.0-flash', () => {
    const result = findMatchingPattern(
      'gemini-2.0-flash-lite-preview',
      maxTokensMap[EModelEndpoint.google],
    );
    expect(result).toBe('gemini-2.0-flash-lite');
  });
 });
 describe('findMatchingPattern - bestLength selection', () => {
  test('should return the longest matching key when multiple keys match', () => {
    const tokensMap = { short: 100, 'short-med': 200, 'short-med-long': 300 };
    expect(findMatchingPattern('short-med-long-extra', tokensMap)).toBe('short-med-long');
  });
  test('should return the longest match regardless of key insertion order', () => {
    const tokensMap = { 'a-b-c': 300, a: 100, 'a-b': 200 };
    expect(findMatchingPattern('a-b-c-d', tokensMap)).toBe('a-b-c');
  });
  test('should return null when no key matches', () => {
    const tokensMap = { alpha: 100, beta: 200 };
    expect(findMatchingPattern('gamma-delta', tokensMap)).toBeNull();
  });
  test('should return the single matching key when only one matches', () => {
    const tokensMap = { alpha: 100, beta: 200, gamma: 300 };
    expect(findMatchingPattern('beta-extended', tokensMap)).toBe('beta');
  });
  test('should match case-insensitively against model name', () => {
    const tokensMap = { 'gpt-5': 400000 };
    expect(findMatchingPattern('GPT-5-turbo', tokensMap)).toBe('gpt-5');
  });
  test('should select the longest key among overlapping substring matches', () => {
    const tokensMap = { 'gpt-': 100, 'gpt-5': 200, 'gpt-5.2': 300, 'gpt-5.2-pro': 400 };
    expect(findMatchingPattern('gpt-5.2-pro-2025-01-01', tokensMap)).toBe('gpt-5.2-pro');
    expect(findMatchingPattern('gpt-5.2-chat-2025-01-01', tokensMap)).toBe('gpt-5.2');
    expect(findMatchingPattern('gpt-5.1-preview', tokensMap)).toBe('gpt-5');
    expect(findMatchingPattern('gpt-unknown', tokensMap)).toBe('gpt-');
  });
  test('should not be confused by a short key that appears later in the model name', () => {
    const tokensMap = { 'model-v2': 200, v2: 100 };
    expect(findMatchingPattern('model-v2-extended', tokensMap)).toBe('model-v2');
  });
  test('should handle exact-length match as the best match', () => {
    const tokensMap = { 'exact-model': 500, exact: 100 };
    expect(findMatchingPattern('exact-model', tokensMap)).toBe('exact-model');
  });
  test('should return null for empty model name', () => {
    expect(findMatchingPattern('', { 'gpt-5': 400000 })).toBeNull();
  });
  test('should prefer last-defined key on same-length ties', () => {
    const tokensMap = { 'aa-bb': 100, 'cc-dd': 200 };
    // model name contains both 5-char keys; last-defined wins in reverse iteration
    expect(findMatchingPattern('aa-bb-cc-dd', tokensMap)).toBe('cc-dd');
  });
  test('longest match beats short cross-provider pattern even when both present', () => {
    const tokensMap = { 'gpt-5.2': 400000, 'chat-': 8187 };
    expect(findMatchingPattern('gpt-5.2-chat-2025-12-11', tokensMap)).toBe('gpt-5.2');
  });
  test('should match case-insensitively against keys', () => {
    const tokensMap = { 'GPT-5': 400000 };
    expect(findMatchingPattern('gpt-5-turbo', tokensMap)).toBe('GPT-5');
  });
 });
 describe('findMatchingPattern - iteration performance', () => {
  let includesSpy;
  beforeEach(() => {
    includesSpy = jest.spyOn(String.prototype, 'includes');
  });
  afterEach(() => {
    includesSpy.mockRestore();
  });
  test('exact match early-exits with minimal includes() checks', () => {
    const openAIMap = maxTokensMap[EModelEndpoint.openAI];
    const keys = Object.keys(openAIMap);
    const lastKey = keys[keys.length - 1];
    includesSpy.mockClear();
    const result = findMatchingPattern(lastKey, openAIMap);
    const exactCalls = includesSpy.mock.calls.length;
    expect(result).toBe(lastKey);
    expect(exactCalls).toBe(1);
  });
  test('bestLength check skips includes() for shorter keys after a long match', () => {
    const openAIMap = maxTokensMap[EModelEndpoint.openAI];
    includesSpy.mockClear();
    findMatchingPattern('gpt-3.5-turbo-0301-test', openAIMap);
    const longKeyCalls = includesSpy.mock.calls.length;
    includesSpy.mockClear();
    findMatchingPattern('gpt-5.3-chat-latest', openAIMap);
    const shortKeyCalls = includesSpy.mock.calls.length;
    // gpt-3.5-turbo-0301 (20 chars) matches early, then bestLength prunes most keys
    // gpt-5.3 (7 chars) is short, so fewer keys are pruned by the length check
    expect(longKeyCalls).toBeLessThan(shortKeyCalls);
  });
  test('last-defined keys are checked first in reverse iteration', () => {
    const tokensMap = { first: 100, second: 200, third: 300 };
    includesSpy.mockClear();
    const result = findMatchingPattern('third', tokensMap);
    const calls = includesSpy.mock.calls.length;
    // 'third' is last key, found on first reverse check, exact match exits immediately
    expect(result).toBe('third');
    expect(calls).toBe(1);
  });
 });
 describe('deprecated PaLM2/Codey model removal', () => {
  test('deprecated PaLM2/Codey models no longer have token entries', () => {
    expect(getModelMaxTokens('text-bison-32k', EModelEndpoint.google)).toBeUndefined();
    expect(getModelMaxTokens('codechat-bison-32k', EModelEndpoint.google)).toBeUndefined();
    expect(getModelMaxTokens('code-bison', EModelEndpoint.google)).toBeUndefined();
    expect(getModelMaxTokens('chat-bison', EModelEndpoint.google)).toBeUndefined();
  });
 });
 describe('matchModelName', () => {
  it('should return the exact model name if it exists in maxTokensMap', () => {
    expect(matchModelName('gpt-4-32k-0613')).toBe('gpt-4-32k-0613');
@ -642,22 +804,11 @@ describe('matchModelName', () => {
    expect(matchModelName('gpt-5.3-2025-03-01')).toBe('gpt-5.3');
  });
  // Tests for Google models
  it('should return the exact model name if it exists in maxTokensMap - Google models', () => {
    expect(matchModelName('text-bison-32k', EModelEndpoint.google)).toBe('text-bison-32k');
    expect(matchModelName('codechat-bison-32k', EModelEndpoint.google)).toBe('codechat-bison-32k');
  });
  it('should return the input model name if no match is found - Google models', () => {
    expect(matchModelName('unknown-google-model', EModelEndpoint.google)).toBe(
      'unknown-google-model',
    );
  });
  it('should return the closest matching key for partial matches - Google models', () => {
    expect(matchModelName('code-', EModelEndpoint.google)).toBe('code-');
    expect(matchModelName('chat-', EModelEndpoint.google)).toBe('chat-');
  });
 });
 describe('Meta Models Tests', () => {
--- a/packages/api/src/utils/tokens.ts
+++ b/packages/api/src/utils/tokens.ts
@ -5,38 +5,30 @@ import type { EndpointTokenConfig, TokenConfig } from '~/types';
 /**
 * Model Token Configuration Maps
 *
- * IMPORTANT: Key Ordering for Pattern Matching
+ * Pattern Matching
- * ============================================
+ * ================
- * The `findMatchingPattern` function iterates through object keys in REVERSE order
+ * `findMatchingPattern` uses `modelName.includes(key)` and selects the **longest**
- * (last-defined keys are checked first) and uses `modelName.includes(key)` for matching.
+ * matching key. If a key's length equals the model name's length (exact match), it
 * returns immediately — no further keys are checked.
 *
- * This means:
+ * For keys of different lengths, definition order does not affect the result — the
- * 1. BASE PATTERNS must be defined FIRST (e.g., "kimi", "moonshot")
+ * longest match always wins. For **same-length ties**, the function iterates in
- * 2. SPECIFIC PATTERNS must be defined AFTER their base patterns (e.g., "kimi-k2", "kimi-k2.5")
+ * reverse, so the last-defined key wins. Key ordering therefore matters for:
 *
- * Example ordering for Kimi models:
+ * 1. **Performance**: list older/legacy models first, newer models last — newer
- *   kimi: 262144,           // Base pattern - checked last
+ *    models are more commonly used and will match earlier in the reverse scan.
- *   'kimi-k2': 262144,      // More specific - checked before "kimi"
+ * 2. **Same-length tie-breaking**: in `aggregateModels`, OpenAI is spread last
- *   'kimi-k2.5': 262144,    // Most specific - checked first
+ *    so its keys are preferred when two keys of equal length both match.
 *
 * Why this matters:
 * - Model name "kimi-k2.5" contains both "kimi" and "kimi-k2" as substrings
 * - If "kimi" were checked first, it would incorrectly match "kimi-k2.5"
 * - By defining specific patterns AFTER base patterns, they're checked first in reverse iteration
 *
 * When adding new model families:
 * 1. Define the base/generic pattern first
 * 2. Define increasingly specific patterns after
 * 3. Ensure no pattern is a substring of another that should match differently
 */
 const openAIModels = {
-  'o4-mini': 200000,
+  'gpt-3.5-turbo-0301': 4092, // -5 from max
-  'o3-mini': 195000, // -5000 from max
+  'gpt-3.5-turbo-0613': 4092, // -5 from max
-  o3: 200000,
+  'gpt-3.5-turbo-16k': 16375, // -10 from max
-  o1: 195000, // -5000 from max
+  'gpt-3.5-turbo-16k-0613': 16375, // -10 from max
-  'o1-mini': 127500, // -500 from max
+  'gpt-3.5-turbo-1106': 16375, // -10 from max
-  'o1-preview': 127500, // -500 from max
+  'gpt-3.5-turbo-0125': 16375, // -10 from max
  'gpt-3.5-turbo': 16375, // -10 from max
  'gpt-4': 8187, // -5 from max
  'gpt-4-0613': 8187, // -5 from max
  'gpt-4-32k': 32758, // -10 from max
@ -44,7 +36,18 @@ const openAIModels = {
  'gpt-4-32k-0613': 32758, // -10 from max
  'gpt-4-1106': 127500, // -500 from max
  'gpt-4-0125': 127500, // -500 from max
  'gpt-4-turbo': 127500, // -500 from max
  'gpt-4-vision': 127500, // -500 from max
  'gpt-4o-2024-05-13': 127500, // -500 from max
  'gpt-4o-mini': 127500, // -500 from max
  'gpt-4o': 127500, // -500 from max
  'gpt-4.5': 127500, // -500 from max
  'o1-mini': 127500, // -500 from max
  'o1-preview': 127500, // -500 from max
  o1: 195000, // -5000 from max
  'o3-mini': 195000, // -5000 from max
  o3: 200000,
  'o4-mini': 200000,
  'gpt-4.1': 1047576,
  'gpt-4.1-mini': 1047576,
  'gpt-4.1-nano': 1047576,
@ -56,18 +59,6 @@ const openAIModels = {
  'gpt-5-nano': 400000,
  'gpt-5-pro': 400000,
  'gpt-5.2-pro': 400000,
  'gpt-4o': 127500, // -500 from max
  'gpt-4o-mini': 127500, // -500 from max
  'gpt-4o-2024-05-13': 127500, // -500 from max
  'gpt-4-turbo': 127500, // -500 from max
  'gpt-4-vision': 127500, // -500 from max
  'gpt-3.5-turbo': 16375, // -10 from max
  'gpt-3.5-turbo-0613': 4092, // -5 from max
  'gpt-3.5-turbo-0301': 4092, // -5 from max
  'gpt-3.5-turbo-16k': 16375, // -10 from max
  'gpt-3.5-turbo-16k-0613': 16375, // -10 from max
  'gpt-3.5-turbo-1106': 16375, // -10 from max
  'gpt-3.5-turbo-0125': 16375, // -10 from max
 };
 const mistralModels = {
@ -76,15 +67,15 @@ const mistralModels = {
  'mistral-small': 31990, // -10 from max
  'mixtral-8x7b': 31990, // -10 from max
  'mixtral-8x22b': 65536,
  'mistral-large': 131000,
  'mistral-large-2402': 127500,
  'mistral-large-2407': 127500,
  'mistral-large': 131000,
  'mistral-saba': 32000,
  'ministral-3b': 131000,
  'ministral-8b': 131000,
  'mistral-nemo': 131000,
  'pixtral-large': 131000,
  'mistral-saba': 32000,
  codestral: 256000,
  'ministral-8b': 131000,
  'ministral-3b': 131000,
 };
 const cohereModels = {
@ -105,32 +96,22 @@ const googleModels = {
  'gemma-3-27b': 131072,
  gemini: 30720, // -2048 from max
  'gemini-pro-vision': 12288,
  'gemini-1.5': 1000000,
  'gemini-1.5-flash': 1000000,
  'gemini-1.5-flash-8b': 1000000,
  'gemini-2.0': 2000000,
  'gemini-2.0-flash': 1000000,
  'gemini-2.0-flash-lite': 1000000,
  'gemini-exp': 2000000,
-  'gemini-3': 1000000, // 1M input tokens, 64k output tokens
+  'gemini-2.5': 1000000,
  'gemini-3-pro-image': 1000000,
  'gemini-3.1': 1000000, // 1M input tokens, 64k output tokens
  'gemini-3.1-flash-lite': 1000000, // 1M input tokens, 64k output tokens
  'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens
  'gemini-2.5-pro': 1000000,
  'gemini-2.5-flash': 1000000,
  'gemini-2.5-flash-image': 1000000,
  'gemini-2.5-flash-lite': 1000000,
-  'gemini-2.0': 2000000,
+  'gemini-3': 1000000,
-  'gemini-2.0-flash': 1000000,
+  'gemini-3-pro-image': 1000000,
-  'gemini-2.0-flash-lite': 1000000,
+  'gemini-3.1': 1000000,
-  'gemini-1.5': 1000000,
+  'gemini-3.1-flash-lite': 1000000,
  'gemini-1.5-flash': 1000000,
  'gemini-1.5-flash-8b': 1000000,
  'text-bison-32k': 32758, // -10 from max
  'chat-bison-32k': 32758, // -10 from max
  'code-bison-32k': 32758, // -10 from max
  'codechat-bison-32k': 32758,
  /* Codey, -5 from max: 6144 */
  'code-': 6139,
  'codechat-': 6139,
  /* PaLM2, -5 from max: 8192 */
  'text-': 8187,
  'chat-': 8187,
 };
 const anthropicModels = {
@ -142,49 +123,35 @@ const anthropicModels = {
  'claude-3-haiku': 200000,
  'claude-3-sonnet': 200000,
  'claude-3-opus': 200000,
  'claude-3.5-haiku': 200000,
  'claude-3-5-haiku': 200000,
  'claude-3-5-sonnet': 200000,
  'claude-3.5-sonnet': 200000,
  'claude-3-7-sonnet': 200000,
  'claude-3.7-sonnet': 200000,
  'claude-3-5-sonnet-latest': 200000,
  'claude-3.5-sonnet-latest': 200000,
-  'claude-haiku-4-5': 200000,
+  'claude-3-5-haiku': 200000,
-  'claude-sonnet-4': 1000000,
+  'claude-3.5-haiku': 200000,
-  'claude-sonnet-4-6': 1000000,
+  'claude-3-7-sonnet': 200000,
  'claude-3.7-sonnet': 200000,
  'claude-4': 200000,
  'claude-haiku-4-5': 200000,
  'claude-opus-4': 200000,
  'claude-opus-4-5': 200000,
  'claude-sonnet-4': 1000000,
  'claude-sonnet-4-6': 1000000,
  'claude-opus-4-6': 1000000,
 };
 const deepseekModels = {
  deepseek: 128000,
  'deepseek-chat': 128000,
  'deepseek-reasoner': 128000,
  'deepseek-r1': 128000,
  'deepseek-v3': 128000,
  'deepseek.r1': 128000,
  'deepseek-r1': 128000,
  'deepseek-reasoner': 128000,
 };
 const moonshotModels = {
-  // Base patterns (check last due to reverse iteration)
+  // moonshot-v1 series (older)
  kimi: 262144,
  moonshot: 131072,
  // kimi-k2 series (specific patterns)
  'kimi-latest': 128000,
  'kimi-k2': 262144,
  'kimi-k2.5': 262144,
  'kimi-k2-turbo': 262144,
  'kimi-k2-turbo-preview': 262144,
  'kimi-k2-0905': 262144,
  'kimi-k2-0905-preview': 262144,
  'kimi-k2-0711': 131072,
  'kimi-k2-0711-preview': 131072,
  'kimi-k2-thinking': 262144,
  'kimi-k2-thinking-turbo': 262144,
  // moonshot-v1 series (specific patterns)
  'moonshot-v1': 131072,
  'moonshot-v1-auto': 131072,
  'moonshot-v1-8k': 8192,
@ -196,99 +163,100 @@ const moonshotModels = {
  'moonshot-v1-128k': 131072,
  'moonshot-v1-128k-vision': 131072,
  'moonshot-v1-128k-vision-preview': 131072,
  // kimi series
  kimi: 262144,
  'kimi-latest': 128000,
  'kimi-k2-0711': 131072,
  'kimi-k2-0711-preview': 131072,
  'kimi-k2-0905': 262144,
  'kimi-k2-0905-preview': 262144,
  'kimi-k2': 262144,
  'kimi-k2-turbo': 262144,
  'kimi-k2-turbo-preview': 262144,
  'kimi-k2-thinking': 262144,
  'kimi-k2-thinking-turbo': 262144,
  'kimi-k2.5': 262144,
  // Bedrock moonshot models
  'moonshot.kimi-k2-0711': 131072,
  'moonshot.kimi': 262144,
  'moonshot.kimi-k2': 262144,
  'moonshot.kimi-k2.5': 262144,
  'moonshot.kimi-k2-thinking': 262144,
  'moonshot.kimi-k2-0711': 131072,
  'moonshotai.kimi': 262144,
  'moonshot.kimi-k2.5': 262144,
  'moonshotai.kimi-k2.5': 262144,
 };
 const metaModels = {
-  // Basic patterns
+  // Llama 2 (oldest)
  llama3: 8000,
  llama2: 4000,
  'llama-3': 8000,
  'llama-2': 4000,
-
+  'llama2-13b': 4000,
-  // llama3.x pattern
+  'llama2-70b': 4000,
  'llama2:70b': 4000,
  // Llama 3 base
  llama3: 8000,
  'llama-3': 8000,
  'llama3-8b': 8000,
  'llama3-70b': 8000,
  'llama3:8b': 8000,
  'llama3:70b': 8000,
  // Llama 3.1
  'llama3.1': 127500,
  'llama3.2': 127500,
  'llama3.3': 127500,
  // llama3-x pattern
  'llama3-1': 127500,
  'llama3-2': 127500,
  'llama3-3': 127500,
  // llama-3.x pattern
  'llama-3.1': 127500,
  'llama-3.2': 127500,
  'llama-3.3': 127500,
  // llama3.x:Nb pattern
  'llama3.1:405b': 127500,
  'llama3.1:70b': 127500,
  'llama3.1:8b': 127500,
  'llama3.1:70b': 127500,
  'llama3.1:405b': 127500,
  'llama3-1-8b': 127500,
  'llama3-1-70b': 127500,
  'llama3-1-405b': 127500,
  'llama-3.1-8b': 127500,
  'llama-3.1-70b': 127500,
  'llama-3.1-405b': 127500,
  // Llama 3.2
  'llama3.2': 127500,
  'llama3-2': 127500,
  'llama-3.2': 127500,
  'llama3.2:1b': 127500,
  'llama3.2:3b': 127500,
  'llama3.2:11b': 127500,
  'llama3.2:90b': 127500,
  'llama3.3:70b': 127500,
  // llama3-x-Nb pattern
  'llama3-1-405b': 127500,
  'llama3-1-70b': 127500,
  'llama3-1-8b': 127500,
  'llama3-2-1b': 127500,
  'llama3-2-3b': 127500,
  'llama3-2-11b': 127500,
  'llama3-2-90b': 127500,
  'llama3-3-70b': 127500,
  // llama-3.x-Nb pattern
  'llama-3.1-405b': 127500,
  'llama-3.1-70b': 127500,
  'llama-3.1-8b': 127500,
  'llama-3.2-1b': 127500,
  'llama-3.2-3b': 127500,
  'llama-3.2-11b': 127500,
  'llama-3.2-90b': 127500,
  // Llama 3.3 (newest)
  'llama3.3': 127500,
  'llama3-3': 127500,
  'llama-3.3': 127500,
  'llama3.3:70b': 127500,
  'llama3-3-70b': 127500,
  'llama-3.3-70b': 127500,
  // Original llama2/3 patterns
  'llama3-70b': 8000,
  'llama3-8b': 8000,
  'llama2-70b': 4000,
  'llama2-13b': 4000,
  'llama3:70b': 8000,
  'llama3:8b': 8000,
  'llama2:70b': 4000,
 };
 const qwenModels = {
  qwen: 32000,
  'qwen2.5': 32000,
  'qwen-turbo': 1000000,
  'qwen-plus': 131000,
  'qwen-max': 32000,
  'qwen-plus': 131000,
  'qwen-turbo': 1000000,
  'qwq-32b': 32000,
-  // Qwen3 models
+  // Qwen3 models (newest)
-  qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
+  qwen3: 40960,
  'qwen3-8b': 128000,
  'qwen3-14b': 40960,
  'qwen3-30b-a3b': 40960,
  'qwen3-32b': 40960,
  'qwen3-235b-a22b': 40960,
-  // Qwen3 VL (Vision-Language) models
+  'qwen3-8b': 128000,
  'qwen3-vl-235b-a22b': 131072,
  'qwen3-vl-8b-thinking': 256000,
  'qwen3-max': 256000,
  'qwen3-vl-8b-instruct': 262144,
  'qwen3-vl-30b-a3b': 262144,
  'qwen3-vl-235b-a22b': 131072,
  // Qwen3 specialized models
  'qwen3-max': 256000,
  'qwen3-coder': 262144,
  'qwen3-coder-30b-a3b': 262144,
  'qwen3-coder-plus': 128000,
@ -321,7 +289,6 @@ const openAIBedrockModels = {
 };
 const bedrockModels = {
  ...anthropicModels,
  ...mistralModels,
  ...cohereModels,
  ...deepseekModels,
@ -330,6 +297,7 @@ const bedrockModels = {
  ...ai21Models,
  ...amazonModels,
  ...openAIBedrockModels,
  ...anthropicModels,
 };
 const xAIModels = {
@ -346,24 +314,13 @@ const xAIModels = {
  'grok-3-fast': 131072,
  'grok-3-mini': 131072,
  'grok-3-mini-fast': 131072,
  'grok-code-fast': 256000, // 256K context
  'grok-4': 256000, // 256K context
  'grok-4-fast': 2000000, // 2M context
  'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants)
  'grok-code-fast': 256000, // 256K context
 };
 const aggregateModels = {
  ...openAIModels,
  ...googleModels,
  ...bedrockModels,
  ...xAIModels,
  ...qwenModels,
  // GPT-OSS
  'gpt-oss': 131000,
  'gpt-oss:20b': 131000,
  'gpt-oss-20b': 131000,
  'gpt-oss:120b': 131000,
  'gpt-oss-120b': 131000,
  // GLM models (Zhipu AI)
  glm4: 128000,
  'glm-4': 128000,
@ -372,6 +329,18 @@ const aggregateModels = {
  'glm-4.5-air': 131000,
  'glm-4.5v': 66000,
  'glm-4.6': 200000,
  // GPT-OSS
  'gpt-oss': 131000,
  'gpt-oss:20b': 131000,
  'gpt-oss-20b': 131000,
  'gpt-oss:120b': 131000,
  'gpt-oss-120b': 131000,
  ...qwenModels,
  ...xAIModels,
  ...googleModels,
  ...bedrockModels,
  // OpenAI last — reverse iteration checks last-spread keys first for same-length ties
  ...openAIModels,
 };
 export const maxTokensMap = {
@ -435,26 +404,28 @@ export const maxOutputTokensMap = {
  [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
 };
-/**
+/** Finds the longest matching key in the tokens map via substring match. */
 * Finds the first matching pattern in the tokens map.
 * @param {string} modelName
 * @param {Record<string, number> | EndpointTokenConfig} tokensMap
 * @returns {string|null}
 */
 export function findMatchingPattern(
  modelName: string,
  tokensMap: Record<string, number> | EndpointTokenConfig,
 ): string | null {
  const keys = Object.keys(tokensMap);
  const lowerModelName = modelName.toLowerCase();
  let bestMatch: string | null = null;
  let bestLength = 0;
  for (let i = keys.length - 1; i >= 0; i--) {
-    const modelKey = keys[i];
+    const key = keys[i];
-    if (lowerModelName.includes(modelKey)) {
+    const lowerKey = key.toLowerCase();
-      return modelKey;
+    if (lowerKey.length > bestLength && lowerModelName.includes(lowerKey)) {
      if (lowerKey.length === lowerModelName.length) {
        return key;
      }
      bestMatch = key;
      bestLength = lowerKey.length;
    }
  }
-  return null;
+  return bestMatch;
 }
 /**