diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index c19c7471d5..50974022cd 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -251,16 +251,6 @@ describe('getModelMaxTokens', () => { }); }); - // Tests for Google models - test('should return correct tokens for exact match - Google models', () => { - expect(getModelMaxTokens('text-bison-32k', EModelEndpoint.google)).toBe( - maxTokensMap[EModelEndpoint.google]['text-bison-32k'], - ); - expect(getModelMaxTokens('codechat-bison-32k', EModelEndpoint.google)).toBe( - maxTokensMap[EModelEndpoint.google]['codechat-bison-32k'], - ); - }); - test('should return undefined for no match - Google models', () => { expect(getModelMaxTokens('unknown-google-model', EModelEndpoint.google)).toBeUndefined(); }); @@ -317,12 +307,6 @@ describe('getModelMaxTokens', () => { expect(getModelMaxTokens('gemini-pro', EModelEndpoint.google)).toBe( maxTokensMap[EModelEndpoint.google]['gemini'], ); - expect(getModelMaxTokens('code-', EModelEndpoint.google)).toBe( - maxTokensMap[EModelEndpoint.google]['code-'], - ); - expect(getModelMaxTokens('chat-', EModelEndpoint.google)).toBe( - maxTokensMap[EModelEndpoint.google]['chat-'], - ); }); test('should return correct tokens for partial match - Cohere models', () => { @@ -541,6 +525,184 @@ describe('getModelMaxTokens', () => { }); }); +describe('findMatchingPattern - longest match wins', () => { + test('should prefer longer matching key over shorter cross-provider pattern', () => { + const result = findMatchingPattern( + 'gpt-5.2-chat-2025-12-11', + maxTokensMap[EModelEndpoint.openAI], + ); + expect(result).toBe('gpt-5.2'); + }); + + test('should match gpt-5.2 tokens for date-suffixed chat variant', () => { + expect(getModelMaxTokens('gpt-5.2-chat-2025-12-11')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5.2'], + ); + }); + + test('should match gpt-5.2-pro over shorter patterns', () => { + expect(getModelMaxTokens('gpt-5.2-pro-chat-2025-12-11')).toBe( + 
maxTokensMap[EModelEndpoint.openAI]['gpt-5.2-pro'], + ); + }); + + test('should match gpt-5-mini over gpt-5 for mini variants', () => { + expect(getModelMaxTokens('gpt-5-mini-chat-2025-01-01')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5-mini'], + ); + }); + + test('should prefer gpt-4-1106 over gpt-4 for versioned model names', () => { + const result = findMatchingPattern('gpt-4-1106-preview', maxTokensMap[EModelEndpoint.openAI]); + expect(result).toBe('gpt-4-1106'); + }); + + test('should prefer gpt-4-32k-0613 over gpt-4-32k for exact versioned names', () => { + const result = findMatchingPattern('gpt-4-32k-0613', maxTokensMap[EModelEndpoint.openAI]); + expect(result).toBe('gpt-4-32k-0613'); + }); + + test('should prefer claude-3-5-sonnet over claude-3', () => { + const result = findMatchingPattern( + 'claude-3-5-sonnet-20241022', + maxTokensMap[EModelEndpoint.anthropic], + ); + expect(result).toBe('claude-3-5-sonnet'); + }); + + test('should prefer gemini-2.0-flash-lite over gemini-2.0-flash', () => { + const result = findMatchingPattern( + 'gemini-2.0-flash-lite-preview', + maxTokensMap[EModelEndpoint.google], + ); + expect(result).toBe('gemini-2.0-flash-lite'); + }); +}); + +describe('findMatchingPattern - bestLength selection', () => { + test('should return the longest matching key when multiple keys match', () => { + const tokensMap = { short: 100, 'short-med': 200, 'short-med-long': 300 }; + expect(findMatchingPattern('short-med-long-extra', tokensMap)).toBe('short-med-long'); + }); + + test('should return the longest match regardless of key insertion order', () => { + const tokensMap = { 'a-b-c': 300, a: 100, 'a-b': 200 }; + expect(findMatchingPattern('a-b-c-d', tokensMap)).toBe('a-b-c'); + }); + + test('should return null when no key matches', () => { + const tokensMap = { alpha: 100, beta: 200 }; + expect(findMatchingPattern('gamma-delta', tokensMap)).toBeNull(); + }); + + test('should return the single matching key when only one matches', () => { 
+ const tokensMap = { alpha: 100, beta: 200, gamma: 300 }; + expect(findMatchingPattern('beta-extended', tokensMap)).toBe('beta'); + }); + + test('should match case-insensitively against model name', () => { + const tokensMap = { 'gpt-5': 400000 }; + expect(findMatchingPattern('GPT-5-turbo', tokensMap)).toBe('gpt-5'); + }); + + test('should select the longest key among overlapping substring matches', () => { + const tokensMap = { 'gpt-': 100, 'gpt-5': 200, 'gpt-5.2': 300, 'gpt-5.2-pro': 400 }; + expect(findMatchingPattern('gpt-5.2-pro-2025-01-01', tokensMap)).toBe('gpt-5.2-pro'); + expect(findMatchingPattern('gpt-5.2-chat-2025-01-01', tokensMap)).toBe('gpt-5.2'); + expect(findMatchingPattern('gpt-5.1-preview', tokensMap)).toBe('gpt-5'); + expect(findMatchingPattern('gpt-unknown', tokensMap)).toBe('gpt-'); + }); + + test('should not be confused by a short key that appears later in the model name', () => { + const tokensMap = { 'model-v2': 200, v2: 100 }; + expect(findMatchingPattern('model-v2-extended', tokensMap)).toBe('model-v2'); + }); + + test('should handle exact-length match as the best match', () => { + const tokensMap = { 'exact-model': 500, exact: 100 }; + expect(findMatchingPattern('exact-model', tokensMap)).toBe('exact-model'); + }); + + test('should return null for empty model name', () => { + expect(findMatchingPattern('', { 'gpt-5': 400000 })).toBeNull(); + }); + + test('should prefer last-defined key on same-length ties', () => { + const tokensMap = { 'aa-bb': 100, 'cc-dd': 200 }; + // model name contains both 5-char keys; last-defined wins in reverse iteration + expect(findMatchingPattern('aa-bb-cc-dd', tokensMap)).toBe('cc-dd'); + }); + + test('longest match beats short cross-provider pattern even when both present', () => { + const tokensMap = { 'gpt-5.2': 400000, 'chat-': 8187 }; + expect(findMatchingPattern('gpt-5.2-chat-2025-12-11', tokensMap)).toBe('gpt-5.2'); + }); + + test('should match case-insensitively against keys', () => { + const 
tokensMap = { 'GPT-5': 400000 }; + expect(findMatchingPattern('gpt-5-turbo', tokensMap)).toBe('GPT-5'); + }); +}); + +describe('findMatchingPattern - iteration performance', () => { + let includesSpy; + + beforeEach(() => { + includesSpy = jest.spyOn(String.prototype, 'includes'); + }); + + afterEach(() => { + includesSpy.mockRestore(); + }); + + test('exact match early-exits with minimal includes() checks', () => { + const openAIMap = maxTokensMap[EModelEndpoint.openAI]; + const keys = Object.keys(openAIMap); + const lastKey = keys[keys.length - 1]; + includesSpy.mockClear(); + const result = findMatchingPattern(lastKey, openAIMap); + const exactCalls = includesSpy.mock.calls.length; + + expect(result).toBe(lastKey); + expect(exactCalls).toBe(1); + }); + + test('bestLength check skips includes() for shorter keys after a long match', () => { + const openAIMap = maxTokensMap[EModelEndpoint.openAI]; + includesSpy.mockClear(); + findMatchingPattern('gpt-5.2-pro-chat-2025-12-11', openAIMap); + const longKeyCalls = includesSpy.mock.calls.length; + + includesSpy.mockClear(); + findMatchingPattern('gpt-5.3-chat-latest', openAIMap); + const shortKeyCalls = includesSpy.mock.calls.length; + + // gpt-5.2-pro (11 chars) is the last-defined key, so it matches on the first reverse check and bestLength then prunes every key of 11 chars or fewer + // gpt-5.3 (7 chars) is short, so fewer keys are pruned by the length check + expect(longKeyCalls).toBeLessThan(shortKeyCalls); + }); + + test('last-defined keys are checked first in reverse iteration', () => { + const tokensMap = { first: 100, second: 200, third: 300 }; + includesSpy.mockClear(); + const result = findMatchingPattern('third', tokensMap); + const calls = includesSpy.mock.calls.length; + + // 'third' is last key, found on first reverse check, exact match exits immediately + expect(result).toBe('third'); + expect(calls).toBe(1); + }); +}); + +describe('deprecated PaLM2/Codey model removal', () => { + test('deprecated PaLM2/Codey models no longer have token entries', () => { +
expect(getModelMaxTokens('text-bison-32k', EModelEndpoint.google)).toBeUndefined(); + expect(getModelMaxTokens('codechat-bison-32k', EModelEndpoint.google)).toBeUndefined(); + expect(getModelMaxTokens('code-bison', EModelEndpoint.google)).toBeUndefined(); + expect(getModelMaxTokens('chat-bison', EModelEndpoint.google)).toBeUndefined(); + }); +}); + describe('matchModelName', () => { it('should return the exact model name if it exists in maxTokensMap', () => { expect(matchModelName('gpt-4-32k-0613')).toBe('gpt-4-32k-0613'); @@ -642,22 +804,11 @@ describe('matchModelName', () => { expect(matchModelName('gpt-5.3-2025-03-01')).toBe('gpt-5.3'); }); - // Tests for Google models - it('should return the exact model name if it exists in maxTokensMap - Google models', () => { - expect(matchModelName('text-bison-32k', EModelEndpoint.google)).toBe('text-bison-32k'); - expect(matchModelName('codechat-bison-32k', EModelEndpoint.google)).toBe('codechat-bison-32k'); - }); - it('should return the input model name if no match is found - Google models', () => { expect(matchModelName('unknown-google-model', EModelEndpoint.google)).toBe( 'unknown-google-model', ); }); - - it('should return the closest matching key for partial matches - Google models', () => { - expect(matchModelName('code-', EModelEndpoint.google)).toBe('code-'); - expect(matchModelName('chat-', EModelEndpoint.google)).toBe('chat-'); - }); }); describe('Meta Models Tests', () => { diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index ad7cf1a8db..b07f94f946 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -5,38 +5,30 @@ import type { EndpointTokenConfig, TokenConfig } from '~/types'; /** * Model Token Configuration Maps * - * IMPORTANT: Key Ordering for Pattern Matching - * ============================================ - * The `findMatchingPattern` function iterates through object keys in REVERSE order - * (last-defined keys are checked first) and 
uses `modelName.includes(key)` for matching. + * Pattern Matching + * ================ + * `findMatchingPattern` uses `modelName.includes(key)` and selects the **longest** + * matching key. If a key's length equals the model name's length (exact match), it + * returns immediately — no further keys are checked. * - * This means: - * 1. BASE PATTERNS must be defined FIRST (e.g., "kimi", "moonshot") - * 2. SPECIFIC PATTERNS must be defined AFTER their base patterns (e.g., "kimi-k2", "kimi-k2.5") + * For keys of different lengths, definition order does not affect the result — the + * longest match always wins. For **same-length ties**, the function iterates in + * reverse, so the last-defined key wins. Key ordering therefore matters for: * - * Example ordering for Kimi models: - * kimi: 262144, // Base pattern - checked last - * 'kimi-k2': 262144, // More specific - checked before "kimi" - * 'kimi-k2.5': 262144, // Most specific - checked first - * - * Why this matters: - * - Model name "kimi-k2.5" contains both "kimi" and "kimi-k2" as substrings - * - If "kimi" were checked first, it would incorrectly match "kimi-k2.5" - * - By defining specific patterns AFTER base patterns, they're checked first in reverse iteration - * - * When adding new model families: - * 1. Define the base/generic pattern first - * 2. Define increasingly specific patterns after - * 3. Ensure no pattern is a substring of another that should match differently + * 1. **Performance**: list older/legacy models first, newer models last — newer + * models are more commonly used and will match earlier in the reverse scan. + * 2. **Same-length tie-breaking**: in `aggregateModels`, OpenAI is spread last + * so its keys are preferred when two keys of equal length both match. 
*/ const openAIModels = { - 'o4-mini': 200000, - 'o3-mini': 195000, // -5000 from max - o3: 200000, - o1: 195000, // -5000 from max - 'o1-mini': 127500, // -500 from max - 'o1-preview': 127500, // -500 from max + 'gpt-3.5-turbo-0301': 4092, // -5 from max + 'gpt-3.5-turbo-0613': 4092, // -5 from max + 'gpt-3.5-turbo-16k': 16375, // -10 from max + 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max + 'gpt-3.5-turbo-1106': 16375, // -10 from max + 'gpt-3.5-turbo-0125': 16375, // -10 from max + 'gpt-3.5-turbo': 16375, // -10 from max 'gpt-4': 8187, // -5 from max 'gpt-4-0613': 8187, // -5 from max 'gpt-4-32k': 32758, // -10 from max @@ -44,7 +36,18 @@ const openAIModels = { 'gpt-4-32k-0613': 32758, // -10 from max 'gpt-4-1106': 127500, // -500 from max 'gpt-4-0125': 127500, // -500 from max + 'gpt-4-turbo': 127500, // -500 from max + 'gpt-4-vision': 127500, // -500 from max + 'gpt-4o-2024-05-13': 127500, // -500 from max + 'gpt-4o-mini': 127500, // -500 from max + 'gpt-4o': 127500, // -500 from max 'gpt-4.5': 127500, // -500 from max + 'o1-mini': 127500, // -500 from max + 'o1-preview': 127500, // -500 from max + o1: 195000, // -5000 from max + 'o3-mini': 195000, // -5000 from max + o3: 200000, + 'o4-mini': 200000, 'gpt-4.1': 1047576, 'gpt-4.1-mini': 1047576, 'gpt-4.1-nano': 1047576, @@ -56,18 +59,6 @@ const openAIModels = { 'gpt-5-nano': 400000, 'gpt-5-pro': 400000, 'gpt-5.2-pro': 400000, - 'gpt-4o': 127500, // -500 from max - 'gpt-4o-mini': 127500, // -500 from max - 'gpt-4o-2024-05-13': 127500, // -500 from max - 'gpt-4-turbo': 127500, // -500 from max - 'gpt-4-vision': 127500, // -500 from max - 'gpt-3.5-turbo': 16375, // -10 from max - 'gpt-3.5-turbo-0613': 4092, // -5 from max - 'gpt-3.5-turbo-0301': 4092, // -5 from max - 'gpt-3.5-turbo-16k': 16375, // -10 from max - 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max - 'gpt-3.5-turbo-1106': 16375, // -10 from max - 'gpt-3.5-turbo-0125': 16375, // -10 from max }; const mistralModels = { @@ -76,15 +67,15 @@ const 
mistralModels = { 'mistral-small': 31990, // -10 from max 'mixtral-8x7b': 31990, // -10 from max 'mixtral-8x22b': 65536, - 'mistral-large': 131000, 'mistral-large-2402': 127500, 'mistral-large-2407': 127500, + 'mistral-large': 131000, + 'mistral-saba': 32000, + 'ministral-3b': 131000, + 'ministral-8b': 131000, 'mistral-nemo': 131000, 'pixtral-large': 131000, - 'mistral-saba': 32000, codestral: 256000, - 'ministral-8b': 131000, - 'ministral-3b': 131000, }; const cohereModels = { @@ -105,32 +96,22 @@ const googleModels = { 'gemma-3-27b': 131072, gemini: 30720, // -2048 from max 'gemini-pro-vision': 12288, + 'gemini-1.5': 1000000, + 'gemini-1.5-flash': 1000000, + 'gemini-1.5-flash-8b': 1000000, + 'gemini-2.0': 2000000, + 'gemini-2.0-flash': 1000000, + 'gemini-2.0-flash-lite': 1000000, 'gemini-exp': 2000000, - 'gemini-3': 1000000, // 1M input tokens, 64k output tokens - 'gemini-3-pro-image': 1000000, - 'gemini-3.1': 1000000, // 1M input tokens, 64k output tokens - 'gemini-3.1-flash-lite': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens + 'gemini-2.5': 1000000, 'gemini-2.5-pro': 1000000, 'gemini-2.5-flash': 1000000, 'gemini-2.5-flash-image': 1000000, 'gemini-2.5-flash-lite': 1000000, - 'gemini-2.0': 2000000, - 'gemini-2.0-flash': 1000000, - 'gemini-2.0-flash-lite': 1000000, - 'gemini-1.5': 1000000, - 'gemini-1.5-flash': 1000000, - 'gemini-1.5-flash-8b': 1000000, - 'text-bison-32k': 32758, // -10 from max - 'chat-bison-32k': 32758, // -10 from max - 'code-bison-32k': 32758, // -10 from max - 'codechat-bison-32k': 32758, - /* Codey, -5 from max: 6144 */ - 'code-': 6139, - 'codechat-': 6139, - /* PaLM2, -5 from max: 8192 */ - 'text-': 8187, - 'chat-': 8187, + 'gemini-3': 1000000, + 'gemini-3-pro-image': 1000000, + 'gemini-3.1': 1000000, + 'gemini-3.1-flash-lite': 1000000, }; const anthropicModels = { @@ -142,49 +123,35 @@ const anthropicModels = { 'claude-3-haiku': 200000, 'claude-3-sonnet': 200000, 
'claude-3-opus': 200000, - 'claude-3.5-haiku': 200000, - 'claude-3-5-haiku': 200000, 'claude-3-5-sonnet': 200000, 'claude-3.5-sonnet': 200000, - 'claude-3-7-sonnet': 200000, - 'claude-3.7-sonnet': 200000, 'claude-3-5-sonnet-latest': 200000, 'claude-3.5-sonnet-latest': 200000, - 'claude-haiku-4-5': 200000, - 'claude-sonnet-4': 1000000, - 'claude-sonnet-4-6': 1000000, + 'claude-3-5-haiku': 200000, + 'claude-3.5-haiku': 200000, + 'claude-3-7-sonnet': 200000, + 'claude-3.7-sonnet': 200000, 'claude-4': 200000, + 'claude-haiku-4-5': 200000, 'claude-opus-4': 200000, 'claude-opus-4-5': 200000, + 'claude-sonnet-4': 1000000, + 'claude-sonnet-4-6': 1000000, 'claude-opus-4-6': 1000000, }; const deepseekModels = { deepseek: 128000, 'deepseek-chat': 128000, - 'deepseek-reasoner': 128000, - 'deepseek-r1': 128000, 'deepseek-v3': 128000, 'deepseek.r1': 128000, + 'deepseek-r1': 128000, + 'deepseek-reasoner': 128000, }; const moonshotModels = { - // Base patterns (check last due to reverse iteration) - kimi: 262144, + // moonshot-v1 series (older) moonshot: 131072, - // kimi-k2 series (specific patterns) - 'kimi-latest': 128000, - 'kimi-k2': 262144, - 'kimi-k2.5': 262144, - 'kimi-k2-turbo': 262144, - 'kimi-k2-turbo-preview': 262144, - 'kimi-k2-0905': 262144, - 'kimi-k2-0905-preview': 262144, - 'kimi-k2-0711': 131072, - 'kimi-k2-0711-preview': 131072, - 'kimi-k2-thinking': 262144, - 'kimi-k2-thinking-turbo': 262144, - // moonshot-v1 series (specific patterns) 'moonshot-v1': 131072, 'moonshot-v1-auto': 131072, 'moonshot-v1-8k': 8192, @@ -196,99 +163,100 @@ const moonshotModels = { 'moonshot-v1-128k': 131072, 'moonshot-v1-128k-vision': 131072, 'moonshot-v1-128k-vision-preview': 131072, + // kimi series + kimi: 262144, + 'kimi-latest': 128000, + 'kimi-k2-0711': 131072, + 'kimi-k2-0711-preview': 131072, + 'kimi-k2-0905': 262144, + 'kimi-k2-0905-preview': 262144, + 'kimi-k2': 262144, + 'kimi-k2-turbo': 262144, + 'kimi-k2-turbo-preview': 262144, + 'kimi-k2-thinking': 262144, + 
'kimi-k2-thinking-turbo': 262144, + 'kimi-k2.5': 262144, // Bedrock moonshot models + 'moonshot.kimi-k2-0711': 131072, 'moonshot.kimi': 262144, 'moonshot.kimi-k2': 262144, - 'moonshot.kimi-k2.5': 262144, 'moonshot.kimi-k2-thinking': 262144, - 'moonshot.kimi-k2-0711': 131072, 'moonshotai.kimi': 262144, + 'moonshot.kimi-k2.5': 262144, 'moonshotai.kimi-k2.5': 262144, }; const metaModels = { - // Basic patterns - llama3: 8000, + // Llama 2 (oldest) llama2: 4000, - 'llama-3': 8000, 'llama-2': 4000, - - // llama3.x pattern + 'llama2-13b': 4000, + 'llama2-70b': 4000, + 'llama2:70b': 4000, + // Llama 3 base + llama3: 8000, + 'llama-3': 8000, + 'llama3-8b': 8000, + 'llama3-70b': 8000, + 'llama3:8b': 8000, + 'llama3:70b': 8000, + // Llama 3.1 'llama3.1': 127500, - 'llama3.2': 127500, - 'llama3.3': 127500, - - // llama3-x pattern 'llama3-1': 127500, - 'llama3-2': 127500, - 'llama3-3': 127500, - - // llama-3.x pattern 'llama-3.1': 127500, - 'llama-3.2': 127500, - 'llama-3.3': 127500, - - // llama3.x:Nb pattern - 'llama3.1:405b': 127500, - 'llama3.1:70b': 127500, 'llama3.1:8b': 127500, + 'llama3.1:70b': 127500, + 'llama3.1:405b': 127500, + 'llama3-1-8b': 127500, + 'llama3-1-70b': 127500, + 'llama3-1-405b': 127500, + 'llama-3.1-8b': 127500, + 'llama-3.1-70b': 127500, + 'llama-3.1-405b': 127500, + // Llama 3.2 + 'llama3.2': 127500, + 'llama3-2': 127500, + 'llama-3.2': 127500, 'llama3.2:1b': 127500, 'llama3.2:3b': 127500, 'llama3.2:11b': 127500, 'llama3.2:90b': 127500, - 'llama3.3:70b': 127500, - - // llama3-x-Nb pattern - 'llama3-1-405b': 127500, - 'llama3-1-70b': 127500, - 'llama3-1-8b': 127500, 'llama3-2-1b': 127500, 'llama3-2-3b': 127500, 'llama3-2-11b': 127500, 'llama3-2-90b': 127500, - 'llama3-3-70b': 127500, - - // llama-3.x-Nb pattern - 'llama-3.1-405b': 127500, - 'llama-3.1-70b': 127500, - 'llama-3.1-8b': 127500, 'llama-3.2-1b': 127500, 'llama-3.2-3b': 127500, 'llama-3.2-11b': 127500, 'llama-3.2-90b': 127500, + // Llama 3.3 (newest) + 'llama3.3': 127500, + 'llama3-3': 
127500, + 'llama-3.3': 127500, + 'llama3.3:70b': 127500, + 'llama3-3-70b': 127500, 'llama-3.3-70b': 127500, - - // Original llama2/3 patterns - 'llama3-70b': 8000, - 'llama3-8b': 8000, - 'llama2-70b': 4000, - 'llama2-13b': 4000, - 'llama3:70b': 8000, - 'llama3:8b': 8000, - 'llama2:70b': 4000, }; const qwenModels = { qwen: 32000, 'qwen2.5': 32000, - 'qwen-turbo': 1000000, - 'qwen-plus': 131000, 'qwen-max': 32000, + 'qwen-plus': 131000, + 'qwen-turbo': 1000000, 'qwq-32b': 32000, - // Qwen3 models - qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) - 'qwen3-8b': 128000, + // Qwen3 models (newest) + qwen3: 40960, 'qwen3-14b': 40960, 'qwen3-30b-a3b': 40960, 'qwen3-32b': 40960, 'qwen3-235b-a22b': 40960, - // Qwen3 VL (Vision-Language) models + 'qwen3-8b': 128000, + 'qwen3-vl-235b-a22b': 131072, 'qwen3-vl-8b-thinking': 256000, + 'qwen3-max': 256000, 'qwen3-vl-8b-instruct': 262144, 'qwen3-vl-30b-a3b': 262144, - 'qwen3-vl-235b-a22b': 131072, - // Qwen3 specialized models - 'qwen3-max': 256000, 'qwen3-coder': 262144, 'qwen3-coder-30b-a3b': 262144, 'qwen3-coder-plus': 128000, @@ -321,7 +289,6 @@ const openAIBedrockModels = { }; const bedrockModels = { - ...anthropicModels, ...mistralModels, ...cohereModels, ...deepseekModels, @@ -330,6 +297,7 @@ const bedrockModels = { ...ai21Models, ...amazonModels, ...openAIBedrockModels, + ...anthropicModels, }; const xAIModels = { @@ -346,24 +314,13 @@ const xAIModels = { 'grok-3-fast': 131072, 'grok-3-mini': 131072, 'grok-3-mini-fast': 131072, + 'grok-code-fast': 256000, // 256K context 'grok-4': 256000, // 256K context 'grok-4-fast': 2000000, // 2M context 'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants) - 'grok-code-fast': 256000, // 256K context }; const aggregateModels = { - ...openAIModels, - ...googleModels, - ...bedrockModels, - ...xAIModels, - ...qwenModels, - // GPT-OSS - 'gpt-oss': 131000, - 'gpt-oss:20b': 131000, - 'gpt-oss-20b': 131000, - 'gpt-oss:120b': 131000, - 'gpt-oss-120b': 
131000, // GLM models (Zhipu AI) glm4: 128000, 'glm-4': 128000, @@ -372,6 +329,18 @@ const aggregateModels = { 'glm-4.5-air': 131000, 'glm-4.5v': 66000, 'glm-4.6': 200000, + // GPT-OSS + 'gpt-oss': 131000, + 'gpt-oss:20b': 131000, + 'gpt-oss-20b': 131000, + 'gpt-oss:120b': 131000, + 'gpt-oss-120b': 131000, + ...qwenModels, + ...xAIModels, + ...googleModels, + ...bedrockModels, + // OpenAI last — reverse iteration checks last-spread keys first for same-length ties + ...openAIModels, }; export const maxTokensMap = { @@ -435,26 +404,28 @@ export const maxOutputTokensMap = { [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, }; -/** - * Finds the first matching pattern in the tokens map. - * @param {string} modelName - * @param {Record | EndpointTokenConfig} tokensMap - * @returns {string|null} - */ +/** Finds the longest matching key in the tokens map via substring match. */ export function findMatchingPattern( modelName: string, tokensMap: Record | EndpointTokenConfig, ): string | null { const keys = Object.keys(tokensMap); const lowerModelName = modelName.toLowerCase(); + let bestMatch: string | null = null; + let bestLength = 0; for (let i = keys.length - 1; i >= 0; i--) { - const modelKey = keys[i]; - if (lowerModelName.includes(modelKey)) { - return modelKey; + const key = keys[i]; + const lowerKey = key.toLowerCase(); + if (lowerKey.length > bestLength && lowerModelName.includes(lowerKey)) { + if (lowerKey.length === lowerModelName.length) { + return key; + } + bestMatch = key; + bestLength = lowerKey.length; } } - return null; + return bestMatch; } /**