diff --git a/api/app/clients/specs/FakeClient.js b/api/app/clients/specs/FakeClient.js index 58480b4018..d1d07a967d 100644 --- a/api/app/clients/specs/FakeClient.js +++ b/api/app/clients/specs/FakeClient.js @@ -1,5 +1,4 @@ const { getModelMaxTokens } = require('@librechat/api'); -const { TOKEN_DEFAULTS } = require('librechat-data-provider'); const BaseClient = require('../BaseClient'); class FakeClient extends BaseClient { @@ -42,9 +41,7 @@ class FakeClient extends BaseClient { } this.maxContextTokens = - this.options.maxContextTokens ?? - getModelMaxTokens(this.modelOptions.model) ?? - TOKEN_DEFAULTS.LEGACY_CONTEXT_FALLBACK; + this.options.maxContextTokens ?? getModelMaxTokens(this.modelOptions.model) ?? 4097; } buildMessages() {} getTokenCount(str) { diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js index 11f9128837..df1bec8619 100644 --- a/api/models/tx.spec.js +++ b/api/models/tx.spec.js @@ -1,4 +1,6 @@ -const { EModelEndpoint, maxTokensMap } = require('librechat-data-provider'); +/** Note: No hard-coded values should be used in this file. */ +const { maxTokensMap } = require('@librechat/api'); +const { EModelEndpoint } = require('librechat-data-provider'); const { defaultRate, tokenValues, diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index a9448aae41..0cfdc30227 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -1,8 +1,11 @@ -const { EModelEndpoint, maxTokensMap, maxOutputTokensMap } = require('librechat-data-provider'); +/** Note: No hard-coded values should be used in this file. */ +const { EModelEndpoint } = require('librechat-data-provider'); const { + maxTokensMap, matchModelName, processModelData, getModelMaxTokens, + maxOutputTokensMap, findMatchingPattern, } = require('@librechat/api'); diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts index 3e1b09df18..008aa4c0ba 100644 --- a/packages/api/src/agents/initialize.ts +++ b/packages/api/src/agents/initialize.ts @@ -8,7 +8,6 @@ import { isAgentsEndpoint, replaceSpecialVars, providerEndpointMap, - TOKEN_DEFAULTS, } from 'librechat-data-provider'; import type { AgentToolResources, @@ -338,7 +337,7 @@ export async function initializeAgent( providerEndpointMap[provider as keyof typeof providerEndpointMap], options.endpointTokenConfig, ), - TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK, + 18000, ); if ( @@ -395,7 +394,7 @@ export async function initializeAgent( agent.additional_instructions = artifactsPromptResult ?? undefined; } - const agentMaxContextNum = Number(agentMaxContextTokens) || TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK; + const agentMaxContextNum = Number(agentMaxContextTokens) || 18000; const maxOutputTokensNum = Number(maxOutputTokens) || 0; const finalAttachments: IMongoFile[] = (primedAttachments ?? []) @@ -414,9 +413,7 @@ export async function initializeAgent( toolContextMap: toolContextMap ?? {}, useLegacyContent: !!options.useLegacyContent, tools: (tools ?? 
[]) as GenericTool[] & string[], - maxContextTokens: Math.round( - (agentMaxContextNum - maxOutputTokensNum) * TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN, - ), + maxContextTokens: Math.round((agentMaxContextNum - maxOutputTokensNum) * 0.9), }; return initializedAgent; diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index 571bc93052..49f1640a7a 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -1,7 +1,424 @@ import z from 'zod'; -import { EModelEndpoint, maxTokensMap, maxOutputTokensMap } from 'librechat-data-provider'; +import { EModelEndpoint } from 'librechat-data-provider'; import type { EndpointTokenConfig, TokenConfig } from '~/types'; +/** + * Model Token Configuration Maps + * + * IMPORTANT: Key Ordering for Pattern Matching + * ============================================ + * The `findMatchingPattern` function iterates through object keys in REVERSE order + * (last-defined keys are checked first) and uses `modelName.includes(key)` for matching. + * + * This means: + * 1. BASE PATTERNS must be defined FIRST (e.g., "kimi", "moonshot") + * 2. SPECIFIC PATTERNS must be defined AFTER their base patterns (e.g., "kimi-k2", "kimi-k2.5") + * + * Example ordering for Kimi models: + * kimi: 262144, // Base pattern - checked last + * 'kimi-k2': 262144, // More specific - checked before "kimi" + * 'kimi-k2.5': 262144, // Most specific - checked first + * + * Why this matters: + * - Model name "kimi-k2.5" contains both "kimi" and "kimi-k2" as substrings + * - If "kimi" were checked first, it would incorrectly match "kimi-k2.5" + * - By defining specific patterns AFTER base patterns, they're checked first in reverse iteration + * + * When adding new model families: + * 1. Define the base/generic pattern first + * 2. Define increasingly specific patterns after + * 3. 
Ensure no pattern is a substring of another that should match differently + */ + +const openAIModels = { + 'o4-mini': 200000, + 'o3-mini': 195000, // -5000 from max + o3: 200000, + o1: 195000, // -5000 from max + 'o1-mini': 127500, // -500 from max + 'o1-preview': 127500, // -500 from max + 'gpt-4': 8187, // -5 from max + 'gpt-4-0613': 8187, // -5 from max + 'gpt-4-32k': 32758, // -10 from max + 'gpt-4-32k-0314': 32758, // -10 from max + 'gpt-4-32k-0613': 32758, // -10 from max + 'gpt-4-1106': 127500, // -500 from max + 'gpt-4-0125': 127500, // -500 from max + 'gpt-4.5': 127500, // -500 from max + 'gpt-4.1': 1047576, + 'gpt-4.1-mini': 1047576, + 'gpt-4.1-nano': 1047576, + 'gpt-5': 400000, + 'gpt-5.1': 400000, + 'gpt-5.2': 400000, + 'gpt-5-mini': 400000, + 'gpt-5-nano': 400000, + 'gpt-5-pro': 400000, + 'gpt-4o': 127500, // -500 from max + 'gpt-4o-mini': 127500, // -500 from max + 'gpt-4o-2024-05-13': 127500, // -500 from max + 'gpt-4-turbo': 127500, // -500 from max + 'gpt-4-vision': 127500, // -500 from max + 'gpt-3.5-turbo': 16375, // -10 from max + 'gpt-3.5-turbo-0613': 4092, // -5 from max + 'gpt-3.5-turbo-0301': 4092, // -5 from max + 'gpt-3.5-turbo-16k': 16375, // -10 from max + 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max + 'gpt-3.5-turbo-1106': 16375, // -10 from max + 'gpt-3.5-turbo-0125': 16375, // -10 from max +}; + +const mistralModels = { + 'mistral-': 31990, // -10 from max + 'mistral-7b': 31990, // -10 from max + 'mistral-small': 31990, // -10 from max + 'mixtral-8x7b': 31990, // -10 from max + 'mixtral-8x22b': 65536, + 'mistral-large': 131000, + 'mistral-large-2402': 127500, + 'mistral-large-2407': 127500, + 'mistral-nemo': 131000, + 'pixtral-large': 131000, + 'mistral-saba': 32000, + codestral: 256000, + 'ministral-8b': 131000, + 'ministral-3b': 131000, +}; + +const cohereModels = { + 'command-light': 4086, // -10 from max + 'command-light-nightly': 8182, // -10 from max + command: 4086, // -10 from max + 'command-nightly': 8182, // -10 from max + 'command-text': 4086, // -10 from max + 'command-r': 127500, // -500 from max + 'command-r-plus': 127500, // -500 from max +}; + +const googleModels = { + /* Max I/O is combined so we subtract the amount from max response tokens for actual total */ + gemma: 8196, + 'gemma-2': 32768, + 'gemma-3': 32768, + 'gemma-3-27b': 131072, + gemini: 30720, // -2048 from max + 'gemini-pro-vision': 12288, + 'gemini-exp': 2000000, + 'gemini-3': 1000000, // 1M input tokens, 64k output tokens + 'gemini-3-pro-image': 1000000, + 'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens + 'gemini-2.5-pro': 1000000, + 'gemini-2.5-flash': 1000000, + 'gemini-2.5-flash-image': 1000000, + 'gemini-2.5-flash-lite': 1000000, + 'gemini-2.0': 2000000, + 'gemini-2.0-flash': 1000000, + 'gemini-2.0-flash-lite': 1000000, + 'gemini-1.5': 1000000, + 'gemini-1.5-flash': 1000000, + 'gemini-1.5-flash-8b': 1000000, + 'text-bison-32k': 32758, // -10 from max + 'chat-bison-32k': 32758, // -10 from max + 'code-bison-32k': 32758, // -10 from max + 'codechat-bison-32k': 32758, + /* Codey, -5 from max: 6144 */ + 'code-': 6139, + 'codechat-': 6139, + /* PaLM2, -5 from max: 8192 */ + 'text-': 8187, + 'chat-': 8187, +}; + +const anthropicModels = { + 'claude-': 100000, + 'claude-instant': 100000, + 'claude-2': 100000, + 'claude-2.1': 200000, + 'claude-3': 200000, + 'claude-3-haiku': 200000, + 'claude-3-sonnet': 200000, + 'claude-3-opus': 200000, + 'claude-3.5-haiku': 200000, + 'claude-3-5-haiku': 200000, + 'claude-3-5-sonnet': 200000, + 'claude-3.5-sonnet': 200000, + 
'claude-3-7-sonnet': 200000, + 'claude-3.7-sonnet': 200000, + 'claude-3-5-sonnet-latest': 200000, + 'claude-3.5-sonnet-latest': 200000, + 'claude-haiku-4-5': 200000, + 'claude-sonnet-4': 1000000, + 'claude-4': 200000, + 'claude-opus-4': 200000, + 'claude-opus-4-5': 200000, + 'claude-opus-4-6': 1000000, +}; + +const deepseekModels = { + deepseek: 128000, + 'deepseek-chat': 128000, + 'deepseek-reasoner': 128000, + 'deepseek-r1': 128000, + 'deepseek-v3': 128000, + 'deepseek.r1': 128000, +}; + +const moonshotModels = { + // Base patterns (check last due to reverse iteration) + kimi: 262144, + moonshot: 131072, + // kimi-k2 series (specific patterns) + 'kimi-latest': 128000, + 'kimi-k2': 262144, + 'kimi-k2.5': 262144, + 'kimi-k2-turbo': 262144, + 'kimi-k2-turbo-preview': 262144, + 'kimi-k2-0905': 262144, + 'kimi-k2-0905-preview': 262144, + 'kimi-k2-0711': 131072, + 'kimi-k2-0711-preview': 131072, + 'kimi-k2-thinking': 262144, + 'kimi-k2-thinking-turbo': 262144, + // moonshot-v1 series (specific patterns) + 'moonshot-v1': 131072, + 'moonshot-v1-auto': 131072, + 'moonshot-v1-8k': 8192, + 'moonshot-v1-8k-vision': 8192, + 'moonshot-v1-8k-vision-preview': 8192, + 'moonshot-v1-32k': 32768, + 'moonshot-v1-32k-vision': 32768, + 'moonshot-v1-32k-vision-preview': 32768, + 'moonshot-v1-128k': 131072, + 'moonshot-v1-128k-vision': 131072, + 'moonshot-v1-128k-vision-preview': 131072, + // Bedrock moonshot models + 'moonshot.kimi': 262144, + 'moonshot.kimi-k2': 262144, + 'moonshot.kimi-k2.5': 262144, + 'moonshot.kimi-k2-thinking': 262144, + 'moonshot.kimi-k2-0711': 131072, +}; + +const metaModels = { + // Basic patterns + llama3: 8000, + llama2: 4000, + 'llama-3': 8000, + 'llama-2': 4000, + + // llama3.x pattern + 'llama3.1': 127500, + 'llama3.2': 127500, + 'llama3.3': 127500, + + // llama3-x pattern + 'llama3-1': 127500, + 'llama3-2': 127500, + 'llama3-3': 127500, + + // llama-3.x pattern + 'llama-3.1': 127500, + 'llama-3.2': 127500, + 'llama-3.3': 127500, + + // llama3.x:Nb pattern + 'llama3.1:405b': 127500, + 'llama3.1:70b': 127500, + 'llama3.1:8b': 127500, + 'llama3.2:1b': 127500, + 'llama3.2:3b': 127500, + 'llama3.2:11b': 127500, + 'llama3.2:90b': 127500, + 'llama3.3:70b': 127500, + + // llama3-x-Nb pattern + 'llama3-1-405b': 127500, + 'llama3-1-70b': 127500, + 'llama3-1-8b': 127500, + 'llama3-2-1b': 127500, + 'llama3-2-3b': 127500, + 'llama3-2-11b': 127500, + 'llama3-2-90b': 127500, + 'llama3-3-70b': 127500, + + // llama-3.x-Nb pattern + 'llama-3.1-405b': 127500, + 'llama-3.1-70b': 127500, + 'llama-3.1-8b': 127500, + 'llama-3.2-1b': 127500, + 'llama-3.2-3b': 127500, + 'llama-3.2-11b': 127500, + 'llama-3.2-90b': 127500, + 'llama-3.3-70b': 127500, + + // Original llama2/3 patterns + 'llama3-70b': 8000, + 'llama3-8b': 8000, + 'llama2-70b': 4000, + 'llama2-13b': 4000, + 'llama3:70b': 8000, + 'llama3:8b': 8000, + 'llama2:70b': 4000, +}; + +const qwenModels = { + qwen: 32000, + 'qwen2.5': 32000, + 'qwen-turbo': 1000000, + 'qwen-plus': 131000, + 'qwen-max': 32000, + 'qwq-32b': 32000, + // Qwen3 models + qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) + 'qwen3-8b': 128000, + 'qwen3-14b': 40960, + 'qwen3-30b-a3b': 40960, + 'qwen3-32b': 40960, + 'qwen3-235b-a22b': 40960, + // Qwen3 VL (Vision-Language) models + 'qwen3-vl-8b-thinking': 256000, + 'qwen3-vl-8b-instruct': 262144, + 'qwen3-vl-30b-a3b': 262144, + 'qwen3-vl-235b-a22b': 131072, + // Qwen3 specialized models + 'qwen3-max': 256000, + 'qwen3-coder': 262144, + 'qwen3-coder-30b-a3b': 262144, + 'qwen3-coder-plus': 128000, + 'qwen3-coder-flash': 
128000, + 'qwen3-next-80b-a3b': 262144, +}; + +const ai21Models = { + 'j2-mid': 8182, // -10 from max + 'j2-ultra': 8182, // -10 from max + 'jamba-instruct': 255500, // -500 from max +}; + +const amazonModels = { + // Amazon Titan models + 'titan-text-lite': 4000, + 'titan-text-express': 8000, + 'titan-text-premier': 31500, // -500 from max + // Amazon Nova models + // https://aws.amazon.com/ai/generative-ai/nova/ + 'nova-micro': 127000, // -1000 from max + 'nova-lite': 295000, // -5000 from max + 'nova-pro': 295000, // -5000 from max + 'nova-premier': 995000, // -5000 from max +}; + +const bedrockModels = { + ...anthropicModels, + ...mistralModels, + ...cohereModels, + ...deepseekModels, + ...moonshotModels, + ...metaModels, + ...ai21Models, + ...amazonModels, +}; + +const xAIModels = { + grok: 131072, + 'grok-beta': 131072, + 'grok-vision-beta': 8192, + 'grok-2': 131072, + 'grok-2-latest': 131072, + 'grok-2-1212': 131072, + 'grok-2-vision': 32768, + 'grok-2-vision-latest': 32768, + 'grok-2-vision-1212': 32768, + 'grok-3': 131072, + 'grok-3-fast': 131072, + 'grok-3-mini': 131072, + 'grok-3-mini-fast': 131072, + 'grok-4': 256000, // 256K context + 'grok-4-fast': 2000000, // 2M context + 'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants) + 'grok-code-fast': 256000, // 256K context +}; + +const aggregateModels = { + ...openAIModels, + ...googleModels, + ...bedrockModels, + ...xAIModels, + ...qwenModels, + // GPT-OSS + 'gpt-oss': 131000, + 'gpt-oss:20b': 131000, + 'gpt-oss-20b': 131000, + 'gpt-oss:120b': 131000, + 'gpt-oss-120b': 131000, + // GLM models (Zhipu AI) + glm4: 128000, + 'glm-4': 128000, + 'glm-4-32b': 128000, + 'glm-4.5': 131000, + 'glm-4.5-air': 131000, + 'glm-4.5v': 66000, + 'glm-4.6': 200000, +}; + +export const maxTokensMap = { + [EModelEndpoint.azureOpenAI]: openAIModels, + [EModelEndpoint.openAI]: aggregateModels, + [EModelEndpoint.agents]: aggregateModels, + [EModelEndpoint.custom]: aggregateModels, + [EModelEndpoint.google]: googleModels, + [EModelEndpoint.anthropic]: anthropicModels, + [EModelEndpoint.bedrock]: bedrockModels, +}; + +export const modelMaxOutputs = { + o1: 32268, // -500 from max: 32,768 + 'o1-mini': 65136, // -400 from max: 65,536 + 'o1-preview': 32268, // -500 from max: 32,768 + 'gpt-5': 128000, + 'gpt-5.1': 128000, + 'gpt-5.2': 128000, + 'gpt-5-mini': 128000, + 'gpt-5-nano': 128000, + 'gpt-5-pro': 128000, + 'gpt-oss-20b': 131000, + 'gpt-oss-120b': 131000, + system_default: 32000, +}; + +/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */ +const anthropicMaxOutputs = { + 'claude-3-haiku': 4096, + 'claude-3-sonnet': 4096, + 'claude-3-opus': 4096, + 'claude-haiku-4-5': 64000, + 'claude-sonnet-4': 64000, + 'claude-opus-4': 32000, + 'claude-opus-4-5': 64000, + 'claude-opus-4-6': 128000, + 'claude-3.5-sonnet': 8192, + 'claude-3-5-sonnet': 8192, + 'claude-3.7-sonnet': 128000, + 'claude-3-7-sonnet': 128000, +}; + +/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */ +const deepseekMaxOutputs = { + deepseek: 8000, // deepseek-chat default: 4K, max: 8K + 'deepseek-chat': 8000, + 'deepseek-reasoner': 64000, // default: 32K, max: 64K + 'deepseek-r1': 64000, + 'deepseek-v3': 8000, + 'deepseek.r1': 64000, +}; + +export const maxOutputTokensMap = { + [EModelEndpoint.anthropic]: anthropicMaxOutputs, + [EModelEndpoint.azureOpenAI]: modelMaxOutputs, + [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, + [EModelEndpoint.custom]: { ...modelMaxOutputs,
...deepseekMaxOutputs }, +}; + /** * Finds the first matching pattern in the tokens map. * @param {string} modelName diff --git a/packages/data-provider/specs/tokens.spec.ts b/packages/data-provider/specs/tokens.spec.ts deleted file mode 100644 index 37eeecbea6..0000000000 --- a/packages/data-provider/specs/tokens.spec.ts +++ /dev/null @@ -1,152 +0,0 @@ -import { - findMatchingPattern, - getModelMaxTokens, - getModelMaxOutputTokens, - matchModelName, - maxTokensMap, -} from '../src/tokens'; -import { EModelEndpoint } from '../src/schemas'; - -describe('Token Pattern Matching', () => { - describe('findMatchingPattern', () => { - const testMap: Record<string, number> = { - 'claude-': 100000, - 'claude-3': 200000, - 'claude-3-opus': 200000, - 'gpt-4': 8000, - 'gpt-4-turbo': 128000, - }; - - it('should match exact model names', () => { - expect(findMatchingPattern('claude-3-opus', testMap)).toBe('claude-3-opus'); - expect(findMatchingPattern('gpt-4-turbo', testMap)).toBe('gpt-4-turbo'); - }); - - it('should match more specific patterns first (reverse order)', () => { - // claude-3-opus-20240229 should match 'claude-3-opus' not 'claude-3' or 'claude-' - expect(findMatchingPattern('claude-3-opus-20240229', testMap)).toBe('claude-3-opus'); - }); - - it('should fall back to broader patterns when no specific match', () => { - // claude-3-haiku should match 'claude-3' (not 'claude-3-opus') - expect(findMatchingPattern('claude-3-haiku', testMap)).toBe('claude-3'); - }); - - it('should be case-insensitive', () => { - expect(findMatchingPattern('Claude-3-Opus', testMap)).toBe('claude-3-opus'); - expect(findMatchingPattern('GPT-4-TURBO', testMap)).toBe('gpt-4-turbo'); - }); - - it('should return null for unmatched models', () => { - expect(findMatchingPattern('unknown-model', testMap)).toBeNull(); - expect(findMatchingPattern('llama-2', testMap)).toBeNull(); - }); - - it('should NOT match when pattern appears in middle of model name (startsWith behavior)', () => { - // This is the key fix: "my-claude-wrapper" should NOT match "claude-" - expect(findMatchingPattern('my-claude-wrapper', testMap)).toBeNull(); - expect(findMatchingPattern('openai-gpt-4-proxy', testMap)).toBeNull(); - expect(findMatchingPattern('custom-claude-3-service', testMap)).toBeNull(); - }); - - it('should handle empty string model name', () => { - expect(findMatchingPattern('', testMap)).toBeNull(); - }); - - it('should handle empty tokens map', () => { - expect(findMatchingPattern('claude-3', {})).toBeNull(); - }); - }); - - describe('getModelMaxTokens', () => { - it('should return exact match tokens', () => { - expect(getModelMaxTokens('gpt-4o', EModelEndpoint.openAI)).toBe(127500); - expect(getModelMaxTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(200000); - }); - - it('should return pattern-matched tokens', () => { - // claude-3-opus-20240229 should match claude-3-opus pattern - expect(getModelMaxTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe(200000); - }); - - it('should return undefined for unknown models', () => { - expect(getModelMaxTokens('completely-unknown-model', EModelEndpoint.openAI)).toBeUndefined(); - }); - - it('should fall back to openAI for unknown endpoints', () => { - const result = getModelMaxTokens('gpt-4o', 'unknown-endpoint'); - expect(result).toBe(127500); - }); - - it('should handle non-string input gracefully', () => { - expect(getModelMaxTokens(null as unknown as string)).toBeUndefined(); - expect(getModelMaxTokens(undefined as unknown as string)).toBeUndefined(); - expect(getModelMaxTokens(123
as unknown as string)).toBeUndefined(); - }); - - it('should NOT match model names with pattern in middle', () => { - // A model like "my-gpt-4-wrapper" should not match "gpt-4" - expect(getModelMaxTokens('my-gpt-4-wrapper', EModelEndpoint.openAI)).toBeUndefined(); - }); - }); - - describe('getModelMaxOutputTokens', () => { - it('should return exact match output tokens', () => { - expect(getModelMaxOutputTokens('o1', EModelEndpoint.openAI)).toBe(32268); - expect(getModelMaxOutputTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(4096); - }); - - it('should return pattern-matched output tokens', () => { - expect(getModelMaxOutputTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe( - 4096, - ); - }); - - it('should return system_default for unknown models (openAI endpoint)', () => { - expect(getModelMaxOutputTokens('unknown-model', EModelEndpoint.openAI)).toBe(32000); - }); - - it('should handle non-string input gracefully', () => { - expect(getModelMaxOutputTokens(null as unknown as string)).toBeUndefined(); - expect(getModelMaxOutputTokens(undefined as unknown as string)).toBeUndefined(); - }); - }); - - describe('matchModelName', () => { - it('should return exact match model name', () => { - expect(matchModelName('gpt-4o', EModelEndpoint.openAI)).toBe('gpt-4o'); - }); - - it('should return pattern key for pattern matches', () => { - expect(matchModelName('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe( - 'claude-3-opus', - ); - }); - - it('should return input for unknown models', () => { - expect(matchModelName('unknown-model', EModelEndpoint.openAI)).toBe('unknown-model'); - }); - - it('should handle non-string input gracefully', () => { - expect(matchModelName(null as unknown as string)).toBeUndefined(); - }); - }); - - describe('maxTokensMap structure', () => { - it('should have entries for all major endpoints', () => { - expect(maxTokensMap[EModelEndpoint.openAI]).toBeDefined(); - expect(maxTokensMap[EModelEndpoint.anthropic]).toBeDefined(); - expect(maxTokensMap[EModelEndpoint.google]).toBeDefined(); - expect(maxTokensMap[EModelEndpoint.azureOpenAI]).toBeDefined(); - expect(maxTokensMap[EModelEndpoint.bedrock]).toBeDefined(); - }); - - it('should have positive token values', () => { - Object.values(maxTokensMap).forEach((endpointMap) => { - Object.entries(endpointMap).forEach(([model, tokens]) => { - expect(tokens).toBeGreaterThan(0); - }); - }); - }); - }); -}); diff --git a/packages/data-provider/src/index.ts b/packages/data-provider/src/index.ts index ba21ece55e..c57ca82845 100644 --- a/packages/data-provider/src/index.ts +++ b/packages/data-provider/src/index.ts @@ -47,5 +47,3 @@ export { default as createPayload } from './createPayload'; /* feedback */ export * from './feedback'; export * from './parameterSettings'; -/* token limits */ -export * from './tokens'; diff --git a/packages/data-provider/src/tokens.ts b/packages/data-provider/src/tokens.ts deleted file mode 100644 index 40df709023..0000000000 --- a/packages/data-provider/src/tokens.ts +++ /dev/null @@ -1,527 +0,0 @@ -import { EModelEndpoint } from './schemas'; - -/** - * Model context window token limits. - * These values represent the maximum context tokens (input) for each model. - * Values are slightly reduced from actual max to leave room for output tokens. 
- */ - -const openAIModels: Record<string, number> = { - 'o4-mini': 200000, - 'o3-mini': 195000, // -5000 from max - o3: 200000, - o1: 195000, // -5000 from max - 'o1-mini': 127500, // -500 from max - 'o1-preview': 127500, // -500 from max - 'gpt-4': 8187, // -5 from max - 'gpt-4-0613': 8187, // -5 from max - 'gpt-4-32k': 32758, // -10 from max - 'gpt-4-32k-0314': 32758, // -10 from max - 'gpt-4-32k-0613': 32758, // -10 from max - 'gpt-4-1106': 127500, // -500 from max - 'gpt-4-0125': 127500, // -500 from max - 'gpt-4.5': 127500, // -500 from max - 'gpt-4.1': 1047576, - 'gpt-4.1-mini': 1047576, - 'gpt-4.1-nano': 1047576, - 'gpt-5': 400000, - 'gpt-5-mini': 400000, - 'gpt-5-nano': 400000, - 'gpt-5-pro': 400000, - 'gpt-4o': 127500, // -500 from max - 'gpt-4o-mini': 127500, // -500 from max - 'gpt-4o-2024-05-13': 127500, // -500 from max - 'gpt-4-turbo': 127500, // -500 from max - 'gpt-4-vision': 127500, // -500 from max - 'gpt-3.5-turbo': 16375, // -10 from max - 'gpt-3.5-turbo-0613': 4092, // -5 from max - 'gpt-3.5-turbo-0301': 4092, // -5 from max - 'gpt-3.5-turbo-16k': 16375, // -10 from max - 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max - 'gpt-3.5-turbo-1106': 16375, // -10 from max - 'gpt-3.5-turbo-0125': 16375, // -10 from max -}; - -const mistralModels: Record<string, number> = { - 'mistral-': 31990, // -10 from max - 'mistral-7b': 31990, // -10 from max - 'mistral-small': 31990, // -10 from max - 'mixtral-8x7b': 31990, // -10 from max - 'mixtral-8x22b': 65536, - 'mistral-large': 131000, - 'mistral-large-2402': 127500, - 'mistral-large-2407': 127500, - 'mistral-nemo': 131000, - 'pixtral-large': 131000, - 'mistral-saba': 32000, - codestral: 256000, - 'ministral-8b': 131000, - 'ministral-3b': 131000, -}; - -const cohereModels: Record<string, number> = { - 'command-light': 4086, // -10 from max - 'command-light-nightly': 8182, // -10 from max - command: 4086, // -10 from max - 'command-nightly': 8182, // -10 from max - 'command-text': 4086, // -10 from max - 'command-r': 127500, // -500 from max - 'command-r-plus': 127500, // -500 from max -}; - -const googleModels: Record<string, number> = { - /* Max I/O is combined so we subtract the amount from max response tokens for actual total */ - gemma: 8196, - 'gemma-2': 32768, - 'gemma-3': 32768, - 'gemma-3-27b': 131072, - gemini: 30720, // -2048 from max - 'gemini-pro-vision': 12288, - 'gemini-exp': 2000000, - 'gemini-3': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5-pro': 1000000, - 'gemini-2.5-flash': 1000000, - 'gemini-2.5-flash-lite': 1000000, - 'gemini-2.0': 2000000, - 'gemini-2.0-flash': 1000000, - 'gemini-2.0-flash-lite': 1000000, - 'gemini-1.5': 1000000, - 'gemini-1.5-flash': 1000000, - 'gemini-1.5-flash-8b': 1000000, - 'text-bison-32k': 32758, // -10 from max - 'chat-bison-32k': 32758, // -10 from max - 'code-bison-32k': 32758, // -10 from max - 'codechat-bison-32k': 32758, - /* Codey, -5 from max: 6144 */ - 'code-': 6139, - 'codechat-': 6139, - /* PaLM2, -5 from max: 8192 */ - 'text-': 8187, - 'chat-': 8187, -}; - -const anthropicModels: Record<string, number> = { - 'claude-': 100000, - 'claude-instant': 100000, - 'claude-2': 100000, - 'claude-2.1': 200000, - 'claude-3': 200000, - 'claude-3-haiku': 200000, - 'claude-3-sonnet': 200000, - 'claude-3-opus': 200000, - 'claude-3.5-haiku': 200000, - 'claude-3-5-haiku': 200000, - 'claude-3-5-sonnet': 200000, - 'claude-3.5-sonnet': 200000, - 'claude-3-7-sonnet': 200000, - 'claude-3.7-sonnet': 200000, - 'claude-3-5-sonnet-latest': 200000, - 'claude-3.5-sonnet-latest': 200000, -
'claude-haiku-4-5': 200000, - 'claude-sonnet-4': 1000000, - 'claude-4': 200000, - 'claude-opus-4': 200000, - 'claude-opus-4-5': 200000, -}; - -const deepseekModels: Record<string, number> = { - deepseek: 128000, - 'deepseek-chat': 128000, - 'deepseek-reasoner': 128000, - 'deepseek-r1': 128000, - 'deepseek-v3': 128000, - 'deepseek.r1': 128000, -}; - -const metaModels: Record<string, number> = { - // Basic patterns - llama3: 8000, - llama2: 4000, - 'llama-3': 8000, - 'llama-2': 4000, - - // llama3.x pattern - 'llama3.1': 127500, - 'llama3.2': 127500, - 'llama3.3': 127500, - - // llama3-x pattern - 'llama3-1': 127500, - 'llama3-2': 127500, - 'llama3-3': 127500, - - // llama-3.x pattern - 'llama-3.1': 127500, - 'llama-3.2': 127500, - 'llama-3.3': 127500, - - // llama3.x:Nb pattern - 'llama3.1:405b': 127500, - 'llama3.1:70b': 127500, - 'llama3.1:8b': 127500, - 'llama3.2:1b': 127500, - 'llama3.2:3b': 127500, - 'llama3.2:11b': 127500, - 'llama3.2:90b': 127500, - 'llama3.3:70b': 127500, - - // llama3-x-Nb pattern - 'llama3-1-405b': 127500, - 'llama3-1-70b': 127500, - 'llama3-1-8b': 127500, - 'llama3-2-1b': 127500, - 'llama3-2-3b': 127500, - 'llama3-2-11b': 127500, - 'llama3-2-90b': 127500, - 'llama3-3-70b': 127500, - - // llama-3.x-Nb pattern - 'llama-3.1-405b': 127500, - 'llama-3.1-70b': 127500, - 'llama-3.1-8b': 127500, - 'llama-3.2-1b': 127500, - 'llama-3.2-3b': 127500, - 'llama-3.2-11b': 127500, - 'llama-3.2-90b': 127500, - 'llama-3.3-70b': 127500, - - // Original llama2/3 patterns - 'llama3-70b': 8000, - 'llama3-8b': 8000, - 'llama2-70b': 4000, - 'llama2-13b': 4000, - 'llama3:70b': 8000, - 'llama3:8b': 8000, - 'llama2:70b': 4000, -}; - -const qwenModels: Record<string, number> = { - qwen: 32000, - 'qwen2.5': 32000, - 'qwen-turbo': 1000000, - 'qwen-plus': 131000, - 'qwen-max': 32000, - 'qwq-32b': 32000, - // Qwen3 models - qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) - 'qwen3-8b': 128000, - 'qwen3-14b': 40960, - 'qwen3-30b-a3b': 40960, - 'qwen3-32b': 40960, - 'qwen3-235b-a22b': 40960, - // Qwen3 VL (Vision-Language) models - 'qwen3-vl-8b-thinking': 256000, - 'qwen3-vl-8b-instruct': 262144, - 'qwen3-vl-30b-a3b': 262144, - 'qwen3-vl-235b-a22b': 131072, - // Qwen3 specialized models - 'qwen3-max': 256000, - 'qwen3-coder': 262144, - 'qwen3-coder-30b-a3b': 262144, - 'qwen3-coder-plus': 128000, - 'qwen3-coder-flash': 128000, - 'qwen3-next-80b-a3b': 262144, -}; - -const ai21Models: Record<string, number> = { - 'j2-mid': 8182, // -10 from max - 'j2-ultra': 8182, // -10 from max - 'jamba-instruct': 255500, // -500 from max -}; - -const amazonModels: Record<string, number> = { - // Amazon Titan models - 'titan-text-lite': 4000, - 'titan-text-express': 8000, - 'titan-text-premier': 31500, // -500 from max - // Amazon Nova models - // https://aws.amazon.com/ai/generative-ai/nova/ - 'nova-micro': 127000, // -1000 from max - 'nova-lite': 295000, // -5000 from max - 'nova-pro': 295000, // -5000 from max - 'nova-premier': 995000, // -5000 from max -}; - -const bedrockModels: Record<string, number> = { - ...anthropicModels, - ...mistralModels, - ...cohereModels, - ...deepseekModels, - ...metaModels, - ...ai21Models, - ...amazonModels, -}; - -const xAIModels: Record<string, number> = { - grok: 131072, - 'grok-beta': 131072, - 'grok-vision-beta': 8192, - 'grok-2': 131072, - 'grok-2-latest': 131072, - 'grok-2-1212': 131072, - 'grok-2-vision': 32768, - 'grok-2-vision-latest': 32768, - 'grok-2-vision-1212': 32768, - 'grok-3': 131072, - 'grok-3-fast': 131072, - 'grok-3-mini': 131072, - 'grok-3-mini-fast': 131072, - 'grok-4': 256000, // 256K context - 'grok-4-fast': 2000000, // 2M context - 'grok-4-1-fast':
2000000, // 2M context (covers reasoning & non-reasoning variants) - 'grok-code-fast': 256000, // 256K context -}; - -const aggregateModels: Record<string, number> = { - ...openAIModels, - ...googleModels, - ...bedrockModels, - ...xAIModels, - ...qwenModels, - // misc. - kimi: 131000, - // GPT-OSS - 'gpt-oss': 131000, - 'gpt-oss:20b': 131000, - 'gpt-oss-20b': 131000, - 'gpt-oss:120b': 131000, - 'gpt-oss-120b': 131000, - // GLM models (Zhipu AI) - glm4: 128000, - 'glm-4': 128000, - 'glm-4-32b': 128000, - 'glm-4.5': 131000, - 'glm-4.5-air': 131000, - 'glm-4.5v': 66000, - 'glm-4.6': 200000, -}; - -/** - * Map of endpoint to model context token limits. - */ -export const maxTokensMap: Record<string, Record<string, number>> = { - [EModelEndpoint.azureOpenAI]: openAIModels, - [EModelEndpoint.openAI]: aggregateModels, - [EModelEndpoint.agents]: aggregateModels, - [EModelEndpoint.custom]: aggregateModels, - [EModelEndpoint.google]: googleModels, - [EModelEndpoint.anthropic]: anthropicModels, - [EModelEndpoint.bedrock]: bedrockModels, -}; - -/** - * Finds the most specific matching pattern in the tokens map. - * Matches the longest key that the model name starts with. - * - * @param modelName - The model name to match against patterns. - * @param tokensMap - Map of model patterns to token limits. - * @returns The matched pattern key or null if no match found. - */ -export function findMatchingPattern( - modelName: string, - tokensMap: Record<string, number>, -): string | null { - const lowerModelName = modelName.toLowerCase(); - // Sort keys by length descending to match most specific (longest) pattern first - const keys = Object.keys(tokensMap).sort((a, b) => b.length - a.length); - for (const modelKey of keys) { - if (lowerModelName.startsWith(modelKey.toLowerCase())) { - return modelKey; - } - } - return null; -} - -/** - * Retrieves the maximum context tokens for a given model name. - * - * @param modelName - The name of the model to look up. - * @param endpoint - The endpoint (default is 'openAI'). - * @returns The maximum context tokens for the given model or undefined if no match is found. - * - * @example - * getModelMaxTokens('gpt-4o'); // Returns 127500 - * getModelMaxTokens('claude-3-opus', 'anthropic'); // Returns 200000 - * getModelMaxTokens('unknown-model'); // Returns undefined - */ -export function getModelMaxTokens( - modelName: string, - endpoint: string = EModelEndpoint.openAI, -): number | undefined { - if (typeof modelName !== 'string') { - return undefined; - } - - const tokensMap = maxTokensMap[endpoint]; - if (!tokensMap) { - // Fall back to aggregate models for unknown endpoints - return getModelMaxTokens(modelName, EModelEndpoint.openAI); - } - - // Try exact match first - if (tokensMap[modelName] !== undefined) { - return tokensMap[modelName]; - } - - // Try pattern matching - const matchedPattern = findMatchingPattern(modelName, tokensMap); - if (matchedPattern) { - return tokensMap[matchedPattern]; - } - - return undefined; -} - -/** - * Retrieves the model name key for a given model name input. - * If the exact model name isn't found, it searches for partial matches. - * - * @param modelName - The name of the model to look up. - * @param endpoint - The endpoint (default is 'openAI'). - * @returns The model name key for the given model; returns input if no match is found.
- */ -export function matchModelName( - modelName: string, - endpoint: string = EModelEndpoint.openAI, -): string | undefined { - if (typeof modelName !== 'string') { - return undefined; - } - - const tokensMap = maxTokensMap[endpoint]; - if (!tokensMap) { - return modelName; - } - - if (tokensMap[modelName] !== undefined) { - return modelName; - } - - const matchedPattern = findMatchingPattern(modelName, tokensMap); - return matchedPattern || modelName; -} - -// Individual model maps are available for advanced use cases -// but not re-exported to avoid conflicts with config.ts - -// ============================================================================= -// OUTPUT TOKEN LIMITS -// ============================================================================= - -/** - * Maximum output tokens for OpenAI and similar models. - * Values from official documentation, slightly reduced to leave safety margin. - */ -const modelMaxOutputs: Record<string, number> = { - o1: 32268, // -500 from max: 32,768 - 'o1-mini': 65136, // -400 from max: 65,536 - 'o1-preview': 32268, // -500 from max: 32,768 - 'gpt-5': 128000, - 'gpt-5-mini': 128000, - 'gpt-5-nano': 128000, - 'gpt-5-pro': 128000, - 'gpt-oss-20b': 131000, - 'gpt-oss-120b': 131000, - system_default: 32000, -}; - -/** - * Maximum output tokens for Anthropic Claude models. - * Values from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names - */ -const anthropicMaxOutputs: Record<string, number> = { - 'claude-3-haiku': 4096, - 'claude-3-sonnet': 4096, - 'claude-3-opus': 4096, - 'claude-haiku-4-5': 64000, - 'claude-sonnet-4': 64000, - 'claude-opus-4': 32000, - 'claude-opus-4-5': 64000, - 'claude-3.5-sonnet': 8192, - 'claude-3-5-sonnet': 8192, - 'claude-3.7-sonnet': 128000, - 'claude-3-7-sonnet': 128000, -}; - -/** - * Maximum output tokens for DeepSeek models. - * Values from https://api-docs.deepseek.com/quick_start/pricing - */ -const deepseekMaxOutputs: Record<string, number> = { - deepseek: 8000, // deepseek-chat default: 4K, max: 8K - 'deepseek-chat': 8000, - 'deepseek-reasoner': 64000, // default: 32K, max: 64K - 'deepseek-r1': 64000, - 'deepseek-v3': 8000, - 'deepseek.r1': 64000, -}; - -/** - * Map of endpoint to model max output token limits. - */ -export const maxOutputTokensMap: Record<string, Record<string, number>> = { - [EModelEndpoint.anthropic]: anthropicMaxOutputs, - [EModelEndpoint.azureOpenAI]: modelMaxOutputs, - [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, - [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, -}; - -/** - * Retrieves the maximum output tokens for a given model name. - * - * @param modelName - The name of the model to look up. - * @param endpoint - The endpoint (default is 'openAI'). - * @returns The maximum output tokens for the given model or undefined if no match is found.
- * - * @example - * getModelMaxOutputTokens('o1'); // Returns 32268 - * getModelMaxOutputTokens('claude-3-opus', 'anthropic'); // Returns 4096 - * getModelMaxOutputTokens('unknown-model'); // Returns 32000 (system_default) - */ -export function getModelMaxOutputTokens( - modelName: string, - endpoint: string = EModelEndpoint.openAI, -): number | undefined { - if (typeof modelName !== 'string') { - return undefined; - } - - const tokensMap = maxOutputTokensMap[endpoint]; - if (!tokensMap) { - // Fall back to openAI for unknown endpoints - return getModelMaxOutputTokens(modelName, EModelEndpoint.openAI); - } - - // Try exact match first - if (tokensMap[modelName] !== undefined) { - return tokensMap[modelName]; - } - - // Try pattern matching - const matchedPattern = findMatchingPattern(modelName, tokensMap); - if (matchedPattern) { - return tokensMap[matchedPattern]; - } - - // Return system_default if available - return tokensMap.system_default; -} - -// ============================================================================= -// TOKEN DEFAULTS -// ============================================================================= - -/** - * Centralized token-related default values. - */ -export const TOKEN_DEFAULTS = { - /** Fallback context window for agents when model lookup fails */ - AGENT_CONTEXT_FALLBACK: 18000, - /** Legacy fallback for older clients */ - LEGACY_CONTEXT_FALLBACK: 4097, - /** Safety margin multiplier (0.9 = reserve 10% for response) */ - CONTEXT_SAFETY_MARGIN: 0.9, - /** Default max output tokens when not specified */ - DEFAULT_MAX_OUTPUT: 32000, -} as const;
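The key-ordering contract documented at the top of the new `packages/api/src/utils/tokens.ts` can be sanity-checked in isolation. The sketch below is a minimal re-implementation of the matching behavior that header comment describes (reverse key iteration plus `includes`); it is an illustration, not the actual `findMatchingPattern` exported from `@librechat/api`, which may differ in details:

```ts
// Minimal sketch, assuming the behavior the header comment describes:
// keys are checked in reverse definition order and matched via includes().
function findMatchingPatternSketch(
  modelName: string,
  tokensMap: Record<string, number>,
): string | null {
  const name = modelName.toLowerCase();
  const keys = Object.keys(tokensMap);
  for (let i = keys.length - 1; i >= 0; i--) {
    if (name.includes(keys[i])) {
      return keys[i];
    }
  }
  return null;
}

// 'kimi-k2.5-preview' contains 'kimi', 'kimi-k2', and 'kimi-k2.5' as
// substrings; because the specific keys are defined after the base key,
// reverse iteration checks them first and the most specific one wins.
const kimiMap = { kimi: 262144, 'kimi-k2': 262144, 'kimi-k2.5': 262144 };
console.log(findMatchingPatternSketch('kimi-k2.5-preview', kimiMap)); // 'kimi-k2.5'
```

Note that this is a deliberate behavior change from the deleted `packages/data-provider/src/tokens.ts`, whose `findMatchingPattern` sorted keys longest-first and required the model name to *start with* the key; its `startsWith`-specific tests are removed along with it.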
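Similarly, the agent context budgeting in `initializeAgent` reduces to a small amount of arithmetic now that the `TOKEN_DEFAULTS` constants are inlined. A hedged sketch follows, using a hypothetical helper name; the patch computes this inline rather than through a function:

```ts
// Illustrative only: the 18000 fallback, the `|| 0` output default, the 0.9
// safety margin, and the rounding come from the patch; the function name and
// signature are hypothetical.
function computeMaxContextTokens(
  agentMaxContextTokens?: number | string,
  maxOutputTokens?: number | string,
): number {
  const agentMaxContextNum = Number(agentMaxContextTokens) || 18000; // agent fallback
  const maxOutputTokensNum = Number(maxOutputTokens) || 0;
  // Reserve the output budget, then keep 90% of the remainder for the prompt.
  return Math.round((agentMaxContextNum - maxOutputTokensNum) * 0.9);
}

// e.g. computeMaxContextTokens(200000, 64000) === Math.round(136000 * 0.9) === 122400
```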