mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-11 12:04:24 +01:00
refactor: remove type re-exports from @librechat/api tokens
Update all imports of TokenConfig and EndpointTokenConfig to import directly from librechat-data-provider instead of re-exporting through packages/api/src/types/tokens.ts. Remove the now-unnecessary re-export file and its barrel export.
This commit is contained in:
parent
3e9ddbf073
commit
9ff227a5b2
12 changed files with 584 additions and 592 deletions
|
|
@ -8,6 +8,7 @@ import {
|
|||
isAgentsEndpoint,
|
||||
replaceSpecialVars,
|
||||
providerEndpointMap,
|
||||
getModelMaxTokens,
|
||||
} from 'librechat-data-provider';
|
||||
import type {
|
||||
AgentToolResources,
|
||||
|
|
@ -21,12 +22,7 @@ import type { GenericTool, LCToolRegistry, ToolMap, LCTool } from '@librechat/ag
|
|||
import type { Response as ServerResponse } from 'express';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { InitializeResultBase, ServerRequest, EndpointDbMethods } from '~/types';
|
||||
import {
|
||||
optionalChainWithEmptyCheck,
|
||||
extractLibreChatParams,
|
||||
getModelMaxTokens,
|
||||
getThreadData,
|
||||
} from '~/utils';
|
||||
import { optionalChainWithEmptyCheck, extractLibreChatParams, getThreadData } from '~/utils';
|
||||
import { filterFilesByEndpointConfig } from '~/files';
|
||||
import { generateArtifactsPrompt } from '~/prompts';
|
||||
import { getProviderConfig } from '~/endpoints';
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { logger } from '@librechat/data-schemas';
|
||||
import type { TCustomConfig, TTransactionsConfig } from 'librechat-data-provider';
|
||||
import type { UsageMetadata } from '../stream/interfaces/IJobStore';
|
||||
import type { EndpointTokenConfig } from '../types/tokens';
|
||||
import type { EndpointTokenConfig } from 'librechat-data-provider';
|
||||
|
||||
interface TokenUsage {
|
||||
promptTokens?: number;
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ import {
|
|||
anthropicSettings,
|
||||
supportsContext1m,
|
||||
supportsAdaptiveThinking,
|
||||
matchModelName,
|
||||
} from 'librechat-data-provider';
|
||||
import { matchModelName } from '~/utils/tokens';
|
||||
|
||||
/**
|
||||
* @param {string} modelName
|
||||
|
|
|
|||
|
|
@ -5,9 +5,9 @@ import {
|
|||
FetchTokenConfig,
|
||||
extractEnvVariable,
|
||||
} from 'librechat-data-provider';
|
||||
import type { TEndpoint } from 'librechat-data-provider';
|
||||
import type { TEndpoint, EndpointTokenConfig } from 'librechat-data-provider';
|
||||
import type { AppConfig } from '@librechat/data-schemas';
|
||||
import type { BaseInitializeParams, InitializeResultBase, EndpointTokenConfig } from '~/types';
|
||||
import type { BaseInitializeParams, InitializeResultBase } from '~/types';
|
||||
import { getOpenAIConfig } from '~/endpoints/openai/config';
|
||||
import { getCustomEndpointConfig } from '~/app/config';
|
||||
import { fetchModels } from '~/endpoints/models';
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import type { ClientOptions, OpenAIClientOptions } from '@librechat/agents';
|
||||
import type { TConfig } from 'librechat-data-provider';
|
||||
import type { EndpointTokenConfig, ServerRequest } from '~/types';
|
||||
import type { EndpointTokenConfig } from 'librechat-data-provider';
|
||||
import type { ServerRequest } from '~/types';
|
||||
|
||||
export type TCustomEndpointsConfig = Partial<{ [key: string]: Omit<TConfig, 'order'> }>;
|
||||
|
||||
|
|
|
|||
|
|
@ -12,5 +12,4 @@ export * from './mistral';
|
|||
export type * from './openai';
|
||||
export * from './prompts';
|
||||
export * from './run';
|
||||
export * from './tokens';
|
||||
export * from './stream';
|
||||
|
|
|
|||
|
|
@ -1,17 +0,0 @@
|
|||
/** Configuration object mapping model keys to their respective prompt, completion rates, and context limit
|
||||
*
|
||||
* Note: the [key: string]: unknown is not in the original JSDoc typedef in /api/typedefs.js, but I've included it since
|
||||
* getModelMaxOutputTokens calls getModelTokenValue with a key of 'output', which was not in the original JSDoc typedef,
|
||||
* but would be referenced in a TokenConfig in the if(matchedPattern) portion of getModelTokenValue.
|
||||
* So in order to preserve functionality for that case and any others which might reference an additional key I'm unaware of,
|
||||
* I've included it here until the interface can be typed more tightly.
|
||||
*/
|
||||
export interface TokenConfig {
|
||||
prompt: number;
|
||||
completion: number;
|
||||
context: number;
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
/** An endpoint's config object mapping model keys to their respective prompt, completion rates, and context limit */
|
||||
export type EndpointTokenConfig = Record<string, TokenConfig>;
|
||||
|
|
@ -1,558 +1,5 @@
|
|||
import z from 'zod';
|
||||
import { EModelEndpoint } from 'librechat-data-provider';
|
||||
import type { EndpointTokenConfig, TokenConfig } from '~/types';
|
||||
|
||||
/**
|
||||
* Model Token Configuration Maps
|
||||
*
|
||||
* IMPORTANT: Key Ordering for Pattern Matching
|
||||
* ============================================
|
||||
* The `findMatchingPattern` function iterates through object keys in REVERSE order
|
||||
* (last-defined keys are checked first) and uses `modelName.includes(key)` for matching.
|
||||
*
|
||||
* This means:
|
||||
* 1. BASE PATTERNS must be defined FIRST (e.g., "kimi", "moonshot")
|
||||
* 2. SPECIFIC PATTERNS must be defined AFTER their base patterns (e.g., "kimi-k2", "kimi-k2.5")
|
||||
*
|
||||
* Example ordering for Kimi models:
|
||||
* kimi: 262144, // Base pattern - checked last
|
||||
* 'kimi-k2': 262144, // More specific - checked before "kimi"
|
||||
* 'kimi-k2.5': 262144, // Most specific - checked first
|
||||
*
|
||||
* Why this matters:
|
||||
* - Model name "kimi-k2.5" contains both "kimi" and "kimi-k2" as substrings
|
||||
* - If "kimi" were checked first, it would incorrectly match "kimi-k2.5"
|
||||
* - By defining specific patterns AFTER base patterns, they're checked first in reverse iteration
|
||||
*
|
||||
* When adding new model families:
|
||||
* 1. Define the base/generic pattern first
|
||||
* 2. Define increasingly specific patterns after
|
||||
* 3. Ensure no pattern is a substring of another that should match differently
|
||||
*/
|
||||
|
||||
const openAIModels = {
|
||||
'o4-mini': 200000,
|
||||
'o3-mini': 195000, // -5000 from max
|
||||
o3: 200000,
|
||||
o1: 195000, // -5000 from max
|
||||
'o1-mini': 127500, // -500 from max
|
||||
'o1-preview': 127500, // -500 from max
|
||||
'gpt-4': 8187, // -5 from max
|
||||
'gpt-4-0613': 8187, // -5 from max
|
||||
'gpt-4-32k': 32758, // -10 from max
|
||||
'gpt-4-32k-0314': 32758, // -10 from max
|
||||
'gpt-4-32k-0613': 32758, // -10 from max
|
||||
'gpt-4-1106': 127500, // -500 from max
|
||||
'gpt-4-0125': 127500, // -500 from max
|
||||
'gpt-4.5': 127500, // -500 from max
|
||||
'gpt-4.1': 1047576,
|
||||
'gpt-4.1-mini': 1047576,
|
||||
'gpt-4.1-nano': 1047576,
|
||||
'gpt-5': 400000,
|
||||
'gpt-5.1': 400000,
|
||||
'gpt-5.2': 400000,
|
||||
'gpt-5-mini': 400000,
|
||||
'gpt-5-nano': 400000,
|
||||
'gpt-5-pro': 400000,
|
||||
'gpt-4o': 127500, // -500 from max
|
||||
'gpt-4o-mini': 127500, // -500 from max
|
||||
'gpt-4o-2024-05-13': 127500, // -500 from max
|
||||
'gpt-4-turbo': 127500, // -500 from max
|
||||
'gpt-4-vision': 127500, // -500 from max
|
||||
'gpt-3.5-turbo': 16375, // -10 from max
|
||||
'gpt-3.5-turbo-0613': 4092, // -5 from max
|
||||
'gpt-3.5-turbo-0301': 4092, // -5 from max
|
||||
'gpt-3.5-turbo-16k': 16375, // -10 from max
|
||||
'gpt-3.5-turbo-16k-0613': 16375, // -10 from max
|
||||
'gpt-3.5-turbo-1106': 16375, // -10 from max
|
||||
'gpt-3.5-turbo-0125': 16375, // -10 from max
|
||||
};
|
||||
|
||||
const mistralModels = {
|
||||
'mistral-': 31990, // -10 from max
|
||||
'mistral-7b': 31990, // -10 from max
|
||||
'mistral-small': 31990, // -10 from max
|
||||
'mixtral-8x7b': 31990, // -10 from max
|
||||
'mixtral-8x22b': 65536,
|
||||
'mistral-large': 131000,
|
||||
'mistral-large-2402': 127500,
|
||||
'mistral-large-2407': 127500,
|
||||
'mistral-nemo': 131000,
|
||||
'pixtral-large': 131000,
|
||||
'mistral-saba': 32000,
|
||||
codestral: 256000,
|
||||
'ministral-8b': 131000,
|
||||
'ministral-3b': 131000,
|
||||
};
|
||||
|
||||
const cohereModels = {
|
||||
'command-light': 4086, // -10 from max
|
||||
'command-light-nightly': 8182, // -10 from max
|
||||
command: 4086, // -10 from max
|
||||
'command-nightly': 8182, // -10 from max
|
||||
'command-text': 4086, // -10 from max
|
||||
'command-r': 127500, // -500 from max
|
||||
'command-r-plus': 127500, // -500 from max
|
||||
};
|
||||
|
||||
const googleModels = {
|
||||
/* Max I/O is combined so we subtract the amount from max response tokens for actual total */
|
||||
gemma: 8196,
|
||||
'gemma-2': 32768,
|
||||
'gemma-3': 32768,
|
||||
'gemma-3-27b': 131072,
|
||||
gemini: 30720, // -2048 from max
|
||||
'gemini-pro-vision': 12288,
|
||||
'gemini-exp': 2000000,
|
||||
'gemini-3': 1000000, // 1M input tokens, 64k output tokens
|
||||
'gemini-3-pro-image': 1000000,
|
||||
'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens
|
||||
'gemini-2.5-pro': 1000000,
|
||||
'gemini-2.5-flash': 1000000,
|
||||
'gemini-2.5-flash-image': 1000000,
|
||||
'gemini-2.5-flash-lite': 1000000,
|
||||
'gemini-2.0': 2000000,
|
||||
'gemini-2.0-flash': 1000000,
|
||||
'gemini-2.0-flash-lite': 1000000,
|
||||
'gemini-1.5': 1000000,
|
||||
'gemini-1.5-flash': 1000000,
|
||||
'gemini-1.5-flash-8b': 1000000,
|
||||
'text-bison-32k': 32758, // -10 from max
|
||||
'chat-bison-32k': 32758, // -10 from max
|
||||
'code-bison-32k': 32758, // -10 from max
|
||||
'codechat-bison-32k': 32758,
|
||||
/* Codey, -5 from max: 6144 */
|
||||
'code-': 6139,
|
||||
'codechat-': 6139,
|
||||
/* PaLM2, -5 from max: 8192 */
|
||||
'text-': 8187,
|
||||
'chat-': 8187,
|
||||
};
|
||||
|
||||
const anthropicModels = {
|
||||
'claude-': 100000,
|
||||
'claude-instant': 100000,
|
||||
'claude-2': 100000,
|
||||
'claude-2.1': 200000,
|
||||
'claude-3': 200000,
|
||||
'claude-3-haiku': 200000,
|
||||
'claude-3-sonnet': 200000,
|
||||
'claude-3-opus': 200000,
|
||||
'claude-3.5-haiku': 200000,
|
||||
'claude-3-5-haiku': 200000,
|
||||
'claude-3-5-sonnet': 200000,
|
||||
'claude-3.5-sonnet': 200000,
|
||||
'claude-3-7-sonnet': 200000,
|
||||
'claude-3.7-sonnet': 200000,
|
||||
'claude-3-5-sonnet-latest': 200000,
|
||||
'claude-3.5-sonnet-latest': 200000,
|
||||
'claude-haiku-4-5': 200000,
|
||||
'claude-sonnet-4': 1000000,
|
||||
'claude-4': 200000,
|
||||
'claude-opus-4': 200000,
|
||||
'claude-opus-4-5': 200000,
|
||||
'claude-opus-4-6': 1000000,
|
||||
};
|
||||
|
||||
const deepseekModels = {
|
||||
deepseek: 128000,
|
||||
'deepseek-chat': 128000,
|
||||
'deepseek-reasoner': 128000,
|
||||
'deepseek-r1': 128000,
|
||||
'deepseek-v3': 128000,
|
||||
'deepseek.r1': 128000,
|
||||
};
|
||||
|
||||
const moonshotModels = {
|
||||
// Base patterns (check last due to reverse iteration)
|
||||
kimi: 262144,
|
||||
moonshot: 131072,
|
||||
// kimi-k2 series (specific patterns)
|
||||
'kimi-latest': 128000,
|
||||
'kimi-k2': 262144,
|
||||
'kimi-k2.5': 262144,
|
||||
'kimi-k2-turbo': 262144,
|
||||
'kimi-k2-turbo-preview': 262144,
|
||||
'kimi-k2-0905': 262144,
|
||||
'kimi-k2-0905-preview': 262144,
|
||||
'kimi-k2-0711': 131072,
|
||||
'kimi-k2-0711-preview': 131072,
|
||||
'kimi-k2-thinking': 262144,
|
||||
'kimi-k2-thinking-turbo': 262144,
|
||||
// moonshot-v1 series (specific patterns)
|
||||
'moonshot-v1': 131072,
|
||||
'moonshot-v1-auto': 131072,
|
||||
'moonshot-v1-8k': 8192,
|
||||
'moonshot-v1-8k-vision': 8192,
|
||||
'moonshot-v1-8k-vision-preview': 8192,
|
||||
'moonshot-v1-32k': 32768,
|
||||
'moonshot-v1-32k-vision': 32768,
|
||||
'moonshot-v1-32k-vision-preview': 32768,
|
||||
'moonshot-v1-128k': 131072,
|
||||
'moonshot-v1-128k-vision': 131072,
|
||||
'moonshot-v1-128k-vision-preview': 131072,
|
||||
// Bedrock moonshot models
|
||||
'moonshot.kimi': 262144,
|
||||
'moonshot.kimi-k2': 262144,
|
||||
'moonshot.kimi-k2.5': 262144,
|
||||
'moonshot.kimi-k2-thinking': 262144,
|
||||
'moonshot.kimi-k2-0711': 131072,
|
||||
};
|
||||
|
||||
const metaModels = {
|
||||
// Basic patterns
|
||||
llama3: 8000,
|
||||
llama2: 4000,
|
||||
'llama-3': 8000,
|
||||
'llama-2': 4000,
|
||||
|
||||
// llama3.x pattern
|
||||
'llama3.1': 127500,
|
||||
'llama3.2': 127500,
|
||||
'llama3.3': 127500,
|
||||
|
||||
// llama3-x pattern
|
||||
'llama3-1': 127500,
|
||||
'llama3-2': 127500,
|
||||
'llama3-3': 127500,
|
||||
|
||||
// llama-3.x pattern
|
||||
'llama-3.1': 127500,
|
||||
'llama-3.2': 127500,
|
||||
'llama-3.3': 127500,
|
||||
|
||||
// llama3.x:Nb pattern
|
||||
'llama3.1:405b': 127500,
|
||||
'llama3.1:70b': 127500,
|
||||
'llama3.1:8b': 127500,
|
||||
'llama3.2:1b': 127500,
|
||||
'llama3.2:3b': 127500,
|
||||
'llama3.2:11b': 127500,
|
||||
'llama3.2:90b': 127500,
|
||||
'llama3.3:70b': 127500,
|
||||
|
||||
// llama3-x-Nb pattern
|
||||
'llama3-1-405b': 127500,
|
||||
'llama3-1-70b': 127500,
|
||||
'llama3-1-8b': 127500,
|
||||
'llama3-2-1b': 127500,
|
||||
'llama3-2-3b': 127500,
|
||||
'llama3-2-11b': 127500,
|
||||
'llama3-2-90b': 127500,
|
||||
'llama3-3-70b': 127500,
|
||||
|
||||
// llama-3.x-Nb pattern
|
||||
'llama-3.1-405b': 127500,
|
||||
'llama-3.1-70b': 127500,
|
||||
'llama-3.1-8b': 127500,
|
||||
'llama-3.2-1b': 127500,
|
||||
'llama-3.2-3b': 127500,
|
||||
'llama-3.2-11b': 127500,
|
||||
'llama-3.2-90b': 127500,
|
||||
'llama-3.3-70b': 127500,
|
||||
|
||||
// Original llama2/3 patterns
|
||||
'llama3-70b': 8000,
|
||||
'llama3-8b': 8000,
|
||||
'llama2-70b': 4000,
|
||||
'llama2-13b': 4000,
|
||||
'llama3:70b': 8000,
|
||||
'llama3:8b': 8000,
|
||||
'llama2:70b': 4000,
|
||||
};
|
||||
|
||||
const qwenModels = {
|
||||
qwen: 32000,
|
||||
'qwen2.5': 32000,
|
||||
'qwen-turbo': 1000000,
|
||||
'qwen-plus': 131000,
|
||||
'qwen-max': 32000,
|
||||
'qwq-32b': 32000,
|
||||
// Qwen3 models
|
||||
qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
|
||||
'qwen3-8b': 128000,
|
||||
'qwen3-14b': 40960,
|
||||
'qwen3-30b-a3b': 40960,
|
||||
'qwen3-32b': 40960,
|
||||
'qwen3-235b-a22b': 40960,
|
||||
// Qwen3 VL (Vision-Language) models
|
||||
'qwen3-vl-8b-thinking': 256000,
|
||||
'qwen3-vl-8b-instruct': 262144,
|
||||
'qwen3-vl-30b-a3b': 262144,
|
||||
'qwen3-vl-235b-a22b': 131072,
|
||||
// Qwen3 specialized models
|
||||
'qwen3-max': 256000,
|
||||
'qwen3-coder': 262144,
|
||||
'qwen3-coder-30b-a3b': 262144,
|
||||
'qwen3-coder-plus': 128000,
|
||||
'qwen3-coder-flash': 128000,
|
||||
'qwen3-next-80b-a3b': 262144,
|
||||
};
|
||||
|
||||
const ai21Models = {
|
||||
'j2-mid': 8182, // -10 from max
|
||||
'j2-ultra': 8182, // -10 from max
|
||||
'jamba-instruct': 255500, // -500 from max
|
||||
};
|
||||
|
||||
const amazonModels = {
|
||||
// Amazon Titan models
|
||||
'titan-text-lite': 4000,
|
||||
'titan-text-express': 8000,
|
||||
'titan-text-premier': 31500, // -500 from max
|
||||
// Amazon Nova models
|
||||
// https://aws.amazon.com/ai/generative-ai/nova/
|
||||
'nova-micro': 127000, // -1000 from max
|
||||
'nova-lite': 295000, // -5000 from max
|
||||
'nova-pro': 295000, // -5000 from max
|
||||
'nova-premier': 995000, // -5000 from max
|
||||
};
|
||||
|
||||
const bedrockModels = {
|
||||
...anthropicModels,
|
||||
...mistralModels,
|
||||
...cohereModels,
|
||||
...deepseekModels,
|
||||
...moonshotModels,
|
||||
...metaModels,
|
||||
...ai21Models,
|
||||
...amazonModels,
|
||||
};
|
||||
|
||||
const xAIModels = {
|
||||
grok: 131072,
|
||||
'grok-beta': 131072,
|
||||
'grok-vision-beta': 8192,
|
||||
'grok-2': 131072,
|
||||
'grok-2-latest': 131072,
|
||||
'grok-2-1212': 131072,
|
||||
'grok-2-vision': 32768,
|
||||
'grok-2-vision-latest': 32768,
|
||||
'grok-2-vision-1212': 32768,
|
||||
'grok-3': 131072,
|
||||
'grok-3-fast': 131072,
|
||||
'grok-3-mini': 131072,
|
||||
'grok-3-mini-fast': 131072,
|
||||
'grok-4': 256000, // 256K context
|
||||
'grok-4-fast': 2000000, // 2M context
|
||||
'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants)
|
||||
'grok-code-fast': 256000, // 256K context
|
||||
};
|
||||
|
||||
const aggregateModels = {
|
||||
...openAIModels,
|
||||
...googleModels,
|
||||
...bedrockModels,
|
||||
...xAIModels,
|
||||
...qwenModels,
|
||||
// GPT-OSS
|
||||
'gpt-oss': 131000,
|
||||
'gpt-oss:20b': 131000,
|
||||
'gpt-oss-20b': 131000,
|
||||
'gpt-oss:120b': 131000,
|
||||
'gpt-oss-120b': 131000,
|
||||
// GLM models (Zhipu AI)
|
||||
glm4: 128000,
|
||||
'glm-4': 128000,
|
||||
'glm-4-32b': 128000,
|
||||
'glm-4.5': 131000,
|
||||
'glm-4.5-air': 131000,
|
||||
'glm-4.5v': 66000,
|
||||
'glm-4.6': 200000,
|
||||
};
|
||||
|
||||
export const maxTokensMap = {
|
||||
[EModelEndpoint.azureOpenAI]: openAIModels,
|
||||
[EModelEndpoint.openAI]: aggregateModels,
|
||||
[EModelEndpoint.agents]: aggregateModels,
|
||||
[EModelEndpoint.custom]: aggregateModels,
|
||||
[EModelEndpoint.google]: googleModels,
|
||||
[EModelEndpoint.anthropic]: anthropicModels,
|
||||
[EModelEndpoint.bedrock]: bedrockModels,
|
||||
};
|
||||
|
||||
export const modelMaxOutputs = {
|
||||
o1: 32268, // -500 from max: 32,768
|
||||
'o1-mini': 65136, // -500 from max: 65,536
|
||||
'o1-preview': 32268, // -500 from max: 32,768
|
||||
'gpt-5': 128000,
|
||||
'gpt-5.1': 128000,
|
||||
'gpt-5.2': 128000,
|
||||
'gpt-5-mini': 128000,
|
||||
'gpt-5-nano': 128000,
|
||||
'gpt-5-pro': 128000,
|
||||
'gpt-oss-20b': 131000,
|
||||
'gpt-oss-120b': 131000,
|
||||
system_default: 32000,
|
||||
};
|
||||
|
||||
/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */
|
||||
const anthropicMaxOutputs = {
|
||||
'claude-3-haiku': 4096,
|
||||
'claude-3-sonnet': 4096,
|
||||
'claude-3-opus': 4096,
|
||||
'claude-haiku-4-5': 64000,
|
||||
'claude-sonnet-4': 64000,
|
||||
'claude-opus-4': 32000,
|
||||
'claude-opus-4-5': 64000,
|
||||
'claude-opus-4-6': 128000,
|
||||
'claude-3.5-sonnet': 8192,
|
||||
'claude-3-5-sonnet': 8192,
|
||||
'claude-3.7-sonnet': 128000,
|
||||
'claude-3-7-sonnet': 128000,
|
||||
};
|
||||
|
||||
/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */
|
||||
const deepseekMaxOutputs = {
|
||||
deepseek: 8000, // deepseek-chat default: 4K, max: 8K
|
||||
'deepseek-chat': 8000,
|
||||
'deepseek-reasoner': 64000, // default: 32K, max: 64K
|
||||
'deepseek-r1': 64000,
|
||||
'deepseek-v3': 8000,
|
||||
'deepseek.r1': 64000,
|
||||
};
|
||||
|
||||
export const maxOutputTokensMap = {
|
||||
[EModelEndpoint.anthropic]: anthropicMaxOutputs,
|
||||
[EModelEndpoint.azureOpenAI]: modelMaxOutputs,
|
||||
[EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
|
||||
[EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
|
||||
};
|
||||
|
||||
/**
|
||||
* Finds the first matching pattern in the tokens map.
|
||||
* @param {string} modelName
|
||||
* @param {Record<string, number> | EndpointTokenConfig} tokensMap
|
||||
* @returns {string|null}
|
||||
*/
|
||||
export function findMatchingPattern(
|
||||
modelName: string,
|
||||
tokensMap: Record<string, number> | EndpointTokenConfig,
|
||||
): string | null {
|
||||
const keys = Object.keys(tokensMap);
|
||||
const lowerModelName = modelName.toLowerCase();
|
||||
for (let i = keys.length - 1; i >= 0; i--) {
|
||||
const modelKey = keys[i];
|
||||
if (lowerModelName.includes(modelKey)) {
|
||||
return modelKey;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a token value for a given model name from a tokens map.
|
||||
*
|
||||
* @param modelName - The name of the model to look up.
|
||||
* @param tokensMap - The map of model names to token values.
|
||||
* @param [key='context'] - The key to look up in the tokens map.
|
||||
* @returns The token value for the given model or undefined if no match is found.
|
||||
*/
|
||||
export function getModelTokenValue(
|
||||
modelName: string,
|
||||
tokensMap?: EndpointTokenConfig | Record<string, number>,
|
||||
key = 'context' as keyof TokenConfig,
|
||||
): number | undefined {
|
||||
if (typeof modelName !== 'string' || !tokensMap) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const value = tokensMap[modelName];
|
||||
if (typeof value === 'number') {
|
||||
return value;
|
||||
}
|
||||
|
||||
if (value?.context) {
|
||||
return value.context;
|
||||
}
|
||||
|
||||
const matchedPattern = findMatchingPattern(modelName, tokensMap);
|
||||
|
||||
if (matchedPattern) {
|
||||
const result = tokensMap[matchedPattern];
|
||||
if (typeof result === 'number') {
|
||||
return result;
|
||||
}
|
||||
|
||||
const tokenValue = result?.[key];
|
||||
if (typeof tokenValue === 'number') {
|
||||
return tokenValue;
|
||||
}
|
||||
return tokensMap.system_default as number | undefined;
|
||||
}
|
||||
|
||||
return tokensMap.system_default as number | undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the maximum tokens for a given model name.
|
||||
*
|
||||
* @param modelName - The name of the model to look up.
|
||||
* @param endpoint - The endpoint (default is 'openAI').
|
||||
* @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
|
||||
* @returns The maximum tokens for the given model or undefined if no match is found.
|
||||
*/
|
||||
export function getModelMaxTokens(
|
||||
modelName: string,
|
||||
endpoint = EModelEndpoint.openAI,
|
||||
endpointTokenConfig?: EndpointTokenConfig,
|
||||
): number | undefined {
|
||||
const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint as keyof typeof maxTokensMap];
|
||||
return getModelTokenValue(modelName, tokensMap);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the maximum output tokens for a given model name.
|
||||
*
|
||||
* @param modelName - The name of the model to look up.
|
||||
* @param endpoint - The endpoint (default is 'openAI').
|
||||
* @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
|
||||
* @returns The maximum output tokens for the given model or undefined if no match is found.
|
||||
*/
|
||||
export function getModelMaxOutputTokens(
|
||||
modelName: string,
|
||||
endpoint = EModelEndpoint.openAI,
|
||||
endpointTokenConfig?: EndpointTokenConfig,
|
||||
): number | undefined {
|
||||
const tokensMap =
|
||||
endpointTokenConfig ?? maxOutputTokensMap[endpoint as keyof typeof maxOutputTokensMap];
|
||||
return getModelTokenValue(modelName, tokensMap, 'output');
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the model name key for a given model name input. If the exact model name isn't found,
|
||||
* it searches for partial matches within the model name, checking keys in reverse order.
|
||||
*
|
||||
* @param modelName - The name of the model to look up.
|
||||
* @param endpoint - The endpoint (default is 'openAI').
|
||||
* @returns The model name key for the given model; returns input if no match is found and is string.
|
||||
*
|
||||
* @example
|
||||
* matchModelName('gpt-4-32k-0613'); // Returns 'gpt-4-32k-0613'
|
||||
* matchModelName('gpt-4-32k-unknown'); // Returns 'gpt-4-32k'
|
||||
* matchModelName('unknown-model'); // Returns undefined
|
||||
*/
|
||||
export function matchModelName(
|
||||
modelName: string,
|
||||
endpoint = EModelEndpoint.openAI,
|
||||
): string | undefined {
|
||||
if (typeof modelName !== 'string') {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const tokensMap: Record<string, number> = maxTokensMap[endpoint as keyof typeof maxTokensMap];
|
||||
if (!tokensMap) {
|
||||
return modelName;
|
||||
}
|
||||
|
||||
if (tokensMap[modelName]) {
|
||||
return modelName;
|
||||
}
|
||||
|
||||
const matchedPattern = findMatchingPattern(modelName, tokensMap);
|
||||
return matchedPattern || modelName;
|
||||
}
|
||||
import type { EndpointTokenConfig } from 'librechat-data-provider';
|
||||
|
||||
export const modelSchema = z.object({
|
||||
id: z.string(),
|
||||
|
|
|
|||
|
|
@ -40,6 +40,8 @@ export { dataService };
|
|||
import * as dataService from './data-service';
|
||||
/* general helpers */
|
||||
export * from './utils';
|
||||
/* tokens */
|
||||
export * from './tokens';
|
||||
export * from './actions';
|
||||
export { default as createPayload } from './createPayload';
|
||||
// /* react query hooks */
|
||||
|
|
|
|||
564
packages/data-provider/src/tokens.ts
Normal file
564
packages/data-provider/src/tokens.ts
Normal file
|
|
@ -0,0 +1,564 @@
|
|||
import { EModelEndpoint } from './schemas';
|
||||
|
||||
/** Configuration object mapping model keys to their respective prompt, completion rates, and context limit */
export interface TokenConfig {
  // Rate applied to prompt (input) tokens.
  prompt: number;
  // Rate applied to completion (output) tokens.
  completion: number;
  // Maximum context window, in tokens, for the model.
  context: number;
  // Additional lookup keys (e.g. 'output', read by getModelMaxOutputTokens
  // via getModelTokenValue) — kept open until the shape is typed more tightly.
  [key: string]: unknown;
}

/** An endpoint's config object mapping model keys to their respective prompt, completion rates, and context limit */
export type EndpointTokenConfig = Record<string, TokenConfig>;
|
||||
|
||||
/**
|
||||
* Model Token Configuration Maps
|
||||
*
|
||||
* IMPORTANT: Key Ordering for Pattern Matching
|
||||
* ============================================
|
||||
* The `findMatchingPattern` function iterates through object keys in REVERSE order
|
||||
* (last-defined keys are checked first) and uses `modelName.includes(key)` for matching.
|
||||
*
|
||||
* This means:
|
||||
* 1. BASE PATTERNS must be defined FIRST (e.g., "kimi", "moonshot")
|
||||
* 2. SPECIFIC PATTERNS must be defined AFTER their base patterns (e.g., "kimi-k2", "kimi-k2.5")
|
||||
*
|
||||
* Example ordering for Kimi models:
|
||||
* kimi: 262144, // Base pattern - checked last
|
||||
* 'kimi-k2': 262144, // More specific - checked before "kimi"
|
||||
* 'kimi-k2.5': 262144, // Most specific - checked first
|
||||
*
|
||||
* Why this matters:
|
||||
* - Model name "kimi-k2.5" contains both "kimi" and "kimi-k2" as substrings
|
||||
* - If "kimi" were checked first, it would incorrectly match "kimi-k2.5"
|
||||
* - By defining specific patterns AFTER base patterns, they're checked first in reverse iteration
|
||||
*
|
||||
* When adding new model families:
|
||||
* 1. Define the base/generic pattern first
|
||||
* 2. Define increasingly specific patterns after
|
||||
* 3. Ensure no pattern is a substring of another that should match differently
|
||||
*/
|
||||
|
||||
const openAIModels = {
|
||||
'o4-mini': 200000,
|
||||
'o3-mini': 195000, // -5000 from max
|
||||
o3: 200000,
|
||||
o1: 195000, // -5000 from max
|
||||
'o1-mini': 127500, // -500 from max
|
||||
'o1-preview': 127500, // -500 from max
|
||||
'gpt-4': 8187, // -5 from max
|
||||
'gpt-4-0613': 8187, // -5 from max
|
||||
'gpt-4-32k': 32758, // -10 from max
|
||||
'gpt-4-32k-0314': 32758, // -10 from max
|
||||
'gpt-4-32k-0613': 32758, // -10 from max
|
||||
'gpt-4-1106': 127500, // -500 from max
|
||||
'gpt-4-0125': 127500, // -500 from max
|
||||
'gpt-4.5': 127500, // -500 from max
|
||||
'gpt-4.1': 1047576,
|
||||
'gpt-4.1-mini': 1047576,
|
||||
'gpt-4.1-nano': 1047576,
|
||||
'gpt-5': 400000,
|
||||
'gpt-5.1': 400000,
|
||||
'gpt-5.2': 400000,
|
||||
'gpt-5-mini': 400000,
|
||||
'gpt-5-nano': 400000,
|
||||
'gpt-5-pro': 400000,
|
||||
'gpt-4o': 127500, // -500 from max
|
||||
'gpt-4o-mini': 127500, // -500 from max
|
||||
'gpt-4o-2024-05-13': 127500, // -500 from max
|
||||
'gpt-4-turbo': 127500, // -500 from max
|
||||
'gpt-4-vision': 127500, // -500 from max
|
||||
'gpt-3.5-turbo': 16375, // -10 from max
|
||||
'gpt-3.5-turbo-0613': 4092, // -5 from max
|
||||
'gpt-3.5-turbo-0301': 4092, // -5 from max
|
||||
'gpt-3.5-turbo-16k': 16375, // -10 from max
|
||||
'gpt-3.5-turbo-16k-0613': 16375, // -10 from max
|
||||
'gpt-3.5-turbo-1106': 16375, // -10 from max
|
||||
'gpt-3.5-turbo-0125': 16375, // -10 from max
|
||||
};
|
||||
|
||||
const mistralModels = {
|
||||
'mistral-': 31990, // -10 from max
|
||||
'mistral-7b': 31990, // -10 from max
|
||||
'mistral-small': 31990, // -10 from max
|
||||
'mixtral-8x7b': 31990, // -10 from max
|
||||
'mixtral-8x22b': 65536,
|
||||
'mistral-large': 131000,
|
||||
'mistral-large-2402': 127500,
|
||||
'mistral-large-2407': 127500,
|
||||
'mistral-nemo': 131000,
|
||||
'pixtral-large': 131000,
|
||||
'mistral-saba': 32000,
|
||||
codestral: 256000,
|
||||
'ministral-8b': 131000,
|
||||
'ministral-3b': 131000,
|
||||
};
|
||||
|
||||
const cohereModels = {
|
||||
'command-light': 4086, // -10 from max
|
||||
'command-light-nightly': 8182, // -10 from max
|
||||
command: 4086, // -10 from max
|
||||
'command-nightly': 8182, // -10 from max
|
||||
'command-text': 4086, // -10 from max
|
||||
'command-r': 127500, // -500 from max
|
||||
'command-r-plus': 127500, // -500 from max
|
||||
};
|
||||
|
||||
const googleModels = {
|
||||
/* Max I/O is combined so we subtract the amount from max response tokens for actual total */
|
||||
gemma: 8196,
|
||||
'gemma-2': 32768,
|
||||
'gemma-3': 32768,
|
||||
'gemma-3-27b': 131072,
|
||||
gemini: 30720, // -2048 from max
|
||||
'gemini-pro-vision': 12288,
|
||||
'gemini-exp': 2000000,
|
||||
'gemini-3': 1000000, // 1M input tokens, 64k output tokens
|
||||
'gemini-3-pro-image': 1000000,
|
||||
'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens
|
||||
'gemini-2.5-pro': 1000000,
|
||||
'gemini-2.5-flash': 1000000,
|
||||
'gemini-2.5-flash-image': 1000000,
|
||||
'gemini-2.5-flash-lite': 1000000,
|
||||
'gemini-2.0': 2000000,
|
||||
'gemini-2.0-flash': 1000000,
|
||||
'gemini-2.0-flash-lite': 1000000,
|
||||
'gemini-1.5': 1000000,
|
||||
'gemini-1.5-flash': 1000000,
|
||||
'gemini-1.5-flash-8b': 1000000,
|
||||
'text-bison-32k': 32758, // -10 from max
|
||||
'chat-bison-32k': 32758, // -10 from max
|
||||
'code-bison-32k': 32758, // -10 from max
|
||||
'codechat-bison-32k': 32758,
|
||||
/* Codey, -5 from max: 6144 */
|
||||
'code-': 6139,
|
||||
'codechat-': 6139,
|
||||
/* PaLM2, -5 from max: 8192 */
|
||||
'text-': 8187,
|
||||
'chat-': 8187,
|
||||
};
|
||||
|
||||
const anthropicModels = {
|
||||
'claude-': 100000,
|
||||
'claude-instant': 100000,
|
||||
'claude-2': 100000,
|
||||
'claude-2.1': 200000,
|
||||
'claude-3': 200000,
|
||||
'claude-3-haiku': 200000,
|
||||
'claude-3-sonnet': 200000,
|
||||
'claude-3-opus': 200000,
|
||||
'claude-3.5-haiku': 200000,
|
||||
'claude-3-5-haiku': 200000,
|
||||
'claude-3-5-sonnet': 200000,
|
||||
'claude-3.5-sonnet': 200000,
|
||||
'claude-3-7-sonnet': 200000,
|
||||
'claude-3.7-sonnet': 200000,
|
||||
'claude-3-5-sonnet-latest': 200000,
|
||||
'claude-3.5-sonnet-latest': 200000,
|
||||
'claude-haiku-4-5': 200000,
|
||||
'claude-sonnet-4': 1000000,
|
||||
'claude-4': 200000,
|
||||
'claude-opus-4': 200000,
|
||||
'claude-opus-4-5': 200000,
|
||||
'claude-opus-4-6': 1000000,
|
||||
};
|
||||
|
||||
const deepseekModels = {
|
||||
deepseek: 128000,
|
||||
'deepseek-chat': 128000,
|
||||
'deepseek-reasoner': 128000,
|
||||
'deepseek-r1': 128000,
|
||||
'deepseek-v3': 128000,
|
||||
'deepseek.r1': 128000,
|
||||
};
|
||||
|
||||
/**
 * Context-window sizes (tokens) for Moonshot AI (Kimi) models.
 * Insertion order matters — findMatchingPattern iterates keys in reverse,
 * so specific variants listed later take precedence over the base patterns.
 */
const moonshotModels = {
  // Base patterns (check last due to reverse iteration)
  kimi: 262144,
  moonshot: 131072,
  // kimi-k2 series (specific patterns)
  'kimi-latest': 128000,
  'kimi-k2': 262144,
  'kimi-k2.5': 262144,
  'kimi-k2-turbo': 262144,
  'kimi-k2-turbo-preview': 262144,
  'kimi-k2-0905': 262144,
  'kimi-k2-0905-preview': 262144,
  'kimi-k2-0711': 131072,
  'kimi-k2-0711-preview': 131072,
  'kimi-k2-thinking': 262144,
  'kimi-k2-thinking-turbo': 262144,
  // moonshot-v1 series (specific patterns)
  'moonshot-v1': 131072,
  'moonshot-v1-auto': 131072,
  'moonshot-v1-8k': 8192,
  'moonshot-v1-8k-vision': 8192,
  'moonshot-v1-8k-vision-preview': 8192,
  'moonshot-v1-32k': 32768,
  'moonshot-v1-32k-vision': 32768,
  'moonshot-v1-32k-vision-preview': 32768,
  'moonshot-v1-128k': 131072,
  'moonshot-v1-128k-vision': 131072,
  'moonshot-v1-128k-vision-preview': 131072,
  // Bedrock moonshot models
  'moonshot.kimi': 262144,
  'moonshot.kimi-k2': 262144,
  'moonshot.kimi-k2.5': 262144,
  'moonshot.kimi-k2-thinking': 262144,
  'moonshot.kimi-k2-0711': 131072,
};
|
||||
|
||||
/**
 * Context-window sizes (tokens) for Meta Llama models, covering the several
 * naming schemes in the wild (llama3.1 / llama3-1 / llama-3.1, with and
 * without `:Nb` / `-Nb` size suffixes).
 * Insertion order matters — findMatchingPattern iterates keys in reverse,
 * so the more specific patterns listed later are matched first.
 */
const metaModels = {
  // Basic patterns
  llama3: 8000,
  llama2: 4000,
  'llama-3': 8000,
  'llama-2': 4000,

  // llama3.x pattern
  'llama3.1': 127500,
  'llama3.2': 127500,
  'llama3.3': 127500,

  // llama3-x pattern
  'llama3-1': 127500,
  'llama3-2': 127500,
  'llama3-3': 127500,

  // llama-3.x pattern
  'llama-3.1': 127500,
  'llama-3.2': 127500,
  'llama-3.3': 127500,

  // llama3.x:Nb pattern
  'llama3.1:405b': 127500,
  'llama3.1:70b': 127500,
  'llama3.1:8b': 127500,
  'llama3.2:1b': 127500,
  'llama3.2:3b': 127500,
  'llama3.2:11b': 127500,
  'llama3.2:90b': 127500,
  'llama3.3:70b': 127500,

  // llama3-x-Nb pattern
  'llama3-1-405b': 127500,
  'llama3-1-70b': 127500,
  'llama3-1-8b': 127500,
  'llama3-2-1b': 127500,
  'llama3-2-3b': 127500,
  'llama3-2-11b': 127500,
  'llama3-2-90b': 127500,
  'llama3-3-70b': 127500,

  // llama-3.x-Nb pattern
  'llama-3.1-405b': 127500,
  'llama-3.1-70b': 127500,
  'llama-3.1-8b': 127500,
  'llama-3.2-1b': 127500,
  'llama-3.2-3b': 127500,
  'llama-3.2-11b': 127500,
  'llama-3.2-90b': 127500,
  'llama-3.3-70b': 127500,

  // Original llama2/3 patterns
  'llama3-70b': 8000,
  'llama3-8b': 8000,
  'llama2-70b': 4000,
  'llama2-13b': 4000,
  'llama3:70b': 8000,
  'llama3:8b': 8000,
  'llama2:70b': 4000,
};
|
||||
|
||||
/**
 * Context-window sizes (tokens) for Alibaba Qwen models.
 * Insertion order matters — findMatchingPattern iterates keys in reverse,
 * so specific variants listed later win over the bare `qwen`/`qwen3` patterns.
 */
const qwenModels = {
  qwen: 32000,
  'qwen2.5': 32000,
  'qwen-turbo': 1000000,
  'qwen-plus': 131000,
  'qwen-max': 32000,
  'qwq-32b': 32000,
  // Qwen3 models
  qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
  'qwen3-8b': 128000,
  'qwen3-14b': 40960,
  'qwen3-30b-a3b': 40960,
  'qwen3-32b': 40960,
  'qwen3-235b-a22b': 40960,
  // Qwen3 VL (Vision-Language) models
  'qwen3-vl-8b-thinking': 256000,
  'qwen3-vl-8b-instruct': 262144,
  'qwen3-vl-30b-a3b': 262144,
  'qwen3-vl-235b-a22b': 131072,
  // Qwen3 specialized models
  'qwen3-max': 256000,
  'qwen3-coder': 262144,
  'qwen3-coder-30b-a3b': 262144,
  'qwen3-coder-plus': 128000,
  'qwen3-coder-flash': 128000,
  'qwen3-next-80b-a3b': 262144,
};
|
||||
|
||||
/**
 * Context-window sizes (tokens) for AI21 models.
 * Values are deliberately reduced below the advertised maximum (see inline notes)
 * to leave headroom.
 */
const ai21Models = {
  'j2-mid': 8182, // -10 from max
  'j2-ultra': 8182, // -10 from max
  'jamba-instruct': 255500, // -500 from max
};
|
||||
|
||||
/**
 * Context-window sizes (tokens) for Amazon Titan and Nova models.
 * Values are deliberately reduced below the advertised maximum (see inline notes)
 * to leave headroom.
 */
const amazonModels = {
  // Amazon Titan models
  'titan-text-lite': 4000,
  'titan-text-express': 8000,
  'titan-text-premier': 31500, // -500 from max
  // Amazon Nova models
  // https://aws.amazon.com/ai/generative-ai/nova/
  'nova-micro': 127000, // -1000 from max
  'nova-lite': 295000, // -5000 from max
  'nova-pro': 295000, // -5000 from max
  'nova-premier': 995000, // -5000 from max
};
|
||||
|
||||
/**
 * Combined context-window map for the AWS Bedrock endpoint, which serves models
 * from several providers. Spread order matters: on duplicate keys, later spreads
 * overwrite earlier ones, and later insertion also wins in findMatchingPattern's
 * reverse-order partial matching.
 */
const bedrockModels = {
  ...anthropicModels,
  ...mistralModels,
  ...cohereModels,
  ...deepseekModels,
  ...moonshotModels,
  ...metaModels,
  ...ai21Models,
  ...amazonModels,
};
|
||||
|
||||
/**
 * Context-window sizes (tokens) for xAI Grok models.
 * Insertion order matters — findMatchingPattern iterates keys in reverse,
 * so specific variants listed later win over the bare `grok` pattern.
 */
const xAIModels = {
  grok: 131072,
  'grok-beta': 131072,
  'grok-vision-beta': 8192,
  'grok-2': 131072,
  'grok-2-latest': 131072,
  'grok-2-1212': 131072,
  'grok-2-vision': 32768,
  'grok-2-vision-latest': 32768,
  'grok-2-vision-1212': 32768,
  'grok-3': 131072,
  'grok-3-fast': 131072,
  'grok-3-mini': 131072,
  'grok-3-mini-fast': 131072,
  'grok-4': 256000, // 256K context
  'grok-4-fast': 2000000, // 2M context
  'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants)
  'grok-code-fast': 256000, // 256K context
};
|
||||
|
||||
/**
 * Catch-all context-window map used for endpoints that may serve models from any
 * provider (openAI / agents / custom in maxTokensMap). Spread order matters: on
 * duplicate keys, later spreads overwrite earlier ones, and later insertion also
 * wins in findMatchingPattern's reverse-order partial matching.
 */
const aggregateModels = {
  ...openAIModels,
  ...googleModels,
  ...bedrockModels,
  ...xAIModels,
  ...qwenModels,
  // GPT-OSS
  'gpt-oss': 131000,
  'gpt-oss:20b': 131000,
  'gpt-oss-20b': 131000,
  'gpt-oss:120b': 131000,
  'gpt-oss-120b': 131000,
  // GLM models (Zhipu AI)
  glm4: 128000,
  'glm-4': 128000,
  'glm-4-32b': 128000,
  'glm-4.5': 131000,
  'glm-4.5-air': 131000,
  'glm-4.5v': 66000,
  'glm-4.6': 200000,
};
|
||||
|
||||
/**
 * Maps each endpoint to its model → context-window-size table.
 * Consumed by getModelMaxTokens / matchModelName when no per-endpoint
 * token config is supplied.
 */
export const maxTokensMap = {
  [EModelEndpoint.azureOpenAI]: openAIModels,
  [EModelEndpoint.openAI]: aggregateModels,
  [EModelEndpoint.agents]: aggregateModels,
  [EModelEndpoint.custom]: aggregateModels,
  [EModelEndpoint.google]: googleModels,
  [EModelEndpoint.anthropic]: anthropicModels,
  [EModelEndpoint.bedrock]: bedrockModels,
};
|
||||
|
||||
/**
 * Maximum output (completion) token limits per model.
 * `system_default` is the fallback returned by getModelTokenValue when no
 * key or pattern matches.
 */
export const modelMaxOutputs = {
  o1: 32268, // -500 from max: 32,768
  'o1-mini': 65136, // -500 from max: 65,536
  'o1-preview': 32268, // -500 from max: 32,768
  'gpt-5': 128000,
  'gpt-5.1': 128000,
  'gpt-5.2': 128000,
  'gpt-5-mini': 128000,
  'gpt-5-nano': 128000,
  'gpt-5-pro': 128000,
  'gpt-oss-20b': 131000,
  'gpt-oss-120b': 131000,
  system_default: 32000,
};
|
||||
|
||||
/**
 * Maximum output (completion) token limits for Anthropic models.
 * Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names
 */
const anthropicMaxOutputs = {
  'claude-3-haiku': 4096,
  'claude-3-sonnet': 4096,
  'claude-3-opus': 4096,
  'claude-haiku-4-5': 64000,
  'claude-sonnet-4': 64000,
  'claude-opus-4': 32000,
  'claude-opus-4-5': 64000,
  'claude-opus-4-6': 128000,
  'claude-3.5-sonnet': 8192,
  'claude-3-5-sonnet': 8192,
  'claude-3.7-sonnet': 128000,
  'claude-3-7-sonnet': 128000,
};
|
||||
|
||||
/**
 * Maximum output (completion) token limits for DeepSeek models.
 * Outputs from https://api-docs.deepseek.com/quick_start/pricing
 */
const deepseekMaxOutputs = {
  deepseek: 8000, // deepseek-chat default: 4K, max: 8K
  'deepseek-chat': 8000,
  'deepseek-reasoner': 64000, // default: 32K, max: 64K
  'deepseek-r1': 64000,
  'deepseek-v3': 8000,
  'deepseek.r1': 64000,
};
|
||||
|
||||
/**
 * Maps each endpoint to its model → max-output-token table.
 * Consumed by getModelMaxOutputTokens when no per-endpoint token config
 * is supplied.
 */
export const maxOutputTokensMap = {
  [EModelEndpoint.anthropic]: anthropicMaxOutputs,
  [EModelEndpoint.azureOpenAI]: modelMaxOutputs,
  [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
  [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
};
|
||||
|
||||
/**
|
||||
* Finds the first matching pattern in the tokens map.
|
||||
* @param {string} modelName
|
||||
* @param {Record<string, number> | EndpointTokenConfig} tokensMap
|
||||
* @returns {string|null}
|
||||
*/
|
||||
export function findMatchingPattern(
|
||||
modelName: string,
|
||||
tokensMap: Record<string, number> | EndpointTokenConfig,
|
||||
): string | null {
|
||||
const keys = Object.keys(tokensMap);
|
||||
const lowerModelName = modelName.toLowerCase();
|
||||
for (let i = keys.length - 1; i >= 0; i--) {
|
||||
const modelKey = keys[i];
|
||||
if (lowerModelName.includes(modelKey)) {
|
||||
return modelKey;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a token value for a given model name from a tokens map.
|
||||
*
|
||||
* @param modelName - The name of the model to look up.
|
||||
* @param tokensMap - The map of model names to token values.
|
||||
* @param [key='context'] - The key to look up in the tokens map.
|
||||
* @returns The token value for the given model or undefined if no match is found.
|
||||
*/
|
||||
export function getModelTokenValue(
|
||||
modelName: string,
|
||||
tokensMap?: EndpointTokenConfig | Record<string, number>,
|
||||
key = 'context' as keyof TokenConfig,
|
||||
): number | undefined {
|
||||
if (typeof modelName !== 'string' || !tokensMap) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const value = tokensMap[modelName];
|
||||
if (typeof value === 'number') {
|
||||
return value;
|
||||
}
|
||||
|
||||
if (value?.context) {
|
||||
return value.context;
|
||||
}
|
||||
|
||||
const matchedPattern = findMatchingPattern(modelName, tokensMap);
|
||||
|
||||
if (matchedPattern) {
|
||||
const result = tokensMap[matchedPattern];
|
||||
if (typeof result === 'number') {
|
||||
return result;
|
||||
}
|
||||
|
||||
const tokenValue = result?.[key];
|
||||
if (typeof tokenValue === 'number') {
|
||||
return tokenValue;
|
||||
}
|
||||
return tokensMap.system_default as number | undefined;
|
||||
}
|
||||
|
||||
return tokensMap.system_default as number | undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the maximum tokens for a given model name.
|
||||
*
|
||||
* @param modelName - The name of the model to look up.
|
||||
* @param endpoint - The endpoint (default is 'openAI').
|
||||
* @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
|
||||
* @returns The maximum tokens for the given model or undefined if no match is found.
|
||||
*/
|
||||
export function getModelMaxTokens(
|
||||
modelName: string,
|
||||
endpoint = EModelEndpoint.openAI,
|
||||
endpointTokenConfig?: EndpointTokenConfig,
|
||||
): number | undefined {
|
||||
const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint as keyof typeof maxTokensMap];
|
||||
return getModelTokenValue(modelName, tokensMap);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the maximum output tokens for a given model name.
|
||||
*
|
||||
* @param modelName - The name of the model to look up.
|
||||
* @param endpoint - The endpoint (default is 'openAI').
|
||||
* @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
|
||||
* @returns The maximum output tokens for the given model or undefined if no match is found.
|
||||
*/
|
||||
export function getModelMaxOutputTokens(
|
||||
modelName: string,
|
||||
endpoint = EModelEndpoint.openAI,
|
||||
endpointTokenConfig?: EndpointTokenConfig,
|
||||
): number | undefined {
|
||||
const tokensMap =
|
||||
endpointTokenConfig ?? maxOutputTokensMap[endpoint as keyof typeof maxOutputTokensMap];
|
||||
return getModelTokenValue(modelName, tokensMap, 'output');
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the model name key for a given model name input. If the exact model name isn't found,
|
||||
* it searches for partial matches within the model name, checking keys in reverse order.
|
||||
*
|
||||
* @param modelName - The name of the model to look up.
|
||||
* @param endpoint - The endpoint (default is 'openAI').
|
||||
* @returns The model name key for the given model; returns input if no match is found and is string.
|
||||
*
|
||||
* @example
|
||||
* matchModelName('gpt-4-32k-0613'); // Returns 'gpt-4-32k-0613'
|
||||
* matchModelName('gpt-4-32k-unknown'); // Returns 'gpt-4-32k'
|
||||
* matchModelName('unknown-model'); // Returns undefined
|
||||
*/
|
||||
export function matchModelName(
|
||||
modelName: string,
|
||||
endpoint = EModelEndpoint.openAI,
|
||||
): string | undefined {
|
||||
if (typeof modelName !== 'string') {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const tokensMap: Record<string, number> = maxTokensMap[endpoint as keyof typeof maxTokensMap];
|
||||
if (!tokensMap) {
|
||||
return modelName;
|
||||
}
|
||||
|
||||
if (tokensMap[modelName]) {
|
||||
return modelName;
|
||||
}
|
||||
|
||||
const matchedPattern = findMatchingPattern(modelName, tokensMap);
|
||||
return matchedPattern || modelName;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue