✨ feat: Add token usage indicator to chat input

Add TokenUsageIndicator component with circular progress ring. Create useTokenUsage hook with Jotai atom for state. Add model context window lookups to data-provider. Consolidate token utilities (output limits, TOKEN_DEFAULTS). Display input/output tokens and percentage of context used.
2025-12-17 00:40:14 +01:00 · 2025-12-14 00:36:45 +01:00 · 2025-12-14 00:36:45 +01:00 · 841a37e8cb
commit 841a37e8cb
parent 4d7e6b4a58
11 changed files with 710 additions and 348 deletions
--- a/api/app/clients/specs/FakeClient.js
+++ b/api/app/clients/specs/FakeClient.js
@ -1,4 +1,5 @@
 const { getModelMaxTokens } = require('@librechat/api');
+const { TOKEN_DEFAULTS } = require('librechat-data-provider');
 const BaseClient = require('../BaseClient');

 class FakeClient extends BaseClient {
@ -41,7 +42,9 @@ class FakeClient extends BaseClient {
    }

    this.maxContextTokens =
-      this.options.maxContextTokens ?? getModelMaxTokens(this.modelOptions.model) ?? 4097;
+      this.options.maxContextTokens ??
+      getModelMaxTokens(this.modelOptions.model) ??
+      TOKEN_DEFAULTS.LEGACY_CONTEXT_FALLBACK;
  }
  buildMessages() {}
  getTokenCount(str) {
--- a/client/src/components/Chat/Input/ChatForm.tsx
+++ b/client/src/components/Chat/Input/ChatForm.tsx
@ -18,7 +18,9 @@ import {
  useQueryParams,
  useSubmitMessage,
  useFocusChatEffect,
+  useTokenUsageComputation,
 } from '~/hooks';
+import TokenUsageIndicator from './TokenUsageIndicator';
 import { mainTextareaId, BadgeItem } from '~/common';
 import AttachFileChat from './Files/AttachFileChat';
 import FileFormChat from './Files/FileFormChat';
@ -39,6 +41,7 @@ const ChatForm = memo(({ index = 0 }: { index?: number }) => {
  const submitButtonRef = useRef<HTMLButtonElement>(null);
  const textAreaRef = useRef<HTMLTextAreaElement>(null);
  useFocusChatEffect(textAreaRef);
+  useTokenUsageComputation();
  const localize = useLocalize();

  const [isCollapsed, setIsCollapsed] = useState(false);
@ -332,6 +335,7 @@ const ChatForm = memo(({ index = 0 }: { index?: number }) => {
                }
              />
              <div className="mx-auto flex" />
+              <TokenUsageIndicator />
              {SpeechToText && (
                <AudioRecorder
                  methods={methods}
--- a/client/src/components/Chat/Input/TokenUsageIndicator.tsx
+++ b/client/src/components/Chat/Input/TokenUsageIndicator.tsx
@ -0,0 +1,87 @@
+import { memo } from 'react';
+import { TooltipAnchor } from '@librechat/client';
+import { useTokenUsage } from '~/hooks';
+import { cn } from '~/utils';
+
+/**
+ * Formats a token count as a compact label: plain digits below 1,000,
+ * one-decimal `K` for thousands and one-decimal `M` for millions.
+ *
+ * The unit is chosen after rounding to the displayed precision, so a value
+ * such as 999,999 renders as `1.0M` rather than `1000.0K`.
+ *
+ * @param n - Token count to format.
+ * @returns The compact label, e.g. `950`, `1.5K`, `2.0M`.
+ */
+function formatTokens(n: number): string {
+  if (n >= 1000) {
+    const thousands = (n / 1000).toFixed(1);
+    // toFixed may round up to "1000.0"; promote those values to the next unit.
+    if (n >= 1000000 || Number(thousands) >= 1000) {
+      return `${(n / 1000000).toFixed(1)}M`;
+    }
+    return `${thousands}K`;
+  }
+  return n.toString();
+}
+
+/**
+ * Circular progress ring showing how much of the context window the
+ * conversation has consumed. The tooltip lists input, output and max tokens;
+ * when the max context is unknown the ring stays empty and the max reads "N/A".
+ */
+const TokenUsageIndicator = memo(function TokenUsageIndicator() {
+  const { inputTokens, outputTokens, maxContext } = useTokenUsage();
+
+  const hasMaxContext = maxContext !== null && maxContext > 0;
+  const usedTokens = inputTokens + outputTokens;
+  // Share of the context window in use, capped at 100; 0 when the window is unknown.
+  const percentage = hasMaxContext ? Math.min((usedTokens / maxContext) * 100, 100) : 0;
+
+  // Ring geometry
+  const size = 28;
+  const strokeWidth = 2.5;
+  const center = size / 2;
+  const radius = (size - strokeWidth) / 2;
+  const circumference = 2 * Math.PI * radius;
+  // An offset equal to the full circumference leaves the progress arc empty.
+  const dashOffset = hasMaxContext
+    ? circumference - (percentage / 100) * circumference
+    : circumference;
+
+  const maxLabel = hasMaxContext ? formatTokens(maxContext) : 'N/A';
+  const tooltipText = `Input: ${formatTokens(inputTokens)} | Output: ${formatTokens(outputTokens)} | Max: ${maxLabel}`;
+
+  // Stroke color: neutral when the max is unknown, otherwise green, yellow above 75%, red above 90%.
+  let progressColor = 'stroke-green-500';
+  if (!hasMaxContext) {
+    progressColor = 'stroke-text-secondary';
+  } else if (percentage > 90) {
+    progressColor = 'stroke-red-500';
+  } else if (percentage > 75) {
+    progressColor = 'stroke-yellow-500';
+  }
+
+  return (
+    <TooltipAnchor
+      description={tooltipText}
+      render={
+        <div className="flex size-9 items-center justify-center rounded-full p-1 transition-colors hover:bg-surface-hover">
+          <svg
+            width={size}
+            height={size}
+            viewBox={`0 0 ${size} ${size}`}
+            className="rotate-[-90deg]"
+          >
+            {/* Track: full ring drawn behind the progress arc */}
+            <circle
+              cx={center}
+              cy={center}
+              r={radius}
+              fill="transparent"
+              strokeWidth={strokeWidth}
+              className="stroke-border-medium"
+            />
+            {/* Progress arc */}
+            <circle
+              cx={center}
+              cy={center}
+              r={radius}
+              fill="transparent"
+              strokeWidth={strokeWidth}
+              strokeDasharray={circumference}
+              strokeDashoffset={dashOffset}
+              strokeLinecap="round"
+              className={cn('transition-all duration-300', progressColor)}
+            />
+          </svg>
+        </div>
+      }
+    />
+  );
+});
+
+export default TokenUsageIndicator;
--- a/client/src/hooks/index.ts
+++ b/client/src/hooks/index.ts
@ -35,3 +35,4 @@ export { default as useTextToSpeech } from './Input/useTextToSpeech';
 export { default as useGenerationsByLatest } from './useGenerationsByLatest';
 export { default as useLocalizedConfig } from './useLocalizedConfig';
 export { default as useResourcePermissions } from './useResourcePermissions';
+export { default as useTokenUsage, useTokenUsageComputation } from './useTokenUsage';
--- a/client/src/hooks/useTokenUsage.ts
+++ b/client/src/hooks/useTokenUsage.ts
@ -0,0 +1,77 @@
+import { useEffect, useMemo } from 'react';
+import { useSetAtom, useAtomValue } from 'jotai';
+import type { TMessage } from 'librechat-data-provider';
+import { getModelMaxTokens } from 'librechat-data-provider';
+import { tokenUsageAtom, type TokenUsage } from '~/store/tokenUsage';
+import { useGetMessagesByConvoId } from '~/data-provider';
+import { useChatContext } from '~/Providers';
+
+/**
+ * Derives token usage for the active conversation and publishes it to `tokenUsageAtom`.
+ * Reads the conversation through `useChatContext`, so it must be called from a
+ * component that has access to that context.
+ */
+export function useTokenUsageComputation() {
+  const { conversation } = useChatContext();
+  const setTokenUsage = useSetAtom(tokenUsageAtom);
+  const conversationId = conversation?.conversationId ?? '';
+
+  // Messages come from the query hook so the totals react to message updates;
+  // the query is disabled for an empty id and for the 'new' placeholder id.
+  const { data: messages } = useGetMessagesByConvoId(conversationId, {
+    enabled: !!conversationId && conversationId !== 'new',
+  });
+
+  // Recomputed whenever the messages or the relevant conversation settings change
+  const tokenData = useMemo(() => {
+    const list: TMessage[] = messages && Array.isArray(messages) ? (messages as TMessage[]) : [];
+
+    // User-created messages count as input, everything else as output.
+    let inputTokens = 0;
+    let outputTokens = 0;
+    for (let i = 0; i < list.length; i++) {
+      const message = list[i];
+      const tokens = message.tokenCount ?? 0;
+      if (message.isCreatedByUser) {
+        inputTokens += tokens;
+      } else {
+        outputTokens += tokens;
+      }
+    }
+
+    // An explicit maxContextTokens wins; otherwise fall back to the model's default limit.
+    let maxContext: number | null = conversation?.maxContextTokens ?? null;
+    if (maxContext === null && conversation?.model) {
+      const endpoint = conversation.endpointType ?? conversation.endpoint ?? '';
+      maxContext = getModelMaxTokens(conversation.model, endpoint) ?? null;
+    }
+
+    return { inputTokens, outputTokens, maxContext };
+  }, [
+    messages,
+    conversation?.maxContextTokens,
+    conversation?.model,
+    conversation?.endpoint,
+    conversation?.endpointType,
+  ]);
+
+  // Push the computed values into the shared atom
+  useEffect(() => {
+    setTokenUsage(tokenData);
+  }, [tokenData, setTokenUsage]);
+}
+
+/**
+ * Hook to read the current token usage values.
+ *
+ * Subscribes to `tokenUsageAtom`; the atom is written by `useTokenUsageComputation`.
+ *
+ * @returns The atom's current `TokenUsage` value.
+ */
+export function useTokenUsage(): TokenUsage {
+  return useAtomValue(tokenUsageAtom);
+}
+
+export default useTokenUsage;
--- a/client/src/store/tokenUsage.ts
+++ b/client/src/store/tokenUsage.ts
@ -0,0 +1,13 @@
+import { atom } from 'jotai';
+
+/** Token usage snapshot held in `tokenUsageAtom`. */
+export type TokenUsage = {
+  /** Token count attributed to input. */
+  inputTokens: number;
+  /** Token count attributed to output. */
+  outputTokens: number;
+  /** Context window size in tokens. */
+  maxContext: number | null; // null = N/A
+};
+
+/** Jotai atom holding the token usage; starts at zero usage with no known max context. */
+export const tokenUsageAtom = atom<TokenUsage>({
+  inputTokens: 0,
+  outputTokens: 0,
+  maxContext: null,
+});
--- a/packages/api/src/agents/initialize.ts
+++ b/packages/api/src/agents/initialize.ts
@ -7,6 +7,7 @@ import {
  isAgentsEndpoint,
  replaceSpecialVars,
  providerEndpointMap,
+  TOKEN_DEFAULTS,
 } from 'librechat-data-provider';
 import type {
  AgentToolResources,
@ -240,7 +241,7 @@ export async function initializeAgent(
      providerEndpointMap[provider as keyof typeof providerEndpointMap],
      options.endpointTokenConfig,
    ),
-    18000,
+    TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK,
  );

  if (
@ -293,7 +294,7 @@ export async function initializeAgent(
    agent.additional_instructions = artifactsPromptResult ?? undefined;
  }

-  const agentMaxContextNum = Number(agentMaxContextTokens) || 18000;
+  const agentMaxContextNum = Number(agentMaxContextTokens) || TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK;
  const maxOutputTokensNum = Number(maxOutputTokens) || 0;

  const finalAttachments: IMongoFile[] = (primedAttachments ?? [])
@ -308,7 +309,9 @@ export async function initializeAgent(
    userMCPAuthMap,
    toolContextMap: toolContextMap ?? {},
    useLegacyContent: !!options.useLegacyContent,
-    maxContextTokens: Math.round((agentMaxContextNum - maxOutputTokensNum) * 0.9),
+    maxContextTokens: Math.round(
+      (agentMaxContextNum - maxOutputTokensNum) * TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN,
+    ),
  };

  return initializedAgent;
--- a/packages/api/src/utils/tokens.ts
+++ b/packages/api/src/utils/tokens.ts
@ -1,351 +1,25 @@
 import z from 'zod';
-import { EModelEndpoint } from 'librechat-data-provider';
+import {
+  EModelEndpoint,
+  maxTokensMap,
+  maxOutputTokensMap,
+  TOKEN_DEFAULTS,
+  findMatchingPattern as findMatchingPatternSimple,
+  getModelMaxTokens as getModelMaxTokensSimple,
+  getModelMaxOutputTokens as getModelMaxOutputTokensSimple,
+  matchModelName as matchModelNameSimple,
+} from 'librechat-data-provider';
 import type { EndpointTokenConfig, TokenConfig } from '~/types';

-const openAIModels = {
-  'o4-mini': 200000,
-  'o3-mini': 195000, // -5000 from max
-  o3: 200000,
-  o1: 195000, // -5000 from max
-  'o1-mini': 127500, // -500 from max
-  'o1-preview': 127500, // -500 from max
-  'gpt-4': 8187, // -5 from max
-  'gpt-4-0613': 8187, // -5 from max
-  'gpt-4-32k': 32758, // -10 from max
-  'gpt-4-32k-0314': 32758, // -10 from max
-  'gpt-4-32k-0613': 32758, // -10 from max
-  'gpt-4-1106': 127500, // -500 from max
-  'gpt-4-0125': 127500, // -500 from max
-  'gpt-4.5': 127500, // -500 from max
-  'gpt-4.1': 1047576,
-  'gpt-4.1-mini': 1047576,
-  'gpt-4.1-nano': 1047576,
-  'gpt-5': 400000,
-  'gpt-5-mini': 400000,
-  'gpt-5-nano': 400000,
-  'gpt-5-pro': 400000,
-  'gpt-4o': 127500, // -500 from max
-  'gpt-4o-mini': 127500, // -500 from max
-  'gpt-4o-2024-05-13': 127500, // -500 from max
-  'gpt-4-turbo': 127500, // -500 from max
-  'gpt-4-vision': 127500, // -500 from max
-  'gpt-3.5-turbo': 16375, // -10 from max
-  'gpt-3.5-turbo-0613': 4092, // -5 from max
-  'gpt-3.5-turbo-0301': 4092, // -5 from max
-  'gpt-3.5-turbo-16k': 16375, // -10 from max
-  'gpt-3.5-turbo-16k-0613': 16375, // -10 from max
-  'gpt-3.5-turbo-1106': 16375, // -10 from max
-  'gpt-3.5-turbo-0125': 16375, // -10 from max
-};
+// Re-export from data-provider for backwards compatibility
+export { maxTokensMap, maxOutputTokensMap, TOKEN_DEFAULTS };

-const mistralModels = {
-  'mistral-': 31990, // -10 from max
-  'mistral-7b': 31990, // -10 from max
-  'mistral-small': 31990, // -10 from max
-  'mixtral-8x7b': 31990, // -10 from max
-  'mixtral-8x22b': 65536,
-  'mistral-large': 131000,
-  'mistral-large-2402': 127500,
-  'mistral-large-2407': 127500,
-  'mistral-nemo': 131000,
-  'pixtral-large': 131000,
-  'mistral-saba': 32000,
-  codestral: 256000,
-  'ministral-8b': 131000,
-  'ministral-3b': 131000,
-};
-
-const cohereModels = {
-  'command-light': 4086, // -10 from max
-  'command-light-nightly': 8182, // -10 from max
-  command: 4086, // -10 from max
-  'command-nightly': 8182, // -10 from max
-  'command-text': 4086, // -10 from max
-  'command-r': 127500, // -500 from max
-  'command-r-plus': 127500, // -500 from max
-};
-
-const googleModels = {
-  /* Max I/O is combined so we subtract the amount from max response tokens for actual total */
-  gemma: 8196,
-  'gemma-2': 32768,
-  'gemma-3': 32768,
-  'gemma-3-27b': 131072,
-  gemini: 30720, // -2048 from max
-  'gemini-pro-vision': 12288,
-  'gemini-exp': 2000000,
-  'gemini-3': 1000000, // 1M input tokens, 64k output tokens
-  'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens
-  'gemini-2.5-pro': 1000000,
-  'gemini-2.5-flash': 1000000,
-  'gemini-2.5-flash-lite': 1000000,
-  'gemini-2.0': 2000000,
-  'gemini-2.0-flash': 1000000,
-  'gemini-2.0-flash-lite': 1000000,
-  'gemini-1.5': 1000000,
-  'gemini-1.5-flash': 1000000,
-  'gemini-1.5-flash-8b': 1000000,
-  'text-bison-32k': 32758, // -10 from max
-  'chat-bison-32k': 32758, // -10 from max
-  'code-bison-32k': 32758, // -10 from max
-  'codechat-bison-32k': 32758,
-  /* Codey, -5 from max: 6144 */
-  'code-': 6139,
-  'codechat-': 6139,
-  /* PaLM2, -5 from max: 8192 */
-  'text-': 8187,
-  'chat-': 8187,
-};
-
-const anthropicModels = {
-  'claude-': 100000,
-  'claude-instant': 100000,
-  'claude-2': 100000,
-  'claude-2.1': 200000,
-  'claude-3': 200000,
-  'claude-3-haiku': 200000,
-  'claude-3-sonnet': 200000,
-  'claude-3-opus': 200000,
-  'claude-3.5-haiku': 200000,
-  'claude-3-5-haiku': 200000,
-  'claude-3-5-sonnet': 200000,
-  'claude-3.5-sonnet': 200000,
-  'claude-3-7-sonnet': 200000,
-  'claude-3.7-sonnet': 200000,
-  'claude-3-5-sonnet-latest': 200000,
-  'claude-3.5-sonnet-latest': 200000,
-  'claude-haiku-4-5': 200000,
-  'claude-sonnet-4': 1000000,
-  'claude-4': 200000,
-  'claude-opus-4': 200000,
-  'claude-opus-4-5': 200000,
-};
-
-const deepseekModels = {
-  deepseek: 128000,
-  'deepseek-chat': 128000,
-  'deepseek-reasoner': 128000,
-  'deepseek-r1': 128000,
-  'deepseek-v3': 128000,
-  'deepseek.r1': 128000,
-};
-
-const metaModels = {
-  // Basic patterns
-  llama3: 8000,
-  llama2: 4000,
-  'llama-3': 8000,
-  'llama-2': 4000,
-
-  // llama3.x pattern
-  'llama3.1': 127500,
-  'llama3.2': 127500,
-  'llama3.3': 127500,
-
-  // llama3-x pattern
-  'llama3-1': 127500,
-  'llama3-2': 127500,
-  'llama3-3': 127500,
-
-  // llama-3.x pattern
-  'llama-3.1': 127500,
-  'llama-3.2': 127500,
-  'llama-3.3': 127500,
-
-  // llama3.x:Nb pattern
-  'llama3.1:405b': 127500,
-  'llama3.1:70b': 127500,
-  'llama3.1:8b': 127500,
-  'llama3.2:1b': 127500,
-  'llama3.2:3b': 127500,
-  'llama3.2:11b': 127500,
-  'llama3.2:90b': 127500,
-  'llama3.3:70b': 127500,
-
-  // llama3-x-Nb pattern
-  'llama3-1-405b': 127500,
-  'llama3-1-70b': 127500,
-  'llama3-1-8b': 127500,
-  'llama3-2-1b': 127500,
-  'llama3-2-3b': 127500,
-  'llama3-2-11b': 127500,
-  'llama3-2-90b': 127500,
-  'llama3-3-70b': 127500,
-
-  // llama-3.x-Nb pattern
-  'llama-3.1-405b': 127500,
-  'llama-3.1-70b': 127500,
-  'llama-3.1-8b': 127500,
-  'llama-3.2-1b': 127500,
-  'llama-3.2-3b': 127500,
-  'llama-3.2-11b': 127500,
-  'llama-3.2-90b': 127500,
-  'llama-3.3-70b': 127500,
-
-  // Original llama2/3 patterns
-  'llama3-70b': 8000,
-  'llama3-8b': 8000,
-  'llama2-70b': 4000,
-  'llama2-13b': 4000,
-  'llama3:70b': 8000,
-  'llama3:8b': 8000,
-  'llama2:70b': 4000,
-};
-
-const qwenModels = {
-  qwen: 32000,
-  'qwen2.5': 32000,
-  'qwen-turbo': 1000000,
-  'qwen-plus': 131000,
-  'qwen-max': 32000,
-  'qwq-32b': 32000,
-  // Qwen3 models
-  qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
-  'qwen3-8b': 128000,
-  'qwen3-14b': 40960,
-  'qwen3-30b-a3b': 40960,
-  'qwen3-32b': 40960,
-  'qwen3-235b-a22b': 40960,
-  // Qwen3 VL (Vision-Language) models
-  'qwen3-vl-8b-thinking': 256000,
-  'qwen3-vl-8b-instruct': 262144,
-  'qwen3-vl-30b-a3b': 262144,
-  'qwen3-vl-235b-a22b': 131072,
-  // Qwen3 specialized models
-  'qwen3-max': 256000,
-  'qwen3-coder': 262144,
-  'qwen3-coder-30b-a3b': 262144,
-  'qwen3-coder-plus': 128000,
-  'qwen3-coder-flash': 128000,
-  'qwen3-next-80b-a3b': 262144,
-};
-
-const ai21Models = {
-  'j2-mid': 8182, // -10 from max
-  'j2-ultra': 8182, // -10 from max
-  'jamba-instruct': 255500, // -500 from max
-};
-
-const amazonModels = {
-  // Amazon Titan models
-  'titan-text-lite': 4000,
-  'titan-text-express': 8000,
-  'titan-text-premier': 31500, // -500 from max
-  // Amazon Nova models
-  // https://aws.amazon.com/ai/generative-ai/nova/
-  'nova-micro': 127000, // -1000 from max
-  'nova-lite': 295000, // -5000 from max
-  'nova-pro': 295000, // -5000 from max
-  'nova-premier': 995000, // -5000 from max
-};
-
-const bedrockModels = {
-  ...anthropicModels,
-  ...mistralModels,
-  ...cohereModels,
-  ...deepseekModels,
-  ...metaModels,
-  ...ai21Models,
-  ...amazonModels,
-};
-
-const xAIModels = {
-  grok: 131072,
-  'grok-beta': 131072,
-  'grok-vision-beta': 8192,
-  'grok-2': 131072,
-  'grok-2-latest': 131072,
-  'grok-2-1212': 131072,
-  'grok-2-vision': 32768,
-  'grok-2-vision-latest': 32768,
-  'grok-2-vision-1212': 32768,
-  'grok-3': 131072,
-  'grok-3-fast': 131072,
-  'grok-3-mini': 131072,
-  'grok-3-mini-fast': 131072,
-  'grok-4': 256000, // 256K context
-  'grok-4-fast': 2000000, // 2M context
-  'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants)
-  'grok-code-fast': 256000, // 256K context
-};
-
-const aggregateModels = {
-  ...openAIModels,
-  ...googleModels,
-  ...bedrockModels,
-  ...xAIModels,
-  ...qwenModels,
-  // misc.
-  kimi: 131000,
-  // GPT-OSS
-  'gpt-oss': 131000,
-  'gpt-oss:20b': 131000,
-  'gpt-oss-20b': 131000,
-  'gpt-oss:120b': 131000,
-  'gpt-oss-120b': 131000,
-  // GLM models (Zhipu AI)
-  glm4: 128000,
-  'glm-4': 128000,
-  'glm-4-32b': 128000,
-  'glm-4.5': 131000,
-  'glm-4.5-air': 131000,
-  'glm-4.5v': 66000,
-  'glm-4.6': 200000,
-};
-
-export const maxTokensMap = {
-  [EModelEndpoint.azureOpenAI]: openAIModels,
-  [EModelEndpoint.openAI]: aggregateModels,
-  [EModelEndpoint.agents]: aggregateModels,
-  [EModelEndpoint.custom]: aggregateModels,
-  [EModelEndpoint.google]: googleModels,
-  [EModelEndpoint.anthropic]: anthropicModels,
-  [EModelEndpoint.bedrock]: bedrockModels,
-};
-
-export const modelMaxOutputs = {
-  o1: 32268, // -500 from max: 32,768
-  'o1-mini': 65136, // -500 from max: 65,536
-  'o1-preview': 32268, // -500 from max: 32,768
-  'gpt-5': 128000,
-  'gpt-5-mini': 128000,
-  'gpt-5-nano': 128000,
-  'gpt-5-pro': 128000,
-  'gpt-oss-20b': 131000,
-  'gpt-oss-120b': 131000,
-  system_default: 32000,
-};
-
-/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */
-const anthropicMaxOutputs = {
-  'claude-3-haiku': 4096,
-  'claude-3-sonnet': 4096,
-  'claude-3-opus': 4096,
-  'claude-haiku-4-5': 64000,
-  'claude-sonnet-4': 64000,
-  'claude-opus-4': 32000,
-  'claude-opus-4-5': 64000,
-  'claude-3.5-sonnet': 8192,
-  'claude-3-5-sonnet': 8192,
-  'claude-3.7-sonnet': 128000,
-  'claude-3-7-sonnet': 128000,
-};
-
-/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */
-const deepseekMaxOutputs = {
-  deepseek: 8000, // deepseek-chat default: 4K, max: 8K
-  'deepseek-chat': 8000,
-  'deepseek-reasoner': 64000, // default: 32K, max: 64K
-  'deepseek-r1': 64000,
-  'deepseek-v3': 8000,
-  'deepseek.r1': 64000,
-};
-
-export const maxOutputTokensMap = {
-  [EModelEndpoint.anthropic]: anthropicMaxOutputs,
-  [EModelEndpoint.azureOpenAI]: modelMaxOutputs,
-  [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
-  [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
+// Re-export simple versions (for use without EndpointTokenConfig)
+export {
+  findMatchingPatternSimple,
+  getModelMaxTokensSimple,
+  getModelMaxOutputTokensSimple,
+  matchModelNameSimple,
 };

 /**
--- a/packages/data-provider/src/index.ts
+++ b/packages/data-provider/src/index.ts
@ -47,3 +47,5 @@ export { default as createPayload } from './createPayload';
 /* feedback */
 export * from './feedback';
 export * from './parameterSettings';
+/* token limits */
+export * from './tokens';
--- a/packages/data-provider/src/schemas.ts
+++ b/packages/data-provider/src/schemas.ts
@ -618,6 +618,7 @@ export type TMessage = z.input<typeof tMessageSchema> & {
  attachments?: TAttachment[];
  clientTimestamp?: string;
  feedback?: TFeedback;
+  tokenCount?: number;
 };

 export const coerceNumber = z.union([z.number(), z.string()]).transform((val) => {
--- a/packages/data-provider/src/tokens.ts
+++ b/packages/data-provider/src/tokens.ts
@ -0,0 +1,497 @@
+import { EModelEndpoint } from './schemas';
+
+/**
+ * Model context window token limits.
+ * These values represent the maximum context tokens (input) for each model.
+ * Values are slightly reduced from actual max to leave room for output tokens.
+ */
+
+/**
+ * Context token limits for OpenAI models, keyed by model-name pattern.
+ * Key order matters: `findMatchingPattern` scans keys from last to first and
+ * returns the first key contained in the model name.
+ */
+const openAIModels: Record<string, number> = {
+  'o4-mini': 200000,
+  'o3-mini': 195000, // -5000 from max
+  o3: 200000,
+  o1: 195000, // -5000 from max
+  'o1-mini': 127500, // -500 from max
+  'o1-preview': 127500, // -500 from max
+  'gpt-4': 8187, // -5 from max
+  'gpt-4-0613': 8187, // -5 from max
+  'gpt-4-32k': 32758, // -10 from max
+  'gpt-4-32k-0314': 32758, // -10 from max
+  'gpt-4-32k-0613': 32758, // -10 from max
+  'gpt-4-1106': 127500, // -500 from max
+  'gpt-4-0125': 127500, // -500 from max
+  'gpt-4.5': 127500, // -500 from max
+  'gpt-4.1': 1047576,
+  'gpt-4.1-mini': 1047576,
+  'gpt-4.1-nano': 1047576,
+  'gpt-5': 400000,
+  'gpt-5-mini': 400000,
+  'gpt-5-nano': 400000,
+  'gpt-5-pro': 400000,
+  'gpt-4o': 127500, // -500 from max
+  'gpt-4o-mini': 127500, // -500 from max
+  'gpt-4o-2024-05-13': 127500, // -500 from max
+  'gpt-4-turbo': 127500, // -500 from max
+  'gpt-4-vision': 127500, // -500 from max
+  'gpt-3.5-turbo': 16375, // -10 from max
+  'gpt-3.5-turbo-0613': 4092, // -5 from max
+  'gpt-3.5-turbo-0301': 4092, // -5 from max
+  'gpt-3.5-turbo-16k': 16375, // -10 from max
+  'gpt-3.5-turbo-16k-0613': 16375, // -10 from max
+  'gpt-3.5-turbo-1106': 16375, // -10 from max
+  'gpt-3.5-turbo-0125': 16375, // -10 from max
+};
+
+/** Context token limits for Mistral models, keyed by model-name pattern. */
+const mistralModels: Record<string, number> = {
+  'mistral-': 31990, // -10 from max
+  'mistral-7b': 31990, // -10 from max
+  'mistral-small': 31990, // -10 from max
+  'mixtral-8x7b': 31990, // -10 from max
+  'mixtral-8x22b': 65536,
+  'mistral-large': 131000,
+  'mistral-large-2402': 127500,
+  'mistral-large-2407': 127500,
+  'mistral-nemo': 131000,
+  'pixtral-large': 131000,
+  'mistral-saba': 32000,
+  codestral: 256000,
+  'ministral-8b': 131000,
+  'ministral-3b': 131000,
+};
+
+/** Context token limits for Cohere models, keyed by model-name pattern. */
+const cohereModels: Record<string, number> = {
+  'command-light': 4086, // -10 from max
+  'command-light-nightly': 8182, // -10 from max
+  command: 4086, // -10 from max
+  'command-nightly': 8182, // -10 from max
+  'command-text': 4086, // -10 from max
+  'command-r': 127500, // -500 from max
+  'command-r-plus': 127500, // -500 from max
+};
+
+/** Context token limits for Google models, keyed by model-name pattern. */
+const googleModels: Record<string, number> = {
+  /* Max I/O is combined so we subtract the amount from max response tokens for actual total */
+  gemma: 8196,
+  'gemma-2': 32768,
+  'gemma-3': 32768,
+  'gemma-3-27b': 131072,
+  gemini: 30720, // -2048 from max
+  'gemini-pro-vision': 12288,
+  'gemini-exp': 2000000,
+  'gemini-3': 1000000, // 1M input tokens, 64k output tokens
+  'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens
+  'gemini-2.5-pro': 1000000,
+  'gemini-2.5-flash': 1000000,
+  'gemini-2.5-flash-lite': 1000000,
+  'gemini-2.0': 2000000,
+  'gemini-2.0-flash': 1000000,
+  'gemini-2.0-flash-lite': 1000000,
+  'gemini-1.5': 1000000,
+  'gemini-1.5-flash': 1000000,
+  'gemini-1.5-flash-8b': 1000000,
+  'text-bison-32k': 32758, // -10 from max
+  'chat-bison-32k': 32758, // -10 from max
+  'code-bison-32k': 32758, // -10 from max
+  'codechat-bison-32k': 32758,
+  /* Codey, -5 from max: 6144 */
+  'code-': 6139,
+  'codechat-': 6139,
+  /* PaLM2, -5 from max: 8192 */
+  'text-': 8187,
+  'chat-': 8187,
+};
+
+/** Context token limits for Anthropic Claude models, keyed by model-name pattern. */
+const anthropicModels: Record<string, number> = {
+  'claude-': 100000,
+  'claude-instant': 100000,
+  'claude-2': 100000,
+  'claude-2.1': 200000,
+  'claude-3': 200000,
+  'claude-3-haiku': 200000,
+  'claude-3-sonnet': 200000,
+  'claude-3-opus': 200000,
+  'claude-3.5-haiku': 200000,
+  'claude-3-5-haiku': 200000,
+  'claude-3-5-sonnet': 200000,
+  'claude-3.5-sonnet': 200000,
+  'claude-3-7-sonnet': 200000,
+  'claude-3.7-sonnet': 200000,
+  'claude-3-5-sonnet-latest': 200000,
+  'claude-3.5-sonnet-latest': 200000,
+  'claude-haiku-4-5': 200000,
+  'claude-sonnet-4': 1000000,
+  'claude-4': 200000,
+  'claude-opus-4': 200000,
+  'claude-opus-4-5': 200000,
+};
+
+/** Context token limits for DeepSeek models, keyed by model-name pattern. */
+const deepseekModels: Record<string, number> = {
+  deepseek: 128000,
+  'deepseek-chat': 128000,
+  'deepseek-reasoner': 128000,
+  'deepseek-r1': 128000,
+  'deepseek-v3': 128000,
+  'deepseek.r1': 128000,
+};
+
+/** Context token limits for Meta Llama models, keyed by model-name pattern. */
+const metaModels: Record<string, number> = {
+  // Basic patterns
+  llama3: 8000,
+  llama2: 4000,
+  'llama-3': 8000,
+  'llama-2': 4000,
+  // llama3.x pattern
+  'llama3.1': 127500,
+  'llama3.2': 127500,
+  'llama3.3': 127500,
+  // llama3-x pattern
+  'llama3-1': 127500,
+  'llama3-2': 127500,
+  'llama3-3': 127500,
+  // llama-3.x pattern
+  'llama-3.1': 127500,
+  'llama-3.2': 127500,
+  'llama-3.3': 127500,
+  // llama3.x:Nb pattern
+  'llama3.1:405b': 127500,
+  'llama3.1:70b': 127500,
+  'llama3.1:8b': 127500,
+  'llama3.2:1b': 127500,
+  'llama3.2:3b': 127500,
+  'llama3.2:11b': 127500,
+  'llama3.2:90b': 127500,
+  'llama3.3:70b': 127500,
+  // llama3-x-Nb pattern
+  'llama3-1-405b': 127500,
+  'llama3-1-70b': 127500,
+  'llama3-1-8b': 127500,
+  'llama3-2-1b': 127500,
+  'llama3-2-3b': 127500,
+  'llama3-2-11b': 127500,
+  'llama3-2-90b': 127500,
+  'llama3-3-70b': 127500,
+  // llama-3.x-Nb pattern
+  'llama-3.1-405b': 127500,
+  'llama-3.1-70b': 127500,
+  'llama-3.1-8b': 127500,
+  'llama-3.2-1b': 127500,
+  'llama-3.2-3b': 127500,
+  'llama-3.2-11b': 127500,
+  'llama-3.2-90b': 127500,
+  'llama-3.3-70b': 127500,
+  // Original llama2/3 patterns
+  'llama3-70b': 8000,
+  'llama3-8b': 8000,
+  'llama2-70b': 4000,
+  'llama2-13b': 4000,
+  'llama3:70b': 8000,
+  'llama3:8b': 8000,
+  'llama2:70b': 4000,
+};
+
+/** Context token limits for Qwen models, keyed by model-name pattern. */
+const qwenModels: Record<string, number> = {
+  qwen: 32000,
+  'qwen2.5': 32000,
+  'qwen-turbo': 1000000,
+  'qwen-plus': 131000,
+  'qwen-max': 32000,
+  'qwq-32b': 32000,
+  // Qwen3 models
+  qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
+  'qwen3-8b': 128000,
+  'qwen3-14b': 40960,
+  'qwen3-30b-a3b': 40960,
+  'qwen3-32b': 40960,
+  'qwen3-235b-a22b': 40960,
+  // Qwen3 VL (Vision-Language) models
+  'qwen3-vl-8b-thinking': 256000,
+  'qwen3-vl-8b-instruct': 262144,
+  'qwen3-vl-30b-a3b': 262144,
+  'qwen3-vl-235b-a22b': 131072,
+  // Qwen3 specialized models
+  'qwen3-max': 256000,
+  'qwen3-coder': 262144,
+  'qwen3-coder-30b-a3b': 262144,
+  'qwen3-coder-plus': 128000,
+  'qwen3-coder-flash': 128000,
+  'qwen3-next-80b-a3b': 262144,
+};
+
+/** Context token limits for AI21 models, keyed by model-name pattern. */
+const ai21Models: Record<string, number> = {
+  'j2-mid': 8182, // -10 from max
+  'j2-ultra': 8182, // -10 from max
+  'jamba-instruct': 255500, // -500 from max
+};
+
+/** Context token limits for Amazon models, keyed by model-name pattern. */
+const amazonModels: Record<string, number> = {
+  // Amazon Titan models
+  'titan-text-lite': 4000,
+  'titan-text-express': 8000,
+  'titan-text-premier': 31500, // -500 from max
+  // Amazon Nova models
+  // https://aws.amazon.com/ai/generative-ai/nova/
+  'nova-micro': 127000, // -1000 from max
+  'nova-lite': 295000, // -5000 from max
+  'nova-pro': 295000, // -5000 from max
+  'nova-premier': 995000, // -5000 from max
+};
+
+/**
+ * Context token limits for the Bedrock endpoint: the union of the provider maps below.
+ * Spread order fixes key order, which `findMatchingPattern` scans from last to first.
+ */
+const bedrockModels: Record<string, number> = {
+  ...anthropicModels,
+  ...mistralModels,
+  ...cohereModels,
+  ...deepseekModels,
+  ...metaModels,
+  ...ai21Models,
+  ...amazonModels,
+};
+
+/** Context token limits for xAI Grok models, keyed by model-name pattern. */
+const xAIModels: Record<string, number> = {
+  grok: 131072,
+  'grok-beta': 131072,
+  'grok-vision-beta': 8192,
+  'grok-2': 131072,
+  'grok-2-latest': 131072,
+  'grok-2-1212': 131072,
+  'grok-2-vision': 32768,
+  'grok-2-vision-latest': 32768,
+  'grok-2-vision-1212': 32768,
+  'grok-3': 131072,
+  'grok-3-fast': 131072,
+  'grok-3-mini': 131072,
+  'grok-3-mini-fast': 131072,
+  'grok-4': 256000, // 256K context
+  'grok-4-fast': 2000000, // 2M context
+  'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants)
+  'grok-code-fast': 256000, // 256K context
+};
+
+/**
+ * Combined context token limits used for the openAI, agents and custom endpoints:
+ * every provider map plus the miscellaneous entries listed inline.
+ */
+const aggregateModels: Record<string, number> = {
+  ...openAIModels,
+  ...googleModels,
+  ...bedrockModels,
+  ...xAIModels,
+  ...qwenModels,
+  // misc.
+  kimi: 131000,
+  // GPT-OSS
+  'gpt-oss': 131000,
+  'gpt-oss:20b': 131000,
+  'gpt-oss-20b': 131000,
+  'gpt-oss:120b': 131000,
+  'gpt-oss-120b': 131000,
+  // GLM models (Zhipu AI)
+  glm4: 128000,
+  'glm-4': 128000,
+  'glm-4-32b': 128000,
+  'glm-4.5': 131000,
+  'glm-4.5-air': 131000,
+  'glm-4.5v': 66000,
+  'glm-4.6': 200000,
+};
+
+/**
+ * Map of endpoint to model context token limits.
+ * Endpoints missing from this map are resolved against the openAI entry by
+ * `getModelMaxTokens`.
+ */
+export const maxTokensMap: Record<string, Record<string, number>> = {
+  [EModelEndpoint.azureOpenAI]: openAIModels,
+  [EModelEndpoint.openAI]: aggregateModels,
+  [EModelEndpoint.agents]: aggregateModels,
+  [EModelEndpoint.custom]: aggregateModels,
+  [EModelEndpoint.google]: googleModels,
+  [EModelEndpoint.anthropic]: anthropicModels,
+  [EModelEndpoint.bedrock]: bedrockModels,
+};
+
+/**
+ * Returns the key of `tokensMap` that occurs as a substring of the lowercased model name.
+ * Keys are scanned from last to first, so entries declared later in the map
+ * (the more specific patterns) win over earlier, more general ones.
+ *
+ * @param modelName - Model name to match; lowercased before comparison.
+ * @param tokensMap - Map whose keys are the candidate patterns.
+ * @returns The matching key, or `null` when no key is contained in the name.
+ */
+export function findMatchingPattern(
+  modelName: string,
+  tokensMap: Record<string, number>,
+): string | null {
+  const patterns = Object.keys(tokensMap);
+  const needle = modelName.toLowerCase();
+  const match = patterns.reverse().find((pattern) => needle.includes(pattern));
+  return match ?? null;
+}
+
+/**
+ * Retrieves the maximum context tokens for a given model name.
+ *
+ * Lookup order: exact key in the endpoint's map, then substring pattern match.
+ * Endpoints without a map are resolved against the openAI map.
+ *
+ * @param modelName - The name of the model to look up.
+ * @param endpoint - The endpoint (default is 'openAI').
+ * @returns The maximum context tokens for the given model or undefined if no match is found.
+ *
+ * @example
+ * getModelMaxTokens('gpt-4o'); // Returns 127500
+ * getModelMaxTokens('claude-3-opus', 'anthropic'); // Returns 200000
+ * getModelMaxTokens('unknown-model'); // Returns undefined
+ */
+export function getModelMaxTokens(
+  modelName: string,
+  endpoint: string = EModelEndpoint.openAI,
+): number | undefined {
+  if (typeof modelName !== 'string') {
+    return undefined;
+  }
+
+  // Own-property checks keep inherited Object.prototype members (e.g. a model or
+  // endpoint literally named "constructor") from being mistaken for map entries.
+  const hasOwn = (obj: object, key: string): boolean =>
+    Object.prototype.hasOwnProperty.call(obj, key);
+
+  const tokensMap = hasOwn(maxTokensMap, endpoint) ? maxTokensMap[endpoint] : undefined;
+  if (!tokensMap) {
+    // Fall back to aggregate models for unknown endpoints
+    return getModelMaxTokens(modelName, EModelEndpoint.openAI);
+  }
+
+  // Try exact match first
+  if (hasOwn(tokensMap, modelName)) {
+    return tokensMap[modelName];
+  }
+
+  // Try pattern matching
+  const matchedPattern = findMatchingPattern(modelName, tokensMap);
+  if (matchedPattern) {
+    return tokensMap[matchedPattern];
+  }
+
+  return undefined;
+}
+
+/**
+ * Resolves a model name to the key under which it appears in the endpoint's
+ * context-limit map: the name itself on an exact hit, otherwise the key found
+ * by substring pattern matching.
+ *
+ * @param modelName - The name of the model to look up.
+ * @param endpoint - The endpoint (default is 'openAI').
+ * @returns The matching key; the input name when the endpoint has no map or nothing
+ *   matches; `undefined` when `modelName` is not a string.
+ */
+export function matchModelName(
+  modelName: string,
+  endpoint: string = EModelEndpoint.openAI,
+): string | undefined {
+  if (typeof modelName !== 'string') {
+    return undefined;
+  }
+
+  const tokensMap = maxTokensMap[endpoint];
+  const isExactKey = tokensMap ? tokensMap[modelName] !== undefined : false;
+  if (!tokensMap || isExactKey) {
+    return modelName;
+  }
+
+  return findMatchingPattern(modelName, tokensMap) || modelName;
+}
+
+// Individual model maps are available for advanced use cases
+// but not re-exported to avoid conflicts with config.ts
+
+// =============================================================================
+// OUTPUT TOKEN LIMITS
+// =============================================================================
+
+/**
+ * Maximum output tokens for OpenAI and similar models.
+ * Values from official documentation, slightly reduced to leave safety margin.
+ * `system_default` is the value `getModelMaxOutputTokens` returns when neither an
+ * exact key nor a pattern matches.
+ */
+const modelMaxOutputs: Record<string, number> = {
+  o1: 32268, // -500 from max: 32,768
+  'o1-mini': 65136, // -500 from max: 65,536
+  'o1-preview': 32268, // -500 from max: 32,768
+  'gpt-5': 128000,
+  'gpt-5-mini': 128000,
+  'gpt-5-nano': 128000,
+  'gpt-5-pro': 128000,
+  'gpt-oss-20b': 131000,
+  'gpt-oss-120b': 131000,
+  system_default: 32000,
+};
+
+/**
+ * Maximum output tokens for Anthropic Claude models.
+ * Values from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names
+ * This map has no `system_default` entry, so an unmatched model on the anthropic
+ * endpoint yields `undefined` from `getModelMaxOutputTokens`.
+ */
+const anthropicMaxOutputs: Record<string, number> = {
+  'claude-3-haiku': 4096,
+  'claude-3-sonnet': 4096,
+  'claude-3-opus': 4096,
+  'claude-haiku-4-5': 64000,
+  'claude-sonnet-4': 64000,
+  'claude-opus-4': 32000,
+  'claude-opus-4-5': 64000,
+  'claude-3.5-sonnet': 8192,
+  'claude-3-5-sonnet': 8192,
+  'claude-3.7-sonnet': 128000,
+  'claude-3-7-sonnet': 128000,
+};
+
+/**
+ * Maximum output tokens for DeepSeek models.
+ * Values from https://api-docs.deepseek.com/quick_start/pricing
+ * Merged into the openAI and custom entries of `maxOutputTokensMap`.
+ */
+const deepseekMaxOutputs: Record<string, number> = {
+  deepseek: 8000, // deepseek-chat default: 4K, max: 8K
+  'deepseek-chat': 8000,
+  'deepseek-reasoner': 64000, // default: 32K, max: 64K
+  'deepseek-r1': 64000,
+  'deepseek-v3': 8000,
+  'deepseek.r1': 64000,
+};
+
+/**
+ * Map of endpoint to model max output token limits.
+ * Endpoints missing from this map are resolved against the openAI entry by
+ * `getModelMaxOutputTokens`.
+ */
+export const maxOutputTokensMap: Record<string, Record<string, number>> = {
+  [EModelEndpoint.anthropic]: anthropicMaxOutputs,
+  [EModelEndpoint.azureOpenAI]: modelMaxOutputs,
+  [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
+  [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
+};
+
+/**
+ * Retrieves the maximum output tokens for a given model name.
+ *
+ * Lookup order: exact key in the endpoint's map, then substring pattern match,
+ * then the map's `system_default` entry. Endpoints without a map are resolved
+ * against the openAI map.
+ *
+ * @param modelName - The name of the model to look up.
+ * @param endpoint - The endpoint (default is 'openAI').
+ * @returns The maximum output tokens for the given model; the map's `system_default`
+ *   when nothing matches; `undefined` when the map has no `system_default` or
+ *   `modelName` is not a string.
+ *
+ * @example
+ * getModelMaxOutputTokens('o1'); // Returns 32268
+ * getModelMaxOutputTokens('claude-3-opus', 'anthropic'); // Returns 4096
+ * getModelMaxOutputTokens('unknown-model'); // Returns 32000 (system_default)
+ */
+export function getModelMaxOutputTokens(
+  modelName: string,
+  endpoint: string = EModelEndpoint.openAI,
+): number | undefined {
+  if (typeof modelName !== 'string') {
+    return undefined;
+  }
+
+  // Own-property checks keep inherited Object.prototype members (e.g. a model or
+  // endpoint literally named "constructor") from being mistaken for map entries.
+  const hasOwn = (obj: object, key: string): boolean =>
+    Object.prototype.hasOwnProperty.call(obj, key);
+
+  const tokensMap = hasOwn(maxOutputTokensMap, endpoint) ? maxOutputTokensMap[endpoint] : undefined;
+  if (!tokensMap) {
+    // Fall back to openAI for unknown endpoints
+    return getModelMaxOutputTokens(modelName, EModelEndpoint.openAI);
+  }
+
+  // Try exact match first
+  if (hasOwn(tokensMap, modelName)) {
+    return tokensMap[modelName];
+  }
+
+  // Try pattern matching
+  const matchedPattern = findMatchingPattern(modelName, tokensMap);
+  if (matchedPattern) {
+    return tokensMap[matchedPattern];
+  }
+
+  // Return system_default if available
+  return tokensMap.system_default;
+}
+
+// =============================================================================
+// TOKEN DEFAULTS
+// =============================================================================
+
+/**
+ * Centralized token-related default values.
+ * These replace hardcoded magic numbers throughout the codebase.
+ * Declared `as const`, so each member keeps its literal type and is readonly.
+ */
+export const TOKEN_DEFAULTS = {
+  /** Fallback context window for agents when model lookup fails */
+  AGENT_CONTEXT_FALLBACK: 18000,
+  /** Legacy fallback for older clients */
+  LEGACY_CONTEXT_FALLBACK: 4097,
+  /** Safety margin multiplier (0.9 = reserve 10% for response) */
+  CONTEXT_SAFETY_MARGIN: 0.9,
+  /** Default max output tokens when not specified */
+  DEFAULT_MAX_OUTPUT: 32000,
+} as const;