From 841a37e8cbf2dba093b4f9e5966f6987888e15fb Mon Sep 17 00:00:00 2001
From: Marco Beretta <81851188+berry-13@users.noreply.github.com>
Date: Sun, 14 Dec 2025 00:36:45 +0100
Subject: [PATCH 01/12] ✨ feat: Add token usage indicator to chat input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add TokenUsageIndicator component with circular progress ring
Create useTokenUsage hook with Jotai atom for state
Add model context window lookups to data-provider
Consolidate token utilities (output limits, TOKEN_DEFAULTS)
Display input/output tokens and percentage of context used
---
 api/app/clients/specs/FakeClient.js           |   5 +-
 client/src/components/Chat/Input/ChatForm.tsx |   4 +
 .../Chat/Input/TokenUsageIndicator.tsx        |  87 +++
 client/src/hooks/index.ts                     |   1 +
 client/src/hooks/useTokenUsage.ts             |  77 +++
 client/src/store/tokenUsage.ts                |  13 +
 packages/api/src/agents/initialize.ts         |   9 +-
 packages/api/src/utils/tokens.ts              | 362 +------------
 packages/data-provider/src/index.ts           |   2 +
 packages/data-provider/src/schemas.ts         |   1 +
 packages/data-provider/src/tokens.ts          | 497 ++++++++++++++++++
 11 files changed, 710 insertions(+), 348 deletions(-)
 create mode 100644 client/src/components/Chat/Input/TokenUsageIndicator.tsx
 create mode 100644 client/src/hooks/useTokenUsage.ts
 create mode 100644 client/src/store/tokenUsage.ts
 create mode 100644 packages/data-provider/src/tokens.ts

diff --git a/api/app/clients/specs/FakeClient.js b/api/app/clients/specs/FakeClient.js
index d1d07a967d..58480b4018 100644
--- a/api/app/clients/specs/FakeClient.js
+++ b/api/app/clients/specs/FakeClient.js
@@ -1,4 +1,5 @@
 const { getModelMaxTokens } = require('@librechat/api');
+const { TOKEN_DEFAULTS } = require('librechat-data-provider');
 const BaseClient = require('../BaseClient');

 class FakeClient extends BaseClient {
@@ -41,7 +42,9 @@ class FakeClient extends BaseClient {
     }

     this.maxContextTokens =
-      this.options.maxContextTokens ?? getModelMaxTokens(this.modelOptions.model) ?? 4097;
+      this.options.maxContextTokens ??
+      getModelMaxTokens(this.modelOptions.model) ??
+      TOKEN_DEFAULTS.LEGACY_CONTEXT_FALLBACK;
   }
   buildMessages() {}
   getTokenCount(str) {
diff --git a/client/src/components/Chat/Input/ChatForm.tsx b/client/src/components/Chat/Input/ChatForm.tsx
index 8cccf6cf53..9012a734bd 100644
--- a/client/src/components/Chat/Input/ChatForm.tsx
+++ b/client/src/components/Chat/Input/ChatForm.tsx
@@ -18,7 +18,9 @@ import {
   useQueryParams,
   useSubmitMessage,
   useFocusChatEffect,
+  useTokenUsageComputation,
 } from '~/hooks';
+import TokenUsageIndicator from './TokenUsageIndicator';
 import { mainTextareaId, BadgeItem } from '~/common';
 import AttachFileChat from './Files/AttachFileChat';
 import FileFormChat from './FileFormChat';
@@ -39,6 +41,7 @@ const ChatForm = memo(({ index = 0 }: { index?: number }) => {
   const submitButtonRef = useRef(null);
   const textAreaRef = useRef(null);
   useFocusChatEffect(textAreaRef);
+  useTokenUsageComputation();
   const localize = useLocalize();
   const [isCollapsed, setIsCollapsed] = useState(false);
@@ -332,6 +335,7 @@ const ChatForm = memo(({ index = 0 }: { index?: number }) => {
             }
           />
           <div className={cn(
+ {SpeechToText && ( = 1000000) { + return `${(n / 1000000).toFixed(1)}M`; + } + if (n >= 1000) { + return `${(n / 1000).toFixed(1)}K`; + } + return n.toString(); +} + +const TokenUsageIndicator = memo(function TokenUsageIndicator() { + const { inputTokens, outputTokens, maxContext } = useTokenUsage(); + + const totalUsed = inputTokens + outputTokens; + const hasMaxContext = maxContext !== null && maxContext > 0; + const percentage = hasMaxContext ? Math.min((totalUsed / maxContext) * 100, 100) : 0; + + // Ring calculations + const size = 28; + const strokeWidth = 2.5; + const radius = (size - strokeWidth) / 2; + const circumference = 2 * Math.PI * radius; + const offset = circumference - (percentage / 100) * circumference; + + const tooltipText = hasMaxContext + ? `Input: ${formatTokens(inputTokens)} | Output: ${formatTokens(outputTokens)} | Max: ${formatTokens(maxContext)}` + : `Input: ${formatTokens(inputTokens)} | Output: ${formatTokens(outputTokens)} | Max: N/A`; + + // Color based on percentage + const getProgressColor = () => { + if (!hasMaxContext) { + return 'stroke-text-secondary'; + } + if (percentage > 90) { + return 'stroke-red-500'; + } + if (percentage > 75) { + return 'stroke-yellow-500'; + } + return 'stroke-green-500'; + }; + + return ( + + + {/* Background ring */} + + {/* Progress ring */} + + +
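The ring above is drawn with the standard SVG stroke-dasharray/stroke-dashoffset technique: the dash length equals the circle's circumference, and the offset hides the unused fraction. A quick standalone check of the arithmetic (editor's sketch reusing the component's constants, not part of the patch):

```ts
// Plain TypeScript, no React required: verify the ring geometry used above.
const size = 28;
const strokeWidth = 2.5;
const radius = (size - strokeWidth) / 2; // 12.75
const circumference = 2 * Math.PI * radius; // ~80.11

/** Dash offset that leaves `percentage`% of the ring drawn. */
const offsetFor = (percentage: number): number =>
  circumference - (percentage / 100) * circumference;

console.log(offsetFor(0).toFixed(2)); // "80.11" -> empty ring
console.log(offsetFor(75).toFixed(2)); // "20.03" -> three quarters drawn
console.log(offsetFor(100).toFixed(2)); // "0.00" -> full ring
```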
+ } + /> + ); +}); + +export default TokenUsageIndicator; diff --git a/client/src/hooks/index.ts b/client/src/hooks/index.ts index f8e23a95e1..62e09a5b17 100644 --- a/client/src/hooks/index.ts +++ b/client/src/hooks/index.ts @@ -35,3 +35,4 @@ export { default as useTextToSpeech } from './Input/useTextToSpeech'; export { default as useGenerationsByLatest } from './useGenerationsByLatest'; export { default as useLocalizedConfig } from './useLocalizedConfig'; export { default as useResourcePermissions } from './useResourcePermissions'; +export { default as useTokenUsage, useTokenUsageComputation } from './useTokenUsage'; diff --git a/client/src/hooks/useTokenUsage.ts b/client/src/hooks/useTokenUsage.ts new file mode 100644 index 0000000000..80b6e6144f --- /dev/null +++ b/client/src/hooks/useTokenUsage.ts @@ -0,0 +1,77 @@ +import { useEffect, useMemo } from 'react'; +import { useSetAtom, useAtomValue } from 'jotai'; +import type { TMessage } from 'librechat-data-provider'; +import { getModelMaxTokens } from 'librechat-data-provider'; +import { tokenUsageAtom, type TokenUsage } from '~/store/tokenUsage'; +import { useGetMessagesByConvoId } from '~/data-provider'; +import { useChatContext } from '~/Providers'; + +/** + * Hook to compute and update token usage from conversation messages. + * Should be called in a component that has access to useChatContext. + */ +export function useTokenUsageComputation() { + const { conversation } = useChatContext(); + const conversationId = conversation?.conversationId ?? ''; + const setTokenUsage = useSetAtom(tokenUsageAtom); + + // Use the query hook to get reactive messages + const { data: messages } = useGetMessagesByConvoId(conversationId, { + enabled: !!conversationId && conversationId !== 'new', + }); + + // Compute token usage whenever messages change + const tokenData = useMemo(() => { + let inputTokens = 0; + let outputTokens = 0; + + if (messages && Array.isArray(messages)) { + for (const msg of messages as TMessage[]) { + const count = msg.tokenCount ?? 0; + if (msg.isCreatedByUser) { + inputTokens += count; + } else { + outputTokens += count; + } + } + } + + // Determine max context: explicit setting or model default + let maxContext: number | null = conversation?.maxContextTokens ?? null; + + // If no explicit maxContextTokens, try to look up model default + if (maxContext === null && conversation?.model) { + const endpoint = conversation.endpointType ?? conversation.endpoint ?? ''; + const modelDefault = getModelMaxTokens(conversation.model, endpoint); + if (modelDefault !== undefined) { + maxContext = modelDefault; + } + } + + return { + inputTokens, + outputTokens, + maxContext, + }; + }, [ + messages, + conversation?.maxContextTokens, + conversation?.model, + conversation?.endpoint, + conversation?.endpointType, + ]); + + // Update the atom when computed values change + useEffect(() => { + setTokenUsage(tokenData); + }, [tokenData, setTokenUsage]); +} + +/** + * Hook to read the current token usage values. 
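+ *
+ * @example
+ * // Illustrative only (editor's sketch, not part of the original patch):
+ * const { inputTokens, outputTokens, maxContext } = useTokenUsage();
+ * const totalUsed = inputTokens + outputTokens;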
+ */ +export function useTokenUsage(): TokenUsage { + return useAtomValue(tokenUsageAtom); +} + +export default useTokenUsage; diff --git a/client/src/store/tokenUsage.ts b/client/src/store/tokenUsage.ts new file mode 100644 index 0000000000..e4965484bf --- /dev/null +++ b/client/src/store/tokenUsage.ts @@ -0,0 +1,13 @@ +import { atom } from 'jotai'; + +export type TokenUsage = { + inputTokens: number; + outputTokens: number; + maxContext: number | null; // null = N/A +}; + +export const tokenUsageAtom = atom({ + inputTokens: 0, + outputTokens: 0, + maxContext: null, +}); diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts index a37ddf4848..d5d2bbd6e6 100644 --- a/packages/api/src/agents/initialize.ts +++ b/packages/api/src/agents/initialize.ts @@ -7,6 +7,7 @@ import { isAgentsEndpoint, replaceSpecialVars, providerEndpointMap, + TOKEN_DEFAULTS, } from 'librechat-data-provider'; import type { AgentToolResources, @@ -240,7 +241,7 @@ export async function initializeAgent( providerEndpointMap[provider as keyof typeof providerEndpointMap], options.endpointTokenConfig, ), - 18000, + TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK, ); if ( @@ -293,7 +294,7 @@ export async function initializeAgent( agent.additional_instructions = artifactsPromptResult ?? undefined; } - const agentMaxContextNum = Number(agentMaxContextTokens) || 18000; + const agentMaxContextNum = Number(agentMaxContextTokens) || TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK; const maxOutputTokensNum = Number(maxOutputTokens) || 0; const finalAttachments: IMongoFile[] = (primedAttachments ?? []) @@ -308,7 +309,9 @@ export async function initializeAgent( userMCPAuthMap, toolContextMap: toolContextMap ?? {}, useLegacyContent: !!options.useLegacyContent, - maxContextTokens: Math.round((agentMaxContextNum - maxOutputTokensNum) * 0.9), + maxContextTokens: Math.round( + (agentMaxContextNum - maxOutputTokensNum) * TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN, + ), }; return initializedAgent; diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index 12b356c6a7..d75936350f 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -1,351 +1,25 @@ import z from 'zod'; -import { EModelEndpoint } from 'librechat-data-provider'; +import { + EModelEndpoint, + maxTokensMap, + maxOutputTokensMap, + TOKEN_DEFAULTS, + findMatchingPattern as findMatchingPatternSimple, + getModelMaxTokens as getModelMaxTokensSimple, + getModelMaxOutputTokens as getModelMaxOutputTokensSimple, + matchModelName as matchModelNameSimple, +} from 'librechat-data-provider'; import type { EndpointTokenConfig, TokenConfig } from '~/types'; -const openAIModels = { - 'o4-mini': 200000, - 'o3-mini': 195000, // -5000 from max - o3: 200000, - o1: 195000, // -5000 from max - 'o1-mini': 127500, // -500 from max - 'o1-preview': 127500, // -500 from max - 'gpt-4': 8187, // -5 from max - 'gpt-4-0613': 8187, // -5 from max - 'gpt-4-32k': 32758, // -10 from max - 'gpt-4-32k-0314': 32758, // -10 from max - 'gpt-4-32k-0613': 32758, // -10 from max - 'gpt-4-1106': 127500, // -500 from max - 'gpt-4-0125': 127500, // -500 from max - 'gpt-4.5': 127500, // -500 from max - 'gpt-4.1': 1047576, - 'gpt-4.1-mini': 1047576, - 'gpt-4.1-nano': 1047576, - 'gpt-5': 400000, - 'gpt-5-mini': 400000, - 'gpt-5-nano': 400000, - 'gpt-5-pro': 400000, - 'gpt-4o': 127500, // -500 from max - 'gpt-4o-mini': 127500, // -500 from max - 'gpt-4o-2024-05-13': 127500, // -500 from max - 'gpt-4-turbo': 127500, // -500 from max - 'gpt-4-vision': 127500, 
// -500 from max - 'gpt-3.5-turbo': 16375, // -10 from max - 'gpt-3.5-turbo-0613': 4092, // -5 from max - 'gpt-3.5-turbo-0301': 4092, // -5 from max - 'gpt-3.5-turbo-16k': 16375, // -10 from max - 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max - 'gpt-3.5-turbo-1106': 16375, // -10 from max - 'gpt-3.5-turbo-0125': 16375, // -10 from max -}; +// Re-export from data-provider for backwards compatibility +export { maxTokensMap, maxOutputTokensMap, TOKEN_DEFAULTS }; -const mistralModels = { - 'mistral-': 31990, // -10 from max - 'mistral-7b': 31990, // -10 from max - 'mistral-small': 31990, // -10 from max - 'mixtral-8x7b': 31990, // -10 from max - 'mixtral-8x22b': 65536, - 'mistral-large': 131000, - 'mistral-large-2402': 127500, - 'mistral-large-2407': 127500, - 'mistral-nemo': 131000, - 'pixtral-large': 131000, - 'mistral-saba': 32000, - codestral: 256000, - 'ministral-8b': 131000, - 'ministral-3b': 131000, -}; - -const cohereModels = { - 'command-light': 4086, // -10 from max - 'command-light-nightly': 8182, // -10 from max - command: 4086, // -10 from max - 'command-nightly': 8182, // -10 from max - 'command-text': 4086, // -10 from max - 'command-r': 127500, // -500 from max - 'command-r-plus': 127500, // -500 from max -}; - -const googleModels = { - /* Max I/O is combined so we subtract the amount from max response tokens for actual total */ - gemma: 8196, - 'gemma-2': 32768, - 'gemma-3': 32768, - 'gemma-3-27b': 131072, - gemini: 30720, // -2048 from max - 'gemini-pro-vision': 12288, - 'gemini-exp': 2000000, - 'gemini-3': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5-pro': 1000000, - 'gemini-2.5-flash': 1000000, - 'gemini-2.5-flash-lite': 1000000, - 'gemini-2.0': 2000000, - 'gemini-2.0-flash': 1000000, - 'gemini-2.0-flash-lite': 1000000, - 'gemini-1.5': 1000000, - 'gemini-1.5-flash': 1000000, - 'gemini-1.5-flash-8b': 1000000, - 'text-bison-32k': 32758, // -10 from max - 'chat-bison-32k': 32758, // -10 from max - 'code-bison-32k': 32758, // -10 from max - 'codechat-bison-32k': 32758, - /* Codey, -5 from max: 6144 */ - 'code-': 6139, - 'codechat-': 6139, - /* PaLM2, -5 from max: 8192 */ - 'text-': 8187, - 'chat-': 8187, -}; - -const anthropicModels = { - 'claude-': 100000, - 'claude-instant': 100000, - 'claude-2': 100000, - 'claude-2.1': 200000, - 'claude-3': 200000, - 'claude-3-haiku': 200000, - 'claude-3-sonnet': 200000, - 'claude-3-opus': 200000, - 'claude-3.5-haiku': 200000, - 'claude-3-5-haiku': 200000, - 'claude-3-5-sonnet': 200000, - 'claude-3.5-sonnet': 200000, - 'claude-3-7-sonnet': 200000, - 'claude-3.7-sonnet': 200000, - 'claude-3-5-sonnet-latest': 200000, - 'claude-3.5-sonnet-latest': 200000, - 'claude-haiku-4-5': 200000, - 'claude-sonnet-4': 1000000, - 'claude-4': 200000, - 'claude-opus-4': 200000, - 'claude-opus-4-5': 200000, -}; - -const deepseekModels = { - deepseek: 128000, - 'deepseek-chat': 128000, - 'deepseek-reasoner': 128000, - 'deepseek-r1': 128000, - 'deepseek-v3': 128000, - 'deepseek.r1': 128000, -}; - -const metaModels = { - // Basic patterns - llama3: 8000, - llama2: 4000, - 'llama-3': 8000, - 'llama-2': 4000, - - // llama3.x pattern - 'llama3.1': 127500, - 'llama3.2': 127500, - 'llama3.3': 127500, - - // llama3-x pattern - 'llama3-1': 127500, - 'llama3-2': 127500, - 'llama3-3': 127500, - - // llama-3.x pattern - 'llama-3.1': 127500, - 'llama-3.2': 127500, - 'llama-3.3': 127500, - - // llama3.x:Nb pattern - 'llama3.1:405b': 127500, - 'llama3.1:70b': 127500, - 'llama3.1:8b': 127500, - 
'llama3.2:1b': 127500, - 'llama3.2:3b': 127500, - 'llama3.2:11b': 127500, - 'llama3.2:90b': 127500, - 'llama3.3:70b': 127500, - - // llama3-x-Nb pattern - 'llama3-1-405b': 127500, - 'llama3-1-70b': 127500, - 'llama3-1-8b': 127500, - 'llama3-2-1b': 127500, - 'llama3-2-3b': 127500, - 'llama3-2-11b': 127500, - 'llama3-2-90b': 127500, - 'llama3-3-70b': 127500, - - // llama-3.x-Nb pattern - 'llama-3.1-405b': 127500, - 'llama-3.1-70b': 127500, - 'llama-3.1-8b': 127500, - 'llama-3.2-1b': 127500, - 'llama-3.2-3b': 127500, - 'llama-3.2-11b': 127500, - 'llama-3.2-90b': 127500, - 'llama-3.3-70b': 127500, - - // Original llama2/3 patterns - 'llama3-70b': 8000, - 'llama3-8b': 8000, - 'llama2-70b': 4000, - 'llama2-13b': 4000, - 'llama3:70b': 8000, - 'llama3:8b': 8000, - 'llama2:70b': 4000, -}; - -const qwenModels = { - qwen: 32000, - 'qwen2.5': 32000, - 'qwen-turbo': 1000000, - 'qwen-plus': 131000, - 'qwen-max': 32000, - 'qwq-32b': 32000, - // Qwen3 models - qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) - 'qwen3-8b': 128000, - 'qwen3-14b': 40960, - 'qwen3-30b-a3b': 40960, - 'qwen3-32b': 40960, - 'qwen3-235b-a22b': 40960, - // Qwen3 VL (Vision-Language) models - 'qwen3-vl-8b-thinking': 256000, - 'qwen3-vl-8b-instruct': 262144, - 'qwen3-vl-30b-a3b': 262144, - 'qwen3-vl-235b-a22b': 131072, - // Qwen3 specialized models - 'qwen3-max': 256000, - 'qwen3-coder': 262144, - 'qwen3-coder-30b-a3b': 262144, - 'qwen3-coder-plus': 128000, - 'qwen3-coder-flash': 128000, - 'qwen3-next-80b-a3b': 262144, -}; - -const ai21Models = { - 'j2-mid': 8182, // -10 from max - 'j2-ultra': 8182, // -10 from max - 'jamba-instruct': 255500, // -500 from max -}; - -const amazonModels = { - // Amazon Titan models - 'titan-text-lite': 4000, - 'titan-text-express': 8000, - 'titan-text-premier': 31500, // -500 from max - // Amazon Nova models - // https://aws.amazon.com/ai/generative-ai/nova/ - 'nova-micro': 127000, // -1000 from max - 'nova-lite': 295000, // -5000 from max - 'nova-pro': 295000, // -5000 from max - 'nova-premier': 995000, // -5000 from max -}; - -const bedrockModels = { - ...anthropicModels, - ...mistralModels, - ...cohereModels, - ...deepseekModels, - ...metaModels, - ...ai21Models, - ...amazonModels, -}; - -const xAIModels = { - grok: 131072, - 'grok-beta': 131072, - 'grok-vision-beta': 8192, - 'grok-2': 131072, - 'grok-2-latest': 131072, - 'grok-2-1212': 131072, - 'grok-2-vision': 32768, - 'grok-2-vision-latest': 32768, - 'grok-2-vision-1212': 32768, - 'grok-3': 131072, - 'grok-3-fast': 131072, - 'grok-3-mini': 131072, - 'grok-3-mini-fast': 131072, - 'grok-4': 256000, // 256K context - 'grok-4-fast': 2000000, // 2M context - 'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants) - 'grok-code-fast': 256000, // 256K context -}; - -const aggregateModels = { - ...openAIModels, - ...googleModels, - ...bedrockModels, - ...xAIModels, - ...qwenModels, - // misc. 
- kimi: 131000, - // GPT-OSS - 'gpt-oss': 131000, - 'gpt-oss:20b': 131000, - 'gpt-oss-20b': 131000, - 'gpt-oss:120b': 131000, - 'gpt-oss-120b': 131000, - // GLM models (Zhipu AI) - glm4: 128000, - 'glm-4': 128000, - 'glm-4-32b': 128000, - 'glm-4.5': 131000, - 'glm-4.5-air': 131000, - 'glm-4.5v': 66000, - 'glm-4.6': 200000, -}; - -export const maxTokensMap = { - [EModelEndpoint.azureOpenAI]: openAIModels, - [EModelEndpoint.openAI]: aggregateModels, - [EModelEndpoint.agents]: aggregateModels, - [EModelEndpoint.custom]: aggregateModels, - [EModelEndpoint.google]: googleModels, - [EModelEndpoint.anthropic]: anthropicModels, - [EModelEndpoint.bedrock]: bedrockModels, -}; - -export const modelMaxOutputs = { - o1: 32268, // -500 from max: 32,768 - 'o1-mini': 65136, // -500 from max: 65,536 - 'o1-preview': 32268, // -500 from max: 32,768 - 'gpt-5': 128000, - 'gpt-5-mini': 128000, - 'gpt-5-nano': 128000, - 'gpt-5-pro': 128000, - 'gpt-oss-20b': 131000, - 'gpt-oss-120b': 131000, - system_default: 32000, -}; - -/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */ -const anthropicMaxOutputs = { - 'claude-3-haiku': 4096, - 'claude-3-sonnet': 4096, - 'claude-3-opus': 4096, - 'claude-haiku-4-5': 64000, - 'claude-sonnet-4': 64000, - 'claude-opus-4': 32000, - 'claude-opus-4-5': 64000, - 'claude-3.5-sonnet': 8192, - 'claude-3-5-sonnet': 8192, - 'claude-3.7-sonnet': 128000, - 'claude-3-7-sonnet': 128000, -}; - -/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */ -const deepseekMaxOutputs = { - deepseek: 8000, // deepseek-chat default: 4K, max: 8K - 'deepseek-chat': 8000, - 'deepseek-reasoner': 64000, // default: 32K, max: 64K - 'deepseek-r1': 64000, - 'deepseek-v3': 8000, - 'deepseek.r1': 64000, -}; - -export const maxOutputTokensMap = { - [EModelEndpoint.anthropic]: anthropicMaxOutputs, - [EModelEndpoint.azureOpenAI]: modelMaxOutputs, - [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, - [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, +// Re-export simple versions (for use without EndpointTokenConfig) +export { + findMatchingPatternSimple, + getModelMaxTokensSimple, + getModelMaxOutputTokensSimple, + matchModelNameSimple, }; /** diff --git a/packages/data-provider/src/index.ts b/packages/data-provider/src/index.ts index c57ca82845..ba21ece55e 100644 --- a/packages/data-provider/src/index.ts +++ b/packages/data-provider/src/index.ts @@ -47,3 +47,5 @@ export { default as createPayload } from './createPayload'; /* feedback */ export * from './feedback'; export * from './parameterSettings'; +/* token limits */ +export * from './tokens'; diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index 7dabc549db..9d9c540c85 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -618,6 +618,7 @@ export type TMessage = z.input & { attachments?: TAttachment[]; clientTimestamp?: string; feedback?: TFeedback; + tokenCount?: number; }; export const coerceNumber = z.union([z.number(), z.string()]).transform((val) => { diff --git a/packages/data-provider/src/tokens.ts b/packages/data-provider/src/tokens.ts new file mode 100644 index 0000000000..130723232f --- /dev/null +++ b/packages/data-provider/src/tokens.ts @@ -0,0 +1,497 @@ +import { EModelEndpoint } from './schemas'; + +/** + * Model context window token limits. + * These values represent the maximum context tokens (input) for each model. 
+ * Values are slightly reduced from actual max to leave room for output tokens. + */ + +const openAIModels: Record = { + 'o4-mini': 200000, + 'o3-mini': 195000, + o3: 200000, + o1: 195000, + 'o1-mini': 127500, + 'o1-preview': 127500, + 'gpt-4': 8187, + 'gpt-4-0613': 8187, + 'gpt-4-32k': 32758, + 'gpt-4-32k-0314': 32758, + 'gpt-4-32k-0613': 32758, + 'gpt-4-1106': 127500, + 'gpt-4-0125': 127500, + 'gpt-4.5': 127500, + 'gpt-4.1': 1047576, + 'gpt-4.1-mini': 1047576, + 'gpt-4.1-nano': 1047576, + 'gpt-5': 400000, + 'gpt-5-mini': 400000, + 'gpt-5-nano': 400000, + 'gpt-5-pro': 400000, + 'gpt-4o': 127500, + 'gpt-4o-mini': 127500, + 'gpt-4o-2024-05-13': 127500, + 'gpt-4-turbo': 127500, + 'gpt-4-vision': 127500, + 'gpt-3.5-turbo': 16375, + 'gpt-3.5-turbo-0613': 4092, + 'gpt-3.5-turbo-0301': 4092, + 'gpt-3.5-turbo-16k': 16375, + 'gpt-3.5-turbo-16k-0613': 16375, + 'gpt-3.5-turbo-1106': 16375, + 'gpt-3.5-turbo-0125': 16375, +}; + +const mistralModels: Record = { + 'mistral-': 31990, + 'mistral-7b': 31990, + 'mistral-small': 31990, + 'mixtral-8x7b': 31990, + 'mixtral-8x22b': 65536, + 'mistral-large': 131000, + 'mistral-large-2402': 127500, + 'mistral-large-2407': 127500, + 'mistral-nemo': 131000, + 'pixtral-large': 131000, + 'mistral-saba': 32000, + codestral: 256000, + 'ministral-8b': 131000, + 'ministral-3b': 131000, +}; + +const cohereModels: Record = { + 'command-light': 4086, + 'command-light-nightly': 8182, + command: 4086, + 'command-nightly': 8182, + 'command-text': 4086, + 'command-r': 127500, + 'command-r-plus': 127500, +}; + +const googleModels: Record = { + gemma: 8196, + 'gemma-2': 32768, + 'gemma-3': 32768, + 'gemma-3-27b': 131072, + gemini: 30720, + 'gemini-pro-vision': 12288, + 'gemini-exp': 2000000, + 'gemini-3': 1000000, + 'gemini-2.5': 1000000, + 'gemini-2.5-pro': 1000000, + 'gemini-2.5-flash': 1000000, + 'gemini-2.5-flash-lite': 1000000, + 'gemini-2.0': 2000000, + 'gemini-2.0-flash': 1000000, + 'gemini-2.0-flash-lite': 1000000, + 'gemini-1.5': 1000000, + 'gemini-1.5-flash': 1000000, + 'gemini-1.5-flash-8b': 1000000, + 'text-bison-32k': 32758, + 'chat-bison-32k': 32758, + 'code-bison-32k': 32758, + 'codechat-bison-32k': 32758, + 'code-': 6139, + 'codechat-': 6139, + 'text-': 8187, + 'chat-': 8187, +}; + +const anthropicModels: Record = { + 'claude-': 100000, + 'claude-instant': 100000, + 'claude-2': 100000, + 'claude-2.1': 200000, + 'claude-3': 200000, + 'claude-3-haiku': 200000, + 'claude-3-sonnet': 200000, + 'claude-3-opus': 200000, + 'claude-3.5-haiku': 200000, + 'claude-3-5-haiku': 200000, + 'claude-3-5-sonnet': 200000, + 'claude-3.5-sonnet': 200000, + 'claude-3-7-sonnet': 200000, + 'claude-3.7-sonnet': 200000, + 'claude-3-5-sonnet-latest': 200000, + 'claude-3.5-sonnet-latest': 200000, + 'claude-haiku-4-5': 200000, + 'claude-sonnet-4': 1000000, + 'claude-4': 200000, + 'claude-opus-4': 200000, + 'claude-opus-4-5': 200000, +}; + +const deepseekModels: Record = { + deepseek: 128000, + 'deepseek-chat': 128000, + 'deepseek-reasoner': 128000, + 'deepseek-r1': 128000, + 'deepseek-v3': 128000, + 'deepseek.r1': 128000, +}; + +const metaModels: Record = { + llama3: 8000, + llama2: 4000, + 'llama-3': 8000, + 'llama-2': 4000, + 'llama3.1': 127500, + 'llama3.2': 127500, + 'llama3.3': 127500, + 'llama3-1': 127500, + 'llama3-2': 127500, + 'llama3-3': 127500, + 'llama-3.1': 127500, + 'llama-3.2': 127500, + 'llama-3.3': 127500, + 'llama3.1:405b': 127500, + 'llama3.1:70b': 127500, + 'llama3.1:8b': 127500, + 'llama3.2:1b': 127500, + 'llama3.2:3b': 127500, + 'llama3.2:11b': 127500, + 'llama3.2:90b': 
127500, + 'llama3.3:70b': 127500, + 'llama3-1-405b': 127500, + 'llama3-1-70b': 127500, + 'llama3-1-8b': 127500, + 'llama3-2-1b': 127500, + 'llama3-2-3b': 127500, + 'llama3-2-11b': 127500, + 'llama3-2-90b': 127500, + 'llama3-3-70b': 127500, + 'llama-3.1-405b': 127500, + 'llama-3.1-70b': 127500, + 'llama-3.1-8b': 127500, + 'llama-3.2-1b': 127500, + 'llama-3.2-3b': 127500, + 'llama-3.2-11b': 127500, + 'llama-3.2-90b': 127500, + 'llama-3.3-70b': 127500, + 'llama3-70b': 8000, + 'llama3-8b': 8000, + 'llama2-70b': 4000, + 'llama2-13b': 4000, + 'llama3:70b': 8000, + 'llama3:8b': 8000, + 'llama2:70b': 4000, +}; + +const qwenModels: Record = { + qwen: 32000, + 'qwen2.5': 32000, + 'qwen-turbo': 1000000, + 'qwen-plus': 131000, + 'qwen-max': 32000, + 'qwq-32b': 32000, + qwen3: 40960, + 'qwen3-8b': 128000, + 'qwen3-14b': 40960, + 'qwen3-30b-a3b': 40960, + 'qwen3-32b': 40960, + 'qwen3-235b-a22b': 40960, + 'qwen3-vl-8b-thinking': 256000, + 'qwen3-vl-8b-instruct': 262144, + 'qwen3-vl-30b-a3b': 262144, + 'qwen3-vl-235b-a22b': 131072, + 'qwen3-max': 256000, + 'qwen3-coder': 262144, + 'qwen3-coder-30b-a3b': 262144, + 'qwen3-coder-plus': 128000, + 'qwen3-coder-flash': 128000, + 'qwen3-next-80b-a3b': 262144, +}; + +const ai21Models: Record = { + 'j2-mid': 8182, + 'j2-ultra': 8182, + 'jamba-instruct': 255500, +}; + +const amazonModels: Record = { + 'titan-text-lite': 4000, + 'titan-text-express': 8000, + 'titan-text-premier': 31500, + 'nova-micro': 127000, + 'nova-lite': 295000, + 'nova-pro': 295000, + 'nova-premier': 995000, +}; + +const bedrockModels: Record = { + ...anthropicModels, + ...mistralModels, + ...cohereModels, + ...deepseekModels, + ...metaModels, + ...ai21Models, + ...amazonModels, +}; + +const xAIModels: Record = { + grok: 131072, + 'grok-beta': 131072, + 'grok-vision-beta': 8192, + 'grok-2': 131072, + 'grok-2-latest': 131072, + 'grok-2-1212': 131072, + 'grok-2-vision': 32768, + 'grok-2-vision-latest': 32768, + 'grok-2-vision-1212': 32768, + 'grok-3': 131072, + 'grok-3-fast': 131072, + 'grok-3-mini': 131072, + 'grok-3-mini-fast': 131072, + 'grok-4': 256000, + 'grok-4-fast': 2000000, + 'grok-4-1-fast': 2000000, + 'grok-code-fast': 256000, +}; + +const aggregateModels: Record = { + ...openAIModels, + ...googleModels, + ...bedrockModels, + ...xAIModels, + ...qwenModels, + kimi: 131000, + 'gpt-oss': 131000, + 'gpt-oss:20b': 131000, + 'gpt-oss-20b': 131000, + 'gpt-oss:120b': 131000, + 'gpt-oss-120b': 131000, + glm4: 128000, + 'glm-4': 128000, + 'glm-4-32b': 128000, + 'glm-4.5': 131000, + 'glm-4.5-air': 131000, + 'glm-4.5v': 66000, + 'glm-4.6': 200000, +}; + +/** + * Map of endpoint to model context token limits. + */ +export const maxTokensMap: Record> = { + [EModelEndpoint.azureOpenAI]: openAIModels, + [EModelEndpoint.openAI]: aggregateModels, + [EModelEndpoint.agents]: aggregateModels, + [EModelEndpoint.custom]: aggregateModels, + [EModelEndpoint.google]: googleModels, + [EModelEndpoint.anthropic]: anthropicModels, + [EModelEndpoint.bedrock]: bedrockModels, +}; + +/** + * Finds the first matching pattern in the tokens map. + * Searches in reverse order to match more specific patterns first. 
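+ *
+ * @example
+ * // Illustrative (editor's note): with keys { 'claude-': 100000, 'claude-3-opus': 200000 },
+ * // findMatchingPattern('claude-3-opus-20240229', map) returns 'claude-3-opus',
+ * // the later, more specific key, rather than the broad 'claude-' prefix.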
+ */ +export function findMatchingPattern( + modelName: string, + tokensMap: Record, +): string | null { + const keys = Object.keys(tokensMap); + const lowerModelName = modelName.toLowerCase(); + for (let i = keys.length - 1; i >= 0; i--) { + const modelKey = keys[i]; + if (lowerModelName.includes(modelKey)) { + return modelKey; + } + } + return null; +} + +/** + * Retrieves the maximum context tokens for a given model name. + * + * @param modelName - The name of the model to look up. + * @param endpoint - The endpoint (default is 'openAI'). + * @returns The maximum context tokens for the given model or undefined if no match is found. + * + * @example + * getModelMaxTokens('gpt-4o'); // Returns 127500 + * getModelMaxTokens('claude-3-opus', 'anthropic'); // Returns 200000 + * getModelMaxTokens('unknown-model'); // Returns undefined + */ +export function getModelMaxTokens( + modelName: string, + endpoint: string = EModelEndpoint.openAI, +): number | undefined { + if (typeof modelName !== 'string') { + return undefined; + } + + const tokensMap = maxTokensMap[endpoint]; + if (!tokensMap) { + // Fall back to aggregate models for unknown endpoints + return getModelMaxTokens(modelName, EModelEndpoint.openAI); + } + + // Try exact match first + if (tokensMap[modelName] !== undefined) { + return tokensMap[modelName]; + } + + // Try pattern matching + const matchedPattern = findMatchingPattern(modelName, tokensMap); + if (matchedPattern) { + return tokensMap[matchedPattern]; + } + + return undefined; +} + +/** + * Retrieves the model name key for a given model name input. + * If the exact model name isn't found, it searches for partial matches. + * + * @param modelName - The name of the model to look up. + * @param endpoint - The endpoint (default is 'openAI'). + * @returns The model name key for the given model; returns input if no match is found. + */ +export function matchModelName( + modelName: string, + endpoint: string = EModelEndpoint.openAI, +): string | undefined { + if (typeof modelName !== 'string') { + return undefined; + } + + const tokensMap = maxTokensMap[endpoint]; + if (!tokensMap) { + return modelName; + } + + if (tokensMap[modelName] !== undefined) { + return modelName; + } + + const matchedPattern = findMatchingPattern(modelName, tokensMap); + return matchedPattern || modelName; +} + +// Individual model maps are available for advanced use cases +// but not re-exported to avoid conflicts with config.ts + +// ============================================================================= +// OUTPUT TOKEN LIMITS +// ============================================================================= + +/** + * Maximum output tokens for OpenAI and similar models. + * Values from official documentation, slightly reduced to leave safety margin. + */ +const modelMaxOutputs: Record = { + o1: 32268, // -500 from max: 32,768 + 'o1-mini': 65136, // -500 from max: 65,536 + 'o1-preview': 32268, // -500 from max: 32,768 + 'gpt-5': 128000, + 'gpt-5-mini': 128000, + 'gpt-5-nano': 128000, + 'gpt-5-pro': 128000, + 'gpt-oss-20b': 131000, + 'gpt-oss-120b': 131000, + system_default: 32000, +}; + +/** + * Maximum output tokens for Anthropic Claude models. 
+ * Values from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names + */ +const anthropicMaxOutputs: Record = { + 'claude-3-haiku': 4096, + 'claude-3-sonnet': 4096, + 'claude-3-opus': 4096, + 'claude-haiku-4-5': 64000, + 'claude-sonnet-4': 64000, + 'claude-opus-4': 32000, + 'claude-opus-4-5': 64000, + 'claude-3.5-sonnet': 8192, + 'claude-3-5-sonnet': 8192, + 'claude-3.7-sonnet': 128000, + 'claude-3-7-sonnet': 128000, +}; + +/** + * Maximum output tokens for DeepSeek models. + * Values from https://api-docs.deepseek.com/quick_start/pricing + */ +const deepseekMaxOutputs: Record = { + deepseek: 8000, // deepseek-chat default: 4K, max: 8K + 'deepseek-chat': 8000, + 'deepseek-reasoner': 64000, // default: 32K, max: 64K + 'deepseek-r1': 64000, + 'deepseek-v3': 8000, + 'deepseek.r1': 64000, +}; + +/** + * Map of endpoint to model max output token limits. + */ +export const maxOutputTokensMap: Record> = { + [EModelEndpoint.anthropic]: anthropicMaxOutputs, + [EModelEndpoint.azureOpenAI]: modelMaxOutputs, + [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, + [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, +}; + +/** + * Retrieves the maximum output tokens for a given model name. + * + * @param modelName - The name of the model to look up. + * @param endpoint - The endpoint (default is 'openAI'). + * @returns The maximum output tokens for the given model or undefined if no match is found. + * + * @example + * getModelMaxOutputTokens('o1'); // Returns 32268 + * getModelMaxOutputTokens('claude-3-opus', 'anthropic'); // Returns 4096 + * getModelMaxOutputTokens('unknown-model'); // Returns 32000 (system_default) + */ +export function getModelMaxOutputTokens( + modelName: string, + endpoint: string = EModelEndpoint.openAI, +): number | undefined { + if (typeof modelName !== 'string') { + return undefined; + } + + const tokensMap = maxOutputTokensMap[endpoint]; + if (!tokensMap) { + // Fall back to openAI for unknown endpoints + return getModelMaxOutputTokens(modelName, EModelEndpoint.openAI); + } + + // Try exact match first + if (tokensMap[modelName] !== undefined) { + return tokensMap[modelName]; + } + + // Try pattern matching + const matchedPattern = findMatchingPattern(modelName, tokensMap); + if (matchedPattern) { + return tokensMap[matchedPattern]; + } + + // Return system_default if available + return tokensMap.system_default; +} + +// ============================================================================= +// TOKEN DEFAULTS +// ============================================================================= + +/** + * Centralized token-related default values. + * These replace hardcoded magic numbers throughout the codebase. 
+ */
+export const TOKEN_DEFAULTS = {
+  /** Fallback context window for agents when model lookup fails */
+  AGENT_CONTEXT_FALLBACK: 18000,
+  /** Legacy fallback for older clients */
+  LEGACY_CONTEXT_FALLBACK: 4097,
+  /** Safety margin multiplier (0.9 = reserve 10% for response) */
+  CONTEXT_SAFETY_MARGIN: 0.9,
+  /** Default max output tokens when not specified */
+  DEFAULT_MAX_OUTPUT: 32000,
+} as const;

From 8cedd5f45e160039f3b36aa3e0f96c8868caac9f Mon Sep 17 00:00:00 2001
From: Marco Beretta <81851188+berry-13@users.noreply.github.com>
Date: Sun, 14 Dec 2025 02:33:10 +0100
Subject: [PATCH 02/12] ♿ fix: Add i18n and accessibility to
 TokenUsageIndicator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Chat/Input/TokenUsageIndicator.tsx        | 27 +++++++++++++++----
 client/src/locales/en/translation.json        |  4 +++
 client/src/store/index.ts                     |  3 +++
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/client/src/components/Chat/Input/TokenUsageIndicator.tsx b/client/src/components/Chat/Input/TokenUsageIndicator.tsx
index affe950d6e..d22fad8cef 100644
--- a/client/src/components/Chat/Input/TokenUsageIndicator.tsx
+++ b/client/src/components/Chat/Input/TokenUsageIndicator.tsx
@@ -1,6 +1,6 @@
 import { memo } from 'react';
 import { TooltipAnchor } from '@librechat/client';
-import { useTokenUsage } from '~/hooks';
+import { useLocalize, useTokenUsage } from '~/hooks';
 import { cn } from '~/utils';

 function formatTokens(n: number): string {
@@ -14,6 +14,7 @@ function formatTokens(n: number): string {
 }

 const TokenUsageIndicator = memo(function TokenUsageIndicator() {
+  const localize = useLocalize();
   const { inputTokens, outputTokens, maxContext } = useTokenUsage();

   const totalUsed = inputTokens + outputTokens;
@@ -28,10 +29,21 @@ const TokenUsageIndicator = memo(function TokenUsageIndicator() {
   const offset = circumference - (percentage / 100) * circumference;

   const tooltipText = hasMaxContext
-    ? `Input: ${formatTokens(inputTokens)} | Output: ${formatTokens(outputTokens)} | Max: ${formatTokens(maxContext)}`
-    : `Input: ${formatTokens(inputTokens)} | Output: ${formatTokens(outputTokens)} | Max: N/A`;
+    ? localize('com_ui_token_usage_with_max', {
+        0: formatTokens(inputTokens),
+        1: formatTokens(outputTokens),
+        2: formatTokens(maxContext),
+      })
+    : localize('com_ui_token_usage_no_max', {
+        0: formatTokens(inputTokens),
+        1: formatTokens(outputTokens),
+      });

-  // Color based on percentage
+  const ariaLabel = hasMaxContext
+    ? localize('com_ui_token_usage_aria', { 0: Math.round(percentage).toString() })
+    : localize('com_ui_token_usage_indicator');
+
+  // Color based on percentage (using raw colors to match existing patterns in AudioRecorder.tsx)
   const getProgressColor = () => {
     if (!hasMaxContext) {
       return 'stroke-text-secondary';
@@ -49,12 +61,17 @@ const TokenUsageIndicator = memo(function TokenUsageIndicator() {
   return (
-    
+
+
+
+ {showPercentage && ( + + {Math.round(percentage)}% + + )} +
+ ); +} + +interface TokenRowProps { + label: string; + value: number; + total: number; + colorClass: string; +} + +function TokenRow({ label, value, total, colorClass }: TokenRowProps) { + const percentage = total > 0 ? Math.round((value / total) * 100) : 0; + + return ( +
+
+ {label} + + {formatTokens(value)} + ({percentage}%) + +
+ +
+ ); +} + +function TokenUsageContent() { + const localize = useLocalize(); + const { inputTokens, outputTokens, maxContext } = useTokenUsage(); + + const totalUsed = inputTokens + outputTokens; + const hasMaxContext = maxContext !== null && maxContext > 0; + const percentage = hasMaxContext ? Math.min((totalUsed / maxContext) * 100, 100) : 0; + + const getMainProgressColor = () => { + if (!hasMaxContext) { + return 'bg-text-secondary'; + } + if (percentage > 90) { + return 'bg-red-500'; + } + if (percentage > 75) { + return 'bg-yellow-500'; + } + return 'bg-green-500'; + }; + + return ( +
+ {/* Header */} +
+ + {localize('com_ui_token_usage_context')} + + {hasMaxContext && ( + 90, + 'text-yellow-500': percentage > 75 && percentage <= 90, + 'text-green-500': percentage <= 75, + })} + > + {localize('com_ui_token_usage_percent', { 0: Math.round(percentage).toString() })} + + )} +
+ + {/* Main Progress Bar */} + {hasMaxContext && ( +
+ +
+ {formatTokens(totalUsed)} + {formatTokens(maxContext)} +
+
+ )} + + {/* Divider */} +
+ + {/* Input/Output Breakdown */} +
+ + +
+ + {/* Total Section */} +
+
+ {localize('com_ui_token_usage_total')} + {formatTokens(totalUsed)} +
+
+ + {/* Max Context (when available) */} + {hasMaxContext && ( +
+ {localize('com_ui_token_usage_max_context')} + {formatTokens(maxContext)} +
+ )} +
+ ); +} + const TokenUsageIndicator = memo(function TokenUsageIndicator() { const localize = useLocalize(); const { inputTokens, outputTokens, maxContext } = useTokenUsage(); @@ -28,17 +169,6 @@ const TokenUsageIndicator = memo(function TokenUsageIndicator() { const circumference = 2 * Math.PI * radius; const offset = circumference - (percentage / 100) * circumference; - const tooltipText = hasMaxContext - ? localize('com_ui_token_usage_with_max', { - 0: formatTokens(inputTokens), - 1: formatTokens(outputTokens), - 2: formatTokens(maxContext), - }) - : localize('com_ui_token_usage_no_max', { - 0: formatTokens(inputTokens), - 1: formatTokens(outputTokens), - }); - const ariaLabel = hasMaxContext ? localize('com_ui_token_usage_aria_full', { 0: formatTokens(inputTokens), @@ -66,12 +196,11 @@ const TokenUsageIndicator = memo(function TokenUsageIndicator() { }; return ( - + +
- } - /> + + + + + + + + ); }); diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index 9a15b24253..4efe902309 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -1324,8 +1324,14 @@ "com_ui_token_url": "Token URL", "com_ui_token_usage_aria_full": "Token usage: {{0}} input, {{1}} output, {{2}} max context, {{3}}% used", "com_ui_token_usage_aria_no_max": "Token usage: {{0}} input, {{1}} output", + "com_ui_token_usage_context": "Context Usage", "com_ui_token_usage_indicator": "Token usage indicator", + "com_ui_token_usage_input": "Input", + "com_ui_token_usage_max_context": "Max Context", "com_ui_token_usage_no_max": "Input: {{0}} | Output: {{1}} | Max: N/A", + "com_ui_token_usage_output": "Output", + "com_ui_token_usage_percent": "{{0}}% used", + "com_ui_token_usage_total": "Total", "com_ui_token_usage_with_max": "Input: {{0}} | Output: {{1}} | Max: {{2}}", "com_ui_tokens": "tokens", "com_ui_tool_collection_prefix": "A collection of tools from", From 29b4d6300541541a84527f856fdcba79f4f4b15a Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Mon, 15 Dec 2025 22:16:39 +0100 Subject: [PATCH 08/12] fix: improve ProgressBar and TokenRow a11y --- .../Chat/Input/TokenUsageIndicator.tsx | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/client/src/components/Chat/Input/TokenUsageIndicator.tsx b/client/src/components/Chat/Input/TokenUsageIndicator.tsx index a70f356d03..72562044d9 100644 --- a/client/src/components/Chat/Input/TokenUsageIndicator.tsx +++ b/client/src/components/Chat/Input/TokenUsageIndicator.tsx @@ -25,11 +25,14 @@ function ProgressBar({ value, max, colorClass, showPercentage = false }: Progres return (
-
-
+
+
+
+
+
{showPercentage && ( @@ -56,7 +59,7 @@ function TokenRow({ label, value, total, colorClass }: TokenRowProps) { {label} {formatTokens(value)} - ({percentage}%) + ({percentage}%)
@@ -109,7 +112,7 @@ function TokenUsageContent() { {hasMaxContext && (
-
+
{formatTokens(totalUsed)} {formatTokens(maxContext)}
From 0b84a255365d18fb8b5ce06caf2aa261a9466a1b Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Mon, 15 Dec 2025 23:31:14 +0100 Subject: [PATCH 09/12] chore: remove unused i18n strings --- client/src/locales/en/translation.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index 4efe902309..187cd94d48 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -1325,14 +1325,11 @@ "com_ui_token_usage_aria_full": "Token usage: {{0}} input, {{1}} output, {{2}} max context, {{3}}% used", "com_ui_token_usage_aria_no_max": "Token usage: {{0}} input, {{1}} output", "com_ui_token_usage_context": "Context Usage", - "com_ui_token_usage_indicator": "Token usage indicator", "com_ui_token_usage_input": "Input", "com_ui_token_usage_max_context": "Max Context", - "com_ui_token_usage_no_max": "Input: {{0}} | Output: {{1}} | Max: N/A", "com_ui_token_usage_output": "Output", "com_ui_token_usage_percent": "{{0}}% used", "com_ui_token_usage_total": "Total", - "com_ui_token_usage_with_max": "Input: {{0}} | Output: {{1}} | Max: {{2}}", "com_ui_tokens": "tokens", "com_ui_tool_collection_prefix": "A collection of tools from", "com_ui_tool_list_collapse": "Collapse {{serverName}} tool list", From 01ca9b1655a751f9130d3ec725c61d3d88fb5616 Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Mon, 15 Dec 2025 23:55:43 +0100 Subject: [PATCH 10/12] feat: enhance token formatting and reset logic for new conversations --- .../src/components/Chat/Input/TokenUsageIndicator.tsx | 4 ++-- client/src/hooks/useTokenUsage.ts | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/client/src/components/Chat/Input/TokenUsageIndicator.tsx b/client/src/components/Chat/Input/TokenUsageIndicator.tsx index 72562044d9..f4a847ef06 100644 --- a/client/src/components/Chat/Input/TokenUsageIndicator.tsx +++ b/client/src/components/Chat/Input/TokenUsageIndicator.tsx @@ -5,10 +5,10 @@ import { cn } from '~/utils'; function formatTokens(n: number): string { if (n >= 1000000) { - return `${(n / 1000000).toFixed(1)}M`; + return `${(n / 1000000).toFixed(1).replace(/\.0$/, '')}M`; } if (n >= 1000) { - return `${(n / 1000).toFixed(1)}K`; + return `${(n / 1000).toFixed(1).replace(/\.0$/, '')}K`; } return n.toString(); } diff --git a/client/src/hooks/useTokenUsage.ts b/client/src/hooks/useTokenUsage.ts index 2dce4f87b2..3141c9c454 100644 --- a/client/src/hooks/useTokenUsage.ts +++ b/client/src/hooks/useTokenUsage.ts @@ -84,6 +84,17 @@ export function useTokenUsageComputation() { useEffect(() => { setTokenUsage(tokenData); }, [tokenData, setTokenUsage]); + + // Reset token usage when starting a new conversation + useEffect(() => { + if (paramId === 'new' && effectiveMessages.length === 0) { + setTokenUsage({ + inputTokens: 0, + outputTokens: 0, + maxContext: null, + }); + } + }, [paramId, effectiveMessages.length, setTokenUsage]); } /** From 71b94cdcaac6fe4a9a579eb8ac84094a9bd71f5a Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Mon, 15 Dec 2025 23:56:34 +0100 Subject: [PATCH 11/12] feat: enhance accessibility in TokenUsageIndicator --- .../Chat/Input/TokenUsageIndicator.tsx | 75 ++++++++++++------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/client/src/components/Chat/Input/TokenUsageIndicator.tsx 
b/client/src/components/Chat/Input/TokenUsageIndicator.tsx index f4a847ef06..1e8658bfdd 100644 --- a/client/src/components/Chat/Input/TokenUsageIndicator.tsx +++ b/client/src/components/Chat/Input/TokenUsageIndicator.tsx @@ -17,15 +17,23 @@ interface ProgressBarProps { value: number; max: number; colorClass: string; + label: string; showPercentage?: boolean; } -function ProgressBar({ value, max, colorClass, showPercentage = false }: ProgressBarProps) { +function ProgressBar({ value, max, colorClass, label, showPercentage = false }: ProgressBarProps) { const percentage = max > 0 ? Math.min((value / max) * 100, 100) : 0; return (
-
+
{showPercentage && ( - + )} @@ -48,9 +56,10 @@ interface TokenRowProps { value: number; total: number; colorClass: string; + ariaLabel: string; } -function TokenRow({ label, value, total, colorClass }: TokenRowProps) { +function TokenRow({ label, value, total, colorClass, ariaLabel }: TokenRowProps) { const percentage = total > 0 ? Math.round((value / total) * 100) : 0; return ( @@ -59,10 +68,12 @@ function TokenRow({ label, value, total, colorClass }: TokenRowProps) { {label} {formatTokens(value)} - ({percentage}%) +
- +
); } @@ -88,11 +99,18 @@ function TokenUsageContent() { return 'bg-green-500'; }; + const inputPercentage = totalUsed > 0 ? Math.round((inputTokens / totalUsed) * 100) : 0; + const outputPercentage = totalUsed > 0 ? Math.round((outputTokens / totalUsed) * 100) : 0; + return ( -
+
{/* Header */}
- + {localize('com_ui_token_usage_context')} {hasMaxContext && ( @@ -111,8 +129,13 @@ function TokenUsageContent() { {/* Main Progress Bar */} {hasMaxContext && (
- -
+ + @@ -120,7 +143,7 @@ function TokenUsageContent() { )} {/* Divider */} -
+
{/* Input/Output Breakdown */}
@@ -129,30 +152,16 @@ function TokenUsageContent() { value={inputTokens} total={totalUsed} colorClass="bg-blue-500" + ariaLabel={`${localize('com_ui_token_usage_input')}: ${formatTokens(inputTokens)}, ${inputPercentage}% of total`} />
- - {/* Total Section */} -
-
- {localize('com_ui_token_usage_total')} - {formatTokens(totalUsed)} -
-
- - {/* Max Context (when available) */} - {hasMaxContext && ( -
- {localize('com_ui_token_usage_max_context')} - {formatTokens(maxContext)} -
- )}
); } @@ -203,8 +212,9 @@ const TokenUsageIndicator = memo(function TokenUsageIndicator() { - + diff --git a/packages/data-provider/specs/tokens.spec.ts b/packages/data-provider/specs/tokens.spec.ts new file mode 100644 index 0000000000..37eeecbea6 --- /dev/null +++ b/packages/data-provider/specs/tokens.spec.ts @@ -0,0 +1,152 @@ +import { + findMatchingPattern, + getModelMaxTokens, + getModelMaxOutputTokens, + matchModelName, + maxTokensMap, +} from '../src/tokens'; +import { EModelEndpoint } from '../src/schemas'; + +describe('Token Pattern Matching', () => { + describe('findMatchingPattern', () => { + const testMap: Record = { + 'claude-': 100000, + 'claude-3': 200000, + 'claude-3-opus': 200000, + 'gpt-4': 8000, + 'gpt-4-turbo': 128000, + }; + + it('should match exact model names', () => { + expect(findMatchingPattern('claude-3-opus', testMap)).toBe('claude-3-opus'); + expect(findMatchingPattern('gpt-4-turbo', testMap)).toBe('gpt-4-turbo'); + }); + + it('should match more specific patterns first (reverse order)', () => { + // claude-3-opus-20240229 should match 'claude-3-opus' not 'claude-3' or 'claude-' + expect(findMatchingPattern('claude-3-opus-20240229', testMap)).toBe('claude-3-opus'); + }); + + it('should fall back to broader patterns when no specific match', () => { + // claude-3-haiku should match 'claude-3' (not 'claude-3-opus') + expect(findMatchingPattern('claude-3-haiku', testMap)).toBe('claude-3'); + }); + + it('should be case-insensitive', () => { + expect(findMatchingPattern('Claude-3-Opus', testMap)).toBe('claude-3-opus'); + expect(findMatchingPattern('GPT-4-TURBO', testMap)).toBe('gpt-4-turbo'); + }); + + it('should return null for unmatched models', () => { + expect(findMatchingPattern('unknown-model', testMap)).toBeNull(); + expect(findMatchingPattern('llama-2', testMap)).toBeNull(); + }); + + it('should NOT match when pattern appears in middle of model name (startsWith behavior)', () => { + // This is the key fix: "my-claude-wrapper" should NOT match "claude-" + expect(findMatchingPattern('my-claude-wrapper', testMap)).toBeNull(); + expect(findMatchingPattern('openai-gpt-4-proxy', testMap)).toBeNull(); + expect(findMatchingPattern('custom-claude-3-service', testMap)).toBeNull(); + }); + + it('should handle empty string model name', () => { + expect(findMatchingPattern('', testMap)).toBeNull(); + }); + + it('should handle empty tokens map', () => { + expect(findMatchingPattern('claude-3', {})).toBeNull(); + }); + }); + + describe('getModelMaxTokens', () => { + it('should return exact match tokens', () => { + expect(getModelMaxTokens('gpt-4o', EModelEndpoint.openAI)).toBe(127500); + expect(getModelMaxTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(200000); + }); + + it('should return pattern-matched tokens', () => { + // claude-3-opus-20240229 should match claude-3-opus pattern + expect(getModelMaxTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe(200000); + }); + + it('should return undefined for unknown models', () => { + expect(getModelMaxTokens('completely-unknown-model', EModelEndpoint.openAI)).toBeUndefined(); + }); + + it('should fall back to openAI for unknown endpoints', () => { + const result = getModelMaxTokens('gpt-4o', 'unknown-endpoint'); + expect(result).toBe(127500); + }); + + it('should handle non-string input gracefully', () => { + expect(getModelMaxTokens(null as unknown as string)).toBeUndefined(); + expect(getModelMaxTokens(undefined as unknown as string)).toBeUndefined(); + expect(getModelMaxTokens(123 as unknown as 
string)).toBeUndefined(); + }); + + it('should NOT match model names with pattern in middle', () => { + // A model like "my-gpt-4-wrapper" should not match "gpt-4" + expect(getModelMaxTokens('my-gpt-4-wrapper', EModelEndpoint.openAI)).toBeUndefined(); + }); + }); + + describe('getModelMaxOutputTokens', () => { + it('should return exact match output tokens', () => { + expect(getModelMaxOutputTokens('o1', EModelEndpoint.openAI)).toBe(32268); + expect(getModelMaxOutputTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(4096); + }); + + it('should return pattern-matched output tokens', () => { + expect(getModelMaxOutputTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe( + 4096, + ); + }); + + it('should return system_default for unknown models (openAI endpoint)', () => { + expect(getModelMaxOutputTokens('unknown-model', EModelEndpoint.openAI)).toBe(32000); + }); + + it('should handle non-string input gracefully', () => { + expect(getModelMaxOutputTokens(null as unknown as string)).toBeUndefined(); + expect(getModelMaxOutputTokens(undefined as unknown as string)).toBeUndefined(); + }); + }); + + describe('matchModelName', () => { + it('should return exact match model name', () => { + expect(matchModelName('gpt-4o', EModelEndpoint.openAI)).toBe('gpt-4o'); + }); + + it('should return pattern key for pattern matches', () => { + expect(matchModelName('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe( + 'claude-3-opus', + ); + }); + + it('should return input for unknown models', () => { + expect(matchModelName('unknown-model', EModelEndpoint.openAI)).toBe('unknown-model'); + }); + + it('should handle non-string input gracefully', () => { + expect(matchModelName(null as unknown as string)).toBeUndefined(); + }); + }); + + describe('maxTokensMap structure', () => { + it('should have entries for all major endpoints', () => { + expect(maxTokensMap[EModelEndpoint.openAI]).toBeDefined(); + expect(maxTokensMap[EModelEndpoint.anthropic]).toBeDefined(); + expect(maxTokensMap[EModelEndpoint.google]).toBeDefined(); + expect(maxTokensMap[EModelEndpoint.azureOpenAI]).toBeDefined(); + expect(maxTokensMap[EModelEndpoint.bedrock]).toBeDefined(); + }); + + it('should have positive token values', () => { + Object.values(maxTokensMap).forEach((endpointMap) => { + Object.entries(endpointMap).forEach(([model, tokens]) => { + expect(tokens).toBeGreaterThan(0); + }); + }); + }); + }); +}); diff --git a/packages/data-provider/src/tokens.ts b/packages/data-provider/src/tokens.ts index c5bbbb233b..f5c6d6eedc 100644 --- a/packages/data-provider/src/tokens.ts +++ b/packages/data-provider/src/tokens.ts @@ -313,6 +313,10 @@ export const maxTokensMap: Record> = { /** * Finds the first matching pattern in the tokens map. * Searches in reverse order to match more specific patterns first. + * + * Note: This relies on the insertion order of keys in the tokensMap object. + * More specific patterns must be defined later in the object to be matched first. + * If the order of keys is changed, the matching behavior may be affected. */ export function findMatchingPattern( modelName: string, @@ -322,7 +326,7 @@ export function findMatchingPattern( const lowerModelName = modelName.toLowerCase(); for (let i = keys.length - 1; i >= 0; i--) { const modelKey = keys[i]; - if (lowerModelName.includes(modelKey)) { + if (lowerModelName.startsWith(modelKey)) { return modelKey; } } @@ -510,7 +514,6 @@ export function getModelMaxOutputTokens( /** * Centralized token-related default values. 
- * These replace hardcoded magic numbers throughout the codebase. */ export const TOKEN_DEFAULTS = { /** Fallback context window for agents when model lookup fails */
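Taken as a whole, the series centralizes context-window knowledge in `librechat-data-provider`. A minimal sketch of how a consumer can combine these exports, mirroring the fallback-and-margin logic from `packages/api/src/agents/initialize.ts` (editor's illustration; the `gpt-4o` figure comes from the map above):

```ts
import { getModelMaxTokens, TOKEN_DEFAULTS } from 'librechat-data-provider';

// Resolve the model's context window, falling back when the lookup fails.
const maxContext =
  getModelMaxTokens('gpt-4o') ?? TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK; // 127500

// Reserve headroom for the response, as initializeAgent does.
const maxOutputTokens = 0; // no explicit output budget in this sketch
const effectiveContext = Math.round(
  (maxContext - maxOutputTokens) * TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN,
);

console.log(effectiveContext); // 114750
```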