From 841a37e8cbf2dba093b4f9e5966f6987888e15fb Mon Sep 17 00:00:00 2001
From: Marco Beretta <81851188+berry-13@users.noreply.github.com>
Date: Sun, 14 Dec 2025 00:36:45 +0100
Subject: [PATCH 01/12] ✨ feat: Add token usage indicator to chat input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add TokenUsageIndicator component with circular progress ring
Create useTokenUsage hook with Jotai atom for state
Add model context window lookups to data-provider
Consolidate token utilities (output limits, TOKEN_DEFAULTS)
Display input/output tokens and percentage of context used
---
 api/app/clients/specs/FakeClient.js           |   5 +-
 client/src/components/Chat/Input/ChatForm.tsx |   4 +
 .../Chat/Input/TokenUsageIndicator.tsx        |  87 +++
 client/src/hooks/index.ts                     |   1 +
 client/src/hooks/useTokenUsage.ts             |  77 +++
 client/src/store/tokenUsage.ts                |  13 +
 packages/api/src/agents/initialize.ts         |   9 +-
 packages/api/src/utils/tokens.ts              | 362 +------------
 packages/data-provider/src/index.ts           |   2 +
 packages/data-provider/src/schemas.ts         |   1 +
 packages/data-provider/src/tokens.ts          | 497 ++++++++++++++++++
 11 files changed, 710 insertions(+), 348 deletions(-)
 create mode 100644 client/src/components/Chat/Input/TokenUsageIndicator.tsx
 create mode 100644 client/src/hooks/useTokenUsage.ts
 create mode 100644 client/src/store/tokenUsage.ts
 create mode 100644 packages/data-provider/src/tokens.ts

diff --git a/api/app/clients/specs/FakeClient.js b/api/app/clients/specs/FakeClient.js
index d1d07a967d..58480b4018 100644
--- a/api/app/clients/specs/FakeClient.js
+++ b/api/app/clients/specs/FakeClient.js
@@ -1,4 +1,5 @@
 const { getModelMaxTokens } = require('@librechat/api');
+const { TOKEN_DEFAULTS } = require('librechat-data-provider');
 const BaseClient = require('../BaseClient');

 class FakeClient extends BaseClient {
@@ -41,7 +42,9 @@ class FakeClient extends BaseClient {
     }

     this.maxContextTokens =
-      this.options.maxContextTokens ?? getModelMaxTokens(this.modelOptions.model) ?? 4097;
+      this.options.maxContextTokens ??
+      getModelMaxTokens(this.modelOptions.model) ??
+      TOKEN_DEFAULTS.LEGACY_CONTEXT_FALLBACK;
   }
   buildMessages() {}
   getTokenCount(str) {
diff --git a/client/src/components/Chat/Input/ChatForm.tsx b/client/src/components/Chat/Input/ChatForm.tsx
index 8cccf6cf53..9012a734bd 100644
--- a/client/src/components/Chat/Input/ChatForm.tsx
+++ b/client/src/components/Chat/Input/ChatForm.tsx
@@ -18,7 +18,9 @@ import {
   useQueryParams,
   useSubmitMessage,
   useFocusChatEffect,
+  useTokenUsageComputation,
 } from '~/hooks';
+import TokenUsageIndicator from './TokenUsageIndicator';
 import { mainTextareaId, BadgeItem } from '~/common';
 import AttachFileChat from './Files/AttachFileChat';
 import FileFormChat from './FileFormChat';
@@ -39,6 +41,7 @@ const ChatForm = memo(({ index = 0 }: { index?: number }) => {
   const submitButtonRef = useRef(null);
   const textAreaRef = useRef(null);
   useFocusChatEffect(textAreaRef);
+  useTokenUsageComputation();
   const localize = useLocalize();
   const [isCollapsed, setIsCollapsed] = useState(false);
@@ -332,6 +335,7 @@ const ChatForm = memo(({ index = 0 }: { index?: number }) => {
             }
           />
           <div className={cn(
+ {SpeechToText && ( = 1000000) { + return `${(n / 1000000).toFixed(1)}M`; + } + if (n >= 1000) { + return `${(n / 1000).toFixed(1)}K`; + } + return n.toString(); +} + +const TokenUsageIndicator = memo(function TokenUsageIndicator() { + const { inputTokens, outputTokens, maxContext } = useTokenUsage(); + + const totalUsed = inputTokens + outputTokens; + const hasMaxContext = maxContext !== null && maxContext > 0; + const percentage = hasMaxContext ? Math.min((totalUsed / maxContext) * 100, 100) : 0; + + // Ring calculations + const size = 28; + const strokeWidth = 2.5; + const radius = (size - strokeWidth) / 2; + const circumference = 2 * Math.PI * radius; + const offset = circumference - (percentage / 100) * circumference; + + const tooltipText = hasMaxContext + ? `Input: ${formatTokens(inputTokens)} | Output: ${formatTokens(outputTokens)} | Max: ${formatTokens(maxContext)}` + : `Input: ${formatTokens(inputTokens)} | Output: ${formatTokens(outputTokens)} | Max: N/A`; + + // Color based on percentage + const getProgressColor = () => { + if (!hasMaxContext) { + return 'stroke-text-secondary'; + } + if (percentage > 90) { + return 'stroke-red-500'; + } + if (percentage > 75) { + return 'stroke-yellow-500'; + } + return 'stroke-green-500'; + }; + + return ( + + + {/* Background ring */} + + {/* Progress ring */} + + +
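The ring above is drawn with the standard SVG stroke-dasharray/stroke-dashoffset technique: the dash length equals the circle's circumference, and the offset hides the unused fraction. A quick standalone check of the arithmetic (editor's sketch reusing the component's constants, not part of the patch):

```ts
// Plain TypeScript, no React required: verify the ring geometry used above.
const size = 28;
const strokeWidth = 2.5;
const radius = (size - strokeWidth) / 2; // 12.75
const circumference = 2 * Math.PI * radius; // ~80.11

/** Dash offset that leaves `percentage`% of the ring drawn. */
const offsetFor = (percentage: number): number =>
  circumference - (percentage / 100) * circumference;

console.log(offsetFor(0).toFixed(2)); // "80.11" -> empty ring
console.log(offsetFor(75).toFixed(2)); // "20.03" -> three quarters drawn
console.log(offsetFor(100).toFixed(2)); // "0.00" -> full ring
```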
+ } + /> + ); +}); + +export default TokenUsageIndicator; diff --git a/client/src/hooks/index.ts b/client/src/hooks/index.ts index f8e23a95e1..62e09a5b17 100644 --- a/client/src/hooks/index.ts +++ b/client/src/hooks/index.ts @@ -35,3 +35,4 @@ export { default as useTextToSpeech } from './Input/useTextToSpeech'; export { default as useGenerationsByLatest } from './useGenerationsByLatest'; export { default as useLocalizedConfig } from './useLocalizedConfig'; export { default as useResourcePermissions } from './useResourcePermissions'; +export { default as useTokenUsage, useTokenUsageComputation } from './useTokenUsage'; diff --git a/client/src/hooks/useTokenUsage.ts b/client/src/hooks/useTokenUsage.ts new file mode 100644 index 0000000000..80b6e6144f --- /dev/null +++ b/client/src/hooks/useTokenUsage.ts @@ -0,0 +1,77 @@ +import { useEffect, useMemo } from 'react'; +import { useSetAtom, useAtomValue } from 'jotai'; +import type { TMessage } from 'librechat-data-provider'; +import { getModelMaxTokens } from 'librechat-data-provider'; +import { tokenUsageAtom, type TokenUsage } from '~/store/tokenUsage'; +import { useGetMessagesByConvoId } from '~/data-provider'; +import { useChatContext } from '~/Providers'; + +/** + * Hook to compute and update token usage from conversation messages. + * Should be called in a component that has access to useChatContext. + */ +export function useTokenUsageComputation() { + const { conversation } = useChatContext(); + const conversationId = conversation?.conversationId ?? ''; + const setTokenUsage = useSetAtom(tokenUsageAtom); + + // Use the query hook to get reactive messages + const { data: messages } = useGetMessagesByConvoId(conversationId, { + enabled: !!conversationId && conversationId !== 'new', + }); + + // Compute token usage whenever messages change + const tokenData = useMemo(() => { + let inputTokens = 0; + let outputTokens = 0; + + if (messages && Array.isArray(messages)) { + for (const msg of messages as TMessage[]) { + const count = msg.tokenCount ?? 0; + if (msg.isCreatedByUser) { + inputTokens += count; + } else { + outputTokens += count; + } + } + } + + // Determine max context: explicit setting or model default + let maxContext: number | null = conversation?.maxContextTokens ?? null; + + // If no explicit maxContextTokens, try to look up model default + if (maxContext === null && conversation?.model) { + const endpoint = conversation.endpointType ?? conversation.endpoint ?? ''; + const modelDefault = getModelMaxTokens(conversation.model, endpoint); + if (modelDefault !== undefined) { + maxContext = modelDefault; + } + } + + return { + inputTokens, + outputTokens, + maxContext, + }; + }, [ + messages, + conversation?.maxContextTokens, + conversation?.model, + conversation?.endpoint, + conversation?.endpointType, + ]); + + // Update the atom when computed values change + useEffect(() => { + setTokenUsage(tokenData); + }, [tokenData, setTokenUsage]); +} + +/** + * Hook to read the current token usage values. 
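+ *
+ * @example
+ * // Illustrative only (editor's sketch, not part of the original patch):
+ * const { inputTokens, outputTokens, maxContext } = useTokenUsage();
+ * const totalUsed = inputTokens + outputTokens;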
+ */ +export function useTokenUsage(): TokenUsage { + return useAtomValue(tokenUsageAtom); +} + +export default useTokenUsage; diff --git a/client/src/store/tokenUsage.ts b/client/src/store/tokenUsage.ts new file mode 100644 index 0000000000..e4965484bf --- /dev/null +++ b/client/src/store/tokenUsage.ts @@ -0,0 +1,13 @@ +import { atom } from 'jotai'; + +export type TokenUsage = { + inputTokens: number; + outputTokens: number; + maxContext: number | null; // null = N/A +}; + +export const tokenUsageAtom = atom({ + inputTokens: 0, + outputTokens: 0, + maxContext: null, +}); diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts index a37ddf4848..d5d2bbd6e6 100644 --- a/packages/api/src/agents/initialize.ts +++ b/packages/api/src/agents/initialize.ts @@ -7,6 +7,7 @@ import { isAgentsEndpoint, replaceSpecialVars, providerEndpointMap, + TOKEN_DEFAULTS, } from 'librechat-data-provider'; import type { AgentToolResources, @@ -240,7 +241,7 @@ export async function initializeAgent( providerEndpointMap[provider as keyof typeof providerEndpointMap], options.endpointTokenConfig, ), - 18000, + TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK, ); if ( @@ -293,7 +294,7 @@ export async function initializeAgent( agent.additional_instructions = artifactsPromptResult ?? undefined; } - const agentMaxContextNum = Number(agentMaxContextTokens) || 18000; + const agentMaxContextNum = Number(agentMaxContextTokens) || TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK; const maxOutputTokensNum = Number(maxOutputTokens) || 0; const finalAttachments: IMongoFile[] = (primedAttachments ?? []) @@ -308,7 +309,9 @@ export async function initializeAgent( userMCPAuthMap, toolContextMap: toolContextMap ?? {}, useLegacyContent: !!options.useLegacyContent, - maxContextTokens: Math.round((agentMaxContextNum - maxOutputTokensNum) * 0.9), + maxContextTokens: Math.round( + (agentMaxContextNum - maxOutputTokensNum) * TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN, + ), }; return initializedAgent; diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index 12b356c6a7..d75936350f 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -1,351 +1,25 @@ import z from 'zod'; -import { EModelEndpoint } from 'librechat-data-provider'; +import { + EModelEndpoint, + maxTokensMap, + maxOutputTokensMap, + TOKEN_DEFAULTS, + findMatchingPattern as findMatchingPatternSimple, + getModelMaxTokens as getModelMaxTokensSimple, + getModelMaxOutputTokens as getModelMaxOutputTokensSimple, + matchModelName as matchModelNameSimple, +} from 'librechat-data-provider'; import type { EndpointTokenConfig, TokenConfig } from '~/types'; -const openAIModels = { - 'o4-mini': 200000, - 'o3-mini': 195000, // -5000 from max - o3: 200000, - o1: 195000, // -5000 from max - 'o1-mini': 127500, // -500 from max - 'o1-preview': 127500, // -500 from max - 'gpt-4': 8187, // -5 from max - 'gpt-4-0613': 8187, // -5 from max - 'gpt-4-32k': 32758, // -10 from max - 'gpt-4-32k-0314': 32758, // -10 from max - 'gpt-4-32k-0613': 32758, // -10 from max - 'gpt-4-1106': 127500, // -500 from max - 'gpt-4-0125': 127500, // -500 from max - 'gpt-4.5': 127500, // -500 from max - 'gpt-4.1': 1047576, - 'gpt-4.1-mini': 1047576, - 'gpt-4.1-nano': 1047576, - 'gpt-5': 400000, - 'gpt-5-mini': 400000, - 'gpt-5-nano': 400000, - 'gpt-5-pro': 400000, - 'gpt-4o': 127500, // -500 from max - 'gpt-4o-mini': 127500, // -500 from max - 'gpt-4o-2024-05-13': 127500, // -500 from max - 'gpt-4-turbo': 127500, // -500 from max - 'gpt-4-vision': 127500, 
// -500 from max - 'gpt-3.5-turbo': 16375, // -10 from max - 'gpt-3.5-turbo-0613': 4092, // -5 from max - 'gpt-3.5-turbo-0301': 4092, // -5 from max - 'gpt-3.5-turbo-16k': 16375, // -10 from max - 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max - 'gpt-3.5-turbo-1106': 16375, // -10 from max - 'gpt-3.5-turbo-0125': 16375, // -10 from max -}; +// Re-export from data-provider for backwards compatibility +export { maxTokensMap, maxOutputTokensMap, TOKEN_DEFAULTS }; -const mistralModels = { - 'mistral-': 31990, // -10 from max - 'mistral-7b': 31990, // -10 from max - 'mistral-small': 31990, // -10 from max - 'mixtral-8x7b': 31990, // -10 from max - 'mixtral-8x22b': 65536, - 'mistral-large': 131000, - 'mistral-large-2402': 127500, - 'mistral-large-2407': 127500, - 'mistral-nemo': 131000, - 'pixtral-large': 131000, - 'mistral-saba': 32000, - codestral: 256000, - 'ministral-8b': 131000, - 'ministral-3b': 131000, -}; - -const cohereModels = { - 'command-light': 4086, // -10 from max - 'command-light-nightly': 8182, // -10 from max - command: 4086, // -10 from max - 'command-nightly': 8182, // -10 from max - 'command-text': 4086, // -10 from max - 'command-r': 127500, // -500 from max - 'command-r-plus': 127500, // -500 from max -}; - -const googleModels = { - /* Max I/O is combined so we subtract the amount from max response tokens for actual total */ - gemma: 8196, - 'gemma-2': 32768, - 'gemma-3': 32768, - 'gemma-3-27b': 131072, - gemini: 30720, // -2048 from max - 'gemini-pro-vision': 12288, - 'gemini-exp': 2000000, - 'gemini-3': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5-pro': 1000000, - 'gemini-2.5-flash': 1000000, - 'gemini-2.5-flash-lite': 1000000, - 'gemini-2.0': 2000000, - 'gemini-2.0-flash': 1000000, - 'gemini-2.0-flash-lite': 1000000, - 'gemini-1.5': 1000000, - 'gemini-1.5-flash': 1000000, - 'gemini-1.5-flash-8b': 1000000, - 'text-bison-32k': 32758, // -10 from max - 'chat-bison-32k': 32758, // -10 from max - 'code-bison-32k': 32758, // -10 from max - 'codechat-bison-32k': 32758, - /* Codey, -5 from max: 6144 */ - 'code-': 6139, - 'codechat-': 6139, - /* PaLM2, -5 from max: 8192 */ - 'text-': 8187, - 'chat-': 8187, -}; - -const anthropicModels = { - 'claude-': 100000, - 'claude-instant': 100000, - 'claude-2': 100000, - 'claude-2.1': 200000, - 'claude-3': 200000, - 'claude-3-haiku': 200000, - 'claude-3-sonnet': 200000, - 'claude-3-opus': 200000, - 'claude-3.5-haiku': 200000, - 'claude-3-5-haiku': 200000, - 'claude-3-5-sonnet': 200000, - 'claude-3.5-sonnet': 200000, - 'claude-3-7-sonnet': 200000, - 'claude-3.7-sonnet': 200000, - 'claude-3-5-sonnet-latest': 200000, - 'claude-3.5-sonnet-latest': 200000, - 'claude-haiku-4-5': 200000, - 'claude-sonnet-4': 1000000, - 'claude-4': 200000, - 'claude-opus-4': 200000, - 'claude-opus-4-5': 200000, -}; - -const deepseekModels = { - deepseek: 128000, - 'deepseek-chat': 128000, - 'deepseek-reasoner': 128000, - 'deepseek-r1': 128000, - 'deepseek-v3': 128000, - 'deepseek.r1': 128000, -}; - -const metaModels = { - // Basic patterns - llama3: 8000, - llama2: 4000, - 'llama-3': 8000, - 'llama-2': 4000, - - // llama3.x pattern - 'llama3.1': 127500, - 'llama3.2': 127500, - 'llama3.3': 127500, - - // llama3-x pattern - 'llama3-1': 127500, - 'llama3-2': 127500, - 'llama3-3': 127500, - - // llama-3.x pattern - 'llama-3.1': 127500, - 'llama-3.2': 127500, - 'llama-3.3': 127500, - - // llama3.x:Nb pattern - 'llama3.1:405b': 127500, - 'llama3.1:70b': 127500, - 'llama3.1:8b': 127500, - 
'llama3.2:1b': 127500, - 'llama3.2:3b': 127500, - 'llama3.2:11b': 127500, - 'llama3.2:90b': 127500, - 'llama3.3:70b': 127500, - - // llama3-x-Nb pattern - 'llama3-1-405b': 127500, - 'llama3-1-70b': 127500, - 'llama3-1-8b': 127500, - 'llama3-2-1b': 127500, - 'llama3-2-3b': 127500, - 'llama3-2-11b': 127500, - 'llama3-2-90b': 127500, - 'llama3-3-70b': 127500, - - // llama-3.x-Nb pattern - 'llama-3.1-405b': 127500, - 'llama-3.1-70b': 127500, - 'llama-3.1-8b': 127500, - 'llama-3.2-1b': 127500, - 'llama-3.2-3b': 127500, - 'llama-3.2-11b': 127500, - 'llama-3.2-90b': 127500, - 'llama-3.3-70b': 127500, - - // Original llama2/3 patterns - 'llama3-70b': 8000, - 'llama3-8b': 8000, - 'llama2-70b': 4000, - 'llama2-13b': 4000, - 'llama3:70b': 8000, - 'llama3:8b': 8000, - 'llama2:70b': 4000, -}; - -const qwenModels = { - qwen: 32000, - 'qwen2.5': 32000, - 'qwen-turbo': 1000000, - 'qwen-plus': 131000, - 'qwen-max': 32000, - 'qwq-32b': 32000, - // Qwen3 models - qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) - 'qwen3-8b': 128000, - 'qwen3-14b': 40960, - 'qwen3-30b-a3b': 40960, - 'qwen3-32b': 40960, - 'qwen3-235b-a22b': 40960, - // Qwen3 VL (Vision-Language) models - 'qwen3-vl-8b-thinking': 256000, - 'qwen3-vl-8b-instruct': 262144, - 'qwen3-vl-30b-a3b': 262144, - 'qwen3-vl-235b-a22b': 131072, - // Qwen3 specialized models - 'qwen3-max': 256000, - 'qwen3-coder': 262144, - 'qwen3-coder-30b-a3b': 262144, - 'qwen3-coder-plus': 128000, - 'qwen3-coder-flash': 128000, - 'qwen3-next-80b-a3b': 262144, -}; - -const ai21Models = { - 'j2-mid': 8182, // -10 from max - 'j2-ultra': 8182, // -10 from max - 'jamba-instruct': 255500, // -500 from max -}; - -const amazonModels = { - // Amazon Titan models - 'titan-text-lite': 4000, - 'titan-text-express': 8000, - 'titan-text-premier': 31500, // -500 from max - // Amazon Nova models - // https://aws.amazon.com/ai/generative-ai/nova/ - 'nova-micro': 127000, // -1000 from max - 'nova-lite': 295000, // -5000 from max - 'nova-pro': 295000, // -5000 from max - 'nova-premier': 995000, // -5000 from max -}; - -const bedrockModels = { - ...anthropicModels, - ...mistralModels, - ...cohereModels, - ...deepseekModels, - ...metaModels, - ...ai21Models, - ...amazonModels, -}; - -const xAIModels = { - grok: 131072, - 'grok-beta': 131072, - 'grok-vision-beta': 8192, - 'grok-2': 131072, - 'grok-2-latest': 131072, - 'grok-2-1212': 131072, - 'grok-2-vision': 32768, - 'grok-2-vision-latest': 32768, - 'grok-2-vision-1212': 32768, - 'grok-3': 131072, - 'grok-3-fast': 131072, - 'grok-3-mini': 131072, - 'grok-3-mini-fast': 131072, - 'grok-4': 256000, // 256K context - 'grok-4-fast': 2000000, // 2M context - 'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants) - 'grok-code-fast': 256000, // 256K context -}; - -const aggregateModels = { - ...openAIModels, - ...googleModels, - ...bedrockModels, - ...xAIModels, - ...qwenModels, - // misc. 
- kimi: 131000, - // GPT-OSS - 'gpt-oss': 131000, - 'gpt-oss:20b': 131000, - 'gpt-oss-20b': 131000, - 'gpt-oss:120b': 131000, - 'gpt-oss-120b': 131000, - // GLM models (Zhipu AI) - glm4: 128000, - 'glm-4': 128000, - 'glm-4-32b': 128000, - 'glm-4.5': 131000, - 'glm-4.5-air': 131000, - 'glm-4.5v': 66000, - 'glm-4.6': 200000, -}; - -export const maxTokensMap = { - [EModelEndpoint.azureOpenAI]: openAIModels, - [EModelEndpoint.openAI]: aggregateModels, - [EModelEndpoint.agents]: aggregateModels, - [EModelEndpoint.custom]: aggregateModels, - [EModelEndpoint.google]: googleModels, - [EModelEndpoint.anthropic]: anthropicModels, - [EModelEndpoint.bedrock]: bedrockModels, -}; - -export const modelMaxOutputs = { - o1: 32268, // -500 from max: 32,768 - 'o1-mini': 65136, // -500 from max: 65,536 - 'o1-preview': 32268, // -500 from max: 32,768 - 'gpt-5': 128000, - 'gpt-5-mini': 128000, - 'gpt-5-nano': 128000, - 'gpt-5-pro': 128000, - 'gpt-oss-20b': 131000, - 'gpt-oss-120b': 131000, - system_default: 32000, -}; - -/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */ -const anthropicMaxOutputs = { - 'claude-3-haiku': 4096, - 'claude-3-sonnet': 4096, - 'claude-3-opus': 4096, - 'claude-haiku-4-5': 64000, - 'claude-sonnet-4': 64000, - 'claude-opus-4': 32000, - 'claude-opus-4-5': 64000, - 'claude-3.5-sonnet': 8192, - 'claude-3-5-sonnet': 8192, - 'claude-3.7-sonnet': 128000, - 'claude-3-7-sonnet': 128000, -}; - -/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */ -const deepseekMaxOutputs = { - deepseek: 8000, // deepseek-chat default: 4K, max: 8K - 'deepseek-chat': 8000, - 'deepseek-reasoner': 64000, // default: 32K, max: 64K - 'deepseek-r1': 64000, - 'deepseek-v3': 8000, - 'deepseek.r1': 64000, -}; - -export const maxOutputTokensMap = { - [EModelEndpoint.anthropic]: anthropicMaxOutputs, - [EModelEndpoint.azureOpenAI]: modelMaxOutputs, - [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, - [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, +// Re-export simple versions (for use without EndpointTokenConfig) +export { + findMatchingPatternSimple, + getModelMaxTokensSimple, + getModelMaxOutputTokensSimple, + matchModelNameSimple, }; /** diff --git a/packages/data-provider/src/index.ts b/packages/data-provider/src/index.ts index c57ca82845..ba21ece55e 100644 --- a/packages/data-provider/src/index.ts +++ b/packages/data-provider/src/index.ts @@ -47,3 +47,5 @@ export { default as createPayload } from './createPayload'; /* feedback */ export * from './feedback'; export * from './parameterSettings'; +/* token limits */ +export * from './tokens'; diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index 7dabc549db..9d9c540c85 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -618,6 +618,7 @@ export type TMessage = z.input & { attachments?: TAttachment[]; clientTimestamp?: string; feedback?: TFeedback; + tokenCount?: number; }; export const coerceNumber = z.union([z.number(), z.string()]).transform((val) => { diff --git a/packages/data-provider/src/tokens.ts b/packages/data-provider/src/tokens.ts new file mode 100644 index 0000000000..130723232f --- /dev/null +++ b/packages/data-provider/src/tokens.ts @@ -0,0 +1,497 @@ +import { EModelEndpoint } from './schemas'; + +/** + * Model context window token limits. + * These values represent the maximum context tokens (input) for each model. 
+ * Values are slightly reduced from actual max to leave room for output tokens. + */ + +const openAIModels: Record = { + 'o4-mini': 200000, + 'o3-mini': 195000, + o3: 200000, + o1: 195000, + 'o1-mini': 127500, + 'o1-preview': 127500, + 'gpt-4': 8187, + 'gpt-4-0613': 8187, + 'gpt-4-32k': 32758, + 'gpt-4-32k-0314': 32758, + 'gpt-4-32k-0613': 32758, + 'gpt-4-1106': 127500, + 'gpt-4-0125': 127500, + 'gpt-4.5': 127500, + 'gpt-4.1': 1047576, + 'gpt-4.1-mini': 1047576, + 'gpt-4.1-nano': 1047576, + 'gpt-5': 400000, + 'gpt-5-mini': 400000, + 'gpt-5-nano': 400000, + 'gpt-5-pro': 400000, + 'gpt-4o': 127500, + 'gpt-4o-mini': 127500, + 'gpt-4o-2024-05-13': 127500, + 'gpt-4-turbo': 127500, + 'gpt-4-vision': 127500, + 'gpt-3.5-turbo': 16375, + 'gpt-3.5-turbo-0613': 4092, + 'gpt-3.5-turbo-0301': 4092, + 'gpt-3.5-turbo-16k': 16375, + 'gpt-3.5-turbo-16k-0613': 16375, + 'gpt-3.5-turbo-1106': 16375, + 'gpt-3.5-turbo-0125': 16375, +}; + +const mistralModels: Record = { + 'mistral-': 31990, + 'mistral-7b': 31990, + 'mistral-small': 31990, + 'mixtral-8x7b': 31990, + 'mixtral-8x22b': 65536, + 'mistral-large': 131000, + 'mistral-large-2402': 127500, + 'mistral-large-2407': 127500, + 'mistral-nemo': 131000, + 'pixtral-large': 131000, + 'mistral-saba': 32000, + codestral: 256000, + 'ministral-8b': 131000, + 'ministral-3b': 131000, +}; + +const cohereModels: Record = { + 'command-light': 4086, + 'command-light-nightly': 8182, + command: 4086, + 'command-nightly': 8182, + 'command-text': 4086, + 'command-r': 127500, + 'command-r-plus': 127500, +}; + +const googleModels: Record = { + gemma: 8196, + 'gemma-2': 32768, + 'gemma-3': 32768, + 'gemma-3-27b': 131072, + gemini: 30720, + 'gemini-pro-vision': 12288, + 'gemini-exp': 2000000, + 'gemini-3': 1000000, + 'gemini-2.5': 1000000, + 'gemini-2.5-pro': 1000000, + 'gemini-2.5-flash': 1000000, + 'gemini-2.5-flash-lite': 1000000, + 'gemini-2.0': 2000000, + 'gemini-2.0-flash': 1000000, + 'gemini-2.0-flash-lite': 1000000, + 'gemini-1.5': 1000000, + 'gemini-1.5-flash': 1000000, + 'gemini-1.5-flash-8b': 1000000, + 'text-bison-32k': 32758, + 'chat-bison-32k': 32758, + 'code-bison-32k': 32758, + 'codechat-bison-32k': 32758, + 'code-': 6139, + 'codechat-': 6139, + 'text-': 8187, + 'chat-': 8187, +}; + +const anthropicModels: Record = { + 'claude-': 100000, + 'claude-instant': 100000, + 'claude-2': 100000, + 'claude-2.1': 200000, + 'claude-3': 200000, + 'claude-3-haiku': 200000, + 'claude-3-sonnet': 200000, + 'claude-3-opus': 200000, + 'claude-3.5-haiku': 200000, + 'claude-3-5-haiku': 200000, + 'claude-3-5-sonnet': 200000, + 'claude-3.5-sonnet': 200000, + 'claude-3-7-sonnet': 200000, + 'claude-3.7-sonnet': 200000, + 'claude-3-5-sonnet-latest': 200000, + 'claude-3.5-sonnet-latest': 200000, + 'claude-haiku-4-5': 200000, + 'claude-sonnet-4': 1000000, + 'claude-4': 200000, + 'claude-opus-4': 200000, + 'claude-opus-4-5': 200000, +}; + +const deepseekModels: Record = { + deepseek: 128000, + 'deepseek-chat': 128000, + 'deepseek-reasoner': 128000, + 'deepseek-r1': 128000, + 'deepseek-v3': 128000, + 'deepseek.r1': 128000, +}; + +const metaModels: Record = { + llama3: 8000, + llama2: 4000, + 'llama-3': 8000, + 'llama-2': 4000, + 'llama3.1': 127500, + 'llama3.2': 127500, + 'llama3.3': 127500, + 'llama3-1': 127500, + 'llama3-2': 127500, + 'llama3-3': 127500, + 'llama-3.1': 127500, + 'llama-3.2': 127500, + 'llama-3.3': 127500, + 'llama3.1:405b': 127500, + 'llama3.1:70b': 127500, + 'llama3.1:8b': 127500, + 'llama3.2:1b': 127500, + 'llama3.2:3b': 127500, + 'llama3.2:11b': 127500, + 'llama3.2:90b': 
127500, + 'llama3.3:70b': 127500, + 'llama3-1-405b': 127500, + 'llama3-1-70b': 127500, + 'llama3-1-8b': 127500, + 'llama3-2-1b': 127500, + 'llama3-2-3b': 127500, + 'llama3-2-11b': 127500, + 'llama3-2-90b': 127500, + 'llama3-3-70b': 127500, + 'llama-3.1-405b': 127500, + 'llama-3.1-70b': 127500, + 'llama-3.1-8b': 127500, + 'llama-3.2-1b': 127500, + 'llama-3.2-3b': 127500, + 'llama-3.2-11b': 127500, + 'llama-3.2-90b': 127500, + 'llama-3.3-70b': 127500, + 'llama3-70b': 8000, + 'llama3-8b': 8000, + 'llama2-70b': 4000, + 'llama2-13b': 4000, + 'llama3:70b': 8000, + 'llama3:8b': 8000, + 'llama2:70b': 4000, +}; + +const qwenModels: Record = { + qwen: 32000, + 'qwen2.5': 32000, + 'qwen-turbo': 1000000, + 'qwen-plus': 131000, + 'qwen-max': 32000, + 'qwq-32b': 32000, + qwen3: 40960, + 'qwen3-8b': 128000, + 'qwen3-14b': 40960, + 'qwen3-30b-a3b': 40960, + 'qwen3-32b': 40960, + 'qwen3-235b-a22b': 40960, + 'qwen3-vl-8b-thinking': 256000, + 'qwen3-vl-8b-instruct': 262144, + 'qwen3-vl-30b-a3b': 262144, + 'qwen3-vl-235b-a22b': 131072, + 'qwen3-max': 256000, + 'qwen3-coder': 262144, + 'qwen3-coder-30b-a3b': 262144, + 'qwen3-coder-plus': 128000, + 'qwen3-coder-flash': 128000, + 'qwen3-next-80b-a3b': 262144, +}; + +const ai21Models: Record = { + 'j2-mid': 8182, + 'j2-ultra': 8182, + 'jamba-instruct': 255500, +}; + +const amazonModels: Record = { + 'titan-text-lite': 4000, + 'titan-text-express': 8000, + 'titan-text-premier': 31500, + 'nova-micro': 127000, + 'nova-lite': 295000, + 'nova-pro': 295000, + 'nova-premier': 995000, +}; + +const bedrockModels: Record = { + ...anthropicModels, + ...mistralModels, + ...cohereModels, + ...deepseekModels, + ...metaModels, + ...ai21Models, + ...amazonModels, +}; + +const xAIModels: Record = { + grok: 131072, + 'grok-beta': 131072, + 'grok-vision-beta': 8192, + 'grok-2': 131072, + 'grok-2-latest': 131072, + 'grok-2-1212': 131072, + 'grok-2-vision': 32768, + 'grok-2-vision-latest': 32768, + 'grok-2-vision-1212': 32768, + 'grok-3': 131072, + 'grok-3-fast': 131072, + 'grok-3-mini': 131072, + 'grok-3-mini-fast': 131072, + 'grok-4': 256000, + 'grok-4-fast': 2000000, + 'grok-4-1-fast': 2000000, + 'grok-code-fast': 256000, +}; + +const aggregateModels: Record = { + ...openAIModels, + ...googleModels, + ...bedrockModels, + ...xAIModels, + ...qwenModels, + kimi: 131000, + 'gpt-oss': 131000, + 'gpt-oss:20b': 131000, + 'gpt-oss-20b': 131000, + 'gpt-oss:120b': 131000, + 'gpt-oss-120b': 131000, + glm4: 128000, + 'glm-4': 128000, + 'glm-4-32b': 128000, + 'glm-4.5': 131000, + 'glm-4.5-air': 131000, + 'glm-4.5v': 66000, + 'glm-4.6': 200000, +}; + +/** + * Map of endpoint to model context token limits. + */ +export const maxTokensMap: Record> = { + [EModelEndpoint.azureOpenAI]: openAIModels, + [EModelEndpoint.openAI]: aggregateModels, + [EModelEndpoint.agents]: aggregateModels, + [EModelEndpoint.custom]: aggregateModels, + [EModelEndpoint.google]: googleModels, + [EModelEndpoint.anthropic]: anthropicModels, + [EModelEndpoint.bedrock]: bedrockModels, +}; + +/** + * Finds the first matching pattern in the tokens map. + * Searches in reverse order to match more specific patterns first. 
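+ *
+ * @example
+ * // Illustrative (editor's note): with keys { 'claude-': 100000, 'claude-3-opus': 200000 },
+ * // findMatchingPattern('claude-3-opus-20240229', map) returns 'claude-3-opus',
+ * // the later, more specific key, rather than the broad 'claude-' prefix.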
+ */ +export function findMatchingPattern( + modelName: string, + tokensMap: Record, +): string | null { + const keys = Object.keys(tokensMap); + const lowerModelName = modelName.toLowerCase(); + for (let i = keys.length - 1; i >= 0; i--) { + const modelKey = keys[i]; + if (lowerModelName.includes(modelKey)) { + return modelKey; + } + } + return null; +} + +/** + * Retrieves the maximum context tokens for a given model name. + * + * @param modelName - The name of the model to look up. + * @param endpoint - The endpoint (default is 'openAI'). + * @returns The maximum context tokens for the given model or undefined if no match is found. + * + * @example + * getModelMaxTokens('gpt-4o'); // Returns 127500 + * getModelMaxTokens('claude-3-opus', 'anthropic'); // Returns 200000 + * getModelMaxTokens('unknown-model'); // Returns undefined + */ +export function getModelMaxTokens( + modelName: string, + endpoint: string = EModelEndpoint.openAI, +): number | undefined { + if (typeof modelName !== 'string') { + return undefined; + } + + const tokensMap = maxTokensMap[endpoint]; + if (!tokensMap) { + // Fall back to aggregate models for unknown endpoints + return getModelMaxTokens(modelName, EModelEndpoint.openAI); + } + + // Try exact match first + if (tokensMap[modelName] !== undefined) { + return tokensMap[modelName]; + } + + // Try pattern matching + const matchedPattern = findMatchingPattern(modelName, tokensMap); + if (matchedPattern) { + return tokensMap[matchedPattern]; + } + + return undefined; +} + +/** + * Retrieves the model name key for a given model name input. + * If the exact model name isn't found, it searches for partial matches. + * + * @param modelName - The name of the model to look up. + * @param endpoint - The endpoint (default is 'openAI'). + * @returns The model name key for the given model; returns input if no match is found. + */ +export function matchModelName( + modelName: string, + endpoint: string = EModelEndpoint.openAI, +): string | undefined { + if (typeof modelName !== 'string') { + return undefined; + } + + const tokensMap = maxTokensMap[endpoint]; + if (!tokensMap) { + return modelName; + } + + if (tokensMap[modelName] !== undefined) { + return modelName; + } + + const matchedPattern = findMatchingPattern(modelName, tokensMap); + return matchedPattern || modelName; +} + +// Individual model maps are available for advanced use cases +// but not re-exported to avoid conflicts with config.ts + +// ============================================================================= +// OUTPUT TOKEN LIMITS +// ============================================================================= + +/** + * Maximum output tokens for OpenAI and similar models. + * Values from official documentation, slightly reduced to leave safety margin. + */ +const modelMaxOutputs: Record = { + o1: 32268, // -500 from max: 32,768 + 'o1-mini': 65136, // -500 from max: 65,536 + 'o1-preview': 32268, // -500 from max: 32,768 + 'gpt-5': 128000, + 'gpt-5-mini': 128000, + 'gpt-5-nano': 128000, + 'gpt-5-pro': 128000, + 'gpt-oss-20b': 131000, + 'gpt-oss-120b': 131000, + system_default: 32000, +}; + +/** + * Maximum output tokens for Anthropic Claude models. 
+ * Values from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names + */ +const anthropicMaxOutputs: Record = { + 'claude-3-haiku': 4096, + 'claude-3-sonnet': 4096, + 'claude-3-opus': 4096, + 'claude-haiku-4-5': 64000, + 'claude-sonnet-4': 64000, + 'claude-opus-4': 32000, + 'claude-opus-4-5': 64000, + 'claude-3.5-sonnet': 8192, + 'claude-3-5-sonnet': 8192, + 'claude-3.7-sonnet': 128000, + 'claude-3-7-sonnet': 128000, +}; + +/** + * Maximum output tokens for DeepSeek models. + * Values from https://api-docs.deepseek.com/quick_start/pricing + */ +const deepseekMaxOutputs: Record = { + deepseek: 8000, // deepseek-chat default: 4K, max: 8K + 'deepseek-chat': 8000, + 'deepseek-reasoner': 64000, // default: 32K, max: 64K + 'deepseek-r1': 64000, + 'deepseek-v3': 8000, + 'deepseek.r1': 64000, +}; + +/** + * Map of endpoint to model max output token limits. + */ +export const maxOutputTokensMap: Record> = { + [EModelEndpoint.anthropic]: anthropicMaxOutputs, + [EModelEndpoint.azureOpenAI]: modelMaxOutputs, + [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, + [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, +}; + +/** + * Retrieves the maximum output tokens for a given model name. + * + * @param modelName - The name of the model to look up. + * @param endpoint - The endpoint (default is 'openAI'). + * @returns The maximum output tokens for the given model or undefined if no match is found. + * + * @example + * getModelMaxOutputTokens('o1'); // Returns 32268 + * getModelMaxOutputTokens('claude-3-opus', 'anthropic'); // Returns 4096 + * getModelMaxOutputTokens('unknown-model'); // Returns 32000 (system_default) + */ +export function getModelMaxOutputTokens( + modelName: string, + endpoint: string = EModelEndpoint.openAI, +): number | undefined { + if (typeof modelName !== 'string') { + return undefined; + } + + const tokensMap = maxOutputTokensMap[endpoint]; + if (!tokensMap) { + // Fall back to openAI for unknown endpoints + return getModelMaxOutputTokens(modelName, EModelEndpoint.openAI); + } + + // Try exact match first + if (tokensMap[modelName] !== undefined) { + return tokensMap[modelName]; + } + + // Try pattern matching + const matchedPattern = findMatchingPattern(modelName, tokensMap); + if (matchedPattern) { + return tokensMap[matchedPattern]; + } + + // Return system_default if available + return tokensMap.system_default; +} + +// ============================================================================= +// TOKEN DEFAULTS +// ============================================================================= + +/** + * Centralized token-related default values. + * These replace hardcoded magic numbers throughout the codebase. 
+ */
+export const TOKEN_DEFAULTS = {
+  /** Fallback context window for agents when model lookup fails */
+  AGENT_CONTEXT_FALLBACK: 18000,
+  /** Legacy fallback for older clients */
+  LEGACY_CONTEXT_FALLBACK: 4097,
+  /** Safety margin multiplier (0.9 = reserve 10% for response) */
+  CONTEXT_SAFETY_MARGIN: 0.9,
+  /** Default max output tokens when not specified */
+  DEFAULT_MAX_OUTPUT: 32000,
+} as const;

From 8cedd5f45e160039f3b36aa3e0f96c8868caac9f Mon Sep 17 00:00:00 2001
From: Marco Beretta <81851188+berry-13@users.noreply.github.com>
Date: Sun, 14 Dec 2025 02:33:10 +0100
Subject: [PATCH 02/12] ♿ fix: Add i18n and accessibility to
 TokenUsageIndicator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Chat/Input/TokenUsageIndicator.tsx        | 27 +++++++++++++++----
 client/src/locales/en/translation.json        |  4 +++
 client/src/store/index.ts                     |  3 +++
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/client/src/components/Chat/Input/TokenUsageIndicator.tsx b/client/src/components/Chat/Input/TokenUsageIndicator.tsx
index affe950d6e..d22fad8cef 100644
--- a/client/src/components/Chat/Input/TokenUsageIndicator.tsx
+++ b/client/src/components/Chat/Input/TokenUsageIndicator.tsx
@@ -1,6 +1,6 @@
 import { memo } from 'react';
 import { TooltipAnchor } from '@librechat/client';
-import { useTokenUsage } from '~/hooks';
+import { useLocalize, useTokenUsage } from '~/hooks';
 import { cn } from '~/utils';

 function formatTokens(n: number): string {
@@ -14,6 +14,7 @@ function formatTokens(n: number): string {
 }

 const TokenUsageIndicator = memo(function TokenUsageIndicator() {
+  const localize = useLocalize();
   const { inputTokens, outputTokens, maxContext } = useTokenUsage();

   const totalUsed = inputTokens + outputTokens;
@@ -28,10 +29,21 @@ const TokenUsageIndicator = memo(function TokenUsageIndicator() {
   const offset = circumference - (percentage / 100) * circumference;

   const tooltipText = hasMaxContext
-    ? `Input: ${formatTokens(inputTokens)} | Output: ${formatTokens(outputTokens)} | Max: ${formatTokens(maxContext)}`
-    : `Input: ${formatTokens(inputTokens)} | Output: ${formatTokens(outputTokens)} | Max: N/A`;
+    ? localize('com_ui_token_usage_with_max', {
+        0: formatTokens(inputTokens),
+        1: formatTokens(outputTokens),
+        2: formatTokens(maxContext),
+      })
+    : localize('com_ui_token_usage_no_max', {
+        0: formatTokens(inputTokens),
+        1: formatTokens(outputTokens),
+      });

-  // Color based on percentage
+  const ariaLabel = hasMaxContext
+    ? localize('com_ui_token_usage_aria', { 0: Math.round(percentage).toString() })
+    : localize('com_ui_token_usage_indicator');
+
+  // Color based on percentage (using raw colors to match existing patterns in AudioRecorder.tsx)
   const getProgressColor = () => {
     if (!hasMaxContext) {
       return 'stroke-text-secondary';
@@ -49,12 +61,17 @@ const TokenUsageIndicator = memo(function TokenUsageIndicator() {
   return (
-    
+
+
+
+ {showPercentage && ( + + {Math.round(percentage)}% + + )} +
+ ); +} + +interface TokenRowProps { + label: string; + value: number; + total: number; + colorClass: string; +} + +function TokenRow({ label, value, total, colorClass }: TokenRowProps) { + const percentage = total > 0 ? Math.round((value / total) * 100) : 0; + + return ( +
+
+ {label} + + {formatTokens(value)} + ({percentage}%) + +
+ +
+ ); +} + +function TokenUsageContent() { + const localize = useLocalize(); + const { inputTokens, outputTokens, maxContext } = useTokenUsage(); + + const totalUsed = inputTokens + outputTokens; + const hasMaxContext = maxContext !== null && maxContext > 0; + const percentage = hasMaxContext ? Math.min((totalUsed / maxContext) * 100, 100) : 0; + + const getMainProgressColor = () => { + if (!hasMaxContext) { + return 'bg-text-secondary'; + } + if (percentage > 90) { + return 'bg-red-500'; + } + if (percentage > 75) { + return 'bg-yellow-500'; + } + return 'bg-green-500'; + }; + + return ( +
+ {/* Header */} +
+ + {localize('com_ui_token_usage_context')} + + {hasMaxContext && ( + 90, + 'text-yellow-500': percentage > 75 && percentage <= 90, + 'text-green-500': percentage <= 75, + })} + > + {localize('com_ui_token_usage_percent', { 0: Math.round(percentage).toString() })} + + )} +
+ + {/* Main Progress Bar */} + {hasMaxContext && ( +
+ +
+ {formatTokens(totalUsed)} + {formatTokens(maxContext)} +
+
+ )} + + {/* Divider */} +
+ + {/* Input/Output Breakdown */} +
+ + +
+ + {/* Total Section */} +
+
+ {localize('com_ui_token_usage_total')} + {formatTokens(totalUsed)} +
+
+ + {/* Max Context (when available) */} + {hasMaxContext && ( +
+ {localize('com_ui_token_usage_max_context')} + {formatTokens(maxContext)} +
+ )} +
+ ); +} + const TokenUsageIndicator = memo(function TokenUsageIndicator() { const localize = useLocalize(); const { inputTokens, outputTokens, maxContext } = useTokenUsage(); @@ -28,17 +169,6 @@ const TokenUsageIndicator = memo(function TokenUsageIndicator() { const circumference = 2 * Math.PI * radius; const offset = circumference - (percentage / 100) * circumference; - const tooltipText = hasMaxContext - ? localize('com_ui_token_usage_with_max', { - 0: formatTokens(inputTokens), - 1: formatTokens(outputTokens), - 2: formatTokens(maxContext), - }) - : localize('com_ui_token_usage_no_max', { - 0: formatTokens(inputTokens), - 1: formatTokens(outputTokens), - }); - const ariaLabel = hasMaxContext ? localize('com_ui_token_usage_aria_full', { 0: formatTokens(inputTokens), @@ -66,12 +196,11 @@ const TokenUsageIndicator = memo(function TokenUsageIndicator() { }; return ( - + +
- } - /> + + + + + + + + ); }); diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index 9a15b24253..4efe902309 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -1324,8 +1324,14 @@ "com_ui_token_url": "Token URL", "com_ui_token_usage_aria_full": "Token usage: {{0}} input, {{1}} output, {{2}} max context, {{3}}% used", "com_ui_token_usage_aria_no_max": "Token usage: {{0}} input, {{1}} output", + "com_ui_token_usage_context": "Context Usage", "com_ui_token_usage_indicator": "Token usage indicator", + "com_ui_token_usage_input": "Input", + "com_ui_token_usage_max_context": "Max Context", "com_ui_token_usage_no_max": "Input: {{0}} | Output: {{1}} | Max: N/A", + "com_ui_token_usage_output": "Output", + "com_ui_token_usage_percent": "{{0}}% used", + "com_ui_token_usage_total": "Total", "com_ui_token_usage_with_max": "Input: {{0}} | Output: {{1}} | Max: {{2}}", "com_ui_tokens": "tokens", "com_ui_tool_collection_prefix": "A collection of tools from", From 29b4d6300541541a84527f856fdcba79f4f4b15a Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Mon, 15 Dec 2025 22:16:39 +0100 Subject: [PATCH 08/12] fix: improve ProgressBar and TokenRow a11y --- .../Chat/Input/TokenUsageIndicator.tsx | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/client/src/components/Chat/Input/TokenUsageIndicator.tsx b/client/src/components/Chat/Input/TokenUsageIndicator.tsx index a70f356d03..72562044d9 100644 --- a/client/src/components/Chat/Input/TokenUsageIndicator.tsx +++ b/client/src/components/Chat/Input/TokenUsageIndicator.tsx @@ -25,11 +25,14 @@ function ProgressBar({ value, max, colorClass, showPercentage = false }: Progres return (
-
-
+
+
+
+
+
{showPercentage && ( @@ -56,7 +59,7 @@ function TokenRow({ label, value, total, colorClass }: TokenRowProps) { {label} {formatTokens(value)} - ({percentage}%) + ({percentage}%)
@@ -109,7 +112,7 @@ function TokenUsageContent() { {hasMaxContext && (
-
+
{formatTokens(totalUsed)} {formatTokens(maxContext)}
From 0b84a255365d18fb8b5ce06caf2aa261a9466a1b Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Mon, 15 Dec 2025 23:31:14 +0100 Subject: [PATCH 09/12] chore: remove unused i18n strings --- client/src/locales/en/translation.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index 4efe902309..187cd94d48 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -1325,14 +1325,11 @@ "com_ui_token_usage_aria_full": "Token usage: {{0}} input, {{1}} output, {{2}} max context, {{3}}% used", "com_ui_token_usage_aria_no_max": "Token usage: {{0}} input, {{1}} output", "com_ui_token_usage_context": "Context Usage", - "com_ui_token_usage_indicator": "Token usage indicator", "com_ui_token_usage_input": "Input", "com_ui_token_usage_max_context": "Max Context", - "com_ui_token_usage_no_max": "Input: {{0}} | Output: {{1}} | Max: N/A", "com_ui_token_usage_output": "Output", "com_ui_token_usage_percent": "{{0}}% used", "com_ui_token_usage_total": "Total", - "com_ui_token_usage_with_max": "Input: {{0}} | Output: {{1}} | Max: {{2}}", "com_ui_tokens": "tokens", "com_ui_tool_collection_prefix": "A collection of tools from", "com_ui_tool_list_collapse": "Collapse {{serverName}} tool list", From 01ca9b1655a751f9130d3ec725c61d3d88fb5616 Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Mon, 15 Dec 2025 23:55:43 +0100 Subject: [PATCH 10/12] feat: enhance token formatting and reset logic for new conversations --- .../src/components/Chat/Input/TokenUsageIndicator.tsx | 4 ++-- client/src/hooks/useTokenUsage.ts | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/client/src/components/Chat/Input/TokenUsageIndicator.tsx b/client/src/components/Chat/Input/TokenUsageIndicator.tsx index 72562044d9..f4a847ef06 100644 --- a/client/src/components/Chat/Input/TokenUsageIndicator.tsx +++ b/client/src/components/Chat/Input/TokenUsageIndicator.tsx @@ -5,10 +5,10 @@ import { cn } from '~/utils'; function formatTokens(n: number): string { if (n >= 1000000) { - return `${(n / 1000000).toFixed(1)}M`; + return `${(n / 1000000).toFixed(1).replace(/\.0$/, '')}M`; } if (n >= 1000) { - return `${(n / 1000).toFixed(1)}K`; + return `${(n / 1000).toFixed(1).replace(/\.0$/, '')}K`; } return n.toString(); } diff --git a/client/src/hooks/useTokenUsage.ts b/client/src/hooks/useTokenUsage.ts index 2dce4f87b2..3141c9c454 100644 --- a/client/src/hooks/useTokenUsage.ts +++ b/client/src/hooks/useTokenUsage.ts @@ -84,6 +84,17 @@ export function useTokenUsageComputation() { useEffect(() => { setTokenUsage(tokenData); }, [tokenData, setTokenUsage]); + + // Reset token usage when starting a new conversation + useEffect(() => { + if (paramId === 'new' && effectiveMessages.length === 0) { + setTokenUsage({ + inputTokens: 0, + outputTokens: 0, + maxContext: null, + }); + } + }, [paramId, effectiveMessages.length, setTokenUsage]); } /** From 71b94cdcaac6fe4a9a579eb8ac84094a9bd71f5a Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Mon, 15 Dec 2025 23:56:34 +0100 Subject: [PATCH 11/12] feat: enhance accessibility in TokenUsageIndicator --- .../Chat/Input/TokenUsageIndicator.tsx | 75 ++++++++++++------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/client/src/components/Chat/Input/TokenUsageIndicator.tsx 
b/client/src/components/Chat/Input/TokenUsageIndicator.tsx index f4a847ef06..1e8658bfdd 100644 --- a/client/src/components/Chat/Input/TokenUsageIndicator.tsx +++ b/client/src/components/Chat/Input/TokenUsageIndicator.tsx @@ -17,15 +17,23 @@ interface ProgressBarProps { value: number; max: number; colorClass: string; + label: string; showPercentage?: boolean; } -function ProgressBar({ value, max, colorClass, showPercentage = false }: ProgressBarProps) { +function ProgressBar({ value, max, colorClass, label, showPercentage = false }: ProgressBarProps) { const percentage = max > 0 ? Math.min((value / max) * 100, 100) : 0; return (
-
+
{showPercentage && ( - + )} @@ -48,9 +56,10 @@ interface TokenRowProps { value: number; total: number; colorClass: string; + ariaLabel: string; } -function TokenRow({ label, value, total, colorClass }: TokenRowProps) { +function TokenRow({ label, value, total, colorClass, ariaLabel }: TokenRowProps) { const percentage = total > 0 ? Math.round((value / total) * 100) : 0; return ( @@ -59,10 +68,12 @@ function TokenRow({ label, value, total, colorClass }: TokenRowProps) { {label} {formatTokens(value)} - ({percentage}%) +
- +
); } @@ -88,11 +99,18 @@ function TokenUsageContent() { return 'bg-green-500'; }; + const inputPercentage = totalUsed > 0 ? Math.round((inputTokens / totalUsed) * 100) : 0; + const outputPercentage = totalUsed > 0 ? Math.round((outputTokens / totalUsed) * 100) : 0; + return ( -
+
{/* Header */}
- + {localize('com_ui_token_usage_context')} {hasMaxContext && ( @@ -111,8 +129,13 @@ function TokenUsageContent() { {/* Main Progress Bar */} {hasMaxContext && (
- -
+ + @@ -120,7 +143,7 @@ function TokenUsageContent() { )} {/* Divider */} -
+
{/* Input/Output Breakdown */}
@@ -129,30 +152,16 @@ function TokenUsageContent() { value={inputTokens} total={totalUsed} colorClass="bg-blue-500" + ariaLabel={`${localize('com_ui_token_usage_input')}: ${formatTokens(inputTokens)}, ${inputPercentage}% of total`} />
- - {/* Total Section */} -
-
- {localize('com_ui_token_usage_total')} - {formatTokens(totalUsed)} -
-
- - {/* Max Context (when available) */} - {hasMaxContext && ( -
- {localize('com_ui_token_usage_max_context')} - {formatTokens(maxContext)} -
- )}
); } @@ -203,8 +212,9 @@ const TokenUsageIndicator = memo(function TokenUsageIndicator() { - + diff --git a/packages/data-provider/specs/tokens.spec.ts b/packages/data-provider/specs/tokens.spec.ts new file mode 100644 index 0000000000..37eeecbea6 --- /dev/null +++ b/packages/data-provider/specs/tokens.spec.ts @@ -0,0 +1,152 @@ +import { + findMatchingPattern, + getModelMaxTokens, + getModelMaxOutputTokens, + matchModelName, + maxTokensMap, +} from '../src/tokens'; +import { EModelEndpoint } from '../src/schemas'; + +describe('Token Pattern Matching', () => { + describe('findMatchingPattern', () => { + const testMap: Record = { + 'claude-': 100000, + 'claude-3': 200000, + 'claude-3-opus': 200000, + 'gpt-4': 8000, + 'gpt-4-turbo': 128000, + }; + + it('should match exact model names', () => { + expect(findMatchingPattern('claude-3-opus', testMap)).toBe('claude-3-opus'); + expect(findMatchingPattern('gpt-4-turbo', testMap)).toBe('gpt-4-turbo'); + }); + + it('should match more specific patterns first (reverse order)', () => { + // claude-3-opus-20240229 should match 'claude-3-opus' not 'claude-3' or 'claude-' + expect(findMatchingPattern('claude-3-opus-20240229', testMap)).toBe('claude-3-opus'); + }); + + it('should fall back to broader patterns when no specific match', () => { + // claude-3-haiku should match 'claude-3' (not 'claude-3-opus') + expect(findMatchingPattern('claude-3-haiku', testMap)).toBe('claude-3'); + }); + + it('should be case-insensitive', () => { + expect(findMatchingPattern('Claude-3-Opus', testMap)).toBe('claude-3-opus'); + expect(findMatchingPattern('GPT-4-TURBO', testMap)).toBe('gpt-4-turbo'); + }); + + it('should return null for unmatched models', () => { + expect(findMatchingPattern('unknown-model', testMap)).toBeNull(); + expect(findMatchingPattern('llama-2', testMap)).toBeNull(); + }); + + it('should NOT match when pattern appears in middle of model name (startsWith behavior)', () => { + // This is the key fix: "my-claude-wrapper" should NOT match "claude-" + expect(findMatchingPattern('my-claude-wrapper', testMap)).toBeNull(); + expect(findMatchingPattern('openai-gpt-4-proxy', testMap)).toBeNull(); + expect(findMatchingPattern('custom-claude-3-service', testMap)).toBeNull(); + }); + + it('should handle empty string model name', () => { + expect(findMatchingPattern('', testMap)).toBeNull(); + }); + + it('should handle empty tokens map', () => { + expect(findMatchingPattern('claude-3', {})).toBeNull(); + }); + }); + + describe('getModelMaxTokens', () => { + it('should return exact match tokens', () => { + expect(getModelMaxTokens('gpt-4o', EModelEndpoint.openAI)).toBe(127500); + expect(getModelMaxTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(200000); + }); + + it('should return pattern-matched tokens', () => { + // claude-3-opus-20240229 should match claude-3-opus pattern + expect(getModelMaxTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe(200000); + }); + + it('should return undefined for unknown models', () => { + expect(getModelMaxTokens('completely-unknown-model', EModelEndpoint.openAI)).toBeUndefined(); + }); + + it('should fall back to openAI for unknown endpoints', () => { + const result = getModelMaxTokens('gpt-4o', 'unknown-endpoint'); + expect(result).toBe(127500); + }); + + it('should handle non-string input gracefully', () => { + expect(getModelMaxTokens(null as unknown as string)).toBeUndefined(); + expect(getModelMaxTokens(undefined as unknown as string)).toBeUndefined(); + expect(getModelMaxTokens(123 as unknown as 
string)).toBeUndefined(); + }); + + it('should NOT match model names with pattern in middle', () => { + // A model like "my-gpt-4-wrapper" should not match "gpt-4" + expect(getModelMaxTokens('my-gpt-4-wrapper', EModelEndpoint.openAI)).toBeUndefined(); + }); + }); + + describe('getModelMaxOutputTokens', () => { + it('should return exact match output tokens', () => { + expect(getModelMaxOutputTokens('o1', EModelEndpoint.openAI)).toBe(32268); + expect(getModelMaxOutputTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(4096); + }); + + it('should return pattern-matched output tokens', () => { + expect(getModelMaxOutputTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe( + 4096, + ); + }); + + it('should return system_default for unknown models (openAI endpoint)', () => { + expect(getModelMaxOutputTokens('unknown-model', EModelEndpoint.openAI)).toBe(32000); + }); + + it('should handle non-string input gracefully', () => { + expect(getModelMaxOutputTokens(null as unknown as string)).toBeUndefined(); + expect(getModelMaxOutputTokens(undefined as unknown as string)).toBeUndefined(); + }); + }); + + describe('matchModelName', () => { + it('should return exact match model name', () => { + expect(matchModelName('gpt-4o', EModelEndpoint.openAI)).toBe('gpt-4o'); + }); + + it('should return pattern key for pattern matches', () => { + expect(matchModelName('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe( + 'claude-3-opus', + ); + }); + + it('should return input for unknown models', () => { + expect(matchModelName('unknown-model', EModelEndpoint.openAI)).toBe('unknown-model'); + }); + + it('should handle non-string input gracefully', () => { + expect(matchModelName(null as unknown as string)).toBeUndefined(); + }); + }); + + describe('maxTokensMap structure', () => { + it('should have entries for all major endpoints', () => { + expect(maxTokensMap[EModelEndpoint.openAI]).toBeDefined(); + expect(maxTokensMap[EModelEndpoint.anthropic]).toBeDefined(); + expect(maxTokensMap[EModelEndpoint.google]).toBeDefined(); + expect(maxTokensMap[EModelEndpoint.azureOpenAI]).toBeDefined(); + expect(maxTokensMap[EModelEndpoint.bedrock]).toBeDefined(); + }); + + it('should have positive token values', () => { + Object.values(maxTokensMap).forEach((endpointMap) => { + Object.entries(endpointMap).forEach(([model, tokens]) => { + expect(tokens).toBeGreaterThan(0); + }); + }); + }); + }); +}); diff --git a/packages/data-provider/src/tokens.ts b/packages/data-provider/src/tokens.ts index c5bbbb233b..f5c6d6eedc 100644 --- a/packages/data-provider/src/tokens.ts +++ b/packages/data-provider/src/tokens.ts @@ -313,6 +313,10 @@ export const maxTokensMap: Record> = { /** * Finds the first matching pattern in the tokens map. * Searches in reverse order to match more specific patterns first. + * + * Note: This relies on the insertion order of keys in the tokensMap object. + * More specific patterns must be defined later in the object to be matched first. + * If the order of keys is changed, the matching behavior may be affected. */ export function findMatchingPattern( modelName: string, @@ -322,7 +326,7 @@ export function findMatchingPattern( const lowerModelName = modelName.toLowerCase(); for (let i = keys.length - 1; i >= 0; i--) { const modelKey = keys[i]; - if (lowerModelName.includes(modelKey)) { + if (lowerModelName.startsWith(modelKey)) { return modelKey; } } @@ -510,7 +514,6 @@ export function getModelMaxOutputTokens( /** * Centralized token-related default values. 
- * These replace hardcoded magic numbers throughout the codebase. */ export const TOKEN_DEFAULTS = { /** Fallback context window for agents when model lookup fails */
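Taken as a whole, the series centralizes context-window knowledge in `librechat-data-provider`. A minimal sketch of how a consumer can combine these exports, mirroring the fallback-and-margin logic from `packages/api/src/agents/initialize.ts` (editor's illustration; the `gpt-4o` figure comes from the map above):

```ts
import { getModelMaxTokens, TOKEN_DEFAULTS } from 'librechat-data-provider';

// Resolve the model's context window, falling back when the lookup fails.
const maxContext =
  getModelMaxTokens('gpt-4o') ?? TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK; // 127500

// Reserve headroom for the response, as initializeAgent does.
const maxOutputTokens = 0; // no explicit output budget in this sketch
const effectiveContext = Math.round(
  (maxContext - maxOutputTokens) * TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN,
);

console.log(effectiveContext); // 114750
```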