From 841a37e8cbf2dba093b4f9e5966f6987888e15fb Mon Sep 17 00:00:00 2001
From: Marco Beretta <81851188+berry-13@users.noreply.github.com>
Date: Sun, 14 Dec 2025 00:36:45 +0100
Subject: [PATCH] ✨ feat: Add token usage indicator to chat input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add TokenUsageIndicator component with circular progress ring
Create useTokenUsage hook with Jotai atom for state
Add model context window lookups to data-provider
Consolidate token utilities (output limits, TOKEN_DEFAULTS)
Display input/output tokens and percentage of context used
---
 api/app/clients/specs/FakeClient.js           |   5 +-
 client/src/components/Chat/Input/ChatForm.tsx |   4 +
 .../Chat/Input/TokenUsageIndicator.tsx        |  87 +++
 client/src/hooks/index.ts                     |   1 +
 client/src/hooks/useTokenUsage.ts             |  77 +++
 client/src/store/tokenUsage.ts                |  13 +
 packages/api/src/agents/initialize.ts         |   9 +-
 packages/api/src/utils/tokens.ts              | 362 +------
 packages/data-provider/src/index.ts           |   2 +
 packages/data-provider/src/schemas.ts         |   1 +
 packages/data-provider/src/tokens.ts          | 497 ++++++++++++++++++
 11 files changed, 710 insertions(+), 348 deletions(-)
 create mode 100644 client/src/components/Chat/Input/TokenUsageIndicator.tsx
 create mode 100644 client/src/hooks/useTokenUsage.ts
 create mode 100644 client/src/store/tokenUsage.ts
 create mode 100644 packages/data-provider/src/tokens.ts

diff --git a/api/app/clients/specs/FakeClient.js b/api/app/clients/specs/FakeClient.js
index d1d07a967d..58480b4018 100644
--- a/api/app/clients/specs/FakeClient.js
+++ b/api/app/clients/specs/FakeClient.js
@@ -1,4 +1,5 @@
 const { getModelMaxTokens } = require('@librechat/api');
+const { TOKEN_DEFAULTS } = require('librechat-data-provider');
 const BaseClient = require('../BaseClient');
 
 class FakeClient extends BaseClient {
@@ -41,7 +42,9 @@ class FakeClient extends BaseClient {
     }
 
     this.maxContextTokens =
-      this.options.maxContextTokens ?? getModelMaxTokens(this.modelOptions.model) ?? 4097;
+      this.options.maxContextTokens ??
+      getModelMaxTokens(this.modelOptions.model) ??
+      TOKEN_DEFAULTS.LEGACY_CONTEXT_FALLBACK;
   }
   buildMessages() {}
   getTokenCount(str) {
diff --git a/client/src/components/Chat/Input/ChatForm.tsx b/client/src/components/Chat/Input/ChatForm.tsx
index 8cccf6cf53..9012a734bd 100644
--- a/client/src/components/Chat/Input/ChatForm.tsx
+++ b/client/src/components/Chat/Input/ChatForm.tsx
@@ -18,7 +18,9 @@ import {
   useQueryParams,
   useSubmitMessage,
   useFocusChatEffect,
+  useTokenUsageComputation,
 } from '~/hooks';
+import TokenUsageIndicator from './TokenUsageIndicator';
 import { mainTextareaId, BadgeItem } from '~/common';
 import AttachFileChat from './Files/AttachFileChat';
 import FileFormChat from './Files/FileFormChat';
@@ -39,6 +41,7 @@ const ChatForm = memo(({ index = 0 }: { index?: number }) => {
   const submitButtonRef = useRef<HTMLButtonElement>(null);
   const textAreaRef = useRef<HTMLTextAreaElement>(null);
   useFocusChatEffect(textAreaRef);
+  useTokenUsageComputation();
 
   const localize = useLocalize();
   const [isCollapsed, setIsCollapsed] = useState(false);
@@ -332,6 +335,7 @@ const ChatForm = memo(({ index = 0 }: { index?: number }) => {
               }
             />
           </div>
+          <TokenUsageIndicator />
           {SpeechToText && (
diff --git a/client/src/components/Chat/Input/TokenUsageIndicator.tsx b/client/src/components/Chat/Input/TokenUsageIndicator.tsx
new file mode 100644
--- /dev/null
+++ b/client/src/components/Chat/Input/TokenUsageIndicator.tsx
@@ -0,0 +1,87 @@
+import { memo } from 'react';
+import { TooltipAnchor } from '~/components/ui';
+import { useTokenUsage } from '~/hooks';
+
+function formatTokens(n: number): string {
+  if (n >= 1000000) {
+    return `${(n / 1000000).toFixed(1)}M`;
+  }
+  if (n >= 1000) {
+    return `${(n / 1000).toFixed(1)}K`;
+  }
+  return n.toString();
+}
+
+const TokenUsageIndicator = memo(function TokenUsageIndicator() {
+  const { inputTokens, outputTokens, maxContext } = useTokenUsage();
+
+  const totalUsed = inputTokens + outputTokens;
+  const hasMaxContext = maxContext !== null && maxContext > 0;
+  const percentage = hasMaxContext ? Math.min((totalUsed / maxContext) * 100, 100) : 0;
+
+  // Ring calculations
+  const size = 28;
+  const strokeWidth = 2.5;
+  const radius = (size - strokeWidth) / 2;
+  const circumference = 2 * Math.PI * radius;
+  const offset = circumference - (percentage / 100) * circumference;
+
+  const tooltipText = hasMaxContext
+    ? `Input: ${formatTokens(inputTokens)} | Output: ${formatTokens(outputTokens)} | Max: ${formatTokens(maxContext)}`
+    : `Input: ${formatTokens(inputTokens)} | Output: ${formatTokens(outputTokens)} | Max: N/A`;
+
+  // Color based on percentage
+  const getProgressColor = () => {
+    if (!hasMaxContext) {
+      return 'stroke-text-secondary';
+    }
+    if (percentage > 90) {
+      return 'stroke-red-500';
+    }
+    if (percentage > 75) {
+      return 'stroke-yellow-500';
+    }
+    return 'stroke-green-500';
+  };
+
+  return (
+    <TooltipAnchor
+      description={tooltipText}
+      render={
+        <div className="flex items-center justify-center">
+          <svg width={size} height={size} className="-rotate-90">
+            {/* Background ring */}
+            <circle
+              cx={size / 2}
+              cy={size / 2}
+              r={radius}
+              fill="none"
+              strokeWidth={strokeWidth}
+              className="stroke-border-medium"
+            />
+            {/* Progress ring */}
+            <circle
+              cx={size / 2}
+              cy={size / 2}
+              r={radius}
+              fill="none"
+              strokeWidth={strokeWidth}
+              strokeDasharray={circumference}
+              strokeDashoffset={offset}
+              strokeLinecap="round"
+              className={getProgressColor()}
+            />
+          </svg>
+        </div>
+ } + /> + ); +}); + +export default TokenUsageIndicator; diff --git a/client/src/hooks/index.ts b/client/src/hooks/index.ts index f8e23a95e1..62e09a5b17 100644 --- a/client/src/hooks/index.ts +++ b/client/src/hooks/index.ts @@ -35,3 +35,4 @@ export { default as useTextToSpeech } from './Input/useTextToSpeech'; export { default as useGenerationsByLatest } from './useGenerationsByLatest'; export { default as useLocalizedConfig } from './useLocalizedConfig'; export { default as useResourcePermissions } from './useResourcePermissions'; +export { default as useTokenUsage, useTokenUsageComputation } from './useTokenUsage'; diff --git a/client/src/hooks/useTokenUsage.ts b/client/src/hooks/useTokenUsage.ts new file mode 100644 index 0000000000..80b6e6144f --- /dev/null +++ b/client/src/hooks/useTokenUsage.ts @@ -0,0 +1,77 @@ +import { useEffect, useMemo } from 'react'; +import { useSetAtom, useAtomValue } from 'jotai'; +import type { TMessage } from 'librechat-data-provider'; +import { getModelMaxTokens } from 'librechat-data-provider'; +import { tokenUsageAtom, type TokenUsage } from '~/store/tokenUsage'; +import { useGetMessagesByConvoId } from '~/data-provider'; +import { useChatContext } from '~/Providers'; + +/** + * Hook to compute and update token usage from conversation messages. + * Should be called in a component that has access to useChatContext. + */ +export function useTokenUsageComputation() { + const { conversation } = useChatContext(); + const conversationId = conversation?.conversationId ?? ''; + const setTokenUsage = useSetAtom(tokenUsageAtom); + + // Use the query hook to get reactive messages + const { data: messages } = useGetMessagesByConvoId(conversationId, { + enabled: !!conversationId && conversationId !== 'new', + }); + + // Compute token usage whenever messages change + const tokenData = useMemo(() => { + let inputTokens = 0; + let outputTokens = 0; + + if (messages && Array.isArray(messages)) { + for (const msg of messages as TMessage[]) { + const count = msg.tokenCount ?? 0; + if (msg.isCreatedByUser) { + inputTokens += count; + } else { + outputTokens += count; + } + } + } + + // Determine max context: explicit setting or model default + let maxContext: number | null = conversation?.maxContextTokens ?? null; + + // If no explicit maxContextTokens, try to look up model default + if (maxContext === null && conversation?.model) { + const endpoint = conversation.endpointType ?? conversation.endpoint ?? ''; + const modelDefault = getModelMaxTokens(conversation.model, endpoint); + if (modelDefault !== undefined) { + maxContext = modelDefault; + } + } + + return { + inputTokens, + outputTokens, + maxContext, + }; + }, [ + messages, + conversation?.maxContextTokens, + conversation?.model, + conversation?.endpoint, + conversation?.endpointType, + ]); + + // Update the atom when computed values change + useEffect(() => { + setTokenUsage(tokenData); + }, [tokenData, setTokenUsage]); +} + +/** + * Hook to read the current token usage values. 
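+ *
+ * @example
+ * // Illustrative consumer (names are from this patch; the layout is a sketch):
+ * const { inputTokens, outputTokens, maxContext } = useTokenUsage();
+ * const total = inputTokens + outputTokens;
+ * const percentUsed = maxContext !== null && maxContext > 0 ? (total / maxContext) * 100 : null;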
+ */
+export function useTokenUsage(): TokenUsage {
+  return useAtomValue(tokenUsageAtom);
+}
+
+export default useTokenUsage;
diff --git a/client/src/store/tokenUsage.ts b/client/src/store/tokenUsage.ts
new file mode 100644
index 0000000000..e4965484bf
--- /dev/null
+++ b/client/src/store/tokenUsage.ts
@@ -0,0 +1,13 @@
+import { atom } from 'jotai';
+
+export type TokenUsage = {
+  inputTokens: number;
+  outputTokens: number;
+  maxContext: number | null; // null = N/A
+};
+
+export const tokenUsageAtom = atom<TokenUsage>({
+  inputTokens: 0,
+  outputTokens: 0,
+  maxContext: null,
+});
diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts
index a37ddf4848..d5d2bbd6e6 100644
--- a/packages/api/src/agents/initialize.ts
+++ b/packages/api/src/agents/initialize.ts
@@ -7,6 +7,7 @@ import {
   isAgentsEndpoint,
   replaceSpecialVars,
   providerEndpointMap,
+  TOKEN_DEFAULTS,
 } from 'librechat-data-provider';
 import type {
   AgentToolResources,
@@ -240,7 +241,7 @@ export async function initializeAgent(
       providerEndpointMap[provider as keyof typeof providerEndpointMap],
       options.endpointTokenConfig,
     ),
-    18000,
+    TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK,
   );
 
   if (
@@ -293,7 +294,7 @@ export async function initializeAgent(
     agent.additional_instructions = artifactsPromptResult ?? undefined;
   }
 
-  const agentMaxContextNum = Number(agentMaxContextTokens) || 18000;
+  const agentMaxContextNum = Number(agentMaxContextTokens) || TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK;
   const maxOutputTokensNum = Number(maxOutputTokens) || 0;
 
   const finalAttachments: IMongoFile[] = (primedAttachments ?? [])
@@ -308,7 +309,9 @@ export async function initializeAgent(
     userMCPAuthMap,
     toolContextMap: toolContextMap ?? {},
     useLegacyContent: !!options.useLegacyContent,
-    maxContextTokens: Math.round((agentMaxContextNum - maxOutputTokensNum) * 0.9),
+    maxContextTokens: Math.round(
+      (agentMaxContextNum - maxOutputTokensNum) * TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN,
+    ),
   };
 
   return initializedAgent;
diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts
index 12b356c6a7..d75936350f 100644
--- a/packages/api/src/utils/tokens.ts
+++ b/packages/api/src/utils/tokens.ts
@@ -1,351 +1,25 @@
 import z from 'zod';
-import { EModelEndpoint } from 'librechat-data-provider';
+import {
+  EModelEndpoint,
+  maxTokensMap,
+  maxOutputTokensMap,
+  TOKEN_DEFAULTS,
+  findMatchingPattern as findMatchingPatternSimple,
+  getModelMaxTokens as getModelMaxTokensSimple,
+  getModelMaxOutputTokens as getModelMaxOutputTokensSimple,
+  matchModelName as matchModelNameSimple,
+} from 'librechat-data-provider';
 import type { EndpointTokenConfig, TokenConfig } from '~/types';
 
-const openAIModels = {
-  'o4-mini': 200000,
-  'o3-mini': 195000, // -5000 from max
-  o3: 200000,
-  o1: 195000, // -5000 from max
-  'o1-mini': 127500, // -500 from max
-  'o1-preview': 127500, // -500 from max
-  'gpt-4': 8187, // -5 from max
-  'gpt-4-0613': 8187, // -5 from max
-  'gpt-4-32k': 32758, // -10 from max
-  'gpt-4-32k-0314': 32758, // -10 from max
-  'gpt-4-32k-0613': 32758, // -10 from max
-  'gpt-4-1106': 127500, // -500 from max
-  'gpt-4-0125': 127500, // -500 from max
-  'gpt-4.5': 127500, // -500 from max
-  'gpt-4.1': 1047576,
-  'gpt-4.1-mini': 1047576,
-  'gpt-4.1-nano': 1047576,
-  'gpt-5': 400000,
-  'gpt-5-mini': 400000,
-  'gpt-5-nano': 400000,
-  'gpt-5-pro': 400000,
-  'gpt-4o': 127500, // -500 from max
-  'gpt-4o-mini': 127500, // -500 from max
-  'gpt-4o-2024-05-13': 127500, // -500 from max
-  'gpt-4-turbo': 127500, // -500 from max
-  'gpt-4-vision': 127500, 
// -500 from max - 'gpt-3.5-turbo': 16375, // -10 from max - 'gpt-3.5-turbo-0613': 4092, // -5 from max - 'gpt-3.5-turbo-0301': 4092, // -5 from max - 'gpt-3.5-turbo-16k': 16375, // -10 from max - 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max - 'gpt-3.5-turbo-1106': 16375, // -10 from max - 'gpt-3.5-turbo-0125': 16375, // -10 from max -}; +// Re-export from data-provider for backwards compatibility +export { maxTokensMap, maxOutputTokensMap, TOKEN_DEFAULTS }; -const mistralModels = { - 'mistral-': 31990, // -10 from max - 'mistral-7b': 31990, // -10 from max - 'mistral-small': 31990, // -10 from max - 'mixtral-8x7b': 31990, // -10 from max - 'mixtral-8x22b': 65536, - 'mistral-large': 131000, - 'mistral-large-2402': 127500, - 'mistral-large-2407': 127500, - 'mistral-nemo': 131000, - 'pixtral-large': 131000, - 'mistral-saba': 32000, - codestral: 256000, - 'ministral-8b': 131000, - 'ministral-3b': 131000, -}; - -const cohereModels = { - 'command-light': 4086, // -10 from max - 'command-light-nightly': 8182, // -10 from max - command: 4086, // -10 from max - 'command-nightly': 8182, // -10 from max - 'command-text': 4086, // -10 from max - 'command-r': 127500, // -500 from max - 'command-r-plus': 127500, // -500 from max -}; - -const googleModels = { - /* Max I/O is combined so we subtract the amount from max response tokens for actual total */ - gemma: 8196, - 'gemma-2': 32768, - 'gemma-3': 32768, - 'gemma-3-27b': 131072, - gemini: 30720, // -2048 from max - 'gemini-pro-vision': 12288, - 'gemini-exp': 2000000, - 'gemini-3': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5-pro': 1000000, - 'gemini-2.5-flash': 1000000, - 'gemini-2.5-flash-lite': 1000000, - 'gemini-2.0': 2000000, - 'gemini-2.0-flash': 1000000, - 'gemini-2.0-flash-lite': 1000000, - 'gemini-1.5': 1000000, - 'gemini-1.5-flash': 1000000, - 'gemini-1.5-flash-8b': 1000000, - 'text-bison-32k': 32758, // -10 from max - 'chat-bison-32k': 32758, // -10 from max - 'code-bison-32k': 32758, // -10 from max - 'codechat-bison-32k': 32758, - /* Codey, -5 from max: 6144 */ - 'code-': 6139, - 'codechat-': 6139, - /* PaLM2, -5 from max: 8192 */ - 'text-': 8187, - 'chat-': 8187, -}; - -const anthropicModels = { - 'claude-': 100000, - 'claude-instant': 100000, - 'claude-2': 100000, - 'claude-2.1': 200000, - 'claude-3': 200000, - 'claude-3-haiku': 200000, - 'claude-3-sonnet': 200000, - 'claude-3-opus': 200000, - 'claude-3.5-haiku': 200000, - 'claude-3-5-haiku': 200000, - 'claude-3-5-sonnet': 200000, - 'claude-3.5-sonnet': 200000, - 'claude-3-7-sonnet': 200000, - 'claude-3.7-sonnet': 200000, - 'claude-3-5-sonnet-latest': 200000, - 'claude-3.5-sonnet-latest': 200000, - 'claude-haiku-4-5': 200000, - 'claude-sonnet-4': 1000000, - 'claude-4': 200000, - 'claude-opus-4': 200000, - 'claude-opus-4-5': 200000, -}; - -const deepseekModels = { - deepseek: 128000, - 'deepseek-chat': 128000, - 'deepseek-reasoner': 128000, - 'deepseek-r1': 128000, - 'deepseek-v3': 128000, - 'deepseek.r1': 128000, -}; - -const metaModels = { - // Basic patterns - llama3: 8000, - llama2: 4000, - 'llama-3': 8000, - 'llama-2': 4000, - - // llama3.x pattern - 'llama3.1': 127500, - 'llama3.2': 127500, - 'llama3.3': 127500, - - // llama3-x pattern - 'llama3-1': 127500, - 'llama3-2': 127500, - 'llama3-3': 127500, - - // llama-3.x pattern - 'llama-3.1': 127500, - 'llama-3.2': 127500, - 'llama-3.3': 127500, - - // llama3.x:Nb pattern - 'llama3.1:405b': 127500, - 'llama3.1:70b': 127500, - 'llama3.1:8b': 127500, - 
'llama3.2:1b': 127500, - 'llama3.2:3b': 127500, - 'llama3.2:11b': 127500, - 'llama3.2:90b': 127500, - 'llama3.3:70b': 127500, - - // llama3-x-Nb pattern - 'llama3-1-405b': 127500, - 'llama3-1-70b': 127500, - 'llama3-1-8b': 127500, - 'llama3-2-1b': 127500, - 'llama3-2-3b': 127500, - 'llama3-2-11b': 127500, - 'llama3-2-90b': 127500, - 'llama3-3-70b': 127500, - - // llama-3.x-Nb pattern - 'llama-3.1-405b': 127500, - 'llama-3.1-70b': 127500, - 'llama-3.1-8b': 127500, - 'llama-3.2-1b': 127500, - 'llama-3.2-3b': 127500, - 'llama-3.2-11b': 127500, - 'llama-3.2-90b': 127500, - 'llama-3.3-70b': 127500, - - // Original llama2/3 patterns - 'llama3-70b': 8000, - 'llama3-8b': 8000, - 'llama2-70b': 4000, - 'llama2-13b': 4000, - 'llama3:70b': 8000, - 'llama3:8b': 8000, - 'llama2:70b': 4000, -}; - -const qwenModels = { - qwen: 32000, - 'qwen2.5': 32000, - 'qwen-turbo': 1000000, - 'qwen-plus': 131000, - 'qwen-max': 32000, - 'qwq-32b': 32000, - // Qwen3 models - qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) - 'qwen3-8b': 128000, - 'qwen3-14b': 40960, - 'qwen3-30b-a3b': 40960, - 'qwen3-32b': 40960, - 'qwen3-235b-a22b': 40960, - // Qwen3 VL (Vision-Language) models - 'qwen3-vl-8b-thinking': 256000, - 'qwen3-vl-8b-instruct': 262144, - 'qwen3-vl-30b-a3b': 262144, - 'qwen3-vl-235b-a22b': 131072, - // Qwen3 specialized models - 'qwen3-max': 256000, - 'qwen3-coder': 262144, - 'qwen3-coder-30b-a3b': 262144, - 'qwen3-coder-plus': 128000, - 'qwen3-coder-flash': 128000, - 'qwen3-next-80b-a3b': 262144, -}; - -const ai21Models = { - 'j2-mid': 8182, // -10 from max - 'j2-ultra': 8182, // -10 from max - 'jamba-instruct': 255500, // -500 from max -}; - -const amazonModels = { - // Amazon Titan models - 'titan-text-lite': 4000, - 'titan-text-express': 8000, - 'titan-text-premier': 31500, // -500 from max - // Amazon Nova models - // https://aws.amazon.com/ai/generative-ai/nova/ - 'nova-micro': 127000, // -1000 from max - 'nova-lite': 295000, // -5000 from max - 'nova-pro': 295000, // -5000 from max - 'nova-premier': 995000, // -5000 from max -}; - -const bedrockModels = { - ...anthropicModels, - ...mistralModels, - ...cohereModels, - ...deepseekModels, - ...metaModels, - ...ai21Models, - ...amazonModels, -}; - -const xAIModels = { - grok: 131072, - 'grok-beta': 131072, - 'grok-vision-beta': 8192, - 'grok-2': 131072, - 'grok-2-latest': 131072, - 'grok-2-1212': 131072, - 'grok-2-vision': 32768, - 'grok-2-vision-latest': 32768, - 'grok-2-vision-1212': 32768, - 'grok-3': 131072, - 'grok-3-fast': 131072, - 'grok-3-mini': 131072, - 'grok-3-mini-fast': 131072, - 'grok-4': 256000, // 256K context - 'grok-4-fast': 2000000, // 2M context - 'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants) - 'grok-code-fast': 256000, // 256K context -}; - -const aggregateModels = { - ...openAIModels, - ...googleModels, - ...bedrockModels, - ...xAIModels, - ...qwenModels, - // misc. 
-  kimi: 131000,
-  // GPT-OSS
-  'gpt-oss': 131000,
-  'gpt-oss:20b': 131000,
-  'gpt-oss-20b': 131000,
-  'gpt-oss:120b': 131000,
-  'gpt-oss-120b': 131000,
-  // GLM models (Zhipu AI)
-  glm4: 128000,
-  'glm-4': 128000,
-  'glm-4-32b': 128000,
-  'glm-4.5': 131000,
-  'glm-4.5-air': 131000,
-  'glm-4.5v': 66000,
-  'glm-4.6': 200000,
-};
-
-export const maxTokensMap = {
-  [EModelEndpoint.azureOpenAI]: openAIModels,
-  [EModelEndpoint.openAI]: aggregateModels,
-  [EModelEndpoint.agents]: aggregateModels,
-  [EModelEndpoint.custom]: aggregateModels,
-  [EModelEndpoint.google]: googleModels,
-  [EModelEndpoint.anthropic]: anthropicModels,
-  [EModelEndpoint.bedrock]: bedrockModels,
-};
-
-export const modelMaxOutputs = {
-  o1: 32268, // -500 from max: 32,768
-  'o1-mini': 65136, // -500 from max: 65,536
-  'o1-preview': 32268, // -500 from max: 32,768
-  'gpt-5': 128000,
-  'gpt-5-mini': 128000,
-  'gpt-5-nano': 128000,
-  'gpt-5-pro': 128000,
-  'gpt-oss-20b': 131000,
-  'gpt-oss-120b': 131000,
-  system_default: 32000,
-};
-
-/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */
-const anthropicMaxOutputs = {
-  'claude-3-haiku': 4096,
-  'claude-3-sonnet': 4096,
-  'claude-3-opus': 4096,
-  'claude-haiku-4-5': 64000,
-  'claude-sonnet-4': 64000,
-  'claude-opus-4': 32000,
-  'claude-opus-4-5': 64000,
-  'claude-3.5-sonnet': 8192,
-  'claude-3-5-sonnet': 8192,
-  'claude-3.7-sonnet': 128000,
-  'claude-3-7-sonnet': 128000,
-};
-
-/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */
-const deepseekMaxOutputs = {
-  deepseek: 8000, // deepseek-chat default: 4K, max: 8K
-  'deepseek-chat': 8000,
-  'deepseek-reasoner': 64000, // default: 32K, max: 64K
-  'deepseek-r1': 64000,
-  'deepseek-v3': 8000,
-  'deepseek.r1': 64000,
-};
-
-export const maxOutputTokensMap = {
-  [EModelEndpoint.anthropic]: anthropicMaxOutputs,
-  [EModelEndpoint.azureOpenAI]: modelMaxOutputs,
-  [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
-  [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
+// Re-export simple versions (for use without EndpointTokenConfig)
+export {
+  findMatchingPatternSimple,
+  getModelMaxTokensSimple,
+  getModelMaxOutputTokensSimple,
+  matchModelNameSimple,
 };
 
 /**
diff --git a/packages/data-provider/src/index.ts b/packages/data-provider/src/index.ts
index c57ca82845..ba21ece55e 100644
--- a/packages/data-provider/src/index.ts
+++ b/packages/data-provider/src/index.ts
@@ -47,3 +47,5 @@ export { default as createPayload } from './createPayload';
 /* feedback */
 export * from './feedback';
 export * from './parameterSettings';
+/* token limits */
+export * from './tokens';
diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts
index 7dabc549db..9d9c540c85 100644
--- a/packages/data-provider/src/schemas.ts
+++ b/packages/data-provider/src/schemas.ts
@@ -618,6 +618,7 @@ export type TMessage = z.input<typeof tMessageSchema> & {
   attachments?: TAttachment[];
   clientTimestamp?: string;
   feedback?: TFeedback;
+  tokenCount?: number;
 };
 
 export const coerceNumber = z.union([z.number(), z.string()]).transform((val) => {
diff --git a/packages/data-provider/src/tokens.ts b/packages/data-provider/src/tokens.ts
new file mode 100644
index 0000000000..130723232f
--- /dev/null
+++ b/packages/data-provider/src/tokens.ts
@@ -0,0 +1,497 @@
+import { EModelEndpoint } from './schemas';
+
+/**
+ * Model context window token limits.
+ * These values represent the maximum context tokens (input) for each model.
+ * Values are slightly reduced from actual max to leave room for output tokens.
+ */
+
+const openAIModels: Record<string, number> = {
+  'o4-mini': 200000,
+  'o3-mini': 195000,
+  o3: 200000,
+  o1: 195000,
+  'o1-mini': 127500,
+  'o1-preview': 127500,
+  'gpt-4': 8187,
+  'gpt-4-0613': 8187,
+  'gpt-4-32k': 32758,
+  'gpt-4-32k-0314': 32758,
+  'gpt-4-32k-0613': 32758,
+  'gpt-4-1106': 127500,
+  'gpt-4-0125': 127500,
+  'gpt-4.5': 127500,
+  'gpt-4.1': 1047576,
+  'gpt-4.1-mini': 1047576,
+  'gpt-4.1-nano': 1047576,
+  'gpt-5': 400000,
+  'gpt-5-mini': 400000,
+  'gpt-5-nano': 400000,
+  'gpt-5-pro': 400000,
+  'gpt-4o': 127500,
+  'gpt-4o-mini': 127500,
+  'gpt-4o-2024-05-13': 127500,
+  'gpt-4-turbo': 127500,
+  'gpt-4-vision': 127500,
+  'gpt-3.5-turbo': 16375,
+  'gpt-3.5-turbo-0613': 4092,
+  'gpt-3.5-turbo-0301': 4092,
+  'gpt-3.5-turbo-16k': 16375,
+  'gpt-3.5-turbo-16k-0613': 16375,
+  'gpt-3.5-turbo-1106': 16375,
+  'gpt-3.5-turbo-0125': 16375,
+};
+
+const mistralModels: Record<string, number> = {
+  'mistral-': 31990,
+  'mistral-7b': 31990,
+  'mistral-small': 31990,
+  'mixtral-8x7b': 31990,
+  'mixtral-8x22b': 65536,
+  'mistral-large': 131000,
+  'mistral-large-2402': 127500,
+  'mistral-large-2407': 127500,
+  'mistral-nemo': 131000,
+  'pixtral-large': 131000,
+  'mistral-saba': 32000,
+  codestral: 256000,
+  'ministral-8b': 131000,
+  'ministral-3b': 131000,
+};
+
+const cohereModels: Record<string, number> = {
+  'command-light': 4086,
+  'command-light-nightly': 8182,
+  command: 4086,
+  'command-nightly': 8182,
+  'command-text': 4086,
+  'command-r': 127500,
+  'command-r-plus': 127500,
+};
+
+const googleModels: Record<string, number> = {
+  gemma: 8196,
+  'gemma-2': 32768,
+  'gemma-3': 32768,
+  'gemma-3-27b': 131072,
+  gemini: 30720,
+  'gemini-pro-vision': 12288,
+  'gemini-exp': 2000000,
+  'gemini-3': 1000000,
+  'gemini-2.5': 1000000,
+  'gemini-2.5-pro': 1000000,
+  'gemini-2.5-flash': 1000000,
+  'gemini-2.5-flash-lite': 1000000,
+  'gemini-2.0': 2000000,
+  'gemini-2.0-flash': 1000000,
+  'gemini-2.0-flash-lite': 1000000,
+  'gemini-1.5': 1000000,
+  'gemini-1.5-flash': 1000000,
+  'gemini-1.5-flash-8b': 1000000,
+  'text-bison-32k': 32758,
+  'chat-bison-32k': 32758,
+  'code-bison-32k': 32758,
+  'codechat-bison-32k': 32758,
+  'code-': 6139,
+  'codechat-': 6139,
+  'text-': 8187,
+  'chat-': 8187,
+};
+
+const anthropicModels: Record<string, number> = {
+  'claude-': 100000,
+  'claude-instant': 100000,
+  'claude-2': 100000,
+  'claude-2.1': 200000,
+  'claude-3': 200000,
+  'claude-3-haiku': 200000,
+  'claude-3-sonnet': 200000,
+  'claude-3-opus': 200000,
+  'claude-3.5-haiku': 200000,
+  'claude-3-5-haiku': 200000,
+  'claude-3-5-sonnet': 200000,
+  'claude-3.5-sonnet': 200000,
+  'claude-3-7-sonnet': 200000,
+  'claude-3.7-sonnet': 200000,
+  'claude-3-5-sonnet-latest': 200000,
+  'claude-3.5-sonnet-latest': 200000,
+  'claude-haiku-4-5': 200000,
+  'claude-sonnet-4': 1000000,
+  'claude-4': 200000,
+  'claude-opus-4': 200000,
+  'claude-opus-4-5': 200000,
+};
+
+const deepseekModels: Record<string, number> = {
+  deepseek: 128000,
+  'deepseek-chat': 128000,
+  'deepseek-reasoner': 128000,
+  'deepseek-r1': 128000,
+  'deepseek-v3': 128000,
+  'deepseek.r1': 128000,
+};
+
+const metaModels: Record<string, number> = {
+  llama3: 8000,
+  llama2: 4000,
+  'llama-3': 8000,
+  'llama-2': 4000,
+  'llama3.1': 127500,
+  'llama3.2': 127500,
+  'llama3.3': 127500,
+  'llama3-1': 127500,
+  'llama3-2': 127500,
+  'llama3-3': 127500,
+  'llama-3.1': 127500,
+  'llama-3.2': 127500,
+  'llama-3.3': 127500,
+  'llama3.1:405b': 127500,
+  'llama3.1:70b': 127500,
+  'llama3.1:8b': 127500,
+  'llama3.2:1b': 127500,
+  'llama3.2:3b': 127500,
+  'llama3.2:11b': 127500,
+  'llama3.2:90b': 127500,
+  'llama3.3:70b': 127500,
+  'llama3-1-405b': 127500,
+  'llama3-1-70b': 127500,
+  'llama3-1-8b': 127500,
+  'llama3-2-1b': 127500,
+  'llama3-2-3b': 127500,
+  'llama3-2-11b': 127500,
+  'llama3-2-90b': 127500,
+  'llama3-3-70b': 127500,
+  'llama-3.1-405b': 127500,
+  'llama-3.1-70b': 127500,
+  'llama-3.1-8b': 127500,
+  'llama-3.2-1b': 127500,
+  'llama-3.2-3b': 127500,
+  'llama-3.2-11b': 127500,
+  'llama-3.2-90b': 127500,
+  'llama-3.3-70b': 127500,
+  'llama3-70b': 8000,
+  'llama3-8b': 8000,
+  'llama2-70b': 4000,
+  'llama2-13b': 4000,
+  'llama3:70b': 8000,
+  'llama3:8b': 8000,
+  'llama2:70b': 4000,
+};
+
+const qwenModels: Record<string, number> = {
+  qwen: 32000,
+  'qwen2.5': 32000,
+  'qwen-turbo': 1000000,
+  'qwen-plus': 131000,
+  'qwen-max': 32000,
+  'qwq-32b': 32000,
+  qwen3: 40960,
+  'qwen3-8b': 128000,
+  'qwen3-14b': 40960,
+  'qwen3-30b-a3b': 40960,
+  'qwen3-32b': 40960,
+  'qwen3-235b-a22b': 40960,
+  'qwen3-vl-8b-thinking': 256000,
+  'qwen3-vl-8b-instruct': 262144,
+  'qwen3-vl-30b-a3b': 262144,
+  'qwen3-vl-235b-a22b': 131072,
+  'qwen3-max': 256000,
+  'qwen3-coder': 262144,
+  'qwen3-coder-30b-a3b': 262144,
+  'qwen3-coder-plus': 128000,
+  'qwen3-coder-flash': 128000,
+  'qwen3-next-80b-a3b': 262144,
+};
+
+const ai21Models: Record<string, number> = {
+  'j2-mid': 8182,
+  'j2-ultra': 8182,
+  'jamba-instruct': 255500,
+};
+
+const amazonModels: Record<string, number> = {
+  'titan-text-lite': 4000,
+  'titan-text-express': 8000,
+  'titan-text-premier': 31500,
+  'nova-micro': 127000,
+  'nova-lite': 295000,
+  'nova-pro': 295000,
+  'nova-premier': 995000,
+};
+
+const bedrockModels: Record<string, number> = {
+  ...anthropicModels,
+  ...mistralModels,
+  ...cohereModels,
+  ...deepseekModels,
+  ...metaModels,
+  ...ai21Models,
+  ...amazonModels,
+};
+
+const xAIModels: Record<string, number> = {
+  grok: 131072,
+  'grok-beta': 131072,
+  'grok-vision-beta': 8192,
+  'grok-2': 131072,
+  'grok-2-latest': 131072,
+  'grok-2-1212': 131072,
+  'grok-2-vision': 32768,
+  'grok-2-vision-latest': 32768,
+  'grok-2-vision-1212': 32768,
+  'grok-3': 131072,
+  'grok-3-fast': 131072,
+  'grok-3-mini': 131072,
+  'grok-3-mini-fast': 131072,
+  'grok-4': 256000,
+  'grok-4-fast': 2000000,
+  'grok-4-1-fast': 2000000,
+  'grok-code-fast': 256000,
+};
+
+const aggregateModels: Record<string, number> = {
+  ...openAIModels,
+  ...googleModels,
+  ...bedrockModels,
+  ...xAIModels,
+  ...qwenModels,
+  kimi: 131000,
+  'gpt-oss': 131000,
+  'gpt-oss:20b': 131000,
+  'gpt-oss-20b': 131000,
+  'gpt-oss:120b': 131000,
+  'gpt-oss-120b': 131000,
+  glm4: 128000,
+  'glm-4': 128000,
+  'glm-4-32b': 128000,
+  'glm-4.5': 131000,
+  'glm-4.5-air': 131000,
+  'glm-4.5v': 66000,
+  'glm-4.6': 200000,
+};
+
+/**
+ * Map of endpoint to model context token limits.
+ */
+export const maxTokensMap: Record<string, Record<string, number>> = {
+  [EModelEndpoint.azureOpenAI]: openAIModels,
+  [EModelEndpoint.openAI]: aggregateModels,
+  [EModelEndpoint.agents]: aggregateModels,
+  [EModelEndpoint.custom]: aggregateModels,
+  [EModelEndpoint.google]: googleModels,
+  [EModelEndpoint.anthropic]: anthropicModels,
+  [EModelEndpoint.bedrock]: bedrockModels,
+};
+
+/**
+ * Finds the first matching pattern in the tokens map.
+ * Searches in reverse order to match more specific patterns first.
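+ *
+ * @example
+ * // Illustrative: 'gpt-4-turbo-2024-04-09' has no exact entry, but the reverse
+ * // substring scan reaches 'gpt-4-turbo' (127500) before the broader 'gpt-4'.
+ * findMatchingPattern('gpt-4-turbo-2024-04-09', openAIModels); // => 'gpt-4-turbo'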
+ */
+export function findMatchingPattern(
+  modelName: string,
+  tokensMap: Record<string, number>,
+): string | null {
+  const keys = Object.keys(tokensMap);
+  const lowerModelName = modelName.toLowerCase();
+  for (let i = keys.length - 1; i >= 0; i--) {
+    const modelKey = keys[i];
+    if (lowerModelName.includes(modelKey)) {
+      return modelKey;
+    }
+  }
+  return null;
+}
+
+/**
+ * Retrieves the maximum context tokens for a given model name.
+ *
+ * @param modelName - The name of the model to look up.
+ * @param endpoint - The endpoint (default is 'openAI').
+ * @returns The maximum context tokens for the given model or undefined if no match is found.
+ *
+ * @example
+ * getModelMaxTokens('gpt-4o'); // Returns 127500
+ * getModelMaxTokens('claude-3-opus', 'anthropic'); // Returns 200000
+ * getModelMaxTokens('unknown-model'); // Returns undefined
+ */
+export function getModelMaxTokens(
+  modelName: string,
+  endpoint: string = EModelEndpoint.openAI,
+): number | undefined {
+  if (typeof modelName !== 'string') {
+    return undefined;
+  }
+
+  const tokensMap = maxTokensMap[endpoint];
+  if (!tokensMap) {
+    // Fall back to aggregate models for unknown endpoints
+    return getModelMaxTokens(modelName, EModelEndpoint.openAI);
+  }
+
+  // Try exact match first
+  if (tokensMap[modelName] !== undefined) {
+    return tokensMap[modelName];
+  }
+
+  // Try pattern matching
+  const matchedPattern = findMatchingPattern(modelName, tokensMap);
+  if (matchedPattern) {
+    return tokensMap[matchedPattern];
+  }
+
+  return undefined;
+}
+
+/**
+ * Retrieves the model name key for a given model name input.
+ * If the exact model name isn't found, it searches for partial matches.
+ *
+ * @param modelName - The name of the model to look up.
+ * @param endpoint - The endpoint (default is 'openAI').
+ * @returns The model name key for the given model; returns the input if no match is found.
+ */
+export function matchModelName(
+  modelName: string,
+  endpoint: string = EModelEndpoint.openAI,
+): string | undefined {
+  if (typeof modelName !== 'string') {
+    return undefined;
+  }
+
+  const tokensMap = maxTokensMap[endpoint];
+  if (!tokensMap) {
+    return modelName;
+  }
+
+  if (tokensMap[modelName] !== undefined) {
+    return modelName;
+  }
+
+  const matchedPattern = findMatchingPattern(modelName, tokensMap);
+  return matchedPattern || modelName;
+}
+
+// Individual model maps are available for advanced use cases
+// but not re-exported to avoid conflicts with config.ts
+
+// =============================================================================
+// OUTPUT TOKEN LIMITS
+// =============================================================================
+
+/**
+ * Maximum output tokens for OpenAI and similar models.
+ * Values from official documentation, slightly reduced to leave a safety margin.
+ */
+const modelMaxOutputs: Record<string, number> = {
+  o1: 32268, // -500 from max: 32,768
+  'o1-mini': 65136, // -500 from max: 65,536
+  'o1-preview': 32268, // -500 from max: 32,768
+  'gpt-5': 128000,
+  'gpt-5-mini': 128000,
+  'gpt-5-nano': 128000,
+  'gpt-5-pro': 128000,
+  'gpt-oss-20b': 131000,
+  'gpt-oss-120b': 131000,
+  system_default: 32000,
+};
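+
+/**
+ * Illustrative resolution order (not part of the original change): for the
+ * openAI and custom endpoints, deepseekMaxOutputs is spread over these base
+ * values in maxOutputTokensMap below, so 'deepseek-chat' resolves to 8000
+ * while an unknown model falls back to system_default (32000).
+ */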
+/**
+ * Maximum output tokens for Anthropic Claude models.
+ * Values from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names
+ */
+const anthropicMaxOutputs: Record<string, number> = {
+  'claude-3-haiku': 4096,
+  'claude-3-sonnet': 4096,
+  'claude-3-opus': 4096,
+  'claude-haiku-4-5': 64000,
+  'claude-sonnet-4': 64000,
+  'claude-opus-4': 32000,
+  'claude-opus-4-5': 64000,
+  'claude-3.5-sonnet': 8192,
+  'claude-3-5-sonnet': 8192,
+  'claude-3.7-sonnet': 128000,
+  'claude-3-7-sonnet': 128000,
+};
+
+/**
+ * Maximum output tokens for DeepSeek models.
+ * Values from https://api-docs.deepseek.com/quick_start/pricing
+ */
+const deepseekMaxOutputs: Record<string, number> = {
+  deepseek: 8000, // deepseek-chat default: 4K, max: 8K
+  'deepseek-chat': 8000,
+  'deepseek-reasoner': 64000, // default: 32K, max: 64K
+  'deepseek-r1': 64000,
+  'deepseek-v3': 8000,
+  'deepseek.r1': 64000,
+};
+
+/**
+ * Map of endpoint to model max output token limits.
+ */
+export const maxOutputTokensMap: Record<string, Record<string, number>> = {
+  [EModelEndpoint.anthropic]: anthropicMaxOutputs,
+  [EModelEndpoint.azureOpenAI]: modelMaxOutputs,
+  [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
+  [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
+};
+
+/**
+ * Retrieves the maximum output tokens for a given model name.
+ *
+ * @param modelName - The name of the model to look up.
+ * @param endpoint - The endpoint (default is 'openAI').
+ * @returns The maximum output tokens for the given model, the endpoint's
+ *   system_default when no match is found, or undefined if neither exists.
+ *
+ * @example
+ * getModelMaxOutputTokens('o1'); // Returns 32268
+ * getModelMaxOutputTokens('claude-3-opus', 'anthropic'); // Returns 4096
+ * getModelMaxOutputTokens('unknown-model'); // Returns 32000 (system_default)
+ */
+export function getModelMaxOutputTokens(
+  modelName: string,
+  endpoint: string = EModelEndpoint.openAI,
+): number | undefined {
+  if (typeof modelName !== 'string') {
+    return undefined;
+  }
+
+  const tokensMap = maxOutputTokensMap[endpoint];
+  if (!tokensMap) {
+    // Fall back to openAI for unknown endpoints
+    return getModelMaxOutputTokens(modelName, EModelEndpoint.openAI);
+  }
+
+  // Try exact match first
+  if (tokensMap[modelName] !== undefined) {
+    return tokensMap[modelName];
+  }
+
+  // Try pattern matching
+  const matchedPattern = findMatchingPattern(modelName, tokensMap);
+  if (matchedPattern) {
+    return tokensMap[matchedPattern];
+  }
+
+  // Return system_default if available
+  return tokensMap.system_default;
+}
+
+// =============================================================================
+// TOKEN DEFAULTS
+// =============================================================================
+
+/**
+ * Centralized token-related default values.
+ * These replace hardcoded magic numbers throughout the codebase.
+ */
+export const TOKEN_DEFAULTS = {
+  /** Fallback context window for agents when model lookup fails */
+  AGENT_CONTEXT_FALLBACK: 18000,
+  /** Legacy fallback for older clients */
+  LEGACY_CONTEXT_FALLBACK: 4097,
+  /** Safety margin multiplier (0.9 = reserve 10% for response) */
+  CONTEXT_SAFETY_MARGIN: 0.9,
+  /** Default max output tokens when not specified */
+  DEFAULT_MAX_OUTPUT: 32000,
+} as const;
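+
+/**
+ * @example
+ * // Sketch of the intended use (helper name is illustrative, not part of this patch):
+ * const safeContextBudget = (model: string) =>
+ *   Math.round(
+ *     (getModelMaxTokens(model) ?? TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK) *
+ *       TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN,
+ *   );
+ * safeContextBudget('gpt-4o'); // Math.round(127500 * 0.9) === 114750
+ */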