Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-16 16:30:15 +01:00)
Merge 9c61d73076 into 5bfebc7c9d (commit c91bc818aa)
18 changed files with 1111 additions and 357 deletions
@@ -806,7 +806,6 @@ class BaseClient {
       user,
     );
     this.savedMessageIds.add(responseMessage.messageId);
-    delete responseMessage.tokenCount;
     return responseMessage;
   }
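With `delete responseMessage.tokenCount` gone, the per-message token counts the server already records now survive into the saved message and reach the client, where the new useTokenUsage hook (added below) sums them into input/output totals.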
@@ -1,4 +1,5 @@
 const { getModelMaxTokens } = require('@librechat/api');
+const { TOKEN_DEFAULTS } = require('librechat-data-provider');
 const BaseClient = require('../BaseClient');

 class FakeClient extends BaseClient {
@@ -41,7 +42,9 @@ class FakeClient extends BaseClient {
     }

     this.maxContextTokens =
-      this.options.maxContextTokens ?? getModelMaxTokens(this.modelOptions.model) ?? 4097;
+      this.options.maxContextTokens ??
+      getModelMaxTokens(this.modelOptions.model) ??
+      TOKEN_DEFAULTS.LEGACY_CONTEXT_FALLBACK;
   }
   buildMessages() {}
   getTokenCount(str) {
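The `??` chain above only advances past a value that is null or undefined, so an explicit options.maxContextTokens always wins over the model lookup, and the lookup wins over the legacy constant. A minimal standalone sketch of the same pattern (MODEL_LIMITS is a hypothetical table standing in for getModelMaxTokens):

// Illustrative only; not part of this diff.
const MODEL_LIMITS: Record<string, number> = { 'gpt-4o': 127500 };
const LEGACY_CONTEXT_FALLBACK = 4097; // mirrors TOKEN_DEFAULTS.LEGACY_CONTEXT_FALLBACK

function resolveMaxContext(explicit: number | undefined, model: string): number {
  // Each ?? step is consulted only if the previous value was null/undefined.
  return explicit ?? MODEL_LIMITS[model] ?? LEGACY_CONTEXT_FALLBACK;
}

resolveMaxContext(8000, 'gpt-4o'); // 8000   (explicit setting wins)
resolveMaxContext(undefined, 'gpt-4o'); // 127500 (model lookup)
resolveMaxContext(undefined, 'unknown'); // 4097   (legacy fallback)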
@@ -1,5 +1,4 @@
-const { maxTokensMap } = require('@librechat/api');
-const { EModelEndpoint } = require('librechat-data-provider');
+const { EModelEndpoint, maxTokensMap } = require('librechat-data-provider');
 const {
   defaultRate,
   tokenValues,
@@ -240,6 +240,8 @@ class AgentClient extends BaseClient {
       Object.assign(
         {
           endpoint: this.options.endpoint,
           endpointType: this.options.endpointType,
           model: this.options.agent?.model_parameters?.model,
           agent_id: this.options.agent.id,
+          modelLabel: this.options.modelLabel,
+          maxContextTokens: this.options.maxContextTokens,
@@ -1,10 +1,8 @@
-const { EModelEndpoint } = require('librechat-data-provider');
+const { EModelEndpoint, maxTokensMap, maxOutputTokensMap } = require('librechat-data-provider');
 const {
-  maxTokensMap,
   matchModelName,
   processModelData,
   getModelMaxTokens,
-  maxOutputTokensMap,
   findMatchingPattern,
 } = require('@librechat/api');

@@ -18,7 +18,9 @@ import {
   useQueryParams,
   useSubmitMessage,
   useFocusChatEffect,
+  useTokenUsageComputation,
 } from '~/hooks';
+import TokenUsageIndicator from './TokenUsageIndicator';
 import { mainTextareaId, BadgeItem } from '~/common';
 import AttachFileChat from './Files/AttachFileChat';
 import FileFormChat from './Files/FileFormChat';
@@ -39,6 +41,7 @@ const ChatForm = memo(({ index = 0 }: { index?: number }) => {
   const submitButtonRef = useRef<HTMLButtonElement>(null);
   const textAreaRef = useRef<HTMLTextAreaElement>(null);
   useFocusChatEffect(textAreaRef);
+  useTokenUsageComputation();
   const localize = useLocalize();

   const [isCollapsed, setIsCollapsed] = useState(false);
@@ -332,6 +335,7 @@ const ChatForm = memo(({ index = 0 }: { index?: number }) => {
             }
           />
           <div className="mx-auto flex" />
+          <TokenUsageIndicator />
           {SpeechToText && (
             <AudioRecorder
               methods={methods}
client/src/components/Chat/Input/TokenUsageIndicator.tsx (new file, 278 lines)
@@ -0,0 +1,278 @@
import { memo } from 'react';
import { HoverCard, HoverCardTrigger, HoverCardContent, HoverCardPortal } from '@librechat/client';
import { useLocalize, useTokenUsage } from '~/hooks';
import { cn } from '~/utils';

function formatTokens(n: number): string {
  return new Intl.NumberFormat(undefined, {
    notation: 'compact',
    maximumFractionDigits: 1,
  }).format(n);
}

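For reference, Intl.NumberFormat with compact notation and one fraction digit yields strings like the following (en-US output shown; the undefined locale argument means the exact strings vary with the user's locale):

formatTokens(950); // '950'
formatTokens(4097); // '4.1K'
formatTokens(127500); // '127.5K'
formatTokens(1000000); // '1M'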
interface ProgressBarProps {
  value: number;
  max: number;
  colorClass: string;
  label: string;
  showPercentage?: boolean;
  indeterminate?: boolean;
}

function ProgressBar({
  value,
  max,
  colorClass,
  label,
  showPercentage = false,
  indeterminate = false,
}: ProgressBarProps) {
  const percentage = max > 0 ? Math.min((value / max) * 100, 100) : 0;

  return (
    <div className="flex items-center gap-2">
      <div
        role="progressbar"
        aria-valuenow={indeterminate ? undefined : Math.round(percentage)}
        aria-valuemin={0}
        aria-valuemax={100}
        aria-label={label}
        className="h-2 flex-1 overflow-hidden rounded-full bg-surface-secondary"
      >
        {indeterminate ? (
          <div
            className="h-full w-full rounded-full"
            style={{
              background:
                'repeating-linear-gradient(-45deg, var(--border-medium), var(--border-medium) 4px, var(--surface-tertiary) 4px, var(--surface-tertiary) 8px)',
            }}
          />
        ) : (
          <div className="flex h-full rounded-full">
            <div
              className={cn('rounded-full transition-all duration-300', colorClass)}
              style={{ width: `${percentage}%` }}
            />
            <div className="flex-1 bg-surface-hover" />
          </div>
        )}
      </div>
      {showPercentage && !indeterminate && (
        <span className="min-w-[3rem] text-right text-xs text-text-secondary" aria-hidden="true">
          {Math.round(percentage)}%
        </span>
      )}
    </div>
  );
}

interface TokenRowProps {
  label: string;
  value: number;
  total: number;
  colorClass: string;
  ariaLabel: string;
}

function TokenRow({ label, value, total, colorClass, ariaLabel }: TokenRowProps) {
  const percentage = total > 0 ? Math.round((value / total) * 100) : 0;

  return (
    <div className="space-y-1">
      <div className="flex items-center justify-between text-sm">
        <span className="text-text-secondary">{label}</span>
        <span className="font-medium text-text-primary">
          {formatTokens(value)}
          <span className="ml-1 text-xs text-text-secondary" aria-hidden="true">
            ({percentage}%)
          </span>
        </span>
      </div>
      <ProgressBar value={value} max={total} colorClass={colorClass} label={ariaLabel} />
    </div>
  );
}

function TokenUsageContent() {
  const localize = useLocalize();
  const { inputTokens = 0, outputTokens = 0, maxContext = null } = useTokenUsage() ?? {};

  const totalUsed = inputTokens + outputTokens;
  const hasMaxContext = maxContext !== null && maxContext > 0;
  const percentage = hasMaxContext ? Math.min((totalUsed / maxContext) * 100, 100) : 0;

  const getMainProgressColor = () => {
    if (!hasMaxContext) {
      return 'bg-text-secondary';
    }
    if (percentage > 90) {
      return 'bg-red-500';
    }
    if (percentage > 75) {
      return 'bg-yellow-500';
    }
    return 'bg-green-500';
  };

  const inputPercentage = totalUsed > 0 ? Math.round((inputTokens / totalUsed) * 100) : 0;
  const outputPercentage = totalUsed > 0 ? Math.round((outputTokens / totalUsed) * 100) : 0;

  return (
    <div
      className="w-full space-y-3"
      role="region"
      aria-label={localize('com_ui_token_usage_context')}
    >
      {/* Header */}
      <div className="flex items-center justify-between">
        <span className="text-sm font-medium text-text-primary" id="token-usage-title">
          {localize('com_ui_token_usage_context')}
        </span>
        {hasMaxContext && (
          <span
            className={cn('text-xs font-medium', {
              'text-red-500': percentage > 90,
              'text-yellow-500': percentage > 75 && percentage <= 90,
              'text-green-500': percentage <= 75,
            })}
          >
            {localize('com_ui_token_usage_percent', { 0: Math.round(percentage).toString() })}
          </span>
        )}
      </div>

      {/* Main Progress Bar */}
      <div className="space-y-1">
        <ProgressBar
          value={totalUsed}
          max={hasMaxContext ? maxContext : 0}
          colorClass={getMainProgressColor()}
          label={
            hasMaxContext
              ? `${localize('com_ui_token_usage_context')}: ${formatTokens(totalUsed)} of ${formatTokens(maxContext)}, ${Math.round(percentage)}%`
              : `${localize('com_ui_token_usage_context')}: ${formatTokens(totalUsed)} tokens used, max context unknown`
          }
          indeterminate={!hasMaxContext}
        />
        <div className="flex justify-between text-xs text-text-secondary" aria-hidden="true">
          <span>{formatTokens(totalUsed)}</span>
          <span>{hasMaxContext ? formatTokens(maxContext) : 'N/A'}</span>
        </div>
      </div>

      {/* Divider */}
      <div className="border-t border-border-light" role="separator" />

      {/* Input/Output Breakdown */}
      <div className="space-y-3">
        <TokenRow
          label={localize('com_ui_token_usage_input')}
          value={inputTokens}
          total={totalUsed}
          colorClass="bg-blue-500"
          ariaLabel={`${localize('com_ui_token_usage_input')}: ${formatTokens(inputTokens)}, ${inputPercentage}% of total`}
        />
        <TokenRow
          label={localize('com_ui_token_usage_output')}
          value={outputTokens}
          total={totalUsed}
          colorClass="bg-green-500"
          ariaLabel={`${localize('com_ui_token_usage_output')}: ${formatTokens(outputTokens)}, ${outputPercentage}% of total`}
        />
      </div>
    </div>
  );
}

const TokenUsageIndicator = memo(function TokenUsageIndicator() {
  const localize = useLocalize();
  const { inputTokens = 0, outputTokens = 0, maxContext = null } = useTokenUsage() ?? {};

  const totalUsed = inputTokens + outputTokens;
  const hasMaxContext = maxContext !== null && maxContext > 0;
  const percentage = hasMaxContext ? Math.min((totalUsed / maxContext) * 100, 100) : 0;

  // Ring calculations
  const size = 28;
  const strokeWidth = 3.5;
  const radius = (size - strokeWidth) / 2;
  const circumference = 2 * Math.PI * radius;
  const offset = circumference - (percentage / 100) * circumference;

  const ariaLabel = hasMaxContext
    ? localize('com_ui_token_usage_aria_full', {
        0: formatTokens(inputTokens),
        1: formatTokens(outputTokens),
        2: formatTokens(maxContext),
        3: Math.round(percentage).toString(),
      })
    : localize('com_ui_token_usage_aria_no_max', {
        0: formatTokens(inputTokens),
        1: formatTokens(outputTokens),
      });

  // Color based on percentage
  const getProgressColor = () => {
    if (!hasMaxContext) {
      return 'stroke-text-secondary';
    }
    if (percentage > 90) {
      return 'stroke-red-500';
    }
    if (percentage > 75) {
      return 'stroke-yellow-500';
    }
    return 'stroke-green-500';
  };

  return (
    <HoverCard openDelay={200} closeDelay={100}>
      <HoverCardTrigger asChild>
        <button
          type="button"
          className="flex size-9 items-center justify-center rounded-full p-1 transition-colors hover:bg-surface-hover focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring"
          aria-label={ariaLabel}
          aria-haspopup="dialog"
        >
          <svg
            width={size}
            height={size}
            viewBox={`0 0 ${size} ${size}`}
            className="rotate-[-90deg]"
            aria-hidden="true"
            focusable="false"
          >
            {/* Background ring */}
            <circle
              cx={size / 2}
              cy={size / 2}
              r={radius}
              fill="transparent"
              strokeWidth={strokeWidth}
              className="stroke-border-heavy"
            />
            {/* Progress ring */}
            <circle
              cx={size / 2}
              cy={size / 2}
              r={radius}
              fill="transparent"
              strokeWidth={strokeWidth}
              strokeDasharray={circumference}
              strokeDashoffset={hasMaxContext ? offset : circumference}
              strokeLinecap="round"
              className={cn('transition-all duration-300', getProgressColor())}
            />
          </svg>
        </button>
      </HoverCardTrigger>
      <HoverCardPortal>
        <HoverCardContent side="top" align="end" className="p-3">
          <TokenUsageContent />
        </HoverCardContent>
      </HoverCardPortal>
    </HoverCard>
  );
});

export default TokenUsageIndicator;
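The ring uses the standard SVG stroke-dash technique: the progress circle's dash length equals the full circumference, and strokeDashoffset hides the unfilled arc. A quick standalone check of the arithmetic used above:

// Illustrative check mirroring the constants in the component.
const size = 28;
const strokeWidth = 3.5;
const radius = (size - strokeWidth) / 2; // 12.25
const circumference = 2 * Math.PI * radius; // ~76.97

// At 75% usage only a quarter of the circumference stays hidden:
const offset = circumference - (75 / 100) * circumference; // ~19.24
// 0%   -> offset === circumference (empty ring)
// 100% -> offset === 0 (full ring)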
@@ -35,3 +35,4 @@ export { default as useTextToSpeech } from './Input/useTextToSpeech';
 export { default as useGenerationsByLatest } from './useGenerationsByLatest';
 export { default as useLocalizedConfig } from './useLocalizedConfig';
 export { default as useResourcePermissions } from './useResourcePermissions';
+export { default as useTokenUsage, useTokenUsageComputation } from './useTokenUsage';
client/src/hooks/useTokenUsage.ts (new file, 107 lines)
@@ -0,0 +1,107 @@
import { useEffect, useMemo } from 'react';
import { useParams } from 'react-router-dom';
import { useSetAtom, useAtomValue } from 'jotai';
import { getModelMaxTokens } from 'librechat-data-provider';
import type { TMessage } from 'librechat-data-provider';
import { tokenUsageAtom, type TokenUsage } from '~/store/tokenUsage';
import { useGetMessagesByConvoId } from '~/data-provider';
import { useChatContext } from '~/Providers';

/**
 * Hook to compute and update token usage from conversation messages.
 * Should be called in a component that has access to useChatContext.
 */
export function useTokenUsageComputation() {
  const { conversation } = useChatContext();
  const conversationId = conversation?.conversationId ?? '';
  const setTokenUsage = useSetAtom(tokenUsageAtom);
  const { conversationId: paramId } = useParams();

  // Determine the query key to use - same logic as useChatHelpers
  const queryParam = paramId === 'new' ? paramId : conversationId || paramId || '';

  // Use the query hook to get reactive messages
  // Subscribe to both the paramId-based key and conversationId-based key
  const { data: messages } = useGetMessagesByConvoId(queryParam, {
    enabled: !!queryParam,
  });

  // Also subscribe to the actual conversationId if different from queryParam
  // This ensures we get updates when conversation transitions from 'new' to actual ID
  const { data: messagesById } = useGetMessagesByConvoId(conversationId, {
    enabled: !!conversationId && conversationId !== 'new' && conversationId !== queryParam,
  });

  // Use whichever has more messages (handles transition from new -> actual ID)
  const effectiveMessages = useMemo(() => {
    const msgArray = messages ?? [];
    const msgByIdArray = messagesById ?? [];
    return msgByIdArray.length > msgArray.length ? msgByIdArray : msgArray;
  }, [messages, messagesById]);

  // Compute token usage whenever messages change
  const tokenData = useMemo(() => {
    let inputTokens = 0;
    let outputTokens = 0;

    if (effectiveMessages && Array.isArray(effectiveMessages)) {
      for (const msg of effectiveMessages as TMessage[]) {
        const count = msg.tokenCount ?? 0;
        if (msg.isCreatedByUser) {
          inputTokens += count;
        } else {
          outputTokens += count;
        }
      }
    }

    // Determine max context: explicit setting or model default
    let maxContext: number | null = conversation?.maxContextTokens ?? null;

    // If no explicit maxContextTokens, try to look up model default
    if (maxContext === null && conversation?.model) {
      const endpoint = conversation.endpointType ?? conversation.endpoint ?? '';
      const modelDefault = getModelMaxTokens(conversation.model, endpoint);
      if (modelDefault !== undefined) {
        maxContext = modelDefault;
      }
    }

    return {
      inputTokens,
      outputTokens,
      maxContext,
    };
  }, [
    effectiveMessages,
    conversation?.maxContextTokens,
    conversation?.model,
    conversation?.endpoint,
    conversation?.endpointType,
  ]);

  // Update the atom when computed values change
  useEffect(() => {
    setTokenUsage(tokenData);
  }, [tokenData, setTokenUsage]);

  // Reset token usage when starting a new conversation
  useEffect(() => {
    if (paramId === 'new' && effectiveMessages.length === 0) {
      setTokenUsage({
        inputTokens: 0,
        outputTokens: 0,
        maxContext: null,
      });
    }
  }, [paramId, effectiveMessages.length, setTokenUsage]);
}

/**
 * Hook to read the current token usage values.
 */
export function useTokenUsage(): TokenUsage {
  return useAtomValue(tokenUsageAtom);
}

export default useTokenUsage;
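The input/output split keys off TMessage.isCreatedByUser and sums the per-message tokenCount persisted by the server (the field BaseClient now keeps, per the first hunk above). The same aggregation as a small self-contained sketch:

// Illustrative sketch; Msg is a pared-down stand-in for TMessage.
type Msg = { tokenCount?: number; isCreatedByUser: boolean };

function aggregate(messages: Msg[]): { inputTokens: number; outputTokens: number } {
  return messages.reduce(
    (acc, msg) => {
      const count = msg.tokenCount ?? 0; // messages without a count contribute 0
      if (msg.isCreatedByUser) {
        acc.inputTokens += count;
      } else {
        acc.outputTokens += count;
      }
      return acc;
    },
    { inputTokens: 0, outputTokens: 0 },
  );
}

aggregate([
  { tokenCount: 12, isCreatedByUser: true },
  { tokenCount: 480, isCreatedByUser: false },
]); // { inputTokens: 12, outputTokens: 480 }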
@@ -1330,6 +1330,14 @@
   "com_ui_token": "token",
   "com_ui_token_exchange_method": "Token Exchange Method",
   "com_ui_token_url": "Token URL",
+  "com_ui_token_usage_aria_full": "Token usage: {{0}} input, {{1}} output, {{2}} max context, {{3}}% used",
+  "com_ui_token_usage_aria_no_max": "Token usage: {{0}} input, {{1}} output",
+  "com_ui_token_usage_context": "Context Usage",
+  "com_ui_token_usage_input": "Input",
+  "com_ui_token_usage_max_context": "Max Context",
+  "com_ui_token_usage_output": "Output",
+  "com_ui_token_usage_percent": "{{0}}% used",
+  "com_ui_token_usage_total": "Total",
   "com_ui_tokens": "tokens",
   "com_ui_tool_collection_prefix": "A collection of tools from",
   "com_ui_tool_list_collapse": "Collapse {{serverName}} tool list",
@@ -12,9 +12,11 @@ import lang from './language';
 import settings from './settings';
 import misc from './misc';
 import isTemporary from './temporary';
+import * as tokenUsage from './tokenUsage';
 export * from './agents';
 export * from './mcp';
 export * from './favorites';
+export * from './tokenUsage';

 export default {
   ...artifacts,
@@ -31,4 +33,5 @@ export default {
   ...settings,
   ...misc,
   ...isTemporary,
+  ...tokenUsage,
 };
client/src/store/tokenUsage.ts (new file, 13 lines)
@@ -0,0 +1,13 @@
import { atom } from 'jotai';

export type TokenUsage = {
  inputTokens: number;
  outputTokens: number;
  maxContext: number | null; // null = N/A
};

export const tokenUsageAtom = atom<TokenUsage>({
  inputTokens: 0,
  outputTokens: 0,
  maxContext: null,
});
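Keeping the computed usage in a jotai atom separates the single writer (useSetAtom in useTokenUsageComputation) from any number of readers (useAtomValue in the indicator), so only components that actually display usage re-render on updates. A minimal sketch of the same read/write split outside React, assuming jotai v2's createStore:

// Illustrative only; not part of this diff.
import { createStore } from 'jotai';
import { tokenUsageAtom } from '~/store/tokenUsage';

const store = createStore();
store.set(tokenUsageAtom, { inputTokens: 12, outputTokens: 480, maxContext: 127500 });
store.get(tokenUsageAtom); // { inputTokens: 12, outputTokens: 480, maxContext: 127500 }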
@@ -7,6 +7,7 @@ import {
   isAgentsEndpoint,
   replaceSpecialVars,
   providerEndpointMap,
+  TOKEN_DEFAULTS,
 } from 'librechat-data-provider';
 import type {
   AgentToolResources,
@@ -240,7 +241,7 @@ export async function initializeAgent(
       providerEndpointMap[provider as keyof typeof providerEndpointMap],
       options.endpointTokenConfig,
     ),
-    18000,
+    TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK,
   );

   if (
@@ -293,7 +294,7 @@ export async function initializeAgent(
     agent.additional_instructions = artifactsPromptResult ?? undefined;
   }

-  const agentMaxContextNum = Number(agentMaxContextTokens) || 18000;
+  const agentMaxContextNum = Number(agentMaxContextTokens) || TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK;
   const maxOutputTokensNum = Number(maxOutputTokens) || 0;

   const finalAttachments: IMongoFile[] = (primedAttachments ?? [])
@@ -308,7 +309,9 @@ export async function initializeAgent(
     userMCPAuthMap,
     toolContextMap: toolContextMap ?? {},
     useLegacyContent: !!options.useLegacyContent,
-    maxContextTokens: Math.round((agentMaxContextNum - maxOutputTokensNum) * 0.9),
+    maxContextTokens: Math.round(
+      (agentMaxContextNum - maxOutputTokensNum) * TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN,
+    ),
   };

   return initializedAgent;
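Worked example of the agent context budget with the new constants: with the fallback context and no reserved output tokens, the agent gets Math.round((18000 - 0) * 0.9) = 16200 context tokens; reserving 4000 output tokens leaves Math.round((18000 - 4000) * 0.9) = 12600. The 0.9 multiplier keeps a 10% cushion so truncation kicks in before the hard limit. A sketch of the arithmetic, mirroring the expression in initializeAgent:

// Illustrative check; constants copied from TOKEN_DEFAULTS in this diff.
const AGENT_CONTEXT_FALLBACK = 18000;
const CONTEXT_SAFETY_MARGIN = 0.9;

const budget = (maxContext: number, maxOutput: number): number =>
  Math.round((maxContext - maxOutput) * CONTEXT_SAFETY_MARGIN);

budget(AGENT_CONTEXT_FALLBACK, 0); // 16200
budget(AGENT_CONTEXT_FALLBACK, 4000); // 12600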
@@ -1,353 +1,7 @@
 import z from 'zod';
-import { EModelEndpoint } from 'librechat-data-provider';
+import { EModelEndpoint, maxTokensMap, maxOutputTokensMap } from 'librechat-data-provider';
 import type { EndpointTokenConfig, TokenConfig } from '~/types';

[removed lines elided: the openAIModels, mistralModels, cohereModels, googleModels, anthropicModels, deepseekModels, metaModels, qwenModels, ai21Models, amazonModels, bedrockModels, and xAIModels context-window maps, plus maxTokensMap, modelMaxOutputs, anthropicMaxOutputs, deepseekMaxOutputs, maxOutputTokensMap, and the findMatchingPattern doc comment; identical values are reproduced in full in the new packages/data-provider/src/tokens.ts below]
packages/data-provider/specs/tokens.spec.ts (new file, 152 lines)
@@ -0,0 +1,152 @@
import {
  findMatchingPattern,
  getModelMaxTokens,
  getModelMaxOutputTokens,
  matchModelName,
  maxTokensMap,
} from '../src/tokens';
import { EModelEndpoint } from '../src/schemas';

describe('Token Pattern Matching', () => {
  describe('findMatchingPattern', () => {
    const testMap: Record<string, number> = {
      'claude-': 100000,
      'claude-3': 200000,
      'claude-3-opus': 200000,
      'gpt-4': 8000,
      'gpt-4-turbo': 128000,
    };

    it('should match exact model names', () => {
      expect(findMatchingPattern('claude-3-opus', testMap)).toBe('claude-3-opus');
      expect(findMatchingPattern('gpt-4-turbo', testMap)).toBe('gpt-4-turbo');
    });

    it('should match more specific patterns first (reverse order)', () => {
      // claude-3-opus-20240229 should match 'claude-3-opus' not 'claude-3' or 'claude-'
      expect(findMatchingPattern('claude-3-opus-20240229', testMap)).toBe('claude-3-opus');
    });

    it('should fall back to broader patterns when no specific match', () => {
      // claude-3-haiku should match 'claude-3' (not 'claude-3-opus')
      expect(findMatchingPattern('claude-3-haiku', testMap)).toBe('claude-3');
    });

    it('should be case-insensitive', () => {
      expect(findMatchingPattern('Claude-3-Opus', testMap)).toBe('claude-3-opus');
      expect(findMatchingPattern('GPT-4-TURBO', testMap)).toBe('gpt-4-turbo');
    });

    it('should return null for unmatched models', () => {
      expect(findMatchingPattern('unknown-model', testMap)).toBeNull();
      expect(findMatchingPattern('llama-2', testMap)).toBeNull();
    });

    it('should NOT match when pattern appears in middle of model name (startsWith behavior)', () => {
      // This is the key fix: "my-claude-wrapper" should NOT match "claude-"
      expect(findMatchingPattern('my-claude-wrapper', testMap)).toBeNull();
      expect(findMatchingPattern('openai-gpt-4-proxy', testMap)).toBeNull();
      expect(findMatchingPattern('custom-claude-3-service', testMap)).toBeNull();
    });

    it('should handle empty string model name', () => {
      expect(findMatchingPattern('', testMap)).toBeNull();
    });

    it('should handle empty tokens map', () => {
      expect(findMatchingPattern('claude-3', {})).toBeNull();
    });
  });

  describe('getModelMaxTokens', () => {
    it('should return exact match tokens', () => {
      expect(getModelMaxTokens('gpt-4o', EModelEndpoint.openAI)).toBe(127500);
      expect(getModelMaxTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(200000);
    });

    it('should return pattern-matched tokens', () => {
      // claude-3-opus-20240229 should match claude-3-opus pattern
      expect(getModelMaxTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe(200000);
    });

    it('should return undefined for unknown models', () => {
      expect(getModelMaxTokens('completely-unknown-model', EModelEndpoint.openAI)).toBeUndefined();
    });

    it('should fall back to openAI for unknown endpoints', () => {
      const result = getModelMaxTokens('gpt-4o', 'unknown-endpoint');
      expect(result).toBe(127500);
    });

    it('should handle non-string input gracefully', () => {
      expect(getModelMaxTokens(null as unknown as string)).toBeUndefined();
      expect(getModelMaxTokens(undefined as unknown as string)).toBeUndefined();
      expect(getModelMaxTokens(123 as unknown as string)).toBeUndefined();
    });

    it('should NOT match model names with pattern in middle', () => {
      // A model like "my-gpt-4-wrapper" should not match "gpt-4"
      expect(getModelMaxTokens('my-gpt-4-wrapper', EModelEndpoint.openAI)).toBeUndefined();
    });
  });

  describe('getModelMaxOutputTokens', () => {
    it('should return exact match output tokens', () => {
      expect(getModelMaxOutputTokens('o1', EModelEndpoint.openAI)).toBe(32268);
      expect(getModelMaxOutputTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(4096);
    });

    it('should return pattern-matched output tokens', () => {
      expect(getModelMaxOutputTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe(
        4096,
      );
    });

    it('should return system_default for unknown models (openAI endpoint)', () => {
      expect(getModelMaxOutputTokens('unknown-model', EModelEndpoint.openAI)).toBe(32000);
    });

    it('should handle non-string input gracefully', () => {
      expect(getModelMaxOutputTokens(null as unknown as string)).toBeUndefined();
      expect(getModelMaxOutputTokens(undefined as unknown as string)).toBeUndefined();
    });
  });

  describe('matchModelName', () => {
    it('should return exact match model name', () => {
      expect(matchModelName('gpt-4o', EModelEndpoint.openAI)).toBe('gpt-4o');
    });

    it('should return pattern key for pattern matches', () => {
      expect(matchModelName('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe(
        'claude-3-opus',
      );
    });

    it('should return input for unknown models', () => {
      expect(matchModelName('unknown-model', EModelEndpoint.openAI)).toBe('unknown-model');
    });

    it('should handle non-string input gracefully', () => {
      expect(matchModelName(null as unknown as string)).toBeUndefined();
    });
  });

  describe('maxTokensMap structure', () => {
    it('should have entries for all major endpoints', () => {
      expect(maxTokensMap[EModelEndpoint.openAI]).toBeDefined();
      expect(maxTokensMap[EModelEndpoint.anthropic]).toBeDefined();
      expect(maxTokensMap[EModelEndpoint.google]).toBeDefined();
      expect(maxTokensMap[EModelEndpoint.azureOpenAI]).toBeDefined();
      expect(maxTokensMap[EModelEndpoint.bedrock]).toBeDefined();
    });

    it('should have positive token values', () => {
      Object.values(maxTokensMap).forEach((endpointMap) => {
        Object.entries(endpointMap).forEach(([model, tokens]) => {
          expect(tokens).toBeGreaterThan(0);
        });
      });
    });
  });
});
@@ -47,3 +47,5 @@ export { default as createPayload } from './createPayload';
 /* feedback */
 export * from './feedback';
 export * from './parameterSettings';
+/* token limits */
+export * from './tokens';
@@ -618,6 +618,7 @@ export type TMessage = z.input<typeof tMessageSchema> & {
   attachments?: TAttachment[];
   clientTimestamp?: string;
   feedback?: TFeedback;
+  tokenCount?: number;
 };

 export const coerceNumber = z.union([z.number(), z.string()]).transform((val) => {
packages/data-provider/src/tokens.ts (new file, 527 lines)
@@ -0,0 +1,527 @@
import { EModelEndpoint } from './schemas';

/**
 * Model context window token limits.
 * These values represent the maximum context tokens (input) for each model.
 * Values are slightly reduced from actual max to leave room for output tokens.
 */

const openAIModels: Record<string, number> = {
  'o4-mini': 200000,
  'o3-mini': 195000, // -5000 from max
  o3: 200000,
  o1: 195000, // -5000 from max
  'o1-mini': 127500, // -500 from max
  'o1-preview': 127500, // -500 from max
  'gpt-4': 8187, // -5 from max
  'gpt-4-0613': 8187, // -5 from max
  'gpt-4-32k': 32758, // -10 from max
  'gpt-4-32k-0314': 32758, // -10 from max
  'gpt-4-32k-0613': 32758, // -10 from max
  'gpt-4-1106': 127500, // -500 from max
  'gpt-4-0125': 127500, // -500 from max
  'gpt-4.5': 127500, // -500 from max
  'gpt-4.1': 1047576,
  'gpt-4.1-mini': 1047576,
  'gpt-4.1-nano': 1047576,
  'gpt-5': 400000,
  'gpt-5-mini': 400000,
  'gpt-5-nano': 400000,
  'gpt-5-pro': 400000,
  'gpt-4o': 127500, // -500 from max
  'gpt-4o-mini': 127500, // -500 from max
  'gpt-4o-2024-05-13': 127500, // -500 from max
  'gpt-4-turbo': 127500, // -500 from max
  'gpt-4-vision': 127500, // -500 from max
  'gpt-3.5-turbo': 16375, // -10 from max
  'gpt-3.5-turbo-0613': 4092, // -5 from max
  'gpt-3.5-turbo-0301': 4092, // -5 from max
  'gpt-3.5-turbo-16k': 16375, // -10 from max
  'gpt-3.5-turbo-16k-0613': 16375, // -10 from max
  'gpt-3.5-turbo-1106': 16375, // -10 from max
  'gpt-3.5-turbo-0125': 16375, // -10 from max
};

const mistralModels: Record<string, number> = {
  'mistral-': 31990, // -10 from max
  'mistral-7b': 31990, // -10 from max
  'mistral-small': 31990, // -10 from max
  'mixtral-8x7b': 31990, // -10 from max
  'mixtral-8x22b': 65536,
  'mistral-large': 131000,
  'mistral-large-2402': 127500,
  'mistral-large-2407': 127500,
  'mistral-nemo': 131000,
  'pixtral-large': 131000,
  'mistral-saba': 32000,
  codestral: 256000,
  'ministral-8b': 131000,
  'ministral-3b': 131000,
};

const cohereModels: Record<string, number> = {
  'command-light': 4086, // -10 from max
  'command-light-nightly': 8182, // -10 from max
  command: 4086, // -10 from max
  'command-nightly': 8182, // -10 from max
  'command-text': 4086, // -10 from max
  'command-r': 127500, // -500 from max
  'command-r-plus': 127500, // -500 from max
};

const googleModels: Record<string, number> = {
  /* Max I/O is combined so we subtract the amount from max response tokens for actual total */
  gemma: 8196,
  'gemma-2': 32768,
  'gemma-3': 32768,
  'gemma-3-27b': 131072,
  gemini: 30720, // -2048 from max
  'gemini-pro-vision': 12288,
  'gemini-exp': 2000000,
  'gemini-3': 1000000, // 1M input tokens, 64k output tokens
  'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens
  'gemini-2.5-pro': 1000000,
  'gemini-2.5-flash': 1000000,
  'gemini-2.5-flash-lite': 1000000,
  'gemini-2.0': 2000000,
  'gemini-2.0-flash': 1000000,
  'gemini-2.0-flash-lite': 1000000,
  'gemini-1.5': 1000000,
  'gemini-1.5-flash': 1000000,
  'gemini-1.5-flash-8b': 1000000,
  'text-bison-32k': 32758, // -10 from max
  'chat-bison-32k': 32758, // -10 from max
  'code-bison-32k': 32758, // -10 from max
  'codechat-bison-32k': 32758,
  /* Codey, -5 from max: 6144 */
  'code-': 6139,
  'codechat-': 6139,
  /* PaLM2, -5 from max: 8192 */
  'text-': 8187,
  'chat-': 8187,
};

const anthropicModels: Record<string, number> = {
  'claude-': 100000,
  'claude-instant': 100000,
  'claude-2': 100000,
  'claude-2.1': 200000,
  'claude-3': 200000,
  'claude-3-haiku': 200000,
  'claude-3-sonnet': 200000,
  'claude-3-opus': 200000,
  'claude-3.5-haiku': 200000,
  'claude-3-5-haiku': 200000,
  'claude-3-5-sonnet': 200000,
  'claude-3.5-sonnet': 200000,
  'claude-3-7-sonnet': 200000,
  'claude-3.7-sonnet': 200000,
  'claude-3-5-sonnet-latest': 200000,
  'claude-3.5-sonnet-latest': 200000,
  'claude-haiku-4-5': 200000,
  'claude-sonnet-4': 1000000,
  'claude-4': 200000,
  'claude-opus-4': 200000,
  'claude-opus-4-5': 200000,
};

const deepseekModels: Record<string, number> = {
  deepseek: 128000,
  'deepseek-chat': 128000,
  'deepseek-reasoner': 128000,
  'deepseek-r1': 128000,
  'deepseek-v3': 128000,
  'deepseek.r1': 128000,
};

const metaModels: Record<string, number> = {
  // Basic patterns
  llama3: 8000,
  llama2: 4000,
  'llama-3': 8000,
  'llama-2': 4000,

  // llama3.x pattern
  'llama3.1': 127500,
  'llama3.2': 127500,
  'llama3.3': 127500,

  // llama3-x pattern
  'llama3-1': 127500,
  'llama3-2': 127500,
  'llama3-3': 127500,

  // llama-3.x pattern
  'llama-3.1': 127500,
  'llama-3.2': 127500,
  'llama-3.3': 127500,

  // llama3.x:Nb pattern
  'llama3.1:405b': 127500,
  'llama3.1:70b': 127500,
  'llama3.1:8b': 127500,
  'llama3.2:1b': 127500,
  'llama3.2:3b': 127500,
  'llama3.2:11b': 127500,
  'llama3.2:90b': 127500,
  'llama3.3:70b': 127500,

  // llama3-x-Nb pattern
  'llama3-1-405b': 127500,
  'llama3-1-70b': 127500,
  'llama3-1-8b': 127500,
  'llama3-2-1b': 127500,
  'llama3-2-3b': 127500,
  'llama3-2-11b': 127500,
  'llama3-2-90b': 127500,
  'llama3-3-70b': 127500,

  // llama-3.x-Nb pattern
  'llama-3.1-405b': 127500,
  'llama-3.1-70b': 127500,
  'llama-3.1-8b': 127500,
  'llama-3.2-1b': 127500,
  'llama-3.2-3b': 127500,
  'llama-3.2-11b': 127500,
  'llama-3.2-90b': 127500,
  'llama-3.3-70b': 127500,

  // Original llama2/3 patterns
  'llama3-70b': 8000,
  'llama3-8b': 8000,
  'llama2-70b': 4000,
  'llama2-13b': 4000,
  'llama3:70b': 8000,
  'llama3:8b': 8000,
  'llama2:70b': 4000,
};

const qwenModels: Record<string, number> = {
  qwen: 32000,
  'qwen2.5': 32000,
  'qwen-turbo': 1000000,
  'qwen-plus': 131000,
  'qwen-max': 32000,
  'qwq-32b': 32000,
  // Qwen3 models
  qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
  'qwen3-8b': 128000,
  'qwen3-14b': 40960,
  'qwen3-30b-a3b': 40960,
  'qwen3-32b': 40960,
  'qwen3-235b-a22b': 40960,
  // Qwen3 VL (Vision-Language) models
  'qwen3-vl-8b-thinking': 256000,
  'qwen3-vl-8b-instruct': 262144,
  'qwen3-vl-30b-a3b': 262144,
  'qwen3-vl-235b-a22b': 131072,
  // Qwen3 specialized models
  'qwen3-max': 256000,
  'qwen3-coder': 262144,
  'qwen3-coder-30b-a3b': 262144,
  'qwen3-coder-plus': 128000,
  'qwen3-coder-flash': 128000,
  'qwen3-next-80b-a3b': 262144,
};

const ai21Models: Record<string, number> = {
  'j2-mid': 8182, // -10 from max
  'j2-ultra': 8182, // -10 from max
  'jamba-instruct': 255500, // -500 from max
};

const amazonModels: Record<string, number> = {
  // Amazon Titan models
  'titan-text-lite': 4000,
  'titan-text-express': 8000,
  'titan-text-premier': 31500, // -500 from max
  // Amazon Nova models
  // https://aws.amazon.com/ai/generative-ai/nova/
  'nova-micro': 127000, // -1000 from max
  'nova-lite': 295000, // -5000 from max
  'nova-pro': 295000, // -5000 from max
  'nova-premier': 995000, // -5000 from max
};

const bedrockModels: Record<string, number> = {
  ...anthropicModels,
  ...mistralModels,
  ...cohereModels,
  ...deepseekModels,
  ...metaModels,
  ...ai21Models,
  ...amazonModels,
};

const xAIModels: Record<string, number> = {
  grok: 131072,
  'grok-beta': 131072,
  'grok-vision-beta': 8192,
  'grok-2': 131072,
  'grok-2-latest': 131072,
  'grok-2-1212': 131072,
  'grok-2-vision': 32768,
  'grok-2-vision-latest': 32768,
  'grok-2-vision-1212': 32768,
  'grok-3': 131072,
  'grok-3-fast': 131072,
  'grok-3-mini': 131072,
  'grok-3-mini-fast': 131072,
  'grok-4': 256000, // 256K context
  'grok-4-fast': 2000000, // 2M context
  'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants)
  'grok-code-fast': 256000, // 256K context
};

const aggregateModels: Record<string, number> = {
  ...openAIModels,
  ...googleModels,
  ...bedrockModels,
  ...xAIModels,
  ...qwenModels,
  // misc.
  kimi: 131000,
  // GPT-OSS
  'gpt-oss': 131000,
  'gpt-oss:20b': 131000,
  'gpt-oss-20b': 131000,
  'gpt-oss:120b': 131000,
  'gpt-oss-120b': 131000,
  // GLM models (Zhipu AI)
  glm4: 128000,
  'glm-4': 128000,
  'glm-4-32b': 128000,
  'glm-4.5': 131000,
  'glm-4.5-air': 131000,
  'glm-4.5v': 66000,
  'glm-4.6': 200000,
};

/**
 * Map of endpoint to model context token limits.
 */
export const maxTokensMap: Record<string, Record<string, number>> = {
  [EModelEndpoint.azureOpenAI]: openAIModels,
  [EModelEndpoint.openAI]: aggregateModels,
  [EModelEndpoint.agents]: aggregateModels,
  [EModelEndpoint.custom]: aggregateModels,
  [EModelEndpoint.google]: googleModels,
  [EModelEndpoint.anthropic]: anthropicModels,
  [EModelEndpoint.bedrock]: bedrockModels,
};

/**
 * Finds the first matching pattern in the tokens map.
 * Searches in reverse order to match more specific patterns first.
 *
 * Note: This relies on the insertion order of keys in the tokensMap object.
 * More specific patterns must be defined later in the object to be matched first.
 * If the order of keys is changed, the matching behavior may be affected.
 */
export function findMatchingPattern(
  modelName: string,
  tokensMap: Record<string, number>,
): string | null {
  const keys = Object.keys(tokensMap);
  const lowerModelName = modelName.toLowerCase();
  for (let i = keys.length - 1; i >= 0; i--) {
    const modelKey = keys[i];
    if (lowerModelName.startsWith(modelKey)) {
      return modelKey;
    }
  }
  return null;
}

/**
 * Retrieves the maximum context tokens for a given model name.
 *
 * @param modelName - The name of the model to look up.
 * @param endpoint - The endpoint (default is 'openAI').
 * @returns The maximum context tokens for the given model or undefined if no match is found.
 *
 * @example
 * getModelMaxTokens('gpt-4o'); // Returns 127500
 * getModelMaxTokens('claude-3-opus', 'anthropic'); // Returns 200000
 * getModelMaxTokens('unknown-model'); // Returns undefined
 */
export function getModelMaxTokens(
  modelName: string,
  endpoint: string = EModelEndpoint.openAI,
): number | undefined {
  if (typeof modelName !== 'string') {
    return undefined;
  }

  const tokensMap = maxTokensMap[endpoint];
  if (!tokensMap) {
    // Fall back to aggregate models for unknown endpoints
    return getModelMaxTokens(modelName, EModelEndpoint.openAI);
  }

  // Try exact match first
  if (tokensMap[modelName] !== undefined) {
    return tokensMap[modelName];
  }

  // Try pattern matching
  const matchedPattern = findMatchingPattern(modelName, tokensMap);
  if (matchedPattern) {
    return tokensMap[matchedPattern];
  }

  return undefined;
}

/**
 * Retrieves the model name key for a given model name input.
 * If the exact model name isn't found, it searches for partial matches.
 *
 * @param modelName - The name of the model to look up.
 * @param endpoint - The endpoint (default is 'openAI').
 * @returns The model name key for the given model; returns input if no match is found.
 */
export function matchModelName(
  modelName: string,
  endpoint: string = EModelEndpoint.openAI,
): string | undefined {
  if (typeof modelName !== 'string') {
    return undefined;
  }

  const tokensMap = maxTokensMap[endpoint];
  if (!tokensMap) {
    return modelName;
  }

  if (tokensMap[modelName] !== undefined) {
    return modelName;
  }

  const matchedPattern = findMatchingPattern(modelName, tokensMap);
  return matchedPattern || modelName;
}

// Individual model maps are available for advanced use cases
// but not re-exported to avoid conflicts with config.ts

// =============================================================================
// OUTPUT TOKEN LIMITS
// =============================================================================

/**
 * Maximum output tokens for OpenAI and similar models.
 * Values from official documentation, slightly reduced to leave safety margin.
 */
const modelMaxOutputs: Record<string, number> = {
  o1: 32268, // -500 from max: 32,768
  'o1-mini': 65136, // -500 from max: 65,536
  'o1-preview': 32268, // -500 from max: 32,768
  'gpt-5': 128000,
  'gpt-5-mini': 128000,
  'gpt-5-nano': 128000,
  'gpt-5-pro': 128000,
  'gpt-oss-20b': 131000,
  'gpt-oss-120b': 131000,
  system_default: 32000,
};

/**
 * Maximum output tokens for Anthropic Claude models.
 * Values from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names
 */
const anthropicMaxOutputs: Record<string, number> = {
  'claude-3-haiku': 4096,
  'claude-3-sonnet': 4096,
  'claude-3-opus': 4096,
  'claude-haiku-4-5': 64000,
  'claude-sonnet-4': 64000,
  'claude-opus-4': 32000,
  'claude-opus-4-5': 64000,
  'claude-3.5-sonnet': 8192,
  'claude-3-5-sonnet': 8192,
  'claude-3.7-sonnet': 128000,
  'claude-3-7-sonnet': 128000,
};

/**
 * Maximum output tokens for DeepSeek models.
 * Values from https://api-docs.deepseek.com/quick_start/pricing
 */
const deepseekMaxOutputs: Record<string, number> = {
  deepseek: 8000, // deepseek-chat default: 4K, max: 8K
  'deepseek-chat': 8000,
  'deepseek-reasoner': 64000, // default: 32K, max: 64K
  'deepseek-r1': 64000,
  'deepseek-v3': 8000,
  'deepseek.r1': 64000,
};

/**
 * Map of endpoint to model max output token limits.
 */
export const maxOutputTokensMap: Record<string, Record<string, number>> = {
  [EModelEndpoint.anthropic]: anthropicMaxOutputs,
  [EModelEndpoint.azureOpenAI]: modelMaxOutputs,
  [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
  [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
};

/**
 * Retrieves the maximum output tokens for a given model name.
 *
 * @param modelName - The name of the model to look up.
 * @param endpoint - The endpoint (default is 'openAI').
 * @returns The maximum output tokens for the given model or undefined if no match is found.
 *
 * @example
 * getModelMaxOutputTokens('o1'); // Returns 32268
 * getModelMaxOutputTokens('claude-3-opus', 'anthropic'); // Returns 4096
 * getModelMaxOutputTokens('unknown-model'); // Returns 32000 (system_default)
 */
export function getModelMaxOutputTokens(
  modelName: string,
  endpoint: string = EModelEndpoint.openAI,
): number | undefined {
  if (typeof modelName !== 'string') {
    return undefined;
  }

  const tokensMap = maxOutputTokensMap[endpoint];
  if (!tokensMap) {
    // Fall back to openAI for unknown endpoints
    return getModelMaxOutputTokens(modelName, EModelEndpoint.openAI);
  }

  // Try exact match first
  if (tokensMap[modelName] !== undefined) {
    return tokensMap[modelName];
  }

  // Try pattern matching
  const matchedPattern = findMatchingPattern(modelName, tokensMap);
  if (matchedPattern) {
    return tokensMap[matchedPattern];
  }

  // Return system_default if available
  return tokensMap.system_default;
}

// =============================================================================
// TOKEN DEFAULTS
// =============================================================================

/**
 * Centralized token-related default values.
 */
export const TOKEN_DEFAULTS = {
  /** Fallback context window for agents when model lookup fails */
  AGENT_CONTEXT_FALLBACK: 18000,
  /** Legacy fallback for older clients */
  LEGACY_CONTEXT_FALLBACK: 4097,
  /** Safety margin multiplier (0.9 = reserve 10% for response) */
  CONTEXT_SAFETY_MARGIN: 0.9,
  /** Default max output tokens when not specified */
  DEFAULT_MAX_OUTPUT: 32000,
} as const;
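Taken together, the helpers and TOKEN_DEFAULTS give a three-step fallback: exact model match, then prefix-pattern match, then a hard-coded default. A hedged end-to-end sketch, assuming only the exports shown in this diff:

// Illustrative only; contextBudget is not part of the package.
import { getModelMaxTokens, TOKEN_DEFAULTS } from 'librechat-data-provider';

// Effective context budget for a model, applying the 10% safety margin.
function contextBudget(model: string, endpoint?: string, maxOutput = 0): number {
  const maxContext =
    getModelMaxTokens(model, endpoint) ?? TOKEN_DEFAULTS.LEGACY_CONTEXT_FALLBACK;
  return Math.round((maxContext - maxOutput) * TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN);
}

contextBudget('gpt-4o'); // Math.round(127500 * 0.9) = 114750
contextBudget('totally-unknown-model'); // Math.round(4097 * 0.9) = 3687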