This commit is contained in:
Marco Beretta 2025-12-16 10:11:31 +08:00 committed by GitHub
commit c91bc818aa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 1111 additions and 357 deletions

View file

@ -7,6 +7,7 @@ import {
isAgentsEndpoint,
replaceSpecialVars,
providerEndpointMap,
TOKEN_DEFAULTS,
} from 'librechat-data-provider';
import type {
AgentToolResources,
@ -240,7 +241,7 @@ export async function initializeAgent(
providerEndpointMap[provider as keyof typeof providerEndpointMap],
options.endpointTokenConfig,
),
18000,
TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK,
);
if (
@ -293,7 +294,7 @@ export async function initializeAgent(
agent.additional_instructions = artifactsPromptResult ?? undefined;
}
const agentMaxContextNum = Number(agentMaxContextTokens) || 18000;
const agentMaxContextNum = Number(agentMaxContextTokens) || TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK;
const maxOutputTokensNum = Number(maxOutputTokens) || 0;
const finalAttachments: IMongoFile[] = (primedAttachments ?? [])
@ -308,7 +309,9 @@ export async function initializeAgent(
userMCPAuthMap,
toolContextMap: toolContextMap ?? {},
useLegacyContent: !!options.useLegacyContent,
maxContextTokens: Math.round((agentMaxContextNum - maxOutputTokensNum) * 0.9),
maxContextTokens: Math.round(
(agentMaxContextNum - maxOutputTokensNum) * TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN,
),
};
return initializedAgent;

View file

@ -1,353 +1,7 @@
import z from 'zod';
import { EModelEndpoint } from 'librechat-data-provider';
import { EModelEndpoint, maxTokensMap, maxOutputTokensMap } from 'librechat-data-provider';
import type { EndpointTokenConfig, TokenConfig } from '~/types';
const openAIModels = {
'o4-mini': 200000,
'o3-mini': 195000, // -5000 from max
o3: 200000,
o1: 195000, // -5000 from max
'o1-mini': 127500, // -500 from max
'o1-preview': 127500, // -500 from max
'gpt-4': 8187, // -5 from max
'gpt-4-0613': 8187, // -5 from max
'gpt-4-32k': 32758, // -10 from max
'gpt-4-32k-0314': 32758, // -10 from max
'gpt-4-32k-0613': 32758, // -10 from max
'gpt-4-1106': 127500, // -500 from max
'gpt-4-0125': 127500, // -500 from max
'gpt-4.5': 127500, // -500 from max
'gpt-4.1': 1047576,
'gpt-4.1-mini': 1047576,
'gpt-4.1-nano': 1047576,
'gpt-5': 400000,
'gpt-5-mini': 400000,
'gpt-5-nano': 400000,
'gpt-5-pro': 400000,
'gpt-4o': 127500, // -500 from max
'gpt-4o-mini': 127500, // -500 from max
'gpt-4o-2024-05-13': 127500, // -500 from max
'gpt-4-turbo': 127500, // -500 from max
'gpt-4-vision': 127500, // -500 from max
'gpt-3.5-turbo': 16375, // -10 from max
'gpt-3.5-turbo-0613': 4092, // -5 from max
'gpt-3.5-turbo-0301': 4092, // -5 from max
'gpt-3.5-turbo-16k': 16375, // -10 from max
'gpt-3.5-turbo-16k-0613': 16375, // -10 from max
'gpt-3.5-turbo-1106': 16375, // -10 from max
'gpt-3.5-turbo-0125': 16375, // -10 from max
};
const mistralModels = {
'mistral-': 31990, // -10 from max
'mistral-7b': 31990, // -10 from max
'mistral-small': 31990, // -10 from max
'mixtral-8x7b': 31990, // -10 from max
'mixtral-8x22b': 65536,
'mistral-large': 131000,
'mistral-large-2402': 127500,
'mistral-large-2407': 127500,
'mistral-nemo': 131000,
'pixtral-large': 131000,
'mistral-saba': 32000,
codestral: 256000,
'ministral-8b': 131000,
'ministral-3b': 131000,
};
const cohereModels = {
'command-light': 4086, // -10 from max
'command-light-nightly': 8182, // -10 from max
command: 4086, // -10 from max
'command-nightly': 8182, // -10 from max
'command-text': 4086, // -10 from max
'command-r': 127500, // -500 from max
'command-r-plus': 127500, // -500 from max
};
const googleModels = {
/* Max I/O is combined so we subtract the amount from max response tokens for actual total */
gemma: 8196,
'gemma-2': 32768,
'gemma-3': 32768,
'gemma-3-27b': 131072,
gemini: 30720, // -2048 from max
'gemini-pro-vision': 12288,
'gemini-exp': 2000000,
'gemini-3': 1000000, // 1M input tokens, 64k output tokens
'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens
'gemini-2.5-pro': 1000000,
'gemini-2.5-flash': 1000000,
'gemini-2.5-flash-lite': 1000000,
'gemini-2.0': 2000000,
'gemini-2.0-flash': 1000000,
'gemini-2.0-flash-lite': 1000000,
'gemini-1.5': 1000000,
'gemini-1.5-flash': 1000000,
'gemini-1.5-flash-8b': 1000000,
'text-bison-32k': 32758, // -10 from max
'chat-bison-32k': 32758, // -10 from max
'code-bison-32k': 32758, // -10 from max
'codechat-bison-32k': 32758,
/* Codey, -5 from max: 6144 */
'code-': 6139,
'codechat-': 6139,
/* PaLM2, -5 from max: 8192 */
'text-': 8187,
'chat-': 8187,
};
const anthropicModels = {
'claude-': 100000,
'claude-instant': 100000,
'claude-2': 100000,
'claude-2.1': 200000,
'claude-3': 200000,
'claude-3-haiku': 200000,
'claude-3-sonnet': 200000,
'claude-3-opus': 200000,
'claude-3.5-haiku': 200000,
'claude-3-5-haiku': 200000,
'claude-3-5-sonnet': 200000,
'claude-3.5-sonnet': 200000,
'claude-3-7-sonnet': 200000,
'claude-3.7-sonnet': 200000,
'claude-3-5-sonnet-latest': 200000,
'claude-3.5-sonnet-latest': 200000,
'claude-haiku-4-5': 200000,
'claude-sonnet-4': 1000000,
'claude-4': 200000,
'claude-opus-4': 200000,
'claude-opus-4-5': 200000,
};
const deepseekModels = {
deepseek: 128000,
'deepseek-chat': 128000,
'deepseek-reasoner': 128000,
'deepseek-r1': 128000,
'deepseek-v3': 128000,
'deepseek.r1': 128000,
};
const metaModels = {
// Basic patterns
llama3: 8000,
llama2: 4000,
'llama-3': 8000,
'llama-2': 4000,
// llama3.x pattern
'llama3.1': 127500,
'llama3.2': 127500,
'llama3.3': 127500,
// llama3-x pattern
'llama3-1': 127500,
'llama3-2': 127500,
'llama3-3': 127500,
// llama-3.x pattern
'llama-3.1': 127500,
'llama-3.2': 127500,
'llama-3.3': 127500,
// llama3.x:Nb pattern
'llama3.1:405b': 127500,
'llama3.1:70b': 127500,
'llama3.1:8b': 127500,
'llama3.2:1b': 127500,
'llama3.2:3b': 127500,
'llama3.2:11b': 127500,
'llama3.2:90b': 127500,
'llama3.3:70b': 127500,
// llama3-x-Nb pattern
'llama3-1-405b': 127500,
'llama3-1-70b': 127500,
'llama3-1-8b': 127500,
'llama3-2-1b': 127500,
'llama3-2-3b': 127500,
'llama3-2-11b': 127500,
'llama3-2-90b': 127500,
'llama3-3-70b': 127500,
// llama-3.x-Nb pattern
'llama-3.1-405b': 127500,
'llama-3.1-70b': 127500,
'llama-3.1-8b': 127500,
'llama-3.2-1b': 127500,
'llama-3.2-3b': 127500,
'llama-3.2-11b': 127500,
'llama-3.2-90b': 127500,
'llama-3.3-70b': 127500,
// Original llama2/3 patterns
'llama3-70b': 8000,
'llama3-8b': 8000,
'llama2-70b': 4000,
'llama2-13b': 4000,
'llama3:70b': 8000,
'llama3:8b': 8000,
'llama2:70b': 4000,
};
const qwenModels = {
qwen: 32000,
'qwen2.5': 32000,
'qwen-turbo': 1000000,
'qwen-plus': 131000,
'qwen-max': 32000,
'qwq-32b': 32000,
// Qwen3 models
qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
'qwen3-8b': 128000,
'qwen3-14b': 40960,
'qwen3-30b-a3b': 40960,
'qwen3-32b': 40960,
'qwen3-235b-a22b': 40960,
// Qwen3 VL (Vision-Language) models
'qwen3-vl-8b-thinking': 256000,
'qwen3-vl-8b-instruct': 262144,
'qwen3-vl-30b-a3b': 262144,
'qwen3-vl-235b-a22b': 131072,
// Qwen3 specialized models
'qwen3-max': 256000,
'qwen3-coder': 262144,
'qwen3-coder-30b-a3b': 262144,
'qwen3-coder-plus': 128000,
'qwen3-coder-flash': 128000,
'qwen3-next-80b-a3b': 262144,
};
const ai21Models = {
'j2-mid': 8182, // -10 from max
'j2-ultra': 8182, // -10 from max
'jamba-instruct': 255500, // -500 from max
};
const amazonModels = {
// Amazon Titan models
'titan-text-lite': 4000,
'titan-text-express': 8000,
'titan-text-premier': 31500, // -500 from max
// Amazon Nova models
// https://aws.amazon.com/ai/generative-ai/nova/
'nova-micro': 127000, // -1000 from max
'nova-lite': 295000, // -5000 from max
'nova-pro': 295000, // -5000 from max
'nova-premier': 995000, // -5000 from max
};
const bedrockModels = {
...anthropicModels,
...mistralModels,
...cohereModels,
...deepseekModels,
...metaModels,
...ai21Models,
...amazonModels,
};
const xAIModels = {
grok: 131072,
'grok-beta': 131072,
'grok-vision-beta': 8192,
'grok-2': 131072,
'grok-2-latest': 131072,
'grok-2-1212': 131072,
'grok-2-vision': 32768,
'grok-2-vision-latest': 32768,
'grok-2-vision-1212': 32768,
'grok-3': 131072,
'grok-3-fast': 131072,
'grok-3-mini': 131072,
'grok-3-mini-fast': 131072,
'grok-4': 256000, // 256K context
'grok-4-fast': 2000000, // 2M context
'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants)
'grok-code-fast': 256000, // 256K context
};
const aggregateModels = {
...openAIModels,
...googleModels,
...bedrockModels,
...xAIModels,
...qwenModels,
// misc.
kimi: 131000,
// GPT-OSS
'gpt-oss': 131000,
'gpt-oss:20b': 131000,
'gpt-oss-20b': 131000,
'gpt-oss:120b': 131000,
'gpt-oss-120b': 131000,
// GLM models (Zhipu AI)
glm4: 128000,
'glm-4': 128000,
'glm-4-32b': 128000,
'glm-4.5': 131000,
'glm-4.5-air': 131000,
'glm-4.5v': 66000,
'glm-4.6': 200000,
};
export const maxTokensMap = {
[EModelEndpoint.azureOpenAI]: openAIModels,
[EModelEndpoint.openAI]: aggregateModels,
[EModelEndpoint.agents]: aggregateModels,
[EModelEndpoint.custom]: aggregateModels,
[EModelEndpoint.google]: googleModels,
[EModelEndpoint.anthropic]: anthropicModels,
[EModelEndpoint.bedrock]: bedrockModels,
};
export const modelMaxOutputs = {
o1: 32268, // -500 from max: 32,768
'o1-mini': 65136, // -500 from max: 65,536
'o1-preview': 32268, // -500 from max: 32,768
'gpt-5': 128000,
'gpt-5-mini': 128000,
'gpt-5-nano': 128000,
'gpt-5-pro': 128000,
'gpt-oss-20b': 131000,
'gpt-oss-120b': 131000,
system_default: 32000,
};
/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */
const anthropicMaxOutputs = {
'claude-3-haiku': 4096,
'claude-3-sonnet': 4096,
'claude-3-opus': 4096,
'claude-haiku-4-5': 64000,
'claude-sonnet-4': 64000,
'claude-opus-4': 32000,
'claude-opus-4-5': 64000,
'claude-3.5-sonnet': 8192,
'claude-3-5-sonnet': 8192,
'claude-3.7-sonnet': 128000,
'claude-3-7-sonnet': 128000,
};
/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */
const deepseekMaxOutputs = {
deepseek: 8000, // deepseek-chat default: 4K, max: 8K
'deepseek-chat': 8000,
'deepseek-reasoner': 64000, // default: 32K, max: 64K
'deepseek-r1': 64000,
'deepseek-v3': 8000,
'deepseek.r1': 64000,
};
export const maxOutputTokensMap = {
[EModelEndpoint.anthropic]: anthropicMaxOutputs,
[EModelEndpoint.azureOpenAI]: modelMaxOutputs,
[EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
[EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
};
/**
* Finds the first matching pattern in the tokens map.
* @param {string} modelName

View file

@ -0,0 +1,152 @@
import {
findMatchingPattern,
getModelMaxTokens,
getModelMaxOutputTokens,
matchModelName,
maxTokensMap,
} from '../src/tokens';
import { EModelEndpoint } from '../src/schemas';
/**
 * Unit tests for the token-limit lookup helpers in `../src/tokens`:
 * prefix matching (`findMatchingPattern`), context/output limit resolution
 * (`getModelMaxTokens`, `getModelMaxOutputTokens`), key resolution
 * (`matchModelName`) and basic sanity checks on `maxTokensMap`.
 */
describe('Token Pattern Matching', () => {
  describe('findMatchingPattern', () => {
    // Keys are ordered from generic to specific, mirroring the production maps.
    const testMap: Record<string, number> = {
      'claude-': 100000,
      'claude-3': 200000,
      'claude-3-opus': 200000,
      'gpt-4': 8000,
      'gpt-4-turbo': 128000,
    };

    it('should match exact model names', () => {
      expect(findMatchingPattern('claude-3-opus', testMap)).toBe('claude-3-opus');
      expect(findMatchingPattern('gpt-4-turbo', testMap)).toBe('gpt-4-turbo');
    });

    it('should match more specific patterns first (reverse order)', () => {
      // claude-3-opus-20240229 should match 'claude-3-opus' not 'claude-3' or 'claude-'
      expect(findMatchingPattern('claude-3-opus-20240229', testMap)).toBe('claude-3-opus');
    });

    it('should fall back to broader patterns when no specific match', () => {
      // claude-3-haiku should match 'claude-3' (not 'claude-3-opus')
      expect(findMatchingPattern('claude-3-haiku', testMap)).toBe('claude-3');
    });

    it('should be case-insensitive', () => {
      expect(findMatchingPattern('Claude-3-Opus', testMap)).toBe('claude-3-opus');
      expect(findMatchingPattern('GPT-4-TURBO', testMap)).toBe('gpt-4-turbo');
    });

    it('should return null for unmatched models', () => {
      expect(findMatchingPattern('unknown-model', testMap)).toBeNull();
      expect(findMatchingPattern('llama-2', testMap)).toBeNull();
    });

    it('should NOT match when pattern appears in middle of model name (startsWith behavior)', () => {
      // This is the key fix: "my-claude-wrapper" should NOT match "claude-"
      expect(findMatchingPattern('my-claude-wrapper', testMap)).toBeNull();
      expect(findMatchingPattern('openai-gpt-4-proxy', testMap)).toBeNull();
      expect(findMatchingPattern('custom-claude-3-service', testMap)).toBeNull();
    });

    it('should handle empty string model name', () => {
      expect(findMatchingPattern('', testMap)).toBeNull();
    });

    it('should handle empty tokens map', () => {
      expect(findMatchingPattern('claude-3', {})).toBeNull();
    });
  });

  describe('getModelMaxTokens', () => {
    it('should return exact match tokens', () => {
      expect(getModelMaxTokens('gpt-4o', EModelEndpoint.openAI)).toBe(127500);
      expect(getModelMaxTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(200000);
    });

    it('should return pattern-matched tokens', () => {
      // claude-3-opus-20240229 should match claude-3-opus pattern
      expect(getModelMaxTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe(200000);
    });

    it('should return undefined for unknown models', () => {
      expect(getModelMaxTokens('completely-unknown-model', EModelEndpoint.openAI)).toBeUndefined();
    });

    it('should fall back to openAI for unknown endpoints', () => {
      const result = getModelMaxTokens('gpt-4o', 'unknown-endpoint');
      expect(result).toBe(127500);
    });

    it('should handle non-string input gracefully', () => {
      expect(getModelMaxTokens(null as unknown as string)).toBeUndefined();
      expect(getModelMaxTokens(undefined as unknown as string)).toBeUndefined();
      expect(getModelMaxTokens(123 as unknown as string)).toBeUndefined();
    });

    it('should NOT match model names with pattern in middle', () => {
      // A model like "my-gpt-4-wrapper" should not match "gpt-4"
      expect(getModelMaxTokens('my-gpt-4-wrapper', EModelEndpoint.openAI)).toBeUndefined();
    });
  });

  describe('getModelMaxOutputTokens', () => {
    it('should return exact match output tokens', () => {
      expect(getModelMaxOutputTokens('o1', EModelEndpoint.openAI)).toBe(32268);
      expect(getModelMaxOutputTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(4096);
    });

    it('should return pattern-matched output tokens', () => {
      expect(getModelMaxOutputTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe(
        4096,
      );
    });

    it('should return system_default for unknown models (openAI endpoint)', () => {
      expect(getModelMaxOutputTokens('unknown-model', EModelEndpoint.openAI)).toBe(32000);
    });

    it('should handle non-string input gracefully', () => {
      expect(getModelMaxOutputTokens(null as unknown as string)).toBeUndefined();
      expect(getModelMaxOutputTokens(undefined as unknown as string)).toBeUndefined();
    });
  });

  describe('matchModelName', () => {
    it('should return exact match model name', () => {
      expect(matchModelName('gpt-4o', EModelEndpoint.openAI)).toBe('gpt-4o');
    });

    it('should return pattern key for pattern matches', () => {
      expect(matchModelName('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe(
        'claude-3-opus',
      );
    });

    it('should return input for unknown models', () => {
      expect(matchModelName('unknown-model', EModelEndpoint.openAI)).toBe('unknown-model');
    });

    it('should handle non-string input gracefully', () => {
      expect(matchModelName(null as unknown as string)).toBeUndefined();
    });
  });

  describe('maxTokensMap structure', () => {
    it('should have entries for all major endpoints', () => {
      expect(maxTokensMap[EModelEndpoint.openAI]).toBeDefined();
      expect(maxTokensMap[EModelEndpoint.anthropic]).toBeDefined();
      expect(maxTokensMap[EModelEndpoint.google]).toBeDefined();
      expect(maxTokensMap[EModelEndpoint.azureOpenAI]).toBeDefined();
      expect(maxTokensMap[EModelEndpoint.bedrock]).toBeDefined();
    });

    it('should have positive token values', () => {
      // Collect offenders as "endpoint/model" so a failure names the bad entry
      // instead of only reporting "expected 0 to be greater than 0".
      const invalidEntries: string[] = [];
      for (const [endpoint, endpointMap] of Object.entries(maxTokensMap)) {
        for (const [model, tokens] of Object.entries(endpointMap)) {
          // `!(tokens > 0)` also rejects NaN, like `toBeGreaterThan(0)` would.
          if (!(tokens > 0)) {
            invalidEntries.push(`${endpoint}/${model}`);
          }
        }
      }
      expect(invalidEntries).toEqual([]);
    });
  });
});

View file

@ -47,3 +47,5 @@ export { default as createPayload } from './createPayload';
/* feedback */
export * from './feedback';
export * from './parameterSettings';
/* token limits */
export * from './tokens';

View file

@ -618,6 +618,7 @@ export type TMessage = z.input<typeof tMessageSchema> & {
attachments?: TAttachment[];
clientTimestamp?: string;
feedback?: TFeedback;
tokenCount?: number;
};
export const coerceNumber = z.union([z.number(), z.string()]).transform((val) => {

View file

@ -0,0 +1,527 @@
import { EModelEndpoint } from './schemas';
/**
 * Model context window token limits.
 * These values represent the maximum context tokens (input) for each model.
 * Values are slightly reduced from actual max to leave room for output tokens.
 *
 * Keys double as name prefixes for lookups, and key order is significant:
 * a prefix scan that walks the keys from last to first returns the first hit,
 * so every key must be listed AFTER any shorter key it starts with
 * (`o3` before `o3-mini`, `gpt-4` before `gpt-4-32k`, ...).
 */
const openAIModels: Record<string, number> = {
  'o4-mini': 200000,
  // `o3` must precede `o3-mini`; otherwise `o3-mini-<date>` resolves to `o3`.
  o3: 200000,
  'o3-mini': 195000, // -5000 from max
  o1: 195000, // -5000 from max
  'o1-mini': 127500, // -500 from max
  'o1-preview': 127500, // -500 from max
  'gpt-4': 8187, // -5 from max
  'gpt-4-0613': 8187, // -5 from max
  'gpt-4-32k': 32758, // -10 from max
  'gpt-4-32k-0314': 32758, // -10 from max
  'gpt-4-32k-0613': 32758, // -10 from max
  'gpt-4-1106': 127500, // -500 from max
  'gpt-4-0125': 127500, // -500 from max
  'gpt-4.5': 127500, // -500 from max
  'gpt-4.1': 1047576,
  'gpt-4.1-mini': 1047576,
  'gpt-4.1-nano': 1047576,
  'gpt-5': 400000,
  'gpt-5-mini': 400000,
  'gpt-5-nano': 400000,
  'gpt-5-pro': 400000,
  'gpt-4o': 127500, // -500 from max
  'gpt-4o-mini': 127500, // -500 from max
  'gpt-4o-2024-05-13': 127500, // -500 from max
  'gpt-4-turbo': 127500, // -500 from max
  'gpt-4-vision': 127500, // -500 from max
  'gpt-3.5-turbo': 16375, // -10 from max
  'gpt-3.5-turbo-0613': 4092, // -5 from max
  'gpt-3.5-turbo-0301': 4092, // -5 from max
  'gpt-3.5-turbo-16k': 16375, // -10 from max
  'gpt-3.5-turbo-16k-0613': 16375, // -10 from max
  'gpt-3.5-turbo-1106': 16375, // -10 from max
  'gpt-3.5-turbo-0125': 16375, // -10 from max
};
/**
 * Context-token limits for the Mistral model family, keyed by model name.
 * Keys are also matched as prefixes of the requested model name (see
 * `findMatchingPattern`), so keep a specific key (e.g. 'mistral-large-2402')
 * listed after the shorter key it extends ('mistral-large', 'mistral-').
 */
const mistralModels: Record<string, number> = {
'mistral-': 31990, // -10 from max
'mistral-7b': 31990, // -10 from max
'mistral-small': 31990, // -10 from max
'mixtral-8x7b': 31990, // -10 from max
'mixtral-8x22b': 65536,
'mistral-large': 131000,
'mistral-large-2402': 127500,
'mistral-large-2407': 127500,
'mistral-nemo': 131000,
'pixtral-large': 131000,
'mistral-saba': 32000,
codestral: 256000,
'ministral-8b': 131000,
'ministral-3b': 131000,
};
/**
 * Context-token limits for Cohere Command models, keyed by model name.
 *
 * Keys double as name prefixes for lookups, and key order is significant:
 * a prefix scan that walks the keys from last to first returns the first hit,
 * so the bare `command` key must come first. Listed later, it would shadow
 * `command-light` and `command-light-nightly` for suffixed model names.
 */
const cohereModels: Record<string, number> = {
  command: 4086, // -10 from max
  'command-light': 4086, // -10 from max
  'command-light-nightly': 8182, // -10 from max
  'command-nightly': 8182, // -10 from max
  'command-text': 4086, // -10 from max
  'command-r': 127500, // -500 from max
  'command-r-plus': 127500, // -500 from max
};
/**
 * Context-token limits for Google models (Gemma, Gemini, PaLM2/Codey), keyed
 * by model name.
 *
 * Keys double as name prefixes for lookups, and key order is significant:
 * a prefix scan that walks the keys from last to first returns the first hit.
 * The generic `code-` / `codechat-` / `text-` / `chat-` fallbacks are therefore
 * listed BEFORE the `*-bison-32k` keys; listed after them, they would shadow
 * the 32k limits for suffixed names such as `text-bison-32k@002`.
 */
const googleModels: Record<string, number> = {
  /* Max I/O is combined so we subtract the amount from max response tokens for actual total */
  gemma: 8196,
  'gemma-2': 32768,
  'gemma-3': 32768,
  'gemma-3-27b': 131072,
  gemini: 30720, // -2048 from max
  'gemini-pro-vision': 12288,
  'gemini-exp': 2000000,
  'gemini-3': 1000000, // 1M input tokens, 64k output tokens
  'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens
  'gemini-2.5-pro': 1000000,
  'gemini-2.5-flash': 1000000,
  'gemini-2.5-flash-lite': 1000000,
  'gemini-2.0': 2000000,
  'gemini-2.0-flash': 1000000,
  'gemini-2.0-flash-lite': 1000000,
  'gemini-1.5': 1000000,
  'gemini-1.5-flash': 1000000,
  'gemini-1.5-flash-8b': 1000000,
  /* Codey, -5 from max: 6144 */
  'code-': 6139,
  'codechat-': 6139,
  /* PaLM2, -5 from max: 8192 */
  'text-': 8187,
  'chat-': 8187,
  /* 32k variants: more specific than the generic prefixes above, so they come last */
  'text-bison-32k': 32758, // -10 from max
  'chat-bison-32k': 32758, // -10 from max
  'code-bison-32k': 32758, // -10 from max
  'codechat-bison-32k': 32758,
};
/**
 * Context-token limits for Anthropic Claude models, keyed by model name.
 * Keys are also matched as prefixes of the requested model name (see
 * `findMatchingPattern`), so keep a specific key (e.g. 'claude-3-opus')
 * listed after the shorter key it extends ('claude-3', 'claude-').
 * Both dotted and dashed version spellings ('claude-3.5-…' / 'claude-3-5-…')
 * are listed as separate keys.
 */
const anthropicModels: Record<string, number> = {
'claude-': 100000,
'claude-instant': 100000,
'claude-2': 100000,
'claude-2.1': 200000,
'claude-3': 200000,
'claude-3-haiku': 200000,
'claude-3-sonnet': 200000,
'claude-3-opus': 200000,
'claude-3.5-haiku': 200000,
'claude-3-5-haiku': 200000,
'claude-3-5-sonnet': 200000,
'claude-3.5-sonnet': 200000,
'claude-3-7-sonnet': 200000,
'claude-3.7-sonnet': 200000,
'claude-3-5-sonnet-latest': 200000,
'claude-3.5-sonnet-latest': 200000,
'claude-haiku-4-5': 200000,
'claude-sonnet-4': 1000000,
'claude-4': 200000,
'claude-opus-4': 200000,
'claude-opus-4-5': 200000,
};
/**
 * Context-token limits for DeepSeek models, keyed by model name.
 * The bare 'deepseek' key comes first so it only acts as a fallback prefix
 * for names not covered by the more specific keys below it.
 */
const deepseekModels: Record<string, number> = {
deepseek: 128000,
'deepseek-chat': 128000,
'deepseek-reasoner': 128000,
'deepseek-r1': 128000,
'deepseek-v3': 128000,
'deepseek.r1': 128000,
};
/**
 * Context-token limits for Meta Llama models, keyed by model name.
 * The same model is listed under several spellings ('llama3.1', 'llama3-1',
 * 'llama-3.1', with ':Nb' or '-Nb' size suffixes) because lookups compare
 * keys literally against the start of the requested model name
 * (see `findMatchingPattern`). Keep generic keys before the specific keys
 * that extend them.
 */
const metaModels: Record<string, number> = {
// Basic patterns
llama3: 8000,
llama2: 4000,
'llama-3': 8000,
'llama-2': 4000,
// llama3.x pattern
'llama3.1': 127500,
'llama3.2': 127500,
'llama3.3': 127500,
// llama3-x pattern
'llama3-1': 127500,
'llama3-2': 127500,
'llama3-3': 127500,
// llama-3.x pattern
'llama-3.1': 127500,
'llama-3.2': 127500,
'llama-3.3': 127500,
// llama3.x:Nb pattern
'llama3.1:405b': 127500,
'llama3.1:70b': 127500,
'llama3.1:8b': 127500,
'llama3.2:1b': 127500,
'llama3.2:3b': 127500,
'llama3.2:11b': 127500,
'llama3.2:90b': 127500,
'llama3.3:70b': 127500,
// llama3-x-Nb pattern
'llama3-1-405b': 127500,
'llama3-1-70b': 127500,
'llama3-1-8b': 127500,
'llama3-2-1b': 127500,
'llama3-2-3b': 127500,
'llama3-2-11b': 127500,
'llama3-2-90b': 127500,
'llama3-3-70b': 127500,
// llama-3.x-Nb pattern
'llama-3.1-405b': 127500,
'llama-3.1-70b': 127500,
'llama-3.1-8b': 127500,
'llama-3.2-1b': 127500,
'llama-3.2-3b': 127500,
'llama-3.2-11b': 127500,
'llama-3.2-90b': 127500,
'llama-3.3-70b': 127500,
// Original llama2/3 patterns
'llama3-70b': 8000,
'llama3-8b': 8000,
'llama2-70b': 4000,
'llama2-13b': 4000,
'llama3:70b': 8000,
'llama3:8b': 8000,
'llama2:70b': 4000,
};
/**
 * Context-token limits for Qwen models, keyed by model name.
 * Keys are also matched as prefixes of the requested model name (see
 * `findMatchingPattern`); the generic 'qwen' and 'qwen3' keys come before
 * the specific keys that extend them so they only act as fallbacks.
 */
const qwenModels: Record<string, number> = {
qwen: 32000,
'qwen2.5': 32000,
'qwen-turbo': 1000000,
'qwen-plus': 131000,
'qwen-max': 32000,
'qwq-32b': 32000,
// Qwen3 models
qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
'qwen3-8b': 128000,
'qwen3-14b': 40960,
'qwen3-30b-a3b': 40960,
'qwen3-32b': 40960,
'qwen3-235b-a22b': 40960,
// Qwen3 VL (Vision-Language) models
'qwen3-vl-8b-thinking': 256000,
'qwen3-vl-8b-instruct': 262144,
'qwen3-vl-30b-a3b': 262144,
'qwen3-vl-235b-a22b': 131072,
// Qwen3 specialized models
'qwen3-max': 256000,
'qwen3-coder': 262144,
'qwen3-coder-30b-a3b': 262144,
'qwen3-coder-plus': 128000,
'qwen3-coder-flash': 128000,
'qwen3-next-80b-a3b': 262144,
};
/**
 * Context-token limits for AI21 models (Jurassic-2 'j2-*' and Jamba),
 * keyed by model name. Merged into `bedrockModels`.
 */
const ai21Models: Record<string, number> = {
'j2-mid': 8182, // -10 from max
'j2-ultra': 8182, // -10 from max
'jamba-instruct': 255500, // -500 from max
};
/**
 * Context-token limits for Amazon's own models (Titan and Nova), keyed by
 * model name. Merged into `bedrockModels`.
 */
const amazonModels: Record<string, number> = {
// Amazon Titan models
'titan-text-lite': 4000,
'titan-text-express': 8000,
'titan-text-premier': 31500, // -500 from max
// Amazon Nova models
// https://aws.amazon.com/ai/generative-ai/nova/
'nova-micro': 127000, // -1000 from max
'nova-lite': 295000, // -5000 from max
'nova-pro': 295000, // -5000 from max
'nova-premier': 995000, // -5000 from max
};
/**
 * Context-token limits used for the Bedrock endpoint: the union of the
 * per-vendor tables below. Spread order fixes the key order of the result;
 * if two tables ever share a key, the later spread's value wins while the
 * key keeps the position of its first insertion.
 */
const bedrockModels: Record<string, number> = {
...anthropicModels,
...mistralModels,
...cohereModels,
...deepseekModels,
...metaModels,
...ai21Models,
...amazonModels,
};
/**
 * Context-token limits for xAI Grok models, keyed by model name.
 * Keys are also matched as prefixes of the requested model name (see
 * `findMatchingPattern`); the bare 'grok' key comes first so it only acts as
 * a fallback, and each specific key follows the shorter key it extends.
 */
const xAIModels: Record<string, number> = {
grok: 131072,
'grok-beta': 131072,
'grok-vision-beta': 8192,
'grok-2': 131072,
'grok-2-latest': 131072,
'grok-2-1212': 131072,
'grok-2-vision': 32768,
'grok-2-vision-latest': 32768,
'grok-2-vision-1212': 32768,
'grok-3': 131072,
'grok-3-fast': 131072,
'grok-3-mini': 131072,
'grok-3-mini-fast': 131072,
'grok-4': 256000, // 256K context
'grok-4-fast': 2000000, // 2M context
'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants)
'grok-code-fast': 256000, // 256K context
};
/**
 * Combined context-token table: every vendor table above plus a few
 * miscellaneous models (Kimi, GPT-OSS, GLM). `maxTokensMap` uses it for the
 * openAI, agents and custom endpoints.
 * Spread order fixes the key order of the result; if two tables ever share a
 * key, the later value wins while the key keeps its first-insertion position.
 */
const aggregateModels: Record<string, number> = {
...openAIModels,
...googleModels,
...bedrockModels,
...xAIModels,
...qwenModels,
// misc.
kimi: 131000,
// GPT-OSS
'gpt-oss': 131000,
'gpt-oss:20b': 131000,
'gpt-oss-20b': 131000,
'gpt-oss:120b': 131000,
'gpt-oss-120b': 131000,
// GLM models (Zhipu AI)
glm4: 128000,
'glm-4': 128000,
'glm-4-32b': 128000,
'glm-4.5': 131000,
'glm-4.5-air': 131000,
'glm-4.5v': 66000,
'glm-4.6': 200000,
};
/**
 * Map of endpoint to model context token limits.
 *
 * azureOpenAI, google, anthropic and bedrock each get their vendor-specific
 * table; openAI, agents and custom share the aggregate table.
 * Endpoints with no entry here are resolved by `getModelMaxTokens` against
 * the openAI entry.
 */
export const maxTokensMap: Record<string, Record<string, number>> = {
[EModelEndpoint.azureOpenAI]: openAIModels,
[EModelEndpoint.openAI]: aggregateModels,
[EModelEndpoint.agents]: aggregateModels,
[EModelEndpoint.custom]: aggregateModels,
[EModelEndpoint.google]: googleModels,
[EModelEndpoint.anthropic]: anthropicModels,
[EModelEndpoint.bedrock]: bedrockModels,
};
/**
 * Finds the most specific key in `tokensMap` that is a prefix of `modelName`.
 *
 * Matching is case-insensitive on both the model name and the keys, and is
 * anchored at the start of the model name: `my-claude-wrapper` does not match
 * `claude-`. As a consequence, provider-qualified ids that put a vendor tag in
 * front of the model name (e.g. `anthropic.claude-3-…`) match no bare key.
 *
 * When several keys are prefixes of the model name, the longest one wins
 * (`o3-mini-2025-01-31` resolves to `o3-mini`, never to `o3`), independent of
 * where the keys sit in the map. Keys of equal length can only both match
 * when they differ by case; the one inserted last then wins.
 *
 * @param modelName - Model identifier to resolve.
 * @param tokensMap - Map whose keys are model names or model-name prefixes.
 * @returns The matching key exactly as spelled in `tokensMap`, or `null` when
 *   no key is a prefix of `modelName`.
 */
export function findMatchingPattern(
  modelName: string,
  tokensMap: Record<string, number>,
): string | null {
  const lowerModelName = modelName.toLowerCase();
  const keys = Object.keys(tokensMap);
  let bestKey: string | null = null;

  // Walk from last to first so that, on a length tie, the later key is kept.
  for (let i = keys.length - 1; i >= 0; i--) {
    const modelKey = keys[i];
    // An empty key is a prefix of every string; never let it act as a catch-all.
    if (modelKey.length === 0) {
      continue;
    }
    // Only a strictly longer prefix can improve on the current best match.
    if (bestKey !== null && modelKey.length <= bestKey.length) {
      continue;
    }
    if (lowerModelName.startsWith(modelKey.toLowerCase())) {
      bestKey = modelKey;
    }
  }

  return bestKey;
}
/**
 * Retrieves the maximum context tokens for a given model name.
 *
 * Resolution order: exact key in the endpoint's table, then prefix match via
 * `findMatchingPattern`. An endpoint with no table of its own is resolved
 * against the openAI table.
 *
 * Only own properties of the tables are consulted, so names that happen to
 * exist on `Object.prototype` (`constructor`, `toString`, `__proto__`, ...)
 * are treated like any other unknown model or endpoint.
 *
 * @param modelName - The name of the model to look up.
 * @param endpoint - The endpoint (default is 'openAI').
 * @returns The maximum context tokens for the given model or undefined if no
 *   match is found (also for non-string `modelName`).
 *
 * @example
 * getModelMaxTokens('gpt-4o'); // Returns 127500
 * getModelMaxTokens('claude-3-opus', 'anthropic'); // Returns 200000
 * getModelMaxTokens('unknown-model'); // Returns undefined
 */
export function getModelMaxTokens(
  modelName: string,
  endpoint: string = EModelEndpoint.openAI,
): number | undefined {
  // Callers may pass untyped data; anything but a string has no limit.
  if (typeof modelName !== 'string') {
    return undefined;
  }

  const hasOwn = Object.prototype.hasOwnProperty;
  const tokensMap = hasOwn.call(maxTokensMap, endpoint) ? maxTokensMap[endpoint] : undefined;
  if (!tokensMap) {
    // Already on the fallback endpoint: stop instead of recursing forever.
    if (endpoint === EModelEndpoint.openAI) {
      return undefined;
    }
    // Fall back to aggregate models for unknown endpoints
    return getModelMaxTokens(modelName, EModelEndpoint.openAI);
  }

  // Try exact match first
  if (hasOwn.call(tokensMap, modelName) && tokensMap[modelName] !== undefined) {
    return tokensMap[modelName];
  }

  // Try pattern matching
  const matchedPattern = findMatchingPattern(modelName, tokensMap);
  return matchedPattern ? tokensMap[matchedPattern] : undefined;
}
/**
 * Resolves a model name to the key under which its limits are stored.
 *
 * An exact key is returned unchanged; otherwise the key found by
 * `findMatchingPattern` (a prefix match against the endpoint's table) is
 * returned. When the endpoint has no table, or nothing matches, the input
 * comes back as-is.
 *
 * @param modelName - The name of the model to look up.
 * @param endpoint - The endpoint (default is 'openAI').
 * @returns The matching key, the input when there is no match, or undefined
 *   when `modelName` is not a string.
 */
export function matchModelName(
  modelName: string,
  endpoint: string = EModelEndpoint.openAI,
): string | undefined {
  if (typeof modelName !== 'string') {
    return undefined;
  }

  const endpointTable = maxTokensMap[endpoint];
  // No table for this endpoint, or the name is itself a key: nothing to resolve.
  if (!endpointTable || endpointTable[modelName] !== undefined) {
    return modelName;
  }

  const prefixKey = findMatchingPattern(modelName, endpointTable);
  return prefixKey ? prefixKey : modelName;
}
// The individual per-vendor model maps above are intentionally module-private:
// they are not exported, to avoid name conflicts with config.ts.
// =============================================================================
// OUTPUT TOKEN LIMITS
// =============================================================================
/**
 * Maximum output tokens for OpenAI and similar models.
 * Values from official documentation, slightly reduced to leave safety margin.
 *
 * `system_default` is not a model name: `getModelMaxOutputTokens` returns it
 * when neither an exact key nor a prefix match is found in a table that
 * contains it.
 */
const modelMaxOutputs: Record<string, number> = {
o1: 32268, // -500 from max: 32,768
'o1-mini': 65136, // -500 from max: 65,536
'o1-preview': 32268, // -500 from max: 32,768
'gpt-5': 128000,
'gpt-5-mini': 128000,
'gpt-5-nano': 128000,
'gpt-5-pro': 128000,
'gpt-oss-20b': 131000,
'gpt-oss-120b': 131000,
system_default: 32000,
};
/**
 * Maximum output tokens for Anthropic Claude models.
 * Values from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names
 *
 * Keys are also matched as prefixes of the requested model name (see
 * `findMatchingPattern`), so 'claude-opus-4' is listed before the more
 * specific 'claude-opus-4-5'. This table has no `system_default` entry.
 */
const anthropicMaxOutputs: Record<string, number> = {
'claude-3-haiku': 4096,
'claude-3-sonnet': 4096,
'claude-3-opus': 4096,
'claude-haiku-4-5': 64000,
'claude-sonnet-4': 64000,
'claude-opus-4': 32000,
'claude-opus-4-5': 64000,
'claude-3.5-sonnet': 8192,
'claude-3-5-sonnet': 8192,
'claude-3.7-sonnet': 128000,
'claude-3-7-sonnet': 128000,
};
/**
 * Maximum output tokens for DeepSeek models.
 * Values from https://api-docs.deepseek.com/quick_start/pricing
 *
 * Merged into the openAI and custom entries of `maxOutputTokensMap`.
 * The bare 'deepseek' key comes first so it only acts as a fallback prefix.
 */
const deepseekMaxOutputs: Record<string, number> = {
deepseek: 8000, // deepseek-chat default: 4K, max: 8K
'deepseek-chat': 8000,
'deepseek-reasoner': 64000, // default: 32K, max: 64K
'deepseek-r1': 64000,
'deepseek-v3': 8000,
'deepseek.r1': 64000,
};
/**
 * Map of endpoint to model max output token limits.
 *
 * Only anthropic, azureOpenAI, openAI and custom have entries; any other
 * endpoint is resolved by `getModelMaxOutputTokens` against the openAI entry.
 * The openAI and custom entries are separate object literals with the same
 * contents (OpenAI-style limits plus the DeepSeek limits).
 */
export const maxOutputTokensMap: Record<string, Record<string, number>> = {
[EModelEndpoint.anthropic]: anthropicMaxOutputs,
[EModelEndpoint.azureOpenAI]: modelMaxOutputs,
[EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
[EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
};
/**
 * Retrieves the maximum output tokens for a given model name.
 *
 * Resolution order: exact key in the endpoint's table, then prefix match via
 * `findMatchingPattern`, then the table's `system_default` entry if it has
 * one. An endpoint with no table of its own is resolved against the openAI
 * table.
 *
 * Only own properties of the tables are consulted, so names that happen to
 * exist on `Object.prototype` (`constructor`, `toString`, `__proto__`, ...)
 * are treated like any other unknown model or endpoint.
 *
 * @param modelName - The name of the model to look up.
 * @param endpoint - The endpoint (default is 'openAI').
 * @returns The maximum output tokens for the given model, or undefined if no
 *   match is found and the table has no `system_default` (also for non-string
 *   `modelName`).
 *
 * @example
 * getModelMaxOutputTokens('o1'); // Returns 32268
 * getModelMaxOutputTokens('claude-3-opus', 'anthropic'); // Returns 4096
 * getModelMaxOutputTokens('unknown-model'); // Returns 32000 (system_default)
 */
export function getModelMaxOutputTokens(
  modelName: string,
  endpoint: string = EModelEndpoint.openAI,
): number | undefined {
  // Callers may pass untyped data; anything but a string has no limit.
  if (typeof modelName !== 'string') {
    return undefined;
  }

  const hasOwn = Object.prototype.hasOwnProperty;
  const tokensMap = hasOwn.call(maxOutputTokensMap, endpoint)
    ? maxOutputTokensMap[endpoint]
    : undefined;
  if (!tokensMap) {
    // Already on the fallback endpoint: stop instead of recursing forever.
    if (endpoint === EModelEndpoint.openAI) {
      return undefined;
    }
    // Fall back to openAI for unknown endpoints
    return getModelMaxOutputTokens(modelName, EModelEndpoint.openAI);
  }

  // Try exact match first
  if (hasOwn.call(tokensMap, modelName) && tokensMap[modelName] !== undefined) {
    return tokensMap[modelName];
  }

  // Try pattern matching
  const matchedPattern = findMatchingPattern(modelName, tokensMap);
  if (matchedPattern) {
    return tokensMap[matchedPattern];
  }

  // Return system_default if available
  return hasOwn.call(tokensMap, 'system_default') ? tokensMap.system_default : undefined;
}
// =============================================================================
// TOKEN DEFAULTS
// =============================================================================
/**
 * Centralized token-related default values.
 * Declared `as const`, so every member is a readonly numeric literal.
 */
export const TOKEN_DEFAULTS = {
/** Fallback context window for agents when model lookup fails */
AGENT_CONTEXT_FALLBACK: 18000,
/** Legacy fallback for older clients */
LEGACY_CONTEXT_FALLBACK: 4097,
/** Safety margin multiplier (0.9 = reserve 10% for response) */
CONTEXT_SAFETY_MARGIN: 0.9,
/**
 * Default max output tokens when not specified.
 * NOTE(review): same value as `modelMaxOutputs.system_default` (32000); the two
 * are not linked in code, so presumably they must be kept in sync by hand.
 */
DEFAULT_MAX_OUTPUT: 32000,
} as const;