diff --git a/api/app/clients/specs/FakeClient.js b/api/app/clients/specs/FakeClient.js index 58480b4018..d1d07a967d 100644 --- a/api/app/clients/specs/FakeClient.js +++ b/api/app/clients/specs/FakeClient.js @@ -1,5 +1,4 @@ const { getModelMaxTokens } = require('@librechat/api'); -const { TOKEN_DEFAULTS } = require('librechat-data-provider'); const BaseClient = require('../BaseClient'); class FakeClient extends BaseClient { @@ -42,9 +41,7 @@ class FakeClient extends BaseClient { } this.maxContextTokens = - this.options.maxContextTokens ?? - getModelMaxTokens(this.modelOptions.model) ?? - TOKEN_DEFAULTS.LEGACY_CONTEXT_FALLBACK; + this.options.maxContextTokens ?? getModelMaxTokens(this.modelOptions.model) ?? 4097; } buildMessages() {} getTokenCount(str) { diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js index 11f9128837..df1bec8619 100644 --- a/api/models/tx.spec.js +++ b/api/models/tx.spec.js @@ -1,4 +1,6 @@ -const { EModelEndpoint, maxTokensMap } = require('librechat-data-provider'); +/** Note: No hard-coded values should be used in this file. */ +const { maxTokensMap } = require('@librechat/api'); +const { EModelEndpoint } = require('librechat-data-provider'); const { defaultRate, tokenValues, diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index a9448aae41..0cfdc30227 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -1,8 +1,11 @@ -const { EModelEndpoint, maxTokensMap, maxOutputTokensMap } = require('librechat-data-provider'); +/** Note: No hard-coded values should be used in this file. */ +const { EModelEndpoint } = require('librechat-data-provider'); const { + maxTokensMap, matchModelName, processModelData, getModelMaxTokens, + maxOutputTokensMap, findMatchingPattern, } = require('@librechat/api'); diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts index 3e1b09df18..008aa4c0ba 100644 --- a/packages/api/src/agents/initialize.ts +++ b/packages/api/src/agents/initialize.ts @@ -8,7 +8,6 @@ import { isAgentsEndpoint, replaceSpecialVars, providerEndpointMap, - TOKEN_DEFAULTS, } from 'librechat-data-provider'; import type { AgentToolResources, @@ -338,7 +337,7 @@ export async function initializeAgent( providerEndpointMap[provider as keyof typeof providerEndpointMap], options.endpointTokenConfig, ), - TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK, + 18000, ); if ( @@ -395,7 +394,7 @@ export async function initializeAgent( agent.additional_instructions = artifactsPromptResult ?? undefined; } - const agentMaxContextNum = Number(agentMaxContextTokens) || TOKEN_DEFAULTS.AGENT_CONTEXT_FALLBACK; + const agentMaxContextNum = Number(agentMaxContextTokens) || 18000; const maxOutputTokensNum = Number(maxOutputTokens) || 0; const finalAttachments: IMongoFile[] = (primedAttachments ?? []) @@ -414,9 +413,7 @@ export async function initializeAgent( toolContextMap: toolContextMap ?? {}, useLegacyContent: !!options.useLegacyContent, tools: (tools ?? 
[]) as GenericTool[] & string[], - maxContextTokens: Math.round( - (agentMaxContextNum - maxOutputTokensNum) * TOKEN_DEFAULTS.CONTEXT_SAFETY_MARGIN, - ), + maxContextTokens: Math.round((agentMaxContextNum - maxOutputTokensNum) * 0.9), }; return initializedAgent; diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index 571bc93052..49f1640a7a 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -1,7 +1,424 @@ import z from 'zod'; -import { EModelEndpoint, maxTokensMap, maxOutputTokensMap } from 'librechat-data-provider'; +import { EModelEndpoint } from 'librechat-data-provider'; import type { EndpointTokenConfig, TokenConfig } from '~/types'; +/** + * Model Token Configuration Maps + * + * IMPORTANT: Key Ordering for Pattern Matching + * ============================================ + * The `findMatchingPattern` function iterates through object keys in REVERSE order + * (last-defined keys are checked first) and uses `modelName.includes(key)` for matching. + * + * This means: + * 1. BASE PATTERNS must be defined FIRST (e.g., "kimi", "moonshot") + * 2. SPECIFIC PATTERNS must be defined AFTER their base patterns (e.g., "kimi-k2", "kimi-k2.5") + * + * Example ordering for Kimi models: + * kimi: 262144, // Base pattern - checked last + * 'kimi-k2': 262144, // More specific - checked before "kimi" + * 'kimi-k2.5': 262144, // Most specific - checked first + * + * Why this matters: + * - Model name "kimi-k2.5" contains both "kimi" and "kimi-k2" as substrings + * - If "kimi" were checked first, it would incorrectly match "kimi-k2.5" + * - By defining specific patterns AFTER base patterns, they're checked first in reverse iteration + * + * When adding new model families: + * 1. Define the base/generic pattern first + * 2. Define increasingly specific patterns after + * 3. 
Ensure no pattern is a substring of another that should match differently + */ + +const openAIModels = { + 'o4-mini': 200000, + 'o3-mini': 195000, // -5000 from max + o3: 200000, + o1: 195000, // -5000 from max + 'o1-mini': 127500, // -500 from max + 'o1-preview': 127500, // -500 from max + 'gpt-4': 8187, // -5 from max + 'gpt-4-0613': 8187, // -5 from max + 'gpt-4-32k': 32758, // -10 from max + 'gpt-4-32k-0314': 32758, // -10 from max + 'gpt-4-32k-0613': 32758, // -10 from max + 'gpt-4-1106': 127500, // -500 from max + 'gpt-4-0125': 127500, // -500 from max + 'gpt-4.5': 127500, // -500 from max + 'gpt-4.1': 1047576, + 'gpt-4.1-mini': 1047576, + 'gpt-4.1-nano': 1047576, + 'gpt-5': 400000, + 'gpt-5.1': 400000, + 'gpt-5.2': 400000, + 'gpt-5-mini': 400000, + 'gpt-5-nano': 400000, + 'gpt-5-pro': 400000, + 'gpt-4o': 127500, // -500 from max + 'gpt-4o-mini': 127500, // -500 from max + 'gpt-4o-2024-05-13': 127500, // -500 from max + 'gpt-4-turbo': 127500, // -500 from max + 'gpt-4-vision': 127500, // -500 from max + 'gpt-3.5-turbo': 16375, // -10 from max + 'gpt-3.5-turbo-0613': 4092, // -5 from max + 'gpt-3.5-turbo-0301': 4092, // -5 from max + 'gpt-3.5-turbo-16k': 16375, // -10 from max + 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max + 'gpt-3.5-turbo-1106': 16375, // -10 from max + 'gpt-3.5-turbo-0125': 16375, // -10 from max +}; + +const mistralModels = { + 'mistral-': 31990, // -10 from max + 'mistral-7b': 31990, // -10 from max + 'mistral-small': 31990, // -10 from max + 'mixtral-8x7b': 31990, // -10 from max + 'mixtral-8x22b': 65536, + 'mistral-large': 131000, + 'mistral-large-2402': 127500, + 'mistral-large-2407': 127500, + 'mistral-nemo': 131000, + 'pixtral-large': 131000, + 'mistral-saba': 32000, + codestral: 256000, + 'ministral-8b': 131000, + 'ministral-3b': 131000, +}; + +const cohereModels = { + 'command-light': 4086, // -10 from max + 'command-light-nightly': 8182, // -10 from max + command: 4086, // -10 from max + 'command-nightly': 8182, // -10 from max + 'command-text': 4086, // -10 from max + 'command-r': 127500, // -500 from max + 'command-r-plus': 127500, // -500 from max +}; + +const googleModels = { + /* Max I/O is combined so we subtract the amount from max response tokens for actual total */ + gemma: 8196, + 'gemma-2': 32768, + 'gemma-3': 32768, + 'gemma-3-27b': 131072, + gemini: 30720, // -2048 from max + 'gemini-pro-vision': 12288, + 'gemini-exp': 2000000, + 'gemini-3': 1000000, // 1M input tokens, 64k output tokens + 'gemini-3-pro-image': 1000000, + 'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens + 'gemini-2.5-pro': 1000000, + 'gemini-2.5-flash': 1000000, + 'gemini-2.5-flash-image': 1000000, + 'gemini-2.5-flash-lite': 1000000, + 'gemini-2.0': 2000000, + 'gemini-2.0-flash': 1000000, + 'gemini-2.0-flash-lite': 1000000, + 'gemini-1.5': 1000000, + 'gemini-1.5-flash': 1000000, + 'gemini-1.5-flash-8b': 1000000, + 'text-bison-32k': 32758, // -10 from max + 'chat-bison-32k': 32758, // -10 from max + 'code-bison-32k': 32758, // -10 from max + 'codechat-bison-32k': 32758, + /* Codey, -5 from max: 6144 */ + 'code-': 6139, + 'codechat-': 6139, + /* PaLM2, -5 from max: 8192 */ + 'text-': 8187, + 'chat-': 8187, +}; + +const anthropicModels = { + 'claude-': 100000, + 'claude-instant': 100000, + 'claude-2': 100000, + 'claude-2.1': 200000, + 'claude-3': 200000, + 'claude-3-haiku': 200000, + 'claude-3-sonnet': 200000, + 'claude-3-opus': 200000, + 'claude-3.5-haiku': 200000, + 'claude-3-5-haiku': 200000, + 'claude-3-5-sonnet': 200000, + 'claude-3.5-sonnet': 200000, + 
'claude-3-7-sonnet': 200000, + 'claude-3.7-sonnet': 200000, + 'claude-3-5-sonnet-latest': 200000, + 'claude-3.5-sonnet-latest': 200000, + 'claude-haiku-4-5': 200000, + 'claude-sonnet-4': 1000000, + 'claude-4': 200000, + 'claude-opus-4': 200000, + 'claude-opus-4-5': 200000, + 'claude-opus-4-6': 1000000, +}; + +const deepseekModels = { + deepseek: 128000, + 'deepseek-chat': 128000, + 'deepseek-reasoner': 128000, + 'deepseek-r1': 128000, + 'deepseek-v3': 128000, + 'deepseek.r1': 128000, +}; + +const moonshotModels = { + // Base patterns (check last due to reverse iteration) + kimi: 262144, + moonshot: 131072, + // kimi-k2 series (specific patterns) + 'kimi-latest': 128000, + 'kimi-k2': 262144, + 'kimi-k2.5': 262144, + 'kimi-k2-turbo': 262144, + 'kimi-k2-turbo-preview': 262144, + 'kimi-k2-0905': 262144, + 'kimi-k2-0905-preview': 262144, + 'kimi-k2-0711': 131072, + 'kimi-k2-0711-preview': 131072, + 'kimi-k2-thinking': 262144, + 'kimi-k2-thinking-turbo': 262144, + // moonshot-v1 series (specific patterns) + 'moonshot-v1': 131072, + 'moonshot-v1-auto': 131072, + 'moonshot-v1-8k': 8192, + 'moonshot-v1-8k-vision': 8192, + 'moonshot-v1-8k-vision-preview': 8192, + 'moonshot-v1-32k': 32768, + 'moonshot-v1-32k-vision': 32768, + 'moonshot-v1-32k-vision-preview': 32768, + 'moonshot-v1-128k': 131072, + 'moonshot-v1-128k-vision': 131072, + 'moonshot-v1-128k-vision-preview': 131072, + // Bedrock moonshot models + 'moonshot.kimi': 262144, + 'moonshot.kimi-k2': 262144, + 'moonshot.kimi-k2.5': 262144, + 'moonshot.kimi-k2-thinking': 262144, + 'moonshot.kimi-k2-0711': 131072, +}; + +const metaModels = { + // Basic patterns + llama3: 8000, + llama2: 4000, + 'llama-3': 8000, + 'llama-2': 4000, + + // llama3.x pattern + 'llama3.1': 127500, + 'llama3.2': 127500, + 'llama3.3': 127500, + + // llama3-x pattern + 'llama3-1': 127500, + 'llama3-2': 127500, + 'llama3-3': 127500, + + // llama-3.x pattern + 'llama-3.1': 127500, + 'llama-3.2': 127500, + 'llama-3.3': 127500, + + // llama3.x:Nb pattern + 'llama3.1:405b': 127500, + 'llama3.1:70b': 127500, + 'llama3.1:8b': 127500, + 'llama3.2:1b': 127500, + 'llama3.2:3b': 127500, + 'llama3.2:11b': 127500, + 'llama3.2:90b': 127500, + 'llama3.3:70b': 127500, + + // llama3-x-Nb pattern + 'llama3-1-405b': 127500, + 'llama3-1-70b': 127500, + 'llama3-1-8b': 127500, + 'llama3-2-1b': 127500, + 'llama3-2-3b': 127500, + 'llama3-2-11b': 127500, + 'llama3-2-90b': 127500, + 'llama3-3-70b': 127500, + + // llama-3.x-Nb pattern + 'llama-3.1-405b': 127500, + 'llama-3.1-70b': 127500, + 'llama-3.1-8b': 127500, + 'llama-3.2-1b': 127500, + 'llama-3.2-3b': 127500, + 'llama-3.2-11b': 127500, + 'llama-3.2-90b': 127500, + 'llama-3.3-70b': 127500, + + // Original llama2/3 patterns + 'llama3-70b': 8000, + 'llama3-8b': 8000, + 'llama2-70b': 4000, + 'llama2-13b': 4000, + 'llama3:70b': 8000, + 'llama3:8b': 8000, + 'llama2:70b': 4000, +}; + +const qwenModels = { + qwen: 32000, + 'qwen2.5': 32000, + 'qwen-turbo': 1000000, + 'qwen-plus': 131000, + 'qwen-max': 32000, + 'qwq-32b': 32000, + // Qwen3 models + qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) + 'qwen3-8b': 128000, + 'qwen3-14b': 40960, + 'qwen3-30b-a3b': 40960, + 'qwen3-32b': 40960, + 'qwen3-235b-a22b': 40960, + // Qwen3 VL (Vision-Language) models + 'qwen3-vl-8b-thinking': 256000, + 'qwen3-vl-8b-instruct': 262144, + 'qwen3-vl-30b-a3b': 262144, + 'qwen3-vl-235b-a22b': 131072, + // Qwen3 specialized models + 'qwen3-max': 256000, + 'qwen3-coder': 262144, + 'qwen3-coder-30b-a3b': 262144, + 'qwen3-coder-plus': 128000, + 'qwen3-coder-flash': 
128000, + 'qwen3-next-80b-a3b': 262144, +}; + +const ai21Models = { + 'j2-mid': 8182, // -10 from max + 'j2-ultra': 8182, // -10 from max + 'jamba-instruct': 255500, // -500 from max +}; + +const amazonModels = { + // Amazon Titan models + 'titan-text-lite': 4000, + 'titan-text-express': 8000, + 'titan-text-premier': 31500, // -500 from max + // Amazon Nova models + // https://aws.amazon.com/ai/generative-ai/nova/ + 'nova-micro': 127000, // -1000 from max + 'nova-lite': 295000, // -5000 from max + 'nova-pro': 295000, // -5000 from max + 'nova-premier': 995000, // -5000 from max +}; + +const bedrockModels = { + ...anthropicModels, + ...mistralModels, + ...cohereModels, + ...deepseekModels, + ...moonshotModels, + ...metaModels, + ...ai21Models, + ...amazonModels, +}; + +const xAIModels = { + grok: 131072, + 'grok-beta': 131072, + 'grok-vision-beta': 8192, + 'grok-2': 131072, + 'grok-2-latest': 131072, + 'grok-2-1212': 131072, + 'grok-2-vision': 32768, + 'grok-2-vision-latest': 32768, + 'grok-2-vision-1212': 32768, + 'grok-3': 131072, + 'grok-3-fast': 131072, + 'grok-3-mini': 131072, + 'grok-3-mini-fast': 131072, + 'grok-4': 256000, // 256K context + 'grok-4-fast': 2000000, // 2M context + 'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants) + 'grok-code-fast': 256000, // 256K context +}; + +const aggregateModels = { + ...openAIModels, + ...googleModels, + ...bedrockModels, + ...xAIModels, + ...qwenModels, + // GPT-OSS + 'gpt-oss': 131000, + 'gpt-oss:20b': 131000, + 'gpt-oss-20b': 131000, + 'gpt-oss:120b': 131000, + 'gpt-oss-120b': 131000, + // GLM models (Zhipu AI) + glm4: 128000, + 'glm-4': 128000, + 'glm-4-32b': 128000, + 'glm-4.5': 131000, + 'glm-4.5-air': 131000, + 'glm-4.5v': 66000, + 'glm-4.6': 200000, +}; + +export const maxTokensMap = { + [EModelEndpoint.azureOpenAI]: openAIModels, + [EModelEndpoint.openAI]: aggregateModels, + [EModelEndpoint.agents]: aggregateModels, + [EModelEndpoint.custom]: aggregateModels, + [EModelEndpoint.google]: googleModels, + [EModelEndpoint.anthropic]: anthropicModels, + [EModelEndpoint.bedrock]: bedrockModels, +}; + +export const modelMaxOutputs = { + o1: 32268, // -500 from max: 32,768 + 'o1-mini': 65136, // -400 from max: 65,536 + 'o1-preview': 32268, // -500 from max: 32,768 + 'gpt-5': 128000, + 'gpt-5.1': 128000, + 'gpt-5.2': 128000, + 'gpt-5-mini': 128000, + 'gpt-5-nano': 128000, + 'gpt-5-pro': 128000, + 'gpt-oss-20b': 131000, + 'gpt-oss-120b': 131000, + system_default: 32000, +}; + +/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */ +const anthropicMaxOutputs = { + 'claude-3-haiku': 4096, + 'claude-3-sonnet': 4096, + 'claude-3-opus': 4096, + 'claude-haiku-4-5': 64000, + 'claude-sonnet-4': 64000, + 'claude-opus-4': 32000, + 'claude-opus-4-5': 64000, + 'claude-opus-4-6': 128000, + 'claude-3.5-sonnet': 8192, + 'claude-3-5-sonnet': 8192, + 'claude-3.7-sonnet': 128000, + 'claude-3-7-sonnet': 128000, +}; + +/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */ +const deepseekMaxOutputs = { + deepseek: 8000, // deepseek-chat default: 4K, max: 8K + 'deepseek-chat': 8000, + 'deepseek-reasoner': 64000, // default: 32K, max: 64K + 'deepseek-r1': 64000, + 'deepseek-v3': 8000, + 'deepseek.r1': 64000, +}; + +export const maxOutputTokensMap = { + [EModelEndpoint.anthropic]: anthropicMaxOutputs, + [EModelEndpoint.azureOpenAI]: modelMaxOutputs, + [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, + [EModelEndpoint.custom]: { ...modelMaxOutputs,
...deepseekMaxOutputs }, +}; + /** * Finds the first matching pattern in the tokens map. * @param {string} modelName diff --git a/packages/data-provider/specs/tokens.spec.ts b/packages/data-provider/specs/tokens.spec.ts deleted file mode 100644 index 37eeecbea6..0000000000 --- a/packages/data-provider/specs/tokens.spec.ts +++ /dev/null @@ -1,152 +0,0 @@ -import { - findMatchingPattern, - getModelMaxTokens, - getModelMaxOutputTokens, - matchModelName, - maxTokensMap, -} from '../src/tokens'; -import { EModelEndpoint } from '../src/schemas'; - -describe('Token Pattern Matching', () => { - describe('findMatchingPattern', () => { - const testMap: Record<string, number> = { - 'claude-': 100000, - 'claude-3': 200000, - 'claude-3-opus': 200000, - 'gpt-4': 8000, - 'gpt-4-turbo': 128000, - }; - - it('should match exact model names', () => { - expect(findMatchingPattern('claude-3-opus', testMap)).toBe('claude-3-opus'); - expect(findMatchingPattern('gpt-4-turbo', testMap)).toBe('gpt-4-turbo'); - }); - - it('should match more specific patterns first (reverse order)', () => { - // claude-3-opus-20240229 should match 'claude-3-opus' not 'claude-3' or 'claude-' - expect(findMatchingPattern('claude-3-opus-20240229', testMap)).toBe('claude-3-opus'); - }); - - it('should fall back to broader patterns when no specific match', () => { - // claude-3-haiku should match 'claude-3' (not 'claude-3-opus') - expect(findMatchingPattern('claude-3-haiku', testMap)).toBe('claude-3'); - }); - - it('should be case-insensitive', () => { - expect(findMatchingPattern('Claude-3-Opus', testMap)).toBe('claude-3-opus'); - expect(findMatchingPattern('GPT-4-TURBO', testMap)).toBe('gpt-4-turbo'); - }); - - it('should return null for unmatched models', () => { - expect(findMatchingPattern('unknown-model', testMap)).toBeNull(); - expect(findMatchingPattern('llama-2', testMap)).toBeNull(); - }); - - it('should NOT match when pattern appears in middle of model name (startsWith behavior)', () => { - // This is the key fix: "my-claude-wrapper" should NOT match "claude-" - expect(findMatchingPattern('my-claude-wrapper', testMap)).toBeNull(); - expect(findMatchingPattern('openai-gpt-4-proxy', testMap)).toBeNull(); - expect(findMatchingPattern('custom-claude-3-service', testMap)).toBeNull(); - }); - - it('should handle empty string model name', () => { - expect(findMatchingPattern('', testMap)).toBeNull(); - }); - - it('should handle empty tokens map', () => { - expect(findMatchingPattern('claude-3', {})).toBeNull(); - }); - }); - - describe('getModelMaxTokens', () => { - it('should return exact match tokens', () => { - expect(getModelMaxTokens('gpt-4o', EModelEndpoint.openAI)).toBe(127500); - expect(getModelMaxTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(200000); - }); - - it('should return pattern-matched tokens', () => { - // claude-3-opus-20240229 should match claude-3-opus pattern - expect(getModelMaxTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe(200000); - }); - - it('should return undefined for unknown models', () => { - expect(getModelMaxTokens('completely-unknown-model', EModelEndpoint.openAI)).toBeUndefined(); - }); - - it('should fall back to openAI for unknown endpoints', () => { - const result = getModelMaxTokens('gpt-4o', 'unknown-endpoint'); - expect(result).toBe(127500); - }); - - it('should handle non-string input gracefully', () => { - expect(getModelMaxTokens(null as unknown as string)).toBeUndefined(); - expect(getModelMaxTokens(undefined as unknown as string)).toBeUndefined(); - expect(getModelMaxTokens(123
as unknown as string)).toBeUndefined(); - }); - - it('should NOT match model names with pattern in middle', () => { - // A model like "my-gpt-4-wrapper" should not match "gpt-4" - expect(getModelMaxTokens('my-gpt-4-wrapper', EModelEndpoint.openAI)).toBeUndefined(); - }); - }); - - describe('getModelMaxOutputTokens', () => { - it('should return exact match output tokens', () => { - expect(getModelMaxOutputTokens('o1', EModelEndpoint.openAI)).toBe(32268); - expect(getModelMaxOutputTokens('claude-3-opus', EModelEndpoint.anthropic)).toBe(4096); - }); - - it('should return pattern-matched output tokens', () => { - expect(getModelMaxOutputTokens('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe( - 4096, - ); - }); - - it('should return system_default for unknown models (openAI endpoint)', () => { - expect(getModelMaxOutputTokens('unknown-model', EModelEndpoint.openAI)).toBe(32000); - }); - - it('should handle non-string input gracefully', () => { - expect(getModelMaxOutputTokens(null as unknown as string)).toBeUndefined(); - expect(getModelMaxOutputTokens(undefined as unknown as string)).toBeUndefined(); - }); - }); - - describe('matchModelName', () => { - it('should return exact match model name', () => { - expect(matchModelName('gpt-4o', EModelEndpoint.openAI)).toBe('gpt-4o'); - }); - - it('should return pattern key for pattern matches', () => { - expect(matchModelName('claude-3-opus-20240229', EModelEndpoint.anthropic)).toBe( - 'claude-3-opus', - ); - }); - - it('should return input for unknown models', () => { - expect(matchModelName('unknown-model', EModelEndpoint.openAI)).toBe('unknown-model'); - }); - - it('should handle non-string input gracefully', () => { - expect(matchModelName(null as unknown as string)).toBeUndefined(); - }); - }); - - describe('maxTokensMap structure', () => { - it('should have entries for all major endpoints', () => { - expect(maxTokensMap[EModelEndpoint.openAI]).toBeDefined(); - expect(maxTokensMap[EModelEndpoint.anthropic]).toBeDefined(); - expect(maxTokensMap[EModelEndpoint.google]).toBeDefined(); - expect(maxTokensMap[EModelEndpoint.azureOpenAI]).toBeDefined(); - expect(maxTokensMap[EModelEndpoint.bedrock]).toBeDefined(); - }); - - it('should have positive token values', () => { - Object.values(maxTokensMap).forEach((endpointMap) => { - Object.entries(endpointMap).forEach(([model, tokens]) => { - expect(tokens).toBeGreaterThan(0); - }); - }); - }); - }); -}); diff --git a/packages/data-provider/src/index.ts b/packages/data-provider/src/index.ts index ba21ece55e..c57ca82845 100644 --- a/packages/data-provider/src/index.ts +++ b/packages/data-provider/src/index.ts @@ -47,5 +47,3 @@ export { default as createPayload } from './createPayload'; /* feedback */ export * from './feedback'; export * from './parameterSettings'; -/* token limits */ -export * from './tokens'; diff --git a/packages/data-provider/src/tokens.ts b/packages/data-provider/src/tokens.ts deleted file mode 100644 index 40df709023..0000000000 --- a/packages/data-provider/src/tokens.ts +++ /dev/null @@ -1,527 +0,0 @@ -import { EModelEndpoint } from './schemas'; - -/** - * Model context window token limits. - * These values represent the maximum context tokens (input) for each model. - * Values are slightly reduced from actual max to leave room for output tokens. 
- */ - -const openAIModels: Record<string, number> = { - 'o4-mini': 200000, - 'o3-mini': 195000, // -5000 from max - o3: 200000, - o1: 195000, // -5000 from max - 'o1-mini': 127500, // -500 from max - 'o1-preview': 127500, // -500 from max - 'gpt-4': 8187, // -5 from max - 'gpt-4-0613': 8187, // -5 from max - 'gpt-4-32k': 32758, // -10 from max - 'gpt-4-32k-0314': 32758, // -10 from max - 'gpt-4-32k-0613': 32758, // -10 from max - 'gpt-4-1106': 127500, // -500 from max - 'gpt-4-0125': 127500, // -500 from max - 'gpt-4.5': 127500, // -500 from max - 'gpt-4.1': 1047576, - 'gpt-4.1-mini': 1047576, - 'gpt-4.1-nano': 1047576, - 'gpt-5': 400000, - 'gpt-5-mini': 400000, - 'gpt-5-nano': 400000, - 'gpt-5-pro': 400000, - 'gpt-4o': 127500, // -500 from max - 'gpt-4o-mini': 127500, // -500 from max - 'gpt-4o-2024-05-13': 127500, // -500 from max - 'gpt-4-turbo': 127500, // -500 from max - 'gpt-4-vision': 127500, // -500 from max - 'gpt-3.5-turbo': 16375, // -10 from max - 'gpt-3.5-turbo-0613': 4092, // -5 from max - 'gpt-3.5-turbo-0301': 4092, // -5 from max - 'gpt-3.5-turbo-16k': 16375, // -10 from max - 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max - 'gpt-3.5-turbo-1106': 16375, // -10 from max - 'gpt-3.5-turbo-0125': 16375, // -10 from max -}; - -const mistralModels: Record<string, number> = { - 'mistral-': 31990, // -10 from max - 'mistral-7b': 31990, // -10 from max - 'mistral-small': 31990, // -10 from max - 'mixtral-8x7b': 31990, // -10 from max - 'mixtral-8x22b': 65536, - 'mistral-large': 131000, - 'mistral-large-2402': 127500, - 'mistral-large-2407': 127500, - 'mistral-nemo': 131000, - 'pixtral-large': 131000, - 'mistral-saba': 32000, - codestral: 256000, - 'ministral-8b': 131000, - 'ministral-3b': 131000, -}; - -const cohereModels: Record<string, number> = { - 'command-light': 4086, // -10 from max - 'command-light-nightly': 8182, // -10 from max - command: 4086, // -10 from max - 'command-nightly': 8182, // -10 from max - 'command-text': 4086, // -10 from max - 'command-r': 127500, // -500 from max - 'command-r-plus': 127500, // -500 from max -}; - -const googleModels: Record<string, number> = { - /* Max I/O is combined so we subtract the amount from max response tokens for actual total */ - gemma: 8196, - 'gemma-2': 32768, - 'gemma-3': 32768, - 'gemma-3-27b': 131072, - gemini: 30720, // -2048 from max - 'gemini-pro-vision': 12288, - 'gemini-exp': 2000000, - 'gemini-3': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5-pro': 1000000, - 'gemini-2.5-flash': 1000000, - 'gemini-2.5-flash-lite': 1000000, - 'gemini-2.0': 2000000, - 'gemini-2.0-flash': 1000000, - 'gemini-2.0-flash-lite': 1000000, - 'gemini-1.5': 1000000, - 'gemini-1.5-flash': 1000000, - 'gemini-1.5-flash-8b': 1000000, - 'text-bison-32k': 32758, // -10 from max - 'chat-bison-32k': 32758, // -10 from max - 'code-bison-32k': 32758, // -10 from max - 'codechat-bison-32k': 32758, - /* Codey, -5 from max: 6144 */ - 'code-': 6139, - 'codechat-': 6139, - /* PaLM2, -5 from max: 8192 */ - 'text-': 8187, - 'chat-': 8187, -}; - -const anthropicModels: Record<string, number> = { - 'claude-': 100000, - 'claude-instant': 100000, - 'claude-2': 100000, - 'claude-2.1': 200000, - 'claude-3': 200000, - 'claude-3-haiku': 200000, - 'claude-3-sonnet': 200000, - 'claude-3-opus': 200000, - 'claude-3.5-haiku': 200000, - 'claude-3-5-haiku': 200000, - 'claude-3-5-sonnet': 200000, - 'claude-3.5-sonnet': 200000, - 'claude-3-7-sonnet': 200000, - 'claude-3.7-sonnet': 200000, - 'claude-3-5-sonnet-latest': 200000, - 'claude-3.5-sonnet-latest': 200000, -
'claude-haiku-4-5': 200000, - 'claude-sonnet-4': 1000000, - 'claude-4': 200000, - 'claude-opus-4': 200000, - 'claude-opus-4-5': 200000, -}; - -const deepseekModels: Record<string, number> = { - deepseek: 128000, - 'deepseek-chat': 128000, - 'deepseek-reasoner': 128000, - 'deepseek-r1': 128000, - 'deepseek-v3': 128000, - 'deepseek.r1': 128000, -}; - -const metaModels: Record<string, number> = { - // Basic patterns - llama3: 8000, - llama2: 4000, - 'llama-3': 8000, - 'llama-2': 4000, - - // llama3.x pattern - 'llama3.1': 127500, - 'llama3.2': 127500, - 'llama3.3': 127500, - - // llama3-x pattern - 'llama3-1': 127500, - 'llama3-2': 127500, - 'llama3-3': 127500, - - // llama-3.x pattern - 'llama-3.1': 127500, - 'llama-3.2': 127500, - 'llama-3.3': 127500, - - // llama3.x:Nb pattern - 'llama3.1:405b': 127500, - 'llama3.1:70b': 127500, - 'llama3.1:8b': 127500, - 'llama3.2:1b': 127500, - 'llama3.2:3b': 127500, - 'llama3.2:11b': 127500, - 'llama3.2:90b': 127500, - 'llama3.3:70b': 127500, - - // llama3-x-Nb pattern - 'llama3-1-405b': 127500, - 'llama3-1-70b': 127500, - 'llama3-1-8b': 127500, - 'llama3-2-1b': 127500, - 'llama3-2-3b': 127500, - 'llama3-2-11b': 127500, - 'llama3-2-90b': 127500, - 'llama3-3-70b': 127500, - - // llama-3.x-Nb pattern - 'llama-3.1-405b': 127500, - 'llama-3.1-70b': 127500, - 'llama-3.1-8b': 127500, - 'llama-3.2-1b': 127500, - 'llama-3.2-3b': 127500, - 'llama-3.2-11b': 127500, - 'llama-3.2-90b': 127500, - 'llama-3.3-70b': 127500, - - // Original llama2/3 patterns - 'llama3-70b': 8000, - 'llama3-8b': 8000, - 'llama2-70b': 4000, - 'llama2-13b': 4000, - 'llama3:70b': 8000, - 'llama3:8b': 8000, - 'llama2:70b': 4000, -}; - -const qwenModels: Record<string, number> = { - qwen: 32000, - 'qwen2.5': 32000, - 'qwen-turbo': 1000000, - 'qwen-plus': 131000, - 'qwen-max': 32000, - 'qwq-32b': 32000, - // Qwen3 models - qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) - 'qwen3-8b': 128000, - 'qwen3-14b': 40960, - 'qwen3-30b-a3b': 40960, - 'qwen3-32b': 40960, - 'qwen3-235b-a22b': 40960, - // Qwen3 VL (Vision-Language) models - 'qwen3-vl-8b-thinking': 256000, - 'qwen3-vl-8b-instruct': 262144, - 'qwen3-vl-30b-a3b': 262144, - 'qwen3-vl-235b-a22b': 131072, - // Qwen3 specialized models - 'qwen3-max': 256000, - 'qwen3-coder': 262144, - 'qwen3-coder-30b-a3b': 262144, - 'qwen3-coder-plus': 128000, - 'qwen3-coder-flash': 128000, - 'qwen3-next-80b-a3b': 262144, -}; - -const ai21Models: Record<string, number> = { - 'j2-mid': 8182, // -10 from max - 'j2-ultra': 8182, // -10 from max - 'jamba-instruct': 255500, // -500 from max -}; - -const amazonModels: Record<string, number> = { - // Amazon Titan models - 'titan-text-lite': 4000, - 'titan-text-express': 8000, - 'titan-text-premier': 31500, // -500 from max - // Amazon Nova models - // https://aws.amazon.com/ai/generative-ai/nova/ - 'nova-micro': 127000, // -1000 from max - 'nova-lite': 295000, // -5000 from max - 'nova-pro': 295000, // -5000 from max - 'nova-premier': 995000, // -5000 from max -}; - -const bedrockModels: Record<string, number> = { - ...anthropicModels, - ...mistralModels, - ...cohereModels, - ...deepseekModels, - ...metaModels, - ...ai21Models, - ...amazonModels, -}; - -const xAIModels: Record<string, number> = { - grok: 131072, - 'grok-beta': 131072, - 'grok-vision-beta': 8192, - 'grok-2': 131072, - 'grok-2-latest': 131072, - 'grok-2-1212': 131072, - 'grok-2-vision': 32768, - 'grok-2-vision-latest': 32768, - 'grok-2-vision-1212': 32768, - 'grok-3': 131072, - 'grok-3-fast': 131072, - 'grok-3-mini': 131072, - 'grok-3-mini-fast': 131072, - 'grok-4': 256000, // 256K context - 'grok-4-fast': 2000000, // 2M context - 'grok-4-1-fast':
2000000, // 2M context (covers reasoning & non-reasoning variants) - 'grok-code-fast': 256000, // 256K context -}; - -const aggregateModels: Record<string, number> = { - ...openAIModels, - ...googleModels, - ...bedrockModels, - ...xAIModels, - ...qwenModels, - // misc. - kimi: 131000, - // GPT-OSS - 'gpt-oss': 131000, - 'gpt-oss:20b': 131000, - 'gpt-oss-20b': 131000, - 'gpt-oss:120b': 131000, - 'gpt-oss-120b': 131000, - // GLM models (Zhipu AI) - glm4: 128000, - 'glm-4': 128000, - 'glm-4-32b': 128000, - 'glm-4.5': 131000, - 'glm-4.5-air': 131000, - 'glm-4.5v': 66000, - 'glm-4.6': 200000, -}; - -/** - * Map of endpoint to model context token limits. - */ -export const maxTokensMap: Record<string, Record<string, number>> = { - [EModelEndpoint.azureOpenAI]: openAIModels, - [EModelEndpoint.openAI]: aggregateModels, - [EModelEndpoint.agents]: aggregateModels, - [EModelEndpoint.custom]: aggregateModels, - [EModelEndpoint.google]: googleModels, - [EModelEndpoint.anthropic]: anthropicModels, - [EModelEndpoint.bedrock]: bedrockModels, -}; - -/** - * Finds the most specific matching pattern in the tokens map. - * Matches the longest key that the model name starts with. - * - * @param modelName - The model name to match against patterns. - * @param tokensMap - Map of model patterns to token limits. - * @returns The matched pattern key or null if no match found. - */ -export function findMatchingPattern( - modelName: string, - tokensMap: Record<string, number>, -): string | null { - const lowerModelName = modelName.toLowerCase(); - // Sort keys by length descending to match most specific (longest) pattern first - const keys = Object.keys(tokensMap).sort((a, b) => b.length - a.length); - for (const modelKey of keys) { - if (lowerModelName.startsWith(modelKey.toLowerCase())) { - return modelKey; - } - } - return null; -} - -/** - * Retrieves the maximum context tokens for a given model name. - * - * @param modelName - The name of the model to look up. - * @param endpoint - The endpoint (default is 'openAI'). - * @returns The maximum context tokens for the given model or undefined if no match is found. - * - * @example - * getModelMaxTokens('gpt-4o'); // Returns 127500 - * getModelMaxTokens('claude-3-opus', 'anthropic'); // Returns 200000 - * getModelMaxTokens('unknown-model'); // Returns undefined - */ -export function getModelMaxTokens( - modelName: string, - endpoint: string = EModelEndpoint.openAI, -): number | undefined { - if (typeof modelName !== 'string') { - return undefined; - } - - const tokensMap = maxTokensMap[endpoint]; - if (!tokensMap) { - // Fall back to aggregate models for unknown endpoints - return getModelMaxTokens(modelName, EModelEndpoint.openAI); - } - - // Try exact match first - if (tokensMap[modelName] !== undefined) { - return tokensMap[modelName]; - } - - // Try pattern matching - const matchedPattern = findMatchingPattern(modelName, tokensMap); - if (matchedPattern) { - return tokensMap[matchedPattern]; - } - - return undefined; -} - -/** - * Retrieves the model name key for a given model name input. - * If the exact model name isn't found, it searches for partial matches. - * - * @param modelName - The name of the model to look up. - * @param endpoint - The endpoint (default is 'openAI'). - * @returns The model name key for the given model; returns input if no match is found.
- */ -export function matchModelName( - modelName: string, - endpoint: string = EModelEndpoint.openAI, -): string | undefined { - if (typeof modelName !== 'string') { - return undefined; - } - - const tokensMap = maxTokensMap[endpoint]; - if (!tokensMap) { - return modelName; - } - - if (tokensMap[modelName] !== undefined) { - return modelName; - } - - const matchedPattern = findMatchingPattern(modelName, tokensMap); - return matchedPattern || modelName; -} - -// Individual model maps are available for advanced use cases -// but not re-exported to avoid conflicts with config.ts - -// ============================================================================= -// OUTPUT TOKEN LIMITS -// ============================================================================= - -/** - * Maximum output tokens for OpenAI and similar models. - * Values from official documentation, slightly reduced to leave safety margin. - */ -const modelMaxOutputs: Record<string, number> = { - o1: 32268, // -500 from max: 32,768 - 'o1-mini': 65136, // -400 from max: 65,536 - 'o1-preview': 32268, // -500 from max: 32,768 - 'gpt-5': 128000, - 'gpt-5-mini': 128000, - 'gpt-5-nano': 128000, - 'gpt-5-pro': 128000, - 'gpt-oss-20b': 131000, - 'gpt-oss-120b': 131000, - system_default: 32000, -}; - -/** - * Maximum output tokens for Anthropic Claude models. - * Values from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names - */ -const anthropicMaxOutputs: Record<string, number> = { - 'claude-3-haiku': 4096, - 'claude-3-sonnet': 4096, - 'claude-3-opus': 4096, - 'claude-haiku-4-5': 64000, - 'claude-sonnet-4': 64000, - 'claude-opus-4': 32000, - 'claude-opus-4-5': 64000, - 'claude-3.5-sonnet': 8192, - 'claude-3-5-sonnet': 8192, - 'claude-3.7-sonnet': 128000, - 'claude-3-7-sonnet': 128000, -}; - -/** - * Maximum output tokens for DeepSeek models. - * Values from https://api-docs.deepseek.com/quick_start/pricing - */ -const deepseekMaxOutputs: Record<string, number> = { - deepseek: 8000, // deepseek-chat default: 4K, max: 8K - 'deepseek-chat': 8000, - 'deepseek-reasoner': 64000, // default: 32K, max: 64K - 'deepseek-r1': 64000, - 'deepseek-v3': 8000, - 'deepseek.r1': 64000, -}; - -/** - * Map of endpoint to model max output token limits. - */ -export const maxOutputTokensMap: Record<string, Record<string, number>> = { - [EModelEndpoint.anthropic]: anthropicMaxOutputs, - [EModelEndpoint.azureOpenAI]: modelMaxOutputs, - [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, - [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, -}; - -/** - * Retrieves the maximum output tokens for a given model name. - * - * @param modelName - The name of the model to look up. - * @param endpoint - The endpoint (default is 'openAI'). - * @returns The maximum output tokens for the given model or undefined if no match is found.
- * - * @example - * getModelMaxOutputTokens('o1'); // Returns 32268 - * getModelMaxOutputTokens('claude-3-opus', 'anthropic'); // Returns 4096 - * getModelMaxOutputTokens('unknown-model'); // Returns 32000 (system_default) - */ -export function getModelMaxOutputTokens( - modelName: string, - endpoint: string = EModelEndpoint.openAI, -): number | undefined { - if (typeof modelName !== 'string') { - return undefined; - } - - const tokensMap = maxOutputTokensMap[endpoint]; - if (!tokensMap) { - // Fall back to openAI for unknown endpoints - return getModelMaxOutputTokens(modelName, EModelEndpoint.openAI); - } - - // Try exact match first - if (tokensMap[modelName] !== undefined) { - return tokensMap[modelName]; - } - - // Try pattern matching - const matchedPattern = findMatchingPattern(modelName, tokensMap); - if (matchedPattern) { - return tokensMap[matchedPattern]; - } - - // Return system_default if available - return tokensMap.system_default; -} - -// ============================================================================= -// TOKEN DEFAULTS -// ============================================================================= - -/** - * Centralized token-related default values. - */ -export const TOKEN_DEFAULTS = { - /** Fallback context window for agents when model lookup fails */ - AGENT_CONTEXT_FALLBACK: 18000, - /** Legacy fallback for older clients */ - LEGACY_CONTEXT_FALLBACK: 4097, - /** Safety margin multiplier (0.9 = reserve 10% for response) */ - CONTEXT_SAFETY_MARGIN: 0.9, - /** Default max output tokens when not specified */ - DEFAULT_MAX_OUTPUT: 32000, -} as const;
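The key-ordering contract documented at the top of the new `packages/api/src/utils/tokens.ts` can be sanity-checked in isolation. The sketch below is a minimal re-implementation of the matching behavior that header comment describes (reverse key iteration plus `includes`); it is an illustration, not the actual `findMatchingPattern` exported from `@librechat/api`, which may differ in details:

```ts
// Minimal sketch, assuming the behavior the header comment describes:
// keys are checked in reverse definition order and matched via includes().
function findMatchingPatternSketch(
  modelName: string,
  tokensMap: Record<string, number>,
): string | null {
  const name = modelName.toLowerCase();
  const keys = Object.keys(tokensMap);
  for (let i = keys.length - 1; i >= 0; i--) {
    if (name.includes(keys[i])) {
      return keys[i];
    }
  }
  return null;
}

// 'kimi-k2.5-preview' contains 'kimi', 'kimi-k2', and 'kimi-k2.5' as
// substrings; because the specific keys are defined after the base key,
// reverse iteration checks them first and the most specific one wins.
const kimiMap = { kimi: 262144, 'kimi-k2': 262144, 'kimi-k2.5': 262144 };
console.log(findMatchingPatternSketch('kimi-k2.5-preview', kimiMap)); // 'kimi-k2.5'
```

Note that this is a deliberate behavior change from the deleted `packages/data-provider/src/tokens.ts`, whose `findMatchingPattern` sorted keys longest-first and required the model name to *start with* the key; its `startsWith`-specific tests are removed along with it.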
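Similarly, the agent context budgeting in `initializeAgent` reduces to a small amount of arithmetic now that the `TOKEN_DEFAULTS` constants are inlined. A hedged sketch follows, using a hypothetical helper name; the patch computes this inline rather than through a function:

```ts
// Illustrative only: the 18000 fallback, the `|| 0` output default, the 0.9
// safety margin, and the rounding come from the patch; the function name and
// signature are hypothetical.
function computeMaxContextTokens(
  agentMaxContextTokens?: number | string,
  maxOutputTokens?: number | string,
): number {
  const agentMaxContextNum = Number(agentMaxContextTokens) || 18000; // agent fallback
  const maxOutputTokensNum = Number(maxOutputTokens) || 0;
  // Reserve the output budget, then keep 90% of the remainder for the prompt.
  return Math.round((agentMaxContextNum - maxOutputTokensNum) * 0.9);
}

// e.g. computeMaxContextTokens(200000, 64000) === Math.round(136000 * 0.9) === 122400
```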