diff --git a/api/models/tx.js b/api/models/tx.js index 462396d860..92f2432d0e 100644 --- a/api/models/tx.js +++ b/api/models/tx.js @@ -1,4 +1,4 @@ -const { matchModelName } = require('@librechat/api'); +const { matchModelName, findMatchingPattern } = require('@librechat/api'); const defaultRate = 6; /** @@ -6,44 +6,58 @@ const defaultRate = 6; * source: https://aws.amazon.com/bedrock/pricing/ * */ const bedrockValues = { - // Basic llama2 patterns + // Basic llama2 patterns (base defaults to smallest variant) + llama2: { prompt: 0.75, completion: 1.0 }, + 'llama-2': { prompt: 0.75, completion: 1.0 }, 'llama2-13b': { prompt: 0.75, completion: 1.0 }, - 'llama2:13b': { prompt: 0.75, completion: 1.0 }, 'llama2:70b': { prompt: 1.95, completion: 2.56 }, 'llama2-70b': { prompt: 1.95, completion: 2.56 }, - // Basic llama3 patterns + // Basic llama3 patterns (base defaults to smallest variant) + llama3: { prompt: 0.3, completion: 0.6 }, + 'llama-3': { prompt: 0.3, completion: 0.6 }, 'llama3-8b': { prompt: 0.3, completion: 0.6 }, 'llama3:8b': { prompt: 0.3, completion: 0.6 }, 'llama3-70b': { prompt: 2.65, completion: 3.5 }, 'llama3:70b': { prompt: 2.65, completion: 3.5 }, - // llama3-x-Nb pattern + // llama3-x-Nb pattern (base defaults to smallest variant) + 'llama3-1': { prompt: 0.22, completion: 0.22 }, 'llama3-1-8b': { prompt: 0.22, completion: 0.22 }, 'llama3-1-70b': { prompt: 0.72, completion: 0.72 }, 'llama3-1-405b': { prompt: 2.4, completion: 2.4 }, + 'llama3-2': { prompt: 0.1, completion: 0.1 }, 'llama3-2-1b': { prompt: 0.1, completion: 0.1 }, 'llama3-2-3b': { prompt: 0.15, completion: 0.15 }, 'llama3-2-11b': { prompt: 0.16, completion: 0.16 }, 'llama3-2-90b': { prompt: 0.72, completion: 0.72 }, + 'llama3-3': { prompt: 2.65, completion: 3.5 }, + 'llama3-3-70b': { prompt: 2.65, completion: 3.5 }, - // llama3.x:Nb pattern + // llama3.x:Nb pattern (base defaults to smallest variant) + 'llama3.1': { prompt: 0.22, completion: 0.22 }, 'llama3.1:8b': { prompt: 0.22, completion: 0.22 }, 'llama3.1:70b': { prompt: 0.72, completion: 0.72 }, 'llama3.1:405b': { prompt: 2.4, completion: 2.4 }, + 'llama3.2': { prompt: 0.1, completion: 0.1 }, 'llama3.2:1b': { prompt: 0.1, completion: 0.1 }, 'llama3.2:3b': { prompt: 0.15, completion: 0.15 }, 'llama3.2:11b': { prompt: 0.16, completion: 0.16 }, 'llama3.2:90b': { prompt: 0.72, completion: 0.72 }, + 'llama3.3': { prompt: 2.65, completion: 3.5 }, + 'llama3.3:70b': { prompt: 2.65, completion: 3.5 }, - // llama-3.x-Nb pattern + // llama-3.x-Nb pattern (base defaults to smallest variant) + 'llama-3.1': { prompt: 0.22, completion: 0.22 }, 'llama-3.1-8b': { prompt: 0.22, completion: 0.22 }, 'llama-3.1-70b': { prompt: 0.72, completion: 0.72 }, 'llama-3.1-405b': { prompt: 2.4, completion: 2.4 }, + 'llama-3.2': { prompt: 0.1, completion: 0.1 }, 'llama-3.2-1b': { prompt: 0.1, completion: 0.1 }, 'llama-3.2-3b': { prompt: 0.15, completion: 0.15 }, 'llama-3.2-11b': { prompt: 0.16, completion: 0.16 }, 'llama-3.2-90b': { prompt: 0.72, completion: 0.72 }, + 'llama-3.3': { prompt: 2.65, completion: 3.5 }, 'llama-3.3-70b': { prompt: 2.65, completion: 3.5 }, 'mistral-7b': { prompt: 0.15, completion: 0.2 }, 'mistral-small': { prompt: 0.15, completion: 0.2 }, @@ -52,15 +66,19 @@ const bedrockValues = { 'mistral-large-2407': { prompt: 3.0, completion: 9.0 }, 'command-text': { prompt: 1.5, completion: 2.0 }, 'command-light': { prompt: 0.3, completion: 0.6 }, - 'ai21.j2-mid-v1': { prompt: 12.5, completion: 12.5 }, - 'ai21.j2-ultra-v1': { prompt: 18.8, completion: 18.8 }, - 'ai21.jamba-instruct-v1:0': { prompt: 0.5, completion: 0.7 }, - 'amazon.titan-text-lite-v1': { prompt: 0.15, completion: 0.2 }, - 'amazon.titan-text-express-v1': { prompt: 0.2, completion: 0.6 }, - 'amazon.titan-text-premier-v1:0': { prompt: 0.5, completion: 1.5 }, - 'amazon.nova-micro-v1:0': { prompt: 0.035, completion: 0.14 }, - 'amazon.nova-lite-v1:0': { prompt: 0.06, completion: 0.24 }, - 'amazon.nova-pro-v1:0': { prompt: 0.8, completion: 3.2 }, + // AI21 models + 'j2-mid': { prompt: 12.5, completion: 12.5 }, + 'j2-ultra': { prompt: 18.8, completion: 18.8 }, + 'jamba-instruct': { prompt: 0.5, completion: 0.7 }, + // Amazon Titan models + 'titan-text-lite': { prompt: 0.15, completion: 0.2 }, + 'titan-text-express': { prompt: 0.2, completion: 0.6 }, + 'titan-text-premier': { prompt: 0.5, completion: 1.5 }, + // Amazon Nova models + 'nova-micro': { prompt: 0.035, completion: 0.14 }, + 'nova-lite': { prompt: 0.06, completion: 0.24 }, + 'nova-pro': { prompt: 0.8, completion: 3.2 }, + 'nova-premier': { prompt: 2.5, completion: 12.5 }, 'deepseek.r1': { prompt: 1.35, completion: 5.4 }, }; @@ -71,100 +89,136 @@ const bedrockValues = { */ const tokenValues = Object.assign( { + // Legacy token size mappings (generic patterns - check LAST) '8k': { prompt: 30, completion: 60 }, '32k': { prompt: 60, completion: 120 }, '4k': { prompt: 1.5, completion: 2 }, '16k': { prompt: 3, completion: 4 }, + // Generic fallback patterns (check LAST) + 'claude-': { prompt: 0.8, completion: 2.4 }, + deepseek: { prompt: 0.28, completion: 0.42 }, + command: { prompt: 0.38, completion: 0.38 }, + gemma: { prompt: 0.02, completion: 0.04 }, // Base pattern (using gemma-3n-e4b pricing) + gemini: { prompt: 0.5, completion: 1.5 }, + 'gpt-oss': { prompt: 0.05, completion: 0.2 }, + // Specific model variants (check FIRST - more specific patterns at end) 'gpt-3.5-turbo-1106': { prompt: 1, completion: 2 }, - 'o4-mini': { prompt: 1.1, completion: 4.4 }, - 'o3-mini': { prompt: 1.1, completion: 4.4 }, - o3: { prompt: 2, completion: 8 }, - 'o1-mini': { prompt: 1.1, completion: 4.4 }, - 'o1-preview': { prompt: 15, completion: 60 }, - o1: { prompt: 15, completion: 60 }, + 'gpt-3.5-turbo-0125': { prompt: 0.5, completion: 1.5 }, + 'gpt-4-1106': { prompt: 10, completion: 30 }, + 'gpt-4.1': { prompt: 2, completion: 8 }, 'gpt-4.1-nano': { prompt: 0.1, completion: 0.4 }, 'gpt-4.1-mini': { prompt: 0.4, completion: 1.6 }, - 'gpt-4.1': { prompt: 2, completion: 8 }, 'gpt-4.5': { prompt: 75, completion: 150 }, - 'gpt-4o-mini': { prompt: 0.15, completion: 0.6 }, - 'gpt-5': { prompt: 1.25, completion: 10 }, - 'gpt-5-mini': { prompt: 0.25, completion: 2 }, - 'gpt-5-nano': { prompt: 0.05, completion: 0.4 }, 'gpt-4o': { prompt: 2.5, completion: 10 }, 'gpt-4o-2024-05-13': { prompt: 5, completion: 15 }, - 'gpt-4-1106': { prompt: 10, completion: 30 }, - 'gpt-3.5-turbo-0125': { prompt: 0.5, completion: 1.5 }, - 'claude-3-opus': { prompt: 15, completion: 75 }, + 'gpt-4o-mini': { prompt: 0.15, completion: 0.6 }, + 'gpt-5': { prompt: 1.25, completion: 10 }, + 'gpt-5-nano': { prompt: 0.05, completion: 0.4 }, + 'gpt-5-mini': { prompt: 0.25, completion: 2 }, + 'gpt-5-pro': { prompt: 15, completion: 120 }, + o1: { prompt: 15, completion: 60 }, + 'o1-mini': { prompt: 1.1, completion: 4.4 }, + 'o1-preview': { prompt: 15, completion: 60 }, + o3: { prompt: 2, completion: 8 }, + 'o3-mini': { prompt: 1.1, completion: 4.4 }, + 'o4-mini': { prompt: 1.1, completion: 4.4 }, + 'claude-instant': { prompt: 0.8, completion: 2.4 }, + 'claude-2': { prompt: 8, completion: 24 }, + 'claude-2.1': { prompt: 8, completion: 24 }, + 'claude-3-haiku': { prompt: 0.25, completion: 1.25 }, 'claude-3-sonnet': { prompt: 3, completion: 15 }, + 'claude-3-opus': { prompt: 15, completion: 75 }, + 'claude-3-5-haiku': { prompt: 0.8, completion: 4 }, + 'claude-3.5-haiku': { prompt: 0.8, completion: 4 }, 'claude-3-5-sonnet': { prompt: 3, completion: 15 }, 'claude-3.5-sonnet': { prompt: 3, completion: 15 }, 'claude-3-7-sonnet': { prompt: 3, completion: 15 }, 'claude-3.7-sonnet': { prompt: 3, completion: 15 }, - 'claude-3-5-haiku': { prompt: 0.8, completion: 4 }, - 'claude-3.5-haiku': { prompt: 0.8, completion: 4 }, - 'claude-3-haiku': { prompt: 0.25, completion: 1.25 }, - 'claude-sonnet-4': { prompt: 3, completion: 15 }, + 'claude-haiku-4-5': { prompt: 1, completion: 5 }, 'claude-opus-4': { prompt: 15, completion: 75 }, - 'claude-2.1': { prompt: 8, completion: 24 }, - 'claude-2': { prompt: 8, completion: 24 }, - 'claude-instant': { prompt: 0.8, completion: 2.4 }, - 'claude-': { prompt: 0.8, completion: 2.4 }, - 'command-r-plus': { prompt: 3, completion: 15 }, + 'claude-sonnet-4': { prompt: 3, completion: 15 }, 'command-r': { prompt: 0.5, completion: 1.5 }, + 'command-r-plus': { prompt: 3, completion: 15 }, + 'command-text': { prompt: 1.5, completion: 2.0 }, 'deepseek-reasoner': { prompt: 0.28, completion: 0.42 }, - deepseek: { prompt: 0.28, completion: 0.42 }, - /* cohere doesn't have rates for the older command models, - so this was from https://artificialanalysis.ai/models/command-light/providers */ - command: { prompt: 0.38, completion: 0.38 }, - gemma: { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing - 'gemma-2': { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing - 'gemma-3': { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing - 'gemma-3-27b': { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing - 'gemini-2.0-flash-lite': { prompt: 0.075, completion: 0.3 }, + 'deepseek-r1': { prompt: 0.4, completion: 2.0 }, + 'deepseek-v3': { prompt: 0.2, completion: 0.8 }, + 'gemma-2': { prompt: 0.01, completion: 0.03 }, // Base pattern (using gemma-2-9b pricing) + 'gemma-3': { prompt: 0.02, completion: 0.04 }, // Base pattern (using gemma-3n-e4b pricing) + 'gemma-3-27b': { prompt: 0.09, completion: 0.16 }, + 'gemini-1.5': { prompt: 2.5, completion: 10 }, + 'gemini-1.5-flash': { prompt: 0.15, completion: 0.6 }, + 'gemini-1.5-flash-8b': { prompt: 0.075, completion: 0.3 }, + 'gemini-2.0': { prompt: 0.1, completion: 0.4 }, // Base pattern (using 2.0-flash pricing) 'gemini-2.0-flash': { prompt: 0.1, completion: 0.4 }, - 'gemini-2.0': { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing - 'gemini-2.5-pro': { prompt: 1.25, completion: 10 }, + 'gemini-2.0-flash-lite': { prompt: 0.075, completion: 0.3 }, + 'gemini-2.5': { prompt: 0.3, completion: 2.5 }, // Base pattern (using 2.5-flash pricing) 'gemini-2.5-flash': { prompt: 0.3, completion: 2.5 }, 'gemini-2.5-flash-lite': { prompt: 0.1, completion: 0.4 }, - 'gemini-2.5': { prompt: 0, completion: 0 }, // Free for a period of time - 'gemini-1.5-flash-8b': { prompt: 0.075, completion: 0.3 }, - 'gemini-1.5-flash': { prompt: 0.15, completion: 0.6 }, - 'gemini-1.5': { prompt: 2.5, completion: 10 }, + 'gemini-2.5-pro': { prompt: 1.25, completion: 10 }, 'gemini-pro-vision': { prompt: 0.5, completion: 1.5 }, - gemini: { prompt: 0.5, completion: 1.5 }, - 'grok-2-vision-1212': { prompt: 2.0, completion: 10.0 }, - 'grok-2-vision-latest': { prompt: 2.0, completion: 10.0 }, - 'grok-2-vision': { prompt: 2.0, completion: 10.0 }, + grok: { prompt: 2.0, completion: 10.0 }, // Base pattern defaults to grok-2 + 'grok-beta': { prompt: 5.0, completion: 15.0 }, 'grok-vision-beta': { prompt: 5.0, completion: 15.0 }, + 'grok-2': { prompt: 2.0, completion: 10.0 }, 'grok-2-1212': { prompt: 2.0, completion: 10.0 }, 'grok-2-latest': { prompt: 2.0, completion: 10.0 }, - 'grok-2': { prompt: 2.0, completion: 10.0 }, - 'grok-3-mini-fast': { prompt: 0.6, completion: 4 }, - 'grok-3-mini': { prompt: 0.3, completion: 0.5 }, - 'grok-3-fast': { prompt: 5.0, completion: 25.0 }, + 'grok-2-vision': { prompt: 2.0, completion: 10.0 }, + 'grok-2-vision-1212': { prompt: 2.0, completion: 10.0 }, + 'grok-2-vision-latest': { prompt: 2.0, completion: 10.0 }, 'grok-3': { prompt: 3.0, completion: 15.0 }, + 'grok-3-fast': { prompt: 5.0, completion: 25.0 }, + 'grok-3-mini': { prompt: 0.3, completion: 0.5 }, + 'grok-3-mini-fast': { prompt: 0.6, completion: 4 }, 'grok-4': { prompt: 3.0, completion: 15.0 }, - 'grok-beta': { prompt: 5.0, completion: 15.0 }, - 'mistral-large': { prompt: 2.0, completion: 6.0 }, - 'pixtral-large': { prompt: 2.0, completion: 6.0 }, - 'mistral-saba': { prompt: 0.2, completion: 0.6 }, codestral: { prompt: 0.3, completion: 0.9 }, - 'ministral-8b': { prompt: 0.1, completion: 0.1 }, 'ministral-3b': { prompt: 0.04, completion: 0.04 }, - // GPT-OSS models - 'gpt-oss': { prompt: 0.05, completion: 0.2 }, + 'ministral-8b': { prompt: 0.1, completion: 0.1 }, + 'mistral-nemo': { prompt: 0.15, completion: 0.15 }, + 'mistral-saba': { prompt: 0.2, completion: 0.6 }, + 'pixtral-large': { prompt: 2.0, completion: 6.0 }, + 'mistral-large': { prompt: 2.0, completion: 6.0 }, + 'mixtral-8x22b': { prompt: 0.65, completion: 0.65 }, + kimi: { prompt: 0.14, completion: 2.49 }, // Base pattern (using kimi-k2 pricing) + // GPT-OSS models (specific sizes) 'gpt-oss:20b': { prompt: 0.05, completion: 0.2 }, 'gpt-oss-20b': { prompt: 0.05, completion: 0.2 }, 'gpt-oss:120b': { prompt: 0.15, completion: 0.6 }, 'gpt-oss-120b': { prompt: 0.15, completion: 0.6 }, - // GLM models (Zhipu AI) + // GLM models (Zhipu AI) - general to specific glm4: { prompt: 0.1, completion: 0.1 }, 'glm-4': { prompt: 0.1, completion: 0.1 }, 'glm-4-32b': { prompt: 0.1, completion: 0.1 }, 'glm-4.5': { prompt: 0.35, completion: 1.55 }, - 'glm-4.5v': { prompt: 0.6, completion: 1.8 }, 'glm-4.5-air': { prompt: 0.14, completion: 0.86 }, + 'glm-4.5v': { prompt: 0.6, completion: 1.8 }, 'glm-4.6': { prompt: 0.5, completion: 1.75 }, + // Qwen models + qwen: { prompt: 0.08, completion: 0.33 }, // Qwen base pattern (using qwen2.5-72b pricing) + 'qwen2.5': { prompt: 0.08, completion: 0.33 }, // Qwen 2.5 base pattern + 'qwen-turbo': { prompt: 0.05, completion: 0.2 }, + 'qwen-plus': { prompt: 0.4, completion: 1.2 }, + 'qwen-max': { prompt: 1.6, completion: 6.4 }, + 'qwq-32b': { prompt: 0.15, completion: 0.4 }, + // Qwen3 models + qwen3: { prompt: 0.035, completion: 0.138 }, // Qwen3 base pattern (using qwen3-4b pricing) + 'qwen3-8b': { prompt: 0.035, completion: 0.138 }, + 'qwen3-14b': { prompt: 0.05, completion: 0.22 }, + 'qwen3-30b-a3b': { prompt: 0.06, completion: 0.22 }, + 'qwen3-32b': { prompt: 0.05, completion: 0.2 }, + 'qwen3-235b-a22b': { prompt: 0.08, completion: 0.55 }, + // Qwen3 VL (Vision-Language) models + 'qwen3-vl-8b-thinking': { prompt: 0.18, completion: 2.1 }, + 'qwen3-vl-8b-instruct': { prompt: 0.18, completion: 0.69 }, + 'qwen3-vl-30b-a3b': { prompt: 0.29, completion: 1.0 }, + 'qwen3-vl-235b-a22b': { prompt: 0.3, completion: 1.2 }, + // Qwen3 specialized models + 'qwen3-max': { prompt: 1.2, completion: 6 }, + 'qwen3-coder': { prompt: 0.22, completion: 0.95 }, + 'qwen3-coder-30b-a3b': { prompt: 0.06, completion: 0.25 }, + 'qwen3-coder-plus': { prompt: 1, completion: 5 }, + 'qwen3-coder-flash': { prompt: 0.3, completion: 1.5 }, + 'qwen3-next-80b-a3b': { prompt: 0.1, completion: 0.8 }, }, bedrockValues, ); @@ -195,67 +249,39 @@ const cacheTokenValues = { * @returns {string|undefined} The key corresponding to the model name, or undefined if no match is found. */ const getValueKey = (model, endpoint) => { + if (!model || typeof model !== 'string') { + return undefined; + } + + // Use findMatchingPattern directly against tokenValues for efficient lookup + if (!endpoint || (typeof endpoint === 'string' && !tokenValues[endpoint])) { + const matchedKey = findMatchingPattern(model, tokenValues); + if (matchedKey) { + return matchedKey; + } + } + + // Fallback: use matchModelName for edge cases and legacy handling const modelName = matchModelName(model, endpoint); if (!modelName) { return undefined; } + // Legacy token size mappings and aliases for older models if (modelName.includes('gpt-3.5-turbo-16k')) { return '16k'; - } else if (modelName.includes('gpt-3.5-turbo-0125')) { - return 'gpt-3.5-turbo-0125'; - } else if (modelName.includes('gpt-3.5-turbo-1106')) { - return 'gpt-3.5-turbo-1106'; } else if (modelName.includes('gpt-3.5')) { return '4k'; - } else if (modelName.includes('o4-mini')) { - return 'o4-mini'; - } else if (modelName.includes('o4')) { - return 'o4'; - } else if (modelName.includes('o3-mini')) { - return 'o3-mini'; - } else if (modelName.includes('o3')) { - return 'o3'; - } else if (modelName.includes('o1-preview')) { - return 'o1-preview'; - } else if (modelName.includes('o1-mini')) { - return 'o1-mini'; - } else if (modelName.includes('o1')) { - return 'o1'; - } else if (modelName.includes('gpt-4.5')) { - return 'gpt-4.5'; - } else if (modelName.includes('gpt-4.1-nano')) { - return 'gpt-4.1-nano'; - } else if (modelName.includes('gpt-4.1-mini')) { - return 'gpt-4.1-mini'; - } else if (modelName.includes('gpt-4.1')) { - return 'gpt-4.1'; - } else if (modelName.includes('gpt-4o-2024-05-13')) { - return 'gpt-4o-2024-05-13'; - } else if (modelName.includes('gpt-5-nano')) { - return 'gpt-5-nano'; - } else if (modelName.includes('gpt-5-mini')) { - return 'gpt-5-mini'; - } else if (modelName.includes('gpt-5')) { - return 'gpt-5'; - } else if (modelName.includes('gpt-4o-mini')) { - return 'gpt-4o-mini'; - } else if (modelName.includes('gpt-4o')) { - return 'gpt-4o'; } else if (modelName.includes('gpt-4-vision')) { - return 'gpt-4-1106'; - } else if (modelName.includes('gpt-4-1106')) { - return 'gpt-4-1106'; + return 'gpt-4-1106'; // Alias for gpt-4-vision } else if (modelName.includes('gpt-4-0125')) { - return 'gpt-4-1106'; + return 'gpt-4-1106'; // Alias for gpt-4-0125 } else if (modelName.includes('gpt-4-turbo')) { - return 'gpt-4-1106'; + return 'gpt-4-1106'; // Alias for gpt-4-turbo } else if (modelName.includes('gpt-4-32k')) { return '32k'; } else if (modelName.includes('gpt-4')) { return '8k'; - } else if (tokenValues[modelName]) { - return modelName; } return undefined; diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js index 3cbce34295..670ea9d5ec 100644 --- a/api/models/tx.spec.js +++ b/api/models/tx.spec.js @@ -1,3 +1,4 @@ +const { maxTokensMap } = require('@librechat/api'); const { EModelEndpoint } = require('librechat-data-provider'); const { defaultRate, @@ -113,6 +114,14 @@ describe('getValueKey', () => { expect(getValueKey('gpt-5-nano-2025-01-30-0130')).toBe('gpt-5-nano'); }); + it('should return "gpt-5-pro" for model type of "gpt-5-pro"', () => { + expect(getValueKey('gpt-5-pro-2025-01-30')).toBe('gpt-5-pro'); + expect(getValueKey('openai/gpt-5-pro')).toBe('gpt-5-pro'); + expect(getValueKey('gpt-5-pro-0130')).toBe('gpt-5-pro'); + expect(getValueKey('gpt-5-pro-2025-01-30-0130')).toBe('gpt-5-pro'); + expect(getValueKey('gpt-5-pro-preview')).toBe('gpt-5-pro'); + }); + it('should return "gpt-4o" for model type of "gpt-4o"', () => { expect(getValueKey('gpt-4o-2024-08-06')).toBe('gpt-4o'); expect(getValueKey('gpt-4o-2024-08-06-0718')).toBe('gpt-4o'); @@ -288,6 +297,20 @@ describe('getMultiplier', () => { ); }); + it('should return the correct multiplier for gpt-5-pro', () => { + const valueKey = getValueKey('gpt-5-pro-2025-01-30'); + expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-5-pro'].prompt); + expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe( + tokenValues['gpt-5-pro'].completion, + ); + expect(getMultiplier({ model: 'gpt-5-pro-preview', tokenType: 'prompt' })).toBe( + tokenValues['gpt-5-pro'].prompt, + ); + expect(getMultiplier({ model: 'openai/gpt-5-pro', tokenType: 'completion' })).toBe( + tokenValues['gpt-5-pro'].completion, + ); + }); + it('should return the correct multiplier for gpt-4o', () => { const valueKey = getValueKey('gpt-4o-2024-08-06'); expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-4o'].prompt); @@ -471,6 +494,249 @@ describe('AWS Bedrock Model Tests', () => { }); }); +describe('Amazon Model Tests', () => { + describe('Amazon Nova Models', () => { + it('should return correct pricing for nova-premier', () => { + expect(getMultiplier({ model: 'nova-premier', tokenType: 'prompt' })).toBe( + tokenValues['nova-premier'].prompt, + ); + expect(getMultiplier({ model: 'nova-premier', tokenType: 'completion' })).toBe( + tokenValues['nova-premier'].completion, + ); + expect(getMultiplier({ model: 'amazon.nova-premier-v1:0', tokenType: 'prompt' })).toBe( + tokenValues['nova-premier'].prompt, + ); + expect(getMultiplier({ model: 'amazon.nova-premier-v1:0', tokenType: 'completion' })).toBe( + tokenValues['nova-premier'].completion, + ); + }); + + it('should return correct pricing for nova-pro', () => { + expect(getMultiplier({ model: 'nova-pro', tokenType: 'prompt' })).toBe( + tokenValues['nova-pro'].prompt, + ); + expect(getMultiplier({ model: 'nova-pro', tokenType: 'completion' })).toBe( + tokenValues['nova-pro'].completion, + ); + expect(getMultiplier({ model: 'amazon.nova-pro-v1:0', tokenType: 'prompt' })).toBe( + tokenValues['nova-pro'].prompt, + ); + expect(getMultiplier({ model: 'amazon.nova-pro-v1:0', tokenType: 'completion' })).toBe( + tokenValues['nova-pro'].completion, + ); + }); + + it('should return correct pricing for nova-lite', () => { + expect(getMultiplier({ model: 'nova-lite', tokenType: 'prompt' })).toBe( + tokenValues['nova-lite'].prompt, + ); + expect(getMultiplier({ model: 'nova-lite', tokenType: 'completion' })).toBe( + tokenValues['nova-lite'].completion, + ); + expect(getMultiplier({ model: 'amazon.nova-lite-v1:0', tokenType: 'prompt' })).toBe( + tokenValues['nova-lite'].prompt, + ); + expect(getMultiplier({ model: 'amazon.nova-lite-v1:0', tokenType: 'completion' })).toBe( + tokenValues['nova-lite'].completion, + ); + }); + + it('should return correct pricing for nova-micro', () => { + expect(getMultiplier({ model: 'nova-micro', tokenType: 'prompt' })).toBe( + tokenValues['nova-micro'].prompt, + ); + expect(getMultiplier({ model: 'nova-micro', tokenType: 'completion' })).toBe( + tokenValues['nova-micro'].completion, + ); + expect(getMultiplier({ model: 'amazon.nova-micro-v1:0', tokenType: 'prompt' })).toBe( + tokenValues['nova-micro'].prompt, + ); + expect(getMultiplier({ model: 'amazon.nova-micro-v1:0', tokenType: 'completion' })).toBe( + tokenValues['nova-micro'].completion, + ); + }); + + it('should match both short and full model names to the same pricing', () => { + const models = ['nova-micro', 'nova-lite', 'nova-pro', 'nova-premier']; + const fullModels = [ + 'amazon.nova-micro-v1:0', + 'amazon.nova-lite-v1:0', + 'amazon.nova-pro-v1:0', + 'amazon.nova-premier-v1:0', + ]; + + models.forEach((shortModel, i) => { + const fullModel = fullModels[i]; + const shortPrompt = getMultiplier({ model: shortModel, tokenType: 'prompt' }); + const fullPrompt = getMultiplier({ model: fullModel, tokenType: 'prompt' }); + const shortCompletion = getMultiplier({ model: shortModel, tokenType: 'completion' }); + const fullCompletion = getMultiplier({ model: fullModel, tokenType: 'completion' }); + + expect(shortPrompt).toBe(fullPrompt); + expect(shortCompletion).toBe(fullCompletion); + expect(shortPrompt).toBe(tokenValues[shortModel].prompt); + expect(shortCompletion).toBe(tokenValues[shortModel].completion); + }); + }); + }); + + describe('Amazon Titan Models', () => { + it('should return correct pricing for titan-text-premier', () => { + expect(getMultiplier({ model: 'titan-text-premier', tokenType: 'prompt' })).toBe( + tokenValues['titan-text-premier'].prompt, + ); + expect(getMultiplier({ model: 'titan-text-premier', tokenType: 'completion' })).toBe( + tokenValues['titan-text-premier'].completion, + ); + expect(getMultiplier({ model: 'amazon.titan-text-premier-v1:0', tokenType: 'prompt' })).toBe( + tokenValues['titan-text-premier'].prompt, + ); + expect( + getMultiplier({ model: 'amazon.titan-text-premier-v1:0', tokenType: 'completion' }), + ).toBe(tokenValues['titan-text-premier'].completion); + }); + + it('should return correct pricing for titan-text-express', () => { + expect(getMultiplier({ model: 'titan-text-express', tokenType: 'prompt' })).toBe( + tokenValues['titan-text-express'].prompt, + ); + expect(getMultiplier({ model: 'titan-text-express', tokenType: 'completion' })).toBe( + tokenValues['titan-text-express'].completion, + ); + expect(getMultiplier({ model: 'amazon.titan-text-express-v1', tokenType: 'prompt' })).toBe( + tokenValues['titan-text-express'].prompt, + ); + expect( + getMultiplier({ model: 'amazon.titan-text-express-v1', tokenType: 'completion' }), + ).toBe(tokenValues['titan-text-express'].completion); + }); + + it('should return correct pricing for titan-text-lite', () => { + expect(getMultiplier({ model: 'titan-text-lite', tokenType: 'prompt' })).toBe( + tokenValues['titan-text-lite'].prompt, + ); + expect(getMultiplier({ model: 'titan-text-lite', tokenType: 'completion' })).toBe( + tokenValues['titan-text-lite'].completion, + ); + expect(getMultiplier({ model: 'amazon.titan-text-lite-v1', tokenType: 'prompt' })).toBe( + tokenValues['titan-text-lite'].prompt, + ); + expect(getMultiplier({ model: 'amazon.titan-text-lite-v1', tokenType: 'completion' })).toBe( + tokenValues['titan-text-lite'].completion, + ); + }); + + it('should match both short and full model names to the same pricing', () => { + const models = ['titan-text-lite', 'titan-text-express', 'titan-text-premier']; + const fullModels = [ + 'amazon.titan-text-lite-v1', + 'amazon.titan-text-express-v1', + 'amazon.titan-text-premier-v1:0', + ]; + + models.forEach((shortModel, i) => { + const fullModel = fullModels[i]; + const shortPrompt = getMultiplier({ model: shortModel, tokenType: 'prompt' }); + const fullPrompt = getMultiplier({ model: fullModel, tokenType: 'prompt' }); + const shortCompletion = getMultiplier({ model: shortModel, tokenType: 'completion' }); + const fullCompletion = getMultiplier({ model: fullModel, tokenType: 'completion' }); + + expect(shortPrompt).toBe(fullPrompt); + expect(shortCompletion).toBe(fullCompletion); + expect(shortPrompt).toBe(tokenValues[shortModel].prompt); + expect(shortCompletion).toBe(tokenValues[shortModel].completion); + }); + }); + }); +}); + +describe('AI21 Model Tests', () => { + describe('AI21 J2 Models', () => { + it('should return correct pricing for j2-mid', () => { + expect(getMultiplier({ model: 'j2-mid', tokenType: 'prompt' })).toBe( + tokenValues['j2-mid'].prompt, + ); + expect(getMultiplier({ model: 'j2-mid', tokenType: 'completion' })).toBe( + tokenValues['j2-mid'].completion, + ); + expect(getMultiplier({ model: 'ai21.j2-mid-v1', tokenType: 'prompt' })).toBe( + tokenValues['j2-mid'].prompt, + ); + expect(getMultiplier({ model: 'ai21.j2-mid-v1', tokenType: 'completion' })).toBe( + tokenValues['j2-mid'].completion, + ); + }); + + it('should return correct pricing for j2-ultra', () => { + expect(getMultiplier({ model: 'j2-ultra', tokenType: 'prompt' })).toBe( + tokenValues['j2-ultra'].prompt, + ); + expect(getMultiplier({ model: 'j2-ultra', tokenType: 'completion' })).toBe( + tokenValues['j2-ultra'].completion, + ); + expect(getMultiplier({ model: 'ai21.j2-ultra-v1', tokenType: 'prompt' })).toBe( + tokenValues['j2-ultra'].prompt, + ); + expect(getMultiplier({ model: 'ai21.j2-ultra-v1', tokenType: 'completion' })).toBe( + tokenValues['j2-ultra'].completion, + ); + }); + + it('should match both short and full model names to the same pricing', () => { + const models = ['j2-mid', 'j2-ultra']; + const fullModels = ['ai21.j2-mid-v1', 'ai21.j2-ultra-v1']; + + models.forEach((shortModel, i) => { + const fullModel = fullModels[i]; + const shortPrompt = getMultiplier({ model: shortModel, tokenType: 'prompt' }); + const fullPrompt = getMultiplier({ model: fullModel, tokenType: 'prompt' }); + const shortCompletion = getMultiplier({ model: shortModel, tokenType: 'completion' }); + const fullCompletion = getMultiplier({ model: fullModel, tokenType: 'completion' }); + + expect(shortPrompt).toBe(fullPrompt); + expect(shortCompletion).toBe(fullCompletion); + expect(shortPrompt).toBe(tokenValues[shortModel].prompt); + expect(shortCompletion).toBe(tokenValues[shortModel].completion); + }); + }); + }); + + describe('AI21 Jamba Models', () => { + it('should return correct pricing for jamba-instruct', () => { + expect(getMultiplier({ model: 'jamba-instruct', tokenType: 'prompt' })).toBe( + tokenValues['jamba-instruct'].prompt, + ); + expect(getMultiplier({ model: 'jamba-instruct', tokenType: 'completion' })).toBe( + tokenValues['jamba-instruct'].completion, + ); + expect(getMultiplier({ model: 'ai21.jamba-instruct-v1:0', tokenType: 'prompt' })).toBe( + tokenValues['jamba-instruct'].prompt, + ); + expect(getMultiplier({ model: 'ai21.jamba-instruct-v1:0', tokenType: 'completion' })).toBe( + tokenValues['jamba-instruct'].completion, + ); + }); + + it('should match both short and full model names to the same pricing', () => { + const shortPrompt = getMultiplier({ model: 'jamba-instruct', tokenType: 'prompt' }); + const fullPrompt = getMultiplier({ + model: 'ai21.jamba-instruct-v1:0', + tokenType: 'prompt', + }); + const shortCompletion = getMultiplier({ model: 'jamba-instruct', tokenType: 'completion' }); + const fullCompletion = getMultiplier({ + model: 'ai21.jamba-instruct-v1:0', + tokenType: 'completion', + }); + + expect(shortPrompt).toBe(fullPrompt); + expect(shortCompletion).toBe(fullCompletion); + expect(shortPrompt).toBe(tokenValues['jamba-instruct'].prompt); + expect(shortCompletion).toBe(tokenValues['jamba-instruct'].completion); + }); + }); +}); + describe('Deepseek Model Tests', () => { const deepseekModels = ['deepseek-chat', 'deepseek-coder', 'deepseek-reasoner', 'deepseek.r1']; @@ -502,6 +768,187 @@ describe('Deepseek Model Tests', () => { }); }); +describe('Qwen3 Model Tests', () => { + describe('Qwen3 Base Models', () => { + it('should return correct pricing for qwen3 base pattern', () => { + expect(getMultiplier({ model: 'qwen3', tokenType: 'prompt' })).toBe( + tokenValues['qwen3'].prompt, + ); + expect(getMultiplier({ model: 'qwen3', tokenType: 'completion' })).toBe( + tokenValues['qwen3'].completion, + ); + }); + + it('should return correct pricing for qwen3-4b (falls back to qwen3)', () => { + expect(getMultiplier({ model: 'qwen3-4b', tokenType: 'prompt' })).toBe( + tokenValues['qwen3'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-4b', tokenType: 'completion' })).toBe( + tokenValues['qwen3'].completion, + ); + }); + + it('should return correct pricing for qwen3-8b', () => { + expect(getMultiplier({ model: 'qwen3-8b', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-8b'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-8b', tokenType: 'completion' })).toBe( + tokenValues['qwen3-8b'].completion, + ); + }); + + it('should return correct pricing for qwen3-14b', () => { + expect(getMultiplier({ model: 'qwen3-14b', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-14b'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-14b', tokenType: 'completion' })).toBe( + tokenValues['qwen3-14b'].completion, + ); + }); + + it('should return correct pricing for qwen3-235b-a22b', () => { + expect(getMultiplier({ model: 'qwen3-235b-a22b', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-235b-a22b'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-235b-a22b', tokenType: 'completion' })).toBe( + tokenValues['qwen3-235b-a22b'].completion, + ); + }); + + it('should handle model name variations with provider prefixes', () => { + const models = [ + { input: 'qwen3', expected: 'qwen3' }, + { input: 'qwen3-4b', expected: 'qwen3' }, + { input: 'qwen3-8b', expected: 'qwen3-8b' }, + { input: 'qwen3-32b', expected: 'qwen3-32b' }, + ]; + models.forEach(({ input, expected }) => { + const withPrefix = `alibaba/${input}`; + expect(getMultiplier({ model: withPrefix, tokenType: 'prompt' })).toBe( + tokenValues[expected].prompt, + ); + expect(getMultiplier({ model: withPrefix, tokenType: 'completion' })).toBe( + tokenValues[expected].completion, + ); + }); + }); + }); + + describe('Qwen3 VL (Vision-Language) Models', () => { + it('should return correct pricing for qwen3-vl-8b-thinking', () => { + expect(getMultiplier({ model: 'qwen3-vl-8b-thinking', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-vl-8b-thinking'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-vl-8b-thinking', tokenType: 'completion' })).toBe( + tokenValues['qwen3-vl-8b-thinking'].completion, + ); + }); + + it('should return correct pricing for qwen3-vl-8b-instruct', () => { + expect(getMultiplier({ model: 'qwen3-vl-8b-instruct', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-vl-8b-instruct'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-vl-8b-instruct', tokenType: 'completion' })).toBe( + tokenValues['qwen3-vl-8b-instruct'].completion, + ); + }); + + it('should return correct pricing for qwen3-vl-30b-a3b', () => { + expect(getMultiplier({ model: 'qwen3-vl-30b-a3b', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-vl-30b-a3b'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-vl-30b-a3b', tokenType: 'completion' })).toBe( + tokenValues['qwen3-vl-30b-a3b'].completion, + ); + }); + + it('should return correct pricing for qwen3-vl-235b-a22b', () => { + expect(getMultiplier({ model: 'qwen3-vl-235b-a22b', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-vl-235b-a22b'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-vl-235b-a22b', tokenType: 'completion' })).toBe( + tokenValues['qwen3-vl-235b-a22b'].completion, + ); + }); + }); + + describe('Qwen3 Specialized Models', () => { + it('should return correct pricing for qwen3-max', () => { + expect(getMultiplier({ model: 'qwen3-max', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-max'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-max', tokenType: 'completion' })).toBe( + tokenValues['qwen3-max'].completion, + ); + }); + + it('should return correct pricing for qwen3-coder', () => { + expect(getMultiplier({ model: 'qwen3-coder', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-coder'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-coder', tokenType: 'completion' })).toBe( + tokenValues['qwen3-coder'].completion, + ); + }); + + it('should return correct pricing for qwen3-coder-plus', () => { + expect(getMultiplier({ model: 'qwen3-coder-plus', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-coder-plus'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-coder-plus', tokenType: 'completion' })).toBe( + tokenValues['qwen3-coder-plus'].completion, + ); + }); + + it('should return correct pricing for qwen3-coder-flash', () => { + expect(getMultiplier({ model: 'qwen3-coder-flash', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-coder-flash'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-coder-flash', tokenType: 'completion' })).toBe( + tokenValues['qwen3-coder-flash'].completion, + ); + }); + + it('should return correct pricing for qwen3-next-80b-a3b', () => { + expect(getMultiplier({ model: 'qwen3-next-80b-a3b', tokenType: 'prompt' })).toBe( + tokenValues['qwen3-next-80b-a3b'].prompt, + ); + expect(getMultiplier({ model: 'qwen3-next-80b-a3b', tokenType: 'completion' })).toBe( + tokenValues['qwen3-next-80b-a3b'].completion, + ); + }); + }); + + describe('Qwen3 Model Variations', () => { + it('should handle all qwen3 models with provider prefixes', () => { + const models = ['qwen3', 'qwen3-8b', 'qwen3-max', 'qwen3-coder', 'qwen3-vl-8b-instruct']; + const prefixes = ['alibaba', 'qwen', 'openrouter']; + + models.forEach((model) => { + prefixes.forEach((prefix) => { + const fullModel = `${prefix}/${model}`; + expect(getMultiplier({ model: fullModel, tokenType: 'prompt' })).toBe( + tokenValues[model].prompt, + ); + expect(getMultiplier({ model: fullModel, tokenType: 'completion' })).toBe( + tokenValues[model].completion, + ); + }); + }); + }); + + it('should handle qwen3-4b falling back to qwen3 base pattern', () => { + const testCases = ['qwen3-4b', 'alibaba/qwen3-4b', 'qwen/qwen3-4b-preview']; + testCases.forEach((model) => { + expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(tokenValues['qwen3'].prompt); + expect(getMultiplier({ model, tokenType: 'completion' })).toBe( + tokenValues['qwen3'].completion, + ); + }); + }); + }); +}); + describe('getCacheMultiplier', () => { it('should return the correct cache multiplier for a given valueKey and cacheType', () => { expect(getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'write' })).toBe( @@ -914,6 +1361,37 @@ describe('Claude Model Tests', () => { ); }); + it('should return correct prompt and completion rates for Claude Haiku 4.5', () => { + expect(getMultiplier({ model: 'claude-haiku-4-5', tokenType: 'prompt' })).toBe( + tokenValues['claude-haiku-4-5'].prompt, + ); + expect(getMultiplier({ model: 'claude-haiku-4-5', tokenType: 'completion' })).toBe( + tokenValues['claude-haiku-4-5'].completion, + ); + }); + + it('should handle Claude Haiku 4.5 model name variations', () => { + const modelVariations = [ + 'claude-haiku-4-5', + 'claude-haiku-4-5-20250420', + 'claude-haiku-4-5-latest', + 'anthropic/claude-haiku-4-5', + 'claude-haiku-4-5/anthropic', + 'claude-haiku-4-5-preview', + ]; + + modelVariations.forEach((model) => { + const valueKey = getValueKey(model); + expect(valueKey).toBe('claude-haiku-4-5'); + expect(getMultiplier({ model, tokenType: 'prompt' })).toBe( + tokenValues['claude-haiku-4-5'].prompt, + ); + expect(getMultiplier({ model, tokenType: 'completion' })).toBe( + tokenValues['claude-haiku-4-5'].completion, + ); + }); + }); + it('should handle Claude 4 model name variations with different prefixes and suffixes', () => { const modelVariations = [ 'claude-sonnet-4', @@ -991,3 +1469,119 @@ describe('Claude Model Tests', () => { }); }); }); + +describe('tokens.ts and tx.js sync validation', () => { + it('should resolve all models in maxTokensMap to pricing via getValueKey', () => { + const tokensKeys = Object.keys(maxTokensMap[EModelEndpoint.openAI]); + const txKeys = Object.keys(tokenValues); + + const unresolved = []; + + tokensKeys.forEach((key) => { + // Skip legacy token size mappings (e.g., '4k', '8k', '16k', '32k') + if (/^\d+k$/.test(key)) return; + + // Skip generic pattern keys (end with '-' or ':') + if (key.endsWith('-') || key.endsWith(':')) return; + + // Try to resolve via getValueKey + const resolvedKey = getValueKey(key); + + // If it resolves and the resolved key has pricing, success + if (resolvedKey && txKeys.includes(resolvedKey)) return; + + // If it resolves to a legacy key (4k, 8k, etc), also OK + if (resolvedKey && /^\d+k$/.test(resolvedKey)) return; + + // If we get here, this model can't get pricing - flag it + unresolved.push({ + key, + resolvedKey: resolvedKey || 'undefined', + context: maxTokensMap[EModelEndpoint.openAI][key], + }); + }); + + if (unresolved.length > 0) { + console.log('\nModels that cannot resolve to pricing via getValueKey:'); + unresolved.forEach(({ key, resolvedKey, context }) => { + console.log(` - '${key}' → '${resolvedKey}' (context: ${context})`); + }); + } + + expect(unresolved).toEqual([]); + }); + + it('should not have redundant dated variants with same pricing and context as base model', () => { + const txKeys = Object.keys(tokenValues); + const redundant = []; + + txKeys.forEach((key) => { + // Check if this is a dated variant (ends with -YYYY-MM-DD) + if (key.match(/.*-\d{4}-\d{2}-\d{2}$/)) { + const baseKey = key.replace(/-\d{4}-\d{2}-\d{2}$/, ''); + + if (txKeys.includes(baseKey)) { + const variantPricing = tokenValues[key]; + const basePricing = tokenValues[baseKey]; + const variantContext = maxTokensMap[EModelEndpoint.openAI][key]; + const baseContext = maxTokensMap[EModelEndpoint.openAI][baseKey]; + + const samePricing = + variantPricing.prompt === basePricing.prompt && + variantPricing.completion === basePricing.completion; + const sameContext = variantContext === baseContext; + + if (samePricing && sameContext) { + redundant.push({ + key, + baseKey, + pricing: `${variantPricing.prompt}/${variantPricing.completion}`, + context: variantContext, + }); + } + } + } + }); + + if (redundant.length > 0) { + console.log('\nRedundant dated variants found (same pricing and context as base):'); + redundant.forEach(({ key, baseKey, pricing, context }) => { + console.log(` - '${key}' → '${baseKey}' (pricing: ${pricing}, context: ${context})`); + console.log(` Can be removed - pattern matching will handle it`); + }); + } + + expect(redundant).toEqual([]); + }); + + it('should have context windows in tokens.ts for all models with pricing in tx.js (openAI catch-all)', () => { + const txKeys = Object.keys(tokenValues); + const missingContext = []; + + txKeys.forEach((key) => { + // Skip legacy token size mappings (4k, 8k, 16k, 32k) + if (/^\d+k$/.test(key)) return; + + // Check if this model has a context window defined + const context = maxTokensMap[EModelEndpoint.openAI][key]; + + if (!context) { + const pricing = tokenValues[key]; + missingContext.push({ + key, + pricing: `${pricing.prompt}/${pricing.completion}`, + }); + } + }); + + if (missingContext.length > 0) { + console.log('\nModels with pricing but missing context in tokens.ts:'); + missingContext.forEach(({ key, pricing }) => { + console.log(` - '${key}' (pricing: ${pricing})`); + console.log(` Add to tokens.ts openAIModels/bedrockModels/etc.`); + }); + } + + expect(missingContext).toEqual([]); + }); +}); diff --git a/api/server/services/Endpoints/agents/agent.js b/api/server/services/Endpoints/agents/agent.js index 1966834ed4..ec9d56d026 100644 --- a/api/server/services/Endpoints/agents/agent.js +++ b/api/server/services/Endpoints/agents/agent.js @@ -143,7 +143,7 @@ const initializeAgent = async ({ const agentMaxContextTokens = optionalChainWithEmptyCheck( maxContextTokens, getModelMaxTokens(tokensModel, providerEndpointMap[provider], options.endpointTokenConfig), - 4096, + 18000, ); if ( diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index 162827767f..12daf64e47 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -186,6 +186,19 @@ describe('getModelMaxTokens', () => { ); }); + test('should return correct tokens for gpt-5-pro matches', () => { + expect(getModelMaxTokens('gpt-5-pro')).toBe(maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro']); + expect(getModelMaxTokens('gpt-5-pro-preview')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro'], + ); + expect(getModelMaxTokens('openai/gpt-5-pro')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro'], + ); + expect(getModelMaxTokens('gpt-5-pro-2025-01-30')).toBe( + maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro'], + ); + }); + test('should return correct tokens for Anthropic models', () => { const models = [ 'claude-2.1', @@ -469,7 +482,7 @@ describe('getModelMaxTokens', () => { test('should return correct max output tokens for GPT-5 models', () => { const { getModelMaxOutputTokens } = require('@librechat/api'); - ['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => { + ['gpt-5', 'gpt-5-mini', 'gpt-5-nano', 'gpt-5-pro'].forEach((model) => { expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]); expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe( maxOutputTokensMap[EModelEndpoint.openAI][model], @@ -582,6 +595,13 @@ describe('matchModelName', () => { expect(matchModelName('gpt-5-nano-2025-01-30')).toBe('gpt-5-nano'); }); + it('should return the closest matching key for gpt-5-pro matches', () => { + expect(matchModelName('openai/gpt-5-pro')).toBe('gpt-5-pro'); + expect(matchModelName('gpt-5-pro-preview')).toBe('gpt-5-pro'); + expect(matchModelName('gpt-5-pro-2025-01-30')).toBe('gpt-5-pro'); + expect(matchModelName('gpt-5-pro-2025-01-30-0130')).toBe('gpt-5-pro'); + }); + // Tests for Google models it('should return the exact model name if it exists in maxTokensMap - Google models', () => { expect(matchModelName('text-bison-32k', EModelEndpoint.google)).toBe('text-bison-32k'); @@ -832,6 +852,49 @@ describe('Claude Model Tests', () => { ); }); + it('should return correct context length for Claude Haiku 4.5', () => { + expect(getModelMaxTokens('claude-haiku-4-5', EModelEndpoint.anthropic)).toBe( + maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'], + ); + expect(getModelMaxTokens('claude-haiku-4-5')).toBe( + maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'], + ); + }); + + it('should handle Claude Haiku 4.5 model name variations', () => { + const modelVariations = [ + 'claude-haiku-4-5', + 'claude-haiku-4-5-20250420', + 'claude-haiku-4-5-latest', + 'anthropic/claude-haiku-4-5', + 'claude-haiku-4-5/anthropic', + 'claude-haiku-4-5-preview', + ]; + + modelVariations.forEach((model) => { + const modelKey = findMatchingPattern(model, maxTokensMap[EModelEndpoint.anthropic]); + expect(modelKey).toBe('claude-haiku-4-5'); + expect(getModelMaxTokens(model, EModelEndpoint.anthropic)).toBe( + maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'], + ); + }); + }); + + it('should match model names correctly for Claude Haiku 4.5', () => { + const modelVariations = [ + 'claude-haiku-4-5', + 'claude-haiku-4-5-20250420', + 'claude-haiku-4-5-latest', + 'anthropic/claude-haiku-4-5', + 'claude-haiku-4-5/anthropic', + 'claude-haiku-4-5-preview', + ]; + + modelVariations.forEach((model) => { + expect(matchModelName(model, EModelEndpoint.anthropic)).toBe('claude-haiku-4-5'); + }); + }); + it('should handle Claude 4 model name variations with different prefixes and suffixes', () => { const modelVariations = [ 'claude-sonnet-4', @@ -924,6 +987,121 @@ describe('Kimi Model Tests', () => { }); }); +describe('Qwen3 Model Tests', () => { + describe('getModelMaxTokens', () => { + test('should return correct tokens for Qwen3 base pattern', () => { + expect(getModelMaxTokens('qwen3')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']); + }); + + test('should return correct tokens for qwen3-4b (falls back to qwen3)', () => { + expect(getModelMaxTokens('qwen3-4b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']); + }); + + test('should return correct tokens for Qwen3 base models', () => { + expect(getModelMaxTokens('qwen3-8b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-8b']); + expect(getModelMaxTokens('qwen3-14b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-14b']); + expect(getModelMaxTokens('qwen3-32b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-32b']); + expect(getModelMaxTokens('qwen3-235b-a22b')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-235b-a22b'], + ); + }); + + test('should return correct tokens for Qwen3 VL (Vision-Language) models', () => { + expect(getModelMaxTokens('qwen3-vl-8b-thinking')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-8b-thinking'], + ); + expect(getModelMaxTokens('qwen3-vl-8b-instruct')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-8b-instruct'], + ); + expect(getModelMaxTokens('qwen3-vl-30b-a3b')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-30b-a3b'], + ); + expect(getModelMaxTokens('qwen3-vl-235b-a22b')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-235b-a22b'], + ); + }); + + test('should return correct tokens for Qwen3 specialized models', () => { + expect(getModelMaxTokens('qwen3-max')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-max']); + expect(getModelMaxTokens('qwen3-coder')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-coder'], + ); + expect(getModelMaxTokens('qwen3-coder-30b-a3b')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-coder-30b-a3b'], + ); + expect(getModelMaxTokens('qwen3-coder-plus')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-coder-plus'], + ); + expect(getModelMaxTokens('qwen3-coder-flash')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-coder-flash'], + ); + expect(getModelMaxTokens('qwen3-next-80b-a3b')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-next-80b-a3b'], + ); + }); + + test('should handle Qwen3 models with provider prefixes', () => { + expect(getModelMaxTokens('alibaba/qwen3')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']); + expect(getModelMaxTokens('alibaba/qwen3-4b')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3'], + ); + expect(getModelMaxTokens('qwen/qwen3-8b')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-8b'], + ); + expect(getModelMaxTokens('openrouter/qwen3-max')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-max'], + ); + expect(getModelMaxTokens('alibaba/qwen3-vl-8b-instruct')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-8b-instruct'], + ); + expect(getModelMaxTokens('qwen/qwen3-coder')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-coder'], + ); + }); + + test('should handle Qwen3 models with suffixes', () => { + expect(getModelMaxTokens('qwen3-preview')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']); + expect(getModelMaxTokens('qwen3-4b-preview')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3'], + ); + expect(getModelMaxTokens('qwen3-8b-latest')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-8b'], + ); + expect(getModelMaxTokens('qwen3-max-2024')).toBe( + maxTokensMap[EModelEndpoint.openAI]['qwen3-max'], + ); + }); + }); + + describe('matchModelName', () => { + test('should match exact Qwen3 model names', () => { + expect(matchModelName('qwen3')).toBe('qwen3'); + expect(matchModelName('qwen3-4b')).toBe('qwen3'); + expect(matchModelName('qwen3-8b')).toBe('qwen3-8b'); + expect(matchModelName('qwen3-vl-8b-thinking')).toBe('qwen3-vl-8b-thinking'); + expect(matchModelName('qwen3-max')).toBe('qwen3-max'); + expect(matchModelName('qwen3-coder')).toBe('qwen3-coder'); + }); + + test('should match Qwen3 model variations with provider prefixes', () => { + expect(matchModelName('alibaba/qwen3')).toBe('qwen3'); + expect(matchModelName('alibaba/qwen3-4b')).toBe('qwen3'); + expect(matchModelName('qwen/qwen3-8b')).toBe('qwen3-8b'); + expect(matchModelName('openrouter/qwen3-max')).toBe('qwen3-max'); + expect(matchModelName('alibaba/qwen3-vl-8b-instruct')).toBe('qwen3-vl-8b-instruct'); + expect(matchModelName('qwen/qwen3-coder')).toBe('qwen3-coder'); + }); + + test('should match Qwen3 model variations with suffixes', () => { + expect(matchModelName('qwen3-preview')).toBe('qwen3'); + expect(matchModelName('qwen3-4b-preview')).toBe('qwen3'); + expect(matchModelName('qwen3-8b-latest')).toBe('qwen3-8b'); + expect(matchModelName('qwen3-max-2024')).toBe('qwen3-max'); + expect(matchModelName('qwen3-coder-v1')).toBe('qwen3-coder'); + }); + }); +}); + describe('GLM Model Tests (Zhipu AI)', () => { describe('getModelMaxTokens', () => { test('should return correct tokens for GLM models', () => { diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index d527836642..32921ca851 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -40,10 +40,10 @@ const openAIModels = { 'gpt-5': 400000, 'gpt-5-mini': 400000, 'gpt-5-nano': 400000, + 'gpt-5-pro': 400000, 'gpt-4o': 127500, // -500 from max 'gpt-4o-mini': 127500, // -500 from max 'gpt-4o-2024-05-13': 127500, // -500 from max - 'gpt-4o-2024-08-06': 127500, // -500 from max 'gpt-4-turbo': 127500, // -500 from max 'gpt-4-vision': 127500, // -500 from max 'gpt-3.5-turbo': 16375, // -10 from max @@ -60,9 +60,11 @@ const mistralModels = { 'mistral-7b': 31990, // -10 from max 'mistral-small': 31990, // -10 from max 'mixtral-8x7b': 31990, // -10 from max + 'mixtral-8x22b': 65536, 'mistral-large': 131000, 'mistral-large-2402': 127500, 'mistral-large-2407': 127500, + 'mistral-nemo': 131000, 'pixtral-large': 131000, 'mistral-saba': 32000, codestral: 256000, @@ -75,6 +77,7 @@ const cohereModels = { 'command-light-nightly': 8182, // -10 from max command: 4086, // -10 from max 'command-nightly': 8182, // -10 from max + 'command-text': 4086, // -10 from max 'command-r': 127500, // -500 from max 'command-r-plus': 127500, // -500 from max }; @@ -127,14 +130,17 @@ const anthropicModels = { 'claude-3.7-sonnet': 200000, 'claude-3-5-sonnet-latest': 200000, 'claude-3.5-sonnet-latest': 200000, + 'claude-haiku-4-5': 200000, 'claude-sonnet-4': 1000000, 'claude-opus-4': 200000, 'claude-4': 200000, }; const deepseekModels = { - 'deepseek-reasoner': 128000, deepseek: 128000, + 'deepseek-reasoner': 128000, + 'deepseek-r1': 128000, + 'deepseek-v3': 128000, 'deepseek.r1': 128000, }; @@ -200,32 +206,57 @@ const metaModels = { 'llama2:70b': 4000, }; -const ollamaModels = { +const qwenModels = { + qwen: 32000, 'qwen2.5': 32000, + 'qwen-turbo': 1000000, + 'qwen-plus': 131000, + 'qwen-max': 32000, + 'qwq-32b': 32000, + // Qwen3 models + qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) + 'qwen3-8b': 128000, + 'qwen3-14b': 40960, + 'qwen3-30b-a3b': 40960, + 'qwen3-32b': 40960, + 'qwen3-235b-a22b': 40960, + // Qwen3 VL (Vision-Language) models + 'qwen3-vl-8b-thinking': 256000, + 'qwen3-vl-8b-instruct': 262144, + 'qwen3-vl-30b-a3b': 262144, + 'qwen3-vl-235b-a22b': 131072, + // Qwen3 specialized models + 'qwen3-max': 256000, + 'qwen3-coder': 262144, + 'qwen3-coder-30b-a3b': 262144, + 'qwen3-coder-plus': 128000, + 'qwen3-coder-flash': 128000, + 'qwen3-next-80b-a3b': 262144, }; const ai21Models = { - 'ai21.j2-mid-v1': 8182, // -10 from max - 'ai21.j2-ultra-v1': 8182, // -10 from max - 'ai21.jamba-instruct-v1:0': 255500, // -500 from max + 'j2-mid': 8182, // -10 from max + 'j2-ultra': 8182, // -10 from max + 'jamba-instruct': 255500, // -500 from max }; const amazonModels = { - 'amazon.titan-text-lite-v1': 4000, - 'amazon.titan-text-express-v1': 8000, - 'amazon.titan-text-premier-v1:0': 31500, // -500 from max + // Amazon Titan models + 'titan-text-lite': 4000, + 'titan-text-express': 8000, + 'titan-text-premier': 31500, // -500 from max + // Amazon Nova models // https://aws.amazon.com/ai/generative-ai/nova/ - 'amazon.nova-micro-v1:0': 127000, // -1000 from max, - 'amazon.nova-lite-v1:0': 295000, // -5000 from max, - 'amazon.nova-pro-v1:0': 295000, // -5000 from max, - 'amazon.nova-premier-v1:0': 995000, // -5000 from max, + 'nova-micro': 127000, // -1000 from max + 'nova-lite': 295000, // -5000 from max + 'nova-pro': 295000, // -5000 from max + 'nova-premier': 995000, // -5000 from max }; const bedrockModels = { ...anthropicModels, ...mistralModels, ...cohereModels, - ...ollamaModels, ...deepseekModels, ...metaModels, ...ai21Models, @@ -254,6 +285,7 @@ const aggregateModels = { ...googleModels, ...bedrockModels, ...xAIModels, + ...qwenModels, // misc. kimi: 131000, // GPT-OSS @@ -289,6 +321,7 @@ export const modelMaxOutputs = { 'gpt-5': 128000, 'gpt-5-mini': 128000, 'gpt-5-nano': 128000, + 'gpt-5-pro': 128000, 'gpt-oss-20b': 131000, 'gpt-oss-120b': 131000, system_default: 32000, @@ -299,6 +332,7 @@ const anthropicMaxOutputs = { 'claude-3-haiku': 4096, 'claude-3-sonnet': 4096, 'claude-3-opus': 4096, + 'claude-haiku-4-5': 64000, 'claude-opus-4': 32000, 'claude-sonnet-4': 64000, 'claude-3.5-sonnet': 8192,