mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-16 16:30:15 +01:00
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173)
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Has been cancelled
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Has been cancelled
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Has been cancelled
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Has been cancelled
* updated gpt-5-pro; it is available here and on OpenRouter: https://platform.openai.com/docs/models/gpt-5-pro
* feat: Add gpt-5-pro pricing
  - Implemented handling for the new gpt-5-pro model in the getValueKey function.
  - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios.
  - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files.
  - Enhanced model matching functionality to include gpt-5-pro variations.
* refactor: optimize model pricing and validation logic
  - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js.
  - Updated tokenValues to include additional models and their pricing structures.
  - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing.
  - Refactored getValueKey function to improve model matching and resolution efficiency.
  - Removed outdated model entries from tokens.ts to streamline pricing management.
* fix: add missing pricing
* chore: update model pricing for qwen and gemma variants
* chore: update model pricing and add validation for context windows
  - Removed outdated model entries from tx.js and updated tokenValues with new models.
  - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts.
  - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions.
* chore: update model names and pricing for AI21 and Amazon models
  - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency.
  - Updated pricing values in tokens.ts to reflect the new model names.
  - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models.
* feat: add pricing and validation for Claude Haiku 4.5 model
* chore: increase default max context tokens to 18000 for agents
* feat: add Qwen3 model pricing and validation tests
* chore: reorganize and update Qwen model pricing in tx.js and tokens.ts

---------

Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
This commit is contained in:
parent
589f119310
commit
36f0365fd4
5 changed files with 964 additions and 132 deletions
258
api/models/tx.js
258
api/models/tx.js
|
|
@ -1,4 +1,4 @@
|
|||
const { matchModelName } = require('@librechat/api');
|
||||
const { matchModelName, findMatchingPattern } = require('@librechat/api');
|
||||
const defaultRate = 6;
|
||||
|
||||
/**
|
||||
|
|
@ -6,44 +6,58 @@ const defaultRate = 6;
|
|||
* source: https://aws.amazon.com/bedrock/pricing/
|
||||
* */
|
||||
const bedrockValues = {
|
||||
// Basic llama2 patterns
|
||||
// Basic llama2 patterns (base defaults to smallest variant)
|
||||
llama2: { prompt: 0.75, completion: 1.0 },
|
||||
'llama-2': { prompt: 0.75, completion: 1.0 },
|
||||
'llama2-13b': { prompt: 0.75, completion: 1.0 },
|
||||
'llama2:13b': { prompt: 0.75, completion: 1.0 },
|
||||
'llama2:70b': { prompt: 1.95, completion: 2.56 },
|
||||
'llama2-70b': { prompt: 1.95, completion: 2.56 },
|
||||
|
||||
// Basic llama3 patterns
|
||||
// Basic llama3 patterns (base defaults to smallest variant)
|
||||
llama3: { prompt: 0.3, completion: 0.6 },
|
||||
'llama-3': { prompt: 0.3, completion: 0.6 },
|
||||
'llama3-8b': { prompt: 0.3, completion: 0.6 },
|
||||
'llama3:8b': { prompt: 0.3, completion: 0.6 },
|
||||
'llama3-70b': { prompt: 2.65, completion: 3.5 },
|
||||
'llama3:70b': { prompt: 2.65, completion: 3.5 },
|
||||
|
||||
// llama3-x-Nb pattern
|
||||
// llama3-x-Nb pattern (base defaults to smallest variant)
|
||||
'llama3-1': { prompt: 0.22, completion: 0.22 },
|
||||
'llama3-1-8b': { prompt: 0.22, completion: 0.22 },
|
||||
'llama3-1-70b': { prompt: 0.72, completion: 0.72 },
|
||||
'llama3-1-405b': { prompt: 2.4, completion: 2.4 },
|
||||
'llama3-2': { prompt: 0.1, completion: 0.1 },
|
||||
'llama3-2-1b': { prompt: 0.1, completion: 0.1 },
|
||||
'llama3-2-3b': { prompt: 0.15, completion: 0.15 },
|
||||
'llama3-2-11b': { prompt: 0.16, completion: 0.16 },
|
||||
'llama3-2-90b': { prompt: 0.72, completion: 0.72 },
|
||||
'llama3-3': { prompt: 2.65, completion: 3.5 },
|
||||
'llama3-3-70b': { prompt: 2.65, completion: 3.5 },
|
||||
|
||||
// llama3.x:Nb pattern
|
||||
// llama3.x:Nb pattern (base defaults to smallest variant)
|
||||
'llama3.1': { prompt: 0.22, completion: 0.22 },
|
||||
'llama3.1:8b': { prompt: 0.22, completion: 0.22 },
|
||||
'llama3.1:70b': { prompt: 0.72, completion: 0.72 },
|
||||
'llama3.1:405b': { prompt: 2.4, completion: 2.4 },
|
||||
'llama3.2': { prompt: 0.1, completion: 0.1 },
|
||||
'llama3.2:1b': { prompt: 0.1, completion: 0.1 },
|
||||
'llama3.2:3b': { prompt: 0.15, completion: 0.15 },
|
||||
'llama3.2:11b': { prompt: 0.16, completion: 0.16 },
|
||||
'llama3.2:90b': { prompt: 0.72, completion: 0.72 },
|
||||
'llama3.3': { prompt: 2.65, completion: 3.5 },
|
||||
'llama3.3:70b': { prompt: 2.65, completion: 3.5 },
|
||||
|
||||
// llama-3.x-Nb pattern
|
||||
// llama-3.x-Nb pattern (base defaults to smallest variant)
|
||||
'llama-3.1': { prompt: 0.22, completion: 0.22 },
|
||||
'llama-3.1-8b': { prompt: 0.22, completion: 0.22 },
|
||||
'llama-3.1-70b': { prompt: 0.72, completion: 0.72 },
|
||||
'llama-3.1-405b': { prompt: 2.4, completion: 2.4 },
|
||||
'llama-3.2': { prompt: 0.1, completion: 0.1 },
|
||||
'llama-3.2-1b': { prompt: 0.1, completion: 0.1 },
|
||||
'llama-3.2-3b': { prompt: 0.15, completion: 0.15 },
|
||||
'llama-3.2-11b': { prompt: 0.16, completion: 0.16 },
|
||||
'llama-3.2-90b': { prompt: 0.72, completion: 0.72 },
|
||||
'llama-3.3': { prompt: 2.65, completion: 3.5 },
|
||||
'llama-3.3-70b': { prompt: 2.65, completion: 3.5 },
|
||||
'mistral-7b': { prompt: 0.15, completion: 0.2 },
|
||||
'mistral-small': { prompt: 0.15, completion: 0.2 },
|
||||
|
|
@ -52,15 +66,19 @@ const bedrockValues = {
|
|||
'mistral-large-2407': { prompt: 3.0, completion: 9.0 },
|
||||
'command-text': { prompt: 1.5, completion: 2.0 },
|
||||
'command-light': { prompt: 0.3, completion: 0.6 },
|
||||
'ai21.j2-mid-v1': { prompt: 12.5, completion: 12.5 },
|
||||
'ai21.j2-ultra-v1': { prompt: 18.8, completion: 18.8 },
|
||||
'ai21.jamba-instruct-v1:0': { prompt: 0.5, completion: 0.7 },
|
||||
'amazon.titan-text-lite-v1': { prompt: 0.15, completion: 0.2 },
|
||||
'amazon.titan-text-express-v1': { prompt: 0.2, completion: 0.6 },
|
||||
'amazon.titan-text-premier-v1:0': { prompt: 0.5, completion: 1.5 },
|
||||
'amazon.nova-micro-v1:0': { prompt: 0.035, completion: 0.14 },
|
||||
'amazon.nova-lite-v1:0': { prompt: 0.06, completion: 0.24 },
|
||||
'amazon.nova-pro-v1:0': { prompt: 0.8, completion: 3.2 },
|
||||
// AI21 models
|
||||
'j2-mid': { prompt: 12.5, completion: 12.5 },
|
||||
'j2-ultra': { prompt: 18.8, completion: 18.8 },
|
||||
'jamba-instruct': { prompt: 0.5, completion: 0.7 },
|
||||
// Amazon Titan models
|
||||
'titan-text-lite': { prompt: 0.15, completion: 0.2 },
|
||||
'titan-text-express': { prompt: 0.2, completion: 0.6 },
|
||||
'titan-text-premier': { prompt: 0.5, completion: 1.5 },
|
||||
// Amazon Nova models
|
||||
'nova-micro': { prompt: 0.035, completion: 0.14 },
|
||||
'nova-lite': { prompt: 0.06, completion: 0.24 },
|
||||
'nova-pro': { prompt: 0.8, completion: 3.2 },
|
||||
'nova-premier': { prompt: 2.5, completion: 12.5 },
|
||||
'deepseek.r1': { prompt: 1.35, completion: 5.4 },
|
||||
};
|
||||
|
||||
|
|
@ -71,100 +89,136 @@ const bedrockValues = {
|
|||
*/
|
||||
const tokenValues = Object.assign(
|
||||
{
|
||||
// Legacy token size mappings (generic patterns - check LAST)
|
||||
'8k': { prompt: 30, completion: 60 },
|
||||
'32k': { prompt: 60, completion: 120 },
|
||||
'4k': { prompt: 1.5, completion: 2 },
|
||||
'16k': { prompt: 3, completion: 4 },
|
||||
// Generic fallback patterns (check LAST)
|
||||
'claude-': { prompt: 0.8, completion: 2.4 },
|
||||
deepseek: { prompt: 0.28, completion: 0.42 },
|
||||
command: { prompt: 0.38, completion: 0.38 },
|
||||
gemma: { prompt: 0.02, completion: 0.04 }, // Base pattern (using gemma-3n-e4b pricing)
|
||||
gemini: { prompt: 0.5, completion: 1.5 },
|
||||
'gpt-oss': { prompt: 0.05, completion: 0.2 },
|
||||
// Specific model variants (check FIRST - more specific patterns at end)
|
||||
'gpt-3.5-turbo-1106': { prompt: 1, completion: 2 },
|
||||
'o4-mini': { prompt: 1.1, completion: 4.4 },
|
||||
'o3-mini': { prompt: 1.1, completion: 4.4 },
|
||||
o3: { prompt: 2, completion: 8 },
|
||||
'o1-mini': { prompt: 1.1, completion: 4.4 },
|
||||
'o1-preview': { prompt: 15, completion: 60 },
|
||||
o1: { prompt: 15, completion: 60 },
|
||||
'gpt-3.5-turbo-0125': { prompt: 0.5, completion: 1.5 },
|
||||
'gpt-4-1106': { prompt: 10, completion: 30 },
|
||||
'gpt-4.1': { prompt: 2, completion: 8 },
|
||||
'gpt-4.1-nano': { prompt: 0.1, completion: 0.4 },
|
||||
'gpt-4.1-mini': { prompt: 0.4, completion: 1.6 },
|
||||
'gpt-4.1': { prompt: 2, completion: 8 },
|
||||
'gpt-4.5': { prompt: 75, completion: 150 },
|
||||
'gpt-4o-mini': { prompt: 0.15, completion: 0.6 },
|
||||
'gpt-5': { prompt: 1.25, completion: 10 },
|
||||
'gpt-5-mini': { prompt: 0.25, completion: 2 },
|
||||
'gpt-5-nano': { prompt: 0.05, completion: 0.4 },
|
||||
'gpt-4o': { prompt: 2.5, completion: 10 },
|
||||
'gpt-4o-2024-05-13': { prompt: 5, completion: 15 },
|
||||
'gpt-4-1106': { prompt: 10, completion: 30 },
|
||||
'gpt-3.5-turbo-0125': { prompt: 0.5, completion: 1.5 },
|
||||
'claude-3-opus': { prompt: 15, completion: 75 },
|
||||
'gpt-4o-mini': { prompt: 0.15, completion: 0.6 },
|
||||
'gpt-5': { prompt: 1.25, completion: 10 },
|
||||
'gpt-5-nano': { prompt: 0.05, completion: 0.4 },
|
||||
'gpt-5-mini': { prompt: 0.25, completion: 2 },
|
||||
'gpt-5-pro': { prompt: 15, completion: 120 },
|
||||
o1: { prompt: 15, completion: 60 },
|
||||
'o1-mini': { prompt: 1.1, completion: 4.4 },
|
||||
'o1-preview': { prompt: 15, completion: 60 },
|
||||
o3: { prompt: 2, completion: 8 },
|
||||
'o3-mini': { prompt: 1.1, completion: 4.4 },
|
||||
'o4-mini': { prompt: 1.1, completion: 4.4 },
|
||||
'claude-instant': { prompt: 0.8, completion: 2.4 },
|
||||
'claude-2': { prompt: 8, completion: 24 },
|
||||
'claude-2.1': { prompt: 8, completion: 24 },
|
||||
'claude-3-haiku': { prompt: 0.25, completion: 1.25 },
|
||||
'claude-3-sonnet': { prompt: 3, completion: 15 },
|
||||
'claude-3-opus': { prompt: 15, completion: 75 },
|
||||
'claude-3-5-haiku': { prompt: 0.8, completion: 4 },
|
||||
'claude-3.5-haiku': { prompt: 0.8, completion: 4 },
|
||||
'claude-3-5-sonnet': { prompt: 3, completion: 15 },
|
||||
'claude-3.5-sonnet': { prompt: 3, completion: 15 },
|
||||
'claude-3-7-sonnet': { prompt: 3, completion: 15 },
|
||||
'claude-3.7-sonnet': { prompt: 3, completion: 15 },
|
||||
'claude-3-5-haiku': { prompt: 0.8, completion: 4 },
|
||||
'claude-3.5-haiku': { prompt: 0.8, completion: 4 },
|
||||
'claude-3-haiku': { prompt: 0.25, completion: 1.25 },
|
||||
'claude-sonnet-4': { prompt: 3, completion: 15 },
|
||||
'claude-haiku-4-5': { prompt: 1, completion: 5 },
|
||||
'claude-opus-4': { prompt: 15, completion: 75 },
|
||||
'claude-2.1': { prompt: 8, completion: 24 },
|
||||
'claude-2': { prompt: 8, completion: 24 },
|
||||
'claude-instant': { prompt: 0.8, completion: 2.4 },
|
||||
'claude-': { prompt: 0.8, completion: 2.4 },
|
||||
'command-r-plus': { prompt: 3, completion: 15 },
|
||||
'claude-sonnet-4': { prompt: 3, completion: 15 },
|
||||
'command-r': { prompt: 0.5, completion: 1.5 },
|
||||
'command-r-plus': { prompt: 3, completion: 15 },
|
||||
'command-text': { prompt: 1.5, completion: 2.0 },
|
||||
'deepseek-reasoner': { prompt: 0.28, completion: 0.42 },
|
||||
deepseek: { prompt: 0.28, completion: 0.42 },
|
||||
/* cohere doesn't have rates for the older command models,
|
||||
so this was from https://artificialanalysis.ai/models/command-light/providers */
|
||||
command: { prompt: 0.38, completion: 0.38 },
|
||||
gemma: { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing
|
||||
'gemma-2': { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing
|
||||
'gemma-3': { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing
|
||||
'gemma-3-27b': { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing
|
||||
'gemini-2.0-flash-lite': { prompt: 0.075, completion: 0.3 },
|
||||
'deepseek-r1': { prompt: 0.4, completion: 2.0 },
|
||||
'deepseek-v3': { prompt: 0.2, completion: 0.8 },
|
||||
'gemma-2': { prompt: 0.01, completion: 0.03 }, // Base pattern (using gemma-2-9b pricing)
|
||||
'gemma-3': { prompt: 0.02, completion: 0.04 }, // Base pattern (using gemma-3n-e4b pricing)
|
||||
'gemma-3-27b': { prompt: 0.09, completion: 0.16 },
|
||||
'gemini-1.5': { prompt: 2.5, completion: 10 },
|
||||
'gemini-1.5-flash': { prompt: 0.15, completion: 0.6 },
|
||||
'gemini-1.5-flash-8b': { prompt: 0.075, completion: 0.3 },
|
||||
'gemini-2.0': { prompt: 0.1, completion: 0.4 }, // Base pattern (using 2.0-flash pricing)
|
||||
'gemini-2.0-flash': { prompt: 0.1, completion: 0.4 },
|
||||
'gemini-2.0': { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing
|
||||
'gemini-2.5-pro': { prompt: 1.25, completion: 10 },
|
||||
'gemini-2.0-flash-lite': { prompt: 0.075, completion: 0.3 },
|
||||
'gemini-2.5': { prompt: 0.3, completion: 2.5 }, // Base pattern (using 2.5-flash pricing)
|
||||
'gemini-2.5-flash': { prompt: 0.3, completion: 2.5 },
|
||||
'gemini-2.5-flash-lite': { prompt: 0.1, completion: 0.4 },
|
||||
'gemini-2.5': { prompt: 0, completion: 0 }, // Free for a period of time
|
||||
'gemini-1.5-flash-8b': { prompt: 0.075, completion: 0.3 },
|
||||
'gemini-1.5-flash': { prompt: 0.15, completion: 0.6 },
|
||||
'gemini-1.5': { prompt: 2.5, completion: 10 },
|
||||
'gemini-2.5-pro': { prompt: 1.25, completion: 10 },
|
||||
'gemini-pro-vision': { prompt: 0.5, completion: 1.5 },
|
||||
gemini: { prompt: 0.5, completion: 1.5 },
|
||||
'grok-2-vision-1212': { prompt: 2.0, completion: 10.0 },
|
||||
'grok-2-vision-latest': { prompt: 2.0, completion: 10.0 },
|
||||
'grok-2-vision': { prompt: 2.0, completion: 10.0 },
|
||||
grok: { prompt: 2.0, completion: 10.0 }, // Base pattern defaults to grok-2
|
||||
'grok-beta': { prompt: 5.0, completion: 15.0 },
|
||||
'grok-vision-beta': { prompt: 5.0, completion: 15.0 },
|
||||
'grok-2': { prompt: 2.0, completion: 10.0 },
|
||||
'grok-2-1212': { prompt: 2.0, completion: 10.0 },
|
||||
'grok-2-latest': { prompt: 2.0, completion: 10.0 },
|
||||
'grok-2': { prompt: 2.0, completion: 10.0 },
|
||||
'grok-3-mini-fast': { prompt: 0.6, completion: 4 },
|
||||
'grok-3-mini': { prompt: 0.3, completion: 0.5 },
|
||||
'grok-3-fast': { prompt: 5.0, completion: 25.0 },
|
||||
'grok-2-vision': { prompt: 2.0, completion: 10.0 },
|
||||
'grok-2-vision-1212': { prompt: 2.0, completion: 10.0 },
|
||||
'grok-2-vision-latest': { prompt: 2.0, completion: 10.0 },
|
||||
'grok-3': { prompt: 3.0, completion: 15.0 },
|
||||
'grok-3-fast': { prompt: 5.0, completion: 25.0 },
|
||||
'grok-3-mini': { prompt: 0.3, completion: 0.5 },
|
||||
'grok-3-mini-fast': { prompt: 0.6, completion: 4 },
|
||||
'grok-4': { prompt: 3.0, completion: 15.0 },
|
||||
'grok-beta': { prompt: 5.0, completion: 15.0 },
|
||||
'mistral-large': { prompt: 2.0, completion: 6.0 },
|
||||
'pixtral-large': { prompt: 2.0, completion: 6.0 },
|
||||
'mistral-saba': { prompt: 0.2, completion: 0.6 },
|
||||
codestral: { prompt: 0.3, completion: 0.9 },
|
||||
'ministral-8b': { prompt: 0.1, completion: 0.1 },
|
||||
'ministral-3b': { prompt: 0.04, completion: 0.04 },
|
||||
// GPT-OSS models
|
||||
'gpt-oss': { prompt: 0.05, completion: 0.2 },
|
||||
'ministral-8b': { prompt: 0.1, completion: 0.1 },
|
||||
'mistral-nemo': { prompt: 0.15, completion: 0.15 },
|
||||
'mistral-saba': { prompt: 0.2, completion: 0.6 },
|
||||
'pixtral-large': { prompt: 2.0, completion: 6.0 },
|
||||
'mistral-large': { prompt: 2.0, completion: 6.0 },
|
||||
'mixtral-8x22b': { prompt: 0.65, completion: 0.65 },
|
||||
kimi: { prompt: 0.14, completion: 2.49 }, // Base pattern (using kimi-k2 pricing)
|
||||
// GPT-OSS models (specific sizes)
|
||||
'gpt-oss:20b': { prompt: 0.05, completion: 0.2 },
|
||||
'gpt-oss-20b': { prompt: 0.05, completion: 0.2 },
|
||||
'gpt-oss:120b': { prompt: 0.15, completion: 0.6 },
|
||||
'gpt-oss-120b': { prompt: 0.15, completion: 0.6 },
|
||||
// GLM models (Zhipu AI)
|
||||
// GLM models (Zhipu AI) - general to specific
|
||||
glm4: { prompt: 0.1, completion: 0.1 },
|
||||
'glm-4': { prompt: 0.1, completion: 0.1 },
|
||||
'glm-4-32b': { prompt: 0.1, completion: 0.1 },
|
||||
'glm-4.5': { prompt: 0.35, completion: 1.55 },
|
||||
'glm-4.5v': { prompt: 0.6, completion: 1.8 },
|
||||
'glm-4.5-air': { prompt: 0.14, completion: 0.86 },
|
||||
'glm-4.5v': { prompt: 0.6, completion: 1.8 },
|
||||
'glm-4.6': { prompt: 0.5, completion: 1.75 },
|
||||
// Qwen models
|
||||
qwen: { prompt: 0.08, completion: 0.33 }, // Qwen base pattern (using qwen2.5-72b pricing)
|
||||
'qwen2.5': { prompt: 0.08, completion: 0.33 }, // Qwen 2.5 base pattern
|
||||
'qwen-turbo': { prompt: 0.05, completion: 0.2 },
|
||||
'qwen-plus': { prompt: 0.4, completion: 1.2 },
|
||||
'qwen-max': { prompt: 1.6, completion: 6.4 },
|
||||
'qwq-32b': { prompt: 0.15, completion: 0.4 },
|
||||
// Qwen3 models
|
||||
qwen3: { prompt: 0.035, completion: 0.138 }, // Qwen3 base pattern (using qwen3-4b pricing)
|
||||
'qwen3-8b': { prompt: 0.035, completion: 0.138 },
|
||||
'qwen3-14b': { prompt: 0.05, completion: 0.22 },
|
||||
'qwen3-30b-a3b': { prompt: 0.06, completion: 0.22 },
|
||||
'qwen3-32b': { prompt: 0.05, completion: 0.2 },
|
||||
'qwen3-235b-a22b': { prompt: 0.08, completion: 0.55 },
|
||||
// Qwen3 VL (Vision-Language) models
|
||||
'qwen3-vl-8b-thinking': { prompt: 0.18, completion: 2.1 },
|
||||
'qwen3-vl-8b-instruct': { prompt: 0.18, completion: 0.69 },
|
||||
'qwen3-vl-30b-a3b': { prompt: 0.29, completion: 1.0 },
|
||||
'qwen3-vl-235b-a22b': { prompt: 0.3, completion: 1.2 },
|
||||
// Qwen3 specialized models
|
||||
'qwen3-max': { prompt: 1.2, completion: 6 },
|
||||
'qwen3-coder': { prompt: 0.22, completion: 0.95 },
|
||||
'qwen3-coder-30b-a3b': { prompt: 0.06, completion: 0.25 },
|
||||
'qwen3-coder-plus': { prompt: 1, completion: 5 },
|
||||
'qwen3-coder-flash': { prompt: 0.3, completion: 1.5 },
|
||||
'qwen3-next-80b-a3b': { prompt: 0.1, completion: 0.8 },
|
||||
},
|
||||
bedrockValues,
|
||||
);
|
||||
|
|
@ -195,67 +249,39 @@ const cacheTokenValues = {
|
|||
* @returns {string|undefined} The key corresponding to the model name, or undefined if no match is found.
|
||||
*/
|
||||
const getValueKey = (model, endpoint) => {
|
||||
if (!model || typeof model !== 'string') {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
// Use findMatchingPattern directly against tokenValues for efficient lookup
|
||||
if (!endpoint || (typeof endpoint === 'string' && !tokenValues[endpoint])) {
|
||||
const matchedKey = findMatchingPattern(model, tokenValues);
|
||||
if (matchedKey) {
|
||||
return matchedKey;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: use matchModelName for edge cases and legacy handling
|
||||
const modelName = matchModelName(model, endpoint);
|
||||
if (!modelName) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
// Legacy token size mappings and aliases for older models
|
||||
if (modelName.includes('gpt-3.5-turbo-16k')) {
|
||||
return '16k';
|
||||
} else if (modelName.includes('gpt-3.5-turbo-0125')) {
|
||||
return 'gpt-3.5-turbo-0125';
|
||||
} else if (modelName.includes('gpt-3.5-turbo-1106')) {
|
||||
return 'gpt-3.5-turbo-1106';
|
||||
} else if (modelName.includes('gpt-3.5')) {
|
||||
return '4k';
|
||||
} else if (modelName.includes('o4-mini')) {
|
||||
return 'o4-mini';
|
||||
} else if (modelName.includes('o4')) {
|
||||
return 'o4';
|
||||
} else if (modelName.includes('o3-mini')) {
|
||||
return 'o3-mini';
|
||||
} else if (modelName.includes('o3')) {
|
||||
return 'o3';
|
||||
} else if (modelName.includes('o1-preview')) {
|
||||
return 'o1-preview';
|
||||
} else if (modelName.includes('o1-mini')) {
|
||||
return 'o1-mini';
|
||||
} else if (modelName.includes('o1')) {
|
||||
return 'o1';
|
||||
} else if (modelName.includes('gpt-4.5')) {
|
||||
return 'gpt-4.5';
|
||||
} else if (modelName.includes('gpt-4.1-nano')) {
|
||||
return 'gpt-4.1-nano';
|
||||
} else if (modelName.includes('gpt-4.1-mini')) {
|
||||
return 'gpt-4.1-mini';
|
||||
} else if (modelName.includes('gpt-4.1')) {
|
||||
return 'gpt-4.1';
|
||||
} else if (modelName.includes('gpt-4o-2024-05-13')) {
|
||||
return 'gpt-4o-2024-05-13';
|
||||
} else if (modelName.includes('gpt-5-nano')) {
|
||||
return 'gpt-5-nano';
|
||||
} else if (modelName.includes('gpt-5-mini')) {
|
||||
return 'gpt-5-mini';
|
||||
} else if (modelName.includes('gpt-5')) {
|
||||
return 'gpt-5';
|
||||
} else if (modelName.includes('gpt-4o-mini')) {
|
||||
return 'gpt-4o-mini';
|
||||
} else if (modelName.includes('gpt-4o')) {
|
||||
return 'gpt-4o';
|
||||
} else if (modelName.includes('gpt-4-vision')) {
|
||||
return 'gpt-4-1106';
|
||||
} else if (modelName.includes('gpt-4-1106')) {
|
||||
return 'gpt-4-1106';
|
||||
return 'gpt-4-1106'; // Alias for gpt-4-vision
|
||||
} else if (modelName.includes('gpt-4-0125')) {
|
||||
return 'gpt-4-1106';
|
||||
return 'gpt-4-1106'; // Alias for gpt-4-0125
|
||||
} else if (modelName.includes('gpt-4-turbo')) {
|
||||
return 'gpt-4-1106';
|
||||
return 'gpt-4-1106'; // Alias for gpt-4-turbo
|
||||
} else if (modelName.includes('gpt-4-32k')) {
|
||||
return '32k';
|
||||
} else if (modelName.includes('gpt-4')) {
|
||||
return '8k';
|
||||
} else if (tokenValues[modelName]) {
|
||||
return modelName;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
const { maxTokensMap } = require('@librechat/api');
|
||||
const { EModelEndpoint } = require('librechat-data-provider');
|
||||
const {
|
||||
defaultRate,
|
||||
|
|
@ -113,6 +114,14 @@ describe('getValueKey', () => {
|
|||
expect(getValueKey('gpt-5-nano-2025-01-30-0130')).toBe('gpt-5-nano');
|
||||
});
|
||||
|
||||
it('should return "gpt-5-pro" for model type of "gpt-5-pro"', () => {
|
||||
expect(getValueKey('gpt-5-pro-2025-01-30')).toBe('gpt-5-pro');
|
||||
expect(getValueKey('openai/gpt-5-pro')).toBe('gpt-5-pro');
|
||||
expect(getValueKey('gpt-5-pro-0130')).toBe('gpt-5-pro');
|
||||
expect(getValueKey('gpt-5-pro-2025-01-30-0130')).toBe('gpt-5-pro');
|
||||
expect(getValueKey('gpt-5-pro-preview')).toBe('gpt-5-pro');
|
||||
});
|
||||
|
||||
it('should return "gpt-4o" for model type of "gpt-4o"', () => {
|
||||
expect(getValueKey('gpt-4o-2024-08-06')).toBe('gpt-4o');
|
||||
expect(getValueKey('gpt-4o-2024-08-06-0718')).toBe('gpt-4o');
|
||||
|
|
@ -288,6 +297,20 @@ describe('getMultiplier', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('should return the correct multiplier for gpt-5-pro', () => {
|
||||
const valueKey = getValueKey('gpt-5-pro-2025-01-30');
|
||||
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-5-pro'].prompt);
|
||||
expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
|
||||
tokenValues['gpt-5-pro'].completion,
|
||||
);
|
||||
expect(getMultiplier({ model: 'gpt-5-pro-preview', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['gpt-5-pro'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'openai/gpt-5-pro', tokenType: 'completion' })).toBe(
|
||||
tokenValues['gpt-5-pro'].completion,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return the correct multiplier for gpt-4o', () => {
|
||||
const valueKey = getValueKey('gpt-4o-2024-08-06');
|
||||
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-4o'].prompt);
|
||||
|
|
@ -471,6 +494,249 @@ describe('AWS Bedrock Model Tests', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('Amazon Model Tests', () => {
|
||||
describe('Amazon Nova Models', () => {
|
||||
it('should return correct pricing for nova-premier', () => {
|
||||
expect(getMultiplier({ model: 'nova-premier', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['nova-premier'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'nova-premier', tokenType: 'completion' })).toBe(
|
||||
tokenValues['nova-premier'].completion,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.nova-premier-v1:0', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['nova-premier'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.nova-premier-v1:0', tokenType: 'completion' })).toBe(
|
||||
tokenValues['nova-premier'].completion,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return correct pricing for nova-pro', () => {
|
||||
expect(getMultiplier({ model: 'nova-pro', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['nova-pro'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'nova-pro', tokenType: 'completion' })).toBe(
|
||||
tokenValues['nova-pro'].completion,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.nova-pro-v1:0', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['nova-pro'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.nova-pro-v1:0', tokenType: 'completion' })).toBe(
|
||||
tokenValues['nova-pro'].completion,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return correct pricing for nova-lite', () => {
|
||||
expect(getMultiplier({ model: 'nova-lite', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['nova-lite'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'nova-lite', tokenType: 'completion' })).toBe(
|
||||
tokenValues['nova-lite'].completion,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.nova-lite-v1:0', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['nova-lite'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.nova-lite-v1:0', tokenType: 'completion' })).toBe(
|
||||
tokenValues['nova-lite'].completion,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return correct pricing for nova-micro', () => {
|
||||
expect(getMultiplier({ model: 'nova-micro', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['nova-micro'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'nova-micro', tokenType: 'completion' })).toBe(
|
||||
tokenValues['nova-micro'].completion,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.nova-micro-v1:0', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['nova-micro'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.nova-micro-v1:0', tokenType: 'completion' })).toBe(
|
||||
tokenValues['nova-micro'].completion,
|
||||
);
|
||||
});
|
||||
|
||||
it('should match both short and full model names to the same pricing', () => {
|
||||
const models = ['nova-micro', 'nova-lite', 'nova-pro', 'nova-premier'];
|
||||
const fullModels = [
|
||||
'amazon.nova-micro-v1:0',
|
||||
'amazon.nova-lite-v1:0',
|
||||
'amazon.nova-pro-v1:0',
|
||||
'amazon.nova-premier-v1:0',
|
||||
];
|
||||
|
||||
models.forEach((shortModel, i) => {
|
||||
const fullModel = fullModels[i];
|
||||
const shortPrompt = getMultiplier({ model: shortModel, tokenType: 'prompt' });
|
||||
const fullPrompt = getMultiplier({ model: fullModel, tokenType: 'prompt' });
|
||||
const shortCompletion = getMultiplier({ model: shortModel, tokenType: 'completion' });
|
||||
const fullCompletion = getMultiplier({ model: fullModel, tokenType: 'completion' });
|
||||
|
||||
expect(shortPrompt).toBe(fullPrompt);
|
||||
expect(shortCompletion).toBe(fullCompletion);
|
||||
expect(shortPrompt).toBe(tokenValues[shortModel].prompt);
|
||||
expect(shortCompletion).toBe(tokenValues[shortModel].completion);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('Amazon Titan Models', () => {
|
||||
it('should return correct pricing for titan-text-premier', () => {
|
||||
expect(getMultiplier({ model: 'titan-text-premier', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['titan-text-premier'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'titan-text-premier', tokenType: 'completion' })).toBe(
|
||||
tokenValues['titan-text-premier'].completion,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.titan-text-premier-v1:0', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['titan-text-premier'].prompt,
|
||||
);
|
||||
expect(
|
||||
getMultiplier({ model: 'amazon.titan-text-premier-v1:0', tokenType: 'completion' }),
|
||||
).toBe(tokenValues['titan-text-premier'].completion);
|
||||
});
|
||||
|
||||
it('should return correct pricing for titan-text-express', () => {
|
||||
expect(getMultiplier({ model: 'titan-text-express', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['titan-text-express'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'titan-text-express', tokenType: 'completion' })).toBe(
|
||||
tokenValues['titan-text-express'].completion,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.titan-text-express-v1', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['titan-text-express'].prompt,
|
||||
);
|
||||
expect(
|
||||
getMultiplier({ model: 'amazon.titan-text-express-v1', tokenType: 'completion' }),
|
||||
).toBe(tokenValues['titan-text-express'].completion);
|
||||
});
|
||||
|
||||
it('should return correct pricing for titan-text-lite', () => {
|
||||
expect(getMultiplier({ model: 'titan-text-lite', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['titan-text-lite'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'titan-text-lite', tokenType: 'completion' })).toBe(
|
||||
tokenValues['titan-text-lite'].completion,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.titan-text-lite-v1', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['titan-text-lite'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'amazon.titan-text-lite-v1', tokenType: 'completion' })).toBe(
|
||||
tokenValues['titan-text-lite'].completion,
|
||||
);
|
||||
});
|
||||
|
||||
it('should match both short and full model names to the same pricing', () => {
  // Each pair is [short alias, provider-qualified versioned name].
  const namePairs = [
    ['titan-text-lite', 'amazon.titan-text-lite-v1'],
    ['titan-text-express', 'amazon.titan-text-express-v1'],
    ['titan-text-premier', 'amazon.titan-text-premier-v1:0'],
  ];

  for (const [shortModel, fullModel] of namePairs) {
    // Look up prompt rates first, then completion rates (alias before full name).
    const [aliasPrompt, qualifiedPrompt] = [shortModel, fullModel].map((model) =>
      getMultiplier({ model, tokenType: 'prompt' }),
    );
    const [aliasCompletion, qualifiedCompletion] = [shortModel, fullModel].map((model) =>
      getMultiplier({ model, tokenType: 'completion' }),
    );

    // Both spellings must agree with each other...
    expect(aliasPrompt).toBe(qualifiedPrompt);
    expect(aliasCompletion).toBe(qualifiedCompletion);
    // ...and with the pricing table entry keyed by the short alias.
    expect(aliasPrompt).toBe(tokenValues[shortModel].prompt);
    expect(aliasCompletion).toBe(tokenValues[shortModel].completion);
  }
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('AI21 Model Tests', () => {
  /**
   * Asserts that every name in `modelNames` is billed at the prompt and
   * completion rates stored under `valueKey` in tokenValues.
   */
  const expectRatesOf = (valueKey, modelNames) => {
    for (const model of modelNames) {
      for (const tokenType of ['prompt', 'completion']) {
        expect(getMultiplier({ model, tokenType })).toBe(tokenValues[valueKey][tokenType]);
      }
    }
  };

  /**
   * Asserts that a short alias and its fully qualified name produce the same
   * multipliers, and that those multipliers equal the alias's tokenValues entry.
   */
  const expectAliasParity = (shortModel, fullModel) => {
    const [aliasPrompt, qualifiedPrompt] = [shortModel, fullModel].map((model) =>
      getMultiplier({ model, tokenType: 'prompt' }),
    );
    const [aliasCompletion, qualifiedCompletion] = [shortModel, fullModel].map((model) =>
      getMultiplier({ model, tokenType: 'completion' }),
    );

    expect(aliasPrompt).toBe(qualifiedPrompt);
    expect(aliasCompletion).toBe(qualifiedCompletion);
    expect(aliasPrompt).toBe(tokenValues[shortModel].prompt);
    expect(aliasCompletion).toBe(tokenValues[shortModel].completion);
  };

  describe('AI21 J2 Models', () => {
    it('should return correct pricing for j2-mid', () => {
      expectRatesOf('j2-mid', ['j2-mid', 'ai21.j2-mid-v1']);
    });

    it('should return correct pricing for j2-ultra', () => {
      expectRatesOf('j2-ultra', ['j2-ultra', 'ai21.j2-ultra-v1']);
    });

    it('should match both short and full model names to the same pricing', () => {
      const namePairs = [
        ['j2-mid', 'ai21.j2-mid-v1'],
        ['j2-ultra', 'ai21.j2-ultra-v1'],
      ];

      for (const [shortModel, fullModel] of namePairs) {
        expectAliasParity(shortModel, fullModel);
      }
    });
  });

  describe('AI21 Jamba Models', () => {
    it('should return correct pricing for jamba-instruct', () => {
      expectRatesOf('jamba-instruct', ['jamba-instruct', 'ai21.jamba-instruct-v1:0']);
    });

    it('should match both short and full model names to the same pricing', () => {
      expectAliasParity('jamba-instruct', 'ai21.jamba-instruct-v1:0');
    });
  });
});
|
||||
|
||||
describe('Deepseek Model Tests', () => {
|
||||
const deepseekModels = ['deepseek-chat', 'deepseek-coder', 'deepseek-reasoner', 'deepseek.r1'];
|
||||
|
||||
|
|
@ -502,6 +768,187 @@ describe('Deepseek Model Tests', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('Qwen3 Model Tests', () => {
  /**
   * Asserts that `model` is billed at the prompt rate and then the completion
   * rate stored under `valueKey` in tokenValues.
   */
  const expectPricing = (model, valueKey) => {
    for (const tokenType of ['prompt', 'completion']) {
      expect(getMultiplier({ model, tokenType })).toBe(tokenValues[valueKey][tokenType]);
    }
  };

  describe('Qwen3 Base Models', () => {
    it('should return correct pricing for qwen3 base pattern', () => {
      expectPricing('qwen3', 'qwen3');
    });

    it('should return correct pricing for qwen3-4b (falls back to qwen3)', () => {
      // No dedicated entry is expected for the 4b size; the base `qwen3` rates apply.
      expectPricing('qwen3-4b', 'qwen3');
    });

    it('should return correct pricing for qwen3-8b', () => {
      expectPricing('qwen3-8b', 'qwen3-8b');
    });

    it('should return correct pricing for qwen3-14b', () => {
      expectPricing('qwen3-14b', 'qwen3-14b');
    });

    it('should return correct pricing for qwen3-235b-a22b', () => {
      expectPricing('qwen3-235b-a22b', 'qwen3-235b-a22b');
    });

    it('should handle model name variations with provider prefixes', () => {
      // Each row is [bare model name, tokenValues key it should resolve to].
      const expectations = [
        ['qwen3', 'qwen3'],
        ['qwen3-4b', 'qwen3'],
        ['qwen3-8b', 'qwen3-8b'],
        ['qwen3-32b', 'qwen3-32b'],
      ];

      for (const [input, expected] of expectations) {
        expectPricing(`alibaba/${input}`, expected);
      }
    });
  });

  describe('Qwen3 VL (Vision-Language) Models', () => {
    it('should return correct pricing for qwen3-vl-8b-thinking', () => {
      expectPricing('qwen3-vl-8b-thinking', 'qwen3-vl-8b-thinking');
    });

    it('should return correct pricing for qwen3-vl-8b-instruct', () => {
      expectPricing('qwen3-vl-8b-instruct', 'qwen3-vl-8b-instruct');
    });

    it('should return correct pricing for qwen3-vl-30b-a3b', () => {
      expectPricing('qwen3-vl-30b-a3b', 'qwen3-vl-30b-a3b');
    });

    it('should return correct pricing for qwen3-vl-235b-a22b', () => {
      expectPricing('qwen3-vl-235b-a22b', 'qwen3-vl-235b-a22b');
    });
  });

  describe('Qwen3 Specialized Models', () => {
    it('should return correct pricing for qwen3-max', () => {
      expectPricing('qwen3-max', 'qwen3-max');
    });

    it('should return correct pricing for qwen3-coder', () => {
      expectPricing('qwen3-coder', 'qwen3-coder');
    });

    it('should return correct pricing for qwen3-coder-plus', () => {
      expectPricing('qwen3-coder-plus', 'qwen3-coder-plus');
    });

    it('should return correct pricing for qwen3-coder-flash', () => {
      expectPricing('qwen3-coder-flash', 'qwen3-coder-flash');
    });

    it('should return correct pricing for qwen3-next-80b-a3b', () => {
      expectPricing('qwen3-next-80b-a3b', 'qwen3-next-80b-a3b');
    });
  });

  describe('Qwen3 Model Variations', () => {
    it('should handle all qwen3 models with provider prefixes', () => {
      const models = ['qwen3', 'qwen3-8b', 'qwen3-max', 'qwen3-coder', 'qwen3-vl-8b-instruct'];
      const prefixes = ['alibaba', 'qwen', 'openrouter'];

      // Every prefix/model combination must price like the bare model name.
      for (const model of models) {
        for (const prefix of prefixes) {
          expectPricing(`${prefix}/${model}`, model);
        }
      }
    });

    it('should handle qwen3-4b falling back to qwen3 base pattern', () => {
      const fallbackCandidates = ['qwen3-4b', 'alibaba/qwen3-4b', 'qwen/qwen3-4b-preview'];

      for (const model of fallbackCandidates) {
        expectPricing(model, 'qwen3');
      }
    });
  });
});
|
||||
|
||||
describe('getCacheMultiplier', () => {
|
||||
it('should return the correct cache multiplier for a given valueKey and cacheType', () => {
|
||||
expect(getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'write' })).toBe(
|
||||
|
|
@ -914,6 +1361,37 @@ describe('Claude Model Tests', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('should return correct prompt and completion rates for Claude Haiku 4.5', () => {
  // Check the prompt rate first, then the completion rate, against the
  // `claude-haiku-4-5` entry of the pricing table.
  for (const tokenType of ['prompt', 'completion']) {
    expect(getMultiplier({ model: 'claude-haiku-4-5', tokenType })).toBe(
      tokenValues['claude-haiku-4-5'][tokenType],
    );
  }
});
|
||||
|
||||
it('should handle Claude Haiku 4.5 model name variations', () => {
  // Dated, suffixed and provider-qualified spellings of the same model.
  const spellings = [
    'claude-haiku-4-5',
    'claude-haiku-4-5-20250420',
    'claude-haiku-4-5-latest',
    'anthropic/claude-haiku-4-5',
    'claude-haiku-4-5/anthropic',
    'claude-haiku-4-5-preview',
  ];

  for (const model of spellings) {
    // The name must first resolve to the canonical pricing key...
    expect(getValueKey(model)).toBe('claude-haiku-4-5');

    // ...and then be billed at that key's prompt and completion rates.
    for (const tokenType of ['prompt', 'completion']) {
      expect(getMultiplier({ model, tokenType })).toBe(
        tokenValues['claude-haiku-4-5'][tokenType],
      );
    }
  }
});
|
||||
|
||||
it('should handle Claude 4 model name variations with different prefixes and suffixes', () => {
|
||||
const modelVariations = [
|
||||
'claude-sonnet-4',
|
||||
|
|
@ -991,3 +1469,119 @@ describe('Claude Model Tests', () => {
|
|||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('tokens.ts and tx.js sync validation', () => {
  // Matches legacy token size keys such as '4k', '8k', '16k', '32k'.
  const legacySizeKey = /^\d+k$/;

  it('should resolve all models in maxTokensMap to pricing via getValueKey', () => {
    const contextByKey = maxTokensMap[EModelEndpoint.openAI];
    const pricedKeys = new Set(Object.keys(tokenValues));

    const unresolved = [];

    for (const [key, context] of Object.entries(contextByKey)) {
      // Legacy size mappings and generic pattern keys (ending in '-' or ':')
      // are not expected to carry pricing of their own.
      const isGenericPattern = key.endsWith('-') || key.endsWith(':');
      if (legacySizeKey.test(key) || isGenericPattern) {
        continue;
      }

      const resolvedKey = getValueKey(key);

      // Resolution succeeds when the resolved key has pricing or is itself a
      // legacy size key.
      const hasPricing =
        Boolean(resolvedKey) && (pricedKeys.has(resolvedKey) || legacySizeKey.test(resolvedKey));
      if (hasPricing) {
        continue;
      }

      // Anything left cannot be priced; record it for the failure report.
      unresolved.push({
        key,
        resolvedKey: resolvedKey || 'undefined',
        context,
      });
    }

    if (unresolved.length > 0) {
      console.log('\nModels that cannot resolve to pricing via getValueKey:');
      for (const { key, resolvedKey, context } of unresolved) {
        console.log(` - '${key}' → '${resolvedKey}' (context: ${context})`);
      }
    }

    expect(unresolved).toEqual([]);
  });

  it('should not have redundant dated variants with same pricing and context as base model', () => {
    const pricedKeys = new Set(Object.keys(tokenValues));
    const contextByKey = maxTokensMap[EModelEndpoint.openAI];
    const redundant = [];

    for (const key of pricedKeys) {
      // Only dated variants (keys ending with -YYYY-MM-DD) are of interest.
      if (!/.*-\d{4}-\d{2}-\d{2}$/.test(key)) {
        continue;
      }

      // Strip the date suffix; without a priced base key there is nothing to compare.
      const baseKey = key.replace(/-\d{4}-\d{2}-\d{2}$/, '');
      if (!pricedKeys.has(baseKey)) {
        continue;
      }

      const variantPricing = tokenValues[key];
      const basePricing = tokenValues[baseKey];
      const variantContext = contextByKey[key];
      const baseContext = contextByKey[baseKey];

      const pricingDiffers =
        variantPricing.prompt !== basePricing.prompt ||
        variantPricing.completion !== basePricing.completion;
      if (pricingDiffers || variantContext !== baseContext) {
        continue;
      }

      // Same pricing and same context as the base key: the variant adds nothing.
      redundant.push({
        key,
        baseKey,
        pricing: `${variantPricing.prompt}/${variantPricing.completion}`,
        context: variantContext,
      });
    }

    if (redundant.length > 0) {
      console.log('\nRedundant dated variants found (same pricing and context as base):');
      for (const { key, baseKey, pricing, context } of redundant) {
        console.log(` - '${key}' → '${baseKey}' (pricing: ${pricing}, context: ${context})`);
        console.log(` Can be removed - pattern matching will handle it`);
      }
    }

    expect(redundant).toEqual([]);
  });

  it('should have context windows in tokens.ts for all models with pricing in tx.js (openAI catch-all)', () => {
    const contextByKey = maxTokensMap[EModelEndpoint.openAI];

    // Collect every priced key (legacy size keys excluded) that has no truthy
    // context window in the openAI map.
    const missingContext = Object.keys(tokenValues)
      .filter((key) => !legacySizeKey.test(key))
      .filter((key) => !contextByKey[key])
      .map((key) => {
        const pricing = tokenValues[key];
        return {
          key,
          pricing: `${pricing.prompt}/${pricing.completion}`,
        };
      });

    if (missingContext.length > 0) {
      console.log('\nModels with pricing but missing context in tokens.ts:');
      for (const { key, pricing } of missingContext) {
        console.log(` - '${key}' (pricing: ${pricing})`);
        console.log(` Add to tokens.ts openAIModels/bedrockModels/etc.`);
      }
    }

    expect(missingContext).toEqual([]);
  });
});
|
||||
|
|
|
|||
|
|
@ -143,7 +143,7 @@ const initializeAgent = async ({
|
|||
const agentMaxContextTokens = optionalChainWithEmptyCheck(
|
||||
maxContextTokens,
|
||||
getModelMaxTokens(tokensModel, providerEndpointMap[provider], options.endpointTokenConfig),
|
||||
4096,
|
||||
18000,
|
||||
);
|
||||
|
||||
if (
|
||||
|
|
|
|||
|
|
@ -186,6 +186,19 @@ describe('getModelMaxTokens', () => {
|
|||
);
|
||||
});
|
||||
|
||||
test('should return correct tokens for gpt-5-pro matches', () => {
  // The exact name plus suffixed, provider-prefixed and dated spellings must
  // all report the `gpt-5-pro` limit from the openAI map.
  const spellings = ['gpt-5-pro', 'gpt-5-pro-preview', 'openai/gpt-5-pro', 'gpt-5-pro-2025-01-30'];

  for (const model of spellings) {
    expect(getModelMaxTokens(model)).toBe(maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro']);
  }
});
|
||||
|
||||
test('should return correct tokens for Anthropic models', () => {
|
||||
const models = [
|
||||
'claude-2.1',
|
||||
|
|
@ -469,7 +482,7 @@ describe('getModelMaxTokens', () => {
|
|||
|
||||
test('should return correct max output tokens for GPT-5 models', () => {
|
||||
const { getModelMaxOutputTokens } = require('@librechat/api');
|
||||
['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => {
|
||||
['gpt-5', 'gpt-5-mini', 'gpt-5-nano', 'gpt-5-pro'].forEach((model) => {
|
||||
expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
|
||||
expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(
|
||||
maxOutputTokensMap[EModelEndpoint.openAI][model],
|
||||
|
|
@ -582,6 +595,13 @@ describe('matchModelName', () => {
|
|||
expect(matchModelName('gpt-5-nano-2025-01-30')).toBe('gpt-5-nano');
|
||||
});
|
||||
|
||||
it('should return the closest matching key for gpt-5-pro matches', () => {
  // Provider-prefixed, suffixed and dated spellings must all match `gpt-5-pro`.
  const spellings = [
    'openai/gpt-5-pro',
    'gpt-5-pro-preview',
    'gpt-5-pro-2025-01-30',
    'gpt-5-pro-2025-01-30-0130',
  ];

  for (const model of spellings) {
    expect(matchModelName(model)).toBe('gpt-5-pro');
  }
});
|
||||
|
||||
// Tests for Google models
|
||||
it('should return the exact model name if it exists in maxTokensMap - Google models', () => {
|
||||
expect(matchModelName('text-bison-32k', EModelEndpoint.google)).toBe('text-bison-32k');
|
||||
|
|
@ -832,6 +852,49 @@ describe('Claude Model Tests', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('should return correct context length for Claude Haiku 4.5', () => {
  // With the endpoint given explicitly.
  const withEndpoint = getModelMaxTokens('claude-haiku-4-5', EModelEndpoint.anthropic);
  expect(withEndpoint).toBe(maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5']);

  // With the endpoint omitted, the same limit is expected.
  const withoutEndpoint = getModelMaxTokens('claude-haiku-4-5');
  expect(withoutEndpoint).toBe(maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5']);
});
|
||||
|
||||
it('should handle Claude Haiku 4.5 model name variations', () => {
  // Dated, suffixed and provider-qualified spellings of the same model.
  const spellings = [
    'claude-haiku-4-5',
    'claude-haiku-4-5-20250420',
    'claude-haiku-4-5-latest',
    'anthropic/claude-haiku-4-5',
    'claude-haiku-4-5/anthropic',
    'claude-haiku-4-5-preview',
  ];

  for (const model of spellings) {
    // The pattern lookup against the anthropic map must land on the canonical key...
    expect(findMatchingPattern(model, maxTokensMap[EModelEndpoint.anthropic])).toBe(
      'claude-haiku-4-5',
    );
    // ...and the reported limit must be that key's entry.
    expect(getModelMaxTokens(model, EModelEndpoint.anthropic)).toBe(
      maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'],
    );
  }
});
|
||||
|
||||
it('should match model names correctly for Claude Haiku 4.5', () => {
  // Every spelling below must be matched to the canonical `claude-haiku-4-5` key.
  const spellings = [
    'claude-haiku-4-5',
    'claude-haiku-4-5-20250420',
    'claude-haiku-4-5-latest',
    'anthropic/claude-haiku-4-5',
    'claude-haiku-4-5/anthropic',
    'claude-haiku-4-5-preview',
  ];

  for (const model of spellings) {
    expect(matchModelName(model, EModelEndpoint.anthropic)).toBe('claude-haiku-4-5');
  }
});
|
||||
|
||||
it('should handle Claude 4 model name variations with different prefixes and suffixes', () => {
|
||||
const modelVariations = [
|
||||
'claude-sonnet-4',
|
||||
|
|
@ -924,6 +987,121 @@ describe('Kimi Model Tests', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('Qwen3 Model Tests', () => {
  describe('getModelMaxTokens', () => {
    /**
     * For each [model, key] row, asserts that getModelMaxTokens(model) equals
     * the openAI map entry stored under `key`.
     */
    const expectContexts = (rows) => {
      for (const [model, key] of rows) {
        expect(getModelMaxTokens(model)).toBe(maxTokensMap[EModelEndpoint.openAI][key]);
      }
    };

    test('should return correct tokens for Qwen3 base pattern', () => {
      expectContexts([['qwen3', 'qwen3']]);
    });

    test('should return correct tokens for qwen3-4b (falls back to qwen3)', () => {
      expectContexts([['qwen3-4b', 'qwen3']]);
    });

    test('should return correct tokens for Qwen3 base models', () => {
      expectContexts([
        ['qwen3-8b', 'qwen3-8b'],
        ['qwen3-14b', 'qwen3-14b'],
        ['qwen3-32b', 'qwen3-32b'],
        ['qwen3-235b-a22b', 'qwen3-235b-a22b'],
      ]);
    });

    test('should return correct tokens for Qwen3 VL (Vision-Language) models', () => {
      expectContexts([
        ['qwen3-vl-8b-thinking', 'qwen3-vl-8b-thinking'],
        ['qwen3-vl-8b-instruct', 'qwen3-vl-8b-instruct'],
        ['qwen3-vl-30b-a3b', 'qwen3-vl-30b-a3b'],
        ['qwen3-vl-235b-a22b', 'qwen3-vl-235b-a22b'],
      ]);
    });

    test('should return correct tokens for Qwen3 specialized models', () => {
      expectContexts([
        ['qwen3-max', 'qwen3-max'],
        ['qwen3-coder', 'qwen3-coder'],
        ['qwen3-coder-30b-a3b', 'qwen3-coder-30b-a3b'],
        ['qwen3-coder-plus', 'qwen3-coder-plus'],
        ['qwen3-coder-flash', 'qwen3-coder-flash'],
        ['qwen3-next-80b-a3b', 'qwen3-next-80b-a3b'],
      ]);
    });

    test('should handle Qwen3 models with provider prefixes', () => {
      expectContexts([
        ['alibaba/qwen3', 'qwen3'],
        ['alibaba/qwen3-4b', 'qwen3'],
        ['qwen/qwen3-8b', 'qwen3-8b'],
        ['openrouter/qwen3-max', 'qwen3-max'],
        ['alibaba/qwen3-vl-8b-instruct', 'qwen3-vl-8b-instruct'],
        ['qwen/qwen3-coder', 'qwen3-coder'],
      ]);
    });

    test('should handle Qwen3 models with suffixes', () => {
      expectContexts([
        ['qwen3-preview', 'qwen3'],
        ['qwen3-4b-preview', 'qwen3'],
        ['qwen3-8b-latest', 'qwen3-8b'],
        ['qwen3-max-2024', 'qwen3-max'],
      ]);
    });
  });

  describe('matchModelName', () => {
    /**
     * For each [model, key] row, asserts that matchModelName(model) returns `key`.
     */
    const expectMatches = (rows) => {
      for (const [model, key] of rows) {
        expect(matchModelName(model)).toBe(key);
      }
    };

    test('should match exact Qwen3 model names', () => {
      expectMatches([
        ['qwen3', 'qwen3'],
        ['qwen3-4b', 'qwen3'],
        ['qwen3-8b', 'qwen3-8b'],
        ['qwen3-vl-8b-thinking', 'qwen3-vl-8b-thinking'],
        ['qwen3-max', 'qwen3-max'],
        ['qwen3-coder', 'qwen3-coder'],
      ]);
    });

    test('should match Qwen3 model variations with provider prefixes', () => {
      expectMatches([
        ['alibaba/qwen3', 'qwen3'],
        ['alibaba/qwen3-4b', 'qwen3'],
        ['qwen/qwen3-8b', 'qwen3-8b'],
        ['openrouter/qwen3-max', 'qwen3-max'],
        ['alibaba/qwen3-vl-8b-instruct', 'qwen3-vl-8b-instruct'],
        ['qwen/qwen3-coder', 'qwen3-coder'],
      ]);
    });

    test('should match Qwen3 model variations with suffixes', () => {
      expectMatches([
        ['qwen3-preview', 'qwen3'],
        ['qwen3-4b-preview', 'qwen3'],
        ['qwen3-8b-latest', 'qwen3-8b'],
        ['qwen3-max-2024', 'qwen3-max'],
        ['qwen3-coder-v1', 'qwen3-coder'],
      ]);
    });
  });
});
|
||||
|
||||
describe('GLM Model Tests (Zhipu AI)', () => {
|
||||
describe('getModelMaxTokens', () => {
|
||||
test('should return correct tokens for GLM models', () => {
|
||||
|
|
|
|||
|
|
@ -40,10 +40,10 @@ const openAIModels = {
|
|||
'gpt-5': 400000,
|
||||
'gpt-5-mini': 400000,
|
||||
'gpt-5-nano': 400000,
|
||||
'gpt-5-pro': 400000,
|
||||
'gpt-4o': 127500, // -500 from max
|
||||
'gpt-4o-mini': 127500, // -500 from max
|
||||
'gpt-4o-2024-05-13': 127500, // -500 from max
|
||||
'gpt-4o-2024-08-06': 127500, // -500 from max
|
||||
'gpt-4-turbo': 127500, // -500 from max
|
||||
'gpt-4-vision': 127500, // -500 from max
|
||||
'gpt-3.5-turbo': 16375, // -10 from max
|
||||
|
|
@ -60,9 +60,11 @@ const mistralModels = {
|
|||
'mistral-7b': 31990, // -10 from max
|
||||
'mistral-small': 31990, // -10 from max
|
||||
'mixtral-8x7b': 31990, // -10 from max
|
||||
'mixtral-8x22b': 65536,
|
||||
'mistral-large': 131000,
|
||||
'mistral-large-2402': 127500,
|
||||
'mistral-large-2407': 127500,
|
||||
'mistral-nemo': 131000,
|
||||
'pixtral-large': 131000,
|
||||
'mistral-saba': 32000,
|
||||
codestral: 256000,
|
||||
|
|
@ -75,6 +77,7 @@ const cohereModels = {
|
|||
'command-light-nightly': 8182, // -10 from max
|
||||
command: 4086, // -10 from max
|
||||
'command-nightly': 8182, // -10 from max
|
||||
'command-text': 4086, // -10 from max
|
||||
'command-r': 127500, // -500 from max
|
||||
'command-r-plus': 127500, // -500 from max
|
||||
};
|
||||
|
|
@ -127,14 +130,17 @@ const anthropicModels = {
|
|||
'claude-3.7-sonnet': 200000,
|
||||
'claude-3-5-sonnet-latest': 200000,
|
||||
'claude-3.5-sonnet-latest': 200000,
|
||||
'claude-haiku-4-5': 200000,
|
||||
'claude-sonnet-4': 1000000,
|
||||
'claude-opus-4': 200000,
|
||||
'claude-4': 200000,
|
||||
};
|
||||
|
||||
/**
 * Maximum token limits keyed by DeepSeek model name.
 *
 * Each key appears exactly once. The generic `deepseek` key is listed first and
 * the more specific names follow, mirroring how the Qwen map in this file puts
 * its base keys (`qwen`, `qwen3`) ahead of their variants.
 * NOTE(review): key order presumably matters to the pattern-matching lookup
 * that consumes these maps — confirm before reordering.
 */
const deepseekModels = {
  deepseek: 128000,
  'deepseek-reasoner': 128000,
  'deepseek-r1': 128000,
  'deepseek-v3': 128000,
  'deepseek.r1': 128000,
};
|
||||
|
||||
|
|
@ -200,32 +206,57 @@ const metaModels = {
|
|||
'llama2:70b': 4000,
|
||||
};
|
||||
|
||||
const ollamaModels = {
|
||||
/**
 * Maximum token limits keyed by Qwen model name. Base keys (`qwen`, `qwen3`)
 * come before their more specific variants.
 */
const qwenModels = {
  qwen: 32000,
  'qwen2.5': 32000,
  'qwen-turbo': 1000000,
  'qwen-plus': 131000,
  'qwen-max': 32000,
  'qwq-32b': 32000,

  // --- Qwen3 family ---
  qwen3: 40960, // base pattern for Qwen3; reuses the qwen3-4b context size
  'qwen3-8b': 128000,
  'qwen3-14b': 40960,
  'qwen3-30b-a3b': 40960,
  'qwen3-32b': 40960,
  'qwen3-235b-a22b': 40960,

  // --- Qwen3 VL (vision-language) ---
  'qwen3-vl-8b-thinking': 256000,
  'qwen3-vl-8b-instruct': 262144,
  'qwen3-vl-30b-a3b': 262144,
  'qwen3-vl-235b-a22b': 131072,

  // --- Qwen3 specialized ---
  'qwen3-max': 256000,
  'qwen3-coder': 262144,
  'qwen3-coder-30b-a3b': 262144,
  'qwen3-coder-plus': 128000,
  'qwen3-coder-flash': 128000,
  'qwen3-next-80b-a3b': 262144,
};
|
||||
|
||||
/**
 * Maximum token limits keyed by AI21 model name. Both the provider-qualified,
 * versioned names and the short names are listed, with matching limits.
 */
const ai21Models = {
  // Provider-qualified, versioned names
  'ai21.j2-mid-v1': 8182, // 10 below max
  'ai21.j2-ultra-v1': 8182, // 10 below max
  'ai21.jamba-instruct-v1:0': 255500, // 500 below max

  // Short names
  'j2-mid': 8182, // 10 below max
  'j2-ultra': 8182, // 10 below max
  'jamba-instruct': 255500, // 500 below max
};
|
||||
|
||||
/**
 * Maximum token limits keyed by Amazon model name. Both the provider-qualified,
 * versioned names and the short names are listed, with matching limits.
 */
const amazonModels = {
  // Titan: provider-qualified, versioned names
  'amazon.titan-text-lite-v1': 4000,
  'amazon.titan-text-express-v1': 8000,
  'amazon.titan-text-premier-v1:0': 31500, // 500 below max

  // Titan: short names
  'titan-text-lite': 4000,
  'titan-text-express': 8000,
  'titan-text-premier': 31500, // 500 below max

  // Nova (https://aws.amazon.com/ai/generative-ai/nova/):
  // provider-qualified, versioned names
  'amazon.nova-micro-v1:0': 127000, // 1000 below max
  'amazon.nova-lite-v1:0': 295000, // 5000 below max
  'amazon.nova-pro-v1:0': 295000, // 5000 below max
  'amazon.nova-premier-v1:0': 995000, // 5000 below max

  // Nova: short names
  'nova-micro': 127000, // 1000 below max
  'nova-lite': 295000, // 5000 below max
  'nova-pro': 295000, // 5000 below max
  'nova-premier': 995000, // 5000 below max
};
|
||||
|
||||
const bedrockModels = {
|
||||
...anthropicModels,
|
||||
...mistralModels,
|
||||
...cohereModels,
|
||||
...ollamaModels,
|
||||
...deepseekModels,
|
||||
...metaModels,
|
||||
...ai21Models,
|
||||
|
|
@ -254,6 +285,7 @@ const aggregateModels = {
|
|||
...googleModels,
|
||||
...bedrockModels,
|
||||
...xAIModels,
|
||||
...qwenModels,
|
||||
// misc.
|
||||
kimi: 131000,
|
||||
// GPT-OSS
|
||||
|
|
@ -289,6 +321,7 @@ export const modelMaxOutputs = {
|
|||
'gpt-5': 128000,
|
||||
'gpt-5-mini': 128000,
|
||||
'gpt-5-nano': 128000,
|
||||
'gpt-5-pro': 128000,
|
||||
'gpt-oss-20b': 131000,
|
||||
'gpt-oss-120b': 131000,
|
||||
system_default: 32000,
|
||||
|
|
@ -299,6 +332,7 @@ const anthropicMaxOutputs = {
|
|||
'claude-3-haiku': 4096,
|
||||
'claude-3-sonnet': 4096,
|
||||
'claude-3-opus': 4096,
|
||||
'claude-haiku-4-5': 64000,
|
||||
'claude-opus-4': 32000,
|
||||
'claude-sonnet-4': 64000,
|
||||
'claude-3.5-sonnet': 8192,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue