🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173)

* updated gpt-5-pro: it is available here and on OpenRouter: https://platform.openai.com/docs/models/gpt-5-pro

* feat: Add gpt-5-pro pricing
  - Implemented handling for the new gpt-5-pro model in the getValueKey function.
  - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios.
  - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files.
  - Enhanced model matching functionality to include gpt-5-pro variations.

* refactor: optimize model pricing and validation logic
  - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js.
  - Updated tokenValues to include additional models and their pricing structures.
  - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing.
  - Refactored getValueKey function to improve model matching and resolution efficiency.
  - Removed outdated model entries from tokens.ts to streamline pricing management.

* fix: add missing pricing

* chore: update model pricing for qwen and gemma variants

* chore: update model pricing and add validation for context windows
  - Removed outdated model entries from tx.js and updated tokenValues with new models.
  - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts.
  - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions.

* chore: update model names and pricing for AI21 and Amazon models
  - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency.
  - Updated pricing values in tokens.ts to reflect the new model names.
  - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models.
* feat: add pricing and validation for Claude Haiku 4.5 model

* chore: increase default max context tokens to 18000 for agents

* feat: add Qwen3 model pricing and validation tests

* chore: reorganize and update Qwen model pricing in tx.js and tokens.ts

---------

Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2026-04-05 07:17:18 +02:00 · 2025-10-19 09:23:27 -04:00 · 2025-10-19 09:23:27 -04:00 · 36f0365fd4
commit 36f0365fd4
parent 589f119310
5 changed files with 964 additions and 132 deletions
--- a/packages/api/src/utils/tokens.ts
+++ b/packages/api/src/utils/tokens.ts
@@ -40,10 +40,10 @@ const openAIModels = {
  'gpt-5': 400000,
  'gpt-5-mini': 400000,
  'gpt-5-nano': 400000,
+  'gpt-5-pro': 400000,
  'gpt-4o': 127500, // -500 from max
  'gpt-4o-mini': 127500, // -500 from max
  'gpt-4o-2024-05-13': 127500, // -500 from max
-  'gpt-4o-2024-08-06': 127500, // -500 from max
  'gpt-4-turbo': 127500, // -500 from max
  'gpt-4-vision': 127500, // -500 from max
  'gpt-3.5-turbo': 16375, // -10 from max
@@ -60,9 +60,11 @@ const mistralModels = {
  'mistral-7b': 31990, // -10 from max
  'mistral-small': 31990, // -10 from max
  'mixtral-8x7b': 31990, // -10 from max
+  'mixtral-8x22b': 65536,
  'mistral-large': 131000,
  'mistral-large-2402': 127500,
  'mistral-large-2407': 127500,
+  'mistral-nemo': 131000,
  'pixtral-large': 131000,
  'mistral-saba': 32000,
  codestral: 256000,
@@ -75,6 +77,7 @@ const cohereModels = {
  'command-light-nightly': 8182, // -10 from max
  command: 4086, // -10 from max
  'command-nightly': 8182, // -10 from max
+  'command-text': 4086, // -10 from max
  'command-r': 127500, // -500 from max
  'command-r-plus': 127500, // -500 from max
 };
@@ -127,14 +130,17 @@ const anthropicModels = {
  'claude-3.7-sonnet': 200000,
  'claude-3-5-sonnet-latest': 200000,
  'claude-3.5-sonnet-latest': 200000,
+  'claude-haiku-4-5': 200000,
  'claude-sonnet-4': 1000000,
  'claude-opus-4': 200000,
  'claude-4': 200000,
 };

 const deepseekModels = {
-  'deepseek-reasoner': 128000,
  deepseek: 128000,
+  'deepseek-reasoner': 128000,
+  'deepseek-r1': 128000,
+  'deepseek-v3': 128000,
  'deepseek.r1': 128000,
 };

@@ -200,32 +206,57 @@ const metaModels = {
  'llama2:70b': 4000,
 };

-const ollamaModels = {
+const qwenModels = {
+  qwen: 32000,
  'qwen2.5': 32000,
+  'qwen-turbo': 1000000,
+  'qwen-plus': 131000,
+  'qwen-max': 32000,
+  'qwq-32b': 32000,
+  // Qwen3 models
+  qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
+  'qwen3-8b': 128000,
+  'qwen3-14b': 40960,
+  'qwen3-30b-a3b': 40960,
+  'qwen3-32b': 40960,
+  'qwen3-235b-a22b': 40960,
+  // Qwen3 VL (Vision-Language) models
+  'qwen3-vl-8b-thinking': 256000,
+  'qwen3-vl-8b-instruct': 262144,
+  'qwen3-vl-30b-a3b': 262144,
+  'qwen3-vl-235b-a22b': 131072,
+  // Qwen3 specialized models
+  'qwen3-max': 256000,
+  'qwen3-coder': 262144,
+  'qwen3-coder-30b-a3b': 262144,
+  'qwen3-coder-plus': 128000,
+  'qwen3-coder-flash': 128000,
+  'qwen3-next-80b-a3b': 262144,
 };

 const ai21Models = {
-  'ai21.j2-mid-v1': 8182, // -10 from max
-  'ai21.j2-ultra-v1': 8182, // -10 from max
-  'ai21.jamba-instruct-v1:0': 255500, // -500 from max
+  'j2-mid': 8182, // -10 from max
+  'j2-ultra': 8182, // -10 from max
+  'jamba-instruct': 255500, // -500 from max
 };

 const amazonModels = {
-  'amazon.titan-text-lite-v1': 4000,
-  'amazon.titan-text-express-v1': 8000,
-  'amazon.titan-text-premier-v1:0': 31500, // -500 from max
+  // Amazon Titan models
+  'titan-text-lite': 4000,
+  'titan-text-express': 8000,
+  'titan-text-premier': 31500, // -500 from max
+  // Amazon Nova models
  // https://aws.amazon.com/ai/generative-ai/nova/
-  'amazon.nova-micro-v1:0': 127000, // -1000 from max,
-  'amazon.nova-lite-v1:0': 295000, // -5000 from max,
-  'amazon.nova-pro-v1:0': 295000, // -5000 from max,
-  'amazon.nova-premier-v1:0': 995000, // -5000 from max,
+  'nova-micro': 127000, // -1000 from max
+  'nova-lite': 295000, // -5000 from max
+  'nova-pro': 295000, // -5000 from max
+  'nova-premier': 995000, // -5000 from max
 };

 const bedrockModels = {
  ...anthropicModels,
  ...mistralModels,
  ...cohereModels,
-  ...ollamaModels,
  ...deepseekModels,
  ...metaModels,
  ...ai21Models,
@@ -254,6 +285,7 @@ const aggregateModels = {
  ...googleModels,
  ...bedrockModels,
  ...xAIModels,
+  ...qwenModels,
  // misc.
  kimi: 131000,
  // GPT-OSS
@@ -289,6 +321,7 @@ export const modelMaxOutputs = {
  'gpt-5': 128000,
  'gpt-5-mini': 128000,
  'gpt-5-nano': 128000,
+  'gpt-5-pro': 128000,
  'gpt-oss-20b': 131000,
  'gpt-oss-120b': 131000,
  system_default: 32000,
@@ -299,6 +332,7 @@ const anthropicMaxOutputs = {
  'claude-3-haiku': 4096,
  'claude-3-sonnet': 4096,
  'claude-3-opus': 4096,
+  'claude-haiku-4-5': 64000,
  'claude-opus-4': 32000,
  'claude-sonnet-4': 64000,
  'claude-3.5-sonnet': 8192,