🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173)

* updated gpt-5-pro

It is available via the OpenAI API and on OpenRouter:
https://platform.openai.com/docs/models/gpt-5-pro

* feat: Add gpt-5-pro pricing
- Implemented handling for the new gpt-5-pro model in the getValueKey function (sketched after this list).
- Updated tests to ensure correct behavior for gpt-5-pro across various scenarios.
- Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files.
- Enhanced model matching functionality to include gpt-5-pro variations.
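The implementation itself isn't shown in this message; a minimal sketch of the kind of prefix matching the first bullet describes (key names, pricing numbers, and match order here are illustrative assumptions, not the actual tx.js contents):

  // Sketch only: resolve a raw model name to a pricing key, checking the more
  // specific 'gpt-5-pro' variant before the bare 'gpt-5' family name so that
  // e.g. 'openai/gpt-5-pro-2025-10-06' does not fall through to 'gpt-5'.
  const tokenValues = {
    'gpt-5-pro': { prompt: 15, completion: 120 }, // assumed per-1M-token rates
    'gpt-5-mini': { prompt: 0.25, completion: 2 },
    'gpt-5': { prompt: 1.25, completion: 10 },
  };

  function getValueKeySketch(modelName) {
    const name = modelName.toLowerCase();
    if (name.includes('gpt-5-pro')) return 'gpt-5-pro';
    if (name.includes('gpt-5-mini')) return 'gpt-5-mini';
    if (name.includes('gpt-5')) return 'gpt-5';
    return undefined;
  }

  console.log(getValueKeySketch('openai/gpt-5-pro-2025-10-06')); // 'gpt-5-pro'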

* refactor: optimize model pricing and validation logic

- Added new model pricing entries for llama2, llama3, and qwen variants in tx.js.
- Updated tokenValues to include additional models and their pricing structures.
- Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing (a sketch follows this list).
- Refactored getValueKey function to improve model matching and resolution efficiency.
- Removed outdated model entries from tokens.ts to streamline pricing management.
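As a rough illustration of the resolution test mentioned above (export names, paths, and test shape are assumptions in the style of tx.spec.js, not a copy of it):

  // Sketch: every key that carries pricing should resolve, via getValueKey,
  // back to a defined pricing entry.
  const { getValueKey, tokenValues } = require('../models/tx');

  describe('model pricing resolution (sketch)', () => {
    const modelKeys = Object.keys(tokenValues);

    test.each(modelKeys)('"%s" resolves to a priced key', (model) => {
      const valueKey = getValueKey(model);
      expect(valueKey).toBeDefined();
      expect(tokenValues[valueKey]).toBeDefined();
    });
  });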

* fix: add missing pricing

* chore: update model pricing for qwen and gemma variants

* chore: update model pricing and add validation for context windows

- Removed outdated model entries from tx.js and updated tokenValues with new models.
- Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts (sketched below).
- Introduced a 'command-text' entry in tokens.ts to maintain consistency across model definitions.
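The pricing-to-context-window cross-check could be sketched along these lines (the maxTokensMap import and its endpoint-keyed shape are assumptions):

  // Sketch: every model with a pricing entry should have a context window
  // defined somewhere in the token-limit maps built from tokens.ts.
  const { tokenValues } = require('../models/tx');
  const { maxTokensMap } = require('librechat-data-provider');

  describe('pricing/context-window consistency (sketch)', () => {
    const pricedModels = Object.keys(tokenValues);

    test.each(pricedModels)('"%s" has a context window', (model) => {
      const hasWindow = Object.values(maxTokensMap).some(
        (endpointMap) => endpointMap && endpointMap[model] !== undefined,
      );
      expect(hasWindow).toBe(true);
    });
  });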

* chore: update model names and pricing for AI21 and Amazon models

- Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency.
- Updated the corresponding entries in tokens.ts to reflect the new model names.
- Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models (see the sketch after this list).
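A sketch of how both name forms could be exercised in one parameterized test (the name pairs come from the diff below; the expectation that both forms map to the short key is an assumption):

  // Sketch: full versioned Bedrock names and their short forms should resolve
  // to the same pricing key.
  const { getValueKey } = require('../models/tx');

  describe('AI21 / Amazon name variants (sketch)', () => {
    test.each([
      ['ai21.j2-mid-v1', 'j2-mid'],
      ['ai21.jamba-instruct-v1:0', 'jamba-instruct'],
      ['amazon.titan-text-lite-v1', 'titan-text-lite'],
      ['amazon.nova-pro-v1:0', 'nova-pro'],
    ])('%s resolves like %s', (fullName, shortKey) => {
      expect(getValueKey(fullName)).toBe(shortKey);
    });
  });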

* feat: add pricing and validation for Claude Haiku 4.5 model

* chore: increase default max context tokens to 18000 for agents

* feat: add Qwen3 model pricing and validation tests

* chore: reorganize and update Qwen model pricing in tx.js and tokens.ts

---------

Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
Danny Avila authored 2025-10-19 09:23:27 -04:00, committed by GitHub
parent 589f119310
commit 36f0365fd4
5 changed files with 964 additions and 132 deletions


@@ -40,10 +40,10 @@ const openAIModels = {
'gpt-5': 400000,
'gpt-5-mini': 400000,
'gpt-5-nano': 400000,
'gpt-5-pro': 400000,
'gpt-4o': 127500, // -500 from max
'gpt-4o-mini': 127500, // -500 from max
'gpt-4o-2024-05-13': 127500, // -500 from max
'gpt-4o-2024-08-06': 127500, // -500 from max
'gpt-4-turbo': 127500, // -500 from max
'gpt-4-vision': 127500, // -500 from max
'gpt-3.5-turbo': 16375, // -10 from max
@@ -60,9 +60,11 @@ const mistralModels = {
'mistral-7b': 31990, // -10 from max
'mistral-small': 31990, // -10 from max
'mixtral-8x7b': 31990, // -10 from max
'mixtral-8x22b': 65536,
'mistral-large': 131000,
'mistral-large-2402': 127500,
'mistral-large-2407': 127500,
'mistral-nemo': 131000,
'pixtral-large': 131000,
'mistral-saba': 32000,
codestral: 256000,
@@ -75,6 +77,7 @@ const cohereModels = {
'command-light-nightly': 8182, // -10 from max
command: 4086, // -10 from max
'command-nightly': 8182, // -10 from max
'command-text': 4086, // -10 from max
'command-r': 127500, // -500 from max
'command-r-plus': 127500, // -500 from max
};
@@ -127,14 +130,17 @@ const anthropicModels = {
'claude-3.7-sonnet': 200000,
'claude-3-5-sonnet-latest': 200000,
'claude-3.5-sonnet-latest': 200000,
'claude-haiku-4-5': 200000,
'claude-sonnet-4': 1000000,
'claude-opus-4': 200000,
'claude-4': 200000,
};
const deepseekModels = {
'deepseek-reasoner': 128000,
deepseek: 128000,
'deepseek-reasoner': 128000,
'deepseek-r1': 128000,
'deepseek-v3': 128000,
'deepseek.r1': 128000,
};
@@ -200,32 +206,57 @@ const metaModels = {
'llama2:70b': 4000,
};
const ollamaModels = {
const qwenModels = {
qwen: 32000,
'qwen2.5': 32000,
'qwen-turbo': 1000000,
'qwen-plus': 131000,
'qwen-max': 32000,
'qwq-32b': 32000,
// Qwen3 models
qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
'qwen3-8b': 128000,
'qwen3-14b': 40960,
'qwen3-30b-a3b': 40960,
'qwen3-32b': 40960,
'qwen3-235b-a22b': 40960,
// Qwen3 VL (Vision-Language) models
'qwen3-vl-8b-thinking': 256000,
'qwen3-vl-8b-instruct': 262144,
'qwen3-vl-30b-a3b': 262144,
'qwen3-vl-235b-a22b': 131072,
// Qwen3 specialized models
'qwen3-max': 256000,
'qwen3-coder': 262144,
'qwen3-coder-30b-a3b': 262144,
'qwen3-coder-plus': 128000,
'qwen3-coder-flash': 128000,
'qwen3-next-80b-a3b': 262144,
};
const ai21Models = {
'ai21.j2-mid-v1': 8182, // -10 from max
'ai21.j2-ultra-v1': 8182, // -10 from max
'ai21.jamba-instruct-v1:0': 255500, // -500 from max
'j2-mid': 8182, // -10 from max
'j2-ultra': 8182, // -10 from max
'jamba-instruct': 255500, // -500 from max
};
const amazonModels = {
'amazon.titan-text-lite-v1': 4000,
'amazon.titan-text-express-v1': 8000,
'amazon.titan-text-premier-v1:0': 31500, // -500 from max
// Amazon Titan models
'titan-text-lite': 4000,
'titan-text-express': 8000,
'titan-text-premier': 31500, // -500 from max
// Amazon Nova models
// https://aws.amazon.com/ai/generative-ai/nova/
'amazon.nova-micro-v1:0': 127000, // -1000 from max,
'amazon.nova-lite-v1:0': 295000, // -5000 from max,
'amazon.nova-pro-v1:0': 295000, // -5000 from max,
'amazon.nova-premier-v1:0': 995000, // -5000 from max,
'nova-micro': 127000, // -1000 from max
'nova-lite': 295000, // -5000 from max
'nova-pro': 295000, // -5000 from max
'nova-premier': 995000, // -5000 from max
};
const bedrockModels = {
...anthropicModels,
...mistralModels,
...cohereModels,
...ollamaModels,
...deepseekModels,
...metaModels,
...ai21Models,
@@ -254,6 +285,7 @@ const aggregateModels = {
...googleModels,
...bedrockModels,
...xAIModels,
...qwenModels,
// misc.
kimi: 131000,
// GPT-OSS
@@ -289,6 +321,7 @@ export const modelMaxOutputs = {
'gpt-5': 128000,
'gpt-5-mini': 128000,
'gpt-5-nano': 128000,
'gpt-5-pro': 128000,
'gpt-oss-20b': 131000,
'gpt-oss-120b': 131000,
system_default: 32000,
@@ -299,6 +332,7 @@ const anthropicMaxOutputs = {
'claude-3-haiku': 4096,
'claude-3-sonnet': 4096,
'claude-3-opus': 4096,
'claude-haiku-4-5': 64000,
'claude-opus-4': 32000,
'claude-sonnet-4': 64000,
'claude-3.5-sonnet': 8192,