mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-22 11:20:15 +01:00
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173)
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Has been cancelled
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Has been cancelled
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Has been cancelled
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Has been cancelled
* Updated gpt-5-pro; it is available here and on OpenRouter: https://platform.openai.com/docs/models/gpt-5-pro
* feat: Add gpt-5-pro pricing
  - Implemented handling for the new gpt-5-pro model in the getValueKey function.
  - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios.
  - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files.
  - Enhanced model matching functionality to include gpt-5-pro variations.
* refactor: optimize model pricing and validation logic
  - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js.
  - Updated tokenValues to include additional models and their pricing structures.
  - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing.
  - Refactored getValueKey function to improve model matching and resolution efficiency.
  - Removed outdated model entries from tokens.ts to streamline pricing management.
* fix: add missing pricing
* chore: update model pricing for qwen and gemma variants
* chore: update model pricing and add validation for context windows
  - Removed outdated model entries from tx.js and updated tokenValues with new models.
  - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts.
  - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions.
* chore: update model names and pricing for AI21 and Amazon models
  - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency.
  - Updated pricing values in tokens.ts to reflect the new model names.
  - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models.
* feat: add pricing and validation for Claude Haiku 4.5 model
* chore: increase default max context tokens to 18000 for agents
* feat: add Qwen3 model pricing and validation tests
* chore: reorganize and update Qwen model pricing in tx.js and tokens.ts

---------

Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
This commit is contained in:
parent
589f119310
commit
36f0365fd4
5 changed files with 964 additions and 132 deletions
|
|
@ -40,10 +40,10 @@ const openAIModels = {
|
|||
'gpt-5': 400000,
|
||||
'gpt-5-mini': 400000,
|
||||
'gpt-5-nano': 400000,
|
||||
'gpt-5-pro': 400000,
|
||||
'gpt-4o': 127500, // -500 from max
|
||||
'gpt-4o-mini': 127500, // -500 from max
|
||||
'gpt-4o-2024-05-13': 127500, // -500 from max
|
||||
'gpt-4o-2024-08-06': 127500, // -500 from max
|
||||
'gpt-4-turbo': 127500, // -500 from max
|
||||
'gpt-4-vision': 127500, // -500 from max
|
||||
'gpt-3.5-turbo': 16375, // -10 from max
|
||||
|
|
@ -60,9 +60,11 @@ const mistralModels = {
|
|||
'mistral-7b': 31990, // -10 from max
|
||||
'mistral-small': 31990, // -10 from max
|
||||
'mixtral-8x7b': 31990, // -10 from max
|
||||
'mixtral-8x22b': 65536,
|
||||
'mistral-large': 131000,
|
||||
'mistral-large-2402': 127500,
|
||||
'mistral-large-2407': 127500,
|
||||
'mistral-nemo': 131000,
|
||||
'pixtral-large': 131000,
|
||||
'mistral-saba': 32000,
|
||||
codestral: 256000,
|
||||
|
|
@ -75,6 +77,7 @@ const cohereModels = {
|
|||
'command-light-nightly': 8182, // -10 from max
|
||||
command: 4086, // -10 from max
|
||||
'command-nightly': 8182, // -10 from max
|
||||
'command-text': 4086, // -10 from max
|
||||
'command-r': 127500, // -500 from max
|
||||
'command-r-plus': 127500, // -500 from max
|
||||
};
|
||||
|
|
@ -127,14 +130,17 @@ const anthropicModels = {
|
|||
'claude-3.7-sonnet': 200000,
|
||||
'claude-3-5-sonnet-latest': 200000,
|
||||
'claude-3.5-sonnet-latest': 200000,
|
||||
'claude-haiku-4-5': 200000,
|
||||
'claude-sonnet-4': 1000000,
|
||||
'claude-opus-4': 200000,
|
||||
'claude-4': 200000,
|
||||
};
|
||||
|
||||
// Numeric limits keyed by DeepSeek model name; every entry below is 128000.
// NOTE(review): the values are presumably context-window token counts (the
// commit message refers to "context windows defined in tokens.ts") — confirm.
// NOTE(review): 'deepseek-reasoner' is listed twice with the same value. This
// page renders a diff without +/- markers, so one occurrence is presumably the
// removed line; in an object literal the later duplicate wins either way.
const deepseekModels = {
|
||||
'deepseek-reasoner': 128000,
|
||||
deepseek: 128000,
|
||||
'deepseek-reasoner': 128000,
|
||||
'deepseek-r1': 128000,
|
||||
'deepseek-v3': 128000,
|
||||
'deepseek.r1': 128000,
|
||||
};
|
||||
|
||||
|
|
@ -200,32 +206,57 @@ const metaModels = {
|
|||
'llama2:70b': 4000,
|
||||
};
|
||||
|
||||
const ollamaModels = {
|
||||
// Numeric limits keyed by Qwen model name, grouped as: Qwen / Qwen2.5 / QwQ
// entries, Qwen3 sized variants, Qwen3 VL (vision-language) variants, and
// Qwen3 specialized variants (max, coder, next).
// NOTE(review): the values are presumably maximum context tokens — confirm.
// NOTE(review): 'qwen3-vl-8b-thinking' is 256000 while the neighbouring VL and
// coder entries use 262144; verify the intended figure against provider docs.
const qwenModels = {
|
||||
qwen: 32000,
|
||||
'qwen2.5': 32000,
|
||||
'qwen-turbo': 1000000,
|
||||
'qwen-plus': 131000,
|
||||
'qwen-max': 32000,
|
||||
'qwq-32b': 32000,
|
||||
// Qwen3 models
|
||||
qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context)
|
||||
'qwen3-8b': 128000,
|
||||
'qwen3-14b': 40960,
|
||||
'qwen3-30b-a3b': 40960,
|
||||
'qwen3-32b': 40960,
|
||||
'qwen3-235b-a22b': 40960,
|
||||
// Qwen3 VL (Vision-Language) models
|
||||
'qwen3-vl-8b-thinking': 256000,
|
||||
'qwen3-vl-8b-instruct': 262144,
|
||||
'qwen3-vl-30b-a3b': 262144,
|
||||
'qwen3-vl-235b-a22b': 131072,
|
||||
// Qwen3 specialized models
|
||||
'qwen3-max': 256000,
|
||||
'qwen3-coder': 262144,
|
||||
'qwen3-coder-30b-a3b': 262144,
|
||||
'qwen3-coder-plus': 128000,
|
||||
'qwen3-coder-flash': 128000,
|
||||
'qwen3-next-80b-a3b': 262144,
|
||||
};
|
||||
|
||||
// Numeric limits keyed by AI21 model name. Fully qualified ids ('ai21.j2-mid-v1',
// ...) and short names ('j2-mid', ...) both appear here with identical values.
// NOTE(review): this page renders a diff without +/- markers, and the commit
// message says versioning was removed from AI21 model names, so the 'ai21.*'
// entries are presumably the deleted lines — confirm against the repository.
const ai21Models = {
|
||||
'ai21.j2-mid-v1': 8182, // -10 from max
|
||||
'ai21.j2-ultra-v1': 8182, // -10 from max
|
||||
'ai21.jamba-instruct-v1:0': 255500, // -500 from max
|
||||
'j2-mid': 8182, // -10 from max
|
||||
'j2-ultra': 8182, // -10 from max
|
||||
'jamba-instruct': 255500, // -500 from max
|
||||
};
|
||||
|
||||
// Numeric limits keyed by Amazon Titan and Nova model name. Each model appears
// under a fully qualified id ('amazon.*-v1[:0]') and under a short name, with
// the same value for both spellings.
// NOTE(review): this page renders a diff without +/- markers, and the commit
// message says versioning was removed from Amazon model names, so the
// 'amazon.*' entries are presumably the deleted lines — confirm against the
// repository.
const amazonModels = {
|
||||
'amazon.titan-text-lite-v1': 4000,
|
||||
'amazon.titan-text-express-v1': 8000,
|
||||
'amazon.titan-text-premier-v1:0': 31500, // -500 from max
|
||||
// Amazon Titan models
|
||||
'titan-text-lite': 4000,
|
||||
'titan-text-express': 8000,
|
||||
'titan-text-premier': 31500, // -500 from max
|
||||
// Amazon Nova models
|
||||
// https://aws.amazon.com/ai/generative-ai/nova/
|
||||
'amazon.nova-micro-v1:0': 127000, // -1000 from max,
|
||||
'amazon.nova-lite-v1:0': 295000, // -5000 from max,
|
||||
'amazon.nova-pro-v1:0': 295000, // -5000 from max,
|
||||
'amazon.nova-premier-v1:0': 995000, // -5000 from max,
|
||||
'nova-micro': 127000, // -1000 from max
|
||||
'nova-lite': 295000, // -5000 from max
|
||||
'nova-pro': 295000, // -5000 from max
|
||||
'nova-premier': 995000, // -5000 from max
|
||||
};
|
||||
|
||||
const bedrockModels = {
|
||||
...anthropicModels,
|
||||
...mistralModels,
|
||||
...cohereModels,
|
||||
...ollamaModels,
|
||||
...deepseekModels,
|
||||
...metaModels,
|
||||
...ai21Models,
|
||||
|
|
@ -254,6 +285,7 @@ const aggregateModels = {
|
|||
...googleModels,
|
||||
...bedrockModels,
|
||||
...xAIModels,
|
||||
...qwenModels,
|
||||
// misc.
|
||||
kimi: 131000,
|
||||
// GPT-OSS
|
||||
|
|
@ -289,6 +321,7 @@ export const modelMaxOutputs = {
|
|||
'gpt-5': 128000,
|
||||
'gpt-5-mini': 128000,
|
||||
'gpt-5-nano': 128000,
|
||||
'gpt-5-pro': 128000,
|
||||
'gpt-oss-20b': 131000,
|
||||
'gpt-oss-120b': 131000,
|
||||
system_default: 32000,
|
||||
|
|
@ -299,6 +332,7 @@ const anthropicMaxOutputs = {
|
|||
'claude-3-haiku': 4096,
|
||||
'claude-3-sonnet': 4096,
|
||||
'claude-3-opus': 4096,
|
||||
'claude-haiku-4-5': 64000,
|
||||
'claude-opus-4': 32000,
|
||||
'claude-sonnet-4': 64000,
|
||||
'claude-3.5-sonnet': 8192,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue