🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173)
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Has been cancelled
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Has been cancelled

* updated gpt-5-pro

The model is available here and on OpenRouter:
https://platform.openai.com/docs/models/gpt-5-pro

* feat: Add gpt-5-pro pricing
- Implemented handling for the new gpt-5-pro model in the getValueKey function.
- Updated tests to ensure correct behavior for gpt-5-pro across various scenarios.
- Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files.
- Enhanced model matching functionality to include gpt-5-pro variations.

* refactor: optimize model pricing and validation logic

- Added new model pricing entries for llama2, llama3, and qwen variants in tx.js.
- Updated tokenValues to include additional models and their pricing structures.
- Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing.
- Refactored getValueKey function to improve model matching and resolution efficiency.
- Removed outdated model entries from tokens.ts to streamline pricing management.

* fix: add missing pricing

* chore: update model pricing for qwen and gemma variants

* chore: update model pricing and add validation for context windows

- Removed outdated model entries from tx.js and updated tokenValues with new models.
- Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts.
- Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions.

* chore: update model names and pricing for AI21 and Amazon models

- Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency.
- Updated pricing values in tokens.ts to reflect the new model names.
- Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models.

* feat: add pricing and validation for Claude Haiku 4.5 model

* chore: increase default max context tokens to 18000 for agents

* feat: add Qwen3 model pricing and validation tests

* chore: reorganize and update Qwen model pricing in tx.js and tokens.ts

---------

Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
This commit is contained in:
Danny Avila 2025-10-19 09:23:27 -04:00 committed by GitHub
parent 589f119310
commit 36f0365fd4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 964 additions and 132 deletions

View file

@ -186,6 +186,19 @@ describe('getModelMaxTokens', () => {
);
});
// Each gpt-5-pro spelling (bare, suffixed, provider-prefixed, dated) must yield
// the value stored under the 'gpt-5-pro' key of the OpenAI token map.
test('should return correct tokens for gpt-5-pro matches', () => {
  const candidates = [
    'gpt-5-pro',
    'gpt-5-pro-preview',
    'openai/gpt-5-pro',
    'gpt-5-pro-2025-01-30',
  ];
  for (const candidate of candidates) {
    const expectedTokens = maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro'];
    expect(getModelMaxTokens(candidate)).toBe(expectedTokens);
  }
});
test('should return correct tokens for Anthropic models', () => {
const models = [
'claude-2.1',
@ -469,7 +482,7 @@ describe('getModelMaxTokens', () => {
test('should return correct max output tokens for GPT-5 models', () => {
const { getModelMaxOutputTokens } = require('@librechat/api');
['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => {
['gpt-5', 'gpt-5-mini', 'gpt-5-nano', 'gpt-5-pro'].forEach((model) => {
expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(
maxOutputTokensMap[EModelEndpoint.openAI][model],
@ -582,6 +595,13 @@ describe('matchModelName', () => {
expect(matchModelName('gpt-5-nano-2025-01-30')).toBe('gpt-5-nano');
});
// Prefixed, suffixed and dated gpt-5-pro names must all resolve to the
// 'gpt-5-pro' key.
it('should return the closest matching key for gpt-5-pro matches', () => {
  const inputs = [
    'openai/gpt-5-pro',
    'gpt-5-pro-preview',
    'gpt-5-pro-2025-01-30',
    'gpt-5-pro-2025-01-30-0130',
  ];
  for (const input of inputs) {
    expect(matchModelName(input)).toBe('gpt-5-pro');
  }
});
// Tests for Google models
it('should return the exact model name if it exists in maxTokensMap - Google models', () => {
expect(matchModelName('text-bison-32k', EModelEndpoint.google)).toBe('text-bison-32k');
@ -832,6 +852,49 @@ describe('Claude Model Tests', () => {
);
});
// The lookup is checked both with the Anthropic endpoint passed explicitly and
// with the endpoint argument omitted.
it('should return correct context length for Claude Haiku 4.5', () => {
  const haikuKey = 'claude-haiku-4-5';
  const expectedContext = () => maxTokensMap[EModelEndpoint.anthropic][haikuKey];

  const withEndpoint = getModelMaxTokens(haikuKey, EModelEndpoint.anthropic);
  expect(withEndpoint).toBe(expectedContext());

  const withoutEndpoint = getModelMaxTokens(haikuKey);
  expect(withoutEndpoint).toBe(expectedContext());
});
// For every spelling of the Haiku 4.5 name, the pattern matcher must pick the
// 'claude-haiku-4-5' key and the token lookup must return that key's value.
it('should handle Claude Haiku 4.5 model name variations', () => {
  const haikuKey = 'claude-haiku-4-5';
  const variations = [
    haikuKey,
    `${haikuKey}-20250420`,
    `${haikuKey}-latest`,
    `anthropic/${haikuKey}`,
    `${haikuKey}/anthropic`,
    `${haikuKey}-preview`,
  ];
  for (const variation of variations) {
    const matchedKey = findMatchingPattern(variation, maxTokensMap[EModelEndpoint.anthropic]);
    expect(matchedKey).toBe('claude-haiku-4-5');

    const resolvedTokens = getModelMaxTokens(variation, EModelEndpoint.anthropic);
    expect(resolvedTokens).toBe(maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5']);
  }
});
// Same spellings as the context-length checks, here asserted against
// matchModelName with the Anthropic endpoint.
it('should match model names correctly for Claude Haiku 4.5', () => {
  const haikuKey = 'claude-haiku-4-5';
  const variations = [
    haikuKey,
    `${haikuKey}-20250420`,
    `${haikuKey}-latest`,
    `anthropic/${haikuKey}`,
    `${haikuKey}/anthropic`,
    `${haikuKey}-preview`,
  ];
  for (const variation of variations) {
    const resolvedName = matchModelName(variation, EModelEndpoint.anthropic);
    expect(resolvedName).toBe('claude-haiku-4-5');
  }
});
it('should handle Claude 4 model name variations with different prefixes and suffixes', () => {
const modelVariations = [
'claude-sonnet-4',
@ -924,6 +987,121 @@ describe('Kimi Model Tests', () => {
});
});
// Qwen3 coverage, written table-style: each case is a pair of
// [model name passed in, key expected to be resolved].
describe('Qwen3 Model Tests', () => {
  // Asserts that getModelMaxTokens(name) equals the OpenAI token-map value stored under `key`.
  // The map lookup happens inside the running test, never at collection time.
  const expectMaxTokens = (cases) => {
    for (const [name, key] of cases) {
      expect(getModelMaxTokens(name)).toBe(maxTokensMap[EModelEndpoint.openAI][key]);
    }
  };

  // Asserts that matchModelName(name) returns exactly `key`.
  const expectMatchedKey = (cases) => {
    for (const [name, key] of cases) {
      expect(matchModelName(name)).toBe(key);
    }
  };

  // Builds [name, name] pairs for models expected to resolve to their own key.
  const selfKeyed = (names) => names.map((name) => [name, name]);

  describe('getModelMaxTokens', () => {
    test('should return correct tokens for Qwen3 base pattern', () => {
      expectMaxTokens([['qwen3', 'qwen3']]);
    });

    test('should return correct tokens for qwen3-4b (falls back to qwen3)', () => {
      expectMaxTokens([['qwen3-4b', 'qwen3']]);
    });

    test('should return correct tokens for Qwen3 base models', () => {
      expectMaxTokens(selfKeyed(['qwen3-8b', 'qwen3-14b', 'qwen3-32b', 'qwen3-235b-a22b']));
    });

    test('should return correct tokens for Qwen3 VL (Vision-Language) models', () => {
      expectMaxTokens(
        selfKeyed([
          'qwen3-vl-8b-thinking',
          'qwen3-vl-8b-instruct',
          'qwen3-vl-30b-a3b',
          'qwen3-vl-235b-a22b',
        ]),
      );
    });

    test('should return correct tokens for Qwen3 specialized models', () => {
      expectMaxTokens(
        selfKeyed([
          'qwen3-max',
          'qwen3-coder',
          'qwen3-coder-30b-a3b',
          'qwen3-coder-plus',
          'qwen3-coder-flash',
          'qwen3-next-80b-a3b',
        ]),
      );
    });

    test('should handle Qwen3 models with provider prefixes', () => {
      expectMaxTokens([
        ['alibaba/qwen3', 'qwen3'],
        ['alibaba/qwen3-4b', 'qwen3'],
        ['qwen/qwen3-8b', 'qwen3-8b'],
        ['openrouter/qwen3-max', 'qwen3-max'],
        ['alibaba/qwen3-vl-8b-instruct', 'qwen3-vl-8b-instruct'],
        ['qwen/qwen3-coder', 'qwen3-coder'],
      ]);
    });

    test('should handle Qwen3 models with suffixes', () => {
      expectMaxTokens([
        ['qwen3-preview', 'qwen3'],
        ['qwen3-4b-preview', 'qwen3'],
        ['qwen3-8b-latest', 'qwen3-8b'],
        ['qwen3-max-2024', 'qwen3-max'],
      ]);
    });
  });

  describe('matchModelName', () => {
    test('should match exact Qwen3 model names', () => {
      expectMatchedKey([
        ['qwen3', 'qwen3'],
        ['qwen3-4b', 'qwen3'],
        ['qwen3-8b', 'qwen3-8b'],
        ['qwen3-vl-8b-thinking', 'qwen3-vl-8b-thinking'],
        ['qwen3-max', 'qwen3-max'],
        ['qwen3-coder', 'qwen3-coder'],
      ]);
    });

    test('should match Qwen3 model variations with provider prefixes', () => {
      expectMatchedKey([
        ['alibaba/qwen3', 'qwen3'],
        ['alibaba/qwen3-4b', 'qwen3'],
        ['qwen/qwen3-8b', 'qwen3-8b'],
        ['openrouter/qwen3-max', 'qwen3-max'],
        ['alibaba/qwen3-vl-8b-instruct', 'qwen3-vl-8b-instruct'],
        ['qwen/qwen3-coder', 'qwen3-coder'],
      ]);
    });

    test('should match Qwen3 model variations with suffixes', () => {
      expectMatchedKey([
        ['qwen3-preview', 'qwen3'],
        ['qwen3-4b-preview', 'qwen3'],
        ['qwen3-8b-latest', 'qwen3-8b'],
        ['qwen3-max-2024', 'qwen3-max'],
        ['qwen3-coder-v1', 'qwen3-coder'],
      ]);
    });
  });
});
describe('GLM Model Tests (Zhipu AI)', () => {
describe('getModelMaxTokens', () => {
test('should return correct tokens for GLM models', () => {