Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-02-01 15:21:50 +01:00)
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173)
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Has been cancelled
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Has been cancelled
* updated gpt-5-pro: it is now available, both directly and on OpenRouter (https://platform.openai.com/docs/models/gpt-5-pro)
* feat: Add gpt-5-pro pricing
  - Implemented handling for the new gpt-5-pro model in the getValueKey function.
  - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios.
  - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files.
  - Enhanced model matching functionality to include gpt-5-pro variations.
* refactor: optimize model pricing and validation logic
  - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js.
  - Updated tokenValues to include additional models and their pricing structures.
  - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing.
  - Refactored the getValueKey function to improve model matching and resolution efficiency (a hedged sketch of this kind of matching follows this message).
  - Removed outdated model entries from tokens.ts to streamline pricing management.
* fix: add missing pricing
* chore: update model pricing for qwen and gemma variants
* chore: update model pricing and add validation for context windows
  - Removed outdated model entries from tx.js and updated tokenValues with new models.
  - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts (a sketch of this style of test follows the diff below).
  - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions.
* chore: update model names and pricing for AI21 and Amazon models
  - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency.
  - Updated pricing values in tokens.ts to reflect the new model names.
  - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models.
* feat: add pricing and validation for Claude Haiku 4.5 model
* chore: increase default max context tokens to 18000 for agents
* feat: add Qwen3 model pricing and validation tests
* chore: reorganize and update Qwen model pricing in tx.js and tokens.ts

Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
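The recurring mechanism in this PR is resolving an arbitrary model identifier (provider prefixes such as `openai/` or `alibaba/`, and date, `-preview`, or `-latest` suffixes) to one canonical key in the pricing and context-window tables. Below is a minimal sketch of longest-key prefix matching, assuming a simplified tokenValues shape; the helper name findMatchingPattern and the expected resolutions come from the tests in this diff, but the body and the sample prices are illustrative, not LibreChat's actual implementation:

```js
// Sketch: prefix/suffix-tolerant resolution of a model id to a pricing key.
// Assumption: the real findMatchingPattern/getValueKey in LibreChat differ in
// detail; only the helper name and expected behavior come from the tests below.
const tokenValues = {
  // Illustrative USD-per-1M-token rates, not the values added by this PR.
  'gpt-5-pro': { prompt: 15, completion: 120 },
  'qwen3-8b': { prompt: 0.2, completion: 0.6 },
  qwen3: { prompt: 0.15, completion: 0.45 },
};

function findMatchingPattern(model, values) {
  // Prefer longer keys so 'qwen3-8b-latest' hits 'qwen3-8b', not 'qwen3'.
  const keys = Object.keys(values).sort((a, b) => b.length - a.length);
  // The tests accept both 'anthropic/claude-haiku-4-5' and
  // 'claude-haiku-4-5/anthropic', so try every slash-separated segment.
  for (const part of model.split('/')) {
    const hit = keys.find((key) => part === key || part.startsWith(`${key}-`));
    if (hit) return hit;
  }
  return undefined;
}

console.log(findMatchingPattern('openai/gpt-5-pro-2025-01-30', tokenValues)); // 'gpt-5-pro'
console.log(findMatchingPattern('qwen3-4b-preview', tokenValues)); // 'qwen3' (fallback)
```

Sorting candidates by key length makes the most specific pattern win, which is exactly the fallback behavior the Qwen3 tests below pin down: qwen3-4b resolves to the qwen3 base entry, while qwen3-8b keeps its own dedicated entry.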
This commit is contained in:
parent 589f119310
commit 36f0365fd4
5 changed files with 964 additions and 132 deletions
```diff
@@ -186,6 +186,19 @@ describe('getModelMaxTokens', () => {
     );
   });
 
+  test('should return correct tokens for gpt-5-pro matches', () => {
+    expect(getModelMaxTokens('gpt-5-pro')).toBe(maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro']);
+    expect(getModelMaxTokens('gpt-5-pro-preview')).toBe(
+      maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro'],
+    );
+    expect(getModelMaxTokens('openai/gpt-5-pro')).toBe(
+      maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro'],
+    );
+    expect(getModelMaxTokens('gpt-5-pro-2025-01-30')).toBe(
+      maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro'],
+    );
+  });
+
   test('should return correct tokens for Anthropic models', () => {
     const models = [
       'claude-2.1',
```
```diff
@@ -469,7 +482,7 @@ describe('getModelMaxTokens', () => {
 
   test('should return correct max output tokens for GPT-5 models', () => {
     const { getModelMaxOutputTokens } = require('@librechat/api');
-    ['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => {
+    ['gpt-5', 'gpt-5-mini', 'gpt-5-nano', 'gpt-5-pro'].forEach((model) => {
       expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
       expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(
         maxOutputTokensMap[EModelEndpoint.openAI][model],
```
```diff
@@ -582,6 +595,13 @@ describe('matchModelName', () => {
     expect(matchModelName('gpt-5-nano-2025-01-30')).toBe('gpt-5-nano');
   });
 
+  it('should return the closest matching key for gpt-5-pro matches', () => {
+    expect(matchModelName('openai/gpt-5-pro')).toBe('gpt-5-pro');
+    expect(matchModelName('gpt-5-pro-preview')).toBe('gpt-5-pro');
+    expect(matchModelName('gpt-5-pro-2025-01-30')).toBe('gpt-5-pro');
+    expect(matchModelName('gpt-5-pro-2025-01-30-0130')).toBe('gpt-5-pro');
+  });
+
   // Tests for Google models
   it('should return the exact model name if it exists in maxTokensMap - Google models', () => {
     expect(matchModelName('text-bison-32k', EModelEndpoint.google)).toBe('text-bison-32k');
```
```diff
@@ -832,6 +852,49 @@ describe('Claude Model Tests', () => {
     );
   });
 
+  it('should return correct context length for Claude Haiku 4.5', () => {
+    expect(getModelMaxTokens('claude-haiku-4-5', EModelEndpoint.anthropic)).toBe(
+      maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'],
+    );
+    expect(getModelMaxTokens('claude-haiku-4-5')).toBe(
+      maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'],
+    );
+  });
+
+  it('should handle Claude Haiku 4.5 model name variations', () => {
+    const modelVariations = [
+      'claude-haiku-4-5',
+      'claude-haiku-4-5-20250420',
+      'claude-haiku-4-5-latest',
+      'anthropic/claude-haiku-4-5',
+      'claude-haiku-4-5/anthropic',
+      'claude-haiku-4-5-preview',
+    ];
+
+    modelVariations.forEach((model) => {
+      const modelKey = findMatchingPattern(model, maxTokensMap[EModelEndpoint.anthropic]);
+      expect(modelKey).toBe('claude-haiku-4-5');
+      expect(getModelMaxTokens(model, EModelEndpoint.anthropic)).toBe(
+        maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'],
+      );
+    });
+  });
+
+  it('should match model names correctly for Claude Haiku 4.5', () => {
+    const modelVariations = [
+      'claude-haiku-4-5',
+      'claude-haiku-4-5-20250420',
+      'claude-haiku-4-5-latest',
+      'anthropic/claude-haiku-4-5',
+      'claude-haiku-4-5/anthropic',
+      'claude-haiku-4-5-preview',
+    ];
+
+    modelVariations.forEach((model) => {
+      expect(matchModelName(model, EModelEndpoint.anthropic)).toBe('claude-haiku-4-5');
+    });
+  });
+
   it('should handle Claude 4 model name variations with different prefixes and suffixes', () => {
     const modelVariations = [
       'claude-sonnet-4',
```
```diff
@@ -924,6 +987,121 @@ describe('Kimi Model Tests', () => {
   });
 });
 
+describe('Qwen3 Model Tests', () => {
+  describe('getModelMaxTokens', () => {
+    test('should return correct tokens for Qwen3 base pattern', () => {
+      expect(getModelMaxTokens('qwen3')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']);
+    });
+
+    test('should return correct tokens for qwen3-4b (falls back to qwen3)', () => {
+      expect(getModelMaxTokens('qwen3-4b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']);
+    });
+
+    test('should return correct tokens for Qwen3 base models', () => {
+      expect(getModelMaxTokens('qwen3-8b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-8b']);
+      expect(getModelMaxTokens('qwen3-14b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-14b']);
+      expect(getModelMaxTokens('qwen3-32b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-32b']);
+      expect(getModelMaxTokens('qwen3-235b-a22b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-235b-a22b'],
+      );
+    });
+
+    test('should return correct tokens for Qwen3 VL (Vision-Language) models', () => {
+      expect(getModelMaxTokens('qwen3-vl-8b-thinking')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-8b-thinking'],
+      );
+      expect(getModelMaxTokens('qwen3-vl-8b-instruct')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-8b-instruct'],
+      );
+      expect(getModelMaxTokens('qwen3-vl-30b-a3b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-30b-a3b'],
+      );
+      expect(getModelMaxTokens('qwen3-vl-235b-a22b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-235b-a22b'],
+      );
+    });
+
+    test('should return correct tokens for Qwen3 specialized models', () => {
+      expect(getModelMaxTokens('qwen3-max')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-max']);
+      expect(getModelMaxTokens('qwen3-coder')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-coder'],
+      );
+      expect(getModelMaxTokens('qwen3-coder-30b-a3b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-coder-30b-a3b'],
+      );
+      expect(getModelMaxTokens('qwen3-coder-plus')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-coder-plus'],
+      );
+      expect(getModelMaxTokens('qwen3-coder-flash')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-coder-flash'],
+      );
+      expect(getModelMaxTokens('qwen3-next-80b-a3b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-next-80b-a3b'],
+      );
+    });
+
+    test('should handle Qwen3 models with provider prefixes', () => {
+      expect(getModelMaxTokens('alibaba/qwen3')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']);
+      expect(getModelMaxTokens('alibaba/qwen3-4b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3'],
+      );
+      expect(getModelMaxTokens('qwen/qwen3-8b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-8b'],
+      );
+      expect(getModelMaxTokens('openrouter/qwen3-max')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-max'],
+      );
+      expect(getModelMaxTokens('alibaba/qwen3-vl-8b-instruct')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-8b-instruct'],
+      );
+      expect(getModelMaxTokens('qwen/qwen3-coder')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-coder'],
+      );
+    });
+
+    test('should handle Qwen3 models with suffixes', () => {
+      expect(getModelMaxTokens('qwen3-preview')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']);
+      expect(getModelMaxTokens('qwen3-4b-preview')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3'],
+      );
+      expect(getModelMaxTokens('qwen3-8b-latest')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-8b'],
+      );
+      expect(getModelMaxTokens('qwen3-max-2024')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-max'],
+      );
+    });
+  });
+
+  describe('matchModelName', () => {
+    test('should match exact Qwen3 model names', () => {
+      expect(matchModelName('qwen3')).toBe('qwen3');
+      expect(matchModelName('qwen3-4b')).toBe('qwen3');
+      expect(matchModelName('qwen3-8b')).toBe('qwen3-8b');
+      expect(matchModelName('qwen3-vl-8b-thinking')).toBe('qwen3-vl-8b-thinking');
+      expect(matchModelName('qwen3-max')).toBe('qwen3-max');
+      expect(matchModelName('qwen3-coder')).toBe('qwen3-coder');
+    });
+
+    test('should match Qwen3 model variations with provider prefixes', () => {
+      expect(matchModelName('alibaba/qwen3')).toBe('qwen3');
+      expect(matchModelName('alibaba/qwen3-4b')).toBe('qwen3');
+      expect(matchModelName('qwen/qwen3-8b')).toBe('qwen3-8b');
+      expect(matchModelName('openrouter/qwen3-max')).toBe('qwen3-max');
+      expect(matchModelName('alibaba/qwen3-vl-8b-instruct')).toBe('qwen3-vl-8b-instruct');
+      expect(matchModelName('qwen/qwen3-coder')).toBe('qwen3-coder');
+    });
+
+    test('should match Qwen3 model variations with suffixes', () => {
+      expect(matchModelName('qwen3-preview')).toBe('qwen3');
+      expect(matchModelName('qwen3-4b-preview')).toBe('qwen3');
+      expect(matchModelName('qwen3-8b-latest')).toBe('qwen3-8b');
+      expect(matchModelName('qwen3-max-2024')).toBe('qwen3-max');
+      expect(matchModelName('qwen3-coder-v1')).toBe('qwen3-coder');
+    });
+  });
+});
+
 describe('GLM Model Tests (Zhipu AI)', () => {
   describe('getModelMaxTokens', () => {
     test('should return correct tokens for GLM models', () => {
```