Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-02-01 15:21:50 +01:00)
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173)
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Has been cancelled
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Has been cancelled
* updated gpt-5-pro: it is now available, both directly and on OpenRouter (https://platform.openai.com/docs/models/gpt-5-pro)
* feat: Add gpt-5-pro pricing
  - Implemented handling for the new gpt-5-pro model in the getValueKey function.
  - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios.
  - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files.
  - Enhanced model matching functionality to include gpt-5-pro variations.
* refactor: optimize model pricing and validation logic
  - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js.
  - Updated tokenValues to include additional models and their pricing structures.
  - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing.
  - Refactored the getValueKey function to improve model matching and resolution efficiency (a hedged sketch of this kind of matching follows this message).
  - Removed outdated model entries from tokens.ts to streamline pricing management.
* fix: add missing pricing
* chore: update model pricing for qwen and gemma variants
* chore: update model pricing and add validation for context windows
  - Removed outdated model entries from tx.js and updated tokenValues with new models.
  - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts (a sketch of this style of test follows the diff below).
  - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions.
* chore: update model names and pricing for AI21 and Amazon models
  - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency.
  - Updated pricing values in tokens.ts to reflect the new model names.
  - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models.
* feat: add pricing and validation for Claude Haiku 4.5 model
* chore: increase default max context tokens to 18000 for agents
* feat: add Qwen3 model pricing and validation tests
* chore: reorganize and update Qwen model pricing in tx.js and tokens.ts

Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
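The recurring mechanism in this PR is resolving an arbitrary model identifier (provider prefixes such as `openai/` or `alibaba/`, and date, `-preview`, or `-latest` suffixes) to one canonical key in the pricing and context-window tables. Below is a minimal sketch of longest-key prefix matching, assuming a simplified tokenValues shape; the helper name findMatchingPattern and the expected resolutions come from the tests in this diff, but the body and the sample prices are illustrative, not LibreChat's actual implementation:

```js
// Sketch: prefix/suffix-tolerant resolution of a model id to a pricing key.
// Assumption: the real findMatchingPattern/getValueKey in LibreChat differ in
// detail; only the helper name and expected behavior come from the tests below.
const tokenValues = {
  // Illustrative USD-per-1M-token rates, not the values added by this PR.
  'gpt-5-pro': { prompt: 15, completion: 120 },
  'qwen3-8b': { prompt: 0.2, completion: 0.6 },
  qwen3: { prompt: 0.15, completion: 0.45 },
};

function findMatchingPattern(model, values) {
  // Prefer longer keys so 'qwen3-8b-latest' hits 'qwen3-8b', not 'qwen3'.
  const keys = Object.keys(values).sort((a, b) => b.length - a.length);
  // The tests accept both 'anthropic/claude-haiku-4-5' and
  // 'claude-haiku-4-5/anthropic', so try every slash-separated segment.
  for (const part of model.split('/')) {
    const hit = keys.find((key) => part === key || part.startsWith(`${key}-`));
    if (hit) return hit;
  }
  return undefined;
}

console.log(findMatchingPattern('openai/gpt-5-pro-2025-01-30', tokenValues)); // 'gpt-5-pro'
console.log(findMatchingPattern('qwen3-4b-preview', tokenValues)); // 'qwen3' (fallback)
```

Sorting candidates by key length makes the most specific pattern win, which is exactly the fallback behavior the Qwen3 tests below pin down: qwen3-4b resolves to the qwen3 base entry, while qwen3-8b keeps its own dedicated entry.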
This commit is contained in:
parent 589f119310
commit 36f0365fd4
5 changed files with 964 additions and 132 deletions
```diff
@@ -186,6 +186,19 @@ describe('getModelMaxTokens', () => {
     );
   });
 
+  test('should return correct tokens for gpt-5-pro matches', () => {
+    expect(getModelMaxTokens('gpt-5-pro')).toBe(maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro']);
+    expect(getModelMaxTokens('gpt-5-pro-preview')).toBe(
+      maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro'],
+    );
+    expect(getModelMaxTokens('openai/gpt-5-pro')).toBe(
+      maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro'],
+    );
+    expect(getModelMaxTokens('gpt-5-pro-2025-01-30')).toBe(
+      maxTokensMap[EModelEndpoint.openAI]['gpt-5-pro'],
+    );
+  });
+
   test('should return correct tokens for Anthropic models', () => {
     const models = [
       'claude-2.1',
```
```diff
@@ -469,7 +482,7 @@ describe('getModelMaxTokens', () => {
 
   test('should return correct max output tokens for GPT-5 models', () => {
     const { getModelMaxOutputTokens } = require('@librechat/api');
-    ['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => {
+    ['gpt-5', 'gpt-5-mini', 'gpt-5-nano', 'gpt-5-pro'].forEach((model) => {
       expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
       expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(
         maxOutputTokensMap[EModelEndpoint.openAI][model],
```
```diff
@@ -582,6 +595,13 @@ describe('matchModelName', () => {
     expect(matchModelName('gpt-5-nano-2025-01-30')).toBe('gpt-5-nano');
   });
 
+  it('should return the closest matching key for gpt-5-pro matches', () => {
+    expect(matchModelName('openai/gpt-5-pro')).toBe('gpt-5-pro');
+    expect(matchModelName('gpt-5-pro-preview')).toBe('gpt-5-pro');
+    expect(matchModelName('gpt-5-pro-2025-01-30')).toBe('gpt-5-pro');
+    expect(matchModelName('gpt-5-pro-2025-01-30-0130')).toBe('gpt-5-pro');
+  });
+
   // Tests for Google models
   it('should return the exact model name if it exists in maxTokensMap - Google models', () => {
     expect(matchModelName('text-bison-32k', EModelEndpoint.google)).toBe('text-bison-32k');
```
```diff
@@ -832,6 +852,49 @@ describe('Claude Model Tests', () => {
     );
   });
 
+  it('should return correct context length for Claude Haiku 4.5', () => {
+    expect(getModelMaxTokens('claude-haiku-4-5', EModelEndpoint.anthropic)).toBe(
+      maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'],
+    );
+    expect(getModelMaxTokens('claude-haiku-4-5')).toBe(
+      maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'],
+    );
+  });
+
+  it('should handle Claude Haiku 4.5 model name variations', () => {
+    const modelVariations = [
+      'claude-haiku-4-5',
+      'claude-haiku-4-5-20250420',
+      'claude-haiku-4-5-latest',
+      'anthropic/claude-haiku-4-5',
+      'claude-haiku-4-5/anthropic',
+      'claude-haiku-4-5-preview',
+    ];
+
+    modelVariations.forEach((model) => {
+      const modelKey = findMatchingPattern(model, maxTokensMap[EModelEndpoint.anthropic]);
+      expect(modelKey).toBe('claude-haiku-4-5');
+      expect(getModelMaxTokens(model, EModelEndpoint.anthropic)).toBe(
+        maxTokensMap[EModelEndpoint.anthropic]['claude-haiku-4-5'],
+      );
+    });
+  });
+
+  it('should match model names correctly for Claude Haiku 4.5', () => {
+    const modelVariations = [
+      'claude-haiku-4-5',
+      'claude-haiku-4-5-20250420',
+      'claude-haiku-4-5-latest',
+      'anthropic/claude-haiku-4-5',
+      'claude-haiku-4-5/anthropic',
+      'claude-haiku-4-5-preview',
+    ];
+
+    modelVariations.forEach((model) => {
+      expect(matchModelName(model, EModelEndpoint.anthropic)).toBe('claude-haiku-4-5');
+    });
+  });
+
   it('should handle Claude 4 model name variations with different prefixes and suffixes', () => {
     const modelVariations = [
       'claude-sonnet-4',
```
```diff
@@ -924,6 +987,121 @@ describe('Kimi Model Tests', () => {
   });
 });
 
+describe('Qwen3 Model Tests', () => {
+  describe('getModelMaxTokens', () => {
+    test('should return correct tokens for Qwen3 base pattern', () => {
+      expect(getModelMaxTokens('qwen3')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']);
+    });
+
+    test('should return correct tokens for qwen3-4b (falls back to qwen3)', () => {
+      expect(getModelMaxTokens('qwen3-4b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']);
+    });
+
+    test('should return correct tokens for Qwen3 base models', () => {
+      expect(getModelMaxTokens('qwen3-8b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-8b']);
+      expect(getModelMaxTokens('qwen3-14b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-14b']);
+      expect(getModelMaxTokens('qwen3-32b')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-32b']);
+      expect(getModelMaxTokens('qwen3-235b-a22b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-235b-a22b'],
+      );
+    });
+
+    test('should return correct tokens for Qwen3 VL (Vision-Language) models', () => {
+      expect(getModelMaxTokens('qwen3-vl-8b-thinking')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-8b-thinking'],
+      );
+      expect(getModelMaxTokens('qwen3-vl-8b-instruct')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-8b-instruct'],
+      );
+      expect(getModelMaxTokens('qwen3-vl-30b-a3b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-30b-a3b'],
+      );
+      expect(getModelMaxTokens('qwen3-vl-235b-a22b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-235b-a22b'],
+      );
+    });
+
+    test('should return correct tokens for Qwen3 specialized models', () => {
+      expect(getModelMaxTokens('qwen3-max')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3-max']);
+      expect(getModelMaxTokens('qwen3-coder')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-coder'],
+      );
+      expect(getModelMaxTokens('qwen3-coder-30b-a3b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-coder-30b-a3b'],
+      );
+      expect(getModelMaxTokens('qwen3-coder-plus')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-coder-plus'],
+      );
+      expect(getModelMaxTokens('qwen3-coder-flash')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-coder-flash'],
+      );
+      expect(getModelMaxTokens('qwen3-next-80b-a3b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-next-80b-a3b'],
+      );
+    });
+
+    test('should handle Qwen3 models with provider prefixes', () => {
+      expect(getModelMaxTokens('alibaba/qwen3')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']);
+      expect(getModelMaxTokens('alibaba/qwen3-4b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3'],
+      );
+      expect(getModelMaxTokens('qwen/qwen3-8b')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-8b'],
+      );
+      expect(getModelMaxTokens('openrouter/qwen3-max')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-max'],
+      );
+      expect(getModelMaxTokens('alibaba/qwen3-vl-8b-instruct')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-vl-8b-instruct'],
+      );
+      expect(getModelMaxTokens('qwen/qwen3-coder')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-coder'],
+      );
+    });
+
+    test('should handle Qwen3 models with suffixes', () => {
+      expect(getModelMaxTokens('qwen3-preview')).toBe(maxTokensMap[EModelEndpoint.openAI]['qwen3']);
+      expect(getModelMaxTokens('qwen3-4b-preview')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3'],
+      );
+      expect(getModelMaxTokens('qwen3-8b-latest')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-8b'],
+      );
+      expect(getModelMaxTokens('qwen3-max-2024')).toBe(
+        maxTokensMap[EModelEndpoint.openAI]['qwen3-max'],
+      );
+    });
+  });
+
+  describe('matchModelName', () => {
+    test('should match exact Qwen3 model names', () => {
+      expect(matchModelName('qwen3')).toBe('qwen3');
+      expect(matchModelName('qwen3-4b')).toBe('qwen3');
+      expect(matchModelName('qwen3-8b')).toBe('qwen3-8b');
+      expect(matchModelName('qwen3-vl-8b-thinking')).toBe('qwen3-vl-8b-thinking');
+      expect(matchModelName('qwen3-max')).toBe('qwen3-max');
+      expect(matchModelName('qwen3-coder')).toBe('qwen3-coder');
+    });
+
+    test('should match Qwen3 model variations with provider prefixes', () => {
+      expect(matchModelName('alibaba/qwen3')).toBe('qwen3');
+      expect(matchModelName('alibaba/qwen3-4b')).toBe('qwen3');
+      expect(matchModelName('qwen/qwen3-8b')).toBe('qwen3-8b');
+      expect(matchModelName('openrouter/qwen3-max')).toBe('qwen3-max');
+      expect(matchModelName('alibaba/qwen3-vl-8b-instruct')).toBe('qwen3-vl-8b-instruct');
+      expect(matchModelName('qwen/qwen3-coder')).toBe('qwen3-coder');
+    });
+
+    test('should match Qwen3 model variations with suffixes', () => {
+      expect(matchModelName('qwen3-preview')).toBe('qwen3');
+      expect(matchModelName('qwen3-4b-preview')).toBe('qwen3');
+      expect(matchModelName('qwen3-8b-latest')).toBe('qwen3-8b');
+      expect(matchModelName('qwen3-max-2024')).toBe('qwen3-max');
+      expect(matchModelName('qwen3-coder-v1')).toBe('qwen3-coder');
+    });
+  });
+});
+
 describe('GLM Model Tests (Zhipu AI)', () => {
   describe('getModelMaxTokens', () => {
     test('should return correct tokens for GLM models', () => {
```