🤖 feat: Tool Calling Support for DeepSeek V3.2 + OpenRouter Reasoning (#10752)

* 🔧 chore: Update @librechat/agents to version 3.0.35

* feat: Add DeepSeek Model Pricing and Token Handling

- Introduced pricing and token limits for 'deepseek-chat' and 'deepseek-reasoner' models, including prompt and completion rates.
- Enhanced tests to validate pricing and token limits for DeepSeek models, ensuring correct handling of model variations and provider prefixes.
- Updated cache multipliers for DeepSeek models to reflect new pricing structure.
- Improved max output token handling for DeepSeek models, ensuring consistency across different endpoints.
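
As a rough illustration of the lookup these bullets describe — a hypothetical, simplified sketch, where resolvePricing is not the repo's actual helper (the real logic lives in getMultiplier/matchModelName in api/models/tx.js):

// Hypothetical sketch: resolve a possibly provider-prefixed model name to a
// pricing entry. Rates are USD per 1M tokens.
const tokenValues = {
  'deepseek-chat': { prompt: 0.28, completion: 0.42 },
  'deepseek-reasoner': { prompt: 0.28, completion: 0.42 },
};

function resolvePricing(model) {
  // Strip provider prefixes such as 'deepseek/' or 'openrouter/'.
  const name = model.split('/').pop();
  // Fall back to substring matching against the known keys.
  const key = Object.keys(tokenValues).find((k) => name.includes(k));
  return key ? tokenValues[key] : undefined;
}

console.log(resolvePricing('openrouter/deepseek-chat')); // { prompt: 0.28, completion: 0.42 }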
Danny Avila 2025-12-01 14:27:08 -05:00 committed by GitHub
parent 026890cd27
commit 4202db1c99
7 changed files with 144 additions and 11 deletions

View file

@@ -141,6 +141,7 @@ const tokenValues = Object.assign(
   'command-r': { prompt: 0.5, completion: 1.5 },
   'command-r-plus': { prompt: 3, completion: 15 },
   'command-text': { prompt: 1.5, completion: 2.0 },
+  'deepseek-chat': { prompt: 0.28, completion: 0.42 },
   'deepseek-reasoner': { prompt: 0.28, completion: 0.42 },
   'deepseek-r1': { prompt: 0.4, completion: 2.0 },
   'deepseek-v3': { prompt: 0.2, completion: 0.8 },
@@ -246,6 +247,10 @@ const cacheTokenValues = {
   'claude-sonnet-4': { write: 3.75, read: 0.3 },
   'claude-opus-4': { write: 18.75, read: 1.5 },
   'claude-opus-4-5': { write: 6.25, read: 0.5 },
+  // DeepSeek models - cache hit: $0.028/1M, cache miss: $0.28/1M
+  deepseek: { write: 0.28, read: 0.028 },
+  'deepseek-chat': { write: 0.28, read: 0.028 },
+  'deepseek-reasoner': { write: 0.28, read: 0.028 },
 };
/**
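
The cache entries above read as USD per 1M tokens: a cached ('read') DeepSeek prompt token costs a tenth of an uncached one. A minimal sketch of how such multipliers could translate into spend, assuming per-1M-token units (estimateCost is hypothetical, not the repo's accounting code):

// Hypothetical cost estimate using the DeepSeek multipliers above.
const rates = { prompt: 0.28, completion: 0.42, cacheRead: 0.028 };

function estimateCost({ cachedTokens, freshTokens, completionTokens }) {
  return (
    (cachedTokens / 1e6) * rates.cacheRead + // cache hits: $0.028/1M
    (freshTokens / 1e6) * rates.prompt + // cache misses: $0.28/1M
    (completionTokens / 1e6) * rates.completion
  );
}

// 800k cached + 200k fresh prompt tokens and 50k completion tokens:
// 0.8 * 0.028 + 0.2 * 0.28 + 0.05 * 0.42 ≈ $0.0994
console.log(estimateCost({ cachedTokens: 800_000, freshTokens: 200_000, completionTokens: 50_000 }));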

View file

@@ -766,6 +766,78 @@ describe('Deepseek Model Tests', () => {
     const result = tokenValues[valueKey].prompt && multiplier === tokenValues[valueKey].prompt;
     expect(result).toBe(true);
   });
+  it('should return correct pricing for deepseek-chat', () => {
+    expect(getMultiplier({ model: 'deepseek-chat', tokenType: 'prompt' })).toBe(
+      tokenValues['deepseek-chat'].prompt,
+    );
+    expect(getMultiplier({ model: 'deepseek-chat', tokenType: 'completion' })).toBe(
+      tokenValues['deepseek-chat'].completion,
+    );
+    expect(tokenValues['deepseek-chat'].prompt).toBe(0.28);
+    expect(tokenValues['deepseek-chat'].completion).toBe(0.42);
+  });
+  it('should return correct pricing for deepseek-reasoner', () => {
+    expect(getMultiplier({ model: 'deepseek-reasoner', tokenType: 'prompt' })).toBe(
+      tokenValues['deepseek-reasoner'].prompt,
+    );
+    expect(getMultiplier({ model: 'deepseek-reasoner', tokenType: 'completion' })).toBe(
+      tokenValues['deepseek-reasoner'].completion,
+    );
+    expect(tokenValues['deepseek-reasoner'].prompt).toBe(0.28);
+    expect(tokenValues['deepseek-reasoner'].completion).toBe(0.42);
+  });
+  it('should handle DeepSeek model name variations with provider prefixes', () => {
+    const modelVariations = [
+      'deepseek/deepseek-chat',
+      'openrouter/deepseek-chat',
+      'deepseek/deepseek-reasoner',
+    ];
+    modelVariations.forEach((model) => {
+      const promptMultiplier = getMultiplier({ model, tokenType: 'prompt' });
+      const completionMultiplier = getMultiplier({ model, tokenType: 'completion' });
+      expect(promptMultiplier).toBe(0.28);
+      expect(completionMultiplier).toBe(0.42);
+    });
+  });
+  it('should return correct cache multipliers for DeepSeek models', () => {
+    expect(getCacheMultiplier({ model: 'deepseek-chat', cacheType: 'write' })).toBe(
+      cacheTokenValues['deepseek-chat'].write,
+    );
+    expect(getCacheMultiplier({ model: 'deepseek-chat', cacheType: 'read' })).toBe(
+      cacheTokenValues['deepseek-chat'].read,
+    );
+    expect(getCacheMultiplier({ model: 'deepseek-reasoner', cacheType: 'write' })).toBe(
+      cacheTokenValues['deepseek-reasoner'].write,
+    );
+    expect(getCacheMultiplier({ model: 'deepseek-reasoner', cacheType: 'read' })).toBe(
+      cacheTokenValues['deepseek-reasoner'].read,
+    );
+  });
+  it('should return correct cache pricing values for DeepSeek models', () => {
+    expect(cacheTokenValues['deepseek-chat'].write).toBe(0.28);
+    expect(cacheTokenValues['deepseek-chat'].read).toBe(0.028);
+    expect(cacheTokenValues['deepseek-reasoner'].write).toBe(0.28);
+    expect(cacheTokenValues['deepseek-reasoner'].read).toBe(0.028);
+    expect(cacheTokenValues['deepseek'].write).toBe(0.28);
+    expect(cacheTokenValues['deepseek'].read).toBe(0.028);
+  });
+  it('should handle DeepSeek cache multipliers with model variations', () => {
+    const modelVariations = ['deepseek/deepseek-chat', 'openrouter/deepseek-reasoner'];
+    modelVariations.forEach((model) => {
+      const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
+      const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
+      expect(writeMultiplier).toBe(0.28);
+      expect(readMultiplier).toBe(0.028);
+    });
+  });
 });
 describe('Qwen3 Model Tests', () => {

View file

@@ -47,7 +47,7 @@
     "@langchain/google-genai": "^0.2.13",
     "@langchain/google-vertexai": "^0.2.13",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^3.0.34",
+    "@librechat/agents": "^3.0.35",
     "@librechat/api": "*",
     "@librechat/data-schemas": "*",
     "@microsoft/microsoft-graph-client": "^3.0.7",

View file

@@ -665,7 +665,7 @@ describe('Meta Models Tests', () => {
   test('should match Deepseek model variations', () => {
     expect(getModelMaxTokens('deepseek-chat')).toBe(
-      maxTokensMap[EModelEndpoint.openAI]['deepseek'],
+      maxTokensMap[EModelEndpoint.openAI]['deepseek-chat'],
     );
     expect(getModelMaxTokens('deepseek-coder')).toBe(
       maxTokensMap[EModelEndpoint.openAI]['deepseek'],
@@ -677,6 +677,20 @@
       maxTokensMap[EModelEndpoint.openAI]['deepseek.r1'],
     );
   });
+  test('should return 128000 context tokens for all DeepSeek models', () => {
+    expect(getModelMaxTokens('deepseek-chat')).toBe(128000);
+    expect(getModelMaxTokens('deepseek-reasoner')).toBe(128000);
+    expect(getModelMaxTokens('deepseek-r1')).toBe(128000);
+    expect(getModelMaxTokens('deepseek-v3')).toBe(128000);
+    expect(getModelMaxTokens('deepseek.r1')).toBe(128000);
+  });
+  test('should handle DeepSeek models with provider prefixes', () => {
+    expect(getModelMaxTokens('deepseek/deepseek-chat')).toBe(128000);
+    expect(getModelMaxTokens('openrouter/deepseek-reasoner')).toBe(128000);
+    expect(getModelMaxTokens('openai/deepseek-v3')).toBe(128000);
+  });
 });
 describe('matchModelName', () => {
@@ -705,11 +719,42 @@
   });
   test('should match Deepseek model variations', () => {
-    expect(matchModelName('deepseek-chat')).toBe('deepseek');
+    expect(matchModelName('deepseek-chat')).toBe('deepseek-chat');
     expect(matchModelName('deepseek-coder')).toBe('deepseek');
   });
 });
+describe('DeepSeek Max Output Tokens', () => {
+  const { getModelMaxOutputTokens } = require('@librechat/api');
+  test('should return correct max output tokens for deepseek-chat', () => {
+    expect(getModelMaxOutputTokens('deepseek-chat')).toBe(8000);
+    expect(getModelMaxOutputTokens('deepseek-chat', EModelEndpoint.openAI)).toBe(8000);
+    expect(getModelMaxOutputTokens('deepseek-chat', EModelEndpoint.custom)).toBe(8000);
+  });
+  test('should return correct max output tokens for deepseek-reasoner', () => {
+    expect(getModelMaxOutputTokens('deepseek-reasoner')).toBe(64000);
+    expect(getModelMaxOutputTokens('deepseek-reasoner', EModelEndpoint.openAI)).toBe(64000);
+    expect(getModelMaxOutputTokens('deepseek-reasoner', EModelEndpoint.custom)).toBe(64000);
+  });
+  test('should return correct max output tokens for deepseek-r1', () => {
+    expect(getModelMaxOutputTokens('deepseek-r1')).toBe(64000);
+    expect(getModelMaxOutputTokens('deepseek-r1', EModelEndpoint.openAI)).toBe(64000);
+  });
+  test('should return correct max output tokens for deepseek base pattern', () => {
+    expect(getModelMaxOutputTokens('deepseek')).toBe(8000);
+    expect(getModelMaxOutputTokens('deepseek-v3')).toBe(8000);
+  });
+  test('should handle DeepSeek models with provider prefixes for max output tokens', () => {
+    expect(getModelMaxOutputTokens('deepseek/deepseek-chat')).toBe(8000);
+    expect(getModelMaxOutputTokens('openrouter/deepseek-reasoner')).toBe(64000);
+  });
+});
 describe('processModelData with Meta models', () => {
   test('should process Meta model data correctly', () => {
     const input = {

package-lock.json (generated, 10 changed lines)
View file

@@ -61,7 +61,7 @@
     "@langchain/google-genai": "^0.2.13",
     "@langchain/google-vertexai": "^0.2.13",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^3.0.34",
+    "@librechat/agents": "^3.0.35",
     "@librechat/api": "*",
     "@librechat/data-schemas": "*",
     "@microsoft/microsoft-graph-client": "^3.0.7",
@@ -16289,9 +16289,9 @@
       }
     },
     "node_modules/@librechat/agents": {
-      "version": "3.0.34",
-      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.34.tgz",
-      "integrity": "sha512-sngGZewCp/p8nMhUVJdXNZw74MpehJ1SMvWPxR9euLdOQX2qgR23/c2/5VxpdsR4gXmOSxpdhsc5cin2y6R/mA==",
+      "version": "3.0.35",
+      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.35.tgz",
+      "integrity": "sha512-9KcTin8CtJIsADkcZtyCHwfn9GQ7AIRWTAhNDFtxXsuqmlnytD8bjh0GiRc6uOOQ/Dw8zL/oRcqNDubempwBfg==",
       "license": "MIT",
       "dependencies": {
         "@langchain/anthropic": "^0.3.26",
@@ -46275,7 +46275,7 @@
       "@azure/storage-blob": "^12.27.0",
       "@keyv/redis": "^4.3.3",
       "@langchain/core": "^0.3.79",
-      "@librechat/agents": "^3.0.34",
+      "@librechat/agents": "^3.0.35",
       "@librechat/data-schemas": "*",
       "@modelcontextprotocol/sdk": "^1.21.0",
       "axios": "^1.12.1",

View file

@@ -84,7 +84,7 @@
     "@azure/storage-blob": "^12.27.0",
     "@keyv/redis": "^4.3.3",
     "@langchain/core": "^0.3.79",
-    "@librechat/agents": "^3.0.34",
+    "@librechat/agents": "^3.0.35",
     "@librechat/data-schemas": "*",
     "@modelcontextprotocol/sdk": "^1.21.0",
     "axios": "^1.12.1",

View file

@@ -140,6 +140,7 @@ const anthropicModels = {
 const deepseekModels = {
   deepseek: 128000,
+  'deepseek-chat': 128000,
   'deepseek-reasoner': 128000,
   'deepseek-r1': 128000,
   'deepseek-v3': 128000,
@@ -347,11 +348,21 @@ const anthropicMaxOutputs = {
   'claude-3-7-sonnet': 128000,
 };
+/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */
+const deepseekMaxOutputs = {
+  deepseek: 8000, // deepseek-chat default: 4K, max: 8K
+  'deepseek-chat': 8000,
+  'deepseek-reasoner': 64000, // default: 32K, max: 64K
+  'deepseek-r1': 64000,
+  'deepseek-v3': 8000,
+  'deepseek.r1': 64000,
+};
 export const maxOutputTokensMap = {
   [EModelEndpoint.anthropic]: anthropicMaxOutputs,
   [EModelEndpoint.azureOpenAI]: modelMaxOutputs,
-  [EModelEndpoint.openAI]: modelMaxOutputs,
-  [EModelEndpoint.custom]: modelMaxOutputs,
+  [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
+  [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
 };
/**
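
Since later spreads win on key collisions, the DeepSeek entries now shadow any same-named keys from modelMaxOutputs for the openAI and custom endpoints. A minimal sketch of that precedence, with illustrative values only (not taken from modelMaxOutputs):

// Object spread: later sources override earlier ones on duplicate keys.
const modelMaxOutputs = { 'some-model': 4096 };
const deepseekMaxOutputs = { 'deepseek-chat': 8000, 'deepseek-reasoner': 64000 };

const merged = { ...modelMaxOutputs, ...deepseekMaxOutputs };

console.log(merged['deepseek-reasoner']); // 64000 (from deepseekMaxOutputs)
console.log(merged['some-model']); // 4096 (untouched)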