From 4202db1c992667d2217182e1d64e8c3628004f2c Mon Sep 17 00:00:00 2001
From: Danny Avila
Date: Mon, 1 Dec 2025 14:27:08 -0500
Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20feat:=20Tool=20Calling=20Support?=
 =?UTF-8?q?=20for=20DeepSeek=20V3.2=20+=20OpenRouter=20Reasoning=20(#10752?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 🔧 chore: Update @librechat/agents to version 3.0.35

* ✨ feat: Add DeepSeek Model Pricing and Token Handling

- Introduced pricing and token limits for 'deepseek-chat' and 'deepseek-reasoner' models, including prompt and completion rates.
- Enhanced tests to validate pricing and token limits for DeepSeek models, ensuring correct handling of model variations and provider prefixes.
- Updated cache multipliers for DeepSeek models to reflect new pricing structure.
- Improved max output token handling for DeepSeek models, ensuring consistency across different endpoints.
---
 api/models/tx.js                 |  5 +++
 api/models/tx.spec.js            | 72 ++++++++++++++++++++++++++++++++
 api/package.json                 |  2 +-
 api/utils/tokens.spec.js         | 49 +++++++++++++++++++++-
 package-lock.json                | 10 ++---
 packages/api/package.json        |  2 +-
 packages/api/src/utils/tokens.ts | 15 ++++++-
 7 files changed, 144 insertions(+), 11 deletions(-)

diff --git a/api/models/tx.js b/api/models/tx.js
index 4ea93e1a1e..aa213d3475 100644
--- a/api/models/tx.js
+++ b/api/models/tx.js
@@ -141,6 +141,7 @@ const tokenValues = Object.assign(
   'command-r': { prompt: 0.5, completion: 1.5 },
   'command-r-plus': { prompt: 3, completion: 15 },
   'command-text': { prompt: 1.5, completion: 2.0 },
+  'deepseek-chat': { prompt: 0.28, completion: 0.42 },
   'deepseek-reasoner': { prompt: 0.28, completion: 0.42 },
   'deepseek-r1': { prompt: 0.4, completion: 2.0 },
   'deepseek-v3': { prompt: 0.2, completion: 0.8 },
@@ -246,6 +247,10 @@ const cacheTokenValues = {
   'claude-sonnet-4': { write: 3.75, read: 0.3 },
   'claude-opus-4': { write: 18.75, read: 1.5 },
   'claude-opus-4-5': { write: 6.25, read: 0.5 },
+  // DeepSeek models - cache hit: $0.028/1M, cache miss: $0.28/1M
+  deepseek: { write: 0.28, read: 0.028 },
+  'deepseek-chat': { write: 0.28, read: 0.028 },
+  'deepseek-reasoner': { write: 0.28, read: 0.028 },
 };
 
 /**
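
To make the cache multipliers added in api/models/tx.js concrete, here is a minimal, self-contained sketch of the arithmetic they imply. The promptCost helper below is illustrative only and not part of LibreChat; the two rates mirror the cache hit/miss comment in the diff above.

// Cached prompt tokens (cache hit, 'read') bill at $0.028 per 1M;
// uncached tokens (cache miss, 'write') bill at $0.28 per 1M.
// NOTE: `promptCost` is a hypothetical helper, not a LibreChat function.
function promptCost({ cachedTokens, uncachedTokens }) {
  const READ_RATE = 0.028 / 1e6; // USD per cached token
  const WRITE_RATE = 0.28 / 1e6; // USD per uncached token
  return cachedTokens * READ_RATE + uncachedTokens * WRITE_RATE;
}

// A 100K-token prompt with a 90% cache hit rate costs ~81% less:
console.log(promptCost({ cachedTokens: 90000, uncachedTokens: 10000 })); // 0.00532
console.log(promptCost({ cachedTokens: 0, uncachedTokens: 100000 })); // 0.028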
diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js
index 75a8fa1922..18030abb21 100644
--- a/api/models/tx.spec.js
+++ b/api/models/tx.spec.js
@@ -766,6 +766,78 @@ describe('Deepseek Model Tests', () => {
     const result = tokenValues[valueKey].prompt && multiplier === tokenValues[valueKey].prompt;
     expect(result).toBe(true);
   });
+
+  it('should return correct pricing for deepseek-chat', () => {
+    expect(getMultiplier({ model: 'deepseek-chat', tokenType: 'prompt' })).toBe(
+      tokenValues['deepseek-chat'].prompt,
+    );
+    expect(getMultiplier({ model: 'deepseek-chat', tokenType: 'completion' })).toBe(
+      tokenValues['deepseek-chat'].completion,
+    );
+    expect(tokenValues['deepseek-chat'].prompt).toBe(0.28);
+    expect(tokenValues['deepseek-chat'].completion).toBe(0.42);
+  });
+
+  it('should return correct pricing for deepseek-reasoner', () => {
+    expect(getMultiplier({ model: 'deepseek-reasoner', tokenType: 'prompt' })).toBe(
+      tokenValues['deepseek-reasoner'].prompt,
+    );
+    expect(getMultiplier({ model: 'deepseek-reasoner', tokenType: 'completion' })).toBe(
+      tokenValues['deepseek-reasoner'].completion,
+    );
+    expect(tokenValues['deepseek-reasoner'].prompt).toBe(0.28);
+    expect(tokenValues['deepseek-reasoner'].completion).toBe(0.42);
+  });
+
+  it('should handle DeepSeek model name variations with provider prefixes', () => {
+    const modelVariations = [
+      'deepseek/deepseek-chat',
+      'openrouter/deepseek-chat',
+      'deepseek/deepseek-reasoner',
+    ];
+
+    modelVariations.forEach((model) => {
+      const promptMultiplier = getMultiplier({ model, tokenType: 'prompt' });
+      const completionMultiplier = getMultiplier({ model, tokenType: 'completion' });
+      expect(promptMultiplier).toBe(0.28);
+      expect(completionMultiplier).toBe(0.42);
+    });
+  });
+
+  it('should return correct cache multipliers for DeepSeek models', () => {
+    expect(getCacheMultiplier({ model: 'deepseek-chat', cacheType: 'write' })).toBe(
+      cacheTokenValues['deepseek-chat'].write,
+    );
+    expect(getCacheMultiplier({ model: 'deepseek-chat', cacheType: 'read' })).toBe(
+      cacheTokenValues['deepseek-chat'].read,
+    );
+    expect(getCacheMultiplier({ model: 'deepseek-reasoner', cacheType: 'write' })).toBe(
+      cacheTokenValues['deepseek-reasoner'].write,
+    );
+    expect(getCacheMultiplier({ model: 'deepseek-reasoner', cacheType: 'read' })).toBe(
+      cacheTokenValues['deepseek-reasoner'].read,
+    );
+  });
+
+  it('should return correct cache pricing values for DeepSeek models', () => {
+    expect(cacheTokenValues['deepseek-chat'].write).toBe(0.28);
+    expect(cacheTokenValues['deepseek-chat'].read).toBe(0.028);
+    expect(cacheTokenValues['deepseek-reasoner'].write).toBe(0.28);
+    expect(cacheTokenValues['deepseek-reasoner'].read).toBe(0.028);
+    expect(cacheTokenValues['deepseek'].write).toBe(0.28);
+    expect(cacheTokenValues['deepseek'].read).toBe(0.028);
+  });
+
+  it('should handle DeepSeek cache multipliers with model variations', () => {
+    const modelVariations = ['deepseek/deepseek-chat', 'openrouter/deepseek-reasoner'];
+
+    modelVariations.forEach((model) => {
+      const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
+      const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
+      expect(writeMultiplier).toBe(0.28);
+      expect(readMultiplier).toBe(0.028);
+    });
+  });
 });
 
 describe('Qwen3 Model Tests', () => {
diff --git a/api/package.json b/api/package.json
index 69354e1812..0936bede48 100644
--- a/api/package.json
+++ b/api/package.json
@@ -47,7 +47,7 @@
     "@langchain/google-genai": "^0.2.13",
     "@langchain/google-vertexai": "^0.2.13",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^3.0.34",
+    "@librechat/agents": "^3.0.35",
     "@librechat/api": "*",
     "@librechat/data-schemas": "*",
     "@microsoft/microsoft-graph-client": "^3.0.7",
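
The provider-prefix tests above pass as long as lookup prefers the most specific key that appears in the model name, so 'openrouter/deepseek-chat' resolves to 'deepseek-chat' rather than the shorter 'deepseek' entry. Here is a hedged sketch of one such strategy; the real getMultiplier/getCacheMultiplier resolution in api/models/tx.js may differ in detail.

// Longest-substring key resolution: prefer the most specific table entry
// contained in the (possibly provider-prefixed) model name.
// NOTE: `resolveValueKey` is a hypothetical helper for illustration.
const tokenValues = {
  deepseek: { prompt: 0.2, completion: 0.8 },
  'deepseek-chat': { prompt: 0.28, completion: 0.42 },
  'deepseek-reasoner': { prompt: 0.28, completion: 0.42 },
};

function resolveValueKey(model) {
  const [bestMatch] = Object.keys(tokenValues)
    .filter((key) => model.includes(key))
    .sort((a, b) => b.length - a.length); // longest match first
  return bestMatch;
}

console.log(resolveValueKey('openrouter/deepseek-chat')); // 'deepseek-chat'
console.log(resolveValueKey('deepseek/deepseek-reasoner')); // 'deepseek-reasoner'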
diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js
index a169c31622..3336a0f82d 100644
--- a/api/utils/tokens.spec.js
+++ b/api/utils/tokens.spec.js
@@ -665,7 +665,7 @@ describe('Meta Models Tests', () => {
 
   test('should match Deepseek model variations', () => {
     expect(getModelMaxTokens('deepseek-chat')).toBe(
-      maxTokensMap[EModelEndpoint.openAI]['deepseek'],
+      maxTokensMap[EModelEndpoint.openAI]['deepseek-chat'],
     );
     expect(getModelMaxTokens('deepseek-coder')).toBe(
       maxTokensMap[EModelEndpoint.openAI]['deepseek'],
@@ -677,6 +677,20 @@ describe('Meta Models Tests', () => {
       maxTokensMap[EModelEndpoint.openAI]['deepseek.r1'],
     );
   });
+
+  test('should return 128000 context tokens for all DeepSeek models', () => {
+    expect(getModelMaxTokens('deepseek-chat')).toBe(128000);
+    expect(getModelMaxTokens('deepseek-reasoner')).toBe(128000);
+    expect(getModelMaxTokens('deepseek-r1')).toBe(128000);
+    expect(getModelMaxTokens('deepseek-v3')).toBe(128000);
+    expect(getModelMaxTokens('deepseek.r1')).toBe(128000);
+  });
+
+  test('should handle DeepSeek models with provider prefixes', () => {
+    expect(getModelMaxTokens('deepseek/deepseek-chat')).toBe(128000);
+    expect(getModelMaxTokens('openrouter/deepseek-reasoner')).toBe(128000);
+    expect(getModelMaxTokens('openai/deepseek-v3')).toBe(128000);
+  });
 });
 
 describe('matchModelName', () => {
@@ -705,11 +719,42 @@ describe('Meta Models Tests', () => {
   });
 
   test('should match Deepseek model variations', () => {
-    expect(matchModelName('deepseek-chat')).toBe('deepseek');
+    expect(matchModelName('deepseek-chat')).toBe('deepseek-chat');
     expect(matchModelName('deepseek-coder')).toBe('deepseek');
   });
 });
 
+describe('DeepSeek Max Output Tokens', () => {
+  const { getModelMaxOutputTokens } = require('@librechat/api');
+
+  test('should return correct max output tokens for deepseek-chat', () => {
+    expect(getModelMaxOutputTokens('deepseek-chat')).toBe(8000);
+    expect(getModelMaxOutputTokens('deepseek-chat', EModelEndpoint.openAI)).toBe(8000);
+    expect(getModelMaxOutputTokens('deepseek-chat', EModelEndpoint.custom)).toBe(8000);
+  });
+
+  test('should return correct max output tokens for deepseek-reasoner', () => {
+    expect(getModelMaxOutputTokens('deepseek-reasoner')).toBe(64000);
+    expect(getModelMaxOutputTokens('deepseek-reasoner', EModelEndpoint.openAI)).toBe(64000);
+    expect(getModelMaxOutputTokens('deepseek-reasoner', EModelEndpoint.custom)).toBe(64000);
+  });
+
+  test('should return correct max output tokens for deepseek-r1', () => {
+    expect(getModelMaxOutputTokens('deepseek-r1')).toBe(64000);
+    expect(getModelMaxOutputTokens('deepseek-r1', EModelEndpoint.openAI)).toBe(64000);
+  });
+
+  test('should return correct max output tokens for deepseek base pattern', () => {
+    expect(getModelMaxOutputTokens('deepseek')).toBe(8000);
+    expect(getModelMaxOutputTokens('deepseek-v3')).toBe(8000);
+  });
+
+  test('should handle DeepSeek models with provider prefixes for max output tokens', () => {
+    expect(getModelMaxOutputTokens('deepseek/deepseek-chat')).toBe(8000);
+    expect(getModelMaxOutputTokens('openrouter/deepseek-reasoner')).toBe(64000);
+  });
+});
+
 describe('processModelData with Meta models', () => {
   test('should process Meta model data correctly', () => {
     const input = {
diff --git a/package-lock.json b/package-lock.json
index 8145faae7d..c0f359f04b 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -61,7 +61,7 @@
         "@langchain/google-genai": "^0.2.13",
         "@langchain/google-vertexai": "^0.2.13",
         "@langchain/textsplitters": "^0.1.0",
-        "@librechat/agents": "^3.0.34",
+        "@librechat/agents": "^3.0.35",
         "@librechat/api": "*",
         "@librechat/data-schemas": "*",
         "@microsoft/microsoft-graph-client": "^3.0.7",
@@ -16289,9 +16289,9 @@
       }
     },
     "node_modules/@librechat/agents": {
-      "version": "3.0.34",
-      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.34.tgz",
-      "integrity": "sha512-sngGZewCp/p8nMhUVJdXNZw74MpehJ1SMvWPxR9euLdOQX2qgR23/c2/5VxpdsR4gXmOSxpdhsc5cin2y6R/mA==",
+      "version": "3.0.35",
+      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.35.tgz",
+      "integrity": "sha512-9KcTin8CtJIsADkcZtyCHwfn9GQ7AIRWTAhNDFtxXsuqmlnytD8bjh0GiRc6uOOQ/Dw8zL/oRcqNDubempwBfg==",
       "license": "MIT",
       "dependencies": {
         "@langchain/anthropic": "^0.3.26",
@@ -46275,7 +46275,7 @@
         "@azure/storage-blob": "^12.27.0",
         "@keyv/redis": "^4.3.3",
         "@langchain/core": "^0.3.79",
-        "@librechat/agents": "^3.0.34",
+        "@librechat/agents": "^3.0.35",
         "@librechat/data-schemas": "*",
         "@modelcontextprotocol/sdk": "^1.21.0",
         "axios": "^1.12.1",
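
One practical consequence of the limits tested above: deepseek-reasoner advertises a 128,000-token context window but caps output at 64,000 tokens, so a client must budget prompt size against the reserved response budget. A minimal sketch follows, with promptBudget as an illustrative helper rather than a LibreChat API.

// Tokens left for the prompt once the response budget is reserved.
// NOTE: `promptBudget` is a hypothetical helper for illustration.
function promptBudget(contextWindow, maxOutputTokens, reserve = 0) {
  return contextWindow - maxOutputTokens - reserve;
}

console.log(promptBudget(128000, 64000)); // 64000 left for deepseek-reasoner
console.log(promptBudget(128000, 8000)); // 120000 left for deepseek-chat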
diff --git a/packages/api/package.json b/packages/api/package.json
index c248621148..36a290fb57 100644
--- a/packages/api/package.json
+++ b/packages/api/package.json
@@ -84,7 +84,7 @@
     "@azure/storage-blob": "^12.27.0",
     "@keyv/redis": "^4.3.3",
     "@langchain/core": "^0.3.79",
-    "@librechat/agents": "^3.0.34",
+    "@librechat/agents": "^3.0.35",
     "@librechat/data-schemas": "*",
     "@modelcontextprotocol/sdk": "^1.21.0",
     "axios": "^1.12.1",
diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts
index 7dda247cc1..33196ab7dc 100644
--- a/packages/api/src/utils/tokens.ts
+++ b/packages/api/src/utils/tokens.ts
@@ -140,6 +140,7 @@ const anthropicModels = {
 
 const deepseekModels = {
   deepseek: 128000,
+  'deepseek-chat': 128000,
   'deepseek-reasoner': 128000,
   'deepseek-r1': 128000,
   'deepseek-v3': 128000,
@@ -347,11 +348,21 @@ const anthropicMaxOutputs = {
   'claude-3-7-sonnet': 128000,
 };
 
+/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */
+const deepseekMaxOutputs = {
+  deepseek: 8000, // deepseek-chat default: 4K, max: 8K
+  'deepseek-chat': 8000,
+  'deepseek-reasoner': 64000, // default: 32K, max: 64K
+  'deepseek-r1': 64000,
+  'deepseek-v3': 8000,
+  'deepseek.r1': 64000,
+};
+
 export const maxOutputTokensMap = {
   [EModelEndpoint.anthropic]: anthropicMaxOutputs,
   [EModelEndpoint.azureOpenAI]: modelMaxOutputs,
-  [EModelEndpoint.openAI]: modelMaxOutputs,
-  [EModelEndpoint.custom]: modelMaxOutputs,
+  [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
+  [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs },
 };
 
 /**
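
The spread order in maxOutputTokensMap is what lets the DeepSeek caps take effect for the openAI and custom endpoints: later spreads win, so keys in deepseekMaxOutputs override any identical keys in modelMaxOutputs while unrelated entries pass through. A small sketch with illustrative values (the 4096 base entry below is invented for demonstration):

// Later spreads win: DeepSeek-specific caps override overlapping base
// entries; everything else is preserved.
// NOTE: the values below are illustrative, not LibreChat's actual tables.
const modelMaxOutputs = { 'gpt-4o': 16384, deepseek: 4096 };
const deepseekMaxOutputs = { deepseek: 8000, 'deepseek-reasoner': 64000 };

const merged = { ...modelMaxOutputs, ...deepseekMaxOutputs };
console.log(merged.deepseek); // 8000 (the DeepSeek entry wins)
console.log(merged['gpt-4o']); // 16384 (unrelated entry unchanged)
console.log(merged['deepseek-reasoner']); // 64000 (new entry added)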