diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index 5601e0e3cd..8fce279bf1 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -886,7 +886,8 @@ class GoogleClient extends BaseClient {
   }
 
   getSafetySettings() {
-    const isGemini2 = this.modelOptions.model.includes('gemini-2.0');
+    const model = this.modelOptions.model;
+    const isGemini2 = model.includes('gemini-2.0') && !model.includes('thinking');
     const mapThreshold = (value) => {
       if (isGemini2 && value === 'BLOCK_NONE') {
         return 'OFF';
diff --git a/api/models/tx.js b/api/models/tx.js
index 918899cacb..9554894ddf 100644
--- a/api/models/tx.js
+++ b/api/models/tx.js
@@ -96,6 +96,8 @@ const tokenValues = Object.assign(
     'claude-': { prompt: 0.8, completion: 2.4 },
     'command-r-plus': { prompt: 3, completion: 15 },
     'command-r': { prompt: 0.5, completion: 1.5 },
+    'deepseek-reasoner': { prompt: 0.14, completion: 0.55 },
+    deepseek: { prompt: 0.07, completion: 0.28 },
     /* cohere doesn't have rates for the older command models,
     so this was from https://artificialanalysis.ai/models/command-light/providers */
     command: { prompt: 0.38, completion: 0.38 },
diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js
index 238ca7b895..0492a0eedb 100644
--- a/api/models/tx.spec.js
+++ b/api/models/tx.spec.js
@@ -263,6 +263,37 @@ describe('AWS Bedrock Model Tests', () => {
   });
 });
 
+describe('Deepseek Model Tests', () => {
+  const deepseekModels = ['deepseek-chat', 'deepseek-coder', 'deepseek-reasoner'];
+
+  it('should return the correct prompt multipliers for all models', () => {
+    const results = deepseekModels.map((model) => {
+      const valueKey = getValueKey(model);
+      const multiplier = getMultiplier({ valueKey, tokenType: 'prompt' });
+      return tokenValues[valueKey].prompt && multiplier === tokenValues[valueKey].prompt;
+    });
+    expect(results.every(Boolean)).toBe(true);
+  });
+
+  it('should return the correct completion multipliers for all models', () => {
+    const results = deepseekModels.map((model) => {
+      const valueKey = getValueKey(model);
+      const multiplier = getMultiplier({ valueKey, tokenType: 'completion' });
+      return tokenValues[valueKey].completion && multiplier === tokenValues[valueKey].completion;
+    });
+    expect(results.every(Boolean)).toBe(true);
+  });
+
+  it('should return the correct prompt multipliers for reasoning model', () => {
+    const model = 'deepseek-reasoner';
+    const valueKey = getValueKey(model);
+    expect(valueKey).toBe(model);
+    const multiplier = getMultiplier({ valueKey, tokenType: 'prompt' });
+    const result = tokenValues[valueKey].prompt && multiplier === tokenValues[valueKey].prompt;
+    expect(result).toBe(true);
+  });
+});
+
 describe('getCacheMultiplier', () => {
   it('should return the correct cache multiplier for a given valueKey and cacheType', () => {
     expect(getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'write' })).toBe(
diff --git a/api/server/services/Endpoints/google/llm.js b/api/server/services/Endpoints/google/llm.js
index 92eca9a6ab..f19d0539c7 100644
--- a/api/server/services/Endpoints/google/llm.js
+++ b/api/server/services/Endpoints/google/llm.js
@@ -87,7 +87,8 @@ function getLLMConfig(credentials, options = {}) {
     maxRetries: 2,
   };
 
-  const isGemini2 = llmConfig.model.includes('gemini-2.0');
+  /** Used only for Safety Settings */
+  const isGemini2 = llmConfig.model.includes('gemini-2.0') && !llmConfig.model.includes('thinking');
   const isGenerativeModel = llmConfig.model.includes('gemini');
   const isChatModel = !isGenerativeModel && llmConfig.model.includes('chat');
   const isTextModel = !isGenerativeModel && !isChatModel && /code|text/.test(llmConfig.model);
diff --git a/api/utils/tokens.js b/api/utils/tokens.js
index 68ceca797f..72151fcb19 100644
--- a/api/utils/tokens.js
+++ b/api/utils/tokens.js
@@ -82,7 +82,8 @@ const anthropicModels = {
 };
 
 const deepseekModels = {
-  deepseek: 127500,
+  'deepseek-reasoner': 63000, // -1000 from max (API)
+  deepseek: 63000, // -1000 from max (API)
 };
 
 const metaModels = {
diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js
index b1f37bb1f4..f478c4769b 100644
--- a/api/utils/tokens.spec.js
+++ b/api/utils/tokens.spec.js
@@ -385,8 +385,15 @@ describe('Meta Models Tests', () => {
   });
 
   test('should match Deepseek model variations', () => {
-    expect(getModelMaxTokens('deepseek-chat')).toBe(127500);
-    expect(getModelMaxTokens('deepseek-coder')).toBe(127500);
+    expect(getModelMaxTokens('deepseek-chat')).toBe(
+      maxTokensMap[EModelEndpoint.openAI]['deepseek'],
+    );
+    expect(getModelMaxTokens('deepseek-coder')).toBe(
+      maxTokensMap[EModelEndpoint.openAI]['deepseek'],
+    );
+    expect(getModelMaxTokens('deepseek-reasoner')).toBe(
+      maxTokensMap[EModelEndpoint.openAI]['deepseek-reasoner'],
+    );
   });
 });
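
Note on the tokenValues ordering in the api/models/tx.js hunk above: a minimal sketch, assuming value-key resolution returns the first registered key found in the model name, of why the more specific 'deepseek-reasoner' entry is added before the bare deepseek key. The names sketchValues and getValueKeySketch below are hypothetical stand-ins, not the project's actual getValueKey implementation.

// Hypothetical sketch of first-match key resolution; the real getValueKey
// in api/models/tx.js may differ in details.
const sketchValues = {
  'deepseek-reasoner': { prompt: 0.14, completion: 0.55 },
  deepseek: { prompt: 0.07, completion: 0.28 },
};

function getValueKeySketch(modelName) {
  // String keys preserve insertion order, so the more specific
  // 'deepseek-reasoner' entry is checked before 'deepseek'.
  return Object.keys(sketchValues).find((key) => modelName.includes(key));
}

getValueKeySketch('deepseek-reasoner'); // -> 'deepseek-reasoner' (0.14 / 0.55)
getValueKeySketch('deepseek-chat'); // -> 'deepseek' (0.07 / 0.28)

Under that assumption, reversing the two entries would price 'deepseek-reasoner' at the cheaper deepseek rates, which is the kind of regression the new 'should return the correct prompt multipliers for reasoning model' test guards against by asserting that the value key resolves to 'deepseek-reasoner' itself.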