diff --git a/.env.example b/.env.example
index a6ff6157ce..50229b1997 100644
--- a/.env.example
+++ b/.env.example
@@ -193,10 +193,10 @@ GOOGLE_KEY=user_provided
 # GOOGLE_AUTH_HEADER=true
 
 # Gemini API (AI Studio)
-# GOOGLE_MODELS=gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite
+# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite
 
 # Vertex AI
-# GOOGLE_MODELS=gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001
+# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001
 
 # GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001
 
diff --git a/api/models/Transaction.spec.js b/api/models/Transaction.spec.js
index 4b478d4dc3..545c7b2755 100644
--- a/api/models/Transaction.spec.js
+++ b/api/models/Transaction.spec.js
@@ -823,6 +823,139 @@ describe('Premium Token Pricing Integration Tests', () => {
     expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedTotalCost, 0);
   });
 
+  test('spendTokens should apply standard pricing for gemini-3.1-pro-preview below threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = 100000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-below',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const standardPromptRate = tokenValues['gemini-3.1'].prompt;
+    const standardCompletionRate = tokenValues['gemini-3.1'].completion;
+    const expectedCost =
+      promptTokens * standardPromptRate + completionTokens * standardCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  test('spendTokens should apply premium pricing for gemini-3.1-pro-preview above threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = 250000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-above',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const premiumPromptRate = premiumTokenValues['gemini-3.1'].prompt;
+    const premiumCompletionRate = premiumTokenValues['gemini-3.1'].completion;
+    const expectedCost =
+      promptTokens * premiumPromptRate + completionTokens * premiumCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  test('spendTokens should apply standard pricing for gemini-3.1-pro-preview at exactly the threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = premiumTokenValues['gemini-3.1'].threshold;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-exact',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const standardPromptRate = tokenValues['gemini-3.1'].prompt;
+    const standardCompletionRate = tokenValues['gemini-3.1'].completion;
+    const expectedCost =
+      promptTokens * standardPromptRate + completionTokens * standardCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  test('spendStructuredTokens should apply premium pricing for gemini-3.1 when total input exceeds threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-structured-premium',
+      model,
+      context: 'message',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    const tokenUsage = {
+      promptTokens: {
+        input: 200000,
+        write: 10000,
+        read: 5000,
+      },
+      completionTokens: 1000,
+    };
+
+    const totalInput =
+      tokenUsage.promptTokens.input + tokenUsage.promptTokens.write + tokenUsage.promptTokens.read;
+
+    await spendStructuredTokens(txData, tokenUsage);
+
+    const premiumPromptRate = premiumTokenValues['gemini-3.1'].prompt;
+    const premiumCompletionRate = premiumTokenValues['gemini-3.1'].completion;
+    const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
+    const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
+
+    const expectedPromptCost =
+      tokenUsage.promptTokens.input * premiumPromptRate +
+      tokenUsage.promptTokens.write * writeMultiplier +
+      tokenUsage.promptTokens.read * readMultiplier;
+    const expectedCompletionCost = tokenUsage.completionTokens * premiumCompletionRate;
+    const expectedTotalCost = expectedPromptCost + expectedCompletionCost;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(totalInput).toBeGreaterThan(premiumTokenValues['gemini-3.1'].threshold);
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedTotalCost, 0);
+  });
+
   test('non-premium models should not be affected by inputTokenCount regardless of prompt size', async () => {
     const userId = new mongoose.Types.ObjectId();
     const initialBalance = 100000000;
diff --git a/api/models/spendTokens.spec.js b/api/models/spendTokens.spec.js
index c076d29700..dfeec5ee83 100644
--- a/api/models/spendTokens.spec.js
+++ b/api/models/spendTokens.spec.js
@@ -878,6 +878,135 @@ describe('spendTokens', () => {
     expect(result.completion.completion).toBeCloseTo(-expectedCompletionCost, 0);
   });
 
+  it('should charge standard rates for gemini-3.1-pro-preview when prompt tokens are below threshold', async () => {
+    const initialBalance = 100000000;
+    await Balance.create({
+      user: userId,
+      tokenCredits: initialBalance,
+    });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = 100000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-standard-pricing',
+      model,
+      context: 'test',
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const expectedCost =
+      promptTokens * tokenValues['gemini-3.1'].prompt +
+      completionTokens * tokenValues['gemini-3.1'].completion;
+
+    const balance = await Balance.findOne({ user: userId });
+    expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  it('should charge premium rates for gemini-3.1-pro-preview when prompt tokens exceed threshold', async () => {
+    const initialBalance = 100000000;
+    await Balance.create({
+      user: userId,
+      tokenCredits: initialBalance,
+    });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = 250000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-premium-pricing',
+      model,
+      context: 'test',
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const expectedCost =
+      promptTokens * premiumTokenValues['gemini-3.1'].prompt +
+      completionTokens * premiumTokenValues['gemini-3.1'].completion;
+
+    const balance = await Balance.findOne({ user: userId });
+    expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  it('should charge premium rates for gemini-3.1-pro-preview-customtools when prompt tokens exceed threshold', async () => {
+    const initialBalance = 100000000;
+    await Balance.create({
+      user: userId,
+      tokenCredits: initialBalance,
+    });
+
+    const model = 'gemini-3.1-pro-preview-customtools';
+    const promptTokens = 250000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-customtools-premium',
+      model,
+      context: 'test',
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const expectedCost =
+      promptTokens * premiumTokenValues['gemini-3.1'].prompt +
+      completionTokens * premiumTokenValues['gemini-3.1'].completion;
+
+    const balance = await Balance.findOne({ user: userId });
+    expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  it('should charge premium rates for structured gemini-3.1 tokens when total input exceeds threshold', async () => {
+    const initialBalance = 100000000;
+    await Balance.create({
+      user: userId,
+      tokenCredits: initialBalance,
+    });
+
+    const model = 'gemini-3.1-pro-preview';
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-structured-premium',
+      model,
+      context: 'test',
+      balance: { enabled: true },
+    };
+
+    const tokenUsage = {
+      promptTokens: {
+        input: 200000,
+        write: 10000,
+        read: 5000,
+      },
+      completionTokens: 1000,
+    };
+
+    const result = await spendStructuredTokens(txData, tokenUsage);
+
+    const premiumPromptRate = premiumTokenValues['gemini-3.1'].prompt;
+    const premiumCompletionRate = premiumTokenValues['gemini-3.1'].completion;
+    const writeRate = getCacheMultiplier({ model, cacheType: 'write' });
+    const readRate = getCacheMultiplier({ model, cacheType: 'read' });
+
+    const expectedPromptCost =
+      tokenUsage.promptTokens.input * premiumPromptRate +
+      tokenUsage.promptTokens.write * writeRate +
+      tokenUsage.promptTokens.read * readRate;
+    const expectedCompletionCost = tokenUsage.completionTokens * premiumCompletionRate;
+
+    expect(result.prompt.prompt).toBeCloseTo(-expectedPromptCost, 0);
+    expect(result.completion.completion).toBeCloseTo(-expectedCompletionCost, 0);
+  });
+
   it('should not apply premium pricing to non-premium models regardless of prompt size', async () => {
     const initialBalance = 100000000;
     await Balance.create({
diff --git a/api/models/tx.js b/api/models/tx.js
index 9a6305ec5c..a13143a862 100644
--- a/api/models/tx.js
+++ b/api/models/tx.js
@@ -200,6 +200,7 @@ const tokenValues = Object.assign(
     'gemini-2.5-flash-image': { prompt: 0.15, completion: 30 },
     'gemini-3': { prompt: 2, completion: 12 },
     'gemini-3-pro-image': { prompt: 2, completion: 120 },
+    'gemini-3.1': { prompt: 2, completion: 12 },
     'gemini-pro-vision': { prompt: 0.5, completion: 1.5 },
     grok: { prompt: 2.0, completion: 10.0 }, // Base pattern defaults to grok-2
     'grok-beta': { prompt: 5.0, completion: 15.0 },
@@ -330,6 +331,8 @@ const cacheTokenValues = {
   'kimi-k2-0711-preview': { write: 0.6, read: 0.15 },
   'kimi-k2-thinking': { write: 0.6, read: 0.15 },
   'kimi-k2-thinking-turbo': { write: 1.15, read: 0.15 },
+  // Gemini 3.1 models - cache read: $0.20/1M (<=200k), cache write: standard input price
+  'gemini-3.1': { write: 2, read: 0.2 },
 };
 
 /**
@@ -340,6 +343,7 @@ const cacheTokenValues = {
 const premiumTokenValues = {
   'claude-opus-4-6': { threshold: 200000, prompt: 10, completion: 37.5 },
   'claude-sonnet-4-6': { threshold: 200000, prompt: 6, completion: 22.5 },
+  'gemini-3.1': { threshold: 200000, prompt: 4, completion: 18 },
 };
 
 /**
diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js
index df1bec8619..b58afa9c70 100644
--- a/api/models/tx.spec.js
+++ b/api/models/tx.spec.js
@@ -1345,6 +1345,8 @@ describe('getCacheMultiplier', () => {
 describe('Google Model Tests', () => {
   const googleModels = [
     'gemini-3',
+    'gemini-3.1-pro-preview',
+    'gemini-3.1-pro-preview-customtools',
     'gemini-2.5-pro',
     'gemini-2.5-flash',
     'gemini-2.5-flash-lite',
@@ -1389,6 +1391,8 @@ describe('Google Model Tests', () => {
   it('should map to the correct model keys', () => {
     const expected = {
       'gemini-3': 'gemini-3',
+      'gemini-3.1-pro-preview': 'gemini-3.1',
+      'gemini-3.1-pro-preview-customtools': 'gemini-3.1',
       'gemini-2.5-pro': 'gemini-2.5-pro',
       'gemini-2.5-flash': 'gemini-2.5-flash',
       'gemini-2.5-flash-lite': 'gemini-2.5-flash-lite',
@@ -1432,6 +1436,174 @@ describe('Google Model Tests', () => {
       ).toBe(tokenValues[expected].completion);
     });
   });
+
+  it('should return correct prompt and completion rates for Gemini 3.1', () => {
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'prompt',
+        endpoint: EModelEndpoint.google,
+      }),
+    ).toBe(tokenValues['gemini-3.1'].prompt);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'completion',
+        endpoint: EModelEndpoint.google,
+      }),
+    ).toBe(tokenValues['gemini-3.1'].completion);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview-customtools',
+        tokenType: 'prompt',
+        endpoint: EModelEndpoint.google,
+      }),
+    ).toBe(tokenValues['gemini-3.1'].prompt);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview-customtools',
+        tokenType: 'completion',
+        endpoint: EModelEndpoint.google,
+      }),
+    ).toBe(tokenValues['gemini-3.1'].completion);
+  });
+
+  it('should return correct cache rates for Gemini 3.1', () => {
+    ['gemini-3.1-pro-preview', 'gemini-3.1-pro-preview-customtools'].forEach((model) => {
+      expect(getCacheMultiplier({ model, cacheType: 'write' })).toBe(
+        cacheTokenValues['gemini-3.1'].write,
+      );
+      expect(getCacheMultiplier({ model, cacheType: 'read' })).toBe(
+        cacheTokenValues['gemini-3.1'].read,
+      );
+    });
+  });
+});
+
+describe('Gemini 3.1 Premium Token Pricing', () => {
+  const premiumKey = 'gemini-3.1';
+  const premiumEntry = premiumTokenValues[premiumKey];
+  const { threshold } = premiumEntry;
+  const belowThreshold = threshold - 1;
+  const aboveThreshold = threshold + 1;
+  const wellAboveThreshold = threshold * 2;
+
+  it('should have premium pricing defined for gemini-3.1', () => {
+    expect(premiumEntry).toBeDefined();
+    expect(premiumEntry.threshold).toBeDefined();
+    expect(premiumEntry.prompt).toBeDefined();
+    expect(premiumEntry.completion).toBeDefined();
+    expect(premiumEntry.prompt).toBeGreaterThan(tokenValues[premiumKey].prompt);
+    expect(premiumEntry.completion).toBeGreaterThan(tokenValues[premiumKey].completion);
+  });
+
+  it('should return null from getPremiumRate when inputTokenCount is below or at threshold', () => {
+    expect(getPremiumRate(premiumKey, 'prompt', belowThreshold)).toBeNull();
+    expect(getPremiumRate(premiumKey, 'completion', belowThreshold)).toBeNull();
+    expect(getPremiumRate(premiumKey, 'prompt', threshold)).toBeNull();
+  });
+
+  it('should return premium rate from getPremiumRate when inputTokenCount exceeds threshold', () => {
+    expect(getPremiumRate(premiumKey, 'prompt', aboveThreshold)).toBe(premiumEntry.prompt);
+    expect(getPremiumRate(premiumKey, 'completion', aboveThreshold)).toBe(premiumEntry.completion);
+    expect(getPremiumRate(premiumKey, 'prompt', wellAboveThreshold)).toBe(premiumEntry.prompt);
+  });
+
+  it('should return null from getPremiumRate when inputTokenCount is undefined or null', () => {
+    expect(getPremiumRate(premiumKey, 'prompt', undefined)).toBeNull();
+    expect(getPremiumRate(premiumKey, 'prompt', null)).toBeNull();
+  });
+
+  it('should return standard rate from getMultiplier when inputTokenCount is below threshold', () => {
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'prompt',
+        inputTokenCount: belowThreshold,
+      }),
+    ).toBe(tokenValues[premiumKey].prompt);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'completion',
+        inputTokenCount: belowThreshold,
+      }),
+    ).toBe(tokenValues[premiumKey].completion);
+  });
+
+  it('should return premium rate from getMultiplier when inputTokenCount exceeds threshold', () => {
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'prompt',
+        inputTokenCount: aboveThreshold,
+      }),
+    ).toBe(premiumEntry.prompt);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'completion',
+        inputTokenCount: aboveThreshold,
+      }),
+    ).toBe(premiumEntry.completion);
+  });
+
+  it('should return standard rate from getMultiplier when inputTokenCount is exactly at threshold', () => {
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'prompt',
+        inputTokenCount: threshold,
+      }),
+    ).toBe(tokenValues[premiumKey].prompt);
+  });
+
+  it('should apply premium pricing to customtools variant above threshold', () => {
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview-customtools',
+        tokenType: 'prompt',
+        inputTokenCount: aboveThreshold,
+      }),
+    ).toBe(premiumEntry.prompt);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview-customtools',
+        tokenType: 'completion',
+        inputTokenCount: aboveThreshold,
+      }),
+    ).toBe(premiumEntry.completion);
+  });
+
+  it('should use standard rate when inputTokenCount is not provided', () => {
+    expect(getMultiplier({ model: 'gemini-3.1-pro-preview', tokenType: 'prompt' })).toBe(
+      tokenValues[premiumKey].prompt,
+    );
+    expect(getMultiplier({ model: 'gemini-3.1-pro-preview', tokenType: 'completion' })).toBe(
+      tokenValues[premiumKey].completion,
+    );
+  });
+
+  it('should apply premium pricing through getMultiplier with valueKey path', () => {
+    const valueKey = getValueKey('gemini-3.1-pro-preview');
+    expect(valueKey).toBe(premiumKey);
+    expect(getMultiplier({ valueKey, tokenType: 'prompt', inputTokenCount: aboveThreshold })).toBe(
+      premiumEntry.prompt,
+    );
+    expect(
+      getMultiplier({ valueKey, tokenType: 'completion', inputTokenCount: aboveThreshold }),
+    ).toBe(premiumEntry.completion);
+  });
+
+  it('should apply standard pricing through getMultiplier with valueKey path when below threshold', () => {
+    const valueKey = getValueKey('gemini-3.1-pro-preview');
+    expect(getMultiplier({ valueKey, tokenType: 'prompt', inputTokenCount: belowThreshold })).toBe(
+      tokenValues[premiumKey].prompt,
+    );
+    expect(
+      getMultiplier({ valueKey, tokenType: 'completion', inputTokenCount: belowThreshold }),
+    ).toBe(tokenValues[premiumKey].completion);
+  });
 });
 
 describe('Grok Model Tests - Pricing', () => {
diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js
index 18905d6d18..efbd962a8c 100644
--- a/api/utils/tokens.spec.js
+++ b/api/utils/tokens.spec.js
@@ -279,6 +279,12 @@ describe('getModelMaxTokens', () => {
     expect(getModelMaxTokens('gemini-3', EModelEndpoint.google)).toBe(
       maxTokensMap[EModelEndpoint.google]['gemini-3'],
     );
+    expect(getModelMaxTokens('gemini-3.1-pro-preview', EModelEndpoint.google)).toBe(
+      maxTokensMap[EModelEndpoint.google]['gemini-3.1'],
+    );
+    expect(getModelMaxTokens('gemini-3.1-pro-preview-customtools', EModelEndpoint.google)).toBe(
+      maxTokensMap[EModelEndpoint.google]['gemini-3.1'],
+    );
     expect(getModelMaxTokens('gemini-2.5-pro', EModelEndpoint.google)).toBe(
       maxTokensMap[EModelEndpoint.google]['gemini-2.5-pro'],
     );
diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts
index a824afa489..faeb8f0f90 100644
--- a/packages/api/src/utils/tokens.ts
+++ b/packages/api/src/utils/tokens.ts
@@ -106,6 +106,7 @@ const googleModels = {
   'gemini-exp': 2000000,
   'gemini-3': 1000000, // 1M input tokens, 64k output tokens
   'gemini-3-pro-image': 1000000,
+  'gemini-3.1': 1000000, // 1M input tokens, 64k output tokens
   'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens
   'gemini-2.5-pro': 1000000,
   'gemini-2.5-flash': 1000000,
diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts
index 360cce69ba..82d477e54e 100644
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -1192,6 +1192,9 @@ export const defaultModels = {
   [EModelEndpoint.assistants]: [...sharedOpenAIModels, 'chatgpt-4o-latest'],
   [EModelEndpoint.agents]: sharedOpenAIModels, // TODO: Add agent models (agentsModels)
   [EModelEndpoint.google]: [
+    // Gemini 3.1 Models
+    'gemini-3.1-pro-preview',
+    'gemini-3.1-pro-preview-customtools',
     // Gemini 2.5 Models
     'gemini-2.5-pro',
     'gemini-2.5-flash',
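
A minimal usage sketch of the rate selection this patch encodes, assuming `getMultiplier` is exported from `api/models/tx.js` as the spec files imply; the require path is illustrative and the snippet is not part of the patch itself:

// Illustrative only — not part of the patch. Assumes getMultiplier is exported
// from api/models/tx.js, as the spec files above imply.
const { getMultiplier } = require('./api/models/tx');

const model = 'gemini-3.1-pro-preview';

// At or exactly at the 200k input-token threshold, the standard gemini-3.1 rate applies.
const standard = getMultiplier({ model, tokenType: 'prompt', inputTokenCount: 200000 }); // 2

// Above the threshold, the premium rate applies.
const premium = getMultiplier({ model, tokenType: 'prompt', inputTokenCount: 200001 }); // 4

console.log({ standard, premium }); // { standard: 2, premium: 4 }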