mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-21 18:04:08 +01:00
🤖 feat: Gemini 3.1 Pricing and Context Window (#11884)
- Added support for the new Gemini 3.1 models, including 'gemini-3.1-pro-preview' and 'gemini-3.1-pro-preview-customtools'.
- Updated pricing logic to apply standard and premium rates based on token usage thresholds for the new models.
- Enhanced tests to validate pricing behavior for both standard and premium scenarios.
- Modified configuration files to include Gemini 3.1 models in the default model lists and token value mappings.
- Updated environment example file to reflect the new model options.
This commit is contained in:
parent
a103ce72b4
commit
7a1d2969b8
8 changed files with 450 additions and 2 deletions
|
|
@ -823,6 +823,139 @@ describe('Premium Token Pricing Integration Tests', () => {
|
|||
expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedTotalCost, 0);
|
||||
});
|
||||
|
||||
// Seeds a fresh balance, spends a gemini-3.1-pro-preview request with 100000 prompt
// tokens, and checks the deduction against the standard `tokenValues['gemini-3.1']` rates.
test('spendTokens should apply standard pricing for gemini-3.1-pro-preview below threshold', async () => {
  const startingCredits = 100000000;
  const promptCount = 100000;
  const completionCount = 500;

  const user = new mongoose.Types.ObjectId();
  await Balance.create({ user, tokenCredits: startingCredits });

  await spendTokens(
    {
      user,
      conversationId: 'test-gemini31-below',
      model: 'gemini-3.1-pro-preview',
      context: 'test',
      endpointTokenConfig: null,
      balance: { enabled: true },
    },
    { promptTokens: promptCount, completionTokens: completionCount },
  );

  // Expected charge is built from the standard (non-premium) multipliers.
  const { prompt: promptRate, completion: completionRate } = tokenValues['gemini-3.1'];
  const expectedCost = promptCount * promptRate + completionCount * completionRate;

  const { tokenCredits } = await Balance.findOne({ user });
  expect(tokenCredits).toBeCloseTo(startingCredits - expectedCost, 0);
});
|
||||
|
||||
// Spends a gemini-3.1-pro-preview request with 250000 prompt tokens and checks that
// both prompt and completion are deducted at the `premiumTokenValues['gemini-3.1']` rates.
test('spendTokens should apply premium pricing for gemini-3.1-pro-preview above threshold', async () => {
  const startingCredits = 100000000;
  const promptCount = 250000;
  const completionCount = 500;

  const user = new mongoose.Types.ObjectId();
  await Balance.create({ user, tokenCredits: startingCredits });

  await spendTokens(
    {
      user,
      conversationId: 'test-gemini31-above',
      model: 'gemini-3.1-pro-preview',
      context: 'test',
      endpointTokenConfig: null,
      balance: { enabled: true },
    },
    { promptTokens: promptCount, completionTokens: completionCount },
  );

  // Expected charge is built from the premium multipliers.
  const { prompt: promptRate, completion: completionRate } = premiumTokenValues['gemini-3.1'];
  const expectedCost = promptCount * promptRate + completionCount * completionRate;

  const { tokenCredits } = await Balance.findOne({ user });
  expect(tokenCredits).toBeCloseTo(startingCredits - expectedCost, 0);
});
|
||||
|
||||
// Boundary case: the prompt size equals the configured premium threshold, and the
// deduction is still expected to use the standard `tokenValues['gemini-3.1']` rates.
test('spendTokens should apply standard pricing for gemini-3.1-pro-preview at exactly the threshold', async () => {
  const startingCredits = 100000000;
  // Read the boundary from the pricing table so the test tracks any threshold change.
  const promptCount = premiumTokenValues['gemini-3.1'].threshold;
  const completionCount = 500;

  const user = new mongoose.Types.ObjectId();
  await Balance.create({ user, tokenCredits: startingCredits });

  await spendTokens(
    {
      user,
      conversationId: 'test-gemini31-exact',
      model: 'gemini-3.1-pro-preview',
      context: 'test',
      endpointTokenConfig: null,
      balance: { enabled: true },
    },
    { promptTokens: promptCount, completionTokens: completionCount },
  );

  const { prompt: promptRate, completion: completionRate } = tokenValues['gemini-3.1'];
  const expectedCost = promptCount * promptRate + completionCount * completionRate;

  const { tokenCredits } = await Balance.findOne({ user });
  expect(tokenCredits).toBeCloseTo(startingCredits - expectedCost, 0);
});
|
||||
|
||||
// Structured usage (input + cache write + cache read) whose sum is above the premium
// threshold: the input and completion portions are expected at premium rates, while the
// cache portions are expected at whatever getCacheMultiplier reports for the model.
test('spendStructuredTokens should apply premium pricing for gemini-3.1 when total input exceeds threshold', async () => {
  const startingCredits = 100000000;
  const geminiModel = 'gemini-3.1-pro-preview';

  const user = new mongoose.Types.ObjectId();
  await Balance.create({ user, tokenCredits: startingCredits });

  const transaction = {
    user,
    conversationId: 'test-gemini31-structured-premium',
    model: geminiModel,
    context: 'message',
    endpointTokenConfig: null,
    balance: { enabled: true },
  };
  const structuredUsage = {
    promptTokens: { input: 200000, write: 10000, read: 5000 },
    completionTokens: 1000,
  };

  // Total prompt-side tokens, captured before spending, used below as a sanity check.
  const totalInput = Object.values(structuredUsage.promptTokens).reduce(
    (sum, count) => sum + count,
    0,
  );

  await spendStructuredTokens(transaction, structuredUsage);

  const { prompt: promptRate, completion: completionRate } = premiumTokenValues['gemini-3.1'];
  const cacheWriteRate = getCacheMultiplier({ model: geminiModel, cacheType: 'write' });
  const cacheReadRate = getCacheMultiplier({ model: geminiModel, cacheType: 'read' });

  const { input, write, read } = structuredUsage.promptTokens;
  const promptCost = input * promptRate + write * cacheWriteRate + read * cacheReadRate;
  const completionCost = structuredUsage.completionTokens * completionRate;
  const totalCost = promptCost + completionCost;

  const balanceAfter = await Balance.findOne({ user });

  // Guard: the fixture must actually be over the threshold for this test to be meaningful.
  expect(totalInput).toBeGreaterThan(premiumTokenValues['gemini-3.1'].threshold);
  expect(balanceAfter.tokenCredits).toBeCloseTo(startingCredits - totalCost, 0);
});
|
||||
|
||||
test('non-premium models should not be affected by inputTokenCount regardless of prompt size', async () => {
|
||||
const userId = new mongoose.Types.ObjectId();
|
||||
const initialBalance = 100000000;
|
||||
|
|
|
|||
|
|
@ -878,6 +878,135 @@ describe('spendTokens', () => {
|
|||
expect(result.completion.completion).toBeCloseTo(-expectedCompletionCost, 0);
|
||||
});
|
||||
|
||||
// 100000 prompt tokens on gemini-3.1-pro-preview: the balance is expected to drop by
// the cost computed from the standard `tokenValues['gemini-3.1']` rates.
it('should charge standard rates for gemini-3.1-pro-preview when prompt tokens are below threshold', async () => {
  const startingCredits = 100000000;
  const promptCount = 100000;
  const completionCount = 500;

  await Balance.create({ user: userId, tokenCredits: startingCredits });

  await spendTokens(
    {
      user: userId,
      conversationId: 'test-gemini31-standard-pricing',
      model: 'gemini-3.1-pro-preview',
      context: 'test',
      balance: { enabled: true },
    },
    { promptTokens: promptCount, completionTokens: completionCount },
  );

  const promptCost = promptCount * tokenValues['gemini-3.1'].prompt;
  const completionCost = completionCount * tokenValues['gemini-3.1'].completion;
  const expectedCost = promptCost + completionCost;

  const { tokenCredits } = await Balance.findOne({ user: userId });
  expect(tokenCredits).toBeCloseTo(startingCredits - expectedCost, 0);
});
|
||||
|
||||
// 250000 prompt tokens on gemini-3.1-pro-preview: the balance is expected to drop by
// the cost computed from the `premiumTokenValues['gemini-3.1']` rates.
it('should charge premium rates for gemini-3.1-pro-preview when prompt tokens exceed threshold', async () => {
  const startingCredits = 100000000;
  const promptCount = 250000;
  const completionCount = 500;

  await Balance.create({ user: userId, tokenCredits: startingCredits });

  await spendTokens(
    {
      user: userId,
      conversationId: 'test-gemini31-premium-pricing',
      model: 'gemini-3.1-pro-preview',
      context: 'test',
      balance: { enabled: true },
    },
    { promptTokens: promptCount, completionTokens: completionCount },
  );

  const promptCost = promptCount * premiumTokenValues['gemini-3.1'].prompt;
  const completionCost = completionCount * premiumTokenValues['gemini-3.1'].completion;
  const expectedCost = promptCost + completionCost;

  const { tokenCredits } = await Balance.findOne({ user: userId });
  expect(tokenCredits).toBeCloseTo(startingCredits - expectedCost, 0);
});
|
||||
|
||||
// Same premium scenario as the plain preview model, but for the `-customtools` variant,
// which is expected to be billed from the same 'gemini-3.1' premium entry.
it('should charge premium rates for gemini-3.1-pro-preview-customtools when prompt tokens exceed threshold', async () => {
  const startingCredits = 100000000;
  const promptCount = 250000;
  const completionCount = 500;

  await Balance.create({ user: userId, tokenCredits: startingCredits });

  await spendTokens(
    {
      user: userId,
      conversationId: 'test-gemini31-customtools-premium',
      model: 'gemini-3.1-pro-preview-customtools',
      context: 'test',
      balance: { enabled: true },
    },
    { promptTokens: promptCount, completionTokens: completionCount },
  );

  const promptCost = promptCount * premiumTokenValues['gemini-3.1'].prompt;
  const completionCost = completionCount * premiumTokenValues['gemini-3.1'].completion;
  const expectedCost = promptCost + completionCost;

  const { tokenCredits } = await Balance.findOne({ user: userId });
  expect(tokenCredits).toBeCloseTo(startingCredits - expectedCost, 0);
});
|
||||
|
||||
// Structured usage on gemini-3.1-pro-preview: checks the per-transaction amounts returned
// by spendStructuredTokens (negative costs) against premium prompt/completion rates plus
// the cache multipliers reported by getCacheMultiplier.
it('should charge premium rates for structured gemini-3.1 tokens when total input exceeds threshold', async () => {
  const startingCredits = 100000000;
  await Balance.create({ user: userId, tokenCredits: startingCredits });

  const geminiModel = 'gemini-3.1-pro-preview';
  const transaction = {
    user: userId,
    conversationId: 'test-gemini31-structured-premium',
    model: geminiModel,
    context: 'test',
    balance: { enabled: true },
  };
  const structuredUsage = {
    promptTokens: { input: 200000, write: 10000, read: 5000 },
    completionTokens: 1000,
  };

  const spent = await spendStructuredTokens(transaction, structuredUsage);

  const { prompt: promptRate, completion: completionRate } = premiumTokenValues['gemini-3.1'];
  const cacheWriteRate = getCacheMultiplier({ model: geminiModel, cacheType: 'write' });
  const cacheReadRate = getCacheMultiplier({ model: geminiModel, cacheType: 'read' });

  const { input, write, read } = structuredUsage.promptTokens;
  const promptCost = input * promptRate + write * cacheWriteRate + read * cacheReadRate;
  const completionCost = structuredUsage.completionTokens * completionRate;

  // The returned transaction values are debits, hence the negated expectations.
  expect(spent.prompt.prompt).toBeCloseTo(-promptCost, 0);
  expect(spent.completion.completion).toBeCloseTo(-completionCost, 0);
});
|
||||
|
||||
it('should not apply premium pricing to non-premium models regardless of prompt size', async () => {
|
||||
const initialBalance = 100000000;
|
||||
await Balance.create({
|
||||
|
|
|
|||
|
|
@ -200,6 +200,7 @@ const tokenValues = Object.assign(
|
|||
'gemini-2.5-flash-image': { prompt: 0.15, completion: 30 },
|
||||
'gemini-3': { prompt: 2, completion: 12 },
|
||||
'gemini-3-pro-image': { prompt: 2, completion: 120 },
|
||||
'gemini-3.1': { prompt: 2, completion: 12 },
|
||||
'gemini-pro-vision': { prompt: 0.5, completion: 1.5 },
|
||||
grok: { prompt: 2.0, completion: 10.0 }, // Base pattern defaults to grok-2
|
||||
'grok-beta': { prompt: 5.0, completion: 15.0 },
|
||||
|
|
@ -330,6 +331,8 @@ const cacheTokenValues = {
|
|||
'kimi-k2-0711-preview': { write: 0.6, read: 0.15 },
|
||||
'kimi-k2-thinking': { write: 0.6, read: 0.15 },
|
||||
'kimi-k2-thinking-turbo': { write: 1.15, read: 0.15 },
|
||||
// Gemini 3.1 models - cache read: $0.20/1M (<=200k), cache write: standard input price
|
||||
'gemini-3.1': { write: 2, read: 0.2 },
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -340,6 +343,7 @@ const cacheTokenValues = {
|
|||
const premiumTokenValues = {
  // Each entry pairs a `threshold` (input-token count that must be exceeded) with the
  // `prompt` / `completion` rates that then replace the model's standard rates.
  'claude-opus-4-6': {
    threshold: 200000,
    prompt: 10,
    completion: 37.5,
  },
  'claude-sonnet-4-6': {
    threshold: 200000,
    prompt: 6,
    completion: 22.5,
  },
  // Shared by every model that resolves to the 'gemini-3.1' pricing key.
  'gemini-3.1': {
    threshold: 200000,
    prompt: 4,
    completion: 18,
  },
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1345,6 +1345,8 @@ describe('getCacheMultiplier', () => {
|
|||
describe('Google Model Tests', () => {
|
||||
const googleModels = [
|
||||
'gemini-3',
|
||||
'gemini-3.1-pro-preview',
|
||||
'gemini-3.1-pro-preview-customtools',
|
||||
'gemini-2.5-pro',
|
||||
'gemini-2.5-flash',
|
||||
'gemini-2.5-flash-lite',
|
||||
|
|
@ -1389,6 +1391,8 @@ describe('Google Model Tests', () => {
|
|||
it('should map to the correct model keys', () => {
|
||||
const expected = {
|
||||
'gemini-3': 'gemini-3',
|
||||
'gemini-3.1-pro-preview': 'gemini-3.1',
|
||||
'gemini-3.1-pro-preview-customtools': 'gemini-3.1',
|
||||
'gemini-2.5-pro': 'gemini-2.5-pro',
|
||||
'gemini-2.5-flash': 'gemini-2.5-flash',
|
||||
'gemini-2.5-flash-lite': 'gemini-2.5-flash-lite',
|
||||
|
|
@ -1432,6 +1436,174 @@ describe('Google Model Tests', () => {
|
|||
).toBe(tokenValues[expected].completion);
|
||||
});
|
||||
});
|
||||
|
||||
// Both Gemini 3.1 preview variants are expected to resolve, on the Google endpoint, to the
// prompt and completion rates stored under the shared 'gemini-3.1' key of tokenValues.
it('should return correct prompt and completion rates for Gemini 3.1', () => {
  const variants = ['gemini-3.1-pro-preview', 'gemini-3.1-pro-preview-customtools'];
  const tokenTypes = ['prompt', 'completion'];

  for (const model of variants) {
    for (const tokenType of tokenTypes) {
      const multiplier = getMultiplier({
        model,
        tokenType,
        endpoint: EModelEndpoint.google,
      });
      expect(multiplier).toBe(tokenValues['gemini-3.1'][tokenType]);
    }
  }
});
|
||||
|
||||
// Cache write/read multipliers for both Gemini 3.1 preview variants are expected to come
// from the shared 'gemini-3.1' entry of cacheTokenValues.
it('should return correct cache rates for Gemini 3.1', () => {
  const variants = ['gemini-3.1-pro-preview', 'gemini-3.1-pro-preview-customtools'];

  for (const model of variants) {
    const writeRate = getCacheMultiplier({ model, cacheType: 'write' });
    expect(writeRate).toBe(cacheTokenValues['gemini-3.1'].write);

    const readRate = getCacheMultiplier({ model, cacheType: 'read' });
    expect(readRate).toBe(cacheTokenValues['gemini-3.1'].read);
  }
});
|
||||
});
|
||||
|
||||
// Exercises threshold-based pricing for the 'gemini-3.1' key: getPremiumRate is expected to
// return null at or below the threshold and the premium rate above it, and getMultiplier
// is expected to fall back to the standard tokenValues rate whenever no premium applies.
describe('Gemini 3.1 Premium Token Pricing', () => {
  const pricingKey = 'gemini-3.1';
  const proModel = 'gemini-3.1-pro-preview';
  const customToolsModel = 'gemini-3.1-pro-preview-customtools';
  const tokenTypes = ['prompt', 'completion'];

  const premiumRates = premiumTokenValues[pricingKey];
  const threshold = premiumRates.threshold;
  const belowThreshold = threshold - 1;
  const aboveThreshold = threshold + 1;
  const wellAboveThreshold = threshold * 2;

  // Looked up lazily so each test reads the standard table at run time.
  const standardRate = (tokenType) => tokenValues[pricingKey][tokenType];

  it('should have premium pricing defined for gemini-3.1', () => {
    expect(premiumRates).toBeDefined();
    for (const field of ['threshold', 'prompt', 'completion']) {
      expect(premiumRates[field]).toBeDefined();
    }
    // Premium must be strictly more expensive than standard for both token types.
    for (const tokenType of tokenTypes) {
      expect(premiumRates[tokenType]).toBeGreaterThan(standardRate(tokenType));
    }
  });

  it('should return null from getPremiumRate when inputTokenCount is below or at threshold', () => {
    const cases = [
      ['prompt', belowThreshold],
      ['completion', belowThreshold],
      ['prompt', threshold],
    ];
    for (const [tokenType, inputTokenCount] of cases) {
      expect(getPremiumRate(pricingKey, tokenType, inputTokenCount)).toBeNull();
    }
  });

  it('should return premium rate from getPremiumRate when inputTokenCount exceeds threshold', () => {
    const cases = [
      ['prompt', aboveThreshold],
      ['completion', aboveThreshold],
      ['prompt', wellAboveThreshold],
    ];
    for (const [tokenType, inputTokenCount] of cases) {
      expect(getPremiumRate(pricingKey, tokenType, inputTokenCount)).toBe(premiumRates[tokenType]);
    }
  });

  it('should return null from getPremiumRate when inputTokenCount is undefined or null', () => {
    for (const missingCount of [undefined, null]) {
      expect(getPremiumRate(pricingKey, 'prompt', missingCount)).toBeNull();
    }
  });

  it('should return standard rate from getMultiplier when inputTokenCount is below threshold', () => {
    for (const tokenType of tokenTypes) {
      const multiplier = getMultiplier({
        model: proModel,
        tokenType,
        inputTokenCount: belowThreshold,
      });
      expect(multiplier).toBe(standardRate(tokenType));
    }
  });

  it('should return premium rate from getMultiplier when inputTokenCount exceeds threshold', () => {
    for (const tokenType of tokenTypes) {
      const multiplier = getMultiplier({
        model: proModel,
        tokenType,
        inputTokenCount: aboveThreshold,
      });
      expect(multiplier).toBe(premiumRates[tokenType]);
    }
  });

  it('should return standard rate from getMultiplier when inputTokenCount is exactly at threshold', () => {
    const multiplier = getMultiplier({
      model: proModel,
      tokenType: 'prompt',
      inputTokenCount: threshold,
    });
    expect(multiplier).toBe(standardRate('prompt'));
  });

  it('should apply premium pricing to customtools variant above threshold', () => {
    for (const tokenType of tokenTypes) {
      const multiplier = getMultiplier({
        model: customToolsModel,
        tokenType,
        inputTokenCount: aboveThreshold,
      });
      expect(multiplier).toBe(premiumRates[tokenType]);
    }
  });

  it('should use standard rate when inputTokenCount is not provided', () => {
    for (const tokenType of tokenTypes) {
      // inputTokenCount is deliberately omitted from the argument object.
      const multiplier = getMultiplier({ model: proModel, tokenType });
      expect(multiplier).toBe(standardRate(tokenType));
    }
  });

  it('should apply premium pricing through getMultiplier with valueKey path', () => {
    const valueKey = getValueKey(proModel);
    expect(valueKey).toBe(pricingKey);

    for (const tokenType of tokenTypes) {
      const multiplier = getMultiplier({ valueKey, tokenType, inputTokenCount: aboveThreshold });
      expect(multiplier).toBe(premiumRates[tokenType]);
    }
  });

  it('should apply standard pricing through getMultiplier with valueKey path when below threshold', () => {
    const valueKey = getValueKey(proModel);

    for (const tokenType of tokenTypes) {
      const multiplier = getMultiplier({ valueKey, tokenType, inputTokenCount: belowThreshold });
      expect(multiplier).toBe(standardRate(tokenType));
    }
  });
});
|
||||
|
||||
describe('Grok Model Tests - Pricing', () => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue