Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-01-05 18:18:51 +01:00)
🚀 fix: Resolve Google Client Issues, CDN Screenshots, Update Models (#5703)
* 🤖 refactor: streamline model selection logic for title model in GoogleClient
* refactor: add options for empty object schemas in convertJsonSchemaToZod
* refactor: add utility function to check for empty object schemas in convertJsonSchemaToZod
* fix: Google MCP tool errors; remove object unescaping now that Google has fixed this
* fix: google safetySettings
* feat: add safety settings exclusion via GOOGLE_EXCLUDE_SAFETY_SETTINGS environment variable
* fix: rename environment variable for console JSON string length
* fix: disable portal for dropdown in ExportModal component
* fix: screenshot functionality to use image placeholder for remote images
* feat: add visionMode property to BaseClient and initialize in GoogleClient to fix resendFiles issue
* fix: enhance formatMessages to include image URLs in message content for Vertex AI
* fix: safety settings for titleChatCompletion
* fix: remove deprecated model assignment in GoogleClient and streamline title model retrieval
* fix: remove unused image preloading logic in ScreenshotContext
* chore: update default Google models to the latest models shared by Vertex AI and GenAI
* refactor: enhance Google error messaging
* fix: update token values and model limits for Gemini models
* ci: fix model matching
* chore: bump version of librechat-data-provider to 0.7.699
This commit is contained in: parent 33e60c379b, commit 63afb317c6
19 changed files with 939 additions and 720 deletions
@@ -57,6 +57,8 @@ class BaseClient {
     this.continued;
     /** @type {TMessage[]} */
     this.currentMessages = [];
+    /** @type {import('librechat-data-provider').VisionModes | undefined} */
+    this.visionMode;
   }

   setOptions() {
@@ -1095,7 +1097,7 @@ class BaseClient {
        file_id: { $in: fileIds },
      });

-     await this.addImageURLs(message, files);
+     await this.addImageURLs(message, files, this.visionMode);

      this.message_file_map[message.messageId] = files;
      return message;
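
As a rough illustration (not part of the commit), here is a minimal sketch of how the new BaseClient.visionMode property flows into addImageURLs. Only the visionMode name and the (message, files, visionMode) call shape come from the hunks above; the class, method bodies, and the image object shape are assumptions:

// Hypothetical sketch of the visionMode flow; not LibreChat's actual implementation.
class ExampleClient {
  constructor() {
    /** @type {string | undefined} */
    this.visionMode = undefined; // subclasses may set this, e.g. to a "generative" mode
  }

  async addImageURLs(message, files, visionMode) {
    // attach image references according to the requested vision mode (illustrative shape)
    message.image_urls = files.map((f) => ({ type: 'image_url', mode: visionMode, url: f.filepath }));
    return message;
  }

  async attachFiles(message, files) {
    // the base class now forwards its visionMode, as in the diff above
    return this.addImageURLs(message, files, this.visionMode);
  }
}
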
@@ -10,11 +10,13 @@ const {
   getResponseSender,
   endpointSettings,
   EModelEndpoint,
   ContentTypes,
   VisionModes,
   ErrorTypes,
   Constants,
   AuthKeys,
 } = require('librechat-data-provider');
+const { getSafetySettings } = require('~/server/services/Endpoints/google/llm');
 const { encodeAndFormat } = require('~/server/services/Files/images');
 const Tokenizer = require('~/server/services/Tokenizer');
 const { spendTokens } = require('~/models/spendTokens');
@@ -70,7 +72,7 @@ class GoogleClient extends BaseClient {
     /** The key for the usage object's output tokens
      * @type {string} */
     this.outputTokensKey = 'output_tokens';
-
+    this.visionMode = VisionModes.generative;
     if (options.skipSetOptions) {
       return;
     }
@@ -215,10 +217,29 @@ class GoogleClient extends BaseClient {
   }

   formatMessages() {
-    return ((message) => ({
-      author: message?.author ?? (message.isCreatedByUser ? this.userLabel : this.modelLabel),
-      content: message?.content ?? message.text,
-    })).bind(this);
+    return ((message) => {
+      const msg = {
+        author: message?.author ?? (message.isCreatedByUser ? this.userLabel : this.modelLabel),
+        content: message?.content ?? message.text,
+      };
+
+      if (!message.image_urls?.length) {
+        return msg;
+      }
+
+      msg.content = (
+        !Array.isArray(msg.content)
+          ? [
+              {
+                type: ContentTypes.TEXT,
+                [ContentTypes.TEXT]: msg.content,
+              },
+            ]
+          : msg.content
+      ).concat(message.image_urls);
+
+      return msg;
+    }).bind(this);
   }

   /**
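
To make the new behavior concrete, here is a hedged sketch of the input/output shape: a message carrying image_urls is reshaped into a multimodal content array, with the text wrapped as a text part and the image parts appended. The exact part objects below are illustrative assumptions:

// Illustrative only: how a message with image_urls is reshaped by the new formatMessages logic.
const message = {
  isCreatedByUser: true,
  text: 'What is in this image?',
  image_urls: [{ type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }], // assumed shape
};

// After formatting (illustrative result):
// {
//   author: 'User',
//   content: [
//     { type: 'text', text: 'What is in this image?' },
//     { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } },
//   ],
// }
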
@@ -566,6 +587,7 @@ class GoogleClient extends BaseClient {

     if (this.project_id != null) {
       logger.debug('Creating VertexAI client');
+      this.visionMode = undefined;
       clientOptions.streaming = true;
       const client = new ChatVertexAI(clientOptions);
       client.temperature = clientOptions.temperature;
@@ -607,13 +629,14 @@ class GoogleClient extends BaseClient {
   }

   async getCompletion(_payload, options = {}) {
-    const safetySettings = this.getSafetySettings();
     const { onProgress, abortController } = options;
+    const safetySettings = getSafetySettings(this.modelOptions.model);
     const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
     const modelName = this.modelOptions.modelName ?? this.modelOptions.model ?? '';

     let reply = '';

     /** @type {Error} */
     let error;
     try {
       if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) {
         /** @type {GenAI} */
@@ -714,8 +737,16 @@ class GoogleClient extends BaseClient {
         this.usage = usageMetadata;
       }
     } catch (e) {
+      error = e;
       logger.error('[GoogleClient] There was an issue generating the completion', e);
     }
+
+    if (error != null && reply === '') {
+      const errorMessage = `{ "type": "${ErrorTypes.GoogleError}", "info": "${
+        error.message ?? 'The Google provider failed to generate content, please contact the Admin.'
+      }" }`;
+      throw new Error(errorMessage);
+    }
     return reply;
   }
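
The new error path throws a JSON string so downstream code can distinguish Google provider failures. A hedged sketch of how a caller might consume it; the sendCompletion call site and the fallback branch are assumptions, only the thrown message shape comes from the hunk above:

// Illustrative consumer of the structured error thrown above.
async function runWithGoogleErrorHandling(client, payload, opts) {
  try {
    return await client.sendCompletion(payload, opts); // hypothetical call site
  } catch (err) {
    let parsed;
    try {
      // shape per the hunk above: { "type": ErrorTypes.GoogleError, "info": "..." }
      parsed = JSON.parse(err.message);
    } catch {
      parsed = { type: 'unknown', info: err.message };
    }
    console.error(`[${parsed.type}] ${parsed.info}`);
    throw err;
  }
}
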
@@ -781,12 +812,11 @@ class GoogleClient extends BaseClient {
   * Stripped-down logic for generating a title. This uses the non-streaming APIs, since the user does not see titles streaming
   */
  async titleChatCompletion(_payload, options = {}) {
-    const { abortController } = options;
-    const safetySettings = this.getSafetySettings();
-
    let reply = '';
+    const { abortController } = options;
+
+    const model = this.modelOptions.modelName ?? this.modelOptions.model ?? '';
+    const safetySettings = getSafetySettings(model);
    if (!EXCLUDED_GENAI_MODELS.test(model) && !this.project_id) {
      logger.debug('Identified titling model as GenAI version');
      /** @type {GenerativeModel} */
@@ -844,17 +874,6 @@ class GoogleClient extends BaseClient {
       },
     ]);

-    const model = process.env.GOOGLE_TITLE_MODEL ?? this.modelOptions.model;
-    const availableModels = this.options.modelsConfig?.[EModelEndpoint.google];
-    this.isVisionModel = validateVisionModel({ model, availableModels });
-
-    if (this.isVisionModel) {
-      logger.warn(
-        `Current vision model does not support titling without an attachment; falling back to default model ${settings.model.default}`,
-      );
-      this.modelOptions.model = settings.model.default;
-    }
-
     try {
       this.initializeClient();
       title = await this.titleChatCompletion(payload, {
@@ -892,48 +911,6 @@ class GoogleClient extends BaseClient {
     return reply.trim();
   }

-  getSafetySettings() {
-    const model = this.modelOptions.model;
-    const isGemini2 = model.includes('gemini-2.0') && !model.includes('thinking');
-    const mapThreshold = (value) => {
-      if (isGemini2 && value === 'BLOCK_NONE') {
-        return 'OFF';
-      }
-      return value;
-    };
-
-    return [
-      {
-        category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
-        threshold: mapThreshold(
-          process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
-        ),
-      },
-      {
-        category: 'HARM_CATEGORY_HATE_SPEECH',
-        threshold: mapThreshold(
-          process.env.GOOGLE_SAFETY_HATE_SPEECH || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
-        ),
-      },
-      {
-        category: 'HARM_CATEGORY_HARASSMENT',
-        threshold: mapThreshold(
-          process.env.GOOGLE_SAFETY_HARASSMENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
-        ),
-      },
-      {
-        category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
-        threshold: mapThreshold(
-          process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
-        ),
-      },
-      {
-        category: 'HARM_CATEGORY_CIVIC_INTEGRITY',
-        threshold: mapThreshold(process.env.GOOGLE_SAFETY_CIVIC_INTEGRITY || 'BLOCK_NONE'),
-      },
-    ];
-  }
-
   getEncoding() {
     return 'cl100k_base';
   }
@@ -4,7 +4,7 @@ const traverse = require('traverse');

 const SPLAT_SYMBOL = Symbol.for('splat');
 const MESSAGE_SYMBOL = Symbol.for('message');
-const CONSOLE_JSON_LONG_STRING_LENGTH=parseInt(process.env.CONSOLE_JSON_LONG_STRING_LENGTH) || 255;
+const CONSOLE_JSON_STRING_LENGTH = parseInt(process.env.CONSOLE_JSON_STRING_LENGTH) || 255;

 const sensitiveKeys = [
   /^(sk-)[^\s]+/, // OpenAI API key pattern
@@ -206,13 +206,13 @@ const jsonTruncateFormat = winston.format((info) => {
     seen.add(obj);

     if (Array.isArray(obj)) {
-      return obj.map(item => truncateObject(item));
+      return obj.map((item) => truncateObject(item));
     }

     const newObj = {};
     Object.entries(obj).forEach(([key, value]) => {
       if (typeof value === 'string') {
-        newObj[key] = truncateLongStrings(value, CONSOLE_JSON_LONG_STRING_LENGTH);
+        newObj[key] = truncateLongStrings(value, CONSOLE_JSON_STRING_LENGTH);
       } else {
         newObj[key] = truncateObject(value);
       }
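
For orientation, a small sketch of the truncation behavior the renamed CONSOLE_JSON_STRING_LENGTH variable controls. The helper below is a stand-in with an assumed implementation; the real truncateLongStrings lives in the logging config:

// Stand-in for the logger's string truncation (assumed behavior, not the repo's implementation).
const CONSOLE_JSON_STRING_LENGTH = parseInt(process.env.CONSOLE_JSON_STRING_LENGTH) || 255;

function truncateLongStrings(str, maxLength) {
  // assumed behavior: cut the string and mark that it was truncated
  return str.length > maxLength ? str.slice(0, maxLength) + '... [truncated]' : str;
}

console.log(truncateLongStrings('a'.repeat(1000), CONSOLE_JSON_STRING_LENGTH).length); // ~255 plus the marker
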
@@ -102,9 +102,14 @@ const tokenValues = Object.assign(
     /* cohere doesn't have rates for the older command models,
     so this was from https://artificialanalysis.ai/models/command-light/providers */
     command: { prompt: 0.38, completion: 0.38 },
+    'gemini-2.0-flash-lite': { prompt: 0.075, completion: 0.3 },
+    'gemini-2.0-flash': { prompt: 0.1, completion: 0.7 },
     'gemini-2.0': { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing
-    'gemini-1.5': { prompt: 7, completion: 21 }, // May 2nd, 2024 pricing
-    gemini: { prompt: 0.5, completion: 1.5 }, // May 2nd, 2024 pricing
+    'gemini-1.5-flash-8b': { prompt: 0.075, completion: 0.3 },
+    'gemini-1.5-flash': { prompt: 0.15, completion: 0.6 },
+    'gemini-1.5': { prompt: 2.5, completion: 10 },
+    'gemini-pro-vision': { prompt: 0.5, completion: 1.5 },
+    gemini: { prompt: 0.5, completion: 1.5 },
   },
   bedrockValues,
 );
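
For reference, a hedged sketch of how per-model rates like these are typically applied. The per-1M-token unit and the helper below are assumptions for illustration, not LibreChat's spendTokens logic:

// Assumed unit: rates are per 1M tokens. Illustrative calculator only.
function estimateCost(rates, promptTokens, completionTokens) {
  return (promptTokens * rates.prompt + completionTokens * rates.completion) / 1e6;
}

// e.g. with the 'gemini-2.0-flash' entry above:
console.log(estimateCost({ prompt: 0.1, completion: 0.7 }, 10_000, 2_000)); // 0.0024
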
@@ -380,3 +380,81 @@ describe('getCacheMultiplier', () => {
     ).toBe(0.03);
   });
 });
+
+describe('Google Model Tests', () => {
+  const googleModels = [
+    'gemini-2.0-flash-lite-preview-02-05',
+    'gemini-2.0-flash-001',
+    'gemini-2.0-flash-exp',
+    'gemini-2.0-pro-exp-02-05',
+    'gemini-1.5-flash-8b',
+    'gemini-1.5-flash-thinking',
+    'gemini-1.5-pro-latest',
+    'gemini-1.5-pro-preview-0409',
+    'gemini-pro-vision',
+    'gemini-1.0',
+    'gemini-pro',
+  ];
+
+  it('should return the correct prompt and completion rates for all models', () => {
+    const results = googleModels.map((model) => {
+      const valueKey = getValueKey(model, EModelEndpoint.google);
+      const promptRate = getMultiplier({
+        model,
+        tokenType: 'prompt',
+        endpoint: EModelEndpoint.google,
+      });
+      const completionRate = getMultiplier({
+        model,
+        tokenType: 'completion',
+        endpoint: EModelEndpoint.google,
+      });
+      return { model, valueKey, promptRate, completionRate };
+    });
+
+    results.forEach(({ valueKey, promptRate, completionRate }) => {
+      expect(promptRate).toBe(tokenValues[valueKey].prompt);
+      expect(completionRate).toBe(tokenValues[valueKey].completion);
+    });
+  });
+
+  it('should map to the correct model keys', () => {
+    const expected = {
+      'gemini-2.0-flash-lite-preview-02-05': 'gemini-2.0-flash-lite',
+      'gemini-2.0-flash-001': 'gemini-2.0-flash',
+      'gemini-2.0-flash-exp': 'gemini-2.0-flash',
+      'gemini-2.0-pro-exp-02-05': 'gemini-2.0',
+      'gemini-1.5-flash-8b': 'gemini-1.5-flash-8b',
+      'gemini-1.5-flash-thinking': 'gemini-1.5-flash',
+      'gemini-1.5-pro-latest': 'gemini-1.5',
+      'gemini-1.5-pro-preview-0409': 'gemini-1.5',
+      'gemini-pro-vision': 'gemini-pro-vision',
+      'gemini-1.0': 'gemini',
+      'gemini-pro': 'gemini',
+    };
+
+    Object.entries(expected).forEach(([model, expectedKey]) => {
+      const valueKey = getValueKey(model, EModelEndpoint.google);
+      expect(valueKey).toBe(expectedKey);
+    });
+  });
+
+  it('should handle model names with different formats', () => {
+    const testCases = [
+      { input: 'google/gemini-pro', expected: 'gemini' },
+      { input: 'gemini-pro/google', expected: 'gemini' },
+      { input: 'google/gemini-2.0-flash-lite', expected: 'gemini-2.0-flash-lite' },
+    ];
+
+    testCases.forEach(({ input, expected }) => {
+      const valueKey = getValueKey(input, EModelEndpoint.google);
+      expect(valueKey).toBe(expected);
+      expect(
+        getMultiplier({ model: input, tokenType: 'prompt', endpoint: EModelEndpoint.google }),
+      ).toBe(tokenValues[expected].prompt);
+      expect(
+        getMultiplier({ model: input, tokenType: 'completion', endpoint: EModelEndpoint.google }),
+      ).toBe(tokenValues[expected].completion);
+    });
+  });
+});
@@ -45,7 +45,7 @@
     "@langchain/google-genai": "^0.1.7",
     "@langchain/google-vertexai": "^0.1.8",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^2.0.1",
+    "@librechat/agents": "^2.0.2",
     "@waylaidwanderer/fetch-event-source": "^3.0.1",
     "axios": "^1.7.7",
     "bcryptjs": "^2.4.3",
@@ -1,18 +1,42 @@
 const { Providers } = require('@librechat/agents');
 const { AuthKeys } = require('librechat-data-provider');
+const { isEnabled } = require('~/server/utils');
+
+function getThresholdMapping(model) {
+  const gemini1Pattern = /gemini-(1\.0|1\.5|pro$|1\.0-pro|1\.5-pro|1\.5-flash-001)/;
+  const restrictedPattern = /(gemini-(1\.5-flash-8b|2\.0|exp)|learnlm)/;
+
+  if (gemini1Pattern.test(model)) {
+    return (value) => {
+      if (value === 'OFF') {
+        return 'BLOCK_NONE';
+      }
+      return value;
+    };
+  }
+
+  if (restrictedPattern.test(model)) {
+    return (value) => {
+      if (value === 'OFF' || value === 'HARM_BLOCK_THRESHOLD_UNSPECIFIED') {
+        return 'BLOCK_NONE';
+      }
+      return value;
+    };
+  }
+
+  return (value) => value;
+}

 /**
  *
- * @param {boolean} isGemini2
- * @returns {Array<{category: string, threshold: string}>}
+ * @param {string} model
+ * @returns {Array<{category: string, threshold: string}> | undefined}
  */
-function getSafetySettings(isGemini2) {
-  const mapThreshold = (value) => {
-    if (isGemini2 && value === 'BLOCK_NONE') {
-      return 'OFF';
-    }
-    return value;
-  };
+function getSafetySettings(model) {
+  if (isEnabled(process.env.GOOGLE_EXCLUDE_SAFETY_SETTINGS)) {
+    return undefined;
+  }
+  const mapThreshold = getThresholdMapping(model);

   return [
     {
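
A usage sketch of the reworked getSafetySettings: the threshold mapping rewrites values that certain model families do not accept, and the new exclusion flag short-circuits the whole list. The env values shown are illustrative assumptions; the require path mirrors the one used elsewhere in this commit:

// Illustrative usage of getSafetySettings from the hunk above (example values only).
const { getSafetySettings } = require('~/server/services/Endpoints/google/llm');

process.env.GOOGLE_SAFETY_HATE_SPEECH = 'OFF';

// For gemini-2.0 / exp / flash-8b / learnlm models, the restricted mapping rewrites
// 'OFF' and 'HARM_BLOCK_THRESHOLD_UNSPECIFIED' to 'BLOCK_NONE':
const settings = getSafetySettings('gemini-2.0-flash-001');
// -> [{ category: 'HARM_CATEGORY_HATE_SPEECH', threshold: 'BLOCK_NONE' }, ...]

// With GOOGLE_EXCLUDE_SAFETY_SETTINGS enabled, no safetySettings are sent at all:
process.env.GOOGLE_EXCLUDE_SAFETY_SETTINGS = 'true';
console.log(getSafetySettings('gemini-2.0-flash-001')); // undefined
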
@@ -85,8 +109,7 @@ function getLLMConfig(credentials, options = {}) {
   };

-  /** Used only for Safety Settings */
-  const isGemini2 = llmConfig.model.includes('gemini-2.0') && !llmConfig.model.includes('thinking');
-  llmConfig.safetySettings = getSafetySettings(isGemini2);
+  llmConfig.safetySettings = getSafetySettings(llmConfig.model);

   let provider;
@@ -153,4 +176,5 @@ function getLLMConfig(credentials, options = {}) {

 module.exports = {
   getLLMConfig,
+  getSafetySettings,
 };
@@ -1,9 +1,8 @@
-const { CacheKeys, Constants } = require('librechat-data-provider');
+const { EModelEndpoint, CacheKeys, Constants, googleSettings } = require('librechat-data-provider');
 const getLogStores = require('~/cache/getLogStores');
-const initializeClient = require('./initialize');
 const { isEnabled } = require('~/server/utils');
 const { saveConvo } = require('~/models');
 const { logger } = require('~/config');
+const initializeClient = require('./initialize');

 const addTitle = async (req, { text, response, client }) => {
   const { TITLE_CONVO = 'true' } = process.env ?? {};
@@ -14,22 +13,16 @@ const addTitle = async (req, { text, response, client }) => {
   if (client.options.titleConvo === false) {
     return;
   }

-  const DEFAULT_TITLE_MODEL = 'gemini-pro';
   const { GOOGLE_TITLE_MODEL } = process.env ?? {};

-  let model = GOOGLE_TITLE_MODEL ?? DEFAULT_TITLE_MODEL;
+  const providerConfig = req.app.locals[EModelEndpoint.google];
+  let model =
+    providerConfig?.titleModel ??
+    GOOGLE_TITLE_MODEL ??
+    client.options?.modelOptions.model ??
+    googleSettings.model.default;

   if (GOOGLE_TITLE_MODEL === Constants.CURRENT_MODEL) {
     model = client.options?.modelOptions.model;
-
-    if (client.isVisionModel) {
-      logger.warn(
-        `current_model was specified for Google title request, but the model ${model} cannot process a text-only conversation. Falling back to ${DEFAULT_TITLE_MODEL}`,
-      );
-
-      model = DEFAULT_TITLE_MODEL;
-    }
   }

   const titleEndpointOptions = {
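
The nullish-coalescing chain above streamlines title model selection; the GOOGLE_TITLE_MODEL=current_model special case still switches to the conversation's model. A condensed sketch of the precedence, with placeholder values and identifiers from the hunk:

// Illustrative resolution order for the Google title model after this change (placeholder values).
const providerConfig = { titleModel: undefined };   // assumed shape of the loaded endpoint config
const clientModel = 'gemini-2.0-flash-001';         // the conversation's current model
const defaultModel = 'gemini-2.0-flash-lite';       // stand-in for googleSettings.model.default

const model =
  providerConfig?.titleModel ??       // 1. endpoint-level titleModel, if configured
  process.env.GOOGLE_TITLE_MODEL ??   // 2. GOOGLE_TITLE_MODEL env var
  clientModel ??                      // 3. the conversation's current model
  defaultModel;                       // 4. library default
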
@@ -1,9 +1,11 @@
+const { z } = require('zod');
 const { tool } = require('@langchain/core/tools');
-const { Constants: AgentConstants } = require('@librechat/agents');
+const { Constants: AgentConstants, Providers } = require('@librechat/agents');
 const {
   Constants,
+  convertJsonSchemaToZod,
   ContentTypes,
   isAssistantsEndpoint,
-  convertJsonSchemaToZod,
 } = require('librechat-data-provider');
 const { logger, getMCPManager } = require('~/config');
@@ -25,7 +27,15 @@ async function createMCPTool({ req, toolKey, provider }) {
   }
   /** @type {LCTool} */
   const { description, parameters } = toolDefinition;
-  const schema = convertJsonSchemaToZod(parameters);
+  const isGoogle = provider === Providers.VERTEXAI || provider === Providers.GOOGLE;
+  let schema = convertJsonSchemaToZod(parameters, {
+    allowEmptyObject: !isGoogle,
+  });
+
+  if (!schema) {
+    schema = z.object({ input: z.string().optional() });
+  }
+
   const [toolName, serverName] = toolKey.split(Constants.mcp_delimiter);
   /** @type {(toolInput: Object | string) => Promise<unknown>} */
   const _call = async (toolInput) => {
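
To show the new option in use, a hedged sketch of the empty-schema handling: for Google providers the empty object schema is disallowed, convertJsonSchemaToZod yields no schema, and the tool falls back to a minimal one. The exact falsy return value is as implied by the if (!schema) guard above; the sample parameters are an assumption:

// Illustrative: an MCP tool definition whose parameters are an empty object schema.
const { z } = require('zod');
const { convertJsonSchemaToZod } = require('librechat-data-provider');

const parameters = { type: 'object', properties: {} }; // tool takes no arguments (assumed sample)

// For Google/Vertex AI the empty schema is rejected (allowEmptyObject: false),
// so the fallback schema from the hunk above is used instead:
let schema = convertJsonSchemaToZod(parameters, { allowEmptyObject: false });
if (!schema) {
  schema = z.object({ input: z.string().optional() });
}
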
@@ -35,6 +45,9 @@ async function createMCPTool({ req, toolKey, provider }) {
       if (isAssistantsEndpoint(provider) && Array.isArray(result)) {
         return result[0];
       }
+      if (isGoogle && Array.isArray(result[0]) && result[0][0]?.type === ContentTypes.TEXT) {
+        return [result[0][0].text, result[1]];
+      }
       return result;
     } catch (error) {
       logger.error(`${toolName} MCP server tool call failed`, error);
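
A sketch of the result shape the new Google branch appears to target: the tool result arrives as a pair of content parts plus a second element, and a lone text part is unwrapped to a plain string for Google providers. The sample values are assumptions:

// Illustrative MCP tool result (assumed sample values):
const result = [
  [{ type: 'text', text: '42' }], // content parts
  { some: 'artifact' },           // second element, preserved as-is
];

// Per the branch above, when the provider is Google/Vertex AI and the first part is text:
// -> ['42', { some: 'artifact' }]
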
@@ -49,11 +49,14 @@ const cohereModels = {

 const googleModels = {
   /* Max I/O is combined so we subtract the amount from max response tokens for actual total */
   gemini: 30720, // -2048 from max
-  'gemini-pro-vision': 12288, // -4096 from max
-  'gemini-exp': 8000,
-  'gemini-2.0-flash-thinking-exp': 30720, // -2048 from max
-  'gemini-2.0': 1048576,
-  'gemini-1.5': 1048576,
+  'gemini-pro-vision': 12288,
+  'gemini-exp': 2000000,
+  'gemini-2.0': 2000000,
+  'gemini-2.0-flash': 1000000,
+  'gemini-2.0-flash-lite': 1000000,
+  'gemini-1.5': 1000000,
+  'gemini-1.5-flash': 1000000,
+  'gemini-1.5-flash-8b': 1000000,
   'text-bison-32k': 32758, // -10 from max
   'chat-bison-32k': 32758, // -10 from max
   'code-bison-32k': 32758, // -10 from max
@@ -154,6 +154,24 @@ describe('getModelMaxTokens', () => {
   });

+  test('should return correct tokens for partial match - Google models', () => {
+    expect(getModelMaxTokens('gemini-2.0-flash-lite-preview-02-05', EModelEndpoint.google)).toBe(
+      maxTokensMap[EModelEndpoint.google]['gemini-2.0-flash-lite'],
+    );
+    expect(getModelMaxTokens('gemini-2.0-flash-001', EModelEndpoint.google)).toBe(
+      maxTokensMap[EModelEndpoint.google]['gemini-2.0-flash'],
+    );
+    expect(getModelMaxTokens('gemini-2.0-flash-exp', EModelEndpoint.google)).toBe(
+      maxTokensMap[EModelEndpoint.google]['gemini-2.0-flash'],
+    );
+    expect(getModelMaxTokens('gemini-2.0-pro-exp-02-05', EModelEndpoint.google)).toBe(
+      maxTokensMap[EModelEndpoint.google]['gemini-2.0'],
+    );
+    expect(getModelMaxTokens('gemini-1.5-flash-8b', EModelEndpoint.google)).toBe(
+      maxTokensMap[EModelEndpoint.google]['gemini-1.5-flash-8b'],
+    );
+    expect(getModelMaxTokens('gemini-1.5-flash-thinking', EModelEndpoint.google)).toBe(
+      maxTokensMap[EModelEndpoint.google]['gemini-1.5-flash'],
+    );
+    expect(getModelMaxTokens('gemini-1.5-pro-latest', EModelEndpoint.google)).toBe(
+      maxTokensMap[EModelEndpoint.google]['gemini-1.5'],
+    );