🚀 fix: Resolve Google Client Issues, CDN Screenshots, Update Models (#5703)

* 🤖 refactor: streamline model selection logic for title model in GoogleClient

* refactor: add options for empty object schemas in convertJsonSchemaToZod

* refactor: add utility function to check for empty object schemas in convertJsonSchemaToZod

* fix: Google MCP tool errors; remove object unescaping, as Google has fixed this

* fix: google safetySettings

* feat: add safety settings exclusion via GOOGLE_EXCLUDE_SAFETY_SETTINGS environment variable

* fix: rename environment variable for console JSON string length

* fix: disable portal for dropdown in ExportModal component

* fix: update screenshot functionality to use an image placeholder for remote images

* feat: add visionMode property to BaseClient and initialize it in GoogleClient to fix resendFiles issue

* fix: enhance formatMessages to include image URLs in message content for Vertex AI

* fix: safety settings for titleChatCompletion

* fix: remove deprecated model assignment in GoogleClient and streamline title model retrieval

* fix: remove unused image preloading logic in ScreenshotContext

* chore: update default Google models to the latest models shared by Vertex AI and GenAI

* refactor: enhance Google error messaging

* fix: update token values and model limits for Gemini models

* ci: fix model matching

* chore: bump version of librechat-data-provider to 0.7.699
Danny Avila 2025-02-06 18:13:18 -05:00 committed by GitHub
parent 33e60c379b
commit 63afb317c6
19 changed files with 939 additions and 720 deletions


@@ -57,6 +57,8 @@ class BaseClient {
this.continued;
/** @type {TMessage[]} */
this.currentMessages = [];
/** @type {import('librechat-data-provider').VisionModes | undefined} */
this.visionMode;
}
setOptions() {
@@ -1095,7 +1097,7 @@ class BaseClient {
file_id: { $in: fileIds },
});
await this.addImageURLs(message, files);
await this.addImageURLs(message, files, this.visionMode);
this.message_file_map[message.messageId] = files;
return message;
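
For context, a minimal sketch (hypothetical subclass; `VisionModes` is imported as in the GoogleClient diff below) of how the new `visionMode` property threads into message loading:

```js
const { VisionModes } = require('librechat-data-provider');

class ExampleClient extends BaseClient {
  constructor(options) {
    super(options);
    // Subclasses opt in; BaseClient itself leaves visionMode undefined.
    // GoogleClient sets VisionModes.generative here, and clears it again
    // when the VertexAI client is created (see the GoogleClient diff).
    this.visionMode = VisionModes.generative;
  }
}

// BaseClient now forwards the mode when hydrating file attachments:
//   await this.addImageURLs(message, files, this.visionMode);
// so provider-specific image formatting can key off the active mode.
```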


@@ -10,11 +10,13 @@ const {
getResponseSender,
endpointSettings,
EModelEndpoint,
ContentTypes,
VisionModes,
ErrorTypes,
Constants,
AuthKeys,
} = require('librechat-data-provider');
const { getSafetySettings } = require('~/server/services/Endpoints/google/llm');
const { encodeAndFormat } = require('~/server/services/Files/images');
const Tokenizer = require('~/server/services/Tokenizer');
const { spendTokens } = require('~/models/spendTokens');
@@ -70,7 +72,7 @@ class GoogleClient extends BaseClient {
/** The key for the usage object's output tokens
* @type {string} */
this.outputTokensKey = 'output_tokens';
this.visionMode = VisionModes.generative;
if (options.skipSetOptions) {
return;
}
@@ -215,10 +217,29 @@
}
formatMessages() {
return ((message) => ({
author: message?.author ?? (message.isCreatedByUser ? this.userLabel : this.modelLabel),
content: message?.content ?? message.text,
})).bind(this);
return ((message) => {
const msg = {
author: message?.author ?? (message.isCreatedByUser ? this.userLabel : this.modelLabel),
content: message?.content ?? message.text,
};
if (!message.image_urls?.length) {
return msg;
}
msg.content = (
!Array.isArray(msg.content)
? [
{
type: ContentTypes.TEXT,
[ContentTypes.TEXT]: msg.content,
},
]
: msg.content
).concat(message.image_urls);
return msg;
}).bind(this);
}
/**
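
To make the change above concrete, a hedged example of what `formatMessages()` now returns when a message carries image attachments (values are hypothetical; this assumes `ContentTypes.TEXT === 'text'`):

```js
// Hypothetical input message with one attached image:
const message = {
  isCreatedByUser: true,
  text: 'What is in this image?',
  image_urls: [{ type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }],
};

// Plain text content is wrapped into an array, then image parts are appended:
// {
//   author: 'User',
//   content: [
//     { type: 'text', text: 'What is in this image?' },
//     { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } },
//   ],
// }
```

This is what lets Vertex AI receive image URLs in message content, per the commit message.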
@@ -566,6 +587,7 @@
if (this.project_id != null) {
logger.debug('Creating VertexAI client');
this.visionMode = undefined;
clientOptions.streaming = true;
const client = new ChatVertexAI(clientOptions);
client.temperature = clientOptions.temperature;
@@ -607,13 +629,14 @@
}
async getCompletion(_payload, options = {}) {
const safetySettings = this.getSafetySettings();
const { onProgress, abortController } = options;
const safetySettings = getSafetySettings(this.modelOptions.model);
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
const modelName = this.modelOptions.modelName ?? this.modelOptions.model ?? '';
let reply = '';
/** @type {Error} */
let error;
try {
if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) {
/** @type {GenAI} */
@@ -714,8 +737,16 @@
this.usage = usageMetadata;
}
} catch (e) {
error = e;
logger.error('[GoogleClient] There was an issue generating the completion', e);
}
if (error != null && reply === '') {
const errorMessage = `{ "type": "${ErrorTypes.GoogleError}", "info": "${
error.message ?? 'The Google provider failed to generate content, please contact the Admin.'
}" }`;
throw new Error(errorMessage);
}
return reply;
}
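
Since the new error path serializes the failure as a JSON string, a consumer can recover the structured info. A sketch (`client`, `payload`, and the option names are hypothetical here; this also assumes `error.message` contains no unescaped quotes, which the string interpolation above does not guard against):

```js
const { ErrorTypes } = require('librechat-data-provider');

try {
  await client.getCompletion(payload, { onProgress, abortController });
} catch (err) {
  // The thrown message is a JSON string: { "type": ..., "info": ... }
  const parsed = JSON.parse(err.message);
  if (parsed.type === ErrorTypes.GoogleError) {
    console.error('Google provider failed:', parsed.info);
  }
}
```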
@@ -781,12 +812,11 @@
* Stripped-down logic for generating a title. This uses the non-streaming APIs, since the user does not see titles streaming
*/
async titleChatCompletion(_payload, options = {}) {
const { abortController } = options;
const safetySettings = this.getSafetySettings();
let reply = '';
const { abortController } = options;
const model = this.modelOptions.modelName ?? this.modelOptions.model ?? '';
const safetySettings = getSafetySettings(model);
if (!EXCLUDED_GENAI_MODELS.test(model) && !this.project_id) {
logger.debug('Identified titling model as GenAI version');
/** @type {GenerativeModel} */
@@ -844,17 +874,6 @@
},
]);
const model = process.env.GOOGLE_TITLE_MODEL ?? this.modelOptions.model;
const availableModels = this.options.modelsConfig?.[EModelEndpoint.google];
this.isVisionModel = validateVisionModel({ model, availableModels });
if (this.isVisionModel) {
logger.warn(
`Current vision model does not support titling without an attachment; falling back to default model ${settings.model.default}`,
);
this.modelOptions.model = settings.model.default;
}
try {
this.initializeClient();
title = await this.titleChatCompletion(payload, {
@@ -892,48 +911,6 @@
return reply.trim();
}
getSafetySettings() {
const model = this.modelOptions.model;
const isGemini2 = model.includes('gemini-2.0') && !model.includes('thinking');
const mapThreshold = (value) => {
if (isGemini2 && value === 'BLOCK_NONE') {
return 'OFF';
}
return value;
};
return [
{
category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
threshold: mapThreshold(
process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
),
},
{
category: 'HARM_CATEGORY_HATE_SPEECH',
threshold: mapThreshold(
process.env.GOOGLE_SAFETY_HATE_SPEECH || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
),
},
{
category: 'HARM_CATEGORY_HARASSMENT',
threshold: mapThreshold(
process.env.GOOGLE_SAFETY_HARASSMENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
),
},
{
category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
threshold: mapThreshold(
process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
),
},
{
category: 'HARM_CATEGORY_CIVIC_INTEGRITY',
threshold: mapThreshold(process.env.GOOGLE_SAFETY_CIVIC_INTEGRITY || 'BLOCK_NONE'),
},
];
}
getEncoding() {
return 'cl100k_base';
}


@@ -4,7 +4,7 @@ const traverse = require('traverse');
const SPLAT_SYMBOL = Symbol.for('splat');
const MESSAGE_SYMBOL = Symbol.for('message');
const CONSOLE_JSON_LONG_STRING_LENGTH = parseInt(process.env.CONSOLE_JSON_LONG_STRING_LENGTH) || 255;
const CONSOLE_JSON_STRING_LENGTH = parseInt(process.env.CONSOLE_JSON_STRING_LENGTH) || 255;
const sensitiveKeys = [
/^(sk-)[^\s]+/, // OpenAI API key pattern
@@ -206,13 +206,13 @@ const jsonTruncateFormat = winston.format((info) => {
seen.add(obj);
if (Array.isArray(obj)) {
return obj.map(item => truncateObject(item));
return obj.map((item) => truncateObject(item));
}
const newObj = {};
Object.entries(obj).forEach(([key, value]) => {
if (typeof value === 'string') {
newObj[key] = truncateLongStrings(value, CONSOLE_JSON_LONG_STRING_LENGTH);
newObj[key] = truncateLongStrings(value, CONSOLE_JSON_STRING_LENGTH);
} else {
newObj[key] = truncateObject(value);
}
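
The renamed variable keeps the same 255-character default. A small sketch of the intended behavior (this paraphrases `truncateLongStrings`; it is not the verbatim implementation):

```js
const CONSOLE_JSON_STRING_LENGTH = parseInt(process.env.CONSOLE_JSON_STRING_LENGTH) || 255;

// Paraphrased: cap long strings so JSON console logs stay readable.
function truncateLongStrings(str, maxLength = CONSOLE_JSON_STRING_LENGTH) {
  return str.length > maxLength ? `${str.slice(0, maxLength)}... [truncated]` : str;
}

// With CONSOLE_JSON_STRING_LENGTH=80 in .env, a 300-char value is cut at 80;
// with the default, it is cut at 255 plus the truncation marker.
console.log(truncateLongStrings('a'.repeat(300)));
```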


@@ -102,9 +102,14 @@ const tokenValues = Object.assign(
/* cohere doesn't have rates for the older command models,
so this was from https://artificialanalysis.ai/models/command-light/providers */
command: { prompt: 0.38, completion: 0.38 },
'gemini-2.0-flash-lite': { prompt: 0.075, completion: 0.3 },
'gemini-2.0-flash': { prompt: 0.1, completion: 0.7 },
'gemini-2.0': { prompt: 0, completion: 0 }, // https://ai.google.dev/pricing
'gemini-1.5': { prompt: 7, completion: 21 }, // May 2nd, 2024 pricing
gemini: { prompt: 0.5, completion: 1.5 }, // May 2nd, 2024 pricing
'gemini-1.5-flash-8b': { prompt: 0.075, completion: 0.3 },
'gemini-1.5-flash': { prompt: 0.15, completion: 0.6 },
'gemini-1.5': { prompt: 2.5, completion: 10 },
'gemini-pro-vision': { prompt: 0.5, completion: 1.5 },
gemini: { prompt: 0.5, completion: 1.5 },
},
bedrockValues,
);
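
Assuming these rates are USD per 1M tokens (consistent with the linked Google pricing page), a quick sanity check of the new Gemini values:

```js
// Hypothetical request: 120k prompt tokens + 8k completion tokens
// on gemini-2.0-flash at { prompt: 0.1, completion: 0.7 }.
const rate = { prompt: 0.1, completion: 0.7 };
const cost = (120_000 * rate.prompt + 8_000 * rate.completion) / 1_000_000;
console.log(`$${cost.toFixed(4)}`); // "$0.0176"
```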


@@ -380,3 +380,81 @@ describe('getCacheMultiplier', () => {
).toBe(0.03);
});
});
describe('Google Model Tests', () => {
const googleModels = [
'gemini-2.0-flash-lite-preview-02-05',
'gemini-2.0-flash-001',
'gemini-2.0-flash-exp',
'gemini-2.0-pro-exp-02-05',
'gemini-1.5-flash-8b',
'gemini-1.5-flash-thinking',
'gemini-1.5-pro-latest',
'gemini-1.5-pro-preview-0409',
'gemini-pro-vision',
'gemini-1.0',
'gemini-pro',
];
it('should return the correct prompt and completion rates for all models', () => {
const results = googleModels.map((model) => {
const valueKey = getValueKey(model, EModelEndpoint.google);
const promptRate = getMultiplier({
model,
tokenType: 'prompt',
endpoint: EModelEndpoint.google,
});
const completionRate = getMultiplier({
model,
tokenType: 'completion',
endpoint: EModelEndpoint.google,
});
return { model, valueKey, promptRate, completionRate };
});
results.forEach(({ valueKey, promptRate, completionRate }) => {
expect(promptRate).toBe(tokenValues[valueKey].prompt);
expect(completionRate).toBe(tokenValues[valueKey].completion);
});
});
it('should map to the correct model keys', () => {
const expected = {
'gemini-2.0-flash-lite-preview-02-05': 'gemini-2.0-flash-lite',
'gemini-2.0-flash-001': 'gemini-2.0-flash',
'gemini-2.0-flash-exp': 'gemini-2.0-flash',
'gemini-2.0-pro-exp-02-05': 'gemini-2.0',
'gemini-1.5-flash-8b': 'gemini-1.5-flash-8b',
'gemini-1.5-flash-thinking': 'gemini-1.5-flash',
'gemini-1.5-pro-latest': 'gemini-1.5',
'gemini-1.5-pro-preview-0409': 'gemini-1.5',
'gemini-pro-vision': 'gemini-pro-vision',
'gemini-1.0': 'gemini',
'gemini-pro': 'gemini',
};
Object.entries(expected).forEach(([model, expectedKey]) => {
const valueKey = getValueKey(model, EModelEndpoint.google);
expect(valueKey).toBe(expectedKey);
});
});
it('should handle model names with different formats', () => {
const testCases = [
{ input: 'google/gemini-pro', expected: 'gemini' },
{ input: 'gemini-pro/google', expected: 'gemini' },
{ input: 'google/gemini-2.0-flash-lite', expected: 'gemini-2.0-flash-lite' },
];
testCases.forEach(({ input, expected }) => {
const valueKey = getValueKey(input, EModelEndpoint.google);
expect(valueKey).toBe(expected);
expect(
getMultiplier({ model: input, tokenType: 'prompt', endpoint: EModelEndpoint.google }),
).toBe(tokenValues[expected].prompt);
expect(
getMultiplier({ model: input, tokenType: 'completion', endpoint: EModelEndpoint.google }),
).toBe(tokenValues[expected].completion);
});
});
});


@@ -45,7 +45,7 @@
"@langchain/google-genai": "^0.1.7",
"@langchain/google-vertexai": "^0.1.8",
"@langchain/textsplitters": "^0.1.0",
"@librechat/agents": "^2.0.1",
"@librechat/agents": "^2.0.2",
"@waylaidwanderer/fetch-event-source": "^3.0.1",
"axios": "^1.7.7",
"bcryptjs": "^2.4.3",


@@ -1,18 +1,42 @@
const { Providers } = require('@librechat/agents');
const { AuthKeys } = require('librechat-data-provider');
const { isEnabled } = require('~/server/utils');
function getThresholdMapping(model) {
const gemini1Pattern = /gemini-(1\.0|1\.5|pro$|1\.0-pro|1\.5-pro|1\.5-flash-001)/;
const restrictedPattern = /(gemini-(1\.5-flash-8b|2\.0|exp)|learnlm)/;
if (gemini1Pattern.test(model)) {
return (value) => {
if (value === 'OFF') {
return 'BLOCK_NONE';
}
return value;
};
}
if (restrictedPattern.test(model)) {
return (value) => {
if (value === 'OFF' || value === 'HARM_BLOCK_THRESHOLD_UNSPECIFIED') {
return 'BLOCK_NONE';
}
return value;
};
}
return (value) => value;
}
/**
*
* @param {boolean} isGemini2
* @returns {Array<{category: string, threshold: string}>}
* @param {string} model
* @returns {Array<{category: string, threshold: string}> | undefined}
*/
function getSafetySettings(isGemini2) {
const mapThreshold = (value) => {
if (isGemini2 && value === 'BLOCK_NONE') {
return 'OFF';
}
return value;
};
function getSafetySettings(model) {
if (isEnabled(process.env.GOOGLE_EXCLUDE_SAFETY_SETTINGS)) {
return undefined;
}
const mapThreshold = getThresholdMapping(model);
return [
{
@@ -85,8 +109,7 @@ function getLLMConfig(credentials, options = {}) {
};
/** Used only for Safety Settings */
const isGemini2 = llmConfig.model.includes('gemini-2.0') && !llmConfig.model.includes('thinking');
llmConfig.safetySettings = getSafetySettings(isGemini2);
llmConfig.safetySettings = getSafetySettings(llmConfig.model);
let provider;
@@ -153,4 +176,5 @@ function getLLMConfig(credentials, options = {}) {
module.exports = {
getLLMConfig,
getSafetySettings,
};
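
A usage sketch of the refactored helper (the return shape follows the removed GoogleClient method above; the env-driven categories are unchanged):

```js
const { getSafetySettings } = require('~/server/services/Endpoints/google/llm');

// Gemini 1.x models downgrade 'OFF' to 'BLOCK_NONE':
process.env.GOOGLE_SAFETY_HATE_SPEECH = 'OFF';
getSafetySettings('gemini-1.5-pro-latest');
// -> [..., { category: 'HARM_CATEGORY_HATE_SPEECH', threshold: 'BLOCK_NONE' }, ...]

// Restricted models (gemini-1.5-flash-8b, gemini-2.0, exp, learnlm) also
// coerce 'HARM_BLOCK_THRESHOLD_UNSPECIFIED' to 'BLOCK_NONE'.

// Opting out omits safetySettings from the request entirely:
process.env.GOOGLE_EXCLUDE_SAFETY_SETTINGS = 'true';
getSafetySettings('gemini-2.0-flash-001'); // -> undefined
```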


@@ -1,9 +1,8 @@
const { CacheKeys, Constants } = require('librechat-data-provider');
const { EModelEndpoint, CacheKeys, Constants, googleSettings } = require('librechat-data-provider');
const getLogStores = require('~/cache/getLogStores');
const initializeClient = require('./initialize');
const { isEnabled } = require('~/server/utils');
const { saveConvo } = require('~/models');
const { logger } = require('~/config');
const initializeClient = require('./initialize');
const addTitle = async (req, { text, response, client }) => {
const { TITLE_CONVO = 'true' } = process.env ?? {};
@@ -14,22 +13,16 @@ const addTitle = async (req, { text, response, client }) => {
if (client.options.titleConvo === false) {
return;
}
const DEFAULT_TITLE_MODEL = 'gemini-pro';
const { GOOGLE_TITLE_MODEL } = process.env ?? {};
let model = GOOGLE_TITLE_MODEL ?? DEFAULT_TITLE_MODEL;
const providerConfig = req.app.locals[EModelEndpoint.google];
let model =
providerConfig?.titleModel ??
GOOGLE_TITLE_MODEL ??
client.options?.modelOptions.model ??
googleSettings.model.default;
if (GOOGLE_TITLE_MODEL === Constants.CURRENT_MODEL) {
model = client.options?.modelOptions.model;
if (client.isVisionModel) {
logger.warn(
`current_model was specified for Google title request, but the model ${model} cannot process a text-only conversation. Falling back to ${DEFAULT_TITLE_MODEL}`,
);
model = DEFAULT_TITLE_MODEL;
}
}
const titleEndpointOptions = {
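
The title model now resolves through a single nullish-coalescing chain instead of the removed vision-model fallback. Precedence, as the diff above reads (comments are interpretive, example value hypothetical):

```js
const model =
  providerConfig?.titleModel ??          // per-endpoint config, if set
  GOOGLE_TITLE_MODEL ??                  // env override, e.g. 'gemini-2.0-flash-001'
  client.options?.modelOptions.model ??  // the conversation's current model
  googleSettings.model.default;          // library default as a last resort

// GOOGLE_TITLE_MODEL=current_model (Constants.CURRENT_MODEL) still forces
// the active chat model, as in the retained branch above.
```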


@@ -1,9 +1,11 @@
const { z } = require('zod');
const { tool } = require('@langchain/core/tools');
const { Constants: AgentConstants } = require('@librechat/agents');
const { Constants: AgentConstants, Providers } = require('@librechat/agents');
const {
Constants,
convertJsonSchemaToZod,
ContentTypes,
isAssistantsEndpoint,
convertJsonSchemaToZod,
} = require('librechat-data-provider');
const { logger, getMCPManager } = require('~/config');
@@ -25,7 +27,15 @@ async function createMCPTool({ req, toolKey, provider }) {
}
/** @type {LCTool} */
const { description, parameters } = toolDefinition;
const schema = convertJsonSchemaToZod(parameters);
const isGoogle = provider === Providers.VERTEXAI || provider === Providers.GOOGLE;
let schema = convertJsonSchemaToZod(parameters, {
allowEmptyObject: !isGoogle,
});
if (!schema) {
schema = z.object({ input: z.string().optional() });
}
const [toolName, serverName] = toolKey.split(Constants.mcp_delimiter);
/** @type {(toolInput: Object | string) => Promise<unknown>} */
const _call = async (toolInput) => {
@@ -35,6 +45,9 @@ async function createMCPTool({ req, toolKey, provider }) {
if (isAssistantsEndpoint(provider) && Array.isArray(result)) {
return result[0];
}
if (isGoogle && Array.isArray(result[0]) && result[0][0]?.type === ContentTypes.TEXT) {
return [result[0][0].text, result[1]];
}
return result;
} catch (error) {
logger.error(`${toolName} MCP server tool call failed`, error);
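
Why the schema fallback matters: Google/Vertex rejects tools whose parameters are an empty object schema, so `convertJsonSchemaToZod` can now decline to produce one, and the tool substitutes a permissive input. A sketch (assuming `allowEmptyObject` behaves as its name suggests):

```js
const { z } = require('zod');
const { convertJsonSchemaToZod } = require('librechat-data-provider');

const emptyParams = { type: 'object', properties: {} };

// Non-Google providers keep the empty object schema:
convertJsonSchemaToZod(emptyParams, { allowEmptyObject: true }); // ~ z.object({})

// For Google/Vertex, empty object schemas are disallowed, so the helper
// yields undefined and createMCPTool falls back to an optional string:
let schema = convertJsonSchemaToZod(emptyParams, { allowEmptyObject: false });
if (!schema) {
  schema = z.object({ input: z.string().optional() });
}
```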


@@ -49,11 +49,14 @@ const cohereModels = {
const googleModels = {
/* Max I/O is combined so we subtract the amount from max response tokens for actual total */
gemini: 30720, // -2048 from max
'gemini-pro-vision': 12288, // -4096 from max
'gemini-exp': 8000,
'gemini-2.0-flash-thinking-exp': 30720, // -2048 from max
'gemini-2.0': 1048576,
'gemini-1.5': 1048576,
'gemini-pro-vision': 12288,
'gemini-exp': 2000000,
'gemini-2.0': 2000000,
'gemini-2.0-flash': 1000000,
'gemini-2.0-flash-lite': 1000000,
'gemini-1.5': 1000000,
'gemini-1.5-flash': 1000000,
'gemini-1.5-flash-8b': 1000000,
'text-bison-32k': 32758, // -10 from max
'chat-bison-32k': 32758, // -10 from max
'code-bison-32k': 32758, // -10 from max


@@ -154,6 +154,24 @@ describe('getModelMaxTokens', () => {
});
test('should return correct tokens for partial match - Google models', () => {
expect(getModelMaxTokens('gemini-2.0-flash-lite-preview-02-05', EModelEndpoint.google)).toBe(
maxTokensMap[EModelEndpoint.google]['gemini-2.0-flash-lite'],
);
expect(getModelMaxTokens('gemini-2.0-flash-001', EModelEndpoint.google)).toBe(
maxTokensMap[EModelEndpoint.google]['gemini-2.0-flash'],
);
expect(getModelMaxTokens('gemini-2.0-flash-exp', EModelEndpoint.google)).toBe(
maxTokensMap[EModelEndpoint.google]['gemini-2.0-flash'],
);
expect(getModelMaxTokens('gemini-2.0-pro-exp-02-05', EModelEndpoint.google)).toBe(
maxTokensMap[EModelEndpoint.google]['gemini-2.0'],
);
expect(getModelMaxTokens('gemini-1.5-flash-8b', EModelEndpoint.google)).toBe(
maxTokensMap[EModelEndpoint.google]['gemini-1.5-flash-8b'],
);
expect(getModelMaxTokens('gemini-1.5-flash-thinking', EModelEndpoint.google)).toBe(
maxTokensMap[EModelEndpoint.google]['gemini-1.5-flash'],
);
expect(getModelMaxTokens('gemini-1.5-pro-latest', EModelEndpoint.google)).toBe(
maxTokensMap[EModelEndpoint.google]['gemini-1.5'],
);