mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-25 03:44:09 +01:00
💎 fix: Gemini Image Gen Tool Vertex AI Auth and File Storage (#11923)
* chore: streamline saveToCloudStorage function and enhance error handling
  - Removed unnecessary parameters and streamlined the logic for saving images to cloud storage.
  - Introduced buffer handling for base64 image data and improved the integration with file strategy functions.
  - Enhanced error handling during local image saving to ensure robustness.
  - Updated the createGeminiImageTool function to reflect changes in the saveToCloudStorage implementation.
* refactor: streamline image persistence logic in GeminiImageGen
  - Consolidated image saving functionality by renaming and refactoring the saveToCloudStorage function to persistGeneratedImage.
  - Improved error handling and logging for image persistence operations.
  - Enhanced the replaceUnwantedChars function to better sanitize input strings.
  - Updated createGeminiImageTool to reflect changes in image handling and ensure consistent behavior across storage strategies.
* fix: clean up GeminiImageGen by removing unused functions and improving logging
  - Removed the getSafeFormat and persistGeneratedImage functions to streamline image handling.
  - Updated logging in createGeminiImageTool for clarity and consistency.
  - Consolidated imports by eliminating unused dependencies, enhancing code maintainability.
* chore: update environment configuration and manifest for unused GEMINI_VERTEX_ENABLED
  - Removed the Vertex AI configuration option from .env.example to simplify setup.
  - Updated the manifest.json to reflect the removal of the Vertex AI dependency in the authentication field.
  - Cleaned up the createGeminiImageTool function by eliminating unused fields related to Vertex AI, streamlining the code.
* fix: update loadAuthValues call in loadTools function for GeminiImageGen tool
  - Modified the loadAuthValues function call to include throwError: false, preventing exceptions on authentication failures.
  - Removed the unused processFileURL parameter from the tool context object, streamlining the code.
* refactor: streamline GoogleGenAI initialization in GeminiImageGen
  - Removed unused file system access check for Google application credentials, simplifying the environment setup.
  - Added googleAuthOptions to the GoogleGenAI instantiation, enhancing the configuration for authentication.
* fix: update Gemini API Key label and description in manifest.json
  - Changed the label to indicate that the Gemini API Key is optional.
  - Revised the description to clarify usage with Vertex AI and service accounts, enhancing user guidance.
* fix: enhance abort signal handling in createGeminiImageTool
  - Introduced derivedSignal to manage abort events during image generation, improving responsiveness to cancellation requests.
  - Added an abortHandler to log when image generation is aborted, enhancing debugging capabilities.
  - Ensured proper cleanup of event listeners in the finally block to prevent memory leaks.
* fix: update authentication handling for plugins to support optional fields
  - Added support for optional authentication fields in the manifest and PluginAuthForm.
  - Updated the checkPluginAuth function to correctly validate plugins with optional fields.
  - Enhanced tests to cover scenarios with optional authentication fields, ensuring accurate validation logic.
This commit is contained in:
parent
1d0a4c501f
commit
f3eb197675
8 changed files with 136 additions and 181 deletions
|
|
@ -1,4 +1,3 @@
|
|||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const sharp = require('sharp');
|
||||
const { v4 } = require('uuid');
|
||||
|
|
@ -6,12 +5,7 @@ const { ProxyAgent } = require('undici');
|
|||
const { GoogleGenAI } = require('@google/genai');
|
||||
const { tool } = require('@langchain/core/tools');
|
||||
const { logger } = require('@librechat/data-schemas');
|
||||
const {
|
||||
FileContext,
|
||||
ContentTypes,
|
||||
FileSources,
|
||||
EImageOutputType,
|
||||
} = require('librechat-data-provider');
|
||||
const { ContentTypes, EImageOutputType } = require('librechat-data-provider');
|
||||
const {
|
||||
geminiToolkit,
|
||||
loadServiceKey,
|
||||
|
|
@ -59,17 +53,12 @@ const displayMessage =
|
|||
* @returns {string} - The processed string
|
||||
*/
|
||||
function replaceUnwantedChars(inputString) {
|
||||
return inputString?.replace(/[^\w\s\-_.,!?()]/g, '') || '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate and sanitize image format
|
||||
* @param {string} format - The format to validate
|
||||
* @returns {string} - Safe format
|
||||
*/
|
||||
function getSafeFormat(format) {
|
||||
const allowedFormats = ['png', 'jpg', 'jpeg', 'webp', 'gif'];
|
||||
return allowedFormats.includes(format?.toLowerCase()) ? format.toLowerCase() : 'png';
|
||||
return (
|
||||
inputString
|
||||
?.replace(/\r\n|\r|\n/g, ' ')
|
||||
.replace(/"/g, '')
|
||||
.trim() || ''
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -117,11 +106,8 @@ async function initializeGeminiClient(options = {}) {
|
|||
return new GoogleGenAI({ apiKey: googleKey });
|
||||
}
|
||||
|
||||
// Fall back to Vertex AI with service account
|
||||
logger.debug('[GeminiImageGen] Using Vertex AI with service account');
|
||||
const credentialsPath = getDefaultServiceKeyPath();
|
||||
|
||||
// Use loadServiceKey for consistent loading (supports file paths, JSON strings, base64)
|
||||
const serviceKey = await loadServiceKey(credentialsPath);
|
||||
|
||||
if (!serviceKey || !serviceKey.project_id) {
|
||||
|
|
@ -131,75 +117,14 @@ async function initializeGeminiClient(options = {}) {
|
|||
);
|
||||
}
|
||||
|
||||
// Set GOOGLE_APPLICATION_CREDENTIALS for any Google Cloud SDK dependencies
|
||||
try {
|
||||
await fs.promises.access(credentialsPath);
|
||||
process.env.GOOGLE_APPLICATION_CREDENTIALS = credentialsPath;
|
||||
} catch {
|
||||
// File doesn't exist, skip setting env var
|
||||
}
|
||||
|
||||
return new GoogleGenAI({
|
||||
vertexai: true,
|
||||
project: serviceKey.project_id,
|
||||
location: process.env.GOOGLE_LOC || process.env.GOOGLE_CLOUD_LOCATION || 'global',
|
||||
googleAuthOptions: { credentials: serviceKey },
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Save image to local filesystem
|
||||
* @param {string} base64Data - Base64 encoded image data
|
||||
* @param {string} format - Image format
|
||||
* @param {string} userId - User ID
|
||||
* @returns {Promise<string>} - The relative URL
|
||||
*/
|
||||
async function saveImageLocally(base64Data, format, userId) {
|
||||
const safeFormat = getSafeFormat(format);
|
||||
const safeUserId = userId ? path.basename(userId) : 'default';
|
||||
const imageName = `gemini-img-${v4()}.${safeFormat}`;
|
||||
const userDir = path.join(process.cwd(), 'client/public/images', safeUserId);
|
||||
|
||||
await fs.promises.mkdir(userDir, { recursive: true });
|
||||
|
||||
const filePath = path.join(userDir, imageName);
|
||||
await fs.promises.writeFile(filePath, Buffer.from(base64Data, 'base64'));
|
||||
|
||||
logger.debug('[GeminiImageGen] Image saved locally to:', filePath);
|
||||
return `/images/${safeUserId}/${imageName}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Save image to cloud storage
|
||||
* @param {Object} params - Parameters
|
||||
* @returns {Promise<string|null>} - The storage URL or null
|
||||
*/
|
||||
async function saveToCloudStorage({ base64Data, format, processFileURL, fileStrategy, userId }) {
|
||||
if (!processFileURL || !fileStrategy || !userId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
const safeFormat = getSafeFormat(format);
|
||||
const safeUserId = path.basename(userId);
|
||||
const dataURL = `data:image/${safeFormat};base64,${base64Data}`;
|
||||
const imageName = `gemini-img-${v4()}.${safeFormat}`;
|
||||
|
||||
const result = await processFileURL({
|
||||
URL: dataURL,
|
||||
basePath: 'images',
|
||||
userId: safeUserId,
|
||||
fileName: imageName,
|
||||
fileStrategy,
|
||||
context: FileContext.image_generation,
|
||||
});
|
||||
|
||||
return result.filepath;
|
||||
} catch (error) {
|
||||
logger.error('[GeminiImageGen] Error saving to cloud storage:', error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert image files to Gemini inline data format
|
||||
* @param {Object} params - Parameters
|
||||
|
|
@ -390,34 +315,18 @@ function createGeminiImageTool(fields = {}) {
|
|||
throw new Error('This tool is only available for agents.');
|
||||
}
|
||||
|
||||
// Skip validation during tool creation - validation happens at runtime in initializeGeminiClient
|
||||
// This allows the tool to be added to agents when using Vertex AI without requiring API keys
|
||||
// The actual credentials check happens when the tool is invoked
|
||||
|
||||
const {
|
||||
req,
|
||||
imageFiles = [],
|
||||
processFileURL,
|
||||
userId,
|
||||
fileStrategy,
|
||||
GEMINI_API_KEY,
|
||||
GOOGLE_KEY,
|
||||
// GEMINI_VERTEX_ENABLED is used for auth validation only (not used in code)
|
||||
// When set as env var, it signals Vertex AI is configured and bypasses API key requirement
|
||||
} = fields;
|
||||
const { req, imageFiles = [], userId, fileStrategy, GEMINI_API_KEY, GOOGLE_KEY } = fields;
|
||||
|
||||
const imageOutputType = fields.imageOutputType || EImageOutputType.PNG;
|
||||
|
||||
const geminiImageGenTool = tool(
|
||||
async ({ prompt, image_ids, aspectRatio, imageSize }, _runnableConfig) => {
|
||||
async ({ prompt, image_ids, aspectRatio, imageSize }, runnableConfig) => {
|
||||
if (!prompt) {
|
||||
throw new Error('Missing required field: prompt');
|
||||
}
|
||||
|
||||
logger.debug('[GeminiImageGen] Generating image with prompt:', prompt?.substring(0, 100));
|
||||
logger.debug('[GeminiImageGen] Options:', { aspectRatio, imageSize });
|
||||
logger.debug('[GeminiImageGen] Generating image', { aspectRatio, imageSize });
|
||||
|
||||
// Initialize Gemini client with user-provided credentials
|
||||
let ai;
|
||||
try {
|
||||
ai = await initializeGeminiClient({
|
||||
|
|
@ -432,10 +341,8 @@ function createGeminiImageTool(fields = {}) {
|
|||
];
|
||||
}
|
||||
|
||||
// Build request contents
|
||||
const contents = [{ text: replaceUnwantedChars(prompt) }];
|
||||
|
||||
// Add context images if provided
|
||||
if (image_ids?.length > 0) {
|
||||
const contextImages = await convertImagesToInlineData({
|
||||
imageFiles,
|
||||
|
|
@ -447,28 +354,34 @@ function createGeminiImageTool(fields = {}) {
|
|||
logger.debug('[GeminiImageGen] Added', contextImages.length, 'context images');
|
||||
}
|
||||
|
||||
// Generate image
|
||||
let apiResponse;
|
||||
const geminiModel = process.env.GEMINI_IMAGE_MODEL || 'gemini-2.5-flash-image';
|
||||
try {
|
||||
// Build config with optional imageConfig
|
||||
const config = {
|
||||
responseModalities: ['TEXT', 'IMAGE'],
|
||||
};
|
||||
const config = {
|
||||
responseModalities: ['TEXT', 'IMAGE'],
|
||||
};
|
||||
|
||||
// Add imageConfig if aspectRatio or imageSize is specified
|
||||
// Note: gemini-2.5-flash-image doesn't support imageSize
|
||||
const supportsImageSize = !geminiModel.includes('gemini-2.5-flash-image');
|
||||
if (aspectRatio || (imageSize && supportsImageSize)) {
|
||||
config.imageConfig = {};
|
||||
if (aspectRatio) {
|
||||
config.imageConfig.aspectRatio = aspectRatio;
|
||||
}
|
||||
if (imageSize && supportsImageSize) {
|
||||
config.imageConfig.imageSize = imageSize;
|
||||
}
|
||||
const supportsImageSize = !geminiModel.includes('gemini-2.5-flash-image');
|
||||
if (aspectRatio || (imageSize && supportsImageSize)) {
|
||||
config.imageConfig = {};
|
||||
if (aspectRatio) {
|
||||
config.imageConfig.aspectRatio = aspectRatio;
|
||||
}
|
||||
if (imageSize && supportsImageSize) {
|
||||
config.imageConfig.imageSize = imageSize;
|
||||
}
|
||||
}
|
||||
|
||||
let derivedSignal = null;
|
||||
let abortHandler = null;
|
||||
|
||||
if (runnableConfig?.signal) {
|
||||
derivedSignal = AbortSignal.any([runnableConfig.signal]);
|
||||
abortHandler = () => logger.debug('[GeminiImageGen] Image generation aborted');
|
||||
derivedSignal.addEventListener('abort', abortHandler, { once: true });
|
||||
config.abortSignal = derivedSignal;
|
||||
}
|
||||
|
||||
try {
|
||||
apiResponse = await ai.models.generateContent({
|
||||
model: geminiModel,
|
||||
contents,
|
||||
|
|
@ -480,9 +393,12 @@ function createGeminiImageTool(fields = {}) {
|
|||
[{ type: ContentTypes.TEXT, text: `Image generation failed: ${error.message}` }],
|
||||
{ content: [], file_ids: [] },
|
||||
];
|
||||
} finally {
|
||||
if (abortHandler && derivedSignal) {
|
||||
derivedSignal.removeEventListener('abort', abortHandler);
|
||||
}
|
||||
}
|
||||
|
||||
// Check for safety blocks
|
||||
const safetyBlock = checkForSafetyBlock(apiResponse);
|
||||
if (safetyBlock) {
|
||||
logger.warn('[GeminiImageGen] Safety block:', safetyBlock);
|
||||
|
|
@ -509,46 +425,7 @@ function createGeminiImageTool(fields = {}) {
|
|||
const imageData = convertedBuffer.toString('base64');
|
||||
const mimeType = outputFormat === 'jpeg' ? 'image/jpeg' : `image/${outputFormat}`;
|
||||
|
||||
logger.debug('[GeminiImageGen] Image format:', { outputFormat, mimeType });
|
||||
|
||||
let imageUrl;
|
||||
const useLocalStorage = !fileStrategy || fileStrategy === FileSources.local;
|
||||
|
||||
if (useLocalStorage) {
|
||||
try {
|
||||
imageUrl = await saveImageLocally(imageData, outputFormat, userId);
|
||||
} catch (error) {
|
||||
logger.error('[GeminiImageGen] Local save failed:', error);
|
||||
imageUrl = `data:${mimeType};base64,${imageData}`;
|
||||
}
|
||||
} else {
|
||||
const cloudUrl = await saveToCloudStorage({
|
||||
base64Data: imageData,
|
||||
format: outputFormat,
|
||||
processFileURL,
|
||||
fileStrategy,
|
||||
userId,
|
||||
});
|
||||
|
||||
if (cloudUrl) {
|
||||
imageUrl = cloudUrl;
|
||||
} else {
|
||||
// Fallback to local
|
||||
try {
|
||||
imageUrl = await saveImageLocally(imageData, outputFormat, userId);
|
||||
} catch (_error) {
|
||||
imageUrl = `data:${mimeType};base64,${imageData}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug('[GeminiImageGen] Image URL:', imageUrl);
|
||||
|
||||
// For the artifact, we need a data URL (same as OpenAI)
|
||||
// The local file save is for persistence, but the response needs a data URL
|
||||
const dataUrl = `data:${mimeType};base64,${imageData}`;
|
||||
|
||||
// Return in content_and_artifact format (same as OpenAI)
|
||||
const file_ids = [v4()];
|
||||
const content = [
|
||||
{
|
||||
|
|
@ -567,8 +444,7 @@ function createGeminiImageTool(fields = {}) {
|
|||
},
|
||||
];
|
||||
|
||||
// Record token usage for balance tracking (don't await to avoid blocking response)
|
||||
const conversationId = _runnableConfig?.configurable?.thread_id;
|
||||
const conversationId = runnableConfig?.configurable?.thread_id;
|
||||
recordTokenUsage({
|
||||
usageMetadata: apiResponse.usageMetadata,
|
||||
req,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue