💎 fix: Gemini Image Gen Tool Vertex AI Auth and File Storage (#11923)

* chore: simplify saveToCloudStorage function and enhance error handling - Removed unnecessary parameters and streamlined the logic for saving images to cloud storage. - Introduced buffer handling for base64 image data and improved the integration with file strategy functions. - Enhanced error handling during local image saving to ensure robustness. - Updated the createGeminiImageTool function to reflect changes in the saveToCloudStorage implementation. * refactor: streamline image persistence logic in GeminiImageGen - Consolidated image saving functionality by renaming and refactoring the saveToCloudStorage function to persistGeneratedImage. - Improved error handling and logging for image persistence operations. - Enhanced the replaceUnwantedChars function to better sanitize input strings. - Updated createGeminiImageTool to reflect changes in image handling and ensure consistent behavior across storage strategies. * fix: clean up GeminiImageGen by removing unused functions and improving logging - Removed the getSafeFormat and persistGeneratedImage functions to streamline image handling. - Updated logging in createGeminiImageTool for clarity and consistency. - Consolidated imports by eliminating unused dependencies, enhancing code maintainability. * chore: update environment configuration and manifest for unused GEMINI_VERTEX_ENABLED - Removed the Vertex AI configuration option from .env.example to simplify setup. - Updated the manifest.json to reflect the removal of the Vertex AI dependency in the authentication field. - Cleaned up the createGeminiImageTool function by eliminating unused fields related to Vertex AI, streamlining the code. * fix: update loadAuthValues call in loadTools function for GeminiImageGen tool - Modified the loadAuthValues function call to include throwError: false, preventing exceptions on authentication failures. - Removed the unused processFileURL parameter from the tool context object, streamlining the code. 
* refactor: streamline GoogleGenAI initialization in GeminiImageGen - Removed unused file system access check for Google application credentials, simplifying the environment setup. - Added googleAuthOptions to the GoogleGenAI instantiation, enhancing the configuration for authentication. * fix: update Gemini API Key label and description in manifest.json - Changed the label to indicate that the Gemini API Key is optional. - Revised the description to clarify usage with Vertex AI and service accounts, enhancing user guidance. * fix: enhance abort signal handling in createGeminiImageTool - Introduced derivedSignal to manage abort events during image generation, improving responsiveness to cancellation requests. - Added an abortHandler to log when image generation is aborted, enhancing debugging capabilities. - Ensured proper cleanup of event listeners in the finally block to prevent memory leaks. * fix: update authentication handling for plugins to support optional fields - Added support for optional authentication fields in the manifest and PluginAuthForm. - Updated the checkPluginAuth function to correctly validate plugins with optional fields. - Enhanced tests to cover scenarios with optional authentication fields, ensuring accurate validation logic.
2026-02-25 03:44:09 +01:00 · 2026-02-24 08:21:02 -05:00 · 2026-02-24 08:21:02 -05:00 · f3eb197675
commit f3eb197675
parent 1d0a4c501f
8 changed files with 136 additions and 181 deletions
--- a/api/app/clients/tools/structured/GeminiImageGen.js
+++ b/api/app/clients/tools/structured/GeminiImageGen.js
@@ -1,4 +1,3 @@
-const fs = require('fs');
 const path = require('path');
 const sharp = require('sharp');
 const { v4 } = require('uuid');
@@ -6,12 +5,7 @@ const { ProxyAgent } = require('undici');
 const { GoogleGenAI } = require('@google/genai');
 const { tool } = require('@langchain/core/tools');
 const { logger } = require('@librechat/data-schemas');
-const {
-  FileContext,
-  ContentTypes,
-  FileSources,
-  EImageOutputType,
-} = require('librechat-data-provider');
+const { ContentTypes, EImageOutputType } = require('librechat-data-provider');
 const {
  geminiToolkit,
  loadServiceKey,
@@ -59,17 +53,12 @@ const displayMessage =
 * @returns {string} - The processed string
 */
 function replaceUnwantedChars(inputString) {
-  return inputString?.replace(/[^\w\s\-_.,!?()]/g, '') || '';
-}
-
-/**
- * Validate and sanitize image format
- * @param {string} format - The format to validate
- * @returns {string} - Safe format
- */
-function getSafeFormat(format) {
-  const allowedFormats = ['png', 'jpg', 'jpeg', 'webp', 'gif'];
-  return allowedFormats.includes(format?.toLowerCase()) ? format.toLowerCase() : 'png';
+  return (
+    inputString
+      ?.replace(/\r\n|\r|\n/g, ' ')
+      .replace(/"/g, '')
+      .trim() || ''
+  );
 }

 /**
@@ -117,11 +106,8 @@ async function initializeGeminiClient(options = {}) {
    return new GoogleGenAI({ apiKey: googleKey });
  }

-  // Fall back to Vertex AI with service account
  logger.debug('[GeminiImageGen] Using Vertex AI with service account');
  const credentialsPath = getDefaultServiceKeyPath();
-
-  // Use loadServiceKey for consistent loading (supports file paths, JSON strings, base64)
  const serviceKey = await loadServiceKey(credentialsPath);

  if (!serviceKey || !serviceKey.project_id) {
@@ -131,75 +117,14 @@ async function initializeGeminiClient(options = {}) {
    );
  }

-  // Set GOOGLE_APPLICATION_CREDENTIALS for any Google Cloud SDK dependencies
-  try {
-    await fs.promises.access(credentialsPath);
-    process.env.GOOGLE_APPLICATION_CREDENTIALS = credentialsPath;
-  } catch {
-    // File doesn't exist, skip setting env var
-  }
-
  return new GoogleGenAI({
    vertexai: true,
    project: serviceKey.project_id,
    location: process.env.GOOGLE_LOC || process.env.GOOGLE_CLOUD_LOCATION || 'global',
+    googleAuthOptions: { credentials: serviceKey },
  });
 }

-/**
- * Save image to local filesystem
- * @param {string} base64Data - Base64 encoded image data
- * @param {string} format - Image format
- * @param {string} userId - User ID
- * @returns {Promise<string>} - The relative URL
- */
-async function saveImageLocally(base64Data, format, userId) {
-  const safeFormat = getSafeFormat(format);
-  const safeUserId = userId ? path.basename(userId) : 'default';
-  const imageName = `gemini-img-${v4()}.${safeFormat}`;
-  const userDir = path.join(process.cwd(), 'client/public/images', safeUserId);
-
-  await fs.promises.mkdir(userDir, { recursive: true });
-
-  const filePath = path.join(userDir, imageName);
-  await fs.promises.writeFile(filePath, Buffer.from(base64Data, 'base64'));
-
-  logger.debug('[GeminiImageGen] Image saved locally to:', filePath);
-  return `/images/${safeUserId}/${imageName}`;
-}
-
-/**
- * Save image to cloud storage
- * @param {Object} params - Parameters
- * @returns {Promise<string|null>} - The storage URL or null
- */
-async function saveToCloudStorage({ base64Data, format, processFileURL, fileStrategy, userId }) {
-  if (!processFileURL || !fileStrategy || !userId) {
-    return null;
-  }
-
-  try {
-    const safeFormat = getSafeFormat(format);
-    const safeUserId = path.basename(userId);
-    const dataURL = `data:image/${safeFormat};base64,${base64Data}`;
-    const imageName = `gemini-img-${v4()}.${safeFormat}`;
-
-    const result = await processFileURL({
-      URL: dataURL,
-      basePath: 'images',
-      userId: safeUserId,
-      fileName: imageName,
-      fileStrategy,
-      context: FileContext.image_generation,
-    });
-
-    return result.filepath;
-  } catch (error) {
-    logger.error('[GeminiImageGen] Error saving to cloud storage:', error);
-    return null;
-  }
-}
-
 /**
 * Convert image files to Gemini inline data format
 * @param {Object} params - Parameters
@@ -390,34 +315,18 @@ function createGeminiImageTool(fields = {}) {
    throw new Error('This tool is only available for agents.');
  }

-  // Skip validation during tool creation - validation happens at runtime in initializeGeminiClient
-  // This allows the tool to be added to agents when using Vertex AI without requiring API keys
-  // The actual credentials check happens when the tool is invoked
-
-  const {
-    req,
-    imageFiles = [],
-    processFileURL,
-    userId,
-    fileStrategy,
-    GEMINI_API_KEY,
-    GOOGLE_KEY,
-    // GEMINI_VERTEX_ENABLED is used for auth validation only (not used in code)
-    // When set as env var, it signals Vertex AI is configured and bypasses API key requirement
-  } = fields;
+  const { req, imageFiles = [], userId, fileStrategy, GEMINI_API_KEY, GOOGLE_KEY } = fields;

  const imageOutputType = fields.imageOutputType || EImageOutputType.PNG;

  const geminiImageGenTool = tool(
-    async ({ prompt, image_ids, aspectRatio, imageSize }, _runnableConfig) => {
+    async ({ prompt, image_ids, aspectRatio, imageSize }, runnableConfig) => {
      if (!prompt) {
        throw new Error('Missing required field: prompt');
      }

-      logger.debug('[GeminiImageGen] Generating image with prompt:', prompt?.substring(0, 100));
-      logger.debug('[GeminiImageGen] Options:', { aspectRatio, imageSize });
+      logger.debug('[GeminiImageGen] Generating image', { aspectRatio, imageSize });

-      // Initialize Gemini client with user-provided credentials
      let ai;
      try {
        ai = await initializeGeminiClient({
@@ -432,10 +341,8 @@ function createGeminiImageTool(fields = {}) {
        ];
      }

-      // Build request contents
      const contents = [{ text: replaceUnwantedChars(prompt) }];

-      // Add context images if provided
      if (image_ids?.length > 0) {
        const contextImages = await convertImagesToInlineData({
          imageFiles,
@@ -447,28 +354,34 @@ function createGeminiImageTool(fields = {}) {
        logger.debug('[GeminiImageGen] Added', contextImages.length, 'context images');
      }

-      // Generate image
      let apiResponse;
      const geminiModel = process.env.GEMINI_IMAGE_MODEL || 'gemini-2.5-flash-image';
-      try {
-        // Build config with optional imageConfig
-        const config = {
-          responseModalities: ['TEXT', 'IMAGE'],
-        };
+      const config = {
+        responseModalities: ['TEXT', 'IMAGE'],
+      };

-        // Add imageConfig if aspectRatio or imageSize is specified
-        // Note: gemini-2.5-flash-image doesn't support imageSize
-        const supportsImageSize = !geminiModel.includes('gemini-2.5-flash-image');
-        if (aspectRatio || (imageSize && supportsImageSize)) {
-          config.imageConfig = {};
-          if (aspectRatio) {
-            config.imageConfig.aspectRatio = aspectRatio;
-          }
-          if (imageSize && supportsImageSize) {
-            config.imageConfig.imageSize = imageSize;
-          }
+      const supportsImageSize = !geminiModel.includes('gemini-2.5-flash-image');
+      if (aspectRatio || (imageSize && supportsImageSize)) {
+        config.imageConfig = {};
+        if (aspectRatio) {
+          config.imageConfig.aspectRatio = aspectRatio;
        }
+        if (imageSize && supportsImageSize) {
+          config.imageConfig.imageSize = imageSize;
+        }
+      }

+      let derivedSignal = null;
+      let abortHandler = null;
+
+      if (runnableConfig?.signal) {
+        derivedSignal = AbortSignal.any([runnableConfig.signal]);
+        abortHandler = () => logger.debug('[GeminiImageGen] Image generation aborted');
+        derivedSignal.addEventListener('abort', abortHandler, { once: true });
+        config.abortSignal = derivedSignal;
+      }
+
+      try {
        apiResponse = await ai.models.generateContent({
          model: geminiModel,
          contents,
@@ -480,9 +393,12 @@ function createGeminiImageTool(fields = {}) {
          [{ type: ContentTypes.TEXT, text: `Image generation failed: ${error.message}` }],
          { content: [], file_ids: [] },
        ];
+      } finally {
+        if (abortHandler && derivedSignal) {
+          derivedSignal.removeEventListener('abort', abortHandler);
+        }
      }

-      // Check for safety blocks
      const safetyBlock = checkForSafetyBlock(apiResponse);
      if (safetyBlock) {
        logger.warn('[GeminiImageGen] Safety block:', safetyBlock);
@@ -509,46 +425,7 @@ function createGeminiImageTool(fields = {}) {
      const imageData = convertedBuffer.toString('base64');
      const mimeType = outputFormat === 'jpeg' ? 'image/jpeg' : `image/${outputFormat}`;

-      logger.debug('[GeminiImageGen] Image format:', { outputFormat, mimeType });
-
-      let imageUrl;
-      const useLocalStorage = !fileStrategy || fileStrategy === FileSources.local;
-
-      if (useLocalStorage) {
-        try {
-          imageUrl = await saveImageLocally(imageData, outputFormat, userId);
-        } catch (error) {
-          logger.error('[GeminiImageGen] Local save failed:', error);
-          imageUrl = `data:${mimeType};base64,${imageData}`;
-        }
-      } else {
-        const cloudUrl = await saveToCloudStorage({
-          base64Data: imageData,
-          format: outputFormat,
-          processFileURL,
-          fileStrategy,
-          userId,
-        });
-
-        if (cloudUrl) {
-          imageUrl = cloudUrl;
-        } else {
-          // Fallback to local
-          try {
-            imageUrl = await saveImageLocally(imageData, outputFormat, userId);
-          } catch (_error) {
-            imageUrl = `data:${mimeType};base64,${imageData}`;
-          }
-        }
-      }
-
-      logger.debug('[GeminiImageGen] Image URL:', imageUrl);
-
-      // For the artifact, we need a data URL (same as OpenAI)
-      // The local file save is for persistence, but the response needs a data URL
      const dataUrl = `data:${mimeType};base64,${imageData}`;
-
-      // Return in content_and_artifact format (same as OpenAI)
      const file_ids = [v4()];
      const content = [
        {
@@ -567,8 +444,7 @@ function createGeminiImageTool(fields = {}) {
        },
      ];

-      // Record token usage for balance tracking (don't await to avoid blocking response)
-      const conversationId = _runnableConfig?.configurable?.thread_id;
+      const conversationId = runnableConfig?.configurable?.thread_id;
      recordTokenUsage({
        usageMetadata: apiResponse.usageMetadata,
        req,