LibreChat/api/app/clients/tools/structured/GeminiImageGen.js
Dev Bhanushali 1b5ab4cdf2
🔧 refactor: Update Google credentials handling in GeminiImageGen.js
* Refactored the credentials path to follow a consistent pattern with other Google service integrations, allowing for an environment variable override.
* Updated documentation in README-GeminiNanoBanana.md to reflect the new credentials handling approach and removed references to hardcoded paths.
2025-09-13 14:57:44 +05:30

657 lines
24 KiB
JavaScript

const { z } = require('zod');
const fs = require('fs');
const path = require('path');
const { v4 } = require('uuid');
const { Tool } = require('@langchain/core/tools');
const { FileContext, ContentTypes, FileSources } = require('librechat-data-provider');
const { logger } = require('@librechat/data-schemas');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { getFiles } = require('~/models/File');
const { GoogleGenAI } = require('@google/genai');
/**
 * Instruction text handed back to the model alongside a generated image.
 * It is used as the text content of a successful tool response and tells the
 * model not to re-describe the image or mention download links.
 */
const displayMessage =
"Gemini displayed an image. All generated images are already plainly visible, so don't repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.";
/**
 * Persist a base64-encoded image through the injected `processFileURL` helper.
 *
 * The image is wrapped in a `data:` URL, given a unique `gemini-img-<uuid>` file name and
 * handed to `processFileURL` together with the storage strategy and owning user.
 *
 * @param {Object} params - The parameters object
 * @param {string} params.base64Data - Base64 encoded image data
 * @param {string} params.outputFormat - Image format, used for both the MIME subtype and the file extension
 * @param {Function} params.processFileURL - Function that stores the file behind a URL
 * @param {string} params.fileStrategy - File storage strategy passed through to `processFileURL`
 * @param {string} params.userId - User ID
 * @returns {Promise<string|null>} The `filepath` reported by `processFileURL`, or `null` when a
 *   required parameter is missing or saving fails (callers then fall back to a data URL)
 */
async function saveBase64ImageToStorage({
  base64Data,
  outputFormat,
  processFileURL,
  fileStrategy,
  userId,
}) {
  const canPersist = Boolean(processFileURL && fileStrategy && userId);
  if (!canPersist) {
    logger.warn(
      '[GeminiImageGen] Missing required parameters for storage, falling back to data URL',
    );
    return null;
  }

  try {
    // A fresh UUID keeps file names unique per generated image.
    const fileName = `gemini-img-${v4()}.${outputFormat}`;
    const { filepath } = await processFileURL({
      URL: `data:image/${outputFormat};base64,${base64Data}`,
      basePath: 'images',
      userId,
      fileName,
      fileStrategy,
      context: FileContext.image_generation,
    });
    return filepath;
  } catch (saveError) {
    // Best effort: the failure is logged and reported to the caller as `null`.
    logger.error('[GeminiImageGen] Error saving image to storage:', saveError);
    return null;
  }
}
/**
 * LangChain tool that asks Gemini (through Vertex AI) to generate an image from a text
 * prompt, optionally passing previously shared images along as visual context.
 *
 * A call reads a Google service-account key file, sends the request to the
 * `gemini-2.5-flash-image-preview` model, persists the resulting image according to the
 * configured file strategy and answers with a `[content, artifact]` pair.
 */
class GeminiImageGen extends Tool {
  /**
   * @param {Object} [fields] - Tool configuration
   * @param {boolean} [fields.override] - When true, skips the credentials file check
   * @param {boolean} [fields.returnMetadata] - When true, object results are returned untouched
   * @param {string} [fields.userId] - ID of the user the generated image belongs to
   * @param {string} [fields.fileStrategy] - File storage strategy used to persist images
   * @param {boolean} [fields.isAgent] - Whether results must use the `[content, artifact]` shape
   * @param {ServerRequest} [fields.req] - Request used for file lookups and download streams
   * @param {Function} [fields.processFileURL] - Storage helper; bound to this instance
   * @param {MongoFile[]} [fields.imageFiles] - Image files already attached to the request
   * @throws {Error} When not in override mode and the credentials file does not exist
   */
  constructor(fields = {}) {
    super();
    /** @type {boolean} Used to initialize the Tool without necessary variables. */
    this.override = fields.override ?? false;
    /** @type {boolean} Necessary for output to contain all image metadata. */
    this.returnMetadata = fields.returnMetadata ?? false;
    this.userId = fields.userId;
    this.fileStrategy = fields.fileStrategy;
    /** @type {boolean} */
    this.isAgent = fields.isAgent;
    /** @type {ServerRequest} */
    this.req = fields.req;
    // Extract processFileURL if available
    if (fields.processFileURL) {
      this.processFileURL = fields.processFileURL.bind(this);
    }
    // Store imageFiles for context
    /** @type {MongoFile[]} */
    this.imageFiles = fields.imageFiles || [];
    // validateConfig() itself returns early when `override` is set
    this.validateConfig();
    this.name = 'gemini_image_gen';
    this.description = `Generates high-quality, original images based on text prompts, with optional image context.
When to use \`gemini_image_gen\`:
- To create entirely new images from detailed text descriptions
- To generate images using existing images as context or inspiration
- When the user requests image generation, creation, or asks to "generate an image"
- When the user asks to "edit", "modify", "change", or "swap" elements in an image (generates new image with changes)
When NOT to use \`gemini_image_gen\`:
- For uploading or saving existing images without modification
Generated image IDs will be returned in the response, so you can refer to them in future requests.`;
    this.description_for_model = `Use this tool to generate images from text descriptions using Vertex AI Gemini.
1. Prompts should be detailed and specific for best results.
2. One image per function call. Create only 1 image per request.
3. IMPORTANT: When user asks to "edit", "modify", "change", or "swap" elements in an existing image:
- ALWAYS include the original image ID in the image_ids array
- Describe the desired changes clearly in the prompt
- The tool will generate a new image based on the original image context + your prompt
4. IMPORTANT: For editing requests, use DIRECT editing instructions:
- User says "remove the gun" → prompt should be "remove the gun from this image"
- User says "make it blue" → prompt should be "make this image blue"
- User says "add sunglasses" → prompt should be "add sunglasses to this image"
- DO NOT reconstruct or modify the original prompt - use the user's editing instruction directly
- ALWAYS include the image being edited in image_ids array
5. OPTIONAL: Use image_ids to provide context images that will influence the generation:
- Include any relevant image IDs from the conversation in the image_ids array
- These images will be used as visual context/inspiration for the new generation
- For "editing" requests, always include the image being "edited"
6. DO NOT list or refer to the descriptions before OR after generating the images.
7. Always mention the image type (photo, oil painting, watercolor painting, illustration, cartoon, drawing, vector, render, etc.) at the beginning of the prompt.
The prompt should be a detailed paragraph describing every part of the image in concrete, objective detail.`;
    this.schema = z.object({
      prompt: z
        .string()
        .max(32000)
        .describe(
          'A detailed text description of the desired image, up to 32000 characters. For "editing" requests, describe the changes you want to make to the referenced image. Be specific about composition, style, lighting, and subject matter.',
        ),
      image_ids: z
        .array(z.string())
        .optional()
        .describe(
          `
Optional array of image IDs to use as visual context for generation.
Guidelines:
- For "editing" requests: ALWAYS include the image ID being "edited"
- For new generation with context: Include any relevant reference image IDs
- If the user's request references any prior images, include their image IDs in this array
- These images will be used as visual context/inspiration for the new generation
- Never invent or hallucinate IDs; only use IDs that are visible in the conversation
- If no images are relevant, omit this field entirely
`.trim(),
        ),
    });
  }

  /**
   * Resolve the service-account key file location in one place so that validation and
   * client initialization can never disagree.
   * @returns {string} `GOOGLE_SERVICE_KEY_FILE` when set, otherwise `data/auth.json` four
   *   directories above this file
   */
  getCredentialsPath() {
    return (
      process.env.GOOGLE_SERVICE_KEY_FILE ||
      path.join(__dirname, '../../../..', 'data', 'auth.json')
    );
  }

  /**
   * Verify that the credentials file exists, unless the tool is in override mode.
   * @throws {Error} When the credentials file is missing
   */
  validateConfig() {
    if (this.override) {
      return;
    }
    const credentialsPath = this.getCredentialsPath();
    if (!fs.existsSync(credentialsPath)) {
      throw new Error(`Google credentials file not found at: ${credentialsPath}`);
    }
  }

  /**
   * @param {string} imageUrl - URL of the generated image
   * @returns {string} Markdown image tag pointing at `imageUrl`
   */
  wrapInMarkdown(imageUrl) {
    return `![Generated Image](${imageUrl})`;
  }

  /**
   * Strip characters other than letters, digits, combining marks, whitespace and
   * `- _ . , ! ? ( )` from a prompt.
   *
   * Unicode property escapes are used instead of `\w` because `\w` only matches ASCII,
   * which silently deleted every accented or non-Latin letter from the prompt.
   * @param {string} [inputString] - Raw prompt text
   * @returns {string} Sanitized prompt, or an empty string when the input is missing
   */
  replaceUnwantedChars(inputString) {
    return inputString?.replace(/[^\p{L}\p{M}\p{N}\s\-_.,!?()]/gu, '') || '';
  }

  /**
   * Shape a result for the caller.
   * - With `returnMetadata`, object values are returned untouched.
   * - In agent mode, string values (error/status messages) become `[message, {}]` so the
   *   message itself is the content the model reads; other values are returned as the
   *   artifact next to `displayMessage`.
   * - Otherwise the value is returned as is.
   * @param {string|Object} value - Message or result object
   * @returns {string|Object|Array} The value in the shape described above
   */
  returnValue(value) {
    if (this.returnMetadata && typeof value === 'object') {
      return value;
    }
    if (this.isAgent) {
      // A plain string here is always a failure/status message; pairing it with
      // displayMessage would tell the model an image was shown when none was.
      if (typeof value === 'string') {
        return [value, {}];
      }
      return [displayMessage, value];
    }
    return value;
  }

  /**
   * Check if the API response indicates content was blocked by safety filters
   * @param {Object} response - The Gemini API response
   * @returns {Object|null} - Safety block information (`reason`, `message` and, depending on
   *   the cause, `safetyRatings` or `category`/`probability`), or null if not blocked or if
   *   the response could not be inspected
   */
  checkForSafetyBlock(response) {
    try {
      if (!response.candidates || response.candidates.length === 0) {
        return { reason: 'NO_CANDIDATES', message: 'No candidates returned by Gemini' };
      }
      const candidate = response.candidates[0];
      const { finishReason } = candidate;
      if (finishReason === 'SAFETY' || finishReason === 'PROHIBITED_CONTENT') {
        return {
          reason: finishReason,
          message: 'Content was blocked by Gemini safety filters',
          safetyRatings: candidate.safetyRatings || [],
        };
      }
      if (finishReason === 'RECITATION') {
        return {
          reason: finishReason,
          message: 'Content was blocked due to recitation concerns',
        };
      }
      // Individual ratings can flag a block even when finishReason does not.
      if (candidate.safetyRatings) {
        for (const rating of candidate.safetyRatings) {
          if (rating.probability === 'HIGH' || rating.blocked === true) {
            return {
              reason: 'SAFETY_RATING',
              message: `Content blocked due to ${rating.category} safety concerns`,
              category: rating.category,
              probability: rating.probability,
            };
          }
        }
      }
      return null; // No safety block detected
    } catch (error) {
      logger.error('[GeminiImageGen] Error checking safety block:', error);
      return null;
    }
  }

  /**
   * Create a Google GenAI client configured for Vertex AI.
   *
   * Side effect: sets `process.env.GOOGLE_APPLICATION_CREDENTIALS` to the resolved
   * credentials path. The project ID is read from the key file's `project_id` field.
   * @returns {Promise<GoogleGenAI>} The initialized client
   * @throws {Error} When the key file cannot be read or parsed, or the client cannot be created
   */
  async initializeGeminiVertexAI() {
    try {
      const credentialsPath = this.getCredentialsPath();
      // Set environment variables for Vertex AI (required by Google GenAI library)
      process.env.GOOGLE_APPLICATION_CREDENTIALS = credentialsPath;
      const serviceKey = JSON.parse(fs.readFileSync(credentialsPath, 'utf8'));
      return new GoogleGenAI({
        vertexai: true,
        project: serviceKey.project_id,
        location: 'global',
      });
    } catch (error) {
      logger.error('[GeminiImageGen] Error initializing Gemini Vertex AI client:', error);
      throw new Error(`Failed to initialize Gemini Vertex AI client: ${error.message}`);
    }
  }

  /**
   * Write a base64 image under `client/public/images/<userId>` relative to the current
   * working directory (`default` is used when no user ID is given).
   * @param {string} base64Data - Base64 encoded image data
   * @param {string} [outputFormat='png'] - File extension to use
   * @param {string} [userId] - Owner of the image; selects the sub-directory
   * @returns {Promise<string>} Relative URL of the form `/images/<userId>/<file>`
   * @throws {Error} Re-throws any file-system error after logging it
   */
  async saveImageLocally(base64Data, outputFormat = 'png', userId) {
    try {
      const imageName = `gemini-img-${v4()}.${outputFormat}`;
      const userFolder = userId || 'default';
      const userDir = path.join(process.cwd(), 'client/public/images', userFolder);
      // Recursive mkdir is a no-op for an existing directory, and the promise-based calls
      // keep the event loop free while the image is written.
      await fs.promises.mkdir(userDir, { recursive: true });
      const filePath = path.join(userDir, imageName);
      await fs.promises.writeFile(filePath, Buffer.from(base64Data, 'base64'));
      const relativeUrl = `/images/${userFolder}/${imageName}`;
      logger.debug('[GeminiImageGen] Image saved locally to:', filePath);
      return relativeUrl;
    } catch (error) {
      logger.error('[GeminiImageGen] Error saving image locally:', error);
      throw error;
    }
  }

  /**
   * Convert image files to the base64 `inlineData` parts expected by Gemini.
   *
   * IDs are first resolved against `imageFiles`; the rest are looked up with `getFiles`
   * for the requesting user. Files that cannot be resolved or read are skipped, so the
   * result may be shorter than `image_ids`.
   * @param {MongoFile[]} imageFiles - Array of image file objects
   * @param {string[]} image_ids - Array of image IDs to process
   * @returns {Promise<Array<{inlineData: {mimeType: string, data: string}}>>} Parts in the
   *   order of `image_ids`
   */
  async convertImagesToInlineData(imageFiles, image_ids) {
    if (!image_ids || image_ids.length === 0) {
      return [];
    }
    logger.debug('[GeminiImageGen] Converting images to inlineData format for IDs:', image_ids);

    // Maps (not plain objects) because IDs come from model output: an ID such as
    // "constructor" must not resolve to an inherited Object member.
    /** @type {Map<string, Function>} */
    const streamMethods = new Map();
    const requestFiles = new Map(imageFiles.map((f) => [f.file_id, { ...f }]));
    const orderedFiles = new Array(image_ids.length);
    const idsToFetch = [];
    const indexOfMissing = new Map();

    // Map existing files and identify missing ones
    for (let i = 0; i < image_ids.length; i++) {
      const id = image_ids[i];
      const file = requestFiles.get(id);
      if (file) {
        orderedFiles[i] = file;
        logger.debug('[GeminiImageGen] Found file in request files:', id);
      } else {
        idsToFetch.push(id);
        indexOfMissing.set(id, i);
        logger.debug('[GeminiImageGen] Need to fetch file from database:', id);
      }
    }

    // Fetch missing files from database
    if (idsToFetch.length) {
      logger.debug('[GeminiImageGen] Fetching', idsToFetch.length, 'files from database');
      const fetchedFiles = await getFiles(
        {
          user: this.req.user.id,
          file_id: { $in: idsToFetch },
          height: { $exists: true },
          width: { $exists: true },
        },
        {},
        {},
      );
      logger.debug('[GeminiImageGen] Fetched', fetchedFiles.length, 'files from database');
      for (const file of fetchedFiles) {
        const index = indexOfMissing.get(file.file_id);
        if (index !== undefined) {
          orderedFiles[index] = file;
        }
      }
    }

    // Convert files to Gemini inlineData format
    const inlineDataArray = [];
    for (const imageFile of orderedFiles) {
      if (!imageFile) {
        logger.warn('[GeminiImageGen] Skipping missing image file');
        continue;
      }
      try {
        const source = imageFile.source || this.fileStrategy;
        if (!source) {
          logger.error('[GeminiImageGen] No source found for image file:', imageFile.file_id);
          continue;
        }
        // Resolve the download function once per storage source.
        let getDownloadStream = streamMethods.get(source);
        if (!getDownloadStream) {
          ({ getDownloadStream } = getStrategyFunctions(source));
          streamMethods.set(source, getDownloadStream);
        }
        if (!getDownloadStream) {
          logger.error('[GeminiImageGen] No download stream method found for source:', source);
          continue;
        }
        const stream = await getDownloadStream(this.req, imageFile.filepath);
        if (!stream) {
          logger.error(
            '[GeminiImageGen] Failed to get download stream for image:',
            imageFile.file_id,
          );
          continue;
        }
        // Convert stream to buffer then to base64
        const chunks = [];
        for await (const chunk of stream) {
          chunks.push(chunk);
        }
        const base64Data = Buffer.concat(chunks).toString('base64');
        const mimeType = imageFile.type || 'image/png';
        inlineDataArray.push({
          inlineData: {
            mimeType,
            data: base64Data,
          },
        });
        logger.debug('[GeminiImageGen] Converted image to inlineData:', {
          file_id: imageFile.file_id,
          mimeType,
          dataLength: base64Data.length,
        });
      } catch (error) {
        // One unreadable image must not prevent the others from being used.
        logger.error('[GeminiImageGen] Error processing image file:', imageFile.file_id, error);
      }
    }
    logger.debug(
      '[GeminiImageGen] Successfully converted',
      inlineDataArray.length,
      'images to inlineData',
    );
    return inlineDataArray;
  }

  /**
   * Build the user-facing explanation for a response flagged by `checkForSafetyBlock`.
   * @param {Object} safetyBlock - Result of `checkForSafetyBlock`
   * @returns {string} Explanation text
   */
  buildSafetyBlockMessage(safetyBlock) {
    let errorMessage =
      'I cannot generate this image because it was blocked by content safety filters. ';
    if (safetyBlock.reason === 'SAFETY' || safetyBlock.reason === 'PROHIBITED_CONTENT') {
      errorMessage +=
        'The prompt may contain content that violates content policies (such as violence, weapons, or inappropriate content). ';
    } else if (safetyBlock.reason === 'RECITATION') {
      errorMessage += 'The content may be too similar to copyrighted material. ';
    }
    errorMessage += 'Please try rephrasing your request with different, safer content.';
    if (safetyBlock.category) {
      errorMessage += ` (Blocked category: ${safetyBlock.category})`;
    }
    return errorMessage;
  }

  /**
   * Log why a candidate carries no image and build the matching user-facing explanation.
   * @param {Object} candidate - First candidate of the Gemini response
   * @returns {string} Explanation text
   */
  buildMissingImageMessage(candidate) {
    logger.warn('[GeminiImageGen] No image data in response. Candidate details:', {
      finishReason: candidate.finishReason,
      hasContent: !!candidate.content,
      contentParts: candidate.content?.parts?.length || 0,
      safetyRatings: candidate.safetyRatings?.length || 0,
      contentPartsTypes: candidate.content?.parts?.map((p) => Object.keys(p)) || [],
    });

    const safetyIssue = candidate.safetyRatings?.find(
      (rating) =>
        rating.probability === 'HIGH' ||
        rating.probability === 'MEDIUM' ||
        rating.blocked === true,
    );
    if (safetyIssue) {
      logger.warn('[GeminiImageGen] Content blocked by safety filter:', safetyIssue);
      return `I cannot generate this image because it was blocked by content safety filters. The content was flagged for ${safetyIssue.category} (probability: ${safetyIssue.probability}). Please try rephrasing your request with different, safer content.`;
    }
    if (candidate.finishReason === 'SAFETY' || candidate.finishReason === 'PROHIBITED_CONTENT') {
      logger.warn('[GeminiImageGen] Content blocked by finishReason:', candidate.finishReason);
      return 'I cannot generate this image because it was blocked by content safety filters. Please try rephrasing your request with different, safer content.';
    }
    logger.warn('[GeminiImageGen] Unknown reason for missing image data');
    return 'No image was generated. This might be due to content safety filters blocking the request, or the model being unable to create the requested image. Please try rephrasing your prompt with different content.';
  }

  /**
   * Persist a generated image and decide which URL to hand back.
   *
   * For the local strategy the image is written to disk but the data URL is still
   * returned; for every other strategy the storage path is returned when saving
   * succeeds. Any failure falls back to the data URL.
   * @param {string} imageData - Base64 encoded PNG data
   * @returns {Promise<string>} Storage path or `data:image/png;base64,...` URL
   */
  async storeGeneratedImage(imageData) {
    const dataUrl = `data:image/png;base64,${imageData}`;

    if (this.fileStrategy === FileSources.local || this.fileStrategy === 'local') {
      logger.debug('[GeminiImageGen] Local strategy detected - using direct save + data URL');
      try {
        await this.saveImageLocally(imageData, 'png', this.userId);
        logger.debug('[GeminiImageGen] Image saved locally successfully');
      } catch (error) {
        logger.error('[GeminiImageGen] Local save failed:', error);
      }
      // The data URL is kept for the response even when the local save worked.
      return dataUrl;
    }

    logger.debug('[GeminiImageGen] Cloud strategy detected - using OpenAI pattern');
    try {
      const storageUrl = await saveBase64ImageToStorage({
        base64Data: imageData,
        outputFormat: 'png',
        processFileURL: this.processFileURL,
        fileStrategy: this.fileStrategy,
        userId: this.userId,
      });
      if (storageUrl) {
        logger.debug('[GeminiImageGen] Image saved to storage:', storageUrl);
        return storageUrl;
      }
      logger.warn('[GeminiImageGen] Could not save to storage, using data URL');
    } catch (error) {
      logger.error('[GeminiImageGen] Error saving image to storage:', error);
      logger.warn('[GeminiImageGen] Falling back to data URL');
    }
    return dataUrl;
  }

  /**
   * Tool entry point: generate one image for `prompt`, optionally using `image_ids` as
   * visual context.
   * @param {Object} data - Tool input matching `this.schema`
   * @param {string} data.prompt - Description of the image or of the requested edit
   * @param {string[]} [data.image_ids] - IDs of images to pass along as context
   * @returns {Promise<Array|string|Object>} On success `[textContent, { content, file_ids }]`;
   *   on failure an explanation shaped by `returnValue` (or, for a response without image
   *   data in agent mode, `[textContent, { content: [], file_ids: [] }]`)
   * @throws {Error} When `prompt` is missing
   */
  async _call(data) {
    const { prompt, image_ids } = data;
    if (!prompt) {
      throw new Error('Missing required field: prompt');
    }
    logger.debug('[GeminiImageGen] Generating image with prompt:', prompt);
    logger.debug('[GeminiImageGen] Image IDs provided:', image_ids);
    logger.debug('[GeminiImageGen] Available imageFiles:', this.imageFiles.length);

    let ai;
    try {
      ai = await this.initializeGeminiVertexAI();
    } catch (error) {
      logger.error('[GeminiImageGen] Failed to initialize Gemini Vertex AI:', error);
      return this.returnValue(`Failed to initialize Gemini Vertex AI: ${error.message}`);
    }

    let apiResponse;
    try {
      // The sanitized prompt always comes first, followed by any context images.
      const contents = [{ text: this.replaceUnwantedChars(prompt) }];
      if (image_ids && image_ids.length > 0) {
        logger.debug('[GeminiImageGen] Processing context images...');
        const contextImages = await this.convertImagesToInlineData(this.imageFiles, image_ids);
        contents.push(...contextImages);
        logger.debug('[GeminiImageGen] Added', contextImages.length, 'context images to request');
      } else {
        logger.debug('[GeminiImageGen] No image context provided - text-only generation');
      }
      logger.debug('[GeminiImageGen] Final contents array length:', contents.length);
      apiResponse = await ai.models.generateContent({
        model: 'gemini-2.5-flash-image-preview',
        contents,
        config: {
          responseModalities: ['TEXT', 'IMAGE'],
        },
      });
      logger.debug('[GeminiImageGen] Received response from Gemini Vertex AI');
    } catch (error) {
      logger.error('[GeminiImageGen] Problem generating the image:', error);
      return this
        .returnValue(`Something went wrong when trying to generate the image. The Gemini Vertex AI API may be unavailable:
Error Message: ${error.message}`);
    }

    if (!apiResponse || !apiResponse.candidates || !apiResponse.candidates[0]) {
      return this.returnValue(
        'Something went wrong when trying to generate the image. The Gemini Vertex AI API may be unavailable',
      );
    }
    const candidate = apiResponse.candidates[0];
    // Only a summary is logged: the full response embeds the base64 image payload.
    logger.debug('[GeminiImageGen] Response summary:', {
      candidates: apiResponse.candidates.length,
      finishReason: candidate.finishReason,
    });

    // Check for content safety blocks BEFORE trying to extract image data
    const safetyBlock = this.checkForSafetyBlock(apiResponse);
    if (safetyBlock) {
      logger.warn('[GeminiImageGen] Content blocked by safety filters:', safetyBlock);
      return this.returnValue(this.buildSafetyBlockMessage(safetyBlock));
    }

    // A candidate may come back without `content`/`parts`; treat that as "no image"
    // instead of letting a TypeError escape.
    const imageData = candidate.content?.parts?.find((part) => part.inlineData)?.inlineData
      ?.data;
    if (!imageData) {
      const errorMessage = this.buildMissingImageMessage(candidate);
      if (this.isAgent) {
        const errorResponse = [
          {
            type: ContentTypes.TEXT,
            text: errorMessage,
          },
        ];
        return [errorResponse, { content: [], file_ids: [] }];
      }
      return this.returnValue(errorMessage);
    }
    logger.debug('[GeminiImageGen] Successfully extracted image data');

    const imageUrl = await this.storeGeneratedImage(imageData);

    const content = [
      {
        type: ContentTypes.IMAGE_URL,
        image_url: {
          url: imageUrl,
        },
      },
    ];
    // A fresh ID is reported to the model so it can reference this image later.
    const file_ids = [v4()];
    const textResponse = [
      {
        type: ContentTypes.TEXT,
        text:
          displayMessage +
          `\n\ngenerated_image_id: "${file_ids[0]}"` +
          (image_ids && image_ids.length > 0
            ? `\nreferenced_image_ids: ["${image_ids.join('", "')}"]`
            : ''),
      },
    ];
    return [textResponse, { content, file_ids }];
  }
}
module.exports = GeminiImageGen;