mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-11 13:08:51 +01:00
🍌 feat: Gemini Image Generation Tool (Nano Banana) (#10676)
* Added fully functioning Agent Tool supporting Google's Nano Banana
* 🔧 refactor: Update Google credentials handling in GeminiImageGen.js
* Refactored the credentials path to follow a consistent pattern with other Google service integrations, allowing for an environment variable override.
* Updated documentation in README-GeminiNanoBanana.md to reflect the new credentials handling approach and removed references to hardcoded paths.
* 🛠️ refactor: Remove unnecessary whitespace in handleTools.js
* 🔧 feat: Update Gemini Image Generation Tool
- Bump @google/genai package version to ^1.19.0 for improved functionality.
- Refactor GeminiImageGen to createGeminiImageTool for better clarity and consistency.
- Enhance manifest.json for Gemini Image Tools with updated descriptions and icon.
- Add SVG icon for Gemini Image Tools.
- Implement progress tracking for Gemini image generation in the UI.
- Introduce new toolkit and context handling for image generation tools.
This update improves the Gemini image generation capabilities and user experience.
* 🗑️ chore: Remove outdated Gemini image generation PNG and update SVG icon
- Deleted the obsolete PNG file for Gemini image generation.
- Updated the SVG icon with a new design featuring a gradient and shadow effect, enhancing visual appeal and consistency.
* fix: ESLint formatting and unused variable in GeminiImageGen
* fix: Update default model to gemini-2.5-flash-image
* ✨ feat: Enhance Gemini Image Generation Configuration
- Updated .env.example to include new environment variables for Google Cloud region, service account configuration, and Gemini API key options.
- Modified GeminiImageGen.js to support both user-provided API keys and Vertex AI service accounts, improving flexibility in client initialization.
- Updated manifest.json to reflect changes in authentication methods for the Gemini Image Tools.
- Bumped @google/genai package version to 1.19.0 in package-lock.json for compatibility with new features.
* 🔧 fix: Format Default Service Key Path in GeminiImageGen.js
- Adjusted the return statement in getDefaultServiceKeyPath function for improved readability by formatting it across multiple lines.
This change enhances code clarity without altering functionality.
* ✨ feat: Enhance Gemini Image Generation with Token Usage Tracking
- Added `recordTokenUsage` function to track token usage for balance management.
- Integrated token recording into the image generation process.
- Updated Gemini image generation tool to accept optional `aspectRatio` and `imageSize` parameters for improved image customization.
- Updated token values for new Gemini models in the transaction model.
- Improved documentation for image generation tool descriptions and parameters.
* ✨ feat: Add new Gemini models for image generation token limits
- Introduced token limits for 'gemini-3-pro-image' and 'gemini-2.5-flash-image' models.
- Updated token values to enhance the Gemini image generation capabilities.
* 🔧 fix: Update Google Service Key Path for Consistency in Initialization (#11001)
* 🔧 refactor: Update GeminiImageGen for improved file handling and path resolution
- Changed the default service key path to use process.cwd() for better compatibility.
- Replaced synchronous file system operations with asynchronous promises for mkdir and writeFile, enhancing performance and error handling.
- Added error handling for credential file access to prevent crashes when the file does not exist.
* 🔧 refactor: Update GeminiImageGen to streamline API key handling
- Refactored API key checks to improve clarity and consistency.
- Removed redundant checks for user-provided keys, enhancing code readability.
- Ensured proper logging for API key usage across different configurations.
* 🔧 fix: Update GeminiImageGen to handle imageSize support conditionally
- Added a check to ensure imageSize is only applied if the gemini model does not include 'gemini-2.5-flash-image', improving compatibility.
- Enhanced the logic for setting imageConfig to prevent potential issues with unsupported configurations.
* 🔧 refactor: Simplify local storage condition in createGeminiImageTool function
* 🔧 feat: Enhance image format handling in GeminiImageGen with conversion support
* 🔧 refactor: Streamline API key initialization in GeminiImageGen
- Simplified the handling of API keys by removing redundant checks for user-provided keys.
- Updated logging to reflect the new priority order for API key usage, enhancing clarity and consistency.
- Improved code readability by consolidating key retrieval logic.
---------
Co-authored-by: Dev Bhanushali <dev.bhanushali@hingehealth.com>
Co-authored-by: Danny Avila <danny@librechat.ai>
This commit is contained in:
parent
e452c1a8d9
commit
200098d992
19 changed files with 1063 additions and 55 deletions
|
|
@ -10,6 +10,7 @@ const {
|
|||
createSafeUser,
|
||||
mcpToolPattern,
|
||||
loadWebSearchAuth,
|
||||
buildImageToolContext,
|
||||
} = require('@librechat/api');
|
||||
const { getMCPServersRegistry } = require('~/config');
|
||||
const {
|
||||
|
|
@ -35,6 +36,7 @@ const {
|
|||
StructuredWolfram,
|
||||
createYouTubeTools,
|
||||
TavilySearchResults,
|
||||
createGeminiImageTool,
|
||||
createOpenAIImageTools,
|
||||
} = require('../');
|
||||
const { primeFiles: primeCodeFiles } = require('~/server/services/Files/Code/process');
|
||||
|
|
@ -192,21 +194,11 @@ const loadTools = async ({
|
|||
const authFields = getAuthFields('image_gen_oai');
|
||||
const authValues = await loadAuthValues({ userId: user, authFields });
|
||||
const imageFiles = options.tool_resources?.[EToolResources.image_edit]?.files ?? [];
|
||||
let toolContext = '';
|
||||
for (let i = 0; i < imageFiles.length; i++) {
|
||||
const file = imageFiles[i];
|
||||
if (!file) {
|
||||
continue;
|
||||
}
|
||||
if (i === 0) {
|
||||
toolContext =
|
||||
'Image files provided in this request (their image IDs listed in order of appearance) available for image editing:';
|
||||
}
|
||||
toolContext += `\n\t- ${file.file_id}`;
|
||||
if (i === imageFiles.length - 1) {
|
||||
toolContext += `\n\nInclude any you need in the \`image_ids\` array when calling \`${EToolResources.image_edit}_oai\`. You may also include previously referenced or generated image IDs.`;
|
||||
}
|
||||
}
|
||||
const toolContext = buildImageToolContext({
|
||||
imageFiles,
|
||||
toolName: `${EToolResources.image_edit}_oai`,
|
||||
contextDescription: 'image editing',
|
||||
});
|
||||
if (toolContext) {
|
||||
toolContextMap.image_edit_oai = toolContext;
|
||||
}
|
||||
|
|
@ -219,6 +211,28 @@ const loadTools = async ({
|
|||
imageFiles,
|
||||
});
|
||||
},
|
||||
/**
 * Loader for the `gemini_image_gen` tool.
 *
 * Looks up the tool's auth fields, loads the matching auth values for `user`,
 * collects the image files listed under the `image_edit` tool resource, stores a
 * tool-context string in `toolContextMap` when one is produced, and returns the
 * result of `createGeminiImageTool`.
 *
 * @param {object} toolContextMap - Mutated in place: receives a `gemini_image_gen`
 *   entry when `buildImageToolContext` returns a truthy value.
 * @returns {Promise<*>} Whatever `createGeminiImageTool` returns for the assembled options.
 */
gemini_image_gen: async (toolContextMap) => {
|
||||
const authFields = getAuthFields('gemini_image_gen');
|
||||
const authValues = await loadAuthValues({ userId: user, authFields });
|
||||
// Falls back to an empty array when no `image_edit` resource or file list is present.
const imageFiles = options.tool_resources?.[EToolResources.image_edit]?.files ?? [];
|
||||
const toolContext = buildImageToolContext({
|
||||
imageFiles,
|
||||
toolName: 'gemini_image_gen',
|
||||
contextDescription: 'image context',
|
||||
});
|
||||
// Only register context when the helper actually produced some.
if (toolContext) {
|
||||
toolContextMap.gemini_image_gen = toolContext;
|
||||
}
|
||||
return createGeminiImageTool({
|
||||
// Spread first, so the explicit keys below win on any name clash with an auth value.
...authValues,
|
||||
isAgent: !!agent,
|
||||
req: options.req,
|
||||
imageFiles,
|
||||
processFileURL: options.processFileURL,
|
||||
userId: user,
|
||||
fileStrategy,
|
||||
});
|
||||
},
|
||||
};
|
||||
|
||||
const requestedTools = {};
|
||||
|
|
@ -241,6 +255,7 @@ const loadTools = async ({
|
|||
flux: imageGenOptions,
|
||||
dalle: imageGenOptions,
|
||||
'stable-diffusion': imageGenOptions,
|
||||
gemini_image_gen: imageGenOptions,
|
||||
};
|
||||
|
||||
/** @type {Record<string, string>} */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue