LibreChat/api/app/clients/tools/structured/FluxAPI.js

561 lines
19 KiB
JavaScript
Raw Normal View History

const axios = require('axios');
const fetch = require('node-fetch');
const { v4: uuidv4 } = require('uuid');
const { Tool } = require('@langchain/core/tools');
const { logger } = require('@librechat/data-schemas');
const { HttpsProxyAgent } = require('https-proxy-agent');
const { FileContext, ContentTypes } = require('librechat-data-provider');
🦥 refactor: Event-Driven Lazy Tool Loading (#11588) * refactor: json schema tools with lazy loading - Added LocalToolExecutor class for lazy loading and caching of tools during execution. - Introduced ToolExecutionContext and ToolExecutor interfaces for better type management. - Created utility functions to generate tool proxies with JSON schema support. - Added ExtendedJsonSchema type for enhanced schema definitions. - Updated existing toolkits to utilize the new schema and executor functionalities. - Introduced a comprehensive tool definitions registry for managing various tool schemas. chore: update @librechat/agents to version 3.1.2 refactor: enhance tool loading optimization and classification - Improved the loadAgentToolsOptimized function to utilize a proxy pattern for all tools, enabling deferred execution and reducing overhead. - Introduced caching for tool instances and refined tool classification logic to streamline tool management. - Updated the handling of MCP tools to improve logging and error reporting for missing tools in the cache. - Enhanced the structure of tool definitions to support better classification and integration with existing tools. refactor: modularize tool loading and enhance optimization - Moved the loadAgentToolsOptimized function to a new service file for better organization and maintainability. - Updated the ToolService to utilize the new service for optimized tool loading, improving code clarity. - Removed legacy tool loading methods and streamlined the tool loading process to enhance performance and reduce complexity. - Introduced feature flag handling for optimized tool loading, allowing for easier toggling of this functionality. 
refactor: replace loadAgentToolsWithFlag with loadAgentTools in tool loader refactor: enhance MCP tool loading with proxy creation and classification refactor: optimize MCP tool loading by grouping tools by server - Introduced a Map to group cached tools by server name, improving the organization of tool data. - Updated the createMCPProxyTool function to accept server name directly, enhancing clarity. - Refactored the logic for handling MCP tools, streamlining the process of creating proxy tools for classification. refactor: enhance MCP tool loading and proxy creation - Added functionality to retrieve MCP server tools and reinitialize servers if necessary, improving tool availability. - Updated the tool loading logic to utilize a Map for organizing tools by server, enhancing clarity and performance. - Refactored the createToolProxy function to ensure a default response format, streamlining tool creation. refactor: update createToolProxy to ensure consistent response format - Modified the createToolProxy function to await the executor's execution and validate the result format. - Ensured that the function returns a default response structure when the result is not an array of two elements, enhancing reliability in tool proxy creation. refactor: ToolExecutionContext with toolCall property - Added toolCall property to ToolExecutionContext interface for improved context handling during tool execution. - Updated LocalToolExecutor to include toolCall in the runnable configuration, allowing for more flexible tool invocation. - Modified createToolProxy to pass toolCall from the configuration, ensuring consistent context across tool executions. refactor: enhance event-driven tool execution and logging - Introduced ToolExecuteOptions for improved handling of event-driven tool execution, allowing for parallel execution of tool calls. - Updated getDefaultHandlers to include support for ON_TOOL_EXECUTE events, enhancing the flexibility of tool invocation. 
- Added detailed logging in LocalToolExecutor to track tool loading and execution metrics, improving observability and debugging capabilities. - Refactored initializeClient to integrate event-driven tool loading, ensuring compatibility with the new execution model. chore: update @librechat/agents to version 3.1.21 refactor: remove legacy tool loading and executor components - Eliminated the loadAgentToolsWithFlag function, simplifying the tool loading process by directly using loadAgentTools. - Removed the LocalToolExecutor and related executor components to streamline the tool execution architecture. - Updated ToolService and related files to reflect the removal of deprecated features, enhancing code clarity and maintainability. refactor: enhance tool classification and definitions handling - Updated the loadAgentTools function to return toolDefinitions alongside toolRegistry, improving the structure of tool data returned to clients. - Removed the convertRegistryToDefinitions function from the initialize.js file, simplifying the initialization process. - Adjusted the buildToolClassification function to ensure toolDefinitions are built and returned simultaneously with the toolRegistry, enhancing efficiency in tool management. - Updated type definitions in initialize.ts to include toolDefinitions, ensuring consistency across the codebase. refactor: implement event-driven tool execution handler - Introduced createToolExecuteHandler function to streamline the handling of ON_TOOL_EXECUTE events, allowing for parallel execution of tool calls. - Updated getDefaultHandlers to utilize the new handler, simplifying the event-driven architecture. - Added handlers.ts file to encapsulate tool execution logic, improving code organization and maintainability. - Enhanced OpenAI handlers to integrate the new tool execution capabilities, ensuring consistent event handling across the application. 
refactor: integrate event-driven tool execution options - Added toolExecuteOptions to support event-driven tool execution in OpenAI and responses controllers, enhancing flexibility in tool handling. - Updated handlers to utilize createToolExecuteHandler, allowing for streamlined execution of tools during agent interactions. - Refactored service dependencies to include toolExecuteOptions, ensuring consistent integration across the application. refactor: enhance tool loading with definitionsOnly parameter - Updated createToolLoader and loadAgentTools functions to include a definitionsOnly parameter, allowing for the retrieval of only serializable tool definitions in event-driven mode. - Adjusted related interfaces and documentation to reflect the new parameter, improving clarity and flexibility in tool management. - Ensured compatibility across various components by integrating the definitionsOnly option in the initialization process. refactor: improve agent tool presence check in initialization - Added a check for tool presence using a new hasAgentTools variable, which evaluates both structuredTools and toolDefinitions. - Updated the conditional logic in the agent initialization process to utilize the hasAgentTools variable, enhancing clarity and maintainability in tool management. refactor: enhance agent tool extraction to support tool definitions - Updated the extractMCPServers function to handle both tool instances and serializable tool definitions, improving flexibility in agent tool management. - Added a new property toolDefinitions to the AgentWithTools type for better integration of event-driven mode. - Enhanced documentation to clarify the function's capabilities in extracting unique MCP server names from both tools and tool definitions. refactor: enhance tool classification and registry building - Added serverName property to ToolDefinition for improved tool identification. 
- Introduced buildToolRegistry function to streamline the creation of tool registries based on MCP tool definitions and agent options. - Updated buildToolClassification to utilize the new registry building logic, ensuring basic definitions are returned even when advanced classification features are not allowed. - Enhanced documentation and logging for clarity in tool classification processes. refactor: update @librechat/agents dependency to version 3.1.22 fix: expose loadTools function in ToolService - Added loadTools function to the exported module in ToolService.js, enhancing the accessibility of tool loading functionality. chore: remove configurable options from tool execute options in OpenAI controller refactor: enhance tool loading mechanism to utilize agent-specific context chore: update @librechat/agents dependency to version 3.1.23 fix: simplify result handling in createToolExecuteHandler * refactor: loadToolDefinitions for efficient tool loading in event-driven mode * refactor: replace legacy tool loading with loadToolsForExecution in OpenAI and responses controllers - Updated OpenAIChatCompletionController and createResponse functions to utilize loadToolsForExecution for improved tool loading. - Removed deprecated loadToolsLegacy references, streamlining the tool execution process. - Enhanced tool loading options to include agent-specific context and configurations. * refactor: enhance tool loading and execution handling - Introduced loadActionToolsForExecution function to streamline loading of action tools, improving organization and maintainability. - Updated loadToolsForExecution to handle both regular and action tools, optimizing the tool loading process. - Added detailed logging for missing tools in createToolExecuteHandler, enhancing error visibility. - Refactored tool definitions to normalize action tool names, improving consistency in tool management. 
* refactor: enhance built-in tool definitions loading - Updated loadToolDefinitions to include descriptions and parameters from the tool registry for built-in tools, improving the clarity and usability of tool definitions. - Integrated getToolDefinition to streamline the retrieval of tool metadata, enhancing the overall tool management process. * feat: add action tool definitions loading to tool service - Introduced getActionToolDefinitions function to load action tool definitions based on agent ID and tool names, enhancing the tool loading process. - Updated loadToolDefinitions to integrate action tool definitions, allowing for better management and retrieval of action-specific tools. - Added comprehensive tests for action tool definitions to ensure correct loading and parameter handling, improving overall reliability and functionality. * chore: update @librechat/agents dependency to version 3.1.26 * refactor: add toolEndCallback to handle tool execution results * fix: tool definitions and execution handling - Introduced native tools (execute_code, file_search, web_search) to the tool service, allowing for better integration and management of these tools. - Updated isBuiltInTool function to include native tools in the built-in check, improving tool recognition. - Added comprehensive tests for loading parameters of native tools, ensuring correct functionality and parameter handling. - Enhanced tool definitions registry to include new agent tool definitions, streamlining tool retrieval and management. * refactor: enhance tool loading and execution context - Added toolRegistry to the context for OpenAIChatCompletionController and createResponse functions, improving tool management. - Updated loadToolsForExecution to utilize toolRegistry for better integration of programmatic tools and tool search functionalities. - Enhanced the initialization process to include toolRegistry in agent context, streamlining tool access and configuration. 
- Refactored tool classification logic to support event-driven execution, ensuring compatibility with new tool definitions. * chore: add request duration logging to OpenAI and Responses controllers - Introduced logging for request start and completion times in OpenAIChatCompletionController and createResponse functions. - Calculated and logged the duration of each request, enhancing observability and performance tracking. - Improved debugging capabilities by providing detailed logs for both streaming and non-streaming responses. * chore: update @librechat/agents dependency to version 3.1.27 * refactor: implement buildToolSet function for tool management - Introduced buildToolSet function to streamline the creation of tool sets from agent configurations, enhancing tool management across various controllers. - Updated AgentClient, OpenAIChatCompletionController, and createResponse functions to utilize buildToolSet, improving consistency in tool handling. - Added comprehensive tests for buildToolSet to ensure correct functionality and edge case handling, enhancing overall reliability. * refactor: update import paths for ToolExecuteOptions and createToolExecuteHandler * fix: update GoogleSearch.js description for maximum search results - Changed the default maximum number of search results from 10 to 5 in the Google Search JSON schema description, ensuring accurate documentation of the expected behavior. * chore: remove deprecated Browser tool and associated assets - Deleted the Browser tool definition from manifest.json, which included its name, plugin key, description, and authentication configuration. - Removed the web-browser.svg asset as it is no longer needed following the removal of the Browser tool. * fix: ensure tool definitions are valid before processing - Added a check to verify the existence of tool definitions in the registry before accessing their properties, preventing potential runtime errors. 
- Updated the loading logic for built-in tool definitions to ensure that only valid definitions are pushed to the built-in tool definitions array. * fix: extend ExtendedJsonSchema to support 'null' type and nullable enums - Updated the ExtendedJsonSchema type to include 'null' as a valid type option. - Modified the enum property to accept an array of values that can include strings, numbers, booleans, and null, enhancing schema flexibility. * test: add comprehensive tests for tool definitions loading and registry behavior - Implemented tests to verify the handling of built-in tools without registry definitions, ensuring they are skipped correctly. - Added tests to confirm that built-in tools include descriptions and parameters in the registry. - Enhanced tests for action tools, checking for proper inclusion of metadata and handling of tools without parameters in the registry. * test: add tests for mixed-type and number enum schema handling - Introduced tests to validate the parsing of mixed-type enum values, including strings, numbers, booleans, and null. - Added tests for number enum schema values to ensure correct parsing of numeric inputs, enhancing schema validation coverage. * fix: update mock implementation for @librechat/agents - Changed the mock for @librechat/agents to spread the actual module's properties, ensuring that all necessary functionalities are preserved in tests. - This adjustment enhances the accuracy of the tests by reflecting the real structure of the module. * fix: change max_results type in GoogleSearch schema from number to integer - Updated the type of max_results in the Google Search JSON schema to 'integer' for better type accuracy and validation consistency. * fix: update max_results description and type in GoogleSearch schema - Changed the type of max_results from 'number' to 'integer' for improved type accuracy. - Updated the description to reflect the new default maximum number of search results, changing it from 10 to 5. 
* refactor: remove unused code and improve tool registry handling - Eliminated outdated comments and conditional logic related to event-driven mode in the ToolService. - Enhanced the handling of the tool registry by ensuring it is configurable for better integration during tool execution. * feat: add definitionsOnly option to buildToolClassification for event-driven mode - Introduced a new parameter, definitionsOnly, to the BuildToolClassificationParams interface to enable a mode that skips tool instance creation. - Updated the buildToolClassification function to conditionally add tool definitions without instantiating tools when definitionsOnly is true. - Modified the loadToolDefinitions function to pass definitionsOnly as true, ensuring compatibility with the new feature. * test: add unit tests for buildToolClassification with definitionsOnly option - Implemented tests to verify the behavior of buildToolClassification when definitionsOnly is set to true or false. - Ensured that tool instances are not created when definitionsOnly is true, while still adding necessary tool definitions. - Confirmed that loadAuthValues is called appropriately based on the definitionsOnly parameter, enhancing test coverage for this new feature.
2026-02-01 08:50:57 -05:00
/**
 * JSON Schema describing the arguments accepted by the Flux tool.
 *
 * No property is mandatory at the schema level (`required` is empty); the
 * per-action expectations are spelled out in each property's description.
 */
const fluxApiJsonSchema = {
  type: 'object',
  properties: {
    // --- What to do ---------------------------------------------------------
    action: {
      type: 'string',
      enum: ['generate', 'list_finetunes', 'generate_finetuned'],
      description:
        'Action to perform: "generate" for image generation, "generate_finetuned" for finetuned model generation, "list_finetunes" to get available custom models',
    },
    prompt: {
      type: 'string',
      description:
        'Text prompt for image generation. Required when action is "generate". Not used for list_finetunes.',
    },

    // --- Image geometry -----------------------------------------------------
    width: {
      type: 'number',
      description:
        'Width of the generated image in pixels. Must be a multiple of 32. Default is 1024.',
    },
    height: {
      type: 'number',
      description:
        'Height of the generated image in pixels. Must be a multiple of 32. Default is 768.',
    },

    // --- Generation tuning --------------------------------------------------
    prompt_upsampling: {
      type: 'boolean',
      description: 'Whether to perform upsampling on the prompt.',
    },
    steps: {
      type: 'integer',
      description: 'Number of steps to run the model for, a number from 1 to 50. Default is 40.',
    },
    seed: {
      type: 'number',
      description: 'Optional seed for reproducibility.',
    },
    safety_tolerance: {
      type: 'number',
      description:
        'Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict.',
    },

    // --- Model / endpoint selection -----------------------------------------
    endpoint: {
      type: 'string',
      enum: [
        '/v1/flux-pro-1.1',
        '/v1/flux-pro',
        '/v1/flux-dev',
        '/v1/flux-pro-1.1-ultra',
        '/v1/flux-pro-finetuned',
        '/v1/flux-pro-1.1-ultra-finetuned',
      ],
      description: 'Endpoint to use for image generation.',
    },
    raw: {
      type: 'boolean',
      description:
        'Generate less processed, more natural-looking images. Only works for /v1/flux-pro-1.1-ultra.',
    },

    // --- Finetune-specific options ------------------------------------------
    finetune_id: {
      type: 'string',
      description: 'ID of the finetuned model to use',
    },
    finetune_strength: {
      type: 'number',
      description: 'Strength of the finetuning effect (typically between 0.1 and 1.2)',
    },
    guidance: {
      type: 'number',
      description: 'Guidance scale for finetuned models',
    },

    // --- Ultra-model options ------------------------------------------------
    aspect_ratio: {
      type: 'string',
      description: 'Aspect ratio for ultra models (e.g., "16:9")',
    },
  },
  required: [],
};
/**
 * Instruction text built for the model about an image Flux has displayed:
 * do not re-describe the image, list download links, or mention downloading.
 * Assembled one sentence per entry; entries are joined with a single space.
 */
const displayMessage = [
  'Flux displayed an image.',
  "All generated images are already plainly visible, so don't repeat the descriptions in detail.",
  'Do not list download links as they are available in the UI already.',
  'The user may download the images by clicking on them, but do not mention anything about downloading to the user.',
].join(' ');
/**
* FluxAPI - A tool for generating high-quality images from text prompts using the Flux API.
* Each call generates one image. If multiple images are needed, make multiple consecutive calls with the same or varied prompts.
*/
class FluxAPI extends Tool {
// Pricing constants in USD per image.
// One entry per Flux endpoint; the trailing comment on each line names the
// endpoint path the entry corresponds to.
// NOTE(review): every value is negative — presumably so it can be applied
// directly as a debit/cost; confirm against the code that reads PRICING.
static PRICING = {
FLUX_PRO_1_1_ULTRA: -0.06, // /v1/flux-pro-1.1-ultra
FLUX_PRO_1_1: -0.04, // /v1/flux-pro-1.1
FLUX_PRO: -0.05, // /v1/flux-pro
FLUX_DEV: -0.025, // /v1/flux-dev
FLUX_PRO_FINETUNED: -0.06, // /v1/flux-pro-finetuned
FLUX_PRO_1_1_ULTRA_FINETUNED: -0.07, // /v1/flux-pro-1.1-ultra-finetuned
};
constructor(fields = {}) {
super();
/** @type {boolean} Used to initialize the Tool without necessary variables. */
this.override = fields.override ?? false;
this.userId = fields.userId;
this.fileStrategy = fields.fileStrategy;
/** @type {boolean} **/
this.isAgent = fields.isAgent;
this.returnMetadata = fields.returnMetadata ?? false;
if (fields.processFileURL) {
/** @type {processFileURL} Necessary for output to contain all image metadata. */
this.processFileURL = fields.processFileURL.bind(this);
}
this.apiKey = fields.FLUX_API_KEY || this.getApiKey();
this.name = 'flux';
this.description =
'Use Flux to generate images from text descriptions. This tool can generate images and list available finetunes. Each generate call creates one image. For multiple images, make multiple consecutive calls.';
this.description_for_model = `// Transform any image description into a detailed, high-quality prompt. Never submit a prompt under 3 sentences. Follow these core rules:
// 1. ALWAYS enhance basic prompts into 5-10 detailed sentences (e.g., "a cat" becomes: "A close-up photo of a sleek Siamese cat with piercing blue eyes. The cat sits elegantly on a vintage leather armchair, its tail curled gracefully around its paws. Warm afternoon sunlight streams through a nearby window, casting gentle shadows across its face and highlighting the subtle variations in its cream and chocolate-point fur. The background is softly blurred, creating a shallow depth of field that draws attention to the cat's expressive features. The overall composition has a peaceful, contemplative mood with a professional photography style.")
// 2. Each prompt MUST be 3-6 descriptive sentences minimum, focusing on visual elements: lighting, composition, mood, and style
// Use action: 'list_finetunes' to see available custom models. When using finetunes, use endpoint: '/v1/flux-pro-finetuned' (default) or '/v1/flux-pro-1.1-ultra-finetuned' for higher quality and aspect ratio.`;
// Add base URL from environment variable with fallback
this.baseUrl = process.env.FLUX_API_BASE_URL || 'https://api.us1.bfl.ai';
🦥 refactor: Event-Driven Lazy Tool Loading (#11588) * refactor: json schema tools with lazy loading - Added LocalToolExecutor class for lazy loading and caching of tools during execution. - Introduced ToolExecutionContext and ToolExecutor interfaces for better type management. - Created utility functions to generate tool proxies with JSON schema support. - Added ExtendedJsonSchema type for enhanced schema definitions. - Updated existing toolkits to utilize the new schema and executor functionalities. - Introduced a comprehensive tool definitions registry for managing various tool schemas. chore: update @librechat/agents to version 3.1.2 refactor: enhance tool loading optimization and classification - Improved the loadAgentToolsOptimized function to utilize a proxy pattern for all tools, enabling deferred execution and reducing overhead. - Introduced caching for tool instances and refined tool classification logic to streamline tool management. - Updated the handling of MCP tools to improve logging and error reporting for missing tools in the cache. - Enhanced the structure of tool definitions to support better classification and integration with existing tools. refactor: modularize tool loading and enhance optimization - Moved the loadAgentToolsOptimized function to a new service file for better organization and maintainability. - Updated the ToolService to utilize the new service for optimized tool loading, improving code clarity. - Removed legacy tool loading methods and streamlined the tool loading process to enhance performance and reduce complexity. - Introduced feature flag handling for optimized tool loading, allowing for easier toggling of this functionality. 
refactor: replace loadAgentToolsWithFlag with loadAgentTools in tool loader refactor: enhance MCP tool loading with proxy creation and classification refactor: optimize MCP tool loading by grouping tools by server - Introduced a Map to group cached tools by server name, improving the organization of tool data. - Updated the createMCPProxyTool function to accept server name directly, enhancing clarity. - Refactored the logic for handling MCP tools, streamlining the process of creating proxy tools for classification. refactor: enhance MCP tool loading and proxy creation - Added functionality to retrieve MCP server tools and reinitialize servers if necessary, improving tool availability. - Updated the tool loading logic to utilize a Map for organizing tools by server, enhancing clarity and performance. - Refactored the createToolProxy function to ensure a default response format, streamlining tool creation. refactor: update createToolProxy to ensure consistent response format - Modified the createToolProxy function to await the executor's execution and validate the result format. - Ensured that the function returns a default response structure when the result is not an array of two elements, enhancing reliability in tool proxy creation. refactor: ToolExecutionContext with toolCall property - Added toolCall property to ToolExecutionContext interface for improved context handling during tool execution. - Updated LocalToolExecutor to include toolCall in the runnable configuration, allowing for more flexible tool invocation. - Modified createToolProxy to pass toolCall from the configuration, ensuring consistent context across tool executions. refactor: enhance event-driven tool execution and logging - Introduced ToolExecuteOptions for improved handling of event-driven tool execution, allowing for parallel execution of tool calls. - Updated getDefaultHandlers to include support for ON_TOOL_EXECUTE events, enhancing the flexibility of tool invocation. 
- Added detailed logging in LocalToolExecutor to track tool loading and execution metrics, improving observability and debugging capabilities. - Refactored initializeClient to integrate event-driven tool loading, ensuring compatibility with the new execution model. chore: update @librechat/agents to version 3.1.21 refactor: remove legacy tool loading and executor components - Eliminated the loadAgentToolsWithFlag function, simplifying the tool loading process by directly using loadAgentTools. - Removed the LocalToolExecutor and related executor components to streamline the tool execution architecture. - Updated ToolService and related files to reflect the removal of deprecated features, enhancing code clarity and maintainability. refactor: enhance tool classification and definitions handling - Updated the loadAgentTools function to return toolDefinitions alongside toolRegistry, improving the structure of tool data returned to clients. - Removed the convertRegistryToDefinitions function from the initialize.js file, simplifying the initialization process. - Adjusted the buildToolClassification function to ensure toolDefinitions are built and returned simultaneously with the toolRegistry, enhancing efficiency in tool management. - Updated type definitions in initialize.ts to include toolDefinitions, ensuring consistency across the codebase. refactor: implement event-driven tool execution handler - Introduced createToolExecuteHandler function to streamline the handling of ON_TOOL_EXECUTE events, allowing for parallel execution of tool calls. - Updated getDefaultHandlers to utilize the new handler, simplifying the event-driven architecture. - Added handlers.ts file to encapsulate tool execution logic, improving code organization and maintainability. - Enhanced OpenAI handlers to integrate the new tool execution capabilities, ensuring consistent event handling across the application. 
refactor: integrate event-driven tool execution options - Added toolExecuteOptions to support event-driven tool execution in OpenAI and responses controllers, enhancing flexibility in tool handling. - Updated handlers to utilize createToolExecuteHandler, allowing for streamlined execution of tools during agent interactions. - Refactored service dependencies to include toolExecuteOptions, ensuring consistent integration across the application. refactor: enhance tool loading with definitionsOnly parameter - Updated createToolLoader and loadAgentTools functions to include a definitionsOnly parameter, allowing for the retrieval of only serializable tool definitions in event-driven mode. - Adjusted related interfaces and documentation to reflect the new parameter, improving clarity and flexibility in tool management. - Ensured compatibility across various components by integrating the definitionsOnly option in the initialization process. refactor: improve agent tool presence check in initialization - Added a check for tool presence using a new hasAgentTools variable, which evaluates both structuredTools and toolDefinitions. - Updated the conditional logic in the agent initialization process to utilize the hasAgentTools variable, enhancing clarity and maintainability in tool management. refactor: enhance agent tool extraction to support tool definitions - Updated the extractMCPServers function to handle both tool instances and serializable tool definitions, improving flexibility in agent tool management. - Added a new property toolDefinitions to the AgentWithTools type for better integration of event-driven mode. - Enhanced documentation to clarify the function's capabilities in extracting unique MCP server names from both tools and tool definitions. refactor: enhance tool classification and registry building - Added serverName property to ToolDefinition for improved tool identification. 
- Introduced buildToolRegistry function to streamline the creation of tool registries based on MCP tool definitions and agent options. - Updated buildToolClassification to utilize the new registry building logic, ensuring basic definitions are returned even when advanced classification features are not allowed. - Enhanced documentation and logging for clarity in tool classification processes. refactor: update @librechat/agents dependency to version 3.1.22 fix: expose loadTools function in ToolService - Added loadTools function to the exported module in ToolService.js, enhancing the accessibility of tool loading functionality. chore: remove configurable options from tool execute options in OpenAI controller refactor: enhance tool loading mechanism to utilize agent-specific context chore: update @librechat/agents dependency to version 3.1.23 fix: simplify result handling in createToolExecuteHandler * refactor: loadToolDefinitions for efficient tool loading in event-driven mode * refactor: replace legacy tool loading with loadToolsForExecution in OpenAI and responses controllers - Updated OpenAIChatCompletionController and createResponse functions to utilize loadToolsForExecution for improved tool loading. - Removed deprecated loadToolsLegacy references, streamlining the tool execution process. - Enhanced tool loading options to include agent-specific context and configurations. * refactor: enhance tool loading and execution handling - Introduced loadActionToolsForExecution function to streamline loading of action tools, improving organization and maintainability. - Updated loadToolsForExecution to handle both regular and action tools, optimizing the tool loading process. - Added detailed logging for missing tools in createToolExecuteHandler, enhancing error visibility. - Refactored tool definitions to normalize action tool names, improving consistency in tool management. 
* refactor: enhance built-in tool definitions loading - Updated loadToolDefinitions to include descriptions and parameters from the tool registry for built-in tools, improving the clarity and usability of tool definitions. - Integrated getToolDefinition to streamline the retrieval of tool metadata, enhancing the overall tool management process. * feat: add action tool definitions loading to tool service - Introduced getActionToolDefinitions function to load action tool definitions based on agent ID and tool names, enhancing the tool loading process. - Updated loadToolDefinitions to integrate action tool definitions, allowing for better management and retrieval of action-specific tools. - Added comprehensive tests for action tool definitions to ensure correct loading and parameter handling, improving overall reliability and functionality. * chore: update @librechat/agents dependency to version 3.1.26 * refactor: add toolEndCallback to handle tool execution results * fix: tool definitions and execution handling - Introduced native tools (execute_code, file_search, web_search) to the tool service, allowing for better integration and management of these tools. - Updated isBuiltInTool function to include native tools in the built-in check, improving tool recognition. - Added comprehensive tests for loading parameters of native tools, ensuring correct functionality and parameter handling. - Enhanced tool definitions registry to include new agent tool definitions, streamlining tool retrieval and management. * refactor: enhance tool loading and execution context - Added toolRegistry to the context for OpenAIChatCompletionController and createResponse functions, improving tool management. - Updated loadToolsForExecution to utilize toolRegistry for better integration of programmatic tools and tool search functionalities. - Enhanced the initialization process to include toolRegistry in agent context, streamlining tool access and configuration. 
- Refactored tool classification logic to support event-driven execution, ensuring compatibility with new tool definitions. * chore: add request duration logging to OpenAI and Responses controllers - Introduced logging for request start and completion times in OpenAIChatCompletionController and createResponse functions. - Calculated and logged the duration of each request, enhancing observability and performance tracking. - Improved debugging capabilities by providing detailed logs for both streaming and non-streaming responses. * chore: update @librechat/agents dependency to version 3.1.27 * refactor: implement buildToolSet function for tool management - Introduced buildToolSet function to streamline the creation of tool sets from agent configurations, enhancing tool management across various controllers. - Updated AgentClient, OpenAIChatCompletionController, and createResponse functions to utilize buildToolSet, improving consistency in tool handling. - Added comprehensive tests for buildToolSet to ensure correct functionality and edge case handling, enhancing overall reliability. * refactor: update import paths for ToolExecuteOptions and createToolExecuteHandler * fix: update GoogleSearch.js description for maximum search results - Changed the default maximum number of search results from 10 to 5 in the Google Search JSON schema description, ensuring accurate documentation of the expected behavior. * chore: remove deprecated Browser tool and associated assets - Deleted the Browser tool definition from manifest.json, which included its name, plugin key, description, and authentication configuration. - Removed the web-browser.svg asset as it is no longer needed following the removal of the Browser tool. * fix: ensure tool definitions are valid before processing - Added a check to verify the existence of tool definitions in the registry before accessing their properties, preventing potential runtime errors. 
- Updated the loading logic for built-in tool definitions to ensure that only valid definitions are pushed to the built-in tool definitions array. * fix: extend ExtendedJsonSchema to support 'null' type and nullable enums - Updated the ExtendedJsonSchema type to include 'null' as a valid type option. - Modified the enum property to accept an array of values that can include strings, numbers, booleans, and null, enhancing schema flexibility. * test: add comprehensive tests for tool definitions loading and registry behavior - Implemented tests to verify the handling of built-in tools without registry definitions, ensuring they are skipped correctly. - Added tests to confirm that built-in tools include descriptions and parameters in the registry. - Enhanced tests for action tools, checking for proper inclusion of metadata and handling of tools without parameters in the registry. * test: add tests for mixed-type and number enum schema handling - Introduced tests to validate the parsing of mixed-type enum values, including strings, numbers, booleans, and null. - Added tests for number enum schema values to ensure correct parsing of numeric inputs, enhancing schema validation coverage. * fix: update mock implementation for @librechat/agents - Changed the mock for @librechat/agents to spread the actual module's properties, ensuring that all necessary functionalities are preserved in tests. - This adjustment enhances the accuracy of the tests by reflecting the real structure of the module. * fix: change max_results type in GoogleSearch schema from number to integer - Updated the type of max_results in the Google Search JSON schema to 'integer' for better type accuracy and validation consistency. * fix: update max_results description and type in GoogleSearch schema - Changed the type of max_results from 'number' to 'integer' for improved type accuracy. - Updated the description to reflect the new default maximum number of search results, changing it from 10 to 5. 
* refactor: remove unused code and improve tool registry handling - Eliminated outdated comments and conditional logic related to event-driven mode in the ToolService. - Enhanced the handling of the tool registry by ensuring it is configurable for better integration during tool execution. * feat: add definitionsOnly option to buildToolClassification for event-driven mode - Introduced a new parameter, definitionsOnly, to the BuildToolClassificationParams interface to enable a mode that skips tool instance creation. - Updated the buildToolClassification function to conditionally add tool definitions without instantiating tools when definitionsOnly is true. - Modified the loadToolDefinitions function to pass definitionsOnly as true, ensuring compatibility with the new feature. * test: add unit tests for buildToolClassification with definitionsOnly option - Implemented tests to verify the behavior of buildToolClassification when definitionsOnly is set to true or false. - Ensured that tool instances are not created when definitionsOnly is true, while still adding necessary tool definitions. - Confirmed that loadAuthValues is called appropriately based on the definitionsOnly parameter, enhancing test coverage for this new feature.
2026-02-01 08:50:57 -05:00
this.schema = fluxApiJsonSchema;
}
static get jsonSchema() {
return fluxApiJsonSchema;
}
getAxiosConfig() {
const config = {};
if (process.env.PROXY) {
config.httpsAgent = new HttpsProxyAgent(process.env.PROXY);
}
return config;
}
/** @param {Object|string} value */
getDetails(value) {
if (typeof value === 'string') {
return value;
}
return JSON.stringify(value, null, 2);
}
getApiKey() {
const apiKey = process.env.FLUX_API_KEY || '';
if (!apiKey && !this.override) {
throw new Error('Missing FLUX_API_KEY environment variable.');
}
return apiKey;
}
wrapInMarkdown(imageUrl) {
const serverDomain = process.env.DOMAIN_SERVER || 'http://localhost:3080';
return `![generated image](${serverDomain}${imageUrl})`;
}
returnValue(value) {
if (this.isAgent === true && typeof value === 'string') {
return [value, {}];
} else if (this.isAgent === true && typeof value === 'object') {
if (Array.isArray(value)) {
return value;
}
return [displayMessage, value];
}
return value;
}
async _call(data) {
const { action = 'generate', ...imageData } = data;
// Use provided API key for this request if available, otherwise use default
const requestApiKey = this.apiKey || this.getApiKey();
// Handle list_finetunes action
if (action === 'list_finetunes') {
return this.getMyFinetunes(requestApiKey);
}
// Handle finetuned generation
if (action === 'generate_finetuned') {
return this.generateFinetunedImage(imageData, requestApiKey);
}
// For generate action, ensure prompt is provided
if (!imageData.prompt) {
throw new Error('Missing required field: prompt');
}
let payload = {
prompt: imageData.prompt,
prompt_upsampling: imageData.prompt_upsampling || false,
safety_tolerance: imageData.safety_tolerance || 6,
output_format: imageData.output_format || 'png',
};
// Add optional parameters if provided
if (imageData.width) {
payload.width = imageData.width;
}
if (imageData.height) {
payload.height = imageData.height;
}
if (imageData.steps) {
payload.steps = imageData.steps;
}
if (imageData.seed !== undefined) {
payload.seed = imageData.seed;
}
if (imageData.raw) {
payload.raw = imageData.raw;
}
const generateUrl = `${this.baseUrl}${imageData.endpoint || '/v1/flux-pro'}`;
const resultUrl = `${this.baseUrl}/v1/get_result`;
logger.debug('[FluxAPI] Generating image with payload:', payload);
logger.debug('[FluxAPI] Using endpoint:', generateUrl);
let taskResponse;
try {
taskResponse = await axios.post(generateUrl, payload, {
headers: {
'x-key': requestApiKey,
'Content-Type': 'application/json',
Accept: 'application/json',
},
...this.getAxiosConfig(),
});
} catch (error) {
const details = this.getDetails(error?.response?.data || error.message);
logger.error('[FluxAPI] Error while submitting task:', details);
return this.returnValue(
`Something went wrong when trying to generate the image. The Flux API may be unavailable:
Error Message: ${details}`,
);
}
const taskId = taskResponse.data.id;
// Polling for the result
let status = 'Pending';
let resultData = null;
while (status !== 'Ready' && status !== 'Error') {
try {
// Wait 2 seconds between polls
await new Promise((resolve) => setTimeout(resolve, 2000));
const resultResponse = await axios.get(resultUrl, {
headers: {
'x-key': requestApiKey,
Accept: 'application/json',
},
params: { id: taskId },
...this.getAxiosConfig(),
});
status = resultResponse.data.status;
if (status === 'Ready') {
resultData = resultResponse.data.result;
break;
} else if (status === 'Error') {
logger.error('[FluxAPI] Error in task:', resultResponse.data);
return this.returnValue('An error occurred during image generation.');
}
} catch (error) {
const details = this.getDetails(error?.response?.data || error.message);
logger.error('[FluxAPI] Error while getting result:', details);
return this.returnValue('An error occurred while retrieving the image.');
}
}
// If no result data
if (!resultData || !resultData.sample) {
logger.error('[FluxAPI] No image data received from API. Response:', resultData);
return this.returnValue('No image data received from Flux API.');
}
// Try saving the image locally
const imageUrl = resultData.sample;
const imageName = `img-${uuidv4()}.png`;
if (this.isAgent) {
try {
// Fetch the image and convert to base64
const fetchOptions = {};
if (process.env.PROXY) {
fetchOptions.agent = new HttpsProxyAgent(process.env.PROXY);
}
const imageResponse = await fetch(imageUrl, fetchOptions);
const arrayBuffer = await imageResponse.arrayBuffer();
const base64 = Buffer.from(arrayBuffer).toString('base64');
const content = [
{
type: ContentTypes.IMAGE_URL,
image_url: {
url: `data:image/png;base64,${base64}`,
},
},
];
const response = [
{
type: ContentTypes.TEXT,
text: displayMessage,
},
];
return [response, { content }];
} catch (error) {
logger.error('Error processing image for agent:', error);
return this.returnValue(`Failed to process the image. ${error.message}`);
}
}
try {
logger.debug('[FluxAPI] Saving image:', imageUrl);
const result = await this.processFileURL({
fileStrategy: this.fileStrategy,
userId: this.userId,
URL: imageUrl,
fileName: imageName,
basePath: 'images',
context: FileContext.image_generation,
});
logger.debug('[FluxAPI] Image saved to path:', result.filepath);
// Calculate cost based on endpoint
/**
* TODO: Cost handling
const endpoint = imageData.endpoint || '/v1/flux-pro';
const endpointKey = Object.entries(FluxAPI.PRICING).find(([key, _]) =>
endpoint.includes(key.toLowerCase().replace(/_/g, '-')),
)?.[0];
const cost = FluxAPI.PRICING[endpointKey] || 0;
*/
this.result = this.returnMetadata ? result : this.wrapInMarkdown(result.filepath);
return this.returnValue(this.result);
} catch (error) {
const details = this.getDetails(error?.message ?? 'No additional error details.');
logger.error('Error while saving the image:', details);
return this.returnValue(`Failed to save the image locally. ${details}`);
}
}
async getMyFinetunes(apiKey = null) {
const finetunesUrl = `${this.baseUrl}/v1/my_finetunes`;
const detailsUrl = `${this.baseUrl}/v1/finetune_details`;
try {
const headers = {
'x-key': apiKey || this.getApiKey(),
'Content-Type': 'application/json',
Accept: 'application/json',
};
// Get list of finetunes
const response = await axios.get(finetunesUrl, {
headers,
...this.getAxiosConfig(),
});
const finetunes = response.data.finetunes;
// Fetch details for each finetune
const finetuneDetails = await Promise.all(
finetunes.map(async (finetuneId) => {
try {
const detailResponse = await axios.get(`${detailsUrl}?finetune_id=${finetuneId}`, {
headers,
...this.getAxiosConfig(),
});
return {
id: finetuneId,
...detailResponse.data,
};
} catch (error) {
logger.error(`[FluxAPI] Error fetching details for finetune ${finetuneId}:`, error);
return {
id: finetuneId,
error: 'Failed to fetch details',
};
}
}),
);
if (this.isAgent) {
const formattedDetails = JSON.stringify(finetuneDetails, null, 2);
return [`Here are the available finetunes:\n${formattedDetails}`, null];
}
return JSON.stringify(finetuneDetails);
} catch (error) {
const details = this.getDetails(error?.response?.data || error.message);
logger.error('[FluxAPI] Error while getting finetunes:', details);
const errorMsg = `Failed to get finetunes: ${details}`;
return this.isAgent ? this.returnValue([errorMsg, {}]) : new Error(errorMsg);
}
}
async generateFinetunedImage(imageData, requestApiKey) {
if (!imageData.prompt) {
throw new Error('Missing required field: prompt');
}
if (!imageData.finetune_id) {
throw new Error(
'Missing required field: finetune_id for finetuned generation. Please supply a finetune_id!',
);
}
// Validate endpoint is appropriate for finetuned generation
const validFinetunedEndpoints = ['/v1/flux-pro-finetuned', '/v1/flux-pro-1.1-ultra-finetuned'];
const endpoint = imageData.endpoint || '/v1/flux-pro-finetuned';
if (!validFinetunedEndpoints.includes(endpoint)) {
throw new Error(
`Invalid endpoint for finetuned generation. Must be one of: ${validFinetunedEndpoints.join(', ')}`,
);
}
let payload = {
prompt: imageData.prompt,
prompt_upsampling: imageData.prompt_upsampling || false,
safety_tolerance: imageData.safety_tolerance || 6,
output_format: imageData.output_format || 'png',
finetune_id: imageData.finetune_id,
finetune_strength: imageData.finetune_strength || 1.0,
guidance: imageData.guidance || 2.5,
};
// Add optional parameters if provided
if (imageData.width) {
payload.width = imageData.width;
}
if (imageData.height) {
payload.height = imageData.height;
}
if (imageData.steps) {
payload.steps = imageData.steps;
}
if (imageData.seed !== undefined) {
payload.seed = imageData.seed;
}
if (imageData.raw) {
payload.raw = imageData.raw;
}
const generateUrl = `${this.baseUrl}${endpoint}`;
const resultUrl = `${this.baseUrl}/v1/get_result`;
logger.debug('[FluxAPI] Generating finetuned image with payload:', payload);
logger.debug('[FluxAPI] Using endpoint:', generateUrl);
let taskResponse;
try {
taskResponse = await axios.post(generateUrl, payload, {
headers: {
'x-key': requestApiKey,
'Content-Type': 'application/json',
Accept: 'application/json',
},
...this.getAxiosConfig(),
});
} catch (error) {
const details = this.getDetails(error?.response?.data || error.message);
logger.error('[FluxAPI] Error while submitting finetuned task:', details);
return this.returnValue(
`Something went wrong when trying to generate the finetuned image. The Flux API may be unavailable:
Error Message: ${details}`,
);
}
const taskId = taskResponse.data.id;
// Polling for the result
let status = 'Pending';
let resultData = null;
while (status !== 'Ready' && status !== 'Error') {
try {
// Wait 2 seconds between polls
await new Promise((resolve) => setTimeout(resolve, 2000));
const resultResponse = await axios.get(resultUrl, {
headers: {
'x-key': requestApiKey,
Accept: 'application/json',
},
params: { id: taskId },
...this.getAxiosConfig(),
});
status = resultResponse.data.status;
if (status === 'Ready') {
resultData = resultResponse.data.result;
break;
} else if (status === 'Error') {
logger.error('[FluxAPI] Error in finetuned task:', resultResponse.data);
return this.returnValue('An error occurred during finetuned image generation.');
}
} catch (error) {
const details = this.getDetails(error?.response?.data || error.message);
logger.error('[FluxAPI] Error while getting finetuned result:', details);
return this.returnValue('An error occurred while retrieving the finetuned image.');
}
}
// If no result data
if (!resultData || !resultData.sample) {
logger.error('[FluxAPI] No image data received from API. Response:', resultData);
return this.returnValue('No image data received from Flux API.');
}
// Try saving the image locally
const imageUrl = resultData.sample;
const imageName = `img-${uuidv4()}.png`;
try {
logger.debug('[FluxAPI] Saving finetuned image:', imageUrl);
const result = await this.processFileURL({
fileStrategy: this.fileStrategy,
userId: this.userId,
URL: imageUrl,
fileName: imageName,
basePath: 'images',
context: FileContext.image_generation,
});
logger.debug('[FluxAPI] Finetuned image saved to path:', result.filepath);
// Calculate cost based on endpoint
const endpointKey = endpoint.includes('ultra')
? 'FLUX_PRO_1_1_ULTRA_FINETUNED'
: 'FLUX_PRO_FINETUNED';
const cost = FluxAPI.PRICING[endpointKey] || 0;
// Return the result based on returnMetadata flag
this.result = this.returnMetadata ? result : this.wrapInMarkdown(result.filepath);
return this.returnValue(this.result);
} catch (error) {
const details = this.getDetails(error?.message ?? 'No additional error details.');
logger.error('Error while saving the finetuned image:', details);
return this.returnValue(`Failed to save the finetuned image locally. ${details}`);
}
}
}
module.exports = FluxAPI;