LibreChat/api/app/clients/tools/structured/DALLE3.js
Danny Avila 9a210971f5
🛜 refactor: Streamline App Config Usage (#9234)
* WIP: app.locals refactoring

WIP: appConfig

fix: update memory configuration retrieval to use getAppConfig based on user role

fix: update comment for AppConfig interface to clarify purpose

🏷️ refactor: Update tests to use getAppConfig for endpoint configurations

ci: Update AppService tests to initialize app config instead of app.locals

ci: Integrate getAppConfig into remaining tests

refactor: Update multer storage destination to use promise-based getAppConfig and improve error handling in tests

refactor: Rename initializeAppConfig to setAppConfig and update related tests

ci: Mock getAppConfig in various tests to provide default configurations

refactor: Update convertMCPToolsToPlugins to use mcpManager for server configuration and adjust related tests

chore: rename `Config/getAppConfig` -> `Config/app`

fix: streamline OpenAI image tools configuration by removing direct appConfig dependency and using function parameters

chore: correct parameter documentation for imageOutputType in ToolService.js

refactor: remove `getCustomConfig` dependency in config route

refactor: update domain validation to use appConfig for allowed domains

refactor: use appConfig registration property

chore: remove app parameter from AppService invocation

refactor: update AppConfig interface to correct registration and turnstile configurations

refactor: remove getCustomConfig dependency and use getAppConfig in PluginController, multer, and MCP services

refactor: replace getCustomConfig with getAppConfig in STTService, TTSService, and related files

refactor: replace getCustomConfig with getAppConfig in Conversation and Message models, update tempChatRetention functions to use AppConfig type

refactor: update getAppConfig calls in Conversation and Message models to include user role for temporary chat expiration

ci: update related tests

refactor: update getAppConfig call in getCustomConfigSpeech to include user role

fix: update appConfig usage to access allowedDomains from actions instead of registration

refactor: enhance AppConfig to include fileStrategies and update related file strategy logic

refactor: update imports to use normalizeEndpointName from @librechat/api and remove redundant definitions

chore: remove deprecated unused RunManager

refactor: get balance config primarily from appConfig

refactor: remove customConfig dependency for appConfig and streamline loadConfigModels logic

refactor: remove getCustomConfig usage and use app config in file citations

refactor: consolidate endpoint loading logic into loadEndpoints function

refactor: update appConfig access to use endpoints structure across various services

refactor: implement custom endpoints configuration and streamline endpoint loading logic

refactor: update getAppConfig call to include user role parameter

refactor: streamline endpoint configuration and enhance appConfig usage across services

refactor: replace getMCPAuthMap with getUserMCPAuthMap and remove unused getCustomConfig file

refactor: add type annotation for loadedEndpoints in loadEndpoints function

refactor: move /services/Files/images/parse to TS API

chore: add missing FILE_CITATIONS permission to IRole interface

refactor: restructure toolkits to TS API

refactor: separate manifest logic into its own module

refactor: consolidate tool loading logic into a new tools module for startup logic

refactor: move interface config logic to TS API

refactor: migrate checkEmailConfig to TypeScript and update imports

refactor: add FunctionTool interface and availableTools to AppConfig

refactor: decouple caching and DB operations from AppService, make part of consolidated `getAppConfig`

WIP: fix tests

* fix: rebase conflicts

* refactor: remove app.locals references

* refactor: replace getBalanceConfig with getAppConfig in various strategies and middleware

* refactor: replace appConfig?.balance with getBalanceConfig in various controllers and clients

* test: add balance configuration to titleConvo method in AgentClient tests

* chore: remove unused `openai-chat-tokens` package

* chore: remove unused imports in initializeMCPs.js

* refactor: update balance configuration to use getAppConfig instead of getBalanceConfig

* refactor: integrate configMiddleware for centralized configuration handling

* refactor: optimize email domain validation by removing unnecessary async calls

* refactor: simplify multer storage configuration by removing async calls

* refactor: reorder imports for better readability in user.js

* refactor: replace getAppConfig calls with req.config for improved performance

* chore: replace getAppConfig calls with req.config in tests for centralized configuration handling

* chore: remove unused override config

* refactor: add configMiddleware to endpoint route and replace getAppConfig with req.config

* chore: remove customConfig parameter from TTSService constructor

* refactor: pass appConfig from request to processFileCitations for improved configuration handling

* refactor: remove configMiddleware from endpoint route and retrieve appConfig directly in getEndpointsConfig if not in `req.config`

* test: add mockAppConfig to processFileCitations tests for improved configuration handling

* fix: pass req.config to hasCustomUserVars and call without await after synchronous refactor

* fix: type safety in useExportConversation

* refactor: retrieve appConfig using getAppConfig in PluginController and remove configMiddleware from plugins route, to avoid always retrieving when plugins are cached

* chore: change `MongoUser` typedef to `IUser`

* fix: Add `user` and `config` fields to ServerRequest and update JSDoc type annotations from Express.Request to ServerRequest

* fix: remove unused setAppConfig mock from Server configuration tests
2025-08-26 12:10:18 -04:00

232 lines
9.6 KiB
JavaScript

const { z } = require('zod');
const path = require('path');
const OpenAI = require('openai');
const fetch = require('node-fetch');
const { v4: uuidv4 } = require('uuid');
const { ProxyAgent } = require('undici');
const { Tool } = require('@langchain/core/tools');
const { logger } = require('@librechat/data-schemas');
const { getImageBasename } = require('@librechat/api');
const { FileContext, ContentTypes } = require('librechat-data-provider');
const extractBaseURL = require('~/utils/extractBaseURL');
const displayMessage =
"DALL-E displayed an image. All generated images are already plainly visible, so don't repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.";
class DALLE3 extends Tool {
constructor(fields = {}) {
super();
/** @type {boolean} Used to initialize the Tool without necessary variables. */
this.override = fields.override ?? false;
/** @type {boolean} Necessary for output to contain all image metadata. */
this.returnMetadata = fields.returnMetadata ?? false;
this.userId = fields.userId;
this.fileStrategy = fields.fileStrategy;
/** @type {boolean} */
this.isAgent = fields.isAgent;
if (fields.processFileURL) {
/** @type {processFileURL} Necessary for output to contain all image metadata. */
this.processFileURL = fields.processFileURL.bind(this);
}
let apiKey = fields.DALLE3_API_KEY ?? fields.DALLE_API_KEY ?? this.getApiKey();
const config = { apiKey };
if (process.env.DALLE_REVERSE_PROXY) {
config.baseURL = extractBaseURL(process.env.DALLE_REVERSE_PROXY);
}
if (process.env.DALLE3_AZURE_API_VERSION && process.env.DALLE3_BASEURL) {
config.baseURL = process.env.DALLE3_BASEURL;
config.defaultQuery = { 'api-version': process.env.DALLE3_AZURE_API_VERSION };
config.defaultHeaders = {
'api-key': process.env.DALLE3_API_KEY,
'Content-Type': 'application/json',
};
config.apiKey = process.env.DALLE3_API_KEY;
}
if (process.env.PROXY) {
const proxyAgent = new ProxyAgent(process.env.PROXY);
config.fetchOptions = {
dispatcher: proxyAgent,
};
}
/** @type {OpenAI} */
this.openai = new OpenAI(config);
this.name = 'dalle';
this.description = `Use DALLE to create images from text descriptions.
- It requires prompts to be in English, detailed, and to specify image type and human features for diversity.
- Create only one image, without repeating or listing descriptions outside the "prompts" field.
- Maintains the original intent of the description, with parameters for image style, quality, and size to tailor the output.`;
this.description_for_model =
process.env.DALLE3_SYSTEM_PROMPT ??
`// Whenever a description of an image is given, generate prompts (following these rules), and use dalle to create the image. If the user does not ask for a specific number of images, default to creating 2 prompts to send to dalle that are written to be as diverse as possible. All prompts sent to dalle must abide by the following policies:
// 1. Prompts must be in English. Translate to English if needed.
// 2. One image per function call. Create only 1 image per request unless explicitly told to generate more than 1 image.
// 3. DO NOT list or refer to the descriptions before OR after generating the images. They should ONLY ever be written out ONCE, in the \`"prompts"\` field of the request. You do not need to ask for permission to generate, just do it!
// 4. Always mention the image type (photo, oil painting, watercolor painting, illustration, cartoon, drawing, vector, render, etc.) at the beginning of the caption. Unless the captions suggests otherwise, make one of the images a photo.
// 5. Diversify depictions of ALL images with people to always include always DESCENT and GENDER for EACH person using direct terms. Adjust only human descriptions.
// - EXPLICITLY specify these attributes, not abstractly reference them. The attributes should be specified in a minimal way and should directly describe their physical form.
// - Your choices should be grounded in reality. For example, all of a given OCCUPATION should not be the same gender or race. Additionally, focus on creating diverse, inclusive, and exploratory scenes via the properties you choose during rewrites. Make choices that may be insightful or unique sometimes.
// - Use "various" or "diverse" ONLY IF the description refers to groups of more than 3 people. Do not change the number of people requested in the original description.
// - Don't alter memes, fictional character origins, or unseen people. Maintain the original prompt's intent and prioritize quality.
// The prompt must intricately describe every part of the image in concrete, objective detail. THINK about what the end goal of the description is, and extrapolate that to what would make satisfying images.
// All descriptions sent to dalle should be a paragraph of text that is extremely descriptive and detailed. Each should be more than 3 sentences long.
// - The "vivid" style is HIGHLY preferred, but "natural" is also supported.`;
this.schema = z.object({
prompt: z
.string()
.max(4000)
.describe(
'A text description of the desired image, following the rules, up to 4000 characters.',
),
style: z
.enum(['vivid', 'natural'])
.describe(
'Must be one of `vivid` or `natural`. `vivid` generates hyper-real and dramatic images, `natural` produces more natural, less hyper-real looking images',
),
quality: z
.enum(['hd', 'standard'])
.describe('The quality of the generated image. Only `hd` and `standard` are supported.'),
size: z
.enum(['1024x1024', '1792x1024', '1024x1792'])
.describe(
'The size of the requested image. Use 1024x1024 (square) as the default, 1792x1024 if the user requests a wide image, and 1024x1792 for full-body portraits. Always include this parameter in the request.',
),
});
}
getApiKey() {
const apiKey = process.env.DALLE3_API_KEY ?? process.env.DALLE_API_KEY ?? '';
if (!apiKey && !this.override) {
throw new Error('Missing DALLE_API_KEY environment variable.');
}
return apiKey;
}
replaceUnwantedChars(inputString) {
return inputString
.replace(/\r\n|\r|\n/g, ' ')
.replace(/"/g, '')
.trim();
}
wrapInMarkdown(imageUrl) {
return `![generated image](${imageUrl})`;
}
returnValue(value) {
if (this.isAgent === true && typeof value === 'string') {
return [value, {}];
} else if (this.isAgent === true && typeof value === 'object') {
return [displayMessage, value];
}
return value;
}
async _call(data) {
const { prompt, quality = 'standard', size = '1024x1024', style = 'vivid' } = data;
if (!prompt) {
throw new Error('Missing required field: prompt');
}
let resp;
try {
resp = await this.openai.images.generate({
model: 'dall-e-3',
quality,
style,
size,
prompt: this.replaceUnwantedChars(prompt),
n: 1,
});
} catch (error) {
logger.error('[DALL-E-3] Problem generating the image:', error);
return this
.returnValue(`Something went wrong when trying to generate the image. The DALL-E API may be unavailable:
Error Message: ${error.message}`);
}
if (!resp) {
return this.returnValue(
'Something went wrong when trying to generate the image. The DALL-E API may be unavailable',
);
}
const theImageUrl = resp.data[0].url;
if (!theImageUrl) {
return this.returnValue(
'No image URL returned from OpenAI API. There may be a problem with the API or your configuration.',
);
}
if (this.isAgent) {
let fetchOptions = {};
if (process.env.PROXY) {
const proxyAgent = new ProxyAgent(process.env.PROXY);
fetchOptions.dispatcher = proxyAgent;
}
const imageResponse = await fetch(theImageUrl, fetchOptions);
const arrayBuffer = await imageResponse.arrayBuffer();
const base64 = Buffer.from(arrayBuffer).toString('base64');
const content = [
{
type: ContentTypes.IMAGE_URL,
image_url: {
url: `data:image/png;base64,${base64}`,
},
},
];
const response = [
{
type: ContentTypes.TEXT,
text: displayMessage,
},
];
return [response, { content }];
}
const imageBasename = getImageBasename(theImageUrl);
const imageExt = path.extname(imageBasename);
const extension = imageExt.startsWith('.') ? imageExt.slice(1) : imageExt;
const imageName = `img-${uuidv4()}.${extension}`;
logger.debug('[DALL-E-3]', {
imageName,
imageBasename,
imageExt,
extension,
theImageUrl,
data: resp.data[0],
});
try {
const result = await this.processFileURL({
URL: theImageUrl,
basePath: 'images',
userId: this.userId,
fileName: imageName,
fileStrategy: this.fileStrategy,
context: FileContext.image_generation,
});
if (this.returnMetadata) {
this.result = result;
} else {
this.result = this.wrapInMarkdown(result.filepath);
}
} catch (error) {
logger.error('Error while saving the image:', error);
this.result = `Failed to save the image locally. ${error.message}`;
}
return this.returnValue(this.result);
}
}
module.exports = DALLE3;