mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-16 08:20:14 +01:00
* refactor: move endpoint initialization methods to typescript * refactor: move agent init to packages/api - Introduced `initialize.ts` for agent initialization, including file processing and tool loading. - Updated `resources.ts` to allow optional appConfig parameter. - Enhanced endpoint configuration handling in various initialization files to support model parameters. - Added new artifacts and prompts for React component generation. - Refactored existing code to improve type safety and maintainability. * refactor: streamline endpoint initialization and enhance type safety - Updated initialization functions across various endpoints to use a consistent request structure, replacing `unknown` types with `ServerResponse`. - Simplified request handling by directly extracting keys from the request body. - Improved type safety by ensuring user IDs are safely accessed with optional chaining. - Removed unnecessary parameters and streamlined model options handling for better clarity and maintainability. * refactor: moved ModelService and extractBaseURL to packages/api - Added comprehensive tests for the models fetching functionality, covering scenarios for OpenAI, Anthropic, Google, and Ollama models. - Updated existing endpoint index to include the new models module. - Enhanced utility functions for URL extraction and model data processing. - Improved type safety and error handling across the models fetching logic. * refactor: consolidate utility functions and remove unused files - Merged `deriveBaseURL` and `extractBaseURL` into the `@librechat/api` module for better organization. - Removed redundant utility files and their associated tests to streamline the codebase. - Updated imports across various client files to utilize the new consolidated functions. - Enhanced overall maintainability by reducing the number of utility modules. 
* refactor: replace ModelService references with direct imports from @librechat/api and remove ModelService file * refactor: move encrypt/decrypt methods and key db methods to data-schemas, use `getProviderConfig` from `@librechat/api` * chore: remove unused 'res' from options in AgentClient * refactor: file model imports and methods - Updated imports in various controllers and services to use the unified file model from '~/models' instead of '~/models/File'. - Consolidated file-related methods into a new file methods module in the data-schemas package. - Added comprehensive tests for file methods including creation, retrieval, updating, and deletion. - Enhanced the initializeAgent function to accept dependency injection for file-related methods. - Improved error handling and logging in file methods. * refactor: streamline database method references in agent initialization * refactor: enhance file method tests and update type references to IMongoFile * refactor: consolidate database method imports in agent client and initialization * chore: remove redundant import of initializeAgent from @librechat/api * refactor: move checkUserKeyExpiry utility to @librechat/api and update references across endpoints * refactor: move updateUserPlugins logic to user.ts and simplify UserController * refactor: update imports for user key management and remove UserService * refactor: remove unused Anthropics and Bedrock endpoint files and clean up imports * refactor: consolidate and update encryption imports across various files to use @librechat/data-schemas * chore: update file model mock to use unified import from '~/models' * chore: import order * refactor: remove migrated to TS agent.js file and its associated logic from the endpoints * chore: add reusable function to extract imports from source code in unused-packages workflow * chore: enhance unused-packages workflow to include @librechat/api dependencies and improve dependency extraction * chore: improve dependency extraction 
in unused-packages workflow with enhanced error handling and debugging output * chore: add detailed debugging output to unused-packages workflow for better visibility into unused dependencies and exclusion lists * chore: refine subpath handling in unused-packages workflow to correctly process scoped and non-scoped package imports * chore: clean up unused debug output in unused-packages workflow and reorganize type imports in initialize.ts
230 lines
9.5 KiB
JavaScript
230 lines
9.5 KiB
JavaScript
const { z } = require('zod');
|
|
const path = require('path');
|
|
const OpenAI = require('openai');
|
|
const { v4: uuidv4 } = require('uuid');
|
|
const { ProxyAgent, fetch } = require('undici');
|
|
const { Tool } = require('@langchain/core/tools');
|
|
const { logger } = require('@librechat/data-schemas');
|
|
const { getImageBasename, extractBaseURL } = require('@librechat/api');
|
|
const { FileContext, ContentTypes } = require('librechat-data-provider');
|
|
|
|
/**
 * Canned text returned to the model alongside a generated image in agent mode.
 * It instructs the model not to re-describe the image or mention download
 * links, since the image is already rendered in the UI.
 */
const displayMessage =
  "DALL-E displayed an image. All generated images are already plainly visible, so don't repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.";
|
|
/**
 * LangChain tool wrapping OpenAI's DALL-E 3 image generation API.
 *
 * In agent mode (`isAgent`), the generated image is fetched, base64-encoded,
 * and returned inline as message content. Otherwise the image is persisted
 * via the injected `processFileURL` strategy and a markdown image link (or
 * full file metadata when `returnMetadata` is set) is returned.
 *
 * Supports a reverse proxy (DALLE_REVERSE_PROXY), Azure OpenAI
 * (DALLE3_AZURE_API_VERSION + DALLE3_BASEURL), and an outbound HTTP proxy
 * (PROXY) via undici's ProxyAgent.
 */
class DALLE3 extends Tool {
  /**
   * @param {Object} [fields] - Tool configuration.
   * @param {boolean} [fields.override=false] - Initialize without required variables (skips API-key check).
   * @param {boolean} [fields.returnMetadata=false] - Return full file metadata instead of a markdown link.
   * @param {string} [fields.userId] - Owner of generated files.
   * @param {string} [fields.fileStrategy] - Storage strategy passed to `processFileURL`.
   * @param {boolean} [fields.isAgent] - Agent mode: return the image inline as base64 content.
   * @param {Function} [fields.processFileURL] - Callback that downloads/saves the image URL.
   * @param {string} [fields.DALLE3_API_KEY] - Explicit API key (takes precedence over env).
   * @param {string} [fields.DALLE_API_KEY] - Legacy explicit API key.
   */
  constructor(fields = {}) {
    super();
    /** @type {boolean} Used to initialize the Tool without necessary variables. */
    this.override = fields.override ?? false;
    /** @type {boolean} Necessary for output to contain all image metadata. */
    this.returnMetadata = fields.returnMetadata ?? false;

    this.userId = fields.userId;
    this.fileStrategy = fields.fileStrategy;
    /** @type {boolean} */
    this.isAgent = fields.isAgent;
    if (fields.processFileURL) {
      /** @type {processFileURL} Necessary for output to contain all image metadata. */
      this.processFileURL = fields.processFileURL.bind(this);
    }

    // Key resolution order: explicit field -> legacy field -> environment.
    // `const` (was `let`): never reassigned.
    const apiKey = fields.DALLE3_API_KEY ?? fields.DALLE_API_KEY ?? this.getApiKey();

    const config = { apiKey };
    if (process.env.DALLE_REVERSE_PROXY) {
      config.baseURL = extractBaseURL(process.env.DALLE_REVERSE_PROXY);
    }

    // Azure OpenAI uses an `api-version` query parameter and an `api-key`
    // header instead of standard Bearer auth; this overrides any reverse proxy.
    if (process.env.DALLE3_AZURE_API_VERSION && process.env.DALLE3_BASEURL) {
      config.baseURL = process.env.DALLE3_BASEURL;
      config.defaultQuery = { 'api-version': process.env.DALLE3_AZURE_API_VERSION };
      config.defaultHeaders = {
        'api-key': process.env.DALLE3_API_KEY,
        'Content-Type': 'application/json',
      };
      config.apiKey = process.env.DALLE3_API_KEY;
    }

    // Route outbound API traffic through an HTTP proxy when configured.
    if (process.env.PROXY) {
      const proxyAgent = new ProxyAgent(process.env.PROXY);
      config.fetchOptions = {
        dispatcher: proxyAgent,
      };
    }

    /** @type {OpenAI} */
    this.openai = new OpenAI(config);
    this.name = 'dalle';
    this.description = `Use DALLE to create images from text descriptions.
- It requires prompts to be in English, detailed, and to specify image type and human features for diversity.
- Create only one image, without repeating or listing descriptions outside the "prompts" field.
- Maintains the original intent of the description, with parameters for image style, quality, and size to tailor the output.`;
    this.description_for_model =
      process.env.DALLE3_SYSTEM_PROMPT ??
      `// Whenever a description of an image is given, generate prompts (following these rules), and use dalle to create the image. If the user does not ask for a specific number of images, default to creating 2 prompts to send to dalle that are written to be as diverse as possible. All prompts sent to dalle must abide by the following policies:
// 1. Prompts must be in English. Translate to English if needed.
// 2. One image per function call. Create only 1 image per request unless explicitly told to generate more than 1 image.
// 3. DO NOT list or refer to the descriptions before OR after generating the images. They should ONLY ever be written out ONCE, in the \`"prompts"\` field of the request. You do not need to ask for permission to generate, just do it!
// 4. Always mention the image type (photo, oil painting, watercolor painting, illustration, cartoon, drawing, vector, render, etc.) at the beginning of the caption. Unless the captions suggests otherwise, make one of the images a photo.
// 5. Diversify depictions of ALL images with people to always include always DESCENT and GENDER for EACH person using direct terms. Adjust only human descriptions.
// - EXPLICITLY specify these attributes, not abstractly reference them. The attributes should be specified in a minimal way and should directly describe their physical form.
// - Your choices should be grounded in reality. For example, all of a given OCCUPATION should not be the same gender or race. Additionally, focus on creating diverse, inclusive, and exploratory scenes via the properties you choose during rewrites. Make choices that may be insightful or unique sometimes.
// - Use "various" or "diverse" ONLY IF the description refers to groups of more than 3 people. Do not change the number of people requested in the original description.
// - Don't alter memes, fictional character origins, or unseen people. Maintain the original prompt's intent and prioritize quality.
// The prompt must intricately describe every part of the image in concrete, objective detail. THINK about what the end goal of the description is, and extrapolate that to what would make satisfying images.
// All descriptions sent to dalle should be a paragraph of text that is extremely descriptive and detailed. Each should be more than 3 sentences long.
// - The "vivid" style is HIGHLY preferred, but "natural" is also supported.`;
    // Input schema the model must follow when invoking the tool.
    this.schema = z.object({
      prompt: z
        .string()
        .max(4000)
        .describe(
          'A text description of the desired image, following the rules, up to 4000 characters.',
        ),
      style: z
        .enum(['vivid', 'natural'])
        .describe(
          'Must be one of `vivid` or `natural`. `vivid` generates hyper-real and dramatic images, `natural` produces more natural, less hyper-real looking images',
        ),
      quality: z
        .enum(['hd', 'standard'])
        .describe('The quality of the generated image. Only `hd` and `standard` are supported.'),
      size: z
        .enum(['1024x1024', '1792x1024', '1024x1792'])
        .describe(
          'The size of the requested image. Use 1024x1024 (square) as the default, 1792x1024 if the user requests a wide image, and 1024x1792 for full-body portraits. Always include this parameter in the request.',
        ),
    });
  }

  /**
   * Resolves the API key from the environment.
   * @returns {string} The API key ('' when overridden).
   * @throws {Error} When no key is set and `override` is false.
   */
  getApiKey() {
    const apiKey = process.env.DALLE3_API_KEY ?? process.env.DALLE_API_KEY ?? '';
    if (!apiKey && !this.override) {
      throw new Error('Missing DALLE_API_KEY environment variable.');
    }
    return apiKey;
  }

  /**
   * Sanitizes a prompt before sending it to the API: collapses newlines to
   * spaces, strips double quotes, and trims surrounding whitespace.
   * @param {string} inputString - Raw prompt text.
   * @returns {string} Sanitized prompt.
   */
  replaceUnwantedChars(inputString) {
    return inputString
      .replace(/\r\n|\r|\n/g, ' ')
      .replace(/"/g, '')
      .trim();
  }

  /**
   * Wraps an image URL/filepath in a markdown image link for chat rendering.
   * Fix: the body had degraded to an empty template literal (the markdown
   * image syntax was lost), so the non-agent success path returned ''.
   * @param {string} imageUrl - URL or filepath of the generated image.
   * @returns {string} Markdown image element.
   */
  wrapInMarkdown(imageUrl) {
    return `![generated image](${imageUrl})`;
  }

  /**
   * Normalizes the tool's return shape. In agent mode, results are returned
   * as a [text, artifact] tuple; otherwise the value is passed through.
   * @param {string|Object} value - Message text or metadata object.
   * @returns {string|Array} Agent tuple or the original value.
   */
  returnValue(value) {
    if (this.isAgent === true && typeof value === 'string') {
      return [value, {}];
    } else if (this.isAgent === true && typeof value === 'object') {
      return [displayMessage, value];
    }

    return value;
  }

  /**
   * Generates an image from the validated schema input.
   * @param {{ prompt: string, quality?: string, size?: string, style?: string }} data
   * @returns {Promise<string|Array>} Markdown link, file metadata, or agent content tuple.
   * @throws {Error} When `prompt` is missing.
   */
  async _call(data) {
    const { prompt, quality = 'standard', size = '1024x1024', style = 'vivid' } = data;
    if (!prompt) {
      throw new Error('Missing required field: prompt');
    }

    let resp;
    try {
      resp = await this.openai.images.generate({
        model: 'dall-e-3',
        quality,
        style,
        size,
        prompt: this.replaceUnwantedChars(prompt),
        n: 1,
      });
    } catch (error) {
      logger.error('[DALL-E-3] Problem generating the image:', error);
      return this
        .returnValue(`Something went wrong when trying to generate the image. The DALL-E API may be unavailable:
Error Message: ${error.message}`);
    }

    if (!resp) {
      return this.returnValue(
        'Something went wrong when trying to generate the image. The DALL-E API may be unavailable',
      );
    }

    // Fix: optional chaining so a missing/empty `data` array falls through to
    // the "No image URL" message below instead of throwing a TypeError.
    const theImageUrl = resp.data?.[0]?.url;

    if (!theImageUrl) {
      return this.returnValue(
        'No image URL returned from OpenAI API. There may be a problem with the API or your configuration.',
      );
    }

    if (this.isAgent) {
      // Agent mode: inline the image as base64 content instead of persisting it.
      let fetchOptions = {};
      if (process.env.PROXY) {
        const proxyAgent = new ProxyAgent(process.env.PROXY);
        fetchOptions.dispatcher = proxyAgent;
      }
      const imageResponse = await fetch(theImageUrl, fetchOptions);
      const arrayBuffer = await imageResponse.arrayBuffer();
      const base64 = Buffer.from(arrayBuffer).toString('base64');
      const content = [
        {
          type: ContentTypes.IMAGE_URL,
          image_url: {
            url: `data:image/png;base64,${base64}`,
          },
        },
      ];

      const response = [
        {
          type: ContentTypes.TEXT,
          text: displayMessage,
        },
      ];
      return [response, { content }];
    }

    // Derive a stable random filename, preserving the URL's extension.
    const imageBasename = getImageBasename(theImageUrl);
    const imageExt = path.extname(imageBasename);

    const extension = imageExt.startsWith('.') ? imageExt.slice(1) : imageExt;
    const imageName = `img-${uuidv4()}.${extension}`;

    logger.debug('[DALL-E-3]', {
      imageName,
      imageBasename,
      imageExt,
      extension,
      theImageUrl,
      data: resp.data[0],
    });

    try {
      const result = await this.processFileURL({
        URL: theImageUrl,
        basePath: 'images',
        userId: this.userId,
        fileName: imageName,
        fileStrategy: this.fileStrategy,
        context: FileContext.image_generation,
      });

      if (this.returnMetadata) {
        this.result = result;
      } else {
        this.result = this.wrapInMarkdown(result.filepath);
      }
    } catch (error) {
      // Best-effort: report the save failure as the tool result rather than throwing.
      logger.error('Error while saving the image:', error);
      this.result = `Failed to save the image locally. ${error.message}`;
    }

    return this.returnValue(this.result);
  }
}
|
// Export the tool class for registration by the tool loader.
module.exports = DALLE3;