mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-09-22 06:00:56 +02:00

* wip: OpenAI Image Generation Tool with customizable options * WIP: First pass OpenAI Image Generation Tool and integrate into existing tools * 🔀 fix: Comment out unused validation for image generation tool parameters * 🔀 refactor: Update primeResources function parameters for better destructuring * feat: Add image_edit resource to EToolResources and update AgentToolResources interface * feat: Enhance file retrieval with tool resource filtering for image editing * refactor: add OpenAI Image Tools for generation and editing, refactor related components, pass current request image attachments as tool resources for editing * refactor: Remove commented-out code and clean up API key retrieval in createOpenAIImageTools function * fix: show message attachments in shared links * fix: Correct parent message retrieval logic for regenerated messages in useChatFunctions * fix: Update primeResources to utilize requestFileSet for image file processing * refactor: Improve description for image generation tool and clarify usage conditions, only provide edit tool if there are images available to edit * chore: Update OpenAI Image Tools icon to use local asset * refactor: Update image generation tool description and logic to prioritize editing tool when files are uploaded * refactor: Enhance image tool descriptions to clarify usage conditions and note potential unavailability of uploaded images * refactor: Update useAttachmentHandler to accept queryClient to update query cache with newly created file * refactor: Add customizable descriptions and prompts for OpenAI image generation and editing tools * chore: Update comments to use JSDoc style for better clarity and consistency * refactor: Rename config variable to clientConfig for clarity and update signal handling in image generation * refactor: Update axios request configuration to include derived signal and baseURL for improved request handling * refactor: Update baseURL environment variable for OpenAI image generation tool configuration * refactor: Enhance axios request configuration with conditional headers and improved clientConfig setup * chore: Update comments for clarity and remove unnecessary lines in OpenAI image tools * refactor: Update description for image generation without files to clarify user instructions * refactor: Simplify target parent message logic for regeneration and resubmission cases * chore: Remove backticks from error messages in image generation and editing functions * refactor: Rename toolResources to toolResourceSet for clarity in file retrieval functions * chore: Remove redundant comments and clean up TODOs in OpenAI image tools * refactor: Rename fileStrategy to appFileStrategy for clarity and improve error handling in image processing * chore: Update react-resizable-panels to version 2.1.8 in package.json and package-lock.json * chore: Ensure required validation for logs and Code of Conduct agreement in bug report template * fix: Update ArtifactPreview to use startupConfig and currentCode from memoized props to prevent unnecessary re-renders * fix: improve robustness of `save & submit` when used from a user-message with existing attachments * fix: add null check for artifact index in CodeEditor to prevent errors, trigger re-render on artifact ID change * fix: standardize default values for artifact properties in Artifact component, avoiding prematurely setting an "empty/default" artifact * fix: reset current artifact ID before setting a new one in ArtifactButton to ensure correct state management * chore: rename `setArtifactId` variable to `setCurrentArtifactId` for consistency * chore: update type annotations in File and S3 CRUD functions for consistency * refactor: improve image handling in OpenAI tools by using image_id references and enhance tool context for image editing * fix: update image_ids schema in image_edit_oai to enforce presence and provide clear guidelines for usage * fix: enhance file fetching logic to ensure user-specific and dimension-validated results * chore: add details on image generation and editing capabilities with various models
518 lines
18 KiB
JavaScript
518 lines
18 KiB
JavaScript
const { z } = require('zod');
|
|
const axios = require('axios');
|
|
const { v4 } = require('uuid');
|
|
const OpenAI = require('openai');
|
|
const FormData = require('form-data');
|
|
const { tool } = require('@langchain/core/tools');
|
|
const { HttpsProxyAgent } = require('https-proxy-agent');
|
|
const { ContentTypes, EImageOutputType } = require('librechat-data-provider');
|
|
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
|
|
const { logAxiosError, extractBaseURL } = require('~/utils');
|
|
const { getFiles } = require('~/models/File');
|
|
const { logger } = require('~/config');
|
|
|
|
/** Default descriptions for image generation tool */
|
|
const DEFAULT_IMAGE_GEN_DESCRIPTION = `
|
|
Generates high-quality, original images based solely on text, not using any uploaded reference images.
|
|
|
|
When to use \`image_gen_oai\`:
|
|
- To create entirely new images from detailed text descriptions that do NOT reference any image files.
|
|
|
|
When NOT to use \`image_gen_oai\`:
|
|
- If the user has uploaded any images and requests modifications, enhancements, or remixing based on those uploads → use \`image_edit_oai\` instead.
|
|
|
|
Generated image IDs will be returned in the response, so you can refer to them in future requests made to \`image_edit_oai\`.
|
|
`.trim();
|
|
|
|
/** Default description for image editing tool */
|
|
const DEFAULT_IMAGE_EDIT_DESCRIPTION =
|
|
`Generates high-quality, original images based on text and one or more uploaded/referenced images.
|
|
|
|
When to use \`image_edit_oai\`:
|
|
- The user wants to modify, extend, or remix one **or more** uploaded images, either:
|
|
- Previously generated, or in the current request (both to be included in the \`image_ids\` array).
|
|
- Always when the user refers to uploaded images for editing, enhancement, remixing, style transfer, or combining elements.
|
|
- Any current or existing images are to be used as visual guides.
|
|
- If there are any files in the current request, they are more likely than not expected as references for image edit requests.
|
|
|
|
When NOT to use \`image_edit_oai\`:
|
|
- Brand-new generations that do not rely on an existing image → use \`image_gen_oai\` instead.
|
|
|
|
Both generated and referenced image IDs will be returned in the response, so you can refer to them in future requests made to \`image_edit_oai\`.
|
|
`.trim();
|
|
|
|
/** Default prompt descriptions */
|
|
const DEFAULT_IMAGE_GEN_PROMPT_DESCRIPTION = `Describe the image you want in detail.
|
|
Be highly specific—break your idea into layers:
|
|
(1) main concept and subject,
|
|
(2) composition and position,
|
|
(3) lighting and mood,
|
|
(4) style, medium, or camera details,
|
|
(5) important features (age, expression, clothing, etc.),
|
|
(6) background.
|
|
Use positive, descriptive language and specify what should be included, not what to avoid.
|
|
List number and characteristics of people/objects, and mention style/technical requirements (e.g., "DSLR photo, 85mm lens, golden hour").
|
|
Do not reference any uploaded images—use for new image creation from text only.`;
|
|
|
|
const DEFAULT_IMAGE_EDIT_PROMPT_DESCRIPTION = `Describe the changes, enhancements, or new ideas to apply to the uploaded image(s).
|
|
Be highly specific—break your request into layers:
|
|
(1) main concept or transformation,
|
|
(2) specific edits/replacements or composition guidance,
|
|
(3) desired style, mood, or technique,
|
|
(4) features/items to keep, change, or add (such as objects, people, clothing, lighting, etc.).
|
|
Use positive, descriptive language and clarify what should be included or changed, not what to avoid.
|
|
Always base this prompt on the most recently uploaded reference images.`;
|
|
|
|
const displayMessage =
|
|
'The tool displayed an image. All generated images are already plainly visible, so don\'t repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.';
|
|
|
|
/**
|
|
* Replaces unwanted characters from the input string
|
|
* @param {string} inputString - The input string to process
|
|
* @returns {string} - The processed string
|
|
*/
|
|
function replaceUnwantedChars(inputString) {
|
|
return inputString
|
|
.replace(/\r\n|\r|\n/g, ' ')
|
|
.replace(/"/g, '')
|
|
.trim();
|
|
}
|
|
|
|
function returnValue(value) {
|
|
if (typeof value === 'string') {
|
|
return [value, {}];
|
|
} else if (typeof value === 'object') {
|
|
if (Array.isArray(value)) {
|
|
return value;
|
|
}
|
|
return [displayMessage, value];
|
|
}
|
|
return value;
|
|
}
|
|
|
|
const getImageGenDescription = () => {
|
|
return process.env.IMAGE_GEN_OAI_DESCRIPTION || DEFAULT_IMAGE_GEN_DESCRIPTION;
|
|
};
|
|
|
|
const getImageEditDescription = () => {
|
|
return process.env.IMAGE_EDIT_OAI_DESCRIPTION || DEFAULT_IMAGE_EDIT_DESCRIPTION;
|
|
};
|
|
|
|
const getImageGenPromptDescription = () => {
|
|
return process.env.IMAGE_GEN_OAI_PROMPT_DESCRIPTION || DEFAULT_IMAGE_GEN_PROMPT_DESCRIPTION;
|
|
};
|
|
|
|
const getImageEditPromptDescription = () => {
|
|
return process.env.IMAGE_EDIT_OAI_PROMPT_DESCRIPTION || DEFAULT_IMAGE_EDIT_PROMPT_DESCRIPTION;
|
|
};
|
|
|
|
/**
|
|
* Creates OpenAI Image tools (generation and editing)
|
|
* @param {Object} fields - Configuration fields
|
|
* @param {ServerRequest} fields.req - Whether the tool is being used in an agent context
|
|
* @param {boolean} fields.isAgent - Whether the tool is being used in an agent context
|
|
* @param {string} fields.IMAGE_GEN_OAI_API_KEY - The OpenAI API key
|
|
* @param {boolean} [fields.override] - Whether to override the API key check, necessary for app initialization
|
|
* @param {MongoFile[]} [fields.imageFiles] - The images to be used for editing
|
|
* @returns {Array} - Array of image tools
|
|
*/
|
|
function createOpenAIImageTools(fields = {}) {
|
|
/** @type {boolean} Used to initialize the Tool without necessary variables. */
|
|
const override = fields.override ?? false;
|
|
/** @type {boolean} */
|
|
if (!override && !fields.isAgent) {
|
|
throw new Error('This tool is only available for agents.');
|
|
}
|
|
const { req } = fields;
|
|
const imageOutputType = req?.app.locals.imageOutputType || EImageOutputType.PNG;
|
|
const appFileStrategy = req?.app.locals.fileStrategy;
|
|
|
|
const getApiKey = () => {
|
|
const apiKey = process.env.IMAGE_GEN_OAI_API_KEY ?? '';
|
|
if (!apiKey && !override) {
|
|
throw new Error('Missing IMAGE_GEN_OAI_API_KEY environment variable.');
|
|
}
|
|
return apiKey;
|
|
};
|
|
|
|
let apiKey = fields.IMAGE_GEN_OAI_API_KEY ?? getApiKey();
|
|
const closureConfig = { apiKey };
|
|
|
|
let baseURL = 'https://api.openai.com/v1/';
|
|
if (!override && process.env.IMAGE_GEN_OAI_BASEURL) {
|
|
baseURL = extractBaseURL(process.env.IMAGE_GEN_OAI_BASEURL);
|
|
closureConfig.baseURL = baseURL;
|
|
}
|
|
|
|
// Note: Azure may not yet support the latest image generation models
|
|
if (
|
|
!override &&
|
|
process.env.IMAGE_GEN_OAI_AZURE_API_VERSION &&
|
|
process.env.IMAGE_GEN_OAI_BASEURL
|
|
) {
|
|
baseURL = process.env.IMAGE_GEN_OAI_BASEURL;
|
|
closureConfig.baseURL = baseURL;
|
|
closureConfig.defaultQuery = { 'api-version': process.env.IMAGE_GEN_OAI_AZURE_API_VERSION };
|
|
closureConfig.defaultHeaders = {
|
|
'api-key': process.env.IMAGE_GEN_OAI_API_KEY,
|
|
'Content-Type': 'application/json',
|
|
};
|
|
closureConfig.apiKey = process.env.IMAGE_GEN_OAI_API_KEY;
|
|
}
|
|
|
|
const imageFiles = fields.imageFiles ?? [];
|
|
|
|
/**
|
|
* Image Generation Tool
|
|
*/
|
|
const imageGenTool = tool(
|
|
async (
|
|
{
|
|
prompt,
|
|
background = 'auto',
|
|
n = 1,
|
|
output_compression = 100,
|
|
quality = 'auto',
|
|
size = 'auto',
|
|
},
|
|
runnableConfig,
|
|
) => {
|
|
if (!prompt) {
|
|
throw new Error('Missing required field: prompt');
|
|
}
|
|
const clientConfig = { ...closureConfig };
|
|
if (process.env.PROXY) {
|
|
clientConfig.httpAgent = new HttpsProxyAgent(process.env.PROXY);
|
|
}
|
|
|
|
/** @type {OpenAI} */
|
|
const openai = new OpenAI(clientConfig);
|
|
let output_format = imageOutputType;
|
|
if (
|
|
background === 'transparent' &&
|
|
output_format !== EImageOutputType.PNG &&
|
|
output_format !== EImageOutputType.WEBP
|
|
) {
|
|
logger.warn(
|
|
'[ImageGenOAI] Transparent background requires PNG or WebP format, defaulting to PNG',
|
|
);
|
|
output_format = EImageOutputType.PNG;
|
|
}
|
|
|
|
let resp;
|
|
try {
|
|
const derivedSignal = runnableConfig?.signal
|
|
? AbortSignal.any([runnableConfig.signal])
|
|
: undefined;
|
|
resp = await openai.images.generate(
|
|
{
|
|
model: 'gpt-image-1',
|
|
prompt: replaceUnwantedChars(prompt),
|
|
n: Math.min(Math.max(1, n), 10),
|
|
background,
|
|
output_format,
|
|
output_compression:
|
|
output_format === EImageOutputType.WEBP || output_format === EImageOutputType.JPEG
|
|
? output_compression
|
|
: undefined,
|
|
quality,
|
|
size,
|
|
},
|
|
{
|
|
signal: derivedSignal,
|
|
},
|
|
);
|
|
} catch (error) {
|
|
const message = '[image_gen_oai] Problem generating the image:';
|
|
logAxiosError({ error, message });
|
|
return returnValue(`Something went wrong when trying to generate the image. The OpenAI API may be unavailable:
|
|
Error Message: ${error.message}`);
|
|
}
|
|
|
|
if (!resp) {
|
|
return returnValue(
|
|
'Something went wrong when trying to generate the image. The OpenAI API may be unavailable',
|
|
);
|
|
}
|
|
|
|
// For gpt-image-1, the response contains base64-encoded images
|
|
// TODO: handle cost in `resp.usage`
|
|
const base64Image = resp.data[0].b64_json;
|
|
|
|
if (!base64Image) {
|
|
return returnValue(
|
|
'No image data returned from OpenAI API. There may be a problem with the API or your configuration.',
|
|
);
|
|
}
|
|
|
|
const content = [
|
|
{
|
|
type: ContentTypes.IMAGE_URL,
|
|
image_url: {
|
|
url: `data:image/${output_format};base64,${base64Image}`,
|
|
},
|
|
},
|
|
];
|
|
|
|
const file_ids = [v4()];
|
|
const response = [
|
|
{
|
|
type: ContentTypes.TEXT,
|
|
text: displayMessage + `\n\ngenerated_image_id: "${file_ids[0]}"`,
|
|
},
|
|
];
|
|
return [response, { content, file_ids }];
|
|
},
|
|
{
|
|
name: 'image_gen_oai',
|
|
description: getImageGenDescription(),
|
|
schema: z.object({
|
|
prompt: z.string().max(32000).describe(getImageGenPromptDescription()),
|
|
background: z
|
|
.enum(['transparent', 'opaque', 'auto'])
|
|
.optional()
|
|
.describe(
|
|
'Sets transparency for the background. Must be one of transparent, opaque or auto (default). When transparent, the output format should be png or webp.',
|
|
),
|
|
/*
|
|
n: z
|
|
.number()
|
|
.int()
|
|
.min(1)
|
|
.max(10)
|
|
.optional()
|
|
.describe('The number of images to generate. Must be between 1 and 10.'),
|
|
output_compression: z
|
|
.number()
|
|
.int()
|
|
.min(0)
|
|
.max(100)
|
|
.optional()
|
|
.describe('The compression level (0-100%) for webp or jpeg formats. Defaults to 100.'),
|
|
*/
|
|
quality: z
|
|
.enum(['auto', 'high', 'medium', 'low'])
|
|
.optional()
|
|
.describe('The quality of the image. One of auto (default), high, medium, or low.'),
|
|
size: z
|
|
.enum(['auto', '1024x1024', '1536x1024', '1024x1536'])
|
|
.optional()
|
|
.describe(
|
|
'The size of the generated image. One of 1024x1024, 1536x1024 (landscape), 1024x1536 (portrait), or auto (default).',
|
|
),
|
|
}),
|
|
responseFormat: 'content_and_artifact',
|
|
},
|
|
);
|
|
|
|
/**
|
|
* Image Editing Tool
|
|
*/
|
|
const imageEditTool = tool(
|
|
async ({ prompt, image_ids, quality = 'auto', size = 'auto' }, runnableConfig) => {
|
|
if (!prompt) {
|
|
throw new Error('Missing required field: prompt');
|
|
}
|
|
|
|
const clientConfig = { ...closureConfig };
|
|
if (process.env.PROXY) {
|
|
clientConfig.httpAgent = new HttpsProxyAgent(process.env.PROXY);
|
|
}
|
|
|
|
const formData = new FormData();
|
|
formData.append('model', 'gpt-image-1');
|
|
formData.append('prompt', replaceUnwantedChars(prompt));
|
|
// TODO: `mask` support
|
|
// TODO: more than 1 image support
|
|
// formData.append('n', n.toString());
|
|
formData.append('quality', quality);
|
|
formData.append('size', size);
|
|
|
|
/** @type {Record<FileSources, undefined | NodeStreamDownloader<File>>} */
|
|
const streamMethods = {};
|
|
|
|
const requestFilesMap = Object.fromEntries(imageFiles.map((f) => [f.file_id, { ...f }]));
|
|
|
|
const orderedFiles = new Array(image_ids.length);
|
|
const idsToFetch = [];
|
|
const indexOfMissing = Object.create(null);
|
|
|
|
for (let i = 0; i < image_ids.length; i++) {
|
|
const id = image_ids[i];
|
|
const file = requestFilesMap[id];
|
|
|
|
if (file) {
|
|
orderedFiles[i] = file;
|
|
} else {
|
|
idsToFetch.push(id);
|
|
indexOfMissing[id] = i;
|
|
}
|
|
}
|
|
|
|
if (idsToFetch.length) {
|
|
const fetchedFiles = await getFiles(
|
|
{
|
|
user: req.user.id,
|
|
file_id: { $in: idsToFetch },
|
|
height: { $exists: true },
|
|
width: { $exists: true },
|
|
},
|
|
{},
|
|
{},
|
|
);
|
|
|
|
for (const file of fetchedFiles) {
|
|
requestFilesMap[file.file_id] = file;
|
|
orderedFiles[indexOfMissing[file.file_id]] = file;
|
|
}
|
|
}
|
|
for (const imageFile of orderedFiles) {
|
|
if (!imageFile) {
|
|
continue;
|
|
}
|
|
/** @type {NodeStream<File>} */
|
|
let stream;
|
|
/** @type {NodeStreamDownloader<File>} */
|
|
let getDownloadStream;
|
|
const source = imageFile.source || appFileStrategy;
|
|
if (!source) {
|
|
throw new Error('No source found for image file');
|
|
}
|
|
if (streamMethods[source]) {
|
|
getDownloadStream = streamMethods[source];
|
|
} else {
|
|
({ getDownloadStream } = getStrategyFunctions(source));
|
|
streamMethods[source] = getDownloadStream;
|
|
}
|
|
if (!getDownloadStream) {
|
|
throw new Error(`No download stream method found for source: ${source}`);
|
|
}
|
|
stream = await getDownloadStream(req, imageFile.filepath);
|
|
if (!stream) {
|
|
throw new Error('Failed to get download stream for image file');
|
|
}
|
|
formData.append('image[]', stream, {
|
|
filename: imageFile.filename,
|
|
contentType: imageFile.type,
|
|
});
|
|
}
|
|
|
|
/** @type {import('axios').RawAxiosHeaders} */
|
|
let headers = {
|
|
...formData.getHeaders(),
|
|
};
|
|
|
|
if (process.env.IMAGE_GEN_OAI_AZURE_API_VERSION && process.env.IMAGE_GEN_OAI_BASEURL) {
|
|
headers['api-key'] = apiKey;
|
|
} else {
|
|
headers['Authorization'] = `Bearer ${apiKey}`;
|
|
}
|
|
|
|
try {
|
|
const derivedSignal = runnableConfig?.signal
|
|
? AbortSignal.any([runnableConfig.signal])
|
|
: undefined;
|
|
|
|
/** @type {import('axios').AxiosRequestConfig} */
|
|
const axiosConfig = {
|
|
headers,
|
|
...clientConfig,
|
|
signal: derivedSignal,
|
|
baseURL,
|
|
};
|
|
|
|
if (process.env.IMAGE_GEN_OAI_AZURE_API_VERSION && process.env.IMAGE_GEN_OAI_BASEURL) {
|
|
axiosConfig.params = {
|
|
'api-version': process.env.IMAGE_GEN_OAI_AZURE_API_VERSION,
|
|
...axiosConfig.params,
|
|
};
|
|
}
|
|
const response = await axios.post('/images/edits', formData, axiosConfig);
|
|
|
|
if (!response.data || !response.data.data || !response.data.data.length) {
|
|
return returnValue(
|
|
'No image data returned from OpenAI API. There may be a problem with the API or your configuration.',
|
|
);
|
|
}
|
|
|
|
const base64Image = response.data.data[0].b64_json;
|
|
if (!base64Image) {
|
|
return returnValue(
|
|
'No image data returned from OpenAI API. There may be a problem with the API or your configuration.',
|
|
);
|
|
}
|
|
|
|
const content = [
|
|
{
|
|
type: ContentTypes.IMAGE_URL,
|
|
image_url: {
|
|
url: `data:image/${imageOutputType};base64,${base64Image}`,
|
|
},
|
|
},
|
|
];
|
|
|
|
const file_ids = [v4()];
|
|
const textResponse = [
|
|
{
|
|
type: ContentTypes.TEXT,
|
|
text:
|
|
displayMessage +
|
|
`\n\ngenerated_image_id: "${file_ids[0]}"\nreferenced_image_ids: ["${image_ids.join('", "')}"]`,
|
|
},
|
|
];
|
|
return [textResponse, { content, file_ids }];
|
|
} catch (error) {
|
|
const message = '[image_edit_oai] Problem editing the image:';
|
|
logAxiosError({ error, message });
|
|
return returnValue(`Something went wrong when trying to edit the image. The OpenAI API may be unavailable:
|
|
Error Message: ${error.message || 'Unknown error'}`);
|
|
}
|
|
},
|
|
{
|
|
name: 'image_edit_oai',
|
|
description: getImageEditDescription(),
|
|
schema: z.object({
|
|
image_ids: z
|
|
.array(z.string())
|
|
.min(1)
|
|
.describe(
|
|
`
|
|
IDs (image ID strings) of previously generated or uploaded images that should guide the edit.
|
|
|
|
Guidelines:
|
|
- If the user's request depends on any prior image(s), copy their image IDs into the \`image_ids\` array (in the same order the user refers to them).
|
|
- Never invent or hallucinate IDs; only use IDs that are still visible in the conversation context.
|
|
- If no earlier image is relevant, omit the field entirely.
|
|
`.trim(),
|
|
),
|
|
prompt: z.string().max(32000).describe(getImageEditPromptDescription()),
|
|
/*
|
|
n: z
|
|
.number()
|
|
.int()
|
|
.min(1)
|
|
.max(10)
|
|
.optional()
|
|
.describe('The number of images to generate. Must be between 1 and 10. Defaults to 1.'),
|
|
*/
|
|
quality: z
|
|
.enum(['auto', 'high', 'medium', 'low'])
|
|
.optional()
|
|
.describe(
|
|
'The quality of the image. One of auto (default), high, medium, or low. High/medium/low only supported for gpt-image-1.',
|
|
),
|
|
size: z
|
|
.enum(['auto', '1024x1024', '1536x1024', '1024x1536', '256x256', '512x512'])
|
|
.optional()
|
|
.describe(
|
|
'The size of the generated images. For gpt-image-1: auto (default), 1024x1024, 1536x1024, 1024x1536. For dall-e-2: 256x256, 512x512, 1024x1024.',
|
|
),
|
|
}),
|
|
responseFormat: 'content_and_artifact',
|
|
},
|
|
);
|
|
|
|
return [imageGenTool, imageEditTool];
|
|
}
|
|
|
|
module.exports = createOpenAIImageTools;
|