diff --git a/.env.example b/.env.example
index 6e552c24a..8ae71409c 100644
--- a/.env.example
+++ b/.env.example
@@ -231,6 +231,14 @@ AZURE_AI_SEARCH_SEARCH_OPTION_QUERY_TYPE=
 AZURE_AI_SEARCH_SEARCH_OPTION_TOP=
 AZURE_AI_SEARCH_SEARCH_OPTION_SELECT=
 
+# OpenAI Image Tools Customization
+#----------------
+# IMAGE_GEN_OAI_DESCRIPTION_WITH_FILES=Custom description for image generation tool when files are present
+# IMAGE_GEN_OAI_DESCRIPTION_NO_FILES=Custom description for image generation tool when no files are present
+# IMAGE_EDIT_OAI_DESCRIPTION=Custom description for image editing tool
+# IMAGE_GEN_OAI_PROMPT_DESCRIPTION=Custom prompt description for image generation tool
+# IMAGE_EDIT_OAI_PROMPT_DESCRIPTION=Custom prompt description for image editing tool
+
 # DALL·E
 #----------------
 # DALLE_API_KEY=
diff --git a/.github/ISSUE_TEMPLATE/BUG-REPORT.yml b/.github/ISSUE_TEMPLATE/BUG-REPORT.yml
index 3a3b828ee..610396959 100644
--- a/.github/ISSUE_TEMPLATE/BUG-REPORT.yml
+++ b/.github/ISSUE_TEMPLATE/BUG-REPORT.yml
@@ -79,6 +79,8 @@ body:
         For UI-related issues, browser console logs can be very helpful.
         You can provide these as screenshots or paste the text here.
       render: shell
+    validations:
+      required: true
   - type: textarea
     id: screenshots
     attributes:
diff --git a/README.md b/README.md
index 3e02c2cc0..6e0c92221 100644
--- a/README.md
+++ b/README.md
@@ -74,6 +74,11 @@
 - 🪄 **Generative UI with Code Artifacts**:
   - [Code Artifacts](https://youtu.be/GfTj7O4gmd0?si=WJbdnemZpJzBrJo3) allow creation of React, HTML, and Mermaid diagrams directly in chat
 
+- 🎨 **Image Generation & Editing**
+  - Text-to-image and image-to-image with [GPT-Image-1](https://www.librechat.ai/docs/features/image_gen#1--openai-image-tools-recommended)
+  - Text-to-image with [DALL-E (3/2)](https://www.librechat.ai/docs/features/image_gen#2--dalle-legacy), [Stable Diffusion](https://www.librechat.ai/docs/features/image_gen#3--stable-diffusion-local), [Flux](https://www.librechat.ai/docs/features/image_gen#4--flux), or any [MCP server](https://www.librechat.ai/docs/features/image_gen#5--model-context-protocol-mcp)
+  - Produce stunning visuals from prompts or refine existing images with a single instruction
+
 - 💾 **Presets & Context Management**:
   - Create, Save, & Share Custom Presets
   - Switch between AI Endpoints and Presets mid-chat
diff --git a/api/app/clients/tools/index.js b/api/app/clients/tools/index.js
index df436fb08..87b1884e8 100644
--- a/api/app/clients/tools/index.js
+++ b/api/app/clients/tools/index.js
@@ -10,6 +10,7 @@ const StructuredACS = require('./structured/AzureAISearch');
 const StructuredSD = require('./structured/StableDiffusion');
 const GoogleSearchAPI = require('./structured/GoogleSearch');
 const TraversaalSearch = require('./structured/TraversaalSearch');
+const createOpenAIImageTools = require('./structured/OpenAIImageTools');
 const TavilySearchResults = require('./structured/TavilySearchResults');
 
 /** @type {Record} */
@@ -40,4 +41,5 @@ module.exports = {
   StructuredWolfram,
   createYouTubeTools,
   TavilySearchResults,
+  createOpenAIImageTools,
 };
diff --git a/api/app/clients/tools/manifest.json b/api/app/clients/tools/manifest.json
index 43be7a4e6..55c1b1c51 100644
--- a/api/app/clients/tools/manifest.json
+++ b/api/app/clients/tools/manifest.json
@@ -44,6 +44,20 @@
       }
     ]
   },
+  {
+    "name": "OpenAI Image Tools",
+    "pluginKey": "image_gen_oai",
+    "toolkit": true,
+    "description": "Image Generation and Editing using OpenAI's latest state-of-the-art models",
+    "icon": "/assets/image_gen_oai.png",
+    "authConfig": [
+      {
+        "authField": "IMAGE_GEN_OAI_API_KEY",
+        "label": "OpenAI Image Tools API Key",
+        "description": "Your OpenAI API Key for Image Generation and Editing"
+      }
+    ]
+  },
   {
     "name": "Wolfram",
     "pluginKey": "wolfram",
diff --git a/api/app/clients/tools/structured/OpenAIImageTools.js b/api/app/clients/tools/structured/OpenAIImageTools.js
new file mode 100644
index 000000000..85941a779
--- /dev/null
+++ b/api/app/clients/tools/structured/OpenAIImageTools.js
@@ -0,0 +1,518 @@
+const { z } = require('zod');
+const axios = require('axios');
+const { v4 } = require('uuid');
+const OpenAI = require('openai');
+const FormData = require('form-data');
+const { tool } = require('@langchain/core/tools');
+const { HttpsProxyAgent } = require('https-proxy-agent');
+const { ContentTypes, EImageOutputType } = require('librechat-data-provider');
+const { getStrategyFunctions } = require('~/server/services/Files/strategies');
+const { logAxiosError, extractBaseURL } = require('~/utils');
+const { getFiles } = require('~/models/File');
+const { logger } = require('~/config');
+
+/** Default descriptions for image generation tool */
+const DEFAULT_IMAGE_GEN_DESCRIPTION = `
+Generates high-quality, original images based solely on text, not using any uploaded reference images.
+
+When to use \`image_gen_oai\`:
+- To create entirely new images from detailed text descriptions that do NOT reference any image files.
+
+When NOT to use \`image_gen_oai\`:
+- If the user has uploaded any images and requests modifications, enhancements, or remixing based on those uploads → use \`image_edit_oai\` instead.
+
+Generated image IDs will be returned in the response, so you can refer to them in future requests made to \`image_edit_oai\`.
+`.trim();
+
+/** Default description for image editing tool */
+const DEFAULT_IMAGE_EDIT_DESCRIPTION =
+  `Generates high-quality, original images based on text and one or more uploaded/referenced images.
+
+When to use \`image_edit_oai\`:
+- The user wants to modify, extend, or remix one **or more** uploaded images, either:
+  - Previously generated, or in the current request (both to be included in the \`image_ids\` array).
+- Always when the user refers to uploaded images for editing, enhancement, remixing, style transfer, or combining elements.
+- Any current or existing images are to be used as visual guides.
+- If there are any files in the current request, they are more likely than not expected as references for image edit requests.
+
+When NOT to use \`image_edit_oai\`:
+- Brand-new generations that do not rely on an existing image → use \`image_gen_oai\` instead.
+
+Both generated and referenced image IDs will be returned in the response, so you can refer to them in future requests made to \`image_edit_oai\`.
+`.trim();
+
+/** Default prompt descriptions */
+const DEFAULT_IMAGE_GEN_PROMPT_DESCRIPTION = `Describe the image you want in detail.
+  Be highly specific—break your idea into layers:
+  (1) main concept and subject,
+  (2) composition and position,
+  (3) lighting and mood,
+  (4) style, medium, or camera details,
+  (5) important features (age, expression, clothing, etc.),
+  (6) background.
+  Use positive, descriptive language and specify what should be included, not what to avoid.
+  List number and characteristics of people/objects, and mention style/technical requirements (e.g., "DSLR photo, 85mm lens, golden hour").
+  Do not reference any uploaded images—use for new image creation from text only.`;
+
+const DEFAULT_IMAGE_EDIT_PROMPT_DESCRIPTION = `Describe the changes, enhancements, or new ideas to apply to the uploaded image(s).
+  Be highly specific—break your request into layers:
+  (1) main concept or transformation,
+  (2) specific edits/replacements or composition guidance,
+  (3) desired style, mood, or technique,
+  (4) features/items to keep, change, or add (such as objects, people, clothing, lighting, etc.).
+  Use positive, descriptive language and clarify what should be included or changed, not what to avoid.
+  Always base this prompt on the most recently uploaded reference images.`;
+
+const displayMessage =
+  'The tool displayed an image. All generated images are already plainly visible, so don\'t repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.';
+
+/**
+ * Replaces unwanted characters from the input string
+ * @param {string} inputString - The input string to process
+ * @returns {string} - The processed string
+ */
+function replaceUnwantedChars(inputString) {
+  return inputString
+    .replace(/\r\n|\r|\n/g, ' ')
+    .replace(/"/g, '')
+    .trim();
+}
+
+function returnValue(value) {
+  if (typeof value === 'string') {
+    return [value, {}];
+  } else if (typeof value === 'object') {
+    if (Array.isArray(value)) {
+      return value;
+    }
+    return [displayMessage, value];
+  }
+  return value;
+}
+
+const getImageGenDescription = () => {
+  return process.env.IMAGE_GEN_OAI_DESCRIPTION || DEFAULT_IMAGE_GEN_DESCRIPTION;
+};
+
+const getImageEditDescription = () => {
+  return process.env.IMAGE_EDIT_OAI_DESCRIPTION || DEFAULT_IMAGE_EDIT_DESCRIPTION;
+};
+
+const getImageGenPromptDescription = () => {
+  return process.env.IMAGE_GEN_OAI_PROMPT_DESCRIPTION || DEFAULT_IMAGE_GEN_PROMPT_DESCRIPTION;
+};
+
+const getImageEditPromptDescription = () => {
+  return process.env.IMAGE_EDIT_OAI_PROMPT_DESCRIPTION || DEFAULT_IMAGE_EDIT_PROMPT_DESCRIPTION;
+};
+
+/**
+ * Creates OpenAI Image tools (generation and editing)
+ * @param {Object} fields - Configuration fields
+ * @param {ServerRequest} fields.req - The Express request object
+ * @param {boolean} fields.isAgent - Whether the tool is being used in an agent context
+ * @param {string} fields.IMAGE_GEN_OAI_API_KEY - The OpenAI API key
+ * @param {boolean} [fields.override] - Whether to override the API key check, necessary for app initialization
+ * @param {MongoFile[]} [fields.imageFiles] - The images to be used for editing
+ * @returns {Array} - Array of image tools
+ */
+function createOpenAIImageTools(fields = {}) {
+  /** @type {boolean} Used to initialize the Tool without necessary variables. */
+  const override = fields.override ?? false;
+  /** @type {boolean} */
+  if (!override && !fields.isAgent) {
+    throw new Error('This tool is only available for agents.');
+  }
+  const { req } = fields;
+  const imageOutputType = req?.app.locals.imageOutputType || EImageOutputType.PNG;
+  const appFileStrategy = req?.app.locals.fileStrategy;
+
+  const getApiKey = () => {
+    const apiKey = process.env.IMAGE_GEN_OAI_API_KEY ?? '';
+    if (!apiKey && !override) {
+      throw new Error('Missing IMAGE_GEN_OAI_API_KEY environment variable.');
+    }
+    return apiKey;
+  };
+
+  let apiKey = fields.IMAGE_GEN_OAI_API_KEY ??
getApiKey(); + const closureConfig = { apiKey }; + + let baseURL = 'https://api.openai.com/v1/'; + if (!override && process.env.IMAGE_GEN_OAI_BASEURL) { + baseURL = extractBaseURL(process.env.IMAGE_GEN_OAI_BASEURL); + closureConfig.baseURL = baseURL; + } + + // Note: Azure may not yet support the latest image generation models + if ( + !override && + process.env.IMAGE_GEN_OAI_AZURE_API_VERSION && + process.env.IMAGE_GEN_OAI_BASEURL + ) { + baseURL = process.env.IMAGE_GEN_OAI_BASEURL; + closureConfig.baseURL = baseURL; + closureConfig.defaultQuery = { 'api-version': process.env.IMAGE_GEN_OAI_AZURE_API_VERSION }; + closureConfig.defaultHeaders = { + 'api-key': process.env.IMAGE_GEN_OAI_API_KEY, + 'Content-Type': 'application/json', + }; + closureConfig.apiKey = process.env.IMAGE_GEN_OAI_API_KEY; + } + + const imageFiles = fields.imageFiles ?? []; + + /** + * Image Generation Tool + */ + const imageGenTool = tool( + async ( + { + prompt, + background = 'auto', + n = 1, + output_compression = 100, + quality = 'auto', + size = 'auto', + }, + runnableConfig, + ) => { + if (!prompt) { + throw new Error('Missing required field: prompt'); + } + const clientConfig = { ...closureConfig }; + if (process.env.PROXY) { + clientConfig.httpAgent = new HttpsProxyAgent(process.env.PROXY); + } + + /** @type {OpenAI} */ + const openai = new OpenAI(clientConfig); + let output_format = imageOutputType; + if ( + background === 'transparent' && + output_format !== EImageOutputType.PNG && + output_format !== EImageOutputType.WEBP + ) { + logger.warn( + '[ImageGenOAI] Transparent background requires PNG or WebP format, defaulting to PNG', + ); + output_format = EImageOutputType.PNG; + } + + let resp; + try { + const derivedSignal = runnableConfig?.signal + ? AbortSignal.any([runnableConfig.signal]) + : undefined; + resp = await openai.images.generate( + { + model: 'gpt-image-1', + prompt: replaceUnwantedChars(prompt), + n: Math.min(Math.max(1, n), 10), + background, + output_format, + output_compression: + output_format === EImageOutputType.WEBP || output_format === EImageOutputType.JPEG + ? output_compression + : undefined, + quality, + size, + }, + { + signal: derivedSignal, + }, + ); + } catch (error) { + const message = '[image_gen_oai] Problem generating the image:'; + logAxiosError({ error, message }); + return returnValue(`Something went wrong when trying to generate the image. The OpenAI API may be unavailable: +Error Message: ${error.message}`); + } + + if (!resp) { + return returnValue( + 'Something went wrong when trying to generate the image. The OpenAI API may be unavailable', + ); + } + + // For gpt-image-1, the response contains base64-encoded images + // TODO: handle cost in `resp.usage` + const base64Image = resp.data[0].b64_json; + + if (!base64Image) { + return returnValue( + 'No image data returned from OpenAI API. 
There may be a problem with the API or your configuration.', + ); + } + + const content = [ + { + type: ContentTypes.IMAGE_URL, + image_url: { + url: `data:image/${output_format};base64,${base64Image}`, + }, + }, + ]; + + const file_ids = [v4()]; + const response = [ + { + type: ContentTypes.TEXT, + text: displayMessage + `\n\ngenerated_image_id: "${file_ids[0]}"`, + }, + ]; + return [response, { content, file_ids }]; + }, + { + name: 'image_gen_oai', + description: getImageGenDescription(), + schema: z.object({ + prompt: z.string().max(32000).describe(getImageGenPromptDescription()), + background: z + .enum(['transparent', 'opaque', 'auto']) + .optional() + .describe( + 'Sets transparency for the background. Must be one of transparent, opaque or auto (default). When transparent, the output format should be png or webp.', + ), + /* + n: z + .number() + .int() + .min(1) + .max(10) + .optional() + .describe('The number of images to generate. Must be between 1 and 10.'), + output_compression: z + .number() + .int() + .min(0) + .max(100) + .optional() + .describe('The compression level (0-100%) for webp or jpeg formats. Defaults to 100.'), + */ + quality: z + .enum(['auto', 'high', 'medium', 'low']) + .optional() + .describe('The quality of the image. One of auto (default), high, medium, or low.'), + size: z + .enum(['auto', '1024x1024', '1536x1024', '1024x1536']) + .optional() + .describe( + 'The size of the generated image. One of 1024x1024, 1536x1024 (landscape), 1024x1536 (portrait), or auto (default).', + ), + }), + responseFormat: 'content_and_artifact', + }, + ); + + /** + * Image Editing Tool + */ + const imageEditTool = tool( + async ({ prompt, image_ids, quality = 'auto', size = 'auto' }, runnableConfig) => { + if (!prompt) { + throw new Error('Missing required field: prompt'); + } + + const clientConfig = { ...closureConfig }; + if (process.env.PROXY) { + clientConfig.httpAgent = new HttpsProxyAgent(process.env.PROXY); + } + + const formData = new FormData(); + formData.append('model', 'gpt-image-1'); + formData.append('prompt', replaceUnwantedChars(prompt)); + // TODO: `mask` support + // TODO: more than 1 image support + // formData.append('n', n.toString()); + formData.append('quality', quality); + formData.append('size', size); + + /** @type {Record>} */ + const streamMethods = {}; + + const requestFilesMap = Object.fromEntries(imageFiles.map((f) => [f.file_id, { ...f }])); + + const orderedFiles = new Array(image_ids.length); + const idsToFetch = []; + const indexOfMissing = Object.create(null); + + for (let i = 0; i < image_ids.length; i++) { + const id = image_ids[i]; + const file = requestFilesMap[id]; + + if (file) { + orderedFiles[i] = file; + } else { + idsToFetch.push(id); + indexOfMissing[id] = i; + } + } + + if (idsToFetch.length) { + const fetchedFiles = await getFiles( + { + user: req.user.id, + file_id: { $in: idsToFetch }, + height: { $exists: true }, + width: { $exists: true }, + }, + {}, + {}, + ); + + for (const file of fetchedFiles) { + requestFilesMap[file.file_id] = file; + orderedFiles[indexOfMissing[file.file_id]] = file; + } + } + for (const imageFile of orderedFiles) { + if (!imageFile) { + continue; + } + /** @type {NodeStream} */ + let stream; + /** @type {NodeStreamDownloader} */ + let getDownloadStream; + const source = imageFile.source || appFileStrategy; + if (!source) { + throw new Error('No source found for image file'); + } + if (streamMethods[source]) { + getDownloadStream = streamMethods[source]; + } else { + ({ getDownloadStream } = 
getStrategyFunctions(source)); + streamMethods[source] = getDownloadStream; + } + if (!getDownloadStream) { + throw new Error(`No download stream method found for source: ${source}`); + } + stream = await getDownloadStream(req, imageFile.filepath); + if (!stream) { + throw new Error('Failed to get download stream for image file'); + } + formData.append('image[]', stream, { + filename: imageFile.filename, + contentType: imageFile.type, + }); + } + + /** @type {import('axios').RawAxiosHeaders} */ + let headers = { + ...formData.getHeaders(), + }; + + if (process.env.IMAGE_GEN_OAI_AZURE_API_VERSION && process.env.IMAGE_GEN_OAI_BASEURL) { + headers['api-key'] = apiKey; + } else { + headers['Authorization'] = `Bearer ${apiKey}`; + } + + try { + const derivedSignal = runnableConfig?.signal + ? AbortSignal.any([runnableConfig.signal]) + : undefined; + + /** @type {import('axios').AxiosRequestConfig} */ + const axiosConfig = { + headers, + ...clientConfig, + signal: derivedSignal, + baseURL, + }; + + if (process.env.IMAGE_GEN_OAI_AZURE_API_VERSION && process.env.IMAGE_GEN_OAI_BASEURL) { + axiosConfig.params = { + 'api-version': process.env.IMAGE_GEN_OAI_AZURE_API_VERSION, + ...axiosConfig.params, + }; + } + const response = await axios.post('/images/edits', formData, axiosConfig); + + if (!response.data || !response.data.data || !response.data.data.length) { + return returnValue( + 'No image data returned from OpenAI API. There may be a problem with the API or your configuration.', + ); + } + + const base64Image = response.data.data[0].b64_json; + if (!base64Image) { + return returnValue( + 'No image data returned from OpenAI API. There may be a problem with the API or your configuration.', + ); + } + + const content = [ + { + type: ContentTypes.IMAGE_URL, + image_url: { + url: `data:image/${imageOutputType};base64,${base64Image}`, + }, + }, + ]; + + const file_ids = [v4()]; + const textResponse = [ + { + type: ContentTypes.TEXT, + text: + displayMessage + + `\n\ngenerated_image_id: "${file_ids[0]}"\nreferenced_image_ids: ["${image_ids.join('", "')}"]`, + }, + ]; + return [textResponse, { content, file_ids }]; + } catch (error) { + const message = '[image_edit_oai] Problem editing the image:'; + logAxiosError({ error, message }); + return returnValue(`Something went wrong when trying to edit the image. The OpenAI API may be unavailable: +Error Message: ${error.message || 'Unknown error'}`); + } + }, + { + name: 'image_edit_oai', + description: getImageEditDescription(), + schema: z.object({ + image_ids: z + .array(z.string()) + .min(1) + .describe( + ` +IDs (image ID strings) of previously generated or uploaded images that should guide the edit. + +Guidelines: +- If the user's request depends on any prior image(s), copy their image IDs into the \`image_ids\` array (in the same order the user refers to them). +- Never invent or hallucinate IDs; only use IDs that are still visible in the conversation context. +- If no earlier image is relevant, omit the field entirely. +`.trim(), + ), + prompt: z.string().max(32000).describe(getImageEditPromptDescription()), + /* + n: z + .number() + .int() + .min(1) + .max(10) + .optional() + .describe('The number of images to generate. Must be between 1 and 10. Defaults to 1.'), + */ + quality: z + .enum(['auto', 'high', 'medium', 'low']) + .optional() + .describe( + 'The quality of the image. One of auto (default), high, medium, or low. 
High/medium/low only supported for gpt-image-1.', + ), + size: z + .enum(['auto', '1024x1024', '1536x1024', '1024x1536', '256x256', '512x512']) + .optional() + .describe( + 'The size of the generated images. For gpt-image-1: auto (default), 1024x1024, 1536x1024, 1024x1536. For dall-e-2: 256x256, 512x512, 1024x1024.', + ), + }), + responseFormat: 'content_and_artifact', + }, + ); + + return [imageGenTool, imageEditTool]; +} + +module.exports = createOpenAIImageTools; diff --git a/api/app/clients/tools/util/handleTools.js b/api/app/clients/tools/util/handleTools.js index 8ce9d7bc7..201009513 100644 --- a/api/app/clients/tools/util/handleTools.js +++ b/api/app/clients/tools/util/handleTools.js @@ -1,7 +1,7 @@ -const { Tools, Constants } = require('librechat-data-provider'); const { SerpAPI } = require('@langchain/community/tools/serpapi'); const { Calculator } = require('@langchain/community/tools/calculator'); const { createCodeExecutionTool, EnvVar } = require('@librechat/agents'); +const { Tools, Constants, EToolResources } = require('librechat-data-provider'); const { getUserPluginAuthValue } = require('~/server/services/PluginService'); const { availableTools, @@ -18,6 +18,7 @@ const { StructuredWolfram, createYouTubeTools, TavilySearchResults, + createOpenAIImageTools, } = require('../'); const { primeFiles: primeCodeFiles } = require('~/server/services/Files/Code/process'); const { createFileSearchTool, primeFiles: primeSearchFiles } = require('./fileSearch'); @@ -157,7 +158,7 @@ const loadTools = async ({ }; const customConstructors = { - serpapi: async () => { + serpapi: async (_toolContextMap) => { const authFields = getAuthFields('serpapi'); let envVar = authFields[0] ?? ''; let apiKey = process.env[envVar]; @@ -170,11 +171,40 @@ const loadTools = async ({ gl: 'us', }); }, - youtube: async () => { + youtube: async (_toolContextMap) => { const authFields = getAuthFields('youtube'); const authValues = await loadAuthValues({ userId: user, authFields }); return createYouTubeTools(authValues); }, + image_gen_oai: async (toolContextMap) => { + const authFields = getAuthFields('image_gen_oai'); + const authValues = await loadAuthValues({ userId: user, authFields }); + const imageFiles = options.tool_resources?.[EToolResources.image_edit]?.files ?? []; + let toolContext = ''; + for (let i = 0; i < imageFiles.length; i++) { + const file = imageFiles[i]; + if (!file) { + continue; + } + if (i === 0) { + toolContext = + 'Image files provided in this request (their image IDs listed in order of appearance) available for image editing:'; + } + toolContext += `\n\t- ${file.file_id}`; + if (i === imageFiles.length - 1) { + toolContext += `\n\nInclude any you need in the \`image_ids\` array when calling \`${EToolResources.image_edit}_oai\`. You may also include previously referenced or generated image IDs.`; + } + } + if (toolContext) { + toolContextMap.image_edit_oai = toolContext; + } + return createOpenAIImageTools({ + ...authValues, + isAgent: !!agent, + req: options.req, + imageFiles, + }); + }, }; const requestedTools = {}; @@ -200,6 +230,7 @@ const loadTools = async ({ serpapi: { location: 'Austin,Texas,United States', hl: 'en', gl: 'us' }, }; + /** @type {Record} */ const toolContextMap = {}; const remainingTools = []; const appTools = options.req?.app?.locals?.availableTools ?? 
{}; @@ -246,7 +277,7 @@ const loadTools = async ({ } if (customConstructors[tool]) { - requestedTools[tool] = customConstructors[tool]; + requestedTools[tool] = async () => customConstructors[tool](toolContextMap); continue; } diff --git a/api/models/File.js b/api/models/File.js index 87c91003e..4d9499447 100644 --- a/api/models/File.js +++ b/api/models/File.js @@ -1,4 +1,5 @@ const mongoose = require('mongoose'); +const { EToolResources } = require('librechat-data-provider'); const { fileSchema } = require('@librechat/data-schemas'); const { logger } = require('~/config'); @@ -8,7 +9,7 @@ const File = mongoose.model('File', fileSchema); * Finds a file by its file_id with additional query options. * @param {string} file_id - The unique identifier of the file. * @param {object} options - Query options for filtering, projection, etc. - * @returns {Promise} A promise that resolves to the file document or null. + * @returns {Promise} A promise that resolves to the file document or null. */ const findFileById = async (file_id, options = {}) => { return await File.findOne({ file_id, ...options }).lean(); @@ -20,7 +21,7 @@ const findFileById = async (file_id, options = {}) => { * @param {Object} [_sortOptions] - Optional sort parameters. * @param {Object|String} [selectFields={ text: 0 }] - Fields to include/exclude in the query results. * Default excludes the 'text' field. - * @returns {Promise>} A promise that resolves to an array of file documents. + * @returns {Promise>} A promise that resolves to an array of file documents. */ const getFiles = async (filter, _sortOptions, selectFields = { text: 0 }) => { const sortOptions = { updatedAt: -1, ..._sortOptions }; @@ -30,9 +31,10 @@ const getFiles = async (filter, _sortOptions, selectFields = { text: 0 }) => { /** * Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs * @param {string[]} fileIds - Array of file_id strings to search for - * @returns {Promise>} Files that match the criteria + * @param {Set} toolResourceSet - Optional filter for tool resources + * @returns {Promise>} Files that match the criteria */ -const getToolFilesByIds = async (fileIds) => { +const getToolFilesByIds = async (fileIds, toolResourceSet) => { if (!fileIds || !fileIds.length) { return []; } @@ -40,9 +42,19 @@ const getToolFilesByIds = async (fileIds) => { try { const filter = { file_id: { $in: fileIds }, - $or: [{ embedded: true }, { 'metadata.fileIdentifier': { $exists: true } }], }; + if (toolResourceSet.size) { + filter.$or = []; + } + + if (toolResourceSet.has(EToolResources.file_search)) { + filter.$or.push({ embedded: true }); + } + if (toolResourceSet.has(EToolResources.execute_code)) { + filter.$or.push({ 'metadata.fileIdentifier': { $exists: true } }); + } + const selectFields = { text: 0 }; const sortOptions = { updatedAt: -1 }; @@ -55,9 +67,9 @@ const getToolFilesByIds = async (fileIds) => { /** * Creates a new file with a TTL of 1 hour. - * @param {IMongoFile} data - The file data to be created, must contain file_id. + * @param {MongoFile} data - The file data to be created, must contain file_id. * @param {boolean} disableTTL - Whether to disable the TTL. - * @returns {Promise} A promise that resolves to the created file document. + * @returns {Promise} A promise that resolves to the created file document. 
*/ const createFile = async (data, disableTTL) => { const fileData = { @@ -77,8 +89,8 @@ const createFile = async (data, disableTTL) => { /** * Updates a file identified by file_id with new data and removes the TTL. - * @param {IMongoFile} data - The data to update, must contain file_id. - * @returns {Promise} A promise that resolves to the updated file document. + * @param {MongoFile} data - The data to update, must contain file_id. + * @returns {Promise} A promise that resolves to the updated file document. */ const updateFile = async (data) => { const { file_id, ...update } = data; @@ -91,8 +103,8 @@ const updateFile = async (data) => { /** * Increments the usage of a file identified by file_id. - * @param {IMongoFile} data - The data to update, must contain file_id and the increment value for usage. - * @returns {Promise} A promise that resolves to the updated file document. + * @param {MongoFile} data - The data to update, must contain file_id and the increment value for usage. + * @returns {Promise} A promise that resolves to the updated file document. */ const updateFileUsage = async (data) => { const { file_id, inc = 1 } = data; @@ -106,7 +118,7 @@ const updateFileUsage = async (data) => { /** * Deletes a file identified by file_id. * @param {string} file_id - The unique identifier of the file to delete. - * @returns {Promise} A promise that resolves to the deleted file document or null. + * @returns {Promise} A promise that resolves to the deleted file document or null. */ const deleteFile = async (file_id) => { return await File.findOneAndDelete({ file_id }).lean(); @@ -115,7 +127,7 @@ const deleteFile = async (file_id) => { /** * Deletes a file identified by a filter. * @param {object} filter - The filter criteria to apply. - * @returns {Promise} A promise that resolves to the deleted file document or null. + * @returns {Promise} A promise that resolves to the deleted file document or null. */ const deleteFileByFilter = async (filter) => { return await File.findOneAndDelete(filter).lean(); diff --git a/api/models/Share.js b/api/models/Share.js index a8bfbce7f..8611d01bc 100644 --- a/api/models/Share.js +++ b/api/models/Share.js @@ -52,6 +52,14 @@ function anonymizeMessages(messages, newConvoId) { const newMessageId = anonymizeMessageId(message.messageId); idMap.set(message.messageId, newMessageId); + const anonymizedAttachments = message.attachments?.map((attachment) => { + return { + ...attachment, + messageId: newMessageId, + conversationId: newConvoId, + }; + }); + return { ...message, messageId: newMessageId, @@ -61,6 +69,7 @@ function anonymizeMessages(messages, newConvoId) { model: message.model?.startsWith('asst_') ? 
anonymizeAssistantId(message.model) : message.model, + attachments: anonymizedAttachments, }; }); } diff --git a/api/server/controllers/agents/callbacks.js b/api/server/controllers/agents/callbacks.js index 6622ec381..4ee57df67 100644 --- a/api/server/controllers/agents/callbacks.js +++ b/api/server/controllers/agents/callbacks.js @@ -246,7 +246,11 @@ function createToolEndCallback({ req, res, artifactPromises }) { if (output.artifact.content) { /** @type {FormattedContent[]} */ const content = output.artifact.content; - for (const part of content) { + for (let i = 0; i < content.length; i++) { + const part = content[i]; + if (!part) { + continue; + } if (part.type !== 'image_url') { continue; } @@ -254,8 +258,10 @@ function createToolEndCallback({ req, res, artifactPromises }) { artifactPromises.push( (async () => { const filename = `${output.name}_${output.tool_call_id}_img_${nanoid()}`; + const file_id = output.artifact.file_ids?.[i]; const file = await saveBase64Image(url, { req, + file_id, filename, endpoint: metadata.provider, context: FileContext.image_generation, diff --git a/api/server/services/Endpoints/agents/initialize.js b/api/server/services/Endpoints/agents/initialize.js index eaff058bf..e26ed0884 100644 --- a/api/server/services/Endpoints/agents/initialize.js +++ b/api/server/services/Endpoints/agents/initialize.js @@ -3,6 +3,7 @@ const { Constants, ErrorTypes, EModelEndpoint, + EToolResources, getResponseSender, AgentCapabilities, providerEndpointMap, @@ -41,12 +42,19 @@ const providerConfigMap = { }; /** - * @param {ServerRequest} req - * @param {Promise> | undefined} _attachments - * @param {AgentToolResources | undefined} _tool_resources + * @param {Object} params + * @param {ServerRequest} params.req + * @param {Promise> | undefined} [params.attachments] + * @param {Set} params.requestFileSet + * @param {AgentToolResources | undefined} [params.tool_resources] * @returns {Promise<{ attachments: Array | undefined, tool_resources: AgentToolResources | undefined }>} */ -const primeResources = async (req, _attachments, _tool_resources) => { +const primeResources = async ({ + req, + attachments: _attachments, + tool_resources: _tool_resources, + requestFileSet, +}) => { try { /** @type {Array | undefined} */ let attachments; @@ -54,7 +62,7 @@ const primeResources = async (req, _attachments, _tool_resources) => { const isOCREnabled = (req.app.locals?.[EModelEndpoint.agents]?.capabilities ?? []).includes( AgentCapabilities.ocr, ); - if (tool_resources.ocr?.file_ids && isOCREnabled) { + if (tool_resources[EToolResources.ocr]?.file_ids && isOCREnabled) { const context = await getFiles( { file_id: { $in: tool_resources.ocr.file_ids }, @@ -79,17 +87,28 @@ const primeResources = async (req, _attachments, _tool_resources) => { continue; } if (file.metadata?.fileIdentifier) { - const execute_code = tool_resources.execute_code ?? {}; + const execute_code = tool_resources[EToolResources.execute_code] ?? {}; if (!execute_code.files) { - tool_resources.execute_code = { ...execute_code, files: [] }; + tool_resources[EToolResources.execute_code] = { ...execute_code, files: [] }; } - tool_resources.execute_code.files.push(file); + tool_resources[EToolResources.execute_code].files.push(file); } else if (file.embedded === true) { - const file_search = tool_resources.file_search ?? {}; + const file_search = tool_resources[EToolResources.file_search] ?? 
{}; if (!file_search.files) { - tool_resources.file_search = { ...file_search, files: [] }; + tool_resources[EToolResources.file_search] = { ...file_search, files: [] }; } - tool_resources.file_search.files.push(file); + tool_resources[EToolResources.file_search].files.push(file); + } else if ( + requestFileSet.has(file.file_id) && + file.type.startsWith('image') && + file.height && + file.width + ) { + const image_edit = tool_resources[EToolResources.image_edit] ?? {}; + if (!image_edit.files) { + tool_resources[EToolResources.image_edit] = { ...image_edit, files: [] }; + } + tool_resources[EToolResources.image_edit].files.push(file); } attachments.push(file); @@ -146,7 +165,14 @@ const initializeAgentOptions = async ({ (agent.model_parameters?.resendFiles ?? true) === true ) { const fileIds = (await getConvoFiles(req.body.conversationId)) ?? []; - const toolFiles = await getToolFilesByIds(fileIds); + /** @type {Set} */ + const toolResourceSet = new Set(); + for (const tool of agent.tools) { + if (EToolResources[tool]) { + toolResourceSet.add(EToolResources[tool]); + } + } + const toolFiles = await getToolFilesByIds(fileIds, toolResourceSet); if (requestFiles.length || toolFiles.length) { currentFiles = await processFiles(requestFiles.concat(toolFiles)); } @@ -154,11 +180,12 @@ const initializeAgentOptions = async ({ currentFiles = await processFiles(requestFiles); } - const { attachments, tool_resources } = await primeResources( + const { attachments, tool_resources } = await primeResources({ req, - currentFiles, - agent.tool_resources, - ); + attachments: currentFiles, + tool_resources: agent.tool_resources, + requestFileSet: new Set(requestFiles.map((file) => file.file_id)), + }); const provider = agent.provider; const { tools, toolContextMap } = await loadAgentTools({ diff --git a/api/server/services/Files/Local/crud.js b/api/server/services/Files/Local/crud.js index c2bb75c12..783230f2f 100644 --- a/api/server/services/Files/Local/crud.js +++ b/api/server/services/Files/Local/crud.js @@ -309,6 +309,24 @@ function getLocalFileStream(req, filepath) { throw new Error(`Invalid file path: ${filepath}`); } + return fs.createReadStream(fullPath); + } else if (filepath.includes('/images/')) { + const basePath = filepath.split('/images/')[1]; + + if (!basePath) { + logger.warn(`Invalid base path: ${filepath}`); + throw new Error(`Invalid file path: ${filepath}`); + } + + const fullPath = path.join(req.app.locals.paths.imageOutput, basePath); + const publicDir = req.app.locals.paths.imageOutput; + + const rel = path.relative(publicDir, fullPath); + if (rel.startsWith('..') || path.isAbsolute(rel) || rel.includes(`..${path.sep}`)) { + logger.warn(`Invalid relative file path: ${filepath}`); + throw new Error(`Invalid file path: ${filepath}`); + } + return fs.createReadStream(fullPath); } return fs.createReadStream(filepath); diff --git a/api/server/services/Files/S3/crud.js b/api/server/services/Files/S3/crud.js index e685c8c8c..10c04106d 100644 --- a/api/server/services/Files/S3/crud.js +++ b/api/server/services/Files/S3/crud.js @@ -358,10 +358,10 @@ async function getNewS3URL(currentURL) { /** * Refreshes S3 URLs for an array of files if they're expired or close to expiring * - * @param {IMongoFile[]} files - Array of file documents + * @param {MongoFile[]} files - Array of file documents * @param {(files: MongoFile[]) => Promise} batchUpdateFiles - Function to update files in the database * @param {number} [bufferSeconds=3600] - Buffer time in seconds to check for expiration - * @returns 
{Promise} The files with refreshed URLs if needed + * @returns {Promise} The files with refreshed URLs if needed */ async function refreshS3FileUrls(files, batchUpdateFiles, bufferSeconds = 3600) { if (!files || !Array.isArray(files) || files.length === 0) { diff --git a/api/server/services/ToolService.js b/api/server/services/ToolService.js index 046d4e9bf..f3e7df5a7 100644 --- a/api/server/services/ToolService.js +++ b/api/server/services/ToolService.js @@ -16,13 +16,18 @@ const { validateAndParseOpenAPISpec, } = require('librechat-data-provider'); const { - loadActionSets, createActionTool, decryptMetadata, + loadActionSets, domainParser, } = require('./ActionService'); +const { + createOpenAIImageTools, + createYouTubeTools, + manifestToolMap, + toolkits, +} = require('~/app/clients/tools'); const { processFileURL, uploadImageBuffer } = require('~/server/services/Files/process'); -const { createYouTubeTools, manifestToolMap, toolkits } = require('~/app/clients/tools'); const { isActionDomainAllowed } = require('~/server/services/domains'); const { getEndpointsConfig } = require('~/server/services/Config'); const { recordUsage } = require('~/server/services/Threads'); @@ -104,7 +109,11 @@ function loadAndFormatTools({ directory, adminFilter = [], adminIncluded = [] }) } /** Basic Tools; schema: { input: string } */ - const basicToolInstances = [new Calculator(), ...createYouTubeTools({ override: true })]; + const basicToolInstances = [ + new Calculator(), + ...createOpenAIImageTools({ override: true }), + ...createYouTubeTools({ override: true }), + ]; for (const toolInstance of basicToolInstances) { const formattedTool = formatToOpenAIAssistantTool(toolInstance); let toolName = formattedTool[Tools.function].name; diff --git a/api/typedefs.js b/api/typedefs.js index 24dd29a93..0aab97c42 100644 --- a/api/typedefs.js +++ b/api/typedefs.js @@ -7,6 +7,11 @@ * @typedef {import('openai').OpenAI} OpenAI * @memberof typedefs */ +/** + * @exports OpenAIImagesResponse + * @typedef {Promise} OpenAIImagesResponse + * @memberof typedefs + */ /** * @exports ServerRequest @@ -14,6 +19,18 @@ * @memberof typedefs */ +/** + * @template T + * @typedef {ReadableStream | NodeJS.ReadableStream} NodeStream + * @memberof typedefs + */ + +/** + * @template T + * @typedef {(req: ServerRequest, filepath: string) => Promise>} NodeStreamDownloader + * @memberof typedefs + */ + /** * @exports ServerResponse * @typedef {import('express').Response} ServerResponse @@ -816,8 +833,9 @@ /** * @typedef {Partial & { * message?: string, - * signal?: AbortSignal - * memory?: ConversationSummaryBufferMemory + * signal?: AbortSignal, + * memory?: ConversationSummaryBufferMemory, + * tool_resources?: AgentToolResources, * }} LoadToolOptions * @memberof typedefs */ diff --git a/client/package.json b/client/package.json index 59e3dd831..7273ad9c4 100644 --- a/client/package.json +++ b/client/package.json @@ -86,7 +86,7 @@ "react-i18next": "^15.4.0", "react-lazy-load-image-component": "^1.6.0", "react-markdown": "^9.0.1", - "react-resizable-panels": "^2.1.7", + "react-resizable-panels": "^2.1.8", "react-router-dom": "^6.11.2", "react-speech-recognition": "^3.10.0", "react-textarea-autosize": "^8.4.0", diff --git a/client/public/assets/image_gen_oai.png b/client/public/assets/image_gen_oai.png new file mode 100644 index 000000000..e1762e709 Binary files /dev/null and b/client/public/assets/image_gen_oai.png differ diff --git a/client/src/common/types.ts b/client/src/common/types.ts index 1c0191477..7bbb78654 100644 --- 
a/client/src/common/types.ts +++ b/client/src/common/types.ts @@ -306,11 +306,14 @@ export type TAskProps = { export type TOptions = { editedMessageId?: string | null; editedText?: string | null; - isResubmission?: boolean; isRegenerate?: boolean; isContinued?: boolean; isEdited?: boolean; overrideMessages?: t.TMessage[]; + /** This value is only true when the user submits a message with "Save & Submit" for a user-created message */ + isResubmission?: boolean; + /** Currently only utilized when `isResubmission === true`, uses that message's currently attached files */ + overrideFiles?: t.TMessage['files']; }; export type TAskFunction = (props: TAskProps, options?: TOptions) => void; diff --git a/client/src/components/Artifacts/Artifact.tsx b/client/src/components/Artifacts/Artifact.tsx index d1bf22ef5..db193fe1e 100644 --- a/client/src/components/Artifacts/Artifact.tsx +++ b/client/src/components/Artifacts/Artifact.tsx @@ -34,6 +34,10 @@ export const artifactPlugin: Pluggable = () => { }; }; +const defaultTitle = 'untitled'; +const defaultType = 'unknown'; +const defaultIdentifier = 'lc-no-identifier'; + export function Artifact({ node, ...props @@ -58,15 +62,18 @@ export function Artifact({ const content = extractContent(props.children); logger.log('artifacts', 'updateArtifact: content.length', content.length); - const title = props.title ?? 'Untitled Artifact'; - const type = props.type ?? 'unknown'; - const identifier = props.identifier ?? 'no-identifier'; + const title = props.title ?? defaultTitle; + const type = props.type ?? defaultType; + const identifier = props.identifier ?? defaultIdentifier; const artifactKey = `${identifier}_${type}_${title}_${messageId}` .replace(/\s+/g, '_') .toLowerCase(); throttledUpdateRef.current(() => { const now = Date.now(); + if (artifactKey === `${defaultIdentifier}_${defaultType}_${defaultTitle}_${messageId}`) { + return; + } const currentArtifact: Artifact = { id: artifactKey, diff --git a/client/src/components/Artifacts/ArtifactButton.tsx b/client/src/components/Artifacts/ArtifactButton.tsx index d8fa55770..67082f490 100644 --- a/client/src/components/Artifacts/ArtifactButton.tsx +++ b/client/src/components/Artifacts/ArtifactButton.tsx @@ -1,4 +1,4 @@ -import { useSetRecoilState } from 'recoil'; +import { useSetRecoilState, useResetRecoilState } from 'recoil'; import type { Artifact } from '~/common'; import FilePreview from '~/components/Chat/Input/Files/FilePreview'; import { useLocalize } from '~/hooks'; @@ -8,7 +8,8 @@ import store from '~/store'; const ArtifactButton = ({ artifact }: { artifact: Artifact | null }) => { const localize = useLocalize(); const setVisible = useSetRecoilState(store.artifactsVisible); - const setArtifactId = useSetRecoilState(store.currentArtifactId); + const setCurrentArtifactId = useSetRecoilState(store.currentArtifactId); + const resetCurrentArtifactId = useResetRecoilState(store.currentArtifactId); if (artifact === null || artifact === undefined) { return null; } @@ -19,12 +20,15 @@ const ArtifactButton = ({ artifact }: { artifact: Artifact | null }) => {