LibreChat/api/app/clients/tools/structured/DALLE3.js
Danny Avila 1a815f5e19
🎉 feat: Code Interpreter API and Agents Release (#4860)
* feat: Code Interpreter API & File Search Agent Uploads

chore: add back code files

wip: first pass, abstract key dialog

refactor: influence checkbox on key changes

refactor: update localization keys for 'execute code' to 'run code'

wip: run code button

refactor: add throwError parameter to loadAuthValues and getUserPluginAuthValue functions

feat: first pass, API tool calling

fix: handle missing toolId in callTool function and return 404 for non-existent tools

feat: show code outputs

fix: improve error handling in callTool function and log errors

fix: handle potential null value for filepath in attachment destructuring

fix: normalize language before rendering and prevent null return

fix: add loading indicator in RunCode component while executing code

feat: add support for conditional code execution in Markdown components

feat: attachments

refactor: remove bash

fix: pass abort signal to graph/run

refactor: debounce and rate limit tool call

refactor: increase debounce delay for execute function

feat: set code output attachments

feat: image attachments

refactor: apply message context

refactor: pass `partIndex`

feat: toolCall schema/model/methods

feat: block indexing

feat: get tool calls

chore: imports

chore: typing

chore: condense type imports

feat: get tool calls

fix: block indexing

chore: typing

refactor: update tool calls mapping to support multiple results

fix: add unique key to nav link for rendering

wip: first pass, tool call results

refactor: update query cache from successful tool call mutation

style: improve result switcher styling

chore: note on using \`.toObject()\`

feat: add agent_id field to conversation schema

chore: typing

refactor: rename agentMap to agentsMap for consistency

feat: Agent Name as chat input placeholder

chore: bump agents

📦 chore: update @langchain dependencies to latest versions to match agents package

📦 chore: update @librechat/agents dependency to version 1.8.0

fix: Aborting agent stream removes sender; fix(bedrock): completion removes preset name label

refactor: remove direct file parameter to use req.file, add `processAgentFileUpload` for image uploads

feat: upload menu

feat: prime message_file resources

feat: implement conversation access validation in chat route

refactor: remove file parameter from processFileUpload and use req.file instead

feat: add savedMessageIds set to track saved message IDs in BaseClient, to prevent unnecessary double-write to db

feat: prevent duplicate message saves by checking savedMessageIds in AgentController

refactor: skip legacy RAG API handling for agents

feat: add files field to convoSchema

refactor: update request type annotations from Express.Request to ServerRequest in file processing functions

feat: track conversation files

fix: resendFiles, addPreviousAttachments handling

feat: add ID validation for session_id and file_id in download route

feat: entity_id for code file uploads/downloads

fix: code file edge cases

feat: delete related tool calls

feat: add stream rate handling for LLM configuration

feat: enhance system content with attached file information

fix: improve error logging in resource priming function

* WIP: PoC, sequential agents

WIP: PoC Sequential Agents, first pass content data + bump agents package

fix: package-lock

WIP: PoC, o1 support, refactor bufferString

feat: convertJsonSchemaToZod

fix: form issues and schema defining erroneous model

fix: max length issue on agent form instructions, limit conversation messages to sequential agents

feat: add abort signal support to createRun function and AgentClient

feat: PoC, hide prior sequential agent steps

fix: update parameter naming from config to metadata in event handlers for clarity, add model to usage data

refactor: use only last contentData, track model for usage data

chore: bump agents package

fix: content parts issue

refactor: filter contentParts to include tool calls and relevant indices

feat: show function calls

refactor: filter context messages to exclude tool calls when no tools are available to the agent

fix: ensure tool call content is not undefined in formatMessages

feat: add agent_id field to conversationPreset schema

feat: hide sequential agents

feat: increase upload toast duration to 10 seconds

* refactor: tool context handling & update Code API Key Dialog

feat: toolContextMap

chore: skipSpecs -> useSpecs

ci: fix handleTools tests

feat: API Key Dialog

* feat: Agent Permissions Admin Controls

feat: replace label with button for prompt permission toggle

feat: update agent permissions

feat: enable experimental agents and streamline capability configuration

feat: implement access control for agents and enhance endpoint menu items

feat: add welcome message for agent selection in localization

feat: add agents permission to access control and update version to 0.7.57

* fix: update types in useAssistantListMap and useMentions hooks for better null handling

* feat: mention agents

* fix: agent tool resource race conditions when deleting agent tool resource files

* feat: add error handling for code execution with user feedback

* refactor: rename AdminControls to AdminSettings for clarity

* style: add gap to button in AdminSettings for improved layout

* refactor: separate agent query hooks and check access to enable fetching

* fix: remove unused provider from agent initialization options, creates issue with custom endpoints

* refactor: remove redundant/deprecated modelOptions from AgentClient processes

* chore: update @librechat/agents to version 1.8.5 in package.json and package-lock.json

* fix: minor styling issues + agent panel uniformity

* fix: agent edge cases when set endpoint is no longer defined

* refactor: remove unused cleanup function call from AppService

* fix: update link in ApiKeyDialog to point to pricing page

* fix: improve type handling and layout calculations in SidePanel component

* fix: add missing localization string for agent selection in SidePanel

* chore: form styling and localizations for upload filesearch/code interpreter

* fix: model selection placeholder logic in AgentConfig component

* style: agent capabilities

* fix: add localization for provider selection and improve dropdown styling in ModelPanel

* refactor: use gpt-4o-mini > gpt-3.5-turbo

* fix: agents configuration for loadDefaultInterface and update related tests

* feat: DALLE Agents support
2024-12-04 15:48:13 -05:00

202 lines
8.8 KiB
JavaScript

const { z } = require('zod');
const path = require('path');
const OpenAI = require('openai');
const { v4: uuidv4 } = require('uuid');
const { Tool } = require('@langchain/core/tools');
const { HttpsProxyAgent } = require('https-proxy-agent');
const { FileContext } = require('librechat-data-provider');
const { getImageBasename } = require('~/server/services/Files/images');
const extractBaseURL = require('~/utils/extractBaseURL');
const { logger } = require('~/config');
class DALLE3 extends Tool {
constructor(fields = {}) {
super();
/** @type {boolean} Used to initialize the Tool without necessary variables. */
this.override = fields.override ?? false;
/** @type {boolean} Necessary for output to contain all image metadata. */
this.returnMetadata = fields.returnMetadata ?? false;
this.userId = fields.userId;
this.fileStrategy = fields.fileStrategy;
/** @type {boolean} */
this.isAgent = fields.isAgent;
if (fields.processFileURL) {
/** @type {processFileURL} Necessary for output to contain all image metadata. */
this.processFileURL = fields.processFileURL.bind(this);
}
let apiKey = fields.DALLE3_API_KEY ?? fields.DALLE_API_KEY ?? this.getApiKey();
const config = { apiKey };
if (process.env.DALLE_REVERSE_PROXY) {
config.baseURL = extractBaseURL(process.env.DALLE_REVERSE_PROXY);
}
if (process.env.DALLE3_AZURE_API_VERSION && process.env.DALLE3_BASEURL) {
config.baseURL = process.env.DALLE3_BASEURL;
config.defaultQuery = { 'api-version': process.env.DALLE3_AZURE_API_VERSION };
config.defaultHeaders = {
'api-key': process.env.DALLE3_API_KEY,
'Content-Type': 'application/json',
};
config.apiKey = process.env.DALLE3_API_KEY;
}
if (process.env.PROXY) {
config.httpAgent = new HttpsProxyAgent(process.env.PROXY);
}
/** @type {OpenAI} */
this.openai = new OpenAI(config);
this.name = 'dalle';
this.description = `Use DALLE to create images from text descriptions.
- It requires prompts to be in English, detailed, and to specify image type and human features for diversity.
- Create only one image, without repeating or listing descriptions outside the "prompts" field.
- Maintains the original intent of the description, with parameters for image style, quality, and size to tailor the output.`;
this.description_for_model =
process.env.DALLE3_SYSTEM_PROMPT ??
`// Whenever a description of an image is given, generate prompts (following these rules), and use dalle to create the image. If the user does not ask for a specific number of images, default to creating 2 prompts to send to dalle that are written to be as diverse as possible. All prompts sent to dalle must abide by the following policies:
// 1. Prompts must be in English. Translate to English if needed.
// 2. One image per function call. Create only 1 image per request unless explicitly told to generate more than 1 image.
// 3. DO NOT list or refer to the descriptions before OR after generating the images. They should ONLY ever be written out ONCE, in the \`"prompts"\` field of the request. You do not need to ask for permission to generate, just do it!
// 4. Always mention the image type (photo, oil painting, watercolor painting, illustration, cartoon, drawing, vector, render, etc.) at the beginning of the caption. Unless the captions suggests otherwise, make one of the images a photo.
// 5. Diversify depictions of ALL images with people to always include always DESCENT and GENDER for EACH person using direct terms. Adjust only human descriptions.
// - EXPLICITLY specify these attributes, not abstractly reference them. The attributes should be specified in a minimal way and should directly describe their physical form.
// - Your choices should be grounded in reality. For example, all of a given OCCUPATION should not be the same gender or race. Additionally, focus on creating diverse, inclusive, and exploratory scenes via the properties you choose during rewrites. Make choices that may be insightful or unique sometimes.
// - Use "various" or "diverse" ONLY IF the description refers to groups of more than 3 people. Do not change the number of people requested in the original description.
// - Don't alter memes, fictional character origins, or unseen people. Maintain the original prompt's intent and prioritize quality.
// The prompt must intricately describe every part of the image in concrete, objective detail. THINK about what the end goal of the description is, and extrapolate that to what would make satisfying images.
// All descriptions sent to dalle should be a paragraph of text that is extremely descriptive and detailed. Each should be more than 3 sentences long.
// - The "vivid" style is HIGHLY preferred, but "natural" is also supported.`;
this.schema = z.object({
prompt: z
.string()
.max(4000)
.describe(
'A text description of the desired image, following the rules, up to 4000 characters.',
),
style: z
.enum(['vivid', 'natural'])
.describe(
'Must be one of `vivid` or `natural`. `vivid` generates hyper-real and dramatic images, `natural` produces more natural, less hyper-real looking images',
),
quality: z
.enum(['hd', 'standard'])
.describe('The quality of the generated image. Only `hd` and `standard` are supported.'),
size: z
.enum(['1024x1024', '1792x1024', '1024x1792'])
.describe(
'The size of the requested image. Use 1024x1024 (square) as the default, 1792x1024 if the user requests a wide image, and 1024x1792 for full-body portraits. Always include this parameter in the request.',
),
});
}
getApiKey() {
const apiKey = process.env.DALLE3_API_KEY ?? process.env.DALLE_API_KEY ?? '';
if (!apiKey && !this.override) {
throw new Error('Missing DALLE_API_KEY environment variable.');
}
return apiKey;
}
replaceUnwantedChars(inputString) {
return inputString
.replace(/\r\n|\r|\n/g, ' ')
.replace(/"/g, '')
.trim();
}
wrapInMarkdown(imageUrl) {
return `![generated image](${imageUrl})`;
}
returnValue(value) {
if (this.isAgent === true && typeof value === 'string') {
return [value, {}];
} else if (this.isAgent === true && typeof value === 'object') {
return [
'DALL-E displayed an image. All generated images are already plainly visible, so don\'t repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.',
value,
];
}
return value;
}
async _call(data) {
const { prompt, quality = 'standard', size = '1024x1024', style = 'vivid' } = data;
if (!prompt) {
throw new Error('Missing required field: prompt');
}
let resp;
try {
resp = await this.openai.images.generate({
model: 'dall-e-3',
quality,
style,
size,
prompt: this.replaceUnwantedChars(prompt),
n: 1,
});
} catch (error) {
logger.error('[DALL-E-3] Problem generating the image:', error);
return this
.returnValue(`Something went wrong when trying to generate the image. The DALL-E API may be unavailable:
Error Message: ${error.message}`);
}
if (!resp) {
return this.returnValue(
'Something went wrong when trying to generate the image. The DALL-E API may be unavailable',
);
}
const theImageUrl = resp.data[0].url;
if (!theImageUrl) {
return this.returnValue(
'No image URL returned from OpenAI API. There may be a problem with the API or your configuration.',
);
}
const imageBasename = getImageBasename(theImageUrl);
const imageExt = path.extname(imageBasename);
const extension = imageExt.startsWith('.') ? imageExt.slice(1) : imageExt;
const imageName = `img-${uuidv4()}.${extension}`;
logger.debug('[DALL-E-3]', {
imageName,
imageBasename,
imageExt,
extension,
theImageUrl,
data: resp.data[0],
});
try {
const result = await this.processFileURL({
URL: theImageUrl,
basePath: 'images',
userId: this.userId,
fileName: imageName,
fileStrategy: this.fileStrategy,
context: FileContext.image_generation,
});
if (this.returnMetadata) {
this.result = result;
} else {
this.result = this.wrapInMarkdown(result.filepath);
}
} catch (error) {
logger.error('Error while saving the image:', error);
this.result = `Failed to save the image locally. ${error.message}`;
}
return this.returnValue(this.result);
}
}
module.exports = DALLE3;