Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-09-22 08:12:00 +02:00)

* feat: Code Interpreter API & File Search Agent Uploads
  - chore: add back code files
  - wip: first pass, abstract key dialog
  - refactor: influence checkbox on key changes
  - refactor: update localization keys for 'execute code' to 'run code'
  - wip: run code button
  - refactor: add throwError parameter to loadAuthValues and getUserPluginAuthValue functions
  - feat: first pass, API tool calling
  - fix: handle missing toolId in callTool function and return 404 for non-existent tools
  - feat: show code outputs
  - fix: improve error handling in callTool function and log errors
  - fix: handle potential null value for filepath in attachment destructuring
  - fix: normalize language before rendering and prevent null return
  - fix: add loading indicator in RunCode component while executing code
  - feat: add support for conditional code execution in Markdown components
  - feat: attachments
  - refactor: remove bash
  - fix: pass abort signal to graph/run
  - refactor: debounce and rate limit tool call
  - refactor: increase debounce delay for execute function
  - feat: set code output attachments
  - feat: image attachments
  - refactor: apply message context
  - refactor: pass `partIndex`
  - feat: toolCall schema/model/methods
  - feat: block indexing
  - feat: get tool calls
  - chore: imports
  - chore: typing
  - chore: condense type imports
  - feat: get tool calls
  - fix: block indexing
  - chore: typing
  - refactor: update tool calls mapping to support multiple results
  - fix: add unique key to nav link for rendering
  - wip: first pass, tool call results
  - refactor: update query cache from successful tool call mutation
  - style: improve result switcher styling
  - chore: note on using `.toObject()`
  - feat: add agent_id field to conversation schema
  - chore: typing
  - refactor: rename agentMap to agentsMap for consistency
  - feat: Agent Name as chat input placeholder
  - chore: bump agents 📦
  - chore: update @langchain dependencies to latest versions to match agents package 📦
  - chore: update @librechat/agents dependency to version 1.8.0
  - fix: Aborting agent stream removes sender; fix(bedrock): completion removes preset name label
  - refactor: remove direct file parameter to use req.file, add `processAgentFileUpload` for image uploads
  - feat: upload menu
  - feat: prime message_file resources
  - feat: implement conversation access validation in chat route
  - refactor: remove file parameter from processFileUpload and use req.file instead
  - feat: add savedMessageIds set to track saved message IDs in BaseClient, to prevent unnecessary double-write to db
  - feat: prevent duplicate message saves by checking savedMessageIds in AgentController
  - refactor: skip legacy RAG API handling for agents
  - feat: add files field to convoSchema
  - refactor: update request type annotations from Express.Request to ServerRequest in file processing functions
  - feat: track conversation files
  - fix: resendFiles, addPreviousAttachments handling
  - feat: add ID validation for session_id and file_id in download route
  - feat: entity_id for code file uploads/downloads
  - fix: code file edge cases
  - feat: delete related tool calls
  - feat: add stream rate handling for LLM configuration
  - feat: enhance system content with attached file information
  - fix: improve error logging in resource priming function
* WIP: PoC, sequential agents
  - WIP: PoC Sequential Agents, first pass content data + bump agents package
  - fix: package-lock
  - WIP: PoC, o1 support, refactor bufferString
  - feat: convertJsonSchemaToZod
  - fix: form issues and schema defining erroneous model
  - fix: max length issue on agent form instructions, limit conversation messages to sequential agents
  - feat: add abort signal support to createRun function and AgentClient
  - feat: PoC, hide prior sequential agent steps
  - fix: update parameter naming from config to metadata in event handlers for clarity, add model to usage data
  - refactor: use only last contentData, track model for usage data
  - chore: bump agents package
  - fix: content parts issue
  - refactor: filter contentParts to include tool calls and relevant indices
  - feat: show function calls
  - refactor: filter context messages to exclude tool calls when no tools are available to the agent
  - fix: ensure tool call content is not undefined in formatMessages
  - feat: add agent_id field to conversationPreset schema
  - feat: hide sequential agents
  - feat: increase upload toast duration to 10 seconds
* refactor: tool context handling & update Code API Key Dialog
  - feat: toolContextMap
  - chore: skipSpecs -> useSpecs
  - ci: fix handleTools tests
  - feat: API Key Dialog
* feat: Agent Permissions Admin Controls
  - feat: replace label with button for prompt permission toggle
  - feat: update agent permissions
  - feat: enable experimental agents and streamline capability configuration
  - feat: implement access control for agents and enhance endpoint menu items
  - feat: add welcome message for agent selection in localization
  - feat: add agents permission to access control and update version to 0.7.57
* fix: update types in useAssistantListMap and useMentions hooks for better null handling
* feat: mention agents
* fix: agent tool resource race conditions when deleting agent tool resource files
* feat: add error handling for code execution with user feedback
* refactor: rename AdminControls to AdminSettings for clarity
* style: add gap to button in AdminSettings for improved layout
* refactor: separate agent query hooks and check access to enable fetching
* fix: remove unused provider from agent initialization options, creates issue with custom endpoints
* refactor: remove redundant/deprecated modelOptions from AgentClient processes
* chore: update @librechat/agents to version 1.8.5 in package.json and package-lock.json
* fix: minor styling issues + agent panel uniformity
* fix: agent edge cases when set endpoint is no longer defined
* refactor: remove unused cleanup function call from AppService
* fix: update link in ApiKeyDialog to point to pricing page
* fix: improve type handling and layout calculations in SidePanel component
* fix: add missing localization string for agent selection in SidePanel
* chore: form styling and localizations for upload filesearch/code interpreter
* fix: model selection placeholder logic in AgentConfig component
* style: agent capabilities
* fix: add localization for provider selection and improve dropdown styling in ModelPanel
* refactor: use gpt-4o-mini > gpt-3.5-turbo
* fix: agents configuration for loadDefaultInterface and update related tests
* feat: DALLE Agents support
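One item in the first group above, `feat: convertJsonSchemaToZod`, refers to converting JSON Schema tool definitions into Zod schemas. As a rough sketch of that kind of utility (an illustration of the general technique, not LibreChat's actual implementation; the coverage and function shape here are assumptions), a minimal converter for the common cases might look like this:

const { z } = require('zod');

// Minimal JSON-Schema-to-Zod sketch (illustrative; not LibreChat's implementation).
// Covers string enums, primitives, arrays, and objects with a `required` list.
function convertJsonSchemaToZod(schema = {}) {
  if (Array.isArray(schema.enum) && schema.enum.length > 0) {
    return z.enum(schema.enum);
  }
  switch (schema.type) {
    case 'string':
      return z.string();
    case 'number':
    case 'integer':
      return z.number();
    case 'boolean':
      return z.boolean();
    case 'array':
      return z.array(convertJsonSchemaToZod(schema.items ?? {}));
    case 'object': {
      const required = new Set(schema.required ?? []);
      const shape = {};
      for (const [key, value] of Object.entries(schema.properties ?? {})) {
        const field = convertJsonSchemaToZod(value);
        shape[key] = required.has(key) ? field : field.optional();
      }
      return z.object(shape);
    }
    default:
      return z.any();
  }
}

For example, `convertJsonSchemaToZod({ type: 'string', enum: ['vivid', 'natural'] })` yields the same kind of enum schema that the DALL-E tool below declares for its `style` parameter.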
202 lines · 8.8 KiB · JavaScript
const { z } = require('zod');
const path = require('path');
const OpenAI = require('openai');
const { v4: uuidv4 } = require('uuid');
const { Tool } = require('@langchain/core/tools');
const { HttpsProxyAgent } = require('https-proxy-agent');
const { FileContext } = require('librechat-data-provider');
const { getImageBasename } = require('~/server/services/Files/images');
const extractBaseURL = require('~/utils/extractBaseURL');
const { logger } = require('~/config');

class DALLE3 extends Tool {
  constructor(fields = {}) {
    super();
    /** @type {boolean} Used to initialize the Tool without necessary variables. */
    this.override = fields.override ?? false;
    /** @type {boolean} Necessary for output to contain all image metadata. */
    this.returnMetadata = fields.returnMetadata ?? false;

    this.userId = fields.userId;
    this.fileStrategy = fields.fileStrategy;
    /** @type {boolean} */
    this.isAgent = fields.isAgent;
    if (fields.processFileURL) {
      /** @type {processFileURL} Necessary for output to contain all image metadata. */
      this.processFileURL = fields.processFileURL.bind(this);
    }

    let apiKey = fields.DALLE3_API_KEY ?? fields.DALLE_API_KEY ?? this.getApiKey();
    const config = { apiKey };
    if (process.env.DALLE_REVERSE_PROXY) {
      config.baseURL = extractBaseURL(process.env.DALLE_REVERSE_PROXY);
    }

    if (process.env.DALLE3_AZURE_API_VERSION && process.env.DALLE3_BASEURL) {
      config.baseURL = process.env.DALLE3_BASEURL;
      config.defaultQuery = { 'api-version': process.env.DALLE3_AZURE_API_VERSION };
      config.defaultHeaders = {
        'api-key': process.env.DALLE3_API_KEY,
        'Content-Type': 'application/json',
      };
      config.apiKey = process.env.DALLE3_API_KEY;
    }

    if (process.env.PROXY) {
      config.httpAgent = new HttpsProxyAgent(process.env.PROXY);
    }

    /** @type {OpenAI} */
    this.openai = new OpenAI(config);
    this.name = 'dalle';
    this.description = `Use DALLE to create images from text descriptions.
- It requires prompts to be in English, detailed, and to specify image type and human features for diversity.
- Create only one image, without repeating or listing descriptions outside the "prompts" field.
- Maintains the original intent of the description, with parameters for image style, quality, and size to tailor the output.`;
    this.description_for_model =
      process.env.DALLE3_SYSTEM_PROMPT ??
      `// Whenever a description of an image is given, generate prompts (following these rules), and use dalle to create the image. If the user does not ask for a specific number of images, default to creating 2 prompts to send to dalle that are written to be as diverse as possible. All prompts sent to dalle must abide by the following policies:
// 1. Prompts must be in English. Translate to English if needed.
// 2. One image per function call. Create only 1 image per request unless explicitly told to generate more than 1 image.
// 3. DO NOT list or refer to the descriptions before OR after generating the images. They should ONLY ever be written out ONCE, in the \`"prompts"\` field of the request. You do not need to ask for permission to generate, just do it!
// 4. Always mention the image type (photo, oil painting, watercolor painting, illustration, cartoon, drawing, vector, render, etc.) at the beginning of the caption. Unless the captions suggests otherwise, make one of the images a photo.
// 5. Diversify depictions of ALL images with people to always include always DESCENT and GENDER for EACH person using direct terms. Adjust only human descriptions.
// - EXPLICITLY specify these attributes, not abstractly reference them. The attributes should be specified in a minimal way and should directly describe their physical form.
// - Your choices should be grounded in reality. For example, all of a given OCCUPATION should not be the same gender or race. Additionally, focus on creating diverse, inclusive, and exploratory scenes via the properties you choose during rewrites. Make choices that may be insightful or unique sometimes.
// - Use "various" or "diverse" ONLY IF the description refers to groups of more than 3 people. Do not change the number of people requested in the original description.
// - Don't alter memes, fictional character origins, or unseen people. Maintain the original prompt's intent and prioritize quality.
// The prompt must intricately describe every part of the image in concrete, objective detail. THINK about what the end goal of the description is, and extrapolate that to what would make satisfying images.
// All descriptions sent to dalle should be a paragraph of text that is extremely descriptive and detailed. Each should be more than 3 sentences long.
// - The "vivid" style is HIGHLY preferred, but "natural" is also supported.`;
    this.schema = z.object({
      prompt: z
        .string()
        .max(4000)
        .describe(
          'A text description of the desired image, following the rules, up to 4000 characters.',
        ),
      style: z
        .enum(['vivid', 'natural'])
        .describe(
          'Must be one of `vivid` or `natural`. `vivid` generates hyper-real and dramatic images, `natural` produces more natural, less hyper-real looking images',
        ),
      quality: z
        .enum(['hd', 'standard'])
        .describe('The quality of the generated image. Only `hd` and `standard` are supported.'),
      size: z
        .enum(['1024x1024', '1792x1024', '1024x1792'])
        .describe(
          'The size of the requested image. Use 1024x1024 (square) as the default, 1792x1024 if the user requests a wide image, and 1024x1792 for full-body portraits. Always include this parameter in the request.',
        ),
    });
  }
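
  /**
   * Example of tool-call arguments that satisfy the schema above.
   * Illustrative values only; not taken from the source:
   * {
   *   "prompt": "photo of a red fox standing in a snowy birch forest at dawn, soft warm light",
   *   "style": "vivid",
   *   "quality": "standard",
   *   "size": "1792x1024"
   * }
   */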

  getApiKey() {
    const apiKey = process.env.DALLE3_API_KEY ?? process.env.DALLE_API_KEY ?? '';
    if (!apiKey && !this.override) {
      throw new Error('Missing DALLE_API_KEY environment variable.');
    }
    return apiKey;
  }

  replaceUnwantedChars(inputString) {
    return inputString
      .replace(/\r\n|\r|\n/g, ' ')
      .replace(/"/g, '')
      .trim();
  }

  wrapInMarkdown(imageUrl) {
    return `![generated image](${imageUrl})`;
  }

  returnValue(value) {
    if (this.isAgent === true && typeof value === 'string') {
      return [value, {}];
    } else if (this.isAgent === true && typeof value === 'object') {
      return [
        'DALL-E displayed an image. All generated images are already plainly visible, so don\'t repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.',
        value,
      ];
    }

    return value;
  }

  async _call(data) {
    const { prompt, quality = 'standard', size = '1024x1024', style = 'vivid' } = data;
    if (!prompt) {
      throw new Error('Missing required field: prompt');
    }

    let resp;
    try {
      resp = await this.openai.images.generate({
        model: 'dall-e-3',
        quality,
        style,
        size,
        prompt: this.replaceUnwantedChars(prompt),
        n: 1,
      });
    } catch (error) {
      logger.error('[DALL-E-3] Problem generating the image:', error);
      return this
        .returnValue(`Something went wrong when trying to generate the image. The DALL-E API may be unavailable:
Error Message: ${error.message}`);
    }

    if (!resp) {
      return this.returnValue(
        'Something went wrong when trying to generate the image. The DALL-E API may be unavailable',
      );
    }

    const theImageUrl = resp.data[0].url;

    if (!theImageUrl) {
      return this.returnValue(
        'No image URL returned from OpenAI API. There may be a problem with the API or your configuration.',
      );
    }

    const imageBasename = getImageBasename(theImageUrl);
    const imageExt = path.extname(imageBasename);

    const extension = imageExt.startsWith('.') ? imageExt.slice(1) : imageExt;
    const imageName = `img-${uuidv4()}.${extension}`;

    logger.debug('[DALL-E-3]', {
      imageName,
      imageBasename,
      imageExt,
      extension,
      theImageUrl,
      data: resp.data[0],
    });

    try {
      const result = await this.processFileURL({
        URL: theImageUrl,
        basePath: 'images',
        userId: this.userId,
        fileName: imageName,
        fileStrategy: this.fileStrategy,
        context: FileContext.image_generation,
      });

      if (this.returnMetadata) {
        this.result = result;
      } else {
        this.result = this.wrapInMarkdown(result.filepath);
      }
    } catch (error) {
      logger.error('Error while saving the image:', error);
      this.result = `Failed to save the image locally. ${error.message}`;
    }

    return this.returnValue(this.result);
  }
}

module.exports = DALLE3;
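
For orientation, here is a hedged usage sketch of the tool above, outside of LibreChat's own tool-loading service. Everything below is illustrative: the require path, the user id, the 'local' file strategy value, and the `processFileURL` stub are assumptions, and the tool is exercised via its `_call` method directly rather than through the agent runtime.

// Illustrative only; not part of the repository. Assumes DALLE3_API_KEY (or
// DALLE_API_KEY) is set in the environment, optionally alongside
// DALLE3_BASEURL / DALLE3_AZURE_API_VERSION (Azure), DALLE_REVERSE_PROXY, or PROXY.
const DALLE3 = require('./DALLE3');

const tool = new DALLE3({
  userId: 'user-123', // hypothetical user id
  isAgent: true, // agent mode returns a [text, artifact] pair from returnValue()
  returnMetadata: true, // keep the full file record instead of a markdown image link
  fileStrategy: 'local', // hypothetical storage strategy name
  processFileURL: async (params) => {
    // Stub: LibreChat's real handler downloads the generated image and stores it.
    return { filepath: `/images/${params.fileName}`, ...params };
  },
});

(async () => {
  // Calling _call directly for simplicity; in LibreChat the agent/tool runtime invokes it.
  const output = await tool._call({
    prompt: 'watercolor painting of a lighthouse on a rocky coast at dusk',
    style: 'natural',
    quality: 'standard',
    size: '1024x1024',
  });
  console.log(output);
})();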