LibreChat/api/app/clients/tools/DALL-E.js
Danny Avila ea1dd59ef4
refactor(api): Central Logging 📜 (#1348)
* WIP: initial logging changes
add several transports in ~/config/winston
omit messages in logs, truncate long strings
add short blurb in dotenv for debug logging
GoogleClient: using logger
OpenAIClient: using logger, handleOpenAIErrors
Adding typedef for payload message
bumped winston and using winston-daily-rotate-file
moved config for server paths to ~/config dir
Added `DEBUG_LOGGING=true` to .env.example

* WIP: Refactor logging statements in code

* WIP: Refactor logging statements and import configurations

* WIP: Refactor logging statements and import configurations

* refactor: broadcast Redis initialization message with `info` not `debug`

* refactor: complete Refactor logging statements and import configurations

* chore: delete unused tools

* fix: circular dependencies due to accessing logger

* refactor(handleText): handle booleans and write tests

* refactor: redact sensitive values, better formatting

* chore: improve log formatting, avoid passing strings to 2nd arg

* fix(ci): fix jest tests due to logger changes

* refactor(getAvailablePluginsController): cache plugins as they are static and avoids async addOpenAPISpecs call every time

* chore: update docs

* chore: update docs

* chore: create separate meiliSync logger, clean up logs to avoid being unnecessarily verbose

* chore: spread objects where they are commonly logged to allow string truncation

* chore: improve error log formatting
2023-12-14 07:49:27 -05:00

135 lines
4.7 KiB
JavaScript

// From https://platform.openai.com/docs/api-reference/images/create
// To use this tool, supply an API key via `fields.DALLE_API_KEY` or the DALLE_API_KEY environment variable; the OpenAI client is created internally.
const fs = require('fs');
const path = require('path');
const OpenAI = require('openai');
// const { genAzureEndpoint } = require('~/utils/genAzureEndpoints');
const { Tool } = require('langchain/tools');
const { HttpsProxyAgent } = require('https-proxy-agent');
const extractBaseURL = require('~/utils/extractBaseURL');
const saveImageFromUrl = require('./saveImageFromUrl');
const { logger } = require('~/config');
// Optional environment settings, read once when this module is loaded:
// DALLE_REVERSE_PROXY supplies a custom base URL and PROXY an outbound proxy for the OpenAI client.
const { DALLE_REVERSE_PROXY, PROXY } = process.env;
/**
 * LangChain tool that generates an image from a text prompt through the
 * OpenAI Images API and saves it under the client's `public/images` folder.
 *
 * @see https://platform.openai.com/docs/api-reference/images/create
 */
class OpenAICreateImage extends Tool {
  /**
   * Builds the OpenAI client and sets the tool's name and description.
   *
   * @param {object} [fields] - Optional overrides.
   * @param {string} [fields.DALLE_API_KEY] - API key; when omitted, the
   *   DALLE_API_KEY environment variable is used instead.
   * @throws {Error} When no API key is available from either source.
   */
  constructor(fields = {}) {
    super();

    const apiKey = fields.DALLE_API_KEY || this.getApiKey();
    const config = { apiKey };
    if (DALLE_REVERSE_PROXY) {
      config.baseURL = extractBaseURL(DALLE_REVERSE_PROXY);
    }

    if (PROXY) {
      config.httpAgent = new HttpsProxyAgent(PROXY);
    }
    // let azureKey = fields.AZURE_API_KEY || process.env.AZURE_API_KEY;

    // if (azureKey) {
    //   apiKey = azureKey;
    //   const azureConfig = {
    //     apiKey,
    //     azureOpenAIApiInstanceName: process.env.AZURE_OPENAI_API_INSTANCE_NAME || fields.azureOpenAIApiInstanceName,
    //     azureOpenAIApiDeploymentName: process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME || fields.azureOpenAIApiDeploymentName,
    //     azureOpenAIApiVersion: process.env.AZURE_OPENAI_API_VERSION || fields.azureOpenAIApiVersion
    //   };
    //   config = {
    //     apiKey,
    //     basePath: genAzureEndpoint({
    //       ...azureConfig,
    //     }),
    //     baseOptions: {
    //       headers: { 'api-key': apiKey },
    //       params: {
    //         'api-version': azureConfig.azureOpenAIApiVersion // this might change. I got the current value from the sample code at https://oai.azure.com/portal/chat
    //       }
    //     }
    //   };
    // }
    this.openai = new OpenAI(config);
    this.name = 'dall-e';
    // The continuation lines of this template literal are intentionally flush
    // left: any leading whitespace would become part of the description text.
    this.description = `You can generate images with 'dall-e'. This tool is exclusively for visual content.
Guidelines:
- Visually describe the moods, details, structures, styles, and/or proportions of the image. Remember, the focus is on visual attributes.
- Craft your input by "showing" and not "telling" the imagery. Think in terms of what you'd want to see in a photograph or a painting.
- It's best to follow this format for image creation. Come up with the optional inputs yourself if none are given:
"Subject: [subject], Style: [style], Color: [color], Details: [details], Emotion: [emotion]"
- Generate images only once per human query unless explicitly requested by the user`;
  }

  /**
   * Reads the API key from the DALLE_API_KEY environment variable.
   *
   * @returns {string} The non-empty API key.
   * @throws {Error} When the variable is unset or empty.
   */
  getApiKey() {
    const apiKey = process.env.DALLE_API_KEY || '';
    if (!apiKey) {
      throw new Error('Missing DALLE_API_KEY environment variable.');
    }
    return apiKey;
  }

  /**
   * Normalizes a prompt: every line break becomes a single space, double
   * quotes are removed, and surrounding whitespace is trimmed.
   *
   * @param {string} inputString - Raw prompt text.
   * @returns {string} The cleaned prompt.
   */
  replaceUnwantedChars(inputString) {
    return inputString
      .replace(/\r\n|\r|\n/g, ' ')
      .replace(/"/g, '')
      .trim();
  }

  /**
   * Builds the markdown image tag for a saved image.
   *
   * Reads `this.relativeImageUrl`, which is assigned inside `_call`, so this
   * method is only usable after `_call` has set it.
   *
   * @param {string} imageName - File name of the saved image.
   * @returns {string} Markdown of the form `![generated image](/<url>)`.
   */
  getMarkdownImageUrl(imageName) {
    const imageUrl = path
      .join(this.relativeImageUrl, imageName)
      // Windows path separators are not valid in a URL.
      .replace(/\\/g, '/')
      // Drop the first `public/` segment from the joined path.
      .replace('public/', '');
    return `![generated image](/${imageUrl})`;
  }

  /**
   * Generates one 512x512 image for the prompt, saves it locally, and returns
   * a reference to it.
   *
   * @param {string} input - The image prompt.
   * @returns {Promise<string>} Markdown pointing at the saved image, or the
   *   remote image URL when saving the file fails.
   * @throws {Error} When the API response contains no image URL.
   */
  async _call(input) {
    const resp = await this.openai.images.generate({
      prompt: this.replaceUnwantedChars(input),
      // TODO: Future idea -- could we ask an LLM to extract these arguments from an input that might contain them?
      n: 1,
      // size: '1024x1024'
      size: '512x512',
    });

    // Guard every level so that an empty or malformed response raises the
    // descriptive error below rather than an opaque TypeError.
    const theImageUrl = resp?.data?.[0]?.url;
    if (!theImageUrl) {
      throw new Error('No image URL returned from OpenAI API.');
    }

    // The dot is escaped so that only a literal ".png" extension matches.
    const regex = /img-\w+\.png/;
    const match = theImageUrl.match(regex);

    // Fallback file name used when the URL carries no recognizable image id.
    let imageName = '1.png';
    if (match) {
      imageName = match[0];
      logger.debug('[DALL-E]', { imageName }); // Output: img-lgCf7ppcbhqQrz6a5ear6FOb.png
    } else {
      logger.debug('[DALL-E] No image name found in the string.', {
        theImageUrl,
        data: resp.data[0],
      });
    }

    this.outputPath = path.resolve(__dirname, '..', '..', '..', '..', 'client', 'public', 'images');
    const appRoot = path.resolve(__dirname, '..', '..', '..', '..', 'client');
    this.relativeImageUrl = path.relative(appRoot, this.outputPath);

    // Check if directory exists, if not create it
    if (!fs.existsSync(this.outputPath)) {
      fs.mkdirSync(this.outputPath, { recursive: true });
    }

    try {
      await saveImageFromUrl(theImageUrl, this.outputPath, imageName);
      this.result = this.getMarkdownImageUrl(imageName);
    } catch (error) {
      // Best effort: if the file cannot be saved, return the remote URL.
      logger.error('Error while saving the DALL-E image:', error);
      this.result = theImageUrl;
    }

    return this.result;
  }
}
// CommonJS export: the tool class is this module's sole export.
module.exports = OpenAICreateImage;