mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-09 12:08:50 +01:00
👓 feat: Vision Support for Assistants (#2195)
* refactor(assistants/chat): use promises to speed up initialization, initialize shared variables, include `attachedFileIds` to streamRunManager * chore: additional typedefs * fix(OpenAIClient): handle edge case where attachments promise is resolved * feat: createVisionPrompt * feat: Vision Support for Assistants
This commit is contained in:
parent
1f0fb497f8
commit
798e8763d0
16 changed files with 376 additions and 100 deletions
|
|
@ -92,7 +92,11 @@ class OpenAIClient extends BaseClient {
|
|||
}
|
||||
|
||||
this.defaultVisionModel = this.options.visionModel ?? 'gpt-4-vision-preview';
|
||||
this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));
|
||||
if (typeof this.options.attachments?.then === 'function') {
|
||||
this.options.attachments.then((attachments) => this.checkVisionRequest(attachments));
|
||||
} else {
|
||||
this.checkVisionRequest(this.options.attachments);
|
||||
}
|
||||
|
||||
const { OPENROUTER_API_KEY, OPENAI_FORCE_PROMPT } = process.env ?? {};
|
||||
if (OPENROUTER_API_KEY && !this.azure) {
|
||||
|
|
|
|||
34
api/app/clients/prompts/createVisionPrompt.js
Normal file
34
api/app/clients/prompts/createVisionPrompt.js
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
/**
 * Builds the prompt text that asks for a detailed description of one or more images.
 * The prompt lists the aspects to cover for three kinds of visual content:
 * photographs/illustrations/artwork, screenshots or images containing text,
 * and graphs/charts/data visualizations.
 *
 * @param {boolean} [pluralized=false] - When truthy, the opening sentence says "images" instead of "image".
 * @returns {string} The generated vision prompt.
 */
const createVisionPrompt = (pluralized = false) => {
  // Only the opening sentence varies; everything else is fixed text.
  const subject = `image${pluralized ? 's' : ''}`;

  // One entry per output line; an empty string yields a blank separator line.
  // Keeping the text as an explicit list makes the emitted prompt independent
  // of how this source file happens to be indented.
  const promptLines = [
    `Please describe the ${subject} in detail, covering relevant aspects such as:`,
    '',
    'For photographs, illustrations, or artwork:',
    '- The main subject(s) and their appearance, positioning, and actions',
    '- The setting, background, and any notable objects or elements',
    '- Colors, lighting, and overall mood or atmosphere',
    '- Any interesting details, textures, or patterns',
    '- The style, technique, or medium used (if discernible)',
    '',
    'For screenshots or images containing text:',
    '- The content and purpose of the text',
    '- The layout, formatting, and organization of the information',
    '- Any notable visual elements, such as logos, icons, or graphics',
    '- The overall context or message conveyed by the screenshot',
    '',
    'For graphs, charts, or data visualizations:',
    '- The type of graph or chart (e.g., bar graph, line chart, pie chart)',
    '- The variables being compared or analyzed',
    '- Any trends, patterns, or outliers in the data',
    '- The axis labels, scales, and units of measurement',
    '- The title, legend, and any additional context provided',
    '',
    'Be as specific and descriptive as possible while maintaining clarity and concision.',
  ];

  return promptLines.join('\n');
};
|
||||
|
||||
module.exports = createVisionPrompt;
|
||||
|
|
@ -4,6 +4,7 @@ const handleInputs = require('./handleInputs');
|
|||
const instructions = require('./instructions');
|
||||
const titlePrompts = require('./titlePrompts');
|
||||
const truncateText = require('./truncateText');
|
||||
const createVisionPrompt = require('./createVisionPrompt');
|
||||
const createContextHandlers = require('./createContextHandlers');
|
||||
|
||||
module.exports = {
|
||||
|
|
@ -13,5 +14,6 @@ module.exports = {
|
|||
...instructions,
|
||||
...titlePrompts,
|
||||
truncateText,
|
||||
createVisionPrompt,
|
||||
createContextHandlers,
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue