Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-02-05 17:21:50 +01:00)
feat: Vision Support + New UI (#1203)
* feat: add timer duration to showToast, show toast for preset selection
* refactor: replace old /chat/ route with /c/. e2e tests will fail here
* refactor: move typedefs to root of /api/ and add a few to assistant types in TS
* refactor: reorganize data-provider imports, fix dependency cycle, strategize new plan to separate react-dependent packages
* feat: add dataService for uploading images
* feat(data-provider): add mutation keys
* feat: file resizing and upload
* WIP: initial API image handling
* fix: catch JSON.parse of localStorage tools
* chore: experimental: use module-alias for absolute imports
* refactor: change temp_file_id strategy
* fix: update files state by using a Map and defining react-query callbacks in a way that keeps them during component unmount; initial delete handling
* feat: properly handle file deletion
* refactor: unexpose complete filepath and resize from server for higher fidelity
* fix: make sure resized height and width are saved; catch bad requests
* refactor: use absolute imports
* fix: prevent setOptions from being called more than once for OpenAIClient; made note to fix for PluginsClient
* refactor: import supportsFiles and models vars from schemas
* fix: correctly replace temp file id
* refactor(BaseClient): use absolute imports, pass message 'opts' to buildMessages method, count tokens for nested objects/arrays
* feat: add validateVisionModel to determine if model has vision capabilities
* chore(checkBalance): update jsdoc
* feat: formatVisionMessage: change message content format depending on role and image_urls passed
* refactor: add usage to File schema, add createFile and updateFile, correctly set and remove TTL
* feat: working vision support. TODO: file size, type, and amount validations; making sure they are styled right; making sure you can add images from the clipboard/dragging
* feat: clipboard support for uploading images
* feat: handle files on drop to screen; refactor top-level view code into Presentation component so the useDragHelpers hook has ChatContext
* fix(Images): replace uploaded images in place
* feat: add filepath validation to protect sensitive files
* fix: ensure correct file_ids are pushed and not the Map key values
* fix(ToastContext): type issue
* feat: add basic file validation
* fix(useDragHelpers): correct context issue with `files` dependency
* refactor: consolidate setErrors logic into setError
* feat: add dialog image overlay on image click
* fix: close endpoints menu on click
* chore: set detail to auto, make note for configuration
* fix: react warning (button descendant of button)
* refactor: optimize filepath handling, pass file_ids to images for easier re-use
* refactor: optimize image file handling, allow re-using files in regen, pass more file metadata in messages
* feat: lazy-load images, including use of the upload preview
* fix: SetKeyDialog closing; stopPropagation on Dialog content click
* style(EndpointMenuItem): tighten up the style, fix dark theme showing in light mode, make menu more UX-friendly
* style: change max height of all settings textareas to 138px from 300px
* style: better styling for textarea and enclosing buttons
* refactor(PresetItems): swap back edit and delete icons
* feat: make textarea placeholder dynamic to endpoint
* style: show user hover buttons only on hover while message is streaming
* fix: ordered list not going past 9, fix css
* feat: add User/AI labels; style: hide loading spinner
* feat: add back custom footer, change original footer text
* feat: dynamic landing icons based on endpoint
* chore: comment out assistants route
* fix: autoScroll to newest on /c/ view
* fix: Export Conversation on new UI
* style: match message style of official more closely
* ci: fix api jest unit tests, comment out e2e tests for now as they will fail until addressed
* feat: more file validation; use blob in the preview field, not filepath, to fix temp deletion
* feat: fileFilter for multer
* feat: better AI labels based on custom name, model, and endpoint instead of `ChatGPT`
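For reference, the payload shape the Vision work builds toward is the OpenAI chat completions format where a user message's `content` is an array of text and `image_url` parts. A rough sketch of a single-image request with `detail` left at `auto` (illustrative values, not code from this PR):

```js
// Illustrative OpenAI Vision request body; model, max_tokens, and the URL are example values.
const payload = {
  model: 'gpt-4-vision-preview',
  max_tokens: 4000,
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'What is in this image?' },
        {
          type: 'image_url',
          image_url: { url: 'data:image/jpeg;base64,<encoded image>', detail: 'auto' },
        },
      ],
    },
  ],
};
```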
This commit is contained in:
parent 345f4b2e85
commit 317cdd3f77

113 changed files with 2680 additions and 675 deletions
@@ -1,8 +1,8 @@
 const crypto = require('crypto');
 const TextStream = require('./TextStream');
-const { getConvo, getMessages, saveMessage, updateMessage, saveConvo } = require('../../models');
-const { addSpaceIfNeeded, isEnabled } = require('../../server/utils');
-const checkBalance = require('../../models/checkBalance');
+const { getConvo, getMessages, saveMessage, updateMessage, saveConvo } = require('~/models');
+const { addSpaceIfNeeded, isEnabled } = require('~/server/utils');
+const checkBalance = require('~/models/checkBalance');

 class BaseClient {
   constructor(apiKey, options = {}) {

@@ -62,7 +62,7 @@ class BaseClient {
   }

   async setMessageOptions(opts = {}) {
-    if (opts && typeof opts === 'object') {
+    if (opts && opts.replaceOptions) {
       this.setOptions(opts);
     }

@@ -417,6 +417,7 @@ class BaseClient {
       // this only matters when buildMessages is utilizing the parentMessageId, and may vary on implementation
       isEdited ? head : userMessage.messageId,
       this.getBuildMessagesOptions(opts),
+      opts,
     );

     if (tokenCountMap) {

@@ -636,14 +637,27 @@ class BaseClient {
       tokensPerName = -1;
     }

+    const processValue = (value) => {
+      if (typeof value === 'object' && value !== null) {
+        for (let [nestedKey, nestedValue] of Object.entries(value)) {
+          if (nestedKey === 'image_url' || nestedValue === 'image_url') {
+            continue;
+          }
+          processValue(nestedValue);
+        }
+      } else {
+        numTokens += this.getTokenCount(value);
+      }
+    };
+
     let numTokens = tokensPerMessage;
     for (let [key, value] of Object.entries(message)) {
-      numTokens += this.getTokenCount(value);
+      processValue(value);

       if (key === 'name') {
         numTokens += tokensPerName;
       }
     }

     return numTokens;
   }
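The new processValue helper above is what lets the per-message token counter cope with Vision messages, whose content is an array of parts rather than a flat string: it recurses into nested objects and arrays, counts tokens for each string leaf, and skips image_url entries, whose cost is not derived from text. A sketch of the kind of message it now has to traverse (illustrative values only):

```js
// Illustrative Vision-style message: `content` is an array of parts, so the old
// flat getTokenCount(value) call would no longer measure it correctly.
const visionMessage = {
  role: 'user',
  content: [
    { type: 'text', text: 'Describe this picture.' }, // text parts are token-counted
    {
      type: 'image_url', // matched by processValue and skipped
      image_url: { url: 'data:image/png;base64,<encoded image>', detail: 'auto' },
    },
  ],
};
```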
@@ -1,12 +1,14 @@
 const OpenAI = require('openai');
 const { HttpsProxyAgent } = require('https-proxy-agent');
 const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
-const { getModelMaxTokens, genAzureChatCompletion, extractBaseURL } = require('../../utils');
+const { encodeAndFormat, validateVisionModel } = require('~/server/services/Files/images');
+const { getModelMaxTokens, genAzureChatCompletion, extractBaseURL } = require('~/utils');
 const { truncateText, formatMessage, CUT_OFF_PROMPT } = require('./prompts');
-const spendTokens = require('../../models/spendTokens');
+const { getResponseSender, EModelEndpoint } = require('~/server/routes/endpoints/schemas');
 const { handleOpenAIErrors } = require('./tools/util');
-const { isEnabled } = require('../../server/utils');
+const spendTokens = require('~/models/spendTokens');
 const { createLLM, RunManager } = require('./llm');
+const { isEnabled } = require('~/server/utils');
 const ChatGPTClient = require('./ChatGPTClient');
 const { summaryBuffer } = require('./memory');
 const { runTitleChain } = require('./chains');

@@ -24,7 +26,6 @@ class OpenAIClient extends BaseClient {
     this.ChatGPTClient = new ChatGPTClient();
     this.buildPrompt = this.ChatGPTClient.buildPrompt.bind(this);
     this.getCompletion = this.ChatGPTClient.getCompletion.bind(this);
-    this.sender = options.sender ?? 'ChatGPT';
     this.contextStrategy = options.contextStrategy
       ? options.contextStrategy.toLowerCase()
       : 'discard';

@@ -33,6 +34,7 @@ class OpenAIClient extends BaseClient {
     this.setOptions(options);
   }

+  // TODO: PluginsClient calls this 3x, unneeded
   setOptions(options) {
     if (this.options && !this.options.replaceOptions) {
       this.options.modelOptions = {

@@ -53,6 +55,7 @@ class OpenAIClient extends BaseClient {
     }

     const modelOptions = this.options.modelOptions || {};
+
     if (!this.modelOptions) {
       this.modelOptions = {
         ...modelOptions,

@@ -72,6 +75,14 @@ class OpenAIClient extends BaseClient {
       };
     }

+    if (this.options.attachments && !validateVisionModel(this.modelOptions.model)) {
+      this.modelOptions.model = 'gpt-4-vision-preview';
+    }
+
+    if (validateVisionModel(this.modelOptions.model)) {
+      delete this.modelOptions.stop;
+    }
+
     const { OPENROUTER_API_KEY, OPENAI_FORCE_PROMPT } = process.env ?? {};
     if (OPENROUTER_API_KEY && !this.azure) {
       this.apiKey = OPENROUTER_API_KEY;
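setOptions now leans on validateVisionModel to force gpt-4-vision-preview when attachments are present and to drop stop sequences for vision models. The helper itself is outside this excerpt (it is imported from the image services above); a minimal sketch of what such a check might look like, with the model list as an assumption:

```js
// Hypothetical sketch of a vision-capability check; the exported helper in
// ~/server/services/Files/images may use a different model list or matching rule.
const visionModels = ['gpt-4-vision-preview'];

function validateVisionModel(model) {
  if (!model) {
    return false;
  }
  // Treat any model whose name contains a known vision model id as vision-capable.
  return visionModels.some((visionModel) => model.includes(visionModel));
}

module.exports = { validateVisionModel };
```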
@@ -127,12 +138,20 @@ class OpenAIClient extends BaseClient {
       );
     }

+    this.sender =
+      this.options.sender ??
+      getResponseSender({
+        model: this.modelOptions.model,
+        endpoint: EModelEndpoint.openAI,
+        chatGptLabel: this.options.chatGptLabel,
+      });
+
     this.userLabel = this.options.userLabel || 'User';
     this.chatGptLabel = this.options.chatGptLabel || 'Assistant';

     this.setupTokens();

-    if (!this.modelOptions.stop) {
+    if (!this.modelOptions.stop && !validateVisionModel(this.modelOptions.model)) {
       const stopTokens = [this.startToken];
       if (this.endToken && this.endToken !== this.startToken) {
         stopTokens.push(this.endToken);

@@ -284,6 +303,7 @@ class OpenAIClient extends BaseClient {
     messages,
     parentMessageId,
     { isChatCompletion = false, promptPrefix = null },
+    opts,
   ) {
     let orderedMessages = this.constructor.getMessagesForConversation({
       messages,

@@ -316,6 +336,17 @@ class OpenAIClient extends BaseClient {
       }
     }

+    if (this.options.attachments) {
+      const attachments = await this.options.attachments;
+      const { files, image_urls } = await encodeAndFormat(
+        this.options.req,
+        attachments.filter((file) => file.type.includes('image')),
+      );
+
+      orderedMessages[orderedMessages.length - 1].image_urls = image_urls;
+      this.options.attachments = files;
+    }
+
     const formattedMessages = orderedMessages.map((message, i) => {
       const formattedMessage = formatMessage({
         message,

@@ -350,8 +381,8 @@ class OpenAIClient extends BaseClient {
       result.tokenCountMap = tokenCountMap;
     }

-    if (promptTokens >= 0 && typeof this.options.getReqData === 'function') {
-      this.options.getReqData({ promptTokens });
+    if (promptTokens >= 0 && typeof opts?.getReqData === 'function') {
+      opts.getReqData({ promptTokens });
     }

     return result;

@@ -730,6 +761,10 @@ ${convo}
       opts.httpAgent = new HttpsProxyAgent(this.options.proxy);
     }

+    if (validateVisionModel(modelOptions.model)) {
+      modelOptions.max_tokens = 4000;
+    }
+
     let chatCompletion;
     const openai = new OpenAI({
       apiKey: this.apiKey,
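In buildMessages above, encodeAndFormat turns the uploaded image attachments into image_url entries that get attached to the newest message, while the returned file metadata replaces this.options.attachments. The service is not part of this excerpt, so the exact return shape below is an assumption, sketched from how the result is consumed here and in formatVisionMessage:

```js
// Hypothetical shape of the encodeAndFormat result; only the `files` and `image_urls`
// names come from this diff, the inner fields are illustrative assumptions.
const { files, image_urls } = {
  files: [
    { file_id: 'example-file-id', type: 'image/png', height: 512, width: 512 },
  ],
  image_urls: [
    {
      type: 'image_url',
      image_url: { url: 'data:image/png;base64,<encoded image>', detail: 'auto' },
    },
  ],
};
```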
@@ -1,5 +1,21 @@
 const { HumanMessage, AIMessage, SystemMessage } = require('langchain/schema');

+/**
+ * Formats a message to OpenAI Vision API payload format.
+ *
+ * @param {Object} params - The parameters for formatting.
+ * @param {Object} params.message - The message object to format.
+ * @param {string} [params.message.role] - The role of the message sender (must be 'user').
+ * @param {string} [params.message.content] - The text content of the message.
+ * @param {Array<string>} [params.image_urls] - The image_urls to attach to the message.
+ * @returns {(Object)} - The formatted message.
+ */
+const formatVisionMessage = ({ message, image_urls }) => {
+  message.content = [{ type: 'text', text: message.content }, ...image_urls];
+
+  return message;
+};
+
 /**
  * Formats a message to OpenAI payload format based on the provided options.
  *

@@ -10,6 +26,7 @@ const { HumanMessage, AIMessage, SystemMessage } = require('langchain/schema');
  * @param {string} [params.message.sender] - The sender of the message.
  * @param {string} [params.message.text] - The text content of the message.
  * @param {string} [params.message.content] - The content of the message.
+ * @param {Array<string>} [params.message.image_urls] - The image_urls attached to the message for Vision API.
  * @param {string} [params.userName] - The name of the user.
  * @param {string} [params.assistantName] - The name of the assistant.
  * @param {boolean} [params.langChain=false] - Whether to return a LangChain message object.

@@ -32,6 +49,11 @@ const formatMessage = ({ message, userName, assistantName, langChain = false })
     content,
   };

+  const { image_urls } = message;
+  if (Array.isArray(image_urls) && image_urls.length > 0 && role === 'user') {
+    return formatVisionMessage({ message: formattedMessage, image_urls: message.image_urls });
+  }
+
   if (_name) {
     formattedMessage.name = _name;
   }
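Taken together, a user message that carries image_urls now leaves formatMessage with a Vision-style content array instead of a plain string. A rough usage sketch (the import path and field values are illustrative, not taken from this diff):

```js
// Illustrative call; the real module path and incoming message fields may differ slightly.
const { formatMessage } = require('./prompts');

const formatted = formatMessage({
  message: {
    sender: 'User',
    text: 'What is in this image?',
    image_urls: [
      { type: 'image_url', image_url: { url: 'https://example.com/photo.png', detail: 'auto' } },
    ],
  },
});

// formatted.role is 'user', and formatted.content is now an array:
// [{ type: 'text', text: 'What is in this image?' }, { type: 'image_url', image_url: { ... } }]
```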
@@ -529,9 +529,9 @@ describe('BaseClient', () => {
     );
   });

-  test('setOptions is called with the correct arguments', async () => {
+  test('setOptions is called with the correct arguments only when replaceOptions is set to true', async () => {
     TestClient.setOptions = jest.fn();
-    const opts = { conversationId: '123', parentMessageId: '456' };
+    const opts = { conversationId: '123', parentMessageId: '456', replaceOptions: true };
     await TestClient.sendMessage('Hello, world!', opts);
     expect(TestClient.setOptions).toHaveBeenCalledWith(opts);
     TestClient.setOptions.mockClear();