📸 feat: Gemini vision, Improved Logs and Multi-modal Handling (#1368)

* feat: add GOOGLE_MODELS env var

* feat: add gemini vision support

* refactor(GoogleClient): adjust clientOptions handling depending on model

* fix(logger): fix redaction logic and redact errors only

* fix(GoogleClient): do not allow non-multimodal messages when gemini-pro-vision is selected

* refactor(OpenAIClient): use the `isVisionModel` client property to avoid calling validateVisionModel multiple times

* refactor: better debug logging by correctly traversing logged objects, redacting sensitive info, and logging condensed versions of long values

* refactor(GoogleClient): allow response errors to be thrown/caught above client handling so the user receives a meaningful error message; also debug-log orderedMessages, parentMessageId, and the buildMessages result

* refactor(AskController): use the model from `client.modelOptions.model` when saving intermediate messages, which requires the progress callback to be initialized after the client is initialized

* feat(useSSE): revert to the previous model if the backend auto-switched it due to message attachments

* docs: update Google instructions and add notes about Gemini Pro Vision

* fix: do not initialize Redis unless USE_REDIS is enabled, and increase max listeners to 20 (see the sketch below)
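
The Redis guard follows the same `isEnabled(process.env.USE_REDIS)` pattern visible in the ModelService diff below. A minimal sketch of that pattern, assuming a Keyv-backed cache — the `createCache` helper name is illustrative, and the `keyvRedis` require path is an assumption, not the project's confirmed layout:

const Keyv = require('keyv');
const { isEnabled } = require('~/server/utils');
const keyvRedis = require('~/cache/keyvRedis'); // assumed path

// Only touch Redis when USE_REDIS is explicitly enabled; otherwise fall
// back to an in-memory Keyv store.
function createCache(namespace) {
  return isEnabled(process.env.USE_REDIS)
    ? new Keyv({ store: keyvRedis })
    : new Keyv({ namespace });
}

// The max-listeners bump presumably lands on the shared Redis connection,
// e.g. keyvRedis.setMaxListeners(20), to silence EventEmitter leak warnings.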
Danny Avila · 2023-12-16 20:45:27 -05:00 · committed by GitHub
parent 676f133545 · commit 0c326797dd
21 changed files with 356 additions and 210 deletions

api/server/controllers/AskController.js

@@ -43,46 +43,51 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
     }
   };

-  const { onProgress: progressCallback, getPartialText } = createOnProgress({
-    onProgress: ({ text: partialText }) => {
-      const currentTimestamp = Date.now();
-      if (currentTimestamp - lastSavedTimestamp > saveDelay) {
-        lastSavedTimestamp = currentTimestamp;
-        saveMessage({
-          messageId: responseMessageId,
-          sender,
-          conversationId,
-          parentMessageId: overrideParentMessageId ?? userMessageId,
-          text: partialText,
-          model: endpointOption.modelOptions.model,
-          unfinished: true,
-          cancelled: false,
-          error: false,
-          user,
-        });
-      }
-
-      if (saveDelay < 500) {
-        saveDelay = 500;
-      }
-    },
-  });
-
-  const getAbortData = () => ({
-    sender,
-    conversationId,
-    messageId: responseMessageId,
-    parentMessageId: overrideParentMessageId ?? userMessageId,
-    text: getPartialText(),
-    userMessage,
-    promptTokens,
-  });
-
-  const { abortController, onStart } = createAbortController(req, res, getAbortData);
-
+  let getText;
+
   try {
     const { client } = await initializeClient({ req, res, endpointOption });
+
+    const { onProgress: progressCallback, getPartialText } = createOnProgress({
+      onProgress: ({ text: partialText }) => {
+        const currentTimestamp = Date.now();
+        if (currentTimestamp - lastSavedTimestamp > saveDelay) {
+          lastSavedTimestamp = currentTimestamp;
+          saveMessage({
+            messageId: responseMessageId,
+            sender,
+            conversationId,
+            parentMessageId: overrideParentMessageId ?? userMessageId,
+            text: partialText,
+            model: client.modelOptions.model,
+            unfinished: true,
+            cancelled: false,
+            error: false,
+            user,
+          });
+        }
+
+        if (saveDelay < 500) {
+          saveDelay = 500;
+        }
+      },
+    });
+
+    getText = getPartialText;
+
+    const getAbortData = () => ({
+      sender,
+      conversationId,
+      messageId: responseMessageId,
+      parentMessageId: overrideParentMessageId ?? userMessageId,
+      text: getPartialText(),
+      userMessage,
+      promptTokens,
+    });
+
+    const { abortController, onStart } = createAbortController(req, res, getAbortData);
+
     const messageOptions = {
       user,
       parentMessageId,
@@ -134,7 +139,7 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
       });
     }
   } catch (error) {
-    const partialText = getPartialText();
+    const partialText = getText && getText();
     handleAbortError(res, req, error, {
       partialText,
       conversationId,
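
Note the interplay between the hoisted `getText` and the `catch` block: `createOnProgress` now runs only after `initializeClient` succeeds, so an early failure leaves `getText` unset, and the guarded call `getText && getText()` avoids a secondary TypeError while still surfacing any partial text. A standalone sketch of the pattern (the stub helpers below only mimic the real ones):

// Standalone illustration of the getText guard; stubs, not the real helpers.
const createOnProgress = () => ({ getPartialText: () => 'partial text so far' });
const initialize = async () => {
  throw new Error('client initialization failed');
};

async function run() {
  let getText;
  try {
    await initialize(); // throws before getText is ever assigned
    const { getPartialText } = createOnProgress();
    getText = getPartialText;
  } catch (error) {
    // getText is undefined when initialization failed early, so the guarded
    // call avoids a secondary TypeError while preserving any partial text.
    const partialText = getText && getText();
    console.error(error.message, { partialText });
  }
}

run();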

api/server/middleware/abortMiddleware.js

@@ -2,6 +2,7 @@ const { sendMessage, sendError, countTokens, isEnabled } = require('~/server/uti
 const { saveMessage, getConvo, getConvoTitle } = require('~/models');
 const clearPendingReq = require('~/cache/clearPendingReq');
 const abortControllers = require('./abortControllers');
+const { redactMessage } = require('~/config/parsers');
 const spendTokens = require('~/models/spendTokens');
 const { logger } = require('~/config');
@@ -92,7 +93,7 @@ const handleAbortError = async (res, req, error, data) => {
     messageId,
     conversationId,
     parentMessageId,
-    text: error.message,
+    text: redactMessage(error.message),
     shouldSaveMessage: true,
     user: req.user.id,
   };
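
The implementation of `redactMessage` is not shown in this diff; the sketch below is only a guess at its shape — the patterns are invented for illustration — conveying the intent of stripping sensitive values before the error text is saved as a message:

// Illustrative only: the real redactMessage in ~/config/parsers may use
// entirely different rules. The idea is to scrub secrets before persisting
// an error message as chat text.
const REDACTION_PATTERNS = [
  /(api[-_]?key["':\s=]*)[A-Za-z0-9_-]+/gi, // hypothetical API-key pattern
  /(Bearer\s+)[A-Za-z0-9\-._~+/]+=*/g, // hypothetical bearer-token pattern
];

function redactMessage(message = '') {
  return REDACTION_PATTERNS.reduce(
    (text, pattern) => text.replace(pattern, '$1[REDACTED]'),
    message,
  );
}

console.log(redactMessage('Request failed: api_key=sk-12345 was rejected'));
// -> 'Request failed: api_key=[REDACTED] was rejected'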

api/server/services/Config/loadDefaultModels.js

@@ -1,9 +1,10 @@
-const { EModelEndpoint, defaultModels } = require('librechat-data-provider');
+const { EModelEndpoint } = require('librechat-data-provider');
 const { useAzurePlugins } = require('~/server/services/Config/EndpointService').config;
 const {
   getOpenAIModels,
-  getChatGPTBrowserModels,
+  getGoogleModels,
   getAnthropicModels,
+  getChatGPTBrowserModels,
 } = require('~/server/services/ModelService');

 const fitlerAssistantModels = (str) => {
@@ -11,6 +12,7 @@ const fitlerAssistantModels = (str) => {
 };

 async function loadDefaultModels() {
+  const google = getGoogleModels();
   const openAI = await getOpenAIModels();
   const anthropic = getAnthropicModels();
   const chatGPTBrowser = getChatGPTBrowserModels();
@@ -19,13 +21,13 @@ async function loadDefaultModels() {
   return {
     [EModelEndpoint.openAI]: openAI,
+    [EModelEndpoint.google]: google,
+    [EModelEndpoint.anthropic]: anthropic,
+    [EModelEndpoint.gptPlugins]: gptPlugins,
     [EModelEndpoint.azureOpenAI]: azureOpenAI,
+    [EModelEndpoint.assistant]: openAI.filter(fitlerAssistantModels),
-    [EModelEndpoint.google]: defaultModels[EModelEndpoint.google],
     [EModelEndpoint.bingAI]: ['BingAI', 'Sydney'],
     [EModelEndpoint.chatGPTBrowser]: chatGPTBrowser,
-    [EModelEndpoint.gptPlugins]: gptPlugins,
-    [EModelEndpoint.anthropic]: anthropic,
-    [EModelEndpoint.assistant]: openAI.filter(fitlerAssistantModels),
   };
 }
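
A rough sketch of the object `loadDefaultModels()` now resolves to; the `google` entry comes from `getGoogleModels()` (overridable via `GOOGLE_MODELS`, see the ModelService diff below) instead of the static `defaultModels` import. All model names here are examples, not the project's exact defaults:

// Illustrative result shape only; names are assumptions.
const exampleEndpointModels = {
  openAI: ['gpt-4-1106-preview', 'gpt-3.5-turbo'],
  google: ['gemini-pro', 'gemini-pro-vision', 'chat-bison'],
  anthropic: ['claude-2.1'],
  bingAI: ['BingAI', 'Sydney'],
};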

api/server/services/Files/images/encode.js

@@ -1,7 +1,13 @@
 const fs = require('fs');
 const path = require('path');
+const { EModelEndpoint } = require('librechat-data-provider');
 const { updateFile } = require('~/models');

+/**
+ * Encodes an image file to base64.
+ * @param {string} imagePath - The path to the image file.
+ * @returns {Promise<string>} A promise that resolves with the base64 encoded image data.
+ */
 function encodeImage(imagePath) {
   return new Promise((resolve, reject) => {
     fs.readFile(imagePath, (err, data) => {
@@ -14,6 +20,12 @@ function encodeImage(imagePath) {
   });
 }

+/**
+ * Updates the file and encodes the image.
+ * @param {Object} req - The request object.
+ * @param {Object} file - The file object.
+ * @returns {Promise<[MongoFile, string]>} - A promise that resolves to an array of results from updateFile and encodeImage.
+ */
 async function updateAndEncode(req, file) {
   const { publicPath, imageOutput } = req.app.locals.config;
   const userPath = path.join(imageOutput, req.user.id);
@@ -29,7 +41,14 @@ async function updateAndEncode(req, file) {
   return await Promise.all(promises);
 }

-async function encodeAndFormat(req, files) {
+/**
+ * Encodes and formats the given files.
+ * @param {Express.Request} req - The request object.
+ * @param {Array<MongoFile>} files - The array of files to encode and format.
+ * @param {EModelEndpoint} [endpoint] - Optional: The endpoint for the image.
+ * @returns {Promise<Object>} - A promise that resolves to the result object containing the encoded images and file details.
+ */
+async function encodeAndFormat(req, files, endpoint) {
   const promises = [];
   for (let file of files) {
     promises.push(updateAndEncode(req, file));
@@ -46,13 +65,19 @@
   };

   for (const [file, base64] of encodedImages) {
-    result.image_urls.push({
+    const imagePart = {
       type: 'image_url',
       image_url: {
         url: `data:image/webp;base64,${base64}`,
         detail,
       },
-    });
+    };
+
+    if (endpoint && endpoint === EModelEndpoint.google) {
+      imagePart.image_url = imagePart.image_url.url;
+    }
+
+    result.image_urls.push(imagePart);

     result.files.push({
       file_id: file.file_id,
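
The new `endpoint` parameter exists because the OpenAI-style payload carries `image_url` as an object with `url` and `detail`, while the Google (gemini-pro-vision) path in this codebase consumes the bare string. A minimal sketch of the two shapes the loop can now produce (base64 payloads truncated):

// OpenAI-style content part: image_url is an object with url + detail.
const openAIPart = {
  type: 'image_url',
  image_url: { url: 'data:image/webp;base64,UklGR...', detail: 'auto' },
};

// Google-style part after the reassignment: image_url collapses to the
// bare data URL string.
const googlePart = {
  type: 'image_url',
  image_url: 'data:image/webp;base64,UklGR...',
};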

api/server/services/ModelService.js

@@ -15,8 +15,14 @@ const modelsCache = isEnabled(process.env.USE_REDIS)
   ? new Keyv({ store: keyvRedis })
   : new Keyv({ namespace: 'models' });

-const { OPENROUTER_API_KEY, OPENAI_REVERSE_PROXY, CHATGPT_MODELS, ANTHROPIC_MODELS, PROXY } =
-  process.env ?? {};
+const {
+  OPENROUTER_API_KEY,
+  OPENAI_REVERSE_PROXY,
+  CHATGPT_MODELS,
+  ANTHROPIC_MODELS,
+  GOOGLE_MODELS,
+  PROXY,
+} = process.env ?? {};

 const fetchOpenAIModels = async (opts = { azure: false, plugins: false }, _models = []) => {
   let models = _models.slice() ?? [];
@@ -126,8 +132,18 @@ const getAnthropicModels = () => {
   return models;
 };

+const getGoogleModels = () => {
+  let models = defaultModels[EModelEndpoint.google];
+  if (GOOGLE_MODELS) {
+    models = String(GOOGLE_MODELS).split(',');
+  }
+
+  return models;
+};
+
 module.exports = {
   getOpenAIModels,
   getChatGPTBrowserModels,
   getAnthropicModels,
+  getGoogleModels,
 };
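
Since `GOOGLE_MODELS` is read as a plain comma-separated string, overriding the default Google list is a one-line env change. A small usage sketch (model names illustrative):

// e.g. GOOGLE_MODELS=gemini-pro,gemini-pro-vision in .env
process.env.GOOGLE_MODELS = 'gemini-pro,gemini-pro-vision';
console.log(String(process.env.GOOGLE_MODELS).split(','));
// -> [ 'gemini-pro', 'gemini-pro-vision' ]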