🤖 feat: Gemini 1.5 Support (+Vertex AI) (#2383)

* WIP: gemini-1.5 support

* feat: extended vertex ai support

* fix: handle possibly undefined modelName

* fix: `gpt-4-turbo-preview` is not a valid vision model

* feat: add `fileConfig.imageOutputType` setting and make PNG the default image conversion type
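
  A minimal sketch of the corresponding librechat.yaml entry (only the `fileConfig.imageOutputType` key is confirmed by this commit; the surrounding structure and quoting are assumptions):

  # librechat.yaml — illustrative sketch, not the full config
  fileConfig:
    imageOutputType: "png"  # PNG is now the default image conversion type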

* feat: better truncation for error messages containing base64 strings
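
  The general idea, as a hedged sketch (the helper name, regex, and length thresholds here are assumptions, not this commit's actual code):

  // Collapse long base64-looking runs in an error message so logs stay readable.
  function truncateBase64(message, previewLength = 100) {
    return message.replace(
      /[A-Za-z0-9+\/=]{256,}/g,
      (match) => `${match.slice(0, previewLength)}... [truncated ${match.length} chars]`,
    );
  }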

* fix: gemini inlineData formatting

* feat: RAG augmented prompt for gemini-1.5
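
  The augmentation presumably follows the usual RAG pattern, sketched here with assumed variable names (`retrievedChunks`, `userMessage`) and an illustrative template; the actual prompt used for gemini-1.5 is not shown in this diff:

  // Hedged sketch: prepend retrieved context to the user's message.
  const augmentedPrompt = [
    'Use the context below to help answer the query.',
    '## Context:',
    retrievedChunks.join('\n\n'),
    '## Query:',
    userMessage,
  ].join('\n\n');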

* feat: gemini-1.5 rates and token window

* chore: adjust tokens, update docs, update vision models

* chore: add back `ChatGoogleVertexAI` for chat models via vertex ai

* refactor: ask/edit controllers to not use `unfinished` field for google endpoint

* chore: remove comment

* chore(ci): fix AppService test

* chore: remove comment

* refactor(GoogleSearch): use `GOOGLE_SEARCH_API_KEY` instead, issue warning for old variable
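
  The migration presumably looks something like this sketch (the old variable name `GOOGLE_API_KEY`, the warning text, and the availability of `logger` are assumptions):

  // Prefer the new variable; warn when only the old one is set.
  const searchApiKey = process.env.GOOGLE_SEARCH_API_KEY ?? process.env.GOOGLE_API_KEY;
  if (!process.env.GOOGLE_SEARCH_API_KEY && process.env.GOOGLE_API_KEY) {
    logger.warn('Using GOOGLE_API_KEY for Google Search is deprecated; set GOOGLE_SEARCH_API_KEY instead.');
  }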

* chore: bump data-provider to 0.5.4

* chore: update docs

* fix: condition for gemini-1.5 using generative ai lib
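
  Illustrative shape of such a condition (hedged; the codebase's actual predicate may differ), which also has to tolerate the possibly undefined `modelName` fixed earlier in this PR:

  // Route gemini-1.5 models through @google/generative-ai rather than Vertex AI.
  const useGenerativeAILib = (modelName ?? '').includes('gemini-1.5');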

* chore: update docs

* ci: add additional AppService test for `imageOutputType`

* refactor: optimize new config value `imageOutputType`

* chore: bump CONFIG_VERSION

* fix(assistants): avatar upload
Danny Avila, 2024-04-16 08:32:40 -04:00 (committed by GitHub)
commit 9d854dac07 (parent fce7246ac1)
37 changed files with 1030 additions and 258 deletions


@@ -1,5 +1,5 @@
 const axios = require('axios');
-const { EModelEndpoint, FileSources } = require('librechat-data-provider');
+const { EModelEndpoint, FileSources, VisionModes } = require('librechat-data-provider');
 const { getStrategyFunctions } = require('../strategies');
 const { logger } = require('~/config');
 
@@ -30,11 +30,20 @@ const base64Only = new Set([EModelEndpoint.google, EModelEndpoint.anthropic]);
  * @param {Express.Request} req - The request object.
  * @param {Array<MongoFile>} files - The array of files to encode and format.
  * @param {EModelEndpoint} [endpoint] - Optional: The endpoint for the image.
+ * @param {string} [mode] - Optional: The endpoint mode for the image.
  * @returns {Promise<Object>} - A promise that resolves to the result object containing the encoded images and file details.
  */
-async function encodeAndFormat(req, files, endpoint) {
+async function encodeAndFormat(req, files, endpoint, mode) {
   const promises = [];
   const encodingMethods = {};
+  const result = {
+    files: [],
+    image_urls: [],
+  };
+
+  if (!files || !files.length) {
+    return result;
+  }
 
   for (let file of files) {
     const source = file.source ?? FileSources.local;
@@ -69,11 +78,6 @@ async function encodeAndFormat(req, files, endpoint) {
   /** @type {Array<[MongoFile, string]>} */
   const formattedImages = await Promise.all(promises);
 
-  const result = {
-    files: [],
-    image_urls: [],
-  };
-
   for (const [file, imageContent] of formattedImages) {
     const fileMetadata = {
       type: file.type,
@@ -98,12 +102,18 @@ async function encodeAndFormat(req, files, endpoint) {
       image_url: {
         url: imageContent.startsWith('http')
           ? imageContent
-          : `data:image/webp;base64,${imageContent}`,
+          : `data:${file.type};base64,${imageContent}`,
         detail,
       },
     };
 
-    if (endpoint && endpoint === EModelEndpoint.google) {
+    if (endpoint && endpoint === EModelEndpoint.google && mode === VisionModes.generative) {
+      delete imagePart.image_url;
+      imagePart.inlineData = {
+        mimeType: file.type,
+        data: imageContent,
+      };
+    } else if (endpoint && endpoint === EModelEndpoint.google) {
       imagePart.image_url = imagePart.image_url.url;
     } else if (endpoint && endpoint === EModelEndpoint.anthropic) {
       imagePart.type = 'image';
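
Taken together, the branches above yield three distinct image payload shapes. A hedged summary and usage sketch (field values and the `type: 'image_url'` default are illustrative; assumes an async context with `req` and `files` in scope):

// OpenAI-style (default):
//   { type: 'image_url', image_url: { url: 'data:image/png;base64,...', detail } }
// Google with mode === VisionModes.generative (gemini-1.5 via @google/generative-ai):
//   { inlineData: { mimeType: 'image/png', data: '<base64>' } }
// Google otherwise: image_url is flattened to the bare URL/base64 string.
// Anthropic: imagePart.type is set to 'image' (formatting continues past this hunk).
const { image_urls, files: encodedFiles } = await encodeAndFormat(
  req,
  files,
  EModelEndpoint.google,
  VisionModes.generative,
);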