🤖 feat: Gemini 1.5 Support (+Vertex AI) (#2383)

* WIP: gemini-1.5 support

* feat: extended vertex ai support

* fix: handle possibly undefined modelName

* fix: `gpt-4-turbo-preview` is not a valid vision model

* feat: add `fileConfig.imageOutputType` setting and make PNG the default image conversion type
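
  A minimal sketch of the corresponding librechat.yaml entry (only the `fileConfig.imageOutputType` key is confirmed by this commit; the surrounding structure and quoting are assumptions):

  # librechat.yaml — illustrative sketch, not the full config
  fileConfig:
    imageOutputType: "png"  # PNG is now the default image conversion type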

* feat: better truncation for error messages containing base64 strings
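
  The general idea, as a hedged sketch (the helper name, regex, and length thresholds here are assumptions, not this commit's actual code):

  // Collapse long base64-looking runs in an error message so logs stay readable.
  function truncateBase64(message, previewLength = 100) {
    return message.replace(
      /[A-Za-z0-9+\/=]{256,}/g,
      (match) => `${match.slice(0, previewLength)}... [truncated ${match.length} chars]`,
    );
  }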

* fix: gemini inlineData formatting

* feat: RAG augmented prompt for gemini-1.5
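
  The augmentation presumably follows the usual RAG pattern, sketched here with assumed variable names (`retrievedChunks`, `userMessage`) and an illustrative template; the actual prompt used for gemini-1.5 is not shown in this diff:

  // Hedged sketch: prepend retrieved context to the user's message.
  const augmentedPrompt = [
    'Use the context below to help answer the query.',
    '## Context:',
    retrievedChunks.join('\n\n'),
    '## Query:',
    userMessage,
  ].join('\n\n');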

* feat: gemini-1.5 rates and token window

* chore: adjust tokens, update docs, update vision models

* chore: add back `ChatGoogleVertexAI` for chat models via vertex ai

* refactor: ask/edit controllers to not use `unfinished` field for google endpoint

* chore: remove comment

* chore(ci): fix AppService test

* chore: remove comment

* refactor(GoogleSearch): use `GOOGLE_SEARCH_API_KEY` instead, issue warning for old variable
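
  The migration presumably looks something like this sketch (the old variable name `GOOGLE_API_KEY`, the warning text, and the availability of `logger` are assumptions):

  // Prefer the new variable; warn when only the old one is set.
  const searchApiKey = process.env.GOOGLE_SEARCH_API_KEY ?? process.env.GOOGLE_API_KEY;
  if (!process.env.GOOGLE_SEARCH_API_KEY && process.env.GOOGLE_API_KEY) {
    logger.warn('Using GOOGLE_API_KEY for Google Search is deprecated; set GOOGLE_SEARCH_API_KEY instead.');
  }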

* chore: bump data-provider to 0.5.4

* chore: update docs

* fix: condition for gemini-1.5 using generative ai lib
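
  Illustrative shape of such a condition (hedged; the codebase's actual predicate may differ), which also has to tolerate the possibly undefined `modelName` fixed earlier in this PR:

  // Route gemini-1.5 models through @google/generative-ai rather than Vertex AI.
  const useGenerativeAILib = (modelName ?? '').includes('gemini-1.5');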

* chore: update docs

* ci: add additional AppService test for `imageOutputType`

* refactor: optimize new config value `imageOutputType`

* chore: bump CONFIG_VERSION

* fix(assistants): avatar upload
Danny Avila, 2024-04-16 08:32:40 -04:00 (committed by GitHub)
commit 9d854dac07 (parent fce7246ac1)
37 changed files with 1030 additions and 258 deletions


@@ -1,5 +1,5 @@
 const axios = require('axios');
-const { EModelEndpoint, FileSources } = require('librechat-data-provider');
+const { EModelEndpoint, FileSources, VisionModes } = require('librechat-data-provider');
 const { getStrategyFunctions } = require('../strategies');
 const { logger } = require('~/config');
 
@@ -30,11 +30,20 @@ const base64Only = new Set([EModelEndpoint.google, EModelEndpoint.anthropic]);
  * @param {Express.Request} req - The request object.
  * @param {Array<MongoFile>} files - The array of files to encode and format.
  * @param {EModelEndpoint} [endpoint] - Optional: The endpoint for the image.
+ * @param {string} [mode] - Optional: The endpoint mode for the image.
  * @returns {Promise<Object>} - A promise that resolves to the result object containing the encoded images and file details.
  */
-async function encodeAndFormat(req, files, endpoint) {
+async function encodeAndFormat(req, files, endpoint, mode) {
   const promises = [];
   const encodingMethods = {};
+  const result = {
+    files: [],
+    image_urls: [],
+  };
+
+  if (!files || !files.length) {
+    return result;
+  }
 
   for (let file of files) {
     const source = file.source ?? FileSources.local;
@@ -69,11 +78,6 @@ async function encodeAndFormat(req, files, endpoint) {
   /** @type {Array<[MongoFile, string]>} */
   const formattedImages = await Promise.all(promises);
 
-  const result = {
-    files: [],
-    image_urls: [],
-  };
-
   for (const [file, imageContent] of formattedImages) {
     const fileMetadata = {
       type: file.type,
@@ -98,12 +102,18 @@ async function encodeAndFormat(req, files, endpoint) {
       image_url: {
         url: imageContent.startsWith('http')
           ? imageContent
-          : `data:image/webp;base64,${imageContent}`,
+          : `data:${file.type};base64,${imageContent}`,
         detail,
       },
     };
 
-    if (endpoint && endpoint === EModelEndpoint.google) {
+    if (endpoint && endpoint === EModelEndpoint.google && mode === VisionModes.generative) {
+      delete imagePart.image_url;
+      imagePart.inlineData = {
+        mimeType: file.type,
+        data: imageContent,
+      };
+    } else if (endpoint && endpoint === EModelEndpoint.google) {
       imagePart.image_url = imagePart.image_url.url;
     } else if (endpoint && endpoint === EModelEndpoint.anthropic) {
       imagePart.type = 'image';
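
Taken together, the branches above yield three distinct image payload shapes. A hedged summary and usage sketch (field values and the `type: 'image_url'` default are illustrative; assumes an async context with `req` and `files` in scope):

// OpenAI-style (default):
//   { type: 'image_url', image_url: { url: 'data:image/png;base64,...', detail } }
// Google with mode === VisionModes.generative (gemini-1.5 via @google/generative-ai):
//   { inlineData: { mimeType: 'image/png', data: '<base64>' } }
// Google otherwise: image_url is flattened to the bare URL/base64 string.
// Anthropic: imagePart.type is set to 'image' (formatting continues past this hunk).
const { image_urls, files: encodedFiles } = await encodeAndFormat(
  req,
  files,
  EModelEndpoint.google,
  VisionModes.generative,
);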