🤖 feat: Gemini 1.5 Support (+Vertex AI) (#2383)

* WIP: gemini-1.5 support * feat: extended vertex ai support * fix: handle possibly undefined modelName * fix: gpt-4-turbo-preview invalid vision model * feat: specify `fileConfig.imageOutputType` and make PNG default image conversion type * feat: better truncation for errors including base64 strings * fix: gemini inlineData formatting * feat: RAG augmented prompt for gemini-1.5 * feat: gemini-1.5 rates and token window * chore: adjust tokens, update docs, update vision Models * chore: add back `ChatGoogleVertexAI` for chat models via vertex ai * refactor: ask/edit controllers to not use `unfinished` field for google endpoint * chore: remove comment * chore(ci): fix AppService test * chore: remove comment * refactor(GoogleSearch): use `GOOGLE_SEARCH_API_KEY` instead, issue warning for old variable * chore: bump data-provider to 0.5.4 * chore: update docs * fix: condition for gemini-1.5 using generative ai lib * chore: update docs * ci: add additional AppService test for `imageOutputType` * refactor: optimize new config value `imageOutputType` * chore: bump CONFIG_VERSION * fix(assistants): avatar upload
2026-02-04 08:41:49 +01:00 · 2024-04-16 08:32:40 -04:00 · 2024-04-16 08:32:40 -04:00 · 9d854dac07
commit 9d854dac07
parent fce7246ac1
37 changed files with 1030 additions and 258 deletions
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@ -1,7 +1,9 @@
 const { google } = require('googleapis');
 const { Agent, ProxyAgent } = require('undici');
-const { GoogleVertexAI } = require('langchain/llms/googlevertexai');
+const { ChatVertexAI } = require('@langchain/google-vertexai');
 const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
+const { GoogleGenerativeAI: GenAI } = require('@google/generative-ai');
+const { GoogleVertexAI } = require('@langchain/community/llms/googlevertexai');
 const { ChatGoogleVertexAI } = require('langchain/chat_models/googlevertexai');
 const { AIMessage, HumanMessage, SystemMessage } = require('langchain/schema');
 const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
@ -10,6 +12,7 @@ const {
  getResponseSender,
  endpointSettings,
  EModelEndpoint,
+  VisionModes,
  AuthKeys,
 } = require('librechat-data-provider');
 const { encodeAndFormat } = require('~/server/services/Files/images');
@ -126,7 +129,7 @@ class GoogleClient extends BaseClient {

    this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));

-    // TODO: as of 12/14/23, only gemini models are "Generative AI" models provided by Google
+    /** @type {boolean} Whether using a "GenerativeAI" Model */
    this.isGenerativeModel = this.modelOptions.model.includes('gemini');
    const { isGenerativeModel } = this;
    this.isChatModel = !isGenerativeModel && this.modelOptions.model.includes('chat');
@ -247,6 +250,40 @@ class GoogleClient extends BaseClient {
    })).bind(this);
  }

+  /**
+   * Formats messages for generative AI
+   * @param {TMessage[]} messages
+   * @returns
+   */
+  async formatGenerativeMessages(messages) {
+    const formattedMessages = [];
+    const attachments = await this.options.attachments;
+    const latestMessage = { ...messages[messages.length - 1] };
+    const files = await this.addImageURLs(latestMessage, attachments, VisionModes.generative);
+    this.options.attachments = files;
+    messages[messages.length - 1] = latestMessage;
+
+    for (const _message of messages) {
+      const role = _message.isCreatedByUser ? this.userLabel : this.modelLabel;
+      const parts = [];
+      parts.push({ text: _message.text });
+      if (!_message.image_urls?.length) {
+        formattedMessages.push({ role, parts });
+        continue;
+      }
+
+      for (const images of _message.image_urls) {
+        if (images.inlineData) {
+          parts.push({ inlineData: images.inlineData });
+        }
+      }
+
+      formattedMessages.push({ role, parts });
+    }
+
+    return formattedMessages;
+  }
+
  /**
   *
   * Adds image URLs to the message object and returns the files
@ -255,17 +292,23 @@ class GoogleClient extends BaseClient {
   * @param {MongoFile[]} files
   * @returns {Promise<MongoFile[]>}
   */
-  async addImageURLs(message, attachments) {
+  async addImageURLs(message, attachments, mode = '') {
    const { files, image_urls } = await encodeAndFormat(
      this.options.req,
      attachments,
      EModelEndpoint.google,
+      mode,
    );
    message.image_urls = image_urls.length ? image_urls : undefined;
    return files;
  }

-  async buildVisionMessages(messages = [], parentMessageId) {
+  /**
+   * Builds the augmented prompt for attachments
+   * TODO: Add File API Support
+   * @param {TMessage[]} messages
+   */
+  async buildAugmentedPrompt(messages = []) {
    const attachments = await this.options.attachments;
    const latestMessage = { ...messages[messages.length - 1] };
    this.contextHandlers = createContextHandlers(this.options.req, latestMessage.text);
@ -281,6 +324,12 @@ class GoogleClient extends BaseClient {
      this.augmentedPrompt = await this.contextHandlers.createContext();
      this.options.promptPrefix = this.augmentedPrompt + this.options.promptPrefix;
    }
+  }
+
+  async buildVisionMessages(messages = [], parentMessageId) {
+    const attachments = await this.options.attachments;
+    const latestMessage = { ...messages[messages.length - 1] };
+    await this.buildAugmentedPrompt(messages);

    const { prompt } = await this.buildMessagesPrompt(messages, parentMessageId);

@ -301,15 +350,26 @@ class GoogleClient extends BaseClient {
    return { prompt: payload };
  }

+  /** @param {TMessage[]} [messages=[]]  */
+  async buildGenerativeMessages(messages = []) {
+    this.userLabel = 'user';
+    this.modelLabel = 'model';
+    const promises = [];
+    promises.push(await this.formatGenerativeMessages(messages));
+    promises.push(this.buildAugmentedPrompt(messages));
+    const [formattedMessages] = await Promise.all(promises);
+    return { prompt: formattedMessages };
+  }
+
  async buildMessages(messages = [], parentMessageId) {
    if (!this.isGenerativeModel && !this.project_id) {
      throw new Error(
        '[GoogleClient] a Service Account JSON Key is required for PaLM 2 and Codey models (Vertex AI)',
      );
-    } else if (this.isGenerativeModel && (!this.apiKey || this.apiKey === 'user_provided')) {
-      throw new Error(
-        '[GoogleClient] an API Key is required for Gemini models (Generative Language API)',
-      );
+    }
+
+    if (!this.project_id && this.modelOptions.model.includes('1.5')) {
+      return await this.buildGenerativeMessages(messages);
    }

    if (this.options.attachments && this.isGenerativeModel) {
@ -526,13 +586,24 @@ class GoogleClient extends BaseClient {
  }

  createLLM(clientOptions) {
-    if (this.isGenerativeModel) {
-      return new ChatGoogleGenerativeAI({ ...clientOptions, apiKey: this.apiKey });
+    const model = clientOptions.modelName ?? clientOptions.model;
+    if (this.project_id && this.isTextModel) {
+      return new GoogleVertexAI(clientOptions);
+    } else if (this.project_id && this.isChatModel) {
+      return new ChatGoogleVertexAI(clientOptions);
+    } else if (this.project_id) {
+      return new ChatVertexAI(clientOptions);
+    } else if (model.includes('1.5')) {
+      return new GenAI(this.apiKey).getGenerativeModel(
+        {
+          ...clientOptions,
+          model,
+        },
+        { apiVersion: 'v1beta' },
+      );
    }

-    return this.isTextModel
-      ? new GoogleVertexAI(clientOptions)
-      : new ChatGoogleVertexAI(clientOptions);
+    return new ChatGoogleGenerativeAI({ ...clientOptions, apiKey: this.apiKey });
  }

  async getCompletion(_payload, options = {}) {
@ -544,7 +615,7 @@ class GoogleClient extends BaseClient {

    let clientOptions = { ...parameters, maxRetries: 2 };

-    if (!this.isGenerativeModel) {
+    if (this.project_id) {
      clientOptions['authOptions'] = {
        credentials: {
          ...this.serviceKey,
@ -557,7 +628,7 @@ class GoogleClient extends BaseClient {
      clientOptions = { ...clientOptions, ...this.modelOptions };
    }

-    if (this.isGenerativeModel) {
+    if (this.isGenerativeModel && !this.project_id) {
      clientOptions.modelName = clientOptions.model;
      delete clientOptions.model;
    }
@ -588,16 +659,46 @@ class GoogleClient extends BaseClient {
      messages.unshift(new SystemMessage(context));
    }

+    const modelName = clientOptions.modelName ?? clientOptions.model ?? '';
+    if (modelName?.includes('1.5') && !this.project_id) {
+      /** @type {GenerativeModel} */
+      const client = model;
+      const requestOptions = {
+        contents: _payload,
+      };
+
+      if (this.options?.promptPrefix?.length) {
+        requestOptions.systemInstruction = {
+          parts: [
+            {
+              text: this.options.promptPrefix,
+            },
+          ],
+        };
+      }
+
+      const result = await client.generateContentStream(requestOptions);
+      for await (const chunk of result.stream) {
+        const chunkText = chunk.text();
+        this.generateTextStream(chunkText, onProgress, {
+          delay: 12,
+        });
+        reply += chunkText;
+      }
+      return reply;
+    }
+
    const stream = await model.stream(messages, {
      signal: abortController.signal,
      timeout: 7000,
    });

    for await (const chunk of stream) {
-      await this.generateTextStream(chunk?.content ?? chunk, onProgress, {
+      const chunkText = chunk?.content ?? chunk;
+      this.generateTextStream(chunkText, onProgress, {
        delay: this.isGenerativeModel ? 12 : 8,
      });
-      reply += chunk?.content ?? chunk;
+      reply += chunkText;
    }

    return reply;