🗃️ feat: General File Support for OpenAI, Azure, Custom, Anthropic and Google (RAG) (#2143)

* refactor: re-purpose `resendImages` as `resendFiles`
* refactor: re-purpose `resendImages` as `resendFiles`
* feat: upload general files
* feat: embed file during upload
* feat: delete file embeddings on file deletion
* chore(fileConfig): add epub+zip type
* feat(encodeAndFormat): handle non-image files
* feat(createContextHandlers): build context prompt from file attachments and successful RAG
* fix: prevent non-temp files as well as embedded files to be deleted on new conversation
* fix: remove temp_file_id on usage, prevent non-temp files as well as embedded files to be deleted on new conversation
* fix: prevent non-temp files as well as embedded files to be deleted on new conversation
* feat(OpenAI/Anthropic/Google): basic RAG support
* fix: delete `resendFiles` only when true (Default)
* refactor(RAG): update endpoints and pass JWT
* fix(resendFiles): default values
* fix(context/processFile): query unique ids only
* feat: rag-api.yaml
* feat: file upload improved ux for longer uploads
* chore: await embed call and catch embedding errors
* refactor: store augmentedPrompt in Client
* refactor(processFileUpload): throw error if not assistant file upload
* fix(useFileHandling): handle markdown empty mimetype issue
* chore: necessary compose file changes
2025-12-18 01:10:14 +01:00 · 2024-03-19 20:54:30 -04:00 · 2024-03-19 20:54:30 -04:00 · f7761df52c
commit f7761df52c
parent af347cccde
38 changed files with 683 additions and 261 deletions
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@ -13,8 +13,8 @@ const {
  AuthKeys,
 } = require('librechat-data-provider');
 const { encodeAndFormat } = require('~/server/services/Files/images');
+const { formatMessage, createContextHandlers } = require('./prompts');
 const { getModelMaxTokens } = require('~/utils');
-const { formatMessage } = require('./prompts');
 const BaseClient = require('./BaseClient');
 const { logger } = require('~/config');

@ -124,24 +124,7 @@ class GoogleClient extends BaseClient {
      // stop: modelOptions.stop // no stop method for now
    };

-    /* Validation vision request */
-    this.defaultVisionModel = this.options.visionModel ?? 'gemini-pro-vision';
-    const availableModels = this.options.modelsConfig?.[EModelEndpoint.google];
-    this.isVisionModel = validateVisionModel({ model: this.modelOptions.model, availableModels });
-
-    if (
-      this.options.attachments &&
-      availableModels?.includes(this.defaultVisionModel) &&
-      !this.isVisionModel
-    ) {
-      this.modelOptions.model = this.defaultVisionModel;
-      this.isVisionModel = true;
-    }
-
-    if (this.isVisionModel && !this.options.attachments) {
-      this.modelOptions.model = 'gemini-pro';
-      this.isVisionModel = false;
-    }
+    this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));

    // TODO: as of 12/14/23, only gemini models are "Generative AI" models provided by Google
    this.isGenerativeModel = this.modelOptions.model.includes('gemini');
@ -230,6 +213,33 @@ class GoogleClient extends BaseClient {
    return this;
  }

+  /**
+   * Checks if the model is a vision model based on request attachments and sets the appropriate options:
+   * switches to `this.defaultVisionModel` when an image is attached, that model is available and the current model is not a vision model; falls back to 'gemini-pro' when a vision model has no attachments.
+   * @param {MongoFile[]} attachments - Request attachments; a file counts as an image when its `type` includes 'image'.
+   */
+  checkVisionRequest(attachments) {
+    /* Validate the vision request. NOTE(review): the fallback at the end tests only for missing attachments, not for missing image attachments; confirm this is intended. */
+    this.defaultVisionModel = this.options.visionModel ?? 'gemini-pro-vision';
+    const availableModels = this.options.modelsConfig?.[EModelEndpoint.google];
+    this.isVisionModel = validateVisionModel({ model: this.modelOptions.model, availableModels });
+
+    if (
+      attachments &&
+      attachments.some((file) => file?.type && file?.type?.includes('image')) &&
+      availableModels?.includes(this.defaultVisionModel) &&
+      !this.isVisionModel
+    ) {
+      this.modelOptions.model = this.defaultVisionModel;
+      this.isVisionModel = true;
+    }
+
+    if (this.isVisionModel && !attachments) {
+      this.modelOptions.model = 'gemini-pro';
+      this.isVisionModel = false;
+    }
+  }
+
  formatMessages() {
    return ((message) => ({
      author: message?.author ?? (message.isCreatedByUser ? this.userLabel : this.modelLabel),
@ -237,18 +247,45 @@ class GoogleClient extends BaseClient {
    })).bind(this);
  }

-  async buildVisionMessages(messages = [], parentMessageId) {
-    const { prompt } = await this.buildMessagesPrompt(messages, parentMessageId);
-    const attachments = await this.options.attachments;
+  /**
+   *
+   * Adds image URLs to the message object and returns the files
+   *
+   * @param {TMessage} message - Message to annotate; its `image_urls` is set to the encoded URLs, or to `undefined` when there are none.
+   * @param {MongoFile[]} attachments - Files passed to `encodeAndFormat` for the Google endpoint.
+   * @returns {Promise<MongoFile[]>} The `files` array returned by `encodeAndFormat`.
+   */
+  async addImageURLs(message, attachments) {
    const { files, image_urls } = await encodeAndFormat(
      this.options.req,
-      attachments.filter((file) => file.type.includes('image')),
+      attachments,
      EModelEndpoint.google,
    );
+    message.image_urls = image_urls.length ? image_urls : undefined;
+    return files;
+  }

+  async buildVisionMessages(messages = [], parentMessageId) {
+    const attachments = await this.options.attachments;
    const latestMessage = { ...messages[messages.length - 1] };
+    this.contextHandlers = createContextHandlers(this.options.req, latestMessage.text);
+
+    if (this.contextHandlers) {
+      for (const file of attachments) {
+        if (file.embedded) {
+          this.contextHandlers?.processFile(file);
+          continue;
+        }
+      }
+
+      this.augmentedPrompt = await this.contextHandlers.createContext();
+      this.options.promptPrefix = this.augmentedPrompt + this.options.promptPrefix;
+    }
+
+    const { prompt } = await this.buildMessagesPrompt(messages, parentMessageId);
+
+    const files = await this.addImageURLs(latestMessage, attachments);

-    latestMessage.image_urls = image_urls;
    this.options.attachments = files;

    latestMessage.text = prompt;
@ -275,7 +312,7 @@ class GoogleClient extends BaseClient {
      );
    }

-    if (this.options.attachments) {
+    if (this.options.attachments && this.isGenerativeModel) {
      return this.buildVisionMessages(messages, parentMessageId);
    }