📸 feat: Gemini vision, Improved Logs and Multi-modal Handling (#1368)

* feat: add GOOGLE_MODELS env var
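
  A minimal sketch of how a model-list override like this is typically consumed. The variable name comes from the commit; the default list and the loader below are assumptions, not the actual LibreChat code:

  ```js
  // Hypothetical loader for the GOOGLE_MODELS override.
  // .env example: GOOGLE_MODELS=gemini-pro,gemini-pro-vision
  const DEFAULT_GOOGLE_MODELS = ['gemini-pro', 'gemini-pro-vision'];

  function getGoogleModels() {
    const raw = process.env.GOOGLE_MODELS;
    if (!raw || !raw.trim()) {
      return DEFAULT_GOOGLE_MODELS; // fall back when unset or empty
    }
    return raw
      .split(',')
      .map((model) => model.trim())
      .filter(Boolean);
  }

  module.exports = { getGoogleModels };
  ```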

* feat: add gemini vision support
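
  For context, Google's generative AI SDK takes image input as inline data alongside text parts. A hedged sketch of building such a payload (the helper name is hypothetical; the `{ text }` / `{ inlineData }` part shapes follow the SDK):

  ```js
  // Hypothetical helper: build the `parts` array for a gemini-pro-vision request.
  function buildVisionParts(text, imageBase64, mimeType = 'image/jpeg') {
    return [
      { text },
      { inlineData: { mimeType, data: imageBase64 } },
    ];
  }
  ```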

* refactor(GoogleClient): adjust clientOptions handling depending on model
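
  An illustrative sketch of what "depending on model" can look like; the dropped fields are assumptions about gemini-pro-vision's single-turn constraint, not GoogleClient's actual logic:

  ```js
  // Illustrative only: treat vision requests as single-turn, stripping
  // conversation-shaping options for gemini-pro-vision (assumed constraint).
  function adjustClientOptions(model, options) {
    if (model.includes('gemini-pro-vision')) {
      const { examples, context, ...rest } = options;
      return rest;
    }
    return options;
  }
  ```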

* fix(logger): fix redact logic and redact errors only
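
  A sketch of the "redact errors only" idea, assuming a winston-style logger; the key pattern and replacement text are illustrative:

  ```js
  const winston = require('winston');

  // Apply redaction only to error-level entries, leaving debug output intact.
  // Usage: winston.format.combine(redactErrors(), winston.format.json())
  const redactErrors = winston.format((info) => {
    if (info.level !== 'error') {
      return info;
    }
    if (typeof info.message === 'string') {
      // Illustrative pattern: mask anything that looks like a credential.
      info.message = info.message.replace(
        /((?:api[-_]?key|token|secret|password)[=:]\s*)\S+/gi,
        '$1[REDACTED]',
      );
    }
    return info;
  });
  ```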

* fix(GoogleClient): do not allow non-multimodal messages when gemini-pro-vision is selected
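
  A minimal sketch of the guard, assuming attachments are what mark a message as multimodal; the error text is illustrative:

  ```js
  // Hypothetical guard: gemini-pro-vision only accepts multimodal input.
  function assertMultimodal(model, attachments) {
    if (model === 'gemini-pro-vision' && (!attachments || attachments.length === 0)) {
      throw new Error(
        'gemini-pro-vision requires an image attachment; use gemini-pro for text-only messages.',
      );
    }
  }
  ```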

* refactor(OpenAIClient): use `isVisionModel` client property to avoid calling validateVisionModel multiple times

* refactor: better debug logging by correctly traversing objects, redacting sensitive info, and logging condensed versions of long values
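
  A sketch of the traversal-and-condensing idea; the length threshold and key pattern are assumptions:

  ```js
  const MAX_VALUE_LENGTH = 100; // assumed threshold

  // Walk a value recursively: redact sensitive-looking keys, truncate long
  // strings, and guard against cycles so debug logs stay readable.
  function condenseForLogging(value, seen = new WeakSet()) {
    if (typeof value === 'string') {
      return value.length > MAX_VALUE_LENGTH
        ? `${value.slice(0, MAX_VALUE_LENGTH)}... [truncated ${value.length} chars]`
        : value;
    }
    if (value === null || typeof value !== 'object') {
      return value;
    }
    if (seen.has(value)) {
      return '[Circular]'; // avoid infinite recursion on cyclic structures
    }
    seen.add(value);
    const result = Array.isArray(value) ? [] : {};
    for (const [key, val] of Object.entries(value)) {
      result[key] = /key|token|secret|password/i.test(key)
        ? '[REDACTED]'
        : condenseForLogging(val, seen);
    }
    return result;
  }
  ```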

* refactor(GoogleClient): allow response errors to be thrown/caught above client handling so the user receives a meaningful error message; debug orderedMessages, parentMessageId, and the buildMessages result
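
  A sketch of the resulting control flow with hypothetical names (`getClient` is illustrative): the client rethrows provider errors so the route handler can surface them:

  ```js
  const express = require('express');
  const app = express();
  app.use(express.json());

  app.post('/api/ask/google', async (req, res) => {
    try {
      const client = getClient(req); // hypothetical client factory
      const response = await client.sendMessage(req.body.text);
      res.json(response);
    } catch (error) {
      // Because the client rethrows instead of swallowing, the provider's own
      // error text (e.g. a safety-block reason) reaches the user.
      res.status(500).json({ error: error.message });
    }
  });
  ```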

* refactor(AskController): use the model from client.modelOptions.model when saving intermediate messages, which requires the progress callback to be initialized after the client
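
  The ordering matters because client initialization can auto-switch the model. A sketch with hypothetical names (`initializeClient`, `saveMessage`):

  ```js
  // Hypothetical names throughout; the point is the initialization order.
  async function handleAsk(req, endpointOption, saveMessage) {
    // 1) Initialize the client first; it may auto-switch the model
    //    (e.g. to gpt-4-vision-preview when attachments are present).
    const client = await initializeClient(req, endpointOption);

    // 2) Only then create the progress callback, so intermediate saves record
    //    the model the client actually uses.
    const onProgress = (partialText) =>
      saveMessage({ text: partialText, model: client.modelOptions.model });

    return client.sendMessage(req.body.text, { onProgress });
  }
  ```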

* feat(useSSE): revert to the previous model if it was auto-switched by the backend due to message attachments
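
  A sketch of the revert, assuming the final SSE payload reports the model the backend actually used; the names are illustrative, not the real useSSE internals:

  ```js
  // Illustrative finalize handler for an SSE stream.
  function makeFinalHandler(selectedModel, setConversation) {
    return (data) => {
      const respondedModel = data?.responseMessage?.model;
      if (respondedModel && respondedModel !== selectedModel) {
        // The backend switched models for the attachment; restore the user's
        // selection so their next message uses it again.
        setConversation((prev) => ({ ...prev, model: selectedModel }));
      }
    };
  }
  ```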

* docs: update Google docs with notes about Gemini Pro Vision

* fix: do not initialize Redis unless USE_REDIS is set, and increase max listeners to 20
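
  A sketch of the fix, assuming an ioredis-style client:

  ```js
  const Redis = require('ioredis');

  let redisClient = null;
  if (process.env.USE_REDIS === 'true') {
    redisClient = new Redis(process.env.REDIS_URI);
    // Each cache consumer adds listeners; the default cap of 10 can trigger
    // MaxListenersExceededWarning, so raise it to 20.
    redisClient.setMaxListeners(20);
  }

  module.exports = redisClient;
  ```
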
Danny Avila, 2023-12-16 20:45:27 -05:00 (committed by GitHub)
commit 0c326797dd (parent 676f133545)
21 changed files with 356 additions and 210 deletions

api/app/clients/OpenAIClient.js

@@ -1,7 +1,7 @@
 const OpenAI = require('openai');
 const { HttpsProxyAgent } = require('https-proxy-agent');
-const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
 const { getResponseSender, EModelEndpoint } = require('librechat-data-provider');
+const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
 const { encodeAndFormat, validateVisionModel } = require('~/server/services/Files/images');
 const { getModelMaxTokens, genAzureChatCompletion, extractBaseURL } = require('~/utils');
 const { truncateText, formatMessage, CUT_OFF_PROMPT } = require('./prompts');
@@ -76,11 +76,14 @@ class OpenAIClient extends BaseClient {
       };
     }

-    if (this.options.attachments && !validateVisionModel(this.modelOptions.model)) {
+    this.isVisionModel = validateVisionModel(this.modelOptions.model);
+
+    if (this.options.attachments && !this.isVisionModel) {
       this.modelOptions.model = 'gpt-4-vision-preview';
+      this.isVisionModel = true;
     }

-    if (validateVisionModel(this.modelOptions.model)) {
+    if (this.isVisionModel) {
       delete this.modelOptions.stop;
     }
@@ -152,7 +155,7 @@ class OpenAIClient extends BaseClient {
     this.setupTokens();

-    if (!this.modelOptions.stop && !validateVisionModel(this.modelOptions.model)) {
+    if (!this.modelOptions.stop && !this.isVisionModel) {
       const stopTokens = [this.startToken];
       if (this.endToken && this.endToken !== this.startToken) {
         stopTokens.push(this.endToken);
@@ -689,7 +692,7 @@ ${convo}
   }

   async recordTokenUsage({ promptTokens, completionTokens }) {
-    logger.debug('[OpenAIClient]', { promptTokens, completionTokens });
+    logger.debug('[OpenAIClient] recordTokenUsage:', { promptTokens, completionTokens });
     await spendTokens(
       {
         user: this.user,
@@ -757,7 +760,7 @@ ${convo}
       opts.httpAgent = new HttpsProxyAgent(this.options.proxy);
     }

-    if (validateVisionModel(modelOptions.model)) {
+    if (this.isVisionModel) {
       modelOptions.max_tokens = 4000;
     }
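
Note: the cached `this.isVisionModel` flag replaces the repeated `validateVisionModel` calls shown above. The explicit `max_tokens = 4000` is presumably set because `gpt-4-vision-preview` was known to return very short completions when `max_tokens` was omitted.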