🤖 feat(Anthropic): Claude 3 & Vision Support (#1984)

* chore: bump anthropic SDK * chore: update anthropic config settings (fileSupport, default models) * feat: anthropic multi modal formatting * refactor: update vision models and use endpoint specific max long side resizing * feat(anthropic): multimodal messages, retry logic, and messages payload * chore: add more safety to trimming content due to whitespace error for assistant messages * feat(anthropic): token accounting and resending multiple images in progress * chore: bump data-provider * feat(anthropic): resendImages feature * chore: optimize Edit/Ask controllers, switch model back to req model * fix: false positive of invalid model * refactor(validateVisionModel): use object as arg, pass in additional/available models * refactor(validateModel): use helper function, `getModelsConfig` * feat: add modelsConfig to endpointOption so it gets passed to all clients, use for properly validating vision models * refactor: initialize default vision model and make sure it's available before assigning it * refactor(useSSE): avoid resetting model if user selected a new model between request and response * feat: show rate in transaction logging * fix: return tokenCountMap regardless of payload shape
2026-02-23 19:04:10 +01:00 · 2024-03-06 00:04:52 -05:00 · 2024-03-06 00:04:52 -05:00 · 8263ddda3f
commit 8263ddda3f
parent b023c5683d
28 changed files with 599 additions and 115 deletions
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@ -4,7 +4,6 @@ const { GoogleVertexAI } = require('langchain/llms/googlevertexai');
 const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
 const { ChatGoogleVertexAI } = require('langchain/chat_models/googlevertexai');
 const { AIMessage, HumanMessage, SystemMessage } = require('langchain/schema');
-const { encodeAndFormat } = require('~/server/services/Files/images');
 const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
 const {
  validateVisionModel,
@ -13,6 +12,7 @@ const {
  EModelEndpoint,
  AuthKeys,
 } = require('librechat-data-provider');
+const { encodeAndFormat } = require('~/server/services/Files/images');
 const { getModelMaxTokens } = require('~/utils');
 const { formatMessage } = require('./prompts');
 const BaseClient = require('./BaseClient');
@ -124,18 +124,28 @@ class GoogleClient extends BaseClient {
      // stop: modelOptions.stop // no stop method for now
    };

-    if (this.options.attachments) {
-      this.modelOptions.model = 'gemini-pro-vision';
+    /* Validation vision request */
+    this.defaultVisionModel = this.options.visionModel ?? 'gemini-pro-vision';
+    const availableModels = this.options.modelsConfig?.[EModelEndpoint.google];
+    this.isVisionModel = validateVisionModel({ model: this.modelOptions.model, availableModels });
+
+    if (
+      this.options.attachments &&
+      availableModels?.includes(this.defaultVisionModel) &&
+      !this.isVisionModel
+    ) {
+      this.modelOptions.model = this.defaultVisionModel;
+      this.isVisionModel = true;
    }

-    // TODO: as of 12/14/23, only gemini models are "Generative AI" models provided by Google
-    this.isGenerativeModel = this.modelOptions.model.includes('gemini');
-    this.isVisionModel = validateVisionModel(this.modelOptions.model);
-    const { isGenerativeModel } = this;
    if (this.isVisionModel && !this.options.attachments) {
      this.modelOptions.model = 'gemini-pro';
      this.isVisionModel = false;
    }
+
+    // TODO: as of 12/14/23, only gemini models are "Generative AI" models provided by Google
+    this.isGenerativeModel = this.modelOptions.model.includes('gemini');
+    const { isGenerativeModel } = this;
    this.isChatModel = !isGenerativeModel && this.modelOptions.model.includes('chat');
    const { isChatModel } = this;
    this.isTextModel =