🤖 feat(Anthropic): Claude 3 & Vision Support (#1984)

* chore: bump anthropic SDK
* chore: update anthropic config settings (fileSupport, default models)
* feat: anthropic multi modal formatting
* refactor: update vision models and use endpoint specific max long side resizing
* feat(anthropic): multimodal messages, retry logic, and messages payload
* chore: add more safety to trimming content due to whitespace error for assistant messages
* feat(anthropic): token accounting and resending multiple images in progress
* chore: bump data-provider
* feat(anthropic): resendImages feature
* chore: optimize Edit/Ask controllers, switch model back to req model
* fix: false positive of invalid model
* refactor(validateVisionModel): use object as arg, pass in additional/available models
* refactor(validateModel): use helper function, `getModelsConfig`
* feat: add modelsConfig to endpointOption so it gets passed to all clients, use for properly validating vision models
* refactor: initialize default vision model and make sure it's available before assigning it
* refactor(useSSE): avoid resetting model if user selected a new model between request and response
* feat: show rate in transaction logging
* fix: return tokenCountMap regardless of payload shape
2026-03-10 01:52:37 +01:00 · 2024-03-06 00:04:52 -05:00 · 2024-03-06 00:04:52 -05:00 · 8263ddda3f
commit 8263ddda3f
parent b023c5683d
28 changed files with 599 additions and 115 deletions
--- a/api/server/controllers/AskController.js
+++ b/api/server/controllers/AskController.js
@ -1,7 +1,7 @@
 const { getResponseSender, Constants } = require('librechat-data-provider');
-const { sendMessage, createOnProgress } = require('~/server/utils');
-const { saveMessage, getConvoTitle, getConvo } = require('~/models');
 const { createAbortController, handleAbortError } = require('~/server/middleware');
+const { sendMessage, createOnProgress } = require('~/server/utils');
+const { saveMessage, getConvo } = require('~/models');
 const { logger } = require('~/config');

 const AskController = async (req, res, next, initializeClient, addTitle) => {
@ -134,16 +134,21 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {

    response.endpoint = endpointOption.endpoint;

+    const conversation = await getConvo(user, conversationId);
+    conversation.title =
+      conversation && !conversation.title ? null : conversation?.title || 'New Chat';
+
    if (client.options.attachments) {
      userMessage.files = client.options.attachments;
+      conversation.model = endpointOption.modelOptions.model;
      delete userMessage.image_urls;
    }

    if (!abortController.signal.aborted) {
      sendMessage(res, {
-        title: await getConvoTitle(user, conversationId),
        final: true,
-        conversation: await getConvo(user, conversationId),
+        conversation,
+        title: conversation.title,
        requestMessage: userMessage,
        responseMessage: response,
      });
--- a/api/server/controllers/EditController.js
+++ b/api/server/controllers/EditController.js
@ -1,7 +1,7 @@
 const { getResponseSender } = require('librechat-data-provider');
-const { sendMessage, createOnProgress } = require('~/server/utils');
-const { saveMessage, getConvoTitle, getConvo } = require('~/models');
 const { createAbortController, handleAbortError } = require('~/server/middleware');
+const { sendMessage, createOnProgress } = require('~/server/utils');
+const { saveMessage, getConvo } = require('~/models');
 const { logger } = require('~/config');

 const EditController = async (req, res, next, initializeClient) => {
@ -131,11 +131,19 @@ const EditController = async (req, res, next, initializeClient) => {
      response = { ...response, ...metadata };
    }

+    const conversation = await getConvo(user, conversationId);
+    conversation.title =
+      conversation && !conversation.title ? null : conversation?.title || 'New Chat';
+
+    if (client.options.attachments) {
+      conversation.model = endpointOption.modelOptions.model;
+    }
+
    if (!abortController.signal.aborted) {
      sendMessage(res, {
-        title: await getConvoTitle(user, conversationId),
        final: true,
-        conversation: await getConvo(user, conversationId),
+        conversation,
+        title: conversation.title,
        requestMessage: userMessage,
        responseMessage: response,
      });
--- a/api/server/controllers/ModelController.js
+++ b/api/server/controllers/ModelController.js
@ -2,6 +2,16 @@ const { CacheKeys } = require('librechat-data-provider');
 const { loadDefaultModels, loadConfigModels } = require('~/server/services/Config');
 const { getLogStores } = require('~/cache');

+const getModelsConfig = async (req) => {
+  const cache = getLogStores(CacheKeys.CONFIG_STORE);
+  let modelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
+  if (!modelsConfig) {
+    modelsConfig = await loadModels(req);
+  }
+
+  return modelsConfig;
+};
+
 /**
 * Loads the models from the config.
 * @param {Express.Request} req - The Express request object.
@ -27,4 +37,4 @@ async function modelController(req, res) {
  res.send(modelConfig);
 }

-module.exports = { modelController, loadModels };
+module.exports = { modelController, loadModels, getModelsConfig };
--- a/api/server/middleware/buildEndpointOption.js
+++ b/api/server/middleware/buildEndpointOption.js
@ -1,11 +1,12 @@
 const { parseConvo, EModelEndpoint } = require('librechat-data-provider');
+const { getModelsConfig } = require('~/server/controllers/ModelController');
 const { processFiles } = require('~/server/services/Files/process');
 const gptPlugins = require('~/server/services/Endpoints/gptPlugins');
 const anthropic = require('~/server/services/Endpoints/anthropic');
+const assistant = require('~/server/services/Endpoints/assistant');
 const openAI = require('~/server/services/Endpoints/openAI');
 const custom = require('~/server/services/Endpoints/custom');
 const google = require('~/server/services/Endpoints/google');
-const assistant = require('~/server/services/Endpoints/assistant');

 const buildFunction = {
  [EModelEndpoint.openAI]: openAI.buildOptions,
@ -17,7 +18,7 @@ const buildFunction = {
  [EModelEndpoint.assistants]: assistant.buildOptions,
 };

-function buildEndpointOption(req, res, next) {
+async function buildEndpointOption(req, res, next) {
  const { endpoint, endpointType } = req.body;
  const parsedBody = parseConvo({ endpoint, endpointType, conversation: req.body });
  req.body.endpointOption = buildFunction[endpointType ?? endpoint](
@ -25,6 +26,10 @@ function buildEndpointOption(req, res, next) {
    parsedBody,
    endpointType,
  );
+
+  const modelsConfig = await getModelsConfig(req);
+  req.body.endpointOption.modelsConfig = modelsConfig;
+
  if (req.body.files) {
    // hold the promise
    req.body.endpointOption.attachments = processFiles(req.body.files);
--- a/api/server/middleware/validateModel.js
+++ b/api/server/middleware/validateModel.js
@ -1,8 +1,7 @@
-const { CacheKeys, ViolationTypes } = require('librechat-data-provider');
-const { loadModels } = require('~/server/controllers/ModelController');
-const { logViolation, getLogStores } = require('~/cache');
+const { ViolationTypes } = require('librechat-data-provider');
+const { getModelsConfig } = require('~/server/controllers/ModelController');
 const { handleError } = require('~/server/utils');
-
+const { logViolation } = require('~/cache');
 /**
 * Validates the model of the request.
 *
@ -17,11 +16,7 @@ const validateModel = async (req, res, next) => {
    return handleError(res, { text: 'Model not provided' });
  }

-  const cache = getLogStores(CacheKeys.CONFIG_STORE);
-  let modelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
-  if (!modelsConfig) {
-    modelsConfig = await loadModels(req);
-  }
+  const modelsConfig = await getModelsConfig(req);

  if (!modelsConfig) {
    return handleError(res, { text: 'Models not loaded' });
--- a/api/server/services/Endpoints/anthropic/buildOptions.js
+++ b/api/server/services/Endpoints/anthropic/buildOptions.js
@ -1,9 +1,10 @@
 const buildOptions = (endpoint, parsedBody) => {
-  const { modelLabel, promptPrefix, ...rest } = parsedBody;
+  const { modelLabel, promptPrefix, resendImages, ...rest } = parsedBody;
  const endpointOption = {
    endpoint,
    modelLabel,
    promptPrefix,
+    resendImages,
    modelOptions: {
      ...rest,
    },
--- a/api/server/services/Files/Firebase/images.js
+++ b/api/server/services/Files/Firebase/images.js
@ -11,12 +11,13 @@ const { logger } = require('~/config');
 * Converts an image file to the WebP format. The function first resizes the image based on the specified
 * resolution.
 *
- *
- * @param {Express.Request} req - The request object from Express. It should have a `user` property with an `id`
+ * @param {Object} params - The params object.
+ * @param {Express.Request} params.req - The request object from Express. It should have a `user` property with an `id`
 *                       representing the user, and an `app.locals.paths` object with an `imageOutput` path.
- * @param {Express.Multer.File} file - The file object, which is part of the request. The file object should
+ * @param {Express.Multer.File} params.file - The file object, which is part of the request. The file object should
 *                                     have a `path` property that points to the location of the uploaded file.
- * @param {string} [resolution='high'] - Optional. The desired resolution for the image resizing. Default is 'high'.
+ * @param {EModelEndpoint} params.endpoint - The endpoint identifier, forwarded to `resizeImageBuffer` for endpoint-specific resizing.
+ * @param {string} [params.resolution='high'] - Optional. The desired resolution for the image resizing. Default is 'high'.
 *
 * @returns {Promise<{ filepath: string, bytes: number, width: number, height: number}>}
 *          A promise that resolves to an object containing:
@ -25,10 +26,14 @@ const { logger } = require('~/config');
 *            - width: The width of the converted image.
 *            - height: The height of the converted image.
 */
-async function uploadImageToFirebase(req, file, resolution = 'high') {
+async function uploadImageToFirebase({ req, file, endpoint, resolution = 'high' }) {
  const inputFilePath = file.path;
  const inputBuffer = await fs.promises.readFile(inputFilePath);
-  const { buffer: resizedBuffer, width, height } = await resizeImageBuffer(inputBuffer, resolution);
+  const {
+    buffer: resizedBuffer,
+    width,
+    height,
+  } = await resizeImageBuffer(inputBuffer, resolution, endpoint);
  const extension = path.extname(inputFilePath);
  const userId = req.user.id;

--- a/api/server/services/Files/Local/images.js
+++ b/api/server/services/Files/Local/images.js
@ -13,12 +13,13 @@ const { updateFile } = require('~/models/File');
 * it converts the image to WebP format before saving.
 *
 * The original image is deleted after conversion.
- *
- * @param {Object} req - The request object from Express. It should have a `user` property with an `id`
+ * @param {Object} params - The params object.
+ * @param {Object} params.req - The request object from Express. It should have a `user` property with an `id`
 *                       representing the user, and an `app.locals.paths` object with an `imageOutput` path.
- * @param {Express.Multer.File} file - The file object, which is part of the request. The file object should
+ * @param {Express.Multer.File} params.file - The file object, which is part of the request. The file object should
 *                                     have a `path` property that points to the location of the uploaded file.
- * @param {string} [resolution='high'] - Optional. The desired resolution for the image resizing. Default is 'high'.
+ * @param {EModelEndpoint} params.endpoint - The endpoint identifier, forwarded to `resizeImageBuffer` for endpoint-specific resizing.
+ * @param {string} [params.resolution='high'] - Optional. The desired resolution for the image resizing. Default is 'high'.
 *
 * @returns {Promise<{ filepath: string, bytes: number, width: number, height: number}>}
 *          A promise that resolves to an object containing:
@ -27,10 +28,14 @@ const { updateFile } = require('~/models/File');
 *            - width: The width of the converted image.
 *            - height: The height of the converted image.
 */
-async function uploadLocalImage(req, file, resolution = 'high') {
+async function uploadLocalImage({ req, file, endpoint, resolution = 'high' }) {
  const inputFilePath = file.path;
  const inputBuffer = await fs.promises.readFile(inputFilePath);
-  const { buffer: resizedBuffer, width, height } = await resizeImageBuffer(inputBuffer, resolution);
+  const {
+    buffer: resizedBuffer,
+    width,
+    height,
+  } = await resizeImageBuffer(inputBuffer, resolution, endpoint);
  const extension = path.extname(inputFilePath);

  const { imageOutput } = req.app.locals.paths;
--- a/api/server/services/Files/images/encode.js
+++ b/api/server/services/Files/images/encode.js
@ -23,6 +23,8 @@ async function fetchImageToBase64(url) {
  }
 }

+const base64Only = new Set([EModelEndpoint.google, EModelEndpoint.anthropic]);
+
 /**
 * Encodes and formats the given files.
 * @param {Express.Request} req - The request object.
@ -50,7 +52,7 @@ async function encodeAndFormat(req, files, endpoint) {
    encodingMethods[source] = prepareImagePayload;

    /* Google doesn't support passing URLs to payload */
-    if (source !== FileSources.local && endpoint === EModelEndpoint.google) {
+    if (source !== FileSources.local && base64Only.has(endpoint)) {
      const [_file, imageURL] = await prepareImagePayload(req, file);
      promises.push([_file, await fetchImageToBase64(imageURL)]);
      continue;
@ -81,6 +83,14 @@ async function encodeAndFormat(req, files, endpoint) {

    if (endpoint && endpoint === EModelEndpoint.google) {
      imagePart.image_url = imagePart.image_url.url;
+    } else if (endpoint && endpoint === EModelEndpoint.anthropic) {
+      imagePart.type = 'image';
+      imagePart.source = {
+        type: 'base64',
+        media_type: file.type,
+        data: imageContent,
+      };
+      delete imagePart.image_url;
    }

    result.image_urls.push(imagePart);
--- a/api/server/services/Files/images/resize.js
+++ b/api/server/services/Files/images/resize.js
@ -1,4 +1,5 @@
 const sharp = require('sharp');
+const { EModelEndpoint } = require('librechat-data-provider');

 /**
 * Resizes an image from a given buffer based on the specified resolution.
@ -7,13 +8,14 @@ const sharp = require('sharp');
 * @param {'low' | 'high'} resolution - The resolution to resize the image to.
 *                                      'low' for a maximum of 512x512 resolution,
 *                                      'high' for a maximum of 768x2000 resolution.
+ * @param {EModelEndpoint} endpoint - Identifier for specific endpoint handling
 * @returns {Promise<{buffer: Buffer, width: number, height: number}>} An object containing the resized image buffer and its dimensions.
 * @throws Will throw an error if the resolution parameter is invalid.
 */
-async function resizeImageBuffer(inputBuffer, resolution) {
+async function resizeImageBuffer(inputBuffer, resolution, endpoint) {
  const maxLowRes = 512;
  const maxShortSideHighRes = 768;
-  const maxLongSideHighRes = 2000;
+  const maxLongSideHighRes = endpoint === EModelEndpoint.anthropic ? 1568 : 2000;

  let newWidth, newHeight;
  let resizeOptions = { fit: 'inside', withoutEnlargement: true };
--- a/api/server/services/Files/process.js
+++ b/api/server/services/Files/process.js
@ -184,8 +184,8 @@ const processFileURL = async ({ fileStrategy, userId, URL, fileName, basePath, c
 const processImageFile = async ({ req, res, file, metadata }) => {
  const source = req.app.locals.fileStrategy;
  const { handleImageUpload } = getStrategyFunctions(source);
-  const { file_id, temp_file_id } = metadata;
-  const { filepath, bytes, width, height } = await handleImageUpload(req, file);
+  const { file_id, temp_file_id, endpoint } = metadata;
+  const { filepath, bytes, width, height } = await handleImageUpload({ req, file, endpoint });
  const result = await createFile(
    {
      user: req.user.id,