🤖 feat(Anthropic): Claude 3 & Vision Support (#1984)

* chore: bump anthropic SDK

* chore: update anthropic config settings (fileSupport, default models)

* feat: anthropic multi modal formatting

* refactor: update vision models and use endpoint specific max long side resizing

* feat(anthropic): multimodal messages, retry logic, and messages payload

* chore: add more safety to trimming content due to whitespace error for assistant messages

* feat(anthropic): token accounting and resending multiple images in progress

* chore: bump data-provider

* feat(anthropic): resendImages feature

* chore: optimize Edit/Ask controllers, switch model back to req model

* fix: false positive of invalid model

* refactor(validateVisionModel): use object as arg, pass in additional/available models

* refactor(validateModel): use helper function, `getModelsConfig`

* feat: add modelsConfig to endpointOption so it gets passed to all clients, use for properly validating vision models

* refactor: initialize default vision model and make sure it's available before assigning it

* refactor(useSSE): avoid resetting model if user selected a new model between request and response

* feat: show rate in transaction logging

* fix: return tokenCountMap regardless of payload shape
This commit is contained in:
Danny Avila 2024-03-06 00:04:52 -05:00 committed by GitHub
parent b023c5683d
commit 8263ddda3f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 599 additions and 115 deletions

View file

@ -1,7 +1,7 @@
const { getResponseSender, Constants } = require('librechat-data-provider');
const { sendMessage, createOnProgress } = require('~/server/utils');
const { saveMessage, getConvoTitle, getConvo } = require('~/models');
const { createAbortController, handleAbortError } = require('~/server/middleware');
const { sendMessage, createOnProgress } = require('~/server/utils');
const { saveMessage, getConvo } = require('~/models');
const { logger } = require('~/config');
const AskController = async (req, res, next, initializeClient, addTitle) => {
@ -134,16 +134,21 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
response.endpoint = endpointOption.endpoint;
const conversation = await getConvo(user, conversationId);
conversation.title =
conversation && !conversation.title ? null : conversation?.title || 'New Chat';
if (client.options.attachments) {
userMessage.files = client.options.attachments;
conversation.model = endpointOption.modelOptions.model;
delete userMessage.image_urls;
}
if (!abortController.signal.aborted) {
sendMessage(res, {
title: await getConvoTitle(user, conversationId),
final: true,
conversation: await getConvo(user, conversationId),
conversation,
title: conversation.title,
requestMessage: userMessage,
responseMessage: response,
});

View file

@ -1,7 +1,7 @@
const { getResponseSender } = require('librechat-data-provider');
const { sendMessage, createOnProgress } = require('~/server/utils');
const { saveMessage, getConvoTitle, getConvo } = require('~/models');
const { createAbortController, handleAbortError } = require('~/server/middleware');
const { sendMessage, createOnProgress } = require('~/server/utils');
const { saveMessage, getConvo } = require('~/models');
const { logger } = require('~/config');
const EditController = async (req, res, next, initializeClient) => {
@ -131,11 +131,19 @@ const EditController = async (req, res, next, initializeClient) => {
response = { ...response, ...metadata };
}
const conversation = await getConvo(user, conversationId);
conversation.title =
conversation && !conversation.title ? null : conversation?.title || 'New Chat';
if (client.options.attachments) {
conversation.model = endpointOption.modelOptions.model;
}
if (!abortController.signal.aborted) {
sendMessage(res, {
title: await getConvoTitle(user, conversationId),
final: true,
conversation: await getConvo(user, conversationId),
conversation,
title: conversation.title,
requestMessage: userMessage,
responseMessage: response,
});

View file

@ -2,6 +2,16 @@ const { CacheKeys } = require('librechat-data-provider');
const { loadDefaultModels, loadConfigModels } = require('~/server/services/Config');
const { getLogStores } = require('~/cache');
/**
 * Resolves the models configuration, preferring the cached copy.
 *
 * Looks up `CacheKeys.MODELS_CONFIG` in the `CacheKeys.CONFIG_STORE` store and,
 * when the cached value is falsy, falls back to `loadModels(req)`.
 *
 * @param {Express.Request} req - The Express request object, forwarded to `loadModels` on a cache miss.
 * @returns {Promise<*>} The cached models config if present, otherwise the result of `loadModels(req)`.
 */
const getModelsConfig = async (req) => {
  const configStore = getLogStores(CacheKeys.CONFIG_STORE);
  const cachedConfig = await configStore.get(CacheKeys.MODELS_CONFIG);
  if (cachedConfig) {
    return cachedConfig;
  }
  // Cache miss: load the models for this request instead.
  return await loadModels(req);
};
/**
* Loads the models from the config.
* @param {Express.Request} req - The Express request object.
@ -27,4 +37,4 @@ async function modelController(req, res) {
res.send(modelConfig);
}
module.exports = { modelController, loadModels };
module.exports = { modelController, loadModels, getModelsConfig };

View file

@ -1,11 +1,12 @@
const { parseConvo, EModelEndpoint } = require('librechat-data-provider');
const { getModelsConfig } = require('~/server/controllers/ModelController');
const { processFiles } = require('~/server/services/Files/process');
const gptPlugins = require('~/server/services/Endpoints/gptPlugins');
const anthropic = require('~/server/services/Endpoints/anthropic');
const assistant = require('~/server/services/Endpoints/assistant');
const openAI = require('~/server/services/Endpoints/openAI');
const custom = require('~/server/services/Endpoints/custom');
const google = require('~/server/services/Endpoints/google');
const assistant = require('~/server/services/Endpoints/assistant');
const buildFunction = {
[EModelEndpoint.openAI]: openAI.buildOptions,
@ -17,7 +18,7 @@ const buildFunction = {
[EModelEndpoint.assistants]: assistant.buildOptions,
};
function buildEndpointOption(req, res, next) {
async function buildEndpointOption(req, res, next) {
const { endpoint, endpointType } = req.body;
const parsedBody = parseConvo({ endpoint, endpointType, conversation: req.body });
req.body.endpointOption = buildFunction[endpointType ?? endpoint](
@ -25,6 +26,10 @@ function buildEndpointOption(req, res, next) {
parsedBody,
endpointType,
);
const modelsConfig = await getModelsConfig(req);
req.body.endpointOption.modelsConfig = modelsConfig;
if (req.body.files) {
// hold the promise
req.body.endpointOption.attachments = processFiles(req.body.files);

View file

@ -1,8 +1,7 @@
const { CacheKeys, ViolationTypes } = require('librechat-data-provider');
const { loadModels } = require('~/server/controllers/ModelController');
const { logViolation, getLogStores } = require('~/cache');
const { ViolationTypes } = require('librechat-data-provider');
const { getModelsConfig } = require('~/server/controllers/ModelController');
const { handleError } = require('~/server/utils');
const { logViolation } = require('~/cache');
/**
* Validates the model of the request.
*
@ -17,11 +16,7 @@ const validateModel = async (req, res, next) => {
return handleError(res, { text: 'Model not provided' });
}
const cache = getLogStores(CacheKeys.CONFIG_STORE);
let modelsConfig = await cache.get(CacheKeys.MODELS_CONFIG);
if (!modelsConfig) {
modelsConfig = await loadModels(req);
}
const modelsConfig = await getModelsConfig(req);
if (!modelsConfig) {
return handleError(res, { text: 'Models not loaded' });

View file

@ -1,9 +1,10 @@
const buildOptions = (endpoint, parsedBody) => {
const { modelLabel, promptPrefix, ...rest } = parsedBody;
const { modelLabel, promptPrefix, resendImages, ...rest } = parsedBody;
const endpointOption = {
endpoint,
modelLabel,
promptPrefix,
resendImages,
modelOptions: {
...rest,
},

View file

@ -11,12 +11,13 @@ const { logger } = require('~/config');
* Converts an image file to the WebP format. The function first resizes the image based on the specified
* resolution.
*
*
* @param {Express.Request} req - The request object from Express. It should have a `user` property with an `id`
* @param {Object} params - The params object.
* @param {Express.Request} params.req - The request object from Express. It should have a `user` property with an `id`
* representing the user, and an `app.locals.paths` object with an `imageOutput` path.
* @param {Express.Multer.File} file - The file object, which is part of the request. The file object should
* @param {Express.Multer.File} params.file - The file object, which is part of the request. The file object should
* have a `path` property that points to the location of the uploaded file.
* @param {string} [resolution='high'] - Optional. The desired resolution for the image resizing. Default is 'high'.
* @param {EModelEndpoint} params.endpoint - The endpoint identifier, forwarded to `resizeImageBuffer` for endpoint-specific resizing.
* @param {string} [params.resolution='high'] - Optional. The desired resolution for the image resizing. Default is 'high'.
*
* @returns {Promise<{ filepath: string, bytes: number, width: number, height: number}>}
* A promise that resolves to an object containing:
@ -25,10 +26,14 @@ const { logger } = require('~/config');
* - width: The width of the converted image.
* - height: The height of the converted image.
*/
async function uploadImageToFirebase(req, file, resolution = 'high') {
async function uploadImageToFirebase({ req, file, endpoint, resolution = 'high' }) {
const inputFilePath = file.path;
const inputBuffer = await fs.promises.readFile(inputFilePath);
const { buffer: resizedBuffer, width, height } = await resizeImageBuffer(inputBuffer, resolution);
const {
buffer: resizedBuffer,
width,
height,
} = await resizeImageBuffer(inputBuffer, resolution, endpoint);
const extension = path.extname(inputFilePath);
const userId = req.user.id;

View file

@ -13,12 +13,13 @@ const { updateFile } = require('~/models/File');
* it converts the image to WebP format before saving.
*
* The original image is deleted after conversion.
*
* @param {Object} req - The request object from Express. It should have a `user` property with an `id`
* @param {Object} params - The params object.
* @param {Object} params.req - The request object from Express. It should have a `user` property with an `id`
* representing the user, and an `app.locals.paths` object with an `imageOutput` path.
* @param {Express.Multer.File} file - The file object, which is part of the request. The file object should
* @param {Express.Multer.File} params.file - The file object, which is part of the request. The file object should
* have a `path` property that points to the location of the uploaded file.
* @param {string} [resolution='high'] - Optional. The desired resolution for the image resizing. Default is 'high'.
* @param {EModelEndpoint} params.endpoint - The endpoint identifier, forwarded to `resizeImageBuffer` for endpoint-specific resizing.
* @param {string} [params.resolution='high'] - Optional. The desired resolution for the image resizing. Default is 'high'.
*
* @returns {Promise<{ filepath: string, bytes: number, width: number, height: number}>}
* A promise that resolves to an object containing:
@ -27,10 +28,14 @@ const { updateFile } = require('~/models/File');
* - width: The width of the converted image.
* - height: The height of the converted image.
*/
async function uploadLocalImage(req, file, resolution = 'high') {
async function uploadLocalImage({ req, file, endpoint, resolution = 'high' }) {
const inputFilePath = file.path;
const inputBuffer = await fs.promises.readFile(inputFilePath);
const { buffer: resizedBuffer, width, height } = await resizeImageBuffer(inputBuffer, resolution);
const {
buffer: resizedBuffer,
width,
height,
} = await resizeImageBuffer(inputBuffer, resolution, endpoint);
const extension = path.extname(inputFilePath);
const { imageOutput } = req.app.locals.paths;

View file

@ -23,6 +23,8 @@ async function fetchImageToBase64(url) {
}
}
const base64Only = new Set([EModelEndpoint.google, EModelEndpoint.anthropic]);
/**
* Encodes and formats the given files.
* @param {Express.Request} req - The request object.
@ -50,7 +52,7 @@ async function encodeAndFormat(req, files, endpoint) {
encodingMethods[source] = prepareImagePayload;
/* Base64-only endpoints (Google, Anthropic) don't take image URLs in the payload */
if (source !== FileSources.local && endpoint === EModelEndpoint.google) {
if (source !== FileSources.local && base64Only.has(endpoint)) {
const [_file, imageURL] = await prepareImagePayload(req, file);
promises.push([_file, await fetchImageToBase64(imageURL)]);
continue;
@ -81,6 +83,14 @@ async function encodeAndFormat(req, files, endpoint) {
if (endpoint && endpoint === EModelEndpoint.google) {
imagePart.image_url = imagePart.image_url.url;
} else if (endpoint && endpoint === EModelEndpoint.anthropic) {
imagePart.type = 'image';
imagePart.source = {
type: 'base64',
media_type: file.type,
data: imageContent,
};
delete imagePart.image_url;
}
result.image_urls.push(imagePart);

View file

@ -1,4 +1,5 @@
const sharp = require('sharp');
const { EModelEndpoint } = require('librechat-data-provider');
/**
* Resizes an image from a given buffer based on the specified resolution.
@ -7,13 +8,14 @@ const sharp = require('sharp');
* @param {'low' | 'high'} resolution - The resolution to resize the image to.
* 'low' for a maximum of 512x512 resolution,
* 'high' for a maximum of 768x2000 resolution (long side capped at 1568 for the Anthropic endpoint).
* @param {EModelEndpoint} endpoint - Identifier for specific endpoint handling
* @returns {Promise<{buffer: Buffer, width: number, height: number}>} An object containing the resized image buffer and its dimensions.
* @throws Will throw an error if the resolution parameter is invalid.
*/
async function resizeImageBuffer(inputBuffer, resolution) {
async function resizeImageBuffer(inputBuffer, resolution, endpoint) {
const maxLowRes = 512;
const maxShortSideHighRes = 768;
const maxLongSideHighRes = 2000;
const maxLongSideHighRes = endpoint === EModelEndpoint.anthropic ? 1568 : 2000;
let newWidth, newHeight;
let resizeOptions = { fit: 'inside', withoutEnlargement: true };

View file

@ -184,8 +184,8 @@ const processFileURL = async ({ fileStrategy, userId, URL, fileName, basePath, c
const processImageFile = async ({ req, res, file, metadata }) => {
const source = req.app.locals.fileStrategy;
const { handleImageUpload } = getStrategyFunctions(source);
const { file_id, temp_file_id } = metadata;
const { filepath, bytes, width, height } = await handleImageUpload(req, file);
const { file_id, temp_file_id, endpoint } = metadata;
const { filepath, bytes, width, height } = await handleImageUpload({ req, file, endpoint });
const result = await createFile(
{
user: req.user.id,