LibreChat/api/server/services/Files/images/encode.js
Danny Avila 2524d33362
📂 refactor: Cleanup File Filtering Logic, Improve Validation (#10414)
* feat: add filterFilesByEndpointConfig to filter disabled file processing by provider

* chore: explicit define of endpointFileConfig for better debugging

* refactor: move `normalizeEndpointName` to data-provider as used app-wide

* chore: remove overrideEndpoint from useFileHandling

* refactor: improve endpoint file config selection

* refactor: update filterFilesByEndpointConfig to accept structured parameters and improve endpoint file config handling

* refactor: replace defaultFileConfig with getEndpointFileConfig for improved file configuration handling across components

* test: add comprehensive unit tests for getEndpointFileConfig to validate endpoint configuration handling

* refactor: streamline agent endpoint assignment and improve file filtering logic

* feat: add error handling for disabled file uploads in endpoint configuration

* refactor: update encodeAndFormat functions to accept structured parameters for provider and endpoint (see the call sketch after the commit notes)

* refactor: streamline requestFiles handling in initializeAgent function

* fix: getEndpointFileConfig partial config merging scenarios

* refactor: enhance mergeWithDefault function to support document-supported providers with comprehensive MIME types

* refactor: user-configured default file config in getEndpointFileConfig

* fix: prevent file handling when endpoint is disabled and file is dragged to chat

* refactor: move `getEndpointField` to `data-provider` and update usage across components and hooks

* fix: prioritize endpointType based on agent.endpoint in file filtering logic

* fix: prioritize agent.endpoint in file filtering logic and remove unnecessary endpointType defaulting
2025-11-10 19:05:30 -05:00
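
The updated `encodeAndFormat` below takes its provider/endpoint information as a structured `params` object (third argument) instead of loose positional values. A minimal call sketch, assuming an agent-style caller; `agent` and `requestFiles` are illustrative names and are not taken from this file:

// Illustrative only: `agent` and `requestFiles` are assumed caller-side names.
const { files, image_urls } = await encodeAndFormat(
  req,
  requestFiles,
  { provider: agent.provider, endpoint: agent.endpoint },
  VisionModes.agents,
);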


const axios = require('axios');
const { logAxiosError } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const {
  FileSources,
  VisionModes,
  ImageDetail,
  ContentTypes,
  EModelEndpoint,
} = require('librechat-data-provider');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');

/**
 * Converts a readable stream to a base64 encoded string.
 *
 * @param {NodeJS.ReadableStream} stream - The readable stream to convert.
 * @param {boolean} [destroyStream=true] - Whether to destroy the stream after processing.
 * @returns {Promise<string>} - Promise resolving to the base64 encoded content.
 */
async function streamToBase64(stream, destroyStream = true) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    stream.on('data', (chunk) => {
      chunks.push(chunk);
    });
    stream.on('end', () => {
      try {
        const buffer = Buffer.concat(chunks);
        const base64Data = buffer.toString('base64');
        chunks.length = 0; // Clear the array
        resolve(base64Data);
      } catch (err) {
        reject(err);
      }
    });
    stream.on('error', (error) => {
      chunks.length = 0;
      reject(error);
    });
  }).finally(() => {
    // Clean up the stream if required
    if (destroyStream && stream.destroy && typeof stream.destroy === 'function') {
      stream.destroy();
    }
  });
}

/**
 * Fetches an image from a URL and returns its base64 representation.
 *
 * @async
 * @param {string} url The URL of the image.
 * @returns {Promise<string>} The base64-encoded string of the image.
 * @throws {Error} If there's an issue fetching the image or encoding it.
 */
async function fetchImageToBase64(url) {
  try {
    const response = await axios.get(url, {
      responseType: 'arraybuffer',
    });
    const base64Data = Buffer.from(response.data).toString('base64');
    response.data = null;
    return base64Data;
  } catch (error) {
    const message = 'Error fetching image to convert to base64';
    throw new Error(logAxiosError({ message, error }));
  }
}
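
/**
 * Providers/endpoints that require base64 image data rather than remote URLs;
 * non-local files for these endpoints are fetched and re-encoded before formatting.
 */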
const base64Only = new Set([
  EModelEndpoint.google,
  EModelEndpoint.anthropic,
  'Ollama',
  'ollama',
  EModelEndpoint.bedrock,
]);

const blobStorageSources = new Set([FileSources.azure_blob, FileSources.s3]);

/**
 * Encodes and formats the given files.
 * @param {ServerRequest} req - The request object.
 * @param {Array<MongoFile>} files - The array of files to encode and format.
 * @param {object} params - Object containing provider/endpoint information
 * @param {Providers | EModelEndpoint | string} [params.provider] - The provider for the image
 * @param {string} [params.endpoint] - Optional: The endpoint for the image
 * @param {string} [mode] - Optional: The endpoint mode for the image.
 * @returns {Promise<{ files: MongoFile[]; image_urls: MessageContentImageUrl[] }>} - A promise that resolves to the result object containing the encoded images and file details.
 */
async function encodeAndFormat(req, files, params, mode) {
  const { provider, endpoint } = params;
  const effectiveEndpoint = endpoint ?? provider;
  const promises = [];
  /** @type {Record<FileSources, Pick<ReturnType<typeof getStrategyFunctions>, 'prepareImagePayload' | 'getDownloadStream'>>} */
  const encodingMethods = {};
  /** @type {{ files: MongoFile[]; image_urls: MessageContentImageUrl[] }} */
  const result = {
    files: [],
    image_urls: [],
  };

  if (!files || !files.length) {
    return result;
  }
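
  /*
   * Resolve each file into a [file, imageContent] pair: non-image files (no height)
   * get null content, blob-storage images are streamed to base64, base64-only
   * endpoints fetch and re-encode remote URLs, and everything else defers to the
   * source's prepareImagePayload.
   */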
  for (let file of files) {
    /** @type {FileSources} */
    const source = file.source ?? FileSources.local;

    if (!file.height) {
      promises.push([file, null]);
      continue;
    }

    if (!encodingMethods[source]) {
      const { prepareImagePayload, getDownloadStream } = getStrategyFunctions(source);
      if (!prepareImagePayload) {
        throw new Error(`Encoding function not implemented for ${source}`);
      }

      encodingMethods[source] = { prepareImagePayload, getDownloadStream };
    }

    const preparePayload = encodingMethods[source].prepareImagePayload;

    /* We need to fetch the image and convert it to base64 if we are using S3/Azure Blob storage. */
    if (blobStorageSources.has(source)) {
      try {
        const downloadStream = encodingMethods[source].getDownloadStream;
        let stream = await downloadStream(req, file.filepath);
        let base64Data = await streamToBase64(stream);
        stream = null;
        promises.push([file, base64Data]);
        base64Data = null;
        continue;
      } catch (error) {
        logger.error('Error processing image from blob storage:', error);
      }
    } else if (source !== FileSources.local && base64Only.has(effectiveEndpoint)) {
      const [_file, imageURL] = await preparePayload(req, file);
      promises.push([_file, await fetchImageToBase64(imageURL)]);
      continue;
    }

    promises.push(preparePayload(req, file));
  }

  const detail = req.body.imageDetail ?? ImageDetail.auto;

  /** @type {Array<[MongoFile, string]>} */
  const formattedImages = await Promise.all(promises);
  promises.length = 0;

  for (const [file, imageContent] of formattedImages) {
    const fileMetadata = {
      type: file.type,
      file_id: file.file_id,
      filepath: file.filepath,
      filename: file.filename,
      embedded: !!file.embedded,
      metadata: file.metadata,
    };

    if (file.height && file.width) {
      fileMetadata.height = file.height;
      fileMetadata.width = file.width;
    }

    if (!imageContent) {
      result.files.push(fileMetadata);
      continue;
    }

    const imagePart = {
      type: ContentTypes.IMAGE_URL,
      image_url: {
        url: imageContent.startsWith('http')
          ? imageContent
          : `data:${file.type};base64,${imageContent}`,
        detail,
      },
    };

    if (mode === VisionModes.agents) {
      result.image_urls.push({ ...imagePart });
      result.files.push({ ...fileMetadata });
      continue;
    }
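
    /*
     * Provider-specific payload shapes: Google's generative mode expects inlineData,
     * other Google requests expect the bare URL string, and Anthropic expects a
     * base64 `source` block in place of image_url.
     */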
    if (
      effectiveEndpoint &&
      effectiveEndpoint === EModelEndpoint.google &&
      mode === VisionModes.generative
    ) {
      delete imagePart.image_url;
      imagePart.inlineData = {
        mimeType: file.type,
        data: imageContent,
      };
    } else if (effectiveEndpoint && effectiveEndpoint === EModelEndpoint.google) {
      imagePart.image_url = imagePart.image_url.url;
    } else if (effectiveEndpoint && effectiveEndpoint === EModelEndpoint.anthropic) {
      imagePart.type = 'image';
      imagePart.source = {
        type: 'base64',
        media_type: file.type,
        data: imageContent,
      };
      delete imagePart.image_url;
    }

    result.image_urls.push({ ...imagePart });
    result.files.push({ ...fileMetadata });
  }

  formattedImages.length = 0;
  return { ...result };
}

module.exports = {
  encodeAndFormat,
};