🛫 refactor: Move Encoding Logic to packages/api (#9182)

* refactor: move audio encode over to TS

* refactor: audio encoding now functional in LC again

* refactor: move video encode over to TS

* refactor: move document encode over to TS

* refactor: video encoding now functional in LC again

* refactor: document encoding now functional in LC again

* fix: extend file type options in AttachFileMenu to include 'google_multimodal' and update dependency array to include agent?.provider

* feat: only accept pdfs if responses api is enabled for openai convos
This commit is contained in:
Dustin Healy 2025-08-20 17:01:21 -07:00 committed by Dustin Healy
parent ccb2e031dd
commit e55264b22a
11 changed files with 408 additions and 415 deletions

View file

@ -11,6 +11,9 @@ const {
memoryInstructions,
formatContentStrings,
createMemoryProcessor,
encodeAndFormatAudios,
encodeAndFormatVideos,
encodeAndFormatDocuments,
} = require('@librechat/api');
const {
Callback,
@ -42,21 +45,19 @@ const {
setMemory,
} = require('~/models');
const { getMCPAuthMap, checkCapability, hasCustomUserVars } = require('~/server/services/Config');
const { encodeAndFormatDocuments } = require('~/server/services/Files/Documents/encode');
const { addCacheControl, createContextHandlers } = require('~/app/clients/prompts');
const { encodeAndFormatVideos } = require('~/server/services/Files/Video/encode');
const { encodeAndFormatAudios } = require('~/server/services/Files/Audio/encode');
const { getFiles } = require('~/models');
const { initializeAgent } = require('~/server/services/Endpoints/agents/agent');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const { getFormattedMemories, deleteMemory, setMemory } = require('~/models');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { getProviderConfig } = require('~/server/services/Endpoints');
const { getStrategyFunctions } = require('~/server/services/Files');
const { checkCapability } = require('~/server/services/Config');
const BaseClient = require('~/app/clients/BaseClient');
const { getRoleByName } = require('~/models/Role');
const { loadAgent } = require('~/models/Agent');
const { getMCPManager } = require('~/config');
const { getFiles } = require('~/models');
const omitTitleOptions = new Set([
'stream',
@ -239,6 +240,7 @@ class AgentClient extends BaseClient {
this.options.req,
attachments,
this.options.agent.provider,
getStrategyFunctions,
);
message.documents =
documentResult.documents && documentResult.documents.length
@ -252,6 +254,7 @@ class AgentClient extends BaseClient {
this.options.req,
attachments,
this.options.agent.provider,
getStrategyFunctions,
);
message.videos =
videoResult.videos && videoResult.videos.length ? videoResult.videos : undefined;
@ -263,6 +266,7 @@ class AgentClient extends BaseClient {
this.options.req,
attachments,
this.options.agent.provider,
getStrategyFunctions,
);
message.audios =
audioResult.audios && audioResult.audios.length ? audioResult.audios : undefined;

View file

@ -1,111 +0,0 @@
const { EModelEndpoint, isDocumentSupportedEndpoint } = require('librechat-data-provider');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { validateAudio } = require('@librechat/api');
const { streamToBuffer } = require('~/server/services/Files/Documents/encode');
/**
* Encodes and formats audio files for different endpoints
* @param {Express.Request} req - The request object
* @param {Array<MongoFile>} files - Array of audio files
* @param {EModelEndpoint} endpoint - The endpoint to format for
* @returns {Promise<{ audios: Array, files: Array<MongoFile> }>}
*/
async function encodeAndFormatAudios(req, files, endpoint) {
  const promises = [];
  // Per-source cache of storage strategy functions so getStrategyFunctions
  // is invoked at most once per distinct file source.
  const encodingMethods = {};
  /** @type {{ audios: any[]; files: MongoFile[] }} */
  const result = {
    audios: [],
    files: [],
  };
  for (const file of files) {
    // Skip entries with no retrievable path.
    if (!file || !file.filepath) {
      continue;
    }
    const source = file.source ?? 'local';
    if (!encodingMethods[source]) {
      encodingMethods[source] = getStrategyFunctions(source);
    }
    // Minimal metadata echoed back to the caller for each processed file.
    const fileMetadata = {
      file_id: file.file_id || file._id,
      temp_file_id: file.temp_file_id,
      filepath: file.filepath,
      source: file.source,
      filename: file.filename,
      type: file.type,
    };
    promises.push([file, fileMetadata]);
  }
  // Download and base64-encode all files concurrently; allSettled keeps one
  // failed download from rejecting the whole batch.
  const results = await Promise.allSettled(
    promises.map(async ([file, fileMetadata]) => {
      if (!file || !fileMetadata) {
        return { file: null, content: null, metadata: fileMetadata };
      }
      try {
        const source = file.source ?? 'local';
        const { getDownloadStream } = encodingMethods[source];
        const stream = await getDownloadStream(req, file.filepath);
        const buffer = await streamToBuffer(stream);
        const audioContent = buffer.toString('base64');
        return {
          file,
          content: audioContent,
          metadata: fileMetadata,
        };
      } catch (error) {
        // Per-file failure: log and return content-less entry so metadata
        // is still surfaced to the caller below.
        console.error(`Error processing audio ${file.filename}:`, error);
        return { file, content: null, metadata: fileMetadata };
      }
    }),
  );
  for (const settledResult of results) {
    if (settledResult.status === 'rejected') {
      console.error('Audio processing failed:', settledResult.reason);
      continue;
    }
    const { file, content, metadata } = settledResult.value;
    if (!content || !file) {
      // Encoding failed: surface the metadata without audio content.
      if (metadata) {
        result.files.push(metadata);
      }
      continue;
    }
    if (file.type.startsWith('audio/') && isDocumentSupportedEndpoint(endpoint)) {
      const audioBuffer = Buffer.from(content, 'base64');
      const validation = await validateAudio(audioBuffer, audioBuffer.length, endpoint);
      if (!validation.isValid) {
        // NOTE(review): throwing here aborts formatting of ALL remaining
        // files, not only the invalid one — confirm this is intended.
        throw new Error(`Audio validation failed: ${validation.error}`);
      }
      // Only Google currently accepts inline audio parts.
      if (endpoint === EModelEndpoint.google) {
        const audioPart = {
          type: 'audio',
          mimeType: file.type,
          data: content,
        };
        result.audios.push(audioPart);
      }
      result.files.push(metadata);
    }
    // NOTE(review): non-audio files (or unsupported endpoints) fall through
    // without pushing metadata, so they silently vanish from result.files.
  }
  return result;
}
module.exports = {
encodeAndFormatAudios,
};

View file

@ -1,181 +0,0 @@
const { EModelEndpoint, isDocumentSupportedEndpoint } = require('librechat-data-provider');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { validatePdf } = require('@librechat/api');
/**
* Converts a readable stream to a buffer.
*
* @param {NodeJS.ReadableStream} stream - The readable stream to convert.
* @returns {Promise<Buffer>} - Promise resolving to the buffer.
*/
async function streamToBuffer(stream) {
  const collected = [];
  try {
    // Accumulate chunks until the stream ends, then join them into one Buffer.
    return await new Promise((resolve, reject) => {
      stream.on('data', (piece) => collected.push(piece));
      stream.on('end', () => {
        try {
          resolve(Buffer.concat(collected));
        } catch (concatError) {
          reject(concatError);
        } finally {
          // Release references to the collected chunks either way.
          collected.length = 0;
        }
      });
      stream.on('error', (streamError) => {
        collected.length = 0;
        reject(streamError);
      });
    });
  } finally {
    // Tear the stream down once the promise settles, success or failure.
    if (typeof stream.destroy === 'function') {
      stream.destroy();
    }
  }
}
/**
* Processes and encodes document files for various endpoints
*
* @param {Express.Request} req - Express request object
* @param {MongoFile[]} files - Array of file objects to process
* @param {string} endpoint - The endpoint identifier (e.g., EModelEndpoint.anthropic)
* @returns {Promise<{documents: MessageContentDocument[], files: MongoFile[]}>}
*/
async function encodeAndFormatDocuments(req, files, endpoint) {
  const promises = [];
  /** @type {Record<FileSources, Pick<ReturnType<typeof getStrategyFunctions>, 'prepareDocumentPayload' | 'getDownloadStream'>>} */
  const encodingMethods = {};
  /** @type {{ documents: MessageContentDocument[]; files: MongoFile[] }} */
  const result = {
    documents: [],
    files: [],
  };
  if (!files || !files.length) {
    return result;
  }
  // Pre-filter to document-like MIME types; only PDFs are encoded today.
  const documentFiles = files.filter(
    (file) => file.type === 'application/pdf' || file.type?.startsWith('application/'), // Future: support for other document types
  );
  if (!documentFiles.length) {
    return result;
  }
  for (let file of documentFiles) {
    /** @type {FileSources} */
    const source = file.source ?? 'local';
    // Only PDFs on endpoints with native document support are processed;
    // everything else is skipped entirely (no metadata is returned for it).
    if (file.type !== 'application/pdf' || !isDocumentSupportedEndpoint(endpoint)) {
      continue;
    }
    if (!encodingMethods[source]) {
      encodingMethods[source] = getStrategyFunctions(source);
    }
    // Minimal metadata echoed back to the caller for each processed file.
    const fileMetadata = {
      file_id: file.file_id || file._id,
      temp_file_id: file.temp_file_id,
      filepath: file.filepath,
      source: file.source,
      filename: file.filename,
      type: file.type,
    };
    promises.push([file, fileMetadata]);
  }
  // Download and base64-encode all files concurrently; allSettled keeps one
  // failed download from rejecting the whole batch.
  const results = await Promise.allSettled(
    promises.map(async ([file, fileMetadata]) => {
      if (!file || !fileMetadata) {
        return { file: null, content: null, metadata: fileMetadata };
      }
      try {
        const source = file.source ?? 'local';
        const { getDownloadStream } = encodingMethods[source];
        const stream = await getDownloadStream(req, file.filepath);
        const buffer = await streamToBuffer(stream);
        const documentContent = buffer.toString('base64');
        return {
          file,
          content: documentContent,
          metadata: fileMetadata,
        };
      } catch (error) {
        // Per-file failure: log and return content-less entry so metadata
        // is still surfaced to the caller below.
        console.error(`Error processing document ${file.filename}:`, error);
        return { file, content: null, metadata: fileMetadata };
      }
    }),
  );
  for (const settledResult of results) {
    if (settledResult.status === 'rejected') {
      console.error('Document processing failed:', settledResult.reason);
      continue;
    }
    const { file, content, metadata } = settledResult.value;
    if (!content || !file) {
      // Encoding failed: surface the metadata without document content.
      if (metadata) {
        result.files.push(metadata);
      }
      continue;
    }
    if (file.type === 'application/pdf' && isDocumentSupportedEndpoint(endpoint)) {
      const pdfBuffer = Buffer.from(content, 'base64');
      const validation = await validatePdf(pdfBuffer, pdfBuffer.length, endpoint);
      if (!validation.isValid) {
        // NOTE(review): throwing here aborts formatting of ALL remaining
        // files, not only the invalid one — confirm this is intended.
        throw new Error(`PDF validation failed: ${validation.error}`);
      }
      if (endpoint === EModelEndpoint.anthropic) {
        // Anthropic: base64 document block with prompt caching and citations.
        const documentPart = {
          type: 'document',
          source: {
            type: 'base64',
            media_type: 'application/pdf',
            data: content,
          },
          cache_control: { type: 'ephemeral' },
          citations: { enabled: true },
        };
        result.documents.push(documentPart);
      } else if (endpoint === EModelEndpoint.openAI) {
        // OpenAI: `input_file` part carrying a base64 data URL.
        const documentPart = {
          type: 'input_file',
          filename: file.filename,
          file_data: `data:application/pdf;base64,${content}`,
        };
        result.documents.push(documentPart);
      } else if (endpoint === EModelEndpoint.google) {
        // Google: inline document part.
        const documentPart = {
          type: 'document',
          mimeType: 'application/pdf',
          data: content,
        };
        result.documents.push(documentPart);
      }
      result.files.push(metadata);
    }
  }
  return result;
}
module.exports = {
encodeAndFormatDocuments,
streamToBuffer,
};

View file

@ -1,6 +0,0 @@
const { encodeAndFormatDocuments, streamToBuffer } = require('./encode');
module.exports = {
encodeAndFormatDocuments,
streamToBuffer,
};

View file

@ -1,111 +0,0 @@
const { EModelEndpoint, isDocumentSupportedEndpoint } = require('librechat-data-provider');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { validateVideo } = require('@librechat/api');
const { streamToBuffer } = require('~/server/services/Files/Documents/encode');
/**
* Encodes and formats video files for different endpoints
* @param {Express.Request} req - The request object
* @param {Array<MongoFile>} files - Array of video files
* @param {EModelEndpoint} endpoint - The endpoint to format for
* @returns {Promise<{ videos: Array, files: Array<MongoFile> }>}
*/
async function encodeAndFormatVideos(req, files, endpoint) {
  const promises = [];
  // Per-source cache of storage strategy functions so getStrategyFunctions
  // is invoked at most once per distinct file source.
  const encodingMethods = {};
  /** @type {{ videos: any[]; files: MongoFile[] }} */
  const result = {
    videos: [],
    files: [],
  };
  for (const file of files) {
    // Skip entries with no retrievable path.
    if (!file || !file.filepath) {
      continue;
    }
    const source = file.source ?? 'local';
    if (!encodingMethods[source]) {
      encodingMethods[source] = getStrategyFunctions(source);
    }
    // Minimal metadata echoed back to the caller for each processed file.
    const fileMetadata = {
      file_id: file.file_id || file._id,
      temp_file_id: file.temp_file_id,
      filepath: file.filepath,
      source: file.source,
      filename: file.filename,
      type: file.type,
    };
    promises.push([file, fileMetadata]);
  }
  // Download and base64-encode all files concurrently; allSettled keeps one
  // failed download from rejecting the whole batch.
  const results = await Promise.allSettled(
    promises.map(async ([file, fileMetadata]) => {
      if (!file || !fileMetadata) {
        return { file: null, content: null, metadata: fileMetadata };
      }
      try {
        const source = file.source ?? 'local';
        const { getDownloadStream } = encodingMethods[source];
        const stream = await getDownloadStream(req, file.filepath);
        const buffer = await streamToBuffer(stream);
        const videoContent = buffer.toString('base64');
        return {
          file,
          content: videoContent,
          metadata: fileMetadata,
        };
      } catch (error) {
        // Per-file failure: log and return content-less entry so metadata
        // is still surfaced to the caller below.
        console.error(`Error processing video ${file.filename}:`, error);
        return { file, content: null, metadata: fileMetadata };
      }
    }),
  );
  for (const settledResult of results) {
    if (settledResult.status === 'rejected') {
      console.error('Video processing failed:', settledResult.reason);
      continue;
    }
    const { file, content, metadata } = settledResult.value;
    if (!content || !file) {
      // Encoding failed: surface the metadata without video content.
      if (metadata) {
        result.files.push(metadata);
      }
      continue;
    }
    if (file.type.startsWith('video/') && isDocumentSupportedEndpoint(endpoint)) {
      const videoBuffer = Buffer.from(content, 'base64');
      const validation = await validateVideo(videoBuffer, videoBuffer.length, endpoint);
      if (!validation.isValid) {
        // NOTE(review): throwing here aborts formatting of ALL remaining
        // files, not only the invalid one — confirm this is intended.
        throw new Error(`Video validation failed: ${validation.error}`);
      }
      // Only Google currently accepts inline video parts.
      if (endpoint === EModelEndpoint.google) {
        const videoPart = {
          type: 'video',
          mimeType: file.type,
          data: content,
        };
        result.videos.push(videoPart);
      }
      result.files.push(metadata);
    }
    // NOTE(review): non-video files (or unsupported endpoints) fall through
    // without pushing metadata, so they silently vanish from result.files.
  }
  return result;
}
module.exports = {
encodeAndFormatVideos,
};

View file

@ -2,11 +2,13 @@ const { processCodeFile } = require('./Code/process');
const { processFileUpload } = require('./process');
const { uploadImageBuffer } = require('./images');
const { hasAccessToFilesViaAgent, filterFilesByAgentAccess } = require('./permissions');
const { getStrategyFunctions } = require('./strategies');
module.exports = {
processCodeFile,
processFileUpload,
uploadImageBuffer,
getStrategyFunctions,
hasAccessToFilesViaAgent,
filterFilesByAgentAccess,
};

View file

@ -77,7 +77,9 @@ const AttachFileMenu = ({
* */
const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities);
const handleUploadClick = (fileType?: 'image' | 'document' | 'multimodal') => {
const handleUploadClick = (
fileType?: 'image' | 'document' | 'multimodal' | 'google_multimodal',
) => {
if (!inputRef.current) {
return;
}
@ -103,7 +105,14 @@ const AttachFileMenu = ({
) => {
const items: MenuItemProps[] = [];
const shouldShowDirectAttach = isDocumentSupportedEndpoint(agent?.provider ?? endpoint);
const currentProvider = agent?.provider ?? endpoint;
const isOpenAIOrAzure =
currentProvider === EModelEndpoint.openAI || currentProvider === EModelEndpoint.azureOpenAI;
const useResponsesApiEnabled = conversation?.useResponsesApi ?? false;
const shouldShowDirectAttach =
isDocumentSupportedEndpoint(currentProvider) &&
(!isOpenAIOrAzure || useResponsesApiEnabled);
if (shouldShowDirectAttach) {
items.push({
@ -194,6 +203,7 @@ const AttachFileMenu = ({
sharePointEnabled,
setIsSharePointDialogOpen,
endpoint,
agent?.provider,
]);
const menuTrigger = (

View file

@ -0,0 +1,116 @@
import { Readable } from 'stream';
import getStream from 'get-stream';
import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider';
import type { IMongoFile } from '@librechat/data-schemas';
import type { Request } from 'express';
import { validateAudio } from '~/files/validation';

/** Minimal contract needed from a storage strategy: a way to stream a file down. */
interface StrategyFunctions {
  getDownloadStream: (req: Request, filepath: string) => Promise<Readable>;
}

/** Formatted inline audio parts plus metadata for every processed file. */
interface AudioResult {
  audios: Array<{
    type: string;
    mimeType: string;
    data: string;
  }>;
  files: Array<{
    file_id?: string;
    temp_file_id?: string;
    filepath: string;
    source?: string;
    filename: string;
    type: string;
  }>;
}

/**
 * Encodes and formats audio files for different endpoints
 * @param req - The request object
 * @param files - Array of audio files
 * @param endpoint - The endpoint to format for (currently only google is supported)
 * @param getStrategyFunctions - Resolves download helpers for a given storage source
 * @returns Promise that resolves to audio and file metadata
 */
export async function encodeAndFormatAudios(
  req: Request,
  files: IMongoFile[],
  endpoint: EModelEndpoint,
  getStrategyFunctions: (source: string) => StrategyFunctions,
): Promise<AudioResult> {
  if (!files?.length) {
    return { audios: [], files: [] };
  }
  // Cache strategy lookups so each storage source is resolved once per call.
  const encodingMethods: Record<string, StrategyFunctions> = {};
  const result: AudioResult = { audios: [], files: [] };
  // Downloads one file and base64-encodes it; returns null for entries
  // without a retrievable path.
  const processFile = async (file: IMongoFile) => {
    if (!file?.filepath) return null;
    const source = file.source ?? 'local';
    if (!encodingMethods[source]) {
      encodingMethods[source] = getStrategyFunctions(source);
    }
    const { getDownloadStream } = encodingMethods[source];
    const stream = await getDownloadStream(req, file.filepath);
    const buffer = await getStream.buffer(stream);
    return {
      file,
      content: buffer.toString('base64'),
      // NOTE(review): the previous implementation fell back to `file._id`
      // when `file_id` was missing — confirm `file_id` is always populated.
      metadata: {
        file_id: file.file_id,
        temp_file_id: file.temp_file_id,
        filepath: file.filepath,
        source: file.source,
        filename: file.filename,
        type: file.type,
      },
    };
  };
  // allSettled keeps one failed download from rejecting the whole batch.
  const results = await Promise.allSettled(files.map(processFile));
  for (const settledResult of results) {
    if (settledResult.status === 'rejected') {
      // NOTE(review): a rejected download drops the file entirely — its
      // metadata never reaches result.files.
      console.error('Audio processing failed:', settledResult.reason);
      continue;
    }
    const processed = settledResult.value;
    if (!processed) continue;
    const { file, content, metadata } = processed;
    if (!content || !file) {
      // Encoding produced nothing: surface the metadata only.
      if (metadata) result.files.push(metadata);
      continue;
    }
    // Non-audio files and unsupported endpoints pass through as metadata only.
    if (!file.type.startsWith('audio/') || !isDocumentSupportedEndpoint(endpoint)) {
      result.files.push(metadata);
      continue;
    }
    const audioBuffer = Buffer.from(content, 'base64');
    const validation = await validateAudio(audioBuffer, audioBuffer.length, endpoint);
    if (!validation.isValid) {
      // NOTE(review): throwing here aborts formatting of all remaining files.
      throw new Error(`Audio validation failed: ${validation.error}`);
    }
    // Only Google currently accepts inline audio parts.
    if (endpoint === EModelEndpoint.google) {
      result.audios.push({
        type: 'audio',
        mimeType: file.type,
        data: content,
      });
    }
    result.files.push(metadata);
  }
  return result;
}

View file

@ -0,0 +1,150 @@
import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider';
import { validatePdf } from '@librechat/api';
import getStream from 'get-stream';
import type { Request } from 'express';
import type { IMongoFile } from '@librechat/data-schemas';
import { Readable } from 'stream';
interface StrategyFunctions {
getDownloadStream: (req: Request, filepath: string) => Promise<Readable>;
}
interface DocumentResult {
documents: Array<{
type: string;
source?: {
type: string;
media_type: string;
data: string;
};
cache_control?: { type: string };
citations?: { enabled: boolean };
filename?: string;
file_data?: string;
mimeType?: string;
data?: string;
}>;
files: Array<{
file_id?: string;
temp_file_id?: string;
filepath: string;
source?: string;
filename: string;
type: string;
}>;
}
/**
* Processes and encodes document files for various endpoints
* @param req - Express request object
* @param files - Array of file objects to process
* @param endpoint - The endpoint identifier (e.g., EModelEndpoint.anthropic)
* @param getStrategyFunctions - Function to get strategy functions
* @returns Promise that resolves to documents and file metadata
*/
export async function encodeAndFormatDocuments(
req: Request,
files: IMongoFile[],
endpoint: EModelEndpoint,
getStrategyFunctions: (source: string) => StrategyFunctions,
): Promise<DocumentResult> {
if (!files?.length) {
return { documents: [], files: [] };
}
const encodingMethods: Record<string, StrategyFunctions> = {};
const result: DocumentResult = { documents: [], files: [] };
const documentFiles = files.filter(
(file) => file.type === 'application/pdf' || file.type?.startsWith('application/'),
);
if (!documentFiles.length) {
return result;
}
const processFile = async (file: IMongoFile) => {
if (file.type !== 'application/pdf' || !isDocumentSupportedEndpoint(endpoint)) {
return null;
}
const source = file.source ?? 'local';
if (!encodingMethods[source]) {
encodingMethods[source] = getStrategyFunctions(source);
}
const { getDownloadStream } = encodingMethods[source];
const stream = await getDownloadStream(req, file.filepath);
const buffer = await getStream.buffer(stream);
return {
file,
content: buffer.toString('base64'),
metadata: {
file_id: file.file_id,
temp_file_id: file.temp_file_id,
filepath: file.filepath,
source: file.source,
filename: file.filename,
type: file.type,
},
};
};
const results = await Promise.allSettled(documentFiles.map(processFile));
for (const settledResult of results) {
if (settledResult.status === 'rejected') {
console.error('Document processing failed:', settledResult.reason);
continue;
}
const processed = settledResult.value;
if (!processed) continue;
const { file, content, metadata } = processed;
if (!content || !file) {
if (metadata) result.files.push(metadata);
continue;
}
if (file.type === 'application/pdf' && isDocumentSupportedEndpoint(endpoint)) {
const pdfBuffer = Buffer.from(content, 'base64');
const validation = await validatePdf(pdfBuffer, pdfBuffer.length, endpoint);
if (!validation.isValid) {
throw new Error(`PDF validation failed: ${validation.error}`);
}
if (endpoint === EModelEndpoint.anthropic) {
result.documents.push({
type: 'document',
source: {
type: 'base64',
media_type: 'application/pdf',
data: content,
},
cache_control: { type: 'ephemeral' },
citations: { enabled: true },
});
} else if (endpoint === EModelEndpoint.openAI) {
result.documents.push({
type: 'input_file',
filename: file.filename,
file_data: `data:application/pdf;base64,${content}`,
});
} else if (endpoint === EModelEndpoint.google) {
result.documents.push({
type: 'document',
mimeType: 'application/pdf',
data: content,
});
}
result.files.push(metadata);
}
}
return result;
}

View file

@ -3,3 +3,6 @@ export * from './audio';
export * from './text';
export * from './parse';
export * from './validation';
export * from './audio/encode';
export * from './video/encode';
export * from './document/encode';

View file

@ -0,0 +1,117 @@
import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider';
import { validateVideo } from '@librechat/api';
import getStream from 'get-stream';
import type { Request } from 'express';
import type { IMongoFile } from '@librechat/data-schemas';
import { Readable } from 'stream';
interface StrategyFunctions {
getDownloadStream: (req: Request, filepath: string) => Promise<Readable>;
}
interface VideoResult {
videos: Array<{
type: string;
mimeType: string;
data: string;
}>;
files: Array<{
file_id?: string;
temp_file_id?: string;
filepath: string;
source?: string;
filename: string;
type: string;
}>;
}
/**
* Encodes and formats video files for different endpoints
* @param req - The request object
* @param files - Array of video files
* @param endpoint - The endpoint to format for
* @param getStrategyFunctions - Function to get strategy functions
* @returns Promise that resolves to videos and file metadata
*/
export async function encodeAndFormatVideos(
req: Request,
files: IMongoFile[],
endpoint: EModelEndpoint,
getStrategyFunctions: (source: string) => StrategyFunctions,
): Promise<VideoResult> {
if (!files?.length) {
return { videos: [], files: [] };
}
const encodingMethods: Record<string, StrategyFunctions> = {};
const result: VideoResult = { videos: [], files: [] };
const processFile = async (file: IMongoFile) => {
if (!file?.filepath) return null;
const source = file.source ?? 'local';
if (!encodingMethods[source]) {
encodingMethods[source] = getStrategyFunctions(source);
}
const { getDownloadStream } = encodingMethods[source];
const stream = await getDownloadStream(req, file.filepath);
const buffer = await getStream.buffer(stream);
return {
file,
content: buffer.toString('base64'),
metadata: {
file_id: file.file_id,
temp_file_id: file.temp_file_id,
filepath: file.filepath,
source: file.source,
filename: file.filename,
type: file.type,
},
};
};
const results = await Promise.allSettled(files.map(processFile));
for (const settledResult of results) {
if (settledResult.status === 'rejected') {
console.error('Video processing failed:', settledResult.reason);
continue;
}
const processed = settledResult.value;
if (!processed) continue;
const { file, content, metadata } = processed;
if (!content || !file) {
if (metadata) result.files.push(metadata);
continue;
}
if (!file.type.startsWith('video/') || !isDocumentSupportedEndpoint(endpoint)) {
result.files.push(metadata);
continue;
}
const videoBuffer = Buffer.from(content, 'base64');
const validation = await validateVideo(videoBuffer, videoBuffer.length, endpoint);
if (!validation.isValid) {
throw new Error(`Video validation failed: ${validation.error}`);
}
if (endpoint === EModelEndpoint.google) {
result.videos.push({
type: 'video',
mimeType: file.type,
data: content,
});
}
result.files.push(metadata);
}
return result;
}