🛫 refactor: Move Encoding Logic to packages/api (#9182)

* refactor: move audio encode over to TS

* refactor: audio encoding now functional in LC again

* refactor: move video encode over to TS

* refactor: move document encode over to TS

* refactor: video encoding now functional in LC again

* refactor: document encoding now functional in LC again

* fix: extend file type options in AttachFileMenu to include 'google_multimodal' and update dependency array to include agent?.provider

* feat: only accept pdfs if responses api is enabled for openai convos
This commit is contained in:
Dustin Healy 2025-08-20 17:01:21 -07:00 committed by Dustin Healy
parent ccb2e031dd
commit e55264b22a
11 changed files with 408 additions and 415 deletions

View file

@ -0,0 +1,116 @@
import { Readable } from 'stream';
import getStream from 'get-stream';
import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider';
import type { IMongoFile } from '@librechat/data-schemas';
import type { Request } from 'express';
import { validateAudio } from '~/files/validation';
/**
 * The subset of a storage strategy that the audio encoder relies on.
 * An instance is obtained per file source through the `getStrategyFunctions` callback.
 */
interface StrategyFunctions {
/** Called with the request and a file's `filepath`; resolves to a readable stream that the encoder buffers in full. */
getDownloadStream: (req: Request, filepath: string) => Promise<Readable>;
}
/**
 * Return shape of `encodeAndFormatAudios`: encoded audio parts plus metadata
 * describing the attachments that were inspected.
 */
interface AudioResult {
  /** Encoded audio parts; `data` carries the base64-encoded file contents. */
  audios: {
    type: string;
    mimeType: string;
    data: string;
  }[];
  /** Metadata copied from the corresponding file records. */
  files: {
    file_id?: string;
    temp_file_id?: string;
    filepath: string;
    source?: string;
    filename: string;
    type: string;
  }[];
}
/**
 * Encodes audio attachments and formats them for the target endpoint.
 *
 * Every file that has a `filepath` is reported in `files`. Only files whose MIME type starts
 * with `audio/` (and only when `isDocumentSupportedEndpoint(endpoint)` is true) are downloaded
 * and validated; all other attachments are passed through as metadata without being read.
 * Encoded payloads are currently emitted for the Google endpoint only.
 *
 * A file whose download rejects is logged and left out of the result.
 *
 * @param req - The request object, forwarded to the storage strategy
 * @param files - Array of attachments to inspect
 * @param endpoint - The endpoint to format for (currently only google produces `audios`)
 * @param getStrategyFunctions - Resolves the storage strategy functions for a file source
 * @returns Promise that resolves to audio and file metadata
 * @throws Error when `validateAudio` reports a downloaded audio file as invalid
 */
export async function encodeAndFormatAudios(
  req: Request,
  files: IMongoFile[],
  endpoint: EModelEndpoint,
  getStrategyFunctions: (source: string) => StrategyFunctions,
): Promise<AudioResult> {
  const result: AudioResult = { audios: [], files: [] };
  if (!files?.length) {
    return result;
  }

  // One strategy lookup per storage source, shared by all files from that source.
  const encodingMethods: Record<string, StrategyFunctions> = {};

  const processFile = async (file: IMongoFile) => {
    if (!file?.filepath) {
      return null;
    }

    const metadata = {
      file_id: file.file_id,
      temp_file_id: file.temp_file_id,
      filepath: file.filepath,
      source: file.source,
      filename: file.filename,
      type: file.type,
    };

    // Decide before downloading: non-audio files and unsupported endpoints only need metadata,
    // so there is no reason to pull their contents into memory.
    if (!file.type?.startsWith('audio/') || !isDocumentSupportedEndpoint(endpoint)) {
      return { file, buffer: null, metadata };
    }

    const source = file.source ?? 'local';
    if (!encodingMethods[source]) {
      encodingMethods[source] = getStrategyFunctions(source);
    }
    const { getDownloadStream } = encodingMethods[source];
    const stream = await getDownloadStream(req, file.filepath);
    const buffer = await getStream.buffer(stream);
    return { file, buffer, metadata };
  };

  const outcomes = await Promise.allSettled(files.map(processFile));

  for (const outcome of outcomes) {
    if (outcome.status === 'rejected') {
      console.error('Audio processing failed:', outcome.reason);
      continue;
    }

    const processed = outcome.value;
    if (!processed) {
      continue;
    }

    const { file, buffer, metadata } = processed;

    // Pass-through files and zero-byte downloads are reported as metadata only.
    if (!buffer || buffer.length === 0) {
      result.files.push(metadata);
      continue;
    }

    // Validate the raw bytes directly instead of round-tripping through base64.
    const validation = await validateAudio(buffer, buffer.length, endpoint);
    if (!validation.isValid) {
      throw new Error(`Audio validation failed: ${validation.error}`);
    }

    if (endpoint === EModelEndpoint.google) {
      result.audios.push({
        type: 'audio',
        mimeType: file.type,
        data: buffer.toString('base64'),
      });
    }
    result.files.push(metadata);
  }

  return result;
}

View file

@ -0,0 +1,150 @@
import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider';
import { validatePdf } from '@librechat/api';
import getStream from 'get-stream';
import type { Request } from 'express';
import type { IMongoFile } from '@librechat/data-schemas';
import { Readable } from 'stream';
/**
 * The subset of a storage strategy that the document encoder relies on.
 * An instance is obtained per file source through the `getStrategyFunctions` callback.
 */
interface StrategyFunctions {
/** Called with the request and a file's `filepath`; resolves to a readable stream that the encoder buffers in full. */
getDownloadStream: (req: Request, filepath: string) => Promise<Readable>;
}
/**
 * Return shape of `encodeAndFormatDocuments`: endpoint-specific document parts
 * plus metadata for the files that were encoded.
 */
interface DocumentResult {
  /**
   * Document parts. Which optional fields are populated depends on the endpoint:
   * `source`/`cache_control`/`citations` for Anthropic, `filename`/`file_data` for OpenAI,
   * and `mimeType`/`data` for Google.
   */
  documents: {
    type: string;
    source?: {
      type: string;
      media_type: string;
      data: string;
    };
    cache_control?: { type: string };
    citations?: { enabled: boolean };
    filename?: string;
    file_data?: string;
    mimeType?: string;
    data?: string;
  }[];
  /** Metadata copied from the corresponding file records. */
  files: {
    file_id?: string;
    temp_file_id?: string;
    filepath: string;
    source?: string;
    filename: string;
    type: string;
  }[];
}
/**
 * Processes and encodes document files for various endpoints.
 *
 * Only `application/pdf` files that have a `filepath` are handled, and only when
 * `isDocumentSupportedEndpoint(endpoint)` is true; every other entry is ignored and does not
 * appear in the result. Each accepted PDF is downloaded, validated with `validatePdf`, and
 * formatted for Anthropic, OpenAI, or Google. Other supported endpoints receive metadata only.
 *
 * A file whose download rejects is logged and left out of the result.
 *
 * @param req - Express request object, forwarded to the storage strategy
 * @param files - Array of file objects to process
 * @param endpoint - The endpoint identifier (e.g., EModelEndpoint.anthropic)
 * @param getStrategyFunctions - Function to get strategy functions
 * @returns Promise that resolves to documents and file metadata
 * @throws Error when `validatePdf` reports a downloaded PDF as invalid
 */
export async function encodeAndFormatDocuments(
  req: Request,
  files: IMongoFile[],
  endpoint: EModelEndpoint,
  getStrategyFunctions: (source: string) => StrategyFunctions,
): Promise<DocumentResult> {
  const result: DocumentResult = { documents: [], files: [] };
  if (!files?.length) {
    return result;
  }

  // One strategy lookup per storage source, shared by all files from that source.
  const encodingMethods: Record<string, StrategyFunctions> = {};

  const processFile = async (file: IMongoFile) => {
    // Single eligibility gate: a readable path, a PDF, and an endpoint that accepts documents.
    if (
      !file?.filepath ||
      file.type !== 'application/pdf' ||
      !isDocumentSupportedEndpoint(endpoint)
    ) {
      return null;
    }

    const source = file.source ?? 'local';
    if (!encodingMethods[source]) {
      encodingMethods[source] = getStrategyFunctions(source);
    }
    const { getDownloadStream } = encodingMethods[source];
    const stream = await getDownloadStream(req, file.filepath);
    const buffer = await getStream.buffer(stream);

    return {
      file,
      buffer,
      metadata: {
        file_id: file.file_id,
        temp_file_id: file.temp_file_id,
        filepath: file.filepath,
        source: file.source,
        filename: file.filename,
        type: file.type,
      },
    };
  };

  const outcomes = await Promise.allSettled(files.map(processFile));

  for (const outcome of outcomes) {
    if (outcome.status === 'rejected') {
      console.error('Document processing failed:', outcome.reason);
      continue;
    }

    const processed = outcome.value;
    if (!processed) {
      continue;
    }

    const { file, buffer, metadata } = processed;

    // A zero-byte download has nothing to validate or send; report its metadata only.
    if (buffer.length === 0) {
      result.files.push(metadata);
      continue;
    }

    // Validate the raw bytes directly instead of round-tripping through base64.
    const validation = await validatePdf(buffer, buffer.length, endpoint);
    if (!validation.isValid) {
      throw new Error(`PDF validation failed: ${validation.error}`);
    }

    const content = buffer.toString('base64');

    if (endpoint === EModelEndpoint.anthropic) {
      result.documents.push({
        type: 'document',
        source: {
          type: 'base64',
          media_type: 'application/pdf',
          data: content,
        },
        cache_control: { type: 'ephemeral' },
        citations: { enabled: true },
      });
    } else if (endpoint === EModelEndpoint.openAI) {
      result.documents.push({
        type: 'input_file',
        filename: file.filename,
        file_data: `data:application/pdf;base64,${content}`,
      });
    } else if (endpoint === EModelEndpoint.google) {
      result.documents.push({
        type: 'document',
        mimeType: 'application/pdf',
        data: content,
      });
    }

    result.files.push(metadata);
  }

  return result;
}

View file

@ -3,3 +3,6 @@ export * from './audio';
export * from './text';
export * from './parse';
export * from './validation';
export * from './audio/encode';
export * from './video/encode';
export * from './document/encode';

View file

@ -0,0 +1,117 @@
import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider';
import { validateVideo } from '@librechat/api';
import getStream from 'get-stream';
import type { Request } from 'express';
import type { IMongoFile } from '@librechat/data-schemas';
import { Readable } from 'stream';
/**
 * The subset of a storage strategy that the video encoder relies on.
 * An instance is obtained per file source through the `getStrategyFunctions` callback.
 */
interface StrategyFunctions {
/** Called with the request and a file's `filepath`; resolves to a readable stream that the encoder buffers in full. */
getDownloadStream: (req: Request, filepath: string) => Promise<Readable>;
}
/**
 * Return shape of `encodeAndFormatVideos`: encoded video parts plus metadata
 * describing the attachments that were inspected.
 */
interface VideoResult {
  /** Encoded video parts; `data` carries the base64-encoded file contents. */
  videos: {
    type: string;
    mimeType: string;
    data: string;
  }[];
  /** Metadata copied from the corresponding file records. */
  files: {
    file_id?: string;
    temp_file_id?: string;
    filepath: string;
    source?: string;
    filename: string;
    type: string;
  }[];
}
/**
 * Encodes video attachments and formats them for the target endpoint.
 *
 * Every file that has a `filepath` is reported in `files`. Only files whose MIME type starts
 * with `video/` (and only when `isDocumentSupportedEndpoint(endpoint)` is true) are downloaded
 * and validated; all other attachments are passed through as metadata without being read.
 * Encoded payloads are currently emitted for the Google endpoint only.
 *
 * A file whose download rejects is logged and left out of the result.
 *
 * @param req - The request object, forwarded to the storage strategy
 * @param files - Array of attachments to inspect
 * @param endpoint - The endpoint to format for
 * @param getStrategyFunctions - Function to get strategy functions
 * @returns Promise that resolves to videos and file metadata
 * @throws Error when `validateVideo` reports a downloaded video file as invalid
 */
export async function encodeAndFormatVideos(
  req: Request,
  files: IMongoFile[],
  endpoint: EModelEndpoint,
  getStrategyFunctions: (source: string) => StrategyFunctions,
): Promise<VideoResult> {
  const result: VideoResult = { videos: [], files: [] };
  if (!files?.length) {
    return result;
  }

  // One strategy lookup per storage source, shared by all files from that source.
  const encodingMethods: Record<string, StrategyFunctions> = {};

  const processFile = async (file: IMongoFile) => {
    if (!file?.filepath) {
      return null;
    }

    const metadata = {
      file_id: file.file_id,
      temp_file_id: file.temp_file_id,
      filepath: file.filepath,
      source: file.source,
      filename: file.filename,
      type: file.type,
    };

    // Decide before downloading: non-video files and unsupported endpoints only need metadata,
    // so there is no reason to pull (potentially very large) contents into memory.
    if (!file.type?.startsWith('video/') || !isDocumentSupportedEndpoint(endpoint)) {
      return { file, buffer: null, metadata };
    }

    const source = file.source ?? 'local';
    if (!encodingMethods[source]) {
      encodingMethods[source] = getStrategyFunctions(source);
    }
    const { getDownloadStream } = encodingMethods[source];
    const stream = await getDownloadStream(req, file.filepath);
    const buffer = await getStream.buffer(stream);
    return { file, buffer, metadata };
  };

  const outcomes = await Promise.allSettled(files.map(processFile));

  for (const outcome of outcomes) {
    if (outcome.status === 'rejected') {
      console.error('Video processing failed:', outcome.reason);
      continue;
    }

    const processed = outcome.value;
    if (!processed) {
      continue;
    }

    const { file, buffer, metadata } = processed;

    // Pass-through files and zero-byte downloads are reported as metadata only.
    if (!buffer || buffer.length === 0) {
      result.files.push(metadata);
      continue;
    }

    // Validate the raw bytes directly instead of round-tripping through base64.
    const validation = await validateVideo(buffer, buffer.length, endpoint);
    if (!validation.isValid) {
      throw new Error(`Video validation failed: ${validation.error}`);
    }

    if (endpoint === EModelEndpoint.google) {
      result.videos.push({
        type: 'video',
        mimeType: file.type,
        data: buffer.toString('base64'),
      });
    }
    result.files.push(metadata);
  }

  return result;
}