mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-20 17:26:12 +01:00
🛫 refactor: Move Encoding Logic to packages/api (#9182)
* refactor: move audio encode over to TS * refactor: audio encoding now functional in LC again * refactor: move video encode over to TS * refactor: move document encode over to TS * refactor: video encoding now functional in LC again * refactor: document encoding now functional in LC again * fix: extend file type options in AttachFileMenu to include 'google_multimodal' and update dependency array to include agent?.provider * feat: only accept pdfs if responses api is enabled for openai convos
This commit is contained in:
parent
ccb2e031dd
commit
e55264b22a
11 changed files with 408 additions and 415 deletions
116
packages/api/src/files/audio/encode.ts
Normal file
116
packages/api/src/files/audio/encode.ts
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
import { Readable } from 'stream';
|
||||
import getStream from 'get-stream';
|
||||
import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { Request } from 'express';
|
||||
import { validateAudio } from '~/files/validation';
|
||||
|
||||
/** Subset of a storage strategy that the audio encoder relies on. */
interface StrategyFunctions {
  /** Resolves to a readable stream for the stored file at `filepath`. */
  getDownloadStream: (req: Request, filepath: string) => Promise<Readable>;
}
|
||||
|
||||
/** Return shape of `encodeAndFormatAudios`. */
interface AudioResult {
  /** Encoded audio parts; `data` holds the base64 content and `mimeType` the file's type. */
  audios: {
    type: string;
    mimeType: string;
    data: string;
  }[];
  /** Metadata copied from each processed file record. */
  files: {
    file_id?: string;
    temp_file_id?: string;
    filepath: string;
    source?: string;
    filename: string;
    type: string;
  }[];
}
|
||||
|
||||
/**
 * Downloads audio attachments and formats them for the target endpoint.
 *
 * Each file that has a `filepath` is fetched through the storage strategy matching its
 * `source` (falling back to `'local'`). All downloads are started together and awaited
 * with `Promise.allSettled`; a failed download is logged and that file is left out of the
 * result. Files that are empty, are not `audio/*`, or target an endpoint rejected by
 * `isDocumentSupportedEndpoint` contribute metadata only.
 *
 * @param req - The request object, forwarded to the strategy's `getDownloadStream`
 * @param files - Array of audio files
 * @param endpoint - The endpoint to format for; an `audios` entry is only produced for `EModelEndpoint.google`
 * @param getStrategyFunctions - Resolves the storage strategy for a file source; called at most once per distinct source
 * @returns Promise that resolves to audio and file metadata
 * @throws Error when `validateAudio` reports a downloaded audio file as invalid
 */
export async function encodeAndFormatAudios(
  req: Request,
  files: IMongoFile[],
  endpoint: EModelEndpoint,
  getStrategyFunctions: (source: string) => StrategyFunctions,
): Promise<AudioResult> {
  const result: AudioResult = { audios: [], files: [] };
  if (!files?.length) {
    return result;
  }

  // One strategy lookup per storage source; a Map avoids prototype-key surprises.
  const strategies = new Map<string, StrategyFunctions>();

  const downloadFile = async (file: IMongoFile) => {
    if (!file?.filepath) {
      return null;
    }

    const source = file.source ?? 'local';
    let strategy = strategies.get(source);
    if (!strategy) {
      strategy = getStrategyFunctions(source);
      strategies.set(source, strategy);
    }

    const { getDownloadStream } = strategy;
    const stream = await getDownloadStream(req, file.filepath);
    // Keep the raw bytes: validation needs a Buffer, and base64 is only needed on output.
    const buffer = await getStream.buffer(stream);

    return {
      file,
      buffer,
      metadata: {
        file_id: file.file_id,
        temp_file_id: file.temp_file_id,
        filepath: file.filepath,
        source: file.source,
        filename: file.filename,
        type: file.type,
      },
    };
  };

  const settled = await Promise.allSettled(files.map(downloadFile));

  for (const outcome of settled) {
    if (outcome.status === 'rejected') {
      console.error('Audio processing failed:', outcome.reason);
      continue;
    }

    const downloaded = outcome.value;
    if (!downloaded) {
      continue;
    }

    const { file, buffer, metadata } = downloaded;

    // Empty downloads, non-audio types (including a missing type) and unsupported
    // endpoints are reported as plain file metadata without an encoded payload.
    const isAudio = file.type?.startsWith('audio/') === true;
    if (buffer.length === 0 || !isAudio || !isDocumentSupportedEndpoint(endpoint)) {
      result.files.push(metadata);
      continue;
    }

    const validation = await validateAudio(buffer, buffer.length, endpoint);
    if (!validation.isValid) {
      throw new Error(`Audio validation failed: ${validation.error}`);
    }

    if (endpoint === EModelEndpoint.google) {
      result.audios.push({
        type: 'audio',
        mimeType: file.type,
        data: buffer.toString('base64'),
      });
    }

    result.files.push(metadata);
  }

  return result;
}
|
||||
150
packages/api/src/files/document/encode.ts
Normal file
150
packages/api/src/files/document/encode.ts
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider';
|
||||
import { validatePdf } from '@librechat/api';
|
||||
import getStream from 'get-stream';
|
||||
import type { Request } from 'express';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import { Readable } from 'stream';
|
||||
|
||||
/** Subset of a storage strategy that the document encoder relies on. */
interface StrategyFunctions {
  /** Resolves to a readable stream for the stored file at `filepath`. */
  getDownloadStream: (req: Request, filepath: string) => Promise<Readable>;
}
|
||||
|
||||
/** Return shape of `encodeAndFormatDocuments`. */
interface DocumentResult {
  /**
   * Provider-specific document parts. Which optional fields are set depends on the
   * endpoint: `source`/`cache_control`/`citations` for Anthropic, `filename`/`file_data`
   * for OpenAI, and `mimeType`/`data` for Google.
   */
  documents: {
    type: string;
    source?: {
      type: string;
      media_type: string;
      data: string;
    };
    cache_control?: { type: string };
    citations?: { enabled: boolean };
    filename?: string;
    file_data?: string;
    mimeType?: string;
    data?: string;
  }[];
  /** Metadata copied from each processed file record. */
  files: {
    file_id?: string;
    temp_file_id?: string;
    filepath: string;
    source?: string;
    filename: string;
    type: string;
  }[];
}
|
||||
|
||||
/**
 * Downloads PDF attachments and formats them for the target endpoint.
 *
 * Only files whose type is exactly `application/pdf` are considered, and only when the
 * endpoint passes `isDocumentSupportedEndpoint`; everything else is ignored. Each PDF is
 * fetched through the storage strategy matching its `source` (falling back to `'local'`).
 * All downloads are started together and awaited with `Promise.allSettled`; a failed
 * download is logged and that file is left out of the result.
 *
 * @param req - Express request object, forwarded to the strategy's `getDownloadStream`
 * @param files - Array of file objects to process
 * @param endpoint - The endpoint identifier (e.g., EModelEndpoint.anthropic); a `documents`
 *   entry is produced for the Anthropic, OpenAI and Google endpoints
 * @param getStrategyFunctions - Function to get strategy functions; called at most once per distinct source
 * @returns Promise that resolves to documents and file metadata
 * @throws Error when `validatePdf` reports a downloaded PDF as invalid
 */
export async function encodeAndFormatDocuments(
  req: Request,
  files: IMongoFile[],
  endpoint: EModelEndpoint,
  getStrategyFunctions: (source: string) => StrategyFunctions,
): Promise<DocumentResult> {
  const result: DocumentResult = { documents: [], files: [] };
  if (!files?.length) {
    return result;
  }

  // Only PDFs are ever encoded, so filter on exactly that (and on a usable filepath)
  // instead of admitting every `application/*` file and discarding it later.
  const pdfFiles = files.filter(
    (file) => file?.type === 'application/pdf' && Boolean(file.filepath),
  );

  // The endpoint check does not depend on the file, so evaluate it once up front.
  if (!pdfFiles.length || !isDocumentSupportedEndpoint(endpoint)) {
    return result;
  }

  // One strategy lookup per storage source; a Map avoids prototype-key surprises.
  const strategies = new Map<string, StrategyFunctions>();

  const downloadFile = async (file: IMongoFile) => {
    const source = file.source ?? 'local';
    let strategy = strategies.get(source);
    if (!strategy) {
      strategy = getStrategyFunctions(source);
      strategies.set(source, strategy);
    }

    const { getDownloadStream } = strategy;
    const stream = await getDownloadStream(req, file.filepath);
    // Keep the raw bytes: validation needs a Buffer, and base64 is only needed on output.
    const buffer = await getStream.buffer(stream);

    return {
      file,
      buffer,
      metadata: {
        file_id: file.file_id,
        temp_file_id: file.temp_file_id,
        filepath: file.filepath,
        source: file.source,
        filename: file.filename,
        type: file.type,
      },
    };
  };

  const settled = await Promise.allSettled(pdfFiles.map(downloadFile));

  for (const outcome of settled) {
    if (outcome.status === 'rejected') {
      console.error('Document processing failed:', outcome.reason);
      continue;
    }

    const { file, buffer, metadata } = outcome.value;

    // An empty download has nothing to validate or encode; report its metadata only.
    if (buffer.length === 0) {
      result.files.push(metadata);
      continue;
    }

    const validation = await validatePdf(buffer, buffer.length, endpoint);
    if (!validation.isValid) {
      throw new Error(`PDF validation failed: ${validation.error}`);
    }

    if (endpoint === EModelEndpoint.anthropic) {
      result.documents.push({
        type: 'document',
        source: {
          type: 'base64',
          media_type: 'application/pdf',
          data: buffer.toString('base64'),
        },
        cache_control: { type: 'ephemeral' },
        citations: { enabled: true },
      });
    } else if (endpoint === EModelEndpoint.openAI) {
      result.documents.push({
        type: 'input_file',
        filename: file.filename,
        file_data: `data:application/pdf;base64,${buffer.toString('base64')}`,
      });
    } else if (endpoint === EModelEndpoint.google) {
      result.documents.push({
        type: 'document',
        mimeType: 'application/pdf',
        data: buffer.toString('base64'),
      });
    }

    result.files.push(metadata);
  }

  return result;
}
|
||||
|
|
@ -3,3 +3,6 @@ export * from './audio';
|
|||
export * from './text';
|
||||
export * from './parse';
|
||||
export * from './validation';
|
||||
export * from './audio/encode';
|
||||
export * from './video/encode';
|
||||
export * from './document/encode';
|
||||
|
|
|
|||
117
packages/api/src/files/video/encode.ts
Normal file
117
packages/api/src/files/video/encode.ts
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider';
|
||||
import { validateVideo } from '@librechat/api';
|
||||
import getStream from 'get-stream';
|
||||
import type { Request } from 'express';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import { Readable } from 'stream';
|
||||
|
||||
/** Subset of a storage strategy that the video encoder relies on. */
interface StrategyFunctions {
  /** Resolves to a readable stream for the stored file at `filepath`. */
  getDownloadStream: (req: Request, filepath: string) => Promise<Readable>;
}
|
||||
|
||||
/** Return shape of `encodeAndFormatVideos`. */
interface VideoResult {
  /** Encoded video parts; `data` holds the base64 content and `mimeType` the file's type. */
  videos: {
    type: string;
    mimeType: string;
    data: string;
  }[];
  /** Metadata copied from each processed file record. */
  files: {
    file_id?: string;
    temp_file_id?: string;
    filepath: string;
    source?: string;
    filename: string;
    type: string;
  }[];
}
|
||||
|
||||
/**
 * Downloads video attachments and formats them for the target endpoint.
 *
 * Each file that has a `filepath` is fetched through the storage strategy matching its
 * `source` (falling back to `'local'`). All downloads are started together and awaited
 * with `Promise.allSettled`; a failed download is logged and that file is left out of the
 * result. Files that are empty, are not `video/*`, or target an endpoint rejected by
 * `isDocumentSupportedEndpoint` contribute metadata only.
 *
 * @param req - The request object, forwarded to the strategy's `getDownloadStream`
 * @param files - Array of video files
 * @param endpoint - The endpoint to format for; a `videos` entry is only produced for `EModelEndpoint.google`
 * @param getStrategyFunctions - Function to get strategy functions; called at most once per distinct source
 * @returns Promise that resolves to videos and file metadata
 * @throws Error when `validateVideo` reports a downloaded video file as invalid
 */
export async function encodeAndFormatVideos(
  req: Request,
  files: IMongoFile[],
  endpoint: EModelEndpoint,
  getStrategyFunctions: (source: string) => StrategyFunctions,
): Promise<VideoResult> {
  const result: VideoResult = { videos: [], files: [] };
  if (!files?.length) {
    return result;
  }

  // One strategy lookup per storage source; a Map avoids prototype-key surprises.
  const strategies = new Map<string, StrategyFunctions>();

  const downloadFile = async (file: IMongoFile) => {
    if (!file?.filepath) {
      return null;
    }

    const source = file.source ?? 'local';
    let strategy = strategies.get(source);
    if (!strategy) {
      strategy = getStrategyFunctions(source);
      strategies.set(source, strategy);
    }

    const { getDownloadStream } = strategy;
    const stream = await getDownloadStream(req, file.filepath);
    // Keep the raw bytes: validation needs a Buffer, and base64 is only needed on output.
    const buffer = await getStream.buffer(stream);

    return {
      file,
      buffer,
      metadata: {
        file_id: file.file_id,
        temp_file_id: file.temp_file_id,
        filepath: file.filepath,
        source: file.source,
        filename: file.filename,
        type: file.type,
      },
    };
  };

  const settled = await Promise.allSettled(files.map(downloadFile));

  for (const outcome of settled) {
    if (outcome.status === 'rejected') {
      console.error('Video processing failed:', outcome.reason);
      continue;
    }

    const downloaded = outcome.value;
    if (!downloaded) {
      continue;
    }

    const { file, buffer, metadata } = downloaded;

    // Empty downloads, non-video types (including a missing type) and unsupported
    // endpoints are reported as plain file metadata without an encoded payload.
    const isVideo = file.type?.startsWith('video/') === true;
    if (buffer.length === 0 || !isVideo || !isDocumentSupportedEndpoint(endpoint)) {
      result.files.push(metadata);
      continue;
    }

    const validation = await validateVideo(buffer, buffer.length, endpoint);
    if (!validation.isValid) {
      throw new Error(`Video validation failed: ${validation.error}`);
    }

    if (endpoint === EModelEndpoint.google) {
      result.videos.push({
        type: 'video',
        mimeType: file.type,
        data: buffer.toString('base64'),
      });
    }

    result.files.push(metadata);
  }

  return result;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue