From e55264b22a0eb7800bb711be4c580409f6e220a7 Mon Sep 17 00:00:00 2001 From: Dustin Healy <54083382+dustinhealy@users.noreply.github.com> Date: Wed, 20 Aug 2025 17:01:21 -0700 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=AB=20refactor:=20Move=20Encoding=20Lo?= =?UTF-8?q?gic=20to=20packages/api=20(#9182)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: move audio encode over to TS * refactor: audio encoding now functional in LC again * refactor: move video encode over to TS * refactor: move document encode over to TS * refactor: video encoding now functional in LC again * refactor: document encoding now functional in LC again * fix: extend file type options in AttachFileMenu to include 'google_multimodal' and update dependency array to include agent?.provider * feat: only accept pdfs if responses api is enabled for openai convos --- api/server/controllers/agents/client.js | 12 +- api/server/services/Files/Audio/encode.js | 111 ----------- api/server/services/Files/Documents/encode.js | 181 ------------------ api/server/services/Files/Documents/index.js | 6 - api/server/services/Files/Video/encode.js | 111 ----------- api/server/services/Files/index.js | 2 + .../Chat/Input/Files/AttachFileMenu.tsx | 14 +- packages/api/src/files/audio/encode.ts | 116 +++++++++++ packages/api/src/files/document/encode.ts | 150 +++++++++++++++ packages/api/src/files/index.ts | 3 + packages/api/src/files/video/encode.ts | 117 +++++++++++ 11 files changed, 408 insertions(+), 415 deletions(-) delete mode 100644 api/server/services/Files/Audio/encode.js delete mode 100644 api/server/services/Files/Documents/encode.js delete mode 100644 api/server/services/Files/Documents/index.js delete mode 100644 api/server/services/Files/Video/encode.js create mode 100644 packages/api/src/files/audio/encode.ts create mode 100644 packages/api/src/files/document/encode.ts create mode 100644 packages/api/src/files/video/encode.ts diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index 4411438abb..bb09e2c18d 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -11,6 +11,9 @@ const { memoryInstructions, formatContentStrings, createMemoryProcessor, + encodeAndFormatAudios, + encodeAndFormatVideos, + encodeAndFormatDocuments, } = require('@librechat/api'); const { Callback, @@ -42,21 +45,19 @@ const { setMemory, } = require('~/models'); const { getMCPAuthMap, checkCapability, hasCustomUserVars } = require('~/server/services/Config'); -const { encodeAndFormatDocuments } = require('~/server/services/Files/Documents/encode'); const { addCacheControl, createContextHandlers } = require('~/app/clients/prompts'); -const { encodeAndFormatVideos } = require('~/server/services/Files/Video/encode'); -const { encodeAndFormatAudios } = require('~/server/services/Files/Audio/encode'); -const { getFiles } = require('~/models'); const { initializeAgent } = require('~/server/services/Endpoints/agents/agent'); const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens'); const { getFormattedMemories, deleteMemory, setMemory } = require('~/models'); const { encodeAndFormat } = require('~/server/services/Files/images/encode'); const { getProviderConfig } = require('~/server/services/Endpoints'); +const { getStrategyFunctions } = require('~/server/services/Files'); const { checkCapability } = require('~/server/services/Config'); const BaseClient = require('~/app/clients/BaseClient'); const { getRoleByName } = require('~/models/Role'); const { loadAgent } = require('~/models/Agent'); const { getMCPManager } = require('~/config'); +const { getFiles } = require('~/models'); const omitTitleOptions = new Set([ 'stream', @@ -239,6 +240,7 @@ class AgentClient extends BaseClient { this.options.req, attachments, this.options.agent.provider, + getStrategyFunctions, ); message.documents = documentResult.documents && documentResult.documents.length @@ -252,6 +254,7 @@ class AgentClient extends BaseClient { this.options.req, attachments, this.options.agent.provider, + getStrategyFunctions, ); message.videos = videoResult.videos && videoResult.videos.length ? videoResult.videos : undefined; @@ -263,6 +266,7 @@ class AgentClient extends BaseClient { this.options.req, attachments, this.options.agent.provider, + getStrategyFunctions, ); message.audios = audioResult.audios && audioResult.audios.length ? audioResult.audios : undefined; diff --git a/api/server/services/Files/Audio/encode.js b/api/server/services/Files/Audio/encode.js deleted file mode 100644 index 98d920c565..0000000000 --- a/api/server/services/Files/Audio/encode.js +++ /dev/null @@ -1,111 +0,0 @@ -const { EModelEndpoint, isDocumentSupportedEndpoint } = require('librechat-data-provider'); -const { getStrategyFunctions } = require('~/server/services/Files/strategies'); -const { validateAudio } = require('@librechat/api'); -const { streamToBuffer } = require('~/server/services/Files/Documents/encode'); - -/** - * Encodes and formats audio files for different endpoints - * @param {Express.Request} req - The request object - * @param {Array} files - Array of audio files - * @param {EModelEndpoint} endpoint - The endpoint to format for - * @returns {Promise<{ audios: Array, files: Array }>} - */ -async function encodeAndFormatAudios(req, files, endpoint) { - const promises = []; - const encodingMethods = {}; - /** @type {{ audios: any[]; files: MongoFile[] }} */ - const result = { - audios: [], - files: [], - }; - - for (const file of files) { - if (!file || !file.filepath) { - continue; - } - - const source = file.source ?? 'local'; - if (!encodingMethods[source]) { - encodingMethods[source] = getStrategyFunctions(source); - } - - const fileMetadata = { - file_id: file.file_id || file._id, - temp_file_id: file.temp_file_id, - filepath: file.filepath, - source: file.source, - filename: file.filename, - type: file.type, - }; - - promises.push([file, fileMetadata]); - } - - const results = await Promise.allSettled( - promises.map(async ([file, fileMetadata]) => { - if (!file || !fileMetadata) { - return { file: null, content: null, metadata: fileMetadata }; - } - - try { - const source = file.source ?? 'local'; - const { getDownloadStream } = encodingMethods[source]; - - const stream = await getDownloadStream(req, file.filepath); - const buffer = await streamToBuffer(stream); - const audioContent = buffer.toString('base64'); - - return { - file, - content: audioContent, - metadata: fileMetadata, - }; - } catch (error) { - console.error(`Error processing audio ${file.filename}:`, error); - return { file, content: null, metadata: fileMetadata }; - } - }), - ); - - for (const settledResult of results) { - if (settledResult.status === 'rejected') { - console.error('Audio processing failed:', settledResult.reason); - continue; - } - - const { file, content, metadata } = settledResult.value; - - if (!content || !file) { - if (metadata) { - result.files.push(metadata); - } - continue; - } - - if (file.type.startsWith('audio/') && isDocumentSupportedEndpoint(endpoint)) { - const audioBuffer = Buffer.from(content, 'base64'); - - const validation = await validateAudio(audioBuffer, audioBuffer.length, endpoint); - if (!validation.isValid) { - throw new Error(`Audio validation failed: ${validation.error}`); - } - - if (endpoint === EModelEndpoint.google) { - const audioPart = { - type: 'audio', - mimeType: file.type, - data: content, - }; - result.audios.push(audioPart); - } - - result.files.push(metadata); - } - } - - return result; -} - -module.exports = { - encodeAndFormatAudios, -}; diff --git a/api/server/services/Files/Documents/encode.js b/api/server/services/Files/Documents/encode.js deleted file mode 100644 index 6970a8cc6a..0000000000 --- a/api/server/services/Files/Documents/encode.js +++ /dev/null @@ -1,181 +0,0 @@ -const { EModelEndpoint, isDocumentSupportedEndpoint } = require('librechat-data-provider'); -const { getStrategyFunctions } = require('~/server/services/Files/strategies'); -const { validatePdf } = require('@librechat/api'); - -/** - * Converts a readable stream to a buffer. - * - * @param {NodeJS.ReadableStream} stream - The readable stream to convert. - * @returns {Promise} - Promise resolving to the buffer. - */ -async function streamToBuffer(stream) { - return new Promise((resolve, reject) => { - const chunks = []; - - stream.on('data', (chunk) => { - chunks.push(chunk); - }); - - stream.on('end', () => { - try { - const buffer = Buffer.concat(chunks); - chunks.length = 0; - resolve(buffer); - } catch (err) { - reject(err); - } - }); - - stream.on('error', (error) => { - chunks.length = 0; - reject(error); - }); - }).finally(() => { - if (stream.destroy && typeof stream.destroy === 'function') { - stream.destroy(); - } - }); -} - -/** - * Processes and encodes document files for various endpoints - * - * @param {Express.Request} req - Express request object - * @param {MongoFile[]} files - Array of file objects to process - * @param {string} endpoint - The endpoint identifier (e.g., EModelEndpoint.anthropic) - * @returns {Promise<{documents: MessageContentDocument[], files: MongoFile[]}>} - */ -async function encodeAndFormatDocuments(req, files, endpoint) { - const promises = []; - /** @type {Record, 'prepareDocumentPayload' | 'getDownloadStream'>>} */ - const encodingMethods = {}; - /** @type {{ documents: MessageContentDocument[]; files: MongoFile[] }} */ - const result = { - documents: [], - files: [], - }; - - if (!files || !files.length) { - return result; - } - - const documentFiles = files.filter( - (file) => file.type === 'application/pdf' || file.type?.startsWith('application/'), // Future: support for other document types - ); - - if (!documentFiles.length) { - return result; - } - - for (let file of documentFiles) { - /** @type {FileSources} */ - const source = file.source ?? 'local'; - - if (file.type !== 'application/pdf' || !isDocumentSupportedEndpoint(endpoint)) { - continue; - } - - if (!encodingMethods[source]) { - encodingMethods[source] = getStrategyFunctions(source); - } - - const fileMetadata = { - file_id: file.file_id || file._id, - temp_file_id: file.temp_file_id, - filepath: file.filepath, - source: file.source, - filename: file.filename, - type: file.type, - }; - - promises.push([file, fileMetadata]); - } - - const results = await Promise.allSettled( - promises.map(async ([file, fileMetadata]) => { - if (!file || !fileMetadata) { - return { file: null, content: null, metadata: fileMetadata }; - } - - try { - const source = file.source ?? 'local'; - const { getDownloadStream } = encodingMethods[source]; - - const stream = await getDownloadStream(req, file.filepath); - const buffer = await streamToBuffer(stream); - const documentContent = buffer.toString('base64'); - - return { - file, - content: documentContent, - metadata: fileMetadata, - }; - } catch (error) { - console.error(`Error processing document ${file.filename}:`, error); - return { file, content: null, metadata: fileMetadata }; - } - }), - ); - - for (const settledResult of results) { - if (settledResult.status === 'rejected') { - console.error('Document processing failed:', settledResult.reason); - continue; - } - - const { file, content, metadata } = settledResult.value; - - if (!content || !file) { - if (metadata) { - result.files.push(metadata); - } - continue; - } - - if (file.type === 'application/pdf' && isDocumentSupportedEndpoint(endpoint)) { - const pdfBuffer = Buffer.from(content, 'base64'); - const validation = await validatePdf(pdfBuffer, pdfBuffer.length, endpoint); - - if (!validation.isValid) { - throw new Error(`PDF validation failed: ${validation.error}`); - } - - if (endpoint === EModelEndpoint.anthropic) { - const documentPart = { - type: 'document', - source: { - type: 'base64', - media_type: 'application/pdf', - data: content, - }, - cache_control: { type: 'ephemeral' }, - citations: { enabled: true }, - }; - result.documents.push(documentPart); - } else if (endpoint === EModelEndpoint.openAI) { - const documentPart = { - type: 'input_file', - filename: file.filename, - file_data: `data:application/pdf;base64,${content}`, - }; - result.documents.push(documentPart); - } else if (endpoint === EModelEndpoint.google) { - const documentPart = { - type: 'document', - mimeType: 'application/pdf', - data: content, - }; - result.documents.push(documentPart); - } - - result.files.push(metadata); - } - } - - return result; -} - -module.exports = { - encodeAndFormatDocuments, - streamToBuffer, -}; diff --git a/api/server/services/Files/Documents/index.js b/api/server/services/Files/Documents/index.js deleted file mode 100644 index 6cc1e42dcf..0000000000 --- a/api/server/services/Files/Documents/index.js +++ /dev/null @@ -1,6 +0,0 @@ -const { encodeAndFormatDocuments, streamToBuffer } = require('./encode'); - -module.exports = { - encodeAndFormatDocuments, - streamToBuffer, -}; diff --git a/api/server/services/Files/Video/encode.js b/api/server/services/Files/Video/encode.js deleted file mode 100644 index 2959b08799..0000000000 --- a/api/server/services/Files/Video/encode.js +++ /dev/null @@ -1,111 +0,0 @@ -const { EModelEndpoint, isDocumentSupportedEndpoint } = require('librechat-data-provider'); -const { getStrategyFunctions } = require('~/server/services/Files/strategies'); -const { validateVideo } = require('@librechat/api'); -const { streamToBuffer } = require('~/server/services/Files/Documents/encode'); - -/** - * Encodes and formats video files for different endpoints - * @param {Express.Request} req - The request object - * @param {Array} files - Array of video files - * @param {EModelEndpoint} endpoint - The endpoint to format for - * @returns {Promise<{ videos: Array, files: Array }>} - */ -async function encodeAndFormatVideos(req, files, endpoint) { - const promises = []; - const encodingMethods = {}; - /** @type {{ videos: any[]; files: MongoFile[] }} */ - const result = { - videos: [], - files: [], - }; - - for (const file of files) { - if (!file || !file.filepath) { - continue; - } - - const source = file.source ?? 'local'; - if (!encodingMethods[source]) { - encodingMethods[source] = getStrategyFunctions(source); - } - - const fileMetadata = { - file_id: file.file_id || file._id, - temp_file_id: file.temp_file_id, - filepath: file.filepath, - source: file.source, - filename: file.filename, - type: file.type, - }; - - promises.push([file, fileMetadata]); - } - - const results = await Promise.allSettled( - promises.map(async ([file, fileMetadata]) => { - if (!file || !fileMetadata) { - return { file: null, content: null, metadata: fileMetadata }; - } - - try { - const source = file.source ?? 'local'; - const { getDownloadStream } = encodingMethods[source]; - - const stream = await getDownloadStream(req, file.filepath); - const buffer = await streamToBuffer(stream); - const videoContent = buffer.toString('base64'); - - return { - file, - content: videoContent, - metadata: fileMetadata, - }; - } catch (error) { - console.error(`Error processing video ${file.filename}:`, error); - return { file, content: null, metadata: fileMetadata }; - } - }), - ); - - for (const settledResult of results) { - if (settledResult.status === 'rejected') { - console.error('Video processing failed:', settledResult.reason); - continue; - } - - const { file, content, metadata } = settledResult.value; - - if (!content || !file) { - if (metadata) { - result.files.push(metadata); - } - continue; - } - - if (file.type.startsWith('video/') && isDocumentSupportedEndpoint(endpoint)) { - const videoBuffer = Buffer.from(content, 'base64'); - - const validation = await validateVideo(videoBuffer, videoBuffer.length, endpoint); - if (!validation.isValid) { - throw new Error(`Video validation failed: ${validation.error}`); - } - - if (endpoint === EModelEndpoint.google) { - const videoPart = { - type: 'video', - mimeType: file.type, - data: content, - }; - result.videos.push(videoPart); - } - - result.files.push(metadata); - } - } - - return result; -} - -module.exports = { - encodeAndFormatVideos, -}; diff --git a/api/server/services/Files/index.js b/api/server/services/Files/index.js index 872e8a0e81..2ba5273c33 100644 --- a/api/server/services/Files/index.js +++ b/api/server/services/Files/index.js @@ -2,11 +2,13 @@ const { processCodeFile } = require('./Code/process'); const { processFileUpload } = require('./process'); const { uploadImageBuffer } = require('./images'); const { hasAccessToFilesViaAgent, filterFilesByAgentAccess } = require('./permissions'); +const { getStrategyFunctions } = require('./strategies'); module.exports = { processCodeFile, processFileUpload, uploadImageBuffer, + getStrategyFunctions, hasAccessToFilesViaAgent, filterFilesByAgentAccess, }; diff --git a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx index 33d01887e1..2a8d658115 100644 --- a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx +++ b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx @@ -77,7 +77,9 @@ const AttachFileMenu = ({ * */ const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities); - const handleUploadClick = (fileType?: 'image' | 'document' | 'multimodal') => { + const handleUploadClick = ( + fileType?: 'image' | 'document' | 'multimodal' | 'google_multimodal', + ) => { if (!inputRef.current) { return; } @@ -103,7 +105,14 @@ const AttachFileMenu = ({ ) => { const items: MenuItemProps[] = []; - const shouldShowDirectAttach = isDocumentSupportedEndpoint(agent?.provider ?? endpoint); + const currentProvider = agent?.provider ?? endpoint; + const isOpenAIOrAzure = + currentProvider === EModelEndpoint.openAI || currentProvider === EModelEndpoint.azureOpenAI; + const useResponsesApiEnabled = conversation?.useResponsesApi ?? false; + + const shouldShowDirectAttach = + isDocumentSupportedEndpoint(currentProvider) && + (!isOpenAIOrAzure || useResponsesApiEnabled); if (shouldShowDirectAttach) { items.push({ @@ -194,6 +203,7 @@ const AttachFileMenu = ({ sharePointEnabled, setIsSharePointDialogOpen, endpoint, + agent?.provider, ]); const menuTrigger = ( diff --git a/packages/api/src/files/audio/encode.ts b/packages/api/src/files/audio/encode.ts new file mode 100644 index 0000000000..01f5b5246a --- /dev/null +++ b/packages/api/src/files/audio/encode.ts @@ -0,0 +1,116 @@ +import { Readable } from 'stream'; +import getStream from 'get-stream'; +import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider'; +import type { IMongoFile } from '@librechat/data-schemas'; +import type { Request } from 'express'; +import { validateAudio } from '~/files/validation'; + +interface StrategyFunctions { + getDownloadStream: (req: Request, filepath: string) => Promise; +} + +interface AudioResult { + audios: Array<{ + type: string; + mimeType: string; + data: string; + }>; + files: Array<{ + file_id?: string; + temp_file_id?: string; + filepath: string; + source?: string; + filename: string; + type: string; + }>; +} + +/** + * Encodes and formats audio files for different endpoints + * @param req - The request object + * @param files - Array of audio files + * @param endpoint - The endpoint to format for (currently only google is supported) + * @returns Promise that resolves to audio and file metadata + */ +export async function encodeAndFormatAudios( + req: Request, + files: IMongoFile[], + endpoint: EModelEndpoint, + getStrategyFunctions: (source: string) => StrategyFunctions, +): Promise { + if (!files?.length) { + return { audios: [], files: [] }; + } + + const encodingMethods: Record = {}; + const result: AudioResult = { audios: [], files: [] }; + + const processFile = async (file: IMongoFile) => { + if (!file?.filepath) return null; + + const source = file.source ?? 'local'; + if (!encodingMethods[source]) { + encodingMethods[source] = getStrategyFunctions(source); + } + + const { getDownloadStream } = encodingMethods[source]; + const stream = await getDownloadStream(req, file.filepath); + const buffer = await getStream.buffer(stream); + + return { + file, + content: buffer.toString('base64'), + metadata: { + file_id: file.file_id, + temp_file_id: file.temp_file_id, + filepath: file.filepath, + source: file.source, + filename: file.filename, + type: file.type, + }, + }; + }; + + const results = await Promise.allSettled(files.map(processFile)); + + for (const settledResult of results) { + if (settledResult.status === 'rejected') { + console.error('Audio processing failed:', settledResult.reason); + continue; + } + + const processed = settledResult.value; + if (!processed) continue; + + const { file, content, metadata } = processed; + + if (!content || !file) { + if (metadata) result.files.push(metadata); + continue; + } + + if (!file.type.startsWith('audio/') || !isDocumentSupportedEndpoint(endpoint)) { + result.files.push(metadata); + continue; + } + + const audioBuffer = Buffer.from(content, 'base64'); + const validation = await validateAudio(audioBuffer, audioBuffer.length, endpoint); + + if (!validation.isValid) { + throw new Error(`Audio validation failed: ${validation.error}`); + } + + if (endpoint === EModelEndpoint.google) { + result.audios.push({ + type: 'audio', + mimeType: file.type, + data: content, + }); + } + + result.files.push(metadata); + } + + return result; +} diff --git a/packages/api/src/files/document/encode.ts b/packages/api/src/files/document/encode.ts new file mode 100644 index 0000000000..20272c61dd --- /dev/null +++ b/packages/api/src/files/document/encode.ts @@ -0,0 +1,150 @@ +import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider'; +import { validatePdf } from '@librechat/api'; +import getStream from 'get-stream'; +import type { Request } from 'express'; +import type { IMongoFile } from '@librechat/data-schemas'; +import { Readable } from 'stream'; + +interface StrategyFunctions { + getDownloadStream: (req: Request, filepath: string) => Promise; +} + +interface DocumentResult { + documents: Array<{ + type: string; + source?: { + type: string; + media_type: string; + data: string; + }; + cache_control?: { type: string }; + citations?: { enabled: boolean }; + filename?: string; + file_data?: string; + mimeType?: string; + data?: string; + }>; + files: Array<{ + file_id?: string; + temp_file_id?: string; + filepath: string; + source?: string; + filename: string; + type: string; + }>; +} + +/** + * Processes and encodes document files for various endpoints + * @param req - Express request object + * @param files - Array of file objects to process + * @param endpoint - The endpoint identifier (e.g., EModelEndpoint.anthropic) + * @param getStrategyFunctions - Function to get strategy functions + * @returns Promise that resolves to documents and file metadata + */ +export async function encodeAndFormatDocuments( + req: Request, + files: IMongoFile[], + endpoint: EModelEndpoint, + getStrategyFunctions: (source: string) => StrategyFunctions, +): Promise { + if (!files?.length) { + return { documents: [], files: [] }; + } + + const encodingMethods: Record = {}; + const result: DocumentResult = { documents: [], files: [] }; + + const documentFiles = files.filter( + (file) => file.type === 'application/pdf' || file.type?.startsWith('application/'), + ); + + if (!documentFiles.length) { + return result; + } + + const processFile = async (file: IMongoFile) => { + if (file.type !== 'application/pdf' || !isDocumentSupportedEndpoint(endpoint)) { + return null; + } + + const source = file.source ?? 'local'; + if (!encodingMethods[source]) { + encodingMethods[source] = getStrategyFunctions(source); + } + + const { getDownloadStream } = encodingMethods[source]; + const stream = await getDownloadStream(req, file.filepath); + const buffer = await getStream.buffer(stream); + + return { + file, + content: buffer.toString('base64'), + metadata: { + file_id: file.file_id, + temp_file_id: file.temp_file_id, + filepath: file.filepath, + source: file.source, + filename: file.filename, + type: file.type, + }, + }; + }; + + const results = await Promise.allSettled(documentFiles.map(processFile)); + + for (const settledResult of results) { + if (settledResult.status === 'rejected') { + console.error('Document processing failed:', settledResult.reason); + continue; + } + + const processed = settledResult.value; + if (!processed) continue; + + const { file, content, metadata } = processed; + + if (!content || !file) { + if (metadata) result.files.push(metadata); + continue; + } + + if (file.type === 'application/pdf' && isDocumentSupportedEndpoint(endpoint)) { + const pdfBuffer = Buffer.from(content, 'base64'); + const validation = await validatePdf(pdfBuffer, pdfBuffer.length, endpoint); + + if (!validation.isValid) { + throw new Error(`PDF validation failed: ${validation.error}`); + } + + if (endpoint === EModelEndpoint.anthropic) { + result.documents.push({ + type: 'document', + source: { + type: 'base64', + media_type: 'application/pdf', + data: content, + }, + cache_control: { type: 'ephemeral' }, + citations: { enabled: true }, + }); + } else if (endpoint === EModelEndpoint.openAI) { + result.documents.push({ + type: 'input_file', + filename: file.filename, + file_data: `data:application/pdf;base64,${content}`, + }); + } else if (endpoint === EModelEndpoint.google) { + result.documents.push({ + type: 'document', + mimeType: 'application/pdf', + data: content, + }); + } + + result.files.push(metadata); + } + } + + return result; +} diff --git a/packages/api/src/files/index.ts b/packages/api/src/files/index.ts index a19584efca..b337b92a06 100644 --- a/packages/api/src/files/index.ts +++ b/packages/api/src/files/index.ts @@ -3,3 +3,6 @@ export * from './audio'; export * from './text'; export * from './parse'; export * from './validation'; +export * from './audio/encode'; +export * from './video/encode'; +export * from './document/encode'; diff --git a/packages/api/src/files/video/encode.ts b/packages/api/src/files/video/encode.ts new file mode 100644 index 0000000000..a5d1b48940 --- /dev/null +++ b/packages/api/src/files/video/encode.ts @@ -0,0 +1,117 @@ +import { EModelEndpoint, isDocumentSupportedEndpoint } from 'librechat-data-provider'; +import { validateVideo } from '@librechat/api'; +import getStream from 'get-stream'; +import type { Request } from 'express'; +import type { IMongoFile } from '@librechat/data-schemas'; +import { Readable } from 'stream'; + +interface StrategyFunctions { + getDownloadStream: (req: Request, filepath: string) => Promise; +} + +interface VideoResult { + videos: Array<{ + type: string; + mimeType: string; + data: string; + }>; + files: Array<{ + file_id?: string; + temp_file_id?: string; + filepath: string; + source?: string; + filename: string; + type: string; + }>; +} + +/** + * Encodes and formats video files for different endpoints + * @param req - The request object + * @param files - Array of video files + * @param endpoint - The endpoint to format for + * @param getStrategyFunctions - Function to get strategy functions + * @returns Promise that resolves to videos and file metadata + */ +export async function encodeAndFormatVideos( + req: Request, + files: IMongoFile[], + endpoint: EModelEndpoint, + getStrategyFunctions: (source: string) => StrategyFunctions, +): Promise { + if (!files?.length) { + return { videos: [], files: [] }; + } + + const encodingMethods: Record = {}; + const result: VideoResult = { videos: [], files: [] }; + + const processFile = async (file: IMongoFile) => { + if (!file?.filepath) return null; + + const source = file.source ?? 'local'; + if (!encodingMethods[source]) { + encodingMethods[source] = getStrategyFunctions(source); + } + + const { getDownloadStream } = encodingMethods[source]; + const stream = await getDownloadStream(req, file.filepath); + const buffer = await getStream.buffer(stream); + + return { + file, + content: buffer.toString('base64'), + metadata: { + file_id: file.file_id, + temp_file_id: file.temp_file_id, + filepath: file.filepath, + source: file.source, + filename: file.filename, + type: file.type, + }, + }; + }; + + const results = await Promise.allSettled(files.map(processFile)); + + for (const settledResult of results) { + if (settledResult.status === 'rejected') { + console.error('Video processing failed:', settledResult.reason); + continue; + } + + const processed = settledResult.value; + if (!processed) continue; + + const { file, content, metadata } = processed; + + if (!content || !file) { + if (metadata) result.files.push(metadata); + continue; + } + + if (!file.type.startsWith('video/') || !isDocumentSupportedEndpoint(endpoint)) { + result.files.push(metadata); + continue; + } + + const videoBuffer = Buffer.from(content, 'base64'); + const validation = await validateVideo(videoBuffer, videoBuffer.length, endpoint); + + if (!validation.isValid) { + throw new Error(`Video validation failed: ${validation.error}`); + } + + if (endpoint === EModelEndpoint.google) { + result.videos.push({ + type: 'video', + mimeType: file.type, + data: content, + }); + } + + result.files.push(metadata); + } + + return result; +}