From bcd97aad2ff9c60840bba7239ebd764e925eb4d5 Mon Sep 17 00:00:00 2001
From: Danny Avila
Date: Mon, 6 Oct 2025 17:30:16 -0400
Subject: [PATCH] =?UTF-8?q?=F0=9F=93=8E=20feat:=20Direct=20Provider=20Atta?=
 =?UTF-8?q?chment=20Support=20for=20Multimodal=20Content=20(#9994)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 📎 feat: Direct Provider Attachment Support for Multimodal Content

* 📑 feat: Anthropic Direct Provider Upload (#9072)

* feat: implement Anthropic native PDF support with document preservation

- Add comprehensive debug logging throughout PDF processing pipeline
- Refactor attachment processing to separate image and document handling
- Create distinct addImageURLs(), addDocuments(), and processAttachments() methods
- Fix critical bugs in stream handling and parameter passing
- Add streamToBuffer utility for proper stream-to-buffer conversion
- Remove api/agents submodule from repository

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude

* chore: remove out of scope formatting changes
* fix: stop duplication of file in chat on end of response stream
* chore: bring back file search and ocr options
* chore: localize upload to provider string in file menu
* refactor: change createMenuItems args to fit new pattern introduced by anthropic-native-pdf-support
* feat: add cache point for pdfs processed by anthropic endpoint since they are unlikely to change and should benefit from caching
* feat: combine Upload Image into Upload to Provider since they both perform direct upload and change provider upload icon to reflect multimodal upload
* feat: add citations support according to docs
* refactor: remove redundant 'document' check since documents are handled properly by formatMessage in the agents repo now
* refactor: change upload logic so anthropic endpoint isn't exempted from normal upload path using Agents for consistency with the rest of the upload logic
* fix: include width and height in return from uploadLocalFile so images are correctly identified when going through an AgentUpload in addImageURLs
* chore: remove client specific handling since the direct provider stuff is handled by the agent client
* feat: handle documents in AgentClient so no need for change to agents repo
* chore: removed unused changes
* chore: remove auto generated comments from OG commit
* feat: add logic for agents to use direct to provider uploads if supported (currently just anthropic)
* fix: reintroduce role check to fix render error because of undefined value for Content Part
* fix: actually fix render bug by using proper isCreatedByUser check and making sure our mutation of formattedMessage.content is consistent

---------

Co-authored-by: Andres Restrepo
Co-authored-by: Claude

📁 feat: Send Attachments Directly to Provider (OpenAI) (#9098)

* refactor: change references from direct upload to direct attach to better reflect functionality since we are just using base64 encoding strategy now rather than Files/File API for sending our attachments directly to the provider, the upload nomenclature no longer makes sense.
direct_attach better describes the different methods of sending attachments to providers anyway, even if we later introduce direct upload support

* feat: add upload to provider option for openai (and agent) ui
* chore: move anthropic pdf validator over to packages/api
* feat: simple pdf validation according to openai docs
* feat: add provider agnostic validatePdf logic to start handling multiple endpoints
* feat: add handling for openai specific documentPart formatting
* refactor: move require statement to proper place at top of file
* chore: add in openAI endpoint for the rest of the document handling logic
* feat: add direct attach support for azureOpenAI endpoint and agents
* feat: add pdf validation for azureOpenAI endpoint
* refactor: unify all the endpoint checks with isDocumentSupportedEndpoint
* refactor: consolidate Upload to Provider vs Upload image logic for clarity
* refactor: remove anthropic from anthropic_multimodal fileType since we support multiple providers now

🗂️ feat: Send Attachments Directly to Provider (Google) (#9100)

* feat: add validation for google PDFs and add google endpoint as a document supporting endpoint
* feat: add proper pdf formatting for google endpoints (requires PR #14 in agents)
* feat: add multimodal support for google endpoint attachments
* feat: add audio file svg
* fix: refactor attachments logic so multi-attachment messages work properly
* feat: add video file svg
* fix: allows for follow-up questions of uploaded multimodal attachments
* fix: remove incorrect final message filtering that was breaking Attachment component rendering

fix: manually rename 'documents' to 'Documents' in git since it wasn't picked up due to case insensitivity in dir name

fix: add logic so filepicker for a google agent has proper filetype filtering

🛫 refactor: Move Encoding Logic to packages/api (#9182)

* refactor: move audio encode over to TS
* refactor: audio encoding now functional in LC again
* refactor: move video encode over to TS
* refactor: move document encode over to TS
* refactor: video encoding now functional in LC again
* refactor: document encoding now functional in LC again
* fix: extend file type options in AttachFileMenu to include 'google_multimodal' and update dependency array to include agent?.provider
* feat: only accept pdfs if responses api is enabled for openai convos

chore: address ESLint comments

chore: add missing audio mimetype

* fix: type safety for message content parts and improve null handling
* chore: reorder AttachFileMenuProps for consistency and clarity
* chore: import order in AttachFileMenu
* fix: improve null handling for text parts in parseTextParts function
* fix: remove no longer used unsupported capability error message for file uploads
* fix: OpenAI Direct File Attachment Format
* fix: update encodeAndFormatDocuments to support OpenAI responses API and enhance document result types
* refactor: broaden providers supported for documents
* feat: enhance DragDrop context and modal to support document uploads based on provider capabilities
* fix: reorder import statements for consistency in video encoding module

---------

Co-authored-by: Dustin Healy <54083382+dustinhealy@users.noreply.github.com>
---
 api/app/clients/BaseClient.js | 115 ++++++++++-
 api/package.json | 2 +-
 api/server/controllers/agents/client.js | 2 +-
 api/server/services/Files/Local/crud.js | 14 +-
 api/server/services/Files/process.js | 5 -
 client/src/Providers/DragDropContext.tsx | 17 +-
 .../Chat/Input/Files/AttachFileChat.tsx | 17 +-
.../Chat/Input/Files/AttachFileMenu.tsx | 79 ++++++-- .../Chat/Input/Files/DragDropModal.tsx | 49 ++++- .../components/Chat/Messages/Content/Part.tsx | 4 +- .../Messages/Content/Parts/LogContent.tsx | 14 +- .../hooks/Agents/useAgentToolPermissions.ts | 4 + client/src/hooks/Files/useFileHandling.ts | 7 - client/src/locales/en/translation.json | 2 +- client/src/utils/files.ts | 23 ++- client/src/utils/messages.ts | 9 +- package-lock.json | 10 +- packages/api/package.json | 2 +- packages/api/src/files/encode/audio.ts | 74 +++++++ packages/api/src/files/encode/document.ts | 108 ++++++++++ packages/api/src/files/encode/index.ts | 3 + packages/api/src/files/encode/utils.ts | 46 +++++ packages/api/src/files/encode/video.ts | 74 +++++++ packages/api/src/files/index.ts | 2 + packages/api/src/files/validation.ts | 186 ++++++++++++++++++ packages/api/src/types/files.ts | 85 ++++++++ packages/client/src/svgs/AudioPaths.tsx | 41 ++++ packages/client/src/svgs/VideoPaths.tsx | 10 + packages/client/src/svgs/index.ts | 2 + packages/data-provider/src/file-config.ts | 35 +++- packages/data-provider/src/parsers.ts | 2 +- packages/data-provider/src/schemas.ts | 55 ++++++ .../data-provider/src/types/assistants.ts | 16 +- 33 files changed, 1040 insertions(+), 74 deletions(-) create mode 100644 packages/api/src/files/encode/audio.ts create mode 100644 packages/api/src/files/encode/document.ts create mode 100644 packages/api/src/files/encode/index.ts create mode 100644 packages/api/src/files/encode/utils.ts create mode 100644 packages/api/src/files/encode/video.ts create mode 100644 packages/api/src/files/validation.ts create mode 100644 packages/client/src/svgs/AudioPaths.tsx create mode 100644 packages/client/src/svgs/VideoPaths.tsx diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js index 2458dc0ab3..32c76523f7 100644 --- a/api/app/clients/BaseClient.js +++ b/api/app/clients/BaseClient.js @@ -1,18 +1,24 @@ const crypto = require('crypto'); const fetch = require('node-fetch'); const { logger } = require('@librechat/data-schemas'); -const { getBalanceConfig } = require('@librechat/api'); const { - supportsBalanceCheck, - isAgentsEndpoint, - isParamEndpoint, - EModelEndpoint, + getBalanceConfig, + encodeAndFormatAudios, + encodeAndFormatVideos, + encodeAndFormatDocuments, +} = require('@librechat/api'); +const { + Constants, + ErrorTypes, ContentTypes, excludedKeys, - ErrorTypes, - Constants, + EModelEndpoint, + isParamEndpoint, + isAgentsEndpoint, + supportsBalanceCheck, } = require('librechat-data-provider'); const { getMessages, saveMessage, updateMessage, saveConvo, getConvo } = require('~/models'); +const { getStrategyFunctions } = require('~/server/services/Files/strategies'); const { checkBalance } = require('~/models/balanceMethods'); const { truncateToolCallOutputs } = require('./prompts'); const { getFiles } = require('~/models/File'); @@ -1198,8 +1204,99 @@ class BaseClient { return await this.sendCompletion(payload, opts); } + async addDocuments(message, attachments) { + const documentResult = await encodeAndFormatDocuments( + this.options.req, + attachments, + { + provider: this.options.agent?.provider, + useResponsesApi: this.options.agent?.model_parameters?.useResponsesApi, + }, + getStrategyFunctions, + ); + message.documents = + documentResult.documents && documentResult.documents.length + ? 
documentResult.documents + : undefined; + return documentResult.files; + } + + async addVideos(message, attachments) { + const videoResult = await encodeAndFormatVideos( + this.options.req, + attachments, + this.options.agent.provider, + getStrategyFunctions, + ); + message.videos = + videoResult.videos && videoResult.videos.length ? videoResult.videos : undefined; + return videoResult.files; + } + + async addAudios(message, attachments) { + const audioResult = await encodeAndFormatAudios( + this.options.req, + attachments, + this.options.agent.provider, + getStrategyFunctions, + ); + message.audios = + audioResult.audios && audioResult.audios.length ? audioResult.audios : undefined; + return audioResult.files; + } + + async processAttachments(message, attachments) { + const categorizedAttachments = { + images: [], + documents: [], + videos: [], + audios: [], + }; + + for (const file of attachments) { + if (file.type.startsWith('image/')) { + categorizedAttachments.images.push(file); + } else if (file.type === 'application/pdf') { + categorizedAttachments.documents.push(file); + } else if (file.type.startsWith('video/')) { + categorizedAttachments.videos.push(file); + } else if (file.type.startsWith('audio/')) { + categorizedAttachments.audios.push(file); + } + } + + const [imageFiles, documentFiles, videoFiles, audioFiles] = await Promise.all([ + categorizedAttachments.images.length > 0 + ? this.addImageURLs(message, categorizedAttachments.images) + : Promise.resolve([]), + categorizedAttachments.documents.length > 0 + ? this.addDocuments(message, categorizedAttachments.documents) + : Promise.resolve([]), + categorizedAttachments.videos.length > 0 + ? this.addVideos(message, categorizedAttachments.videos) + : Promise.resolve([]), + categorizedAttachments.audios.length > 0 + ? 
this.addAudios(message, categorizedAttachments.audios) + : Promise.resolve([]), + ]); + + const allFiles = [...imageFiles, ...documentFiles, ...videoFiles, ...audioFiles]; + const seenFileIds = new Set(); + const uniqueFiles = []; + + for (const file of allFiles) { + if (file.file_id && !seenFileIds.has(file.file_id)) { + seenFileIds.add(file.file_id); + uniqueFiles.push(file); + } else if (!file.file_id) { + uniqueFiles.push(file); + } + } + + return uniqueFiles; + } + /** - * * @param {TMessage[]} _messages * @returns {Promise} */ @@ -1248,7 +1345,7 @@ class BaseClient { {}, ); - await this.addImageURLs(message, files, this.visionMode); + await this.processAttachments(message, files); this.message_file_map[message.messageId] = files; return message; diff --git a/api/package.json b/api/package.json index 01c931a652..73cb0633e9 100644 --- a/api/package.json +++ b/api/package.json @@ -48,7 +48,7 @@ "@langchain/google-genai": "^0.2.13", "@langchain/google-vertexai": "^0.2.13", "@langchain/textsplitters": "^0.1.0", - "@librechat/agents": "^2.4.84", + "@librechat/agents": "^2.4.85", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index bf32385162..a9f5543a61 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -257,7 +257,7 @@ class AgentClient extends BaseClient { }; } - const files = await this.addImageURLs( + const files = await this.processAttachments( orderedMessages[orderedMessages.length - 1], attachments, ); diff --git a/api/server/services/Files/Local/crud.js b/api/server/services/Files/Local/crud.js index 16e75ba200..d3a3a21538 100644 --- a/api/server/services/Files/Local/crud.js +++ b/api/server/services/Files/Local/crud.js @@ -4,6 +4,7 @@ const axios = require('axios'); const { logger } = require('@librechat/data-schemas'); const { EModelEndpoint } = require('librechat-data-provider'); const { generateShortLivedToken } = require('@librechat/api'); +const { resizeImageBuffer } = require('~/server/services/Files/images/resize'); const { getBufferMetadata } = require('~/server/utils'); const paths = require('~/config/paths'); @@ -286,7 +287,18 @@ async function uploadLocalFile({ req, file, file_id }) { await fs.promises.writeFile(newPath, inputBuffer); const filepath = path.posix.join('/', 'uploads', req.user.id, path.basename(newPath)); - return { filepath, bytes }; + let height, width; + if (file.mimetype && file.mimetype.startsWith('image/')) { + try { + const { width: imgWidth, height: imgHeight } = await resizeImageBuffer(inputBuffer, 'high'); + height = imgHeight; + width = imgWidth; + } catch (error) { + logger.warn('[uploadLocalFile] Could not get image dimensions:', error.message); + } + } + + return { filepath, bytes, height, width }; } /** diff --git a/api/server/services/Files/process.js b/api/server/services/Files/process.js index c8221a6de5..f7220715f6 100644 --- a/api/server/services/Files/process.js +++ b/api/server/services/Files/process.js @@ -522,11 +522,6 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { } const isImage = file.mimetype.startsWith('image'); - if (!isImage && !tool_resource) { - /** Note: this needs to be removed when we can support files to providers */ - throw new Error('No tool resource provided for non-image agent file upload'); - } - let fileInfoMetadata; const entity_id = messageAttachment === true ? 
undefined : agent_id; const basePath = mime.getType(file.originalname)?.startsWith('image') ? 'images' : 'uploads'; diff --git a/client/src/Providers/DragDropContext.tsx b/client/src/Providers/DragDropContext.tsx index a86af6510b..3a5fe2924c 100644 --- a/client/src/Providers/DragDropContext.tsx +++ b/client/src/Providers/DragDropContext.tsx @@ -1,23 +1,38 @@ import React, { createContext, useContext, useMemo } from 'react'; +import type { EModelEndpoint } from 'librechat-data-provider'; +import { useGetEndpointsQuery } from '~/data-provider'; +import { getEndpointField } from '~/utils/endpoints'; import { useChatContext } from './ChatContext'; interface DragDropContextValue { conversationId: string | null | undefined; agentId: string | null | undefined; + endpoint: string | null | undefined; + endpointType?: EModelEndpoint | undefined; } const DragDropContext = createContext(undefined); export function DragDropProvider({ children }: { children: React.ReactNode }) { const { conversation } = useChatContext(); + const { data: endpointsConfig } = useGetEndpointsQuery(); + + const endpointType = useMemo(() => { + return ( + getEndpointField(endpointsConfig, conversation?.endpoint, 'type') || + (conversation?.endpoint as EModelEndpoint | undefined) + ); + }, [conversation?.endpoint, endpointsConfig]); /** Context value only created when conversation fields change */ const contextValue = useMemo( () => ({ conversationId: conversation?.conversationId, agentId: conversation?.agent_id, + endpoint: conversation?.endpoint, + endpointType: endpointType, }), - [conversation?.conversationId, conversation?.agent_id], + [conversation?.conversationId, conversation?.agent_id, conversation?.endpoint, endpointType], ); return {children}; diff --git a/client/src/components/Chat/Input/Files/AttachFileChat.tsx b/client/src/components/Chat/Input/Files/AttachFileChat.tsx index f80b8aef82..4757c598a5 100644 --- a/client/src/components/Chat/Input/Files/AttachFileChat.tsx +++ b/client/src/components/Chat/Input/Files/AttachFileChat.tsx @@ -2,13 +2,15 @@ import { memo, useMemo } from 'react'; import { Constants, supportsFiles, + EModelEndpoint, mergeFileConfig, isAgentsEndpoint, isAssistantsEndpoint, fileConfig as defaultFileConfig, } from 'librechat-data-provider'; import type { EndpointFileConfig, TConversation } from 'librechat-data-provider'; -import { useGetFileConfig } from '~/data-provider'; +import { useGetFileConfig, useGetEndpointsQuery } from '~/data-provider'; +import { getEndpointField } from '~/utils/endpoints'; import AttachFileMenu from './AttachFileMenu'; import AttachFile from './AttachFile'; @@ -20,7 +22,7 @@ function AttachFileChat({ conversation: TConversation | null; }) { const conversationId = conversation?.conversationId ?? Constants.NEW_CONVO; - const { endpoint, endpointType } = conversation ?? { endpoint: null }; + const { endpoint } = conversation ?? { endpoint: null }; const isAgents = useMemo(() => isAgentsEndpoint(endpoint), [endpoint]); const isAssistants = useMemo(() => isAssistantsEndpoint(endpoint), [endpoint]); @@ -28,6 +30,15 @@ function AttachFileChat({ select: (data) => mergeFileConfig(data), }); + const { data: endpointsConfig } = useGetEndpointsQuery(); + + const endpointType = useMemo(() => { + return ( + getEndpointField(endpointsConfig, endpoint, 'type') || + (endpoint as EModelEndpoint | undefined) + ); + }, [endpoint, endpointsConfig]); + const endpointFileConfig = fileConfig.endpoints[endpoint ?? 
''] as EndpointFileConfig | undefined; const endpointSupportsFiles: boolean = supportsFiles[endpointType ?? endpoint ?? ''] ?? false; const isUploadDisabled = (disableInputs || endpointFileConfig?.disabled) ?? false; @@ -37,7 +48,9 @@ function AttachFileChat({ } else if (isAgents || (endpointSupportsFiles && !isUploadDisabled)) { return ( { @@ -55,44 +70,75 @@ const AttachFileMenu = ({ overrideEndpointFileConfig: endpointFileConfig, toolResource, }); + + const { agentsConfig } = useGetAgentsConfig(); const { data: startupConfig } = useGetStartupConfig(); const sharePointEnabled = startupConfig?.sharePointFilePickerEnabled; const [isSharePointDialogOpen, setIsSharePointDialogOpen] = useState(false); - const { agentsConfig } = useGetAgentsConfig(); + /** TODO: Ephemeral Agent Capabilities * Allow defining agent capabilities on a per-endpoint basis * Use definition for agents endpoint for ephemeral agents * */ const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities); - const { fileSearchAllowedByAgent, codeAllowedByAgent } = useAgentToolPermissions( + const { fileSearchAllowedByAgent, codeAllowedByAgent, provider } = useAgentToolPermissions( agentId, ephemeralAgent, ); - const handleUploadClick = (isImage?: boolean) => { + const handleUploadClick = ( + fileType?: 'image' | 'document' | 'multimodal' | 'google_multimodal', + ) => { if (!inputRef.current) { return; } inputRef.current.value = ''; - inputRef.current.accept = isImage === true ? 'image/*' : ''; + if (fileType === 'image') { + inputRef.current.accept = 'image/*'; + } else if (fileType === 'document') { + inputRef.current.accept = '.pdf,application/pdf'; + } else if (fileType === 'multimodal') { + inputRef.current.accept = 'image/*,.pdf,application/pdf'; + } else if (fileType === 'google_multimodal') { + inputRef.current.accept = 'image/*,.pdf,application/pdf,video/*,audio/*'; + } else { + inputRef.current.accept = ''; + } inputRef.current.click(); inputRef.current.accept = ''; }; const dropdownItems = useMemo(() => { - const createMenuItems = (onAction: (isImage?: boolean) => void) => { - const items: MenuItemProps[] = [ - { + const createMenuItems = ( + onAction: (fileType?: 'image' | 'document' | 'multimodal' | 'google_multimodal') => void, + ) => { + const items: MenuItemProps[] = []; + + const currentProvider = provider || endpoint; + + if (isDocumentSupportedProvider(endpointType || currentProvider)) { + items.push({ + label: localize('com_ui_upload_provider'), + onClick: () => { + setToolResource(undefined); + onAction( + (provider || endpoint) === EModelEndpoint.google ? 
'google_multimodal' : 'multimodal', + ); + }, + icon: , + }); + } else { + items.push({ label: localize('com_ui_upload_image_input'), onClick: () => { setToolResource(undefined); - onAction(true); + onAction('image'); }, icon: , - }, - ]; + }); + } if (capabilities.contextEnabled) { items.push({ @@ -156,8 +202,11 @@ const AttachFileMenu = ({ return localItems; }, [ - capabilities, localize, + endpoint, + provider, + endpointType, + capabilities, setToolResource, setEphemeralAgent, sharePointEnabled, diff --git a/client/src/components/Chat/Input/Files/DragDropModal.tsx b/client/src/components/Chat/Input/Files/DragDropModal.tsx index e9992c4dcb..209972e4a8 100644 --- a/client/src/components/Chat/Input/Files/DragDropModal.tsx +++ b/client/src/components/Chat/Input/Files/DragDropModal.tsx @@ -1,8 +1,18 @@ import React, { useMemo } from 'react'; import { useRecoilValue } from 'recoil'; import { OGDialog, OGDialogTemplate } from '@librechat/client'; -import { EToolResources, defaultAgentCapabilities } from 'librechat-data-provider'; -import { ImageUpIcon, FileSearch, TerminalSquareIcon, FileType2Icon } from 'lucide-react'; +import { + EToolResources, + defaultAgentCapabilities, + isDocumentSupportedProvider, +} from 'librechat-data-provider'; +import { + ImageUpIcon, + FileSearch, + FileType2Icon, + FileImageIcon, + TerminalSquareIcon, +} from 'lucide-react'; import { useAgentToolPermissions, useAgentCapabilities, @@ -34,22 +44,34 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD * Use definition for agents endpoint for ephemeral agents * */ const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities); - const { conversationId, agentId } = useDragDropContext(); + const { conversationId, agentId, endpoint, endpointType } = useDragDropContext(); const ephemeralAgent = useRecoilValue(ephemeralAgentByConvoId(conversationId ?? 
'')); - const { fileSearchAllowedByAgent, codeAllowedByAgent } = useAgentToolPermissions( + const { fileSearchAllowedByAgent, codeAllowedByAgent, provider } = useAgentToolPermissions( agentId, ephemeralAgent, ); const options = useMemo(() => { - const _options: FileOption[] = [ - { + const _options: FileOption[] = []; + const currentProvider = provider || endpoint; + + // Check if provider supports document upload + if (isDocumentSupportedProvider(endpointType || currentProvider)) { + _options.push({ + label: localize('com_ui_upload_provider'), + value: undefined, + icon: , + condition: true, // Allow for both images and documents + }); + } else { + // Only show image upload option if all files are images and provider doesn't support documents + _options.push({ label: localize('com_ui_upload_image_input'), value: undefined, icon: , condition: files.every((file) => file.type?.startsWith('image/')), - }, - ]; + }); + } if (capabilities.fileSearchEnabled && fileSearchAllowedByAgent) { _options.push({ label: localize('com_ui_upload_file_search'), @@ -73,7 +95,16 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD } return _options; - }, [capabilities, files, localize, fileSearchAllowedByAgent, codeAllowedByAgent]); + }, [ + files, + localize, + provider, + endpoint, + endpointType, + capabilities, + codeAllowedByAgent, + fileSearchAllowedByAgent, + ]); if (!isVisible) { return null; diff --git a/client/src/components/Chat/Messages/Content/Part.tsx b/client/src/components/Chat/Messages/Content/Part.tsx index 5d98b5aca0..aa9f4da82d 100644 --- a/client/src/components/Chat/Messages/Content/Part.tsx +++ b/client/src/components/Chat/Messages/Content/Part.tsx @@ -57,7 +57,7 @@ const Part = memo( ); } else if (part.type === ContentTypes.TEXT) { - const text = typeof part.text === 'string' ? part.text : part.text.value; + const text = typeof part.text === 'string' ? part.text : part.text?.value; if (typeof text !== 'string') { return null; @@ -71,7 +71,7 @@ const Part = memo( ); } else if (part.type === ContentTypes.THINK) { - const reasoning = typeof part.think === 'string' ? part.think : part.think.value; + const reasoning = typeof part.think === 'string' ? part.think : part.think?.value; if (typeof reasoning !== 'string') { return null; } diff --git a/client/src/components/Chat/Messages/Content/Parts/LogContent.tsx b/client/src/components/Chat/Messages/Content/Parts/LogContent.tsx index 0d53fb50ee..d2a303f49f 100644 --- a/client/src/components/Chat/Messages/Content/Parts/LogContent.tsx +++ b/client/src/components/Chat/Messages/Content/Parts/LogContent.tsx @@ -37,7 +37,7 @@ const LogContent: React.FC = ({ output = '', renderImages, atta attachments?.forEach((attachment) => { const { width, height, filepath = null } = attachment as TFile & TAttachmentMetadata; const isImage = - imageExtRegex.test(attachment.filename) && + imageExtRegex.test(attachment.filename ?? '') && width != null && height != null && filepath != null; @@ -56,21 +56,25 @@ const LogContent: React.FC = ({ output = '', renderImages, atta const renderAttachment = (file: TAttachment) => { const now = new Date(); - const expiresAt = typeof file.expiresAt === 'number' ? new Date(file.expiresAt) : null; + const expiresAt = + 'expiresAt' in file && typeof file.expiresAt === 'number' ? new Date(file.expiresAt) : null; const isExpired = expiresAt ? 
isAfter(now, expiresAt) : false; + const filename = file.filename || ''; if (isExpired) { - return `${file.filename} ${localize('com_download_expired')}`; + return `${filename} ${localize('com_download_expired')}`; } + const filepath = file.filepath || ''; + // const expirationText = expiresAt // ? ` ${localize('com_download_expires', { 0: format(expiresAt, 'MM/dd/yy HH:mm') })}` // : ` ${localize('com_click_to_download')}`; return ( - + {'- '} - {file.filename} {localize('com_click_to_download')} + {filename} {localize('com_click_to_download')} ); }; diff --git a/client/src/hooks/Agents/useAgentToolPermissions.ts b/client/src/hooks/Agents/useAgentToolPermissions.ts index 90f2bc88e5..eea549d7a6 100644 --- a/client/src/hooks/Agents/useAgentToolPermissions.ts +++ b/client/src/hooks/Agents/useAgentToolPermissions.ts @@ -9,6 +9,7 @@ interface AgentToolPermissionsResult { fileSearchAllowedByAgent: boolean; codeAllowedByAgent: boolean; tools: string[] | undefined; + provider?: string; } /** @@ -36,6 +37,8 @@ export default function useAgentToolPermissions( [agentData?.tools, selectedAgent?.tools], ); + const provider = useMemo(() => selectedAgent?.provider, [selectedAgent?.provider]); + const fileSearchAllowedByAgent = useMemo(() => { // Check ephemeral agent settings if (isEphemeralAgent(agentId)) { @@ -61,6 +64,7 @@ export default function useAgentToolPermissions( return { fileSearchAllowedByAgent, codeAllowedByAgent, + provider, tools, }; } diff --git a/client/src/hooks/Files/useFileHandling.ts b/client/src/hooks/Files/useFileHandling.ts index 9deaa78f24..7825888985 100644 --- a/client/src/hooks/Files/useFileHandling.ts +++ b/client/src/hooks/Files/useFileHandling.ts @@ -392,13 +392,6 @@ const useFileHandling = (params?: UseFileHandling) => { } else { // File wasn't processed, proceed with original const isImage = originalFile.type.split('/')[0] === 'image'; - const tool_resource = - initialExtendedFile.tool_resource ?? params?.additionalMetadata?.tool_resource; - if (isAgentsEndpoint(endpoint) && !isImage && tool_resource == null) { - /** Note: this needs to be removed when we can support files to providers */ - setError('com_error_files_unsupported_capability'); - continue; - } // Update progress to show ready for upload const readyExtendedFile = { diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index 0facdb493a..c4b8481398 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -363,7 +363,6 @@ "com_error_files_dupe": "Duplicate file detected.", "com_error_files_empty": "Empty files are not allowed.", "com_error_files_process": "An error occurred while processing the file.", - "com_error_files_unsupported_capability": "No capabilities enabled that support this file type.", "com_error_files_upload": "An error occurred while uploading the file.", "com_error_files_upload_canceled": "The file upload request was canceled. Note: the file upload may still be processing and will need to be manually deleted.", "com_error_files_validation": "An error occurred while validating the file.", @@ -1230,6 +1229,7 @@ "com_ui_upload_invalid": "Invalid file for upload. Must be an image not exceeding the limit", "com_ui_upload_invalid_var": "Invalid file for upload. 
Must be an image not exceeding {{0}} MB", "com_ui_upload_ocr_text": "Upload as Text", + "com_ui_upload_provider": "Upload to Provider", "com_ui_upload_success": "Successfully uploaded file", "com_ui_upload_type": "Select Upload Type", "com_ui_usage": "Usage", diff --git a/client/src/utils/files.ts b/client/src/utils/files.ts index 81830c64ff..0959ba91bb 100644 --- a/client/src/utils/files.ts +++ b/client/src/utils/files.ts @@ -1,4 +1,11 @@ -import { SheetPaths, TextPaths, FilePaths, CodePaths } from '@librechat/client'; +import { + TextPaths, + FilePaths, + CodePaths, + AudioPaths, + VideoPaths, + SheetPaths, +} from '@librechat/client'; import { megabyte, QueryKeys, @@ -38,6 +45,18 @@ const artifact = { title: 'Code', }; +const audioFile = { + paths: AudioPaths, + fill: '#FF6B35', + title: 'Audio', +}; + +const videoFile = { + paths: VideoPaths, + fill: '#8B5CF6', + title: 'Video', +}; + export const fileTypes = { /* Category matches */ file: { @@ -47,6 +66,8 @@ export const fileTypes = { }, text: textDocument, txt: textDocument, + audio: audioFile, + video: videoFile, // application:, /* Partial matches */ diff --git a/client/src/utils/messages.ts b/client/src/utils/messages.ts index 7a52ff4106..caae46d923 100644 --- a/client/src/utils/messages.ts +++ b/client/src/utils/messages.ts @@ -25,7 +25,7 @@ export const getLatestText = (message?: TMessage | null, includeIndex?: boolean) continue; } - const text = (typeof part?.text === 'string' ? part.text : part?.text.value) ?? ''; + const text = (typeof part?.text === 'string' ? part.text : part?.text?.value) ?? ''; if (text.length > 0) { if (includeIndex === true) { return `${text}-${i}`; @@ -52,7 +52,12 @@ export const getAllContentText = (message?: TMessage | null): string => { if (message.content && message.content.length > 0) { return message.content .filter((part) => part.type === ContentTypes.TEXT) - .map((part) => (typeof part.text === 'string' ? 
part.text : part.text.value) || '') + .map((part) => { + if (!('text' in part)) return ''; + const text = part.text; + if (typeof text === 'string') return text; + return text?.value || ''; + }) .filter((text) => text.length > 0) .join('\n'); } diff --git a/package-lock.json b/package-lock.json index 019317a2ea..72bc3a7af6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -64,7 +64,7 @@ "@langchain/google-genai": "^0.2.13", "@langchain/google-vertexai": "^0.2.13", "@langchain/textsplitters": "^0.1.0", - "@librechat/agents": "^2.4.84", + "@librechat/agents": "^2.4.85", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", @@ -21522,9 +21522,9 @@ } }, "node_modules/@librechat/agents": { - "version": "2.4.84", - "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.84.tgz", - "integrity": "sha512-wOPqv5yQfhkuBZ29FrJGUdDMCIvcnqUAigFeoPU8QOeqi+S9rRobx2+2D3+JbbgSsDL5yO7SyxGEHkQ7A6xZDQ==", + "version": "2.4.85", + "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.85.tgz", + "integrity": "sha512-t6h5f6ApnoEC+x8kqBlke1RR6BPzT+9BvlkA8VxvQVJtYIt5Ey4BOTRDGjdilDoXUcLui11PbjCd17EbjPkTcA==", "license": "MIT", "dependencies": { "@langchain/anthropic": "^0.3.26", @@ -51336,7 +51336,7 @@ "@azure/storage-blob": "^12.27.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.62", - "@librechat/agents": "^2.4.84", + "@librechat/agents": "^2.4.85", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.17.1", "axios": "^1.12.1", diff --git a/packages/api/package.json b/packages/api/package.json index 3ea7d343e4..05054498e2 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -80,7 +80,7 @@ "@azure/storage-blob": "^12.27.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.62", - "@librechat/agents": "^2.4.84", + "@librechat/agents": "^2.4.85", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.17.1", "axios": "^1.12.1", diff --git a/packages/api/src/files/encode/audio.ts b/packages/api/src/files/encode/audio.ts new file mode 100644 index 0000000000..d411e61640 --- /dev/null +++ b/packages/api/src/files/encode/audio.ts @@ -0,0 +1,74 @@ +import { Providers } from '@librechat/agents'; +import { isDocumentSupportedProvider } from 'librechat-data-provider'; +import type { IMongoFile } from '@librechat/data-schemas'; +import type { Request } from 'express'; +import type { StrategyFunctions, AudioResult } from '~/types/files'; +import { validateAudio } from '~/files/validation'; +import { getFileStream } from './utils'; + +/** + * Encodes and formats audio files for different providers + * @param req - The request object + * @param files - Array of audio files + * @param provider - The provider to format for (currently only google is supported) + * @param getStrategyFunctions - Function to get strategy functions + * @returns Promise that resolves to audio and file metadata + */ +export async function encodeAndFormatAudios( + req: Request, + files: IMongoFile[], + provider: Providers, + getStrategyFunctions: (source: string) => StrategyFunctions, +): Promise { + if (!files?.length) { + return { audios: [], files: [] }; + } + + const encodingMethods: Record = {}; + const result: AudioResult = { audios: [], files: [] }; + + const results = await Promise.allSettled( + files.map((file) => getFileStream(req, file, encodingMethods, getStrategyFunctions)), + ); + + for (const settledResult of results) { + if (settledResult.status === 'rejected') { + console.error('Audio processing failed:', 
settledResult.reason); + continue; + } + + const processed = settledResult.value; + if (!processed) continue; + + const { file, content, metadata } = processed; + + if (!content || !file) { + if (metadata) result.files.push(metadata); + continue; + } + + if (!file.type.startsWith('audio/') || !isDocumentSupportedProvider(provider)) { + result.files.push(metadata); + continue; + } + + const audioBuffer = Buffer.from(content, 'base64'); + const validation = await validateAudio(audioBuffer, audioBuffer.length, provider); + + if (!validation.isValid) { + throw new Error(`Audio validation failed: ${validation.error}`); + } + + if (provider === Providers.GOOGLE || provider === Providers.VERTEXAI) { + result.audios.push({ + type: 'audio', + mimeType: file.type, + data: content, + }); + } + + result.files.push(metadata); + } + + return result; +} diff --git a/packages/api/src/files/encode/document.ts b/packages/api/src/files/encode/document.ts new file mode 100644 index 0000000000..bc1396958c --- /dev/null +++ b/packages/api/src/files/encode/document.ts @@ -0,0 +1,108 @@ +import { Providers } from '@librechat/agents'; +import { isOpenAILikeProvider, isDocumentSupportedProvider } from 'librechat-data-provider'; +import type { IMongoFile } from '@librechat/data-schemas'; +import type { Request } from 'express'; +import type { StrategyFunctions, DocumentResult } from '~/types/files'; +import { validatePdf } from '~/files/validation'; +import { getFileStream } from './utils'; + +/** + * Processes and encodes document files for various providers + * @param req - Express request object + * @param files - Array of file objects to process + * @param provider - The provider name + * @param getStrategyFunctions - Function to get strategy functions + * @returns Promise that resolves to documents and file metadata + */ +export async function encodeAndFormatDocuments( + req: Request, + files: IMongoFile[], + { provider, useResponsesApi }: { provider: Providers; useResponsesApi?: boolean }, + getStrategyFunctions: (source: string) => StrategyFunctions, +): Promise { + if (!files?.length) { + return { documents: [], files: [] }; + } + + const encodingMethods: Record = {}; + const result: DocumentResult = { documents: [], files: [] }; + + const documentFiles = files.filter( + (file) => file.type === 'application/pdf' || file.type?.startsWith('application/'), + ); + + if (!documentFiles.length) { + return result; + } + + const results = await Promise.allSettled( + documentFiles.map((file) => { + if (file.type !== 'application/pdf' || !isDocumentSupportedProvider(provider)) { + return Promise.resolve(null); + } + return getFileStream(req, file, encodingMethods, getStrategyFunctions); + }), + ); + + for (const settledResult of results) { + if (settledResult.status === 'rejected') { + console.error('Document processing failed:', settledResult.reason); + continue; + } + + const processed = settledResult.value; + if (!processed) continue; + + const { file, content, metadata } = processed; + + if (!content || !file) { + if (metadata) result.files.push(metadata); + continue; + } + + if (file.type === 'application/pdf' && isDocumentSupportedProvider(provider)) { + const pdfBuffer = Buffer.from(content, 'base64'); + const validation = await validatePdf(pdfBuffer, pdfBuffer.length, provider); + + if (!validation.isValid) { + throw new Error(`PDF validation failed: ${validation.error}`); + } + + if (provider === Providers.ANTHROPIC) { + result.documents.push({ + type: 'document', + source: { + type: 'base64', + media_type: 
'application/pdf', + data: content, + }, + cache_control: { type: 'ephemeral' }, + citations: { enabled: true }, + }); + } else if (useResponsesApi) { + result.documents.push({ + type: 'input_file', + filename: file.filename, + file_data: `data:application/pdf;base64,${content}`, + }); + } else if (provider === Providers.GOOGLE || provider === Providers.VERTEXAI) { + result.documents.push({ + type: 'document', + mimeType: 'application/pdf', + data: content, + }); + } else if (isOpenAILikeProvider(provider) && provider != Providers.AZURE) { + result.documents.push({ + type: 'file', + file: { + filename: file.filename, + file_data: `data:application/pdf;base64,${content}`, + }, + }); + } + result.files.push(metadata); + } + } + + return result; +} diff --git a/packages/api/src/files/encode/index.ts b/packages/api/src/files/encode/index.ts new file mode 100644 index 0000000000..a0708596f3 --- /dev/null +++ b/packages/api/src/files/encode/index.ts @@ -0,0 +1,3 @@ +export * from './audio'; +export * from './document'; +export * from './video'; diff --git a/packages/api/src/files/encode/utils.ts b/packages/api/src/files/encode/utils.ts new file mode 100644 index 0000000000..3664e074c9 --- /dev/null +++ b/packages/api/src/files/encode/utils.ts @@ -0,0 +1,46 @@ +import getStream from 'get-stream'; +import { FileSources } from 'librechat-data-provider'; +import type { IMongoFile } from '@librechat/data-schemas'; +import type { Request } from 'express'; +import type { StrategyFunctions, ProcessedFile } from '~/types/files'; + +/** + * Processes a file by downloading and encoding it to base64 + * @param req - Express request object + * @param file - File object to process + * @param encodingMethods - Cache of encoding methods by source + * @param getStrategyFunctions - Function to get strategy functions for a source + * @returns Processed file with content and metadata, or null if filepath missing + */ +export async function getFileStream( + req: Request, + file: IMongoFile, + encodingMethods: Record, + getStrategyFunctions: (source: string) => StrategyFunctions, +): Promise { + if (!file?.filepath) { + return null; + } + + const source = file.source ?? 
FileSources.local; + if (!encodingMethods[source]) { + encodingMethods[source] = getStrategyFunctions(source); + } + + const { getDownloadStream } = encodingMethods[source]; + const stream = await getDownloadStream(req, file.filepath); + const buffer = await getStream.buffer(stream); + + return { + file, + content: buffer.toString('base64'), + metadata: { + file_id: file.file_id, + temp_file_id: file.temp_file_id, + filepath: file.filepath, + source: file.source, + filename: file.filename, + type: file.type, + }, + }; +} diff --git a/packages/api/src/files/encode/video.ts b/packages/api/src/files/encode/video.ts new file mode 100644 index 0000000000..10fd4c691f --- /dev/null +++ b/packages/api/src/files/encode/video.ts @@ -0,0 +1,74 @@ +import { Providers } from '@librechat/agents'; +import { isDocumentSupportedProvider } from 'librechat-data-provider'; +import type { IMongoFile } from '@librechat/data-schemas'; +import type { Request } from 'express'; +import type { StrategyFunctions, VideoResult } from '~/types/files'; +import { validateVideo } from '~/files/validation'; +import { getFileStream } from './utils'; + +/** + * Encodes and formats video files for different providers + * @param req - The request object + * @param files - Array of video files + * @param provider - The provider to format for + * @param getStrategyFunctions - Function to get strategy functions + * @returns Promise that resolves to videos and file metadata + */ +export async function encodeAndFormatVideos( + req: Request, + files: IMongoFile[], + provider: Providers, + getStrategyFunctions: (source: string) => StrategyFunctions, +): Promise { + if (!files?.length) { + return { videos: [], files: [] }; + } + + const encodingMethods: Record = {}; + const result: VideoResult = { videos: [], files: [] }; + + const results = await Promise.allSettled( + files.map((file) => getFileStream(req, file, encodingMethods, getStrategyFunctions)), + ); + + for (const settledResult of results) { + if (settledResult.status === 'rejected') { + console.error('Video processing failed:', settledResult.reason); + continue; + } + + const processed = settledResult.value; + if (!processed) continue; + + const { file, content, metadata } = processed; + + if (!content || !file) { + if (metadata) result.files.push(metadata); + continue; + } + + if (!file.type.startsWith('video/') || !isDocumentSupportedProvider(provider)) { + result.files.push(metadata); + continue; + } + + const videoBuffer = Buffer.from(content, 'base64'); + const validation = await validateVideo(videoBuffer, videoBuffer.length, provider); + + if (!validation.isValid) { + throw new Error(`Video validation failed: ${validation.error}`); + } + + if (provider === Providers.GOOGLE || provider === Providers.VERTEXAI) { + result.videos.push({ + type: 'video', + mimeType: file.type, + data: content, + }); + } + + result.files.push(metadata); + } + + return result; +} diff --git a/packages/api/src/files/index.ts b/packages/api/src/files/index.ts index 49e5bb4151..3d1a3118e3 100644 --- a/packages/api/src/files/index.ts +++ b/packages/api/src/files/index.ts @@ -1,5 +1,7 @@ export * from './audio'; +export * from './encode'; export * from './mistral/crud'; export * from './ocr'; export * from './parse'; +export * from './validation'; export * from './text'; diff --git a/packages/api/src/files/validation.ts b/packages/api/src/files/validation.ts new file mode 100644 index 0000000000..a8394fd8f6 --- /dev/null +++ b/packages/api/src/files/validation.ts @@ -0,0 +1,186 @@ +import { 
Providers } from '@librechat/agents'; +import { mbToBytes, isOpenAILikeProvider } from 'librechat-data-provider'; + +export interface PDFValidationResult { + isValid: boolean; + error?: string; +} + +export interface VideoValidationResult { + isValid: boolean; + error?: string; +} + +export interface AudioValidationResult { + isValid: boolean; + error?: string; +} + +export async function validatePdf( + pdfBuffer: Buffer, + fileSize: number, + provider: Providers, +): Promise { + if (provider === Providers.ANTHROPIC) { + return validateAnthropicPdf(pdfBuffer, fileSize); + } + + if (isOpenAILikeProvider(provider)) { + return validateOpenAIPdf(fileSize); + } + + if (provider === Providers.GOOGLE || provider === Providers.VERTEXAI) { + return validateGooglePdf(fileSize); + } + + return { isValid: true }; +} + +/** + * Validates if a PDF meets Anthropic's requirements + * @param pdfBuffer - The PDF file as a buffer + * @param fileSize - The file size in bytes + * @returns Promise that resolves to validation result + */ +async function validateAnthropicPdf( + pdfBuffer: Buffer, + fileSize: number, +): Promise { + try { + if (fileSize > mbToBytes(32)) { + return { + isValid: false, + error: `PDF file size (${Math.round(fileSize / (1024 * 1024))}MB) exceeds Anthropic's 32MB limit`, + }; + } + + if (!pdfBuffer || pdfBuffer.length < 5) { + return { + isValid: false, + error: 'Invalid PDF file: too small or corrupted', + }; + } + + const pdfHeader = pdfBuffer.subarray(0, 5).toString(); + if (!pdfHeader.startsWith('%PDF-')) { + return { + isValid: false, + error: 'Invalid PDF file: missing PDF header', + }; + } + + const pdfContent = pdfBuffer.toString('binary'); + if ( + pdfContent.includes('/Encrypt ') || + pdfContent.includes('/U (') || + pdfContent.includes('/O (') + ) { + return { + isValid: false, + error: 'PDF is password-protected or encrypted. Anthropic requires unencrypted PDFs.', + }; + } + + const pageMatches = pdfContent.match(/\/Type[\s]*\/Page[^s]/g); + const estimatedPages = pageMatches ? 
pageMatches.length : 1; + + if (estimatedPages > 100) { + return { + isValid: false, + error: `PDF has approximately ${estimatedPages} pages, exceeding Anthropic's 100-page limit`, + }; + } + + return { isValid: true }; + } catch (error) { + console.error('PDF validation error:', error); + return { + isValid: false, + error: 'Failed to validate PDF file', + }; + } +} + +async function validateOpenAIPdf(fileSize: number): Promise { + if (fileSize > 10 * 1024 * 1024) { + return { + isValid: false, + error: "PDF file size exceeds OpenAI's 10MB limit", + }; + } + + return { isValid: true }; +} + +async function validateGooglePdf(fileSize: number): Promise { + if (fileSize > 20 * 1024 * 1024) { + return { + isValid: false, + error: "PDF file size exceeds Google's 20MB limit", + }; + } + + return { isValid: true }; +} + +/** + * Validates video files for different providers + * @param videoBuffer - The video file as a buffer + * @param fileSize - The file size in bytes + * @param provider - The provider to validate for + * @returns Promise that resolves to validation result + */ +export async function validateVideo( + videoBuffer: Buffer, + fileSize: number, + provider: Providers, +): Promise { + if (provider === Providers.GOOGLE || provider === Providers.VERTEXAI) { + if (fileSize > 20 * 1024 * 1024) { + return { + isValid: false, + error: `Video file size (${Math.round(fileSize / (1024 * 1024))}MB) exceeds Google's 20MB limit`, + }; + } + } + + if (!videoBuffer || videoBuffer.length < 10) { + return { + isValid: false, + error: 'Invalid video file: too small or corrupted', + }; + } + + return { isValid: true }; +} + +/** + * Validates audio files for different providers + * @param audioBuffer - The audio file as a buffer + * @param fileSize - The file size in bytes + * @param provider - The provider to validate for + * @returns Promise that resolves to validation result + */ +export async function validateAudio( + audioBuffer: Buffer, + fileSize: number, + provider: Providers, +): Promise { + if (provider === Providers.GOOGLE || provider === Providers.VERTEXAI) { + if (fileSize > 20 * 1024 * 1024) { + return { + isValid: false, + error: `Audio file size (${Math.round(fileSize / (1024 * 1024))}MB) exceeds Google's 20MB limit`, + }; + } + } + + if (!audioBuffer || audioBuffer.length < 10) { + return { + isValid: false, + error: 'Invalid audio file: too small or corrupted', + }; + } + + return { isValid: true }; +} diff --git a/packages/api/src/types/files.ts b/packages/api/src/types/files.ts index 4bfcc23e46..dc37410050 100644 --- a/packages/api/src/types/files.ts +++ b/packages/api/src/types/files.ts @@ -1,4 +1,7 @@ +import type { IMongoFile } from '@librechat/data-schemas'; import type { ServerRequest } from './http'; +import type { Readable } from 'stream'; +import type { Request } from 'express'; export interface STTService { getInstance(): Promise; getProviderSchema(req: ServerRequest): Promise<[string, object]>; @@ -26,3 +29,85 @@ export interface AudioProcessingResult { text: string; bytes: number; } + +export interface VideoResult { + videos: Array<{ + type: string; + mimeType: string; + data: string; + }>; + files: Array<{ + file_id?: string; + temp_file_id?: string; + filepath: string; + source?: string; + filename: string; + type: string; + }>; +} + +export interface DocumentResult { + documents: Array<{ + type: 'document' | 'file' | 'input_file'; + /** Anthropic File Format, `document` */ + source?: { + type: string; + media_type: string; + data: string; + }; + cache_control?: { 
type: string }; + citations?: { enabled: boolean }; + /** Google File Format, `document` */ + mimeType?: string; + data?: string; + /** OpenAI File Format, `file` */ + file?: { + filename?: string; + file_data?: string; + }; + /** OpenAI Responses API File Format, `input_file` */ + filename?: string; + file_data?: string; + }>; + files: Array<{ + file_id?: string; + temp_file_id?: string; + filepath: string; + source?: string; + filename: string; + type: string; + }>; +} + +export interface AudioResult { + audios: Array<{ + type: string; + mimeType: string; + data: string; + }>; + files: Array<{ + file_id?: string; + temp_file_id?: string; + filepath: string; + source?: string; + filename: string; + type: string; + }>; +} + +export interface ProcessedFile { + file: IMongoFile; + content: string; + metadata: { + file_id: string; + temp_file_id?: string; + filepath: string; + source?: string; + filename: string; + type: string; + }; +} + +export interface StrategyFunctions { + getDownloadStream: (req: Request, filepath: string) => Promise; +} diff --git a/packages/client/src/svgs/AudioPaths.tsx b/packages/client/src/svgs/AudioPaths.tsx new file mode 100644 index 0000000000..874f54328d --- /dev/null +++ b/packages/client/src/svgs/AudioPaths.tsx @@ -0,0 +1,41 @@ +export default function AudioPaths() { + return ( + <> + + + + + + + ); +} diff --git a/packages/client/src/svgs/VideoPaths.tsx b/packages/client/src/svgs/VideoPaths.tsx new file mode 100644 index 0000000000..6876824e42 --- /dev/null +++ b/packages/client/src/svgs/VideoPaths.tsx @@ -0,0 +1,10 @@ +export default function VideoPaths() { + return ( + <> + {/* Video container - rounded rectangle (not filled) */} + + {/* Play button - centered and pointing right */} + + + ); +} diff --git a/packages/client/src/svgs/index.ts b/packages/client/src/svgs/index.ts index 13a5a1cc0a..d3f8c6e45b 100644 --- a/packages/client/src/svgs/index.ts +++ b/packages/client/src/svgs/index.ts @@ -65,9 +65,11 @@ export { default as PersonalizationIcon } from './PersonalizationIcon'; export { default as MCPIcon } from './MCPIcon'; export { default as VectorIcon } from './VectorIcon'; export { default as SquirclePlusIcon } from './SquirclePlusIcon'; +export { default as AudioPaths } from './AudioPaths'; export { default as CodePaths } from './CodePaths'; export { default as FileIcon } from './FileIcon'; export { default as FilePaths } from './FilePaths'; export { default as SheetPaths } from './SheetPaths'; export { default as TextPaths } from './TextPaths'; +export { default as VideoPaths } from './VideoPaths'; export { default as SharePointIcon } from './SharePointIcon'; diff --git a/packages/data-provider/src/file-config.ts b/packages/data-provider/src/file-config.ts index 75c403afbc..3ad24da67a 100644 --- a/packages/data-provider/src/file-config.ts +++ b/packages/data-provider/src/file-config.ts @@ -57,6 +57,27 @@ export const fullMimeTypesList = [ 'application/zip', 'image/svg', 'image/svg+xml', + // Video formats + 'video/mp4', + 'video/avi', + 'video/mov', + 'video/wmv', + 'video/flv', + 'video/webm', + 'video/mkv', + 'video/m4v', + 'video/3gp', + 'video/ogv', + // Audio formats + 'audio/mp3', + 'audio/wav', + 'audio/ogg', + 'audio/m4a', + 'audio/aac', + 'audio/flac', + 'audio/wma', + 'audio/opus', + 'audio/mpeg', ...excelFileTypes, ]; @@ -123,7 +144,9 @@ export const applicationMimeTypes = export const imageMimeTypes = /^image\/(jpeg|gif|png|webp|heic|heif)$/; export const audioMimeTypes = - 
/^audio\/(mp3|mpeg|mpeg3|wav|wave|x-wav|ogg|vorbis|mp4|x-m4a|flac|x-flac|webm)$/; + /^audio\/(mp3|mpeg|mpeg3|wav|wave|x-wav|ogg|vorbis|mp4|m4a|x-m4a|flac|x-flac|webm|aac|wma|opus)$/; + +export const videoMimeTypes = /^video\/(mp4|avi|mov|wmv|flv|webm|mkv|m4v|3gp|ogv)$/; export const defaultOCRMimeTypes = [ imageMimeTypes, @@ -142,8 +165,9 @@ export const supportedMimeTypes = [ excelMimeTypes, applicationMimeTypes, imageMimeTypes, + videoMimeTypes, audioMimeTypes, - /** Supported by LC Code Interpreter PAI */ + /** Supported by LC Code Interpreter API */ /^image\/(svg|svg\+xml)$/, ]; @@ -199,6 +223,13 @@ export const fileConfig = { [EModelEndpoint.assistants]: assistantsFileConfig, [EModelEndpoint.azureAssistants]: assistantsFileConfig, [EModelEndpoint.agents]: assistantsFileConfig, + [EModelEndpoint.anthropic]: { + fileLimit: 10, + fileSizeLimit: defaultSizeLimit, + totalSizeLimit: defaultSizeLimit, + supportedMimeTypes, + disabled: false, + }, default: { fileLimit: 10, fileSizeLimit: defaultSizeLimit, diff --git a/packages/data-provider/src/parsers.ts b/packages/data-provider/src/parsers.ts index 7d4016449a..61616a57a8 100644 --- a/packages/data-provider/src/parsers.ts +++ b/packages/data-provider/src/parsers.ts @@ -369,7 +369,7 @@ export function parseTextParts( continue; } if (part.type === ContentTypes.TEXT) { - const textValue = typeof part.text === 'string' ? part.text : part.text.value; + const textValue = (typeof part.text === 'string' ? part.text : part.text?.value) || ''; if ( result.length > 0 && diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index 8ae503ef25..a279f4f84d 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -31,6 +31,61 @@ export enum EModelEndpoint { gptPlugins = 'gptPlugins', } +/** Mirrors `@librechat/agents` providers */ +export enum Providers { + OPENAI = 'openAI', + ANTHROPIC = 'anthropic', + AZURE = 'azureOpenAI', + GOOGLE = 'google', + VERTEXAI = 'vertexai', + BEDROCK = 'bedrock', + BEDROCK_LEGACY = 'bedrock_legacy', + MISTRALAI = 'mistralai', + MISTRAL = 'mistral', + OLLAMA = 'ollama', + DEEPSEEK = 'deepseek', + OPENROUTER = 'openrouter', + XAI = 'xai', +} + +/** + * Endpoints that support direct PDF processing in the agent system + */ +export const documentSupportedProviders = new Set([ + EModelEndpoint.anthropic, + EModelEndpoint.openAI, + EModelEndpoint.custom, + EModelEndpoint.azureOpenAI, + EModelEndpoint.google, + Providers.VERTEXAI, + Providers.MISTRALAI, + Providers.MISTRAL, + Providers.OLLAMA, + Providers.DEEPSEEK, + Providers.OPENROUTER, + Providers.XAI, +]); + +const openAILikeProviders = new Set([ + Providers.OPENAI, + Providers.AZURE, + EModelEndpoint.custom, + Providers.MISTRALAI, + Providers.MISTRAL, + Providers.OLLAMA, + Providers.DEEPSEEK, + Providers.OPENROUTER, + Providers.XAI, +]); + +export const isOpenAILikeProvider = (provider?: string | null): boolean => { + return openAILikeProviders.has(provider ?? ''); +}; + +export const isDocumentSupportedProvider = (provider?: string | null): boolean => { + return documentSupportedProviders.has(provider ?? 
''); +}; + export const paramEndpoints = new Set([ EModelEndpoint.agents, EModelEndpoint.openAI, diff --git a/packages/data-provider/src/types/assistants.ts b/packages/data-provider/src/types/assistants.ts index beb1e10701..246c60a5c3 100644 --- a/packages/data-provider/src/types/assistants.ts +++ b/packages/data-provider/src/types/assistants.ts @@ -475,10 +475,20 @@ export type ContentPart = ( ) & PartMetadata; +export type TextData = (Text & PartMetadata) | undefined; + export type TMessageContentParts = - | { type: ContentTypes.ERROR; text?: string | (Text & PartMetadata); error?: string } - | { type: ContentTypes.THINK; think: string | (Text & PartMetadata) } - | { type: ContentTypes.TEXT; text: string | (Text & PartMetadata); tool_call_ids?: string[] } + | { + type: ContentTypes.ERROR; + text?: string | TextData; + error?: string; + } + | { type: ContentTypes.THINK; think?: string | TextData } + | { + type: ContentTypes.TEXT; + text?: string | TextData; + tool_call_ids?: string[]; + } | { type: ContentTypes.TOOL_CALL; tool_call: (