From 1d0a4c501f00d336e1f00bbb50ff1985f6a7da0c Mon Sep 17 00:00:00 2001 From: Dustin Healy <54083382+dustinhealy@users.noreply.github.com> Date: Mon, 23 Feb 2026 19:32:44 -0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=AA=A8=20feat:=20AWS=20Bedrock=20Document?= =?UTF-8?q?=20Uploads=20(#11912)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add aws bedrock upload to provider support * chore: address copilot comments * feat: add shared Bedrock document format types and MIME mapping Bedrock Converse API accepts 9 document formats (PDF plus 8 others). Add BedrockDocumentFormat union type, MIME-to-format mapping, and helpers in data-provider so both client and backend can reference them. * refactor: generalize Bedrock PDF validation to support all document types Rename validateBedrockPdf to validateBedrockDocument with MIME-aware logic: 4.5MB hard limit applies to all types, PDF header check only runs for application/pdf. Adds test coverage for non-PDF documents. * feat: support all Bedrock document formats in encoding pipeline Widen file type gates to accept csv, doc, docx, xls, xlsx, html, txt, md for Bedrock. Uses shared MIME-to-format map instead of hardcoded 'pdf'. Other providers' PDF-only paths remain unchanged. * feat: expand Bedrock file upload UI to accept all document types Add 'image_document_extended' upload type for Bedrock with accept filters for all 9 supported formats. Update drag-and-drop validation to use isBedrockDocumentType helper. 
* fix: route Bedrock document types through provider pipeline --- api/app/clients/BaseClient.js | 7 + .../Chat/Input/Files/AttachFileMenu.tsx | 29 ++- .../Chat/Input/Files/DragDropModal.tsx | 51 ++--- .../api/src/files/encode/document.spec.ts | 185 +++++++++++++++++- packages/api/src/files/encode/document.ts | 64 ++++-- packages/api/src/files/validation.spec.ts | 118 ++++++++++- packages/api/src/files/validation.ts | 67 +++++++ packages/api/src/types/files.ts | 18 +- packages/data-provider/src/file-config.ts | 35 +++- packages/data-provider/src/schemas.ts | 1 + 10 files changed, 528 insertions(+), 47 deletions(-) diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js index a2dfaf9907..fab82db93b 100644 --- a/api/app/clients/BaseClient.js +++ b/api/app/clients/BaseClient.js @@ -20,6 +20,7 @@ const { isAgentsEndpoint, isEphemeralAgentId, supportsBalanceCheck, + isBedrockDocumentType, } = require('librechat-data-provider'); const { updateMessage, @@ -1300,6 +1301,9 @@ class BaseClient { const allFiles = []; + const provider = this.options.agent?.provider ?? this.options.endpoint; + const isBedrock = provider === EModelEndpoint.bedrock; + for (const file of attachments) { /** @type {FileSources} */ const source = file.source ?? 
FileSources.local; @@ -1317,6 +1321,9 @@ class BaseClient { } else if (file.type === 'application/pdf') { categorizedAttachments.documents.push(file); allFiles.push(file); + } else if (isBedrock && isBedrockDocumentType(file.type)) { + categorizedAttachments.documents.push(file); + allFiles.push(file); } else if (file.type.startsWith('video/')) { categorizedAttachments.videos.push(file); allFiles.push(file); diff --git a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx index 218328b086..5b7346f646 100644 --- a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx +++ b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx @@ -8,13 +8,6 @@ import { FileImageIcon, TerminalSquareIcon, } from 'lucide-react'; -import { - Providers, - EToolResources, - EModelEndpoint, - defaultAgentCapabilities, - isDocumentSupportedProvider, -} from 'librechat-data-provider'; import { FileUpload, TooltipAnchor, @@ -22,6 +15,14 @@ import { AttachmentIcon, SharePointIcon, } from '@librechat/client'; +import { + Providers, + EToolResources, + EModelEndpoint, + defaultAgentCapabilities, + bedrockDocumentExtensions, + isDocumentSupportedProvider, +} from 'librechat-data-provider'; import type { EndpointFileConfig } from 'librechat-data-provider'; import { useAgentToolPermissions, @@ -37,7 +38,12 @@ import { ephemeralAgentByConvoId } from '~/store'; import { MenuItemProps } from '~/common'; import { cn } from '~/utils'; -type FileUploadType = 'image' | 'document' | 'image_document' | 'image_document_video_audio'; +type FileUploadType = + | 'image' + | 'document' + | 'image_document' + | 'image_document_extended' + | 'image_document_video_audio'; interface AttachFileMenuProps { agentId?: string | null; @@ -99,6 +105,8 @@ const AttachFileMenu = ({ inputRef.current.accept = '.pdf,application/pdf'; } else if (fileType === 'image_document') { inputRef.current.accept = 'image/*,.heif,.heic,.pdf,application/pdf'; + } else 
if (fileType === 'image_document_extended') { + inputRef.current.accept = `image/*,.heif,.heic,${bedrockDocumentExtensions}`; } else if (fileType === 'image_document_video_audio') { inputRef.current.accept = 'image/*,.heif,.heic,.pdf,application/pdf,video/*,audio/*'; } else { @@ -134,6 +142,11 @@ const AttachFileMenu = ({ let fileType: Exclude = 'image_document'; if (currentProvider === Providers.GOOGLE || currentProvider === Providers.OPENROUTER) { fileType = 'image_document_video_audio'; + } else if ( + currentProvider === Providers.BEDROCK || + endpointType === EModelEndpoint.bedrock + ) { + fileType = 'image_document_extended'; } onAction(fileType); }, diff --git a/client/src/components/Chat/Input/Files/DragDropModal.tsx b/client/src/components/Chat/Input/Files/DragDropModal.tsx index a59a7e3e9d..cb5109c866 100644 --- a/client/src/components/Chat/Input/Files/DragDropModal.tsx +++ b/client/src/components/Chat/Input/Files/DragDropModal.tsx @@ -1,14 +1,6 @@ import React, { useMemo } from 'react'; import { useRecoilValue } from 'recoil'; import { OGDialog, OGDialogTemplate } from '@librechat/client'; -import { - Providers, - inferMimeType, - EToolResources, - EModelEndpoint, - defaultAgentCapabilities, - isDocumentSupportedProvider, -} from 'librechat-data-provider'; import { ImageUpIcon, FileSearch, @@ -16,6 +8,15 @@ import { FileImageIcon, TerminalSquareIcon, } from 'lucide-react'; +import { + Providers, + inferMimeType, + EToolResources, + EModelEndpoint, + isBedrockDocumentType, + defaultAgentCapabilities, + isDocumentSupportedProvider, +} from 'librechat-data-provider'; import { useAgentToolPermissions, useAgentCapabilities, @@ -77,20 +78,26 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD ) { const supportsImageDocVideoAudio = currentProvider === EModelEndpoint.google || currentProvider === Providers.OPENROUTER; - const validFileTypes = supportsImageDocVideoAudio - ? 
files.every((file) => { - const type = getFileType(file); - return ( - type?.startsWith('image/') || - type?.startsWith('video/') || - type?.startsWith('audio/') || - type === 'application/pdf' - ); - }) - : files.every((file) => { - const type = getFileType(file); - return type?.startsWith('image/') || type === 'application/pdf'; - }); + const isBedrock = + currentProvider === Providers.BEDROCK || endpointType === EModelEndpoint.bedrock; + + const isValidProviderFile = (file: File): boolean => { + const type = getFileType(file); + if (supportsImageDocVideoAudio) { + return ( + type?.startsWith('image/') || + type?.startsWith('video/') || + type?.startsWith('audio/') || + type === 'application/pdf' + ); + } + if (isBedrock) { + return type?.startsWith('image/') || isBedrockDocumentType(type); + } + return type?.startsWith('image/') || type === 'application/pdf'; + }; + + const validFileTypes = files.every(isValidProviderFile); _options.push({ label: localize('com_ui_upload_provider'), diff --git a/packages/api/src/files/encode/document.spec.ts b/packages/api/src/files/encode/document.spec.ts index 9091cedd9e..a93800b5e1 100644 --- a/packages/api/src/files/encode/document.spec.ts +++ b/packages/api/src/files/encode/document.spec.ts @@ -7,6 +7,7 @@ import { encodeAndFormatDocuments } from './document'; /** Mock the validation module */ jest.mock('~/files/validation', () => ({ validatePdf: jest.fn(), + validateBedrockDocument: jest.fn(), })); /** Mock the utils module */ @@ -15,11 +16,14 @@ jest.mock('./utils', () => ({ getConfiguredFileSizeLimit: jest.fn(), })); -import { validatePdf } from '~/files/validation'; +import { validatePdf, validateBedrockDocument } from '~/files/validation'; import { getFileStream, getConfiguredFileSizeLimit } from './utils'; import { Types } from 'mongoose'; const mockedValidatePdf = validatePdf as jest.MockedFunction; +const mockedValidateBedrockDocument = validateBedrockDocument as jest.MockedFunction< + typeof validateBedrockDocument 
+>; const mockedGetFileStream = getFileStream as jest.MockedFunction; const mockedGetConfiguredFileSizeLimit = getConfiguredFileSizeLimit as jest.MockedFunction< typeof getConfiguredFileSizeLimit @@ -84,6 +88,26 @@ describe('encodeAndFormatDocuments - fileConfig integration', () => { updatedAt: new Date(), }) as unknown as IMongoFile; + const createMockDocFile = ( + sizeInMB: number, + mimeType: string, + filename: string, + ): IMongoFile => + ({ + _id: new Types.ObjectId(), + user: new Types.ObjectId(), + file_id: new Types.ObjectId().toString(), + filename, + type: mimeType, + bytes: Math.floor(sizeInMB * 1024 * 1024), + object: 'file', + usage: 0, + source: 'test', + filepath: `/test/path/${filename}`, + createdAt: new Date(), + updatedAt: new Date(), + }) as unknown as IMongoFile; + describe('Configuration extraction and validation', () => { it('should pass configured file size limit to validatePdf for OpenAI', async () => { const configuredLimit = mbToBytes(15); @@ -500,6 +524,165 @@ describe('encodeAndFormatDocuments - fileConfig integration', () => { }); }); + it('should format Bedrock document with valid PDF', async () => { + const req = createMockRequest() as ServerRequest; + const file = createMockFile(3); + + const mockContent = Buffer.from('test-pdf-content').toString('base64'); + mockedGetFileStream.mockResolvedValue({ + file, + content: mockContent, + metadata: file, + }); + + mockedValidateBedrockDocument.mockResolvedValue({ isValid: true }); + + const result = await encodeAndFormatDocuments( + req, + [file], + { provider: Providers.BEDROCK }, + mockStrategyFunctions, + ); + + expect(result.documents).toHaveLength(1); + expect(result.documents[0]).toMatchObject({ + type: 'document', + document: { + name: 'test_pdf', + format: 'pdf', + source: { + bytes: expect.any(Buffer), + }, + }, + }); + }); + + it('should format Bedrock CSV document', async () => { + const req = createMockRequest() as ServerRequest; + const file = createMockDocFile(1, 'text/csv', 
'data.csv'); + + const mockContent = Buffer.from('col1,col2\nval1,val2').toString('base64'); + mockedGetFileStream.mockResolvedValue({ + file, + content: mockContent, + metadata: file, + }); + + mockedValidateBedrockDocument.mockResolvedValue({ isValid: true }); + + const result = await encodeAndFormatDocuments( + req, + [file], + { provider: Providers.BEDROCK }, + mockStrategyFunctions, + ); + + expect(result.documents).toHaveLength(1); + expect(result.documents[0]).toMatchObject({ + type: 'document', + document: { + name: 'data_csv', + format: 'csv', + source: { + bytes: expect.any(Buffer), + }, + }, + }); + }); + + it('should format Bedrock DOCX document', async () => { + const req = createMockRequest() as ServerRequest; + const mimeType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'; + const file = createMockDocFile(2, mimeType, 'report.docx'); + + const mockContent = Buffer.from('docx-binary-content').toString('base64'); + mockedGetFileStream.mockResolvedValue({ + file, + content: mockContent, + metadata: file, + }); + + mockedValidateBedrockDocument.mockResolvedValue({ isValid: true }); + + const result = await encodeAndFormatDocuments( + req, + [file], + { provider: Providers.BEDROCK }, + mockStrategyFunctions, + ); + + expect(result.documents).toHaveLength(1); + expect(result.documents[0]).toMatchObject({ + type: 'document', + document: { + name: 'report_docx', + format: 'docx', + source: { + bytes: expect.any(Buffer), + }, + }, + }); + }); + + it('should format Bedrock plain text document', async () => { + const req = createMockRequest() as ServerRequest; + const file = createMockDocFile(0.5, 'text/plain', 'notes.txt'); + + const mockContent = Buffer.from('plain text content').toString('base64'); + mockedGetFileStream.mockResolvedValue({ + file, + content: mockContent, + metadata: file, + }); + + mockedValidateBedrockDocument.mockResolvedValue({ isValid: true }); + + const result = await encodeAndFormatDocuments( + req, + 
[file], + { provider: Providers.BEDROCK }, + mockStrategyFunctions, + ); + + expect(result.documents).toHaveLength(1); + expect(result.documents[0]).toMatchObject({ + type: 'document', + document: { + name: 'notes_txt', + format: 'txt', + source: { + bytes: expect.any(Buffer), + }, + }, + }); + }); + + it('should reject Bedrock document when validation fails', async () => { + const req = createMockRequest() as ServerRequest; + const file = createMockDocFile(5, 'text/csv', 'big.csv'); + + const mockContent = Buffer.from('large-csv-content').toString('base64'); + mockedGetFileStream.mockResolvedValue({ + file, + content: mockContent, + metadata: file, + }); + + mockedValidateBedrockDocument.mockResolvedValue({ + isValid: false, + error: 'File size (5.0MB) exceeds the 4.5MB limit for Bedrock', + }); + + await expect( + encodeAndFormatDocuments( + req, + [file], + { provider: Providers.BEDROCK }, + mockStrategyFunctions, + ), + ).rejects.toThrow('Document validation failed'); + }); + it('should format OpenAI document with responses API', async () => { const req = createMockRequest(15) as ServerRequest; const file = createMockFile(10); diff --git a/packages/api/src/files/encode/document.ts b/packages/api/src/files/encode/document.ts index 487a5503a4..e4fd066324 100644 --- a/packages/api/src/files/encode/document.ts +++ b/packages/api/src/files/encode/document.ts @@ -1,5 +1,10 @@ import { Providers } from '@librechat/agents'; -import { isOpenAILikeProvider, isDocumentSupportedProvider } from 'librechat-data-provider'; +import { + isOpenAILikeProvider, + isBedrockDocumentType, + bedrockDocumentFormats, + isDocumentSupportedProvider, +} from 'librechat-data-provider'; import type { IMongoFile } from '@librechat/data-schemas'; import type { AnthropicDocumentBlock, @@ -7,8 +12,8 @@ import type { DocumentResult, ServerRequest, } from '~/types'; +import { validatePdf, validateBedrockDocument } from '~/files/validation'; import { getFileStream, getConfiguredFileSizeLimit } from 
'./utils'; -import { validatePdf } from '~/files/validation'; /** * Processes and encodes document files for various providers @@ -35,9 +40,15 @@ export async function encodeAndFormatDocuments( const encodingMethods: Record = {}; const result: DocumentResult = { documents: [], files: [] }; - const documentFiles = files.filter( - (file) => file.type === 'application/pdf' || file.type?.startsWith('application/'), - ); + const isBedrock = provider === Providers.BEDROCK; + const isDocSupported = isDocumentSupportedProvider(provider); + + const documentFiles = files.filter((file) => { + if (isBedrock && isBedrockDocumentType(file.type)) { + return true; + } + return file.type === 'application/pdf' || file.type?.startsWith('application/'); + }); if (!documentFiles.length) { return result; @@ -45,7 +56,10 @@ export async function encodeAndFormatDocuments( const results = await Promise.allSettled( documentFiles.map((file) => { - if (file.type !== 'application/pdf' || !isDocumentSupportedProvider(provider)) { + const isProcessable = isBedrock + ? isBedrockDocumentType(file.type) + : file.type === 'application/pdf' && isDocSupported; + if (!isProcessable) { return Promise.resolve(null); } return getFileStream(req, file, encodingMethods, getStrategyFunctions); @@ -68,14 +82,40 @@ export async function encodeAndFormatDocuments( continue; } - if (file.type === 'application/pdf' && isDocumentSupportedProvider(provider)) { - const pdfBuffer = Buffer.from(content, 'base64'); + const configuredFileSizeLimit = getConfiguredFileSizeLimit(req, { provider, endpoint }); + const mimeType = file.type ?? 
''; - /** Extract configured file size limit from fileConfig for this endpoint */ - const configuredFileSizeLimit = getConfiguredFileSizeLimit(req, { - provider, - endpoint, + if (isBedrock && isBedrockDocumentType(mimeType)) { + const fileBuffer = Buffer.from(content, 'base64'); + const format = bedrockDocumentFormats[mimeType]; + + const validation = await validateBedrockDocument( + fileBuffer.length, + mimeType, + fileBuffer, + configuredFileSizeLimit, + ); + + if (!validation.isValid) { + throw new Error(`Document validation failed: ${validation.error}`); + } + + const sanitizedName = (file.filename || 'document') + .replace(/[^a-zA-Z0-9\s\-()[\]]/g, '_') + .slice(0, 200); + result.documents.push({ + type: 'document', + document: { + name: sanitizedName, + format, + source: { + bytes: fileBuffer, + }, + }, }); + result.files.push(metadata); + } else if (file.type === 'application/pdf' && isDocSupported) { + const pdfBuffer = Buffer.from(content, 'base64'); const validation = await validatePdf( pdfBuffer, diff --git a/packages/api/src/files/validation.spec.ts b/packages/api/src/files/validation.spec.ts index 384f499f43..98dcda4188 100644 --- a/packages/api/src/files/validation.spec.ts +++ b/packages/api/src/files/validation.spec.ts @@ -1,6 +1,6 @@ import { Providers } from '@librechat/agents'; import { mbToBytes } from 'librechat-data-provider'; -import { validatePdf, validateVideo, validateAudio } from './validation'; +import { validatePdf, validateBedrockDocument, validateVideo, validateAudio } from './validation'; describe('PDF Validation with fileConfig.endpoints.*.fileSizeLimit', () => { /** Helper to create a PDF buffer with valid header */ @@ -145,6 +145,122 @@ describe('PDF Validation with fileConfig.endpoints.*.fileSizeLimit', () => { }); }); + describe('validatePdf - Bedrock provider', () => { + const provider = Providers.BEDROCK; + + it('should accept PDF within provider limit when no config provided', async () => { + const pdfBuffer = 
createMockPdfBuffer(3); + const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider); + + expect(result.isValid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('should reject PDF exceeding 4.5MB hard limit when no config provided', async () => { + const pdfBuffer = createMockPdfBuffer(5); + const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider); + + expect(result.isValid).toBe(false); + expect(result.error).toContain('4.5MB'); + }); + + it('should use configured limit when it is lower than provider limit', async () => { + const configuredLimit = mbToBytes(2); + const pdfBuffer = createMockPdfBuffer(3); + const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider, configuredLimit); + + expect(result.isValid).toBe(false); + expect(result.error).toContain('2.0MB'); + }); + + it('should clamp to 4.5MB hard limit even when config is higher', async () => { + const configuredLimit = mbToBytes(512); + const pdfBuffer = createMockPdfBuffer(5); + const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider, configuredLimit); + + expect(result.isValid).toBe(false); + expect(result.error).toContain('4.5MB'); + }); + + it('should reject PDFs with invalid header', async () => { + const pdfBuffer = Buffer.alloc(1024); + pdfBuffer.write('INVALID', 0); + const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider); + + expect(result.isValid).toBe(false); + expect(result.error).toContain('PDF header'); + }); + + it('should reject PDFs that are too small', async () => { + const pdfBuffer = Buffer.alloc(3); + const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider); + + expect(result.isValid).toBe(false); + expect(result.error).toContain('too small'); + }); + }); + + describe('validateBedrockDocument - non-PDF types', () => { + it('should accept CSV within 4.5MB limit', async () => { + const fileSize = 2 * 1024 * 1024; + const result = await validateBedrockDocument(fileSize, 'text/csv'); + 
+ expect(result.isValid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('should accept DOCX within 4.5MB limit', async () => { + const fileSize = 3 * 1024 * 1024; + const mimeType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'; + const result = await validateBedrockDocument(fileSize, mimeType); + + expect(result.isValid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('should reject non-PDF document exceeding 4.5MB hard limit', async () => { + const fileSize = 5 * 1024 * 1024; + const result = await validateBedrockDocument(fileSize, 'text/plain'); + + expect(result.isValid).toBe(false); + expect(result.error).toContain('4.5MB'); + }); + + it('should clamp to 4.5MB even when config is higher for non-PDF', async () => { + const fileSize = 5 * 1024 * 1024; + const configuredLimit = mbToBytes(512); + const result = await validateBedrockDocument(fileSize, 'text/html', undefined, configuredLimit); + + expect(result.isValid).toBe(false); + expect(result.error).toContain('4.5MB'); + }); + + it('should use configured limit when lower than provider limit for non-PDF', async () => { + const fileSize = 3 * 1024 * 1024; + const configuredLimit = mbToBytes(2); + const result = await validateBedrockDocument(fileSize, 'text/markdown', undefined, configuredLimit); + + expect(result.isValid).toBe(false); + expect(result.error).toContain('2.0MB'); + }); + + it('should not run PDF header check on non-PDF types', async () => { + const buffer = Buffer.from('NOT-A-PDF-HEADER-but-valid-csv-content'); + const result = await validateBedrockDocument(buffer.length, 'text/csv', buffer); + + expect(result.isValid).toBe(true); + }); + + it('should still run PDF header check when mimeType is application/pdf', async () => { + const buffer = Buffer.alloc(1024); + buffer.write('INVALID', 0); + const result = await validateBedrockDocument(buffer.length, 'application/pdf', buffer); + + expect(result.isValid).toBe(false); + 
expect(result.error).toContain('PDF header'); + }); + }); + describe('validatePdf - Google provider', () => { const provider = Providers.GOOGLE; diff --git a/packages/api/src/files/validation.ts b/packages/api/src/files/validation.ts index 4b36ac0bff..b3db19e92a 100644 --- a/packages/api/src/files/validation.ts +++ b/packages/api/src/files/validation.ts @@ -1,6 +1,11 @@ import { Providers } from '@librechat/agents'; import { mbToBytes, isOpenAILikeProvider } from 'librechat-data-provider'; +export interface ValidationResult { + isValid: boolean; + error?: string; +} + export interface PDFValidationResult { isValid: boolean; error?: string; @@ -31,6 +36,10 @@ export async function validatePdf( return validateAnthropicPdf(pdfBuffer, fileSize, configuredFileSizeLimit); } + if (provider === Providers.BEDROCK) { + return validateBedrockDocument(fileSize, 'application/pdf', pdfBuffer, configuredFileSizeLimit); + } + if (isOpenAILikeProvider(provider)) { return validateOpenAIPdf(fileSize, configuredFileSizeLimit); } @@ -113,6 +122,64 @@ async function validateAnthropicPdf( } } +/** + * Validates a document against Bedrock's 4.5MB hard limit. PDF-specific header + * checks run only when the MIME type is `application/pdf`. + * @param fileSize - The file size in bytes + * @param mimeType - The MIME type of the document + * @param fileBuffer - The file buffer (used for PDF header validation) + * @param configuredFileSizeLimit - Optional configured file size limit from fileConfig (in bytes) + * @returns Promise that resolves to validation result + */ +export async function validateBedrockDocument( + fileSize: number, + mimeType: string, + fileBuffer?: Buffer, + configuredFileSizeLimit?: number, +): Promise { + try { + /** Bedrock enforces a hard 4.5MB per-document limit at the API level; config can only lower it */ + const providerLimit = mbToBytes(4.5); + const effectiveLimit = + configuredFileSizeLimit != null + ? 
Math.min(configuredFileSizeLimit, providerLimit) + : providerLimit; + + if (fileSize > effectiveLimit) { + const limitMB = (effectiveLimit / (1024 * 1024)).toFixed(1); + return { + isValid: false, + error: `File size (${(fileSize / (1024 * 1024)).toFixed(1)}MB) exceeds the ${limitMB}MB limit for Bedrock`, + }; + } + + if (mimeType === 'application/pdf' && fileBuffer) { + if (fileBuffer.length < 5) { + return { + isValid: false, + error: 'Invalid PDF file: too small or corrupted', + }; + } + + const pdfHeader = fileBuffer.subarray(0, 5).toString(); + if (!pdfHeader.startsWith('%PDF-')) { + return { + isValid: false, + error: 'Invalid PDF file: missing PDF header', + }; + } + } + + return { isValid: true }; + } catch (error) { + console.error('Bedrock document validation error:', error); + return { + isValid: false, + error: 'Failed to validate document file', + }; + } +} + /** * Validates if a PDF meets OpenAI's requirements * @param fileSize - The file size in bytes diff --git a/packages/api/src/types/files.ts b/packages/api/src/types/files.ts index 6a403932da..ada6ff024c 100644 --- a/packages/api/src/types/files.ts +++ b/packages/api/src/types/files.ts @@ -1,6 +1,7 @@ +import type { BedrockDocumentFormat } from 'librechat-data-provider'; import type { IMongoFile } from '@librechat/data-schemas'; -import type { ServerRequest } from './http'; import type { Readable } from 'stream'; +import type { ServerRequest } from './http'; export interface STTService { getInstance(): Promise; getProviderSchema(req: ServerRequest): Promise<[string, object]>; @@ -95,11 +96,24 @@ export interface OpenAIInputFileBlock { file_data: string; } +/** Bedrock Converse API document block (passthrough via @langchain/aws) */ +export interface BedrockDocumentBlock { + type: 'document'; + document: { + name: string; + format: BedrockDocumentFormat; + source: { + bytes: Buffer; + }; + }; +} + export type DocumentBlock = | AnthropicDocumentBlock | GoogleDocumentBlock | OpenAIFileBlock - | 
OpenAIInputFileBlock; + | OpenAIInputFileBlock + | BedrockDocumentBlock; export interface DocumentResult { documents: DocumentBlock[]; diff --git a/packages/data-provider/src/file-config.ts b/packages/data-provider/src/file-config.ts index 98254390b9..5a117eb760 100644 --- a/packages/data-provider/src/file-config.ts +++ b/packages/data-provider/src/file-config.ts @@ -139,6 +139,39 @@ export const retrievalMimeTypesList = [ export const imageExtRegex = /\.(jpg|jpeg|png|gif|webp|heic|heif)$/i; +/** @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_DocumentBlock.html */ +export type BedrockDocumentFormat = + | 'pdf' + | 'csv' + | 'doc' + | 'docx' + | 'xls' + | 'xlsx' + | 'html' + | 'txt' + | 'md'; + +/** Maps MIME types to Bedrock Converse API document format values */ +export const bedrockDocumentFormats: Record = { + 'application/pdf': 'pdf', + 'text/csv': 'csv', + 'application/csv': 'csv', + 'application/msword': 'doc', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx', + 'application/vnd.ms-excel': 'xls', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx', + 'text/html': 'html', + 'text/plain': 'txt', + 'text/markdown': 'md', +}; + +export const isBedrockDocumentType = (mimeType?: string): boolean => + mimeType != null && mimeType in bedrockDocumentFormats; + +/** File extensions accepted by Bedrock document uploads (for input accept attributes) */ +export const bedrockDocumentExtensions = + '.pdf,.csv,.doc,.docx,.xls,.xlsx,.html,.htm,.txt,.md,application/pdf,text/csv,application/csv,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,text/html,text/plain,text/markdown'; + export const excelMimeTypes = /^application\/(vnd\.ms-excel|msexcel|x-msexcel|x-ms-excel|x-excel|x-dos_ms_excel|xls|x-xls|vnd\.openxmlformats-officedocument\.spreadsheetml\.sheet)$/; 
@@ -146,7 +179,7 @@ export const textMimeTypes = /^(text\/(x-c|x-csharp|tab-separated-values|x-c\+\+|x-h|x-java|html|markdown|x-php|x-python|x-script\.python|x-ruby|x-tex|plain|css|vtt|javascript|csv|xml))$/; export const applicationMimeTypes = - /^(application\/(epub\+zip|csv|json|pdf|x-tar|x-sh|typescript|sql|yaml|x-parquet|vnd\.apache\.parquet|vnd\.coffeescript|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation|spreadsheetml\.sheet)|xml|zip))$/; + /^(application\/(epub\+zip|csv|json|msword|pdf|x-tar|x-sh|typescript|sql|yaml|x-parquet|vnd\.apache\.parquet|vnd\.coffeescript|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation|spreadsheetml\.sheet)|xml|zip))$/; export const imageMimeTypes = /^image\/(jpeg|gif|png|webp|heic|heif)$/; diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index 039bfa572e..60a20ecae7 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -49,6 +49,7 @@ export enum Providers { export const documentSupportedProviders = new Set([ EModelEndpoint.anthropic, EModelEndpoint.openAI, + EModelEndpoint.bedrock, EModelEndpoint.custom, // handled in AttachFileMenu and DragDropModal since azureOpenAI only supports documents with Use Responses API set to true // EModelEndpoint.azureOpenAI,