🪨 feat: AWS Bedrock Document Uploads (#11912)
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions

* feat: add aws bedrock upload to provider support

* chore: address copilot comments

* feat: add shared Bedrock document format types and MIME mapping

Bedrock Converse API accepts 9 document formats, PDF included. Add
BedrockDocumentFormat union type, MIME-to-format mapping, and helpers
in data-provider so both client and backend can reference them.

* refactor: generalize Bedrock PDF validation to support all document types

Rename validateBedrockPdf to validateBedrockDocument with MIME-aware
logic: 4.5MB hard limit applies to all types, PDF header check only
runs for application/pdf. Adds test coverage for non-PDF documents.

* feat: support all Bedrock document formats in encoding pipeline

Widen file type gates to accept csv, doc, docx, xls, xlsx, html, txt,
md for Bedrock. Uses shared MIME-to-format map instead of hardcoded
'pdf'. Other providers' PDF-only paths remain unchanged.

* feat: expand Bedrock file upload UI to accept all document types

Add 'image_document_extended' upload type for Bedrock with accept
filters for all 9 supported formats. Update drag-and-drop validation
to use isBedrockDocumentType helper.

* fix: route Bedrock document types through provider pipeline
This commit is contained in:
Dustin Healy 2026-02-23 19:32:44 -08:00 committed by GitHub
parent b349f2f876
commit 1d0a4c501f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 528 additions and 47 deletions

View file

@ -7,6 +7,7 @@ import { encodeAndFormatDocuments } from './document';
/** Mock the validation module */
jest.mock('~/files/validation', () => ({
validatePdf: jest.fn(),
validateBedrockDocument: jest.fn(),
}));
/** Mock the utils module */
@ -15,11 +16,14 @@ jest.mock('./utils', () => ({
getConfiguredFileSizeLimit: jest.fn(),
}));
import { validatePdf } from '~/files/validation';
import { validatePdf, validateBedrockDocument } from '~/files/validation';
import { getFileStream, getConfiguredFileSizeLimit } from './utils';
import { Types } from 'mongoose';
const mockedValidatePdf = validatePdf as jest.MockedFunction<typeof validatePdf>;
const mockedValidateBedrockDocument = validateBedrockDocument as jest.MockedFunction<
typeof validateBedrockDocument
>;
const mockedGetFileStream = getFileStream as jest.MockedFunction<typeof getFileStream>;
const mockedGetConfiguredFileSizeLimit = getConfiguredFileSizeLimit as jest.MockedFunction<
typeof getConfiguredFileSizeLimit
@ -84,6 +88,26 @@ describe('encodeAndFormatDocuments - fileConfig integration', () => {
updatedAt: new Date(),
}) as unknown as IMongoFile;
/**
 * Builds a mock file record for document tests with a caller-chosen size,
 * MIME type, and filename.
 * @param sizeInMB - Size in megabytes; converted to bytes and floored for the `bytes` field
 * @param mimeType - Value stored in the record's `type` field
 * @param filename - Value stored in `filename` and appended to the `/test/path/` filepath
 * @returns The record cast to IMongoFile
 */
const createMockDocFile = (sizeInMB: number, mimeType: string, filename: string): IMongoFile => {
  const byteCount = Math.floor(sizeInMB * 1024 * 1024);
  const mockRecord = {
    _id: new Types.ObjectId(),
    user: new Types.ObjectId(),
    file_id: new Types.ObjectId().toString(),
    filename,
    type: mimeType,
    bytes: byteCount,
    object: 'file',
    usage: 0,
    source: 'test',
    filepath: `/test/path/${filename}`,
    createdAt: new Date(),
    updatedAt: new Date(),
  };
  return mockRecord as unknown as IMongoFile;
};
describe('Configuration extraction and validation', () => {
it('should pass configured file size limit to validatePdf for OpenAI', async () => {
const configuredLimit = mbToBytes(15);
@ -500,6 +524,165 @@ describe('encodeAndFormatDocuments - fileConfig integration', () => {
});
});
/** With validation mocked as passing, a Bedrock request yields one document block with format 'pdf'. */
it('should format Bedrock document with valid PDF', async () => {
  const request = createMockRequest() as ServerRequest;
  const pdfFile = createMockFile(3);
  const encodedContent = Buffer.from('test-pdf-content').toString('base64');

  mockedGetFileStream.mockResolvedValue({
    file: pdfFile,
    content: encodedContent,
    metadata: pdfFile,
  });
  mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });

  const { documents } = await encodeAndFormatDocuments(
    request,
    [pdfFile],
    { provider: Providers.BEDROCK },
    mockStrategyFunctions,
  );

  expect(documents).toHaveLength(1);
  const [firstBlock] = documents;
  expect(firstBlock).toMatchObject({
    type: 'document',
    document: {
      name: 'test_pdf',
      format: 'pdf',
      source: { bytes: expect.any(Buffer) },
    },
  });
});
/** A text/csv file sent to Bedrock is emitted as a document block with format 'csv'. */
it('should format Bedrock CSV document', async () => {
  const request = createMockRequest() as ServerRequest;
  const csvFile = createMockDocFile(1, 'text/csv', 'data.csv');
  const encodedContent = Buffer.from('col1,col2\nval1,val2').toString('base64');

  mockedGetFileStream.mockResolvedValue({
    file: csvFile,
    content: encodedContent,
    metadata: csvFile,
  });
  mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });

  const { documents } = await encodeAndFormatDocuments(
    request,
    [csvFile],
    { provider: Providers.BEDROCK },
    mockStrategyFunctions,
  );

  expect(documents).toHaveLength(1);
  const [firstBlock] = documents;
  expect(firstBlock).toMatchObject({
    type: 'document',
    document: {
      name: 'data_csv',
      format: 'csv',
      source: { bytes: expect.any(Buffer) },
    },
  });
});
/** A wordprocessingml (DOCX) file sent to Bedrock is emitted as a document block with format 'docx'. */
it('should format Bedrock DOCX document', async () => {
  const request = createMockRequest() as ServerRequest;
  const docxMime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
  const docxFile = createMockDocFile(2, docxMime, 'report.docx');
  const encodedContent = Buffer.from('docx-binary-content').toString('base64');

  mockedGetFileStream.mockResolvedValue({
    file: docxFile,
    content: encodedContent,
    metadata: docxFile,
  });
  mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });

  const { documents } = await encodeAndFormatDocuments(
    request,
    [docxFile],
    { provider: Providers.BEDROCK },
    mockStrategyFunctions,
  );

  expect(documents).toHaveLength(1);
  const [firstBlock] = documents;
  expect(firstBlock).toMatchObject({
    type: 'document',
    document: {
      name: 'report_docx',
      format: 'docx',
      source: { bytes: expect.any(Buffer) },
    },
  });
});
/** A text/plain file sent to Bedrock is emitted as a document block with format 'txt'. */
it('should format Bedrock plain text document', async () => {
  const request = createMockRequest() as ServerRequest;
  const textFile = createMockDocFile(0.5, 'text/plain', 'notes.txt');
  const encodedContent = Buffer.from('plain text content').toString('base64');

  mockedGetFileStream.mockResolvedValue({
    file: textFile,
    content: encodedContent,
    metadata: textFile,
  });
  mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });

  const { documents } = await encodeAndFormatDocuments(
    request,
    [textFile],
    { provider: Providers.BEDROCK },
    mockStrategyFunctions,
  );

  expect(documents).toHaveLength(1);
  const [firstBlock] = documents;
  expect(firstBlock).toMatchObject({
    type: 'document',
    document: {
      name: 'notes_txt',
      format: 'txt',
      source: { bytes: expect.any(Buffer) },
    },
  });
});
/** When the mocked validator reports failure, encoding rejects with a 'Document validation failed' error. */
it('should reject Bedrock document when validation fails', async () => {
  const request = createMockRequest() as ServerRequest;
  const oversizedFile = createMockDocFile(5, 'text/csv', 'big.csv');
  const encodedContent = Buffer.from('large-csv-content').toString('base64');

  mockedGetFileStream.mockResolvedValue({
    file: oversizedFile,
    content: encodedContent,
    metadata: oversizedFile,
  });
  mockedValidateBedrockDocument.mockResolvedValue({
    isValid: false,
    error: 'File size (5.0MB) exceeds the 4.5MB limit for Bedrock',
  });

  const pendingEncode = encodeAndFormatDocuments(
    request,
    [oversizedFile],
    { provider: Providers.BEDROCK },
    mockStrategyFunctions,
  );

  await expect(pendingEncode).rejects.toThrow('Document validation failed');
});
it('should format OpenAI document with responses API', async () => {
const req = createMockRequest(15) as ServerRequest;
const file = createMockFile(10);

View file

@ -1,5 +1,10 @@
import { Providers } from '@librechat/agents';
import { isOpenAILikeProvider, isDocumentSupportedProvider } from 'librechat-data-provider';
import {
isOpenAILikeProvider,
isBedrockDocumentType,
bedrockDocumentFormats,
isDocumentSupportedProvider,
} from 'librechat-data-provider';
import type { IMongoFile } from '@librechat/data-schemas';
import type {
AnthropicDocumentBlock,
@ -7,8 +12,8 @@ import type {
DocumentResult,
ServerRequest,
} from '~/types';
import { validatePdf, validateBedrockDocument } from '~/files/validation';
import { getFileStream, getConfiguredFileSizeLimit } from './utils';
import { validatePdf } from '~/files/validation';
/**
* Processes and encodes document files for various providers
@ -35,9 +40,15 @@ export async function encodeAndFormatDocuments(
const encodingMethods: Record<string, StrategyFunctions> = {};
const result: DocumentResult = { documents: [], files: [] };
const documentFiles = files.filter(
(file) => file.type === 'application/pdf' || file.type?.startsWith('application/'),
);
const isBedrock = provider === Providers.BEDROCK;
const isDocSupported = isDocumentSupportedProvider(provider);
const documentFiles = files.filter((file) => {
if (isBedrock && isBedrockDocumentType(file.type)) {
return true;
}
return file.type === 'application/pdf' || file.type?.startsWith('application/');
});
if (!documentFiles.length) {
return result;
@ -45,7 +56,10 @@ export async function encodeAndFormatDocuments(
const results = await Promise.allSettled(
documentFiles.map((file) => {
if (file.type !== 'application/pdf' || !isDocumentSupportedProvider(provider)) {
const isProcessable = isBedrock
? isBedrockDocumentType(file.type)
: file.type === 'application/pdf' && isDocSupported;
if (!isProcessable) {
return Promise.resolve(null);
}
return getFileStream(req, file, encodingMethods, getStrategyFunctions);
@ -68,14 +82,40 @@ export async function encodeAndFormatDocuments(
continue;
}
if (file.type === 'application/pdf' && isDocumentSupportedProvider(provider)) {
const pdfBuffer = Buffer.from(content, 'base64');
const configuredFileSizeLimit = getConfiguredFileSizeLimit(req, { provider, endpoint });
const mimeType = file.type ?? '';
/** Extract configured file size limit from fileConfig for this endpoint */
const configuredFileSizeLimit = getConfiguredFileSizeLimit(req, {
provider,
endpoint,
if (isBedrock && isBedrockDocumentType(mimeType)) {
const fileBuffer = Buffer.from(content, 'base64');
const format = bedrockDocumentFormats[mimeType];
const validation = await validateBedrockDocument(
fileBuffer.length,
mimeType,
fileBuffer,
configuredFileSizeLimit,
);
if (!validation.isValid) {
throw new Error(`Document validation failed: ${validation.error}`);
}
const sanitizedName = (file.filename || 'document')
.replace(/[^a-zA-Z0-9\s\-()[\]]/g, '_')
.slice(0, 200);
result.documents.push({
type: 'document',
document: {
name: sanitizedName,
format,
source: {
bytes: fileBuffer,
},
},
});
result.files.push(metadata);
} else if (file.type === 'application/pdf' && isDocSupported) {
const pdfBuffer = Buffer.from(content, 'base64');
const validation = await validatePdf(
pdfBuffer,

View file

@ -1,6 +1,6 @@
import { Providers } from '@librechat/agents';
import { mbToBytes } from 'librechat-data-provider';
import { validatePdf, validateVideo, validateAudio } from './validation';
import { validatePdf, validateBedrockDocument, validateVideo, validateAudio } from './validation';
describe('PDF Validation with fileConfig.endpoints.*.fileSizeLimit', () => {
/** Helper to create a PDF buffer with valid header */
@ -145,6 +145,122 @@ describe('PDF Validation with fileConfig.endpoints.*.fileSizeLimit', () => {
});
});
/** validatePdf called with the Bedrock provider: size-limit clamping and PDF header checks. */
describe('validatePdf - Bedrock provider', () => {
  const provider = Providers.BEDROCK;

  it('should accept PDF within provider limit when no config provided', async () => {
    const pdf = createMockPdfBuffer(3);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should reject PDF exceeding 4.5MB hard limit when no config provided', async () => {
    const pdf = createMockPdfBuffer(5);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('4.5MB');
  });

  it('should use configured limit when it is lower than provider limit', async () => {
    const lowerLimit = mbToBytes(2);
    const pdf = createMockPdfBuffer(3);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider, lowerLimit);

    expect(isValid).toBe(false);
    expect(error).toContain('2.0MB');
  });

  it('should clamp to 4.5MB hard limit even when config is higher', async () => {
    const higherLimit = mbToBytes(512);
    const pdf = createMockPdfBuffer(5);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider, higherLimit);

    expect(isValid).toBe(false);
    expect(error).toContain('4.5MB');
  });

  it('should reject PDFs with invalid header', async () => {
    const badHeader = Buffer.alloc(1024);
    badHeader.write('INVALID', 0);
    const { isValid, error } = await validatePdf(badHeader, badHeader.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('PDF header');
  });

  it('should reject PDFs that are too small', async () => {
    const tiny = Buffer.alloc(3);
    const { isValid, error } = await validatePdf(tiny, tiny.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('too small');
  });
});
/** validateBedrockDocument for non-PDF MIME types, plus a check that PDFs still get header validation. */
describe('validateBedrockDocument - non-PDF types', () => {
  const MB = 1024 * 1024;

  it('should accept CSV within 4.5MB limit', async () => {
    const { isValid, error } = await validateBedrockDocument(2 * MB, 'text/csv');

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should accept DOCX within 4.5MB limit', async () => {
    const docxMime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
    const { isValid, error } = await validateBedrockDocument(3 * MB, docxMime);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should reject non-PDF document exceeding 4.5MB hard limit', async () => {
    const { isValid, error } = await validateBedrockDocument(5 * MB, 'text/plain');

    expect(isValid).toBe(false);
    expect(error).toContain('4.5MB');
  });

  it('should clamp to 4.5MB even when config is higher for non-PDF', async () => {
    const higherLimit = mbToBytes(512);
    const { isValid, error } = await validateBedrockDocument(
      5 * MB,
      'text/html',
      undefined,
      higherLimit,
    );

    expect(isValid).toBe(false);
    expect(error).toContain('4.5MB');
  });

  it('should use configured limit when lower than provider limit for non-PDF', async () => {
    const lowerLimit = mbToBytes(2);
    const { isValid, error } = await validateBedrockDocument(
      3 * MB,
      'text/markdown',
      undefined,
      lowerLimit,
    );

    expect(isValid).toBe(false);
    expect(error).toContain('2.0MB');
  });

  it('should not run PDF header check on non-PDF types', async () => {
    const csvBytes = Buffer.from('NOT-A-PDF-HEADER-but-valid-csv-content');
    const { isValid } = await validateBedrockDocument(csvBytes.length, 'text/csv', csvBytes);

    expect(isValid).toBe(true);
  });

  it('should still run PDF header check when mimeType is application/pdf', async () => {
    const badHeader = Buffer.alloc(1024);
    badHeader.write('INVALID', 0);
    const { isValid, error } = await validateBedrockDocument(
      badHeader.length,
      'application/pdf',
      badHeader,
    );

    expect(isValid).toBe(false);
    expect(error).toContain('PDF header');
  });
});
describe('validatePdf - Google provider', () => {
const provider = Providers.GOOGLE;

View file

@ -1,6 +1,11 @@
import { Providers } from '@librechat/agents';
import { mbToBytes, isOpenAILikeProvider } from 'librechat-data-provider';
/** Outcome of a file validation check, as returned by validateBedrockDocument. */
export interface ValidationResult {
/** True when the file passed every check that was run */
isValid: boolean;
/** Human-readable rejection reason; validateBedrockDocument omits it on success */
error?: string;
}
export interface PDFValidationResult {
isValid: boolean;
error?: string;
@ -31,6 +36,10 @@ export async function validatePdf(
return validateAnthropicPdf(pdfBuffer, fileSize, configuredFileSizeLimit);
}
if (provider === Providers.BEDROCK) {
return validateBedrockDocument(fileSize, 'application/pdf', pdfBuffer, configuredFileSizeLimit);
}
if (isOpenAILikeProvider(provider)) {
return validateOpenAIPdf(fileSize, configuredFileSizeLimit);
}
@ -113,6 +122,64 @@ async function validateAnthropicPdf(
}
}
/**
 * Checks a document against the Bedrock size ceiling and, for PDFs, the file header.
 *
 * The ceiling is 4.5MB; a configured limit is honored only when it is lower.
 * Header inspection runs solely for `application/pdf`, and only when a buffer is supplied.
 * @param fileSize - The file size in bytes
 * @param mimeType - The MIME type of the document
 * @param fileBuffer - The file contents (consulted only for the PDF header check)
 * @param configuredFileSizeLimit - Optional configured file size limit from fileConfig (in bytes)
 * @returns Promise resolving to the validation result; failures carry an `error` message
 */
export async function validateBedrockDocument(
  fileSize: number,
  mimeType: string,
  fileBuffer?: Buffer,
  configuredFileSizeLimit?: number,
): Promise<ValidationResult> {
  const BYTES_PER_MB = 1024 * 1024;
  const reject = (error: string): ValidationResult => ({ isValid: false, error });

  try {
    /** Bedrock's per-document cap is fixed at the API level, so config may lower it but never raise it */
    const hardCap = mbToBytes(4.5);
    const sizeCeiling = Math.min(configuredFileSizeLimit ?? hardCap, hardCap);

    if (fileSize > sizeCeiling) {
      const actualMB = (fileSize / BYTES_PER_MB).toFixed(1);
      const ceilingMB = (sizeCeiling / BYTES_PER_MB).toFixed(1);
      return reject(`File size (${actualMB}MB) exceeds the ${ceilingMB}MB limit for Bedrock`);
    }

    /** Only PDFs with an available buffer get content-level checks */
    if (mimeType !== 'application/pdf' || !fileBuffer) {
      return { isValid: true };
    }

    if (fileBuffer.length < 5) {
      return reject('Invalid PDF file: too small or corrupted');
    }

    const magicBytes = fileBuffer.subarray(0, 5).toString();
    if (!magicBytes.startsWith('%PDF-')) {
      return reject('Invalid PDF file: missing PDF header');
    }

    return { isValid: true };
  } catch (error) {
    console.error('Bedrock document validation error:', error);
    return reject('Failed to validate document file');
  }
}
/**
* Validates if a PDF meets OpenAI's requirements
* @param fileSize - The file size in bytes

View file

@ -1,6 +1,7 @@
import type { BedrockDocumentFormat } from 'librechat-data-provider';
import type { IMongoFile } from '@librechat/data-schemas';
import type { ServerRequest } from './http';
import type { Readable } from 'stream';
import type { ServerRequest } from './http';
export interface STTService {
getInstance(): Promise<STTService>;
getProviderSchema(req: ServerRequest): Promise<[string, object]>;
@ -95,11 +96,24 @@ export interface OpenAIInputFileBlock {
file_data: string;
}
/**
 * Bedrock Converse API document block (passthrough via @langchain/aws).
 * One of the members of the DocumentBlock union.
 */
export interface BedrockDocumentBlock {
/** Literal tag identifying this block as a document */
type: 'document';
document: {
/** Document name; the Bedrock branch of encodeAndFormatDocuments supplies a sanitized filename */
name: string;
/** Bedrock format value (e.g. 'pdf', 'csv'), looked up from the file's MIME type */
format: BedrockDocumentFormat;
source: {
/** File contents as a Buffer (decoded from base64 in encodeAndFormatDocuments) */
bytes: Buffer;
};
};
}
export type DocumentBlock =
| AnthropicDocumentBlock
| GoogleDocumentBlock
| OpenAIFileBlock
| OpenAIInputFileBlock;
| OpenAIInputFileBlock
| BedrockDocumentBlock;
export interface DocumentResult {
documents: DocumentBlock[];

View file

@ -139,6 +139,39 @@ export const retrievalMimeTypesList = [
export const imageExtRegex = /\.(jpg|jpeg|png|gif|webp|heic|heif)$/i;
/**
 * Document format values accepted by the Bedrock Converse API.
 * @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_DocumentBlock.html
 */
export type BedrockDocumentFormat =
  | 'pdf'
  | 'csv'
  | 'doc'
  | 'docx'
  | 'xls'
  | 'xlsx'
  | 'html'
  | 'txt'
  | 'md';

/** Maps MIME types to Bedrock Converse API document format values */
export const bedrockDocumentFormats: Record<string, BedrockDocumentFormat> = {
  'application/pdf': 'pdf',
  'text/csv': 'csv',
  'application/csv': 'csv',
  'application/msword': 'doc',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
  'application/vnd.ms-excel': 'xls',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
  'text/html': 'html',
  'text/plain': 'txt',
  'text/markdown': 'md',
};

/**
 * Reports whether `mimeType` is one of the keys of `bedrockDocumentFormats`.
 *
 * Uses an own-property check rather than the `in` operator: `in` also matches
 * names inherited from Object.prototype (e.g. 'constructor', 'toString',
 * '__proto__'), which would let those strings pass as document types and make
 * the subsequent format lookup return a non-format value.
 * @param mimeType - MIME type to test; `undefined`/`null` yields false
 * @returns True only for MIME types explicitly listed in the map
 */
export const isBedrockDocumentType = (mimeType?: string): boolean =>
  mimeType != null && Object.prototype.hasOwnProperty.call(bedrockDocumentFormats, mimeType);
/**
 * Comma-separated list of file extensions AND MIME types accepted by Bedrock
 * document uploads (for input accept attributes). The extension list includes
 * `.htm` in addition to `.html`.
 */
export const bedrockDocumentExtensions =
'.pdf,.csv,.doc,.docx,.xls,.xlsx,.html,.htm,.txt,.md,application/pdf,text/csv,application/csv,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,text/html,text/plain,text/markdown';
export const excelMimeTypes =
/^application\/(vnd\.ms-excel|msexcel|x-msexcel|x-ms-excel|x-excel|x-dos_ms_excel|xls|x-xls|vnd\.openxmlformats-officedocument\.spreadsheetml\.sheet)$/;
@ -146,7 +179,7 @@ export const textMimeTypes =
/^(text\/(x-c|x-csharp|tab-separated-values|x-c\+\+|x-h|x-java|html|markdown|x-php|x-python|x-script\.python|x-ruby|x-tex|plain|css|vtt|javascript|csv|xml))$/;
export const applicationMimeTypes =
/^(application\/(epub\+zip|csv|json|pdf|x-tar|x-sh|typescript|sql|yaml|x-parquet|vnd\.apache\.parquet|vnd\.coffeescript|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation|spreadsheetml\.sheet)|xml|zip))$/;
/^(application\/(epub\+zip|csv|json|msword|pdf|x-tar|x-sh|typescript|sql|yaml|x-parquet|vnd\.apache\.parquet|vnd\.coffeescript|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation|spreadsheetml\.sheet)|xml|zip))$/;
export const imageMimeTypes = /^image\/(jpeg|gif|png|webp|heic|heif)$/;

View file

@ -49,6 +49,7 @@ export enum Providers {
export const documentSupportedProviders = new Set<string>([
EModelEndpoint.anthropic,
EModelEndpoint.openAI,
EModelEndpoint.bedrock,
EModelEndpoint.custom,
// handled in AttachFileMenu and DragDropModal since azureOpenAI only supports documents with Use Responses API set to true
// EModelEndpoint.azureOpenAI,