LibreChat/packages/api/src/files/encode/document.ts
Dustin Healy 1d0a4c501f
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
🪨 feat: AWS Bedrock Document Uploads (#11912)
* feat: add aws bedrock upload to provider support

* chore: address copilot comments

* feat: add shared Bedrock document format types and MIME mapping

Bedrock Converse API accepts 9 document formats beyond PDF. Add
BedrockDocumentFormat union type, MIME-to-format mapping, and helpers
in data-provider so both client and backend can reference them.

* refactor: generalize Bedrock PDF validation to support all document types

Rename validateBedrockPdf to validateBedrockDocument with MIME-aware
logic: 4.5MB hard limit applies to all types, PDF header check only
runs for application/pdf. Adds test coverage for non-PDF documents.

* feat: support all Bedrock document formats in encoding pipeline

Widen file type gates to accept csv, doc, docx, xls, xlsx, html, txt,
md for Bedrock. Uses shared MIME-to-format map instead of hardcoded
'pdf'. Other providers' PDF-only paths remain unchanged.

* feat: expand Bedrock file upload UI to accept all document types

Add 'image_document_extended' upload type for Bedrock with accept
filters for all 9 supported formats. Update drag-and-drop validation
to use isBedrockDocumentType helper.

* fix: route Bedrock document types through provider pipeline
2026-02-23 22:32:44 -05:00

173 lines
5.2 KiB
TypeScript

import { Providers } from '@librechat/agents';
import {
isOpenAILikeProvider,
isBedrockDocumentType,
bedrockDocumentFormats,
isDocumentSupportedProvider,
} from 'librechat-data-provider';
import type { IMongoFile } from '@librechat/data-schemas';
import type {
AnthropicDocumentBlock,
StrategyFunctions,
DocumentResult,
ServerRequest,
} from '~/types';
import { validatePdf, validateBedrockDocument } from '~/files/validation';
import { getFileStream, getConfiguredFileSizeLimit } from './utils';
/**
 * Builds a document name acceptable to the Bedrock Converse API.
 *
 * Bedrock only allows alphanumerics, whitespace (no more than one in a row),
 * hyphens, parentheses and square brackets in a document name, capped at 200
 * characters. Any other character is replaced with `_`, whitespace runs
 * (including tabs and newlines) are collapsed to a single space, and the
 * result is trimmed.
 *
 * @param filename - Original file name; may be empty or missing
 * @returns A non-empty sanitized name, falling back to `document`
 */
function sanitizeBedrockDocumentName(filename?: string | null): string {
  const sanitized = (filename || 'document')
    .replace(/[^a-zA-Z0-9\s\-()[\]]/g, '_')
    // Consecutive whitespace is rejected by Bedrock, so collapse every run.
    .replace(/\s+/g, ' ')
    .trim()
    .slice(0, 200);
  // A whitespace-only filename trims down to '', which Bedrock would reject.
  return sanitized || 'document';
}

/**
 * Processes and encodes document files for various providers.
 *
 * For Bedrock every MIME type accepted by `isBedrockDocumentType` is encoded;
 * for all other providers only `application/pdf` is encoded, and only when
 * `isDocumentSupportedProvider` reports support. Files outside those rules are
 * skipped and contribute nothing to the result.
 *
 * @param req - Express request object
 * @param files - Array of file objects to process
 * @param params - Object containing provider, endpoint, and other options
 * @param params.provider - The provider name
 * @param params.endpoint - Optional endpoint name for file config lookup
 * @param params.useResponsesApi - Whether to use responses API format
 * @param getStrategyFunctions - Function to get strategy functions
 * @returns Promise that resolves to documents and file metadata
 * @throws Error when a loaded document fails size/format validation
 */
export async function encodeAndFormatDocuments(
  req: ServerRequest,
  files: IMongoFile[],
  params: { provider: Providers; endpoint?: string; useResponsesApi?: boolean },
  getStrategyFunctions: (source: string) => StrategyFunctions,
): Promise<DocumentResult> {
  const { provider, endpoint, useResponsesApi } = params;
  if (!files?.length) {
    return { documents: [], files: [] };
  }

  const encodingMethods: Record<string, StrategyFunctions> = {};
  const result: DocumentResult = { documents: [], files: [] };
  const isBedrock = provider === Providers.BEDROCK;
  const isDocSupported = isDocumentSupportedProvider(provider);

  // Single gate: only files that will actually be encoded are streamed.
  const processableFiles = files.filter((file) =>
    isBedrock
      ? isBedrockDocumentType(file.type)
      : file.type === 'application/pdf' && isDocSupported,
  );
  if (!processableFiles.length) {
    return result;
  }

  // allSettled: one unreadable file must not prevent the others from loading.
  const results = await Promise.allSettled(
    processableFiles.map((file) => getFileStream(req, file, encodingMethods, getStrategyFunctions)),
  );

  for (const settledResult of results) {
    if (settledResult.status === 'rejected') {
      console.error('Document processing failed:', settledResult.reason);
      continue;
    }

    const processed = settledResult.value;
    if (!processed) continue;

    const { file, content, metadata } = processed;
    if (!content || !file) {
      // No encodable content: keep the metadata (when present) and move on.
      if (metadata) result.files.push(metadata);
      continue;
    }

    const configuredFileSizeLimit = getConfiguredFileSizeLimit(req, { provider, endpoint });
    const mimeType = file.type ?? '';

    if (isBedrock && isBedrockDocumentType(mimeType)) {
      // Bedrock takes raw bytes plus a short format name rather than base64 text.
      const fileBuffer = Buffer.from(content, 'base64');
      const format = bedrockDocumentFormats[mimeType];
      const validation = await validateBedrockDocument(
        fileBuffer.length,
        mimeType,
        fileBuffer,
        configuredFileSizeLimit,
      );
      if (!validation.isValid) {
        throw new Error(`Document validation failed: ${validation.error}`);
      }

      result.documents.push({
        type: 'document',
        document: {
          name: sanitizeBedrockDocumentName(file.filename),
          format,
          source: {
            bytes: fileBuffer,
          },
        },
      });
      result.files.push(metadata);
    } else if (file.type === 'application/pdf' && isDocSupported) {
      const pdfBuffer = Buffer.from(content, 'base64');
      const validation = await validatePdf(
        pdfBuffer,
        pdfBuffer.length,
        provider,
        configuredFileSizeLimit,
      );
      if (!validation.isValid) {
        throw new Error(`PDF validation failed: ${validation.error}`);
      }

      // Each provider family expects its own document block shape. If no
      // branch matches, only the metadata below is recorded.
      if (provider === Providers.ANTHROPIC) {
        const document: AnthropicDocumentBlock = {
          type: 'document',
          source: {
            type: 'base64',
            media_type: 'application/pdf',
            data: content,
          },
          citations: { enabled: true },
        };
        if (file.filename) {
          document.context = `File: "${file.filename}"`;
        }
        result.documents.push(document);
      } else if (useResponsesApi) {
        result.documents.push({
          type: 'input_file',
          filename: file.filename,
          file_data: `data:application/pdf;base64,${content}`,
        });
      } else if (provider === Providers.GOOGLE || provider === Providers.VERTEXAI) {
        result.documents.push({
          type: 'media',
          mimeType: 'application/pdf',
          data: content,
        });
      } else if (isOpenAILikeProvider(provider) && provider !== Providers.AZURE) {
        result.documents.push({
          type: 'file',
          file: {
            filename: file.filename,
            file_data: `data:application/pdf;base64,${content}`,
          },
        });
      }
      result.files.push(metadata);
    }
  }

  return result;
}