mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-25 11:54:08 +01:00
🪨 feat: AWS Bedrock Document Uploads (#11912)
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
* feat: add aws bedrock upload to provider support
* chore: address copilot comments
* feat: add shared Bedrock document format types and MIME mapping
  Bedrock Converse API accepts 9 document formats, not just PDF. Add BedrockDocumentFormat union type, MIME-to-format mapping, and helpers in data-provider so both client and backend can reference them.
* refactor: generalize Bedrock PDF validation to support all document types
  Rename validateBedrockPdf to validateBedrockDocument with MIME-aware logic: the 4.5MB hard limit applies to all types, while the PDF header check only runs for application/pdf. Adds test coverage for non-PDF documents.
* feat: support all Bedrock document formats in encoding pipeline
  Widen file type gates to accept csv, doc, docx, xls, xlsx, html, txt, md for Bedrock. Uses the shared MIME-to-format map instead of a hardcoded 'pdf'. Other providers' PDF-only paths remain unchanged.
* feat: expand Bedrock file upload UI to accept all document types
  Add 'image_document_extended' upload type for Bedrock with accept filters for all 9 supported formats. Update drag-and-drop validation to use the isBedrockDocumentType helper.
* fix: route Bedrock document types through provider pipeline
This commit is contained in:
parent
b349f2f876
commit
1d0a4c501f
10 changed files with 528 additions and 47 deletions
|
|
@ -7,6 +7,7 @@ import { encodeAndFormatDocuments } from './document';
|
|||
/** Mock the validation module */
|
||||
jest.mock('~/files/validation', () => ({
|
||||
validatePdf: jest.fn(),
|
||||
validateBedrockDocument: jest.fn(),
|
||||
}));
|
||||
|
||||
/** Mock the utils module */
|
||||
|
|
@ -15,11 +16,14 @@ jest.mock('./utils', () => ({
|
|||
getConfiguredFileSizeLimit: jest.fn(),
|
||||
}));
|
||||
|
||||
import { validatePdf } from '~/files/validation';
|
||||
import { validatePdf, validateBedrockDocument } from '~/files/validation';
|
||||
import { getFileStream, getConfiguredFileSizeLimit } from './utils';
|
||||
import { Types } from 'mongoose';
|
||||
|
||||
const mockedValidatePdf = validatePdf as jest.MockedFunction<typeof validatePdf>;
|
||||
const mockedValidateBedrockDocument = validateBedrockDocument as jest.MockedFunction<
|
||||
typeof validateBedrockDocument
|
||||
>;
|
||||
const mockedGetFileStream = getFileStream as jest.MockedFunction<typeof getFileStream>;
|
||||
const mockedGetConfiguredFileSizeLimit = getConfiguredFileSizeLimit as jest.MockedFunction<
|
||||
typeof getConfiguredFileSizeLimit
|
||||
|
|
@ -84,6 +88,26 @@ describe('encodeAndFormatDocuments - fileConfig integration', () => {
|
|||
updatedAt: new Date(),
|
||||
}) as unknown as IMongoFile;
|
||||
|
||||
/**
 * Builds a mock file record with a caller-chosen MIME type and filename,
 * cast to `IMongoFile` for the document-encoding tests.
 * @param sizeInMB - Size in megabytes; converted to a whole number of bytes
 * @param mimeType - Value stored in the record's `type` field
 * @param filename - Value stored in `filename` and appended to the mock `filepath`
 */
const createMockDocFile = (sizeInMB: number, mimeType: string, filename: string): IMongoFile => {
  const bytesPerMB = 1024 * 1024;
  const record = {
    _id: new Types.ObjectId(),
    user: new Types.ObjectId(),
    file_id: new Types.ObjectId().toString(),
    filename,
    type: mimeType,
    // Fractional megabyte sizes are truncated to a whole byte count.
    bytes: Math.floor(sizeInMB * bytesPerMB),
    object: 'file',
    usage: 0,
    source: 'test',
    filepath: `/test/path/${filename}`,
    createdAt: new Date(),
    updatedAt: new Date(),
  };
  // The literal is only a partial stand-in, hence the double cast.
  return record as unknown as IMongoFile;
};
|
||||
|
||||
describe('Configuration extraction and validation', () => {
|
||||
it('should pass configured file size limit to validatePdf for OpenAI', async () => {
|
||||
const configuredLimit = mbToBytes(15);
|
||||
|
|
@ -500,6 +524,165 @@ describe('encodeAndFormatDocuments - fileConfig integration', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('should format Bedrock document with valid PDF', async () => {
  // Arrange: the file stream resolves to base64 content and Bedrock validation passes.
  const request = createMockRequest() as ServerRequest;
  const pdfFile = createMockFile(3);
  const encodedContent = Buffer.from('test-pdf-content').toString('base64');
  mockedGetFileStream.mockResolvedValue({
    file: pdfFile,
    content: encodedContent,
    metadata: pdfFile,
  });
  mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });

  // Act
  const { documents } = await encodeAndFormatDocuments(
    request,
    [pdfFile],
    { provider: Providers.BEDROCK },
    mockStrategyFunctions,
  );

  // Assert: exactly one document block, tagged with the 'pdf' format and carrying a Buffer.
  const expectedBlock = {
    type: 'document',
    document: {
      name: 'test_pdf',
      format: 'pdf',
      source: { bytes: expect.any(Buffer) },
    },
  };
  expect(documents).toHaveLength(1);
  expect(documents[0]).toMatchObject(expectedBlock);
});
|
||||
|
||||
it('should format Bedrock CSV document', async () => {
  // Arrange: a text/csv mock file whose stream resolves to base64 content.
  const request = createMockRequest() as ServerRequest;
  const csvFile = createMockDocFile(1, 'text/csv', 'data.csv');
  const encodedContent = Buffer.from('col1,col2\nval1,val2').toString('base64');
  mockedGetFileStream.mockResolvedValue({
    file: csvFile,
    content: encodedContent,
    metadata: csvFile,
  });
  mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });

  // Act
  const { documents } = await encodeAndFormatDocuments(
    request,
    [csvFile],
    { provider: Providers.BEDROCK },
    mockStrategyFunctions,
  );

  // Assert: the block carries the sanitized name and the 'csv' format.
  const expectedBlock = {
    type: 'document',
    document: {
      name: 'data_csv',
      format: 'csv',
      source: { bytes: expect.any(Buffer) },
    },
  };
  expect(documents).toHaveLength(1);
  expect(documents[0]).toMatchObject(expectedBlock);
});
|
||||
|
||||
it('should format Bedrock DOCX document', async () => {
  // Arrange: a mock file using the OOXML word-processing MIME type.
  const request = createMockRequest() as ServerRequest;
  const docxMime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
  const docxFile = createMockDocFile(2, docxMime, 'report.docx');
  const encodedContent = Buffer.from('docx-binary-content').toString('base64');
  mockedGetFileStream.mockResolvedValue({
    file: docxFile,
    content: encodedContent,
    metadata: docxFile,
  });
  mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });

  // Act
  const { documents } = await encodeAndFormatDocuments(
    request,
    [docxFile],
    { provider: Providers.BEDROCK },
    mockStrategyFunctions,
  );

  // Assert: the block carries the sanitized name and the 'docx' format.
  const expectedBlock = {
    type: 'document',
    document: {
      name: 'report_docx',
      format: 'docx',
      source: { bytes: expect.any(Buffer) },
    },
  };
  expect(documents).toHaveLength(1);
  expect(documents[0]).toMatchObject(expectedBlock);
});
|
||||
|
||||
it('should format Bedrock plain text document', async () => {
  // Arrange: a text/plain mock file whose stream resolves to base64 content.
  const request = createMockRequest() as ServerRequest;
  const textFile = createMockDocFile(0.5, 'text/plain', 'notes.txt');
  const encodedContent = Buffer.from('plain text content').toString('base64');
  mockedGetFileStream.mockResolvedValue({
    file: textFile,
    content: encodedContent,
    metadata: textFile,
  });
  mockedValidateBedrockDocument.mockResolvedValue({ isValid: true });

  // Act
  const { documents } = await encodeAndFormatDocuments(
    request,
    [textFile],
    { provider: Providers.BEDROCK },
    mockStrategyFunctions,
  );

  // Assert: the block carries the sanitized name and the 'txt' format.
  const expectedBlock = {
    type: 'document',
    document: {
      name: 'notes_txt',
      format: 'txt',
      source: { bytes: expect.any(Buffer) },
    },
  };
  expect(documents).toHaveLength(1);
  expect(documents[0]).toMatchObject(expectedBlock);
});
|
||||
|
||||
it('should reject Bedrock document when validation fails', async () => {
  // Arrange: the stream succeeds, but the mocked validator reports a failure.
  const request = createMockRequest() as ServerRequest;
  const oversizedCsv = createMockDocFile(5, 'text/csv', 'big.csv');
  const encodedContent = Buffer.from('large-csv-content').toString('base64');
  mockedGetFileStream.mockResolvedValue({
    file: oversizedCsv,
    content: encodedContent,
    metadata: oversizedCsv,
  });
  const failure = {
    isValid: false,
    error: 'File size (5.0MB) exceeds the 4.5MB limit for Bedrock',
  };
  mockedValidateBedrockDocument.mockResolvedValue(failure);

  // Act
  const pending = encodeAndFormatDocuments(
    request,
    [oversizedCsv],
    { provider: Providers.BEDROCK },
    mockStrategyFunctions,
  );

  // Assert: the validator's failure surfaces as a rejected promise.
  await expect(pending).rejects.toThrow('Document validation failed');
});
|
||||
|
||||
it('should format OpenAI document with responses API', async () => {
|
||||
const req = createMockRequest(15) as ServerRequest;
|
||||
const file = createMockFile(10);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,10 @@
|
|||
import { Providers } from '@librechat/agents';
|
||||
import { isOpenAILikeProvider, isDocumentSupportedProvider } from 'librechat-data-provider';
|
||||
import {
|
||||
isOpenAILikeProvider,
|
||||
isBedrockDocumentType,
|
||||
bedrockDocumentFormats,
|
||||
isDocumentSupportedProvider,
|
||||
} from 'librechat-data-provider';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type {
|
||||
AnthropicDocumentBlock,
|
||||
|
|
@ -7,8 +12,8 @@ import type {
|
|||
DocumentResult,
|
||||
ServerRequest,
|
||||
} from '~/types';
|
||||
import { validatePdf, validateBedrockDocument } from '~/files/validation';
|
||||
import { getFileStream, getConfiguredFileSizeLimit } from './utils';
|
||||
import { validatePdf } from '~/files/validation';
|
||||
|
||||
/**
|
||||
* Processes and encodes document files for various providers
|
||||
|
|
@ -35,9 +40,15 @@ export async function encodeAndFormatDocuments(
|
|||
const encodingMethods: Record<string, StrategyFunctions> = {};
|
||||
const result: DocumentResult = { documents: [], files: [] };
|
||||
|
||||
const documentFiles = files.filter(
|
||||
(file) => file.type === 'application/pdf' || file.type?.startsWith('application/'),
|
||||
);
|
||||
const isBedrock = provider === Providers.BEDROCK;
|
||||
const isDocSupported = isDocumentSupportedProvider(provider);
|
||||
|
||||
const documentFiles = files.filter((file) => {
|
||||
if (isBedrock && isBedrockDocumentType(file.type)) {
|
||||
return true;
|
||||
}
|
||||
return file.type === 'application/pdf' || file.type?.startsWith('application/');
|
||||
});
|
||||
|
||||
if (!documentFiles.length) {
|
||||
return result;
|
||||
|
|
@ -45,7 +56,10 @@ export async function encodeAndFormatDocuments(
|
|||
|
||||
const results = await Promise.allSettled(
|
||||
documentFiles.map((file) => {
|
||||
if (file.type !== 'application/pdf' || !isDocumentSupportedProvider(provider)) {
|
||||
const isProcessable = isBedrock
|
||||
? isBedrockDocumentType(file.type)
|
||||
: file.type === 'application/pdf' && isDocSupported;
|
||||
if (!isProcessable) {
|
||||
return Promise.resolve(null);
|
||||
}
|
||||
return getFileStream(req, file, encodingMethods, getStrategyFunctions);
|
||||
|
|
@ -68,14 +82,40 @@ export async function encodeAndFormatDocuments(
|
|||
continue;
|
||||
}
|
||||
|
||||
if (file.type === 'application/pdf' && isDocumentSupportedProvider(provider)) {
|
||||
const pdfBuffer = Buffer.from(content, 'base64');
|
||||
const configuredFileSizeLimit = getConfiguredFileSizeLimit(req, { provider, endpoint });
|
||||
const mimeType = file.type ?? '';
|
||||
|
||||
/** Extract configured file size limit from fileConfig for this endpoint */
|
||||
const configuredFileSizeLimit = getConfiguredFileSizeLimit(req, {
|
||||
provider,
|
||||
endpoint,
|
||||
if (isBedrock && isBedrockDocumentType(mimeType)) {
|
||||
const fileBuffer = Buffer.from(content, 'base64');
|
||||
const format = bedrockDocumentFormats[mimeType];
|
||||
|
||||
const validation = await validateBedrockDocument(
|
||||
fileBuffer.length,
|
||||
mimeType,
|
||||
fileBuffer,
|
||||
configuredFileSizeLimit,
|
||||
);
|
||||
|
||||
if (!validation.isValid) {
|
||||
throw new Error(`Document validation failed: ${validation.error}`);
|
||||
}
|
||||
|
||||
const sanitizedName = (file.filename || 'document')
|
||||
.replace(/[^a-zA-Z0-9\s\-()[\]]/g, '_')
|
||||
.slice(0, 200);
|
||||
result.documents.push({
|
||||
type: 'document',
|
||||
document: {
|
||||
name: sanitizedName,
|
||||
format,
|
||||
source: {
|
||||
bytes: fileBuffer,
|
||||
},
|
||||
},
|
||||
});
|
||||
result.files.push(metadata);
|
||||
} else if (file.type === 'application/pdf' && isDocSupported) {
|
||||
const pdfBuffer = Buffer.from(content, 'base64');
|
||||
|
||||
const validation = await validatePdf(
|
||||
pdfBuffer,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { Providers } from '@librechat/agents';
|
||||
import { mbToBytes } from 'librechat-data-provider';
|
||||
import { validatePdf, validateVideo, validateAudio } from './validation';
|
||||
import { validatePdf, validateBedrockDocument, validateVideo, validateAudio } from './validation';
|
||||
|
||||
describe('PDF Validation with fileConfig.endpoints.*.fileSizeLimit', () => {
|
||||
/** Helper to create a PDF buffer with valid header */
|
||||
|
|
@ -145,6 +145,122 @@ describe('PDF Validation with fileConfig.endpoints.*.fileSizeLimit', () => {
|
|||
});
|
||||
});
|
||||
|
||||
/** validatePdf behaviour when the provider is Bedrock: size limits and PDF header checks. */
describe('validatePdf - Bedrock provider', () => {
  const provider = Providers.BEDROCK;

  it('should accept PDF within provider limit when no config provided', async () => {
    const pdf = createMockPdfBuffer(3);

    const { isValid, error } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should reject PDF exceeding 4.5MB hard limit when no config provided', async () => {
    const pdf = createMockPdfBuffer(5);

    const { isValid, error } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('4.5MB');
  });

  it('should use configured limit when it is lower than provider limit', async () => {
    const lowerLimit = mbToBytes(2);
    const pdf = createMockPdfBuffer(3);

    const { isValid, error } = await validatePdf(pdf, pdf.length, provider, lowerLimit);

    expect(isValid).toBe(false);
    expect(error).toContain('2.0MB');
  });

  it('should clamp to 4.5MB hard limit even when config is higher', async () => {
    const higherLimit = mbToBytes(512);
    const pdf = createMockPdfBuffer(5);

    const { isValid, error } = await validatePdf(pdf, pdf.length, provider, higherLimit);

    expect(isValid).toBe(false);
    expect(error).toContain('4.5MB');
  });

  it('should reject PDFs with invalid header', async () => {
    // 1KB buffer that starts with 'INVALID' rather than a PDF header.
    const bogus = Buffer.alloc(1024);
    bogus.write('INVALID', 0);

    const { isValid, error } = await validatePdf(bogus, bogus.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('PDF header');
  });

  it('should reject PDFs that are too small', async () => {
    const tiny = Buffer.alloc(3);

    const { isValid, error } = await validatePdf(tiny, tiny.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('too small');
  });
});
|
||||
|
||||
/** validateBedrockDocument behaviour for non-PDF MIME types, plus the PDF-only header check. */
describe('validateBedrockDocument - non-PDF types', () => {
  it('should accept CSV within 4.5MB limit', async () => {
    const twoMB = 2 * 1024 * 1024;

    const { isValid, error } = await validateBedrockDocument(twoMB, 'text/csv');

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should accept DOCX within 4.5MB limit', async () => {
    const threeMB = 3 * 1024 * 1024;
    const docxMime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';

    const { isValid, error } = await validateBedrockDocument(threeMB, docxMime);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should reject non-PDF document exceeding 4.5MB hard limit', async () => {
    const fiveMB = 5 * 1024 * 1024;

    const { isValid, error } = await validateBedrockDocument(fiveMB, 'text/plain');

    expect(isValid).toBe(false);
    expect(error).toContain('4.5MB');
  });

  it('should clamp to 4.5MB even when config is higher for non-PDF', async () => {
    const fiveMB = 5 * 1024 * 1024;
    const higherLimit = mbToBytes(512);

    // No buffer is passed; only the size check is exercised.
    const { isValid, error } = await validateBedrockDocument(
      fiveMB,
      'text/html',
      undefined,
      higherLimit,
    );

    expect(isValid).toBe(false);
    expect(error).toContain('4.5MB');
  });

  it('should use configured limit when lower than provider limit for non-PDF', async () => {
    const threeMB = 3 * 1024 * 1024;
    const lowerLimit = mbToBytes(2);

    const { isValid, error } = await validateBedrockDocument(
      threeMB,
      'text/markdown',
      undefined,
      lowerLimit,
    );

    expect(isValid).toBe(false);
    expect(error).toContain('2.0MB');
  });

  it('should not run PDF header check on non-PDF types', async () => {
    const csvBytes = Buffer.from('NOT-A-PDF-HEADER-but-valid-csv-content');

    const { isValid } = await validateBedrockDocument(csvBytes.length, 'text/csv', csvBytes);

    expect(isValid).toBe(true);
  });

  it('should still run PDF header check when mimeType is application/pdf', async () => {
    // 1KB buffer that starts with 'INVALID' rather than a PDF header.
    const bogus = Buffer.alloc(1024);
    bogus.write('INVALID', 0);

    const { isValid, error } = await validateBedrockDocument(
      bogus.length,
      'application/pdf',
      bogus,
    );

    expect(isValid).toBe(false);
    expect(error).toContain('PDF header');
  });
});
|
||||
|
||||
describe('validatePdf - Google provider', () => {
|
||||
const provider = Providers.GOOGLE;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,11 @@
|
|||
import { Providers } from '@librechat/agents';
|
||||
import { mbToBytes, isOpenAILikeProvider } from 'librechat-data-provider';
|
||||
|
||||
/** Outcome of a file validation check. */
export interface ValidationResult {
  /** `true` when the file passed validation. */
  isValid: boolean;
  /** Human-readable reason; optional, and omitted by validators on success. */
  error?: string;
}
|
||||
|
||||
export interface PDFValidationResult {
|
||||
isValid: boolean;
|
||||
error?: string;
|
||||
|
|
@ -31,6 +36,10 @@ export async function validatePdf(
|
|||
return validateAnthropicPdf(pdfBuffer, fileSize, configuredFileSizeLimit);
|
||||
}
|
||||
|
||||
if (provider === Providers.BEDROCK) {
|
||||
return validateBedrockDocument(fileSize, 'application/pdf', pdfBuffer, configuredFileSizeLimit);
|
||||
}
|
||||
|
||||
if (isOpenAILikeProvider(provider)) {
|
||||
return validateOpenAIPdf(fileSize, configuredFileSizeLimit);
|
||||
}
|
||||
|
|
@ -113,6 +122,64 @@ async function validateAnthropicPdf(
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Validates a document destined for Bedrock.
 *
 * The size check applies to every MIME type. The PDF header check runs only
 * when `mimeType` is `application/pdf` and a buffer was supplied.
 *
 * @param fileSize - The file size in bytes
 * @param mimeType - The MIME type of the document
 * @param fileBuffer - The file contents; only inspected for PDF header validation
 * @param configuredFileSizeLimit - Optional limit from fileConfig (in bytes); it can
 *   lower the effective limit but never raise it above 4.5MB
 * @returns Promise resolving to `{ isValid: true }`, or `{ isValid: false, error }`
 *   describing the failure. Errors thrown during validation are logged and
 *   reported as a failed result rather than rethrown.
 */
export async function validateBedrockDocument(
  fileSize: number,
  mimeType: string,
  fileBuffer?: Buffer,
  configuredFileSizeLimit?: number,
): Promise<ValidationResult> {
  const bytesPerMB = 1024 * 1024;
  const invalid = (error: string): ValidationResult => ({ isValid: false, error });

  try {
    /** Bedrock enforces a hard 4.5MB per-document limit at the API level; config can only lower it */
    const hardLimit = mbToBytes(4.5);
    const sizeCap =
      configuredFileSizeLimit == null ? hardLimit : Math.min(configuredFileSizeLimit, hardLimit);

    if (fileSize > sizeCap) {
      const capMB = (sizeCap / bytesPerMB).toFixed(1);
      const actualMB = (fileSize / bytesPerMB).toFixed(1);
      return invalid(`File size (${actualMB}MB) exceeds the ${capMB}MB limit for Bedrock`);
    }

    // Non-PDF documents, or PDFs without a buffer, only get the size check.
    if (mimeType !== 'application/pdf' || !fileBuffer) {
      return { isValid: true };
    }

    // A buffer shorter than the 5-byte '%PDF-' marker cannot hold a PDF header.
    if (fileBuffer.length < 5) {
      return invalid('Invalid PDF file: too small or corrupted');
    }

    const leadingBytes = fileBuffer.subarray(0, 5).toString();
    if (!leadingBytes.startsWith('%PDF-')) {
      return invalid('Invalid PDF file: missing PDF header');
    }

    return { isValid: true };
  } catch (error) {
    console.error('Bedrock document validation error:', error);
    return invalid('Failed to validate document file');
  }
}
|
||||
|
||||
/**
|
||||
* Validates if a PDF meets OpenAI's requirements
|
||||
* @param fileSize - The file size in bytes
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import type { BedrockDocumentFormat } from 'librechat-data-provider';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { ServerRequest } from './http';
|
||||
import type { Readable } from 'stream';
|
||||
import type { ServerRequest } from './http';
|
||||
export interface STTService {
|
||||
getInstance(): Promise<STTService>;
|
||||
getProviderSchema(req: ServerRequest): Promise<[string, object]>;
|
||||
|
|
@ -95,11 +96,24 @@ export interface OpenAIInputFileBlock {
|
|||
file_data: string;
|
||||
}
|
||||
|
||||
/**
 * Document content block in the shape used by the Bedrock Converse API
 * (passthrough via @langchain/aws).
 */
export interface BedrockDocumentBlock {
  /** Literal tag marking this block as a document. */
  type: 'document';
  document: {
    /** Name attached to the document. */
    name: string;
    /** Bedrock format identifier for the document's content. */
    format: BedrockDocumentFormat;
    source: {
      /** Document content as a Buffer. */
      bytes: Buffer;
    };
  };
}
|
||||
|
||||
export type DocumentBlock =
|
||||
| AnthropicDocumentBlock
|
||||
| GoogleDocumentBlock
|
||||
| OpenAIFileBlock
|
||||
| OpenAIInputFileBlock;
|
||||
| OpenAIInputFileBlock
|
||||
| BedrockDocumentBlock;
|
||||
|
||||
export interface DocumentResult {
|
||||
documents: DocumentBlock[];
|
||||
|
|
|
|||
|
|
@ -139,6 +139,39 @@ export const retrievalMimeTypesList = [
|
|||
|
||||
export const imageExtRegex = /\.(jpg|jpeg|png|gif|webp|heic|heif)$/i;
|
||||
|
||||
/** @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_DocumentBlock.html */
export type BedrockDocumentFormat =
  | 'pdf'
  | 'csv'
  | 'doc'
  | 'docx'
  | 'xls'
  | 'xlsx'
  | 'html'
  | 'txt'
  | 'md';

/** Maps MIME types to Bedrock Converse API document format values */
export const bedrockDocumentFormats: Record<string, BedrockDocumentFormat> = {
  'application/pdf': 'pdf',
  'text/csv': 'csv',
  'application/csv': 'csv',
  'application/msword': 'doc',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
  'application/vnd.ms-excel': 'xls',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
  'text/html': 'html',
  'text/plain': 'txt',
  'text/markdown': 'md',
};

/**
 * Reports whether `mimeType` is one of the keys of `bedrockDocumentFormats`.
 *
 * Uses an own-property check rather than the `in` operator: `in` also matches
 * names inherited from `Object.prototype` (e.g. `constructor`, `toString`,
 * `__proto__`), which would classify such strings as supported and make a
 * subsequent `bedrockDocumentFormats[mimeType]` lookup return a non-format value.
 *
 * @param mimeType - MIME type to test; `undefined`/`null` yields `false`
 * @returns `true` only for MIME types explicitly listed in the map
 */
export const isBedrockDocumentType = (mimeType?: string): boolean =>
  mimeType != null && Object.prototype.hasOwnProperty.call(bedrockDocumentFormats, mimeType);

/**
 * Comma-separated list for file input `accept` attributes covering Bedrock document
 * uploads. Despite the name it holds both file extensions and MIME types.
 */
export const bedrockDocumentExtensions =
  '.pdf,.csv,.doc,.docx,.xls,.xlsx,.html,.htm,.txt,.md,application/pdf,text/csv,application/csv,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,text/html,text/plain,text/markdown';
|
||||
|
||||
export const excelMimeTypes =
|
||||
/^application\/(vnd\.ms-excel|msexcel|x-msexcel|x-ms-excel|x-excel|x-dos_ms_excel|xls|x-xls|vnd\.openxmlformats-officedocument\.spreadsheetml\.sheet)$/;
|
||||
|
||||
|
|
@ -146,7 +179,7 @@ export const textMimeTypes =
|
|||
/^(text\/(x-c|x-csharp|tab-separated-values|x-c\+\+|x-h|x-java|html|markdown|x-php|x-python|x-script\.python|x-ruby|x-tex|plain|css|vtt|javascript|csv|xml))$/;
|
||||
|
||||
export const applicationMimeTypes =
|
||||
/^(application\/(epub\+zip|csv|json|pdf|x-tar|x-sh|typescript|sql|yaml|x-parquet|vnd\.apache\.parquet|vnd\.coffeescript|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation|spreadsheetml\.sheet)|xml|zip))$/;
|
||||
/^(application\/(epub\+zip|csv|json|msword|pdf|x-tar|x-sh|typescript|sql|yaml|x-parquet|vnd\.apache\.parquet|vnd\.coffeescript|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation|spreadsheetml\.sheet)|xml|zip))$/;
|
||||
|
||||
export const imageMimeTypes = /^image\/(jpeg|gif|png|webp|heic|heif)$/;
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ export enum Providers {
|
|||
export const documentSupportedProviders = new Set<string>([
|
||||
EModelEndpoint.anthropic,
|
||||
EModelEndpoint.openAI,
|
||||
EModelEndpoint.bedrock,
|
||||
EModelEndpoint.custom,
|
||||
// handled in AttachFileMenu and DragDropModal since azureOpenAI only supports documents with Use Responses API set to true
|
||||
// EModelEndpoint.azureOpenAI,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue