LibreChat/packages/api/src/files/documents/crud.spec.ts
Danny Avila 68435cdcd0
🧯 fix: Add Pre-Parse File Size Guard to Document Parser (#12275)
Prevent memory exhaustion DoS by rejecting documents exceeding 15MB
before reading them into memory, closing the gap between the 512MB
upload limit and unbounded in-memory parsing.
2026-03-17 02:36:18 -04:00

172 lines
5.2 KiB
TypeScript

import path from 'path';
import { parseDocument } from './crud';
describe('Document Parser', () => {
// DOCX happy path: the fixture is resolved relative to this spec file's directory.
test('parseDocument() parses text from docx', async () => {
  const fixtureName = 'sample.docx';
  // Only the fields the test cares about are set; the cast lets this partial
  // object stand in for a full Multer file.
  const upload = {
    originalname: fixtureName,
    path: path.join(__dirname, fixtureName),
    mimetype: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  } as Express.Multer.File;

  // For this fixture the expected `bytes` equals the length of the expected text.
  const expected = {
    bytes: 29,
    filename: fixtureName,
    filepath: 'document_parser',
    images: [],
    text: 'This is a sample DOCX file.\n\n',
  };

  const parsed = await parseDocument({ file: upload });
  expect(parsed).toEqual(expected);
});
// XLSX happy path: the expected text has one "<sheet name>:" header per sheet,
// followed by comma-separated rows.
test('parseDocument() parses text from xlsx', async () => {
  const fixtureName = 'sample.xlsx';
  const upload = {
    originalname: fixtureName,
    path: path.join(__dirname, fixtureName),
    mimetype: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  } as Express.Multer.File;

  // Two sheets are expected, in workbook order.
  const expectedText =
    'Sheet One:\nData,on,first,sheet\nSecond Sheet:\nData,On\nSecond,Sheet\n';

  const parsed = await parseDocument({ file: upload });
  expect(parsed).toEqual({
    bytes: 66,
    filename: fixtureName,
    filepath: 'document_parser',
    images: [],
    text: expectedText,
  });
});
// Legacy .xls happy path using the canonical 'application/vnd.ms-excel' MIME type.
test('parseDocument() parses text from xls', async () => {
  const fixtureName = 'sample.xls';
  const upload = {
    originalname: fixtureName,
    path: path.join(__dirname, fixtureName),
    mimetype: 'application/vnd.ms-excel',
  } as Express.Multer.File;

  // The expected output contains a single sheet.
  const expected = {
    bytes: 31,
    filename: fixtureName,
    filepath: 'document_parser',
    images: [],
    text: 'Sheet One:\nData,on,first,sheet\n',
  };

  const parsed = await parseDocument({ file: upload });
  expect(parsed).toEqual(expected);
});
// OpenDocument spreadsheet happy path: two sheets are expected in the output.
test('parseDocument() parses text from ods', async () => {
  const fixtureName = 'sample.ods';
  const upload = {
    originalname: fixtureName,
    path: path.join(__dirname, fixtureName),
    mimetype: 'application/vnd.oasis.opendocument.spreadsheet',
  } as Express.Multer.File;

  const expectedText =
    'Sheet One:\nData,on,first,sheet\nSecond Sheet:\nData,On\nSecond,Sheet\n';

  const parsed = await parseDocument({ file: upload });
  expect(parsed).toEqual({
    bytes: 66,
    filename: fixtureName,
    filepath: 'document_parser',
    images: [],
    text: expectedText,
  });
});
// Alternate MIME strings that should all be handled as a regular .xls workbook.
// Each entry is substituted for `%s` in the test title and passed to the callback.
const xlsMimeVariants = [
  'application/msexcel',
  'application/x-msexcel',
  'application/x-ms-excel',
  'application/x-excel',
  'application/x-dos_ms_excel',
  'application/xls',
  'application/x-xls',
];

test.each(xlsMimeVariants)(
  'parseDocument() parses xls with variant MIME type: %s',
  async (variant) => {
    const fixtureName = 'sample.xls';
    // Same fixture for every case; only the reported MIME type changes.
    const upload = {
      originalname: fixtureName,
      path: path.join(__dirname, fixtureName),
      mimetype: variant,
    } as Express.Multer.File;

    const expected = {
      bytes: 31,
      filename: fixtureName,
      filepath: 'document_parser',
      images: [],
      text: 'Sheet One:\nData,on,first,sheet\n',
    };

    const parsed = await parseDocument({ file: upload });
    expect(parsed).toEqual(expected);
  },
);
// An unrecognised MIME type must be rejected with a message naming that type.
test('parseDocument() throws error for unhandled document type', async () => {
  const fixtureName = 'nonexistent.file';
  // NOTE(review): judging by its name this path is not expected to exist on disk,
  // so the rejection presumably happens before the file is read; confirm in crud.ts.
  const upload = {
    originalname: fixtureName,
    path: path.join(__dirname, fixtureName),
    mimetype: 'application/invalid',
  } as Express.Multer.File;

  const pending = parseDocument({ file: upload });
  await expect(pending).rejects.toThrow(
    'Unsupported file type in document parser: application/invalid',
  );
});
// A DOCX fixture named empty.docx must be rejected with 'No text found in document'.
test('parseDocument() throws error for empty document', async () => {
  const fixtureName = 'empty.docx';
  const upload = {
    originalname: fixtureName,
    path: path.join(__dirname, fixtureName),
    mimetype: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  } as Express.Multer.File;

  const pending = parseDocument({ file: upload });
  await expect(pending).rejects.toThrow('No text found in document');
});
// Size guard: a file declaring 16 MiB must be rejected, and the error message
// must mention both the 15MB limit and the 16MB offending size.
test('parseDocument() rejects files exceeding the pre-parse size limit', async () => {
  const declaredSize = 16 * 1024 * 1024;
  // The path reuses the sample.docx fixture; only `size` is declared as oversized.
  const upload = {
    originalname: 'oversized.docx',
    path: path.join(__dirname, 'sample.docx'),
    mimetype: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    size: declaredSize,
  } as Express.Multer.File;

  const pending = parseDocument({ file: upload });
  await expect(pending).rejects.toThrow(
    /exceeds the 15MB document parser limit \(16MB\)/,
  );
});
// Boundary case: a declared size of exactly 15 MiB must still be accepted.
test('parseDocument() allows files exactly at the size limit boundary', async () => {
  const fixtureName = 'sample.docx';
  const declaredSize = 15 * 1024 * 1024;
  const upload = {
    originalname: fixtureName,
    path: path.join(__dirname, fixtureName),
    mimetype: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    size: declaredSize,
  } as Express.Multer.File;

  // Only successful resolution is asserted here, not the parsed content.
  const pending = parseDocument({ file: upload });
  await expect(pending).resolves.toBeDefined();
});
// For the empty.xlsx fixture the expected text is just the sheet name header
// ("Empty:") followed by a blank line; the call resolves rather than throwing.
test('parseDocument() parses empty xlsx with only sheet name', async () => {
  const fixtureName = 'empty.xlsx';
  const upload = {
    originalname: fixtureName,
    path: path.join(__dirname, fixtureName),
    mimetype: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  } as Express.Multer.File;

  const expected = {
    bytes: 8,
    filename: fixtureName,
    filepath: 'document_parser',
    images: [],
    text: 'Empty:\n\n',
  };

  const parsed = await parseDocument({ file: upload });
  expect(parsed).toEqual(expected);
});
// Checks the shape of the 'xlsx' module itself: `read` and `utils.sheet_to_csv`
// must be reachable as named exports and be callable.
test('xlsx exports read and utils as named imports', async () => {
  const { read: readWorkbook, utils: sheetUtils } = await import('xlsx');

  const readKind = typeof readWorkbook;
  // Optional chaining keeps a missing `utils` export an assertion failure, not a TypeError.
  const sheetToCsvKind = typeof sheetUtils?.sheet_to_csv;

  expect(readKind).toBe('function');
  expect(sheetToCsvKind).toBe('function');
});
});