mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-03-17 21:26:33 +01:00
Prevent memory exhaustion DoS by rejecting documents exceeding 15MB before reading them into memory, closing the gap between the 512MB upload limit and unbounded in-memory parsing.
172 lines
5.2 KiB
TypeScript
172 lines
5.2 KiB
TypeScript
import path from 'path';
|
|
import { parseDocument } from './crud';
|
|
|
|
/**
 * Spec for `parseDocument`, exercised against fixture files stored next to
 * this file. Covers text extraction from docx/xlsx/xls/ods, alternative xls
 * MIME types, the unsupported-type and empty-document failures, and the
 * size guard applied to uploads.
 */
describe('Document Parser', () => {
  const DOCX_MIME = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
  const XLSX_MIME = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
  const XLS_MIME = 'application/vnd.ms-excel';
  const ODS_MIME = 'application/vnd.oasis.opendocument.spreadsheet';
  const MEBIBYTE = 1024 * 1024;

  /** Resolves a fixture name to a path inside this spec's directory. */
  const fixturePath = (name: string): string => path.join(__dirname, name);

  /**
   * Builds the partial upload descriptor handed to `parseDocument`.
   *
   * By default the on-disk path is derived from `originalname`; `overrides`
   * may replace `path` or add further fields such as `size`. Only the fields
   * listed here are populated, hence the type assertion.
   */
  const uploadOf = (
    originalname: string,
    mimetype: string,
    overrides: Partial<Express.Multer.File> = {},
  ): Express.Multer.File =>
    ({
      originalname,
      path: fixturePath(originalname),
      mimetype,
      ...overrides,
    }) as Express.Multer.File;

  /**
   * Parses `file` and asserts the full result object: the given byte count
   * and text, the upload's original name, the fixed `document_parser`
   * filepath, and an empty image list.
   */
  const expectParsed = async (
    file: Express.Multer.File,
    expected: { bytes: number; text: string },
  ): Promise<void> => {
    const parsed = await parseDocument({ file });

    expect(parsed).toEqual({
      bytes: expected.bytes,
      filename: file.originalname,
      filepath: 'document_parser',
      images: [],
      text: expected.text,
    });
  };

  test('parseDocument() parses text from docx', async () => {
    await expectParsed(uploadOf('sample.docx', DOCX_MIME), {
      bytes: 29,
      text: 'This is a sample DOCX file.\n\n',
    });
  });

  test('parseDocument() parses text from xlsx', async () => {
    await expectParsed(uploadOf('sample.xlsx', XLSX_MIME), {
      bytes: 66,
      text: 'Sheet One:\nData,on,first,sheet\nSecond Sheet:\nData,On\nSecond,Sheet\n',
    });
  });

  test('parseDocument() parses text from xls', async () => {
    await expectParsed(uploadOf('sample.xls', XLS_MIME), {
      bytes: 31,
      text: 'Sheet One:\nData,on,first,sheet\n',
    });
  });

  test('parseDocument() parses text from ods', async () => {
    await expectParsed(uploadOf('sample.ods', ODS_MIME), {
      bytes: 66,
      text: 'Sheet One:\nData,on,first,sheet\nSecond Sheet:\nData,On\nSecond,Sheet\n',
    });
  });

  // Alternative MIME types for the same xls fixture; each must yield the
  // same result as the canonical `application/vnd.ms-excel` case above.
  test.each([
    'application/msexcel',
    'application/x-msexcel',
    'application/x-ms-excel',
    'application/x-excel',
    'application/x-dos_ms_excel',
    'application/xls',
    'application/x-xls',
  ])('parseDocument() parses xls with variant MIME type: %s', async (mimetype) => {
    await expectParsed(uploadOf('sample.xls', mimetype), {
      bytes: 31,
      text: 'Sheet One:\nData,on,first,sheet\n',
    });
  });

  test('parseDocument() throws error for unhandled document type', async () => {
    const unsupported = uploadOf('nonexistent.file', 'application/invalid');

    await expect(parseDocument({ file: unsupported })).rejects.toThrow(
      'Unsupported file type in document parser: application/invalid',
    );
  });

  test('parseDocument() throws error for empty document', async () => {
    const blank = uploadOf('empty.docx', DOCX_MIME);

    await expect(parseDocument({ file: blank })).rejects.toThrow('No text found in document');
  });

  test('parseDocument() rejects files exceeding the pre-parse size limit', async () => {
    // The path points at the small sample fixture; only the declared `size`
    // is over the limit. NOTE(review): presumably the parser trusts this
    // declared size before reading the file — confirm against the implementation.
    const oversized = uploadOf('oversized.docx', DOCX_MIME, {
      path: fixturePath('sample.docx'),
      size: 16 * MEBIBYTE,
    });

    await expect(parseDocument({ file: oversized })).rejects.toThrow(
      /exceeds the 15MB document parser limit \(16MB\)/,
    );
  });

  test('parseDocument() allows files exactly at the size limit boundary', async () => {
    // A declared size of exactly 15 MiB must still be accepted.
    const atLimit = uploadOf('sample.docx', DOCX_MIME, { size: 15 * MEBIBYTE });

    await expect(parseDocument({ file: atLimit })).resolves.toBeDefined();
  });

  test('parseDocument() parses empty xlsx with only sheet name', async () => {
    await expectParsed(uploadOf('empty.xlsx', XLSX_MIME), {
      bytes: 8,
      text: 'Empty:\n\n',
    });
  });

  test('xlsx exports read and utils as named imports', async () => {
    // Checks that the `xlsx` package exposes these members as named exports
    // when loaded through a dynamic import.
    const xlsxModule = await import('xlsx');

    expect(typeof xlsxModule.read).toBe('function');
    expect(typeof xlsxModule.utils?.sheet_to_csv).toBe('function');
  });
});
|