🧯 fix: Add Pre-Parse File Size Guard to Document Parser (#12275)

Prevent memory exhaustion DoS by rejecting documents exceeding 15MB
before reading them into memory, closing the gap between the 512MB
upload limit and unbounded in-memory parsing.
This commit is contained in:
Danny Avila 2026-03-17 02:36:18 -04:00 committed by GitHub
parent 0c378811f1
commit 68435cdcd0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 60 additions and 15 deletions

View file

@@ -122,6 +122,30 @@ describe('Document Parser', () => {
await expect(parseDocument({ file })).rejects.toThrow('No text found in document');
});
// Size-guard check: the upload descriptor declares 16MB (one megabyte over the
// 15MB cap named in the expected error), so parseDocument() must reject.
// NOTE(review): `path` points at the small sample.docx fixture while `size`
// claims 16MB — presumably the guard trusts the declared size rather than
// reading the file; confirm against parseDocument().
test('parseDocument() rejects files exceeding the pre-parse size limit', async () => {
  const bytesPerMb = 1024 * 1024;
  const oversizedUpload = {
    originalname: 'oversized.docx',
    path: path.join(__dirname, 'sample.docx'),
    mimetype: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    size: 16 * bytesPerMb,
  } as Express.Multer.File;

  const parsing = parseDocument({ file: oversizedUpload });

  // The message must name both the limit and the offending size.
  await expect(parsing).rejects.toThrow(/exceeds the 15MB document parser limit \(16MB\)/);
});
// Boundary check: a declared size of exactly 15 * 1024 * 1024 bytes sits on the
// limit rather than over it, so parseDocument() must still resolve with a value.
test('parseDocument() allows files exactly at the size limit boundary', async () => {
  const limitBytes = 15 * 1024 * 1024;
  const boundaryUpload = {
    originalname: 'sample.docx',
    path: path.join(__dirname, 'sample.docx'),
    mimetype: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    size: limitBytes,
  } as Express.Multer.File;

  const parsing = parseDocument({ file: boundaryUpload });

  await expect(parsing).resolves.toBeDefined();
});
test('parseDocument() parses empty xlsx with only sheet name', async () => {
const file = {
originalname: 'empty.xlsx',