From 68435cdcd06da5b694a01ccf719b022e7ae8cd2e Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Tue, 17 Mar 2026 02:36:18 -0400
Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=AF=20fix:=20Add=20Pre-Parse=20File=20?=
 =?UTF-8?q?Size=20Guard=20to=20Document=20Parser=20(#12275)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prevent memory-exhaustion DoS by rejecting documents larger than 15MB
before they are read into memory. This closes the gap between the 512MB
upload limit and the previously unbounded in-memory parsing.
---
 packages/api/src/files/documents/crud.spec.ts | 24 +++++++++
 packages/api/src/files/documents/crud.ts      | 51 +++++++++++++------
 2 files changed, 60 insertions(+), 15 deletions(-)
diff --git a/packages/api/src/files/documents/crud.spec.ts b/packages/api/src/files/documents/crud.spec.ts
index f22693718a..f8b255dd5e 100644
--- a/packages/api/src/files/documents/crud.spec.ts
+++ b/packages/api/src/files/documents/crud.spec.ts
@@ -122,6 +122,30 @@ describe('Document Parser', () => {
     await expect(parseDocument({ file })).rejects.toThrow('No text found in document');
   });
 
+  test('parseDocument() rejects files exceeding the pre-parse size limit', async () => {
+    const file = {
+      originalname: 'oversized.docx',
+      path: path.join(__dirname, 'sample.docx'),
+      mimetype: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+      size: 16 * 1024 * 1024,
+    } as Express.Multer.File;
+
+    await expect(parseDocument({ file })).rejects.toThrow(
+      /exceeds the 15MB document parser limit \(16MB\)/,
+    );
+  });
+
+  test('parseDocument() allows files exactly at the size limit boundary', async () => {
+    const file = {
+      originalname: 'sample.docx',
+      path: path.join(__dirname, 'sample.docx'),
+      mimetype: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+      size: 15 * 1024 * 1024,
+    } as Express.Multer.File;
+
+    await expect(parseDocument({ file })).resolves.toBeDefined();
+  });
+
   test('parseDocument() parses empty xlsx with only sheet name', async () => {
     const file = {
       originalname: 'empty.xlsx',
diff --git a/packages/api/src/files/documents/crud.ts b/packages/api/src/files/documents/crud.ts
index ab16534b45..61c1956542 100644
--- a/packages/api/src/files/documents/crud.ts
+++ b/packages/api/src/files/documents/crud.ts
@@ -1,35 +1,39 @@
 import * as fs from 'fs';
-import { excelMimeTypes, FileSources } from 'librechat-data-provider';
+import { megabyte, excelMimeTypes, FileSources } from 'librechat-data-provider';
 import type { TextItem } from 'pdfjs-dist/types/src/display/api';
 import type { MistralOCRUploadResult } from '~/types';
 
+type FileParseFn = (file: Express.Multer.File) => Promise<string>;
+
+const DOCUMENT_PARSER_MAX_FILE_SIZE = 15 * megabyte;
+
 /**
  * Parses an uploaded document and extracts its text content and metadata.
  * Handled types must stay in sync with `documentParserMimeTypes` from data-provider.
  *
- * @throws {Error} if `file.mimetype` is not handled or no text is found.
+ * @throws {Error} if `file.mimetype` is not handled, file exceeds size limit, or no text is found.
  */
 export async function parseDocument({
   file,
 }: {
   file: Express.Multer.File;
 }): Promise<MistralOCRUploadResult> {
-  let text: string;
-  if (file.mimetype === 'application/pdf') {
-    text = await pdfToText(file);
-  } else if (
-    file.mimetype === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
-  ) {
-    text = await wordDocToText(file);
-  } else if (
-    excelMimeTypes.test(file.mimetype) ||
-    file.mimetype === 'application/vnd.oasis.opendocument.spreadsheet'
-  ) {
-    text = await excelSheetToText(file);
-  } else {
+  const parseFn = getParserForMimeType(file.mimetype);
+  if (!parseFn) {
     throw new Error(`Unsupported file type in document parser: ${file.mimetype}`);
   }
 
+  const fileSize = file.size ?? (file.path != null ? (await fs.promises.stat(file.path)).size : 0);
+  if (fileSize > DOCUMENT_PARSER_MAX_FILE_SIZE) {
+    const limitMB = DOCUMENT_PARSER_MAX_FILE_SIZE / megabyte;
+    const sizeMB = Math.ceil(fileSize / megabyte);
+    throw new Error(
+      `File "${file.originalname}" exceeds the ${limitMB}MB document parser limit (${sizeMB}MB).`,
+    );
+  }
+
+  const text = await parseFn(file);
+
   if (!text?.trim()) {
     throw new Error('No text found in document');
   }
@@ -43,6 +47,23 @@ export async function parseDocument({
   };
 }
 
+/** Maps a MIME type to its document parser function, or `undefined` if unsupported. */
+function getParserForMimeType(mimetype: string): FileParseFn | undefined {
+  if (mimetype === 'application/pdf') {
+    return pdfToText;
+  }
+  if (mimetype === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
+    return wordDocToText;
+  }
+  if (
+    excelMimeTypes.test(mimetype) ||
+    mimetype === 'application/vnd.oasis.opendocument.spreadsheet'
+  ) {
+    return excelSheetToText;
+  }
+  return undefined;
+}
+
 /** Parses PDF, returns text inside. */
 async function pdfToText(file: Express.Multer.File): Promise<string> {
   // Imported inline so that Jest can test other routes without failing due to loading ESM