🖼️ fix: Extract File Context & Persist Attachments (#10069)

- problem: `addImageUrls` had a side effect that was previously relied on to populate both the `ocr` message field (now `fileContext`) and `client.options.attachments`, which would record the user's uploaded message attachments to the user message when saved to the database and returned at the end of the request lifecycle
- solution: created dedicated handling for file context, and made sure to populate `allFiles` with non-provider attachments
2026-01-31 23:01:51 +01:00 · 2025-10-10 12:35:37 +03:00 · 2025-10-10 12:35:37 +03:00 · 07d0abc9fd
commit 07d0abc9fd
parent fbe341a171
5 changed files with 128 additions and 50 deletions
--- a/packages/api/src/files/context.ts
+++ b/packages/api/src/files/context.ts
@ -0,0 +1,68 @@
+import { logger } from '@librechat/data-schemas';
+import { FileSources, mergeFileConfig } from 'librechat-data-provider';
+import type { fileConfigSchema } from 'librechat-data-provider';
+import type { IMongoFile } from '@librechat/data-schemas';
+import type { z } from 'zod';
+import { processTextWithTokenLimit } from '~/utils/text';
+
+/**
+ * Builds the "Attached document(s)" prompt section from text that was already extracted
+ * from attachments (OCR, transcriptions, document text, etc.).
+ *
+ * Only attachments whose `source` is `FileSources.text` and whose `text` is non-empty
+ * contribute a section. Each section is passed through `processTextWithTokenLimit` with
+ * the file token limit, taken from `req.body.fileTokenLimit` or, failing that, from the
+ * merged file config.
+ *
+ * @param params - The parameters object
+ * @param params.attachments - Array of file attachments
+ * @param params.req - Express request object for config access
+ * @param params.tokenCountFn - Function to count tokens in text
+ * @returns The formatted file context text, or undefined when there are no attachments,
+ *   no token limit is available, or no attachment carries extracted text
+ */
+export async function extractFileContext({
+  attachments,
+  req,
+  tokenCountFn,
+}: {
+  attachments: IMongoFile[];
+  req?: {
+    body?: { fileTokenLimit?: number };
+    config?: { fileConfig?: z.infer<typeof fileConfigSchema> };
+  };
+  tokenCountFn: (text: string) => number;
+}): Promise<string | undefined> {
+  if (!attachments || attachments.length === 0) {
+    return undefined;
+  }
+
+  const fileConfig = mergeFileConfig(req?.config?.fileConfig);
+  const fileTokenLimit = req?.body?.fileTokenLimit ?? fileConfig.fileTokenLimit;
+  if (!fileTokenLimit) {
+    // Without a token limit no file text is processed at all.
+    return undefined;
+  }
+
+  const sections: string[] = [];
+  for (const file of attachments) {
+    // A missing source is treated as a local file, which never contributes text here.
+    const source = file.source ?? FileSources.local;
+    if (source !== FileSources.text || !file.text) {
+      continue;
+    }
+
+    const { text: limitedText, wasTruncated } = await processTextWithTokenLimit({
+      text: file.text,
+      tokenLimit: fileTokenLimit,
+      tokenCountFn,
+    });
+    if (wasTruncated) {
+      logger.debug(
+        `[extractFileContext] Text content truncated for file: ${file.filename} due to token limits`,
+      );
+    }
+
+    sections.push(`# "${file.filename}"\n${limitedText}\n`);
+  }
+
+  if (sections.length === 0) {
+    return undefined;
+  }
+
+  // The opening fence must end with a newline so the first heading is document content
+  // rather than part of the fence's info string; every document then starts the same way.
+  const header = 'Attached document(s):\n```md\n';
+  return header + sections.join('\n\n---\n\n') + '\n```';
+}
--- a/packages/api/src/files/index.ts
+++ b/packages/api/src/files/index.ts
@ -1,4 +1,5 @@
 export * from './audio';
+export * from './context';
 export * from './encode';
 export * from './mistral/crud';
 export * from './ocr';