LibreChat/packages/api/src/files/context.ts

import { logger } from '@librechat/data-schemas';
import { FileSources, mergeFileConfig } from 'librechat-data-provider';
import type { IMongoFile } from '@librechat/data-schemas';
import type { ServerRequest } from '~/types';
import { processTextWithTokenLimit } from '~/utils/text';

/**
 * Builds the "attached documents" context string from attachments whose text
 * was already extracted (OCR, transcriptions, document text, etc.).
 *
 * Only attachments whose `source` is `FileSources.text` and that carry a
 * non-empty `text` contribute. Each contributing file is passed through
 * `processTextWithTokenLimit` with the resolved limit, so the limit is applied
 * per file rather than to the combined output.
 *
 * The result starts with an `Attached document(s):` line, followed by a
 * `md` fenced block. Inside the fence every file is rendered as a
 * `# "<filename>"` heading plus its (possibly truncated) text, and files are
 * separated by a `---` rule.
 *
 * @param params - The parameters object
 * @param params.attachments - Array of file attachments
 * @param params.req - Express request object for config access;
 *   `req.body.fileTokenLimit` takes precedence over the merged file config's
 *   `fileTokenLimit`
 * @param params.tokenCountFn - Function to count tokens in text
 * @returns The formatted file context text, or undefined when there are no
 *   attachments, no token limit is configured, or no attachment carries text
 */
export async function extractFileContext({
  attachments,
  req,
  tokenCountFn,
}: {
  attachments: IMongoFile[];
  req?: ServerRequest;
  tokenCountFn: (text: string) => number;
}): Promise<string | undefined> {
  if (!attachments || attachments.length === 0) {
    return undefined;
  }

  const fileConfig = mergeFileConfig(req?.config?.fileConfig);
  // A per-request limit overrides the configured one.
  const fileTokenLimit = req?.body?.fileTokenLimit ?? fileConfig.fileTokenLimit;

  if (!fileTokenLimit) {
    // Without a token limit no file text is processed at all.
    return undefined;
  }

  const sections: string[] = [];

  for (const file of attachments) {
    const source = file.source ?? FileSources.local;
    if (source !== FileSources.text || !file.text) {
      continue;
    }

    const { text: limitedText, wasTruncated } = await processTextWithTokenLimit({
      text: file.text,
      tokenLimit: fileTokenLimit,
      tokenCountFn,
    });

    if (wasTruncated) {
      logger.debug(
        `[extractFileContext] Text content truncated for file: ${file.filename} due to token limits`,
      );
    }

    sections.push(`# "${file.filename}"\n${limitedText}\n`);
  }

  if (sections.length === 0) {
    return undefined;
  }

  // The opening fence must end its own line; otherwise the first heading is
  // swallowed into the fence's info string (```md# "name") instead of being
  // part of the fenced content.
  return 'Attached document(s):\n```md\n' + sections.join('\n\n---\n\n') + '\n```';
}
🖼️ fix: Extract File Context & Persist Attachments (#10069) - problem: `addImageUrls` had a side effect that was being leveraged before to populate both the `ocr` message field, now `fileContext`, and `client.options.attachments`, which would record the user's uploaded message attachments to the user message when saved to the database and returned at the end of the request lifecycle - solution: created dedicated handling for file context, and made sure to populate `allFiles` with non-provider attachments 2025-10-10 12:35:37 +03:00			`import { logger } from '@librechat/data-schemas';`
			`import { FileSources, mergeFileConfig } from 'librechat-data-provider';`
			`import type { IMongoFile } from '@librechat/data-schemas';`
⚗️ refactor: Provider File Validation with Configurable Size Limits (#10405) * chore: correct type for ServerRequest * chore: improve ServerRequest typing across several modules * feat: Add PDF configured limit validation - Introduced comprehensive tests for PDF validation across multiple providers, ensuring correct behavior for file size limits and edge cases. - Enhanced the `validatePdf` function to accept an optional configured file size limit, allowing for stricter validation based on user configurations. - Updated related functions to utilize the new validation logic, ensuring consistent behavior across different providers. * chore: Update Request type to ServerRequest in audio and video encoding modules * refactor: move `getConfiguredFileSizeLimit` utility * feat: Add video and audio validation with configurable size limits - Introduced `validateVideo` and `validateAudio` functions to validate media files against provider-specific size limits. - Enhanced validation logic to consider optional configured file size limits, allowing for more flexible file handling. - Added comprehensive tests for video and audio validation across different providers, ensuring correct behavior for various scenarios. * refactor: Update PDF and media validation to allow higher configured limits - Modified validation logic to accept user-configured file size limits that exceed provider defaults, ensuring correct acceptance of files within the specified range. - Updated tests to reflect changes in validation behavior, confirming that files are accepted when within the configured limits. - Enhanced documentation in tests to clarify expected outcomes with the new validation rules. * chore: Add @types/node-fetch dependency to package.json and package-lock.json - Included the @types/node-fetch package to enhance type definitions for node-fetch usage. - Updated package-lock.json to reflect the addition of the new dependency. 
* fix: Rename FileConfigInput to TFileConfig 2025-11-07 10:57:15 -05:00			`import type { ServerRequest } from '~/types';`
🖼️ fix: Extract File Context & Persist Attachments (#10069) - problem: `addImageUrls` had a side effect that was being leveraged before to populate both the `ocr` message field, now `fileContext`, and `client.options.attachments`, which would record the user's uploaded message attachments to the user message when saved to the database and returned at the end of the request lifecycle - solution: created dedicated handling for file context, and made sure to populate `allFiles` with non-provider attachments 2025-10-10 12:35:37 +03:00			`import { processTextWithTokenLimit } from '~/utils/text';`

			`/**`
			`* Extracts text context from attachments and returns formatted text.`
			`* This handles text that was already extracted from files (OCR, transcriptions, document text, etc.)`
			`* @param params - The parameters object`
			`* @param params.attachments - Array of file attachments`
			`* @param params.req - Express request object for config access`
			`* @param params.tokenCountFn - Function to count tokens in text`
			`* @returns The formatted file context text, or undefined if no text found`
			`*/`
			`export async function extractFileContext({`
			`attachments,`
			`req,`
			`tokenCountFn,`
			`}: {`
			`attachments: IMongoFile[];`
⚗️ refactor: Provider File Validation with Configurable Size Limits (#10405) * chore: correct type for ServerRequest * chore: improve ServerRequest typing across several modules * feat: Add PDF configured limit validation - Introduced comprehensive tests for PDF validation across multiple providers, ensuring correct behavior for file size limits and edge cases. - Enhanced the `validatePdf` function to accept an optional configured file size limit, allowing for stricter validation based on user configurations. - Updated related functions to utilize the new validation logic, ensuring consistent behavior across different providers. * chore: Update Request type to ServerRequest in audio and video encoding modules * refactor: move `getConfiguredFileSizeLimit` utility * feat: Add video and audio validation with configurable size limits - Introduced `validateVideo` and `validateAudio` functions to validate media files against provider-specific size limits. - Enhanced validation logic to consider optional configured file size limits, allowing for more flexible file handling. - Added comprehensive tests for video and audio validation across different providers, ensuring correct behavior for various scenarios. * refactor: Update PDF and media validation to allow higher configured limits - Modified validation logic to accept user-configured file size limits that exceed provider defaults, ensuring correct acceptance of files within the specified range. - Updated tests to reflect changes in validation behavior, confirming that files are accepted when within the configured limits. - Enhanced documentation in tests to clarify expected outcomes with the new validation rules. * chore: Add @types/node-fetch dependency to package.json and package-lock.json - Included the @types/node-fetch package to enhance type definitions for node-fetch usage. - Updated package-lock.json to reflect the addition of the new dependency. 
* fix: Rename FileConfigInput to TFileConfig 2025-11-07 10:57:15 -05:00			`req?: ServerRequest;`
🖼️ fix: Extract File Context & Persist Attachments (#10069) - problem: `addImageUrls` had a side effect that was being leveraged before to populate both the `ocr` message field, now `fileContext`, and `client.options.attachments`, which would record the user's uploaded message attachments to the user message when saved to the database and returned at the end of the request lifecycle - solution: created dedicated handling for file context, and made sure to populate `allFiles` with non-provider attachments 2025-10-10 12:35:37 +03:00			`tokenCountFn: (text: string) => number;`
			`}): Promise<string \| undefined> {`
			`if (!attachments \|\| attachments.length === 0) {`
			`return undefined;`
			`}`

			`const fileConfig = mergeFileConfig(req?.config?.fileConfig);`
			`const fileTokenLimit = req?.body?.fileTokenLimit ?? fileConfig.fileTokenLimit;`

			`if (!fileTokenLimit) {`
			`// If no token limit, return undefined (no processing)`
			`return undefined;`
			`}`

			`let resultText = '';`

			`for (const file of attachments) {`
			`const source = file.source ?? FileSources.local;`
			`if (source === FileSources.text && file.text) {`
			`const { text: limitedText, wasTruncated } = await processTextWithTokenLimit({`
			`text: file.text,`
			`tokenLimit: fileTokenLimit,`
			`tokenCountFn,`
			`});`

			`if (wasTruncated) {`
			`logger.debug(`
			`[extractFileContext] Text content truncated for file: ${file.filename} due to token limits`,
			`);`
			`}`

			resultText += `${!resultText ? 'Attached document(s):\n```md' : '\n\n---\n\n'}# "${file.filename}"\n${limitedText}\n`;
			`}`
			`}`

			`if (resultText) {`
			resultText += '\n```';
			`return resultText;`
			`}`

			`return undefined;`
			`}`