LibreChat/packages/api/src/utils/message.ts

import { Constants } from 'librechat-data-provider';
import type { TFile, TMessage } from 'librechat-data-provider';

/**
 * Keys removed from every file object before it is sent to the client.
 * Declared `as const` so the literal key names can be reused in `Omit<…>` return types.
 */
const FILE_STRIP_FIELDS = [
  // NOTE(review): presumably the file's extracted text content, which can be large — confirm
  'text',
  // NOTE(review): `_id` / `__v` look like database-internal bookkeeping keys — confirm
  '_id',
  '__v',
] as const;

/**
 * Keys removed from every message object before it is sent to the client.
 * Declared `as const` so the literal key names can be reused in `Omit<…>` return types.
 */
const MESSAGE_STRIP_FIELDS = [
  // Message-level field that is dropped from the transmitted payload
  'fileContext',
] as const;

/**
 * Produces a client-safe copy of a file object.
 *
 * The input is shallow-copied and every key listed in `FILE_STRIP_FIELDS` is removed
 * from the copy; the object passed in is never modified. Meant to be called inline
 * while a file array is already being assembled, so no extra pass over the files
 * is needed.
 *
 * @param file - The file object to clean up before transmission
 * @returns A shallow copy of `file` without the stripped keys
 *
 * @example
 * // Inside a loop that already walks the attachments:
 * for (const attachment of client.options.attachments) {
 *   if (messageFiles.has(attachment.file_id)) {
 *     userMessage.files.push(sanitizeFileForTransmit(attachment));
 *   }
 * }
 */
export function sanitizeFileForTransmit<T extends Partial<TFile>>(
  file: T,
): Omit<T, (typeof FILE_STRIP_FIELDS)[number]> {
  // Work on a shallow copy so the caller's object keeps all of its keys
  const copy = { ...file };
  FILE_STRIP_FIELDS.forEach((key) => {
    delete copy[key as keyof typeof copy];
  });
  return copy;
}

/**
 * Produces a client-safe copy of a message object.
 *
 * Every key listed in `MESSAGE_STRIP_FIELDS` (e.g. `fileContext`) is dropped from a
 * shallow copy of the message, and each entry of an embedded `files` array is passed
 * through `sanitizeFileForTransmit`. The message passed in is never modified.
 * A falsy `message` is returned unchanged.
 *
 * @param message - The message object to clean up before transmission
 * @returns A new message object safe for client transmission
 *
 * @example
 * sendEvent(res, {
 *   final: true,
 *   requestMessage: sanitizeMessageForTransmit(userMessage),
 *   responseMessage: response,
 * });
 */
export function sanitizeMessageForTransmit<T extends Partial<TMessage>>(
  message: T,
): Omit<T, (typeof MESSAGE_STRIP_FIELDS)[number]> {
  // Nothing to sanitize: hand the falsy value straight back
  if (!message) {
    return message as Omit<T, (typeof MESSAGE_STRIP_FIELDS)[number]>;
  }

  const copy = { ...message };

  // Drop the message-level keys from the copy only
  MESSAGE_STRIP_FIELDS.forEach((key) => {
    delete copy[key as keyof typeof copy];
  });

  // Whenever a files array is present (even an empty one) it is replaced by a
  // fresh array, so the result never shares the array with the input message
  if (Array.isArray(copy.files)) {
    copy.files = copy.files.map((entry) => sanitizeFileForTransmit(entry));
  }

  return copy;
}

/**
 * Minimal message shape for thread traversal.
 * Declares only the fields that are read while walking parent links.
 */
type ThreadMessage = {
  /** ID of this message */
  messageId: string;
  /** ID of the message this one replies to; may be absent or null */
  parentMessageId?: string | null;
  /** Files attached to the message; individual entries may lack a `file_id` */
  files?: Array<{ file_id?: string }>;
};

/** Result of thread data extraction */
export type ThreadData = {
  /** IDs of the messages found in the thread */
  messageIds: string[];
  /** IDs of the files referenced by those messages */
  fileIds: string[];
};

/**
 * Collects the message IDs and file IDs of one conversation thread.
 *
 * All messages are first indexed by `messageId` in a Map (O(1) lookups). The thread
 * is then followed from `parentMessageId` upwards through each message's
 * `parentMessageId`, recording every visited message ID (starting message first)
 * and every distinct `file_id` found on the way (in first-seen order).
 *
 * The walk ends when a message has no parent, its parent is `Constants.NO_PARENT`,
 * the next ID is not among `messages`, or an ID shows up a second time (cycle guard).
 *
 * @param messages - All messages in the conversation (should be queried with select for efficiency)
 * @param parentMessageId - The ID of the parent message to start traversal from
 * @returns Object containing messageIds and fileIds arrays; both are empty when
 *   `messages` is empty/missing or `parentMessageId` is falsy
 */
export function getThreadData(
  messages: ThreadMessage[],
  parentMessageId: string | null | undefined,
): ThreadData {
  if (!parentMessageId || !messages || messages.length === 0) {
    return { messageIds: [], fileIds: [] };
  }

  /** Index by ID up front so each hop is a Map lookup rather than a .find() scan */
  const byId = new Map<string, ThreadMessage>(
    messages.map((entry): [string, ThreadMessage] => [entry.messageId, entry]),
  );

  const threadIds: string[] = [];
  const seenFileIds = new Set<string>();
  const seenMessageIds = new Set<string>();

  /** Walk up the parent chain; the visited set stops the loop on a cycle */
  let cursor: string | null | undefined = parentMessageId;
  while (cursor && !seenMessageIds.has(cursor)) {
    seenMessageIds.add(cursor);

    const node = byId.get(cursor);
    if (!node) {
      // The chain points at a message that is not part of `messages`
      break;
    }

    threadIds.push(node.messageId);

    /** Gather this message's file IDs; the Set drops duplicates */
    for (const attached of node.files || []) {
      if (attached.file_id) {
        seenFileIds.add(attached.file_id);
      }
    }

    // The NO_PARENT sentinel marks the top of the thread
    const nextId = node.parentMessageId;
    cursor = nextId === Constants.NO_PARENT ? null : nextId;
  }

  return { messageIds: threadIds, fileIds: [...seenFileIds] };
}
🗂️ feat: Better Persistence for Code Execution Files Between Sessions (#11362) * refactor: process code output files for re-use (WIP) * feat: file attachment handling with additional metadata for downloads * refactor: Update directory path logic for local file saving based on basePath * refactor: file attachment handling to support TFile type and improve data merging logic * feat: thread filtering of code-generated files - Introduced parentMessageId parameter in addedConvo and initialize functions to enhance thread management. - Updated related methods to utilize parentMessageId for retrieving messages and filtering code-generated files by conversation threads. - Enhanced type definitions to include parentMessageId in relevant interfaces for better clarity and usage. * chore: imports/params ordering * feat: update file model to use messageId for filtering and processing - Changed references from 'message' to 'messageId' in file-related methods for consistency. - Added messageId field to the file schema and updated related types. - Enhanced file processing logic to accommodate the new messageId structure. * feat: enhance file retrieval methods to support user-uploaded execute_code files - Added a new method `getUserCodeFiles` to retrieve user-uploaded execute_code files, excluding code-generated files. - Updated existing file retrieval methods to improve filtering logic and handle edge cases. - Enhanced thread data extraction to collect both message IDs and file IDs efficiently. - Integrated `getUserCodeFiles` into relevant endpoints for better file management in conversations. * chore: update @librechat/agents package version to 3.0.78 in package-lock.json and related package.json files * refactor: file processing and retrieval logic - Added a fallback mechanism for download URLs when files exceed size limits or cannot be processed locally. - Implemented a deduplication strategy for code-generated files based on conversationId and filename to optimize storage. 
- Updated file retrieval methods to ensure proper filtering by messageIds, preventing orphaned files from being included. - Introduced comprehensive tests for new thread data extraction functionality, covering edge cases and performance considerations. * fix: improve file retrieval tests and handling of optional properties - Updated tests to safely access optional properties using non-null assertions. - Modified test descriptions for clarity regarding the exclusion of execute_code files. - Ensured that the retrieval logic correctly reflects the expected outcomes for file queries. * test: add comprehensive unit tests for processCodeOutput functionality - Introduced a new test suite for the processCodeOutput function, covering various scenarios including file retrieval, creation, and processing for both image and non-image files. - Implemented mocks for dependencies such as axios, logger, and file models to isolate tests and ensure reliable outcomes. - Validated behavior for existing files, new file creation, and error handling, including size limits and fallback mechanisms. - Enhanced test coverage for metadata handling and usage increment logic, ensuring robust verification of file processing outcomes. * test: enhance file size limit enforcement in processCodeOutput tests - Introduced a configurable file size limit for tests to improve flexibility and coverage. - Mocked the `librechat-data-provider` to allow dynamic adjustment of file size limits during tests. - Updated the file size limit enforcement test to validate behavior when files exceed specified limits, ensuring proper fallback to download URLs. - Reset file size limit after tests to maintain isolation for subsequent test cases. 2026-01-16 10:06:24 -05:00			`import { Constants } from 'librechat-data-provider';`
📦 refactor: Request Message Sanitization for Smaller Final Response (#10792) * refactor: implement sanitizeFileForTransmit and sanitizeMessageForTransmit functions for smaller payload to client transmission * refactor: enhance sanitizeMessageForTransmit to preserve empty files array and avoid mutating original message * refactor: update sanitizeMessageForTransmit to ensure immutability of files array and improve test clarity 2025-12-03 14:26:49 -05:00			`import type { TFile, TMessage } from 'librechat-data-provider';`

			`/** Fields to strip from files before client transmission */`
			`const FILE_STRIP_FIELDS = ['text', '_id', '__v'] as const;`

			`/** Fields to strip from messages before client transmission */`
			`const MESSAGE_STRIP_FIELDS = ['fileContext'] as const;`

			`/**`
			`* Strips large/unnecessary fields from a file object before transmitting to client.`
			`* Use this within existing loops when building file arrays to avoid extra iterations.`
			`*`
			`* @param file - The file object to sanitize`
			`* @returns A new file object without the stripped fields`
			`*`
			`* @example`
			`* // Use in existing file processing loop:`
			`* for (const attachment of client.options.attachments) {`
			`* if (messageFiles.has(attachment.file_id)) {`
			`* userMessage.files.push(sanitizeFileForTransmit(attachment));`
			`* }`
			`* }`
			`*/`
			`export function sanitizeFileForTransmit<T extends Partial<TFile>>(`
			`file: T,`
			`): Omit<T, (typeof FILE_STRIP_FIELDS)[number]> {`
			`const sanitized = { ...file };`
			`for (const field of FILE_STRIP_FIELDS) {`
			`delete sanitized[field as keyof typeof sanitized];`
			`}`
			`return sanitized;`
			`}`

			`/**`
			`* Sanitizes a message object before transmitting to client.`
			* Removes large fields like `fileContext` and strips `text` from embedded files.
			`*`
			`* @param message - The message object to sanitize`
			`* @returns A new message object safe for client transmission`
			`*`
			`* @example`
			`* sendEvent(res, {`
			`* final: true,`
			`* requestMessage: sanitizeMessageForTransmit(userMessage),`
			`* responseMessage: response,`
			`* });`
			`*/`
			`export function sanitizeMessageForTransmit<T extends Partial<TMessage>>(`
			`message: T,`
			`): Omit<T, (typeof MESSAGE_STRIP_FIELDS)[number]> {`
			`if (!message) {`
			`return message as Omit<T, (typeof MESSAGE_STRIP_FIELDS)[number]>;`
			`}`

			`const sanitized = { ...message };`

			`// Remove message-level fields`
			`for (const field of MESSAGE_STRIP_FIELDS) {`
			`delete sanitized[field as keyof typeof sanitized];`
			`}`

			`// Always create a new array when files exist to maintain full immutability`
			`if (Array.isArray(sanitized.files)) {`
			`sanitized.files = sanitized.files.map((file) => sanitizeFileForTransmit(file));`
			`}`

			`return sanitized;`
			`}`
🗂️ feat: Better Persistence for Code Execution Files Between Sessions (#11362) * refactor: process code output files for re-use (WIP) * feat: file attachment handling with additional metadata for downloads * refactor: Update directory path logic for local file saving based on basePath * refactor: file attachment handling to support TFile type and improve data merging logic * feat: thread filtering of code-generated files - Introduced parentMessageId parameter in addedConvo and initialize functions to enhance thread management. - Updated related methods to utilize parentMessageId for retrieving messages and filtering code-generated files by conversation threads. - Enhanced type definitions to include parentMessageId in relevant interfaces for better clarity and usage. * chore: imports/params ordering * feat: update file model to use messageId for filtering and processing - Changed references from 'message' to 'messageId' in file-related methods for consistency. - Added messageId field to the file schema and updated related types. - Enhanced file processing logic to accommodate the new messageId structure. * feat: enhance file retrieval methods to support user-uploaded execute_code files - Added a new method `getUserCodeFiles` to retrieve user-uploaded execute_code files, excluding code-generated files. - Updated existing file retrieval methods to improve filtering logic and handle edge cases. - Enhanced thread data extraction to collect both message IDs and file IDs efficiently. - Integrated `getUserCodeFiles` into relevant endpoints for better file management in conversations. * chore: update @librechat/agents package version to 3.0.78 in package-lock.json and related package.json files * refactor: file processing and retrieval logic - Added a fallback mechanism for download URLs when files exceed size limits or cannot be processed locally. - Implemented a deduplication strategy for code-generated files based on conversationId and filename to optimize storage. 
- Updated file retrieval methods to ensure proper filtering by messageIds, preventing orphaned files from being included. - Introduced comprehensive tests for new thread data extraction functionality, covering edge cases and performance considerations. * fix: improve file retrieval tests and handling of optional properties - Updated tests to safely access optional properties using non-null assertions. - Modified test descriptions for clarity regarding the exclusion of execute_code files. - Ensured that the retrieval logic correctly reflects the expected outcomes for file queries. * test: add comprehensive unit tests for processCodeOutput functionality - Introduced a new test suite for the processCodeOutput function, covering various scenarios including file retrieval, creation, and processing for both image and non-image files. - Implemented mocks for dependencies such as axios, logger, and file models to isolate tests and ensure reliable outcomes. - Validated behavior for existing files, new file creation, and error handling, including size limits and fallback mechanisms. - Enhanced test coverage for metadata handling and usage increment logic, ensuring robust verification of file processing outcomes. * test: enhance file size limit enforcement in processCodeOutput tests - Introduced a configurable file size limit for tests to improve flexibility and coverage. - Mocked the `librechat-data-provider` to allow dynamic adjustment of file size limits during tests. - Updated the file size limit enforcement test to validate behavior when files exceed specified limits, ensuring proper fallback to download URLs. - Reset file size limit after tests to maintain isolation for subsequent test cases. 2026-01-16 10:06:24 -05:00
			`/** Minimal message shape for thread traversal */`
			`type ThreadMessage = {`
			`messageId: string;`
			`parentMessageId?: string \| null;`
			`files?: Array<{ file_id?: string }>;`
			`};`

			`/** Result of thread data extraction */`
			`export type ThreadData = {`
			`messageIds: string[];`
			`fileIds: string[];`
			`};`

			`/**`
			`* Extracts thread message IDs and file IDs in a single O(n) pass.`
			`* Builds a Map for O(1) lookups, then traverses the thread collecting both IDs.`
			`*`
			`* @param messages - All messages in the conversation (should be queried with select for efficiency)`
			`* @param parentMessageId - The ID of the parent message to start traversal from`
			`* @returns Object containing messageIds and fileIds arrays`
			`*/`
			`export function getThreadData(`
			`messages: ThreadMessage[],`
			`parentMessageId: string \| null \| undefined,`
			`): ThreadData {`
			`const result: ThreadData = { messageIds: [], fileIds: [] };`

			`if (!messages \|\| messages.length === 0 \|\| !parentMessageId) {`
			`return result;`
			`}`

			`/** Build Map for O(1) lookups instead of O(n) .find() calls */`
			`const messageMap = new Map<string, ThreadMessage>();`
			`for (const msg of messages) {`
			`messageMap.set(msg.messageId, msg);`
			`}`

			`const fileIdSet = new Set<string>();`
			`const visitedIds = new Set<string>();`
			`let currentId: string \| null \| undefined = parentMessageId;`

			`/** Single traversal: collect message IDs and file IDs together */`
			`while (currentId) {`
			`if (visitedIds.has(currentId)) {`
			`break;`
			`}`
			`visitedIds.add(currentId);`

			`const message = messageMap.get(currentId);`
			`if (!message) {`
			`break;`
			`}`

			`result.messageIds.push(message.messageId);`

			`/** Collect file IDs from this message */`
			`if (message.files) {`
			`for (const file of message.files) {`
			`if (file.file_id) {`
			`fileIdSet.add(file.file_id);`
			`}`
			`}`
			`}`

			`currentId = message.parentMessageId === Constants.NO_PARENT ? null : message.parentMessageId;`
			`}`

			`result.fileIds = Array.from(fileIdSet);`
			`return result;`
			`}`