🗂️ feat: Better Persistence for Code Execution Files Between Sessions (#11362)

* refactor: process code output files for re-use (WIP) * feat: file attachment handling with additional metadata for downloads * refactor: Update directory path logic for local file saving based on basePath * refactor: file attachment handling to support TFile type and improve data merging logic * feat: thread filtering of code-generated files - Introduced parentMessageId parameter in addedConvo and initialize functions to enhance thread management. - Updated related methods to utilize parentMessageId for retrieving messages and filtering code-generated files by conversation threads. - Enhanced type definitions to include parentMessageId in relevant interfaces for better clarity and usage. * chore: imports/params ordering * feat: update file model to use messageId for filtering and processing - Changed references from 'message' to 'messageId' in file-related methods for consistency. - Added messageId field to the file schema and updated related types. - Enhanced file processing logic to accommodate the new messageId structure. * feat: enhance file retrieval methods to support user-uploaded execute_code files - Added a new method `getUserCodeFiles` to retrieve user-uploaded execute_code files, excluding code-generated files. - Updated existing file retrieval methods to improve filtering logic and handle edge cases. - Enhanced thread data extraction to collect both message IDs and file IDs efficiently. - Integrated `getUserCodeFiles` into relevant endpoints for better file management in conversations. * chore: update @librechat/agents package version to 3.0.78 in package-lock.json and related package.json files * refactor: file processing and retrieval logic - Added a fallback mechanism for download URLs when files exceed size limits or cannot be processed locally. - Implemented a deduplication strategy for code-generated files based on conversationId and filename to optimize storage. 
- Updated file retrieval methods to ensure proper filtering by messageIds, preventing orphaned files from being included. - Introduced comprehensive tests for new thread data extraction functionality, covering edge cases and performance considerations. * fix: improve file retrieval tests and handling of optional properties - Updated tests to safely access optional properties using non-null assertions. - Modified test descriptions for clarity regarding the exclusion of execute_code files. - Ensured that the retrieval logic correctly reflects the expected outcomes for file queries. * test: add comprehensive unit tests for processCodeOutput functionality - Introduced a new test suite for the processCodeOutput function, covering various scenarios including file retrieval, creation, and processing for both image and non-image files. - Implemented mocks for dependencies such as axios, logger, and file models to isolate tests and ensure reliable outcomes. - Validated behavior for existing files, new file creation, and error handling, including size limits and fallback mechanisms. - Enhanced test coverage for metadata handling and usage increment logic, ensuring robust verification of file processing outcomes. * test: enhance file size limit enforcement in processCodeOutput tests - Introduced a configurable file size limit for tests to improve flexibility and coverage. - Mocked the `librechat-data-provider` to allow dynamic adjustment of file size limits during tests. - Updated the file size limit enforcement test to validate behavior when files exceed specified limits, ensuring proper fallback to download URLs. - Reset file size limit after tests to maintain isolation for subsequent test cases.
2026-01-20 09:16:13 +01:00 · 2026-01-16 10:06:24 -05:00 · 2026-01-16 10:06:24 -05:00 · cc32895d13
commit cc32895d13
parent fe32cbedf9
22 changed files with 1364 additions and 83 deletions
--- a/api/models/File.js
+++ b/api/models/File.js
@@ -26,7 +26,8 @@ const getFiles = async (filter, _sortOptions, selectFields = { text: 0 }) => {
 };

 /**
- * Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs
+ * Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs.
+ * Note: execute_code files are handled separately by getCodeGeneratedFiles and getUserCodeFiles.
 * @param {string[]} fileIds - Array of file_id strings to search for
 * @param {Set<EToolResources>} toolResourceSet - Optional filter for tool resources
 * @returns {Promise<Array<MongoFile>>} Files that match the criteria
@@ -37,21 +38,25 @@ const getToolFilesByIds = async (fileIds, toolResourceSet) => {
  }

  try {
-    const filter = {
-      file_id: { $in: fileIds },
-      $or: [],
-    };
+    const orConditions = [];

    if (toolResourceSet.has(EToolResources.context)) {
-      filter.$or.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
+      orConditions.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
    }
    if (toolResourceSet.has(EToolResources.file_search)) {
-      filter.$or.push({ embedded: true });
+      orConditions.push({ embedded: true });
    }
-    if (toolResourceSet.has(EToolResources.execute_code)) {
-      filter.$or.push({ 'metadata.fileIdentifier': { $exists: true } });
+
+    if (orConditions.length === 0) {
+      return [];
    }

+    const filter = {
+      file_id: { $in: fileIds },
+      context: { $ne: FileContext.execute_code }, // Exclude code-generated files
+      $or: orConditions,
+    };
+
    const selectFields = { text: 0 };
    const sortOptions = { updatedAt: -1 };

@@ -62,6 +67,70 @@ const getToolFilesByIds = async (fileIds, toolResourceSet) => {
  }
 };

+/**
+ * Retrieves files generated by code execution for a given conversation, limited to the given messages.
+ * These files are stored locally with fileIdentifier metadata for code env re-upload.
+ * @param {string} conversationId - The conversation ID to search for; a falsy value returns []
+ * @param {string[]} [messageIds] - messageIds of the thread to include; if omitted or empty, [] is returned
+ * @returns {Promise<Array<MongoFile>>} Matching code-generated files, or [] when a guard fails or the query throws
+ */
+const getCodeGeneratedFiles = async (conversationId, messageIds) => {
+  if (!conversationId) {
+    return [];
+  }
+
+  /** Without messageIds there is no thread to filter by, so nothing is returned rather than every file */
+  if (!messageIds || messageIds.length === 0) {
+    return [];
+  }
+
+  try {
+    const filter = {
+      conversationId,
+      context: FileContext.execute_code,
+      messageId: { $exists: true, $in: messageIds }, // must belong to one of the given messages
+      'metadata.fileIdentifier': { $exists: true },
+    };
+
+    const selectFields = { text: 0 };
+    const sortOptions = { createdAt: 1 };
+
+    return await getFiles(filter, sortOptions, selectFields);
+  } catch (error) {
+    logger.error('[getCodeGeneratedFiles] Error retrieving code generated files:', error);
+    return [];
+  }
+};
+
+/**
+ * Retrieves user-uploaded execute_code files (not code-generated) by their file IDs.
+ * These are files with fileIdentifier metadata whose context is NOT execute_code (e.g., agents or message_attachment).
+ * File IDs should be collected from message.files arrays in the current thread.
+ * @param {string[]} fileIds - Array of file IDs to fetch (from message.files in the thread); if missing or empty, [] is returned
+ * @returns {Promise<Array<MongoFile>>} User-uploaded execute_code files, or [] when the query throws (the error is logged)
+ */
+const getUserCodeFiles = async (fileIds) => {
+  if (!fileIds || fileIds.length === 0) {
+    return [];
+  }
+
+  try {
+    const filter = {
+      file_id: { $in: fileIds },
+      context: { $ne: FileContext.execute_code }, // leave out code-generated files
+      'metadata.fileIdentifier': { $exists: true },
+    };
+
+    const selectFields = { text: 0 };
+    const sortOptions = { createdAt: 1 };
+
+    return await getFiles(filter, sortOptions, selectFields);
+  } catch (error) {
+    logger.error('[getUserCodeFiles] Error retrieving user code files:', error);
+    return [];
+  }
+};
+
 /**
 * Creates a new file with a TTL of 1 hour.
 * @param {MongoFile} data - The file data to be created, must contain file_id.
@@ -169,6 +238,8 @@ module.exports = {
  findFileById,
  getFiles,
  getToolFilesByIds,
+  getCodeGeneratedFiles,
+  getUserCodeFiles,
  createFile,
  updateFile,
  updateFileUsage,