🗂️ feat: Better Persistence for Code Execution Files Between Sessions (#11362)

* refactor: process code output files for re-use (WIP)
* feat: file attachment handling with additional metadata for downloads
* refactor: Update directory path logic for local file saving based on basePath
* refactor: file attachment handling to support TFile type and improve data merging logic
* feat: thread filtering of code-generated files
  - Introduced parentMessageId parameter in addedConvo and initialize functions to enhance thread management.
  - Updated related methods to utilize parentMessageId for retrieving messages and filtering code-generated files by conversation threads.
  - Enhanced type definitions to include parentMessageId in relevant interfaces for better clarity and usage.
* chore: imports/params ordering
* feat: update file model to use messageId for filtering and processing
  - Changed references from 'message' to 'messageId' in file-related methods for consistency.
  - Added messageId field to the file schema and updated related types.
  - Enhanced file processing logic to accommodate the new messageId structure.
* feat: enhance file retrieval methods to support user-uploaded execute_code files
  - Added a new method `getUserCodeFiles` to retrieve user-uploaded execute_code files, excluding code-generated files.
  - Updated existing file retrieval methods to improve filtering logic and handle edge cases.
  - Enhanced thread data extraction to collect both message IDs and file IDs efficiently.
  - Integrated `getUserCodeFiles` into relevant endpoints for better file management in conversations.
* chore: update @librechat/agents package version to 3.0.78 in package-lock.json and related package.json files
* refactor: file processing and retrieval logic
  - Added a fallback mechanism for download URLs when files exceed size limits or cannot be processed locally.
  - Implemented a deduplication strategy for code-generated files based on conversationId and filename to optimize storage.
  - Updated file retrieval methods to ensure proper filtering by messageIds, preventing orphaned files from being included.
  - Introduced comprehensive tests for new thread data extraction functionality, covering edge cases and performance considerations.
* fix: improve file retrieval tests and handling of optional properties
  - Updated tests to access possibly-undefined query results using non-null assertions.
  - Modified test descriptions for clarity regarding the exclusion of execute_code files.
  - Ensured that the retrieval logic correctly reflects the expected outcomes for file queries.
* test: add comprehensive unit tests for processCodeOutput functionality
  - Introduced a new test suite for the processCodeOutput function, covering various scenarios including file retrieval, creation, and processing for both image and non-image files.
  - Implemented mocks for dependencies such as axios, logger, and file models to isolate tests and ensure reliable outcomes.
  - Validated behavior for existing files, new file creation, and error handling, including size limits and fallback mechanisms.
  - Enhanced test coverage for metadata handling and usage increment logic, ensuring robust verification of file processing outcomes.
* test: enhance file size limit enforcement in processCodeOutput tests
  - Introduced a configurable file size limit for tests to improve flexibility and coverage.
  - Mocked the `librechat-data-provider` to allow dynamic adjustment of file size limits during tests.
  - Updated the file size limit enforcement test to validate behavior when files exceed specified limits, ensuring proper fallback to download URLs.
  - Reset file size limit after tests to maintain isolation for subsequent test cases.
2026-03-06 00:00:18 +01:00 · 2026-01-16 10:06:24 -05:00 · 2026-01-16 10:06:24 -05:00 · cc32895d13
commit cc32895d13
parent fe32cbedf9
22 changed files with 1364 additions and 83 deletions
--- a/packages/data-schemas/src/methods/file.spec.ts
+++ b/packages/data-schemas/src/methods/file.spec.ts
@ -130,7 +130,7 @@ describe('File Methods', () => {

      const files = await fileMethods.getFiles({ user: userId });
      expect(files).toHaveLength(3);
-      expect(files.map((f) => f.file_id)).toEqual(expect.arrayContaining(fileIds));
+      expect(files!.map((f) => f.file_id)).toEqual(expect.arrayContaining(fileIds));
    });

    it('should exclude text field by default', async () => {
@ -149,7 +149,7 @@ describe('File Methods', () => {

      const files = await fileMethods.getFiles({ file_id: fileId });
      expect(files).toHaveLength(1);
-      expect(files[0].text).toBeUndefined();
+      expect(files![0].text).toBeUndefined();
    });
  });

@ -207,7 +207,7 @@ describe('File Methods', () => {
      expect(files[0].file_id).toBe(contextFileId);
    });

-    it('should retrieve files for execute_code tool', async () => {
+    it('should not retrieve execute_code files (handled by getCodeGeneratedFiles)', async () => {
      const userId = new mongoose.Types.ObjectId();
      const codeFileId = uuidv4();

@ -218,14 +218,16 @@ describe('File Methods', () => {
        filepath: '/uploads/code.py',
        type: 'text/x-python',
        bytes: 100,
+        context: FileContext.execute_code,
        metadata: { fileIdentifier: 'some-identifier' },
      });

+      // execute_code files are explicitly excluded from getToolFilesByIds
+      // They are retrieved via getCodeGeneratedFiles and getUserCodeFiles instead
      const toolSet = new Set([EToolResources.execute_code]);
      const files = await fileMethods.getToolFilesByIds([codeFileId], toolSet);

-      expect(files).toHaveLength(1);
-      expect(files[0].file_id).toBe(codeFileId);
+      expect(files).toHaveLength(0);
    });
  });

@ -490,7 +492,7 @@ describe('File Methods', () => {

      const remaining = await fileMethods.getFiles({});
      expect(remaining).toHaveLength(1);
-      expect(remaining[0].user?.toString()).toBe(otherUserId.toString());
+      expect(remaining![0].user?.toString()).toBe(otherUserId.toString());
    });
  });

--- a/packages/data-schemas/src/methods/file.ts
+++ b/packages/data-schemas/src/methods/file.ts
@ -47,7 +47,8 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
  }

  /**
-   * Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs
+   * Retrieves tool files (agent-context files that have text, or embedded files) from an array of file IDs.
+   * Note: execute_code files are excluded here; they are handled by getCodeGeneratedFiles and getUserCodeFiles.
   * @param fileIds - Array of file_id strings to search for
   * @param toolResourceSet - Optional filter for tool resources
   * @returns Files that match the criteria
@ -61,21 +62,26 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
    }

    try {
-      const filter: FilterQuery<IMongoFile> = {
-        file_id: { $in: fileIds },
-        $or: [],
-      };
+      const orConditions: FilterQuery<IMongoFile>[] = [];

      if (toolResourceSet.has(EToolResources.context)) {
-        filter.$or?.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
+        orConditions.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
      }
      if (toolResourceSet.has(EToolResources.file_search)) {
-        filter.$or?.push({ embedded: true });
+        orConditions.push({ embedded: true });
      }
-      if (toolResourceSet.has(EToolResources.execute_code)) {
-        filter.$or?.push({ 'metadata.fileIdentifier': { $exists: true } });
+
+      // If no conditions to match, return empty
+      if (orConditions.length === 0) {
+        return [];
      }

+      const filter: FilterQuery<IMongoFile> = {
+        file_id: { $in: fileIds },
+        context: { $ne: FileContext.execute_code },
+        $or: orConditions,
+      };
+
      const selectFields: SelectProjection = { text: 0 };
      const sortOptions = { updatedAt: -1 as SortOrder };

@ -87,6 +93,84 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
    }
  }

+  /**
+   * Retrieves files generated by code execution for a given conversation.
+   * These files are stored locally with fileIdentifier metadata for code env re-upload.
+   * Matches on conversationId, execute_code context, messageId, and existing fileIdentifier metadata.
+   * @param conversationId - The conversation ID to search for; a falsy value yields an empty array
+   * @param messageIds - Array of messageIds to filter by (for linear thread filtering).
+   *   While technically optional, this function returns empty if not provided.
+   *   This is intentional: code-generated files must be filtered by thread to avoid
+   *   including files from other branches of a conversation.
+   * @returns Matching files sorted by createdAt ascending, without `text`; empty array on missing input or query error
+   */
+  async function getCodeGeneratedFiles(
+    conversationId: string,
+    messageIds?: string[],
+  ): Promise<IMongoFile[]> {
+    if (!conversationId) {
+      return [];
+    }
+
+    /**
+     * Return early if messageIds not provided - this is intentional behavior.
+     * Code-generated files must be filtered by thread messageIds to ensure we only
+     * return files relevant to the current conversation branch, not orphaned files
+     * from other branches or deleted messages.
+     */
+    if (!messageIds || messageIds.length === 0) {
+      return [];
+    }
+
+    try {
+      const filter: FilterQuery<IMongoFile> = {
+        conversationId,
+        context: FileContext.execute_code,
+        messageId: { $exists: true, $in: messageIds },
+        'metadata.fileIdentifier': { $exists: true },
+      };
+
+      const selectFields: SelectProjection = { text: 0 };
+      const sortOptions = { createdAt: 1 as SortOrder };
+
+      const results = await getFiles(filter, sortOptions, selectFields);
+      return results ?? [];
+    } catch (error) {
+      logger.error('[getCodeGeneratedFiles] Error retrieving code generated files:', error);
+      return [];
+    }
+  }
+
+  /**
+   * Retrieves user-uploaded execute_code files (not code-generated) by their file IDs.
+   * These are files that carry fileIdentifier metadata but whose context is NOT execute_code (e.g., agents or message_attachment).
+   * File IDs should be collected from message.files arrays in the current thread.
+   * @param fileIds - Array of file IDs to fetch (from message.files in the thread); missing or empty yields an empty array
+   * @returns Matching files sorted by createdAt ascending, without `text`; empty array if the query fails (error is logged)
+   */
+  async function getUserCodeFiles(fileIds?: string[]): Promise<IMongoFile[]> {
+    if (!fileIds || fileIds.length === 0) {
+      return [];
+    }
+
+    try {
+      const filter: FilterQuery<IMongoFile> = {
+        file_id: { $in: fileIds },
+        context: { $ne: FileContext.execute_code },
+        'metadata.fileIdentifier': { $exists: true },
+      };
+
+      const selectFields: SelectProjection = { text: 0 };
+      const sortOptions = { createdAt: 1 as SortOrder };
+
+      const results = await getFiles(filter, sortOptions, selectFields);
+      return results ?? [];
+    } catch (error) {
+      logger.error('[getUserCodeFiles] Error retrieving user code files:', error);
+      return [];
+    }
+  }
+
  /**
   * Creates a new file with a TTL of 1 hour.
   * @param data - The file data to be created, must contain file_id
@ -258,6 +342,8 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
    findFileById,
    getFiles,
    getToolFilesByIds,
+    getCodeGeneratedFiles,
+    getUserCodeFiles,
    createFile,
    updateFile,
    updateFileUsage,
--- a/packages/data-schemas/src/schema/file.ts
+++ b/packages/data-schemas/src/schema/file.ts
@ -15,6 +15,10 @@ const file: Schema<IMongoFile> = new Schema(
      ref: 'Conversation',
      index: true,
    },
+    messageId: {
+      type: String,
+      index: true,
+    },
    file_id: {
      type: String,
      index: true,
--- a/packages/data-schemas/src/types/file.ts
+++ b/packages/data-schemas/src/types/file.ts
@ -3,6 +3,7 @@ import { Document, Types } from 'mongoose';
 export interface IMongoFile extends Omit<Document, 'model'> {
  user: Types.ObjectId;
  conversationId?: string;
+  messageId?: string;
  file_id: string;
  temp_file_id?: string;
  bytes: number;