Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-02-17 16:08:10 +01:00)
🆔 fix: Atomic File Dedupe, Bedrock Tokens Fix, and Allowed MIME Types (#11675)
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Has been cancelled
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Has been cancelled
* feat: Add support for Apache Parquet MIME types
  - Introduced 'application/x-parquet' to the full MIME types list and code interpreter MIME types list.
  - Updated application MIME types regex to include 'x-parquet' and 'vnd.apache.parquet'.
  - Added mapping for '.parquet' files to 'application/x-parquet' in code type mapping, enhancing file format support.

* feat: Implement atomic file claiming for code execution outputs
  - Added a new `claimCodeFile` function to atomically claim a file_id for code execution outputs, preventing duplicates by using a compound key of filename and conversationId.
  - Updated `processCodeOutput` to utilize the new claiming mechanism, ensuring that concurrent calls for the same filename converge on a single record.
  - Refactored related tests to validate the new atomic claiming behavior and its impact on file usage tracking and versioning.

* fix: Update image file handling to use cache-busting filepath
  - Modified the `processCodeOutput` function to generate a cache-busting filepath for updated image files, improving browser caching behavior.
  - Adjusted related tests to reflect the change from versioned filenames to cache-busted filepaths, ensuring accurate validation of image updates.

* fix: Update step handler to prevent undefined content for non-tool call types
  - Modified the condition in useStepHandler to ensure that undefined content is only assigned for specific content types, enhancing the robustness of content handling.

* fix: Update bedrockOutputParser to handle maxTokens for adaptive models
  - Modified the bedrockOutputParser logic to ensure that maxTokens is not set for adaptive models when neither maxTokens nor maxOutputTokens are provided, improving the handling of adaptive thinking configurations.
  - Updated related tests to reflect these changes, ensuring accurate validation of the output for adaptive models.

* chore: Update @librechat/agents to version 3.1.38 in package.json and package-lock.json

* fix: Enhance file claiming and error handling in code processing
  - Updated the `processCodeOutput` function to use a consistent file ID for claiming files, preventing duplicates and improving concurrency handling.
  - Refactored the `createFileMethods` to include error handling for failed file claims, ensuring robust behavior when claiming files for conversations.
  - These changes enhance the reliability of file management in the application.

* fix: Update adaptive thinking test for Opus 4.6 model
  - Modified the test for configuring adaptive thinking to reflect that no default maxTokens should be set for the Opus 4.6 model.
  - Updated assertions to ensure that maxTokens is undefined, aligning with the expected behavior for adaptive models.
parent a771d70b10
commit 9054ca9c15

12 changed files with 130 additions and 126 deletions
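Before the hunks below, here is a minimal sketch of the atomic claiming described in the commit message above, as it could be written with Mongoose. The pared-down schema, the model registration, the `'execute_code'` string, and the option set are assumptions for illustration; the real `claimCodeFile` lives in `~/models` and is not reproduced on this page.

```js
const mongoose = require('mongoose');

// Hypothetical, pared-down schema - the real File model in ~/models has more fields.
const fileSchema = new mongoose.Schema(
  {
    file_id: String,
    filename: String,
    conversationId: String,
    context: String,
    user: String,
    usage: Number,
  },
  { timestamps: true },
);
const File = mongoose.models.File || mongoose.model('File', fileSchema);

/**
 * Atomically claim a file_id for a (filename, conversationId, context) key.
 * $setOnInsert only applies when the upsert creates the document, so concurrent
 * callers racing on the same key converge on one record: the first insert wins
 * and later callers simply read back the already-claimed file_id.
 */
async function claimCodeFile({ filename, conversationId, file_id, user }) {
  return File.findOneAndUpdate(
    { filename, conversationId, context: 'execute_code' },
    { $setOnInsert: { file_id, user } },
    { upsert: true, new: true, lean: true },
  );
}
```

The call-site check in the diff then stays simple: generate a candidate `file_id`, pass it to the claim, and treat the result as an update whenever the returned `file_id` differs from the candidate.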
```diff
@@ -18,9 +18,9 @@ const {
   getEndpointFileConfig,
 } = require('librechat-data-provider');
 const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions');
+const { createFile, getFiles, updateFile, claimCodeFile } = require('~/models');
 const { getStrategyFunctions } = require('~/server/services/Files/strategies');
 const { convertImage } = require('~/server/services/Files/images/convert');
-const { createFile, getFiles, updateFile } = require('~/models');
 const { determineFileType } = require('~/server/utils');
 
 /**
```
```diff
@@ -56,50 +56,6 @@ const createDownloadFallback = ({
     };
 };
 
-/**
- * Find an existing code-generated file by filename in the conversation.
- * Used to update existing files instead of creating duplicates.
- *
- * ## Deduplication Strategy
- *
- * Files are deduplicated by `(conversationId, filename)` - NOT including `messageId`.
- * This is an intentional design decision to handle iterative code development patterns:
- *
- * **Rationale:**
- * - When users iteratively refine code (e.g., "regenerate that chart with red bars"),
- *   the same logical file (e.g., "chart.png") is produced multiple times
- * - Without deduplication, each iteration would create a new file, leading to storage bloat
- * - The latest version is what matters for re-upload to the code environment
- *
- * **Implications:**
- * - Different messages producing files with the same name will update the same file record
- * - The `messageId` field tracks which message last updated the file
- * - The `usage` counter tracks how many times the file has been generated
- *
- * **Future Considerations:**
- * - If file versioning is needed, consider adding a `versions` array or separate version collection
- * - The current approach prioritizes storage efficiency over history preservation
- *
- * @param {string} filename - The filename to search for.
- * @param {string} conversationId - The conversation ID.
- * @returns {Promise<MongoFile | null>} The existing file or null.
- */
-const findExistingCodeFile = async (filename, conversationId) => {
-  if (!filename || !conversationId) {
-    return null;
-  }
-  const files = await getFiles(
-    {
-      filename,
-      conversationId,
-      context: FileContext.execute_code,
-    },
-    { createdAt: -1 },
-    { text: 0 },
-  );
-  return files?.[0] ?? null;
-};
-
 /**
  * Process code execution output files - downloads and saves both images and non-image files.
  * All files are saved to local storage with fileIdentifier metadata for code env re-upload.
```
```diff
@@ -170,12 +126,19 @@ const processCodeOutput = async ({
   const fileIdentifier = `${session_id}/${id}`;
 
   /**
-   * Check for existing file with same filename in this conversation.
-   * If found, we'll update it instead of creating a duplicate.
+   * Atomically claim a file_id for this (filename, conversationId, context) tuple.
+   * Uses $setOnInsert so concurrent calls for the same filename converge on
+   * a single record instead of creating duplicates (TOCTOU race fix).
    */
-  const existingFile = await findExistingCodeFile(name, conversationId);
-  const file_id = existingFile?.file_id ?? v4();
-  const isUpdate = !!existingFile;
+  const newFileId = v4();
+  const claimed = await claimCodeFile({
+    filename: name,
+    conversationId,
+    file_id: newFileId,
+    user: req.user.id,
+  });
+  const file_id = claimed.file_id;
+  const isUpdate = file_id !== newFileId;
 
   if (isUpdate) {
     logger.debug(
```
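One caveat worth noting: an upsert of this shape only fully closes the TOCTOU window when the dedup key is enforced by a unique index; without one, two racing inserts can still both succeed. The commit shown here includes no schema changes, so the following is purely a hypothetical sketch (reusing the illustrative `fileSchema` from the sketch above) of what backing the `(conversationId, filename, context)` key could look like:

```js
// Hypothetical: a partial unique index on the dedup key, limited to code-execution
// outputs so regular user uploads with identical filenames are unaffected.
fileSchema.index(
  { conversationId: 1, filename: 1, context: 1 },
  { unique: true, partialFilterExpression: { context: 'execute_code' } },
);
```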
```diff
@@ -184,27 +147,29 @@ const processCodeOutput = async ({
   }
 
   if (isImage) {
+    const usage = isUpdate ? (claimed.usage ?? 0) + 1 : 1;
     const _file = await convertImage(req, buffer, 'high', `${file_id}${fileExt}`);
+    const filepath = usage > 1 ? `${_file.filepath}?v=${Date.now()}` : _file.filepath;
     const file = {
       ..._file,
+      filepath,
       file_id,
       messageId,
-      usage: isUpdate ? (existingFile.usage ?? 0) + 1 : 1,
+      usage,
       filename: name,
       conversationId,
       user: req.user.id,
       type: `image/${appConfig.imageOutputType}`,
-      createdAt: isUpdate ? existingFile.createdAt : formattedDate,
+      createdAt: isUpdate ? claimed.createdAt : formattedDate,
       updatedAt: formattedDate,
       source: appConfig.fileStrategy,
       context: FileContext.execute_code,
       metadata: { fileIdentifier },
     };
-    createFile(file, true);
+    await createFile(file, true);
     return Object.assign(file, { messageId, toolCallId });
   }
 
   // For non-image files, save to configured storage strategy
   const { saveBuffer } = getStrategyFunctions(appConfig.fileStrategy);
   if (!saveBuffer) {
     logger.warn(
```
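The cache-busting change above only matters for regenerated images: the storage path stays stable because it is derived from the claimed `file_id`, so the query-string version is what forces the browser to refetch. A small illustration, using the path from the test fixtures further down:

```js
// Illustration of the cache-busting behavior (path taken from the test fixtures).
const base = '/images/user-123/existing-img-id.webp';

// First generation (usage === 1): plain path, cacheable as usual.
const firstFilepath = base;

// Regenerated output (usage > 1): same path plus a timestamp query param,
// which the browser treats as a new URL and therefore refetches.
const updatedFilepath = `${base}?v=${Date.now()}`;
// e.g. '/images/user-123/existing-img-id.webp?v=1718000000000'
```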
```diff
@@ -221,7 +186,6 @@ const processCodeOutput = async ({
     });
   }
 
   // Determine MIME type from buffer or extension
   const detectedType = await determineFileType(buffer, true);
   const mimeType = detectedType?.mime || inferMimeType(name, '') || 'application/octet-stream';
 
```
```diff
@@ -258,11 +222,11 @@ const processCodeOutput = async ({
     metadata: { fileIdentifier },
     source: appConfig.fileStrategy,
     context: FileContext.execute_code,
-    usage: isUpdate ? (existingFile.usage ?? 0) + 1 : 1,
-    createdAt: isUpdate ? existingFile.createdAt : formattedDate,
+    usage: isUpdate ? (claimed.usage ?? 0) + 1 : 1,
+    createdAt: isUpdate ? claimed.createdAt : formattedDate,
   };
 
-  createFile(file, true);
+  await createFile(file, true);
   return Object.assign(file, { messageId, toolCallId });
 } catch (error) {
   logAxiosError({
```
```diff
@@ -61,10 +61,12 @@ jest.mock('@librechat/api', () => ({
 }));
 
 // Mock models
+const mockClaimCodeFile = jest.fn();
 jest.mock('~/models', () => ({
-  createFile: jest.fn(),
+  createFile: jest.fn().mockResolvedValue({}),
   getFiles: jest.fn(),
   updateFile: jest.fn(),
+  claimCodeFile: (...args) => mockClaimCodeFile(...args),
 }));
 
 // Mock permissions (must be before process.js import)
```
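The `(...args) => mockClaimCodeFile(...args)` indirection in the module mock is a common Jest pattern: `jest.mock` factories are hoisted and run lazily when the mocked module is first required, so deferring the dereference to call time avoids capturing an uninitialized `jest.fn()`. Whether that was the motivation here is an assumption; a minimal illustration:

```js
// Illustrative only - a lazily-dereferenced mock survives jest.mock() hoisting.
const mockClaimCodeFile = jest.fn();

jest.mock('~/models', () => ({
  // A direct reference (claimCodeFile: mockClaimCodeFile) can capture an
  // uninitialized binding depending on require order; the wrapper reads the
  // variable only when the code under test actually calls it.
  claimCodeFile: (...args) => mockClaimCodeFile(...args),
}));
```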
```diff
@@ -119,7 +121,11 @@ describe('Code Process', () => {
 
   beforeEach(() => {
     jest.clearAllMocks();
-    // Default mock implementations
+    // Default mock: atomic claim returns a new file record (no existing file)
+    mockClaimCodeFile.mockResolvedValue({
+      file_id: 'mock-uuid-1234',
+      user: 'user-123',
+    });
     getFiles.mockResolvedValue(null);
     createFile.mockResolvedValue({});
     getStrategyFunctions.mockReturnValue({
```
```diff
@@ -128,67 +134,46 @@ describe('Code Process', () => {
     determineFileType.mockResolvedValue({ mime: 'text/plain' });
   });
 
-  describe('findExistingCodeFile (via processCodeOutput)', () => {
-    it('should find existing file by filename and conversationId', async () => {
-      const existingFile = {
+  describe('atomic file claim (via processCodeOutput)', () => {
+    it('should reuse file_id from existing record via atomic claim', async () => {
+      mockClaimCodeFile.mockResolvedValue({
         file_id: 'existing-file-id',
-        filename: 'test-file.txt',
         usage: 2,
         createdAt: '2024-01-01T00:00:00.000Z',
-      };
-      getFiles.mockResolvedValue([existingFile]);
+      });
 
       const smallBuffer = Buffer.alloc(100);
       axios.mockResolvedValue({ data: smallBuffer });
 
       const result = await processCodeOutput(baseParams);
 
-      // Verify getFiles was called with correct deduplication query
-      expect(getFiles).toHaveBeenCalledWith(
-        {
-          filename: 'test-file.txt',
-          conversationId: 'conv-123',
-          context: FileContext.execute_code,
-        },
-        { createdAt: -1 },
-        { text: 0 },
-      );
+      expect(mockClaimCodeFile).toHaveBeenCalledWith({
+        filename: 'test-file.txt',
+        conversationId: 'conv-123',
+        file_id: 'mock-uuid-1234',
+        user: 'user-123',
+      });
 
       // Verify the existing file_id was reused
       expect(result.file_id).toBe('existing-file-id');
       // Verify usage was incremented
       expect(result.usage).toBe(3);
       // Verify original createdAt was preserved
       expect(result.createdAt).toBe('2024-01-01T00:00:00.000Z');
     });
 
     it('should create new file when no existing file found', async () => {
-      getFiles.mockResolvedValue(null);
+      mockClaimCodeFile.mockResolvedValue({
+        file_id: 'mock-uuid-1234',
+        user: 'user-123',
+      });
 
       const smallBuffer = Buffer.alloc(100);
       axios.mockResolvedValue({ data: smallBuffer });
 
       const result = await processCodeOutput(baseParams);
 
       // Should use the mocked uuid
       expect(result.file_id).toBe('mock-uuid-1234');
       // Should have usage of 1 for new file
       expect(result.usage).toBe(1);
     });
 
     it('should return null for invalid inputs (empty filename)', async () => {
       const smallBuffer = Buffer.alloc(100);
       axios.mockResolvedValue({ data: smallBuffer });
 
-      // The function handles this internally - with empty name
-      // findExistingCodeFile returns null early for empty filename (guard clause)
       const result = await processCodeOutput({ ...baseParams, name: '' });
 
-      // getFiles should NOT be called due to early return in findExistingCodeFile
-      expect(getFiles).not.toHaveBeenCalled();
       // A new file_id should be generated since no existing file was found
       expect(result.file_id).toBe('mock-uuid-1234');
     });
   });
 
   describe('processCodeOutput', () => {
```
```diff
@@ -203,7 +188,6 @@ describe('Code Process', () => {
       bytes: 400,
     };
     convertImage.mockResolvedValue(convertedFile);
-    getFiles.mockResolvedValue(null);
 
     const result = await processCodeOutput(imageParams);
 
```
```diff
@@ -218,23 +202,29 @@ describe('Code Process', () => {
       expect(result.filename).toBe('chart.png');
     });
 
-    it('should update existing image file and increment usage', async () => {
+    it('should update existing image file with cache-busted filepath', async () => {
       const imageParams = { ...baseParams, name: 'chart.png' };
-      const existingFile = {
+      mockClaimCodeFile.mockResolvedValue({
         file_id: 'existing-img-id',
         usage: 1,
         createdAt: '2024-01-01T00:00:00.000Z',
-      };
-      getFiles.mockResolvedValue([existingFile]);
+      });
 
       const imageBuffer = Buffer.alloc(500);
       axios.mockResolvedValue({ data: imageBuffer });
-      convertImage.mockResolvedValue({ filepath: '/uploads/img.webp' });
+      convertImage.mockResolvedValue({ filepath: '/images/user-123/existing-img-id.webp' });
 
       const result = await processCodeOutput(imageParams);
 
+      expect(convertImage).toHaveBeenCalledWith(
+        mockReq,
+        imageBuffer,
+        'high',
+        'existing-img-id.png',
+      );
       expect(result.file_id).toBe('existing-img-id');
       expect(result.usage).toBe(2);
+      expect(result.filepath).toMatch(/^\/images\/user-123\/existing-img-id\.webp\?v=\d+$/);
       expect(logger.debug).toHaveBeenCalledWith(
         expect.stringContaining('Updating existing file'),
       );
```
```diff
@@ -335,7 +325,6 @@ describe('Code Process', () => {
 
   describe('usage counter increment', () => {
     it('should set usage to 1 for new files', async () => {
-      getFiles.mockResolvedValue(null);
       const smallBuffer = Buffer.alloc(100);
       axios.mockResolvedValue({ data: smallBuffer });
 
```
```diff
@@ -345,8 +334,11 @@ describe('Code Process', () => {
     });
 
     it('should increment usage for existing files', async () => {
-      const existingFile = { file_id: 'existing-id', usage: 5, createdAt: '2024-01-01' };
-      getFiles.mockResolvedValue([existingFile]);
+      mockClaimCodeFile.mockResolvedValue({
+        file_id: 'existing-id',
+        usage: 5,
+        createdAt: '2024-01-01',
+      });
       const smallBuffer = Buffer.alloc(100);
       axios.mockResolvedValue({ data: smallBuffer });
 
```
```diff
@@ -356,14 +348,15 @@ describe('Code Process', () => {
     });
 
     it('should handle existing file with undefined usage', async () => {
-      const existingFile = { file_id: 'existing-id', createdAt: '2024-01-01' };
-      getFiles.mockResolvedValue([existingFile]);
+      mockClaimCodeFile.mockResolvedValue({
+        file_id: 'existing-id',
+        createdAt: '2024-01-01',
+      });
       const smallBuffer = Buffer.alloc(100);
       axios.mockResolvedValue({ data: smallBuffer });
 
       const result = await processCodeOutput(baseParams);
 
       // (undefined ?? 0) + 1 = 1
       expect(result.usage).toBe(1);
     });
   });
```