From cc32895d13819faf2f2a7ff024cd4e0a61326673 Mon Sep 17 00:00:00 2001
From: Danny Avila
Date: Fri, 16 Jan 2026 10:06:24 -0500
Subject: [PATCH] =?UTF-8?q?=F0=9F=97=82=EF=B8=8F=20feat:=20Better=20Persis?=
 =?UTF-8?q?tence=20for=20Code=20Execution=20Files=20Between=20Sessions=20(?=
 =?UTF-8?q?#11362)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: process code output files for re-use (WIP)

* feat: file attachment handling with additional metadata for downloads

* refactor: Update directory path logic for local file saving based on basePath

* refactor: file attachment handling to support TFile type and improve data merging logic

* feat: thread filtering of code-generated files
- Introduced parentMessageId parameter in addedConvo and initialize functions to enhance thread management.
- Updated related methods to utilize parentMessageId for retrieving messages and filtering code-generated files by conversation threads.
- Enhanced type definitions to include parentMessageId in relevant interfaces for better clarity and usage.

* chore: imports/params ordering

* feat: update file model to use messageId for filtering and processing
- Changed references from 'message' to 'messageId' in file-related methods for consistency.
- Added messageId field to the file schema and updated related types.
- Enhanced file processing logic to accommodate the new messageId structure.

* feat: enhance file retrieval methods to support user-uploaded execute_code files
- Added a new method `getUserCodeFiles` to retrieve user-uploaded execute_code files, excluding code-generated files.
- Updated existing file retrieval methods to improve filtering logic and handle edge cases.
- Enhanced thread data extraction to collect both message IDs and file IDs efficiently.
- Integrated `getUserCodeFiles` into relevant endpoints for better file management in conversations.

* chore: update @librechat/agents package version to 3.0.78 in package-lock.json and related package.json files

* refactor: file processing and retrieval logic
- Added a fallback mechanism for download URLs when files exceed size limits or cannot be processed locally.
- Implemented a deduplication strategy for code-generated files based on conversationId and filename to optimize storage.
- Updated file retrieval methods to ensure proper filtering by messageIds, preventing orphaned files from being included.
- Introduced comprehensive tests for new thread data extraction functionality, covering edge cases and performance considerations.

* fix: improve file retrieval tests and handling of optional properties
- Updated tests to safely access optional properties using non-null assertions.
- Modified test descriptions for clarity regarding the exclusion of execute_code files.
- Ensured that the retrieval logic correctly reflects the expected outcomes for file queries.

* test: add comprehensive unit tests for processCodeOutput functionality
- Introduced a new test suite for the processCodeOutput function, covering various scenarios including file retrieval, creation, and processing for both image and non-image files.
- Implemented mocks for dependencies such as axios, logger, and file models to isolate tests and ensure reliable outcomes.
- Validated behavior for existing files, new file creation, and error handling, including size limits and fallback mechanisms.
- Enhanced test coverage for metadata handling and usage increment logic, ensuring robust verification of file processing outcomes.
* test: enhance file size limit enforcement in processCodeOutput tests - Introduced a configurable file size limit for tests to improve flexibility and coverage. - Mocked the `librechat-data-provider` to allow dynamic adjustment of file size limits during tests. - Updated the file size limit enforcement test to validate behavior when files exceed specified limits, ensuring proper fallback to download URLs. - Reset file size limit after tests to maintain isolation for subsequent test cases. --- api/models/File.js | 89 +++- api/package.json | 2 +- api/server/controllers/agents/client.js | 1 + .../services/Endpoints/agents/addedConvo.js | 6 + .../services/Endpoints/agents/initialize.js | 27 +- api/server/services/Files/Code/process.js | 248 +++++++++-- .../services/Files/Code/process.spec.js | 418 ++++++++++++++++++ api/server/services/Files/Local/crud.js | 7 +- .../Messages/Content/Parts/Attachment.tsx | 4 + .../Messages/Content/Parts/LogContent.tsx | 9 +- .../Chat/Messages/Content/Parts/LogLink.tsx | 47 +- client/src/hooks/SSE/useAttachmentHandler.ts | 28 +- package-lock.json | 10 +- packages/api/package.json | 2 +- packages/api/src/agents/initialize.ts | 72 ++- packages/api/src/utils/message.spec.ts | 275 +++++++++++- packages/api/src/utils/message.ts | 72 +++ packages/data-provider/src/file-config.ts | 7 + .../data-schemas/src/methods/file.spec.ts | 14 +- packages/data-schemas/src/methods/file.ts | 104 ++++- packages/data-schemas/src/schema/file.ts | 4 + packages/data-schemas/src/types/file.ts | 1 + 22 files changed, 1364 insertions(+), 83 deletions(-) create mode 100644 api/server/services/Files/Code/process.spec.js diff --git a/api/models/File.js b/api/models/File.js index 5e90c86fe4..1a01ef12f9 100644 --- a/api/models/File.js +++ b/api/models/File.js @@ -26,7 +26,8 @@ const getFiles = async (filter, _sortOptions, selectFields = { text: 0 }) => { }; /** - * Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs + * Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs. + * Note: execute_code files are handled separately by getCodeGeneratedFiles. * @param {string[]} fileIds - Array of file_id strings to search for * @param {Set} toolResourceSet - Optional filter for tool resources * @returns {Promise>} Files that match the criteria @@ -37,21 +38,25 @@ const getToolFilesByIds = async (fileIds, toolResourceSet) => { } try { - const filter = { - file_id: { $in: fileIds }, - $or: [], - }; + const orConditions = []; if (toolResourceSet.has(EToolResources.context)) { - filter.$or.push({ text: { $exists: true, $ne: null }, context: FileContext.agents }); + orConditions.push({ text: { $exists: true, $ne: null }, context: FileContext.agents }); } if (toolResourceSet.has(EToolResources.file_search)) { - filter.$or.push({ embedded: true }); + orConditions.push({ embedded: true }); } - if (toolResourceSet.has(EToolResources.execute_code)) { - filter.$or.push({ 'metadata.fileIdentifier': { $exists: true } }); + + if (orConditions.length === 0) { + return []; } + const filter = { + file_id: { $in: fileIds }, + context: { $ne: FileContext.execute_code }, // Exclude code-generated files + $or: orConditions, + }; + const selectFields = { text: 0 }; const sortOptions = { updatedAt: -1 }; @@ -62,6 +67,70 @@ const getToolFilesByIds = async (fileIds, toolResourceSet) => { } }; +/** + * Retrieves files generated by code execution for a given conversation. 
+ * These files are stored locally with fileIdentifier metadata for code env re-upload. + * @param {string} conversationId - The conversation ID to search for + * @param {string[]} [messageIds] - Optional array of messageIds to filter by (for linear thread filtering) + * @returns {Promise>} Files generated by code execution in the conversation + */ +const getCodeGeneratedFiles = async (conversationId, messageIds) => { + if (!conversationId) { + return []; + } + + /** messageIds are required for proper thread filtering of code-generated files */ + if (!messageIds || messageIds.length === 0) { + return []; + } + + try { + const filter = { + conversationId, + context: FileContext.execute_code, + messageId: { $exists: true, $in: messageIds }, + 'metadata.fileIdentifier': { $exists: true }, + }; + + const selectFields = { text: 0 }; + const sortOptions = { createdAt: 1 }; + + return await getFiles(filter, sortOptions, selectFields); + } catch (error) { + logger.error('[getCodeGeneratedFiles] Error retrieving code generated files:', error); + return []; + } +}; + +/** + * Retrieves user-uploaded execute_code files (not code-generated) by their file IDs. + * These are files with fileIdentifier metadata but context is NOT execute_code (e.g., agents or message_attachment). + * File IDs should be collected from message.files arrays in the current thread. + * @param {string[]} fileIds - Array of file IDs to fetch (from message.files in the thread) + * @returns {Promise>} User-uploaded execute_code files + */ +const getUserCodeFiles = async (fileIds) => { + if (!fileIds || fileIds.length === 0) { + return []; + } + + try { + const filter = { + file_id: { $in: fileIds }, + context: { $ne: FileContext.execute_code }, + 'metadata.fileIdentifier': { $exists: true }, + }; + + const selectFields = { text: 0 }; + const sortOptions = { createdAt: 1 }; + + return await getFiles(filter, sortOptions, selectFields); + } catch (error) { + logger.error('[getUserCodeFiles] Error retrieving user code files:', error); + return []; + } +}; + /** * Creates a new file with a TTL of 1 hour. * @param {MongoFile} data - The file data to be created, must contain file_id. 
@@ -169,6 +238,8 @@ module.exports = { findFileById, getFiles, getToolFilesByIds, + getCodeGeneratedFiles, + getUserCodeFiles, createFile, updateFile, updateFileUsage, diff --git a/api/package.json b/api/package.json index 9ceb9b624c..2a60c8d0bb 100644 --- a/api/package.json +++ b/api/package.json @@ -45,7 +45,7 @@ "@google/genai": "^1.19.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.80", - "@librechat/agents": "^3.0.77", + "@librechat/agents": "^3.0.78", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index d3b76900dc..664a1727a8 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -633,6 +633,7 @@ class AgentClient extends BaseClient { updateFilesUsage: db.updateFilesUsage, getUserKeyValues: db.getUserKeyValues, getToolFilesByIds: db.getToolFilesByIds, + getCodeGeneratedFiles: db.getCodeGeneratedFiles, }, ); diff --git a/api/server/services/Endpoints/agents/addedConvo.js b/api/server/services/Endpoints/agents/addedConvo.js index 240622ed9f..7e9385267a 100644 --- a/api/server/services/Endpoints/agents/addedConvo.js +++ b/api/server/services/Endpoints/agents/addedConvo.js @@ -31,6 +31,7 @@ setGetAgent(getAgent); * @param {Function} params.loadTools - Function to load agent tools * @param {Array} params.requestFiles - Request files * @param {string} params.conversationId - The conversation ID + * @param {string} [params.parentMessageId] - The parent message ID for thread filtering * @param {Set} params.allowedProviders - Set of allowed providers * @param {Map} params.agentConfigs - Map of agent configs to add to * @param {string} params.primaryAgentId - The primary agent ID @@ -46,6 +47,7 @@ const processAddedConvo = async ({ loadTools, requestFiles, conversationId, + parentMessageId, allowedProviders, agentConfigs, primaryAgentId, @@ -91,6 +93,7 @@ const processAddedConvo = async ({ loadTools, requestFiles, conversationId, + parentMessageId, agent: addedAgent, endpointOption, allowedProviders, @@ -99,9 +102,12 @@ const processAddedConvo = async ({ getConvoFiles, getFiles: db.getFiles, getUserKey: db.getUserKey, + getMessages: db.getMessages, updateFilesUsage: db.updateFilesUsage, + getUserCodeFiles: db.getUserCodeFiles, getUserKeyValues: db.getUserKeyValues, getToolFilesByIds: db.getToolFilesByIds, + getCodeGeneratedFiles: db.getCodeGeneratedFiles, }, ); diff --git a/api/server/services/Endpoints/agents/initialize.js b/api/server/services/Endpoints/agents/initialize.js index fc054c1e6f..5e45cb6aa0 100644 --- a/api/server/services/Endpoints/agents/initialize.js +++ b/api/server/services/Endpoints/agents/initialize.js @@ -3,10 +3,10 @@ const { createContentAggregator } = require('@librechat/agents'); const { initializeAgent, validateAgentModel, - getCustomEndpointConfig, - createSequentialChainEdges, createEdgeCollector, filterOrphanedEdges, + getCustomEndpointConfig, + createSequentialChainEdges, } = require('@librechat/api'); const { EModelEndpoint, @@ -129,6 +129,8 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { const requestFiles = req.body.files ?? 
[]; /** @type {string} */ const conversationId = req.body.conversationId; + /** @type {string | undefined} */ + const parentMessageId = req.body.parentMessageId; const primaryConfig = await initializeAgent( { @@ -137,6 +139,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { loadTools, requestFiles, conversationId, + parentMessageId, agent: primaryAgent, endpointOption, allowedProviders, @@ -146,9 +149,12 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { getConvoFiles, getFiles: db.getFiles, getUserKey: db.getUserKey, + getMessages: db.getMessages, updateFilesUsage: db.updateFilesUsage, getUserKeyValues: db.getUserKeyValues, + getUserCodeFiles: db.getUserCodeFiles, getToolFilesByIds: db.getToolFilesByIds, + getCodeGeneratedFiles: db.getCodeGeneratedFiles, }, ); @@ -188,6 +194,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { loadTools, requestFiles, conversationId, + parentMessageId, endpointOption, allowedProviders, }, @@ -195,9 +202,12 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { getConvoFiles, getFiles: db.getFiles, getUserKey: db.getUserKey, + getMessages: db.getMessages, updateFilesUsage: db.updateFilesUsage, getUserKeyValues: db.getUserKeyValues, + getUserCodeFiles: db.getUserCodeFiles, getToolFilesByIds: db.getToolFilesByIds, + getCodeGeneratedFiles: db.getCodeGeneratedFiles, }, ); if (userMCPAuthMap != null) { @@ -252,17 +262,18 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { const { userMCPAuthMap: updatedMCPAuthMap } = await processAddedConvo({ req, res, - endpointOption, - modelsConfig, - logViolation, loadTools, + logViolation, + modelsConfig, requestFiles, - conversationId, - allowedProviders, agentConfigs, - primaryAgentId: primaryConfig.id, primaryAgent, + endpointOption, userMCPAuthMap, + conversationId, + parentMessageId, + allowedProviders, + primaryAgentId: primaryConfig.id, }); if (updatedMCPAuthMap) { diff --git a/api/server/services/Files/Code/process.js b/api/server/services/Files/Code/process.js index 15df6de0d6..b7e7f56552 100644 --- a/api/server/services/Files/Code/process.js +++ b/api/server/services/Files/Code/process.js @@ -6,27 +6,112 @@ const { getCodeBaseURL } = require('@librechat/agents'); const { logAxiosError, getBasePath } = require('@librechat/api'); const { Tools, + megabyte, + fileConfig, FileContext, FileSources, imageExtRegex, + inferMimeType, EToolResources, + EModelEndpoint, + mergeFileConfig, + getEndpointFileConfig, } = require('librechat-data-provider'); const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions'); const { getStrategyFunctions } = require('~/server/services/Files/strategies'); const { convertImage } = require('~/server/services/Files/images/convert'); const { createFile, getFiles, updateFile } = require('~/models'); +const { determineFileType } = require('~/server/utils'); /** - * Process OpenAI image files, convert to target format, save and return file metadata. + * Creates a fallback download URL response when file cannot be processed locally. + * Used when: file exceeds size limit, storage strategy unavailable, or download error occurs. + * @param {Object} params - The parameters. + * @param {string} params.name - The filename. + * @param {string} params.session_id - The code execution session ID. + * @param {string} params.id - The file ID from the code environment. + * @param {string} params.conversationId - The current conversation ID. 
+ * @param {string} params.toolCallId - The tool call ID that generated the file. + * @param {string} params.messageId - The current message ID. + * @param {number} params.expiresAt - Expiration timestamp (24 hours from creation). + * @returns {Object} Fallback response with download URL. + */ +const createDownloadFallback = ({ + id, + name, + messageId, + expiresAt, + session_id, + toolCallId, + conversationId, +}) => { + const basePath = getBasePath(); + return { + filename: name, + filepath: `${basePath}/api/files/code/download/${session_id}/${id}`, + expiresAt, + conversationId, + toolCallId, + messageId, + }; +}; + +/** + * Find an existing code-generated file by filename in the conversation. + * Used to update existing files instead of creating duplicates. + * + * ## Deduplication Strategy + * + * Files are deduplicated by `(conversationId, filename)` - NOT including `messageId`. + * This is an intentional design decision to handle iterative code development patterns: + * + * **Rationale:** + * - When users iteratively refine code (e.g., "regenerate that chart with red bars"), + * the same logical file (e.g., "chart.png") is produced multiple times + * - Without deduplication, each iteration would create a new file, leading to storage bloat + * - The latest version is what matters for re-upload to the code environment + * + * **Implications:** + * - Different messages producing files with the same name will update the same file record + * - The `messageId` field tracks which message last updated the file + * - The `usage` counter tracks how many times the file has been generated + * + * **Future Considerations:** + * - If file versioning is needed, consider adding a `versions` array or separate version collection + * - The current approach prioritizes storage efficiency over history preservation + * + * @param {string} filename - The filename to search for. + * @param {string} conversationId - The conversation ID. + * @returns {Promise} The existing file or null. + */ +const findExistingCodeFile = async (filename, conversationId) => { + if (!filename || !conversationId) { + return null; + } + const files = await getFiles( + { + filename, + conversationId, + context: FileContext.execute_code, + }, + { createdAt: -1 }, + { text: 0 }, + ); + return files?.[0] ?? null; +}; + +/** + * Process code execution output files - downloads and saves both images and non-image files. + * All files are saved to local storage with fileIdentifier metadata for code env re-upload. * @param {ServerRequest} params.req - The Express request object. - * @param {string} params.id - The file ID. + * @param {string} params.id - The file ID from the code environment. * @param {string} params.name - The filename. * @param {string} params.apiKey - The code execution API key. * @param {string} params.toolCallId - The tool call ID that generated the file. * @param {string} params.session_id - The code execution session ID. * @param {string} params.conversationId - The current conversation ID. * @param {string} params.messageId - The current message ID. - * @returns {Promise} The file metadata or undefined if an error occurs. + * @returns {Promise} The file metadata or undefined if an error occurs. 
*/ const processCodeOutput = async ({ req, @@ -41,19 +126,15 @@ const processCodeOutput = async ({ const appConfig = req.config; const currentDate = new Date(); const baseURL = getCodeBaseURL(); - const basePath = getBasePath(); - const fileExt = path.extname(name); - if (!fileExt || !imageExtRegex.test(name)) { - return { - filename: name, - filepath: `${basePath}/api/files/code/download/${session_id}/${id}`, - /** Note: expires 24 hours after creation */ - expiresAt: currentDate.getTime() + 86400000, - conversationId, - toolCallId, - messageId, - }; - } + const fileExt = path.extname(name).toLowerCase(); + const isImage = fileExt && imageExtRegex.test(name); + + const mergedFileConfig = mergeFileConfig(appConfig.fileConfig); + const endpointFileConfig = getEndpointFileConfig({ + fileConfig: mergedFileConfig, + endpoint: EModelEndpoint.agents, + }); + const fileSizeLimit = endpointFileConfig.fileSizeLimit ?? mergedFileConfig.serverFileSizeLimit; try { const formattedDate = currentDate.toISOString(); @@ -70,29 +151,135 @@ const processCodeOutput = async ({ const buffer = Buffer.from(response.data, 'binary'); - const file_id = v4(); - const _file = await convertImage(req, buffer, 'high', `${file_id}${fileExt}`); + // Enforce file size limit + if (buffer.length > fileSizeLimit) { + logger.warn( + `[processCodeOutput] File "${name}" (${(buffer.length / megabyte).toFixed(2)} MB) exceeds size limit of ${(fileSizeLimit / megabyte).toFixed(2)} MB, falling back to download URL`, + ); + return createDownloadFallback({ + id, + name, + messageId, + toolCallId, + session_id, + conversationId, + expiresAt: currentDate.getTime() + 86400000, + }); + } + + const fileIdentifier = `${session_id}/${id}`; + + /** + * Check for existing file with same filename in this conversation. + * If found, we'll update it instead of creating a duplicate. + */ + const existingFile = await findExistingCodeFile(name, conversationId); + const file_id = existingFile?.file_id ?? v4(); + const isUpdate = !!existingFile; + + if (isUpdate) { + logger.debug( + `[processCodeOutput] Updating existing file "${name}" (${file_id}) instead of creating duplicate`, + ); + } + + if (isImage) { + const _file = await convertImage(req, buffer, 'high', `${file_id}${fileExt}`); + const file = { + ..._file, + file_id, + messageId, + usage: isUpdate ? (existingFile.usage ?? 0) + 1 : 1, + filename: name, + conversationId, + user: req.user.id, + type: `image/${appConfig.imageOutputType}`, + createdAt: isUpdate ? 
existingFile.createdAt : formattedDate, + updatedAt: formattedDate, + source: appConfig.fileStrategy, + context: FileContext.execute_code, + metadata: { fileIdentifier }, + }; + createFile(file, true); + return Object.assign(file, { messageId, toolCallId }); + } + + // For non-image files, save to configured storage strategy + const { saveBuffer } = getStrategyFunctions(appConfig.fileStrategy); + if (!saveBuffer) { + logger.warn( + `[processCodeOutput] saveBuffer not available for strategy ${appConfig.fileStrategy}, falling back to download URL`, + ); + return createDownloadFallback({ + id, + name, + messageId, + toolCallId, + session_id, + conversationId, + expiresAt: currentDate.getTime() + 86400000, + }); + } + + // Determine MIME type from buffer or extension + const detectedType = await determineFileType(buffer, true); + const mimeType = detectedType?.mime || inferMimeType(name, '') || 'application/octet-stream'; + + /** Check MIME type support - for code-generated files, we're lenient but log unsupported types */ + const isSupportedMimeType = fileConfig.checkType( + mimeType, + endpointFileConfig.supportedMimeTypes, + ); + if (!isSupportedMimeType) { + logger.warn( + `[processCodeOutput] File "${name}" has unsupported MIME type "${mimeType}", proceeding with storage but may not be usable as tool resource`, + ); + } + + const fileName = `${file_id}__${name}`; + const filepath = await saveBuffer({ + userId: req.user.id, + buffer, + fileName, + basePath: 'uploads', + }); + const file = { - ..._file, file_id, - usage: 1, + filepath, + messageId, + object: 'file', filename: name, + type: mimeType, conversationId, user: req.user.id, - type: `image/${appConfig.imageOutputType}`, - createdAt: formattedDate, + bytes: buffer.length, updatedAt: formattedDate, + metadata: { fileIdentifier }, source: appConfig.fileStrategy, context: FileContext.execute_code, + usage: isUpdate ? (existingFile.usage ?? 0) + 1 : 1, + createdAt: isUpdate ? existingFile.createdAt : formattedDate, }; + createFile(file, true); - /** Note: `messageId` & `toolCallId` are not part of file DB schema; message object records associated file ID */ return Object.assign(file, { messageId, toolCallId }); } catch (error) { logAxiosError({ - message: 'Error downloading code environment file', + message: 'Error downloading/processing code environment file', error, }); + + // Fallback for download errors - return download URL so user can still manually download + return createDownloadFallback({ + id, + name, + messageId, + toolCallId, + session_id, + conversationId, + expiresAt: currentDate.getTime() + 86400000, + }); } }; @@ -204,9 +391,16 @@ const primeFiles = async (options, apiKey) => { if (!toolContext) { toolContext = `- Note: The following files are available in the "${Tools.execute_code}" tool environment:`; } - toolContext += `\n\t- /mnt/data/${file.filename}${ - agentResourceIds.has(file.file_id) ? '' : ' (just attached by user)' - }`; + + let fileSuffix = ''; + if (!agentResourceIds.has(file.file_id)) { + fileSuffix = + file.context === FileContext.execute_code + ? 
' (from previous code execution)' + : ' (attached by user)'; + } + + toolContext += `\n\t- /mnt/data/${file.filename}${fileSuffix}`; files.push({ id, session_id, diff --git a/api/server/services/Files/Code/process.spec.js b/api/server/services/Files/Code/process.spec.js new file mode 100644 index 0000000000..7e15888876 --- /dev/null +++ b/api/server/services/Files/Code/process.spec.js @@ -0,0 +1,418 @@ +// Configurable file size limit for tests - use a getter so it can be changed per test +const fileSizeLimitConfig = { value: 20 * 1024 * 1024 }; // Default 20MB + +// Mock librechat-data-provider with configurable file size limit +jest.mock('librechat-data-provider', () => { + const actual = jest.requireActual('librechat-data-provider'); + return { + ...actual, + mergeFileConfig: jest.fn((config) => { + const merged = actual.mergeFileConfig(config); + // Override the serverFileSizeLimit with our test value + return { + ...merged, + get serverFileSizeLimit() { + return fileSizeLimitConfig.value; + }, + }; + }), + getEndpointFileConfig: jest.fn((options) => { + const config = actual.getEndpointFileConfig(options); + // Override fileSizeLimit with our test value + return { + ...config, + get fileSizeLimit() { + return fileSizeLimitConfig.value; + }, + }; + }), + }; +}); + +const { FileContext } = require('librechat-data-provider'); + +// Mock uuid +jest.mock('uuid', () => ({ + v4: jest.fn(() => 'mock-uuid-1234'), +})); + +// Mock axios +jest.mock('axios'); +const axios = require('axios'); + +// Mock logger +jest.mock('@librechat/data-schemas', () => ({ + logger: { + warn: jest.fn(), + debug: jest.fn(), + error: jest.fn(), + }, +})); + +// Mock getCodeBaseURL +jest.mock('@librechat/agents', () => ({ + getCodeBaseURL: jest.fn(() => 'https://code-api.example.com'), +})); + +// Mock logAxiosError and getBasePath +jest.mock('@librechat/api', () => ({ + logAxiosError: jest.fn(), + getBasePath: jest.fn(() => ''), +})); + +// Mock models +jest.mock('~/models', () => ({ + createFile: jest.fn(), + getFiles: jest.fn(), + updateFile: jest.fn(), +})); + +// Mock permissions (must be before process.js import) +jest.mock('~/server/services/Files/permissions', () => ({ + filterFilesByAgentAccess: jest.fn((options) => Promise.resolve(options.files)), +})); + +// Mock strategy functions +jest.mock('~/server/services/Files/strategies', () => ({ + getStrategyFunctions: jest.fn(), +})); + +// Mock convertImage +jest.mock('~/server/services/Files/images/convert', () => ({ + convertImage: jest.fn(), +})); + +// Mock determineFileType +jest.mock('~/server/utils', () => ({ + determineFileType: jest.fn(), +})); + +const { createFile, getFiles } = require('~/models'); +const { getStrategyFunctions } = require('~/server/services/Files/strategies'); +const { convertImage } = require('~/server/services/Files/images/convert'); +const { determineFileType } = require('~/server/utils'); +const { logger } = require('@librechat/data-schemas'); + +// Import after mocks +const { processCodeOutput } = require('./process'); + +describe('Code Process', () => { + const mockReq = { + user: { id: 'user-123' }, + config: { + fileConfig: {}, + fileStrategy: 'local', + imageOutputType: 'webp', + }, + }; + + const baseParams = { + req: mockReq, + id: 'file-id-123', + name: 'test-file.txt', + apiKey: 'test-api-key', + toolCallId: 'tool-call-123', + conversationId: 'conv-123', + messageId: 'msg-123', + session_id: 'session-123', + }; + + beforeEach(() => { + jest.clearAllMocks(); + // Default mock implementations + 
getFiles.mockResolvedValue(null); + createFile.mockResolvedValue({}); + getStrategyFunctions.mockReturnValue({ + saveBuffer: jest.fn().mockResolvedValue('/uploads/mock-file-path.txt'), + }); + determineFileType.mockResolvedValue({ mime: 'text/plain' }); + }); + + describe('findExistingCodeFile (via processCodeOutput)', () => { + it('should find existing file by filename and conversationId', async () => { + const existingFile = { + file_id: 'existing-file-id', + filename: 'test-file.txt', + usage: 2, + createdAt: '2024-01-01T00:00:00.000Z', + }; + getFiles.mockResolvedValue([existingFile]); + + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + + const result = await processCodeOutput(baseParams); + + // Verify getFiles was called with correct deduplication query + expect(getFiles).toHaveBeenCalledWith( + { + filename: 'test-file.txt', + conversationId: 'conv-123', + context: FileContext.execute_code, + }, + { createdAt: -1 }, + { text: 0 }, + ); + + // Verify the existing file_id was reused + expect(result.file_id).toBe('existing-file-id'); + // Verify usage was incremented + expect(result.usage).toBe(3); + // Verify original createdAt was preserved + expect(result.createdAt).toBe('2024-01-01T00:00:00.000Z'); + }); + + it('should create new file when no existing file found', async () => { + getFiles.mockResolvedValue(null); + + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + + const result = await processCodeOutput(baseParams); + + // Should use the mocked uuid + expect(result.file_id).toBe('mock-uuid-1234'); + // Should have usage of 1 for new file + expect(result.usage).toBe(1); + }); + + it('should return null for invalid inputs (empty filename)', async () => { + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + + // The function handles this internally - with empty name + // findExistingCodeFile returns null early for empty filename (guard clause) + const result = await processCodeOutput({ ...baseParams, name: '' }); + + // getFiles should NOT be called due to early return in findExistingCodeFile + expect(getFiles).not.toHaveBeenCalled(); + // A new file_id should be generated since no existing file was found + expect(result.file_id).toBe('mock-uuid-1234'); + }); + }); + + describe('processCodeOutput', () => { + describe('image file processing', () => { + it('should process image files using convertImage', async () => { + const imageParams = { ...baseParams, name: 'chart.png' }; + const imageBuffer = Buffer.alloc(500); + axios.mockResolvedValue({ data: imageBuffer }); + + const convertedFile = { + filepath: '/uploads/converted-image.webp', + bytes: 400, + }; + convertImage.mockResolvedValue(convertedFile); + getFiles.mockResolvedValue(null); + + const result = await processCodeOutput(imageParams); + + expect(convertImage).toHaveBeenCalledWith( + mockReq, + imageBuffer, + 'high', + 'mock-uuid-1234.png', + ); + expect(result.type).toBe('image/webp'); + expect(result.context).toBe(FileContext.execute_code); + expect(result.filename).toBe('chart.png'); + }); + + it('should update existing image file and increment usage', async () => { + const imageParams = { ...baseParams, name: 'chart.png' }; + const existingFile = { + file_id: 'existing-img-id', + usage: 1, + createdAt: '2024-01-01T00:00:00.000Z', + }; + getFiles.mockResolvedValue([existingFile]); + + const imageBuffer = Buffer.alloc(500); + axios.mockResolvedValue({ data: imageBuffer }); + 
convertImage.mockResolvedValue({ filepath: '/uploads/img.webp' }); + + const result = await processCodeOutput(imageParams); + + expect(result.file_id).toBe('existing-img-id'); + expect(result.usage).toBe(2); + expect(logger.debug).toHaveBeenCalledWith( + expect.stringContaining('Updating existing file'), + ); + }); + }); + + describe('non-image file processing', () => { + it('should process non-image files using saveBuffer', async () => { + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + + const mockSaveBuffer = jest.fn().mockResolvedValue('/uploads/saved-file.txt'); + getStrategyFunctions.mockReturnValue({ saveBuffer: mockSaveBuffer }); + determineFileType.mockResolvedValue({ mime: 'text/plain' }); + + const result = await processCodeOutput(baseParams); + + expect(mockSaveBuffer).toHaveBeenCalledWith({ + userId: 'user-123', + buffer: smallBuffer, + fileName: 'mock-uuid-1234__test-file.txt', + basePath: 'uploads', + }); + expect(result.type).toBe('text/plain'); + expect(result.filepath).toBe('/uploads/saved-file.txt'); + expect(result.bytes).toBe(100); + }); + + it('should detect MIME type from buffer', async () => { + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + determineFileType.mockResolvedValue({ mime: 'application/pdf' }); + + const result = await processCodeOutput({ ...baseParams, name: 'document.pdf' }); + + expect(determineFileType).toHaveBeenCalledWith(smallBuffer, true); + expect(result.type).toBe('application/pdf'); + }); + + it('should fallback to application/octet-stream for unknown types', async () => { + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + determineFileType.mockResolvedValue(null); + + const result = await processCodeOutput({ ...baseParams, name: 'unknown.xyz' }); + + expect(result.type).toBe('application/octet-stream'); + }); + }); + + describe('file size limit enforcement', () => { + it('should fallback to download URL when file exceeds size limit', async () => { + // Set a small file size limit for this test + fileSizeLimitConfig.value = 1000; // 1KB limit + + const largeBuffer = Buffer.alloc(5000); // 5KB - exceeds 1KB limit + axios.mockResolvedValue({ data: largeBuffer }); + + const result = await processCodeOutput(baseParams); + + expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('exceeds size limit')); + expect(result.filepath).toContain('/api/files/code/download/session-123/file-id-123'); + expect(result.expiresAt).toBeDefined(); + // Should not call createFile for oversized files (fallback path) + expect(createFile).not.toHaveBeenCalled(); + + // Reset to default for other tests + fileSizeLimitConfig.value = 20 * 1024 * 1024; + }); + }); + + describe('fallback behavior', () => { + it('should fallback to download URL when saveBuffer is not available', async () => { + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + getStrategyFunctions.mockReturnValue({ saveBuffer: null }); + + const result = await processCodeOutput(baseParams); + + expect(logger.warn).toHaveBeenCalledWith( + expect.stringContaining('saveBuffer not available'), + ); + expect(result.filepath).toContain('/api/files/code/download/'); + expect(result.filename).toBe('test-file.txt'); + }); + + it('should fallback to download URL on axios error', async () => { + axios.mockRejectedValue(new Error('Network error')); + + const result = await processCodeOutput(baseParams); + + 
expect(result.filepath).toContain('/api/files/code/download/session-123/file-id-123'); + expect(result.conversationId).toBe('conv-123'); + expect(result.messageId).toBe('msg-123'); + expect(result.toolCallId).toBe('tool-call-123'); + }); + }); + + describe('usage counter increment', () => { + it('should set usage to 1 for new files', async () => { + getFiles.mockResolvedValue(null); + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + + const result = await processCodeOutput(baseParams); + + expect(result.usage).toBe(1); + }); + + it('should increment usage for existing files', async () => { + const existingFile = { file_id: 'existing-id', usage: 5, createdAt: '2024-01-01' }; + getFiles.mockResolvedValue([existingFile]); + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + + const result = await processCodeOutput(baseParams); + + expect(result.usage).toBe(6); + }); + + it('should handle existing file with undefined usage', async () => { + const existingFile = { file_id: 'existing-id', createdAt: '2024-01-01' }; + getFiles.mockResolvedValue([existingFile]); + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + + const result = await processCodeOutput(baseParams); + + // (undefined ?? 0) + 1 = 1 + expect(result.usage).toBe(1); + }); + }); + + describe('metadata and file properties', () => { + it('should include fileIdentifier in metadata', async () => { + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + + const result = await processCodeOutput(baseParams); + + expect(result.metadata).toEqual({ + fileIdentifier: 'session-123/file-id-123', + }); + }); + + it('should set correct context for code-generated files', async () => { + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + + const result = await processCodeOutput(baseParams); + + expect(result.context).toBe(FileContext.execute_code); + }); + + it('should include toolCallId and messageId in result', async () => { + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + + const result = await processCodeOutput(baseParams); + + expect(result.toolCallId).toBe('tool-call-123'); + expect(result.messageId).toBe('msg-123'); + }); + + it('should call createFile with upsert enabled', async () => { + const smallBuffer = Buffer.alloc(100); + axios.mockResolvedValue({ data: smallBuffer }); + + await processCodeOutput(baseParams); + + expect(createFile).toHaveBeenCalledWith( + expect.objectContaining({ + file_id: 'mock-uuid-1234', + context: FileContext.execute_code, + }), + true, // upsert flag + ); + }); + }); + }); +}); diff --git a/api/server/services/Files/Local/crud.js b/api/server/services/Files/Local/crud.js index db553f57dd..b43ab75326 100644 --- a/api/server/services/Files/Local/crud.js +++ b/api/server/services/Files/Local/crud.js @@ -67,7 +67,12 @@ async function saveLocalBuffer({ userId, buffer, fileName, basePath = 'images' } try { const { publicPath, uploads } = paths; - const directoryPath = path.join(basePath === 'images' ? publicPath : uploads, basePath, userId); + /** + * For 'images': save to publicPath/images/userId (images are served statically) + * For 'uploads': save to uploads/userId (files downloaded via API) + * */ + const directoryPath = + basePath === 'images' ? 
path.join(publicPath, basePath, userId) : path.join(uploads, userId); if (!fs.existsSync(directoryPath)) { fs.mkdirSync(directoryPath, { recursive: true }); diff --git a/client/src/components/Chat/Messages/Content/Parts/Attachment.tsx b/client/src/components/Chat/Messages/Content/Parts/Attachment.tsx index 1d14534e0d..b5d1e07cbf 100644 --- a/client/src/components/Chat/Messages/Content/Parts/Attachment.tsx +++ b/client/src/components/Chat/Messages/Content/Parts/Attachment.tsx @@ -8,9 +8,13 @@ import { cn } from '~/utils'; const FileAttachment = memo(({ attachment }: { attachment: Partial }) => { const [isVisible, setIsVisible] = useState(false); + const file = attachment as TFile & TAttachmentMetadata; const { handleDownload } = useAttachmentLink({ href: attachment.filepath ?? '', filename: attachment.filename ?? '', + file_id: file.file_id, + user: file.user, + source: file.source, }); const extension = attachment.filename?.split('.').pop(); diff --git a/client/src/components/Chat/Messages/Content/Parts/LogContent.tsx b/client/src/components/Chat/Messages/Content/Parts/LogContent.tsx index d2a303f49f..da2a8f175e 100644 --- a/client/src/components/Chat/Messages/Content/Parts/LogContent.tsx +++ b/client/src/components/Chat/Messages/Content/Parts/LogContent.tsx @@ -65,6 +65,7 @@ const LogContent: React.FC = ({ output = '', renderImages, atta return `${filename} ${localize('com_download_expired')}`; } + const fileData = file as TFile & TAttachmentMetadata; const filepath = file.filepath || ''; // const expirationText = expiresAt @@ -72,7 +73,13 @@ const LogContent: React.FC = ({ output = '', renderImages, atta // : ` ${localize('com_click_to_download')}`; return ( - + {'- '} {filename} {localize('com_click_to_download')} diff --git a/client/src/components/Chat/Messages/Content/Parts/LogLink.tsx b/client/src/components/Chat/Messages/Content/Parts/LogLink.tsx index d328f202ee..070becf517 100644 --- a/client/src/components/Chat/Messages/Content/Parts/LogLink.tsx +++ b/client/src/components/Chat/Messages/Content/Parts/LogLink.tsx @@ -1,21 +1,56 @@ import React from 'react'; +import { FileSources } from 'librechat-data-provider'; import { useToastContext } from '@librechat/client'; -import { useCodeOutputDownload } from '~/data-provider'; +import { useCodeOutputDownload, useFileDownload } from '~/data-provider'; interface LogLinkProps { href: string; filename: string; + file_id?: string; + user?: string; + source?: string; children: React.ReactNode; } -export const useAttachmentLink = ({ href, filename }: Pick) => { +interface AttachmentLinkOptions { + href: string; + filename: string; + file_id?: string; + user?: string; + source?: string; +} + +/** + * Determines if a file is stored locally (not an external API URL). + * Files with these sources are stored on the LibreChat server and should + * use the /api/files/download endpoint instead of direct URL access. 
+ */ +const isLocallyStoredSource = (source?: string): boolean => { + if (!source) { + return false; + } + return [FileSources.local, FileSources.firebase, FileSources.s3, FileSources.azure_blob].includes( + source as FileSources, + ); +}; + +export const useAttachmentLink = ({ + href, + filename, + file_id, + user, + source, +}: AttachmentLinkOptions) => { const { showToast } = useToastContext(); - const { refetch: downloadFile } = useCodeOutputDownload(href); + + const useLocalDownload = isLocallyStoredSource(source) && !!file_id && !!user; + const { refetch: downloadFromApi } = useFileDownload(user, file_id); + const { refetch: downloadFromUrl } = useCodeOutputDownload(href); const handleDownload = async (event: React.MouseEvent) => { event.preventDefault(); try { - const stream = await downloadFile(); + const stream = useLocalDownload ? await downloadFromApi() : await downloadFromUrl(); if (stream.data == null || stream.data === '') { console.error('Error downloading file: No data found'); showToast({ @@ -39,8 +74,8 @@ export const useAttachmentLink = ({ href, filename }: Pick = ({ href, filename, children }) => { - const { handleDownload } = useAttachmentLink({ href, filename }); +const LogLink: React.FC = ({ href, filename, file_id, user, source, children }) => { + const { handleDownload } = useAttachmentLink({ href, filename, file_id, user, source }); return ( { const { messageId } = data; - if (queryClient && data?.filepath && !data.filepath.includes('/api/files')) { - queryClient.setQueryData([QueryKeys.files], (oldData: TAttachment[] | undefined) => { - return [data, ...(oldData || [])]; + const fileData = data as TFile; + if ( + queryClient && + fileData?.file_id && + fileData?.filepath && + !fileData.filepath.includes('/api/files') + ) { + queryClient.setQueryData([QueryKeys.files], (oldData: TFile[] | undefined) => { + if (!oldData) { + return [fileData]; + } + const existingIndex = oldData.findIndex((file) => file.file_id === fileData.file_id); + if (existingIndex > -1) { + const updated = [...oldData]; + updated[existingIndex] = { ...oldData[existingIndex], ...fileData }; + return updated; + } + return [fileData, ...oldData]; }); } diff --git a/package-lock.json b/package-lock.json index d9fd999fc6..99ffe44648 100644 --- a/package-lock.json +++ b/package-lock.json @@ -59,7 +59,7 @@ "@google/genai": "^1.19.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.80", - "@librechat/agents": "^3.0.77", + "@librechat/agents": "^3.0.78", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", @@ -12646,9 +12646,9 @@ } }, "node_modules/@librechat/agents": { - "version": "3.0.77", - "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.77.tgz", - "integrity": "sha512-Wr9d8bjJAQSl03nEgnAPG6jBQT1fL3sNV3TFDN1FvFQt6WGfdok838Cbcn+/tSGXSPJcICTxNkMT7VN8P6bCPw==", + "version": "3.0.78", + "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.78.tgz", + "integrity": "sha512-+p4NuE2dBAbwm4gJc/jbBDIAfC8xNC0gUAb8wsLXA7zcORnnDRTQ+HWWYVJZ8e81dTIxHIl61hwsziFjVZHvUw==", "license": "MIT", "dependencies": { "@langchain/anthropic": "^0.3.26", @@ -43129,7 +43129,7 @@ "@google/genai": "^1.19.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.80", - "@librechat/agents": "^3.0.77", + "@librechat/agents": "^3.0.78", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.25.2", "@smithy/node-http-handler": "^4.4.5", diff --git a/packages/api/package.json b/packages/api/package.json index d8a06aad2b..8a18c3d21d 100644 
--- a/packages/api/package.json +++ b/packages/api/package.json @@ -87,7 +87,7 @@ "@google/genai": "^1.19.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.80", - "@librechat/agents": "^3.0.77", + "@librechat/agents": "^3.0.78", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.25.2", "@smithy/node-http-handler": "^4.4.5", diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts index 4223515ca3..dae58323b0 100644 --- a/packages/api/src/agents/initialize.ts +++ b/packages/api/src/agents/initialize.ts @@ -1,5 +1,6 @@ import { Providers } from '@librechat/agents'; import { + Constants, ErrorTypes, EModelEndpoint, EToolResources, @@ -20,7 +21,12 @@ import type { GenericTool, LCToolRegistry, ToolMap } from '@librechat/agents'; import type { Response as ServerResponse } from 'express'; import type { IMongoFile } from '@librechat/data-schemas'; import type { InitializeResultBase, ServerRequest, EndpointDbMethods } from '~/types'; -import { getModelMaxTokens, extractLibreChatParams, optionalChainWithEmptyCheck } from '~/utils'; +import { + optionalChainWithEmptyCheck, + extractLibreChatParams, + getModelMaxTokens, + getThreadData, +} from '~/utils'; import { filterFilesByEndpointConfig } from '~/files'; import { generateArtifactsPrompt } from '~/prompts'; import { getProviderConfig } from '~/endpoints'; @@ -58,6 +64,8 @@ export interface InitializeAgentParams { agent: Agent; /** Conversation ID (optional) */ conversationId?: string | null; + /** Parent message ID for determining the current thread (optional) */ + parentMessageId?: string | null; /** Request files */ requestFiles?: IMongoFile[]; /** Function to load agent tools */ @@ -95,10 +103,23 @@ export interface InitializeAgentDbMethods extends EndpointDbMethods { updateFilesUsage: (files: Array<{ file_id: string }>, fileIds?: string[]) => Promise; /** Get files from database */ getFiles: (filter: unknown, sort: unknown, select: unknown, opts?: unknown) => Promise; - /** Get tool files by IDs */ + /** Get tool files by IDs (user-uploaded files only, code files handled separately) */ getToolFilesByIds: (fileIds: string[], toolSet: Set) => Promise; /** Get conversation file IDs */ getConvoFiles: (conversationId: string) => Promise; + /** Get code-generated files by conversation ID and optional message IDs */ + getCodeGeneratedFiles?: (conversationId: string, messageIds?: string[]) => Promise; + /** Get user-uploaded execute_code files by file IDs (from message.files in thread) */ + getUserCodeFiles?: (fileIds: string[]) => Promise; + /** Get messages for a conversation (supports select for field projection) */ + getMessages?: ( + filter: { conversationId: string }, + select?: string, + ) => Promise; + }> | null>; } /** @@ -125,6 +146,7 @@ export async function initializeAgent( requestFiles = [], conversationId, endpointOption, + parentMessageId, allowedProviders, isInitialAgent = false, } = params; @@ -174,9 +196,51 @@ export async function initializeAgent( toolResourceSet.add(EToolResources[tool as keyof typeof EToolResources]); } } + const toolFiles = (await db.getToolFilesByIds(fileIds, toolResourceSet)) as IMongoFile[]; - if (requestFiles.length || toolFiles.length) { - currentFiles = (await db.updateFilesUsage(requestFiles.concat(toolFiles))) as IMongoFile[]; + + /** + * Retrieve execute_code files filtered to the current thread. + * This includes both code-generated files and user-uploaded execute_code files. 
+ */ + let codeGeneratedFiles: IMongoFile[] = []; + let userCodeFiles: IMongoFile[] = []; + + if (toolResourceSet.has(EToolResources.execute_code)) { + let threadMessageIds: string[] | undefined; + let threadFileIds: string[] | undefined; + + if (parentMessageId && parentMessageId !== Constants.NO_PARENT && db.getMessages) { + /** Only select fields needed for thread traversal */ + const messages = await db.getMessages( + { conversationId }, + 'messageId parentMessageId files', + ); + if (messages && messages.length > 0) { + /** Single O(n) pass: build Map, traverse thread, collect both IDs */ + const threadData = getThreadData(messages, parentMessageId); + threadMessageIds = threadData.messageIds; + threadFileIds = threadData.fileIds; + } + } + + /** Code-generated files (context: execute_code) filtered by messageId */ + if (db.getCodeGeneratedFiles) { + codeGeneratedFiles = (await db.getCodeGeneratedFiles( + conversationId, + threadMessageIds, + )) as IMongoFile[]; + } + + /** User-uploaded execute_code files (context: agents/message_attachment) from thread messages */ + if (db.getUserCodeFiles && threadFileIds && threadFileIds.length > 0) { + userCodeFiles = (await db.getUserCodeFiles(threadFileIds)) as IMongoFile[]; + } + } + + const allToolFiles = toolFiles.concat(codeGeneratedFiles, userCodeFiles); + if (requestFiles.length || allToolFiles.length) { + currentFiles = (await db.updateFilesUsage(requestFiles.concat(allToolFiles))) as IMongoFile[]; } } else if (requestFiles.length) { currentFiles = (await db.updateFilesUsage(requestFiles)) as IMongoFile[]; diff --git a/packages/api/src/utils/message.spec.ts b/packages/api/src/utils/message.spec.ts index 144ebc1a92..ba626c83fd 100644 --- a/packages/api/src/utils/message.spec.ts +++ b/packages/api/src/utils/message.spec.ts @@ -1,4 +1,8 @@ -import { sanitizeFileForTransmit, sanitizeMessageForTransmit } from './message'; +import { Constants } from 'librechat-data-provider'; +import { sanitizeFileForTransmit, sanitizeMessageForTransmit, getThreadData } from './message'; + +/** Cast to string for type compatibility with ThreadMessage */ +const NO_PARENT = Constants.NO_PARENT as string; describe('sanitizeFileForTransmit', () => { it('should remove text field from file', () => { @@ -120,3 +124,272 @@ describe('sanitizeMessageForTransmit', () => { expect(message.files[0].text).toBe('original text'); }); }); + +describe('getThreadData', () => { + describe('edge cases - empty and null inputs', () => { + it('should return empty result for empty messages array', () => { + const result = getThreadData([], 'parent-123'); + + expect(result.messageIds).toEqual([]); + expect(result.fileIds).toEqual([]); + }); + + it('should return empty result for null parentMessageId', () => { + const messages = [ + { messageId: 'msg-1', parentMessageId: null }, + { messageId: 'msg-2', parentMessageId: 'msg-1' }, + ]; + + const result = getThreadData(messages, null); + + expect(result.messageIds).toEqual([]); + expect(result.fileIds).toEqual([]); + }); + + it('should return empty result for undefined parentMessageId', () => { + const messages = [{ messageId: 'msg-1', parentMessageId: null }]; + + const result = getThreadData(messages, undefined); + + expect(result.messageIds).toEqual([]); + expect(result.fileIds).toEqual([]); + }); + + it('should return empty result when parentMessageId not found in messages', () => { + const messages = [ + { messageId: 'msg-1', parentMessageId: null }, + { messageId: 'msg-2', parentMessageId: 'msg-1' }, + ]; + + const result = 
getThreadData(messages, 'non-existent'); + + expect(result.messageIds).toEqual([]); + expect(result.fileIds).toEqual([]); + }); + }); + + describe('thread traversal', () => { + it('should traverse a simple linear thread', () => { + const messages = [ + { messageId: 'msg-1', parentMessageId: NO_PARENT }, + { messageId: 'msg-2', parentMessageId: 'msg-1' }, + { messageId: 'msg-3', parentMessageId: 'msg-2' }, + ]; + + const result = getThreadData(messages, 'msg-3'); + + expect(result.messageIds).toEqual(['msg-3', 'msg-2', 'msg-1']); + expect(result.fileIds).toEqual([]); + }); + + it('should stop at NO_PARENT constant', () => { + const messages = [ + { messageId: 'msg-1', parentMessageId: NO_PARENT }, + { messageId: 'msg-2', parentMessageId: 'msg-1' }, + ]; + + const result = getThreadData(messages, 'msg-2'); + + expect(result.messageIds).toEqual(['msg-2', 'msg-1']); + }); + + it('should collect only messages in the thread branch', () => { + // Branched conversation: msg-1 -> msg-2 -> msg-3 (branch A) + // msg-1 -> msg-4 -> msg-5 (branch B) + const messages = [ + { messageId: 'msg-1', parentMessageId: NO_PARENT }, + { messageId: 'msg-2', parentMessageId: 'msg-1' }, + { messageId: 'msg-3', parentMessageId: 'msg-2' }, + { messageId: 'msg-4', parentMessageId: 'msg-1' }, + { messageId: 'msg-5', parentMessageId: 'msg-4' }, + ]; + + const resultBranchA = getThreadData(messages, 'msg-3'); + expect(resultBranchA.messageIds).toEqual(['msg-3', 'msg-2', 'msg-1']); + + const resultBranchB = getThreadData(messages, 'msg-5'); + expect(resultBranchB.messageIds).toEqual(['msg-5', 'msg-4', 'msg-1']); + }); + + it('should handle single message thread', () => { + const messages = [{ messageId: 'msg-1', parentMessageId: NO_PARENT }]; + + const result = getThreadData(messages, 'msg-1'); + + expect(result.messageIds).toEqual(['msg-1']); + expect(result.fileIds).toEqual([]); + }); + }); + + describe('circular reference protection', () => { + it('should handle circular references without infinite loop', () => { + // Malformed data: msg-2 points to msg-3 which points back to msg-2 + const messages = [ + { messageId: 'msg-1', parentMessageId: NO_PARENT }, + { messageId: 'msg-2', parentMessageId: 'msg-3' }, + { messageId: 'msg-3', parentMessageId: 'msg-2' }, + ]; + + const result = getThreadData(messages, 'msg-2'); + + // Should stop when encountering a visited ID + expect(result.messageIds).toEqual(['msg-2', 'msg-3']); + expect(result.fileIds).toEqual([]); + }); + + it('should handle self-referencing message', () => { + const messages = [{ messageId: 'msg-1', parentMessageId: 'msg-1' }]; + + const result = getThreadData(messages, 'msg-1'); + + expect(result.messageIds).toEqual(['msg-1']); + }); + }); + + describe('file ID collection', () => { + it('should collect file IDs from messages with files', () => { + const messages = [ + { + messageId: 'msg-1', + parentMessageId: NO_PARENT, + files: [{ file_id: 'file-1' }, { file_id: 'file-2' }], + }, + { + messageId: 'msg-2', + parentMessageId: 'msg-1', + files: [{ file_id: 'file-3' }], + }, + ]; + + const result = getThreadData(messages, 'msg-2'); + + expect(result.messageIds).toEqual(['msg-2', 'msg-1']); + expect(result.fileIds).toContain('file-1'); + expect(result.fileIds).toContain('file-2'); + expect(result.fileIds).toContain('file-3'); + expect(result.fileIds).toHaveLength(3); + }); + + it('should deduplicate file IDs across messages', () => { + const messages = [ + { + messageId: 'msg-1', + parentMessageId: NO_PARENT, + files: [{ file_id: 'file-shared' }, { file_id: 
'file-1' }], + }, + { + messageId: 'msg-2', + parentMessageId: 'msg-1', + files: [{ file_id: 'file-shared' }, { file_id: 'file-2' }], + }, + ]; + + const result = getThreadData(messages, 'msg-2'); + + expect(result.fileIds).toContain('file-shared'); + expect(result.fileIds).toContain('file-1'); + expect(result.fileIds).toContain('file-2'); + expect(result.fileIds).toHaveLength(3); + }); + + it('should skip files without file_id', () => { + const messages = [ + { + messageId: 'msg-1', + parentMessageId: NO_PARENT, + files: [{ file_id: 'file-1' }, { file_id: undefined }, { file_id: '' }], + }, + ]; + + const result = getThreadData(messages, 'msg-1'); + + expect(result.fileIds).toEqual(['file-1']); + }); + + it('should handle messages with empty files array', () => { + const messages = [ + { + messageId: 'msg-1', + parentMessageId: NO_PARENT, + files: [], + }, + { + messageId: 'msg-2', + parentMessageId: 'msg-1', + files: [{ file_id: 'file-1' }], + }, + ]; + + const result = getThreadData(messages, 'msg-2'); + + expect(result.messageIds).toEqual(['msg-2', 'msg-1']); + expect(result.fileIds).toEqual(['file-1']); + }); + + it('should handle messages without files property', () => { + const messages = [ + { messageId: 'msg-1', parentMessageId: NO_PARENT }, + { + messageId: 'msg-2', + parentMessageId: 'msg-1', + files: [{ file_id: 'file-1' }], + }, + ]; + + const result = getThreadData(messages, 'msg-2'); + + expect(result.messageIds).toEqual(['msg-2', 'msg-1']); + expect(result.fileIds).toEqual(['file-1']); + }); + + it('should only collect files from messages in the thread', () => { + // msg-3 is not in the thread from msg-2 + const messages = [ + { + messageId: 'msg-1', + parentMessageId: NO_PARENT, + files: [{ file_id: 'file-1' }], + }, + { + messageId: 'msg-2', + parentMessageId: 'msg-1', + files: [{ file_id: 'file-2' }], + }, + { + messageId: 'msg-3', + parentMessageId: 'msg-1', + files: [{ file_id: 'file-3' }], + }, + ]; + + const result = getThreadData(messages, 'msg-2'); + + expect(result.fileIds).toContain('file-1'); + expect(result.fileIds).toContain('file-2'); + expect(result.fileIds).not.toContain('file-3'); + }); + }); + + describe('performance - O(1) lookups', () => { + it('should handle large message arrays efficiently', () => { + // Create a linear thread of 1000 messages + const messages = []; + for (let i = 0; i < 1000; i++) { + messages.push({ + messageId: `msg-${i}`, + parentMessageId: i === 0 ? 
NO_PARENT : `msg-${i - 1}`, + files: [{ file_id: `file-${i}` }], + }); + } + + const startTime = performance.now(); + const result = getThreadData(messages, 'msg-999'); + const endTime = performance.now(); + + expect(result.messageIds).toHaveLength(1000); + expect(result.fileIds).toHaveLength(1000); + // Should complete in reasonable time (< 100ms for 1000 messages) + expect(endTime - startTime).toBeLessThan(100); + }); + }); +}); diff --git a/packages/api/src/utils/message.ts b/packages/api/src/utils/message.ts index 312826b6ba..b1e939c6d7 100644 --- a/packages/api/src/utils/message.ts +++ b/packages/api/src/utils/message.ts @@ -1,3 +1,4 @@ +import { Constants } from 'librechat-data-provider'; import type { TFile, TMessage } from 'librechat-data-provider'; /** Fields to strip from files before client transmission */ @@ -66,3 +67,74 @@ export function sanitizeMessageForTransmit>( return sanitized; } + +/** Minimal message shape for thread traversal */ +type ThreadMessage = { + messageId: string; + parentMessageId?: string | null; + files?: Array<{ file_id?: string }>; +}; + +/** Result of thread data extraction */ +export type ThreadData = { + messageIds: string[]; + fileIds: string[]; +}; + +/** + * Extracts thread message IDs and file IDs in a single O(n) pass. + * Builds a Map for O(1) lookups, then traverses the thread collecting both IDs. + * + * @param messages - All messages in the conversation (should be queried with select for efficiency) + * @param parentMessageId - The ID of the parent message to start traversal from + * @returns Object containing messageIds and fileIds arrays + */ +export function getThreadData( + messages: ThreadMessage[], + parentMessageId: string | null | undefined, +): ThreadData { + const result: ThreadData = { messageIds: [], fileIds: [] }; + + if (!messages || messages.length === 0 || !parentMessageId) { + return result; + } + + /** Build Map for O(1) lookups instead of O(n) .find() calls */ + const messageMap = new Map<string, ThreadMessage>(); + for (const msg of messages) { + messageMap.set(msg.messageId, msg); + } + + const fileIdSet = new Set<string>(); + const visitedIds = new Set<string>(); + let currentId: string | null | undefined = parentMessageId; + + /** Single traversal: collect message IDs and file IDs together */ + while (currentId) { + if (visitedIds.has(currentId)) { + break; + } + visitedIds.add(currentId); + + const message = messageMap.get(currentId); + if (!message) { + break; + } + + result.messageIds.push(message.messageId); + + /** Collect file IDs from this message */ + if (message.files) { + for (const file of message.files) { + if (file.file_id) { + fileIdSet.add(file.file_id); + } + } + } + + currentId = message.parentMessageId === Constants.NO_PARENT ? 
null : message.parentMessageId; + } + + result.fileIds = Array.from(fileIdSet); + return result; +} diff --git a/packages/data-provider/src/file-config.ts b/packages/data-provider/src/file-config.ts index 8dc3445be0..b2c24a47a0 100644 --- a/packages/data-provider/src/file-config.ts +++ b/packages/data-provider/src/file-config.ts @@ -198,8 +198,15 @@ export const codeTypeMapping: { [key: string]: string } = { ts: 'application/typescript', // .ts - TypeScript source tar: 'application/x-tar', // .tar - Tar archive zip: 'application/zip', // .zip - ZIP archive + txt: 'text/plain', // .txt - Plain text file log: 'text/plain', // .log - Log file + csv: 'text/csv', // .csv - Comma-separated values tsv: 'text/tab-separated-values', // .tsv - Tab-separated values + json: 'application/json', // .json - JSON file + xml: 'application/xml', // .xml - XML file + html: 'text/html', // .html - HTML file + htm: 'text/html', // .htm - HTML file + css: 'text/css', // .css - CSS file yml: 'application/yaml', // .yml - YAML yaml: 'application/yaml', // .yaml - YAML sql: 'application/sql', // .sql - SQL (IANA registered) diff --git a/packages/data-schemas/src/methods/file.spec.ts b/packages/data-schemas/src/methods/file.spec.ts index 5cf51684ac..390b6a8f5c 100644 --- a/packages/data-schemas/src/methods/file.spec.ts +++ b/packages/data-schemas/src/methods/file.spec.ts @@ -130,7 +130,7 @@ describe('File Methods', () => { const files = await fileMethods.getFiles({ user: userId }); expect(files).toHaveLength(3); - expect(files.map((f) => f.file_id)).toEqual(expect.arrayContaining(fileIds)); + expect(files!.map((f) => f.file_id)).toEqual(expect.arrayContaining(fileIds)); }); it('should exclude text field by default', async () => { @@ -149,7 +149,7 @@ describe('File Methods', () => { const files = await fileMethods.getFiles({ file_id: fileId }); expect(files).toHaveLength(1); - expect(files[0].text).toBeUndefined(); + expect(files![0].text).toBeUndefined(); }); }); @@ -207,7 +207,7 @@ describe('File Methods', () => { expect(files[0].file_id).toBe(contextFileId); }); - it('should retrieve files for execute_code tool', async () => { + it('should not retrieve execute_code files (handled by getCodeGeneratedFiles)', async () => { const userId = new mongoose.Types.ObjectId(); const codeFileId = uuidv4(); @@ -218,14 +218,16 @@ describe('File Methods', () => { filepath: '/uploads/code.py', type: 'text/x-python', bytes: 100, + context: FileContext.execute_code, metadata: { fileIdentifier: 'some-identifier' }, }); + // execute_code files are explicitly excluded from getToolFilesByIds + // They are retrieved via getCodeGeneratedFiles and getUserCodeFiles instead const toolSet = new Set([EToolResources.execute_code]); const files = await fileMethods.getToolFilesByIds([codeFileId], toolSet); - expect(files).toHaveLength(1); - expect(files[0].file_id).toBe(codeFileId); + expect(files).toHaveLength(0); }); }); @@ -490,7 +492,7 @@ describe('File Methods', () => { const remaining = await fileMethods.getFiles({}); expect(remaining).toHaveLength(1); - expect(remaining[0].user?.toString()).toBe(otherUserId.toString()); + expect(remaining![0].user?.toString()).toBe(otherUserId.toString()); }); }); diff --git a/packages/data-schemas/src/methods/file.ts b/packages/data-schemas/src/methods/file.ts index 5ea27aeb6c..751f23f5c0 100644 --- a/packages/data-schemas/src/methods/file.ts +++ b/packages/data-schemas/src/methods/file.ts @@ -47,7 +47,8 @@ export function createFileMethods(mongoose: typeof import('mongoose')) { } /** - * Retrieves tool 
files (files that are embedded or have a fileIdentifier) from an array of file IDs + * Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs. + * Note: execute_code files are handled separately by getCodeGeneratedFiles. * @param fileIds - Array of file_id strings to search for * @param toolResourceSet - Optional filter for tool resources * @returns Files that match the criteria @@ -61,21 +62,26 @@ export function createFileMethods(mongoose: typeof import('mongoose')) { } try { - const filter: FilterQuery<IMongoFile> = { - file_id: { $in: fileIds }, - $or: [], - }; + const orConditions: FilterQuery<IMongoFile>[] = []; if (toolResourceSet.has(EToolResources.context)) { - filter.$or?.push({ text: { $exists: true, $ne: null }, context: FileContext.agents }); + orConditions.push({ text: { $exists: true, $ne: null }, context: FileContext.agents }); } if (toolResourceSet.has(EToolResources.file_search)) { - filter.$or?.push({ embedded: true }); + orConditions.push({ embedded: true }); } - if (toolResourceSet.has(EToolResources.execute_code)) { - filter.$or?.push({ 'metadata.fileIdentifier': { $exists: true } }); + + // If no conditions to match, return empty + if (orConditions.length === 0) { + return []; } + const filter: FilterQuery<IMongoFile> = { + file_id: { $in: fileIds }, + context: { $ne: FileContext.execute_code }, + $or: orConditions, + }; + const selectFields: SelectProjection = { text: 0 }; const sortOptions = { updatedAt: -1 as SortOrder }; @@ -87,6 +93,84 @@ export function createFileMethods(mongoose: typeof import('mongoose')) { } } + /** + * Retrieves files generated by code execution for a given conversation. + * These files are stored locally with fileIdentifier metadata for re-upload to the code execution environment. + * + * @param conversationId - The conversation ID to search for + * @param messageIds - Array of messageIds to filter by (for linear thread filtering). + * While technically optional, this function returns empty if not provided. + * This is intentional: code-generated files must be filtered by thread to avoid + * including files from other branches of a conversation. + * @returns Files generated by code execution in the conversation, filtered by messageIds + */ + async function getCodeGeneratedFiles( + conversationId: string, + messageIds?: string[], + ): Promise<IMongoFile[]> { + if (!conversationId) { + return []; + } + + /** + * Return early if messageIds not provided - this is intentional behavior. + * Code-generated files must be filtered by thread messageIds to ensure we only + * return files relevant to the current conversation branch, not orphaned files + * from other branches or deleted messages. + */ + if (!messageIds || messageIds.length === 0) { + return []; + } + + try { + const filter: FilterQuery<IMongoFile> = { + conversationId, + context: FileContext.execute_code, + messageId: { $exists: true, $in: messageIds }, + 'metadata.fileIdentifier': { $exists: true }, + }; + + const selectFields: SelectProjection = { text: 0 }; + const sortOptions = { createdAt: 1 as SortOrder }; + + const results = await getFiles(filter, sortOptions, selectFields); + return results ?? []; + } catch (error) { + logger.error('[getCodeGeneratedFiles] Error retrieving code generated files:', error); + return []; + } + } + + /** + * Retrieves user-uploaded files for the execute_code tool (as opposed to code-generated output) by their file IDs. + * These files carry fileIdentifier metadata, but their context is NOT execute_code (e.g., agents or message_attachment). + * File IDs should be collected from message.files arrays in the current thread. 
+ * @param fileIds - Array of file IDs to fetch (from message.files in the thread) + * @returns User-uploaded execute_code files + */ + async function getUserCodeFiles(fileIds?: string[]): Promise<IMongoFile[]> { + if (!fileIds || fileIds.length === 0) { + return []; + } + + try { + const filter: FilterQuery<IMongoFile> = { + file_id: { $in: fileIds }, + context: { $ne: FileContext.execute_code }, + 'metadata.fileIdentifier': { $exists: true }, + }; + + const selectFields: SelectProjection = { text: 0 }; + const sortOptions = { createdAt: 1 as SortOrder }; + + const results = await getFiles(filter, sortOptions, selectFields); + return results ?? []; + } catch (error) { + logger.error('[getUserCodeFiles] Error retrieving user code files:', error); + return []; + } + } + /** * Creates a new file with a TTL of 1 hour. + * @param data - The file data to be created, must contain file_id @@ -258,6 +342,8 @@ export function createFileMethods(mongoose: typeof import('mongoose')) { findFileById, getFiles, getToolFilesByIds, + getCodeGeneratedFiles, + getUserCodeFiles, createFile, updateFile, updateFileUsage, diff --git a/packages/data-schemas/src/schema/file.ts b/packages/data-schemas/src/schema/file.ts index 5bf4d95d87..d39672f1ea 100644 --- a/packages/data-schemas/src/schema/file.ts +++ b/packages/data-schemas/src/schema/file.ts @@ -15,6 +15,10 @@ const file: Schema = new Schema( ref: 'Conversation', index: true, }, + messageId: { + type: String, + index: true, + }, file_id: { type: String, index: true, diff --git a/packages/data-schemas/src/types/file.ts b/packages/data-schemas/src/types/file.ts index 231ab93332..8f17e3b597 100644 --- a/packages/data-schemas/src/types/file.ts +++ b/packages/data-schemas/src/types/file.ts @@ -3,6 +3,7 @@ import { Document, Types } from 'mongoose'; export interface IMongoFile extends Omit { user: Types.ObjectId; conversationId?: string; + messageId?: string; file_id: string; temp_file_id?: string; bytes: number;
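
For reference, the pieces above compose into a single retrieval flow: getThreadData (packages/api/src/utils/message.ts) walks the active branch and collects messageIds and fileIds, which then feed getCodeGeneratedFiles and getUserCodeFiles (packages/data-schemas/src/methods/file.ts). The sketch below is illustrative only and is not part of the patch; the collectThreadCodeFiles wrapper, the import paths, and the shape of the preloaded messages array are assumptions.

    import mongoose from 'mongoose';
    // Assumed import paths; in the repo these helpers live in packages/api and packages/data-schemas.
    import { getThreadData } from './utils/message';
    import { createFileMethods } from './methods/file';

    const { getCodeGeneratedFiles, getUserCodeFiles } = createFileMethods(mongoose);

    /** Hypothetical wrapper: gather all execute_code files relevant to the current thread branch. */
    async function collectThreadCodeFiles(
      conversationId: string,
      parentMessageId: string,
      /** Conversation messages, e.g. queried with a projection on messageId/parentMessageId/files. */
      messages: { messageId: string; parentMessageId?: string | null; files?: { file_id?: string }[] }[],
    ) {
      // Single O(n) pass: walk from parentMessageId up to NO_PARENT, collecting the branch's
      // message IDs and any file IDs attached to those messages.
      const { messageIds, fileIds } = getThreadData(messages, parentMessageId);

      // Code-generated output files (context: execute_code), restricted to this branch via messageIds.
      const generated = await getCodeGeneratedFiles(conversationId, messageIds);

      // User-uploaded files for the code tool (fileIdentifier metadata, non-execute_code context),
      // referenced by messages in the branch.
      const uploaded = await getUserCodeFiles(fileIds);

      return [...uploaded, ...generated];
    }

In practice this keeps files from sibling branches of the same conversation out of the code environment, since both lookups are keyed off the branch's messageIds and fileIds rather than the conversation as a whole.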