🗂️ feat: Better Persistence for Code Execution Files Between Sessions (#11362)

* refactor: process code output files for re-use (WIP)

* feat: file attachment handling with additional metadata for downloads

* refactor: Update directory path logic for local file saving based on basePath

* refactor: file attachment handling to support TFile type and improve data merging logic

* feat: thread filtering of code-generated files

- Introduced parentMessageId parameter in addedConvo and initialize functions to enhance thread management.
- Updated related methods to utilize parentMessageId for retrieving messages and filtering code-generated files by conversation threads.
- Enhanced type definitions to include parentMessageId in relevant interfaces for better clarity and usage.

* chore: imports/params ordering

* feat: update file model to use messageId for filtering and processing

- Changed references from 'message' to 'messageId' in file-related methods for consistency.
- Added messageId field to the file schema and updated related types.
- Enhanced file processing logic to accommodate the new messageId structure.

* feat: enhance file retrieval methods to support user-uploaded execute_code files

- Added a new method `getUserCodeFiles` to retrieve user-uploaded execute_code files, excluding code-generated files.
- Updated existing file retrieval methods to improve filtering logic and handle edge cases.
- Enhanced thread data extraction to collect both message IDs and file IDs efficiently.
- Integrated `getUserCodeFiles` into relevant endpoints for better file management in conversations.

* chore: update @librechat/agents package version to 3.0.78 in package-lock.json and related package.json files

* refactor: file processing and retrieval logic

- Added a fallback mechanism for download URLs when files exceed size limits or cannot be processed locally.
- Implemented a deduplication strategy for code-generated files based on conversationId and filename to optimize storage.
- Updated file retrieval methods to ensure proper filtering by messageIds, preventing orphaned files from being included.
- Introduced comprehensive tests for new thread data extraction functionality, covering edge cases and performance considerations.

* fix: improve file retrieval tests and handling of optional properties

- Updated tests to safely access optional properties using non-null assertions.
- Modified test descriptions for clarity regarding the exclusion of execute_code files.
- Ensured that the retrieval logic correctly reflects the expected outcomes for file queries.

* test: add comprehensive unit tests for processCodeOutput functionality

- Introduced a new test suite for the processCodeOutput function, covering various scenarios including file retrieval, creation, and processing for both image and non-image files.
- Implemented mocks for dependencies such as axios, logger, and file models to isolate tests and ensure reliable outcomes.
- Validated behavior for existing files, new file creation, and error handling, including size limits and fallback mechanisms.
- Enhanced test coverage for metadata handling and usage increment logic, ensuring robust verification of file processing outcomes.

* test: enhance file size limit enforcement in processCodeOutput tests

- Introduced a configurable file size limit for tests to improve flexibility and coverage.
- Mocked the `librechat-data-provider` to allow dynamic adjustment of file size limits during tests.
- Updated the file size limit enforcement test to validate behavior when files exceed specified limits, ensuring proper fallback to download URLs.
- Reset file size limit after tests to maintain isolation for subsequent test cases.
This commit is contained in:
Danny Avila 2026-01-16 10:06:24 -05:00
parent fe32cbedf9
commit cc32895d13
No known key found for this signature in database
GPG key ID: BF31EEB2C5CA0956
22 changed files with 1364 additions and 83 deletions

View file

@ -26,7 +26,8 @@ const getFiles = async (filter, _sortOptions, selectFields = { text: 0 }) => {
};
/**
* Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs
* Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs.
* Note: execute_code files are handled separately: code-generated files by getCodeGeneratedFiles and user-uploaded files by getUserCodeFiles.
* @param {string[]} fileIds - Array of file_id strings to search for
* @param {Set<EToolResources>} toolResourceSet - Optional filter for tool resources
* @returns {Promise<Array<MongoFile>>} Files that match the criteria
@ -37,21 +38,25 @@ const getToolFilesByIds = async (fileIds, toolResourceSet) => {
}
try {
const filter = {
file_id: { $in: fileIds },
$or: [],
};
const orConditions = [];
if (toolResourceSet.has(EToolResources.context)) {
filter.$or.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
orConditions.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
}
if (toolResourceSet.has(EToolResources.file_search)) {
filter.$or.push({ embedded: true });
orConditions.push({ embedded: true });
}
if (toolResourceSet.has(EToolResources.execute_code)) {
filter.$or.push({ 'metadata.fileIdentifier': { $exists: true } });
if (orConditions.length === 0) {
return [];
}
const filter = {
file_id: { $in: fileIds },
context: { $ne: FileContext.execute_code }, // Exclude code-generated files
$or: orConditions,
};
const selectFields = { text: 0 };
const sortOptions = { updatedAt: -1 };
@ -62,6 +67,70 @@ const getToolFilesByIds = async (fileIds, toolResourceSet) => {
}
};
/**
* Retrieves files generated by code execution for a given conversation.
* These files are stored locally with fileIdentifier metadata for code env re-upload.
* @param {string} conversationId - The conversation ID to search for
* @param {string[]} [messageIds] - Optional array of messageIds to filter by (for linear thread filtering)
* @returns {Promise<Array<MongoFile>>} Files generated by code execution in the conversation
*/
const getCodeGeneratedFiles = async (conversationId, messageIds) => {
if (!conversationId) {
return [];
}
/** messageIds are required for proper thread filtering of code-generated files */
if (!messageIds || messageIds.length === 0) {
return [];
}
try {
const filter = {
conversationId,
context: FileContext.execute_code,
messageId: { $exists: true, $in: messageIds },
'metadata.fileIdentifier': { $exists: true },
};
const selectFields = { text: 0 };
const sortOptions = { createdAt: 1 };
return await getFiles(filter, sortOptions, selectFields);
} catch (error) {
logger.error('[getCodeGeneratedFiles] Error retrieving code generated files:', error);
return [];
}
};
/**
* Retrieves user-uploaded execute_code files (not code-generated) by their file IDs.
* These are files with fileIdentifier metadata but context is NOT execute_code (e.g., agents or message_attachment).
* File IDs should be collected from message.files arrays in the current thread.
* @param {string[]} fileIds - Array of file IDs to fetch (from message.files in the thread)
* @returns {Promise<Array<MongoFile>>} User-uploaded execute_code files
*/
const getUserCodeFiles = async (fileIds) => {
if (!fileIds || fileIds.length === 0) {
return [];
}
try {
const filter = {
file_id: { $in: fileIds },
context: { $ne: FileContext.execute_code },
'metadata.fileIdentifier': { $exists: true },
};
const selectFields = { text: 0 };
const sortOptions = { createdAt: 1 };
return await getFiles(filter, sortOptions, selectFields);
} catch (error) {
logger.error('[getUserCodeFiles] Error retrieving user code files:', error);
return [];
}
};
/**
* Creates a new file with a TTL of 1 hour.
* @param {MongoFile} data - The file data to be created, must contain file_id.
@ -169,6 +238,8 @@ module.exports = {
findFileById,
getFiles,
getToolFilesByIds,
getCodeGeneratedFiles,
getUserCodeFiles,
createFile,
updateFile,
updateFileUsage,

View file

@ -45,7 +45,7 @@
"@google/genai": "^1.19.0",
"@keyv/redis": "^4.3.3",
"@langchain/core": "^0.3.80",
"@librechat/agents": "^3.0.77",
"@librechat/agents": "^3.0.78",
"@librechat/api": "*",
"@librechat/data-schemas": "*",
"@microsoft/microsoft-graph-client": "^3.0.7",

View file

@ -633,6 +633,7 @@ class AgentClient extends BaseClient {
updateFilesUsage: db.updateFilesUsage,
getUserKeyValues: db.getUserKeyValues,
getToolFilesByIds: db.getToolFilesByIds,
getCodeGeneratedFiles: db.getCodeGeneratedFiles,
},
);

View file

@ -31,6 +31,7 @@ setGetAgent(getAgent);
* @param {Function} params.loadTools - Function to load agent tools
* @param {Array} params.requestFiles - Request files
* @param {string} params.conversationId - The conversation ID
* @param {string} [params.parentMessageId] - The parent message ID for thread filtering
* @param {Set} params.allowedProviders - Set of allowed providers
* @param {Map} params.agentConfigs - Map of agent configs to add to
* @param {string} params.primaryAgentId - The primary agent ID
@ -46,6 +47,7 @@ const processAddedConvo = async ({
loadTools,
requestFiles,
conversationId,
parentMessageId,
allowedProviders,
agentConfigs,
primaryAgentId,
@ -91,6 +93,7 @@ const processAddedConvo = async ({
loadTools,
requestFiles,
conversationId,
parentMessageId,
agent: addedAgent,
endpointOption,
allowedProviders,
@ -99,9 +102,12 @@ const processAddedConvo = async ({
getConvoFiles,
getFiles: db.getFiles,
getUserKey: db.getUserKey,
getMessages: db.getMessages,
updateFilesUsage: db.updateFilesUsage,
getUserCodeFiles: db.getUserCodeFiles,
getUserKeyValues: db.getUserKeyValues,
getToolFilesByIds: db.getToolFilesByIds,
getCodeGeneratedFiles: db.getCodeGeneratedFiles,
},
);

View file

@ -3,10 +3,10 @@ const { createContentAggregator } = require('@librechat/agents');
const {
initializeAgent,
validateAgentModel,
getCustomEndpointConfig,
createSequentialChainEdges,
createEdgeCollector,
filterOrphanedEdges,
getCustomEndpointConfig,
createSequentialChainEdges,
} = require('@librechat/api');
const {
EModelEndpoint,
@ -129,6 +129,8 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
const requestFiles = req.body.files ?? [];
/** @type {string} */
const conversationId = req.body.conversationId;
/** @type {string | undefined} */
const parentMessageId = req.body.parentMessageId;
const primaryConfig = await initializeAgent(
{
@ -137,6 +139,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
loadTools,
requestFiles,
conversationId,
parentMessageId,
agent: primaryAgent,
endpointOption,
allowedProviders,
@ -146,9 +149,12 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
getConvoFiles,
getFiles: db.getFiles,
getUserKey: db.getUserKey,
getMessages: db.getMessages,
updateFilesUsage: db.updateFilesUsage,
getUserKeyValues: db.getUserKeyValues,
getUserCodeFiles: db.getUserCodeFiles,
getToolFilesByIds: db.getToolFilesByIds,
getCodeGeneratedFiles: db.getCodeGeneratedFiles,
},
);
@ -188,6 +194,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
loadTools,
requestFiles,
conversationId,
parentMessageId,
endpointOption,
allowedProviders,
},
@ -195,9 +202,12 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
getConvoFiles,
getFiles: db.getFiles,
getUserKey: db.getUserKey,
getMessages: db.getMessages,
updateFilesUsage: db.updateFilesUsage,
getUserKeyValues: db.getUserKeyValues,
getUserCodeFiles: db.getUserCodeFiles,
getToolFilesByIds: db.getToolFilesByIds,
getCodeGeneratedFiles: db.getCodeGeneratedFiles,
},
);
if (userMCPAuthMap != null) {
@ -252,17 +262,18 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
const { userMCPAuthMap: updatedMCPAuthMap } = await processAddedConvo({
req,
res,
endpointOption,
modelsConfig,
logViolation,
loadTools,
logViolation,
modelsConfig,
requestFiles,
conversationId,
allowedProviders,
agentConfigs,
primaryAgentId: primaryConfig.id,
primaryAgent,
endpointOption,
userMCPAuthMap,
conversationId,
parentMessageId,
allowedProviders,
primaryAgentId: primaryConfig.id,
});
if (updatedMCPAuthMap) {

View file

@ -6,27 +6,112 @@ const { getCodeBaseURL } = require('@librechat/agents');
const { logAxiosError, getBasePath } = require('@librechat/api');
const {
Tools,
megabyte,
fileConfig,
FileContext,
FileSources,
imageExtRegex,
inferMimeType,
EToolResources,
EModelEndpoint,
mergeFileConfig,
getEndpointFileConfig,
} = require('librechat-data-provider');
const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { convertImage } = require('~/server/services/Files/images/convert');
const { createFile, getFiles, updateFile } = require('~/models');
const { determineFileType } = require('~/server/utils');
/**
* Process OpenAI image files, convert to target format, save and return file metadata.
* Creates a fallback download URL response when file cannot be processed locally.
* Used when: file exceeds size limit, storage strategy unavailable, or download error occurs.
* @param {Object} params - The parameters.
* @param {string} params.name - The filename.
* @param {string} params.session_id - The code execution session ID.
* @param {string} params.id - The file ID from the code environment.
* @param {string} params.conversationId - The current conversation ID.
* @param {string} params.toolCallId - The tool call ID that generated the file.
* @param {string} params.messageId - The current message ID.
* @param {number} params.expiresAt - Expiration timestamp (24 hours from creation).
* @returns {Object} Fallback response with download URL.
*/
const createDownloadFallback = ({
id,
name,
messageId,
expiresAt,
session_id,
toolCallId,
conversationId,
}) => {
const basePath = getBasePath();
return {
filename: name,
filepath: `${basePath}/api/files/code/download/${session_id}/${id}`,
expiresAt,
conversationId,
toolCallId,
messageId,
};
};
/**
* Find an existing code-generated file by filename in the conversation.
* Used to update existing files instead of creating duplicates.
*
* ## Deduplication Strategy
*
* Files are deduplicated by `(conversationId, filename)` - NOT including `messageId`.
* This is an intentional design decision to handle iterative code development patterns:
*
* **Rationale:**
* - When users iteratively refine code (e.g., "regenerate that chart with red bars"),
* the same logical file (e.g., "chart.png") is produced multiple times
* - Without deduplication, each iteration would create a new file, leading to storage bloat
* - The latest version is what matters for re-upload to the code environment
*
* **Implications:**
* - Different messages producing files with the same name will update the same file record
* - The `messageId` field tracks which message last updated the file
* - The `usage` counter tracks how many times the file has been generated
*
* **Future Considerations:**
* - If file versioning is needed, consider adding a `versions` array or separate version collection
* - The current approach prioritizes storage efficiency over history preservation
*
* @param {string} filename - The filename to search for.
* @param {string} conversationId - The conversation ID.
* @returns {Promise<MongoFile | null>} The existing file or null.
*/
const findExistingCodeFile = async (filename, conversationId) => {
if (!filename || !conversationId) {
return null;
}
const files = await getFiles(
{
filename,
conversationId,
context: FileContext.execute_code,
},
{ createdAt: -1 },
{ text: 0 },
);
return files?.[0] ?? null;
};
/**
* Process code execution output files - downloads and saves both images and non-image files.
* All files are saved to local storage with fileIdentifier metadata for code env re-upload.
* @param {ServerRequest} params.req - The Express request object.
* @param {string} params.id - The file ID.
* @param {string} params.id - The file ID from the code environment.
* @param {string} params.name - The filename.
* @param {string} params.apiKey - The code execution API key.
* @param {string} params.toolCallId - The tool call ID that generated the file.
* @param {string} params.session_id - The code execution session ID.
* @param {string} params.conversationId - The current conversation ID.
* @param {string} params.messageId - The current message ID.
* @returns {Promise<MongoFile & { messageId: string, toolCallId: string } | { filename: string; filepath: string; expiresAt: number; conversationId: string; toolCallId: string; messageId: string } | undefined>} The file metadata or undefined if an error occurs.
* @returns {Promise<MongoFile & { messageId: string, toolCallId: string } | undefined>} The file metadata or undefined if an error occurs.
*/
const processCodeOutput = async ({
req,
@ -41,19 +126,15 @@ const processCodeOutput = async ({
const appConfig = req.config;
const currentDate = new Date();
const baseURL = getCodeBaseURL();
const basePath = getBasePath();
const fileExt = path.extname(name);
if (!fileExt || !imageExtRegex.test(name)) {
return {
filename: name,
filepath: `${basePath}/api/files/code/download/${session_id}/${id}`,
/** Note: expires 24 hours after creation */
expiresAt: currentDate.getTime() + 86400000,
conversationId,
toolCallId,
messageId,
};
}
const fileExt = path.extname(name).toLowerCase();
const isImage = fileExt && imageExtRegex.test(name);
const mergedFileConfig = mergeFileConfig(appConfig.fileConfig);
const endpointFileConfig = getEndpointFileConfig({
fileConfig: mergedFileConfig,
endpoint: EModelEndpoint.agents,
});
const fileSizeLimit = endpointFileConfig.fileSizeLimit ?? mergedFileConfig.serverFileSizeLimit;
try {
const formattedDate = currentDate.toISOString();
@ -70,29 +151,135 @@ const processCodeOutput = async ({
const buffer = Buffer.from(response.data, 'binary');
const file_id = v4();
const _file = await convertImage(req, buffer, 'high', `${file_id}${fileExt}`);
// Enforce file size limit
if (buffer.length > fileSizeLimit) {
logger.warn(
`[processCodeOutput] File "${name}" (${(buffer.length / megabyte).toFixed(2)} MB) exceeds size limit of ${(fileSizeLimit / megabyte).toFixed(2)} MB, falling back to download URL`,
);
return createDownloadFallback({
id,
name,
messageId,
toolCallId,
session_id,
conversationId,
expiresAt: currentDate.getTime() + 86400000,
});
}
const fileIdentifier = `${session_id}/${id}`;
/**
* Check for existing file with same filename in this conversation.
* If found, we'll update it instead of creating a duplicate.
*/
const existingFile = await findExistingCodeFile(name, conversationId);
const file_id = existingFile?.file_id ?? v4();
const isUpdate = !!existingFile;
if (isUpdate) {
logger.debug(
`[processCodeOutput] Updating existing file "${name}" (${file_id}) instead of creating duplicate`,
);
}
if (isImage) {
const _file = await convertImage(req, buffer, 'high', `${file_id}${fileExt}`);
const file = {
..._file,
file_id,
messageId,
usage: isUpdate ? (existingFile.usage ?? 0) + 1 : 1,
filename: name,
conversationId,
user: req.user.id,
type: `image/${appConfig.imageOutputType}`,
createdAt: isUpdate ? existingFile.createdAt : formattedDate,
updatedAt: formattedDate,
source: appConfig.fileStrategy,
context: FileContext.execute_code,
metadata: { fileIdentifier },
};
createFile(file, true);
return Object.assign(file, { messageId, toolCallId });
}
// For non-image files, save to configured storage strategy
const { saveBuffer } = getStrategyFunctions(appConfig.fileStrategy);
if (!saveBuffer) {
logger.warn(
`[processCodeOutput] saveBuffer not available for strategy ${appConfig.fileStrategy}, falling back to download URL`,
);
return createDownloadFallback({
id,
name,
messageId,
toolCallId,
session_id,
conversationId,
expiresAt: currentDate.getTime() + 86400000,
});
}
// Determine MIME type from buffer or extension
const detectedType = await determineFileType(buffer, true);
const mimeType = detectedType?.mime || inferMimeType(name, '') || 'application/octet-stream';
/** Check MIME type support - for code-generated files, we're lenient but log unsupported types */
const isSupportedMimeType = fileConfig.checkType(
mimeType,
endpointFileConfig.supportedMimeTypes,
);
if (!isSupportedMimeType) {
logger.warn(
`[processCodeOutput] File "${name}" has unsupported MIME type "${mimeType}", proceeding with storage but may not be usable as tool resource`,
);
}
const fileName = `${file_id}__${name}`;
const filepath = await saveBuffer({
userId: req.user.id,
buffer,
fileName,
basePath: 'uploads',
});
const file = {
..._file,
file_id,
usage: 1,
filepath,
messageId,
object: 'file',
filename: name,
type: mimeType,
conversationId,
user: req.user.id,
type: `image/${appConfig.imageOutputType}`,
createdAt: formattedDate,
bytes: buffer.length,
updatedAt: formattedDate,
metadata: { fileIdentifier },
source: appConfig.fileStrategy,
context: FileContext.execute_code,
usage: isUpdate ? (existingFile.usage ?? 0) + 1 : 1,
createdAt: isUpdate ? existingFile.createdAt : formattedDate,
};
createFile(file, true);
/** Note: `toolCallId` is not part of the file DB schema (`messageId` now is); the message object records the associated file ID */
return Object.assign(file, { messageId, toolCallId });
} catch (error) {
logAxiosError({
message: 'Error downloading code environment file',
message: 'Error downloading/processing code environment file',
error,
});
// Fallback for download errors - return download URL so user can still manually download
return createDownloadFallback({
id,
name,
messageId,
toolCallId,
session_id,
conversationId,
expiresAt: currentDate.getTime() + 86400000,
});
}
};
@ -204,9 +391,16 @@ const primeFiles = async (options, apiKey) => {
if (!toolContext) {
toolContext = `- Note: The following files are available in the "${Tools.execute_code}" tool environment:`;
}
toolContext += `\n\t- /mnt/data/${file.filename}${
agentResourceIds.has(file.file_id) ? '' : ' (just attached by user)'
}`;
let fileSuffix = '';
if (!agentResourceIds.has(file.file_id)) {
fileSuffix =
file.context === FileContext.execute_code
? ' (from previous code execution)'
: ' (attached by user)';
}
toolContext += `\n\t- /mnt/data/${file.filename}${fileSuffix}`;
files.push({
id,
session_id,

View file

@ -0,0 +1,418 @@
// Mutable holder for the file size limit reported by the mocked config helpers below.
// The mocks read `value` lazily through getters, so a test can change the limit
// after the mocks are installed and have the next property access observe it.
const fileSizeLimitConfig = { value: 20 * 1024 * 1024 }; // Default 20MB
// Mock librechat-data-provider: keep the real module's exports, but wrap the two
// config helpers so the size limits they return come from `fileSizeLimitConfig`.
jest.mock('librechat-data-provider', () => {
const actual = jest.requireActual('librechat-data-provider');
return {
...actual,
mergeFileConfig: jest.fn((config) => {
// Delegate to the real implementation, then shadow only the size limit
const merged = actual.mergeFileConfig(config);
// Override the serverFileSizeLimit with our test value
return {
...merged,
// Getter (not a plain value) so the current test value is read on each access
get serverFileSizeLimit() {
return fileSizeLimitConfig.value;
},
};
}),
getEndpointFileConfig: jest.fn((options) => {
// Delegate to the real implementation, then shadow only the size limit
const config = actual.getEndpointFileConfig(options);
// Override fileSizeLimit with our test value
return {
...config,
// Getter (not a plain value) so the current test value is read on each access
get fileSizeLimit() {
return fileSizeLimitConfig.value;
},
};
}),
};
});
const { FileContext } = require('librechat-data-provider');
// Mock uuid
jest.mock('uuid', () => ({
v4: jest.fn(() => 'mock-uuid-1234'),
}));
// Mock axios
jest.mock('axios');
const axios = require('axios');
// Mock logger
jest.mock('@librechat/data-schemas', () => ({
logger: {
warn: jest.fn(),
debug: jest.fn(),
error: jest.fn(),
},
}));
// Mock getCodeBaseURL
jest.mock('@librechat/agents', () => ({
getCodeBaseURL: jest.fn(() => 'https://code-api.example.com'),
}));
// Mock logAxiosError and getBasePath
jest.mock('@librechat/api', () => ({
logAxiosError: jest.fn(),
getBasePath: jest.fn(() => ''),
}));
// Mock models
jest.mock('~/models', () => ({
createFile: jest.fn(),
getFiles: jest.fn(),
updateFile: jest.fn(),
}));
// Mock permissions (must be before process.js import)
jest.mock('~/server/services/Files/permissions', () => ({
filterFilesByAgentAccess: jest.fn((options) => Promise.resolve(options.files)),
}));
// Mock strategy functions
jest.mock('~/server/services/Files/strategies', () => ({
getStrategyFunctions: jest.fn(),
}));
// Mock convertImage
jest.mock('~/server/services/Files/images/convert', () => ({
convertImage: jest.fn(),
}));
// Mock determineFileType
jest.mock('~/server/utils', () => ({
determineFileType: jest.fn(),
}));
const { createFile, getFiles } = require('~/models');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { convertImage } = require('~/server/services/Files/images/convert');
const { determineFileType } = require('~/server/utils');
const { logger } = require('@librechat/data-schemas');
// Import after mocks
const { processCodeOutput } = require('./process');
describe('Code Process', () => {
const mockReq = {
user: { id: 'user-123' },
config: {
fileConfig: {},
fileStrategy: 'local',
imageOutputType: 'webp',
},
};
const baseParams = {
req: mockReq,
id: 'file-id-123',
name: 'test-file.txt',
apiKey: 'test-api-key',
toolCallId: 'tool-call-123',
conversationId: 'conv-123',
messageId: 'msg-123',
session_id: 'session-123',
};
beforeEach(() => {
jest.clearAllMocks();
// Default mock implementations
getFiles.mockResolvedValue(null);
createFile.mockResolvedValue({});
getStrategyFunctions.mockReturnValue({
saveBuffer: jest.fn().mockResolvedValue('/uploads/mock-file-path.txt'),
});
determineFileType.mockResolvedValue({ mime: 'text/plain' });
});
describe('findExistingCodeFile (via processCodeOutput)', () => {
it('should find existing file by filename and conversationId', async () => {
const existingFile = {
file_id: 'existing-file-id',
filename: 'test-file.txt',
usage: 2,
createdAt: '2024-01-01T00:00:00.000Z',
};
getFiles.mockResolvedValue([existingFile]);
const smallBuffer = Buffer.alloc(100);
axios.mockResolvedValue({ data: smallBuffer });
const result = await processCodeOutput(baseParams);
// Verify getFiles was called with correct deduplication query
expect(getFiles).toHaveBeenCalledWith(
{
filename: 'test-file.txt',
conversationId: 'conv-123',
context: FileContext.execute_code,
},
{ createdAt: -1 },
{ text: 0 },
);
// Verify the existing file_id was reused
expect(result.file_id).toBe('existing-file-id');
// Verify usage was incremented
expect(result.usage).toBe(3);
// Verify original createdAt was preserved
expect(result.createdAt).toBe('2024-01-01T00:00:00.000Z');
});
it('should create new file when no existing file found', async () => {
getFiles.mockResolvedValue(null);
const smallBuffer = Buffer.alloc(100);
axios.mockResolvedValue({ data: smallBuffer });
const result = await processCodeOutput(baseParams);
// Should use the mocked uuid
expect(result.file_id).toBe('mock-uuid-1234');
// Should have usage of 1 for new file
expect(result.usage).toBe(1);
});
it('should return null for invalid inputs (empty filename)', async () => {
const smallBuffer = Buffer.alloc(100);
axios.mockResolvedValue({ data: smallBuffer });
// The function handles this internally - with empty name
// findExistingCodeFile returns null early for empty filename (guard clause)
const result = await processCodeOutput({ ...baseParams, name: '' });
// getFiles should NOT be called due to early return in findExistingCodeFile
expect(getFiles).not.toHaveBeenCalled();
// A new file_id should be generated since no existing file was found
expect(result.file_id).toBe('mock-uuid-1234');
});
});
describe('processCodeOutput', () => {
describe('image file processing', () => {
it('should process image files using convertImage', async () => {
const imageParams = { ...baseParams, name: 'chart.png' };
const imageBuffer = Buffer.alloc(500);
axios.mockResolvedValue({ data: imageBuffer });
const convertedFile = {
filepath: '/uploads/converted-image.webp',
bytes: 400,
};
convertImage.mockResolvedValue(convertedFile);
getFiles.mockResolvedValue(null);
const result = await processCodeOutput(imageParams);
expect(convertImage).toHaveBeenCalledWith(
mockReq,
imageBuffer,
'high',
'mock-uuid-1234.png',
);
expect(result.type).toBe('image/webp');
expect(result.context).toBe(FileContext.execute_code);
expect(result.filename).toBe('chart.png');
});
it('should update existing image file and increment usage', async () => {
const imageParams = { ...baseParams, name: 'chart.png' };
const existingFile = {
file_id: 'existing-img-id',
usage: 1,
createdAt: '2024-01-01T00:00:00.000Z',
};
getFiles.mockResolvedValue([existingFile]);
const imageBuffer = Buffer.alloc(500);
axios.mockResolvedValue({ data: imageBuffer });
convertImage.mockResolvedValue({ filepath: '/uploads/img.webp' });
const result = await processCodeOutput(imageParams);
expect(result.file_id).toBe('existing-img-id');
expect(result.usage).toBe(2);
expect(logger.debug).toHaveBeenCalledWith(
expect.stringContaining('Updating existing file'),
);
});
});
describe('non-image file processing', () => {
it('should process non-image files using saveBuffer', async () => {
const smallBuffer = Buffer.alloc(100);
axios.mockResolvedValue({ data: smallBuffer });
const mockSaveBuffer = jest.fn().mockResolvedValue('/uploads/saved-file.txt');
getStrategyFunctions.mockReturnValue({ saveBuffer: mockSaveBuffer });
determineFileType.mockResolvedValue({ mime: 'text/plain' });
const result = await processCodeOutput(baseParams);
expect(mockSaveBuffer).toHaveBeenCalledWith({
userId: 'user-123',
buffer: smallBuffer,
fileName: 'mock-uuid-1234__test-file.txt',
basePath: 'uploads',
});
expect(result.type).toBe('text/plain');
expect(result.filepath).toBe('/uploads/saved-file.txt');
expect(result.bytes).toBe(100);
});
it('should detect MIME type from buffer', async () => {
const smallBuffer = Buffer.alloc(100);
axios.mockResolvedValue({ data: smallBuffer });
determineFileType.mockResolvedValue({ mime: 'application/pdf' });
const result = await processCodeOutput({ ...baseParams, name: 'document.pdf' });
expect(determineFileType).toHaveBeenCalledWith(smallBuffer, true);
expect(result.type).toBe('application/pdf');
});
it('should fallback to application/octet-stream for unknown types', async () => {
const smallBuffer = Buffer.alloc(100);
axios.mockResolvedValue({ data: smallBuffer });
determineFileType.mockResolvedValue(null);
const result = await processCodeOutput({ ...baseParams, name: 'unknown.xyz' });
expect(result.type).toBe('application/octet-stream');
});
});
describe('file size limit enforcement', () => {
it('should fallback to download URL when file exceeds size limit', async () => {
// Set a small file size limit for this test
fileSizeLimitConfig.value = 1000; // 1KB limit
const largeBuffer = Buffer.alloc(5000); // 5KB - exceeds 1KB limit
axios.mockResolvedValue({ data: largeBuffer });
const result = await processCodeOutput(baseParams);
expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('exceeds size limit'));
expect(result.filepath).toContain('/api/files/code/download/session-123/file-id-123');
expect(result.expiresAt).toBeDefined();
// Should not call createFile for oversized files (fallback path)
expect(createFile).not.toHaveBeenCalled();
// Reset to default for other tests
fileSizeLimitConfig.value = 20 * 1024 * 1024;
});
});
describe('fallback behavior', () => {
it('should fallback to download URL when saveBuffer is not available', async () => {
const smallBuffer = Buffer.alloc(100);
axios.mockResolvedValue({ data: smallBuffer });
getStrategyFunctions.mockReturnValue({ saveBuffer: null });
const result = await processCodeOutput(baseParams);
expect(logger.warn).toHaveBeenCalledWith(
expect.stringContaining('saveBuffer not available'),
);
expect(result.filepath).toContain('/api/files/code/download/');
expect(result.filename).toBe('test-file.txt');
});
it('should fallback to download URL on axios error', async () => {
axios.mockRejectedValue(new Error('Network error'));
const result = await processCodeOutput(baseParams);
expect(result.filepath).toContain('/api/files/code/download/session-123/file-id-123');
expect(result.conversationId).toBe('conv-123');
expect(result.messageId).toBe('msg-123');
expect(result.toolCallId).toBe('tool-call-123');
});
});
describe('usage counter increment', () => {
  /** No stored record is found, so the counter starts at 1. */
  it('should set usage to 1 for new files', async () => {
    getFiles.mockResolvedValue(null);
    axios.mockResolvedValue({ data: Buffer.alloc(100) });

    const { usage } = await processCodeOutput(baseParams);

    expect(usage).toBe(1);
  });

  /** A stored record with usage 5 should come back as 6. */
  it('should increment usage for existing files', async () => {
    const storedFile = { file_id: 'existing-id', usage: 5, createdAt: '2024-01-01' };
    getFiles.mockResolvedValue([storedFile]);
    axios.mockResolvedValue({ data: Buffer.alloc(100) });

    const { usage } = await processCodeOutput(baseParams);

    expect(usage).toBe(6);
  });

  /** A stored record without a usage field is treated as zero prior uses. */
  it('should handle existing file with undefined usage', async () => {
    const storedFile = { file_id: 'existing-id', createdAt: '2024-01-01' };
    getFiles.mockResolvedValue([storedFile]);
    axios.mockResolvedValue({ data: Buffer.alloc(100) });

    const { usage } = await processCodeOutput(baseParams);

    // (undefined ?? 0) + 1 = 1
    expect(usage).toBe(1);
  });
});
describe('metadata and file properties', () => {
  /** Metadata should consist solely of the `session/file` identifier. */
  it('should include fileIdentifier in metadata', async () => {
    axios.mockResolvedValue({ data: Buffer.alloc(100) });

    const { metadata } = await processCodeOutput(baseParams);

    expect(metadata).toEqual({
      fileIdentifier: 'session-123/file-id-123',
    });
  });

  /** The result should be tagged with the execute_code context. */
  it('should set correct context for code-generated files', async () => {
    axios.mockResolvedValue({ data: Buffer.alloc(100) });

    const { context } = await processCodeOutput(baseParams);

    expect(context).toBe(FileContext.execute_code);
  });

  /** Tool call and message identifiers should be present on the result. */
  it('should include toolCallId and messageId in result', async () => {
    axios.mockResolvedValue({ data: Buffer.alloc(100) });

    const output = await processCodeOutput(baseParams);

    expect(output.toolCallId).toBe('tool-call-123');
    expect(output.messageId).toBe('msg-123');
  });

  /** `createFile` should receive the record plus `true` as its second argument. */
  it('should call createFile with upsert enabled', async () => {
    axios.mockResolvedValue({ data: Buffer.alloc(100) });

    await processCodeOutput(baseParams);

    const expectedRecord = expect.objectContaining({
      file_id: 'mock-uuid-1234',
      context: FileContext.execute_code,
    });
    expect(createFile).toHaveBeenCalledWith(
      expectedRecord,
      true, // upsert flag
    );
  });
});
});
});

View file

@ -67,7 +67,12 @@ async function saveLocalBuffer({ userId, buffer, fileName, basePath = 'images' }
try {
const { publicPath, uploads } = paths;
const directoryPath = path.join(basePath === 'images' ? publicPath : uploads, basePath, userId);
/**
* For 'images': save to publicPath/images/userId (images are served statically)
* For 'uploads': save to uploads/userId (files downloaded via API)
* */
const directoryPath =
basePath === 'images' ? path.join(publicPath, basePath, userId) : path.join(uploads, userId);
if (!fs.existsSync(directoryPath)) {
fs.mkdirSync(directoryPath, { recursive: true });

View file

@ -8,9 +8,13 @@ import { cn } from '~/utils';
const FileAttachment = memo(({ attachment }: { attachment: Partial<TAttachment> }) => {
const [isVisible, setIsVisible] = useState(false);
const file = attachment as TFile & TAttachmentMetadata;
const { handleDownload } = useAttachmentLink({
href: attachment.filepath ?? '',
filename: attachment.filename ?? '',
file_id: file.file_id,
user: file.user,
source: file.source,
});
const extension = attachment.filename?.split('.').pop();

View file

@ -65,6 +65,7 @@ const LogContent: React.FC<LogContentProps> = ({ output = '', renderImages, atta
return `${filename} ${localize('com_download_expired')}`;
}
const fileData = file as TFile & TAttachmentMetadata;
const filepath = file.filepath || '';
// const expirationText = expiresAt
@ -72,7 +73,13 @@ const LogContent: React.FC<LogContentProps> = ({ output = '', renderImages, atta
// : ` ${localize('com_click_to_download')}`;
return (
<LogLink href={filepath} filename={filename}>
<LogLink
href={filepath}
filename={filename}
file_id={fileData.file_id}
user={fileData.user}
source={fileData.source}
>
{'- '}
{filename} {localize('com_click_to_download')}
</LogLink>

View file

@ -1,21 +1,56 @@
import React from 'react';
import { FileSources } from 'librechat-data-provider';
import { useToastContext } from '@librechat/client';
import { useCodeOutputDownload } from '~/data-provider';
import { useCodeOutputDownload, useFileDownload } from '~/data-provider';
/** Props accepted by the `LogLink` component. */
interface LogLinkProps {
/** Anchor target; also forwarded to `useAttachmentLink` as the URL for URL-based downloads. */
href: string;
/** File name forwarded to `useAttachmentLink`. */
filename: string;
/** File identifier; forwarded to `useAttachmentLink`, which passes it to `useFileDownload`. */
file_id?: string;
/** User identifier; forwarded to `useAttachmentLink`, which passes it to `useFileDownload`. */
user?: string;
/** Storage source of the file; `useAttachmentLink` checks it with `isLocallyStoredSource`. */
source?: string;
/** React children supplied to the component. */
children: React.ReactNode;
}
export const useAttachmentLink = ({ href, filename }: Pick<LogLinkProps, 'href' | 'filename'>) => {
/** Options accepted by the `useAttachmentLink` hook. */
interface AttachmentLinkOptions {
/** URL passed to `useCodeOutputDownload` for URL-based downloads. */
href: string;
/** Name of the file being downloaded. */
filename: string;
/** File identifier passed to `useFileDownload`; required for the API download path. */
file_id?: string;
/** User identifier passed to `useFileDownload`; required for the API download path. */
user?: string;
/** Storage source of the file; checked with `isLocallyStoredSource` to pick the download path. */
source?: string;
}
/**
 * Reports whether `source` names one of the storage backends treated as
 * "locally stored": `local`, `firebase`, `s3` or `azure_blob`.
 * Files from these sources should be fetched through the /api/files/download
 * endpoint rather than by direct URL access.
 *
 * @param source - The file's storage source, if known.
 * @returns `true` for the four sources above; `false` for any other value,
 * including `undefined` and the empty string.
 */
const isLocallyStoredSource = (source?: string): boolean => {
  switch (source) {
    case FileSources.local:
    case FileSources.firebase:
    case FileSources.s3:
    case FileSources.azure_blob:
      return true;
    default:
      return false;
  }
};
export const useAttachmentLink = ({
href,
filename,
file_id,
user,
source,
}: AttachmentLinkOptions) => {
const { showToast } = useToastContext();
const { refetch: downloadFile } = useCodeOutputDownload(href);
const useLocalDownload = isLocallyStoredSource(source) && !!file_id && !!user;
const { refetch: downloadFromApi } = useFileDownload(user, file_id);
const { refetch: downloadFromUrl } = useCodeOutputDownload(href);
const handleDownload = async (event: React.MouseEvent<HTMLAnchorElement | HTMLButtonElement>) => {
event.preventDefault();
try {
const stream = await downloadFile();
const stream = useLocalDownload ? await downloadFromApi() : await downloadFromUrl();
if (stream.data == null || stream.data === '') {
console.error('Error downloading file: No data found');
showToast({
@ -39,8 +74,8 @@ export const useAttachmentLink = ({ href, filename }: Pick<LogLinkProps, 'href'
return { handleDownload };
};
const LogLink: React.FC<LogLinkProps> = ({ href, filename, children }) => {
const { handleDownload } = useAttachmentLink({ href, filename });
const LogLink: React.FC<LogLinkProps> = ({ href, filename, file_id, user, source, children }) => {
const { handleDownload } = useAttachmentLink({ href, filename, file_id, user, source });
return (
<a
href={href}

View file

@ -1,7 +1,12 @@
import { useSetRecoilState } from 'recoil';
import type { QueryClient } from '@tanstack/react-query';
import { QueryKeys, Tools } from 'librechat-data-provider';
import type { TAttachment, EventSubmission, MemoriesResponse } from 'librechat-data-provider';
import type {
MemoriesResponse,
EventSubmission,
TAttachment,
TFile,
} from 'librechat-data-provider';
import { handleMemoryArtifact } from '~/utils/memory';
import store from '~/store';
@ -11,9 +16,24 @@ export default function useAttachmentHandler(queryClient?: QueryClient) {
return ({ data }: { data: TAttachment; submission: EventSubmission }) => {
const { messageId } = data;
if (queryClient && data?.filepath && !data.filepath.includes('/api/files')) {
queryClient.setQueryData([QueryKeys.files], (oldData: TAttachment[] | undefined) => {
return [data, ...(oldData || [])];
const fileData = data as TFile;
if (
queryClient &&
fileData?.file_id &&
fileData?.filepath &&
!fileData.filepath.includes('/api/files')
) {
queryClient.setQueryData([QueryKeys.files], (oldData: TFile[] | undefined) => {
if (!oldData) {
return [fileData];
}
const existingIndex = oldData.findIndex((file) => file.file_id === fileData.file_id);
if (existingIndex > -1) {
const updated = [...oldData];
updated[existingIndex] = { ...oldData[existingIndex], ...fileData };
return updated;
}
return [fileData, ...oldData];
});
}

10
package-lock.json generated
View file

@ -59,7 +59,7 @@
"@google/genai": "^1.19.0",
"@keyv/redis": "^4.3.3",
"@langchain/core": "^0.3.80",
"@librechat/agents": "^3.0.77",
"@librechat/agents": "^3.0.78",
"@librechat/api": "*",
"@librechat/data-schemas": "*",
"@microsoft/microsoft-graph-client": "^3.0.7",
@ -12646,9 +12646,9 @@
}
},
"node_modules/@librechat/agents": {
"version": "3.0.77",
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.77.tgz",
"integrity": "sha512-Wr9d8bjJAQSl03nEgnAPG6jBQT1fL3sNV3TFDN1FvFQt6WGfdok838Cbcn+/tSGXSPJcICTxNkMT7VN8P6bCPw==",
"version": "3.0.78",
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.78.tgz",
"integrity": "sha512-+p4NuE2dBAbwm4gJc/jbBDIAfC8xNC0gUAb8wsLXA7zcORnnDRTQ+HWWYVJZ8e81dTIxHIl61hwsziFjVZHvUw==",
"license": "MIT",
"dependencies": {
"@langchain/anthropic": "^0.3.26",
@ -43129,7 +43129,7 @@
"@google/genai": "^1.19.0",
"@keyv/redis": "^4.3.3",
"@langchain/core": "^0.3.80",
"@librechat/agents": "^3.0.77",
"@librechat/agents": "^3.0.78",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.25.2",
"@smithy/node-http-handler": "^4.4.5",

View file

@ -87,7 +87,7 @@
"@google/genai": "^1.19.0",
"@keyv/redis": "^4.3.3",
"@langchain/core": "^0.3.80",
"@librechat/agents": "^3.0.77",
"@librechat/agents": "^3.0.78",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.25.2",
"@smithy/node-http-handler": "^4.4.5",

View file

@ -1,5 +1,6 @@
import { Providers } from '@librechat/agents';
import {
Constants,
ErrorTypes,
EModelEndpoint,
EToolResources,
@ -20,7 +21,12 @@ import type { GenericTool, LCToolRegistry, ToolMap } from '@librechat/agents';
import type { Response as ServerResponse } from 'express';
import type { IMongoFile } from '@librechat/data-schemas';
import type { InitializeResultBase, ServerRequest, EndpointDbMethods } from '~/types';
import { getModelMaxTokens, extractLibreChatParams, optionalChainWithEmptyCheck } from '~/utils';
import {
optionalChainWithEmptyCheck,
extractLibreChatParams,
getModelMaxTokens,
getThreadData,
} from '~/utils';
import { filterFilesByEndpointConfig } from '~/files';
import { generateArtifactsPrompt } from '~/prompts';
import { getProviderConfig } from '~/endpoints';
@ -58,6 +64,8 @@ export interface InitializeAgentParams {
agent: Agent;
/** Conversation ID (optional) */
conversationId?: string | null;
/** Parent message ID for determining the current thread (optional) */
parentMessageId?: string | null;
/** Request files */
requestFiles?: IMongoFile[];
/** Function to load agent tools */
@ -95,10 +103,23 @@ export interface InitializeAgentDbMethods extends EndpointDbMethods {
updateFilesUsage: (files: Array<{ file_id: string }>, fileIds?: string[]) => Promise<unknown[]>;
/** Get files from database */
getFiles: (filter: unknown, sort: unknown, select: unknown, opts?: unknown) => Promise<unknown[]>;
/** Get tool files by IDs */
/** Get tool files by IDs (user-uploaded files only, code files handled separately) */
getToolFilesByIds: (fileIds: string[], toolSet: Set<EToolResources>) => Promise<unknown[]>;
/** Get conversation file IDs */
getConvoFiles: (conversationId: string) => Promise<string[] | null>;
/** Get code-generated files by conversation ID and optional message IDs */
getCodeGeneratedFiles?: (conversationId: string, messageIds?: string[]) => Promise<unknown[]>;
/** Get user-uploaded execute_code files by file IDs (from message.files in thread) */
getUserCodeFiles?: (fileIds: string[]) => Promise<unknown[]>;
/** Get messages for a conversation (supports select for field projection) */
getMessages?: (
filter: { conversationId: string },
select?: string,
) => Promise<Array<{
messageId: string;
parentMessageId?: string;
files?: Array<{ file_id: string }>;
}> | null>;
}
/**
@ -125,6 +146,7 @@ export async function initializeAgent(
requestFiles = [],
conversationId,
endpointOption,
parentMessageId,
allowedProviders,
isInitialAgent = false,
} = params;
@ -174,9 +196,51 @@ export async function initializeAgent(
toolResourceSet.add(EToolResources[tool as keyof typeof EToolResources]);
}
}
const toolFiles = (await db.getToolFilesByIds(fileIds, toolResourceSet)) as IMongoFile[];
if (requestFiles.length || toolFiles.length) {
currentFiles = (await db.updateFilesUsage(requestFiles.concat(toolFiles))) as IMongoFile[];
/**
* Retrieve execute_code files filtered to the current thread.
* This includes both code-generated files and user-uploaded execute_code files.
*/
let codeGeneratedFiles: IMongoFile[] = [];
let userCodeFiles: IMongoFile[] = [];
if (toolResourceSet.has(EToolResources.execute_code)) {
let threadMessageIds: string[] | undefined;
let threadFileIds: string[] | undefined;
if (parentMessageId && parentMessageId !== Constants.NO_PARENT && db.getMessages) {
/** Only select fields needed for thread traversal */
const messages = await db.getMessages(
{ conversationId },
'messageId parentMessageId files',
);
if (messages && messages.length > 0) {
/** Single O(n) pass: build Map, traverse thread, collect both IDs */
const threadData = getThreadData(messages, parentMessageId);
threadMessageIds = threadData.messageIds;
threadFileIds = threadData.fileIds;
}
}
/** Code-generated files (context: execute_code) filtered by messageId */
if (db.getCodeGeneratedFiles) {
codeGeneratedFiles = (await db.getCodeGeneratedFiles(
conversationId,
threadMessageIds,
)) as IMongoFile[];
}
/** User-uploaded execute_code files (context: agents/message_attachment) from thread messages */
if (db.getUserCodeFiles && threadFileIds && threadFileIds.length > 0) {
userCodeFiles = (await db.getUserCodeFiles(threadFileIds)) as IMongoFile[];
}
}
const allToolFiles = toolFiles.concat(codeGeneratedFiles, userCodeFiles);
if (requestFiles.length || allToolFiles.length) {
currentFiles = (await db.updateFilesUsage(requestFiles.concat(allToolFiles))) as IMongoFile[];
}
} else if (requestFiles.length) {
currentFiles = (await db.updateFilesUsage(requestFiles)) as IMongoFile[];

View file

@ -1,4 +1,8 @@
import { sanitizeFileForTransmit, sanitizeMessageForTransmit } from './message';
import { Constants } from 'librechat-data-provider';
import { sanitizeFileForTransmit, sanitizeMessageForTransmit, getThreadData } from './message';
/** Cast to string for type compatibility with ThreadMessage */
const NO_PARENT = Constants.NO_PARENT as string;
describe('sanitizeFileForTransmit', () => {
it('should remove text field from file', () => {
@ -120,3 +124,272 @@ describe('sanitizeMessageForTransmit', () => {
expect(message.files[0].text).toBe('original text');
});
});
describe('getThreadData', () => {
describe('edge cases - empty and null inputs', () => {
  /** With no messages there is nothing to traverse. */
  it('should return empty result for empty messages array', () => {
    const { messageIds, fileIds } = getThreadData([], 'parent-123');

    expect(messageIds).toEqual([]);
    expect(fileIds).toEqual([]);
  });

  /** A null starting point yields an empty result even when messages exist. */
  it('should return empty result for null parentMessageId', () => {
    const conversation = [
      { messageId: 'msg-1', parentMessageId: null },
      { messageId: 'msg-2', parentMessageId: 'msg-1' },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, null);

    expect(messageIds).toEqual([]);
    expect(fileIds).toEqual([]);
  });

  /** An undefined starting point behaves like a null one. */
  it('should return empty result for undefined parentMessageId', () => {
    const conversation = [{ messageId: 'msg-1', parentMessageId: null }];

    const { messageIds, fileIds } = getThreadData(conversation, undefined);

    expect(messageIds).toEqual([]);
    expect(fileIds).toEqual([]);
  });

  /** A starting ID that matches no message yields an empty result. */
  it('should return empty result when parentMessageId not found in messages', () => {
    const conversation = [
      { messageId: 'msg-1', parentMessageId: null },
      { messageId: 'msg-2', parentMessageId: 'msg-1' },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, 'non-existent');

    expect(messageIds).toEqual([]);
    expect(fileIds).toEqual([]);
  });
});
describe('thread traversal', () => {
  /** IDs are reported from the starting message back to the root. */
  it('should traverse a simple linear thread', () => {
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT },
      { messageId: 'msg-2', parentMessageId: 'msg-1' },
      { messageId: 'msg-3', parentMessageId: 'msg-2' },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-3');

    expect(messageIds).toEqual(['msg-3', 'msg-2', 'msg-1']);
    expect(fileIds).toEqual([]);
  });

  /** The walk ends at the message whose parent is the NO_PARENT sentinel. */
  it('should stop at NO_PARENT constant', () => {
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT },
      { messageId: 'msg-2', parentMessageId: 'msg-1' },
    ];

    const { messageIds } = getThreadData(conversation, 'msg-2');

    expect(messageIds).toEqual(['msg-2', 'msg-1']);
  });

  it('should collect only messages in the thread branch', () => {
    // Branched conversation sharing one root:
    //   branch A: msg-1 -> msg-2 -> msg-3
    //   branch B: msg-1 -> msg-4 -> msg-5
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT },
      { messageId: 'msg-2', parentMessageId: 'msg-1' },
      { messageId: 'msg-3', parentMessageId: 'msg-2' },
      { messageId: 'msg-4', parentMessageId: 'msg-1' },
      { messageId: 'msg-5', parentMessageId: 'msg-4' },
    ];

    const branchA = getThreadData(conversation, 'msg-3');
    expect(branchA.messageIds).toEqual(['msg-3', 'msg-2', 'msg-1']);

    const branchB = getThreadData(conversation, 'msg-5');
    expect(branchB.messageIds).toEqual(['msg-5', 'msg-4', 'msg-1']);
  });

  /** A lone root message forms a one-element thread. */
  it('should handle single message thread', () => {
    const conversation = [{ messageId: 'msg-1', parentMessageId: NO_PARENT }];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-1');

    expect(messageIds).toEqual(['msg-1']);
    expect(fileIds).toEqual([]);
  });
});
describe('circular reference protection', () => {
  it('should handle circular references without infinite loop', () => {
    // Malformed data: msg-2 and msg-3 name each other as parent
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT },
      { messageId: 'msg-2', parentMessageId: 'msg-3' },
      { messageId: 'msg-3', parentMessageId: 'msg-2' },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-2');

    // Should stop when encountering a visited ID
    expect(messageIds).toEqual(['msg-2', 'msg-3']);
    expect(fileIds).toEqual([]);
  });

  /** A message that is its own parent is reported exactly once. */
  it('should handle self-referencing message', () => {
    const conversation = [{ messageId: 'msg-1', parentMessageId: 'msg-1' }];

    const { messageIds } = getThreadData(conversation, 'msg-1');

    expect(messageIds).toEqual(['msg-1']);
  });
});
describe('file ID collection', () => {
  /** File IDs from every message on the thread are gathered. */
  it('should collect file IDs from messages with files', () => {
    const conversation = [
      {
        messageId: 'msg-1',
        parentMessageId: NO_PARENT,
        files: [{ file_id: 'file-1' }, { file_id: 'file-2' }],
      },
      { messageId: 'msg-2', parentMessageId: 'msg-1', files: [{ file_id: 'file-3' }] },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-2');

    expect(messageIds).toEqual(['msg-2', 'msg-1']);
    expect(fileIds).toContain('file-1');
    expect(fileIds).toContain('file-2');
    expect(fileIds).toContain('file-3');
    expect(fileIds).toHaveLength(3);
  });

  /** A file attached to several messages is reported once. */
  it('should deduplicate file IDs across messages', () => {
    const conversation = [
      {
        messageId: 'msg-1',
        parentMessageId: NO_PARENT,
        files: [{ file_id: 'file-shared' }, { file_id: 'file-1' }],
      },
      {
        messageId: 'msg-2',
        parentMessageId: 'msg-1',
        files: [{ file_id: 'file-shared' }, { file_id: 'file-2' }],
      },
    ];

    const { fileIds } = getThreadData(conversation, 'msg-2');

    expect(fileIds).toContain('file-shared');
    expect(fileIds).toContain('file-1');
    expect(fileIds).toContain('file-2');
    expect(fileIds).toHaveLength(3);
  });

  /** Entries whose `file_id` is undefined or empty are ignored. */
  it('should skip files without file_id', () => {
    const conversation = [
      {
        messageId: 'msg-1',
        parentMessageId: NO_PARENT,
        files: [{ file_id: 'file-1' }, { file_id: undefined }, { file_id: '' }],
      },
    ];

    const { fileIds } = getThreadData(conversation, 'msg-1');

    expect(fileIds).toEqual(['file-1']);
  });

  /** An empty `files` array contributes nothing and does not stop the walk. */
  it('should handle messages with empty files array', () => {
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT, files: [] },
      { messageId: 'msg-2', parentMessageId: 'msg-1', files: [{ file_id: 'file-1' }] },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-2');

    expect(messageIds).toEqual(['msg-2', 'msg-1']);
    expect(fileIds).toEqual(['file-1']);
  });

  /** A message lacking the `files` property is still part of the thread. */
  it('should handle messages without files property', () => {
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT },
      { messageId: 'msg-2', parentMessageId: 'msg-1', files: [{ file_id: 'file-1' }] },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-2');

    expect(messageIds).toEqual(['msg-2', 'msg-1']);
    expect(fileIds).toEqual(['file-1']);
  });

  it('should only collect files from messages in the thread', () => {
    // msg-3 is a sibling of msg-2, so it is not on the path from msg-2 to the root
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT, files: [{ file_id: 'file-1' }] },
      { messageId: 'msg-2', parentMessageId: 'msg-1', files: [{ file_id: 'file-2' }] },
      { messageId: 'msg-3', parentMessageId: 'msg-1', files: [{ file_id: 'file-3' }] },
    ];

    const { fileIds } = getThreadData(conversation, 'msg-2');

    expect(fileIds).toContain('file-1');
    expect(fileIds).toContain('file-2');
    expect(fileIds).not.toContain('file-3');
  });
});
describe('performance - O(1) lookups', () => {
  it('should handle large message arrays efficiently', () => {
    // Linear chain of 1000 messages: msg-0 is the root, msg-i replies to msg-(i-1),
    // and every message carries one unique file.
    const chain = Array.from({ length: 1000 }, (_, i) => ({
      messageId: `msg-${i}`,
      parentMessageId: i === 0 ? NO_PARENT : `msg-${i - 1}`,
      files: [{ file_id: `file-${i}` }],
    }));

    const startedAt = performance.now();
    const { messageIds, fileIds } = getThreadData(chain, 'msg-999');
    const finishedAt = performance.now();

    expect(messageIds).toHaveLength(1000);
    expect(fileIds).toHaveLength(1000);
    // Should complete in reasonable time (< 100ms for 1000 messages)
    expect(finishedAt - startedAt).toBeLessThan(100);
  });
});
});

View file

@ -1,3 +1,4 @@
import { Constants } from 'librechat-data-provider';
import type { TFile, TMessage } from 'librechat-data-provider';
/** Fields to strip from files before client transmission */
@ -66,3 +67,74 @@ export function sanitizeMessageForTransmit<T extends Partial<TMessage>>(
return sanitized;
}
/** The subset of message fields that thread traversal reads */
type ThreadMessage = {
  messageId: string;
  parentMessageId?: string | null;
  files?: Array<{ file_id?: string }>;
};

/** IDs gathered while walking a thread */
export type ThreadData = {
  /** Message IDs in visit order: the starting message first, then each ancestor */
  messageIds: string[];
  /** Distinct, non-empty file IDs attached to the visited messages */
  fileIds: string[];
};

/**
 * Walks a conversation thread from `parentMessageId` up through its ancestors and
 * gathers the visited message IDs together with the file IDs attached to them.
 *
 * Messages are indexed by ID up front so each step of the walk is a Map lookup.
 * The walk ends when a message's parent is `Constants.NO_PARENT` or missing, when
 * an ID is not present in `messages`, or when an ID repeats (cycle guard).
 *
 * @param messages - All messages in the conversation (should be queried with select for efficiency)
 * @param parentMessageId - The ID of the message to start the walk from
 * @returns The collected `messageIds` and `fileIds`; both empty when there are no
 * messages or no starting ID
 */
export function getThreadData(
  messages: ThreadMessage[],
  parentMessageId: string | null | undefined,
): ThreadData {
  if (!messages || messages.length === 0 || !parentMessageId) {
    return { messageIds: [], fileIds: [] };
  }

  /** Index by messageId; a later duplicate replaces an earlier entry */
  const byId = new Map<string, ThreadMessage>(
    messages.map((entry): [string, ThreadMessage] => [entry.messageId, entry]),
  );

  const messageIds: string[] = [];
  const collectedFiles = new Set<string>();
  const seen = new Set<string>();

  let cursor: string | null | undefined = parentMessageId;
  while (cursor && !seen.has(cursor)) {
    seen.add(cursor);

    const node = byId.get(cursor);
    if (node === undefined) {
      break;
    }

    messageIds.push(node.messageId);
    for (const { file_id } of node.files ?? []) {
      /** Skips undefined and empty-string IDs */
      if (file_id) {
        collectedFiles.add(file_id);
      }
    }

    cursor = node.parentMessageId === Constants.NO_PARENT ? null : node.parentMessageId;
  }

  return { messageIds, fileIds: Array.from(collectedFiles) };
}

View file

@ -198,8 +198,15 @@ export const codeTypeMapping: { [key: string]: string } = {
ts: 'application/typescript', // .ts - TypeScript source
tar: 'application/x-tar', // .tar - Tar archive
zip: 'application/zip', // .zip - ZIP archive
txt: 'text/plain', // .txt - Plain text file
log: 'text/plain', // .log - Log file
csv: 'text/csv', // .csv - Comma-separated values
tsv: 'text/tab-separated-values', // .tsv - Tab-separated values
json: 'application/json', // .json - JSON file
xml: 'application/xml', // .xml - XML file
html: 'text/html', // .html - HTML file
htm: 'text/html', // .htm - HTML file
css: 'text/css', // .css - CSS file
yml: 'application/yaml', // .yml - YAML
yaml: 'application/yaml', // .yaml - YAML
sql: 'application/sql', // .sql - SQL (IANA registered)

View file

@ -130,7 +130,7 @@ describe('File Methods', () => {
const files = await fileMethods.getFiles({ user: userId });
expect(files).toHaveLength(3);
expect(files.map((f) => f.file_id)).toEqual(expect.arrayContaining(fileIds));
expect(files!.map((f) => f.file_id)).toEqual(expect.arrayContaining(fileIds));
});
it('should exclude text field by default', async () => {
@ -149,7 +149,7 @@ describe('File Methods', () => {
const files = await fileMethods.getFiles({ file_id: fileId });
expect(files).toHaveLength(1);
expect(files[0].text).toBeUndefined();
expect(files![0].text).toBeUndefined();
});
});
@ -207,7 +207,7 @@ describe('File Methods', () => {
expect(files[0].file_id).toBe(contextFileId);
});
it('should retrieve files for execute_code tool', async () => {
it('should not retrieve execute_code files (handled by getCodeGeneratedFiles)', async () => {
const userId = new mongoose.Types.ObjectId();
const codeFileId = uuidv4();
@ -218,14 +218,16 @@ describe('File Methods', () => {
filepath: '/uploads/code.py',
type: 'text/x-python',
bytes: 100,
context: FileContext.execute_code,
metadata: { fileIdentifier: 'some-identifier' },
});
// execute_code files are explicitly excluded from getToolFilesByIds
// They are retrieved via getCodeGeneratedFiles and getUserCodeFiles instead
const toolSet = new Set([EToolResources.execute_code]);
const files = await fileMethods.getToolFilesByIds([codeFileId], toolSet);
expect(files).toHaveLength(1);
expect(files[0].file_id).toBe(codeFileId);
expect(files).toHaveLength(0);
});
});
@ -490,7 +492,7 @@ describe('File Methods', () => {
const remaining = await fileMethods.getFiles({});
expect(remaining).toHaveLength(1);
expect(remaining[0].user?.toString()).toBe(otherUserId.toString());
expect(remaining![0].user?.toString()).toBe(otherUserId.toString());
});
});

View file

@ -47,7 +47,8 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
}
/**
* Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs
* Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs.
* Note: execute_code files are handled separately by getCodeGeneratedFiles.
* @param fileIds - Array of file_id strings to search for
* @param toolResourceSet - Optional filter for tool resources
* @returns Files that match the criteria
@ -61,21 +62,26 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
}
try {
const filter: FilterQuery<IMongoFile> = {
file_id: { $in: fileIds },
$or: [],
};
const orConditions: FilterQuery<IMongoFile>[] = [];
if (toolResourceSet.has(EToolResources.context)) {
filter.$or?.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
orConditions.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
}
if (toolResourceSet.has(EToolResources.file_search)) {
filter.$or?.push({ embedded: true });
orConditions.push({ embedded: true });
}
if (toolResourceSet.has(EToolResources.execute_code)) {
filter.$or?.push({ 'metadata.fileIdentifier': { $exists: true } });
// If no conditions to match, return empty
if (orConditions.length === 0) {
return [];
}
const filter: FilterQuery<IMongoFile> = {
file_id: { $in: fileIds },
context: { $ne: FileContext.execute_code },
$or: orConditions,
};
const selectFields: SelectProjection = { text: 0 };
const sortOptions = { updatedAt: -1 as SortOrder };
@ -87,6 +93,84 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
}
}
/**
 * Looks up the files produced by code execution within one thread of a conversation.
 *
 * The query matches records in the given conversation whose `context` is
 * `execute_code`, whose `messageId` is one of `messageIds`, and which carry a
 * `metadata.fileIdentifier`. Results are requested in ascending `createdAt` order
 * with a `{ text: 0 }` projection.
 *
 * @param conversationId - The conversation to search in
 * @param messageIds - Message IDs of the current thread. Optional in the signature,
 * but an absent or empty list intentionally yields no files: without a thread to
 * scope to, files from other branches or deleted messages could be returned.
 * @returns The matching files, or an empty array when an argument is missing,
 * `getFiles` returns a nullish value, or the lookup throws
 */
async function getCodeGeneratedFiles(
  conversationId: string,
  messageIds?: string[],
): Promise<IMongoFile[]> {
  /** Both a conversation and a non-empty thread are required; see the note on `messageIds` */
  if (!conversationId || !messageIds?.length) {
    return [];
  }

  const query: FilterQuery<IMongoFile> = {
    conversationId,
    context: FileContext.execute_code,
    messageId: { $exists: true, $in: messageIds },
    'metadata.fileIdentifier': { $exists: true },
  };
  const projection: SelectProjection = { text: 0 };
  const oldestFirst = { createdAt: 1 as SortOrder };

  try {
    const found = await getFiles(query, oldestFirst, projection);
    return found ?? [];
  } catch (error) {
    logger.error('[getCodeGeneratedFiles] Error retrieving code generated files:', error);
    return [];
  }
}
/**
 * Looks up user-uploaded execute_code files (as opposed to code-generated ones) by file ID.
 *
 * The query matches records whose `file_id` is in `fileIds`, whose `context` is
 * anything other than `execute_code`, and which carry a `metadata.fileIdentifier`.
 * Results are requested in ascending `createdAt` order with a `{ text: 0 }` projection.
 * The IDs are expected to come from the `message.files` arrays of the current thread.
 *
 * @param fileIds - File IDs to fetch (from message.files in the thread)
 * @returns The matching files, or an empty array when no IDs are given,
 * `getFiles` returns a nullish value, or the lookup throws
 */
async function getUserCodeFiles(fileIds?: string[]): Promise<IMongoFile[]> {
  if (!fileIds?.length) {
    return [];
  }

  const query: FilterQuery<IMongoFile> = {
    file_id: { $in: fileIds },
    context: { $ne: FileContext.execute_code },
    'metadata.fileIdentifier': { $exists: true },
  };
  const projection: SelectProjection = { text: 0 };
  const oldestFirst = { createdAt: 1 as SortOrder };

  try {
    const found = await getFiles(query, oldestFirst, projection);
    return found ?? [];
  } catch (error) {
    logger.error('[getUserCodeFiles] Error retrieving user code files:', error);
    return [];
  }
}
/**
* Creates a new file with a TTL of 1 hour.
* @param data - The file data to be created, must contain file_id
@ -258,6 +342,8 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
findFileById,
getFiles,
getToolFilesByIds,
getCodeGeneratedFiles,
getUserCodeFiles,
createFile,
updateFile,
updateFileUsage,

View file

@ -15,6 +15,10 @@ const file: Schema<IMongoFile> = new Schema(
ref: 'Conversation',
index: true,
},
messageId: {
type: String,
index: true,
},
file_id: {
type: String,
index: true,

View file

@ -3,6 +3,7 @@ import { Document, Types } from 'mongoose';
export interface IMongoFile extends Omit<Document, 'model'> {
user: Types.ObjectId;
conversationId?: string;
messageId?: string;
file_id: string;
temp_file_id?: string;
bytes: number;