🗂️ feat: Better Persistence for Code Execution Files Between Sessions (#11362)

* refactor: process code output files for re-use (WIP)

* feat: file attachment handling with additional metadata for downloads

* refactor: Update directory path logic for local file saving based on basePath

* refactor: file attachment handling to support TFile type and improve data merging logic

* feat: thread filtering of code-generated files

- Introduced parentMessageId parameter in addedConvo and initialize functions to enhance thread management.
- Updated related methods to utilize parentMessageId for retrieving messages and filtering code-generated files by conversation threads.
- Enhanced type definitions to include parentMessageId in relevant interfaces for better clarity and usage.

* chore: imports/params ordering

* feat: update file model to use messageId for filtering and processing

- Changed references from 'message' to 'messageId' in file-related methods for consistency.
- Added messageId field to the file schema and updated related types.
- Enhanced file processing logic to accommodate the new messageId structure.

* feat: enhance file retrieval methods to support user-uploaded execute_code files

- Added a new method `getUserCodeFiles` to retrieve user-uploaded execute_code files, excluding code-generated files.
- Updated existing file retrieval methods to improve filtering logic and handle edge cases.
- Enhanced thread data extraction to collect both message IDs and file IDs efficiently.
- Integrated `getUserCodeFiles` into relevant endpoints for better file management in conversations.

* chore: update @librechat/agents package version to 3.0.78 in package-lock.json and related package.json files

* refactor: file processing and retrieval logic

- Added a fallback mechanism for download URLs when files exceed size limits or cannot be processed locally.
- Implemented a deduplication strategy for code-generated files based on conversationId and filename to optimize storage.
- Updated file retrieval methods to ensure proper filtering by messageIds, preventing orphaned files from being included.
- Introduced comprehensive tests for new thread data extraction functionality, covering edge cases and performance considerations.

* fix: improve file retrieval tests and handling of optional properties

- Updated tests to safely access optional properties using non-null assertions.
- Modified test descriptions for clarity regarding the exclusion of execute_code files.
- Ensured that the retrieval logic correctly reflects the expected outcomes for file queries.

* test: add comprehensive unit tests for processCodeOutput functionality

- Introduced a new test suite for the processCodeOutput function, covering various scenarios including file retrieval, creation, and processing for both image and non-image files.
- Implemented mocks for dependencies such as axios, logger, and file models to isolate tests and ensure reliable outcomes.
- Validated behavior for existing files, new file creation, and error handling, including size limits and fallback mechanisms.
- Enhanced test coverage for metadata handling and usage increment logic, ensuring robust verification of file processing outcomes.

* test: enhance file size limit enforcement in processCodeOutput tests

- Introduced a configurable file size limit for tests to improve flexibility and coverage.
- Mocked the `librechat-data-provider` to allow dynamic adjustment of file size limits during tests.
- Updated the file size limit enforcement test to validate behavior when files exceed specified limits, ensuring proper fallback to download URLs.
- Reset file size limit after tests to maintain isolation for subsequent test cases.
This commit is contained in:
Danny Avila 2026-01-16 10:06:24 -05:00
parent fe32cbedf9
commit cc32895d13
No known key found for this signature in database
GPG key ID: BF31EEB2C5CA0956
22 changed files with 1364 additions and 83 deletions

View file

@ -87,7 +87,7 @@
"@google/genai": "^1.19.0",
"@keyv/redis": "^4.3.3",
"@langchain/core": "^0.3.80",
"@librechat/agents": "^3.0.77",
"@librechat/agents": "^3.0.78",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.25.2",
"@smithy/node-http-handler": "^4.4.5",

View file

@ -1,5 +1,6 @@
import { Providers } from '@librechat/agents';
import {
Constants,
ErrorTypes,
EModelEndpoint,
EToolResources,
@ -20,7 +21,12 @@ import type { GenericTool, LCToolRegistry, ToolMap } from '@librechat/agents';
import type { Response as ServerResponse } from 'express';
import type { IMongoFile } from '@librechat/data-schemas';
import type { InitializeResultBase, ServerRequest, EndpointDbMethods } from '~/types';
import { getModelMaxTokens, extractLibreChatParams, optionalChainWithEmptyCheck } from '~/utils';
import {
optionalChainWithEmptyCheck,
extractLibreChatParams,
getModelMaxTokens,
getThreadData,
} from '~/utils';
import { filterFilesByEndpointConfig } from '~/files';
import { generateArtifactsPrompt } from '~/prompts';
import { getProviderConfig } from '~/endpoints';
@ -58,6 +64,8 @@ export interface InitializeAgentParams {
agent: Agent;
/** Conversation ID (optional) */
conversationId?: string | null;
/** Parent message ID for determining the current thread (optional) */
parentMessageId?: string | null;
/** Request files */
requestFiles?: IMongoFile[];
/** Function to load agent tools */
@ -95,10 +103,23 @@ export interface InitializeAgentDbMethods extends EndpointDbMethods {
updateFilesUsage: (files: Array<{ file_id: string }>, fileIds?: string[]) => Promise<unknown[]>;
/** Get files from database */
getFiles: (filter: unknown, sort: unknown, select: unknown, opts?: unknown) => Promise<unknown[]>;
/** Get tool files by IDs */
/** Get tool files by IDs (user-uploaded files only, code files handled separately) */
getToolFilesByIds: (fileIds: string[], toolSet: Set<EToolResources>) => Promise<unknown[]>;
/** Get conversation file IDs */
getConvoFiles: (conversationId: string) => Promise<string[] | null>;
/** Get code-generated files by conversation ID and optional message IDs */
getCodeGeneratedFiles?: (conversationId: string, messageIds?: string[]) => Promise<unknown[]>;
/** Get user-uploaded execute_code files by file IDs (from message.files in thread) */
getUserCodeFiles?: (fileIds: string[]) => Promise<unknown[]>;
/** Get messages for a conversation (supports select for field projection) */
getMessages?: (
filter: { conversationId: string },
select?: string,
) => Promise<Array<{
messageId: string;
parentMessageId?: string;
files?: Array<{ file_id: string }>;
}> | null>;
}
/**
@ -125,6 +146,7 @@ export async function initializeAgent(
requestFiles = [],
conversationId,
endpointOption,
parentMessageId,
allowedProviders,
isInitialAgent = false,
} = params;
@ -174,9 +196,51 @@ export async function initializeAgent(
toolResourceSet.add(EToolResources[tool as keyof typeof EToolResources]);
}
}
const toolFiles = (await db.getToolFilesByIds(fileIds, toolResourceSet)) as IMongoFile[];
if (requestFiles.length || toolFiles.length) {
currentFiles = (await db.updateFilesUsage(requestFiles.concat(toolFiles))) as IMongoFile[];
/**
* Retrieve execute_code files filtered to the current thread.
* This includes both code-generated files and user-uploaded execute_code files.
*/
let codeGeneratedFiles: IMongoFile[] = [];
let userCodeFiles: IMongoFile[] = [];
if (toolResourceSet.has(EToolResources.execute_code)) {
let threadMessageIds: string[] | undefined;
let threadFileIds: string[] | undefined;
if (parentMessageId && parentMessageId !== Constants.NO_PARENT && db.getMessages) {
/** Only select fields needed for thread traversal */
const messages = await db.getMessages(
{ conversationId },
'messageId parentMessageId files',
);
if (messages && messages.length > 0) {
/** Single O(n) pass: build Map, traverse thread, collect both IDs */
const threadData = getThreadData(messages, parentMessageId);
threadMessageIds = threadData.messageIds;
threadFileIds = threadData.fileIds;
}
}
/** Code-generated files (context: execute_code) filtered by messageId */
if (db.getCodeGeneratedFiles) {
codeGeneratedFiles = (await db.getCodeGeneratedFiles(
conversationId,
threadMessageIds,
)) as IMongoFile[];
}
/** User-uploaded execute_code files (context: agents/message_attachment) from thread messages */
if (db.getUserCodeFiles && threadFileIds && threadFileIds.length > 0) {
userCodeFiles = (await db.getUserCodeFiles(threadFileIds)) as IMongoFile[];
}
}
const allToolFiles = toolFiles.concat(codeGeneratedFiles, userCodeFiles);
if (requestFiles.length || allToolFiles.length) {
currentFiles = (await db.updateFilesUsage(requestFiles.concat(allToolFiles))) as IMongoFile[];
}
} else if (requestFiles.length) {
currentFiles = (await db.updateFilesUsage(requestFiles)) as IMongoFile[];

View file

@ -1,4 +1,8 @@
import { sanitizeFileForTransmit, sanitizeMessageForTransmit } from './message';
import { Constants } from 'librechat-data-provider';
import { sanitizeFileForTransmit, sanitizeMessageForTransmit, getThreadData } from './message';
/** Cast to string for type compatibility with ThreadMessage */
const NO_PARENT = Constants.NO_PARENT as string;
describe('sanitizeFileForTransmit', () => {
it('should remove text field from file', () => {
@ -120,3 +124,272 @@ describe('sanitizeMessageForTransmit', () => {
expect(message.files[0].text).toBe('original text');
});
});
describe('getThreadData', () => {
// Guard-clause coverage: every case in this group expects both result arrays to be empty.
describe('edge cases - empty and null inputs', () => {
it('should return empty result for empty messages array', () => {
// A start ID is supplied, but there are no messages to look it up in.
const result = getThreadData([], 'parent-123');
expect(result.messageIds).toEqual([]);
expect(result.fileIds).toEqual([]);
});
it('should return empty result for null parentMessageId', () => {
// Messages exist, but no starting message is given for the traversal.
const messages = [
{ messageId: 'msg-1', parentMessageId: null },
{ messageId: 'msg-2', parentMessageId: 'msg-1' },
];
const result = getThreadData(messages, null);
expect(result.messageIds).toEqual([]);
expect(result.fileIds).toEqual([]);
});
it('should return empty result for undefined parentMessageId', () => {
// Same expectation as the null case, with undefined as the start ID.
const messages = [{ messageId: 'msg-1', parentMessageId: null }];
const result = getThreadData(messages, undefined);
expect(result.messageIds).toEqual([]);
expect(result.fileIds).toEqual([]);
});
it('should return empty result when parentMessageId not found in messages', () => {
const messages = [
{ messageId: 'msg-1', parentMessageId: null },
{ messageId: 'msg-2', parentMessageId: 'msg-1' },
];
// 'non-existent' matches no messageId above, so nothing can be collected.
const result = getThreadData(messages, 'non-existent');
expect(result.messageIds).toEqual([]);
expect(result.fileIds).toEqual([]);
});
});
// Traversal coverage: the expected messageIds start at the given message and
// follow parentMessageId links back to the message whose parent is NO_PARENT.
describe('thread traversal', () => {
it('should traverse a simple linear thread', () => {
const messages = [
{ messageId: 'msg-1', parentMessageId: NO_PARENT },
{ messageId: 'msg-2', parentMessageId: 'msg-1' },
{ messageId: 'msg-3', parentMessageId: 'msg-2' },
];
const result = getThreadData(messages, 'msg-3');
// Order is newest-first: the start message, then each ancestor in turn.
expect(result.messageIds).toEqual(['msg-3', 'msg-2', 'msg-1']);
expect(result.fileIds).toEqual([]);
});
it('should stop at NO_PARENT constant', () => {
const messages = [
{ messageId: 'msg-1', parentMessageId: NO_PARENT },
{ messageId: 'msg-2', parentMessageId: 'msg-1' },
];
const result = getThreadData(messages, 'msg-2');
// The NO_PARENT value itself must not appear as a collected ID.
expect(result.messageIds).toEqual(['msg-2', 'msg-1']);
});
it('should collect only messages in the thread branch', () => {
// Branched conversation: msg-1 -> msg-2 -> msg-3 (branch A)
// msg-1 -> msg-4 -> msg-5 (branch B)
const messages = [
{ messageId: 'msg-1', parentMessageId: NO_PARENT },
{ messageId: 'msg-2', parentMessageId: 'msg-1' },
{ messageId: 'msg-3', parentMessageId: 'msg-2' },
{ messageId: 'msg-4', parentMessageId: 'msg-1' },
{ messageId: 'msg-5', parentMessageId: 'msg-4' },
];
// Each branch shares the root msg-1 but excludes the sibling branch's messages.
const resultBranchA = getThreadData(messages, 'msg-3');
expect(resultBranchA.messageIds).toEqual(['msg-3', 'msg-2', 'msg-1']);
const resultBranchB = getThreadData(messages, 'msg-5');
expect(resultBranchB.messageIds).toEqual(['msg-5', 'msg-4', 'msg-1']);
});
it('should handle single message thread', () => {
// The start message is also the root.
const messages = [{ messageId: 'msg-1', parentMessageId: NO_PARENT }];
const result = getThreadData(messages, 'msg-1');
expect(result.messageIds).toEqual(['msg-1']);
expect(result.fileIds).toEqual([]);
});
});
// Malformed-data coverage: parent links that form a cycle must still terminate,
// and each message ID is expected to appear only once in the result.
describe('circular reference protection', () => {
it('should handle circular references without infinite loop', () => {
// Malformed data: msg-2 points to msg-3 which points back to msg-2
const messages = [
{ messageId: 'msg-1', parentMessageId: NO_PARENT },
{ messageId: 'msg-2', parentMessageId: 'msg-3' },
{ messageId: 'msg-3', parentMessageId: 'msg-2' },
];
const result = getThreadData(messages, 'msg-2');
// Should stop when encountering a visited ID
// msg-1 is never reached because neither message in the cycle links to it.
expect(result.messageIds).toEqual(['msg-2', 'msg-3']);
expect(result.fileIds).toEqual([]);
});
it('should handle self-referencing message', () => {
// Smallest possible cycle: the message names itself as its parent.
const messages = [{ messageId: 'msg-1', parentMessageId: 'msg-1' }];
const result = getThreadData(messages, 'msg-1');
expect(result.messageIds).toEqual(['msg-1']);
});
});
// File-ID coverage: IDs are gathered from the `files` arrays of the messages in
// the traversed thread. Several cases use toContain + toHaveLength, so they check
// membership and count without pinning the order of fileIds.
describe('file ID collection', () => {
it('should collect file IDs from messages with files', () => {
const messages = [
{
messageId: 'msg-1',
parentMessageId: NO_PARENT,
files: [{ file_id: 'file-1' }, { file_id: 'file-2' }],
},
{
messageId: 'msg-2',
parentMessageId: 'msg-1',
files: [{ file_id: 'file-3' }],
},
];
const result = getThreadData(messages, 'msg-2');
expect(result.messageIds).toEqual(['msg-2', 'msg-1']);
expect(result.fileIds).toContain('file-1');
expect(result.fileIds).toContain('file-2');
expect(result.fileIds).toContain('file-3');
expect(result.fileIds).toHaveLength(3);
});
it('should deduplicate file IDs across messages', () => {
// 'file-shared' is attached to both messages but must be reported once.
const messages = [
{
messageId: 'msg-1',
parentMessageId: NO_PARENT,
files: [{ file_id: 'file-shared' }, { file_id: 'file-1' }],
},
{
messageId: 'msg-2',
parentMessageId: 'msg-1',
files: [{ file_id: 'file-shared' }, { file_id: 'file-2' }],
},
];
const result = getThreadData(messages, 'msg-2');
expect(result.fileIds).toContain('file-shared');
expect(result.fileIds).toContain('file-1');
expect(result.fileIds).toContain('file-2');
// Four attachments in total, three distinct IDs.
expect(result.fileIds).toHaveLength(3);
});
it('should skip files without file_id', () => {
// Both an undefined and an empty-string file_id are expected to be dropped.
const messages = [
{
messageId: 'msg-1',
parentMessageId: NO_PARENT,
files: [{ file_id: 'file-1' }, { file_id: undefined }, { file_id: '' }],
},
];
const result = getThreadData(messages, 'msg-1');
expect(result.fileIds).toEqual(['file-1']);
});
it('should handle messages with empty files array', () => {
const messages = [
{
messageId: 'msg-1',
parentMessageId: NO_PARENT,
files: [],
},
{
messageId: 'msg-2',
parentMessageId: 'msg-1',
files: [{ file_id: 'file-1' }],
},
];
const result = getThreadData(messages, 'msg-2');
// The message with no attachments still counts as part of the thread.
expect(result.messageIds).toEqual(['msg-2', 'msg-1']);
expect(result.fileIds).toEqual(['file-1']);
});
it('should handle messages without files property', () => {
// msg-1 omits `files` entirely rather than providing an empty array.
const messages = [
{ messageId: 'msg-1', parentMessageId: NO_PARENT },
{
messageId: 'msg-2',
parentMessageId: 'msg-1',
files: [{ file_id: 'file-1' }],
},
];
const result = getThreadData(messages, 'msg-2');
expect(result.messageIds).toEqual(['msg-2', 'msg-1']);
expect(result.fileIds).toEqual(['file-1']);
});
it('should only collect files from messages in the thread', () => {
// msg-3 is not in the thread from msg-2
const messages = [
{
messageId: 'msg-1',
parentMessageId: NO_PARENT,
files: [{ file_id: 'file-1' }],
},
{
messageId: 'msg-2',
parentMessageId: 'msg-1',
files: [{ file_id: 'file-2' }],
},
{
messageId: 'msg-3',
parentMessageId: 'msg-1',
files: [{ file_id: 'file-3' }],
},
];
const result = getThreadData(messages, 'msg-2');
expect(result.fileIds).toContain('file-1');
expect(result.fileIds).toContain('file-2');
// file-3 belongs to the sibling branch (msg-3) and must be excluded.
expect(result.fileIds).not.toContain('file-3');
});
});
// Scale coverage: a 1000-message linear thread must be fully traversed, with one
// message ID and one file ID collected per message, inside a wall-clock budget.
describe('performance - O(1) lookups', () => {
it('should handle large message arrays efficiently', () => {
// Create a linear thread of 1000 messages
const messages = [];
for (let i = 0; i < 1000; i++) {
messages.push({
messageId: `msg-${i}`,
// msg-0 is the root; every other message points at its predecessor.
parentMessageId: i === 0 ? NO_PARENT : `msg-${i - 1}`,
files: [{ file_id: `file-${i}` }],
});
}
const startTime = performance.now();
// Start from the last message so the traversal has to walk the whole chain.
const result = getThreadData(messages, 'msg-999');
const endTime = performance.now();
expect(result.messageIds).toHaveLength(1000);
expect(result.fileIds).toHaveLength(1000);
// Should complete in reasonable time (< 100ms for 1000 messages)
// NOTE(review): this is a wall-clock assertion; it may be sensitive to a heavily
// loaded CI machine — confirm the 100ms budget leaves enough headroom.
expect(endTime - startTime).toBeLessThan(100);
});
});
});

View file

@ -1,3 +1,4 @@
import { Constants } from 'librechat-data-provider';
import type { TFile, TMessage } from 'librechat-data-provider';
/** Fields to strip from files before client transmission */
@ -66,3 +67,74 @@ export function sanitizeMessageForTransmit<T extends Partial<TMessage>>(
return sanitized;
}
/**
 * Minimal message shape for thread traversal.
 * Only the fields that `getThreadData` reads are declared here.
 */
type ThreadMessage = {
/** Unique ID of the message; used as the lookup key during traversal */
messageId: string;
/** ID of the preceding message in the thread; may be absent or null */
parentMessageId?: string | null;
/** Files attached to the message; entries without a `file_id` are ignored by `getThreadData` */
files?: Array<{ file_id?: string }>;
};
/** Result of thread data extraction, as returned by `getThreadData` */
export type ThreadData = {
/** IDs of the messages in the thread, starting at the requested message and moving to each parent in turn */
messageIds: string[];
/** Distinct file IDs attached to the messages listed in `messageIds` */
fileIds: string[];
};
/**
 * Walks a conversation thread from a given message up through its ancestors and
 * gathers the IDs of every message visited plus the distinct IDs of their files.
 *
 * Messages are indexed by `messageId` first so each parent hop is a Map lookup;
 * the total work is linear in the number of messages supplied.
 *
 * The walk ends when any of the following happens:
 * - the current message's parent is `Constants.NO_PARENT`, null, undefined or empty,
 * - the next ID does not match any supplied message,
 * - the next ID was already visited (guards against cyclic parent links).
 *
 * @param messages - All messages in the conversation (should be queried with select for efficiency)
 * @param parentMessageId - ID of the message the walk starts from
 * @returns `messageIds` in visit order (start message first) and de-duplicated `fileIds`;
 *          both are empty when there are no messages or no starting ID
 */
export function getThreadData(
  messages: ThreadMessage[],
  parentMessageId: string | null | undefined,
): ThreadData {
  /** Nothing to walk without a starting point or without any messages */
  if (!parentMessageId || !messages || messages.length === 0) {
    return { messageIds: [], fileIds: [] };
  }

  /** Index by ID; when two entries share an ID the later one wins */
  const byId = new Map<string, ThreadMessage>(
    messages.map((entry): [string, ThreadMessage] => [entry.messageId, entry]),
  );

  const threadIds: string[] = [];
  const collectedFileIds = new Set<string>();
  const seenIds = new Set<string>();

  let cursor: string | null | undefined = parentMessageId;
  while (cursor && !seenIds.has(cursor)) {
    seenIds.add(cursor);

    const node = byId.get(cursor);
    if (!node) {
      /** Dangling reference: the thread cannot be followed any further */
      break;
    }
    threadIds.push(node.messageId);

    const attachments = node.files;
    if (attachments) {
      for (const { file_id: fileId } of attachments) {
        /** Entries with a missing or empty ID are skipped */
        if (fileId) {
          collectedFileIds.add(fileId);
        }
      }
    }

    /** The NO_PARENT sentinel marks the root, so it is never followed as an ID */
    const parentId = node.parentMessageId;
    cursor = parentId === Constants.NO_PARENT ? null : parentId;
  }

  return { messageIds: threadIds, fileIds: Array.from(collectedFileIds) };
}