🗂️ feat: Better Persistence for Code Execution Files Between Sessions (#11362)
* refactor: process code output files for re-use (WIP)
* feat: file attachment handling with additional metadata for downloads
* refactor: Update directory path logic for local file saving based on basePath
* refactor: file attachment handling to support TFile type and improve data merging logic
* feat: thread filtering of code-generated files
- Introduced parentMessageId parameter in addedConvo and initialize functions to enhance thread management.
- Updated related methods to utilize parentMessageId for retrieving messages and filtering code-generated files by conversation threads.
- Enhanced type definitions to include parentMessageId in relevant interfaces for better clarity and usage.
* chore: imports/params ordering
* feat: update file model to use messageId for filtering and processing
- Changed references from 'message' to 'messageId' in file-related methods for consistency.
- Added messageId field to the file schema and updated related types.
- Enhanced file processing logic to accommodate the new messageId structure.
* feat: enhance file retrieval methods to support user-uploaded execute_code files
- Added a new method `getUserCodeFiles` to retrieve user-uploaded execute_code files, excluding code-generated files.
- Updated existing file retrieval methods to improve filtering logic and handle edge cases.
- Enhanced thread data extraction to collect both message IDs and file IDs efficiently.
- Integrated `getUserCodeFiles` into relevant endpoints for better file management in conversations.
* chore: update @librechat/agents package version to 3.0.78 in package-lock.json and related package.json files
* refactor: file processing and retrieval logic
- Added a fallback mechanism for download URLs when files exceed size limits or cannot be processed locally.
- Implemented a deduplication strategy for code-generated files based on conversationId and filename to optimize storage.
- Updated file retrieval methods to ensure proper filtering by messageIds, preventing orphaned files from being included.
- Introduced comprehensive tests for new thread data extraction functionality, covering edge cases and performance considerations.
* fix: improve file retrieval tests and handling of optional properties
- Updated tests to safely access optional properties using non-null assertions.
- Modified test descriptions for clarity regarding the exclusion of execute_code files.
- Ensured that the retrieval logic correctly reflects the expected outcomes for file queries.
* test: add comprehensive unit tests for processCodeOutput functionality
- Introduced a new test suite for the processCodeOutput function, covering various scenarios including file retrieval, creation, and processing for both image and non-image files.
- Implemented mocks for dependencies such as axios, logger, and file models to isolate tests and ensure reliable outcomes.
- Validated behavior for existing files, new file creation, and error handling, including size limits and fallback mechanisms.
- Enhanced test coverage for metadata handling and usage increment logic, ensuring robust verification of file processing outcomes.
* test: enhance file size limit enforcement in processCodeOutput tests
- Introduced a configurable file size limit for tests to improve flexibility and coverage.
- Mocked the `librechat-data-provider` to allow dynamic adjustment of file size limits during tests.
- Updated the file size limit enforcement test to validate behavior when files exceed specified limits, ensuring proper fallback to download URLs.
- Reset file size limit after tests to maintain isolation for subsequent test cases.
2026-01-16 10:06:24 -05:00
|
|
|
import { Constants } from 'librechat-data-provider';
|
2025-12-03 14:26:49 -05:00
|
|
|
import type { TFile, TMessage } from 'librechat-data-provider';
|
|
|
|
|
|
|
|
|
|
/**
 * Keys removed from a file object before it is sent to the client.
 *
 * Declared `as const` so the element union (`'text' | '_id' | '__v'`) can be
 * derived with `(typeof FILE_STRIP_FIELDS)[number]` and used in `Omit<…>`.
 * NOTE(review): `_id` / `__v` look like database document internals — confirm.
 */
const FILE_STRIP_FIELDS = ['text', '_id', '__v'] as const;
|
|
|
|
|
|
|
|
|
|
/**
 * Keys removed from a message object before it is sent to the client.
 *
 * Declared `as const` so the element union (`'fileContext'`) can be derived
 * with `(typeof MESSAGE_STRIP_FIELDS)[number]` and used in `Omit<…>`.
 */
const MESSAGE_STRIP_FIELDS = ['fileContext'] as const;
|
|
|
|
|
|
|
|
|
|
/**
 * Strips large/unnecessary fields from a file object before transmitting to client.
 * Use this within existing loops when building file arrays to avoid extra iterations.
 *
 * The input is never mutated: a shallow copy is made and every key listed in
 * `FILE_STRIP_FIELDS` is deleted from that copy.
 *
 * @param file - The file object to sanitize
 * @returns A new (shallow-copied) file object without the stripped fields
 *
 * @example
 * // Use in existing file processing loop:
 * for (const attachment of client.options.attachments) {
 *   if (messageFiles.has(attachment.file_id)) {
 *     userMessage.files.push(sanitizeFileForTransmit(attachment));
 *   }
 * }
 */
export function sanitizeFileForTransmit<T extends Partial<TFile>>(
  file: T,
): Omit<T, (typeof FILE_STRIP_FIELDS)[number]> {
  // Shallow copy so the caller's object keeps its original fields.
  const sanitized = { ...file };
  for (const field of FILE_STRIP_FIELDS) {
    delete sanitized[field as keyof typeof sanitized];
  }
  return sanitized;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sanitizes a message object before transmitting to client.
 * Removes large fields like `fileContext` and strips `text` from embedded files.
 *
 * The input is never mutated: the message is shallow-copied, every key listed
 * in `MESSAGE_STRIP_FIELDS` is deleted from the copy, and — when `files` is an
 * array — it is replaced by a new array of individually sanitized files.
 *
 * @param message - The message object to sanitize
 * @returns A new message object safe for client transmission; a falsy
 *   `message` (e.g. `null`/`undefined`) is returned as-is
 *
 * @example
 * sendEvent(res, {
 *   final: true,
 *   requestMessage: sanitizeMessageForTransmit(userMessage),
 *   responseMessage: response,
 * });
 */
export function sanitizeMessageForTransmit<T extends Partial<TMessage>>(
  message: T,
): Omit<T, (typeof MESSAGE_STRIP_FIELDS)[number]> {
  // Pass falsy input straight through instead of spreading it into `{}`.
  if (!message) {
    return message as Omit<T, (typeof MESSAGE_STRIP_FIELDS)[number]>;
  }

  const sanitized = { ...message };

  // Remove message-level fields
  for (const field of MESSAGE_STRIP_FIELDS) {
    delete sanitized[field as keyof typeof sanitized];
  }

  // Always create a new array when files exist to maintain full immutability
  if (Array.isArray(sanitized.files)) {
    sanitized.files = sanitized.files.map((file) => sanitizeFileForTransmit(file));
  }

  return sanitized;
}
|
🗂️ feat: Better Persistence for Code Execution Files Between Sessions (#11362)
* refactor: process code output files for re-use (WIP)
* feat: file attachment handling with additional metadata for downloads
* refactor: Update directory path logic for local file saving based on basePath
* refactor: file attachment handling to support TFile type and improve data merging logic
* feat: thread filtering of code-generated files
- Introduced parentMessageId parameter in addedConvo and initialize functions to enhance thread management.
- Updated related methods to utilize parentMessageId for retrieving messages and filtering code-generated files by conversation threads.
- Enhanced type definitions to include parentMessageId in relevant interfaces for better clarity and usage.
* chore: imports/params ordering
* feat: update file model to use messageId for filtering and processing
- Changed references from 'message' to 'messageId' in file-related methods for consistency.
- Added messageId field to the file schema and updated related types.
- Enhanced file processing logic to accommodate the new messageId structure.
* feat: enhance file retrieval methods to support user-uploaded execute_code files
- Added a new method `getUserCodeFiles` to retrieve user-uploaded execute_code files, excluding code-generated files.
- Updated existing file retrieval methods to improve filtering logic and handle edge cases.
- Enhanced thread data extraction to collect both message IDs and file IDs efficiently.
- Integrated `getUserCodeFiles` into relevant endpoints for better file management in conversations.
* chore: update @librechat/agents package version to 3.0.78 in package-lock.json and related package.json files
* refactor: file processing and retrieval logic
- Added a fallback mechanism for download URLs when files exceed size limits or cannot be processed locally.
- Implemented a deduplication strategy for code-generated files based on conversationId and filename to optimize storage.
- Updated file retrieval methods to ensure proper filtering by messageIds, preventing orphaned files from being included.
- Introduced comprehensive tests for new thread data extraction functionality, covering edge cases and performance considerations.
* fix: improve file retrieval tests and handling of optional properties
- Updated tests to safely access optional properties using non-null assertions.
- Modified test descriptions for clarity regarding the exclusion of execute_code files.
- Ensured that the retrieval logic correctly reflects the expected outcomes for file queries.
* test: add comprehensive unit tests for processCodeOutput functionality
- Introduced a new test suite for the processCodeOutput function, covering various scenarios including file retrieval, creation, and processing for both image and non-image files.
- Implemented mocks for dependencies such as axios, logger, and file models to isolate tests and ensure reliable outcomes.
- Validated behavior for existing files, new file creation, and error handling, including size limits and fallback mechanisms.
- Enhanced test coverage for metadata handling and usage increment logic, ensuring robust verification of file processing outcomes.
* test: enhance file size limit enforcement in processCodeOutput tests
- Introduced a configurable file size limit for tests to improve flexibility and coverage.
- Mocked the `librechat-data-provider` to allow dynamic adjustment of file size limits during tests.
- Updated the file size limit enforcement test to validate behavior when files exceed specified limits, ensuring proper fallback to download URLs.
- Reset file size limit after tests to maintain isolation for subsequent test cases.
2026-01-16 10:06:24 -05:00
|
|
|
|
|
|
|
|
/**
 * Minimal message shape for thread traversal.
 *
 * Only the fields needed to follow a parent chain and collect attachments.
 */
type ThreadMessage = {
  /** Unique ID of this message; used as the lookup key. */
  messageId: string;
  /** ID of the message this one replies to, if any. */
  parentMessageId?: string | null;
  /** Files attached to the message; entries without a `file_id` are allowed. */
  files?: Array<{ file_id?: string }>;
};
|
|
|
|
|
|
|
|
|
|
/** Result of thread data extraction */
export type ThreadData = {
  /** IDs of the messages that belong to the thread. */
  messageIds: string[];
  /** IDs of the files attached to those messages. */
  fileIds: string[];
};
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts the message IDs and attached file IDs of a single conversation thread.
 *
 * First indexes `messages` by `messageId` in a Map (O(1) lookups instead of
 * repeated `.find()` calls), then walks from `parentMessageId` up the
 * `parentMessageId` chain, collecting both kinds of ID in the same walk.
 * The walk stops when it reaches:
 *  - a message whose parent is `Constants.NO_PARENT` or is null/undefined/empty,
 *  - an ID that is not present in `messages`, or
 *  - an ID that was already visited (guards against cyclic parent links).
 *
 * @param messages - All messages in the conversation (should be queried with select
 *   for efficiency; only `messageId`, `parentMessageId` and `files` are read)
 * @param parentMessageId - The ID of the parent message to start traversal from
 * @returns `messageIds` in traversal order (starting message first, furthest
 *   ancestor last) and de-duplicated `fileIds` in first-seen order; both are
 *   empty when `messages` is empty/missing or `parentMessageId` is falsy
 */
export function getThreadData(
  messages: ThreadMessage[],
  parentMessageId: string | null | undefined,
): ThreadData {
  const result: ThreadData = { messageIds: [], fileIds: [] };

  if (!messages || messages.length === 0 || !parentMessageId) {
    return result;
  }

  /** Build Map for O(1) lookups instead of O(n) .find() calls */
  const messageMap = new Map<string, ThreadMessage>();
  for (const msg of messages) {
    messageMap.set(msg.messageId, msg);
  }

  const fileIdSet = new Set<string>();
  const visitedIds = new Set<string>();
  let currentId: string | null | undefined = parentMessageId;

  /** Single traversal: collect message IDs and file IDs together */
  while (currentId) {
    // Cycle guard: malformed data with looping parent links must not hang.
    if (visitedIds.has(currentId)) {
      break;
    }
    visitedIds.add(currentId);

    const message = messageMap.get(currentId);
    if (!message) {
      // Parent is not part of the supplied messages; the chain ends here.
      break;
    }

    result.messageIds.push(message.messageId);

    /** Collect file IDs from this message */
    // Tolerate a non-array `files` value or null entries rather than throwing mid-walk.
    if (Array.isArray(message.files)) {
      for (const file of message.files) {
        if (file?.file_id) {
          fileIdSet.add(file.file_id);
        }
      }
    }

    // The NO_PARENT sentinel marks the root of the thread.
    currentId = message.parentMessageId === Constants.NO_PARENT ? null : message.parentMessageId;
  }

  result.fileIds = Array.from(fileIdSet);
  return result;
}
|