mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-19 16:56:12 +01:00
🗂️ feat: Better Persistence for Code Execution Files Between Sessions (#11362)
* refactor: process code output files for re-use (WIP) * feat: file attachment handling with additional metadata for downloads * refactor: Update directory path logic for local file saving based on basePath * refactor: file attachment handling to support TFile type and improve data merging logic * feat: thread filtering of code-generated files - Introduced parentMessageId parameter in addedConvo and initialize functions to enhance thread management. - Updated related methods to utilize parentMessageId for retrieving messages and filtering code-generated files by conversation threads. - Enhanced type definitions to include parentMessageId in relevant interfaces for better clarity and usage. * chore: imports/params ordering * feat: update file model to use messageId for filtering and processing - Changed references from 'message' to 'messageId' in file-related methods for consistency. - Added messageId field to the file schema and updated related types. - Enhanced file processing logic to accommodate the new messageId structure. * feat: enhance file retrieval methods to support user-uploaded execute_code files - Added a new method `getUserCodeFiles` to retrieve user-uploaded execute_code files, excluding code-generated files. - Updated existing file retrieval methods to improve filtering logic and handle edge cases. - Enhanced thread data extraction to collect both message IDs and file IDs efficiently. - Integrated `getUserCodeFiles` into relevant endpoints for better file management in conversations. * chore: update @librechat/agents package version to 3.0.78 in package-lock.json and related package.json files * refactor: file processing and retrieval logic - Added a fallback mechanism for download URLs when files exceed size limits or cannot be processed locally. - Implemented a deduplication strategy for code-generated files based on conversationId and filename to optimize storage. 
- Updated file retrieval methods to ensure proper filtering by messageIds, preventing orphaned files from being included. - Introduced comprehensive tests for new thread data extraction functionality, covering edge cases and performance considerations. * fix: improve file retrieval tests and handling of optional properties - Updated tests to safely access optional properties using non-null assertions. - Modified test descriptions for clarity regarding the exclusion of execute_code files. - Ensured that the retrieval logic correctly reflects the expected outcomes for file queries. * test: add comprehensive unit tests for processCodeOutput functionality - Introduced a new test suite for the processCodeOutput function, covering various scenarios including file retrieval, creation, and processing for both image and non-image files. - Implemented mocks for dependencies such as axios, logger, and file models to isolate tests and ensure reliable outcomes. - Validated behavior for existing files, new file creation, and error handling, including size limits and fallback mechanisms. - Enhanced test coverage for metadata handling and usage increment logic, ensuring robust verification of file processing outcomes. * test: enhance file size limit enforcement in processCodeOutput tests - Introduced a configurable file size limit for tests to improve flexibility and coverage. - Mocked the `librechat-data-provider` to allow dynamic adjustment of file size limits during tests. - Updated the file size limit enforcement test to validate behavior when files exceed specified limits, ensuring proper fallback to download URLs. - Reset file size limit after tests to maintain isolation for subsequent test cases.
This commit is contained in:
parent
fe32cbedf9
commit
cc32895d13
22 changed files with 1364 additions and 83 deletions
|
|
@ -26,7 +26,8 @@ const getFiles = async (filter, _sortOptions, selectFields = { text: 0 }) => {
|
|||
};
|
||||
|
||||
/**
|
||||
* Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs
|
||||
* Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs.
|
||||
* Note: execute_code files are handled separately by getCodeGeneratedFiles.
|
||||
* @param {string[]} fileIds - Array of file_id strings to search for
|
||||
* @param {Set<EToolResources>} toolResourceSet - Optional filter for tool resources
|
||||
* @returns {Promise<Array<MongoFile>>} Files that match the criteria
|
||||
|
|
@ -37,21 +38,25 @@ const getToolFilesByIds = async (fileIds, toolResourceSet) => {
|
|||
}
|
||||
|
||||
try {
|
||||
const filter = {
|
||||
file_id: { $in: fileIds },
|
||||
$or: [],
|
||||
};
|
||||
const orConditions = [];
|
||||
|
||||
if (toolResourceSet.has(EToolResources.context)) {
|
||||
filter.$or.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
|
||||
orConditions.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
|
||||
}
|
||||
if (toolResourceSet.has(EToolResources.file_search)) {
|
||||
filter.$or.push({ embedded: true });
|
||||
orConditions.push({ embedded: true });
|
||||
}
|
||||
if (toolResourceSet.has(EToolResources.execute_code)) {
|
||||
filter.$or.push({ 'metadata.fileIdentifier': { $exists: true } });
|
||||
|
||||
if (orConditions.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const filter = {
|
||||
file_id: { $in: fileIds },
|
||||
context: { $ne: FileContext.execute_code }, // Exclude code-generated files
|
||||
$or: orConditions,
|
||||
};
|
||||
|
||||
const selectFields = { text: 0 };
|
||||
const sortOptions = { updatedAt: -1 };
|
||||
|
||||
|
|
@ -62,6 +67,70 @@ const getToolFilesByIds = async (fileIds, toolResourceSet) => {
|
|||
}
|
||||
};
|
||||
|
||||
/**
 * Retrieves files generated by code execution for a given conversation.
 *
 * Only records whose `context` is `FileContext.execute_code`, whose `messageId`
 * is one of `messageIds`, and that carry `metadata.fileIdentifier` are matched.
 * Results are sorted by `createdAt` ascending and the `text` field is excluded
 * from the projection.
 *
 * @param {string} conversationId - The conversation ID to search for
 * @param {string[]} [messageIds] - messageIds of the current thread; when missing,
 *   empty, or not an array, no files are returned (thread filtering is mandatory)
 * @returns {Promise<Array<MongoFile>>} Files generated by code execution in the
 *   conversation thread; an empty array on invalid input or query failure
 */
const getCodeGeneratedFiles = async (conversationId, messageIds) => {
  if (!conversationId) {
    return [];
  }

  /** messageIds are required for proper thread filtering of code-generated files */
  if (!Array.isArray(messageIds) || messageIds.length === 0) {
    return [];
  }

  try {
    const filter = {
      conversationId,
      context: FileContext.execute_code,
      messageId: { $exists: true, $in: messageIds },
      'metadata.fileIdentifier': { $exists: true },
    };

    const selectFields = { text: 0 };
    const sortOptions = { createdAt: 1 };

    return await getFiles(filter, sortOptions, selectFields);
  } catch (error) {
    // Best-effort lookup: a failed query is logged and reported as "no files".
    logger.error('[getCodeGeneratedFiles] Error retrieving code generated files:', error);
    return [];
  }
};
|
||||
|
||||
/**
 * Retrieves user-uploaded execute_code files (not code-generated) by their file IDs.
 *
 * Matches records that carry `metadata.fileIdentifier` but whose `context` is NOT
 * `FileContext.execute_code`. Results are sorted by `createdAt` ascending and the
 * `text` field is excluded from the projection.
 *
 * @param {string[]} fileIds - Array of file IDs to fetch (per the original contract,
 *   collected from `message.files` in the current thread)
 * @returns {Promise<Array<MongoFile>>} User-uploaded execute_code files; an empty
 *   array when `fileIds` is missing, empty, not an array, or the query fails
 */
const getUserCodeFiles = async (fileIds) => {
  if (!Array.isArray(fileIds) || fileIds.length === 0) {
    return [];
  }

  try {
    const filter = {
      file_id: { $in: fileIds },
      context: { $ne: FileContext.execute_code },
      'metadata.fileIdentifier': { $exists: true },
    };

    const selectFields = { text: 0 };
    const sortOptions = { createdAt: 1 };

    return await getFiles(filter, sortOptions, selectFields);
  } catch (error) {
    // Best-effort lookup: a failed query is logged and reported as "no files".
    logger.error('[getUserCodeFiles] Error retrieving user code files:', error);
    return [];
  }
};
|
||||
|
||||
/**
|
||||
* Creates a new file with a TTL of 1 hour.
|
||||
* @param {MongoFile} data - The file data to be created, must contain file_id.
|
||||
|
|
@ -169,6 +238,8 @@ module.exports = {
|
|||
findFileById,
|
||||
getFiles,
|
||||
getToolFilesByIds,
|
||||
getCodeGeneratedFiles,
|
||||
getUserCodeFiles,
|
||||
createFile,
|
||||
updateFile,
|
||||
updateFileUsage,
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@
|
|||
"@google/genai": "^1.19.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@langchain/core": "^0.3.80",
|
||||
"@librechat/agents": "^3.0.77",
|
||||
"@librechat/agents": "^3.0.78",
|
||||
"@librechat/api": "*",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@microsoft/microsoft-graph-client": "^3.0.7",
|
||||
|
|
|
|||
|
|
@ -633,6 +633,7 @@ class AgentClient extends BaseClient {
|
|||
updateFilesUsage: db.updateFilesUsage,
|
||||
getUserKeyValues: db.getUserKeyValues,
|
||||
getToolFilesByIds: db.getToolFilesByIds,
|
||||
getCodeGeneratedFiles: db.getCodeGeneratedFiles,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ setGetAgent(getAgent);
|
|||
* @param {Function} params.loadTools - Function to load agent tools
|
||||
* @param {Array} params.requestFiles - Request files
|
||||
* @param {string} params.conversationId - The conversation ID
|
||||
* @param {string} [params.parentMessageId] - The parent message ID for thread filtering
|
||||
* @param {Set} params.allowedProviders - Set of allowed providers
|
||||
* @param {Map} params.agentConfigs - Map of agent configs to add to
|
||||
* @param {string} params.primaryAgentId - The primary agent ID
|
||||
|
|
@ -46,6 +47,7 @@ const processAddedConvo = async ({
|
|||
loadTools,
|
||||
requestFiles,
|
||||
conversationId,
|
||||
parentMessageId,
|
||||
allowedProviders,
|
||||
agentConfigs,
|
||||
primaryAgentId,
|
||||
|
|
@ -91,6 +93,7 @@ const processAddedConvo = async ({
|
|||
loadTools,
|
||||
requestFiles,
|
||||
conversationId,
|
||||
parentMessageId,
|
||||
agent: addedAgent,
|
||||
endpointOption,
|
||||
allowedProviders,
|
||||
|
|
@ -99,9 +102,12 @@ const processAddedConvo = async ({
|
|||
getConvoFiles,
|
||||
getFiles: db.getFiles,
|
||||
getUserKey: db.getUserKey,
|
||||
getMessages: db.getMessages,
|
||||
updateFilesUsage: db.updateFilesUsage,
|
||||
getUserCodeFiles: db.getUserCodeFiles,
|
||||
getUserKeyValues: db.getUserKeyValues,
|
||||
getToolFilesByIds: db.getToolFilesByIds,
|
||||
getCodeGeneratedFiles: db.getCodeGeneratedFiles,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -3,10 +3,10 @@ const { createContentAggregator } = require('@librechat/agents');
|
|||
const {
|
||||
initializeAgent,
|
||||
validateAgentModel,
|
||||
getCustomEndpointConfig,
|
||||
createSequentialChainEdges,
|
||||
createEdgeCollector,
|
||||
filterOrphanedEdges,
|
||||
getCustomEndpointConfig,
|
||||
createSequentialChainEdges,
|
||||
} = require('@librechat/api');
|
||||
const {
|
||||
EModelEndpoint,
|
||||
|
|
@ -129,6 +129,8 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
|
|||
const requestFiles = req.body.files ?? [];
|
||||
/** @type {string} */
|
||||
const conversationId = req.body.conversationId;
|
||||
/** @type {string | undefined} */
|
||||
const parentMessageId = req.body.parentMessageId;
|
||||
|
||||
const primaryConfig = await initializeAgent(
|
||||
{
|
||||
|
|
@ -137,6 +139,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
|
|||
loadTools,
|
||||
requestFiles,
|
||||
conversationId,
|
||||
parentMessageId,
|
||||
agent: primaryAgent,
|
||||
endpointOption,
|
||||
allowedProviders,
|
||||
|
|
@ -146,9 +149,12 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
|
|||
getConvoFiles,
|
||||
getFiles: db.getFiles,
|
||||
getUserKey: db.getUserKey,
|
||||
getMessages: db.getMessages,
|
||||
updateFilesUsage: db.updateFilesUsage,
|
||||
getUserKeyValues: db.getUserKeyValues,
|
||||
getUserCodeFiles: db.getUserCodeFiles,
|
||||
getToolFilesByIds: db.getToolFilesByIds,
|
||||
getCodeGeneratedFiles: db.getCodeGeneratedFiles,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -188,6 +194,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
|
|||
loadTools,
|
||||
requestFiles,
|
||||
conversationId,
|
||||
parentMessageId,
|
||||
endpointOption,
|
||||
allowedProviders,
|
||||
},
|
||||
|
|
@ -195,9 +202,12 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
|
|||
getConvoFiles,
|
||||
getFiles: db.getFiles,
|
||||
getUserKey: db.getUserKey,
|
||||
getMessages: db.getMessages,
|
||||
updateFilesUsage: db.updateFilesUsage,
|
||||
getUserKeyValues: db.getUserKeyValues,
|
||||
getUserCodeFiles: db.getUserCodeFiles,
|
||||
getToolFilesByIds: db.getToolFilesByIds,
|
||||
getCodeGeneratedFiles: db.getCodeGeneratedFiles,
|
||||
},
|
||||
);
|
||||
if (userMCPAuthMap != null) {
|
||||
|
|
@ -252,17 +262,18 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => {
|
|||
const { userMCPAuthMap: updatedMCPAuthMap } = await processAddedConvo({
|
||||
req,
|
||||
res,
|
||||
endpointOption,
|
||||
modelsConfig,
|
||||
logViolation,
|
||||
loadTools,
|
||||
logViolation,
|
||||
modelsConfig,
|
||||
requestFiles,
|
||||
conversationId,
|
||||
allowedProviders,
|
||||
agentConfigs,
|
||||
primaryAgentId: primaryConfig.id,
|
||||
primaryAgent,
|
||||
endpointOption,
|
||||
userMCPAuthMap,
|
||||
conversationId,
|
||||
parentMessageId,
|
||||
allowedProviders,
|
||||
primaryAgentId: primaryConfig.id,
|
||||
});
|
||||
|
||||
if (updatedMCPAuthMap) {
|
||||
|
|
|
|||
|
|
@ -6,27 +6,112 @@ const { getCodeBaseURL } = require('@librechat/agents');
|
|||
const { logAxiosError, getBasePath } = require('@librechat/api');
|
||||
const {
|
||||
Tools,
|
||||
megabyte,
|
||||
fileConfig,
|
||||
FileContext,
|
||||
FileSources,
|
||||
imageExtRegex,
|
||||
inferMimeType,
|
||||
EToolResources,
|
||||
EModelEndpoint,
|
||||
mergeFileConfig,
|
||||
getEndpointFileConfig,
|
||||
} = require('librechat-data-provider');
|
||||
const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions');
|
||||
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
|
||||
const { convertImage } = require('~/server/services/Files/images/convert');
|
||||
const { createFile, getFiles, updateFile } = require('~/models');
|
||||
const { determineFileType } = require('~/server/utils');
|
||||
|
||||
/**
 * Creates a fallback download URL response when a file cannot be processed locally.
 * Used when: file exceeds size limit, storage strategy unavailable, or download error occurs.
 *
 * The returned `filepath` points at the code download route:
 * `<basePath>/api/files/code/download/<session_id>/<id>`.
 *
 * @param {Object} params - The parameters.
 * @param {string} params.name - The filename.
 * @param {string} params.session_id - The code execution session ID.
 * @param {string} params.id - The file ID from the code environment.
 * @param {string} params.conversationId - The current conversation ID.
 * @param {string} params.toolCallId - The tool call ID that generated the file.
 * @param {string} params.messageId - The current message ID.
 * @param {number} params.expiresAt - Expiration timestamp in epoch milliseconds
 *   (callers pass 24 hours from creation).
 * @returns {{ filename: string, filepath: string, expiresAt: number, conversationId: string, toolCallId: string, messageId: string }}
 *   Fallback response with download URL.
 */
const createDownloadFallback = ({
  id,
  name,
  messageId,
  expiresAt,
  session_id,
  toolCallId,
  conversationId,
}) => {
  const basePath = getBasePath();
  return {
    filename: name,
    filepath: `${basePath}/api/files/code/download/${session_id}/${id}`,
    expiresAt,
    conversationId,
    toolCallId,
    messageId,
  };
};
|
||||
|
||||
/**
 * Find an existing code-generated file by filename in the conversation.
 * Used to update existing files instead of creating duplicates.
 *
 * ## Deduplication Strategy
 *
 * Files are deduplicated by `(conversationId, filename)` - NOT including `messageId`.
 * This is an intentional design decision to handle iterative code development patterns:
 *
 * **Rationale:**
 * - When users iteratively refine code (e.g., "regenerate that chart with red bars"),
 *   the same logical file (e.g., "chart.png") is produced multiple times
 * - Without deduplication, each iteration would create a new file, leading to storage bloat
 * - The latest version is what matters for re-upload to the code environment
 *
 * **Implications:**
 * - Different messages producing files with the same name will update the same file record
 * - The `messageId` field tracks which message last updated the file
 * - The `usage` counter tracks how many times the file has been generated
 *
 * **Future Considerations:**
 * - If file versioning is needed, consider adding a `versions` array or separate version collection
 * - The current approach prioritizes storage efficiency over history preservation
 *
 * Lookup errors are not caught here; they propagate to the caller.
 *
 * @param {string} filename - The filename to search for.
 * @param {string} conversationId - The conversation ID.
 * @returns {Promise<MongoFile | null>} The most recently created matching file, or
 *   null when either argument is empty or nothing matches.
 */
const findExistingCodeFile = async (filename, conversationId) => {
  if (!filename || !conversationId) {
    return null;
  }

  // Newest first, so index 0 is the latest record for this filename.
  const files = await getFiles(
    {
      filename,
      conversationId,
      context: FileContext.execute_code,
    },
    { createdAt: -1 },
    { text: 0 },
  );

  // getFiles may yield null/undefined or an empty array; normalise all of these to null.
  return files?.[0] ?? null;
};
|
||||
|
||||
/**
|
||||
* Process code execution output files - downloads and saves both images and non-image files.
|
||||
* All files are saved to local storage with fileIdentifier metadata for code env re-upload.
|
||||
* @param {ServerRequest} params.req - The Express request object.
|
||||
* @param {string} params.id - The file ID.
|
||||
* @param {string} params.id - The file ID from the code environment.
|
||||
* @param {string} params.name - The filename.
|
||||
* @param {string} params.apiKey - The code execution API key.
|
||||
* @param {string} params.toolCallId - The tool call ID that generated the file.
|
||||
* @param {string} params.session_id - The code execution session ID.
|
||||
* @param {string} params.conversationId - The current conversation ID.
|
||||
* @param {string} params.messageId - The current message ID.
|
||||
* @returns {Promise<MongoFile & { messageId: string, toolCallId: string } | { filename: string; filepath: string; expiresAt: number; conversationId: string; toolCallId: string; messageId: string } | undefined>} The file metadata or undefined if an error occurs.
|
||||
* @returns {Promise<MongoFile & { messageId: string, toolCallId: string } | undefined>} The file metadata or undefined if an error occurs.
|
||||
*/
|
||||
const processCodeOutput = async ({
|
||||
req,
|
||||
|
|
@ -41,19 +126,15 @@ const processCodeOutput = async ({
|
|||
const appConfig = req.config;
|
||||
const currentDate = new Date();
|
||||
const baseURL = getCodeBaseURL();
|
||||
const basePath = getBasePath();
|
||||
const fileExt = path.extname(name);
|
||||
if (!fileExt || !imageExtRegex.test(name)) {
|
||||
return {
|
||||
filename: name,
|
||||
filepath: `${basePath}/api/files/code/download/${session_id}/${id}`,
|
||||
/** Note: expires 24 hours after creation */
|
||||
expiresAt: currentDate.getTime() + 86400000,
|
||||
conversationId,
|
||||
toolCallId,
|
||||
messageId,
|
||||
};
|
||||
}
|
||||
const fileExt = path.extname(name).toLowerCase();
|
||||
const isImage = fileExt && imageExtRegex.test(name);
|
||||
|
||||
const mergedFileConfig = mergeFileConfig(appConfig.fileConfig);
|
||||
const endpointFileConfig = getEndpointFileConfig({
|
||||
fileConfig: mergedFileConfig,
|
||||
endpoint: EModelEndpoint.agents,
|
||||
});
|
||||
const fileSizeLimit = endpointFileConfig.fileSizeLimit ?? mergedFileConfig.serverFileSizeLimit;
|
||||
|
||||
try {
|
||||
const formattedDate = currentDate.toISOString();
|
||||
|
|
@ -70,29 +151,135 @@ const processCodeOutput = async ({
|
|||
|
||||
const buffer = Buffer.from(response.data, 'binary');
|
||||
|
||||
const file_id = v4();
|
||||
const _file = await convertImage(req, buffer, 'high', `${file_id}${fileExt}`);
|
||||
// Enforce file size limit
|
||||
if (buffer.length > fileSizeLimit) {
|
||||
logger.warn(
|
||||
`[processCodeOutput] File "${name}" (${(buffer.length / megabyte).toFixed(2)} MB) exceeds size limit of ${(fileSizeLimit / megabyte).toFixed(2)} MB, falling back to download URL`,
|
||||
);
|
||||
return createDownloadFallback({
|
||||
id,
|
||||
name,
|
||||
messageId,
|
||||
toolCallId,
|
||||
session_id,
|
||||
conversationId,
|
||||
expiresAt: currentDate.getTime() + 86400000,
|
||||
});
|
||||
}
|
||||
|
||||
const fileIdentifier = `${session_id}/${id}`;
|
||||
|
||||
/**
|
||||
* Check for existing file with same filename in this conversation.
|
||||
* If found, we'll update it instead of creating a duplicate.
|
||||
*/
|
||||
const existingFile = await findExistingCodeFile(name, conversationId);
|
||||
const file_id = existingFile?.file_id ?? v4();
|
||||
const isUpdate = !!existingFile;
|
||||
|
||||
if (isUpdate) {
|
||||
logger.debug(
|
||||
`[processCodeOutput] Updating existing file "${name}" (${file_id}) instead of creating duplicate`,
|
||||
);
|
||||
}
|
||||
|
||||
if (isImage) {
|
||||
const _file = await convertImage(req, buffer, 'high', `${file_id}${fileExt}`);
|
||||
const file = {
|
||||
..._file,
|
||||
file_id,
|
||||
messageId,
|
||||
usage: isUpdate ? (existingFile.usage ?? 0) + 1 : 1,
|
||||
filename: name,
|
||||
conversationId,
|
||||
user: req.user.id,
|
||||
type: `image/${appConfig.imageOutputType}`,
|
||||
createdAt: isUpdate ? existingFile.createdAt : formattedDate,
|
||||
updatedAt: formattedDate,
|
||||
source: appConfig.fileStrategy,
|
||||
context: FileContext.execute_code,
|
||||
metadata: { fileIdentifier },
|
||||
};
|
||||
createFile(file, true);
|
||||
return Object.assign(file, { messageId, toolCallId });
|
||||
}
|
||||
|
||||
// For non-image files, save to configured storage strategy
|
||||
const { saveBuffer } = getStrategyFunctions(appConfig.fileStrategy);
|
||||
if (!saveBuffer) {
|
||||
logger.warn(
|
||||
`[processCodeOutput] saveBuffer not available for strategy ${appConfig.fileStrategy}, falling back to download URL`,
|
||||
);
|
||||
return createDownloadFallback({
|
||||
id,
|
||||
name,
|
||||
messageId,
|
||||
toolCallId,
|
||||
session_id,
|
||||
conversationId,
|
||||
expiresAt: currentDate.getTime() + 86400000,
|
||||
});
|
||||
}
|
||||
|
||||
// Determine MIME type from buffer or extension
|
||||
const detectedType = await determineFileType(buffer, true);
|
||||
const mimeType = detectedType?.mime || inferMimeType(name, '') || 'application/octet-stream';
|
||||
|
||||
/** Check MIME type support - for code-generated files, we're lenient but log unsupported types */
|
||||
const isSupportedMimeType = fileConfig.checkType(
|
||||
mimeType,
|
||||
endpointFileConfig.supportedMimeTypes,
|
||||
);
|
||||
if (!isSupportedMimeType) {
|
||||
logger.warn(
|
||||
`[processCodeOutput] File "${name}" has unsupported MIME type "${mimeType}", proceeding with storage but may not be usable as tool resource`,
|
||||
);
|
||||
}
|
||||
|
||||
const fileName = `${file_id}__${name}`;
|
||||
const filepath = await saveBuffer({
|
||||
userId: req.user.id,
|
||||
buffer,
|
||||
fileName,
|
||||
basePath: 'uploads',
|
||||
});
|
||||
|
||||
const file = {
|
||||
..._file,
|
||||
file_id,
|
||||
usage: 1,
|
||||
filepath,
|
||||
messageId,
|
||||
object: 'file',
|
||||
filename: name,
|
||||
type: mimeType,
|
||||
conversationId,
|
||||
user: req.user.id,
|
||||
type: `image/${appConfig.imageOutputType}`,
|
||||
createdAt: formattedDate,
|
||||
bytes: buffer.length,
|
||||
updatedAt: formattedDate,
|
||||
metadata: { fileIdentifier },
|
||||
source: appConfig.fileStrategy,
|
||||
context: FileContext.execute_code,
|
||||
usage: isUpdate ? (existingFile.usage ?? 0) + 1 : 1,
|
||||
createdAt: isUpdate ? existingFile.createdAt : formattedDate,
|
||||
};
|
||||
|
||||
createFile(file, true);
|
||||
/** Note: `toolCallId` is not part of the file DB schema (`messageId` now is); the message object records the associated file ID */
|
||||
return Object.assign(file, { messageId, toolCallId });
|
||||
} catch (error) {
|
||||
logAxiosError({
|
||||
message: 'Error downloading code environment file',
|
||||
message: 'Error downloading/processing code environment file',
|
||||
error,
|
||||
});
|
||||
|
||||
// Fallback for download errors - return download URL so user can still manually download
|
||||
return createDownloadFallback({
|
||||
id,
|
||||
name,
|
||||
messageId,
|
||||
toolCallId,
|
||||
session_id,
|
||||
conversationId,
|
||||
expiresAt: currentDate.getTime() + 86400000,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -204,9 +391,16 @@ const primeFiles = async (options, apiKey) => {
|
|||
if (!toolContext) {
|
||||
toolContext = `- Note: The following files are available in the "${Tools.execute_code}" tool environment:`;
|
||||
}
|
||||
toolContext += `\n\t- /mnt/data/${file.filename}${
|
||||
agentResourceIds.has(file.file_id) ? '' : ' (just attached by user)'
|
||||
}`;
|
||||
|
||||
let fileSuffix = '';
|
||||
if (!agentResourceIds.has(file.file_id)) {
|
||||
fileSuffix =
|
||||
file.context === FileContext.execute_code
|
||||
? ' (from previous code execution)'
|
||||
: ' (attached by user)';
|
||||
}
|
||||
|
||||
toolContext += `\n\t- /mnt/data/${file.filename}${fileSuffix}`;
|
||||
files.push({
|
||||
id,
|
||||
session_id,
|
||||
|
|
|
|||
418
api/server/services/Files/Code/process.spec.js
Normal file
418
api/server/services/Files/Code/process.spec.js
Normal file
|
|
@ -0,0 +1,418 @@
|
|||
// Configurable file size limit for tests - use a getter so it can be changed per test
|
||||
const fileSizeLimitConfig = { value: 20 * 1024 * 1024 }; // Default 20MB
|
||||
|
||||
// Mock librechat-data-provider with configurable file size limit
|
||||
jest.mock('librechat-data-provider', () => {
|
||||
const actual = jest.requireActual('librechat-data-provider');
|
||||
return {
|
||||
...actual,
|
||||
mergeFileConfig: jest.fn((config) => {
|
||||
const merged = actual.mergeFileConfig(config);
|
||||
// Override the serverFileSizeLimit with our test value
|
||||
return {
|
||||
...merged,
|
||||
get serverFileSizeLimit() {
|
||||
return fileSizeLimitConfig.value;
|
||||
},
|
||||
};
|
||||
}),
|
||||
getEndpointFileConfig: jest.fn((options) => {
|
||||
const config = actual.getEndpointFileConfig(options);
|
||||
// Override fileSizeLimit with our test value
|
||||
return {
|
||||
...config,
|
||||
get fileSizeLimit() {
|
||||
return fileSizeLimitConfig.value;
|
||||
},
|
||||
};
|
||||
}),
|
||||
};
|
||||
});
|
||||
|
||||
const { FileContext } = require('librechat-data-provider');
|
||||
|
||||
// Mock uuid
|
||||
jest.mock('uuid', () => ({
|
||||
v4: jest.fn(() => 'mock-uuid-1234'),
|
||||
}));
|
||||
|
||||
// Mock axios
|
||||
jest.mock('axios');
|
||||
const axios = require('axios');
|
||||
|
||||
// Mock logger
|
||||
jest.mock('@librechat/data-schemas', () => ({
|
||||
logger: {
|
||||
warn: jest.fn(),
|
||||
debug: jest.fn(),
|
||||
error: jest.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
// Mock getCodeBaseURL
|
||||
jest.mock('@librechat/agents', () => ({
|
||||
getCodeBaseURL: jest.fn(() => 'https://code-api.example.com'),
|
||||
}));
|
||||
|
||||
// Mock logAxiosError and getBasePath
|
||||
jest.mock('@librechat/api', () => ({
|
||||
logAxiosError: jest.fn(),
|
||||
getBasePath: jest.fn(() => ''),
|
||||
}));
|
||||
|
||||
// Mock models
|
||||
jest.mock('~/models', () => ({
|
||||
createFile: jest.fn(),
|
||||
getFiles: jest.fn(),
|
||||
updateFile: jest.fn(),
|
||||
}));
|
||||
|
||||
// Mock permissions (must be before process.js import)
|
||||
jest.mock('~/server/services/Files/permissions', () => ({
|
||||
filterFilesByAgentAccess: jest.fn((options) => Promise.resolve(options.files)),
|
||||
}));
|
||||
|
||||
// Mock strategy functions
|
||||
jest.mock('~/server/services/Files/strategies', () => ({
|
||||
getStrategyFunctions: jest.fn(),
|
||||
}));
|
||||
|
||||
// Mock convertImage
|
||||
jest.mock('~/server/services/Files/images/convert', () => ({
|
||||
convertImage: jest.fn(),
|
||||
}));
|
||||
|
||||
// Mock determineFileType
|
||||
jest.mock('~/server/utils', () => ({
|
||||
determineFileType: jest.fn(),
|
||||
}));
|
||||
|
||||
const { createFile, getFiles } = require('~/models');
|
||||
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
|
||||
const { convertImage } = require('~/server/services/Files/images/convert');
|
||||
const { determineFileType } = require('~/server/utils');
|
||||
const { logger } = require('@librechat/data-schemas');
|
||||
|
||||
// Import after mocks
|
||||
const { processCodeOutput } = require('./process');
|
||||
|
||||
describe('Code Process', () => {
|
||||
const mockReq = {
|
||||
user: { id: 'user-123' },
|
||||
config: {
|
||||
fileConfig: {},
|
||||
fileStrategy: 'local',
|
||||
imageOutputType: 'webp',
|
||||
},
|
||||
};
|
||||
|
||||
const baseParams = {
|
||||
req: mockReq,
|
||||
id: 'file-id-123',
|
||||
name: 'test-file.txt',
|
||||
apiKey: 'test-api-key',
|
||||
toolCallId: 'tool-call-123',
|
||||
conversationId: 'conv-123',
|
||||
messageId: 'msg-123',
|
||||
session_id: 'session-123',
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
// Default mock implementations
|
||||
getFiles.mockResolvedValue(null);
|
||||
createFile.mockResolvedValue({});
|
||||
getStrategyFunctions.mockReturnValue({
|
||||
saveBuffer: jest.fn().mockResolvedValue('/uploads/mock-file-path.txt'),
|
||||
});
|
||||
determineFileType.mockResolvedValue({ mime: 'text/plain' });
|
||||
});
|
||||
|
||||
describe('findExistingCodeFile (via processCodeOutput)', () => {
  /** Makes the mocked axios call resolve with a 100-byte payload. */
  const mockSmallDownload = () => {
    axios.mockResolvedValue({ data: Buffer.alloc(100) });
  };

  it('should find existing file by filename and conversationId', async () => {
    const storedRecord = {
      file_id: 'existing-file-id',
      filename: 'test-file.txt',
      usage: 2,
      createdAt: '2024-01-01T00:00:00.000Z',
    };
    getFiles.mockResolvedValue([storedRecord]);
    mockSmallDownload();

    const output = await processCodeOutput(baseParams);

    // The deduplication lookup must target filename + conversation + execute_code context,
    // sorted by createdAt descending, with the `text` field projected out.
    const expectedFilter = {
      filename: 'test-file.txt',
      conversationId: 'conv-123',
      context: FileContext.execute_code,
    };
    expect(getFiles).toHaveBeenCalledWith(expectedFilter, { createdAt: -1 }, { text: 0 });

    // The stored record's identity is reused, its usage bumped, its createdAt kept.
    expect(output.file_id).toBe('existing-file-id');
    expect(output.usage).toBe(3);
    expect(output.createdAt).toBe('2024-01-01T00:00:00.000Z');
  });

  it('should create new file when no existing file found', async () => {
    getFiles.mockResolvedValue(null);
    mockSmallDownload();

    const output = await processCodeOutput(baseParams);

    // A fresh record takes the mocked uuid and starts at usage 1.
    expect(output.file_id).toBe('mock-uuid-1234');
    expect(output.usage).toBe(1);
  });

  it('should return null for invalid inputs (empty filename)', async () => {
    mockSmallDownload();

    // With an empty name the existing-file lookup is expected to bail out
    // through its guard clause before querying the database.
    const output = await processCodeOutput({ ...baseParams, name: '' });

    // No lookup happened, so a brand-new file_id is generated.
    expect(getFiles).not.toHaveBeenCalled();
    expect(output.file_id).toBe('mock-uuid-1234');
  });
});
|
||||
|
||||
describe('processCodeOutput', () => {
|
||||
describe('image file processing', () => {
  it('should process image files using convertImage', async () => {
    const pngParams = { ...baseParams, name: 'chart.png' };
    const pngBytes = Buffer.alloc(500);
    axios.mockResolvedValue({ data: pngBytes });
    convertImage.mockResolvedValue({
      filepath: '/uploads/converted-image.webp',
      bytes: 400,
    });
    getFiles.mockResolvedValue(null);

    const output = await processCodeOutput(pngParams);

    // The downloaded bytes are handed to convertImage under a uuid-based name.
    expect(convertImage).toHaveBeenCalledWith(mockReq, pngBytes, 'high', 'mock-uuid-1234.png');
    expect(output.type).toBe('image/webp');
    expect(output.context).toBe(FileContext.execute_code);
    expect(output.filename).toBe('chart.png');
  });

  it('should update existing image file and increment usage', async () => {
    const storedImage = {
      file_id: 'existing-img-id',
      usage: 1,
      createdAt: '2024-01-01T00:00:00.000Z',
    };
    getFiles.mockResolvedValue([storedImage]);
    axios.mockResolvedValue({ data: Buffer.alloc(500) });
    convertImage.mockResolvedValue({ filepath: '/uploads/img.webp' });

    const output = await processCodeOutput({ ...baseParams, name: 'chart.png' });

    // The stored record is reused and the update is reported at debug level.
    expect(output.file_id).toBe('existing-img-id');
    expect(output.usage).toBe(2);
    expect(logger.debug).toHaveBeenCalledWith(expect.stringContaining('Updating existing file'));
  });
});
|
||||
|
||||
describe('non-image file processing', () => {
  it('should process non-image files using saveBuffer', async () => {
    const payload = Buffer.alloc(100);
    const saveBufferSpy = jest.fn().mockResolvedValue('/uploads/saved-file.txt');
    axios.mockResolvedValue({ data: payload });
    getStrategyFunctions.mockReturnValue({ saveBuffer: saveBufferSpy });
    determineFileType.mockResolvedValue({ mime: 'text/plain' });

    const output = await processCodeOutput(baseParams);

    // The buffer is persisted under "<file_id>__<original name>" in the uploads base path.
    const expectedSaveArgs = {
      userId: 'user-123',
      buffer: payload,
      fileName: 'mock-uuid-1234__test-file.txt',
      basePath: 'uploads',
    };
    expect(saveBufferSpy).toHaveBeenCalledWith(expectedSaveArgs);
    expect(output.type).toBe('text/plain');
    expect(output.filepath).toBe('/uploads/saved-file.txt');
    expect(output.bytes).toBe(100);
  });

  it('should detect MIME type from buffer', async () => {
    const payload = Buffer.alloc(100);
    axios.mockResolvedValue({ data: payload });
    determineFileType.mockResolvedValue({ mime: 'application/pdf' });

    const output = await processCodeOutput({ ...baseParams, name: 'document.pdf' });

    expect(determineFileType).toHaveBeenCalledWith(payload, true);
    expect(output.type).toBe('application/pdf');
  });

  it('should fallback to application/octet-stream for unknown types', async () => {
    axios.mockResolvedValue({ data: Buffer.alloc(100) });
    // Type detection yields nothing for this payload.
    determineFileType.mockResolvedValue(null);

    const output = await processCodeOutput({ ...baseParams, name: 'unknown.xyz' });

    expect(output.type).toBe('application/octet-stream');
  });
});
|
||||
|
||||
describe('file size limit enforcement', () => {
  /** Default limit (20MB) that the other tests in this file rely on. */
  const DEFAULT_FILE_SIZE_LIMIT = 20 * 1024 * 1024;

  // Restore the shared limit in a teardown hook rather than at the end of the test body:
  // a failing assertion aborts the test early and would otherwise leak the 1KB limit
  // into every test that runs afterwards.
  afterEach(() => {
    fileSizeLimitConfig.value = DEFAULT_FILE_SIZE_LIMIT;
  });

  it('should fallback to download URL when file exceeds size limit', async () => {
    // Set a small file size limit for this test
    fileSizeLimitConfig.value = 1000; // 1KB limit

    const largeBuffer = Buffer.alloc(5000); // 5KB - exceeds 1KB limit
    axios.mockResolvedValue({ data: largeBuffer });

    const result = await processCodeOutput(baseParams);

    expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('exceeds size limit'));
    expect(result.filepath).toContain('/api/files/code/download/session-123/file-id-123');
    expect(result.expiresAt).toBeDefined();
    // Should not call createFile for oversized files (fallback path)
    expect(createFile).not.toHaveBeenCalled();
  });
});
|
||||
|
||||
describe('fallback behavior', () => {
  it('should fallback to download URL when saveBuffer is not available', async () => {
    axios.mockResolvedValue({ data: Buffer.alloc(100) });
    // The storage strategy offers no saveBuffer implementation.
    getStrategyFunctions.mockReturnValue({ saveBuffer: null });

    const output = await processCodeOutput(baseParams);

    expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('saveBuffer not available'));
    expect(output.filepath).toContain('/api/files/code/download/');
    expect(output.filename).toBe('test-file.txt');
  });

  it('should fallback to download URL on axios error', async () => {
    const networkFailure = new Error('Network error');
    axios.mockRejectedValue(networkFailure);

    const output = await processCodeOutput(baseParams);

    // The fallback result points at the code download route and keeps the request identifiers.
    expect(output.filepath).toContain('/api/files/code/download/session-123/file-id-123');
    expect(output.conversationId).toBe('conv-123');
    expect(output.messageId).toBe('msg-123');
    expect(output.toolCallId).toBe('tool-call-123');
  });
});
|
||||
|
||||
describe('usage counter increment', () => {
  /** Runs processCodeOutput against a 100-byte download and resolves to its result. */
  const runWithSmallDownload = () => {
    axios.mockResolvedValue({ data: Buffer.alloc(100) });
    return processCodeOutput(baseParams);
  };

  it('should set usage to 1 for new files', async () => {
    getFiles.mockResolvedValue(null);

    const output = await runWithSmallDownload();

    expect(output.usage).toBe(1);
  });

  it('should increment usage for existing files', async () => {
    getFiles.mockResolvedValue([{ file_id: 'existing-id', usage: 5, createdAt: '2024-01-01' }]);

    const output = await runWithSmallDownload();

    expect(output.usage).toBe(6);
  });

  it('should handle existing file with undefined usage', async () => {
    getFiles.mockResolvedValue([{ file_id: 'existing-id', createdAt: '2024-01-01' }]);

    const output = await runWithSmallDownload();

    // A missing usage counts as 0, so the increment lands on 1.
    expect(output.usage).toBe(1);
  });
});
|
||||
|
||||
describe('metadata and file properties', () => {
  /** Runs processCodeOutput against a 100-byte download and resolves to its result. */
  const runWithSmallDownload = () => {
    axios.mockResolvedValue({ data: Buffer.alloc(100) });
    return processCodeOutput(baseParams);
  };

  it('should include fileIdentifier in metadata', async () => {
    const output = await runWithSmallDownload();

    // The identifier is "<session_id>/<id>" of the code-execution output.
    const expectedMetadata = { fileIdentifier: 'session-123/file-id-123' };
    expect(output.metadata).toEqual(expectedMetadata);
  });

  it('should set correct context for code-generated files', async () => {
    const output = await runWithSmallDownload();

    expect(output.context).toBe(FileContext.execute_code);
  });

  it('should include toolCallId and messageId in result', async () => {
    const output = await runWithSmallDownload();

    expect(output.toolCallId).toBe('tool-call-123');
    expect(output.messageId).toBe('msg-123');
  });

  it('should call createFile with upsert enabled', async () => {
    await runWithSmallDownload();

    const expectedRecord = expect.objectContaining({
      file_id: 'mock-uuid-1234',
      context: FileContext.execute_code,
    });
    // Second argument is the upsert flag.
    expect(createFile).toHaveBeenCalledWith(expectedRecord, true);
  });
});
|
||||
});
|
||||
});
|
||||
|
|
@ -67,7 +67,12 @@ async function saveLocalBuffer({ userId, buffer, fileName, basePath = 'images' }
|
|||
try {
|
||||
const { publicPath, uploads } = paths;
|
||||
|
||||
const directoryPath = path.join(basePath === 'images' ? publicPath : uploads, basePath, userId);
|
||||
/**
|
||||
* For 'images': save to publicPath/images/userId (images are served statically)
|
||||
* For 'uploads': save to uploads/userId (files downloaded via API)
|
||||
* */
|
||||
const directoryPath =
|
||||
basePath === 'images' ? path.join(publicPath, basePath, userId) : path.join(uploads, userId);
|
||||
|
||||
if (!fs.existsSync(directoryPath)) {
|
||||
fs.mkdirSync(directoryPath, { recursive: true });
|
||||
|
|
|
|||
|
|
@ -8,9 +8,13 @@ import { cn } from '~/utils';
|
|||
|
||||
const FileAttachment = memo(({ attachment }: { attachment: Partial<TAttachment> }) => {
|
||||
const [isVisible, setIsVisible] = useState(false);
|
||||
const file = attachment as TFile & TAttachmentMetadata;
|
||||
const { handleDownload } = useAttachmentLink({
|
||||
href: attachment.filepath ?? '',
|
||||
filename: attachment.filename ?? '',
|
||||
file_id: file.file_id,
|
||||
user: file.user,
|
||||
source: file.source,
|
||||
});
|
||||
const extension = attachment.filename?.split('.').pop();
|
||||
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ const LogContent: React.FC<LogContentProps> = ({ output = '', renderImages, atta
|
|||
return `${filename} ${localize('com_download_expired')}`;
|
||||
}
|
||||
|
||||
const fileData = file as TFile & TAttachmentMetadata;
|
||||
const filepath = file.filepath || '';
|
||||
|
||||
// const expirationText = expiresAt
|
||||
|
|
@ -72,7 +73,13 @@ const LogContent: React.FC<LogContentProps> = ({ output = '', renderImages, atta
|
|||
// : ` ${localize('com_click_to_download')}`;
|
||||
|
||||
return (
|
||||
<LogLink href={filepath} filename={filename}>
|
||||
<LogLink
|
||||
href={filepath}
|
||||
filename={filename}
|
||||
file_id={fileData.file_id}
|
||||
user={fileData.user}
|
||||
source={fileData.source}
|
||||
>
|
||||
{'- '}
|
||||
{filename} {localize('com_click_to_download')}
|
||||
</LogLink>
|
||||
|
|
|
|||
|
|
@ -1,21 +1,56 @@
|
|||
import React from 'react';
|
||||
import { FileSources } from 'librechat-data-provider';
|
||||
import { useToastContext } from '@librechat/client';
|
||||
import { useCodeOutputDownload } from '~/data-provider';
|
||||
import { useCodeOutputDownload, useFileDownload } from '~/data-provider';
|
||||
|
||||
/** Props accepted by the `LogLink` anchor; all but `children` are forwarded to `useAttachmentLink`. */
interface LogLinkProps {
  /** URL rendered on the anchor and used for the URL-based download. */
  href: string;
  /** File name associated with the link. */
  filename: string;
  /** Stored file's ID; together with `user` and `source` it enables the file-download API path. */
  file_id?: string;
  /** User value passed to the file-download query. */
  user?: string;
  /** Storage source of the file, checked against the locally stored sources. */
  source?: string;
  /** Link content. */
  children: React.ReactNode;
}
|
||||
|
||||
export const useAttachmentLink = ({ href, filename }: Pick<LogLinkProps, 'href' | 'filename'>) => {
|
||||
/** Options consumed by `useAttachmentLink` (the `LogLinkProps` fields minus `children`). */
interface AttachmentLinkOptions {
  /** URL used for the URL-based download. */
  href: string;
  /** File name associated with the download. */
  filename: string;
  /** Stored file's ID; required, along with `user`, for the file-download API path. */
  file_id?: string;
  /** User value passed to the file-download query. */
  user?: string;
  /** Storage source of the file, checked against the locally stored sources. */
  source?: string;
}
|
||||
|
||||
/**
 * Tells whether a file's `source` is one of the storage backends treated as
 * "stored on the LibreChat server" (local, firebase, s3, azure_blob).
 * Such files should be fetched through the /api/files/download endpoint
 * instead of direct URL access.
 *
 * @param source - The file's storage source, if known.
 * @returns `true` for a listed source; `false` for any other value or when `source` is missing/empty.
 */
const isLocallyStoredSource = (source?: string): boolean =>
  Boolean(source) &&
  [FileSources.local, FileSources.firebase, FileSources.s3, FileSources.azure_blob].includes(
    source as FileSources,
  );
|
||||
|
||||
export const useAttachmentLink = ({
|
||||
href,
|
||||
filename,
|
||||
file_id,
|
||||
user,
|
||||
source,
|
||||
}: AttachmentLinkOptions) => {
|
||||
const { showToast } = useToastContext();
|
||||
const { refetch: downloadFile } = useCodeOutputDownload(href);
|
||||
|
||||
const useLocalDownload = isLocallyStoredSource(source) && !!file_id && !!user;
|
||||
const { refetch: downloadFromApi } = useFileDownload(user, file_id);
|
||||
const { refetch: downloadFromUrl } = useCodeOutputDownload(href);
|
||||
|
||||
const handleDownload = async (event: React.MouseEvent<HTMLAnchorElement | HTMLButtonElement>) => {
|
||||
event.preventDefault();
|
||||
try {
|
||||
const stream = await downloadFile();
|
||||
const stream = useLocalDownload ? await downloadFromApi() : await downloadFromUrl();
|
||||
if (stream.data == null || stream.data === '') {
|
||||
console.error('Error downloading file: No data found');
|
||||
showToast({
|
||||
|
|
@ -39,8 +74,8 @@ export const useAttachmentLink = ({ href, filename }: Pick<LogLinkProps, 'href'
|
|||
return { handleDownload };
|
||||
};
|
||||
|
||||
const LogLink: React.FC<LogLinkProps> = ({ href, filename, children }) => {
|
||||
const { handleDownload } = useAttachmentLink({ href, filename });
|
||||
const LogLink: React.FC<LogLinkProps> = ({ href, filename, file_id, user, source, children }) => {
|
||||
const { handleDownload } = useAttachmentLink({ href, filename, file_id, user, source });
|
||||
return (
|
||||
<a
|
||||
href={href}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,12 @@
|
|||
import { useSetRecoilState } from 'recoil';
|
||||
import type { QueryClient } from '@tanstack/react-query';
|
||||
import { QueryKeys, Tools } from 'librechat-data-provider';
|
||||
import type { TAttachment, EventSubmission, MemoriesResponse } from 'librechat-data-provider';
|
||||
import type {
|
||||
MemoriesResponse,
|
||||
EventSubmission,
|
||||
TAttachment,
|
||||
TFile,
|
||||
} from 'librechat-data-provider';
|
||||
import { handleMemoryArtifact } from '~/utils/memory';
|
||||
import store from '~/store';
|
||||
|
||||
|
|
@ -11,9 +16,24 @@ export default function useAttachmentHandler(queryClient?: QueryClient) {
|
|||
return ({ data }: { data: TAttachment; submission: EventSubmission }) => {
|
||||
const { messageId } = data;
|
||||
|
||||
if (queryClient && data?.filepath && !data.filepath.includes('/api/files')) {
|
||||
queryClient.setQueryData([QueryKeys.files], (oldData: TAttachment[] | undefined) => {
|
||||
return [data, ...(oldData || [])];
|
||||
const fileData = data as TFile;
|
||||
if (
|
||||
queryClient &&
|
||||
fileData?.file_id &&
|
||||
fileData?.filepath &&
|
||||
!fileData.filepath.includes('/api/files')
|
||||
) {
|
||||
queryClient.setQueryData([QueryKeys.files], (oldData: TFile[] | undefined) => {
|
||||
if (!oldData) {
|
||||
return [fileData];
|
||||
}
|
||||
const existingIndex = oldData.findIndex((file) => file.file_id === fileData.file_id);
|
||||
if (existingIndex > -1) {
|
||||
const updated = [...oldData];
|
||||
updated[existingIndex] = { ...oldData[existingIndex], ...fileData };
|
||||
return updated;
|
||||
}
|
||||
return [fileData, ...oldData];
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
10
package-lock.json
generated
10
package-lock.json
generated
|
|
@ -59,7 +59,7 @@
|
|||
"@google/genai": "^1.19.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@langchain/core": "^0.3.80",
|
||||
"@librechat/agents": "^3.0.77",
|
||||
"@librechat/agents": "^3.0.78",
|
||||
"@librechat/api": "*",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@microsoft/microsoft-graph-client": "^3.0.7",
|
||||
|
|
@ -12646,9 +12646,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@librechat/agents": {
|
||||
"version": "3.0.77",
|
||||
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.77.tgz",
|
||||
"integrity": "sha512-Wr9d8bjJAQSl03nEgnAPG6jBQT1fL3sNV3TFDN1FvFQt6WGfdok838Cbcn+/tSGXSPJcICTxNkMT7VN8P6bCPw==",
|
||||
"version": "3.0.78",
|
||||
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.78.tgz",
|
||||
"integrity": "sha512-+p4NuE2dBAbwm4gJc/jbBDIAfC8xNC0gUAb8wsLXA7zcORnnDRTQ+HWWYVJZ8e81dTIxHIl61hwsziFjVZHvUw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@langchain/anthropic": "^0.3.26",
|
||||
|
|
@ -43129,7 +43129,7 @@
|
|||
"@google/genai": "^1.19.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@langchain/core": "^0.3.80",
|
||||
"@librechat/agents": "^3.0.77",
|
||||
"@librechat/agents": "^3.0.78",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@modelcontextprotocol/sdk": "^1.25.2",
|
||||
"@smithy/node-http-handler": "^4.4.5",
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@
|
|||
"@google/genai": "^1.19.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@langchain/core": "^0.3.80",
|
||||
"@librechat/agents": "^3.0.77",
|
||||
"@librechat/agents": "^3.0.78",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@modelcontextprotocol/sdk": "^1.25.2",
|
||||
"@smithy/node-http-handler": "^4.4.5",
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import { Providers } from '@librechat/agents';
|
||||
import {
|
||||
Constants,
|
||||
ErrorTypes,
|
||||
EModelEndpoint,
|
||||
EToolResources,
|
||||
|
|
@ -20,7 +21,12 @@ import type { GenericTool, LCToolRegistry, ToolMap } from '@librechat/agents';
|
|||
import type { Response as ServerResponse } from 'express';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { InitializeResultBase, ServerRequest, EndpointDbMethods } from '~/types';
|
||||
import { getModelMaxTokens, extractLibreChatParams, optionalChainWithEmptyCheck } from '~/utils';
|
||||
import {
|
||||
optionalChainWithEmptyCheck,
|
||||
extractLibreChatParams,
|
||||
getModelMaxTokens,
|
||||
getThreadData,
|
||||
} from '~/utils';
|
||||
import { filterFilesByEndpointConfig } from '~/files';
|
||||
import { generateArtifactsPrompt } from '~/prompts';
|
||||
import { getProviderConfig } from '~/endpoints';
|
||||
|
|
@ -58,6 +64,8 @@ export interface InitializeAgentParams {
|
|||
agent: Agent;
|
||||
/** Conversation ID (optional) */
|
||||
conversationId?: string | null;
|
||||
/** Parent message ID for determining the current thread (optional) */
|
||||
parentMessageId?: string | null;
|
||||
/** Request files */
|
||||
requestFiles?: IMongoFile[];
|
||||
/** Function to load agent tools */
|
||||
|
|
@ -95,10 +103,23 @@ export interface InitializeAgentDbMethods extends EndpointDbMethods {
|
|||
updateFilesUsage: (files: Array<{ file_id: string }>, fileIds?: string[]) => Promise<unknown[]>;
|
||||
/** Get files from database */
|
||||
getFiles: (filter: unknown, sort: unknown, select: unknown, opts?: unknown) => Promise<unknown[]>;
|
||||
/** Get tool files by IDs */
|
||||
/** Get tool files by IDs (user-uploaded files only, code files handled separately) */
|
||||
getToolFilesByIds: (fileIds: string[], toolSet: Set<EToolResources>) => Promise<unknown[]>;
|
||||
/** Get conversation file IDs */
|
||||
getConvoFiles: (conversationId: string) => Promise<string[] | null>;
|
||||
/** Get code-generated files by conversation ID and optional message IDs */
|
||||
getCodeGeneratedFiles?: (conversationId: string, messageIds?: string[]) => Promise<unknown[]>;
|
||||
/** Get user-uploaded execute_code files by file IDs (from message.files in thread) */
|
||||
getUserCodeFiles?: (fileIds: string[]) => Promise<unknown[]>;
|
||||
/** Get messages for a conversation (supports select for field projection) */
|
||||
getMessages?: (
|
||||
filter: { conversationId: string },
|
||||
select?: string,
|
||||
) => Promise<Array<{
|
||||
messageId: string;
|
||||
parentMessageId?: string;
|
||||
files?: Array<{ file_id: string }>;
|
||||
}> | null>;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -125,6 +146,7 @@ export async function initializeAgent(
|
|||
requestFiles = [],
|
||||
conversationId,
|
||||
endpointOption,
|
||||
parentMessageId,
|
||||
allowedProviders,
|
||||
isInitialAgent = false,
|
||||
} = params;
|
||||
|
|
@ -174,9 +196,51 @@ export async function initializeAgent(
|
|||
toolResourceSet.add(EToolResources[tool as keyof typeof EToolResources]);
|
||||
}
|
||||
}
|
||||
|
||||
const toolFiles = (await db.getToolFilesByIds(fileIds, toolResourceSet)) as IMongoFile[];
|
||||
if (requestFiles.length || toolFiles.length) {
|
||||
currentFiles = (await db.updateFilesUsage(requestFiles.concat(toolFiles))) as IMongoFile[];
|
||||
|
||||
/**
|
||||
* Retrieve execute_code files filtered to the current thread.
|
||||
* This includes both code-generated files and user-uploaded execute_code files.
|
||||
*/
|
||||
let codeGeneratedFiles: IMongoFile[] = [];
|
||||
let userCodeFiles: IMongoFile[] = [];
|
||||
|
||||
if (toolResourceSet.has(EToolResources.execute_code)) {
|
||||
let threadMessageIds: string[] | undefined;
|
||||
let threadFileIds: string[] | undefined;
|
||||
|
||||
if (parentMessageId && parentMessageId !== Constants.NO_PARENT && db.getMessages) {
|
||||
/** Only select fields needed for thread traversal */
|
||||
const messages = await db.getMessages(
|
||||
{ conversationId },
|
||||
'messageId parentMessageId files',
|
||||
);
|
||||
if (messages && messages.length > 0) {
|
||||
/** Single O(n) pass: build Map, traverse thread, collect both IDs */
|
||||
const threadData = getThreadData(messages, parentMessageId);
|
||||
threadMessageIds = threadData.messageIds;
|
||||
threadFileIds = threadData.fileIds;
|
||||
}
|
||||
}
|
||||
|
||||
/** Code-generated files (context: execute_code) filtered by messageId */
|
||||
if (db.getCodeGeneratedFiles) {
|
||||
codeGeneratedFiles = (await db.getCodeGeneratedFiles(
|
||||
conversationId,
|
||||
threadMessageIds,
|
||||
)) as IMongoFile[];
|
||||
}
|
||||
|
||||
/** User-uploaded execute_code files (context: agents/message_attachment) from thread messages */
|
||||
if (db.getUserCodeFiles && threadFileIds && threadFileIds.length > 0) {
|
||||
userCodeFiles = (await db.getUserCodeFiles(threadFileIds)) as IMongoFile[];
|
||||
}
|
||||
}
|
||||
|
||||
const allToolFiles = toolFiles.concat(codeGeneratedFiles, userCodeFiles);
|
||||
if (requestFiles.length || allToolFiles.length) {
|
||||
currentFiles = (await db.updateFilesUsage(requestFiles.concat(allToolFiles))) as IMongoFile[];
|
||||
}
|
||||
} else if (requestFiles.length) {
|
||||
currentFiles = (await db.updateFilesUsage(requestFiles)) as IMongoFile[];
|
||||
|
|
|
|||
|
|
@ -1,4 +1,8 @@
|
|||
import { sanitizeFileForTransmit, sanitizeMessageForTransmit } from './message';
|
||||
import { Constants } from 'librechat-data-provider';
|
||||
import { sanitizeFileForTransmit, sanitizeMessageForTransmit, getThreadData } from './message';
|
||||
|
||||
/** `Constants.NO_PARENT` widened to a plain string so it fits the ThreadMessage fixtures below */
const NO_PARENT: string = Constants.NO_PARENT;
|
||||
|
||||
describe('sanitizeFileForTransmit', () => {
|
||||
it('should remove text field from file', () => {
|
||||
|
|
@ -120,3 +124,272 @@ describe('sanitizeMessageForTransmit', () => {
|
|||
expect(message.files[0].text).toBe('original text');
|
||||
});
|
||||
});
|
||||
|
||||
describe('getThreadData', () => {
|
||||
describe('edge cases - empty and null inputs', () => {
  it('should return empty result for empty messages array', () => {
    const { messageIds, fileIds } = getThreadData([], 'parent-123');

    expect(messageIds).toEqual([]);
    expect(fileIds).toEqual([]);
  });

  it('should return empty result for null parentMessageId', () => {
    const conversation = [
      { messageId: 'msg-1', parentMessageId: null },
      { messageId: 'msg-2', parentMessageId: 'msg-1' },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, null);

    expect(messageIds).toEqual([]);
    expect(fileIds).toEqual([]);
  });

  it('should return empty result for undefined parentMessageId', () => {
    const conversation = [{ messageId: 'msg-1', parentMessageId: null }];

    const { messageIds, fileIds } = getThreadData(conversation, undefined);

    expect(messageIds).toEqual([]);
    expect(fileIds).toEqual([]);
  });

  it('should return empty result when parentMessageId not found in messages', () => {
    const conversation = [
      { messageId: 'msg-1', parentMessageId: null },
      { messageId: 'msg-2', parentMessageId: 'msg-1' },
    ];

    // The starting ID matches none of the messages above.
    const { messageIds, fileIds } = getThreadData(conversation, 'non-existent');

    expect(messageIds).toEqual([]);
    expect(fileIds).toEqual([]);
  });
});
|
||||
|
||||
describe('thread traversal', () => {
  it('should traverse a simple linear thread', () => {
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT },
      { messageId: 'msg-2', parentMessageId: 'msg-1' },
      { messageId: 'msg-3', parentMessageId: 'msg-2' },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-3');

    // IDs come back leaf-first, ending at the root.
    expect(messageIds).toEqual(['msg-3', 'msg-2', 'msg-1']);
    expect(fileIds).toEqual([]);
  });

  it('should stop at NO_PARENT constant', () => {
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT },
      { messageId: 'msg-2', parentMessageId: 'msg-1' },
    ];

    const { messageIds } = getThreadData(conversation, 'msg-2');

    expect(messageIds).toEqual(['msg-2', 'msg-1']);
  });

  it('should collect only messages in the thread branch', () => {
    // Two branches share the root:
    //   A: msg-1 -> msg-2 -> msg-3
    //   B: msg-1 -> msg-4 -> msg-5
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT },
      { messageId: 'msg-2', parentMessageId: 'msg-1' },
      { messageId: 'msg-3', parentMessageId: 'msg-2' },
      { messageId: 'msg-4', parentMessageId: 'msg-1' },
      { messageId: 'msg-5', parentMessageId: 'msg-4' },
    ];

    const branchA = getThreadData(conversation, 'msg-3');
    expect(branchA.messageIds).toEqual(['msg-3', 'msg-2', 'msg-1']);

    const branchB = getThreadData(conversation, 'msg-5');
    expect(branchB.messageIds).toEqual(['msg-5', 'msg-4', 'msg-1']);
  });

  it('should handle single message thread', () => {
    const conversation = [{ messageId: 'msg-1', parentMessageId: NO_PARENT }];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-1');

    expect(messageIds).toEqual(['msg-1']);
    expect(fileIds).toEqual([]);
  });
});
|
||||
|
||||
describe('circular reference protection', () => {
  it('should handle circular references without infinite loop', () => {
    // Malformed data: msg-2 and msg-3 name each other as parent.
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT },
      { messageId: 'msg-2', parentMessageId: 'msg-3' },
      { messageId: 'msg-3', parentMessageId: 'msg-2' },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-2');

    // Traversal ends as soon as it would revisit an ID.
    expect(messageIds).toEqual(['msg-2', 'msg-3']);
    expect(fileIds).toEqual([]);
  });

  it('should handle self-referencing message', () => {
    const conversation = [{ messageId: 'msg-1', parentMessageId: 'msg-1' }];

    const { messageIds } = getThreadData(conversation, 'msg-1');

    expect(messageIds).toEqual(['msg-1']);
  });
});
|
||||
|
||||
describe('file ID collection', () => {
  it('should collect file IDs from messages with files', () => {
    const conversation = [
      {
        messageId: 'msg-1',
        parentMessageId: NO_PARENT,
        files: [{ file_id: 'file-1' }, { file_id: 'file-2' }],
      },
      {
        messageId: 'msg-2',
        parentMessageId: 'msg-1',
        files: [{ file_id: 'file-3' }],
      },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-2');

    expect(messageIds).toEqual(['msg-2', 'msg-1']);
    for (const expectedId of ['file-1', 'file-2', 'file-3']) {
      expect(fileIds).toContain(expectedId);
    }
    expect(fileIds).toHaveLength(3);
  });

  it('should deduplicate file IDs across messages', () => {
    // 'file-shared' is attached to both messages but must be reported once.
    const conversation = [
      {
        messageId: 'msg-1',
        parentMessageId: NO_PARENT,
        files: [{ file_id: 'file-shared' }, { file_id: 'file-1' }],
      },
      {
        messageId: 'msg-2',
        parentMessageId: 'msg-1',
        files: [{ file_id: 'file-shared' }, { file_id: 'file-2' }],
      },
    ];

    const { fileIds } = getThreadData(conversation, 'msg-2');

    for (const expectedId of ['file-shared', 'file-1', 'file-2']) {
      expect(fileIds).toContain(expectedId);
    }
    expect(fileIds).toHaveLength(3);
  });

  it('should skip files without file_id', () => {
    const conversation = [
      {
        messageId: 'msg-1',
        parentMessageId: NO_PARENT,
        files: [{ file_id: 'file-1' }, { file_id: undefined }, { file_id: '' }],
      },
    ];

    const { fileIds } = getThreadData(conversation, 'msg-1');

    // Undefined and empty-string IDs are dropped.
    expect(fileIds).toEqual(['file-1']);
  });

  it('should handle messages with empty files array', () => {
    const conversation = [
      {
        messageId: 'msg-1',
        parentMessageId: NO_PARENT,
        files: [],
      },
      {
        messageId: 'msg-2',
        parentMessageId: 'msg-1',
        files: [{ file_id: 'file-1' }],
      },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-2');

    expect(messageIds).toEqual(['msg-2', 'msg-1']);
    expect(fileIds).toEqual(['file-1']);
  });

  it('should handle messages without files property', () => {
    const conversation = [
      { messageId: 'msg-1', parentMessageId: NO_PARENT },
      {
        messageId: 'msg-2',
        parentMessageId: 'msg-1',
        files: [{ file_id: 'file-1' }],
      },
    ];

    const { messageIds, fileIds } = getThreadData(conversation, 'msg-2');

    expect(messageIds).toEqual(['msg-2', 'msg-1']);
    expect(fileIds).toEqual(['file-1']);
  });

  it('should only collect files from messages in the thread', () => {
    // msg-3 is a sibling of msg-2, so it is outside the thread that ends at msg-2.
    const conversation = [
      {
        messageId: 'msg-1',
        parentMessageId: NO_PARENT,
        files: [{ file_id: 'file-1' }],
      },
      {
        messageId: 'msg-2',
        parentMessageId: 'msg-1',
        files: [{ file_id: 'file-2' }],
      },
      {
        messageId: 'msg-3',
        parentMessageId: 'msg-1',
        files: [{ file_id: 'file-3' }],
      },
    ];

    const { fileIds } = getThreadData(conversation, 'msg-2');

    expect(fileIds).toContain('file-1');
    expect(fileIds).toContain('file-2');
    expect(fileIds).not.toContain('file-3');
  });
});
|
||||
|
||||
describe('performance - O(1) lookups', () => {
  it('should handle large message arrays efficiently', () => {
    // Linear chain of 1000 messages, each carrying one unique file.
    const conversation = Array.from({ length: 1000 }, (_, index) => ({
      messageId: `msg-${index}`,
      parentMessageId: index === 0 ? NO_PARENT : `msg-${index - 1}`,
      files: [{ file_id: `file-${index}` }],
    }));

    const startedAt = performance.now();
    const { messageIds, fileIds } = getThreadData(conversation, 'msg-999');
    const elapsedMs = performance.now() - startedAt;

    expect(messageIds).toHaveLength(1000);
    expect(fileIds).toHaveLength(1000);
    // Wall-clock budget for the traversal: under 100ms for 1000 messages.
    expect(elapsedMs).toBeLessThan(100);
  });
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { Constants } from 'librechat-data-provider';
|
||||
import type { TFile, TMessage } from 'librechat-data-provider';
|
||||
|
||||
/** Fields to strip from files before client transmission */
|
||||
|
|
@ -66,3 +67,74 @@ export function sanitizeMessageForTransmit<T extends Partial<TMessage>>(
|
|||
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
/** Minimal message shape for thread traversal */
type ThreadMessage = {
  /** Identifier used as the lookup key during traversal */
  messageId: string;
  /** Identifier of the parent message; may be absent or null */
  parentMessageId?: string | null;
  /** Files attached to the message; entries without a `file_id` are allowed */
  files?: Array<{ file_id?: string }>;
};
|
||||
|
||||
/** Result of thread data extraction */
export type ThreadData = {
  /** IDs of the messages visited during traversal, in visit order */
  messageIds: string[];
  /** File IDs collected from the visited messages */
  fileIds: string[];
};
|
||||
|
||||
/**
 * Extracts thread message IDs and file IDs in a single O(n) pass.
 * Builds a Map for O(1) lookups, then traverses the thread collecting both IDs.
 *
 * Traversal starts at `parentMessageId` and follows each message's
 * `parentMessageId` link until it reaches `Constants.NO_PARENT`, a missing or
 * empty parent, an ID not present in `messages`, or an ID already visited
 * (which guards against cyclic parent links).
 *
 * @param messages - All messages in the conversation (should be queried with select for efficiency)
 * @param parentMessageId - The ID of the parent message to start traversal from
 * @returns Object containing messageIds (in traversal order, starting message first)
 *          and de-duplicated fileIds (in first-seen order)
 */
export function getThreadData(
  messages: ThreadMessage[],
  parentMessageId: string | null | undefined,
): ThreadData {
  const result: ThreadData = { messageIds: [], fileIds: [] };

  if (!messages || messages.length === 0 || !parentMessageId) {
    return result;
  }

  /** Build Map for O(1) lookups instead of O(n) .find() calls */
  const messageMap = new Map<string, ThreadMessage>();
  for (const msg of messages) {
    messageMap.set(msg.messageId, msg);
  }

  /** A Set keeps file IDs unique while preserving first-seen order */
  const fileIdSet = new Set<string>();
  const visitedIds = new Set<string>();
  let currentId: string | null | undefined = parentMessageId;

  /** Single traversal: collect message IDs and file IDs together */
  while (currentId) {
    /** Stop on a cycle rather than looping forever */
    if (visitedIds.has(currentId)) {
      break;
    }
    visitedIds.add(currentId);

    const message = messageMap.get(currentId);
    if (!message) {
      break;
    }

    result.messageIds.push(message.messageId);

    /** Collect file IDs from this message, skipping entries without an ID */
    if (message.files) {
      for (const file of message.files) {
        if (file.file_id) {
          fileIdSet.add(file.file_id);
        }
      }
    }

    /** The NO_PARENT sentinel marks the root of the thread */
    currentId = message.parentMessageId === Constants.NO_PARENT ? null : message.parentMessageId;
  }

  result.fileIds = Array.from(fileIdSet);
  return result;
}
|
||||
|
|
|
|||
|
|
@ -198,8 +198,15 @@ export const codeTypeMapping: { [key: string]: string } = {
|
|||
ts: 'application/typescript', // .ts - TypeScript source
|
||||
tar: 'application/x-tar', // .tar - Tar archive
|
||||
zip: 'application/zip', // .zip - ZIP archive
|
||||
txt: 'text/plain', // .txt - Plain text file
|
||||
log: 'text/plain', // .log - Log file
|
||||
csv: 'text/csv', // .csv - Comma-separated values
|
||||
tsv: 'text/tab-separated-values', // .tsv - Tab-separated values
|
||||
json: 'application/json', // .json - JSON file
|
||||
xml: 'application/xml', // .xml - XML file
|
||||
html: 'text/html', // .html - HTML file
|
||||
htm: 'text/html', // .htm - HTML file
|
||||
css: 'text/css', // .css - CSS file
|
||||
yml: 'application/yaml', // .yml - YAML
|
||||
yaml: 'application/yaml', // .yaml - YAML
|
||||
sql: 'application/sql', // .sql - SQL (IANA registered)
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ describe('File Methods', () => {
|
|||
|
||||
const files = await fileMethods.getFiles({ user: userId });
|
||||
expect(files).toHaveLength(3);
|
||||
expect(files.map((f) => f.file_id)).toEqual(expect.arrayContaining(fileIds));
|
||||
expect(files!.map((f) => f.file_id)).toEqual(expect.arrayContaining(fileIds));
|
||||
});
|
||||
|
||||
it('should exclude text field by default', async () => {
|
||||
|
|
@ -149,7 +149,7 @@ describe('File Methods', () => {
|
|||
|
||||
const files = await fileMethods.getFiles({ file_id: fileId });
|
||||
expect(files).toHaveLength(1);
|
||||
expect(files[0].text).toBeUndefined();
|
||||
expect(files![0].text).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -207,7 +207,7 @@ describe('File Methods', () => {
|
|||
expect(files[0].file_id).toBe(contextFileId);
|
||||
});
|
||||
|
||||
it('should retrieve files for execute_code tool', async () => {
|
||||
it('should not retrieve execute_code files (handled by getCodeGeneratedFiles)', async () => {
|
||||
const userId = new mongoose.Types.ObjectId();
|
||||
const codeFileId = uuidv4();
|
||||
|
||||
|
|
@ -218,14 +218,16 @@ describe('File Methods', () => {
|
|||
filepath: '/uploads/code.py',
|
||||
type: 'text/x-python',
|
||||
bytes: 100,
|
||||
context: FileContext.execute_code,
|
||||
metadata: { fileIdentifier: 'some-identifier' },
|
||||
});
|
||||
|
||||
// execute_code files are explicitly excluded from getToolFilesByIds
|
||||
// They are retrieved via getCodeGeneratedFiles and getUserCodeFiles instead
|
||||
const toolSet = new Set([EToolResources.execute_code]);
|
||||
const files = await fileMethods.getToolFilesByIds([codeFileId], toolSet);
|
||||
|
||||
expect(files).toHaveLength(1);
|
||||
expect(files[0].file_id).toBe(codeFileId);
|
||||
expect(files).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -490,7 +492,7 @@ describe('File Methods', () => {
|
|||
|
||||
const remaining = await fileMethods.getFiles({});
|
||||
expect(remaining).toHaveLength(1);
|
||||
expect(remaining[0].user?.toString()).toBe(otherUserId.toString());
|
||||
expect(remaining![0].user?.toString()).toBe(otherUserId.toString());
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,8 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs
|
||||
* Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs.
|
||||
* Note: execute_code files are handled separately by getCodeGeneratedFiles.
|
||||
* @param fileIds - Array of file_id strings to search for
|
||||
* @param toolResourceSet - Optional filter for tool resources
|
||||
* @returns Files that match the criteria
|
||||
|
|
@ -61,21 +62,26 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
|
|||
}
|
||||
|
||||
try {
|
||||
const filter: FilterQuery<IMongoFile> = {
|
||||
file_id: { $in: fileIds },
|
||||
$or: [],
|
||||
};
|
||||
const orConditions: FilterQuery<IMongoFile>[] = [];
|
||||
|
||||
if (toolResourceSet.has(EToolResources.context)) {
|
||||
filter.$or?.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
|
||||
orConditions.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
|
||||
}
|
||||
if (toolResourceSet.has(EToolResources.file_search)) {
|
||||
filter.$or?.push({ embedded: true });
|
||||
orConditions.push({ embedded: true });
|
||||
}
|
||||
if (toolResourceSet.has(EToolResources.execute_code)) {
|
||||
filter.$or?.push({ 'metadata.fileIdentifier': { $exists: true } });
|
||||
|
||||
// If no conditions to match, return empty
|
||||
if (orConditions.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const filter: FilterQuery<IMongoFile> = {
|
||||
file_id: { $in: fileIds },
|
||||
context: { $ne: FileContext.execute_code },
|
||||
$or: orConditions,
|
||||
};
|
||||
|
||||
const selectFields: SelectProjection = { text: 0 };
|
||||
const sortOptions = { updatedAt: -1 as SortOrder };
|
||||
|
||||
|
|
@ -87,6 +93,84 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Retrieves files generated by code execution for a given conversation.
 * These files are stored locally with fileIdentifier metadata for code env re-upload.
 *
 * The query matches files in the conversation whose context is `execute_code`,
 * whose `messageId` is one of `messageIds`, and that have a
 * `metadata.fileIdentifier`. The `text` field is excluded from the results,
 * which are sorted by `createdAt` ascending.
 *
 * @param conversationId - The conversation ID to search for
 * @param messageIds - Array of messageIds to filter by (for linear thread filtering).
 *   While technically optional, this function returns empty if not provided.
 *   This is intentional: code-generated files must be filtered by thread to avoid
 *   including files from other branches of a conversation.
 * @returns Files generated by code execution in the conversation, filtered by messageIds;
 *   an empty array when inputs are missing or the lookup throws
 */
async function getCodeGeneratedFiles(
  conversationId: string,
  messageIds?: string[],
): Promise<IMongoFile[]> {
  if (!conversationId) {
    return [];
  }

  /**
   * Return early if messageIds not provided - this is intentional behavior.
   * Code-generated files must be filtered by thread messageIds to ensure we only
   * return files relevant to the current conversation branch, not orphaned files
   * from other branches or deleted messages.
   */
  if (!messageIds || messageIds.length === 0) {
    return [];
  }

  try {
    const filter: FilterQuery<IMongoFile> = {
      conversationId,
      context: FileContext.execute_code,
      messageId: { $exists: true, $in: messageIds },
      'metadata.fileIdentifier': { $exists: true },
    };

    const selectFields: SelectProjection = { text: 0 };
    const sortOptions = { createdAt: 1 as SortOrder };

    const results = await getFiles(filter, sortOptions, selectFields);
    return results ?? [];
  } catch (error) {
    /** Best-effort lookup: log the failure and fall back to an empty list */
    logger.error('[getCodeGeneratedFiles] Error retrieving code generated files:', error);
    return [];
  }
}
|
||||
|
||||
/**
 * Retrieves user-uploaded execute_code files (not code-generated) by their file IDs.
 * These are files with fileIdentifier metadata but context is NOT execute_code (e.g., agents or message_attachment).
 * File IDs should be collected from message.files arrays in the current thread.
 *
 * The `text` field is excluded from the results, which are sorted by
 * `createdAt` ascending.
 *
 * @param fileIds - Array of file IDs to fetch (from message.files in the thread)
 * @returns User-uploaded execute_code files; an empty array when no IDs are
 *   given or the lookup throws
 */
async function getUserCodeFiles(fileIds?: string[]): Promise<IMongoFile[]> {
  if (!fileIds || fileIds.length === 0) {
    return [];
  }

  try {
    const filter: FilterQuery<IMongoFile> = {
      file_id: { $in: fileIds },
      /** Code-generated files carry the execute_code context and are excluded here */
      context: { $ne: FileContext.execute_code },
      'metadata.fileIdentifier': { $exists: true },
    };

    const selectFields: SelectProjection = { text: 0 };
    const sortOptions = { createdAt: 1 as SortOrder };

    const results = await getFiles(filter, sortOptions, selectFields);
    return results ?? [];
  } catch (error) {
    /** Best-effort lookup: log the failure and fall back to an empty list */
    logger.error('[getUserCodeFiles] Error retrieving user code files:', error);
    return [];
  }
}
|
||||
|
||||
/**
|
||||
* Creates a new file with a TTL of 1 hour.
|
||||
* @param data - The file data to be created, must contain file_id
|
||||
|
|
@ -258,6 +342,8 @@ export function createFileMethods(mongoose: typeof import('mongoose')) {
|
|||
findFileById,
|
||||
getFiles,
|
||||
getToolFilesByIds,
|
||||
getCodeGeneratedFiles,
|
||||
getUserCodeFiles,
|
||||
createFile,
|
||||
updateFile,
|
||||
updateFileUsage,
|
||||
|
|
|
|||
|
|
@ -15,6 +15,10 @@ const file: Schema<IMongoFile> = new Schema(
|
|||
ref: 'Conversation',
|
||||
index: true,
|
||||
},
|
||||
messageId: {
|
||||
type: String,
|
||||
index: true,
|
||||
},
|
||||
file_id: {
|
||||
type: String,
|
||||
index: true,
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { Document, Types } from 'mongoose';
|
|||
export interface IMongoFile extends Omit<Document, 'model'> {
|
||||
user: Types.ObjectId;
|
||||
conversationId?: string;
|
||||
messageId?: string;
|
||||
file_id: string;
|
||||
temp_file_id?: string;
|
||||
bytes: number;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue