mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-20 01:06:11 +01:00
* refactor: process code output files for re-use (WIP) * feat: file attachment handling with additional metadata for downloads * refactor: Update directory path logic for local file saving based on basePath * refactor: file attachment handling to support TFile type and improve data merging logic * feat: thread filtering of code-generated files - Introduced parentMessageId parameter in addedConvo and initialize functions to enhance thread management. - Updated related methods to utilize parentMessageId for retrieving messages and filtering code-generated files by conversation threads. - Enhanced type definitions to include parentMessageId in relevant interfaces for better clarity and usage. * chore: imports/params ordering * feat: update file model to use messageId for filtering and processing - Changed references from 'message' to 'messageId' in file-related methods for consistency. - Added messageId field to the file schema and updated related types. - Enhanced file processing logic to accommodate the new messageId structure. * feat: enhance file retrieval methods to support user-uploaded execute_code files - Added a new method `getUserCodeFiles` to retrieve user-uploaded execute_code files, excluding code-generated files. - Updated existing file retrieval methods to improve filtering logic and handle edge cases. - Enhanced thread data extraction to collect both message IDs and file IDs efficiently. - Integrated `getUserCodeFiles` into relevant endpoints for better file management in conversations. * chore: update @librechat/agents package version to 3.0.78 in package-lock.json and related package.json files * refactor: file processing and retrieval logic - Added a fallback mechanism for download URLs when files exceed size limits or cannot be processed locally. - Implemented a deduplication strategy for code-generated files based on conversationId and filename to optimize storage. 
- Updated file retrieval methods to ensure proper filtering by messageIds, preventing orphaned files from being included. - Introduced comprehensive tests for new thread data extraction functionality, covering edge cases and performance considerations. * fix: improve file retrieval tests and handling of optional properties - Updated tests to safely access optional properties using non-null assertions. - Modified test descriptions for clarity regarding the exclusion of execute_code files. - Ensured that the retrieval logic correctly reflects the expected outcomes for file queries. * test: add comprehensive unit tests for processCodeOutput functionality - Introduced a new test suite for the processCodeOutput function, covering various scenarios including file retrieval, creation, and processing for both image and non-image files. - Implemented mocks for dependencies such as axios, logger, and file models to isolate tests and ensure reliable outcomes. - Validated behavior for existing files, new file creation, and error handling, including size limits and fallback mechanisms. - Enhanced test coverage for metadata handling and usage increment logic, ensuring robust verification of file processing outcomes. * test: enhance file size limit enforcement in processCodeOutput tests - Introduced a configurable file size limit for tests to improve flexibility and coverage. - Mocked the `librechat-data-provider` to allow dynamic adjustment of file size limits during tests. - Updated the file size limit enforcement test to validate behavior when files exceed specified limits, ensuring proper fallback to download URLs. - Reset file size limit after tests to maintain isolation for subsequent test cases.
250 lines
8.3 KiB
JavaScript
250 lines
8.3 KiB
JavaScript
const { logger } = require('@librechat/data-schemas');
|
|
const { EToolResources, FileContext } = require('librechat-data-provider');
|
|
const { File } = require('~/db/models');
|
|
|
|
/**
 * Looks up a single file document by its `file_id`.
 *
 * Every key in `options` is spread into the query filter alongside `file_id`, so the
 * entries act as additional match conditions (they are not passed as projection or
 * query options). A `file_id` key inside `options` would override the first argument.
 * @param {string} file_id - The unique identifier of the file.
 * @param {object} [options={}] - Extra filter fields merged into the query.
 * @returns {Promise<MongoFile>} A promise that resolves to the lean file document or null.
 */
async function findFileById(file_id, options = {}) {
  const query = { file_id, ...options };
  return File.findOne(query).lean();
}
|
|
|
|
/**
 * Retrieves files matching a given filter.
 *
 * Sort precedence: keys supplied in `_sortOptions` come first, in the order given.
 * `updatedAt: -1` (most recently updated first) is appended as the final key only when
 * the caller did not specify `updatedAt`. With no sort options the result is therefore
 * ordered by `updatedAt` descending, exactly as before.
 * @param {Object} filter - The filter criteria to apply.
 * @param {Object} [_sortOptions] - Optional sort parameters; take precedence over the default.
 * @param {Object|String} [selectFields={ text: 0 }] - Fields to include/exclude in the query results.
 *   Default excludes the 'text' field.
 * @returns {Promise<Array<MongoFile>>} A promise that resolves to an array of file documents.
 */
const getFiles = async (filter, _sortOptions, selectFields = { text: 0 }) => {
  // Sort specifications are order-sensitive: earlier keys take precedence. Spreading the
  // caller's keys *after* a leading `updatedAt` would reduce a request such as
  // `{ createdAt: 1 }` to a mere tie-breaker, so the caller's keys are placed first.
  const sortOptions = { ..._sortOptions };
  if (!('updatedAt' in sortOptions)) {
    sortOptions.updatedAt = -1;
  }
  return await File.find(filter).select(selectFields).sort(sortOptions).lean();
};
|
|
|
|
/**
 * Fetches tool files among the given file IDs, restricted to the requested tool resources.
 *
 * - `EToolResources.context` matches files with a non-null `text` field and the `agents` context.
 * - `EToolResources.file_search` matches files flagged as `embedded`.
 *
 * Files whose context is `execute_code` are always excluded here.
 * Note: execute_code files are handled separately by getCodeGeneratedFiles.
 * @param {string[]} fileIds - Array of file_id strings to search for
 * @param {Set<EToolResources>} toolResourceSet - Tool resources to match; an empty or missing
 *   set (or one containing none of the supported resources) yields an empty result
 * @returns {Promise<Array<MongoFile>>} Files that match the criteria
 * @throws {Error} 'Error retrieving tool files' when building or running the query fails
 */
async function getToolFilesByIds(fileIds, toolResourceSet) {
  if (!fileIds?.length || !toolResourceSet?.size) {
    return [];
  }

  try {
    /** Each supported tool resource paired with the query clause it contributes */
    const clausesByResource = [
      [
        EToolResources.context,
        { text: { $exists: true, $ne: null }, context: FileContext.agents },
      ],
      [EToolResources.file_search, { embedded: true }],
    ];

    const orConditions = clausesByResource
      .filter(([resource]) => toolResourceSet.has(resource))
      .map(([, clause]) => clause);

    if (!orConditions.length) {
      return [];
    }

    // `await` is required so query rejections are handled by the catch below
    return await getFiles(
      {
        file_id: { $in: fileIds },
        context: { $ne: FileContext.execute_code }, // Exclude code-generated files
        $or: orConditions,
      },
      { updatedAt: -1 },
      { text: 0 },
    );
  } catch (error) {
    logger.error('[getToolFilesByIds] Error retrieving tool files:', error);
    throw new Error('Error retrieving tool files');
  }
}
|
|
|
|
/**
 * Fetches the files produced by code execution within a conversation, limited to the
 * supplied messages.
 *
 * Only documents with the `execute_code` context, a `messageId` contained in `messageIds`,
 * and a `metadata.fileIdentifier` field are matched. `{ createdAt: 1 }` is handed to
 * `getFiles` as the sort options and the `text` field is excluded from the projection.
 * Failures are logged and reported as an empty array rather than thrown.
 * @param {string} conversationId - The conversation ID to search for
 * @param {string[]} [messageIds] - messageIds of the thread to filter by; when missing or
 *   empty no query is made and an empty array is returned
 * @returns {Promise<Array<MongoFile>>} Files generated by code execution in the conversation
 */
async function getCodeGeneratedFiles(conversationId, messageIds) {
  /** messageIds are required for proper thread filtering of code-generated files */
  const hasMessageIds = Boolean(messageIds) && messageIds.length !== 0;
  if (!conversationId || !hasMessageIds) {
    return [];
  }

  try {
    // `await` keeps rejections inside this try/catch
    return await getFiles(
      {
        conversationId,
        context: FileContext.execute_code,
        messageId: { $exists: true, $in: messageIds },
        'metadata.fileIdentifier': { $exists: true },
      },
      { createdAt: 1 },
      { text: 0 },
    );
  } catch (error) {
    logger.error('[getCodeGeneratedFiles] Error retrieving code generated files:', error);
    return [];
  }
}
|
|
|
|
/**
 * Fetches user-uploaded execute_code files (as opposed to code-generated ones) by file ID.
 *
 * Matched documents carry a `metadata.fileIdentifier` field and have a context other than
 * `execute_code` (e.g., agents or message_attachment). `{ createdAt: 1 }` is handed to
 * `getFiles` as the sort options and the `text` field is excluded from the projection.
 * File IDs should be collected from message.files arrays in the current thread.
 * Failures are logged and reported as an empty array rather than thrown.
 * @param {string[]} fileIds - Array of file IDs to fetch (from message.files in the thread)
 * @returns {Promise<Array<MongoFile>>} User-uploaded execute_code files
 */
async function getUserCodeFiles(fileIds) {
  const nothingRequested = !fileIds || fileIds.length === 0;
  if (nothingRequested) {
    return [];
  }

  try {
    // `await` keeps rejections inside this try/catch
    return await getFiles(
      {
        file_id: { $in: fileIds },
        context: { $ne: FileContext.execute_code },
        'metadata.fileIdentifier': { $exists: true },
      },
      { createdAt: 1 },
      { text: 0 },
    );
  } catch (error) {
    logger.error('[getUserCodeFiles] Error retrieving user code files:', error);
    return [];
  }
}
|
|
|
|
/**
 * Upserts a file document keyed by `data.file_id`.
 *
 * Unless `disableTTL` is truthy, `expiresAt` is set to one hour from now (replacing any
 * `expiresAt` present in `data`). When `disableTTL` is truthy, `expiresAt` is left out of
 * the written fields altogether, including one supplied by the caller.
 * @param {MongoFile} data - The file data to be created, must contain file_id.
 * @param {boolean} disableTTL - Whether to disable the TTL.
 * @returns {Promise<MongoFile>} A promise that resolves to the created file document.
 */
async function createFile(data, disableTTL) {
  const fileData = { ...data };

  if (disableTTL) {
    delete fileData.expiresAt;
  } else {
    // 3600 s * 1000 ms = one hour
    fileData.expiresAt = new Date(Date.now() + 3600 * 1000);
  }

  const query = { file_id: data.file_id };
  const queryOptions = { new: true, upsert: true };
  return File.findOneAndUpdate(query, fileData, queryOptions).lean();
}
|
|
|
|
/**
 * Applies new field values to the file identified by `file_id` and clears its `expiresAt`.
 *
 * Every property of `data` other than `file_id` is written with `$set`; `expiresAt` is
 * removed with `$unset` in the same operation so the document no longer carries a TTL.
 * @param {MongoFile} data - The data to update, must contain file_id.
 * @returns {Promise<MongoFile>} A promise that resolves to the updated file document.
 */
async function updateFile(data) {
  const { file_id, ...fields } = data;
  return File.findOneAndUpdate(
    { file_id },
    {
      $set: fields,
      $unset: { expiresAt: '' }, // Remove the expiresAt field to prevent TTL
    },
    { new: true },
  ).lean();
}
|
|
|
|
/**
 * Bumps the `usage` counter of the file identified by `file_id`.
 *
 * The same operation also unsets `expiresAt` and `temp_file_id` on the document.
 * @param {MongoFile} data - The data to update, must contain file_id and the increment value for usage.
 *   `inc` defaults to 1 when it is undefined.
 * @returns {Promise<MongoFile>} A promise that resolves to the updated file document.
 */
async function updateFileUsage(data) {
  const { file_id, inc: increment = 1 } = data;
  return File.findOneAndUpdate(
    { file_id },
    {
      $inc: { usage: increment },
      $unset: { expiresAt: '', temp_file_id: '' },
    },
    { new: true },
  ).lean();
}
|
|
|
|
/**
 * Removes the single file document whose `file_id` matches.
 * @param {string} file_id - The unique identifier of the file to delete.
 * @returns {Promise<MongoFile>} A promise that resolves to the deleted file document or null.
 */
async function deleteFile(file_id) {
  const query = { file_id };
  return File.findOneAndDelete(query).lean();
}
|
|
|
|
/**
 * Removes one file document matching an arbitrary filter.
 *
 * The filter is forwarded unchanged to `File.findOneAndDelete`.
 * @param {object} filter - The filter criteria to apply.
 * @returns {Promise<MongoFile>} A promise that resolves to the deleted file document or null.
 */
async function deleteFileByFilter(filter) {
  const removal = File.findOneAndDelete(filter);
  return removal.lean();
}
|
|
|
|
/**
 * Deletes multiple files, either by an array of file_ids or by owner.
 *
 * When `user` is truthy the query becomes `{ user }` and `file_ids` is ignored entirely,
 * i.e. every file document belonging to that user is targeted. Otherwise the documents
 * whose `file_id` is in `file_ids` are targeted.
 * @param {Array<string>} file_ids - The unique identifiers of the files to delete.
 * @param {string} [user] - When provided, replaces the `file_ids` filter with a match on this user.
 * @returns {Promise<Object>} A promise that resolves to the result of the deletion operation.
 */
async function deleteFiles(file_ids, user) {
  const deleteQuery = user ? { user } : { file_id: { $in: file_ids } };
  return File.deleteMany(deleteQuery);
}
|
|
|
|
/**
 * Writes new `filepath` values for many files in a single bulk operation.
 *
 * Each entry becomes an `updateOne` that sets `filepath` on the document with the matching
 * `file_id`. Nothing is written or logged when `updates` is missing or empty; otherwise the
 * number of modified documents is logged at info level.
 *
 * @param {MongoFile[]} updates - Array of updates in the format { file_id, filepath }
 * @returns {Promise<void>}
 */
async function batchUpdateFiles(updates) {
  const nothingToDo = !updates || updates.length === 0;
  if (nothingToDo) {
    return;
  }

  const toUpdateOne = ({ file_id, filepath }) => ({
    updateOne: {
      filter: { file_id },
      update: { $set: { filepath } },
    },
  });

  const { modifiedCount } = await File.bulkWrite(updates.map(toUpdateOne));
  logger.info(`Updated ${modifiedCount} files with new S3 URLs`);
}
|
|
|
|
module.exports = {
|
|
findFileById,
|
|
getFiles,
|
|
getToolFilesByIds,
|
|
getCodeGeneratedFiles,
|
|
getUserCodeFiles,
|
|
createFile,
|
|
updateFile,
|
|
updateFileUsage,
|
|
deleteFile,
|
|
deleteFiles,
|
|
deleteFileByFilter,
|
|
batchUpdateFiles,
|
|
};
|