mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-08 10:34:23 +01:00
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
* feat: Add support for Apache Parquet MIME types - Introduced 'application/x-parquet' to the full MIME types list and code interpreter MIME types list. - Updated application MIME types regex to include 'x-parquet' and 'vnd.apache.parquet'. - Added mapping for '.parquet' files to 'application/x-parquet' in code type mapping, enhancing file format support. * feat: Implement atomic file claiming for code execution outputs - Added a new `claimCodeFile` function to atomically claim a file_id for code execution outputs, preventing duplicates by using a compound key of filename and conversationId. - Updated `processCodeOutput` to utilize the new claiming mechanism, ensuring that concurrent calls for the same filename converge on a single record. - Refactored related tests to validate the new atomic claiming behavior and its impact on file usage tracking and versioning. * fix: Update image file handling to use cache-busting filepath - Modified the `processCodeOutput` function to generate a cache-busting filepath for updated image files, improving browser caching behavior. - Adjusted related tests to reflect the change from versioned filenames to cache-busted filepaths, ensuring accurate validation of image updates. * fix: Update step handler to prevent undefined content for non-tool call types - Modified the condition in useStepHandler to ensure that undefined content is only assigned for specific content types, enhancing the robustness of content handling. * fix: Update bedrockOutputParser to handle maxTokens for adaptive models - Modified the bedrockOutputParser logic to ensure that maxTokens is not set for adaptive models when neither maxTokens nor maxOutputTokens are provided, improving the handling of adaptive thinking configurations. - Updated related tests to reflect these changes, ensuring accurate validation of the output for adaptive models. 
* chore: Update @librechat/agents to version 3.1.38 in package.json and package-lock.json * fix: Enhance file claiming and error handling in code processing - Updated the `processCodeOutput` function to use a consistent file ID for claiming files, preventing duplicates and improving concurrency handling. - Refactored the `createFileMethods` to include error handling for failed file claims, ensuring robust behavior when claiming files for conversations. - These changes enhance the reliability of file management in the application. * fix: Update adaptive thinking test for Opus 4.6 model - Modified the test for configuring adaptive thinking to reflect that no default maxTokens should be set for the Opus 4.6 model. - Updated assertions to ensure that maxTokens is undefined, aligning with the expected behavior for adaptive models.
388 lines
13 KiB
TypeScript
388 lines
13 KiB
TypeScript
import logger from '../config/winston';
|
|
import { EToolResources, FileContext } from 'librechat-data-provider';
|
|
import type { FilterQuery, SortOrder, Model } from 'mongoose';
|
|
import type { IMongoFile } from '~/types/file';
|
|
|
|
/** Factory function that takes mongoose instance and returns the file methods */
|
|
export function createFileMethods(mongoose: typeof import('mongoose')) {
|
|
/**
 * Looks up a single file document by its `file_id`.
 *
 * Every entry of `options` is spread into the query filter after `file_id`,
 * so the entries act as additional match conditions (a `file_id` key inside
 * `options` would override the positional argument).
 *
 * @param file_id - The unique identifier of the file
 * @param options - Extra conditions merged into the query filter
 * @returns The matching lean file document, or null when nothing matches
 */
async function findFileById(
  file_id: string,
  options: Record<string, unknown> = {},
): Promise<IMongoFile | null> {
  const FileModel = mongoose.models.File as Model<IMongoFile>;
  const conditions: FilterQuery<IMongoFile> = { file_id, ...options };
  return FileModel.findOne(conditions).lean();
}
|
|
|
|
/** Projection map for queries: a value of 1 includes the field, 0 excludes it. */
type SelectProjection = { [field: string]: 0 | 1 };
|
|
|
|
/**
 * Fetches every file document matching `filter` as lean objects.
 *
 * Sorting: the caller's sort keys are spread over a default of
 * `{ updatedAt: -1 }`, so `updatedAt` descending applies unless the caller
 * supplies its own `updatedAt` entry; any other keys are added after it.
 *
 * Projection: when `selectFields` is null or undefined the `text` field is
 * excluded; otherwise the given projection is used verbatim.
 *
 * @param filter - The filter criteria to apply
 * @param _sortOptions - Optional sort keys merged over the default sort
 * @param selectFields - Fields to include/exclude; defaults to excluding `text`
 * @returns A promise that resolves to the matching file documents
 */
async function getFiles(
  filter: FilterQuery<IMongoFile>,
  _sortOptions?: Record<string, SortOrder> | null,
  selectFields?: SelectProjection | string | null,
): Promise<IMongoFile[] | null> {
  const FileModel = mongoose.models.File as Model<IMongoFile>;
  const projection = selectFields ?? { text: 0 };
  const ordering = { updatedAt: -1 as SortOrder, ..._sortOptions };
  return await FileModel.find(filter).select(projection).sort(ordering).lean();
}
|
|
|
|
/**
 * Retrieves the tool-relevant files among the given file IDs.
 *
 * Which files qualify depends on the requested tool resources:
 * - `EToolResources.context`: files in the `FileContext.agents` context whose
 *   `text` field exists and is not null
 * - `EToolResources.file_search`: files flagged `embedded: true`
 *
 * Files in the `FileContext.execute_code` context are always excluded here.
 * Returns an empty array when no IDs are given, when the resource set is
 * missing or empty, or when none of the requested resources is handled above.
 *
 * @param fileIds - Array of file_id strings to search for
 * @param toolResourceSet - Tool resources that decide which files qualify
 * @returns Files that match the criteria (the `text` field is not projected)
 * @throws Error with a generic message if the lookup fails (details are logged)
 */
async function getToolFilesByIds(
  fileIds: string[],
  toolResourceSet?: Set<EToolResources>,
): Promise<IMongoFile[]> {
  if (!fileIds?.length || !toolResourceSet?.size) {
    return [];
  }
  const requested = toolResourceSet;

  try {
    // One candidate `$or` branch per supported tool resource.
    const branches: Array<[EToolResources, FilterQuery<IMongoFile>]> = [
      [
        EToolResources.context,
        { text: { $exists: true, $ne: null }, context: FileContext.agents },
      ],
      [EToolResources.file_search, { embedded: true }],
    ];
    const orConditions = branches
      .filter(([resource]) => requested.has(resource))
      .map(([, condition]) => condition);

    // Nothing requested that we know how to match.
    if (!orConditions.length) {
      return [];
    }

    const matched = await getFiles(
      {
        file_id: { $in: fileIds },
        context: { $ne: FileContext.execute_code },
        $or: orConditions,
      },
      { updatedAt: -1 as SortOrder },
      { text: 0 },
    );
    return matched ?? [];
  } catch (error) {
    logger.error('[getToolFilesByIds] Error retrieving tool files:', error);
    throw new Error('Error retrieving tool files');
  }
}
|
|
|
|
/**
 * Retrieves the files produced by code execution within a conversation,
 * restricted to the given thread messages.
 *
 * Only documents in the `FileContext.execute_code` context that carry
 * `metadata.fileIdentifier` and whose `messageId` is one of `messageIds`
 * are returned.
 *
 * @param conversationId - The conversation ID to search for
 * @param messageIds - Message IDs of the current thread. Although optional in
 *   the signature, an absent or empty list intentionally yields an empty
 *   result: without it, files from other branches of the conversation (or from
 *   deleted messages) could leak into the current thread.
 * @returns The matching files, or an empty array on missing input or on error
 *   (errors are logged, not thrown)
 */
async function getCodeGeneratedFiles(
  conversationId: string,
  messageIds?: string[],
): Promise<IMongoFile[]> {
  // Both a conversation and a non-empty thread filter are required (see above).
  if (!conversationId || !messageIds?.length) {
    return [];
  }

  try {
    const generated = await getFiles(
      {
        conversationId,
        context: FileContext.execute_code,
        messageId: { $exists: true, $in: messageIds },
        'metadata.fileIdentifier': { $exists: true },
      },
      { createdAt: 1 as SortOrder },
      { text: 0 },
    );
    return generated ?? [];
  } catch (error) {
    logger.error('[getCodeGeneratedFiles] Error retrieving code generated files:', error);
    return [];
  }
}
|
|
|
|
/**
 * Retrieves user-uploaded code files (as opposed to code-generated ones) by ID.
 *
 * A document qualifies when it carries `metadata.fileIdentifier` and its
 * context is anything other than `FileContext.execute_code`. The IDs are
 * expected to come from the `message.files` arrays of the current thread.
 *
 * @param fileIds - File IDs to fetch (from message.files in the thread)
 * @returns The matching files, or an empty array on missing input or on error
 *   (errors are logged, not thrown)
 */
async function getUserCodeFiles(fileIds?: string[]): Promise<IMongoFile[]> {
  if (!fileIds?.length) {
    return [];
  }

  try {
    const uploaded = await getFiles(
      {
        file_id: { $in: fileIds },
        context: { $ne: FileContext.execute_code },
        'metadata.fileIdentifier': { $exists: true },
      },
      { createdAt: 1 as SortOrder },
      { text: 0 },
    );
    return uploaded ?? [];
  } catch (error) {
    logger.error('[getUserCodeFiles] Error retrieving user code files:', error);
    return [];
  }
}
|
|
|
|
/**
 * Claims a file_id for a code-execution output, keyed by
 * (filename, conversationId) within the `FileContext.execute_code` context.
 *
 * Performs a single upsert: if a record with that key already exists it is
 * returned untouched; otherwise a record is inserted with the supplied
 * `file_id` and `user` (written via `$setOnInsert`, i.e. only on insert).
 * Callers should use the `file_id` of the returned document, which may differ
 * from the one they passed in.
 *
 * NOTE(review): convergence under truly concurrent upserts presumably relies
 * on a unique index covering the compound key - confirm against the schema.
 *
 * @param data - The compound key (`filename`, `conversationId`) plus the
 *   `file_id` and `user` to store if a new record is inserted
 * @returns The existing or newly inserted file document
 * @throws Error if the upsert unexpectedly yields no document
 */
async function claimCodeFile(data: {
  filename: string;
  conversationId: string;
  file_id: string;
  user: string;
}): Promise<IMongoFile> {
  const { filename, conversationId, file_id, user } = data;
  const FileModel = mongoose.models.File as Model<IMongoFile>;
  const compoundKey: FilterQuery<IMongoFile> = {
    filename,
    conversationId,
    context: FileContext.execute_code,
  };

  const claimed = await FileModel.findOneAndUpdate(
    compoundKey,
    { $setOnInsert: { file_id, user } },
    { upsert: true, new: true },
  ).lean();

  if (claimed) {
    return claimed as IMongoFile;
  }
  throw new Error(
    `[claimCodeFile] Failed to claim file "${filename}" for conversation ${conversationId}`,
  );
}
|
|
|
|
/**
 * Creates (or overwrites, via upsert on `file_id`) a file record.
 *
 * Unless `disableTTL` is truthy, `expiresAt` is set to one hour from now.
 * With `disableTTL`, no `expiresAt` is written at all - including any value
 * that was present in `data`.
 *
 * @param data - The file data to be stored, must contain file_id
 * @param disableTTL - Whether to skip setting the one-hour expiry
 * @returns A promise that resolves to the stored file document
 */
async function createFile(
  data: Partial<IMongoFile>,
  disableTTL?: boolean,
): Promise<IMongoFile | null> {
  const FileModel = mongoose.models.File as Model<IMongoFile>;
  const ONE_HOUR_MS = 3600 * 1000;

  const fileData: Partial<IMongoFile> = { ...data };
  if (disableTTL) {
    delete fileData.expiresAt;
  } else {
    fileData.expiresAt = new Date(Date.now() + ONE_HOUR_MS);
  }

  const upsertOptions = { new: true, upsert: true };
  return FileModel.findOneAndUpdate({ file_id: data.file_id }, fileData, upsertOptions).lean();
}
|
|
|
|
/**
 * Applies the given fields to the file identified by `file_id` and clears its
 * `expiresAt` so the record no longer carries a TTL.
 *
 * @param data - The fields to set; `file_id` selects the record and is not
 *   itself part of the update
 * @returns The updated file document, or null if no record has that file_id
 */
async function updateFile(
  data: Partial<IMongoFile> & { file_id: string },
): Promise<IMongoFile | null> {
  const FileModel = mongoose.models.File as Model<IMongoFile>;
  const { file_id, ...fields } = data;
  return FileModel.findOneAndUpdate(
    { file_id },
    { $set: fields, $unset: { expiresAt: '' } },
    { new: true },
  ).lean();
}
|
|
|
|
/**
 * Bumps the `usage` counter of the file identified by `file_id`, and clears
 * its `expiresAt` and `temp_file_id` fields in the same update.
 *
 * @param data - `file_id` of the file and an optional increment `inc`
 *   (defaults to 1 when undefined)
 * @returns The updated file document, or null if no record has that file_id
 */
async function updateFileUsage(data: {
  file_id: string;
  inc?: number;
}): Promise<IMongoFile | null> {
  const FileModel = mongoose.models.File as Model<IMongoFile>;
  // Default only on `undefined`, mirroring a destructuring default.
  const increment = data.inc === undefined ? 1 : data.inc;
  return FileModel.findOneAndUpdate(
    { file_id: data.file_id },
    {
      $inc: { usage: increment },
      $unset: { expiresAt: '', temp_file_id: '' },
    },
    { new: true },
  ).lean();
}
|
|
|
|
/**
 * Removes the file record with the given `file_id`.
 *
 * @param file_id - The unique identifier of the file to delete
 * @returns The deleted file document, or null if no record matched
 */
async function deleteFile(file_id: string): Promise<IMongoFile | null> {
  const FileModel = mongoose.models.File as Model<IMongoFile>;
  const byFileId: FilterQuery<IMongoFile> = { file_id };
  return FileModel.findOneAndDelete(byFileId).lean();
}
|
|
|
|
/**
 * Removes a single file record matching an arbitrary filter.
 *
 * @param filter - The filter criteria selecting the record to delete
 * @returns The deleted file document, or null if no record matched
 */
async function deleteFileByFilter(filter: FilterQuery<IMongoFile>): Promise<IMongoFile | null> {
  const FileModel = mongoose.models.File as Model<IMongoFile>;
  const removal = FileModel.findOneAndDelete(filter);
  return removal.lean();
}
|
|
|
|
/**
 * Deletes multiple files, selected by file_id and/or owning user.
 *
 * Selection rules:
 * - `file_ids` only: deletes the files with those IDs.
 * - `file_ids` and `user`: deletes only the listed files that belong to
 *   `user`. (Previously the ID list was discarded whenever `user` was given,
 *   which deleted every file of that user.)
 * - `user` only (`file_ids` nullish or empty): deletes all files of `user`.
 *   This preserves the existing "purge a user's files" usage.
 * - neither: nothing is deleted and no query is issued.
 *
 * @param file_ids - The unique identifiers of the files to delete
 * @param user - Optional user ID; narrows the deletion to that user's files
 * @returns A promise that resolves to the result of the deletion operation
 */
async function deleteFiles(
  file_ids: string[],
  user?: string,
): Promise<{ deletedCount?: number }> {
  const File = mongoose.models.File as Model<IMongoFile>;
  // Callers outside TypeScript may pass null/undefined for file_ids.
  const hasIds = Array.isArray(file_ids) && file_ids.length > 0;

  if (!hasIds && !user) {
    return { deletedCount: 0 };
  }

  const deleteQuery: FilterQuery<IMongoFile> = {};
  if (hasIds) {
    deleteQuery.file_id = { $in: file_ids };
  }
  if (user) {
    deleteQuery.user = user;
  }
  return File.deleteMany(deleteQuery);
}
|
|
|
|
/**
 * Writes new `filepath` values for many files in one bulk operation and logs
 * how many records were modified. Does nothing for a missing or empty list.
 *
 * @param updates - Array of updates in the format { file_id, filepath }
 */
async function batchUpdateFiles(
  updates: Array<{ file_id: string; filepath: string }>,
): Promise<void> {
  if (!updates?.length) {
    return;
  }

  const FileModel = mongoose.models.File as Model<IMongoFile>;
  const operations = updates.map(({ file_id, filepath }) => ({
    updateOne: {
      filter: { file_id },
      update: { $set: { filepath } },
    },
  }));

  const { modifiedCount } = await FileModel.bulkWrite(operations);
  logger.info(`Updated ${modifiedCount} files with new S3 URLs`);
}
|
|
|
|
/**
 * Increments the usage counter of a set of files.
 *
 * The IDs are taken from `files` first and then from `fileIds`; each distinct
 * ID is updated exactly once, in first-seen order, and all updates run
 * concurrently.
 *
 * @param files - Array of file objects to process
 * @param fileIds - Optional array of additional file IDs to process
 * @returns The updated file documents (IDs that matched no record are omitted)
 */
async function updateFilesUsage(
  files: Array<{ file_id: string }>,
  fileIds?: string[],
): Promise<IMongoFile[]> {
  const candidateIds = files.map(({ file_id }) => file_id).concat(fileIds ?? []);
  // A Set keeps first-insertion order, so de-duplication preserves ordering.
  const uniqueIds = [...new Set(candidateIds)];

  const updated = await Promise.all(uniqueIds.map((file_id) => updateFileUsage({ file_id })));
  return updated.filter((doc): doc is IMongoFile => doc != null);
}
|
|
|
|
// Public surface of the factory: every entry is one of the functions declared
// above, all sharing the mongoose instance passed to the factory.
const fileMethods = {
  findFileById,
  getFiles,
  getToolFilesByIds,
  getCodeGeneratedFiles,
  getUserCodeFiles,
  claimCodeFile,
  createFile,
  updateFile,
  updateFileUsage,
  deleteFile,
  deleteFiles,
  deleteFileByFilter,
  batchUpdateFiles,
  updateFilesUsage,
};
return fileMethods;
|
|
}
|
|
|
|
/** The set of file methods produced by `createFileMethods`, derived from its return type. */
export type FileMethods = ReturnType<typeof createFileMethods>;
|