📋 fix: Agent Resource Deduplication & Sharing Duplicate False Positive (#7835)

* fix: `primeResources` to Prevent Duplicate Files Across Sources

- Added multiple test cases to ensure that the `primeResources` function correctly handles duplicate files from OCR and attachments, including scenarios with shared files, files without IDs, and duplicates within attachments.
- Implemented logic to categorize files into appropriate tool resources while preventing duplicates across different categories.
- Enhanced error handling and ensured that unique files are returned in the final attachments array.

* fix: Update ToolService to handle single OCR tool case (no loaded tool necessary)

* refactor: Add skipVersioning option to updateAgent for isolated updates

- for now, mainly concerns sharing/unsharing of agents

* chore: Update translation for shared agent message in UI
This commit is contained in:
Danny Avila 2025-06-11 14:17:48 -04:00 committed by GitHub
parent cdf42b3a03
commit 13c7ceb918
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 730 additions and 43 deletions

View file

@ -404,6 +404,453 @@ describe('primeResources', () => {
expect(result.attachments?.[0]?.file_id).toBe('ocr-file-1');
expect(result.attachments?.[1]?.file_id).toBe('file1');
});
it('should prevent duplicate files when same file exists in OCR and attachments', async () => {
const sharedFile: TFile = {
user: 'user1',
file_id: 'shared-file-id',
filename: 'document.pdf',
filepath: '/uploads/document.pdf',
object: 'file',
type: 'application/pdf',
bytes: 1024,
embedded: false,
usage: 0,
};
const mockOcrFiles: TFile[] = [sharedFile];
const mockAttachmentFiles: TFile[] = [
sharedFile,
{
user: 'user1',
file_id: 'unique-file',
filename: 'other.txt',
filepath: '/uploads/other.txt',
object: 'file',
type: 'text/plain',
bytes: 256,
embedded: false,
usage: 0,
},
];
mockGetFiles.mockResolvedValue(mockOcrFiles);
const attachments = Promise.resolve(mockAttachmentFiles);
const tool_resources = {
[EToolResources.ocr]: {
file_ids: ['shared-file-id'],
},
};
const result = await primeResources({
req: mockReq,
getFiles: mockGetFiles,
requestFileSet,
attachments,
tool_resources,
});
// Should only have 2 files, not 3 (no duplicate)
expect(result.attachments).toHaveLength(2);
expect(result.attachments?.filter((f) => f?.file_id === 'shared-file-id')).toHaveLength(1);
expect(result.attachments?.find((f) => f?.file_id === 'unique-file')).toBeDefined();
});
it('should still categorize duplicate files for tool_resources', async () => {
const sharedFile: TFile = {
user: 'user1',
file_id: 'shared-file-id',
filename: 'script.py',
filepath: '/uploads/script.py',
object: 'file',
type: 'text/x-python',
bytes: 512,
embedded: false,
usage: 0,
metadata: {
fileIdentifier: 'python-script',
},
};
const mockOcrFiles: TFile[] = [sharedFile];
const mockAttachmentFiles: TFile[] = [sharedFile];
mockGetFiles.mockResolvedValue(mockOcrFiles);
const attachments = Promise.resolve(mockAttachmentFiles);
const tool_resources = {
[EToolResources.ocr]: {
file_ids: ['shared-file-id'],
},
};
const result = await primeResources({
req: mockReq,
getFiles: mockGetFiles,
requestFileSet,
attachments,
tool_resources,
});
// File should appear only once in attachments
expect(result.attachments).toHaveLength(1);
expect(result.attachments?.[0]?.file_id).toBe('shared-file-id');
// But should still be categorized in tool_resources
expect(result.tool_resources?.[EToolResources.execute_code]?.files).toHaveLength(1);
expect(result.tool_resources?.[EToolResources.execute_code]?.files?.[0]?.file_id).toBe(
'shared-file-id',
);
});
it('should handle multiple duplicate files', async () => {
const file1: TFile = {
user: 'user1',
file_id: 'file-1',
filename: 'doc1.pdf',
filepath: '/uploads/doc1.pdf',
object: 'file',
type: 'application/pdf',
bytes: 1024,
embedded: false,
usage: 0,
};
const file2: TFile = {
user: 'user1',
file_id: 'file-2',
filename: 'doc2.pdf',
filepath: '/uploads/doc2.pdf',
object: 'file',
type: 'application/pdf',
bytes: 2048,
embedded: false,
usage: 0,
};
const uniqueFile: TFile = {
user: 'user1',
file_id: 'unique-file',
filename: 'unique.txt',
filepath: '/uploads/unique.txt',
object: 'file',
type: 'text/plain',
bytes: 256,
embedded: false,
usage: 0,
};
const mockOcrFiles: TFile[] = [file1, file2];
const mockAttachmentFiles: TFile[] = [file1, file2, uniqueFile];
mockGetFiles.mockResolvedValue(mockOcrFiles);
const attachments = Promise.resolve(mockAttachmentFiles);
const tool_resources = {
[EToolResources.ocr]: {
file_ids: ['file-1', 'file-2'],
},
};
const result = await primeResources({
req: mockReq,
getFiles: mockGetFiles,
requestFileSet,
attachments,
tool_resources,
});
// Should have 3 files total (2 from OCR + 1 unique from attachments)
expect(result.attachments).toHaveLength(3);
// Each file should appear only once
const fileIds = result.attachments?.map((f) => f?.file_id);
expect(fileIds).toContain('file-1');
expect(fileIds).toContain('file-2');
expect(fileIds).toContain('unique-file');
// Check no duplicates
const uniqueFileIds = new Set(fileIds);
expect(uniqueFileIds.size).toBe(fileIds?.length);
});
it('should handle files without file_id gracefully', async () => {
const fileWithoutId: Partial<TFile> = {
user: 'user1',
filename: 'no-id.txt',
filepath: '/uploads/no-id.txt',
object: 'file',
type: 'text/plain',
bytes: 256,
embedded: false,
usage: 0,
};
const normalFile: TFile = {
user: 'user1',
file_id: 'normal-file',
filename: 'normal.txt',
filepath: '/uploads/normal.txt',
object: 'file',
type: 'text/plain',
bytes: 512,
embedded: false,
usage: 0,
};
const mockOcrFiles: TFile[] = [normalFile];
const mockAttachmentFiles = [fileWithoutId as TFile, normalFile];
mockGetFiles.mockResolvedValue(mockOcrFiles);
const attachments = Promise.resolve(mockAttachmentFiles);
const tool_resources = {
[EToolResources.ocr]: {
file_ids: ['normal-file'],
},
};
const result = await primeResources({
req: mockReq,
getFiles: mockGetFiles,
requestFileSet,
attachments,
tool_resources,
});
// Should include file without ID and one instance of normal file
expect(result.attachments).toHaveLength(2);
expect(result.attachments?.filter((f) => f?.file_id === 'normal-file')).toHaveLength(1);
expect(result.attachments?.some((f) => !f?.file_id)).toBe(true);
});
it('should prevent duplicates from existing tool_resources', async () => {
const existingFile: TFile = {
user: 'user1',
file_id: 'existing-file',
filename: 'existing.py',
filepath: '/uploads/existing.py',
object: 'file',
type: 'text/x-python',
bytes: 512,
embedded: false,
usage: 0,
metadata: {
fileIdentifier: 'python-script',
},
};
const newFile: TFile = {
user: 'user1',
file_id: 'new-file',
filename: 'new.py',
filepath: '/uploads/new.py',
object: 'file',
type: 'text/x-python',
bytes: 256,
embedded: false,
usage: 0,
metadata: {
fileIdentifier: 'python-script',
},
};
const existingToolResources = {
[EToolResources.execute_code]: {
files: [existingFile],
},
};
const attachments = Promise.resolve([existingFile, newFile]);
const result = await primeResources({
req: mockReq,
getFiles: mockGetFiles,
requestFileSet,
attachments,
tool_resources: existingToolResources,
});
// Should only add the new file to attachments
expect(result.attachments).toHaveLength(1);
expect(result.attachments?.[0]?.file_id).toBe('new-file');
// Should not duplicate the existing file in tool_resources
expect(result.tool_resources?.[EToolResources.execute_code]?.files).toHaveLength(2);
const fileIds = result.tool_resources?.[EToolResources.execute_code]?.files?.map(
(f) => f.file_id,
);
expect(fileIds).toEqual(['existing-file', 'new-file']);
});
it('should handle duplicates within attachments array', async () => {
const duplicatedFile: TFile = {
user: 'user1',
file_id: 'dup-file',
filename: 'duplicate.txt',
filepath: '/uploads/duplicate.txt',
object: 'file',
type: 'text/plain',
bytes: 256,
embedded: false,
usage: 0,
};
const uniqueFile: TFile = {
user: 'user1',
file_id: 'unique-file',
filename: 'unique.txt',
filepath: '/uploads/unique.txt',
object: 'file',
type: 'text/plain',
bytes: 128,
embedded: false,
usage: 0,
};
// Same file appears multiple times in attachments
const attachments = Promise.resolve([
duplicatedFile,
duplicatedFile,
uniqueFile,
duplicatedFile,
]);
const result = await primeResources({
req: mockReq,
getFiles: mockGetFiles,
requestFileSet,
attachments,
tool_resources: {},
});
// Should only have 2 unique files
expect(result.attachments).toHaveLength(2);
const fileIds = result.attachments?.map((f) => f?.file_id);
expect(fileIds).toContain('dup-file');
expect(fileIds).toContain('unique-file');
// Verify no duplicates
expect(fileIds?.filter((id) => id === 'dup-file')).toHaveLength(1);
});
it('should prevent duplicates across different tool_resource categories', async () => {
const multiPurposeFile: TFile = {
user: 'user1',
file_id: 'multi-file',
filename: 'data.txt',
filepath: '/uploads/data.txt',
object: 'file',
type: 'text/plain',
bytes: 512,
embedded: true, // Will be categorized as file_search
usage: 0,
};
const existingToolResources = {
[EToolResources.file_search]: {
files: [multiPurposeFile],
},
};
// Try to add the same file again
const attachments = Promise.resolve([multiPurposeFile]);
const result = await primeResources({
req: mockReq,
getFiles: mockGetFiles,
requestFileSet,
attachments,
tool_resources: existingToolResources,
});
// Should not add to attachments (already exists)
expect(result.attachments).toHaveLength(0);
// Should not duplicate in file_search
expect(result.tool_resources?.[EToolResources.file_search]?.files).toHaveLength(1);
expect(result.tool_resources?.[EToolResources.file_search]?.files?.[0]?.file_id).toBe(
'multi-file',
);
});
it('should handle complex scenario with OCR, existing tool_resources, and attachments', async () => {
const ocrFile: TFile = {
user: 'user1',
file_id: 'ocr-file',
filename: 'scan.pdf',
filepath: '/uploads/scan.pdf',
object: 'file',
type: 'application/pdf',
bytes: 2048,
embedded: false,
usage: 0,
};
const existingFile: TFile = {
user: 'user1',
file_id: 'existing-file',
filename: 'code.py',
filepath: '/uploads/code.py',
object: 'file',
type: 'text/x-python',
bytes: 512,
embedded: false,
usage: 0,
metadata: {
fileIdentifier: 'python-script',
},
};
const newFile: TFile = {
user: 'user1',
file_id: 'new-file',
filename: 'image.png',
filepath: '/uploads/image.png',
object: 'file',
type: 'image/png',
bytes: 4096,
embedded: false,
usage: 0,
height: 800,
width: 600,
};
mockGetFiles.mockResolvedValue([ocrFile, existingFile]); // OCR returns both files
const attachments = Promise.resolve([existingFile, ocrFile, newFile]); // Attachments has duplicates
const existingToolResources = {
[EToolResources.ocr]: {
file_ids: ['ocr-file', 'existing-file'],
},
[EToolResources.execute_code]: {
files: [existingFile],
},
};
requestFileSet.add('new-file'); // Only new-file is in request set
const result = await primeResources({
req: mockReq,
getFiles: mockGetFiles,
requestFileSet,
attachments,
tool_resources: existingToolResources,
});
// Should have 3 unique files total
expect(result.attachments).toHaveLength(3);
const attachmentIds = result.attachments?.map((f) => f?.file_id).sort();
expect(attachmentIds).toEqual(['existing-file', 'new-file', 'ocr-file']);
// Check tool_resources
expect(result.tool_resources?.[EToolResources.execute_code]?.files).toHaveLength(1);
expect(result.tool_resources?.[EToolResources.image_edit]?.files).toHaveLength(1);
expect(result.tool_resources?.[EToolResources.image_edit]?.files?.[0]?.file_id).toBe(
'new-file',
);
});
});
describe('error handling', () => {

View file

@ -1,10 +1,17 @@
import { logger } from '@librechat/data-schemas';
import { EModelEndpoint, EToolResources, AgentCapabilities } from 'librechat-data-provider';
import type { AgentToolResources, TFile, AgentBaseResource } from 'librechat-data-provider';
import type { FilterQuery, QueryOptions, ProjectionType } from 'mongoose';
import type { AgentToolResources, TFile } from 'librechat-data-provider';
import type { IMongoFile } from '@librechat/data-schemas';
import type { Request as ServerRequest } from 'express';
/**
* Function type for retrieving files from the database
* @param filter - MongoDB filter query for files
* @param _sortOptions - Sorting options (currently unused)
* @param selectFields - Field selection options
* @returns Promise resolving to array of files
*/
export type TGetFiles = (
filter: FilterQuery<IMongoFile>,
_sortOptions: ProjectionType<IMongoFile> | null | undefined,
@ -12,11 +19,125 @@ export type TGetFiles = (
) => Promise<Array<TFile>>;
/**
* @param params
* @param params.req
* @param params.attachments
* @param params.requestFileSet
* @param params.tool_resources
* Helper function to add a file to a specific tool resource category
* Prevents duplicate files within the same resource category
* @param params - Parameters object
* @param params.file - The file to add to the resource
* @param params.resourceType - The type of tool resource (e.g., execute_code, file_search, image_edit)
* @param params.tool_resources - The agent's tool resources object to update
* @param params.processedResourceFiles - Set tracking processed files per resource type
*/
const addFileToResource = ({
file,
resourceType,
tool_resources,
processedResourceFiles,
}: {
file: TFile;
resourceType: EToolResources;
tool_resources: AgentToolResources;
processedResourceFiles: Set<string>;
}): void => {
if (!file.file_id) {
return;
}
const resourceKey = `${resourceType}:${file.file_id}`;
if (processedResourceFiles.has(resourceKey)) {
return;
}
const resource = tool_resources[resourceType as keyof AgentToolResources] ?? {};
if (!resource.files) {
(tool_resources[resourceType as keyof AgentToolResources] as AgentBaseResource) = {
...resource,
files: [],
};
}
// Check if already exists in the files array
const resourceFiles = tool_resources[resourceType as keyof AgentToolResources]?.files;
const alreadyExists = resourceFiles?.some((f: TFile) => f.file_id === file.file_id);
if (!alreadyExists) {
resourceFiles?.push(file);
processedResourceFiles.add(resourceKey);
}
};
/**
* Categorizes a file into the appropriate tool resource based on its properties
* Files are categorized as:
* - execute_code: Files with fileIdentifier metadata
* - file_search: Files marked as embedded
* - image_edit: Image files in the request file set with dimensions
* @param params - Parameters object
* @param params.file - The file to categorize
* @param params.tool_resources - The agent's tool resources to update
* @param params.requestFileSet - Set of file IDs from the current request
* @param params.processedResourceFiles - Set tracking processed files per resource type
*/
const categorizeFileForToolResources = ({
file,
tool_resources,
requestFileSet,
processedResourceFiles,
}: {
file: TFile;
tool_resources: AgentToolResources;
requestFileSet: Set<string>;
processedResourceFiles: Set<string>;
}): void => {
if (file.metadata?.fileIdentifier) {
addFileToResource({
file,
resourceType: EToolResources.execute_code,
tool_resources,
processedResourceFiles,
});
return;
}
if (file.embedded === true) {
addFileToResource({
file,
resourceType: EToolResources.file_search,
tool_resources,
processedResourceFiles,
});
return;
}
if (
requestFileSet.has(file.file_id) &&
file.type.startsWith('image') &&
file.height &&
file.width
) {
addFileToResource({
file,
resourceType: EToolResources.image_edit,
tool_resources,
processedResourceFiles,
});
}
};
/**
* Primes resources for agent execution by processing attachments and tool resources
* This function:
* 1. Fetches OCR files if OCR is enabled
* 2. Processes attachment files
* 3. Categorizes files into appropriate tool resources
* 4. Prevents duplicate files across all sources
*
* @param params - Parameters object
* @param params.req - Express request object containing app configuration
* @param params.getFiles - Function to retrieve files from database
* @param params.requestFileSet - Set of file IDs from the current request
* @param params.attachments - Promise resolving to array of attachment files
* @param params.tool_resources - Existing tool resources for the agent
* @returns Promise resolving to processed attachments and updated tool resources
*/
export const primeResources = async ({
req,
@ -35,11 +156,48 @@ export const primeResources = async ({
tool_resources: AgentToolResources | undefined;
}> => {
try {
let attachments: Array<TFile | undefined> | undefined;
/**
* Array to collect all unique files that will be returned as attachments
* Files are added from OCR results and attachment promises, with duplicates prevented
*/
const attachments: Array<TFile> = [];
/**
* Set of file IDs already added to the attachments array
* Used to prevent duplicate files from being added multiple times
* Pre-populated with files from non-OCR tool_resources to prevent re-adding them
*/
const attachmentFileIds = new Set<string>();
/**
* Set tracking which files have been added to specific tool resource categories
* Format: "resourceType:fileId" (e.g., "execute_code:file123")
* Prevents the same file from being added multiple times to the same resource
*/
const processedResourceFiles = new Set<string>();
/**
* The agent's tool resources object that will be updated with categorized files
* Initialized from input parameter or empty object if not provided
*/
const tool_resources = _tool_resources ?? {};
// Track existing files in tool_resources to prevent duplicates within resources
for (const [resourceType, resource] of Object.entries(tool_resources)) {
if (resource?.files && Array.isArray(resource.files)) {
for (const file of resource.files) {
if (file?.file_id) {
processedResourceFiles.add(`${resourceType}:${file.file_id}`);
// Files from non-OCR resources should not be added to attachments from _attachments
if (resourceType !== EToolResources.ocr) {
attachmentFileIds.add(file.file_id);
}
}
}
}
}
const isOCREnabled = (req.app.locals?.[EModelEndpoint.agents]?.capabilities ?? []).includes(
AgentCapabilities.ocr,
);
if (tool_resources[EToolResources.ocr]?.file_ids && isOCREnabled) {
const context = await getFiles(
{
@ -48,48 +206,58 @@ export const primeResources = async ({
{},
{},
);
attachments = (attachments ?? []).concat(context);
for (const file of context) {
if (!file?.file_id) {
continue;
}
// Clear from attachmentFileIds if it was pre-added
attachmentFileIds.delete(file.file_id);
// Add to attachments
attachments.push(file);
attachmentFileIds.add(file.file_id);
// Categorize for tool resources
categorizeFileForToolResources({
file,
tool_resources,
requestFileSet,
processedResourceFiles,
});
}
}
if (!_attachments) {
return { attachments, tool_resources };
return { attachments: attachments.length > 0 ? attachments : undefined, tool_resources };
}
const files = await _attachments;
if (!attachments) {
attachments = [];
}
for (const file of files) {
if (!file) {
continue;
}
if (file.metadata?.fileIdentifier) {
const execute_code = tool_resources[EToolResources.execute_code] ?? {};
if (!execute_code.files) {
tool_resources[EToolResources.execute_code] = { ...execute_code, files: [] };
}
tool_resources[EToolResources.execute_code]?.files?.push(file);
} else if (file.embedded === true) {
const file_search = tool_resources[EToolResources.file_search] ?? {};
if (!file_search.files) {
tool_resources[EToolResources.file_search] = { ...file_search, files: [] };
}
tool_resources[EToolResources.file_search]?.files?.push(file);
} else if (
requestFileSet.has(file.file_id) &&
file.type.startsWith('image') &&
file.height &&
file.width
) {
const image_edit = tool_resources[EToolResources.image_edit] ?? {};
if (!image_edit.files) {
tool_resources[EToolResources.image_edit] = { ...image_edit, files: [] };
}
tool_resources[EToolResources.image_edit]?.files?.push(file);
categorizeFileForToolResources({
file,
tool_resources,
requestFileSet,
processedResourceFiles,
});
if (file.file_id && attachmentFileIds.has(file.file_id)) {
continue;
}
attachments.push(file);
if (file.file_id) {
attachmentFileIds.add(file.file_id);
}
}
return { attachments, tool_resources };
return { attachments: attachments.length > 0 ? attachments : [], tool_resources };
} catch (error) {
logger.error('Error priming resources', error);