mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-09-22 08:12:00 +02:00
📋 fix: Agent Resource Deduplication & Sharing Duplicate False Positive (#7835)
* fix: `primeResources` to Prevent Duplicate Files Across Sources - Added multiple test cases to ensure that the `primeResources` function correctly handles duplicate files from OCR and attachments, including scenarios with shared files, files without IDs, and duplicates within attachments. - Implemented logic to categorize files into appropriate tool resources while preventing duplicates across different categories. - Enhanced error handling and ensured that unique files are returned in the final attachments array. * fix: Update ToolService to handle single OCR tool case (no loaded tool necessary) * refactor: Add skipVersioning option to updateAgent for isolated updates - for now, mainly concerns sharing/unsharing of agents * chore: Update translation for shared agent message in UI
This commit is contained in:
parent
cdf42b3a03
commit
13c7ceb918
7 changed files with 730 additions and 43 deletions
|
@ -259,11 +259,12 @@ const isDuplicateVersion = (updateData, currentData, versions, actionsHash = nul
|
|||
* @param {Object} [options] - Optional configuration object.
|
||||
* @param {string} [options.updatingUserId] - The ID of the user performing the update (used for tracking non-author updates).
|
||||
* @param {boolean} [options.forceVersion] - Force creation of a new version even if no fields changed.
|
||||
* @param {boolean} [options.skipVersioning] - Skip version creation entirely (useful for isolated operations like sharing).
|
||||
* @returns {Promise<Agent>} The updated or newly created agent document as a plain object.
|
||||
* @throws {Error} If the update would create a duplicate version
|
||||
*/
|
||||
const updateAgent = async (searchParameter, updateData, options = {}) => {
|
||||
const { updatingUserId = null, forceVersion = false } = options;
|
||||
const { updatingUserId = null, forceVersion = false, skipVersioning = false } = options;
|
||||
const mongoOptions = { new: true, upsert: false };
|
||||
|
||||
const currentAgent = await Agent.findOne(searchParameter);
|
||||
|
@ -300,7 +301,8 @@ const updateAgent = async (searchParameter, updateData, options = {}) => {
|
|||
}
|
||||
|
||||
const shouldCreateVersion =
|
||||
forceVersion || Object.keys(directUpdates).length > 0 || $push || $pull || $addToSet;
|
||||
!skipVersioning &&
|
||||
(forceVersion || Object.keys(directUpdates).length > 0 || $push || $pull || $addToSet);
|
||||
|
||||
if (shouldCreateVersion) {
|
||||
const duplicateVersion = isDuplicateVersion(updateData, versionData, versions, actionsHash);
|
||||
|
@ -335,7 +337,7 @@ const updateAgent = async (searchParameter, updateData, options = {}) => {
|
|||
versionEntry.updatedBy = new mongoose.Types.ObjectId(updatingUserId);
|
||||
}
|
||||
|
||||
if (shouldCreateVersion || forceVersion) {
|
||||
if (shouldCreateVersion) {
|
||||
updateData.$push = {
|
||||
...($push || {}),
|
||||
versions: versionEntry,
|
||||
|
@ -546,7 +548,10 @@ const updateAgentProjects = async ({ user, agentId, projectIds, removeProjectIds
|
|||
delete updateQuery.author;
|
||||
}
|
||||
|
||||
const updatedAgent = await updateAgent(updateQuery, updateOps, { updatingUserId: user.id });
|
||||
const updatedAgent = await updateAgent(updateQuery, updateOps, {
|
||||
updatingUserId: user.id,
|
||||
skipVersioning: true,
|
||||
});
|
||||
if (updatedAgent) {
|
||||
return updatedAgent;
|
||||
}
|
||||
|
|
|
@ -2447,6 +2447,65 @@ describe('models/Agent', () => {
|
|||
expect(updated2.description).toBe('Another description');
|
||||
});
|
||||
|
||||
test('should skip version creation when skipVersioning option is used', async () => {
|
||||
const agentId = `agent_${uuidv4()}`;
|
||||
const authorId = new mongoose.Types.ObjectId();
|
||||
const projectId1 = new mongoose.Types.ObjectId();
|
||||
const projectId2 = new mongoose.Types.ObjectId();
|
||||
|
||||
// Create agent with initial projectIds
|
||||
await createAgent({
|
||||
id: agentId,
|
||||
name: 'Test Agent',
|
||||
provider: 'test',
|
||||
model: 'test-model',
|
||||
author: authorId,
|
||||
projectIds: [projectId1],
|
||||
});
|
||||
|
||||
// Share agent using updateAgentProjects (which uses skipVersioning)
|
||||
const shared = await updateAgentProjects({
|
||||
user: { id: authorId.toString() }, // Use the same author ID
|
||||
agentId: agentId,
|
||||
projectIds: [projectId2.toString()],
|
||||
});
|
||||
|
||||
// Should NOT create a new version due to skipVersioning
|
||||
expect(shared.versions).toHaveLength(1);
|
||||
expect(shared.projectIds.map((id) => id.toString())).toContain(projectId1.toString());
|
||||
expect(shared.projectIds.map((id) => id.toString())).toContain(projectId2.toString());
|
||||
|
||||
// Unshare agent using updateAgentProjects
|
||||
const unshared = await updateAgentProjects({
|
||||
user: { id: authorId.toString() },
|
||||
agentId: agentId,
|
||||
removeProjectIds: [projectId1.toString()],
|
||||
});
|
||||
|
||||
// Still should NOT create a new version
|
||||
expect(unshared.versions).toHaveLength(1);
|
||||
expect(unshared.projectIds.map((id) => id.toString())).not.toContain(projectId1.toString());
|
||||
expect(unshared.projectIds.map((id) => id.toString())).toContain(projectId2.toString());
|
||||
|
||||
// Regular update without skipVersioning should create a version
|
||||
const regularUpdate = await updateAgent(
|
||||
{ id: agentId },
|
||||
{ description: 'Updated description' },
|
||||
);
|
||||
|
||||
expect(regularUpdate.versions).toHaveLength(2);
|
||||
expect(regularUpdate.description).toBe('Updated description');
|
||||
|
||||
// Direct updateAgent with MongoDB operators should still create versions
|
||||
const directUpdate = await updateAgent(
|
||||
{ id: agentId },
|
||||
{ $addToSet: { projectIds: { $each: [projectId1] } } },
|
||||
);
|
||||
|
||||
expect(directUpdate.versions).toHaveLength(3);
|
||||
expect(directUpdate.projectIds.length).toBe(2);
|
||||
});
|
||||
|
||||
test('should preserve agent_ids in version history', async () => {
|
||||
const agentId = `agent_${uuidv4()}`;
|
||||
const authorId = new mongoose.Types.ObjectId();
|
||||
|
|
|
@ -169,12 +169,18 @@ const updateAgentHandler = async (req, res) => {
|
|||
});
|
||||
}
|
||||
|
||||
/** @type {boolean} */
|
||||
const isProjectUpdate = (projectIds?.length ?? 0) > 0 || (removeProjectIds?.length ?? 0) > 0;
|
||||
|
||||
let updatedAgent =
|
||||
Object.keys(updateData).length > 0
|
||||
? await updateAgent({ id }, updateData, { updatingUserId: req.user.id })
|
||||
? await updateAgent({ id }, updateData, {
|
||||
updatingUserId: req.user.id,
|
||||
skipVersioning: isProjectUpdate,
|
||||
})
|
||||
: existingAgent;
|
||||
|
||||
if (projectIds || removeProjectIds) {
|
||||
if (isProjectUpdate) {
|
||||
updatedAgent = await updateAgentProjects({
|
||||
user: req.user,
|
||||
agentId: id,
|
||||
|
|
|
@ -500,6 +500,8 @@ async function processRequiredActions(client, requiredActions) {
|
|||
async function loadAgentTools({ req, res, agent, tool_resources, openAIApiKey }) {
|
||||
if (!agent.tools || agent.tools.length === 0) {
|
||||
return {};
|
||||
} else if (agent.tools && agent.tools.length === 1 && agent.tools[0] === AgentCapabilities.ocr) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const endpointsConfig = await getEndpointsConfig(req);
|
||||
|
|
|
@ -517,7 +517,7 @@
|
|||
"com_ui_agent_editing_allowed": "Other users can already edit this agent",
|
||||
"com_ui_agent_recursion_limit": "Max Agent Steps",
|
||||
"com_ui_agent_recursion_limit_info": "Limits how many steps the agent can take in a run before giving a final response. Default is 25 steps. A step is either an AI API request or a tool usage round. For example, a basic tool interaction takes 3 steps: initial request, tool usage, and follow-up request.",
|
||||
"com_ui_agent_shared_to_all": "something needs to go here. was empty",
|
||||
"com_ui_agent_shared_to_all": "Agent is shared to all users",
|
||||
"com_ui_agent_var": "{{0}} agent",
|
||||
"com_ui_agent_version": "Version",
|
||||
"com_ui_agent_version_active": "Active Version",
|
||||
|
|
|
@ -404,6 +404,453 @@ describe('primeResources', () => {
|
|||
expect(result.attachments?.[0]?.file_id).toBe('ocr-file-1');
|
||||
expect(result.attachments?.[1]?.file_id).toBe('file1');
|
||||
});
|
||||
|
||||
it('should prevent duplicate files when same file exists in OCR and attachments', async () => {
|
||||
const sharedFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'shared-file-id',
|
||||
filename: 'document.pdf',
|
||||
filepath: '/uploads/document.pdf',
|
||||
object: 'file',
|
||||
type: 'application/pdf',
|
||||
bytes: 1024,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
};
|
||||
|
||||
const mockOcrFiles: TFile[] = [sharedFile];
|
||||
const mockAttachmentFiles: TFile[] = [
|
||||
sharedFile,
|
||||
{
|
||||
user: 'user1',
|
||||
file_id: 'unique-file',
|
||||
filename: 'other.txt',
|
||||
filepath: '/uploads/other.txt',
|
||||
object: 'file',
|
||||
type: 'text/plain',
|
||||
bytes: 256,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
},
|
||||
];
|
||||
|
||||
mockGetFiles.mockResolvedValue(mockOcrFiles);
|
||||
const attachments = Promise.resolve(mockAttachmentFiles);
|
||||
|
||||
const tool_resources = {
|
||||
[EToolResources.ocr]: {
|
||||
file_ids: ['shared-file-id'],
|
||||
},
|
||||
};
|
||||
|
||||
const result = await primeResources({
|
||||
req: mockReq,
|
||||
getFiles: mockGetFiles,
|
||||
requestFileSet,
|
||||
attachments,
|
||||
tool_resources,
|
||||
});
|
||||
|
||||
// Should only have 2 files, not 3 (no duplicate)
|
||||
expect(result.attachments).toHaveLength(2);
|
||||
expect(result.attachments?.filter((f) => f?.file_id === 'shared-file-id')).toHaveLength(1);
|
||||
expect(result.attachments?.find((f) => f?.file_id === 'unique-file')).toBeDefined();
|
||||
});
|
||||
|
||||
it('should still categorize duplicate files for tool_resources', async () => {
|
||||
const sharedFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'shared-file-id',
|
||||
filename: 'script.py',
|
||||
filepath: '/uploads/script.py',
|
||||
object: 'file',
|
||||
type: 'text/x-python',
|
||||
bytes: 512,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
metadata: {
|
||||
fileIdentifier: 'python-script',
|
||||
},
|
||||
};
|
||||
|
||||
const mockOcrFiles: TFile[] = [sharedFile];
|
||||
const mockAttachmentFiles: TFile[] = [sharedFile];
|
||||
|
||||
mockGetFiles.mockResolvedValue(mockOcrFiles);
|
||||
const attachments = Promise.resolve(mockAttachmentFiles);
|
||||
|
||||
const tool_resources = {
|
||||
[EToolResources.ocr]: {
|
||||
file_ids: ['shared-file-id'],
|
||||
},
|
||||
};
|
||||
|
||||
const result = await primeResources({
|
||||
req: mockReq,
|
||||
getFiles: mockGetFiles,
|
||||
requestFileSet,
|
||||
attachments,
|
||||
tool_resources,
|
||||
});
|
||||
|
||||
// File should appear only once in attachments
|
||||
expect(result.attachments).toHaveLength(1);
|
||||
expect(result.attachments?.[0]?.file_id).toBe('shared-file-id');
|
||||
|
||||
// But should still be categorized in tool_resources
|
||||
expect(result.tool_resources?.[EToolResources.execute_code]?.files).toHaveLength(1);
|
||||
expect(result.tool_resources?.[EToolResources.execute_code]?.files?.[0]?.file_id).toBe(
|
||||
'shared-file-id',
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle multiple duplicate files', async () => {
|
||||
const file1: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'file-1',
|
||||
filename: 'doc1.pdf',
|
||||
filepath: '/uploads/doc1.pdf',
|
||||
object: 'file',
|
||||
type: 'application/pdf',
|
||||
bytes: 1024,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
};
|
||||
|
||||
const file2: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'file-2',
|
||||
filename: 'doc2.pdf',
|
||||
filepath: '/uploads/doc2.pdf',
|
||||
object: 'file',
|
||||
type: 'application/pdf',
|
||||
bytes: 2048,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
};
|
||||
|
||||
const uniqueFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'unique-file',
|
||||
filename: 'unique.txt',
|
||||
filepath: '/uploads/unique.txt',
|
||||
object: 'file',
|
||||
type: 'text/plain',
|
||||
bytes: 256,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
};
|
||||
|
||||
const mockOcrFiles: TFile[] = [file1, file2];
|
||||
const mockAttachmentFiles: TFile[] = [file1, file2, uniqueFile];
|
||||
|
||||
mockGetFiles.mockResolvedValue(mockOcrFiles);
|
||||
const attachments = Promise.resolve(mockAttachmentFiles);
|
||||
|
||||
const tool_resources = {
|
||||
[EToolResources.ocr]: {
|
||||
file_ids: ['file-1', 'file-2'],
|
||||
},
|
||||
};
|
||||
|
||||
const result = await primeResources({
|
||||
req: mockReq,
|
||||
getFiles: mockGetFiles,
|
||||
requestFileSet,
|
||||
attachments,
|
||||
tool_resources,
|
||||
});
|
||||
|
||||
// Should have 3 files total (2 from OCR + 1 unique from attachments)
|
||||
expect(result.attachments).toHaveLength(3);
|
||||
|
||||
// Each file should appear only once
|
||||
const fileIds = result.attachments?.map((f) => f?.file_id);
|
||||
expect(fileIds).toContain('file-1');
|
||||
expect(fileIds).toContain('file-2');
|
||||
expect(fileIds).toContain('unique-file');
|
||||
|
||||
// Check no duplicates
|
||||
const uniqueFileIds = new Set(fileIds);
|
||||
expect(uniqueFileIds.size).toBe(fileIds?.length);
|
||||
});
|
||||
|
||||
it('should handle files without file_id gracefully', async () => {
|
||||
const fileWithoutId: Partial<TFile> = {
|
||||
user: 'user1',
|
||||
filename: 'no-id.txt',
|
||||
filepath: '/uploads/no-id.txt',
|
||||
object: 'file',
|
||||
type: 'text/plain',
|
||||
bytes: 256,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
};
|
||||
|
||||
const normalFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'normal-file',
|
||||
filename: 'normal.txt',
|
||||
filepath: '/uploads/normal.txt',
|
||||
object: 'file',
|
||||
type: 'text/plain',
|
||||
bytes: 512,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
};
|
||||
|
||||
const mockOcrFiles: TFile[] = [normalFile];
|
||||
const mockAttachmentFiles = [fileWithoutId as TFile, normalFile];
|
||||
|
||||
mockGetFiles.mockResolvedValue(mockOcrFiles);
|
||||
const attachments = Promise.resolve(mockAttachmentFiles);
|
||||
|
||||
const tool_resources = {
|
||||
[EToolResources.ocr]: {
|
||||
file_ids: ['normal-file'],
|
||||
},
|
||||
};
|
||||
|
||||
const result = await primeResources({
|
||||
req: mockReq,
|
||||
getFiles: mockGetFiles,
|
||||
requestFileSet,
|
||||
attachments,
|
||||
tool_resources,
|
||||
});
|
||||
|
||||
// Should include file without ID and one instance of normal file
|
||||
expect(result.attachments).toHaveLength(2);
|
||||
expect(result.attachments?.filter((f) => f?.file_id === 'normal-file')).toHaveLength(1);
|
||||
expect(result.attachments?.some((f) => !f?.file_id)).toBe(true);
|
||||
});
|
||||
|
||||
it('should prevent duplicates from existing tool_resources', async () => {
|
||||
const existingFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'existing-file',
|
||||
filename: 'existing.py',
|
||||
filepath: '/uploads/existing.py',
|
||||
object: 'file',
|
||||
type: 'text/x-python',
|
||||
bytes: 512,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
metadata: {
|
||||
fileIdentifier: 'python-script',
|
||||
},
|
||||
};
|
||||
|
||||
const newFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'new-file',
|
||||
filename: 'new.py',
|
||||
filepath: '/uploads/new.py',
|
||||
object: 'file',
|
||||
type: 'text/x-python',
|
||||
bytes: 256,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
metadata: {
|
||||
fileIdentifier: 'python-script',
|
||||
},
|
||||
};
|
||||
|
||||
const existingToolResources = {
|
||||
[EToolResources.execute_code]: {
|
||||
files: [existingFile],
|
||||
},
|
||||
};
|
||||
|
||||
const attachments = Promise.resolve([existingFile, newFile]);
|
||||
|
||||
const result = await primeResources({
|
||||
req: mockReq,
|
||||
getFiles: mockGetFiles,
|
||||
requestFileSet,
|
||||
attachments,
|
||||
tool_resources: existingToolResources,
|
||||
});
|
||||
|
||||
// Should only add the new file to attachments
|
||||
expect(result.attachments).toHaveLength(1);
|
||||
expect(result.attachments?.[0]?.file_id).toBe('new-file');
|
||||
|
||||
// Should not duplicate the existing file in tool_resources
|
||||
expect(result.tool_resources?.[EToolResources.execute_code]?.files).toHaveLength(2);
|
||||
const fileIds = result.tool_resources?.[EToolResources.execute_code]?.files?.map(
|
||||
(f) => f.file_id,
|
||||
);
|
||||
expect(fileIds).toEqual(['existing-file', 'new-file']);
|
||||
});
|
||||
|
||||
it('should handle duplicates within attachments array', async () => {
|
||||
const duplicatedFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'dup-file',
|
||||
filename: 'duplicate.txt',
|
||||
filepath: '/uploads/duplicate.txt',
|
||||
object: 'file',
|
||||
type: 'text/plain',
|
||||
bytes: 256,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
};
|
||||
|
||||
const uniqueFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'unique-file',
|
||||
filename: 'unique.txt',
|
||||
filepath: '/uploads/unique.txt',
|
||||
object: 'file',
|
||||
type: 'text/plain',
|
||||
bytes: 128,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
};
|
||||
|
||||
// Same file appears multiple times in attachments
|
||||
const attachments = Promise.resolve([
|
||||
duplicatedFile,
|
||||
duplicatedFile,
|
||||
uniqueFile,
|
||||
duplicatedFile,
|
||||
]);
|
||||
|
||||
const result = await primeResources({
|
||||
req: mockReq,
|
||||
getFiles: mockGetFiles,
|
||||
requestFileSet,
|
||||
attachments,
|
||||
tool_resources: {},
|
||||
});
|
||||
|
||||
// Should only have 2 unique files
|
||||
expect(result.attachments).toHaveLength(2);
|
||||
const fileIds = result.attachments?.map((f) => f?.file_id);
|
||||
expect(fileIds).toContain('dup-file');
|
||||
expect(fileIds).toContain('unique-file');
|
||||
|
||||
// Verify no duplicates
|
||||
expect(fileIds?.filter((id) => id === 'dup-file')).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('should prevent duplicates across different tool_resource categories', async () => {
|
||||
const multiPurposeFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'multi-file',
|
||||
filename: 'data.txt',
|
||||
filepath: '/uploads/data.txt',
|
||||
object: 'file',
|
||||
type: 'text/plain',
|
||||
bytes: 512,
|
||||
embedded: true, // Will be categorized as file_search
|
||||
usage: 0,
|
||||
};
|
||||
|
||||
const existingToolResources = {
|
||||
[EToolResources.file_search]: {
|
||||
files: [multiPurposeFile],
|
||||
},
|
||||
};
|
||||
|
||||
// Try to add the same file again
|
||||
const attachments = Promise.resolve([multiPurposeFile]);
|
||||
|
||||
const result = await primeResources({
|
||||
req: mockReq,
|
||||
getFiles: mockGetFiles,
|
||||
requestFileSet,
|
||||
attachments,
|
||||
tool_resources: existingToolResources,
|
||||
});
|
||||
|
||||
// Should not add to attachments (already exists)
|
||||
expect(result.attachments).toHaveLength(0);
|
||||
|
||||
// Should not duplicate in file_search
|
||||
expect(result.tool_resources?.[EToolResources.file_search]?.files).toHaveLength(1);
|
||||
expect(result.tool_resources?.[EToolResources.file_search]?.files?.[0]?.file_id).toBe(
|
||||
'multi-file',
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle complex scenario with OCR, existing tool_resources, and attachments', async () => {
|
||||
const ocrFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'ocr-file',
|
||||
filename: 'scan.pdf',
|
||||
filepath: '/uploads/scan.pdf',
|
||||
object: 'file',
|
||||
type: 'application/pdf',
|
||||
bytes: 2048,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
};
|
||||
|
||||
const existingFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'existing-file',
|
||||
filename: 'code.py',
|
||||
filepath: '/uploads/code.py',
|
||||
object: 'file',
|
||||
type: 'text/x-python',
|
||||
bytes: 512,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
metadata: {
|
||||
fileIdentifier: 'python-script',
|
||||
},
|
||||
};
|
||||
|
||||
const newFile: TFile = {
|
||||
user: 'user1',
|
||||
file_id: 'new-file',
|
||||
filename: 'image.png',
|
||||
filepath: '/uploads/image.png',
|
||||
object: 'file',
|
||||
type: 'image/png',
|
||||
bytes: 4096,
|
||||
embedded: false,
|
||||
usage: 0,
|
||||
height: 800,
|
||||
width: 600,
|
||||
};
|
||||
|
||||
mockGetFiles.mockResolvedValue([ocrFile, existingFile]); // OCR returns both files
|
||||
const attachments = Promise.resolve([existingFile, ocrFile, newFile]); // Attachments has duplicates
|
||||
|
||||
const existingToolResources = {
|
||||
[EToolResources.ocr]: {
|
||||
file_ids: ['ocr-file', 'existing-file'],
|
||||
},
|
||||
[EToolResources.execute_code]: {
|
||||
files: [existingFile],
|
||||
},
|
||||
};
|
||||
|
||||
requestFileSet.add('new-file'); // Only new-file is in request set
|
||||
|
||||
const result = await primeResources({
|
||||
req: mockReq,
|
||||
getFiles: mockGetFiles,
|
||||
requestFileSet,
|
||||
attachments,
|
||||
tool_resources: existingToolResources,
|
||||
});
|
||||
|
||||
// Should have 3 unique files total
|
||||
expect(result.attachments).toHaveLength(3);
|
||||
const attachmentIds = result.attachments?.map((f) => f?.file_id).sort();
|
||||
expect(attachmentIds).toEqual(['existing-file', 'new-file', 'ocr-file']);
|
||||
|
||||
// Check tool_resources
|
||||
expect(result.tool_resources?.[EToolResources.execute_code]?.files).toHaveLength(1);
|
||||
expect(result.tool_resources?.[EToolResources.image_edit]?.files).toHaveLength(1);
|
||||
expect(result.tool_resources?.[EToolResources.image_edit]?.files?.[0]?.file_id).toBe(
|
||||
'new-file',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('error handling', () => {
|
||||
|
|
|
@ -1,10 +1,17 @@
|
|||
import { logger } from '@librechat/data-schemas';
|
||||
import { EModelEndpoint, EToolResources, AgentCapabilities } from 'librechat-data-provider';
|
||||
import type { AgentToolResources, TFile, AgentBaseResource } from 'librechat-data-provider';
|
||||
import type { FilterQuery, QueryOptions, ProjectionType } from 'mongoose';
|
||||
import type { AgentToolResources, TFile } from 'librechat-data-provider';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { Request as ServerRequest } from 'express';
|
||||
|
||||
/**
|
||||
* Function type for retrieving files from the database
|
||||
* @param filter - MongoDB filter query for files
|
||||
* @param _sortOptions - Sorting options (currently unused)
|
||||
* @param selectFields - Field selection options
|
||||
* @returns Promise resolving to array of files
|
||||
*/
|
||||
export type TGetFiles = (
|
||||
filter: FilterQuery<IMongoFile>,
|
||||
_sortOptions: ProjectionType<IMongoFile> | null | undefined,
|
||||
|
@ -12,11 +19,125 @@ export type TGetFiles = (
|
|||
) => Promise<Array<TFile>>;
|
||||
|
||||
/**
|
||||
* @param params
|
||||
* @param params.req
|
||||
* @param params.attachments
|
||||
* @param params.requestFileSet
|
||||
* @param params.tool_resources
|
||||
* Helper function to add a file to a specific tool resource category
|
||||
* Prevents duplicate files within the same resource category
|
||||
* @param params - Parameters object
|
||||
* @param params.file - The file to add to the resource
|
||||
* @param params.resourceType - The type of tool resource (e.g., execute_code, file_search, image_edit)
|
||||
* @param params.tool_resources - The agent's tool resources object to update
|
||||
* @param params.processedResourceFiles - Set tracking processed files per resource type
|
||||
*/
|
||||
const addFileToResource = ({
|
||||
file,
|
||||
resourceType,
|
||||
tool_resources,
|
||||
processedResourceFiles,
|
||||
}: {
|
||||
file: TFile;
|
||||
resourceType: EToolResources;
|
||||
tool_resources: AgentToolResources;
|
||||
processedResourceFiles: Set<string>;
|
||||
}): void => {
|
||||
if (!file.file_id) {
|
||||
return;
|
||||
}
|
||||
|
||||
const resourceKey = `${resourceType}:${file.file_id}`;
|
||||
if (processedResourceFiles.has(resourceKey)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const resource = tool_resources[resourceType as keyof AgentToolResources] ?? {};
|
||||
if (!resource.files) {
|
||||
(tool_resources[resourceType as keyof AgentToolResources] as AgentBaseResource) = {
|
||||
...resource,
|
||||
files: [],
|
||||
};
|
||||
}
|
||||
|
||||
// Check if already exists in the files array
|
||||
const resourceFiles = tool_resources[resourceType as keyof AgentToolResources]?.files;
|
||||
const alreadyExists = resourceFiles?.some((f: TFile) => f.file_id === file.file_id);
|
||||
|
||||
if (!alreadyExists) {
|
||||
resourceFiles?.push(file);
|
||||
processedResourceFiles.add(resourceKey);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Categorizes a file into the appropriate tool resource based on its properties
|
||||
* Files are categorized as:
|
||||
* - execute_code: Files with fileIdentifier metadata
|
||||
* - file_search: Files marked as embedded
|
||||
* - image_edit: Image files in the request file set with dimensions
|
||||
* @param params - Parameters object
|
||||
* @param params.file - The file to categorize
|
||||
* @param params.tool_resources - The agent's tool resources to update
|
||||
* @param params.requestFileSet - Set of file IDs from the current request
|
||||
* @param params.processedResourceFiles - Set tracking processed files per resource type
|
||||
*/
|
||||
const categorizeFileForToolResources = ({
|
||||
file,
|
||||
tool_resources,
|
||||
requestFileSet,
|
||||
processedResourceFiles,
|
||||
}: {
|
||||
file: TFile;
|
||||
tool_resources: AgentToolResources;
|
||||
requestFileSet: Set<string>;
|
||||
processedResourceFiles: Set<string>;
|
||||
}): void => {
|
||||
if (file.metadata?.fileIdentifier) {
|
||||
addFileToResource({
|
||||
file,
|
||||
resourceType: EToolResources.execute_code,
|
||||
tool_resources,
|
||||
processedResourceFiles,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (file.embedded === true) {
|
||||
addFileToResource({
|
||||
file,
|
||||
resourceType: EToolResources.file_search,
|
||||
tool_resources,
|
||||
processedResourceFiles,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
requestFileSet.has(file.file_id) &&
|
||||
file.type.startsWith('image') &&
|
||||
file.height &&
|
||||
file.width
|
||||
) {
|
||||
addFileToResource({
|
||||
file,
|
||||
resourceType: EToolResources.image_edit,
|
||||
tool_resources,
|
||||
processedResourceFiles,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Primes resources for agent execution by processing attachments and tool resources
|
||||
* This function:
|
||||
* 1. Fetches OCR files if OCR is enabled
|
||||
* 2. Processes attachment files
|
||||
* 3. Categorizes files into appropriate tool resources
|
||||
* 4. Prevents duplicate files across all sources
|
||||
*
|
||||
* @param params - Parameters object
|
||||
* @param params.req - Express request object containing app configuration
|
||||
* @param params.getFiles - Function to retrieve files from database
|
||||
* @param params.requestFileSet - Set of file IDs from the current request
|
||||
* @param params.attachments - Promise resolving to array of attachment files
|
||||
* @param params.tool_resources - Existing tool resources for the agent
|
||||
* @returns Promise resolving to processed attachments and updated tool resources
|
||||
*/
|
||||
export const primeResources = async ({
|
||||
req,
|
||||
|
@ -35,11 +156,48 @@ export const primeResources = async ({
|
|||
tool_resources: AgentToolResources | undefined;
|
||||
}> => {
|
||||
try {
|
||||
let attachments: Array<TFile | undefined> | undefined;
|
||||
/**
|
||||
* Array to collect all unique files that will be returned as attachments
|
||||
* Files are added from OCR results and attachment promises, with duplicates prevented
|
||||
*/
|
||||
const attachments: Array<TFile> = [];
|
||||
/**
|
||||
* Set of file IDs already added to the attachments array
|
||||
* Used to prevent duplicate files from being added multiple times
|
||||
* Pre-populated with files from non-OCR tool_resources to prevent re-adding them
|
||||
*/
|
||||
const attachmentFileIds = new Set<string>();
|
||||
/**
|
||||
* Set tracking which files have been added to specific tool resource categories
|
||||
* Format: "resourceType:fileId" (e.g., "execute_code:file123")
|
||||
* Prevents the same file from being added multiple times to the same resource
|
||||
*/
|
||||
const processedResourceFiles = new Set<string>();
|
||||
/**
|
||||
* The agent's tool resources object that will be updated with categorized files
|
||||
* Initialized from input parameter or empty object if not provided
|
||||
*/
|
||||
const tool_resources = _tool_resources ?? {};
|
||||
|
||||
// Track existing files in tool_resources to prevent duplicates within resources
|
||||
for (const [resourceType, resource] of Object.entries(tool_resources)) {
|
||||
if (resource?.files && Array.isArray(resource.files)) {
|
||||
for (const file of resource.files) {
|
||||
if (file?.file_id) {
|
||||
processedResourceFiles.add(`${resourceType}:${file.file_id}`);
|
||||
// Files from non-OCR resources should not be added to attachments from _attachments
|
||||
if (resourceType !== EToolResources.ocr) {
|
||||
attachmentFileIds.add(file.file_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const isOCREnabled = (req.app.locals?.[EModelEndpoint.agents]?.capabilities ?? []).includes(
|
||||
AgentCapabilities.ocr,
|
||||
);
|
||||
|
||||
if (tool_resources[EToolResources.ocr]?.file_ids && isOCREnabled) {
|
||||
const context = await getFiles(
|
||||
{
|
||||
|
@ -48,48 +206,58 @@ export const primeResources = async ({
|
|||
{},
|
||||
{},
|
||||
);
|
||||
attachments = (attachments ?? []).concat(context);
|
||||
|
||||
for (const file of context) {
|
||||
if (!file?.file_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Clear from attachmentFileIds if it was pre-added
|
||||
attachmentFileIds.delete(file.file_id);
|
||||
|
||||
// Add to attachments
|
||||
attachments.push(file);
|
||||
attachmentFileIds.add(file.file_id);
|
||||
|
||||
// Categorize for tool resources
|
||||
categorizeFileForToolResources({
|
||||
file,
|
||||
tool_resources,
|
||||
requestFileSet,
|
||||
processedResourceFiles,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (!_attachments) {
|
||||
return { attachments, tool_resources };
|
||||
return { attachments: attachments.length > 0 ? attachments : undefined, tool_resources };
|
||||
}
|
||||
|
||||
const files = await _attachments;
|
||||
if (!attachments) {
|
||||
attachments = [];
|
||||
}
|
||||
|
||||
for (const file of files) {
|
||||
if (!file) {
|
||||
continue;
|
||||
}
|
||||
if (file.metadata?.fileIdentifier) {
|
||||
const execute_code = tool_resources[EToolResources.execute_code] ?? {};
|
||||
if (!execute_code.files) {
|
||||
tool_resources[EToolResources.execute_code] = { ...execute_code, files: [] };
|
||||
}
|
||||
tool_resources[EToolResources.execute_code]?.files?.push(file);
|
||||
} else if (file.embedded === true) {
|
||||
const file_search = tool_resources[EToolResources.file_search] ?? {};
|
||||
if (!file_search.files) {
|
||||
tool_resources[EToolResources.file_search] = { ...file_search, files: [] };
|
||||
}
|
||||
tool_resources[EToolResources.file_search]?.files?.push(file);
|
||||
} else if (
|
||||
requestFileSet.has(file.file_id) &&
|
||||
file.type.startsWith('image') &&
|
||||
file.height &&
|
||||
file.width
|
||||
) {
|
||||
const image_edit = tool_resources[EToolResources.image_edit] ?? {};
|
||||
if (!image_edit.files) {
|
||||
tool_resources[EToolResources.image_edit] = { ...image_edit, files: [] };
|
||||
}
|
||||
tool_resources[EToolResources.image_edit]?.files?.push(file);
|
||||
|
||||
categorizeFileForToolResources({
|
||||
file,
|
||||
tool_resources,
|
||||
requestFileSet,
|
||||
processedResourceFiles,
|
||||
});
|
||||
|
||||
if (file.file_id && attachmentFileIds.has(file.file_id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
attachments.push(file);
|
||||
if (file.file_id) {
|
||||
attachmentFileIds.add(file.file_id);
|
||||
}
|
||||
}
|
||||
return { attachments, tool_resources };
|
||||
|
||||
return { attachments: attachments.length > 0 ? attachments : [], tool_resources };
|
||||
} catch (error) {
|
||||
logger.error('Error priming resources', error);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue