Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-17 08:50:15 +01:00)
📋 fix: Agent Resource Deduplication & Sharing Duplicate False Positive (#7835)
* fix: `primeResources` to Prevent Duplicate Files Across Sources
  - Added multiple test cases to ensure that the `primeResources` function correctly handles duplicate files from OCR and attachments, including scenarios with shared files, files without IDs, and duplicates within attachments.
  - Implemented logic to categorize files into appropriate tool resources while preventing duplicates across different categories.
  - Enhanced error handling and ensured that unique files are returned in the final attachments array.
* fix: Update ToolService to handle single OCR tool case (no loaded tool necessary)
* refactor: Add skipVersioning option to updateAgent for isolated updates - for now, mainly concerns sharing/unsharing of agents
* chore: Update translation for shared agent message in UI
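The versioning change centers on the new `skipVersioning` option on `updateAgent`, which `updateAgentProjects` and the update handler pass when only `projectIds` change. A minimal sketch of the intended call pattern, assuming an existing agent document and IDs (option and helper names are taken from the diff below; the surrounding variables are illustrative):

// Sharing/unsharing only mutates projectIds, so no new version entry is recorded:
const shared = await updateAgent(
  { id: agentId, author: authorId },
  { $addToSet: { projectIds: { $each: [projectId] } } },
  { updatingUserId: authorId, skipVersioning: true },
);

// A regular content edit still creates a version as before:
const edited = await updateAgent(
  { id: agentId, author: authorId },
  { description: 'Updated description' },
  { updatingUserId: authorId },
);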
parent cdf42b3a03
commit 13c7ceb918
7 changed files with 730 additions and 43 deletions
@@ -259,11 +259,12 @@ const isDuplicateVersion = (updateData, currentData, versions, actionsHash = nul
  * @param {Object} [options] - Optional configuration object.
  * @param {string} [options.updatingUserId] - The ID of the user performing the update (used for tracking non-author updates).
  * @param {boolean} [options.forceVersion] - Force creation of a new version even if no fields changed.
+ * @param {boolean} [options.skipVersioning] - Skip version creation entirely (useful for isolated operations like sharing).
  * @returns {Promise<Agent>} The updated or newly created agent document as a plain object.
  * @throws {Error} If the update would create a duplicate version
  */
 const updateAgent = async (searchParameter, updateData, options = {}) => {
-  const { updatingUserId = null, forceVersion = false } = options;
+  const { updatingUserId = null, forceVersion = false, skipVersioning = false } = options;
   const mongoOptions = { new: true, upsert: false };
 
   const currentAgent = await Agent.findOne(searchParameter);
@@ -300,7 +301,8 @@ const updateAgent = async (searchParameter, updateData, options = {}) => {
   }
 
   const shouldCreateVersion =
-    forceVersion || Object.keys(directUpdates).length > 0 || $push || $pull || $addToSet;
+    !skipVersioning &&
+    (forceVersion || Object.keys(directUpdates).length > 0 || $push || $pull || $addToSet);
 
   if (shouldCreateVersion) {
     const duplicateVersion = isDuplicateVersion(updateData, versionData, versions, actionsHash);
@@ -335,7 +337,7 @@ const updateAgent = async (searchParameter, updateData, options = {}) => {
     versionEntry.updatedBy = new mongoose.Types.ObjectId(updatingUserId);
   }
 
-  if (shouldCreateVersion || forceVersion) {
+  if (shouldCreateVersion) {
     updateData.$push = {
       ...($push || {}),
       versions: versionEntry,
@@ -546,7 +548,10 @@ const updateAgentProjects = async ({ user, agentId, projectIds, removeProjectIds
     delete updateQuery.author;
   }
 
-  const updatedAgent = await updateAgent(updateQuery, updateOps, { updatingUserId: user.id });
+  const updatedAgent = await updateAgent(updateQuery, updateOps, {
+    updatingUserId: user.id,
+    skipVersioning: true,
+  });
   if (updatedAgent) {
     return updatedAgent;
   }
@@ -2447,6 +2447,65 @@ describe('models/Agent', () => {
     expect(updated2.description).toBe('Another description');
   });
 
+  test('should skip version creation when skipVersioning option is used', async () => {
+    const agentId = `agent_${uuidv4()}`;
+    const authorId = new mongoose.Types.ObjectId();
+    const projectId1 = new mongoose.Types.ObjectId();
+    const projectId2 = new mongoose.Types.ObjectId();
+
+    // Create agent with initial projectIds
+    await createAgent({
+      id: agentId,
+      name: 'Test Agent',
+      provider: 'test',
+      model: 'test-model',
+      author: authorId,
+      projectIds: [projectId1],
+    });
+
+    // Share agent using updateAgentProjects (which uses skipVersioning)
+    const shared = await updateAgentProjects({
+      user: { id: authorId.toString() }, // Use the same author ID
+      agentId: agentId,
+      projectIds: [projectId2.toString()],
+    });
+
+    // Should NOT create a new version due to skipVersioning
+    expect(shared.versions).toHaveLength(1);
+    expect(shared.projectIds.map((id) => id.toString())).toContain(projectId1.toString());
+    expect(shared.projectIds.map((id) => id.toString())).toContain(projectId2.toString());
+
+    // Unshare agent using updateAgentProjects
+    const unshared = await updateAgentProjects({
+      user: { id: authorId.toString() },
+      agentId: agentId,
+      removeProjectIds: [projectId1.toString()],
+    });
+
+    // Still should NOT create a new version
+    expect(unshared.versions).toHaveLength(1);
+    expect(unshared.projectIds.map((id) => id.toString())).not.toContain(projectId1.toString());
+    expect(unshared.projectIds.map((id) => id.toString())).toContain(projectId2.toString());
+
+    // Regular update without skipVersioning should create a version
+    const regularUpdate = await updateAgent(
+      { id: agentId },
+      { description: 'Updated description' },
+    );
+
+    expect(regularUpdate.versions).toHaveLength(2);
+    expect(regularUpdate.description).toBe('Updated description');
+
+    // Direct updateAgent with MongoDB operators should still create versions
+    const directUpdate = await updateAgent(
+      { id: agentId },
+      { $addToSet: { projectIds: { $each: [projectId1] } } },
+    );
+
+    expect(directUpdate.versions).toHaveLength(3);
+    expect(directUpdate.projectIds.length).toBe(2);
+  });
+
   test('should preserve agent_ids in version history', async () => {
     const agentId = `agent_${uuidv4()}`;
     const authorId = new mongoose.Types.ObjectId();
@@ -169,12 +169,18 @@ const updateAgentHandler = async (req, res) => {
     });
   }
 
+  /** @type {boolean} */
+  const isProjectUpdate = (projectIds?.length ?? 0) > 0 || (removeProjectIds?.length ?? 0) > 0;
+
   let updatedAgent =
     Object.keys(updateData).length > 0
-      ? await updateAgent({ id }, updateData, { updatingUserId: req.user.id })
+      ? await updateAgent({ id }, updateData, {
+          updatingUserId: req.user.id,
+          skipVersioning: isProjectUpdate,
+        })
       : existingAgent;
 
-  if (projectIds || removeProjectIds) {
+  if (isProjectUpdate) {
     updatedAgent = await updateAgentProjects({
       user: req.user,
       agentId: id,
@@ -500,6 +500,8 @@ async function processRequiredActions(client, requiredActions) {
 async function loadAgentTools({ req, res, agent, tool_resources, openAIApiKey }) {
   if (!agent.tools || agent.tools.length === 0) {
     return {};
+  } else if (agent.tools && agent.tools.length === 1 && agent.tools[0] === AgentCapabilities.ocr) {
+    return {};
   }
 
   const endpointsConfig = await getEndpointsConfig(req);
@@ -517,7 +517,7 @@
   "com_ui_agent_editing_allowed": "Other users can already edit this agent",
   "com_ui_agent_recursion_limit": "Max Agent Steps",
   "com_ui_agent_recursion_limit_info": "Limits how many steps the agent can take in a run before giving a final response. Default is 25 steps. A step is either an AI API request or a tool usage round. For example, a basic tool interaction takes 3 steps: initial request, tool usage, and follow-up request.",
-  "com_ui_agent_shared_to_all": "something needs to go here. was empty",
+  "com_ui_agent_shared_to_all": "Agent is shared to all users",
   "com_ui_agent_var": "{{0}} agent",
   "com_ui_agent_version": "Version",
   "com_ui_agent_version_active": "Active Version",
@@ -404,6 +404,453 @@ describe('primeResources', () => {
       expect(result.attachments?.[0]?.file_id).toBe('ocr-file-1');
       expect(result.attachments?.[1]?.file_id).toBe('file1');
     });
 
+    it('should prevent duplicate files when same file exists in OCR and attachments', async () => {
+      const sharedFile: TFile = {
+        user: 'user1',
+        file_id: 'shared-file-id',
+        filename: 'document.pdf',
+        filepath: '/uploads/document.pdf',
+        object: 'file',
+        type: 'application/pdf',
+        bytes: 1024,
+        embedded: false,
+        usage: 0,
+      };
+
+      const mockOcrFiles: TFile[] = [sharedFile];
+      const mockAttachmentFiles: TFile[] = [
+        sharedFile,
+        {
+          user: 'user1',
+          file_id: 'unique-file',
+          filename: 'other.txt',
+          filepath: '/uploads/other.txt',
+          object: 'file',
+          type: 'text/plain',
+          bytes: 256,
+          embedded: false,
+          usage: 0,
+        },
+      ];
+
+      mockGetFiles.mockResolvedValue(mockOcrFiles);
+      const attachments = Promise.resolve(mockAttachmentFiles);
+
+      const tool_resources = {
+        [EToolResources.ocr]: {
+          file_ids: ['shared-file-id'],
+        },
+      };
+
+      const result = await primeResources({
+        req: mockReq,
+        getFiles: mockGetFiles,
+        requestFileSet,
+        attachments,
+        tool_resources,
+      });
+
+      // Should only have 2 files, not 3 (no duplicate)
+      expect(result.attachments).toHaveLength(2);
+      expect(result.attachments?.filter((f) => f?.file_id === 'shared-file-id')).toHaveLength(1);
+      expect(result.attachments?.find((f) => f?.file_id === 'unique-file')).toBeDefined();
+    });
+
+    it('should still categorize duplicate files for tool_resources', async () => {
+      const sharedFile: TFile = {
+        user: 'user1',
+        file_id: 'shared-file-id',
+        filename: 'script.py',
+        filepath: '/uploads/script.py',
+        object: 'file',
+        type: 'text/x-python',
+        bytes: 512,
+        embedded: false,
+        usage: 0,
+        metadata: {
+          fileIdentifier: 'python-script',
+        },
+      };
+
+      const mockOcrFiles: TFile[] = [sharedFile];
+      const mockAttachmentFiles: TFile[] = [sharedFile];
+
+      mockGetFiles.mockResolvedValue(mockOcrFiles);
+      const attachments = Promise.resolve(mockAttachmentFiles);
+
+      const tool_resources = {
+        [EToolResources.ocr]: {
+          file_ids: ['shared-file-id'],
+        },
+      };
+
+      const result = await primeResources({
+        req: mockReq,
+        getFiles: mockGetFiles,
+        requestFileSet,
+        attachments,
+        tool_resources,
+      });
+
+      // File should appear only once in attachments
+      expect(result.attachments).toHaveLength(1);
+      expect(result.attachments?.[0]?.file_id).toBe('shared-file-id');
+
+      // But should still be categorized in tool_resources
+      expect(result.tool_resources?.[EToolResources.execute_code]?.files).toHaveLength(1);
+      expect(result.tool_resources?.[EToolResources.execute_code]?.files?.[0]?.file_id).toBe(
+        'shared-file-id',
+      );
+    });
+
+    it('should handle multiple duplicate files', async () => {
+      const file1: TFile = {
+        user: 'user1',
+        file_id: 'file-1',
+        filename: 'doc1.pdf',
+        filepath: '/uploads/doc1.pdf',
+        object: 'file',
+        type: 'application/pdf',
+        bytes: 1024,
+        embedded: false,
+        usage: 0,
+      };
+
+      const file2: TFile = {
+        user: 'user1',
+        file_id: 'file-2',
+        filename: 'doc2.pdf',
+        filepath: '/uploads/doc2.pdf',
+        object: 'file',
+        type: 'application/pdf',
+        bytes: 2048,
+        embedded: false,
+        usage: 0,
+      };
+
+      const uniqueFile: TFile = {
+        user: 'user1',
+        file_id: 'unique-file',
+        filename: 'unique.txt',
+        filepath: '/uploads/unique.txt',
+        object: 'file',
+        type: 'text/plain',
+        bytes: 256,
+        embedded: false,
+        usage: 0,
+      };
+
+      const mockOcrFiles: TFile[] = [file1, file2];
+      const mockAttachmentFiles: TFile[] = [file1, file2, uniqueFile];
+
+      mockGetFiles.mockResolvedValue(mockOcrFiles);
+      const attachments = Promise.resolve(mockAttachmentFiles);
+
+      const tool_resources = {
+        [EToolResources.ocr]: {
+          file_ids: ['file-1', 'file-2'],
+        },
+      };
+
+      const result = await primeResources({
+        req: mockReq,
+        getFiles: mockGetFiles,
+        requestFileSet,
+        attachments,
+        tool_resources,
+      });
+
+      // Should have 3 files total (2 from OCR + 1 unique from attachments)
+      expect(result.attachments).toHaveLength(3);
+
+      // Each file should appear only once
+      const fileIds = result.attachments?.map((f) => f?.file_id);
+      expect(fileIds).toContain('file-1');
+      expect(fileIds).toContain('file-2');
+      expect(fileIds).toContain('unique-file');
+
+      // Check no duplicates
+      const uniqueFileIds = new Set(fileIds);
+      expect(uniqueFileIds.size).toBe(fileIds?.length);
+    });
+
+    it('should handle files without file_id gracefully', async () => {
+      const fileWithoutId: Partial<TFile> = {
+        user: 'user1',
+        filename: 'no-id.txt',
+        filepath: '/uploads/no-id.txt',
+        object: 'file',
+        type: 'text/plain',
+        bytes: 256,
+        embedded: false,
+        usage: 0,
+      };
+
+      const normalFile: TFile = {
+        user: 'user1',
+        file_id: 'normal-file',
+        filename: 'normal.txt',
+        filepath: '/uploads/normal.txt',
+        object: 'file',
+        type: 'text/plain',
+        bytes: 512,
+        embedded: false,
+        usage: 0,
+      };
+
+      const mockOcrFiles: TFile[] = [normalFile];
+      const mockAttachmentFiles = [fileWithoutId as TFile, normalFile];
+
+      mockGetFiles.mockResolvedValue(mockOcrFiles);
+      const attachments = Promise.resolve(mockAttachmentFiles);
+
+      const tool_resources = {
+        [EToolResources.ocr]: {
+          file_ids: ['normal-file'],
+        },
+      };
+
+      const result = await primeResources({
+        req: mockReq,
+        getFiles: mockGetFiles,
+        requestFileSet,
+        attachments,
+        tool_resources,
+      });
+
+      // Should include file without ID and one instance of normal file
+      expect(result.attachments).toHaveLength(2);
+      expect(result.attachments?.filter((f) => f?.file_id === 'normal-file')).toHaveLength(1);
+      expect(result.attachments?.some((f) => !f?.file_id)).toBe(true);
+    });
+
+    it('should prevent duplicates from existing tool_resources', async () => {
+      const existingFile: TFile = {
+        user: 'user1',
+        file_id: 'existing-file',
+        filename: 'existing.py',
+        filepath: '/uploads/existing.py',
+        object: 'file',
+        type: 'text/x-python',
+        bytes: 512,
+        embedded: false,
+        usage: 0,
+        metadata: {
+          fileIdentifier: 'python-script',
+        },
+      };
+
+      const newFile: TFile = {
+        user: 'user1',
+        file_id: 'new-file',
+        filename: 'new.py',
+        filepath: '/uploads/new.py',
+        object: 'file',
+        type: 'text/x-python',
+        bytes: 256,
+        embedded: false,
+        usage: 0,
+        metadata: {
+          fileIdentifier: 'python-script',
+        },
+      };
+
+      const existingToolResources = {
+        [EToolResources.execute_code]: {
+          files: [existingFile],
+        },
+      };
+
+      const attachments = Promise.resolve([existingFile, newFile]);
+
+      const result = await primeResources({
+        req: mockReq,
+        getFiles: mockGetFiles,
+        requestFileSet,
+        attachments,
+        tool_resources: existingToolResources,
+      });
+
+      // Should only add the new file to attachments
+      expect(result.attachments).toHaveLength(1);
+      expect(result.attachments?.[0]?.file_id).toBe('new-file');
+
+      // Should not duplicate the existing file in tool_resources
+      expect(result.tool_resources?.[EToolResources.execute_code]?.files).toHaveLength(2);
+      const fileIds = result.tool_resources?.[EToolResources.execute_code]?.files?.map(
+        (f) => f.file_id,
+      );
+      expect(fileIds).toEqual(['existing-file', 'new-file']);
+    });
+
+    it('should handle duplicates within attachments array', async () => {
+      const duplicatedFile: TFile = {
+        user: 'user1',
+        file_id: 'dup-file',
+        filename: 'duplicate.txt',
+        filepath: '/uploads/duplicate.txt',
+        object: 'file',
+        type: 'text/plain',
+        bytes: 256,
+        embedded: false,
+        usage: 0,
+      };
+
+      const uniqueFile: TFile = {
+        user: 'user1',
+        file_id: 'unique-file',
+        filename: 'unique.txt',
+        filepath: '/uploads/unique.txt',
+        object: 'file',
+        type: 'text/plain',
+        bytes: 128,
+        embedded: false,
+        usage: 0,
+      };
+
+      // Same file appears multiple times in attachments
+      const attachments = Promise.resolve([
+        duplicatedFile,
+        duplicatedFile,
+        uniqueFile,
+        duplicatedFile,
+      ]);
+
+      const result = await primeResources({
+        req: mockReq,
+        getFiles: mockGetFiles,
+        requestFileSet,
+        attachments,
+        tool_resources: {},
+      });
+
+      // Should only have 2 unique files
+      expect(result.attachments).toHaveLength(2);
+      const fileIds = result.attachments?.map((f) => f?.file_id);
+      expect(fileIds).toContain('dup-file');
+      expect(fileIds).toContain('unique-file');
+
+      // Verify no duplicates
+      expect(fileIds?.filter((id) => id === 'dup-file')).toHaveLength(1);
+    });
+
+    it('should prevent duplicates across different tool_resource categories', async () => {
+      const multiPurposeFile: TFile = {
+        user: 'user1',
+        file_id: 'multi-file',
+        filename: 'data.txt',
+        filepath: '/uploads/data.txt',
+        object: 'file',
+        type: 'text/plain',
+        bytes: 512,
+        embedded: true, // Will be categorized as file_search
+        usage: 0,
+      };
+
+      const existingToolResources = {
+        [EToolResources.file_search]: {
+          files: [multiPurposeFile],
+        },
+      };
+
+      // Try to add the same file again
+      const attachments = Promise.resolve([multiPurposeFile]);
+
+      const result = await primeResources({
+        req: mockReq,
+        getFiles: mockGetFiles,
+        requestFileSet,
+        attachments,
+        tool_resources: existingToolResources,
+      });
+
+      // Should not add to attachments (already exists)
+      expect(result.attachments).toHaveLength(0);
+
+      // Should not duplicate in file_search
+      expect(result.tool_resources?.[EToolResources.file_search]?.files).toHaveLength(1);
+      expect(result.tool_resources?.[EToolResources.file_search]?.files?.[0]?.file_id).toBe(
+        'multi-file',
+      );
+    });
+
+    it('should handle complex scenario with OCR, existing tool_resources, and attachments', async () => {
+      const ocrFile: TFile = {
+        user: 'user1',
+        file_id: 'ocr-file',
+        filename: 'scan.pdf',
+        filepath: '/uploads/scan.pdf',
+        object: 'file',
+        type: 'application/pdf',
+        bytes: 2048,
+        embedded: false,
+        usage: 0,
+      };
+
+      const existingFile: TFile = {
+        user: 'user1',
+        file_id: 'existing-file',
+        filename: 'code.py',
+        filepath: '/uploads/code.py',
+        object: 'file',
+        type: 'text/x-python',
+        bytes: 512,
+        embedded: false,
+        usage: 0,
+        metadata: {
+          fileIdentifier: 'python-script',
+        },
+      };
+
+      const newFile: TFile = {
+        user: 'user1',
+        file_id: 'new-file',
+        filename: 'image.png',
+        filepath: '/uploads/image.png',
+        object: 'file',
+        type: 'image/png',
+        bytes: 4096,
+        embedded: false,
+        usage: 0,
+        height: 800,
+        width: 600,
+      };
+
+      mockGetFiles.mockResolvedValue([ocrFile, existingFile]); // OCR returns both files
+      const attachments = Promise.resolve([existingFile, ocrFile, newFile]); // Attachments has duplicates
+
+      const existingToolResources = {
+        [EToolResources.ocr]: {
+          file_ids: ['ocr-file', 'existing-file'],
+        },
+        [EToolResources.execute_code]: {
+          files: [existingFile],
+        },
+      };
+
+      requestFileSet.add('new-file'); // Only new-file is in request set
+
+      const result = await primeResources({
+        req: mockReq,
+        getFiles: mockGetFiles,
+        requestFileSet,
+        attachments,
+        tool_resources: existingToolResources,
+      });
+
+      // Should have 3 unique files total
+      expect(result.attachments).toHaveLength(3);
+      const attachmentIds = result.attachments?.map((f) => f?.file_id).sort();
+      expect(attachmentIds).toEqual(['existing-file', 'new-file', 'ocr-file']);
+
+      // Check tool_resources
+      expect(result.tool_resources?.[EToolResources.execute_code]?.files).toHaveLength(1);
+      expect(result.tool_resources?.[EToolResources.image_edit]?.files).toHaveLength(1);
+      expect(result.tool_resources?.[EToolResources.image_edit]?.files?.[0]?.file_id).toBe(
+        'new-file',
+      );
+    });
   });
 
   describe('error handling', () => {
@@ -1,10 +1,17 @@
 import { logger } from '@librechat/data-schemas';
 import { EModelEndpoint, EToolResources, AgentCapabilities } from 'librechat-data-provider';
+import type { AgentToolResources, TFile, AgentBaseResource } from 'librechat-data-provider';
 import type { FilterQuery, QueryOptions, ProjectionType } from 'mongoose';
-import type { AgentToolResources, TFile } from 'librechat-data-provider';
 import type { IMongoFile } from '@librechat/data-schemas';
 import type { Request as ServerRequest } from 'express';
 
+/**
+ * Function type for retrieving files from the database
+ * @param filter - MongoDB filter query for files
+ * @param _sortOptions - Sorting options (currently unused)
+ * @param selectFields - Field selection options
+ * @returns Promise resolving to array of files
+ */
 export type TGetFiles = (
   filter: FilterQuery<IMongoFile>,
   _sortOptions: ProjectionType<IMongoFile> | null | undefined,
@@ -12,11 +19,125 @@ export type TGetFiles = (
 ) => Promise<Array<TFile>>;
 
 /**
- * @param params
- * @param params.req
- * @param params.attachments
- * @param params.requestFileSet
- * @param params.tool_resources
+ * Helper function to add a file to a specific tool resource category
+ * Prevents duplicate files within the same resource category
+ * @param params - Parameters object
+ * @param params.file - The file to add to the resource
+ * @param params.resourceType - The type of tool resource (e.g., execute_code, file_search, image_edit)
+ * @param params.tool_resources - The agent's tool resources object to update
+ * @param params.processedResourceFiles - Set tracking processed files per resource type
+ */
+const addFileToResource = ({
+  file,
+  resourceType,
+  tool_resources,
+  processedResourceFiles,
+}: {
+  file: TFile;
+  resourceType: EToolResources;
+  tool_resources: AgentToolResources;
+  processedResourceFiles: Set<string>;
+}): void => {
+  if (!file.file_id) {
+    return;
+  }
+
+  const resourceKey = `${resourceType}:${file.file_id}`;
+  if (processedResourceFiles.has(resourceKey)) {
+    return;
+  }
+
+  const resource = tool_resources[resourceType as keyof AgentToolResources] ?? {};
+  if (!resource.files) {
+    (tool_resources[resourceType as keyof AgentToolResources] as AgentBaseResource) = {
+      ...resource,
+      files: [],
+    };
+  }
+
+  // Check if already exists in the files array
+  const resourceFiles = tool_resources[resourceType as keyof AgentToolResources]?.files;
+  const alreadyExists = resourceFiles?.some((f: TFile) => f.file_id === file.file_id);
+
+  if (!alreadyExists) {
+    resourceFiles?.push(file);
+    processedResourceFiles.add(resourceKey);
+  }
+};
+
+/**
+ * Categorizes a file into the appropriate tool resource based on its properties
+ * Files are categorized as:
+ * - execute_code: Files with fileIdentifier metadata
+ * - file_search: Files marked as embedded
+ * - image_edit: Image files in the request file set with dimensions
+ * @param params - Parameters object
+ * @param params.file - The file to categorize
+ * @param params.tool_resources - The agent's tool resources to update
+ * @param params.requestFileSet - Set of file IDs from the current request
+ * @param params.processedResourceFiles - Set tracking processed files per resource type
+ */
+const categorizeFileForToolResources = ({
+  file,
+  tool_resources,
+  requestFileSet,
+  processedResourceFiles,
+}: {
+  file: TFile;
+  tool_resources: AgentToolResources;
+  requestFileSet: Set<string>;
+  processedResourceFiles: Set<string>;
+}): void => {
+  if (file.metadata?.fileIdentifier) {
+    addFileToResource({
+      file,
+      resourceType: EToolResources.execute_code,
+      tool_resources,
+      processedResourceFiles,
+    });
+    return;
+  }
+
+  if (file.embedded === true) {
+    addFileToResource({
+      file,
+      resourceType: EToolResources.file_search,
+      tool_resources,
+      processedResourceFiles,
+    });
+    return;
+  }
+
+  if (
+    requestFileSet.has(file.file_id) &&
+    file.type.startsWith('image') &&
+    file.height &&
+    file.width
+  ) {
+    addFileToResource({
+      file,
+      resourceType: EToolResources.image_edit,
+      tool_resources,
+      processedResourceFiles,
+    });
+  }
+};
+
+/**
+ * Primes resources for agent execution by processing attachments and tool resources
+ * This function:
+ * 1. Fetches OCR files if OCR is enabled
+ * 2. Processes attachment files
+ * 3. Categorizes files into appropriate tool resources
+ * 4. Prevents duplicate files across all sources
+ *
+ * @param params - Parameters object
+ * @param params.req - Express request object containing app configuration
+ * @param params.getFiles - Function to retrieve files from database
+ * @param params.requestFileSet - Set of file IDs from the current request
+ * @param params.attachments - Promise resolving to array of attachment files
+ * @param params.tool_resources - Existing tool resources for the agent
+ * @returns Promise resolving to processed attachments and updated tool resources
  */
 export const primeResources = async ({
   req,
@@ -35,11 +156,48 @@ export const primeResources = async ({
   tool_resources: AgentToolResources | undefined;
 }> => {
   try {
-    let attachments: Array<TFile | undefined> | undefined;
+    /**
+     * Array to collect all unique files that will be returned as attachments
+     * Files are added from OCR results and attachment promises, with duplicates prevented
+     */
+    const attachments: Array<TFile> = [];
+    /**
+     * Set of file IDs already added to the attachments array
+     * Used to prevent duplicate files from being added multiple times
+     * Pre-populated with files from non-OCR tool_resources to prevent re-adding them
+     */
+    const attachmentFileIds = new Set<string>();
+    /**
+     * Set tracking which files have been added to specific tool resource categories
+     * Format: "resourceType:fileId" (e.g., "execute_code:file123")
+     * Prevents the same file from being added multiple times to the same resource
+     */
+    const processedResourceFiles = new Set<string>();
+    /**
+     * The agent's tool resources object that will be updated with categorized files
+     * Initialized from input parameter or empty object if not provided
+     */
     const tool_resources = _tool_resources ?? {};
+
+    // Track existing files in tool_resources to prevent duplicates within resources
+    for (const [resourceType, resource] of Object.entries(tool_resources)) {
+      if (resource?.files && Array.isArray(resource.files)) {
+        for (const file of resource.files) {
+          if (file?.file_id) {
+            processedResourceFiles.add(`${resourceType}:${file.file_id}`);
+            // Files from non-OCR resources should not be added to attachments from _attachments
+            if (resourceType !== EToolResources.ocr) {
+              attachmentFileIds.add(file.file_id);
+            }
+          }
+        }
+      }
+    }
+
     const isOCREnabled = (req.app.locals?.[EModelEndpoint.agents]?.capabilities ?? []).includes(
       AgentCapabilities.ocr,
     );
 
     if (tool_resources[EToolResources.ocr]?.file_ids && isOCREnabled) {
       const context = await getFiles(
         {
@@ -48,48 +206,58 @@ export const primeResources = async ({
         {},
         {},
       );
-      attachments = (attachments ?? []).concat(context);
+
+      for (const file of context) {
+        if (!file?.file_id) {
+          continue;
+        }
+
+        // Clear from attachmentFileIds if it was pre-added
+        attachmentFileIds.delete(file.file_id);
+
+        // Add to attachments
+        attachments.push(file);
+        attachmentFileIds.add(file.file_id);
+
+        // Categorize for tool resources
+        categorizeFileForToolResources({
+          file,
+          tool_resources,
+          requestFileSet,
+          processedResourceFiles,
+        });
+      }
     }
 
     if (!_attachments) {
-      return { attachments, tool_resources };
+      return { attachments: attachments.length > 0 ? attachments : undefined, tool_resources };
    }
 
     const files = await _attachments;
-    if (!attachments) {
-      attachments = [];
-    }
 
     for (const file of files) {
       if (!file) {
         continue;
       }
-      if (file.metadata?.fileIdentifier) {
-        const execute_code = tool_resources[EToolResources.execute_code] ?? {};
-        if (!execute_code.files) {
-          tool_resources[EToolResources.execute_code] = { ...execute_code, files: [] };
-        }
-        tool_resources[EToolResources.execute_code]?.files?.push(file);
-      } else if (file.embedded === true) {
-        const file_search = tool_resources[EToolResources.file_search] ?? {};
-        if (!file_search.files) {
-          tool_resources[EToolResources.file_search] = { ...file_search, files: [] };
-        }
-        tool_resources[EToolResources.file_search]?.files?.push(file);
-      } else if (
-        requestFileSet.has(file.file_id) &&
-        file.type.startsWith('image') &&
-        file.height &&
-        file.width
-      ) {
-        const image_edit = tool_resources[EToolResources.image_edit] ?? {};
-        if (!image_edit.files) {
-          tool_resources[EToolResources.image_edit] = { ...image_edit, files: [] };
-        }
-        tool_resources[EToolResources.image_edit]?.files?.push(file);
+
+      categorizeFileForToolResources({
+        file,
+        tool_resources,
+        requestFileSet,
+        processedResourceFiles,
+      });
+
+      if (file.file_id && attachmentFileIds.has(file.file_id)) {
+        continue;
       }
 
       attachments.push(file);
+      if (file.file_id) {
+        attachmentFileIds.add(file.file_id);
+      }
     }
-    return { attachments, tool_resources };
+
+    return { attachments: attachments.length > 0 ? attachments : [], tool_resources };
   } catch (error) {
     logger.error('Error priming resources', error);