mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
📚 feat: Add Source Citations for File Search in Agents (#8652)
* feat: Source Citations for file_search in Agents
* Fix: Added citation limits and relevance score to app service. Removed duplicate tests
* ✨ feat: implement Role-level toggle to optionally disable file Source Citation in Agents
* 🐛 fix: update mock for librechat-data-provider to include PermissionTypes and SystemRoles

---------

Co-authored-by: Praneeth <praneeth.goparaju@slalom.com>
This commit is contained in:
parent
a955097faf
commit
52e59e40be
36 changed files with 1890 additions and 190 deletions
86
api/test/app/clients/tools/util/fileSearch.test.js
Normal file
86
api/test/app/clients/tools/util/fileSearch.test.js
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
const { createFileSearchTool } = require('../../../../../app/clients/tools/util/fileSearch');

// Module stand-ins for this suite. Each factory exposes only the members that
// this test file stubs out.
// NOTE(review): the require above precedes these jest.mock() calls in source
// order; this presumably relies on Jest hoisting jest.mock() - confirm the
// project's transform keeps that behavior.
jest.mock('../../../../../models', () => {
  const Files = { find: jest.fn() };
  return { Files };
});

jest.mock('../../../../../server/services/Files/VectorDB/crud', () => {
  return { queryVectors: jest.fn() };
});

jest.mock('../../../../../config', () => {
  const logger = {
    warn: jest.fn(),
    error: jest.fn(),
    debug: jest.fn(),
  };
  return { logger };
});

// Handle on the mocked queryVectors so individual tests can program its result.
const { queryVectors } = require('../../../../../server/services/Files/VectorDB/crud');
|
||||
|
||||
describe('fileSearch.js - test only new file_id and page additions', () => {
  beforeEach(() => {
    jest.clearAllMocks();
  });

  // Covers only the file_id / page metadata additions to the result format.
  // NOTE(review): the tool's `func` is replaced by a stub below and the
  // assertions run against that stub's own return value, so the formatting
  // logic inside createFileSearchTool is not exercised here. Consider invoking
  // the real tool function instead - TODO confirm how it should be called.
  it('should add file_id and page to search result format', async () => {
    const files = [{ file_id: 'test-file-123' }];

    // One [document, distance] pair wrapped in a single response object.
    const vectorHit = [
      {
        page_content: 'test content',
        metadata: { source: 'test.pdf', page: 1 },
      },
      0.3,
    ];
    // Primed for completeness; the stub installed below never consults it.
    queryVectors.mockResolvedValue([{ data: [vectorHit] }]);

    const fileSearchTool = await createFileSearchTool({
      req: { user: { id: 'user1' } },
      files,
      entity_id: 'agent-123',
    });

    // Renders one entry of the internal data section (includes File_ID and Page).
    const renderInternalEntry = (hit) =>
      `File: ${hit.filename}\nFile_ID: ${hit.file_id}\nRelevance: ${(1.0 - hit.distance).toFixed(4)}\nPage: ${hit.page || 'N/A'}\nContent: ${hit.content}\n`;

    // Stub the tool's function so it returns output in the new format.
    fileSearchTool.func = jest.fn().mockImplementation(async () => {
      const hits = [
        {
          filename: 'test.pdf',
          content: 'test content',
          distance: 0.3,
          file_id: 'test-file-123', // new field: file_id
          page: 1, // new field: page
        },
      ];

      // Internal data section intended for processAgentResponse.
      const internalData = hits.map((hit) => renderInternalEntry(hit)).join('\n---\n');

      return `File: test.pdf\nRelevance: 0.7000\nContent: test content\n\n<!-- INTERNAL_DATA_START -->\n${internalData}\n<!-- INTERNAL_DATA_END -->`;
    });

    const output = await fileSearchTool.func('test');

    // The new fields and both internal-data markers must appear in the output.
    const expectedFragments = [
      'File_ID: test-file-123',
      'Page: 1',
      '<!-- INTERNAL_DATA_START -->',
      '<!-- INTERNAL_DATA_END -->',
    ];
    for (const fragment of expectedFragments) {
      expect(output).toContain(fragment);
    }
  });
});
|
||||
72
api/test/server/services/Files/S3/crud.test.js
Normal file
72
api/test/server/services/Files/S3/crud.test.js
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
const { getS3URL } = require('../../../../../server/services/Files/S3/crud');

// AWS SDK stand-ins: the client constructor yields an object with a stubbed
// `send`, and the command constructor is a bare mock so tests can inspect the
// arguments it was constructed with.
// NOTE(review): the require above precedes these jest.mock() calls in source
// order; this presumably relies on Jest hoisting jest.mock() - confirm.
jest.mock('@aws-sdk/client-s3', () => {
  const S3Client = jest.fn(() => {
    return { send: jest.fn() };
  });
  const GetObjectCommand = jest.fn();
  return { S3Client, GetObjectCommand };
});

jest.mock('@aws-sdk/s3-request-presigner', () => {
  return { getSignedUrl: jest.fn() };
});

jest.mock('../../../../../config', () => {
  const logger = { error: jest.fn() };
  return { logger };
});

// Handles on the mocked SDK functions used by the assertions.
const { getSignedUrl } = require('@aws-sdk/s3-request-presigner');
const { GetObjectCommand } = require('@aws-sdk/client-s3');
|
||||
|
||||
describe('S3 crud.js - test only new parameter changes', () => {
  const PRESIGNED_URL = 'https://test-presigned-url.com';

  // Remember the pre-existing value so this suite does not leak its bucket
  // name into whatever runs afterwards in the same environment.
  const originalBucketName = process.env.AWS_BUCKET_NAME;

  /**
   * Returns the input object passed to the first GetObjectCommand construction.
   * Asserts the constructor was called first so a missing call fails with a
   * readable expectation message instead of a TypeError on `calls[0][0]`.
   */
  const firstCommandInput = () => {
    expect(GetObjectCommand).toHaveBeenCalled();
    return GetObjectCommand.mock.calls[0][0];
  };

  beforeEach(() => {
    jest.clearAllMocks();
    // NOTE(review): the module under test is required before this hook runs;
    // if it reads AWS_BUCKET_NAME at load time this assignment has no effect
    // on it - confirm against the implementation.
    process.env.AWS_BUCKET_NAME = 'test-bucket';
    getSignedUrl.mockResolvedValue(PRESIGNED_URL);
  });

  afterAll(() => {
    if (originalBucketName === undefined) {
      delete process.env.AWS_BUCKET_NAME;
    } else {
      process.env.AWS_BUCKET_NAME = originalBucketName;
    }
  });

  // Covers only the new customFilename parameter.
  it('should include customFilename in response headers when provided', async () => {
    await getS3URL({
      userId: 'user123',
      fileName: 'test.pdf',
      customFilename: 'cleaned_filename.pdf',
    });

    // The custom filename must surface as ResponseContentDisposition on the command input.
    expect(firstCommandInput().ResponseContentDisposition).toBe(
      'attachment; filename="cleaned_filename.pdf"',
    );
  });

  // Covers only the new contentType parameter.
  it('should include contentType in response headers when provided', async () => {
    await getS3URL({
      userId: 'user123',
      fileName: 'test.pdf',
      contentType: 'application/pdf',
    });

    // The content type must surface as ResponseContentType on the command input.
    expect(firstCommandInput().ResponseContentType).toBe('application/pdf');
  });

  it('should work without new parameters (backward compatibility)', async () => {
    const result = await getS3URL({
      userId: 'user123',
      fileName: 'test.pdf',
    });

    expect(result).toBe(PRESIGNED_URL);
  });
});
|
||||
237
api/test/services/Files/processAgentResponse.test.js
Normal file
237
api/test/services/Files/processAgentResponse.test.js
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
const { processAgentResponse } = require('../../../app/clients/agents/processAgentResponse');
const { Files } = require('../../../models');
const { getCustomConfig } = require('../../../server/services/Config/getCustomConfig');

// Module stand-ins for this suite. Each factory exposes only the members that
// the tests below program or that are stubbed to stay silent.
// NOTE(review): the requires above precede these jest.mock() calls in source
// order; this presumably relies on Jest hoisting jest.mock() - confirm.
jest.mock('../../../models', () => {
  const Files = { find: jest.fn() };
  return { Files };
});

jest.mock('../../../server/services/Config/getCustomConfig', () => {
  return { getCustomConfig: jest.fn() };
});

jest.mock('../../../config', () => {
  const logger = {
    warn: jest.fn(),
    error: jest.fn(),
    debug: jest.fn(),
  };
  return { logger };
});
|
||||
|
||||
describe('processAgentResponse', () => {
  const USER_ID = 'user123';
  const CONVERSATION_ID = 'conv123';

  // Consecutive result entries in a file_search output are separated by a
  // `---` line with one blank line on each side.
  const ENTRY_SEPARATOR = '\n\n---\n\n';

  /**
   * Builds a content-parts array holding a single `file_search` tool call.
   * Each entry is an array of lines; lines are joined with a newline and
   * entries with ENTRY_SEPARATOR to form the tool call's `output` text.
   */
  const fileSearchParts = (...entries) => [
    {
      type: 'tool_call',
      tool_call: {
        name: 'file_search',
        output: entries.map((lines) => lines.join('\n')).join(ENTRY_SEPARATOR),
      },
    },
  ];

  /** Fresh response object carrying a message id. */
  const newResponse = () => ({ messageId: 'msg123' });

  beforeEach(() => {
    jest.clearAllMocks();
  });

  it('should return response unchanged when no messageId', async () => {
    const response = { messageId: null };

    const result = await processAgentResponse(response, USER_ID, CONVERSATION_ID);

    expect(result).toBe(response);
  });

  it('should return response unchanged when no file search results', async () => {
    getCustomConfig.mockResolvedValue({ endpoints: { agents: { maxCitations: 10 } } });

    const response = newResponse();
    const contentParts = [{ type: 'text', content: 'some text' }];

    const result = await processAgentResponse(response, USER_ID, CONVERSATION_ID, contentParts);

    expect(result).toBe(response);
  });

  it('should process file search results and create attachments', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 10 } },
      fileStrategy: 's3',
    });
    Files.find.mockResolvedValue([
      {
        file_id: 'file123',
        source: 's3',
        filename: 'test.pdf',
      },
    ]);

    const contentParts = fileSearchParts([
      'File: test.pdf',
      'File_ID: file123',
      'Relevance: 0.8',
      'Page: 1',
      'Storage_Type: s3',
      'S3_Bucket: test-bucket',
      'S3_Key: uploads/user123/file123__test.pdf',
      'Content: Test content',
    ]);

    const result = await processAgentResponse(
      newResponse(),
      USER_ID,
      CONVERSATION_ID,
      contentParts,
    );

    expect(result.attachments).toBeDefined();
    expect(result.attachments).toHaveLength(1);

    const [attachment] = result.attachments;
    expect(attachment.type).toBe('file_search');
    expect(attachment.file_search.sources).toBeDefined();
    expect(attachment.file_search.sources).toHaveLength(1);

    const [source] = attachment.file_search.sources;
    expect(source.fileId).toBe('file123');
    expect(source.fileName).toBe('test.pdf');
    expect(source.metadata.storageType).toBe('s3');
  });

  it('should use configured fileStrategy when file metadata is missing', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 10 } },
      fileStrategy: 's3',
    });
    // The file record deliberately has no `source`, so the storage type is
    // expected to come from the configured fileStrategy.
    Files.find.mockResolvedValue([{ file_id: 'file123' }]);

    const contentParts = fileSearchParts([
      'File: test.pdf',
      'File_ID: file123',
      'Relevance: 0.8',
      'Content: Test content',
    ]);

    const result = await processAgentResponse(
      newResponse(),
      USER_ID,
      CONVERSATION_ID,
      contentParts,
    );

    const [source] = result.attachments[0].file_search.sources;
    expect(source.metadata.storageType).toBe('s3'); // taken from fileStrategy
  });

  it('should handle file diversity and allow multiple pages per file', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 5, maxCitationsPerFile: 3 } },
      fileStrategy: 's3',
    });
    Files.find.mockResolvedValue([
      { file_id: 'file1', source: 'local', filename: 'test1.pdf' },
      { file_id: 'file2', source: 'local', filename: 'test2.pdf' },
    ]);

    const contentParts = fileSearchParts(
      [
        'File: test1.pdf',
        'File_ID: file1',
        'Relevance: 0.9',
        'Page: 1',
        'Content: High relevance content',
      ],
      [
        'File: test1.pdf',
        'File_ID: file1',
        'Relevance: 0.7',
        'Page: 2',
        'Content: Lower relevance content',
      ],
      [
        'File: test2.pdf',
        'File_ID: file2',
        'Relevance: 0.8',
        'Page: 1',
        'Content: Different file content',
      ],
    );

    const result = await processAgentResponse(
      newResponse(),
      USER_ID,
      CONVERSATION_ID,
      contentParts,
    );

    const { sources } = result.attachments[0].file_search;
    // More than one page of the same file is permitted, so at least two sources are expected.
    expect(sources.length).toBeGreaterThanOrEqual(2);

    // Both files must be represented among the sources.
    const fileIds = sources.map((entry) => entry.fileId);
    expect(fileIds).toContain('file1');
    expect(fileIds).toContain('file2');

    // file1 must contribute at least one source (it may contribute several pages).
    const file1Sources = sources.filter((entry) => entry.fileId === 'file1');
    expect(file1Sources.length).toBeGreaterThanOrEqual(1);
  });

  it('should respect maxCitationsPerFile configuration', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 10, maxCitationsPerFile: 2 } },
      fileStrategy: 'local',
    });
    Files.find.mockResolvedValue([{ file_id: 'file1', source: 'local', filename: 'test1.pdf' }]);

    // Four pages of the same file, listed in descending relevance order.
    const pageEntries = ['0.9', '0.8', '0.7', '0.6'].map((relevance, index) => {
      const page = index + 1;
      return [
        'File: test1.pdf',
        'File_ID: file1',
        `Relevance: ${relevance}`,
        `Page: ${page}`,
        `Content: Page ${page} content`,
      ];
    });
    const contentParts = fileSearchParts(...pageEntries);

    const result = await processAgentResponse(
      newResponse(),
      USER_ID,
      CONVERSATION_ID,
      contentParts,
    );

    const { sources } = result.attachments[0].file_search;
    expect(sources).toHaveLength(2); // capped by maxCitationsPerFile (2)

    // The two kept sources are the highest-relevance pages (0.9 and 0.8).
    expect(sources[0].relevance).toBe(0.9);
    expect(sources[1].relevance).toBe(0.8);
  });
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue