mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-09-21 21:50:49 +02:00
🔗 fix: File Citation Processing to Use Tool Artifacts
This commit is contained in:
parent
81b32e400a
commit
fc8fd489d6
8 changed files with 524 additions and 538 deletions
|
@ -1,237 +0,0 @@
|
|||
const { processAgentResponse } = require('../../../app/clients/agents/processAgentResponse');
|
||||
const { Files } = require('../../../models');
|
||||
const { getCustomConfig } = require('../../../server/services/Config/getCustomConfig');
|
||||
|
||||
// Mock dependencies
|
||||
// Replace the models module with a stub exposing only `Files.find`;
// tests set its resolved value as needed.
jest.mock('../../../models', () => {
  const Files = { find: jest.fn() };
  return { Files };
});

// Stub the custom-config loader; tests set its resolved value as needed.
jest.mock('../../../server/services/Config/getCustomConfig', () => {
  const getCustomConfig = jest.fn();
  return { getCustomConfig };
});

// Stub the logger with bare mock functions for warn, error and debug.
jest.mock('../../../config', () => {
  const logger = {
    warn: jest.fn(),
    error: jest.fn(),
    debug: jest.fn(),
  };
  return { logger };
});
|
||||
|
||||
/**
 * Tests for processAgentResponse, driven by the plain-text output of
 * `file_search` tool calls. `Files.find` and `getCustomConfig` are module
 * mocks that the individual tests configure.
 */
describe('processAgentResponse', () => {
  const USER_ID = 'user123';
  const CONVERSATION_ID = 'conv123';

  /** Joins the `Key: value` lines of a single search result. */
  const toResultText = (lines) => lines.join('\n');

  /** Joins results with the blank line / `---` / blank line separator used by the fixtures. */
  const toOutput = (results) => results.map(toResultText).join('\n\n---\n\n');

  /** Wraps the given search results in a single `file_search` tool-call content part. */
  const fileSearchParts = (results) => [
    {
      type: 'tool_call',
      tool_call: {
        name: 'file_search',
        output: toOutput(results),
      },
    },
  ];

  /** Calls the function under test with the fixed user and conversation ids. */
  const run = (response, contentParts) =>
    processAgentResponse(response, USER_ID, CONVERSATION_ID, contentParts);

  beforeEach(() => {
    jest.clearAllMocks();
  });

  it('should return response unchanged when no messageId', async () => {
    const response = { messageId: null };

    // Deliberately called without content parts, as in the three-argument form.
    const result = await processAgentResponse(response, USER_ID, CONVERSATION_ID);

    expect(result).toBe(response);
  });

  it('should return response unchanged when no file search results', async () => {
    getCustomConfig.mockResolvedValue({ endpoints: { agents: { maxCitations: 10 } } });
    const response = { messageId: 'msg123' };

    const result = await run(response, [{ type: 'text', content: 'some text' }]);

    expect(result).toBe(response);
  });

  it('should process file search results and create attachments', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 10 } },
      fileStrategy: 's3',
    });
    Files.find.mockResolvedValue([{ file_id: 'file123', source: 's3', filename: 'test.pdf' }]);

    const contentParts = fileSearchParts([
      [
        'File: test.pdf',
        'File_ID: file123',
        'Relevance: 0.8',
        'Page: 1',
        'Storage_Type: s3',
        'S3_Bucket: test-bucket',
        'S3_Key: uploads/user123/file123__test.pdf',
        'Content: Test content',
      ],
    ]);

    const result = await run({ messageId: 'msg123' }, contentParts);

    expect(result.attachments).toBeDefined();
    expect(result.attachments).toHaveLength(1);

    const [attachment] = result.attachments;
    expect(attachment.type).toBe('file_search');
    expect(attachment.file_search.sources).toBeDefined();
    expect(attachment.file_search.sources).toHaveLength(1);

    const [source] = attachment.file_search.sources;
    expect(source.fileId).toBe('file123');
    expect(source.fileName).toBe('test.pdf');
    expect(source.metadata.storageType).toBe('s3');
  });

  it('should use configured fileStrategy when file metadata is missing', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 10 } },
      fileStrategy: 's3',
    });
    // The record has no `source`, so the storage type should fall back to fileStrategy.
    Files.find.mockResolvedValue([{ file_id: 'file123' }]);

    const contentParts = fileSearchParts([
      ['File: test.pdf', 'File_ID: file123', 'Relevance: 0.8', 'Content: Test content'],
    ]);

    const result = await run({ messageId: 'msg123' }, contentParts);

    const [source] = result.attachments[0].file_search.sources;
    expect(source.metadata.storageType).toBe('s3'); // Should use fileStrategy
  });

  it('should handle file diversity and allow multiple pages per file', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 5, maxCitationsPerFile: 3 } },
      fileStrategy: 's3',
    });
    Files.find.mockResolvedValue([
      { file_id: 'file1', source: 'local', filename: 'test1.pdf' },
      { file_id: 'file2', source: 'local', filename: 'test2.pdf' },
    ]);

    const contentParts = fileSearchParts([
      [
        'File: test1.pdf',
        'File_ID: file1',
        'Relevance: 0.9',
        'Page: 1',
        'Content: High relevance content',
      ],
      [
        'File: test1.pdf',
        'File_ID: file1',
        'Relevance: 0.7',
        'Page: 2',
        'Content: Lower relevance content',
      ],
      [
        'File: test2.pdf',
        'File_ID: file2',
        'Relevance: 0.8',
        'Page: 1',
        'Content: Different file content',
      ],
    ]);

    const result = await run({ messageId: 'msg123' }, contentParts);

    const { sources } = result.attachments[0].file_search;
    expect(sources.length).toBeGreaterThanOrEqual(2); // Can include multiple pages per file now

    // Should have both files represented
    const fileIds = sources.map(({ fileId }) => fileId);
    for (const expectedId of ['file1', 'file2']) {
      expect(fileIds).toContain(expectedId);
    }

    // Should include multiple pages from file1 due to high relevance
    const file1Sources = sources.filter(({ fileId }) => fileId === 'file1');
    expect(file1Sources.length).toBeGreaterThanOrEqual(1);
  });

  it('should respect maxCitationsPerFile configuration', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 10, maxCitationsPerFile: 2 } },
      fileStrategy: 'local',
    });
    Files.find.mockResolvedValue([{ file_id: 'file1', source: 'local', filename: 'test1.pdf' }]);

    // Four pages of the same file with strictly decreasing relevance.
    const pages = [0.9, 0.8, 0.7, 0.6].map((relevance, index) => {
      const page = index + 1;
      return [
        'File: test1.pdf',
        'File_ID: file1',
        `Relevance: ${relevance}`,
        `Page: ${page}`,
        `Content: Page ${page} content`,
      ];
    });

    const result = await run({ messageId: 'msg123' }, fileSearchParts(pages));

    const { sources } = result.attachments[0].file_search;
    expect(sources).toHaveLength(2); // Should be limited to maxCitationsPerFile (2)

    // Should include the 2 highest relevance pages (0.9 and 0.8)
    const [best, secondBest] = sources;
    expect(best.relevance).toBe(0.9);
    expect(secondBest.relevance).toBe(0.8);
  });
});
|
337
api/test/services/Files/processFileCitations.test.js
Normal file
337
api/test/services/Files/processFileCitations.test.js
Normal file
|
@ -0,0 +1,337 @@
|
|||
const { Tools } = require('librechat-data-provider');
|
||||
const {
|
||||
processFileCitations,
|
||||
applyCitationLimits,
|
||||
enhanceSourcesWithMetadata,
|
||||
} = require('~/server/services/Files/Citations');
|
||||
|
||||
// Mock dependencies
|
||||
// Files model stub: `find` resolves to an empty list unless a test overrides it.
jest.mock('~/models', () => {
  const find = jest.fn().mockResolvedValue([]);
  return { Files: { find } };
});

// Role lookup stub with no configured return value.
jest.mock('~/models/Role', () => {
  const getRoleByName = jest.fn();
  return { getRoleByName };
});

// Access check stub that resolves to `true`.
jest.mock('@librechat/api', () => {
  const checkAccess = jest.fn().mockResolvedValue(true);
  return { checkAccess };
});

// Custom config stub resolving to fixed citation limits and a local file strategy.
jest.mock('~/server/services/Config/getCustomConfig', () => {
  const agents = {
    maxCitations: 30,
    maxCitationsPerFile: 5,
    minRelevanceScore: 0.45,
  };
  const customConfig = {
    endpoints: { agents },
    fileStrategy: 'local',
  };
  return { getCustomConfig: jest.fn().mockResolvedValue(customConfig) };
});

// Logger stub with bare mock functions for debug, error and warn.
jest.mock('~/config', () => {
  const logger = {
    debug: jest.fn(),
    error: jest.fn(),
    warn: jest.fn(),
  };
  return { logger };
});
|
||||
|
||||
/**
 * Tests for the file citation helpers: processFileCitations,
 * applyCitationLimits and enhanceSourcesWithMetadata.
 *
 * The models, access check, custom config and logger modules are mocked at
 * file level; `Files.find` is reconfigured by the tests that need records.
 */
describe('processFileCitations', () => {
  const user = { id: 'user123' };
  const metadata = {
    run_id: 'run123',
    thread_id: 'conv123',
  };

  /** Builds a tool artifact keyed by the file_search tool name. */
  const artifactWith = (sources) => ({ [Tools.file_search]: { sources } });

  /** Runs processFileCitations with the shared call id, metadata and user. */
  const processArtifact = (toolArtifact) =>
    processFileCitations({
      toolArtifact,
      toolCallId: 'call_123',
      metadata,
      user,
    });

  /** Creates a minimal `type: 'file'` source; returns a fresh object on every call. */
  const fileSource = (fileId, fileName, relevance) => ({
    fileId,
    fileName,
    relevance,
    type: 'file',
  });

  describe('file search artifact processing', () => {
    it('should process file search artifacts correctly', async () => {
      const toolArtifact = artifactWith([
        {
          fileId: 'file_123',
          fileName: 'example.pdf',
          pages: [5],
          relevance: 0.85,
          type: 'file',
          pageRelevance: { 5: 0.85 },
          content: 'This is the content',
        },
        {
          fileId: 'file_456',
          fileName: 'document.txt',
          pages: [],
          relevance: 0.72,
          type: 'file',
          pageRelevance: {},
          content: 'Another document',
        },
      ]);

      const result = await processArtifact(toolArtifact);

      expect(result).toBeTruthy();
      expect(result.type).toBe('file_search');

      const { sources } = result.file_search;
      expect(sources).toHaveLength(2);
      expect(sources[0].fileId).toBe('file_123');
      expect(sources[0].relevance).toBe(0.85);
    });

    it('should return null for non-file_search tools', async () => {
      const result = await processArtifact({ other_tool: {} });

      expect(result).toBeNull();
    });

    it('should filter results below relevance threshold', async () => {
      // 0.2 is below the mocked minRelevanceScore of 0.45.
      const toolArtifact = artifactWith([
        {
          fileId: 'file_789',
          fileName: 'low_relevance.pdf',
          pages: [],
          relevance: 0.2,
          type: 'file',
          pageRelevance: {},
          content: 'Low relevance content',
        },
      ]);

      const result = await processArtifact(toolArtifact);

      expect(result).toBeNull();
    });

    it('should return null when artifact is missing file_search data', async () => {
      const result = await processArtifact({});

      expect(result).toBeNull();
    });
  });

  describe('applyCitationLimits', () => {
    it('should limit citations per file and total', () => {
      const sources = [
        ['file1', 0.9],
        ['file1', 0.8],
        ['file1', 0.7],
        ['file2', 0.85],
        ['file2', 0.75],
      ].map(([fileId, relevance]) => ({ fileId, relevance }));

      const result = applyCitationLimits(sources, 3, 2);

      expect(result).toHaveLength(3);
      const [first, second, third] = result;
      expect(first.relevance).toBe(0.9);
      expect(second.relevance).toBe(0.85);
      expect(third.relevance).toBe(0.8);
    });
  });

  describe('enhanceSourcesWithMetadata', () => {
    const { Files } = require('~/models');
    const mockCustomConfig = {
      fileStrategy: 'local',
    };

    beforeEach(() => {
      // Clears recorded calls only; mock implementations set by earlier tests remain.
      jest.clearAllMocks();
    });

    it('should enhance sources with file metadata from database', async () => {
      const sources = [
        fileSource('file_123', 'example.pdf', 0.85),
        fileSource('file_456', 'document.txt', 0.72),
      ];

      Files.find.mockResolvedValue([
        { file_id: 'file_123', filename: 'example_from_db.pdf', source: 's3' },
        { file_id: 'file_456', filename: 'document_from_db.txt', source: 'local' },
      ]);

      const result = await enhanceSourcesWithMetadata(sources, mockCustomConfig);

      expect(Files.find).toHaveBeenCalledWith({ file_id: { $in: ['file_123', 'file_456'] } });
      expect(result).toHaveLength(2);

      expect(result[0]).toEqual({
        ...fileSource('file_123', 'example_from_db.pdf', 0.85),
        metadata: {
          storageType: 's3',
        },
      });

      expect(result[1]).toEqual({
        ...fileSource('file_456', 'document_from_db.txt', 0.72),
        metadata: {
          storageType: 'local',
        },
      });
    });

    it('should preserve existing metadata and source data', async () => {
      const sources = [
        {
          ...fileSource('file_123', 'example.pdf', 0.85),
          pages: [1, 2, 3],
          content: 'Some content',
          metadata: {
            existingField: 'value',
          },
        },
      ];

      Files.find.mockResolvedValue([
        { file_id: 'file_123', filename: 'example_from_db.pdf', source: 'gcs' },
      ]);

      const result = await enhanceSourcesWithMetadata(sources, mockCustomConfig);

      expect(result[0]).toEqual({
        ...fileSource('file_123', 'example_from_db.pdf', 0.85),
        pages: [1, 2, 3],
        content: 'Some content',
        metadata: {
          existingField: 'value',
          storageType: 'gcs',
        },
      });
    });

    it('should handle missing file metadata gracefully', async () => {
      const sources = [fileSource('file_789', 'missing.pdf', 0.9)];

      Files.find.mockResolvedValue([]);

      const result = await enhanceSourcesWithMetadata(sources, mockCustomConfig);

      expect(result[0]).toEqual({
        ...fileSource('file_789', 'missing.pdf', 0.9),
        metadata: {
          storageType: 'local', // Falls back to customConfig.fileStrategy
        },
      });
    });

    it('should handle database errors gracefully', async () => {
      const sources = [fileSource('file_123', 'example.pdf', 0.85)];

      // One-shot rejection: a persistent mockRejectedValue would survive
      // jest.clearAllMocks() and leak into any later test that does not
      // reconfigure Files.find.
      Files.find.mockRejectedValueOnce(new Error('Database error'));

      const result = await enhanceSourcesWithMetadata(sources, mockCustomConfig);

      expect(result[0]).toEqual({
        ...fileSource('file_123', 'example.pdf', 0.85),
        metadata: {
          storageType: 'local',
        },
      });
    });

    it('should deduplicate file IDs when querying database', async () => {
      const sources = [
        fileSource('file_123', 'doc1.pdf', 0.9),
        fileSource('file_123', 'doc1.pdf', 0.8),
        fileSource('file_456', 'doc2.pdf', 0.7),
      ];

      Files.find.mockResolvedValue([
        { file_id: 'file_123', filename: 'document1.pdf', source: 's3' },
        { file_id: 'file_456', filename: 'document2.pdf', source: 'local' },
      ]);

      await enhanceSourcesWithMetadata(sources, mockCustomConfig);

      // 'file_123' appears twice in the sources but only once in the query.
      expect(Files.find).toHaveBeenCalledWith({ file_id: { $in: ['file_123', 'file_456'] } });
    });
  });
});
|
Loading…
Add table
Add a link
Reference in a new issue