diff --git a/api/app/clients/agents/processAgentResponse.js b/api/app/clients/agents/processAgentResponse.js deleted file mode 100644 index 3d2d8610c..000000000 --- a/api/app/clients/agents/processAgentResponse.js +++ /dev/null @@ -1,266 +0,0 @@ -const { Files } = require('~/models'); -const { getCustomConfig } = require('~/server/services/Config/getCustomConfig'); -const { nanoid } = require('nanoid'); -const { Tools, PermissionTypes, Permissions } = require('librechat-data-provider'); -const { logger } = require('~/config'); -const { checkAccess } = require('@librechat/api'); -const { getRoleByName } = require('~/models/Role'); - -/** - * Processes agent response to extract and capture file references from tool calls - */ -const processAgentResponse = async ( - response, - userId, - conversationId, - contentParts = [], - user = null, -) => { - try { - if (!response.messageId) { - logger.warn('[processAgentResponse] No messageId in response'); - return response; - } - - // Check file citations permission following PROMPTS pattern - if (user) { - try { - // Clear role cache to ensure fresh data (following PROMPTS pattern) - const hasFileCitationsAccess = await checkAccess({ - user, - permissionType: PermissionTypes.FILE_CITATIONS, - permissions: [Permissions.USE], - getRoleByName, - }); - - if (!hasFileCitationsAccess) { - logger.debug( - `[processAgentResponse] User ${userId} does not have FILE_CITATIONS permission`, - ); - return response; // Return response without file citations - } - - logger.debug( - `[processAgentResponse] FILE_CITATIONS permission verified for user ${userId}`, - ); - } catch (error) { - logger.error( - `[processAgentResponse] Permission check failed for FILE_CITATIONS: ${error.message}`, - ); - // Fail open for permission errors to avoid breaking existing functionality - logger.debug(`[processAgentResponse] Proceeding with citations due to permission error`); - } - } - - logger.debug(`[processAgentResponse] Processing citations for user ${userId}`); - - const customConfig = await getCustomConfig(); - const maxCitations = customConfig?.endpoints?.agents?.maxCitations ?? 30; - const maxCitationsPerFile = customConfig?.endpoints?.agents?.maxCitationsPerFile ?? 5; - const minRelevanceScore = customConfig?.endpoints?.agents?.minRelevanceScore ?? 0.45; - - const fileSearchResults = extractFileResults(contentParts); - if (!fileSearchResults.length) { - logger.warn('[processAgentResponse] No file search results found'); - return response; - } - - // Filter results based on relevance score cutoff - const filteredResults = fileSearchResults.filter( - (result) => result.relevance >= minRelevanceScore, - ); - - const filteredCount = fileSearchResults.length - filteredResults.length; - if (filteredCount > 0) { - logger.debug( - `[processAgentResponse] Filtered out ${filteredCount} sources below relevance threshold of ${minRelevanceScore}`, - ); - } - - if (filteredResults.length === 0) { - logger.debug( - `[processAgentResponse] No results above relevance threshold of ${minRelevanceScore} (filtered ${fileSearchResults.length} total results)`, - ); - return response; - } - - const selectedResults = selectBestResults(filteredResults, maxCitations, maxCitationsPerFile); - const sources = await createSourcesWithMetadata(selectedResults, customConfig); - - if (sources.length > 0) { - logger.debug( - '[processAgentResponse] Creating file search attachment with sources:', - sources.length, - ); - - const fileSearchAttachment = { - messageId: response.messageId, - toolCallId: 'file_search_results', - conversationId, - name: `${Tools.file_search}_file_search_results_${nanoid()}`, - type: Tools.file_search, - [Tools.file_search]: { sources }, - }; - - response.attachments = response.attachments || []; - response.attachments.push(fileSearchAttachment); - } - - return response; - } catch (error) { - logger.error('[processAgentResponse] Error processing agent response:', error); - return response; - } -}; - -/** - * Extract file results from content parts (simplified) - */ -const extractFileResults = (contentParts) => { - const results = []; - - for (const part of contentParts) { - let toolResult = null; - - if (part.type === 'tool_call' && part.tool_call?.name === 'file_search') { - toolResult = part.tool_result || part.tool_call?.output; - } else if ( - (part.type === 'tool_result' || part.type === 'tool_call') && - part.tool_result && - typeof part.tool_result === 'string' && - part.tool_result.includes('File:') - ) { - toolResult = part.tool_result; - } else if (part.content && typeof part.content === 'string' && part.content.includes('File:')) { - toolResult = part.content; - } - - if (toolResult) { - results.push(...parseFileSearchResults(toolResult)); - } - } - - return results; -}; - -/** - * Select best results with file diversity, allowing multiple pages per file - */ -const selectBestResults = (results, maxCitations, maxCitationsPerFile = 5) => { - const byFile = {}; - results.forEach((result) => { - if (!byFile[result.file_id]) { - byFile[result.file_id] = []; - } - byFile[result.file_id].push(result); - }); - - const representatives = []; - for (const fileId in byFile) { - const fileResults = byFile[fileId].sort((a, b) => b.relevance - a.relevance); - // Take up to maxCitationsPerFile results per file instead of just one - const selectedFromFile = fileResults.slice(0, maxCitationsPerFile); - representatives.push(...selectedFromFile); - } - - return representatives.sort((a, b) => b.relevance - a.relevance).slice(0, maxCitations); -}; - -/** - * Create sources with metadata - */ -const createSourcesWithMetadata = async (results, customConfig) => { - const fileIds = [...new Set(results.map((result) => result.file_id))]; - - let fileMetadataMap = {}; - try { - const files = await Files.find({ file_id: { $in: fileIds } }); - fileMetadataMap = files.reduce((map, file) => { - map[file.file_id] = file; - return map; - }, {}); - } catch (error) { - logger.error('[processAgentResponse] Error looking up file metadata:', error); - } - - return results.map((result) => { - const fileRecord = fileMetadataMap[result.file_id] || {}; - const configuredStorageType = fileRecord.source || customConfig?.fileStrategy || 'local'; - - return { - fileId: result.file_id, - fileName: fileRecord.filename || 'Unknown File', - pages: result.page ? [result.page] : [], - relevance: result.relevance, - type: 'file', - pageRelevance: result.pageRelevance || {}, - metadata: { storageType: configuredStorageType }, - }; - }); -}; - -/** - * Parse file search results (simplified) - */ -const parseFileSearchResults = (formattedResults) => { - const results = []; - - try { - let dataToProcess = formattedResults; - const internalDataMatch = formattedResults.match( - /\n(.*?)\n/s, - ); - if (internalDataMatch) { - dataToProcess = internalDataMatch[1]; - } - - const sections = dataToProcess.split(/\n\s*\n|\n---\n/); - - for (const section of sections) { - if (!section.trim()) continue; - - const lines = section.trim().split('\n'); - let filename = ''; - let file_id = ''; - let relevance = 0; - let content = ''; - let page = null; - - for (const line of lines) { - const trimmedLine = line.trim(); - if (trimmedLine.startsWith('File: ')) { - filename = trimmedLine.replace('File: ', '').trim(); - } else if (trimmedLine.startsWith('File_ID: ')) { - file_id = trimmedLine.replace('File_ID: ', '').trim(); - } else if (trimmedLine.startsWith('Relevance: ')) { - relevance = parseFloat(trimmedLine.replace('Relevance: ', '').trim()) || 0; - } else if (trimmedLine.startsWith('Page: ')) { - const pageStr = trimmedLine.replace('Page: ', '').trim(); - page = pageStr !== 'N/A' && pageStr !== '' ? parseInt(pageStr) : null; - } else if (trimmedLine.startsWith('Content: ')) { - content = trimmedLine.replace('Content: ', '').trim(); - } - } - - if (filename && (relevance > 0 || file_id)) { - const finalFileId = file_id || filename.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase(); - results.push({ - file_id: finalFileId, - filename, - relevance: relevance || 0.5, - content, - page, - pageRelevance: page ? { [page]: relevance || 0.5 } : {}, - }); - } - } - } catch (error) { - logger.error('[parseFileSearchResults] Error parsing results:', error); - } - - return results; -}; - -module.exports = { - processAgentResponse, -}; diff --git a/api/app/clients/tools/util/fileSearch.js b/api/app/clients/tools/util/fileSearch.js index c97b2e0b6..7221e9e3f 100644 --- a/api/app/clients/tools/util/fileSearch.js +++ b/api/app/clients/tools/util/fileSearch.js @@ -143,18 +143,21 @@ const createFileSearchTool = async ({ req, files, entity_id }) => { ) .join('\n---\n'); - // Add hidden file_id data for processAgentResponse parsing - const internalData = formattedResults - .map( - (result) => - `File: ${result.filename}\nFile_ID: ${result.file_id}\nRelevance: ${(1.0 - result.distance).toFixed(4)}\nPage: ${result.page || 'N/A'}\nContent: ${result.content}\n`, - ) - .join('\n---\n'); + const sources = formattedResults.map((result) => ({ + type: 'file', + fileId: result.file_id, + content: result.content, + fileName: result.filename, + relevance: 1.0 - result.distance, + pages: result.page ? [result.page] : [], + pageRelevance: result.page ? { [result.page]: 1.0 - result.distance } : {}, + })); - return `${formattedString}\n\n\n${internalData}\n`; + return [formattedString, { [Tools.file_search]: { sources } }]; }, { name: Tools.file_search, + responseFormat: 'content_and_artifact', description: `Performs semantic search across attached "${Tools.file_search}" documents using natural language queries. This tool analyzes the content of uploaded files to find relevant information, quotes, and passages that best match your query. Use this to extract specific information or find relevant sections within the available documents. **CITE FILE SEARCH RESULTS:** diff --git a/api/server/controllers/agents/callbacks.js b/api/server/controllers/agents/callbacks.js index 60e68b5f2..a9291aa7f 100644 --- a/api/server/controllers/agents/callbacks.js +++ b/api/server/controllers/agents/callbacks.js @@ -11,6 +11,7 @@ const { handleToolCalls, ChatModelStreamHandler, } = require('@librechat/agents'); +const { processFileCitations } = require('~/server/services/Files/Citations'); const { processCodeOutput } = require('~/server/services/Files/Code/process'); const { loadAuthValues } = require('~/server/services/Tools/credentials'); const { saveBase64Image } = require('~/server/services/Files/process'); @@ -238,6 +239,31 @@ function createToolEndCallback({ req, res, artifactPromises }) { return; } + if (output.artifact[Tools.file_search]) { + artifactPromises.push( + (async () => { + const user = req.user; + const attachment = await processFileCitations({ + user, + metadata, + toolArtifact: output.artifact, + toolCallId: output.tool_call_id, + }); + if (!attachment) { + return null; + } + if (!res.headersSent) { + return attachment; + } + res.write(`event: attachment\ndata: ${JSON.stringify(attachment)}\n\n`); + return attachment; + })().catch((error) => { + logger.error('Error processing file citations:', error); + return null; + }), + ); + } + if (output.artifact[Tools.web_search]) { artifactPromises.push( (async () => { diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index 89fd8a5b1..b578087d3 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -49,7 +49,6 @@ const BaseClient = require('~/app/clients/BaseClient'); const { getRoleByName } = require('~/models/Role'); const { loadAgent } = require('~/models/Agent'); const { getMCPManager } = require('~/config'); -const { processAgentResponse } = require('~/app/clients/agents/processAgentResponse'); const omitTitleOptions = new Set([ 'stream', @@ -1036,27 +1035,6 @@ class AgentClient extends BaseClient { this.artifactPromises.push(...attachments); } - // Process agent response to capture file references and create attachments - - const processedResponse = await processAgentResponse( - { - messageId: this.responseMessageId, - attachments: this.artifactPromises, - }, - this.user ?? this.options.req.user?.id, - this.conversationId, - this.contentParts, - this.options.req.user, - ); - - // Update artifact promises with any new attachments from agent response - if (processedResponse.attachments && processedResponse.attachments.length > 0) { - // Add new attachments to existing artifactPromises - processedResponse.attachments.forEach((attachment) => { - this.artifactPromises.push(Promise.resolve(attachment)); - }); - } - await this.recordCollectedUsage({ context: 'message' }); } catch (err) { logger.error( diff --git a/api/server/services/Files/Citations/index.js b/api/server/services/Files/Citations/index.js new file mode 100644 index 000000000..6ee7258d9 --- /dev/null +++ b/api/server/services/Files/Citations/index.js @@ -0,0 +1,149 @@ +const { nanoid } = require('nanoid'); +const { checkAccess } = require('@librechat/api'); +const { Tools, PermissionTypes, Permissions } = require('librechat-data-provider'); +const { getCustomConfig } = require('~/server/services/Config/getCustomConfig'); +const { getRoleByName } = require('~/models/Role'); +const { logger } = require('~/config'); +const { Files } = require('~/models'); + +/** + * Process file search results from tool calls + * @param {Object} options + * @param {IUser} options.user - The user object + * @param {GraphRunnableConfig['configurable']} options.metadata - The metadata + * @param {any} options.toolArtifact - The tool artifact containing structured data + * @param {string} options.toolCallId - The tool call ID + * @returns {Promise} The file search attachment or null + */ +async function processFileCitations({ user, toolArtifact, toolCallId, metadata }) { + try { + if (!toolArtifact?.[Tools.file_search]?.sources) { + return null; + } + + if (user) { + try { + const hasFileCitationsAccess = await checkAccess({ + user, + permissionType: PermissionTypes.FILE_CITATIONS, + permissions: [Permissions.USE], + getRoleByName, + }); + + if (!hasFileCitationsAccess) { + logger.debug( + `[processFileCitations] User ${user.id} does not have FILE_CITATIONS permission`, + ); + return null; + } + } catch (error) { + logger.error( + `[processFileCitations] Permission check failed for FILE_CITATIONS: ${error.message}`, + ); + logger.debug(`[processFileCitations] Proceeding with citations due to permission error`); + } + } + + const customConfig = await getCustomConfig(); + const maxCitations = customConfig?.endpoints?.agents?.maxCitations ?? 30; + const maxCitationsPerFile = customConfig?.endpoints?.agents?.maxCitationsPerFile ?? 5; + const minRelevanceScore = customConfig?.endpoints?.agents?.minRelevanceScore ?? 0.45; + + const sources = toolArtifact[Tools.file_search].sources || []; + const filteredSources = sources.filter((source) => source.relevance >= minRelevanceScore); + if (filteredSources.length === 0) { + logger.debug( + `[processFileCitations] No sources above relevance threshold of ${minRelevanceScore}`, + ); + return null; + } + + const selectedSources = applyCitationLimits(filteredSources, maxCitations, maxCitationsPerFile); + const enhancedSources = await enhanceSourcesWithMetadata(selectedSources, customConfig); + + if (enhancedSources.length > 0) { + const fileSearchAttachment = { + type: Tools.file_search, + [Tools.file_search]: { sources: enhancedSources }, + toolCallId: toolCallId, + messageId: metadata.run_id, + conversationId: metadata.thread_id, + name: `${Tools.file_search}_file_search_results_${nanoid()}`, + }; + + return fileSearchAttachment; + } + + return null; + } catch (error) { + logger.error('[processFileCitations] Error processing file citations:', error); + return null; + } +} + +/** + * Apply citation limits to sources + * @param {Array} sources - All sources + * @param {number} maxCitations - Maximum total citations + * @param {number} maxCitationsPerFile - Maximum citations per file + * @returns {Array} Selected sources + */ +function applyCitationLimits(sources, maxCitations, maxCitationsPerFile) { + const byFile = {}; + sources.forEach((source) => { + if (!byFile[source.fileId]) { + byFile[source.fileId] = []; + } + byFile[source.fileId].push(source); + }); + + const representatives = []; + for (const fileId in byFile) { + const fileSources = byFile[fileId].sort((a, b) => b.relevance - a.relevance); + const selectedFromFile = fileSources.slice(0, maxCitationsPerFile); + representatives.push(...selectedFromFile); + } + + return representatives.sort((a, b) => b.relevance - a.relevance).slice(0, maxCitations); +} + +/** + * Enhance sources with file metadata from database + * @param {Array} sources - Selected sources + * @param {Object} customConfig - Custom configuration + * @returns {Promise} Enhanced sources + */ +async function enhanceSourcesWithMetadata(sources, customConfig) { + const fileIds = [...new Set(sources.map((source) => source.fileId))]; + + let fileMetadataMap = {}; + try { + const files = await Files.find({ file_id: { $in: fileIds } }); + fileMetadataMap = files.reduce((map, file) => { + map[file.file_id] = file; + return map; + }, {}); + } catch (error) { + logger.error('[enhanceSourcesWithMetadata] Error looking up file metadata:', error); + } + + return sources.map((source) => { + const fileRecord = fileMetadataMap[source.fileId] || {}; + const configuredStorageType = fileRecord.source || customConfig?.fileStrategy || 'local'; + + return { + ...source, + fileName: fileRecord.filename || source.fileName || 'Unknown File', + metadata: { + ...source.metadata, + storageType: configuredStorageType, + }, + }; + }); +} + +module.exports = { + applyCitationLimits, + processFileCitations, + enhanceSourcesWithMetadata, +}; diff --git a/api/test/services/Files/processAgentResponse.test.js b/api/test/services/Files/processAgentResponse.test.js deleted file mode 100644 index 8267c0fab..000000000 --- a/api/test/services/Files/processAgentResponse.test.js +++ /dev/null @@ -1,237 +0,0 @@ -const { processAgentResponse } = require('../../../app/clients/agents/processAgentResponse'); -const { Files } = require('../../../models'); -const { getCustomConfig } = require('../../../server/services/Config/getCustomConfig'); - -// Mock dependencies -jest.mock('../../../models', () => ({ - Files: { - find: jest.fn(), - }, -})); - -jest.mock('../../../server/services/Config/getCustomConfig', () => ({ - getCustomConfig: jest.fn(), -})); - -jest.mock('../../../config', () => ({ - logger: { - warn: jest.fn(), - error: jest.fn(), - debug: jest.fn(), - }, -})); - -describe('processAgentResponse', () => { - beforeEach(() => { - jest.clearAllMocks(); - }); - - it('should return response unchanged when no messageId', async () => { - const response = { messageId: null }; - const result = await processAgentResponse(response, 'user123', 'conv123'); - expect(result).toBe(response); - }); - - it('should return response unchanged when no file search results', async () => { - getCustomConfig.mockResolvedValue({ endpoints: { agents: { maxCitations: 10 } } }); - - const response = { messageId: 'msg123' }; - const contentParts = [{ type: 'text', content: 'some text' }]; - - const result = await processAgentResponse(response, 'user123', 'conv123', contentParts); - expect(result).toBe(response); - }); - - it('should process file search results and create attachments', async () => { - getCustomConfig.mockResolvedValue({ - endpoints: { agents: { maxCitations: 10 } }, - fileStrategy: 's3', - }); - - Files.find.mockResolvedValue([ - { - file_id: 'file123', - source: 's3', - filename: 'test.pdf', - }, - ]); - - const response = { messageId: 'msg123' }; - const contentParts = [ - { - type: 'tool_call', - tool_call: { - name: 'file_search', - output: `File: test.pdf -File_ID: file123 -Relevance: 0.8 -Page: 1 -Storage_Type: s3 -S3_Bucket: test-bucket -S3_Key: uploads/user123/file123__test.pdf -Content: Test content`, - }, - }, - ]; - - const result = await processAgentResponse(response, 'user123', 'conv123', contentParts); - - expect(result.attachments).toBeDefined(); - expect(result.attachments).toHaveLength(1); - expect(result.attachments[0].type).toBe('file_search'); - expect(result.attachments[0].file_search.sources).toBeDefined(); - expect(result.attachments[0].file_search.sources).toHaveLength(1); - - const source = result.attachments[0].file_search.sources[0]; - expect(source.fileId).toBe('file123'); - expect(source.fileName).toBe('test.pdf'); - expect(source.metadata.storageType).toBe('s3'); - }); - - it('should use configured fileStrategy when file metadata is missing', async () => { - getCustomConfig.mockResolvedValue({ - endpoints: { agents: { maxCitations: 10 } }, - fileStrategy: 's3', - }); - - Files.find.mockResolvedValue([ - { - file_id: 'file123', - // source is undefined, should fallback to fileStrategy - }, - ]); - - const response = { messageId: 'msg123' }; - const contentParts = [ - { - type: 'tool_call', - tool_call: { - name: 'file_search', - output: `File: test.pdf -File_ID: file123 -Relevance: 0.8 -Content: Test content`, - }, - }, - ]; - - const result = await processAgentResponse(response, 'user123', 'conv123', contentParts); - - const source = result.attachments[0].file_search.sources[0]; - expect(source.metadata.storageType).toBe('s3'); // Should use fileStrategy - }); - - it('should handle file diversity and allow multiple pages per file', async () => { - getCustomConfig.mockResolvedValue({ - endpoints: { agents: { maxCitations: 5, maxCitationsPerFile: 3 } }, - fileStrategy: 's3', - }); - - Files.find.mockResolvedValue([ - { file_id: 'file1', source: 'local', filename: 'test1.pdf' }, - { file_id: 'file2', source: 'local', filename: 'test2.pdf' }, - ]); - - const response = { messageId: 'msg123' }; - const contentParts = [ - { - type: 'tool_call', - tool_call: { - name: 'file_search', - output: `File: test1.pdf -File_ID: file1 -Relevance: 0.9 -Page: 1 -Content: High relevance content - ---- - -File: test1.pdf -File_ID: file1 -Relevance: 0.7 -Page: 2 -Content: Lower relevance content - ---- - -File: test2.pdf -File_ID: file2 -Relevance: 0.8 -Page: 1 -Content: Different file content`, - }, - }, - ]; - - const result = await processAgentResponse(response, 'user123', 'conv123', contentParts); - - const sources = result.attachments[0].file_search.sources; - expect(sources.length).toBeGreaterThanOrEqual(2); // Can include multiple pages per file now - - // Should have both files represented - const fileIds = sources.map((s) => s.fileId); - expect(fileIds).toContain('file1'); - expect(fileIds).toContain('file2'); - - // Should include multiple pages from file1 due to high relevance - const file1Sources = sources.filter((s) => s.fileId === 'file1'); - expect(file1Sources.length).toBeGreaterThanOrEqual(1); - }); - - it('should respect maxCitationsPerFile configuration', async () => { - getCustomConfig.mockResolvedValue({ - endpoints: { agents: { maxCitations: 10, maxCitationsPerFile: 2 } }, - fileStrategy: 'local', - }); - - Files.find.mockResolvedValue([{ file_id: 'file1', source: 'local', filename: 'test1.pdf' }]); - - const response = { messageId: 'msg123' }; - const contentParts = [ - { - type: 'tool_call', - tool_call: { - name: 'file_search', - output: `File: test1.pdf -File_ID: file1 -Relevance: 0.9 -Page: 1 -Content: Page 1 content - ---- - -File: test1.pdf -File_ID: file1 -Relevance: 0.8 -Page: 2 -Content: Page 2 content - ---- - -File: test1.pdf -File_ID: file1 -Relevance: 0.7 -Page: 3 -Content: Page 3 content - ---- - -File: test1.pdf -File_ID: file1 -Relevance: 0.6 -Page: 4 -Content: Page 4 content`, - }, - }, - ]; - - const result = await processAgentResponse(response, 'user123', 'conv123', contentParts); - - const sources = result.attachments[0].file_search.sources; - expect(sources).toHaveLength(2); // Should be limited to maxCitationsPerFile (2) - - // Should include the 2 highest relevance pages (0.9 and 0.8) - expect(sources[0].relevance).toBe(0.9); - expect(sources[1].relevance).toBe(0.8); - }); -}); diff --git a/api/test/services/Files/processFileCitations.test.js b/api/test/services/Files/processFileCitations.test.js new file mode 100644 index 000000000..1370dc287 --- /dev/null +++ b/api/test/services/Files/processFileCitations.test.js @@ -0,0 +1,337 @@ +const { Tools } = require('librechat-data-provider'); +const { + processFileCitations, + applyCitationLimits, + enhanceSourcesWithMetadata, +} = require('~/server/services/Files/Citations'); + +// Mock dependencies +jest.mock('~/models', () => ({ + Files: { + find: jest.fn().mockResolvedValue([]), + }, +})); + +jest.mock('~/models/Role', () => ({ + getRoleByName: jest.fn(), +})); + +jest.mock('@librechat/api', () => ({ + checkAccess: jest.fn().mockResolvedValue(true), +})); + +jest.mock('~/server/services/Config/getCustomConfig', () => ({ + getCustomConfig: jest.fn().mockResolvedValue({ + endpoints: { + agents: { + maxCitations: 30, + maxCitationsPerFile: 5, + minRelevanceScore: 0.45, + }, + }, + fileStrategy: 'local', + }), +})); + +jest.mock('~/config', () => ({ + logger: { + debug: jest.fn(), + error: jest.fn(), + warn: jest.fn(), + }, +})); + +describe('processFileCitations', () => { + const mockReq = { + user: { + id: 'user123', + }, + }; + + const mockMetadata = { + run_id: 'run123', + thread_id: 'conv123', + }; + + describe('file search artifact processing', () => { + it('should process file search artifacts correctly', async () => { + const toolArtifact = { + [Tools.file_search]: { + sources: [ + { + fileId: 'file_123', + fileName: 'example.pdf', + pages: [5], + relevance: 0.85, + type: 'file', + pageRelevance: { 5: 0.85 }, + content: 'This is the content', + }, + { + fileId: 'file_456', + fileName: 'document.txt', + pages: [], + relevance: 0.72, + type: 'file', + pageRelevance: {}, + content: 'Another document', + }, + ], + }, + }; + + const result = await processFileCitations({ + toolArtifact, + toolCallId: 'call_123', + metadata: mockMetadata, + user: mockReq.user, + }); + + expect(result).toBeTruthy(); + expect(result.type).toBe('file_search'); + expect(result.file_search.sources).toHaveLength(2); + expect(result.file_search.sources[0].fileId).toBe('file_123'); + expect(result.file_search.sources[0].relevance).toBe(0.85); + }); + + it('should return null for non-file_search tools', async () => { + const result = await processFileCitations({ + toolArtifact: { other_tool: {} }, + toolCallId: 'call_123', + metadata: mockMetadata, + user: mockReq.user, + }); + + expect(result).toBeNull(); + }); + + it('should filter results below relevance threshold', async () => { + const toolArtifact = { + [Tools.file_search]: { + sources: [ + { + fileId: 'file_789', + fileName: 'low_relevance.pdf', + pages: [], + relevance: 0.2, + type: 'file', + pageRelevance: {}, + content: 'Low relevance content', + }, + ], + }, + }; + + const result = await processFileCitations({ + toolArtifact, + toolCallId: 'call_123', + metadata: mockMetadata, + user: mockReq.user, + }); + + expect(result).toBeNull(); + }); + + it('should return null when artifact is missing file_search data', async () => { + const result = await processFileCitations({ + toolArtifact: {}, + toolCallId: 'call_123', + metadata: mockMetadata, + user: mockReq.user, + }); + + expect(result).toBeNull(); + }); + }); + + describe('applyCitationLimits', () => { + it('should limit citations per file and total', () => { + const sources = [ + { fileId: 'file1', relevance: 0.9 }, + { fileId: 'file1', relevance: 0.8 }, + { fileId: 'file1', relevance: 0.7 }, + { fileId: 'file2', relevance: 0.85 }, + { fileId: 'file2', relevance: 0.75 }, + ]; + + const result = applyCitationLimits(sources, 3, 2); + + expect(result).toHaveLength(3); + expect(result[0].relevance).toBe(0.9); + expect(result[1].relevance).toBe(0.85); + expect(result[2].relevance).toBe(0.8); + }); + }); + + describe('enhanceSourcesWithMetadata', () => { + const { Files } = require('~/models'); + const mockCustomConfig = { + fileStrategy: 'local', + }; + + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('should enhance sources with file metadata from database', async () => { + const sources = [ + { + fileId: 'file_123', + fileName: 'example.pdf', + relevance: 0.85, + type: 'file', + }, + { + fileId: 'file_456', + fileName: 'document.txt', + relevance: 0.72, + type: 'file', + }, + ]; + + Files.find.mockResolvedValue([ + { + file_id: 'file_123', + filename: 'example_from_db.pdf', + source: 's3', + }, + { + file_id: 'file_456', + filename: 'document_from_db.txt', + source: 'local', + }, + ]); + + const result = await enhanceSourcesWithMetadata(sources, mockCustomConfig); + + expect(Files.find).toHaveBeenCalledWith({ file_id: { $in: ['file_123', 'file_456'] } }); + expect(result).toHaveLength(2); + + expect(result[0]).toEqual({ + fileId: 'file_123', + fileName: 'example_from_db.pdf', + relevance: 0.85, + type: 'file', + metadata: { + storageType: 's3', + }, + }); + + expect(result[1]).toEqual({ + fileId: 'file_456', + fileName: 'document_from_db.txt', + relevance: 0.72, + type: 'file', + metadata: { + storageType: 'local', + }, + }); + }); + + it('should preserve existing metadata and source data', async () => { + const sources = [ + { + fileId: 'file_123', + fileName: 'example.pdf', + relevance: 0.85, + type: 'file', + pages: [1, 2, 3], + content: 'Some content', + metadata: { + existingField: 'value', + }, + }, + ]; + + Files.find.mockResolvedValue([ + { + file_id: 'file_123', + filename: 'example_from_db.pdf', + source: 'gcs', + }, + ]); + + const result = await enhanceSourcesWithMetadata(sources, mockCustomConfig); + + expect(result[0]).toEqual({ + fileId: 'file_123', + fileName: 'example_from_db.pdf', + relevance: 0.85, + type: 'file', + pages: [1, 2, 3], + content: 'Some content', + metadata: { + existingField: 'value', + storageType: 'gcs', + }, + }); + }); + + it('should handle missing file metadata gracefully', async () => { + const sources = [ + { + fileId: 'file_789', + fileName: 'missing.pdf', + relevance: 0.9, + type: 'file', + }, + ]; + + Files.find.mockResolvedValue([]); + + const result = await enhanceSourcesWithMetadata(sources, mockCustomConfig); + + expect(result[0]).toEqual({ + fileId: 'file_789', + fileName: 'missing.pdf', + relevance: 0.9, + type: 'file', + metadata: { + storageType: 'local', // Falls back to customConfig.fileStrategy + }, + }); + }); + + it('should handle database errors gracefully', async () => { + const sources = [ + { + fileId: 'file_123', + fileName: 'example.pdf', + relevance: 0.85, + type: 'file', + }, + ]; + + Files.find.mockRejectedValue(new Error('Database error')); + + const result = await enhanceSourcesWithMetadata(sources, mockCustomConfig); + + expect(result[0]).toEqual({ + fileId: 'file_123', + fileName: 'example.pdf', + relevance: 0.85, + type: 'file', + metadata: { + storageType: 'local', + }, + }); + }); + + it('should deduplicate file IDs when querying database', async () => { + const sources = [ + { fileId: 'file_123', fileName: 'doc1.pdf', relevance: 0.9, type: 'file' }, + { fileId: 'file_123', fileName: 'doc1.pdf', relevance: 0.8, type: 'file' }, + { fileId: 'file_456', fileName: 'doc2.pdf', relevance: 0.7, type: 'file' }, + ]; + + Files.find.mockResolvedValue([ + { file_id: 'file_123', filename: 'document1.pdf', source: 's3' }, + { file_id: 'file_456', filename: 'document2.pdf', source: 'local' }, + ]); + + await enhanceSourcesWithMetadata(sources, mockCustomConfig); + + expect(Files.find).toHaveBeenCalledWith({ file_id: { $in: ['file_123', 'file_456'] } }); + }); + }); +}); diff --git a/client/src/components/Web/SourceHovercard.tsx b/client/src/components/Web/SourceHovercard.tsx index 550f30854..fee922549 100644 --- a/client/src/components/Web/SourceHovercard.tsx +++ b/client/src/components/Web/SourceHovercard.tsx @@ -66,11 +66,7 @@ export function SourceHovercard({ isFile ? (