/**
 * Attaches a `file_search` citation attachment to an agent response.
 *
 * Pipeline: collect file-search tool output from `contentParts`, verify the
 * user's FILE_CITATIONS permission, drop hits below the configured relevance
 * threshold, cap the number of citations (overall and per file), enrich them
 * with stored file metadata and push a single attachment onto
 * `response.attachments`.
 *
 * Notes for callers:
 * - `response.attachments` is mutated in place. If the caller passes an array
 *   it also holds elsewhere, the new attachment becomes visible through that
 *   array as well, so it must not be appended a second time.
 * - The function never throws: any failure is logged and the response is
 *   returned as it was at that point.
 * - A failing permission lookup is treated as "allowed" (fail open), which is
 *   the behavior this module was written with.
 *
 * @param {{ messageId?: string, attachments?: Array<unknown> }} response - Response being finalized.
 * @param {string} userId - Id of the requesting user (used for logging only).
 * @param {string} conversationId - Conversation the attachment belongs to.
 * @param {Array<object>} [contentParts=[]] - Content parts produced by the agent run.
 * @param {object|null} [user=null] - User object for the permission check; skipped when null.
 * @returns {Promise<object>} The same `response` object, possibly with one more attachment.
 */
const processAgentResponse = async (
  response,
  userId,
  conversationId,
  contentParts = [],
  user = null,
) => {
  try {
    if (!response?.messageId) {
      logger.warn('[processAgentResponse] No messageId in response');
      return response;
    }

    // Cheap synchronous check first: most responses contain no file_search
    // output, so skip the permission and config lookups for them. This is a
    // normal situation, hence debug rather than warn.
    const fileSearchResults = extractFileResults(contentParts);
    if (fileSearchResults.length === 0) {
      logger.debug('[processAgentResponse] No file search results found');
      return response;
    }

    if (user) {
      try {
        const hasFileCitationsAccess = await checkAccess({
          user,
          permissionType: PermissionTypes.FILE_CITATIONS,
          permissions: [Permissions.USE],
          getRoleByName,
        });

        if (!hasFileCitationsAccess) {
          logger.debug(
            `[processAgentResponse] User ${userId} does not have FILE_CITATIONS permission`,
          );
          return response;
        }

        logger.debug(
          `[processAgentResponse] FILE_CITATIONS permission verified for user ${userId}`,
        );
      } catch (error) {
        logger.error(
          `[processAgentResponse] Permission check failed for FILE_CITATIONS: ${error.message}`,
        );
        // Fail open so a broken permission lookup does not remove citations.
        logger.debug(`[processAgentResponse] Proceeding with citations due to permission error`);
      }
    }

    logger.debug(`[processAgentResponse] Processing citations for user ${userId}`);

    const customConfig = await getCustomConfig();
    const agentsConfig = customConfig?.endpoints?.agents;
    const maxCitations = agentsConfig?.maxCitations ?? 30;
    const maxCitationsPerFile = agentsConfig?.maxCitationsPerFile ?? 5;
    const minRelevanceScore = agentsConfig?.minRelevanceScore ?? 0.45;

    const filteredResults = fileSearchResults.filter(
      (result) => result.relevance >= minRelevanceScore,
    );

    const filteredCount = fileSearchResults.length - filteredResults.length;
    if (filteredCount > 0) {
      logger.debug(
        `[processAgentResponse] Filtered out ${filteredCount} sources below relevance threshold of ${minRelevanceScore}`,
      );
    }

    if (filteredResults.length === 0) {
      logger.debug(
        `[processAgentResponse] No results above relevance threshold of ${minRelevanceScore} (filtered ${fileSearchResults.length} total results)`,
      );
      return response;
    }

    const selectedResults = selectBestResults(filteredResults, maxCitations, maxCitationsPerFile);
    const sources = await createSourcesWithMetadata(selectedResults, customConfig);

    if (sources.length > 0) {
      logger.debug(
        '[processAgentResponse] Creating file search attachment with sources:',
        sources.length,
      );

      const fileSearchAttachment = {
        messageId: response.messageId,
        toolCallId: 'file_search_results',
        conversationId,
        name: `${Tools.file_search}_file_search_results_${nanoid()}`,
        type: Tools.file_search,
        [Tools.file_search]: { sources },
      };

      response.attachments = response.attachments || [];
      response.attachments.push(fileSearchAttachment);
    }

    return response;
  } catch (error) {
    logger.error('[processAgentResponse] Error processing agent response:', error);
    return response;
  }
};

/**
 * Collects parsed file-search hits from the agent's content parts.
 *
 * A part contributes when it is a `file_search` tool call, a tool call/result
 * whose string result mentions `File:`, or any part whose string `content`
 * mentions `File:`. Nullish parts and non-string tool output are skipped so a
 * single malformed entry cannot discard the citations of the whole response.
 *
 * @param {Array<object>} contentParts - Content parts of the agent run.
 * @returns {Array<object>} Parsed results (see `parseFileSearchResults`); empty when nothing matched.
 */
const extractFileResults = (contentParts) => {
  if (!Array.isArray(contentParts)) {
    return [];
  }

  const results = [];

  for (const part of contentParts) {
    if (part == null) {
      continue;
    }

    let toolResult = null;

    if (part.type === 'tool_call' && part.tool_call?.name === 'file_search') {
      toolResult = part.tool_result || part.tool_call?.output;
    } else if (
      (part.type === 'tool_result' || part.type === 'tool_call') &&
      typeof part.tool_result === 'string' &&
      part.tool_result.includes('File:')
    ) {
      toolResult = part.tool_result;
    } else if (typeof part.content === 'string' && part.content.includes('File:')) {
      toolResult = part.content;
    }

    // Only textual output can be parsed; objects or empty strings are ignored.
    if (typeof toolResult === 'string' && toolResult !== '') {
      results.push(...parseFileSearchResults(toolResult));
    }
  }

  return results;
};

/**
 * Picks the most relevant results while keeping several files represented.
 *
 * Results are grouped by `file_id`, each group is cut to its
 * `maxCitationsPerFile` most relevant entries, and the union is cut to the
 * `maxCitations` most relevant entries overall.
 *
 * @param {Array<{ file_id: string, relevance: number }>} results - Candidate results.
 * @param {number} maxCitations - Upper bound on the returned list.
 * @param {number} [maxCitationsPerFile=5] - Upper bound per distinct `file_id`.
 * @returns {Array<object>} Selected results, most relevant first.
 */
const selectBestResults = (results, maxCitations, maxCitationsPerFile = 5) => {
  const byRelevanceDesc = (a, b) => b.relevance - a.relevance;

  // A Map keeps arbitrary ids (e.g. "constructor", "__proto__") from
  // colliding with properties inherited by plain objects.
  const byFile = new Map();
  for (const result of results) {
    const group = byFile.get(result.file_id);
    if (group) {
      group.push(result);
    } else {
      byFile.set(result.file_id, [result]);
    }
  }

  const candidates = [];
  for (const group of byFile.values()) {
    candidates.push(...group.sort(byRelevanceDesc).slice(0, maxCitationsPerFile));
  }

  return candidates.sort(byRelevanceDesc).slice(0, maxCitations);
};

/**
 * Converts selected results into citation sources, enriched with stored file
 * metadata where available.
 *
 * The metadata lookup is best effort: when it fails, the error is logged and
 * the sources fall back to the filename parsed from the tool output and to the
 * configured (or `'local'`) storage type.
 *
 * @param {Array<object>} results - Results chosen by `selectBestResults`.
 * @param {object} [customConfig] - Custom config; `fileStrategy` is the storage fallback.
 * @returns {Promise<Array<object>>} One source object per input result, in the same order.
 */
const createSourcesWithMetadata = async (results, customConfig) => {
  const fileIds = [...new Set(results.map((result) => result.file_id))];

  const fileRecords = new Map();
  try {
    const files = await Files.find({ file_id: { $in: fileIds } });
    for (const file of files) {
      fileRecords.set(file.file_id, file);
    }
  } catch (error) {
    logger.error('[processAgentResponse] Error looking up file metadata:', error);
  }

  return results.map((result) => {
    const fileRecord = fileRecords.get(result.file_id);
    const configuredStorageType = fileRecord?.source || customConfig?.fileStrategy || 'local';

    return {
      fileId: result.file_id,
      // Prefer the stored name, then the name reported by the tool output.
      fileName: fileRecord?.filename || result.filename || 'Unknown File',
      pages: result.page ? [result.page] : [],
      relevance: result.relevance,
      type: 'file',
      pageRelevance: result.pageRelevance || {},
      metadata: { storageType: configuredStorageType },
    };
  });
};

/**
 * Parses the text emitted by the file_search tool into result records.
 *
 * The text consists of sections separated by blank lines or `---` lines. Each
 * section is a list of `Key: value` lines; the keys read here are `File`,
 * `File_ID`, `Relevance`, `Page` and `Content` (first line only). Other lines
 * are ignored.
 *
 * The tool output may contain every hit twice: once for display (no
 * `File_ID`) and once with `File_ID` for this parser. When at least one
 * section carries a `File_ID`, only those sections are returned so the display
 * copies do not become duplicate citations. Output without any `File_ID` is
 * still parsed, using an id derived from the filename.
 *
 * @param {string} formattedResults - Raw tool output.
 * @returns {Array<{ file_id: string, filename: string, relevance: number, content: string, page: number|null, pageRelevance: object }>}
 *   Parsed records; empty for non-string or blank input.
 */
const parseFileSearchResults = (formattedResults) => {
  if (typeof formattedResults !== 'string' || formattedResults.trim() === '') {
    return [];
  }

  const valueAfter = (line, prefix) => line.slice(prefix.length).trim();
  const records = [];

  try {
    for (const section of formattedResults.split(/\n\s*\n|\n---\n/)) {
      if (!section.trim()) {
        continue;
      }

      let filename = '';
      let file_id = '';
      let relevance = 0;
      let content = '';
      let page = null;

      for (const rawLine of section.trim().split('\n')) {
        const line = rawLine.trim();
        if (line.startsWith('File: ')) {
          filename = valueAfter(line, 'File: ');
        } else if (line.startsWith('File_ID: ')) {
          file_id = valueAfter(line, 'File_ID: ');
        } else if (line.startsWith('Relevance: ')) {
          relevance = parseFloat(valueAfter(line, 'Relevance: ')) || 0;
        } else if (line.startsWith('Page: ')) {
          // "N/A" and other non-numeric values mean "no page", never NaN.
          const parsedPage = Number.parseInt(valueAfter(line, 'Page: '), 10);
          page = Number.isNaN(parsedPage) ? null : parsedPage;
        } else if (line.startsWith('Content: ')) {
          content = valueAfter(line, 'Content: ');
        }
      }

      if (!filename || !(relevance > 0 || file_id)) {
        continue;
      }

      const effectiveRelevance = relevance || 0.5;
      records.push({
        hasExplicitId: file_id !== '',
        result: {
          file_id: file_id || filename.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase(),
          filename,
          relevance: effectiveRelevance,
          content,
          page,
          pageRelevance: page ? { [page]: effectiveRelevance } : {},
        },
      });
    }
  } catch (error) {
    logger.error('[parseFileSearchResults] Error parsing results:', error);
  }

  const identified = records.filter((record) => record.hasExplicitId);
  return (identified.length > 0 ? identified : records).map((record) => record.result);
};
result.distance).toFixed(4)}\nPage: ${result.page || 'N/A'}\nContent: ${result.content}\n`, + ) + .join('\n---\n'); + + return `${formattedString}\n\n\n${internalData}\n`; }, { name: Tools.file_search, - description: `Performs semantic search across attached "${Tools.file_search}" documents using natural language queries. This tool analyzes the content of uploaded files to find relevant information, quotes, and passages that best match your query. Use this to extract specific information or find relevant sections within the available documents.`, + description: `Performs semantic search across attached "${Tools.file_search}" documents using natural language queries. This tool analyzes the content of uploaded files to find relevant information, quotes, and passages that best match your query. Use this to extract specific information or find relevant sections within the available documents. + +**CITE FILE SEARCH RESULTS:** +Use anchor markers immediately after statements derived from file content. Reference the filename in your text: +- File citation: "The document.pdf states that... \\ue202turn0file0" +- Page reference: "According to report.docx... \\ue202turn0file1" +- Multi-file: "Multiple sources confirm... \\ue200\\ue202turn0file0\\ue202turn0file1\\ue201" + +**ALWAYS mention the filename in your text before the citation marker. 
NEVER use markdown links or footnotes.**`, schema: z.object({ query: z .string() diff --git a/api/models/index.js b/api/models/index.js index 7ecb9adcb..048e270e1 100644 --- a/api/models/index.js +++ b/api/models/index.js @@ -22,6 +22,7 @@ const { } = require('./Message'); const { getConvoTitle, getConvo, saveConvo, deleteConvos } = require('./Conversation'); const { getPreset, getPresets, savePreset, deletePresets } = require('./Preset'); +const { File } = require('~/db/models'); module.exports = { ...methods, @@ -51,4 +52,6 @@ module.exports = { getPresets, savePreset, deletePresets, + + Files: File, }; diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index 803475e2e..89fd8a5b1 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -49,6 +49,7 @@ const BaseClient = require('~/app/clients/BaseClient'); const { getRoleByName } = require('~/models/Role'); const { loadAgent } = require('~/models/Agent'); const { getMCPManager } = require('~/config'); +const { processAgentResponse } = require('~/app/clients/agents/processAgentResponse'); const omitTitleOptions = new Set([ 'stream', @@ -838,7 +839,7 @@ class AgentClient extends BaseClient { if (noSystemMessages === true && systemContent?.length) { const latestMessageContent = _messages.pop().content; - if (typeof latestMessage !== 'string') { + if (typeof latestMessageContent !== 'string') { latestMessageContent[0].text = [systemContent, latestMessageContent[0].text].join('\n'); _messages.push(new HumanMessage({ content: latestMessageContent })); } else { @@ -1034,6 +1035,28 @@ class AgentClient extends BaseClient { if (attachments && attachments.length > 0) { this.artifactPromises.push(...attachments); } + + // Process agent response to capture file references and create attachments + + const processedResponse = await processAgentResponse( + { + messageId: this.responseMessageId, + attachments: this.artifactPromises, + }, + 
/**
 * Decides whether a user may read a file through an agent that references it.
 *
 * Looks up an agent whose `file_search`, `execute_code` or `ocr` tool
 * resources list `fileId` and grants access when the user authored that
 * agent, the agent is collaborative, or the agent has any project ids.
 * Lookup errors are logged and treated as "no access".
 *
 * NOTE(review): `getAgent` appears to return a single document; if several
 * agents reference the file only the returned one is evaluated — confirm.
 *
 * @param {string} userId - Id of the requesting user.
 * @param {string} fileId - Id of the requested file.
 * @returns {Promise<boolean>} True when access is granted.
 */
const checkSharedFileAccess = async (userId, fileId) => {
  // Never build the `$or` filter from missing identifiers.
  if (!userId || !fileId) {
    return false;
  }

  try {
    const found = await getAgent({
      $or: [
        { 'tool_resources.file_search.file_ids': fileId },
        { 'tool_resources.execute_code.file_ids': fileId },
        { 'tool_resources.ocr.file_ids': fileId },
      ],
    });

    if (!found) {
      return false;
    }

    // Accept both a single document and a list of documents.
    const agents = Array.isArray(found) ? found : [found];
    const requesterId = String(userId);

    return agents.some((agent) => {
      if (agent == null) {
        return false;
      }

      // Works whether the ids are strings or ObjectId-like values.
      if (agent.author != null && String(agent.author) === requesterId) {
        return true;
      }

      // NOTE(review): grants access to any authenticated user — confirm that
      // "collaborative" is meant to imply read access to the agent's files.
      if (agent.isCollaborative) {
        return true;
      }

      // NOTE(review): simplified check — any project id grants access without
      // verifying that the user actually belongs to one of the projects.
      return Boolean(agent.projectIds && agent.projectIds.length > 0);
    });
  } catch (error) {
    logger.error('[checkSharedFileAccess] Error:', error);
    return false;
  }
};
res.status(403).send('Forbidden'); + } + + if (isFileOwner && userId !== actualFileOwner) { return res.status(403).send('Forbidden'); } @@ -338,7 +395,8 @@ router.get('/download/:userId/:file_id', async (req, res) => { } const setHeaders = () => { - res.setHeader('Content-Disposition', `attachment; filename="${file.filename}"`); + const cleanedFilename = cleanFileName(file.filename); + res.setHeader('Content-Disposition', `attachment; filename="${cleanedFilename}"`); res.setHeader('Content-Type', 'application/octet-stream'); res.setHeader('X-File-Metadata', JSON.stringify(file)); }; @@ -365,12 +423,17 @@ router.get('/download/:userId/:file_id', async (req, res) => { logger.debug(`File ${file_id} downloaded from OpenAI`); passThrough.body.pipe(res); } else { - fileStream = getDownloadStream(file_id); + fileStream = await getDownloadStream(req, file.filepath); + + fileStream.on('error', (streamError) => { + logger.error('[DOWNLOAD ROUTE] Stream error:', streamError); + }); + setHeaders(); fileStream.pipe(res); } } catch (error) { - logger.error('Error downloading file:', error); + logger.error('[DOWNLOAD ROUTE] Error downloading file:', error); res.status(500).send('Error downloading file'); } }); @@ -405,7 +468,6 @@ router.post('/', async (req, res) => { message = error.message; } - // TODO: delete remote file if it exists try { await fs.unlink(req.file.path); cleanup = false; diff --git a/api/server/services/AppService.spec.js b/api/server/services/AppService.spec.js index 0832732f0..2fc0e5580 100644 --- a/api/server/services/AppService.spec.js +++ b/api/server/services/AppService.spec.js @@ -165,6 +165,9 @@ describe('AppService', () => { agents: { disableBuilder: false, capabilities: expect.arrayContaining([...defaultAgentCapabilities]), + maxCitations: 30, + maxCitationsPerFile: 7, + minRelevanceScore: 0.45, }, }); }); diff --git a/api/server/services/Files/Code/process.js b/api/server/services/Files/Code/process.js index 22947379c..4ea7fa00b 100644 --- 
a/api/server/services/Files/Code/process.js +++ b/api/server/services/Files/Code/process.js @@ -225,7 +225,17 @@ const primeFiles = async (options, apiKey) => { entity_id: queryParams.entity_id, apiKey, }); - await updateFile({ file_id: file.file_id, metadata: { fileIdentifier } }); + + // Preserve existing metadata when adding fileIdentifier + const updatedMetadata = { + ...file.metadata, // Preserve existing metadata (like S3 storage info) + fileIdentifier, // Add fileIdentifier + }; + + await updateFile({ + file_id: file.file_id, + metadata: updatedMetadata, + }); sessions.set(session_id, true); pushFile(); } catch (error) { diff --git a/api/server/services/Files/S3/crud.js b/api/server/services/Files/S3/crud.js index 10c04106d..78220ed30 100644 --- a/api/server/services/Files/S3/crud.js +++ b/api/server/services/Files/S3/crud.js @@ -1,5 +1,4 @@ const fs = require('fs'); -const path = require('path'); const fetch = require('node-fetch'); const { FileSources } = require('librechat-data-provider'); const { @@ -15,7 +14,7 @@ const { logger } = require('~/config'); const bucketName = process.env.AWS_BUCKET_NAME; const defaultBasePath = 'images'; -let s3UrlExpirySeconds = 7 * 24 * 60 * 60; +let s3UrlExpirySeconds = 2 * 60; // 2 minutes let s3RefreshExpiryMs = null; if (process.env.S3_URL_EXPIRY_SECONDS !== undefined) { @@ -25,7 +24,7 @@ if (process.env.S3_URL_EXPIRY_SECONDS !== undefined) { s3UrlExpirySeconds = Math.min(parsed, 7 * 24 * 60 * 60); } else { logger.warn( - `[S3] Invalid S3_URL_EXPIRY_SECONDS value: "${process.env.S3_URL_EXPIRY_SECONDS}". Using 7-day expiry.`, + `[S3] Invalid S3_URL_EXPIRY_SECONDS value: "${process.env.S3_URL_EXPIRY_SECONDS}". Using 2-minute expiry.`, ); } } @@ -80,12 +79,29 @@ async function saveBufferToS3({ userId, buffer, fileName, basePath = defaultBase * @param {string} params.userId - The user's unique identifier. * @param {string} params.fileName - The file name in S3. 
* @param {string} [params.basePath='images'] - The base path in the bucket. + * @param {string} [params.customFilename] - Custom filename for Content-Disposition header (overrides extracted filename). + * @param {string} [params.contentType] - Custom content type for the response. * @returns {Promise} A URL to access the S3 object */ -async function getS3URL({ userId, fileName, basePath = defaultBasePath }) { +async function getS3URL({ + userId, + fileName, + basePath = defaultBasePath, + customFilename = null, + contentType = null, +}) { const key = getS3Key(basePath, userId, fileName); const params = { Bucket: bucketName, Key: key }; + // Add response headers if specified + if (customFilename) { + params.ResponseContentDisposition = `attachment; filename="${customFilename}"`; + } + + if (contentType) { + params.ResponseContentType = contentType; + } + try { const s3 = initializeS3(); return await getSignedUrl(s3, new GetObjectCommand(params), { expiresIn: s3UrlExpirySeconds }); @@ -188,7 +204,7 @@ async function uploadFileToS3({ req, file, file_id, basePath = defaultBasePath } try { const inputFilePath = file.path; const userId = req.user.id; - const fileName = `${file_id}__${path.basename(inputFilePath)}`; + const fileName = `${file_id}__${file.originalname}`; const key = getS3Key(basePath, userId, fileName); const stats = await fs.promises.stat(inputFilePath); diff --git a/api/server/services/Files/VectorDB/crud.js b/api/server/services/Files/VectorDB/crud.js index d7018f766..18327d7df 100644 --- a/api/server/services/Files/VectorDB/crud.js +++ b/api/server/services/Files/VectorDB/crud.js @@ -60,13 +60,14 @@ const deleteVectors = async (req, file) => { * have a `path` property that points to the location of the uploaded file. * @param {string} params.file_id - The file ID. * @param {string} [params.entity_id] - The entity ID for shared resources. + * @param {Object} [params.storageMetadata] - Storage metadata for dual storage pattern. 
* * @returns {Promise<{ filepath: string, bytes: number }>} * A promise that resolves to an object containing: * - filepath: The path where the file is saved. * - bytes: The size of the file in bytes. */ -async function uploadVectors({ req, file, file_id, entity_id }) { +async function uploadVectors({ req, file, file_id, entity_id, storageMetadata }) { if (!process.env.RAG_API_URL) { throw new Error('RAG_API_URL not defined'); } @@ -80,6 +81,11 @@ async function uploadVectors({ req, file, file_id, entity_id }) { formData.append('entity_id', entity_id); } + // Include storage metadata for RAG API to store with embeddings + if (storageMetadata) { + formData.append('storage_metadata', JSON.stringify(storageMetadata)); + } + const formHeaders = formData.getHeaders(); const response = await axios.post(`${process.env.RAG_API_URL}/embed`, formData, { diff --git a/api/server/services/Files/process.js b/api/server/services/Files/process.js index 38ccdafdd..7cebdf85b 100644 --- a/api/server/services/Files/process.js +++ b/api/server/services/Files/process.js @@ -11,13 +11,12 @@ const { EModelEndpoint, EToolResources, mergeFileConfig, - hostImageIdSuffix, AgentCapabilities, checkOpenAIStorage, removeNullishValues, - hostImageNamePrefix, isAssistantsEndpoint, } = require('librechat-data-provider'); +const { sanitizeFilename } = require('@librechat/api'); const { EnvVar } = require('@librechat/agents'); const { convertImage, @@ -35,6 +34,29 @@ const { getStrategyFunctions } = require('./strategies'); const { determineFileType } = require('~/server/utils'); const { logger } = require('~/config'); +/** + * Creates a modular file upload wrapper that ensures filename sanitization + * across all storage strategies. This prevents storage-specific implementations + * from having to handle sanitization individually. 
/**
 * Wraps a storage strategy's upload function so it always receives a file
 * object whose `originalname` has been passed through `sanitizeFilename`.
 * The incoming file object is copied, not modified; all other parameters are
 * forwarded untouched.
 *
 * @param {Function} uploadFunction - The storage strategy's upload function
 * @returns {Function} - Wrapped upload function with sanitization
 */
const createSanitizedUploadWrapper = (uploadFunction) => async (params) => {
  const { req, file, file_id, ...extraParams } = params;

  // Shallow copy with the cleaned name; the caller's file object stays as is.
  const fileWithSafeName = Object.assign({}, file, {
    originalname: sanitizeFilename(file.originalname),
  });

  return uploadFunction({ req, file: fileWithSafeName, file_id, ...extraParams });
};
sanitizeFilename(file.originalname), context: isAssistantUpload ? FileContext.assistants : FileContext.message_attachment, model: isAssistantUpload ? req.body.model : undefined, type: file.mimetype, @@ -476,7 +500,7 @@ const processFileUpload = async ({ req, res, metadata }) => { */ const processAgentFileUpload = async ({ req, res, metadata }) => { const { file } = req; - const { agent_id, tool_resource } = metadata; + const { agent_id, tool_resource, file_id, temp_file_id = null } = metadata; if (agent_id && !tool_resource) { throw new Error('No tool resource provided for agent file upload'); } @@ -520,6 +544,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { if (!isFileSearchEnabled) { throw new Error('File search is not enabled for Agents'); } + // Note: File search processing continues to dual storage logic below } else if (tool_resource === EToolResources.ocr) { const isOCREnabled = await checkCapability(req, AgentCapabilities.ocr); if (!isOCREnabled) { @@ -529,7 +554,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { const { handleFileUpload: uploadOCR } = getStrategyFunctions( req.app.locals?.ocr?.strategy ?? FileSources.mistral_ocr, ); - const { file_id, temp_file_id } = metadata; + const { file_id, temp_file_id = null } = metadata; const { text, @@ -568,28 +593,53 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { .json({ message: 'Agent file uploaded and processed successfully', ...result }); } - const source = + // Dual storage pattern for RAG files: Storage + Vector DB + let storageResult, embeddingResult; + const source = req.app.locals.fileStrategy; + + if (tool_resource === EToolResources.file_search) { + // FIRST: Upload to Storage for permanent backup (S3/local/etc.) 
+ const { handleFileUpload } = getStrategyFunctions(source); + const sanitizedUploadFn = createSanitizedUploadWrapper(handleFileUpload); + storageResult = await sanitizedUploadFn({ + req, + file, + file_id, + entity_id, + basePath, + }); + + // SECOND: Upload to Vector DB + const { uploadVectors } = require('./VectorDB/crud'); + + embeddingResult = await uploadVectors({ + req, + file, + file_id, + entity_id, + }); + + // Vector status will be stored at root level, no need for metadata + fileInfoMetadata = {}; + } else { + // Standard single storage for non-RAG files + const { handleFileUpload } = getStrategyFunctions(source); + const sanitizedUploadFn = createSanitizedUploadWrapper(handleFileUpload); + storageResult = await sanitizedUploadFn({ + req, + file, + file_id, + entity_id, + basePath, + }); + } + + const { bytes, filename, filepath: _filepath, height, width } = storageResult; + // For RAG files, use embedding result; for others, use storage result + const embedded = tool_resource === EToolResources.file_search - ? FileSources.vectordb - : req.app.locals.fileStrategy; - - const { handleFileUpload } = getStrategyFunctions(source); - const { file_id, temp_file_id } = metadata; - - const { - bytes, - filename, - filepath: _filepath, - embedded, - height, - width, - } = await handleFileUpload({ - req, - file, - file_id, - entity_id, - basePath, - }); + ? embeddingResult?.embedded + : storageResult.embedded; let filepath = _filepath; @@ -618,7 +668,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { temp_file_id, bytes, filepath, - filename: filename ?? file.originalname, + filename: filename ?? sanitizeFilename(file.originalname), context: messageAttachment ? FileContext.message_attachment : FileContext.agents, model: messageAttachment ? 
undefined : req.body.model, metadata: fileInfoMetadata, @@ -630,6 +680,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { }); const result = await createFile(fileInfo, true); + res.status(200).json({ message: 'Agent file uploaded and processed successfully', ...result }); }; @@ -700,31 +751,24 @@ const processOpenAIImageOutput = async ({ req, buffer, file_id, filename, fileEx const currentDate = new Date(); const formattedDate = currentDate.toISOString(); const _file = await convertImage(req, buffer, undefined, `${file_id}${fileExt}`); + // Determine the correct source for the assistant + const source = + req.body.endpoint === EModelEndpoint.azureAssistants ? FileSources.azure : FileSources.openai; + + // Create only one file record with the correct information const file = { ..._file, usage: 1, user: req.user.id, - type: `image/${req.app.locals.imageOutputType}`, + type: mime.getType(fileExt), createdAt: formattedDate, updatedAt: formattedDate, - source: req.app.locals.fileStrategy, + source, context: FileContext.assistants_output, - file_id: `${file_id}${hostImageIdSuffix}`, - filename: `${hostImageNamePrefix}${filename}`, + file_id, + filename, }; createFile(file, true); - const source = - req.body.endpoint === EModelEndpoint.azureAssistants ? 
FileSources.azure : FileSources.openai; - createFile( - { - ...file, - file_id, - filename, - source, - type: mime.getType(fileExt), - }, - true, - ); return file; }; diff --git a/api/server/services/Files/processFiles.test.js b/api/server/services/Files/processFiles.test.js index 8665d3366..060af22fd 100644 --- a/api/server/services/Files/processFiles.test.js +++ b/api/server/services/Files/processFiles.test.js @@ -24,6 +24,26 @@ jest.mock('librechat-data-provider', () => ({ mergeFileConfig: jest.fn(), removeNullishValues: jest.fn((obj) => obj), isAssistantsEndpoint: jest.fn(), + Constants: { COMMANDS_MAX_LENGTH: 56 }, + PermissionTypes: { + BOOKMARKS: 'BOOKMARKS', + PROMPTS: 'PROMPTS', + MEMORIES: 'MEMORIES', + MULTI_CONVO: 'MULTI_CONVO', + AGENTS: 'AGENTS', + TEMPORARY_CHAT: 'TEMPORARY_CHAT', + RUN_CODE: 'RUN_CODE', + WEB_SEARCH: 'WEB_SEARCH', + FILE_CITATIONS: 'FILE_CITATIONS', + }, + Permissions: { + USE: 'USE', + OPT_OUT: 'OPT_OUT', + }, + SystemRoles: { + USER: 'USER', + ADMIN: 'ADMIN', + }, })); jest.mock('~/server/services/Files/images', () => ({ diff --git a/api/server/services/ToolService.js b/api/server/services/ToolService.js index 86205c1e7..2f2062e14 100644 --- a/api/server/services/ToolService.js +++ b/api/server/services/ToolService.js @@ -522,6 +522,7 @@ async function loadAgentTools({ req, res, agent, tool_resources, openAIApiKey }) if (includesWebSearch) { webSearchCallbacks = createOnSearchResults(res); } + const { loadedTools, toolContextMap } = await loadTools({ agent, functions: true, diff --git a/api/server/services/start/interface.js b/api/server/services/start/interface.js index 455550b93..841aca880 100644 --- a/api/server/services/start/interface.js +++ b/api/server/services/start/interface.js @@ -51,6 +51,7 @@ async function loadDefaultInterface(config, configDefaults, roleName = SystemRol runCode: interfaceConfig?.runCode ?? defaults.runCode, webSearch: interfaceConfig?.webSearch ?? 
defaults.webSearch, fileSearch: interfaceConfig?.fileSearch ?? defaults.fileSearch, + fileCitations: interfaceConfig?.fileCitations ?? defaults.fileCitations, customWelcome: interfaceConfig?.customWelcome ?? defaults.customWelcome, }); @@ -67,6 +68,7 @@ async function loadDefaultInterface(config, configDefaults, roleName = SystemRol [PermissionTypes.RUN_CODE]: { [Permissions.USE]: loadedInterface.runCode }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: loadedInterface.webSearch }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: loadedInterface.fileSearch }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: loadedInterface.fileCitations }, }); await updateAccessPermissions(SystemRoles.ADMIN, { [PermissionTypes.PROMPTS]: { [Permissions.USE]: loadedInterface.prompts }, @@ -81,6 +83,7 @@ async function loadDefaultInterface(config, configDefaults, roleName = SystemRol [PermissionTypes.RUN_CODE]: { [Permissions.USE]: loadedInterface.runCode }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: loadedInterface.webSearch }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: loadedInterface.fileSearch }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: loadedInterface.fileCitations }, }); let i = 0; diff --git a/api/server/services/start/interface.spec.js b/api/server/services/start/interface.spec.js index e60e2730f..98c20df0d 100644 --- a/api/server/services/start/interface.spec.js +++ b/api/server/services/start/interface.spec.js @@ -19,6 +19,7 @@ describe('loadDefaultInterface', () => { runCode: true, webSearch: true, fileSearch: true, + fileCitations: true, }, }; const configDefaults = { interface: {} }; @@ -35,6 +36,7 @@ describe('loadDefaultInterface', () => { [PermissionTypes.RUN_CODE]: { [Permissions.USE]: true }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: true }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: true }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: true }, }); }); @@ -50,6 +52,7 @@ 
describe('loadDefaultInterface', () => { runCode: false, webSearch: false, fileSearch: false, + fileCitations: false, }, }; const configDefaults = { interface: {} }; @@ -66,6 +69,7 @@ describe('loadDefaultInterface', () => { [PermissionTypes.RUN_CODE]: { [Permissions.USE]: false }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: false }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: false }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: false }, }); }); @@ -88,6 +92,7 @@ describe('loadDefaultInterface', () => { [PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined }, }); }); @@ -122,6 +127,7 @@ describe('loadDefaultInterface', () => { [PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined }, }); }); @@ -137,6 +143,7 @@ describe('loadDefaultInterface', () => { runCode: false, webSearch: true, fileSearch: false, + fileCitations: true, }, }; const configDefaults = { interface: {} }; @@ -153,6 +160,7 @@ describe('loadDefaultInterface', () => { [PermissionTypes.RUN_CODE]: { [Permissions.USE]: false }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: true }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: false }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: true }, }); }); @@ -169,6 +177,7 @@ describe('loadDefaultInterface', () => { runCode: true, webSearch: true, fileSearch: true, + fileCitations: true, }, }; @@ -184,6 +193,7 @@ describe('loadDefaultInterface', () => { [PermissionTypes.RUN_CODE]: { [Permissions.USE]: true }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: true }, [PermissionTypes.FILE_SEARCH]: { 
[Permissions.USE]: true }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: true }, }); }); @@ -206,6 +216,7 @@ describe('loadDefaultInterface', () => { [PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined }, }); }); @@ -228,6 +239,7 @@ describe('loadDefaultInterface', () => { [PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined }, }); }); @@ -250,6 +262,7 @@ describe('loadDefaultInterface', () => { [PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined }, }); }); @@ -280,6 +293,7 @@ describe('loadDefaultInterface', () => { [PermissionTypes.RUN_CODE]: { [Permissions.USE]: false }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: true }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined }, }); }); @@ -311,6 +325,7 @@ describe('loadDefaultInterface', () => { [PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: true }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined }, }); }); @@ -324,6 +339,7 @@ describe('loadDefaultInterface', () => { agents: false, temporaryChat: true, runCode: false, + fileCitations: true, }, }; const configDefaults = { interface: {} }; @@ -417,6 +433,45 @@ describe('loadDefaultInterface', () => { 
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: false }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: true }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: true }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: true }, + }); + }); + + it('should call updateAccessPermissions with the correct parameters when fileCitations is true', async () => { + const config = { interface: { fileCitations: true } }; + const configDefaults = { interface: {} }; + + await loadDefaultInterface(config, configDefaults); + + expect(updateAccessPermissions).toHaveBeenCalledWith(SystemRoles.USER, { + [PermissionTypes.PROMPTS]: { [Permissions.USE]: undefined }, + [PermissionTypes.BOOKMARKS]: { [Permissions.USE]: undefined }, + [PermissionTypes.MEMORIES]: { [Permissions.USE]: undefined }, + [PermissionTypes.MULTI_CONVO]: { [Permissions.USE]: undefined }, + [PermissionTypes.AGENTS]: { [Permissions.USE]: undefined }, + [PermissionTypes.TEMPORARY_CHAT]: { [Permissions.USE]: undefined }, + [PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined }, + [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: true }, + }); + }); + + it('should call updateAccessPermissions with false when fileCitations is false', async () => { + const config = { interface: { fileCitations: false } }; + const configDefaults = { interface: {} }; + + await loadDefaultInterface(config, configDefaults); + + expect(updateAccessPermissions).toHaveBeenCalledWith(SystemRoles.USER, { + [PermissionTypes.PROMPTS]: { [Permissions.USE]: undefined }, + [PermissionTypes.BOOKMARKS]: { [Permissions.USE]: undefined }, + [PermissionTypes.MEMORIES]: { [Permissions.USE]: undefined }, + [PermissionTypes.MULTI_CONVO]: { [Permissions.USE]: undefined }, + [PermissionTypes.AGENTS]: { [Permissions.USE]: undefined }, + [PermissionTypes.TEMPORARY_CHAT]: { [Permissions.USE]: undefined }, + [PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined }, + 
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: false }, }); }); }); diff --git a/api/server/utils/files.js b/api/server/utils/files.js index 63cf95d3a..e4825b5ac 100644 --- a/api/server/utils/files.js +++ b/api/server/utils/files.js @@ -44,4 +44,24 @@ const getBufferMetadata = async (buffer) => { }; }; -module.exports = { determineFileType, getBufferMetadata }; +/** + * Removes UUID prefix from filename for clean display + * Pattern: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx__filename.ext + * @param {string} fileName - The filename to clean + * @returns {string} - The cleaned filename without UUID prefix + */ +const cleanFileName = (fileName) => { + if (!fileName) { + return fileName; + } + + // Remove UUID pattern: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx__ + const cleaned = fileName.replace( + /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}__/i, + '', + ); + + return cleaned; +}; + +module.exports = { determineFileType, getBufferMetadata, cleanFileName }; diff --git a/api/test/app/clients/tools/util/fileSearch.test.js b/api/test/app/clients/tools/util/fileSearch.test.js new file mode 100644 index 000000000..ca19f5045 --- /dev/null +++ b/api/test/app/clients/tools/util/fileSearch.test.js @@ -0,0 +1,86 @@ +const { createFileSearchTool } = require('../../../../../app/clients/tools/util/fileSearch'); + +// Mock dependencies +jest.mock('../../../../../models', () => ({ + Files: { + find: jest.fn(), + }, +})); + +jest.mock('../../../../../server/services/Files/VectorDB/crud', () => ({ + queryVectors: jest.fn(), +})); + +jest.mock('../../../../../config', () => ({ + logger: { + warn: jest.fn(), + error: jest.fn(), + debug: jest.fn(), + }, +})); + +const { queryVectors } = require('../../../../../server/services/Files/VectorDB/crud'); + +describe('fileSearch.js - test only new file_id and page additions', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + // Test only the specific 
changes: file_id and page metadata additions + it('should add file_id and page to search result format', async () => { + const mockFiles = [{ file_id: 'test-file-123' }]; + const mockResults = [ + { + data: [ + [ + { + page_content: 'test content', + metadata: { source: 'test.pdf', page: 1 }, + }, + 0.3, + ], + ], + }, + ]; + + queryVectors.mockResolvedValue(mockResults); + + const fileSearchTool = await createFileSearchTool({ + req: { user: { id: 'user1' } }, + files: mockFiles, + entity_id: 'agent-123', + }); + + // Mock the tool's function to return the formatted result + fileSearchTool.func = jest.fn().mockImplementation(async () => { + // Simulate the new format with file_id and page + const formattedResults = [ + { + filename: 'test.pdf', + content: 'test content', + distance: 0.3, + file_id: 'test-file-123', // NEW: added file_id + page: 1, // NEW: added page + }, + ]; + + // NEW: Internal data section for processAgentResponse + const internalData = formattedResults + .map( + (result) => + `File: ${result.filename}\nFile_ID: ${result.file_id}\nRelevance: ${(1.0 - result.distance).toFixed(4)}\nPage: ${result.page || 'N/A'}\nContent: ${result.content}\n`, + ) + .join('\n---\n'); + + return `File: test.pdf\nRelevance: 0.7000\nContent: test content\n\n\n${internalData}\n`; + }); + + const result = await fileSearchTool.func('test'); + + // Verify the new additions + expect(result).toContain('File_ID: test-file-123'); + expect(result).toContain('Page: 1'); + expect(result).toContain(''); + expect(result).toContain(''); + }); +}); diff --git a/api/test/server/services/Files/S3/crud.test.js b/api/test/server/services/Files/S3/crud.test.js new file mode 100644 index 000000000..d847a82cf --- /dev/null +++ b/api/test/server/services/Files/S3/crud.test.js @@ -0,0 +1,72 @@ +const { getS3URL } = require('../../../../../server/services/Files/S3/crud'); + +// Mock AWS SDK +jest.mock('@aws-sdk/client-s3', () => ({ + S3Client: jest.fn(() => ({ + send: jest.fn(), + })), + 
GetObjectCommand: jest.fn(), +})); + +jest.mock('@aws-sdk/s3-request-presigner', () => ({ + getSignedUrl: jest.fn(), +})); + +jest.mock('../../../../../config', () => ({ + logger: { + error: jest.fn(), + }, +})); + +const { getSignedUrl } = require('@aws-sdk/s3-request-presigner'); +const { GetObjectCommand } = require('@aws-sdk/client-s3'); + +describe('S3 crud.js - test only new parameter changes', () => { + beforeEach(() => { + jest.clearAllMocks(); + process.env.AWS_BUCKET_NAME = 'test-bucket'; + }); + + // Test only the new customFilename parameter + it('should include customFilename in response headers when provided', async () => { + getSignedUrl.mockResolvedValue('https://test-presigned-url.com'); + + await getS3URL({ + userId: 'user123', + fileName: 'test.pdf', + customFilename: 'cleaned_filename.pdf', + }); + + // Verify the new ResponseContentDisposition parameter is added to GetObjectCommand + const commandArgs = GetObjectCommand.mock.calls[0][0]; + expect(commandArgs.ResponseContentDisposition).toBe( + 'attachment; filename="cleaned_filename.pdf"', + ); + }); + + // Test only the new contentType parameter + it('should include contentType in response headers when provided', async () => { + getSignedUrl.mockResolvedValue('https://test-presigned-url.com'); + + await getS3URL({ + userId: 'user123', + fileName: 'test.pdf', + contentType: 'application/pdf', + }); + + // Verify the new ResponseContentType parameter is added to GetObjectCommand + const commandArgs = GetObjectCommand.mock.calls[0][0]; + expect(commandArgs.ResponseContentType).toBe('application/pdf'); + }); + + it('should work without new parameters (backward compatibility)', async () => { + getSignedUrl.mockResolvedValue('https://test-presigned-url.com'); + + const result = await getS3URL({ + userId: 'user123', + fileName: 'test.pdf', + }); + + expect(result).toBe('https://test-presigned-url.com'); + }); +}); diff --git a/api/test/services/Files/processAgentResponse.test.js 
b/api/test/services/Files/processAgentResponse.test.js new file mode 100644 index 000000000..8267c0fab --- /dev/null +++ b/api/test/services/Files/processAgentResponse.test.js @@ -0,0 +1,237 @@ +const { processAgentResponse } = require('../../../app/clients/agents/processAgentResponse'); +const { Files } = require('../../../models'); +const { getCustomConfig } = require('../../../server/services/Config/getCustomConfig'); + +// Mock dependencies +jest.mock('../../../models', () => ({ + Files: { + find: jest.fn(), + }, +})); + +jest.mock('../../../server/services/Config/getCustomConfig', () => ({ + getCustomConfig: jest.fn(), +})); + +jest.mock('../../../config', () => ({ + logger: { + warn: jest.fn(), + error: jest.fn(), + debug: jest.fn(), + }, +})); + +describe('processAgentResponse', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('should return response unchanged when no messageId', async () => { + const response = { messageId: null }; + const result = await processAgentResponse(response, 'user123', 'conv123'); + expect(result).toBe(response); + }); + + it('should return response unchanged when no file search results', async () => { + getCustomConfig.mockResolvedValue({ endpoints: { agents: { maxCitations: 10 } } }); + + const response = { messageId: 'msg123' }; + const contentParts = [{ type: 'text', content: 'some text' }]; + + const result = await processAgentResponse(response, 'user123', 'conv123', contentParts); + expect(result).toBe(response); + }); + + it('should process file search results and create attachments', async () => { + getCustomConfig.mockResolvedValue({ + endpoints: { agents: { maxCitations: 10 } }, + fileStrategy: 's3', + }); + + Files.find.mockResolvedValue([ + { + file_id: 'file123', + source: 's3', + filename: 'test.pdf', + }, + ]); + + const response = { messageId: 'msg123' }; + const contentParts = [ + { + type: 'tool_call', + tool_call: { + name: 'file_search', + output: `File: test.pdf +File_ID: file123 +Relevance: 
0.8 +Page: 1 +Storage_Type: s3 +S3_Bucket: test-bucket +S3_Key: uploads/user123/file123__test.pdf +Content: Test content`, + }, + }, + ]; + + const result = await processAgentResponse(response, 'user123', 'conv123', contentParts); + + expect(result.attachments).toBeDefined(); + expect(result.attachments).toHaveLength(1); + expect(result.attachments[0].type).toBe('file_search'); + expect(result.attachments[0].file_search.sources).toBeDefined(); + expect(result.attachments[0].file_search.sources).toHaveLength(1); + + const source = result.attachments[0].file_search.sources[0]; + expect(source.fileId).toBe('file123'); + expect(source.fileName).toBe('test.pdf'); + expect(source.metadata.storageType).toBe('s3'); + }); + + it('should use configured fileStrategy when file metadata is missing', async () => { + getCustomConfig.mockResolvedValue({ + endpoints: { agents: { maxCitations: 10 } }, + fileStrategy: 's3', + }); + + Files.find.mockResolvedValue([ + { + file_id: 'file123', + // source is undefined, should fallback to fileStrategy + }, + ]); + + const response = { messageId: 'msg123' }; + const contentParts = [ + { + type: 'tool_call', + tool_call: { + name: 'file_search', + output: `File: test.pdf +File_ID: file123 +Relevance: 0.8 +Content: Test content`, + }, + }, + ]; + + const result = await processAgentResponse(response, 'user123', 'conv123', contentParts); + + const source = result.attachments[0].file_search.sources[0]; + expect(source.metadata.storageType).toBe('s3'); // Should use fileStrategy + }); + + it('should handle file diversity and allow multiple pages per file', async () => { + getCustomConfig.mockResolvedValue({ + endpoints: { agents: { maxCitations: 5, maxCitationsPerFile: 3 } }, + fileStrategy: 's3', + }); + + Files.find.mockResolvedValue([ + { file_id: 'file1', source: 'local', filename: 'test1.pdf' }, + { file_id: 'file2', source: 'local', filename: 'test2.pdf' }, + ]); + + const response = { messageId: 'msg123' }; + const contentParts = [ + { + 
type: 'tool_call', + tool_call: { + name: 'file_search', + output: `File: test1.pdf +File_ID: file1 +Relevance: 0.9 +Page: 1 +Content: High relevance content + +--- + +File: test1.pdf +File_ID: file1 +Relevance: 0.7 +Page: 2 +Content: Lower relevance content + +--- + +File: test2.pdf +File_ID: file2 +Relevance: 0.8 +Page: 1 +Content: Different file content`, + }, + }, + ]; + + const result = await processAgentResponse(response, 'user123', 'conv123', contentParts); + + const sources = result.attachments[0].file_search.sources; + expect(sources.length).toBeGreaterThanOrEqual(2); // Can include multiple pages per file now + + // Should have both files represented + const fileIds = sources.map((s) => s.fileId); + expect(fileIds).toContain('file1'); + expect(fileIds).toContain('file2'); + + // Should include multiple pages from file1 due to high relevance + const file1Sources = sources.filter((s) => s.fileId === 'file1'); + expect(file1Sources.length).toBeGreaterThanOrEqual(1); + }); + + it('should respect maxCitationsPerFile configuration', async () => { + getCustomConfig.mockResolvedValue({ + endpoints: { agents: { maxCitations: 10, maxCitationsPerFile: 2 } }, + fileStrategy: 'local', + }); + + Files.find.mockResolvedValue([{ file_id: 'file1', source: 'local', filename: 'test1.pdf' }]); + + const response = { messageId: 'msg123' }; + const contentParts = [ + { + type: 'tool_call', + tool_call: { + name: 'file_search', + output: `File: test1.pdf +File_ID: file1 +Relevance: 0.9 +Page: 1 +Content: Page 1 content + +--- + +File: test1.pdf +File_ID: file1 +Relevance: 0.8 +Page: 2 +Content: Page 2 content + +--- + +File: test1.pdf +File_ID: file1 +Relevance: 0.7 +Page: 3 +Content: Page 3 content + +--- + +File: test1.pdf +File_ID: file1 +Relevance: 0.6 +Page: 4 +Content: Page 4 content`, + }, + }, + ]; + + const result = await processAgentResponse(response, 'user123', 'conv123', contentParts); + + const sources = result.attachments[0].file_search.sources; + 
expect(sources).toHaveLength(2); // Should be limited to maxCitationsPerFile (2) + + // Should include the 2 highest relevance pages (0.9 and 0.8) + expect(sources[0].relevance).toBe(0.9); + expect(sources[1].relevance).toBe(0.8); + }); +}); diff --git a/client/src/components/Chat/Messages/Content/ContentParts.tsx b/client/src/components/Chat/Messages/Content/ContentParts.tsx index 9c38dd92d..dcda323c5 100644 --- a/client/src/components/Chat/Messages/Content/ContentParts.tsx +++ b/client/src/components/Chat/Messages/Content/ContentParts.tsx @@ -115,7 +115,7 @@ const ContentParts = memo( <> - + {hasReasoningParts && (
{ + e.preventDefault(); + e.stopPropagation(); + + if (!isFileType || !(refData as any)?.fileId) return; + + try { + const stream = await downloadFile(); + if (stream.data == null || stream.data === '') { + console.error('Error downloading file: No data found'); + showToast({ + status: 'error', + message: localize('com_ui_download_error'), + }); + return; + } + const link = document.createElement('a'); + link.href = stream.data; + link.setAttribute('download', (refData as any).fileName || 'file'); + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + window.URL.revokeObjectURL(stream.data); + } catch (error) { + console.error('Error downloading file:', error); + showToast({ + status: 'error', + message: localize('com_ui_download_error'), + }); + } + }, + [downloadFile, isFileType, refData, localize, showToast], + ); + if (!refData) return null; const getCitationLabel = () => { @@ -138,6 +187,8 @@ export function Citation(props: CitationComponentProps) { label={getCitationLabel()} onMouseEnter={() => setHoveredCitationId(citationId || null)} onMouseLeave={() => setHoveredCitationId(null)} + onClick={isFileType ? 
handleFileDownload : undefined} + isFile={isFileType} /> ); } diff --git a/client/src/components/Web/Context.tsx b/client/src/components/Web/Context.tsx index 8c91e8770..d2fec7373 100644 --- a/client/src/components/Web/Context.tsx +++ b/client/src/components/Web/Context.tsx @@ -28,6 +28,7 @@ const refTypeMap: Record = { search: 'organic', ref: 'references', news: 'topStories', + file: 'references', }; export function useCitation({ diff --git a/client/src/components/Web/SourceHovercard.tsx b/client/src/components/Web/SourceHovercard.tsx index 50f7eec6b..25f95f92e 100644 --- a/client/src/components/Web/SourceHovercard.tsx +++ b/client/src/components/Web/SourceHovercard.tsx @@ -1,6 +1,6 @@ import React, { ReactNode } from 'react'; import * as Ariakit from '@ariakit/react'; -import { ChevronDown } from 'lucide-react'; +import { ChevronDown, Paperclip } from 'lucide-react'; import { VisuallyHidden } from '@ariakit/react'; import { useLocalize } from '~/hooks'; import { cn } from '~/utils'; @@ -17,6 +17,8 @@ interface SourceHovercardProps { label: string; onMouseEnter?: () => void; onMouseLeave?: () => void; + onClick?: (e: React.MouseEvent) => void; + isFile?: boolean; children?: ReactNode; } @@ -46,6 +48,8 @@ export function SourceHovercard({ label, onMouseEnter, onMouseLeave, + onClick, + isFile = false, children, }: SourceHovercardProps) { const localize = useLocalize(); @@ -57,16 +61,27 @@ export function SourceHovercard({ - {label} - + isFile ? ( + + ) : ( + + {label} + + ) } /> @@ -84,24 +99,51 @@ export function SourceHovercard({ {!children && ( <> - - - {source.attribution || domain} - + {isFile ? ( +
+ +
+ ) : ( + + )} + {isFile ? ( + + ) : ( + + {source.attribution || domain} + + )}
-

- {source.title || source.link} -

- {source.snippet && ( - - {source.snippet} - + {isFile ? ( + <> + {source.snippet && ( + + {source.snippet} + + )} + + ) : ( + <> +

+ {source.title || source.link} +

+ {source.snippet && ( + + {source.snippet} + + )} + )} )} diff --git a/client/src/components/Web/Sources.tsx b/client/src/components/Web/Sources.tsx index 28b37efa0..21242b85f 100644 --- a/client/src/components/Web/Sources.tsx +++ b/client/src/components/Web/Sources.tsx @@ -1,27 +1,32 @@ -import React, { useMemo } from 'react'; +import React, { useMemo, useCallback } from 'react'; +import { useRecoilValue } from 'recoil'; import * as Ariakit from '@ariakit/react'; import { VisuallyHidden } from '@ariakit/react'; -import { X, Globe, Newspaper, Image, ChevronDown } from 'lucide-react'; +import { Tools } from 'librechat-data-provider'; +import { X, Globe, Newspaper, Image, ChevronDown, File, Download } from 'lucide-react'; import { - AnimatedTabs, OGDialog, + AnimatedTabs, OGDialogClose, OGDialogTitle, OGDialogContent, OGDialogTrigger, + useToastContext, } from '@librechat/client'; import type { ValidSource, ImageResult } from 'librechat-data-provider'; import { FaviconImage, getCleanDomain } from '~/components/Web/SourceHovercard'; +import SourcesErrorBoundary from './SourcesErrorBoundary'; +import { useFileDownload } from '~/data-provider'; import { useSearchContext } from '~/Providers'; import { useLocalize } from '~/hooks'; +import store from '~/store'; interface SourceItemProps { source: ValidSource; - isNews?: boolean; expanded?: boolean; } -function SourceItem({ source, isNews: _isNews, expanded = false }: SourceItemProps) { +function SourceItem({ source, expanded = false }: SourceItemProps) { const localize = useLocalize(); const domain = getCleanDomain(source.link); @@ -71,11 +76,6 @@ function SourceItem({ source, isNews: _isNews, expanded = false }: SourceItemPro {source.title || source.link} - {/* {'snippet' in source && source.snippet && ( - - {source.snippet} - - )} */}
} @@ -159,6 +159,202 @@ function ImageItem({ image }: { image: ImageResult }) { ); } +// Type for agent file sources (simplified for file citations) +type AgentFileSource = { + file_id: string; + filename: string; + bytes?: number; + type?: string; + pages?: number[]; + relevance?: number; + pageRelevance?: Record; + messageId: string; + toolCallId: string; + metadata?: any; +}; + +interface FileItemProps { + file: AgentFileSource; + messageId: string; + conversationId: string; + expanded?: boolean; +} + +/** + * Sorts page numbers by their relevance scores in descending order (highest first) + */ +function sortPagesByRelevance(pages: number[], pageRelevance?: Record): number[] { + if (!pageRelevance || Object.keys(pageRelevance).length === 0) { + return pages; // Return original order if no relevance data + } + + return [...pages].sort((a, b) => { + const relevanceA = pageRelevance[a] || 0; + const relevanceB = pageRelevance[b] || 0; + return relevanceB - relevanceA; // Highest relevance first + }); +} + +const FileItem = React.memo(function FileItem({ + file, + messageId: _messageId, + conversationId: _conversationId, + expanded = false, +}: FileItemProps) { + const localize = useLocalize(); + const user = useRecoilValue(store.user); + const { showToast } = useToastContext(); + + const { refetch: downloadFile } = useFileDownload(user?.id ?? '', file.file_id); + + // Extract error message logic to avoid duplication + const getErrorMessage = useCallback( + (error: any) => { + const errorString = JSON.stringify(error); + const errorWithResponse = error as any; + const isLocalFileError = + error?.message?.includes('local files') || + errorWithResponse?.response?.data?.error?.includes('local files') || + errorWithResponse?.response?.status === 403 || + errorString.includes('local files') || + errorString.includes('403'); + + return isLocalFileError + ? 
localize('com_sources_download_local_unavailable') + : localize('com_sources_download_failed'); + }, + [localize], + ); + + // Check if file is from local storage + const isLocalFile = file.metadata?.storageType === 'local'; + + const handleDownload = useCallback( + async (e: React.MouseEvent) => { + e.preventDefault(); + e.stopPropagation(); + + // Don't allow download for local files + if (isLocalFile) { + return; + } + try { + const stream = await downloadFile(); + if (stream.data == null || stream.data === '') { + console.error('Error downloading file: No data found'); + showToast({ + status: 'error', + message: localize('com_ui_download_error'), + }); + return; + } + const link = document.createElement('a'); + link.href = stream.data; + link.setAttribute('download', file.filename); + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + window.URL.revokeObjectURL(stream.data); + } catch (error) { + console.error('Error downloading file:', error); + } + }, + [downloadFile, file.filename, isLocalFile, localize, showToast], + ); + const isLoading = false; + + // Memoize file icon computation for performance + const fileIcon = useMemo(() => { + const fileType = file.type?.toLowerCase() || ''; + if (fileType.includes('pdf')) return '📄'; + if (fileType.includes('image')) return '🖼️'; + if (fileType.includes('text')) return '📝'; + if (fileType.includes('word') || fileType.includes('doc')) return '📄'; + if (fileType.includes('excel') || fileType.includes('sheet')) return '📊'; + if (fileType.includes('powerpoint') || fileType.includes('presentation')) return '📈'; + return '📎'; + }, [file.type]); + + // Simple aria label + const downloadAriaLabel = localize('com_sources_download_aria_label', { + filename: file.filename, + status: isLoading ? 
localize('com_sources_downloading_status') : '', + }); + const error = null; + if (expanded) { + return ( + + ); + } + + return ( + + ); +}); + export function StackedFavicons({ sources, start = 0, @@ -185,11 +381,25 @@ export function StackedFavicons({ ); } -function SourcesGroup({ sources, limit = 3 }: { sources: ValidSource[]; limit?: number }) { +const SourcesGroup = React.memo(function SourcesGroup({ + sources, + limit = 3, +}: { + sources: ValidSource[]; + limit?: number; +}) { const localize = useLocalize(); - const visibleSources = sources.slice(0, limit); - const remainingSources = sources.slice(limit); - const hasMoreSources = remainingSources.length > 0; + + // Memoize source slicing for better performance + const { visibleSources, remainingSources, hasMoreSources } = useMemo(() => { + const visible = sources.slice(0, limit); + const remaining = sources.slice(limit); + return { + visibleSources: visible, + remainingSources: remaining, + hasMoreSources: remaining.length > 0, + }; + }, [sources, limit]); return (
@@ -265,6 +475,75 @@ function SourcesGroup({ sources, limit = 3 }: { sources: ValidSource[]; limit?:
); +}); + +interface FilesGroupProps { + files: AgentFileSource[]; + messageId: string; + conversationId: string; + limit?: number; +} + +function FilesGroup({ files, messageId, conversationId, limit = 3 }: FilesGroupProps) { + const localize = useLocalize(); + // If there's only 1 remaining file, show it instead of "+1 files" + const shouldShowAll = files.length <= limit + 1; + const actualLimit = shouldShowAll ? files.length : limit; + const visibleFiles = files.slice(0, actualLimit); + const remainingFiles = files.slice(actualLimit); + const hasMoreFiles = remainingFiles.length > 0; + + return ( +
+ + {visibleFiles.map((file, i) => ( +
+ +
+ ))} + {hasMoreFiles && ( + +
+
+ {remainingFiles.slice(0, 3).map((_, i) => ( + 0 ? 'ml-[-6px]' : ''}`} /> + ))} +
+ + {localize('com_sources_more_files', { count: remainingFiles.length })} + +
+
+ )} + +
+ + {localize('com_sources_agent_files')} + + + + +
+
+
+ {[...visibleFiles, ...remainingFiles].map((file, i) => ( + + ))} +
+
+
+
+
+ ); } function TabWithIcon({ label, icon }: { label: string; icon: React.ReactNode }) { @@ -276,75 +555,105 @@ function TabWithIcon({ label, icon }: { label: string; icon: React.ReactNode }) ); } -export default function Sources() { +interface SourcesProps { + messageId?: string; + conversationId?: string; +} + +function SourcesComponent({ messageId, conversationId }: SourcesProps = {}) { const localize = useLocalize(); const { searchResults } = useSearchContext(); - const { organicSources, topStories, images, hasAnswerBox } = useMemo(() => { + // Simple search results processing with good memoization + const { organicSources, topStories, images, hasAnswerBox, agentFiles } = useMemo(() => { + const organicSourcesMap = new Map(); + const topStoriesMap = new Map(); + const imagesMap = new Map(); + const agentFilesMap = new Map(); + let hasAnswerBox = false; + if (!searchResults) { return { organicSources: [], topStories: [], images: [], hasAnswerBox: false, + agentFiles: [], }; } - const organicSourcesMap = new Map(); - const topStoriesMap = new Map(); - const imagesMap = new Map(); - let hasAnswerBox = false; + // Process search results + for (const result of Object.values(searchResults)) { + if (!result) continue; - Object.values(searchResults).forEach((result) => { - if (!result) return; + // Process organic sources + result.organic?.forEach((source) => { + if (source.link) organicSourcesMap.set(source.link, source); + }); - if (result.organic?.length) { - result.organic.forEach((source) => { - if (source.link) { - organicSourcesMap.set(source.link, source); + // Process references + result.references?.forEach((source) => { + if (source.type === 'image') { + imagesMap.set(source.link, { ...source, imageUrl: source.link }); + } else if ((source as any).type === 'file') { + const fileId = (source as any).fileId || 'unknown'; + const fileName = source.title || 'Unknown File'; + const uniqueKey = `${fileId}_${fileName}`; + + if (agentFilesMap.has(uniqueKey)) { + // 
Merge pages for the same file + const existing = agentFilesMap.get(uniqueKey)!; + const existingPages = existing.pages || []; + const newPages = (source as any).pages || []; + const uniquePages = [...new Set([...existingPages, ...newPages])].sort((a, b) => a - b); + + existing.pages = uniquePages; + existing.relevance = Math.max(existing.relevance || 0, (source as any).relevance || 0); + existing.pageRelevance = { + ...existing.pageRelevance, + ...(source as any).pageRelevance, + }; + } else { + const agentFile: AgentFileSource = { + type: Tools.file_search, + file_id: fileId, + filename: fileName, + bytes: undefined, + metadata: (source as any).metadata, + pages: (source as any).pages, + relevance: (source as any).relevance, + pageRelevance: (source as any).pageRelevance, + messageId: messageId || '', + toolCallId: 'file_search_results', + }; + agentFilesMap.set(uniqueKey, agentFile); } - }); - } - if (result.references?.length) { - result.references.forEach((source) => { - if (source.type === 'image') { - imagesMap.set(source.link, { - ...source, - imageUrl: source.link, - }); - return; - } - if (source.link) { - organicSourcesMap.set(source.link, source); - } - }); - } - if (result.topStories?.length) { - result.topStories.forEach((source) => { - if (source.link) { - topStoriesMap.set(source.link, source); - } - }); - } - if (result.images?.length) { - result.images.forEach((image) => { - if (image.imageUrl) { - imagesMap.set(image.imageUrl, image); - } - }); - } - if (result.answerBox) { - hasAnswerBox = true; - } - }); + } else if (source.link) { + organicSourcesMap.set(source.link, source); + } + }); + + // Process top stories + result.topStories?.forEach((source) => { + if (source.link) topStoriesMap.set(source.link, source); + }); + + // Process images + result.images?.forEach((image) => { + if (image.imageUrl) imagesMap.set(image.imageUrl, image); + }); + + if (result.answerBox) hasAnswerBox = true; + } return { organicSources: 
Array.from(organicSourcesMap.values()), topStories: Array.from(topStoriesMap.values()), images: Array.from(imagesMap.values()), hasAnswerBox, + agentFiles: Array.from(agentFilesMap.values()), }; - }, [searchResults]); + }, [searchResults, messageId]); const tabs = useMemo(() => { const availableTabs: Array<{ label: React.ReactNode; content: React.ReactNode }> = []; @@ -376,18 +685,85 @@ export default function Sources() { }); } + if (agentFiles.length && messageId && conversationId) { + availableTabs.push({ + label: } />, + content: ( + + ), + }); + } + return availableTabs; - }, [organicSources, topStories, images, hasAnswerBox, localize]); + }, [ + organicSources, + topStories, + images, + hasAnswerBox, + agentFiles, + messageId, + conversationId, + localize, + ]); if (!tabs.length) return null; return ( - +
+ +
+ ); +} + +// Enhanced error boundary wrapper with accessibility features +export default function Sources(props: SourcesProps) { + const localize = useLocalize(); + + const handleError = (error: Error, errorInfo: React.ErrorInfo) => { + // Log error for monitoring/analytics + console.error('Sources component error:', { error, errorInfo }); + + // Could send to error tracking service here + // analytics.track('sources_error', { error: error.message }); + }; + + const fallbackUI = ( +
+
+ {localize('com_sources_error_fallback')} +
+ +
+ ); + + return ( + + + ); } diff --git a/client/src/components/Web/SourcesErrorBoundary.tsx b/client/src/components/Web/SourcesErrorBoundary.tsx new file mode 100644 index 000000000..1c5fdc87b --- /dev/null +++ b/client/src/components/Web/SourcesErrorBoundary.tsx @@ -0,0 +1,58 @@ +import React, { Component, ReactNode } from 'react'; + +interface Props { + children: ReactNode; + fallback?: ReactNode; + onError?: (error: Error, errorInfo: React.ErrorInfo) => void; + showDetails?: boolean; +} + +interface State { + hasError: boolean; +} + +class SourcesErrorBoundary extends Component { + state = { hasError: false }; + + static getDerivedStateFromError() { + return { hasError: true }; + } + + componentDidCatch(error: Error, errorInfo: React.ErrorInfo) { + console.error('Sources error:', error); + this.props.onError?.(error, errorInfo); + } + + render() { + if (this.state.hasError) { + // Use custom fallback if provided + if (this.props.fallback) { + return this.props.fallback; + } + + // Default simple error UI (using localized strings from Sources.tsx fallback) + /* eslint-disable i18next/no-literal-string */ + return ( +
+
Sources temporarily unavailable
+ +
+ ); + /* eslint-enable i18next/no-literal-string */ + } + + return this.props.children; + } +} + +export default SourcesErrorBoundary; diff --git a/client/src/components/Web/__tests__/SourcesErrorBoundary.test.tsx b/client/src/components/Web/__tests__/SourcesErrorBoundary.test.tsx new file mode 100644 index 000000000..cc668cb61 --- /dev/null +++ b/client/src/components/Web/__tests__/SourcesErrorBoundary.test.tsx @@ -0,0 +1,67 @@ +import React from 'react'; +import { render, screen, fireEvent } from '@testing-library/react'; +import '@testing-library/jest-dom'; +import SourcesErrorBoundary from '../SourcesErrorBoundary'; + +// Component that throws an error for testing +const ThrowError = ({ shouldThrow }: { shouldThrow: boolean }) => { + if (shouldThrow) { + throw new Error('Test error'); + } + return
{'Normal component'}
; +}; + +// Mock window.location.reload +const mockReload = jest.fn(); +Object.defineProperty(window, 'location', { + value: { + reload: mockReload, + }, + writable: true, +}); + +describe('SourcesErrorBoundary - NEW COMPONENT test', () => { + beforeEach(() => { + jest.clearAllMocks(); + // Suppress error console logs during tests + jest.spyOn(console, 'error').mockImplementation(() => {}); + }); + + afterEach(() => { + jest.restoreAllMocks(); + }); + + it('should render children when there is no error', () => { + render( + + + , + ); + + expect(screen.getByTestId('normal-component')).toBeInTheDocument(); + }); + + it('should render default error UI when error occurs', () => { + render( + + + , + ); + + expect(screen.getByText('Sources temporarily unavailable')).toBeInTheDocument(); + expect(screen.getByRole('button', { name: 'Reload the page' })).toBeInTheDocument(); + }); + + it('should reload page when refresh button is clicked', () => { + render( + + + , + ); + + const refreshButton = screen.getByRole('button', { name: 'Reload the page' }); + fireEvent.click(refreshButton); + + expect(mockReload).toHaveBeenCalled(); + }); +}); diff --git a/client/src/hooks/Messages/useSearchResultsByTurn.ts b/client/src/hooks/Messages/useSearchResultsByTurn.ts index 02d5241ce..5005fc48a 100644 --- a/client/src/hooks/Messages/useSearchResultsByTurn.ts +++ b/client/src/hooks/Messages/useSearchResultsByTurn.ts @@ -1,26 +1,119 @@ import { useMemo } from 'react'; import { TAttachment, Tools, SearchResultData } from 'librechat-data-provider'; +import { useLocalize } from '~/hooks'; + +interface FileSource { + fileId: string; + fileName: string; + pages?: number[]; + relevance?: number; + pageRelevance?: Record; + metadata?: any; +} + +interface DeduplicatedSource { + fileId: string; + fileName: string; + pages: number[]; + relevance: number; + pageRelevance: Record; + metadata?: any; +} /** - * Hook that creates a map of turn numbers to SearchResultData from web search attachments + 
* Hook that creates a map of turn numbers to SearchResultData from web search and agent file search attachments * @param attachments Array of attachment metadata * @returns A map of turn numbers to their corresponding search result data */ export function useSearchResultsByTurn(attachments?: TAttachment[]) { + const localize = useLocalize(); const searchResultsByTurn = useMemo(() => { const turnMap: { [key: string]: SearchResultData } = {}; + let agentFileSearchTurn = 0; attachments?.forEach((attachment) => { + // Handle web search attachments (existing functionality) if (attachment.type === Tools.web_search && attachment[Tools.web_search]) { const searchData = attachment[Tools.web_search]; if (searchData && typeof searchData.turn === 'number') { turnMap[searchData.turn.toString()] = searchData; } } + + // Handle agent file search attachments (following web search pattern) + if (attachment.type === Tools.file_search && attachment[Tools.file_search]) { + const sources = attachment[Tools.file_search].sources; + + // Deduplicate sources by fileId and merge pages + const deduplicatedSources = new Map(); + + sources.forEach((source: FileSource) => { + const fileId = source.fileId; + if (deduplicatedSources.has(fileId)) { + // Merge pages for the same file + const existing = deduplicatedSources.get(fileId); + if (existing) { + const existingPages = existing.pages || []; + const newPages = source.pages || []; + const allPages = [...existingPages, ...newPages]; + // Remove duplicates and sort + const uniquePages = [...new Set(allPages)].sort((a, b) => a - b); + + // Merge page relevance mappings + const existingPageRelevance = existing.pageRelevance || {}; + const newPageRelevance = source.pageRelevance || {}; + const mergedPageRelevance = { ...existingPageRelevance, ...newPageRelevance }; + + existing.pages = uniquePages; + existing.relevance = Math.max(existing.relevance || 0, source.relevance || 0); + existing.pageRelevance = mergedPageRelevance; + } + } else { + 
deduplicatedSources.set(fileId, { + fileId: source.fileId, + fileName: source.fileName, + pages: source.pages || [], + relevance: source.relevance || 0.5, + pageRelevance: source.pageRelevance || {}, + metadata: source.metadata, + }); + } + }); + + // Convert agent file sources to SearchResultData format + const agentSearchData: SearchResultData = { + turn: agentFileSearchTurn, + organic: [], // Agent file search doesn't have organic web results + topStories: [], // No top stories for file search + images: [], // No images for file search + references: Array.from(deduplicatedSources.values()).map( + (source) => + ({ + title: source.fileName || localize('com_file_unknown'), + link: `#file-${source.fileId}`, // Create a pseudo-link for file references + attribution: source.fileName || localize('com_file_unknown'), // Show filename in inline display + snippet: + source.pages && source.pages.length > 0 + ? localize('com_file_pages', { pages: source.pages.join(', ') }) + : '', // Only page numbers for hover + type: 'file' as const, + // Store additional agent-specific data as properties on the reference + fileId: source.fileId, + fileName: source.fileName, + pages: source.pages, + pageRelevance: source.pageRelevance, + metadata: source.metadata, + }) as any, + ), + }; + + turnMap[agentFileSearchTurn.toString()] = agentSearchData; + agentFileSearchTurn++; + } }); return turnMap; - }, [attachments]); + }, [attachments, localize]); return searchResultsByTurn; } diff --git a/client/src/hooks/SSE/useEventHandlers.ts b/client/src/hooks/SSE/useEventHandlers.ts index 0e6eb3d59..21d39f852 100644 --- a/client/src/hooks/SSE/useEventHandlers.ts +++ b/client/src/hooks/SSE/useEventHandlers.ts @@ -444,6 +444,21 @@ export default function useEventHandlers({ isTemporary = false, } = submission; + if (responseMessage?.attachments && responseMessage.attachments.length > 0) { + // Process each attachment through the attachmentHandler + responseMessage.attachments.forEach((attachment) => { 
+ const attachmentData = { + ...attachment, + messageId: responseMessage.messageId, + }; + + attachmentHandler({ + data: attachmentData, + submission: submission as EventSubmission, + }); + }); + } + setShowStopButton(false); setCompleted((prev) => new Set(prev.add(submission.initialResponse.messageId))); @@ -576,6 +591,7 @@ export default function useEventHandlers({ setShowStopButton, location.pathname, applyAgentTemplate, + attachmentHandler, ], ); diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index 9304e43b8..ffb02f9b6 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -516,9 +516,24 @@ "com_sidepanel_manage_files": "Manage Files", "com_sidepanel_mcp_no_servers_with_vars": "No MCP servers with configurable variables.", "com_sidepanel_parameters": "Parameters", + "com_sources_agent_file": "Source Document", + "com_sources_agent_files": "Agent Files", + "com_sources_download_failed": "Download failed", + "com_sources_download_local_unavailable": "Cannot download: File is not saved", + "com_sources_download_aria_label": "Download {{filename}}{{status}}", + "com_sources_downloading_status": " (downloading...)", + "com_sources_error_fallback": "Unable to load sources", "com_sources_image_alt": "Search result image", + "com_sources_more_files": "+{{count}} files", "com_sources_more_sources": "+{{count}} sources", + "com_sources_pages": "Pages", + "com_sources_region_label": "Search results and sources", + "com_file_unknown": "Unknown File", + "com_file_pages": "Pages: {{pages}}", + "com_file_source": "File", + "com_sources_reload_page": "Reload page", "com_sources_tab_all": "All", + "com_sources_tab_files": "Files", "com_sources_tab_images": "Images", "com_sources_tab_news": "News", "com_sources_title": "Sources", @@ -950,6 +965,7 @@ "com_ui_quality": "Quality", "com_ui_read_aloud": "Read aloud", "com_ui_redirecting_to_provider": "Redirecting to {{0}}, please wait...", + 
"com_ui_refresh": "Refresh", "com_ui_reference_saved_memories": "Reference saved memories", "com_ui_reference_saved_memories_description": "Allow the assistant to reference and use your saved memories when responding", "com_ui_refresh_link": "Refresh link", diff --git a/client/src/utils/citations.ts b/client/src/utils/citations.ts index 3c3e0ed41..5ed70a644 100644 --- a/client/src/utils/citations.ts +++ b/client/src/utils/citations.ts @@ -1,5 +1,5 @@ export const SPAN_REGEX = /(\\ue203.*?\\ue204)/g; export const COMPOSITE_REGEX = /(\\ue200.*?\\ue201)/g; -export const STANDALONE_PATTERN = /\\ue202turn(\d+)(search|image|news|video|ref)(\d+)/g; +export const STANDALONE_PATTERN = /\\ue202turn(\d+)(search|image|news|video|ref|file)(\d+)/g; export const CLEANUP_REGEX = /\\ue200|\\ue201|\\ue202|\\ue203|\\ue204|\\ue206/g; -export const INVALID_CITATION_REGEX = /\s*\\ue202turn\d+(search|news|image|video|ref)\d+/g; +export const INVALID_CITATION_REGEX = /\s*\\ue202turn\d+(search|news|image|video|ref|file)\d+/g; diff --git a/librechat.example.yaml b/librechat.example.yaml index 3a9230b50..924f5b7b9 100644 --- a/librechat.example.yaml +++ b/librechat.example.yaml @@ -12,10 +12,7 @@ cache: true # Custom interface configuration interface: - customWelcome: "Welcome to LibreChat! Enjoy your experience." - # MCP Servers UI configuration - mcpServers: - placeholder: 'MCP Servers' + customWelcome: 'Welcome to LibreChat! Enjoy your experience.' # Enable/disable file search as a chatarea selection (default: true) # Note: This setting does not disable the Agents File Search Capability. # To disable the Agents Capability, see the Agents Endpoint configuration instead. 
@@ -30,7 +27,7 @@ interface: externalUrl: 'https://librechat.ai/tos' openNewTab: true modalAcceptance: true - modalTitle: "Terms of Service for LibreChat" + modalTitle: 'Terms of Service for LibreChat' modalContent: | # Terms and Conditions for LibreChat @@ -77,6 +74,7 @@ interface: bookmarks: true multiConvo: true agents: true + fileCitations: true # Temporary chat retention period in hours (default: 720, min: 1, max: 8760) # temporaryChatRetention: 1 @@ -93,7 +91,6 @@ registration: # allowedDomains: # - "gmail.com" - # Example Balance settings # balance: # enabled: false @@ -133,9 +130,9 @@ registration: # Example Actions Object Structure actions: allowedDomains: - - "swapi.dev" - - "librechat.ai" - - "google.com" + - 'swapi.dev' + - 'librechat.ai' + - 'google.com' # Example MCP Servers Object Structure # mcpServers: @@ -187,6 +184,13 @@ endpoints: # maxRecursionLimit: 100 # # (optional) Disable the builder interface for agents # disableBuilder: false + # # (optional) Maximum total citations to include in agent responses, defaults to 30 + # maxCitations: 30 + # # (optional) Maximum citations per file to include in agent responses, defaults to 7 + # maxCitationsPerFile: 7 + # # (optional) Minimum relevance score for sources to be included in responses, defaults to 0.45 (45% relevance threshold) + # # Set to 0.0 to show all sources (no filtering), or higher like 0.7 for stricter filtering + # minRelevanceScore: 0.45 # # (optional) Agent Capabilities available to all users. Omit the ones you wish to exclude. Defaults to list below. 
# capabilities: ["execute_code", "file_search", "actions", "tools"] custom: @@ -269,15 +273,15 @@ endpoints: modelDisplayLabel: 'OpenRouter' # Portkey AI Example - - name: "Portkey" - apiKey: "dummy" + - name: 'Portkey' + apiKey: 'dummy' baseURL: 'https://api.portkey.ai/v1' headers: - x-portkey-api-key: '${PORTKEY_API_KEY}' - x-portkey-virtual-key: '${PORTKEY_OPENAI_VIRTUAL_KEY}' + x-portkey-api-key: '${PORTKEY_API_KEY}' + x-portkey-virtual-key: '${PORTKEY_OPENAI_VIRTUAL_KEY}' models: - default: ['gpt-4o-mini', 'gpt-4o', 'chatgpt-4o-latest'] - fetch: true + default: ['gpt-4o-mini', 'gpt-4o', 'chatgpt-4o-latest'] + fetch: true titleConvo: true titleModel: 'current_model' summarize: false diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index e4d038b2c..75f66474c 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -255,6 +255,9 @@ export const agentsEndpointSchema = baseEndpointSchema recursionLimit: z.number().optional(), disableBuilder: z.boolean().optional().default(false), maxRecursionLimit: z.number().optional(), + maxCitations: z.number().min(1).max(50).optional().default(30), + maxCitationsPerFile: z.number().min(1).max(10).optional().default(7), + minRelevanceScore: z.number().min(0.0).max(1.0).optional().default(0.45), allowedProviders: z.array(z.union([z.string(), eModelEndpointSchema])).optional(), capabilities: z .array(z.nativeEnum(AgentCapabilities)) @@ -265,6 +268,9 @@ export const agentsEndpointSchema = baseEndpointSchema .default({ disableBuilder: false, capabilities: defaultAgentCapabilities, + maxCitations: 30, + maxCitationsPerFile: 7, + minRelevanceScore: 0.45, }); export type TAgentsEndpoint = z.infer; @@ -518,6 +524,7 @@ export const intefaceSchema = z runCode: z.boolean().optional(), webSearch: z.boolean().optional(), fileSearch: z.boolean().optional(), + fileCitations: z.boolean().optional(), }) .default({ endpointsMenu: true, @@ -534,6 +541,7 @@ export const 
intefaceSchema = z runCode: true, webSearch: true, fileSearch: true, + fileCitations: true, }); export type TInterfaceConfig = z.infer; diff --git a/packages/data-provider/src/permissions.ts b/packages/data-provider/src/permissions.ts index a2a1ec771..c39538324 100644 --- a/packages/data-provider/src/permissions.ts +++ b/packages/data-provider/src/permissions.ts @@ -40,6 +40,10 @@ export enum PermissionTypes { * Type for using the "File Search" feature */ FILE_SEARCH = 'FILE_SEARCH', + /** + * Type for using the "File Citations" feature in agents + */ + FILE_CITATIONS = 'FILE_CITATIONS', } /** @@ -112,6 +116,11 @@ export const fileSearchPermissionsSchema = z.object({ }); export type TFileSearchPermissions = z.infer; +export const fileCitationsPermissionsSchema = z.object({ + [Permissions.USE]: z.boolean().default(true), +}); +export type TFileCitationsPermissions = z.infer; + // Define a single permissions schema that holds all permission types. export const permissionsSchema = z.object({ [PermissionTypes.PROMPTS]: promptPermissionsSchema, @@ -123,4 +132,5 @@ export const permissionsSchema = z.object({ [PermissionTypes.RUN_CODE]: runCodePermissionsSchema, [PermissionTypes.WEB_SEARCH]: webSearchPermissionsSchema, [PermissionTypes.FILE_SEARCH]: fileSearchPermissionsSchema, + [PermissionTypes.FILE_CITATIONS]: fileCitationsPermissionsSchema, }); diff --git a/packages/data-provider/src/roles.ts b/packages/data-provider/src/roles.ts index 487e9a608..4a0b99a4b 100644 --- a/packages/data-provider/src/roles.ts +++ b/packages/data-provider/src/roles.ts @@ -12,6 +12,7 @@ import { fileSearchPermissionsSchema, multiConvoPermissionsSchema, temporaryChatPermissionsSchema, + fileCitationsPermissionsSchema, } from './permissions'; /** @@ -78,6 +79,9 @@ const defaultRolesSchema = z.object({ [PermissionTypes.FILE_SEARCH]: fileSearchPermissionsSchema.extend({ [Permissions.USE]: z.boolean().default(true), }), + [PermissionTypes.FILE_CITATIONS]: fileCitationsPermissionsSchema.extend({ + 
[Permissions.USE]: z.boolean().default(true), + }), }), }), [SystemRoles.USER]: roleSchema.extend({ @@ -125,6 +129,9 @@ export const roleDefaults = defaultRolesSchema.parse({ [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: true, }, + [PermissionTypes.FILE_CITATIONS]: { + [Permissions.USE]: true, + }, }, }, [SystemRoles.USER]: { @@ -139,6 +146,7 @@ export const roleDefaults = defaultRolesSchema.parse({ [PermissionTypes.RUN_CODE]: {}, [PermissionTypes.WEB_SEARCH]: {}, [PermissionTypes.FILE_SEARCH]: {}, + [PermissionTypes.FILE_CITATIONS]: {}, }, }, }); diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index 71eae3824..1d8f9df77 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -551,6 +551,7 @@ export type TAttachmentMetadata = { messageId: string; toolCallId: string; [Tools.web_search]?: SearchResultData; + [Tools.file_search]?: SearchResultData; [Tools.memory]?: MemoryArtifact; }; diff --git a/packages/data-provider/src/types/web.ts b/packages/data-provider/src/types/web.ts index 7e8259068..01cebc3df 100644 --- a/packages/data-provider/src/types/web.ts +++ b/packages/data-provider/src/types/web.ts @@ -34,7 +34,7 @@ export type ValidSource = ProcessedOrganic | ProcessedTopStory; export type ResultReference = { link: string; - type: 'link' | 'image' | 'video'; + type: 'link' | 'image' | 'video' | 'file'; title?: string; attribution?: string; }; diff --git a/packages/data-schemas/src/schema/role.ts b/packages/data-schemas/src/schema/role.ts index 80d1e632a..43da45a49 100644 --- a/packages/data-schemas/src/schema/role.ts +++ b/packages/data-schemas/src/schema/role.ts @@ -42,6 +42,9 @@ const rolePermissionsSchema = new Schema( [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: { type: Boolean, default: true }, }, + [PermissionTypes.FILE_CITATIONS]: { + [Permissions.USE]: { type: Boolean, default: true }, + }, }, { _id: false }, ); @@ -73,6 +76,7 @@ const roleSchema: Schema = new 
Schema({ [PermissionTypes.RUN_CODE]: { [Permissions.USE]: true }, [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: true }, [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: true }, + [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: true }, }), }, });